[LPT] Copy constant with several outputs before blob update (cherry-pick to master) (#2198)

* [LPT] Copy constant implementation

* [LPT] Use the same Constant ops as FQ interval boundaries in the test subgraphs
Edward Shogulin, 2020-09-15 09:18:58 +03:00 (committed by GitHub)
parent ff3c5fce99
commit ac2370b420
15 changed files with 190 additions and 102 deletions
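For context on what the test changes below exercise: the pattern at issue is a Constant with several consumers, e.g. one Constant op wired into both the input and output interval boundaries of the same FakeQuantize. If a transformation updates such a constant's blob in place without copying the Constant first, the edit leaks into every other consumer. A minimal sketch of that shared-boundary pattern, not taken from this diff (hypothetical helper name, assuming the 2020-era nGraph opset1 API):

#include <memory>
#include <vector>

#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>

// Builds a FakeQuantize whose boundary Constants each have two consumers:
// `low` feeds input_low and output_low, `high` feeds input_high and output_high.
std::shared_ptr<ngraph::Function> makeSharedBoundaryFakeQuantize() {
    using namespace ngraph;
    const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{ 1, 16 });
    const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{ 0.f });
    const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{}, std::vector<float>{ 25.5f });
    // Reusing the same Constant instances means an in-place blob update on one
    // boundary input would silently change the other; LPT must copy the Constant first.
    const auto fq = std::make_shared<opset1::FakeQuantize>(input, low, high, low, high, 256ul);
    return std::make_shared<Function>(NodeVector{ fq }, ParameterVector{ input });
}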

View File

@@ -11,8 +11,7 @@ using namespace LayerTestsDefinitions;
 namespace {

 const std::vector<InferenceEngine::Precision> netPrecisions = {
-    InferenceEngine::Precision::FP32,
-    InferenceEngine::Precision::FP16
+    InferenceEngine::Precision::FP32
 };

 const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues> params = {
@@ -22,10 +21,15 @@ const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantize
     },
 };

+const std::vector<std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>> inputShapes = {
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 10, 16 }) }),
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 16, 10 }) })
+};
+
 INSTANTIATE_TEST_CASE_P(LPT, MatMulWithOptimizedConstantFakeQuantizeTransformation,
     ::testing::Combine(
         ::testing::ValuesIn(netPrecisions),
-        ::testing::Values(InferenceEngine::SizeVector({ 1, 16 })),
+        ::testing::ValuesIn(inputShapes),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::ValuesIn(params)),
     MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName);

View File

@@ -21,10 +21,15 @@ const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantize
     }
 };

+const std::vector<std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>> inputShapes = {
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 10, 16 }) }),
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 16, 10 }) })
+};
+
 INSTANTIATE_TEST_CASE_P(LPT, MatMulWithOptimizedConstantFakeQuantizeTransformation,
     ::testing::Combine(
         ::testing::ValuesIn(netPrecisions),
-        ::testing::Values(InferenceEngine::SizeVector({ 1, 16 })),
+        ::testing::ValuesIn(inputShapes),
         ::testing::Values(CommonTestUtils::DEVICE_GPU),
         ::testing::ValuesIn(params)),
     MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName);

View File

@@ -20,7 +20,7 @@ public:
 typedef std::tuple<
     InferenceEngine::Precision,
-    InferenceEngine::SizeVector,
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>,
     std::string,
     MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues
 > MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams;

View File

@@ -23,16 +23,16 @@ namespace LayerTestsDefinitions {
 std::string MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName(
     testing::TestParamInfo<MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams> obj) {
     InferenceEngine::Precision netPrecision;
-    InferenceEngine::SizeVector inputShape;
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector> shapes;
     std::string targetDevice;
     InferenceEngine::details::LayerTransformation::Params params;
     MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues param;
-    std::tie(netPrecision, inputShape, targetDevice, param) = obj.param;
+    std::tie(netPrecision, shapes, targetDevice, param) = obj.param;

     std::ostringstream result;
     result << netPrecision.name() << "_" <<
-        CommonTestUtils::vec2str(inputShape) << "_" <<
+        CommonTestUtils::vec2str(shapes.first) << "_" << CommonTestUtils::vec2str(shapes.second) << "_" <<
         targetDevice << "_" <<
         param.fqOnData << "_" <<
         param.fqOnWeights;
@@ -43,15 +43,16 @@ void MatMulWithOptimizedConstantFakeQuantizeTransformation::SetUp() {
     threshold = 0.01f;

     InferenceEngine::Precision netPrecision;
-    InferenceEngine::SizeVector inputShape;
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector> shapes;
     InferenceEngine::details::LayerTransformation::Params params;
     MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues param;
-    std::tie(netPrecision, inputShape, targetDevice, param) = this->GetParam();
+    std::tie(netPrecision, shapes, targetDevice, param) = this->GetParam();
     auto precision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

     function = ngraph::builder::subgraph::MatMulWithOptimizedConstantFakeQuantizeFunction::getOriginal(
         precision,
-        inputShape,
+        shapes.first,
+        shapes.second,
         param.fqOnData,
         param.fqOnWeights);
 }

View File

@@ -16,7 +16,8 @@ class MatMulWithOptimizedConstantFakeQuantizeFunction {
 public:
     static std::shared_ptr<ngraph::Function> getOriginal(
         const ngraph::element::Type precision,
-        const ngraph::Shape& inputShape,
+        const ngraph::Shape& inputShape1,
+        const ngraph::Shape& inputShape2,
         const FakeQuantizeOnData& fqOnData,
         const FakeQuantizeOnData& fqOnWeights);
 };

View File

@@ -13,34 +13,41 @@ namespace subgraph {
 std::shared_ptr<ngraph::Function> MatMulWithOptimizedConstantFakeQuantizeFunction::getOriginal(
     const ngraph::element::Type precision,
-    const ngraph::Shape& inputShape,
+    const ngraph::Shape& inputShape1,
+    const ngraph::Shape& inputShape2,
     const FakeQuantizeOnData& fqOnData,
     const FakeQuantizeOnData& fqOnWeights) {
-    const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape));
-    const auto fakeQuantizeOnActivations = fqOnData.empty() ?
-        nullptr :
-        ngraph::builder::makeFakeQuantize(
-            input, precision, fqOnData.quantizationLevel, fqOnData.constantShape,
-            fqOnData.inputLowValues, fqOnData.inputHighValues, fqOnData.outputLowValues, fqOnData.outputHighValues);
+    const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape1));

-    const ngraph::Shape weightsShape = { inputShape[1], 10 };
+    const auto lowConstantOnActivations = std::make_shared<ngraph::opset1::Constant>(precision, fqOnData.constantShape, fqOnData.inputLowValues);
+    const auto highConstantOnActivations = std::make_shared<ngraph::opset1::Constant>(precision, fqOnData.constantShape, fqOnData.inputHighValues);
+    const auto fakeQuantizeOnActivations = std::make_shared<ngraph::opset1::FakeQuantize>(
+        input,
+        lowConstantOnActivations,
+        highConstantOnActivations,
+        lowConstantOnActivations,
+        highConstantOnActivations,
+        fqOnWeights.quantizationLevel);

+    const ngraph::Shape weightsShape = { inputShape2[0], inputShape1[1] };
     const std::vector<float> weigths(weightsShape[0] * weightsShape[1], 10.f);
     const auto weightsConst = std::make_shared<ngraph::opset1::Constant>(precision, weightsShape, weigths);
-    const auto lowConstant = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputLowValues);
-    const auto highConstant = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputHighValues);
+    const auto lowConstantOnWeights = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputLowValues);
+    const auto highConstantOnWeights = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputHighValues);
     const auto fakeQuantizeOnWeights = std::make_shared<ngraph::opset1::FakeQuantize>(
         weightsConst,
-        lowConstant,
-        highConstant,
-        lowConstant,
-        highConstant,
+        lowConstantOnWeights,
+        highConstantOnWeights,
+        lowConstantOnWeights,
+        highConstantOnWeights,
         fqOnWeights.quantizationLevel);

     const auto matMul = std::make_shared<ngraph::opset1::MatMul>(
-        fqOnData.empty() ? input : fakeQuantizeOnActivations,
+        fakeQuantizeOnActivations,
         fakeQuantizeOnWeights,
         false,
-        false);
+        inputShape1[1] != inputShape2[0]);

     ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(matMul) };
     return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "MatMulWithOptimizedConstantFakeQuantizeFunction");
 }
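One detail worth noting in the hunk above: the MatMul's transpose_b flag is no longer hard-coded to false but derived from the two input shapes. A minimal sketch of that derivation, plugging in the first shape pair from the test instantiations above (the variable names here are illustrative, not from the diff):

#include <ngraph/shape.hpp>

// Values taken from the first inputShapes pair: { 1, 16 } x { 10, 16 }.
const ngraph::Shape inputShape1{ 1, 16 };
const ngraph::Shape inputShape2{ 10, 16 };
// The inner dimensions disagree (16 vs 10), so transpose_b becomes true and the
// { 10, 16 } weights are consumed as { 16, 10 }. For { 1, 16 } x { 16, 10 } the
// dimensions already line up and the flag stays false.
const bool transposeB = inputShape1[1] != inputShape2[0];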