[LPT] Copy constant with several outputs before blob update (cherry-pick to master) (#2198)
* [LPT] Copy constant implementation
* [LPT] the same Constant ops as FQ interval boundaries
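The second bullet refers to how the test subgraphs are built: a single Constant node is wired into several FakeQuantize inputs (the same Constant is used for both the input and the output boundary on each side), so the Constant ends up with several consumers. That is the multi-output-constant case the copy-constant-before-blob-update logic has to handle, and it is what the updated MatMulWithOptimizedConstantFakeQuantizeFunction in the diff below exercises. A minimal, self-contained sketch of the idea (shapes, values and names here are illustrative, not taken from the repository):

#include <memory>
#include <vector>
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>

// Build a FakeQuantize whose interval boundaries reuse the same Constant ops,
// so each Constant has more than one consumer in the graph.
std::shared_ptr<ngraph::Function> makeFqWithSharedConstants() {
    using namespace ngraph;
    const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{ 1, 16 });

    // One Constant per boundary value; each is wired into two FakeQuantize inputs.
    const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{ 1 }, std::vector<float>{ 0.f });
    const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{ 1 }, std::vector<float>{ 2.55f });

    const auto fq = std::make_shared<opset1::FakeQuantize>(
        input,
        low,    // input_low
        high,   // input_high
        low,    // output_low  -- same Constant as input_low
        high,   // output_high -- same Constant as input_high
        256);   // quantization levels

    ResultVector results{ std::make_shared<opset1::Result>(fq) };
    return std::make_shared<Function>(results, ParameterVector{ input }, "FqWithSharedConstants");
}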
@@ -11,8 +11,7 @@ using namespace LayerTestsDefinitions;
 namespace {
 
 const std::vector<InferenceEngine::Precision> netPrecisions = {
-    InferenceEngine::Precision::FP32,
-    InferenceEngine::Precision::FP16
+    InferenceEngine::Precision::FP32
 };
 
 const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues> params = {
@@ -22,10 +21,15 @@ const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantize
     },
 };
 
+const std::vector<std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>> inputShapes = {
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 10, 16 }) }),
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 16, 10 }) })
+};
+
 INSTANTIATE_TEST_CASE_P(LPT, MatMulWithOptimizedConstantFakeQuantizeTransformation,
     ::testing::Combine(
         ::testing::ValuesIn(netPrecisions),
-        ::testing::Values(InferenceEngine::SizeVector({ 1, 16 })),
+        ::testing::ValuesIn(inputShapes),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::ValuesIn(params)),
     MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName);
@@ -21,10 +21,15 @@ const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantize
     }
 };
 
+const std::vector<std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>> inputShapes = {
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 10, 16 }) }),
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 16, 10 }) })
+};
+
 INSTANTIATE_TEST_CASE_P(LPT, MatMulWithOptimizedConstantFakeQuantizeTransformation,
     ::testing::Combine(
         ::testing::ValuesIn(netPrecisions),
-        ::testing::Values(InferenceEngine::SizeVector({ 1, 16 })),
+        ::testing::ValuesIn(inputShapes),
         ::testing::Values(CommonTestUtils::DEVICE_GPU),
         ::testing::ValuesIn(params)),
     MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName);
@@ -20,7 +20,7 @@ public:
 
 typedef std::tuple<
     InferenceEngine::Precision,
-    InferenceEngine::SizeVector,
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>,
     std::string,
     MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues
 > MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams;
@@ -23,16 +23,16 @@ namespace LayerTestsDefinitions {
 std::string MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName(
     testing::TestParamInfo<MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams> obj) {
     InferenceEngine::Precision netPrecision;
-    InferenceEngine::SizeVector inputShape;
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector> shapes;
     std::string targetDevice;
     InferenceEngine::details::LayerTransformation::Params params;
     MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues param;
 
-    std::tie(netPrecision, inputShape, targetDevice, param) = obj.param;
+    std::tie(netPrecision, shapes, targetDevice, param) = obj.param;
 
     std::ostringstream result;
     result << netPrecision.name() << "_" <<
-        CommonTestUtils::vec2str(inputShape) << "_" <<
+        CommonTestUtils::vec2str(shapes.first) << "_" << CommonTestUtils::vec2str(shapes.second) << "_" <<
         targetDevice << "_" <<
         param.fqOnData << "_" <<
         param.fqOnWeights;
@@ -43,15 +43,16 @@ void MatMulWithOptimizedConstantFakeQuantizeTransformation::SetUp() {
     threshold = 0.01f;
 
     InferenceEngine::Precision netPrecision;
-    InferenceEngine::SizeVector inputShape;
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector> shapes;
     InferenceEngine::details::LayerTransformation::Params params;
     MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues param;
-    std::tie(netPrecision, inputShape, targetDevice, param) = this->GetParam();
+    std::tie(netPrecision, shapes, targetDevice, param) = this->GetParam();
     auto precision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
 
     function = ngraph::builder::subgraph::MatMulWithOptimizedConstantFakeQuantizeFunction::getOriginal(
         precision,
-        inputShape,
+        shapes.first,
+        shapes.second,
         param.fqOnData,
         param.fqOnWeights);
 }
@@ -16,7 +16,8 @@ class MatMulWithOptimizedConstantFakeQuantizeFunction {
 public:
     static std::shared_ptr<ngraph::Function> getOriginal(
         const ngraph::element::Type precision,
-        const ngraph::Shape& inputShape,
+        const ngraph::Shape& inputShape1,
+        const ngraph::Shape& inputShape2,
         const FakeQuantizeOnData& fqOnData,
         const FakeQuantizeOnData& fqOnWeights);
 };
@@ -13,34 +13,41 @@ namespace subgraph {
 
 std::shared_ptr<ngraph::Function> MatMulWithOptimizedConstantFakeQuantizeFunction::getOriginal(
     const ngraph::element::Type precision,
-    const ngraph::Shape& inputShape,
+    const ngraph::Shape& inputShape1,
+    const ngraph::Shape& inputShape2,
     const FakeQuantizeOnData& fqOnData,
     const FakeQuantizeOnData& fqOnWeights) {
-    const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape));
-    const auto fakeQuantizeOnActivations = fqOnData.empty() ?
-        nullptr :
-        ngraph::builder::makeFakeQuantize(
-            input, precision, fqOnData.quantizationLevel, fqOnData.constantShape,
-            fqOnData.inputLowValues, fqOnData.inputHighValues, fqOnData.outputLowValues, fqOnData.outputHighValues);
+    const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape1));
 
-    const ngraph::Shape weightsShape = { inputShape[1], 10 };
+    const auto lowConstantOnActivations = std::make_shared<ngraph::opset1::Constant>(precision, fqOnData.constantShape, fqOnData.inputLowValues);
+    const auto highConstantOnActivations = std::make_shared<ngraph::opset1::Constant>(precision, fqOnData.constantShape, fqOnData.inputHighValues);
+    const auto fakeQuantizeOnActivations = std::make_shared<ngraph::opset1::FakeQuantize>(
+        input,
+        lowConstantOnActivations,
+        highConstantOnActivations,
+        lowConstantOnActivations,
+        highConstantOnActivations,
+        fqOnWeights.quantizationLevel);
 
+    const ngraph::Shape weightsShape = { inputShape2[0], inputShape1[1] };
     const std::vector<float> weigths(weightsShape[0] * weightsShape[1], 10.f);
 
     const auto weightsConst = std::make_shared<ngraph::opset1::Constant>(precision, weightsShape, weigths);
-    const auto lowConstant = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputLowValues);
-    const auto highConstant = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputHighValues);
+    const auto lowConstantOnWeights = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputLowValues);
+    const auto highConstantOnWeights = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputHighValues);
     const auto fakeQuantizeOnWeights = std::make_shared<ngraph::opset1::FakeQuantize>(
         weightsConst,
-        lowConstant,
-        highConstant,
-        lowConstant,
-        highConstant,
+        lowConstantOnWeights,
+        highConstantOnWeights,
+        lowConstantOnWeights,
+        highConstantOnWeights,
         fqOnWeights.quantizationLevel);
 
     const auto matMul = std::make_shared<ngraph::opset1::MatMul>(
-        fqOnData.empty() ? input : fakeQuantizeOnActivations,
+        fakeQuantizeOnActivations,
         fakeQuantizeOnWeights,
         false,
-        false);
+        inputShape1[1] != inputShape2[0]);
 
     ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(matMul) };
     return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "MatMulWithOptimizedConstantFakeQuantizeFunction");