[LPT] Multiinput with one parent and FQ with three Constant (#2066)

* [LPT] FakeQuantize with three constants

* [LPT] Dequantization ops on the inputs with one parent
This commit is contained in:
Edward Shogulin 2020-09-07 20:31:45 +03:00 committed by GitHub
parent b225ddf414
commit dc8bbd930f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 616 additions and 74 deletions

View File

@ -140,7 +140,9 @@ public:
static void replaceLayer(TransformationContext& context, const CNNLayerPtr source, const CNNLayerPtr target);
static CNNLayerPtr addScaleShiftBetween(
// Add ScaleShift between parent and child layers. Affected edges (output and input ports) are not specified.
// As result ScaleShift will be added for all edges between parent and children.
static std::vector<CNNLayerPtr> addScaleShiftBetween(
TransformationContext& context,
const CNNLayerPtr parent,
const CNNLayerPtr child,
@ -158,7 +160,8 @@ public:
DataPtr parentOutData,
CNNLayer::Ptr layer,
const std::string& nextLayerName,
ICNNNetwork& net);
ICNNNetwork& net,
const int childInsDataIndex = -1);
IE_SUPPRESS_DEPRECATED_START
static void fillInScaleShift(ScaleShiftLayer* layer, const size_t channels, const float* scales, const float* shifts);

View File

@ -105,8 +105,14 @@ void ActivationTransformation::transform(TransformationContext& context, CNNLaye
const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(*activationLayer);
for (const CNNLayerPtr& child : children) {
CNNLayerPtr dequantizationLayer = CNNNetworkHelper::addScaleShiftBetween(context, activationLayer, child,
DequantizationDetails(scales, shifts));
context.dequantizationLayersNames.insert(dequantizationLayer->name);
const std::vector<CNNLayerPtr> dequantizationLayers = CNNNetworkHelper::addScaleShiftBetween(
context,
activationLayer,
child,
DequantizationDetails(scales, shifts));
for (const auto& dequantizationLayer : dequantizationLayers) {
context.dequantizationLayersNames.insert(dequantizationLayer->name);
}
}
}

View File

@ -253,12 +253,15 @@ void ConcatTransformation::addDequantizationLayers(
getLayerDequantizationCallback(*layer, layer->name, layerDequantizationScales, layerDequantizationShifts);
}
CNNLayerPtr dequantizationLayer = CNNNetworkHelper::addScaleShiftBetween(
const std::vector<CNNLayerPtr> dequantizationLayers = CNNNetworkHelper::addScaleShiftBetween(
context,
std::make_shared<CNNLayer>(*layer),
child,
DequantizationDetails(layerDequantizationScales, layerDequantizationShifts, layerDequantizationScales.size()));
context.dequantizationLayersNames.insert(dequantizationLayer->name);
for (const CNNLayerPtr& dequantizationLayer : dequantizationLayers) {
context.dequantizationLayersNames.insert(dequantizationLayer->name);
}
}
}
@ -275,14 +278,17 @@ void ConcatTransformation::addDequantizationLayers(
getLayerDequantizationCallback(*layer, originalName, layerDequantizationScales, layerDequantizationShifts);
}
CNNLayerPtr dequantizationLayer = CNNNetworkHelper::addScaleShiftBetween(
const std::vector<CNNLayerPtr> dequantizationLayers = CNNNetworkHelper::addScaleShiftBetween(
context,
std::make_shared<CNNLayer>(*layer),
nullptr,
DequantizationDetails(layerDequantizationScales, layerDequantizationShifts, layerDequantizationScales.size()),
originalName);
context.dequantizationLayersNames.insert(dequantizationLayer->name);
subgraph.layers[dequantizationLayer->name] = dequantizationLayer.get();
for (const CNNLayerPtr& dequantizationLayer : dequantizationLayers) {
context.dequantizationLayersNames.insert(dequantizationLayer->name);
subgraph.layers[dequantizationLayer->name] = dequantizationLayer.get();
}
}
}
}

View File

@ -254,12 +254,15 @@ void LayerTransformation::addDequantizationLayer(
const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(layer);
for (const CNNLayerPtr& child : children) {
const CNNLayerPtr dequantizationLayer = CNNNetworkHelper::addScaleShiftBetween(
const std::vector<CNNLayerPtr> dequantizationLayers = CNNNetworkHelper::addScaleShiftBetween(
context,
std::make_shared<CNNLayer>(layer),
child,
DequantizationDetails(dequantizationScales, dequantizationShifts, outputChannelsCount));
context.dequantizationLayersNames.insert(dequantizationLayer->name);
for (const auto& dequantizationLayer : dequantizationLayers) {
context.dequantizationLayersNames.insert(dequantizationLayer->name);
}
}
OutputsDataMap outputs;
@ -269,13 +272,16 @@ void LayerTransformation::addDequantizationLayer(
const std::string dequantizationLayerName = layer.name;
CNNNetworkHelper::renameLayer(context.network, layer.name, layer.name + LayerTransformation::lastLayerPostfix);
const CNNLayerPtr dequantizationLayer = CNNNetworkHelper::addScaleShiftBetween(
const std::vector<CNNLayerPtr> dequantizationLayers = CNNNetworkHelper::addScaleShiftBetween(
context,
std::make_shared<CNNLayer>(layer),
nullptr,
DequantizationDetails(dequantizationScales, dequantizationShifts, outputChannelsCount),
dequantizationLayerName);
context.dequantizationLayersNames.insert(dequantizationLayer->name);
for (const auto& dequantizationLayer : dequantizationLayers) {
context.dequantizationLayersNames.insert(dequantizationLayer->name);
}
}
}

View File

@ -439,8 +439,7 @@ std::vector<CNNLayerPtr> CNNNetworkHelper::transformFakeQuantizeToConst(Transfor
const CNNLayerPtr fakeQuantize,
const Blob::Ptr weights,
const std::string& constLayerName) {
std::vector<CNNLayerPtr> constLayersToRemove;
constLayersToRemove.reserve(fakeQuantize->insData.size());
std::set<CNNLayerPtr> constLayersToRemove;
for (const DataWeakPtr& insDataWeak : fakeQuantize->insData) {
const DataPtr insData = insDataWeak.lock();
@ -456,7 +455,7 @@ std::vector<CNNLayerPtr> CNNNetworkHelper::transformFakeQuantizeToConst(Transfor
<< fakeQuantize->name << "' is nullable";
}
constLayersToRemove.push_back(parent);
constLayersToRemove.insert(parent);
}
for (const CNNLayerPtr& parent : constLayersToRemove) {
@ -1049,7 +1048,7 @@ void CNNNetworkHelper::replaceLayer(TransformationContext& context, const CNNLay
networkImpl->addLayer(target);
}
CNNLayerPtr CNNNetworkHelper::addScaleShiftBetween(TransformationContext& context, const CNNLayerPtr parent,
std::vector<CNNLayerPtr> CNNNetworkHelper::addScaleShiftBetween(TransformationContext& context, const CNNLayerPtr parent,
const CNNLayerPtr child,
const DequantizationDetails& dequantizationDetails,
const std::string& name) {
@ -1078,66 +1077,92 @@ CNNLayerPtr CNNNetworkHelper::addScaleShiftBetween(TransformationContext& contex
CNNNetworkHelper::updateBlobs(*child, "biases", updatedShifts);
}
return child;
return { child };
}
// Searching the connection between the layers
int l1_out_i = 0;
// specify parent/child edges here and manipulate with them below
std::vector<int> parentOutDataIndexes;
std::vector<int> childInsDataIndexes;
if (child != nullptr) {
for (; l1_out_i < parent->outData.size(); l1_out_i++) {
if (getInputTo(parent->outData[l1_out_i]).find(child->name) !=
getInputTo(parent->outData[l1_out_i]).end()) {
break;
for (int l1_out_i = 0; l1_out_i < parent->outData.size(); l1_out_i++) {
auto& inputTo = getInputTo(parent->outData[l1_out_i]);
if (inputTo.find(child->name) != inputTo.end()) {
parentOutDataIndexes.push_back(l1_out_i);
}
}
for (size_t i = 0; i < child->insData.size(); ++i) {
const auto& insData = child->insData[i];
const CNNLayerPtr& creatorLayer = getCreatorLayer(insData.lock()).lock();
if (creatorLayer->name == parent->name) {
childInsDataIndexes.push_back(i);
}
}
} else {
parentOutDataIndexes.push_back(0);
childInsDataIndexes.push_back(0);
}
if (l1_out_i == parent->outData.size()) {
if (childInsDataIndexes.empty()) {
if (child != nullptr)
THROW_IE_EXCEPTION << "Can't find layer " << child->name << " among layer " << parent->name << " outputs";
else
THROW_IE_EXCEPTION << "Layer '" << parent->name << "' has invalid output";
}
DataPtr outData = parent->outData[l1_out_i];
std::vector<CNNLayerPtr> ssCnnLayers;
ssCnnLayers.reserve(childInsDataIndexes.size());
for (int l1_out_i : parentOutDataIndexes) {
DataPtr outData = parent->outData[l1_out_i];
std::string layerName = name.empty() ? (child != nullptr ? (parent->name + "_ScaleShift_" + child->name)
: (parent->name + "_ScaleShift"))
: name;
for (int i = 0; i < childInsDataIndexes.size(); ++i) {
const int childInsDataIndex = childInsDataIndexes[i];
std::string layerName = name.empty() ?
(child != nullptr ?
(parent->name + "_ScaleShift" + (childInsDataIndexes.size() == 1 ? "" : std::to_string(childInsDataIndex)) + "_" + child->name) :
(parent->name + "_ScaleShift" + (childInsDataIndexes.size() == 1 ? "" : std::to_string(childInsDataIndex))))
: name;
Precision ssPrecision = context.getOriginalLayerPrecision(parent->name, outData->getName());
if (ssPrecision == Precision::UNSPECIFIED) {
if (child != nullptr)
ssPrecision = child->precision;
else
ssPrecision = Precision::FP32;
}
Precision ssPrecision = context.getOriginalLayerPrecision(parent->name, outData->getName());
if (ssPrecision == Precision::UNSPECIFIED) {
if (child != nullptr)
ssPrecision = child->precision;
else
ssPrecision = Precision::FP32;
}
LayerParams ssCnnLayerParams {layerName, "ScaleShift", ssPrecision};
CNNLayerPtr ssCnnLayer(new ScaleShiftLayer(ssCnnLayerParams));
LayerParams ssCnnLayerParams{ layerName, "ScaleShift", ssPrecision };
CNNLayerPtr ssCnnLayer(new ScaleShiftLayer(ssCnnLayerParams));
const std::vector<size_t> dims = outData->getDims();
const std::vector<size_t> dims = outData->getDims();
if ((dims.size() != 2ul) || ((dims.size() == 2ul) && (dims[0] != dequantizationDetails.channelsCount))) {
if ((dims.size() > 1) && (dims[1] != dequantizationDetails.channelsCount)) {
THROW_IE_EXCEPTION << "unexpected parent channels count " << dims[1];
if ((dims.size() != 2ul) || ((dims.size() == 2ul) && (dims[0] != dequantizationDetails.channelsCount))) {
if ((dims.size() > 1) && (dims[1] != dequantizationDetails.channelsCount)) {
THROW_IE_EXCEPTION << "unexpected parent channels count " << dims[1];
}
}
addLayerToCNNNetworkAfterData(outData, ssCnnLayer, child != nullptr ? child->name : "", context.network, childInsDataIndex);
{
ScaleShiftLayer* scshLayer = dynamic_cast<ScaleShiftLayer*>(ssCnnLayer.get());
if (scshLayer == nullptr) {
THROW_IE_EXCEPTION << "Layer " << ssCnnLayer->name << " is not instance of ScaleShiftLayer class";
}
fillInScaleShift(
scshLayer,
dequantizationDetails.channelsCount,
dequantizationDetails.scales.data(),
dequantizationDetails.shifts.data());
}
CNNNetworkHelper::setOutDataPrecision(*ssCnnLayer, ssPrecision);
ssCnnLayers.push_back(ssCnnLayer);
}
}
addLayerToCNNNetworkAfterData(outData, ssCnnLayer, child != nullptr ? child->name : "", context.network);
{
ScaleShiftLayer* scshLayer = dynamic_cast<ScaleShiftLayer*>(ssCnnLayer.get());
if (scshLayer == nullptr) {
THROW_IE_EXCEPTION << "Layer " << ssCnnLayer->name << " is not instance of ScaleShiftLayer class";
}
fillInScaleShift(
scshLayer,
dequantizationDetails.channelsCount,
dequantizationDetails.scales.data(),
dequantizationDetails.shifts.data());
}
CNNNetworkHelper::setOutDataPrecision(*ssCnnLayer, ssPrecision);
return ssCnnLayer;
return ssCnnLayers;
}
CNNLayerPtr CNNNetworkHelper::addConstBetween(ICNNNetwork& net, const CNNLayerPtr layer1, const CNNLayerPtr layer2,
@ -1177,7 +1202,8 @@ void CNNNetworkHelper::addLayerToCNNNetworkAfterData(
DataPtr parentOutData,
CNNLayer::Ptr layer,
const std::string& nextLayerName,
ICNNNetwork& net) {
ICNNNetwork& net,
const int childInsDataIndex) {
CNNNetworkImpl* netImpl = dynamic_cast<CNNNetworkImpl*>(&net);
if (netImpl == nullptr) {
THROW_IE_EXCEPTION << "unexpected network type";
@ -1188,7 +1214,7 @@ void CNNNetworkHelper::addLayerToCNNNetworkAfterData(
netImpl->getLayerByName(nextLayerName.c_str(), nextLayer, nullptr);
}
if (layer && (nextLayerName.empty() || (parentOutData == nullptr) ||
if (layer && (nextLayerName.empty() || (parentOutData == nullptr) || (childInsDataIndex != -1) ||
(getInputTo(parentOutData).find(nextLayerName) != getInputTo(parentOutData).end()))) {
auto getTensorDesc = [](CNNLayerPtr& nextLayer) {
const DataPtr insData = nextLayer->insData[0].lock();
@ -1222,12 +1248,18 @@ void CNNNetworkHelper::addLayerToCNNNetworkAfterData(
if (!nextLayerName.empty()) {
// CNNLayerPtr nextLayer = getInputTo(parentOutData)[nextLayerName];
getInputTo(newEdgeAfterLayer)[nextLayerName] = nextLayer;
if (parentOutData != nullptr) {
getInputTo(parentOutData).erase(nextLayerName);
for (size_t i = 0; i < nextLayer->insData.size(); i++) {
if (nextLayer->insData[i].lock() == parentOutData) {
nextLayer->insData[i] = newEdgeAfterLayer;
if (childInsDataIndex == -1) {
for (size_t i = 0; i < nextLayer->insData.size(); i++) {
if (nextLayer->insData[i].lock() == parentOutData) {
nextLayer->insData[i] = newEdgeAfterLayer;
}
}
} else {
nextLayer->insData[childInsDataIndex] = newEdgeAfterLayer;
}
} else {
// TODO: why new?
@ -1348,20 +1380,21 @@ size_t CNNNetworkHelper::disconnectLayers(CNNNetworkImpl* network, const CNNLaye
bool wasFound = false;
for (auto dataIt = parentLayer->outData.begin(); dataIt != parentLayer->outData.end(); ++dataIt) {
auto data = *dataIt;
for (auto inputIt = getInputTo(data).begin(); inputIt != getInputTo(data).end(); ++inputIt) {
auto inputIt = getInputTo(data).begin();
while (inputIt != getInputTo(data).end()) {
auto currentChildLayer = inputIt->second;
if (currentChildLayer == nullptr) {
THROW_IE_EXCEPTION << "Output layer for '" << parentLayer->name << "'is absent";
}
if (currentChildLayer->name == childLayer->name) {
getInputTo(data).erase(inputIt);
wasFound = true;
break;
}
}
if (wasFound) {
break;
if (currentChildLayer->name == childLayer->name) {
inputIt = getInputTo(data).erase(inputIt);
wasFound = true;
continue;
}
++inputIt;
}
}
if (!wasFound) {
@ -1370,7 +1403,8 @@ size_t CNNNetworkHelper::disconnectLayers(CNNNetworkImpl* network, const CNNLaye
}
wasFound = false;
for (auto it = childLayer->insData.begin(); it != childLayer->insData.end(); ++it) {
auto it = childLayer->insData.begin();
while (it != childLayer->insData.end()) {
auto data = it->lock();
if (data == nullptr) {
THROW_IE_EXCEPTION << "Input layer data for '" << childLayer->name << "'is absent";
@ -1379,11 +1413,14 @@ size_t CNNNetworkHelper::disconnectLayers(CNNNetworkImpl* network, const CNNLaye
if (currentParentLayer == nullptr) {
THROW_IE_EXCEPTION << "Input layer for '" << childLayer->name << "'is absent";
}
if (currentParentLayer->name == parentLayer->name) {
childLayer->insData.erase(it);
it = childLayer->insData.erase(it);
wasFound = true;
break;
continue;
}
++it;
}
if (!wasFound) {
THROW_IE_EXCEPTION << "Input layer '" << parentLayer->name << "' was not found for '" << childLayer->name

View File

@ -0,0 +1,32 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Network precisions the transformation test is instantiated with.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
// Test values: the first initializer is the FakeQuantize on activations (fqOnData),
// the second the FakeQuantize on weights (fqOnWeights); each is
// { levels, constant shape, input low, input high, output low, output high }.
const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues> params = {
{
{ 256ul, ngraph::Shape { 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{ 255ul, ngraph::Shape { 1 }, { -12.8f }, { 12.7f }, { -12.8f }, { 12.7f } }
},
};
// Registers the parameterized test on CPU for the cross product of
// precisions x a fixed 1x16 input shape x the values above.
INSTANTIATE_TEST_CASE_P(LPT, MatMulWithOptimizedConstantFakeQuantizeTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::SizeVector({ 1, 16 })),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(params)),
MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName);
} // namespace

View File

@ -0,0 +1,31 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "low_precision_transformations/multiply_with_one_parent_transformation.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Network precisions the transformation test is instantiated with.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
// FakeQuantize descriptor applied before the Multiply:
// { levels, constant shape, input low, input high, output low, output high }.
const std::vector<MultiplyWithOneParentTransformationValues> values = {
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } }
}
};
// Registers the parameterized test on CPU for the cross product of
// precisions x a fixed 1x3x16x16 input shape x the values above.
INSTANTIATE_TEST_CASE_P(LPT, MultiplyWithOneParentTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::SizeVector({ 1, 3, 16, 16 })),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(values)),
MultiplyWithOneParentTransformation::getTestCaseName);
} // namespace

View File

@ -0,0 +1,31 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Network precisions the transformation test is instantiated with.
// NOTE(review): only FP32 here, unlike the CPU instantiation which also covers FP16.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32
};
// Test values: first initializer is the FakeQuantize on activations (fqOnData),
// second the FakeQuantize on weights (fqOnWeights); each is
// { levels, constant shape, input low, input high, output low, output high }.
const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues> params = {
{
{ 256ul, ngraph::Shape { 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{ 255ul, ngraph::Shape { 1 }, { -12.8f }, { 12.7f }, { -12.8f }, { 12.7f } }
}
};
// Registers the parameterized test on GPU for the cross product of
// precisions x a fixed 1x16 input shape x the values above.
INSTANTIATE_TEST_CASE_P(LPT, MatMulWithOptimizedConstantFakeQuantizeTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::SizeVector({ 1, 16 })),
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::ValuesIn(params)),
MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName);
} // namespace

View File

@ -0,0 +1,31 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "low_precision_transformations/multiply_with_one_parent_transformation.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Network precisions the transformation test is instantiated with.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
// FakeQuantize descriptor applied before the Multiply:
// { levels, constant shape, input low, input high, output low, output high }.
const std::vector<MultiplyWithOneParentTransformationValues> values = {
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } }
}
};
// Registers the parameterized test on GPU for the cross product of
// precisions x a fixed 1x3x16x16 input shape x the values above.
INSTANTIATE_TEST_CASE_P(LPT, MultiplyWithOneParentTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::SizeVector({ 1, 3, 16, 16 })),
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::ValuesIn(values)),
MultiplyWithOneParentTransformation::getTestCaseName);
} // namespace

View File

@ -0,0 +1,38 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <memory>
#include "functional_test_utils/low_precision_transformations/layer_transformation.hpp"
#include "ngraph_functions/low_precision_transformations/common/fake_quantize_on_data.hpp"
namespace LayerTestsDefinitions {
// Per-case test values: FakeQuantize settings for the MatMul activations
// and for its constant weights.
class MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues {
public:
ngraph::builder::subgraph::FakeQuantizeOnData fqOnData;
ngraph::builder::subgraph::FakeQuantizeOnData fqOnWeights;
};
// Full parameter tuple: precision, input shape, target device, test values.
typedef std::tuple<
InferenceEngine::Precision,
InferenceEngine::SizeVector,
std::string,
MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues
> MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams;
// Parameterized LPT functional test: MatMul whose constant weights are
// quantized by a FakeQuantize built from the values above.
class MatMulWithOptimizedConstantFakeQuantizeTransformation :
public testing::WithParamInterface<MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams>,
public LayerTestsUtils::LayerTransformation {
public:
static std::string getTestCaseName(testing::TestParamInfo<MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams> obj);
protected:
void SetUp() override;
};
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,40 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <memory>
#include "functional_test_utils/low_precision_transformations/layer_transformation.hpp"
#include "ngraph_functions/low_precision_transformations/common/fake_quantize_on_data.hpp"
namespace LayerTestsDefinitions {
// Per-case test values: the FakeQuantize that feeds both Multiply inputs.
class MultiplyWithOneParentTransformationValues {
public:
ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantize;
};
// Full parameter tuple: precision, input shape, target device, test values.
typedef std::tuple<
InferenceEngine::Precision,
InferenceEngine::SizeVector,
std::string,
MultiplyWithOneParentTransformationValues
> MultiplyWithOneParentTransformationParams;
// Parameterized LPT functional test: Multiply whose two inputs share a
// single FakeQuantize parent.
class MultiplyWithOneParentTransformation :
public testing::WithParamInterface<MultiplyWithOneParentTransformationParams>,
public LayerTestsUtils::LayerTransformation {
public:
static std::string getTestCaseName(testing::TestParamInfo<MultiplyWithOneParentTransformationParams> obj);
protected:
void SetUp() override;
private:
// Checks the transformed CNNNetwork structure (see .cpp for the exact checks).
void validate();
};
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,63 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.hpp"
#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <ie_core.hpp>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "functional_test_utils/layer_test_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph_functions/pass/convert_prc.hpp"
#include "ngraph_functions/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_function.hpp"
namespace LayerTestsDefinitions {

// Builds a human-readable test-case name from the run parameters:
// precision, input shape, target device and both FakeQuantize descriptors.
std::string MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName(
    testing::TestParamInfo<MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams> obj) {
    InferenceEngine::Precision netPrecision;
    InferenceEngine::SizeVector inputShape;
    std::string targetDevice;
    MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues param;
    std::tie(netPrecision, inputShape, targetDevice, param) = obj.param;

    std::ostringstream result;
    result << netPrecision.name() << "_" <<
        CommonTestUtils::vec2str(inputShape) << "_" <<
        targetDevice << "_" <<
        param.fqOnData << "_" <<
        param.fqOnWeights;
    return result.str();
}

// Creates the nGraph function under test from the test parameters.
// `targetDevice` and `function` are members inherited from the test base class.
void MatMulWithOptimizedConstantFakeQuantizeTransformation::SetUp() {
    // Accuracy threshold for the reference comparison performed by Run().
    threshold = 0.01f;

    InferenceEngine::Precision netPrecision;
    InferenceEngine::SizeVector inputShape;
    MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues param;
    std::tie(netPrecision, inputShape, targetDevice, param) = this->GetParam();

    const auto precision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
    function = ngraph::builder::subgraph::MatMulWithOptimizedConstantFakeQuantizeFunction::getOriginal(
        precision,
        inputShape,
        param.fqOnData,
        param.fqOnWeights);
}

TEST_P(MatMulWithOptimizedConstantFakeQuantizeTransformation, CompareWithRefImpl) {
    Run();
}

}  // namespace LayerTestsDefinitions

View File

@ -0,0 +1,84 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision_transformations/multiply_with_one_parent_transformation.hpp"
#include <memory>
#include <string>
#include <tuple>
#include <vector>
#include <ie_core.hpp>
#include "common_test_utils/common_utils.hpp"
#include "ngraph_functions/low_precision_transformations/multiply_with_one_parent_function.hpp"
namespace LayerTestsDefinitions {
std::string MultiplyWithOneParentTransformation::getTestCaseName(testing::TestParamInfo<MultiplyWithOneParentTransformationParams> obj) {
InferenceEngine::Precision netPrecision;
InferenceEngine::SizeVector inputShape;
std::string targetDevice;
MultiplyWithOneParentTransformationValues values;
std::tie(netPrecision, inputShape, targetDevice, values) = obj.param;
std::ostringstream result;
result << netPrecision.name() << "_" << CommonTestUtils::vec2str(inputShape);
return result.str();
}
void MultiplyWithOneParentTransformation::SetUp() {
threshold = 0.01f;
InferenceEngine::Precision netPrecision;
InferenceEngine::SizeVector inputShape;
InferenceEngine::details::LayerTransformation::Params params;
MultiplyWithOneParentTransformationValues values;
std::tie(netPrecision, inputShape, targetDevice, values) = this->GetParam();
auto precision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
function = ngraph::builder::subgraph::MultiplyWithOneParentFunction::getOriginal(precision, inputShape, values.fakeQuantize);
validate();
}
void MultiplyWithOneParentTransformation::validate() {
InferenceEngine::Precision netPrecision;
InferenceEngine::SizeVector inputShape;
std::string targetDevice;
InferenceEngine::details::LayerTransformation::Params params = LayerTestsUtils::LayerTransformationParamsFactory::createParams();
MultiplyWithOneParentTransformationValues values;
std::tie(netPrecision, inputShape, targetDevice, values) = this->GetParam();
const InferenceEngine::CNNNetwork network = transform(params);
IE_SUPPRESS_DEPRECATED_START
InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo();
EXPECT_EQ(1, outputs.size());
std::map<std::string, InferenceEngine::DataPtr>::iterator it = outputs.begin();
const InferenceEngine::CNNLayerPtr outputLayer = getCreatorLayer(it->second).lock();
EXPECT_TRUE(outputLayer != nullptr);
EXPECT_EQ("Eltwise", outputLayer->type);
// check #1: successful transformation execution
EXPECT_EQ(2ul, outputLayer->insData.size());
const auto parents = InferenceEngine::details::CNNNetworkHelper::getParents(*outputLayer);
EXPECT_EQ(2ul, parents.size());
EXPECT_EQ("ScaleShift", parents[0]->type);
// check #2: successful graph handling
EXPECT_EQ("FakeQuantize", parents[1]->type);
EXPECT_EQ(1ul, InferenceEngine::details::CNNNetworkHelper::getParents(*parents[0]).size());
EXPECT_EQ("FakeQuantize", InferenceEngine::details::CNNNetworkHelper::getParents(*parents[0])[0]->type);
IE_SUPPRESS_DEPRECATED_END
}
TEST_P(MultiplyWithOneParentTransformation, CompareWithRefImpl) {
Run();
};
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,26 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <ngraph/ngraph.hpp>
#include "ngraph_functions/low_precision_transformations/common/fake_quantize_on_data.hpp"
namespace ngraph {
namespace builder {
namespace subgraph {
// Builder of the test subgraph: Parameter [-> FakeQuantize] -> MatMul,
// with constant weights quantized by a FakeQuantize.
class MatMulWithOptimizedConstantFakeQuantizeFunction {
public:
// Returns the original (untransformed) function.
// fqOnData describes the FakeQuantize on activations, fqOnWeights the one on weights.
static std::shared_ptr<ngraph::Function> getOriginal(
const ngraph::element::Type precision,
const ngraph::Shape& inputShape,
const FakeQuantizeOnData& fqOnData,
const FakeQuantizeOnData& fqOnWeights);
};
}  // namespace subgraph
}  // namespace builder
}  // namespace ngraph

View File

@ -0,0 +1,25 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <ngraph/ngraph.hpp>
#include "ngraph_functions/low_precision_transformations/common/fake_quantize_on_data.hpp"
namespace ngraph {
namespace builder {
namespace subgraph {
// Builder of the test subgraph: Parameter -> FakeQuantize -> Multiply,
// where both Multiply inputs come from the same FakeQuantize (one parent).
class MultiplyWithOneParentFunction {
public:
// Returns the original (untransformed) function.
static std::shared_ptr<ngraph::Function> getOriginal(
const ngraph::element::Type precision,
const ngraph::Shape& inputShape,
const FakeQuantizeOnData& fakeQuantize);
};
}  // namespace subgraph
}  // namespace builder
}  // namespace ngraph

View File

@ -0,0 +1,51 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph_functions/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_function.hpp"
#include <ngraph/opsets/opset1.hpp>
#include "ngraph_functions/builders.hpp"
namespace ngraph {
namespace builder {
namespace subgraph {

// Builds: Parameter [-> FakeQuantize(fqOnData)] -> MatMul <- FakeQuantize(fqOnWeights) <- Constant.
// The weights are a [inputShape[1] x 10] constant filled with 10.f.
std::shared_ptr<ngraph::Function> MatMulWithOptimizedConstantFakeQuantizeFunction::getOriginal(
    const ngraph::element::Type precision,
    const ngraph::Shape& inputShape,
    const FakeQuantizeOnData& fqOnData,
    const FakeQuantizeOnData& fqOnWeights) {
    const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape));

    // FakeQuantize on activations is optional: skipped when fqOnData is empty.
    const auto fakeQuantizeOnActivations = fqOnData.empty() ?
        nullptr :
        ngraph::builder::makeFakeQuantize(
            input, precision, fqOnData.quantizationLevel, fqOnData.constantShape,
            fqOnData.inputLowValues, fqOnData.inputHighValues, fqOnData.outputLowValues, fqOnData.outputHighValues);

    const ngraph::Shape weightsShape = { inputShape[1], 10 };
    const std::vector<float> weights(weightsShape[0] * weightsShape[1], 10.f);
    const auto weightsConst = std::make_shared<ngraph::opset1::Constant>(precision, weightsShape, weights);

    // NOTE(review): the same low/high constants are shared between the input and
    // output intervals of the weights FakeQuantize, so
    // fqOnWeights.outputLowValues/outputHighValues are never read here — this looks
    // intentional ("optimized constant" case where in == out), but confirm.
    const auto lowConstant = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputLowValues);
    const auto highConstant = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputHighValues);
    const auto fakeQuantizeOnWeights = std::make_shared<ngraph::opset1::FakeQuantize>(
        weightsConst,
        lowConstant,
        highConstant,
        lowConstant,
        highConstant,
        fqOnWeights.quantizationLevel);

    const auto matMul = std::make_shared<ngraph::opset1::MatMul>(
        fqOnData.empty() ? input : fakeQuantizeOnActivations,
        fakeQuantizeOnWeights,
        false,
        false);

    ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(matMul) };
    return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "MatMulWithOptimizedConstantFakeQuantizeFunction");
}

}  // namespace subgraph
}  // namespace builder
}  // namespace ngraph

View File

@ -0,0 +1,32 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph_functions/low_precision_transformations/multiply_with_one_parent_function.hpp"
#include <ngraph/opsets/opset1.hpp>
#include "ngraph_functions/builders.hpp"
namespace ngraph {
namespace builder {
namespace subgraph {

// Builds: Parameter -> FakeQuantize -> Multiply, where both Multiply inputs
// are the same FakeQuantize output (a single shared parent).
std::shared_ptr<ngraph::Function> MultiplyWithOneParentFunction::getOriginal(
    const ngraph::element::Type precision,
    const ngraph::Shape& inputShape,
    const FakeQuantizeOnData& fqOnData) {
    const auto parameter = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape));

    const auto quantized = ngraph::builder::makeFakeQuantize(
        parameter, precision, fqOnData.quantizationLevel, fqOnData.constantShape,
        fqOnData.inputLowValues, fqOnData.inputHighValues, fqOnData.outputLowValues, fqOnData.outputHighValues);

    // Both operands deliberately reference the same output port.
    const auto sharedParent = quantized->output(0);
    const auto product = std::make_shared<ngraph::opset1::Multiply>(sharedParent, sharedParent);

    return std::make_shared<ngraph::Function>(
        ngraph::ResultVector{ std::make_shared<ngraph::opset1::Result>(product) },
        ngraph::ParameterVector{ parameter },
        "MultiplyWithOneParentFunction");
}

}  // namespace subgraph
}  // namespace builder
}  // namespace ngraph