[LPT] MoveFakeQuantize (#6723)

* add move_fake_quantize_for_concat_transformation, mfk and mfk_function * fix relu_transformation.cpp * backup * add change * add cpu test * [LPT] MoveFakeQuantizeTransformation: fixes * get InferenceEngine::NotImplemented * fix ieFuncTests * try without new cpu_test * fix cpuFuncTests and ieFuncTests * fix tests * fix lin * add cpu test * fix link and matcher in move_fake_quantize.cpp * update matcher * add gpu test * naming fix * move_fake_quantize.cpp add set_fr_name for new_concat * naming new fq fix * fix NetworkHelper::copyInfo naming * concat.cpp naming fix * gpu tests fix * rm network_helper changes * rm extra output * resolve conversations * resolve other conversations * add multi inputs for concat * fix lin * fix move_fake_qunatize naming * rm maxpool from mfk_function * mkldnn update * fix style * rm extra change * fix concat matcher * rm mkldnn_plugin changes * fix conversations * fix interval * fix and add isQuantizedStatic, add attribute and negative tests * add negative plugin tests * fix style: Co-authored-by: Edward Shogulin <edward.shogulin@intel.com>
2021-09-15 17:15:57 +03:00
parent 0df7dab345
commit 5b285ed105
11 changed files with 957 additions and 3 deletions
--- a/inference-engine/src/low_precision_transformations/include/low_precision/move_fake_quantize.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/move_fake_quantize.hpp
@@ -0,0 +1,25 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <ngraph/ngraph.hpp>
+#include "low_precision/layer_transformation.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+// Matcher transformation for a FakeQuantize located after a Concat (optionally with a Relu
+// between them): the FakeQuantize is re-created on every Concat input instead.
+class LP_TRANSFORMATIONS_API MoveFakeQuantize : public LayerTransformation {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    // Registers the pattern matcher; `params` is passed to the LayerTransformation base.
+    MoveFakeQuantize(const Params& params = Params());
+    // Rewrites the matched subgraph; returns true if the graph was changed.
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
+    // Always reports true for this transformation.
+    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
+};
+
+} // namespace low_precision
+} // namespace pass
+} // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/src/concat.cpp
+++ b/inference-engine/src/low_precision_transformations/src/concat.cpp
@@ -138,6 +138,7 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
        const auto convert = convertNodes[0]->clone_with_new_inputs({ newConcat });

        NetworkHelper::copyInfo({ concat, convert }, convert);
+        convert->set_friendly_name(concat->get_friendly_name() + "/DequantizationConvert");
        lastDequantization = convert;
    }

@@ -150,6 +151,7 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
                ngraph::pass::low_precision::fold<ngraph::opset1::Concat>(subtractNodes, 1)));

        NetworkHelper::copyInfo({ concat, subtract }, subtract);
+        subtract->set_friendly_name(concat->get_friendly_name() + "/DequantizationSubtract");
        lastDequantization = subtract;
    }

@@ -163,6 +165,7 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
            layerDequantizations[0].multiply->get_output_element_type(0));

        NetworkHelper::copyInfo({ concat, multiply }, multiply);
+        multiply->set_friendly_name(concat->get_friendly_name() + "/DequantizationMultyply");
        lastDequantization = multiply;
    }

@@ -325,13 +328,12 @@ bool ConcatTransformation::isQuantizedStatic(const std::shared_ptr<const Node>&
        return false;
    }

-    const auto axis = concat->get_axis();
    const auto outputRank = concat->get_output_partial_shape(0).rank();
-    if (axis < 0 && outputRank.is_dynamic()) {
+    if (outputRank.is_dynamic()) {
        return false;
    }

-    const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), axis, outputRank);
+    const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), concat->get_axis(), outputRank);
    return normalizedAxis == 1ul;
 }

--- a/inference-engine/src/low_precision_transformations/src/low_precision.cpp
+++ b/inference-engine/src/low_precision_transformations/src/low_precision.cpp
@@ -66,6 +66,7 @@
 #include "low_precision/transpose.hpp"
 #include "low_precision/unsqueeze.hpp"
 #include "low_precision/variadic_split.hpp"
+#include "low_precision/move_fake_quantize.hpp"

 // cleanup transformations
 #include "low_precision/convert.hpp"
@@ -197,6 +198,7 @@ bool ngraph::pass::low_precision::LowPrecision::run_on_function(std::shared_ptr<
    prerequisites->add_matcher<PullReshapeThroughDequantization>(supportedTypes);
    prerequisites->add_matcher<PullTransposeThroughDequantization>(supportedTypes);
    prerequisites->add_matcher<ngraph::pass::LinOpSequenceFusion>();
+    prerequisites->add_matcher<ngraph::pass::low_precision::MoveFakeQuantize>();

    manager.register_pass<TypeRelaxedReplacer>();

--- a/inference-engine/src/low_precision_transformations/src/move_fake_quantize.cpp
+++ b/inference-engine/src/low_precision_transformations/src/move_fake_quantize.cpp
@@ -0,0 +1,107 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision/move_fake_quantize.hpp"
+
+#include <ngraph/pattern/op/wrap_type.hpp>
+#include <ngraph/opsets/opset1.hpp>
+
+#include <memory>
+#include <ngraph/ngraph.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/pattern/op/or.hpp>
+
+#include "low_precision/concat.hpp"
+#include "low_precision/network_helper.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MoveFakeQuantize, "MoveFakeQuantize", 0);
+
+// Registers the matcher. The pattern root is a FakeQuantize with constant intervals whose data
+// input is either a single-consumer Concat or a Relu fed by such a Concat.
+MoveFakeQuantize::MoveFakeQuantize(const Params& params) : LayerTransformation(params) {
+    const auto concatPattern = ngraph::pattern::wrap_type<opset1::Concat>(pattern::consumers_count(1));
+    const auto reluPattern = ngraph::pattern::wrap_type<opset1::Relu>({ concatPattern });
+
+    // the interval constants are shared by both FakeQuantize alternatives
+    const auto inputLow = ngraph::pattern::wrap_type<ngraph::opset1::Constant>();
+    const auto inputHigh = ngraph::pattern::wrap_type<ngraph::opset1::Constant>();
+    const auto outputLow = ngraph::pattern::wrap_type<ngraph::opset1::Constant>();
+    const auto outputHigh = ngraph::pattern::wrap_type<ngraph::opset1::Constant>();
+
+    const auto fqAfterRelu = ngraph::pattern::wrap_type<opset1::FakeQuantize>(
+        { reluPattern, inputLow, inputHigh, outputLow, outputHigh });
+    const auto fqAfterConcat = ngraph::pattern::wrap_type<opset1::FakeQuantize>(
+        { concatPattern, inputLow, inputHigh, outputLow, outputHigh });
+
+    ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
+        const auto root = m.get_match_root();
+        // the user-provided callback can veto the transformation for this node
+        return transformation_callback(root) ? false : transform(*context, m);
+    };
+
+    auto matcher = std::make_shared<ngraph::pattern::Matcher>(
+        std::make_shared<pattern::op::Or>(OutputVector{ fqAfterConcat, fqAfterRelu }),
+        "MoveFakeQuantize");
+    this->register_matcher(matcher, callback);
+}
+
+// Moves the matched FakeQuantize from the Concat output to each Concat input. If a Relu sits
+// between the Concat and the FakeQuantize, a copy of that Relu is created per input as well.
+// Returns false and leaves the graph untouched when the Concat is rejected by
+// ConcatTransformation::isQuantizedStatic or when the FakeQuantize intervals are not per-tensor.
+bool MoveFakeQuantize::transform(TransformationContext& context, ngraph::pattern::Matcher& m) {
+    auto fq = m.get_match_root();
+    auto operation = fq->get_input_node_shared_ptr(0);
+    std::shared_ptr<ngraph::Node> concat;
+    bool only_concat = true;
+    const std::string fq_original_name = fq->get_friendly_name();
+    std::string operation_original_name;
+    if (is_type<opset1::Concat>(operation)) {
+        concat = operation;
+    } else {
+        operation_original_name = operation->get_friendly_name();
+        concat = operation->get_input_node_shared_ptr(0);
+        only_concat = false;
+    }
+    if (!ConcatTransformation::isQuantizedStatic(concat)) {
+        return false;
+    }
+
+    // The very same interval constants are attached to every new FakeQuantize below. That is only
+    // valid for per-tensor intervals: per-channel constants are sized for the concatenated tensor
+    // and cannot be broadcast to a single Concat input.
+    for (size_t i = 1ul; i < fq->get_input_size(); ++i) {
+        if (ngraph::shape_size(fq->get_input_shape(i)) != 1ul) {
+            return false;
+        }
+    }
+
+    std::vector<std::shared_ptr<ngraph::Node>> fqs;
+    const size_t input_size = concat->get_input_size();
+    fqs.reserve(input_size);
+    for (size_t i{ 0 }; i < input_size; ++i) {
+        std::shared_ptr<ngraph::Node> fq_input;
+        if (only_concat) {
+            fq_input = concat->get_input_node_shared_ptr(i);
+        } else {
+            // re-create the intermediate operation on this branch, named "<original>_<index>"
+            auto input = concat->get_input_node_shared_ptr(i);
+            fq_input = operation->clone_with_new_inputs({ input });
+            fq_input->set_friendly_name(operation_original_name + "_" + std::to_string(i + 1));
+        }
+        auto newFq = fq->clone_with_new_inputs({ fq_input,
+            fq->get_input_node_shared_ptr(1),
+            fq->get_input_node_shared_ptr(2),
+            fq->get_input_node_shared_ptr(3),
+            fq->get_input_node_shared_ptr(4) });
+        newFq->set_friendly_name(fq_original_name + "_" + std::to_string(i + 1));
+        fqs.push_back(newFq);
+    }
+    ngraph::copy_runtime_info(fq, fqs);
+    // the new Concat consumes the per-branch FakeQuantize nodes and takes the place of the old FakeQuantize
+    auto newConcat = concat->clone_with_new_inputs(ngraph::OutputVector(fqs.begin(), fqs.end()));
+    newConcat->set_friendly_name(concat->get_friendly_name());
+    replace_node(fq, newConcat);
+    NetworkHelper::copyInfo(concat, newConcat);
+    updateOutput(context, newConcat, fq);
+    return true;
+}
+
+// Unconditionally returns true; the `layer` argument is not inspected.
+bool MoveFakeQuantize::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept {
+    return true;
+}
+
+} // namespace low_precision
+} // namespace pass
+} // namespace ngraph
--- a/inference-engine/tests/functional/inference_engine/lp_transformations/move_fake_quantize_transformation.cpp
+++ b/inference-engine/tests/functional/inference_engine/lp_transformations/move_fake_quantize_transformation.cpp
@@ -0,0 +1,364 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "layer_transformation.hpp"
+
+#include <string>
+#include <sstream>
+#include <memory>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include <low_precision/concat.hpp>
+
+#include <transformations/utils/utils.hpp>
+#include <transformations/init_node_info.hpp>
+#include <low_precision/relu.hpp>
+
+#include <low_precision/low_precision.hpp>
+
+#include "low_precision/move_fake_quantize.hpp"
+#include <low_precision/fake_quantize_decomposition.hpp>
+
+#include "common_test_utils/ngraph_test_utils.hpp"
+#include "lpt_ngraph_functions/move_fake_quantize_function.hpp"
+#include "lpt_ngraph_functions/common/builders.hpp"
+#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
+#include "lpt_ngraph_functions/relu_function.hpp"
+#include "simple_low_precision_transformer.hpp"
+
+using namespace testing;
+using namespace ngraph;
+using namespace ngraph::pass;
+
+namespace {
+
+// Description of the function the transformation is applied to. The fields are the arguments
+// passed to ngraph::builder::subgraph::MoveFakeQuantize::get when the actual function is built.
+class MoveFakeQuantizeTransformationActualValues {
+public:
+    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore1;
+    ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore1;
+    ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1;
+    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore2;
+    ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore2;
+    ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2;
+    // name of the optional operation, e.g. "relu"; empty string when there is none
+    std::string operation;
+    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter;
+    ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter;
+    ngraph::builder::subgraph::DequantizationOperations dequantizationAfter;
+};
+
+// Prints every field of the actual values, each one prefixed with "_" (used in test names).
+inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransformationActualValues& values) {
+    out << "_" << values.fakeQuantizeBefore1;
+    out << "_" << values.convertBefore1.outPrecision;
+    out << "_" << values.dequantizationBefore1;
+    out << "_" << values.fakeQuantizeBefore2;
+    out << "_" << values.convertBefore2.outPrecision;
+    out << "_" << values.dequantizationBefore2;
+    out << "_" << values.operation;
+    out << "_" << values.fakeQuantizeAfter;
+    out << "_" << values.convertAfter.outPrecision;
+    out << "_" << values.dequantizationAfter;
+    return out;
+}
+
+// Description of the expected (reference) function. The fields up to precisionAfterOperation are
+// the arguments passed to ngraph::builder::subgraph::MoveFakeQuantize::get for the reference.
+class MoveFakeQuantizeTransformationResultValues {
+public:
+    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore1;
+    ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore1;
+    ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1;
+    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore2;
+    ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore2;
+    ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2;
+    // name of the optional operation, e.g. "relu"; empty string when there is none
+    std::string operation;
+    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter;
+    ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter;
+    ngraph::builder::subgraph::DequantizationOperations dequantizationAfter;
+    ngraph::element::Type precisionAfterOperation;
+    // NOTE(review): not passed to the builder by the fixture in this file, only printed in the test name
+    ngraph::builder::subgraph::DequantizationOperations dequantizationAfterNotFQ;
+};
+
+// Prints the expected values, each one prefixed with "_" (used in test names).
+// precisionAfterOperation is not part of the output.
+inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransformationResultValues& values) {
+    out << "_" << values.fakeQuantizeBefore1;
+    out << "_" << values.convertBefore1.outPrecision;
+    out << "_" << values.dequantizationBefore1;
+    out << "_" << values.fakeQuantizeBefore2;
+    out << "_" << values.convertBefore2.outPrecision;
+    out << "_" << values.dequantizationBefore2;
+    out << "_" << values.operation;
+    out << "_" << values.fakeQuantizeAfter;
+    out << "_" << values.convertAfter;
+    out << "_" << values.dequantizationAfter;
+    out << "_" << values.dequantizationAfterNotFQ;
+    return out;
+}
+
+// One test case: LPT parameters, the Concat axis and the descriptions of the actual and the
+// expected functions.
+class MoveFakeQuantizeTransformationTestValues {
+public:
+    MoveFakeQuantizeTransformationTestValues() = default;
+    // The last two flags are accepted so that existing test tables keep compiling;
+    // they are not stored and have no effect.
+    MoveFakeQuantizeTransformationTestValues(
+        const TestTransformationParams& params,
+        const bool multiChannels,
+        const std::int64_t axis,
+        const MoveFakeQuantizeTransformationActualValues& actual,
+        const MoveFakeQuantizeTransformationResultValues& result,
+        const bool /* addNotPrecisionPreservedOperation */ = false,
+        const bool /* checkIntervalsAlignmentAttributes */ = true) :
+        params(params),
+        multiChannels(multiChannels),
+        axis(axis),
+        actual(actual),
+        result(result) {}
+
+    TestTransformationParams params;
+    // scalar members are value-initialized so a default-constructed object is safe to read
+    bool multiChannels = false;
+    std::int64_t axis = 0;
+    MoveFakeQuantizeTransformationActualValues actual;
+    MoveFakeQuantizeTransformationResultValues result;
+};
+
+// Prints the multi-channel flag followed by the actual and the expected values.
+inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransformationTestValues& values) {
+    out << "_" << values.multiChannels;
+    out << "_" << values.actual;
+    out << "_" << values.result;
+    return out;
+}
+
+// Parameters of one test instance: input precision, input shape and the test values.
+using MoveFakeQuantizeTransformationParams = std::tuple<
+    ngraph::element::Type,
+    ngraph::PartialShape,
+    MoveFakeQuantizeTransformationTestValues>;
+
+// Fixture: builds the actual function, runs the MoveFakeQuantize pass on it and builds the
+// reference function the result is compared with.
+class MoveFakeQuantizeTransformation : public LayerTransformation, public testing::WithParamInterface<MoveFakeQuantizeTransformationParams> {
+public:
+    void SetUp() override {
+        const ngraph::element::Type precision = std::get<0>(GetParam());
+        const ngraph::PartialShape shape = std::get<1>(GetParam());
+        MoveFakeQuantizeTransformationTestValues testValues = std::get<2>(GetParam());
+
+        // dequantization output precision depends on input precision
+        // to avoid huge amount of tests cases let's define dequantization output precision as input precision
+        if (!testValues.actual.dequantizationBefore1.multiply.empty()) {
+            testValues.actual.dequantizationBefore1.multiply.outPrecision = precision;
+        }
+        if (!testValues.actual.dequantizationBefore2.multiply.empty()) {
+            testValues.actual.dequantizationBefore2.multiply.outPrecision = precision;
+        }
+
+        IntervalsAlignmentSharedValue::Interval interval{ -1.28f, 2.55f };
+
+        actualFunction = ngraph::builder::subgraph::MoveFakeQuantize::get(
+            precision,
+            shape,
+            testValues.actual.fakeQuantizeBefore1,
+            testValues.actual.convertBefore1,
+            testValues.actual.dequantizationBefore1,
+            testValues.actual.fakeQuantizeBefore2,
+            testValues.actual.convertBefore2,
+            testValues.actual.dequantizationBefore2,
+            testValues.actual.operation,
+            testValues.actual.fakeQuantizeAfter,
+            testValues.actual.convertAfter,
+            testValues.actual.dequantizationAfter,
+            {
+                ngraph::builder::subgraph::make_shared_attribute_ptr<PrecisionPreservedAttribute>(true),
+                ngraph::builder::subgraph::make_shared_attribute_ptr<IntervalsAlignmentAttribute>(interval, 256),
+                ngraph::builder::subgraph::make_shared_attribute_ptr<QuantizationAlignmentAttribute>(false)
+            },
+            ngraph::element::undefined,
+            {},
+            testValues.axis);
+
+        // only the transformation under test is registered
+        const auto params = TestTransformationParams::toParams(testValues.params);
+        ov::pass::Manager manager;
+        manager.register_pass<ngraph::pass::low_precision::MoveFakeQuantize>(params);
+        manager.run_passes(actualFunction);
+        // dequantization output precision depends on input precision
+        // to avoid huge amount of tests cases let's define dequantization output precision as input precision
+        if (!testValues.result.dequantizationAfter.multiply.empty()) {
+            testValues.result.dequantizationAfter.multiply.outPrecision = precision;
+        }
+
+        if (!testValues.params.updatePrecisions &&
+            (precision == ngraph::element::f32) &&
+            !testValues.result.dequantizationAfter.convert.empty()) {
+            testValues.result.dequantizationAfter.convert = {};
+        }
+
+        referenceFunction = ngraph::builder::subgraph::MoveFakeQuantize::get(
+            precision,
+            shape,
+            testValues.result.fakeQuantizeBefore1,
+            testValues.result.convertBefore1,
+            testValues.result.dequantizationBefore1,
+            testValues.result.fakeQuantizeBefore2,
+            testValues.result.convertBefore2,
+            testValues.result.dequantizationBefore2,
+            testValues.result.operation,
+            testValues.result.fakeQuantizeAfter,
+            testValues.result.convertAfter,
+            testValues.result.dequantizationAfter,
+            {
+                ngraph::builder::subgraph::make_shared_attribute_ptr<PrecisionPreservedAttribute>(true),
+                ngraph::builder::subgraph::make_shared_attribute_ptr<IntervalsAlignmentAttribute>(interval, 256),
+                ngraph::builder::subgraph::make_shared_attribute_ptr<QuantizationAlignmentAttribute>(false)
+            },
+            testValues.result.precisionAfterOperation,
+            {},
+            testValues.axis);
+    }
+
+    // Test name: common parameters, multi-channel flag, axis, then the actual and expected values.
+    static std::string getTestCaseName(testing::TestParamInfo<MoveFakeQuantizeTransformationParams> obj) {
+        const ngraph::element::Type precision = std::get<0>(obj.param);
+        const ngraph::PartialShape shape = std::get<1>(obj.param);
+        const MoveFakeQuantizeTransformationTestValues testValues = std::get<2>(obj.param);
+
+        std::ostringstream result;
+        result <<
+            LayerTransformation::getTestCaseNameByParams(precision, shape, testValues.params) << "_" <<
+            (testValues.multiChannels ? "multiChannels_" : "notMultiChannels_") <<
+            "axis_" << testValues.axis << "_" <<
+            testValues.actual << "_" <<
+            testValues.result << "_";
+        return result.str();
+    }
+};
+
+// Compares the transformed function with the reference one and checks that the FakeQuantize
+// operations found in the transformed function share the same PrecisionsAttribute value.
+TEST_P(MoveFakeQuantizeTransformation, CompareFunctions) {
+    actualFunction->validate_nodes_and_infer_types();
+
+    const auto comparison = compare_functions(referenceFunction, actualFunction, true, true, true, true, true);
+    ASSERT_TRUE(comparison.first) << comparison.second;
+
+    const auto fakeQuantizes = LayerTransformation::get<opset1::FakeQuantize>(actualFunction);
+    ASSERT_TRUE(checkIfOutputAttributesSharedValuesAreTheSame<std::shared_ptr<PrecisionsAttribute>>(fakeQuantizes)) <<
+        "PrecisionsAttribute are not the same";
+}
+
+// Input precisions every test case is instantiated with.
+const std::vector<ngraph::element::Type> precisions = {
+    ngraph::element::f32,
+    ngraph::element::f16
+};
+
+namespace testValues1 {
+// Input shapes: two static ones and one where only the channel dimension is known.
+const std::vector<ngraph::PartialShape> shapes = {
+    { 1, 3, 9, 9 },
+    { 4, 3, 9, 9 },
+    { Dimension::dynamic(), 3, Dimension::dynamic(), Dimension::dynamic() }
+};
+// Each entry: params, multiChannels, axis, actual values, expected values and two trailing flags
+// that the test values constructor accepts but does not store.
+const std::vector<MoveFakeQuantizeTransformationTestValues> testValues = {
+    // U8: concat
+    // expected: fakeQuantizeBefore1/2 are set instead of fakeQuantizeAfter
+    {
+        LayerTransformation::createParamsU8I8(),
+        false,
+        1,
+        {
+            {},
+            {},
+            {},
+            {},
+            {},
+            {},
+            "",
+            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
+            {},
+            {}
+        },
+        {
+            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
+            {},
+            {},
+            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
+            {},
+            {},
+            "",
+            {},
+            {},
+            {},
+        },
+        false,
+        false
+    },
+    // U8: concat with "relu" operation
+    // expected: fakeQuantizeBefore1/2 are set instead of fakeQuantizeAfter
+    {
+        LayerTransformation::createParamsU8I8(),
+        false,
+        1,
+        {
+            {},
+            {},
+            {},
+            {},
+            {},
+            {},
+            "relu",
+            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
+            {},
+            {}
+        },
+        {
+            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
+            {},
+            {},
+            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
+            {},
+            {},
+            "relu",
+            {},
+            {},
+            {},
+        },
+        false,
+        false
+    },
+    // negative case: concat axis is 0, expected values are the same as the actual ones
+    {
+        LayerTransformation::createParamsU8I8(),
+        false,
+        0,
+        {
+            {},
+            {},
+            {},
+            {},
+            {},
+            {},
+            "",
+            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
+            {},
+            {}
+        },
+        {
+            {},
+            {},
+            {},
+            {},
+            {},
+            {},
+            "",
+            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
+            {},
+            {}
+        },
+        false,
+        false
+    },
+};
+
+// every combination of precision, shape and test case is instantiated
+INSTANTIATE_TEST_SUITE_P(
+    smoke_LPT,
+    MoveFakeQuantizeTransformation,
+    ::testing::Combine(
+        ::testing::ValuesIn(precisions),
+        ::testing::ValuesIn(shapes),
+        ::testing::ValuesIn(testValues)),
+    MoveFakeQuantizeTransformation::getTestCaseName);
+} // namespace testValues1
+} // namespace
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/move_fake_quantize_transformation.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/move_fake_quantize_transformation.cpp
@@ -0,0 +1,86 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "low_precision_transformations/move_fake_quantize_transformation.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+// CPU instances of the shared MoveFakeQuantizeTransformation plugin test.
+const std::vector<ngraph::element::Type> netPrecisions = {
+    ngraph::element::f32,
+    //ngraph::element::f16
+};
+
+const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
+    LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true)
+};
+
+// Field order follows MoveFakeQuantizeTransformationParam; the last three values are
+// layerName, expectedKernelType and axis.
+const std::vector<LayerTestsDefinitions::MoveFakeQuantizeTransformationParam> params = {
+  // without operation
+  {
+        {},
+        {},
+        {},
+        {},
+        {},
+        {},
+        "",
+        { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
+        {},
+        {},
+        "Concatenation",
+        "U8",
+        1,
+    },
+    // with ReLU operation
+    {
+        {},
+        {},
+        {},
+        {},
+        {},
+        {},
+        "relu",
+        { 256ul, {}, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f }},
+        {},
+        {},
+        "Concatenation",
+        "U8",
+        1
+    },
+    // negative case: concat axis is 0 (not a negative axis value), expected kernel type is FP32
+    {
+        {},
+        {},
+        {},
+        {},
+        {},
+        {},
+        "",
+        {256ul, {},  {-1.28f}, {1.27f}, {-1.28f}, {1.27f}},
+        {},
+        {},
+        "Concatenation",
+        "FP32",
+        0
+    }
+};
+
+const std::vector<ngraph::Shape> shapes = {
+    { 1, 3, 16, 16 },
+    { 4, 3, 16, 16 }
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_LPT, MoveFakeQuantizeTransformation,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::ValuesIn(shapes),
+        ::testing::Values(CommonTestUtils::DEVICE_CPU),
+        ::testing::ValuesIn(trasformationParamValues),
+        ::testing::ValuesIn(params)),
+    MoveFakeQuantizeTransformation::getTestCaseName);
+}  // namespace
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/move_fake_quantize_transformation.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/move_fake_quantize_transformation.cpp
@@ -0,0 +1,86 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "low_precision_transformations/move_fake_quantize_transformation.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+// GPU instances of the shared MoveFakeQuantizeTransformation plugin test.
+const std::vector<ngraph::element::Type> netPrecisions = {
+    ngraph::element::f32,
+    ngraph::element::f16
+};
+
+    const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
+       LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams(),
+    };
+
+    // Field order follows MoveFakeQuantizeTransformationParam; the last three values are
+    // layerName, expectedKernelType and axis.
+    const std::vector<LayerTestsDefinitions::MoveFakeQuantizeTransformationParam> params = {
+     // without operation
+    {
+       {},
+       {},
+       {},
+       {},
+       {},
+       {},
+       "",
+       { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
+       {},
+       {},
+       "Concat",
+       "U8",
+       1,
+    },
+   // with ReLU operation
+    {
+       {},
+       {},
+       {},
+       {},
+       {},
+       {},
+       "relu",
+       { 256ul, {}, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f }},
+       {},
+       {},
+       "Concat",
+       "U8",
+       1
+    },
+     // negative case: concat axis is 0 (not a negative axis value), expected kernel type is FP32
+    {
+       {},
+       {},
+       {},
+       {},
+       {},
+       {},
+       "",
+       {256ul, {},  {-1.28f}, {1.27f}, {-1.28f}, {1.27f}},
+       {},
+       {},
+       "Concat",
+       "FP32",
+       0
+    }
+    };
+
+    const std::vector<ngraph::Shape> shapes = {
+    { 1, 3, 16, 16 },
+    { 4, 3, 16, 16 }
+    };
+
+    INSTANTIATE_TEST_SUITE_P(smoke_LPT, MoveFakeQuantizeTransformation,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::ValuesIn(shapes),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU),
+        ::testing::ValuesIn(trasformationParamValues),
+        ::testing::ValuesIn(params)),
+    MoveFakeQuantizeTransformation::getTestCaseName);
+}  // namespace
--- a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/move_fake_quantize_transformation.hpp
+++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/move_fake_quantize_transformation.hpp
@@ -0,0 +1,57 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <string>
+#include <memory>
+
+#include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp"
+#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
+#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
+
+#include "low_precision/move_fake_quantize.hpp"
+
+#include "lpt_ngraph_functions/move_fake_quantize_function.hpp"
+
+namespace LayerTestsDefinitions {
+
+// Description of one plugin test case. The fields up to dequantizationAfter and the axis are the
+// inputs of the ngraph::builder::subgraph::MoveFakeQuantize::get function builder.
+class MoveFakeQuantizeTransformationParam {
+public:
+    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore1;
+    ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore1;
+    ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1;
+    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore2;
+    ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore2;
+    ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2;
+    // name of the optional operation, e.g. "relu"; empty string when there is none
+    std::string operation;
+    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter;
+    ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter;
+    ngraph::builder::subgraph::DequantizationOperations dequantizationAfter;
+    // NOTE(review): layerName / expectedKernelType are presumably checked in Run() — confirm in the .cpp
+    std::string layerName;
+    std::string expectedKernelType;
+    // Concat axis
+    std::int64_t axis;
+};
+
+// Test parameters: network precision, input shape, target device name,
+// low precision transformation params and the test case description.
+typedef std::tuple <
+    ngraph::element::Type,
+    ngraph::Shape,
+    std::string,
+    ngraph::pass::low_precision::LayerTransformation::Params,
+    MoveFakeQuantizeTransformationParam
+> MoveFakeQuantizeTransformationParams;
+
+// Shared plugin test for the MoveFakeQuantize transformation; instantiated per device.
+class MoveFakeQuantizeTransformation :
+    public testing::WithParamInterface<MoveFakeQuantizeTransformationParams>,
+    public LayerTestsUtils::LayerTransformation {
+public:
+    // Builds the test name from the test parameters.
+    static std::string getTestCaseName(testing::TestParamInfo<MoveFakeQuantizeTransformationParams> obj);
+
+protected:
+    void SetUp() override;
+
+    void Run() override;
+};
+
+}  // namespace LayerTestsDefinitions
--- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/move_fake_quantize_transformation.cpp
+++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/move_fake_quantize_transformation.cpp
@@ -0,0 +1,77 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision_transformations/move_fake_quantize_transformation.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+
+#include <ie_core.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+#include "shared_test_classes/base/layer_test_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "lpt_ngraph_functions/move_fake_quantize_function.hpp"
+
+namespace LayerTestsDefinitions {
+
+std::string MoveFakeQuantizeTransformation::getTestCaseName(testing::TestParamInfo<MoveFakeQuantizeTransformationParams> obj) {
+    // Name = common prefix (precision, shape, device, LPT params) + operation + the FakeQuantize placed after Concat.
+    const ngraph::element::Type precision = std::get<0>(obj.param);
+    const ngraph::PartialShape shape(std::get<1>(obj.param));
+    const std::string device = std::get<2>(obj.param);
+    const ngraph::pass::low_precision::LayerTransformation::Params ltParams = std::get<3>(obj.param);
+    const MoveFakeQuantizeTransformationParam& testValues = std::get<4>(obj.param);
+
+    std::ostringstream name;
+    name << getTestCaseNameByParams(precision, shape, device, ltParams);
+    name << testValues.operation << testValues.fakeQuantizeAfter;
+    return name.str();
+}
+
+void MoveFakeQuantizeTransformation::SetUp() {
+    // Unpack the test tuple; the LPT params slot <3> is not needed to build the function.
+    const auto& testParams = GetParam();
+    const ngraph::element::Type precision = std::get<0>(testParams);
+    const ngraph::PartialShape shape(std::get<1>(testParams));
+    targetDevice = std::get<2>(testParams);
+    const MoveFakeQuantizeTransformationParam& testValues = std::get<4>(testParams);
+    function = ngraph::builder::subgraph::MoveFakeQuantize::get(
+        precision,
+        shape,
+        testValues.fakeQuantizeBefore1,
+        testValues.convertBefore1,
+        testValues.dequantizationBefore1,
+        testValues.fakeQuantizeBefore2,
+        testValues.convertBefore2,
+        testValues.dequantizationBefore2,
+        testValues.operation,
+        testValues.fakeQuantizeAfter,
+        testValues.convertAfter,
+        testValues.dequantizationAfter,
+        {},  // concatAttributes: none
+        {},  // precisionAfterOperation: default-constructed
+        {},  // dequantizationAfter: default-constructed
+        testValues.axis);
+}
+
+void MoveFakeQuantizeTransformation::Run() {
+    LayerTestsCommon::Run();
+
+    // An "FP32" expectation is checked as "FP16" when the network precision is f16.
+    const auto& testValues = std::get<4>(GetParam());
+    const auto actualPrecision = getRuntimePrecisionByType(testValues.layerName);
+    const bool isFp16Network = std::get<0>(GetParam()) == ngraph::element::f16;
+    const std::string expectedPrecision =
+        (isFp16Network && testValues.expectedKernelType == "FP32") ? "FP16" : testValues.expectedKernelType;
+    EXPECT_EQ(actualPrecision, expectedPrecision);
+}
+
+TEST_P(MoveFakeQuantizeTransformation, CompareWithRefImpl) {  // one execution of Run() per instantiated parameter set
+    Run();
+};
+
+}  // namespace LayerTestsDefinitions
--- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/move_fake_quantize_function.hpp
+++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/move_fake_quantize_function.hpp
@@ -0,0 +1,41 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <algorithm>
+#include <memory>
+#include <ngraph/ngraph.hpp>
+#include "low_precision/layer_transformation.hpp"
+#include "common/fake_quantize_on_data.hpp"
+#include "common/dequantization_operations.hpp"
+
+namespace ngraph {
+namespace builder {
+namespace subgraph {
+
+class MoveFakeQuantize {  // Builder of the two-input Concat test subgraph used by the MoveFakeQuantize tests.
+public:
+    static std::shared_ptr<ngraph::Function> get(  // returns an ngraph::Function named "MoveFakeQuantize" with two Parameters
+        const ngraph::element::Type inputPrecision,  // element type of both Parameters; also forwarded to makeFakeQuantize
+        const ngraph::PartialShape& inputShape,  // shape of both Parameters
+        const FakeQuantizeOnDataWithConstant& fqOnData1,  // FakeQuantize settings for the first Concat branch; branch is left bare when empty
+        const DequantizationOperations::Convert& convert1,  // Convert settings for the first branch (applied when not empty)
+        const DequantizationOperations& dequantization1,  // dequantization settings for the first branch (applied when not empty)
+        const FakeQuantizeOnDataWithConstant& fqOnData2,  // FakeQuantize settings for the second Concat branch
+        const DequantizationOperations::Convert& convert2,  // Convert settings for the second branch
+        const DequantizationOperations& dequantization2,  // dequantization settings for the second branch
+        const std::string& operation,  // "relu" puts a Relu in front of every FakeQuantize that is created
+        const FakeQuantizeOnDataWithConstant& fqOnData3,  // FakeQuantize settings applied after the Concat (skipped when empty)
+        const DequantizationOperations::Convert& convert3,  // not used by the current implementation
+        const DequantizationOperations& dequantization3,  // not used by the current implementation
+        const std::vector<std::shared_ptr<Variant>>& concatAttributes,  // handed to addAttributes for the node that follows the branches
+        const ngraph::element::Type precisionAfterOperation,  // not used by the current implementation
+        const DequantizationOperations& dequantizationAfter,  // dequantization applied to the Concat output (when not empty)
+        const std::int64_t& axis);  // Concat axis
+};
+
+}  // namespace subgraph
+}  // namespace builder
+}  // namespace ngraph
--- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/move_fake_quantize_function.cpp
+++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/move_fake_quantize_function.cpp
@@ -0,0 +1,107 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "lpt_ngraph_functions/move_fake_quantize_function.hpp"
+#include <low_precision/relu.hpp>
+
+#include <ngraph/opsets/opset1.hpp>
+#include "ngraph_ops/type_relaxed.hpp"
+#include "low_precision/network_helper.hpp"
+
+#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
+#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
+#include "lpt_ngraph_functions/common/builders.hpp"
+
+namespace ngraph {
+namespace builder {
+namespace subgraph {
+
+using namespace ngraph::pass;
+
+std::shared_ptr<ngraph::Function> MoveFakeQuantize::get(
+    const ngraph::element::Type inputPrecision,
+    const ngraph::PartialShape& inputShape,
+    const FakeQuantizeOnDataWithConstant& fqOnData1,
+    const DequantizationOperations::Convert& convert1,
+    const DequantizationOperations& dequantization1,
+    const FakeQuantizeOnDataWithConstant& fqOnData2,
+    const DequantizationOperations::Convert& convert2,
+    const DequantizationOperations& dequantization2,
+    const std::string& operation,
+    const FakeQuantizeOnDataWithConstant& fqOnData3,
+    const DequantizationOperations::Convert& convert3,
+    const DequantizationOperations& dequantization3,
+    const std::vector<std::shared_ptr<Variant>>& concatAttributes,
+    const ngraph::element::Type precisionAfterOperation,
+    const DequantizationOperations& dequantizationAfter,
+    const std::int64_t& axis) {
+    // Builds the "MoveFakeQuantize" test function: two Parameters, each optionally followed by
+    // [Relu ->] FakeQuantize [-> Convert] [-> dequantization], are joined by a Concat along `axis`;
+    // the Concat may be followed by `dequantizationAfter` and/or [Relu ->] FakeQuantize (fqOnData3).
+    // convert3, dequantization3 and precisionAfterOperation are accepted but not used here.
+    const bool withRelu = operation == "relu";
+
+    // Builds one Concat input branch; returns `input` untouched when no FakeQuantize is requested.
+    const auto makeBranch = [&](
+        const std::shared_ptr<Node>& input,
+        const FakeQuantizeOnDataWithConstant& fqOnData,
+        const DequantizationOperations::Convert& convert,
+        const DequantizationOperations& dequantization,
+        const std::string& fqName) -> std::shared_ptr<Node> {
+        if (fqOnData.empty()) {
+            return input;
+        }
+        std::shared_ptr<Node> source = input;
+        if (withRelu) {
+            source = std::make_shared<ngraph::opset1::Relu>(input->output(0));
+        }
+        std::shared_ptr<Node> branch = makeFakeQuantize(source, inputPrecision, fqOnData);
+        branch->set_friendly_name(fqName);
+        if (!convert.empty()) {
+            branch = std::make_shared<opset1::Convert>(branch, convert.outPrecision);
+        }
+        if (!dequantization.empty()) {
+            branch = makeDequantization(branch, dequantization);
+        }
+        return branch;
+    };
+
+    const auto input1 = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape);
+    input1->set_friendly_name("input1");
+    const auto input2 = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape);
+    input2->set_friendly_name("input2");
+    // Each branch is wired to its own Parameter and its own convert/dequantization settings.
+    const auto parent1 = makeBranch(input1, fqOnData1, convert1, dequantization1, "concat_fq1");
+    const auto parent2 = makeBranch(input2, fqOnData2, convert2, dequantization2, "concat_fq2");
+
+    const auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{ parent1, parent2 }, axis);
+    concat->set_friendly_name("concat");
+    std::shared_ptr<ngraph::Node> parent = concat;
+    if (!dequantizationAfter.empty()) {
+        const auto lastDequantization = makeDequantization(concat, dequantizationAfter);
+        lastDequantization->set_friendly_name("multiply");
+        parent = lastDequantization;
+    }
+    addAttributes({ parent }, concatAttributes);
+    if (!fqOnData3.empty()) {
+        // NOTE(review): the trailing FakeQuantize is attached to the Concat itself, so a non-empty
+        // dequantizationAfter is left out of the returned graph in that case - confirm this is intended.
+        std::shared_ptr<Node> source = concat;
+        if (withRelu) {
+            source = std::make_shared<ngraph::opset1::Relu>(concat->output(0));
+        }
+        parent = makeFakeQuantize(source, inputPrecision, fqOnData3);
+    }
+    // The last node is always renamed to "output", replacing any friendly name assigned above.
+    parent->set_friendly_name("output");
+    ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(parent) };
+    return std::make_shared<ngraph::Function>(
+        results,
+        ngraph::ParameterVector{ input1, input2 },
+        "MoveFakeQuantize");
+}
+
+}  // namespace subgraph
+}  // namespace builder
+}  // namespace ngraph