[LPT] Refactoring: PoC (#5226)

* LPT fix for Windows
* LPT fix for Windows
* Remove inference_engine_transformations_EXPORTS
* [nGraph] Register new node in GraphRewrite
* [LPT] nGraph alignment
* [LPT] nGraph alignment: tests

Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>
2021-07-19 14:48:20 +01:00 · 2021-07-19 14:48:20 +01:00 · c3c26b4807
commit c3c26b4807
parent 0d9212683f
393 changed files with 9567 additions and 5331 deletions
--- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
@ -70,9 +70,12 @@
 #include <transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp>
 #include <low_precision/pull_reshape_through_dequantization.hpp>
 #include <low_precision/pull_transpose_through_dequantization.hpp>
-#include <low_precision/transformer.hpp>
+#include <low_precision/convolution.hpp>
 #include <low_precision/convolution_backprop_data.hpp>
+#include <low_precision/group_convolution.hpp>
+#include <low_precision/low_precision.hpp>
 #include <low_precision/mat_mul.hpp>
+#include <low_precision/multiply_to_group_convolution.hpp>
 #include <low_precision/strided_slice.hpp>
 #include <low_precision/network_helper.hpp>

@ -151,10 +154,12 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork");
        auto nGraphFunc = clonedNetwork.getFunction();

+        using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
+
        bool enableInt8;
        {
            ngraph::pass::Manager manager;
-            enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc);
+            enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(nGraphFunc);
            if (enableInt8) {
                manager.register_pass<ngraph::pass::DisableConvertConstantFoldingOnConstPath>(
                    std::vector<ngraph::element::Type>{ ngraph::element::i8, ngraph::element::u8, ngraph::element::i4, ngraph::element::u4 });
@ -208,8 +213,6 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc

            auto pass_config = manager.get_pass_config();

-            using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
-
            // SpaceToDepth/DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
            pass_config->set_callback<ngraph::pass::ConvertSpaceToDepth,
                                      ngraph::pass::ConvertDepthToSpace>(
@ -391,28 +394,78 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
            if (!config.enable_fp16_for_quantized_models) {
                manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }});
            }
-            auto lptPrerequisites = manager.register_pass<ngraph::pass::GraphRewrite>();
-            const std::vector<ngraph::element::Type> supportedTypes = { ngraph::element::i8, ngraph::element::u8 };
-            lptPrerequisites->add_matcher<PullReshapeThroughDequantization>(supportedTypes);
-            lptPrerequisites->add_matcher<PullTransposeThroughDequantization>(supportedTypes);
-            lptPrerequisites->add_matcher<ngraph::pass::LinOpSequenceFusion>();
-            manager.run_passes(nGraphFunc);

-            auto params = LayerTransformation::Params(true,                                                        // updatePrecisions
-                                                      LayerTransformation::QuantizedTensorAlignment::UpdateLevel,  // quantizedTensorAlignmentOnActivations
-                                                      LayerTransformation::QuantizedTensorAlignment::None,         // quantizedTensorAlignmentOnWeights
-                                                      true);                                                       // supportAsymmetricQuantization
-            LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params)
-                .add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params)
-                    .setSupportAsymmetricQuantization(false)
-                    .setSupport3DTensorOnActivations(false))
-                .add<ConvolutionBackpropDataTransformation, ngraph::opset1::ConvolutionBackpropData>(LayerTransformation::Params(params)
-                    .setSupportAsymmetricQuantization(false)
-                    .setDeconvolutionSpecificChannelsRatio(true))
-                // INT8 StridedSlice not supported
-                .remove<StridedSliceTransformation, ngraph::opset1::StridedSlice>());
+            auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({  // per-operation precision lists, keyed by port index; handed to the LowPrecision pass below
+                OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
+                    {0, {ngraph::element::u8, ngraph::element::i8}},  // port 0: u8 or i8
+                    {1, {ngraph::element::i8}},  // port 1: i8 only
+                }),
+                OperationPrecisionRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
+                    {0, {ngraph::element::u8, ngraph::element::i8}},
+                    {1, {ngraph::element::i8}}
+                }),
+                OperationPrecisionRestriction::create<ngraph::opset1::GroupConvolution>({
+                    {0, {ngraph::element::u8, ngraph::element::i8}},
+                    {1, {ngraph::element::i8}}
+                }),
+                OperationPrecisionRestriction::create<ngraph::opset1::StridedSlice>({})  // empty list: the code this replaces was commented "INT8 StridedSlice not supported"
+            });

-            transformer.transform(nGraphFunc);
+            auto perTensorQuantization = std::vector<OperationPerTensorQuantizationRestriction>({  // restricted ports per operation type; handed to the LowPrecision pass below
+                OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0}),  // port 0 only
+                OperationPerTensorQuantizationRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0}),  // port 0 only
+            });
+
+            ngraph::pass::Manager lptManager;  // separate manager, distinct from the `manager` used earlier in this scope
+
+            auto lptPassConfig = lptManager.get_pass_config();
+            lptPassConfig->disable<ngraph::pass::low_precision::StridedSliceTransformation>();  // takes over from the removed .remove<StridedSliceTransformation, ...>() call
+            // MarkupPrecisions callback: only Multiply nodes are inspected. It returns true for a Multiply that
+            // MultiplyToGroupConvolutionTransformation reports it cannot turn into a GroupConvolution, and false
+            // for every other node.
+            // NOTE(review): presumably a true result makes the pass skip the node - confirm against PassConfig.
+            lptPassConfig->set_callback<ngraph::pass::low_precision::MarkupPrecisions>([](const_node_ptr& node) -> bool {
+                if (const auto multiply = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node)) {
+                    return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(multiply);
+                }
+                return false;
+            });
+            // ConvolutionBackpropData callback. Returns true when:
+            //   * the channel count (dimension 1) of input 0 or output 0 is not statically known, or
+            //   * input channels are not a multiple of 4 or output channels are not a multiple of 16, or
+            //   * the node is asymmetrically quantized on activations or on weights.
+            // It takes over from the removed setSupportAsymmetricQuantization(false) and
+            // setDeconvolutionSpecificChannelsRatio(true) parameters.
+            // NOTE(review): presumably a true result makes the pass skip the node - confirm against PassConfig.
+            lptPassConfig->set_callback<ConvolutionBackpropDataTransformation>([](const_node_ptr& node) -> bool {
+                // Writes the static size of dimension 1 of `shape` into `channel`. Returns false and leaves
+                // `channel` untouched when the rank is dynamic, the rank is below 2, or dimension 1 is dynamic.
+                auto fillStaticChannel = [](const ngraph::PartialShape& shape, size_t& channel) -> bool {
+                    const auto rank = shape.rank();
+                    // get_length() is signed, so compare against a signed literal.
+                    if (rank.is_dynamic() || (rank.get_length() < 2)) {
+                        return false;
+                    }
+                    const auto dimension = shape[1];
+                    if (dimension.is_dynamic()) {
+                        return false;
+                    }
+                    // The dimension is static here; make the signed -> size_t conversion explicit.
+                    channel = static_cast<size_t>(dimension.get_length());
+                    return true;
+                };
+
+                size_t inputChannels = 0;
+                if (!fillStaticChannel(node->get_input_partial_shape(0), inputChannels)) {
+                    return true;
+                }
+
+                size_t outputChannels = 0;
+                if (!fillStaticChannel(node->get_output_partial_shape(0), outputChannels)) {
+                    return true;
+                }
+
+                if ((inputChannels % 4 != 0) || (outputChannels % 16 != 0)) {
+                    return true;
+                }
+
+                return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node);
+            });
+            lptPassConfig->set_callback<MatMulTransformation>([](const_node_ptr& node) -> bool {  // appears to take over from the removed setSupport3DTensorOnActivations(false) - confirm
+                return MatMulTransformation::is3DTensorOnActivations(node);
+            });
+
+            lptManager.register_pass<LowPrecision>(supportedPrecisions, perTensorQuantization);  // both restriction lists built above are passed to the pass
+            lptManager.run_passes(nGraphFunc);  // runs on the cloned network's function obtained earlier
        }

        {
--- a/inference-engine/src/low_precision_transformations/CMakeLists.txt
+++ b/inference-engine/src/low_precision_transformations/CMakeLists.txt
@ -28,8 +28,6 @@ ie_faster_build(${TARGET_NAME}
 ie_add_vs_version_file(NAME ${TARGET_NAME}
                       FILEDESCRIPTION "Inference Engine LP transformations library")

-target_compile_definitions(${TARGET_NAME} PRIVATE inference_engine_transformations_EXPORTS)
-
 target_link_libraries(${TARGET_NAME} PUBLIC inference_engine_transformations
                                     PRIVATE openvino::itt)

--- a/inference-engine/src/low_precision_transformations/include/low_precision/add.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/add.hpp
@ -11,12 +11,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API AddTransformation : public EltwiseBaseTransformation {
+class LP_TRANSFORMATIONS_API AddTransformation : public EltwiseBaseTransformation {
 public:
-    AddTransformation(const Params& params) : EltwiseBaseTransformation(params) {}
-    ~AddTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    AddTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
 };

--- a/inference-engine/src/low_precision_transformations/include/low_precision/align_quantization_intervals.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/align_quantization_intervals.hpp
@ -0,0 +1,25 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <ngraph/pass/pass.hpp>
+#include "low_precision/lpt_visibility.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+class LP_TRANSFORMATIONS_API AlignQuantizationIntervals;
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
+
+class ngraph::pass::low_precision::AlignQuantizationIntervals : public ngraph::pass::FunctionPass {  // function-level pass; LP_TRANSFORMATIONS_API is on the forward declaration above, not repeated here
+public:
+    NGRAPH_RTTI_DECLARATION;
+    bool run_on_function(std::shared_ptr<ngraph::Function> f) override;  // declared only; the implementation is not in this header
+};
--- a/inference-engine/src/low_precision_transformations/include/low_precision/align_quantization_parameters.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/align_quantization_parameters.hpp
@ -0,0 +1,26 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+
+#include <ngraph/pass/pass.hpp>
+#include "low_precision/lpt_visibility.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+class LP_TRANSFORMATIONS_API AlignQuantizationParameters;
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
+
+class ngraph::pass::low_precision::AlignQuantizationParameters : public ngraph::pass::FunctionPass {  // function-level pass; LP_TRANSFORMATIONS_API is on the forward declaration above, not repeated here
+public:
+    NGRAPH_RTTI_DECLARATION;
+    bool run_on_function(std::shared_ptr<ngraph::Function> f) override;  // declared only; the implementation is not in this header
+};
--- a/inference-engine/src/low_precision_transformations/include/low_precision/avg_pool.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/avg_pool.hpp
@ -11,11 +11,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API AvgPoolTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API AvgPoolTransformation : public LayerTransformation {
 public:
-    AvgPoolTransformation(const Params& params);
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    AvgPoolTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/base_matcher_pass.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/base_matcher_pass.hpp
@ -0,0 +1,24 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+#include <ngraph/node.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+#include "rt_info/attribute_parameters.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+class LP_TRANSFORMATIONS_API BaseMatcherPass;
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
+
+class LP_TRANSFORMATIONS_API ngraph::pass::low_precision::BaseMatcherPass : public ngraph::pass::MatcherPass {  // MatcherPass that additionally carries an AttributeParameters value
+public:
+    BaseMatcherPass(const AttributeParameters& params = AttributeParameters());  // the default argument makes this the default constructor as well
+    AttributeParameters params;  // public member; presumably set from the constructor argument - the definition is not in this header
+};
--- a/inference-engine/src/low_precision_transformations/include/low_precision/clamp.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/clamp.hpp
@ -12,11 +12,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API ClampTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API ClampTransformation : public LayerTransformation {
 public:
-    ClampTransformation(const Params& params);
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    ClampTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/common/dequantization_op.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/dequantization_op.hpp
@ -13,7 +13,7 @@
 #include <ngraph/check.hpp>
 #include <ngraph/opsets/opset1.hpp>

-#include "transformations_visibility.hpp"
+#include "low_precision/lpt_visibility.hpp"
 #include "transformations/rt_info/dequantization_attribute.hpp"

 namespace ngraph {
@ -21,7 +21,7 @@ namespace pass {
 namespace low_precision {

 // template<typename BaseOp2>
-// class TRANSFORMATIONS_API DequantizationOp : public BaseOp2 {
+// class LP_TRANSFORMATIONS_API DequantizationOp : public BaseOp2 {
 // public:
 //    template <typename ... Args>
 //    DequantizationOp(Args&&... args) : BaseOp2(std::forward<Args>(args)...) {
@ -63,7 +63,7 @@ void copyRuntimeInfo(const ngraph::Node& from, ngraph::Node& to) {

 } // namespace

-class TRANSFORMATIONS_API DequantizationConvert : public ngraph::opset1::Convert {
+class LP_TRANSFORMATIONS_API DequantizationConvert : public ngraph::opset1::Convert {
 public:
    DequantizationConvert(const ngraph::Output<Node>& arg, const ngraph::element::Type& destination_type) :
        ngraph::opset1::Convert(arg, destination_type) {
@ -77,7 +77,7 @@ public:
    }
 };

-class TRANSFORMATIONS_API DequantizationSubtract : public ngraph::opset1::Subtract {
+class LP_TRANSFORMATIONS_API DequantizationSubtract : public ngraph::opset1::Subtract {
 public:
    DequantizationSubtract(
        const ngraph::Output<Node>& arg0,
@ -94,7 +94,7 @@ public:
    }
 };

-class TRANSFORMATIONS_API DequantizationMultiply : public ngraph::opset1::Multiply {
+class LP_TRANSFORMATIONS_API DequantizationMultiply : public ngraph::opset1::Multiply {
 public:
    DequantizationMultiply(
        const Output<Node>& arg0,
@ -116,7 +116,7 @@ public:
    }
 };

-class TRANSFORMATIONS_API DequantizationAdd : public ngraph::opset1::Add {
+class LP_TRANSFORMATIONS_API DequantizationAdd : public ngraph::opset1::Add {
 public:
    DequantizationAdd(
        const ngraph::Output<Node>& arg0,
--- a/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp
@ -8,6 +8,7 @@
 #include <tuple>
 #include <ngraph/ngraph.hpp>
 #include <ngraph/opsets/opset1.hpp>
+#include <low_precision/lpt_visibility.hpp>

 namespace ngraph {
 namespace pass {
@ -15,7 +16,7 @@ namespace low_precision {

 typedef std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> FakeQuantizeDequantizationValues;

-class FakeQuantizeDequantization {
+class LP_TRANSFORMATIONS_API FakeQuantizeDequantization {
 public:
    FakeQuantizeDequantization();

--- a/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp
@ -7,7 +7,7 @@
 #include <exception>
 #include <string>
 #include <ngraph/node.hpp>
-#include <transformations_visibility.hpp>
+#include <low_precision/lpt_visibility.hpp>

 /**
 * @def THROW_TRANSFORMATION_EXCEPTION_LPT
@ -19,7 +19,7 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API Exception : std::exception {
+class LP_TRANSFORMATIONS_API Exception : std::exception {
    std::shared_ptr<std::ostringstream> buffer;
    mutable std::string buffer_str;
 public:
@ -42,7 +42,7 @@ public:
 #define THROW_TRANSFORMATION_EXCEPTION throw ::ngraph::pass::low_precision::Exception() << __FILE__ << ":" << __LINE__ << " "


-class TRANSFORMATIONS_API InferenceEngineLptException : public Exception {
+class LP_TRANSFORMATIONS_API InferenceEngineLptException : public Exception {
 public:
    InferenceEngineLptException(const std::string& filename, const size_t line, const Node& node) {
        *this
--- a/inference-engine/src/low_precision_transformations/include/low_precision/common/operation_per_tensor_quantization_restriction.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/operation_per_tensor_quantization_restriction.hpp
@ -0,0 +1,56 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+
+#include <ngraph/node.hpp>
+#include <ngraph/variant.hpp>
+
+#include <low_precision/lpt_visibility.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+// Pairs an operation type with the list of its ports recorded as restricted (`restrictedPorts`).
+// A vector of these objects forms a per-tensor quantization restriction list.
+class OperationPerTensorQuantizationRestriction {
+public:
+    using RestrictedPorts = std::vector<size_t>;
+
+    ngraph::Node::type_info_t operationType;
+    // Initialized in-class so a default-constructed restriction never carries an indeterminate flag.
+    bool specifyVersion = false;
+    RestrictedPorts restrictedPorts;
+
+    OperationPerTensorQuantizationRestriction() = default;
+    OperationPerTensorQuantizationRestriction(
+        const ngraph::Node::type_info_t operationType,
+        const bool specifyVersion,
+        const RestrictedPorts& restrictedPorts) :
+        operationType(operationType),
+        specifyVersion(specifyVersion),
+        restrictedPorts(restrictedPorts) {}
+
+    // Builds a restriction for operation type T from T::get_type_info_static().
+    template <typename T>
+    static OperationPerTensorQuantizationRestriction create(
+        const RestrictedPorts& restrictedPorts = {},
+        const bool specifyVersion = false) {
+        return OperationPerTensorQuantizationRestriction(T::get_type_info_static(), specifyVersion, restrictedPorts);
+    }
+
+    // Returns the restricted ports of the first entry whose operation type equals T's static type info,
+    // or an empty list when no entry matches. The list is only read, hence the const reference.
+    template <typename T>
+    static RestrictedPorts getPrecisionsByOperationType(const std::vector<OperationPerTensorQuantizationRestriction>& restrictions) {
+        for (const auto& restriction : restrictions) {
+            if (restriction.operationType == T::get_type_info_static()) {
+                return restriction.restrictedPorts;
+            }
+        }
+        return {};
+    }
+};
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/include/low_precision/common/operation_precision_restriction.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/operation_precision_restriction.hpp
@ -0,0 +1,59 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <set>
+#include <unordered_set>
+#include <vector>
+
+#include <ngraph/node.hpp>
+#include <ngraph/variant.hpp>
+
+#include <low_precision/lpt_visibility.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+// Pairs an operation type with a list of (port index, allowed element types) entries.
+// A vector of these objects forms a precision restriction list.
+class OperationPrecisionRestriction {
+public:
+    using PrecisionsByPort = std::vector<std::pair<size_t, std::vector<ngraph::element::Type>>>;
+
+    ngraph::Node::type_info_t operationType;
+    // Initialized in-class so a default-constructed restriction never carries an indeterminate flag.
+    bool specifyVersion = false;
+    PrecisionsByPort precisionsByPort;
+
+    OperationPrecisionRestriction() = default;
+    OperationPrecisionRestriction(
+        const ngraph::Node::type_info_t operationType,
+        const bool specifyVersion,
+        const PrecisionsByPort& precisionsByPort) :
+        operationType(operationType),
+        specifyVersion(specifyVersion),
+        precisionsByPort(precisionsByPort) {}
+
+    // Builds a restriction for operation type T from T::get_type_info_static().
+    template <typename T>
+    static OperationPrecisionRestriction create(
+        const PrecisionsByPort& precisionsByPort,
+        const bool specifyVersion = false) {
+        return OperationPrecisionRestriction(T::get_type_info_static(), specifyVersion, precisionsByPort);
+    }
+
+    // Returns the per-port precisions of the first entry whose operation type equals T's static type info,
+    // or an empty list when no entry matches. The list is only read, hence the const reference.
+    template <typename T>
+    static PrecisionsByPort getPrecisionsByOperationType(const std::vector<OperationPrecisionRestriction>& restrictions) {
+        for (const auto& restriction : restrictions) {
+            if (restriction.operationType == T::get_type_info_static()) {
+                return restriction.precisionsByPort;
+            }
+        }
+        return {};
+    }
+};
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/include/low_precision/common/subgraph.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/subgraph.hpp
@ -1,42 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <vector>
-
-#include <ngraph/ngraph.hpp>
-#include <ngraph/check.hpp>
-#include <ngraph/opsets/opset1.hpp>
-#include "../ilayer_transformations_manager.hpp"
-
-namespace ngraph {
-namespace pass {
-namespace low_precision {
-
-class Subgraph {
-public:
-    Subgraph(ngraph::pass::ILayerTransformationsManager* layerTransformationsManager);
-
-    bool fillSubgraphForConcat(const std::shared_ptr<ngraph::opset1::Concat>& concat, std::unordered_set<std::string>& handledLayers);
-    bool empty() const;
-
-    std::vector<std::shared_ptr<ngraph::Node>> quantizationLayers;
-    std::vector<std::shared_ptr<ngraph::opset1::Concat>> concatLayers;
-    std::unordered_map<std::string, std::shared_ptr<ngraph::Node>> layers;
-
-private:
-    bool atLeastOneIsIntermediate(const std::shared_ptr<ngraph::Node>& node) const;
-    bool fillSubgraphForQuantization(const std::shared_ptr<ngraph::opset1::FakeQuantize>& fakeQuantize, std::unordered_set<std::string>& handledLayers);
-    bool fillSubgraphForIntermediate(const std::shared_ptr<ngraph::Node>& intermediate, std::unordered_set<std::string>& handledLayers);
-    bool fill(const std::shared_ptr<ngraph::Node>& concat, std::unordered_set<std::string>& handledLayers);
-    const ngraph::pass::ILayerTransformationsManager* layerTransformationsManager;
-};
-
-} // namespace low_precision
-} // namespace pass
-} // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp
@ -13,32 +13,21 @@
 #include <ngraph/ngraph.hpp>

 #include "layer_transformation.hpp"
-#include "common/subgraph.hpp"
 #include "common/fake_quantize_dequantization.hpp"

 namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API ConcatTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API ConcatTransformation : public LayerTransformation {
 public:
-    ConcatTransformation(const Params& params) : LayerTransformation(params) {}
-    ~ConcatTransformation() override {};
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    ConcatTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;

 protected:
-    void addDequantizationLayers(
-        TransformationContext& context,
-        ngraph::pass::low_precision::Subgraph& subgraph,
-        std::function<void(
-            std::shared_ptr<ngraph::Node> layer,
-            std::shared_ptr<ngraph::Node> child,
-            const std::string originalLayerName,
-            std::vector<FakeQuantizeDequantization>& dequantizationsToConcatenate)> getLayerDequantizationCallback) const;
-
    static bool isHandled(
        const TransformationContext& context,
        const std::vector<std::shared_ptr<ngraph::Node>>& quantizationOperations);
@ -51,14 +40,6 @@ protected:
        NodeVector& multiplyNodes) const;

    std::shared_ptr<Node> concatenateDeqNodes(NodeVector& nodes) const;
-
-private:
-    size_t getMinQuantizationLevels(
-        const DataPrecision& dataPrecision,
-        const float maxOutputInterval,
-        const std::vector<QuantizationDetails>& quantizationLayersDetails,
-        const float outputLowValue,
-        const float outputHighValue) const;
 };

 } // namespace low_precision
--- a/inference-engine/src/low_precision_transformations/include/low_precision/concat_multi_channels.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/concat_multi_channels.hpp
@ -1,51 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <memory>
-#include <string>
-#include <unordered_map>
-
-#include <ngraph/ngraph.hpp>
-
-#include "concat.hpp"
-#include "common/subgraph.hpp"
-#include "common/fake_quantize_dequantization.hpp"
-
-namespace ngraph {
-namespace pass {
-namespace low_precision {
-
-class TRANSFORMATIONS_API ConcatMultiChannelsTransformation : public ConcatTransformation {
-public:
-    ConcatMultiChannelsTransformation(const Params& params) : ConcatTransformation(params) {}
-    ~ConcatMultiChannelsTransformation() override {};
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
-    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
-
-private:
-    // Go through the parent elements of the layer and fill dequantization collection
-    // with Dq operations that should be inserted before the layer.
-    void fillDequantization(
-        const std::shared_ptr<ngraph::Node> layer,
-        const std::unordered_map<std::string, FakeQuantizeDequantization>& dequantizationByFakeQuantize,
-        std::vector<FakeQuantizeDequantization>& dequantization) const;
-
-    FakeQuantizeDequantization getConcatenatedDequantization(
-        const std::shared_ptr<ngraph::opset1::Concat> concat,
-        const std::vector<FakeQuantizeDequantization>& dequantization) const;
-
-    static FakeQuantizeDequantization getFoldedDequantization(
-        const std::shared_ptr<ngraph::Node> operation,
-        const FakeQuantizeDequantization& dequantization,
-        const size_t sourceOutputIdx);
-
-    bool isMultiChannel(const std::vector<std::shared_ptr<ngraph::opset1::Concat>>& concatLayers) const noexcept;
-};
-
-} // namespace low_precision
-} // namespace pass
-} // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/include/low_precision/convert.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/convert.hpp
@ -11,12 +11,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API ConvertTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API ConvertTransformation : public LayerTransformation {
 public:
-    ConvertTransformation(const Params& params) : LayerTransformation(params) {}
-    ~ConvertTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    ConvertTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
 };

--- a/inference-engine/src/low_precision_transformations/include/low_precision/convert_subtract_constant.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/convert_subtract_constant.hpp
@ -7,14 +7,14 @@
 #include <memory>
 #include <utility>

-#include <transformations_visibility.hpp>
+#include <low_precision/lpt_visibility.hpp>
 #include <ngraph/pass/graph_rewrite.hpp>

 namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API ConvertSubtractConstant;
+class LP_TRANSFORMATIONS_API ConvertSubtractConstant;

 }  // namespace low_precision
 }  // namespace pass
--- a/inference-engine/src/low_precision_transformations/include/low_precision/convolution.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/convolution.hpp
@ -11,12 +11,13 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API ConvolutionTransformation : public WeightableLayerTransformation {
+class LP_TRANSFORMATIONS_API ConvolutionTransformation : public WeightableLayerTransformation {
 public:
-    ConvolutionTransformation(const Params& params);
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
-    bool isQuantized(std::shared_ptr<Node> layer) const noexcept override;
+    NGRAPH_RTTI_DECLARATION;
+    ConvolutionTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
+    bool isQuantized(const std::shared_ptr<const Node>& layer) const noexcept override;
+    static bool isQuantizedStatic(const std::shared_ptr<const Node>& layer) noexcept;
 };

 } // namespace low_precision
--- a/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp
@ -11,13 +11,13 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation {
+class LP_TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation {
 public:
-    ConvolutionBackpropDataTransformation(const Params& params);
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    ConvolutionBackpropDataTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
-    bool isQuantized(std::shared_ptr<Node> layer) const noexcept override;
+    bool isQuantized(const std::shared_ptr<const Node>& layer) const noexcept override;
+    static bool isQuantizedStatic(const std::shared_ptr<const Node>& layer) noexcept;
 };

 } // namespace low_precision
--- a/inference-engine/src/low_precision_transformations/include/low_precision/create_attribute.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/create_attribute.hpp
@ -0,0 +1,70 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <cassert>
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+#include <ngraph/pattern/op/wrap_type.hpp>
+#include <ngraph/variant.hpp>
+#include "low_precision/lpt_visibility.hpp"
+#include "low_precision/base_matcher_pass.hpp"
+#include "low_precision/lpt_itt.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+template <typename AttributeType, typename OperationType>
+class CreateAttribute;
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
+
+// NOTE(review): presumably selects whether the attribute belongs to the node or to its output port - confirm.
+enum class AttributeSource {
+    Node,
+    OutputPort
+};
+
+// Matcher pass that calls VariantWrapper<AttributeType>::create(op, params) for each matched operation.
+// With the default OperationType (pattern::op::Label) the pattern is pattern::any_input();
+// any other OperationType is matched via pattern::wrap_type<OperationType>().
+template <typename AttributeType, typename OperationType = ngraph::pattern::op::Label>
+class ngraph::pass::low_precision::CreateAttribute : public ngraph::pass::low_precision::BaseMatcherPass {
+public:
+    // NOTE(review): `source` is only validated by the assert here; confirm where it is meant to be consumed.
+    CreateAttribute(const AttributeSource source = AttributeSource::Node) {
+        assert((source == AttributeSource::Node) || (source == AttributeSource::OutputPort));
+        auto operation = std::is_same<OperationType, pattern::op::Label>::value ?
+            pattern::any_input() :
+            pattern::wrap_type<OperationType>();
+
+        // The callback is handed to register_matcher() and is invoked after this constructor returns,
+        // so capture `this` explicitly rather than defaulting to by-reference capture of constructor locals.
+        ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
+            auto op = m.get_match_root();
+            // Leave the operation untouched when transformation_callback() returns true.
+            if (transformation_callback(op)) {
+                return false;
+            }
+            {
+                OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "CreateAttribute");
+                const auto attribute = ngraph::VariantWrapper<AttributeType>::create(op, params);
+                if (attribute == nullptr) {
+                    return false;
+                }
+            }
+            return true;
+        };
+
+        auto matcher = std::make_shared<ngraph::pattern::Matcher>(operation, "CreateAttribute");
+        this->register_matcher(matcher, callback);
+    }
+};
--- a/inference-engine/src/low_precision_transformations/include/low_precision/create_precisions_dependent_attribute.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/create_precisions_dependent_attribute.hpp
@ -0,0 +1,78 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/node.hpp>
+#include <ngraph/variant.hpp>
+#include <ngraph/pattern/op/wrap_type.hpp>
+
+#include <low_precision/lpt_visibility.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include "rt_info/precision_preserved_attribute.hpp"
+#include "network_helper.hpp"
+#include "lpt_itt.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+template <typename AttributeType, typename OperationType>
+class CreatePrecisionsDependentAttribute;
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
+
+// Matcher pass that, for every matched OperationType node, stores two rt_info attributes on the node:
+// a PrecisionPreservedAttribute created with `false` and a default-constructed AttributeType.
+// NOTE(review): the dynamic_pointer_cast below suggests AttributeType is expected to derive from
+// PrecisionPreservedAttribute - confirm.
+template <typename AttributeType, typename OperationType>
+class ngraph::pass::low_precision::CreatePrecisionsDependentAttribute : public ngraph::pass::MatcherPass {
+public:
+    CreatePrecisionsDependentAttribute() {
+        auto operation = pattern::wrap_type<OperationType>();
+
+        // The callback is handed to register_matcher() and is invoked after this constructor returns,
+        // so capture `this` explicitly rather than defaulting to by-reference capture of constructor locals.
+        ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
+            auto node = m.get_match_root();
+            if (transformation_callback(node)) {
+                return false;
+            }
+
+            {
+                OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "CreatePrecisionsDependentAttribute");
+                auto &rt = node->get_rt_info();
+
+                const auto precisionPreservedAttribute = std::make_shared<ngraph::VariantWrapper<PrecisionPreservedAttributePtr>>(
+                    std::make_shared<PrecisionPreservedAttribute>(false));
+                rt[ngraph::VariantWrapper<PrecisionPreservedAttributePtr>::type_info.name] = precisionPreservedAttribute;
+                const auto &targetSharedValue = precisionPreservedAttribute->get()->sharedValue;
+
+                const auto attribute = std::make_shared<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>>(
+                    std::make_shared<AttributeType>());
+                rt[ngraph::VariantWrapper<std::shared_ptr<AttributeType>>::type_info.name] = attribute;
+
+                // NOTE(review): presumably makes both attributes use the same shared value (targetSharedValue) -
+                // confirm against NetworkHelper::reassign.
+                ngraph::pass::low_precision::NetworkHelper::reassign<PrecisionPreservedSharedValue, PrecisionPreservedAttribute>(
+                    targetSharedValue,
+                    {
+                        std::dynamic_pointer_cast<PrecisionPreservedAttribute>(attribute->get()),
+                        std::dynamic_pointer_cast<PrecisionPreservedAttribute>(precisionPreservedAttribute->get())
+                    });
+            }
+            return true;
+        };
+
+        auto matcher = std::make_shared<ngraph::pattern::Matcher>(operation, "CreatePrecisionsDependentAttribute");
+        this->register_matcher(matcher, callback);
+    }
+};
--- a/inference-engine/src/low_precision_transformations/include/low_precision/depth_to_space.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/depth_to_space.hpp
@ -10,12 +10,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API DepthToSpaceTransformation : public TransparentBaseTransformation {
+class LP_TRANSFORMATIONS_API DepthToSpaceTransformation : public TransparentBaseTransformation {
 public:
-    DepthToSpaceTransformation(const Params& params) : TransparentBaseTransformation(params) {}
-    ~DepthToSpaceTransformation() override {}
-    bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override;
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
+    NGRAPH_RTTI_DECLARATION;
+    DepthToSpaceTransformation(const Params& params = Params());
+    bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/eltwise_base_transformation.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/eltwise_base_transformation.hpp
@ -12,7 +12,7 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API EltwiseBaseTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API EltwiseBaseTransformation : public LayerTransformation {
 public:
    EltwiseBaseTransformation(const Params& params) : LayerTransformation(params) {}
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
--- a/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize.hpp
@ -13,17 +13,20 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API FakeQuantizeTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API FakeQuantizeTransformation : public LayerTransformation {
 public:
-    FakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    FakeQuantizeTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;

    static bool checkElementwise(const std::shared_ptr<Node>& eltwise);

 private:
-    std::shared_ptr<opset1::FakeQuantize> fuseElementwise(TransformationContext& context, const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize) const;
+    std::shared_ptr<opset1::FakeQuantize> fuseElementwise(
+            TransformationContext& context,
+            MatcherPass* matcherPass,
+            const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize) const;
 };

 } // namespace low_precision
--- a/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp
@ -13,11 +13,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API FakeQuantizeDecompositionTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API FakeQuantizeDecompositionTransformation : public LayerTransformation {
 public:
-    FakeQuantizeDecompositionTransformation(const Params& params) : LayerTransformation(params) {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    FakeQuantizeDecompositionTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
 };

--- a/inference-engine/src/low_precision_transformations/include/low_precision/fold_convert.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/fold_convert.hpp
@ -12,12 +12,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API FoldConvertTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API FoldConvertTransformation : public LayerTransformation {
 public:
-    FoldConvertTransformation(const Params& params) : LayerTransformation(params) {}
-    ~FoldConvertTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    FoldConvertTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/fold_fake_quantize.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/fold_fake_quantize.hpp
@ -0,0 +1,25 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/ngraph.hpp>
+#include "low_precision/layer_transformation.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+class LP_TRANSFORMATIONS_API FoldFakeQuantizeTransformation : public LayerTransformation {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    FoldFakeQuantizeTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
+    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
+    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
+};
+
+} // namespace low_precision
+} // namespace pass
+} // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_convert.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_convert.hpp
@ -12,12 +12,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API FuseConvertTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API FuseConvertTransformation : public LayerTransformation {
 public:
-    FuseConvertTransformation(const Params& params) : LayerTransformation(params) {}
-    ~FuseConvertTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    FuseConvertTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_fake_quantize.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_fake_quantize.hpp
@ -12,12 +12,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API FuseFakeQuantizeTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API FuseFakeQuantizeTransformation : public LayerTransformation {
 public:
-    FuseFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {}
-    ~FuseFakeQuantizeTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    FuseFakeQuantizeTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;

 private:
--- a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_multiply_to_fake_quantize.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_multiply_to_fake_quantize.hpp
@ -12,12 +12,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API FuseMultiplyToFakeQuantizeTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API FuseMultiplyToFakeQuantizeTransformation : public LayerTransformation {
 public:
-    FuseMultiplyToFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {}
-    ~FuseMultiplyToFakeQuantizeTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    FuseMultiplyToFakeQuantizeTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_subtract_to_fake_quantize.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_subtract_to_fake_quantize.hpp
@ -12,12 +12,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API FuseSubtractToFakeQuantizeTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API FuseSubtractToFakeQuantizeTransformation : public LayerTransformation {
 public:
-    FuseSubtractToFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {}
-    ~FuseSubtractToFakeQuantizeTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    FuseSubtractToFakeQuantizeTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/group_convolution.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/group_convolution.hpp
@ -11,12 +11,13 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API GroupConvolutionTransformation : public ConvolutionTransformation {
+class LP_TRANSFORMATIONS_API GroupConvolutionTransformation : public ConvolutionTransformation {
 public:
-    GroupConvolutionTransformation(const Params& params);
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
-    bool isQuantized(std::shared_ptr<Node> layer) const noexcept override;
+    NGRAPH_RTTI_DECLARATION;
+    GroupConvolutionTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
+    bool isQuantized(const std::shared_ptr<const Node>& layer) const noexcept override;
+    static bool isQuantizedStatic(const std::shared_ptr<const Node>& layer) noexcept;
 };

 } // namespace low_precision
--- a/inference-engine/src/low_precision_transformations/include/low_precision/ilayer_transformations_manager.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/ilayer_transformations_manager.hpp
@ -1,24 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <memory>
-#include <ngraph/node.hpp>
-#include "transformations_visibility.hpp"
-
-namespace ngraph {
-namespace pass {
-
-/**
- * @brief low precision transformation component interface.
-  */
-class TRANSFORMATIONS_API ILayerTransformationsManager {
-public:
-    virtual bool isQuantized(const std::shared_ptr<Node>& layer) const noexcept = 0;
-    virtual bool isPrecisionPreserved(const std::shared_ptr<Node>& layer) const noexcept = 0;
-};
-
-}  // namespace pass
-}  // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/include/low_precision/interpolate.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/interpolate.hpp
@ -10,12 +10,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API InterpolateTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API InterpolateTransformation : public LayerTransformation {
 public:
-    InterpolateTransformation(const Params& params) : LayerTransformation(params) {}
-    ~InterpolateTransformation() override {}
-    bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override;
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
+    NGRAPH_RTTI_DECLARATION;
+    InterpolateTransformation(const Params& params = Params());
+    bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/iparams_manager.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/iparams_manager.hpp
@ -1,24 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <vector>
-#include <ngraph/ngraph.hpp>
-#include <transformations_visibility.hpp>
-
-namespace ngraph {
-namespace pass {
-
-/**
- * @brief low precision transformation component interface.
-  */
-class TRANSFORMATIONS_API IParamsManager {
-public:
-    // TODO FIXME: it is not correct to have a string as a key here, try to use NodeTypeInfo
-    virtual std::vector<element::Type> getPrecisionsOnActivations(const Node& op) const noexcept = 0;
-};
-
-}  // namespace pass
-}  // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
@ -13,8 +13,6 @@
 #include <ngraph/ngraph.hpp>
 #include <ngraph/pass/graph_rewrite.hpp>

-#include "iparams_manager.hpp"
-#include "ilayer_transformations_manager.hpp"
 #include "transformation_context.hpp"
 #include "quantization_details.hpp"
 #include "low_precision/common/ie_lpt_exception.hpp"
@ -41,7 +39,7 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API DataPrecision {
+class LP_TRANSFORMATIONS_API DataPrecision {
 public:
    DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {}

@ -108,6 +106,17 @@ public:
        }
    }

+    // Maps the maximum number of quantization levels of a precision to its maximum value: 256 -> 255, 255 -> 254.
+    static float getMaxValue(const size_t maxLevelsForPrecision) {
+        if (maxLevelsForPrecision == 256ul) {
+            return 255.f;
+        }
+        if (maxLevelsForPrecision == 255ul) {
+            return 254.f;
+        }
+        THROW_TRANSFORMATION_EXCEPTION << "unexpected quantization level " << maxLevelsForPrecision;
+    }
+
    static bool hasNegativeValues(const std::vector<float>& values) {
        for (const float value : values) {
            if (value < 0.0) {
@ -148,92 +157,28 @@ inline std::ostream &operator << (std::ostream &os, const DataPrecision& value)
 }

 // Base class for all LP transformations, holds some common data structures
-class TRANSFORMATIONS_API LayerTransformation {
+class LP_TRANSFORMATIONS_API LayerTransformation : public ngraph::pass::MatcherPass {
 public:
-    enum QuantizedTensorAlignment {
-        None,
-        UpdateLevel
-    };
-
    class Params {
    public:
        Params(
-                const bool updatePrecisions = true,
-                const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations = QuantizedTensorAlignment::UpdateLevel,
-                const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights = QuantizedTensorAlignment::None,
-                bool supportAsymmetricQuantization = false,
-                std::vector<element::Type> precisionsOnActivations = { element::u8, element::i8 },
-                std::vector<element::Type> precisionsOnWeights = { element::i8 },
-                element::Type deqPrecision = element::f32,
-                bool support3DTensorOnActivations = true,
-                bool deconvolutionSpecificChannelsRatio = false) :
-                updatePrecisions(updatePrecisions),
-                quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations),
-                quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights),
-                supportAsymmetricQuantization(supportAsymmetricQuantization),
-                precisionsOnActivations(precisionsOnActivations),
-                precisionsOnWeights(precisionsOnWeights),
-                deqPrecision(deqPrecision),
-                support3DTensorOnActivations(support3DTensorOnActivations),
-                deconvolutionSpecificChannelsRatio(deconvolutionSpecificChannelsRatio) {
-            if (precisionsOnActivations.size() == 0ul) {
-                THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed";
-            }
-
-            if (precisionsOnWeights.size() == 0ul) {
-                THROW_TRANSFORMATION_EXCEPTION << "precisions on weights are not specisifed";
-            }
-        }
+            const bool updatePrecisions = true,
+            element::Type deqPrecision = element::f32) :
+            updatePrecisions(updatePrecisions),
+            deqPrecision(deqPrecision) {}

        Params& setUpdatePrecisions(const bool updatePrecisions) {
            this->updatePrecisions = updatePrecisions;
            return *this;
        }

-        Params& setQuantizedTensorAlignmentOnActivations(const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations) {
-            this->quantizedTensorAlignmentOnActivations = quantizedTensorAlignmentOnActivations;
-            return *this;
-        }
-
-        Params& setQuantizedTensorAlignmentOnWeights(const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights) {
-            this->quantizedTensorAlignmentOnWeights = quantizedTensorAlignmentOnWeights;
-            return *this;
-        }
-
-        Params& setSupportAsymmetricQuantization(const bool supportAsymmetricQuantization) {
-            this->supportAsymmetricQuantization = supportAsymmetricQuantization;
-            return *this;
-        }
-
-        Params& setPrecisionsOnActivations(const std::vector<element::Type>& precisionsOnActivations) {
-            this->precisionsOnActivations = precisionsOnActivations;
-            return *this;
-        }
-
-        Params& setPrecisionsOnWeights(const std::vector<element::Type>& precisionsOnWeights) {
-            this->precisionsOnWeights = precisionsOnWeights;
-            return *this;
-        }
-
-        Params& setSupport3DTensorOnActivations(const bool support3DTensorOnActivations) {
-            this->support3DTensorOnActivations = support3DTensorOnActivations;
-            return *this;
-        }
-
-        Params& setDeconvolutionSpecificChannelsRatio(const bool deconvolutionSpecificChannelsRatio) {
-            this->deconvolutionSpecificChannelsRatio = deconvolutionSpecificChannelsRatio;
+        Params& setDeqPrecision(const element::Type& deqPrecision) {
+            this->deqPrecision = deqPrecision;
            return *this;
        }

        bool updatePrecisions;
-        QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
-        QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
-        bool supportAsymmetricQuantization;
-        std::vector<element::Type> precisionsOnActivations;
-        std::vector<element::Type> precisionsOnWeights;
        element::Type deqPrecision;
-        bool support3DTensorOnActivations;
-        bool deconvolutionSpecificChannelsRatio;
    };

    class PrecisionDetails {
@ -243,55 +188,49 @@ public:
                hasNegativeOutput(hasNegativeOutput),
                hasZeroPoint(hasZeroPoint) {}

-        const element::Type precision;
-        const bool hasNegativeOutput;
-        const bool hasZeroPoint;
+        element::Type precision;
+        bool hasNegativeOutput;
+        bool hasZeroPoint;
    };

    LayerTransformation(const Params& params);
    virtual ~LayerTransformation() = default;
-    virtual void registerMatcherIn(ngraph::pass::GraphRewrite& pass, TransformationContext& context) const = 0;
-    virtual bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const = 0;
+    virtual bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) = 0;

-    void setParamsManager(IParamsManager* paramsManager) noexcept;
-    void setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept;
+    void setContext(TransformationContext* context) noexcept;

    void setUpdatePrecisions(const bool updatePrecisions);
-    void setQuantizedTensorAlignmentOnActivations(const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations);
-    void setQuantizedTensorAlignmentOnWeights(const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights);
-
-    void setQuantizationIntervalAsymmetryThreshold(const float value);
-    void setZeroThreshold(const float value);
-    void setMinQuantizationLevels(const size_t levels);
-
-    const std::vector<element::Type>& getPrecisionsOnActivations() const;
-    const std::vector<element::Type>& getPrecisionsOnWeights() const;

    virtual bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const;
-
-    bool canSubtractBeHandled(const std::shared_ptr<Node>& op, const size_t parentIndex = 0ul) const;
+    static bool canBeTransformedStatic(const std::shared_ptr<Node>& layer);

    bool canSubtractBeHandled(const std::shared_ptr<Node>& op, const FakeQuantizeDequantization& dequantization) const;

-    PrecisionDetails getPrecisionDetails(const QuantizationDetails& quantizationDetails) const;
+    // Get precision details based on the FakeQuantize quantization levels and output intervals.
+    // An undefined precision may be returned; in that case the precision has to be defined by the calling code.
+    // TODO: LPT: INT8 specific here
+    static PrecisionDetails getPrecisionDetails(
+        const size_t quantizationLevels,
+        const std::vector<float>& outputLowValues,
+        const std::vector<float>& outputHighValues);
+    static PrecisionDetails getPrecisionDetails(const QuantizationDetails& quantizationDetails);
+
+    static bool isAsymmetricQuantization(const std::shared_ptr<const Node>& node);

    // return true if operation can be quantized and false otherwise
    // for example: if convolution operation weights are not quantized, then isQuantize returns false and true otherwise
    // note: dequantization operations on activations are absent during method execution
-    virtual bool isQuantized(std::shared_ptr<Node> layer) const noexcept;
+    virtual bool isQuantized(const std::shared_ptr<const Node>& layer) const noexcept;

    // return true if operation can be preserved for precision
    // note: dequantization operations on activations are absent during method execution
    virtual bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept = 0;

-    DataPrecision getDataPrecision(
-            std::shared_ptr<Node> layer,
+    // weights specific
+    static DataPrecision getDataPrecision(
+            const std::shared_ptr<Node>& layer,
            const QuantizationDetails& quantizationDetails,
-            const bool onWeights) const;
-
-    void fillAvailablePrecisions(std::shared_ptr<Node> layer, std::vector<element::Type>& availablePrecisions) const;
-
-    std::vector<std::shared_ptr<Node>> getChildrenRecursivelyExceptPrecisionPreserved(const std::shared_ptr<Node>& op) const noexcept;
+            const std::vector<element::Type>& precisions);

 protected:
 #ifdef LPT_PRINT_DEQUANTIZATION_INFO
@ -303,24 +242,10 @@ protected:
 #endif

    bool updatePrecisions;
-    QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
-    QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
-    bool supportAsymmetricQuantization;
-    std::vector<element::Type> precisionsOnActivations;
-    std::vector<element::Type> precisionsOnWeights;
    element::Type deqPrecision;
-    bool support3DTensorOnActivations;
-    bool deconvolutionSpecificChannelsRatio;
-
-    // absolute value, used to determine quantization interval asymmetry
-    float quantizationIntervalAsymmetryThreshold;
-    // absolute value, used to determine zero
-    float zeroThreshold;
-    size_t minQuantizationLevels;

    static const char originalLayerPostfix[];
-    IParamsManager* paramsManager;
-    ILayerTransformationsManager* layerTransformationsManager;
+    TransformationContext* context;

 protected:
    std::shared_ptr<ngraph::Node> moveDequantizationAfter(
@ -340,7 +265,7 @@ protected:
        std::shared_ptr<ngraph::Node> lastNode,
        std::string originalName) const;

-    void addPattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr<Node> patternRoot) const;
+    void addPattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr<Node> patternRoot);

    //TODO: replace with canBeTransformed when quantization by special dimension is supported for all transformations
    bool canBeTransformedSpatialDimension(const TransformationContext& context, std::shared_ptr<Node> layer) const;
@ -358,38 +283,6 @@ protected:
    }
 };

-inline std::ostream &operator << (std::ostream &os, const LayerTransformation::QuantizedTensorAlignment& value) {
-    switch (value) {
-        case LayerTransformation::QuantizedTensorAlignment::None: {
-            os << "None";
-            break;
-        }
-        case LayerTransformation::QuantizedTensorAlignment::UpdateLevel: {
-            os << "UpdateLevel";
-            break;
-        }
-        default: {
-            os << static_cast<int>(value);
-            break;
-        }
-    }
-    return os;
-}
-
-inline std::ostream &operator << (std::ostream &os, const std::vector<element::Type>& values) {
-    os << "{";
-    for (size_t i = 0; i < values.size(); ++i) {
-        const element::Type& value = values[i];
-        if (i > 0) {
-            os << value;
-        } else {
-            os << ", " << value;
-        }
-    }
-    os << "}";
-    return os;
-}
-
 typedef std::shared_ptr<LayerTransformation> LayerTransformationPtr;

 }  // namespace low_precision
--- a/inference-engine/src/low_precision_transformations/include/low_precision/low_precision.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/low_precision.hpp
@ -0,0 +1,74 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+// one place to include all Low Precision Transformations from ngraph::pass::low_precision
+#include <low_precision/rt_info/intervals_alignment_attribute.hpp>
+#include <low_precision/rt_info/quantization_alignment_attribute.hpp>
+#include <low_precision/rt_info/precisions_attribute.hpp>
+#include <low_precision/rt_info/precision_preserved_attribute.hpp>
+
+#include <low_precision/markup_precisions.hpp>
+#include <low_precision/markup_avg_pool_precision_preserved.hpp>
+#include <low_precision/propagate_precisions.hpp>
+#include <low_precision/align_quantization_intervals.hpp>
+
+
+#include <low_precision/lpt_visibility.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+#include <low_precision/common/operation_per_tensor_quantization_restriction.hpp>
+#include "low_precision/layer_transformation.hpp"
+#include "low_precision/markup_precisions.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+class LP_TRANSFORMATIONS_API TypeRelaxedReplacer;
+class LP_TRANSFORMATIONS_API MarkupOptimizations;
+class LP_TRANSFORMATIONS_API LowPrecision;
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
+
+// Function pass configured with per-operation precision restrictions and per-tensor quantization restrictions.
+// NOTE(review): presumably runs the markup passes included by this header - confirm in the .cpp.
+class LP_TRANSFORMATIONS_API ngraph::pass::low_precision::MarkupOptimizations : public ngraph::pass::FunctionPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    // Both restriction lists are copied into the pass, so the caller's vectors (including temporaries)
+    // do not have to outlive it.
+    MarkupOptimizations(
+        const std::vector<OperationPrecisionRestriction>& precisionRestrictions,
+        const std::vector<OperationPerTensorQuantizationRestriction>& quantizationRestrictions);
+    bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
+private:
+    // Owned copies (not references): reference members would dangle as soon as the constructor
+    // arguments go out of scope before run_on_function() is called.
+    std::vector<OperationPrecisionRestriction> precisionRestrictions;
+    std::vector<OperationPerTensorQuantizationRestriction> quantizationRestrictions;
+};
+
+// GraphRewrite-based pass that only declares a default constructor here.
+// NOTE(review): presumably the constructor (defined in the .cpp) registers matchers that replace operations
+// with their TypeRelaxed counterparts - confirm in the implementation.
+class LP_TRANSFORMATIONS_API ngraph::pass::low_precision::TypeRelaxedReplacer : public ngraph::pass::GraphRewrite {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    TypeRelaxedReplacer();
+};
+
+// Top-level low precision function pass.
+// It keeps its own copies of the precision restrictions, the per-tensor quantization restrictions
+// and the common LayerTransformation parameters.
+class LP_TRANSFORMATIONS_API ngraph::pass::low_precision::LowPrecision : public ngraph::pass::FunctionPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    // Every argument is optional:
+    //   precisionRestrictions    - per-operation precision restrictions (empty by default)
+    //   quantizationRestrictions - per-operation per-tensor quantization restrictions (empty by default)
+    //   params                   - LayerTransformation parameters (default constructed by default)
+    LowPrecision(
+        const std::vector<OperationPrecisionRestriction>& precisionRestrictions = {},
+        const std::vector<OperationPerTensorQuantizationRestriction>& quantizationRestrictions = {},
+        const LayerTransformation::Params params = LayerTransformation::Params());
+    bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
+
+    // Static check that does not need a LowPrecision instance; callers can use it to decide whether
+    // the pass has to be registered at all.
+    static bool isFunctionQuantized(const std::shared_ptr<const ngraph::Function>& function);
+
+protected:
+    std::vector<OperationPrecisionRestriction> precisionRestrictions;
+    std::vector<OperationPerTensorQuantizationRestriction> quantizationRestrictions;
+    // TODO: remove (original note; presumably this member is planned for removal - confirm with the author)
+    LayerTransformation::Params params;
+};
--- a/inference-engine/src/low_precision_transformations/include/low_precision/lpt_itt.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/lpt_itt.hpp
@ -4,11 +4,12 @@

 /**
 * @brief Defines openvino domains for tracing
- * @file lpt_itt.h
+ * @file lpt_itt.hpp
 */

 #pragma once

+
 #include <openvino/itt.hpp>

 namespace ngraph {
--- a/inference-engine/src/low_precision_transformations/include/low_precision/lpt_visibility.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/lpt_visibility.hpp
@ -0,0 +1,18 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/visibility.hpp"
+
+/**
+ * @file lpt_visibility.hpp
+ * @brief Defines visibility settings for Inference Engine LP Transformations library
+ */
+
+#ifdef inference_engine_lp_transformations_EXPORTS
+#define LP_TRANSFORMATIONS_API NGRAPH_HELPER_DLL_EXPORT
+#else
+#define LP_TRANSFORMATIONS_API NGRAPH_HELPER_DLL_IMPORT
+#endif
--- a/inference-engine/src/low_precision_transformations/include/low_precision/main.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/main.hpp
@ -1,36 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <memory>
-
-#include <ie_api.h>
-
-#include <ngraph/ngraph.hpp>
-
-#include <ngraph/pass/graph_rewrite.hpp>
-#include <low_precision/ilayer_transformations_manager.hpp>
-#include <low_precision/iparams_manager.hpp>
-
-using namespace std;
-
-
-namespace ngraph {
-namespace pass {
-
-class TRANSFORMATIONS_API LowPrecisionTransformations: public ngraph::pass::GraphRewrite, IParamsManager, ILayerTransformationsManager {
-public:
-    bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
-
-    // IParamsManager interface implementation
-    std::vector<element::Type> getPrecisionsOnActivations(const NodeTypeInfo& layerName) const noexcept override;
-
-    // ILayerTransformationsManager interface implementation
-    bool isQuantized(std::shared_ptr<Node> layer) const noexcept override;
-    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
-};
-
-}// namespace pass
-}// namespace ngraph
--- a/inference-engine/src/low_precision_transformations/include/low_precision/markup_avg_pool_precision_preserved.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/markup_avg_pool_precision_preserved.hpp
@ -0,0 +1,25 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <ngraph/pass/pass.hpp>
+#include <low_precision/lpt_visibility.hpp>
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+class LP_TRANSFORMATIONS_API MarkupAvgPoolPrecisionPreserved;
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
+
+// Function pass without configuration (no user-declared constructor).
+// NOTE(review): presumably marks AvgPool operations with the "precision preserved" run-time attribute -
+// confirm in the .cpp.
+class ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved : public ngraph::pass::FunctionPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
+};
--- a/inference-engine/src/low_precision_transformations/include/low_precision/markup_can_be_quantized.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/markup_can_be_quantized.hpp
@ -0,0 +1,25 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <ngraph/pass/pass.hpp>
+#include "low_precision/lpt_visibility.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+class LP_TRANSFORMATIONS_API MarkupCanBeQuantized;
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
+
+// Function pass without configuration (no user-declared constructor).
+// NOTE(review): presumably marks operations that can (or cannot) be quantized - confirm in the .cpp.
+class ngraph::pass::low_precision::MarkupCanBeQuantized : public ngraph::pass::FunctionPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
+};
--- a/inference-engine/src/low_precision_transformations/include/low_precision/markup_per_tensor_quantization.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/markup_per_tensor_quantization.hpp
@ -0,0 +1,44 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+#include <ngraph/pass/pass.hpp>
+#include "common/operation_per_tensor_quantization_restriction.hpp"
+#include "low_precision/lpt_visibility.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+class LP_TRANSFORMATIONS_API MarkupPerTensorQuantization;
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
+
+// Function pass built from a list of per-tensor quantization restrictions (empty list by default).
+// NOTE(review): presumably marks the restricted operation ports as requiring per-tensor quantization -
+// confirm in the .cpp.
+class ngraph::pass::low_precision::MarkupPerTensorQuantization : public ngraph::pass::FunctionPass {
+public:
+    // Port indices grouped by version, plus a flag telling whether the version is required.
+    class PerTensorQuantization {
+    public:
+        explicit PerTensorQuantization(const bool versionIsRequired) : versionIsRequired(versionIsRequired) {}
+        // Registers `ports` for `version`. If an entry for `version` already exists it is kept:
+        // unordered_map::emplace does not overwrite.
+        void add(const uint64_t version, const std::vector<size_t>& ports) {
+            portsByVersion.emplace(version, ports);
+        }
+
+        bool versionIsRequired;
+        std::unordered_map<uint64_t, std::vector<size_t>> portsByVersion;
+    };
+
+    NGRAPH_RTTI_DECLARATION;
+    explicit MarkupPerTensorQuantization(const std::vector<OperationPerTensorQuantizationRestriction>& restrictions = {});
+    bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
+
+private:
+    // Restrictions keyed by a string (presumably the operation type name - TODO confirm in the constructor).
+    std::unordered_map<std::string, PerTensorQuantization> restrictionsByOperation;
+};
--- a/inference-engine/src/low_precision_transformations/include/low_precision/markup_precisions.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/markup_precisions.hpp
@ -0,0 +1,47 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <ngraph/pass/pass.hpp>
+#include "low_precision/lpt_visibility.hpp"
+#include "low_precision/common/operation_precision_restriction.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+class LP_TRANSFORMATIONS_API MarkupPrecisions;
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
+
+// Transformation is used to add customization options runtime.
+// Function pass built from a list of operation precision restrictions (empty list by default).
+class ngraph::pass::low_precision::MarkupPrecisions : public ngraph::pass::FunctionPass {
+public:
+    // Precision restrictions grouped by version, plus a flag telling whether the version is required.
+    // Each restriction is a list of (index, allowed element types) pairs
+    // (the index is presumably a port index - TODO confirm against OperationPrecisionRestriction).
+    class Restriction {
+    public:
+        explicit Restriction(const bool versionIsRequired) : versionIsRequired(versionIsRequired) {}
+        // Registers `precisions` for `version`. If an entry for `version` already exists it is kept:
+        // unordered_map::emplace does not overwrite.
+        void add(const uint64_t version, const std::vector<std::pair<size_t, std::vector<ngraph::element::Type>>>& precisions) {
+            precisionsByVersion.emplace(version, precisions);
+        }
+
+        bool versionIsRequired;
+        std::unordered_map<uint64_t, std::vector<std::pair<size_t, std::vector<ngraph::element::Type>>>> precisionsByVersion;
+    };
+
+    NGRAPH_RTTI_DECLARATION;
+    explicit MarkupPrecisions(const std::vector<OperationPrecisionRestriction>& restrictions = {});
+    bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
+
+private:
+    // Static per-node predicates used by the pass (defined in the .cpp).
+    static bool isPrecisionPreserved(const std::shared_ptr<Node>& node);
+    static bool isSupported(const std::shared_ptr<Node>& node);
+    // Restrictions keyed by a string (presumably the operation type name - TODO confirm in the constructor).
+    std::unordered_map<std::string, Restriction> restrictionsByOperation;
+};
--- a/inference-engine/src/low_precision_transformations/include/low_precision/mat_mul.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/mat_mul.hpp
@ -11,14 +11,14 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API MatMulTransformation : public LayerTransformation {
+// LayerTransformation subclass for MatMul (per the class name), exported from the LP transformations library.
+// Can be constructed without arguments: `params` defaults to a default-constructed Params.
+class LP_TRANSFORMATIONS_API MatMulTransformation : public LayerTransformation {
 public:
-    MatMulTransformation(const Params& params) : LayerTransformation(params) {}
-    ~MatMulTransformation() override {}
-    bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override;
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
+    NGRAPH_RTTI_DECLARATION;
+    MatMulTransformation(const Params& params = Params());
+    bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
+    // Static predicate over a (read-only) node.
+    // NOTE(review): presumably true when the activations input of the node is a 3D tensor - confirm in mat_mul.cpp.
+    static bool is3DTensorOnActivations(const std::shared_ptr<const Node>& node);
 };

 }  // namespace low_precision
--- a/inference-engine/src/low_precision_transformations/include/low_precision/max_pool.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/max_pool.hpp
@ -12,12 +12,12 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API MaxPoolTransformation : public LayerTransformation {
+// LayerTransformation subclass for MaxPool (per the class name), exported from the LP transformations library.
+// Can be constructed without arguments: `params` defaults to a default-constructed Params.
+class LP_TRANSFORMATIONS_API MaxPoolTransformation : public LayerTransformation {
 public:
-    MaxPoolTransformation(const Params& params);
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
+    NGRAPH_RTTI_DECLARATION;
+    MaxPoolTransformation(const Params& params = Params());
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
 };

--- a/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp
@ -11,12 +11,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API MultiplyTransformation : public EltwiseBaseTransformation {
+// EltwiseBaseTransformation subclass for Multiply (per the class name), exported from the LP transformations library.
+// Can be constructed without arguments: `params` defaults to a default-constructed Params.
+class LP_TRANSFORMATIONS_API MultiplyTransformation : public EltwiseBaseTransformation {
 public:
-    MultiplyTransformation(const Params& params) : EltwiseBaseTransformation(params) {}
-    ~MultiplyTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    MultiplyTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
 };

 } // namespace low_precision
--- a/inference-engine/src/low_precision_transformations/include/low_precision/multiply_to_group_convolution.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/multiply_to_group_convolution.hpp
@ -7,24 +7,29 @@
 #include <memory>
 #include <ngraph/ngraph.hpp>
 #include "low_precision/layer_transformation.hpp"
+#include "common/operation_precision_restriction.hpp"

 namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API MultiplyToGroupConvolutionTransformation : public LayerTransformation {
+// LayerTransformation subclass that, per its name, converts Multiply to GroupConvolution.
+// Exported from the LP transformations library.
+class LP_TRANSFORMATIONS_API MultiplyToGroupConvolutionTransformation : public LayerTransformation {
 public:
-    MultiplyToGroupConvolutionTransformation(const Params& params) : LayerTransformation(params), groupSize(1ul) {}
+    NGRAPH_RTTI_DECLARATION;
+    // Both arguments are optional: default Params and an empty precisions-by-port restriction list.
+    MultiplyToGroupConvolutionTransformation(
+        const Params& params = Params(),
+        const OperationPrecisionRestriction::PrecisionsByPort& restrictions = {});
    ~MultiplyToGroupConvolutionTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
-    bool isQuantized(std::shared_ptr<Node> layer) const noexcept;
+    bool isQuantized(const std::shared_ptr<const Node>& layer) const noexcept override;
+    // Static, non-throwing predicate over a (read-only) node; usable without a transformation instance.
+    static bool canBeTransformedToGroupConvolution(const std::shared_ptr<const Node>& layer) noexcept;

    void setGroupSize(const size_t groupSize);
    size_t getGroupSize() const;
 private:
+    // Precision restrictions by port received in the constructor
+    // (NOTE(review): how they are applied is defined in the .cpp - confirm there).
+    OperationPrecisionRestriction::PrecisionsByPort restrictions;
    size_t groupSize;
 };

--- a/inference-engine/src/low_precision_transformations/include/low_precision/mvn.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/mvn.hpp
@ -10,11 +10,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API MVNTransformation : public LayerTransformation {
+// LayerTransformation subclass for MVN (per the class name), exported from the LP transformations library.
+// Can be constructed without arguments: `params` defaults to a default-constructed Params.
+class LP_TRANSFORMATIONS_API MVNTransformation : public LayerTransformation {
 public:
-    MVNTransformation(const Params& params) : LayerTransformation(params) {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    MVNTransformation(const Params& params = Params());
+    bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp
@ -16,6 +16,10 @@
 #include "ngraph_ops/type_relaxed.hpp"
 #include <ngraph/rt_info.hpp>

+#include "rt_info/shared_value_attribute.hpp"
+#include "rt_info/precisions_attribute.hpp"
+#include "rt_info/per_tensor_quantization_attribute.hpp"
+#include "rt_info/intervals_alignment_attribute.hpp"
 #include "transformation_context.hpp"
 #include "quantization_details.hpp"
 #include "transformations/utils/utils.hpp"
@ -30,7 +34,7 @@ namespace low_precision {
 /**
 * @brief NetworkHelper class encapsulates manipulations with nGraph function.
 */
-class TRANSFORMATIONS_API NetworkHelper {
+class LP_TRANSFORMATIONS_API NetworkHelper {
 public:
    // Return true if `type` can be castable to at least one of `type`
    static bool is_castable_to_one_of(NodeTypeInfo type, const std::unordered_set<NodeTypeInfo>& types);
@ -76,6 +80,10 @@ public:

    static std::shared_ptr<Node> swapMultiplyAndAdd(std::shared_ptr<opset1::Add> addAfterMultiply, const int multiplyBranch);

+    // Overload taking several source nodes and several target nodes.
+    // NOTE(review): presumably copies the nodes' run-time information from sources to every target -
+    // confirm in network_helper.cpp.
+    static void copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::vector<std::shared_ptr<Node>>& targets);
+
+    // Overload taking several source nodes and a single target node.
+    static void copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::shared_ptr<Node>& target);
+
    static void copyInfo(const std::shared_ptr<Node>& source, const std::shared_ptr<Node>& target);

    static void cleanRunTimeInfo(const std::shared_ptr<Node>& layer);
@ -116,7 +124,8 @@ public:
        std::shared_ptr<opset1::FakeQuantize> fq,
        element::Type precision,
        float min,
-        float max);
+        float max,
+        const bool replace = true);

    static FakeQuantizeDequantization makeDequantization(
        const float dequantizationMul,
@ -124,7 +133,8 @@ public:
        const ngraph::element::Type originalPrecision,
        const ngraph::PartialShape dataNodeOutputShape,
        element::Type precision,
-        const element::Type deqPrecision = element::f32);
+        const element::Type deqPrecision = element::f32,
+        std::shared_ptr<ngraph::Node> input = nullptr);

    static FakeQuantizeDequantization createDequantizationFromFakeQuantize(
        std::shared_ptr<opset1::FakeQuantize> fq,
@ -143,7 +153,7 @@ public:

    static FakeQuantizeDequantization getDequantization(const std::shared_ptr<Node>& node, const size_t parentIndex = 0ul, const bool inPlace = false);

-    static FakeQuantizeDequantization getDequantizationBelow(const std::shared_ptr<Node>& node);
+    static FakeQuantizeDequantization getDequantizationBelow(const std::shared_ptr<Node>& node, const bool convertIsMandatory = false);

    static FakeQuantizeDequantization normalizeDequantization(FakeQuantizeDequantization dequantization);

@ -200,6 +210,115 @@ public:

    static bool isDQByDynamicDimension(const std::shared_ptr<Node>& layer, size_t inputIdx = 0);

+    // Static per-node predicate; replaceAttributeInNodes() uses it to decide which nodes may carry the attribute.
+    // NOTE(review): presumably reads a "precision preserved" run-time attribute of the node - confirm in network_helper.cpp.
+    static bool isPrecisionPreserved(const std::shared_ptr<ngraph::Node>& node);
+
+    // Replaces the run-time attribute stored under `name`, starting at `initialNode` and walking the graph
+    // in both directions (producers and consumers).
+    //   f            - not used by the current implementation
+    //   name         - rt_info key of the attribute
+    //   newAttribute - value written into rt_info[name]
+    //   oldAttribute - only entries holding this very object (pointer equality) are replaced
+    //   initialNode  - start of the walk; it receives newAttribute unconditionally, provided it is
+    //                  precision preserved or a FakeQuantize
+    static void replaceAttributeInNodes(
+        std::shared_ptr<ngraph::Function> f,
+        const std::string& name,
+        const std::shared_ptr<ngraph::Variant> newAttribute,
+        const std::shared_ptr<ngraph::Variant> oldAttribute,
+        const std::shared_ptr<ngraph::Node>& initialNode) {
+        std::set<std::shared_ptr<Node>> visited;
+        // work list used as a stack (push_front / pop_front), i.e. the walk is depth-first
+        std::deque<std::shared_ptr<Node>> nodes;
+        nodes.emplace_back(initialNode);
+
+        while (!nodes.empty()) {
+            auto node = nodes.front();
+            nodes.pop_front();
+
+            // every node is handled at most once; constants are never touched
+            if (visited.count(node) || is_type<op::Constant>(node)) {
+                continue;
+            }
+
+            visited.insert(node);
+
+            // neighbours are followed only through initialNode or through nodes that already carry the attribute
+            bool handleConnectedNodes = false;
+            if (NetworkHelper::isPrecisionPreserved(node) || is_type<opset1::FakeQuantize>(node)) {
+                auto& rt = node->get_rt_info();
+
+                if (node == initialNode) {
+                    rt[name] = newAttribute;
+                    handleConnectedNodes = true;
+                } else {
+                    auto it = rt.find(name);
+                    if (it != rt.end()) {
+                        const auto currentAttribute = it->second;
+                        // identity comparison: a different attribute object stored under the same key is kept,
+                        // but the walk still continues through this node
+                        if (oldAttribute.get() == currentAttribute.get()) {
+                            rt[name] = newAttribute;
+                        }
+                        handleConnectedNodes = true;
+                    }
+                }
+            }
+
+            if (!handleConnectedNodes) {
+                continue;
+            }
+
+            // producers: inputs of a FakeQuantize are not followed
+            if (!is_type<opset1::FakeQuantize>(node)) {
+                for (size_t index = 0ul; index < node->get_input_size(); ++index) {
+                    // If the input carries a dequantization whose data node is a Convert fed by a FakeQuantize,
+                    // take the Convert's input so that the walk continues at that FakeQuantize;
+                    // otherwise take the node's own input.
+                    auto getInput = [](const std::shared_ptr<ngraph::Node>& node, const size_t index) {
+                        const auto dequantization = NetworkHelper::getDequantization(node, index);
+                        if (!dequantization.empty() &&
+                            (is_type<opset1::Convert>(dequantization.data.get_node())) &&
+                            is_type<opset1::FakeQuantize>(dequantization.data.get_node()->get_input_node_ptr(0))) {
+                            const auto input = dequantization.data.get_node()->input(0);
+                            return input;
+                        }
+                        return node->input(index);
+                    };
+
+                    const auto& input = getInput(node, index);
+                    const auto& input_node = input.get_source_output().get_node_shared_ptr();
+
+                    if (visited.count(input_node) || is_type<op::Constant>(input_node)) {
+                        continue;
+                    }
+
+                    nodes.push_front(input_node);
+                }
+            }
+
+            // consumers: every node reading any output of the current node
+            for (auto& output : node->outputs()) {
+                for (auto& input_value : output.get_target_inputs()) {
+                    const auto& output_node = input_value.get_node()->shared_from_this();
+                    if (visited.count(output_node) || is_type<op::Constant>(output_node)) {
+                        continue;
+                    }
+
+                    nodes.push_front(output_node);
+                }
+            }
+        }
+    }
+
+    // Re-attaches every still-alive attribute from `attributes` to `sharedValue`:
+    // the attribute's `sharedValue` member is redirected to it and the attribute is appended to
+    // `sharedValue->attributes`. Expired weak pointers are skipped.
+    template <typename SharedValueType, typename SharedAttributeType>
+    static void reassign(
+        const std::shared_ptr<SharedValueType>& sharedValue,
+        const std::vector<std::weak_ptr<SharedAttributeType>>& attributes) {
+        // iterate by const reference: copying a weak_ptr on every iteration costs an atomic ref-count update
+        for (const auto& attributeWeakPtr : attributes) {
+            const auto attribute = attributeWeakPtr.lock();
+            if (attribute == nullptr) {
+                continue;
+            }
+            attribute->sharedValue = sharedValue;
+            sharedValue->attributes.push_back(attribute);
+        }
+    }
+
+    // NOTE(review): the four trailing float& parameters are non-const references, so presumably they are
+    // outputs filled by the call (dequantization multiply/subtract values and the updated output interval),
+    // while the returned size_t is presumably the number of levels - confirm in network_helper.cpp.
+    static size_t calculateLevels(
+        const float dataPrecisionMin,
+        const float dataPrecisionMax,
+        const float combinedIntervalLow,
+        const float combinedIntervalHigh,
+        const float minIntervalLow,
+        const float minIntervalHigh,
+        float& dequantizationMul,
+        float& dequantizationSub,
+        float& updatedOutputLowValue,
+        float& updatedOutputHighValue);
+
 private:
    static std::shared_ptr<Node> foldFakeQuantize(
            const std::shared_ptr<opset1::FakeQuantize>& fq,
@ -292,6 +411,54 @@ std::shared_ptr<Node> fold_reshape(Args&&... args) {
    return node;
 }

+// Looks up the run-time attribute of type T stored on the node itself.
+// Returns nullptr when the node's rt_info has no entry under VariantWrapper<T>::type_info.name.
+template <typename T>
+std::shared_ptr<ngraph::VariantWrapper<T>> getAttribute(const std::shared_ptr<Node>& inputNode) {
+    using Wrapper = ngraph::VariantWrapper<T>;
+
+    auto& rtInfo = inputNode->get_rt_info();
+    const auto found = rtInfo.find(Wrapper::type_info.name);
+    if (found != rtInfo.end()) {
+        // an entry stored under this key is expected to be of the matching wrapper type
+        auto wrapper = std::dynamic_pointer_cast<Wrapper>(found->second);
+        assert(wrapper != nullptr);
+        return wrapper;
+    }
+
+    return nullptr;
+}
+
+// Looks up the run-time attribute of type T stored on an input port.
+// Returns nullptr when the port's rt_info has no entry under VariantWrapper<T>::type_info.name.
+template <typename T>
+std::shared_ptr<ngraph::VariantWrapper<T>> getAttribute(const Input<Node>& input) {
+    using Wrapper = ngraph::VariantWrapper<T>;
+
+    auto& rtInfo = input.get_rt_info();
+    const auto found = rtInfo.find(Wrapper::type_info.name);
+    if (found != rtInfo.end()) {
+        // an entry stored under this key is expected to be of the matching wrapper type
+        auto wrapper = std::dynamic_pointer_cast<Wrapper>(found->second);
+        assert(wrapper != nullptr);
+        return wrapper;
+    }
+
+    return nullptr;
+}
+
+// Looks up the run-time attribute of type T stored on an output port.
+// Returns nullptr when the port's rt_info has no entry under VariantWrapper<T>::type_info.name.
+template <typename T>
+std::shared_ptr<ngraph::VariantWrapper<T>> getAttributeFromOutput(const Output<Node>& output) {
+    using Wrapper = ngraph::VariantWrapper<T>;
+
+    auto& rtInfo = output.get_rt_info();
+    const auto found = rtInfo.find(Wrapper::type_info.name);
+    if (found != rtInfo.end()) {
+        // an entry stored under this key is expected to be of the matching wrapper type
+        auto wrapper = std::dynamic_pointer_cast<Wrapper>(found->second);
+        assert(wrapper != nullptr);
+        return wrapper;
+    }
+
+    return nullptr;
+}
+
+// NOTE(review): presumably reports whether low precision handling is disabled for the node -
+// the criterion is defined in the .cpp, confirm there.
+bool isDisabled(const std::shared_ptr<Node>& node);
+
+// Creates a T from the forwarded arguments and registers the new object in the `attributes`
+// list of the shared value it points to.
+template <typename T, typename ... Args>
+std::shared_ptr<T> make_shared_attribute(Args&& ... args) {
+    auto created = std::make_shared<T>(std::forward<Args>(args)...);
+    // T's constructor is expected to have set `sharedValue`
+    created->sharedValue->attributes.push_back(created);
+    return created;
+}
+
 }  // namespace low_precision
 }  // namespace pass
 }  // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/include/low_precision/normalize_l2.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/normalize_l2.hpp
@ -10,11 +10,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API NormalizeL2Transformation : public LayerTransformation {
+// LayerTransformation subclass for NormalizeL2 (per the class name), exported from the LP transformations library.
+// Can be constructed without arguments: `params` defaults to a default-constructed Params.
+class LP_TRANSFORMATIONS_API NormalizeL2Transformation : public LayerTransformation {
 public:
-    NormalizeL2Transformation(const Params& params) : LayerTransformation(params) {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    NormalizeL2Transformation(const Params& params = Params());
+    bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/prelu.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/prelu.hpp
@ -12,12 +12,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API PReluTransformation : public LayerTransformation {
+// LayerTransformation subclass for PRelu (per the class name), exported from the LP transformations library.
+// Can be constructed without arguments: `params` defaults to a default-constructed Params.
+class LP_TRANSFORMATIONS_API PReluTransformation : public LayerTransformation {
 public:
-    PReluTransformation(const Params& params) : LayerTransformation(params) {}
-    ~PReluTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    PReluTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/propagate_precisions.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_precisions.hpp
@ -0,0 +1,29 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/node.hpp>
+#include <ngraph/variant.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+#include <low_precision/lpt_visibility.hpp>
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+class LP_TRANSFORMATIONS_API PropagatePrecisions;
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
+
+// Function pass without configuration (no user-declared constructor).
+// NOTE(review): presumably propagates the precisions attribute through the function - confirm in the .cpp.
+class ngraph::pass::low_precision::PropagatePrecisions : public ngraph::pass::FunctionPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
+};
--- a/inference-engine/src/low_precision_transformations/include/low_precision/propagate_shared_value.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_shared_value.hpp
@ -0,0 +1,164 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <assert.h>
+#include <memory>
+#include <vector>
+
+#include <ngraph/node.hpp>
+#include <ngraph/variant.hpp>
+
+#include <low_precision/lpt_visibility.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+#include "low_precision/network_helper.hpp"
+#include "lpt_itt.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+template <class AttributeType>
+class LP_TRANSFORMATIONS_API PropagateSharedValue;
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
+
+// Function pass that stamps a precisions attribute on every FakeQuantize output and then
+// carries it through the function in the order returned by get_ordered_ops().
+//
+// NOTE(review): AttributeType is only used to build the seed attribute; every wrapper type and
+// rt_info key below is spelled with PrecisionsAttribute, so the template presumably only
+// instantiates for AttributeType = PrecisionsAttribute (or a type convertible to it) - confirm.
+template <class AttributeType>
+class ngraph::pass::low_precision::PropagateSharedValue : public ngraph::pass::FunctionPass {
+private:
+    // rt_info wrapper type used for every attribute look-up and store in this pass
+    using AttributeWrapper = ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>;
+
+public:
+    // Visits each operation once:
+    //  * FakeQuantize: stores a fresh {u8, i8} attribute on output port 0;
+    //  * operation that is not precision preserved: for every input port that already carries
+    //    the attribute and is fed by a FakeQuantize, merges the port attribute into the
+    //    FakeQuantize output attribute and re-points the port at that merged instance;
+    //  * precision preserved operation: delegated to handle().
+    // Always returns true.
+    bool run_on_function(std::shared_ptr<ngraph::Function> f) override {
+        OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "PropagateSharedValue");
+
+        // rt_info key, built once instead of once per input port
+        const std::string attributeName = AttributeWrapper::type_info.name;
+
+        // Collects the attribute stored on the producing output port, but only when the
+        // producer is a FakeQuantize; the result holds at most one entry.
+        const auto getFakeQuantizeAttributes = [&attributeName](const Input<Node>& nodeInput) {
+            std::vector<std::shared_ptr<AttributeWrapper>> attributes;
+
+            auto node = nodeInput.get_source_output().get_node_shared_ptr();
+            if (is_type<opset1::FakeQuantize>(node)) {
+                // output
+                auto& rt = nodeInput.get_source_output().get_rt_info();
+                auto it = rt.find(attributeName);
+                if (it != rt.end()) {
+                    const auto attribute = std::dynamic_pointer_cast<AttributeWrapper>(it->second);
+                    // a value of another type under this key would cast to nullptr: skip it
+                    if (attribute != nullptr) {
+                        attributes.push_back(attribute);
+                    }
+                }
+            }
+
+            return attributes;
+        };
+
+        const std::vector<std::shared_ptr<ngraph::Node>> nodes(f->get_ordered_ops());
+        for (const std::shared_ptr<Node>& node : nodes) {
+            if (is_type<opset1::FakeQuantize>(node)) {
+                assert(node->get_output_size() == 1ul);
+                auto& outputRtInfo = node->output(0).get_rt_info();
+
+                auto attribute = make_shared_attribute<AttributeType>(std::set<element::Type>{element::u8, element::i8});
+
+                auto attributeWrapper = std::make_shared<AttributeWrapper>(attribute);
+                outputRtInfo[attributeName] = attributeWrapper;
+                continue;
+            }
+
+            if (!NetworkHelper::isPrecisionPreserved(node)) {
+                for (auto& input : node->inputs()) {
+                    auto& nodeRt = input.get_rt_info();
+
+                    const auto it = nodeRt.find(attributeName);
+                    if (it == nodeRt.end()) {
+                        continue;
+                    }
+
+                    const auto attribute = std::dynamic_pointer_cast<AttributeWrapper>(it->second);
+                    if (attribute == nullptr) {
+                        continue;
+                    }
+                    std::vector<std::shared_ptr<AttributeWrapper>> attributes{ attribute };
+
+                    auto parentAttributes = getFakeQuantizeAttributes(input);
+                    if (parentAttributes.empty()) {
+                        continue;
+                    }
+
+                    for (auto& parentAttribute : parentAttributes) {
+                        parentAttribute->merge(attributes);
+                    }
+
+                    // the input port now shares the attribute instance of the FakeQuantize output
+                    nodeRt[attributeName] = parentAttributes[0];
+                }
+                continue;
+            }
+
+            handle(f, node);
+        }
+        return true;
+    }
+
+private:
+    // Gathers one attribute per input of `node`, where available. When the input goes through
+    // a dequantization whose data node is a Convert fed by a FakeQuantize, the FakeQuantize is
+    // used as the parent. A precision preserved parent contributes its node-level attribute,
+    // a FakeQuantize parent the attribute of its output port 0; other parents contribute nothing.
+    std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>> getParentInputRestrictions(
+        const std::shared_ptr<ngraph::Node> node) {
+        std::vector<std::shared_ptr<AttributeWrapper>> parentAttributes;
+        for (size_t index = 0ul; index < node->get_input_size(); index++) {
+            const Input<Node>& input = node->input(index);
+            auto inputNode = input.get_source_output().get_node()->shared_from_this();
+
+            const auto dequantization = NetworkHelper::getDequantization(node, index);
+            if (!dequantization.empty() &&
+                (is_type<opset1::Convert>(dequantization.data.get_node())) &&
+                is_type<opset1::FakeQuantize>(dequantization.data.get_node()->get_input_node_ptr(0))) {
+                inputNode = dequantization.data.get_node()->get_input_node_shared_ptr(0);
+            }
+
+            if (NetworkHelper::isPrecisionPreserved(inputNode)) {
+                auto& inputRtInfo = inputNode->get_rt_info();
+                auto inputAttributeIt = inputRtInfo.find(AttributeWrapper::type_info.name);
+                if (inputAttributeIt != inputRtInfo.end()) {
+                    const auto attribute = std::dynamic_pointer_cast<AttributeWrapper>(inputAttributeIt->second);
+                    if (attribute != nullptr) {
+                        parentAttributes.push_back(attribute);
+                    }
+                }
+            } else if (is_type<opset1::FakeQuantize>(inputNode)) {
+                const auto& outputPortRtInfo = inputNode->outputs()[0].get_rt_info();
+                auto attributeIt = outputPortRtInfo.find(AttributeWrapper::type_info.name);
+                if (attributeIt != outputPortRtInfo.end()) {
+                    const auto attribute = std::dynamic_pointer_cast<AttributeWrapper>(attributeIt->second);
+                    if (attribute != nullptr) {
+                        parentAttributes.push_back(attribute);
+                    }
+                }
+            }
+        }
+        return parentAttributes;
+    }
+
+    // For a precision preserved operation: merges the attributes of all parents into the first
+    // one, reassigns the shared values of the others to it and stores the result on the node.
+    // Does nothing when the node is not precision preserved or no parent has an attribute.
+    // `f` is not used.
+    void handle(std::shared_ptr<ngraph::Function> f, const std::shared_ptr<ngraph::Node>& node) {
+        if (!NetworkHelper::isPrecisionPreserved(node)) {
+            return;
+        }
+
+        const auto parentRestrictions = getParentInputRestrictions(node);
+        if (parentRestrictions.empty()) {
+            return;
+        }
+
+        // one operation - one output precision
+        // merge parent inputs to one current output
+        auto resultAttribute = parentRestrictions[0];
+
+        // the first entry is the merge target itself, so it is left out of the merge list
+        std::vector<std::shared_ptr<AttributeWrapper>> toMerge(parentRestrictions.begin() + 1, parentRestrictions.end());
+        resultAttribute->merge(toMerge);
+
+        for (size_t index = 1ul; index < parentRestrictions.size(); index++) {
+            NetworkHelper::reassign<PrecisionsSharedValue, PrecisionsAttribute>(
+                resultAttribute->get()->sharedValue,
+                parentRestrictions[index]->get()->sharedValue->attributes);
+        }
+
+        auto& rt = node->get_rt_info();
+        rt[AttributeWrapper::type_info.name] = resultAttribute;
+    }
+};
+
--- a/inference-engine/src/low_precision_transformations/include/low_precision/propagate_through_precision_preserved.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_through_precision_preserved.hpp
@ -0,0 +1,118 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/node.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+#include <ngraph/pattern/op/wrap_type.hpp>
+#include <ngraph/variant.hpp>
+
+#include "low_precision/lpt_visibility.hpp"
+#include "low_precision/network_helper.hpp"
+#include "low_precision/lpt_itt.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+template <typename AttributeType>
+class PropagateThroughPrecisionPreserved;
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
+
+// Matcher pass registered on pattern::any_input(). For a precision preserved operation it
+// merges the AttributeType attributes found on its parents into the first one, re-points the
+// shared value of the remaining parents' attributes to it and stores the result in the node rt_info.
+template <typename AttributeType>
+class ngraph::pass::low_precision::PropagateThroughPrecisionPreserved : public ngraph::pass::MatcherPass {
+public:
+    PropagateThroughPrecisionPreserved() {
+        // The callback is handed to register_matcher(); it only needs the pass object itself,
+        // so `this` is captured explicitly rather than through a blanket [&].
+        ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
+            auto node = m.get_match_root();
+            if (transformation_callback(node)) {
+                return false;
+            }
+
+            {
+                OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "PropagateThroughPrecisionPreserved");
+
+                if (!ngraph::pass::low_precision::NetworkHelper::isPrecisionPreserved(node)) {
+                    return false;
+                }
+
+                const auto parentRestrictions = getParentInputRestrictions(node);
+                if (parentRestrictions.empty()) {
+                    return false;
+                }
+
+                // the first parent attribute becomes the merge target
+                auto resultAttribute = parentRestrictions[0];
+
+                std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>>> toMerge = parentRestrictions;
+                // TODO: LPT: handle pointer on itself in VariantWrapper<IntervalsAlignmentAttributePtr>::merge and remove erase, task #59498
+                toMerge.erase(toMerge.begin());
+                resultAttribute->merge(toMerge);
+
+                for (size_t index = 1ul; index < parentRestrictions.size(); index++) {
+                    // Deliberate copy: the loop below appends to the result's attribute list,
+                    // which may be this very list if both attributes already share a value.
+                    const auto attributes = parentRestrictions[index]->get()->sharedValue->attributes;
+                    for (const auto& attributeWeakPtr : attributes) {
+                        auto attribute = attributeWeakPtr.lock();
+                        if (attribute == nullptr) {
+                            // owner of this attribute is already gone
+                            continue;
+                        }
+                        attribute->sharedValue = resultAttribute->get()->sharedValue;
+                        resultAttribute->get()->sharedValue->attributes.push_back(attribute);
+                    }
+                }
+
+                auto &rt = node->get_rt_info();
+                rt[ngraph::VariantWrapper<std::shared_ptr<AttributeType>>::type_info.name] = resultAttribute;
+            }
+            return true;
+        };
+
+        auto matcher = std::make_shared<ngraph::pattern::Matcher>(pattern::any_input(), "PropagateThroughPrecisionPreserved");
+        this->register_matcher(matcher, callback);
+    }
+
+private:
+    // Returns the attribute attached to the output port feeding `input`; when the port has
+    // none, falls back to the attribute of the producing node. May return nullptr.
+    std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>> getSourceOutputAttribute(const Input<Node>& input) {
+        auto input2 = input;
+        auto output = input2.get_source_output();
+        std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>> attribute = getAttributeFromOutput<std::shared_ptr<AttributeType>>(output);
+        if (attribute == nullptr) {
+            attribute = getAttribute<std::shared_ptr<AttributeType>>(output.get_node_shared_ptr());
+        }
+        return attribute;
+    }
+
+    // Collects the non-null parent attributes of `node`, one look-up per input.
+    // TODO: possible duplicate: PropagateToInput::getSourceOutputAttribute
+    std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>>> getParentInputRestrictions(
+        const std::shared_ptr<ngraph::Node> node) {
+        std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>>> parentAttributes;
+        // When the input goes through a dequantization whose data node is a single-input
+        // Convert fed by a FakeQuantize, look through it and use the Convert's input instead.
+        auto getInput = [](const std::shared_ptr<ngraph::Node>& node, const size_t index) -> Input<Node> {
+            const auto dequantization = NetworkHelper::getDequantization(node, index);
+            if (!dequantization.empty() &&
+                is_type<opset1::Convert>(dequantization.data.get_node()) &&
+                (dequantization.data.get_node()->get_input_size() == 1ul) &&
+                is_type<opset1::FakeQuantize>(dequantization.data.get_node()->get_input_node_ptr(0))) {
+                return dequantization.data.get_node()->input(0);
+            }
+
+            return node->input(index);
+        };
+
+        for (size_t index = 0ul; index < node->get_input_size(); index++) {
+            const Input<Node>& input = getInput(node, index);
+            const auto attribute = getSourceOutputAttribute(input);
+            if (attribute != nullptr) {
+                parentAttributes.push_back(attribute);
+            }
+        }
+
+        return parentAttributes;
+    }
+};
--- a/inference-engine/src/low_precision_transformations/include/low_precision/propagate_to_input.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_to_input.hpp
@ -0,0 +1,105 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/node.hpp>
+#include <ngraph/variant.hpp>
+#include <ngraph/pattern/op/wrap_type.hpp>
+
+#include <low_precision/lpt_visibility.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+#include "network_helper.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+template <typename AttributeType>
+class PropagateToInput;
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
+
+// Matcher pass registered on pattern::any_input(). For every input port of the matched node
+// it looks up the AttributeType attribute of the producer, merges the port's own attribute
+// into it (if the port has one) and stores the producer's attribute on the port.
+template <typename AttributeType>
+class ngraph::pass::low_precision::PropagateToInput : public ngraph::pass::MatcherPass {
+public:
+    PropagateToInput() {
+        // The callback is handed to register_matcher(); it only needs the pass object itself,
+        // so `this` is captured explicitly rather than through a blanket [&].
+        ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
+            auto node = m.get_match_root();
+            if (transformation_callback(node)) {
+                return false;
+            }
+
+            {
+                OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "PropagateToInput");
+
+                for (auto input : node->inputs()) {
+                    auto parentAttribute = getSourceOutputAttribute(input);
+                    if (parentAttribute == nullptr) {
+                        continue;
+                    }
+
+                    auto attribute = getAttribute<std::shared_ptr<AttributeType>>(input);
+                    if (attribute != nullptr) {
+                        // A port attribute whose shared value has no precisions aborts the
+                        // whole callback; ports handled earlier in this loop keep their update.
+                        if ((attribute->get()->sharedValue != nullptr) && (attribute->get()->sharedValue->precisions.empty())) {
+                            return false;
+                        }
+
+                        std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<AttributeType>>>> attributes = { attribute };
+                        parentAttribute->merge(attributes);
+                    }
+
+                    auto& rt = input.get_rt_info();
+                    rt[ngraph::VariantWrapper<std::shared_ptr<AttributeType>>::type_info.name] = parentAttribute;
+                }
+            }
+            return true;
+        };
+
+        // The matcher is named after this pass (it used to carry the name of
+        // PropagateThroughPrecisionPreserved, which made the two passes indistinguishable by name).
+        auto matcher = std::make_shared<ngraph::pattern::Matcher>(pattern::any_input(), "PropagateToInput");
+        this->register_matcher(matcher, callback);
+    }
+
+private:
+    // Returns the attribute attached to the output port feeding `input`; when the port has
+    // none, falls back to the attribute of the producing node. May return nullptr.
+    // TODO: possible duplicate: PropagateThroughPrecisionPreserved::getParentInputRestrictions
+    std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>> getSourceOutputAttribute(const Input<Node>& input) {
+        // When the input goes through a dequantization whose data node is a single-input
+        // Convert fed by a FakeQuantize, look through it and use the Convert's input instead.
+        auto getInput = [](const Input<Node>& input) {
+            const auto dequantization = NetworkHelper::getDequantization(input.get_node()->shared_from_this(), input.get_index());
+            if (!dequantization.empty() &&
+                is_type<opset1::Convert>(dequantization.data.get_node()) &&
+                (dequantization.data.get_node()->get_input_size() == 1ul) &&
+                is_type<opset1::FakeQuantize>(dequantization.data.get_node()->get_input_node_ptr(0))) {
+                return dequantization.data.get_node()->input(0);
+            }
+
+            return input;
+        };
+
+        auto input2 = getInput(input);
+        auto output = input2.get_source_output();
+        std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>> attribute = getAttributeFromOutput<std::shared_ptr<AttributeType>>(output);
+        if (attribute == nullptr) {
+            attribute = getAttribute<std::shared_ptr<AttributeType>>(output.get_node_shared_ptr());
+        }
+        return attribute;
+    }
+
+    // Collects the non-null parent attributes of `node`, one look-up per input.
+    // Not called from within this class.
+    std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>>> getParentInputRestrictions(
+        const std::shared_ptr<ngraph::Node> node) {
+        std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>>> parentAttributes;
+        for (size_t index = 0ul; index < node->get_input_size(); index++) {
+            const Input<Node>& input = node->input(index);
+            const auto attribute = getSourceOutputAttribute(input);
+            if (attribute != nullptr) {
+                parentAttributes.push_back(attribute);
+            }
+        }
+        return parentAttributes;
+    }
+};
--- a/inference-engine/src/low_precision_transformations/include/low_precision/pull_reshape_through_dequantization.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/pull_reshape_through_dequantization.hpp
@ -6,14 +6,14 @@

 #include <memory>
 #include <vector>
-#include <transformations_visibility.hpp>
+#include <low_precision/lpt_visibility.hpp>
 #include <ngraph/pass/graph_rewrite.hpp>

 namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API PullReshapeThroughDequantization;
+class LP_TRANSFORMATIONS_API PullReshapeThroughDequantization;

 }  // namespace low_precision
 }  // namespace pass
--- a/inference-engine/src/low_precision_transformations/include/low_precision/pull_transpose_through_dequantization.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/pull_transpose_through_dequantization.hpp
@ -6,14 +6,14 @@

 #include <memory>
 #include <vector>
-#include <transformations_visibility.hpp>
+#include <low_precision/lpt_visibility.hpp>
 #include <ngraph/pass/graph_rewrite.hpp>

 namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API PullTransposeThroughDequantization;
+class LP_TRANSFORMATIONS_API PullTransposeThroughDequantization;

 }  // namespace low_precision
 }  // namespace pass
--- a/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp
@ -8,7 +8,7 @@
 #include <ostream>
 #include <vector>

-#include <transformations_visibility.hpp>
+#include <low_precision/lpt_visibility.hpp>

 #include <ngraph/node.hpp>
 #include <ngraph/opsets/opset1.hpp>
@ -18,7 +18,7 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API QuantizationDetails {
+class LP_TRANSFORMATIONS_API QuantizationDetails {
 public:
    QuantizationDetails();
    QuantizationDetails(const QuantizationDetails& quantizationDetails);
@ -27,33 +27,25 @@ public:
            const std::vector<float>& inputLowValues,
            const std::vector<float>& inputHighValues,
            const std::vector<float>& outputLowValues,
-            const std::vector<float>& outputHighValues,
-            const size_t inputIntervalsCount,
-            const size_t outputIntervalsCount,
-            const size_t outputChannelsCount);
+            const std::vector<float>& outputHighValues);

    static bool outputLayoutIsSupported(std::shared_ptr<opset1::FakeQuantize> quantize);

    static void getInputIntervals(
            std::shared_ptr<opset1::FakeQuantize> quantize,
            std::vector<float>& inputLowValues,
-            std::vector<float>& inputHighValues,
-            size_t& inputIntervalsCount);
+            std::vector<float>& inputHighValues);

    static void getOutputIntervals(
            std::shared_ptr<opset1::FakeQuantize> quantize,
            std::vector<float>& outputLowValues,
-            std::vector<float>& outputHighValues,
-            size_t& outputIntervalsCount);
+            std::vector<float>& outputHighValues);

    static QuantizationDetails getDetails(std::shared_ptr<opset1::FakeQuantize>);
    bool hasNegativeOutput() const;
    float maxOutput(const size_t channel) const;
    float maxInput(const size_t channel) const;

-    float maxOutputHigh() const;
-    float minOutputLow() const;
-
    float getInputLowValue(const size_t channel) const;
    float getInputHighValue(const size_t channel) const;
    float getOutputLowValue(const size_t channel) const;
@ -66,19 +58,15 @@ public:
    const std::vector<float> inputHighValues;
    const std::vector<float> outputLowValues;
    const std::vector<float> outputHighValues;
-    const size_t inputIntervalsCount;
-    const size_t outputIntervalsCount;
-    const size_t outputChannelsCount;

 private:
-    static void validate(std::shared_ptr<Node> constantLayer);
    static std::vector<float> getBlobValue(std::shared_ptr<Node> constantLayer);
 };

 inline std::ostream &operator << (std::ostream &os, const QuantizationDetails& value) {
+    // Streams a one-line summary: levels, then the first input and first output interval.
+    // The number after "1/" is the count of stored low values for that side.
+    // The input part already ends with ", ", so the output part must not start with another
+    // separator (the previous text produced "], , output").
    os << "levels: " << value.levels <<
-       ", input 1/" << value.inputIntervalsCount << ": [" << value.getInputLowValue(0) << " : " << value.getInputHighValue(0) << "], " <<
-       ", output 1/" << value.outputIntervalsCount << ": [" << value.getOutputLowValue(0) << " : " << value.getOutputHighValue(0) << "]";
+       ", input 1/" << value.inputLowValues.size() << ": [" << value.getInputLowValue(0) << " : " << value.getInputHighValue(0) << "], " <<
+       "output 1/" << value.outputLowValues.size() << ": [" << value.getOutputLowValue(0) << " : " << value.getOutputHighValue(0) << "]";
    return os;
 }

--- a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_base_transformation.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_base_transformation.hpp
@ -19,10 +19,10 @@ namespace low_precision {
 * 
 */

-class TRANSFORMATIONS_API ReduceBaseTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API ReduceBaseTransformation : public LayerTransformation {
 public:
-    ReduceBaseTransformation(const Params& params);
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override;
+    ReduceBaseTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const override;

 protected:
--- a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_max.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_max.hpp
@ -14,11 +14,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API ReduceMaxTransformation : public ReduceBaseTransformation {
+class LP_TRANSFORMATIONS_API ReduceMaxTransformation : public ReduceBaseTransformation {
 public:
-    ReduceMaxTransformation(const Params& params);
+    NGRAPH_RTTI_DECLARATION;
+    ReduceMaxTransformation(const Params& params = Params());
    bool isPrecisionPreserved(std::shared_ptr<Node> reduce) const noexcept override;
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const override;

 protected:
--- a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_mean.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_mean.hpp
@ -14,11 +14,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API ReduceMeanTransformation : public ReduceBaseTransformation {
+class LP_TRANSFORMATIONS_API ReduceMeanTransformation : public ReduceBaseTransformation {
 public:
-    ReduceMeanTransformation(const Params& params);
+    NGRAPH_RTTI_DECLARATION;
+    ReduceMeanTransformation(const Params& params = Params());
    bool isPrecisionPreserved(std::shared_ptr<Node> reduce) const noexcept override;
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const override;

 protected:
--- a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_min.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_min.hpp
@ -14,11 +14,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API ReduceMinTransformation : public ReduceBaseTransformation {
+class LP_TRANSFORMATIONS_API ReduceMinTransformation : public ReduceBaseTransformation {
 public:
-    ReduceMinTransformation(const Params& params);
+    NGRAPH_RTTI_DECLARATION;
+    ReduceMinTransformation(const Params& params = Params());
    bool isPrecisionPreserved(std::shared_ptr<Node> reduce) const noexcept override;
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const override;

 protected:
--- a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_sum.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_sum.hpp
@ -14,11 +14,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API ReduceSumTransformation : public ReduceBaseTransformation {
+class LP_TRANSFORMATIONS_API ReduceSumTransformation : public ReduceBaseTransformation {
 public:
+    NGRAPH_RTTI_DECLARATION;
    ReduceSumTransformation(const Params& params);
    bool isPrecisionPreserved(std::shared_ptr<Node> reduce) const noexcept override;
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const override;

 protected:
--- a/inference-engine/src/low_precision_transformations/include/low_precision/relu.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/relu.hpp
@ -12,12 +12,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API ReluTransformation : public LayerTransformation {
+// LayerTransformation subclass for Relu (going by the class name). In this revision the
+// class is exported through LP_TRANSFORMATIONS_API, gains an nGraph RTTI declaration and
+// no longer declares registerMatcherIn() or an explicit destructor.
+class LP_TRANSFORMATIONS_API ReluTransformation : public LayerTransformation {
 public:
-    ReluTransformation(const Params& params) : LayerTransformation(params) {}
-    ~ReluTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    // params is defaulted, so the transformation can be constructed without arguments;
+    // the constructor body now lives outside this header.
+    ReluTransformation(const Params& params = Params());
+    // Not const-qualified any more, unlike the overload removed above.
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/reshape.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/reshape.hpp
@ -11,12 +11,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API ReshapeTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API ReshapeTransformation : public LayerTransformation {
 public:
-    ReshapeTransformation(const Params& params) : LayerTransformation(params) {}
-    ~ReshapeTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    ReshapeTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;

--- a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/attribute_parameters.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/attribute_parameters.hpp
@ -0,0 +1,14 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/type/element_type.hpp>
+#include "low_precision/lpt_visibility.hpp"
+
+// Small parameter holder carrying a single element type, deqPrecision (f32 unless overridden).
+class LP_TRANSFORMATIONS_API AttributeParameters {
+public:
+    // Element type supplied at construction.
+    ngraph::element::Type deqPrecision;
+
+    // Intentionally usable as a converting constructor from an element type.
+    AttributeParameters(const ngraph::element::Type deqPrecision = ngraph::element::f32)
+        : deqPrecision(deqPrecision) {
+    }
+};
--- a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp
@ -0,0 +1,39 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+#include <ngraph/node.hpp>
+#include <ngraph/variant.hpp>
+#include "low_precision/lpt_visibility.hpp"
+#include "low_precision/rt_info/precision_preserved_attribute.hpp"
+
+namespace ngraph {
+// Adds no members of its own to PrecisionPreservedAttribute; it is a distinct type, which
+// lets it have its own VariantWrapper specialization and rt_info type name.
+class LP_TRANSFORMATIONS_API AvgPoolPrecisionPreservedAttribute : public PrecisionPreservedAttribute {
+};
+
+using AvgPoolPrecisionPreservedAttributePtr = std::shared_ptr<AvgPoolPrecisionPreservedAttribute>;
+
+extern template class LP_TRANSFORMATIONS_API VariantImpl<AvgPoolPrecisionPreservedAttributePtr>;
+
+// rt_info wrapper for a shared pointer to AvgPoolPrecisionPreservedAttribute.
+template<>
+class LP_TRANSFORMATIONS_API VariantWrapper<AvgPoolPrecisionPreservedAttributePtr> : public VariantImpl<AvgPoolPrecisionPreservedAttributePtr> {
+public:
+    // Type name / version pair identifying this variant.
+    static constexpr VariantTypeInfo type_info{ "LowPrecision::AvgPoolPrecisionPreserved", 0 };
+
+    const VariantTypeInfo& get_type_info() const override {
+        return type_info;
+    }
+
+    VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
+
+    // Returns the wrapped shared pointer (a copy of m_value).
+    AvgPoolPrecisionPreservedAttributePtr get() { return this->m_value; }
+
+    // Declared only; both members below are defined outside this header.
+    // NOTE(review): merge() takes a non-const lvalue reference, so callers must pass a named vector.
+    void merge(std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AvgPoolPrecisionPreservedAttribute>>>>& attributes);
+    std::string to_string() override;
+};
+} // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp
@ -0,0 +1,97 @
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include <ngraph/node.hpp>
+#include <ngraph/variant.hpp>
+
+#include "low_precision/rt_info/shared_value_attribute.hpp"
+#include "low_precision/rt_info/attribute_parameters.hpp"
+#include "low_precision/lpt_visibility.hpp"
+
+namespace ngraph {
+class IntervalsAlignmentAttribute;
+
+// Shared value for IntervalsAlignmentAttribute: combined interval, minimal interval and minimal levels.
+class LP_TRANSFORMATIONS_API IntervalsAlignmentSharedValue : public SharedValue<IntervalsAlignmentAttribute> {
+public:
+    // Pair of float bounds; both are zero unless passed to the constructor.
+    class Interval {
+    public:
+        Interval() = default;
+        Interval(const float low, const float high) : low(low), high(high) {}
+        float low = 0.f;
+        float high = 0.f;
+    };
+
+    IntervalsAlignmentSharedValue() = default;
+    IntervalsAlignmentSharedValue(
+        const Interval& combinedInterval,
+        const Interval& minInterval,
+        const size_t minLevels) :
+        combinedInterval(combinedInterval),
+        minInterval(minInterval),
+        minLevels(minLevels) {}
+
+    Interval combinedInterval;
+    Interval minInterval;
+    // zero by default so a default-constructed value never exposes an indeterminate count
+    size_t minLevels = 0;
+    // precisions preferred by the affected quantization operations to avoid zero points
+    std::set<element::Type> preferablePrecisions;
+
+#ifdef LPT_DEBUG
+    std::string minLevelsOperation;
+#endif
+};
+
+// Attribute that refers to an IntervalsAlignmentSharedValue and additionally stores the original levels.
+class LP_TRANSFORMATIONS_API IntervalsAlignmentAttribute : public SharedValueAttribute<IntervalsAlignmentSharedValue> {
+public:
+    IntervalsAlignmentAttribute() = default;
+    IntervalsAlignmentAttribute(IntervalsAlignmentSharedValue::Interval combinedInterval, size_t levels);
+    IntervalsAlignmentAttribute(
+        const IntervalsAlignmentSharedValue::Interval combinedInterval,
+        const size_t levels,
+        const IntervalsAlignmentSharedValue::Interval minInterval,
+        const size_t minLevels);
+
+    // specify subgraph original levels; zero when the attribute is default-constructed
+    size_t levels = 0;
+};
+
+using IntervalsAlignmentAttributePtr = std::shared_ptr<IntervalsAlignmentAttribute>;
+
+extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<IntervalsAlignmentAttributePtr>;
+
+// Variant wrapper specialization that stores a shared pointer to IntervalsAlignmentAttribute.
+template<>
+class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>> :
+    public VariantImpl<std::shared_ptr<IntervalsAlignmentAttribute>> {
+public:
+    static constexpr VariantTypeInfo type_info{ "LowPrecision::IntervalsAlignment", 0 };
+
+    const VariantTypeInfo& get_type_info() const override {
+        return type_info;
+    }
+
+    VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
+
+    // returns a copy of the stored shared pointer
+    std::shared_ptr<IntervalsAlignmentAttribute> get() const { return this->m_value; }
+
+    // create(), merge() and to_string() are declared here and defined outside this header
+    static std::shared_ptr<VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>>> create(
+        const std::shared_ptr<ngraph::Node>& node,
+        const AttributeParameters& params);
+    void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>>>>& attributes);
+    std::string to_string() override;
+};
+} // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/per_tensor_quantization_attribute.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/per_tensor_quantization_attribute.hpp
@ -0,0 +1,33 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/node.hpp>
+#include <ngraph/variant.hpp>
+
+#include <low_precision/lpt_visibility.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+#include "low_precision/rt_info/shared_value_attribute.hpp"
+#include "low_precision/layer_transformation.hpp"
+#include "attribute_parameters.hpp"
+
+namespace ngraph {
+class LP_TRANSFORMATIONS_API PerTensorQuantizationAttribute {  // empty class: declares no members
+};
+
+extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<PerTensorQuantizationAttribute>;
+// Variant wrapper specialization for PerTensorQuantizationAttribute itself (not a shared pointer to it).
+template<>
+class LP_TRANSFORMATIONS_API VariantWrapper<PerTensorQuantizationAttribute> : public VariantImpl<PerTensorQuantizationAttribute> {
+public:
+    static constexpr VariantTypeInfo type_info { "LowPrecision::PerTensorQuantization", 0 };
+
+    VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
+
+    const VariantTypeInfo& get_type_info() const override {
+        return type_info;
+    }
+};
+} // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp
@ -0,0 +1,58 @
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <ngraph/node.hpp>
+#include <ngraph/variant.hpp>
+#include "low_precision/lpt_visibility.hpp"
+#include "low_precision/rt_info/shared_value_attribute.hpp"
+
+namespace ngraph {
+
+class LP_TRANSFORMATIONS_API PrecisionPreservedAttribute;
+
+// Shared value for PrecisionPreservedAttribute: a single boolean flag.
+class LP_TRANSFORMATIONS_API PrecisionPreservedSharedValue : public SharedValue<PrecisionPreservedAttribute> {
+public:
+    PrecisionPreservedSharedValue() = default;
+    PrecisionPreservedSharedValue(const bool value) : value(value) {}
+    // false by default so a default-constructed value never holds an indeterminate flag
+    bool value = false;
+};
+
+// Attribute built on SharedValueAttribute<PrecisionPreservedSharedValue>.
+class LP_TRANSFORMATIONS_API PrecisionPreservedAttribute : public SharedValueAttribute<PrecisionPreservedSharedValue> {
+public:
+    PrecisionPreservedAttribute() = default;
+    PrecisionPreservedAttribute(const bool value);
+};
+
+using PrecisionPreservedAttributePtr = std::shared_ptr<PrecisionPreservedAttribute>;
+
+extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<PrecisionPreservedAttributePtr>;
+
+// Variant wrapper specialization that stores a shared pointer to PrecisionPreservedAttribute.
+template<>
+class LP_TRANSFORMATIONS_API VariantWrapper<PrecisionPreservedAttributePtr> : public VariantImpl<PrecisionPreservedAttributePtr> {
+public:
+    static constexpr VariantTypeInfo type_info{ "LowPrecision::PrecisionPreserved", 0 };
+
+    const VariantTypeInfo& get_type_info() const override {
+        return type_info;
+    }
+
+    VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
+
+    // returns a copy of the stored shared pointer
+    PrecisionPreservedAttributePtr get() { return this->m_value; }
+
+    // declared here, defined outside this header
+    std::string to_string() override;
+};
+} // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp
@ -0,0 +1,64 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <set>
+#include <unordered_set>
+#include <vector>
+
+#include <ngraph/node.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+#include <ngraph/variant.hpp>
+
+#include "low_precision/layer_transformation.hpp"
+#include "low_precision/lpt_visibility.hpp"
+#include "low_precision/rt_info/attribute_parameters.hpp"
+#include "low_precision/rt_info/shared_value_attribute.hpp"
+
+namespace ngraph {
+
+class PrecisionsAttribute;
+// Shared value for PrecisionsAttribute: holds the list of element types in `precisions`.
+class LP_TRANSFORMATIONS_API PrecisionsSharedValue : public SharedValue<PrecisionsAttribute> {
+public:
+    std::vector<ngraph::element::Type> precisions;
+};
+
+using PrecisionsAttributePtr = std::shared_ptr<PrecisionsAttribute>;
+// Attribute built on SharedValueAttribute<PrecisionsSharedValue>; the constructor argument defaults to defaultPrecisions.
+class LP_TRANSFORMATIONS_API PrecisionsAttribute : public SharedValueAttribute<PrecisionsSharedValue> {
+public:
+    static const std::vector<ngraph::element::Type> defaultPrecisions;
+    PrecisionsAttribute(const std::vector<ngraph::element::Type>& precisions = defaultPrecisions);
+};
+
+extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<std::shared_ptr<PrecisionsAttribute>>;
+// Variant wrapper specialization that stores a shared pointer to PrecisionsAttribute.
+template<>
+class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<PrecisionsAttribute>> : public VariantImpl<std::shared_ptr<PrecisionsAttribute>> {
+public:
+    static constexpr VariantTypeInfo type_info{ "LowPrecision::Precisions", 0 };
+
+    const VariantTypeInfo& get_type_info() const override {
+        return type_info;
+    }
+
+    VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
+
+    std::shared_ptr<Variant> init(const std::shared_ptr<ngraph::Node>& node) override;
+    // returns a copy of the stored shared pointer
+    std::shared_ptr<PrecisionsAttribute> get() { return this->m_value; }
+
+    // create attribute instance for node
+    static std::shared_ptr<VariantWrapper<std::shared_ptr<PrecisionsAttribute>>> create(
+        const std::shared_ptr<ngraph::Node>& node,
+        const AttributeParameters& params);
+    // merge attribute instances which can be obtained from different sources: node, input port or output port
+    void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>>& attributes);
+    // visualize shared attributes details in VisualizeTree pass
+    std::string to_string() override;
+};
+} // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp
@ -0,0 +1,60 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <set>
+#include <unordered_set>
+#include <vector>
+
+#include <ngraph/node.hpp>
+#include <ngraph/variant.hpp>
+
+#include <low_precision/lpt_visibility.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+#include "shared_value_attribute.hpp"
+#include "attribute_parameters.hpp"
+
+namespace ngraph {
+class QuantizationAlignmentAttribute;
+// Shared value for QuantizationAlignmentAttribute: a single boolean flag that defaults to false.
+class LP_TRANSFORMATIONS_API QuantizationAlignmentSharedValue : public SharedValue<QuantizationAlignmentAttribute> {
+public:
+    QuantizationAlignmentSharedValue(const bool value = false) : value(value) {}
+    bool value;
+};
+// Attribute built on SharedValueAttribute<QuantizationAlignmentSharedValue>; the constructor argument defaults to false.
+class LP_TRANSFORMATIONS_API QuantizationAlignmentAttribute : public SharedValueAttribute<QuantizationAlignmentSharedValue>{
+public:
+    QuantizationAlignmentAttribute(const bool value = false);
+};
+
+using QuantizationAlignmentAttributePtr = std::shared_ptr<QuantizationAlignmentAttribute>;
+
+extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<QuantizationAlignmentAttributePtr>;
+// Variant wrapper specialization that stores a shared pointer to QuantizationAlignmentAttribute.
+template<>
+class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>> :
+    public VariantImpl<std::shared_ptr<QuantizationAlignmentAttribute>> {
+public:
+    static constexpr VariantTypeInfo type_info{ "LowPrecision::QuantizationAlignment", 0 };
+
+    const VariantTypeInfo& get_type_info() const override {
+        return type_info;
+    }
+
+    VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
+
+    std::shared_ptr<Variant> init(const std::shared_ptr<ngraph::Node>& node) override;
+    // returns a copy of the stored shared pointer
+    std::shared_ptr<QuantizationAlignmentAttribute> get() { return this->m_value; }
+    // create(), merge() and to_string() are declared here and defined outside this header
+    static std::shared_ptr<VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>>> create(
+        const std::shared_ptr<ngraph::Node>& node,
+        const AttributeParameters& params);
+    void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>>>>& attributes);
+    std::string to_string() override;
+};
+} // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/shared_value_attribute.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/shared_value_attribute.hpp
@ -0,0 +1,66 @
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include <ngraph/node.hpp>
+#include <ngraph/variant.hpp>
+
+#include <low_precision/lpt_visibility.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+
+template <class SharedAttributeType>
+class LP_TRANSFORMATIONS_API SharedValue;
+
+// Base for attributes that keep their state in a separately allocated SharedValueType object.
+template <class SharedValueType>
+class LP_TRANSFORMATIONS_API SharedValueAttribute {
+public:
+    // allocates a fresh default-constructed shared value
+    SharedValueAttribute() : sharedValue(std::make_shared<SharedValueType>()) {}
+    virtual ~SharedValueAttribute() = default;
+    std::shared_ptr<SharedValueType> sharedValue;
+
+    // Returns "<this>: sharedValue: <ptr>, attributes: {<ptr>, ...}, " with every address printed as a
+    // decimal integer; expired entries of sharedValue->attributes are skipped.
+    std::string get_string() const {
+        std::stringstream ss;
+
+        // named casts replace the C-style pointer-to-integer casts; uintptr_t is sized for pointer values
+        ss << reinterpret_cast<std::uintptr_t>(this) << ": ";
+        ss << "sharedValue: " << reinterpret_cast<std::uintptr_t>(sharedValue.get());
+
+        bool firstAttribute = true;
+        ss << ", attributes: {";
+        for (const auto& attributeWeakPtr : sharedValue->attributes) {
+            const auto attribute = attributeWeakPtr.lock();
+            if (attribute == nullptr) {
+                continue;
+            }
+
+            if (!firstAttribute) {
+                ss << ", ";
+            }
+            ss << reinterpret_cast<std::uintptr_t>(attribute.get());
+            firstAttribute = false;
+        }
+        ss << "}, ";
+        return ss.str();
+    }
+};
+
+// Holds a list of weak references to attributes of type SharedValueAttributeType.
+template <class SharedValueAttributeType>
+class LP_TRANSFORMATIONS_API SharedValue {
+public:
+    virtual ~SharedValue() = default;
+    std::vector<std::weak_ptr<SharedValueAttributeType>> attributes;
+};
--- a/inference-engine/src/low_precision_transformations/include/low_precision/shuffle_channels.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/shuffle_channels.hpp
@ -11,11 +11,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API ShuffleChannelsTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API ShuffleChannelsTransformation : public LayerTransformation {
 public:
-    ShuffleChannelsTransformation(const Params& params);
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    ShuffleChannelsTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp
@ -13,11 +13,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API SplitTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API SplitTransformation : public LayerTransformation {
 public:
-    SplitTransformation(const Params& params);
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    SplitTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
    void updateOutputs(
--- a/inference-engine/src/low_precision_transformations/include/low_precision/squeeze.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/squeeze.hpp
@ -11,11 +11,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API SqueezeTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API SqueezeTransformation : public LayerTransformation {
 public:
-    SqueezeTransformation(const Params& params);
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    SqueezeTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/strided_slice.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/strided_slice.hpp
@ -12,11 +12,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API StridedSliceTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API StridedSliceTransformation : public LayerTransformation {
 public:
-    StridedSliceTransformation(const Params& params);
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    StridedSliceTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/subtract.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/subtract.hpp
@ -11,12 +11,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API SubtractTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API SubtractTransformation : public LayerTransformation {
 public:
-    SubtractTransformation(const Params& params) : LayerTransformation(params) {}
-    ~SubtractTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    SubtractTransformation(const Params& params);
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
 };

 } // namespace low_precision
--- a/inference-engine/src/low_precision_transformations/include/low_precision/subtract_multiply_to_multiply_add.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/subtract_multiply_to_multiply_add.hpp
@ -12,12 +12,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API SubtractMultiplyToMultiplyAddTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API SubtractMultiplyToMultiplyAddTransformation : public LayerTransformation {
 public:
-    SubtractMultiplyToMultiplyAddTransformation(const Params& params) : LayerTransformation(params) {}
-    ~SubtractMultiplyToMultiplyAddTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    SubtractMultiplyToMultiplyAddTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/transformation_context.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/transformation_context.hpp
@ -13,8 +13,9 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API TransformationContext {
+class LP_TRANSFORMATIONS_API TransformationContext {
 public:
+    TransformationContext();
    explicit TransformationContext(std::shared_ptr<Function> function);
    std::shared_ptr<Function> function;

--- a/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp
@ -1,316 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <algorithm>
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include <ngraph/ngraph.hpp>
-#include <ngraph_ops/type_relaxed.hpp>
-
-#include "layer_transformation.hpp"
-#include "iparams_manager.hpp"
-#include "ilayer_transformations_manager.hpp"
-
-namespace ngraph {
-namespace pass {
-namespace low_precision {
-
-struct StandaloneCleanup {
-    std::string typeName;
-    std::string typeId;
-    LayerTransformationPtr transformation;
-};
-
-class TRANSFORMATIONS_API LowPrecisionTransformations {
-public:
-    LowPrecisionTransformations() {}
-    LowPrecisionTransformations(
-        const std::map<std::string, LayerTransformationPtr>& branchSpecificTransformations,
-        const std::map<std::string, LayerTransformationPtr>& decompositionTransformations,
-        const std::map<std::string, LayerTransformationPtr>& transformations,
-        const std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>>& cleanupTransformations,
-        const std::vector<StandaloneCleanup>& standaloneCleanupTransformations);
-
-    void setUpdatePrecisions(const bool updatePrecisions);
-    void setQuantizedTensorAlignmentOnActivations(const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnActivations);
-    void setQuantizedTensorAlignmentOnWeights(const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnWeights);
-
-    /**
-     * Remove branch specific transformation. Transformation type and operation type are required.
-     * Operation type is used to find transformation by operation during precision definition.
-     */
-    template <class Transformation, class Operation>
-    LowPrecisionTransformations& removeBranchSpecific() {
-        const std::string operationType = getType<Operation>();
-        const std::string transformationType = typeid(Transformation).name();
-
-        for (auto it = branchSpecificTransformations.begin(); it != branchSpecificTransformations.end(); ++it) {
-            const auto& tranformationPtr = *it->second;
-            if ((it->first == operationType) && (typeid(tranformationPtr).name() == transformationType)) {
-                branchSpecificTransformations.erase(it);
-                break;
-            }
-        }
-        return *this;
-    }
-
-    /**
-     * Remove transformation. Transformation type and operation type are required.
-     * Operation type is used to find transformation by operation during precision definition.
-     */
-    template <class Transformation, class Operation>
-    LowPrecisionTransformations& remove() {
-        const std::string operationType = getType<Operation>();
-        const std::string transformationType = typeid(Transformation).name();
-
-        for (auto it = transformations.begin(); it != transformations.end(); ++it) {
-            const auto& tranformationPtr = *it->second;
-            if ((it->first == operationType) && (typeid(tranformationPtr).name() == transformationType)) {
-                transformations.erase(it);
-                break;
-            }
-        }
-        return *this;
-    }
-
-    /**
-     * Remove cleanup transformation. Transformation type and operation type are required.
-     * Operation type is used to find transformation by operation during precision definition.
-     */
-    template <class Transformation, class Operation>
-    LowPrecisionTransformations& removeCleanup() {
-        const std::string operationType = getType<Operation>();
-        const std::string transformationType = typeid(Transformation).name();
-
-        const auto it = cleanupTransformations.find(operationType);
-        if (it != cleanupTransformations.end()) {
-            const auto it1 = std::find_if(it->second.begin(), it->second.end(),
-                [&](const std::pair<std::string, LayerTransformationPtr>& transformation) {
-                    return transformation.first == transformationType;
-                });
-            if (it1 != it->second.end()) {
-                it->second.erase(it1);
-                if (it->second.empty()) {
-                    cleanupTransformations.erase(it);
-                }
-            }
-        }
-        return *this;
-    }
-
-    /**
-     * Remove standalone cleanup transformation. Transformation type and operation type are required.
-     * Operation type is used to find transformation by operation during precision definition.
-     */
-    template <class Transformation, class Operation>
-    LowPrecisionTransformations& removeStandaloneCleanup() {
-        const std::string operationType = getType<Operation>();
-        const std::string transformationType = typeid(Transformation).name();
-
-        for (auto it = standaloneCleanupTransformations.begin(); it != standaloneCleanupTransformations.end(); ++it) {
-            const auto& standaloneCleanup = *it;
-            if ((operationType == standaloneCleanup.typeName) && (transformationType == standaloneCleanup.typeId)) {
-                standaloneCleanupTransformations.erase(it);
-                break;
-            }
-        }
-        return *this;
-    }
-
-    template <class Transformation, class Operation>
-    LowPrecisionTransformations& removeAll() {
-        removeBranchSpecific<Transformation, Operation>();
-        remove<Transformation, Operation>();
-        removeCleanup<Transformation, Operation>();
-        removeStandaloneCleanup<Transformation, Operation>();
-
-        return *this;
-    }
-
-    /**
-     * Add branch specific transformation. Transformation type and operation type are required.
-     * Operation type is used to find transformation by operation during precision definition.
-     */
-    template <class Transformation, class Operation>
-    LowPrecisionTransformations& addBranchSpecific(const LayerTransformation::Params& params) {
-        const std::string typeName = getType<Operation>();
-        const auto it = branchSpecificTransformations.find(typeName);
-        if (it != branchSpecificTransformations.end()) {
-            branchSpecificTransformations.erase(it);
-        }
-
-        branchSpecificTransformations.emplace(typeName, std::make_shared<Transformation>(params));
-        return *this;
-    }
-
-    /**
-    * Add decomposition transformation. Transformation type and operation type are required.
-    * Operation type is used to find transformation by operation during precision definition.
-    */
-    template <class Transformation, class Operation>
-    LowPrecisionTransformations& addDecomposition(const LayerTransformation::Params& params) {
-        const std::string typeName = getType<Operation>();
-        const auto it = decompositionTransformations.find(typeName);
-        if (it != decompositionTransformations.end()) {
-            decompositionTransformations.erase(it);
-        }
-
-        decompositionTransformations.emplace(typeName, std::make_shared<Transformation>(params));
-        return *this;
-    }
-
-    /**
-     * Add transformation. Transformation type and operation type are required.
-     * Operation type is used to find transformation by operation during precision definition.
-     */
-    template <class Transformation, class Operation>
-    LowPrecisionTransformations& add(const LayerTransformation::Params& params) {
-        const std::string typeName = getType<Operation>();
-        const auto it = transformations.find(typeName);
-        if (it != transformations.end()) {
-            transformations.erase(it);
-        }
-
-        transformations.emplace(typeName, std::make_shared<Transformation>(params));
-        return *this;
-    }
-
-    /**
-     * Add cleanup transformation. Transformation type and operation type are required.
-     * Operation type is used to find transformation by operation during precision definition.
-     */
-    template <class Transformation, class Operation>
-    LowPrecisionTransformations& addCleanup(const LayerTransformation::Params& params) {
-        const std::string typeName = getType<Operation>();
-        const std::string typeId = typeid(Transformation).name();
-        const auto it = cleanupTransformations.find(typeName);
-        if (it == cleanupTransformations.end()) {
-            cleanupTransformations.emplace(typeName,
-                std::vector<std::pair<std::string, LayerTransformationPtr>>{ std::make_pair(typeId, std::make_shared<Transformation>(params)) });
-        } else {
-            const auto it1 = std::find_if(it->second.begin(), it->second.end(),
-                [&](const std::pair<std::string, LayerTransformationPtr>& transformation) {
-                    return transformation.first == typeName;
-                });
-            if (it1 != it->second.end()) {
-                it->second.erase(it1);
-            }
-            it->second.emplace_back(std::make_pair(typeId, std::make_shared<Transformation>(params)));
-        }
-        return *this;
-    }
-
-    /**
-     * Add cleanup transformation. Transformation type and operation type are required.
-     * Operation type is used to find transformation by operation during precision definition.
-     */
-    template <class Transformation, class Operation>
-    LowPrecisionTransformations& addStandaloneCleanup(const LayerTransformation::Params& params) {
-        const std::string typeName = getType<Operation>();
-        const std::string typeId = typeid(Transformation).name();
-        const auto it = std::find_if(standaloneCleanupTransformations.begin(), standaloneCleanupTransformations.end(),
-            [&](const StandaloneCleanup& transformation) {
-                return transformation.typeName == typeName && transformation.typeId == typeId;
-            });
-        if (it == standaloneCleanupTransformations.end()) {
-            standaloneCleanupTransformations.emplace_back(StandaloneCleanup{ typeName, typeId, std::make_shared<Transformation>(params) });
-        } else {
-            *it = { typeName, typeId, std::make_shared<Transformation>(params) };
-        }
-
-        return *this;
-    }
-
-    template <class Operation>
-    static std::string getType() {
-        return Operation::get_type_info_static().name;
-    }
-
-    static std::string getType(const Node& operation) {
-        return operation.get_type_name();
-    }
-
-    std::vector<LayerTransformationPtr> find(const std::string& transformationName) const;
-
-    template <class Operation>
-    std::vector<LayerTransformationPtr> find() const {
-        const std::string transformationKey = getType<Operation>();
-        return find(transformationKey);
-    }
-
-    void setParamsManager(IParamsManager* paramsManager) noexcept;
-    void setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept;
-
-    // Key is not a layer type, but just a name of transformation
-    // Layer type (or a pattern) is defined by transformation itself as an ngraph matcher
-    std::map<std::string, LayerTransformationPtr> branchSpecificTransformations;
-    std::map<std::string, LayerTransformationPtr> decompositionTransformations;
-    std::map<std::string, LayerTransformationPtr> transformations;
-    std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>> cleanupTransformations;
-    std::vector<StandaloneCleanup> standaloneCleanupTransformations;
-
-private:
-    static void setParamsManager(IParamsManager* paramsManager, std::map<std::string, LayerTransformationPtr>& transformations) noexcept;
-    static void setParamsManager(
-        IParamsManager* paramsManager,
-        std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>>& transformations) noexcept;
-    static void setParamsManager(IParamsManager* paramsManager, std::vector<StandaloneCleanup>& transformations) noexcept;
-    static void setLayerTransformationsManager(
-        ILayerTransformationsManager* layerTransformationsManager,
-        std::map<std::string, LayerTransformationPtr>& transformations) noexcept;
-    static void setLayerTransformationsManager(
-        ILayerTransformationsManager* layerTransformationsManager,
-        std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>>& transformations) noexcept;
-    static void setLayerTransformationsManager(
-        ILayerTransformationsManager* layerTransformationsManager,
-        std::vector<StandaloneCleanup>& transformations) noexcept;
-};
-
-/**
- * @brief low precision transformation component.
-  */
-class TRANSFORMATIONS_API LowPrecisionTransformer : public IParamsManager, ILayerTransformationsManager {
-public:
-    static LowPrecisionTransformations getAllTransformations(const LayerTransformation::Params& params = LayerTransformation::Params());
-
-    static bool isFunctionQuantized(const std::shared_ptr<const Function>& function);
-
-    LowPrecisionTransformer();
-    LowPrecisionTransformer(const LowPrecisionTransformations& transformations);
-    void transform(std::shared_ptr<Function> network);
-
-    // IParamsManager interface implementation
-    std::vector<element::Type> getPrecisionsOnActivations(const Node& op) const noexcept override;
-
-    // ILayerTransformationsManager interface implementation
-    bool isQuantized(const std::shared_ptr<Node>& layer) const noexcept override;
-    bool isPrecisionPreserved(const std::shared_ptr<Node>& layer) const noexcept override;
-
-private:
-    LowPrecisionTransformations transformations;
-
-    void registerAllMatchers(
-        std::map<std::string, LayerTransformationPtr> transformations,
-        GraphRewrite& pass,
-        TransformationContext& context);
-
-    void registerAllMatchers(
-        std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>> transformations,
-        GraphRewrite& pass,
-        TransformationContext& context);
-};
-
-class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite {
-public:
-    TypeRelaxedReplacer();
-};
-
-} // namespace low_precision
-} // namespace pass
-} // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/include/low_precision/transparent_base_transformation.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/transparent_base_transformation.hpp
@ -12,11 +12,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API TransparentBaseTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API TransparentBaseTransformation : public LayerTransformation {
 public:
    TransparentBaseTransformation(const Params& params) : LayerTransformation(params) {}
    ~TransparentBaseTransformation() override {};
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
 };

--- a/inference-engine/src/low_precision_transformations/include/low_precision/transpose.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/transpose.hpp
@ -12,12 +12,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API TransposeTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API TransposeTransformation : public LayerTransformation {
 public:
-    TransposeTransformation(const Params& params) : LayerTransformation(params) {}
-    ~TransposeTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    TransposeTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/unsqueeze.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/unsqueeze.hpp
@ -11,11 +11,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API UnsqueezeTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API UnsqueezeTransformation : public LayerTransformation {
 public:
-    UnsqueezeTransformation(const Params& params);
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    UnsqueezeTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
 };
--- a/inference-engine/src/low_precision_transformations/include/low_precision/update_shared_precision_preserved.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/update_shared_precision_preserved.hpp
@ -0,0 +1,107 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/pass/pass.hpp>
+#include <ngraph/variant.hpp>
+
+#include "low_precision/network_helper.hpp"
+#include "low_precision/lpt_itt.hpp"
+#include "low_precision/lpt_visibility.hpp"
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+// Forward declaration; the definition below supplies the default for the second parameter.
+// The parameter is an attribute type that inputs are expected to carry, not an operation type.
+template <typename AttributeType, typename ExpectedAttributeType>
+class UpdateSharedPrecisionPreserved;
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
+
+/**
+ * @brief Matcher pass that visits any node and sets `sharedValue->value = true` on the
+ * `AttributeType` attribute found at the source of each of the node's inputs.
+ *
+ * The callback leaves a node untouched (returns false) when:
+ *  - `ExpectedAttributeType` is the same as `AttributeType` and the node is a Result, a FakeQuantize,
+ *    or `transformation_callback(node)` returns true;
+ *  - `NetworkHelper::isPrecisionPreserved(node)` is true or the node is a FakeQuantize;
+ *  - some input has a precisions attribute whose shared precision list is empty;
+ *  - `ExpectedAttributeType` differs from `AttributeType` and some input has no such attribute.
+ *
+ * @tparam AttributeType attribute wrapper type whose shared value is updated on input sources.
+ * @tparam ExpectedAttributeType attribute every input must carry when it differs from `AttributeType`;
+ *         defaults to `AttributeType`, which disables that check.
+ */
+template <typename AttributeType, typename ExpectedAttributeType = AttributeType>
+class ngraph::pass::low_precision::UpdateSharedPrecisionPreserved : public ngraph::pass::MatcherPass {
+public:
+    UpdateSharedPrecisionPreserved() {
+        // Only members of this pass are used inside the callback, so capture `this` explicitly.
+        ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
+            auto node = m.get_match_root();
+
+            const bool needToCheckExpectedAttributeType = !std::is_same<ExpectedAttributeType, AttributeType>::value;
+            if (!needToCheckExpectedAttributeType) {
+                // expected attribute is ignored, set attributes for node inputs except Result & FakeQuantize operations
+                if (is_type<ngraph::opset1::Result>(node) ||
+                    is_type<ngraph::opset1::FakeQuantize>(node) ||
+                    transformation_callback(node)) {
+                    return false;
+                }
+            }
+
+            // The FakeQuantize test is repeated here because the block above is skipped
+            // when an expected attribute type is specified.
+            if (ngraph::pass::low_precision::NetworkHelper::isPrecisionPreserved(node) || is_type<opset1::FakeQuantize>(node)) {
+                return false;
+            }
+
+            {
+                OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "UpdateSharedPrecisionPreserved");
+
+                // TODO: check if the node can be quantized; if not, do not update
+                // First pass over the inputs: bail out if any precisions attribute has no precisions left.
+                for (const auto& input : node->inputs()) {
+                    auto precisionsAttributeWrapper = getAttribute<PrecisionsAttributePtr>(input);
+                    if (precisionsAttributeWrapper != nullptr) {
+                        const auto precisionsAttribute = precisionsAttributeWrapper->get();
+                        assert(precisionsAttribute != nullptr);
+                        if (precisionsAttribute->sharedValue->precisions.empty()) {
+                            return false;
+                        }
+                    }
+                }
+
+                // Second pass: mark the attribute on each input's source.
+                // NOTE(review): an early return here happens after earlier inputs were already
+                // updated - confirm that a partial update is acceptable.
+                for (const auto& input : node->inputs()) {
+                    if (needToCheckExpectedAttributeType) {
+                        if (getAttribute<ExpectedAttributeType>(input) == nullptr) {
+                            return false;
+                        }
+                    }
+                    auto parentAttribute = getSourceAttribute(input);
+                    if (parentAttribute == nullptr) {
+                        continue;
+                    }
+
+                    parentAttribute->get()->sharedValue->value = true;
+                }
+            }
+
+            return true;
+        };
+
+        // Matcher name now agrees with the class and with the ITT scope above
+        // (it previously reused the name of PropagateThroughPrecisionPreserved).
+        auto matcher = std::make_shared<ngraph::pattern::Matcher>(pattern::any_input(), "UpdateSharedPrecisionPreserved");
+        this->register_matcher(matcher, callback);
+    }
+
+private:
+    // Returns the input to read the source from: when the input is fed by a dequantization whose
+    // data node is a Convert placed directly after a FakeQuantize, the (single) consumer input of
+    // that Convert output is returned; otherwise the given input is returned unchanged.
+    Input<Node> getDequantizationInput(const Input<Node>& input) {
+        const auto dequantization = NetworkHelper::getDequantization(input.get_node()->shared_from_this(), input.get_index());
+        if (!dequantization.empty() &&
+            (is_type<opset1::Convert>(dequantization.data.get_node())) &&
+            is_type<opset1::FakeQuantize>(dequantization.data.get_node()->get_input_node_ptr(0))) {
+            assert(dequantization.data.get_target_inputs().size() == 1ul);
+            return *dequantization.data.get_target_inputs().begin();
+        }
+        return input;
+    }
+
+    // Looks up the AttributeType attribute on the node that produces the (dequantization-adjusted) input.
+    // Returns nullptr when no such attribute is found.
+    std::shared_ptr<ngraph::VariantWrapper<AttributeType>> getSourceAttribute(const Input<Node>& input) {
+        const auto dequantizationInput = getDequantizationInput(input);
+        const auto output = dequantizationInput.get_source_output();
+        auto attribute = ngraph::pass::low_precision::getAttribute<AttributeType>(output.get_node()->shared_from_this());
+        if (attribute == nullptr) {
+            // NOTE(review): this fallback appears to query the same node as the call above - confirm
+            // whether a lookup on `output` itself was intended.
+            attribute = ngraph::pass::low_precision::getAttribute<AttributeType>(output.get_node_shared_ptr());
+        }
+        return attribute;
+    }
+};
--- a/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp
@ -13,10 +13,10 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API VariadicSplitTransformation : public SplitTransformation {
+class LP_TRANSFORMATIONS_API VariadicSplitTransformation : public SplitTransformation {
 public:
-    VariadicSplitTransformation(const Params& params);
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
+    NGRAPH_RTTI_DECLARATION;
+    VariadicSplitTransformation(const Params& params = Params());
 };
 } // namespace low_precision
 } // namespace pass
--- a/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp
@ -13,21 +13,30 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API WeightableLayerTransformation : public LayerTransformation{
+class LP_TRANSFORMATIONS_API WeightableLayerTransformation : public LayerTransformation{
 public:
    WeightableLayerTransformation(const Params& params);
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
    bool canConvolutionBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const;
-    bool isQuantized(std::shared_ptr<Node> layer, bool reshapeIsRequired) const noexcept;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;

+    static bool checkPrecisionOnActivation(
+        const std::shared_ptr<const ngraph::Node>& node,
+        const std::vector<ngraph::element::Type>& supportedPrecisionsOnActivations) {
+        return true;
+    }
+
+    static bool isQuantizedStatic(const std::shared_ptr<const Node>& layer, const bool reshapeIsRequired) noexcept;
+
 protected:
-    void decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& weightableLayer, size_t outChannelsShapeIndex = 0ul) const;
+    bool decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& weightableLayer, size_t outChannelsShapeIndex = 0ul) const;
    static bool isGroup(const std::shared_ptr<Node>& node);
    static bool isDepthwise(const std::shared_ptr<Node>& node);

-    std::shared_ptr<opset1::FakeQuantize> getFakeQuantizeOnWeights(const std::shared_ptr<Node>& node) const;
-    DataPrecision getDataPrecisionOnWeights(const std::shared_ptr<Node>& node) const;
+public:
+    static std::shared_ptr<opset1::FakeQuantize> getFakeQuantizeOnWeights(const std::shared_ptr<Node>& node);
+    static DataPrecision getDataPrecisionOnWeights(const std::shared_ptr<Node>& node);
+    static bool isAsymmetricOnWeights(const std::shared_ptr<const Node>& node);
 };

 } // namespace low_precision
--- a/inference-engine/src/low_precision_transformations/src/add.cpp
+++ b/inference-engine/src/low_precision_transformations/src/add.cpp
@ -10,6 +10,7 @@
 #include <utility>
 #include <vector>

+#include <ngraph/pattern/op/wrap_type.hpp>
 #include "ngraph_ops/type_relaxed.hpp"

 #include "low_precision/common/ie_lpt_exception.hpp"
@ -20,6 +21,8 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

+NGRAPH_RTTI_DEFINITION(AddTransformation, "AddTransformation", 0);
+
 std::shared_ptr<opset1::Subtract> replaceToSubtract(const std::shared_ptr<Node>& op) {
    // TODO: separate this part to standalone transformation: AddToSubtractTransformation
    // motivation:
@ -88,11 +91,22 @@ std::shared_ptr<opset1::Subtract> fuseWithSubtract(const std::shared_ptr<Node>&
    return newSubtract;
 }

-void AddTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
-    addSingleNodePattern<opset1::Add>(pass, context);
+// Registers this transformation as a matcher on opset1::Add nodes.
+AddTransformation::AddTransformation(const Params& params) : EltwiseBaseTransformation(params) {
+    auto addPattern = ngraph::pattern::wrap_type<opset1::Add>();
+
+    // A node for which transformation_callback returns true is skipped; otherwise transform() decides.
+    ngraph::graph_rewrite_callback handler = [this](pattern::Matcher& matched) {
+        auto root = matched.get_match_root();
+        return transformation_callback(root) ? false : transform(*context, matched);
+    };
+
+    this->register_matcher(
+        std::make_shared<ngraph::pattern::Matcher>(addPattern, "AddTransformation"),
+        handler);
 }

-bool AddTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
+bool AddTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
    std::shared_ptr<opset1::Add> op = as_type_ptr<opset1::Add>(m.get_match_root());
    if ((op == nullptr) || (!canBeTransformed(context, op))) {
        return false;
--- a/inference-engine/src/low_precision_transformations/src/align_quantization_intervals.cpp
+++ b/inference-engine/src/low_precision_transformations/src/align_quantization_intervals.cpp
@ -0,0 +1,25 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision/align_quantization_intervals.hpp"
+#include <memory>
+#include <ngraph/opsets/opset1.hpp>
+#include "low_precision/create_attribute.hpp"
+#include "low_precision/propagate_through_precision_preserved.hpp"
+#include "low_precision/rt_info/intervals_alignment_attribute.hpp"
+
+using namespace ngraph;
+using namespace ngraph::pass::low_precision;
+
+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::AlignQuantizationIntervals, "AlignQuantizationIntervals", 0);
+
+// Runs a nested pass manager (per-pass validation disabled) holding one GraphRewrite with, in
+// registration order, the CreateAttribute matcher for FakeQuantize and the
+// PropagateThroughPrecisionPreserved matcher for IntervalsAlignmentAttribute. Always returns false.
+bool ngraph::pass::low_precision::AlignQuantizationIntervals::run_on_function(std::shared_ptr<ngraph::Function> f) {
+    ngraph::pass::Manager nestedManager;
+    nestedManager.set_per_pass_validation(false);
+
+    auto rewrite = nestedManager.register_pass<ngraph::pass::GraphRewrite>();
+    rewrite->add_matcher<low_precision::CreateAttribute<IntervalsAlignmentAttributePtr, opset1::FakeQuantize>>();
+    rewrite->add_matcher<low_precision::PropagateThroughPrecisionPreserved<IntervalsAlignmentAttribute>>();
+
+    nestedManager.run_passes(f);
+    return false;
+}
--- a/inference-engine/src/low_precision_transformations/src/align_quantization_parameters.cpp
+++ b/inference-engine/src/low_precision_transformations/src/align_quantization_parameters.cpp
@ -0,0 +1,27 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision/align_quantization_parameters.hpp"
+#include <memory>
+#include "low_precision/create_attribute.hpp"
+#include "low_precision/propagate_through_precision_preserved.hpp"
+#include "low_precision/rt_info/quantization_alignment_attribute.hpp"
+#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp"
+#include "low_precision/update_shared_precision_preserved.hpp"
+
+using namespace ngraph;
+using namespace ngraph::pass::low_precision;
+
+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::AlignQuantizationParameters, "AlignQuantizationParameters", 0);
+
+// Runs a nested pass manager (per-pass validation disabled) holding one GraphRewrite with three
+// matchers, registered in this order: CreateAttribute, PropagateThroughPrecisionPreserved and
+// UpdateSharedPrecisionPreserved for the quantization alignment attribute. Always returns false.
+bool ngraph::pass::low_precision::AlignQuantizationParameters::run_on_function(std::shared_ptr<ngraph::Function> f) {
+    ngraph::pass::Manager nestedManager;
+    nestedManager.set_per_pass_validation(false);
+
+    auto rewrite = nestedManager.register_pass<ngraph::pass::GraphRewrite>();
+    rewrite->add_matcher<low_precision::CreateAttribute<QuantizationAlignmentAttributePtr>>();
+    rewrite->add_matcher<low_precision::PropagateThroughPrecisionPreserved<QuantizationAlignmentAttribute>>();
+    rewrite->add_matcher<low_precision::UpdateSharedPrecisionPreserved<QuantizationAlignmentAttributePtr, PerTensorQuantizationAttribute>>();
+
+    nestedManager.run_passes(f);
+    return false;
+}
--- a/inference-engine/src/low_precision_transformations/src/avg_pool.cpp
+++ b/inference-engine/src/low_precision_transformations/src/avg_pool.cpp
@ -7,39 +7,39 @@
 #include <memory>
 #include <ngraph/ngraph.hpp>
 #include <ngraph/opsets/opset1.hpp>
+#include <ngraph/pattern/op/wrap_type.hpp>

 #include "low_precision/network_helper.hpp"
+#include "low_precision/rt_info/precision_preserved_attribute.hpp"

 namespace ngraph {
 namespace pass {
 namespace low_precision {

+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::AvgPoolTransformation, "AvgPoolTransformation", 0);
+
 AvgPoolTransformation::AvgPoolTransformation(const Params& params) : LayerTransformation(params) {
+    // Pattern: an opset1::AvgPool whose input pattern is an opset1::Multiply.
+    auto avgPoolPattern = pattern::wrap_type<opset1::AvgPool>({ pattern::wrap_type<opset1::Multiply>() });
+
+    // A node for which transformation_callback returns true is skipped; otherwise transform() decides.
+    ngraph::graph_rewrite_callback handler = [this](pattern::Matcher& matched) {
+        auto root = matched.get_match_root();
+        return transformation_callback(root) ? false : transform(*context, matched);
+    };
+
+    this->register_matcher(
+        std::make_shared<ngraph::pattern::Matcher>(avgPoolPattern, "AvgPoolTransformation"),
+        handler);
 }

-void AvgPoolTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
-    addPattern(
-        pass,
-        context,
-        make_op_pattern<opset1::AvgPool>({ make_op_label<opset1::Multiply>() }));
-}
-
-bool AvgPoolTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
+bool AvgPoolTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
    if (!canBeTransformed(context, m.get_match_root())) {
        return false;
    }

    const std::shared_ptr<Node> pooling = NetworkHelper::separateInStandaloneBranch(m.get_match_root());
-
-    const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(pooling);
-
-    bool updatePrecision;
-    if ((children.size() == 1ul) && (!this->layerTransformationsManager->isQuantized(children[0]))) {
-        updatePrecision = false;
-    } else {
-        updatePrecision = NetworkHelper::notAllChildrensAreFQ(children);
-    }
-
+    const bool updatePrecision = isPrecisionPreserved(pooling);
    moveDequantizationAfter(context, pooling, NetworkHelper::getDequantization(pooling), updatePrecision);
    return true;
 }
@ -55,8 +55,7 @@ bool AvgPoolTransformation::canBeTransformed(const TransformationContext& contex
 }

 bool AvgPoolTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept {
-    const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(layer);
-    return NetworkHelper::notAllChildrensAreFQ(children);
+    return NetworkHelper::isPrecisionPreserved(layer);
 }

 } // namespace low_precision
--- a/inference-engine/src/low_precision_transformations/src/base_matcher_pass.cpp
+++ b/inference-engine/src/low_precision_transformations/src/base_matcher_pass.cpp
@ -0,0 +1,13 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision/base_matcher_pass.hpp"
+#include <ngraph/node.hpp>
+#include "low_precision/rt_info/attribute_parameters.hpp"
+
+using namespace ngraph;
+using namespace ngraph::pass::low_precision;
+
+// Initializes the `params` member from the supplied attribute parameters.
+ngraph::pass::low_precision::BaseMatcherPass::BaseMatcherPass(const AttributeParameters& params)
+    : params(params) {}
--- a/inference-engine/src/low_precision_transformations/src/clamp.cpp
+++ b/inference-engine/src/low_precision_transformations/src/clamp.cpp
@ -6,21 +6,32 @@
 #include <algorithm>
 #include <memory>
 #include <ngraph/ngraph.hpp>
+
+#include <ngraph/pattern/op/wrap_type.hpp>
 #include "low_precision/network_helper.hpp"

 namespace ngraph {
 namespace pass {
 namespace low_precision {

-ClampTransformation::ClampTransformation(const Params& params) : LayerTransformation(params) {}
+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ClampTransformation, "ClampTransformation", 0);

-void ClampTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const {
-    addPattern(pass,
-               context,
-               make_op_pattern<opset1::Clamp>({ make_op_label<opset1::Multiply>() }));
+// Registers this transformation as a matcher on opset1::Clamp nodes fed by an opset1::Multiply pattern.
+ClampTransformation::ClampTransformation(const Params& params) : LayerTransformation(params) {
+    auto clampPattern = pattern::wrap_type<opset1::Clamp>({ pattern::wrap_type<opset1::Multiply>() });
+
+    // A node for which transformation_callback returns true is skipped; otherwise transform() decides.
+    ngraph::graph_rewrite_callback handler = [this](pattern::Matcher& matched) {
+        auto root = matched.get_match_root();
+        return transformation_callback(root) ? false : transform(*context, matched);
+    };
+
+    this->register_matcher(
+        std::make_shared<ngraph::pattern::Matcher>(clampPattern, "ClampTransformation"),
+        handler);
 }

-bool ClampTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) const {
+bool ClampTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) {
    auto subWithTheSameValues = [](std::shared_ptr<ngraph::opset1::Subtract> sub) {
        if (sub == nullptr) {
            return false;
--- a/inference-engine/src/low_precision_transformations/src/common/operation_precision_restriction.cpp
+++ b/inference-engine/src/low_precision_transformations/src/common/operation_precision_restriction.cpp
@ -0,0 +1,19 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision/common/operation_precision_restriction.hpp"
+
+#include <memory>
+#include <unordered_set>
+#include <set>
+#include <vector>
+
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/pattern/op/wrap_type.hpp>
+#include <ngraph/pattern/op/or.hpp>
+#include "low_precision/network_helper.hpp"
+#include "low_precision/rt_info/precisions_attribute.hpp"
+
+using namespace ngraph;
+
--- a/inference-engine/src/low_precision_transformations/src/concat.cpp
+++ b/inference-engine/src/low_precision_transformations/src/concat.cpp
@ -11,11 +11,11 @@
 #include <utility>
 #include <vector>

+#include <ngraph/pattern/op/wrap_type.hpp>
 #include <ngraph/opsets/opset1.hpp>

 #include "low_precision/common/fake_quantize_dequantization.hpp"
 #include "low_precision/common/ie_lpt_exception.hpp"
-#include "low_precision/common/subgraph.hpp"
 #include "low_precision/common/dequantization_op.hpp"
 #include "low_precision/network_helper.hpp"

@ -23,218 +23,155 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-void ConcatTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const {
-    addSingleNodePattern<opset1::Concat>(pass, context);
+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ConcatTransformation, "ConcatTransformation", 0);
+
+// Registers this transformation as a matcher on opset1::Concat nodes.
+ConcatTransformation::ConcatTransformation(const Params& params) : LayerTransformation(params) {
+    auto concatPattern = ngraph::pattern::wrap_type<opset1::Concat>();
+
+    // A node for which transformation_callback returns true is skipped; otherwise transform() decides.
+    ngraph::graph_rewrite_callback handler = [this](pattern::Matcher& matched) {
+        auto root = matched.get_match_root();
+        return transformation_callback(root) ? false : transform(*context, matched);
+    };
+
+    this->register_matcher(
+        std::make_shared<ngraph::pattern::Matcher>(concatPattern, "ConcatTransformation"),
+        handler);
 }

-bool ConcatTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
+bool ConcatTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
    std::shared_ptr<ngraph::opset1::Concat> concat = ngraph::as_type_ptr<ngraph::opset1::Concat>(m.get_match_root());
    if (!canBeTransformed(context, concat)) {
        return false;
    }

-    ngraph::pass::low_precision::Subgraph subgraph(layerTransformationsManager);
-    std::unordered_set<std::string> handledLayers;
-    if (!subgraph.fillSubgraphForConcat(concat, handledLayers)) {
-        return false;
-    }
-
-    if (subgraph.quantizationLayers.empty() || isHandled(context, subgraph.quantizationLayers)) {
-        return false;
-    }
-
-    // Concat operations precision is defined:
-    // 1. consumers after Concat
-    // 2. FakeQuantize precisions without zero point
-    ngraph::Node& quantizationLayer = *subgraph.quantizationLayers[0];
-    std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(quantizationLayer.shared_from_this());
-    if (!NetworkHelper::isQuantizeSupported(fq)) {
-        return false;
-    }
-    DataPrecision dataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false);
-    if (dataPrecision.precision == ngraph::element::undefined) {
-        return false;
-    }
-
-    std::vector<element::Type> concatChildrenPrecisions = precisionsOnActivations;
-
-    for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
-        fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
-        if (fq == nullptr) {
+    std::vector<FakeQuantizeDequantization> layerDequantizations;
+    layerDequantizations.reserve(concat->get_input_size());
+    for (size_t parentIndex = 0ul; parentIndex < concat->get_input_size(); parentIndex++) {
+        FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(concat, parentIndex);
+        if (dequantization.empty()) {
            return false;
        }
+        layerDequantizations.push_back(dequantization);
+    }

-        if (!NetworkHelper::isQuantizeSupported(fq)) {
-            return false;
+    bool allDequantizationShiftAreZero = true;
+    bool allDequantizationMultiplyAreZero = true;
+    for (const auto& dequantization : layerDequantizations) {
+        if (dequantization.subtract != nullptr) {
+            allDequantizationShiftAreZero = false;
        }

-        const QuantizationDetails& quantizationDetails = QuantizationDetails::getDetails(fq);
-
-        // per tensor scale is supported only
-        if (quantizationDetails.inputHighValues.size() != 1ul) {
-            return false;
+        if (dequantization.multiply != nullptr) {
+            allDequantizationMultiplyAreZero = false;
        }

-        // define concatenation operation consumers precisions
-        std::vector<element::Type> fqChildrenPrecisions = precisionsOnActivations;
-        fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrenPrecisions);
-        concatChildrenPrecisions = NetworkHelper::precisionIntersection(concatChildrenPrecisions, fqChildrenPrecisions);
-        if (concatChildrenPrecisions.empty()) {
-            return false;
-        }
-
-        // define FakeQuantize precisions without zero point
-        const DataPrecision dataPrecision2 = getDataPrecision(subgraph.quantizationLayers[i]->shared_from_this(), quantizationDetails, false);
-        if (dataPrecision2.precision == ngraph::element::undefined) {
-            return false;
-        }
-
-        if (dataPrecision.precision != dataPrecision2.precision) {
-            dataPrecision = dataPrecision.precision.is_signed() ? dataPrecision : dataPrecision2;
+        if (!allDequantizationShiftAreZero && !allDequantizationMultiplyAreZero) {
+            break;
        }
    }

-    if (std::find(concatChildrenPrecisions.begin(), concatChildrenPrecisions.end(), dataPrecision.precision) == concatChildrenPrecisions.end()) {
-        dataPrecision = DataPrecision(concatChildrenPrecisions[0]);
-    }
+    auto broadcastElementWiseConst = [](
+        // FakeQuantize constant shape must be broadcastable to the shape on data.
+        std::shared_ptr<ngraph::opset1::Constant> operation,
+        const ngraph::Shape targetShape) -> std::shared_ptr<Node> {
+            auto targetShapeConst = std::make_shared<ngraph::opset1::Constant>(
+                element::i64, ngraph::Shape{ targetShape.size() },
+                targetShape);

-    std::vector<QuantizationDetails> quantizationLayersDetails;
-    for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
-        std::shared_ptr<opset1::FakeQuantize> fakeQuantize = as_type_ptr<opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
-        auto newFakeQuantize = NetworkHelper::fuseConvert(fakeQuantize);
-        if (newFakeQuantize == nullptr) {
-            subgraph.quantizationLayers[i] = fakeQuantize;
-            quantizationLayersDetails.push_back(QuantizationDetails::getDetails(fakeQuantize));
-            continue;
-        }
+            auto broadcast = ngraph::pass::low_precision::fold<ngraph::opset1::Broadcast>(
+                operation,
+                targetShapeConst,
+                ngraph::op::AutoBroadcastType::NUMPY);

-        fakeQuantize = newFakeQuantize;
-        newFakeQuantize = NetworkHelper::composeFakeQuantize(fakeQuantize);
-        if (newFakeQuantize == nullptr) {
-            subgraph.quantizationLayers[i] = fakeQuantize;
-            quantizationLayersDetails.push_back(QuantizationDetails::getDetails(fakeQuantize));
-            continue;
-        }
-
-        fakeQuantize = newFakeQuantize;
-        subgraph.quantizationLayers[i] = fakeQuantize;
-        quantizationLayersDetails.push_back(QuantizationDetails::getDetails(fakeQuantize));
-    }
-
-    FakeQuantizeDequantization dequantization;
-
-    if ((quantizationLayersDetails[0].inputHighValues.size() == 1)) {
-        float outputLowValue = quantizationLayersDetails[0].outputLowValues[0];
-        float outputHighValue = quantizationLayersDetails[0].outputHighValues[0];
-
-        for (size_t index = 0lu; index < subgraph.quantizationLayers.size(); index++) {
-            const QuantizationDetails& quantizationDetails = quantizationLayersDetails[index];
-            if (outputLowValue > quantizationDetails.outputLowValues[0]) {
-                outputLowValue = quantizationDetails.outputLowValues[0];
-            }
-            if (outputHighValue < quantizationDetails.outputHighValues[0]) {
-                outputHighValue = quantizationDetails.outputHighValues[0];
-            }
-        }
-
-        if ((outputLowValue == 0.f) && (outputHighValue == 0.f)) {
-            return false;
-        }
-
-        const float maxOutputInterval = outputHighValue - outputLowValue;
-        if (quantizedTensorAlignmentOnActivations == QuantizedTensorAlignment::UpdateLevel) {
-            const size_t minLevels = getMinQuantizationLevels(
-                dataPrecision,
-                maxOutputInterval,
-                quantizationLayersDetails,
-                outputLowValue,
-                outputHighValue);
-            if (minLevels < this->minQuantizationLevels) {
-                return false;
-            }
-        }
-
-        // FQ -> SUB_quantization -> MUL_quantization -[INT8]-> SUB_dequantization -> MUL_dequantization ->
-        const float quantizationMul = (dataPrecision.max - dataPrecision.min) / maxOutputInterval;
-        const float dequantizationMul = maxOutputInterval / (dataPrecision.max - dataPrecision.min);
-
-        // FQ outputLowValue = dataPrecision.min * dequantizationMul - quantizationSub
-        const float quantizationSub = outputLowValue - dataPrecision.min * dequantizationMul;
-        const float dequantizationSub = std::round(-quantizationSub * quantizationMul);
-
-        // 1. get data for dequantization. Dequantization data will be used several times later.
-        dequantization = ngraph::pass::low_precision::NetworkHelper::makeDequantization(
-            dequantizationMul,
-            dequantizationSub,
-            subgraph.quantizationLayers[0]->get_output_element_type(0),
-            subgraph.quantizationLayers[0]->get_output_partial_shape(0),
-            updatePrecisions ? dataPrecision.precision : subgraph.quantizationLayers[0]->get_output_element_type(0),
-            deqPrecision);
-
-        for (size_t index = 0; index < subgraph.quantizationLayers.size(); index++) {
-            std::shared_ptr<ngraph::opset1::FakeQuantize> fakeQuantizeLayer = as_type_ptr<ngraph::opset1::FakeQuantize>(
-                subgraph.quantizationLayers[index]->shared_from_this());
-
-            const QuantizationDetails& quantizationDetails = quantizationLayersDetails[index];
-
-            switch (quantizedTensorAlignmentOnActivations) {
-                case QuantizedTensorAlignment::None: {
-                    THROW_TRANSFORMATION_EXCEPTION << "not implemented: " << quantizedTensorAlignmentOnActivations;
-                }
-                case QuantizedTensorAlignment::UpdateLevel: {
-                    const float updatedOutputLowValue = (quantizationDetails.outputLowValues[0] - quantizationSub) * quantizationMul;
-                    const float updatedOutputHighValue = (quantizationDetails.outputHighValues[0] - quantizationSub) * quantizationMul;
-
-                    // 2. update FakeQuantize - one time action
-                    std::shared_ptr<opset1::FakeQuantize> newFakeQuantizeLayer = ngraph::pass::low_precision::NetworkHelper::updateFakeQuantize(
-                        fakeQuantizeLayer,
-                        updatePrecisions ? dataPrecision.precision : fakeQuantizeLayer->get_output_element_type(0),
-                        roundf(updatedOutputLowValue),
-                        roundf(updatedOutputHighValue));
-
-                    const size_t levels = static_cast<size_t>(fabs(roundf(updatedOutputHighValue) - roundf(updatedOutputLowValue)) + 1.0);
-                    newFakeQuantizeLayer->set_levels(levels);
-
-                    subgraph.quantizationLayers[index] = newFakeQuantizeLayer;
-                    subgraph.layers[fakeQuantizeLayer->get_friendly_name()] = newFakeQuantizeLayer;
-                    break;
-                }
-                default: {
-                    THROW_TRANSFORMATION_EXCEPTION << "unexpected value " << quantizedTensorAlignmentOnActivations;
-                }
-            }
-        }
-    } else {
-        return false;
-    }
-
-    auto dequantizationValuesCallback = [&](
-        std::shared_ptr<ngraph::Node> layer,
-        std::shared_ptr<ngraph::Node> child,
-        const std::string originalLayerName,
-        std::vector<FakeQuantizeDequantization>& dequantizationsToConcatenate) {
-        dequantizationsToConcatenate.push_back(dequantization);
+            return broadcast;
    };

-    addDequantizationLayers(context, subgraph, dequantizationValuesCallback);
+    bool someDqInLowPrecision = std::any_of(
+        layerDequantizations.begin(),
+        layerDequantizations.end(),
+        [](const FakeQuantizeDequantization& value) { return value.isLowPrecision(); });

-    if (updatePrecisions) {
-        for (const auto it : subgraph.layers) {
-            const std::shared_ptr<ngraph::Node>& node = it.second;
-            if (std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(node) != nullptr) {
-                ngraph::pass::low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(node->shared_from_this(), dataPrecision.precision);
-            } else {
-                // set precision to explicitly to have updated precision during transformation
-                for (size_t i = 0; i < node->get_output_size(); ++i) {
-                    node->set_output_type(i, dataPrecision.precision, node->get_output_partial_shape(i));
-                }
-            }
+    bool someDqInFpPrecision = std::any_of(
+        layerDequantizations.begin(),
+        layerDequantizations.end(),
+        [](const FakeQuantizeDequantization& value) { return !value.isLowPrecision(); });
+
+    bool DqWithDifferentPrecision = someDqInLowPrecision && someDqInFpPrecision;
+
+    OutputVector dataNodes;
+    NodeVector convertNodes;
+    NodeVector subtractNodes;
+    NodeVector multiplyNodes;
+    for (size_t i = 0; i < layerDequantizations.size(); ++i) {
+        const auto& dequantization = layerDequantizations[i];
+
+        if (DqWithDifferentPrecision && dequantization.isLowPrecision()) {
+            dataNodes.push_back(dequantization.convert);
+        } else {
+            dataNodes.push_back(dequantization.data);
+        }
+
+        if (dequantization.convert != nullptr) {
+            convertNodes.push_back(dequantization.convert);
+        }
+
+        Shape targetShape(concat->get_input_partial_shape(i).rank().get_length(), 1ul);
+        targetShape[1] = concat->get_input_partial_shape(i)[1].get_length();
+
+        if (!allDequantizationShiftAreZero) {
+            subtractNodes.push_back(dequantization.subtract == nullptr ?
+                std::make_shared<ngraph::opset1::Constant>(deqPrecision, targetShape, std::vector<float>({ 0.f })) :
+                broadcastElementWiseConst(dequantization.subtractConstant, targetShape));
+        }
+
+        if (!allDequantizationMultiplyAreZero) {
+            multiplyNodes.push_back(dequantization.multiply == nullptr ?
+                std::make_shared<ngraph::opset1::Constant>(deqPrecision, targetShape, std::vector<float>({ 1.0f })) :
+                broadcastElementWiseConst(dequantization.multiplyConstant, targetShape));
        }
    }

-    for (const std::shared_ptr<ngraph::Node>& quantizationLayer : subgraph.quantizationLayers) {
-        context.quantizedFakeQuantizeNames.insert(quantizationLayer->get_friendly_name());
+    const auto newConcat = concat->clone_with_new_inputs(dataNodes);
+
+    std::shared_ptr<ngraph::Node> lastDequantization = newConcat;
+    if (!convertNodes.empty()) {
+        const auto convert = convertNodes[0]->clone_with_new_inputs({ newConcat });
+
+        NetworkHelper::copyInfo({ concat, convert }, convert);
+        lastDequantization = convert;
    }
+
+    // concatenation axis is 1
+    if (!subtractNodes.empty()) {
+        const auto subtract = std::make_shared<DequantizationSubtract>(
+            lastDequantization,
+            NetworkHelper::toScalarIfPossible(subtractNodes.size() == 1ul ?
+                subtractNodes[0] :
+                ngraph::pass::low_precision::fold<ngraph::opset1::Concat>(subtractNodes, 1)));
+
+        NetworkHelper::copyInfo({ concat, subtract }, subtract);
+        lastDequantization = subtract;
+    }
+
+    if (!multiplyNodes.empty()) {
+        const auto multiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
+            DequantizationMultiply(
+                lastDequantization,
+                NetworkHelper::toScalarIfPossible(multiplyNodes.size() == 1ul ?
+                    multiplyNodes[0] :
+                    ngraph::pass::low_precision::fold<ngraph::opset1::Concat>(multiplyNodes, 1))),
+            layerDequantizations[0].multiply->get_output_element_type(0));
+
+        NetworkHelper::copyInfo({ concat, multiply }, multiply);
+        lastDequantization = multiply;
+    }
+
+    replace_node(concat, lastDequantization);
+    NetworkHelper::copyInfo(concat, newConcat);
+    updateOutput(context, lastDequantization, newConcat);
    return true;
 }

@@ -251,6 +188,8 @@ bool ConcatTransformation::canBeTransformed(const TransformationContext& context
    const auto axis = concat->get_axis();
    const auto outPShape = concat->get_output_partial_shape(0);
    const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), axis, outPShape.rank());
+
+    // TODO: LPT: to support current flow: #58269
    if (normalizedAxis != 1ul) {
        return false;
    }
@@ -259,6 +198,27 @@ bool ConcatTransformation::canBeTransformed(const TransformationContext& context
        return false;
    }

+    const bool perTensorQuantizationIsRequired = normalizedAxis != 1ul;
+
+    element::Type precision;
+    for (size_t i = 0ul; i < concat->get_input_size(); i++) {
+        const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(concat, i);
+        if (dequantization.empty() || (updatePrecisions && !dequantization.isLowPrecision())) {
+            return false;
+        }
+
+        if (precision == element::undefined) {
+            precision = dequantization.data.get_element_type();
+        } else if (precision != dequantization.data.get_element_type()) {
+            return false;
+        }
+
+        if (perTensorQuantizationIsRequired &&
+            (((dequantization.subtractConstant != nullptr) && !NetworkHelper::isScalarLike(dequantization.subtractConstant)) ||
+            ((dequantization.multiplyConstant != nullptr) && !NetworkHelper::isScalarLike(dequantization.multiplyConstant)))) {
+            return false;
+        }
+    }
    return true;
 }

@@ -338,115 +298,6 @@ std::shared_ptr<Node> ConcatTransformation::concatenateDeqNodes(NodeVector& node
    return nodes.size() == 1ul ? nodes[0] : fold<ngraph::opset1::Concat>(nodes, 1);
 }

-void ConcatTransformation::addDequantizationLayers(
-    TransformationContext& context,
-    ngraph::pass::low_precision::Subgraph& subgraph,
-    std::function<void(
-        std::shared_ptr<ngraph::Node> layer,
-        std::shared_ptr<ngraph::Node> child,
-        const std::string originalLayerName,
-        std::vector<FakeQuantizeDequantization>& dequantizationsToConcatenate)> getLayerDequantizationCallback) const {
-    std::unordered_map<std::string, ngraph::Node*> outputs;
-    for (size_t i = 0; i < context.function->get_output_size(); ++i) {
-        ngraph::Node* node = context.function->get_output_op(i).get();
-        if (node->get_input_size() != 1ul) {
-            THROW_IE_LPT_EXCEPTION(*node) << "unexpected inputs count for result node";
-        }
-
-        outputs.emplace(node->get_input_node_shared_ptr(0)->get_friendly_name(), node);
-    }
-
-    std::unordered_map<std::string, std::shared_ptr<ngraph::Node>> notHandledSubgraphLayers = subgraph.layers;
-    while (notHandledSubgraphLayers.size() != 0ul) {
-        const auto layerIt = notHandledSubgraphLayers.begin();
-        std::shared_ptr<ngraph::Node> layer = layerIt->second;
-        notHandledSubgraphLayers.erase(layerIt);
-
-        std::vector<FakeQuantizeDequantization> layerDequantizations;
-
-        for (size_t i = 0; i < layer->get_output_size(); ++i) {
-            const auto childInputs = layer->get_output_target_inputs(i);
-            for (const auto childInput : childInputs) {
-                ngraph::Node& child = *childInput.get_node();
-
-                if (subgraph.layers.find(child.get_friendly_name()) == subgraph.layers.end()) {
-                    std::shared_ptr<ngraph::Node> source = layer;
-                    const std::shared_ptr<ngraph::Node> destination = child.shared_from_this();
-
-                    if (layerDequantizations.size() == 0ul) {
-                        // fill layerDequantizations collection
-                        getLayerDequantizationCallback(source, destination, source->get_friendly_name(), layerDequantizations);
-                    }
-
-                    {
-                        NodeVector convertNodes;
-                        NodeVector subtractNodes;
-                        NodeVector multiplyNodes;
-
-                        // forming nodes for concatenation
-                        fillDequantizationNodes(layerDequantizations, layer, convertNodes, subtractNodes, multiplyNodes);
-
-                        // TODO: the second place (first is FQ decomposition) where dequantization operations are inserted
-                        if (!convertNodes.empty()) {
-                            const size_t sourceOutputIdx = NetworkHelper::getChildInputIndex(source, destination);
-                            std::shared_ptr<ngraph::Node> convert =
-                                convertNodes[0]->clone_with_new_inputs({ destination->get_input_source_output(sourceOutputIdx) });
-
-                            insert_new_node_between(source, destination, convert);
-                            ngraph::copy_runtime_info({ layer, convert }, convert);
-                            source = convert;
-                        }
-
-                        // concatenation axis is 1
-                        if (!subtractNodes.empty()) {
-                            const size_t sourceOutputIdx = NetworkHelper::getChildInputIndex(source, destination);
-                            std::shared_ptr<ngraph::opset1::Subtract> subtract = std::make_shared<DequantizationSubtract>(
-                                destination->get_input_source_output(sourceOutputIdx),
-                                NetworkHelper::toScalarIfPossible(concatenateDeqNodes(subtractNodes)));
-
-                            insert_new_node_between(source, destination, subtract);
-                            ngraph::copy_runtime_info({ layer, subtract }, subtract);
-                            source = subtract;
-                        }
-
-                        if (!multiplyNodes.empty()) {
-                            const size_t sourceOutputIdx = NetworkHelper::getChildInputIndex(source, destination);
-                            std::shared_ptr<ngraph::opset1::Multiply> multiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
-                                DequantizationMultiply(
-                                    destination->get_input_source_output(sourceOutputIdx),
-                                    NetworkHelper::toScalarIfPossible(concatenateDeqNodes(multiplyNodes))),
-                                    layerDequantizations[0].multiply->get_output_element_type(0));
-
-                            insert_new_node_between(source, destination, multiply);
-                            ngraph::copy_runtime_info({ layer, multiply }, multiply);
-                            source = multiply;
-                        }
-                    }
-
-                    // first input is used
-                    const ngraph::element::Type precision = layerDequantizations[0].data.get_element_type();
-                    layer->set_output_type(0, precision, layer->get_output_partial_shape(0));
-
-                    const auto it = outputs.find(layer->get_friendly_name());
-                    if (it != outputs.end() && is_type<ngraph::opset1::Result>(child.shared_from_this())) {
-                        const std::string originalName = layer->get_friendly_name();
-                        const std::string newName = layer->get_friendly_name() + LayerTransformation::originalLayerPostfix;
-                        layer->set_friendly_name(newName);
-
-                        // Split & VariadicSplit have other naming rules
-                        if (is_type<opset1::Split>(layer) || is_type<opset1::VariadicSplit>(layer)) {
-                            source->set_friendly_name(originalName + "." + std::to_string(i));
-                        } else {
-                            source->set_friendly_name(originalName);
-                        }
-                        subgraph.layers[layer->get_friendly_name()] = layer;
-                    }
-                }
-            }
-        }
-    }
-}
-
 bool ConcatTransformation::isHandled(const TransformationContext& context, const std::vector<std::shared_ptr<ngraph::Node>>& quantizationOperations) {
    for (const std::shared_ptr<ngraph::Node>& quantizationLayer : quantizationOperations) {
        if (context.quantizedFakeQuantizeNames.find(quantizationLayer->get_friendly_name()) != context.quantizedFakeQuantizeNames.end()) {
@@ -457,32 +308,6 @@ bool ConcatTransformation::isHandled(const TransformationContext& context, const
    return false;
 }

-size_t ConcatTransformation::getMinQuantizationLevels(
-    const DataPrecision& dataPrecision,
-    const float maxOutputInterval,
-    const std::vector<QuantizationDetails>& quantizationLayersDetails,
-    const float outputLowValue,
-    const float outputHighValue) const {
-    size_t minLevels = std::numeric_limits<std::size_t>::max();
-    for (const QuantizationDetails quantizationDetails : quantizationLayersDetails) {
-        // if there is negative part then calculation is based on `outputLowValue` if not then on `outputHighValue` only
-        const float updatedOutputLowValue = outputLowValue != 0.f ?
-            (quantizationDetails.outputLowValues[0] / outputLowValue) * dataPrecision.min :
-            (quantizationDetails.outputLowValues[0] / outputHighValue) * dataPrecision.max;
-
-        // if there is positive part then calculation is based on `outputHighValue` if not then on `outputLowValue` only
-        const float updatedOutputHighValue = outputHighValue != 0.f ?
-            (quantizationDetails.outputHighValues[0] / outputHighValue) * dataPrecision.max :
-            (quantizationDetails.outputHighValues[0] / outputLowValue) * dataPrecision.min;
-
-        const size_t levels = static_cast<size_t>(fabs(roundf(updatedOutputHighValue) - roundf(updatedOutputLowValue)) + 1.0);
-        if (minLevels > levels) {
-            minLevels = levels;
-        }
-    }
-    return minLevels;
-}
-
 } // namespace low_precision
 } // namespace pass
 } // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp
+++ b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp
@@ -1,334 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision/concat_multi_channels.hpp"
-
-#include <queue>
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <vector>
-
-#include <ngraph/ngraph.hpp>
-#include <ngraph/opsets/opset1.hpp>
-
-#include "low_precision/common/fake_quantize_dequantization.hpp"
-#include "low_precision/common/dequantization_op.hpp"
-#include "low_precision/common/ie_lpt_exception.hpp"
-#include "low_precision/common/subgraph.hpp"
-#include "low_precision/network_helper.hpp"
-
-namespace ngraph {
-namespace pass {
-namespace low_precision {
-
-bool ConcatMultiChannelsTransformation::isMultiChannel(const std::vector<std::shared_ptr<ngraph::opset1::Concat>>& concatLayers) const noexcept {
-    for (const std::shared_ptr<ngraph::opset1::Concat>& concat : concatLayers) {
-        const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(concat);
-        for (const std::shared_ptr<ngraph::Node>& child : children) {
-            if ((is_type<ngraph::opset1::Convolution>(child.get()) ||
-                is_type<ngraph::opset1::ConvolutionBackpropData>(child.get())) &&
-                this->layerTransformationsManager->isQuantized(child)) {
-                return false;
-            }
-        }
-    }
-    return true;
-}
-
-void ConcatMultiChannelsTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const {
-    addSingleNodePattern<opset1::Concat>(pass, context);
-}
-
-bool ConcatMultiChannelsTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
-    std::shared_ptr<ngraph::opset1::Concat> concat = ngraph::as_type_ptr<ngraph::opset1::Concat>(m.get_match_root());
-    if (!canBeTransformed(context, concat)) {
-        return false;
-    }
-
-    ngraph::pass::low_precision::Subgraph subgraph(layerTransformationsManager);
-    std::unordered_set<std::string> handledLayers;
-    if (!subgraph.fillSubgraphForConcat(concat, handledLayers)) {
-        return false;
-    }
-
-    if (subgraph.quantizationLayers.empty() || isHandled(context, subgraph.quantizationLayers)) {
-        return false;
-    }
-
-    if (!isMultiChannel(subgraph.concatLayers)) {
-        ConcatTransformation::transform(context, m);
-        return false;
-    }
-
-    DataPrecision dataPrecision;
-    {
-        std::vector<element::Type> concatChildrenPrecisions = precisionsOnActivations;
-        for (auto quantizationLayer : subgraph.quantizationLayers) {
-            std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(quantizationLayer->shared_from_this());
-            if (!NetworkHelper::isQuantizeSupported(fq)) {
-                return false;
-            }
-
-            // define concatenation operation consumers precisions
-            std::vector<element::Type> fqChildrenPrecisions = precisionsOnActivations;
-            fillAvailablePrecisions(quantizationLayer, fqChildrenPrecisions);
-            concatChildrenPrecisions = NetworkHelper::precisionIntersection(concatChildrenPrecisions, fqChildrenPrecisions);
-            if (concatChildrenPrecisions.empty()) {
-                return false;
-            }
-
-            // define FakeQuantize precisions without zero point
-            const DataPrecision tmp = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false);
-            if (dataPrecision.precision == ngraph::element::undefined) {
-                dataPrecision = tmp;
-                continue;
-            }
-
-            if ((tmp.precision != dataPrecision.precision) && (tmp.precision == ngraph::element::u8)) {
-                dataPrecision = tmp;
-            }
-        }
-
-        if (std::find(concatChildrenPrecisions.begin(), concatChildrenPrecisions.end(), dataPrecision.precision) == concatChildrenPrecisions.end()) {
-            dataPrecision = DataPrecision(concatChildrenPrecisions[0]);
-        }
-    }
-
-    for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
-        const std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
-        if (fq == nullptr) {
-            return false;
-        }
-
-        if (!NetworkHelper::isQuantizeSupported(fq)) {
-            return false;
-        }
-    }
-
-    std::unordered_map<std::string, ngraph::pass::low_precision::FakeQuantizeDequantization> dequantizations;
-
-    for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
-        const std::shared_ptr<ngraph::Node>& fakeQuantizeLayer = subgraph.quantizationLayers[i];
-
-        std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(fakeQuantizeLayer->shared_from_this());
-        assert(fq);
-
-        auto newFakeQuantize = NetworkHelper::fuseConvert(fq);
-        if (newFakeQuantize != nullptr) {
-            fq = newFakeQuantize;
-        }
-
-        newFakeQuantize = NetworkHelper::composeFakeQuantize(fq);
-        if (newFakeQuantize != nullptr) {
-            fq = newFakeQuantize;
-        }
-
-        const DataPrecision currentDataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false);
-        const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fq);
-
-        // 1. get data for dequantization. Dequantization data will be used several times later.
-        const FakeQuantizeDequantization fakeQuantizeDequantization = ngraph::pass::low_precision::NetworkHelper::createDequantizationFromFakeQuantize(
-            fq,
-            dataPrecision.precision,
-            dataPrecision.min,
-            dataPrecision.max,
-            dataPrecision.precision == currentDataPrecision.precision ? currentDataPrecision.hasZeroPoint : true,
-            updatePrecisions,
-            deqPrecision);
-        dequantizations[fakeQuantizeLayer->get_friendly_name()] = fakeQuantizeDequantization;
-
-        // 2. update FakeQuantize - one time action
-        const std::shared_ptr<opset1::FakeQuantize> newFakeQuantizeLayer = ngraph::pass::low_precision::NetworkHelper::updateFakeQuantize(
-            fq,
-            updatePrecisions ? dataPrecision.precision : fakeQuantizeLayer->get_output_element_type(0),
-            roundf(dataPrecision.min),
-            roundf(dataPrecision.max));
-
-        subgraph.quantizationLayers[i] = newFakeQuantizeLayer;
-        subgraph.layers[fakeQuantizeLayer->get_friendly_name()] = newFakeQuantizeLayer;
-    }
-
-    auto dequantizationValuesCallback = [&](
-        std::shared_ptr<ngraph::Node> layer,
-        std::shared_ptr<ngraph::Node> child,
-        const std::string originalLayerName,
-        std::vector<FakeQuantizeDequantization>& dequantizationsToConcatenate) {
-        if (layer->get_friendly_name() != originalLayerName) {
-            const auto update = [](
-                const std::string& originalLayerName,
-                const std::string& newLayerName,
-                std::unordered_map<std::string, FakeQuantizeDequantization>& dequantizationLayers) {
-                auto it = dequantizationLayers.find(originalLayerName);
-                if (it != dequantizationLayers.end()) {
-                    dequantizationLayers.emplace(newLayerName, it->second);
-                    dequantizationLayers.erase(it);
-                }
-            };
-            update(originalLayerName, layer->get_friendly_name(), dequantizations);
-        }
-
-        fillDequantization(
-            layer,
-            dequantizations,
-            dequantizationsToConcatenate);
-
-        if (!is_type<ngraph::opset1::Concat>(layer)) {
-            // for intermediate layers we should get Dq operations to be inserted between layer and child
-            assert(dequantizationsToConcatenate.size() == 1ul);
-            const size_t sourceOutputIdx = NetworkHelper::getParentOutputIndex(layer, child);
-            if (layer->get_input_partial_shape(0)[1] != layer->get_output_partial_shape(sourceOutputIdx)[1]) {
-                dequantizationsToConcatenate[0] = getFoldedDequantization(layer, dequantizationsToConcatenate[0], sourceOutputIdx);
-            }
-        }
-    };
-
-    addDequantizationLayers(context, subgraph, dequantizationValuesCallback);
-
-    if (updatePrecisions) {
-        for (const auto it : subgraph.layers) {
-            const std::shared_ptr<ngraph::Node> node = it.second;
-            if (std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(node)) {
-                ngraph::pass::low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(node->shared_from_this(), dataPrecision.precision);
-            } else {
-                // set precision to explicitly to have updated precision during transformation
-                for (size_t i = 0; i < node->get_output_size(); ++i) {
-                    node->set_output_type(i, dataPrecision.precision, node->get_output_partial_shape(i));
-                }
-            }
-        }
-    }
-
-    for (const std::shared_ptr<ngraph::Node>& quantizationLayer : subgraph.quantizationLayers) {
-        context.quantizedFakeQuantizeNames.insert(quantizationLayer->get_friendly_name());
-    }
-    return true;
-}
-
-bool ConcatMultiChannelsTransformation::isPrecisionPreserved(std::shared_ptr<Node>) const noexcept {
-    return true;
-}
-
-void ConcatMultiChannelsTransformation::fillDequantization(
-    const std::shared_ptr<ngraph::Node> layer,
-    const std::unordered_map<std::string, FakeQuantizeDequantization>& dequantizationByFakeQuantize,
-    std::vector<FakeQuantizeDequantization>& dequantization) const {
-    const auto fillDqByFakeQuantize = [&](const std::shared_ptr<ngraph::Node>& fq) {
-        const auto it = dequantizationByFakeQuantize.find(fq->get_friendly_name());
-        if (it == dequantizationByFakeQuantize.end()) {
-            THROW_IE_LPT_EXCEPTION(*fq) << "dequantization scale values are not found";
-        }
-
-        const FakeQuantizeDequantization& fakeQuantizeDequantization = it->second;
-        dequantization.push_back(fakeQuantizeDequantization);
-    };
-
-    if (is_type<ngraph::opset1::FakeQuantize>(layer)) {
-        fillDqByFakeQuantize(layer);
-    } else {
-        for (size_t i = 0; i < layer->get_input_size(); ++i) {
-            std::shared_ptr<ngraph::Node> parent = layer->get_input_node_shared_ptr(i);
-            if (as_type_ptr<ngraph::opset1::Constant>(parent)) {
-                continue;
-            }
-
-            const auto fakeQuantize = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(parent);
-            if (fakeQuantize) {
-                fillDqByFakeQuantize(fakeQuantize);
-            } else {
-                const auto concat = ngraph::as_type_ptr<ngraph::opset1::Concat>(parent);
-                if (concat) {
-                    std::vector<FakeQuantizeDequantization> dequantizationToConcatenate;
-                    fillDequantization(concat, dequantizationByFakeQuantize, dequantizationToConcatenate);
-
-                    // add concatenated dequantization operations to dequantization collection
-                    dequantization.push_back(getConcatenatedDequantization(concat, dequantizationToConcatenate));
-                } else {
-                    const size_t sourceOutputIdx = NetworkHelper::getParentOutputIndex(parent, layer);
-                    if (parent->get_input_partial_shape(0)[1] != parent->get_output_partial_shape(sourceOutputIdx)[1]) {
-                        std::vector<FakeQuantizeDequantization> dequantizationToPropagate;
-                        fillDequantization(parent, dequantizationByFakeQuantize, dequantizationToPropagate);
-
-                        // add folded dequantization operations to dequantization colection
-                        dequantization.push_back(getFoldedDequantization(parent, dequantizationToPropagate[0], sourceOutputIdx));
-                    } else {
-                        fillDequantization(parent, dequantizationByFakeQuantize, dequantization);
-                    }
-                }
-            }
-        }
-    }
-}
-
-FakeQuantizeDequantization ConcatMultiChannelsTransformation::getConcatenatedDequantization(
-    const std::shared_ptr<ngraph::opset1::Concat> concat,
-    const std::vector<FakeQuantizeDequantization>& dequantization) const {
-    NodeVector convertNodes;
-    NodeVector subtractNodes;
-    NodeVector multiplyNodes;
-
-    // forming nodes for concatenation
-    fillDequantizationNodes(dequantization, concat, convertNodes, subtractNodes, multiplyNodes);
-
-    std::shared_ptr<Node> parent = concat;
-    std::shared_ptr<DequantizationConvert> convert;
-    if (!convertNodes.empty()) {
-        convert = as_type_ptr<DequantizationConvert>(dequantization[0].convert->clone_with_new_inputs({ parent }));
-        parent = convert;
-    }
-
-    std::shared_ptr<DequantizationSubtract> subtract;
-    std::shared_ptr<ngraph::opset1::Constant> subConst;
-    if (!subtractNodes.empty()) {
-        subConst = as_type_ptr<ngraph::opset1::Constant>(concatenateDeqNodes(subtractNodes));
-        subtract = std::make_shared<DequantizationSubtract>(parent, subConst);
-        parent = subtract;
-    }
-
-    std::shared_ptr<DequantizationMultiply> multiply;
-    std::shared_ptr<ngraph::opset1::Constant> mulConst;
-    if (!multiplyNodes.empty()) {
-        mulConst = as_type_ptr<ngraph::opset1::Constant>(concatenateDeqNodes(multiplyNodes));
-        multiply = std::make_shared<DequantizationMultiply>(parent, mulConst);
-    }
-
-    return FakeQuantizeDequantization(concat, convert, subtract, nullptr, subConst, multiply, mulConst);
-}
-
-FakeQuantizeDequantization ConcatMultiChannelsTransformation::getFoldedDequantization(
-    const std::shared_ptr<ngraph::Node> operation,
-    const FakeQuantizeDequantization& dequantization,
-    const size_t sourceOutputIdx) {
-    OutputVector inputs = operation->input_values();
-    OutputVector outputs(operation->get_output_size());
-    Output<Node> data = operation->output(sourceOutputIdx);
-
-    std::shared_ptr<Node> parent = operation;
-    std::shared_ptr<DequantizationConvert> convert;
-    if (dequantization.convert) {
-        convert = as_type_ptr<DequantizationConvert>(dequantization.convert->clone_with_new_inputs({ data }));
-        parent = convert;
-    }
-
-    std::shared_ptr<DequantizationSubtract> subtract;
-    std::shared_ptr<ngraph::opset1::Constant> subConst;
-    if (dequantization.subtract) {
-        subConst = NetworkHelper::foldDequantizationConstant(dequantization.subtractConstant, operation, sourceOutputIdx);
-        subtract = std::make_shared<DequantizationSubtract>(parent, subConst);
-        parent = subtract;
-    }
-
-    std::shared_ptr<DequantizationMultiply> multiply;
-    std::shared_ptr<ngraph::opset1::Constant> mulConst;
-    if (dequantization.multiply) {
-        mulConst = NetworkHelper::foldDequantizationConstant(dequantization.multiplyConstant, operation, sourceOutputIdx);
-        multiply = std::make_shared<DequantizationMultiply>(parent, mulConst);
-    }
-
-    return FakeQuantizeDequantization(data, convert, subtract, nullptr, subConst, multiply, mulConst);
-}
-
-} // namespace low_precision
-} // namespace pass
-} // namespace ngraph
--- a/inference-engine/src/low_precision_transformations/src/convert.cpp
+++ b/inference-engine/src/low_precision_transformations/src/convert.cpp
@ -11,6 +11,7 @@
 #include <utility>
 #include <vector>

+#include <ngraph/pattern/op/wrap_type.hpp>
 #include "low_precision/common/ie_lpt_exception.hpp"
 #include "low_precision/network_helper.hpp"

@ -18,11 +19,24 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-void ConvertTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
-    addSingleNodePattern<opset1::Convert>(pass, context);
+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ConvertTransformation, "ConvertTransformation", 0);
+
+// Registers a matcher that fires on every opset1::Convert node and forwards the match to transform().
+ConvertTransformation::ConvertTransformation(const Params& params) : LayerTransformation(params) {
+    const auto convertPattern = pattern::wrap_type<opset1::Convert>();
+
+    ngraph::graph_rewrite_callback onMatch = [this](pattern::Matcher& matched) {
+        // Nodes for which transformation_callback reports true are left untouched.
+        const auto root = matched.get_match_root();
+        return !transformation_callback(root) && transform(*context, matched);
+    };
+
+    this->register_matcher(
+        std::make_shared<ngraph::pattern::Matcher>(convertPattern, "ConvertTransformation"),
+        onMatch);
 }

-bool ConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
+bool ConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
    std::shared_ptr<opset1::Convert> convert = as_type_ptr<opset1::Convert>(m.get_match_root());
    if (!convert) {
        return false;
--- a/inference-engine/src/low_precision_transformations/src/convolution.cpp
+++ b/inference-engine/src/low_precision_transformations/src/convolution.cpp
@ -10,6 +10,8 @@
 #include <vector>
 #include <cassert>

+#include <ngraph/pattern/op/wrap_type.hpp>
+#include <ngraph/pattern/op/or.hpp>
 #include "low_precision/network_helper.hpp"
 #include "low_precision/common/dequantization_op.hpp"

@ -17,28 +19,39 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ConvolutionTransformation, "ConvolutionTransformation", 0);
+
 ConvolutionTransformation::ConvolutionTransformation(const Params& params) : WeightableLayerTransformation(params) {
+    // Pattern: a Convolution whose first input is produced by a Multiply and whose second
+    // (weights) input is produced by either a Multiply or a FakeQuantize.
+    const auto firstInput = ngraph::pattern::wrap_type<opset1::Multiply>();
+    const auto weightsInput = std::make_shared<pattern::op::Or>(OutputVector {
+        pattern::wrap_type<opset1::Multiply>(),
+        pattern::wrap_type<opset1::FakeQuantize>()
+    });
+    const auto convolutionPattern = ngraph::pattern::wrap_type<opset1::Convolution>({ firstInput, weightsInput });
+
+    ngraph::graph_rewrite_callback onMatch = [this](pattern::Matcher& matched) {
+        // Nodes for which transformation_callback reports true are left untouched.
+        const auto root = matched.get_match_root();
+        return !transformation_callback(root) && transform(*context, matched);
+    };
+
+    this->register_matcher(
+        std::make_shared<ngraph::pattern::Matcher>(convolutionPattern, "ConvolutionTransformation"),
+        onMatch);
 }

-void ConvolutionTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
-    addPattern(
-        pass,
-        context,
-        make_op_pattern<opset1::Convolution>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::Multiply>() }));
-
-    addPattern(
-        pass,
-        context,
-        make_op_pattern<opset1::Convolution>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::FakeQuantize>() }));
+// Instance-level check: forwards to the static variant so both report the same result.
+bool ConvolutionTransformation::isQuantized(const std::shared_ptr<const Node>& layer) const noexcept {
+    return isQuantizedStatic(layer);
 }

-bool ConvolutionTransformation::isQuantized(std::shared_ptr<Node> layer) const noexcept {
-    return WeightableLayerTransformation::isQuantized(layer, false);
+// Static check callable without a transformation instance; delegates to the
+// WeightableLayerTransformation implementation.
+bool ConvolutionTransformation::isQuantizedStatic(const std::shared_ptr<const Node>& layer) noexcept {
+    // NOTE(review): the meaning of the second argument is defined by
+    // WeightableLayerTransformation::isQuantizedStatic (presumably "reshape is required") - confirm.
+    const bool baseCheckFlag = false;
+    return WeightableLayerTransformation::isQuantizedStatic(layer, baseCheckFlag);
 }

-
-
-bool ConvolutionTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const {
+bool ConvolutionTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) {
    auto convolution = m.get_match_root();

    if (!canConvolutionBeTransformed(context, convolution)) {
@ -150,7 +163,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
                reducedConstant->cast_vector<float>()[0]);
        }

-        const auto copyNode = convolution->copy_with_new_inputs({ dequantization.multiply->input_value(0), convolution->input_value(1) });
+        const auto copyNode = convolution->clone_with_new_inputs({ dequantization.multiply->input_value(0), convolution->input_value(1) });
        auto conv = as_type_ptr<opset1::Convolution>(copyNode);
        std::shared_ptr<Node> relaxedNewConvolution;
        if (conv) {
@ -164,6 +177,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
                    std::vector<element::Type>{deqPrecision, deqPrecision},
                    std::vector<element::Type>{deqPrecision});
        }
+        NetworkHelper::copyInfo(convolution, relaxedNewConvolution);

        std::shared_ptr<ngraph::opset1::Multiply> newMultiplyAfter = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
            std::vector<element::Type>{ deqPrecision, deqPrecision },
@ -179,12 +193,18 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
                convolution->get_input_node_ptr(0)->get_input_source_output(0),
                convolution->input_value(1)});
            replace_node(convolution, newConvolution);
+            NetworkHelper::copyInfo(convolution, newConvolution);
            convolution = newConvolution;
        }
    }

    {
-        decomposeFakeQuantizeForWeightsPath(convolution);
+        const bool decomposed = decomposeFakeQuantizeForWeightsPath(convolution);
+        assert((updatePrecisions && decomposed) || (!updatePrecisions));
+        if (!updatePrecisions && !decomposed) {
+            // TODO: LPT: issue #58685
+            return false;
+        }

        std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(convolution->input_value(1).get_node_shared_ptr());

@ -218,13 +238,16 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
                    reshapeFromWeights->input_value(1) }));
            }

+            auto newConvolution = convolution->clone_with_new_inputs({
+                convolution->input_value(0),
+                reshapeFromWeights != nullptr ?
+                    reshapeFromWeights :
+                    multiplyFromWeights->input_value(0)
+            });
+            NetworkHelper::copyInfo(convolution, newConvolution);
+
            auto newMultiplyAfter = std::make_shared<DequantizationMultiply>(
-                convolution->copy_with_new_inputs({
-                    convolution->input_value(0),
-                    reshapeFromWeights != nullptr ?
-                        reshapeFromWeights :
-                        multiplyFromWeights->input_value(0)
-                    }),
+                newConvolution,
                foldConvert(
                    fold_reshape<opset1::Reshape>(
                        multiplyFromWeights->input_value(1),
@ -270,6 +293,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
                    convolution->get_input_node_ptr(1)->input_value(0) :
                    childNode->copy_with_new_inputs({convertFromWeights->input_value(0), childNode->input_value(1)})});
            replace_node(convolution, newConvolution);
+            NetworkHelper::copyInfo(convolution, newConvolution);
            convolution = newConvolution;
        }

--- a/Show More
+++ b/Show More