Fix mixed precision inference for quantized IRs (#16785)
* disable mixed precision inference for quantized IRs
* typo fix
* improved solution: disable mixed precision in quantized IRs selectively, only for float nodes
* minor typo corrections
* added unit tests
* renamed rt_info
* updated the list of nodes through which FQ marking is propagated; updated unit tests
* fix failing build
Parent: 01065338ef
Commit: 6ff0cad127
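The selective disabling relies on OpenVINO's per-node rt_info map: the pass sets a boolean marker on nodes reachable from a FakeQuantize and erases it before returning, so the marker never leaks into the serialized IR. A minimal sketch of that idiom, using a hypothetical key name "my_marker" rather than the PR's "fq_path"/"reduceop_path" keys:

#include <memory>
#include "openvino/core/node.hpp"

// Sketch of the rt_info marker idiom used in this PR: a boolean flag is
// stored in the node's runtime-info map, queried during propagation, and
// erased at the end of the pass. "my_marker" is an illustrative key name.
void mark(const std::shared_ptr<ov::Node>& node) {
    node->get_rt_info().emplace("my_marker", true);
}

bool is_marked(const std::shared_ptr<const ov::Node>& node) {
    return node->get_rt_info().count("my_marker") > 0;
}

void erase_mark(const std::shared_ptr<ov::Node>& node) {
    node->get_rt_info().erase("my_marker");
}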
@@ -8,6 +8,7 @@
#include "openvino/op/util/broadcast_base.hpp"
#include "openvino/op/util/gather_base.hpp"
#include "openvino/opsets/opset10.hpp"
#include "openvino/opsets/opset11.hpp"
#include "openvino/opsets/opset2.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/pass/pattern/op/or.hpp"
@@ -23,6 +24,30 @@ using namespace ov::opset10;
namespace ov {
namespace pass {

void mark_reduceop_path(const std::shared_ptr<Node>& node) {
    node->get_rt_info().emplace("reduceop_path", true);
}
bool is_reduceop_path(const std::shared_ptr<const Node>& node) {
    return node->get_rt_info().count("reduceop_path");
}

void erase_reduceop_path(const std::shared_ptr<Node>& node) {
    auto& rt_info = node->get_rt_info();
    rt_info.erase("reduceop_path");
}

void mark_fq_path(const std::shared_ptr<Node>& node) {
    node->get_rt_info().emplace("fq_path", true);
}
bool is_fq_path(const std::shared_ptr<const Node>& node) {
    return node->get_rt_info().count("fq_path");
}

void erase_fq_path(const std::shared_ptr<Node>& node) {
    auto& rt_info = node->get_rt_info();
    rt_info.erase("fq_path");
}

// Marking continues to propagate through these ops.
std::shared_ptr<Node> propagate_through_ops = pattern::wrap_type<Squeeze,
                                                                 Unsqueeze,
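The hunk above ends inside the propagate_through_ops pattern list. For context, pattern::wrap_type<Ops...> produces a pattern node that matches a node of any of the listed op types, and a MatcherPass pairs such a pattern with a callback. A minimal sketch under those assumptions (the class MyPass and its op list are illustrative, not part of this PR):

#include <memory>
#include "openvino/opsets/opset10.hpp"
#include "openvino/pass/graph_rewrite.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"

// Minimal MatcherPass sketch (illustrative, not from this PR). wrap_type
// matches a node of any listed type; the callback runs on every match and
// returns whether it changed the graph.
class MyPass : public ov::pass::MatcherPass {
public:
    OPENVINO_RTTI("MyPass", "0");
    MyPass() {
        auto pattern_root = ov::pass::pattern::wrap_type<ov::opset10::Relu, ov::opset10::Squeeze>();
        ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
            const auto& node = m.get_match_root();
            if (!node)
                return false;
            // ... inspect or mark the matched node here ...
            return false;  // this sketch does not modify the graph
        };
        auto m = std::make_shared<ov::pass::pattern::Matcher>(pattern_root, "MyPass");
        register_matcher(m, callback);
    }
};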
@@ -72,11 +97,11 @@ public:
        if (!has_marked_output)
            return false;

-       auto convert_node = dynamic_pointer_cast<Convert>(node);
+       auto convert_node = as_type_ptr<Convert>(node);
        if (convert_node) {
            // if during propagating up there is a Convert it must go to Const,
            // otherwise interrupt propagation
-           auto const_node = dynamic_pointer_cast<Constant>(node->input_value(0).get_node_shared_ptr());
+           auto const_node = as_type_ptr<Constant>(node->input_value(0).get_node_shared_ptr());
            if (!const_node)
                return false;
        }
@@ -106,7 +131,7 @@ public:
            return false;

        // on convert down propagation should be interrupted
-       auto convert_node = dynamic_pointer_cast<Convert>(node);
+       auto convert_node = as_type_ptr<Convert>(node);
        if (convert_node)
            return false;

@@ -114,6 +139,11 @@ public:
            for (const auto& in_node : node->input_values()) {
                if (!in_node.get_element_type().is_real())
                    continue;
+               if (is_fq_path(in_node.get_node_shared_ptr())) {
+                   enable_fp16_compression(node);
+                   return true;
+               }
+
                if (fp16_compression_is_disabled(in_node.get_node_shared_ptr())) {
                    disable_fp16_compression(node);
                    is_changed = true;
@@ -127,18 +157,6 @@ public:
    }
};

-void mark_reduceop_path(const std::shared_ptr<Node>& node) {
-    node->get_rt_info().emplace("reduceop_path", true);
-}
-bool is_reduceop_path(const std::shared_ptr<const Node>& node) {
-    return node->get_rt_info().count("reduceop_path");
-}
-
-void erase_reduceop_path(const std::shared_ptr<Node>& node) {
-    auto& rt_info = node->get_rt_info();
-    rt_info.erase("reduceop_path");
-}
-
class InitMarkReduceOpPath : public pass::MatcherPass {
public:
    OPENVINO_RTTI("InitMarkReduceOpPath", "0");
@@ -267,11 +285,11 @@ public:
        if (!m.get_match_root())
            return false;

-       const auto mul = std::dynamic_pointer_cast<Multiply>(m.get_match_root());
+       const auto mul = as_type_ptr<Multiply>(m.get_match_root());
        // if pattern input_1*Pow(Maximum(input_2, eps), z) or input_1*Pow(Add(input_2, eps), z) is matched
        // need to check that power is negative
        if (mul) {
-           const auto pow_const = std::dynamic_pointer_cast<Constant>(pattern_to_output.at(pow_exp));
+           const auto pow_const = as_type_ptr<Constant>(pattern_to_output.at(pow_exp));
            if (pow_const) {
                // continue only if exponent is negative (z < 0)
                if (pow_const->get_element_type() == element::f16) {
@@ -286,7 +304,7 @@ public:
            }
        }

-       const auto eps_const = std::dynamic_pointer_cast<Constant>(pattern_to_output.at(eps_const_pattern));
+       const auto eps_const = as_type_ptr<Constant>(pattern_to_output.at(eps_const_pattern));
        if (!eps_const)
            return false;
        if (eps_const->get_element_type() == element::f32) {
@@ -307,6 +325,68 @@ public:
    }
};

+class PropagateDownDisableSensitivityForQuantized : public pass::MatcherPass {
+public:
+    OPENVINO_RTTI("DisableMarkingForQuantizedNodes", "0");
+    PropagateDownDisableSensitivityForQuantized() {
+        MATCHER_SCOPE(PropagateDownDisableSensitivityForQuantized);
+
+        // marking is propagated down through these nodes
+        std::shared_ptr<Node> quantization_propagating_nodes = pattern::wrap_type<Squeeze,
+                                                                                  Unsqueeze,
+                                                                                  FakeQuantize,
+                                                                                  Reshape,
+                                                                                  op::util::BroadcastBase,
+                                                                                  DepthToSpace,
+                                                                                  opset2::Interpolate,
+                                                                                  opset4::Interpolate,
+                                                                                  opset11::Interpolate,
+                                                                                  opset2::MaxPool,
+                                                                                  MaxPool,
+                                                                                  Pad,
+                                                                                  ReduceMax,
+                                                                                  ReduceMin,
+                                                                                  Relu,
+                                                                                  Transpose,
+                                                                                  ShuffleChannels,
+                                                                                  StridedSlice,
+                                                                                  Slice,
+                                                                                  VariadicSplit,
+                                                                                  Split,
+                                                                                  op::util::GatherBase,
+                                                                                  Concat,
+                                                                                  Tile>();
+
+        matcher_pass_callback callback = [=](pattern::Matcher& m) {
+            const auto& node = m.get_match_root();
+            if (!node)
+                return false;
+
+            auto is_quantize = as_type_ptr<FakeQuantize>(node);
+            if (is_quantize) {
+                mark_fq_path(node);
+                return true;
+            }
+
+            bool is_changed = false;
+
+            for (const auto& in_node_output : node->input_values()) {
+                auto input_node = in_node_output.get_node_shared_ptr();
+                auto is_quantize = as_type_ptr<FakeQuantize>(input_node);
+                if (is_quantize || is_fq_path(input_node)) {
+                    mark_fq_path(node);
+                    enable_fp16_compression(node);
+                    is_changed = true;
+                }
+            }
+
+            return is_changed;
+        };
+        auto m = make_shared<pattern::Matcher>(quantization_propagating_nodes, matcher_name);
+        register_matcher(m, callback);
+    }
+};
+
bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model>& m) {
    RUN_ON_MODEL_SCOPE(MarkSugraphsToKeepInMixedPrecision);

@@ -314,6 +394,7 @@ bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model
    // Mark root of Division with eps pattern to keep in FP32
    REGISTER_PASS(manager, MarkDivWithEps)
    REGISTER_PASS(manager, MarkExpInReduceOpPath)
+   REGISTER_PASS(manager, PropagateDownDisableSensitivityForQuantized)

    // both Up and Down propagations are needed.
    // Why both of them are needed is explained in comments in passes declarations.
@@ -328,6 +409,7 @@ bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model

    for (auto& node : m->get_ops()) {
        erase_reduceop_path(node);
+       erase_fq_path(node);
    }

    return false; // no need to revalidate
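The test diffs below exercise this pass end to end. Running it on an arbitrary model follows the same recipe the tests use; a short sketch, where the header path is an assumption based on OpenVINO's usual transformations layout:

#include <memory>
#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp"

// Sketch: run the marking pass over an existing model, as the tests below do.
// The include path is assumed; the class name (including the "Sugraphs" typo)
// matches the code above.
void mark_model(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::MarkSugraphsToKeepInMixedPrecision>();
    manager.run_passes(model);
}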
@@ -126,7 +126,6 @@ TEST(TransformationTests, MarkSugraphsToKeepInMixedPrecision_reducesum_without_e
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;

    {
        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
@@ -138,24 +137,10 @@ TEST(TransformationTests, MarkSugraphsToKeepInMixedPrecision_reducesum_without_e
        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);

        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
        model_ref = model->clone();

        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
        manager.run_passes(model);
    }

    {
        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);

        auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
        auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
        auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);

        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
    }

    const FunctionsComparator func_comparator =
        FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
@@ -519,7 +504,6 @@ TEST(TransformationTests, PowWithPositiveExponent) {
    pass::Manager manager;
    // graph should be left unchanged
    const float eps_value = 1.0e-12f;
    {
        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
@@ -529,22 +513,11 @@ TEST(TransformationTests, PowWithPositiveExponent) {
        auto mul = std::make_shared<Multiply>(input_1, pow);

        model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
        model_ref = model->clone();

        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
        manager.run_passes(model);
    }

    {
        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
        auto add = std::make_shared<Add>(input_2, eps_const);
        auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
        auto pow = std::make_shared<Power>(add, pow_exp_const);
        auto mul = std::make_shared<Multiply>(input_1, pow);

        model_ref = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
    }
    const FunctionsComparator func_comparator =
        FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
    // need to compare twice to ensure that no extra nodes are marked
@@ -559,7 +532,6 @@ TEST(TransformationTests, DivisionByZeroMinimalPatternUnchanged) {
    pass::Manager manager;
    // if eps_value is greater than normalized_fp16_min then leave graph unchanged
    const float eps_value = 0.0001f;
    {
        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
@@ -567,20 +539,11 @@ TEST(TransformationTests, DivisionByZeroMinimalPatternUnchanged) {
        auto divide = std::make_shared<Divide>(input_1, add);

        model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
        model_ref = model->clone();

        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
        manager.run_passes(model);
    }

    {
        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
        auto add = std::make_shared<Add>(input_2, eps_const);
        auto divide = std::make_shared<Divide>(input_1, add);

        model_ref = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
    }
    const FunctionsComparator func_comparator =
        FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
    // need to compare twice to ensure that no extra nodes are marked
@@ -798,7 +761,6 @@ TEST(TransformationTests, MarkReduceOpExpToKeepInMixedPrecision_reducesum_withou
    // ReduceSum without Exp is not a precision sensitive case
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {
        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
@@ -810,24 +772,10 @@ TEST(TransformationTests, MarkReduceOpExpToKeepInMixedPrecision_reducesum_withou
        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);

        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
        model_ref = model->clone();

        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
        manager.run_passes(model);
    }

    {
        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);

        auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
        auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
        auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);

        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
    }

    const FunctionsComparator func_comparator =
        FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
@@ -986,7 +934,7 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_PowWithPositiveEx
    const float eps_value = 1.e-12f;
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {

        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
@@ -996,21 +944,11 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_PowWithPositiveEx
        auto mul = std::make_shared<Multiply>(input_1, pow);

        model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
        model_ref = model->clone();

        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
        manager.run_passes(model);
    }

    {
        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
        auto add = std::make_shared<Add>(input_2, eps_const);
        auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
        auto pow = std::make_shared<Power>(add, pow_exp_const);
        auto mul = std::make_shared<Multiply>(input_1, pow);

        model_ref = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
    }
    const auto fc = FunctionsComparator::with_default()
                        .enable(FunctionsComparator::PRECISIONS)
                        .enable(FunctionsComparator::RUNTIME_KEYS)
@@ -1027,7 +965,7 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_MinimalPatternUnc
    const float eps_value = 0.0001f;
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {

        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
@@ -1035,19 +973,11 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_MinimalPatternUnc
        auto divide = std::make_shared<Divide>(input_1, add);

        model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
        model_ref = model->clone();

        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
        manager.run_passes(model);
    }

    {
        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
        auto add = std::make_shared<Add>(input_2, eps_const);
        auto divide = std::make_shared<Divide>(input_1, add);

        model_ref = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
    }
    const auto fc = FunctionsComparator::with_default()
                        .enable(FunctionsComparator::PRECISIONS)
                        .enable(FunctionsComparator::RUNTIME_KEYS)
@@ -1162,3 +1092,82 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_InL2NormWithSqrtA
    result = fc(model, model_ref);
    ASSERT_TRUE(result.valid) << result.message;
}

TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_disable_for_quantized_nodes_1) {
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    // although there are precision-sensitive Exp->ReduceSum nodes, the FakeQuantize ops mean they
    // will be inferred in int8, so there is no need to mark them: model and model_ref should match
    auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
    auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
    auto exp_1 = make_shared<opset10::Exp>(input_1);

    auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
    auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
    auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
    auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
    auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);

    auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
    auto reduce_sum_1 = make_shared<opset10::ReduceSum>(fq_1, reduction_axes);

    auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
    auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);

    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
    model_ref = model->clone();

    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
    manager.run_passes(model);

    const auto fc = FunctionsComparator::with_default()
                        .enable(FunctionsComparator::PRECISIONS)
                        .enable(FunctionsComparator::RUNTIME_KEYS)
                        .enable(FunctionsComparator::CONST_VALUES);
    // need to compare twice to ensure that no extra nodes are marked
    FunctionsComparator::Result result = fc(model_ref, model);
    ASSERT_TRUE(result.valid) << result.message;
    result = fc(model, model_ref);
    ASSERT_TRUE(result.valid) << result.message;
}

TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_disable_for_quantized_nodes_2) {
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    // although there are precision-sensitive Exp->ReduceSum nodes, the FakeQuantize ops mean they
    // will be inferred in int8, so there is no need to mark them: model and model_ref should match
    auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
    auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
    auto exp_1 = make_shared<opset10::Exp>(input_1);

    auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
    auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
    auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
    auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
    auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);

    auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
    auto unsqueeze_1 = make_shared<opset10::Unsqueeze>(fq_1, unsqueeze_axes);

    auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
    auto reduce_sum_1 = make_shared<opset10::ReduceSum>(unsqueeze_1, reduction_axes);

    auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
    auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);

    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
    model_ref = model->clone();

    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
    manager.run_passes(model);

    const auto fc = FunctionsComparator::with_default()
                        .enable(FunctionsComparator::PRECISIONS)
                        .enable(FunctionsComparator::RUNTIME_KEYS)
                        .enable(FunctionsComparator::CONST_VALUES);
    // need to compare twice to ensure that no extra nodes are marked
    FunctionsComparator::Result result = fc(model_ref, model);
    ASSERT_TRUE(result.valid) << result.message;
    result = fc(model, model_ref);
    ASSERT_TRUE(result.valid) << result.message;
}
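Outside the comparator-based tests, the effect of the marking can be inspected directly: fp16_compression_is_disabled is the same helper the down-propagation code above queries. A hedged sketch (the include path is assumed from OpenVINO's usual layout):

#include <iostream>
#include <memory>
#include "openvino/core/model.hpp"
#include "transformations/rt_info/disable_fp16_compression.hpp"

// Sketch: after running the marking pass, list the nodes that must stay
// in FP32. fp16_compression_is_disabled is the helper used by the
// propagation code above; the header path is an assumption.
void dump_fp32_nodes(const std::shared_ptr<ov::Model>& model) {
    for (const auto& node : model->get_ops()) {
        if (ov::fp16_compression_is_disabled(node))
            std::cout << node->get_friendly_name() << " stays in FP32\n";
    }
}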
@@ -1716,3 +1716,119 @@ TEST(TransformationTests, ConvertPrecision_exp_through_unsqueeze) {
    FunctionsComparator::Result result = func_comparator(model_ref, model);
    ASSERT_TRUE(result.valid) << result.message;
}

TEST(TransformationTests, ConvertPrecision_disable_for_quantized_nodes_1) {
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {
        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto exp_1 = make_shared<opset10::Exp>(input_1);

        auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
        auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
        auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
        auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);

        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(fq_1, reduction_axes);

        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);

        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});

        type_to_fuse_map empty_type_to_fuse_map = {};
        bool keep_precision_sensitive_in_fp32 = true;
        manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
                                                      empty_type_to_fuse_map,
                                                      keep_precision_sensitive_in_fp32);
        manager.run_passes(model);
    }

    {
        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto exp_1 = make_shared<opset10::Exp>(input_1);

        auto in_low = op::v0::Constant::create(element::f16, Shape{}, {0.f});
        auto in_high = op::v0::Constant::create(element::f16, Shape{}, {5.f});
        auto out_low = op::v0::Constant::create(element::f16, Shape{}, {2.f});
        auto out_high = op::v0::Constant::create(element::f16, Shape{}, {4.f});
        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);

        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(fq_1, reduction_axes);

        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);

        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
    }

    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
    FunctionsComparator::Result result = func_comparator(model_ref, model);
    ASSERT_TRUE(result.valid) << result.message;
}

TEST(TransformationTests, ConvertPrecision_disable_for_quantized_nodes_2) {
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {
        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto exp_1 = make_shared<opset10::Exp>(input_1);

        auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
        auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
        auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
        auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);

        auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
        auto unsqueeze_1 = make_shared<opset10::Unsqueeze>(fq_1, unsqueeze_axes);

        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(unsqueeze_1, reduction_axes);

        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);

        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});

        type_to_fuse_map empty_type_to_fuse_map = {};
        bool keep_precision_sensitive_in_fp32 = true;
        manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
                                                      empty_type_to_fuse_map,
                                                      keep_precision_sensitive_in_fp32);
        manager.run_passes(model);
    }

    {
        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto exp_1 = make_shared<opset10::Exp>(input_1);

        auto in_low = op::v0::Constant::create(element::f16, Shape{}, {0.f});
        auto in_high = op::v0::Constant::create(element::f16, Shape{}, {5.f});
        auto out_low = op::v0::Constant::create(element::f16, Shape{}, {2.f});
        auto out_high = op::v0::Constant::create(element::f16, Shape{}, {4.f});
        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);

        auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
        auto unsqueeze_1 = make_shared<opset10::Unsqueeze>(fq_1, unsqueeze_axes);

        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(unsqueeze_1, reduction_axes);

        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);

        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
    }

    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
    FunctionsComparator::Result result = func_comparator(model_ref, model);
    ASSERT_TRUE(result.valid) << result.message;
}
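For completeness, the knob these tests exercise is the third constructor argument of ConvertPrecision; a minimal invocation sketch mirroring the test code above (the include path, and the unqualified type_to_fuse_map/precisions_map aliases, are assumptions based on how the tests use them):

#include <memory>
#include "openvino/pass/manager.hpp"
#include "transformations/convert_precision.hpp"

// Sketch mirroring the tests above: convert FP32 -> FP16 while keeping
// precision-sensitive subgraphs (as marked by the pass in this PR) in FP32.
// The header path is an assumption based on OpenVINO's usual layout.
void compress_to_fp16(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    type_to_fuse_map empty_type_to_fuse_map = {};
    const bool keep_precision_sensitive_in_fp32 = true;
    manager.register_pass<ov::pass::ConvertPrecision>(
        precisions_map{{ov::element::f32, ov::element::f16}},
        empty_type_to_fuse_map,
        keep_precision_sensitive_in_fp32);
    manager.run_passes(model);
}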