Fix mixed precision inference for quantized IRs (#16785)

* disable mixed precision inference for quantized IRs

* typo fix

* improved solution: in quantized IRs, selectively disable mixed precision only for float nodes

* minor typo corrections

* added unit-tests

* renamed rt_info

* updated list of nodes for which FQ is propagated; updated unit-tests

* fix failing build
Pavel Esir 2023-04-24 11:13:04 +02:00 committed by GitHub
parent 01065338ef
commit 6ff0cad127
3 changed files with 361 additions and 154 deletions
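
For context, the entry point this fix affects is the FP32-to-FP16 ConvertPrecision pipeline shown in the new unit tests at the end of this diff. A minimal sketch of that call, assuming `model` is a quantized ov::Model (pass names and signatures are taken from the test code below):

    ov::pass::Manager manager;
    type_to_fuse_map empty_type_to_fuse_map = {};
    bool keep_precision_sensitive_in_fp32 = true;
    // With keep_precision_sensitive_in_fp32 = true, MarkSugraphsToKeepInMixedPrecision
    // pins precision-sensitive subgraphs (e.g. Exp->ReduceSum) to FP32. After this fix,
    // float nodes dominated by FakeQuantize are excluded from that marking (via a
    // temporary "fq_path" rt_info key), since they will be executed in int8 anyway.
    manager.register_pass<ov::pass::ConvertPrecision>(precisions_map{{ov::element::f32, ov::element::f16}},
                                                      empty_type_to_fuse_map,
                                                      keep_precision_sensitive_in_fp32);
    manager.run_passes(model);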

Changed file 1 of 3:

@@ -8,6 +8,7 @@
 #include "openvino/op/util/broadcast_base.hpp"
 #include "openvino/op/util/gather_base.hpp"
 #include "openvino/opsets/opset10.hpp"
+#include "openvino/opsets/opset11.hpp"
 #include "openvino/opsets/opset2.hpp"
 #include "openvino/pass/manager.hpp"
 #include "openvino/pass/pattern/op/or.hpp"
@@ -23,6 +24,30 @@ using namespace ov::opset10;
 namespace ov {
 namespace pass {
+
+void mark_reduceop_path(const std::shared_ptr<Node>& node) {
+    node->get_rt_info().emplace("reduceop_path", true);
+}
+bool is_reduceop_path(const std::shared_ptr<const Node>& node) {
+    return node->get_rt_info().count("reduceop_path");
+}
+void erase_reduceop_path(const std::shared_ptr<Node>& node) {
+    auto& rt_info = node->get_rt_info();
+    rt_info.erase("reduceop_path");
+}
+
+void mark_fq_path(const std::shared_ptr<Node>& node) {
+    node->get_rt_info().emplace("fq_path", true);
+}
+bool is_fq_path(const std::shared_ptr<const Node>& node) {
+    return node->get_rt_info().count("fq_path");
+}
+void erase_fq_path(const std::shared_ptr<Node>& node) {
+    auto& rt_info = node->get_rt_info();
+    rt_info.erase("fq_path");
+}
+
 // Marking continues to propagate through these ops.
 std::shared_ptr<Node> propagate_through_ops = pattern::wrap_type<Squeeze,
                                                                  Unsqueeze,
@@ -72,11 +97,11 @@ public:
         if (!has_marked_output)
             return false;
 
-        auto convert_node = dynamic_pointer_cast<Convert>(node);
+        auto convert_node = as_type_ptr<Convert>(node);
         if (convert_node) {
             // if during propagating up there is a Convert it must go to Const,
             // otherwise interrupt propagation
-            auto const_node = dynamic_pointer_cast<Constant>(node->input_value(0).get_node_shared_ptr());
+            auto const_node = as_type_ptr<Constant>(node->input_value(0).get_node_shared_ptr());
             if (!const_node)
                 return false;
         }
@@ -106,7 +131,7 @@ public:
             return false;
 
         // on convert down propagation should be interrupted
-        auto convert_node = dynamic_pointer_cast<Convert>(node);
+        auto convert_node = as_type_ptr<Convert>(node);
         if (convert_node)
             return false;
@@ -114,6 +139,11 @@ public:
         for (const auto& in_node : node->input_values()) {
             if (!in_node.get_element_type().is_real())
                 continue;
+            if (is_fq_path(in_node.get_node_shared_ptr())) {
+                enable_fp16_compression(node);
+                return true;
+            }
+
             if (fp16_compression_is_disabled(in_node.get_node_shared_ptr())) {
                 disable_fp16_compression(node);
                 is_changed = true;
@@ -127,18 +157,6 @@ public:
     }
 };
 
-void mark_reduceop_path(const std::shared_ptr<Node>& node) {
-    node->get_rt_info().emplace("reduceop_path", true);
-}
-bool is_reduceop_path(const std::shared_ptr<const Node>& node) {
-    return node->get_rt_info().count("reduceop_path");
-}
-void erase_reduceop_path(const std::shared_ptr<Node>& node) {
-    auto& rt_info = node->get_rt_info();
-    rt_info.erase("reduceop_path");
-}
-
 class InitMarkReduceOpPath : public pass::MatcherPass {
 public:
     OPENVINO_RTTI("InitMarkReduceOpPath", "0");
@@ -267,11 +285,11 @@ public:
         if (!m.get_match_root())
             return false;
 
-        const auto mul = std::dynamic_pointer_cast<Multiply>(m.get_match_root());
+        const auto mul = as_type_ptr<Multiply>(m.get_match_root());
         // if pattern input_1*Pow(Maximum(input_2, eps), z) or input_1*Pow(Add(input_2, eps), z) is matched
         // need to check that power is negative
         if (mul) {
-            const auto pow_const = std::dynamic_pointer_cast<Constant>(pattern_to_output.at(pow_exp));
+            const auto pow_const = as_type_ptr<Constant>(pattern_to_output.at(pow_exp));
             if (pow_const) {
                 // continue only if exponent is negative (z < 0)
                 if (pow_const->get_element_type() == element::f16) {
@@ -286,7 +304,7 @@ public:
             }
         }
 
-        const auto eps_const = std::dynamic_pointer_cast<Constant>(pattern_to_output.at(eps_const_pattern));
+        const auto eps_const = as_type_ptr<Constant>(pattern_to_output.at(eps_const_pattern));
         if (!eps_const)
             return false;
         if (eps_const->get_element_type() == element::f32) {
@@ -307,6 +325,68 @@ public:
     }
 };
 
+class PropagateDownDisableSensitivityForQuantized : public pass::MatcherPass {
+public:
+    OPENVINO_RTTI("DisableMarkingForQuantizedNodes", "0");
+    PropagateDownDisableSensitivityForQuantized() {
+        MATCHER_SCOPE(PropagateDownDisableSensitivityForQuantized);
+
+        // quantization is propagated down through these nodes
+        std::shared_ptr<Node> quantization_propagating_nodes = pattern::wrap_type<Squeeze,
+                                                                                  Unsqueeze,
+                                                                                  FakeQuantize,
+                                                                                  Reshape,
+                                                                                  op::util::BroadcastBase,
+                                                                                  DepthToSpace,
+                                                                                  opset2::Interpolate,
+                                                                                  opset4::Interpolate,
+                                                                                  opset11::Interpolate,
+                                                                                  opset2::MaxPool,
+                                                                                  MaxPool,
+                                                                                  Pad,
+                                                                                  ReduceMax,
+                                                                                  ReduceMin,
+                                                                                  Relu,
+                                                                                  Transpose,
+                                                                                  ShuffleChannels,
+                                                                                  StridedSlice,
+                                                                                  Slice,
+                                                                                  VariadicSplit,
+                                                                                  Split,
+                                                                                  op::util::GatherBase,
+                                                                                  Concat,
+                                                                                  Tile>();
+
+        matcher_pass_callback callback = [=](pattern::Matcher& m) {
+            const auto& node = m.get_match_root();
+            if (!node)
+                return false;
+
+            auto is_quantize = as_type_ptr<FakeQuantize>(node);
+            if (is_quantize) {
+                mark_fq_path(node);
+                return true;
+            }
+
+            bool is_changed = false;
+            for (const auto& in_node_output : node->input_values()) {
+                auto input_node = in_node_output.get_node_shared_ptr();
+                auto is_quantize = as_type_ptr<FakeQuantize>(input_node);
+                if (is_quantize || is_fq_path(input_node)) {
+                    mark_fq_path(node);
+                    enable_fp16_compression(node);
+                    is_changed = true;
+                }
+            }
+            return is_changed;
+        };
+
+        auto m = make_shared<pattern::Matcher>(quantization_propagating_nodes, matcher_name);
+        register_matcher(m, callback);
+    }
+};
+
 bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model>& m) {
     RUN_ON_MODEL_SCOPE(MarkSugraphsToKeepInMixedPrecision);
@@ -314,6 +394,7 @@ bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model
     // Mark root of Division with eps pattern to keep in FP32
     REGISTER_PASS(manager, MarkDivWithEps)
     REGISTER_PASS(manager, MarkExpInReduceOpPath)
+    REGISTER_PASS(manager, PropagateDownDisableSensitivityForQuantized)
 
     // both Up and Down propagations are needed.
     // Why both of them are needed is explained in comments in passes declarations.
@@ -328,6 +409,7 @@ bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model
     for (auto& node : m->get_ops()) {
         erase_reduceop_path(node);
+        erase_fq_path(node);
     }
 
     return false;  // no need to revalidate

Changed file 2 of 3:

@@ -126,36 +126,21 @@ TEST(TransformationTests, MarkSugraphsToKeepInMixedPrecision_reducesum_without_e
     shared_ptr<Model> model, model_ref;
     pass::Manager manager;
-    {
-        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
-        auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
-
-        auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
-        auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
-        auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
-        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
-
-        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
-
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
-    {
-        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
-        auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
-        auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
-        auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
-        auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
-        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
-        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
-    }
+    auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
+    auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
+
+    auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
+    auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
+    auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
+    auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
+
+    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
     const FunctionsComparator func_comparator =
         FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
@@ -519,32 +504,20 @@ TEST(TransformationTests, PowWithPositiveExponent) {
     pass::Manager manager;
     // graph should be left unchanged
     const float eps_value = 1.0e-12f;
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
-        auto pow = std::make_shared<Power>(add, pow_exp_const);
-        auto mul = std::make_shared<Multiply>(input_1, pow);
-
-        model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
-
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
-        auto pow = std::make_shared<Power>(add, pow_exp_const);
-        auto mul = std::make_shared<Multiply>(input_1, pow);
-        model_ref = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
-    }
+    auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
+    auto add = std::make_shared<Add>(input_2, eps_const);
+    auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
+    auto pow = std::make_shared<Power>(add, pow_exp_const);
+    auto mul = std::make_shared<Multiply>(input_1, pow);
+
+    model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
     const FunctionsComparator func_comparator =
         FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
     // need to compare twice to ensure that no extra nodes are marked
@@ -559,28 +532,18 @@ TEST(TransformationTests, DivisionByZeroMinimalPatternUnchanged) {
     pass::Manager manager;
     // if eps_value is greater than normalized_fp16_min then leave graph unchanged
     const float eps_value = 0.0001f;
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto divide = std::make_shared<Divide>(input_1, add);
-
-        model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
-
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto divide = std::make_shared<Divide>(input_1, add);
-        model_ref = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
-    }
+    auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
+    auto add = std::make_shared<Add>(input_2, eps_const);
+    auto divide = std::make_shared<Divide>(input_1, add);
+
+    model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
     const FunctionsComparator func_comparator =
         FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
     // need to compare twice to ensure that no extra nodes are marked
@@ -798,36 +761,21 @@ TEST(TransformationTests, MarkReduceOpExpToKeepInMixedPrecision_reducesum_withou
     // ReduceSum without Exp is not a precision sensitive case
     shared_ptr<Model> model, model_ref;
     pass::Manager manager;
-    {
-        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
-        auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
-
-        auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
-        auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
-        auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
-        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
-
-        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
-
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
-    {
-        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
-        auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
-        auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
-        auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
-        auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
-        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
-        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
-    }
+    auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
+    auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
+
+    auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
+    auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
+    auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
+    auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
+
+    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
     const FunctionsComparator func_comparator =
         FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
@@ -986,31 +934,21 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_PowWithPositiveEx
     const float eps_value = 1.e-12f;
    shared_ptr<Model> model, model_ref;
     pass::Manager manager;
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
-        auto pow = std::make_shared<Power>(add, pow_exp_const);
-        auto mul = std::make_shared<Multiply>(input_1, pow);
-
-        model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
-        auto pow = std::make_shared<Power>(add, pow_exp_const);
-        auto mul = std::make_shared<Multiply>(input_1, pow);
-        model_ref = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
-    }
+    auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
+    auto add = std::make_shared<Add>(input_2, eps_const);
+    auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
+    auto pow = std::make_shared<Power>(add, pow_exp_const);
+    auto mul = std::make_shared<Multiply>(input_1, pow);
+
+    model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
     const auto fc = FunctionsComparator::with_default()
                         .enable(FunctionsComparator::PRECISIONS)
                         .enable(FunctionsComparator::RUNTIME_KEYS)
@@ -1027,27 +965,19 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_MinimalPatternUnc
     const float eps_value = 0.0001f;
     shared_ptr<Model> model, model_ref;
     pass::Manager manager;
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto divide = std::make_shared<Divide>(input_1, add);
-
-        model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto divide = std::make_shared<Divide>(input_1, add);
-        model_ref = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
-    }
+    auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
+    auto add = std::make_shared<Add>(input_2, eps_const);
+    auto divide = std::make_shared<Divide>(input_1, add);
+
+    model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
     const auto fc = FunctionsComparator::with_default()
                         .enable(FunctionsComparator::PRECISIONS)
                         .enable(FunctionsComparator::RUNTIME_KEYS)
@@ -1162,3 +1092,82 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_InL2NormWithSqrtA
     result = fc(model, model_ref);
     ASSERT_TRUE(result.valid) << result.message;
 }
+
+TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_disable_for_quantized_nodes_1) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    // although the graph contains the precision-sensitive Exp->ReduceSum pattern, because of the
+    // FakeQuantize ops it will be inferred in int8, so there is no need to mark it:
+    // model and model_ref should match
+    auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto exp_1 = make_shared<opset10::Exp>(input_1);
+
+    auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
+    auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
+    auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
+    auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
+    auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+    auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+    auto reduce_sum_1 = make_shared<opset10::ReduceSum>(fq_1, reduction_axes);
+
+    auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+    auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);
+    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
+
+    const auto fc = FunctionsComparator::with_default()
+                        .enable(FunctionsComparator::PRECISIONS)
+                        .enable(FunctionsComparator::RUNTIME_KEYS)
+                        .enable(FunctionsComparator::CONST_VALUES);
+    // need to compare twice to ensure that no extra nodes are marked
+    FunctionsComparator::Result result = fc(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+    result = fc(model, model_ref);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
+TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_disable_for_quantized_nodes_2) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    // although the graph contains the precision-sensitive Exp->ReduceSum pattern, because of the
+    // FakeQuantize ops it will be inferred in int8, so there is no need to mark it:
+    // model and model_ref should match
+    auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto exp_1 = make_shared<opset10::Exp>(input_1);
+
+    auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
+    auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
+    auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
+    auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
+    auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+    auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
+    auto unsqueeze_1 = make_shared<opset10::Unsqueeze>(fq_1, unsqueeze_axes);
+    auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+    auto reduce_sum_1 = make_shared<opset10::ReduceSum>(unsqueeze_1, reduction_axes);
+
+    auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+    auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);
+    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
+
+    const auto fc = FunctionsComparator::with_default()
+                        .enable(FunctionsComparator::PRECISIONS)
+                        .enable(FunctionsComparator::RUNTIME_KEYS)
+                        .enable(FunctionsComparator::CONST_VALUES);
+    // need to compare twice to ensure that no extra nodes are marked
+    FunctionsComparator::Result result = fc(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+    result = fc(model, model_ref);
+    ASSERT_TRUE(result.valid) << result.message;
+}
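
To inspect which nodes the marking pass actually pinned to FP32 (the property the comparisons above check through FunctionsComparator::RUNTIME_KEYS), one can query the same rt_info attribute the pass sets. A rough sketch, assuming the usual location of the fp16-compression rt_info helpers:

    #include "transformations/rt_info/disable_fp16_compression.hpp"

    // After MarkSugraphsToKeepInMixedPrecision has run, nodes that must stay in FP32
    // carry the disable-fp16-compression rt_info attribute; the temporary "fq_path"
    // markers are erased at the end of run_on_model and are not visible here.
    for (const auto& node : model->get_ops()) {
        if (ov::fp16_compression_is_disabled(node))
            std::cout << node->get_friendly_name() << " stays in FP32" << std::endl;
    }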

Changed file 3 of 3:

@@ -1716,3 +1716,119 @@ TEST(TransformationTests, ConvertPrecision_exp_through_unsqueeze) {
     FunctionsComparator::Result result = func_comparator(model_ref, model);
     ASSERT_TRUE(result.valid) << result.message;
 }
+
+TEST(TransformationTests, ConvertPrecision_disable_for_quantized_nodes_1) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto exp_1 = make_shared<opset10::Exp>(input_1);
+
+        auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
+        auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
+        auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
+        auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
+        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(fq_1, reduction_axes);
+
+        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);
+        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(model);
+    }
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto exp_1 = make_shared<opset10::Exp>(input_1);
+
+        auto in_low = op::v0::Constant::create(element::f16, Shape{}, {0.f});
+        auto in_high = op::v0::Constant::create(element::f16, Shape{}, {5.f});
+        auto out_low = op::v0::Constant::create(element::f16, Shape{}, {2.f});
+        auto out_high = op::v0::Constant::create(element::f16, Shape{}, {4.f});
+        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(fq_1, reduction_axes);
+
+        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);
+        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
+TEST(TransformationTests, ConvertPrecision_disable_for_quantized_nodes_2) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto exp_1 = make_shared<opset10::Exp>(input_1);
+
+        auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
+        auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
+        auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
+        auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
+        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+        auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
+        auto unsqueeze_1 = make_shared<opset10::Unsqueeze>(fq_1, unsqueeze_axes);
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(unsqueeze_1, reduction_axes);
+
+        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);
+        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(model);
+    }
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto exp_1 = make_shared<opset10::Exp>(input_1);
+
+        auto in_low = op::v0::Constant::create(element::f16, Shape{}, {0.f});
+        auto in_high = op::v0::Constant::create(element::f16, Shape{}, {5.f});
+        auto out_low = op::v0::Constant::create(element::f16, Shape{}, {2.f});
+        auto out_high = op::v0::Constant::create(element::f16, Shape{}, {4.f});
+        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+        auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
+        auto unsqueeze_1 = make_shared<opset10::Unsqueeze>(fq_1, unsqueeze_axes);
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(unsqueeze_1, reduction_axes);
+
+        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);
+        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+}