From 6ff0cad127d91d02df3cfacbeb3133c49d57f214 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Mon, 24 Apr 2023 11:13:04 +0200
Subject: [PATCH] Fix mixed precision inference for quantized IRs (#16785)

* disable mixed precision inference for quantized IRs

* typo fix

* improved solution, disable mixed precision in quantized IRs selectively only for float nodes

* minor typos correction

* added unit-tests

* renamed rt_info

* updated list of nodes for which FQ is propagated; updated unit-tests

* fix failing build
---
 ...k_subgraphs_to_keep_in_mixed_precision.cpp | 118 ++++++--
 ...bgraph_to_keep_in_mixed_precision_test.cpp | 281 +++++++++---------
 .../tests/utils/convert_precision.cpp         | 116 ++++++++
 3 files changed, 361 insertions(+), 154 deletions(-)

diff --git a/src/common/transformations/src/transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.cpp b/src/common/transformations/src/transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.cpp
index 7238ca870c7..e276efa9aaf 100644
--- a/src/common/transformations/src/transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.cpp
+++ b/src/common/transformations/src/transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.cpp
@@ -8,6 +8,7 @@
 #include "openvino/op/util/broadcast_base.hpp"
 #include "openvino/op/util/gather_base.hpp"
 #include "openvino/opsets/opset10.hpp"
+#include "openvino/opsets/opset11.hpp"
 #include "openvino/opsets/opset2.hpp"
 #include "openvino/pass/manager.hpp"
 #include "openvino/pass/pattern/op/or.hpp"
@@ -23,6 +24,30 @@
 using namespace ov::opset10;
 
 namespace ov {
 namespace pass {
 
+void mark_reduceop_path(const std::shared_ptr<Node>& node) {
+    node->get_rt_info().emplace("reduceop_path", true);
+}
+bool is_reduceop_path(const std::shared_ptr<Node>& node) {
+    return node->get_rt_info().count("reduceop_path");
+}
+
+void erase_reduceop_path(const std::shared_ptr<Node>& node) {
+    auto& rt_info = node->get_rt_info();
+    rt_info.erase("reduceop_path");
+}
+
+void mark_fq_path(const std::shared_ptr<Node>& node) {
+    node->get_rt_info().emplace("fq_path", true);
+}
+bool is_fq_path(const std::shared_ptr<Node>& node) {
+    return node->get_rt_info().count("fq_path");
+}
+
+void erase_fq_path(const std::shared_ptr<Node>& node) {
+    auto& rt_info = node->get_rt_info();
+    rt_info.erase("fq_path");
+}
+
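The six helpers above implement one small idiom: a temporary boolean marker stored in a node's runtime-info map. A minimal standalone sketch of the same idiom (illustrative only, not part of the patch; "my_marker" is a made-up key):

    #include "openvino/core/node.hpp"

    void mark(const std::shared_ptr<ov::Node>& node) {
        node->get_rt_info().emplace("my_marker", true);  // no-op if already marked
    }
    bool is_marked(const std::shared_ptr<ov::Node>& node) {
        return node->get_rt_info().count("my_marker") > 0;
    }
    void unmark(const std::shared_ptr<ov::Node>& node) {
        // temporary markers must be erased before the pass returns,
        // so they never leak into a serialized IR (see run_on_model below)
        node->get_rt_info().erase("my_marker");
    }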
 // Marking continues to propagate through these ops.
 std::shared_ptr<Node> propagate_through_ops = pattern::wrap_type<...>();
 
-        auto convert_node = dynamic_pointer_cast<Convert>(node);
+        auto convert_node = as_type_ptr<Convert>(node);
         if (convert_node) {
             // if during propagating up there is a Convert it must go to Const,
             // otherwise interrupt propagation
-            auto const_node = dynamic_pointer_cast<Constant>(node->input_value(0).get_node_shared_ptr());
+            auto const_node = as_type_ptr<Constant>(node->input_value(0).get_node_shared_ptr());
             if (!const_node)
                 return false;
         }
@@ -106,7 +131,7 @@ public:
             return false;
 
         // on convert down propagation should be interrupted
-        auto convert_node = dynamic_pointer_cast<Convert>(node);
+        auto convert_node = as_type_ptr<Convert>(node);
         if (convert_node)
             return false;
 
@@ -114,6 +139,11 @@ public:
         for (const auto& in_node : node->input_values()) {
             if (!in_node.get_element_type().is_real())
                 continue;
+            if (is_fq_path(in_node.get_node_shared_ptr())) {
+                enable_fp16_compression(node);
+                return true;
+            }
+
             if (fp16_compression_is_disabled(in_node.get_node_shared_ptr())) {
                 disable_fp16_compression(node);
                 is_changed = true;
@@ -127,18 +157,6 @@ public:
     }
 };
 
-void mark_reduceop_path(const std::shared_ptr<Node>& node) {
-    node->get_rt_info().emplace("reduceop_path", true);
-}
-bool is_reduceop_path(const std::shared_ptr<Node>& node) {
-    return node->get_rt_info().count("reduceop_path");
-}
-
-void erase_reduceop_path(const std::shared_ptr<Node>& node) {
-    auto& rt_info = node->get_rt_info();
-    rt_info.erase("reduceop_path");
-}
-
 class InitMarkReduceOpPath : public pass::MatcherPass {
 public:
     OPENVINO_RTTI("InitMarkReduceOpPath", "0");
@@ -267,11 +285,11 @@ public:
             if (!m.get_match_root())
                 return false;
 
-            const auto mul = std::dynamic_pointer_cast<Multiply>(m.get_match_root());
+            const auto mul = as_type_ptr<Multiply>(m.get_match_root());
             // if pattern input_1*Pow(Maximum(input_2, eps), z) or input_1*Pow(Add(input_2, eps), z) is matched
             // need to check that power is negative
             if (mul) {
-                const auto pow_const = std::dynamic_pointer_cast<Constant>(pattern_to_output.at(pow_exp));
+                const auto pow_const = as_type_ptr<Constant>(pattern_to_output.at(pow_exp));
                 if (pow_const) {
                     // continue only if exponent is negative (z < 0)
                     if (pow_const->get_element_type() == element::f16) {
@@ -286,7 +304,7 @@ public:
                 }
             }
 
-            const auto eps_const = std::dynamic_pointer_cast<Constant>(pattern_to_output.at(eps_const_pattern));
+            const auto eps_const = as_type_ptr<Constant>(pattern_to_output.at(eps_const_pattern));
             if (!eps_const)
                 return false;
             if (eps_const->get_element_type() == element::f32) {
@@ -307,6 +325,68 @@ public:
     }
 };
 
+class PropagateDownDisableSensitivityForQuantized : public pass::MatcherPass {
+public:
+    OPENVINO_RTTI("DisableMarkingForQuantizedNodes", "0");
+    PropagateDownDisableSensitivityForQuantized() {
+        MATCHER_SCOPE(PropagateDownDisableSensitivityForQuantized);
+
+        // the FQ marking is propagated down through these nodes
+        std::shared_ptr<Node> quantization_propagating_nodes = pattern::wrap_type<...>();
+
+        matcher_pass_callback callback = [=](pattern::Matcher& m) {
+            const auto& node = m.get_match_root();
+            if (!node)
+                return false;
+
+            auto is_quantize = as_type_ptr<FakeQuantize>(node);
+            if (is_quantize) {
+                mark_fq_path(node);
+                return true;
+            }
+
+            bool is_changed = false;
+
+            for (const auto& in_node_output : node->input_values()) {
+                auto input_node = in_node_output.get_node_shared_ptr();
+                auto is_quantize = as_type_ptr<FakeQuantize>(input_node);
+                if (is_quantize || is_fq_path(input_node)) {
+                    mark_fq_path(node);
+                    enable_fp16_compression(node);
+                    is_changed = true;
+                }
+            }
+
+            return is_changed;
+        };
+        auto m = make_shared<pattern::Matcher>(quantization_propagating_nodes, matcher_name);
+        register_matcher(m, callback);
+    }
+};
+
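To see the intended end-to-end effect of the matcher above, here is a hedged usage sketch (not part of the patch; the header paths and the fp16_compression_is_disabled query are assumptions based on the helpers this file already uses). After the fix, a reduction sitting behind a FakeQuantize stays eligible for compression instead of being pinned to f32:

    #include "openvino/core/except.hpp"
    #include "openvino/core/model.hpp"
    #include "openvino/opsets/opset10.hpp"
    #include "openvino/pass/manager.hpp"
    #include "transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp"
    #include "transformations/rt_info/disable_fp16_compression.hpp"

    using namespace ov::opset10;

    int main() {
        // Exp -> FakeQuantize -> ReduceSum: the "sensitive" pair is FQ-guarded
        auto input = std::make_shared<Parameter>(ov::element::f32, ov::Shape{1, 3, 224, 224});
        auto exp = std::make_shared<Exp>(input);
        auto lo = Constant::create(ov::element::f32, ov::Shape{}, {0.f});
        auto hi = Constant::create(ov::element::f32, ov::Shape{}, {5.f});
        auto fq = std::make_shared<FakeQuantize>(exp, lo, hi, lo, hi, 256);
        auto axes = Constant::create(ov::element::i64, ov::Shape{1}, {-1});
        auto sum = std::make_shared<ReduceSum>(fq, axes);
        auto model = std::make_shared<ov::Model>(ov::NodeVector{sum}, ov::ParameterVector{input});

        ov::pass::Manager manager;
        manager.register_pass<ov::pass::MarkSugraphsToKeepInMixedPrecision>();
        manager.run_passes(model);

        // Without the fix, the Exp->ReduceSum pair would be pinned to f32 here.
        OPENVINO_ASSERT(!ov::fp16_compression_is_disabled(sum));
    }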
 bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model>& m) {
     RUN_ON_MODEL_SCOPE(MarkSugraphsToKeepInMixedPrecision);
@@ -314,6 +394,7 @@ bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model>& m) {
 
     for (auto& node : m->get_ops()) {
         erase_reduceop_path(node);
+        erase_fq_path(node);
     }
 
     return false;  // no need to revalidate
diff --git a/src/common/transformations/tests/common_optimizations/mark_subgraph_to_keep_in_mixed_precision_test.cpp b/src/common/transformations/tests/common_optimizations/mark_subgraph_to_keep_in_mixed_precision_test.cpp
index 643045dbd04..7dea7e948d3 100644
--- a/src/common/transformations/tests/common_optimizations/mark_subgraph_to_keep_in_mixed_precision_test.cpp
+++ b/src/common/transformations/tests/common_optimizations/mark_subgraph_to_keep_in_mixed_precision_test.cpp
@@ -126,36 +126,21 @@ TEST(TransformationTests, MarkSugraphsToKeepInMixedPrecision_reducesum_without_exp) {
     shared_ptr<Model> model, model_ref;
     pass::Manager manager;
-    {
-        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
-        auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
+    auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
+    auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
 
-        auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
-        auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
-        auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
-        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
+    auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
+    auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
+    auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
+    auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
 
-        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
 
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
-
-    {
-        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
-        auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
-
-        auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
-        auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
-        auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
-        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
-
-        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
-    }
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
     const FunctionsComparator func_comparator =
         FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
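The refactor pattern used in this and the following test hunks deserves a note: the reference graph is now a clone taken before the pass runs, rather than a second hand-built copy. A condensed sketch of the idiom (names as in the tests above):

    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
    model_ref = model->clone();  // snapshot taken BEFORE the pass mutates `model`

    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
    manager.run_passes(model);

    // compare in both directions so a marking present in only one graph is caught
    const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
    ASSERT_TRUE(fc(model_ref, model).valid);
    ASSERT_TRUE(fc(model, model_ref).valid);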
@@ -519,32 +504,20 @@ TEST(TransformationTests, PowWithPositiveExponent) {
     pass::Manager manager;
     // graph should be left unchanged
     const float eps_value = 1.0e-12f;
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
-        auto pow = std::make_shared<Power>(add, pow_exp_const);
-        auto mul = std::make_shared<Multiply>(input_1, pow);
+    auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
+    auto add = std::make_shared<Add>(input_2, eps_const);
+    auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
+    auto pow = std::make_shared<Power>(add, pow_exp_const);
+    auto mul = std::make_shared<Multiply>(input_1, pow);
 
-        model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
+    model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
 
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
-        auto pow = std::make_shared<Power>(add, pow_exp_const);
-        auto mul = std::make_shared<Multiply>(input_1, pow);
-
-        model_ref = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
-    }
     const FunctionsComparator func_comparator =
         FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
     // need to compare twice to ensure that no extra nodes are marked
@@ -559,28 +532,18 @@ TEST(TransformationTests, DivisionByZeroMinimalPatternUnchanged) {
     pass::Manager manager;
     // if eps_value is greater than normalized_fp16_min then leave graph unchanged
    const float eps_value = 0.0001f;
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto divide = std::make_shared<Divide>(input_1, add);
+    auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
+    auto add = std::make_shared<Add>(input_2, eps_const);
+    auto divide = std::make_shared<Divide>(input_1, add);
 
-        model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
+    model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
 
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto divide = std::make_shared<Divide>(input_1, add);
-
-        model_ref = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
-    }
     const FunctionsComparator func_comparator =
         FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
     // need to compare twice to ensure that no extra nodes are marked
@@ -798,36 +761,21 @@ TEST(TransformationTests, MarkReduceOpExpToKeepInMixedPrecision_reducesum_without_exp) {
     // ReduceSum without Exp is not a precision sensitive case
     shared_ptr<Model> model, model_ref;
     pass::Manager manager;
-    {
-        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
-        auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
+    auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
+    auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
 
-        auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
-        auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
-        auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
-        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
+    auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
+    auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
+    auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
+    auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
 
-        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
 
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
-
-    {
-        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
-        auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
-
-        auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
-        auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
-        auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
-        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
-
-        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
-    }
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
     const FunctionsComparator func_comparator =
         FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
@@ -986,31 +934,21 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_PowWithPositiveExp) {
     const float eps_value = 1.e-12f;
     shared_ptr<Model> model, model_ref;
     pass::Manager manager;
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
-        auto pow = std::make_shared<Power>(add, pow_exp_const);
-        auto mul = std::make_shared<Multiply>(input_1, pow);
-        model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
+    auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
+    auto add = std::make_shared<Add>(input_2, eps_const);
+    auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
+    auto pow = std::make_shared<Power>(add, pow_exp_const);
+    auto mul = std::make_shared<Multiply>(input_1, pow);
 
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
-        auto pow = std::make_shared<Power>(add, pow_exp_const);
-        auto mul = std::make_shared<Multiply>(input_1, pow);
+    model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
-        model_ref = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
-    }
     const auto fc = FunctionsComparator::with_default()
                         .enable(FunctionsComparator::PRECISIONS)
                         .enable(FunctionsComparator::RUNTIME_KEYS)
                         .enable(FunctionsComparator::CONST_VALUES);
@@ -1027,27 +965,19 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_MinimalPatternUnchanged) {
     const float eps_value = 0.0001f;
     shared_ptr<Model> model, model_ref;
     pass::Manager manager;
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto divide = std::make_shared<Divide>(input_1, add);
-        model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
+    auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
+    auto add = std::make_shared<Add>(input_2, eps_const);
+    auto divide = std::make_shared<Divide>(input_1, add);
 
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto divide = std::make_shared<Divide>(input_1, add);
+    model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
-        model_ref = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
-    }
     const auto fc = FunctionsComparator::with_default()
                         .enable(FunctionsComparator::PRECISIONS)
                         .enable(FunctionsComparator::RUNTIME_KEYS)
                         .enable(FunctionsComparator::CONST_VALUES);
     // need to compare twice to ensure that no extra nodes are marked
@@ -1162,3 +1092,82 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_InL2NormWithSqrtA
     result = fc(model, model_ref);
     ASSERT_TRUE(result.valid) << result.message;
 }
+
+TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_disable_for_quantized_nodes_1) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    // although the sensitive Exp->ReduceSum pattern is present, because of the FQs it will
+    // be inferred in int8, so there is no need to mark it: model and model_ref should match
+    auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto exp_1 = make_shared<Exp>(input_1);
+
+    auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
+    auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
+    auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
+    auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
+    auto fq_1 = make_shared<FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+    auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+    auto reduce_sum_1 = make_shared<ReduceSum>(fq_1, reduction_axes);
+
+    auto fq_2 = make_shared<FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+    auto matmul_1 = make_shared<MatMul>(fq_2, input_2);
+
+    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
+
+    const auto fc = FunctionsComparator::with_default()
+                        .enable(FunctionsComparator::PRECISIONS)
+                        .enable(FunctionsComparator::RUNTIME_KEYS)
+                        .enable(FunctionsComparator::CONST_VALUES);
+    // need to compare twice to ensure that no extra nodes are marked
+    FunctionsComparator::Result result = fc(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+    result = fc(model, model_ref);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
+TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_disable_for_quantized_nodes_2) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    // although the sensitive Exp->ReduceSum pattern is present, because of the FQs it will
+    // be inferred in int8, so there is no need to mark it: model and model_ref should match
+    auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto exp_1 = make_shared<Exp>(input_1);
+
+    auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
+    auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
+    auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
+    auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
+    auto fq_1 = make_shared<FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+    auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
+    auto unsqueeze_1 = make_shared<Unsqueeze>(fq_1, unsqueeze_axes);
+
+    auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+    auto reduce_sum_1 = make_shared<ReduceSum>(unsqueeze_1, reduction_axes);
+
+    auto fq_2 = make_shared<FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+    auto matmul_1 = make_shared<MatMul>(fq_2, input_2);
+
+    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
+
+    const auto fc = FunctionsComparator::with_default()
+                        .enable(FunctionsComparator::PRECISIONS)
+                        .enable(FunctionsComparator::RUNTIME_KEYS)
+                        .enable(FunctionsComparator::CONST_VALUES);
+    // need to compare twice to ensure that no extra nodes are marked
+    FunctionsComparator::Result result = fc(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+    result = fc(model, model_ref);
+    ASSERT_TRUE(result.valid) << result.message;
+}
diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp
index 73d33361121..5226c41ae17 100644
--- a/src/common/transformations/tests/utils/convert_precision.cpp
+++ b/src/common/transformations/tests/utils/convert_precision.cpp
@@ -1716,3 +1716,119 @@ TEST(TransformationTests, ConvertPrecision_exp_through_unsqueeze) {
     FunctionsComparator::Result result = func_comparator(model_ref, model);
     ASSERT_TRUE(result.valid) << result.message;
 }
+
+TEST(TransformationTests, ConvertPrecision_disable_for_quantized_nodes_1) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto exp_1 = make_shared<opset10::Exp>(input_1);
+
+        auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
+        auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
+        auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
+        auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
+        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(fq_1, reduction_axes);
+
+        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);
+
+        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(model);
+    }
+
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto exp_1 = make_shared<opset10::Exp>(input_1);
+
+        auto in_low = op::v0::Constant::create(element::f16, Shape{}, {0.f});
+        auto in_high = op::v0::Constant::create(element::f16, Shape{}, {5.f});
+        auto out_low = op::v0::Constant::create(element::f16, Shape{}, {2.f});
+        auto out_high = op::v0::Constant::create(element::f16, Shape{}, {4.f});
+        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(fq_1, reduction_axes);
+
+        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);
+
+        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
+TEST(TransformationTests, ConvertPrecision_disable_for_quantized_nodes_2) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto exp_1 = make_shared<opset10::Exp>(input_1);
+
+        auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
+        auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
+        auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
+        auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
+        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+        auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
+        auto unsqueeze_1 = make_shared<opset10::Unsqueeze>(fq_1, unsqueeze_axes);
+
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(unsqueeze_1, reduction_axes);
+
+        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);
+
+        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(model);
+    }
+
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto exp_1 = make_shared<opset10::Exp>(input_1);
+
+        auto in_low = op::v0::Constant::create(element::f16, Shape{}, {0.f});
+        auto in_high = op::v0::Constant::create(element::f16, Shape{}, {5.f});
+        auto out_low = op::v0::Constant::create(element::f16, Shape{}, {2.f});
+        auto out_high = op::v0::Constant::create(element::f16, Shape{}, {4.f});
+        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+        auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
+        auto unsqueeze_1 = make_shared<opset10::Unsqueeze>(fq_1, unsqueeze_axes);
+
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(unsqueeze_1, reduction_axes);
+
+        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);
+
+        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+}
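Taken together, the new ConvertPrecision tests pin down the user-visible contract. A hedged usage sketch (it simply mirrors the test setup above; not an additional API): with keep_precision_sensitive_in_fp32 enabled, an f32-to-f16 conversion still converts FQ-guarded subgraphs, because PropagateDownDisableSensitivityForQuantized re-enables compression behind each FakeQuantize:

    ov::pass::Manager manager;
    type_to_fuse_map empty_type_to_fuse_map = {};
    const bool keep_precision_sensitive_in_fp32 = true;  // pins only genuinely sensitive f32 subgraphs
    manager.register_pass<ov::pass::ConvertPrecision>(precisions_map{{ov::element::f32, ov::element::f16}},
                                                      empty_type_to_fuse_map,
                                                      keep_precision_sensitive_in_fp32);
    manager.run_passes(model);  // `model` built as in the tests above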