diff --git a/docs/MO_DG/prepare_model/FP16_Compression.md b/docs/MO_DG/prepare_model/FP16_Compression.md index 78279a298cd..bbfcb2fe756 100644 --- a/docs/MO_DG/prepare_model/FP16_Compression.md +++ b/docs/MO_DG/prepare_model/FP16_Compression.md @@ -26,7 +26,7 @@ For details on how plugins handle compressed ``FP16`` models, see .. note:: - Some large models (larger than a few Gb) when compressed to ``FP16`` may consume enormous amount of RAM on the loading + Some large models (larger than a few GB) when compressed to ``FP16`` may consume enormous amount of RAM on the loading phase of the inference. In case if you are facing such problems, please try to convert them without compression: `mo --input_model INPUT_MODEL --compress_to_fp16=False` diff --git a/src/common/transformations/src/transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.cpp b/src/common/transformations/src/transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.cpp index 157bce8df83..7238ca870c7 100644 --- a/src/common/transformations/src/transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.cpp @@ -23,30 +23,6 @@ using namespace ov::opset10; namespace ov { namespace pass { -/* - * MarkNormalizationOps marks MVN and NormalizeL2 to be kept in f32 precision. 
- */ -class MarkNormalizationOps : public MatcherPass { -public: - OPENVINO_RTTI("MarkNormalizationOps", "0"); - - MarkNormalizationOps() { - MATCHER_SCOPE(MarkNormalizationOps); - auto ops_to_be_kept_fp32 = pattern::wrap_type(); - - matcher_pass_callback callback = [=](pattern::Matcher& m) { - const auto& node = m.get_match_root(); - if (!node) - return false; - - disable_fp16_compression(node); - return true; - }; - auto m = make_shared(ops_to_be_kept_fp32, matcher_name); - register_matcher(m, callback); - } -}; - // Marking continues to propagate through these ops. std::shared_ptr propagate_through_ops = pattern::wrap_typeget_ops()) { diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index 67132082857..7bd8cd822d6 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -36,6 +36,7 @@ bool fuse_type_to_shapeof_v0(const std::shared_ptr& node, const pr bool fuse_type_to_random_uniform_v8(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_unique_v10(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_range_v4(const std::shared_ptr& node, const precisions_map& precisions); +bool fuse_type_to_eye_v9(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_parameter(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_convert(const std::shared_ptr& node, const precisions_map& precisions); bool fuse_type_to_nms3(const std::shared_ptr& node, const precisions_map& precisions); @@ -356,6 +357,7 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr}, {opset1::ShapeOf::get_type_info_static(), fuse_type_to_shapeof_v0}, {opset4::Range::get_type_info_static(), fuse_type_to_range_v4}, + {opset9::Eye::get_type_info_static(), fuse_type_to_eye_v9}, 
{opset10::Unique::get_type_info_static(), fuse_type_to_unique_v10}, {opset8::RandomUniform::get_type_info_static(), fuse_type_to_random_uniform_v8}}; @@ -383,6 +385,7 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr(); manager.register_pass(); + manager.run_passes(f); } (void)is_changed; // ignored @@ -453,6 +456,20 @@ bool fuse_type_to_range_v4(const std::shared_ptr& node, const prec return false; } +bool fuse_type_to_eye_v9(const std::shared_ptr& node, const precisions_map& precisions) { + auto it = precisions.find(node->get_output_element_type(0)); + if (it == precisions.end()) + return false; + const auto& to = it->second; + if (auto eye_node = ov::as_type_ptr(node)) { + if (to.is_integral() || to.is_real()) { + eye_node->set_out_type(to); + return true; + } + } + return false; +} + bool fuse_type_to_parameter(const std::shared_ptr& node, const precisions_map& precisions) { auto it = precisions.find(node->get_output_element_type(0)); if (it == precisions.end()) diff --git a/src/common/transformations/tests/common_optimizations/align_mixed_fp32_fp16_types_test.cpp b/src/common/transformations/tests/common_optimizations/align_mixed_fp32_fp16_types_test.cpp index 80635215c8a..a2775f08fe6 100644 --- a/src/common/transformations/tests/common_optimizations/align_mixed_fp32_fp16_types_test.cpp +++ b/src/common/transformations/tests/common_optimizations/align_mixed_fp32_fp16_types_test.cpp @@ -147,88 +147,6 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_3) { } } -TEST_F(TransformationTestsF, align_mixed_fp16_fp32_4) { - { - auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1}); - auto mvn_1 = make_shared(input_1, reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT); - auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f}); - auto add_1 = make_shared(mvn_1, 
addition_const); - auto matmul_1 = make_shared(add_1, input_2); - - model = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - - pass::Manager manager; - manager.register_pass(); - manager.register_pass(); - manager.run_passes(model); - } - - { - auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1}); - auto convert_to_f32_1 = make_shared(input_1, element::f32); - auto mvn_1 = make_shared(convert_to_f32_1, reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT); - auto convert_to_f16_1 = make_shared(mvn_1, element::f32); - auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f}); - auto add_1 = make_shared(convert_to_f16_1, addition_const); - auto matmul_1 = make_shared(add_1, input_2); - - model_ref = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - } -} - -TEST_F(TransformationTestsF, align_mixed_fp16_fp32_mnv_with_split) { - { - auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 56, 224}); - - auto split_axis = Constant::create(element::i64, Shape{}, {3}); - auto split = make_shared(input_1, split_axis, 4); - - auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1}); - auto mvn_1 = make_shared(split->output(0), reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT); - auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f}); - auto add_1 = make_shared(mvn_1, addition_const); - auto matmul_1 = make_shared(add_1, input_2); - - auto result_1 = make_shared(matmul_1); - auto result_2 = make_shared(split->output(1)); - auto result_3 = make_shared(split->output(2)); - model = make_shared(OutputVector{result_1, result_2, result_3}, ParameterVector{input_1, input_2}); - - pass::Manager manager; - manager.register_pass(); - manager.register_pass(); - 
manager.run_passes(model); - } - - { - auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 56, 224}); - - auto split_axis = Constant::create(element::i64, Shape{}, {3}); - auto split = make_shared(input_1, split_axis, 4); - - auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1}); - auto convert_to_f32_1 = make_shared(split->output(0), element::f32); - auto mvn_1 = make_shared(convert_to_f32_1, reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT); - auto convert_to_f16_1 = make_shared(mvn_1, element::f32); - auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f}); - auto add_1 = make_shared(convert_to_f16_1, addition_const); - auto matmul_1 = make_shared(add_1, input_2); - - // todo: without Converts to fp16 because of GPU - auto result_1 = make_shared(matmul_1); - auto result_2 = make_shared(split->output(1)); - auto result_3 = make_shared(split->output(2)); - - model_ref = make_shared(OutputVector{result_1, result_2, result_3}, ParameterVector{input_1, input_2}); - } -} - TEST_F(TransformationTestsF, align_mixed_fp16_fp32_with_rand_uniform) { { auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); diff --git a/src/common/transformations/tests/common_optimizations/mark_subgraph_to_keep_in_mixed_precision_test.cpp b/src/common/transformations/tests/common_optimizations/mark_subgraph_to_keep_in_mixed_precision_test.cpp index 1cf4214d77f..643045dbd04 100644 --- a/src/common/transformations/tests/common_optimizations/mark_subgraph_to_keep_in_mixed_precision_test.cpp +++ b/src/common/transformations/tests/common_optimizations/mark_subgraph_to_keep_in_mixed_precision_test.cpp @@ -166,132 +166,6 @@ TEST(TransformationTests, MarkSugraphsToKeepInMixedPrecision_reducesum_without_e ASSERT_TRUE(result.valid) << result.message; } -TEST(TransformationTests, MarkNormalizationOps_1) { - shared_ptr model, model_ref; - pass::Manager manager; - - { - auto input_1 = 
make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1}); - auto mvn_1 = make_shared(input_1, reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT); - auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f}); - auto matmul_1 = make_shared(mvn_1, input_2); - - model = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - - manager.register_pass(); - manager.run_passes(model); - } - - { - auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1}); - auto mvn_1 = make_shared(input_1, reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT); - auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f}); - auto matmul_1 = make_shared(mvn_1, input_2); - - // marking nodes to be kept in fp32 for mixed precision - disable_fp16_compression(addition_const); - disable_fp16_compression(mvn_1); - - model_ref = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - } - - const FunctionsComparator func_comparator = - FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS); - // need to compare twice to ensure that no extra nodes are marked - FunctionsComparator::Result result = func_comparator(model_ref, model); - ASSERT_TRUE(result.valid) << result.message; - result = func_comparator(model, model_ref); - ASSERT_TRUE(result.valid) << result.message; -} - -TEST(TransformationTests, MarkNormalizationOps_2) { - shared_ptr model, model_ref; - pass::Manager manager; - - { - auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1}); - auto normalizel2_1 = make_shared(input_1, 
reduction_axes, 1.0e-8f, ov::op::EpsMode::MAX); - auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f}); - auto matmul_1 = make_shared(normalizel2_1, input_2); - - model = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - - manager.register_pass(); - manager.run_passes(model); - } - - { - auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1}); - auto normalizel2_1 = make_shared(input_1, reduction_axes, 1.0e-8f, ov::op::EpsMode::MAX); - auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f}); - auto matmul_1 = make_shared(normalizel2_1, input_2); - - // marking nodes to be kept in fp32 for mixed precision - disable_fp16_compression(addition_const); - disable_fp16_compression(normalizel2_1); - - model_ref = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - } - - const FunctionsComparator func_comparator = - FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS); - // need to compare twice to ensure that no extra nodes are marked - FunctionsComparator::Result result = func_comparator(model_ref, model); - ASSERT_TRUE(result.valid) << result.message; - result = func_comparator(model, model_ref); - ASSERT_TRUE(result.valid) << result.message; -} - -TEST(TransformationTests, MarkNormalizationOps_3) { - shared_ptr model, model_ref; - pass::Manager manager; - - { - auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1}); - auto mvn_1 = make_shared(input_1, AxisSet{3}, true, 1.0e-8); - auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f}); - auto matmul_1 = make_shared(mvn_1, input_2); - - model = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - - 
manager.register_pass(); - manager.run_passes(model); - } - - { - auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1}); - auto mvn_1 = make_shared(input_1, AxisSet{3}, true, 1.0e-8); - auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f}); - auto matmul_1 = make_shared(mvn_1, input_2); - - // marking nodes to be kept in fp32 for mixed precision - disable_fp16_compression(addition_const); - disable_fp16_compression(mvn_1); - - model_ref = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - } - - const FunctionsComparator func_comparator = - FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS); - // need to compare twice to ensure that no extra nodes are marked - FunctionsComparator::Result result = func_comparator(model_ref, model); - ASSERT_TRUE(result.valid) << result.message; - result = func_comparator(model, model_ref); - ASSERT_TRUE(result.valid) << result.message; -} - TEST(TransformationTests, keep_precission_sensitive_fp32_2) { shared_ptr model, model_ref; pass::Manager manager; @@ -413,130 +287,6 @@ TEST(TransformationTests, keep_precission_sensitive_fp32_3) { ASSERT_TRUE(result.valid) << result.message; } -TEST(TransformationTests, keep_precission_sensitive_fp32_4) { - shared_ptr model, model_ref; - pass::Manager manager; - - { - auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1}); - auto mvn_1 = make_shared(input_1, reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT); - auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f}); - auto unsqueeze_1 = make_shared(mvn_1, addition_const); - auto matmul_1 = make_shared(unsqueeze_1, input_2); - - model = 
make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - - manager.register_pass(); - manager.run_passes(model); - } - - { - auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1}); - auto mvn_1 = make_shared(input_1, reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT); - auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f}); - auto unsqueeze_1 = make_shared(mvn_1, addition_const); - auto matmul_1 = make_shared(unsqueeze_1, input_2); - - // marking nodes to be kept in fp32 for mixed precision - disable_fp16_compression(mvn_1); - - model_ref = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - } - - const FunctionsComparator func_comparator = - FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS); - // need to compare twice to ensure that no extra nodes are marked - FunctionsComparator::Result result = func_comparator(model_ref, model); - ASSERT_TRUE(result.valid) << result.message; - result = func_comparator(model, model_ref); - ASSERT_TRUE(result.valid) << result.message; -} - -TEST(TransformationTests, keep_precission_sensitive_fp32_5) { - shared_ptr model, model_ref; - pass::Manager manager; - - { - auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1}); - auto normalizel2_1 = make_shared(input_1, reduction_axes, 1.0e-8f, ov::op::EpsMode::MAX); - auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f}); - auto unsqueeze_1 = make_shared(normalizel2_1, addition_const); - auto matmul_1 = make_shared(unsqueeze_1, input_2); - - model = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - - manager.register_pass(); - manager.run_passes(model); - } - - { - auto 
input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1}); - auto normalizel2_1 = make_shared(input_1, reduction_axes, 1.0e-8f, ov::op::EpsMode::MAX); - auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f}); - auto unsqueeze_1 = make_shared(normalizel2_1, addition_const); - auto matmul_1 = make_shared(unsqueeze_1, input_2); - - // marking nodes to be kept in fp32 for mixed precision - disable_fp16_compression(normalizel2_1); - - model_ref = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - } - - const FunctionsComparator func_comparator = - FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS); - // need to compare twice to ensure that no extra nodes are marked - FunctionsComparator::Result result = func_comparator(model_ref, model); - ASSERT_TRUE(result.valid) << result.message; - result = func_comparator(model, model_ref); - ASSERT_TRUE(result.valid) << result.message; -} - -TEST(TransformationTests, keep_precission_sensitive_fp32_6) { - shared_ptr model, model_ref; - pass::Manager manager; - - { - auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1}); - auto normalizel2_1 = make_shared(input_1, reduction_axes, 1.0e-8f, ov::op::EpsMode::MAX); - auto matmul_1 = make_shared(normalizel2_1, input_2); - - model = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - - manager.register_pass(); - manager.run_passes(model); - } - - { - auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1}); - auto normalizel2_1 = make_shared(input_1, reduction_axes, 1.0e-8f, 
ov::op::EpsMode::MAX); - auto matmul_1 = make_shared(normalizel2_1, input_2); - - disable_fp16_compression(normalizel2_1); - - model_ref = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - } - - const FunctionsComparator func_comparator = - FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS); - // need to compare twice to ensure that no extra nodes are marked - FunctionsComparator::Result result = func_comparator(model_ref, model); - ASSERT_TRUE(result.valid) << result.message; - result = func_comparator(model, model_ref); - ASSERT_TRUE(result.valid) << result.message; -} - TEST(TransformationTests, keep_precission_sensitive_fp32_7) { shared_ptr model, model_ref; pass::Manager manager; diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp index ee00eb59182..73d33361121 100644 --- a/src/common/transformations/tests/utils/convert_precision.cpp +++ b/src/common/transformations/tests/utils/convert_precision.cpp @@ -13,9 +13,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -832,6 +834,8 @@ TEST(TransformationTests, ConvertPrecision_check_marking_does_not_leak_in_trivia model = std::make_shared(NodeVector{reshape}, ParameterVector{input_1, input_2}); pass::Manager manager; + manager.register_pass(); + type_to_fuse_map empty_type_to_fuse_map = {}; bool keep_precision_sensitive_in_fp32 = true; manager.register_pass(precisions_map{{element::f32, element::f16}}, @@ -871,6 +875,8 @@ TEST(TransformationTests, ConvertPrecision_whole_shape_subgraph_is_marked_1) { model = std::make_shared(NodeVector{reshape}, ParameterVector{input_1, input_2}); pass::Manager manager; + manager.register_pass(); + type_to_fuse_map empty_type_to_fuse_map = {}; bool keep_precision_sensitive_in_fp32 = true; manager.register_pass(precisions_map{{element::f32, element::f16}}, @@ -924,6 +930,8 @@ 
TEST(TransformationTests, ConvertPrecision_whole_shape_subgraph_is_marked_2) { model = std::make_shared(NodeVector{result}, ParameterVector{input_1}); pass::Manager manager; + manager.register_pass(); + type_to_fuse_map empty_type_to_fuse_map = {}; bool keep_precision_sensitive_in_fp32 = true; manager.register_pass(precisions_map{{element::f32, element::f16}}, @@ -1003,6 +1011,8 @@ TEST(TransformationTests, ConvertPrecision_whole_shape_subgraph_is_marked_3) { model = std::make_shared(NodeVector{result_1, result_2}, ParameterVector{input_1, input_2}); pass::Manager manager; + manager.register_pass(); + type_to_fuse_map empty_type_to_fuse_map = {}; bool keep_precision_sensitive_in_fp32 = true; manager.register_pass(precisions_map{{element::f32, element::f16}}, @@ -1350,9 +1360,8 @@ TEST(TransformationTests, ConvertPrecision_keep_precission_sensitive_fp32_with_e auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1}); auto reduce_sum_1 = make_shared(exp_1, reduction_axes); - auto factor_const = opset10::Constant::create(element::f16, Shape{1}, {-1}); - auto factor_const_decompressed = make_shared(factor_const, element::f32); - auto mul_1 = make_shared(reduce_sum_1, factor_const_decompressed); + auto factor_const = opset10::Constant::create(element::f32, Shape{1}, {-1}); + auto mul_1 = make_shared(reduce_sum_1, factor_const); auto mul_1_compressed = make_shared(mul_1, element::f16); auto matmul_1 = make_shared(mul_1_compressed, input_2); @@ -1397,9 +1406,8 @@ TEST(TransformationTests, ConvertPrecision_keep_precission_sensitive_fp32_with_r auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1}); auto reduce_mean_1 = make_shared(exp_1, reduction_axes); - auto factor_const = opset10::Constant::create(element::f16, Shape{1}, {-1}); - auto factor_const_decompressed = make_shared(factor_const, element::f32); - auto mul_1 = make_shared(reduce_mean_1, factor_const_decompressed); + auto factor_const = 
opset10::Constant::create(element::f32, Shape{1}, {-1}); + auto mul_1 = make_shared(reduce_mean_1, factor_const); auto mul_1_compressed = make_shared(mul_1, element::f16); auto matmul_1 = make_shared(mul_1_compressed, input_2); @@ -1455,84 +1463,6 @@ TEST(TransformationTests, ConvertPrecision_reducesum_without_exp) { ASSERT_TRUE(result.valid) << result.message; } -TEST(TransformationTests, ConvertPrecision_MarkNormalizationOps_1) { - shared_ptr model, model_ref; - pass::Manager manager; - - { - auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1}); - auto mvn_1 = make_shared(input_1, reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT); - auto matmul_1 = make_shared(mvn_1, input_2); - - model = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - - type_to_fuse_map empty_type_to_fuse_map = {}; - bool keep_precision_sensitive_in_fp32 = true; - manager.register_pass(precisions_map{{element::f32, element::f16}}, - empty_type_to_fuse_map, - keep_precision_sensitive_in_fp32); - manager.run_passes(model); - } - - { - auto input_1 = make_shared(element::f16, Shape{1, 3, 224, 224}); - auto input_1_decompressed = make_shared(input_1, element::f32); - auto input_2 = make_shared(element::f16, Shape{1, 3, 224, 224}); - auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1}); - auto mvn_1 = - make_shared(input_1_decompressed, reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT); - auto mvn_compressed = make_shared(mvn_1, element::f16); - auto matmul_1 = make_shared(mvn_compressed, input_2); - - model_ref = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - } - - const FunctionsComparator func_comparator = FunctionsComparator::with_default(); - FunctionsComparator::Result result = func_comparator(model_ref, model); - ASSERT_TRUE(result.valid) << 
result.message; -} - -TEST(TransformationTests, ConvertPrecision_MarkNormalizationOps_2) { - shared_ptr model, model_ref; - pass::Manager manager; - - { - auto input_1 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto input_2 = make_shared(element::f32, Shape{1, 3, 224, 224}); - auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1}); - auto normalizel2_1 = make_shared(input_1, reduction_axes, 1.0e-8f, ov::op::EpsMode::MAX); - auto matmul_1 = make_shared(normalizel2_1, input_2); - - model = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - - type_to_fuse_map empty_type_to_fuse_map = {}; - bool keep_precision_sensitive_in_fp32 = true; - manager.register_pass(precisions_map{{element::f32, element::f16}}, - empty_type_to_fuse_map, - keep_precision_sensitive_in_fp32); - manager.run_passes(model); - } - - { - auto input_1 = make_shared(element::f16, Shape{1, 3, 224, 224}); - auto input_1_decompressed = make_shared(input_1, element::f32); - auto input_2 = make_shared(element::f16, Shape{1, 3, 224, 224}); - auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1}); - auto normalizel2_1 = - make_shared(input_1_decompressed, reduction_axes, 1.0e-8f, ov::op::EpsMode::MAX); - auto normalizel2_compressed = make_shared(normalizel2_1, element::f16); - auto matmul_1 = make_shared(normalizel2_compressed, input_2); - - model_ref = make_shared(NodeVector{matmul_1}, ParameterVector{input_1, input_2}); - } - - const FunctionsComparator func_comparator = FunctionsComparator::with_default(); - FunctionsComparator::Result result = func_comparator(model_ref, model); - ASSERT_TRUE(result.valid) << result.message; -} - TEST(TransformationTests, ConvertPrecision_keep_precission_sensitive_fp32_t2t_subgraph) { shared_ptr model, model_ref; pass::Manager manager; @@ -1774,9 +1704,8 @@ TEST(TransformationTests, ConvertPrecision_exp_through_unsqueeze) { auto unsqueeze_1 = make_shared(exp_1, unsqueeze_axes); auto reduce_sum_1 
= make_shared(unsqueeze_1, reduction_axes); - auto factor_const = opset10::Constant::create(element::f16, Shape{1}, {-1}); - auto factor_const_decompressed = make_shared(factor_const, element::f32); - auto mul_1 = make_shared(reduce_sum_1, factor_const_decompressed); + auto factor_const = opset10::Constant::create(element::f32, Shape{1}, {-1}); + auto mul_1 = make_shared(reduce_sum_1, factor_const); auto mul_1_compressed = make_shared(mul_1, element::f16); auto matmul_1 = make_shared(mul_1_compressed, input_2); diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 5097e22d01a..c8b88ad73c8 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -194,6 +194,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // fuse softmax patterns so that they will not be marked as precision sensitive in ConvertPrecision manager.register_pass(); + // decompose MVNs that are not supported in GPU, so that they will be marked as precision sensitive in ConvertPrecision + manager.register_pass(); // call ConvertPrecision with keep_precision_sensitive_in_fp32 = true manager.register_pass(fp_convert_precision_map, empty_fuse_map, true);