diff --git a/src/common/transformations/include/transformations/common_optimizations/convert_compressed_to_mixed_precision.hpp b/src/common/transformations/include/transformations/common_optimizations/convert_compressed_to_mixed_precision.hpp
deleted file mode 100644
index b8d0e40ce79..00000000000
--- a/src/common/transformations/include/transformations/common_optimizations/convert_compressed_to_mixed_precision.hpp
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright (C) 2018-2023 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include "openvino/pass/pass.hpp"
-#include "transformations_visibility.hpp"
-
-namespace ov {
-namespace pass {
-
-class TRANSFORMATIONS_API ConvertCompressedToMixedPrecision;
-
-}  // namespace pass
-}  // namespace ov
-
-/**
- * @ingroup ie_transformation_common_api
- * @brief ConvertCompressedToMixedPrecision converts fp16 compressed ov::Model to mixed precision ov::Model.
- * In mixed precision ov::Models precision sensitive nodes are kept in fp32 while most of the model is in fp16.
- */
-class ov::pass::ConvertCompressedToMixedPrecision : public ov::pass::ModelPass {
-public:
-    OPENVINO_RTTI("ConvertCompressedToMixedPrecision", "0");
-    bool run_on_model(const std::shared_ptr<ov::Model>& f) override;
-};
diff --git a/src/common/transformations/include/transformations/common_optimizations/convert_compression_only_to_legacy.hpp b/src/common/transformations/include/transformations/common_optimizations/convert_compression_only_to_legacy.hpp
index 43be3fb5ed9..10e75d93292 100644
--- a/src/common/transformations/include/transformations/common_optimizations/convert_compression_only_to_legacy.hpp
+++ b/src/common/transformations/include/transformations/common_optimizations/convert_compression_only_to_legacy.hpp
@@ -11,22 +11,11 @@
 namespace ov {
 namespace pass {
 
-class TRANSFORMATIONS_API EnableDecompressionConvertConstantFolding;
 class TRANSFORMATIONS_API ConvertCompressedOnlyToLegacy;
 
 }  // namespace pass
 }  // namespace ov
 
-/**
- * @ingroup ie_transformation_common_api
- * @brief Enables ConstantFolding for Convert operation in compressed function.
- */
-class ov::pass::EnableDecompressionConvertConstantFolding : public ov::pass::MatcherPass {
-public:
-    OPENVINO_RTTI("EnableDecompressionConvertConstantFolding", "0");
-    EnableDecompressionConvertConstantFolding();
-};
-
 /**
  * @ingroup ie_transformation_common_api
  * @brief ConvertCompressedOnlyToLegacy transformation converts compression only FP16 format to legacy FP16 format.
diff --git a/src/common/transformations/include/transformations/enable_decompression_convert_constant_folding.hpp b/src/common/transformations/include/transformations/enable_decompression_convert_constant_folding.hpp
new file mode 100644
index 00000000000..59471f9ba61
--- /dev/null
+++ b/src/common/transformations/include/transformations/enable_decompression_convert_constant_folding.hpp
@@ -0,0 +1,26 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/graph_rewrite.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+namespace pass {
+
+class TRANSFORMATIONS_API EnableDecompressionConvertConstantFolding;
+
+}  // namespace pass
+}  // namespace ov
+
+/**
+ * @ingroup ie_transformation_common_api
+ * @brief Enables ConstantFolding for Convert operation in compressed function.
+ */
+class ov::pass::EnableDecompressionConvertConstantFolding : public ov::pass::MatcherPass {
+public:
+    OPENVINO_RTTI("EnableDecompressionConvertConstantFolding", "0");
+    EnableDecompressionConvertConstantFolding();
+};
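The relocated header above only declares the matcher pass; it has no visible effect until a ConstantFolding pass runs afterwards. A minimal usage sketch (the driver function is hypothetical, not part of this patch; the pass pairing follows the deleted ConvertCompressedToMixedPrecision body shown further down):

```cpp
#include <memory>

#include "openvino/pass/constant_folding.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/enable_decompression_convert_constant_folding.hpp"

// Re-enable folding for decompression Converts, then fold them away.
// `model` is assumed to be an fp16-compressed ov::Model.
void fold_decompression_converts(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::EnableDecompressionConvertConstantFolding>();
    manager.register_pass<ov::pass::ConstantFolding>();
    manager.run_passes(model);
}
```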
diff --git a/src/common/transformations/src/transformations/common_optimizations/align_mixed_fp32_fp16_types.cpp b/src/common/transformations/src/transformations/common_optimizations/align_mixed_fp32_fp16_types.cpp
index 11d039047a2..5b92fd649ad 100644
--- a/src/common/transformations/src/transformations/common_optimizations/align_mixed_fp32_fp16_types.cpp
+++ b/src/common/transformations/src/transformations/common_optimizations/align_mixed_fp32_fp16_types.cpp
@@ -8,8 +8,7 @@
 #include "openvino/core/rt_info.hpp"
 #include "openvino/op/util/precision_sensitive_attribute.hpp"
 #include "openvino/opsets/opset10.hpp"
-#include "transformations/convert_precision.hpp"
-#include "transformations/rt_info/decompression.hpp"
+#include "openvino/pass/constant_folding.hpp"
 #include "transformations/rt_info/disable_fp16_compression.hpp"
 
 using namespace ov;
@@ -35,6 +34,7 @@ bool ov::pass::AlignMixedFP32FP16Types::run_on_model(const std::shared_ptr<ov::Model>& model) {
            auto convert = std::make_shared<opset10::Convert>(node->output(i), out_type);
            convert->set_friendly_name(node->get_friendly_name() + "_compressed_to_f16");
            out_inputs.replace_source_output(convert);
+           pass::disable_constant_folding(convert);
            is_changed = true;
        }
    }
diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp
index d431f5bdbc8..5c4f5894e64 100644
--- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp
+++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp
@@ -25,7 +25,6 @@
 #include "transformations/common_optimizations/concat_reduce_fusion.hpp"
 #include "transformations/common_optimizations/conv_mul_fusion.hpp"
 #include "transformations/common_optimizations/conv_to_binary_conv.hpp"
-#include "transformations/common_optimizations/convert_compressed_to_mixed_precision.hpp"
 #include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
 #include "transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp"
 #include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
@@ -125,8 +124,6 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr<ov::Model>& f) {
-    REGISTER_PASS(manager, ConvertCompressedToMixedPrecision)
-
diff --git a/src/common/transformations/src/transformations/common_optimizations/convert_compressed_to_mixed_precision.cpp b/src/common/transformations/src/transformations/common_optimizations/convert_compressed_to_mixed_precision.cpp
deleted file mode 100644
--- a/src/common/transformations/src/transformations/common_optimizations/convert_compressed_to_mixed_precision.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright (C) 2018-2023 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "transformations/common_optimizations/convert_compressed_to_mixed_precision.hpp"
-
-#include "itt.hpp"
-#include "openvino/pass/constant_folding.hpp"
-#include "openvino/pass/manager.hpp"
-#include "transformations/common_optimizations/align_mixed_fp32_fp16_types.hpp"
-#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
-#include "transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp"
-#include "transformations/convert_precision.hpp"
-#include "transformations/utils/utils.hpp"
-
-using namespace ov;
-
-bool ov::pass::ConvertCompressedToMixedPrecision::run_on_model(const std::shared_ptr<ov::Model>& f) {
-    RUN_ON_MODEL_SCOPE(ConvertCompressedToMixedPrecision);
-
-    // pass is triggered only for fp16 compressed Models
-    if (!ov::op::util::has_decompression_converts(f))
-        return false;
-
-    Manager manager(get_pass_config());
-    REGISTER_PASS(manager, MarkSugraphsToKeepInMixedPrecision)
-    REGISTER_PASS(manager, AlignMixedFP32FP16Types)
-
-    const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
-    type_to_fuse_map additional_fuse_map = {};
-    // call ConvertPrecision with keep_precision_sensitive_in_fp32 = true
-    REGISTER_PASS(manager, ConvertPrecision, convert_precision_list, additional_fuse_map, true)
-
-    REGISTER_PASS(manager, EnableDecompressionConvertConstantFolding)
-    REGISTER_PASS(manager, ConstantFolding)
-    manager.run_passes(f);
-
-    return false;
-}
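With the standalone pass gone, its recipe lives inside ConvertPrecision itself. A hedged migration sketch for former callers (the wrapper function is illustrative; the three-argument registration matches the deleted body above and the updated tests below):

```cpp
#include <memory>

#include "openvino/pass/manager.hpp"
#include "transformations/convert_precision.hpp"

// Before: manager.register_pass<ov::pass::ConvertCompressedToMixedPrecision>();
// After:  a single ConvertPrecision call with keep_precision_sensitive_in_fp32 = true,
// which now marks/aligns precision sensitive subgraphs internally.
void compress_to_mixed_precision(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
    type_to_fuse_map additional_fuse_map = {};
    manager.register_pass<ov::pass::ConvertPrecision>(convert_precision_list, additional_fuse_map, true);
    manager.run_passes(model);
}
```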
diff --git a/src/common/transformations/src/transformations/common_optimizations/convert_compression_only_to_legacy.cpp b/src/common/transformations/src/transformations/common_optimizations/convert_compression_only_to_legacy.cpp
index 73c283d9a5e..ae712e10ead 100644
--- a/src/common/transformations/src/transformations/common_optimizations/convert_compression_only_to_legacy.cpp
+++ b/src/common/transformations/src/transformations/common_optimizations/convert_compression_only_to_legacy.cpp
@@ -9,27 +9,12 @@
 #include "openvino/pass/manager.hpp"
 #include "openvino/pass/pattern/op/wrap_type.hpp"
 #include "transformations/convert_precision.hpp"
+#include "transformations/enable_decompression_convert_constant_folding.hpp"
 #include "transformations/rt_info/disable_fp16_compression.hpp"
 #include "transformations/utils/utils.hpp"
 
 using namespace ov;
 
-ov::pass::EnableDecompressionConvertConstantFolding::EnableDecompressionConvertConstantFolding() {
-    MATCHER_SCOPE(EnableDecompressionConvertConstantFolding);
-    auto convert = pattern::wrap_type<opset8::Convert>();
-
-    ov::matcher_pass_callback callback = [=](pattern::Matcher& m) {
-        const auto& node = m.get_match_root();
-        if (!ov::is_decompression(node))
-            return false;
-        enable_constant_folding(node);
-        return true;
-    };
-
-    auto m = std::make_shared<pattern::Matcher>(convert, matcher_name);
-    this->register_matcher(m, callback);
-}
-
 bool ov::pass::ConvertCompressedOnlyToLegacy::run_on_model(const std::shared_ptr<ov::Model>& f) {
     RUN_ON_MODEL_SCOPE(ConvertCompressedOnlyToLegacy);
     if (ov::op::util::has_decompression_converts(f)) {
diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp
index c466a4b9df6..f762e1c1028 100644
--- a/src/common/transformations/src/transformations/convert_precision.cpp
+++ b/src/common/transformations/src/transformations/convert_precision.cpp
@@ -17,7 +17,12 @@
 #include <vector>
 
 #include "itt.hpp"
+#include "openvino/pass/constant_folding.hpp"
+#include "openvino/pass/manager.hpp"
 #include "ov_ops/type_relaxed.hpp"
+#include "transformations/common_optimizations/align_mixed_fp32_fp16_types.hpp"
+#include "transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp"
+#include "transformations/enable_decompression_convert_constant_folding.hpp"
 #include "transformations/rt_info/disable_fp16_compression.hpp"
 
 using namespace ov;
@@ -175,7 +180,7 @@ bool convert_precision(ov::pass::PassBase& pass,
 
     // If output type mismatch given type we try to fuse type into this operation
     // otherwise we insert Convert operation.
     for (auto& node : ops) {
-        if (skip_precision_sensitive && fp16_compression_is_disabled(node))
+        if (skip_precision_sensitive && fp16_compression_is_disabled(node) && to == element::f16)
             continue;
 
         // Recursively apply transformation for sub-graph based operations
@@ -201,7 +206,7 @@ bool convert_precision(ov::pass::PassBase& pass,
 
     for (auto& node : ops) {
         // skip precision sensitive nodes
-        if (skip_precision_sensitive && fp16_compression_is_disabled(node))
+        if (skip_precision_sensitive && fp16_compression_is_disabled(node) && to == element::f16)
             continue;
         is_output_precision_changed |= convert_node_output_precision(node);
     }
@@ -219,6 +224,8 @@ bool convert_precision(ov::pass::PassBase& pass,
     // Convert elimination here
     for (auto& node : ops) {
         if (auto convert = std::dynamic_pointer_cast<ov::op::v0::Convert>(node)) {
+            if (pass::constant_folding_is_disabled(node))
+                continue;
             // WA for topK, dont remove fake convert
             if (convert->input(0).get_element_type() == convert->get_convert_element_type() &&
                 convert->input_value(0).get_node_shared_ptr()->get_output_size() == 1) {
@@ -301,6 +308,17 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr<ov::Model>& f) {
+    std::pair<element::Type, element::Type> compress_f16_pair = {ov::element::f32, ov::element::f16};
+    bool has_compress_f16 = std::count(m_precisions.begin(), m_precisions.end(), compress_f16_pair) > 0;
+
+    if (m_keep_precision_sensitive_in_fp32 && has_compress_f16) {
+        pass::Manager manager(get_pass_config());
+        // Mark subgraphs with disable_fp16_compression to keep them in FP32
+        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+        manager.register_pass<pass::AlignMixedFP32FP16Types>();
+        manager.run_passes(f);
+    }
+
     for (const auto& it : m_additional_type_to_fuse_map) {
         type_to_fuse[it.first] = it.second;
     }
@@ -327,6 +345,13 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr<ov::Model>& f) {
+    if (m_keep_precision_sensitive_in_fp32 && has_compress_f16) {
+        pass::Manager manager(get_pass_config());
+        manager.register_pass<pass::EnableDecompressionConvertConstantFolding>();
+        manager.register_pass<pass::ConstantFolding>();
+        manager.run_passes(f);
+    }
+
     (void)is_changed;  // ignored
 
     // Returning value is false because pass::Manager always apply Validation pass
diff --git a/src/common/transformations/src/transformations/enable_decompression_convert_constant_folding.cpp b/src/common/transformations/src/transformations/enable_decompression_convert_constant_folding.cpp
new file mode 100644
index 00000000000..659994a68cd
--- /dev/null
+++ b/src/common/transformations/src/transformations/enable_decompression_convert_constant_folding.cpp
@@ -0,0 +1,29 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/enable_decompression_convert_constant_folding.hpp"
+
+#include "itt.hpp"
+#include "openvino/opsets/opset8.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+#include "transformations/rt_info/decompression.hpp"
+#include "transformations/rt_info/disable_constant_folding.hpp"
+
+using namespace ov;
+
+pass::EnableDecompressionConvertConstantFolding::EnableDecompressionConvertConstantFolding() {
+    MATCHER_SCOPE(EnableDecompressionConvertConstantFolding);
+    auto convert = pattern::wrap_type<opset8::Convert>();
+
+    matcher_pass_callback callback = [=](pattern::Matcher& m) {
+        const auto& node = m.get_match_root();
+        if (!is_decompression(node))
+            return false;
+        enable_constant_folding(node);
+        return true;
+    };
+
+    auto m = std::make_shared<pattern::Matcher>(convert, matcher_name);
+    this->register_matcher(m, callback);
+}
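The changes above form a disable-then-selectively-enable handshake: AlignMixedFP32FP16Types stamps disable_constant_folding on the Converts it inserts, convert_precision's Convert elimination now skips such nodes, and EnableDecompressionConvertConstantFolding lifts the mark only for decompression Converts. A small illustrative sketch of those rt_info calls (the standalone graph and main() are hypothetical):

```cpp
#include <memory>

#include "openvino/opsets/opset10.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "transformations/rt_info/decompression.hpp"

// Build a Constant -> Convert pair, mark it as a decompression subgraph,
// and show how the folding marks interact.
int main() {
    auto weights = ov::opset10::Constant::create(ov::element::f16, ov::Shape{1}, {1.0f});
    auto convert = std::make_shared<ov::opset10::Convert>(weights, ov::element::f32);

    ov::mark_as_decompression(convert);           // rt_info: this Convert only decompresses weights
    ov::pass::disable_constant_folding(convert);  // keep it from folding during conversion

    // convert_precision's Convert elimination checks this mark and leaves the node alone:
    bool skipped = ov::pass::constant_folding_is_disabled(convert);

    ov::pass::enable_constant_folding(convert);   // what EnableDecompressionConvertConstantFolding does
    return skipped ? 0 : 1;
}
```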
diff --git a/src/common/transformations/tests/common_optimizations/align_mixed_fp32_fp16_types_test.cpp b/src/common/transformations/tests/common_optimizations/align_mixed_fp32_fp16_types_test.cpp
index cfe06a0800d..6127453b842 100644
--- a/src/common/transformations/tests/common_optimizations/align_mixed_fp32_fp16_types_test.cpp
+++ b/src/common/transformations/tests/common_optimizations/align_mixed_fp32_fp16_types_test.cpp
@@ -9,7 +9,6 @@
 #include <string>
 #include <transformations/common_optimizations/align_mixed_fp32_fp16_types.hpp>
 #include <transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp>
-#include <transformations/convert_precision.hpp>
 
 #include "common_test_utils/ngraph_test_utils.hpp"
@@ -36,24 +35,21 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_1) {
         pass::Manager manager;
         manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
         manager.register_pass<pass::AlignMixedFP32FP16Types>();
-        const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
-        type_to_fuse_map empty_type_to_fuse_map = {};
-        manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
         manager.run_passes(model);
     }
 
     {
-        auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
         auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
         auto exp_1 = make_shared<Exp>(convert_to_f32_1);
-        auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
         auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
         auto reduce_sum_1 = make_shared<ReduceSum>(exp_1, reduction_axes);
 
         auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
         auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
         auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
-        auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f16);
+        auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f32);
         auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
 
         model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
@@ -81,18 +77,15 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_2) {
         pass::Manager manager;
         manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
         manager.register_pass<pass::AlignMixedFP32FP16Types>();
-        const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
-        type_to_fuse_map empty_type_to_fuse_map = {};
-        manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
         manager.run_passes(model);
     }
 
     {
-        auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
         auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
         auto exp_1 = make_shared<Exp>(convert_to_f32_1);
-        auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
 
         auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
         auto unsqueeze_axes = Constant::create(element::i64, Shape{1}, {1});
@@ -102,7 +95,7 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_2) {
         auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
         auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
         auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
-        auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f16);
+        auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f32);
         auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
 
         model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
@@ -130,17 +123,14 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_3) {
         pass::Manager manager;
         manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
         manager.register_pass<pass::AlignMixedFP32FP16Types>();
-        const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
-        type_to_fuse_map empty_type_to_fuse_map = {};
-        manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
         manager.run_passes(model);
     }
 
     {
-        auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
         auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
         auto exp_1 = make_shared<Exp>(convert_to_f32_1);
-        auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
 
         auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
         auto reduce_sum_1 = make_shared<ReduceSum>(exp_1, reduction_axes);
@@ -150,7 +140,7 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_3) {
         auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
         auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
         auto mul_1 = make_shared<Multiply>(add_1, factor_const_decompressed);
-        auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f16);
+        auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f32);
         auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
 
         model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
@@ -172,21 +162,18 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_4) {
         pass::Manager manager;
         manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
         manager.register_pass<pass::AlignMixedFP32FP16Types>();
-        const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
-        type_to_fuse_map empty_type_to_fuse_map = {};
-        manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
         manager.run_passes(model);
     }
 
     {
-        auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
-        auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
         auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
         auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
         auto mvn_1 = make_shared<MVN>(convert_to_f32_1, reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT);
         auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f});
         auto add_1 = make_shared<Add>(mvn_1, addition_const);
-        auto convert_to_f16_1 = make_shared<Convert>(add_1, element::f16);
+        auto convert_to_f16_1 = make_shared<Convert>(add_1, element::f32);
         auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
 
         model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
@@ -215,15 +202,12 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_mnv_with_split) {
         pass::Manager manager;
         manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
         manager.register_pass<pass::AlignMixedFP32FP16Types>();
-        const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
-        type_to_fuse_map empty_type_to_fuse_map = {};
-        manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
         manager.run_passes(model);
     }
 
     {
-        auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
-        auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 56, 224});
+        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 56, 224});
 
         auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
@@ -234,7 +218,7 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_mnv_with_split) {
         auto mvn_1 = make_shared<MVN>(split->output(0), reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT);
         auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f});
         auto add_1 = make_shared<Add>(mvn_1, addition_const);
-        auto convert_to_f16_1 = make_shared<Convert>(add_1, element::f16);
+        auto convert_to_f16_1 = make_shared<Convert>(add_1, element::f32);
         auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
 
         // todo: without Converts to fp16 because of GPU
@@ -271,17 +255,14 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_with_rand_uniform) {
         pass::Manager manager;
         manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
         manager.register_pass<pass::AlignMixedFP32FP16Types>();
-        const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
-        type_to_fuse_map empty_type_to_fuse_map = {};
-        manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
         manager.run_passes(model);
     }
 
     {
-        auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
         auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
         auto exp_1 = make_shared<Exp>(convert_to_f32_1);
-        auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
 
         auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
         auto reduce_sum_1 = make_shared<ReduceSum>(exp_1, reduction_axes);
@@ -289,14 +270,14 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_with_rand_uniform) {
         auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
 
         auto out_shape = Constant::create(element::i64, Shape{3}, {1, 3, 224});
-        auto minval = Constant::create(element::f16, Shape{}, {1});
-        auto maxval = Constant::create(element::f16, Shape{}, {10});
-        auto rand_uniform = make_shared<RandomUniform>(out_shape, minval, maxval, element::f16);
+        auto minval = Constant::create(element::f32, Shape{}, {1});
+        auto maxval = Constant::create(element::f32, Shape{}, {10});
+        auto rand_uniform = make_shared<RandomUniform>(out_shape, minval, maxval, element::f32);
         auto rand_uniform_decompressed = make_shared<Convert>(rand_uniform, element::f32);
         auto rand_uniform_add_factor = make_shared<Add>(rand_uniform_decompressed, factor_const_decompressed);
 
         auto mul_1 = make_shared<Multiply>(reduce_sum_1, rand_uniform_add_factor);
-        auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f16);
+        auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f32);
         auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
 
         model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
diff --git a/src/common/transformations/tests/common_optimizations/convert_compressed_to_mixed_precision_test.cpp b/src/common/transformations/tests/common_optimizations/convert_compressed_to_mixed_precision_test.cpp
deleted file mode 100644
index b88c7efd593..00000000000
--- a/src/common/transformations/tests/common_optimizations/convert_compressed_to_mixed_precision_test.cpp
+++ /dev/null
@@ -1,140 +0,0 @@
-// Copyright (C) 2018-2023 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "transformations/common_optimizations/convert_compressed_to_mixed_precision.hpp"
-
-#include <gtest/gtest.h>
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "common_test_utils/ngraph_test_utils.hpp"
-#include "openvino/core/model.hpp"
-#include "openvino/opsets/opset10.hpp"
-#include "openvino/pass/manager.hpp"
-#include "transformations/init_node_info.hpp"
-#include "transformations/rt_info/decompression.hpp"
-
-using namespace testing;
-using namespace ov;
-
-TEST_F(TransformationTestsF, ConvertCompressedToMixedPrecision) {
-    {
-        auto input = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 12, 12});
-        auto const_weights =
-            opset10::Constant::create(element::f16, Shape{1, 3, 3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
-                                                                        6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9});
-        auto convert_ins1 = std::make_shared<opset10::Convert>(const_weights, element::f32);
-        mark_as_decompression(convert_ins1);
-        auto conv = std::make_shared<opset10::Convolution>(input,
-                                                           convert_ins1,
-                                                           Strides{1, 1},
-                                                           CoordinateDiff{0, 0},
-                                                           CoordinateDiff{0, 0},
-                                                           Strides{1, 1});
-
-        model = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
-
-        pass::Manager manager;
-        manager.register_pass<pass::InitNodeInfo>();
-        manager.register_pass<pass::ConvertCompressedToMixedPrecision>();
-        manager.run_passes(model);
-        ASSERT_NO_THROW(check_rt_info(model));
-    }
-
-    {
-        auto input = std::make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 12, 12});
-        auto const_weights =
-            opset10::Constant::create(element::f16, Shape{1, 3, 3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
-                                                                        6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9});
-        auto conv = std::make_shared<opset10::Convolution>(input,
-                                                           const_weights,
-                                                           Strides{1, 1},
-                                                           CoordinateDiff{0, 0},
-                                                           CoordinateDiff{0, 0},
-                                                           Strides{1, 1});
-
-        model_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
-    }
-}
-
-TEST_F(TransformationTestsF, ConvertCompressedToMixedPrecissionNoConvertion) {
-    // test that pass is not triggered when there are no decompression Converts
-    {
-        auto input = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 12, 12});
-        auto const_weights =
-            opset10::Constant::create(element::f32, Shape{1, 3, 3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
-                                                                        6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9});
-        auto conv = std::make_shared<opset10::Convolution>(input,
-                                                           const_weights,
-                                                           Strides{1, 1},
-                                                           CoordinateDiff{0, 0},
-                                                           CoordinateDiff{0, 0},
-                                                           Strides{1, 1});
-
-        model = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
-
-        pass::Manager manager;
-        manager.register_pass<pass::InitNodeInfo>();
-        manager.register_pass<pass::ConvertCompressedToMixedPrecision>();
-        manager.run_passes(model);
-        ASSERT_NO_THROW(check_rt_info(model));
-    }
-
-    {
-        auto input = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 12, 12});
-        auto const_weights =
-            opset10::Constant::create(element::f32, Shape{1, 3, 3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
-                                                                        6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9});
-        auto conv = std::make_shared<opset10::Convolution>(input,
-                                                           const_weights,
-                                                           Strides{1, 1},
-                                                           CoordinateDiff{0, 0},
-                                                           CoordinateDiff{0, 0},
-                                                           Strides{1, 1});
-
-        model_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
-    }
-}
-
-TEST_F(TransformationTestsF, ConvertCompressedToMixedPrecission_keep_sensitive_nodes_in_fp32) {
-    {
-        auto input_1 = std::make_shared<opset10::Parameter>(element::f32, Shape{360, 640});
-        auto input_2 = std::make_shared<opset10::Parameter>(element::f32, Shape{720, 1280});
-        auto shapeof = std::make_shared<opset10::ShapeOf>(input_2);
-
-        // decompression Converts are needed for ConvertCompressedToMixedPrecision to be triggered
-        auto compressed_const = opset10::Constant::create(element::f16, Shape{}, {2.0f});
-        auto decompress_convert = std::make_shared<opset10::Convert>(compressed_const, element::f32);
-        mark_as_decompression(decompress_convert);
-        auto add_decompressed_const = std::make_shared<opset10::Add>(input_1, decompress_convert);
-
-        auto convert_to_float = std::make_shared<opset10::Convert>(shapeof, element::f32);
-        auto const_denominator = opset10::Constant::create(element::f32, Shape{}, {2.0f});
-        auto div = std::make_shared<opset10::Divide>(convert_to_float, const_denominator);
-        auto new_shape = std::make_shared<opset10::Convert>(div, element::i64);
-
-        auto reshape = std::make_shared<opset10::Reshape>(add_decompressed_const, new_shape, false);
-        model = std::make_shared<Model>(NodeVector{reshape}, ParameterVector{input_1, input_2});
-
-        pass::Manager manager;
-        manager.register_pass<pass::ConvertCompressedToMixedPrecision>();
-        manager.run_passes(model);
-    }
-    {
-        auto input_1 = std::make_shared<opset10::Parameter>(element::f16, Shape{360, 640});
-        auto input_2 = std::make_shared<opset10::Parameter>(element::f16, Shape{720, 1280});
-
-        // after ConvertCompressedToMixedPrecision Const->Convert are constant-folded into a single f16 Const
-        auto compressed_const = opset10::Constant::create(element::f16, Shape{}, {2.0f});
-        auto add_compressed_const = std::make_shared<opset10::Add>(input_1, compressed_const);
-
-        // shape subgraph will be constant folded
-        auto new_shape_const = opset10::Constant::create(element::i64, Shape{2}, {360, 640});
-
-        auto reshape = std::make_shared<opset10::Reshape>(add_compressed_const, new_shape_const, false);
-        model_ref = std::make_shared<Model>(NodeVector{reshape}, ParameterVector{input_1, input_2});
-    }
-}
diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp
index 9ba053f8b8c..11ea0cb422d 100644
--- a/src/common/transformations/tests/utils/convert_precision.cpp
+++ b/src/common/transformations/tests/utils/convert_precision.cpp
@@ -21,11 +21,11 @@
 #include <vector>
 
 #include "common_test_utils/ngraph_test_utils.hpp"
-#include "transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp"
 #include "transformations/rt_info/disable_fp16_compression.hpp"
 
 using namespace testing;
 using namespace ov;
+using namespace std;
 
 template <element::Type_t T>
 bool has_type(std::shared_ptr<Model> f) {
@@ -776,7 +775,6 @@ TEST(TransformationTests, ConvertPrecision_skip_precision_sensitive) {
         model = std::make_shared<Model>(NodeVector{interpolate}, ParameterVector{input});
 
         pass::Manager manager;
-        manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
         type_to_fuse_map empty_type_to_fuse_map = {};
         bool keep_precision_sensitive_in_fp32 = true;
         manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
@@ -811,7 +810,6 @@ TEST(TransformationTests, ConvertPrecision_without_keep_precision_sensitive_in_fp32) {
         interpolate = std::make_shared<opset10::Interpolate>(input, sizes, scales, attrs);
         model = std::make_shared<Model>(NodeVector{interpolate}, ParameterVector{input});
         pass::Manager manager;
-        manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
        type_to_fuse_map empty_type_to_fuse_map = {};
         bool keep_precision_sensitive_in_fp32 = false;
         manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
@@ -834,7 +832,6 @@ TEST(TransformationTests, ConvertPrecision_check_marking_does_not_leak_in_trivial_case) {
         model = std::make_shared<Model>(NodeVector{reshape}, ParameterVector{input_1, input_2});
 
         pass::Manager manager;
-        manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
         type_to_fuse_map empty_type_to_fuse_map = {};
         bool keep_precision_sensitive_in_fp32 = true;
         manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
@@ -874,7 +871,6 @@ TEST(TransformationTests, ConvertPrecision_whole_shape_subgraph_is_marked_1) {
         model = std::make_shared<Model>(NodeVector{reshape}, ParameterVector{input_1, input_2});
 
         pass::Manager manager;
-        manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
         type_to_fuse_map empty_type_to_fuse_map = {};
         bool keep_precision_sensitive_in_fp32 = true;
         manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
@@ -928,7 +924,6 @@ TEST(TransformationTests, ConvertPrecision_whole_shape_subgraph_is_marked_2) {
         model = std::make_shared<Model>(NodeVector{result}, ParameterVector{input_1});
 
         pass::Manager manager;
-        manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
         type_to_fuse_map empty_type_to_fuse_map = {};
         bool keep_precision_sensitive_in_fp32 = true;
         manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
@@ -1008,7 +1003,6 @@ TEST(TransformationTests, ConvertPrecision_whole_shape_subgraph_is_marked_3) {
         model = std::make_shared<Model>(NodeVector{result_1, result_2}, ParameterVector{input_1, input_2});
 
         pass::Manager manager;
-        manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
         type_to_fuse_map empty_type_to_fuse_map = {};
         bool keep_precision_sensitive_in_fp32 = true;
         manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
@@ -1087,7 +1081,6 @@ TEST(TransformationTests, ConvertCompressedToMixedPrecission_do_not_keep_in_fp32) {
         model = std::make_shared<Model>(NodeVector{interpolate}, ParameterVector{input});
 
         pass::Manager manager;
-        manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
         type_to_fuse_map empty_type_to_fuse_map = {};
         bool keep_precision_sensitive_in_fp32 = false;  // didn't keep in FP32 intentionally
         manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
@@ -1323,3 +1316,474 @@ TEST(TransformationTests, ConvertPrecision_ConstantConversion_U1ToU4) {
                           std::vector<uint8_t>{171},
                           {1, 0, 1, 0, 1, 0, 1, 1});
 }
+
+TEST(TransformationTests, ConvertPrecision_keep_precision_sensitive_fp32_with_exp) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto exp_1 = make_shared<opset10::Exp>(input_1);
+        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(exp_1, reduction_axes);
+
+        auto factor_const = opset10::Constant::create(element::f16, Shape{1}, {-1});
+        auto factor_const_decompressed = make_shared<opset10::Convert>(factor_const, element::f32);
+        auto mul_1 = make_shared<opset10::Multiply>(reduce_sum_1, factor_const_decompressed);
+        auto matmul_1 = make_shared<opset10::MatMul>(mul_1, input_2);
+
+        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(model);
+    }
+
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_1_decompressed = make_shared<opset10::Convert>(input_1, element::f32);
+        auto exp_1 = make_shared<opset10::Exp>(input_1_decompressed);
+        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(exp_1, reduction_axes);
+
+        auto factor_const = opset10::Constant::create(element::f16, Shape{1}, {-1});
+        auto factor_const_decompressed = make_shared<opset10::Convert>(factor_const, element::f32);
+        auto mul_1 = make_shared<opset10::Multiply>(reduce_sum_1, factor_const_decompressed);
+        auto mul_1_compressed = make_shared<opset10::Convert>(mul_1, element::f16);
+        auto matmul_1 = make_shared<opset10::MatMul>(mul_1_compressed, input_2);
+
+        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
+TEST(TransformationTests, ConvertPrecision_keep_precision_sensitive_fp32_with_reducemean) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto exp_1 = make_shared<opset10::Exp>(input_1);
+        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceMean>(exp_1, reduction_axes);
+
+        auto factor_const = opset10::Constant::create(element::f16, Shape{1}, {-1});
+        auto factor_const_decompressed = make_shared<opset10::Convert>(factor_const, element::f32);
+        auto mul_1 = make_shared<opset10::Multiply>(reduce_sum_1, factor_const_decompressed);
+        auto matmul_1 = make_shared<opset10::MatMul>(mul_1, input_2);
+
+        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(model);
+    }
+
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_1_decompressed = make_shared<opset10::Convert>(input_1, element::f32);
+        auto exp_1 = make_shared<opset10::Exp>(input_1_decompressed);
+        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_mean_1 = make_shared<opset10::ReduceMean>(exp_1, reduction_axes);
+
+        auto factor_const = opset10::Constant::create(element::f16, Shape{1}, {-1});
+        auto factor_const_decompressed = make_shared<opset10::Convert>(factor_const, element::f32);
+        auto mul_1 = make_shared<opset10::Multiply>(reduce_mean_1, factor_const_decompressed);
+        auto mul_1_compressed = make_shared<opset10::Convert>(mul_1, element::f16);
+        auto matmul_1 = make_shared<opset10::MatMul>(mul_1_compressed, input_2);
+
+        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
+TEST(TransformationTests, ConvertPrecision_reducesum_without_exp) {
+    // ReduceSum without Exp is not a precision sensitive case, the whole Model should be cast into f16,
+    // no nodes should be marked and no Converts should be added
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(input_1, reduction_axes);
+
+        auto factor_const = opset10::Constant::create(element::f32, Shape{1}, {-1});
+        auto mul_1 = make_shared<opset10::Multiply>(reduce_sum_1, factor_const);
+        auto matmul_1 = make_shared<opset10::MatMul>(mul_1, input_2);
+
+        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(model);
+    }
+
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(input_1, reduction_axes);
+
+        auto factor_const = opset10::Constant::create(element::f16, Shape{1}, {-1});
+        auto mul_1 = make_shared<opset10::Multiply>(reduce_sum_1, factor_const);
+        auto matmul_1 = make_shared<opset10::MatMul>(mul_1, input_2);
+
+        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
+TEST(TransformationTests, ConvertPrecision_MarkNormalizationOps_1) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto mvn_1 = make_shared<opset10::MVN>(input_1, reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT);
+        auto matmul_1 = make_shared<opset10::MatMul>(mvn_1, input_2);
+
+        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(model);
+    }
+
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_1_decompressed = make_shared<opset10::Convert>(input_1, element::f32);
+        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto mvn_1 =
+            make_shared<opset10::MVN>(input_1_decompressed, reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT);
+        auto mvn_compressed = make_shared<opset10::Convert>(mvn_1, element::f16);
+        auto matmul_1 = make_shared<opset10::MatMul>(mvn_compressed, input_2);
+
+        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
+TEST(TransformationTests, ConvertPrecision_MarkNormalizationOps_2) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto normalizel2_1 = make_shared<opset10::NormalizeL2>(input_1, reduction_axes, 1.0e-8f, ov::op::EpsMode::MAX);
+        auto matmul_1 = make_shared<opset10::MatMul>(normalizel2_1, input_2);
+
+        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(model);
+    }
+
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_1_decompressed = make_shared<opset10::Convert>(input_1, element::f32);
+        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto normalizel2_1 =
+            make_shared<opset10::NormalizeL2>(input_1_decompressed, reduction_axes, 1.0e-8f, ov::op::EpsMode::MAX);
+        auto normalizel2_compressed = make_shared<opset10::Convert>(normalizel2_1, element::f16);
+        auto matmul_1 = make_shared<opset10::MatMul>(normalizel2_compressed, input_2);
+
+        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
+TEST(TransformationTests, ConvertPrecision_keep_precision_sensitive_fp32_t2t_subgraph) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    // subgraph from t2t-vit-7
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3136, 32});
+        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3136, 32});
+        auto input_3 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3136, 64, 1});
+        auto input_4 = make_shared<opset10::Parameter>(element::f32, Shape{128, 64});
+        auto exp_1 = make_shared<opset10::Exp>(input_1);
+        auto exp_2 = make_shared<opset10::Exp>(input_2);
+
+        auto factor_1 = opset10::Constant::create(element::f32, Shape{1}, {0.5});  // add decompression
+        auto mul_1 = make_shared<opset10::Multiply>(exp_1, factor_1);
+        auto factor_2 = opset10::Constant::create(element::f32, Shape{1}, {0.5});
+        auto mul_2 = make_shared<opset10::Multiply>(exp_2, factor_2);
+
+        auto const_unsqueeze_1 = opset10::Constant::create(element::i64, Shape{4}, {1, 3136, 1, 32});
+        auto unsqueeze_1 = make_shared<opset10::Reshape>(mul_1, const_unsqueeze_1, false);
+
+        auto const_unsqueeze_2 = opset10::Constant::create(element::i64, Shape{4}, {1, 3136, 1, 32});
+        auto unsqueeze_2 = make_shared<opset10::Reshape>(mul_2, const_unsqueeze_1, false);
+        auto reduction_axes_1 = opset10::Constant::create(element::i64, Shape{1}, {1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(mul_2, reduction_axes_1, true);
+        auto mul_3 = make_shared<opset10::Multiply>(reduce_sum_1, mul_1);
+        auto mul_4 = make_shared<opset10::Multiply>(input_3, unsqueeze_2);
+
+        auto reduction_axes_2 = opset10::Constant::create(element::i64, Shape{1}, {1});
+        auto reduce_sum_2 = make_shared<opset10::ReduceSum>(mul_4, reduction_axes_2);
+        auto reduction_axes_3 = opset10::Constant::create(element::i64, Shape{1}, {2});
+        auto reduce_sum_3 = make_shared<opset10::ReduceSum>(mul_3, reduction_axes_3, true);
+
+        auto broadcast_to_shape = opset10::Constant::create(element::i64, Shape{3}, {1, 1, 1});
+        auto broadcast =
+            make_shared<opset10::Broadcast>(reduce_sum_3, broadcast_to_shape, ov::op::BroadcastType::BIDIRECTIONAL);
+        auto tile_shape = opset10::Constant::create(element::i64, Shape{3}, {1, 1, 64});
+        auto tile = make_shared<opset10::Tile>(broadcast, tile_shape);
+        auto eps_const = opset10::Constant::create(element::f32, Shape{1}, {1.e-10});
+        auto add_1 = make_shared<opset10::Add>(tile, eps_const);
+
+        auto const_unsqueeze_3 = opset10::Constant::create(element::i64, Shape{4}, {1, 1, 64, 32});
+        auto unsqueeze_3 = make_shared<opset10::Reshape>(reduce_sum_2, const_unsqueeze_3, false);
+        auto mul_5 = make_shared<opset10::Multiply>(unsqueeze_1, unsqueeze_3);
+
+        auto reduction_axes_4 = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_4 = make_shared<opset10::ReduceSum>(mul_5, reduction_axes_4);
+
+        auto div_1 = make_shared<opset10::Divide>(reduce_sum_4, add_1);
+        auto matmul_1 = make_shared<opset10::MatMul>(div_1, input_4, false, true);
+
+        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2, input_3, input_4});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(model);
+    }
+
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3136, 32});
+        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3136, 32});
+        auto input_3 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3136, 64, 1});
+        auto input_4 = make_shared<opset10::Parameter>(element::f16, Shape{128, 64});
+        auto input_1_decompressed = make_shared<opset10::Convert>(input_1, element::f32);
+        auto input_2_decompressed = make_shared<opset10::Convert>(input_2, element::f32);
+        auto input_3_decompressed = make_shared<opset10::Convert>(input_3, element::f32);
+
+        auto exp_1 = make_shared<opset10::Exp>(input_1_decompressed);
+        auto exp_2 = make_shared<opset10::Exp>(input_2_decompressed);
+
+        auto factor_1 = opset10::Constant::create(element::f32, Shape{1}, {0.5});
+        auto mul_1 = make_shared<opset10::Multiply>(exp_1, factor_1);
+        auto factor_2 = opset10::Constant::create(element::f32, Shape{1}, {0.5});
+        auto mul_2 = make_shared<opset10::Multiply>(exp_2, factor_2);
+
+        auto const_unsqueeze_1 = opset10::Constant::create(element::i64, Shape{4}, {1, 3136, 1, 32});
+        auto unsqueeze_1 = make_shared<opset10::Reshape>(mul_1, const_unsqueeze_1, false);
+
+        auto const_unsqueeze_2 = opset10::Constant::create(element::i64, Shape{4}, {1, 3136, 1, 32});
+        auto unsqueeze_2 = make_shared<opset10::Reshape>(mul_2, const_unsqueeze_2, false);
+        auto reduction_axes_1 = opset10::Constant::create(element::i64, Shape{1}, {1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(mul_2, reduction_axes_1, true);
+        auto mul_3 = make_shared<opset10::Multiply>(reduce_sum_1, mul_1);
+        auto mul_4 = make_shared<opset10::Multiply>(input_3_decompressed, unsqueeze_2);
+
+        auto reduction_axes_2 = opset10::Constant::create(element::i64, Shape{1}, {1});
+        auto reduce_sum_2 = make_shared<opset10::ReduceSum>(mul_4, reduction_axes_2);
+        auto reduction_axes_3 = opset10::Constant::create(element::i64, Shape{1}, {2});
+        auto reduce_sum_3 = make_shared<opset10::ReduceSum>(mul_3, reduction_axes_3, true);
+
+        auto broadcast_to_shape = opset10::Constant::create(element::i64, Shape{3}, {1, 1, 1});
+        auto broadcast =
+            make_shared<opset10::Broadcast>(reduce_sum_3, broadcast_to_shape, ov::op::BroadcastType::BIDIRECTIONAL);
+        auto tile_shape = opset10::Constant::create(element::i64, Shape{3}, {1, 1, 64});
+        auto tile = make_shared<opset10::Tile>(broadcast, tile_shape);
+        auto eps_const = opset10::Constant::create(element::f32, Shape{1}, {1.e-10});
+        auto add_1 = make_shared<opset10::Add>(tile, eps_const);
+
+        auto const_unsqueeze_3 = opset10::Constant::create(element::i64, Shape{4}, {1, 1, 64, 32});
+        auto unsqueeze_3 = make_shared<opset10::Reshape>(reduce_sum_2, const_unsqueeze_3, false);
+        auto mul_5 = make_shared<opset10::Multiply>(unsqueeze_1, unsqueeze_3);
+
+        auto reduction_axes_4 = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_4 = make_shared<opset10::ReduceSum>(mul_5, reduction_axes_4);
+
+        auto div_1 = make_shared<opset10::Divide>(reduce_sum_4, add_1);
+        auto div_compressed = make_shared<opset10::Convert>(div_1, element::f16);
+        auto matmul_1 = make_shared<opset10::MatMul>(div_compressed, input_4, false, true);
+
+        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2, input_3, input_4});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
+TEST(TransformationTests, ConvertPrecision_DivisionByZeroMinimalPattern) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+
+    const float eps_value = 1.0e-12f;
+    {
+        auto input_1 = std::make_shared<opset10::Parameter>(element::f32, PartialShape::dynamic(3));
+        auto input_2 = std::make_shared<opset10::Parameter>(element::f32, PartialShape::dynamic(3));
+        auto eps_const = opset10::Constant::create(element::f32, Shape{1}, {eps_value});
+        auto add = std::make_shared<opset10::Add>(input_2, eps_const);
+        auto divide = std::make_shared<opset10::Divide>(input_1, add);
+        model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(model);
+    }
+
+    {
+        auto input_1 = std::make_shared<opset10::Parameter>(element::f16, PartialShape::dynamic(3));
+        auto input_2 = std::make_shared<opset10::Parameter>(element::f16, PartialShape::dynamic(3));
+        auto input_1_decompressed = make_shared<opset10::Convert>(input_1, element::f32);
+        auto input_2_decompressed = make_shared<opset10::Convert>(input_2, element::f32);
+
+        auto eps_const = opset10::Constant::create(element::f32, Shape{1}, {eps_value});
+        auto add = std::make_shared<opset10::Add>(input_2_decompressed, eps_const);
+        auto divide = std::make_shared<opset10::Divide>(input_1_decompressed, add);
+
+        model_ref = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
+TEST(TransformationTests, ConvertPrecision_PowWithNegativeExponent) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    const float eps_value = 1.0e-12f;
+    {
+        auto input_1 = std::make_shared<opset10::Parameter>(element::f32, PartialShape::dynamic(3));
+        auto input_2 = std::make_shared<opset10::Parameter>(element::f32, PartialShape::dynamic(3));
+        auto eps_const = opset10::Constant::create(element::f32, Shape{1}, {eps_value});
+        auto add = std::make_shared<opset10::Add>(input_2, eps_const);
+        auto pow_exp_const = opset10::Constant::create(element::f32, Shape{1}, {-1.77});
+        auto pow = std::make_shared<opset10::Power>(add, pow_exp_const);
+        auto mul = std::make_shared<opset10::Multiply>(input_1, pow);
+
+        model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(model);
+    }
+
+    {
+        auto input_1 = std::make_shared<opset10::Parameter>(element::f16, PartialShape::dynamic(3));
+        auto input_2 = std::make_shared<opset10::Parameter>(element::f16, PartialShape::dynamic(3));
+        auto input_1_decompressed = make_shared<opset10::Convert>(input_1, element::f32);
+        auto input_2_decompressed = make_shared<opset10::Convert>(input_2, element::f32);
+
+        auto eps_const = opset10::Constant::create(element::f32, Shape{1}, {eps_value});
+        auto add = std::make_shared<opset10::Add>(input_2_decompressed, eps_const);
+        auto pow_exp_const = opset10::Constant::create(element::f32, Shape{1}, {-1.77});
+        auto pow = std::make_shared<opset10::Power>(add, pow_exp_const);
+        auto mul = std::make_shared<opset10::Multiply>(input_1_decompressed, pow);
+
+        model_ref = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
+TEST(TransformationTests, ConvertPrecision_exp_through_unsqueeze) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto exp_1 = make_shared<opset10::Exp>(input_1);
+        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+
+        auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
+        auto unsqueeze_1 = make_shared<opset10::Unsqueeze>(exp_1, unsqueeze_axes);
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(unsqueeze_1, reduction_axes);
+
+        auto factor_const = opset10::Constant::create(element::f16, Shape{1}, {-1});
+        auto factor_const_decompressed = make_shared<opset10::Convert>(factor_const, element::f32);
+        auto mul_1 = make_shared<opset10::Multiply>(reduce_sum_1, factor_const_decompressed);
+        auto matmul_1 = make_shared<opset10::MatMul>(mul_1, input_2);
+
+        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(model);
+    }
+
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_1_decompressed = make_shared<opset10::Convert>(input_1, element::f32);
+        auto exp_1 = make_shared<opset10::Exp>(input_1_decompressed);
+        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+
+        auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
+        auto unsqueeze_1 = make_shared<opset10::Unsqueeze>(exp_1, unsqueeze_axes);
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(unsqueeze_1, reduction_axes);
+
+        auto factor_const = opset10::Constant::create(element::f16, Shape{1}, {-1});
+        auto factor_const_decompressed = make_shared<opset10::Convert>(factor_const, element::f32);
+        auto mul_1 = make_shared<opset10::Multiply>(reduce_sum_1, factor_const_decompressed);
+        auto mul_1_compressed = make_shared<opset10::Convert>(mul_1, element::f16);
+        auto matmul_1 = make_shared<opset10::MatMul>(mul_1_compressed, input_2);
+
+        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+}
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index e86b587999e..4c8f16ec07a 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -41,7 +41,6 @@
 #include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
 #include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
-#include "transformations/common_optimizations/convert_compressed_to_mixed_precision.hpp"
@@ -140,6 +139,63 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         manager.register_pass<ov::pass::InitNodeInfo>();
         manager.register_pass<ov::pass::CommonOptimizations>();
 
+        precisions_array fp_convert_precision_list = {
+            {ov::element::f64, ov::element::f32}
+        };
+
+        // call conversion of float types with keep_precision_sensitive_in_fp32 = true
+        auto fp_precision_supported = [&](ov::element::Type e) -> bool {
+            switch (e) {
+                case ov::element::f16: return device_info.supports_fp16;
+                case ov::element::f32: return true; // assume that all GPUs support f32 data type
+                case ov::element::f64: return device_info.supports_fp64;
+                case ov::element::bf16: return false;
+                default: return false;
+            }
+            return false;
+        };
+
+        const auto fallback_precision = ov::element::f32;
+        std::vector<ov::element::Type> fp_element_types = {
+            ov::element::f32,
+            ov::element::f16,
+            ov::element::bf16
+        };
+
+        // Add conversion from FP data types to infer precision if it's specified
+        auto infer_precision = config.get_property(ov::hint::inference_precision);
+        if (infer_precision != ov::element::undefined) {
+            if (!fp_precision_supported(infer_precision))
+                infer_precision = fallback_precision;
+
+            for (auto& et : fp_element_types) {
+                if (et != infer_precision) {
+                    fp_convert_precision_list.push_back({et, infer_precision});
+                }
+            }
+        }
+
+        // Add conversion from unsupported FP data types to f32 if we don't have a conversion to something valid already in the list
+        for (auto& et : fp_element_types) {
+            if (!fp_precision_supported(et)) {
+                auto et_pair = std::make_pair(et, fallback_precision);
+                bool has_valid_conversion =
+                    std::find_if(fp_convert_precision_list.begin(), fp_convert_precision_list.end(),
+                                 [&](std::pair<ov::element::Type, ov::element::Type> v) -> bool {
+                        return v.first == et_pair.first && fp_precision_supported(v.second);
+                    }) != fp_convert_precision_list.end();
+
+                if (!has_valid_conversion) {
+                    fp_convert_precision_list.push_back(et_pair);
+                }
+            }
+        }
+
+        type_to_fuse_map empty_fuse_map = {};
+        manager.register_pass<ov::pass::Validate>();
+        // call ConvertPrecision with keep_precision_sensitive_in_fp32 = true
+        manager.register_pass<ov::pass::ConvertPrecision>(fp_convert_precision_list, empty_fuse_map, true);
+
         manager.register_pass<ov::pass::WrapInterpolateIntoTransposes>();
         manager.register_pass<ov::pass::TransposeSinking>();
@@ -176,8 +232,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         manager.register_pass();
         manager.register_pass();
 
-        precisions_array convert_precision_list {
-            {ngraph::element::f64, ngraph::element::f32},
+        precisions_array int_convert_precision_list {
             {ngraph::element::i64, ngraph::element::i32},
             {ngraph::element::u64, ngraph::element::i32},
             {ngraph::element::u16, ngraph::element::i32},
@@ -187,54 +242,8 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
             {ngraph::element::u4, ngraph::element::u8},
         };
 
-        auto fp_precision_supported = [&](ngraph::element::Type e) -> bool {
-            switch (e) {
-                case ngraph::element::f16: return device_info.supports_fp16;
-                case ngraph::element::f32: return true; // assume that all GPUs support f32 data type
-                case ngraph::element::f64: return device_info.supports_fp64;
-                case ngraph::element::bf16: return false;
-                default: return false;
-            }
-            return false;
-        };
-
-        const auto fallback_precision = ngraph::element::f32;
-        std::vector<ngraph::element::Type> fp_element_types = {
-            ngraph::element::f32,
-            ngraph::element::f16,
-            ngraph::element::bf16
-        };
-
-        // Add conversion from FP data types to infer precision if it's specified
-        auto infer_precision = config.get_property(ov::inference_precision);
-        if (infer_precision != ov::element::undefined) {
-            if (!fp_precision_supported(infer_precision))
-                infer_precision = fallback_precision;
-
-            for (auto& et : fp_element_types) {
-                if (et != infer_precision) {
-                    convert_precision_list.push_back({et, infer_precision});
-                }
-            }
-        }
-
-        // Add conversion from unsupported FP data types to f32 if we don't have a conversion to something valid already in the list
-        for (auto& et : fp_element_types) {
-            if (!fp_precision_supported(et)) {
-                auto et_pair = std::make_pair(et, fallback_precision);
-                bool has_valid_conversion =
-                    std::find_if(convert_precision_list.begin(), convert_precision_list.end(),
-                                 [&](std::pair<ngraph::element::Type, ngraph::element::Type> v) -> bool {
-                        return v.first == et_pair.first && fp_precision_supported(v.second);
-                    }) != convert_precision_list.end();
-
-                if (!has_valid_conversion) {
-                    convert_precision_list.push_back(et_pair);
-                }
-            }
-        }
-
         manager.register_pass();
-        manager.register_pass<ngraph::pass::ConvertPrecision>(convert_precision_list);
+        manager.register_pass<ngraph::pass::ConvertPrecision>(int_convert_precision_list);
 
         auto pass_config = manager.get_pass_config();
         pass_config->disable();
@@ -242,7 +251,6 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
 
         // disable conversion to legacy and use the new mixed precision
         // in which precision sensitive nodes are kept in FP32
         pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();
-        pass_config->enable<ov::pass::ConvertCompressedToMixedPrecision>();
 
         // SpaceToDepth/DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
         pass_config->set_callback