From aa32ff1df34ecb6528dbd107c03e1ba96c3bed19 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Thu, 15 Jun 2023 11:07:22 +0200 Subject: [PATCH] keep Const + DecompressionConvert for CPU (#15930) * keep Const+DecompressionConvert pattern for CPU * temporary disabled failing unit-tests * disable CF by modifying bounds evaluate as well; minor corrections * added TODOs with ticket numbers * join const+decompression markings * minimized convert_precision.cpp changes * minor corrections * refactor fp16 transformations: moved into separate fp16_compression folder * style-fix * minor fixes * do not disable evaluate and CF in shape path * safer disabling of Const conversion * style-fix and minor corrections * restore original placement of ConvertPrecision --- ...dequantization_subgraph_transformation.cpp | 2 +- ..._precision_sensitive_shapeof_subgraphs.hpp | 12 ++++ ...decompression_convert_constant_folding.hpp | 26 ------- ...decompression_convert_constant_folding.hpp | 27 ------- .../align_mixed_fp32_fp16_types.hpp | 0 .../convert_compression_only_to_legacy.hpp | 0 ...decompression_convert_constant_folding.hpp | 49 +++++++++++++ ...k_subgraphs_to_keep_in_mixed_precision.hpp | 0 .../rt_info/is_shape_subgraph.hpp | 35 ++++++++++ .../rt_info/keep_fp16_const.hpp | 35 ++++++++++ .../common_optimizations.cpp | 4 +- ..._precision_sensitive_shapeof_subgraphs.cpp | 7 ++ .../src/transformations/convert_precision.cpp | 12 +++- ...decompression_convert_constant_folding.cpp | 27 ------- ...decompression_convert_constant_folding.cpp | 29 -------- .../align_mixed_fp32_fp16_types.cpp | 2 +- .../convert_compression_only_to_legacy.cpp | 4 +- ...decompression_convert_constant_folding.cpp | 70 +++++++++++++++++++ ...k_subgraphs_to_keep_in_mixed_precision.cpp | 2 +- .../rt_info/is_shape_sugraph.cpp | 20 ++++++ .../rt_info/keep_fp16_const.cpp | 20 ++++++ .../align_mixed_fp32_fp16_types_test.cpp | 5 +- ...onvert_compression_only_to_legacy_test.cpp | 2 +- 
...bgraph_to_keep_in_mixed_precision_test.cpp | 2 +- src/core/src/bound_evaluate.cpp | 6 +- .../intel_cpu/src/dnnl_extension_utils.cpp | 6 ++ .../transformation_pipeline.cpp | 11 ++- .../src/gna_transformations_pipeline.cpp | 4 +- .../src/plugin/transformations_pipeline.cpp | 2 +- src/plugins/template/src/plugin.cpp | 4 +- 30 files changed, 293 insertions(+), 132 deletions(-) delete mode 100644 src/common/transformations/include/transformations/disable_decompression_convert_constant_folding.hpp delete mode 100644 src/common/transformations/include/transformations/enable_decompression_convert_constant_folding.hpp rename src/common/transformations/include/transformations/{common_optimizations => fp16_compression}/align_mixed_fp32_fp16_types.hpp (100%) rename src/common/transformations/include/transformations/{common_optimizations => fp16_compression}/convert_compression_only_to_legacy.hpp (100%) create mode 100644 src/common/transformations/include/transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp rename src/common/transformations/include/transformations/{common_optimizations => fp16_compression}/mark_subgraphs_to_keep_in_mixed_precision.hpp (100%) create mode 100644 src/common/transformations/include/transformations/rt_info/is_shape_subgraph.hpp create mode 100644 src/common/transformations/include/transformations/rt_info/keep_fp16_const.hpp delete mode 100644 src/common/transformations/src/transformations/disable_decompression_convert_constant_folding.cpp delete mode 100644 src/common/transformations/src/transformations/enable_decompression_convert_constant_folding.cpp rename src/common/transformations/src/transformations/{common_optimizations => fp16_compression}/align_mixed_fp32_fp16_types.cpp (97%) rename src/common/transformations/src/transformations/{common_optimizations => fp16_compression}/convert_compression_only_to_legacy.cpp (85%) create mode 100644 
src/common/transformations/src/transformations/fp16_compression/mark_decompression_convert_constant_folding.cpp rename src/common/transformations/src/transformations/{common_optimizations => fp16_compression}/mark_subgraphs_to_keep_in_mixed_precision.cpp (99%) create mode 100644 src/common/transformations/src/transformations/rt_info/is_shape_sugraph.cpp create mode 100644 src/common/transformations/src/transformations/rt_info/keep_fp16_const.cpp diff --git a/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp b/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp index a99c707b126..5358a828c2d 100644 --- a/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp +++ b/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp @@ -4,8 +4,8 @@ #include #include -#include #include +#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" #include #include diff --git a/src/common/transformations/include/transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp b/src/common/transformations/include/transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp index 5311782c09c..2cabfde586d 100644 --- a/src/common/transformations/include/transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp @@ -13,6 +13,7 @@ namespace pass { class TRANSFORMATIONS_API MarkPrecisionSensitiveShapeOfSubgraphs; class TRANSFORMATIONS_API MarkPrecisionSensitiveConstants; class TRANSFORMATIONS_API MarkDividesInShapeSubgraphs; +class TRANSFORMATIONS_API MarkShapeOfSubgraphs; } // namespace pass } // namespace ov @@ -33,6 +34,17 @@ protected: std::function m_markup_func; }; +/** + * @ingroup 
ie_transformation_common_api + * @brief MarkShapeOfSubgraphs marks shape subgraphs. + * Information whether the node belongs to the shape path or to the data path is needed during evaluate and CF. + */ +class ov::pass::MarkShapeOfSubgraphs : public MarkPrecisionSensitiveShapeOfSubgraphs { +public: + OPENVINO_RTTI("MarkShapeOfSubgraphs", "0"); + MarkShapeOfSubgraphs(); +}; + /** * @ingroup ie_transformation_common_api * @brief MarkPrecisionSensitiveConstants marks the constants diff --git a/src/common/transformations/include/transformations/disable_decompression_convert_constant_folding.hpp b/src/common/transformations/include/transformations/disable_decompression_convert_constant_folding.hpp deleted file mode 100644 index f80f70fd599..00000000000 --- a/src/common/transformations/include/transformations/disable_decompression_convert_constant_folding.hpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/pass/graph_rewrite.hpp" -#include "transformations_visibility.hpp" - -namespace ov { -namespace pass { - -class TRANSFORMATIONS_API DisableDecompressionConvertConstantFolding; - -} // namespace pass -} // namespace ov - -/** - * @ingroup ie_transformation_common_api - * @brief Disables ConstantFolding for Convert operation in compressed function. 
- */ -class ov::pass::DisableDecompressionConvertConstantFolding : public ov::pass::MatcherPass { -public: - OPENVINO_RTTI("DisableDecompressionConvertConstantFolding", "0"); - DisableDecompressionConvertConstantFolding(); -}; diff --git a/src/common/transformations/include/transformations/enable_decompression_convert_constant_folding.hpp b/src/common/transformations/include/transformations/enable_decompression_convert_constant_folding.hpp deleted file mode 100644 index 59471f9ba61..00000000000 --- a/src/common/transformations/include/transformations/enable_decompression_convert_constant_folding.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/pass/graph_rewrite.hpp" -#include "transformations/enable_decompression_convert_constant_folding.hpp" -#include "transformations_visibility.hpp" - -namespace ov { -namespace pass { - -class TRANSFORMATIONS_API EnableDecompressionConvertConstantFolding; - -} // namespace pass -} // namespace ov - -/** - * @ingroup ie_transformation_common_api - * @brief Disables ConstantFolding for Convert operation in compressed function. 
- */ -class ov::pass::EnableDecompressionConvertConstantFolding : public ov::pass::MatcherPass { -public: - OPENVINO_RTTI("EnableDecompressionConvertConstantFolding", "0"); - EnableDecompressionConvertConstantFolding(); -}; diff --git a/src/common/transformations/include/transformations/common_optimizations/align_mixed_fp32_fp16_types.hpp b/src/common/transformations/include/transformations/fp16_compression/align_mixed_fp32_fp16_types.hpp similarity index 100% rename from src/common/transformations/include/transformations/common_optimizations/align_mixed_fp32_fp16_types.hpp rename to src/common/transformations/include/transformations/fp16_compression/align_mixed_fp32_fp16_types.hpp diff --git a/src/common/transformations/include/transformations/common_optimizations/convert_compression_only_to_legacy.hpp b/src/common/transformations/include/transformations/fp16_compression/convert_compression_only_to_legacy.hpp similarity index 100% rename from src/common/transformations/include/transformations/common_optimizations/convert_compression_only_to_legacy.hpp rename to src/common/transformations/include/transformations/fp16_compression/convert_compression_only_to_legacy.hpp diff --git a/src/common/transformations/include/transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp b/src/common/transformations/include/transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp new file mode 100644 index 00000000000..29d335b0db1 --- /dev/null +++ b/src/common/transformations/include/transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp @@ -0,0 +1,49 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "mark_decompression_convert_constant_folding.hpp" +#include "openvino/pass/graph_rewrite.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API 
EnableDecompressionConvertConstantFolding; +class TRANSFORMATIONS_API DisableDecompressionConvertConstantFolding; +class TRANSFORMATIONS_API KeepConstAndDecompression; + +} // namespace pass +} // namespace ov + +/** + * @ingroup ie_transformation_common_api + * @brief Enables ConstantFolding for Convert operation in compressed function. + */ +class ov::pass::EnableDecompressionConvertConstantFolding : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("EnableDecompressionConvertConstantFolding", "0"); + EnableDecompressionConvertConstantFolding(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief Disables ConstantFolding for Convert operation in compressed function. + */ +class ov::pass::DisableDecompressionConvertConstantFolding : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("DisableDecompressionConvertConstantFolding", "0"); + DisableDecompressionConvertConstantFolding(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief Disables ConstantFolding for Convert operation and prevents conversion of f16 Consts to f32. 
+ */ +class ov::pass::KeepConstAndDecompression : public MatcherPass { +public: + OPENVINO_RTTI("KeepConstAndDecompression", "0"); + KeepConstAndDecompression(); +}; diff --git a/src/common/transformations/include/transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp b/src/common/transformations/include/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.hpp similarity index 100% rename from src/common/transformations/include/transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp rename to src/common/transformations/include/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.hpp diff --git a/src/common/transformations/include/transformations/rt_info/is_shape_subgraph.hpp b/src/common/transformations/include/transformations/rt_info/is_shape_subgraph.hpp new file mode 100644 index 00000000000..6d12efb6295 --- /dev/null +++ b/src/common/transformations/include/transformations/rt_info/is_shape_subgraph.hpp @@ -0,0 +1,35 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/node.hpp" +#include "openvino/core/runtime_attribute.hpp" +#include "transformations_visibility.hpp" + +namespace ov { + +TRANSFORMATIONS_API void mark_shape_subgraph(const std::shared_ptr& node); + +TRANSFORMATIONS_API void unmark_shape_subgraph(const std::shared_ptr& node); + +TRANSFORMATIONS_API bool is_shape_subgraph(const std::shared_ptr& node); + +/** + * @ingroup ie_runtime_attr_api + * @brief ShapeSubgraph class represents runtime info attribute that marks shape subgraphs. + * Information whether the node belongs to the shape path or to the data path is needed during evaluate and CF. 
+ */ +class TRANSFORMATIONS_API ShapeSubgraph : public RuntimeAttribute { +public: + OPENVINO_RTTI("shape_subgraph", "0"); + + ShapeSubgraph() = default; + + bool is_copyable() const override { + return false; + } +}; + +} // namespace ov diff --git a/src/common/transformations/include/transformations/rt_info/keep_fp16_const.hpp b/src/common/transformations/include/transformations/rt_info/keep_fp16_const.hpp new file mode 100644 index 00000000000..f38ca72f9a0 --- /dev/null +++ b/src/common/transformations/include/transformations/rt_info/keep_fp16_const.hpp @@ -0,0 +1,35 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/node.hpp" +#include "openvino/core/runtime_attribute.hpp" +#include "transformations_visibility.hpp" + +namespace ov { + +TRANSFORMATIONS_API void enable_keep_fp16_const(const std::shared_ptr& node); + +TRANSFORMATIONS_API void disable_keep_fp16_const(const std::shared_ptr& node); + +TRANSFORMATIONS_API bool is_keep_fp16_const(const std::shared_ptr& node); + +/** + * @ingroup ie_runtime_attr_api + * @brief KeepFP16Const class represents runtime info attribute that marks operation + * as prohibited to convert to FP16 as part of Compressed Only format. 
+ */ +class TRANSFORMATIONS_API KeepFP16Const : public RuntimeAttribute { +public: + OPENVINO_RTTI("keep_fp16_const", "0"); + + KeepFP16Const() = default; + + bool is_copyable() const override { + return false; + } +}; + +} // namespace ov diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 6064effe880..941427fa404 100644 --- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -25,7 +25,6 @@ #include "transformations/common_optimizations/concat_reduce_fusion.hpp" #include "transformations/common_optimizations/conv_mul_fusion.hpp" #include "transformations/common_optimizations/conv_to_binary_conv.hpp" -#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" #include "transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp" #include "transformations/common_optimizations/convert_quantize_dequantize.hpp" #include "transformations/common_optimizations/dilated_convolution_converter.hpp" @@ -64,7 +63,8 @@ #include "transformations/common_optimizations/swish_fusion.hpp" #include "transformations/common_optimizations/transpose_sinking.hpp" #include "transformations/common_optimizations/transpose_to_reshape.hpp" -#include "transformations/disable_decompression_convert_constant_folding.hpp" +#include "transformations/fp16_compression/convert_compression_only_to_legacy.hpp" +#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" #include "transformations/init_node_info.hpp" #include "transformations/op_conversions/batch_norm_decomposition.hpp" #include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" diff --git 
a/src/common/transformations/src/transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.cpp b/src/common/transformations/src/transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.cpp index 2dccd2d3be9..19dec1b5ba5 100644 --- a/src/common/transformations/src/transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.cpp @@ -14,6 +14,7 @@ #include "openvino/opsets/opset8.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/rt_info/disable_fp16_compression.hpp" +#include "transformations/rt_info/is_shape_subgraph.hpp" #include "transformations/rt_info/nonconvertible_divide.hpp" #include "transformations/utils/utils.hpp" @@ -89,3 +90,9 @@ bool ov::pass::MarkPrecisionSensitiveShapeOfSubgraphs::run_on_model(const shared } return true; } + +ov::pass::MarkShapeOfSubgraphs::MarkShapeOfSubgraphs() { + m_markup_func = [](Node* node) { + mark_shape_subgraph(node->shared_from_this()); + }; +} diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index 656c2faaab5..9b785432e77 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -21,10 +21,12 @@ #include "openvino/pass/constant_folding.hpp" #include "openvino/pass/manager.hpp" #include "ov_ops/type_relaxed.hpp" -#include "transformations/common_optimizations/align_mixed_fp32_fp16_types.hpp" -#include "transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp" -#include "transformations/enable_decompression_convert_constant_folding.hpp" +#include "transformations/fp16_compression/align_mixed_fp32_fp16_types.hpp" +#include 
"transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" +#include "transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.hpp" +#include "transformations/rt_info/decompression.hpp" #include "transformations/rt_info/disable_fp16_compression.hpp" +#include "transformations/rt_info/keep_fp16_const.hpp" using namespace ov; @@ -1028,6 +1030,10 @@ std::shared_ptr convert_low_precisions_int(std::shared_ptr& node, const precisions_map& precisions, const std::vector>& consumers) { + // Consts marked with disable_constant_folding should be kept in f16 until they reach the plugin + if (is_keep_fp16_const(node)) + return false; + auto from = node->get_element_type(); auto it = precisions.find(from); if (it == precisions.end()) diff --git a/src/common/transformations/src/transformations/disable_decompression_convert_constant_folding.cpp b/src/common/transformations/src/transformations/disable_decompression_convert_constant_folding.cpp deleted file mode 100644 index cf379b2e2c0..00000000000 --- a/src/common/transformations/src/transformations/disable_decompression_convert_constant_folding.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "transformations/disable_decompression_convert_constant_folding.hpp" - -#include "itt.hpp" -#include "openvino/opsets/opset8.hpp" -#include "openvino/pass/pattern/op/wrap_type.hpp" -#include "transformations/rt_info/decompression.hpp" -#include "transformations/rt_info/disable_constant_folding.hpp" - -ov::pass::DisableDecompressionConvertConstantFolding::DisableDecompressionConvertConstantFolding() { - MATCHER_SCOPE(DisableDecompressionConvertConstantFolding); - auto convert = pattern::wrap_type(); - - ov::matcher_pass_callback callback = [=](pattern::Matcher& m) { - const auto& node = m.get_match_root(); - if (!ov::is_decompression(node)) - return false; - disable_constant_folding(node); - return true; - }; - - 
auto m = std::make_shared(convert, matcher_name); - this->register_matcher(m, callback); -} diff --git a/src/common/transformations/src/transformations/enable_decompression_convert_constant_folding.cpp b/src/common/transformations/src/transformations/enable_decompression_convert_constant_folding.cpp deleted file mode 100644 index 659994a68cd..00000000000 --- a/src/common/transformations/src/transformations/enable_decompression_convert_constant_folding.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "transformations/enable_decompression_convert_constant_folding.hpp" - -#include "itt.hpp" -#include "openvino/opsets/opset8.hpp" -#include "openvino/pass/pattern/op/wrap_type.hpp" -#include "transformations/rt_info/decompression.hpp" -#include "transformations/rt_info/disable_constant_folding.hpp" - -using namespace ov; - -pass::EnableDecompressionConvertConstantFolding::EnableDecompressionConvertConstantFolding() { - MATCHER_SCOPE(EnableDecompressionConvertConstantFolding); - auto convert = pattern::wrap_type(); - - matcher_pass_callback callback = [=](pattern::Matcher& m) { - const auto& node = m.get_match_root(); - if (!is_decompression(node)) - return false; - enable_constant_folding(node); - return true; - }; - - auto m = std::make_shared(convert, matcher_name); - this->register_matcher(m, callback); -} diff --git a/src/common/transformations/src/transformations/common_optimizations/align_mixed_fp32_fp16_types.cpp b/src/common/transformations/src/transformations/fp16_compression/align_mixed_fp32_fp16_types.cpp similarity index 97% rename from src/common/transformations/src/transformations/common_optimizations/align_mixed_fp32_fp16_types.cpp rename to src/common/transformations/src/transformations/fp16_compression/align_mixed_fp32_fp16_types.cpp index 685451684e0..e94ef1969f1 100644 --- 
a/src/common/transformations/src/transformations/common_optimizations/align_mixed_fp32_fp16_types.cpp +++ b/src/common/transformations/src/transformations/fp16_compression/align_mixed_fp32_fp16_types.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "transformations/common_optimizations/align_mixed_fp32_fp16_types.hpp" +#include "transformations/fp16_compression/align_mixed_fp32_fp16_types.hpp" #include "itt.hpp" #include "openvino/core/rt_info.hpp" diff --git a/src/common/transformations/src/transformations/common_optimizations/convert_compression_only_to_legacy.cpp b/src/common/transformations/src/transformations/fp16_compression/convert_compression_only_to_legacy.cpp similarity index 85% rename from src/common/transformations/src/transformations/common_optimizations/convert_compression_only_to_legacy.cpp rename to src/common/transformations/src/transformations/fp16_compression/convert_compression_only_to_legacy.cpp index 64b3e8ae303..0a1877e9893 100644 --- a/src/common/transformations/src/transformations/common_optimizations/convert_compression_only_to_legacy.cpp +++ b/src/common/transformations/src/transformations/fp16_compression/convert_compression_only_to_legacy.cpp @@ -2,14 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" +#include "transformations/fp16_compression/convert_compression_only_to_legacy.hpp" #include "itt.hpp" #include "openvino/opsets/opset8.hpp" #include "openvino/pass/manager.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/convert_precision.hpp" -#include "transformations/enable_decompression_convert_constant_folding.hpp" +#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" #include "transformations/rt_info/disable_fp16_compression.hpp" #include "transformations/utils/utils.hpp" diff --git 
a/src/common/transformations/src/transformations/fp16_compression/mark_decompression_convert_constant_folding.cpp b/src/common/transformations/src/transformations/fp16_compression/mark_decompression_convert_constant_folding.cpp new file mode 100644 index 00000000000..ffd56eabdb0 --- /dev/null +++ b/src/common/transformations/src/transformations/fp16_compression/mark_decompression_convert_constant_folding.cpp @@ -0,0 +1,70 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" + +#include "itt.hpp" +#include "openvino/opsets/opset8.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/rt_info/decompression.hpp" +#include "transformations/rt_info/disable_constant_folding.hpp" +#include "transformations/rt_info/is_shape_subgraph.hpp" +#include "transformations/rt_info/keep_fp16_const.hpp" + +using namespace ov; + +pass::EnableDecompressionConvertConstantFolding::EnableDecompressionConvertConstantFolding() { + MATCHER_SCOPE(EnableDecompressionConvertConstantFolding); + auto convert = pattern::wrap_type(); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto& node = m.get_match_root(); + if (!is_decompression(node)) + return false; + enable_constant_folding(node); + return true; + }; + + auto m = std::make_shared(convert, matcher_name); + this->register_matcher(m, callback); +} + +pass::DisableDecompressionConvertConstantFolding::DisableDecompressionConvertConstantFolding() { + MATCHER_SCOPE(DisableDecompressionConvertConstantFolding); + auto convert = pattern::wrap_type(); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto& node = m.get_match_root(); + if (!is_decompression(node)) + return false; + disable_constant_folding(node); + return true; + }; + + auto m = std::make_shared(convert, matcher_name); + this->register_matcher(m, callback); +} + 
+pass::KeepConstAndDecompression::KeepConstAndDecompression() { + MATCHER_SCOPE(KeepDecompressionsInFP32Matcher); + + auto node_pattern = pattern::wrap_type(); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto node = m.get_match_root(); + if (!is_decompression(node) || !is_type(node) || + ov::is_shape_subgraph(node->shared_from_this())) + return false; + + disable_constant_folding(node); + + if (!is_type(node->input_value(0).get_node_shared_ptr())) + return true; + enable_keep_fp16_const(node->input_value(0).get_node_shared_ptr()); + + return true; + }; + auto m = std::make_shared(node_pattern, matcher_name); + register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.cpp b/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp similarity index 99% rename from src/common/transformations/src/transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.cpp rename to src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp index e276efa9aaf..ad3661eb2ea 100644 --- a/src/common/transformations/src/transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.cpp +++ b/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp" +#include "transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.hpp" #include "itt.hpp" #include "openvino/op/util/broadcast_base.hpp" diff --git a/src/common/transformations/src/transformations/rt_info/is_shape_sugraph.cpp b/src/common/transformations/src/transformations/rt_info/is_shape_sugraph.cpp new file mode 100644 index 00000000000..6e3e5f122f8 --- /dev/null 
+++ b/src/common/transformations/src/transformations/rt_info/is_shape_sugraph.cpp @@ -0,0 +1,20 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/rt_info/is_shape_subgraph.hpp" + +void ov::mark_shape_subgraph(const std::shared_ptr& node) { + auto& rt_info = node->get_rt_info(); + rt_info[ShapeSubgraph::get_type_info_static()] = ShapeSubgraph{}; +} + +void ov::unmark_shape_subgraph(const std::shared_ptr& node) { + auto& rt_info = node->get_rt_info(); + rt_info.erase(ShapeSubgraph::get_type_info_static()); +} + +bool ov::is_shape_subgraph(const std::shared_ptr& node) { + const auto& rt_info = node->get_rt_info(); + return rt_info.count(ShapeSubgraph::get_type_info_static()); +} diff --git a/src/common/transformations/src/transformations/rt_info/keep_fp16_const.cpp b/src/common/transformations/src/transformations/rt_info/keep_fp16_const.cpp new file mode 100644 index 00000000000..41fce20bd7e --- /dev/null +++ b/src/common/transformations/src/transformations/rt_info/keep_fp16_const.cpp @@ -0,0 +1,20 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/rt_info/keep_fp16_const.hpp" + +void ov::enable_keep_fp16_const(const std::shared_ptr& node) { + auto& rt_info = node->get_rt_info(); + rt_info[KeepFP16Const::get_type_info_static()] = KeepFP16Const{}; +} + +void ov::disable_keep_fp16_const(const std::shared_ptr& node) { + auto& rt_info = node->get_rt_info(); + rt_info.erase(KeepFP16Const::get_type_info_static()); +} + +bool ov::is_keep_fp16_const(const std::shared_ptr& node) { + const auto& rt_info = node->get_rt_info(); + return rt_info.count(KeepFP16Const::get_type_info_static()); +} diff --git a/src/common/transformations/tests/common_optimizations/align_mixed_fp32_fp16_types_test.cpp b/src/common/transformations/tests/common_optimizations/align_mixed_fp32_fp16_types_test.cpp index a2775f08fe6..a8e1bf403ff 100644 --- 
a/src/common/transformations/tests/common_optimizations/align_mixed_fp32_fp16_types_test.cpp +++ b/src/common/transformations/tests/common_optimizations/align_mixed_fp32_fp16_types_test.cpp @@ -2,15 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "transformations/fp16_compression/align_mixed_fp32_fp16_types.hpp" + #include #include #include #include -#include -#include #include "common_test_utils/ngraph_test_utils.hpp" +#include "transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.hpp" using namespace testing; using namespace ov; diff --git a/src/common/transformations/tests/common_optimizations/convert_compression_only_to_legacy_test.cpp b/src/common/transformations/tests/common_optimizations/convert_compression_only_to_legacy_test.cpp index ea3f3444022..a26dc1d2498 100644 --- a/src/common/transformations/tests/common_optimizations/convert_compression_only_to_legacy_test.cpp +++ b/src/common/transformations/tests/common_optimizations/convert_compression_only_to_legacy_test.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" +#include "transformations/fp16_compression/convert_compression_only_to_legacy.hpp" #include diff --git a/src/common/transformations/tests/common_optimizations/mark_subgraph_to_keep_in_mixed_precision_test.cpp b/src/common/transformations/tests/common_optimizations/mark_subgraph_to_keep_in_mixed_precision_test.cpp index 7dea7e948d3..48c1c6a1202 100644 --- a/src/common/transformations/tests/common_optimizations/mark_subgraph_to_keep_in_mixed_precision_test.cpp +++ b/src/common/transformations/tests/common_optimizations/mark_subgraph_to_keep_in_mixed_precision_test.cpp @@ -8,7 +8,7 @@ #include "openvino/opsets/opset10.hpp" #include "openvino/opsets/opset2.hpp" #include "openvino/pass/manager.hpp" -#include "transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp" +#include 
"transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.hpp" #include "transformations/rt_info/disable_fp16_compression.hpp" using namespace testing; diff --git a/src/core/src/bound_evaluate.cpp b/src/core/src/bound_evaluate.cpp index fb07ddaed06..ec313a5ce25 100644 --- a/src/core/src/bound_evaluate.cpp +++ b/src/core/src/bound_evaluate.cpp @@ -10,6 +10,8 @@ #include "openvino/opsets/opset10.hpp" #include "shape_util.hpp" #include "tensor_conversion_util.hpp" +#include "transformations/rt_info/decompression.hpp" +#include "transformations/rt_info/is_shape_subgraph.hpp" namespace { using namespace ov; @@ -247,7 +249,9 @@ bool ov::could_propagate(const Output& output, std::vector& result) bool can_add = true; size_t arg_count = node->get_input_size(); - if (arg_count == 0 && !is_type(node)) { + auto node_shared_ptr = node->shared_from_this(); + bool is_decompress_data_path = is_decompression(node_shared_ptr) && !is_shape_subgraph(node_shared_ptr); + if ((arg_count == 0 && !is_type(node)) || is_decompress_data_path) { status = false; continue; } else if (is_type(node) || is_type(node)) { diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp index b59e0ac857c..1cef0551d1e 100644 --- a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp +++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp @@ -32,6 +32,8 @@ uint8_t DnnlExtensionUtils::sizeOfDataType(dnnl::memory::data_type dataType) { return 1; case dnnl::memory::data_type::bin: return 1; + case dnnl::memory::data_type::f16: + return 2; case dnnl::memory::data_type::undef: return 0; default: @@ -54,6 +56,8 @@ memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngin return memory::data_type::u8; case InferenceEngine::Precision::BIN: return memory::data_type::bin; + case InferenceEngine::Precision::FP16: + return memory::data_type::f16; case InferenceEngine::Precision::UNSPECIFIED: return memory::data_type::undef; 
default: { @@ -76,6 +80,8 @@ InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::dat return InferenceEngine::Precision::U8; case memory::data_type::bin: return InferenceEngine::Precision::BIN; + case memory::data_type::f16: + return InferenceEngine::Precision::FP16; case memory::data_type::undef: return InferenceEngine::Precision::UNSPECIFIED; default: { diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 648aca0fcc1..7735db21270 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -17,9 +17,10 @@ #include // Common transformations +#include "transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp" #include "transformations/common_optimizations/add_fake_quantize_fusion.hpp" #include "transformations/common_optimizations/broadcast_transition.hpp" -#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" +#include "transformations/fp16_compression/convert_compression_only_to_legacy.hpp" #include "transformations/common_optimizations/convert_quantize_dequantize.hpp" #include "transformations/common_optimizations/fq_mul_fusion.hpp" #include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp" @@ -30,7 +31,7 @@ #include "transformations/common_optimizations/common_optimizations.hpp" #include "transformations/common_optimizations/wrap_interpolate_into_transposes.hpp" #include "transformations/control_flow/unroll_tensor_iterator.hpp" -#include "transformations/disable_decompression_convert_constant_folding.hpp" +#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" #include "transformations/op_conversions/convert_batch_to_space.hpp" #include "transformations/op_conversions/convert_broadcast_to_tiles.hpp" #include 
"transformations/op_conversions/convert_depth_to_space.hpp" @@ -118,7 +119,6 @@ #include "nodes/normalize.h" #include "nodes/fake_quantize.h" #include "nodes/mha.h" - #include "dnnl.hpp" #include @@ -200,6 +200,9 @@ void Transformations::PreLpt(const std::vector& defaultPrecis ov::pass::Manager manager; manager.set_per_pass_validation(false); CPU_REGISTER_PASS_COMMON(manager, ov::pass::InitNodeInfo); + CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkShapeOfSubgraphs); + // todo: uncomment KeepConstAndDecompression when xxx-105060 is ready + // CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompression); const bool useLpt = !defaultPrecisions.empty(); if (useLpt) { @@ -258,7 +261,9 @@ void Transformations::PreLpt(const std::vector& defaultPrecis } CPU_REGISTER_PASS_COMMON(manager, ov::pass::Validate); CPU_REGISTER_PASS_COMMON(manager, ov::pass::RefConvertI64ToI32); + CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertPrecision, precisions, type_to_fuse); + CPU_REGISTER_PASS_COMMON(manager, ov::pass::EliminateConvert); CPU_REGISTER_PASS_COMMON(manager, SwapConvertTranspose); CPU_REGISTER_PASS_X64(manager, ConvertToInteraction); diff --git a/src/plugins/intel_gna/src/gna_transformations_pipeline.cpp b/src/plugins/intel_gna/src/gna_transformations_pipeline.cpp index 93b049297ab..209a756eac1 100644 --- a/src/plugins/intel_gna/src/gna_transformations_pipeline.cpp +++ b/src/plugins/intel_gna/src/gna_transformations_pipeline.cpp @@ -14,7 +14,6 @@ #include "transformations/common_optimizations/add_fake_quantize_fusion.hpp" #include "transformations/common_optimizations/common_optimizations.hpp" #include "transformations/common_optimizations/concat_reduce_fusion.hpp" -#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" #include "transformations/common_optimizations/fq_mul_fusion.hpp" #include "transformations/common_optimizations/fq_reshape_fusion.hpp" #include "transformations/common_optimizations/pull_transpose_through_fq.hpp" 
@@ -27,7 +26,8 @@ #include "transformations/convert_precision.hpp" #include "transformations/decompose_2d_convolution.hpp" #include "transformations/decompose_mvn.hpp" -#include "transformations/disable_decompression_convert_constant_folding.hpp" +#include "transformations/fp16_compression/convert_compression_only_to_legacy.hpp" +#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" #include "transformations/handle_transposes_around_matmul.hpp" #include "transformations/init_node_info.hpp" #include "transformations/insert_copy_layer.hpp" diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index df6e52997d0..735a1a389d8 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -41,7 +41,7 @@ #include #include #include "transformations/common_optimizations/convert_quantize_dequantize.hpp" -#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" +#include "transformations/fp16_compression/convert_compression_only_to_legacy.hpp" #include #include #include diff --git a/src/plugins/template/src/plugin.cpp b/src/plugins/template/src/plugin.cpp index e23dad74d6f..28c19079e98 100644 --- a/src/plugins/template/src/plugin.cpp +++ b/src/plugins/template/src/plugin.cpp @@ -14,9 +14,9 @@ #include "remote_context.hpp" #include "template/properties.hpp" #include "transformations/common_optimizations/common_optimizations.hpp" -#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" +#include "transformations/fp16_compression/convert_compression_only_to_legacy.hpp" #include "transformations/control_flow/unroll_if.hpp" -#include "transformations/disable_decompression_convert_constant_folding.hpp" +#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" #include 
"transformations/op_conversions/convert_reduce_to_pooling.hpp" namespace {