[FP16] call marking for mixed precision inside ConvertPrecision (#14965)

* call marking for mixed precision inside ConvertPrecision

* fix typo in precisions list; moved conversion from f64->f32 to the very beginning

* remove obsolete convert_compressed_to_mixed_precision_test.cpp

* typo fix after merge

* corrected namespace prefix

* fixed align_mixed_fp32_fp16_types_test.cpp by removing redundant ConvertPrecision

* updated ConvertPrecision tests for mixed precision

* style fix
This commit is contained in:
Pavel Esir 2023-02-03 10:47:57 +01:00 committed by GitHub
parent 1e8144f21b
commit 4103a931c2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 639 additions and 337 deletions

View File

@ -1,27 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "openvino/pass/pass.hpp"
#include "transformations_visibility.hpp"
namespace ov {
namespace pass {
class TRANSFORMATIONS_API ConvertCompressedToMixedPrecision;
} // namespace pass
} // namespace ov
/**
* @ingroup ie_transformation_common_api
* @brief ConvertCompressedToMixedPrecision converts fp16 compressed ov::Model to mixed precision ov::Model.
* In mixed precision ov::Models precision sensitive nodes are kept in fp32 while most of the model is in fp16.
*/
class ov::pass::ConvertCompressedToMixedPrecision : public ov::pass::ModelPass {
public:
    OPENVINO_RTTI("ConvertCompressedToMixedPrecision", "0");
    // Runs the mixed-precision pipeline on the model; returns false by design
    // (the internal pass::Manager reports graph changes via Validation).
    bool run_on_model(const std::shared_ptr<Model>& f) override;
};

View File

@ -11,22 +11,11 @@
namespace ov {
namespace pass {
class TRANSFORMATIONS_API EnableDecompressionConvertConstantFolding;
class TRANSFORMATIONS_API ConvertCompressedOnlyToLegacy;
} // namespace pass
} // namespace ov
/**
* @ingroup ie_transformation_common_api
* @brief Enables ConstantFolding for Convert operation in compressed function.
*/
class ov::pass::EnableDecompressionConvertConstantFolding : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("EnableDecompressionConvertConstantFolding", "0");
EnableDecompressionConvertConstantFolding();
};
/**
* @ingroup ie_transformation_common_api
* @brief ConvertCompressedOnlyToLegacy transformation converts compression only FP16 format to legacy FP16 format.

View File

@ -0,0 +1,27 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "openvino/pass/graph_rewrite.hpp"
#include "transformations/enable_decompression_convert_constant_folding.hpp"
#include "transformations_visibility.hpp"
namespace ov {
namespace pass {
class TRANSFORMATIONS_API EnableDecompressionConvertConstantFolding;
} // namespace pass
} // namespace ov
/**
* @ingroup ie_transformation_common_api
* @brief Enables ConstantFolding for Convert operation in compressed function.
*/
class ov::pass::EnableDecompressionConvertConstantFolding : public ov::pass::MatcherPass {
public:
    OPENVINO_RTTI("EnableDecompressionConvertConstantFolding", "0");
    // Registers a matcher that re-enables ConstantFolding on Convert nodes
    // marked as decompression converts.
    EnableDecompressionConvertConstantFolding();
};

View File

@ -8,8 +8,7 @@
#include "openvino/core/rt_info.hpp"
#include "openvino/op/util/precision_sensitive_attribute.hpp"
#include "openvino/opsets/opset10.hpp"
#include "transformations/convert_precision.hpp"
#include "transformations/rt_info/decompression.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "transformations/rt_info/disable_fp16_compression.hpp"
using namespace ov;
@ -35,6 +34,7 @@ bool ov::pass::AlignMixedFP32FP16Types::run_on_model(const std::shared_ptr<ov::M
copy_runtime_info(incoming_node, convert);
input.replace_source_output(convert);
disable_fp16_compression(convert);
pass::disable_constant_folding(convert);
is_changed = true;
}
return is_changed;
@ -61,6 +61,7 @@ bool ov::pass::AlignMixedFP32FP16Types::run_on_model(const std::shared_ptr<ov::M
copy_runtime_info(node, convert);
convert->set_friendly_name(node->get_friendly_name() + "_compressed_to_f16");
out_inputs.replace_source_output(convert);
pass::disable_constant_folding(convert);
is_changed = true;
}
}

View File

@ -25,7 +25,6 @@
#include "transformations/common_optimizations/concat_reduce_fusion.hpp"
#include "transformations/common_optimizations/conv_mul_fusion.hpp"
#include "transformations/common_optimizations/conv_to_binary_conv.hpp"
#include "transformations/common_optimizations/convert_compressed_to_mixed_precision.hpp"
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include "transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp"
#include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
@ -125,8 +124,6 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr<ov::Model
// Enabling conversion of FP16 IR to legacy representation, each plugin have to disable it
// after support for FP16 IR is implemented
REGISTER_PASS(manager, ConvertCompressedOnlyToLegacy)
// should be enabled manually only on plugins supporting mixed precision inference
REGISTER_DISABLED_PASS(manager, ConvertCompressedToMixedPrecision);
REGISTER_PASS(manager, MarkDividesInShapeSubgraphs)
REGISTER_PASS(manager, WeightsDequantizeToFakeQuantize)

View File

@ -1,37 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "transformations/common_optimizations/convert_compressed_to_mixed_precision.hpp"
#include "itt.hpp"
#include "openvino/opsets/opset8.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/common_optimizations/align_mixed_fp32_fp16_types.hpp"
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include "transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp"
#include "transformations/convert_precision.hpp"
#include "transformations/utils/utils.hpp"
// Converts an fp16-compressed ov::Model into a mixed-precision model:
// precision-sensitive subgraphs are kept in fp32 while the rest of the
// model is converted to fp16.
bool ov::pass::ConvertCompressedToMixedPrecision::run_on_model(const std::shared_ptr<ov::Model>& f) {
    RUN_ON_MODEL_SCOPE(ConvertCompressedToMixedPrecision);
    // pass is triggered only for fp16 compressed Models
    if (!ov::op::util::has_decompression_converts(f))
        return false;
    Manager manager(get_pass_config());
    // 1) mark nodes that must stay fp32, 2) insert aligning Converts on the
    //    fp32/fp16 region borders
    REGISTER_PASS(manager, MarkSugraphsToKeepInMixedPrecision)
    REGISTER_PASS(manager, AlignMixedFP32FP16Types)
    const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
    type_to_fuse_map additional_fuse_map = {};
    // call ConvertPrecision with keep_precision_sensitive_in_fp32 = true
    REGISTER_PASS(manager, ConvertPrecision, convert_precision_list, additional_fuse_map, true)
    // fold decompression Converts back into compressed f16 constants
    REGISTER_PASS(manager, EnableDecompressionConvertConstantFolding)
    REGISTER_PASS(manager, ConstantFolding)
    manager.run_passes(f);
    // always false: the nested Manager's Validation pass accounts for changes
    return false;
}

View File

@ -9,27 +9,12 @@
#include "openvino/pass/manager.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "transformations/convert_precision.hpp"
#include "transformations/enable_decompression_convert_constant_folding.hpp"
#include "transformations/rt_info/disable_fp16_compression.hpp"
#include "transformations/utils/utils.hpp"
using namespace ov;
// Matcher pass: re-enables ConstantFolding for Convert nodes that carry the
// decompression mark, so they can be folded into compressed constants.
ov::pass::EnableDecompressionConvertConstantFolding::EnableDecompressionConvertConstantFolding() {
    MATCHER_SCOPE(EnableDecompressionConvertConstantFolding);
    // Match any Convert; filtering to decompression ops happens in the callback.
    auto convert = pattern::wrap_type<opset8::Convert>();
    ov::matcher_pass_callback callback = [=](pattern::Matcher& m) {
        const auto& node = m.get_match_root();
        // Only Converts marked as decompression (e.g. f16 weights -> f32) qualify.
        if (!ov::is_decompression(node))
            return false;
        enable_constant_folding(node);
        return true;
    };
    auto m = std::make_shared<ov::pass::pattern::Matcher>(convert, matcher_name);
    this->register_matcher(m, callback);
}
bool ov::pass::ConvertCompressedOnlyToLegacy::run_on_model(const std::shared_ptr<ov::Model>& f) {
RUN_ON_MODEL_SCOPE(ConvertCompressedOnlyToLegacy);
if (ov::op::util::has_decompression_converts(f)) {

View File

@ -17,7 +17,12 @@
#include <vector>
#include "itt.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "openvino/pass/manager.hpp"
#include "ov_ops/type_relaxed.hpp"
#include "transformations/common_optimizations/align_mixed_fp32_fp16_types.hpp"
#include "transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp"
#include "transformations/enable_decompression_convert_constant_folding.hpp"
#include "transformations/rt_info/disable_fp16_compression.hpp"
using namespace ov;
@ -175,7 +180,7 @@ bool convert_precision(ov::pass::PassBase& pass,
// If output type mismatch given type we try to fuse type into this operation
// otherwise we insert Convert operation.
for (auto& node : ops) {
if (skip_precision_sensitive && fp16_compression_is_disabled(node))
if (skip_precision_sensitive && fp16_compression_is_disabled(node) && to == element::f16)
continue;
// Recursively apply transformation for sub-graph based operations
@ -201,7 +206,7 @@ bool convert_precision(ov::pass::PassBase& pass,
for (auto& node : ops) {
// skip precision sensitive nodes
if (skip_precision_sensitive && fp16_compression_is_disabled(node))
if (skip_precision_sensitive && fp16_compression_is_disabled(node) && to == element::f16)
continue;
is_output_precision_changed |= convert_node_output_precision(node);
}
@ -219,6 +224,8 @@ bool convert_precision(ov::pass::PassBase& pass,
// Convert elimination here
for (auto& node : ops) {
if (auto convert = std::dynamic_pointer_cast<opset4::Convert>(node)) {
if (pass::constant_folding_is_disabled(node))
continue;
// WA for topK, dont remove fake convert
if (convert->input(0).get_element_type() == convert->get_convert_element_type() &&
convert->input_value(0).get_node_shared_ptr()->get_output_size() == 1) {
@ -301,6 +308,17 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr<ngraph::Func
{opset10::Unique::get_type_info_static(), fuse_type_to_unique_v10},
{opset8::RandomUniform::get_type_info_static(), fuse_type_to_random_uniform_v8}};
std::pair<ov::element::Type, ov::element::Type> compress_f16_pair = {ov::element::f32, ov::element::f16};
bool has_compress_f16 = std::count(m_precisions.begin(), m_precisions.end(), compress_f16_pair) > 0;
if (m_keep_precision_sensitive_in_fp32 && has_compress_f16) {
pass::Manager manager(get_pass_config());
// Mark subgraphs with disable_fp16_compression to keep them in FP32
manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
manager.register_pass<pass::AlignMixedFP32FP16Types>();
manager.run_passes(f);
}
for (const auto& it : m_additional_type_to_fuse_map) {
type_to_fuse[it.first] = it.second;
}
@ -327,6 +345,13 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr<ngraph::Func
m_keep_precision_sensitive_in_fp32);
}
// to remove extra converts
if (m_keep_precision_sensitive_in_fp32) {
pass::Manager manager(get_pass_config());
manager.register_pass<pass::EnableDecompressionConvertConstantFolding>();
manager.register_pass<pass::ConstantFolding>();
}
(void)is_changed; // ignored
// Returning value is false because pass::Manager always apply Validation pass

View File

@ -0,0 +1,29 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "transformations/enable_decompression_convert_constant_folding.hpp"
#include "itt.hpp"
#include "openvino/opsets/opset8.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "transformations/rt_info/decompression.hpp"
#include "transformations/rt_info/disable_constant_folding.hpp"
using namespace ov;
// Matcher pass: re-enables ConstantFolding for Convert nodes that carry the
// decompression mark, so they can be folded into compressed constants.
pass::EnableDecompressionConvertConstantFolding::EnableDecompressionConvertConstantFolding() {
    MATCHER_SCOPE(EnableDecompressionConvertConstantFolding);
    // Match any Convert; filtering to decompression ops happens in the callback.
    auto convert = pattern::wrap_type<opset8::Convert>();
    matcher_pass_callback callback = [=](pattern::Matcher& m) {
        const auto& node = m.get_match_root();
        // Only Converts marked as decompression (e.g. f16 weights -> f32) qualify.
        if (!is_decompression(node))
            return false;
        enable_constant_folding(node);
        return true;
    };
    auto m = std::make_shared<ov::pass::pattern::Matcher>(convert, matcher_name);
    this->register_matcher(m, callback);
}

View File

@ -9,7 +9,6 @@
#include <openvino/pass/manager.hpp>
#include <transformations/common_optimizations/align_mixed_fp32_fp16_types.hpp>
#include <transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp>
#include <transformations/convert_precision.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
@ -36,24 +35,21 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_1) {
pass::Manager manager;
manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
manager.register_pass<pass::AlignMixedFP32FP16Types>();
const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
type_to_fuse_map empty_type_to_fuse_map = {};
manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
manager.run_passes(model);
}
{
auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
auto exp_1 = make_shared<Exp>(convert_to_f32_1);
auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
auto reduce_sum_1 = make_shared<ReduceSum>(exp_1, reduction_axes);
auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f16);
auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f32);
auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
@ -81,18 +77,15 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_2) {
pass::Manager manager;
manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
manager.register_pass<pass::AlignMixedFP32FP16Types>();
const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
type_to_fuse_map empty_type_to_fuse_map = {};
manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
manager.run_passes(model);
}
{
auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
auto exp_1 = make_shared<Exp>(convert_to_f32_1);
auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
auto unsqueeze_axes = Constant::create(element::i64, Shape{1}, {1});
@ -102,7 +95,7 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_2) {
auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f16);
auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f32);
auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
@ -130,17 +123,14 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_3) {
pass::Manager manager;
manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
manager.register_pass<pass::AlignMixedFP32FP16Types>();
const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
type_to_fuse_map empty_type_to_fuse_map = {};
manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
manager.run_passes(model);
}
{
auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
auto exp_1 = make_shared<Exp>(convert_to_f32_1);
auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
auto reduce_sum_1 = make_shared<ReduceSum>(exp_1, reduction_axes);
@ -150,7 +140,7 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_3) {
auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
auto mul_1 = make_shared<Multiply>(add_1, factor_const_decompressed);
auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f16);
auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f32);
auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
@ -172,21 +162,18 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_4) {
pass::Manager manager;
manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
manager.register_pass<pass::AlignMixedFP32FP16Types>();
const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
type_to_fuse_map empty_type_to_fuse_map = {};
manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
manager.run_passes(model);
}
{
auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
auto mvn_1 = make_shared<MVN>(convert_to_f32_1, reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT);
auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f});
auto add_1 = make_shared<Add>(mvn_1, addition_const);
auto convert_to_f16_1 = make_shared<Convert>(add_1, element::f16);
auto convert_to_f16_1 = make_shared<Convert>(add_1, element::f32);
auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
@ -215,15 +202,12 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_mnv_with_split) {
pass::Manager manager;
manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
manager.register_pass<pass::AlignMixedFP32FP16Types>();
const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
type_to_fuse_map empty_type_to_fuse_map = {};
manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
manager.run_passes(model);
}
{
auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 56, 224});
auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 56, 224});
auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
@ -234,7 +218,7 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_mnv_with_split) {
auto mvn_1 = make_shared<MVN>(split->output(0), reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT);
auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f});
auto add_1 = make_shared<Add>(mvn_1, addition_const);
auto convert_to_f16_1 = make_shared<Convert>(add_1, element::f16);
auto convert_to_f16_1 = make_shared<Convert>(add_1, element::f32);
auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
// todo: without Converts to fp16 because of GPU
@ -271,17 +255,14 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_with_rand_uniform) {
pass::Manager manager;
manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
manager.register_pass<pass::AlignMixedFP32FP16Types>();
const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
type_to_fuse_map empty_type_to_fuse_map = {};
manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
manager.run_passes(model);
}
{
auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
auto exp_1 = make_shared<Exp>(convert_to_f32_1);
auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
auto reduce_sum_1 = make_shared<ReduceSum>(exp_1, reduction_axes);
@ -289,14 +270,14 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_with_rand_uniform) {
auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
auto out_shape = Constant::create(element::i64, Shape{3}, {1, 3, 224});
auto minval = Constant::create(element::f16, Shape{}, {1});
auto maxval = Constant::create(element::f16, Shape{}, {10});
auto rand_uniform = make_shared<RandomUniform>(out_shape, minval, maxval, element::f16);
auto minval = Constant::create(element::f32, Shape{}, {1});
auto maxval = Constant::create(element::f32, Shape{}, {10});
auto rand_uniform = make_shared<RandomUniform>(out_shape, minval, maxval, element::f32);
auto rand_uniform_decompressed = make_shared<Convert>(rand_uniform, element::f32);
auto rand_uniform_add_factor = make_shared<Add>(rand_uniform_decompressed, factor_const_decompressed);
auto mul_1 = make_shared<Multiply>(reduce_sum_1, rand_uniform_add_factor);
auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f16);
auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f32);
auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});

View File

@ -1,140 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "transformations/common_optimizations/convert_compressed_to_mixed_precision.hpp"
#include <gtest/gtest.h>
#include <memory>
#include <string>
#include <transformations/convert_precision.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
#include "openvino/core/model.hpp"
#include "openvino/opsets/opset10.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/init_node_info.hpp"
#include "transformations/rt_info/decompression.hpp"
using namespace testing;
using namespace ov;
// Checks that a model with f16-compressed Convolution weights (Constant ->
// decompression Convert) is converted so activations become f16 and the
// decompression Convert is folded away into an f16 constant.
TEST_F(TransformationTestsF, ConvertCompressedToMixedPrecision) {
    {
        auto input = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 12, 12});
        auto const_weights =
            opset10::Constant::create(element::f16, Shape{1, 3, 3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
                                                                        6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9});
        auto convert_ins1 = std::make_shared<opset10::Convert>(const_weights, element::f32);
        // mark the Convert as decompression so the pass is triggered
        mark_as_decompression(convert_ins1);
        auto conv = std::make_shared<opset10::Convolution>(input,
                                                           convert_ins1,
                                                           Strides{1, 1},
                                                           CoordinateDiff{0, 0},
                                                           CoordinateDiff{0, 0},
                                                           Strides{1, 1});
        model = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::ConvertCompressedToMixedPrecision>();
        manager.run_passes(model);
        ASSERT_NO_THROW(check_rt_info(model));
    }
    {
        // reference: f16 input feeding the f16 weights directly (no Convert)
        auto input = std::make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 12, 12});
        auto const_weights =
            opset10::Constant::create(element::f16, Shape{1, 3, 3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
                                                                        6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9});
        auto conv = std::make_shared<opset10::Convolution>(input,
                                                           const_weights,
                                                           Strides{1, 1},
                                                           CoordinateDiff{0, 0},
                                                           CoordinateDiff{0, 0},
                                                           Strides{1, 1});
        model_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
    }
}
TEST_F(TransformationTestsF, ConvertCompressedToMixedPrecissionNoConvertion) {
    // test that pass is not triggered when there are no decompression Converts
    {
        auto input = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 12, 12});
        auto const_weights =
            opset10::Constant::create(element::f32, Shape{1, 3, 3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
                                                                        6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9});
        auto conv = std::make_shared<opset10::Convolution>(input,
                                                           const_weights,
                                                           Strides{1, 1},
                                                           CoordinateDiff{0, 0},
                                                           CoordinateDiff{0, 0},
                                                           Strides{1, 1});
        model = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::ConvertCompressedToMixedPrecision>();
        manager.run_passes(model);
        ASSERT_NO_THROW(check_rt_info(model));
    }
    {
        // reference is identical to the input model: nothing should change
        auto input = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 12, 12});
        auto const_weights =
            opset10::Constant::create(element::f32, Shape{1, 3, 3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
                                                                        6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9});
        auto conv = std::make_shared<opset10::Convolution>(input,
                                                           const_weights,
                                                           Strides{1, 1},
                                                           CoordinateDiff{0, 0},
                                                           CoordinateDiff{0, 0},
                                                           Strides{1, 1});
        model_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
    }
}
// Checks that a precision-sensitive shape subgraph (ShapeOf -> Divide feeding
// Reshape's target shape) is kept in fp32 and constant-folded, while the data
// path is compressed to f16.
TEST_F(TransformationTestsF, ConvertCompressedToMixedPrecission_keep_sensitive_nodes_in_fp32) {
    {
        auto input_1 = std::make_shared<opset10::Parameter>(element::f32, Shape{360, 640});
        auto input_2 = std::make_shared<opset10::Parameter>(element::f32, Shape{720, 1280});
        auto shapeof = std::make_shared<opset10::ShapeOf>(input_2);
        // decompression Converts are needed for ConvertCompressedToMixedPrecision to be triggered
        auto compressed_const = opset10::Constant::create(element::f16, Shape{}, {2.0f});
        auto decompress_convert = std::make_shared<opset10::Convert>(compressed_const, element::f32);
        mark_as_decompression(decompress_convert);
        auto add_decompressed_const = std::make_shared<opset10::Add>(input_1, decompress_convert);
        // shape subgraph: (shape of input_2) / 2, cast back to i64 for Reshape
        auto convert_to_float = std::make_shared<opset10::Convert>(shapeof, element::f32);
        auto const_denominator = opset10::Constant::create(element::f32, Shape{}, {2.0f});
        auto div = std::make_shared<opset10::Divide>(convert_to_float, const_denominator);
        auto new_shape = std::make_shared<opset10::Convert>(div, element::i64);
        auto reshape = std::make_shared<opset10::Reshape>(add_decompressed_const, new_shape, false);
        model = std::make_shared<Model>(NodeVector{reshape}, ParameterVector{input_1, input_2});
        pass::Manager manager;
        manager.register_pass<pass::ConvertCompressedToMixedPrecision>();
        manager.run_passes(model);
    }
    {
        auto input_1 = std::make_shared<opset10::Parameter>(element::f16, Shape{360, 640});
        auto input_2 = std::make_shared<opset10::Parameter>(element::f16, Shape{720, 1280});
        // after ConvertCompressedToMixedPrecision Const->Convert are constant-folded into a single f16 Const
        auto compressed_const = opset10::Constant::create(element::f16, Shape{}, {2.0f});
        auto add_compressed_const = std::make_shared<opset10::Add>(input_1, compressed_const);
        // shape subgraph will be constant folded
        auto new_shape_const = opset10::Constant::create(element::i64, Shape{2}, {360, 640});
        auto reshape = std::make_shared<opset10::Reshape>(add_compressed_const, new_shape_const, false);
        model_ref = std::make_shared<Model>(NodeVector{reshape}, ParameterVector{input_1, input_2});
    }
}

View File

@ -21,11 +21,11 @@
#include <vector>
#include "common_test_utils/ngraph_test_utils.hpp"
#include "transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp"
#include "transformations/rt_info/disable_fp16_compression.hpp"
using namespace testing;
using namespace ov;
using namespace std;
template <element::Type_t T>
bool has_type(std::shared_ptr<Model> f) {
@ -776,7 +776,6 @@ TEST(TransformationTests, ConvertPrecision_skip_precision_sensitive) {
model = std::make_shared<Model>(NodeVector{interpolate}, ParameterVector{input});
pass::Manager manager;
manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
type_to_fuse_map empty_type_to_fuse_map = {};
bool keep_precision_sensitive_in_fp32 = true;
manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
@ -811,7 +810,6 @@ TEST(TransformationTests, ConvertPrecision_without_keep_precision_sensitive_in_f
interpolate = std::make_shared<opset10::Interpolate>(input, sizes, scales, attrs);
model = std::make_shared<Model>(NodeVector{interpolate}, ParameterVector{input});
pass::Manager manager;
manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
type_to_fuse_map empty_type_to_fuse_map = {};
bool keep_precision_sensitive_in_fp32 = false;
manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
@ -834,7 +832,6 @@ TEST(TransformationTests, ConvertPrecision_check_marking_does_not_leak_in_trivia
model = std::make_shared<Model>(NodeVector{reshape}, ParameterVector{input_1, input_2});
pass::Manager manager;
manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
type_to_fuse_map empty_type_to_fuse_map = {};
bool keep_precision_sensitive_in_fp32 = true;
manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
@ -874,7 +871,6 @@ TEST(TransformationTests, ConvertPrecision_whole_shape_subgraph_is_marked_1) {
model = std::make_shared<Model>(NodeVector{reshape}, ParameterVector{input_1, input_2});
pass::Manager manager;
manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
type_to_fuse_map empty_type_to_fuse_map = {};
bool keep_precision_sensitive_in_fp32 = true;
manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
@ -928,7 +924,6 @@ TEST(TransformationTests, ConvertPrecision_whole_shape_subgraph_is_marked_2) {
model = std::make_shared<Model>(NodeVector{result}, ParameterVector{input_1});
pass::Manager manager;
manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
type_to_fuse_map empty_type_to_fuse_map = {};
bool keep_precision_sensitive_in_fp32 = true;
manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
@ -1008,7 +1003,6 @@ TEST(TransformationTests, ConvertPrecision_whole_shape_subgraph_is_marked_3) {
model = std::make_shared<Model>(NodeVector{result_1, result_2}, ParameterVector{input_1, input_2});
pass::Manager manager;
manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
type_to_fuse_map empty_type_to_fuse_map = {};
bool keep_precision_sensitive_in_fp32 = true;
manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
@ -1087,7 +1081,6 @@ TEST(TransformationTests, ConvertCompressedToMixedPrecission_do_not_keep_in_fp32
model = std::make_shared<Model>(NodeVector{interpolate}, ParameterVector{input});
pass::Manager manager;
manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
type_to_fuse_map empty_type_to_fuse_map = {};
bool keep_precision_sensitive_in_fp32 = false; // didn't keep in FP32 intentionally
manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
@ -1323,3 +1316,474 @@ TEST(TransformationTests, ConvertPrecision_ConstantConversion_U1ToU4) {
std::vector<uint8_t>{171},
{1, 0, 1, 0, 1, 0, 1, 1});
}
// Checks that ConvertPrecision with keep_precision_sensitive_in_fp32=true keeps
// the numerically sensitive Exp/ReduceSum/Multiply chain in fp32 (inserting
// aligning Converts at the region borders) while the rest becomes f16.
TEST(TransformationTests, ConvertPrecision_keep_precission_sensitive_fp32_with_exp) {
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {
        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto exp_1 = make_shared<opset10::Exp>(input_1);
        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(exp_1, reduction_axes);
        auto factor_const = opset10::Constant::create(element::f16, Shape{1}, {-1});
        auto factor_const_decompressed = make_shared<opset10::Convert>(factor_const, element::f32);
        auto mul_1 = make_shared<opset10::Multiply>(reduce_sum_1, factor_const_decompressed);
        auto matmul_1 = make_shared<opset10::MatMul>(mul_1, input_2);
        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
        type_to_fuse_map empty_type_to_fuse_map = {};
        bool keep_precision_sensitive_in_fp32 = true;
        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      empty_type_to_fuse_map,
                                                      keep_precision_sensitive_in_fp32);
        manager.run_passes(model);
    }
    {
        // reference: parameters are f16; sensitive chain stays f32 behind
        // decompress/compress Converts inserted at the borders
        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto input_1_decompressed = make_shared<opset10::Convert>(input_1, element::f32);
        auto exp_1 = make_shared<opset10::Exp>(input_1_decompressed);
        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(exp_1, reduction_axes);
        auto factor_const = opset10::Constant::create(element::f16, Shape{1}, {-1});
        auto factor_const_decompressed = make_shared<opset10::Convert>(factor_const, element::f32);
        auto mul_1 = make_shared<opset10::Multiply>(reduce_sum_1, factor_const_decompressed);
        auto mul_1_compressed = make_shared<opset10::Convert>(mul_1, element::f16);
        auto matmul_1 = make_shared<opset10::MatMul>(mul_1_compressed, input_2);
        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
    }
    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
    FunctionsComparator::Result result = func_comparator(model_ref, model);
    ASSERT_TRUE(result.valid) << result.message;
}
TEST(TransformationTests, ConvertPrecision_keep_precission_sensitive_fp32_with_reducemean) {
    // Exp feeding a ReduceMean (softmax/normalization-like path) is precision
    // sensitive: ConvertPrecision with keep_precision_sensitive_in_fp32 = true
    // must keep that subgraph in fp32 and surround it with Convert nodes.
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {
        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto exp_1 = make_shared<opset10::Exp>(input_1);
        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        // Bug fix: this test is about ReduceMean (see its name and the reference
        // graph below), but the tested model was built with ReduceSum, which the
        // comparator could never match against the reference ReduceMean.
        auto reduce_mean_1 = make_shared<opset10::ReduceMean>(exp_1, reduction_axes);
        auto factor_const = opset10::Constant::create(element::f16, Shape{1}, {-1});
        auto factor_const_decompressed = make_shared<opset10::Convert>(factor_const, element::f32);
        auto mul_1 = make_shared<opset10::Multiply>(reduce_mean_1, factor_const_decompressed);
        auto matmul_1 = make_shared<opset10::MatMul>(mul_1, input_2);
        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});

        type_to_fuse_map empty_type_to_fuse_map = {};
        bool keep_precision_sensitive_in_fp32 = true;
        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      empty_type_to_fuse_map,
                                                      keep_precision_sensitive_in_fp32);
        manager.run_passes(model);
    }
    {
        // Reference: f16 parameters, fp32 island around Exp/ReduceMean fenced by Converts.
        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto input_1_decompressed = make_shared<opset10::Convert>(input_1, element::f32);
        auto exp_1 = make_shared<opset10::Exp>(input_1_decompressed);
        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_mean_1 = make_shared<opset10::ReduceMean>(exp_1, reduction_axes);
        auto factor_const = opset10::Constant::create(element::f16, Shape{1}, {-1});
        auto factor_const_decompressed = make_shared<opset10::Convert>(factor_const, element::f32);
        auto mul_1 = make_shared<opset10::Multiply>(reduce_mean_1, factor_const_decompressed);
        auto mul_1_compressed = make_shared<opset10::Convert>(mul_1, element::f16);
        auto matmul_1 = make_shared<opset10::MatMul>(mul_1_compressed, input_2);
        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
    }
    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
    FunctionsComparator::Result result = func_comparator(model_ref, model);
    ASSERT_TRUE(result.valid) << result.message;
}
TEST(TransformationTests, ConvertPrecision_reducesum_without_exp) {
    // A ReduceSum that is NOT fed by Exp is not a precision-sensitive pattern:
    // nothing should be marked, no Converts inserted, and the whole model is
    // expected to be cast to f16.
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {
        auto data = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto weights = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto summed = make_shared<opset10::ReduceSum>(data, axes);
        auto factor = opset10::Constant::create(element::f32, Shape{1}, {-1});
        auto scaled = make_shared<opset10::Multiply>(summed, factor);
        auto matmul = make_shared<opset10::MatMul>(scaled, weights);
        model = make_shared<Model>(NodeVector{matmul}, ParameterVector{data, weights});

        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      type_to_fuse_map{},
                                                      true /* keep_precision_sensitive_in_fp32 */);
        manager.run_passes(model);
    }
    {
        // Reference: the identical graph, fully converted to f16.
        auto data = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto weights = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto summed = make_shared<opset10::ReduceSum>(data, axes);
        auto factor = opset10::Constant::create(element::f16, Shape{1}, {-1});
        auto scaled = make_shared<opset10::Multiply>(summed, factor);
        auto matmul = make_shared<opset10::MatMul>(scaled, weights);
        model_ref = make_shared<Model>(NodeVector{matmul}, ParameterVector{data, weights});
    }
    const auto res = FunctionsComparator::with_default()(model_ref, model);
    ASSERT_TRUE(res.valid) << res.message;
}
TEST(TransformationTests, ConvertPrecision_MarkNormalizationOps_1) {
    // MVN is a normalization op and therefore precision sensitive:
    // it must stay in fp32 and be fenced with Convert nodes while the
    // rest of the model is converted to f16.
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {
        auto data = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto weights = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto mvn = make_shared<opset10::MVN>(data, axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT);
        auto matmul = make_shared<opset10::MatMul>(mvn, weights);
        model = make_shared<Model>(NodeVector{matmul}, ParameterVector{data, weights});

        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      type_to_fuse_map{},
                                                      true /* keep_precision_sensitive_in_fp32 */);
        manager.run_passes(model);
    }
    {
        // Reference: f16 parameters, MVN kept in fp32 between Convert nodes.
        auto data = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto data_decompressed = make_shared<opset10::Convert>(data, element::f32);
        auto weights = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto mvn =
            make_shared<opset10::MVN>(data_decompressed, axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT);
        auto mvn_compressed = make_shared<opset10::Convert>(mvn, element::f16);
        auto matmul = make_shared<opset10::MatMul>(mvn_compressed, weights);
        model_ref = make_shared<Model>(NodeVector{matmul}, ParameterVector{data, weights});
    }
    const auto res = FunctionsComparator::with_default()(model_ref, model);
    ASSERT_TRUE(res.valid) << res.message;
}
TEST(TransformationTests, ConvertPrecision_MarkNormalizationOps_2) {
    // NormalizeL2 is a normalization op and therefore precision sensitive:
    // it must stay in fp32 and be fenced with Convert nodes while the
    // rest of the model is converted to f16.
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {
        auto data = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto weights = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto l2norm = make_shared<opset10::NormalizeL2>(data, axes, 1.0e-8f, ov::op::EpsMode::MAX);
        auto matmul = make_shared<opset10::MatMul>(l2norm, weights);
        model = make_shared<Model>(NodeVector{matmul}, ParameterVector{data, weights});

        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      type_to_fuse_map{},
                                                      true /* keep_precision_sensitive_in_fp32 */);
        manager.run_passes(model);
    }
    {
        // Reference: f16 parameters, NormalizeL2 kept in fp32 between Convert nodes.
        auto data = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto data_decompressed = make_shared<opset10::Convert>(data, element::f32);
        auto weights = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto l2norm =
            make_shared<opset10::NormalizeL2>(data_decompressed, axes, 1.0e-8f, ov::op::EpsMode::MAX);
        auto l2norm_compressed = make_shared<opset10::Convert>(l2norm, element::f16);
        auto matmul = make_shared<opset10::MatMul>(l2norm_compressed, weights);
        model_ref = make_shared<Model>(NodeVector{matmul}, ParameterVector{data, weights});
    }
    const auto res = FunctionsComparator::with_default()(model_ref, model);
    ASSERT_TRUE(res.valid) << res.message;
}
TEST(TransformationTests, ConvertPrecision_keep_precission_sensitive_fp32_t2t_subgraph) {
    // Attention-like subgraph from t2t-vit-7: the Exp -> ReduceSum chains and the
    // division with a small eps must be kept in fp32; only the trailing MatMul
    // weights path is expected to be compressed to f16.
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    // subgraph from t2t-vit-7
    {
        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3136, 32});
        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3136, 32});
        auto input_3 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3136, 64, 1});
        auto input_4 = make_shared<opset10::Parameter>(element::f32, Shape{128, 64});
        auto exp_1 = make_shared<opset10::Exp>(input_1);
        auto exp_2 = make_shared<opset10::Exp>(input_2);
        auto factor_1 = opset10::Constant::create(element::f32, Shape{1}, {0.5});
        auto mul_1 = make_shared<opset10::Multiply>(exp_1, factor_1);
        auto factor_2 = opset10::Constant::create(element::f32, Shape{1}, {0.5});
        auto mul_2 = make_shared<opset10::Multiply>(exp_2, factor_2);
        auto const_unsqueeze_1 = opset10::Constant::create(element::i64, Shape{4}, {1, 3136, 1, 32});
        auto unsqueeze_1 = make_shared<opset10::Reshape>(mul_1, const_unsqueeze_1, false);
        auto const_unsqueeze_2 = opset10::Constant::create(element::i64, Shape{4}, {1, 3136, 1, 32});
        // Bug fix: unsqueeze_2 used const_unsqueeze_1, leaving const_unsqueeze_2
        // dangling unused; the reference graph below uses const_unsqueeze_2.
        auto unsqueeze_2 = make_shared<opset10::Reshape>(mul_2, const_unsqueeze_2, false);
        auto reduction_axes_1 = opset10::Constant::create(element::i64, Shape{1}, {1});
        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(mul_2, reduction_axes_1, true);
        auto mul_3 = make_shared<opset10::Multiply>(reduce_sum_1, mul_1);
        auto mul_4 = make_shared<opset10::Multiply>(input_3, unsqueeze_2);
        auto reduction_axes_2 = opset10::Constant::create(element::i64, Shape{1}, {1});
        auto reduce_sum_2 = make_shared<opset10::ReduceSum>(mul_4, reduction_axes_2);
        auto reduction_axes_3 = opset10::Constant::create(element::i64, Shape{1}, {2});
        auto reduce_sum_3 = make_shared<opset10::ReduceSum>(mul_3, reduction_axes_3, true);
        auto broadcast_to_shape = opset10::Constant::create(element::i64, Shape{3}, {1, 1, 1});
        auto broadcast =
            make_shared<opset10::Broadcast>(reduce_sum_3, broadcast_to_shape, ov::op::BroadcastType::BIDIRECTIONAL);
        auto tile_shape = opset10::Constant::create(element::i64, Shape{3}, {1, 1, 64});
        auto tile = make_shared<opset10::Tile>(broadcast, tile_shape);
        auto eps_const = opset10::Constant::create(element::f32, Shape{1}, {1.e-10});
        auto add_1 = make_shared<opset10::Add>(tile, eps_const);
        auto const_unsqueeze_3 = opset10::Constant::create(element::i64, Shape{4}, {1, 1, 64, 32});
        auto unsqueeze_3 = make_shared<opset10::Reshape>(reduce_sum_2, const_unsqueeze_3, false);
        auto mul_5 = make_shared<opset10::Multiply>(unsqueeze_1, unsqueeze_3);
        auto reduction_axes_4 = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_4 = make_shared<opset10::ReduceSum>(mul_5, reduction_axes_4);
        auto div_1 = make_shared<opset10::Divide>(reduce_sum_4, add_1);
        auto matmul_1 = make_shared<opset10::MatMul>(div_1, input_4, false, true);
        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2, input_3, input_4});

        type_to_fuse_map empty_type_to_fuse_map = {};
        bool keep_precision_sensitive_in_fp32 = true;
        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      empty_type_to_fuse_map,
                                                      keep_precision_sensitive_in_fp32);
        manager.run_passes(model);
    }
    {
        // Reference: f16 parameters decompressed to f32, the whole attention body
        // kept in fp32, and a single compressing Convert before the final MatMul.
        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3136, 32});
        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3136, 32});
        auto input_3 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3136, 64, 1});
        auto input_4 = make_shared<opset10::Parameter>(element::f16, Shape{128, 64});
        auto input_1_decompressed = make_shared<opset10::Convert>(input_1, element::f32);
        auto input_2_decompressed = make_shared<opset10::Convert>(input_2, element::f32);
        auto input_3_decompressed = make_shared<opset10::Convert>(input_3, element::f32);
        auto exp_1 = make_shared<opset10::Exp>(input_1_decompressed);
        auto exp_2 = make_shared<opset10::Exp>(input_2_decompressed);
        auto factor_1 = opset10::Constant::create(element::f32, Shape{1}, {0.5});
        auto mul_1 = make_shared<opset10::Multiply>(exp_1, factor_1);
        auto factor_2 = opset10::Constant::create(element::f32, Shape{1}, {0.5});
        auto mul_2 = make_shared<opset10::Multiply>(exp_2, factor_2);
        auto const_unsqueeze_1 = opset10::Constant::create(element::i64, Shape{4}, {1, 3136, 1, 32});
        auto unsqueeze_1 = make_shared<opset10::Reshape>(mul_1, const_unsqueeze_1, false);
        auto const_unsqueeze_2 = opset10::Constant::create(element::i64, Shape{4}, {1, 3136, 1, 32});
        auto unsqueeze_2 = make_shared<opset10::Reshape>(mul_2, const_unsqueeze_2, false);
        auto reduction_axes_1 = opset10::Constant::create(element::i64, Shape{1}, {1});
        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(mul_2, reduction_axes_1, true);
        auto mul_3 = make_shared<opset10::Multiply>(reduce_sum_1, mul_1);
        auto mul_4 = make_shared<opset10::Multiply>(input_3_decompressed, unsqueeze_2);
        auto reduction_axes_2 = opset10::Constant::create(element::i64, Shape{1}, {1});
        auto reduce_sum_2 = make_shared<opset10::ReduceSum>(mul_4, reduction_axes_2);
        auto reduction_axes_3 = opset10::Constant::create(element::i64, Shape{1}, {2});
        auto reduce_sum_3 = make_shared<opset10::ReduceSum>(mul_3, reduction_axes_3, true);
        auto broadcast_to_shape = opset10::Constant::create(element::i64, Shape{3}, {1, 1, 1});
        auto broadcast =
            make_shared<opset10::Broadcast>(reduce_sum_3, broadcast_to_shape, ov::op::BroadcastType::BIDIRECTIONAL);
        auto tile_shape = opset10::Constant::create(element::i64, Shape{3}, {1, 1, 64});
        auto tile = make_shared<opset10::Tile>(broadcast, tile_shape);
        auto eps_const = opset10::Constant::create(element::f32, Shape{1}, {1.e-10});
        auto add_1 = make_shared<opset10::Add>(tile, eps_const);
        auto const_unsqueeze_3 = opset10::Constant::create(element::i64, Shape{4}, {1, 1, 64, 32});
        auto unsqueeze_3 = make_shared<opset10::Reshape>(reduce_sum_2, const_unsqueeze_3, false);
        auto mul_5 = make_shared<opset10::Multiply>(unsqueeze_1, unsqueeze_3);
        auto reduction_axes_4 = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_4 = make_shared<opset10::ReduceSum>(mul_5, reduction_axes_4);
        auto div_1 = make_shared<opset10::Divide>(reduce_sum_4, add_1);
        auto div_compressed = make_shared<opset10::Convert>(div_1, element::f16);
        auto matmul_1 = make_shared<opset10::MatMul>(div_compressed, input_4, false, true);
        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2, input_3, input_4});
    }
    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
    FunctionsComparator::Result result = func_comparator(model_ref, model);
    ASSERT_TRUE(result.valid) << result.message;
}
TEST(TransformationTests, ConvertPrecision_DivisionByZeroMinimalPattern) {
    // x / (y + eps) with an eps that underflows to zero in f16: the pattern is
    // precision sensitive and must be kept in fp32 to avoid division by zero.
    constexpr float eps_value = 1.0e-12f;
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {
        auto numerator = make_shared<opset10::Parameter>(element::f32, PartialShape::dynamic(3));
        auto denominator = make_shared<opset10::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps = opset10::Constant::create(element::f32, Shape{1}, {eps_value});
        auto denom_stable = make_shared<opset10::Add>(denominator, eps);
        auto quotient = make_shared<opset10::Divide>(numerator, denom_stable);
        model = make_shared<Model>(NodeVector{quotient}, ParameterVector{numerator, denominator});

        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      type_to_fuse_map{},
                                                      true /* keep_precision_sensitive_in_fp32 */);
        manager.run_passes(model);
    }
    {
        // Reference: f16 parameters decompressed to f32, division kept in fp32.
        auto numerator = make_shared<opset10::Parameter>(element::f16, PartialShape::dynamic(3));
        auto denominator = make_shared<opset10::Parameter>(element::f16, PartialShape::dynamic(3));
        auto numerator_decompressed = make_shared<opset10::Convert>(numerator, element::f32);
        auto denominator_decompressed = make_shared<opset10::Convert>(denominator, element::f32);
        auto eps = opset10::Constant::create(element::f32, Shape{1}, {eps_value});
        auto denom_stable = make_shared<opset10::Add>(denominator_decompressed, eps);
        auto quotient = make_shared<opset10::Divide>(numerator_decompressed, denom_stable);
        model_ref = make_shared<Model>(NodeVector{quotient}, ParameterVector{numerator, denominator});
    }
    const auto res = FunctionsComparator::with_default()(model_ref, model);
    ASSERT_TRUE(res.valid) << res.message;
}
TEST(TransformationTests, ConvertPrecision_PowWithNegativeExponent) {
    // x * (y + eps)^(-p) is a division-like pattern: with eps underflowing
    // to zero in f16 the negative power would overflow, so the subgraph must
    // be kept in fp32.
    constexpr float eps_value = 1.0e-12f;
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {
        auto lhs = make_shared<opset10::Parameter>(element::f32, PartialShape::dynamic(3));
        auto rhs = make_shared<opset10::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps = opset10::Constant::create(element::f32, Shape{1}, {eps_value});
        auto rhs_stable = make_shared<opset10::Add>(rhs, eps);
        auto exponent = opset10::Constant::create(element::f32, Shape{1}, {-1.77});
        auto power = make_shared<opset10::Power>(rhs_stable, exponent);
        auto product = make_shared<opset10::Multiply>(lhs, power);
        model = make_shared<Model>(NodeVector{product}, ParameterVector{lhs, rhs});

        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      type_to_fuse_map{},
                                                      true /* keep_precision_sensitive_in_fp32 */);
        manager.run_passes(model);
    }
    {
        // Reference: f16 parameters decompressed to f32, Power/Multiply kept in fp32.
        auto lhs = make_shared<opset10::Parameter>(element::f16, PartialShape::dynamic(3));
        auto rhs = make_shared<opset10::Parameter>(element::f16, PartialShape::dynamic(3));
        auto lhs_decompressed = make_shared<opset10::Convert>(lhs, element::f32);
        auto rhs_decompressed = make_shared<opset10::Convert>(rhs, element::f32);
        auto eps = opset10::Constant::create(element::f32, Shape{1}, {eps_value});
        auto rhs_stable = make_shared<opset10::Add>(rhs_decompressed, eps);
        auto exponent = opset10::Constant::create(element::f32, Shape{1}, {-1.77});
        auto power = make_shared<opset10::Power>(rhs_stable, exponent);
        auto product = make_shared<opset10::Multiply>(lhs_decompressed, power);
        model_ref = make_shared<Model>(NodeVector{product}, ParameterVector{lhs, rhs});
    }
    const auto res = FunctionsComparator::with_default()(model_ref, model);
    ASSERT_TRUE(res.valid) << res.message;
}
TEST(TransformationTests, ConvertPrecision_exp_through_unsqueeze) {
    // The Exp -> ReduceSum marking must propagate through intermediate
    // shape-only ops (here an Unsqueeze between Exp and ReduceSum), so the
    // whole chain is kept in fp32 and fenced with Convert nodes.
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {
        auto data = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto exponent = make_shared<opset10::Exp>(data);
        auto weights = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto reduce_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
        auto expanded = make_shared<opset10::Unsqueeze>(exponent, unsqueeze_axes);
        auto summed = make_shared<opset10::ReduceSum>(expanded, reduce_axes);
        auto factor = opset10::Constant::create(element::f16, Shape{1}, {-1});
        auto factor_decompressed = make_shared<opset10::Convert>(factor, element::f32);
        auto scaled = make_shared<opset10::Multiply>(summed, factor_decompressed);
        auto matmul = make_shared<opset10::MatMul>(scaled, weights);
        model = make_shared<Model>(NodeVector{matmul}, ParameterVector{data, weights});

        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      type_to_fuse_map{},
                                                      true /* keep_precision_sensitive_in_fp32 */);
        manager.run_passes(model);
    }
    {
        // Reference: f16 parameters, fp32 island spanning Exp/Unsqueeze/ReduceSum.
        auto data = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto data_decompressed = make_shared<opset10::Convert>(data, element::f32);
        auto exponent = make_shared<opset10::Exp>(data_decompressed);
        auto weights = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto reduce_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
        auto expanded = make_shared<opset10::Unsqueeze>(exponent, unsqueeze_axes);
        auto summed = make_shared<opset10::ReduceSum>(expanded, reduce_axes);
        auto factor = opset10::Constant::create(element::f16, Shape{1}, {-1});
        auto factor_decompressed = make_shared<opset10::Convert>(factor, element::f32);
        auto scaled = make_shared<opset10::Multiply>(summed, factor_decompressed);
        auto scaled_compressed = make_shared<opset10::Convert>(scaled, element::f16);
        auto matmul = make_shared<opset10::MatMul>(scaled_compressed, weights);
        model_ref = make_shared<Model>(NodeVector{matmul}, ParameterVector{data, weights});
    }
    const auto res = FunctionsComparator::with_default()(model_ref, model);
    ASSERT_TRUE(res.valid) << res.message;
}

View File

@ -41,7 +41,6 @@
#include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp>
#include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include "transformations/common_optimizations/convert_compressed_to_mixed_precision.hpp"
#include <transformations/common_optimizations/wrap_interpolate_into_transposes.hpp>
#include <transformations/common_optimizations/transpose_sinking.hpp>
@ -140,6 +139,63 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
manager.register_pass<ov::pass::InitNodeInfo>();
manager.register_pass<EinsumDecomposition>();
precisions_array fp_convert_precision_list = {
{ov::element::f64, ov::element::f32}
};
// call conversion of float types with keep_precision_sensitive_in_fp32 = true
auto fp_precision_supported = [&](ov::element::Type e) -> bool {
switch (e) {
case ov::element::f16: return device_info.supports_fp16;
case ov::element::f32: return true; // assume that all GPUs support f32 data type
case ov::element::f64: return device_info.supports_fp64;
case ov::element::bf16: return false;
default: return false;
}
return false;
};
const auto fallback_precision = ov::element::f32;
std::vector<ov::element::Type> fp_element_types = {
ov::element::f32,
ov::element::f16,
ov::element::bf16
};
// Add conversion from FP data types to infer precision if it's specified
auto infer_precision = config.get_property(ov::hint::inference_precision);
if (infer_precision != ov::element::undefined) {
if (!fp_precision_supported(infer_precision))
infer_precision = fallback_precision;
for (auto& et : fp_element_types) {
if (et != infer_precision) {
fp_convert_precision_list.push_back({et, infer_precision});
}
}
}
// Add conversion from unsupported FP data types to f32 if we don't have a conversion to something valid already in the list
for (auto& et : fp_element_types) {
if (!fp_precision_supported(et)) {
auto et_pair = std::make_pair(et, fallback_precision);
bool has_valid_conversion = std::find_if(fp_convert_precision_list.begin(), fp_convert_precision_list.end(),
[&](std::pair<ov::element::Type, ov::element::Type> v) -> bool {
return v.first == et_pair.first && fp_precision_supported(v.second);
}) != fp_convert_precision_list.end();
if (!has_valid_conversion) {
fp_convert_precision_list.push_back(et_pair);
}
}
}
type_to_fuse_map empty_fuse_map = {};
manager.register_pass<ov::pass::Validate>();
// call ConvertPrecision with keep_precision_sensitive_in_fp32 = true
manager.register_pass<ov::pass::ConvertPrecision>(fp_convert_precision_list, empty_fuse_map, true);
manager.register_pass<ov::pass::CommonOptimizations>();
manager.register_pass<ov::pass::WrapInterpolateIntoTransposes>();
@ -176,8 +232,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
manager.register_pass<ov::pass::ConvertPriorBox8To0, false>();
manager.register_pass<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>();
precisions_array convert_precision_list {
{ngraph::element::f64, ngraph::element::f32},
precisions_array int_convert_precision_list {
{ngraph::element::i64, ngraph::element::i32},
{ngraph::element::u64, ngraph::element::i32},
{ngraph::element::u16, ngraph::element::i32},
@ -187,54 +242,8 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
{ngraph::element::u4, ngraph::element::u8},
};
auto fp_precision_supported = [&](ngraph::element::Type e) -> bool {
switch (e) {
case ngraph::element::f16: return device_info.supports_fp16;
case ngraph::element::f32: return true; // assume that all GPUs support f32 data type
case ngraph::element::f64: return device_info.supports_fp64;
case ngraph::element::bf16: return false;
default: return false;
}
return false;
};
const auto fallback_precision = ngraph::element::f32;
std::vector<ov::element::Type> fp_element_types = {
ngraph::element::f32,
ngraph::element::f16,
ngraph::element::bf16
};
// Add conversion from FP data types to infer precision if it's specified
auto infer_precision = config.get_property(ov::inference_precision);
if (infer_precision != ov::element::undefined) {
if (!fp_precision_supported(infer_precision))
infer_precision = fallback_precision;
for (auto& et : fp_element_types) {
if (et != infer_precision) {
convert_precision_list.push_back({et, infer_precision});
}
}
}
// Add conversion from unsupported FP data types to f32 if we don't have a conversion to something valid already in the list
for (auto& et : fp_element_types) {
if (!fp_precision_supported(et)) {
auto et_pair = std::make_pair(et, fallback_precision);
bool has_valid_conversion = std::find_if(convert_precision_list.begin(), convert_precision_list.end(),
[&](std::pair<ov::element::Type, ov::element::Type> v) -> bool {
return v.first == et_pair.first && fp_precision_supported(v.second);
}) != convert_precision_list.end();
if (!has_valid_conversion) {
convert_precision_list.push_back(et_pair);
}
}
}
manager.register_pass<ngraph::pass::Validate>();
manager.register_pass<ov::pass::ConvertPrecision>(convert_precision_list);
manager.register_pass<ov::pass::ConvertPrecision>(int_convert_precision_list);
auto pass_config = manager.get_pass_config();
pass_config->disable<ov::pass::EyeDecomposition>();
@ -242,7 +251,6 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
// disable conversion to legacy and use the new mixed precision
// in which precision sensitive nodes are kept in FP32
pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();
pass_config->enable<ov::pass::ConvertCompressedToMixedPrecision>();
// SpaceToDepth/DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
pass_config->set_callback<ov::pass::ConvertSpaceToDepth,