[FP16] call marking for mixed precision inside ConvertPrecision (#14965)
* call marking for mixed precision inside ConvertPrecision * fix typo in precisions list; moved conversion from f64->f32 to the very beginning * remove obsolete convert_compressed_to_mixed_precision_test.cpp * typo fix after merge * corrected namespace prefix * fixed align_mixed_fp32_fp16_types_test.cpp by removing redundant ConvertPrecision * updated ConvertPrecision tests for mixed precision * style fix
This commit is contained in:
parent
1e8144f21b
commit
4103a931c2
@ -1,27 +0,0 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "openvino/pass/pass.hpp"
|
||||
#include "transformations_visibility.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace pass {
|
||||
|
||||
class TRANSFORMATIONS_API ConvertCompressedToMixedPrecision;
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ov
|
||||
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief ConvertCompressedToMixedPrecision converts fp16 compressed ov::Model to mixed precision ov::Model.
|
||||
* In mixed precision ov::Models precision sensitive nodes are kept in fp32 while most of the model is in fp16.
|
||||
*/
|
||||
class ov::pass::ConvertCompressedToMixedPrecision : public ov::pass::ModelPass {
|
||||
public:
|
||||
OPENVINO_RTTI("ConvertCompressedToMixedPrecision", "0");
|
||||
bool run_on_model(const std::shared_ptr<Model>& f) override;
|
||||
};
|
@ -11,22 +11,11 @@
|
||||
namespace ov {
|
||||
namespace pass {
|
||||
|
||||
class TRANSFORMATIONS_API EnableDecompressionConvertConstantFolding;
|
||||
class TRANSFORMATIONS_API ConvertCompressedOnlyToLegacy;
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ov
|
||||
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief Enables ConstantFolding for Convert operation in compressed function.
|
||||
*/
|
||||
class ov::pass::EnableDecompressionConvertConstantFolding : public ov::pass::MatcherPass {
|
||||
public:
|
||||
OPENVINO_RTTI("EnableDecompressionConvertConstantFolding", "0");
|
||||
EnableDecompressionConvertConstantFolding();
|
||||
};
|
||||
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief ConvertCompressedOnlyToLegacy transformation converts compression only FP16 format to legacy FP16 format.
|
||||
|
@ -0,0 +1,27 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "openvino/pass/graph_rewrite.hpp"
|
||||
#include "transformations/enable_decompression_convert_constant_folding.hpp"
|
||||
#include "transformations_visibility.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace pass {
|
||||
|
||||
class TRANSFORMATIONS_API EnableDecompressionConvertConstantFolding;
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ov
|
||||
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief Enables ConstantFolding for Convert operations marked as decompression in compressed function.
|
||||
*/
|
||||
class ov::pass::EnableDecompressionConvertConstantFolding : public ov::pass::MatcherPass {
|
||||
public:
|
||||
OPENVINO_RTTI("EnableDecompressionConvertConstantFolding", "0");
|
||||
EnableDecompressionConvertConstantFolding();
|
||||
};
|
@ -8,8 +8,7 @@
|
||||
#include "openvino/core/rt_info.hpp"
|
||||
#include "openvino/op/util/precision_sensitive_attribute.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "transformations/convert_precision.hpp"
|
||||
#include "transformations/rt_info/decompression.hpp"
|
||||
#include "openvino/pass/constant_folding.hpp"
|
||||
#include "transformations/rt_info/disable_fp16_compression.hpp"
|
||||
|
||||
using namespace ov;
|
||||
@ -35,6 +34,7 @@ bool ov::pass::AlignMixedFP32FP16Types::run_on_model(const std::shared_ptr<ov::M
|
||||
copy_runtime_info(incoming_node, convert);
|
||||
input.replace_source_output(convert);
|
||||
disable_fp16_compression(convert);
|
||||
pass::disable_constant_folding(convert);
|
||||
is_changed = true;
|
||||
}
|
||||
return is_changed;
|
||||
@ -61,6 +61,7 @@ bool ov::pass::AlignMixedFP32FP16Types::run_on_model(const std::shared_ptr<ov::M
|
||||
copy_runtime_info(node, convert);
|
||||
convert->set_friendly_name(node->get_friendly_name() + "_compressed_to_f16");
|
||||
out_inputs.replace_source_output(convert);
|
||||
pass::disable_constant_folding(convert);
|
||||
is_changed = true;
|
||||
}
|
||||
}
|
||||
|
@ -25,7 +25,6 @@
|
||||
#include "transformations/common_optimizations/concat_reduce_fusion.hpp"
|
||||
#include "transformations/common_optimizations/conv_mul_fusion.hpp"
|
||||
#include "transformations/common_optimizations/conv_to_binary_conv.hpp"
|
||||
#include "transformations/common_optimizations/convert_compressed_to_mixed_precision.hpp"
|
||||
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
|
||||
#include "transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp"
|
||||
#include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
|
||||
@ -125,8 +124,6 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr<ov::Model
|
||||
// Enabling conversion of FP16 IR to legacy representation, each plugin has to disable it
|
||||
// after support for FP16 IR is implemented
|
||||
REGISTER_PASS(manager, ConvertCompressedOnlyToLegacy)
|
||||
// should be enabled manually only on plugins supporting mixed precision inference
|
||||
REGISTER_DISABLED_PASS(manager, ConvertCompressedToMixedPrecision);
|
||||
|
||||
REGISTER_PASS(manager, MarkDividesInShapeSubgraphs)
|
||||
REGISTER_PASS(manager, WeightsDequantizeToFakeQuantize)
|
||||
|
@ -1,37 +0,0 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "transformations/common_optimizations/convert_compressed_to_mixed_precision.hpp"
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "openvino/opsets/opset8.hpp"
|
||||
#include "openvino/pass/manager.hpp"
|
||||
#include "transformations/common_optimizations/align_mixed_fp32_fp16_types.hpp"
|
||||
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
|
||||
#include "transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp"
|
||||
#include "transformations/convert_precision.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
bool ov::pass::ConvertCompressedToMixedPrecision::run_on_model(const std::shared_ptr<ov::Model>& f) {
|
||||
RUN_ON_MODEL_SCOPE(ConvertCompressedToMixedPrecision);
|
||||
|
||||
// pass is triggered only for fp16 compressed Models
|
||||
if (!ov::op::util::has_decompression_converts(f))
|
||||
return false;
|
||||
|
||||
Manager manager(get_pass_config());
|
||||
REGISTER_PASS(manager, MarkSugraphsToKeepInMixedPrecision)
|
||||
REGISTER_PASS(manager, AlignMixedFP32FP16Types)
|
||||
|
||||
const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
|
||||
type_to_fuse_map additional_fuse_map = {};
|
||||
// call ConvertPrecision with keep_precision_sensitive_in_fp32 = true
|
||||
REGISTER_PASS(manager, ConvertPrecision, convert_precision_list, additional_fuse_map, true)
|
||||
|
||||
REGISTER_PASS(manager, EnableDecompressionConvertConstantFolding)
|
||||
REGISTER_PASS(manager, ConstantFolding)
|
||||
manager.run_passes(f);
|
||||
|
||||
return false;
|
||||
}
|
@ -9,27 +9,12 @@
|
||||
#include "openvino/pass/manager.hpp"
|
||||
#include "openvino/pass/pattern/op/wrap_type.hpp"
|
||||
#include "transformations/convert_precision.hpp"
|
||||
#include "transformations/enable_decompression_convert_constant_folding.hpp"
|
||||
#include "transformations/rt_info/disable_fp16_compression.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
using namespace ov;
|
||||
|
||||
ov::pass::EnableDecompressionConvertConstantFolding::EnableDecompressionConvertConstantFolding() {
|
||||
MATCHER_SCOPE(EnableDecompressionConvertConstantFolding);
|
||||
auto convert = pattern::wrap_type<opset8::Convert>();
|
||||
|
||||
ov::matcher_pass_callback callback = [=](pattern::Matcher& m) {
|
||||
const auto& node = m.get_match_root();
|
||||
if (!ov::is_decompression(node))
|
||||
return false;
|
||||
enable_constant_folding(node);
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ov::pass::pattern::Matcher>(convert, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
|
||||
bool ov::pass::ConvertCompressedOnlyToLegacy::run_on_model(const std::shared_ptr<ov::Model>& f) {
|
||||
RUN_ON_MODEL_SCOPE(ConvertCompressedOnlyToLegacy);
|
||||
if (ov::op::util::has_decompression_converts(f)) {
|
||||
|
@ -17,7 +17,12 @@
|
||||
#include <vector>
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "openvino/pass/constant_folding.hpp"
|
||||
#include "openvino/pass/manager.hpp"
|
||||
#include "ov_ops/type_relaxed.hpp"
|
||||
#include "transformations/common_optimizations/align_mixed_fp32_fp16_types.hpp"
|
||||
#include "transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp"
|
||||
#include "transformations/enable_decompression_convert_constant_folding.hpp"
|
||||
#include "transformations/rt_info/disable_fp16_compression.hpp"
|
||||
|
||||
using namespace ov;
|
||||
@ -175,7 +180,7 @@ bool convert_precision(ov::pass::PassBase& pass,
|
||||
// If output type mismatch given type we try to fuse type into this operation
|
||||
// otherwise we insert Convert operation.
|
||||
for (auto& node : ops) {
|
||||
if (skip_precision_sensitive && fp16_compression_is_disabled(node))
|
||||
if (skip_precision_sensitive && fp16_compression_is_disabled(node) && to == element::f16)
|
||||
continue;
|
||||
|
||||
// Recursively apply transformation for sub-graph based operations
|
||||
@ -201,7 +206,7 @@ bool convert_precision(ov::pass::PassBase& pass,
|
||||
|
||||
for (auto& node : ops) {
|
||||
// skip precision sensitive nodes
|
||||
if (skip_precision_sensitive && fp16_compression_is_disabled(node))
|
||||
if (skip_precision_sensitive && fp16_compression_is_disabled(node) && to == element::f16)
|
||||
continue;
|
||||
is_output_precision_changed |= convert_node_output_precision(node);
|
||||
}
|
||||
@ -219,6 +224,8 @@ bool convert_precision(ov::pass::PassBase& pass,
|
||||
// Convert elimination here
|
||||
for (auto& node : ops) {
|
||||
if (auto convert = std::dynamic_pointer_cast<opset4::Convert>(node)) {
|
||||
if (pass::constant_folding_is_disabled(node))
|
||||
continue;
|
||||
// WA for TopK: don't remove fake Convert
|
||||
if (convert->input(0).get_element_type() == convert->get_convert_element_type() &&
|
||||
convert->input_value(0).get_node_shared_ptr()->get_output_size() == 1) {
|
||||
@ -301,6 +308,17 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr<ngraph::Func
|
||||
{opset10::Unique::get_type_info_static(), fuse_type_to_unique_v10},
|
||||
{opset8::RandomUniform::get_type_info_static(), fuse_type_to_random_uniform_v8}};
|
||||
|
||||
std::pair<ov::element::Type, ov::element::Type> compress_f16_pair = {ov::element::f32, ov::element::f16};
|
||||
bool has_compress_f16 = std::count(m_precisions.begin(), m_precisions.end(), compress_f16_pair) > 0;
|
||||
|
||||
if (m_keep_precision_sensitive_in_fp32 && has_compress_f16) {
|
||||
pass::Manager manager(get_pass_config());
|
||||
// Mark subgraphs with disable_fp16_compression to keep them in FP32
|
||||
manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
|
||||
manager.register_pass<pass::AlignMixedFP32FP16Types>();
|
||||
manager.run_passes(f);
|
||||
}
|
||||
|
||||
for (const auto& it : m_additional_type_to_fuse_map) {
|
||||
type_to_fuse[it.first] = it.second;
|
||||
}
|
||||
@ -327,6 +345,13 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr<ngraph::Func
|
||||
m_keep_precision_sensitive_in_fp32);
|
||||
}
|
||||
|
||||
// to remove extra converts
|
||||
if (m_keep_precision_sensitive_in_fp32) {
    // Re-enable constant folding on Converts marked as decompression and fold them,
    // so that the extra Converts are removed from the model.
    pass::Manager manager(get_pass_config());
    manager.register_pass<pass::EnableDecompressionConvertConstantFolding>();
    manager.register_pass<pass::ConstantFolding>();
    // Registering passes has no effect on its own: the manager must be run on the model.
    manager.run_passes(f);
}
|
||||
|
||||
(void)is_changed; // ignored
|
||||
|
||||
// Return value is false because pass::Manager always applies Validation pass
|
||||
|
@ -0,0 +1,29 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "transformations/enable_decompression_convert_constant_folding.hpp"
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "openvino/opsets/opset8.hpp"
|
||||
#include "openvino/pass/pattern/op/wrap_type.hpp"
|
||||
#include "transformations/rt_info/decompression.hpp"
|
||||
#include "transformations/rt_info/disable_constant_folding.hpp"
|
||||
|
||||
using namespace ov;
|
||||
|
||||
pass::EnableDecompressionConvertConstantFolding::EnableDecompressionConvertConstantFolding() {
|
||||
MATCHER_SCOPE(EnableDecompressionConvertConstantFolding);
|
||||
auto convert = pattern::wrap_type<opset8::Convert>();
|
||||
|
||||
matcher_pass_callback callback = [=](pattern::Matcher& m) {
|
||||
const auto& node = m.get_match_root();
|
||||
if (!is_decompression(node))
|
||||
return false;
|
||||
enable_constant_folding(node);
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ov::pass::pattern::Matcher>(convert, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
@ -9,7 +9,6 @@
|
||||
#include <openvino/pass/manager.hpp>
|
||||
#include <transformations/common_optimizations/align_mixed_fp32_fp16_types.hpp>
|
||||
#include <transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp>
|
||||
#include <transformations/convert_precision.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
|
||||
@ -36,24 +35,21 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_1) {
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
|
||||
manager.register_pass<pass::AlignMixedFP32FP16Types>();
|
||||
const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
|
||||
type_to_fuse_map empty_type_to_fuse_map = {};
|
||||
manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
|
||||
manager.run_passes(model);
|
||||
}
|
||||
|
||||
{
|
||||
auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
|
||||
auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
|
||||
auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
|
||||
auto exp_1 = make_shared<Exp>(convert_to_f32_1);
|
||||
auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
|
||||
auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
|
||||
auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
|
||||
auto reduce_sum_1 = make_shared<ReduceSum>(exp_1, reduction_axes);
|
||||
|
||||
auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
|
||||
auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
|
||||
auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
|
||||
auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f16);
|
||||
auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f32);
|
||||
auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
|
||||
|
||||
model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
|
||||
@ -81,18 +77,15 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_2) {
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
|
||||
manager.register_pass<pass::AlignMixedFP32FP16Types>();
|
||||
const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
|
||||
type_to_fuse_map empty_type_to_fuse_map = {};
|
||||
manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
|
||||
|
||||
manager.run_passes(model);
|
||||
}
|
||||
|
||||
{
|
||||
auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
|
||||
auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
|
||||
auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
|
||||
auto exp_1 = make_shared<Exp>(convert_to_f32_1);
|
||||
auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
|
||||
auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
|
||||
auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
|
||||
|
||||
auto unsqueeze_axes = Constant::create(element::i64, Shape{1}, {1});
|
||||
@ -102,7 +95,7 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_2) {
|
||||
auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
|
||||
auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
|
||||
auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
|
||||
auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f16);
|
||||
auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f32);
|
||||
auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
|
||||
|
||||
model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
|
||||
@ -130,17 +123,14 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_3) {
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
|
||||
manager.register_pass<pass::AlignMixedFP32FP16Types>();
|
||||
const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
|
||||
type_to_fuse_map empty_type_to_fuse_map = {};
|
||||
manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
|
||||
manager.run_passes(model);
|
||||
}
|
||||
|
||||
{
|
||||
auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
|
||||
auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
|
||||
auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
|
||||
auto exp_1 = make_shared<Exp>(convert_to_f32_1);
|
||||
auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
|
||||
auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
|
||||
auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
|
||||
|
||||
auto reduce_sum_1 = make_shared<ReduceSum>(exp_1, reduction_axes);
|
||||
@ -150,7 +140,7 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_3) {
|
||||
auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
|
||||
auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
|
||||
auto mul_1 = make_shared<Multiply>(add_1, factor_const_decompressed);
|
||||
auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f16);
|
||||
auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f32);
|
||||
auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
|
||||
|
||||
model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
|
||||
@ -172,21 +162,18 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_4) {
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
|
||||
manager.register_pass<pass::AlignMixedFP32FP16Types>();
|
||||
const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
|
||||
type_to_fuse_map empty_type_to_fuse_map = {};
|
||||
manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
|
||||
manager.run_passes(model);
|
||||
}
|
||||
|
||||
{
|
||||
auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
|
||||
auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
|
||||
auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
|
||||
auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
|
||||
auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
|
||||
auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
|
||||
auto mvn_1 = make_shared<MVN>(convert_to_f32_1, reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT);
|
||||
auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f});
|
||||
auto add_1 = make_shared<Add>(mvn_1, addition_const);
|
||||
auto convert_to_f16_1 = make_shared<Convert>(add_1, element::f16);
|
||||
auto convert_to_f16_1 = make_shared<Convert>(add_1, element::f32);
|
||||
auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
|
||||
|
||||
model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
|
||||
@ -215,15 +202,12 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_mnv_with_split) {
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
|
||||
manager.register_pass<pass::AlignMixedFP32FP16Types>();
|
||||
const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
|
||||
type_to_fuse_map empty_type_to_fuse_map = {};
|
||||
manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
|
||||
manager.run_passes(model);
|
||||
}
|
||||
|
||||
{
|
||||
auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
|
||||
auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 56, 224});
|
||||
auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
|
||||
auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 56, 224});
|
||||
|
||||
auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
|
||||
|
||||
@ -234,7 +218,7 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_mnv_with_split) {
|
||||
auto mvn_1 = make_shared<MVN>(split->output(0), reduction_axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT);
|
||||
auto addition_const = Constant::create(element::f32, Shape{1}, {0.1f});
|
||||
auto add_1 = make_shared<Add>(mvn_1, addition_const);
|
||||
auto convert_to_f16_1 = make_shared<Convert>(add_1, element::f16);
|
||||
auto convert_to_f16_1 = make_shared<Convert>(add_1, element::f32);
|
||||
auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
|
||||
|
||||
// todo: without Converts to fp16 because of GPU
|
||||
@ -271,17 +255,14 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_with_rand_uniform) {
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
|
||||
manager.register_pass<pass::AlignMixedFP32FP16Types>();
|
||||
const precisions_array convert_precision_list{{ov::element::f32, ov::element::f16}};
|
||||
type_to_fuse_map empty_type_to_fuse_map = {};
|
||||
manager.register_pass<pass::ConvertPrecision>(convert_precision_list, empty_type_to_fuse_map, true);
|
||||
manager.run_passes(model);
|
||||
}
|
||||
|
||||
{
|
||||
auto input_1 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
|
||||
auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
|
||||
auto convert_to_f32_1 = make_shared<Convert>(input_1, element::f32);
|
||||
auto exp_1 = make_shared<Exp>(convert_to_f32_1);
|
||||
auto input_2 = make_shared<Parameter>(element::f16, Shape{1, 3, 224, 224});
|
||||
auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
|
||||
auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
|
||||
auto reduce_sum_1 = make_shared<ReduceSum>(exp_1, reduction_axes);
|
||||
|
||||
@ -289,14 +270,14 @@ TEST_F(TransformationTestsF, align_mixed_fp16_fp32_with_rand_uniform) {
|
||||
auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
|
||||
|
||||
auto out_shape = Constant::create(element::i64, Shape{3}, {1, 3, 224});
|
||||
auto minval = Constant::create(element::f16, Shape{}, {1});
|
||||
auto maxval = Constant::create(element::f16, Shape{}, {10});
|
||||
auto rand_uniform = make_shared<RandomUniform>(out_shape, minval, maxval, element::f16);
|
||||
auto minval = Constant::create(element::f32, Shape{}, {1});
|
||||
auto maxval = Constant::create(element::f32, Shape{}, {10});
|
||||
auto rand_uniform = make_shared<RandomUniform>(out_shape, minval, maxval, element::f32);
|
||||
auto rand_uniform_decompressed = make_shared<Convert>(rand_uniform, element::f32);
|
||||
auto rand_uniform_add_factor = make_shared<Add>(rand_uniform_decompressed, factor_const_decompressed);
|
||||
|
||||
auto mul_1 = make_shared<Multiply>(reduce_sum_1, rand_uniform_add_factor);
|
||||
auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f16);
|
||||
auto convert_to_f16_1 = make_shared<Convert>(mul_1, element::f32);
|
||||
auto matmul_1 = make_shared<MatMul>(convert_to_f16_1, input_2);
|
||||
|
||||
model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
|
||||
|
@ -1,140 +0,0 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "transformations/common_optimizations/convert_compressed_to_mixed_precision.hpp"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <transformations/convert_precision.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
#include "openvino/core/model.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/pass/manager.hpp"
|
||||
#include "transformations/init_node_info.hpp"
|
||||
#include "transformations/rt_info/decompression.hpp"
|
||||
|
||||
using namespace testing;
|
||||
using namespace ov;
|
||||
|
||||
TEST_F(TransformationTestsF, ConvertCompressedToMixedPrecision) {
|
||||
{
|
||||
auto input = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
auto const_weights =
|
||||
opset10::Constant::create(element::f16, Shape{1, 3, 3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
|
||||
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9});
|
||||
auto convert_ins1 = std::make_shared<opset10::Convert>(const_weights, element::f32);
|
||||
mark_as_decompression(convert_ins1);
|
||||
auto conv = std::make_shared<opset10::Convolution>(input,
|
||||
convert_ins1,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
model = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::InitNodeInfo>();
|
||||
manager.register_pass<pass::ConvertCompressedToMixedPrecision>();
|
||||
manager.run_passes(model);
|
||||
ASSERT_NO_THROW(check_rt_info(model));
|
||||
}
|
||||
|
||||
{
|
||||
auto input = std::make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 12, 12});
|
||||
auto const_weights =
|
||||
opset10::Constant::create(element::f16, Shape{1, 3, 3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
|
||||
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9});
|
||||
auto conv = std::make_shared<opset10::Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
model_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, ConvertCompressedToMixedPrecissionNoConvertion) {
|
||||
// test that pass is not triggered when there are no decompression Converts
|
||||
{
|
||||
auto input = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
auto const_weights =
|
||||
opset10::Constant::create(element::f32, Shape{1, 3, 3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
|
||||
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9});
|
||||
auto conv = std::make_shared<opset10::Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
model = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::InitNodeInfo>();
|
||||
manager.register_pass<pass::ConvertCompressedToMixedPrecision>();
|
||||
manager.run_passes(model);
|
||||
ASSERT_NO_THROW(check_rt_info(model));
|
||||
}
|
||||
|
||||
{
|
||||
auto input = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
auto const_weights =
|
||||
opset10::Constant::create(element::f32, Shape{1, 3, 3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5,
|
||||
6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9});
|
||||
auto conv = std::make_shared<opset10::Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
model_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, ConvertCompressedToMixedPrecission_keep_sensitive_nodes_in_fp32) {
|
||||
{
|
||||
auto input_1 = std::make_shared<opset10::Parameter>(element::f32, Shape{360, 640});
|
||||
auto input_2 = std::make_shared<opset10::Parameter>(element::f32, Shape{720, 1280});
|
||||
auto shapeof = std::make_shared<opset10::ShapeOf>(input_2);
|
||||
|
||||
// decompression Converts are needed for ConvertCompressedToMixedPrecision to be triggered
|
||||
auto compressed_const = opset10::Constant::create(element::f16, Shape{}, {2.0f});
|
||||
auto decompress_convert = std::make_shared<opset10::Convert>(compressed_const, element::f32);
|
||||
mark_as_decompression(decompress_convert);
|
||||
auto add_decompressed_const = std::make_shared<opset10::Add>(input_1, decompress_convert);
|
||||
|
||||
auto convert_to_float = std::make_shared<opset10::Convert>(shapeof, element::f32);
|
||||
auto const_denominator = opset10::Constant::create(element::f32, Shape{}, {2.0f});
|
||||
auto div = std::make_shared<opset10::Divide>(convert_to_float, const_denominator);
|
||||
auto new_shape = std::make_shared<opset10::Convert>(div, element::i64);
|
||||
|
||||
auto reshape = std::make_shared<opset10::Reshape>(add_decompressed_const, new_shape, false);
|
||||
model = std::make_shared<Model>(NodeVector{reshape}, ParameterVector{input_1, input_2});
|
||||
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::ConvertCompressedToMixedPrecision>();
|
||||
manager.run_passes(model);
|
||||
}
|
||||
{
|
||||
auto input_1 = std::make_shared<opset10::Parameter>(element::f16, Shape{360, 640});
|
||||
auto input_2 = std::make_shared<opset10::Parameter>(element::f16, Shape{720, 1280});
|
||||
|
||||
// after ConvertCompressedToMixedPrecision Const->Convert are constant-folded into a single f16 Const
|
||||
auto compressed_const = opset10::Constant::create(element::f16, Shape{}, {2.0f});
|
||||
auto add_compressed_const = std::make_shared<opset10::Add>(input_1, compressed_const);
|
||||
|
||||
// shape subgraph will be constant folded
|
||||
auto new_shape_const = opset10::Constant::create(element::i64, Shape{2}, {360, 640});
|
||||
|
||||
auto reshape = std::make_shared<opset10::Reshape>(add_compressed_const, new_shape_const, false);
|
||||
model_ref = std::make_shared<Model>(NodeVector{reshape}, ParameterVector{input_1, input_2});
|
||||
}
|
||||
}
|
@ -21,11 +21,11 @@
|
||||
#include <vector>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
#include "transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp"
|
||||
#include "transformations/rt_info/disable_fp16_compression.hpp"
|
||||
|
||||
using namespace testing;
|
||||
using namespace ov;
|
||||
using namespace std;
|
||||
|
||||
template <element::Type_t T>
|
||||
bool has_type(std::shared_ptr<Model> f) {
|
||||
@ -776,7 +776,6 @@ TEST(TransformationTests, ConvertPrecision_skip_precision_sensitive) {
|
||||
model = std::make_shared<Model>(NodeVector{interpolate}, ParameterVector{input});
|
||||
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
|
||||
type_to_fuse_map empty_type_to_fuse_map = {};
|
||||
bool keep_precision_sensitive_in_fp32 = true;
|
||||
manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
|
||||
@ -811,7 +810,6 @@ TEST(TransformationTests, ConvertPrecision_without_keep_precision_sensitive_in_f
|
||||
interpolate = std::make_shared<opset10::Interpolate>(input, sizes, scales, attrs);
|
||||
model = std::make_shared<Model>(NodeVector{interpolate}, ParameterVector{input});
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
|
||||
type_to_fuse_map empty_type_to_fuse_map = {};
|
||||
bool keep_precision_sensitive_in_fp32 = false;
|
||||
manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
|
||||
@ -834,7 +832,6 @@ TEST(TransformationTests, ConvertPrecision_check_marking_does_not_leak_in_trivia
|
||||
model = std::make_shared<Model>(NodeVector{reshape}, ParameterVector{input_1, input_2});
|
||||
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
|
||||
type_to_fuse_map empty_type_to_fuse_map = {};
|
||||
bool keep_precision_sensitive_in_fp32 = true;
|
||||
manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
|
||||
@ -874,7 +871,6 @@ TEST(TransformationTests, ConvertPrecision_whole_shape_subgraph_is_marked_1) {
|
||||
model = std::make_shared<Model>(NodeVector{reshape}, ParameterVector{input_1, input_2});
|
||||
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
|
||||
type_to_fuse_map empty_type_to_fuse_map = {};
|
||||
bool keep_precision_sensitive_in_fp32 = true;
|
||||
manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
|
||||
@ -928,7 +924,6 @@ TEST(TransformationTests, ConvertPrecision_whole_shape_subgraph_is_marked_2) {
|
||||
model = std::make_shared<Model>(NodeVector{result}, ParameterVector{input_1});
|
||||
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
|
||||
type_to_fuse_map empty_type_to_fuse_map = {};
|
||||
bool keep_precision_sensitive_in_fp32 = true;
|
||||
manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
|
||||
@ -1008,7 +1003,6 @@ TEST(TransformationTests, ConvertPrecision_whole_shape_subgraph_is_marked_3) {
|
||||
model = std::make_shared<Model>(NodeVector{result_1, result_2}, ParameterVector{input_1, input_2});
|
||||
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
|
||||
type_to_fuse_map empty_type_to_fuse_map = {};
|
||||
bool keep_precision_sensitive_in_fp32 = true;
|
||||
manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
|
||||
@ -1087,7 +1081,6 @@ TEST(TransformationTests, ConvertCompressedToMixedPrecission_do_not_keep_in_fp32
|
||||
model = std::make_shared<Model>(NodeVector{interpolate}, ParameterVector{input});
|
||||
|
||||
pass::Manager manager;
|
||||
manager.register_pass<pass::MarkPrecisionSensitiveShapeOfSubgraphs>();
|
||||
type_to_fuse_map empty_type_to_fuse_map = {};
|
||||
bool keep_precision_sensitive_in_fp32 = false; // didn't keep in FP32 intentionally
|
||||
manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
|
||||
@ -1323,3 +1316,474 @@ TEST(TransformationTests, ConvertPrecision_ConstantConversion_U1ToU4) {
|
||||
std::vector<uint8_t>{171},
|
||||
{1, 0, 1, 0, 1, 0, 1, 1});
|
||||
}
|
||||
|
||||
// Runs ConvertPrecision (f32 -> f16, keep_precision_sensitive_in_fp32 = true) on an
// Exp -> ReduceSum -> Multiply -> MatMul graph and compares the result with a reference in which
// the Exp/ReduceSum/Multiply chain stays in f32 and is wrapped by Convert nodes.
// NOTE: the "precission" spelling in the test name is kept as-is so existing test filters keep matching.
TEST(TransformationTests, ConvertPrecision_keep_precission_sensitive_fp32_with_exp) {
    std::shared_ptr<Model> model;
    std::shared_ptr<Model> model_ref;
    pass::Manager manager;

    // Graph under test: f32 everywhere, the Multiply factor is an f16 constant decompressed to f32.
    {
        auto param_a = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto exponent = std::make_shared<opset10::Exp>(param_a);
        auto param_b = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto summed = std::make_shared<opset10::ReduceSum>(exponent, axes);

        auto scale_f16 = opset10::Constant::create(element::f16, Shape{1}, {-1});
        auto scale_f32 = std::make_shared<opset10::Convert>(scale_f16, element::f32);
        auto scaled = std::make_shared<opset10::Multiply>(summed, scale_f32);
        auto product = std::make_shared<opset10::MatMul>(scaled, param_b);

        model = std::make_shared<Model>(NodeVector{product}, ParameterVector{param_a, param_b});

        type_to_fuse_map no_extra_fusing = {};
        bool keep_sensitive_in_fp32 = true;
        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      no_extra_fusing,
                                                      keep_sensitive_in_fp32);
        manager.run_passes(model);
    }

    // Expected graph: f16 parameters, the chain from Exp to Multiply stays in f32 between two Converts.
    {
        auto param_a = std::make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto param_a_f32 = std::make_shared<opset10::Convert>(param_a, element::f32);
        auto exponent = std::make_shared<opset10::Exp>(param_a_f32);
        auto param_b = std::make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto summed = std::make_shared<opset10::ReduceSum>(exponent, axes);

        auto scale_f16 = opset10::Constant::create(element::f16, Shape{1}, {-1});
        auto scale_f32 = std::make_shared<opset10::Convert>(scale_f16, element::f32);
        auto scaled = std::make_shared<opset10::Multiply>(summed, scale_f32);
        auto scaled_f16 = std::make_shared<opset10::Convert>(scaled, element::f16);
        auto product = std::make_shared<opset10::MatMul>(scaled_f16, param_b);

        model_ref = std::make_shared<Model>(NodeVector{product}, ParameterVector{param_a, param_b});
    }

    const auto comparator = FunctionsComparator::with_default();
    const auto verdict = comparator(model_ref, model);
    ASSERT_TRUE(verdict.valid) << verdict.message;
}
|
||||
|
||||
// Runs ConvertPrecision (f32 -> f16, keep_precision_sensitive_in_fp32 = true) on an
// Exp -> ReduceMean -> Multiply -> MatMul graph and compares the result with a reference in which
// the Exp/ReduceMean/Multiply chain stays in f32 and is wrapped by Convert nodes.
// NOTE: the "precission" spelling in the test name is kept as-is so existing test filters keep matching.
TEST(TransformationTests, ConvertPrecision_keep_precission_sensitive_fp32_with_reducemean) {
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;

    // Graph under test: f32 everywhere, the Multiply factor is an f16 constant decompressed to f32.
    {
        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto exp_1 = make_shared<opset10::Exp>(input_1);
        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        // The reduction must be ReduceMean here: that is the op this test is named after and the op
        // the reference graph below is built with. Building it with ReduceSum made the two graphs
        // differ in node type regardless of what the transformation does.
        auto reduce_mean_1 = make_shared<opset10::ReduceMean>(exp_1, reduction_axes);

        auto factor_const = opset10::Constant::create(element::f16, Shape{1}, {-1});
        auto factor_const_decompressed = make_shared<opset10::Convert>(factor_const, element::f32);
        auto mul_1 = make_shared<opset10::Multiply>(reduce_mean_1, factor_const_decompressed);
        auto matmul_1 = make_shared<opset10::MatMul>(mul_1, input_2);

        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});

        type_to_fuse_map empty_type_to_fuse_map = {};
        bool keep_precision_sensitive_in_fp32 = true;
        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      empty_type_to_fuse_map,
                                                      keep_precision_sensitive_in_fp32);
        manager.run_passes(model);
    }

    // Expected graph: f16 parameters, the chain from Exp to Multiply stays in f32 between two Converts.
    {
        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto input_1_decompressed = make_shared<opset10::Convert>(input_1, element::f32);
        auto exp_1 = make_shared<opset10::Exp>(input_1_decompressed);
        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_mean_1 = make_shared<opset10::ReduceMean>(exp_1, reduction_axes);

        auto factor_const = opset10::Constant::create(element::f16, Shape{1}, {-1});
        auto factor_const_decompressed = make_shared<opset10::Convert>(factor_const, element::f32);
        auto mul_1 = make_shared<opset10::Multiply>(reduce_mean_1, factor_const_decompressed);
        auto mul_1_compressed = make_shared<opset10::Convert>(mul_1, element::f16);
        auto matmul_1 = make_shared<opset10::MatMul>(mul_1_compressed, input_2);

        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
    }

    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
    FunctionsComparator::Result result = func_comparator(model_ref, model);
    ASSERT_TRUE(result.valid) << result.message;
}
|
||||
|
||||
// ReduceSum that is not fed by Exp is not treated as precision sensitive: the expected graph is
// the same topology fully in f16, with no Convert nodes added.
TEST(TransformationTests, ConvertPrecision_reducesum_without_exp) {
    std::shared_ptr<Model> model;
    std::shared_ptr<Model> model_ref;
    pass::Manager manager;

    // Graph under test, entirely in f32.
    {
        auto param_a = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto param_b = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto summed = std::make_shared<opset10::ReduceSum>(param_a, axes);

        auto scale = opset10::Constant::create(element::f32, Shape{1}, {-1});
        auto scaled = std::make_shared<opset10::Multiply>(summed, scale);
        auto product = std::make_shared<opset10::MatMul>(scaled, param_b);

        model = std::make_shared<Model>(NodeVector{product}, ParameterVector{param_a, param_b});

        type_to_fuse_map no_extra_fusing = {};
        bool keep_sensitive_in_fp32 = true;
        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      no_extra_fusing,
                                                      keep_sensitive_in_fp32);
        manager.run_passes(model);
    }

    // Expected graph: identical structure with every floating-point tensor in f16.
    {
        auto param_a = std::make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto param_b = std::make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto summed = std::make_shared<opset10::ReduceSum>(param_a, axes);

        auto scale = opset10::Constant::create(element::f16, Shape{1}, {-1});
        auto scaled = std::make_shared<opset10::Multiply>(summed, scale);
        auto product = std::make_shared<opset10::MatMul>(scaled, param_b);

        model_ref = std::make_shared<Model>(NodeVector{product}, ParameterVector{param_a, param_b});
    }

    const auto comparator = FunctionsComparator::with_default();
    const auto verdict = comparator(model_ref, model);
    ASSERT_TRUE(verdict.valid) << verdict.message;
}
|
||||
|
||||
// MVN case: the reference graph expects MVN to stay in f32, surrounded by a Convert to f32 on its
// data input and a Convert back to f16 on its output, while the rest of the graph becomes f16.
TEST(TransformationTests, ConvertPrecision_MarkNormalizationOps_1) {
    std::shared_ptr<Model> model;
    std::shared_ptr<Model> model_ref;
    pass::Manager manager;

    // Graph under test, entirely in f32.
    {
        auto param_a = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto param_b = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto normalized =
            std::make_shared<opset10::MVN>(param_a, axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT);
        auto product = std::make_shared<opset10::MatMul>(normalized, param_b);

        model = std::make_shared<Model>(NodeVector{product}, ParameterVector{param_a, param_b});

        type_to_fuse_map no_extra_fusing = {};
        bool keep_sensitive_in_fp32 = true;
        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      no_extra_fusing,
                                                      keep_sensitive_in_fp32);
        manager.run_passes(model);
    }

    // Expected graph: f16 parameters with MVN isolated in f32.
    {
        auto param_a = std::make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto param_a_f32 = std::make_shared<opset10::Convert>(param_a, element::f32);
        auto param_b = std::make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto normalized =
            std::make_shared<opset10::MVN>(param_a_f32, axes, true, 1.0e-8f, op::MVNEpsMode::INSIDE_SQRT);
        auto normalized_f16 = std::make_shared<opset10::Convert>(normalized, element::f16);
        auto product = std::make_shared<opset10::MatMul>(normalized_f16, param_b);

        model_ref = std::make_shared<Model>(NodeVector{product}, ParameterVector{param_a, param_b});
    }

    const auto comparator = FunctionsComparator::with_default();
    const auto verdict = comparator(model_ref, model);
    ASSERT_TRUE(verdict.valid) << verdict.message;
}
|
||||
|
||||
// NormalizeL2 case: the reference graph expects NormalizeL2 to stay in f32, surrounded by a Convert
// to f32 on its data input and a Convert back to f16 on its output, while the rest becomes f16.
TEST(TransformationTests, ConvertPrecision_MarkNormalizationOps_2) {
    std::shared_ptr<Model> model;
    std::shared_ptr<Model> model_ref;
    pass::Manager manager;

    // Graph under test, entirely in f32.
    {
        auto param_a = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto param_b = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto normalized = std::make_shared<opset10::NormalizeL2>(param_a, axes, 1.0e-8f, ov::op::EpsMode::MAX);
        auto product = std::make_shared<opset10::MatMul>(normalized, param_b);

        model = std::make_shared<Model>(NodeVector{product}, ParameterVector{param_a, param_b});

        type_to_fuse_map no_extra_fusing = {};
        bool keep_sensitive_in_fp32 = true;
        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      no_extra_fusing,
                                                      keep_sensitive_in_fp32);
        manager.run_passes(model);
    }

    // Expected graph: f16 parameters with NormalizeL2 isolated in f32.
    {
        auto param_a = std::make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto param_a_f32 = std::make_shared<opset10::Convert>(param_a, element::f32);
        auto param_b = std::make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto normalized =
            std::make_shared<opset10::NormalizeL2>(param_a_f32, axes, 1.0e-8f, ov::op::EpsMode::MAX);
        auto normalized_f16 = std::make_shared<opset10::Convert>(normalized, element::f16);
        auto product = std::make_shared<opset10::MatMul>(normalized_f16, param_b);

        model_ref = std::make_shared<Model>(NodeVector{product}, ParameterVector{param_a, param_b});
    }

    const auto comparator = FunctionsComparator::with_default();
    const auto verdict = comparator(model_ref, model);
    ASSERT_TRUE(verdict.valid) << verdict.message;
}
|
||||
|
||||
// Larger regression case built from a t2t-vit-7 subgraph. The reference graph expects everything
// from the Exp nodes down to the final Divide to stay in f32: input_1..input_3 are decompressed
// with Convert(f32), and a single Convert(f16) is placed in front of the last MatMul. input_4 is
// consumed in f16 directly by that MatMul.
// NOTE: the "precission" spelling in the test name is kept as-is so existing test filters keep matching.
TEST(TransformationTests, ConvertPrecision_keep_precission_sensitive_fp32_t2t_subgraph) {
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    // subgraph from t2t-vit-7
    {
        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3136, 32});
        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3136, 32});
        auto input_3 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3136, 64, 1});
        auto input_4 = make_shared<opset10::Parameter>(element::f32, Shape{128, 64});
        auto exp_1 = make_shared<opset10::Exp>(input_1);
        auto exp_2 = make_shared<opset10::Exp>(input_2);

        // TODO(review): the original note here said "add decompression" - presumably an
        // f16 Constant followed by Convert(f32) was intended for this factor; confirm.
        auto factor_1 = opset10::Constant::create(element::f32, Shape{1}, {0.5});
        auto mul_1 = make_shared<opset10::Multiply>(exp_1, factor_1);
        auto factor_2 = opset10::Constant::create(element::f32, Shape{1}, {0.5});
        auto mul_2 = make_shared<opset10::Multiply>(exp_2, factor_2);

        auto const_unsqueeze_1 = opset10::Constant::create(element::i64, Shape{4}, {1, 3136, 1, 32});
        auto unsqueeze_1 = make_shared<opset10::Reshape>(mul_1, const_unsqueeze_1, false);

        auto const_unsqueeze_2 = opset10::Constant::create(element::i64, Shape{4}, {1, 3136, 1, 32});
        // Each Reshape gets its own target-shape constant, exactly as in the reference graph below.
        // Previously const_unsqueeze_1 was reused here, which left const_unsqueeze_2 unused.
        auto unsqueeze_2 = make_shared<opset10::Reshape>(mul_2, const_unsqueeze_2, false);
        auto reduction_axes_1 = opset10::Constant::create(element::i64, Shape{1}, {1});
        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(mul_2, reduction_axes_1, true);
        auto mul_3 = make_shared<opset10::Multiply>(reduce_sum_1, mul_1);
        auto mul_4 = make_shared<opset10::Multiply>(input_3, unsqueeze_2);

        auto reduction_axes_2 = opset10::Constant::create(element::i64, Shape{1}, {1});
        auto reduce_sum_2 = make_shared<opset10::ReduceSum>(mul_4, reduction_axes_2);
        auto reduction_axes_3 = opset10::Constant::create(element::i64, Shape{1}, {2});
        auto reduce_sum_3 = make_shared<opset10::ReduceSum>(mul_3, reduction_axes_3, true);

        auto broadcast_to_shape = opset10::Constant::create(element::i64, Shape{3}, {1, 1, 1});
        auto broadcast =
            make_shared<opset10::Broadcast>(reduce_sum_3, broadcast_to_shape, ov::op::BroadcastType::BIDIRECTIONAL);
        auto tile_shape = opset10::Constant::create(element::i64, Shape{3}, {1, 1, 64});
        auto tile = make_shared<opset10::Tile>(broadcast, tile_shape);
        auto eps_const = opset10::Constant::create(element::f32, Shape{1}, {1.e-10});
        auto add_1 = make_shared<opset10::Add>(tile, eps_const);

        auto const_unsqueeze_3 = opset10::Constant::create(element::i64, Shape{4}, {1, 1, 64, 32});
        auto unsqueeze_3 = make_shared<opset10::Reshape>(reduce_sum_2, const_unsqueeze_3, false);
        auto mul_5 = make_shared<opset10::Multiply>(unsqueeze_1, unsqueeze_3);

        auto reduction_axes_4 = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_4 = make_shared<opset10::ReduceSum>(mul_5, reduction_axes_4);

        auto div_1 = make_shared<opset10::Divide>(reduce_sum_4, add_1);
        auto matmul_1 = make_shared<opset10::MatMul>(div_1, input_4, false, true);

        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2, input_3, input_4});

        type_to_fuse_map empty_type_to_fuse_map = {};
        bool keep_precision_sensitive_in_fp32 = true;
        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      empty_type_to_fuse_map,
                                                      keep_precision_sensitive_in_fp32);
        manager.run_passes(model);
    }

    // Expected graph: f16 parameters; input_1..input_3 are converted to f32 before use and the
    // Divide result is converted back to f16 right before the MatMul.
    {
        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3136, 32});
        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3136, 32});
        auto input_3 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3136, 64, 1});
        auto input_4 = make_shared<opset10::Parameter>(element::f16, Shape{128, 64});
        auto input_1_decompressed = make_shared<opset10::Convert>(input_1, element::f32);
        auto input_2_decompressed = make_shared<opset10::Convert>(input_2, element::f32);
        auto input_3_decompressed = make_shared<opset10::Convert>(input_3, element::f32);

        auto exp_1 = make_shared<opset10::Exp>(input_1_decompressed);
        auto exp_2 = make_shared<opset10::Exp>(input_2_decompressed);

        auto factor_1 = opset10::Constant::create(element::f32, Shape{1}, {0.5});
        auto mul_1 = make_shared<opset10::Multiply>(exp_1, factor_1);
        auto factor_2 = opset10::Constant::create(element::f32, Shape{1}, {0.5});
        auto mul_2 = make_shared<opset10::Multiply>(exp_2, factor_2);

        auto const_unsqueeze_1 = opset10::Constant::create(element::i64, Shape{4}, {1, 3136, 1, 32});
        auto unsqueeze_1 = make_shared<opset10::Reshape>(mul_1, const_unsqueeze_1, false);

        auto const_unsqueeze_2 = opset10::Constant::create(element::i64, Shape{4}, {1, 3136, 1, 32});
        auto unsqueeze_2 = make_shared<opset10::Reshape>(mul_2, const_unsqueeze_2, false);
        auto reduction_axes_1 = opset10::Constant::create(element::i64, Shape{1}, {1});
        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(mul_2, reduction_axes_1, true);
        auto mul_3 = make_shared<opset10::Multiply>(reduce_sum_1, mul_1);
        auto mul_4 = make_shared<opset10::Multiply>(input_3_decompressed, unsqueeze_2);

        auto reduction_axes_2 = opset10::Constant::create(element::i64, Shape{1}, {1});
        auto reduce_sum_2 = make_shared<opset10::ReduceSum>(mul_4, reduction_axes_2);
        auto reduction_axes_3 = opset10::Constant::create(element::i64, Shape{1}, {2});
        auto reduce_sum_3 = make_shared<opset10::ReduceSum>(mul_3, reduction_axes_3, true);

        auto broadcast_to_shape = opset10::Constant::create(element::i64, Shape{3}, {1, 1, 1});
        auto broadcast =
            make_shared<opset10::Broadcast>(reduce_sum_3, broadcast_to_shape, ov::op::BroadcastType::BIDIRECTIONAL);
        auto tile_shape = opset10::Constant::create(element::i64, Shape{3}, {1, 1, 64});
        auto tile = make_shared<opset10::Tile>(broadcast, tile_shape);
        auto eps_const = opset10::Constant::create(element::f32, Shape{1}, {1.e-10});
        auto add_1 = make_shared<opset10::Add>(tile, eps_const);

        auto const_unsqueeze_3 = opset10::Constant::create(element::i64, Shape{4}, {1, 1, 64, 32});
        auto unsqueeze_3 = make_shared<opset10::Reshape>(reduce_sum_2, const_unsqueeze_3, false);
        auto mul_5 = make_shared<opset10::Multiply>(unsqueeze_1, unsqueeze_3);

        auto reduction_axes_4 = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_4 = make_shared<opset10::ReduceSum>(mul_5, reduction_axes_4);

        auto div_1 = make_shared<opset10::Divide>(reduce_sum_4, add_1);
        auto div_compressed = make_shared<opset10::Convert>(div_1, element::f16);
        auto matmul_1 = make_shared<opset10::MatMul>(div_compressed, input_4, false, true);

        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2, input_3, input_4});
    }

    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
    FunctionsComparator::Result result = func_comparator(model_ref, model);
    ASSERT_TRUE(result.valid) << result.message;
}
|
||||
|
||||
// Minimal "x / (y + eps)" pattern with a tiny epsilon. The reference graph expects both the Add
// and the Divide to remain in f32: the f16 parameters are converted to f32 and the epsilon
// constant keeps its f32 type. No Convert back to f16 is expected on the model output.
TEST(TransformationTests, ConvertPrecision_DivisionByZeroMinimalPattern) {
    std::shared_ptr<Model> model;
    std::shared_ptr<Model> model_ref;
    pass::Manager manager;

    const float eps_value = 1.0e-12f;

    // Graph under test, entirely in f32 with dynamic rank-3 inputs.
    {
        auto numerator = make_shared<opset10::Parameter>(element::f32, PartialShape::dynamic(3));
        auto denominator = make_shared<opset10::Parameter>(element::f32, PartialShape::dynamic(3));
        auto epsilon = opset10::Constant::create(element::f32, Shape{1}, {eps_value});
        auto guarded = make_shared<opset10::Add>(denominator, epsilon);
        auto quotient = make_shared<opset10::Divide>(numerator, guarded);
        model = make_shared<Model>(NodeVector{quotient}, ParameterVector{numerator, denominator});

        type_to_fuse_map no_extra_fusing = {};
        bool keep_sensitive_in_fp32 = true;
        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      no_extra_fusing,
                                                      keep_sensitive_in_fp32);
        manager.run_passes(model);
    }

    // Expected graph: f16 parameters decompressed to f32, arithmetic untouched.
    {
        auto numerator = make_shared<opset10::Parameter>(element::f16, PartialShape::dynamic(3));
        auto denominator = make_shared<opset10::Parameter>(element::f16, PartialShape::dynamic(3));
        auto numerator_f32 = make_shared<opset10::Convert>(numerator, element::f32);
        auto denominator_f32 = make_shared<opset10::Convert>(denominator, element::f32);

        auto epsilon = opset10::Constant::create(element::f32, Shape{1}, {eps_value});
        auto guarded = make_shared<opset10::Add>(denominator_f32, epsilon);
        auto quotient = make_shared<opset10::Divide>(numerator_f32, guarded);

        model_ref = make_shared<Model>(NodeVector{quotient}, ParameterVector{numerator, denominator});
    }

    const auto comparator = FunctionsComparator::with_default();
    const auto verdict = comparator(model_ref, model);
    ASSERT_TRUE(verdict.valid) << verdict.message;
}
|
||||
|
||||
// "x * (y + eps)^(-1.77)" pattern with a tiny epsilon. The reference graph expects Add, Power and
// Multiply to remain in f32: the f16 parameters are converted to f32 and both constants keep
// their f32 type. No Convert back to f16 is expected on the model output.
TEST(TransformationTests, ConvertPrecision_PowWithNegativeExponent) {
    std::shared_ptr<Model> model;
    std::shared_ptr<Model> model_ref;
    pass::Manager manager;
    const float eps_value = 1.0e-12f;

    // Graph under test, entirely in f32 with dynamic rank-3 inputs.
    {
        auto multiplicand = make_shared<opset10::Parameter>(element::f32, PartialShape::dynamic(3));
        auto base_input = make_shared<opset10::Parameter>(element::f32, PartialShape::dynamic(3));
        auto epsilon = opset10::Constant::create(element::f32, Shape{1}, {eps_value});
        auto guarded = make_shared<opset10::Add>(base_input, epsilon);
        auto negative_power = opset10::Constant::create(element::f32, Shape{1}, {-1.77});
        auto powered = make_shared<opset10::Power>(guarded, negative_power);
        auto scaled = make_shared<opset10::Multiply>(multiplicand, powered);

        model = make_shared<Model>(NodeVector{scaled}, ParameterVector{multiplicand, base_input});

        type_to_fuse_map no_extra_fusing = {};
        bool keep_sensitive_in_fp32 = true;
        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      no_extra_fusing,
                                                      keep_sensitive_in_fp32);
        manager.run_passes(model);
    }

    // Expected graph: f16 parameters decompressed to f32, arithmetic untouched.
    {
        auto multiplicand = make_shared<opset10::Parameter>(element::f16, PartialShape::dynamic(3));
        auto base_input = make_shared<opset10::Parameter>(element::f16, PartialShape::dynamic(3));
        auto multiplicand_f32 = make_shared<opset10::Convert>(multiplicand, element::f32);
        auto base_input_f32 = make_shared<opset10::Convert>(base_input, element::f32);

        auto epsilon = opset10::Constant::create(element::f32, Shape{1}, {eps_value});
        auto guarded = make_shared<opset10::Add>(base_input_f32, epsilon);
        auto negative_power = opset10::Constant::create(element::f32, Shape{1}, {-1.77});
        auto powered = make_shared<opset10::Power>(guarded, negative_power);
        auto scaled = make_shared<opset10::Multiply>(multiplicand_f32, powered);

        model_ref = make_shared<Model>(NodeVector{scaled}, ParameterVector{multiplicand, base_input});
    }

    const auto comparator = FunctionsComparator::with_default();
    const auto verdict = comparator(model_ref, model);
    ASSERT_TRUE(verdict.valid) << verdict.message;
}
|
||||
|
||||
// Same Exp -> ReduceSum scenario as the plain "with_exp" case, but with an Unsqueeze placed
// between Exp and ReduceSum. The reference graph expects the whole
// Exp -> Unsqueeze -> ReduceSum -> Multiply chain to stay in f32 between two Convert nodes.
TEST(TransformationTests, ConvertPrecision_exp_through_unsqueeze) {
    std::shared_ptr<Model> model;
    std::shared_ptr<Model> model_ref;
    pass::Manager manager;

    // Graph under test: f32 everywhere, the Multiply factor is an f16 constant decompressed to f32.
    {
        auto param_a = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto exponent = std::make_shared<opset10::Exp>(param_a);
        auto param_b = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto reduce_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});

        auto new_axis = opset10::Constant::create(element::i64, Shape{1}, {1});
        auto expanded = std::make_shared<opset10::Unsqueeze>(exponent, new_axis);
        auto summed = std::make_shared<opset10::ReduceSum>(expanded, reduce_axes);

        auto scale_f16 = opset10::Constant::create(element::f16, Shape{1}, {-1});
        auto scale_f32 = std::make_shared<opset10::Convert>(scale_f16, element::f32);
        auto scaled = std::make_shared<opset10::Multiply>(summed, scale_f32);
        auto product = std::make_shared<opset10::MatMul>(scaled, param_b);

        model = std::make_shared<Model>(NodeVector{product}, ParameterVector{param_a, param_b});

        type_to_fuse_map no_extra_fusing = {};
        bool keep_sensitive_in_fp32 = true;
        manager.register_pass<pass::ConvertPrecision>(precisions_array{{element::f32, element::f16}},
                                                      no_extra_fusing,
                                                      keep_sensitive_in_fp32);
        manager.run_passes(model);
    }

    // Expected graph: f16 parameters, the chain from Exp to Multiply stays in f32 between two Converts.
    {
        auto param_a = std::make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto param_a_f32 = std::make_shared<opset10::Convert>(param_a, element::f32);
        auto exponent = std::make_shared<opset10::Exp>(param_a_f32);
        auto param_b = std::make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});

        auto reduce_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});

        auto new_axis = opset10::Constant::create(element::i64, Shape{1}, {1});
        auto expanded = std::make_shared<opset10::Unsqueeze>(exponent, new_axis);
        auto summed = std::make_shared<opset10::ReduceSum>(expanded, reduce_axes);

        auto scale_f16 = opset10::Constant::create(element::f16, Shape{1}, {-1});
        auto scale_f32 = std::make_shared<opset10::Convert>(scale_f16, element::f32);
        auto scaled = std::make_shared<opset10::Multiply>(summed, scale_f32);
        auto scaled_f16 = std::make_shared<opset10::Convert>(scaled, element::f16);
        auto product = std::make_shared<opset10::MatMul>(scaled_f16, param_b);

        model_ref = std::make_shared<Model>(NodeVector{product}, ParameterVector{param_a, param_b});
    }

    const auto comparator = FunctionsComparator::with_default();
    const auto verdict = comparator(model_ref, model);
    ASSERT_TRUE(verdict.valid) << verdict.message;
}
|
||||
|
@ -41,7 +41,6 @@
|
||||
#include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp>
|
||||
#include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
|
||||
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
|
||||
#include "transformations/common_optimizations/convert_compressed_to_mixed_precision.hpp"
|
||||
#include <transformations/common_optimizations/wrap_interpolate_into_transposes.hpp>
|
||||
#include <transformations/common_optimizations/transpose_sinking.hpp>
|
||||
|
||||
@ -140,6 +139,63 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
|
||||
|
||||
manager.register_pass<ov::pass::InitNodeInfo>();
|
||||
manager.register_pass<EinsumDecomposition>();
|
||||
|
||||
precisions_array fp_convert_precision_list = {
|
||||
{ov::element::f64, ov::element::f32}
|
||||
};
|
||||
|
||||
// call conversion of float types with keep_precision_sensitive_in_fp32 = true
|
||||
auto fp_precision_supported = [&](ov::element::Type e) -> bool {
|
||||
switch (e) {
|
||||
case ov::element::f16: return device_info.supports_fp16;
|
||||
case ov::element::f32: return true; // assume that all GPUs support f32 data type
|
||||
case ov::element::f64: return device_info.supports_fp64;
|
||||
case ov::element::bf16: return false;
|
||||
default: return false;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
const auto fallback_precision = ov::element::f32;
|
||||
std::vector<ov::element::Type> fp_element_types = {
|
||||
ov::element::f32,
|
||||
ov::element::f16,
|
||||
ov::element::bf16
|
||||
};
|
||||
|
||||
// Add conversion from FP data types to infer precision if it's specified
|
||||
auto infer_precision = config.get_property(ov::hint::inference_precision);
|
||||
if (infer_precision != ov::element::undefined) {
|
||||
if (!fp_precision_supported(infer_precision))
|
||||
infer_precision = fallback_precision;
|
||||
|
||||
for (auto& et : fp_element_types) {
|
||||
if (et != infer_precision) {
|
||||
fp_convert_precision_list.push_back({et, infer_precision});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add conversion from unsupported FP data types to f32 if we don't have a conversion to something valid already in the list
|
||||
for (auto& et : fp_element_types) {
|
||||
if (!fp_precision_supported(et)) {
|
||||
auto et_pair = std::make_pair(et, fallback_precision);
|
||||
bool has_valid_conversion = std::find_if(fp_convert_precision_list.begin(), fp_convert_precision_list.end(),
|
||||
[&](std::pair<ov::element::Type, ov::element::Type> v) -> bool {
|
||||
return v.first == et_pair.first && fp_precision_supported(v.second);
|
||||
}) != fp_convert_precision_list.end();
|
||||
|
||||
if (!has_valid_conversion) {
|
||||
fp_convert_precision_list.push_back(et_pair);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type_to_fuse_map empty_fuse_map = {};
|
||||
manager.register_pass<ov::pass::Validate>();
|
||||
// call ConvertPrecision with keep_precision_sensitive_in_fp32 = true
|
||||
manager.register_pass<ov::pass::ConvertPrecision>(fp_convert_precision_list, empty_fuse_map, true);
|
||||
|
||||
manager.register_pass<ov::pass::CommonOptimizations>();
|
||||
|
||||
manager.register_pass<ov::pass::WrapInterpolateIntoTransposes>();
|
||||
@ -176,8 +232,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
|
||||
manager.register_pass<ov::pass::ConvertPriorBox8To0, false>();
|
||||
manager.register_pass<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>();
|
||||
|
||||
precisions_array convert_precision_list {
|
||||
{ngraph::element::f64, ngraph::element::f32},
|
||||
precisions_array int_convert_precision_list {
|
||||
{ngraph::element::i64, ngraph::element::i32},
|
||||
{ngraph::element::u64, ngraph::element::i32},
|
||||
{ngraph::element::u16, ngraph::element::i32},
|
||||
@ -187,54 +242,8 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
|
||||
{ngraph::element::u4, ngraph::element::u8},
|
||||
};
|
||||
|
||||
auto fp_precision_supported = [&](ngraph::element::Type e) -> bool {
|
||||
switch (e) {
|
||||
case ngraph::element::f16: return device_info.supports_fp16;
|
||||
case ngraph::element::f32: return true; // assume that all GPUs support f32 data type
|
||||
case ngraph::element::f64: return device_info.supports_fp64;
|
||||
case ngraph::element::bf16: return false;
|
||||
default: return false;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
const auto fallback_precision = ngraph::element::f32;
|
||||
std::vector<ov::element::Type> fp_element_types = {
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16,
|
||||
ngraph::element::bf16
|
||||
};
|
||||
|
||||
// Add conversion from FP data types to infer precision if it's specified
|
||||
auto infer_precision = config.get_property(ov::inference_precision);
|
||||
if (infer_precision != ov::element::undefined) {
|
||||
if (!fp_precision_supported(infer_precision))
|
||||
infer_precision = fallback_precision;
|
||||
|
||||
for (auto& et : fp_element_types) {
|
||||
if (et != infer_precision) {
|
||||
convert_precision_list.push_back({et, infer_precision});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add conversion from unsupported FP data types to f32 if we don't have a conversion to something valid already in the list
|
||||
for (auto& et : fp_element_types) {
|
||||
if (!fp_precision_supported(et)) {
|
||||
auto et_pair = std::make_pair(et, fallback_precision);
|
||||
bool has_valid_conversion = std::find_if(convert_precision_list.begin(), convert_precision_list.end(),
|
||||
[&](std::pair<ov::element::Type, ov::element::Type> v) -> bool {
|
||||
return v.first == et_pair.first && fp_precision_supported(v.second);
|
||||
}) != convert_precision_list.end();
|
||||
|
||||
if (!has_valid_conversion) {
|
||||
convert_precision_list.push_back(et_pair);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
manager.register_pass<ngraph::pass::Validate>();
|
||||
manager.register_pass<ov::pass::ConvertPrecision>(convert_precision_list);
|
||||
manager.register_pass<ov::pass::ConvertPrecision>(int_convert_precision_list);
|
||||
|
||||
auto pass_config = manager.get_pass_config();
|
||||
pass_config->disable<ov::pass::EyeDecomposition>();
|
||||
@ -242,7 +251,6 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
|
||||
// disable conversion to legacy and use the new mixed precision
|
||||
// in which precision sensitive nodes are kept in FP32
|
||||
pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();
|
||||
pass_config->enable<ov::pass::ConvertCompressedToMixedPrecision>();
|
||||
|
||||
// SpaceToDepth/DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
|
||||
pass_config->set_callback<ov::pass::ConvertSpaceToDepth,
|
||||
|
Loading…
Reference in New Issue
Block a user