Fix mixed precision inference for quantized IRs (#16785)

* disable mixed precision inference for quantized IRs

* typo fix

* improved solution: in quantized IRs, selectively disable mixed precision only for float nodes

* minor typo corrections

* added unit-tests

* renamed rt_info

* updated list of nodes for which FQ is propagated; updated unit-tests

* fix failing build
Pavel Esir 2023-04-24 11:13:04 +02:00 committed by GitHub
parent 01065338ef
commit 6ff0cad127
3 changed files with 361 additions and 154 deletions
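
For context, the entry point this fix affects is the FP32-to-FP16 ConvertPrecision pipeline shown in the new unit tests at the end of this diff. A minimal sketch of that call, assuming `model` is a quantized ov::Model (pass names and signatures are taken from the test code below):

    ov::pass::Manager manager;
    type_to_fuse_map empty_type_to_fuse_map = {};
    bool keep_precision_sensitive_in_fp32 = true;
    // With keep_precision_sensitive_in_fp32 = true, MarkSugraphsToKeepInMixedPrecision
    // pins precision-sensitive subgraphs (e.g. Exp->ReduceSum) to FP32. After this fix,
    // float nodes dominated by FakeQuantize are excluded from that marking (via a
    // temporary "fq_path" rt_info key), since they will be executed in int8 anyway.
    manager.register_pass<ov::pass::ConvertPrecision>(precisions_map{{ov::element::f32, ov::element::f16}},
                                                      empty_type_to_fuse_map,
                                                      keep_precision_sensitive_in_fp32);
    manager.run_passes(model);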

Changed file 1 of 3:

@@ -8,6 +8,7 @@
 #include "openvino/op/util/broadcast_base.hpp"
 #include "openvino/op/util/gather_base.hpp"
 #include "openvino/opsets/opset10.hpp"
+#include "openvino/opsets/opset11.hpp"
 #include "openvino/opsets/opset2.hpp"
 #include "openvino/pass/manager.hpp"
 #include "openvino/pass/pattern/op/or.hpp"
@@ -23,6 +24,30 @@ using namespace ov::opset10;
 namespace ov {
 namespace pass {
+
+void mark_reduceop_path(const std::shared_ptr<Node>& node) {
+    node->get_rt_info().emplace("reduceop_path", true);
+}
+bool is_reduceop_path(const std::shared_ptr<const Node>& node) {
+    return node->get_rt_info().count("reduceop_path");
+}
+void erase_reduceop_path(const std::shared_ptr<Node>& node) {
+    auto& rt_info = node->get_rt_info();
+    rt_info.erase("reduceop_path");
+}
+
+void mark_fq_path(const std::shared_ptr<Node>& node) {
+    node->get_rt_info().emplace("fq_path", true);
+}
+bool is_fq_path(const std::shared_ptr<const Node>& node) {
+    return node->get_rt_info().count("fq_path");
+}
+void erase_fq_path(const std::shared_ptr<Node>& node) {
+    auto& rt_info = node->get_rt_info();
+    rt_info.erase("fq_path");
+}
+
 // Marking continues to propagate through these ops.
 std::shared_ptr<Node> propagate_through_ops = pattern::wrap_type<Squeeze,
                                                                  Unsqueeze,
@@ -72,11 +97,11 @@ public:
         if (!has_marked_output)
             return false;
 
-        auto convert_node = dynamic_pointer_cast<Convert>(node);
+        auto convert_node = as_type_ptr<Convert>(node);
         if (convert_node) {
             // if during propagating up there is a Convert it must go to Const,
             // otherwise interrupt propagation
-            auto const_node = dynamic_pointer_cast<Constant>(node->input_value(0).get_node_shared_ptr());
+            auto const_node = as_type_ptr<Constant>(node->input_value(0).get_node_shared_ptr());
             if (!const_node)
                 return false;
         }
@@ -106,7 +131,7 @@ public:
             return false;
 
         // on convert down propagation should be interrupted
-        auto convert_node = dynamic_pointer_cast<Convert>(node);
+        auto convert_node = as_type_ptr<Convert>(node);
         if (convert_node)
             return false;
@@ -114,6 +139,11 @@ public:
         for (const auto& in_node : node->input_values()) {
             if (!in_node.get_element_type().is_real())
                 continue;
+            if (is_fq_path(in_node.get_node_shared_ptr())) {
+                enable_fp16_compression(node);
+                return true;
+            }
+
             if (fp16_compression_is_disabled(in_node.get_node_shared_ptr())) {
                 disable_fp16_compression(node);
                 is_changed = true;
@@ -127,18 +157,6 @@ public:
     }
 };
 
-void mark_reduceop_path(const std::shared_ptr<Node>& node) {
-    node->get_rt_info().emplace("reduceop_path", true);
-}
-bool is_reduceop_path(const std::shared_ptr<const Node>& node) {
-    return node->get_rt_info().count("reduceop_path");
-}
-void erase_reduceop_path(const std::shared_ptr<Node>& node) {
-    auto& rt_info = node->get_rt_info();
-    rt_info.erase("reduceop_path");
-}
-
 class InitMarkReduceOpPath : public pass::MatcherPass {
 public:
     OPENVINO_RTTI("InitMarkReduceOpPath", "0");
@@ -267,11 +285,11 @@ public:
         if (!m.get_match_root())
             return false;
 
-        const auto mul = std::dynamic_pointer_cast<Multiply>(m.get_match_root());
+        const auto mul = as_type_ptr<Multiply>(m.get_match_root());
         // if pattern input_1*Pow(Maximum(input_2, eps), z) or input_1*Pow(Add(input_2, eps), z) is matched
         // need to check that power is negative
         if (mul) {
-            const auto pow_const = std::dynamic_pointer_cast<Constant>(pattern_to_output.at(pow_exp));
+            const auto pow_const = as_type_ptr<Constant>(pattern_to_output.at(pow_exp));
             if (pow_const) {
                 // continue only if exponent is negative (z < 0)
                 if (pow_const->get_element_type() == element::f16) {
@@ -286,7 +304,7 @@ public:
             }
         }
 
-        const auto eps_const = std::dynamic_pointer_cast<Constant>(pattern_to_output.at(eps_const_pattern));
+        const auto eps_const = as_type_ptr<Constant>(pattern_to_output.at(eps_const_pattern));
         if (!eps_const)
             return false;
         if (eps_const->get_element_type() == element::f32) {
@@ -307,6 +325,68 @@ public:
     }
 };
 
+class PropagateDownDisableSensitivityForQuantized : public pass::MatcherPass {
+public:
+    OPENVINO_RTTI("DisableMarkingForQuantizedNodes", "0");
+    PropagateDownDisableSensitivityForQuantized() {
+        MATCHER_SCOPE(PropagateDownDisableSensitivityForQuantized);
+
+        // quantization is propagated down through these nodes
+        std::shared_ptr<Node> quantization_propagating_nodes = pattern::wrap_type<Squeeze,
+                                                                                  Unsqueeze,
+                                                                                  FakeQuantize,
+                                                                                  Reshape,
+                                                                                  op::util::BroadcastBase,
+                                                                                  DepthToSpace,
+                                                                                  opset2::Interpolate,
+                                                                                  opset4::Interpolate,
+                                                                                  opset11::Interpolate,
+                                                                                  opset2::MaxPool,
+                                                                                  MaxPool,
+                                                                                  Pad,
+                                                                                  ReduceMax,
+                                                                                  ReduceMin,
+                                                                                  Relu,
+                                                                                  Transpose,
+                                                                                  ShuffleChannels,
+                                                                                  StridedSlice,
+                                                                                  Slice,
+                                                                                  VariadicSplit,
+                                                                                  Split,
+                                                                                  op::util::GatherBase,
+                                                                                  Concat,
+                                                                                  Tile>();
+
+        matcher_pass_callback callback = [=](pattern::Matcher& m) {
+            const auto& node = m.get_match_root();
+            if (!node)
+                return false;
+
+            auto is_quantize = as_type_ptr<FakeQuantize>(node);
+            if (is_quantize) {
+                mark_fq_path(node);
+                return true;
+            }
+
+            bool is_changed = false;
+            for (const auto& in_node_output : node->input_values()) {
+                auto input_node = in_node_output.get_node_shared_ptr();
+                auto is_quantize = as_type_ptr<FakeQuantize>(input_node);
+                if (is_quantize || is_fq_path(input_node)) {
+                    mark_fq_path(node);
+                    enable_fp16_compression(node);
+                    is_changed = true;
+                }
+            }
+            return is_changed;
+        };
+
+        auto m = make_shared<pattern::Matcher>(quantization_propagating_nodes, matcher_name);
+        register_matcher(m, callback);
+    }
+};
+
 bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model>& m) {
     RUN_ON_MODEL_SCOPE(MarkSugraphsToKeepInMixedPrecision);
@@ -314,6 +394,7 @@ bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model
     // Mark root of Division with eps pattern to keep in FP32
     REGISTER_PASS(manager, MarkDivWithEps)
     REGISTER_PASS(manager, MarkExpInReduceOpPath)
+    REGISTER_PASS(manager, PropagateDownDisableSensitivityForQuantized)
 
     // both Up and Down propagations are needed.
     // Why both of them are needed is explained in comments in passes declarations.
@@ -328,6 +409,7 @@ bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model
     for (auto& node : m->get_ops()) {
         erase_reduceop_path(node);
+        erase_fq_path(node);
     }
 
     return false;  // no need to revalidate

Changed file 2 of 3:

@@ -126,36 +126,21 @@ TEST(TransformationTests, MarkSugraphsToKeepInMixedPrecision_reducesum_without_e
     shared_ptr<Model> model, model_ref;
     pass::Manager manager;
-    {
-        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
-        auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
-
-        auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
-        auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
-        auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
-        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
-
-        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
-
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
-    {
-        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
-        auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
-        auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
-        auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
-        auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
-        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
-        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
-    }
+    auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
+    auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
+
+    auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
+    auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
+    auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
+    auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
+
+    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
     const FunctionsComparator func_comparator =
         FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
@@ -519,32 +504,20 @@ TEST(TransformationTests, PowWithPositiveExponent) {
     pass::Manager manager;
     // graph should be left unchanged
     const float eps_value = 1.0e-12f;
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
-        auto pow = std::make_shared<Power>(add, pow_exp_const);
-        auto mul = std::make_shared<Multiply>(input_1, pow);
-
-        model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
-
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
-        auto pow = std::make_shared<Power>(add, pow_exp_const);
-        auto mul = std::make_shared<Multiply>(input_1, pow);
-        model_ref = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
-    }
+    auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
+    auto add = std::make_shared<Add>(input_2, eps_const);
+    auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
+    auto pow = std::make_shared<Power>(add, pow_exp_const);
+    auto mul = std::make_shared<Multiply>(input_1, pow);
+
+    model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
     const FunctionsComparator func_comparator =
         FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
     // need to compare twice to ensure that no extra nodes are marked
@@ -559,28 +532,18 @@ TEST(TransformationTests, DivisionByZeroMinimalPatternUnchanged) {
     pass::Manager manager;
     // if eps_value is greater than normalized_fp16_min then leave graph unchanged
     const float eps_value = 0.0001f;
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto divide = std::make_shared<Divide>(input_1, add);
-
-        model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
-
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto divide = std::make_shared<Divide>(input_1, add);
-        model_ref = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
-    }
+    auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
+    auto add = std::make_shared<Add>(input_2, eps_const);
+    auto divide = std::make_shared<Divide>(input_1, add);
+
+    model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
     const FunctionsComparator func_comparator =
         FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
     // need to compare twice to ensure that no extra nodes are marked
@@ -798,36 +761,21 @@ TEST(TransformationTests, MarkReduceOpExpToKeepInMixedPrecision_reducesum_withou
     // ReduceSum without Exp is not a precision sensitive case
     shared_ptr<Model> model, model_ref;
     pass::Manager manager;
-    {
-        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
-        auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
-
-        auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
-        auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
-        auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
-        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
-
-        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
-
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
-    {
-        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
-        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
-        auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
-        auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
-        auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
-        auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
-        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
-        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
-    }
+    auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
+    auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);
+
+    auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
+    auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
+    auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
+    auto matmul_1 = make_shared<MatMul>(mul_1, input_2);
+
+    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
     const FunctionsComparator func_comparator =
         FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
@@ -986,31 +934,21 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_PowWithPositiveEx
     const float eps_value = 1.e-12f;
    shared_ptr<Model> model, model_ref;
     pass::Manager manager;
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
-        auto pow = std::make_shared<Power>(add, pow_exp_const);
-        auto mul = std::make_shared<Multiply>(input_1, pow);
-
-        model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
-        auto pow = std::make_shared<Power>(add, pow_exp_const);
-        auto mul = std::make_shared<Multiply>(input_1, pow);
-        model_ref = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
-    }
+    auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
+    auto add = std::make_shared<Add>(input_2, eps_const);
+    auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
+    auto pow = std::make_shared<Power>(add, pow_exp_const);
+    auto mul = std::make_shared<Multiply>(input_1, pow);
+
+    model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
     const auto fc = FunctionsComparator::with_default()
                         .enable(FunctionsComparator::PRECISIONS)
                         .enable(FunctionsComparator::RUNTIME_KEYS)
@@ -1027,27 +965,19 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_MinimalPatternUnc
     const float eps_value = 0.0001f;
     shared_ptr<Model> model, model_ref;
     pass::Manager manager;
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto divide = std::make_shared<Divide>(input_1, add);
-
-        model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
-        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
-        manager.run_passes(model);
-    }
-    {
-        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
-        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
-        auto add = std::make_shared<Add>(input_2, eps_const);
-        auto divide = std::make_shared<Divide>(input_1, add);
-        model_ref = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
-    }
+    auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
+    auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
+    auto add = std::make_shared<Add>(input_2, eps_const);
+    auto divide = std::make_shared<Divide>(input_1, add);
+
+    model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
 
     const auto fc = FunctionsComparator::with_default()
                         .enable(FunctionsComparator::PRECISIONS)
                         .enable(FunctionsComparator::RUNTIME_KEYS)
@@ -1162,3 +1092,82 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_InL2NormWithSqrtA
     result = fc(model, model_ref);
     ASSERT_TRUE(result.valid) << result.message;
 }
+
+TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_disable_for_quantized_nodes_1) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    // although the graph contains the precision-sensitive Exp->ReduceSum pattern, because of the
+    // FakeQuantize ops it will be inferred in int8, so there is no need to mark it:
+    // model and model_ref should match
+    auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto exp_1 = make_shared<opset10::Exp>(input_1);
+
+    auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
+    auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
+    auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
+    auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
+    auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+    auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+    auto reduce_sum_1 = make_shared<opset10::ReduceSum>(fq_1, reduction_axes);
+
+    auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+    auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);
+    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
+
+    const auto fc = FunctionsComparator::with_default()
+                        .enable(FunctionsComparator::PRECISIONS)
+                        .enable(FunctionsComparator::RUNTIME_KEYS)
+                        .enable(FunctionsComparator::CONST_VALUES);
+    // need to compare twice to ensure that no extra nodes are marked
+    FunctionsComparator::Result result = fc(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+    result = fc(model, model_ref);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
+TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_disable_for_quantized_nodes_2) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    // although the graph contains the precision-sensitive Exp->ReduceSum pattern, because of the
+    // FakeQuantize ops it will be inferred in int8, so there is no need to mark it:
+    // model and model_ref should match
+    auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+    auto exp_1 = make_shared<opset10::Exp>(input_1);
+
+    auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
+    auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
+    auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
+    auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
+    auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+    auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
+    auto unsqueeze_1 = make_shared<opset10::Unsqueeze>(fq_1, unsqueeze_axes);
+    auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+    auto reduce_sum_1 = make_shared<opset10::ReduceSum>(unsqueeze_1, reduction_axes);
+
+    auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+    auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);
+    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    model_ref = model->clone();
+
+    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
+    manager.run_passes(model);
+
+    const auto fc = FunctionsComparator::with_default()
+                        .enable(FunctionsComparator::PRECISIONS)
+                        .enable(FunctionsComparator::RUNTIME_KEYS)
+                        .enable(FunctionsComparator::CONST_VALUES);
+    // need to compare twice to ensure that no extra nodes are marked
+    FunctionsComparator::Result result = fc(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+    result = fc(model, model_ref);
+    ASSERT_TRUE(result.valid) << result.message;
+}
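
To inspect which nodes the marking pass actually pinned to FP32 (the property the comparisons above check through FunctionsComparator::RUNTIME_KEYS), one can query the same rt_info attribute the pass sets. A rough sketch, assuming the usual location of the fp16-compression rt_info helpers:

    #include "transformations/rt_info/disable_fp16_compression.hpp"

    // After MarkSugraphsToKeepInMixedPrecision has run, nodes that must stay in FP32
    // carry the disable-fp16-compression rt_info attribute; the temporary "fq_path"
    // markers are erased at the end of run_on_model and are not visible here.
    for (const auto& node : model->get_ops()) {
        if (ov::fp16_compression_is_disabled(node))
            std::cout << node->get_friendly_name() << " stays in FP32" << std::endl;
    }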

Changed file 3 of 3:

@@ -1716,3 +1716,119 @@ TEST(TransformationTests, ConvertPrecision_exp_through_unsqueeze) {
     FunctionsComparator::Result result = func_comparator(model_ref, model);
     ASSERT_TRUE(result.valid) << result.message;
 }
+
+TEST(TransformationTests, ConvertPrecision_disable_for_quantized_nodes_1) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto exp_1 = make_shared<opset10::Exp>(input_1);
+
+        auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
+        auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
+        auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
+        auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
+        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(fq_1, reduction_axes);
+
+        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);
+        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(model);
+    }
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto exp_1 = make_shared<opset10::Exp>(input_1);
+
+        auto in_low = op::v0::Constant::create(element::f16, Shape{}, {0.f});
+        auto in_high = op::v0::Constant::create(element::f16, Shape{}, {5.f});
+        auto out_low = op::v0::Constant::create(element::f16, Shape{}, {2.f});
+        auto out_high = op::v0::Constant::create(element::f16, Shape{}, {4.f});
+        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(fq_1, reduction_axes);
+
+        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);
+        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+}
+
+TEST(TransformationTests, ConvertPrecision_disable_for_quantized_nodes_2) {
+    shared_ptr<Model> model, model_ref;
+    pass::Manager manager;
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
+        auto exp_1 = make_shared<opset10::Exp>(input_1);
+
+        auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
+        auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
+        auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
+        auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
+        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+        auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
+        auto unsqueeze_1 = make_shared<opset10::Unsqueeze>(fq_1, unsqueeze_axes);
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(unsqueeze_1, reduction_axes);
+
+        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);
+        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+
+        type_to_fuse_map empty_type_to_fuse_map = {};
+        bool keep_precision_sensitive_in_fp32 = true;
+        manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
+                                                      empty_type_to_fuse_map,
+                                                      keep_precision_sensitive_in_fp32);
+        manager.run_passes(model);
+    }
+    {
+        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
+        auto exp_1 = make_shared<opset10::Exp>(input_1);
+
+        auto in_low = op::v0::Constant::create(element::f16, Shape{}, {0.f});
+        auto in_high = op::v0::Constant::create(element::f16, Shape{}, {5.f});
+        auto out_low = op::v0::Constant::create(element::f16, Shape{}, {2.f});
+        auto out_high = op::v0::Constant::create(element::f16, Shape{}, {4.f});
+        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);
+
+        auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
+        auto unsqueeze_1 = make_shared<opset10::Unsqueeze>(fq_1, unsqueeze_axes);
+        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
+        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(unsqueeze_1, reduction_axes);
+
+        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
+        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);
+        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
+    }
+
+    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
+    FunctionsComparator::Result result = func_comparator(model_ref, model);
+    ASSERT_TRUE(result.valid) << result.message;
+}