Fix mixed precision inference for quantized IRs (#16785)
* disable mixed precision inference for quantized IRs
* typo fix
* improved solution: disable mixed precision in quantized IRs selectively, only for float nodes
* minor typo corrections
* added unit tests
* renamed rt_info
* updated the list of nodes through which FQ marking is propagated; updated unit tests
* fix failing build
Parent: 01065338ef
Commit: 6ff0cad127
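The selective disabling relies on OpenVINO's per-node rt_info map: the pass sets a boolean marker on nodes reachable from a FakeQuantize and erases it before returning, so the marker never leaks into the serialized IR. A minimal sketch of that idiom, using a hypothetical key name "my_marker" rather than the PR's "fq_path"/"reduceop_path" keys:

#include <memory>
#include "openvino/core/node.hpp"

// Sketch of the rt_info marker idiom used in this PR: a boolean flag is
// stored in the node's runtime-info map, queried during propagation, and
// erased at the end of the pass. "my_marker" is an illustrative key name.
void mark(const std::shared_ptr<ov::Node>& node) {
    node->get_rt_info().emplace("my_marker", true);
}

bool is_marked(const std::shared_ptr<const ov::Node>& node) {
    return node->get_rt_info().count("my_marker") > 0;
}

void erase_mark(const std::shared_ptr<ov::Node>& node) {
    node->get_rt_info().erase("my_marker");
}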
@@ -8,6 +8,7 @@
#include "openvino/op/util/broadcast_base.hpp"
#include "openvino/op/util/gather_base.hpp"
#include "openvino/opsets/opset10.hpp"
#include "openvino/opsets/opset11.hpp"
#include "openvino/opsets/opset2.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/pass/pattern/op/or.hpp"
@@ -23,6 +24,30 @@ using namespace ov::opset10;
namespace ov {
namespace pass {

void mark_reduceop_path(const std::shared_ptr<Node>& node) {
    node->get_rt_info().emplace("reduceop_path", true);
}
bool is_reduceop_path(const std::shared_ptr<const Node>& node) {
    return node->get_rt_info().count("reduceop_path");
}

void erase_reduceop_path(const std::shared_ptr<Node>& node) {
    auto& rt_info = node->get_rt_info();
    rt_info.erase("reduceop_path");
}

void mark_fq_path(const std::shared_ptr<Node>& node) {
    node->get_rt_info().emplace("fq_path", true);
}
bool is_fq_path(const std::shared_ptr<const Node>& node) {
    return node->get_rt_info().count("fq_path");
}

void erase_fq_path(const std::shared_ptr<Node>& node) {
    auto& rt_info = node->get_rt_info();
    rt_info.erase("fq_path");
}

// Marking continues to propagate through these ops.
std::shared_ptr<Node> propagate_through_ops = pattern::wrap_type<Squeeze,
                                                                 Unsqueeze,
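The hunk above ends inside the propagate_through_ops pattern list. For context, pattern::wrap_type<Ops...> produces a pattern node that matches a node of any of the listed op types, and a MatcherPass pairs such a pattern with a callback. A minimal sketch under those assumptions (the class MyPass and its op list are illustrative, not part of this PR):

#include <memory>
#include "openvino/opsets/opset10.hpp"
#include "openvino/pass/graph_rewrite.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"

// Minimal MatcherPass sketch (illustrative, not from this PR). wrap_type
// matches a node of any listed type; the callback runs on every match and
// returns whether it changed the graph.
class MyPass : public ov::pass::MatcherPass {
public:
    OPENVINO_RTTI("MyPass", "0");
    MyPass() {
        auto pattern_root = ov::pass::pattern::wrap_type<ov::opset10::Relu, ov::opset10::Squeeze>();
        ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
            const auto& node = m.get_match_root();
            if (!node)
                return false;
            // ... inspect or mark the matched node here ...
            return false;  // this sketch does not modify the graph
        };
        auto m = std::make_shared<ov::pass::pattern::Matcher>(pattern_root, "MyPass");
        register_matcher(m, callback);
    }
};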
@@ -72,11 +97,11 @@ public:
        if (!has_marked_output)
            return false;

-       auto convert_node = dynamic_pointer_cast<Convert>(node);
+       auto convert_node = as_type_ptr<Convert>(node);
        if (convert_node) {
            // if during propagating up there is a Convert it must go to Const,
            // otherwise interrupt propagation
-           auto const_node = dynamic_pointer_cast<Constant>(node->input_value(0).get_node_shared_ptr());
+           auto const_node = as_type_ptr<Constant>(node->input_value(0).get_node_shared_ptr());
            if (!const_node)
                return false;
        }
@@ -106,7 +131,7 @@ public:
            return false;

        // on convert down propagation should be interrupted
-       auto convert_node = dynamic_pointer_cast<Convert>(node);
+       auto convert_node = as_type_ptr<Convert>(node);
        if (convert_node)
            return false;

@@ -114,6 +139,11 @@ public:
            for (const auto& in_node : node->input_values()) {
                if (!in_node.get_element_type().is_real())
                    continue;
+               if (is_fq_path(in_node.get_node_shared_ptr())) {
+                   enable_fp16_compression(node);
+                   return true;
+               }
+
                if (fp16_compression_is_disabled(in_node.get_node_shared_ptr())) {
                    disable_fp16_compression(node);
                    is_changed = true;
@@ -127,18 +157,6 @@ public:
    }
};

-void mark_reduceop_path(const std::shared_ptr<Node>& node) {
-    node->get_rt_info().emplace("reduceop_path", true);
-}
-bool is_reduceop_path(const std::shared_ptr<const Node>& node) {
-    return node->get_rt_info().count("reduceop_path");
-}
-
-void erase_reduceop_path(const std::shared_ptr<Node>& node) {
-    auto& rt_info = node->get_rt_info();
-    rt_info.erase("reduceop_path");
-}
-
class InitMarkReduceOpPath : public pass::MatcherPass {
public:
    OPENVINO_RTTI("InitMarkReduceOpPath", "0");
@@ -267,11 +285,11 @@ public:
        if (!m.get_match_root())
            return false;

-       const auto mul = std::dynamic_pointer_cast<Multiply>(m.get_match_root());
+       const auto mul = as_type_ptr<Multiply>(m.get_match_root());
        // if pattern input_1*Pow(Maximum(input_2, eps), z) or input_1*Pow(Add(input_2, eps), z) is matched
        // need to check that power is negative
        if (mul) {
-           const auto pow_const = std::dynamic_pointer_cast<Constant>(pattern_to_output.at(pow_exp));
+           const auto pow_const = as_type_ptr<Constant>(pattern_to_output.at(pow_exp));
            if (pow_const) {
                // continue only if exponent is negative (z < 0)
                if (pow_const->get_element_type() == element::f16) {
@@ -286,7 +304,7 @@ public:
            }
        }

-       const auto eps_const = std::dynamic_pointer_cast<Constant>(pattern_to_output.at(eps_const_pattern));
+       const auto eps_const = as_type_ptr<Constant>(pattern_to_output.at(eps_const_pattern));
        if (!eps_const)
            return false;
        if (eps_const->get_element_type() == element::f32) {
@@ -307,6 +325,68 @@ public:
    }
};

+class PropagateDownDisableSensitivityForQuantized : public pass::MatcherPass {
+public:
+    OPENVINO_RTTI("DisableMarkingForQuantizedNodes", "0");
+    PropagateDownDisableSensitivityForQuantized() {
+        MATCHER_SCOPE(PropagateDownDisableSensitivityForQuantized);
+
+        // marking is propagated down through these nodes
+        std::shared_ptr<Node> quantization_propagating_nodes = pattern::wrap_type<Squeeze,
+                                                                                  Unsqueeze,
+                                                                                  FakeQuantize,
+                                                                                  Reshape,
+                                                                                  op::util::BroadcastBase,
+                                                                                  DepthToSpace,
+                                                                                  opset2::Interpolate,
+                                                                                  opset4::Interpolate,
+                                                                                  opset11::Interpolate,
+                                                                                  opset2::MaxPool,
+                                                                                  MaxPool,
+                                                                                  Pad,
+                                                                                  ReduceMax,
+                                                                                  ReduceMin,
+                                                                                  Relu,
+                                                                                  Transpose,
+                                                                                  ShuffleChannels,
+                                                                                  StridedSlice,
+                                                                                  Slice,
+                                                                                  VariadicSplit,
+                                                                                  Split,
+                                                                                  op::util::GatherBase,
+                                                                                  Concat,
+                                                                                  Tile>();
+
+        matcher_pass_callback callback = [=](pattern::Matcher& m) {
+            const auto& node = m.get_match_root();
+            if (!node)
+                return false;
+
+            auto is_quantize = as_type_ptr<FakeQuantize>(node);
+            if (is_quantize) {
+                mark_fq_path(node);
+                return true;
+            }
+
+            bool is_changed = false;
+
+            for (const auto& in_node_output : node->input_values()) {
+                auto input_node = in_node_output.get_node_shared_ptr();
+                auto is_quantize = as_type_ptr<FakeQuantize>(input_node);
+                if (is_quantize || is_fq_path(input_node)) {
+                    mark_fq_path(node);
+                    enable_fp16_compression(node);
+                    is_changed = true;
+                }
+            }
+
+            return is_changed;
+        };
+        auto m = make_shared<pattern::Matcher>(quantization_propagating_nodes, matcher_name);
+        register_matcher(m, callback);
+    }
+};
+
bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model>& m) {
    RUN_ON_MODEL_SCOPE(MarkSugraphsToKeepInMixedPrecision);

@@ -314,6 +394,7 @@ bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model
    // Mark root of Division with eps pattern to keep in FP32
    REGISTER_PASS(manager, MarkDivWithEps)
    REGISTER_PASS(manager, MarkExpInReduceOpPath)
+   REGISTER_PASS(manager, PropagateDownDisableSensitivityForQuantized)

    // both Up and Down propagations are needed.
    // Why both of them are needed is explained in comments in passes declarations.
@@ -328,6 +409,7 @@ bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model

    for (auto& node : m->get_ops()) {
        erase_reduceop_path(node);
+       erase_fq_path(node);
    }

    return false; // no need to revalidate
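The test diffs below exercise this pass end to end. Running it on an arbitrary model follows the same recipe the tests use; a short sketch, where the header path is an assumption based on OpenVINO's usual transformations layout:

#include <memory>
#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/common_optimizations/mark_subgraphs_to_keep_in_mixed_precision.hpp"

// Sketch: run the marking pass over an existing model, as the tests below do.
// The include path is assumed; the class name (including the "Sugraphs" typo)
// matches the code above.
void mark_model(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::MarkSugraphsToKeepInMixedPrecision>();
    manager.run_passes(model);
}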
@@ -126,7 +126,6 @@ TEST(TransformationTests, MarkSugraphsToKeepInMixedPrecision_reducesum_without_e
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;

    {
        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
@@ -138,24 +137,10 @@ TEST(TransformationTests, MarkSugraphsToKeepInMixedPrecision_reducesum_without_e
        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);

        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
        model_ref = model->clone();

        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
        manager.run_passes(model);
    }

    {
        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);

        auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
        auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
        auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);

        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
    }

    const FunctionsComparator func_comparator =
        FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
@@ -519,7 +504,6 @@ TEST(TransformationTests, PowWithPositiveExponent) {
    pass::Manager manager;
    // graph should be left unchanged
    const float eps_value = 1.0e-12f;
    {
        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
@@ -529,22 +513,11 @@ TEST(TransformationTests, PowWithPositiveExponent) {
        auto mul = std::make_shared<Multiply>(input_1, pow);

        model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
        model_ref = model->clone();

        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
        manager.run_passes(model);
    }

    {
        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
        auto add = std::make_shared<Add>(input_2, eps_const);
        auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
        auto pow = std::make_shared<Power>(add, pow_exp_const);
        auto mul = std::make_shared<Multiply>(input_1, pow);

        model_ref = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
    }
    const FunctionsComparator func_comparator =
        FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
    // need to compare twice to ensure that no extra nodes are marked
@@ -559,7 +532,6 @@ TEST(TransformationTests, DivisionByZeroMinimalPatternUnchanged) {
    pass::Manager manager;
    // if eps_value is greater than normalized_fp16_min then leave graph unchanged
    const float eps_value = 0.0001f;
    {
        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
@@ -567,20 +539,11 @@ TEST(TransformationTests, DivisionByZeroMinimalPatternUnchanged) {
        auto divide = std::make_shared<Divide>(input_1, add);

        model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
        model_ref = model->clone();

        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
        manager.run_passes(model);
    }

    {
        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
        auto add = std::make_shared<Add>(input_2, eps_const);
        auto divide = std::make_shared<Divide>(input_1, add);

        model_ref = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
    }
    const FunctionsComparator func_comparator =
        FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
    // need to compare twice to ensure that no extra nodes are marked
@@ -798,7 +761,6 @@ TEST(TransformationTests, MarkReduceOpExpToKeepInMixedPrecision_reducesum_withou
    // ReduceSum without Exp is not a precision sensitive case
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {
        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
@@ -810,24 +772,10 @@ TEST(TransformationTests, MarkReduceOpExpToKeepInMixedPrecision_reducesum_withou
        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);

        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
        model_ref = model->clone();

        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
        manager.run_passes(model);
    }

    {
        auto input_1 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto input_2 = make_shared<Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto reduction_axes = Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_1 = make_shared<ReduceSum>(input_1, reduction_axes);

        auto factor_const = Constant::create(element::f16, Shape{1}, {-1});
        auto factor_const_decompressed = make_shared<Convert>(factor_const, element::f32);
        auto mul_1 = make_shared<Multiply>(reduce_sum_1, factor_const_decompressed);
        auto matmul_1 = make_shared<MatMul>(mul_1, input_2);

        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
    }

    const FunctionsComparator func_comparator =
        FunctionsComparator::with_default().enable(FunctionsComparator::RUNTIME_KEYS);
@@ -986,7 +934,7 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_PowWithPositiveEx
    const float eps_value = 1.e-12f;
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {

        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
@@ -996,21 +944,11 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_PowWithPositiveEx
        auto mul = std::make_shared<Multiply>(input_1, pow);

        model = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
        model_ref = model->clone();

        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
        manager.run_passes(model);
    }

    {
        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
        auto add = std::make_shared<Add>(input_2, eps_const);
        auto pow_exp_const = Constant::create(element::f32, Shape{1}, {1.77});
        auto pow = std::make_shared<Power>(add, pow_exp_const);
        auto mul = std::make_shared<Multiply>(input_1, pow);

        model_ref = std::make_shared<Model>(NodeVector{mul}, ParameterVector{input_1, input_2});
    }
    const auto fc = FunctionsComparator::with_default()
                        .enable(FunctionsComparator::PRECISIONS)
                        .enable(FunctionsComparator::RUNTIME_KEYS)
@@ -1027,7 +965,7 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_MinimalPatternUnc
    const float eps_value = 0.0001f;
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {

        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
@@ -1035,19 +973,11 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_MinimalPatternUnc
        auto divide = std::make_shared<Divide>(input_1, add);

        model = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
        model_ref = model->clone();

        manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
        manager.run_passes(model);
    }

    {
        auto input_1 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto input_2 = std::make_shared<Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_const = Constant::create(element::f32, Shape{1}, {eps_value});
        auto add = std::make_shared<Add>(input_2, eps_const);
        auto divide = std::make_shared<Divide>(input_1, add);

        model_ref = std::make_shared<Model>(NodeVector{divide}, ParameterVector{input_1, input_2});
    }
    const auto fc = FunctionsComparator::with_default()
                        .enable(FunctionsComparator::PRECISIONS)
                        .enable(FunctionsComparator::RUNTIME_KEYS)
@@ -1162,3 +1092,82 @@ TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_InL2NormWithSqrtA
    result = fc(model, model_ref);
    ASSERT_TRUE(result.valid) << result.message;
}

TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_disable_for_quantized_nodes_1) {
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    // although there are precision-sensitive Exp->ReduceSum nodes, the FakeQuantize ops mean they
    // will be inferred in int8, so there is no need to mark them: model and model_ref should match
    auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
    auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
    auto exp_1 = make_shared<opset10::Exp>(input_1);

    auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
    auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
    auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
    auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
    auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);

    auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
    auto reduce_sum_1 = make_shared<opset10::ReduceSum>(fq_1, reduction_axes);

    auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
    auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);

    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
    model_ref = model->clone();

    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
    manager.run_passes(model);

    const auto fc = FunctionsComparator::with_default()
                        .enable(FunctionsComparator::PRECISIONS)
                        .enable(FunctionsComparator::RUNTIME_KEYS)
                        .enable(FunctionsComparator::CONST_VALUES);
    // need to compare twice to ensure that no extra nodes are marked
    FunctionsComparator::Result result = fc(model_ref, model);
    ASSERT_TRUE(result.valid) << result.message;
    result = fc(model, model_ref);
    ASSERT_TRUE(result.valid) << result.message;
}

TEST(TransformationTests, MarkDivWithEpsToKeepInMixedPrecision_disable_for_quantized_nodes_2) {
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    // although there are precision-sensitive Exp->ReduceSum nodes, the FakeQuantize ops mean they
    // will be inferred in int8, so there is no need to mark them: model and model_ref should match
    auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
    auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
    auto exp_1 = make_shared<opset10::Exp>(input_1);

    auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
    auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
    auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
    auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
    auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);

    auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
    auto unsqueeze_1 = make_shared<opset10::Unsqueeze>(fq_1, unsqueeze_axes);

    auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
    auto reduce_sum_1 = make_shared<opset10::ReduceSum>(unsqueeze_1, reduction_axes);

    auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
    auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);

    model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
    model_ref = model->clone();

    manager.register_pass<pass::MarkSugraphsToKeepInMixedPrecision>();
    manager.run_passes(model);

    const auto fc = FunctionsComparator::with_default()
                        .enable(FunctionsComparator::PRECISIONS)
                        .enable(FunctionsComparator::RUNTIME_KEYS)
                        .enable(FunctionsComparator::CONST_VALUES);
    // need to compare twice to ensure that no extra nodes are marked
    FunctionsComparator::Result result = fc(model_ref, model);
    ASSERT_TRUE(result.valid) << result.message;
    result = fc(model, model_ref);
    ASSERT_TRUE(result.valid) << result.message;
}
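Outside the comparator-based tests, the effect of the marking can be inspected directly: fp16_compression_is_disabled is the same helper the down-propagation code above queries. A hedged sketch (the include path is assumed from OpenVINO's usual layout):

#include <iostream>
#include <memory>
#include "openvino/core/model.hpp"
#include "transformations/rt_info/disable_fp16_compression.hpp"

// Sketch: after running the marking pass, list the nodes that must stay
// in FP32. fp16_compression_is_disabled is the helper used by the
// propagation code above; the header path is an assumption.
void dump_fp32_nodes(const std::shared_ptr<ov::Model>& model) {
    for (const auto& node : model->get_ops()) {
        if (ov::fp16_compression_is_disabled(node))
            std::cout << node->get_friendly_name() << " stays in FP32\n";
    }
}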
@@ -1716,3 +1716,119 @@ TEST(TransformationTests, ConvertPrecision_exp_through_unsqueeze) {
    FunctionsComparator::Result result = func_comparator(model_ref, model);
    ASSERT_TRUE(result.valid) << result.message;
}

TEST(TransformationTests, ConvertPrecision_disable_for_quantized_nodes_1) {
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {
        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto exp_1 = make_shared<opset10::Exp>(input_1);

        auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
        auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
        auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
        auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);

        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(fq_1, reduction_axes);

        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);

        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});

        type_to_fuse_map empty_type_to_fuse_map = {};
        bool keep_precision_sensitive_in_fp32 = true;
        manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
                                                      empty_type_to_fuse_map,
                                                      keep_precision_sensitive_in_fp32);
        manager.run_passes(model);
    }

    {
        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto exp_1 = make_shared<opset10::Exp>(input_1);

        auto in_low = op::v0::Constant::create(element::f16, Shape{}, {0.f});
        auto in_high = op::v0::Constant::create(element::f16, Shape{}, {5.f});
        auto out_low = op::v0::Constant::create(element::f16, Shape{}, {2.f});
        auto out_high = op::v0::Constant::create(element::f16, Shape{}, {4.f});
        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);

        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(fq_1, reduction_axes);

        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);

        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
    }

    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
    FunctionsComparator::Result result = func_comparator(model_ref, model);
    ASSERT_TRUE(result.valid) << result.message;
}

TEST(TransformationTests, ConvertPrecision_disable_for_quantized_nodes_2) {
    shared_ptr<Model> model, model_ref;
    pass::Manager manager;
    {
        auto input_1 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto input_2 = make_shared<opset10::Parameter>(element::f32, Shape{1, 3, 224, 224});
        auto exp_1 = make_shared<opset10::Exp>(input_1);

        auto in_low = op::v0::Constant::create(element::f32, Shape{}, {0.f});
        auto in_high = op::v0::Constant::create(element::f32, Shape{}, {5.f});
        auto out_low = op::v0::Constant::create(element::f32, Shape{}, {2.f});
        auto out_high = op::v0::Constant::create(element::f32, Shape{}, {4.f});
        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);

        auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
        auto unsqueeze_1 = make_shared<opset10::Unsqueeze>(fq_1, unsqueeze_axes);

        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(unsqueeze_1, reduction_axes);

        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);

        model = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});

        type_to_fuse_map empty_type_to_fuse_map = {};
        bool keep_precision_sensitive_in_fp32 = true;
        manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
                                                      empty_type_to_fuse_map,
                                                      keep_precision_sensitive_in_fp32);
        manager.run_passes(model);
    }

    {
        auto input_1 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto input_2 = make_shared<opset10::Parameter>(element::f16, Shape{1, 3, 224, 224});
        auto exp_1 = make_shared<opset10::Exp>(input_1);

        auto in_low = op::v0::Constant::create(element::f16, Shape{}, {0.f});
        auto in_high = op::v0::Constant::create(element::f16, Shape{}, {5.f});
        auto out_low = op::v0::Constant::create(element::f16, Shape{}, {2.f});
        auto out_high = op::v0::Constant::create(element::f16, Shape{}, {4.f});
        auto fq_1 = make_shared<opset10::FakeQuantize>(exp_1, in_low, in_high, out_low, out_high, 256);

        auto unsqueeze_axes = opset10::Constant::create(element::i64, Shape{1}, {1});
        auto unsqueeze_1 = make_shared<opset10::Unsqueeze>(fq_1, unsqueeze_axes);

        auto reduction_axes = opset10::Constant::create(element::i64, Shape{1}, {-1});
        auto reduce_sum_1 = make_shared<opset10::ReduceSum>(unsqueeze_1, reduction_axes);

        auto fq_2 = make_shared<opset10::FakeQuantize>(reduce_sum_1, in_low, in_high, out_low, out_high, 256);
        auto matmul_1 = make_shared<opset10::MatMul>(fq_2, input_2);

        model_ref = make_shared<Model>(NodeVector{matmul_1}, ParameterVector{input_1, input_2});
    }

    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
    FunctionsComparator::Result result = func_comparator(model_ref, model);
    ASSERT_TRUE(result.valid) << result.message;
}
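For completeness, the knob these tests exercise is the third constructor argument of ConvertPrecision; a minimal invocation sketch mirroring the test code above (the include path, and the unqualified type_to_fuse_map/precisions_map aliases, are assumptions based on how the tests use them):

#include <memory>
#include "openvino/pass/manager.hpp"
#include "transformations/convert_precision.hpp"

// Sketch mirroring the tests above: convert FP32 -> FP16 while keeping
// precision-sensitive subgraphs (as marked by the pass in this PR) in FP32.
// The header path is an assumption based on OpenVINO's usual layout.
void compress_to_fp16(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    type_to_fuse_map empty_type_to_fuse_map = {};
    const bool keep_precision_sensitive_in_fp32 = true;
    manager.register_pass<ov::pass::ConvertPrecision>(
        precisions_map{{ov::element::f32, ov::element::f16}},
        empty_type_to_fuse_map,
        keep_precision_sensitive_in_fp32);
    manager.run_passes(model);
}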