Reenable AddFakeQuantizeFusion and MulFakeQuantizeFusion (#5574)

* Reenable AddFakeQuantizeFusion and MulFakeQuantizeFusion

* remove unused variable

* is_single_value simplify

* skip transformations for low precision types

* add comment regarding restriction in AddFakeQuantizeFusion

* remove fp16 test

* remove negative const handling
Mateusz Tabaka 2021-09-07 10:14:25 +02:00 committed by GitHub
parent 8985feff6f
commit 5d6ef444a5
10 changed files with 511 additions and 225 deletions

View File

@@ -171,11 +171,14 @@ std::stringstream toStream(const std::vector<float>& dequantizationValues) {
}
void LayerTransformation::printDequantizationInfo(const std::shared_ptr<Node>& layer) {
const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(ov::as_type_ptr<opset1::FakeQuantize>(layer));
std::cout <<
layer->get_type_name() << (NetworkHelper::isConstantPath(layer) ? " on weights " : " on activations ") <<
layer->get_friendly_name() << ":" << std::endl <<
" details : " << quantizationDetails << std::endl;
auto fq = as_type_ptr<opset1::FakeQuantize>(layer);
if (fq) {
const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(ov::as_type_ptr<opset1::FakeQuantize>(layer));
std::cout <<
layer->get_type_name() << (NetworkHelper::isConstantPath(layer) ? " on weights " : " on activations ") <<
layer->get_friendly_name() << ":" << std::endl <<
" details : " << quantizationDetails << std::endl;
}
}
void LayerTransformation::printDequantizationInfo(const DataPrecision& dataPrecision) {

View File

@@ -11,6 +11,7 @@
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/validation_util.hpp>
#include "itt.hpp"
@@ -29,38 +30,85 @@ ngraph::pass::AddFakeQuantizeFusion::AddFakeQuantizeFusion() {
ngraph::pattern::any_input()});
ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) {
const auto& pattern_value_map = m.get_pattern_value_map();
const auto& input = pattern_value_map.at(input_pattern);
const auto& type = input.get_element_type();
if (type.bitwidth() < element::f32.bitwidth())
return false;
auto fq = std::dynamic_pointer_cast<opset5::FakeQuantize>(pattern_value_map.at(fq_pattern).get_node_shared_ptr());
if (!fq)
return false;
std::shared_ptr<Node> add_const = std::dynamic_pointer_cast<opset5::Constant>(pattern_value_map.at(const_pattern).get_node_shared_ptr());
const auto& add_node = pattern_value_map.at(add_pattern).get_node_shared_ptr();
auto add_const = std::dynamic_pointer_cast<opset5::Constant>(pattern_value_map.at(const_pattern).get_node_shared_ptr());
if (!add_const)
return false;
std::shared_ptr<Node> new_const = add_const;
auto const_shape = add_const->get_shape();
size_t const_shape_size = shape_size(const_shape);
if (const_shape_size > 1) {
bool is_single_value = const_shape_size == 1;
if (!is_single_value) {
float v;
is_single_value = op::util::get_single_value(add_const, v);
if (is_single_value) {
new_const = std::make_shared<opset5::Constant>(add_const->get_element_type(), Shape{1}, v);
}
}
if (!is_single_value) {
// disallow constant shapes other than (N, 1, 1, ..., 1) or (1, C, 1, ..., 1)
if (!(const_shape[0] > 1 && const_shape[0] == const_shape_size) &&
!(const_shape.size() > 1 && const_shape[1] == const_shape_size)) {
return false;
}
// Convolution+Add or MatMul+Add can be fused later
// so don't fuse Add+FQ in that situation
const auto& add_inputs = add_node->input_values();
bool add_parent_is_conv_or_mm = std::any_of(add_inputs.begin(), add_inputs.end(),
[] (const Output<Node>& node) -> bool {
auto node_ptr = node.get_node();
return is_type<opset5::Convolution>(node_ptr) ||
is_type<opset5::GroupConvolution>(node_ptr) ||
is_type<opset5::ConvolutionBackpropData>(node_ptr) ||
is_type<opset5::GroupConvolutionBackpropData>(node_ptr) ||
is_type<opset5::MatMul>(node_ptr);
});
if (add_parent_is_conv_or_mm)
return false;
auto fq_users = fq->get_users();
// Concat LPT transformation supports per tensor quantization only
bool fq_user_is_concat = std::any_of(fq_users.begin(), fq_users.end(),
[] (const Output<Node>& node) -> bool {
auto node_ptr = node.get_node();
return is_type<opset5::Concat>(node_ptr);
});
if (fq_user_is_concat)
return false;
auto diff = fq->get_input_partial_shape(0).rank().get_length() - static_cast<Dimension::value_type>(const_shape.size());
if (diff > 0) {
// Reshape constants like (C, 1, 1) to (1, C, 1, 1)
const_shape.insert(const_shape.begin(), diff, 1);
new_const = std::make_shared<opset5::Reshape>(new_const,
op::Constant::create(element::u64, Shape{const_shape.size()}, const_shape), false);
}
}
if (const_shape_size > 1 &&
static_cast<Dimension::value_type>(const_shape.size()) < fq->get_input_partial_shape(0).rank().get_length()) {
// Reshape constants like (C, 1, 1) to (1, C, 1, 1)
const_shape.insert(const_shape.begin(), fq->get_input_partial_shape(0).rank().get_length() - const_shape.size(), 1);
add_const = std::make_shared<opset5::Reshape>(add_const, op::Constant::create(element::u64, Shape{const_shape.size()}, const_shape), false);
}
auto new_input_low = std::make_shared<opset5::Subtract>(fq->input_value(1), add_const);
auto new_input_high = std::make_shared<opset5::Subtract>(fq->input_value(2), add_const);
auto new_fq = register_new_node<opset5::FakeQuantize>(pattern_value_map.at(input_pattern),
auto input_low_sub = std::make_shared<opset5::Subtract>(fq->input_value(1), new_const);
std::shared_ptr<Node> new_input_low = get_constant_from_source(input_low_sub);
if (!new_input_low)
new_input_low = input_low_sub;
auto input_high_sub = std::make_shared<opset5::Subtract>(fq->input_value(2), new_const);
std::shared_ptr<Node> new_input_high = get_constant_from_source(input_high_sub);
if (!new_input_high)
new_input_high = input_high_sub;
auto new_fq = register_new_node<opset5::FakeQuantize>(input,
new_input_low,
new_input_high,
fq->input_value(3),
fq->input_value(4),
fq->get_levels());
new_fq->set_friendly_name(fq->get_friendly_name());
copy_runtime_info({pattern_value_map.at(add_pattern).get_node_shared_ptr(), fq}, {new_input_low, new_input_high, new_fq});
copy_runtime_info({add_node, fq}, {new_input_low, new_input_high, new_fq});
replace_node(fq, new_fq);
return true;
};
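For reference, the identity this pass relies on is FakeQuantize(x + c, il, ih, ol, oh) == FakeQuantize(x, il - c, ih - c, ol, oh), so the Add node can be dropped once its constant is folded into the input range. A minimal before/after sketch using the same opset5 API as the unit tests below (an illustrative fragment, not part of the diff; the numbers mirror the scalar-constant test):

#include <ngraph/opsets/opset5.hpp>
using namespace ngraph;

// Before the pass: Add(data, 2) feeding a FakeQuantize with input range [0, 20]
auto data      = std::make_shared<opset5::Parameter>(element::f32, Shape{1, 3, 14, 14});
auto add_const = opset5::Constant::create(element::f32, Shape{1}, {2});
auto add       = std::make_shared<opset5::Add>(data, add_const);
auto fq = std::make_shared<opset5::FakeQuantize>(
    add,
    opset5::Constant::create(element::f32, Shape{1}, {0}),   // input_low
    opset5::Constant::create(element::f32, Shape{1}, {20}),  // input_high
    opset5::Constant::create(element::f32, Shape{}, {0}),    // output_low
    opset5::Constant::create(element::f32, Shape{}, {10}),   // output_high
    11);

// After AddFakeQuantizeFusion: the Add is gone and the constant is folded into the
// input range, input_low = 0 - 2 = -2, input_high = 20 - 2 = 18.
auto fused_fq = std::make_shared<opset5::FakeQuantize>(
    data,
    opset5::Constant::create(element::f32, Shape{1}, {-2}),
    opset5::Constant::create(element::f32, Shape{1}, {18}),
    opset5::Constant::create(element::f32, Shape{}, {0}),
    opset5::Constant::create(element::f32, Shape{}, {10}),
    11);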

View File

@@ -195,6 +195,8 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::
fq_fusions->add_matcher<ngraph::pass::FakeQuantizeReshapeFusion>();
fq_fusions->add_matcher<ngraph::pass::PullTransposeThroughFQUp>();
fq_fusions->add_matcher<ngraph::pass::ReluFakeQuantizeFusion>();
fq_fusions->add_matcher<ngraph::pass::AddFakeQuantizeFusion>();
fq_fusions->add_matcher<ngraph::pass::MulFakeQuantizeFusion>();
fq_fusions->set_name("ngraph::pass::FakeQuantizeFusions");
// StridesOptimization should be at the very end
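With both matchers registered again, the fusions run as part of the common FQ-fusion group. They can also be exercised in isolation through a pass::Manager, the same way the unit tests in this commit do; a minimal sketch (the header paths are assumptions, and f is a std::shared_ptr<ngraph::Function> built elsewhere):

#include <ngraph/pass/manager.hpp>
#include <transformations/init_node_info.hpp>                                 // assumed path
#include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>  // assumed path
#include <transformations/common_optimizations/mul_fake_quantize_fusion.hpp>  // assumed path

ngraph::pass::Manager m;
m.register_pass<ngraph::pass::InitNodeInfo>();
m.register_pass<ngraph::pass::AddFakeQuantizeFusion>();
m.register_pass<ngraph::pass::MulFakeQuantizeFusion>();
m.run_passes(f);  // f: the ngraph::Function to transform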

View File

@@ -12,35 +12,10 @@
#include <ngraph/opsets/opset4.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/validation_util.hpp>
NGRAPH_RTTI_DEFINITION(ngraph::pass::FakeQuantizeMulFusion, "FakeQuantizeMulFusion", 0);
namespace {
std::pair<ngraph::Output<ngraph::Node>, ngraph::Output<ngraph::Node>>
get_adjusted_output_range(ngraph::Output<ngraph::Node> out_low,
ngraph::Output<ngraph::Node> out_high,
ngraph::Output<ngraph::Node> multiplier) {
const auto mul_out_low = std::make_shared<ngraph::opset4::Multiply>(out_low, multiplier);
const auto mul_out_high = std::make_shared<ngraph::opset4::Multiply>(out_high, multiplier);
copy_runtime_info({out_low.get_node_shared_ptr(), multiplier.get_node_shared_ptr()},
mul_out_low);
copy_runtime_info({out_high.get_node_shared_ptr(), multiplier.get_node_shared_ptr()},
mul_out_high);
ngraph::OutputVector new_out_low(1), new_out_high(1);
if (!mul_out_low->constant_fold(new_out_low, {out_low, multiplier})) {
new_out_low[0] = mul_out_low;
}
if (!mul_out_high->constant_fold(new_out_high, {out_high, multiplier})) {
new_out_high[0] = mul_out_high;
}
return {new_out_low[0], new_out_high[0]};
}
} // namespace
// This transformation multiplies the "output_low" and "output_high" inputs of the FQ operation
// by the constant value that, before the transformation, is used to multiply the output of FQ.
// Both output_low and output_high are multiplied by the value represented as C (a constant) below.
@@ -64,10 +39,11 @@ std::pair<ngraph::Output<ngraph::Node>, ngraph::Output<ngraph::Node>>
ngraph::pass::FakeQuantizeMulFusion::FakeQuantizeMulFusion() {
MATCHER_SCOPE(FakeQuantizeMulFusion);
const auto data_p = ngraph::pattern::any_input();
const auto fq_output_low_p = ngraph::pattern::any_input();
const auto fq_output_high_p = ngraph::pattern::any_input();
const auto fq_node_p = ngraph::pattern::wrap_type<opset4::FakeQuantize>({ngraph::pattern::any_input(),
const auto fq_node_p = ngraph::pattern::wrap_type<opset4::FakeQuantize>({data_p,
ngraph::pattern::any_input(),
ngraph::pattern::any_input(),
fq_output_low_p,
@@ -81,20 +57,65 @@ ngraph::pass::FakeQuantizeMulFusion::FakeQuantizeMulFusion() {
ngraph::matcher_pass_callback callback = [=](pattern::Matcher &m) {
const auto& pattern_map = m.get_pattern_value_map();
const auto& data = pattern_map.at(data_p);
const auto fq_node = pattern_map.at(fq_node_p).get_node_shared_ptr();
const auto & original_output_low = pattern_map.at(fq_output_low_p);
const auto & original_output_high = pattern_map.at(fq_output_high_p);
const auto & mul_constant = pattern_map.at(mul_constant_p);
auto mul_constant = pattern_map.at(mul_constant_p).get_node_shared_ptr();
auto mul_constant_shape = mul_constant->get_shape();
bool is_single_value = shape_size(mul_constant_shape) == 1;
const auto new_output_limits = get_adjusted_output_range(
original_output_low, original_output_high, mul_constant);
if (!is_single_value) {
float v;
auto constant = std::dynamic_pointer_cast<opset4::Constant>(mul_constant);
if (constant) {
is_single_value = op::util::get_single_value(constant, v);
if (is_single_value) {
mul_constant_shape = Shape{1};
mul_constant = std::make_shared<opset4::Constant>(mul_constant->get_element_type(), mul_constant_shape, v);
}
}
}
if (!is_single_value) {
auto fq_outputs = fq_node->get_users();
// Convolution and GroupConvolution LP transformations require output low/high to have the same values
bool fq_output_is_conv = std::any_of(fq_outputs.begin(), fq_outputs.end(),
[] (const std::shared_ptr<Node>& node) -> bool {
return is_type<opset4::Convolution>(node) ||
is_type<opset4::GroupConvolution>(node);
});
if (fq_output_is_conv) {
return false;
}
const auto & data_rank = data.get_partial_shape().rank();
if (data_rank.is_dynamic()) {
return false;
}
auto rank = data_rank.get_length();
auto diff = rank - mul_constant_shape.size();
if (diff > 0) {
mul_constant_shape.insert(mul_constant_shape.begin(), diff, 1);
mul_constant = std::make_shared<ngraph::opset4::Reshape>(mul_constant,
op::Constant::create(element::i64, Shape{mul_constant_shape.size()}, mul_constant_shape), false);
}
}
auto get_adjusted_output_range = [&] (const Output<Node>& node) -> std::shared_ptr<Node> {
auto ret = std::make_shared<ngraph::opset4::Multiply>(node, mul_constant);
copy_runtime_info(node.get_node_shared_ptr(), ret);
auto constant = get_constant_from_source(ret);
if (constant)
return constant;
return ret;
};
const auto new_fq_node = fq_node->clone_with_new_inputs({fq_node->input_value(0),
fq_node->input_value(1),
fq_node->input_value(2),
new_output_limits.first,
new_output_limits.second});
get_adjusted_output_range(original_output_low),
get_adjusted_output_range(original_output_high)});
const auto mul_node = pattern_map.at(mul_node_p).get_node_shared_ptr();
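The rewrite relies on the fact that multiplying the FQ output by a constant C equals multiplying its output limits by C: Multiply(FQ(x, il, ih, ol, oh), C) == FQ(x, il, ih, ol * C, oh * C). A minimal numeric sketch in the opset4 API used here (illustrative only; the input range is made up, the output values mirror the FakeQuantizeMultiplyFusionMulConstWithEqualValues test below):

#include <ngraph/opsets/opset4.hpp>
using namespace ngraph;

// Before the pass: FakeQuantize with output range [1, 100] followed by Multiply by 3
auto data     = std::make_shared<opset4::Parameter>(element::f32, Shape{1, 3, 224, 224});
auto in_low   = opset4::Constant::create(element::f32, Shape{}, {0});
auto in_high  = opset4::Constant::create(element::f32, Shape{}, {42});
auto out_low  = opset4::Constant::create(element::f32, Shape{}, {1});
auto out_high = opset4::Constant::create(element::f32, Shape{}, {100});
auto fq  = std::make_shared<opset4::FakeQuantize>(data, in_low, in_high, out_low, out_high, 42);
auto mul = std::make_shared<opset4::Multiply>(
    fq, opset4::Constant::create(element::f32, Shape{1}, {3}));

// After FakeQuantizeMulFusion: the Multiply is removed and the output range is scaled,
// output_low = 1 * 3 = 3, output_high = 100 * 3 = 300 (folded to constants via
// get_constant_from_source whenever the limits are constant).
auto fused_fq = std::make_shared<opset4::FakeQuantize>(
    data, in_low, in_high,
    opset4::Constant::create(element::f32, Shape{1}, {3}),
    opset4::Constant::create(element::f32, Shape{1}, {300}),
    42);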

View File

@@ -11,6 +11,7 @@
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/validation_util.hpp>
#include "itt.hpp"
@@ -29,6 +30,10 @@ ngraph::pass::MulFakeQuantizeFusion::MulFakeQuantizeFusion() {
ngraph::pattern::any_input()});
ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) {
const auto& pattern_value_map = m.get_pattern_value_map();
const auto& input = pattern_value_map.at(input_pattern);
const auto& type = input.get_element_type();
if (type.bitwidth() < element::f32.bitwidth())
return false;
auto fq = std::dynamic_pointer_cast<opset5::FakeQuantize>(pattern_value_map.at(fq_pattern).get_node_shared_ptr());
if (!fq)
return false;
@@ -37,74 +42,61 @@ ngraph::pass::MulFakeQuantizeFusion::MulFakeQuantizeFusion() {
return false;
auto mul_const_value = mul_const->cast_vector<float>();
if (std::any_of(mul_const_value.begin(), mul_const_value.end(), [] (float f) -> bool { return f == 0.0f; }))
if (std::any_of(mul_const_value.begin(), mul_const_value.end(), [] (float f) -> bool { return f <= 0.0f; }))
return false;
std::shared_ptr<Node> new_const = mul_const;
auto const_shape = mul_const->get_shape();
size_t const_shape_size = shape_size(const_shape);
if (const_shape_size > 1) {
bool is_single_value = const_shape_size == 1;
if (!is_single_value) {
float v;
is_single_value = op::util::get_single_value(mul_const, v);
if (is_single_value) {
new_const = std::make_shared<opset5::Constant>(mul_const->get_element_type(), Shape{1}, v);
const_shape = Shape{1};
}
}
if (!is_single_value) {
// disallow constant shapes other than (N, 1, 1, ..., 1) or (1, C, 1, ..., 1)
if (!(const_shape[0] > 1 && const_shape[0] == const_shape_size) &&
!(const_shape.size() > 1 && const_shape[1] == const_shape_size)) {
return false;
}
}
std::shared_ptr<Node> mul_const_node = mul_const;
if (const_shape_size > 1 &&
static_cast<Dimension::value_type>(const_shape.size()) < fq->get_input_partial_shape(0).rank().get_length()) {
const auto& rank = fq->get_input_partial_shape(0).rank();
if (rank.is_dynamic())
return false;
auto fq_users = fq->get_users();
// Concat LPT transformation supports per tensor quantization only
bool fq_user_is_concat = std::any_of(fq_users.begin(), fq_users.end(),
[] (const Output<Node>& node) -> bool {
auto node_ptr = node.get_node();
return is_type<opset5::Concat>(node_ptr);
});
if (fq_user_is_concat)
return false;
auto diff = rank.get_length() - static_cast<Dimension::value_type>(const_shape.size());
// Reshape constants like (C, 1, 1) to (1, C, 1, 1)
const_shape.insert(const_shape.begin(), fq->get_input_partial_shape(0).rank().get_length() - const_shape.size(), 1);
mul_const_node = std::make_shared<opset5::Reshape>(mul_const_node,
const_shape.insert(const_shape.begin(), diff, 1);
new_const = std::make_shared<opset5::Reshape>(new_const,
op::Constant::create(element::u64, Shape{const_shape.size()}, const_shape), false);
}
auto new_input_low = std::make_shared<opset5::Divide>(fq->input_value(1), mul_const_node);
auto new_input_high = std::make_shared<opset5::Divide>(fq->input_value(2), mul_const_node);
auto input_low_div = std::make_shared<opset5::Divide>(fq->input_value(1), new_const);
std::shared_ptr<Node> new_input_low = get_constant_from_source(input_low_div);
if (!new_input_low)
new_input_low = input_low_div;
auto input_high_div = std::make_shared<opset5::Divide>(fq->input_value(2), new_const);
std::shared_ptr<Node> new_input_high = get_constant_from_source(input_high_div);
if (!new_input_high)
new_input_high = input_high_div;
auto mul = pattern_value_map.at(mul_pattern).get_node_shared_ptr();
const auto& mul_data = pattern_value_map.at(input_pattern);
std::shared_ptr<Node> new_fq;
if (std::all_of(mul_const_value.begin(), mul_const_value.end(), [] (float f) -> bool { return f < 0.0f; })) {
new_fq = register_new_node<opset5::FakeQuantize>(mul_data, new_input_low, new_input_high,
fq->input_value(4), fq->input_value(3), fq->get_levels());
copy_runtime_info({mul, fq}, {mul_const_node, new_input_low, new_input_high, new_fq});
} else if (std::any_of(mul_const_value.begin(), mul_const_value.end(), [] (float f) -> bool { return f < 0.0f; })) {
const auto& output_low = fq->input_value(3);
const auto& output_high = fq->input_value(4);
// get the mask of the values from mul_const that are less than zero
std::vector<float> less_than_zero;
less_than_zero.reserve(mul_const_value.size());
// and greater or equal to zero
std::vector<float> greater_eq_zero;
greater_eq_zero.reserve(mul_const_value.size());
for (size_t i = 0; i < mul_const_value.size(); i++) {
less_than_zero.push_back(mul_const_value[i] < 0);
greater_eq_zero.push_back(mul_const_value[i] >= 0);
}
auto less_const = op::Constant::create(output_low.get_element_type(), const_shape, less_than_zero);
auto greater_eq_const = op::Constant::create(output_low.get_element_type(), const_shape, greater_eq_zero);
// new_output_low is defined as follows:
// output_low[i], when mul_const[i] >= 0
// output_high[i], when mul_const[i] < 0
auto new_output_low = std::make_shared<opset5::Add>(
std::make_shared<opset5::Multiply>(greater_eq_const, output_low),
std::make_shared<opset5::Multiply>(less_const, output_high));
// new_output_high is defined as follows:
// output_high[i], when mul_const[i] >= 0
// output_low[i], when mul_const[i] < 0
auto new_output_high = std::make_shared<opset5::Add>(
std::make_shared<opset5::Multiply>(greater_eq_const, output_high),
std::make_shared<opset5::Multiply>(less_const, output_low));
new_fq = register_new_node<opset5::FakeQuantize>(mul_data, new_input_low,
new_input_high, new_output_low, new_output_high, fq->get_levels());
} else {
new_fq = register_new_node<opset5::FakeQuantize>(mul_data, new_input_low, new_input_high,
fq->input_value(3), fq->input_value(4), fq->get_levels());
}
copy_runtime_info({mul, fq}, {mul_const_node, new_input_low, new_input_high, new_fq});
auto new_fq = register_new_node<opset5::FakeQuantize>(input, new_input_low, new_input_high,
fq->input_value(3), fq->input_value(4), fq->get_levels());
copy_runtime_info({pattern_value_map.at(mul_pattern).get_node_shared_ptr(), fq},
{new_const, new_input_low, new_input_high, new_fq});
new_fq->set_friendly_name(fq->get_friendly_name());
replace_node(fq, new_fq);
return true;
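Here the Multiply in front of the FQ is absorbed by dividing the input limits by the constant: FakeQuantize(x * c, il, ih, ol, oh) == FakeQuantize(x, il / c, ih / c, ol, oh), which holds element-wise only for strictly positive c — hence the new early exit for constants containing values <= 0. A minimal sketch with numbers matching the tests below (illustrative only, not part of the diff):

#include <ngraph/opsets/opset5.hpp>
using namespace ngraph;

// Before the pass: Multiply(data, 2) feeding a FakeQuantize with input range [1, 20]
auto data = std::make_shared<opset5::Parameter>(element::f32, Shape{1, 3, 14, 14});
auto mul  = std::make_shared<opset5::Multiply>(
    data, opset5::Constant::create(element::f32, Shape{1}, {2}));
auto fq = std::make_shared<opset5::FakeQuantize>(
    mul,
    opset5::Constant::create(element::f32, Shape{1}, {1}),   // input_low
    opset5::Constant::create(element::f32, Shape{1}, {20}),  // input_high
    opset5::Constant::create(element::f32, Shape{1}, {0}),   // output_low
    opset5::Constant::create(element::f32, Shape{1}, {10}),  // output_high
    11);

// After MulFakeQuantizeFusion: the Multiply is gone and the input range is divided by
// the constant, input_low = 1 / 2 = 0.5, input_high = 20 / 2 = 10. A negative constant
// would swap the bounds per channel, which is why such cases are no longer fused.
auto fused_fq = std::make_shared<opset5::FakeQuantize>(
    data,
    opset5::Constant::create(element::f32, Shape{1}, {0.5}),
    opset5::Constant::create(element::f32, Shape{1}, {10}),
    opset5::Constant::create(element::f32, Shape{1}, {0}),
    opset5::Constant::create(element::f32, Shape{1}, {10}),
    11);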

View File

@@ -17,18 +17,32 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::PullTransposeThroughFQUp, "PullTransposeThr
ngraph::pass::PullTransposeThroughFQUp::PullTransposeThroughFQUp() {
MATCHER_SCOPE(PullTransposeThroughFQUp);
auto m_fq = pattern::wrap_type<opset1::FakeQuantize>({pattern::any_input(pattern::has_static_rank()),
pattern::any_input(pattern::has_static_rank()),
pattern::any_input(pattern::has_static_rank()),
pattern::any_input(pattern::has_static_rank()),
pattern::any_input(pattern::has_static_rank())},
pattern::any_input(pattern::has_static_shape()),
pattern::any_input(pattern::has_static_shape()),
pattern::any_input(pattern::has_static_shape()),
pattern::any_input(pattern::has_static_shape())},
pattern::consumers_count(1));
auto m_transpose = pattern::wrap_type<opset1::Transpose>({m_fq, pattern::wrap_type<opset1::Constant>()});
auto m_transpose_perm = pattern::wrap_type<opset1::Constant>();
auto m_transpose = pattern::wrap_type<opset1::Transpose>({m_fq, m_transpose_perm});
ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) {
auto & pattern_map = m.get_pattern_value_map();
auto transpose = pattern_map[m_transpose].get_node_shared_ptr();
auto fq = pattern_map[m_fq].get_node_shared_ptr();
auto are_inputs_scalars = shape_size(fq->input_value(1).get_shape()) == 1 &&
shape_size(fq->input_value(2).get_shape()) == 1 &&
shape_size(fq->input_value(3).get_shape()) == 1 &&
shape_size(fq->input_value(4).get_shape()) == 1;
if (!are_inputs_scalars) {
auto perm = std::dynamic_pointer_cast<opset1::Constant>(pattern_map[m_transpose_perm].get_node_shared_ptr());
if (!perm)
return false;
auto perm_val = perm->cast_vector<int64_t>();
if (!(perm_val[0] == 0 && perm_val[1] == 1))
return false;
}
auto input_rank = fq->input(0).get_partial_shape().rank().get_length();
ngraph::NodeVector new_ops;
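The new guard only matters when the FQ limit inputs are not all scalars: in that case the transpose permutation must be a constant that keeps dimensions 0 and 1 in place, otherwise the match is rejected. Two permutation constants that would pass and fail this check (illustrative only):

// accepted when FQ limits are non-scalar: dims 0 and 1 stay in place (perm_val[0] == 0, perm_val[1] == 1)
auto ok_perm  = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 1, 3, 2});
// rejected: dimension 1 is moved, so perm_val[1] != 1
auto bad_perm = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 2, 3, 1});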

View File

@@ -42,7 +42,6 @@ TEST(TransformationTests, AddFakeQuantizeFusion) {
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::AddFakeQuantizeFusion>();
m.register_pass<pass::ConstantFolding>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
@@ -62,6 +61,50 @@ TEST(TransformationTests, AddFakeQuantizeFusion) {
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, AddFakeQuantizeFusionWithConvolutionAndScalarConstant) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
Shape data_shape{1, 3, 14, 14};
{
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
auto filter = std::make_shared<opset5::Parameter>(element::f32, Shape{1, 3, 2, 2});
auto conv = std::make_shared<opset5::Convolution>(data, filter, Strides{1, 1},
CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
auto add_const = opset5::Constant::create(element::f32, Shape{1}, {2});
auto add = std::make_shared<opset5::Add>(conv, add_const);
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {0});
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {20});
auto output_low = opset5::Constant::create(element::f32, Shape{}, {0});
auto output_high = opset5::Constant::create(element::f32, Shape{}, {10});
auto fq = std::make_shared<opset5::FakeQuantize>(add, input_low,
input_high, output_low,
output_high, 11);
f = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data, filter});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::AddFakeQuantizeFusion>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
auto filter = std::make_shared<opset5::Parameter>(element::f32, Shape{1, 3, 2, 2});
auto conv = std::make_shared<opset5::Convolution>(data, filter, Strides{1, 1},
CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {-2});
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {18});
auto output_low = opset5::Constant::create(element::f32, Shape{}, {0});
auto output_high = opset5::Constant::create(element::f32, Shape{}, {10});
auto fq = std::make_shared<opset5::FakeQuantize>(conv, input_low,
input_high, output_low,
output_high, 11);
f_ref = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data, filter});
}
auto res = compare_functions(f, f_ref, true);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, AddFakeQuantizeFusionConstantOnFirstInput) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
@@ -81,7 +124,44 @@ TEST(TransformationTests, AddFakeQuantizeFusionConstantOnFirstInput) {
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::AddFakeQuantizeFusion>();
m.register_pass<pass::ConstantFolding>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {-2});
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {18});
auto output_low = opset5::Constant::create(element::f32, Shape{}, {0});
auto output_high = opset5::Constant::create(element::f32, Shape{}, {10});
auto fq = std::make_shared<opset5::FakeQuantize>(data, input_low,
input_high, output_low,
output_high, 11);
f_ref = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
}
auto res = compare_functions(f, f_ref, true);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, AddFakeQuantizeFusionConstantWithEqualValues) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
Shape data_shape{1, 3, 14, 14};
{
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
auto add_const = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {2, 2, 2});
auto add = std::make_shared<opset5::Add>(add_const, data);
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {0});
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {20});
auto output_low = opset5::Constant::create(element::f32, Shape{}, {0});
auto output_high = opset5::Constant::create(element::f32, Shape{}, {10});
auto fq = std::make_shared<opset5::FakeQuantize>(add, input_low,
input_high, output_low,
output_high, 11);
f = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::AddFakeQuantizeFusion>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
@@ -120,7 +200,6 @@ TEST(TransformationTests, AddFakeQuantizeFusionReshape) {
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::AddFakeQuantizeFusion>();
m.register_pass<pass::ConstantFolding>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
@@ -159,7 +238,6 @@ TEST(TransformationTests, NegativeAddFakeQuantizeFusionNotAConstant) {
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::AddFakeQuantizeFusion>();
m.register_pass<pass::ConstantFolding>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
@@ -180,3 +258,75 @@ TEST(TransformationTests, NegativeAddFakeQuantizeFusionNotAConstant) {
auto res = compare_functions(f, f_ref, true);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, NegativeAddFakeQuantizeFusionWithConvolutionAndNonScalarConstant) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
Shape data_shape{1, 3, 14, 14};
{
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
auto filter = std::make_shared<opset5::Parameter>(element::f32, Shape{4, 3, 2, 2});
auto conv = std::make_shared<opset5::Convolution>(data, filter, Strides{1, 1},
CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
auto add_const = opset5::Constant::create(element::f32, Shape{1, 4, 1, 1}, {1, 2, 3, 4});
auto add = std::make_shared<opset5::Add>(conv, add_const);
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {0});
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {20});
auto output_low = opset5::Constant::create(element::f32, Shape{}, {0});
auto output_high = opset5::Constant::create(element::f32, Shape{}, {10});
auto fq = std::make_shared<opset5::FakeQuantize>(add, input_low,
input_high, output_low,
output_high, 11);
f = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data, filter});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::AddFakeQuantizeFusion>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
auto filter = std::make_shared<opset5::Parameter>(element::f32, Shape{4, 3, 2, 2});
auto conv = std::make_shared<opset5::Convolution>(data, filter, Strides{1, 1},
CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
auto add_const = opset5::Constant::create(element::f32, Shape{1, 4, 1, 1}, {1, 2, 3, 4});
auto add = std::make_shared<opset5::Add>(conv, add_const);
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {0});
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {20});
auto output_low = opset5::Constant::create(element::f32, Shape{}, {0});
auto output_high = opset5::Constant::create(element::f32, Shape{}, {10});
auto fq = std::make_shared<opset5::FakeQuantize>(add, input_low,
input_high, output_low,
output_high, 11);
f_ref = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data, filter});
}
auto res = compare_functions(f, f_ref, true);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, NegativeAddFakeQuantizeFusionLowPrecision) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
Shape data_shape{1, 3, 14, 14};
auto data = std::make_shared<opset5::Parameter>(element::f16, data_shape);
auto add_const = opset5::Constant::create(element::f16, Shape{1}, {2});
auto add = std::make_shared<opset5::Add>(data, add_const);
auto input_low = opset5::Constant::create(element::f16, Shape{1}, {0});
auto input_high = opset5::Constant::create(element::f16, Shape{1}, {20});
auto output_low = opset5::Constant::create(element::f16, Shape{}, {0});
auto output_high = opset5::Constant::create(element::f16, Shape{}, {10});
auto fq = std::make_shared<opset5::FakeQuantize>(add, input_low,
input_high, output_low,
output_high, 11);
f = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
f_ref = clone_function(*f);
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::AddFakeQuantizeFusion>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
auto res = compare_functions(f, f_ref, true);
ASSERT_TRUE(res.first) << res.second;
}

View File

@@ -49,8 +49,10 @@ public:
const auto fq = std::make_shared<ngraph::opset4::FakeQuantize>(
data, in_low, in_high, out_low, out_high, 255);
std::vector<float> mul_const(shape_size(mul_const_shape));
std::iota(mul_const.begin(), mul_const.end(), 0);
const auto mul_value = ngraph::opset4::Constant::create(
ngraph::element::Type_t::f32, mul_const_shape, {3.14f});
ngraph::element::Type_t::f32, mul_const_shape, mul_const);
const auto mul = std::make_shared<ngraph::opset4::Multiply>(fq, mul_value);
m_function = std::make_shared<ngraph::Function>(
@@ -167,7 +169,7 @@ INSTANTIATE_TEST_SUITE_P(FQOutputs_1D__multiplier_3D, FQMulFusion,
::testing::Values(ngraph::Shape{1, 64, 1, 1}),
::testing::Values(ngraph::Shape{1}),
::testing::Values(ngraph::Shape{1, 3, 1}),
::testing::Values(ngraph::Shape{1, 3, 1})));
::testing::Values(ngraph::Shape{1, 1, 3, 1})));
INSTANTIATE_TEST_SUITE_P(FQInOUt_ones__multiplier_4D_with_channel, FQMulFusion,
::testing::Combine(::testing::Values(ngraph::Shape{1, 64, 3, 3}),
@@ -176,6 +178,14 @@ INSTANTIATE_TEST_SUITE_P(FQInOUt_ones__multiplier_4D_with_channel, FQMulFusion,
::testing::Values(ngraph::Shape{1, 64, 3, 3}),
::testing::Values(ngraph::Shape{1, 64, 3, 3})));
INSTANTIATE_TEST_CASE_P(FQInOUt_ones__multiplier_3D, FQMulFusion,
::testing::Combine(::testing::Values(ngraph::Shape{1, 128, 512}),
::testing::Values(ngraph::Shape{1}),
::testing::Values(ngraph::Shape{1}),
::testing::Values(ngraph::Shape{512}),
::testing::Values(ngraph::Shape{1, 1, 512})));
TEST(FQMulFusion_NonConstInputs, AllInputsNonConst) {
const auto data = std::make_shared<ngraph::opset4::Parameter>(
ngraph::element::Type_t::f32, ngraph::Shape{1, 3, 224, 224});
@@ -383,6 +393,53 @@ TEST(TransformationTests, FakeQuantizeMultiplyFusionNegative) {
ASSERT_EQ(function->get_output_shape(0), ngraph::Shape({1, 300, 16}));
}
TEST(TransformationTests, FakeQuantizeMultiplyFusionMulConstWithEqualValues) {
const auto data = std::make_shared<ngraph::opset4::Parameter>(
ngraph::element::Type_t::f32, ngraph::Shape{1, 3, 224, 224});
const auto in_low =
std::make_shared<ngraph::opset4::Parameter>(ngraph::element::Type_t::f32, ngraph::Shape{});
const auto in_high =
std::make_shared<ngraph::opset4::Parameter>(ngraph::element::Type_t::f32, ngraph::Shape{});
const auto out_low = ngraph::opset4::Constant::create(
ngraph::element::Type_t::f32, ngraph::Shape{}, {1.0f});
const auto out_high = ngraph::opset4::Constant::create(
ngraph::element::Type_t::f32, ngraph::Shape{}, {100.0f});
const auto fq = std::make_shared<ngraph::opset4::FakeQuantize>(
data, in_low, in_high, out_low, out_high, 42);
const auto mul_value = ngraph::opset4::Constant::create(
ngraph::element::Type_t::f32, ngraph::Shape{1, 3, 1, 1}, {3, 3, 3});
const auto mul = std::make_shared<ngraph::opset4::Multiply>(fq, mul_value);
auto function = std::make_shared<ngraph::Function>(ngraph::OutputVector{mul},
ngraph::ParameterVector{data, in_low, in_high});
const auto expected_out_low = ngraph::opset4::Constant::create(
ngraph::element::Type_t::f32, ngraph::Shape{1}, {3.0f});
// this constant should be created by constant folding of the last FQ input
const auto expected_out_high = ngraph::opset4::Constant::create(
ngraph::element::Type_t::f32, ngraph::Shape{1}, {300.0f});
const auto expected_fq = std::make_shared<ngraph::opset4::FakeQuantize>(
data, in_low, in_high, expected_out_low, expected_out_high, 42);
const auto expected_function =
std::make_shared<ngraph::Function>(ngraph::OutputVector{expected_fq},
ngraph::ParameterVector{data, in_low, in_high});
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::pass::FakeQuantizeMulFusion>();
manager.run_passes(function);
ASSERT_NO_THROW(check_rt_info(function));
const auto res = compare_functions(function, expected_function, true);
ASSERT_TRUE(res.first) << res.second;
}
} // namespace

View File

@@ -42,7 +42,6 @@ TEST(TransformationTests, MulFakeQuantizeFusionPositiveConstant) {
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::MulFakeQuantizeFusion>();
m.register_pass<pass::ConstantFolding>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
@@ -81,7 +80,6 @@ TEST(TransformationTests, MulFakeQuantizeFusionConstantOnFirstInput) {
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::MulFakeQuantizeFusion>();
m.register_pass<pass::ConstantFolding>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
@@ -120,7 +118,6 @@ TEST(TransformationTests, MulFakeQuantizeFusionReshape) {
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::MulFakeQuantizeFusion>();
m.register_pass<pass::ConstantFolding>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
@@ -140,13 +137,13 @@ TEST(TransformationTests, MulFakeQuantizeFusionReshape) {
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, MulFakeQuantizeFusionConstantAllNegative) {
TEST(TransformationTests, MulFakeQuantizeFusionConstantNonScalarWithEqualValues) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
Shape data_shape{1, 3, 14, 14};
{
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
auto mul_const = opset5::Constant::create(element::f32, Shape{1}, {-2});
auto mul_const = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {2, 2, 2});
auto mul = std::make_shared<opset5::Multiply>(data, mul_const);
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {1});
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {20});
@@ -159,16 +156,15 @@ TEST(TransformationTests, MulFakeQuantizeFusionConstantAllNegative) {
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::MulFakeQuantizeFusion>();
m.register_pass<pass::ConstantFolding>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {-0.5});
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {-10});
auto output_low = opset5::Constant::create(element::f32, Shape{1}, {10});
auto output_high = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-10, -10, -10});
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {0.5});
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {10});
auto output_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-10, -10, -10});
auto output_high = opset5::Constant::create(element::f32, Shape{1}, {10});
auto fq = std::make_shared<opset5::FakeQuantize>(data, input_low,
input_high, output_low,
output_high, 11);
@@ -179,84 +175,6 @@ TEST(TransformationTests, MulFakeQuantizeFusionConstantAllNegative) {
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, MulFakeQuantizeFusionConstantSomeNegative) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
Shape data_shape{1, 3, 14, 14};
{
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
auto mul_const = opset5::Constant::create(element::f32, Shape{3, 1, 1}, {2, 1, -2});
auto mul = std::make_shared<opset5::Multiply>(data, mul_const);
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {1});
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {20});
auto output_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-10, -10, -10});
auto output_high = opset5::Constant::create(element::f32, Shape{1}, {10});
auto fq = std::make_shared<opset5::FakeQuantize>(mul, input_low,
input_high, output_low,
output_high, 20);
f = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::MulFakeQuantizeFusion>();
m.register_pass<pass::ConstantFolding>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
auto input_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {0.5f, 1.0f, -0.5f});
auto input_high = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {10.0f, 20.0f, -10.0f});
auto output_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-10.0f, -10.0f, 10.0f});
auto output_high = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {10.0f, 10.0f, -10.0f});
auto fq = std::make_shared<opset5::FakeQuantize>(data, input_low,
input_high, output_low,
output_high, 20);
f_ref = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
}
auto res = compare_functions(f, f_ref, true);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, MulFakeQuantizeFusionConstantSomeNegativeF16) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
Shape data_shape{1, 3, 14, 14};
{
auto data = std::make_shared<opset5::Parameter>(element::f16, data_shape);
auto mul_const = opset5::Constant::create(element::f16, Shape{3, 1, 1}, {2, 1, -2});
auto mul = std::make_shared<opset5::Multiply>(data, mul_const);
auto input_low = opset5::Constant::create(element::f16, Shape{1}, {1});
auto input_high = opset5::Constant::create(element::f16, Shape{1}, {20});
auto output_low = opset5::Constant::create(element::f16, Shape{1, 3, 1, 1}, {-10, -10, -10});
auto output_high = opset5::Constant::create(element::f16, Shape{1}, {10});
auto fq = std::make_shared<opset5::FakeQuantize>(mul, input_low,
input_high, output_low,
output_high, 20);
f = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::MulFakeQuantizeFusion>();
m.register_pass<pass::ConstantFolding>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data = std::make_shared<opset5::Parameter>(element::f16, data_shape);
auto input_low = opset5::Constant::create(element::f16, Shape{1, 3, 1, 1}, {0.5f, 1.0f, -0.5f});
auto input_high = opset5::Constant::create(element::f16, Shape{1, 3, 1, 1}, {10.0f, 20.0f, -10.0f});
auto output_low = opset5::Constant::create(element::f16, Shape{1, 3, 1, 1}, {-10.0f, -10.0f, 10.0f});
auto output_high = opset5::Constant::create(element::f16, Shape{1, 3, 1, 1}, {10.0f, 10.0f, -10.0f});
auto fq = std::make_shared<opset5::FakeQuantize>(data, input_low,
input_high, output_low,
output_high, 20);
f_ref = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
}
auto res = compare_functions(f, f_ref, true);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, NegativeMulFakeQuantizeFusionNotAConstant) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
@@ -276,7 +194,6 @@ TEST(TransformationTests, NegativeMulFakeQuantizeFusionNotAConstant) {
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::MulFakeQuantizeFusion>();
m.register_pass<pass::ConstantFolding>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
@@ -297,3 +214,79 @@ TEST(TransformationTests, NegativeMulFakeQuantizeFusionNotAConstant) {
auto res = compare_functions(f, f_ref, true);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, NegativeMulFakeQuantizeFusionLowPrecision) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
Shape data_shape{1, 3, 14, 14};
auto data = std::make_shared<opset5::Parameter>(element::f16, data_shape);
auto mul_const = opset5::Constant::create(element::f16, Shape{1}, {2});
auto mul = std::make_shared<opset5::Multiply>(data, mul_const);
auto input_low = opset5::Constant::create(element::f16, Shape{1}, {1});
auto input_high = opset5::Constant::create(element::f16, Shape{1}, {20});
auto output_low = opset5::Constant::create(element::f16, Shape{1, 3, 1, 1}, {0, 0, 0});
auto output_high = opset5::Constant::create(element::f16, Shape{1}, {10});
auto fq = std::make_shared<opset5::FakeQuantize>(mul, input_low,
input_high, output_low,
output_high, 11);
f = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
f_ref = clone_function(*f);
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::MulFakeQuantizeFusion>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
auto res = compare_functions(f, f_ref, true);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, NegativeMulFakeQuantizeFusionConstantAllNegative) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
Shape data_shape{1, 3, 14, 14};
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
auto mul_const = opset5::Constant::create(element::f32, Shape{1}, {-2});
auto mul = std::make_shared<opset5::Multiply>(data, mul_const);
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {1});
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {20});
auto output_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-10, -10, -10});
auto output_high = opset5::Constant::create(element::f32, Shape{1}, {10});
auto fq = std::make_shared<opset5::FakeQuantize>(mul, input_low,
input_high, output_low,
output_high, 11);
f = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
f_ref = clone_function(*f);
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::MulFakeQuantizeFusion>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
auto res = compare_functions(f, f_ref, true);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, NegativeMulFakeQuantizeFusionConstantSomeNegative) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
Shape data_shape{1, 3, 14, 14};
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
auto mul_const = opset5::Constant::create(element::f32, Shape{3, 1, 1}, {2, 1, -2});
auto mul = std::make_shared<opset5::Multiply>(data, mul_const);
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {1});
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {20});
auto output_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-10, -10, -10});
auto output_high = opset5::Constant::create(element::f32, Shape{1}, {10});
auto fq = std::make_shared<opset5::FakeQuantize>(mul, input_low,
input_high, output_low,
output_high, 20);
f = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
f_ref = clone_function(*f);
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::MulFakeQuantizeFusion>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
auto res = compare_functions(f, f_ref, true);
ASSERT_TRUE(res.first) << res.second;
}

View File

@@ -24,16 +24,16 @@
using namespace testing;
TEST(TransformationTests, FQTransposeTest1) {
auto data1 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 3}, {1, 2, 3});
auto data2 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{3}, {1, 2, 3});
auto data3 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3}, {1, 2, 3});
auto data4 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3}, {1, 2, 3});
auto data5 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3}, {1, 2, 3});
auto transpose_order = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{3}, {0, 2, 1});
std::shared_ptr<ngraph::Function> f(nullptr);
std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
{
auto fq = std::make_shared<ngraph::op::FakeQuantize>(data1, data2, data3, data4, data5, 1);
auto data = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 3}, {1, 2, 3});
auto input_low = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {2});
auto input_high = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {3});
auto output_low = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {2});
auto output_high = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {3});
auto transpose_order = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{3}, {0, 2, 1});
auto fq = std::make_shared<ngraph::op::FakeQuantize>(data, input_low, input_high, output_low, output_high, 1);
auto transpose = std::make_shared<ngraph::op::Transpose>(fq, transpose_order);
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{transpose}, ngraph::ParameterVector{});
@@ -47,29 +47,35 @@ TEST(TransformationTests, FQTransposeTest1) {
manager.register_pass<ngraph::pass::ConstantFolding>();
ASSERT_NO_THROW(manager.run_passes(f));
}
std::vector<size_t> ref_shape{1, 3, 1};
for (auto op : f->get_ops()) {
if (auto constant = ngraph::as_type_ptr<ngraph::op::Constant>(op)) {
auto shape = constant->get_shape();
ASSERT_EQ(shape, ref_shape);
}
{
auto data = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3, 1}, {1, 2, 3});
auto input_low = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 1}, {2});
auto input_high = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 1}, {3});
auto output_low = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 1}, {2});
auto output_high = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 1}, {3});
auto fq = std::make_shared<ngraph::op::FakeQuantize>(data, input_low, input_high, output_low, output_high, 1);
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{fq}, ngraph::ParameterVector{});
}
auto res = compare_functions(f, f_ref, true);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, FQTransposeDynamic) {
auto data1 = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::PartialShape::dynamic());
auto data2 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{3}, {1, 2, 3});
auto data3 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3}, {1, 2, 3});
auto data4 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3}, {1, 2, 3});
auto data5 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3}, {1, 2, 3});
auto data = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::PartialShape::dynamic());
auto input_low = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {2});
auto input_high = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {3});
auto output_low = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {2});
auto output_high = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {3});
auto transpose_order = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{3}, {0, 2, 1});
std::shared_ptr<ngraph::Function> f(nullptr);
{
auto fq = std::make_shared<ngraph::op::FakeQuantize>(data1, data2, data3, data4, data5, 1);
auto fq = std::make_shared<ngraph::op::FakeQuantize>(data, input_low, input_high, output_low, output_high, 1);
auto transpose = std::make_shared<ngraph::op::Transpose>(fq, transpose_order);
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{transpose}, ngraph::ParameterVector{data1});
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{transpose}, ngraph::ParameterVector{data});
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();