Reenable AddFakeQuantizeFusion and MulFakeQuantizeFusion (#5574)
* Reenable AddFakeQuantizeFusion and MulFakeQuantizeFusion
* Remove unused variable
* Simplify is_single_value
* Skip transformations for low precision types
* Add comment regarding restriction in AddFakeQuantizeFusion
* Remove fp16 test
* Remove negative const handling
This commit is contained in:
parent
8985feff6f
commit
5d6ef444a5
@ -171,11 +171,14 @@ std::stringstream toStream(const std::vector<float>& dequantizationValues) {
|
||||
}
|
||||
|
||||
void LayerTransformation::printDequantizationInfo(const std::shared_ptr<Node>& layer) {
|
||||
const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(ov::as_type_ptr<opset1::FakeQuantize>(layer));
|
||||
std::cout <<
|
||||
layer->get_type_name() << (NetworkHelper::isConstantPath(layer) ? " on weights " : " on activations ") <<
|
||||
layer->get_friendly_name() << ":" << std::endl <<
|
||||
" details : " << quantizationDetails << std::endl;
|
||||
auto fq = as_type_ptr<opset1::FakeQuantize>(layer);
|
||||
if (fq) {
|
||||
const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(ov::as_type_ptr<opset1::FakeQuantize>(layer));
|
||||
std::cout <<
|
||||
layer->get_type_name() << (NetworkHelper::isConstantPath(layer) ? " on weights " : " on activations ") <<
|
||||
layer->get_friendly_name() << ":" << std::endl <<
|
||||
" details : " << quantizationDetails << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void LayerTransformation::printDequantizationInfo(const DataPrecision& dataPrecision) {
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <ngraph/opsets/opset5.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/validation_util.hpp>
|
||||
#include "itt.hpp"
|
||||
|
||||
|
||||
@ -29,38 +30,85 @@ ngraph::pass::AddFakeQuantizeFusion::AddFakeQuantizeFusion() {
|
||||
ngraph::pattern::any_input()});
|
||||
ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) {
|
||||
const auto& pattern_value_map = m.get_pattern_value_map();
|
||||
const auto& input = pattern_value_map.at(input_pattern);
|
||||
const auto& type = input.get_element_type();
|
||||
if (type.bitwidth() < element::f32.bitwidth())
|
||||
return false;
|
||||
auto fq = std::dynamic_pointer_cast<opset5::FakeQuantize>(pattern_value_map.at(fq_pattern).get_node_shared_ptr());
|
||||
if (!fq)
|
||||
return false;
|
||||
std::shared_ptr<Node> add_const = std::dynamic_pointer_cast<opset5::Constant>(pattern_value_map.at(const_pattern).get_node_shared_ptr());
|
||||
const auto& add_node = pattern_value_map.at(add_pattern).get_node_shared_ptr();
|
||||
auto add_const = std::dynamic_pointer_cast<opset5::Constant>(pattern_value_map.at(const_pattern).get_node_shared_ptr());
|
||||
if (!add_const)
|
||||
return false;
|
||||
std::shared_ptr<Node> new_const = add_const;
|
||||
auto const_shape = add_const->get_shape();
|
||||
size_t const_shape_size = shape_size(const_shape);
|
||||
if (const_shape_size > 1) {
|
||||
bool is_single_value = const_shape_size == 1;
|
||||
|
||||
if (!is_single_value) {
|
||||
float v;
|
||||
is_single_value = op::util::get_single_value(add_const, v);
|
||||
if (is_single_value) {
|
||||
new_const = std::make_shared<opset5::Constant>(add_const->get_element_type(), Shape{1}, v);
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_single_value) {
|
||||
// disallow constant shapes other than (N, 1, 1, ..., 1) or (1, C, 1, ..., 1)
|
||||
if (!(const_shape[0] > 1 && const_shape[0] == const_shape_size) &&
|
||||
!(const_shape.size() > 1 && const_shape[1] == const_shape_size)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Convolution+Add or MatMul+Add can be fused later
|
||||
// so don't fuse Add+FQ in that situation
|
||||
const auto& add_inputs = add_node->input_values();
|
||||
bool add_parent_is_conv_or_mm = std::any_of(add_inputs.begin(), add_inputs.end(),
|
||||
[] (const Output<Node>& node) -> bool {
|
||||
auto node_ptr = node.get_node();
|
||||
return is_type<opset5::Convolution>(node_ptr) ||
|
||||
is_type<opset5::GroupConvolution>(node_ptr) ||
|
||||
is_type<opset5::ConvolutionBackpropData>(node_ptr) ||
|
||||
is_type<opset5::GroupConvolutionBackpropData>(node_ptr) ||
|
||||
is_type<opset5::MatMul>(node_ptr);
|
||||
});
|
||||
if (add_parent_is_conv_or_mm)
|
||||
return false;
|
||||
auto fq_users = fq->get_users();
|
||||
// Concat LPT transformation supports per tensor quantization only
|
||||
bool fq_user_is_concat = std::any_of(fq_users.begin(), fq_users.end(),
|
||||
[] (const Output<Node>& node) -> bool {
|
||||
auto node_ptr = node.get_node();
|
||||
return is_type<opset5::Concat>(node_ptr);
|
||||
});
|
||||
if (fq_user_is_concat)
|
||||
return false;
|
||||
auto diff = fq->get_input_partial_shape(0).rank().get_length() - static_cast<Dimension::value_type>(const_shape.size());
|
||||
if (diff > 0) {
|
||||
// Reshape constants like (C, 1, 1) to (1, C, 1, 1)
|
||||
const_shape.insert(const_shape.begin(), diff, 1);
|
||||
new_const = std::make_shared<opset5::Reshape>(new_const,
|
||||
op::Constant::create(element::u64, Shape{const_shape.size()}, const_shape), false);
|
||||
}
|
||||
}
|
||||
|
||||
if (const_shape_size > 1 &&
|
||||
static_cast<Dimension::value_type>(const_shape.size()) < fq->get_input_partial_shape(0).rank().get_length()) {
|
||||
// Reshape constants like (C, 1, 1) to (1, C, 1, 1)
|
||||
const_shape.insert(const_shape.begin(), fq->get_input_partial_shape(0).rank().get_length() - const_shape.size(), 1);
|
||||
add_const = std::make_shared<opset5::Reshape>(add_const, op::Constant::create(element::u64, Shape{const_shape.size()}, const_shape), false);
|
||||
}
|
||||
auto new_input_low = std::make_shared<opset5::Subtract>(fq->input_value(1), add_const);
|
||||
auto new_input_high = std::make_shared<opset5::Subtract>(fq->input_value(2), add_const);
|
||||
auto new_fq = register_new_node<opset5::FakeQuantize>(pattern_value_map.at(input_pattern),
|
||||
auto input_low_sub = std::make_shared<opset5::Subtract>(fq->input_value(1), new_const);
|
||||
std::shared_ptr<Node> new_input_low = get_constant_from_source(input_low_sub);
|
||||
if (!new_input_low)
|
||||
new_input_low = input_low_sub;
|
||||
auto input_high_sub = std::make_shared<opset5::Subtract>(fq->input_value(2), new_const);
|
||||
std::shared_ptr<Node> new_input_high = get_constant_from_source(input_high_sub);
|
||||
if (!new_input_high)
|
||||
new_input_high = input_high_sub;
|
||||
auto new_fq = register_new_node<opset5::FakeQuantize>(input,
|
||||
new_input_low,
|
||||
new_input_high,
|
||||
fq->input_value(3),
|
||||
fq->input_value(4),
|
||||
fq->get_levels());
|
||||
new_fq->set_friendly_name(fq->get_friendly_name());
|
||||
copy_runtime_info({pattern_value_map.at(add_pattern).get_node_shared_ptr(), fq}, {new_input_low, new_input_high, new_fq});
|
||||
copy_runtime_info({add_node, fq}, {new_input_low, new_input_high, new_fq});
|
||||
replace_node(fq, new_fq);
|
||||
return true;
|
||||
};
|
||||
|
@ -195,6 +195,8 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::
|
||||
fq_fusions->add_matcher<ngraph::pass::FakeQuantizeReshapeFusion>();
|
||||
fq_fusions->add_matcher<ngraph::pass::PullTransposeThroughFQUp>();
|
||||
fq_fusions->add_matcher<ngraph::pass::ReluFakeQuantizeFusion>();
|
||||
fq_fusions->add_matcher<ngraph::pass::AddFakeQuantizeFusion>();
|
||||
fq_fusions->add_matcher<ngraph::pass::MulFakeQuantizeFusion>();
|
||||
fq_fusions->set_name("ngraph::pass::FakeQuantizeFusions");
|
||||
|
||||
// StridesOptimization should be at the very end
|
||||
|
@ -12,35 +12,10 @@
|
||||
#include <ngraph/opsets/opset4.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <ngraph/validation_util.hpp>
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::FakeQuantizeMulFusion, "FakeQuantizeMulFusion", 0);
|
||||
|
||||
namespace {
|
||||
std::pair<ngraph::Output<ngraph::Node>, ngraph::Output<ngraph::Node>>
|
||||
get_adjusted_output_range(ngraph::Output<ngraph::Node> out_low,
|
||||
ngraph::Output<ngraph::Node> out_high,
|
||||
ngraph::Output<ngraph::Node> multiplier) {
|
||||
const auto mul_out_low = std::make_shared<ngraph::opset4::Multiply>(out_low, multiplier);
|
||||
const auto mul_out_high = std::make_shared<ngraph::opset4::Multiply>(out_high, multiplier);
|
||||
copy_runtime_info({out_low.get_node_shared_ptr(), multiplier.get_node_shared_ptr()},
|
||||
mul_out_low);
|
||||
copy_runtime_info({out_high.get_node_shared_ptr(), multiplier.get_node_shared_ptr()},
|
||||
mul_out_high);
|
||||
|
||||
ngraph::OutputVector new_out_low(1), new_out_high(1);
|
||||
|
||||
if (!mul_out_low->constant_fold(new_out_low, {out_low, multiplier})) {
|
||||
new_out_low[0] = mul_out_low;
|
||||
}
|
||||
|
||||
if (!mul_out_high->constant_fold(new_out_high, {out_high, multiplier})) {
|
||||
new_out_high[0] = mul_out_high;
|
||||
}
|
||||
|
||||
return {new_out_low[0], new_out_high[0]};
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// This transformation multiplies the "output_low" and "output_high" inputs of the FQ operation
|
||||
// by the constant value that before transformation is used to multiply the output of FQ.
|
||||
// Both output_low and output_high are multiplied by the value represented as C (a constant) below.
|
||||
@ -64,10 +39,11 @@ std::pair<ngraph::Output<ngraph::Node>, ngraph::Output<ngraph::Node>>
|
||||
|
||||
ngraph::pass::FakeQuantizeMulFusion::FakeQuantizeMulFusion() {
|
||||
MATCHER_SCOPE(FakeQuantizeMulFusion);
|
||||
const auto data_p = ngraph::pattern::any_input();
|
||||
const auto fq_output_low_p = ngraph::pattern::any_input();
|
||||
const auto fq_output_high_p = ngraph::pattern::any_input();
|
||||
|
||||
const auto fq_node_p = ngraph::pattern::wrap_type<opset4::FakeQuantize>({ngraph::pattern::any_input(),
|
||||
const auto fq_node_p = ngraph::pattern::wrap_type<opset4::FakeQuantize>({data_p,
|
||||
ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input(),
|
||||
fq_output_low_p,
|
||||
@ -81,20 +57,65 @@ ngraph::pass::FakeQuantizeMulFusion::FakeQuantizeMulFusion() {
|
||||
ngraph::matcher_pass_callback callback = [=](pattern::Matcher &m) {
|
||||
const auto& pattern_map = m.get_pattern_value_map();
|
||||
|
||||
const auto& data = pattern_map.at(data_p);
|
||||
const auto fq_node = pattern_map.at(fq_node_p).get_node_shared_ptr();
|
||||
|
||||
const auto & original_output_low = pattern_map.at(fq_output_low_p);
|
||||
const auto & original_output_high = pattern_map.at(fq_output_high_p);
|
||||
const auto & mul_constant = pattern_map.at(mul_constant_p);
|
||||
auto mul_constant = pattern_map.at(mul_constant_p).get_node_shared_ptr();
|
||||
auto mul_constant_shape = mul_constant->get_shape();
|
||||
bool is_single_value = shape_size(mul_constant_shape) == 1;
|
||||
|
||||
const auto new_output_limits = get_adjusted_output_range(
|
||||
original_output_low, original_output_high, mul_constant);
|
||||
if (!is_single_value) {
|
||||
float v;
|
||||
auto constant = std::dynamic_pointer_cast<opset4::Constant>(mul_constant);
|
||||
if (constant) {
|
||||
is_single_value = op::util::get_single_value(constant, v);
|
||||
if (is_single_value) {
|
||||
mul_constant_shape = Shape{1};
|
||||
mul_constant = std::make_shared<opset4::Constant>(mul_constant->get_element_type(), mul_constant_shape, v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_single_value) {
|
||||
auto fq_outputs = fq_node->get_users();
|
||||
// Convolution and GroupConvolution LP transformations require output low/high to have the same values
|
||||
bool fq_output_is_conv = std::any_of(fq_outputs.begin(), fq_outputs.end(),
|
||||
[] (const std::shared_ptr<Node>& node) -> bool {
|
||||
return is_type<opset4::Convolution>(node) ||
|
||||
is_type<opset4::GroupConvolution>(node);
|
||||
});
|
||||
if (fq_output_is_conv) {
|
||||
return false;
|
||||
}
|
||||
const auto & data_rank = data.get_partial_shape().rank();
|
||||
if (data_rank.is_dynamic()) {
|
||||
return false;
|
||||
}
|
||||
auto rank = data_rank.get_length();
|
||||
auto diff = rank - mul_constant_shape.size();
|
||||
if (diff > 0) {
|
||||
mul_constant_shape.insert(mul_constant_shape.begin(), diff, 1);
|
||||
mul_constant = std::make_shared<ngraph::opset4::Reshape>(mul_constant,
|
||||
op::Constant::create(element::i64, Shape{mul_constant_shape.size()}, mul_constant_shape), false);
|
||||
}
|
||||
}
|
||||
|
||||
auto get_adjusted_output_range = [&] (const Output<Node>& node) -> std::shared_ptr<Node> {
|
||||
auto ret = std::make_shared<ngraph::opset4::Multiply>(node, mul_constant);
|
||||
copy_runtime_info(node.get_node_shared_ptr(), ret);
|
||||
auto constant = get_constant_from_source(ret);
|
||||
if (constant)
|
||||
return constant;
|
||||
return ret;
|
||||
};
|
||||
|
||||
const auto new_fq_node = fq_node->clone_with_new_inputs({fq_node->input_value(0),
|
||||
fq_node->input_value(1),
|
||||
fq_node->input_value(2),
|
||||
new_output_limits.first,
|
||||
new_output_limits.second});
|
||||
get_adjusted_output_range(original_output_low),
|
||||
get_adjusted_output_range(original_output_high)});
|
||||
|
||||
const auto mul_node = pattern_map.at(mul_node_p).get_node_shared_ptr();
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <ngraph/opsets/opset5.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/validation_util.hpp>
|
||||
#include "itt.hpp"
|
||||
|
||||
|
||||
@ -29,6 +30,10 @@ ngraph::pass::MulFakeQuantizeFusion::MulFakeQuantizeFusion() {
|
||||
ngraph::pattern::any_input()});
|
||||
ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) {
|
||||
const auto& pattern_value_map = m.get_pattern_value_map();
|
||||
const auto& input = pattern_value_map.at(input_pattern);
|
||||
const auto& type = input.get_element_type();
|
||||
if (type.bitwidth() < element::f32.bitwidth())
|
||||
return false;
|
||||
auto fq = std::dynamic_pointer_cast<opset5::FakeQuantize>(pattern_value_map.at(fq_pattern).get_node_shared_ptr());
|
||||
if (!fq)
|
||||
return false;
|
||||
@ -37,74 +42,61 @@ ngraph::pass::MulFakeQuantizeFusion::MulFakeQuantizeFusion() {
|
||||
return false;
|
||||
|
||||
auto mul_const_value = mul_const->cast_vector<float>();
|
||||
if (std::any_of(mul_const_value.begin(), mul_const_value.end(), [] (float f) -> bool { return f == 0.0f; }))
|
||||
if (std::any_of(mul_const_value.begin(), mul_const_value.end(), [] (float f) -> bool { return f <= 0.0f; }))
|
||||
return false;
|
||||
|
||||
std::shared_ptr<Node> new_const = mul_const;
|
||||
auto const_shape = mul_const->get_shape();
|
||||
size_t const_shape_size = shape_size(const_shape);
|
||||
if (const_shape_size > 1) {
|
||||
bool is_single_value = const_shape_size == 1;
|
||||
|
||||
if (!is_single_value) {
|
||||
float v;
|
||||
is_single_value = op::util::get_single_value(mul_const, v);
|
||||
if (is_single_value) {
|
||||
new_const = std::make_shared<opset5::Constant>(mul_const->get_element_type(), Shape{1}, v);
|
||||
const_shape = Shape{1};
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_single_value) {
|
||||
// disallow constant shapes other than (N, 1, 1, ..., 1) or (1, C, 1, ..., 1)
|
||||
if (!(const_shape[0] > 1 && const_shape[0] == const_shape_size) &&
|
||||
!(const_shape.size() > 1 && const_shape[1] == const_shape_size)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> mul_const_node = mul_const;
|
||||
if (const_shape_size > 1 &&
|
||||
static_cast<Dimension::value_type>(const_shape.size()) < fq->get_input_partial_shape(0).rank().get_length()) {
|
||||
const auto& rank = fq->get_input_partial_shape(0).rank();
|
||||
if (rank.is_dynamic())
|
||||
return false;
|
||||
auto fq_users = fq->get_users();
|
||||
// Concat LPT transformation supports per tensor quantization only
|
||||
bool fq_user_is_concat = std::any_of(fq_users.begin(), fq_users.end(),
|
||||
[] (const Output<Node>& node) -> bool {
|
||||
auto node_ptr = node.get_node();
|
||||
return is_type<opset5::Concat>(node_ptr);
|
||||
});
|
||||
if (fq_user_is_concat)
|
||||
return false;
|
||||
auto diff = rank.get_length() - static_cast<Dimension::value_type>(const_shape.size());
|
||||
// Reshape constants like (C, 1, 1) to (1, C, 1, 1)
|
||||
const_shape.insert(const_shape.begin(), fq->get_input_partial_shape(0).rank().get_length() - const_shape.size(), 1);
|
||||
mul_const_node = std::make_shared<opset5::Reshape>(mul_const_node,
|
||||
const_shape.insert(const_shape.begin(), diff, 1);
|
||||
new_const = std::make_shared<opset5::Reshape>(new_const,
|
||||
op::Constant::create(element::u64, Shape{const_shape.size()}, const_shape), false);
|
||||
}
|
||||
|
||||
auto new_input_low = std::make_shared<opset5::Divide>(fq->input_value(1), mul_const_node);
|
||||
auto new_input_high = std::make_shared<opset5::Divide>(fq->input_value(2), mul_const_node);
|
||||
auto input_low_div = std::make_shared<opset5::Divide>(fq->input_value(1), new_const);
|
||||
std::shared_ptr<Node> new_input_low = get_constant_from_source(input_low_div);
|
||||
if (!new_input_low)
|
||||
new_input_low = input_low_div;
|
||||
auto input_high_div = std::make_shared<opset5::Divide>(fq->input_value(2), new_const);
|
||||
std::shared_ptr<Node> new_input_high = get_constant_from_source(input_high_div);
|
||||
if (!new_input_high)
|
||||
new_input_high = input_high_div;
|
||||
|
||||
auto mul = pattern_value_map.at(mul_pattern).get_node_shared_ptr();
|
||||
const auto& mul_data = pattern_value_map.at(input_pattern);
|
||||
|
||||
std::shared_ptr<Node> new_fq;
|
||||
if (std::all_of(mul_const_value.begin(), mul_const_value.end(), [] (float f) -> bool { return f < 0.0f; })) {
|
||||
new_fq = register_new_node<opset5::FakeQuantize>(mul_data, new_input_low, new_input_high,
|
||||
fq->input_value(4), fq->input_value(3), fq->get_levels());
|
||||
copy_runtime_info({mul, fq}, {mul_const_node, new_input_low, new_input_high, new_fq});
|
||||
} else if (std::any_of(mul_const_value.begin(), mul_const_value.end(), [] (float f) -> bool { return f < 0.0f; })) {
|
||||
const auto& output_low = fq->input_value(3);
|
||||
const auto& output_high = fq->input_value(4);
|
||||
// get the mask of the values from mul_const that are less than zero
|
||||
std::vector<float> less_than_zero;
|
||||
less_than_zero.reserve(mul_const_value.size());
|
||||
// and greater or equal to zero
|
||||
std::vector<float> greater_eq_zero;
|
||||
greater_eq_zero.reserve(mul_const_value.size());
|
||||
for (size_t i = 0; i < mul_const_value.size(); i++) {
|
||||
less_than_zero.push_back(mul_const_value[i] < 0);
|
||||
greater_eq_zero.push_back(mul_const_value[i] >= 0);
|
||||
}
|
||||
auto less_const = op::Constant::create(output_low.get_element_type(), const_shape, less_than_zero);
|
||||
auto greater_eq_const = op::Constant::create(output_low.get_element_type(), const_shape, greater_eq_zero);
|
||||
// new_output_low is defined as follows:
|
||||
// output_low[i], when mul_const[i] >= 0
|
||||
// output_high[i], when mul_const[i] < 0
|
||||
auto new_output_low = std::make_shared<opset5::Add>(
|
||||
std::make_shared<opset5::Multiply>(greater_eq_const, output_low),
|
||||
std::make_shared<opset5::Multiply>(less_const, output_high));
|
||||
// new_output_high is defined as follows:
|
||||
// output_high[i], when mul_const[i] >= 0
|
||||
// output_low[i], when mul_const[i] < 0
|
||||
auto new_output_high = std::make_shared<opset5::Add>(
|
||||
std::make_shared<opset5::Multiply>(greater_eq_const, output_high),
|
||||
std::make_shared<opset5::Multiply>(less_const, output_low));
|
||||
new_fq = register_new_node<opset5::FakeQuantize>(mul_data, new_input_low,
|
||||
new_input_high, new_output_low, new_output_high, fq->get_levels());
|
||||
} else {
|
||||
new_fq = register_new_node<opset5::FakeQuantize>(mul_data, new_input_low, new_input_high,
|
||||
fq->input_value(3), fq->input_value(4), fq->get_levels());
|
||||
}
|
||||
|
||||
copy_runtime_info({mul, fq}, {mul_const_node, new_input_low, new_input_high, new_fq});
|
||||
auto new_fq = register_new_node<opset5::FakeQuantize>(input, new_input_low, new_input_high,
|
||||
fq->input_value(3), fq->input_value(4), fq->get_levels());
|
||||
copy_runtime_info({pattern_value_map.at(mul_pattern).get_node_shared_ptr(), fq},
|
||||
{new_const, new_input_low, new_input_high, new_fq});
|
||||
new_fq->set_friendly_name(fq->get_friendly_name());
|
||||
replace_node(fq, new_fq);
|
||||
return true;
|
||||
|
@ -17,18 +17,32 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::PullTransposeThroughFQUp, "PullTransposeThr
|
||||
ngraph::pass::PullTransposeThroughFQUp::PullTransposeThroughFQUp() {
|
||||
MATCHER_SCOPE(PullTransposeThroughFQUp);
|
||||
auto m_fq = pattern::wrap_type<opset1::FakeQuantize>({pattern::any_input(pattern::has_static_rank()),
|
||||
pattern::any_input(pattern::has_static_rank()),
|
||||
pattern::any_input(pattern::has_static_rank()),
|
||||
pattern::any_input(pattern::has_static_rank()),
|
||||
pattern::any_input(pattern::has_static_rank())},
|
||||
pattern::any_input(pattern::has_static_shape()),
|
||||
pattern::any_input(pattern::has_static_shape()),
|
||||
pattern::any_input(pattern::has_static_shape()),
|
||||
pattern::any_input(pattern::has_static_shape())},
|
||||
pattern::consumers_count(1));
|
||||
auto m_transpose = pattern::wrap_type<opset1::Transpose>({m_fq, pattern::wrap_type<opset1::Constant>()});
|
||||
auto m_transpose_perm = pattern::wrap_type<opset1::Constant>();
|
||||
auto m_transpose = pattern::wrap_type<opset1::Transpose>({m_fq, m_transpose_perm});
|
||||
|
||||
ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) {
|
||||
auto & pattern_map = m.get_pattern_value_map();
|
||||
auto transpose = pattern_map[m_transpose].get_node_shared_ptr();
|
||||
auto fq = pattern_map[m_fq].get_node_shared_ptr();
|
||||
|
||||
auto are_inputs_scalars = shape_size(fq->input_value(1).get_shape()) == 1 &&
|
||||
shape_size(fq->input_value(2).get_shape()) == 1 &&
|
||||
shape_size(fq->input_value(3).get_shape()) == 1 &&
|
||||
shape_size(fq->input_value(4).get_shape()) == 1;
|
||||
if (!are_inputs_scalars) {
|
||||
auto perm = std::dynamic_pointer_cast<opset1::Constant>(pattern_map[m_transpose_perm].get_node_shared_ptr());
|
||||
if (!perm)
|
||||
return false;
|
||||
auto perm_val = perm->cast_vector<int64_t>();
|
||||
if (!(perm_val[0] == 0 && perm_val[1] == 1))
|
||||
return false;
|
||||
}
|
||||
|
||||
auto input_rank = fq->input(0).get_partial_shape().rank().get_length();
|
||||
|
||||
ngraph::NodeVector new_ops;
|
||||
|
@ -42,7 +42,6 @@ TEST(TransformationTests, AddFakeQuantizeFusion) {
|
||||
pass::Manager m;
|
||||
m.register_pass<pass::InitNodeInfo>();
|
||||
m.register_pass<pass::AddFakeQuantizeFusion>();
|
||||
m.register_pass<pass::ConstantFolding>();
|
||||
m.run_passes(f);
|
||||
ASSERT_NO_THROW(check_rt_info(f));
|
||||
}
|
||||
@ -62,6 +61,50 @@ TEST(TransformationTests, AddFakeQuantizeFusion) {
|
||||
ASSERT_TRUE(res.first) << res.second;
|
||||
}
|
||||
|
||||
// Convolution -> Add(scalar constant) -> FakeQuantize: the scalar Add is expected
// to be folded into the FakeQuantize input range even though its parent is a
// Convolution (input_low/high 0/20 become -2/18).
TEST(TransformationTests, AddFakeQuantizeFusionWithConvolutionAndScalarConstant) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);

    Shape data_shape{1, 3, 14, 14};

    // Function under test: run AddFakeQuantizeFusion on it.
    {
        const auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto filter = std::make_shared<opset5::Parameter>(element::f32, Shape{1, 3, 2, 2});
        const auto conv = std::make_shared<opset5::Convolution>(
            data, filter, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
        const auto add_const = opset5::Constant::create(element::f32, Shape{1}, {2});
        const auto add = std::make_shared<opset5::Add>(conv, add_const);
        const auto input_low = opset5::Constant::create(element::f32, Shape{1}, {0});
        const auto input_high = opset5::Constant::create(element::f32, Shape{1}, {20});
        const auto output_low = opset5::Constant::create(element::f32, Shape{}, {0});
        const auto output_high = opset5::Constant::create(element::f32, Shape{}, {10});
        const auto fq = std::make_shared<opset5::FakeQuantize>(
            add, input_low, input_high, output_low, output_high, 11);
        f = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data, filter});

        pass::Manager m;
        m.register_pass<pass::InitNodeInfo>();
        m.register_pass<pass::AddFakeQuantizeFusion>();
        m.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    // Reference function: FakeQuantize directly on the Convolution with shifted input range.
    {
        const auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto filter = std::make_shared<opset5::Parameter>(element::f32, Shape{1, 3, 2, 2});
        const auto conv = std::make_shared<opset5::Convolution>(
            data, filter, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
        const auto input_low = opset5::Constant::create(element::f32, Shape{1}, {-2});
        const auto input_high = opset5::Constant::create(element::f32, Shape{1}, {18});
        const auto output_low = opset5::Constant::create(element::f32, Shape{}, {0});
        const auto output_high = opset5::Constant::create(element::f32, Shape{}, {10});
        const auto fq = std::make_shared<opset5::FakeQuantize>(
            conv, input_low, input_high, output_low, output_high, 11);
        f_ref = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data, filter});
    }

    const auto res = compare_functions(f, f_ref, true);
    ASSERT_TRUE(res.first) << res.second;
}
|
||||
|
||||
TEST(TransformationTests, AddFakeQuantizeFusionConstantOnFirstInput) {
|
||||
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
|
||||
|
||||
@ -81,7 +124,44 @@ TEST(TransformationTests, AddFakeQuantizeFusionConstantOnFirstInput) {
|
||||
pass::Manager m;
|
||||
m.register_pass<pass::InitNodeInfo>();
|
||||
m.register_pass<pass::AddFakeQuantizeFusion>();
|
||||
m.register_pass<pass::ConstantFolding>();
|
||||
m.run_passes(f);
|
||||
ASSERT_NO_THROW(check_rt_info(f));
|
||||
}
|
||||
{
|
||||
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
|
||||
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {-2});
|
||||
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {18});
|
||||
auto output_low = opset5::Constant::create(element::f32, Shape{}, {0});
|
||||
auto output_high = opset5::Constant::create(element::f32, Shape{}, {10});
|
||||
auto fq = std::make_shared<opset5::FakeQuantize>(data, input_low,
|
||||
input_high, output_low,
|
||||
output_high, 11);
|
||||
f_ref = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
|
||||
}
|
||||
|
||||
auto res = compare_functions(f, f_ref, true);
|
||||
ASSERT_TRUE(res.first) << res.second;
|
||||
}
|
||||
|
||||
TEST(TransformationTests, AddFakeQuantizeFusionConstantWithEqualValues) {
|
||||
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
|
||||
|
||||
Shape data_shape{1, 3, 14, 14};
|
||||
{
|
||||
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
|
||||
auto add_const = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {2, 2, 2});
|
||||
auto add = std::make_shared<opset5::Add>(add_const, data);
|
||||
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {0});
|
||||
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {20});
|
||||
auto output_low = opset5::Constant::create(element::f32, Shape{}, {0});
|
||||
auto output_high = opset5::Constant::create(element::f32, Shape{}, {10});
|
||||
auto fq = std::make_shared<opset5::FakeQuantize>(add, input_low,
|
||||
input_high, output_low,
|
||||
output_high, 11);
|
||||
f = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
|
||||
pass::Manager m;
|
||||
m.register_pass<pass::InitNodeInfo>();
|
||||
m.register_pass<pass::AddFakeQuantizeFusion>();
|
||||
m.run_passes(f);
|
||||
ASSERT_NO_THROW(check_rt_info(f));
|
||||
}
|
||||
@ -120,7 +200,6 @@ TEST(TransformationTests, AddFakeQuantizeFusionReshape) {
|
||||
pass::Manager m;
|
||||
m.register_pass<pass::InitNodeInfo>();
|
||||
m.register_pass<pass::AddFakeQuantizeFusion>();
|
||||
m.register_pass<pass::ConstantFolding>();
|
||||
m.run_passes(f);
|
||||
ASSERT_NO_THROW(check_rt_info(f));
|
||||
}
|
||||
@ -159,7 +238,6 @@ TEST(TransformationTests, NegativeAddFakeQuantizeFusionNotAConstant) {
|
||||
pass::Manager m;
|
||||
m.register_pass<pass::InitNodeInfo>();
|
||||
m.register_pass<pass::AddFakeQuantizeFusion>();
|
||||
m.register_pass<pass::ConstantFolding>();
|
||||
m.run_passes(f);
|
||||
ASSERT_NO_THROW(check_rt_info(f));
|
||||
}
|
||||
@ -180,3 +258,75 @@ TEST(TransformationTests, NegativeAddFakeQuantizeFusionNotAConstant) {
|
||||
auto res = compare_functions(f, f_ref, true);
|
||||
ASSERT_TRUE(res.first) << res.second;
|
||||
}
|
||||
|
||||
// Convolution -> Add(per-channel constant) -> FakeQuantize: the reference graph
// is identical to the input graph, i.e. the pass is expected to leave it untouched.
TEST(TransformationTests, NegativeAddFakeQuantizeFusionWithConvolutionAndNonScalarConstant) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);

    Shape data_shape{1, 3, 14, 14};

    // Function under test: run AddFakeQuantizeFusion on it.
    {
        const auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto filter = std::make_shared<opset5::Parameter>(element::f32, Shape{4, 3, 2, 2});
        const auto conv = std::make_shared<opset5::Convolution>(
            data, filter, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
        const auto add_const = opset5::Constant::create(element::f32, Shape{1, 4, 1, 1}, {1, 2, 3, 4});
        const auto add = std::make_shared<opset5::Add>(conv, add_const);
        const auto input_low = opset5::Constant::create(element::f32, Shape{1}, {0});
        const auto input_high = opset5::Constant::create(element::f32, Shape{1}, {20});
        const auto output_low = opset5::Constant::create(element::f32, Shape{}, {0});
        const auto output_high = opset5::Constant::create(element::f32, Shape{}, {10});
        const auto fq = std::make_shared<opset5::FakeQuantize>(
            add, input_low, input_high, output_low, output_high, 11);
        f = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data, filter});

        pass::Manager m;
        m.register_pass<pass::InitNodeInfo>();
        m.register_pass<pass::AddFakeQuantizeFusion>();
        m.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    // Reference function: the same graph, built independently.
    {
        const auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto filter = std::make_shared<opset5::Parameter>(element::f32, Shape{4, 3, 2, 2});
        const auto conv = std::make_shared<opset5::Convolution>(
            data, filter, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
        const auto add_const = opset5::Constant::create(element::f32, Shape{1, 4, 1, 1}, {1, 2, 3, 4});
        const auto add = std::make_shared<opset5::Add>(conv, add_const);
        const auto input_low = opset5::Constant::create(element::f32, Shape{1}, {0});
        const auto input_high = opset5::Constant::create(element::f32, Shape{1}, {20});
        const auto output_low = opset5::Constant::create(element::f32, Shape{}, {0});
        const auto output_high = opset5::Constant::create(element::f32, Shape{}, {10});
        const auto fq = std::make_shared<opset5::FakeQuantize>(
            add, input_low, input_high, output_low, output_high, 11);
        f_ref = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data, filter});
    }

    const auto res = compare_functions(f, f_ref, true);
    ASSERT_TRUE(res.first) << res.second;
}
|
||||
|
||||
// Add(scalar constant) -> FakeQuantize on f16 data: the reference is a clone of
// the input function taken before the pass runs, i.e. no change is expected.
TEST(TransformationTests, NegativeAddFakeQuantizeFusionLowPrecision) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);

    Shape data_shape{1, 3, 14, 14};

    const auto data = std::make_shared<opset5::Parameter>(element::f16, data_shape);
    const auto add_const = opset5::Constant::create(element::f16, Shape{1}, {2});
    const auto add = std::make_shared<opset5::Add>(data, add_const);
    const auto input_low = opset5::Constant::create(element::f16, Shape{1}, {0});
    const auto input_high = opset5::Constant::create(element::f16, Shape{1}, {20});
    const auto output_low = opset5::Constant::create(element::f16, Shape{}, {0});
    const auto output_high = opset5::Constant::create(element::f16, Shape{}, {10});
    const auto fq = std::make_shared<opset5::FakeQuantize>(
        add, input_low, input_high, output_low, output_high, 11);

    f = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
    // Snapshot the untouched graph before running the pass.
    f_ref = clone_function(*f);

    pass::Manager m;
    m.register_pass<pass::InitNodeInfo>();
    m.register_pass<pass::AddFakeQuantizeFusion>();
    m.run_passes(f);
    ASSERT_NO_THROW(check_rt_info(f));

    const auto res = compare_functions(f, f_ref, true);
    ASSERT_TRUE(res.first) << res.second;
}
|
||||
|
@ -49,8 +49,10 @@ public:
|
||||
const auto fq = std::make_shared<ngraph::opset4::FakeQuantize>(
|
||||
data, in_low, in_high, out_low, out_high, 255);
|
||||
|
||||
std::vector<float> mul_const(shape_size(mul_const_shape));
|
||||
std::iota(mul_const.begin(), mul_const.end(), 0);
|
||||
const auto mul_value = ngraph::opset4::Constant::create(
|
||||
ngraph::element::Type_t::f32, mul_const_shape, {3.14f});
|
||||
ngraph::element::Type_t::f32, mul_const_shape, mul_const);
|
||||
const auto mul = std::make_shared<ngraph::opset4::Multiply>(fq, mul_value);
|
||||
|
||||
m_function = std::make_shared<ngraph::Function>(
|
||||
@ -167,7 +169,7 @@ INSTANTIATE_TEST_SUITE_P(FQOutputs_1D__multiplier_3D, FQMulFusion,
|
||||
::testing::Values(ngraph::Shape{1, 64, 1, 1}),
|
||||
::testing::Values(ngraph::Shape{1}),
|
||||
::testing::Values(ngraph::Shape{1, 3, 1}),
|
||||
::testing::Values(ngraph::Shape{1, 3, 1})));
|
||||
::testing::Values(ngraph::Shape{1, 1, 3, 1})));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(FQInOUt_ones__multiplier_4D_with_channel, FQMulFusion,
|
||||
::testing::Combine(::testing::Values(ngraph::Shape{1, 64, 3, 3}),
|
||||
@ -176,6 +178,14 @@ INSTANTIATE_TEST_SUITE_P(FQInOUt_ones__multiplier_4D_with_channel, FQMulFusion,
|
||||
::testing::Values(ngraph::Shape{1, 64, 3, 3}),
|
||||
::testing::Values(ngraph::Shape{1, 64, 3, 3})));
|
||||
|
||||
// FQMulFusion instantiation with 3D data (1x128x512) and a 1D multiplier.
// Uses INSTANTIATE_TEST_SUITE_P like the other instantiations in this file;
// INSTANTIATE_TEST_CASE_P is the deprecated spelling of the same macro.
// NOTE(review): the tuple order is defined by the FQMulFusion fixture, which is
// not visible here -- presumably data shape, FQ input-range shape, FQ
// output-range shape, multiplier shape, expected output-range shape; confirm.
INSTANTIATE_TEST_SUITE_P(FQInOUt_ones__multiplier_3D, FQMulFusion,
                         ::testing::Combine(::testing::Values(ngraph::Shape{1, 128, 512}),
                                            ::testing::Values(ngraph::Shape{1}),
                                            ::testing::Values(ngraph::Shape{1}),
                                            ::testing::Values(ngraph::Shape{512}),
                                            ::testing::Values(ngraph::Shape{1, 1, 512})));
|
||||
|
||||
|
||||
TEST(FQMulFusion_NonConstInputs, AllInputsNonConst) {
|
||||
const auto data = std::make_shared<ngraph::opset4::Parameter>(
|
||||
ngraph::element::Type_t::f32, ngraph::Shape{1, 3, 224, 224});
|
||||
@ -383,6 +393,53 @@ TEST(TransformationTests, FakeQuantizeMultiplyFusionNegative) {
|
||||
ASSERT_EQ(function->get_output_shape(0), ngraph::Shape({1, 300, 16}));
|
||||
}
|
||||
|
||||
// FakeQuantize followed by Multiply with a {1, 3, 1, 1} constant whose three
// elements are all 3. The expected graph is a single FakeQuantize whose output
// range constants have Shape{1} and hold the original limits scaled by 3
// (1 -> 3, 100 -> 300).
TEST(TransformationTests, FakeQuantizeMultiplyFusionMulConstWithEqualValues) {
    namespace ops = ngraph::opset4;
    const auto f32 = ngraph::element::Type_t::f32;

    // Data and the FQ input range are graph parameters; only the output range is constant.
    const auto data = std::make_shared<ops::Parameter>(f32, ngraph::Shape{1, 3, 224, 224});
    const auto in_low = std::make_shared<ops::Parameter>(f32, ngraph::Shape{});
    const auto in_high = std::make_shared<ops::Parameter>(f32, ngraph::Shape{});
    const auto out_low = ops::Constant::create(f32, ngraph::Shape{}, {1.0f});
    const auto out_high = ops::Constant::create(f32, ngraph::Shape{}, {100.0f});
    const auto fq = std::make_shared<ops::FakeQuantize>(
        data, in_low, in_high, out_low, out_high, 42);

    const auto mul_value = ops::Constant::create(f32, ngraph::Shape{1, 3, 1, 1}, {3, 3, 3});
    const auto mul = std::make_shared<ops::Multiply>(fq, mul_value);

    const ngraph::ParameterVector params{data, in_low, in_high};
    auto function = std::make_shared<ngraph::Function>(ngraph::OutputVector{mul}, params);

    // Reference graph, built on the same parameter nodes before the passes run.
    const auto expected_out_low = ops::Constant::create(f32, ngraph::Shape{1}, {3.0f});
    // this constant should be created by constant folding of the last FQ input
    const auto expected_out_high = ops::Constant::create(f32, ngraph::Shape{1}, {300.0f});
    const auto expected_fq = std::make_shared<ops::FakeQuantize>(
        data, in_low, in_high, expected_out_low, expected_out_high, 42);
    const auto expected_function =
        std::make_shared<ngraph::Function>(ngraph::OutputVector{expected_fq}, params);

    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::InitNodeInfo>();
    manager.register_pass<ngraph::pass::FakeQuantizeMulFusion>();
    manager.run_passes(function);
    ASSERT_NO_THROW(check_rt_info(function));

    const auto comparison = compare_functions(function, expected_function, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
||||
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
@ -42,7 +42,6 @@ TEST(TransformationTests, MulFakeQuantizeFusionPositiveConstant) {
|
||||
pass::Manager m;
|
||||
m.register_pass<pass::InitNodeInfo>();
|
||||
m.register_pass<pass::MulFakeQuantizeFusion>();
|
||||
m.register_pass<pass::ConstantFolding>();
|
||||
m.run_passes(f);
|
||||
ASSERT_NO_THROW(check_rt_info(f));
|
||||
}
|
||||
@ -81,7 +80,6 @@ TEST(TransformationTests, MulFakeQuantizeFusionConstantOnFirstInput) {
|
||||
pass::Manager m;
|
||||
m.register_pass<pass::InitNodeInfo>();
|
||||
m.register_pass<pass::MulFakeQuantizeFusion>();
|
||||
m.register_pass<pass::ConstantFolding>();
|
||||
m.run_passes(f);
|
||||
ASSERT_NO_THROW(check_rt_info(f));
|
||||
}
|
||||
@ -120,7 +118,6 @@ TEST(TransformationTests, MulFakeQuantizeFusionReshape) {
|
||||
pass::Manager m;
|
||||
m.register_pass<pass::InitNodeInfo>();
|
||||
m.register_pass<pass::MulFakeQuantizeFusion>();
|
||||
m.register_pass<pass::ConstantFolding>();
|
||||
m.run_passes(f);
|
||||
ASSERT_NO_THROW(check_rt_info(f));
|
||||
}
|
||||
@ -140,13 +137,13 @@ TEST(TransformationTests, MulFakeQuantizeFusionReshape) {
|
||||
ASSERT_TRUE(res.first) << res.second;
|
||||
}
|
||||
|
||||
TEST(TransformationTests, MulFakeQuantizeFusionConstantAllNegative) {
|
||||
TEST(TransformationTests, MulFakeQuantizeFusionConstantNonScalarWithEqualValues) {
|
||||
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
|
||||
|
||||
Shape data_shape{1, 3, 14, 14};
|
||||
{
|
||||
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
|
||||
auto mul_const = opset5::Constant::create(element::f32, Shape{1}, {-2});
|
||||
auto mul_const = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {2, 2, 2});
|
||||
auto mul = std::make_shared<opset5::Multiply>(data, mul_const);
|
||||
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {1});
|
||||
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {20});
|
||||
@ -159,16 +156,15 @@ TEST(TransformationTests, MulFakeQuantizeFusionConstantAllNegative) {
|
||||
pass::Manager m;
|
||||
m.register_pass<pass::InitNodeInfo>();
|
||||
m.register_pass<pass::MulFakeQuantizeFusion>();
|
||||
m.register_pass<pass::ConstantFolding>();
|
||||
m.run_passes(f);
|
||||
ASSERT_NO_THROW(check_rt_info(f));
|
||||
}
|
||||
{
|
||||
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
|
||||
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {-0.5});
|
||||
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {-10});
|
||||
auto output_low = opset5::Constant::create(element::f32, Shape{1}, {10});
|
||||
auto output_high = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-10, -10, -10});
|
||||
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {0.5});
|
||||
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {10});
|
||||
auto output_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-10, -10, -10});
|
||||
auto output_high = opset5::Constant::create(element::f32, Shape{1}, {10});
|
||||
auto fq = std::make_shared<opset5::FakeQuantize>(data, input_low,
|
||||
input_high, output_low,
|
||||
output_high, 11);
|
||||
@ -179,84 +175,6 @@ TEST(TransformationTests, MulFakeQuantizeFusionConstantAllNegative) {
|
||||
ASSERT_TRUE(res.first) << res.second;
|
||||
}
|
||||
|
||||
TEST(TransformationTests, MulFakeQuantizeFusionConstantSomeNegative) {
|
||||
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
|
||||
|
||||
Shape data_shape{1, 3, 14, 14};
|
||||
{
|
||||
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
|
||||
auto mul_const = opset5::Constant::create(element::f32, Shape{3, 1, 1}, {2, 1, -2});
|
||||
auto mul = std::make_shared<opset5::Multiply>(data, mul_const);
|
||||
auto input_low = opset5::Constant::create(element::f32, Shape{1}, {1});
|
||||
auto input_high = opset5::Constant::create(element::f32, Shape{1}, {20});
|
||||
auto output_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-10, -10, -10});
|
||||
auto output_high = opset5::Constant::create(element::f32, Shape{1}, {10});
|
||||
auto fq = std::make_shared<opset5::FakeQuantize>(mul, input_low,
|
||||
input_high, output_low,
|
||||
output_high, 20);
|
||||
f = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
|
||||
pass::Manager m;
|
||||
m.register_pass<pass::InitNodeInfo>();
|
||||
m.register_pass<pass::MulFakeQuantizeFusion>();
|
||||
m.register_pass<pass::ConstantFolding>();
|
||||
m.run_passes(f);
|
||||
ASSERT_NO_THROW(check_rt_info(f));
|
||||
}
|
||||
{
|
||||
auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
|
||||
auto input_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {0.5f, 1.0f, -0.5f});
|
||||
auto input_high = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {10.0f, 20.0f, -10.0f});
|
||||
auto output_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-10.0f, -10.0f, 10.0f});
|
||||
auto output_high = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {10.0f, 10.0f, -10.0f});
|
||||
auto fq = std::make_shared<opset5::FakeQuantize>(data, input_low,
|
||||
input_high, output_low,
|
||||
output_high, 20);
|
||||
f_ref = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
|
||||
}
|
||||
|
||||
auto res = compare_functions(f, f_ref, true);
|
||||
ASSERT_TRUE(res.first) << res.second;
|
||||
}
|
||||
|
||||
TEST(TransformationTests, MulFakeQuantizeFusionConstantSomeNegativeF16) {
|
||||
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
|
||||
|
||||
Shape data_shape{1, 3, 14, 14};
|
||||
{
|
||||
auto data = std::make_shared<opset5::Parameter>(element::f16, data_shape);
|
||||
auto mul_const = opset5::Constant::create(element::f16, Shape{3, 1, 1}, {2, 1, -2});
|
||||
auto mul = std::make_shared<opset5::Multiply>(data, mul_const);
|
||||
auto input_low = opset5::Constant::create(element::f16, Shape{1}, {1});
|
||||
auto input_high = opset5::Constant::create(element::f16, Shape{1}, {20});
|
||||
auto output_low = opset5::Constant::create(element::f16, Shape{1, 3, 1, 1}, {-10, -10, -10});
|
||||
auto output_high = opset5::Constant::create(element::f16, Shape{1}, {10});
|
||||
auto fq = std::make_shared<opset5::FakeQuantize>(mul, input_low,
|
||||
input_high, output_low,
|
||||
output_high, 20);
|
||||
f = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
|
||||
pass::Manager m;
|
||||
m.register_pass<pass::InitNodeInfo>();
|
||||
m.register_pass<pass::MulFakeQuantizeFusion>();
|
||||
m.register_pass<pass::ConstantFolding>();
|
||||
m.run_passes(f);
|
||||
ASSERT_NO_THROW(check_rt_info(f));
|
||||
}
|
||||
{
|
||||
auto data = std::make_shared<opset5::Parameter>(element::f16, data_shape);
|
||||
auto input_low = opset5::Constant::create(element::f16, Shape{1, 3, 1, 1}, {0.5f, 1.0f, -0.5f});
|
||||
auto input_high = opset5::Constant::create(element::f16, Shape{1, 3, 1, 1}, {10.0f, 20.0f, -10.0f});
|
||||
auto output_low = opset5::Constant::create(element::f16, Shape{1, 3, 1, 1}, {-10.0f, -10.0f, 10.0f});
|
||||
auto output_high = opset5::Constant::create(element::f16, Shape{1, 3, 1, 1}, {10.0f, 10.0f, -10.0f});
|
||||
auto fq = std::make_shared<opset5::FakeQuantize>(data, input_low,
|
||||
input_high, output_low,
|
||||
output_high, 20);
|
||||
f_ref = std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
|
||||
}
|
||||
|
||||
auto res = compare_functions(f, f_ref, true);
|
||||
ASSERT_TRUE(res.first) << res.second;
|
||||
}
|
||||
|
||||
TEST(TransformationTests, NegativeMulFakeQuantizeFusionNotAConstant) {
|
||||
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
|
||||
|
||||
@ -276,7 +194,6 @@ TEST(TransformationTests, NegativeMulFakeQuantizeFusionNotAConstant) {
|
||||
pass::Manager m;
|
||||
m.register_pass<pass::InitNodeInfo>();
|
||||
m.register_pass<pass::MulFakeQuantizeFusion>();
|
||||
m.register_pass<pass::ConstantFolding>();
|
||||
m.run_passes(f);
|
||||
ASSERT_NO_THROW(check_rt_info(f));
|
||||
}
|
||||
@ -297,3 +214,79 @@ TEST(TransformationTests, NegativeMulFakeQuantizeFusionNotAConstant) {
|
||||
auto res = compare_functions(f, f_ref, true);
|
||||
ASSERT_TRUE(res.first) << res.second;
|
||||
}
|
||||
|
||||
// Negative case for MulFakeQuantizeFusion on an f16 graph.
// The function is cloned before the passes run; the test succeeds only when
// the graph after the passes still compares equal to that clone.
TEST(TransformationTests, NegativeMulFakeQuantizeFusionLowPrecision) {
    const Shape data_shape{1, 3, 14, 14};

    // Multiply with a single-element constant on the second input.
    const auto data = std::make_shared<opset5::Parameter>(element::f16, data_shape);
    const auto mul_const = opset5::Constant::create(element::f16, Shape{1}, {2});
    const auto mul = std::make_shared<opset5::Multiply>(data, mul_const);

    // FakeQuantize limits; output_low is given per channel ({1, 3, 1, 1}).
    const auto input_low = opset5::Constant::create(element::f16, Shape{1}, {1});
    const auto input_high = opset5::Constant::create(element::f16, Shape{1}, {20});
    const auto output_low = opset5::Constant::create(element::f16, Shape{1, 3, 1, 1}, {0, 0, 0});
    const auto output_high = opset5::Constant::create(element::f16, Shape{1}, {10});
    const auto fq = std::make_shared<opset5::FakeQuantize>(
        mul, input_low, input_high, output_low, output_high, 11);

    const std::shared_ptr<Function> f =
        std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
    // Snapshot taken before any pass touches the graph.
    const std::shared_ptr<Function> f_ref = clone_function(*f);

    pass::Manager manager;
    manager.register_pass<pass::InitNodeInfo>();
    manager.register_pass<pass::MulFakeQuantizeFusion>();
    manager.run_passes(f);
    ASSERT_NO_THROW(check_rt_info(f));

    const auto comparison = compare_functions(f, f_ref, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
||||
// Negative case for MulFakeQuantizeFusion: the Multiply constant is a single
// negative value (-2). The graph is cloned before the passes run and must
// still compare equal to that clone afterwards.
TEST(TransformationTests, NegativeMulFakeQuantizeFusionConstantAllNegative) {
    const Shape data_shape{1, 3, 14, 14};

    const auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
    const auto mul_const = opset5::Constant::create(element::f32, Shape{1}, {-2});
    const auto mul = std::make_shared<opset5::Multiply>(data, mul_const);

    // FakeQuantize limits; output_low is given per channel ({1, 3, 1, 1}).
    const auto input_low = opset5::Constant::create(element::f32, Shape{1}, {1});
    const auto input_high = opset5::Constant::create(element::f32, Shape{1}, {20});
    const auto output_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-10, -10, -10});
    const auto output_high = opset5::Constant::create(element::f32, Shape{1}, {10});
    const auto fq = std::make_shared<opset5::FakeQuantize>(
        mul, input_low, input_high, output_low, output_high, 11);

    const std::shared_ptr<Function> f =
        std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
    // Snapshot taken before any pass touches the graph.
    const std::shared_ptr<Function> f_ref = clone_function(*f);

    pass::Manager manager;
    manager.register_pass<pass::InitNodeInfo>();
    manager.register_pass<pass::MulFakeQuantizeFusion>();
    manager.run_passes(f);
    ASSERT_NO_THROW(check_rt_info(f));

    const auto comparison = compare_functions(f, f_ref, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
||||
// Negative case for MulFakeQuantizeFusion: the Multiply constant is per-channel
// ({3, 1, 1}) with mixed signs (2, 1, -2). The graph is cloned before the
// passes run and must still compare equal to that clone afterwards.
TEST(TransformationTests, NegativeMulFakeQuantizeFusionConstantSomeNegative) {
    const Shape data_shape{1, 3, 14, 14};

    const auto data = std::make_shared<opset5::Parameter>(element::f32, data_shape);
    const auto mul_const = opset5::Constant::create(element::f32, Shape{3, 1, 1}, {2, 1, -2});
    const auto mul = std::make_shared<opset5::Multiply>(data, mul_const);

    // FakeQuantize limits; output_low is given per channel ({1, 3, 1, 1}).
    const auto input_low = opset5::Constant::create(element::f32, Shape{1}, {1});
    const auto input_high = opset5::Constant::create(element::f32, Shape{1}, {20});
    const auto output_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-10, -10, -10});
    const auto output_high = opset5::Constant::create(element::f32, Shape{1}, {10});
    const auto fq = std::make_shared<opset5::FakeQuantize>(
        mul, input_low, input_high, output_low, output_high, 20);

    const std::shared_ptr<Function> f =
        std::make_shared<Function>(NodeVector{fq}, ParameterVector{data});
    // Snapshot taken before any pass touches the graph.
    const std::shared_ptr<Function> f_ref = clone_function(*f);

    pass::Manager manager;
    manager.register_pass<pass::InitNodeInfo>();
    manager.register_pass<pass::MulFakeQuantizeFusion>();
    manager.run_passes(f);
    ASSERT_NO_THROW(check_rt_info(f));

    const auto comparison = compare_functions(f, f_ref, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
@ -24,16 +24,16 @@
|
||||
using namespace testing;
|
||||
|
||||
TEST(TransformationTests, FQTransposeTest1) {
|
||||
auto data1 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 3}, {1, 2, 3});
|
||||
auto data2 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{3}, {1, 2, 3});
|
||||
auto data3 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3}, {1, 2, 3});
|
||||
auto data4 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3}, {1, 2, 3});
|
||||
auto data5 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3}, {1, 2, 3});
|
||||
auto transpose_order = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{3}, {0, 2, 1});
|
||||
|
||||
std::shared_ptr<ngraph::Function> f(nullptr);
|
||||
std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
|
||||
{
|
||||
auto fq = std::make_shared<ngraph::op::FakeQuantize>(data1, data2, data3, data4, data5, 1);
|
||||
auto data = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 3}, {1, 2, 3});
|
||||
auto input_low = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {2});
|
||||
auto input_high = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {3});
|
||||
auto output_low = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {2});
|
||||
auto output_high = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {3});
|
||||
auto transpose_order = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{3}, {0, 2, 1});
|
||||
|
||||
auto fq = std::make_shared<ngraph::op::FakeQuantize>(data, input_low, input_high, output_low, output_high, 1);
|
||||
auto transpose = std::make_shared<ngraph::op::Transpose>(fq, transpose_order);
|
||||
|
||||
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{transpose}, ngraph::ParameterVector{});
|
||||
@ -47,29 +47,35 @@ TEST(TransformationTests, FQTransposeTest1) {
|
||||
manager.register_pass<ngraph::pass::ConstantFolding>();
|
||||
ASSERT_NO_THROW(manager.run_passes(f));
|
||||
}
|
||||
std::vector<size_t> ref_shape{1, 3, 1};
|
||||
for (auto op : f->get_ops()) {
|
||||
if (auto constant = ngraph::as_type_ptr<ngraph::op::Constant>(op)) {
|
||||
auto shape = constant->get_shape();
|
||||
ASSERT_EQ(shape, ref_shape);
|
||||
}
|
||||
{
|
||||
auto data = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3, 1}, {1, 2, 3});
|
||||
auto input_low = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 1}, {2});
|
||||
auto input_high = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 1}, {3});
|
||||
auto output_low = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 1}, {2});
|
||||
auto output_high = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 1}, {3});
|
||||
|
||||
auto fq = std::make_shared<ngraph::op::FakeQuantize>(data, input_low, input_high, output_low, output_high, 1);
|
||||
|
||||
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{fq}, ngraph::ParameterVector{});
|
||||
}
|
||||
auto res = compare_functions(f, f_ref, true);
|
||||
ASSERT_TRUE(res.first) << res.second;
|
||||
}
|
||||
|
||||
TEST(TransformationTests, FQTransposeDynamic) {
|
||||
auto data1 = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::PartialShape::dynamic());
|
||||
auto data2 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{3}, {1, 2, 3});
|
||||
auto data3 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3}, {1, 2, 3});
|
||||
auto data4 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3}, {1, 2, 3});
|
||||
auto data5 = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3}, {1, 2, 3});
|
||||
auto data = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::PartialShape::dynamic());
|
||||
auto input_low = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {2});
|
||||
auto input_high = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {3});
|
||||
auto output_low = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {2});
|
||||
auto output_high = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {3});
|
||||
auto transpose_order = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{3}, {0, 2, 1});
|
||||
|
||||
std::shared_ptr<ngraph::Function> f(nullptr);
|
||||
{
|
||||
auto fq = std::make_shared<ngraph::op::FakeQuantize>(data1, data2, data3, data4, data5, 1);
|
||||
auto fq = std::make_shared<ngraph::op::FakeQuantize>(data, input_low, input_high, output_low, output_high, 1);
|
||||
auto transpose = std::make_shared<ngraph::op::Transpose>(fq, transpose_order);
|
||||
|
||||
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{transpose}, ngraph::ParameterVector{data1});
|
||||
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{transpose}, ngraph::ParameterVector{data});
|
||||
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
||||
|
Loading…
Reference in New Issue
Block a user