Introduce AddFakeQuantizeFusion and MulFakeQuantizeFusion transformat… (#4102)
This commit is contained in:
parent
83e5bde4ea
commit
9b829d2884
@ -0,0 +1,33 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include <transformations_visibility.hpp>
|
||||
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
|
||||
class TRANSFORMATIONS_API AddFakeQuantizeFusion;
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief AddFakeQuantizeFusion transformation replaces following graph:
|
||||
* Add->FakeQuantize to a single FakeQuantize
|
||||
* Restrictions:
|
||||
* - second input to Add is a Constant
|
||||
*/
|
||||
class ngraph::pass::AddFakeQuantizeFusion: public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
AddFakeQuantizeFusion();
|
||||
};
|
@ -0,0 +1,33 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include <transformations_visibility.hpp>
|
||||
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
|
||||
class TRANSFORMATIONS_API MulFakeQuantizeFusion;
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief MulFakeQuantizeFusion transformation replaces following graph:
|
||||
* Mul->FakeQuantize to a single FakeQuantize
|
||||
* Restrictions:
|
||||
* - second input to Mul is a Constant
|
||||
*/
|
||||
class ngraph::pass::MulFakeQuantizeFusion: public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
MulFakeQuantizeFusion();
|
||||
};
|
@ -0,0 +1,70 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "transformations/common_optimizations/add_fake_quantize_fusion.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/opsets/opset5.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include "itt.hpp"
|
||||
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::AddFakeQuantizeFusion, "AddFakeQuantizeFusion", 0);
|
||||
|
||||
ngraph::pass::AddFakeQuantizeFusion::AddFakeQuantizeFusion() {
|
||||
MATCHER_SCOPE(AddFakeQuantizeFusion);
|
||||
auto input_pattern = ngraph::pattern::any_input();
|
||||
auto const_pattern = ngraph::pattern::wrap_type<opset5::Constant>();
|
||||
auto add_pattern = ngraph::pattern::wrap_type<opset5::Add>({input_pattern, const_pattern},
|
||||
pattern::consumers_count(1));
|
||||
auto fq_pattern = ngraph::pattern::wrap_type<opset5::FakeQuantize>({add_pattern,
|
||||
ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input()});
|
||||
ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) {
|
||||
const auto& pattern_value_map = m.get_pattern_value_map();
|
||||
auto fq = std::dynamic_pointer_cast<opset5::FakeQuantize>(pattern_value_map.at(fq_pattern).get_node_shared_ptr());
|
||||
if (!fq)
|
||||
return false;
|
||||
std::shared_ptr<Node> add_const = std::dynamic_pointer_cast<opset5::Constant>(pattern_value_map.at(const_pattern).get_node_shared_ptr());
|
||||
if (!add_const)
|
||||
return false;
|
||||
auto const_shape = add_const->get_shape();
|
||||
size_t const_shape_size = shape_size(const_shape);
|
||||
if (const_shape_size > 1) {
|
||||
// disallow constant shapes other than (N, 1, 1, ..., 1) or (1, C, 1, ..., 1)
|
||||
if (!(const_shape[0] > 1 && const_shape[0] == const_shape_size) &&
|
||||
!(const_shape.size() > 1 && const_shape[1] == const_shape_size)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (const_shape_size > 1 &&
|
||||
static_cast<Dimension::value_type>(const_shape.size()) < fq->get_input_partial_shape(0).rank().get_length()) {
|
||||
// Reshape constants like (C, 1, 1) to (1, C, 1, 1)
|
||||
const_shape.insert(const_shape.begin(), fq->get_input_partial_shape(0).rank().get_length() - const_shape.size(), 1);
|
||||
add_const = std::make_shared<opset5::Reshape>(add_const, op::Constant::create(element::u64, Shape{const_shape.size()}, const_shape), false);
|
||||
}
|
||||
auto new_input_low = std::make_shared<opset5::Subtract>(fq->input_value(1), add_const);
|
||||
auto new_input_high = std::make_shared<opset5::Subtract>(fq->input_value(2), add_const);
|
||||
auto new_fq = register_new_node<opset5::FakeQuantize>(pattern_value_map.at(input_pattern),
|
||||
new_input_low,
|
||||
new_input_high,
|
||||
fq->input_value(3),
|
||||
fq->input_value(4),
|
||||
fq->get_levels());
|
||||
new_fq->set_friendly_name(fq->get_friendly_name());
|
||||
copy_runtime_info({pattern_value_map.at(add_pattern).get_node_shared_ptr(), fq}, {new_input_low, new_input_high, new_fq});
|
||||
replace_node(fq, new_fq);
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(fq_pattern, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
@ -26,6 +26,8 @@
|
||||
#include "transformations/common_optimizations/hswish_fusion.hpp"
|
||||
#include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
|
||||
#include "transformations/common_optimizations/relu_fake_quantize_fusion.hpp"
|
||||
#include "transformations/common_optimizations/add_fake_quantize_fusion.hpp"
|
||||
#include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp"
|
||||
#include "transformations/common_optimizations/clamp_fusion.hpp"
|
||||
#include "transformations/common_optimizations/pad_fusion.hpp"
|
||||
#include "transformations/common_optimizations/eliminate_unsqueeze_gather.hpp"
|
||||
@ -148,6 +150,8 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::
|
||||
fq_fusions->add_matcher<ngraph::pass::FakeQuantizeReshapeFusion>();
|
||||
fq_fusions->add_matcher<ngraph::pass::PullTransposeThroughFQUp>();
|
||||
fq_fusions->add_matcher<ngraph::pass::ReluFakeQuantizeFusion>();
|
||||
fq_fusions->add_matcher<ngraph::pass::AddFakeQuantizeFusion>();
|
||||
fq_fusions->add_matcher<ngraph::pass::MulFakeQuantizeFusion>();
|
||||
fq_fusions->set_name("ngraph::pass::FakeQuantizeFusions");
|
||||
|
||||
manager.run_passes(f);
|
||||
|
@ -0,0 +1,116 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/opsets/opset5.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include "itt.hpp"
|
||||
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::MulFakeQuantizeFusion, "MulFakeQuantizeFusion", 0);
|
||||
|
||||
ngraph::pass::MulFakeQuantizeFusion::MulFakeQuantizeFusion() {
|
||||
MATCHER_SCOPE(MulFakeQuantizeFusion);
|
||||
auto input_pattern = ngraph::pattern::any_input();
|
||||
auto const_pattern = ngraph::pattern::wrap_type<opset5::Constant>();
|
||||
auto mul_pattern = ngraph::pattern::wrap_type<opset5::Multiply>({input_pattern, const_pattern},
|
||||
pattern::consumers_count(1));
|
||||
auto fq_pattern = ngraph::pattern::wrap_type<opset5::FakeQuantize>({mul_pattern,
|
||||
ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input()});
|
||||
ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) {
|
||||
const auto& pattern_value_map = m.get_pattern_value_map();
|
||||
auto fq = std::dynamic_pointer_cast<opset5::FakeQuantize>(pattern_value_map.at(fq_pattern).get_node_shared_ptr());
|
||||
if (!fq)
|
||||
return false;
|
||||
auto mul_const = std::dynamic_pointer_cast<opset5::Constant>(pattern_value_map.at(const_pattern).get_node_shared_ptr());
|
||||
if (!mul_const)
|
||||
return false;
|
||||
|
||||
auto mul_const_value = mul_const->cast_vector<float>();
|
||||
if (std::any_of(mul_const_value.begin(), mul_const_value.end(), [] (float f) -> bool { return f == 0.0f; }))
|
||||
return false;
|
||||
|
||||
auto const_shape = mul_const->get_shape();
|
||||
size_t const_shape_size = shape_size(const_shape);
|
||||
if (const_shape_size > 1) {
|
||||
// disallow constant shapes other than (N, 1, 1, ..., 1) or (1, C, 1, ..., 1)
|
||||
if (!(const_shape[0] > 1 && const_shape[0] == const_shape_size) &&
|
||||
!(const_shape.size() > 1 && const_shape[1] == const_shape_size)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> mul_const_node = mul_const;
|
||||
if (const_shape_size > 1 &&
|
||||
static_cast<Dimension::value_type>(const_shape.size()) < fq->get_input_partial_shape(0).rank().get_length()) {
|
||||
// Reshape constants like (C, 1, 1) to (1, C, 1, 1)
|
||||
const_shape.insert(const_shape.begin(), fq->get_input_partial_shape(0).rank().get_length() - const_shape.size(), 1);
|
||||
mul_const_node = std::make_shared<opset5::Reshape>(mul_const_node,
|
||||
op::Constant::create(element::u64, Shape{const_shape.size()}, const_shape), false);
|
||||
}
|
||||
|
||||
auto new_input_low = std::make_shared<opset5::Divide>(fq->input_value(1), mul_const_node);
|
||||
auto new_input_high = std::make_shared<opset5::Divide>(fq->input_value(2), mul_const_node);
|
||||
|
||||
auto mul = pattern_value_map.at(mul_pattern).get_node_shared_ptr();
|
||||
const auto& mul_data = pattern_value_map.at(input_pattern);
|
||||
|
||||
std::shared_ptr<Node> new_fq;
|
||||
if (std::all_of(mul_const_value.begin(), mul_const_value.end(), [] (float f) -> bool { return f < 0.0f; })) {
|
||||
new_fq = register_new_node<opset5::FakeQuantize>(mul_data, new_input_low, new_input_high,
|
||||
fq->input_value(4), fq->input_value(3), fq->get_levels());
|
||||
copy_runtime_info({mul, fq}, {mul_const_node, new_input_low, new_input_high, new_fq});
|
||||
} else if (std::any_of(mul_const_value.begin(), mul_const_value.end(), [] (float f) -> bool { return f < 0.0f; })) {
|
||||
const auto& output_low = fq->input_value(3);
|
||||
const auto& output_high = fq->input_value(4);
|
||||
auto zero = op::Constant::create(element::f32, Shape{}, {0.0f});
|
||||
// get the mask of the values from mul_const that are less than zero
|
||||
std::vector<float> less_than_zero;
|
||||
less_than_zero.reserve(mul_const_value.size());
|
||||
// and greater or equal to zero
|
||||
std::vector<float> greater_eq_zero;
|
||||
greater_eq_zero.reserve(mul_const_value.size());
|
||||
for (size_t i = 0; i < mul_const_value.size(); i++) {
|
||||
less_than_zero.push_back(mul_const_value[i] < 0);
|
||||
greater_eq_zero.push_back(mul_const_value[i] >= 0);
|
||||
}
|
||||
auto less_const = op::Constant::create(element::f32, const_shape, less_than_zero);
|
||||
auto greater_eq_const = op::Constant::create(element::f32, const_shape, greater_eq_zero);
|
||||
// new_output_low is defined as follows:
|
||||
// output_low[i], when mul_const[i] >= 0
|
||||
// output_high[i], when mul_const[i] < 0
|
||||
auto new_output_low = std::make_shared<opset5::Add>(
|
||||
std::make_shared<opset5::Multiply>(greater_eq_const, output_low),
|
||||
std::make_shared<opset5::Multiply>(less_const, output_high));
|
||||
// new_output_high is defined as follows:
|
||||
// output_high[i], when mul_const[i] >= 0
|
||||
// output_low[i], when mul_const[i] < 0
|
||||
auto new_output_high = std::make_shared<opset5::Add>(
|
||||
std::make_shared<opset5::Multiply>(greater_eq_const, output_high),
|
||||
std::make_shared<opset5::Multiply>(less_const, output_low));
|
||||
new_fq = register_new_node<opset5::FakeQuantize>(mul_data, new_input_low,
|
||||
new_input_high, new_output_low, new_output_high, fq->get_levels());
|
||||
} else {
|
||||
new_fq = register_new_node<opset5::FakeQuantize>(mul_data, new_input_low, new_input_high,
|
||||
fq->input_value(3), fq->input_value(4), fq->get_levels());
|
||||
}
|
||||
|
||||
copy_runtime_info({mul, fq}, {mul_const_node, new_input_low, new_input_high, new_fq});
|
||||
new_fq->set_friendly_name(fq->get_friendly_name());
|
||||
replace_node(fq, new_fq);
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(fq_pattern, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
@ -41,12 +41,12 @@ ngraph::pass::ReluFakeQuantizeFusion::ReluFakeQuantizeFusion() {
|
||||
if (!fq)
|
||||
return false;
|
||||
|
||||
auto new_fq = std::make_shared<ngraph::opset5::FakeQuantize>(data,
|
||||
fq->input_value(1),
|
||||
fq->input_value(2),
|
||||
fq->input_value(3),
|
||||
fq->input_value(4),
|
||||
fq->get_levels());
|
||||
auto new_fq = register_new_node<ngraph::opset5::FakeQuantize>(data,
|
||||
fq->input_value(1),
|
||||
fq->input_value(2),
|
||||
fq->input_value(3),
|
||||
fq->input_value(4),
|
||||
fq->get_levels());
|
||||
new_fq->set_friendly_name(fq->get_friendly_name());
|
||||
|
||||
copy_runtime_info({relu.get_node_shared_ptr(), fq}, new_fq);
|
||||
|
@ -0,0 +1,182 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
|
||||
#include <ngraph/function.hpp>
|
||||
#include <ngraph/opsets/opset5.hpp>
|
||||
#include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
|
||||
#include <transformations/init_node_info.hpp>
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include <ngraph/pass/manager.hpp>
|
||||
#include <ngraph/pass/constant_folding.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
|
||||
|
||||
using namespace testing;
|
||||
using namespace ngraph;
|
||||
|
||||
|
||||
// Add with a one-element constant in front of FakeQuantize: the Add is expected to
// disappear and the input limits [0, 20] to become [-2, 18].
TEST(TransformationTests, AddFakeQuantizeFusion) {
    std::shared_ptr<Function> f;
    std::shared_ptr<Function> f_ref;
    const Shape data_shape{1, 3, 14, 14};

    {
        // Graph under test: Parameter -> Add(2) -> FakeQuantize
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto shift = opset5::Constant::create(element::f32, Shape{1}, {2});
        const auto sum = std::make_shared<opset5::Add>(param, shift);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {0});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {20});
        const auto out_low = opset5::Constant::create(element::f32, Shape{}, {0});
        const auto out_high = opset5::Constant::create(element::f32, Shape{}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(sum, in_low, in_high, out_low, out_high, 11);
        f = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::AddFakeQuantizeFusion>();
        manager.register_pass<pass::ConstantFolding>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    {
        // Reference graph: Parameter -> FakeQuantize with shifted input limits
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {-2});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {18});
        const auto out_low = opset5::Constant::create(element::f32, Shape{}, {0});
        const auto out_high = opset5::Constant::create(element::f32, Shape{}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(param, in_low, in_high, out_low, out_high, 11);
        f_ref = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param});
    }

    const auto comparison = compare_functions(f, f_ref, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
||||
// Same as the basic Add case, but the constant is the first input of Add;
// the expected result is identical: input limits [0, 20] become [-2, 18].
TEST(TransformationTests, AddFakeQuantizeFusionConstantOnFirstInput) {
    std::shared_ptr<Function> f;
    std::shared_ptr<Function> f_ref;
    const Shape data_shape{1, 3, 14, 14};

    {
        // Graph under test: Add(2, Parameter) -> FakeQuantize
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto shift = opset5::Constant::create(element::f32, Shape{1}, {2});
        const auto sum = std::make_shared<opset5::Add>(shift, param);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {0});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {20});
        const auto out_low = opset5::Constant::create(element::f32, Shape{}, {0});
        const auto out_high = opset5::Constant::create(element::f32, Shape{}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(sum, in_low, in_high, out_low, out_high, 11);
        f = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::AddFakeQuantizeFusion>();
        manager.register_pass<pass::ConstantFolding>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    {
        // Reference graph: Parameter -> FakeQuantize with shifted input limits
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {-2});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {18});
        const auto out_low = opset5::Constant::create(element::f32, Shape{}, {0});
        const auto out_high = opset5::Constant::create(element::f32, Shape{}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(param, in_low, in_high, out_low, out_high, 11);
        f_ref = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param});
    }

    const auto comparison = compare_functions(f, f_ref, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
||||
// Add with a (3, 1, 1) constant on 4D data: the expected FakeQuantize input limits
// have shape (1, 3, 1, 1) and hold the original limits minus the constant.
TEST(TransformationTests, AddFakeQuantizeFusionReshape) {
    std::shared_ptr<Function> f;
    std::shared_ptr<Function> f_ref;
    const Shape data_shape{1, 3, 14, 14};

    {
        // Graph under test: Parameter -> Add({2, 3, 4}) -> FakeQuantize
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto shift = opset5::Constant::create(element::f32, Shape{3, 1, 1}, {2, 3, 4});
        const auto sum = std::make_shared<opset5::Add>(param, shift);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {0});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {20});
        const auto out_low = opset5::Constant::create(element::f32, Shape{}, {0});
        const auto out_high = opset5::Constant::create(element::f32, Shape{}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(sum, in_low, in_high, out_low, out_high, 11);
        f = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::AddFakeQuantizeFusion>();
        manager.register_pass<pass::ConstantFolding>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    {
        // Reference graph: Parameter -> FakeQuantize with per-channel input limits
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-2, -3, -4});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {18, 17, 16});
        const auto out_low = opset5::Constant::create(element::f32, Shape{}, {0});
        const auto out_high = opset5::Constant::create(element::f32, Shape{}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(param, in_low, in_high, out_low, out_high, 11);
        f_ref = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param});
    }

    const auto comparison = compare_functions(f, f_ref, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
||||
// Negative case: the second Add input is a Parameter rather than a Constant,
// so the graph is expected to stay as it was built.
TEST(TransformationTests, NegativeAddFakeQuantizeFusionNotAConstant) {
    std::shared_ptr<Function> f;
    std::shared_ptr<Function> f_ref;
    const Shape data_shape{1, 3, 14, 14};

    {
        // Graph under test: Add(Parameter, Parameter) -> FakeQuantize
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto second_param = std::make_shared<opset5::Parameter>(element::f32, Shape{1});
        const auto sum = std::make_shared<opset5::Add>(param, second_param);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {0});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {20});
        const auto out_low = opset5::Constant::create(element::f32, Shape{}, {0});
        const auto out_high = opset5::Constant::create(element::f32, Shape{}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(sum, in_low, in_high, out_low, out_high, 11);
        f = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param, second_param});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::AddFakeQuantizeFusion>();
        manager.register_pass<pass::ConstantFolding>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    {
        // Reference graph: the very same structure, built without running any pass
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto second_param = std::make_shared<opset5::Parameter>(element::f32, Shape{1});
        const auto sum = std::make_shared<opset5::Add>(param, second_param);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {0});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {20});
        const auto out_low = opset5::Constant::create(element::f32, Shape{}, {0});
        const auto out_high = opset5::Constant::create(element::f32, Shape{}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(sum, in_low, in_high, out_low, out_high, 11);
        f_ref = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param, second_param});
    }

    const auto comparison = compare_functions(f, f_ref, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
@ -0,0 +1,260 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
|
||||
#include <ngraph/function.hpp>
|
||||
#include <ngraph/opsets/opset5.hpp>
|
||||
#include <transformations/common_optimizations/mul_fake_quantize_fusion.hpp>
|
||||
#include <transformations/init_node_info.hpp>
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include <ngraph/pass/manager.hpp>
|
||||
#include <ngraph/pass/constant_folding.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
|
||||
|
||||
using namespace testing;
|
||||
using namespace ngraph;
|
||||
|
||||
|
||||
// Multiply by a positive one-element constant in front of FakeQuantize: the Multiply is
// expected to disappear and the input limits [1, 20] to become [0.5, 10]; output limits are kept.
TEST(TransformationTests, MulFakeQuantizeFusionPositiveConstant) {
    std::shared_ptr<Function> f;
    std::shared_ptr<Function> f_ref;
    const Shape data_shape{1, 3, 14, 14};

    {
        // Graph under test: Parameter -> Multiply(2) -> FakeQuantize
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto scale = opset5::Constant::create(element::f32, Shape{1}, {2});
        const auto product = std::make_shared<opset5::Multiply>(param, scale);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {1});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {20});
        const auto out_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {0, 0, 0});
        const auto out_high = opset5::Constant::create(element::f32, Shape{1}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(product, in_low, in_high, out_low, out_high, 11);
        f = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::MulFakeQuantizeFusion>();
        manager.register_pass<pass::ConstantFolding>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    {
        // Reference graph: Parameter -> FakeQuantize with scaled input limits
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {0.5});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {10});
        const auto out_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {0, 0, 0});
        const auto out_high = opset5::Constant::create(element::f32, Shape{1}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(param, in_low, in_high, out_low, out_high, 11);
        f_ref = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param});
    }

    const auto comparison = compare_functions(f, f_ref, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
||||
// Same as the positive-constant case, but the constant is the first input of Multiply;
// the expected result is identical: input limits [1, 20] become [0.5, 10].
TEST(TransformationTests, MulFakeQuantizeFusionConstantOnFirstInput) {
    std::shared_ptr<Function> f;
    std::shared_ptr<Function> f_ref;
    const Shape data_shape{1, 3, 14, 14};

    {
        // Graph under test: Multiply(2, Parameter) -> FakeQuantize
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto scale = opset5::Constant::create(element::f32, Shape{1}, {2});
        const auto product = std::make_shared<opset5::Multiply>(scale, param);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {1});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {20});
        const auto out_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {0, 0, 0});
        const auto out_high = opset5::Constant::create(element::f32, Shape{1}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(product, in_low, in_high, out_low, out_high, 11);
        f = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::MulFakeQuantizeFusion>();
        manager.register_pass<pass::ConstantFolding>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    {
        // Reference graph: Parameter -> FakeQuantize with scaled input limits
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {0.5});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {10});
        const auto out_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {0, 0, 0});
        const auto out_high = opset5::Constant::create(element::f32, Shape{1}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(param, in_low, in_high, out_low, out_high, 11);
        f_ref = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param});
    }

    const auto comparison = compare_functions(f, f_ref, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
||||
// Multiply with a (3, 1, 1) constant on 4D data: the expected FakeQuantize input limits
// have shape (1, 3, 1, 1) and hold the original limits divided by the constant.
TEST(TransformationTests, MulFakeQuantizeFusionReshape) {
    std::shared_ptr<Function> f;
    std::shared_ptr<Function> f_ref;
    const Shape data_shape{1, 3, 14, 14};

    {
        // Graph under test: Parameter -> Multiply({2, 4, 5}) -> FakeQuantize
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto scale = opset5::Constant::create(element::f32, Shape{3, 1, 1}, {2, 4, 5});
        const auto product = std::make_shared<opset5::Multiply>(param, scale);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {1});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {20});
        const auto out_low = opset5::Constant::create(element::f32, Shape{}, {0});
        const auto out_high = opset5::Constant::create(element::f32, Shape{}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(product, in_low, in_high, out_low, out_high, 11);
        f = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::MulFakeQuantizeFusion>();
        manager.register_pass<pass::ConstantFolding>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    {
        // Reference graph: Parameter -> FakeQuantize with per-channel input limits
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {0.5, 0.25, 0.2});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {10, 5, 4});
        const auto out_low = opset5::Constant::create(element::f32, Shape{}, {0});
        const auto out_high = opset5::Constant::create(element::f32, Shape{}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(param, in_low, in_high, out_low, out_high, 11);
        f_ref = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param});
    }

    const auto comparison = compare_functions(f, f_ref, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
||||
// Multiply by a negative one-element constant: the input limits [1, 20] are expected to
// become [-0.5, -10] and the original output_low / output_high to trade places.
TEST(TransformationTests, MulFakeQuantizeFusionConstantAllNegative) {
    std::shared_ptr<Function> f;
    std::shared_ptr<Function> f_ref;
    const Shape data_shape{1, 3, 14, 14};

    {
        // Graph under test: Parameter -> Multiply(-2) -> FakeQuantize
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto scale = opset5::Constant::create(element::f32, Shape{1}, {-2});
        const auto product = std::make_shared<opset5::Multiply>(param, scale);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {1});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {20});
        const auto out_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-10, -10, -10});
        const auto out_high = opset5::Constant::create(element::f32, Shape{1}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(product, in_low, in_high, out_low, out_high, 11);
        f = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::MulFakeQuantizeFusion>();
        manager.register_pass<pass::ConstantFolding>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    {
        // Reference graph: scaled input limits, output limits exchanged
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {-0.5});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {-10});
        const auto out_low = opset5::Constant::create(element::f32, Shape{1}, {10});
        const auto out_high = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-10, -10, -10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(param, in_low, in_high, out_low, out_high, 11);
        f_ref = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param});
    }

    const auto comparison = compare_functions(f, f_ref, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
||||
// Multiply by a per-channel constant {2, 1, -2} with mixed signs: the input limits are
// expected to be divided per channel and the output limits exchanged only for the
// channel with the negative multiplier.
TEST(TransformationTests, MulFakeQuantizeFusionConstantSomeNegative) {
    std::shared_ptr<Function> f;
    std::shared_ptr<Function> f_ref;
    const Shape data_shape{1, 3, 14, 14};

    {
        // Graph under test: Parameter -> Multiply({2, 1, -2}) -> FakeQuantize
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto scale = opset5::Constant::create(element::f32, Shape{3, 1, 1}, {2, 1, -2});
        const auto product = std::make_shared<opset5::Multiply>(param, scale);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {1});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {20});
        const auto out_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-10, -10, -10});
        const auto out_high = opset5::Constant::create(element::f32, Shape{1}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(product, in_low, in_high, out_low, out_high, 20);
        f = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::MulFakeQuantizeFusion>();
        manager.register_pass<pass::ConstantFolding>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    {
        // Reference graph: all four limits are per-channel constants
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {0.5f, 1.0f, -0.5f});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {10.0f, 20.0f, -10.0f});
        const auto out_low = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {-10.0f, -10.0f, 10.0f});
        const auto out_high = opset5::Constant::create(element::f32, Shape{1, 3, 1, 1}, {10.0f, 10.0f, -10.0f});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(param, in_low, in_high, out_low, out_high, 20);
        f_ref = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param});
    }

    const auto comparison = compare_functions(f, f_ref, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
||||
// Negative case: the second Multiply input is a Parameter rather than a Constant,
// so the graph is expected to stay as it was built.
TEST(TransformationTests, NegativeMulFakeQuantizeFusionNotAConstant) {
    std::shared_ptr<Function> f;
    std::shared_ptr<Function> f_ref;
    const Shape data_shape{1, 3, 14, 14};

    {
        // Graph under test: Multiply(Parameter, Parameter) -> FakeQuantize
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto second_param = std::make_shared<opset5::Parameter>(element::f32, Shape{1});
        const auto product = std::make_shared<opset5::Multiply>(param, second_param);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {0});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {20});
        const auto out_low = opset5::Constant::create(element::f32, Shape{}, {0});
        const auto out_high = opset5::Constant::create(element::f32, Shape{}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(product, in_low, in_high, out_low, out_high, 11);
        f = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param, second_param});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::MulFakeQuantizeFusion>();
        manager.register_pass<pass::ConstantFolding>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    {
        // Reference graph: the very same structure, built without running any pass
        const auto param = std::make_shared<opset5::Parameter>(element::f32, data_shape);
        const auto second_param = std::make_shared<opset5::Parameter>(element::f32, Shape{1});
        const auto product = std::make_shared<opset5::Multiply>(param, second_param);
        const auto in_low = opset5::Constant::create(element::f32, Shape{1}, {0});
        const auto in_high = opset5::Constant::create(element::f32, Shape{1}, {20});
        const auto out_low = opset5::Constant::create(element::f32, Shape{}, {0});
        const auto out_high = opset5::Constant::create(element::f32, Shape{}, {10});
        const auto quantize = std::make_shared<opset5::FakeQuantize>(product, in_low, in_high, out_low, out_high, 11);
        f_ref = std::make_shared<Function>(NodeVector{quantize}, ParameterVector{param, second_param});
    }

    const auto comparison = compare_functions(f, f_ref, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
@ -26,7 +26,7 @@ const std::vector<FuseFakeQuantizeTransformationTestValues> testValues = {
|
||||
}
|
||||
},
|
||||
{
|
||||
ngraph::Shape{128, 1},
|
||||
ngraph::Shape{128, 3},
|
||||
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParamsU8I8(),
|
||||
{
|
||||
ngraph::element::f32,
|
||||
|
@ -26,7 +26,7 @@ const std::vector<FuseFakeQuantizeTransformationTestValues> testValues = {
|
||||
},
|
||||
// 1) Multiply with different input and output shape
|
||||
{
|
||||
ngraph::Shape{128, 1},
|
||||
ngraph::Shape{128, 3},
|
||||
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParamsU8I8(),
|
||||
{
|
||||
ngraph::element::f32,
|
||||
|
Loading…
Reference in New Issue
Block a user