[CPU] FakeQuantize decomposition (#3741)

This commit is contained in:
Maxim Andronov 2021-02-02 09:37:02 +03:00 committed by GitHub
parent cca0d568e0
commit 537179b235
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 762 additions and 7 deletions

View File

@ -1,4 +1,4 @@
// Copyright (C) 2018-2020 Intel Corporation
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@ -57,6 +57,8 @@
#include <transformations/convert_precision.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>
#include <transformations/op_conversions/fq_decomposition.hpp>
#include <transformations/utils/utils.hpp>
#include <ngraph/opsets/opset2.hpp>
#include <ngraph/opsets/opset3.hpp>
@ -71,6 +73,8 @@
# include <low_precision/group_convolution.hpp>
# include <low_precision/multiply_to_group_convolution.hpp>
#include "nodes/mkldnn_quantize_node.h"
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
#if defined(_WIN32) || defined(WIN32)
#include <intrin.h>
@ -227,13 +231,22 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
transformer.transform(nGraphFunc);
}
bool has_fake_quantize = ::ngraph::op::util::has_op_with_type<ngraph::op::FakeQuantize>(nGraphFunc);
ngraph::pass::Manager legacyManager;
legacyManager.register_pass<ngraph::pass::FakeQuantizeDecomposition>();
legacyManager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
legacyManager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
// not legacy actually, but it should be the last transformation in the transformation pipeline
legacyManager.register_pass<ngraph::pass::UnrollTensorIterator>();
auto legacyPassConfig = legacyManager.get_pass_config();
legacyPassConfig->set_callback<ngraph::pass::FakeQuantizeDecomposition>([](const_node_ptr &node) -> bool {
return !MKLDNNQuantizeNode::isNeedToDecompose(node);
});
legacyPassConfig->set_callback<ngraph::pass::AddMultiplyFusion>([](const_node_ptr &node) -> bool {
if (auto mul_op = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node)) {
auto add_op = std::dynamic_pointer_cast<const ngraph::opset1::Add>(mul_op->get_input_node_shared_ptr(0));
@ -248,15 +261,16 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
return false;
});
legacyManager.get_pass_config()->set_callback<ngraph::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
legacyPassConfig->set_callback<ngraph::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
// UnrollTI transformation is disabled by default, is turned on by LowLatency transformation
return node->get_rt_info().count("UNROLL_TI") == 0;
});
legacyManager.run_passes(nGraphFunc);
OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork");
clonedNetwork = CNNNetwork(InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, clonedNetwork));
clonedNetwork = CNNNetwork(InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, clonedNetwork, has_fake_quantize));
OV_ITT_TASK_NEXT(taskChain, "ConvertIOPrecision");

View File

@ -18,6 +18,8 @@
#include <cpu/x64/jit_generator.hpp>
#include "ie_parallel.hpp"
#include <ngraph/opsets/opset1.hpp>
// Quantization ranges validation is switched off by default in order to avoid regressions on user side
// #define VALIDATE_QUANTIZATION_RANGES
@ -1029,7 +1031,7 @@ void MKLDNNQuantizeNode::init() {
float ih = inputHighData[isInputHighBroadcasted ? 0 : i];
#if defined(VALIDATE_QUANTIZATION_RANGES)
if ((il == ih && levels != 2) || std::isnan(il) || std::isnan(ih) || std::isinf(il) || std::isinf(ih)) {
if ((il == ih && levels != 2) || il > ih || std::isnan(il) || std::isnan(ih) || std::isinf(il) || std::isinf(ih)) {
THROW_IE_EXCEPTION << "Quantize layer with name '" << getName() << "' has invalid input quantize ranges: "
<< "inputLow = " << il << ", inputHigh = " << ih;
}
@ -1578,6 +1580,33 @@ void MKLDNNQuantizeNode::appendPostOps(mkldnn::post_ops& ops) {
isPostOpDataInitialized = true;
}
bool MKLDNNQuantizeNode::isNeedToDecompose(const std::shared_ptr<const ngraph::Node>& node) {
if (const auto fq = std::dynamic_pointer_cast<const ngraph::opset1::FakeQuantize>(node)) {
for (size_t i = 0; i < fq->get_input_size(); i++) {
if (fq->get_input_shape(i).size() > 5)
return true;
}
for (size_t i = 1; i < fq->get_input_size(); i++) {
size_t count_not_unit_axis = 0;
auto shape = fq->get_input_shape(i);
if (ngraph::shape_size(shape) != 1) {
size_t not_unit_axis = 0;
for (size_t i = 0; i < shape.size(); i++) {
if (shape[i] > 1) {
not_unit_axis = i;
count_not_unit_axis++;
}
}
if (count_not_unit_axis > 1 || not_unit_axis > 1)
return true;
}
}
}
return false;
}
// Reports whether this node instance was resolved to the Quantize node type.
bool MKLDNNQuantizeNode::created() const {
    return getType() == Quantize;
}

View File

@ -1,4 +1,4 @@
// Copyright (C) 2018-2020 Intel Corporation
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@ -113,6 +113,8 @@ public:
void appendPostOps(mkldnn::post_ops& ops) override;
static bool isNeedToDecompose(const std::shared_ptr<const ngraph::Node>& node);
private:
void init() override;
std::vector<mkldnn::memory::format_tag> getDataFormats() const;

View File

@ -0,0 +1,47 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
namespace ngraph {
namespace pass {
class TRANSFORMATIONS_API FakeQuantizeDecomposition;
} // namespace pass
} // namespace ngraph
/**
* @ingroup ie_transformation_common_api
* @brief FakeQuantizeDecomposition transformation decomposes FakeQuantize layer.
*
* Expression from specification:
* if x <= min(input_low, input_high):
* output = output_low
* elif x > max(input_low, input_high):
* output = output_high
* else:
* output = round((x - input_low) / (input_high - input_low) * (levels-1)) / (levels-1) * (output_high - output_low) + output_low
*
* expand brackets into round:
* round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low))
* div on (levels-1) and mult on (output_high - output_low) => mult on (output_high - output_low) / (levels-1)
*
* =>
* round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * (output_high - output_low) / (levels-1) + output_low
*
* This transformation doesn't support following cases:
* 1. At least one 'range' input is not Constant
 * 2. At least one 'input_low' input value is greater than or equal to the corresponding 'input_high' input value
*
*/
class ngraph::pass::FakeQuantizeDecomposition: public ngraph::pass::MatcherPass {
public:
    NGRAPH_RTTI_DECLARATION;
    // Registers the FakeQuantize matcher and the decomposition callback.
    FakeQuantizeDecomposition();
};

View File

@ -0,0 +1,124 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "itt.hpp"
#include "transformations/op_conversions/fq_decomposition.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/builder/autobroadcast.hpp>
#include <numeric>
NGRAPH_RTTI_DEFINITION(ngraph::pass::FakeQuantizeDecomposition, "FakeQuantizeDecomposition", 0);
// Validates the 'input_low' / 'input_high' range inputs of a FakeQuantize node.
//
// Constant-folds element-wise `input_low >= input_high`; the ranges are valid
// only when no element of input_low is greater than or equal to the
// corresponding element of input_high.
//
// @return true when the ranges are provably valid; false when folding fails,
//         does not produce a Constant, or any comparison yields true.
bool isValidRangesInputs(const std::shared_ptr<ngraph::opset1::FakeQuantize> &fq) {
    auto il = fq->input_value(1);
    auto ih = fq->input_value(2);
    auto greater_equal = std::make_shared<ngraph::opset1::GreaterEqual>(il, ih);

    ngraph::OutputVector result(1);
    if (!greater_equal->constant_fold(result, greater_equal->input_values()))
        return false;

    auto res_node = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(result[0].get_node_shared_ptr());
    // Guard against a non-Constant folding result: the original code
    // dereferenced res_node unconditionally, which would crash here.
    if (res_node == nullptr)
        return false;

    const std::vector<bool> comp_result = res_node->cast_vector<bool>();
    return !std::any_of(comp_result.begin(), comp_result.end(), [](const bool value) { return value; });
}
// Matcher pass: replaces FakeQuantize (with Constant range inputs) by the
// equivalent sequence of elementwise operations described in the header:
//   round((clamp(x) - input_low) * isc) * osc + output_low
// where isc = (levels-1)/(input_high-input_low) and
//       osc = (output_high-output_low)/(levels-1).
ngraph::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() {
    MATCHER_SCOPE(FakeQuantizeDecomposition);
    // All four 'range' inputs must be Constants; non-constant ranges are an
    // unsupported case (see the header documentation).
    auto data = ngraph::pattern::any_input();
    auto il = ngraph::pattern::wrap_type<opset1::Constant>();
    auto ih = ngraph::pattern::wrap_type<opset1::Constant>();
    auto ol = ngraph::pattern::wrap_type<opset1::Constant>();
    auto oh = ngraph::pattern::wrap_type<opset1::Constant>();
    auto fake_quantize = ngraph::pattern::wrap_type<ngraph::opset1::FakeQuantize>({data, il, ih, ol, oh});

    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
        auto &pattern_to_output = m.get_pattern_value_map();
        const auto fake_quantize_node = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(pattern_to_output.at(fake_quantize).get_node_shared_ptr());

        // Bail out when the plugin callback vetoes decomposition or when any
        // input_low element is >= the matching input_high element.
        if (fake_quantize_node == nullptr || transformation_callback(fake_quantize_node) || !isValidRangesInputs(fake_quantize_node)) {
            return false;
        }

        Output<Node> data{fake_quantize_node->input_value(0)};
        const Output<Node> input_low{fake_quantize_node->input_value(1)};
        const Output<Node> input_high{fake_quantize_node->input_value(2)};
        const Output<Node> output_low{fake_quantize_node->input_value(3)};
        const Output<Node> output_high{fake_quantize_node->input_value(4)};
        auto input_type = data.get_element_type();

        // Every node created below is collected so runtime info can be copied
        // from the original FakeQuantize onto the whole decomposition.
        ngraph::NodeVector decomp_ops;
        // Align the data element type with the ranges' type before arithmetic.
        if (input_type != input_low.get_element_type()) {
            input_type = input_low.get_element_type();
            data = std::make_shared<ngraph::opset1::Convert>(data, input_type);
            decomp_ops.push_back(data.get_node_shared_ptr());
        }

        // if we set input_low or input_high in formula we got output = output_low and output = output_high respectively
        // so we just clamp x
        const auto max = std::make_shared<ngraph::opset1::Maximum>(data, input_low);
        const auto min = std::make_shared<ngraph::opset1::Minimum>(max, input_high);
        decomp_ops.push_back(max);
        decomp_ops.push_back(min);

        // (levels-1)
        const auto levels_minus_one = std::make_shared<ngraph::opset1::Constant>(input_type, Shape{}, fake_quantize_node->get_levels() - 1);
        decomp_ops.push_back(levels_minus_one);
        // (input_high - input_low)
        const auto subInHighLow = std::make_shared<ngraph::opset1::Subtract>(input_high, input_low);
        // (levels-1) / (input_high - input_low)
        const auto isc = std::make_shared<ngraph::opset1::Divide>(levels_minus_one, subInHighLow);
        // input_low * (levels-1) / (input_high - input_low)
        const auto ish = std::make_shared<ngraph::opset1::Multiply>(input_low, isc);
        decomp_ops.push_back(subInHighLow);
        decomp_ops.push_back(isc);
        decomp_ops.push_back(ish);

        // x * (levels-1) / (input_high - input_low)
        const auto after_isc_apply = std::make_shared<ngraph::opset1::Multiply>(min, isc);
        // x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)
        const auto after_ish_apply = std::make_shared<ngraph::opset1::Subtract>(after_isc_apply, ish);
        decomp_ops.push_back(after_isc_apply);
        decomp_ops.push_back(after_ish_apply);

        // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low))
        const auto round = std::make_shared<ngraph::opset5::Round>(after_ish_apply, ngraph::opset5::Round::RoundMode::HALF_TO_EVEN);
        decomp_ops.push_back(round);

        // (output_high - output_low)
        const auto sub_out_high_low = std::make_shared<ngraph::opset1::Subtract>(output_high, output_low);
        // (output_high - output_low) / (levels-1)
        const auto osc = std::make_shared<ngraph::opset1::Divide>(sub_out_high_low, levels_minus_one);
        decomp_ops.push_back(sub_out_high_low);
        decomp_ops.push_back(osc);

        // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * (output_high - output_low) / (levels-1)
        const auto after_osc_apply = std::make_shared<ngraph::opset1::Multiply>(round, osc);
        // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * (output_high - output_low) / (levels-1) +
        // output_low
        std::shared_ptr<Node> result = std::make_shared<ngraph::opset1::Add>(after_osc_apply, output_low);
        decomp_ops.push_back(after_osc_apply);
        decomp_ops.push_back(result);

        // Convert back to the original output type when the arithmetic was
        // performed in the ranges' precision.
        if (result->get_output_element_type(0) != fake_quantize_node->get_output_element_type(0)) {
            result = std::make_shared<ngraph::opset1::Convert>(result, fake_quantize_node->get_output_element_type(0));
            decomp_ops.push_back(result);
        }

        result->set_friendly_name(m.get_match_root()->get_friendly_name());
        ngraph::copy_runtime_info(fake_quantize_node, decomp_ops);
        ngraph::replace_node(m.get_match_root(), result);
        return true;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(fake_quantize, matcher_name);
    register_matcher(m, callback);
}

View File

@ -0,0 +1,249 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/pass/manager.hpp>
#include <transformations/op_conversions/fq_decomposition.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/utils/utils.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
#include "common_test_utils/common_utils.hpp"
// Core FakeQuantize configuration: precisions, shapes of all five inputs, and
// the quantization levels count.
using FakeQuantizeDecompositionBasicParams = std::tuple<ngraph::element::Type_t,  // 'data' input precision
                                                        ngraph::Shape,            // data shape
                                                        ngraph::element::Type_t,  // 'range' inputs precision
                                                        ngraph::Shape,            // il shape
                                                        ngraph::Shape,            // ih shape
                                                        ngraph::Shape,            // ol shape
                                                        ngraph::Shape,            // oh shape
                                                        size_t                    // levels
                                                        >;

// Full test parameter set: basic params + range values + expected outcome.
using FakeQuantizeDecompositionParamsSet = std::tuple<FakeQuantizeDecompositionBasicParams,
                                                      std::pair<float, float>,  // il and ih values
                                                      bool                      // whether the FQ should be decomposed
                                                      >;
// Transformation unit test: runs FakeQuantizeDecomposition on a single-FQ
// function and compares the result against a hand-built reference function
// (either the decomposed subgraph or the untouched FQ, depending on the
// 'should be decomposed' flag).
class FakeQuantizeDecompositionTest : public CommonTestUtils::TestsCommon, public ::testing::WithParamInterface<FakeQuantizeDecompositionParamsSet> {
public:
    // Builds a readable test name from every tuple parameter.
    static std::string getTestCaseName(::testing::TestParamInfo<FakeQuantizeDecompositionParamsSet> obj) {
        FakeQuantizeDecompositionBasicParams basic_params;
        std::pair<float, float> input_ranges_values;
        bool should_be_decompos;
        std::tie(basic_params, input_ranges_values, should_be_decompos) = obj.param;

        ngraph::Shape data_shape, il_shape, ih_shape, ol_shape, oh_shape;
        ngraph::element::Type_t data_prec, ranges_prec;
        size_t levels;
        std::tie(data_prec, data_shape, ranges_prec, il_shape, ih_shape, ol_shape, oh_shape, levels) = basic_params;

        std::ostringstream result;
        result << "DATA=" << CommonTestUtils::vec2str(data_shape) << "_";
        result << "DATA_PRC=" << ngraph::element::Type(data_prec) << "_";
        result << "IL=" << CommonTestUtils::vec2str(il_shape) << "_" << input_ranges_values.first << "_";
        result << "IH=" << CommonTestUtils::vec2str(ih_shape) << "_" << input_ranges_values.second << "_";
        result << "OL=" << CommonTestUtils::vec2str(ol_shape) << "_";
        result << "OH=" << CommonTestUtils::vec2str(oh_shape) << "_";
        result << "RANGES_PRC=" << ngraph::element::Type(ranges_prec) << "_";
        result << "LEVELS=" << levels;
        return result.str();
    }

protected:
    // Whole verification happens here: build FQ function, run the pass,
    // then compare against the manually constructed expected function.
    // NOTE(review): SetUp() presumably overrides a virtual from the test
    // base class — consider marking it 'override'.
    void SetUp() {
        FakeQuantizeDecompositionBasicParams basic_params;
        std::pair<float, float> input_ranges_values;
        bool should_be_decompos;
        std::tie(basic_params, input_ranges_values, should_be_decompos) = this->GetParam();

        ngraph::Shape data_shape, il_shape, ih_shape, ol_shape, oh_shape;
        ngraph::element::Type_t data_prec, ranges_prec;
        size_t levels;
        std::tie(data_prec, data_shape, ranges_prec, il_shape, ih_shape, ol_shape, oh_shape, levels) = basic_params;

        // A Convert is expected in the decomposition only when the data
        // precision differs from the ranges' precision.
        bool need_convert = data_prec != ranges_prec;

        std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
        {
            // Function under test: Parameter -> FakeQuantize.
            const auto data = std::make_shared<ngraph::opset1::Parameter>(data_prec, ngraph::PartialShape(data_shape));
            const auto il = std::make_shared<ngraph::opset1::Constant>(ranges_prec, il_shape, input_ranges_values.first);
            const auto ih = std::make_shared<ngraph::opset1::Constant>(ranges_prec, ih_shape, input_ranges_values.second);
            const auto ol = std::make_shared<ngraph::opset1::Constant>(ranges_prec, ol_shape);
            const auto oh = std::make_shared<ngraph::opset1::Constant>(ranges_prec, oh_shape);

            const auto fq = std::make_shared<ngraph::opset1::FakeQuantize>(data, il, ih, ol, oh, levels);
            f = std::make_shared<ngraph::Function>(ngraph::NodeVector{fq}, ngraph::ParameterVector{data});

            ngraph::pass::Manager manager;
            manager.register_pass<ngraph::pass::InitNodeInfo>();
            manager.register_pass<ngraph::pass::FakeQuantizeDecomposition>();
            manager.run_passes(f);

            ASSERT_NO_THROW(check_rt_info(f));
        }

        {
            // Reference function: the expected decomposed subgraph, mirroring
            // the node sequence produced by the transformation.
            auto input_data = std::make_shared<ngraph::opset1::Parameter>(data_prec, ngraph::PartialShape(data_shape));
            ngraph::ParameterVector params;
            params.push_back(input_data);
            std::shared_ptr<ngraph::Node> data = input_data;
            const auto il = std::make_shared<ngraph::opset1::Constant>(ranges_prec, il_shape, input_ranges_values.first);
            const auto ih = std::make_shared<ngraph::opset1::Constant>(ranges_prec, ih_shape, input_ranges_values.second);
            const auto ol = std::make_shared<ngraph::opset1::Constant>(ranges_prec, ol_shape);
            const auto oh = std::make_shared<ngraph::opset1::Constant>(ranges_prec, oh_shape);

            if (should_be_decompos) {
                if (need_convert) {
                    data = std::make_shared<ngraph::opset1::Convert>(data, ranges_prec);
                }

                const auto max = std::make_shared<ngraph::opset1::Maximum>(data, il);
                const auto min = std::make_shared<ngraph::opset1::Minimum>(max, ih);

                const auto levels_minus_one = std::make_shared<ngraph::opset1::Constant>(ranges_prec, ngraph::Shape{}, levels - 1);
                const auto sub_in_high_low = std::make_shared<ngraph::opset1::Subtract>(ih, il);
                const auto isc = std::make_shared<ngraph::opset1::Divide>(levels_minus_one, sub_in_high_low);
                const auto ish = std::make_shared<ngraph::opset1::Multiply>(il, isc);

                const auto after_isc_apply = std::make_shared<ngraph::opset1::Multiply>(min, isc);
                const auto after_ish_apply = std::make_shared<ngraph::opset1::Subtract>(after_isc_apply, ish);

                const auto round = std::make_shared<ngraph::opset5::Round>(after_ish_apply, ngraph::opset5::Round::RoundMode::HALF_TO_EVEN);

                const auto sub_out_high_low = std::make_shared<ngraph::opset1::Subtract>(oh, ol);
                const auto osc = std::make_shared<ngraph::opset1::Divide>(sub_out_high_low, levels_minus_one);

                const auto after_osc_apply = std::make_shared<ngraph::opset1::Multiply>(round, osc);
                const auto after_out_low_add = std::make_shared<ngraph::opset1::Add>(after_osc_apply, ol);
                std::shared_ptr<ngraph::Node> result = after_out_low_add;

                if (need_convert) {
                    result = std::make_shared<ngraph::opset1::Convert>(result, data_prec);
                }

                f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{result}, params);
            } else {
                // No decomposition expected: the reference keeps the FQ as-is.
                const auto fq = std::make_shared<ngraph::opset1::FakeQuantize>(data, il, ih, ol, oh, levels);
                f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{fq}, params);
            }
        }

        const auto res = compare_functions(f, f_ref);
        ASSERT_TRUE(res.first) << res.second;
    }
};
TEST_P(FakeQuantizeDecompositionTest, CompareFunctions) {}
// ---- Shared parameter pools --------------------------------------------------
const std::vector<ngraph::element::Type_t> precisions = {ngraph::element::Type_t::f16, ngraph::element::Type_t::f32};

const std::vector<size_t> levels = {16, 255, 256};

// Valid ranges (input_low < input_high) => decomposition is expected.
const std::vector<std::pair<float, float>> input_ranges_supported = {
    {-10.0f, 10.f}
};

// Per-channel ranges on a 4D input.
const auto simple_fq_basic = ::testing::Combine(::testing::ValuesIn(precisions),
                                                ::testing::Values(ngraph::Shape{2, 3, 4, 5}),
                                                ::testing::ValuesIn(precisions),
                                                ::testing::Values(ngraph::Shape{1, 3, 1, 1}),
                                                ::testing::Values(ngraph::Shape{1, 3, 1, 1}),
                                                ::testing::Values(ngraph::Shape{1, 3, 1, 1}),
                                                ::testing::Values(ngraph::Shape{1, 3, 1, 1}),
                                                ::testing::ValuesIn(levels));

// Ranges that broadcast along different axes.
const auto broadcast_fq_basic = ::testing::Combine(::testing::ValuesIn(precisions),
                                                   ::testing::Values(ngraph::Shape{2, 3, 4, 5}),
                                                   ::testing::ValuesIn(precisions),
                                                   ::testing::Values(ngraph::Shape{1, 3, 4, 1}),
                                                   ::testing::Values(ngraph::Shape{1, 1, 4, 5}),
                                                   ::testing::Values(ngraph::Shape{1, 1, 1, 1}),
                                                   ::testing::Values(ngraph::Shape{1, 1, 1, 1}),
                                                   ::testing::ValuesIn(levels));

// Elementwise (full-shape) ranges.
const auto elementwise_fq_basic = ::testing::Combine(::testing::ValuesIn(precisions),
                                                     ::testing::Values(ngraph::Shape{2, 3, 4, 5}),
                                                     ::testing::ValuesIn(precisions),
                                                     ::testing::Values(ngraph::Shape{2, 3, 4, 5}),
                                                     ::testing::Values(ngraph::Shape{2, 3, 4, 1}),
                                                     ::testing::Values(ngraph::Shape{2, 3, 4, 5}),
                                                     ::testing::Values(ngraph::Shape{2, 3, 4, 5}),
                                                     ::testing::ValuesIn(levels));

// 6D input with broadcasting ranges.
const auto broadcast_6D_fq_basic = ::testing::Combine(::testing::ValuesIn(precisions),
                                                      ::testing::Values(ngraph::Shape{2, 3, 4, 5, 6, 7}),
                                                      ::testing::ValuesIn(precisions),
                                                      ::testing::Values(ngraph::Shape{2, 3, 4, 1, 1, 1}),
                                                      ::testing::Values(ngraph::Shape{1, 3, 4, 5, 1, 1}),
                                                      ::testing::Values(ngraph::Shape{1, 1, 1, 5, 6, 7}),
                                                      ::testing::Values(ngraph::Shape{1, 1, 1, 5, 6, 7}),
                                                      ::testing::ValuesIn(levels));

// ---- Cases where decomposition IS expected -----------------------------------
INSTANTIATE_TEST_CASE_P(SimpleFakeQuantize_Decomposition, FakeQuantizeDecompositionTest,
                        ::testing::Combine(
                                simple_fq_basic,
                                ::testing::ValuesIn(input_ranges_supported),
                                ::testing::Values(true)),
                        FakeQuantizeDecompositionTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(BroadcastFakeQuantize_Decomposition, FakeQuantizeDecompositionTest,
                        ::testing::Combine(
                                broadcast_fq_basic,
                                ::testing::ValuesIn(input_ranges_supported),
                                ::testing::Values(true)),
                        FakeQuantizeDecompositionTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(ElementwiseFakeQuantize_Decomposition, FakeQuantizeDecompositionTest,
                        ::testing::Combine(
                                elementwise_fq_basic,
                                ::testing::ValuesIn(input_ranges_supported),
                                ::testing::Values(true)),
                        FakeQuantizeDecompositionTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(FakeQuantize6D_Decomposition, FakeQuantizeDecompositionTest,
                        ::testing::Combine(
                                broadcast_6D_fq_basic,
                                ::testing::ValuesIn(input_ranges_supported),
                                ::testing::Values(true)),
                        FakeQuantizeDecompositionTest::getTestCaseName);

// ---- Cases where decomposition is NOT expected (invalid ranges) --------------
// input_low >= input_high in every pair below.
const std::vector<std::pair<float, float>> input_ranges_unsupported = {
    {10.0f, -10.f},
    {5.0f, 5.0f},
    {-5.0f, -5.0f}
};

INSTANTIATE_TEST_CASE_P(SimpleFakeQuantize_NoDecomposition, FakeQuantizeDecompositionTest,
                        ::testing::Combine(
                                simple_fq_basic,
                                ::testing::ValuesIn(input_ranges_unsupported),
                                ::testing::Values(false)),
                        FakeQuantizeDecompositionTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(BroadcastFakeQuantize_NoDecomposition, FakeQuantizeDecompositionTest,
                        ::testing::Combine(
                                broadcast_fq_basic,
                                ::testing::ValuesIn(input_ranges_unsupported),
                                ::testing::Values(false)),
                        FakeQuantizeDecompositionTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(ElementwiseFakeQuantize_NoDecomposition, FakeQuantizeDecompositionTest,
                        ::testing::Combine(
                                elementwise_fq_basic,
                                ::testing::ValuesIn(input_ranges_unsupported),
                                ::testing::Values(false)),
                        FakeQuantizeDecompositionTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(FakeQuantize6D_NoDecomposition, FakeQuantizeDecompositionTest,
                        ::testing::Combine(
                                broadcast_6D_fq_basic,
                                ::testing::ValuesIn(input_ranges_unsupported),
                                ::testing::Values(false)),
                        FakeQuantizeDecompositionTest::getTestCaseName);

View File

@ -0,0 +1,288 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
using namespace InferenceEngine;
using namespace ngraph;
using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
// FQ-specific parameters: data-generation bounds, output ranges, the shapes of
// all four 'range' inputs, and the quantization levels count.
using fqSpecificParams = std::tuple<int64_t,                  // 'data' input low bounds
                                    int64_t,                  // 'data' input high bounds
                                    std::vector<float>,       // output low
                                    std::vector<float>,       // output high
                                    std::vector<SizeVector>,  // 'range' inputs shapes
                                    size_t>;                  // levels

// Full CPU layer-test parameter set.
using fqLayerTestParamsSet = std::tuple<fqSpecificParams,
                                        SizeVector,                                         // 'data' input shape
                                        Precision,                                          // input precision
                                        std::pair<std::vector<float>, std::vector<float>>,  // il and ih values
                                        bool,                                               // should be decomposed
                                        CPUSpecificParams>;
// CPU single-layer test for FakeQuantize: runs the layer on the CPU plugin,
// compares against reference results, and checks which primitive actually
// executed (native Quantize node vs. a decomposed subgraph).
class FakeQuantizeLayerCPUTest : public testing::WithParamInterface<fqLayerTestParamsSet>,
                                 virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
public:
    // Builds a readable test name from every tuple parameter.
    static std::string getTestCaseName(testing::TestParamInfo<fqLayerTestParamsSet> obj) {
        fqSpecificParams fqParams;
        SizeVector inDataShape;
        Precision inPrec;
        std::pair<std::vector<float>, std::vector<float>> inputRangesValues;
        bool shouldBeDecomposed;
        CPUSpecificParams cpuParams;
        std::tie(fqParams, inDataShape, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = obj.param;

        int64_t inDataLowBounds, inDataHighBounds;
        std::vector<float> inputLow, inputHigh, outputLow, outputHigh;
        std::vector<SizeVector> inRangesShapes;
        size_t levels;
        inputLow = inputRangesValues.first;
        inputHigh = inputRangesValues.second;
        std::tie(inDataLowBounds, inDataHighBounds, outputLow, outputHigh, inRangesShapes, levels) = fqParams;

        std::ostringstream result;
        result << "IS=" << CommonTestUtils::vec2str(inDataShape) << "_";
        result << "inPrec=" << inPrec.name() << "_";

        std::string rs = "";
        for (size_t i = 0; i < inRangesShapes.size(); i++) {
            rs += CommonTestUtils::vec2str(inRangesShapes[i]) + "_";
        }
        result << "RS=" << rs;
        result << "LOW_BOUNDS=" << inDataLowBounds << "_";
        result << "HIGH_BOUNDS=" << inDataHighBounds << "_";
        result << "IL=" << CommonTestUtils::vec2str(inputLow) << "_";
        result << "IH=" << CommonTestUtils::vec2str(inputHigh) << "_";
        result << "OL=" << CommonTestUtils::vec2str(outputLow) << "_";
        result << "OH=" << CommonTestUtils::vec2str(outputHigh) << "_";
        result << "LEVELS=" << levels;
        result << CPUTestsBase::getTestCaseName(cpuParams);
        return result.str();
    }

    // Custom inference: fills the single data input with random values inside
    // the configured [low, high) bounds rather than the default blob filler.
    void Infer() override {
        inferRequest = executableNetwork.CreateInferRequest();
        inputs.clear();

        const InputsDataMap &inDataMap = cnnNetwork.getInputsInfo();
        auto input = inDataMap.begin();
        Blob::Ptr blob = FuncTestUtils::createAndFillBlob(input->second->getTensorDesc(), inDataHighBounds - inDataLowBounds, inDataLowBounds);
        inferRequest.SetBlob(input->second->name(), blob);
        inputs.push_back(blob);

        inferRequest.Infer();
    }

protected:
    // Expected executed node type ("Quantize" or empty when decomposed);
    // checked in the test body via CheckPluginRelatedResults().
    std::string layerName;

    void SetUp() override {
        targetDevice = CommonTestUtils::DEVICE_CPU;
        fqSpecificParams fqParams;
        SizeVector inDataShape;
        Precision inPrec;
        std::pair<std::vector<float>, std::vector<float>> inputRangesValues;
        bool shouldBeDecomposed;
        CPUSpecificParams cpuParams;
        std::tie(fqParams, inDataShape, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = this->GetParam();

        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;

        std::vector<SizeVector> inRangesShapes;
        size_t levels;
        std::vector<std::vector<float>> rangesBounds(RANGES_INPUT_NUMBER);
        rangesBounds[0] = inputRangesValues.first;
        rangesBounds[1] = inputRangesValues.second;
        std::tie(inDataLowBounds, inDataHighBounds, rangesBounds[2], rangesBounds[3], inRangesShapes, levels) = fqParams;

        auto ngInPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec);
        ParameterVector params = builder::makeParams(ngInPrec, {inDataShape});
        auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes<opset5::Parameter>(params));

        // Empty bounds vector => makeConstant generates random range values.
        auto il = builder::makeConstant(ngInPrec, inRangesShapes[0], rangesBounds[0], rangesBounds[0].empty());
        auto ih = builder::makeConstant(ngInPrec, inRangesShapes[1], rangesBounds[1], rangesBounds[1].empty());
        auto ol = builder::makeConstant(ngInPrec, inRangesShapes[2], rangesBounds[2], rangesBounds[2].empty());
        auto oh = builder::makeConstant(ngInPrec, inRangesShapes[3], rangesBounds[3], rangesBounds[3].empty());
        auto fq = std::make_shared<opset5::FakeQuantize>(paramOuts[0], il, ih, ol, oh, levels);

        // When decomposed, no "Quantize" node is expected in the exec graph.
        layerName = shouldBeDecomposed ? "" : "Quantize";

        if (selectedType.empty()) {
            selectedType = getPrimitiveType() + "_" + inPrec.name();
        }

        fq->get_rt_info() = getCPUInfo();

        function = std::make_shared<Function>(fq, params, "FakeQuantizeCPU");
    }

private:
    const size_t RANGES_INPUT_NUMBER = 4;

    // Bounds for random generation of the 'data' input (see Infer()).
    int64_t inDataLowBounds, inDataHighBounds;
};
TEST_P(FakeQuantizeLayerCPUTest, CompareWithRefs) {
    Run();
    // Verifies that the expected primitive (or none, if decomposed) executed.
    CheckPluginRelatedResults(executableNetwork, layerName);
}

// ---- Parameters shared by all instantiations below ---------------------------
const std::vector<size_t> levels = {16, 255, 256};

// Random-data generation bounds for the 'data' input.
int64_t dataLowBounds{-10}, dataHighBounds{10};

const std::vector<std::pair<std::vector<float>, std::vector<float>>> input_ranges = {
    {{0.0f}, {5.f}},
    {{-10.0f}, {-5.f}}
};

const std::vector<float> outputLow{5.0f}, outputHigh{25.0f};
// Cases executed by the native CPU Quantize primitive (no decomposition):
// jit and reference implementations, 4D and 5D layouts.
namespace fqImpl {

std::vector<CPUSpecificParams> memForm4D_jit = {
    CPUSpecificParams({nchw}, {nchw}, {}, {}),
    CPUSpecificParams({nhwc}, {nhwc}, {}, {}),
    CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})
};

// Per-channel and per-tensor range shapes supported by the jit kernels.
const std::vector<std::vector<SizeVector>> rangesShapes4D_jit = {
    {{1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}},
    {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}
};

const auto specificParams4D_jit = ::testing::Combine(::testing::Values(dataLowBounds),
                                                     ::testing::Values(dataHighBounds),
                                                     ::testing::Values(outputLow),
                                                     ::testing::Values(outputHigh),
                                                     ::testing::ValuesIn(rangesShapes4D_jit),
                                                     ::testing::ValuesIn(levels));
const auto testParams4D_jit = ::testing::Combine(specificParams4D_jit,
                                                 ::testing::Values(SizeVector{4, 5, 6, 7}),
                                                 ::testing::Values(Precision::FP32),
                                                 ::testing::ValuesIn(input_ranges),
                                                 ::testing::Values(false),
                                                 ::testing::ValuesIn(filterCPUSpecificParams(memForm4D_jit)));

INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_4D_jit, FakeQuantizeLayerCPUTest, testParams4D_jit, FakeQuantizeLayerCPUTest::getTestCaseName);

std::vector<CPUSpecificParams> memForm4D_ref = {
    CPUSpecificParams({nchw}, {nchw}, {"ref_FP32"}, {"ref_FP32"})
};

// Per-batch range shapes fall back to the reference implementation.
const std::vector<std::vector<SizeVector>> rangesShapes4D_ref = {
    {{4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}}
};

const auto specificParams4D_ref = ::testing::Combine(::testing::Values(dataLowBounds),
                                                     ::testing::Values(dataHighBounds),
                                                     ::testing::Values(outputLow),
                                                     ::testing::Values(outputHigh),
                                                     ::testing::ValuesIn(rangesShapes4D_ref),
                                                     ::testing::ValuesIn(levels));
const auto testParams4D_ref = ::testing::Combine(specificParams4D_ref,
                                                 ::testing::Values(SizeVector{4, 5, 6, 7}),
                                                 ::testing::Values(Precision::FP32),
                                                 ::testing::ValuesIn(input_ranges),
                                                 ::testing::Values(false),
                                                 ::testing::ValuesIn(memForm4D_ref));

INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_4D_ref, FakeQuantizeLayerCPUTest, testParams4D_ref, FakeQuantizeLayerCPUTest::getTestCaseName);

std::vector<CPUSpecificParams> memForm5D_jit = {
    CPUSpecificParams({ncdhw}, {ncdhw}, {}, {}),
    CPUSpecificParams({ndhwc}, {ndhwc}, {}, {}),
    CPUSpecificParams({nCdhw16c}, {nCdhw16c}, {}, {})
};

const std::vector<std::vector<SizeVector>> rangesShapes5D_jit = {
    {{1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}},
    {{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}
};

const auto specificParams5D_jit = ::testing::Combine(::testing::Values(dataLowBounds),
                                                     ::testing::Values(dataHighBounds),
                                                     ::testing::Values(outputLow),
                                                     ::testing::Values(outputHigh),
                                                     ::testing::ValuesIn(rangesShapes5D_jit),
                                                     ::testing::ValuesIn(levels));
const auto testParams5D_jit = ::testing::Combine(specificParams5D_jit,
                                                 ::testing::Values(SizeVector{3, 4, 5, 6, 7}),
                                                 ::testing::Values(Precision::FP32),
                                                 ::testing::ValuesIn(input_ranges),
                                                 ::testing::Values(false),
                                                 ::testing::ValuesIn(filterCPUSpecificParams(memForm5D_jit)));

INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_5D_jit, FakeQuantizeLayerCPUTest, testParams5D_jit, FakeQuantizeLayerCPUTest::getTestCaseName);

std::vector<CPUSpecificParams> memForm5D_ref = {
    CPUSpecificParams({ncdhw}, {ncdhw}, {"ref_FP32"}, {"ref_FP32"})
};

const std::vector<std::vector<SizeVector>> rangesShapes5D_ref = {
    {{3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}}
};

const auto specificParams5D_ref = ::testing::Combine(::testing::Values(dataLowBounds),
                                                     ::testing::Values(dataHighBounds),
                                                     ::testing::Values(outputLow),
                                                     ::testing::Values(outputHigh),
                                                     ::testing::ValuesIn(rangesShapes5D_ref),
                                                     ::testing::ValuesIn(levels));
const auto testParams5D_ref = ::testing::Combine(specificParams5D_ref,
                                                 ::testing::Values(SizeVector{3, 4, 5, 6, 7}),
                                                 ::testing::Values(Precision::FP32),
                                                 ::testing::ValuesIn(input_ranges),
                                                 ::testing::Values(false),
                                                 ::testing::ValuesIn(memForm5D_ref));

INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_5D_ref, FakeQuantizeLayerCPUTest, testParams5D_ref, FakeQuantizeLayerCPUTest::getTestCaseName);

} // namespace fqImpl
// Shapes used by the decomposition cases below (4D, 5D, and 6D data).
const std::vector<SizeVector> dataShapes = {
    {4, 5, 6, 7},
    {3, 4, 5, 6, 7},
    {2, 3, 4, 5, 6, 7},
};

// Range shapes that the native primitive cannot handle (broadcast on
// non-channel axes / multiple axes), forcing decomposition.
const std::vector<std::vector<SizeVector>> rangesShapes = {
    {{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}},
    {{1, 5, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}},
    {{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}},
    {{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}},
    {{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}}
};
// Cases where the FakeQuantize is expected to be decomposed (shouldBeDecomposed
// = true, so no "Quantize" node may appear in the executed graph).
namespace fqDecompos {

const auto specificParams = ::testing::Combine(::testing::Values(dataLowBounds),
                                               ::testing::Values(dataHighBounds),
                                               ::testing::Values(outputLow),
                                               ::testing::Values(outputHigh),
                                               ::testing::ValuesIn(rangesShapes),
                                               ::testing::ValuesIn(levels));
const auto testParams = ::testing::Combine(specificParams,
                                           ::testing::ValuesIn(dataShapes),
                                           ::testing::Values(Precision::FP32),
                                           ::testing::ValuesIn(input_ranges),
                                           ::testing::Values(true),
                                           ::testing::Values(CPUSpecificParams{}));

INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_Decompos, FakeQuantizeLayerCPUTest, testParams, FakeQuantizeLayerCPUTest::getTestCaseName);

} // namespace fqDecompos
} // namespace CPULayerTestsDefinitions

View File

@ -77,6 +77,8 @@ std::string CPUTestsBase::impls2str(const std::vector<std::string> &priority) {
}
void CPUTestsBase::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType) const {
if (nodeType.empty()) return;
ASSERT_TRUE(!selectedType.empty()) << "Node type is not defined.";
bool isNodeFound = false;
InferenceEngine::CNNNetwork execGraphInfo = execNet.GetExecGraphInfo();

View File

@ -223,11 +223,11 @@ namespace ngraph
out_high,
i,
out_high_offsets);
if (arg[i] <= in_low_val)
if (arg[i] <= std::min(in_low_val, in_high_val))
{
out[i] = out_low_val;
}
else if (arg[i] > in_high_val)
else if (arg[i] > std::max(in_low_val, in_high_val))
{
out[i] = out_high_val;
}