[LPT] MoveFakeQuantize (#6723)

* add move_fake_quantize_for_concat_transformation, mfk and mfk_function

* fix relu_transformation.cpp

* backup

* add change

* add cpu test

* [LPT] MoveFakeQuantizeTransformation: fixes

* get InferenceEngine::NotImplemented

* fix ieFuncTests

* try without new cpu_test

* fix cpuFuncTests and ieFuncTests

* fix tests

* fix lin

* add cpu test

* fix link and matcher in move_fake_quantize.cpp

* update matcher

* add gpu test

* naming fix

* move_fake_quantize.cpp add set_fr_name for new_concat

* naming new fq fix

* fix NetworkHelper::copyInfo naming

* concat.cpp naming fix

* gpu tests fix

* rm network_helper changes

* rm extra output

* resolve conversations

* resolve other conversations

* add multi inputs for concat

* fix lin

* fix move_fake_quantize naming

* rm maxpool from mfk_function

* mkldnn update

* fix style

* rm extra change

* fix concat matcher

* rm mkldnn_plugin changes

* fix conversations

* fix interval

* fix and add isQuantizedStatic, add attribute and negative tests

* add negative plugin tests

* fix style:

Co-authored-by: Edward Shogulin <edward.shogulin@intel.com>
This commit is contained in:
Nikita Demashov
2021-09-15 17:15:57 +03:00
committed by GitHub
parent 0df7dab345
commit 5b285ed105
11 changed files with 957 additions and 3 deletions

View File

@@ -0,0 +1,25 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <ngraph/ngraph.hpp>
#include "low_precision/layer_transformation.hpp"
namespace ngraph {
namespace pass {
namespace low_precision {
// Transformation that moves a FakeQuantize placed after a Concat (optionally
// separated by one intermediate operation, Relu in the current matcher) up
// through the Concat, giving every Concat input its own FakeQuantize clone.
// Registered as a prerequisite pass in LowPrecision::run_on_function.
class LP_TRANSFORMATIONS_API MoveFakeQuantize : public LayerTransformation {
public:
    NGRAPH_RTTI_DECLARATION;
    // params: common LayerTransformation settings.
    MoveFakeQuantize(const Params& params = Params());
    // Performs the move; returns true when the graph was modified.
    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
    // Always true: this transformation never changes element types by itself.
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
};
} // namespace low_precision
} // namespace pass
} // namespace ngraph

View File

@@ -138,6 +138,7 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
const auto convert = convertNodes[0]->clone_with_new_inputs({ newConcat });
NetworkHelper::copyInfo({ concat, convert }, convert);
convert->set_friendly_name(concat->get_friendly_name() + "/DequantizationConvert");
lastDequantization = convert;
}
@@ -150,6 +151,7 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
ngraph::pass::low_precision::fold<ngraph::opset1::Concat>(subtractNodes, 1)));
NetworkHelper::copyInfo({ concat, subtract }, subtract);
subtract->set_friendly_name(concat->get_friendly_name() + "/DequantizationSubtract");
lastDequantization = subtract;
}
@@ -163,6 +165,7 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
layerDequantizations[0].multiply->get_output_element_type(0));
NetworkHelper::copyInfo({ concat, multiply }, multiply);
multiply->set_friendly_name(concat->get_friendly_name() + "/DequantizationMultyply");
lastDequantization = multiply;
}
@@ -325,13 +328,12 @@ bool ConcatTransformation::isQuantizedStatic(const std::shared_ptr<const Node>&
return false;
}
const auto axis = concat->get_axis();
const auto outputRank = concat->get_output_partial_shape(0).rank();
if (axis < 0 && outputRank.is_dynamic()) {
if (outputRank.is_dynamic()) {
return false;
}
const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), axis, outputRank);
const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), concat->get_axis(), outputRank);
return normalizedAxis == 1ul;
}

View File

@@ -66,6 +66,7 @@
#include "low_precision/transpose.hpp"
#include "low_precision/unsqueeze.hpp"
#include "low_precision/variadic_split.hpp"
#include "low_precision/move_fake_quantize.hpp"
// cleanup transformations
#include "low_precision/convert.hpp"
@@ -197,6 +198,7 @@ bool ngraph::pass::low_precision::LowPrecision::run_on_function(std::shared_ptr<
prerequisites->add_matcher<PullReshapeThroughDequantization>(supportedTypes);
prerequisites->add_matcher<PullTransposeThroughDequantization>(supportedTypes);
prerequisites->add_matcher<ngraph::pass::LinOpSequenceFusion>();
prerequisites->add_matcher<ngraph::pass::low_precision::MoveFakeQuantize>();
manager.register_pass<TypeRelaxedReplacer>();

View File

@@ -0,0 +1,107 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision/move_fake_quantize.hpp"
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <memory>
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/pattern/op/or.hpp>
#include "low_precision/concat.hpp"
#include "low_precision/network_helper.hpp"
namespace ngraph {
namespace pass {
namespace low_precision {
NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MoveFakeQuantize, "MoveFakeQuantize", 0);
MoveFakeQuantize::MoveFakeQuantize(const Params& params) : LayerTransformation(params) {
    // Pattern: Concat (single consumer) -> [Relu] -> FakeQuantize with constant limits.
    const auto matcherConcat = ngraph::pattern::wrap_type<opset1::Concat>(pattern::consumers_count(1));
    const auto matcherOperation = ngraph::pattern::wrap_type<opset1::Relu>({ matcherConcat });
    const auto matcherInputLow = ngraph::pattern::wrap_type<ngraph::opset1::Constant>();
    const auto matcherInputHigh = ngraph::pattern::wrap_type<ngraph::opset1::Constant>();
    const auto matcherOutputLow = ngraph::pattern::wrap_type<ngraph::opset1::Constant>();
    const auto matcherOutputHigh = ngraph::pattern::wrap_type<ngraph::opset1::Constant>();

    // FakeQuantize directly on the Concat output...
    const auto matcherFq = ngraph::pattern::wrap_type<opset1::FakeQuantize>(
        { matcherConcat, matcherInputLow, matcherInputHigh, matcherOutputLow, matcherOutputHigh });
    // ...or on the output of the intermediate operation.
    const auto matcherFqWithOperation = ngraph::pattern::wrap_type<opset1::FakeQuantize>(
        { matcherOperation, matcherInputLow, matcherInputHigh, matcherOutputLow, matcherOutputHigh });

    ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
        auto matchRoot = m.get_match_root();
        if (transformation_callback(matchRoot)) {
            return false;
        }
        return transform(*context, m);
    };

    const auto matcher = std::make_shared<ngraph::pattern::Matcher>(
        std::make_shared<pattern::op::Or>(OutputVector{ matcherFq, matcherFqWithOperation }),
        "MoveFakeQuantize");
    this->register_matcher(matcher, callback);
}
bool MoveFakeQuantize::transform(TransformationContext& context, ngraph::pattern::Matcher& m) {
    // Match root is the FakeQuantize; its first input is either the Concat
    // itself or a single intermediate operation (Relu in the current matcher).
    auto fq = m.get_match_root();
    auto operation = fq->get_input_node_shared_ptr(0);
    std::shared_ptr<ngraph::Node> concat;
    bool only_concat = true;
    std::string fq_original_name = fq->get_friendly_name(), operation_original_name;
    if (is_type<opset1::Concat>(operation)) {
        concat = operation;
    } else {
        // FakeQuantize is separated from the Concat by one operation;
        // remember its name so the per-branch clones can be named after it.
        operation_original_name = operation->get_friendly_name();
        concat = operation->get_input_node_shared_ptr(0);
        only_concat = false;
    }
    // Only statically-quantizable concatenations (normalized axis == 1) are handled.
    if (!ConcatTransformation::isQuantizedStatic(concat)) {
        return false;
    }
    // For every Concat input, build [operation clone ->] FakeQuantize clone
    // that reuses the original interval constants (inputs 1..4).
    std::vector<std::shared_ptr<ngraph::Node>> fqs;
    size_t input_size = concat->get_input_size();
    for (size_t i{ 0 }; i < input_size; ++i) {
        std::shared_ptr<ngraph::Node> fq_input;
        if (only_concat) {
            fq_input = concat->get_input_node_shared_ptr(i);
        } else {
            auto input = concat->get_input_node_shared_ptr(i);
            fq_input = operation->clone_with_new_inputs({ input });
            fq_input->set_friendly_name(operation_original_name + "_" + std::to_string(i + 1));
        }
        auto newFq = fq->clone_with_new_inputs({ fq_input,
            fq->get_input_node_shared_ptr(1),
            fq->get_input_node_shared_ptr(2),
            fq->get_input_node_shared_ptr(3),
            fq->get_input_node_shared_ptr(4) });
        newFq->set_friendly_name(fq_original_name + "_" + std::to_string(i + 1));
        fqs.push_back(newFq);
    }
    ngraph::copy_runtime_info(fq, fqs);
    // Rebuild the Concat on top of the new per-branch FakeQuantize nodes and
    // substitute it for the original FakeQuantize output.
    auto newConcat = concat->clone_with_new_inputs(ngraph::OutputVector(fqs.begin(), fqs.end()));
    newConcat->set_friendly_name(concat->get_friendly_name());
    replace_node(fq, newConcat);
    NetworkHelper::copyInfo(concat, newConcat);
    updateOutput(context, newConcat, fq);
    return true;
}
// The transformation only rearranges FakeQuantize/Concat nodes and never
// changes element types on its own, so precision is always preserved.
bool MoveFakeQuantize::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept {
    return true;
}
} // namespace low_precision
} // namespace pass
} // namespace ngraph

View File

@@ -0,0 +1,364 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "layer_transformation.hpp"
#include <string>
#include <sstream>
#include <memory>
#include <vector>
#include <gtest/gtest.h>
#include <low_precision/concat.hpp>
#include <transformations/utils/utils.hpp>
#include <transformations/init_node_info.hpp>
#include <low_precision/relu.hpp>
#include <low_precision/low_precision.hpp>
#include "low_precision/move_fake_quantize.hpp"
#include <low_precision/fake_quantize_decomposition.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
#include "lpt_ngraph_functions/move_fake_quantize_function.hpp"
#include "lpt_ngraph_functions/common/builders.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
#include "lpt_ngraph_functions/relu_function.hpp"
#include "simple_low_precision_transformer.hpp"
using namespace testing;
using namespace ngraph;
using namespace ngraph::pass;
namespace {
// Describes the graph BEFORE the transformation: the FakeQuantize / Convert /
// dequantization chain on each Concat branch, an optional intermediate
// operation name ("" or "relu"), and the post-Concat FakeQuantize chain.
class MoveFakeQuantizeTransformationActualValues {
public:
    // branch 1 (first Concat input)
    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore1;
    ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore1;
    ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1;
    // branch 2 (second Concat input)
    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore2;
    ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore2;
    ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2;
    // operation between Concat and FakeQuantize: "" or "relu"
    std::string operation;
    // chain applied after the Concat
    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter;
    ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter;
    ngraph::builder::subgraph::DequantizationOperations dequantizationAfter;
};
// Pretty-printer used to compose readable parameterized-test names.
inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransformationActualValues& values) {
    out << "_" << values.fakeQuantizeBefore1
        << "_" << values.convertBefore1.outPrecision
        << "_" << values.dequantizationBefore1
        << "_" << values.fakeQuantizeBefore2
        << "_" << values.convertBefore2.outPrecision
        << "_" << values.dequantizationBefore2
        << "_" << values.operation
        << "_" << values.fakeQuantizeAfter
        << "_" << values.convertAfter.outPrecision
        << "_" << values.dequantizationAfter;
    return out;
}
// Describes the EXPECTED graph after the transformation: per-branch chains,
// the (possibly emptied) post-Concat FakeQuantize chain, plus the expected
// precision after the operation.
class MoveFakeQuantizeTransformationResultValues {
public:
    // branch 1 (first Concat input)
    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore1;
    ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore1;
    ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1;
    // branch 2 (second Concat input)
    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore2;
    ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore2;
    ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2;
    // operation between Concat and FakeQuantize: "" or "relu"
    std::string operation;
    // chain expected after the Concat (empty when the FQ has been moved)
    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter;
    ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter;
    ngraph::builder::subgraph::DequantizationOperations dequantizationAfter;
    ngraph::element::Type precisionAfterOperation;
    ngraph::builder::subgraph::DequantizationOperations dequantizationAfterNotFQ;
};
// Pretty-printer for the expected values. Streams the Convert's output
// precision for convertAfter — consistent with convertBefore1/2 here and with
// the ActualValues printer (previously the whole Convert descriptor was
// streamed for this one field).
inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransformationResultValues& values) {
    return out << "_" <<
        values.fakeQuantizeBefore1 << "_" <<
        values.convertBefore1.outPrecision << "_" <<
        values.dequantizationBefore1 << "_" <<
        values.fakeQuantizeBefore2 << "_" <<
        values.convertBefore2.outPrecision << "_" <<
        values.dequantizationBefore2 << "_" <<
        values.operation << "_" <<
        values.fakeQuantizeAfter << "_" <<
        values.convertAfter.outPrecision << "_" <<
        values.dequantizationAfter << "_" <<
        values.dequantizationAfterNotFQ;
}
// Bundle of one unit-test case: transformation params, Concat axis, the
// "actual" (input) and "result" (expected) graph descriptions, and two flags.
// Fix: the constructor previously accepted addNotPrecisionPreservedOperation /
// checkIntervalsAlignmentAttributes but silently discarded them; they are now
// stored so callers' `false, false` arguments are not lost.
class MoveFakeQuantizeTransformationTestValues {
public:
    MoveFakeQuantizeTransformationTestValues() = default;
    MoveFakeQuantizeTransformationTestValues(
        const TestTransformationParams& params,
        const bool multiChannels,
        const std::int64_t axis,
        const MoveFakeQuantizeTransformationActualValues& actual,
        const MoveFakeQuantizeTransformationResultValues& result,
        const bool addNotPrecisionPreservedOperation = false,
        const bool checkIntervalsAlignmentAttributes = true) :
        params(params),
        multiChannels(multiChannels),
        axis(axis),
        actual(actual),
        result(result),
        addNotPrecisionPreservedOperation(addNotPrecisionPreservedOperation),
        checkIntervalsAlignmentAttributes(checkIntervalsAlignmentAttributes) {}
    TestTransformationParams params;
    bool multiChannels;
    std::int64_t axis;
    MoveFakeQuantizeTransformationActualValues actual;
    MoveFakeQuantizeTransformationResultValues result;
    // add not precision preserved operation to set output precision for FakeQuantize
    // don't set to 'true' by default to keep test cases with tested operation as output
    bool addNotPrecisionPreservedOperation = false;
    bool checkIntervalsAlignmentAttributes = true;
};
// Streams a whole test-values bundle (used when composing test names).
inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransformationTestValues& values) {
    out << "_" << values.multiChannels << "_" << values.actual << "_" << values.result;
    return out;
}

typedef std::tuple <
    ngraph::element::Type,
    ngraph::PartialShape,
    MoveFakeQuantizeTransformationTestValues
> MoveFakeQuantizeTransformationParams;
// Unit-test fixture: builds the "actual" function, applies the
// MoveFakeQuantize pass to it, then builds the expected "reference" function;
// the two are compared in the CompareFunctions test body below.
class MoveFakeQuantizeTransformation : public LayerTransformation, public testing::WithParamInterface<MoveFakeQuantizeTransformationParams> {
public:
    void SetUp() override {
        const ngraph::element::Type precision = std::get<0>(GetParam());
        const ngraph::PartialShape shape = std::get<1>(GetParam());
        MoveFakeQuantizeTransformationTestValues testValues = std::get<2>(GetParam());

        // dequantization output precision depends on input precision
        // to avoid huge amount of tests cases let's define dequantization output precision as input precision
        if (!testValues.actual.dequantizationBefore1.multiply.empty()) {
            testValues.actual.dequantizationBefore1.multiply.outPrecision = precision;
        }
        if (!testValues.actual.dequantizationBefore2.multiply.empty()) {
            testValues.actual.dequantizationBefore2.multiply.outPrecision = precision;
        }

        // shared intervals-alignment value used when building both functions
        IntervalsAlignmentSharedValue::Interval interval{ -1.28f, 2.55f };

        actualFunction = ngraph::builder::subgraph::MoveFakeQuantize::get(
            precision,
            shape,
            testValues.actual.fakeQuantizeBefore1,
            testValues.actual.convertBefore1,
            testValues.actual.dequantizationBefore1,
            testValues.actual.fakeQuantizeBefore2,
            testValues.actual.convertBefore2,
            testValues.actual.dequantizationBefore2,
            testValues.actual.operation,
            testValues.actual.fakeQuantizeAfter,
            testValues.actual.convertAfter,
            testValues.actual.dequantizationAfter,
            {
                ngraph::builder::subgraph::make_shared_attribute_ptr<PrecisionPreservedAttribute>(true),
                ngraph::builder::subgraph::make_shared_attribute_ptr<IntervalsAlignmentAttribute>(interval, 256),
                ngraph::builder::subgraph::make_shared_attribute_ptr<QuantizationAlignmentAttribute>(false)
            },
            ngraph::element::undefined,
            {},
            testValues.axis);

        // NOTE(review): these restriction lists are built but not passed to the
        // pass manager below — confirm whether they were meant to be used.
        auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
            ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
        });
        auto quantizationRestrictions = testValues.multiChannels ?
            std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
            std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
                ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::AvgPool>()
            });

        // run only the transformation under test
        const auto params = TestTransformationParams::toParams(testValues.params);
        ov::pass::Manager manager;
        manager.register_pass<ngraph::pass::low_precision::MoveFakeQuantize>(params);
        manager.run_passes(actualFunction);

        // dequantization output precision depends on input precision
        // to avoid huge amount of tests cases let's define dequantization output precision as input precision
        if (!testValues.result.dequantizationAfter.multiply.empty()) {
            testValues.result.dequantizationAfter.multiply.outPrecision = precision;
        }
        if (!testValues.params.updatePrecisions &&
            (precision == ngraph::element::f32) &&
            !testValues.result.dequantizationAfter.convert.empty()) {
            testValues.result.dequantizationAfter.convert = {};
        }

        referenceFunction = ngraph::builder::subgraph::MoveFakeQuantize::get(
            precision,
            shape,
            testValues.result.fakeQuantizeBefore1,
            testValues.result.convertBefore1,
            testValues.result.dequantizationBefore1,
            testValues.result.fakeQuantizeBefore2,
            testValues.result.convertBefore2,
            testValues.result.dequantizationBefore2,
            testValues.result.operation,
            testValues.result.fakeQuantizeAfter,
            testValues.result.convertAfter,
            testValues.result.dequantizationAfter,
            {
                ngraph::builder::subgraph::make_shared_attribute_ptr<PrecisionPreservedAttribute>(true),
                ngraph::builder::subgraph::make_shared_attribute_ptr<IntervalsAlignmentAttribute>(interval, 256),
                ngraph::builder::subgraph::make_shared_attribute_ptr<QuantizationAlignmentAttribute>(false)
            },
            testValues.result.precisionAfterOperation,
            {},
            testValues.axis);
    }

    // Composes a human-readable name for each parameterized test instance.
    static std::string getTestCaseName(testing::TestParamInfo<MoveFakeQuantizeTransformationParams> obj) {
        const ngraph::element::Type precision = std::get<0>(obj.param);
        const ngraph::PartialShape shape = std::get<1>(obj.param);
        const MoveFakeQuantizeTransformationTestValues testValues = std::get<2>(obj.param);

        std::ostringstream result;
        result <<
            LayerTransformation::getTestCaseNameByParams(precision, shape, testValues.params) << "_" <<
            (testValues.multiChannels ? "multiChannels_" : "notMultiChannels_") <<
            "axis_" << testValues.axis << "_" <<
            testValues.actual << "_" <<
            testValues.result << "_";
        return result.str();
    }
};
// Compares the transformed actual function against the reference function and
// verifies that all FakeQuantize nodes share the same PrecisionsAttribute.
TEST_P(MoveFakeQuantizeTransformation, CompareFunctions) {
    actualFunction->validate_nodes_and_infer_types();
    auto res = compare_functions(referenceFunction, actualFunction, true, true, true, true, true);
    ASSERT_TRUE(res.first) << res.second;

    const auto actualFakeQuantizes = LayerTransformation::get<opset1::FakeQuantize>(actualFunction);
    ASSERT_TRUE(checkIfOutputAttributesSharedValuesAreTheSame<std::shared_ptr<PrecisionsAttribute>>(actualFakeQuantizes)) <<
        "PrecisionsAttribute are not the same";
}
// Input element types covered by the suite.
const std::vector<ngraph::element::Type> precisions = {
    ngraph::element::f32,
    ngraph::element::f16
};

namespace testValues1 {
// Static, partially-dynamic and batched input shapes.
const std::vector<ngraph::PartialShape> shapes = {
    { 1, 3, 9, 9 },
    { 4, 3, 9, 9 },
    { Dimension::dynamic(), 3, Dimension::dynamic(), Dimension::dynamic() }
};

const std::vector<MoveFakeQuantizeTransformationTestValues> testValues = {
    // U8: concat
    {
        LayerTransformation::createParamsU8I8(),
        false,
        1,
        {
            {},
            {},
            {},
            {},
            {},
            {},
            "",
            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
            {},
            {}
        },
        {
            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
            {},
            {},
            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
            {},
            {},
            "",
            {},
            {},
            {},
        },
        false,
        false
    },
    // U8: concat with an intermediate "relu" between Concat and FakeQuantize;
    // the relu is expected to be cloned onto each branch together with the FQ
    {
        LayerTransformation::createParamsU8I8(),
        false,
        1,
        {
            {},
            {},
            {},
            {},
            {},
            {},
            "relu",
            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
            {},
            {}
        },
        {
            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
            {},
            {},
            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
            {},
            {},
            "relu",
            {},
            {},
            {},
        },
        false,
        false
    },
    // negative case: concatenation along axis 0 is not supported,
    // so the graph must stay unchanged (result == actual)
    {
        LayerTransformation::createParamsU8I8(),
        false,
        0,
        {
            {},
            {},
            {},
            {},
            {},
            {},
            "",
            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
            {},
            {}
        },
        {
            {},
            {},
            {},
            {},
            {},
            {},
            "",
            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
            {},
            {}
        },
        false,
        false
    },
};

INSTANTIATE_TEST_SUITE_P(
    smoke_LPT,
    MoveFakeQuantizeTransformation,
    ::testing::Combine(
        ::testing::ValuesIn(precisions),
        ::testing::ValuesIn(shapes),
        ::testing::ValuesIn(testValues)),
    MoveFakeQuantizeTransformation::getTestCaseName);
} // namespace testValues1
} // namespace

View File

@@ -0,0 +1,86 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "low_precision_transformations/move_fake_quantize_transformation.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Tested network precisions.
const std::vector<ngraph::element::Type> netPrecisions = {
    ngraph::element::f32,
    // NOTE(review): f16 is disabled here but enabled in the GPU variant of
    // this test — confirm whether the exclusion is intentional.
    //ngraph::element::f16
};

const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
    LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true)
};

// Each entry: branch chains, intermediate op, post-Concat FQ, then the
// expected runtime layer type ("Concatenation" on CPU), its expected
// precision and the Concat axis.
const std::vector<LayerTestsDefinitions::MoveFakeQuantizeTransformationParam> params = {
    // without operation
    {
        {},
        {},
        {},
        {},
        {},
        {},
        "",
        { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
        {},
        {},
        "Concatenation",
        "U8",
        1,
    },
    // with ReLU operation
    {
        {},
        {},
        {},
        {},
        {},
        {},
        "relu",
        { 256ul, {}, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f }},
        {},
        {},
        "Concatenation",
        "U8",
        1
    },
    // negative axis: FQ is not moved, so the Concat stays FP32
    {
        {},
        {},
        {},
        {},
        {},
        {},
        "",
        {256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f}},
        {},
        {},
        "Concatenation",
        "FP32",
        0
    }
};

const std::vector<ngraph::Shape> shapes = {
    { 1, 3, 16, 16 },
    { 4, 3, 16, 16 }
};

INSTANTIATE_TEST_SUITE_P(smoke_LPT, MoveFakeQuantizeTransformation,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::ValuesIn(shapes),
        ::testing::Values(CommonTestUtils::DEVICE_CPU),
        ::testing::ValuesIn(trasformationParamValues),
        ::testing::ValuesIn(params)),
    MoveFakeQuantizeTransformation::getTestCaseName);
} // namespace

View File

@@ -0,0 +1,86 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "low_precision_transformations/move_fake_quantize_transformation.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Tested network precisions.
const std::vector<ngraph::element::Type> netPrecisions = {
    ngraph::element::f32,
    ngraph::element::f16
};

const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
    LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams(),
};

// Each entry: branch chains, intermediate op, post-Concat FQ, then the
// expected runtime layer type ("Concat" on GPU), its expected precision and
// the Concat axis.
const std::vector<LayerTestsDefinitions::MoveFakeQuantizeTransformationParam> params = {
    // without operation
    {
        {},
        {},
        {},
        {},
        {},
        {},
        "",
        { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
        {},
        {},
        "Concat",
        "U8",
        1,
    },
    // with ReLU operation
    {
        {},
        {},
        {},
        {},
        {},
        {},
        "relu",
        { 256ul, {}, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f }},
        {},
        {},
        "Concat",
        "U8",
        1
    },
    // negative axis: FQ is not moved, so the Concat stays FP32
    {
        {},
        {},
        {},
        {},
        {},
        {},
        "",
        {256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f}},
        {},
        {},
        "Concat",
        "FP32",
        0
    }
};

const std::vector<ngraph::Shape> shapes = {
    { 1, 3, 16, 16 },
    { 4, 3, 16, 16 }
};

INSTANTIATE_TEST_SUITE_P(smoke_LPT, MoveFakeQuantizeTransformation,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::ValuesIn(shapes),
        ::testing::Values(CommonTestUtils::DEVICE_GPU),
        ::testing::ValuesIn(trasformationParamValues),
        ::testing::ValuesIn(params)),
    MoveFakeQuantizeTransformation::getTestCaseName);
} // namespace

View File

@@ -0,0 +1,57 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <memory>
#include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
#include "low_precision/move_fake_quantize.hpp"
#include "lpt_ngraph_functions/move_fake_quantize_function.hpp"
namespace LayerTestsDefinitions {
// Aggregates the per-case inputs for the MoveFakeQuantize plugin test:
// two pre-Concat branch chains (FQ / Convert / dequantization), an optional
// intermediate operation, the post-Concat FQ chain, and the expected runtime
// layer name/precision to verify after inference.
class MoveFakeQuantizeTransformationParam {
public:
    // branch 1 (first Concat input)
    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore1;
    ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore1;
    ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1;
    // branch 2 (second Concat input)
    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore2;
    ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore2;
    ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2;
    // operation between Concat and FakeQuantize: "" or "relu"
    std::string operation;
    // chain applied after the Concat
    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter;
    ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter;
    ngraph::builder::subgraph::DequantizationOperations dequantizationAfter;
    // layer type to look up in the execution graph (e.g. "Concat")
    std::string layerName;
    // expected runtime precision of that layer (e.g. "U8", "FP32")
    std::string expectedKernelType;
    // Concat axis
    std::int64_t axis;
};

typedef std::tuple <
    ngraph::element::Type,
    ngraph::Shape,
    std::string,
    ngraph::pass::low_precision::LayerTransformation::Params,
    MoveFakeQuantizeTransformationParam
> MoveFakeQuantizeTransformationParams;

// Functional test fixture shared by the CPU and GPU plugin test lists.
class MoveFakeQuantizeTransformation :
    public testing::WithParamInterface<MoveFakeQuantizeTransformationParams>,
    public LayerTestsUtils::LayerTransformation {
public:
    static std::string getTestCaseName(testing::TestParamInfo<MoveFakeQuantizeTransformationParams> obj);

protected:
    void SetUp() override;
    void Run() override;
};
} // namespace LayerTestsDefinitions

View File

@@ -0,0 +1,77 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision_transformations/move_fake_quantize_transformation.hpp"
#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <ie_core.hpp>
#include "common_test_utils/common_utils.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "lpt_ngraph_functions/move_fake_quantize_function.hpp"
namespace LayerTestsDefinitions {
// Builds the readable test-case name from the parameter tuple.
std::string MoveFakeQuantizeTransformation::getTestCaseName(testing::TestParamInfo<MoveFakeQuantizeTransformationParams> obj) {
    ngraph::element::Type netPrecision;
    ngraph::PartialShape inputShape;
    std::string targetDevice;
    ngraph::pass::low_precision::LayerTransformation::Params params;
    MoveFakeQuantizeTransformationParam param;
    std::tie(netPrecision, inputShape, targetDevice, params, param) = obj.param;

    std::ostringstream name;
    name << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params);
    name << param.operation << param.fakeQuantizeAfter;
    return name.str();
}
void MoveFakeQuantizeTransformation::SetUp() {
    // Build the test function from the parameterized description; the LPT
    // pipeline itself is applied by the common plugin-test infrastructure.
    ngraph::element::Type netPrecision;
    ngraph::PartialShape inputShape;
    ngraph::pass::low_precision::LayerTransformation::Params params;
    MoveFakeQuantizeTransformationParam param;
    std::tie(netPrecision, inputShape, targetDevice, params, param) = this->GetParam();

    function = ngraph::builder::subgraph::MoveFakeQuantize::get(
        netPrecision,
        inputShape,
        param.fakeQuantizeBefore1,
        param.convertBefore1,
        param.dequantizationBefore1,
        param.fakeQuantizeBefore2,
        param.convertBefore2,
        param.dequantizationBefore2,
        param.operation,
        param.fakeQuantizeAfter,
        param.convertAfter,
        param.dequantizationAfter,
        {},
        {},
        {},
        param.axis);
}
void MoveFakeQuantizeTransformation::Run() {
    LayerTestsCommon::Run();

    // After inference, verify the runtime precision of the target layer type.
    const auto params = std::get<4>(GetParam());
    const auto actualPrecision = getRuntimePrecisionByType(params.layerName);
    auto expectedPrecision = params.expectedKernelType;
    // An expected FP32 kernel runs as FP16 when the network precision is f16.
    if (expectedPrecision == "FP32" && std::get<0>(GetParam()) == ngraph::element::f16) {
        expectedPrecision = "FP16";
    }
    EXPECT_EQ(actualPrecision, expectedPrecision);
}
// Runs the plugin test and the runtime-precision check (see Run() above in
// this file). Fix: removed the stray semicolon after the test body, which
// triggers -Wextra-semi / pedantic warnings.
TEST_P(MoveFakeQuantizeTransformation, CompareWithRefImpl) {
    Run();
}
} // namespace LayerTestsDefinitions

View File

@@ -0,0 +1,41 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <algorithm>
#include <memory>
#include <ngraph/ngraph.hpp>
#include "low_precision/layer_transformation.hpp"
#include "common/fake_quantize_on_data.hpp"
#include "common/dequantization_operations.hpp"
namespace ngraph {
namespace builder {
namespace subgraph {
// Builds the two-branch Concat test subgraph used by MoveFakeQuantize tests:
// input1/input2 -> [Relu] -> [FQ] -> [Convert] -> [dequantization] -> Concat
// -> [dequantization] -> [Relu] -> [FQ] -> Result.
class MoveFakeQuantize {
public:
    // inputPrecision: element type of both Parameter inputs
    // fqOnData1/2: per-branch FakeQuantize before the Concat (empty = absent)
    // operation: "" or "relu" — optional op inserted before each FakeQuantize
    // fqOnData3: FakeQuantize after the Concat (empty = absent)
    // concatAttributes: runtime attributes attached after the Concat
    // axis: Concat axis
    static std::shared_ptr<ngraph::Function> get(
        const ngraph::element::Type inputPrecision,
        const ngraph::PartialShape& inputShape,
        const FakeQuantizeOnDataWithConstant& fqOnData1,
        const DequantizationOperations::Convert& convert1,
        const DequantizationOperations& dequantization1,
        const FakeQuantizeOnDataWithConstant& fqOnData2,
        const DequantizationOperations::Convert& convert2,
        const DequantizationOperations& dequantization2,
        const std::string& operation,
        const FakeQuantizeOnDataWithConstant& fqOnData3,
        const DequantizationOperations::Convert& convert3,
        const DequantizationOperations& dequantization3,
        const std::vector<std::shared_ptr<Variant>>& concatAttributes,
        const ngraph::element::Type precisionAfterOperation,
        const DequantizationOperations& dequantizationAfter,
        const std::int64_t& axis);
};
} // namespace subgraph
} // namespace builder
} // namespace ngraph

View File

@@ -0,0 +1,107 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "lpt_ngraph_functions/move_fake_quantize_function.hpp"
#include <low_precision/relu.hpp>
#include <ngraph/opsets/opset1.hpp>
#include "ngraph_ops/type_relaxed.hpp"
#include "low_precision/network_helper.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
#include "lpt_ngraph_functions/common/builders.hpp"
namespace ngraph {
namespace builder {
namespace subgraph {
using namespace ngraph::pass;
// Builds the two-branch Concat test subgraph:
// input1/input2 -> [Relu] -> [FQ] -> [Convert] -> [dequantization] -> Concat
// -> [dequantization] -> [Relu] -> [FQ] -> Result.
// Fixes three copy-paste bugs in the second-branch construction: the branch-2
// FakeQuantize was built on input1, its Convert result was assigned to
// parent1, and the branch-2 dequantization was guarded by dequantization1.
std::shared_ptr<ngraph::Function> MoveFakeQuantize::get(
    const ngraph::element::Type inputPrecision,
    const ngraph::PartialShape& inputShape,
    const FakeQuantizeOnDataWithConstant& fqOnData1,
    const DequantizationOperations::Convert& convert1,
    const DequantizationOperations& dequantization1,
    const FakeQuantizeOnDataWithConstant& fqOnData2,
    const DequantizationOperations::Convert& convert2,
    const DequantizationOperations& dequantization2,
    const std::string& operation,
    const FakeQuantizeOnDataWithConstant& fqOnData3,
    const DequantizationOperations::Convert& convert3,
    const DequantizationOperations& dequantization3,
    const std::vector<std::shared_ptr<Variant>>& concatAttributes,
    const ngraph::element::Type precisionAfterOperation,
    const DequantizationOperations& dequantizationAfter,
    const std::int64_t& axis) {
    const auto input1 = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape);
    input1->set_friendly_name("input1");
    const auto input2 = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape);
    input2->set_friendly_name("input2");

    // branch 1: input1 -> [Relu] -> FQ -> [Convert] -> [dequantization]
    std::shared_ptr<Node> parent1 = input1, parent2 = input2;
    if (!fqOnData1.empty()) {
        if (operation == "relu") {
            auto relu1 = std::make_shared<ngraph::opset1::Relu>(input1->output(0));
            parent1 = makeFakeQuantize(relu1, inputPrecision, fqOnData1);
        } else {
            parent1 = makeFakeQuantize(input1, inputPrecision, fqOnData1);
        }
        parent1->set_friendly_name("concat_fq1");
        if (!convert1.empty()) {
            parent1 = std::make_shared<opset1::Convert>(parent1, convert1.outPrecision);
        }
        if (!dequantization1.empty()) {
            parent1 = makeDequantization(parent1, dequantization1);
        }
    }
    // branch 2: input2 -> [Relu] -> FQ -> [Convert] -> [dequantization]
    if (!fqOnData2.empty()) {
        if (operation == "relu") {
            auto relu2 = std::make_shared<ngraph::opset1::Relu>(input2->output(0));
            parent2 = makeFakeQuantize(relu2, inputPrecision, fqOnData2);
        } else {
            // fixed: quantize the second input (was built on input1)
            parent2 = makeFakeQuantize(input2, inputPrecision, fqOnData2);
        }
        parent2->set_friendly_name("concat_fq2");
        if (!convert2.empty()) {
            // fixed: the Convert belongs to branch 2 (was assigned to parent1)
            parent2 = std::make_shared<opset1::Convert>(parent2, convert2.outPrecision);
        }
        if (!dequantization2.empty()) {
            // fixed: branch 2 is guarded by its own dequantization descriptor
            parent2 = makeDequantization(parent2, dequantization2);
        }
    }

    const std::shared_ptr<ngraph::opset1::Concat> concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{ parent1, parent2 }, axis);
    concat->set_friendly_name("concat");
    std::shared_ptr<ngraph::Node> parent = concat;
    if (!dequantizationAfter.empty()) {
        const auto lastDequantization = makeDequantization(concat, dequantizationAfter);
        lastDequantization->set_friendly_name("multiply");
        parent = lastDequantization;
    }
    addAttributes({ parent }, concatAttributes);

    // NOTE(review): convert3 / dequantization3 are accepted but currently unused.
    if (!fqOnData3.empty()) {
        std::shared_ptr<Node> fq;
        if (operation == "relu") {
            auto relu = std::make_shared<ngraph::opset1::Relu>(concat->output(0));
            fq = makeFakeQuantize(relu, inputPrecision, fqOnData3);
        } else {
            fq = makeFakeQuantize(concat, inputPrecision, fqOnData3);
        }
        fq->set_friendly_name("fakeQuantizeAfter");
        parent = fq;
    }
    parent->set_friendly_name("output");

    ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(parent) };
    std::shared_ptr<ngraph::Function> function = std::make_shared<ngraph::Function>(
        results,
        ngraph::ParameterVector{ input1, input2 },
        "MoveFakeQuantize");
    return function;
}
} // namespace subgraph
} // namespace builder
} // namespace ngraph