[LPT] MoveFakeQuantize (#6723)
* add move_fake_quantize_for_concat_transformation, mfk and mfk_function * fix relu_transformation.cpp * backup * add change * add cpu test * [LPT] MoveFakeQuantizeTransformation: fixes * get InferenceEngine::NotImplemented * fix ieFuncTests * try without new cpu_test * fix cpuFuncTests and ieFuncTests * fix tests * fix lin * add cpu test * fix link and matcher in move_fake_quantize.cpp * update matcher * add gpu test * naming fix * move_fake_quantize.cpp add set_fr_name for new_concat * naming new fq fix * fix NetworkHelper::copyInfo naming * concat.cpp naming fix * gpu tests fix * rm network_helper changes * rm extra output * resolve conversations * resolve other conversations * add multi inputs for concat * fix lin * fix move_fake_qunatize naming * rm maxpool from mfk_function * mkldnn update * fix style * rm extra change * fix concat matcher * rm mkldnn_plugin changes * fix conversations * fix interval * fix and add isQuantizedStatic, add attribute and negative tests * add negative plugin tests * fix style: Co-authored-by: Edward Shogulin <edward.shogulin@intel.com>
This commit is contained in:
@@ -0,0 +1,25 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include "low_precision/layer_transformation.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
/**
 * @brief Low-precision transformation that moves a FakeQuantize located after a Concat
 * (optionally separated from it by a Relu) onto each of the Concat inputs.
 *
 * After a successful run every Concat input is followed by its own copy of the
 * FakeQuantize, and the new Concat takes the place of the original FakeQuantize.
 */
class LP_TRANSFORMATIONS_API MoveFakeQuantize : public LayerTransformation {
public:
    NGRAPH_RTTI_DECLARATION;

    /// @param params transformation parameters forwarded to LayerTransformation
    MoveFakeQuantize(const Params& params = Params());

    /// Rewrites the matched subgraph; returns false when the graph was left untouched.
    bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override;

    /// Always returns true for this transformation.
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
};
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
@@ -138,6 +138,7 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
|
||||
const auto convert = convertNodes[0]->clone_with_new_inputs({ newConcat });
|
||||
|
||||
NetworkHelper::copyInfo({ concat, convert }, convert);
|
||||
convert->set_friendly_name(concat->get_friendly_name() + "/DequantizationConvert");
|
||||
lastDequantization = convert;
|
||||
}
|
||||
|
||||
@@ -150,6 +151,7 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
|
||||
ngraph::pass::low_precision::fold<ngraph::opset1::Concat>(subtractNodes, 1)));
|
||||
|
||||
NetworkHelper::copyInfo({ concat, subtract }, subtract);
|
||||
subtract->set_friendly_name(concat->get_friendly_name() + "/DequantizationSubtract");
|
||||
lastDequantization = subtract;
|
||||
}
|
||||
|
||||
@@ -163,6 +165,7 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
|
||||
layerDequantizations[0].multiply->get_output_element_type(0));
|
||||
|
||||
NetworkHelper::copyInfo({ concat, multiply }, multiply);
|
||||
multiply->set_friendly_name(concat->get_friendly_name() + "/DequantizationMultyply");
|
||||
lastDequantization = multiply;
|
||||
}
|
||||
|
||||
@@ -325,13 +328,12 @@ bool ConcatTransformation::isQuantizedStatic(const std::shared_ptr<const Node>&
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto axis = concat->get_axis();
|
||||
const auto outputRank = concat->get_output_partial_shape(0).rank();
|
||||
if (axis < 0 && outputRank.is_dynamic()) {
|
||||
if (outputRank.is_dynamic()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), axis, outputRank);
|
||||
const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), concat->get_axis(), outputRank);
|
||||
return normalizedAxis == 1ul;
|
||||
}
|
||||
|
||||
|
||||
@@ -66,6 +66,7 @@
|
||||
#include "low_precision/transpose.hpp"
|
||||
#include "low_precision/unsqueeze.hpp"
|
||||
#include "low_precision/variadic_split.hpp"
|
||||
#include "low_precision/move_fake_quantize.hpp"
|
||||
|
||||
// cleanup transformations
|
||||
#include "low_precision/convert.hpp"
|
||||
@@ -197,6 +198,7 @@ bool ngraph::pass::low_precision::LowPrecision::run_on_function(std::shared_ptr<
|
||||
prerequisites->add_matcher<PullReshapeThroughDequantization>(supportedTypes);
|
||||
prerequisites->add_matcher<PullTransposeThroughDequantization>(supportedTypes);
|
||||
prerequisites->add_matcher<ngraph::pass::LinOpSequenceFusion>();
|
||||
prerequisites->add_matcher<ngraph::pass::low_precision::MoveFakeQuantize>();
|
||||
|
||||
manager.register_pass<TypeRelaxedReplacer>();
|
||||
|
||||
|
||||
@@ -0,0 +1,107 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision/move_fake_quantize.hpp"
|
||||
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
|
||||
#include <memory>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <ngraph/pattern/op/or.hpp>
|
||||
|
||||
#include "low_precision/concat.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MoveFakeQuantize, "MoveFakeQuantize", 0);
|
||||
|
||||
// Registers a matcher for a FakeQuantize whose data input is either a single-consumer Concat
// or a Relu placed directly after such a Concat. All four interval inputs must be Constants.
MoveFakeQuantize::MoveFakeQuantize(const Params& params) : LayerTransformation(params) {
    using ngraph::pattern::wrap_type;

    const auto concatPattern = wrap_type<opset1::Concat>(pattern::consumers_count(1));
    const auto reluPattern = wrap_type<opset1::Relu>({ concatPattern });

    // The interval patterns are shared by both FakeQuantize alternatives below.
    const auto inputLow = wrap_type<ngraph::opset1::Constant>();
    const auto inputHigh = wrap_type<ngraph::opset1::Constant>();
    const auto outputLow = wrap_type<ngraph::opset1::Constant>();
    const auto outputHigh = wrap_type<ngraph::opset1::Constant>();

    const auto fqAfterRelu = wrap_type<opset1::FakeQuantize>(
        { reluPattern, inputLow, inputHigh, outputLow, outputHigh });
    const auto fqAfterConcat = wrap_type<opset1::FakeQuantize>(
        { concatPattern, inputLow, inputHigh, outputLow, outputHigh });

    ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& matcher) {
        // Nodes rejected by transformation_callback are skipped without calling transform().
        return !transformation_callback(matcher.get_match_root()) && transform(*context, matcher);
    };

    const auto eitherFq = std::make_shared<pattern::op::Or>(OutputVector{ fqAfterConcat, fqAfterRelu });
    const auto matcher = std::make_shared<ngraph::pattern::Matcher>(eitherFq, "MoveFakeQuantize");
    this->register_matcher(matcher, callback);
}
|
||||
|
||||
// Replaces "Concat -> [operation] -> FakeQuantize" with per-input "[operation] -> FakeQuantize"
// branches feeding a new Concat that takes the place of the original FakeQuantize.
//
// @param context transformation context, passed on to updateOutput()
// @param m       matcher whose root is the FakeQuantize to move
// @return false (graph untouched) when ConcatTransformation::isQuantizedStatic rejects the Concat,
//         true after the subgraph has been rewritten.
bool MoveFakeQuantize::transform(TransformationContext& context, ngraph::pattern::Matcher& m) {
    const auto fq = m.get_match_root();
    const auto operation = fq->get_input_node_shared_ptr(0);

    // The FakeQuantize is fed either directly by the Concat or by one operation placed after it.
    const bool only_concat = is_type<opset1::Concat>(operation);
    const std::shared_ptr<ngraph::Node> concat = only_concat ? operation : operation->get_input_node_shared_ptr(0);
    if (!ConcatTransformation::isQuantizedStatic(concat)) {
        return false;
    }

    const std::string fq_original_name = fq->get_friendly_name();
    const std::string operation_original_name = only_concat ? std::string() : operation->get_friendly_name();

    const size_t input_size = concat->get_input_size();
    std::vector<std::shared_ptr<ngraph::Node>> fqs;
    fqs.reserve(input_size);
    for (size_t i = 0; i < input_size; ++i) {
        // input_value() keeps the producer's output index, so a Concat fed by a non-zero output
        // of a multi-output node stays connected to that same output after the rewrite.
        ngraph::Output<ngraph::Node> fq_input = concat->input_value(i);
        if (!only_concat) {
            const auto new_operation = operation->clone_with_new_inputs({ fq_input });
            new_operation->set_friendly_name(operation_original_name + "_" + std::to_string(i + 1));
            fq_input = new_operation->output(0);
        }

        // Every branch gets its own FakeQuantize that shares the original interval inputs.
        const auto newFq = fq->clone_with_new_inputs({ fq_input,
            fq->input_value(1),
            fq->input_value(2),
            fq->input_value(3),
            fq->input_value(4) });
        newFq->set_friendly_name(fq_original_name + "_" + std::to_string(i + 1));
        fqs.push_back(newFq);
    }

    ngraph::copy_runtime_info(fq, fqs);
    const auto newConcat = concat->clone_with_new_inputs(ngraph::OutputVector(fqs.begin(), fqs.end()));
    newConcat->set_friendly_name(concat->get_friendly_name());
    replace_node(fq, newConcat);
    NetworkHelper::copyInfo(concat, newConcat);
    updateOutput(context, newConcat, fq);
    return true;
}
|
||||
|
||||
// Returns true unconditionally; the layer argument is not inspected.
bool MoveFakeQuantize::isPrecisionPreserved(std::shared_ptr<Node> /*layer*/) const noexcept {
    return true;
}
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
@@ -0,0 +1,364 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "layer_transformation.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <low_precision/concat.hpp>
|
||||
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include <transformations/init_node_info.hpp>
|
||||
#include <low_precision/relu.hpp>
|
||||
|
||||
#include <low_precision/low_precision.hpp>
|
||||
|
||||
#include "low_precision/move_fake_quantize.hpp"
|
||||
#include <low_precision/fake_quantize_decomposition.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
#include "lpt_ngraph_functions/move_fake_quantize_function.hpp"
|
||||
#include "lpt_ngraph_functions/common/builders.hpp"
|
||||
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
|
||||
#include "lpt_ngraph_functions/relu_function.hpp"
|
||||
#include "simple_low_precision_transformer.hpp"
|
||||
|
||||
using namespace testing;
|
||||
using namespace ngraph;
|
||||
using namespace ngraph::pass;
|
||||
|
||||
namespace {
|
||||
|
||||
class MoveFakeQuantizeTransformationActualValues {
|
||||
public:
|
||||
ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore1;
|
||||
ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore1;
|
||||
ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1;
|
||||
ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore2;
|
||||
ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore2;
|
||||
ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2;
|
||||
std::string operation;
|
||||
ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter;
|
||||
ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter;
|
||||
ngraph::builder::subgraph::DequantizationOperations dequantizationAfter;
|
||||
};
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransformationActualValues& values) {
|
||||
return out << "_" <<
|
||||
values.fakeQuantizeBefore1 << "_" <<
|
||||
values.convertBefore1.outPrecision << "_" <<
|
||||
values.dequantizationBefore1 << "_" <<
|
||||
values.fakeQuantizeBefore2 << "_" <<
|
||||
values.convertBefore2.outPrecision << "_" <<
|
||||
values.dequantizationBefore2 << "_" <<
|
||||
values.operation << "_" <<
|
||||
values.fakeQuantizeAfter << "_" <<
|
||||
values.convertAfter.outPrecision << "_" <<
|
||||
values.dequantizationAfter;
|
||||
}
|
||||
|
||||
class MoveFakeQuantizeTransformationResultValues {
|
||||
public:
|
||||
ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore1;
|
||||
ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore1;
|
||||
ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1;
|
||||
ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore2;
|
||||
ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore2;
|
||||
ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2;
|
||||
std::string operation;
|
||||
ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter;
|
||||
ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter;
|
||||
ngraph::builder::subgraph::DequantizationOperations dequantizationAfter;
|
||||
ngraph::element::Type precisionAfterOperation;
|
||||
ngraph::builder::subgraph::DequantizationOperations dequantizationAfterNotFQ;
|
||||
};
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransformationResultValues& values) {
|
||||
return out << "_" <<
|
||||
values.fakeQuantizeBefore1 << "_" <<
|
||||
values.convertBefore1.outPrecision << "_" <<
|
||||
values.dequantizationBefore1 << "_" <<
|
||||
values.fakeQuantizeBefore2 << "_" <<
|
||||
values.convertBefore2.outPrecision << "_" <<
|
||||
values.dequantizationBefore2 << "_" <<
|
||||
values.operation << "_" <<
|
||||
values.fakeQuantizeAfter << "_" <<
|
||||
values.convertAfter << "_" <<
|
||||
values.dequantizationAfter << "_" <<
|
||||
values.dequantizationAfterNotFQ;
|
||||
}
|
||||
|
||||
class MoveFakeQuantizeTransformationTestValues {
|
||||
public:
|
||||
MoveFakeQuantizeTransformationTestValues() = default;
|
||||
MoveFakeQuantizeTransformationTestValues(
|
||||
const TestTransformationParams& params,
|
||||
const bool multiChannels,
|
||||
const std::int64_t axis,
|
||||
const MoveFakeQuantizeTransformationActualValues& actual,
|
||||
const MoveFakeQuantizeTransformationResultValues& result,
|
||||
const bool addNotPrecisionPreservedOperation = false,
|
||||
const bool checkIntervalsAlignmentAttributes = true) :
|
||||
params(params),
|
||||
multiChannels(multiChannels),
|
||||
axis(axis),
|
||||
actual(actual),
|
||||
result(result) {}
|
||||
|
||||
TestTransformationParams params;
|
||||
bool multiChannels;
|
||||
std::int64_t axis;
|
||||
MoveFakeQuantizeTransformationActualValues actual;
|
||||
MoveFakeQuantizeTransformationResultValues result;
|
||||
// add not precision preserved operation to set output precision for FakeQuantize
|
||||
// don't set to 'true' by default to keep test cases with tested operation as output
|
||||
};
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransformationTestValues& values) {
|
||||
return out << "_" << values.multiChannels << "_" << values.actual << "_" << values.result;
|
||||
}
|
||||
|
||||
typedef std::tuple <
|
||||
ngraph::element::Type,
|
||||
ngraph::PartialShape,
|
||||
MoveFakeQuantizeTransformationTestValues
|
||||
> MoveFakeQuantizeTransformationParams;
|
||||
|
||||
// Parameterized fixture: builds the graph described by `actual`, runs the MoveFakeQuantize pass
// on it and builds the reference graph described by `result` for the comparison in the test body.
class MoveFakeQuantizeTransformation : public LayerTransformation, public testing::WithParamInterface<MoveFakeQuantizeTransformationParams> {
public:
    void SetUp() override {
        const ngraph::element::Type precision = std::get<0>(GetParam());
        const ngraph::PartialShape shape = std::get<1>(GetParam());
        MoveFakeQuantizeTransformationTestValues testValues = std::get<2>(GetParam());

        // dequantization output precision depends on input precision
        // to avoid huge amount of tests cases let's define dequantization output precision as input precision
        if (!testValues.actual.dequantizationBefore1.multiply.empty()) {
            testValues.actual.dequantizationBefore1.multiply.outPrecision = precision;
        }
        if (!testValues.actual.dequantizationBefore2.multiply.empty()) {
            testValues.actual.dequantizationBefore2.multiply.outPrecision = precision;
        }

        IntervalsAlignmentSharedValue::Interval interval{ -1.28f, 2.55f };

        actualFunction = ngraph::builder::subgraph::MoveFakeQuantize::get(
            precision,
            shape,
            testValues.actual.fakeQuantizeBefore1,
            testValues.actual.convertBefore1,
            testValues.actual.dequantizationBefore1,
            testValues.actual.fakeQuantizeBefore2,
            testValues.actual.convertBefore2,
            testValues.actual.dequantizationBefore2,
            testValues.actual.operation,
            testValues.actual.fakeQuantizeAfter,
            testValues.actual.convertAfter,
            testValues.actual.dequantizationAfter,
            {
                ngraph::builder::subgraph::make_shared_attribute_ptr<PrecisionPreservedAttribute>(true),
                ngraph::builder::subgraph::make_shared_attribute_ptr<IntervalsAlignmentAttribute>(interval, 256),
                ngraph::builder::subgraph::make_shared_attribute_ptr<QuantizationAlignmentAttribute>(false)
            },
            ngraph::element::undefined,
            {},
            testValues.axis);

        // Only the pass under test is registered, so the comparison isolates its effect.
        const auto params = TestTransformationParams::toParams(testValues.params);
        ov::pass::Manager manager;
        manager.register_pass<ngraph::pass::low_precision::MoveFakeQuantize>(params);
        manager.run_passes(actualFunction);

        // dequantization output precision depends on input precision
        // to avoid huge amount of tests cases let's define dequantization output precision as input precision
        if (!testValues.result.dequantizationAfter.multiply.empty()) {
            testValues.result.dequantizationAfter.multiply.outPrecision = precision;
        }

        if (!testValues.params.updatePrecisions &&
            (precision == ngraph::element::f32) &&
            !testValues.result.dequantizationAfter.convert.empty()) {
            testValues.result.dequantizationAfter.convert = {};
        }

        referenceFunction = ngraph::builder::subgraph::MoveFakeQuantize::get(
            precision,
            shape,
            testValues.result.fakeQuantizeBefore1,
            testValues.result.convertBefore1,
            testValues.result.dequantizationBefore1,
            testValues.result.fakeQuantizeBefore2,
            testValues.result.convertBefore2,
            testValues.result.dequantizationBefore2,
            testValues.result.operation,
            testValues.result.fakeQuantizeAfter,
            testValues.result.convertAfter,
            testValues.result.dequantizationAfter,
            {
                ngraph::builder::subgraph::make_shared_attribute_ptr<PrecisionPreservedAttribute>(true),
                ngraph::builder::subgraph::make_shared_attribute_ptr<IntervalsAlignmentAttribute>(interval, 256),
                ngraph::builder::subgraph::make_shared_attribute_ptr<QuantizationAlignmentAttribute>(false)
            },
            testValues.result.precisionAfterOperation,
            {},
            testValues.axis);
    }

    // Builds the test name from precision, shape, transformation params, the multi-channel flag,
    // the concat axis and the streamed actual/result descriptions.
    static std::string getTestCaseName(testing::TestParamInfo<MoveFakeQuantizeTransformationParams> obj) {
        const ngraph::element::Type precision = std::get<0>(obj.param);
        const ngraph::PartialShape shape = std::get<1>(obj.param);
        const MoveFakeQuantizeTransformationTestValues testValues = std::get<2>(obj.param);

        std::ostringstream result;
        result <<
            LayerTransformation::getTestCaseNameByParams(precision, shape, testValues.params) << "_" <<
            (testValues.multiChannels ? "multiChannels_" : "notMultiChannels_") <<
            "axis_" << testValues.axis << "_" <<
            testValues.actual << "_" <<
            testValues.result << "_";
        return result.str();
    }
};
|
||||
|
||||
// Compares the transformed graph with the reference graph, then checks that all FakeQuantize
// nodes in the transformed graph share the same PrecisionsAttribute value.
TEST_P(MoveFakeQuantizeTransformation, CompareFunctions) {
    actualFunction->validate_nodes_and_infer_types();

    const auto comparison = compare_functions(referenceFunction, actualFunction, true, true, true, true, true);
    ASSERT_TRUE(comparison.first) << comparison.second;

    const auto fakeQuantizes = LayerTransformation::get<opset1::FakeQuantize>(actualFunction);
    const auto precisionsAreShared =
        checkIfOutputAttributesSharedValuesAreTheSame<std::shared_ptr<PrecisionsAttribute>>(fakeQuantizes);
    ASSERT_TRUE(precisionsAreShared) << "PrecisionsAttribute are not the same";
}
|
||||
|
||||
const std::vector<ngraph::element::Type> precisions = {
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
namespace testValues1 {
|
||||
const std::vector<ngraph::PartialShape> shapes = {
|
||||
{ 1, 3, 9, 9 },
|
||||
{ 4, 3, 9, 9 },
|
||||
{ Dimension::dynamic(), 3, Dimension::dynamic(), Dimension::dynamic() }
|
||||
};
|
||||
const std::vector<MoveFakeQuantizeTransformationTestValues> testValues = {
|
||||
// U8: concat
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
false,
|
||||
1,
|
||||
{
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
"",
|
||||
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
|
||||
{},
|
||||
{}
|
||||
},
|
||||
{
|
||||
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
|
||||
{},
|
||||
{},
|
||||
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
|
||||
{},
|
||||
{},
|
||||
"",
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
},
|
||||
false,
|
||||
false
|
||||
},
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
false,
|
||||
1,
|
||||
{
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
"relu",
|
||||
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
|
||||
{},
|
||||
{}
|
||||
},
|
||||
{
|
||||
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
|
||||
{},
|
||||
{},
|
||||
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
|
||||
{},
|
||||
{},
|
||||
"relu",
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
},
|
||||
false,
|
||||
false
|
||||
},
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
false,
|
||||
0,
|
||||
{
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
"",
|
||||
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
|
||||
{},
|
||||
{}
|
||||
},
|
||||
{
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
"",
|
||||
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
|
||||
{},
|
||||
{}
|
||||
},
|
||||
false,
|
||||
false
|
||||
},
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_LPT,
|
||||
MoveFakeQuantizeTransformation,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(precisions),
|
||||
::testing::ValuesIn(shapes),
|
||||
::testing::ValuesIn(testValues)),
|
||||
MoveFakeQuantizeTransformation::getTestCaseName);
|
||||
} // namespace testValues1
|
||||
} // namespace
|
||||
@@ -0,0 +1,86 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "low_precision_transformations/move_fake_quantize_transformation.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
//ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
|
||||
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true)
|
||||
};
|
||||
|
||||
const std::vector<LayerTestsDefinitions::MoveFakeQuantizeTransformationParam> params = {
|
||||
// without operation
|
||||
{
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
"",
|
||||
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
|
||||
{},
|
||||
{},
|
||||
"Concatenation",
|
||||
"U8",
|
||||
1,
|
||||
},
|
||||
// with ReLU operation
|
||||
{
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
"relu",
|
||||
{ 256ul, {}, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f }},
|
||||
{},
|
||||
{},
|
||||
"Concatenation",
|
||||
"U8",
|
||||
1
|
||||
},
|
||||
// negative case: concat axis is 0, so FakeQuantize is not moved (FP32 execution expected)
|
||||
{
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
"",
|
||||
{256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f}},
|
||||
{},
|
||||
{},
|
||||
"Concatenation",
|
||||
"FP32",
|
||||
0
|
||||
}
|
||||
};
|
||||
|
||||
const std::vector<ngraph::Shape> shapes = {
|
||||
{ 1, 3, 16, 16 },
|
||||
{ 4, 3, 16, 16 }
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MoveFakeQuantizeTransformation,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::ValuesIn(shapes),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
::testing::ValuesIn(trasformationParamValues),
|
||||
::testing::ValuesIn(params)),
|
||||
MoveFakeQuantizeTransformation::getTestCaseName);
|
||||
} // namespace
|
||||
@@ -0,0 +1,86 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "low_precision_transformations/move_fake_quantize_transformation.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16
|
||||
};
|
||||
|
||||
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
|
||||
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams(),
|
||||
};
|
||||
|
||||
const std::vector<LayerTestsDefinitions::MoveFakeQuantizeTransformationParam> params = {
|
||||
// without operation
|
||||
{
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
"",
|
||||
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
|
||||
{},
|
||||
{},
|
||||
"Concat",
|
||||
"U8",
|
||||
1,
|
||||
},
|
||||
// with ReLU operation
|
||||
{
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
"relu",
|
||||
{ 256ul, {}, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f }},
|
||||
{},
|
||||
{},
|
||||
"Concat",
|
||||
"U8",
|
||||
1
|
||||
},
|
||||
// negative case: concat axis is 0, so FakeQuantize is not moved (FP32 execution expected)
|
||||
{
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
"",
|
||||
{256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f}},
|
||||
{},
|
||||
{},
|
||||
"Concat",
|
||||
"FP32",
|
||||
0
|
||||
}
|
||||
};
|
||||
|
||||
const std::vector<ngraph::Shape> shapes = {
|
||||
{ 1, 3, 16, 16 },
|
||||
{ 4, 3, 16, 16 }
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MoveFakeQuantizeTransformation,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::ValuesIn(shapes),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU),
|
||||
::testing::ValuesIn(trasformationParamValues),
|
||||
::testing::ValuesIn(params)),
|
||||
MoveFakeQuantizeTransformation::getTestCaseName);
|
||||
} // namespace
|
||||
@@ -0,0 +1,57 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
#include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp"
|
||||
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
|
||||
#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
|
||||
|
||||
#include "low_precision/move_fake_quantize.hpp"
|
||||
|
||||
#include "lpt_ngraph_functions/move_fake_quantize_function.hpp"
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
class MoveFakeQuantizeTransformationParam {
|
||||
public:
|
||||
ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore1;
|
||||
ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore1;
|
||||
ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1;
|
||||
ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore2;
|
||||
ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore2;
|
||||
ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2;
|
||||
std::string operation;
|
||||
ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter;
|
||||
ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter;
|
||||
ngraph::builder::subgraph::DequantizationOperations dequantizationAfter;
|
||||
std::string layerName;
|
||||
std::string expectedKernelType;
|
||||
std::int64_t axis;
|
||||
};
|
||||
|
||||
typedef std::tuple <
|
||||
ngraph::element::Type,
|
||||
ngraph::Shape,
|
||||
std::string,
|
||||
ngraph::pass::low_precision::LayerTransformation::Params,
|
||||
MoveFakeQuantizeTransformationParam
|
||||
> MoveFakeQuantizeTransformationParams;
|
||||
|
||||
class MoveFakeQuantizeTransformation :
|
||||
public testing::WithParamInterface<MoveFakeQuantizeTransformationParams>,
|
||||
public LayerTestsUtils::LayerTransformation {
|
||||
public:
|
||||
static std::string getTestCaseName(testing::TestParamInfo<MoveFakeQuantizeTransformationParams> obj);
|
||||
|
||||
protected:
|
||||
void SetUp() override;
|
||||
|
||||
void Run() override;
|
||||
};
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
||||
@@ -0,0 +1,77 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision_transformations/move_fake_quantize_transformation.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include <ie_core.hpp>
|
||||
|
||||
#include "common_test_utils/common_utils.hpp"
|
||||
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||
#include "functional_test_utils/blob_utils.hpp"
|
||||
#include "lpt_ngraph_functions/move_fake_quantize_function.hpp"
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
// Builds the test name: the common name produced by getTestCaseNameByParams followed by
// the operation string and the streamed FakeQuantize description.
std::string MoveFakeQuantizeTransformation::getTestCaseName(testing::TestParamInfo<MoveFakeQuantizeTransformationParams> obj) {
    const ngraph::element::Type netPrecision = std::get<0>(obj.param);
    // The tuple stores a static Shape; it is converted to PartialShape for the name helper.
    const ngraph::PartialShape inputShape(std::get<1>(obj.param));
    const std::string targetDevice = std::get<2>(obj.param);
    const ngraph::pass::low_precision::LayerTransformation::Params params = std::get<3>(obj.param);
    const MoveFakeQuantizeTransformationParam param = std::get<4>(obj.param);

    std::ostringstream name;
    name << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params);
    name << param.operation;
    name << param.fakeQuantizeAfter;
    return name.str();
}
|
||||
|
||||
void MoveFakeQuantizeTransformation::SetUp() {
|
||||
ngraph::element::Type netPrecision;
|
||||
ngraph::PartialShape inputShape;
|
||||
ngraph::pass::low_precision::LayerTransformation::Params params;
|
||||
MoveFakeQuantizeTransformationParam param;
|
||||
std::tie(netPrecision, inputShape, targetDevice, params, param) = this->GetParam();
|
||||
|
||||
function = ngraph::builder::subgraph::MoveFakeQuantize::get(
|
||||
netPrecision,
|
||||
inputShape,
|
||||
param.fakeQuantizeBefore1,
|
||||
param.convertBefore1,
|
||||
param.dequantizationBefore1,
|
||||
param.fakeQuantizeBefore2,
|
||||
param.convertBefore2,
|
||||
param.dequantizationBefore2,
|
||||
param.operation,
|
||||
param.fakeQuantizeAfter,
|
||||
param.convertAfter,
|
||||
param.dequantizationAfter,
|
||||
{},
|
||||
{},
|
||||
{},
|
||||
param.axis);
|
||||
}
|
||||
|
||||
void MoveFakeQuantizeTransformation::Run() {
|
||||
LayerTestsCommon::Run();
|
||||
|
||||
const auto params = std::get<4>(GetParam());
|
||||
const auto actualPrecision = getRuntimePrecisionByType(params.layerName);
|
||||
auto expectedPrecision = params.expectedKernelType;
|
||||
if (expectedPrecision == "FP32" && std::get<0>(GetParam()) == ngraph::element::f16) {
|
||||
expectedPrecision = "FP16";
|
||||
}
|
||||
EXPECT_EQ(actualPrecision, expectedPrecision);
|
||||
}
|
||||
|
||||
// Value-parameterized entry point: every parameter set goes through the
// fixture's Run().
TEST_P(MoveFakeQuantizeTransformation, CompareWithRefImpl) {
    this->Run();
}
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
||||
@@ -0,0 +1,41 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include "low_precision/layer_transformation.hpp"
|
||||
#include "common/fake_quantize_on_data.hpp"
|
||||
#include "common/dequantization_operations.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace builder {
|
||||
namespace subgraph {
|
||||
|
||||
class MoveFakeQuantize {
|
||||
public:
|
||||
static std::shared_ptr<ngraph::Function> get(
|
||||
const ngraph::element::Type inputPrecision,
|
||||
const ngraph::PartialShape& inputShape,
|
||||
const FakeQuantizeOnDataWithConstant& fqOnData1,
|
||||
const DequantizationOperations::Convert& convert1,
|
||||
const DequantizationOperations& dequantization1,
|
||||
const FakeQuantizeOnDataWithConstant& fqOnData2,
|
||||
const DequantizationOperations::Convert& convert2,
|
||||
const DequantizationOperations& dequantization2,
|
||||
const std::string& operation,
|
||||
const FakeQuantizeOnDataWithConstant& fqOnData3,
|
||||
const DequantizationOperations::Convert& convert3,
|
||||
const DequantizationOperations& dequantization3,
|
||||
const std::vector<std::shared_ptr<Variant>>& concatAttributes,
|
||||
const ngraph::element::Type precisionAfterOperation,
|
||||
const DequantizationOperations& dequantizationAfter,
|
||||
const std::int64_t& axis);
|
||||
};
|
||||
|
||||
} // namespace subgraph
|
||||
} // namespace builder
|
||||
} // namespace ngraph
|
||||
@@ -0,0 +1,107 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "lpt_ngraph_functions/move_fake_quantize_function.hpp"
|
||||
#include <low_precision/relu.hpp>
|
||||
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include "ngraph_ops/type_relaxed.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
|
||||
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
|
||||
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
|
||||
#include "lpt_ngraph_functions/common/builders.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace builder {
|
||||
namespace subgraph {
|
||||
|
||||
using namespace ngraph::pass;
|
||||
|
||||
std::shared_ptr<ngraph::Function> MoveFakeQuantize::get(
|
||||
const ngraph::element::Type inputPrecision,
|
||||
const ngraph::PartialShape& inputShape,
|
||||
const FakeQuantizeOnDataWithConstant& fqOnData1,
|
||||
const DequantizationOperations::Convert& convert1,
|
||||
const DequantizationOperations& dequantization1,
|
||||
const FakeQuantizeOnDataWithConstant& fqOnData2,
|
||||
const DequantizationOperations::Convert& convert2,
|
||||
const DequantizationOperations& dequantization2,
|
||||
const std::string& operation,
|
||||
const FakeQuantizeOnDataWithConstant& fqOnData3,
|
||||
const DequantizationOperations::Convert& convert3,
|
||||
const DequantizationOperations& dequantization3,
|
||||
const std::vector<std::shared_ptr<Variant>>& concatAttributes,
|
||||
const ngraph::element::Type precisionAfterOperation,
|
||||
const DequantizationOperations& dequantizationAfter,
|
||||
const std::int64_t& axis) {
|
||||
|
||||
const auto input1 = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape);
|
||||
input1->set_friendly_name("input1");
|
||||
|
||||
const auto input2 = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape);
|
||||
input2->set_friendly_name("input2");
|
||||
std::shared_ptr<Node> parent1 = input1, parent2 = input2;
|
||||
if (!fqOnData1.empty()) {
|
||||
if (operation == "relu") {
|
||||
auto relu1 = std::make_shared<ngraph::opset1::Relu>(input1->output(0));
|
||||
parent1 = makeFakeQuantize(relu1, inputPrecision, fqOnData1);
|
||||
} else {
|
||||
parent1 = makeFakeQuantize(input1, inputPrecision, fqOnData1);
|
||||
}
|
||||
parent1->set_friendly_name("concat_fq1");
|
||||
if (!convert1.empty()) {
|
||||
parent1 = std::make_shared<opset1::Convert>(parent1, convert1.outPrecision);
|
||||
}
|
||||
if (!dequantization1.empty()) {
|
||||
parent1 = makeDequantization(parent1, dequantization1);
|
||||
}
|
||||
}
|
||||
if (!fqOnData2.empty()) {
|
||||
if (operation == "relu") {
|
||||
auto relu2 = std::make_shared<ngraph::opset1::Relu>(input2->output(0));
|
||||
parent2 = makeFakeQuantize(relu2, inputPrecision, fqOnData2);
|
||||
} else {
|
||||
parent2 = makeFakeQuantize(input1, inputPrecision, fqOnData2);
|
||||
}
|
||||
parent2->set_friendly_name("concat_fq2");
|
||||
if (!convert2.empty()) {
|
||||
parent1 = std::make_shared<opset1::Convert>(parent2, convert2.outPrecision);
|
||||
}
|
||||
if (!dequantization1.empty()) {
|
||||
parent2 = makeDequantization(parent2, dequantization2);
|
||||
}
|
||||
}
|
||||
const std::shared_ptr<ngraph::opset1::Concat> concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{ parent1, parent2 }, axis);
|
||||
concat->set_friendly_name("concat");
|
||||
std::shared_ptr<ngraph::Node> parent = concat;
|
||||
if (!dequantizationAfter.empty()) {
|
||||
const auto lastDequantization = makeDequantization(concat, dequantizationAfter);
|
||||
lastDequantization->set_friendly_name("multiply");
|
||||
parent = lastDequantization;
|
||||
}
|
||||
addAttributes({ parent }, concatAttributes);
|
||||
if (!fqOnData3.empty()) {
|
||||
std::shared_ptr<Node> fq;
|
||||
if (operation == "relu") {
|
||||
auto relu = std::make_shared<ngraph::opset1::Relu>(concat->output(0));
|
||||
fq = makeFakeQuantize(relu, inputPrecision, fqOnData3);
|
||||
} else {
|
||||
fq = makeFakeQuantize(concat, inputPrecision, fqOnData3);
|
||||
}
|
||||
fq->set_friendly_name("fakeQuantizeAfter");
|
||||
parent = fq;
|
||||
}
|
||||
parent->set_friendly_name("output");
|
||||
ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(parent) };
|
||||
std::shared_ptr<ngraph::Function> function = std::make_shared<ngraph::Function>(
|
||||
results,
|
||||
ngraph::ParameterVector{ input1, input2 },
|
||||
"MoveFakeQuantize");
|
||||
return function;
|
||||
}
|
||||
|
||||
} // namespace subgraph
|
||||
} // namespace builder
|
||||
} // namespace ngraph
|
||||
Reference in New Issue
Block a user