[LPT] Added method to insert dequantization and avoid dq propagation to the shapeOf subgraphs (#8214)

Vladislav Golubev 2021-11-11 12:10:40 +03:00 committed by GitHub
parent 634e933c6d
commit 869408075c
14 changed files with 813 additions and 56 deletions
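The heart of the change is the new NetworkHelper::insertDequantizationAfter helper: transformations that previously finished with a bare replace_node(original, dequantizationMultiply) now call the helper instead, which also re-attaches any ShapeOf consumer of the original node to the re-created low-precision operation, so the dequantization chain never propagates into shape-calculation subgraphs. A minimal sketch of the helper, mirroring the implementation added to network_helper.cpp further down in this diff (it lives inside the ngraph::pass::low_precision namespace; the comments are explanatory additions, not part of the sources):

// originalNode   - the operation matched by the transformation
// dequantization - the dequantization Multiply moved after the operation
// newNode        - the re-created operation that now runs in low precision
void NetworkHelper::insertDequantizationAfter(
    const std::shared_ptr<Node>& originalNode,
    const std::shared_ptr<Node>& dequantization,
    const std::shared_ptr<Node>& newNode) {
    // every consumer of the original node is switched to the dequantization output...
    replace_node(originalNode, dequantization);
    // ...except ShapeOf consumers, which are re-wired directly to the low-precision
    // operation so that dequantization never reaches the shapeOf subgraphs
    for (const auto& input : dequantization->get_output_target_inputs(0)) {
        if (const auto shapeOf = as_type_ptr<opset1::ShapeOf>(input.get_node()->shared_from_this())) {
            const auto newShapeOf = shapeOf->clone_with_new_inputs({ newNode });
            replace_node_update_name(shapeOf, newShapeOf);
        }
    }
}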

View File

@ -210,6 +210,11 @@ public:
static bool isPrecisionPreserved(const std::shared_ptr<ngraph::Node>& node);
static void insertDequantizationAfter(
const std::shared_ptr<Node>& originalNode,
const std::shared_ptr<Node>& dequantization,
const std::shared_ptr<Node>& newNode);
static void replaceAttributeInNodes(
std::shared_ptr<ngraph::Function> f,
const std::string& name,

View File

@ -17,7 +17,7 @@ namespace low_precision {
class LP_TRANSFORMATIONS_API ReduceSumTransformation : public ReduceBaseTransformation {
public:
NGRAPH_RTTI_DECLARATION;
ReduceSumTransformation(const Params& params);
ReduceSumTransformation(const Params& params = Params());
bool isPrecisionPreserved(std::shared_ptr<Node> reduce) const noexcept override;
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const override;

View File

@ -227,7 +227,7 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter
ngraph::op::TemporaryReplaceOutputType(newAddOrSubtract, element::f32).get(),
ngraph::op::TemporaryReplaceOutputType(multiplyEmptyPathValues, element::f32).get());
replace_node(add, newMultiply);
NetworkHelper::insertDequantizationAfter(add, newMultiply, newAddOrSubtract);
NetworkHelper::copyInfo(add, newAddOrSubtract);
ngraph::copy_runtime_info({ add, newMultiply }, newMultiply);
}

View File

@ -70,8 +70,7 @@ bool ClampTransformation::transform(TransformationContext& context, ngraph::patt
replacement = std::make_shared<ngraph::opset1::Clamp>(newClamp->input_value(0), min, max);
}
replace_node(newClamp, replacement);
replacement->set_friendly_name(newClamp->get_friendly_name());
replace_node_update_name(newClamp, replacement);
element::Type outputClampType = dequantization.multiply ?
dequantization.multiply->get_output_element_type(0) :

View File

@ -160,7 +160,7 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
lastDequantization = multiply;
}
replace_node(concat, lastDequantization);
NetworkHelper::insertDequantizationAfter(concat, lastDequantization, newConcat);
NetworkHelper::copyInfo(concat, newConcat);
updateOutput(context, lastDequantization, newConcat);
return true;

View File

@ -81,6 +81,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
convolution = NetworkHelper::separateInStandaloneBranch(convolution);
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolution);
std::shared_ptr<Node> newMultiplyAfter;
{
std::shared_ptr<opset1::Subtract> subtract;
if (dequantization.subtract != nullptr) {
@ -172,13 +173,13 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
}
NetworkHelper::copyInfo(convolution, relaxedNewConvolution);
std::shared_ptr<ngraph::opset1::Multiply> newMultiplyAfter = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
newMultiplyAfter = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
std::vector<element::Type>{ deqPrecision, deqPrecision },
std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
ngraph::op::TemporaryReplaceOutputType(relaxedNewConvolution, deqPrecision).get(),
ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get());
replace_node(convolution, newMultiplyAfter);
NetworkHelper::insertDequantizationAfter(convolution, newMultiplyAfter, relaxedNewConvolution);
convolution = newMultiplyAfter->input_value(0).get_node_shared_ptr();
if (ov::is_type<opset1::Convert>(convolution->get_input_node_ptr(0))) {
@ -242,7 +243,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
});
NetworkHelper::copyInfo(convolution, newConvolution);
auto newMultiplyAfter = std::make_shared<opset1::Multiply>(
newMultiplyAfter = std::make_shared<opset1::Multiply>(
newConvolution,
foldConvert(
fold_reshape<opset1::Reshape>(
@ -250,7 +251,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
false),
convolution->get_output_element_type(0)));
replace_node(convolution, newMultiplyAfter);
NetworkHelper::insertDequantizationAfter(convolution, newMultiplyAfter, newConvolution);
convolution = newMultiplyAfter->input_value(0).get_node_shared_ptr();
}
@ -308,8 +309,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
}
}
std::shared_ptr<ngraph::opset1::Multiply> finalDequantization = NetworkHelper::optimizeMultipliesAfter(
convolution->output(0).get_target_inputs().begin()->get_node()->shared_from_this());
const auto finalDequantization = NetworkHelper::optimizeMultipliesAfter(newMultiplyAfter);
ngraph::copy_runtime_info({ convolution, finalDequantization }, finalDequantization);
updateOutput(context, finalDequantization, convolution);

View File

@ -98,6 +98,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
convolutionBackpropData = NetworkHelper::separateInStandaloneBranch(convolutionBackpropData);
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData);
std::shared_ptr<Node> newMultiplyAfter;
{
if (dequantization.subtract != nullptr) {
NetworkHelper::optimizeSubtract(dequantization.subtract);
@ -116,13 +117,13 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
std::vector<element::Type>{deqPrecision, deqPrecision},
std::vector<element::Type>{deqPrecision});
const auto newMultiplyAfter = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
newMultiplyAfter = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
std::vector<element::Type>{ deqPrecision, deqPrecision },
std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
ngraph::op::TemporaryReplaceOutputType(relaxedConvolutionBackpropData, deqPrecision).get(),
ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get());
NetworkHelper::insertDequantizationAfter(convolutionBackpropData, newMultiplyAfter, relaxedConvolutionBackpropData);
replace_node(convolutionBackpropData, newMultiplyAfter);
convolutionBackpropData = newMultiplyAfter->get_input_node_shared_ptr(0);
inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0);
if (ov::is_type<opset1::Convert>(convolutionBackpropData->get_input_node_ptr(0))) {
@ -154,15 +155,17 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
auto inputs = convolutionBackpropData->input_values();
inputs[1] = multiplyFromWeights->input_value(0);
auto newMultiplyAfter = std::make_shared<opset1::Multiply>(
convolutionBackpropData->clone_with_new_inputs(inputs),
const auto newconvolutionBackpropData = convolutionBackpropData->copy_with_new_inputs(inputs);
newMultiplyAfter = std::make_shared<opset1::Multiply>(
newconvolutionBackpropData,
foldConvert(
fold_reshape<opset1::Reshape>(
multiplyFromWeights->input_value(1),
std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
false),
convolutionBackpropData->get_output_element_type(0)));
replace_node(convolutionBackpropData, newMultiplyAfter);
NetworkHelper::insertDequantizationAfter(convolutionBackpropData, newMultiplyAfter, newconvolutionBackpropData);
convolutionBackpropData = newMultiplyAfter->get_input_node_shared_ptr(0);
}
@ -202,8 +205,8 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
convolutionBackpropData = newConvolution;
}
}
std::shared_ptr<ngraph::opset1::Multiply> finalDequantization = NetworkHelper::optimizeMultipliesAfter(
convolutionBackpropData->output(0).get_target_inputs().begin()->get_node()->shared_from_this());
const auto finalDequantization = NetworkHelper::optimizeMultipliesAfter(newMultiplyAfter);
ngraph::copy_runtime_info({ convolutionBackpropData, finalDequantization }, finalDequantization);
updateOutput(context, finalDequantization, convolutionBackpropData);

View File

@ -230,7 +230,7 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> decomposeFakeQuantize(
deqPrecision,
newFakeQuantizeLayer);
replace_node(layer, dequantization.multiply);
NetworkHelper::insertDequantizationAfter(layer, dequantization.multiply, newFakeQuantizeLayer);
std::vector<std::shared_ptr<ngraph::Node>> sourceNodes{ layer };
std::vector<std::shared_ptr<ngraph::Node>> targetNodes{ newFakeQuantizeLayer, dequantization.multiply };

View File

@ -167,7 +167,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
newMultiply->set_friendly_name(newMatMul->get_friendly_name() + "/DequantizationMultiply");
replace_node(matMul, newMultiply);
NetworkHelper::insertDequantizationAfter(matMul, newMultiply, newMatMul);
copy_runtime_info({ newMultiply, matMul }, newMultiply);
updateOutput(context, newMultiply, newMatMul);

View File

@ -159,7 +159,7 @@ bool MVNTransformation::transform(TransformationContext &context, ngraph::patter
mvn->get_output_element_type(0));
ngraph::copy_runtime_info({ mvn, newMultiply }, newMultiply);
replace_node(mvn, newMultiply);
NetworkHelper::insertDequantizationAfter(mvn, newMultiply, newMVN);
updateOutput(context, newMultiply, newMVN);
return true;

View File

@ -1108,7 +1108,7 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
dequantize->set_friendly_name(newFQ->get_friendly_name() + "/DequantizationMultiply");
ngraph::copy_runtime_info({ newFQ, dequantize }, dequantize);
replace_node(fq, dequantize);
insertDequantizationAfter(fq, dequantize, newFQ);
return std::make_tuple(newFQ, dequantize);
}
@ -1574,36 +1574,32 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
assert(operation->get_output_size() == 1);
// we must have dequantization multiply
assert(dequantization.multiply != nullptr);
OutputVector inputs = operation->input_values();
const size_t dequantizationIndex = getChildInputIndex(dequantization.multiply, operation);
inputs[dequantizationIndex] = moveSubtract ?
dequantization.data :
(dequantization.subtract == nullptr ? dequantization.data : dequantization.subtract);
inputs[dequantizationIndex] = (!moveSubtract && dequantization.subtract != nullptr) ?
dequantization.subtract :
dequantization.data;
const std::shared_ptr<ngraph::Node> newOperation = operation->clone_with_new_inputs(inputs);
const auto newOperation = operation->clone_with_new_inputs(inputs);
newOperation->set_friendly_name(operation->get_friendly_name());
ngraph::copy_runtime_info(operation, newOperation);
auto op = std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(newOperation);
if (op != nullptr) {
if (updatePrecision) {
op->set_overridden_output_type(newOperation->get_input_element_type(0));
} else if (dequantization.multiply) {
op->set_overridden_output_type(dequantization.multiplyConstant->get_element_type());
} else if (dequantization.subtract) {
op->set_overridden_output_type(dequantization.subtractConstant->get_element_type());
}
std::dynamic_pointer_cast<ngraph::Node>(newOperation)->validate_and_infer_types();
if (const auto op = std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(newOperation)) {
op->set_overridden_output_type(updatePrecision ?
newOperation->get_input_element_type(0) :
dequantization.multiplyConstant->get_element_type());
newOperation->validate_and_infer_types();
}
std::shared_ptr<Node> parent = newOperation;
const element::Type deqPrecision = dequantization.multiplyConstant->get_element_type();
const bool shouldConvert = (newOperation->get_output_element_type(0) != deqPrecision);
auto parent = newOperation;
if (shouldConvert) {
const auto convertOutputPrecision = dequantization.convert != nullptr ?
dequantization.convert->get_output_element_type(0) :
deqPrecision;
const auto convertOutputPrecision = dequantization.convert ? dequantization.convert->get_element_type() : deqPrecision;
parent = std::make_shared<opset1::Convert>(parent, convertOutputPrecision);
ngraph::copy_runtime_info({ newOperation, parent }, parent);
}
@ -1619,12 +1615,9 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
}
parent = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(
std::vector<element::Type>{element::f32, element::f32}, std::vector<element::Type>{ element::f32 },
ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(),
ngraph::op::TemporaryReplaceOutputType(
dequantization.subtractConstant->output(0).get_element_type() == parentPrecision ?
dequantization.subtractConstant :
foldConvert(dequantization.subtractConstant->output(0), parentPrecision), element::f32).get());
element::TypeVector{ element::f32, element::f32 }, element::TypeVector{ element::f32 },
op::TemporaryReplaceOutputType(parent, element::f32).get(),
op::TemporaryReplaceOutputType(foldConvert(dequantization.subtractConstant, parentPrecision), element::f32).get());
ngraph::copy_runtime_info({ newOperation, parent }, parent);
} else {
parent = std::make_shared<opset1::Subtract>(parent, dequantization.subtractConvert);
@ -1633,23 +1626,21 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
}
if (dequantization.multiply != nullptr) {
auto multiplyConstant = dequantization.multiplyConstant;
const element::Type parentPrecision = parent->get_output_element_type(0);
if (parentPrecision.bitwidth() < multiplyConstant->get_element_type().bitwidth()) {
if (parentPrecision.bitwidth() < dequantization.multiplyConstant->get_element_type().bitwidth()) {
THROW_IE_LPT_EXCEPTION(*parent) <<
"unexpected precisions: on data " << parent->get_friendly_name() << ":" << parentPrecision <<
", multiply dequantization constant " << multiplyConstant->get_friendly_name() << ":" << multiplyConstant->get_element_type();
", multiply dequantization constant " << dequantization.multiplyConstant->get_friendly_name() << ":" <<
dequantization.multiplyConstant->get_element_type();
}
parent = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
opset1::Multiply(parent,
multiplyConstant->output(0).get_element_type() == parentPrecision ?
multiplyConstant :
foldConvert(multiplyConstant->output(0), parentPrecision)),
opset1::Multiply(parent, foldConvert(dequantization.multiplyConstant, parentPrecision)),
dequantization.multiply->get_output_element_type(0));
ngraph::copy_runtime_info({ newOperation, parent }, parent);
}
replace_node(operation, parent);
insertDequantizationAfter(operation, parent, newOperation);
if ((!moveSubtract) && (dequantization.convert != nullptr) && (dequantization.subtract != nullptr)) {
// issue #43088
@ -1878,6 +1869,21 @@ bool isDisabled(const std::shared_ptr<Node>& node) {
}
return false;
}
void NetworkHelper::insertDequantizationAfter(
const std::shared_ptr<Node>& originalNode,
const std::shared_ptr<Node>& dequantization,
const std::shared_ptr<Node>& newNode) {
replace_node(originalNode, dequantization);
// We do it to avoid dequantization propagation to the shapeOf subgraphs
for (const auto& input : dequantization->get_output_target_inputs(0)) {
if (const auto shapeOf = as_type_ptr<opset1::ShapeOf>(input.get_node()->shared_from_this())) {
const auto newShapeOf = shapeOf->clone_with_new_inputs({ newNode });
replace_node_update_name(shapeOf, newShapeOf);
}
}
}
} // namespace low_precision
} // namespace pass
} // namespace ngraph

View File

@ -140,7 +140,7 @@ bool NormalizeL2Transformation::transform(TransformationContext &context, ngraph
ngraph::op::TemporaryReplaceOutputType(newNormalize, element::f32).get(),
ngraph::op::TemporaryReplaceOutputType(newScalesConst, element::f32).get());
replace_node(normalize, newMultiply);
NetworkHelper::insertDequantizationAfter(normalize, newMultiply, newNormalize);
ngraph::copy_runtime_info({ normalize, newMultiply }, newMultiply);
updateOutput(context, newMultiply, newNormalize);

View File

@ -109,6 +109,16 @@ bool SplitTransformation::transform(TransformationContext& context, ngraph::patt
}
}
// We do it to avoid dequantization propagation to the shapeOf subgraphs
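// (Each Split output gets its own dequantization, so the ShapeOf consumers are re-wired per output here
// instead of via NetworkHelper::insertDequantizationAfter, which handles a single dequantization output.)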
for (size_t i = 0; i < replacement.size(); ++i) {
for (const auto& input : replacement[i].get_target_inputs()) {
if (const auto shapeOf = as_type_ptr<opset1::ShapeOf>(input.get_node()->shared_from_this())) {
const auto newShapeOf = shapeOf->clone_with_new_inputs({ newSplit->output(i) });
replace_node_update_name(shapeOf, newShapeOf);
}
}
}
updateOutputs(context, lastNodes, newSplit);
return true;
}

View File

@ -0,0 +1,734 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include <sstream>
#include <memory>
#include "low_precision/add.hpp"
#include "low_precision/avg_pool.hpp"
#include "low_precision/clamp.hpp"
#include "low_precision/concat.hpp"
#include "low_precision/convolution.hpp"
#include "low_precision/convolution_backprop_data.hpp"
#include "low_precision/depth_to_space.hpp"
#include "low_precision/fake_quantize_decomposition.hpp"
#include "low_precision/group_convolution.hpp"
#include "low_precision/interpolate.hpp"
#include "low_precision/mat_mul.hpp"
#include "low_precision/max_pool.hpp"
#include "low_precision/multiply.hpp"
#include "low_precision/mvn.hpp"
#include "low_precision/normalize_l2.hpp"
#include "low_precision/pad.hpp"
#include "low_precision/prelu.hpp"
#include "low_precision/reduce_max.hpp"
#include "low_precision/reduce_mean.hpp"
#include "low_precision/reduce_min.hpp"
#include "low_precision/reduce_sum.hpp"
#include "low_precision/reshape.hpp"
#include "low_precision/relu.hpp"
#include "low_precision/squeeze.hpp"
#include "low_precision/split.hpp"
#include "low_precision/shuffle_channels.hpp"
#include "low_precision/strided_slice.hpp"
#include "low_precision/transpose.hpp"
#include "low_precision/unsqueeze.hpp"
#include "low_precision/variadic_split.hpp"
#include "low_precision/network_helper.hpp"
#include "lpt_ngraph_functions/common/builders.hpp"
#include <gtest/gtest.h>
#include <transformations/utils/utils.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ngraph;
using namespace ngraph::pass;
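// Common scenario for all tests below: build a dequantization subgraph (u8 input -> Convert -> Multiply)
// feeding the tested operation, give that operation two consumers (a Result and a ShapeOf), run the
// corresponding LPT transformation, and check that the dequantization was not propagated to the ShapeOf
// subgraph (NetworkHelper::getDequantization before it must be empty).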
TEST(LPT, AvoidDequantizationToShapeOfPropagationAddTransformation) {
auto input1 = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto input2 = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert1 = std::make_shared<opset1::Convert>(input1, element::f32);
auto convert2 = std::make_shared<opset1::Convert>(input2, element::f32);
auto mul1 = std::make_shared<opset1::Multiply>(convert1, opset1::Constant::create(element::f32, {}, { 2.f }));
auto mul2 = std::make_shared<opset1::Multiply>(convert2, opset1::Constant::create(element::f32, {}, { 4.f }));
auto add = std::make_shared<opset1::Add>(mul1, mul2);
auto shapeOf = std::make_shared<opset1::ShapeOf>(add);
auto result1 = std::make_shared<opset1::Result>(add);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input1, input2 });
pass::Manager m;
m.register_pass<pass::low_precision::AddTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationAvgPoolTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto avgPool = std::make_shared<opset1::AvgPool>(mul, Strides{ 1, 1 }, Shape{ 1, 1 }, Shape{ 0, 0 }, Shape{ 2, 2 }, true);
auto shapeOf = std::make_shared<opset1::ShapeOf>(avgPool);
auto result1 = std::make_shared<opset1::Result>(avgPool);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::AvgPoolTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationClampTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto clamp = std::make_shared<opset1::Clamp>(mul, 0.0, 6.0);
auto shapeOf = std::make_shared<opset1::ShapeOf>(clamp);
auto result1 = std::make_shared<opset1::Result>(clamp);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::ClampTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationConcatTransformation) {
auto input1 = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto input2 = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert1 = std::make_shared<opset1::Convert>(input1, element::f32);
auto convert2 = std::make_shared<opset1::Convert>(input2, element::f32);
auto mul1 = std::make_shared<opset1::Multiply>(convert1, opset1::Constant::create(element::f32, {}, { 2.f }));
auto mul2 = std::make_shared<opset1::Multiply>(convert2, opset1::Constant::create(element::f32, {}, { 4.f }));
auto concat = std::make_shared<opset1::Concat>(OutputVector{ mul1, mul2 }, 1);
auto shapeOf = std::make_shared<opset1::ShapeOf>(concat);
auto result1 = std::make_shared<opset1::Result>(concat);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input1, input2 });
pass::Manager m;
m.register_pass<pass::low_precision::ConcatTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationConvolutionTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto weights = opset1::Constant::create(element::i8, { 6, 3, 1, 1 }, { 3 });
auto convertOnWeights = std::make_shared<opset1::Convert>(weights, element::f32);
auto mulOnWeights = std::make_shared<opset1::Multiply>(convertOnWeights, opset1::Constant::create(element::f32, {}, { 4.f }));
auto convolution = std::make_shared<opset1::Convolution>(
mul,
mulOnWeights,
ngraph::Strides{ 1, 1 },
ngraph::CoordinateDiff{ 0, 0 },
ngraph::CoordinateDiff{ 0, 0 },
ngraph::Strides{ 1, 1 });
auto shapeOf = std::make_shared<opset1::ShapeOf>(convolution);
auto result1 = std::make_shared<opset1::Result>(convolution);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::ConvolutionTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationConvolutionBackpropDataTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 8, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto weights = opset1::Constant::create(element::i8, { 8, 2, 1, 1 }, { 3 });
auto convertOnWeights = std::make_shared<opset1::Convert>(weights, element::f32);
auto mulOnWeights = std::make_shared<opset1::Multiply>(convertOnWeights, opset1::Constant::create(element::f32, {}, { 4.f }));
auto convolutionBackpropData = std::make_shared<opset1::ConvolutionBackpropData>(
mul,
mulOnWeights,
ngraph::Strides{ 1, 1 },
ngraph::CoordinateDiff{ 0, 0 },
ngraph::CoordinateDiff{ 0, 0 },
ngraph::Strides{ 1, 1 });
auto shapeOf = std::make_shared<opset1::ShapeOf>(convolutionBackpropData);
auto result1 = std::make_shared<opset1::Result>(convolutionBackpropData);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::ConvolutionBackpropDataTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationDepthToSpaceTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto d2s = std::make_shared<opset1::DepthToSpace>(mul, op::v0::DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST);
auto shapeOf = std::make_shared<opset1::ShapeOf>(d2s);
auto result1 = std::make_shared<opset1::Result>(d2s);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::DepthToSpaceTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationFakeQuantizeDecompositionTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::f32, PartialShape{ 1, 3, 16, 16 });
ngraph::builder::subgraph::FakeQuantizeOnData fqValues{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} };
auto fakeQuantize = ngraph::builder::subgraph::makeFakeQuantize(input, element::f32, fqValues);
auto shapeOf = std::make_shared<opset1::ShapeOf>(fakeQuantize);
auto& outInfo = fakeQuantize->output(0).get_rt_info();
auto attribute = ngraph::pass::low_precision::make_shared_attribute<PrecisionsAttribute>(element::TypeVector{ element::u8, element::i8 });
auto attributeWrapper = std::make_shared<ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>(attribute);
outInfo.emplace(ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>::type_info.name, attributeWrapper);
auto result1 = std::make_shared<opset1::Result>(fakeQuantize);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::FakeQuantizeDecompositionTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationGroupConvolutionTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 2 * 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto weights = opset1::Constant::create(element::i8, { 6, 3, 7, 7 }, { 2 });
auto convertOnWeights = std::make_shared<opset1::Convert>(weights, element::f32);
auto mulOnWeights = std::make_shared<opset1::Multiply>(convertOnWeights, opset1::Constant::create(element::f32, {}, { 4.f }));
auto reshapeConst = opset1::Constant::create(element::i32, { 5 }, { 2, 3, 3, 7, 7 });
auto reshapeOnWeights = std::make_shared<opset1::Reshape>(mulOnWeights, reshapeConst, true);
auto groupConvolution = std::make_shared<opset1::GroupConvolution>(
mul,
reshapeOnWeights,
ngraph::Strides{ 1, 1 },
ngraph::CoordinateDiff{ 0, 0 },
ngraph::CoordinateDiff{ 0, 0 },
ngraph::Strides{ 1, 1 });
auto shapeOf = std::make_shared<opset1::ShapeOf>(groupConvolution);
auto result1 = std::make_shared<opset1::Result>(groupConvolution);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::GroupConvolutionTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationInterpolateTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto outShape = opset1::Constant::create(element::i32, { 4 }, { 1, 3, 18, 18});
op::v0::InterpolateAttrs attributes;
attributes.align_corners = false;
attributes.antialias = false;
attributes.axes = AxisSet{ 2, 3 };
attributes.mode = "nearest";
attributes.pads_begin = { 0 };
attributes.pads_end = { 0 };
auto interpolate = std::make_shared<opset1::Interpolate>(mul, outShape, attributes);
auto shapeOf = std::make_shared<opset1::ShapeOf>(interpolate);
auto result1 = std::make_shared<opset1::Result>(interpolate);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::InterpolateTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationMatMulTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 1024 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto weights = opset1::Constant::create(element::i8, { 2048, 1024 }, { 3 });
auto convertOnWeights = std::make_shared<opset1::Convert>(weights, element::f32);
auto mulOnWeights = std::make_shared<opset1::Multiply>(convertOnWeights, opset1::Constant::create(element::f32, {}, { 4.f }));
auto matmul = std::make_shared<opset1::MatMul>(mul, mulOnWeights, false, true);
auto shapeOf = std::make_shared<opset1::ShapeOf>(matmul);
auto result1 = std::make_shared<opset1::Result>(matmul);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::MatMulTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationMaxPoolTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto maxPool = std::make_shared<opset1::MaxPool>(mul, Strides{ 1, 1 }, Shape{ 1, 1 }, Shape{ 0, 0 }, Shape{ 2, 2 });
auto shapeOf = std::make_shared<opset1::ShapeOf>(maxPool);
auto result1 = std::make_shared<opset1::Result>(maxPool);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::MaxPoolTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationMultiplyTransformation) {
auto input1 = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto input2 = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert1 = std::make_shared<opset1::Convert>(input1, element::f32);
auto convert2 = std::make_shared<opset1::Convert>(input2, element::f32);
auto mul1 = std::make_shared<opset1::Multiply>(convert1, opset1::Constant::create(element::f32, {}, { 2.f }));
auto mul2 = std::make_shared<opset1::Multiply>(convert2, opset1::Constant::create(element::f32, {}, { 4.f }));
auto mul = std::make_shared<opset1::Multiply>(mul1, mul2);
auto shapeOf = std::make_shared<opset1::ShapeOf>(mul);
auto result1 = std::make_shared<opset1::Result>(mul);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input1, input2 });
pass::Manager m;
m.register_pass<pass::low_precision::MultiplyTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationMVNTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto MVN = std::make_shared<op::TypeRelaxed<op::v0::MVN>>(mul);
auto shapeOf = std::make_shared<opset1::ShapeOf>(MVN);
auto result1 = std::make_shared<opset1::Result>(MVN);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::MVNTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationNormalizeL2Transformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto axes = opset1::Constant::create(element::i32, { 2 }, { 2, 3 });
auto normalize = std::make_shared<opset1::NormalizeL2>(mul, axes, 0.01, ov::op::EpsMode::ADD);
auto shapeOf = std::make_shared<opset1::ShapeOf>(normalize);
auto result1 = std::make_shared<opset1::Result>(normalize);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::NormalizeL2Transformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationPadTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto pads_begin = opset1::Constant::create(element::i32, { 4 }, { 0, 0, 1, 1 });
auto pads_end = opset1::Constant::create(element::i32, { 4 }, { 0, 0, 1, 1 });
auto pad = std::make_shared<opset1::Pad>(mul, pads_begin, pads_end, op::PadMode::CONSTANT);
auto shapeOf = std::make_shared<opset1::ShapeOf>(pad);
auto result1 = std::make_shared<opset1::Result>(pad);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::PadTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationPReluTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto slope = opset1::Constant::create(element::f32, { 1, 3, 1, 1 }, { 0.01f });
auto prelu = std::make_shared<opset1::PRelu>(mul, slope);
auto shapeOf = std::make_shared<opset1::ShapeOf>(prelu);
auto result1 = std::make_shared<opset1::Result>(prelu);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::PReluTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationReduceMaxTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto axes = opset1::Constant::create(element::i32, { 2 }, { 2, 3 });
auto reduce = std::make_shared<opset1::ReduceMax>(mul, axes);
auto shapeOf = std::make_shared<opset1::ShapeOf>(reduce);
auto result1 = std::make_shared<opset1::Result>(reduce);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::ReduceMaxTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationReduceMeanTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto axes = opset1::Constant::create(element::i32, { 2 }, { 2, 3 });
auto reduce = std::make_shared<opset1::ReduceMean>(mul, axes);
auto shapeOf = std::make_shared<opset1::ShapeOf>(reduce);
auto result1 = std::make_shared<opset1::Result>(reduce);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::ReduceMeanTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationReduceMinTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto axes = opset1::Constant::create(element::i32, { 2 }, { 2, 3 });
auto reduce = std::make_shared<opset1::ReduceMin>(mul, axes);
auto shapeOf = std::make_shared<opset1::ShapeOf>(reduce);
auto result1 = std::make_shared<opset1::Result>(reduce);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::ReduceMinTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationReduceSumTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto axes = opset1::Constant::create(element::i32, { 2 }, { 2, 3 });
auto reduce = std::make_shared<opset1::ReduceSum>(mul, axes);
auto shapeOf = std::make_shared<opset1::ShapeOf>(reduce);
auto result1 = std::make_shared<opset1::Result>(reduce);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::ReduceSumTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationReshapeTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto outShape = opset1::Constant::create(element::i32, { 3 }, { 1, 3, -1 });
auto reshape = std::make_shared<opset1::Reshape>(mul, outShape, true);
auto shapeOf = std::make_shared<opset1::ShapeOf>(reshape);
auto result1 = std::make_shared<opset1::Result>(reshape);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::ReshapeTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationReluTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto relu = std::make_shared<opset1::Relu>(mul);
auto shapeOf = std::make_shared<opset1::ShapeOf>(relu);
auto result1 = std::make_shared<opset1::Result>(relu);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::ReluTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationSqueezeTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto axes = opset1::Constant::create(element::i32, { 1 }, { 0 });
auto squeeze = std::make_shared<opset1::Squeeze>(mul, axes);
auto shapeOf = std::make_shared<opset1::ShapeOf>(squeeze);
auto result1 = std::make_shared<opset1::Result>(squeeze);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::SqueezeTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationSplitTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto axis = opset1::Constant::create(element::i32, {}, { 1 });
auto split = std::make_shared<opset1::Split>(mul, axis, 3);
auto shapeOf = std::make_shared<opset1::ShapeOf>(split);
auto result1 = std::make_shared<opset1::Result>(split->output(0));
auto result2 = std::make_shared<opset1::Result>(split->output(1));
auto result3 = std::make_shared<opset1::Result>(split->output(2));
auto result4 = std::make_shared<opset1::Result>(shapeOf->output(0));
auto f = std::make_shared<Function>(ResultVector{ result1, result2, result3, result4 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::SplitTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationShuffleChannelsTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto shuffleChannels = std::make_shared<opset1::ShuffleChannels>(mul);
auto shapeOf = std::make_shared<opset1::ShapeOf>(shuffleChannels);
auto result1 = std::make_shared<opset1::Result>(shuffleChannels);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::ShuffleChannelsTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationStridedSliceTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto beginParam = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 0, 0, 0 });
auto endParam = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 1, 2, 1, 1 });
auto stridesParam = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 1, 1, 1, 1 });
auto stridedSlice = std::make_shared<ngraph::opset1::StridedSlice>(
mul, beginParam, endParam, stridesParam,
std::vector<std::int64_t>{ 1, 0, 1, 1 },
std::vector<std::int64_t>{ 1, 0, 1, 1 });
auto shapeOf = std::make_shared<opset1::ShapeOf>(stridedSlice);
auto result1 = std::make_shared<opset1::Result>(stridedSlice);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::StridedSliceTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationTransposeTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto constant = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 1, 3, 2 });
auto transpose = std::make_shared<ngraph::opset1::Transpose>(mul, constant);
auto shapeOf = std::make_shared<opset1::ShapeOf>(transpose);
auto result1 = std::make_shared<opset1::Result>(transpose);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::TransposeTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationUnsqueezeTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto axes = opset1::Constant::create(element::i32, { 1 }, { 3 });
auto unsqueeze = std::make_shared<opset1::Unsqueeze>(mul, axes);
auto shapeOf = std::make_shared<opset1::ShapeOf>(unsqueeze);
auto result1 = std::make_shared<opset1::Result>(unsqueeze);
auto result2 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::UnsqueezeTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}
TEST(LPT, AvoidDequantizationToShapeOfPropagationVariadicSplitTransformation) {
auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
auto convert = std::make_shared<opset1::Convert>(input, element::f32);
auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));
auto axis = opset1::Constant::create(element::i32, {}, { 1 });
auto lengths = opset1::Constant::create(element::i32, { 2 }, { 1, 2 });
auto variadicSplit = std::make_shared<opset1::VariadicSplit>(mul, axis, lengths);
auto shapeOf = std::make_shared<opset1::ShapeOf>(variadicSplit->output(0));
auto result1 = std::make_shared<opset1::Result>(variadicSplit->output(0));
auto result2 = std::make_shared<opset1::Result>(variadicSplit->output(1));
auto result3 = std::make_shared<opset1::Result>(shapeOf);
auto f = std::make_shared<Function>(ResultVector{ result1, result2, result3 }, ParameterVector{ input });
pass::Manager m;
m.register_pass<pass::low_precision::VariadicSplitTransformation>();
m.run_passes(f);
auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result2->get_input_node_shared_ptr(0));
ASSERT_TRUE(dqBeforeShapeOf.empty());
}