[LPT] Added method to insert dequantization and avoid dq propagation to the shapeOf subgraphs (#8214)
This commit is contained in:
parent
634e933c6d
commit
869408075c
@ -210,6 +210,11 @@ public:
|
|||||||
|
|
||||||
static bool isPrecisionPreserved(const std::shared_ptr<ngraph::Node>& node);
|
static bool isPrecisionPreserved(const std::shared_ptr<ngraph::Node>& node);
|
||||||
|
|
||||||
|
// Replaces originalNode with dequantization in the graph and then re-attaches
// every opset1::ShapeOf consumer of dequantization's output 0 to newNode, so the
// dequantization operations are not propagated into ShapeOf subgraphs.
// originalNode   - node being replaced
// dequantization - node that takes originalNode's place
// newNode        - node that ShapeOf consumers are reconnected to
static void insertDequantizationAfter(
|
||||||
|
const std::shared_ptr<Node>& originalNode,
|
||||||
|
const std::shared_ptr<Node>& dequantization,
|
||||||
|
const std::shared_ptr<Node>& newNode);
|
||||||
|
|
||||||
static void replaceAttributeInNodes(
|
static void replaceAttributeInNodes(
|
||||||
std::shared_ptr<ngraph::Function> f,
|
std::shared_ptr<ngraph::Function> f,
|
||||||
const std::string& name,
|
const std::string& name,
|
||||||
|
@ -17,7 +17,7 @@ namespace low_precision {
|
|||||||
class LP_TRANSFORMATIONS_API ReduceSumTransformation : public ReduceBaseTransformation {
|
class LP_TRANSFORMATIONS_API ReduceSumTransformation : public ReduceBaseTransformation {
|
||||||
public:
|
public:
|
||||||
NGRAPH_RTTI_DECLARATION;
|
NGRAPH_RTTI_DECLARATION;
|
||||||
ReduceSumTransformation(const Params& params);
|
ReduceSumTransformation(const Params& params = Params());
|
||||||
bool isPrecisionPreserved(std::shared_ptr<Node> reduce) const noexcept override;
|
bool isPrecisionPreserved(std::shared_ptr<Node> reduce) const noexcept override;
|
||||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const override;
|
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const override;
|
||||||
|
|
||||||
|
@ -227,7 +227,7 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter
|
|||||||
ngraph::op::TemporaryReplaceOutputType(newAddOrSubtract, element::f32).get(),
|
ngraph::op::TemporaryReplaceOutputType(newAddOrSubtract, element::f32).get(),
|
||||||
ngraph::op::TemporaryReplaceOutputType(multiplyEmptyPathValues, element::f32).get());
|
ngraph::op::TemporaryReplaceOutputType(multiplyEmptyPathValues, element::f32).get());
|
||||||
|
|
||||||
replace_node(add, newMultiply);
|
NetworkHelper::insertDequantizationAfter(add, newMultiply, newAddOrSubtract);
|
||||||
NetworkHelper::copyInfo(add, newAddOrSubtract);
|
NetworkHelper::copyInfo(add, newAddOrSubtract);
|
||||||
ngraph::copy_runtime_info({ add, newMultiply }, newMultiply);
|
ngraph::copy_runtime_info({ add, newMultiply }, newMultiply);
|
||||||
}
|
}
|
||||||
|
@ -70,8 +70,7 @@ bool ClampTransformation::transform(TransformationContext& context, ngraph::patt
|
|||||||
replacement = std::make_shared<ngraph::opset1::Clamp>(newClamp->input_value(0), min, max);
|
replacement = std::make_shared<ngraph::opset1::Clamp>(newClamp->input_value(0), min, max);
|
||||||
}
|
}
|
||||||
|
|
||||||
replace_node(newClamp, replacement);
|
replace_node_update_name(newClamp, replacement);
|
||||||
replacement->set_friendly_name(newClamp->get_friendly_name());
|
|
||||||
|
|
||||||
element::Type outputClampType = dequantization.multiply ?
|
element::Type outputClampType = dequantization.multiply ?
|
||||||
dequantization.multiply->get_output_element_type(0) :
|
dequantization.multiply->get_output_element_type(0) :
|
||||||
|
@ -160,7 +160,7 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
|
|||||||
lastDequantization = multiply;
|
lastDequantization = multiply;
|
||||||
}
|
}
|
||||||
|
|
||||||
replace_node(concat, lastDequantization);
|
NetworkHelper::insertDequantizationAfter(concat, lastDequantization, newConcat);
|
||||||
NetworkHelper::copyInfo(concat, newConcat);
|
NetworkHelper::copyInfo(concat, newConcat);
|
||||||
updateOutput(context, lastDequantization, newConcat);
|
updateOutput(context, lastDequantization, newConcat);
|
||||||
return true;
|
return true;
|
||||||
|
@ -81,6 +81,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
convolution = NetworkHelper::separateInStandaloneBranch(convolution);
|
convolution = NetworkHelper::separateInStandaloneBranch(convolution);
|
||||||
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolution);
|
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolution);
|
||||||
|
|
||||||
|
std::shared_ptr<Node> newMultiplyAfter;
|
||||||
{
|
{
|
||||||
std::shared_ptr<opset1::Subtract> subtract;
|
std::shared_ptr<opset1::Subtract> subtract;
|
||||||
if (dequantization.subtract != nullptr) {
|
if (dequantization.subtract != nullptr) {
|
||||||
@ -172,13 +173,13 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
}
|
}
|
||||||
NetworkHelper::copyInfo(convolution, relaxedNewConvolution);
|
NetworkHelper::copyInfo(convolution, relaxedNewConvolution);
|
||||||
|
|
||||||
std::shared_ptr<ngraph::opset1::Multiply> newMultiplyAfter = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
newMultiplyAfter = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
||||||
std::vector<element::Type>{ deqPrecision, deqPrecision },
|
std::vector<element::Type>{ deqPrecision, deqPrecision },
|
||||||
std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
|
std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
|
||||||
ngraph::op::TemporaryReplaceOutputType(relaxedNewConvolution, deqPrecision).get(),
|
ngraph::op::TemporaryReplaceOutputType(relaxedNewConvolution, deqPrecision).get(),
|
||||||
ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get());
|
ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get());
|
||||||
|
|
||||||
replace_node(convolution, newMultiplyAfter);
|
NetworkHelper::insertDequantizationAfter(convolution, newMultiplyAfter, relaxedNewConvolution);
|
||||||
convolution = newMultiplyAfter->input_value(0).get_node_shared_ptr();
|
convolution = newMultiplyAfter->input_value(0).get_node_shared_ptr();
|
||||||
|
|
||||||
if (ov::is_type<opset1::Convert>(convolution->get_input_node_ptr(0))) {
|
if (ov::is_type<opset1::Convert>(convolution->get_input_node_ptr(0))) {
|
||||||
@ -242,7 +243,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
});
|
});
|
||||||
NetworkHelper::copyInfo(convolution, newConvolution);
|
NetworkHelper::copyInfo(convolution, newConvolution);
|
||||||
|
|
||||||
auto newMultiplyAfter = std::make_shared<opset1::Multiply>(
|
newMultiplyAfter = std::make_shared<opset1::Multiply>(
|
||||||
newConvolution,
|
newConvolution,
|
||||||
foldConvert(
|
foldConvert(
|
||||||
fold_reshape<opset1::Reshape>(
|
fold_reshape<opset1::Reshape>(
|
||||||
@ -250,7 +251,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
|
std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
|
||||||
false),
|
false),
|
||||||
convolution->get_output_element_type(0)));
|
convolution->get_output_element_type(0)));
|
||||||
replace_node(convolution, newMultiplyAfter);
|
NetworkHelper::insertDequantizationAfter(convolution, newMultiplyAfter, newConvolution);
|
||||||
convolution = newMultiplyAfter->input_value(0).get_node_shared_ptr();
|
convolution = newMultiplyAfter->input_value(0).get_node_shared_ptr();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -308,8 +309,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<ngraph::opset1::Multiply> finalDequantization = NetworkHelper::optimizeMultipliesAfter(
|
const auto finalDequantization = NetworkHelper::optimizeMultipliesAfter(newMultiplyAfter);
|
||||||
convolution->output(0).get_target_inputs().begin()->get_node()->shared_from_this());
|
|
||||||
ngraph::copy_runtime_info({ convolution, finalDequantization }, finalDequantization);
|
ngraph::copy_runtime_info({ convolution, finalDequantization }, finalDequantization);
|
||||||
updateOutput(context, finalDequantization, convolution);
|
updateOutput(context, finalDequantization, convolution);
|
||||||
|
|
||||||
|
@ -98,6 +98,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
|||||||
|
|
||||||
convolutionBackpropData = NetworkHelper::separateInStandaloneBranch(convolutionBackpropData);
|
convolutionBackpropData = NetworkHelper::separateInStandaloneBranch(convolutionBackpropData);
|
||||||
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData);
|
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData);
|
||||||
|
std::shared_ptr<Node> newMultiplyAfter;
|
||||||
{
|
{
|
||||||
if (dequantization.subtract != nullptr) {
|
if (dequantization.subtract != nullptr) {
|
||||||
NetworkHelper::optimizeSubtract(dequantization.subtract);
|
NetworkHelper::optimizeSubtract(dequantization.subtract);
|
||||||
@ -116,13 +117,13 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
|||||||
std::vector<element::Type>{deqPrecision, deqPrecision},
|
std::vector<element::Type>{deqPrecision, deqPrecision},
|
||||||
std::vector<element::Type>{deqPrecision});
|
std::vector<element::Type>{deqPrecision});
|
||||||
|
|
||||||
const auto newMultiplyAfter = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
newMultiplyAfter = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
||||||
std::vector<element::Type>{ deqPrecision, deqPrecision },
|
std::vector<element::Type>{ deqPrecision, deqPrecision },
|
||||||
std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
|
std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
|
||||||
ngraph::op::TemporaryReplaceOutputType(relaxedConvolutionBackpropData, deqPrecision).get(),
|
ngraph::op::TemporaryReplaceOutputType(relaxedConvolutionBackpropData, deqPrecision).get(),
|
||||||
ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get());
|
ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get());
|
||||||
|
NetworkHelper::insertDequantizationAfter(convolutionBackpropData, newMultiplyAfter, relaxedConvolutionBackpropData);
|
||||||
|
|
||||||
replace_node(convolutionBackpropData, newMultiplyAfter);
|
|
||||||
convolutionBackpropData = newMultiplyAfter->get_input_node_shared_ptr(0);
|
convolutionBackpropData = newMultiplyAfter->get_input_node_shared_ptr(0);
|
||||||
inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0);
|
inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0);
|
||||||
if (ov::is_type<opset1::Convert>(convolutionBackpropData->get_input_node_ptr(0))) {
|
if (ov::is_type<opset1::Convert>(convolutionBackpropData->get_input_node_ptr(0))) {
|
||||||
@ -154,15 +155,17 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
|||||||
|
|
||||||
auto inputs = convolutionBackpropData->input_values();
|
auto inputs = convolutionBackpropData->input_values();
|
||||||
inputs[1] = multiplyFromWeights->input_value(0);
|
inputs[1] = multiplyFromWeights->input_value(0);
|
||||||
auto newMultiplyAfter = std::make_shared<opset1::Multiply>(
|
|
||||||
convolutionBackpropData->clone_with_new_inputs(inputs),
|
const auto newconvolutionBackpropData = convolutionBackpropData->copy_with_new_inputs(inputs);
|
||||||
|
newMultiplyAfter = std::make_shared<opset1::Multiply>(
|
||||||
|
newconvolutionBackpropData,
|
||||||
foldConvert(
|
foldConvert(
|
||||||
fold_reshape<opset1::Reshape>(
|
fold_reshape<opset1::Reshape>(
|
||||||
multiplyFromWeights->input_value(1),
|
multiplyFromWeights->input_value(1),
|
||||||
std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
|
std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
|
||||||
false),
|
false),
|
||||||
convolutionBackpropData->get_output_element_type(0)));
|
convolutionBackpropData->get_output_element_type(0)));
|
||||||
replace_node(convolutionBackpropData, newMultiplyAfter);
|
NetworkHelper::insertDequantizationAfter(convolutionBackpropData, newMultiplyAfter, newconvolutionBackpropData);
|
||||||
convolutionBackpropData = newMultiplyAfter->get_input_node_shared_ptr(0);
|
convolutionBackpropData = newMultiplyAfter->get_input_node_shared_ptr(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -202,8 +205,8 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
|||||||
convolutionBackpropData = newConvolution;
|
convolutionBackpropData = newConvolution;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::shared_ptr<ngraph::opset1::Multiply> finalDequantization = NetworkHelper::optimizeMultipliesAfter(
|
|
||||||
convolutionBackpropData->output(0).get_target_inputs().begin()->get_node()->shared_from_this());
|
const auto finalDequantization = NetworkHelper::optimizeMultipliesAfter(newMultiplyAfter);
|
||||||
ngraph::copy_runtime_info({ convolutionBackpropData, finalDequantization }, finalDequantization);
|
ngraph::copy_runtime_info({ convolutionBackpropData, finalDequantization }, finalDequantization);
|
||||||
updateOutput(context, finalDequantization, convolutionBackpropData);
|
updateOutput(context, finalDequantization, convolutionBackpropData);
|
||||||
|
|
||||||
|
@ -230,7 +230,7 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> decomposeFakeQuantize(
|
|||||||
deqPrecision,
|
deqPrecision,
|
||||||
newFakeQuantizeLayer);
|
newFakeQuantizeLayer);
|
||||||
|
|
||||||
replace_node(layer, dequantization.multiply);
|
NetworkHelper::insertDequantizationAfter(layer, dequantization.multiply, newFakeQuantizeLayer);
|
||||||
|
|
||||||
std::vector<std::shared_ptr<ngraph::Node>> sourceNodes{ layer };
|
std::vector<std::shared_ptr<ngraph::Node>> sourceNodes{ layer };
|
||||||
std::vector<std::shared_ptr<ngraph::Node>> targetNodes{ newFakeQuantizeLayer, dequantization.multiply };
|
std::vector<std::shared_ptr<ngraph::Node>> targetNodes{ newFakeQuantizeLayer, dequantization.multiply };
|
||||||
|
@ -167,7 +167,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
|
|||||||
|
|
||||||
newMultiply->set_friendly_name(newMatMul->get_friendly_name() + "/DequantizationMultiply");
|
newMultiply->set_friendly_name(newMatMul->get_friendly_name() + "/DequantizationMultiply");
|
||||||
|
|
||||||
replace_node(matMul, newMultiply);
|
NetworkHelper::insertDequantizationAfter(matMul, newMultiply, newMatMul);
|
||||||
copy_runtime_info({ newMultiply, matMul }, newMultiply);
|
copy_runtime_info({ newMultiply, matMul }, newMultiply);
|
||||||
|
|
||||||
updateOutput(context, newMultiply, newMatMul);
|
updateOutput(context, newMultiply, newMatMul);
|
||||||
|
@ -159,7 +159,7 @@ bool MVNTransformation::transform(TransformationContext &context, ngraph::patter
|
|||||||
mvn->get_output_element_type(0));
|
mvn->get_output_element_type(0));
|
||||||
ngraph::copy_runtime_info({ mvn, newMultiply }, newMultiply);
|
ngraph::copy_runtime_info({ mvn, newMultiply }, newMultiply);
|
||||||
|
|
||||||
replace_node(mvn, newMultiply);
|
NetworkHelper::insertDequantizationAfter(mvn, newMultiply, newMVN);
|
||||||
|
|
||||||
updateOutput(context, newMultiply, newMVN);
|
updateOutput(context, newMultiply, newMVN);
|
||||||
return true;
|
return true;
|
||||||
|
@ -1108,7 +1108,7 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
|
|||||||
dequantize->set_friendly_name(newFQ->get_friendly_name() + "/DequantizationMultiply");
|
dequantize->set_friendly_name(newFQ->get_friendly_name() + "/DequantizationMultiply");
|
||||||
ngraph::copy_runtime_info({ newFQ, dequantize }, dequantize);
|
ngraph::copy_runtime_info({ newFQ, dequantize }, dequantize);
|
||||||
|
|
||||||
replace_node(fq, dequantize);
|
insertDequantizationAfter(fq, dequantize, newFQ);
|
||||||
|
|
||||||
return std::make_tuple(newFQ, dequantize);
|
return std::make_tuple(newFQ, dequantize);
|
||||||
}
|
}
|
||||||
@ -1574,36 +1574,32 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
|
|||||||
|
|
||||||
assert(operation->get_output_size() == 1);
|
assert(operation->get_output_size() == 1);
|
||||||
|
|
||||||
|
// we must have dequantization multiply
|
||||||
|
assert(dequantization.multiply != nullptr);
|
||||||
|
|
||||||
OutputVector inputs = operation->input_values();
|
OutputVector inputs = operation->input_values();
|
||||||
const size_t dequantizationIndex = getChildInputIndex(dequantization.multiply, operation);
|
const size_t dequantizationIndex = getChildInputIndex(dequantization.multiply, operation);
|
||||||
inputs[dequantizationIndex] = moveSubtract ?
|
inputs[dequantizationIndex] = (!moveSubtract && dequantization.subtract != nullptr) ?
|
||||||
dequantization.data :
|
dequantization.subtract :
|
||||||
(dequantization.subtract == nullptr ? dequantization.data : dequantization.subtract);
|
dequantization.data;
|
||||||
|
|
||||||
const std::shared_ptr<ngraph::Node> newOperation = operation->clone_with_new_inputs(inputs);
|
const auto newOperation = operation->clone_with_new_inputs(inputs);
|
||||||
newOperation->set_friendly_name(operation->get_friendly_name());
|
newOperation->set_friendly_name(operation->get_friendly_name());
|
||||||
ngraph::copy_runtime_info(operation, newOperation);
|
ngraph::copy_runtime_info(operation, newOperation);
|
||||||
|
|
||||||
auto op = std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(newOperation);
|
if (const auto op = std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(newOperation)) {
|
||||||
if (op != nullptr) {
|
op->set_overridden_output_type(updatePrecision ?
|
||||||
if (updatePrecision) {
|
newOperation->get_input_element_type(0) :
|
||||||
op->set_overridden_output_type(newOperation->get_input_element_type(0));
|
dequantization.multiplyConstant->get_element_type());
|
||||||
} else if (dequantization.multiply) {
|
newOperation->validate_and_infer_types();
|
||||||
op->set_overridden_output_type(dequantization.multiplyConstant->get_element_type());
|
|
||||||
} else if (dequantization.subtract) {
|
|
||||||
op->set_overridden_output_type(dequantization.subtractConstant->get_element_type());
|
|
||||||
}
|
|
||||||
std::dynamic_pointer_cast<ngraph::Node>(newOperation)->validate_and_infer_types();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<Node> parent = newOperation;
|
||||||
|
|
||||||
const element::Type deqPrecision = dequantization.multiplyConstant->get_element_type();
|
const element::Type deqPrecision = dequantization.multiplyConstant->get_element_type();
|
||||||
const bool shouldConvert = (newOperation->get_output_element_type(0) != deqPrecision);
|
const bool shouldConvert = (newOperation->get_output_element_type(0) != deqPrecision);
|
||||||
|
|
||||||
auto parent = newOperation;
|
|
||||||
if (shouldConvert) {
|
if (shouldConvert) {
|
||||||
const auto convertOutputPrecision = dequantization.convert != nullptr ?
|
const auto convertOutputPrecision = dequantization.convert ? dequantization.convert->get_element_type() : deqPrecision;
|
||||||
dequantization.convert->get_output_element_type(0) :
|
|
||||||
deqPrecision;
|
|
||||||
parent = std::make_shared<opset1::Convert>(parent, convertOutputPrecision);
|
parent = std::make_shared<opset1::Convert>(parent, convertOutputPrecision);
|
||||||
ngraph::copy_runtime_info({ newOperation, parent }, parent);
|
ngraph::copy_runtime_info({ newOperation, parent }, parent);
|
||||||
}
|
}
|
||||||
@ -1619,12 +1615,9 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
|
|||||||
}
|
}
|
||||||
|
|
||||||
parent = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(
|
parent = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(
|
||||||
std::vector<element::Type>{element::f32, element::f32}, std::vector<element::Type>{ element::f32 },
|
element::TypeVector{ element::f32, element::f32 }, element::TypeVector{ element::f32 },
|
||||||
ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(),
|
op::TemporaryReplaceOutputType(parent, element::f32).get(),
|
||||||
ngraph::op::TemporaryReplaceOutputType(
|
op::TemporaryReplaceOutputType(foldConvert(dequantization.subtractConstant, parentPrecision), element::f32).get());
|
||||||
dequantization.subtractConstant->output(0).get_element_type() == parentPrecision ?
|
|
||||||
dequantization.subtractConstant :
|
|
||||||
foldConvert(dequantization.subtractConstant->output(0), parentPrecision), element::f32).get());
|
|
||||||
ngraph::copy_runtime_info({ newOperation, parent }, parent);
|
ngraph::copy_runtime_info({ newOperation, parent }, parent);
|
||||||
} else {
|
} else {
|
||||||
parent = std::make_shared<opset1::Subtract>(parent, dequantization.subtractConvert);
|
parent = std::make_shared<opset1::Subtract>(parent, dequantization.subtractConvert);
|
||||||
@ -1633,23 +1626,21 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (dequantization.multiply != nullptr) {
|
if (dequantization.multiply != nullptr) {
|
||||||
auto multiplyConstant = dequantization.multiplyConstant;
|
|
||||||
const element::Type parentPrecision = parent->get_output_element_type(0);
|
const element::Type parentPrecision = parent->get_output_element_type(0);
|
||||||
if (parentPrecision.bitwidth() < multiplyConstant->get_element_type().bitwidth()) {
|
if (parentPrecision.bitwidth() < dequantization.multiplyConstant->get_element_type().bitwidth()) {
|
||||||
THROW_IE_LPT_EXCEPTION(*parent) <<
|
THROW_IE_LPT_EXCEPTION(*parent) <<
|
||||||
"unexpected precisions: on data " << parent->get_friendly_name() << ":" << parentPrecision <<
|
"unexpected precisions: on data " << parent->get_friendly_name() << ":" << parentPrecision <<
|
||||||
", multiply dequantization constant " << multiplyConstant->get_friendly_name() << ":" << multiplyConstant->get_element_type();
|
", multiply dequantization constant " << dequantization.multiplyConstant->get_friendly_name() << ":" <<
|
||||||
|
dequantization.multiplyConstant->get_element_type();
|
||||||
}
|
}
|
||||||
|
|
||||||
parent = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
parent = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
||||||
opset1::Multiply(parent,
|
opset1::Multiply(parent, foldConvert(dequantization.multiplyConstant, parentPrecision)),
|
||||||
multiplyConstant->output(0).get_element_type() == parentPrecision ?
|
|
||||||
multiplyConstant :
|
|
||||||
foldConvert(multiplyConstant->output(0), parentPrecision)),
|
|
||||||
dequantization.multiply->get_output_element_type(0));
|
dequantization.multiply->get_output_element_type(0));
|
||||||
ngraph::copy_runtime_info({ newOperation, parent }, parent);
|
ngraph::copy_runtime_info({ newOperation, parent }, parent);
|
||||||
}
|
}
|
||||||
replace_node(operation, parent);
|
|
||||||
|
insertDequantizationAfter(operation, parent, newOperation);
|
||||||
|
|
||||||
if ((!moveSubtract) && (dequantization.convert != nullptr) && (dequantization.subtract != nullptr)) {
|
if ((!moveSubtract) && (dequantization.convert != nullptr) && (dequantization.subtract != nullptr)) {
|
||||||
// issue #43088
|
// issue #43088
|
||||||
@ -1878,6 +1869,21 @@ bool isDisabled(const std::shared_ptr<Node>& node) {
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void NetworkHelper::insertDequantizationAfter(
|
||||||
|
const std::shared_ptr<Node>& originalNode,
|
||||||
|
const std::shared_ptr<Node>& dequantization,
|
||||||
|
const std::shared_ptr<Node>& newNode) {
|
||||||
|
replace_node(originalNode, dequantization);
|
||||||
|
|
||||||
|
// We do it to avoid dequantization propagation to the shapeOf subgraphs
|
||||||
|
for (const auto& input : dequantization->get_output_target_inputs(0)) {
|
||||||
|
if (const auto shapeOf = as_type_ptr<opset1::ShapeOf>(input.get_node()->shared_from_this())) {
|
||||||
|
const auto newShapeOf = shapeOf->clone_with_new_inputs({ newNode });
|
||||||
|
replace_node_update_name(shapeOf, newShapeOf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
} // namespace low_precision
|
} // namespace low_precision
|
||||||
} // namespace pass
|
} // namespace pass
|
||||||
} // namespace ngraph
|
} // namespace ngraph
|
||||||
|
@ -140,7 +140,7 @@ bool NormalizeL2Transformation::transform(TransformationContext &context, ngraph
|
|||||||
ngraph::op::TemporaryReplaceOutputType(newNormalize, element::f32).get(),
|
ngraph::op::TemporaryReplaceOutputType(newNormalize, element::f32).get(),
|
||||||
ngraph::op::TemporaryReplaceOutputType(newScalesConst, element::f32).get());
|
ngraph::op::TemporaryReplaceOutputType(newScalesConst, element::f32).get());
|
||||||
|
|
||||||
replace_node(normalize, newMultiply);
|
NetworkHelper::insertDequantizationAfter(normalize, newMultiply, newNormalize);
|
||||||
ngraph::copy_runtime_info({ normalize, newMultiply }, newMultiply);
|
ngraph::copy_runtime_info({ normalize, newMultiply }, newMultiply);
|
||||||
|
|
||||||
updateOutput(context, newMultiply, newNormalize);
|
updateOutput(context, newMultiply, newNormalize);
|
||||||
|
@ -109,6 +109,16 @@ bool SplitTransformation::transform(TransformationContext& context, ngraph::patt
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We do it to avoid dequantization propagation to the shapeOf subgraphs
|
||||||
|
for (size_t i = 0; i < replacement.size(); ++i) {
|
||||||
|
for (const auto& input : replacement[i].get_target_inputs()) {
|
||||||
|
if (const auto shapeOf = as_type_ptr<opset1::ShapeOf>(input.get_node()->shared_from_this())) {
|
||||||
|
const auto newShapeOf = shapeOf->clone_with_new_inputs({ newSplit->output(i) });
|
||||||
|
replace_node_update_name(shapeOf, newShapeOf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
updateOutputs(context, lastNodes, newSplit);
|
updateOutputs(context, lastNodes, newSplit);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,734 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <sstream>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "low_precision/add.hpp"
|
||||||
|
#include "low_precision/avg_pool.hpp"
|
||||||
|
#include "low_precision/clamp.hpp"
|
||||||
|
#include "low_precision/concat.hpp"
|
||||||
|
#include "low_precision/convolution.hpp"
|
||||||
|
#include "low_precision/convolution_backprop_data.hpp"
|
||||||
|
#include "low_precision/depth_to_space.hpp"
|
||||||
|
#include "low_precision/fake_quantize_decomposition.hpp"
|
||||||
|
#include "low_precision/group_convolution.hpp"
|
||||||
|
#include "low_precision/interpolate.hpp"
|
||||||
|
#include "low_precision/mat_mul.hpp"
|
||||||
|
#include "low_precision/max_pool.hpp"
|
||||||
|
#include "low_precision/multiply.hpp"
|
||||||
|
#include "low_precision/mvn.hpp"
|
||||||
|
#include "low_precision/normalize_l2.hpp"
|
||||||
|
#include "low_precision/pad.hpp"
|
||||||
|
#include "low_precision/prelu.hpp"
|
||||||
|
#include "low_precision/reduce_max.hpp"
|
||||||
|
#include "low_precision/reduce_mean.hpp"
|
||||||
|
#include "low_precision/reduce_min.hpp"
|
||||||
|
#include "low_precision/reduce_sum.hpp"
|
||||||
|
#include "low_precision/reshape.hpp"
|
||||||
|
#include "low_precision/relu.hpp"
|
||||||
|
#include "low_precision/squeeze.hpp"
|
||||||
|
#include "low_precision/split.hpp"
|
||||||
|
#include "low_precision/shuffle_channels.hpp"
|
||||||
|
#include "low_precision/strided_slice.hpp"
|
||||||
|
#include "low_precision/transpose.hpp"
|
||||||
|
#include "low_precision/unsqueeze.hpp"
|
||||||
|
#include "low_precision/variadic_split.hpp"
|
||||||
|
|
||||||
|
#include "low_precision/network_helper.hpp"
|
||||||
|
#include "lpt_ngraph_functions/common/builders.hpp"
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <transformations/utils/utils.hpp>
|
||||||
|
|
||||||
|
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||||
|
|
||||||
|
using namespace testing;
|
||||||
|
using namespace ngraph;
|
||||||
|
using namespace ngraph::pass;
|
||||||
|
|
||||||
|
// Add fed by two Convert->Multiply branches, with a ShapeOf on the Add output.
// After AddTransformation, the dequantization reported by
// NetworkHelper::getDequantization for the node feeding the second Result
// must be empty.
TEST(LPT, AvoidDequantizationToShapeOfPropagationAddTransformation) {
    const auto lhsParameter = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto rhsParameter = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });

    const auto lhsConvert = std::make_shared<opset1::Convert>(lhsParameter, element::f32);
    const auto rhsConvert = std::make_shared<opset1::Convert>(rhsParameter, element::f32);

    const auto lhsMultiply = std::make_shared<opset1::Multiply>(
        lhsConvert,
        opset1::Constant::create(element::f32, {}, { 2.f }));
    const auto rhsMultiply = std::make_shared<opset1::Multiply>(
        rhsConvert,
        opset1::Constant::create(element::f32, {}, { 4.f }));

    const auto addNode = std::make_shared<opset1::Add>(lhsMultiply, rhsMultiply);
    const auto shapeOfNode = std::make_shared<opset1::ShapeOf>(addNode);

    const auto dataResult = std::make_shared<opset1::Result>(addNode);
    const auto shapeResult = std::make_shared<opset1::Result>(shapeOfNode);

    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ lhsParameter, rhsParameter });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::AddTransformation>();
    manager.run_passes(function);

    const auto dqBeforeShapeOf =
        low_precision::NetworkHelper::getDequantization(shapeResult->get_input_node_shared_ptr(0));
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// AvgPool fed by a Convert->Multiply branch, with a ShapeOf on the AvgPool
// output. After AvgPoolTransformation, the dequantization reported by
// NetworkHelper::getDequantization for the node feeding the second Result
// must be empty.
TEST(LPT, AvoidDequantizationToShapeOfPropagationAvgPoolTransformation) {
    const auto parameter = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto convertNode = std::make_shared<opset1::Convert>(parameter, element::f32);
    const auto multiplyNode = std::make_shared<opset1::Multiply>(
        convertNode,
        opset1::Constant::create(element::f32, {}, { 2.f }));

    const auto avgPoolNode = std::make_shared<opset1::AvgPool>(
        multiplyNode,
        Strides{ 1, 1 },
        Shape{ 1, 1 },
        Shape{ 0, 0 },
        Shape{ 2, 2 },
        true);
    const auto shapeOfNode = std::make_shared<opset1::ShapeOf>(avgPoolNode);

    const auto dataResult = std::make_shared<opset1::Result>(avgPoolNode);
    const auto shapeResult = std::make_shared<opset1::Result>(shapeOfNode);

    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ parameter });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::AvgPoolTransformation>();
    manager.run_passes(function);

    const auto dqBeforeShapeOf =
        low_precision::NetworkHelper::getDequantization(shapeResult->get_input_node_shared_ptr(0));
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Clamp fed by a Convert->Multiply branch, with a ShapeOf on the Clamp output.
// After ClampTransformation, the dequantization reported by
// NetworkHelper::getDequantization for the node feeding the second Result
// must be empty.
TEST(LPT, AvoidDequantizationToShapeOfPropagationClampTransformation) {
    const auto parameter = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto convertNode = std::make_shared<opset1::Convert>(parameter, element::f32);
    const auto multiplyNode = std::make_shared<opset1::Multiply>(
        convertNode,
        opset1::Constant::create(element::f32, {}, { 2.f }));

    const auto clampNode = std::make_shared<opset1::Clamp>(multiplyNode, 0.0, 6.0);
    const auto shapeOfNode = std::make_shared<opset1::ShapeOf>(clampNode);

    const auto dataResult = std::make_shared<opset1::Result>(clampNode);
    const auto shapeResult = std::make_shared<opset1::Result>(shapeOfNode);

    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ parameter });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::ClampTransformation>();
    manager.run_passes(function);

    const auto dqBeforeShapeOf =
        low_precision::NetworkHelper::getDequantization(shapeResult->get_input_node_shared_ptr(0));
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs ConcatTransformation on two dequantized inputs whose concatenation also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationConcatTransformation) {
    const auto lhsData = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto rhsData = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });

    const auto lhsFloat = std::make_shared<opset1::Convert>(lhsData, element::f32);
    const auto rhsFloat = std::make_shared<opset1::Convert>(rhsData, element::f32);

    // Each branch gets its own scalar scale (2 and 4).
    const auto lhsScale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto lhsDequantized = std::make_shared<opset1::Multiply>(lhsFloat, lhsScale);
    const auto rhsScale = opset1::Constant::create(element::f32, {}, { 4.f });
    const auto rhsDequantized = std::make_shared<opset1::Multiply>(rhsFloat, rhsScale);

    const auto joined = std::make_shared<opset1::Concat>(OutputVector{ lhsDequantized, rhsDequantized }, 1);
    const auto joinedShape = std::make_shared<opset1::ShapeOf>(joined);

    const auto dataResult = std::make_shared<opset1::Result>(joined);
    const auto shapeResult = std::make_shared<opset1::Result>(joinedShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ lhsData, rhsData });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::ConcatTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs ConvolutionTransformation on dequantized activations and weights whose convolution
// output also feeds a ShapeOf, and expects getDequantization() to report nothing for the
// ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationConvolutionTransformation) {
    // Activations: u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto dataFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto dataScale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dataDequantized = std::make_shared<opset1::Multiply>(dataFloat, dataScale);

    // Weights: i8 constant followed by a Convert + Multiply pair.
    const auto kernel = opset1::Constant::create(element::i8, { 6, 3, 1, 1 }, { 3 });
    const auto kernelFloat = std::make_shared<opset1::Convert>(kernel, element::f32);
    const auto kernelScale = opset1::Constant::create(element::f32, {}, { 4.f });
    const auto kernelDequantized = std::make_shared<opset1::Multiply>(kernelFloat, kernelScale);

    const auto conv = std::make_shared<opset1::Convolution>(
        dataDequantized,
        kernelDequantized,
        ngraph::Strides{ 1, 1 },
        ngraph::CoordinateDiff{ 0, 0 },
        ngraph::CoordinateDiff{ 0, 0 },
        ngraph::Strides{ 1, 1 });
    const auto convShape = std::make_shared<opset1::ShapeOf>(conv);

    const auto dataResult = std::make_shared<opset1::Result>(conv);
    const auto shapeResult = std::make_shared<opset1::Result>(convShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::ConvolutionTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs ConvolutionBackpropDataTransformation on dequantized activations and weights whose
// output also feeds a ShapeOf, and expects getDequantization() to report nothing for the
// ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationConvolutionBackpropDataTransformation) {
    // Activations: u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 8, 16, 16 });
    const auto dataFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto dataScale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dataDequantized = std::make_shared<opset1::Multiply>(dataFloat, dataScale);

    // Weights: i8 constant followed by a Convert + Multiply pair.
    const auto kernel = opset1::Constant::create(element::i8, { 8, 2, 1, 1 }, { 3 });
    const auto kernelFloat = std::make_shared<opset1::Convert>(kernel, element::f32);
    const auto kernelScale = opset1::Constant::create(element::f32, {}, { 4.f });
    const auto kernelDequantized = std::make_shared<opset1::Multiply>(kernelFloat, kernelScale);

    const auto deconv = std::make_shared<opset1::ConvolutionBackpropData>(
        dataDequantized,
        kernelDequantized,
        ngraph::Strides{ 1, 1 },
        ngraph::CoordinateDiff{ 0, 0 },
        ngraph::CoordinateDiff{ 0, 0 },
        ngraph::Strides{ 1, 1 });
    const auto deconvShape = std::make_shared<opset1::ShapeOf>(deconv);

    const auto dataResult = std::make_shared<opset1::Result>(deconv);
    const auto shapeResult = std::make_shared<opset1::Result>(deconvShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::ConvolutionBackpropDataTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs DepthToSpaceTransformation on a dequantized input whose output also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationDepthToSpaceTransformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantized = std::make_shared<opset1::Multiply>(toFloat, scale);

    const auto depthToSpace = std::make_shared<opset1::DepthToSpace>(
        dequantized, op::v0::DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST);
    const auto depthToSpaceShape = std::make_shared<opset1::ShapeOf>(depthToSpace);

    const auto dataResult = std::make_shared<opset1::Result>(depthToSpace);
    const auto shapeResult = std::make_shared<opset1::Result>(depthToSpaceShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::DepthToSpaceTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs FakeQuantizeDecompositionTransformation on a FakeQuantize whose output also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationFakeQuantizeDecompositionTransformation) {
    using PrecisionsWrapper = ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>;

    const auto data = std::make_shared<opset1::Parameter>(element::f32, PartialShape{ 1, 3, 16, 16 });

    // 256-level FakeQuantize with identical input and output ranges [0, 2.55].
    ngraph::builder::subgraph::FakeQuantizeOnData fqValues{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} };
    auto quantized = ngraph::builder::subgraph::makeFakeQuantize(data, element::f32, fqValues);
    const auto quantizedShape = std::make_shared<opset1::ShapeOf>(quantized);

    // Attach a precisions attribute (u8, i8) to the FakeQuantize output's runtime info.
    auto& outputRtInfo = quantized->output(0).get_rt_info();
    auto precisions = ngraph::pass::low_precision::make_shared_attribute<PrecisionsAttribute>(
        element::TypeVector{ element::u8, element::i8 });
    auto wrappedPrecisions = std::make_shared<PrecisionsWrapper>(precisions);
    outputRtInfo.emplace(PrecisionsWrapper::type_info.name, wrappedPrecisions);

    const auto dataResult = std::make_shared<opset1::Result>(quantized);
    const auto shapeResult = std::make_shared<opset1::Result>(quantizedShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::FakeQuantizeDecompositionTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs GroupConvolutionTransformation on dequantized activations and reshaped dequantized
// weights whose output also feeds a ShapeOf, and expects getDequantization() to report
// nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationGroupConvolutionTransformation) {
    // Activations: u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 2 * 3, 16, 16 });
    const auto dataFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto dataScale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dataDequantized = std::make_shared<opset1::Multiply>(dataFloat, dataScale);

    // Weights: i8 constant, Convert + Multiply, then a Reshape to the 5D shape { 2, 3, 3, 7, 7 }.
    const auto kernel = opset1::Constant::create(element::i8, { 6, 3, 7, 7 }, { 2 });
    const auto kernelFloat = std::make_shared<opset1::Convert>(kernel, element::f32);
    const auto kernelScale = opset1::Constant::create(element::f32, {}, { 4.f });
    const auto kernelDequantized = std::make_shared<opset1::Multiply>(kernelFloat, kernelScale);
    const auto groupedShape = opset1::Constant::create(element::i32, { 5 }, { 2, 3, 3, 7, 7 });
    const auto groupedKernel = std::make_shared<opset1::Reshape>(kernelDequantized, groupedShape, true);

    const auto groupConv = std::make_shared<opset1::GroupConvolution>(
        dataDequantized,
        groupedKernel,
        ngraph::Strides{ 1, 1 },
        ngraph::CoordinateDiff{ 0, 0 },
        ngraph::CoordinateDiff{ 0, 0 },
        ngraph::Strides{ 1, 1 });
    const auto groupConvShape = std::make_shared<opset1::ShapeOf>(groupConv);

    const auto dataResult = std::make_shared<opset1::Result>(groupConv);
    const auto shapeResult = std::make_shared<opset1::Result>(groupConvShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::GroupConvolutionTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs InterpolateTransformation on a dequantized input whose interpolated output also
// feeds a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationInterpolateTransformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantized = std::make_shared<opset1::Multiply>(toFloat, scale);

    const auto targetShape = opset1::Constant::create(element::i32, { 4 }, { 1, 3, 18, 18});

    // "nearest" interpolation over axes 2 and 3, no corner alignment, no antialiasing.
    op::v0::InterpolateAttrs interpAttrs;
    interpAttrs.align_corners = false;
    interpAttrs.antialias = false;
    interpAttrs.axes = AxisSet{ 2, 3 };
    interpAttrs.mode = "nearest";
    interpAttrs.pads_begin = { 0 };
    interpAttrs.pads_end = { 0 };

    const auto resized = std::make_shared<opset1::Interpolate>(dequantized, targetShape, interpAttrs);
    const auto resizedShape = std::make_shared<opset1::ShapeOf>(resized);

    const auto dataResult = std::make_shared<opset1::Result>(resized);
    const auto shapeResult = std::make_shared<opset1::Result>(resizedShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::InterpolateTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs MatMulTransformation on dequantized activations and weights whose product also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationMatMulTransformation) {
    // Activations: u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 1024 });
    const auto dataFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto dataScale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dataDequantized = std::make_shared<opset1::Multiply>(dataFloat, dataScale);

    // Weights: i8 constant followed by a Convert + Multiply pair.
    const auto kernel = opset1::Constant::create(element::i8, { 2048, 1024 }, { 3 });
    const auto kernelFloat = std::make_shared<opset1::Convert>(kernel, element::f32);
    const auto kernelScale = opset1::Constant::create(element::f32, {}, { 4.f });
    const auto kernelDequantized = std::make_shared<opset1::Multiply>(kernelFloat, kernelScale);

    // Second operand is transposed (transpose_a = false, transpose_b = true).
    const auto product = std::make_shared<opset1::MatMul>(dataDequantized, kernelDequantized, false, true);
    const auto productShape = std::make_shared<opset1::ShapeOf>(product);

    const auto dataResult = std::make_shared<opset1::Result>(product);
    const auto shapeResult = std::make_shared<opset1::Result>(productShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::MatMulTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs MaxPoolTransformation on a dequantized input whose pooled output also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationMaxPoolTransformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantized = std::make_shared<opset1::Multiply>(toFloat, scale);

    const auto pool = std::make_shared<opset1::MaxPool>(
        dequantized, Strides{ 1, 1 }, Shape{ 1, 1 }, Shape{ 0, 0 }, Shape{ 2, 2 });
    const auto poolShape = std::make_shared<opset1::ShapeOf>(pool);

    const auto dataResult = std::make_shared<opset1::Result>(pool);
    const auto shapeResult = std::make_shared<opset1::Result>(poolShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::MaxPoolTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs MultiplyTransformation on the product of two dequantized inputs whose output also
// feeds a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationMultiplyTransformation) {
    const auto lhsData = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto rhsData = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });

    const auto lhsFloat = std::make_shared<opset1::Convert>(lhsData, element::f32);
    const auto rhsFloat = std::make_shared<opset1::Convert>(rhsData, element::f32);

    // Each branch gets its own scalar scale (2 and 4).
    const auto lhsScale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto lhsDequantized = std::make_shared<opset1::Multiply>(lhsFloat, lhsScale);
    const auto rhsScale = opset1::Constant::create(element::f32, {}, { 4.f });
    const auto rhsDequantized = std::make_shared<opset1::Multiply>(rhsFloat, rhsScale);

    const auto product = std::make_shared<opset1::Multiply>(lhsDequantized, rhsDequantized);
    const auto productShape = std::make_shared<opset1::ShapeOf>(product);

    const auto dataResult = std::make_shared<opset1::Result>(product);
    const auto shapeResult = std::make_shared<opset1::Result>(productShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ lhsData, rhsData });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::MultiplyTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs MVNTransformation on a dequantized input whose normalized output also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationMVNTransformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantized = std::make_shared<opset1::Multiply>(toFloat, scale);

    // The MVN is built as a TypeRelaxed op, as in the original test.
    const auto mvn = std::make_shared<op::TypeRelaxed<op::v0::MVN>>(dequantized);
    const auto mvnShape = std::make_shared<opset1::ShapeOf>(mvn);

    const auto dataResult = std::make_shared<opset1::Result>(mvn);
    const auto shapeResult = std::make_shared<opset1::Result>(mvnShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::MVNTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs NormalizeL2Transformation on a dequantized input whose normalized output also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationNormalizeL2Transformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantized = std::make_shared<opset1::Multiply>(toFloat, scale);

    // Normalize over axes 2 and 3 with eps = 0.01 in ADD mode.
    const auto normAxes = opset1::Constant::create(element::i32, { 2 }, { 2, 3 });
    const auto normalized = std::make_shared<opset1::NormalizeL2>(
        dequantized, normAxes, 0.01, ov::op::EpsMode::ADD);
    const auto normalizedShape = std::make_shared<opset1::ShapeOf>(normalized);

    const auto dataResult = std::make_shared<opset1::Result>(normalized);
    const auto shapeResult = std::make_shared<opset1::Result>(normalizedShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::NormalizeL2Transformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs PadTransformation on a dequantized input whose padded output also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationPadTransformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantized = std::make_shared<opset1::Multiply>(toFloat, scale);

    // Pad the last two dimensions by one element on each side, CONSTANT mode.
    const auto padsBefore = opset1::Constant::create(element::i32, { 4 }, { 0, 0, 1, 1 });
    const auto padsAfter = opset1::Constant::create(element::i32, { 4 }, { 0, 0, 1, 1 });
    const auto padded = std::make_shared<opset1::Pad>(dequantized, padsBefore, padsAfter, op::PadMode::CONSTANT);
    const auto paddedShape = std::make_shared<opset1::ShapeOf>(padded);

    const auto dataResult = std::make_shared<opset1::Result>(padded);
    const auto shapeResult = std::make_shared<opset1::Result>(paddedShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::PadTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs PReluTransformation on a dequantized input whose activated output also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationPReluTransformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantized = std::make_shared<opset1::Multiply>(toFloat, scale);

    // Slope constant of shape { 1, 3, 1, 1 } filled with 0.01.
    const auto negativeSlope = opset1::Constant::create(element::f32, { 1, 3, 1, 1 }, { 0.01f });
    const auto activated = std::make_shared<opset1::PRelu>(dequantized, negativeSlope);
    const auto activatedShape = std::make_shared<opset1::ShapeOf>(activated);

    const auto dataResult = std::make_shared<opset1::Result>(activated);
    const auto shapeResult = std::make_shared<opset1::Result>(activatedShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::PReluTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs ReduceMaxTransformation on a dequantized input whose reduced output also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationReduceMaxTransformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantized = std::make_shared<opset1::Multiply>(toFloat, scale);

    // Reduce over axes 2 and 3.
    const auto reduceAxes = opset1::Constant::create(element::i32, { 2 }, { 2, 3 });
    const auto reduced = std::make_shared<opset1::ReduceMax>(dequantized, reduceAxes);
    const auto reducedShape = std::make_shared<opset1::ShapeOf>(reduced);

    const auto dataResult = std::make_shared<opset1::Result>(reduced);
    const auto shapeResult = std::make_shared<opset1::Result>(reducedShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::ReduceMaxTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs ReduceMeanTransformation on a dequantized input whose reduced output also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationReduceMeanTransformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantized = std::make_shared<opset1::Multiply>(toFloat, scale);

    // Reduce over axes 2 and 3.
    const auto reduceAxes = opset1::Constant::create(element::i32, { 2 }, { 2, 3 });
    const auto reduced = std::make_shared<opset1::ReduceMean>(dequantized, reduceAxes);
    const auto reducedShape = std::make_shared<opset1::ShapeOf>(reduced);

    const auto dataResult = std::make_shared<opset1::Result>(reduced);
    const auto shapeResult = std::make_shared<opset1::Result>(reducedShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::ReduceMeanTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs ReduceMinTransformation on a dequantized input whose reduced output also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationReduceMinTransformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantized = std::make_shared<opset1::Multiply>(toFloat, scale);

    // Reduce over axes 2 and 3.
    const auto reduceAxes = opset1::Constant::create(element::i32, { 2 }, { 2, 3 });
    const auto reduced = std::make_shared<opset1::ReduceMin>(dequantized, reduceAxes);
    const auto reducedShape = std::make_shared<opset1::ShapeOf>(reduced);

    const auto dataResult = std::make_shared<opset1::Result>(reduced);
    const auto shapeResult = std::make_shared<opset1::Result>(reducedShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::ReduceMinTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs ReduceSumTransformation on a dequantized input whose reduced output also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationReduceSumTransformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantized = std::make_shared<opset1::Multiply>(toFloat, scale);

    // Reduce over axes 2 and 3.
    const auto reduceAxes = opset1::Constant::create(element::i32, { 2 }, { 2, 3 });
    const auto reduced = std::make_shared<opset1::ReduceSum>(dequantized, reduceAxes);
    const auto reducedShape = std::make_shared<opset1::ShapeOf>(reduced);

    const auto dataResult = std::make_shared<opset1::Result>(reduced);
    const auto shapeResult = std::make_shared<opset1::Result>(reducedShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::ReduceSumTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs ReshapeTransformation on a dequantized input whose reshaped output also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationReshapeTransformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantized = std::make_shared<opset1::Multiply>(toFloat, scale);

    // Target shape pattern { 1, 3, -1 } with special_zero = true.
    const auto targetShape = opset1::Constant::create(element::i32, { 3 }, { 1, 3, -1 });
    const auto reshaped = std::make_shared<opset1::Reshape>(dequantized, targetShape, true);
    const auto reshapedShape = std::make_shared<opset1::ShapeOf>(reshaped);

    const auto dataResult = std::make_shared<opset1::Result>(reshaped);
    const auto shapeResult = std::make_shared<opset1::Result>(reshapedShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::ReshapeTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs ReluTransformation on a dequantized input whose activated output also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationReluTransformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantized = std::make_shared<opset1::Multiply>(toFloat, scale);

    const auto activated = std::make_shared<opset1::Relu>(dequantized);
    const auto activatedShape = std::make_shared<opset1::ShapeOf>(activated);

    const auto dataResult = std::make_shared<opset1::Result>(activated);
    const auto shapeResult = std::make_shared<opset1::Result>(activatedShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::ReluTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs SqueezeTransformation on a dequantized input whose squeezed output also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationSqueezeTransformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantized = std::make_shared<opset1::Multiply>(toFloat, scale);

    // Squeeze axis 0.
    const auto squeezeAxes = opset1::Constant::create(element::i32, { 1 }, { 0 });
    const auto squeezed = std::make_shared<opset1::Squeeze>(dequantized, squeezeAxes);
    const auto squeezedShape = std::make_shared<opset1::ShapeOf>(squeezed);

    const auto dataResult = std::make_shared<opset1::Result>(squeezed);
    const auto shapeResult = std::make_shared<opset1::Result>(squeezedShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::SqueezeTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs SplitTransformation on a dequantized input whose Split also feeds a ShapeOf,
// and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationSplitTransformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    auto convert = std::make_shared<opset1::Convert>(input, element::f32);
    auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));

    // Split axis 1 into three parts; the ShapeOf is built from the Split node itself.
    auto axis = opset1::Constant::create(element::i32, {}, { 1 });
    auto split = std::make_shared<opset1::Split>(mul, axis, 3);
    auto shapeOf = std::make_shared<opset1::ShapeOf>(split);

    // result1..result3 consume the three Split outputs; result4 consumes the ShapeOf.
    auto result1 = std::make_shared<opset1::Result>(split->output(0));
    auto result2 = std::make_shared<opset1::Result>(split->output(1));
    auto result3 = std::make_shared<opset1::Result>(split->output(2));
    auto result4 = std::make_shared<opset1::Result>(shapeOf->output(0));

    auto f = std::make_shared<Function>(ResultVector{ result1, result2, result3, result4 }, ParameterVector{ input });
    pass::Manager m;
    m.register_pass<pass::low_precision::SplitTransformation>();
    m.run_passes(f);

    // The ShapeOf branch hangs off result4. The original check used result2, which is a
    // data output of the Split, so the ShapeOf path was never inspected.
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result4->get_input_node_shared_ptr(0));
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs ShuffleChannelsTransformation on a dequantized input whose shuffled output also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationShuffleChannelsTransformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantized = std::make_shared<opset1::Multiply>(toFloat, scale);

    // ShuffleChannels is built with its constructor defaults.
    const auto shuffled = std::make_shared<opset1::ShuffleChannels>(dequantized);
    const auto shuffledShape = std::make_shared<opset1::ShapeOf>(shuffled);

    const auto dataResult = std::make_shared<opset1::Result>(shuffled);
    const auto shapeResult = std::make_shared<opset1::Result>(shuffledShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::ShuffleChannelsTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs StridedSliceTransformation on a dequantized input whose sliced output also feeds
// a ShapeOf, and expects getDequantization() to report nothing for the ShapeOf branch.
TEST(LPT, AvoidDequantizationToShapeOfPropagationStridedSliceTransformation) {
    // u8 parameter followed by a Convert + Multiply pair.
    const auto data = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(data, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantized = std::make_shared<opset1::Multiply>(toFloat, scale);

    // Slice description: begin / end / strides constants plus identical begin and end masks.
    const auto sliceBegin = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 0, 0, 0 });
    const auto sliceEnd = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 1, 2, 1, 1 });
    const auto sliceStrides = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 1, 1, 1, 1 });
    const auto sliced = std::make_shared<ngraph::opset1::StridedSlice>(
        dequantized,
        sliceBegin,
        sliceEnd,
        sliceStrides,
        std::vector<std::int64_t>{ 1, 0, 1, 1 },
        std::vector<std::int64_t>{ 1, 0, 1, 1 });
    const auto slicedShape = std::make_shared<opset1::ShapeOf>(sliced);

    const auto dataResult = std::make_shared<opset1::Result>(sliced);
    const auto shapeResult = std::make_shared<opset1::Result>(slicedShape);
    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ data });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::StridedSliceTransformation>();
    manager.run_passes(function);

    // Inspect whichever node feeds the ShapeOf result after the pass has run.
    const auto shapeBranch = shapeResult->get_input_node_shared_ptr(0);
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(shapeBranch);
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
||||||
|
|
||||||
|
// Runs TransposeTransformation on a graph where the Transpose output feeds both a data
// Result and a ShapeOf, then expects no dequantization to be reported for the node that
// produces the ShapeOf result.
TEST(LPT, AvoidDequantizationToShapeOfPropagationTransposeTransformation) {
    // u8 parameter followed by a Convert -> Multiply chain.
    const auto parameter = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(parameter, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantize = std::make_shared<opset1::Multiply>(toFloat, scale);

    // Operation under test: a Transpose whose order constant is { 0, 1, 3, 2 }.
    const auto order = ngraph::op::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 1, 3, 2 });
    const auto transposed = std::make_shared<ngraph::opset1::Transpose>(dequantize, order);
    const auto shape = std::make_shared<opset1::ShapeOf>(transposed);

    const auto dataResult = std::make_shared<opset1::Result>(transposed);
    const auto shapeResult = std::make_shared<opset1::Result>(shape);

    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ parameter });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::TransposeTransformation>();
    manager.run_passes(function);

    // shapeResult was built on the ShapeOf node, so its producer is the node to inspect.
    const auto shapeProducer = shapeResult->get_input_node_shared_ptr(0);
    auto dequantizationOnShapePath = low_precision::NetworkHelper::getDequantization(shapeProducer);
    ASSERT_TRUE(dequantizationOnShapePath.empty());
}
|
||||||
|
|
||||||
|
// Runs UnsqueezeTransformation on a graph where the Unsqueeze output feeds both a data
// Result and a ShapeOf, then expects no dequantization to be reported for the node that
// produces the ShapeOf result.
TEST(LPT, AvoidDequantizationToShapeOfPropagationUnsqueezeTransformation) {
    // u8 parameter followed by a Convert -> Multiply chain.
    const auto parameter = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    const auto toFloat = std::make_shared<opset1::Convert>(parameter, element::f32);
    const auto scale = opset1::Constant::create(element::f32, {}, { 2.f });
    const auto dequantize = std::make_shared<opset1::Multiply>(toFloat, scale);

    // Operation under test: an Unsqueeze driven by a one-element i32 axes constant.
    const auto unsqueezeAxes = opset1::Constant::create(element::i32, { 1 }, { 3 });
    const auto unsqueezed = std::make_shared<opset1::Unsqueeze>(dequantize, unsqueezeAxes);
    const auto shape = std::make_shared<opset1::ShapeOf>(unsqueezed);

    const auto dataResult = std::make_shared<opset1::Result>(unsqueezed);
    const auto shapeResult = std::make_shared<opset1::Result>(shape);

    const auto function = std::make_shared<Function>(
        ResultVector{ dataResult, shapeResult },
        ParameterVector{ parameter });

    pass::Manager manager;
    manager.register_pass<pass::low_precision::UnsqueezeTransformation>();
    manager.run_passes(function);

    // shapeResult was built on the ShapeOf node, so its producer is the node to inspect.
    const auto shapeProducer = shapeResult->get_input_node_shared_ptr(0);
    auto dequantizationOnShapePath = low_precision::NetworkHelper::getDequantization(shapeProducer);
    ASSERT_TRUE(dequantizationOnShapePath.empty());
}
|
||||||
|
|
||||||
|
// Runs VariadicSplitTransformation on a graph where the first VariadicSplit output feeds
// both a data Result and a ShapeOf, then expects no dequantization to be reported for the
// node that produces the ShapeOf result.
TEST(LPT, AvoidDequantizationToShapeOfPropagationVariadicSplitTransformation) {
    // u8 parameter followed by a Convert -> Multiply chain.
    auto input = std::make_shared<opset1::Parameter>(element::u8, PartialShape{ 1, 3, 16, 16 });
    auto convert = std::make_shared<opset1::Convert>(input, element::f32);
    auto mul = std::make_shared<opset1::Multiply>(convert, opset1::Constant::create(element::f32, {}, { 2.f }));

    // Operation under test: a VariadicSplit with a scalar axis constant and two lengths.
    auto axis = opset1::Constant::create(element::i32, {}, { 1 });
    auto lengths = opset1::Constant::create(element::i32, { 2 }, { 1, 2 });
    auto variadicSplit = std::make_shared<opset1::VariadicSplit>(mul, axis, lengths);
    auto shapeOf = std::make_shared<opset1::ShapeOf>(variadicSplit->output(0));

    // result1 / result2 consume the split outputs; result3 consumes the ShapeOf.
    auto result1 = std::make_shared<opset1::Result>(variadicSplit->output(0));
    auto result2 = std::make_shared<opset1::Result>(variadicSplit->output(1));
    auto result3 = std::make_shared<opset1::Result>(shapeOf);

    auto f = std::make_shared<Function>(ResultVector{ result1, result2, result3 }, ParameterVector{ input });
    pass::Manager m;
    m.register_pass<pass::low_precision::VariadicSplitTransformation>();
    m.run_passes(f);

    // Inspect the producer of result3 (the ShapeOf result). The previous code looked at
    // result2, which wraps the second split output and never touches the ShapeOf path.
    auto dqBeforeShapeOf = low_precision::NetworkHelper::getDequantization(result3->get_input_node_shared_ptr(0));
    ASSERT_TRUE(dqBeforeShapeOf.empty());
}
|
Loading…
Reference in New Issue
Block a user