[LPT] ConvolutionBackpropData support (#5313)

* [LPT] ConvolutionBackpropData support

* minor fixes

* [Transformations] Legacy subtract precision keep

* [LPT] ConvolutionBackpropData tests improvements

* [LPT] ConvolutionBackpropData weights folding when can't be transformed

* [LPT] CanBeTransformed unification and convolution weights folding

* [LPT] GPU INT8 optimizations condition flag

* [LPT] Concat precision predict improvement

* [LPT] Turn off asymmetric quantization for Deconvolution on GPU

* [LPT] Improvements from review

* [LPT] Check if layer after concat isQuantized and require per-tensor quantize

* [LPT] Improvement for Deconv->FQ pattern

* [LPT] Commented failing tests
This commit is contained in:
Vladimir Zinoviev 2021-05-18 00:59:01 +03:00 committed by GitHub
parent f84b25722c
commit e41e25533d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
38 changed files with 1400 additions and 167 deletions

View File

@ -70,6 +70,7 @@
#include <low_precision/pull_reshape_through_dequantization.hpp> #include <low_precision/pull_reshape_through_dequantization.hpp>
#include <low_precision/pull_transpose_through_dequantization.hpp> #include <low_precision/pull_transpose_through_dequantization.hpp>
#include <low_precision/transformer.hpp> #include <low_precision/transformer.hpp>
#include <low_precision/convolution_backprop_data.hpp>
#include <low_precision/mat_mul.hpp> #include <low_precision/mat_mul.hpp>
#include <low_precision/strided_slice.hpp> #include <low_precision/strided_slice.hpp>
#include <low_precision/network_helper.hpp> #include <low_precision/network_helper.hpp>
@ -381,6 +382,9 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
.add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params) .add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params)
.setSupportAsymmetricQuantization(false) .setSupportAsymmetricQuantization(false)
.setSupport3DTensorOnActivations(false)) .setSupport3DTensorOnActivations(false))
.add<ConvolutionBackpropDataTransformation, ngraph::opset1::ConvolutionBackpropData>(LayerTransformation::Params(params)
.setSupportAsymmetricQuantization(false)
.setDeconvolutionSpecificChannelsRatio(true))
// INT8 StridedSlice not supported // INT8 StridedSlice not supported
.remove<StridedSliceTransformation, ngraph::opset1::StridedSlice>()); .remove<StridedSliceTransformation, ngraph::opset1::StridedSlice>());

View File

@ -0,0 +1,25 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/ngraph.hpp>
#include "weightable_layer_transformation.hpp"
namespace ngraph {
namespace pass {
namespace low_precision {
class TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation {
public:
ConvolutionBackpropDataTransformation(const Params& params);
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
bool isQuantized(std::shared_ptr<Node> layer) const noexcept override;
};
} // namespace low_precision
} // namespace pass
} // namespace ngraph

View File

@ -45,6 +45,13 @@ class TRANSFORMATIONS_API DataPrecision {
public: public:
DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {} DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {}
explicit DataPrecision(const element::Type& precision) {
this->precision = precision;
min = getMinValue(precision, 256);
max = getMaxValue(precision, 256);
hasZeroPoint = false;
}
DataPrecision(const element::Type precision, const float min, const float max, const bool hasZeroPoint) : DataPrecision(const element::Type precision, const float min, const float max, const bool hasZeroPoint) :
precision(precision), precision(precision),
min(min), min(min),
@ -122,29 +129,6 @@ public:
static element::Type getPrecision(const size_t /* quantizationLevels */, const bool signedInterval) { static element::Type getPrecision(const size_t /* quantizationLevels */, const bool signedInterval) {
return signedInterval ? element::i8 : element::u8; return signedInterval ? element::i8 : element::u8;
} }
static float getMin(const size_t quantizationLevels, const bool signedInterval) {
if (quantizationLevels == 255) {
return signedInterval ? -127.0f : 0.0f;
} else if (quantizationLevels == 256) {
return signedInterval ? -128.0f : 0.0f;
} else {
// THROW_TRANSFORMATION_EXCEPTION << "quantization level " << quantizationLevels << " is not supported";
// FIXME: not completed
return signedInterval ? -128.0f : 0.0f;
}
}
static float getMax(const size_t quantizationLevels, const bool signedInterval) {
if ((quantizationLevels == 255) || (quantizationLevels == 256)) {
return signedInterval ? 127.0f : 255.0f;
} else {
// THROW_TRANSFORMATION_EXCEPTION << "quantization level " << quantizationLevels << " is not supported";
// FIXME: not completed
// return quantizationLevels - 1.0;
return signedInterval ? 127.0f : 255.0f;
}
}
}; };
inline bool operator==(const DataPrecision& value1, const DataPrecision& value2) { inline bool operator==(const DataPrecision& value1, const DataPrecision& value2) {
@ -181,7 +165,8 @@ public:
std::vector<element::Type> precisionsOnActivations = { element::u8, element::i8 }, std::vector<element::Type> precisionsOnActivations = { element::u8, element::i8 },
std::vector<element::Type> precisionsOnWeights = { element::i8 }, std::vector<element::Type> precisionsOnWeights = { element::i8 },
element::Type deqPrecision = element::f32, element::Type deqPrecision = element::f32,
bool support3DTensorOnActivations = true) : bool support3DTensorOnActivations = true,
bool deconvolutionSpecificChannelsRatio = false) :
updatePrecisions(updatePrecisions), updatePrecisions(updatePrecisions),
quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations), quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations),
quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights), quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights),
@ -189,7 +174,8 @@ public:
precisionsOnActivations(precisionsOnActivations), precisionsOnActivations(precisionsOnActivations),
precisionsOnWeights(precisionsOnWeights), precisionsOnWeights(precisionsOnWeights),
deqPrecision(deqPrecision), deqPrecision(deqPrecision),
support3DTensorOnActivations(support3DTensorOnActivations) { support3DTensorOnActivations(support3DTensorOnActivations),
deconvolutionSpecificChannelsRatio(deconvolutionSpecificChannelsRatio) {
if (precisionsOnActivations.size() == 0ul) { if (precisionsOnActivations.size() == 0ul) {
THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed"; THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed";
} }
@ -234,6 +220,11 @@ public:
return *this; return *this;
} }
Params& setDeconvolutionSpecificChannelsRatio(const bool deconvolutionSpecificChannelsRatio) {
this->deconvolutionSpecificChannelsRatio = deconvolutionSpecificChannelsRatio;
return *this;
}
bool updatePrecisions; bool updatePrecisions;
QuantizedTensorAlignment quantizedTensorAlignmentOnActivations; QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
QuantizedTensorAlignment quantizedTensorAlignmentOnWeights; QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
@ -242,6 +233,7 @@ public:
std::vector<element::Type> precisionsOnWeights; std::vector<element::Type> precisionsOnWeights;
element::Type deqPrecision; element::Type deqPrecision;
bool support3DTensorOnActivations; bool support3DTensorOnActivations;
bool deconvolutionSpecificChannelsRatio;
}; };
class PrecisionDetails { class PrecisionDetails {
@ -318,6 +310,7 @@ protected:
std::vector<element::Type> precisionsOnWeights; std::vector<element::Type> precisionsOnWeights;
element::Type deqPrecision; element::Type deqPrecision;
bool support3DTensorOnActivations; bool support3DTensorOnActivations;
bool deconvolutionSpecificChannelsRatio;
// absolute value, used to determine quantization interval asymmetry // absolute value, used to determine quantization interval asymmetry
float quantizationIntervalAsymmetryThreshold; float quantizationIntervalAsymmetryThreshold;

View File

@ -109,7 +109,8 @@ public:
const float max, const float max,
const bool hasZeroPoint, const bool hasZeroPoint,
const bool updatePrecision, const bool updatePrecision,
const element::Type deqPrecision = element::f32); const element::Type deqPrecision = element::f32,
const size_t outChannelsShapeIndex = 0);
static std::shared_ptr<opset1::FakeQuantize> updateFakeQuantize( static std::shared_ptr<opset1::FakeQuantize> updateFakeQuantize(
std::shared_ptr<opset1::FakeQuantize> fq, std::shared_ptr<opset1::FakeQuantize> fq,
@ -183,7 +184,7 @@ public:
static std::shared_ptr<Node> toScalarIfPossible(std::shared_ptr<Node> node); static std::shared_ptr<Node> toScalarIfPossible(std::shared_ptr<Node> node);
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq); static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq);
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues); static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues, int outChannelsShapeIndex = 0);
static FakeQuantizeDequantization foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace = false); static FakeQuantizeDequantization foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace = false);
@ -191,8 +192,16 @@ public:
static std::shared_ptr<opset1::FakeQuantize> fuseConvert(const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize); static std::shared_ptr<opset1::FakeQuantize> fuseConvert(const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize);
static std::vector<element::Type> precisionIntersection(
const std::vector<element::Type>& v1,
const std::vector<element::Type>& v2) noexcept;
private: private:
static std::shared_ptr<Node> foldFakeQuantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues, const bool roundValuesWasSet); static std::shared_ptr<Node> foldFakeQuantize(
const std::shared_ptr<opset1::FakeQuantize>& fq,
const bool roundValues,
const bool roundValuesWasSet,
int outChannelsShapeIndex = 0);
// 1 - on weights // 1 - on weights
// 0 - weightable layer was not found // 0 - weightable layer was not found

View File

@ -303,10 +303,6 @@ private:
std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>> transformations, std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>> transformations,
GraphRewrite& pass, GraphRewrite& pass,
TransformationContext& context); TransformationContext& context);
std::vector<element::Type> precisionIntersection(
const std::vector<element::Type>& v1,
const std::vector<element::Type>& v2) const noexcept;
}; };
class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite { class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite {

View File

@ -22,7 +22,7 @@ public:
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override; bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
protected: protected:
void decomposeFakeQuantizeForWeightsPath(std::shared_ptr<Node> weightableLayer) const; void decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& weightableLayer, size_t outChannelsShapeIndex = 0ul) const;
static bool isGroup(const std::shared_ptr<Node>& node); static bool isGroup(const std::shared_ptr<Node>& node);
static bool isDepthwise(const std::shared_ptr<Node>& node); static bool isDepthwise(const std::shared_ptr<Node>& node);

View File

@ -42,6 +42,7 @@ std::shared_ptr<opset1::Subtract> replaceToSubtract(const std::shared_ptr<Node>&
const auto parent = add->get_input_node_shared_ptr(dataBranchIndex); const auto parent = add->get_input_node_shared_ptr(dataBranchIndex);
if (is_type<opset1::Convolution>(parent) || if (is_type<opset1::Convolution>(parent) ||
is_type<opset1::GroupConvolution>(parent) || is_type<opset1::GroupConvolution>(parent) ||
is_type<opset1::ConvolutionBackpropData>(parent) ||
(is_type<opset1::MatMul>(parent) && (is_type<opset1::MatMul>(parent) &&
(is_type<opset1::Constant>(parent->get_input_node_ptr(0)) || is_type<opset1::Constant>(parent->get_input_node_ptr(1))))) { (is_type<opset1::Constant>(parent->get_input_node_ptr(0)) || is_type<opset1::Constant>(parent->get_input_node_ptr(1))))) {
return nullptr; return nullptr;

View File

@ -50,14 +50,14 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
return false; return false;
} }
DataPrecision dataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false); std::vector<element::Type> concatParentsChildrensPrecisions = precisionsOnActivations;
if (dataPrecision.precision == ngraph::element::undefined) { fillAvailablePrecisions(subgraph.quantizationLayers[0], concatParentsChildrensPrecisions);
if (concatParentsChildrensPrecisions.empty()) {
return false; return false;
} }
std::unordered_map<std::string, ngraph::pass::low_precision::FakeQuantizeDequantization> dequantizations;
for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) { for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
const std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]); fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
if (fq == nullptr) { if (fq == nullptr) {
return false; return false;
} }
@ -72,21 +72,20 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
if (quantizationDetails.inputHighValues.size() != 1ul) { if (quantizationDetails.inputHighValues.size() != 1ul) {
return false; return false;
} }
std::vector<element::Type> fqChildrensPrecisions = precisionsOnActivations;
fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrensPrecisions);
concatParentsChildrensPrecisions = NetworkHelper::precisionIntersection(concatParentsChildrensPrecisions, fqChildrensPrecisions);
const DataPrecision dataPrecision2 = getDataPrecision(subgraph.quantizationLayers[i]->shared_from_this(), quantizationDetails, false); if (concatParentsChildrensPrecisions.empty()) {
if (dataPrecision2.precision == ngraph::element::undefined) {
return false; return false;
} }
if (dataPrecision.precision != dataPrecision2.precision) {
// quantization levels are the same, difference can be in sign
// wider interval (precision) is preferable: use signed if least one interval is signed
dataPrecision = dataPrecision.precision.is_signed() ? dataPrecision : dataPrecision2;
}
} }
if (dataPrecision.precision == ngraph::element::undefined) { DataPrecision dataPrecision;
return false; if (std::find(concatParentsChildrensPrecisions.begin(), concatParentsChildrensPrecisions.end(), element::i8) != concatParentsChildrensPrecisions.end()) {
dataPrecision = DataPrecision(element::i8);
} else {
dataPrecision = DataPrecision(concatParentsChildrensPrecisions[0]);
} }
std::vector<QuantizationDetails> quantizationLayersDetails; std::vector<QuantizationDetails> quantizationLayersDetails;

View File

@ -27,7 +27,9 @@ bool ConcatMultiChannelsTransformation::isMultiChannel(const std::vector<std::sh
for (const std::shared_ptr<ngraph::opset1::Concat>& concat : concatLayers) { for (const std::shared_ptr<ngraph::opset1::Concat>& concat : concatLayers) {
const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(concat); const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(concat);
for (const std::shared_ptr<ngraph::Node>& child : children) { for (const std::shared_ptr<ngraph::Node>& child : children) {
if (is_type<ngraph::opset1::Convolution>(child.get())) { if ((is_type<ngraph::opset1::Convolution>(child.get()) ||
is_type<ngraph::opset1::ConvolutionBackpropData>(child.get())) &&
this->layerTransformationsManager->isQuantized(child)) {
return false; return false;
} }
} }

View File

@ -42,7 +42,27 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
auto convolution = m.get_match_root(); auto convolution = m.get_match_root();
if (!canConvolutionBeTransformed(context, convolution)) { if (!canConvolutionBeTransformed(context, convolution)) {
return false; auto weightInput = convolution->get_input_node_shared_ptr(1);
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightInput);
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
NetworkHelper::getDequantization(convolution, 1ul) :
NetworkHelper::getDequantization(reshapeFromWeights);
if (dequantization.empty()) {
const auto fqOnWeights = getFakeQuantizeOnWeights(convolution);
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
if (reshapeFromWeights != nullptr) {
resultConstant = fold_reshape<opset1::Reshape>(
resultConstant,
reshapeFromWeights->input_value(1),
false);
}
if (as_type_ptr<opset1::Constant>(resultConstant)) {
replace_node(weightInput, resultConstant);
}
} else {
NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
}
return true;
} }
convolution = NetworkHelper::separateInStandaloneBranch(convolution); convolution = NetworkHelper::separateInStandaloneBranch(convolution);

View File

@ -0,0 +1,218 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision/convolution_backprop_data.hpp"
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include <cassert>
#include "low_precision/network_helper.hpp"
#include "low_precision/common/dequantization_op.hpp"
namespace ngraph {
namespace pass {
namespace low_precision {
ConvolutionBackpropDataTransformation::ConvolutionBackpropDataTransformation(const Params& params) : WeightableLayerTransformation(params) {
}
void ConvolutionBackpropDataTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
addPattern(
pass,
context,
make_op_pattern<opset1::ConvolutionBackpropData>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::Multiply>() }));
addPattern(
pass,
context,
make_op_pattern<opset1::ConvolutionBackpropData>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::FakeQuantize>() }));
addPattern(
pass,
context,
make_op_pattern<opset1::ConvolutionBackpropData>(
{ make_op_label<opset1::Multiply>(), make_op_label<opset1::Multiply>(), make_op_label<opset1::Constant>() }));
addPattern(
pass,
context,
make_op_pattern<opset1::ConvolutionBackpropData>(
{ make_op_label<opset1::Multiply>(), make_op_label<opset1::FakeQuantize>(), make_op_label<opset1::Constant>() }));
}
bool ConvolutionBackpropDataTransformation::isQuantized(std::shared_ptr<Node> layer) const noexcept {
if (deconvolutionSpecificChannelsRatio) {
size_t inputChannels = layer->get_input_shape(0)[1];
size_t outputChannels = layer->get_output_shape(0)[1];
if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
return false;
}
}
return WeightableLayerTransformation::isQuantized(layer, false);
}
bool ConvolutionBackpropDataTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const {
auto convolutionBackpropData = m.get_match_root();
if (!canBeTransformed(context, convolutionBackpropData)) {
auto weightsInput = convolutionBackpropData->get_input_node_shared_ptr(1);
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightsInput);
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
NetworkHelper::getDequantization(convolutionBackpropData, 1ul) :
NetworkHelper::getDequantization(reshapeFromWeights);
if (dequantization.empty()) {
const auto fqOnWeights = getFakeQuantizeOnWeights(convolutionBackpropData);
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
if (reshapeFromWeights != nullptr) {
resultConstant = fold_reshape<opset1::Reshape>(
resultConstant,
reshapeFromWeights->input_value(1),
false);
}
if (as_type_ptr<opset1::Constant>(resultConstant)) {
replace_node(weightsInput, resultConstant);
}
} else {
NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
}
return true;
}
convolutionBackpropData = NetworkHelper::separateInStandaloneBranch(convolutionBackpropData);
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData);
{
if (dequantization.subtract != nullptr) {
std::shared_ptr<ngraph::Node> layer = dequantization.subtract;
ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(layer);
NetworkHelper::optimizeSubtract(dequantization.subtract);
}
std::shared_ptr<opset1::Constant> reducedConstant = as_type_ptr<opset1::Constant>(dequantization.multiplyConstant);
std::shared_ptr<Node> newMultiplyAfterConst = std::make_shared<opset1::Constant>(
reducedConstant->get_output_element_type(0),
Shape{ 1 },
reducedConstant->cast_vector<float>()[0]);
auto inputs = convolutionBackpropData->input_values();
inputs[0] = dequantization.multiply->input_value(0);
const auto copyNode = convolutionBackpropData->copy_with_new_inputs(inputs);
const auto relaxedConvolutionBackpropData = std::make_shared<op::TypeRelaxed<opset1::ConvolutionBackpropData>>(
*as_type_ptr<opset1::ConvolutionBackpropData>(copyNode),
std::vector<element::Type>{deqPrecision, deqPrecision},
std::vector<element::Type>{deqPrecision});
const auto newMultiplyAfter = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
std::vector<element::Type>{ deqPrecision, deqPrecision },
std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
ngraph::op::TemporaryReplaceOutputType(relaxedConvolutionBackpropData, deqPrecision).get(),
ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get());
replace_node(convolutionBackpropData, newMultiplyAfter);
convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr();
inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0);
if (is_type<opset1::Convert>(convolutionBackpropData->get_input_node_ptr(0))) {
auto newConvolution = convolutionBackpropData->copy_with_new_inputs(inputs);
replace_node(convolutionBackpropData, newConvolution);
convolutionBackpropData = newConvolution;
}
}
{
decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul);
dequantization = NetworkHelper::getDequantization(convolutionBackpropData, 1ul);
if (is_type<opset1::FakeQuantize>(dequantization.data.get_node())) {
const std::shared_ptr<opset1::FakeQuantize> fq = as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr());
std::shared_ptr<ngraph::Node> newFQ = NetworkHelper::fold_fake_quantize(fq, true);
NetworkHelper::copyInfo(fq, newFQ);
replace_node(fq, newFQ);
}
std::shared_ptr<opset1::Multiply> multiplyFromWeights = as_type_ptr<opset1::Multiply>(
convolutionBackpropData->input_value(1).get_node_shared_ptr());
std::shared_ptr<opset1::Subtract> subtractFromWeights = as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0));
{
Shape newScaleShape = multiplyFromWeights->get_input_shape(1);
auto inputs = convolutionBackpropData->input_values();
inputs[1] = multiplyFromWeights->input_value(0);
auto newMultiplyAfter = std::make_shared<DequantizationMultiply>(
convolutionBackpropData->copy_with_new_inputs(inputs),
foldConvert(
fold_reshape<opset1::Reshape>(
multiplyFromWeights->input_value(1),
std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
false),
convolutionBackpropData->get_output_element_type(0)));
replace_node(convolutionBackpropData, newMultiplyAfter);
convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr();
}
if (subtractFromWeights != nullptr) {
// optimize zero point on weights
auto optimizedSubtract = NetworkHelper::optimizeSubtract(subtractFromWeights);
if (optimizedSubtract == nullptr) {
subtractFromWeights = nullptr;
} else {
subtractFromWeights = as_type_ptr<opset1::Subtract>(optimizedSubtract);
const Shape weightsShape = subtractFromWeights->input(0).get_shape();
Shape zeroPointShape(weightsShape.size(), 1ul);
zeroPointShape[1] = weightsShape[1];
auto zeroPointConstant = fold<opset1::Broadcast>(
subtractFromWeights->get_input_node_shared_ptr(1),
std::make_shared<opset1::Constant>(element::i32, Shape{zeroPointShape.size()}, zeroPointShape));
replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
}
}
std::shared_ptr<opset1::Convert> convertFromWeights =
as_type_ptr<opset1::Convert>(
subtractFromWeights == nullptr ?
multiplyFromWeights->get_input_node_shared_ptr(0) :
subtractFromWeights->get_input_node_shared_ptr(0));
if (convertFromWeights != nullptr) {
auto inputs = convolutionBackpropData->input_values();
inputs[1] = convolutionBackpropData->get_input_node_ptr(1)->input_value(0);
// remove Convert on weights
auto newConvolution = convolutionBackpropData->clone_with_new_inputs(inputs);
replace_node(convolutionBackpropData, newConvolution);
convolutionBackpropData = newConvolution;
}
}
std::shared_ptr<ngraph::opset1::Multiply> finalDequantization = NetworkHelper::optimizeMultipliesAfter(
convolutionBackpropData->output(0).get_target_inputs().begin()->get_node()->shared_from_this());
ngraph::copy_runtime_info({ convolutionBackpropData, finalDequantization }, finalDequantization);
updateOutput(context, finalDequantization, convolutionBackpropData);
auto onWeights = convolutionBackpropData->get_input_node_shared_ptr(1);
if (is_type<opset1::Reshape>(onWeights)) {
onWeights = onWeights->get_input_node_shared_ptr(0);
}
if (is_type<opset1::Subtract>(onWeights)) {
auto& rt = onWeights->get_rt_info();
rt["DISABLED_CONSTANT_FOLDING"] = std::make_shared<ngraph::VariantWrapper<std::string>>("");
}
return true;
}
bool ConvolutionBackpropDataTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const {
if (deconvolutionSpecificChannelsRatio) {
size_t inputChannels = op->get_input_shape(0)[1];
size_t outputChannels = op->get_output_shape(0)[1];
if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
return false;
}
}
return canConvolutionBeTransformed(context, op);
}
} // namespace low_precision
} // namespace pass
} // namespace ngraph

View File

@ -20,7 +20,7 @@ void FakeQuantizeTransformation::registerMatcherIn(GraphRewrite& pass, Transform
bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
std::shared_ptr<opset1::FakeQuantize> layer = std::dynamic_pointer_cast<opset1::FakeQuantize>(m.get_match_root()); std::shared_ptr<opset1::FakeQuantize> layer = std::dynamic_pointer_cast<opset1::FakeQuantize>(m.get_match_root());
if (!NetworkHelper::isQuantizeSupported(layer)) { if (!QuantizationDetails::outputLayoutIsSupported(layer)) {
return false; return false;
} }
@ -149,7 +149,9 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
inputHighConst_f32 = fq::updateShape(fold<opset1::Add>(inputHighConst_f32, value), fakeQuantize->get_output_shape(0)); inputHighConst_f32 = fq::updateShape(fold<opset1::Add>(inputHighConst_f32, value), fakeQuantize->get_output_shape(0));
} else if (is_type<opset1::Add>(eltwise) && checkElementwise(eltwise)) { } else if (is_type<opset1::Add>(eltwise) && checkElementwise(eltwise)) {
if (is_type<opset1::Convolution>(fq::getData(eltwise)) || if (is_type<opset1::Convolution>(fq::getData(eltwise)) ||
is_type<opset1::GroupConvolution>(fq::getData(eltwise))) { is_type<opset1::GroupConvolution>(fq::getData(eltwise)) ||
is_type<opset1::ConvolutionBackpropData>(fq::getData(eltwise)) ||
is_type<opset1::GroupConvolutionBackpropData>(fq::getData(eltwise))) {
return nullptr; return nullptr;
} }

View File

@ -45,11 +45,18 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext&
const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0); const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize); const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);
const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow);
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(2), inputHigh);
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>( auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
opset1::FakeQuantize( opset1::FakeQuantize(
fakeQuantizeParent->output(parentIndex), fakeQuantizeParent->output(parentIndex),
foldConvert(fakeQuantize->input_value(1), deqPrecision), inputLow,
foldConvert(fakeQuantize->input_value(2), deqPrecision), inputHigh,
outputLowConst_f32, outputLowConst_f32,
outputHighConst_f32, outputHighConst_f32,
fakeQuantize->get_levels()), fakeQuantize->get_levels()),

View File

@ -45,11 +45,18 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext&
const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0); const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize); const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);
const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow);
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(2), inputHigh);
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>( auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
opset1::FakeQuantize( opset1::FakeQuantize(
fakeQuantizeParent->output(parentIndex), fakeQuantizeParent->output(parentIndex),
foldConvert(fakeQuantize->input_value(1), deqPrecision), inputLow,
foldConvert(fakeQuantize->input_value(2), deqPrecision), inputHigh,
outputLowConst_f32, outputLowConst_f32,
outputHighConst_f32, outputHighConst_f32,
fakeQuantize->get_levels()), fakeQuantize->get_levels()),
@ -76,7 +83,8 @@ bool FuseSubtractToFakeQuantizeTransformation::canBeTransformed(const Transforma
for (const auto& target : children) { for (const auto& target : children) {
const auto convolution = is_type<opset1::Convolution>(target.get_node()); const auto convolution = is_type<opset1::Convolution>(target.get_node());
const auto groupConvolution = is_type<opset1::GroupConvolution>(target.get_node()); const auto groupConvolution = is_type<opset1::GroupConvolution>(target.get_node());
if (convolution || groupConvolution) { const auto convolutionBackpropData = is_type<opset1::ConvolutionBackpropData>(target.get_node());
if (convolution || groupConvolution || convolutionBackpropData) {
return false; return false;
} }
} }

View File

@ -32,6 +32,7 @@ LayerTransformation::LayerTransformation(const Params& params) :
precisionsOnWeights(params.precisionsOnWeights), precisionsOnWeights(params.precisionsOnWeights),
deqPrecision(params.deqPrecision), deqPrecision(params.deqPrecision),
support3DTensorOnActivations(params.support3DTensorOnActivations), support3DTensorOnActivations(params.support3DTensorOnActivations),
deconvolutionSpecificChannelsRatio(params.deconvolutionSpecificChannelsRatio),
quantizationIntervalAsymmetryThreshold(0.002f), quantizationIntervalAsymmetryThreshold(0.002f),
zeroThreshold(1.e-6f), zeroThreshold(1.e-6f),
minQuantizationLevels(2ul), minQuantizationLevels(2ul),

View File

@ -69,7 +69,8 @@ bool NetworkHelper::isConstantPath(const std::shared_ptr<Node>& op) {
return is_type<opset1::Parameter>(node) || return is_type<opset1::Parameter>(node) ||
is_type<opset1::Convolution>(node) || is_type<opset1::Convolution>(node) ||
is_type<opset1::GroupConvolution>(node) || is_type<opset1::GroupConvolution>(node) ||
is_type<opset1::MatMul>(node); is_type<opset1::MatMul>(node) ||
is_type<opset1::ConvolutionBackpropData>(node);
}; };
if (isNotConstantPathOperation(op)) { if (isNotConstantPathOperation(op)) {
@ -440,8 +441,11 @@ std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(const std::shared_ptr<op
return foldFakeQuantize(fq, false, false); return foldFakeQuantize(fq, false, false);
} }
std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues) { std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(
return foldFakeQuantize(fq, roundValues, true); const std::shared_ptr<opset1::FakeQuantize>& fq,
const bool roundValues,
const int outChannelsShapeIndex) {
return foldFakeQuantize(fq, roundValues, true, outChannelsShapeIndex);
} }
FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace) { FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace) {
@ -591,7 +595,8 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::fuseConvert(const std::shar
std::shared_ptr<Node> NetworkHelper::foldFakeQuantize( std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
const std::shared_ptr<opset1::FakeQuantize>& fq, const std::shared_ptr<opset1::FakeQuantize>& fq,
const bool roundValuesArg, const bool roundValuesArg,
const bool roundValuesWasSet) { const bool roundValuesWasSet,
const int outChannelsShapeIndex) {
if (is_type<opset1::Constant>(fq->get_input_node_shared_ptr(0)) && if (is_type<opset1::Constant>(fq->get_input_node_shared_ptr(0)) &&
is_type<opset1::Constant>(fq->get_input_node_shared_ptr(1)) && is_type<opset1::Constant>(fq->get_input_node_shared_ptr(1)) &&
is_type<opset1::Constant>(fq->get_input_node_shared_ptr(2)) && is_type<opset1::Constant>(fq->get_input_node_shared_ptr(2)) &&
@ -630,10 +635,20 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
if (constShape.empty() || constShape.size() > 5lu) { if (constShape.empty() || constShape.size() > 5lu) {
THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected dimensions count " << constShape.size(); THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected dimensions count " << constShape.size();
} }
if (outChannelsShapeIndex != 0 && outChannelsShapeIndex != 1) {
THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected outChannelsShapeIndex " << outChannelsShapeIndex;
}
// OIDHW size_t OC;
const size_t OC = constShape[0]; size_t IC;
const size_t IC = constShape.size() > 1lu ? constShape[1] : 1; // OIDHW or IODHW
if (constShape.size() == 1) {
OC = constShape[0];
IC = 1;
} else {
OC = constShape[outChannelsShapeIndex];
IC = constShape[outChannelsShapeIndex == 0 ? 1 : 0];
}
const size_t D = constShape.size() > 4lu ? constShape[constShape.size() - 3] : 1; const size_t D = constShape.size() > 4lu ? constShape[constShape.size() - 3] : 1;
const size_t H = constShape.size() > 2lu ? constShape.size() == 3lu ? constShape[2] : constShape[constShape.size() - 2] : 1; const size_t H = constShape.size() > 2lu ? constShape.size() == 3lu ? constShape[2] : constShape[constShape.size() - 2] : 1;
const size_t W = constShape.size() > 3lu ? constShape[constShape.size() - 1] : 1; const size_t W = constShape.size() > 3lu ? constShape[constShape.size() - 1] : 1;
@ -667,29 +682,35 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
auto levels_1 = fq->get_levels() - 1.f; auto levels_1 = fq->get_levels() - 1.f;
//const size_t DHW = D * H * W; const size_t DHW = D * H * W;
const size_t IDHW = IC * D * H * W; const size_t IDHW = IC * D * H * W;
const auto values = constant->cast_vector<float>(); const auto values = constant->cast_vector<float>();
std::vector<float> quantizedValues(OC * IC * D * H * W); std::vector<float> quantizedValues(OC * IC * D * H * W);
for (size_t oc = 0; oc < OC; ++oc) { for (size_t oc = 0; oc < OC; ++oc) {
for (size_t iidx = 0; iidx < IDHW; ++iidx) { const float inputLow = inputLowValues[isInputLowBroadcasted ? 0 : oc];
const float inputLow = inputLowValues[isInputLowBroadcasted ? 0 : oc]; const float inputHigh = inputHighValues[isInputHighBroadcasted ? 0 : oc];
const float inputHigh = inputHighValues[isInputHighBroadcasted ? 0 : oc]; const float outputLow = outputLowValues[isOutputLowBroadcasted ? 0 : oc];
const float outputLow = outputLowValues[isOutputLowBroadcasted ? 0 : oc]; const float outputHigh = outputHighValues[isOutputHighBroadcasted ? 0 : oc];
const float outputHigh = outputHighValues[isOutputHighBroadcasted ? 0 : oc]; for (size_t ic = 0; ic < IC; ++ic) {
for (size_t iidx = 0; iidx < DHW; ++iidx) {
size_t idx;
if (outChannelsShapeIndex == 0) {
idx = oc * IDHW + ic * DHW + iidx;
} else {
idx = ic * IDHW + oc * DHW + iidx;
}
const size_t idx = oc * IDHW + iidx; if (values[idx] <= inputLow) {
quantizedValues[idx] = roundValues ? std::roundf(outputLow) : outputLow;
if (values[idx] <= inputLow) { } else if (values[idx] > inputHigh) {
quantizedValues[idx] = roundValues ? std::roundf(outputLow) : outputLow; quantizedValues[idx] = roundValues ? std::roundf(outputHigh) : outputHigh;
} else if (values[idx] > inputHigh) { } else {
quantizedValues[idx] = roundValues ? std::roundf(outputHigh) : outputHigh; const float value = std::roundf((values[idx] - inputLow) / (inputHigh - inputLow) * levels_1) /
} else { levels_1 * (outputHigh - outputLow) + outputLow;
const float value = std::roundf((values[idx] - inputLow) / (inputHigh - inputLow) * levels_1) / quantizedValues[idx] = roundValues ? std::roundf(value) : value;
levels_1 * (outputHigh - outputLow) + outputLow; }
quantizedValues[idx] = roundValues ? std::roundf(value) : value;
} }
} }
} }
@ -818,7 +839,8 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
const float max, const float max,
const bool hasZeroPoint, const bool hasZeroPoint,
const bool updatePrecision, const bool updatePrecision,
const element::Type deqPrecision) { const element::Type deqPrecision,
const size_t outChannelsShapeIndex) {
using std::make_shared; using std::make_shared;
const auto outputLow = fq->input_value(3); const auto outputLow = fq->input_value(3);
@ -898,7 +920,8 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
newMax->output(0), newMax->output(0),
fq->get_levels(), fq->get_levels(),
fq->get_auto_broadcast()), fq->get_auto_broadcast()),
true); true,
outChannelsShapeIndex);
NetworkHelper::copyInfo(fq, newFQ); NetworkHelper::copyInfo(fq, newFQ);
std::shared_ptr<ngraph::Node> convert2; std::shared_ptr<ngraph::Node> convert2;
@ -1548,12 +1571,12 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr<Node>& node, const Data
if (is_type<opset1::Subtract>(node)) { if (is_type<opset1::Subtract>(node)) {
const auto parent = node->get_input_node_shared_ptr(0); const auto parent = node->get_input_node_shared_ptr(0);
const auto intNode = is_type<opset1::Convert>(parent) ? parent : node; const auto intNode = is_type<opset1::Convert>(parent) ? parent : node;
const auto intType = intNode->get_input_element_type(0); const auto type = intNode->get_input_element_type(0);
if (intType == element::u8 || intType == element::i8) { if (type == element::u8 || type == element::i8) {
min = DataPrecision::getMinValue(intType, 256) - 0.5f; min = DataPrecision::getMinValue(type, 256) - 0.5f;
max = DataPrecision::getMaxValue(intType, 256) + 0.5f; max = DataPrecision::getMaxValue(type, 256) + 0.5f;
} else { } else {
return false; return type == element::f32 || type == element::f16;
} }
auto subtract1input = node->get_input_node_shared_ptr(1); auto subtract1input = node->get_input_node_shared_ptr(1);
if (is_type<opset1::Convert>(subtract1input)) { if (is_type<opset1::Convert>(subtract1input)) {
@ -1595,6 +1618,23 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr<Node>& node, const Data
return true; return true;
} }
std::vector<element::Type> NetworkHelper::precisionIntersection(
const std::vector<element::Type>& v1,
const std::vector<element::Type>& v2) noexcept {
std::vector<element::Type> v3;
auto v1Copy = v1;
auto v2Copy = v2;
std::sort(v1Copy.begin(), v1Copy.end());
std::sort(v2Copy.begin(), v2Copy.end());
std::set_intersection(v1Copy.begin(), v1Copy.end(),
v2Copy.begin(), v2Copy.end(),
std::back_inserter(v3));
return v3;
}
} // namespace low_precision } // namespace low_precision
} // namespace pass } // namespace pass
} // namespace ngraph } // namespace ngraph

View File

@ -34,6 +34,7 @@
#include "low_precision/avg_pool.hpp" #include "low_precision/avg_pool.hpp"
#include "low_precision/clamp.hpp" #include "low_precision/clamp.hpp"
#include "low_precision/convolution.hpp" #include "low_precision/convolution.hpp"
#include "low_precision/convolution_backprop_data.hpp"
#include "low_precision/depth_to_space.hpp" #include "low_precision/depth_to_space.hpp"
#include "low_precision/fake_quantize.hpp" #include "low_precision/fake_quantize.hpp"
#include "low_precision/group_convolution.hpp" #include "low_precision/group_convolution.hpp"
@ -220,6 +221,7 @@ LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const
add<AvgPoolTransformation, opset1::AvgPool>(params). add<AvgPoolTransformation, opset1::AvgPool>(params).
add<ClampTransformation, opset1::Clamp>(params). add<ClampTransformation, opset1::Clamp>(params).
add<ConvolutionTransformation, opset1::Convolution>(params). add<ConvolutionTransformation, opset1::Convolution>(params).
add<ConvolutionBackpropDataTransformation, opset1::ConvolutionBackpropData>(params).
add<DepthToSpaceTransformation, opset1::DepthToSpace>(params). add<DepthToSpaceTransformation, opset1::DepthToSpace>(params).
add<FakeQuantizeTransformation, opset1::FakeQuantize>(params). add<FakeQuantizeTransformation, opset1::FakeQuantize>(params).
add<GroupConvolutionTransformation, opset1::GroupConvolution>(params). add<GroupConvolutionTransformation, opset1::GroupConvolution>(params).
@ -338,6 +340,7 @@ TypeRelaxedReplacer::TypeRelaxedReplacer() {
make_matcher_type_relaxed<opset1::Clamp>(this); make_matcher_type_relaxed<opset1::Clamp>(this);
make_matcher_type_relaxed<opset1::Concat>(this); make_matcher_type_relaxed<opset1::Concat>(this);
make_matcher_type_relaxed<opset1::Convolution>(this); make_matcher_type_relaxed<opset1::Convolution>(this);
make_matcher_type_relaxed<opset1::ConvolutionBackpropData>(this);
make_matcher_type_relaxed<opset1::DepthToSpace>(this); make_matcher_type_relaxed<opset1::DepthToSpace>(this);
make_matcher_type_relaxed<opset1::FakeQuantize>(this); make_matcher_type_relaxed<opset1::FakeQuantize>(this);
make_matcher_type_relaxed<opset1::GroupConvolution>(this); make_matcher_type_relaxed<opset1::GroupConvolution>(this);
@ -430,23 +433,6 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
network->validate_nodes_and_infer_types(); network->validate_nodes_and_infer_types();
} }
std::vector<element::Type> LowPrecisionTransformer::precisionIntersection(
const std::vector<element::Type>& v1,
const std::vector<element::Type>& v2) const noexcept {
std::vector<element::Type> v3;
auto v1Copy = v1;
auto v2Copy = v2;
std::sort(v1Copy.begin(), v1Copy.end());
std::sort(v2Copy.begin(), v2Copy.end());
std::set_intersection(v1Copy.begin(), v1Copy.end(),
v2Copy.begin(), v2Copy.end(),
std::back_inserter(v3));
return v3;
}
std::vector<element::Type> LowPrecisionTransformer::getPrecisionsOnActivations(const Node& op) const noexcept { std::vector<element::Type> LowPrecisionTransformer::getPrecisionsOnActivations(const Node& op) const noexcept {
const std::string operantionType = LowPrecisionTransformations::getType(op); const std::string operantionType = LowPrecisionTransformations::getType(op);
const std::vector<LayerTransformationPtr> transformation = transformations.find(operantionType); const std::vector<LayerTransformationPtr> transformation = transformations.find(operantionType);
@ -456,7 +442,7 @@ std::vector<element::Type> LowPrecisionTransformer::getPrecisionsOnActivations(c
std::vector<element::Type> precisions = transformation[0]->getPrecisionsOnActivations(); std::vector<element::Type> precisions = transformation[0]->getPrecisionsOnActivations();
for (const auto& transform : transformation) { for (const auto& transform : transformation) {
precisions = precisionIntersection(precisions, transform->getPrecisionsOnActivations()); precisions = NetworkHelper::precisionIntersection(precisions, transform->getPrecisionsOnActivations());
} }
return precisions; return precisions;
} }

View File

@ -26,7 +26,7 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma
return false; return false;
} }
if (updatePrecisions && !NetworkHelper::checkZeroPoint(dequantization.subtract)) { if (!NetworkHelper::checkZeroPoint(dequantization.subtract)) {
return false; return false;
} }
@ -46,24 +46,10 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma
return false; return false;
} }
if (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision)) { if (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision)) {
const std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
if (as_type_ptr<opset1::Constant>(resultConstant)) {
replace_node(fqOnWeights, resultConstant);
}
return false; return false;
} }
} else { } else {
if (!NetworkHelper::checkZeroPoint(dequantization.subtract)) { if (!NetworkHelper::checkZeroPoint(dequantization.subtract)) {
const auto resultDequantization = NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
if (resultDequantization.empty() && reshapeFromWeights) {
const auto foldedReshape = fold<opset1::Reshape>(
reshapeFromWeights->get_input_node_shared_ptr(0),
reshapeFromWeights->get_input_node_shared_ptr(1),
reshapeFromWeights->get_special_zero());
if (is_type<opset1::Constant>(foldedReshape)) {
replace_node(reshapeFromWeights, foldedReshape);
}
}
return false; return false;
} }
} }
@ -170,9 +156,11 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
return false; return false;
} }
if ( // Check if all dimensions of scale except the first one (which is O-Output channels dimension) are all ones const size_t outChannelsShapeIndex = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul;
(shape_size(constOutputShape) != constOutputShape[0]) || if ( // Check if all dimensions of scale except the output channels are all ones
((constOutputShape[0] != 1ul) && (fqFromWeights->get_output_shape(0)[0] != constOutputShape[0]))) { (shape_size(constOutputShape) != constOutputShape[outChannelsShapeIndex]) ||
((constOutputShape[outChannelsShapeIndex] != 1ul) &&
(fqFromWeights->get_output_shape(0)[outChannelsShapeIndex] != constOutputShape[outChannelsShapeIndex]))) {
return false; return false;
} }
} else { } else {
@ -256,7 +244,7 @@ bool WeightableLayerTransformation::isPrecisionPreserved(std::shared_ptr<Node> l
return false; return false;
} }
void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(std::shared_ptr<Node> node) const { void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& node, const size_t outChannelsShapeIndex) const {
const auto fq = getFakeQuantizeOnWeights(node); const auto fq = getFakeQuantizeOnWeights(node);
if (fq == nullptr) { if (fq == nullptr) {
return; return;
@ -270,7 +258,9 @@ void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(std::sha
dataPrecision.min, dataPrecision.min,
dataPrecision.max, dataPrecision.max,
dataPrecision.hasZeroPoint, dataPrecision.hasZeroPoint,
updatePrecisions); updatePrecisions,
element::f32,
outChannelsShapeIndex);
std::shared_ptr<ngraph::Node> fqOnWeights = std::get<0>(tuple); std::shared_ptr<ngraph::Node> fqOnWeights = std::get<0>(tuple);
if (as_type_ptr<ngraph::opset1::Constant>(fqOnWeights) == nullptr) { if (as_type_ptr<ngraph::opset1::Constant>(fqOnWeights) == nullptr) {

View File

@ -76,6 +76,7 @@
#include <low_precision/transformer.hpp> #include <low_precision/transformer.hpp>
#include <low_precision/convert_subtract_constant.hpp> #include <low_precision/convert_subtract_constant.hpp>
#include <low_precision/convolution.hpp> #include <low_precision/convolution.hpp>
#include <low_precision/convolution_backprop_data.hpp>
#include <low_precision/group_convolution.hpp> #include <low_precision/group_convolution.hpp>
#include <low_precision/multiply_to_group_convolution.hpp> #include <low_precision/multiply_to_group_convolution.hpp>
#include <low_precision/network_helper.hpp> #include <low_precision/network_helper.hpp>
@ -328,7 +329,8 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
.add<GroupConvolutionTransformation, ngraph::opset1::GroupConvolution>( .add<GroupConvolutionTransformation, ngraph::opset1::GroupConvolution>(
LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }).setSupportAsymmetricQuantization(true)) LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }).setSupportAsymmetricQuantization(true))
.addStandaloneCleanup<MultiplyToGroupConvolutionTransformation, ngraph::opset1::Multiply>( .addStandaloneCleanup<MultiplyToGroupConvolutionTransformation, ngraph::opset1::Multiply>(
LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }))); LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }))
.remove<ConvolutionBackpropDataTransformation, ngraph::opset1::ConvolutionBackpropData>());
transformer.transform(nGraphFunc); transformer.transform(nGraphFunc);
} }

View File

@ -29,6 +29,7 @@ public:
const Strides& dilations, const Strides& dilations,
const CoordinateDiff& pads_begin, const CoordinateDiff& pads_begin,
const CoordinateDiff& pads_end, const CoordinateDiff& pads_end,
const element::Type output_type,
const size_t& group = 1, const size_t& group = 1,
const PadType& auto_pad = PadType::EXPLICIT, const PadType& auto_pad = PadType::EXPLICIT,
const CoordinateDiff& output_padding = {}, const CoordinateDiff& output_padding = {},
@ -41,6 +42,7 @@ public:
const Strides& dilations, const Strides& dilations,
const CoordinateDiff& pads_begin, const CoordinateDiff& pads_begin,
const CoordinateDiff& pads_end, const CoordinateDiff& pads_end,
const element::Type output_type,
const size_t& group = 1, const size_t& group = 1,
const PadType& auto_pad = PadType::EXPLICIT, const PadType& auto_pad = PadType::EXPLICIT,
const CoordinateDiff& output_padding = {}, const CoordinateDiff& output_padding = {},
@ -79,6 +81,7 @@ protected:
size_t m_group; size_t m_group;
CoordinateDiff m_output_padding; CoordinateDiff m_output_padding;
std::shared_ptr<Node> m_output_shape; std::shared_ptr<Node> m_output_shape;
element::Type m_output_type;
}; };
} // namespace op } // namespace op

View File

@ -13,6 +13,7 @@
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
#include "ngraph/validation_util.hpp" #include "ngraph/validation_util.hpp"
#include "ngraph/opsets/opset1.hpp" #include "ngraph/opsets/opset1.hpp"
#include "ngraph_ops/type_relaxed.hpp"
using namespace std; using namespace std;
using namespace ngraph; using namespace ngraph;
@ -25,6 +26,7 @@ op::DeconvolutionIE::DeconvolutionIE(const Output<Node>& data,
const Strides& dilations, const Strides& dilations,
const CoordinateDiff& pads_begin, const CoordinateDiff& pads_begin,
const CoordinateDiff& pads_end, const CoordinateDiff& pads_end,
const element::Type output_type,
const size_t& group, const size_t& group,
const PadType& auto_pad, const PadType& auto_pad,
const CoordinateDiff& output_padding, const CoordinateDiff& output_padding,
@ -37,7 +39,8 @@ op::DeconvolutionIE::DeconvolutionIE(const Output<Node>& data,
, m_auto_pad(auto_pad) , m_auto_pad(auto_pad)
, m_group(group) , m_group(group)
, m_output_padding(output_padding) , m_output_padding(output_padding)
, m_output_shape(output_shape) { , m_output_shape(output_shape)
, m_output_type(output_type) {
constructor_validate_and_infer_types(); constructor_validate_and_infer_types();
} }
@ -48,6 +51,7 @@ op::DeconvolutionIE::DeconvolutionIE(const Output<Node>& data,
const Strides& dilations, const Strides& dilations,
const CoordinateDiff& pads_begin, const CoordinateDiff& pads_begin,
const CoordinateDiff& pads_end, const CoordinateDiff& pads_end,
const element::Type output_type,
const size_t& group, const size_t& group,
const PadType& auto_pad, const PadType& auto_pad,
const CoordinateDiff& output_padding, const CoordinateDiff& output_padding,
@ -60,7 +64,8 @@ op::DeconvolutionIE::DeconvolutionIE(const Output<Node>& data,
, m_auto_pad(auto_pad) , m_auto_pad(auto_pad)
, m_group(group) , m_group(group)
, m_output_padding(output_padding) , m_output_padding(output_padding)
, m_output_shape(output_shape) { , m_output_shape(output_shape)
, m_output_type(output_type) {
constructor_validate_and_infer_types(); constructor_validate_and_infer_types();
} }
@ -81,13 +86,32 @@ void op::DeconvolutionIE::validate_and_infer_types() {
} }
Output<Node> conv; Output<Node> conv;
if (m_output_shape) { if (m_output_shape) {
conv = std::make_shared<opset1::GroupConvolutionBackpropData>(input_value(0), weights, m_output_shape, conv = std::make_shared<op::TypeRelaxed<opset1::GroupConvolutionBackpropData>>(
m_strides, m_pads_begin, m_pads_end, m_dilations, m_auto_pad, m_output_padding); std::vector<element::Type>{ element::f32, element::f32 },
std::vector<element::Type>{ element::f32 },
ngraph::op::TemporaryReplaceOutputType(input_value(0), element::f32).get(),
ngraph::op::TemporaryReplaceOutputType(weights, element::f32).get(),
m_output_shape,
m_strides,
m_pads_begin,
m_pads_end,
m_dilations,
m_auto_pad,
m_output_padding);
} else { } else {
conv = std::make_shared<opset1::GroupConvolutionBackpropData>(input_value(0), weights, conv = std::make_shared<op::TypeRelaxed<opset1::GroupConvolutionBackpropData>>(
m_strides, m_pads_begin, m_pads_end, m_dilations, m_auto_pad, m_output_padding); std::vector<element::Type>{ element::f32, element::f32 },
std::vector<element::Type>{ element::f32 },
ngraph::op::TemporaryReplaceOutputType(input_value(0), element::f32).get(),
ngraph::op::TemporaryReplaceOutputType(weights, element::f32).get(),
m_strides,
m_pads_begin,
m_pads_end,
m_dilations,
m_auto_pad,
m_output_padding);
} }
set_output_type(0, conv.get_element_type(), conv.get_partial_shape()); set_output_type(0, m_output_type, conv.get_partial_shape());
} }
shared_ptr<Node> op::DeconvolutionIE::clone_with_new_inputs(const ngraph::OutputVector &new_args) const { shared_ptr<Node> op::DeconvolutionIE::clone_with_new_inputs(const ngraph::OutputVector &new_args) const {
@ -99,6 +123,7 @@ shared_ptr<Node> op::DeconvolutionIE::clone_with_new_inputs(const ngraph::Output
m_dilations, m_dilations,
m_pads_begin, m_pads_begin,
m_pads_end, m_pads_end,
m_output_type,
m_group, m_group,
m_auto_pad, m_auto_pad,
m_output_padding, m_output_padding,
@ -111,6 +136,7 @@ shared_ptr<Node> op::DeconvolutionIE::clone_with_new_inputs(const ngraph::Output
m_dilations, m_dilations,
m_pads_begin, m_pads_begin,
m_pads_end, m_pads_end,
m_output_type,
m_group, m_group,
m_auto_pad, m_auto_pad,
m_output_padding, m_output_padding,

View File

@ -113,6 +113,7 @@ ngraph::pass::ConvertDeconvolution::ConvertDeconvolution() {
deconv->get_dilations(), deconv->get_dilations(),
deconv->get_pads_begin(), deconv->get_pads_begin(),
deconv->get_pads_end(), deconv->get_pads_end(),
deconv->get_output_element_type(0),
1 /* groups */, 1 /* groups */,
deconv->get_auto_pad(), deconv->get_auto_pad(),
deconv->get_output_padding(), deconv->get_output_padding(),
@ -158,6 +159,7 @@ ngraph::pass::ConvertGroupDeconvolution::ConvertGroupDeconvolution() {
gconv->get_dilations(), gconv->get_dilations(),
gconv->get_pads_begin(), gconv->get_pads_begin(),
gconv->get_pads_end(), gconv->get_pads_end(),
gconv->get_output_element_type(0),
group, group,
gconv->get_auto_pad(), gconv->get_auto_pad(),
gconv->get_output_padding(), gconv->get_output_padding(),

View File

@ -38,11 +38,14 @@ ngraph::pass::ConvertSubtract::ConvertSubtract() {
const std::shared_ptr<Node> child = subChildren.begin()->get_node()->shared_from_this(); const std::shared_ptr<Node> child = subChildren.begin()->get_node()->shared_from_this();
if (child != nullptr) { if (child != nullptr) {
if (is_type<opset1::Convolution>(child) || if (is_type<opset1::Convolution>(child) ||
is_type<opset1::ConvolutionBackpropData>(child) ||
is_type<opset1::GroupConvolution>(child) || is_type<opset1::GroupConvolution>(child) ||
is_type<opset1::GroupConvolutionBackpropData>(child) ||
is_type<opset1::MatMul>(child) || is_type<opset1::MatMul>(child) ||
(is_type<opset1::Reshape>(child) && (is_type<opset1::Reshape>(child) &&
(child->output(0).get_target_inputs().size() == 1ul) && (child->output(0).get_target_inputs().size() == 1ul) &&
is_type<opset1::GroupConvolution>(child->output(0).get_target_inputs().begin()->get_node()->shared_from_this()))) { (is_type<opset1::GroupConvolution>(child->output(0).get_target_inputs().begin()->get_node()->shared_from_this()) ||
is_type<opset1::GroupConvolutionBackpropData>(child->output(0).get_target_inputs().begin()->get_node()->shared_from_this())))) {
const auto input1Type = sub->input(0).get_element_type(); const auto input1Type = sub->input(0).get_element_type();
const auto input2Type = sub->input(1).get_element_type(); const auto input2Type = sub->input(1).get_element_type();
if (((input1Type == element::u8) && (input2Type == element::u8)) || if (((input1Type == element::u8) && (input2Type == element::u8)) ||

View File

@ -0,0 +1,334 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "layer_transformation.hpp"
#include <string>
#include <sstream>
#include <memory>
#include <gtest/gtest.h>
#include <transformations/utils/utils.hpp>
#include <transformations/init_node_info.hpp>
#include <low_precision/convolution_backprop_data.hpp>
#include <low_precision/network_helper.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
#include "simple_low_precision_transformer.hpp"
#include "lpt_ngraph_functions/convolution_backprop_data_function.hpp"
using namespace testing;
using namespace ngraph;
using namespace ngraph::pass;
class ConvolutionBackpropDataTransformationTestValues {
public:
class Actual {
public:
ngraph::element::Type precisionBeforeDequantization;
ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
builder::subgraph::DequantizationOperations dequantizationOnWeights;
std::shared_ptr<ngraph::opset1::Constant> weights;
Actual() = default;
Actual(
const ngraph::element::Type& precisionBeforeDequantization,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations,
const builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights,
const std::shared_ptr<ngraph::opset1::Constant>& weights) :
precisionBeforeDequantization(precisionBeforeDequantization),
dequantizationOnActivations(dequantizationOnActivations),
fakeQuantizeOnWeights(fakeQuantizeOnWeights),
weights(weights) {}
Actual(
const ngraph::element::Type& precisionBeforeDequantization,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations,
const builder::subgraph::DequantizationOperations& dequantizationOnWeights,
const std::shared_ptr<ngraph::opset1::Constant>& weights) :
precisionBeforeDequantization(precisionBeforeDequantization),
dequantizationOnActivations(dequantizationOnActivations),
dequantizationOnWeights(dequantizationOnWeights),
weights(weights) {}
};
class Expected {
public:
ngraph::element::Type precisionBeforeDequantization;
ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
builder::subgraph::DequantizationOperations dequantizationOnWeights;
ngraph::builder::subgraph::DequantizationOperations dequantizationAfter;
std::shared_ptr<ngraph::opset1::Constant> weights;
bool transformed;
};
ngraph::pass::low_precision::LayerTransformation::Params params;
Actual actual;
Expected expected;
};
typedef std::tuple<
element::Type,
ngraph::Shape,
ConvolutionBackpropDataTransformationTestValues> ConvolutionBackpropDataTransformationParams;
class ConvolutionBackpropDataTransformation : public LayerTransformation, public testing::WithParamInterface<ConvolutionBackpropDataTransformationParams> {
public:
void SetUp() override {
const auto netPrecision = std::get<0>(GetParam());
const auto inputShape = std::get<1>(GetParam());
auto outputShape = inputShape;
outputShape[1] /= 4;
outputShape[2] *= 2;
outputShape[3] *= 2;
auto testValues = std::get<2>(GetParam());
std::shared_ptr<Node> actualWeights = pass::low_precision::fold<opset1::Broadcast>(
testValues.actual.weights,
opset1::Constant::create(
element::i64,
Shape{inputShape.size()},
Shape{inputShape[1], outputShape[1], 1, 1}));
if (!testValues.actual.fakeQuantizeOnWeights.empty()) {
actualWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
outputShape,
netPrecision,
testValues.actual.fakeQuantizeOnWeights,
as_type_ptr<opset1::Constant>(actualWeights));
} else {
actualWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
outputShape,
netPrecision,
testValues.actual.dequantizationOnWeights,
as_type_ptr<opset1::Constant>(actualWeights));
}
actualFunction = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getOriginal(
testValues.actual.precisionBeforeDequantization,
netPrecision,
inputShape,
outputShape,
testValues.actual.dequantizationOnActivations,
actualWeights);
SimpleLowPrecisionTransformer transform;
transform.add<ngraph::pass::low_precision::ConvolutionBackpropDataTransformation, ngraph::opset1::Convolution>(testValues.params);
transform.transform(actualFunction);
std::shared_ptr<Node> refWeights = pass::low_precision::fold<opset1::Broadcast>(
testValues.expected.weights,
opset1::Constant::create(
element::i64,
Shape{inputShape.size()},
Shape{inputShape[1], outputShape[1], 1, 1}));
if (!testValues.expected.transformed) {
refWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
outputShape,
netPrecision,
testValues.actual.fakeQuantizeOnWeights,
as_type_ptr<opset1::Constant>(refWeights));
} else {
refWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
outputShape,
netPrecision,
testValues.expected.dequantizationOnWeights,
as_type_ptr<opset1::Constant>(refWeights));
}
referenceFunction = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getReference(
testValues.expected.precisionBeforeDequantization,
netPrecision,
inputShape,
outputShape,
testValues.expected.dequantizationOnActivations,
refWeights,
testValues.expected.dequantizationAfter);
}
static std::string getTestCaseName(testing::TestParamInfo<ConvolutionBackpropDataTransformationParams> obj) {
const auto netPrecision = std::get<0>(obj.param);
auto inputShape = std::get<1>(obj.param);
ConvolutionBackpropDataTransformationTestValues testValues = std::get<2>(obj.param);
std::ostringstream result;
result << toString(testValues.params) << "_" <<
netPrecision << "_" <<
inputShape << "_" <<
testValues.actual.precisionBeforeDequantization << "_" <<
testValues.actual.dequantizationOnActivations << "_" <<
testValues.actual.dequantizationOnWeights << "_" <<
testValues.actual.fakeQuantizeOnWeights << "_" <<"_weights_" <<
testValues.actual.weights->get_element_type() << "_" << "{ " <<
testValues.actual.weights->cast_vector<float>()[0] << " }_";
return result.str();
}
};
TEST_P(ConvolutionBackpropDataTransformation, CompareFunctions) {
actualFunction->validate_nodes_and_infer_types();
auto res = compare_functions(referenceFunction, actualFunction, true, true, true);
ASSERT_TRUE(res.first) << res.second;
}
const std::vector<element::Type> netPrecisions = {
element::f32,
element::f16
};
const std::vector<ngraph::Shape> shapes = {
ngraph::Shape({ 1, 8, 16, 16 })
};
const std::vector<ConvolutionBackpropDataTransformationTestValues> testValues = {
// with zero point
{
LayerTransformation::createParamsU8I8(),
// ActualValues
{
ngraph::element::u8,
{{ngraph::element::f32}, { 128.f }, { 0.02f }},
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
},
// ExpectedValues
{
ngraph::element::u8,
{{}, { { 128.f }, ngraph::element::f32, {}, false }, {}},
{},
{{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}},
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ -125.f }),
true
}
},
// updatePrecisions = false
{
LayerTransformation::createParamsU8I8().setUpdatePrecisions(false),
// ActualValues
{
ngraph::element::u8,
{{ngraph::element::f32}, { 128.f }, { 0.02f }},
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
},
// ExpectedValues
{
ngraph::element::u8,
{{}, { { 128.f }, ngraph::element::f32, {}, false }, {}},
{},
{{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}},
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -125.f }),
true
}
},
// QDq version
{
LayerTransformation::createParamsU8I8(),
// ActualValues
{
ngraph::element::u8,
{{ngraph::element::f32}, { 128.f }, { 0.02f }},
{{ngraph::element::f32}, { 2.f }, { 0.01f }},
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
},
// ExpectedValues
{
ngraph::element::u8,
{{}, { { 128.f }, ngraph::element::f32, {}, false }, {}},
{{}, { { 2.f }, ngraph::element::f32, {1, 2, 1, 1}, true, 1ul, element::i8, false, { "DISABLED_CONSTANT_FOLDING" } }, {}},
{{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1 }}},
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f }),
true
}
},
// without zero point
{
LayerTransformation::createParamsU8I8(),
// ActualValues
{
ngraph::element::u8,
{{ngraph::element::f32}, {}, { 0.02f }},
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
},
// ExpectedValues
{
ngraph::element::u8,
{},
{},
{{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}},
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ -125.f }),
true
}
},
// QDq version
{
LayerTransformation::createParamsU8I8(),
// ActualValues
{
ngraph::element::u8,
{{ngraph::element::f32}, {}, { 0.02f }},
{{ngraph::element::f32}, {}, { 0.01f }},
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
},
// ExpectedValues
{
ngraph::element::u8,
{},
{},
{{}, {}, {{ 0.0002f }, ngraph::element::f32, {1}}},
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f }),
true
}
},
// per-channel dequantization with the same values
{
LayerTransformation::createParamsU8I8(),
// ActualValues
{
ngraph::element::u8,
{{ngraph::element::f32}, {}, { std::vector<float>{0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f} }},
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
},
// ExpectedValues
{
ngraph::element::u8,
{},
{},
{{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}},
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ -125.f }),
true
}
},
// per-channel dequantization with different values
{
LayerTransformation::createParamsU8I8(),
// ActualValues
{
ngraph::element::u8,
{{ngraph::element::f32}, {}, { std::vector<float>{0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f} }},
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
},
// ExpectedValues
{
ngraph::element::u8,
{{ngraph::element::f32}, {}, { std::vector<float>{0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f} }},
{},
{},
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
true
}
},
};
INSTANTIATE_TEST_CASE_P(
smoke_LPT,
ConvolutionBackpropDataTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(shapes),
::testing::ValuesIn(testValues)),
ConvolutionBackpropDataTransformation::getTestCaseName);

View File

@ -231,7 +231,7 @@ const std::vector<ConvolutionQDqTransformationTestValues> testValues = {
} }
}, },
// Actual & Transformed: // Actual:
// //
// Parameter Constant Constant Constant // Parameter Constant Constant Constant
// |U8 |U8 |FP32 |I8 // |U8 |U8 |FP32 |I8
@ -246,6 +246,22 @@ const std::vector<ConvolutionQDqTransformationTestValues> testValues = {
// \FP32 /FP32 // \FP32 /FP32
// \ / // \ /
// Convolution // Convolution
//
// Transformed:
//
// Parameter Constant
// |U8 |U8
// | |
// Convert Convert
// \FP32 /FP32
// \ /
// Subtract Constant
// \FP32 /FP32
// \ /
// Multiply Constant
// \FP32 /FP32
// \ /
// Convolution
{ {
LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true),
// ActualValues // ActualValues
@ -262,8 +278,8 @@ const std::vector<ConvolutionQDqTransformationTestValues> testValues = {
{ {
ngraph::element::u8, ngraph::element::u8,
{{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, { 0.02f }}, {{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, { 0.02f }},
{{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::i8, true }, { 0.03f }}, {},
{ std::vector<float>{ 2.f }, ngraph::element::f32}, { std::vector<float>{ -3.75f }, ngraph::element::f32},
{}, {},
ngraph::element::f32, ngraph::element::f32,
{} {}
@ -434,12 +450,8 @@ const std::vector<ConvolutionQDqTransformationTestValues> testValues = {
{ {1000.f}, element::f32, {}, false }, { {1000.f}, element::f32, {}, false },
{ {0.02f}, element::f32, {}, false } { {0.02f}, element::f32, {}, false }
}, },
{ {},
{ ngraph::element::f32, false }, { std::vector<float>{ -3.75f }, ngraph::element::f32},
{ {127.f}, element::f32, {}, false },
{ {0.03f}, element::f32, {}, false }
},
{ std::vector<float>{ 2.f }, ngraph::element::i8},
{}, {},
ngraph::element::f32, ngraph::element::f32,
{} {}

View File

@ -160,8 +160,8 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
{ {
ngraph::element::u8, ngraph::element::u8,
{{ ngraph::element::f32 }, { 128.f }, { 0.02f }}, {{ ngraph::element::f32 }, { 128.f }, { 0.02f }},
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }), op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, {},
ngraph::element::f32, ngraph::element::f32,
{} {}
} }
@ -288,13 +288,13 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
{{ 128.f, 0.f, 128.f }, ngraph::element::f32, { 1, 3, 1, 1 }}, {{ 128.f, 0.f, 128.f }, ngraph::element::f32, { 1, 3, 1, 1 }},
{{ 0.02f, 0.01f, 0.03f }, ngraph::element::f32, {1, 3, 1, 1}} {{ 0.02f, 0.01f, 0.03f }, ngraph::element::f32, {1, 3, 1, 1}}
}, },
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }), op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, {},
ngraph::element::f32, ngraph::element::f32,
{} {}
} }
}, },
// dequantization in second dimension // float input
{ {
LayerTransformation::createParamsU8I8(), LayerTransformation::createParamsU8I8(),
// ActualValues // ActualValues
@ -316,8 +316,8 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
{{ 128.f }, ngraph::element::f32, { 1, 1, 1, 1 }}, {{ 128.f }, ngraph::element::f32, { 1, 1, 1, 1 }},
{{ 0.02f }, ngraph::element::f32, {1, 1, 1, 1}} {{ 0.02f }, ngraph::element::f32, {1, 1, 1, 1}}
}, },
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }), op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, {},
ngraph::element::f32, ngraph::element::f32,
{} {}
} }
@ -356,8 +356,8 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
{ {
ngraph::element::f32, ngraph::element::f32,
{{}, {}, { {0.02f}, element::f32 }}, {{}, {}, { {0.02f}, element::f32 }},
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }), op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, {},
ngraph::element::f32, ngraph::element::f32,
{} {}
} }
@ -396,8 +396,8 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
{ {
ngraph::element::u8, ngraph::element::u8,
{{element::f32}, { 1000.f }, { {0.02f}, element::f32 }}, {{element::f32}, { 1000.f }, { {0.02f}, element::f32 }},
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }), op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, {},
ngraph::element::f32, ngraph::element::f32,
{} {}
} }

View File

@ -160,8 +160,8 @@ const std::vector<GroupConvolutionTestValues> testValues = {
{ {
ngraph::element::u8, ngraph::element::u8,
{{ ngraph::element::f32 }, { 128.f }, { 0.02f }}, {{ ngraph::element::f32 }, { 128.f }, { 0.02f }},
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }), op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, {},
{}, {},
ngraph::element::f32, ngraph::element::f32,
{} {}
@ -286,8 +286,8 @@ const std::vector<GroupConvolutionTestValues> testValues = {
{ {
ngraph::element::f32, ngraph::element::f32,
{{}, {}, { 0.02f }}, {{}, {}, { 0.02f }},
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }), op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, {},
{}, {},
ngraph::element::f32, ngraph::element::f32,
{} {}
@ -459,8 +459,8 @@ const std::vector<GroupConvolutionTestValues> testValues = {
{ {
ngraph::element::f32, ngraph::element::f32,
{{}, {}, { 0.02f }}, {{}, {}, { 0.02f }},
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }), op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, {},
{}, {},
ngraph::element::f32, ngraph::element::f32,
{} {}

View File

@ -60,7 +60,7 @@ private:
auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, input_shape); auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, input_shape);
auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, weights_shape, {1}); auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, weights_shape, {1});
auto conv = std::make_shared<ngraph::op::DeconvolutionIE>(input, weights, ngraph::Strides(spatial_dims, 1), ngraph::Strides(spatial_dims, 1), auto conv = std::make_shared<ngraph::op::DeconvolutionIE>(input, weights, ngraph::Strides(spatial_dims, 1), ngraph::Strides(spatial_dims, 1),
ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0)); ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0), ngraph::element::f32);
return std::make_shared<ngraph::Function>(ngraph::NodeVector{conv}, ngraph::ParameterVector{input}); return std::make_shared<ngraph::Function>(ngraph::NodeVector{conv}, ngraph::ParameterVector{input});
} }

View File

@ -0,0 +1,100 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "low_precision_transformations/convolution_backprop_data_transformation.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32
};
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true),
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(false)
};
const std::vector<LayerTestsDefinitions::ConvolutionBackpropDataTransformationParam> params = {
// FQ on weights
// with zero point
{
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
{255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { -127.f }, { 127.f }},
"",
""
},
// without zero point
{
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
{255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }},
"",
""
},
// with incorrect zero point on activations
{
{256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
{255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }},
"",
""
},
// with incorrect zero point on weights
{
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
{255ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
"",
""
},
// QDq on weights
// with zero point
{
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
{{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
"",
""
},
// without zero point
{
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
{{ngraph::element::f32}, {}, { {4.f}, ngraph::element::f32, {}, false }},
"",
""
},
// with incorrect zero point on activations
{
{256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
{{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
"",
""
},
// with incorrect zero point on weights
{
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
{{ngraph::element::f32}, { {1000.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
"",
""
}
};
const std::vector<ngraph::Shape> inputShapes = {
{ 1, 8, 16, 16 }
};
const std::vector<ngraph::Shape> outputShapes = {
{ 16, 16 }
};
INSTANTIATE_TEST_CASE_P(smoke_LPT, ConvolutionBackpropDataTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(inputShapes),
::testing::ValuesIn(outputShapes),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(trasformationParamValues),
::testing::ValuesIn(params)),
ConvolutionBackpropDataTransformation::getTestCaseName);
} // namespace

View File

@ -0,0 +1,103 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "low_precision_transformations/convolution_backprop_data_transformation.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
ngraph::element::f16
};
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true),
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(false)
};
const std::vector<LayerTestsDefinitions::ConvolutionBackpropDataTransformationParam> params = {
// FQ on weights
// with zero point
{
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
{255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { -127.f }, { 127.f }},
"",
""
},
// without zero point
{
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
{255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }},
"",
""
},
// TODO: check fails in CI
// // with incorrect zero point on activations
// {
// {256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
// {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }},
// "",
// ""
// },
// // with incorrect zero point on weights
// {
// {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
// {255ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
// "",
// ""
// },
// QDq on weights
// with zero point
{
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
{{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
"",
""
},
// without zero point
{
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
{{ngraph::element::f32}, {}, { {4.f}, ngraph::element::f32, {}, false }},
"",
""
},
// with incorrect zero point on activations
{
{256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
{{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
"",
""
},
// with incorrect zero point on weights
{
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
{{ngraph::element::f32}, { {1000.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
"",
""
}
};
const std::vector<ngraph::Shape> inputShapes = {
{ 1, 8, 16, 16 },
{ 1, 32, 16, 16 }
};
const std::vector<ngraph::Shape> outputShapes = {
{ 16, 16 }
};
INSTANTIATE_TEST_CASE_P(smoke_LPT, ConvolutionBackpropDataTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(inputShapes),
::testing::ValuesIn(outputShapes),
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::ValuesIn(trasformationParamValues),
::testing::ValuesIn(params)),
ConvolutionBackpropDataTransformation::getTestCaseName);
} // namespace

View File

@ -0,0 +1,65 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <memory>
#include <utility>
#include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
namespace LayerTestsDefinitions {
class ConvolutionBackpropDataTransformationParam {
public:
ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData;
ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
ngraph::builder::subgraph::DequantizationOperations dequantizationOnWeights;
std::string layerName;
std::string expectedKernelType;
ConvolutionBackpropDataTransformationParam() = default;
ConvolutionBackpropDataTransformationParam(
const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantizeOnData,
const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights,
std::string layerName,
std::string expectedKernelType) :
fakeQuantizeOnData(fakeQuantizeOnData), fakeQuantizeOnWeights(fakeQuantizeOnWeights),
layerName(std::move(layerName)), expectedKernelType(std::move(expectedKernelType)) {}
ConvolutionBackpropDataTransformationParam(
const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantizeOnData,
ngraph::builder::subgraph::DequantizationOperations dequantizationOnWeights,
std::string layerName,
std::string expectedKernelType) :
fakeQuantizeOnData(fakeQuantizeOnData), dequantizationOnWeights(std::move(dequantizationOnWeights)),
layerName(std::move(layerName)), expectedKernelType(std::move(expectedKernelType)) {}
};
typedef std::tuple<
ngraph::element::Type, // netPrecision
ngraph::Shape, // inputShape
ngraph::Shape, // outputShape
std::string, // targetDevice
ngraph::pass::low_precision::LayerTransformation::Params,
ConvolutionBackpropDataTransformationParam
> ConvolutionBackpropDataTransformationParams;
class ConvolutionBackpropDataTransformation :
public testing::WithParamInterface<ConvolutionBackpropDataTransformationParams>,
public LayerTestsUtils::LayerTransformation {
public:
static std::string getTestCaseName(testing::TestParamInfo<ConvolutionBackpropDataTransformationParams> obj);
protected:
void SetUp() override;
void Run() override;
};
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,77 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision_transformations/convolution_backprop_data_transformation.hpp"
#include <tuple>
#include <vector>
#include <string>
#include "lpt_ngraph_functions/convolution_backprop_data_function.hpp"
namespace LayerTestsDefinitions {
std::string ConvolutionBackpropDataTransformation::getTestCaseName(testing::TestParamInfo<ConvolutionBackpropDataTransformationParams> obj) {
ngraph::element::Type netPrecision;
ngraph::Shape inputShape;
ngraph::Shape outputShape;
std::string targetDevice;
ngraph::pass::low_precision::LayerTransformation::Params params;
ConvolutionBackpropDataTransformationParam param;
std::tie(netPrecision, inputShape, outputShape, targetDevice, params, param) = obj.param;
std::ostringstream result;
result << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params) << "_" <<
outputShape << "_" <<
param.fakeQuantizeOnData << "_" <<
param.fakeQuantizeOnWeights << "_" <<
param.dequantizationOnWeights;
return result.str();
}
void ConvolutionBackpropDataTransformation::SetUp() {
threshold = 0.1f;
ngraph::element::Type netPrecision;
ngraph::Shape inputShape;
ngraph::Shape outputShape;
ngraph::pass::low_precision::LayerTransformation::Params params;
ConvolutionBackpropDataTransformationParam param;
std::tie(netPrecision, inputShape, outputShape, targetDevice, params, param) = this->GetParam();
std::shared_ptr<ngraph::Node> weights;
if (!param.fakeQuantizeOnWeights.empty()) {
weights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
ngraph::Shape{inputShape[1], inputShape[1] / 2, 1, 1},
netPrecision,
param.fakeQuantizeOnWeights);
} else {
weights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
ngraph::Shape{inputShape[1], inputShape[1] / 2, 1, 1},
netPrecision,
param.dequantizationOnWeights);
}
function = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::get(
netPrecision,
inputShape,
outputShape,
param.fakeQuantizeOnData,
weights);
}
void ConvolutionBackpropDataTransformation::Run() {
LayerTestsCommon::Run();
const auto params = std::get<5>(GetParam());
const auto actualType = getRuntimePrecision(params.layerName);
EXPECT_EQ(actualType, params.expectedKernelType);
}
TEST_P(ConvolutionBackpropDataTransformation, CompareWithRefImpl) {
Run();
};
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,54 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>
#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
namespace ngraph {
namespace builder {
namespace subgraph {
class ConvolutionBackpropDataFunction {
public:
static std::shared_ptr<Node> getWeights(
const Shape& shape,
const element::Type& netPrecision,
const builder::subgraph::DequantizationOperations& dequantizationOnWeights,
const std::shared_ptr<opset1::Constant>& value = nullptr);
static std::shared_ptr<Node> getWeights(
const Shape& shape,
const element::Type& netPrecision,
const builder::subgraph::FakeQuantizeOnWeights& fqOnWeights,
const std::shared_ptr<opset1::Constant>& value = nullptr);
static std::shared_ptr<Function> get(
const element::Type netPrecision,
const Shape& inputShape,
const Shape& outputShape,
const builder::subgraph::FakeQuantizeOnData& fqOnData,
const std::shared_ptr<Node>& weights);
static std::shared_ptr<Function> getOriginal(
const element::Type precision,
const element::Type netPrecision,
const Shape& inputShape,
const Shape& outputShape,
const builder::subgraph::DequantizationOperations& dequantization,
const std::shared_ptr<Node>& weights);
static std::shared_ptr<Function> getReference(
const element::Type precision,
const element::Type netPrecision,
const Shape& inputShape,
const Shape& outputShape,
const builder::subgraph::DequantizationOperations& dequantization,
const std::shared_ptr<Node>& weights,
const builder::subgraph::DequantizationOperations& dequantizationAfter);
};
} // namespace subgraph
} // namespace builder
} // namespace ngraph

View File

@ -0,0 +1,149 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "lpt_ngraph_functions/convolution_backprop_data_function.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph_ops/type_relaxed.hpp>
#include "ngraph_functions/subgraph_builders.hpp"
#include "low_precision/network_helper.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
#include "lpt_ngraph_functions/common/builders.hpp"
#include "low_precision/common/dequantization_op.hpp"
#include "low_precision/network_helper.hpp"
using namespace ngraph::pass::low_precision;
namespace ngraph {
namespace builder {
namespace subgraph {
std::shared_ptr<Function> ConvolutionBackpropDataFunction::get(
const element::Type netPrecision,
const Shape& inputShape,
const Shape& outputShape,
const builder::subgraph::FakeQuantizeOnData& fqOnData,
const std::shared_ptr<Node>& weights) {
const auto input = std::make_shared<opset1::Parameter>(netPrecision, inputShape);
const auto fq = makeFakeQuantize(input, netPrecision, fqOnData);
auto convolutionBackpropData = std::make_shared<opset1::ConvolutionBackpropData>(
fq,
weights,
Strides{ 1, 1 },
CoordinateDiff{ 0, 0 },
CoordinateDiff{ 0, 0 },
Strides{ 1, 1 });
ngraph::ResultVector results{ std::make_shared<opset1::Result>(convolutionBackpropData) };
return std::make_shared<ngraph::Function>(results, ParameterVector{ input }, "ConvolutionBackpropDataTransformation");
}
std::shared_ptr<Node> ConvolutionBackpropDataFunction::getWeights(
const Shape& shape,
const element::Type& netPrecision,
const builder::subgraph::FakeQuantizeOnWeights& fqOnWeights,
const std::shared_ptr<opset1::Constant>& value) {
const auto weights = value != nullptr ?
value :
std::make_shared<opset1::Constant>(
element::i8,
shape,
std::vector<float>(shape_size(shape), 1));
const auto convert = std::make_shared<opset1::Convert>(weights, netPrecision);
OutputVector convertedOutput(1);
convert->constant_fold(convertedOutput, convert->input_values());
const auto convertedWeights = convertedOutput[0].get_node_shared_ptr();
const auto fq = makeFakeQuantize(convertedWeights, netPrecision, fqOnWeights);
return fq;
}
std::shared_ptr<Node> ConvolutionBackpropDataFunction::getWeights(
const Shape& shape,
const element::Type& netPrecision,
const builder::subgraph::DequantizationOperations& dequantizationOnWeights,
const std::shared_ptr<opset1::Constant>& value) {
auto weights =
value != nullptr ?
value :
std::make_shared<opset1::Constant>(
element::i8,
shape,
std::vector<float>(shape_size(shape), 1));
auto dequantizationStructure = dequantizationOnWeights;
dequantizationStructure.setPrecision(netPrecision);
if (!dequantizationOnWeights.subtract.constantPrecision.is_real()) {
dequantizationStructure.subtract.constantPrecision = dequantizationOnWeights.subtract.constantPrecision;
}
if (weights->get_element_type().is_real()) {
weights = as_type_ptr<opset1::Constant>(fold<opset1::Convert>(weights, netPrecision));
}
const auto dq = makeDequantization(weights, dequantizationStructure);
return dq;
}
std::shared_ptr<Function> ConvolutionBackpropDataFunction::getOriginal(
const element::Type precision,
const element::Type netPrecision,
const Shape& inputShape,
const Shape& outputShape,
const builder::subgraph::DequantizationOperations& dequantization,
const std::shared_ptr<Node>& weights) {
const auto input = std::make_shared<opset1::Parameter>(precision, inputShape);
auto dequantizationStructure = dequantization;
dequantizationStructure.multiply.outPrecision = netPrecision;
const auto activations = makeDequantization(input, dequantizationStructure);
auto convolutionBackpropData = std::make_shared<opset1::ConvolutionBackpropData>(
activations,
weights,
Strides{ 1, 1 },
CoordinateDiff{ 0, 0 },
CoordinateDiff{ 0, 0 },
Strides{ 1, 1 });
convolutionBackpropData->set_friendly_name("output");
ngraph::ResultVector results{ std::make_shared<opset1::Result>(convolutionBackpropData) };
return std::make_shared<ngraph::Function>(results, ParameterVector{ input }, "ConvolutionBackpropDataTransformation");
}
std::shared_ptr<Function> ConvolutionBackpropDataFunction::getReference(
const element::Type precision,
const element::Type netPrecision,
const Shape& inputShape,
const Shape& outputShape,
const builder::subgraph::DequantizationOperations& dequantization,
const std::shared_ptr<Node>& weights,
const builder::subgraph::DequantizationOperations& dequantizationAfter) {
const auto input = std::make_shared<opset1::Parameter>(precision, inputShape);
auto dequantizationStructure = dequantization;
dequantizationStructure.multiply.outPrecision = netPrecision;
const auto activations = makeDequantization(input, dequantizationStructure);
auto convolutionBackpropData = std::make_shared<op::TypeRelaxed<opset1::ConvolutionBackpropData>>(
std::vector<element::Type>{ element::f32, element::f32 },
std::vector<element::Type>{ dequantizationAfter.empty() ? netPrecision : element::f32 },
ngraph::op::TemporaryReplaceOutputType(activations, element::f32).get(),
ngraph::op::TemporaryReplaceOutputType(weights, element::f32).get(),
Strides{ 1, 1 },
CoordinateDiff{ 0, 0 },
CoordinateDiff{ 0, 0 },
Strides{ 1, 1 });
auto dequantizationStructureAfter = dequantizationAfter;
dequantizationStructureAfter.multiply.outPrecision = netPrecision;
const auto result = makeDequantization(convolutionBackpropData, dequantizationStructureAfter);
result->set_friendly_name("output");
ngraph::ResultVector results{ std::make_shared<opset1::Result>(result) };
return std::make_shared<ngraph::Function>(results, ParameterVector{ input }, "ConvolutionBackpropDataTransformation");
}
} // namespace subgraph
} // namespace builder
} // namespace ngraph

View File

@ -86,8 +86,8 @@ namespace ngraph
class NGRAPH_API ConvolutionBackpropData : public Op class NGRAPH_API ConvolutionBackpropData : public Op
{ {
public: public:
static constexpr NodeTypeInfo type_info{"ConvolutionBackpropData", 1}; NGRAPH_RTTI_DECLARATION;
const NodeTypeInfo& get_type_info() const override { return type_info; }
/// \brief Constructs a batched-convolution data batch-backprop operation. /// \brief Constructs a batched-convolution data batch-backprop operation.
ConvolutionBackpropData() = default; ConvolutionBackpropData() = default;
// clang-format off // clang-format off

View File

@ -85,8 +85,8 @@ namespace ngraph
class NGRAPH_API GroupConvolutionBackpropData : public Op class NGRAPH_API GroupConvolutionBackpropData : public Op
{ {
public: public:
static constexpr NodeTypeInfo type_info{"GroupConvolutionBackpropData", 1}; NGRAPH_RTTI_DECLARATION;
const NodeTypeInfo& get_type_info() const override { return type_info; }
/// \brief Constructs a batched-convolution data batch-backprop operation. /// \brief Constructs a batched-convolution data batch-backprop operation.
GroupConvolutionBackpropData(); GroupConvolutionBackpropData();
// clang-format off // clang-format off

View File

@ -102,12 +102,14 @@ shared_ptr<Node> op::v1::Convolution::clone_with_new_inputs(const OutputVector&
m_auto_pad); m_auto_pad);
} }
constexpr NodeTypeInfo op::v1::ConvolutionBackpropData::type_info;
shared_ptr<Node> op::v1::Convolution::get_default_value() const shared_ptr<Node> op::v1::Convolution::get_default_value() const
{ {
return ngraph::make_constant_from_string("0", get_element_type(), get_shape()); return ngraph::make_constant_from_string("0", get_element_type(), get_shape());
} }
// *** ConvolutionBackpropData OP SET 1 ***
NGRAPH_RTTI_DEFINITION(op::v1::ConvolutionBackpropData, "ConvolutionBackpropData", 1);
op::v1::ConvolutionBackpropData::ConvolutionBackpropData(const Output<Node>& data, op::v1::ConvolutionBackpropData::ConvolutionBackpropData(const Output<Node>& data,
const Output<Node>& filters, const Output<Node>& filters,
const Output<Node>& output_shape, const Output<Node>& output_shape,

View File

@ -286,7 +286,7 @@ shared_ptr<Node> op::v1::GroupConvolution::clone_with_new_inputs(const OutputVec
// v1::GroupConvolutionBackpropData // v1::GroupConvolutionBackpropData
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
constexpr NodeTypeInfo op::v1::GroupConvolutionBackpropData::type_info; NGRAPH_RTTI_DEFINITION(op::v1::GroupConvolutionBackpropData, "GroupConvolutionBackpropData", 1);
op::v1::GroupConvolutionBackpropData::GroupConvolutionBackpropData() op::v1::GroupConvolutionBackpropData::GroupConvolutionBackpropData()
: Op() : Op()