[LPT] ConvolutionBackpropData support (#5313)
* [LPT] ConvolutionBackpropData support
* minor fixes
* [Transformations] Legacy subtract precision keep
* [LPT] ConvolutionBackpropData tests improvements
* [LPT] ConvolutionBackpropData weights folding when can't be transformed
* [LPT] CanBeTransformed unification and convolution weights folding
* [LPT] GPU INT8 optimizations condition flag
* [LPT] Concat precision predict improvement
* [LPT] Turn off asymmetric quantization for Deconvolution on GPU
* [LPT] Improvements from review
* [LPT] Check if layer after concat isQuantized and require per-tensor quantize
* [LPT] Improvement for Deconv->FQ pattern
* [LPT] Commented failing tests
This commit is contained in:
parent
f84b25722c
commit
e41e25533d
@ -70,6 +70,7 @@
|
|||||||
#include <low_precision/pull_reshape_through_dequantization.hpp>
|
#include <low_precision/pull_reshape_through_dequantization.hpp>
|
||||||
#include <low_precision/pull_transpose_through_dequantization.hpp>
|
#include <low_precision/pull_transpose_through_dequantization.hpp>
|
||||||
#include <low_precision/transformer.hpp>
|
#include <low_precision/transformer.hpp>
|
||||||
|
#include <low_precision/convolution_backprop_data.hpp>
|
||||||
#include <low_precision/mat_mul.hpp>
|
#include <low_precision/mat_mul.hpp>
|
||||||
#include <low_precision/strided_slice.hpp>
|
#include <low_precision/strided_slice.hpp>
|
||||||
#include <low_precision/network_helper.hpp>
|
#include <low_precision/network_helper.hpp>
|
||||||
@ -381,6 +382,9 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
|
|||||||
.add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params)
|
.add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params)
|
||||||
.setSupportAsymmetricQuantization(false)
|
.setSupportAsymmetricQuantization(false)
|
||||||
.setSupport3DTensorOnActivations(false))
|
.setSupport3DTensorOnActivations(false))
|
||||||
|
.add<ConvolutionBackpropDataTransformation, ngraph::opset1::ConvolutionBackpropData>(LayerTransformation::Params(params)
|
||||||
|
.setSupportAsymmetricQuantization(false)
|
||||||
|
.setDeconvolutionSpecificChannelsRatio(true))
|
||||||
// INT8 StridedSlice not supported
|
// INT8 StridedSlice not supported
|
||||||
.remove<StridedSliceTransformation, ngraph::opset1::StridedSlice>());
|
.remove<StridedSliceTransformation, ngraph::opset1::StridedSlice>());
|
||||||
|
|
||||||
|
@ -0,0 +1,25 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <ngraph/ngraph.hpp>
|
||||||
|
#include "weightable_layer_transformation.hpp"
|
||||||
|
|
||||||
|
namespace ngraph {
|
||||||
|
namespace pass {
|
||||||
|
namespace low_precision {
|
||||||
|
|
||||||
|
/**
 * @brief Low precision transformation registered for opset1::ConvolutionBackpropData.
 *
 * Derives from WeightableLayerTransformation and overrides the matcher
 * registration, the rewrite itself and the two applicability predicates.
 */
class TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation {
public:
    /// @param params transformation parameters, forwarded to the base class.
    ConvolutionBackpropDataTransformation(const Params& params);

    /// Registers the ConvolutionBackpropData patterns handled by this transformation.
    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;

    /// Rewrites the matched ConvolutionBackpropData; returns true when the match was handled.
    bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override;

    /// Tells whether @p op can be moved to low precision by this transformation.
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;

    /// Tells whether @p layer is considered quantized.
    bool isQuantized(std::shared_ptr<Node> layer) const noexcept override;
};
|
||||||
|
|
||||||
|
} // namespace low_precision
|
||||||
|
} // namespace pass
|
||||||
|
} // namespace ngraph
|
@ -45,6 +45,13 @@ class TRANSFORMATIONS_API DataPrecision {
|
|||||||
public:
|
public:
|
||||||
DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {}
|
DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {}
|
||||||
|
|
||||||
|
// Builds a descriptor for the given element type: the value range comes from
// getMinValue/getMaxValue evaluated for 256 levels, and no zero point is set.
explicit DataPrecision(const element::Type& precision) :
    precision(precision),
    min(getMinValue(precision, 256)),
    max(getMaxValue(precision, 256)),
    hasZeroPoint(false) {}
|
||||||
|
|
||||||
DataPrecision(const element::Type precision, const float min, const float max, const bool hasZeroPoint) :
|
DataPrecision(const element::Type precision, const float min, const float max, const bool hasZeroPoint) :
|
||||||
precision(precision),
|
precision(precision),
|
||||||
min(min),
|
min(min),
|
||||||
@ -122,29 +129,6 @@ public:
|
|||||||
static element::Type getPrecision(const size_t /* quantizationLevels */, const bool signedInterval) {
|
static element::Type getPrecision(const size_t /* quantizationLevels */, const bool signedInterval) {
|
||||||
return signedInterval ? element::i8 : element::u8;
|
return signedInterval ? element::i8 : element::u8;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Lower bound of the quantized interval.
// Unsigned intervals always start at 0. Signed intervals start at -127 for
// 255 levels and at -128 for 256 levels.
static float getMin(const size_t quantizationLevels, const bool signedInterval) {
    if (!signedInterval) {
        return 0.0f;
    }

    // FIXME: not completed - levels other than 255/256 are not rejected
    // (THROW_TRANSFORMATION_EXCEPTION is disabled) and share the 256-level bound.
    return (quantizationLevels == 255) ? -127.0f : -128.0f;
}
|
|
||||||
|
|
||||||
// Upper bound of the quantized interval: 127 for signed, 255 for unsigned.
static float getMax(const size_t quantizationLevels, const bool signedInterval) {
    // FIXME: not completed - the level count does not influence the result yet:
    // unsupported levels are not rejected (THROW_TRANSFORMATION_EXCEPTION is disabled)
    // and `quantizationLevels - 1.0` is not used.
    static_cast<void>(quantizationLevels);
    return signedInterval ? 127.0f : 255.0f;
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
inline bool operator==(const DataPrecision& value1, const DataPrecision& value2) {
|
inline bool operator==(const DataPrecision& value1, const DataPrecision& value2) {
|
||||||
@ -181,7 +165,8 @@ public:
|
|||||||
std::vector<element::Type> precisionsOnActivations = { element::u8, element::i8 },
|
std::vector<element::Type> precisionsOnActivations = { element::u8, element::i8 },
|
||||||
std::vector<element::Type> precisionsOnWeights = { element::i8 },
|
std::vector<element::Type> precisionsOnWeights = { element::i8 },
|
||||||
element::Type deqPrecision = element::f32,
|
element::Type deqPrecision = element::f32,
|
||||||
bool support3DTensorOnActivations = true) :
|
bool support3DTensorOnActivations = true,
|
||||||
|
bool deconvolutionSpecificChannelsRatio = false) :
|
||||||
updatePrecisions(updatePrecisions),
|
updatePrecisions(updatePrecisions),
|
||||||
quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations),
|
quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations),
|
||||||
quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights),
|
quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights),
|
||||||
@ -189,7 +174,8 @@ public:
|
|||||||
precisionsOnActivations(precisionsOnActivations),
|
precisionsOnActivations(precisionsOnActivations),
|
||||||
precisionsOnWeights(precisionsOnWeights),
|
precisionsOnWeights(precisionsOnWeights),
|
||||||
deqPrecision(deqPrecision),
|
deqPrecision(deqPrecision),
|
||||||
support3DTensorOnActivations(support3DTensorOnActivations) {
|
support3DTensorOnActivations(support3DTensorOnActivations),
|
||||||
|
deconvolutionSpecificChannelsRatio(deconvolutionSpecificChannelsRatio) {
|
||||||
if (precisionsOnActivations.size() == 0ul) {
|
if (precisionsOnActivations.size() == 0ul) {
|
||||||
THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed";
|
THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed";
|
||||||
}
|
}
|
||||||
@ -234,6 +220,11 @@ public:
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Chainable setter: stores the deconvolution channels-ratio flag and returns this object.
Params& setDeconvolutionSpecificChannelsRatio(const bool deconvolutionSpecificChannelsRatio) {
    Params& self = *this;
    self.deconvolutionSpecificChannelsRatio = deconvolutionSpecificChannelsRatio;
    return self;
}
|
||||||
|
|
||||||
bool updatePrecisions;
|
bool updatePrecisions;
|
||||||
QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
|
QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
|
||||||
QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
|
QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
|
||||||
@ -242,6 +233,7 @@ public:
|
|||||||
std::vector<element::Type> precisionsOnWeights;
|
std::vector<element::Type> precisionsOnWeights;
|
||||||
element::Type deqPrecision;
|
element::Type deqPrecision;
|
||||||
bool support3DTensorOnActivations;
|
bool support3DTensorOnActivations;
|
||||||
|
bool deconvolutionSpecificChannelsRatio;
|
||||||
};
|
};
|
||||||
|
|
||||||
class PrecisionDetails {
|
class PrecisionDetails {
|
||||||
@ -318,6 +310,7 @@ protected:
|
|||||||
std::vector<element::Type> precisionsOnWeights;
|
std::vector<element::Type> precisionsOnWeights;
|
||||||
element::Type deqPrecision;
|
element::Type deqPrecision;
|
||||||
bool support3DTensorOnActivations;
|
bool support3DTensorOnActivations;
|
||||||
|
bool deconvolutionSpecificChannelsRatio;
|
||||||
|
|
||||||
// absolute value, used to determine quantization interval asymmetry
|
// absolute value, used to determine quantization interval asymmetry
|
||||||
float quantizationIntervalAsymmetryThreshold;
|
float quantizationIntervalAsymmetryThreshold;
|
||||||
|
@ -109,7 +109,8 @@ public:
|
|||||||
const float max,
|
const float max,
|
||||||
const bool hasZeroPoint,
|
const bool hasZeroPoint,
|
||||||
const bool updatePrecision,
|
const bool updatePrecision,
|
||||||
const element::Type deqPrecision = element::f32);
|
const element::Type deqPrecision = element::f32,
|
||||||
|
const size_t outChannelsShapeIndex = 0);
|
||||||
|
|
||||||
static std::shared_ptr<opset1::FakeQuantize> updateFakeQuantize(
|
static std::shared_ptr<opset1::FakeQuantize> updateFakeQuantize(
|
||||||
std::shared_ptr<opset1::FakeQuantize> fq,
|
std::shared_ptr<opset1::FakeQuantize> fq,
|
||||||
@ -183,7 +184,7 @@ public:
|
|||||||
static std::shared_ptr<Node> toScalarIfPossible(std::shared_ptr<Node> node);
|
static std::shared_ptr<Node> toScalarIfPossible(std::shared_ptr<Node> node);
|
||||||
|
|
||||||
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq);
|
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq);
|
||||||
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues);
|
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues, int outChannelsShapeIndex = 0);
|
||||||
|
|
||||||
static FakeQuantizeDequantization foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace = false);
|
static FakeQuantizeDequantization foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace = false);
|
||||||
|
|
||||||
@ -191,8 +192,16 @@ public:
|
|||||||
|
|
||||||
static std::shared_ptr<opset1::FakeQuantize> fuseConvert(const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize);
|
static std::shared_ptr<opset1::FakeQuantize> fuseConvert(const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize);
|
||||||
|
|
||||||
|
static std::vector<element::Type> precisionIntersection(
|
||||||
|
const std::vector<element::Type>& v1,
|
||||||
|
const std::vector<element::Type>& v2) noexcept;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static std::shared_ptr<Node> foldFakeQuantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues, const bool roundValuesWasSet);
|
static std::shared_ptr<Node> foldFakeQuantize(
|
||||||
|
const std::shared_ptr<opset1::FakeQuantize>& fq,
|
||||||
|
const bool roundValues,
|
||||||
|
const bool roundValuesWasSet,
|
||||||
|
int outChannelsShapeIndex = 0);
|
||||||
|
|
||||||
// 1 - on weights
|
// 1 - on weights
|
||||||
// 0 - weightable layer was not found
|
// 0 - weightable layer was not found
|
||||||
|
@ -303,10 +303,6 @@ private:
|
|||||||
std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>> transformations,
|
std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>> transformations,
|
||||||
GraphRewrite& pass,
|
GraphRewrite& pass,
|
||||||
TransformationContext& context);
|
TransformationContext& context);
|
||||||
|
|
||||||
std::vector<element::Type> precisionIntersection(
|
|
||||||
const std::vector<element::Type>& v1,
|
|
||||||
const std::vector<element::Type>& v2) const noexcept;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite {
|
class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite {
|
||||||
|
@ -22,7 +22,7 @@ public:
|
|||||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void decomposeFakeQuantizeForWeightsPath(std::shared_ptr<Node> weightableLayer) const;
|
void decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& weightableLayer, size_t outChannelsShapeIndex = 0ul) const;
|
||||||
static bool isGroup(const std::shared_ptr<Node>& node);
|
static bool isGroup(const std::shared_ptr<Node>& node);
|
||||||
static bool isDepthwise(const std::shared_ptr<Node>& node);
|
static bool isDepthwise(const std::shared_ptr<Node>& node);
|
||||||
|
|
||||||
|
@ -42,6 +42,7 @@ std::shared_ptr<opset1::Subtract> replaceToSubtract(const std::shared_ptr<Node>&
|
|||||||
const auto parent = add->get_input_node_shared_ptr(dataBranchIndex);
|
const auto parent = add->get_input_node_shared_ptr(dataBranchIndex);
|
||||||
if (is_type<opset1::Convolution>(parent) ||
|
if (is_type<opset1::Convolution>(parent) ||
|
||||||
is_type<opset1::GroupConvolution>(parent) ||
|
is_type<opset1::GroupConvolution>(parent) ||
|
||||||
|
is_type<opset1::ConvolutionBackpropData>(parent) ||
|
||||||
(is_type<opset1::MatMul>(parent) &&
|
(is_type<opset1::MatMul>(parent) &&
|
||||||
(is_type<opset1::Constant>(parent->get_input_node_ptr(0)) || is_type<opset1::Constant>(parent->get_input_node_ptr(1))))) {
|
(is_type<opset1::Constant>(parent->get_input_node_ptr(0)) || is_type<opset1::Constant>(parent->get_input_node_ptr(1))))) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
@ -50,14 +50,14 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
DataPrecision dataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false);
|
std::vector<element::Type> concatParentsChildrensPrecisions = precisionsOnActivations;
|
||||||
if (dataPrecision.precision == ngraph::element::undefined) {
|
fillAvailablePrecisions(subgraph.quantizationLayers[0], concatParentsChildrensPrecisions);
|
||||||
|
if (concatParentsChildrensPrecisions.empty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unordered_map<std::string, ngraph::pass::low_precision::FakeQuantizeDequantization> dequantizations;
|
|
||||||
for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
|
for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
|
||||||
const std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
|
fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
|
||||||
if (fq == nullptr) {
|
if (fq == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -72,21 +72,20 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
|
|||||||
if (quantizationDetails.inputHighValues.size() != 1ul) {
|
if (quantizationDetails.inputHighValues.size() != 1ul) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
std::vector<element::Type> fqChildrensPrecisions = precisionsOnActivations;
|
||||||
|
fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrensPrecisions);
|
||||||
|
concatParentsChildrensPrecisions = NetworkHelper::precisionIntersection(concatParentsChildrensPrecisions, fqChildrensPrecisions);
|
||||||
|
|
||||||
const DataPrecision dataPrecision2 = getDataPrecision(subgraph.quantizationLayers[i]->shared_from_this(), quantizationDetails, false);
|
if (concatParentsChildrensPrecisions.empty()) {
|
||||||
if (dataPrecision2.precision == ngraph::element::undefined) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dataPrecision.precision != dataPrecision2.precision) {
|
|
||||||
// quantization levels are the same, difference can be in sign
|
|
||||||
// wider interval (precision) is preferable: use signed if least one interval is signed
|
|
||||||
dataPrecision = dataPrecision.precision.is_signed() ? dataPrecision : dataPrecision2;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dataPrecision.precision == ngraph::element::undefined) {
|
DataPrecision dataPrecision;
|
||||||
return false;
|
if (std::find(concatParentsChildrensPrecisions.begin(), concatParentsChildrensPrecisions.end(), element::i8) != concatParentsChildrensPrecisions.end()) {
|
||||||
|
dataPrecision = DataPrecision(element::i8);
|
||||||
|
} else {
|
||||||
|
dataPrecision = DataPrecision(concatParentsChildrensPrecisions[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<QuantizationDetails> quantizationLayersDetails;
|
std::vector<QuantizationDetails> quantizationLayersDetails;
|
||||||
|
@ -27,7 +27,9 @@ bool ConcatMultiChannelsTransformation::isMultiChannel(const std::vector<std::sh
|
|||||||
for (const std::shared_ptr<ngraph::opset1::Concat>& concat : concatLayers) {
|
for (const std::shared_ptr<ngraph::opset1::Concat>& concat : concatLayers) {
|
||||||
const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(concat);
|
const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(concat);
|
||||||
for (const std::shared_ptr<ngraph::Node>& child : children) {
|
for (const std::shared_ptr<ngraph::Node>& child : children) {
|
||||||
if (is_type<ngraph::opset1::Convolution>(child.get())) {
|
if ((is_type<ngraph::opset1::Convolution>(child.get()) ||
|
||||||
|
is_type<ngraph::opset1::ConvolutionBackpropData>(child.get())) &&
|
||||||
|
this->layerTransformationsManager->isQuantized(child)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -42,7 +42,27 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
auto convolution = m.get_match_root();
|
auto convolution = m.get_match_root();
|
||||||
|
|
||||||
if (!canConvolutionBeTransformed(context, convolution)) {
|
if (!canConvolutionBeTransformed(context, convolution)) {
|
||||||
return false;
|
auto weightInput = convolution->get_input_node_shared_ptr(1);
|
||||||
|
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightInput);
|
||||||
|
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
|
||||||
|
NetworkHelper::getDequantization(convolution, 1ul) :
|
||||||
|
NetworkHelper::getDequantization(reshapeFromWeights);
|
||||||
|
if (dequantization.empty()) {
|
||||||
|
const auto fqOnWeights = getFakeQuantizeOnWeights(convolution);
|
||||||
|
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
|
||||||
|
if (reshapeFromWeights != nullptr) {
|
||||||
|
resultConstant = fold_reshape<opset1::Reshape>(
|
||||||
|
resultConstant,
|
||||||
|
reshapeFromWeights->input_value(1),
|
||||||
|
false);
|
||||||
|
}
|
||||||
|
if (as_type_ptr<opset1::Constant>(resultConstant)) {
|
||||||
|
replace_node(weightInput, resultConstant);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
convolution = NetworkHelper::separateInStandaloneBranch(convolution);
|
convolution = NetworkHelper::separateInStandaloneBranch(convolution);
|
||||||
|
@ -0,0 +1,218 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "low_precision/convolution_backprop_data.hpp"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
#include "low_precision/network_helper.hpp"
|
||||||
|
#include "low_precision/common/dequantization_op.hpp"
|
||||||
|
|
||||||
|
namespace ngraph {
|
||||||
|
namespace pass {
|
||||||
|
namespace low_precision {
|
||||||
|
|
||||||
|
// Only forwards the parameters to the WeightableLayerTransformation base.
ConvolutionBackpropDataTransformation::ConvolutionBackpropDataTransformation(const Params& params)
    : WeightableLayerTransformation(params) {}
|
||||||
|
|
||||||
|
// Registers four ConvolutionBackpropData patterns. The first input is always a
// Multiply; the second is either a Multiply or a FakeQuantize; each combination is
// registered both without and with a third Constant input.
void ConvolutionBackpropDataTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
    // Small factories so every pattern gets its own fresh label nodes.
    const auto multiply = [] { return make_op_label<opset1::Multiply>(); };
    const auto fakeQuantize = [] { return make_op_label<opset1::FakeQuantize>(); };
    const auto constant = [] { return make_op_label<opset1::Constant>(); };

    // two inputs
    addPattern(pass, context, make_op_pattern<opset1::ConvolutionBackpropData>({ multiply(), multiply() }));
    addPattern(pass, context, make_op_pattern<opset1::ConvolutionBackpropData>({ multiply(), fakeQuantize() }));

    // three inputs: the same two variants followed by a Constant
    addPattern(pass, context, make_op_pattern<opset1::ConvolutionBackpropData>({ multiply(), multiply(), constant() }));
    addPattern(pass, context, make_op_pattern<opset1::ConvolutionBackpropData>({ multiply(), fakeQuantize(), constant() }));
}
|
||||||
|
|
||||||
|
bool ConvolutionBackpropDataTransformation::isQuantized(std::shared_ptr<Node> layer) const noexcept {
|
||||||
|
if (deconvolutionSpecificChannelsRatio) {
|
||||||
|
size_t inputChannels = layer->get_input_shape(0)[1];
|
||||||
|
size_t outputChannels = layer->get_output_shape(0)[1];
|
||||||
|
if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return WeightableLayerTransformation::isQuantized(layer, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handles a matched ConvolutionBackpropData.
//
// When canBeTransformed() rejects the node, only the weights path is folded
// (FakeQuantize folded to a constant, or the weights dequantization folded in place)
// and the match is reported as handled.
//
// Otherwise the dequantization on activations and the dequantization on weights are
// moved after the operation as Multiply nodes, Convert nodes in front of the operation
// are removed, and the resulting multiplies are optimized into the final dequantization.
//
// Always returns true.
bool ConvolutionBackpropDataTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const {
    auto convolutionBackpropData = m.get_match_root();

    if (!canBeTransformed(context, convolutionBackpropData)) {
        // Not transformable: fold whatever sits on the weights input and stop.
        auto weightsInput = convolutionBackpropData->get_input_node_shared_ptr(1);
        std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightsInput);
        FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
            NetworkHelper::getDequantization(convolutionBackpropData, 1ul) :
            NetworkHelper::getDequantization(reshapeFromWeights);
        if (dequantization.empty()) {
            const auto fqOnWeights = getFakeQuantizeOnWeights(convolutionBackpropData);
            std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
            if (reshapeFromWeights != nullptr) {
                resultConstant = fold_reshape<opset1::Reshape>(
                    resultConstant,
                    reshapeFromWeights->input_value(1),
                    false);
            }
            // Replace the weights input only when folding really produced a Constant.
            if (as_type_ptr<opset1::Constant>(resultConstant)) {
                replace_node(weightsInput, resultConstant);
            }
        } else {
            NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
        }
        return true;
    }

    convolutionBackpropData = NetworkHelper::separateInStandaloneBranch(convolutionBackpropData);
    FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData);
    {
        // --- activations path ---
        if (dequantization.subtract != nullptr) {
            std::shared_ptr<ngraph::Node> layer = dequantization.subtract;
            ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(layer);

            NetworkHelper::optimizeSubtract(dequantization.subtract);
        }

        // The scale placed after the operation has shape {1} and takes the first
        // element of the original multiply constant.
        std::shared_ptr<opset1::Constant> reducedConstant = as_type_ptr<opset1::Constant>(dequantization.multiplyConstant);
        std::shared_ptr<Node> newMultiplyAfterConst = std::make_shared<opset1::Constant>(
            reducedConstant->get_output_element_type(0),
            Shape{ 1 },
            reducedConstant->cast_vector<float>()[0]);

        // Feed the operation with the input of the dequantization Multiply.
        auto inputs = convolutionBackpropData->input_values();
        inputs[0] = dequantization.multiply->input_value(0);
        const auto copyNode = convolutionBackpropData->copy_with_new_inputs(inputs);

        const auto relaxedConvolutionBackpropData = std::make_shared<op::TypeRelaxed<opset1::ConvolutionBackpropData>>(
            *as_type_ptr<opset1::ConvolutionBackpropData>(copyNode),
            std::vector<element::Type>{deqPrecision, deqPrecision},
            std::vector<element::Type>{deqPrecision});

        const auto newMultiplyAfter = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
            std::vector<element::Type>{ deqPrecision, deqPrecision },
            std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
            ngraph::op::TemporaryReplaceOutputType(relaxedConvolutionBackpropData, deqPrecision).get(),
            ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get());

        replace_node(convolutionBackpropData, newMultiplyAfter);
        convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr();

        // Remove a Convert on activations. The parent's first input is read only after
        // the parent is known to be a Convert: reading input 0 of a parent that has no
        // inputs (for example a Parameter) is not valid.
        if (is_type<opset1::Convert>(convolutionBackpropData->get_input_node_ptr(0))) {
            inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0);
            auto newConvolution = convolutionBackpropData->copy_with_new_inputs(inputs);
            replace_node(convolutionBackpropData, newConvolution);
            convolutionBackpropData = newConvolution;
        }
    }

    {
        // --- weights path ---
        decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul);

        dequantization = NetworkHelper::getDequantization(convolutionBackpropData, 1ul);

        // A FakeQuantize still feeding the weights dequantization is folded (with rounding).
        if (is_type<opset1::FakeQuantize>(dequantization.data.get_node())) {
            const std::shared_ptr<opset1::FakeQuantize> fq = as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr());
            std::shared_ptr<ngraph::Node> newFQ = NetworkHelper::fold_fake_quantize(fq, true);
            NetworkHelper::copyInfo(fq, newFQ);
            replace_node(fq, newFQ);
        }

        std::shared_ptr<opset1::Multiply> multiplyFromWeights = as_type_ptr<opset1::Multiply>(
            convolutionBackpropData->input_value(1).get_node_shared_ptr());
        std::shared_ptr<opset1::Subtract> subtractFromWeights = as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0));

        {
            // Move the weights scale after the operation.
            Shape newScaleShape = multiplyFromWeights->get_input_shape(1);
            auto inputs = convolutionBackpropData->input_values();
            inputs[1] = multiplyFromWeights->input_value(0);
            auto newMultiplyAfter = std::make_shared<DequantizationMultiply>(
                convolutionBackpropData->copy_with_new_inputs(inputs),
                foldConvert(
                    fold_reshape<opset1::Reshape>(
                        multiplyFromWeights->input_value(1),
                        std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
                        false),
                    convolutionBackpropData->get_output_element_type(0)));
            replace_node(convolutionBackpropData, newMultiplyAfter);
            convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr();
        }

        if (subtractFromWeights != nullptr) {
            // optimize zero point on weights
            auto optimizedSubtract = NetworkHelper::optimizeSubtract(subtractFromWeights);
            if (optimizedSubtract == nullptr) {
                subtractFromWeights = nullptr;
            } else {
                subtractFromWeights = as_type_ptr<opset1::Subtract>(optimizedSubtract);

                // Broadcast the zero point to a shape that is 1 everywhere except
                // dimension 1, which takes the size of the weights' dimension 1.
                const Shape weightsShape = subtractFromWeights->input(0).get_shape();
                Shape zeroPointShape(weightsShape.size(), 1ul);
                zeroPointShape[1] = weightsShape[1];

                auto zeroPointConstant = fold<opset1::Broadcast>(
                    subtractFromWeights->get_input_node_shared_ptr(1),
                    std::make_shared<opset1::Constant>(element::i32, Shape{zeroPointShape.size()}, zeroPointShape));
                replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
            }
        }

        std::shared_ptr<opset1::Convert> convertFromWeights =
            as_type_ptr<opset1::Convert>(
                subtractFromWeights == nullptr ?
                    multiplyFromWeights->get_input_node_shared_ptr(0) :
                    subtractFromWeights->get_input_node_shared_ptr(0));
        if (convertFromWeights != nullptr) {
            auto inputs = convolutionBackpropData->input_values();
            inputs[1] = convolutionBackpropData->get_input_node_ptr(1)->input_value(0);
            // remove Convert on weights
            auto newConvolution = convolutionBackpropData->clone_with_new_inputs(inputs);
            replace_node(convolutionBackpropData, newConvolution);
            convolutionBackpropData = newConvolution;
        }
    }

    // Optimize the multiplies that now follow the operation into the final dequantization.
    std::shared_ptr<ngraph::opset1::Multiply> finalDequantization = NetworkHelper::optimizeMultipliesAfter(
        convolutionBackpropData->output(0).get_target_inputs().begin()->get_node()->shared_from_this());
    ngraph::copy_runtime_info({ convolutionBackpropData, finalDequantization }, finalDequantization);
    updateOutput(context, finalDequantization, convolutionBackpropData);

    // A Subtract left on the weights path (possibly behind a Reshape) is tagged
    // with the DISABLED_CONSTANT_FOLDING runtime attribute.
    auto onWeights = convolutionBackpropData->get_input_node_shared_ptr(1);
    if (is_type<opset1::Reshape>(onWeights)) {
        onWeights = onWeights->get_input_node_shared_ptr(0);
    }

    if (is_type<opset1::Subtract>(onWeights)) {
        auto& rt = onWeights->get_rt_info();
        rt["DISABLED_CONSTANT_FOLDING"] = std::make_shared<ngraph::VariantWrapper<std::string>>("");
    }

    return true;
}
|
||||||
|
|
||||||
|
bool ConvolutionBackpropDataTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const {
|
||||||
|
if (deconvolutionSpecificChannelsRatio) {
|
||||||
|
size_t inputChannels = op->get_input_shape(0)[1];
|
||||||
|
size_t outputChannels = op->get_output_shape(0)[1];
|
||||||
|
if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return canConvolutionBeTransformed(context, op);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace low_precision
|
||||||
|
} // namespace pass
|
||||||
|
} // namespace ngraph
|
@ -20,7 +20,7 @@ void FakeQuantizeTransformation::registerMatcherIn(GraphRewrite& pass, Transform
|
|||||||
|
|
||||||
bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
|
bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
|
||||||
std::shared_ptr<opset1::FakeQuantize> layer = std::dynamic_pointer_cast<opset1::FakeQuantize>(m.get_match_root());
|
std::shared_ptr<opset1::FakeQuantize> layer = std::dynamic_pointer_cast<opset1::FakeQuantize>(m.get_match_root());
|
||||||
if (!NetworkHelper::isQuantizeSupported(layer)) {
|
if (!QuantizationDetails::outputLayoutIsSupported(layer)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -149,7 +149,9 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
|
|||||||
inputHighConst_f32 = fq::updateShape(fold<opset1::Add>(inputHighConst_f32, value), fakeQuantize->get_output_shape(0));
|
inputHighConst_f32 = fq::updateShape(fold<opset1::Add>(inputHighConst_f32, value), fakeQuantize->get_output_shape(0));
|
||||||
} else if (is_type<opset1::Add>(eltwise) && checkElementwise(eltwise)) {
|
} else if (is_type<opset1::Add>(eltwise) && checkElementwise(eltwise)) {
|
||||||
if (is_type<opset1::Convolution>(fq::getData(eltwise)) ||
|
if (is_type<opset1::Convolution>(fq::getData(eltwise)) ||
|
||||||
is_type<opset1::GroupConvolution>(fq::getData(eltwise))) {
|
is_type<opset1::GroupConvolution>(fq::getData(eltwise)) ||
|
||||||
|
is_type<opset1::ConvolutionBackpropData>(fq::getData(eltwise)) ||
|
||||||
|
is_type<opset1::GroupConvolutionBackpropData>(fq::getData(eltwise))) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,11 +45,18 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext&
|
|||||||
const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
|
const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
|
||||||
const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);
|
const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);
|
||||||
|
|
||||||
|
const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
|
||||||
|
const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
|
||||||
|
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow);
|
||||||
|
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(2), inputHigh);
|
||||||
|
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
|
||||||
|
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
|
||||||
|
|
||||||
auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
|
auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
|
||||||
opset1::FakeQuantize(
|
opset1::FakeQuantize(
|
||||||
fakeQuantizeParent->output(parentIndex),
|
fakeQuantizeParent->output(parentIndex),
|
||||||
foldConvert(fakeQuantize->input_value(1), deqPrecision),
|
inputLow,
|
||||||
foldConvert(fakeQuantize->input_value(2), deqPrecision),
|
inputHigh,
|
||||||
outputLowConst_f32,
|
outputLowConst_f32,
|
||||||
outputHighConst_f32,
|
outputHighConst_f32,
|
||||||
fakeQuantize->get_levels()),
|
fakeQuantize->get_levels()),
|
||||||
|
@ -45,11 +45,18 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext&
|
|||||||
const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
|
const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
|
||||||
const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);
|
const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);
|
||||||
|
|
||||||
|
const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
|
||||||
|
const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
|
||||||
|
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow);
|
||||||
|
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(2), inputHigh);
|
||||||
|
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
|
||||||
|
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
|
||||||
|
|
||||||
auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
|
auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
|
||||||
opset1::FakeQuantize(
|
opset1::FakeQuantize(
|
||||||
fakeQuantizeParent->output(parentIndex),
|
fakeQuantizeParent->output(parentIndex),
|
||||||
foldConvert(fakeQuantize->input_value(1), deqPrecision),
|
inputLow,
|
||||||
foldConvert(fakeQuantize->input_value(2), deqPrecision),
|
inputHigh,
|
||||||
outputLowConst_f32,
|
outputLowConst_f32,
|
||||||
outputHighConst_f32,
|
outputHighConst_f32,
|
||||||
fakeQuantize->get_levels()),
|
fakeQuantize->get_levels()),
|
||||||
@ -76,7 +83,8 @@ bool FuseSubtractToFakeQuantizeTransformation::canBeTransformed(const Transforma
|
|||||||
for (const auto& target : children) {
|
for (const auto& target : children) {
|
||||||
const auto convolution = is_type<opset1::Convolution>(target.get_node());
|
const auto convolution = is_type<opset1::Convolution>(target.get_node());
|
||||||
const auto groupConvolution = is_type<opset1::GroupConvolution>(target.get_node());
|
const auto groupConvolution = is_type<opset1::GroupConvolution>(target.get_node());
|
||||||
if (convolution || groupConvolution) {
|
const auto convolutionBackpropData = is_type<opset1::ConvolutionBackpropData>(target.get_node());
|
||||||
|
if (convolution || groupConvolution || convolutionBackpropData) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -32,6 +32,7 @@ LayerTransformation::LayerTransformation(const Params& params) :
|
|||||||
precisionsOnWeights(params.precisionsOnWeights),
|
precisionsOnWeights(params.precisionsOnWeights),
|
||||||
deqPrecision(params.deqPrecision),
|
deqPrecision(params.deqPrecision),
|
||||||
support3DTensorOnActivations(params.support3DTensorOnActivations),
|
support3DTensorOnActivations(params.support3DTensorOnActivations),
|
||||||
|
deconvolutionSpecificChannelsRatio(params.deconvolutionSpecificChannelsRatio),
|
||||||
quantizationIntervalAsymmetryThreshold(0.002f),
|
quantizationIntervalAsymmetryThreshold(0.002f),
|
||||||
zeroThreshold(1.e-6f),
|
zeroThreshold(1.e-6f),
|
||||||
minQuantizationLevels(2ul),
|
minQuantizationLevels(2ul),
|
||||||
|
@ -69,7 +69,8 @@ bool NetworkHelper::isConstantPath(const std::shared_ptr<Node>& op) {
|
|||||||
return is_type<opset1::Parameter>(node) ||
|
return is_type<opset1::Parameter>(node) ||
|
||||||
is_type<opset1::Convolution>(node) ||
|
is_type<opset1::Convolution>(node) ||
|
||||||
is_type<opset1::GroupConvolution>(node) ||
|
is_type<opset1::GroupConvolution>(node) ||
|
||||||
is_type<opset1::MatMul>(node);
|
is_type<opset1::MatMul>(node) ||
|
||||||
|
is_type<opset1::ConvolutionBackpropData>(node);
|
||||||
};
|
};
|
||||||
|
|
||||||
if (isNotConstantPathOperation(op)) {
|
if (isNotConstantPathOperation(op)) {
|
||||||
@ -440,8 +441,11 @@ std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(const std::shared_ptr<op
|
|||||||
return foldFakeQuantize(fq, false, false);
|
return foldFakeQuantize(fq, false, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues) {
|
std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(
|
||||||
return foldFakeQuantize(fq, roundValues, true);
|
const std::shared_ptr<opset1::FakeQuantize>& fq,
|
||||||
|
const bool roundValues,
|
||||||
|
const int outChannelsShapeIndex) {
|
||||||
|
return foldFakeQuantize(fq, roundValues, true, outChannelsShapeIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace) {
|
FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace) {
|
||||||
@ -591,7 +595,8 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::fuseConvert(const std::shar
|
|||||||
std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
|
std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
|
||||||
const std::shared_ptr<opset1::FakeQuantize>& fq,
|
const std::shared_ptr<opset1::FakeQuantize>& fq,
|
||||||
const bool roundValuesArg,
|
const bool roundValuesArg,
|
||||||
const bool roundValuesWasSet) {
|
const bool roundValuesWasSet,
|
||||||
|
const int outChannelsShapeIndex) {
|
||||||
if (is_type<opset1::Constant>(fq->get_input_node_shared_ptr(0)) &&
|
if (is_type<opset1::Constant>(fq->get_input_node_shared_ptr(0)) &&
|
||||||
is_type<opset1::Constant>(fq->get_input_node_shared_ptr(1)) &&
|
is_type<opset1::Constant>(fq->get_input_node_shared_ptr(1)) &&
|
||||||
is_type<opset1::Constant>(fq->get_input_node_shared_ptr(2)) &&
|
is_type<opset1::Constant>(fq->get_input_node_shared_ptr(2)) &&
|
||||||
@ -630,10 +635,20 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
|
|||||||
if (constShape.empty() || constShape.size() > 5lu) {
|
if (constShape.empty() || constShape.size() > 5lu) {
|
||||||
THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected dimensions count " << constShape.size();
|
THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected dimensions count " << constShape.size();
|
||||||
}
|
}
|
||||||
|
if (outChannelsShapeIndex != 0 && outChannelsShapeIndex != 1) {
|
||||||
|
THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected outChannelsShapeIndex " << outChannelsShapeIndex;
|
||||||
|
}
|
||||||
|
|
||||||
// OIDHW
|
size_t OC;
|
||||||
const size_t OC = constShape[0];
|
size_t IC;
|
||||||
const size_t IC = constShape.size() > 1lu ? constShape[1] : 1;
|
// OIDHW or IODHW
|
||||||
|
if (constShape.size() == 1) {
|
||||||
|
OC = constShape[0];
|
||||||
|
IC = 1;
|
||||||
|
} else {
|
||||||
|
OC = constShape[outChannelsShapeIndex];
|
||||||
|
IC = constShape[outChannelsShapeIndex == 0 ? 1 : 0];
|
||||||
|
}
|
||||||
const size_t D = constShape.size() > 4lu ? constShape[constShape.size() - 3] : 1;
|
const size_t D = constShape.size() > 4lu ? constShape[constShape.size() - 3] : 1;
|
||||||
const size_t H = constShape.size() > 2lu ? constShape.size() == 3lu ? constShape[2] : constShape[constShape.size() - 2] : 1;
|
const size_t H = constShape.size() > 2lu ? constShape.size() == 3lu ? constShape[2] : constShape[constShape.size() - 2] : 1;
|
||||||
const size_t W = constShape.size() > 3lu ? constShape[constShape.size() - 1] : 1;
|
const size_t W = constShape.size() > 3lu ? constShape[constShape.size() - 1] : 1;
|
||||||
@ -667,29 +682,35 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
|
|||||||
|
|
||||||
auto levels_1 = fq->get_levels() - 1.f;
|
auto levels_1 = fq->get_levels() - 1.f;
|
||||||
|
|
||||||
//const size_t DHW = D * H * W;
|
const size_t DHW = D * H * W;
|
||||||
const size_t IDHW = IC * D * H * W;
|
const size_t IDHW = IC * D * H * W;
|
||||||
|
|
||||||
const auto values = constant->cast_vector<float>();
|
const auto values = constant->cast_vector<float>();
|
||||||
std::vector<float> quantizedValues(OC * IC * D * H * W);
|
std::vector<float> quantizedValues(OC * IC * D * H * W);
|
||||||
|
|
||||||
for (size_t oc = 0; oc < OC; ++oc) {
|
for (size_t oc = 0; oc < OC; ++oc) {
|
||||||
for (size_t iidx = 0; iidx < IDHW; ++iidx) {
|
const float inputLow = inputLowValues[isInputLowBroadcasted ? 0 : oc];
|
||||||
const float inputLow = inputLowValues[isInputLowBroadcasted ? 0 : oc];
|
const float inputHigh = inputHighValues[isInputHighBroadcasted ? 0 : oc];
|
||||||
const float inputHigh = inputHighValues[isInputHighBroadcasted ? 0 : oc];
|
const float outputLow = outputLowValues[isOutputLowBroadcasted ? 0 : oc];
|
||||||
const float outputLow = outputLowValues[isOutputLowBroadcasted ? 0 : oc];
|
const float outputHigh = outputHighValues[isOutputHighBroadcasted ? 0 : oc];
|
||||||
const float outputHigh = outputHighValues[isOutputHighBroadcasted ? 0 : oc];
|
for (size_t ic = 0; ic < IC; ++ic) {
|
||||||
|
for (size_t iidx = 0; iidx < DHW; ++iidx) {
|
||||||
|
size_t idx;
|
||||||
|
if (outChannelsShapeIndex == 0) {
|
||||||
|
idx = oc * IDHW + ic * DHW + iidx;
|
||||||
|
} else {
|
||||||
|
idx = ic * IDHW + oc * DHW + iidx;
|
||||||
|
}
|
||||||
|
|
||||||
const size_t idx = oc * IDHW + iidx;
|
if (values[idx] <= inputLow) {
|
||||||
|
quantizedValues[idx] = roundValues ? std::roundf(outputLow) : outputLow;
|
||||||
if (values[idx] <= inputLow) {
|
} else if (values[idx] > inputHigh) {
|
||||||
quantizedValues[idx] = roundValues ? std::roundf(outputLow) : outputLow;
|
quantizedValues[idx] = roundValues ? std::roundf(outputHigh) : outputHigh;
|
||||||
} else if (values[idx] > inputHigh) {
|
} else {
|
||||||
quantizedValues[idx] = roundValues ? std::roundf(outputHigh) : outputHigh;
|
const float value = std::roundf((values[idx] - inputLow) / (inputHigh - inputLow) * levels_1) /
|
||||||
} else {
|
levels_1 * (outputHigh - outputLow) + outputLow;
|
||||||
const float value = std::roundf((values[idx] - inputLow) / (inputHigh - inputLow) * levels_1) /
|
quantizedValues[idx] = roundValues ? std::roundf(value) : value;
|
||||||
levels_1 * (outputHigh - outputLow) + outputLow;
|
}
|
||||||
quantizedValues[idx] = roundValues ? std::roundf(value) : value;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -818,7 +839,8 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
|
|||||||
const float max,
|
const float max,
|
||||||
const bool hasZeroPoint,
|
const bool hasZeroPoint,
|
||||||
const bool updatePrecision,
|
const bool updatePrecision,
|
||||||
const element::Type deqPrecision) {
|
const element::Type deqPrecision,
|
||||||
|
const size_t outChannelsShapeIndex) {
|
||||||
using std::make_shared;
|
using std::make_shared;
|
||||||
|
|
||||||
const auto outputLow = fq->input_value(3);
|
const auto outputLow = fq->input_value(3);
|
||||||
@ -898,7 +920,8 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
|
|||||||
newMax->output(0),
|
newMax->output(0),
|
||||||
fq->get_levels(),
|
fq->get_levels(),
|
||||||
fq->get_auto_broadcast()),
|
fq->get_auto_broadcast()),
|
||||||
true);
|
true,
|
||||||
|
outChannelsShapeIndex);
|
||||||
NetworkHelper::copyInfo(fq, newFQ);
|
NetworkHelper::copyInfo(fq, newFQ);
|
||||||
|
|
||||||
std::shared_ptr<ngraph::Node> convert2;
|
std::shared_ptr<ngraph::Node> convert2;
|
||||||
@ -1548,12 +1571,12 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr<Node>& node, const Data
|
|||||||
if (is_type<opset1::Subtract>(node)) {
|
if (is_type<opset1::Subtract>(node)) {
|
||||||
const auto parent = node->get_input_node_shared_ptr(0);
|
const auto parent = node->get_input_node_shared_ptr(0);
|
||||||
const auto intNode = is_type<opset1::Convert>(parent) ? parent : node;
|
const auto intNode = is_type<opset1::Convert>(parent) ? parent : node;
|
||||||
const auto intType = intNode->get_input_element_type(0);
|
const auto type = intNode->get_input_element_type(0);
|
||||||
if (intType == element::u8 || intType == element::i8) {
|
if (type == element::u8 || type == element::i8) {
|
||||||
min = DataPrecision::getMinValue(intType, 256) - 0.5f;
|
min = DataPrecision::getMinValue(type, 256) - 0.5f;
|
||||||
max = DataPrecision::getMaxValue(intType, 256) + 0.5f;
|
max = DataPrecision::getMaxValue(type, 256) + 0.5f;
|
||||||
} else {
|
} else {
|
||||||
return false;
|
return type == element::f32 || type == element::f16;
|
||||||
}
|
}
|
||||||
auto subtract1input = node->get_input_node_shared_ptr(1);
|
auto subtract1input = node->get_input_node_shared_ptr(1);
|
||||||
if (is_type<opset1::Convert>(subtract1input)) {
|
if (is_type<opset1::Convert>(subtract1input)) {
|
||||||
@ -1595,6 +1618,23 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr<Node>& node, const Data
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<element::Type> NetworkHelper::precisionIntersection(
|
||||||
|
const std::vector<element::Type>& v1,
|
||||||
|
const std::vector<element::Type>& v2) noexcept {
|
||||||
|
std::vector<element::Type> v3;
|
||||||
|
|
||||||
|
auto v1Copy = v1;
|
||||||
|
auto v2Copy = v2;
|
||||||
|
|
||||||
|
std::sort(v1Copy.begin(), v1Copy.end());
|
||||||
|
std::sort(v2Copy.begin(), v2Copy.end());
|
||||||
|
|
||||||
|
std::set_intersection(v1Copy.begin(), v1Copy.end(),
|
||||||
|
v2Copy.begin(), v2Copy.end(),
|
||||||
|
std::back_inserter(v3));
|
||||||
|
return v3;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace low_precision
|
} // namespace low_precision
|
||||||
} // namespace pass
|
} // namespace pass
|
||||||
} // namespace ngraph
|
} // namespace ngraph
|
||||||
|
@ -34,6 +34,7 @@
|
|||||||
#include "low_precision/avg_pool.hpp"
|
#include "low_precision/avg_pool.hpp"
|
||||||
#include "low_precision/clamp.hpp"
|
#include "low_precision/clamp.hpp"
|
||||||
#include "low_precision/convolution.hpp"
|
#include "low_precision/convolution.hpp"
|
||||||
|
#include "low_precision/convolution_backprop_data.hpp"
|
||||||
#include "low_precision/depth_to_space.hpp"
|
#include "low_precision/depth_to_space.hpp"
|
||||||
#include "low_precision/fake_quantize.hpp"
|
#include "low_precision/fake_quantize.hpp"
|
||||||
#include "low_precision/group_convolution.hpp"
|
#include "low_precision/group_convolution.hpp"
|
||||||
@ -220,6 +221,7 @@ LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const
|
|||||||
add<AvgPoolTransformation, opset1::AvgPool>(params).
|
add<AvgPoolTransformation, opset1::AvgPool>(params).
|
||||||
add<ClampTransformation, opset1::Clamp>(params).
|
add<ClampTransformation, opset1::Clamp>(params).
|
||||||
add<ConvolutionTransformation, opset1::Convolution>(params).
|
add<ConvolutionTransformation, opset1::Convolution>(params).
|
||||||
|
add<ConvolutionBackpropDataTransformation, opset1::ConvolutionBackpropData>(params).
|
||||||
add<DepthToSpaceTransformation, opset1::DepthToSpace>(params).
|
add<DepthToSpaceTransformation, opset1::DepthToSpace>(params).
|
||||||
add<FakeQuantizeTransformation, opset1::FakeQuantize>(params).
|
add<FakeQuantizeTransformation, opset1::FakeQuantize>(params).
|
||||||
add<GroupConvolutionTransformation, opset1::GroupConvolution>(params).
|
add<GroupConvolutionTransformation, opset1::GroupConvolution>(params).
|
||||||
@ -338,6 +340,7 @@ TypeRelaxedReplacer::TypeRelaxedReplacer() {
|
|||||||
make_matcher_type_relaxed<opset1::Clamp>(this);
|
make_matcher_type_relaxed<opset1::Clamp>(this);
|
||||||
make_matcher_type_relaxed<opset1::Concat>(this);
|
make_matcher_type_relaxed<opset1::Concat>(this);
|
||||||
make_matcher_type_relaxed<opset1::Convolution>(this);
|
make_matcher_type_relaxed<opset1::Convolution>(this);
|
||||||
|
make_matcher_type_relaxed<opset1::ConvolutionBackpropData>(this);
|
||||||
make_matcher_type_relaxed<opset1::DepthToSpace>(this);
|
make_matcher_type_relaxed<opset1::DepthToSpace>(this);
|
||||||
make_matcher_type_relaxed<opset1::FakeQuantize>(this);
|
make_matcher_type_relaxed<opset1::FakeQuantize>(this);
|
||||||
make_matcher_type_relaxed<opset1::GroupConvolution>(this);
|
make_matcher_type_relaxed<opset1::GroupConvolution>(this);
|
||||||
@ -430,23 +433,6 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
|
|||||||
network->validate_nodes_and_infer_types();
|
network->validate_nodes_and_infer_types();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<element::Type> LowPrecisionTransformer::precisionIntersection(
|
|
||||||
const std::vector<element::Type>& v1,
|
|
||||||
const std::vector<element::Type>& v2) const noexcept {
|
|
||||||
std::vector<element::Type> v3;
|
|
||||||
|
|
||||||
auto v1Copy = v1;
|
|
||||||
auto v2Copy = v2;
|
|
||||||
|
|
||||||
std::sort(v1Copy.begin(), v1Copy.end());
|
|
||||||
std::sort(v2Copy.begin(), v2Copy.end());
|
|
||||||
|
|
||||||
std::set_intersection(v1Copy.begin(), v1Copy.end(),
|
|
||||||
v2Copy.begin(), v2Copy.end(),
|
|
||||||
std::back_inserter(v3));
|
|
||||||
return v3;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<element::Type> LowPrecisionTransformer::getPrecisionsOnActivations(const Node& op) const noexcept {
|
std::vector<element::Type> LowPrecisionTransformer::getPrecisionsOnActivations(const Node& op) const noexcept {
|
||||||
const std::string operantionType = LowPrecisionTransformations::getType(op);
|
const std::string operantionType = LowPrecisionTransformations::getType(op);
|
||||||
const std::vector<LayerTransformationPtr> transformation = transformations.find(operantionType);
|
const std::vector<LayerTransformationPtr> transformation = transformations.find(operantionType);
|
||||||
@ -456,7 +442,7 @@ std::vector<element::Type> LowPrecisionTransformer::getPrecisionsOnActivations(c
|
|||||||
std::vector<element::Type> precisions = transformation[0]->getPrecisionsOnActivations();
|
std::vector<element::Type> precisions = transformation[0]->getPrecisionsOnActivations();
|
||||||
|
|
||||||
for (const auto& transform : transformation) {
|
for (const auto& transform : transformation) {
|
||||||
precisions = precisionIntersection(precisions, transform->getPrecisionsOnActivations());
|
precisions = NetworkHelper::precisionIntersection(precisions, transform->getPrecisionsOnActivations());
|
||||||
}
|
}
|
||||||
return precisions;
|
return precisions;
|
||||||
}
|
}
|
||||||
|
@ -26,7 +26,7 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (updatePrecisions && !NetworkHelper::checkZeroPoint(dequantization.subtract)) {
|
if (!NetworkHelper::checkZeroPoint(dequantization.subtract)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -46,24 +46,10 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision)) {
|
if (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision)) {
|
||||||
const std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
|
|
||||||
if (as_type_ptr<opset1::Constant>(resultConstant)) {
|
|
||||||
replace_node(fqOnWeights, resultConstant);
|
|
||||||
}
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (!NetworkHelper::checkZeroPoint(dequantization.subtract)) {
|
if (!NetworkHelper::checkZeroPoint(dequantization.subtract)) {
|
||||||
const auto resultDequantization = NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
|
|
||||||
if (resultDequantization.empty() && reshapeFromWeights) {
|
|
||||||
const auto foldedReshape = fold<opset1::Reshape>(
|
|
||||||
reshapeFromWeights->get_input_node_shared_ptr(0),
|
|
||||||
reshapeFromWeights->get_input_node_shared_ptr(1),
|
|
||||||
reshapeFromWeights->get_special_zero());
|
|
||||||
if (is_type<opset1::Constant>(foldedReshape)) {
|
|
||||||
replace_node(reshapeFromWeights, foldedReshape);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -170,9 +156,11 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( // Check if all dimensions of scale except the first one (which is O-Output channels dimension) are all ones
|
const size_t outChannelsShapeIndex = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul;
|
||||||
(shape_size(constOutputShape) != constOutputShape[0]) ||
|
if ( // Check if all dimensions of scale except the output channels are all ones
|
||||||
((constOutputShape[0] != 1ul) && (fqFromWeights->get_output_shape(0)[0] != constOutputShape[0]))) {
|
(shape_size(constOutputShape) != constOutputShape[outChannelsShapeIndex]) ||
|
||||||
|
((constOutputShape[outChannelsShapeIndex] != 1ul) &&
|
||||||
|
(fqFromWeights->get_output_shape(0)[outChannelsShapeIndex] != constOutputShape[outChannelsShapeIndex]))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -256,7 +244,7 @@ bool WeightableLayerTransformation::isPrecisionPreserved(std::shared_ptr<Node> l
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(std::shared_ptr<Node> node) const {
|
void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& node, const size_t outChannelsShapeIndex) const {
|
||||||
const auto fq = getFakeQuantizeOnWeights(node);
|
const auto fq = getFakeQuantizeOnWeights(node);
|
||||||
if (fq == nullptr) {
|
if (fq == nullptr) {
|
||||||
return;
|
return;
|
||||||
@ -270,7 +258,9 @@ void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(std::sha
|
|||||||
dataPrecision.min,
|
dataPrecision.min,
|
||||||
dataPrecision.max,
|
dataPrecision.max,
|
||||||
dataPrecision.hasZeroPoint,
|
dataPrecision.hasZeroPoint,
|
||||||
updatePrecisions);
|
updatePrecisions,
|
||||||
|
element::f32,
|
||||||
|
outChannelsShapeIndex);
|
||||||
|
|
||||||
std::shared_ptr<ngraph::Node> fqOnWeights = std::get<0>(tuple);
|
std::shared_ptr<ngraph::Node> fqOnWeights = std::get<0>(tuple);
|
||||||
if (as_type_ptr<ngraph::opset1::Constant>(fqOnWeights) == nullptr) {
|
if (as_type_ptr<ngraph::opset1::Constant>(fqOnWeights) == nullptr) {
|
||||||
|
@ -76,6 +76,7 @@
|
|||||||
#include <low_precision/transformer.hpp>
|
#include <low_precision/transformer.hpp>
|
||||||
#include <low_precision/convert_subtract_constant.hpp>
|
#include <low_precision/convert_subtract_constant.hpp>
|
||||||
#include <low_precision/convolution.hpp>
|
#include <low_precision/convolution.hpp>
|
||||||
|
#include <low_precision/convolution_backprop_data.hpp>
|
||||||
#include <low_precision/group_convolution.hpp>
|
#include <low_precision/group_convolution.hpp>
|
||||||
#include <low_precision/multiply_to_group_convolution.hpp>
|
#include <low_precision/multiply_to_group_convolution.hpp>
|
||||||
#include <low_precision/network_helper.hpp>
|
#include <low_precision/network_helper.hpp>
|
||||||
@ -328,7 +329,8 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
|
|||||||
.add<GroupConvolutionTransformation, ngraph::opset1::GroupConvolution>(
|
.add<GroupConvolutionTransformation, ngraph::opset1::GroupConvolution>(
|
||||||
LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }).setSupportAsymmetricQuantization(true))
|
LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }).setSupportAsymmetricQuantization(true))
|
||||||
.addStandaloneCleanup<MultiplyToGroupConvolutionTransformation, ngraph::opset1::Multiply>(
|
.addStandaloneCleanup<MultiplyToGroupConvolutionTransformation, ngraph::opset1::Multiply>(
|
||||||
LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 })));
|
LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }))
|
||||||
|
.remove<ConvolutionBackpropDataTransformation, ngraph::opset1::ConvolutionBackpropData>());
|
||||||
|
|
||||||
transformer.transform(nGraphFunc);
|
transformer.transform(nGraphFunc);
|
||||||
}
|
}
|
||||||
|
@ -29,6 +29,7 @@ public:
|
|||||||
const Strides& dilations,
|
const Strides& dilations,
|
||||||
const CoordinateDiff& pads_begin,
|
const CoordinateDiff& pads_begin,
|
||||||
const CoordinateDiff& pads_end,
|
const CoordinateDiff& pads_end,
|
||||||
|
const element::Type output_type,
|
||||||
const size_t& group = 1,
|
const size_t& group = 1,
|
||||||
const PadType& auto_pad = PadType::EXPLICIT,
|
const PadType& auto_pad = PadType::EXPLICIT,
|
||||||
const CoordinateDiff& output_padding = {},
|
const CoordinateDiff& output_padding = {},
|
||||||
@ -41,6 +42,7 @@ public:
|
|||||||
const Strides& dilations,
|
const Strides& dilations,
|
||||||
const CoordinateDiff& pads_begin,
|
const CoordinateDiff& pads_begin,
|
||||||
const CoordinateDiff& pads_end,
|
const CoordinateDiff& pads_end,
|
||||||
|
const element::Type output_type,
|
||||||
const size_t& group = 1,
|
const size_t& group = 1,
|
||||||
const PadType& auto_pad = PadType::EXPLICIT,
|
const PadType& auto_pad = PadType::EXPLICIT,
|
||||||
const CoordinateDiff& output_padding = {},
|
const CoordinateDiff& output_padding = {},
|
||||||
@ -79,6 +81,7 @@ protected:
|
|||||||
size_t m_group;
|
size_t m_group;
|
||||||
CoordinateDiff m_output_padding;
|
CoordinateDiff m_output_padding;
|
||||||
std::shared_ptr<Node> m_output_shape;
|
std::shared_ptr<Node> m_output_shape;
|
||||||
|
element::Type m_output_type;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace op
|
} // namespace op
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
#include "ngraph/util.hpp"
|
#include "ngraph/util.hpp"
|
||||||
#include "ngraph/validation_util.hpp"
|
#include "ngraph/validation_util.hpp"
|
||||||
#include "ngraph/opsets/opset1.hpp"
|
#include "ngraph/opsets/opset1.hpp"
|
||||||
|
#include "ngraph_ops/type_relaxed.hpp"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace ngraph;
|
using namespace ngraph;
|
||||||
@ -25,6 +26,7 @@ op::DeconvolutionIE::DeconvolutionIE(const Output<Node>& data,
|
|||||||
const Strides& dilations,
|
const Strides& dilations,
|
||||||
const CoordinateDiff& pads_begin,
|
const CoordinateDiff& pads_begin,
|
||||||
const CoordinateDiff& pads_end,
|
const CoordinateDiff& pads_end,
|
||||||
|
const element::Type output_type,
|
||||||
const size_t& group,
|
const size_t& group,
|
||||||
const PadType& auto_pad,
|
const PadType& auto_pad,
|
||||||
const CoordinateDiff& output_padding,
|
const CoordinateDiff& output_padding,
|
||||||
@ -37,7 +39,8 @@ op::DeconvolutionIE::DeconvolutionIE(const Output<Node>& data,
|
|||||||
, m_auto_pad(auto_pad)
|
, m_auto_pad(auto_pad)
|
||||||
, m_group(group)
|
, m_group(group)
|
||||||
, m_output_padding(output_padding)
|
, m_output_padding(output_padding)
|
||||||
, m_output_shape(output_shape) {
|
, m_output_shape(output_shape)
|
||||||
|
, m_output_type(output_type) {
|
||||||
constructor_validate_and_infer_types();
|
constructor_validate_and_infer_types();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -48,6 +51,7 @@ op::DeconvolutionIE::DeconvolutionIE(const Output<Node>& data,
|
|||||||
const Strides& dilations,
|
const Strides& dilations,
|
||||||
const CoordinateDiff& pads_begin,
|
const CoordinateDiff& pads_begin,
|
||||||
const CoordinateDiff& pads_end,
|
const CoordinateDiff& pads_end,
|
||||||
|
const element::Type output_type,
|
||||||
const size_t& group,
|
const size_t& group,
|
||||||
const PadType& auto_pad,
|
const PadType& auto_pad,
|
||||||
const CoordinateDiff& output_padding,
|
const CoordinateDiff& output_padding,
|
||||||
@ -60,7 +64,8 @@ op::DeconvolutionIE::DeconvolutionIE(const Output<Node>& data,
|
|||||||
, m_auto_pad(auto_pad)
|
, m_auto_pad(auto_pad)
|
||||||
, m_group(group)
|
, m_group(group)
|
||||||
, m_output_padding(output_padding)
|
, m_output_padding(output_padding)
|
||||||
, m_output_shape(output_shape) {
|
, m_output_shape(output_shape)
|
||||||
|
, m_output_type(output_type) {
|
||||||
constructor_validate_and_infer_types();
|
constructor_validate_and_infer_types();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -81,13 +86,32 @@ void op::DeconvolutionIE::validate_and_infer_types() {
|
|||||||
}
|
}
|
||||||
Output<Node> conv;
|
Output<Node> conv;
|
||||||
if (m_output_shape) {
|
if (m_output_shape) {
|
||||||
conv = std::make_shared<opset1::GroupConvolutionBackpropData>(input_value(0), weights, m_output_shape,
|
conv = std::make_shared<op::TypeRelaxed<opset1::GroupConvolutionBackpropData>>(
|
||||||
m_strides, m_pads_begin, m_pads_end, m_dilations, m_auto_pad, m_output_padding);
|
std::vector<element::Type>{ element::f32, element::f32 },
|
||||||
|
std::vector<element::Type>{ element::f32 },
|
||||||
|
ngraph::op::TemporaryReplaceOutputType(input_value(0), element::f32).get(),
|
||||||
|
ngraph::op::TemporaryReplaceOutputType(weights, element::f32).get(),
|
||||||
|
m_output_shape,
|
||||||
|
m_strides,
|
||||||
|
m_pads_begin,
|
||||||
|
m_pads_end,
|
||||||
|
m_dilations,
|
||||||
|
m_auto_pad,
|
||||||
|
m_output_padding);
|
||||||
} else {
|
} else {
|
||||||
conv = std::make_shared<opset1::GroupConvolutionBackpropData>(input_value(0), weights,
|
conv = std::make_shared<op::TypeRelaxed<opset1::GroupConvolutionBackpropData>>(
|
||||||
m_strides, m_pads_begin, m_pads_end, m_dilations, m_auto_pad, m_output_padding);
|
std::vector<element::Type>{ element::f32, element::f32 },
|
||||||
|
std::vector<element::Type>{ element::f32 },
|
||||||
|
ngraph::op::TemporaryReplaceOutputType(input_value(0), element::f32).get(),
|
||||||
|
ngraph::op::TemporaryReplaceOutputType(weights, element::f32).get(),
|
||||||
|
m_strides,
|
||||||
|
m_pads_begin,
|
||||||
|
m_pads_end,
|
||||||
|
m_dilations,
|
||||||
|
m_auto_pad,
|
||||||
|
m_output_padding);
|
||||||
}
|
}
|
||||||
set_output_type(0, conv.get_element_type(), conv.get_partial_shape());
|
set_output_type(0, m_output_type, conv.get_partial_shape());
|
||||||
}
|
}
|
||||||
|
|
||||||
shared_ptr<Node> op::DeconvolutionIE::clone_with_new_inputs(const ngraph::OutputVector &new_args) const {
|
shared_ptr<Node> op::DeconvolutionIE::clone_with_new_inputs(const ngraph::OutputVector &new_args) const {
|
||||||
@ -99,6 +123,7 @@ shared_ptr<Node> op::DeconvolutionIE::clone_with_new_inputs(const ngraph::Output
|
|||||||
m_dilations,
|
m_dilations,
|
||||||
m_pads_begin,
|
m_pads_begin,
|
||||||
m_pads_end,
|
m_pads_end,
|
||||||
|
m_output_type,
|
||||||
m_group,
|
m_group,
|
||||||
m_auto_pad,
|
m_auto_pad,
|
||||||
m_output_padding,
|
m_output_padding,
|
||||||
@ -111,6 +136,7 @@ shared_ptr<Node> op::DeconvolutionIE::clone_with_new_inputs(const ngraph::Output
|
|||||||
m_dilations,
|
m_dilations,
|
||||||
m_pads_begin,
|
m_pads_begin,
|
||||||
m_pads_end,
|
m_pads_end,
|
||||||
|
m_output_type,
|
||||||
m_group,
|
m_group,
|
||||||
m_auto_pad,
|
m_auto_pad,
|
||||||
m_output_padding,
|
m_output_padding,
|
||||||
|
@ -113,6 +113,7 @@ ngraph::pass::ConvertDeconvolution::ConvertDeconvolution() {
|
|||||||
deconv->get_dilations(),
|
deconv->get_dilations(),
|
||||||
deconv->get_pads_begin(),
|
deconv->get_pads_begin(),
|
||||||
deconv->get_pads_end(),
|
deconv->get_pads_end(),
|
||||||
|
deconv->get_output_element_type(0),
|
||||||
1 /* groups */,
|
1 /* groups */,
|
||||||
deconv->get_auto_pad(),
|
deconv->get_auto_pad(),
|
||||||
deconv->get_output_padding(),
|
deconv->get_output_padding(),
|
||||||
@ -158,6 +159,7 @@ ngraph::pass::ConvertGroupDeconvolution::ConvertGroupDeconvolution() {
|
|||||||
gconv->get_dilations(),
|
gconv->get_dilations(),
|
||||||
gconv->get_pads_begin(),
|
gconv->get_pads_begin(),
|
||||||
gconv->get_pads_end(),
|
gconv->get_pads_end(),
|
||||||
|
gconv->get_output_element_type(0),
|
||||||
group,
|
group,
|
||||||
gconv->get_auto_pad(),
|
gconv->get_auto_pad(),
|
||||||
gconv->get_output_padding(),
|
gconv->get_output_padding(),
|
||||||
|
@ -38,11 +38,14 @@ ngraph::pass::ConvertSubtract::ConvertSubtract() {
|
|||||||
const std::shared_ptr<Node> child = subChildren.begin()->get_node()->shared_from_this();
|
const std::shared_ptr<Node> child = subChildren.begin()->get_node()->shared_from_this();
|
||||||
if (child != nullptr) {
|
if (child != nullptr) {
|
||||||
if (is_type<opset1::Convolution>(child) ||
|
if (is_type<opset1::Convolution>(child) ||
|
||||||
|
is_type<opset1::ConvolutionBackpropData>(child) ||
|
||||||
is_type<opset1::GroupConvolution>(child) ||
|
is_type<opset1::GroupConvolution>(child) ||
|
||||||
|
is_type<opset1::GroupConvolutionBackpropData>(child) ||
|
||||||
is_type<opset1::MatMul>(child) ||
|
is_type<opset1::MatMul>(child) ||
|
||||||
(is_type<opset1::Reshape>(child) &&
|
(is_type<opset1::Reshape>(child) &&
|
||||||
(child->output(0).get_target_inputs().size() == 1ul) &&
|
(child->output(0).get_target_inputs().size() == 1ul) &&
|
||||||
is_type<opset1::GroupConvolution>(child->output(0).get_target_inputs().begin()->get_node()->shared_from_this()))) {
|
(is_type<opset1::GroupConvolution>(child->output(0).get_target_inputs().begin()->get_node()->shared_from_this()) ||
|
||||||
|
is_type<opset1::GroupConvolutionBackpropData>(child->output(0).get_target_inputs().begin()->get_node()->shared_from_this())))) {
|
||||||
const auto input1Type = sub->input(0).get_element_type();
|
const auto input1Type = sub->input(0).get_element_type();
|
||||||
const auto input2Type = sub->input(1).get_element_type();
|
const auto input2Type = sub->input(1).get_element_type();
|
||||||
if (((input1Type == element::u8) && (input2Type == element::u8)) ||
|
if (((input1Type == element::u8) && (input2Type == element::u8)) ||
|
||||||
|
@ -0,0 +1,334 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "layer_transformation.hpp"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <sstream>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <transformations/utils/utils.hpp>
|
||||||
|
#include <transformations/init_node_info.hpp>
|
||||||
|
#include <low_precision/convolution_backprop_data.hpp>
|
||||||
|
#include <low_precision/network_helper.hpp>
|
||||||
|
|
||||||
|
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||||
|
#include "simple_low_precision_transformer.hpp"
|
||||||
|
#include "lpt_ngraph_functions/convolution_backprop_data_function.hpp"
|
||||||
|
|
||||||
|
using namespace testing;
|
||||||
|
using namespace ngraph;
|
||||||
|
using namespace ngraph::pass;
|
||||||
|
|
||||||
|
class ConvolutionBackpropDataTransformationTestValues {
|
||||||
|
public:
|
||||||
|
class Actual {
|
||||||
|
public:
|
||||||
|
ngraph::element::Type precisionBeforeDequantization;
|
||||||
|
ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
|
||||||
|
builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
|
||||||
|
builder::subgraph::DequantizationOperations dequantizationOnWeights;
|
||||||
|
std::shared_ptr<ngraph::opset1::Constant> weights;
|
||||||
|
|
||||||
|
Actual() = default;
|
||||||
|
Actual(
|
||||||
|
const ngraph::element::Type& precisionBeforeDequantization,
|
||||||
|
const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations,
|
||||||
|
const builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights,
|
||||||
|
const std::shared_ptr<ngraph::opset1::Constant>& weights) :
|
||||||
|
precisionBeforeDequantization(precisionBeforeDequantization),
|
||||||
|
dequantizationOnActivations(dequantizationOnActivations),
|
||||||
|
fakeQuantizeOnWeights(fakeQuantizeOnWeights),
|
||||||
|
weights(weights) {}
|
||||||
|
Actual(
|
||||||
|
const ngraph::element::Type& precisionBeforeDequantization,
|
||||||
|
const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations,
|
||||||
|
const builder::subgraph::DequantizationOperations& dequantizationOnWeights,
|
||||||
|
const std::shared_ptr<ngraph::opset1::Constant>& weights) :
|
||||||
|
precisionBeforeDequantization(precisionBeforeDequantization),
|
||||||
|
dequantizationOnActivations(dequantizationOnActivations),
|
||||||
|
dequantizationOnWeights(dequantizationOnWeights),
|
||||||
|
weights(weights) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
class Expected {
|
||||||
|
public:
|
||||||
|
ngraph::element::Type precisionBeforeDequantization;
|
||||||
|
ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
|
||||||
|
builder::subgraph::DequantizationOperations dequantizationOnWeights;
|
||||||
|
ngraph::builder::subgraph::DequantizationOperations dequantizationAfter;
|
||||||
|
std::shared_ptr<ngraph::opset1::Constant> weights;
|
||||||
|
bool transformed;
|
||||||
|
};
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::LayerTransformation::Params params;
|
||||||
|
Actual actual;
|
||||||
|
Expected expected;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef std::tuple<
|
||||||
|
element::Type,
|
||||||
|
ngraph::Shape,
|
||||||
|
ConvolutionBackpropDataTransformationTestValues> ConvolutionBackpropDataTransformationParams;
|
||||||
|
|
||||||
|
class ConvolutionBackpropDataTransformation : public LayerTransformation, public testing::WithParamInterface<ConvolutionBackpropDataTransformationParams> {
|
||||||
|
public:
|
||||||
|
void SetUp() override {
|
||||||
|
const auto netPrecision = std::get<0>(GetParam());
|
||||||
|
const auto inputShape = std::get<1>(GetParam());
|
||||||
|
auto outputShape = inputShape;
|
||||||
|
outputShape[1] /= 4;
|
||||||
|
outputShape[2] *= 2;
|
||||||
|
outputShape[3] *= 2;
|
||||||
|
auto testValues = std::get<2>(GetParam());
|
||||||
|
|
||||||
|
std::shared_ptr<Node> actualWeights = pass::low_precision::fold<opset1::Broadcast>(
|
||||||
|
testValues.actual.weights,
|
||||||
|
opset1::Constant::create(
|
||||||
|
element::i64,
|
||||||
|
Shape{inputShape.size()},
|
||||||
|
Shape{inputShape[1], outputShape[1], 1, 1}));
|
||||||
|
if (!testValues.actual.fakeQuantizeOnWeights.empty()) {
|
||||||
|
actualWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
|
||||||
|
outputShape,
|
||||||
|
netPrecision,
|
||||||
|
testValues.actual.fakeQuantizeOnWeights,
|
||||||
|
as_type_ptr<opset1::Constant>(actualWeights));
|
||||||
|
} else {
|
||||||
|
actualWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
|
||||||
|
outputShape,
|
||||||
|
netPrecision,
|
||||||
|
testValues.actual.dequantizationOnWeights,
|
||||||
|
as_type_ptr<opset1::Constant>(actualWeights));
|
||||||
|
}
|
||||||
|
|
||||||
|
actualFunction = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getOriginal(
|
||||||
|
testValues.actual.precisionBeforeDequantization,
|
||||||
|
netPrecision,
|
||||||
|
inputShape,
|
||||||
|
outputShape,
|
||||||
|
testValues.actual.dequantizationOnActivations,
|
||||||
|
actualWeights);
|
||||||
|
|
||||||
|
SimpleLowPrecisionTransformer transform;
|
||||||
|
transform.add<ngraph::pass::low_precision::ConvolutionBackpropDataTransformation, ngraph::opset1::Convolution>(testValues.params);
|
||||||
|
transform.transform(actualFunction);
|
||||||
|
std::shared_ptr<Node> refWeights = pass::low_precision::fold<opset1::Broadcast>(
|
||||||
|
testValues.expected.weights,
|
||||||
|
opset1::Constant::create(
|
||||||
|
element::i64,
|
||||||
|
Shape{inputShape.size()},
|
||||||
|
Shape{inputShape[1], outputShape[1], 1, 1}));
|
||||||
|
|
||||||
|
if (!testValues.expected.transformed) {
|
||||||
|
refWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
|
||||||
|
outputShape,
|
||||||
|
netPrecision,
|
||||||
|
testValues.actual.fakeQuantizeOnWeights,
|
||||||
|
as_type_ptr<opset1::Constant>(refWeights));
|
||||||
|
} else {
|
||||||
|
refWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
|
||||||
|
outputShape,
|
||||||
|
netPrecision,
|
||||||
|
testValues.expected.dequantizationOnWeights,
|
||||||
|
as_type_ptr<opset1::Constant>(refWeights));
|
||||||
|
}
|
||||||
|
|
||||||
|
referenceFunction = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getReference(
|
||||||
|
testValues.expected.precisionBeforeDequantization,
|
||||||
|
netPrecision,
|
||||||
|
inputShape,
|
||||||
|
outputShape,
|
||||||
|
testValues.expected.dequantizationOnActivations,
|
||||||
|
refWeights,
|
||||||
|
testValues.expected.dequantizationAfter);
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::string getTestCaseName(testing::TestParamInfo<ConvolutionBackpropDataTransformationParams> obj) {
|
||||||
|
const auto netPrecision = std::get<0>(obj.param);
|
||||||
|
auto inputShape = std::get<1>(obj.param);
|
||||||
|
ConvolutionBackpropDataTransformationTestValues testValues = std::get<2>(obj.param);
|
||||||
|
|
||||||
|
std::ostringstream result;
|
||||||
|
result << toString(testValues.params) << "_" <<
|
||||||
|
netPrecision << "_" <<
|
||||||
|
inputShape << "_" <<
|
||||||
|
testValues.actual.precisionBeforeDequantization << "_" <<
|
||||||
|
testValues.actual.dequantizationOnActivations << "_" <<
|
||||||
|
testValues.actual.dequantizationOnWeights << "_" <<
|
||||||
|
testValues.actual.fakeQuantizeOnWeights << "_" <<"_weights_" <<
|
||||||
|
testValues.actual.weights->get_element_type() << "_" << "{ " <<
|
||||||
|
testValues.actual.weights->cast_vector<float>()[0] << " }_";
|
||||||
|
return result.str();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_P(ConvolutionBackpropDataTransformation, CompareFunctions) {
|
||||||
|
actualFunction->validate_nodes_and_infer_types();
|
||||||
|
auto res = compare_functions(referenceFunction, actualFunction, true, true, true);
|
||||||
|
ASSERT_TRUE(res.first) << res.second;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::vector<element::Type> netPrecisions = {
|
||||||
|
element::f32,
|
||||||
|
element::f16
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::vector<ngraph::Shape> shapes = {
|
||||||
|
ngraph::Shape({ 1, 8, 16, 16 })
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::vector<ConvolutionBackpropDataTransformationTestValues> testValues = {
|
||||||
|
// with zero point
|
||||||
|
{
|
||||||
|
LayerTransformation::createParamsU8I8(),
|
||||||
|
// ActualValues
|
||||||
|
{
|
||||||
|
ngraph::element::u8,
|
||||||
|
{{ngraph::element::f32}, { 128.f }, { 0.02f }},
|
||||||
|
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||||
|
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
|
||||||
|
},
|
||||||
|
// ExpectedValues
|
||||||
|
{
|
||||||
|
ngraph::element::u8,
|
||||||
|
{{}, { { 128.f }, ngraph::element::f32, {}, false }, {}},
|
||||||
|
{},
|
||||||
|
{{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}},
|
||||||
|
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ -125.f }),
|
||||||
|
true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
// updatePrecisions = false
|
||||||
|
{
|
||||||
|
LayerTransformation::createParamsU8I8().setUpdatePrecisions(false),
|
||||||
|
// ActualValues
|
||||||
|
{
|
||||||
|
ngraph::element::u8,
|
||||||
|
{{ngraph::element::f32}, { 128.f }, { 0.02f }},
|
||||||
|
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||||
|
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
|
||||||
|
},
|
||||||
|
// ExpectedValues
|
||||||
|
{
|
||||||
|
ngraph::element::u8,
|
||||||
|
{{}, { { 128.f }, ngraph::element::f32, {}, false }, {}},
|
||||||
|
{},
|
||||||
|
{{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}},
|
||||||
|
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -125.f }),
|
||||||
|
true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
// QDq version
|
||||||
|
{
|
||||||
|
LayerTransformation::createParamsU8I8(),
|
||||||
|
// ActualValues
|
||||||
|
{
|
||||||
|
ngraph::element::u8,
|
||||||
|
{{ngraph::element::f32}, { 128.f }, { 0.02f }},
|
||||||
|
{{ngraph::element::f32}, { 2.f }, { 0.01f }},
|
||||||
|
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
|
||||||
|
},
|
||||||
|
// ExpectedValues
|
||||||
|
{
|
||||||
|
ngraph::element::u8,
|
||||||
|
{{}, { { 128.f }, ngraph::element::f32, {}, false }, {}},
|
||||||
|
{{}, { { 2.f }, ngraph::element::f32, {1, 2, 1, 1}, true, 1ul, element::i8, false, { "DISABLED_CONSTANT_FOLDING" } }, {}},
|
||||||
|
{{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1 }}},
|
||||||
|
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
||||||
|
true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
// without zero point
|
||||||
|
{
|
||||||
|
LayerTransformation::createParamsU8I8(),
|
||||||
|
// ActualValues
|
||||||
|
{
|
||||||
|
ngraph::element::u8,
|
||||||
|
{{ngraph::element::f32}, {}, { 0.02f }},
|
||||||
|
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||||
|
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
|
||||||
|
},
|
||||||
|
// ExpectedValues
|
||||||
|
{
|
||||||
|
ngraph::element::u8,
|
||||||
|
{},
|
||||||
|
{},
|
||||||
|
{{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}},
|
||||||
|
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ -125.f }),
|
||||||
|
true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
// QDq version
|
||||||
|
{
|
||||||
|
LayerTransformation::createParamsU8I8(),
|
||||||
|
// ActualValues
|
||||||
|
{
|
||||||
|
ngraph::element::u8,
|
||||||
|
{{ngraph::element::f32}, {}, { 0.02f }},
|
||||||
|
{{ngraph::element::f32}, {}, { 0.01f }},
|
||||||
|
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
|
||||||
|
},
|
||||||
|
// ExpectedValues
|
||||||
|
{
|
||||||
|
ngraph::element::u8,
|
||||||
|
{},
|
||||||
|
{},
|
||||||
|
{{}, {}, {{ 0.0002f }, ngraph::element::f32, {1}}},
|
||||||
|
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
||||||
|
true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
// per-channel dequantization with the same values
|
||||||
|
{
|
||||||
|
LayerTransformation::createParamsU8I8(),
|
||||||
|
// ActualValues
|
||||||
|
{
|
||||||
|
ngraph::element::u8,
|
||||||
|
{{ngraph::element::f32}, {}, { std::vector<float>{0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f} }},
|
||||||
|
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||||
|
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
|
||||||
|
},
|
||||||
|
// ExpectedValues
|
||||||
|
{
|
||||||
|
ngraph::element::u8,
|
||||||
|
{},
|
||||||
|
{},
|
||||||
|
{{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}},
|
||||||
|
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ -125.f }),
|
||||||
|
true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
// per-channel dequantization with different values
|
||||||
|
{
|
||||||
|
LayerTransformation::createParamsU8I8(),
|
||||||
|
// ActualValues
|
||||||
|
{
|
||||||
|
ngraph::element::u8,
|
||||||
|
{{ngraph::element::f32}, {}, { std::vector<float>{0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f} }},
|
||||||
|
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||||
|
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
|
||||||
|
},
|
||||||
|
// ExpectedValues
|
||||||
|
{
|
||||||
|
ngraph::element::u8,
|
||||||
|
{{ngraph::element::f32}, {}, { std::vector<float>{0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f} }},
|
||||||
|
{},
|
||||||
|
{},
|
||||||
|
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||||
|
true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
smoke_LPT,
|
||||||
|
ConvolutionBackpropDataTransformation,
|
||||||
|
::testing::Combine(
|
||||||
|
::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::ValuesIn(shapes),
|
||||||
|
::testing::ValuesIn(testValues)),
|
||||||
|
ConvolutionBackpropDataTransformation::getTestCaseName);
|
@ -231,7 +231,7 @@ const std::vector<ConvolutionQDqTransformationTestValues> testValues = {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
// Actual & Transformed:
|
// Actual:
|
||||||
//
|
//
|
||||||
// Parameter Constant Constant Constant
|
// Parameter Constant Constant Constant
|
||||||
// |U8 |U8 |FP32 |I8
|
// |U8 |U8 |FP32 |I8
|
||||||
@ -246,6 +246,22 @@ const std::vector<ConvolutionQDqTransformationTestValues> testValues = {
|
|||||||
// \FP32 /FP32
|
// \FP32 /FP32
|
||||||
// \ /
|
// \ /
|
||||||
// Convolution
|
// Convolution
|
||||||
|
//
|
||||||
|
// Transformed:
|
||||||
|
//
|
||||||
|
// Parameter Constant
|
||||||
|
// |U8 |U8
|
||||||
|
// | |
|
||||||
|
// Convert Convert
|
||||||
|
// \FP32 /FP32
|
||||||
|
// \ /
|
||||||
|
// Subtract Constant
|
||||||
|
// \FP32 /FP32
|
||||||
|
// \ /
|
||||||
|
// Multiply Constant
|
||||||
|
// \FP32 /FP32
|
||||||
|
// \ /
|
||||||
|
// Convolution
|
||||||
{
|
{
|
||||||
LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true),
|
LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true),
|
||||||
// ActualValues
|
// ActualValues
|
||||||
@ -262,8 +278,8 @@ const std::vector<ConvolutionQDqTransformationTestValues> testValues = {
|
|||||||
{
|
{
|
||||||
ngraph::element::u8,
|
ngraph::element::u8,
|
||||||
{{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, { 0.02f }},
|
{{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, { 0.02f }},
|
||||||
{{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::i8, true }, { 0.03f }},
|
{},
|
||||||
{ std::vector<float>{ 2.f }, ngraph::element::f32},
|
{ std::vector<float>{ -3.75f }, ngraph::element::f32},
|
||||||
{},
|
{},
|
||||||
ngraph::element::f32,
|
ngraph::element::f32,
|
||||||
{}
|
{}
|
||||||
@ -434,12 +450,8 @@ const std::vector<ConvolutionQDqTransformationTestValues> testValues = {
|
|||||||
{ {1000.f}, element::f32, {}, false },
|
{ {1000.f}, element::f32, {}, false },
|
||||||
{ {0.02f}, element::f32, {}, false }
|
{ {0.02f}, element::f32, {}, false }
|
||||||
},
|
},
|
||||||
{
|
{},
|
||||||
{ ngraph::element::f32, false },
|
{ std::vector<float>{ -3.75f }, ngraph::element::f32},
|
||||||
{ {127.f}, element::f32, {}, false },
|
|
||||||
{ {0.03f}, element::f32, {}, false }
|
|
||||||
},
|
|
||||||
{ std::vector<float>{ 2.f }, ngraph::element::i8},
|
|
||||||
{},
|
{},
|
||||||
ngraph::element::f32,
|
ngraph::element::f32,
|
||||||
{}
|
{}
|
||||||
|
@ -160,8 +160,8 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
|
|||||||
{
|
{
|
||||||
ngraph::element::u8,
|
ngraph::element::u8,
|
||||||
{{ ngraph::element::f32 }, { 128.f }, { 0.02f }},
|
{{ ngraph::element::f32 }, { 128.f }, { 0.02f }},
|
||||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
{},
|
||||||
ngraph::element::f32,
|
ngraph::element::f32,
|
||||||
{}
|
{}
|
||||||
}
|
}
|
||||||
@ -288,13 +288,13 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
|
|||||||
{{ 128.f, 0.f, 128.f }, ngraph::element::f32, { 1, 3, 1, 1 }},
|
{{ 128.f, 0.f, 128.f }, ngraph::element::f32, { 1, 3, 1, 1 }},
|
||||||
{{ 0.02f, 0.01f, 0.03f }, ngraph::element::f32, {1, 3, 1, 1}}
|
{{ 0.02f, 0.01f, 0.03f }, ngraph::element::f32, {1, 3, 1, 1}}
|
||||||
},
|
},
|
||||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
{},
|
||||||
ngraph::element::f32,
|
ngraph::element::f32,
|
||||||
{}
|
{}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
// dequantization in second dimension
|
// float input
|
||||||
{
|
{
|
||||||
LayerTransformation::createParamsU8I8(),
|
LayerTransformation::createParamsU8I8(),
|
||||||
// ActualValues
|
// ActualValues
|
||||||
@ -316,8 +316,8 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
|
|||||||
{{ 128.f }, ngraph::element::f32, { 1, 1, 1, 1 }},
|
{{ 128.f }, ngraph::element::f32, { 1, 1, 1, 1 }},
|
||||||
{{ 0.02f }, ngraph::element::f32, {1, 1, 1, 1}}
|
{{ 0.02f }, ngraph::element::f32, {1, 1, 1, 1}}
|
||||||
},
|
},
|
||||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
{},
|
||||||
ngraph::element::f32,
|
ngraph::element::f32,
|
||||||
{}
|
{}
|
||||||
}
|
}
|
||||||
@ -356,8 +356,8 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
|
|||||||
{
|
{
|
||||||
ngraph::element::f32,
|
ngraph::element::f32,
|
||||||
{{}, {}, { {0.02f}, element::f32 }},
|
{{}, {}, { {0.02f}, element::f32 }},
|
||||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
{},
|
||||||
ngraph::element::f32,
|
ngraph::element::f32,
|
||||||
{}
|
{}
|
||||||
}
|
}
|
||||||
@ -396,8 +396,8 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
|
|||||||
{
|
{
|
||||||
ngraph::element::u8,
|
ngraph::element::u8,
|
||||||
{{element::f32}, { 1000.f }, { {0.02f}, element::f32 }},
|
{{element::f32}, { 1000.f }, { {0.02f}, element::f32 }},
|
||||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
{},
|
||||||
ngraph::element::f32,
|
ngraph::element::f32,
|
||||||
{}
|
{}
|
||||||
}
|
}
|
||||||
|
@ -160,8 +160,8 @@ const std::vector<GroupConvolutionTestValues> testValues = {
|
|||||||
{
|
{
|
||||||
ngraph::element::u8,
|
ngraph::element::u8,
|
||||||
{{ ngraph::element::f32 }, { 128.f }, { 0.02f }},
|
{{ ngraph::element::f32 }, { 128.f }, { 0.02f }},
|
||||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
{},
|
||||||
{},
|
{},
|
||||||
ngraph::element::f32,
|
ngraph::element::f32,
|
||||||
{}
|
{}
|
||||||
@ -286,8 +286,8 @@ const std::vector<GroupConvolutionTestValues> testValues = {
|
|||||||
{
|
{
|
||||||
ngraph::element::f32,
|
ngraph::element::f32,
|
||||||
{{}, {}, { 0.02f }},
|
{{}, {}, { 0.02f }},
|
||||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
{},
|
||||||
{},
|
{},
|
||||||
ngraph::element::f32,
|
ngraph::element::f32,
|
||||||
{}
|
{}
|
||||||
@ -459,8 +459,8 @@ const std::vector<GroupConvolutionTestValues> testValues = {
|
|||||||
{
|
{
|
||||||
ngraph::element::f32,
|
ngraph::element::f32,
|
||||||
{{}, {}, { 0.02f }},
|
{{}, {}, { 0.02f }},
|
||||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
{},
|
||||||
{},
|
{},
|
||||||
ngraph::element::f32,
|
ngraph::element::f32,
|
||||||
{}
|
{}
|
||||||
|
@ -60,7 +60,7 @@ private:
|
|||||||
auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, input_shape);
|
auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, input_shape);
|
||||||
auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, weights_shape, {1});
|
auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, weights_shape, {1});
|
||||||
auto conv = std::make_shared<ngraph::op::DeconvolutionIE>(input, weights, ngraph::Strides(spatial_dims, 1), ngraph::Strides(spatial_dims, 1),
|
auto conv = std::make_shared<ngraph::op::DeconvolutionIE>(input, weights, ngraph::Strides(spatial_dims, 1), ngraph::Strides(spatial_dims, 1),
|
||||||
ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0));
|
ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0), ngraph::element::f32);
|
||||||
|
|
||||||
return std::make_shared<ngraph::Function>(ngraph::NodeVector{conv}, ngraph::ParameterVector{input});
|
return std::make_shared<ngraph::Function>(ngraph::NodeVector{conv}, ngraph::ParameterVector{input});
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,100 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "low_precision_transformations/convolution_backprop_data_transformation.hpp"
|
||||||
|
#include "common_test_utils/test_constants.hpp"
|
||||||
|
|
||||||
|
using namespace LayerTestsDefinitions;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||||
|
ngraph::element::f32
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
|
||||||
|
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true),
|
||||||
|
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(false)
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::vector<LayerTestsDefinitions::ConvolutionBackpropDataTransformationParam> params = {
|
||||||
|
// FQ on weights
|
||||||
|
// with zero point
|
||||||
|
{
|
||||||
|
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
|
||||||
|
{255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { -127.f }, { 127.f }},
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
},
|
||||||
|
// without zero point
|
||||||
|
{
|
||||||
|
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
|
||||||
|
{255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }},
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
},
|
||||||
|
// with incorrect zero point on activations
|
||||||
|
{
|
||||||
|
{256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
|
||||||
|
{255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }},
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
},
|
||||||
|
// with incorrect zero point on weights
|
||||||
|
{
|
||||||
|
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
|
||||||
|
{255ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
},
|
||||||
|
// QDq on weights
|
||||||
|
// with zero point
|
||||||
|
{
|
||||||
|
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
|
||||||
|
{{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
},
|
||||||
|
// without zero point
|
||||||
|
{
|
||||||
|
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
|
||||||
|
{{ngraph::element::f32}, {}, { {4.f}, ngraph::element::f32, {}, false }},
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
},
|
||||||
|
// with incorrect zero point on activations
|
||||||
|
{
|
||||||
|
{256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
|
||||||
|
{{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
},
|
||||||
|
// with incorrect zero point on weights
|
||||||
|
{
|
||||||
|
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
|
||||||
|
{{ngraph::element::f32}, { {1000.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::vector<ngraph::Shape> inputShapes = {
|
||||||
|
{ 1, 8, 16, 16 }
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::vector<ngraph::Shape> outputShapes = {
|
||||||
|
{ 16, 16 }
|
||||||
|
};
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(smoke_LPT, ConvolutionBackpropDataTransformation,
|
||||||
|
::testing::Combine(
|
||||||
|
::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::ValuesIn(inputShapes),
|
||||||
|
::testing::ValuesIn(outputShapes),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||||
|
::testing::ValuesIn(trasformationParamValues),
|
||||||
|
::testing::ValuesIn(params)),
|
||||||
|
ConvolutionBackpropDataTransformation::getTestCaseName);
|
||||||
|
} // namespace
|
@ -0,0 +1,103 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "low_precision_transformations/convolution_backprop_data_transformation.hpp"
|
||||||
|
#include "common_test_utils/test_constants.hpp"
|
||||||
|
|
||||||
|
using namespace LayerTestsDefinitions;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
const std::vector<ngraph::element::Type> netPrecisions = {
|
||||||
|
ngraph::element::f32,
|
||||||
|
ngraph::element::f16
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
|
||||||
|
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true),
|
||||||
|
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(false)
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::vector<LayerTestsDefinitions::ConvolutionBackpropDataTransformationParam> params = {
|
||||||
|
// FQ on weights
|
||||||
|
// with zero point
|
||||||
|
{
|
||||||
|
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
|
||||||
|
{255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { -127.f }, { 127.f }},
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
},
|
||||||
|
// without zero point
|
||||||
|
{
|
||||||
|
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
|
||||||
|
{255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }},
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
},
|
||||||
|
// TODO: check fails in CI
|
||||||
|
// // with incorrect zero point on activations
|
||||||
|
// {
|
||||||
|
// {256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
|
||||||
|
// {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }},
|
||||||
|
// "",
|
||||||
|
// ""
|
||||||
|
// },
|
||||||
|
// // with incorrect zero point on weights
|
||||||
|
// {
|
||||||
|
// {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
|
||||||
|
// {255ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
|
||||||
|
// "",
|
||||||
|
// ""
|
||||||
|
// },
|
||||||
|
// QDq on weights
|
||||||
|
// with zero point
|
||||||
|
{
|
||||||
|
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
|
||||||
|
{{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
},
|
||||||
|
// without zero point
|
||||||
|
{
|
||||||
|
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
|
||||||
|
{{ngraph::element::f32}, {}, { {4.f}, ngraph::element::f32, {}, false }},
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
},
|
||||||
|
// with incorrect zero point on activations
|
||||||
|
{
|
||||||
|
{256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
|
||||||
|
{{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
},
|
||||||
|
// with incorrect zero point on weights
|
||||||
|
{
|
||||||
|
{256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
|
||||||
|
{{ngraph::element::f32}, { {1000.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
|
||||||
|
"",
|
||||||
|
""
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::vector<ngraph::Shape> inputShapes = {
|
||||||
|
{ 1, 8, 16, 16 },
|
||||||
|
{ 1, 32, 16, 16 }
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::vector<ngraph::Shape> outputShapes = {
|
||||||
|
{ 16, 16 }
|
||||||
|
};
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(smoke_LPT, ConvolutionBackpropDataTransformation,
|
||||||
|
::testing::Combine(
|
||||||
|
::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::ValuesIn(inputShapes),
|
||||||
|
::testing::ValuesIn(outputShapes),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_GPU),
|
||||||
|
::testing::ValuesIn(trasformationParamValues),
|
||||||
|
::testing::ValuesIn(params)),
|
||||||
|
ConvolutionBackpropDataTransformation::getTestCaseName);
|
||||||
|
} // namespace
|
@ -0,0 +1,65 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
|
||||||
|
#include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp"
|
||||||
|
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
|
||||||
|
#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
|
||||||
|
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
|
||||||
|
|
||||||
|
namespace LayerTestsDefinitions {
|
||||||
|
|
||||||
|
class ConvolutionBackpropDataTransformationParam {
|
||||||
|
public:
|
||||||
|
ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData;
|
||||||
|
ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
|
||||||
|
ngraph::builder::subgraph::DequantizationOperations dequantizationOnWeights;
|
||||||
|
std::string layerName;
|
||||||
|
std::string expectedKernelType;
|
||||||
|
|
||||||
|
ConvolutionBackpropDataTransformationParam() = default;
|
||||||
|
ConvolutionBackpropDataTransformationParam(
|
||||||
|
const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantizeOnData,
|
||||||
|
const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights,
|
||||||
|
std::string layerName,
|
||||||
|
std::string expectedKernelType) :
|
||||||
|
fakeQuantizeOnData(fakeQuantizeOnData), fakeQuantizeOnWeights(fakeQuantizeOnWeights),
|
||||||
|
layerName(std::move(layerName)), expectedKernelType(std::move(expectedKernelType)) {}
|
||||||
|
ConvolutionBackpropDataTransformationParam(
|
||||||
|
const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantizeOnData,
|
||||||
|
ngraph::builder::subgraph::DequantizationOperations dequantizationOnWeights,
|
||||||
|
std::string layerName,
|
||||||
|
std::string expectedKernelType) :
|
||||||
|
fakeQuantizeOnData(fakeQuantizeOnData), dequantizationOnWeights(std::move(dequantizationOnWeights)),
|
||||||
|
layerName(std::move(layerName)), expectedKernelType(std::move(expectedKernelType)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef std::tuple<
|
||||||
|
ngraph::element::Type, // netPrecision
|
||||||
|
ngraph::Shape, // inputShape
|
||||||
|
ngraph::Shape, // outputShape
|
||||||
|
std::string, // targetDevice
|
||||||
|
ngraph::pass::low_precision::LayerTransformation::Params,
|
||||||
|
ConvolutionBackpropDataTransformationParam
|
||||||
|
> ConvolutionBackpropDataTransformationParams;
|
||||||
|
|
||||||
|
class ConvolutionBackpropDataTransformation :
|
||||||
|
public testing::WithParamInterface<ConvolutionBackpropDataTransformationParams>,
|
||||||
|
public LayerTestsUtils::LayerTransformation {
|
||||||
|
public:
|
||||||
|
static std::string getTestCaseName(testing::TestParamInfo<ConvolutionBackpropDataTransformationParams> obj);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void SetUp() override;
|
||||||
|
|
||||||
|
void Run() override;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace LayerTestsDefinitions
|
@ -0,0 +1,77 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "low_precision_transformations/convolution_backprop_data_transformation.hpp"
|
||||||
|
|
||||||
|
#include <tuple>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "lpt_ngraph_functions/convolution_backprop_data_function.hpp"
|
||||||
|
|
||||||
|
namespace LayerTestsDefinitions {
|
||||||
|
|
||||||
|
// Composes the test instance name: the common prefix produced by getTestCaseNameByParams,
// followed by the output shape and the streamed quantization descriptions, joined with "_".
std::string ConvolutionBackpropDataTransformation::getTestCaseName(testing::TestParamInfo<ConvolutionBackpropDataTransformationParams> obj) {
    const auto& testValues = obj.param;
    const ngraph::element::Type& netPrecision = std::get<0>(testValues);
    const ngraph::Shape& inputShape = std::get<1>(testValues);
    const ngraph::Shape& outputShape = std::get<2>(testValues);
    const std::string& targetDevice = std::get<3>(testValues);
    const ngraph::pass::low_precision::LayerTransformation::Params& params = std::get<4>(testValues);
    const ConvolutionBackpropDataTransformationParam& param = std::get<5>(testValues);

    std::ostringstream name;
    name << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params);
    name << "_" << outputShape;
    name << "_" << param.fakeQuantizeOnData;
    name << "_" << param.fakeQuantizeOnWeights;
    name << "_" << param.dequantizationOnWeights;
    return name.str();
}
|
||||||
|
|
||||||
|
void ConvolutionBackpropDataTransformation::SetUp() {
|
||||||
|
threshold = 0.1f;
|
||||||
|
|
||||||
|
ngraph::element::Type netPrecision;
|
||||||
|
ngraph::Shape inputShape;
|
||||||
|
ngraph::Shape outputShape;
|
||||||
|
ngraph::pass::low_precision::LayerTransformation::Params params;
|
||||||
|
ConvolutionBackpropDataTransformationParam param;
|
||||||
|
std::tie(netPrecision, inputShape, outputShape, targetDevice, params, param) = this->GetParam();
|
||||||
|
|
||||||
|
std::shared_ptr<ngraph::Node> weights;
|
||||||
|
|
||||||
|
if (!param.fakeQuantizeOnWeights.empty()) {
|
||||||
|
weights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
|
||||||
|
ngraph::Shape{inputShape[1], inputShape[1] / 2, 1, 1},
|
||||||
|
netPrecision,
|
||||||
|
param.fakeQuantizeOnWeights);
|
||||||
|
} else {
|
||||||
|
weights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
|
||||||
|
ngraph::Shape{inputShape[1], inputShape[1] / 2, 1, 1},
|
||||||
|
netPrecision,
|
||||||
|
param.dequantizationOnWeights);
|
||||||
|
}
|
||||||
|
|
||||||
|
function = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::get(
|
||||||
|
netPrecision,
|
||||||
|
inputShape,
|
||||||
|
outputShape,
|
||||||
|
param.fakeQuantizeOnData,
|
||||||
|
weights);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ConvolutionBackpropDataTransformation::Run() {
|
||||||
|
LayerTestsCommon::Run();
|
||||||
|
|
||||||
|
const auto params = std::get<5>(GetParam());
|
||||||
|
const auto actualType = getRuntimePrecision(params.layerName);
|
||||||
|
EXPECT_EQ(actualType, params.expectedKernelType);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runs the fixture flow (see ConvolutionBackpropDataTransformation::Run) for every
// instantiated parameter set.
// No trailing ';' after the body: it would be an empty declaration at namespace scope
// and is reported by -Wextra-semi / -pedantic.
TEST_P(ConvolutionBackpropDataTransformation, CompareWithRefImpl) {
    Run();
}
|
||||||
|
|
||||||
|
} // namespace LayerTestsDefinitions
|
@ -0,0 +1,54 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <ngraph/ngraph.hpp>
|
||||||
|
#include <ngraph/opsets/opset1.hpp>
|
||||||
|
|
||||||
|
#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
|
||||||
|
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
|
||||||
|
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
|
||||||
|
|
||||||
|
namespace ngraph {
|
||||||
|
namespace builder {
|
||||||
|
namespace subgraph {
|
||||||
|
|
||||||
|
class ConvolutionBackpropDataFunction {
|
||||||
|
public:
|
||||||
|
static std::shared_ptr<Node> getWeights(
|
||||||
|
const Shape& shape,
|
||||||
|
const element::Type& netPrecision,
|
||||||
|
const builder::subgraph::DequantizationOperations& dequantizationOnWeights,
|
||||||
|
const std::shared_ptr<opset1::Constant>& value = nullptr);
|
||||||
|
static std::shared_ptr<Node> getWeights(
|
||||||
|
const Shape& shape,
|
||||||
|
const element::Type& netPrecision,
|
||||||
|
const builder::subgraph::FakeQuantizeOnWeights& fqOnWeights,
|
||||||
|
const std::shared_ptr<opset1::Constant>& value = nullptr);
|
||||||
|
static std::shared_ptr<Function> get(
|
||||||
|
const element::Type netPrecision,
|
||||||
|
const Shape& inputShape,
|
||||||
|
const Shape& outputShape,
|
||||||
|
const builder::subgraph::FakeQuantizeOnData& fqOnData,
|
||||||
|
const std::shared_ptr<Node>& weights);
|
||||||
|
static std::shared_ptr<Function> getOriginal(
|
||||||
|
const element::Type precision,
|
||||||
|
const element::Type netPrecision,
|
||||||
|
const Shape& inputShape,
|
||||||
|
const Shape& outputShape,
|
||||||
|
const builder::subgraph::DequantizationOperations& dequantization,
|
||||||
|
const std::shared_ptr<Node>& weights);
|
||||||
|
static std::shared_ptr<Function> getReference(
|
||||||
|
const element::Type precision,
|
||||||
|
const element::Type netPrecision,
|
||||||
|
const Shape& inputShape,
|
||||||
|
const Shape& outputShape,
|
||||||
|
const builder::subgraph::DequantizationOperations& dequantization,
|
||||||
|
const std::shared_ptr<Node>& weights,
|
||||||
|
const builder::subgraph::DequantizationOperations& dequantizationAfter);
|
||||||
|
};
|
||||||
|
} // namespace subgraph
|
||||||
|
} // namespace builder
|
||||||
|
} // namespace ngraph
|
@ -0,0 +1,149 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "lpt_ngraph_functions/convolution_backprop_data_function.hpp"
|
||||||
|
|
||||||
|
#include <ngraph/opsets/opset1.hpp>
|
||||||
|
#include <ngraph_ops/type_relaxed.hpp>
|
||||||
|
#include "ngraph_functions/subgraph_builders.hpp"
|
||||||
|
#include "low_precision/network_helper.hpp"
|
||||||
|
|
||||||
|
#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
|
||||||
|
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
|
||||||
|
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
|
||||||
|
#include "lpt_ngraph_functions/common/builders.hpp"
|
||||||
|
#include "low_precision/common/dequantization_op.hpp"
|
||||||
|
#include "low_precision/network_helper.hpp"
|
||||||
|
|
||||||
|
using namespace ngraph::pass::low_precision;
|
||||||
|
|
||||||
|
namespace ngraph {
|
||||||
|
namespace builder {
|
||||||
|
namespace subgraph {
|
||||||
|
|
||||||
|
std::shared_ptr<Function> ConvolutionBackpropDataFunction::get(
|
||||||
|
const element::Type netPrecision,
|
||||||
|
const Shape& inputShape,
|
||||||
|
const Shape& outputShape,
|
||||||
|
const builder::subgraph::FakeQuantizeOnData& fqOnData,
|
||||||
|
const std::shared_ptr<Node>& weights) {
|
||||||
|
const auto input = std::make_shared<opset1::Parameter>(netPrecision, inputShape);
|
||||||
|
const auto fq = makeFakeQuantize(input, netPrecision, fqOnData);
|
||||||
|
|
||||||
|
auto convolutionBackpropData = std::make_shared<opset1::ConvolutionBackpropData>(
|
||||||
|
fq,
|
||||||
|
weights,
|
||||||
|
Strides{ 1, 1 },
|
||||||
|
CoordinateDiff{ 0, 0 },
|
||||||
|
CoordinateDiff{ 0, 0 },
|
||||||
|
Strides{ 1, 1 });
|
||||||
|
|
||||||
|
ngraph::ResultVector results{ std::make_shared<opset1::Result>(convolutionBackpropData) };
|
||||||
|
return std::make_shared<ngraph::Function>(results, ParameterVector{ input }, "ConvolutionBackpropDataTransformation");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<Node> ConvolutionBackpropDataFunction::getWeights(
|
||||||
|
const Shape& shape,
|
||||||
|
const element::Type& netPrecision,
|
||||||
|
const builder::subgraph::FakeQuantizeOnWeights& fqOnWeights,
|
||||||
|
const std::shared_ptr<opset1::Constant>& value) {
|
||||||
|
const auto weights = value != nullptr ?
|
||||||
|
value :
|
||||||
|
std::make_shared<opset1::Constant>(
|
||||||
|
element::i8,
|
||||||
|
shape,
|
||||||
|
std::vector<float>(shape_size(shape), 1));
|
||||||
|
const auto convert = std::make_shared<opset1::Convert>(weights, netPrecision);
|
||||||
|
OutputVector convertedOutput(1);
|
||||||
|
convert->constant_fold(convertedOutput, convert->input_values());
|
||||||
|
const auto convertedWeights = convertedOutput[0].get_node_shared_ptr();
|
||||||
|
const auto fq = makeFakeQuantize(convertedWeights, netPrecision, fqOnWeights);
|
||||||
|
|
||||||
|
return fq;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<Node> ConvolutionBackpropDataFunction::getWeights(
|
||||||
|
const Shape& shape,
|
||||||
|
const element::Type& netPrecision,
|
||||||
|
const builder::subgraph::DequantizationOperations& dequantizationOnWeights,
|
||||||
|
const std::shared_ptr<opset1::Constant>& value) {
|
||||||
|
auto weights =
|
||||||
|
value != nullptr ?
|
||||||
|
value :
|
||||||
|
std::make_shared<opset1::Constant>(
|
||||||
|
element::i8,
|
||||||
|
shape,
|
||||||
|
std::vector<float>(shape_size(shape), 1));
|
||||||
|
auto dequantizationStructure = dequantizationOnWeights;
|
||||||
|
dequantizationStructure.setPrecision(netPrecision);
|
||||||
|
if (!dequantizationOnWeights.subtract.constantPrecision.is_real()) {
|
||||||
|
dequantizationStructure.subtract.constantPrecision = dequantizationOnWeights.subtract.constantPrecision;
|
||||||
|
}
|
||||||
|
if (weights->get_element_type().is_real()) {
|
||||||
|
weights = as_type_ptr<opset1::Constant>(fold<opset1::Convert>(weights, netPrecision));
|
||||||
|
}
|
||||||
|
const auto dq = makeDequantization(weights, dequantizationStructure);
|
||||||
|
|
||||||
|
return dq;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<Function> ConvolutionBackpropDataFunction::getOriginal(
|
||||||
|
const element::Type precision,
|
||||||
|
const element::Type netPrecision,
|
||||||
|
const Shape& inputShape,
|
||||||
|
const Shape& outputShape,
|
||||||
|
const builder::subgraph::DequantizationOperations& dequantization,
|
||||||
|
const std::shared_ptr<Node>& weights) {
|
||||||
|
const auto input = std::make_shared<opset1::Parameter>(precision, inputShape);
|
||||||
|
auto dequantizationStructure = dequantization;
|
||||||
|
dequantizationStructure.multiply.outPrecision = netPrecision;
|
||||||
|
const auto activations = makeDequantization(input, dequantizationStructure);
|
||||||
|
|
||||||
|
auto convolutionBackpropData = std::make_shared<opset1::ConvolutionBackpropData>(
|
||||||
|
activations,
|
||||||
|
weights,
|
||||||
|
Strides{ 1, 1 },
|
||||||
|
CoordinateDiff{ 0, 0 },
|
||||||
|
CoordinateDiff{ 0, 0 },
|
||||||
|
Strides{ 1, 1 });
|
||||||
|
|
||||||
|
convolutionBackpropData->set_friendly_name("output");
|
||||||
|
ngraph::ResultVector results{ std::make_shared<opset1::Result>(convolutionBackpropData) };
|
||||||
|
return std::make_shared<ngraph::Function>(results, ParameterVector{ input }, "ConvolutionBackpropDataTransformation");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<Function> ConvolutionBackpropDataFunction::getReference(
|
||||||
|
const element::Type precision,
|
||||||
|
const element::Type netPrecision,
|
||||||
|
const Shape& inputShape,
|
||||||
|
const Shape& outputShape,
|
||||||
|
const builder::subgraph::DequantizationOperations& dequantization,
|
||||||
|
const std::shared_ptr<Node>& weights,
|
||||||
|
const builder::subgraph::DequantizationOperations& dequantizationAfter) {
|
||||||
|
const auto input = std::make_shared<opset1::Parameter>(precision, inputShape);
|
||||||
|
auto dequantizationStructure = dequantization;
|
||||||
|
dequantizationStructure.multiply.outPrecision = netPrecision;
|
||||||
|
const auto activations = makeDequantization(input, dequantizationStructure);
|
||||||
|
|
||||||
|
auto convolutionBackpropData = std::make_shared<op::TypeRelaxed<opset1::ConvolutionBackpropData>>(
|
||||||
|
std::vector<element::Type>{ element::f32, element::f32 },
|
||||||
|
std::vector<element::Type>{ dequantizationAfter.empty() ? netPrecision : element::f32 },
|
||||||
|
ngraph::op::TemporaryReplaceOutputType(activations, element::f32).get(),
|
||||||
|
ngraph::op::TemporaryReplaceOutputType(weights, element::f32).get(),
|
||||||
|
Strides{ 1, 1 },
|
||||||
|
CoordinateDiff{ 0, 0 },
|
||||||
|
CoordinateDiff{ 0, 0 },
|
||||||
|
Strides{ 1, 1 });
|
||||||
|
|
||||||
|
auto dequantizationStructureAfter = dequantizationAfter;
|
||||||
|
dequantizationStructureAfter.multiply.outPrecision = netPrecision;
|
||||||
|
const auto result = makeDequantization(convolutionBackpropData, dequantizationStructureAfter);
|
||||||
|
result->set_friendly_name("output");
|
||||||
|
ngraph::ResultVector results{ std::make_shared<opset1::Result>(result) };
|
||||||
|
return std::make_shared<ngraph::Function>(results, ParameterVector{ input }, "ConvolutionBackpropDataTransformation");
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace subgraph
|
||||||
|
} // namespace builder
|
||||||
|
} // namespace ngraph
|
@ -86,8 +86,8 @@ namespace ngraph
|
|||||||
class NGRAPH_API ConvolutionBackpropData : public Op
|
class NGRAPH_API ConvolutionBackpropData : public Op
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static constexpr NodeTypeInfo type_info{"ConvolutionBackpropData", 1};
|
NGRAPH_RTTI_DECLARATION;
|
||||||
const NodeTypeInfo& get_type_info() const override { return type_info; }
|
|
||||||
/// \brief Constructs a batched-convolution data batch-backprop operation.
|
/// \brief Constructs a batched-convolution data batch-backprop operation.
|
||||||
ConvolutionBackpropData() = default;
|
ConvolutionBackpropData() = default;
|
||||||
// clang-format off
|
// clang-format off
|
||||||
|
@ -85,8 +85,8 @@ namespace ngraph
|
|||||||
class NGRAPH_API GroupConvolutionBackpropData : public Op
|
class NGRAPH_API GroupConvolutionBackpropData : public Op
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static constexpr NodeTypeInfo type_info{"GroupConvolutionBackpropData", 1};
|
NGRAPH_RTTI_DECLARATION;
|
||||||
const NodeTypeInfo& get_type_info() const override { return type_info; }
|
|
||||||
/// \brief Constructs a batched-convolution data batch-backprop operation.
|
/// \brief Constructs a batched-convolution data batch-backprop operation.
|
||||||
GroupConvolutionBackpropData();
|
GroupConvolutionBackpropData();
|
||||||
// clang-format off
|
// clang-format off
|
||||||
|
@ -102,12 +102,14 @@ shared_ptr<Node> op::v1::Convolution::clone_with_new_inputs(const OutputVector&
|
|||||||
m_auto_pad);
|
m_auto_pad);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr NodeTypeInfo op::v1::ConvolutionBackpropData::type_info;
|
|
||||||
shared_ptr<Node> op::v1::Convolution::get_default_value() const
|
shared_ptr<Node> op::v1::Convolution::get_default_value() const
|
||||||
{
|
{
|
||||||
return ngraph::make_constant_from_string("0", get_element_type(), get_shape());
|
return ngraph::make_constant_from_string("0", get_element_type(), get_shape());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// *** ConvolutionBackpropData OP SET 1 ***
|
||||||
|
NGRAPH_RTTI_DEFINITION(op::v1::ConvolutionBackpropData, "ConvolutionBackpropData", 1);
|
||||||
|
|
||||||
op::v1::ConvolutionBackpropData::ConvolutionBackpropData(const Output<Node>& data,
|
op::v1::ConvolutionBackpropData::ConvolutionBackpropData(const Output<Node>& data,
|
||||||
const Output<Node>& filters,
|
const Output<Node>& filters,
|
||||||
const Output<Node>& output_shape,
|
const Output<Node>& output_shape,
|
||||||
|
@ -286,7 +286,7 @@ shared_ptr<Node> op::v1::GroupConvolution::clone_with_new_inputs(const OutputVec
|
|||||||
// v1::GroupConvolutionBackpropData
|
// v1::GroupConvolutionBackpropData
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
constexpr NodeTypeInfo op::v1::GroupConvolutionBackpropData::type_info;
|
NGRAPH_RTTI_DEFINITION(op::v1::GroupConvolutionBackpropData, "GroupConvolutionBackpropData", 1);
|
||||||
|
|
||||||
op::v1::GroupConvolutionBackpropData::GroupConvolutionBackpropData()
|
op::v1::GroupConvolutionBackpropData::GroupConvolutionBackpropData()
|
||||||
: Op()
|
: Op()
|
||||||
|
Loading…
Reference in New Issue
Block a user