[LPT] ConvolutionBackpropData support (#5313)
* [LPT] ConvolutionBackpropData support * minor fixes * [Transformations] Legacy subtract precision keep * [LPT] ConvolutionBackpropData tests improvements * [LPT] ConvolutionBackpropData weights folding when can't be transformed * [LPT] CanBeTransformed unification and convolution weights folding * [LPT] GPU INT8 optimizations condition flag * [LPT] Concat precision predict improvement * [LPT] Turn off asymmetric quantization for Deconvolution on GPU * [LPT] Improvements from review * [LPT] Check if layer after concat isQuantized and require per-tensor quantize * [LPT] Improvement for Deconv->FQ pattern * [LPT] Commented failing tests
This commit is contained in:
parent
f84b25722c
commit
e41e25533d
@ -70,6 +70,7 @@
|
||||
#include <low_precision/pull_reshape_through_dequantization.hpp>
|
||||
#include <low_precision/pull_transpose_through_dequantization.hpp>
|
||||
#include <low_precision/transformer.hpp>
|
||||
#include <low_precision/convolution_backprop_data.hpp>
|
||||
#include <low_precision/mat_mul.hpp>
|
||||
#include <low_precision/strided_slice.hpp>
|
||||
#include <low_precision/network_helper.hpp>
|
||||
@ -381,6 +382,9 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
|
||||
.add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params)
|
||||
.setSupportAsymmetricQuantization(false)
|
||||
.setSupport3DTensorOnActivations(false))
|
||||
.add<ConvolutionBackpropDataTransformation, ngraph::opset1::ConvolutionBackpropData>(LayerTransformation::Params(params)
|
||||
.setSupportAsymmetricQuantization(false)
|
||||
.setDeconvolutionSpecificChannelsRatio(true))
|
||||
// INT8 StridedSlice not supported
|
||||
.remove<StridedSliceTransformation, ngraph::opset1::StridedSlice>());
|
||||
|
||||
|
@ -0,0 +1,25 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include "weightable_layer_transformation.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation {
|
||||
public:
|
||||
ConvolutionBackpropDataTransformation(const Params& params);
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
|
||||
bool isQuantized(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -45,6 +45,13 @@ class TRANSFORMATIONS_API DataPrecision {
|
||||
public:
|
||||
DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {}
|
||||
|
||||
explicit DataPrecision(const element::Type& precision) {
|
||||
this->precision = precision;
|
||||
min = getMinValue(precision, 256);
|
||||
max = getMaxValue(precision, 256);
|
||||
hasZeroPoint = false;
|
||||
}
|
||||
|
||||
DataPrecision(const element::Type precision, const float min, const float max, const bool hasZeroPoint) :
|
||||
precision(precision),
|
||||
min(min),
|
||||
@ -122,29 +129,6 @@ public:
|
||||
static element::Type getPrecision(const size_t /* quantizationLevels */, const bool signedInterval) {
|
||||
return signedInterval ? element::i8 : element::u8;
|
||||
}
|
||||
|
||||
static float getMin(const size_t quantizationLevels, const bool signedInterval) {
|
||||
if (quantizationLevels == 255) {
|
||||
return signedInterval ? -127.0f : 0.0f;
|
||||
} else if (quantizationLevels == 256) {
|
||||
return signedInterval ? -128.0f : 0.0f;
|
||||
} else {
|
||||
// THROW_TRANSFORMATION_EXCEPTION << "quantization level " << quantizationLevels << " is not supported";
|
||||
// FIXME: not completed
|
||||
return signedInterval ? -128.0f : 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
static float getMax(const size_t quantizationLevels, const bool signedInterval) {
|
||||
if ((quantizationLevels == 255) || (quantizationLevels == 256)) {
|
||||
return signedInterval ? 127.0f : 255.0f;
|
||||
} else {
|
||||
// THROW_TRANSFORMATION_EXCEPTION << "quantization level " << quantizationLevels << " is not supported";
|
||||
// FIXME: not completed
|
||||
// return quantizationLevels - 1.0;
|
||||
return signedInterval ? 127.0f : 255.0f;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
inline bool operator==(const DataPrecision& value1, const DataPrecision& value2) {
|
||||
@ -181,7 +165,8 @@ public:
|
||||
std::vector<element::Type> precisionsOnActivations = { element::u8, element::i8 },
|
||||
std::vector<element::Type> precisionsOnWeights = { element::i8 },
|
||||
element::Type deqPrecision = element::f32,
|
||||
bool support3DTensorOnActivations = true) :
|
||||
bool support3DTensorOnActivations = true,
|
||||
bool deconvolutionSpecificChannelsRatio = false) :
|
||||
updatePrecisions(updatePrecisions),
|
||||
quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations),
|
||||
quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights),
|
||||
@ -189,7 +174,8 @@ public:
|
||||
precisionsOnActivations(precisionsOnActivations),
|
||||
precisionsOnWeights(precisionsOnWeights),
|
||||
deqPrecision(deqPrecision),
|
||||
support3DTensorOnActivations(support3DTensorOnActivations) {
|
||||
support3DTensorOnActivations(support3DTensorOnActivations),
|
||||
deconvolutionSpecificChannelsRatio(deconvolutionSpecificChannelsRatio) {
|
||||
if (precisionsOnActivations.size() == 0ul) {
|
||||
THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed";
|
||||
}
|
||||
@ -234,6 +220,11 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
Params& setDeconvolutionSpecificChannelsRatio(const bool deconvolutionSpecificChannelsRatio) {
|
||||
this->deconvolutionSpecificChannelsRatio = deconvolutionSpecificChannelsRatio;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool updatePrecisions;
|
||||
QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
|
||||
QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
|
||||
@ -242,6 +233,7 @@ public:
|
||||
std::vector<element::Type> precisionsOnWeights;
|
||||
element::Type deqPrecision;
|
||||
bool support3DTensorOnActivations;
|
||||
bool deconvolutionSpecificChannelsRatio;
|
||||
};
|
||||
|
||||
class PrecisionDetails {
|
||||
@ -318,6 +310,7 @@ protected:
|
||||
std::vector<element::Type> precisionsOnWeights;
|
||||
element::Type deqPrecision;
|
||||
bool support3DTensorOnActivations;
|
||||
bool deconvolutionSpecificChannelsRatio;
|
||||
|
||||
// absolute value, used to determine quantization interval asymmetry
|
||||
float quantizationIntervalAsymmetryThreshold;
|
||||
|
@ -109,7 +109,8 @@ public:
|
||||
const float max,
|
||||
const bool hasZeroPoint,
|
||||
const bool updatePrecision,
|
||||
const element::Type deqPrecision = element::f32);
|
||||
const element::Type deqPrecision = element::f32,
|
||||
const size_t outChannelsShapeIndex = 0);
|
||||
|
||||
static std::shared_ptr<opset1::FakeQuantize> updateFakeQuantize(
|
||||
std::shared_ptr<opset1::FakeQuantize> fq,
|
||||
@ -183,7 +184,7 @@ public:
|
||||
static std::shared_ptr<Node> toScalarIfPossible(std::shared_ptr<Node> node);
|
||||
|
||||
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq);
|
||||
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues);
|
||||
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues, int outChannelsShapeIndex = 0);
|
||||
|
||||
static FakeQuantizeDequantization foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace = false);
|
||||
|
||||
@ -191,8 +192,16 @@ public:
|
||||
|
||||
static std::shared_ptr<opset1::FakeQuantize> fuseConvert(const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize);
|
||||
|
||||
static std::vector<element::Type> precisionIntersection(
|
||||
const std::vector<element::Type>& v1,
|
||||
const std::vector<element::Type>& v2) noexcept;
|
||||
|
||||
private:
|
||||
static std::shared_ptr<Node> foldFakeQuantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues, const bool roundValuesWasSet);
|
||||
static std::shared_ptr<Node> foldFakeQuantize(
|
||||
const std::shared_ptr<opset1::FakeQuantize>& fq,
|
||||
const bool roundValues,
|
||||
const bool roundValuesWasSet,
|
||||
int outChannelsShapeIndex = 0);
|
||||
|
||||
// 1 - on weights
|
||||
// 0 - weightable layer was not found
|
||||
|
@ -303,10 +303,6 @@ private:
|
||||
std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>> transformations,
|
||||
GraphRewrite& pass,
|
||||
TransformationContext& context);
|
||||
|
||||
std::vector<element::Type> precisionIntersection(
|
||||
const std::vector<element::Type>& v1,
|
||||
const std::vector<element::Type>& v2) const noexcept;
|
||||
};
|
||||
|
||||
class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite {
|
||||
|
@ -22,7 +22,7 @@ public:
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
|
||||
protected:
|
||||
void decomposeFakeQuantizeForWeightsPath(std::shared_ptr<Node> weightableLayer) const;
|
||||
void decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& weightableLayer, size_t outChannelsShapeIndex = 0ul) const;
|
||||
static bool isGroup(const std::shared_ptr<Node>& node);
|
||||
static bool isDepthwise(const std::shared_ptr<Node>& node);
|
||||
|
||||
|
@ -42,6 +42,7 @@ std::shared_ptr<opset1::Subtract> replaceToSubtract(const std::shared_ptr<Node>&
|
||||
const auto parent = add->get_input_node_shared_ptr(dataBranchIndex);
|
||||
if (is_type<opset1::Convolution>(parent) ||
|
||||
is_type<opset1::GroupConvolution>(parent) ||
|
||||
is_type<opset1::ConvolutionBackpropData>(parent) ||
|
||||
(is_type<opset1::MatMul>(parent) &&
|
||||
(is_type<opset1::Constant>(parent->get_input_node_ptr(0)) || is_type<opset1::Constant>(parent->get_input_node_ptr(1))))) {
|
||||
return nullptr;
|
||||
|
@ -50,14 +50,14 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
|
||||
return false;
|
||||
}
|
||||
|
||||
DataPrecision dataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false);
|
||||
if (dataPrecision.precision == ngraph::element::undefined) {
|
||||
std::vector<element::Type> concatParentsChildrensPrecisions = precisionsOnActivations;
|
||||
fillAvailablePrecisions(subgraph.quantizationLayers[0], concatParentsChildrensPrecisions);
|
||||
if (concatParentsChildrensPrecisions.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::unordered_map<std::string, ngraph::pass::low_precision::FakeQuantizeDequantization> dequantizations;
|
||||
for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
|
||||
const std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
|
||||
fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
|
||||
if (fq == nullptr) {
|
||||
return false;
|
||||
}
|
||||
@ -72,21 +72,20 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
|
||||
if (quantizationDetails.inputHighValues.size() != 1ul) {
|
||||
return false;
|
||||
}
|
||||
std::vector<element::Type> fqChildrensPrecisions = precisionsOnActivations;
|
||||
fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrensPrecisions);
|
||||
concatParentsChildrensPrecisions = NetworkHelper::precisionIntersection(concatParentsChildrensPrecisions, fqChildrensPrecisions);
|
||||
|
||||
const DataPrecision dataPrecision2 = getDataPrecision(subgraph.quantizationLayers[i]->shared_from_this(), quantizationDetails, false);
|
||||
if (dataPrecision2.precision == ngraph::element::undefined) {
|
||||
if (concatParentsChildrensPrecisions.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dataPrecision.precision != dataPrecision2.precision) {
|
||||
// quantization levels are the same, difference can be in sign
|
||||
// wider interval (precision) is preferable: use signed if least one interval is signed
|
||||
dataPrecision = dataPrecision.precision.is_signed() ? dataPrecision : dataPrecision2;
|
||||
}
|
||||
}
|
||||
|
||||
if (dataPrecision.precision == ngraph::element::undefined) {
|
||||
return false;
|
||||
DataPrecision dataPrecision;
|
||||
if (std::find(concatParentsChildrensPrecisions.begin(), concatParentsChildrensPrecisions.end(), element::i8) != concatParentsChildrensPrecisions.end()) {
|
||||
dataPrecision = DataPrecision(element::i8);
|
||||
} else {
|
||||
dataPrecision = DataPrecision(concatParentsChildrensPrecisions[0]);
|
||||
}
|
||||
|
||||
std::vector<QuantizationDetails> quantizationLayersDetails;
|
||||
|
@ -27,7 +27,9 @@ bool ConcatMultiChannelsTransformation::isMultiChannel(const std::vector<std::sh
|
||||
for (const std::shared_ptr<ngraph::opset1::Concat>& concat : concatLayers) {
|
||||
const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(concat);
|
||||
for (const std::shared_ptr<ngraph::Node>& child : children) {
|
||||
if (is_type<ngraph::opset1::Convolution>(child.get())) {
|
||||
if ((is_type<ngraph::opset1::Convolution>(child.get()) ||
|
||||
is_type<ngraph::opset1::ConvolutionBackpropData>(child.get())) &&
|
||||
this->layerTransformationsManager->isQuantized(child)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -42,7 +42,27 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
||||
auto convolution = m.get_match_root();
|
||||
|
||||
if (!canConvolutionBeTransformed(context, convolution)) {
|
||||
return false;
|
||||
auto weightInput = convolution->get_input_node_shared_ptr(1);
|
||||
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightInput);
|
||||
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
|
||||
NetworkHelper::getDequantization(convolution, 1ul) :
|
||||
NetworkHelper::getDequantization(reshapeFromWeights);
|
||||
if (dequantization.empty()) {
|
||||
const auto fqOnWeights = getFakeQuantizeOnWeights(convolution);
|
||||
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
|
||||
if (reshapeFromWeights != nullptr) {
|
||||
resultConstant = fold_reshape<opset1::Reshape>(
|
||||
resultConstant,
|
||||
reshapeFromWeights->input_value(1),
|
||||
false);
|
||||
}
|
||||
if (as_type_ptr<opset1::Constant>(resultConstant)) {
|
||||
replace_node(weightInput, resultConstant);
|
||||
}
|
||||
} else {
|
||||
NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
convolution = NetworkHelper::separateInStandaloneBranch(convolution);
|
||||
|
@ -0,0 +1,218 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision/convolution_backprop_data.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
ConvolutionBackpropDataTransformation::ConvolutionBackpropDataTransformation(const Params& params) : WeightableLayerTransformation(params) {
|
||||
}
|
||||
|
||||
void ConvolutionBackpropDataTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
|
||||
addPattern(
|
||||
pass,
|
||||
context,
|
||||
make_op_pattern<opset1::ConvolutionBackpropData>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::Multiply>() }));
|
||||
addPattern(
|
||||
pass,
|
||||
context,
|
||||
make_op_pattern<opset1::ConvolutionBackpropData>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::FakeQuantize>() }));
|
||||
addPattern(
|
||||
pass,
|
||||
context,
|
||||
make_op_pattern<opset1::ConvolutionBackpropData>(
|
||||
{ make_op_label<opset1::Multiply>(), make_op_label<opset1::Multiply>(), make_op_label<opset1::Constant>() }));
|
||||
addPattern(
|
||||
pass,
|
||||
context,
|
||||
make_op_pattern<opset1::ConvolutionBackpropData>(
|
||||
{ make_op_label<opset1::Multiply>(), make_op_label<opset1::FakeQuantize>(), make_op_label<opset1::Constant>() }));
|
||||
}
|
||||
|
||||
bool ConvolutionBackpropDataTransformation::isQuantized(std::shared_ptr<Node> layer) const noexcept {
|
||||
if (deconvolutionSpecificChannelsRatio) {
|
||||
size_t inputChannels = layer->get_input_shape(0)[1];
|
||||
size_t outputChannels = layer->get_output_shape(0)[1];
|
||||
if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return WeightableLayerTransformation::isQuantized(layer, false);
|
||||
}
|
||||
|
||||
bool ConvolutionBackpropDataTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const {
|
||||
auto convolutionBackpropData = m.get_match_root();
|
||||
|
||||
if (!canBeTransformed(context, convolutionBackpropData)) {
|
||||
auto weightsInput = convolutionBackpropData->get_input_node_shared_ptr(1);
|
||||
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightsInput);
|
||||
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
|
||||
NetworkHelper::getDequantization(convolutionBackpropData, 1ul) :
|
||||
NetworkHelper::getDequantization(reshapeFromWeights);
|
||||
if (dequantization.empty()) {
|
||||
const auto fqOnWeights = getFakeQuantizeOnWeights(convolutionBackpropData);
|
||||
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
|
||||
if (reshapeFromWeights != nullptr) {
|
||||
resultConstant = fold_reshape<opset1::Reshape>(
|
||||
resultConstant,
|
||||
reshapeFromWeights->input_value(1),
|
||||
false);
|
||||
}
|
||||
if (as_type_ptr<opset1::Constant>(resultConstant)) {
|
||||
replace_node(weightsInput, resultConstant);
|
||||
}
|
||||
} else {
|
||||
NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
convolutionBackpropData = NetworkHelper::separateInStandaloneBranch(convolutionBackpropData);
|
||||
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData);
|
||||
{
|
||||
if (dequantization.subtract != nullptr) {
|
||||
std::shared_ptr<ngraph::Node> layer = dequantization.subtract;
|
||||
ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(layer);
|
||||
|
||||
NetworkHelper::optimizeSubtract(dequantization.subtract);
|
||||
}
|
||||
std::shared_ptr<opset1::Constant> reducedConstant = as_type_ptr<opset1::Constant>(dequantization.multiplyConstant);
|
||||
std::shared_ptr<Node> newMultiplyAfterConst = std::make_shared<opset1::Constant>(
|
||||
reducedConstant->get_output_element_type(0),
|
||||
Shape{ 1 },
|
||||
reducedConstant->cast_vector<float>()[0]);
|
||||
auto inputs = convolutionBackpropData->input_values();
|
||||
inputs[0] = dequantization.multiply->input_value(0);
|
||||
const auto copyNode = convolutionBackpropData->copy_with_new_inputs(inputs);
|
||||
|
||||
const auto relaxedConvolutionBackpropData = std::make_shared<op::TypeRelaxed<opset1::ConvolutionBackpropData>>(
|
||||
*as_type_ptr<opset1::ConvolutionBackpropData>(copyNode),
|
||||
std::vector<element::Type>{deqPrecision, deqPrecision},
|
||||
std::vector<element::Type>{deqPrecision});
|
||||
|
||||
const auto newMultiplyAfter = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
|
||||
std::vector<element::Type>{ deqPrecision, deqPrecision },
|
||||
std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
|
||||
ngraph::op::TemporaryReplaceOutputType(relaxedConvolutionBackpropData, deqPrecision).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get());
|
||||
|
||||
replace_node(convolutionBackpropData, newMultiplyAfter);
|
||||
convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr();
|
||||
inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0);
|
||||
if (is_type<opset1::Convert>(convolutionBackpropData->get_input_node_ptr(0))) {
|
||||
auto newConvolution = convolutionBackpropData->copy_with_new_inputs(inputs);
|
||||
replace_node(convolutionBackpropData, newConvolution);
|
||||
convolutionBackpropData = newConvolution;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul);
|
||||
|
||||
dequantization = NetworkHelper::getDequantization(convolutionBackpropData, 1ul);
|
||||
|
||||
if (is_type<opset1::FakeQuantize>(dequantization.data.get_node())) {
|
||||
const std::shared_ptr<opset1::FakeQuantize> fq = as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr());
|
||||
std::shared_ptr<ngraph::Node> newFQ = NetworkHelper::fold_fake_quantize(fq, true);
|
||||
NetworkHelper::copyInfo(fq, newFQ);
|
||||
replace_node(fq, newFQ);
|
||||
}
|
||||
|
||||
std::shared_ptr<opset1::Multiply> multiplyFromWeights = as_type_ptr<opset1::Multiply>(
|
||||
convolutionBackpropData->input_value(1).get_node_shared_ptr());
|
||||
std::shared_ptr<opset1::Subtract> subtractFromWeights = as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0));
|
||||
|
||||
{
|
||||
Shape newScaleShape = multiplyFromWeights->get_input_shape(1);
|
||||
auto inputs = convolutionBackpropData->input_values();
|
||||
inputs[1] = multiplyFromWeights->input_value(0);
|
||||
auto newMultiplyAfter = std::make_shared<DequantizationMultiply>(
|
||||
convolutionBackpropData->copy_with_new_inputs(inputs),
|
||||
foldConvert(
|
||||
fold_reshape<opset1::Reshape>(
|
||||
multiplyFromWeights->input_value(1),
|
||||
std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
|
||||
false),
|
||||
convolutionBackpropData->get_output_element_type(0)));
|
||||
replace_node(convolutionBackpropData, newMultiplyAfter);
|
||||
convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr();
|
||||
}
|
||||
|
||||
if (subtractFromWeights != nullptr) {
|
||||
// optimize zero point on weights
|
||||
auto optimizedSubtract = NetworkHelper::optimizeSubtract(subtractFromWeights);
|
||||
if (optimizedSubtract == nullptr) {
|
||||
subtractFromWeights = nullptr;
|
||||
} else {
|
||||
subtractFromWeights = as_type_ptr<opset1::Subtract>(optimizedSubtract);
|
||||
|
||||
const Shape weightsShape = subtractFromWeights->input(0).get_shape();
|
||||
Shape zeroPointShape(weightsShape.size(), 1ul);
|
||||
zeroPointShape[1] = weightsShape[1];
|
||||
|
||||
auto zeroPointConstant = fold<opset1::Broadcast>(
|
||||
subtractFromWeights->get_input_node_shared_ptr(1),
|
||||
std::make_shared<opset1::Constant>(element::i32, Shape{zeroPointShape.size()}, zeroPointShape));
|
||||
replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<opset1::Convert> convertFromWeights =
|
||||
as_type_ptr<opset1::Convert>(
|
||||
subtractFromWeights == nullptr ?
|
||||
multiplyFromWeights->get_input_node_shared_ptr(0) :
|
||||
subtractFromWeights->get_input_node_shared_ptr(0));
|
||||
if (convertFromWeights != nullptr) {
|
||||
auto inputs = convolutionBackpropData->input_values();
|
||||
inputs[1] = convolutionBackpropData->get_input_node_ptr(1)->input_value(0);
|
||||
// remove Convert on weights
|
||||
auto newConvolution = convolutionBackpropData->clone_with_new_inputs(inputs);
|
||||
replace_node(convolutionBackpropData, newConvolution);
|
||||
convolutionBackpropData = newConvolution;
|
||||
}
|
||||
}
|
||||
std::shared_ptr<ngraph::opset1::Multiply> finalDequantization = NetworkHelper::optimizeMultipliesAfter(
|
||||
convolutionBackpropData->output(0).get_target_inputs().begin()->get_node()->shared_from_this());
|
||||
ngraph::copy_runtime_info({ convolutionBackpropData, finalDequantization }, finalDequantization);
|
||||
updateOutput(context, finalDequantization, convolutionBackpropData);
|
||||
|
||||
auto onWeights = convolutionBackpropData->get_input_node_shared_ptr(1);
|
||||
if (is_type<opset1::Reshape>(onWeights)) {
|
||||
onWeights = onWeights->get_input_node_shared_ptr(0);
|
||||
}
|
||||
|
||||
if (is_type<opset1::Subtract>(onWeights)) {
|
||||
auto& rt = onWeights->get_rt_info();
|
||||
rt["DISABLED_CONSTANT_FOLDING"] = std::make_shared<ngraph::VariantWrapper<std::string>>("");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ConvolutionBackpropDataTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const {
|
||||
if (deconvolutionSpecificChannelsRatio) {
|
||||
size_t inputChannels = op->get_input_shape(0)[1];
|
||||
size_t outputChannels = op->get_output_shape(0)[1];
|
||||
if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return canConvolutionBeTransformed(context, op);
|
||||
}
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -20,7 +20,7 @@ void FakeQuantizeTransformation::registerMatcherIn(GraphRewrite& pass, Transform
|
||||
|
||||
bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
|
||||
std::shared_ptr<opset1::FakeQuantize> layer = std::dynamic_pointer_cast<opset1::FakeQuantize>(m.get_match_root());
|
||||
if (!NetworkHelper::isQuantizeSupported(layer)) {
|
||||
if (!QuantizationDetails::outputLayoutIsSupported(layer)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -149,7 +149,9 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
|
||||
inputHighConst_f32 = fq::updateShape(fold<opset1::Add>(inputHighConst_f32, value), fakeQuantize->get_output_shape(0));
|
||||
} else if (is_type<opset1::Add>(eltwise) && checkElementwise(eltwise)) {
|
||||
if (is_type<opset1::Convolution>(fq::getData(eltwise)) ||
|
||||
is_type<opset1::GroupConvolution>(fq::getData(eltwise))) {
|
||||
is_type<opset1::GroupConvolution>(fq::getData(eltwise)) ||
|
||||
is_type<opset1::ConvolutionBackpropData>(fq::getData(eltwise)) ||
|
||||
is_type<opset1::GroupConvolutionBackpropData>(fq::getData(eltwise))) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -45,11 +45,18 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext&
|
||||
const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
|
||||
const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);
|
||||
|
||||
const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
|
||||
const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
|
||||
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow);
|
||||
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(2), inputHigh);
|
||||
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
|
||||
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
|
||||
|
||||
auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
|
||||
opset1::FakeQuantize(
|
||||
fakeQuantizeParent->output(parentIndex),
|
||||
foldConvert(fakeQuantize->input_value(1), deqPrecision),
|
||||
foldConvert(fakeQuantize->input_value(2), deqPrecision),
|
||||
inputLow,
|
||||
inputHigh,
|
||||
outputLowConst_f32,
|
||||
outputHighConst_f32,
|
||||
fakeQuantize->get_levels()),
|
||||
|
@ -45,11 +45,18 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext&
|
||||
const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
|
||||
const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);
|
||||
|
||||
const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
|
||||
const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
|
||||
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow);
|
||||
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(2), inputHigh);
|
||||
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
|
||||
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
|
||||
|
||||
auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
|
||||
opset1::FakeQuantize(
|
||||
fakeQuantizeParent->output(parentIndex),
|
||||
foldConvert(fakeQuantize->input_value(1), deqPrecision),
|
||||
foldConvert(fakeQuantize->input_value(2), deqPrecision),
|
||||
inputLow,
|
||||
inputHigh,
|
||||
outputLowConst_f32,
|
||||
outputHighConst_f32,
|
||||
fakeQuantize->get_levels()),
|
||||
@ -76,7 +83,8 @@ bool FuseSubtractToFakeQuantizeTransformation::canBeTransformed(const Transforma
|
||||
for (const auto& target : children) {
|
||||
const auto convolution = is_type<opset1::Convolution>(target.get_node());
|
||||
const auto groupConvolution = is_type<opset1::GroupConvolution>(target.get_node());
|
||||
if (convolution || groupConvolution) {
|
||||
const auto convolutionBackpropData = is_type<opset1::ConvolutionBackpropData>(target.get_node());
|
||||
if (convolution || groupConvolution || convolutionBackpropData) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -32,6 +32,7 @@ LayerTransformation::LayerTransformation(const Params& params) :
|
||||
precisionsOnWeights(params.precisionsOnWeights),
|
||||
deqPrecision(params.deqPrecision),
|
||||
support3DTensorOnActivations(params.support3DTensorOnActivations),
|
||||
deconvolutionSpecificChannelsRatio(params.deconvolutionSpecificChannelsRatio),
|
||||
quantizationIntervalAsymmetryThreshold(0.002f),
|
||||
zeroThreshold(1.e-6f),
|
||||
minQuantizationLevels(2ul),
|
||||
|
@ -69,7 +69,8 @@ bool NetworkHelper::isConstantPath(const std::shared_ptr<Node>& op) {
|
||||
return is_type<opset1::Parameter>(node) ||
|
||||
is_type<opset1::Convolution>(node) ||
|
||||
is_type<opset1::GroupConvolution>(node) ||
|
||||
is_type<opset1::MatMul>(node);
|
||||
is_type<opset1::MatMul>(node) ||
|
||||
is_type<opset1::ConvolutionBackpropData>(node);
|
||||
};
|
||||
|
||||
if (isNotConstantPathOperation(op)) {
|
||||
@ -440,8 +441,11 @@ std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(const std::shared_ptr<op
|
||||
return foldFakeQuantize(fq, false, false);
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues) {
|
||||
return foldFakeQuantize(fq, roundValues, true);
|
||||
std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(
|
||||
const std::shared_ptr<opset1::FakeQuantize>& fq,
|
||||
const bool roundValues,
|
||||
const int outChannelsShapeIndex) {
|
||||
return foldFakeQuantize(fq, roundValues, true, outChannelsShapeIndex);
|
||||
}
|
||||
|
||||
FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace) {
|
||||
@ -591,7 +595,8 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::fuseConvert(const std::shar
|
||||
std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
|
||||
const std::shared_ptr<opset1::FakeQuantize>& fq,
|
||||
const bool roundValuesArg,
|
||||
const bool roundValuesWasSet) {
|
||||
const bool roundValuesWasSet,
|
||||
const int outChannelsShapeIndex) {
|
||||
if (is_type<opset1::Constant>(fq->get_input_node_shared_ptr(0)) &&
|
||||
is_type<opset1::Constant>(fq->get_input_node_shared_ptr(1)) &&
|
||||
is_type<opset1::Constant>(fq->get_input_node_shared_ptr(2)) &&
|
||||
@ -630,10 +635,20 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
|
||||
if (constShape.empty() || constShape.size() > 5lu) {
|
||||
THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected dimensions count " << constShape.size();
|
||||
}
|
||||
if (outChannelsShapeIndex != 0 && outChannelsShapeIndex != 1) {
|
||||
THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected outChannelsShapeIndex " << outChannelsShapeIndex;
|
||||
}
|
||||
|
||||
// OIDHW
|
||||
const size_t OC = constShape[0];
|
||||
const size_t IC = constShape.size() > 1lu ? constShape[1] : 1;
|
||||
size_t OC;
|
||||
size_t IC;
|
||||
// OIDHW or IODHW
|
||||
if (constShape.size() == 1) {
|
||||
OC = constShape[0];
|
||||
IC = 1;
|
||||
} else {
|
||||
OC = constShape[outChannelsShapeIndex];
|
||||
IC = constShape[outChannelsShapeIndex == 0 ? 1 : 0];
|
||||
}
|
||||
const size_t D = constShape.size() > 4lu ? constShape[constShape.size() - 3] : 1;
|
||||
const size_t H = constShape.size() > 2lu ? constShape.size() == 3lu ? constShape[2] : constShape[constShape.size() - 2] : 1;
|
||||
const size_t W = constShape.size() > 3lu ? constShape[constShape.size() - 1] : 1;
|
||||
@ -667,29 +682,35 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
|
||||
|
||||
auto levels_1 = fq->get_levels() - 1.f;
|
||||
|
||||
//const size_t DHW = D * H * W;
|
||||
const size_t DHW = D * H * W;
|
||||
const size_t IDHW = IC * D * H * W;
|
||||
|
||||
const auto values = constant->cast_vector<float>();
|
||||
std::vector<float> quantizedValues(OC * IC * D * H * W);
|
||||
|
||||
for (size_t oc = 0; oc < OC; ++oc) {
|
||||
for (size_t iidx = 0; iidx < IDHW; ++iidx) {
|
||||
const float inputLow = inputLowValues[isInputLowBroadcasted ? 0 : oc];
|
||||
const float inputHigh = inputHighValues[isInputHighBroadcasted ? 0 : oc];
|
||||
const float outputLow = outputLowValues[isOutputLowBroadcasted ? 0 : oc];
|
||||
const float outputHigh = outputHighValues[isOutputHighBroadcasted ? 0 : oc];
|
||||
const float inputLow = inputLowValues[isInputLowBroadcasted ? 0 : oc];
|
||||
const float inputHigh = inputHighValues[isInputHighBroadcasted ? 0 : oc];
|
||||
const float outputLow = outputLowValues[isOutputLowBroadcasted ? 0 : oc];
|
||||
const float outputHigh = outputHighValues[isOutputHighBroadcasted ? 0 : oc];
|
||||
for (size_t ic = 0; ic < IC; ++ic) {
|
||||
for (size_t iidx = 0; iidx < DHW; ++iidx) {
|
||||
size_t idx;
|
||||
if (outChannelsShapeIndex == 0) {
|
||||
idx = oc * IDHW + ic * DHW + iidx;
|
||||
} else {
|
||||
idx = ic * IDHW + oc * DHW + iidx;
|
||||
}
|
||||
|
||||
const size_t idx = oc * IDHW + iidx;
|
||||
|
||||
if (values[idx] <= inputLow) {
|
||||
quantizedValues[idx] = roundValues ? std::roundf(outputLow) : outputLow;
|
||||
} else if (values[idx] > inputHigh) {
|
||||
quantizedValues[idx] = roundValues ? std::roundf(outputHigh) : outputHigh;
|
||||
} else {
|
||||
const float value = std::roundf((values[idx] - inputLow) / (inputHigh - inputLow) * levels_1) /
|
||||
levels_1 * (outputHigh - outputLow) + outputLow;
|
||||
quantizedValues[idx] = roundValues ? std::roundf(value) : value;
|
||||
if (values[idx] <= inputLow) {
|
||||
quantizedValues[idx] = roundValues ? std::roundf(outputLow) : outputLow;
|
||||
} else if (values[idx] > inputHigh) {
|
||||
quantizedValues[idx] = roundValues ? std::roundf(outputHigh) : outputHigh;
|
||||
} else {
|
||||
const float value = std::roundf((values[idx] - inputLow) / (inputHigh - inputLow) * levels_1) /
|
||||
levels_1 * (outputHigh - outputLow) + outputLow;
|
||||
quantizedValues[idx] = roundValues ? std::roundf(value) : value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -818,7 +839,8 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
|
||||
const float max,
|
||||
const bool hasZeroPoint,
|
||||
const bool updatePrecision,
|
||||
const element::Type deqPrecision) {
|
||||
const element::Type deqPrecision,
|
||||
const size_t outChannelsShapeIndex) {
|
||||
using std::make_shared;
|
||||
|
||||
const auto outputLow = fq->input_value(3);
|
||||
@ -898,7 +920,8 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
|
||||
newMax->output(0),
|
||||
fq->get_levels(),
|
||||
fq->get_auto_broadcast()),
|
||||
true);
|
||||
true,
|
||||
outChannelsShapeIndex);
|
||||
NetworkHelper::copyInfo(fq, newFQ);
|
||||
|
||||
std::shared_ptr<ngraph::Node> convert2;
|
||||
@ -1548,12 +1571,12 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr<Node>& node, const Data
|
||||
if (is_type<opset1::Subtract>(node)) {
|
||||
const auto parent = node->get_input_node_shared_ptr(0);
|
||||
const auto intNode = is_type<opset1::Convert>(parent) ? parent : node;
|
||||
const auto intType = intNode->get_input_element_type(0);
|
||||
if (intType == element::u8 || intType == element::i8) {
|
||||
min = DataPrecision::getMinValue(intType, 256) - 0.5f;
|
||||
max = DataPrecision::getMaxValue(intType, 256) + 0.5f;
|
||||
const auto type = intNode->get_input_element_type(0);
|
||||
if (type == element::u8 || type == element::i8) {
|
||||
min = DataPrecision::getMinValue(type, 256) - 0.5f;
|
||||
max = DataPrecision::getMaxValue(type, 256) + 0.5f;
|
||||
} else {
|
||||
return false;
|
||||
return type == element::f32 || type == element::f16;
|
||||
}
|
||||
auto subtract1input = node->get_input_node_shared_ptr(1);
|
||||
if (is_type<opset1::Convert>(subtract1input)) {
|
||||
@ -1595,6 +1618,23 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr<Node>& node, const Data
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<element::Type> NetworkHelper::precisionIntersection(
|
||||
const std::vector<element::Type>& v1,
|
||||
const std::vector<element::Type>& v2) noexcept {
|
||||
std::vector<element::Type> v3;
|
||||
|
||||
auto v1Copy = v1;
|
||||
auto v2Copy = v2;
|
||||
|
||||
std::sort(v1Copy.begin(), v1Copy.end());
|
||||
std::sort(v2Copy.begin(), v2Copy.end());
|
||||
|
||||
std::set_intersection(v1Copy.begin(), v1Copy.end(),
|
||||
v2Copy.begin(), v2Copy.end(),
|
||||
std::back_inserter(v3));
|
||||
return v3;
|
||||
}
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include "low_precision/avg_pool.hpp"
|
||||
#include "low_precision/clamp.hpp"
|
||||
#include "low_precision/convolution.hpp"
|
||||
#include "low_precision/convolution_backprop_data.hpp"
|
||||
#include "low_precision/depth_to_space.hpp"
|
||||
#include "low_precision/fake_quantize.hpp"
|
||||
#include "low_precision/group_convolution.hpp"
|
||||
@ -220,6 +221,7 @@ LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const
|
||||
add<AvgPoolTransformation, opset1::AvgPool>(params).
|
||||
add<ClampTransformation, opset1::Clamp>(params).
|
||||
add<ConvolutionTransformation, opset1::Convolution>(params).
|
||||
add<ConvolutionBackpropDataTransformation, opset1::ConvolutionBackpropData>(params).
|
||||
add<DepthToSpaceTransformation, opset1::DepthToSpace>(params).
|
||||
add<FakeQuantizeTransformation, opset1::FakeQuantize>(params).
|
||||
add<GroupConvolutionTransformation, opset1::GroupConvolution>(params).
|
||||
@ -338,6 +340,7 @@ TypeRelaxedReplacer::TypeRelaxedReplacer() {
|
||||
make_matcher_type_relaxed<opset1::Clamp>(this);
|
||||
make_matcher_type_relaxed<opset1::Concat>(this);
|
||||
make_matcher_type_relaxed<opset1::Convolution>(this);
|
||||
make_matcher_type_relaxed<opset1::ConvolutionBackpropData>(this);
|
||||
make_matcher_type_relaxed<opset1::DepthToSpace>(this);
|
||||
make_matcher_type_relaxed<opset1::FakeQuantize>(this);
|
||||
make_matcher_type_relaxed<opset1::GroupConvolution>(this);
|
||||
@ -430,23 +433,6 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
|
||||
network->validate_nodes_and_infer_types();
|
||||
}
|
||||
|
||||
std::vector<element::Type> LowPrecisionTransformer::precisionIntersection(
|
||||
const std::vector<element::Type>& v1,
|
||||
const std::vector<element::Type>& v2) const noexcept {
|
||||
std::vector<element::Type> v3;
|
||||
|
||||
auto v1Copy = v1;
|
||||
auto v2Copy = v2;
|
||||
|
||||
std::sort(v1Copy.begin(), v1Copy.end());
|
||||
std::sort(v2Copy.begin(), v2Copy.end());
|
||||
|
||||
std::set_intersection(v1Copy.begin(), v1Copy.end(),
|
||||
v2Copy.begin(), v2Copy.end(),
|
||||
std::back_inserter(v3));
|
||||
return v3;
|
||||
}
|
||||
|
||||
std::vector<element::Type> LowPrecisionTransformer::getPrecisionsOnActivations(const Node& op) const noexcept {
|
||||
const std::string operantionType = LowPrecisionTransformations::getType(op);
|
||||
const std::vector<LayerTransformationPtr> transformation = transformations.find(operantionType);
|
||||
@ -456,7 +442,7 @@ std::vector<element::Type> LowPrecisionTransformer::getPrecisionsOnActivations(c
|
||||
std::vector<element::Type> precisions = transformation[0]->getPrecisionsOnActivations();
|
||||
|
||||
for (const auto& transform : transformation) {
|
||||
precisions = precisionIntersection(precisions, transform->getPrecisionsOnActivations());
|
||||
precisions = NetworkHelper::precisionIntersection(precisions, transform->getPrecisionsOnActivations());
|
||||
}
|
||||
return precisions;
|
||||
}
|
||||
|
@ -26,7 +26,7 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma
|
||||
return false;
|
||||
}
|
||||
|
||||
if (updatePrecisions && !NetworkHelper::checkZeroPoint(dequantization.subtract)) {
|
||||
if (!NetworkHelper::checkZeroPoint(dequantization.subtract)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -46,24 +46,10 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma
|
||||
return false;
|
||||
}
|
||||
if (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision)) {
|
||||
const std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
|
||||
if (as_type_ptr<opset1::Constant>(resultConstant)) {
|
||||
replace_node(fqOnWeights, resultConstant);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (!NetworkHelper::checkZeroPoint(dequantization.subtract)) {
|
||||
const auto resultDequantization = NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
|
||||
if (resultDequantization.empty() && reshapeFromWeights) {
|
||||
const auto foldedReshape = fold<opset1::Reshape>(
|
||||
reshapeFromWeights->get_input_node_shared_ptr(0),
|
||||
reshapeFromWeights->get_input_node_shared_ptr(1),
|
||||
reshapeFromWeights->get_special_zero());
|
||||
if (is_type<opset1::Constant>(foldedReshape)) {
|
||||
replace_node(reshapeFromWeights, foldedReshape);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -170,9 +156,11 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( // Check if all dimensions of scale except the first one (which is O-Output channels dimension) are all ones
|
||||
(shape_size(constOutputShape) != constOutputShape[0]) ||
|
||||
((constOutputShape[0] != 1ul) && (fqFromWeights->get_output_shape(0)[0] != constOutputShape[0]))) {
|
||||
const size_t outChannelsShapeIndex = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul;
|
||||
if ( // Check if all dimensions of scale except the output channels are all ones
|
||||
(shape_size(constOutputShape) != constOutputShape[outChannelsShapeIndex]) ||
|
||||
((constOutputShape[outChannelsShapeIndex] != 1ul) &&
|
||||
(fqFromWeights->get_output_shape(0)[outChannelsShapeIndex] != constOutputShape[outChannelsShapeIndex]))) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
@ -256,7 +244,7 @@ bool WeightableLayerTransformation::isPrecisionPreserved(std::shared_ptr<Node> l
|
||||
return false;
|
||||
}
|
||||
|
||||
void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(std::shared_ptr<Node> node) const {
|
||||
void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& node, const size_t outChannelsShapeIndex) const {
|
||||
const auto fq = getFakeQuantizeOnWeights(node);
|
||||
if (fq == nullptr) {
|
||||
return;
|
||||
@ -270,7 +258,9 @@ void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(std::sha
|
||||
dataPrecision.min,
|
||||
dataPrecision.max,
|
||||
dataPrecision.hasZeroPoint,
|
||||
updatePrecisions);
|
||||
updatePrecisions,
|
||||
element::f32,
|
||||
outChannelsShapeIndex);
|
||||
|
||||
std::shared_ptr<ngraph::Node> fqOnWeights = std::get<0>(tuple);
|
||||
if (as_type_ptr<ngraph::opset1::Constant>(fqOnWeights) == nullptr) {
|
||||
|
@ -76,6 +76,7 @@
|
||||
#include <low_precision/transformer.hpp>
|
||||
#include <low_precision/convert_subtract_constant.hpp>
|
||||
#include <low_precision/convolution.hpp>
|
||||
#include <low_precision/convolution_backprop_data.hpp>
|
||||
#include <low_precision/group_convolution.hpp>
|
||||
#include <low_precision/multiply_to_group_convolution.hpp>
|
||||
#include <low_precision/network_helper.hpp>
|
||||
@ -328,7 +329,8 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
|
||||
.add<GroupConvolutionTransformation, ngraph::opset1::GroupConvolution>(
|
||||
LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }).setSupportAsymmetricQuantization(true))
|
||||
.addStandaloneCleanup<MultiplyToGroupConvolutionTransformation, ngraph::opset1::Multiply>(
|
||||
LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 })));
|
||||
LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }))
|
||||
.remove<ConvolutionBackpropDataTransformation, ngraph::opset1::ConvolutionBackpropData>());
|
||||
|
||||
transformer.transform(nGraphFunc);
|
||||
}
|
||||
|
@ -29,6 +29,7 @@ public:
|
||||
const Strides& dilations,
|
||||
const CoordinateDiff& pads_begin,
|
||||
const CoordinateDiff& pads_end,
|
||||
const element::Type output_type,
|
||||
const size_t& group = 1,
|
||||
const PadType& auto_pad = PadType::EXPLICIT,
|
||||
const CoordinateDiff& output_padding = {},
|
||||
@ -41,6 +42,7 @@ public:
|
||||
const Strides& dilations,
|
||||
const CoordinateDiff& pads_begin,
|
||||
const CoordinateDiff& pads_end,
|
||||
const element::Type output_type,
|
||||
const size_t& group = 1,
|
||||
const PadType& auto_pad = PadType::EXPLICIT,
|
||||
const CoordinateDiff& output_padding = {},
|
||||
@ -79,6 +81,7 @@ protected:
|
||||
size_t m_group;
|
||||
CoordinateDiff m_output_padding;
|
||||
std::shared_ptr<Node> m_output_shape;
|
||||
element::Type m_output_type;
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "ngraph/util.hpp"
|
||||
#include "ngraph/validation_util.hpp"
|
||||
#include "ngraph/opsets/opset1.hpp"
|
||||
#include "ngraph_ops/type_relaxed.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace ngraph;
|
||||
@ -25,6 +26,7 @@ op::DeconvolutionIE::DeconvolutionIE(const Output<Node>& data,
|
||||
const Strides& dilations,
|
||||
const CoordinateDiff& pads_begin,
|
||||
const CoordinateDiff& pads_end,
|
||||
const element::Type output_type,
|
||||
const size_t& group,
|
||||
const PadType& auto_pad,
|
||||
const CoordinateDiff& output_padding,
|
||||
@ -37,7 +39,8 @@ op::DeconvolutionIE::DeconvolutionIE(const Output<Node>& data,
|
||||
, m_auto_pad(auto_pad)
|
||||
, m_group(group)
|
||||
, m_output_padding(output_padding)
|
||||
, m_output_shape(output_shape) {
|
||||
, m_output_shape(output_shape)
|
||||
, m_output_type(output_type) {
|
||||
constructor_validate_and_infer_types();
|
||||
}
|
||||
|
||||
@ -48,6 +51,7 @@ op::DeconvolutionIE::DeconvolutionIE(const Output<Node>& data,
|
||||
const Strides& dilations,
|
||||
const CoordinateDiff& pads_begin,
|
||||
const CoordinateDiff& pads_end,
|
||||
const element::Type output_type,
|
||||
const size_t& group,
|
||||
const PadType& auto_pad,
|
||||
const CoordinateDiff& output_padding,
|
||||
@ -60,7 +64,8 @@ op::DeconvolutionIE::DeconvolutionIE(const Output<Node>& data,
|
||||
, m_auto_pad(auto_pad)
|
||||
, m_group(group)
|
||||
, m_output_padding(output_padding)
|
||||
, m_output_shape(output_shape) {
|
||||
, m_output_shape(output_shape)
|
||||
, m_output_type(output_type) {
|
||||
constructor_validate_and_infer_types();
|
||||
}
|
||||
|
||||
@ -81,13 +86,32 @@ void op::DeconvolutionIE::validate_and_infer_types() {
|
||||
}
|
||||
Output<Node> conv;
|
||||
if (m_output_shape) {
|
||||
conv = std::make_shared<opset1::GroupConvolutionBackpropData>(input_value(0), weights, m_output_shape,
|
||||
m_strides, m_pads_begin, m_pads_end, m_dilations, m_auto_pad, m_output_padding);
|
||||
conv = std::make_shared<op::TypeRelaxed<opset1::GroupConvolutionBackpropData>>(
|
||||
std::vector<element::Type>{ element::f32, element::f32 },
|
||||
std::vector<element::Type>{ element::f32 },
|
||||
ngraph::op::TemporaryReplaceOutputType(input_value(0), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(weights, element::f32).get(),
|
||||
m_output_shape,
|
||||
m_strides,
|
||||
m_pads_begin,
|
||||
m_pads_end,
|
||||
m_dilations,
|
||||
m_auto_pad,
|
||||
m_output_padding);
|
||||
} else {
|
||||
conv = std::make_shared<opset1::GroupConvolutionBackpropData>(input_value(0), weights,
|
||||
m_strides, m_pads_begin, m_pads_end, m_dilations, m_auto_pad, m_output_padding);
|
||||
conv = std::make_shared<op::TypeRelaxed<opset1::GroupConvolutionBackpropData>>(
|
||||
std::vector<element::Type>{ element::f32, element::f32 },
|
||||
std::vector<element::Type>{ element::f32 },
|
||||
ngraph::op::TemporaryReplaceOutputType(input_value(0), element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(weights, element::f32).get(),
|
||||
m_strides,
|
||||
m_pads_begin,
|
||||
m_pads_end,
|
||||
m_dilations,
|
||||
m_auto_pad,
|
||||
m_output_padding);
|
||||
}
|
||||
set_output_type(0, conv.get_element_type(), conv.get_partial_shape());
|
||||
set_output_type(0, m_output_type, conv.get_partial_shape());
|
||||
}
|
||||
|
||||
shared_ptr<Node> op::DeconvolutionIE::clone_with_new_inputs(const ngraph::OutputVector &new_args) const {
|
||||
@ -99,6 +123,7 @@ shared_ptr<Node> op::DeconvolutionIE::clone_with_new_inputs(const ngraph::Output
|
||||
m_dilations,
|
||||
m_pads_begin,
|
||||
m_pads_end,
|
||||
m_output_type,
|
||||
m_group,
|
||||
m_auto_pad,
|
||||
m_output_padding,
|
||||
@ -111,6 +136,7 @@ shared_ptr<Node> op::DeconvolutionIE::clone_with_new_inputs(const ngraph::Output
|
||||
m_dilations,
|
||||
m_pads_begin,
|
||||
m_pads_end,
|
||||
m_output_type,
|
||||
m_group,
|
||||
m_auto_pad,
|
||||
m_output_padding,
|
||||
|
@ -113,6 +113,7 @@ ngraph::pass::ConvertDeconvolution::ConvertDeconvolution() {
|
||||
deconv->get_dilations(),
|
||||
deconv->get_pads_begin(),
|
||||
deconv->get_pads_end(),
|
||||
deconv->get_output_element_type(0),
|
||||
1 /* groups */,
|
||||
deconv->get_auto_pad(),
|
||||
deconv->get_output_padding(),
|
||||
@ -158,6 +159,7 @@ ngraph::pass::ConvertGroupDeconvolution::ConvertGroupDeconvolution() {
|
||||
gconv->get_dilations(),
|
||||
gconv->get_pads_begin(),
|
||||
gconv->get_pads_end(),
|
||||
gconv->get_output_element_type(0),
|
||||
group,
|
||||
gconv->get_auto_pad(),
|
||||
gconv->get_output_padding(),
|
||||
|
@ -38,11 +38,14 @@ ngraph::pass::ConvertSubtract::ConvertSubtract() {
|
||||
const std::shared_ptr<Node> child = subChildren.begin()->get_node()->shared_from_this();
|
||||
if (child != nullptr) {
|
||||
if (is_type<opset1::Convolution>(child) ||
|
||||
is_type<opset1::ConvolutionBackpropData>(child) ||
|
||||
is_type<opset1::GroupConvolution>(child) ||
|
||||
is_type<opset1::GroupConvolutionBackpropData>(child) ||
|
||||
is_type<opset1::MatMul>(child) ||
|
||||
(is_type<opset1::Reshape>(child) &&
|
||||
(is_type<opset1::Reshape>(child) &&
|
||||
(child->output(0).get_target_inputs().size() == 1ul) &&
|
||||
is_type<opset1::GroupConvolution>(child->output(0).get_target_inputs().begin()->get_node()->shared_from_this()))) {
|
||||
(is_type<opset1::GroupConvolution>(child->output(0).get_target_inputs().begin()->get_node()->shared_from_this()) ||
|
||||
is_type<opset1::GroupConvolutionBackpropData>(child->output(0).get_target_inputs().begin()->get_node()->shared_from_this())))) {
|
||||
const auto input1Type = sub->input(0).get_element_type();
|
||||
const auto input2Type = sub->input(1).get_element_type();
|
||||
if (((input1Type == element::u8) && (input2Type == element::u8)) ||
|
||||
|
@ -0,0 +1,334 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "layer_transformation.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <memory>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include <transformations/init_node_info.hpp>
|
||||
#include <low_precision/convolution_backprop_data.hpp>
|
||||
#include <low_precision/network_helper.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
#include "simple_low_precision_transformer.hpp"
|
||||
#include "lpt_ngraph_functions/convolution_backprop_data_function.hpp"
|
||||
|
||||
using namespace testing;
|
||||
using namespace ngraph;
|
||||
using namespace ngraph::pass;
|
||||
|
||||
class ConvolutionBackpropDataTransformationTestValues {
|
||||
public:
|
||||
class Actual {
|
||||
public:
|
||||
ngraph::element::Type precisionBeforeDequantization;
|
||||
ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
|
||||
builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
|
||||
builder::subgraph::DequantizationOperations dequantizationOnWeights;
|
||||
std::shared_ptr<ngraph::opset1::Constant> weights;
|
||||
|
||||
Actual() = default;
|
||||
Actual(
|
||||
const ngraph::element::Type& precisionBeforeDequantization,
|
||||
const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations,
|
||||
const builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights,
|
||||
const std::shared_ptr<ngraph::opset1::Constant>& weights) :
|
||||
precisionBeforeDequantization(precisionBeforeDequantization),
|
||||
dequantizationOnActivations(dequantizationOnActivations),
|
||||
fakeQuantizeOnWeights(fakeQuantizeOnWeights),
|
||||
weights(weights) {}
|
||||
Actual(
|
||||
const ngraph::element::Type& precisionBeforeDequantization,
|
||||
const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnActivations,
|
||||
const builder::subgraph::DequantizationOperations& dequantizationOnWeights,
|
||||
const std::shared_ptr<ngraph::opset1::Constant>& weights) :
|
||||
precisionBeforeDequantization(precisionBeforeDequantization),
|
||||
dequantizationOnActivations(dequantizationOnActivations),
|
||||
dequantizationOnWeights(dequantizationOnWeights),
|
||||
weights(weights) {}
|
||||
};
|
||||
|
||||
class Expected {
|
||||
public:
|
||||
ngraph::element::Type precisionBeforeDequantization;
|
||||
ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
|
||||
builder::subgraph::DequantizationOperations dequantizationOnWeights;
|
||||
ngraph::builder::subgraph::DequantizationOperations dequantizationAfter;
|
||||
std::shared_ptr<ngraph::opset1::Constant> weights;
|
||||
bool transformed;
|
||||
};
|
||||
|
||||
ngraph::pass::low_precision::LayerTransformation::Params params;
|
||||
Actual actual;
|
||||
Expected expected;
|
||||
};
|
||||
|
||||
typedef std::tuple<
|
||||
element::Type,
|
||||
ngraph::Shape,
|
||||
ConvolutionBackpropDataTransformationTestValues> ConvolutionBackpropDataTransformationParams;
|
||||
|
||||
class ConvolutionBackpropDataTransformation : public LayerTransformation, public testing::WithParamInterface<ConvolutionBackpropDataTransformationParams> {
|
||||
public:
|
||||
void SetUp() override {
|
||||
const auto netPrecision = std::get<0>(GetParam());
|
||||
const auto inputShape = std::get<1>(GetParam());
|
||||
auto outputShape = inputShape;
|
||||
outputShape[1] /= 4;
|
||||
outputShape[2] *= 2;
|
||||
outputShape[3] *= 2;
|
||||
auto testValues = std::get<2>(GetParam());
|
||||
|
||||
std::shared_ptr<Node> actualWeights = pass::low_precision::fold<opset1::Broadcast>(
|
||||
testValues.actual.weights,
|
||||
opset1::Constant::create(
|
||||
element::i64,
|
||||
Shape{inputShape.size()},
|
||||
Shape{inputShape[1], outputShape[1], 1, 1}));
|
||||
if (!testValues.actual.fakeQuantizeOnWeights.empty()) {
|
||||
actualWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
|
||||
outputShape,
|
||||
netPrecision,
|
||||
testValues.actual.fakeQuantizeOnWeights,
|
||||
as_type_ptr<opset1::Constant>(actualWeights));
|
||||
} else {
|
||||
actualWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
|
||||
outputShape,
|
||||
netPrecision,
|
||||
testValues.actual.dequantizationOnWeights,
|
||||
as_type_ptr<opset1::Constant>(actualWeights));
|
||||
}
|
||||
|
||||
actualFunction = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getOriginal(
|
||||
testValues.actual.precisionBeforeDequantization,
|
||||
netPrecision,
|
||||
inputShape,
|
||||
outputShape,
|
||||
testValues.actual.dequantizationOnActivations,
|
||||
actualWeights);
|
||||
|
||||
SimpleLowPrecisionTransformer transform;
|
||||
transform.add<ngraph::pass::low_precision::ConvolutionBackpropDataTransformation, ngraph::opset1::Convolution>(testValues.params);
|
||||
transform.transform(actualFunction);
|
||||
std::shared_ptr<Node> refWeights = pass::low_precision::fold<opset1::Broadcast>(
|
||||
testValues.expected.weights,
|
||||
opset1::Constant::create(
|
||||
element::i64,
|
||||
Shape{inputShape.size()},
|
||||
Shape{inputShape[1], outputShape[1], 1, 1}));
|
||||
|
||||
if (!testValues.expected.transformed) {
|
||||
refWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
|
||||
outputShape,
|
||||
netPrecision,
|
||||
testValues.actual.fakeQuantizeOnWeights,
|
||||
as_type_ptr<opset1::Constant>(refWeights));
|
||||
} else {
|
||||
refWeights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
|
||||
outputShape,
|
||||
netPrecision,
|
||||
testValues.expected.dequantizationOnWeights,
|
||||
as_type_ptr<opset1::Constant>(refWeights));
|
||||
}
|
||||
|
||||
referenceFunction = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getReference(
|
||||
testValues.expected.precisionBeforeDequantization,
|
||||
netPrecision,
|
||||
inputShape,
|
||||
outputShape,
|
||||
testValues.expected.dequantizationOnActivations,
|
||||
refWeights,
|
||||
testValues.expected.dequantizationAfter);
|
||||
}
|
||||
|
||||
static std::string getTestCaseName(testing::TestParamInfo<ConvolutionBackpropDataTransformationParams> obj) {
|
||||
const auto netPrecision = std::get<0>(obj.param);
|
||||
auto inputShape = std::get<1>(obj.param);
|
||||
ConvolutionBackpropDataTransformationTestValues testValues = std::get<2>(obj.param);
|
||||
|
||||
std::ostringstream result;
|
||||
result << toString(testValues.params) << "_" <<
|
||||
netPrecision << "_" <<
|
||||
inputShape << "_" <<
|
||||
testValues.actual.precisionBeforeDequantization << "_" <<
|
||||
testValues.actual.dequantizationOnActivations << "_" <<
|
||||
testValues.actual.dequantizationOnWeights << "_" <<
|
||||
testValues.actual.fakeQuantizeOnWeights << "_" <<"_weights_" <<
|
||||
testValues.actual.weights->get_element_type() << "_" << "{ " <<
|
||||
testValues.actual.weights->cast_vector<float>()[0] << " }_";
|
||||
return result.str();
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(ConvolutionBackpropDataTransformation, CompareFunctions) {
|
||||
actualFunction->validate_nodes_and_infer_types();
|
||||
auto res = compare_functions(referenceFunction, actualFunction, true, true, true);
|
||||
ASSERT_TRUE(res.first) << res.second;
|
||||
}
|
||||
|
||||
const std::vector<element::Type> netPrecisions = {
|
||||
element::f32,
|
||||
element::f16
|
||||
};
|
||||
|
||||
const std::vector<ngraph::Shape> shapes = {
|
||||
ngraph::Shape({ 1, 8, 16, 16 })
|
||||
};
|
||||
|
||||
const std::vector<ConvolutionBackpropDataTransformationTestValues> testValues = {
|
||||
// with zero point
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
// ActualValues
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, { 128.f }, { 0.02f }},
|
||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
|
||||
},
|
||||
// ExpectedValues
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{}, { { 128.f }, ngraph::element::f32, {}, false }, {}},
|
||||
{},
|
||||
{{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}},
|
||||
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ -125.f }),
|
||||
true
|
||||
}
|
||||
},
|
||||
// updatePrecisions = false
|
||||
{
|
||||
LayerTransformation::createParamsU8I8().setUpdatePrecisions(false),
|
||||
// ActualValues
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, { 128.f }, { 0.02f }},
|
||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
|
||||
},
|
||||
// ExpectedValues
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{}, { { 128.f }, ngraph::element::f32, {}, false }, {}},
|
||||
{},
|
||||
{{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}},
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -125.f }),
|
||||
true
|
||||
}
|
||||
},
|
||||
// QDq version
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
// ActualValues
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, { 128.f }, { 0.02f }},
|
||||
{{ngraph::element::f32}, { 2.f }, { 0.01f }},
|
||||
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
|
||||
},
|
||||
// ExpectedValues
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{}, { { 128.f }, ngraph::element::f32, {}, false }, {}},
|
||||
{{}, { { 2.f }, ngraph::element::f32, {1, 2, 1, 1}, true, 1ul, element::i8, false, { "DISABLED_CONSTANT_FOLDING" } }, {}},
|
||||
{{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1 }}},
|
||||
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
||||
true
|
||||
}
|
||||
},
|
||||
// without zero point
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
// ActualValues
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {}, { 0.02f }},
|
||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
|
||||
},
|
||||
// ExpectedValues
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{},
|
||||
{},
|
||||
{{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}},
|
||||
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ -125.f }),
|
||||
true
|
||||
}
|
||||
},
|
||||
// QDq version
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
// ActualValues
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {}, { 0.02f }},
|
||||
{{ngraph::element::f32}, {}, { 0.01f }},
|
||||
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
|
||||
},
|
||||
// ExpectedValues
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{},
|
||||
{},
|
||||
{{}, {}, {{ 0.0002f }, ngraph::element::f32, {1}}},
|
||||
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
||||
true
|
||||
}
|
||||
},
|
||||
// per-channel dequantization with the same values
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
// ActualValues
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {}, { std::vector<float>{0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f, 0.02f} }},
|
||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
|
||||
},
|
||||
// ExpectedValues
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{},
|
||||
{},
|
||||
{{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}},
|
||||
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ -125.f }),
|
||||
true
|
||||
}
|
||||
},
|
||||
// per-channel dequantization with different values
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
// ActualValues
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {}, { std::vector<float>{0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f} }},
|
||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||
op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector<float>{ 2.f })
|
||||
},
|
||||
// ExpectedValues
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {}, { std::vector<float>{0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f, 0.02f, 0.01f} }},
|
||||
{},
|
||||
{},
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||
true
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
// Instantiate the ConvolutionBackpropData LPT unit tests over the cross
// product of net precisions, input shapes and the testValues table above.
INSTANTIATE_TEST_CASE_P(
    smoke_LPT,
    ConvolutionBackpropDataTransformation,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::ValuesIn(shapes),
        ::testing::ValuesIn(testValues)),
    ConvolutionBackpropDataTransformation::getTestCaseName);
|
@ -231,7 +231,7 @@ const std::vector<ConvolutionQDqTransformationTestValues> testValues = {
|
||||
}
|
||||
},
|
||||
|
||||
// Actual & Transformed:
|
||||
// Actual:
|
||||
//
|
||||
// Parameter Constant Constant Constant
|
||||
// |U8 |U8 |FP32 |I8
|
||||
@ -246,6 +246,22 @@ const std::vector<ConvolutionQDqTransformationTestValues> testValues = {
|
||||
// \FP32 /FP32
|
||||
// \ /
|
||||
// Convolution
|
||||
//
|
||||
// Transformed:
|
||||
//
|
||||
// Parameter Constant
|
||||
// |U8 |U8
|
||||
// | |
|
||||
// Convert Convert
|
||||
// \FP32 /FP32
|
||||
// \ /
|
||||
// Subtract Constant
|
||||
// \FP32 /FP32
|
||||
// \ /
|
||||
// Multiply Constant
|
||||
// \FP32 /FP32
|
||||
// \ /
|
||||
// Convolution
|
||||
{
|
||||
LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true),
|
||||
// ActualValues
|
||||
@ -262,8 +278,8 @@ const std::vector<ConvolutionQDqTransformationTestValues> testValues = {
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, { 0.02f }},
|
||||
{{ngraph::element::f32}, { {127.f}, element::f32, {}, false, 1ul, element::i8, true }, { 0.03f }},
|
||||
{ std::vector<float>{ 2.f }, ngraph::element::f32},
|
||||
{},
|
||||
{ std::vector<float>{ -3.75f }, ngraph::element::f32},
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{}
|
||||
@ -434,12 +450,8 @@ const std::vector<ConvolutionQDqTransformationTestValues> testValues = {
|
||||
{ {1000.f}, element::f32, {}, false },
|
||||
{ {0.02f}, element::f32, {}, false }
|
||||
},
|
||||
{
|
||||
{ ngraph::element::f32, false },
|
||||
{ {127.f}, element::f32, {}, false },
|
||||
{ {0.03f}, element::f32, {}, false }
|
||||
},
|
||||
{ std::vector<float>{ 2.f }, ngraph::element::i8},
|
||||
{},
|
||||
{ std::vector<float>{ -3.75f }, ngraph::element::f32},
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{}
|
||||
|
@ -160,8 +160,8 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{ ngraph::element::f32 }, { 128.f }, { 0.02f }},
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{}
|
||||
}
|
||||
@ -288,13 +288,13 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
|
||||
{{ 128.f, 0.f, 128.f }, ngraph::element::f32, { 1, 3, 1, 1 }},
|
||||
{{ 0.02f, 0.01f, 0.03f }, ngraph::element::f32, {1, 3, 1, 1}}
|
||||
},
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{}
|
||||
}
|
||||
},
|
||||
// dequantization in second dimension
|
||||
// float input
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
// ActualValues
|
||||
@ -316,8 +316,8 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
|
||||
{{ 128.f }, ngraph::element::f32, { 1, 1, 1, 1 }},
|
||||
{{ 0.02f }, ngraph::element::f32, {1, 1, 1, 1}}
|
||||
},
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{}
|
||||
}
|
||||
@ -356,8 +356,8 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{{}, {}, { {0.02f}, element::f32 }},
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{}
|
||||
}
|
||||
@ -396,8 +396,8 @@ const std::vector<ConvolutionTransformationTestValues> testValues = {
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{element::f32}, { 1000.f }, { {0.02f}, element::f32 }},
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{}
|
||||
}
|
||||
|
@ -160,8 +160,8 @@ const std::vector<GroupConvolutionTestValues> testValues = {
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{ ngraph::element::f32 }, { 128.f }, { 0.02f }},
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||
{},
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{}
|
||||
@ -286,8 +286,8 @@ const std::vector<GroupConvolutionTestValues> testValues = {
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{{}, {}, { 0.02f }},
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||
{},
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{}
|
||||
@ -459,8 +459,8 @@ const std::vector<GroupConvolutionTestValues> testValues = {
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{{}, {}, { 0.02f }},
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
|
||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ -1.25f }),
|
||||
{},
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{}
|
||||
|
@ -60,7 +60,7 @@ private:
|
||||
auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, input_shape);
|
||||
auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, weights_shape, {1});
|
||||
auto conv = std::make_shared<ngraph::op::DeconvolutionIE>(input, weights, ngraph::Strides(spatial_dims, 1), ngraph::Strides(spatial_dims, 1),
|
||||
ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0));
|
||||
ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0), ngraph::element::f32);
|
||||
|
||||
return std::make_shared<ngraph::Function>(ngraph::NodeVector{conv}, ngraph::ParameterVector{input});
|
||||
}
|
||||
|
@ -0,0 +1,100 @@
|
||||
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "low_precision_transformations/convolution_backprop_data_transformation.hpp"
#include "common_test_utils/test_constants.hpp"

using namespace LayerTestsDefinitions;

namespace {
// Network precisions the CPU plugin test covers.
const std::vector<ngraph::element::Type> netPrecisions = {
    ngraph::element::f32
};

// LPT parameter sets: run with and without precision update.
// NOTE: identifier fixed from the original misspelling "trasformationParamValues".
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> transformationParamValues = {
    LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true),
    LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(false)
};

// Each entry: FQ on activations, FQ-or-DQ on weights, expected layer name and
// expected runtime kernel type (empty strings skip the runtime-precision check).
const std::vector<LayerTestsDefinitions::ConvolutionBackpropDataTransformationParam> params = {
    // FQ on weights
    // with zero point
    {
        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
        {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { -127.f }, { 127.f }},
        "",
        ""
    },
    // without zero point
    {
        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
        {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }},
        "",
        ""
    },
    // with incorrect zero point on activations
    {
        {256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
        {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }},
        "",
        ""
    },
    // with incorrect zero point on weights
    {
        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
        {255ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
        "",
        ""
    },
    // QDq on weights
    // with zero point
    {
        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
        {{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
        "",
        ""
    },
    // without zero point
    {
        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
        {{ngraph::element::f32}, {}, { {4.f}, ngraph::element::f32, {}, false }},
        "",
        ""
    },
    // with incorrect zero point on activations
    {
        {256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
        {{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
        "",
        ""
    },
    // with incorrect zero point on weights
    {
        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
        {{ngraph::element::f32}, { {1000.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
        "",
        ""
    }
};

const std::vector<ngraph::Shape> inputShapes = {
    { 1, 8, 16, 16 }
};

// Spatial output size passed to the deconvolution test function.
const std::vector<ngraph::Shape> outputShapes = {
    { 16, 16 }
};

INSTANTIATE_TEST_CASE_P(smoke_LPT, ConvolutionBackpropDataTransformation,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::ValuesIn(inputShapes),
        ::testing::ValuesIn(outputShapes),
        ::testing::Values(CommonTestUtils::DEVICE_CPU),
        ::testing::ValuesIn(transformationParamValues),
        ::testing::ValuesIn(params)),
    ConvolutionBackpropDataTransformation::getTestCaseName);
}  // namespace
|
@ -0,0 +1,103 @@
|
||||
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "low_precision_transformations/convolution_backprop_data_transformation.hpp"
#include "common_test_utils/test_constants.hpp"

using namespace LayerTestsDefinitions;

namespace {
// GPU plugin covers both f32 and f16 network precisions.
const std::vector<ngraph::element::Type> netPrecisions = {
    ngraph::element::f32,
    ngraph::element::f16
};

// LPT parameter sets: run with and without precision update.
// NOTE: identifier fixed from the original misspelling "trasformationParamValues".
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> transformationParamValues = {
    LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true),
    LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(false)
};

// Each entry: FQ on activations, FQ-or-DQ on weights, expected layer name and
// expected runtime kernel type (empty strings skip the runtime-precision check).
const std::vector<LayerTestsDefinitions::ConvolutionBackpropDataTransformationParam> params = {
    // FQ on weights
    // with zero point
    {
        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
        {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { -127.f }, { 127.f }},
        "",
        ""
    },
    // without zero point
    {
        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
        {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }},
        "",
        ""
    },
    // TODO: check fails in CI
//    // with incorrect zero point on activations
//    {
//        {256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
//        {255ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 254.f }, { 0.f }, { 25.4f }},
//        "",
//        ""
//    },
//    // with incorrect zero point on weights
//    {
//        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
//        {255ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
//        "",
//        ""
//    },
    // QDq on weights
    // with zero point
    {
        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
        {{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
        "",
        ""
    },
    // without zero point
    {
        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { 0.f }, { 25.5f }},
        {{ngraph::element::f32}, {}, { {4.f}, ngraph::element::f32, {}, false }},
        "",
        ""
    },
    // with incorrect zero point on activations
    {
        {256ul, ngraph::Shape{1, 1, 1, 1}, { 5.f }, { 6.f }, { 5.f }, { 6.f }},
        {{ngraph::element::f32}, { {12.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
        "",
        ""
    },
    // with incorrect zero point on weights
    {
        {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 255.f }, { -12.7f }, { 12.8f }},
        {{ngraph::element::f32}, { {1000.f}, ngraph::element::f32, {}, false }, { {4.f}, ngraph::element::f32, {}, false }},
        "",
        ""
    }
};

const std::vector<ngraph::Shape> inputShapes = {
    { 1, 8, 16, 16 },
    { 1, 32, 16, 16 }
};

// Spatial output size passed to the deconvolution test function.
const std::vector<ngraph::Shape> outputShapes = {
    { 16, 16 }
};

INSTANTIATE_TEST_CASE_P(smoke_LPT, ConvolutionBackpropDataTransformation,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::ValuesIn(inputShapes),
        ::testing::ValuesIn(outputShapes),
        ::testing::Values(CommonTestUtils::DEVICE_GPU),
        ::testing::ValuesIn(transformationParamValues),
        ::testing::ValuesIn(params)),
    ConvolutionBackpropDataTransformation::getTestCaseName);
}  // namespace
|
@ -0,0 +1,65 @@
|
||||
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <string>
#include <memory>
#include <utility>


#include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"

namespace LayerTestsDefinitions {

// Per-case parameters for the ConvolutionBackpropData LPT shared test.
// Weights are described EITHER by fakeQuantizeOnWeights OR by
// dequantizationOnWeights — each constructor fills one of the two and leaves
// the other default-constructed (empty).
class ConvolutionBackpropDataTransformationParam {
public:
    // FakeQuantize applied to the activations (deconvolution input).
    ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData;
    // FakeQuantize applied to the weights (used when non-empty).
    ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
    // Dequantization (Convert/Subtract/Multiply) on the weights (alternative to FQ).
    ngraph::builder::subgraph::DequantizationOperations dequantizationOnWeights;
    // Friendly name of the layer whose runtime precision is checked; empty skips the check.
    std::string layerName;
    // Expected runtime kernel precision for layerName (compared in Run()).
    std::string expectedKernelType;

    ConvolutionBackpropDataTransformationParam() = default;
    // Construct a case with FakeQuantize on weights.
    ConvolutionBackpropDataTransformationParam(
        const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantizeOnData,
        const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights,
        std::string layerName,
        std::string expectedKernelType) :
        fakeQuantizeOnData(fakeQuantizeOnData), fakeQuantizeOnWeights(fakeQuantizeOnWeights),
        layerName(std::move(layerName)), expectedKernelType(std::move(expectedKernelType)) {}
    // Construct a case with a dequantization subgraph on weights.
    ConvolutionBackpropDataTransformationParam(
        const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantizeOnData,
        ngraph::builder::subgraph::DequantizationOperations dequantizationOnWeights,
        std::string layerName,
        std::string expectedKernelType) :
        fakeQuantizeOnData(fakeQuantizeOnData), dequantizationOnWeights(std::move(dequantizationOnWeights)),
        layerName(std::move(layerName)), expectedKernelType(std::move(expectedKernelType)) {}
};

typedef std::tuple<
    ngraph::element::Type,  // netPrecision
    ngraph::Shape,          // inputShape
    ngraph::Shape,          // outputShape
    std::string,            // targetDevice
    ngraph::pass::low_precision::LayerTransformation::Params,
    ConvolutionBackpropDataTransformationParam
> ConvolutionBackpropDataTransformationParams;

// Shared (CPU/GPU) parameterized test for the LPT ConvolutionBackpropData
// transformation: builds the quantized subgraph, runs inference and checks
// the runtime precision of the requested layer.
class ConvolutionBackpropDataTransformation :
    public testing::WithParamInterface<ConvolutionBackpropDataTransformationParams>,
    public LayerTestsUtils::LayerTransformation {
public:
    static std::string getTestCaseName(testing::TestParamInfo<ConvolutionBackpropDataTransformationParams> obj);

protected:
    void SetUp() override;

    void Run() override;
};

}  // namespace LayerTestsDefinitions
|
@ -0,0 +1,77 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision_transformations/convolution_backprop_data_transformation.hpp"
|
||||
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "lpt_ngraph_functions/convolution_backprop_data_function.hpp"
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
// Builds a unique, human-readable test name from the parameter tuple:
// the common LPT prefix plus output shape and the weight/activation
// quantization descriptions.
std::string ConvolutionBackpropDataTransformation::getTestCaseName(testing::TestParamInfo<ConvolutionBackpropDataTransformationParams> obj) {
    ngraph::element::Type precision;
    ngraph::Shape inShape;
    ngraph::Shape outShape;
    std::string device;
    ngraph::pass::low_precision::LayerTransformation::Params trafoParams;
    ConvolutionBackpropDataTransformationParam testParam;
    std::tie(precision, inShape, outShape, device, trafoParams, testParam) = obj.param;

    std::ostringstream name;
    name << getTestCaseNameByParams(precision, inShape, device, trafoParams);
    name << "_" << outShape;
    name << "_" << testParam.fakeQuantizeOnData;
    name << "_" << testParam.fakeQuantizeOnWeights;
    name << "_" << testParam.dequantizationOnWeights;
    return name.str();
}
|
||||
|
||||
void ConvolutionBackpropDataTransformation::SetUp() {
|
||||
threshold = 0.1f;
|
||||
|
||||
ngraph::element::Type netPrecision;
|
||||
ngraph::Shape inputShape;
|
||||
ngraph::Shape outputShape;
|
||||
ngraph::pass::low_precision::LayerTransformation::Params params;
|
||||
ConvolutionBackpropDataTransformationParam param;
|
||||
std::tie(netPrecision, inputShape, outputShape, targetDevice, params, param) = this->GetParam();
|
||||
|
||||
std::shared_ptr<ngraph::Node> weights;
|
||||
|
||||
if (!param.fakeQuantizeOnWeights.empty()) {
|
||||
weights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
|
||||
ngraph::Shape{inputShape[1], inputShape[1] / 2, 1, 1},
|
||||
netPrecision,
|
||||
param.fakeQuantizeOnWeights);
|
||||
} else {
|
||||
weights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights(
|
||||
ngraph::Shape{inputShape[1], inputShape[1] / 2, 1, 1},
|
||||
netPrecision,
|
||||
param.dequantizationOnWeights);
|
||||
}
|
||||
|
||||
function = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::get(
|
||||
netPrecision,
|
||||
inputShape,
|
||||
outputShape,
|
||||
param.fakeQuantizeOnData,
|
||||
weights);
|
||||
}
|
||||
|
||||
// Runs the common LPT flow, then verifies that the layer named in the test
// parameters was executed with the expected runtime precision.
void ConvolutionBackpropDataTransformation::Run() {
    LayerTestsCommon::Run();

    // Element 5 of the tuple is the ConvolutionBackpropDataTransformationParam.
    const auto params = std::get<5>(GetParam());
    const auto actualType = getRuntimePrecision(params.layerName);
    EXPECT_EQ(actualType, params.expectedKernelType);
}
|
||||
|
||||
// Single parameterized test: run and compare against the reference
// implementation (precision check happens inside Run()).
// Fix: dropped the stray trailing semicolon after the macro body, which
// triggers -Wextra-semi/pedantic warnings.
TEST_P(ConvolutionBackpropDataTransformation, CompareWithRefImpl) {
    Run();
}
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
@ -0,0 +1,54 @@
|
||||
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>

#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"

namespace ngraph {
namespace builder {
namespace subgraph {

// Factory of ngraph::Function subgraphs around opset1::ConvolutionBackpropData,
// used by LPT unit and plugin tests to build actual/reference graphs.
class ConvolutionBackpropDataFunction {
public:
    // Builds i8 weights (filled with 1s unless `value` is given) followed by a
    // dequantization (Convert/Subtract/Multiply) subgraph.
    static std::shared_ptr<Node> getWeights(
        const Shape& shape,
        const element::Type& netPrecision,
        const builder::subgraph::DequantizationOperations& dequantizationOnWeights,
        const std::shared_ptr<opset1::Constant>& value = nullptr);
    // Builds i8 weights (filled with 1s unless `value` is given), converted to
    // netPrecision and followed by a FakeQuantize.
    static std::shared_ptr<Node> getWeights(
        const Shape& shape,
        const element::Type& netPrecision,
        const builder::subgraph::FakeQuantizeOnWeights& fqOnWeights,
        const std::shared_ptr<opset1::Constant>& value = nullptr);
    // Plugin-test graph: Parameter -> FQ -> ConvolutionBackpropData(weights).
    static std::shared_ptr<Function> get(
        const element::Type netPrecision,
        const Shape& inputShape,
        const Shape& outputShape,
        const builder::subgraph::FakeQuantizeOnData& fqOnData,
        const std::shared_ptr<Node>& weights);
    // Unit-test "actual" graph: Parameter -> dequantization -> deconvolution.
    static std::shared_ptr<Function> getOriginal(
        const element::Type precision,
        const element::Type netPrecision,
        const Shape& inputShape,
        const Shape& outputShape,
        const builder::subgraph::DequantizationOperations& dequantization,
        const std::shared_ptr<Node>& weights);
    // Unit-test "expected" graph: TypeRelaxed deconvolution with dequantization
    // moved after the operation.
    static std::shared_ptr<Function> getReference(
        const element::Type precision,
        const element::Type netPrecision,
        const Shape& inputShape,
        const Shape& outputShape,
        const builder::subgraph::DequantizationOperations& dequantization,
        const std::shared_ptr<Node>& weights,
        const builder::subgraph::DequantizationOperations& dequantizationAfter);
};
}  // namespace subgraph
}  // namespace builder
}  // namespace ngraph
|
@ -0,0 +1,149 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "lpt_ngraph_functions/convolution_backprop_data_function.hpp"
|
||||
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <ngraph_ops/type_relaxed.hpp>
|
||||
#include "ngraph_functions/subgraph_builders.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
|
||||
#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
|
||||
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
|
||||
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
|
||||
#include "lpt_ngraph_functions/common/builders.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
|
||||
using namespace ngraph::pass::low_precision;
|
||||
|
||||
namespace ngraph {
|
||||
namespace builder {
|
||||
namespace subgraph {
|
||||
|
||||
// Builds Parameter -> FakeQuantize -> ConvolutionBackpropData(weights)
// with unit strides and zero pads (1x1 "deconvolution").
// NOTE(review): outputShape is currently unused — the spatial output size is
// deduced from strides/pads; confirm whether the explicit output_shape input
// of ConvolutionBackpropData was intended here.
std::shared_ptr<Function> ConvolutionBackpropDataFunction::get(
    const element::Type netPrecision,
    const Shape& inputShape,
    const Shape& outputShape,
    const builder::subgraph::FakeQuantizeOnData& fqOnData,
    const std::shared_ptr<Node>& weights) {
    const auto input = std::make_shared<opset1::Parameter>(netPrecision, inputShape);
    const auto fq = makeFakeQuantize(input, netPrecision, fqOnData);

    auto convolutionBackpropData = std::make_shared<opset1::ConvolutionBackpropData>(
        fq,
        weights,
        Strides{ 1, 1 },
        CoordinateDiff{ 0, 0 },
        CoordinateDiff{ 0, 0 },
        Strides{ 1, 1 });

    ngraph::ResultVector results{ std::make_shared<opset1::Result>(convolutionBackpropData) };
    return std::make_shared<ngraph::Function>(results, ParameterVector{ input }, "ConvolutionBackpropDataTransformation");
}
|
||||
|
||||
// Builds quantized weights described by a FakeQuantize: an i8 constant
// (all 1s unless `value` is provided) is folded through a Convert to
// netPrecision, then wrapped in the given FakeQuantize.
std::shared_ptr<Node> ConvolutionBackpropDataFunction::getWeights(
    const Shape& shape,
    const element::Type& netPrecision,
    const builder::subgraph::FakeQuantizeOnWeights& fqOnWeights,
    const std::shared_ptr<opset1::Constant>& value) {
    const auto weights = value != nullptr ?
        value :
        std::make_shared<opset1::Constant>(
            element::i8,
            shape,
            std::vector<float>(shape_size(shape), 1));
    // Fold the Convert immediately so the FQ input is a plain constant of
    // netPrecision rather than a Convert node.
    const auto convert = std::make_shared<opset1::Convert>(weights, netPrecision);
    OutputVector convertedOutput(1);
    convert->constant_fold(convertedOutput, convert->input_values());
    const auto convertedWeights = convertedOutput[0].get_node_shared_ptr();
    const auto fq = makeFakeQuantize(convertedWeights, netPrecision, fqOnWeights);

    return fq;
}
|
||||
|
||||
// Builds quantized weights described by a dequantization subgraph: an i8
// constant (all 1s unless `value` is provided) followed by the given
// Convert/Subtract/Multiply sequence re-targeted to netPrecision.
std::shared_ptr<Node> ConvolutionBackpropDataFunction::getWeights(
    const Shape& shape,
    const element::Type& netPrecision,
    const builder::subgraph::DequantizationOperations& dequantizationOnWeights,
    const std::shared_ptr<opset1::Constant>& value) {
    auto weights =
        value != nullptr ?
        value :
        std::make_shared<opset1::Constant>(
            element::i8,
            shape,
            std::vector<float>(shape_size(shape), 1));
    auto dequantizationStructure = dequantizationOnWeights;
    // setPrecision retargets the whole structure to netPrecision; restore the
    // Subtract constant precision if the test case pinned it to an integer type.
    dequantizationStructure.setPrecision(netPrecision);
    if (!dequantizationOnWeights.subtract.constantPrecision.is_real()) {
        dequantizationStructure.subtract.constantPrecision = dequantizationOnWeights.subtract.constantPrecision;
    }
    // A caller-supplied floating-point constant is folded to netPrecision so
    // the dequantization input matches the expected element type.
    if (weights->get_element_type().is_real()) {
        weights = as_type_ptr<opset1::Constant>(fold<opset1::Convert>(weights, netPrecision));
    }
    const auto dq = makeDequantization(weights, dequantizationStructure);

    return dq;
}
|
||||
|
||||
std::shared_ptr<Function> ConvolutionBackpropDataFunction::getOriginal(
|
||||
const element::Type precision,
|
||||
const element::Type netPrecision,
|
||||
const Shape& inputShape,
|
||||
const Shape& outputShape,
|
||||
const builder::subgraph::DequantizationOperations& dequantization,
|
||||
const std::shared_ptr<Node>& weights) {
|
||||
const auto input = std::make_shared<opset1::Parameter>(precision, inputShape);
|
||||
auto dequantizationStructure = dequantization;
|
||||
dequantizationStructure.multiply.outPrecision = netPrecision;
|
||||
const auto activations = makeDequantization(input, dequantizationStructure);
|
||||
|
||||
auto convolutionBackpropData = std::make_shared<opset1::ConvolutionBackpropData>(
|
||||
activations,
|
||||
weights,
|
||||
Strides{ 1, 1 },
|
||||
CoordinateDiff{ 0, 0 },
|
||||
CoordinateDiff{ 0, 0 },
|
||||
Strides{ 1, 1 });
|
||||
|
||||
convolutionBackpropData->set_friendly_name("output");
|
||||
ngraph::ResultVector results{ std::make_shared<opset1::Result>(convolutionBackpropData) };
|
||||
return std::make_shared<ngraph::Function>(results, ParameterVector{ input }, "ConvolutionBackpropDataTransformation");
|
||||
}
|
||||
|
||||
// Builds the "expected" (post-transformation) unit-test graph: the
// deconvolution runs as a TypeRelaxed op on (possibly integer) inputs treated
// as f32, with the dequantization moved after it.
std::shared_ptr<Function> ConvolutionBackpropDataFunction::getReference(
    const element::Type precision,
    const element::Type netPrecision,
    const Shape& inputShape,
    const Shape& outputShape,
    const builder::subgraph::DequantizationOperations& dequantization,
    const std::shared_ptr<Node>& weights,
    const builder::subgraph::DequantizationOperations& dequantizationAfter) {
    const auto input = std::make_shared<opset1::Parameter>(precision, inputShape);
    auto dequantizationStructure = dequantization;
    dequantizationStructure.multiply.outPrecision = netPrecision;
    const auto activations = makeDequantization(input, dequantizationStructure);

    // TypeRelaxed lets the op accept mismatched input precisions; the output
    // stays netPrecision only when no dequantization follows.
    auto convolutionBackpropData = std::make_shared<op::TypeRelaxed<opset1::ConvolutionBackpropData>>(
        std::vector<element::Type>{ element::f32, element::f32 },
        std::vector<element::Type>{ dequantizationAfter.empty() ? netPrecision : element::f32 },
        ngraph::op::TemporaryReplaceOutputType(activations, element::f32).get(),
        ngraph::op::TemporaryReplaceOutputType(weights, element::f32).get(),
        Strides{ 1, 1 },
        CoordinateDiff{ 0, 0 },
        CoordinateDiff{ 0, 0 },
        Strides{ 1, 1 });

    auto dequantizationStructureAfter = dequantizationAfter;
    dequantizationStructureAfter.multiply.outPrecision = netPrecision;
    const auto result = makeDequantization(convolutionBackpropData, dequantizationStructureAfter);
    result->set_friendly_name("output");
    ngraph::ResultVector results{ std::make_shared<opset1::Result>(result) };
    return std::make_shared<ngraph::Function>(results, ParameterVector{ input }, "ConvolutionBackpropDataTransformation");
}
|
||||
|
||||
} // namespace subgraph
|
||||
} // namespace builder
|
||||
} // namespace ngraph
|
@ -86,8 +86,8 @@ namespace ngraph
|
||||
class NGRAPH_API ConvolutionBackpropData : public Op
|
||||
{
|
||||
public:
|
||||
static constexpr NodeTypeInfo type_info{"ConvolutionBackpropData", 1};
|
||||
const NodeTypeInfo& get_type_info() const override { return type_info; }
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
|
||||
/// \brief Constructs a batched-convolution data batch-backprop operation.
|
||||
ConvolutionBackpropData() = default;
|
||||
// clang-format off
|
||||
|
@ -85,8 +85,8 @@ namespace ngraph
|
||||
class NGRAPH_API GroupConvolutionBackpropData : public Op
|
||||
{
|
||||
public:
|
||||
static constexpr NodeTypeInfo type_info{"GroupConvolutionBackpropData", 1};
|
||||
const NodeTypeInfo& get_type_info() const override { return type_info; }
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
|
||||
/// \brief Constructs a batched-convolution data batch-backprop operation.
|
||||
GroupConvolutionBackpropData();
|
||||
// clang-format off
|
||||
|
@ -102,12 +102,14 @@ shared_ptr<Node> op::v1::Convolution::clone_with_new_inputs(const OutputVector&
|
||||
m_auto_pad);
|
||||
}
|
||||
|
||||
constexpr NodeTypeInfo op::v1::ConvolutionBackpropData::type_info;
|
||||
shared_ptr<Node> op::v1::Convolution::get_default_value() const
|
||||
{
|
||||
return ngraph::make_constant_from_string("0", get_element_type(), get_shape());
|
||||
}
|
||||
|
||||
// *** ConvolutionBackpropData OP SET 1 ***
|
||||
NGRAPH_RTTI_DEFINITION(op::v1::ConvolutionBackpropData, "ConvolutionBackpropData", 1);
|
||||
|
||||
op::v1::ConvolutionBackpropData::ConvolutionBackpropData(const Output<Node>& data,
|
||||
const Output<Node>& filters,
|
||||
const Output<Node>& output_shape,
|
||||
|
@ -286,7 +286,7 @@ shared_ptr<Node> op::v1::GroupConvolution::clone_with_new_inputs(const OutputVec
|
||||
// v1::GroupConvolutionBackpropData
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
constexpr NodeTypeInfo op::v1::GroupConvolutionBackpropData::type_info;
|
||||
NGRAPH_RTTI_DEFINITION(op::v1::GroupConvolutionBackpropData, "GroupConvolutionBackpropData", 1);
|
||||
|
||||
op::v1::GroupConvolutionBackpropData::GroupConvolutionBackpropData()
|
||||
: Op()
|
||||
|
Loading…
Reference in New Issue
Block a user