[LPT] GPU support3DTensorOnActivations (#4562)

This commit is contained in:
Edward Shogulin
2021-03-03 16:18:25 +03:00
committed by GitHub
parent d76b4da77a
commit 7aed4ab3e7
4 changed files with 18 additions and 3 deletions

View File

@@ -347,7 +347,9 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights
true); // supportAsymmetricQuantization
LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params)
.add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params).setSupportAsymmetricQuantization(false))
.add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params)
.setSupportAsymmetricQuantization(false)
.setSupport3DTensorOnActivations(false))
// INT8 StridedSlice not supported
.remove<StridedSliceTransformation, ngraph::opset1::StridedSlice>());

View File

@@ -172,14 +172,16 @@ public:
bool supportAsymmetricQuantization = false,
std::vector<element::Type> precisionsOnActivations = { element::u8, element::i8 },
std::vector<element::Type> precisionsOnWeights = { element::i8 },
element::Type deqPrecision = element::f32) :
element::Type deqPrecision = element::f32,
bool support3DTensorOnActivations = true) :
updatePrecisions(updatePrecisions),
quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations),
quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights),
supportAsymmetricQuantization(supportAsymmetricQuantization),
precisionsOnActivations(precisionsOnActivations),
precisionsOnWeights(precisionsOnWeights),
deqPrecision(deqPrecision) {
deqPrecision(deqPrecision),
support3DTensorOnActivations(support3DTensorOnActivations) {
if (precisionsOnActivations.size() == 0ul) {
THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed";
}
@@ -219,6 +221,11 @@ public:
return *this;
}
// Fluent setter on Params: toggles whether MatMul low-precision transformation
// accepts 3D tensors on the activations input (plugin-specific capability;
// the GPU plugin disables it — see the clDNNEngine hunk in this commit).
// Returns *this so calls can be chained with the other set*() members.
Params& setSupport3DTensorOnActivations(const bool support3DTensorOnActivations) {
this->support3DTensorOnActivations = support3DTensorOnActivations;
return *this;
}
bool updatePrecisions;
QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
@@ -226,6 +233,7 @@ public:
std::vector<element::Type> precisionsOnActivations;
std::vector<element::Type> precisionsOnWeights;
element::Type deqPrecision;
bool support3DTensorOnActivations;
};
class PrecisionDetails {
@@ -301,6 +309,7 @@ protected:
std::vector<element::Type> precisionsOnActivations;
std::vector<element::Type> precisionsOnWeights;
element::Type deqPrecision;
bool support3DTensorOnActivations;
// absolute value, used to determine quantization interval asymmetry
float quantizationIntervalAsymmetryThreshold;

View File

@@ -31,6 +31,7 @@ LayerTransformation::LayerTransformation(const Params& params) :
precisionsOnActivations(params.precisionsOnActivations),
precisionsOnWeights(params.precisionsOnWeights),
deqPrecision(params.deqPrecision),
support3DTensorOnActivations(params.support3DTensorOnActivations),
quantizationIntervalAsymmetryThreshold(0.002f),
zeroThreshold(1.e-6f),
minQuantizationLevels(2ul),

View File

@@ -23,6 +23,9 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
}
matMul = as_type_ptr<opset1::MatMul>(NetworkHelper::separateInStandaloneBranch(matMul));
if (!support3DTensorOnActivations && (matMul->input(0).get_shape().size() == 3ul)) {
return false;
}
const auto dequantization1 = NetworkHelper::getDequantization(matMul, 0);
auto dequantization2 = NetworkHelper::getDequantization(matMul, 1);