[LPT] GPU support3DTensorOnActivations (#4562)
This commit is contained in:
@@ -347,7 +347,9 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
|
||||
LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights
|
||||
true); // supportAsymmetricQuantization
|
||||
LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params)
|
||||
.add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params).setSupportAsymmetricQuantization(false))
|
||||
.add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params)
|
||||
.setSupportAsymmetricQuantization(false)
|
||||
.setSupport3DTensorOnActivations(false))
|
||||
// INT8 StridedSlice not supported
|
||||
.remove<StridedSliceTransformation, ngraph::opset1::StridedSlice>());
|
||||
|
||||
|
||||
@@ -172,14 +172,16 @@ public:
|
||||
bool supportAsymmetricQuantization = false,
|
||||
std::vector<element::Type> precisionsOnActivations = { element::u8, element::i8 },
|
||||
std::vector<element::Type> precisionsOnWeights = { element::i8 },
|
||||
element::Type deqPrecision = element::f32) :
|
||||
element::Type deqPrecision = element::f32,
|
||||
bool support3DTensorOnActivations = true) :
|
||||
updatePrecisions(updatePrecisions),
|
||||
quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations),
|
||||
quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights),
|
||||
supportAsymmetricQuantization(supportAsymmetricQuantization),
|
||||
precisionsOnActivations(precisionsOnActivations),
|
||||
precisionsOnWeights(precisionsOnWeights),
|
||||
deqPrecision(deqPrecision) {
|
||||
deqPrecision(deqPrecision),
|
||||
support3DTensorOnActivations(support3DTensorOnActivations) {
|
||||
if (precisionsOnActivations.size() == 0ul) {
|
||||
THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed";
|
||||
}
|
||||
@@ -219,6 +221,11 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Stores the given flag in this Params object's support3DTensorOnActivations member.
// Returns a reference to this Params object so that setter calls can be chained.
Params& setSupport3DTensorOnActivations(const bool support3DTensorOnActivations) {
|
||||
this->support3DTensorOnActivations = support3DTensorOnActivations;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool updatePrecisions;
|
||||
QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
|
||||
QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
|
||||
@@ -226,6 +233,7 @@ public:
|
||||
std::vector<element::Type> precisionsOnActivations;
|
||||
std::vector<element::Type> precisionsOnWeights;
|
||||
element::Type deqPrecision;
|
||||
bool support3DTensorOnActivations;
|
||||
};
|
||||
|
||||
class PrecisionDetails {
|
||||
@@ -301,6 +309,7 @@ protected:
|
||||
std::vector<element::Type> precisionsOnActivations;
|
||||
std::vector<element::Type> precisionsOnWeights;
|
||||
element::Type deqPrecision;
|
||||
bool support3DTensorOnActivations;
|
||||
|
||||
// absolute value, used to determine quantization interval asymmetry
|
||||
float quantizationIntervalAsymmetryThreshold;
|
||||
|
||||
@@ -31,6 +31,7 @@ LayerTransformation::LayerTransformation(const Params& params) :
|
||||
precisionsOnActivations(params.precisionsOnActivations),
|
||||
precisionsOnWeights(params.precisionsOnWeights),
|
||||
deqPrecision(params.deqPrecision),
|
||||
support3DTensorOnActivations(params.support3DTensorOnActivations),
|
||||
quantizationIntervalAsymmetryThreshold(0.002f),
|
||||
zeroThreshold(1.e-6f),
|
||||
minQuantizationLevels(2ul),
|
||||
|
||||
@@ -23,6 +23,9 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
|
||||
}
|
||||
|
||||
matMul = as_type_ptr<opset1::MatMul>(NetworkHelper::separateInStandaloneBranch(matMul));
|
||||
if (!support3DTensorOnActivations && (matMul->input(0).get_shape().size() == 3ul)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto dequantization1 = NetworkHelper::getDequantization(matMul, 0);
|
||||
auto dequantization2 = NetworkHelper::getDequantization(matMul, 1);
|
||||
|
||||
Reference in New Issue
Block a user