From ac2370b4207c2543a6af83a29bb94f077ba85862 Mon Sep 17 00:00:00 2001
From: Edward Shogulin
Date: Tue, 15 Sep 2020 09:18:58 +0300
Subject: [PATCH] [LPT] Copy constant with several outputs before blob update
 (cherry-pick to master) (#2198)

* [LPT] Copy constant implementation

* [LPT] the same Constant ops as FQ interval boundaries
---
 .../network_helper.hpp                             |  14 +-
 .../weightable_layer_transformation.hpp            |   2 +
 .../src/concat.cpp                                 |  16 +-
 .../src/concat_multi_channels.cpp                  |   4 +-
 .../src/convolution.cpp                            |   1 +
 .../src/fake_quantize.cpp                          |   6 +-
 .../src/fully_connected.cpp                        |   1 +
 .../src/network_helper.cpp                         | 157 ++++++++++++------
 .../src/weightable_layer_transformation.cpp        |  17 +-
 ..._constant_fake_quantize_transformation.cpp      |  10 +-
 ..._constant_fake_quantize_transformation.cpp      |   7 +-
 ..._constant_fake_quantize_transformation.hpp      |   2 +-
 ..._constant_fake_quantize_transformation.cpp      |  13 +-
 ...imized_constant_fake_quantize_function.hpp      |   3 +-
 ...imized_constant_fake_quantize_function.cpp      |  39 +++--
 15 files changed, 190 insertions(+), 102 deletions(-)

diff --git a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/network_helper.hpp b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/network_helper.hpp
index 1cc3af08381..aa422e72843 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/network_helper.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/network_helper.hpp
@@ -48,16 +48,22 @@ public:
 
     static Blob::Ptr makeNewBlobPtr(const TensorDesc& desc);
 
-    static void invertFakeQuantize(const CNNLayer& fakeQuantize);
-
-    static void updateBlobs(CNNLayer& layer, const std::string& blobName, float value);
-
     static void updateBlobs(const CNNLayer& quantizeLayer, int constLayerIndex, float value);
 
     static void updateBlobs(const CNNLayer& quantizeLayer, int constLayerIndex, const std::vector<float>& values);
 
+    static void updateBlobs(TransformationContext& context, const CNNLayer& quantizeLayer, int constLayerIndex, float value);
+
+    static void updateBlobs(TransformationContext& context, const CNNLayer& quantizeLayer, int constLayerIndex, const std::vector<float>& values);
+
     static void updateBlobs(CNNLayer& layer, const std::string& blobName, const std::vector<float>& values);
 
+    static CNNLayerPtr copyConstant(
+        TransformationContext& context,
+        const CNNLayer& quantizeLayer,
+        const CNNLayerPtr& blobLayer,
+        const size_t constLayerIndex);
+
     // return true if at least one child uses layer on weights
     static bool onWeights(const CNNLayer& layer);

diff --git a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/weightable_layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/weightable_layer_transformation.hpp
index 34af7c72ab5..d763b6706aa 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/weightable_layer_transformation.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/weightable_layer_transformation.hpp
@@ -47,6 +47,7 @@ protected:
         std::vector<float>& biasesShifts) const;
 
     void updateWeights(
+        TransformationContext& context,
        const CNNLayerPtr fakeQuantize,
        std::vector<float>& outputLowValues,
        std::vector<float>& outputHighValues) const;
@@ -68,6 +69,7 @@ protected:
         const bool onWeights) const;
 
     DataPrecision fillDequantizationsForWeightsPath(
+        TransformationContext& context,
        const CNNLayer& weightableLayer,
        const bool supportAsymmetricQuantization,
        std::vector<float>& dequantizationScales,

diff --git a/inference-engine/src/low_precision_transformations/src/concat.cpp b/inference-engine/src/low_precision_transformations/src/concat.cpp
index 378b00b0ea5..c9dae711605 100644
--- a/inference-engine/src/low_precision_transformations/src/concat.cpp
+++ b/inference-engine/src/low_precision_transformations/src/concat.cpp
@@ -148,10 +148,10 @@ void ConcatTransformation::transform(TransformationContext& context, CNNLayer& c
         switch (quantizedTensorAlignmentOnActivations) {
         case QuantizedTensorAlignment::None: {
             const float updatedOutputLowValue = quantizationDetails.outputLowValues[0] * quantizationScale + quantizationShift;
-            CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 3, updatePrecisions ? roundf(updatedOutputLowValue) : updatedOutputLowValue);
+            CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 3, updatePrecisions ? roundf(updatedOutputLowValue) : updatedOutputLowValue);
 
             const float updatedOutputHighValue = quantizationDetails.outputHighValues[0] * quantizationScale + quantizationShift;
-            CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 4, updatePrecisions ? roundf(updatedOutputHighValue) : updatedOutputHighValue);
+            CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 4, updatePrecisions ? roundf(updatedOutputHighValue) : updatedOutputHighValue);
 
             break;
         }
@@ -165,18 +165,18 @@ void ConcatTransformation::transform(TransformationContext& context, CNNLayer& c
                 (outputHighValue / quantizationDetails.outputHighValues[0])) :
                 outputHighValue;
 
-            CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 1, inputLowValue);
-            CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 2, inputHighValue);
-            CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 3, dataPrecision.min);
-            CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 4, dataPrecision.max);
+            CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 1, inputLowValue);
+            CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 2, inputHighValue);
+            CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 3, dataPrecision.min);
+            CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 4, dataPrecision.max);
 
             break;
         }
         case QuantizedTensorAlignment::UpdateLevel: {
             const float updatedOutputLowValue = quantizationDetails.outputLowValues[0] * quantizationScale + quantizationShift;
-            CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 3, updatePrecisions ? roundf(updatedOutputLowValue) : updatedOutputLowValue);
+            CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 3, updatePrecisions ? roundf(updatedOutputLowValue) : updatedOutputLowValue);
 
             const float updatedOutputHighValue = quantizationDetails.outputHighValues[0] * quantizationScale + quantizationShift;
-            CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 4, updatePrecisions ? roundf(updatedOutputHighValue) : updatedOutputHighValue);
+            CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 4, updatePrecisions ? roundf(updatedOutputHighValue) : updatedOutputHighValue);
 
             const int levels = static_cast<int>(fabs(roundf(updatedOutputHighValue) - roundf(updatedOutputLowValue)) + 1.0);
             fakeQuantizeLayer.params["levels"] = std::to_string(levels);

diff --git a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp
index 582c86467f0..929694aa5d4 100644
--- a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp
+++ b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp
@@ -106,8 +106,8 @@ void ConcatMultiChannelsTransformation::transform(TransformationContext& context
         dequantizationScalesLayers[fakeQuantizeLayer->name] = dequantizationScales;
         dequantizationShiftsLayers[fakeQuantizeLayer->name] = dequantizationShifts;
 
-        CNNNetworkHelper::updateBlobs(*fakeQuantizeLayer, 3, dataPrecision.min);
-        CNNNetworkHelper::updateBlobs(*fakeQuantizeLayer, 4, dataPrecision.max);
+        CNNNetworkHelper::updateBlobs(context, *fakeQuantizeLayer, 3, dataPrecision.min);
+        CNNNetworkHelper::updateBlobs(context, *fakeQuantizeLayer, 4, dataPrecision.max);
     }
 
     if (updatePrecisions) {

diff --git a/inference-engine/src/low_precision_transformations/src/convolution.cpp b/inference-engine/src/low_precision_transformations/src/convolution.cpp
index cb1dddbac46..43b8c0a3926 100644
--- a/inference-engine/src/low_precision_transformations/src/convolution.cpp
+++ b/inference-engine/src/low_precision_transformations/src/convolution.cpp
@@ -105,6 +105,7 @@ void ConvolutionTransformation::transform(TransformationContext& context, CNNLay
     const CNNLayerPtr parentOnData = CNNNetworkHelper::getParent(layer, 0ul);
 
     const DataPrecision dataPrecisionOnWeights = fillDequantizationsForWeightsPath(
+        context,
         layer,
         supportAsymmetricQuantization,
         originalWeightsDequantizationScales,

diff --git a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp
index 6113f3bdb77..6e32de23fe1 100644
--- a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp
+++ b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp
@@ -34,8 +34,6 @@ void FakeQuantizeTransformation::transform(TransformationContext& context, CNNLa
         THROW_IE_EXCEPTION << "Layer '" << layer.insData.size() << "' has invalid inputs number. 5 is expected.";
5 is expected."; } - // CNNNetworkHelper::invertFakeQuantize(layer); - // FakeQuantize on weights are used without dequantization ScaleShifts const bool onWeights = CNNNetworkHelper::onConstWeightsPath(layer) && CNNNetworkHelper::onWeights(layer); if (onWeights) { @@ -77,8 +75,8 @@ void FakeQuantizeTransformation::transform(TransformationContext& context, CNNLa printDequantizationValues(dequantizationScales, dequantizationShifts); #endif - CNNNetworkHelper::updateBlobs(layer, 3, dataPrecision.min); - CNNNetworkHelper::updateBlobs(layer, 4, dataPrecision.max); + CNNNetworkHelper::updateBlobs(context, layer, 3, dataPrecision.min); + CNNNetworkHelper::updateBlobs(context, layer, 4, dataPrecision.max); if (updatePrecisions) { CNNNetworkHelper::setOutDataPrecision(layer, dataPrecision.precision); diff --git a/inference-engine/src/low_precision_transformations/src/fully_connected.cpp b/inference-engine/src/low_precision_transformations/src/fully_connected.cpp index dcaa789990d..e75c29665c9 100644 --- a/inference-engine/src/low_precision_transformations/src/fully_connected.cpp +++ b/inference-engine/src/low_precision_transformations/src/fully_connected.cpp @@ -135,6 +135,7 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN } fillDequantizationsForWeightsPath( + context, fullyConnected, supportAsymmetricQuantization, originalWeightsDequantizationScales, diff --git a/inference-engine/src/low_precision_transformations/src/network_helper.cpp b/inference-engine/src/low_precision_transformations/src/network_helper.cpp index 8556e23402f..ecb06cdae1b 100644 --- a/inference-engine/src/low_precision_transformations/src/network_helper.cpp +++ b/inference-engine/src/low_precision_transformations/src/network_helper.cpp @@ -183,54 +183,6 @@ Blob::Ptr CNNNetworkHelper::makeNewBlobPtr(const TensorDesc& desc) { return newBlob; } -void CNNNetworkHelper::updateBlobs(CNNLayer& layer, const std::string& blobName, float value) { - const auto existingBlobIt = layer.blobs.find(blobName); - if (existingBlobIt == layer.blobs.end()) { - THROW_IE_EXCEPTION << "blob '" << blobName << "' was not found in layer " << layer.name; - } - const auto& existingBlobTensorDesc = existingBlobIt->second->getTensorDesc(); - Blob::Ptr newBlob = makeNewBlobPtr(existingBlobTensorDesc); - - newBlob->allocate(); - fillBlobByFP32(newBlob, value); - layer.blobs[existingBlobIt->first] = newBlob; -} - -void CNNNetworkHelper::invertFakeQuantize(const CNNLayer& fakeQuantize) { - if (fakeQuantize.type != "FakeQuantize") { - THROW_IE_EXCEPTION << "invalid layer type " << fakeQuantize.type; - } - const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fakeQuantize); - const size_t valuesCount = - std::max(quantizationDetails.inputLowValues.size(), quantizationDetails.outputLowValues.size()); - std::vector inputLowValues(valuesCount); - std::vector inputHightValues(valuesCount); - std::vector outputLowValues(valuesCount); - std::vector outputHighValues(valuesCount); - bool wasInverted = false; - for (size_t i = 0ul; i < valuesCount; ++i) { - if ((quantizationDetails.getInputLowValue(i) > quantizationDetails.getInputHighValue(i)) && - (quantizationDetails.getOutputLowValue(i) > quantizationDetails.getOutputHighValue(i))) { - inputLowValues[i] = quantizationDetails.getInputHighValue(i); - inputHightValues[i] = quantizationDetails.getInputLowValue(i); - outputLowValues[i] = quantizationDetails.getOutputHighValue(i); - outputHighValues[i] = quantizationDetails.getOutputLowValue(i); - wasInverted = 
-        } else {
-            inputLowValues[i] = quantizationDetails.getInputLowValue(i);
-            inputHightValues[i] = quantizationDetails.getInputHighValue(i);
-            outputLowValues[i] = quantizationDetails.getOutputLowValue(i);
-            outputHighValues[i] = quantizationDetails.getOutputHighValue(i);
-        }
-    }
-
-    if (wasInverted) {
-        CNNNetworkHelper::updateBlobs(fakeQuantize, 1, inputLowValues);
-        CNNNetworkHelper::updateBlobs(fakeQuantize, 2, inputHightValues);
-        CNNNetworkHelper::updateBlobs(fakeQuantize, 3, outputLowValues);
-        CNNNetworkHelper::updateBlobs(fakeQuantize, 4, outputHighValues);
-    }
-}
 
 void CNNNetworkHelper::updateBlobs(const CNNLayer& quantizeLayer, int constLayerIndex, const std::vector<float>& values) {
     CNNLayerPtr blobLayer = CNNNetworkHelper::getParent(quantizeLayer, constLayerIndex);
@@ -288,6 +240,25 @@ void CNNNetworkHelper::updateBlobs(const CNNLayer& quantizeLayer, int constLayer
     fillBlobByFP32(newBlob, values.data());
 }
 
+void CNNNetworkHelper::updateBlobs(
+    TransformationContext& context,
+    const CNNLayer& quantizeLayer,
+    int constLayerIndex,
+    const std::vector<float>& values) {
+    CNNLayerPtr blobLayer = CNNNetworkHelper::getParent(quantizeLayer, constLayerIndex);
+    if (blobLayer == nullptr) {
+        THROW_IE_EXCEPTION << "layer is absent";
+    }
+
+    const auto existingBlobIt = blobLayer->blobs.find("custom");
+    if (existingBlobIt == blobLayer->blobs.end()) {
+        THROW_IE_EXCEPTION << "custom blob was not found ";
+    }
+
+    blobLayer = copyConstant(context, quantizeLayer, blobLayer, constLayerIndex);
+    updateBlobs(quantizeLayer, constLayerIndex, values);
+}
+
 void CNNNetworkHelper::updateBlobs(CNNLayer& layer, const std::string& blobName, const std::vector<float>& values) {
     const auto existingBlobIt = layer.blobs.find(blobName);
     if (existingBlobIt == layer.blobs.end()) {
@@ -377,6 +348,96 @@ void CNNNetworkHelper::updateBlobs(const CNNLayer& quantizeLayer, int constLayer
     blobLayer->blobs[existingBlobIt->first] = newBlob;
 }
 
+void CNNNetworkHelper::updateBlobs(TransformationContext& context, const CNNLayer& quantizeLayer, int constLayerIndex, float value) {
+    auto inData = quantizeLayer.insData[constLayerIndex].lock();
+    if (inData == nullptr) {
+        THROW_IE_EXCEPTION << "data is absent";
+    }
+
+    CNNLayerPtr blobLayer = getCreatorLayer(inData).lock();
+    if (blobLayer == nullptr) {
+        THROW_IE_EXCEPTION << "layer is absent";
+    }
+
+    if (blobLayer->blobs.size() != 1) {
+        THROW_IE_EXCEPTION << "unexpected blobs size";
+    }
+
+    blobLayer = copyConstant(context, quantizeLayer, blobLayer, constLayerIndex);
+    updateBlobs(quantizeLayer, constLayerIndex, value);
+}
+
+CNNLayerPtr CNNNetworkHelper::copyConstant(
+    TransformationContext& context,
+    const CNNLayer& quantizeLayer,
+    const CNNLayerPtr& blobLayer,
+    const size_t constLayerIndex) {
+    size_t repeatsCount = 0ul;
+    for (size_t i = 0; i < quantizeLayer.insData.size(); ++i) {
+        auto parentInData = quantizeLayer.insData[i].lock();
+        if (parentInData == nullptr) {
+            continue;
+        }
+        const auto quantizeLayerParent = getCreatorLayer(parentInData).lock();
+        if (quantizeLayerParent == nullptr) {
+            continue;
+        }
+        if (quantizeLayerParent->name == blobLayer->name) {
+            repeatsCount++;
+        }
+    }
+
+    if (repeatsCount < 2ul) {
+        return blobLayer;
+    }
+
+    details::CNNNetworkImpl* networkImpl = dynamic_cast<details::CNNNetworkImpl*>(&context.network);
+    if (networkImpl == nullptr) {
+        THROW_IE_EXCEPTION << "Unexpected network type";
+    }
+
+    const DataPtr outData = blobLayer->outData[0];
+    const std::map<std::string, CNNLayerPtr>& inputTo = getInputTo(outData);
+    const auto quantizeLayerIt = inputTo.find(quantizeLayer.name);
+    if (quantizeLayerIt == inputTo.end()) {
+        THROW_IE_EXCEPTION << "Layer was not found";
+    }
+
+    const auto blobIt = blobLayer->blobs.find("custom");
+    if (blobIt == blobLayer->blobs.end()) {
+        THROW_IE_EXCEPTION << "Blob was not found";
+    }
+
+    const Blob::Ptr blob = blobIt->second;
+    Blob::Ptr newBlob = makeNewBlobPtr(blob->getTensorDesc());
+    newBlob->allocate();
+
+    const std::shared_ptr<float> blobValues = CNNNetworkHelper::getFloatData(blob);
+    fillBlobByFP32(newBlob, blobValues.get());
+
+    auto newBlobValues = CNNNetworkHelper::getFloatData(newBlob);
+
+    const std::string layerName = blobLayer->name + "/new" + std::to_string(repeatsCount);
+    CNNLayerPtr newBlobLayer = CNNLayerPtr(new CNNLayer({ layerName, "Const", blob->getTensorDesc().getPrecision() }));
+    newBlobLayer->blobs.emplace("custom", newBlob);
+
+    const TensorDesc& tensorDesc = blobLayer->outData[0]->getTensorDesc();
+    DataPtr newEdgeAfterLayer(new Data(newBlobLayer->name, tensorDesc));
+    newEdgeAfterLayer->setName(newBlobLayer->name);
+    newEdgeAfterLayer->setPrecision(blob->getTensorDesc().getPrecision());
+    quantizeLayerIt->second->insData[constLayerIndex] = newEdgeAfterLayer;
+    getInputTo(newEdgeAfterLayer)[quantizeLayer.name] = quantizeLayerIt->second;
+
+    getCreatorLayer(newEdgeAfterLayer) = newBlobLayer;
+    newBlobLayer->outData.push_back(newEdgeAfterLayer);
+
+    CNNNetworkImpl* netImpl = dynamic_cast<CNNNetworkImpl*>(&context.network);
+    netImpl->addData(newBlobLayer->name.c_str(), newEdgeAfterLayer);
+    netImpl->addLayer(newBlobLayer);
+
+    return newBlobLayer;
+}
+
 int CNNNetworkHelper::onWeightsInDepth(const CNNLayer& layer) {
     const std::vector<CNNLayerPtr> children = getChildren(layer);
     for (const CNNLayerPtr& child : children) {

diff --git a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
index d62c9e48111..8398eec611b 100644
--- a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
+++ b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
@@ -250,14 +250,14 @@ void WeightableLayerTransformation::updateLayerBiasesFcSpecific(
     CNNNetworkHelper::updateBlobs(*biasesLayer, "custom", biases);
 }
 
-void WeightableLayerTransformation::updateWeights(const CNNLayerPtr parent, std::vector<float>& outputLowValues,
+void WeightableLayerTransformation::updateWeights(TransformationContext& context, const CNNLayerPtr parent, std::vector<float>& outputLowValues,
                                                   std::vector<float>& outputHighValues) const {
     const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(*parent);
     // TODO: refactor: move to standalone method
     switch (quantizedTensorAlignmentOnWeights) {
    case LayerTransformation::QuantizedTensorAlignment::None: {
-        CNNNetworkHelper::updateBlobs(*parent, 3, outputLowValues);
-        CNNNetworkHelper::updateBlobs(*parent, 4, outputHighValues);
+        CNNNetworkHelper::updateBlobs(context, *parent, 3, outputLowValues);
+        CNNNetworkHelper::updateBlobs(context, *parent, 4, outputHighValues);
         break;
     }
    case LayerTransformation::QuantizedTensorAlignment::UpdateIntervals:
@@ -300,10 +300,10 @@ void WeightableLayerTransformation::updateWeights(const CNNLayerPtr parent, std:
             outputHighValues[i] = roundf(outputHighValues[i] * maxK);
         }
 
-        CNNNetworkHelper::updateBlobs(*parent, 1, inputLowValues);
-        CNNNetworkHelper::updateBlobs(*parent, 2, inputHighValues);
-        CNNNetworkHelper::updateBlobs(*parent, 3, outputLowValues);
-        CNNNetworkHelper::updateBlobs(*parent, 4, outputHighValues);
+        CNNNetworkHelper::updateBlobs(context, *parent, 1, inputLowValues);
+        CNNNetworkHelper::updateBlobs(context, *parent, 2, inputHighValues);
+        CNNNetworkHelper::updateBlobs(context, *parent, 3, outputLowValues);
+        CNNNetworkHelper::updateBlobs(context, *parent, 4, outputHighValues);
 
         const size_t levels = static_cast<size_t>(roundf(minOutputIntervalLowValue + maxOutputIntervalHighValue + 1.0));
         parent->params["levels"] = std::to_string(levels);
@@ -411,6 +411,7 @@ void WeightableLayerTransformation::createAsymmetric(TransformationContext& cont
 }
 
 DataPrecision WeightableLayerTransformation::fillDequantizationsForWeightsPath(
+    TransformationContext& context,
     const CNNLayer& weightableLayer,
     const bool supportAsymmetricQuantization,
     std::vector<float>& dequantizationScales,
@@ -461,7 +462,7 @@ DataPrecision WeightableLayerTransformation::fillDequantizationsForWeightsPath(
         }
     }
 
-    updateWeights(parent, outputLowValues, outputHighValues);
+    updateWeights(context, parent, outputLowValues, outputHighValues);
 
     return dataPrecision;
 }

diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.cpp
index 5af9d555891..f37e7c0dd35 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.cpp
@@ -11,8 +11,7 @@ using namespace LayerTestsDefinitions;
 namespace {
 const std::vector<InferenceEngine::Precision> netPrecisions = {
-    InferenceEngine::Precision::FP32,
-    InferenceEngine::Precision::FP16
+    InferenceEngine::Precision::FP32
 };
 
 const std::vector<MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues> params = {
@@ -22,10 +21,15 @@
+const std::vector<std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>> inputShapes = {
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 10, 16 }) }),
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 16, 10 }) })
+};
+
 INSTANTIATE_TEST_CASE_P(LPT, MatMulWithOptimizedConstantFakeQuantizeTransformation,
     ::testing::Combine(
         ::testing::ValuesIn(netPrecisions),
-        ::testing::Values(InferenceEngine::SizeVector({ 1, 16 })),
+        ::testing::ValuesIn(inputShapes),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::ValuesIn(params)),
     MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName);

diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.cpp
index 7af10777ac5..d7d7b78c445 100644
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.cpp
@@ -21,10 +21,15 @@
+const std::vector<std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>> inputShapes = {
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 10, 16 }) }),
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 16, 10 }) })
+};
+
 INSTANTIATE_TEST_CASE_P(LPT, MatMulWithOptimizedConstantFakeQuantizeTransformation,
     ::testing::Combine(
         ::testing::ValuesIn(netPrecisions),
-        ::testing::Values(InferenceEngine::SizeVector({ 1, 16 })),
+        ::testing::ValuesIn(inputShapes),
         ::testing::Values(CommonTestUtils::DEVICE_GPU),
         ::testing::ValuesIn(params)),
     MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName);

diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.hpp
index e76243a8361..15d5e671b8c 100644
--- a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.hpp
+++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.hpp
@@ -20,7 +20,7 @@ public:
 typedef std::tuple<
     InferenceEngine::Precision,
-    InferenceEngine::SizeVector,
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>,
     std::string,
     MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues
 > MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams;

diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.cpp
index 6e39ca31f50..e8c2f256b31 100644
--- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.cpp
+++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.cpp
@@ -23,16 +23,16 @@ namespace LayerTestsDefinitions {
 std::string MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName(
     testing::TestParamInfo<MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams> obj) {
     InferenceEngine::Precision netPrecision;
-    InferenceEngine::SizeVector inputShape;
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector> shapes;
     std::string targetDevice;
     InferenceEngine::details::LayerTransformation::Params params;
     MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues param;
-    std::tie(netPrecision, inputShape, targetDevice, param) = obj.param;
+    std::tie(netPrecision, shapes, targetDevice, param) = obj.param;
 
     std::ostringstream result;
     result << netPrecision.name() << "_" <<
-        CommonTestUtils::vec2str(inputShape) << "_" <<
+        CommonTestUtils::vec2str(shapes.first) << "_" << CommonTestUtils::vec2str(shapes.second) << "_" <<
         targetDevice << "_" <<
         param.fqOnData << "_" <<
         param.fqOnWeights;
@@ -43,15 +43,16 @@ void MatMulWithOptimizedConstantFakeQuantizeTransformation::SetUp() {
     threshold = 0.01f;
 
     InferenceEngine::Precision netPrecision;
-    InferenceEngine::SizeVector inputShape;
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector> shapes;
     InferenceEngine::details::LayerTransformation::Params params;
     MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues param;
-    std::tie(netPrecision, inputShape, targetDevice, param) = this->GetParam();
+    std::tie(netPrecision, shapes, targetDevice, param) = this->GetParam();
 
     auto precision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
     function = ngraph::builder::subgraph::MatMulWithOptimizedConstantFakeQuantizeFunction::getOriginal(
         precision,
-        inputShape,
+        shapes.first,
+        shapes.second,
         param.fqOnData,
         param.fqOnWeights);
 }

diff --git a/inference-engine/tests/ngraph_functions/include/ngraph_functions/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_function.hpp b/inference-engine/tests/ngraph_functions/include/ngraph_functions/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_function.hpp
index 83983c9be0f..8b008645546 100644
--- a/inference-engine/tests/ngraph_functions/include/ngraph_functions/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_function.hpp
+++ b/inference-engine/tests/ngraph_functions/include/ngraph_functions/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_function.hpp
@@ -16,7 +16,8 @@ class MatMulWithOptimizedConstantFakeQuantizeFunction {
 public:
     static std::shared_ptr<ngraph::Function> getOriginal(
         const ngraph::element::Type precision,
-        const ngraph::Shape& inputShape,
+        const ngraph::Shape& inputShape1,
+        const ngraph::Shape& inputShape2,
         const FakeQuantizeOnData& fqOnData,
         const FakeQuantizeOnData& fqOnWeights);
 };

diff --git a/inference-engine/tests/ngraph_functions/src/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_function.cpp b/inference-engine/tests/ngraph_functions/src/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_function.cpp
index ad571d24e09..6543e0d8456 100644
--- a/inference-engine/tests/ngraph_functions/src/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_function.cpp
+++ b/inference-engine/tests/ngraph_functions/src/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_function.cpp
@@ -13,34 +13,41 @@ namespace subgraph {
 std::shared_ptr<ngraph::Function> MatMulWithOptimizedConstantFakeQuantizeFunction::getOriginal(
     const ngraph::element::Type precision,
-    const ngraph::Shape& inputShape,
+    const ngraph::Shape& inputShape1,
+    const ngraph::Shape& inputShape2,
     const FakeQuantizeOnData& fqOnData,
     const FakeQuantizeOnData& fqOnWeights) {
-    const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape));
-    const auto fakeQuantizeOnActivations = fqOnData.empty() ?
-        nullptr :
-        ngraph::builder::makeFakeQuantize(
-            input, precision, fqOnData.quantizationLevel, fqOnData.constantShape,
-            fqOnData.inputLowValues, fqOnData.inputHighValues, fqOnData.outputLowValues, fqOnData.outputHighValues);
+    const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape1));
 
-    const ngraph::Shape weightsShape = { inputShape[1], 10 };
+    const auto lowConstantOnActivations = std::make_shared<ngraph::opset1::Constant>(precision, fqOnData.constantShape, fqOnData.inputLowValues);
+    const auto highConstantOnActivations = std::make_shared<ngraph::opset1::Constant>(precision, fqOnData.constantShape, fqOnData.inputHighValues);
+    const auto fakeQuantizeOnActivations = std::make_shared<ngraph::opset1::FakeQuantize>(
+        input,
+        lowConstantOnActivations,
+        highConstantOnActivations,
+        lowConstantOnActivations,
+        highConstantOnActivations,
+        fqOnWeights.quantizationLevel);
+
+    const ngraph::Shape weightsShape = { inputShape2[0], inputShape1[1] };
     const std::vector<float> weigths(weightsShape[0] * weightsShape[1], 10.f);
+    const auto weightsConst = std::make_shared<ngraph::opset1::Constant>(precision, weightsShape, weigths);
 
-    const auto lowConstant = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputLowValues);
-    const auto highConstant = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputHighValues);
+    const auto lowConstantOnWeights = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputLowValues);
+    const auto highConstantOnWeights = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputHighValues);
     const auto fakeQuantizeOnWeights = std::make_shared<ngraph::opset1::FakeQuantize>(
         weightsConst,
-        lowConstant,
-        highConstant,
-        lowConstant,
-        highConstant,
+        lowConstantOnWeights,
+        highConstantOnWeights,
+        lowConstantOnWeights,
+        highConstantOnWeights,
         fqOnWeights.quantizationLevel);
 
     const auto matMul = std::make_shared<ngraph::opset1::MatMul>(
-        fqOnData.empty() ? input : fakeQuantizeOnActivations,
+        fakeQuantizeOnActivations,
         fakeQuantizeOnWeights,
         false,
-        false);
+        inputShape1[1] != inputShape2[0]);
 
     ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(matMul) };
     return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "MatMulWithOptimizedConstantFakeQuantizeFunction");