diff --git a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
index 07023b57539..5e8f13a91f4 100644
--- a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
+++ b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
@@ -259,6 +259,38 @@ bool GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(const Gna2Op
         static_cast<Gna2Shape*>(op.Parameters[PoolStrideParamIdx])->NumberOfDimensions == 1 &&
         static_cast<Gna2Shape*>(op.Parameters[PoolStrideParamIdx])->Dimensions[0] > static_cast<Gna2Shape*>(op.Parameters[PoolWinParamIdx])->Dimensions[0];
 }
+
+void GNAPluginNS::backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(Gna2Model& gnaModel, bool useLegacyFormula) {
+    IE_ASSERT(gnaModel.Operations != nullptr || gnaModel.NumberOfOperations == 0);
+    for (uint32_t i = 0; i < gnaModel.NumberOfOperations; i++) {
+        auto& gnaOp = gnaModel.Operations[i];
+        IE_ASSERT(gnaOp.Operands != nullptr);
+        IE_ASSERT(gnaOp.Operands[InOpIdx] != nullptr);
+        auto& inputShape = gnaOp.Operands[InOpIdx]->Shape;
+        IE_ASSERT(gnaOp.Parameters != nullptr || gnaOp.NumberOfParameters == 0);
+        if (gnaOp.Type == Gna2OperationTypeConvolution && inputShape.NumberOfDimensions == 2 &&
+            gnaOp.NumberOfParameters > PoolStrideParamIdx &&
+            gnaOp.Parameters[PoolWinParamIdx] != nullptr &&
+            gnaOp.Parameters[PoolStrideParamIdx] != nullptr) {
+            IE_ASSERT(gnaOp.Operands[OutOpIdx] != nullptr);
+            IE_ASSERT(gnaOp.Operands[FilterOpIdx] != nullptr);
+            IE_ASSERT(gnaOp.Parameters[ConvStrideParamIdx] != nullptr);
+
+            const auto& fltStrideShape = *reinterpret_cast<Gna2Shape*>(gnaOp.Parameters[ConvStrideParamIdx]);
+            const auto fltStride = fltStrideShape.Dimensions[0];
+            const auto inVecCnt = inputShape.Dimensions[1];
+            const auto nFltSize = gnaOp.Operands[FilterOpIdx]->Shape.Dimensions[1];
+            const auto outFromConv = GNAPluginNS::GNAConvolutionLayer::outputFromConv(inVecCnt, nFltSize, fltStride);
+            const auto& poolWindow = *static_cast<Gna2Shape*>(gnaOp.Parameters[PoolWinParamIdx]);
+            const auto& poolStride = *static_cast<Gna2Shape*>(gnaOp.Parameters[PoolStrideParamIdx]);
+            const auto numberOfOutputs = GNAPluginNS::GNAConvolutionLayer::outputFromPooling(
+                outFromConv, poolWindow.Dimensions[0], poolStride.Dimensions[0],
+                useLegacyFormula || isOperationCnnLegacySpecific(gnaOp));
+            auto& outputTensor = *gnaOp.Operands[OutOpIdx];
+            const_cast<uint32_t&>(outputTensor.Shape.Dimensions[1]) = numberOfOutputs;
+        }
+    }
+}
 #endif
 
 void GNAPluginNS::backend::AMIntelDNN::InitMaxpoolComponentPrivate(intel_dnn_component_t &comp,
@@ -1677,28 +1709,10 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
             HelperGna2OperationSetParameter(gnaOperation, gnaUserAllocator, gnaUserFree, PoolWinParamIdx, poolWindow);
             HelperGna2OperationSetParameter(gnaOperation, gnaUserAllocator, gnaUserFree, PoolStrideParamIdx, poolStride);
-            auto& outputTensor = const_cast<Gna2Tensor&>(*gnaOperation->Operands[OutOpIdx]);
-            const auto fltStrideShape = reinterpret_cast<Gna2Shape*>(gnaOperation->Parameters[ConvStrideParamIdx]);
             // adjust Gna2OperationTypeConvolution fused layer output dimensions to reflect convolution zeroPadding and pooling
-            if (gnaOperation->Operands[InOpIdx]->Shape.NumberOfDimensions == 2) { // kDnnConvolutional1dOp
-                const auto inVecCnt = gnaOperation->Operands[InOpIdx]->Shape.Dimensions[1];
-
-                const auto nFltSize = gnaOperation->Operands[FilterOpIdx]->Shape.Dimensions[1];
-                // Always move 1 "row"
-                const auto fltStride = fltStrideShape->Dimensions[0];
-                const auto outFromConv = outputFromConv(inVecCnt, nFltSize, fltStride);
-                // FLAT input matrix, pooled outputs per filter
-                auto effectiveCompileTarget = gnaCompileTarget;
-                if (isOperationCnnLegacySpecific(*gnaOperation)) {
-                    effectiveCompileTarget = InferenceEngine::GNAConfigParams::GNA_TARGET_2_0;
-                }
-                if (effectiveCompileTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) {
-                    outputTensor.Shape.Dimensions[1] = outputFromPooling(outFromConv, poolWindow->Dimensions[0], poolStride->Dimensions[0]);
-                } else {
-                    outputTensor.Shape.Dimensions[1] = outputFromPoolingLegacy(outFromConv, poolStride->Dimensions[0]);
-                }
-
-            } else { // kDnnConvolutional2dOp
+            if (gnaOperation->Operands[InOpIdx]->Shape.NumberOfDimensions != 2) { // kDnnConvolutional2dOp
+                auto& outputTensor = const_cast<Gna2Tensor&>(*gnaOperation->Operands[OutOpIdx]);
+                const auto fltStrideShape = reinterpret_cast<Gna2Shape*>(gnaOperation->Parameters[ConvStrideParamIdx]);
                 // Override GNA operation output pointer with the one from pooling component
                 outputTensor.Data = comp.ptr_outputs;
diff --git a/inference-engine/src/gna_plugin/backend/am_intel_dnn.hpp b/inference-engine/src/gna_plugin/backend/am_intel_dnn.hpp
index 64a7b305a10..af6de2c6cf7 100644
--- a/inference-engine/src/gna_plugin/backend/am_intel_dnn.hpp
+++ b/inference-engine/src/gna_plugin/backend/am_intel_dnn.hpp
@@ -157,6 +157,9 @@ public:
     // Checks whether operation is Convolution and its parameters makes it specific to GNA1/GNA2 targets
     // It does not guarantee that operation fully compatible to GNA1/GNA2, but for sure is not compatible with GNA3 target
     static bool isOperationCnnLegacySpecific(const Gna2Operation& operation);
+    // Recomputes number of outputs from CNN1D operations using legacy or new formula
+    // If isOperationCnnLegacySpecific() is true the number of outputs will also be recomputed for legacy compatibility
+    static void updateNumberOfOutputsIfPoolingEnabled(Gna2Model& gnaModel, bool useLegacyFormula);
 #endif
 
     template <class T>
diff --git a/inference-engine/src/gna_plugin/gna_device.cpp b/inference-engine/src/gna_plugin/gna_device.cpp
index ee6bb08d597..d57a4a5cc9c 100644
--- a/inference-engine/src/gna_plugin/gna_device.cpp
+++ b/inference-engine/src/gna_plugin/gna_device.cpp
@@ -27,6 +27,7 @@
 #include "backend/am_intel_dnn.hpp"
 #include "gna/gna_config.hpp"
 #include "gna_plugin_log.hpp"
+#include "layers/gna_convolution_layer.hpp"
 
 //#define MODEL_DUMP
 
@@ -143,10 +144,14 @@ void GNADeviceHelper::enforceLegacyCnnsWhenNeeded(Gna2Model& gnaModel) {
 uint32_t GNADeviceHelper::createModel(Gna2Model& gnaModel) const {
     std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
     uint32_t modelId;
-    if (enforceLegacyCnnNeeded()) {
+    const auto legacyExecTarget = enforceLegacyCnnNeeded();
+    if (legacyExecTarget) {
         enforceLegacyCnns(gnaModel);
     }
     enforceLegacyCnnsWhenNeeded(gnaModel);
+
+    GNAPluginNS::backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(gnaModel, legacyExecTarget);
+
 #if GNA_LIB_VER == 2 && defined MODEL_DUMP
     std::string path =
 #ifdef _WIN32
diff --git a/inference-engine/src/gna_plugin/layers/gna_convolution_layer.cpp b/inference-engine/src/gna_plugin/layers/gna_convolution_layer.cpp
index f226138251b..ea345a18c25 100644
--- a/inference-engine/src/gna_plugin/layers/gna_convolution_layer.cpp
+++ b/inference-engine/src/gna_plugin/layers/gna_convolution_layer.cpp
@@ -56,7 +56,10 @@ uint32_t outputFromConv(const uint32_t in, const uint32_t flt, const uint32_t st
     return (in - flt) / stride + 1;
 }
 
-uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint32_t stride) {
+uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint32_t stride, const bool legacy) {
+    if (legacy) {
+        return outputFromPoolingLegacy(in, stride);
+    }
     // ceil[(in - window)/stride] + 1, GNA Spec 1.24
     if (window > in || window == 0 || stride == 0) {
         THROW_GNA_EXCEPTION << "Invalid (input, window, stride) = (" << in << "," << window << "," << stride << ")";
diff --git a/inference-engine/src/gna_plugin/layers/gna_convolution_layer.hpp b/inference-engine/src/gna_plugin/layers/gna_convolution_layer.hpp
index 1ed7125b633..c603a801255 100644
--- a/inference-engine/src/gna_plugin/layers/gna_convolution_layer.hpp
+++ b/inference-engine/src/gna_plugin/layers/gna_convolution_layer.hpp
@@ -20,7 +20,7 @@ double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv);
 
 uint32_t outputFromConv(const uint32_t in, const uint32_t flt, const uint32_t stride);
 
-uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint32_t stride);
+uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint32_t stride, bool legacy = false);
 
 uint32_t outputFromPoolingLegacy(const uint32_t in, const uint32_t stride);
diff --git a/inference-engine/tests/functional/plugin/gna/pass_tests/decompose_2d_conv.cpp b/inference-engine/tests/functional/plugin/gna/pass_tests/decompose_2d_conv.cpp
index 4554d5e5f88..a6a38350b58 100644
--- a/inference-engine/tests/functional/plugin/gna/pass_tests/decompose_2d_conv.cpp
+++ b/inference-engine/tests/functional/plugin/gna/pass_tests/decompose_2d_conv.cpp
@@ -219,6 +219,15 @@ const std::vector<std::map<std::string, std::string>> configs = {
     }
 };
 
+const std::vector<std::map<std::string, std::string>> configsExec30Compile20 = {
+    {
+        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
+        {"GNA_SCALE_FACTOR_0", "1"},
+        {"GNA_EXEC_TARGET", "GNA_TARGET_3_0"},
+        {"GNA_COMPILE_TARGET", "GNA_TARGET_2_0"}
+    }
+};
+
 const std::vector<op::PadType> padTypes = {
     op::PadType::VALID,
     op::PadType::EXPLICIT,
@@ -237,6 +246,11 @@ const std::vector<modelType> models = {
     modelType::TranspConvBcastAddMaxPoolActTransp
 };
 
+const std::vector<modelType> modelsWithPool = {
+    modelType::TranspConvBcastAddMaxPoolTransp,
+    modelType::TranspConvBcastAddMaxPoolActTransp
+};
+
 const std::vector<std::vector<size_t>> input2DNHWC = {{1, 4, 4, 32}};
 const std::vector<std::vector<size_t>> kernels2D = {{1, 2}, {2, 1}, {2, 2}};
 const std::vector<std::vector<size_t>> strides2D = {{1, 1}};
@@ -277,6 +291,20 @@ INSTANTIATE_TEST_CASE_P(smoke_Decompose2DConv, Decompose2DConvTest,
         ::testing::ValuesIn(models)),
     Decompose2DConvTest::getTestCaseName);
 
+// These test flows compile the model for GNA 2.0
+// and load by GNA Library for GNA 3.0 execution target
+// They assure that the W/A for pooling output differences btw GNA 2.0 / 3.0 is properly working
+INSTANTIATE_TEST_CASE_P(smoke_Decompose2DConv_Exec30Compile20, Decompose2DConvTest,
+    ::testing::Combine(
+        conv2DParams,
+        miscParams,
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GNA),
+        ::testing::ValuesIn(configsExec30Compile20),
+        ::testing::ValuesIn(input2DNHWC),
+        ::testing::ValuesIn(modelsWithPool)),
+    Decompose2DConvTest::getTestCaseName);
+
 /* ============= Strides & Dilations Combination ============= */