diff --git a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
index 4765bb05b54..6c5975d13e1 100644
--- a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
+++ b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
@@ -23,6 +23,7 @@
 #include "dnn.hpp"
 #include "am_intel_dnn.hpp"
 #include "dnn_types.h"
+#include "gna/gna_config.hpp"
 #include "gna_types.h"
 #include "gna_limitations.hpp"
 #include "layers/gna_convolution_layer.hpp"
@@ -248,6 +249,16 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional2DComponentPrivate(intel
     ptr_inputs = &comp.ptr_inputs;
     ptr_outputs = &comp.ptr_outputs;
 }
+
+bool GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(const Gna2Operation& op) {
+    // GNA compile target GNA_TARGET_3_0 does not support pooling window < pooling stride
+    return op.Type == Gna2OperationTypeConvolution &&
+        op.NumberOfParameters > std::max(PoolStrideParamIdx, PoolWinParamIdx) &&
+        op.Parameters[PoolStrideParamIdx] != nullptr &&
+        op.Parameters[PoolWinParamIdx] != nullptr &&
+        static_cast<Gna2Shape*>(op.Parameters[PoolStrideParamIdx])->NumberOfDimensions == 1 &&
+        static_cast<Gna2Shape*>(op.Parameters[PoolStrideParamIdx])->Dimensions[0] > static_cast<Gna2Shape*>(op.Parameters[PoolWinParamIdx])->Dimensions[0];
+}
 #endif
 
 void GNAPluginNS::backend::AMIntelDNN::InitMaxpoolComponentPrivate(intel_dnn_component_t &comp,
@@ -1361,7 +1372,7 @@ uint32_t GNAPluginNS::backend::AMIntelDNN::CountLayers() {
 }
 
 #if GNA_LIB_VER == 2
-void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel) {
+void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model* gnaModel, const std::string& gnaCompileTarget) {
     Gna2Operation * gnaOperation;
     if (gnaModel == nullptr)
         THROW_GNA_EXCEPTION << "Invalid input parameter";
@@ -1677,9 +1688,16 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
                     const auto fltStride = fltStrideShape->Dimensions[0];
                     const auto outFromConv = outputFromConv(inVecCnt, nFltSize, fltStride);
                     // FLAT input matrix, pooled outputs per filter
-                    // TODO: Issue 50386 check why (outFromConv - 1) an not (outFromConv - poolingWindow)
-                    outputTensor.Shape.Dimensions[1] =
-                        (outFromConv - 1) / poolStride->Dimensions[0] + 1;
+                    auto effectiveCompileTarget = gnaCompileTarget;
+                    if (isOperationCnnLegacySpecific(*gnaOperation)) {
+                        effectiveCompileTarget = InferenceEngine::GNAConfigParams::GNA_TARGET_2_0;
+                    }
+                    if (effectiveCompileTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) {
+                        outputTensor.Shape.Dimensions[1] = outputFromPooling(outFromConv, poolWindow->Dimensions[0], poolStride->Dimensions[0]);
+                    } else {
+                        outputTensor.Shape.Dimensions[1] = outputFromPoolingLegacy(outFromConv, poolStride->Dimensions[0]);
+                    }
+
                 } else { // kDnnConvolutional2dOp
                     // Override GNA operation output pointer with the one from pooling component
                     outputTensor.Data = comp.ptr_outputs;
diff --git a/inference-engine/src/gna_plugin/backend/am_intel_dnn.hpp b/inference-engine/src/gna_plugin/backend/am_intel_dnn.hpp
index 7dcab94a685..64a7b305a10 100644
--- a/inference-engine/src/gna_plugin/backend/am_intel_dnn.hpp
+++ b/inference-engine/src/gna_plugin/backend/am_intel_dnn.hpp
@@ -10,6 +10,7 @@
 
 #include "dnn_types.h"
 #include "gna_types.h"
+#include "gna/gna_config.hpp"
 
 #include "gna_plugin_log.hpp"
 
@@ -152,6 +153,10 @@ public:
                                            (void*&)ptr_filters,
                                            (void*&)ptr_biases);
     }
+
+    // Checks whether the operation is a convolution and its parameters make it specific to GNA1/GNA2 targets.
+    // It does not guarantee that the operation is fully compatible with GNA1/GNA2, but it is certainly not compatible with the GNA3 target.
+    static bool isOperationCnnLegacySpecific(const Gna2Operation& operation);
 #endif
 
     template
@@ -293,7 +298,7 @@ public:
 
 #if GNA_LIB_VER == 2
 
-    void InitGNAStruct(Gna2Model *gnaModel);
+    void InitGNAStruct(Gna2Model *gnaModel, const std::string& gnaCompileTarget = InferenceEngine::GNAConfigParams::GNA_TARGET_2_0);
 
     void DestroyGNAStruct(Gna2Model *gnaModel);
 #else
diff --git a/inference-engine/src/gna_plugin/backend/gna_limitations.cpp b/inference-engine/src/gna_plugin/backend/gna_limitations.cpp
index 8443856a449..3d30e93cec3 100644
--- a/inference-engine/src/gna_plugin/backend/gna_limitations.cpp
+++ b/inference-engine/src/gna_plugin/backend/gna_limitations.cpp
@@ -31,7 +31,7 @@ bool RangeLimit2D::isValid(const uint32_t h, const uint32_t w) const {
 }
 
 std::string RangeLimit2D::GetErrorOrEmpty(const uint32_t h, const uint32_t w) const {
-    return hLimit.GetErrorOrEmpty(h) + hLimit.GetErrorOrEmpty(w);
+    return hLimit.GetErrorOrEmpty(h) + wLimit.GetErrorOrEmpty(w);
 }
 
 RangeMultipleLimit::RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn) : RangeLimit(rlIn), multiplier(multiplierIn) {
diff --git a/inference-engine/src/gna_plugin/gna_device.cpp b/inference-engine/src/gna_plugin/gna_device.cpp
index b0a67931d11..6ef0707bfcb 100644
--- a/inference-engine/src/gna_plugin/gna_device.cpp
+++ b/inference-engine/src/gna_plugin/gna_device.cpp
@@ -24,6 +24,7 @@
 #include "gna-api.h"
 #endif
 
+#include "backend/am_intel_dnn.hpp"
 #include "gna/gna_config.hpp"
 #include "gna_plugin_log.hpp"
 
@@ -115,13 +116,26 @@ uint32_t GNADeviceHelper::propagate(const uint32_t requestConfigId, Gna2Accelera
     return reqId;
 }
 
+void enforceLegacyCnn(Gna2Operation& operation) {
+    snprintf(
+        const_cast<char*>(operation.Operands[1]->Layout),
+        sizeof(operation.Operands[1]->Layout) / sizeof(char),
+        "GNA1");
+}
+
 void GNADeviceHelper::enforceLegacyCnns(Gna2Model& gnaModel) {
     for (uint32_t i = 0; i < gnaModel.NumberOfOperations; i++) {
         if (gnaModel.Operations[i].Type == Gna2OperationTypeConvolution) {
-            snprintf(
-                const_cast<char*>(gnaModel.Operations[i].Operands[1]->Layout),
-                sizeof(gnaModel.Operations[i].Operands[1]->Layout) / sizeof(char),
-                "GNA1");
+            enforceLegacyCnn(gnaModel.Operations[i]);
+        }
+    }
+}
+
+void GNADeviceHelper::enforceLegacyCnnsWhenNeeded(Gna2Model& gnaModel) {
+    for (uint32_t i = 0; i < gnaModel.NumberOfOperations; i++) {
+        auto& op = gnaModel.Operations[i];
+        if (GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(op)) {
+            enforceLegacyCnn(op);
         }
     }
 }
@@ -132,6 +146,7 @@ uint32_t GNADeviceHelper::createModel(Gna2Model& gnaModel) const {
     if (enforceLegacyCnnNeeded()) {
         enforceLegacyCnns(gnaModel);
     }
+    enforceLegacyCnnsWhenNeeded(gnaModel);
 #if GNA_LIB_VER == 2 && defined MODEL_DUMP
     std::string path =
 #ifdef _WIN32
@@ -582,3 +597,14 @@ void GNADeviceHelper::getGnaPerfCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& retPerfCounters);
     static std::string GetGnaLibraryVersion();
+    std::string getEffectiveGnaCompileTarget() const;
 
 private:
     void open(uint8_t const n_threads);
@@ -194,6 +195,7 @@ public:
     static const std::map <const std::pair<Gna2OperationType, int32_t>, const std::string > operandTypes;
 
     static void enforceLegacyCnns(Gna2Model& gnaModel);
+    static void enforceLegacyCnnsWhenNeeded(Gna2Model& gnaModel);
     Gna2DeviceVersion parseDeclaredTarget(std::string target, const bool execTarget) const;
     Gna2DeviceVersion getDefaultTarget() const;
     Gna2DeviceVersion getTargetDevice(bool execTarget) const;
diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp
index f91dde7f922..95e4a444a58 100644
--- a/inference-engine/src/gna_plugin/gna_plugin.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin.cpp
@@ -991,10 +991,14 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
 #else
     nnets.emplace_back(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap());
 #endif
+    std::string effectiveGnaCompileTarget = config.gnaCompileTarget;
+    if (gnadevice) {
+        effectiveGnaCompileTarget = gnadevice->getEffectiveGnaCompileTarget();
+    }
     if (!gnaFlags->sw_fp32 && !graphCompiler.dnnComponents.components.empty()) {
         // number of layer gets calculated inside that InitGNAStruct function
 #if GNA_LIB_VER == 2
-        dnn->InitGNAStruct(&std::get<0>(gnaModels.front())->obj);
+        dnn->InitGNAStruct(&std::get<0>(gnaModels.front())->obj, effectiveGnaCompileTarget);
 #else
         dnn->InitGNAStruct(&std::get<0>(nnets.front())->obj);
 #endif
@@ -1005,7 +1009,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
 #if GNA_LIB_VER == 2
         gnaModels.push_back(std::make_tuple(make_shared<CPPWrapper<Gna2Model>>()));
         // this can be improved by just copy all structures, but we are too lazy
-        dnn->InitGNAStruct(&std::get<0>(gnaModels.back())->obj);
+        dnn->InitGNAStruct(&std::get<0>(gnaModels.back())->obj, effectiveGnaCompileTarget);
 #else
         nnets.emplace_back(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap());
         dnn->InitGNAStruct(&std::get<0>(nnets.back())->obj);
diff --git a/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp b/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp
index bf063ced32f..4c2a697194d 100644
--- a/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp
+++ b/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp
@@ -12,6 +12,7 @@
 
 #include "backend/gna_limitations.hpp"
 #include "layers/gna_split_layer.hpp"
+#include "layers/gna_convolution_layer.hpp"
 
 using namespace GNAPluginNS;
 
@@ -19,6 +20,34 @@ NGRAPH_RTTI_DEFINITION(SplitConvolution, "SplitConvolution", 0);
 NGRAPH_RTTI_DEFINITION(SplitConvolutionWithBias, "SplitConvolutionWithBias", 0);
 NGRAPH_RTTI_DEFINITION(SplitConvolutionWithFq, "SplitConvolutionWithFq", 0);
+// Don't split when convolution is 2D and is not mappable to 1D
+static bool shouldSplitCnn(const ngraph::Output<ngraph::Node>& node) {
+    auto convolution = dynamic_cast<ngraph::opset7::Convolution*>(node.get_node());
+    IE_ASSERT(convolution != nullptr);
+    auto& input = convolution->get_input_shape(0);
+    auto& filters = convolution->get_input_shape(1);
+    uint32_t width = input.back();
+    uint32_t in_channels = input.at(1);
+    if (input.size() >= 4 && filters.size() >= 4) {
+        uint32_t height = input.at(2);
+        auto kH = filters.at(2);
+        auto kW = filters.at(3);
+        auto sW = convolution->get_strides().at(1);
+        if (GNAConvolutionLayer::isConv2D(height, width, in_channels, kH, kW) &&
+            !GNAConvolutionLayer::isMappableFrom2DTo1D(height, width, kW, sW)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+std::shared_ptr<ngraph::Node> getConvForMatcher() {
+    return ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ ngraph::pattern::any_input(),
+        ngraph::pattern::any_input() }, [](const ngraph::Output<ngraph::Node>& convolution) {
+            return shouldSplitCnn(convolution);
+        });
+}
+
 static bool Convert(std::shared_ptr<ngraph::Node> conv,
                     std::shared_ptr<ngraph::Node> add,
                     std::shared_ptr<ngraph::Node> bias,
@@ -28,9 +57,9 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
     if (input_size <= GNALimitations::bufferMaxSize) {
         return false;
     }
-
-    uint32_t width = conv->get_input_shape(0).back();
-    uint32_t in_channels = conv->get_input_shape(0).at(1);
+    auto& input = conv->get_input_shape(0);
+    uint32_t width = input.back();
+    uint32_t in_channels = input.at(1);
     auto split_sizes = GetAlignedSplitSizes(width, GNALimitations::bufferMaxSize / in_channels);
     IE_ASSERT(split_sizes.size() > 1);
     std::vector<int64_t> split_sizes_casted(split_sizes.size());
@@ -40,7 +69,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
 
     /* TODO check if it's NHWC convolution wrapped with transposes or all input dimensions except of width == 1,
        otherwise this split axis isn't supported */
-    const int64_t width_axis = conv->get_input_shape(0).size() - 1;
+    const int64_t width_axis = input.size() - 1;
     auto split_node = std::make_shared<ngraph::opset7::VariadicSplit>(conv->input_value(0),
         ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{width_axis}),
         ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes_casted.size()}), split_sizes_casted));
@@ -68,9 +97,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
 }
 
 SplitConvolution::SplitConvolution() {
-    auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
-                                                                         ngraph::pattern::any_input()});
-
+    auto conv = getConvForMatcher();
     ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
         const auto& pattern_map = m.get_pattern_value_map();
         return Convert(pattern_map.at(conv).get_node_shared_ptr(), nullptr, nullptr, nullptr);
@@ -81,8 +108,7 @@ SplitConvolution::SplitConvolution() {
 }
 
 SplitConvolutionWithBias::SplitConvolutionWithBias() {
-    auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
-                                                                         ngraph::pattern::any_input()});
+    auto conv = getConvForMatcher();
     auto bias = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
     auto add = ngraph::pattern::wrap_type<ngraph::opset7::Add>({conv, bias});
 
@@ -97,8 +123,7 @@ SplitConvolutionWithBias::SplitConvolutionWithBias() {
 }
 
 SplitConvolutionWithFq::SplitConvolutionWithFq() {
-    auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
-                                                                         ngraph::pattern::any_input()});
+    auto conv = getConvForMatcher();
    auto bias = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
     auto add = ngraph::pattern::wrap_type<ngraph::opset7::Add>({conv, bias});
     auto conv_output = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{conv, add});
@@ -119,4 +144,4 @@ SplitConvolutionWithFq::SplitConvolutionWithFq() {
 
     auto m = std::make_shared<ngraph::pattern::Matcher>(out_fq, "SplitConvolutionWithFq");
     this->register_matcher(m, callback);
-}
\ No newline at end of file
+}
diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_check.hpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_check.hpp
index 501b7134c60..22a6771ea0d 100644
--- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_check.hpp
+++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_check.hpp
@@ -16,8 +16,8 @@ protected:
 
         if (std::find(metrics.begin(), metrics.end(), METRIC_KEY(GNA_LIBRARY_FULL_VERSION)) != metrics.end()) {
             std::string gnaLibVer = ie_core.GetMetric(targetDevice, METRIC_KEY(GNA_LIBRARY_FULL_VERSION));
-            if (gnaLibVer.rfind("2.1", 0) != 0) {
-                GTEST_SKIP() << "Disabled test due to GNA library version being < 2.1" << std::endl;
+            if (gnaLibVer.rfind("2.1", 0) != 0 && gnaLibVer.rfind("3.0", 0) != 0) {
+                GTEST_SKIP() << "Disabled test due to GNA library version not being 2.1 or 3.0" << std::endl;
             }
             skipTest = false;
         }
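
The pooled-output helpers used in the new InitGNAStruct branch (outputFromPooling and outputFromPoolingLegacy, declared in layers/gna_convolution_layer.hpp) are not part of this patch. The sketch below only illustrates the formulas they are assumed to implement, and why the GNA_TARGET_3_0 path needs the pooling window while the legacy path only needs the stride:

    // Assumed behaviour only; the actual implementations live in gna_convolution_layer.cpp.
    #include <cstdint>

    // GNA 3.0: standard pooling output size, ceil((in - window) / stride) + 1 (sketch assumes in >= window, no error checking).
    uint32_t outputFromPooling(uint32_t in, uint32_t window, uint32_t stride) {
        return (in - window + stride - 1) / stride + 1;
    }

    // GNA 1.0/2.0 legacy behaviour, matching the removed "(outFromConv - 1) / poolStride->Dimensions[0] + 1" expression.
    uint32_t outputFromPoolingLegacy(uint32_t in, uint32_t stride) {
        return (in - 1) / stride + 1;
    }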