diff --git a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp index 731155df31d..5aa036c1559 100644 --- a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp +++ b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp @@ -7,6 +7,7 @@ #include "dnn_types.h" #include <cstdint> #include <cpp/ie_cnn_network.h> +#include <legacy/ie_layers.h> namespace GNAPluginNS { namespace GNALimitations { @@ -114,5 +115,10 @@ public: bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage); +inline size_t GetMinBatchToFitInBuffer(InferenceEngine::DataPtr input) { + auto total_size = InferenceEngine::details::product(std::begin(input->getDims()), std::end(input->getDims())); + return total_size / bufferMaxSize + 1; +} + } // namespace GNALimitations } // namespace GNAPluginNS diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp index 36a63e055e5..01581337aec 100644 --- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp @@ -683,7 +683,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) { auto input = layer->insData[0].lock(); auto outputs = *layer->outData.begin(); - auto reshaped_dims = Get2DReshapedData(input, 8)->getDims(); + auto reshaped_dims = Get2DReshapedData(input, GNALimitations::GetMinBatchToFitInBuffer(input), 8)->getDims(); const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor; uint32_t num_rows_in = reshaped_dims[1]; @@ -908,7 +908,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) { auto inputs = layer->insData.begin()->lock(); auto outputs = *layer->outData.begin(); - auto reshaped_dims = Get2DReshapedData(inputs, 8)->getDims(); + auto reshaped_dims = Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims(); uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_columns_in = reshaped_dims[0]; uint32_t num_rows_out = num_rows_in; @@ -1410,7 +1410,8 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool noOfInputsDivisor = GNALimitations::noOfInputsLowPrecDivisor; } - auto input_data = HasTo2DReshapeData(layer) ? Get2DReshapedData(inputs, 8) : inputs; + auto input_data = HasTo2DReshapeData(layer) ? + Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8) : inputs; auto in_dims = input_data->getDims(); auto batch_size = (in_dims.size() == 1) ? 
 1 : in_dims.front(); uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size; diff --git a/inference-engine/src/gna_plugin/gna_groups.hpp b/inference-engine/src/gna_plugin/gna_groups.hpp index 704588a153d..9c4654e1adc 100644 --- a/inference-engine/src/gna_plugin/gna_groups.hpp +++ b/inference-engine/src/gna_plugin/gna_groups.hpp @@ -15,7 +15,9 @@ namespace GNAPluginNS { * @param input a pointer to data to be reshaped * @param maxZeroDimSize the maximum size of zero dimension */ -inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input, size_t maxZeroDimSize) { +inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input, size_t minZeroDimSize, + size_t maxZeroDimSize) { + IE_ASSERT(minZeroDimSize > 0); auto dims = input->getDims(); uint32_t numRowsIn = InferenceEngine::details::product(begin(dims), end(dims)); uint32_t numColumnsIn = 1; @@ -23,7 +25,7 @@ inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input if (numRowsIn % 8 == 0) { if (dims.size() >= 2 || dims[0] >= maxZeroDimSize) { size_t indexDivide = maxZeroDimSize; - while (indexDivide > 1) { + while (indexDivide > minZeroDimSize) { if ((numRowsIn / 8) % indexDivide == 0) break; --indexDivide; } @@ -55,4 +57,5 @@ inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) { // Don't reshape diagonallayers with bias connection return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput(); } + } // namespace GNAPluginNS \ No newline at end of file diff --git a/inference-engine/src/gna_plugin/layers/gna_split_layer.hpp b/inference-engine/src/gna_plugin/layers/gna_split_layer.hpp index c6c16ffe99a..161c3da66f4 100644 --- a/inference-engine/src/gna_plugin/layers/gna_split_layer.hpp +++ b/inference-engine/src/gna_plugin/layers/gna_split_layer.hpp @@ -45,4 +45,18 @@ public: }; std::vector<SplitConnectedLayerInfo> splitOutputLayers; }; + +// @brief Returns sizes of split outputs to split the input
tensor to aligned parts not greater than the specified size +static std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = 64) { + std::vector<uint32_t> splitSizes; + uint32_t maxAlignedSplitSize = (maxSplitSize >= alignment) ? (maxSplitSize - maxSplitSize % alignment) : maxSplitSize; + uint32_t usedSize = 0; + while (usedSize < totalSize) { + uint32_t partSize = std::min(totalSize - usedSize, maxAlignedSplitSize); + splitSizes.push_back(partSize); + usedSize += partSize; + } + return splitSizes; +} + } // namespace GNAPluginNS diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index b92bd153370..b7507e8fbf9 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -87,7 +87,7 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer, }); IE_ASSERT(inputLayer != nullptr); size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ?
nextLayer->outData[0]->getDims().back() : - Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1]; + Get2DReshapedData(nextLayer->outData[0], GNALimitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8)->getDims()[1]; std::vector<float> weightsValues(weightsSize, fillValue); IE_ASSERT(diagLayer != nullptr); diagLayer->_weights = make_shared_blob<float>( @@ -1113,6 +1113,9 @@ void InsertConcatAligningFilterPass::run() { SizeVector({filterWeights.size()}), Layout::C)); concatAligningFilter->_weights->allocate(); + if (!concatAligningFilter->_weights->buffer().as<float*>()) { + THROW_GNA_EXCEPTION << "Failed to allocate weights of size " << filterWeights.size() << " for " << filterName; + } CopyVectorToBlob(concatAligningFilter->_weights, filterWeights); @@ -1395,15 +1398,20 @@ void EltwiseSplitOverChannelsPass::run() { THROW_GNA_LAYER_EXCEPTION(l) << "number of outputs expected to be 1"; } auto oData = l->outData.front(); - auto out_width = GetDataDimSize(oData, DataDimName::W); - auto totalElementsForOutput = details::product(oData->getDims().begin(), oData->getDims().end()); - // gna limit this to be OxFFFF - auto maxAffineElements = 65536 - 64; - if (totalElementsForOutput <= maxAffineElements) { + auto oDims = oData->getDims(); + auto totalElementsSize = details::product(std::begin(oDims), std::end(oDims)); + if (totalElementsSize <= GNALimitations::bufferMaxSize) { continue; } - auto totalSplits = 1 + totalElementsForOutput / maxAffineElements; + auto firstValuableDim = std::find_if(std::begin(oDims), std::end(oDims), [](size_t val) { return val > 1; }); + IE_ASSERT(firstValuableDim != std::end(oDims)); + auto splittedElementsSize = *firstValuableDim; + auto splittedDimIx = std::distance(std::begin(oDims), firstValuableDim); + + // Split output size should be multiple of 64 to avoid align filters insertion + auto splitSizes = GetAlignedSplitSizes(splittedElementsSize, + GNALimitations::bufferMaxSize * splittedElementsSize / totalElementsSize); pass_trace() << "transforming
" << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n"; auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(l); @@ -1421,27 +1429,13 @@ void EltwiseSplitOverChannelsPass::run() { auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc(); // create split layer outputs - size_t usedElements = 0; - for (size_t i = 0; i < totalSplits; i++) { - SizeVector newDims; - size_t elements_num = std::min(totalElementsForOutput - usedElements, - static_cast<size_t>(maxAffineElements)); - if (inputDesc.getDims().size() == 2) { - newDims = SizeVector{1, elements_num}; - } else { - elements_num = elements_num - elements_num % out_width; - newDims = SizeVector{1, elements_num / out_width, out_width}; - } - + for (auto elementsNum : splitSizes) { + auto newDims = oDims; + newDims[splittedDimIx] = elementsNum; auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout()); auto data = std::make_shared<Data>(l->name + "/" + std::to_string(kThEltwiseInput) + "/1", newDesc); getCreatorLayer(data) = split; split->outData.push_back(data); - - usedElements += elements_num; - if (usedElements == totalElementsForOutput) { - break; - } } // replacing connection X->eltwise to X->split auto oData = CNNLayerFindOutData(l, kThEltwiseInput); @@ -1461,7 +1455,7 @@ void EltwiseSplitOverChannelsPass::run() { concat->outData.push_back(masterEltwise->outData.front()); getCreatorLayer(masterEltwise->outData.front()) = concat; - for (size_t k = 0; k != totalSplits; k++) { + for (size_t k = 0; k != splitSizes.size(); k++) { auto eltwiseRaw = std::make_shared<EltwiseLayer>( LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32}); IE_ASSERT(eltwiseRaw != nullptr);
+ Get2DReshapedData(insData, GNALimitations::GetMinBatchToFitInBuffer(insData), 8)->getDims() : + insData->getDims(); } if (dataDims.size() <= 2) { diff --git a/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp b/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp index 4043c4aa5f0..b29cc04dac0 100644 --- a/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp +++ b/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp @@ -12,6 +12,7 @@ #include <ngraph/opsets/opset7.hpp> #include <ngraph/pattern/op/wrap_type.hpp> #include "backend/gna_limitations.hpp" +#include "layers/gna_split_layer.hpp" using namespace GNAPluginNS; @@ -19,22 +20,6 @@ NGRAPH_RTTI_DEFINITION(SplitConvolution, "SplitConvolution", 0); NGRAPH_RTTI_DEFINITION(SplitConvolutionWithBias, "SplitConvolutionWithBias", 0); NGRAPH_RTTI_DEFINITION(SplitConvolutionWithFq, "SplitConvolutionWithFq", 0); -static std::vector<int64_t> GetConvSplitSizes(std::shared_ptr<ngraph::Node> conv) { - uint32_t width = conv->get_input_shape(0).back(); - uint32_t in_channels = conv->get_input_shape(0).at(1); - uint32_t usedWidth = 0; - std::vector<int64_t> split_sizes; - uint32_t width_max_size = GNALimitations::bufferMaxSize / in_channels; - width_max_size = width_max_size - width_max_size % 64; - while (usedWidth < width) { - uint32_t width_part = std::min(width - usedWidth, width_max_size); - split_sizes.push_back(width_part); - usedWidth += width_part; - } - IE_ASSERT(usedWidth == width); - return split_sizes; -} - static bool Convert(std::shared_ptr<ngraph::Node> conv, std::shared_ptr<ngraph::Node> add, std::shared_ptr<ngraph::Node> bias, @@ -45,15 +30,21 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv, return false; } - auto split_sizes = GetConvSplitSizes(conv); + uint32_t width = conv->get_input_shape(0).back(); + uint32_t in_channels = conv->get_input_shape(0).at(1); + auto split_sizes = GetAlignedSplitSizes(width, GNALimitations::bufferMaxSize / in_channels); IE_ASSERT(split_sizes.size() > 1);
+ std::vector<int64_t> split_sizes_casted(split_sizes.size()); + std::transform(std::begin(split_sizes), std::end(split_sizes), std::begin(split_sizes_casted), [](uint32_t size) { + return static_cast<int64_t>(size); + }); /* TODO check if it's NHWC convolution wrapped with transposes or all input dimensions except of width == 1, otherwise this split axis isn't supported */ const int64_t width_axis = conv->get_input_shape(0).size() - 1; auto split_node = std::make_shared<ngraph::opset7::Split>(conv->input_value(0), ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{width_axis}), - ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes.size()}), split_sizes)); + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes_casted.size()}), split_sizes_casted)); ngraph::copy_runtime_info(conv, split_node); split_node->set_friendly_name(conv->get_friendly_name() + "/split"); ngraph::OutputVector convOutputs; diff --git a/inference-engine/tests/functional/plugin/gna/pass_tests/eltwise_split_over_channels_pass.cpp b/inference-engine/tests/functional/plugin/gna/pass_tests/eltwise_split_over_channels_pass.cpp index 5f69ab02615..dd6424be051 100644 --- a/inference-engine/tests/functional/plugin/gna/pass_tests/eltwise_split_over_channels_pass.cpp +++ b/inference-engine/tests/functional/plugin/gna/pass_tests/eltwise_split_over_channels_pass.cpp @@ -54,8 +54,8 @@ protected: auto params = ngraph::builder::makeParams(ngPrc, { inputShape }); auto const_mult2 = ngraph::builder::makeConstant<float>(ngPrc, inputShape, {-1.0f}); - auto sum = ngraph::builder::makeEltwise(params[0], const_mult2, ngraph::helpers::EltwiseTypes::MULTIPLY); - function = std::make_shared<ngraph::Function>(sum, params, "EltwiseSplitOverChannelsPassTest"); + auto mul = ngraph::builder::makeEltwise(params[0], const_mult2, ngraph::helpers::EltwiseTypes::MULTIPLY); + function = std::make_shared<ngraph::Function>(mul, params, "EltwiseSplitOverChannelsPassTest"); } }; @@ -77,7 +77,8 @@ const std::vector<std::map<std::string, std::string>> configs =
{ const std::vector<std::vector<size_t>> inputShape = { {1, 67000}, - {1, 500000} + {1, 500000}, + {1, 936, 513} }; INSTANTIATE_TEST_SUITE_P(smoke_EltwiseSplitOverChennels, EltwiseSplitOverChannelsPassTest, diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/activation.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/activation.cpp index 9de08e5f84f..71c0cfc3d70 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/activation.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/activation.cpp @@ -47,6 +47,7 @@ std::map<std::vector<size_t>, std::vector<std::vector<size_t>>> basic = { {{1, 4, 4, 128}, {{}}}, {{8}, {{}}}, {{5}, {{}}}, + {{1, 936, 513}, {{}}} }; const auto basicCases = ::testing::Combine( diff --git a/inference-engine/tests/unit/gna/gna_get_2d_reshaped_data.cpp b/inference-engine/tests/unit/gna/gna_get_2d_reshaped_data.cpp index 5af1bf88fa7..5e96984e152 100644 --- a/inference-engine/tests/unit/gna/gna_get_2d_reshaped_data.cpp +++ b/inference-engine/tests/unit/gna/gna_get_2d_reshaped_data.cpp @@ -65,7 +65,7 @@ class Get2DReshapedDataTest : public ::testing::Test { InferenceEngine::Layout layout) const { auto data = std::make_shared<InferenceEngine::Data>(input_name, InferenceEngine::TensorDesc(precision, input_shape.first, layout)); - auto new_data = GNAPluginNS::Get2DReshapedData(data, max_batch_size); + auto new_data = GNAPluginNS::Get2DReshapedData(data, 1, max_batch_size); ASSERT_EQ(new_data->getDims(), input_shape.second); ASSERT_EQ(new_data->getPrecision(), precision); ASSERT_EQ(new_data->getLayout(), layout); diff --git a/inference-engine/tests/unit/gna/gna_get_aligned_split_sizes.cpp b/inference-engine/tests/unit/gna/gna_get_aligned_split_sizes.cpp new file mode 100644 index 00000000000..5d017248e49 --- /dev/null +++ b/inference-engine/tests/unit/gna/gna_get_aligned_split_sizes.cpp @@ -0,0 +1,36 @@ +// Copyright (C) 2021 Intel Corporation +//
SPDX-License-Identifier: Apache-2.0 +// + +#include <vector> + +#include <gtest/gtest.h> +// to suppress deprecated definition errors +#define IMPLEMENT_INFERENCE_ENGINE_PLUGIN +#include "layers/gna_split_layer.hpp" + +namespace { + +using GetAlignedSplitSizesData = std::tuple< + uint32_t, // total size + uint32_t, // maximum split size + uint32_t, // alignment + std::vector<uint32_t> // expected sizes +>; + +const std::vector<GetAlignedSplitSizesData> data = { + GetAlignedSplitSizesData{1024, 100, 64, std::vector<uint32_t>(16, 64)}, + GetAlignedSplitSizesData{151, 100, 64, std::vector<uint32_t>{64, 64, 23}}, + GetAlignedSplitSizesData{151, 65, 32, std::vector<uint32_t>{64, 64, 23}}, + GetAlignedSplitSizesData{151, 65, 1, std::vector<uint32_t>{65, 65, 21}} +}; + +TEST(GetAlignedSplitSizesTest, testAlignedSplitSizes) { + for (const auto &dataItem : data) { + auto sizes = GNAPluginNS::GetAlignedSplitSizes(std::get<0>(dataItem), std::get<1>(dataItem), + std::get<2>(dataItem)); + ASSERT_EQ(sizes, std::get<3>(dataItem)); + } +} + +} // namespace