[GNA] Fixed Eltwise split and batch size selection during 2d reshape (#7042)

* [GNA] Fixed Eltwise split and batch size selection during 2d reshape

* [GNA] Added exception if memory isn't allocated for concat filter

* Added assert for minZeroDimSize

* [GNA] Added unit test for GetAlignedSplitSizes()
This commit is contained in:
Elizaveta Lobanova 2021-08-16 19:27:48 +03:00 committed by GitHub
parent 05c632e072
commit a9f7c8effa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 102 additions and 53 deletions

View File

@ -7,6 +7,7 @@
#include "dnn_types.h" #include "dnn_types.h"
#include <cstdint> #include <cstdint>
#include <cpp/ie_cnn_network.h> #include <cpp/ie_cnn_network.h>
#include <ie_algorithm.hpp>
namespace GNAPluginNS { namespace GNAPluginNS {
namespace GNALimitations { namespace GNALimitations {
@ -114,5 +115,10 @@ public:
bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage); bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage);
// @brief Returns the minimum number of batch (zero-dimension) parts needed so that
// each part of the input tensor fits into the GNA buffer of size bufferMaxSize.
// @param input data whose total element count is measured
// @return ceil(total_size / bufferMaxSize); the original `total_size / bufferMaxSize + 1`
// over-required one extra batch whenever total_size was an exact multiple of bufferMaxSize
inline size_t GetMinBatchToFitInBuffer(InferenceEngine::DataPtr input) {
    // Total number of elements across all dimensions of the input tensor.
    auto total_size = InferenceEngine::details::product(std::begin(input->getDims()), std::end(input->getDims()));
    // Ceiling division: smallest batch count with total_size / batch <= bufferMaxSize.
    return (total_size + bufferMaxSize - 1) / bufferMaxSize;
}
} // namespace GNALimitations } // namespace GNALimitations
} // namespace GNAPluginNS } // namespace GNAPluginNS

View File

@ -683,7 +683,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto input = layer->insData[0].lock(); auto input = layer->insData[0].lock();
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(input, 8)->getDims(); auto reshaped_dims = Get2DReshapedData(input, GNALimitations::GetMinBatchToFitInBuffer(input), 8)->getDims();
const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ? const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor; GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_rows_in = reshaped_dims[1];
@ -908,7 +908,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto inputs = layer->insData.begin()->lock(); auto inputs = layer->insData.begin()->lock();
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(inputs, 8)->getDims(); auto reshaped_dims = Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims();
uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_rows_in = reshaped_dims[1];
uint32_t num_columns_in = reshaped_dims[0]; uint32_t num_columns_in = reshaped_dims[0];
uint32_t num_rows_out = num_rows_in; uint32_t num_rows_out = num_rows_in;
@ -1410,7 +1410,8 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
noOfInputsDivisor = GNALimitations::noOfInputsLowPrecDivisor; noOfInputsDivisor = GNALimitations::noOfInputsLowPrecDivisor;
} }
auto input_data = HasTo2DReshapeData(layer) ? Get2DReshapedData(inputs, 8) : inputs; auto input_data = HasTo2DReshapeData(layer) ?
Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8) : inputs;
auto in_dims = input_data->getDims(); auto in_dims = input_data->getDims();
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front(); auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size; uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size;

View File

@ -15,7 +15,9 @@ namespace GNAPluginNS {
* @param input a pointer to data to be reshaped * @param input a pointer to data to be reshaped
* @param maxZeroDimSize the maximum size of zero dimension * @param maxZeroDimSize the maximum size of zero dimension
*/ */
inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input, size_t maxZeroDimSize) { inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input, size_t minZeroDimSize,
size_t maxZeroDimSize) {
IE_ASSERT(minZeroDimSize > 0);
auto dims = input->getDims(); auto dims = input->getDims();
uint32_t numRowsIn = InferenceEngine::details::product(begin(dims), end(dims)); uint32_t numRowsIn = InferenceEngine::details::product(begin(dims), end(dims));
uint32_t numColumnsIn = 1; uint32_t numColumnsIn = 1;
@ -23,7 +25,7 @@ inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input
if (numRowsIn % 8 == 0) { if (numRowsIn % 8 == 0) {
if (dims.size() >= 2 || dims[0] >= maxZeroDimSize) { if (dims.size() >= 2 || dims[0] >= maxZeroDimSize) {
size_t indexDivide = maxZeroDimSize; size_t indexDivide = maxZeroDimSize;
while (indexDivide > 1) { while (indexDivide > minZeroDimSize) {
if ((numRowsIn / 8) % indexDivide == 0) break; if ((numRowsIn / 8) % indexDivide == 0) break;
--indexDivide; --indexDivide;
} }
@ -55,4 +57,5 @@ inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) {
// Don't reshape diagonallayers with bias connection // Don't reshape diagonallayers with bias connection
return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput(); return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput();
} }
} // namespace GNAPluginNS } // namespace GNAPluginNS

View File

@ -45,4 +45,18 @@ public:
}; };
std::vector<SplitConnectedLayerInfo> splitOutputLayers; std::vector<SplitConnectedLayerInfo> splitOutputLayers;
}; };
// @brief Returns sizes of split outputs to split the input tensor into aligned parts
// not greater than the specified size.
// All parts except possibly the last are multiples of the alignment; the remainder
// (if any) becomes the final, smaller part.
// @param totalSize total number of elements to be split
// @param maxSplitSize upper bound for a single part
// @param alignment required alignment of every full part (default 64 for GNA)
// @return per-part sizes summing to totalSize; empty if maxSplitSize == 0
static std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = 64) {
    std::vector<uint32_t> splitSizes;
    // Treat alignment == 0 as "no alignment" to avoid undefined behavior of % 0.
    if (alignment == 0) {
        alignment = 1;
    }
    // Largest chunk that is a whole multiple of the alignment.
    uint32_t maxAlignedSplitSize = maxSplitSize - maxSplitSize % alignment;
    // If maxSplitSize < alignment, the aligned chunk degenerates to 0 and the loop
    // below would never terminate; fall back to unaligned chunks of maxSplitSize.
    if (maxAlignedSplitSize == 0) {
        maxAlignedSplitSize = maxSplitSize;
    }
    // Nothing can fit at all (maxSplitSize == 0): return no parts instead of looping forever.
    if (maxAlignedSplitSize == 0) {
        return splitSizes;
    }
    uint32_t usedSize = 0;
    while (usedSize < totalSize) {
        uint32_t partSize = std::min(totalSize - usedSize, maxAlignedSplitSize);
        splitSizes.push_back(partSize);
        usedSize += partSize;
    }
    return splitSizes;
}
} // namespace GNAPluginNS } // namespace GNAPluginNS

View File

@ -87,7 +87,7 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
}); });
IE_ASSERT(inputLayer != nullptr); IE_ASSERT(inputLayer != nullptr);
size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? nextLayer->outData[0]->getDims().back() : size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? nextLayer->outData[0]->getDims().back() :
Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1]; Get2DReshapedData(nextLayer->outData[0], GNALimitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8)->getDims()[1];
std::vector<float> weightsValues(weightsSize, fillValue); std::vector<float> weightsValues(weightsSize, fillValue);
IE_ASSERT(diagLayer != nullptr); IE_ASSERT(diagLayer != nullptr);
diagLayer->_weights = make_shared_blob<float>( diagLayer->_weights = make_shared_blob<float>(
@ -1113,6 +1113,9 @@ void InsertConcatAligningFilterPass::run() {
SizeVector({filterWeights.size()}), SizeVector({filterWeights.size()}),
Layout::C)); Layout::C));
concatAligningFilter->_weights->allocate(); concatAligningFilter->_weights->allocate();
if (!concatAligningFilter->_weights->buffer().as<float*>()) {
THROW_GNA_EXCEPTION << "Failed to allocate weights of size " << filterWeights.size() << " for " << filterName;
}
CopyVectorToBlob(concatAligningFilter->_weights, filterWeights); CopyVectorToBlob(concatAligningFilter->_weights, filterWeights);
@ -1395,15 +1398,20 @@ void EltwiseSplitOverChannelsPass::run() {
THROW_GNA_LAYER_EXCEPTION(l) << "number of outputs expected to be 1"; THROW_GNA_LAYER_EXCEPTION(l) << "number of outputs expected to be 1";
} }
auto oData = l->outData.front(); auto oData = l->outData.front();
auto out_width = GetDataDimSize(oData, DataDimName::W); auto oDims = oData->getDims();
auto totalElementsForOutput = details::product(oData->getDims().begin(), oData->getDims().end()); auto totalElementsSize = details::product(std::begin(oDims), std::end(oDims));
// gna limit this to be OxFFFF if (totalElementsSize <= GNALimitations::bufferMaxSize) {
auto maxAffineElements = 65536 - 64;
if (totalElementsForOutput <= maxAffineElements) {
continue; continue;
} }
auto totalSplits = 1 + totalElementsForOutput / maxAffineElements; auto firstValuableDim = std::find_if(std::begin(oDims), std::end(oDims), [](size_t val) { return val > 1; });
IE_ASSERT(firstValuableDim != std::end(oDims));
auto splittedElementsSize = *firstValuableDim;
auto splittedDimIx = std::distance(std::begin(oDims), firstValuableDim);
// Split output size should be a multiple of 64 to avoid insertion of align filters
auto splitSizes = GetAlignedSplitSizes(splittedElementsSize,
GNALimitations::bufferMaxSize * splittedElementsSize / totalElementsSize);
pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n"; pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n";
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(l); auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(l);
@ -1421,27 +1429,13 @@ void EltwiseSplitOverChannelsPass::run() {
auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc(); auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc();
// create split layer outputs // create split layer outputs
size_t usedElements = 0; for (auto elementsNum : splitSizes) {
for (size_t i = 0; i < totalSplits; i++) { auto newDims = oDims;
SizeVector newDims; newDims[splittedDimIx] = elementsNum;
size_t elements_num = std::min(totalElementsForOutput - usedElements,
static_cast<size_t>(maxAffineElements));
if (inputDesc.getDims().size() == 2) {
newDims = SizeVector{1, elements_num};
} else {
elements_num = elements_num - elements_num % out_width;
newDims = SizeVector{1, elements_num / out_width, out_width};
}
auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout()); auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout());
auto data = std::make_shared<Data>(l->name + "/" + std::to_string(kThEltwiseInput) + "/1", newDesc); auto data = std::make_shared<Data>(l->name + "/" + std::to_string(kThEltwiseInput) + "/1", newDesc);
getCreatorLayer(data) = split; getCreatorLayer(data) = split;
split->outData.push_back(data); split->outData.push_back(data);
usedElements += elements_num;
if (usedElements == totalElementsForOutput) {
break;
}
} }
// replacing connection X->eltwise to X->split // replacing connection X->eltwise to X->split
auto oData = CNNLayerFindOutData(l, kThEltwiseInput); auto oData = CNNLayerFindOutData(l, kThEltwiseInput);
@ -1461,7 +1455,7 @@ void EltwiseSplitOverChannelsPass::run() {
concat->outData.push_back(masterEltwise->outData.front()); concat->outData.push_back(masterEltwise->outData.front());
getCreatorLayer(masterEltwise->outData.front()) = concat; getCreatorLayer(masterEltwise->outData.front()) = concat;
for (size_t k = 0; k != totalSplits; k++) { for (size_t k = 0; k != splitSizes.size(); k++) {
auto eltwiseRaw = std::make_shared<EltwiseLayer>( auto eltwiseRaw = std::make_shared<EltwiseLayer>(
LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32}); LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32});
IE_ASSERT(eltwiseRaw != nullptr); IE_ASSERT(eltwiseRaw != nullptr);
@ -1521,7 +1515,9 @@ void SubstituteScaleShiftBroadCastPass::run() {
if (was_reshaped) { if (was_reshaped) {
dataDims = reshaped_data[insData->getName()]; dataDims = reshaped_data[insData->getName()];
} else { } else {
dataDims = HasTo2DReshapeData(l) ? Get2DReshapedData(insData, 8)->getDims() : insData->getDims(); dataDims = HasTo2DReshapeData(l) ?
Get2DReshapedData(insData, GNALimitations::GetMinBatchToFitInBuffer(insData), 8)->getDims() :
insData->getDims();
} }
if (dataDims.size() <= 2) { if (dataDims.size() <= 2) {

View File

@ -12,6 +12,7 @@
#include <ngraph/pattern/op/wrap_type.hpp> #include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp> #include <ngraph/rt_info.hpp>
#include "backend/gna_limitations.hpp" #include "backend/gna_limitations.hpp"
#include "layers/gna_split_layer.hpp"
using namespace GNAPluginNS; using namespace GNAPluginNS;
@ -19,22 +20,6 @@ NGRAPH_RTTI_DEFINITION(SplitConvolution, "SplitConvolution", 0);
NGRAPH_RTTI_DEFINITION(SplitConvolutionWithBias, "SplitConvolutionWithBias", 0); NGRAPH_RTTI_DEFINITION(SplitConvolutionWithBias, "SplitConvolutionWithBias", 0);
NGRAPH_RTTI_DEFINITION(SplitConvolutionWithFq, "SplitConvolutionWithFq", 0); NGRAPH_RTTI_DEFINITION(SplitConvolutionWithFq, "SplitConvolutionWithFq", 0);
// Splits the convolution input width into chunks, each a multiple of 64 and no larger
// than what fits into the GNA buffer for the given number of input channels.
static std::vector<int64_t> GetConvSplitSizes(std::shared_ptr<ngraph::Node> conv) {
    const auto& input_shape = conv->get_input_shape(0);
    const uint32_t total_width = input_shape.back();
    const uint32_t channels = input_shape.at(1);
    // Maximum width chunk that still fits into the buffer, rounded down to 64 alignment.
    uint32_t chunk_limit = GNALimitations::bufferMaxSize / channels;
    chunk_limit = chunk_limit - chunk_limit % 64;
    std::vector<int64_t> sizes;
    uint32_t covered = 0;
    while (covered < total_width) {
        const uint32_t chunk = std::min(total_width - covered, chunk_limit);
        sizes.push_back(chunk);
        covered += chunk;
    }
    IE_ASSERT(covered == total_width);
    return sizes;
}
static bool Convert(std::shared_ptr<ngraph::Node> conv, static bool Convert(std::shared_ptr<ngraph::Node> conv,
std::shared_ptr<ngraph::Node> add, std::shared_ptr<ngraph::Node> add,
std::shared_ptr<ngraph::Node> bias, std::shared_ptr<ngraph::Node> bias,
@ -45,15 +30,21 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
return false; return false;
} }
auto split_sizes = GetConvSplitSizes(conv); uint32_t width = conv->get_input_shape(0).back();
uint32_t in_channels = conv->get_input_shape(0).at(1);
auto split_sizes = GetAlignedSplitSizes(width, GNALimitations::bufferMaxSize / in_channels);
IE_ASSERT(split_sizes.size() > 1); IE_ASSERT(split_sizes.size() > 1);
std::vector<int64_t> split_sizes_casted(split_sizes.size());
std::transform(std::begin(split_sizes), std::end(split_sizes), std::begin(split_sizes_casted), [](uint32_t size) {
return static_cast<int64_t>(size);
});
/* TODO check if it's NHWC convolution wrapped with transposes or all input dimensions except of width == 1, /* TODO check if it's NHWC convolution wrapped with transposes or all input dimensions except of width == 1,
otherwise this split axis isn't supported */ otherwise this split axis isn't supported */
const int64_t width_axis = conv->get_input_shape(0).size() - 1; const int64_t width_axis = conv->get_input_shape(0).size() - 1;
auto split_node = std::make_shared<ngraph::opset7::VariadicSplit>(conv->input_value(0), auto split_node = std::make_shared<ngraph::opset7::VariadicSplit>(conv->input_value(0),
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{width_axis}), ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{width_axis}),
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes.size()}), split_sizes)); ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes_casted.size()}), split_sizes_casted));
ngraph::copy_runtime_info(conv, split_node); ngraph::copy_runtime_info(conv, split_node);
split_node->set_friendly_name(conv->get_friendly_name() + "/split"); split_node->set_friendly_name(conv->get_friendly_name() + "/split");
ngraph::OutputVector convOutputs; ngraph::OutputVector convOutputs;

View File

@ -54,8 +54,8 @@ protected:
auto params = ngraph::builder::makeParams(ngPrc, { inputShape }); auto params = ngraph::builder::makeParams(ngPrc, { inputShape });
auto const_mult2 = ngraph::builder::makeConstant<float>(ngPrc, inputShape, {-1.0f}); auto const_mult2 = ngraph::builder::makeConstant<float>(ngPrc, inputShape, {-1.0f});
auto sum = ngraph::builder::makeEltwise(params[0], const_mult2, ngraph::helpers::EltwiseTypes::MULTIPLY); auto mul = ngraph::builder::makeEltwise(params[0], const_mult2, ngraph::helpers::EltwiseTypes::MULTIPLY);
function = std::make_shared<ngraph::Function>(sum, params, "EltwiseSplitOverChannelsPassTest"); function = std::make_shared<ngraph::Function>(mul, params, "EltwiseSplitOverChannelsPassTest");
} }
}; };
@ -77,7 +77,8 @@ const std::vector<std::map<std::string, std::string>> configs = {
const std::vector<std::vector<size_t>> inputShape = { const std::vector<std::vector<size_t>> inputShape = {
{1, 67000}, {1, 67000},
{1, 500000} {1, 500000},
{1, 936, 513}
}; };
INSTANTIATE_TEST_SUITE_P(smoke_EltwiseSplitOverChennels, EltwiseSplitOverChannelsPassTest, INSTANTIATE_TEST_SUITE_P(smoke_EltwiseSplitOverChennels, EltwiseSplitOverChannelsPassTest,

View File

@ -47,6 +47,7 @@ std::map<std::vector<size_t>, std::vector<std::vector<size_t>>> basic = {
{{1, 4, 4, 128}, {{}}}, {{1, 4, 4, 128}, {{}}},
{{8}, {{}}}, {{8}, {{}}},
{{5}, {{}}}, {{5}, {{}}},
{{1, 936, 513}, {{}}}
}; };
const auto basicCases = ::testing::Combine( const auto basicCases = ::testing::Combine(

View File

@ -65,7 +65,7 @@ class Get2DReshapedDataTest : public ::testing::Test {
InferenceEngine::Layout layout) const { InferenceEngine::Layout layout) const {
auto data = std::make_shared<InferenceEngine::Data>(input_name, auto data = std::make_shared<InferenceEngine::Data>(input_name,
InferenceEngine::TensorDesc(precision, input_shape.first, layout)); InferenceEngine::TensorDesc(precision, input_shape.first, layout));
auto new_data = GNAPluginNS::Get2DReshapedData(data, max_batch_size); auto new_data = GNAPluginNS::Get2DReshapedData(data, 1, max_batch_size);
ASSERT_EQ(new_data->getDims(), input_shape.second); ASSERT_EQ(new_data->getDims(), input_shape.second);
ASSERT_EQ(new_data->getPrecision(), precision); ASSERT_EQ(new_data->getPrecision(), precision);
ASSERT_EQ(new_data->getLayout(), layout); ASSERT_EQ(new_data->getLayout(), layout);

View File

@ -0,0 +1,36 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <gtest/gtest.h>
// to suppress deprecated definition errors
#define IMPLEMENT_INFERENCE_ENGINE_PLUGIN
#include "layers/gna_split_layer.hpp"
namespace {
// Test vector: (total size, maximum split size, alignment, expected split sizes).
using GetAlignedSplitSizesData = std::tuple<
    uint32_t,              // total size
    uint32_t,              // maximum split size
    uint32_t,              // alignment
    std::vector<uint32_t>  // expected sizes
>;
const std::vector<GetAlignedSplitSizesData> data = {
    GetAlignedSplitSizesData{1024, 100, 64, std::vector<uint32_t>(16, 64)},
    GetAlignedSplitSizesData{151, 100, 64, std::vector<uint32_t>{64, 64, 23}},
    GetAlignedSplitSizesData{151, 65, 32, std::vector<uint32_t>{64, 64, 23}},
    GetAlignedSplitSizesData{151, 65, 1, std::vector<uint32_t>{65, 65, 21}}
};
TEST(GetAlignedSplitSizesTest, testAlignedSplitSizes) {
    // Verify each tuple: actual split sizes must match the expected vector exactly.
    for (const auto &testCase : data) {
        uint32_t totalSize = 0, maxSplitSize = 0, alignment = 0;
        std::vector<uint32_t> expected;
        std::tie(totalSize, maxSplitSize, alignment, expected) = testCase;
        ASSERT_EQ(GNAPluginNS::GetAlignedSplitSizes(totalSize, maxSplitSize, alignment), expected);
    }
}
} // namespace