[GNA] add support for 4d eltwise (#1353)
* [GNA] add support for 4d eltwise * cpplint fix * refactor * fix FP16 broadcast pass * change anonymous function to InferenceEngine::details::product * introduce anonymous function to the pass & add layer checks * refactor tileBlob lambda
This commit is contained in:
@@ -957,8 +957,30 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
|
||||
auto outputs = *layer->outData.begin();
|
||||
|
||||
uint32_t num_rows_in = FROM_IR_DIM(inputs4Bytes, 1);
|
||||
uint32_t num_columns_in = FROM_IR_DIM(inputs4Bytes, 2);
|
||||
auto in_4b_order = getFromIRDimsOrderNCHW(inputs4Bytes->getLayout());
|
||||
auto in_4b_batch = FROM_IR_DIM(inputs4Bytes, in_4b_order[0]);
|
||||
auto in_4b_channels = FROM_IR_DIM(inputs4Bytes, in_4b_order[1]);
|
||||
auto in_4b_height = FROM_IR_DIM(inputs4Bytes, in_4b_order[2]);
|
||||
auto in_4b_width = FROM_IR_DIM(inputs4Bytes, in_4b_order[3]);
|
||||
auto in_4b_total_size = in_4b_batch * in_4b_channels * in_4b_height * in_4b_width;
|
||||
|
||||
auto in_2b_order = getFromIRDimsOrderNCHW(inputs2Bytes->getLayout());
|
||||
auto in_2b_batch = FROM_IR_DIM(inputs2Bytes, in_2b_order[0]);
|
||||
auto in_2b_channels = FROM_IR_DIM(inputs2Bytes, in_2b_order[1]);
|
||||
auto in_2b_height = FROM_IR_DIM(inputs2Bytes, in_2b_order[2]);
|
||||
auto in_2b_width = FROM_IR_DIM(inputs2Bytes, in_2b_order[3]);
|
||||
auto in_2b_total_size = in_2b_batch * in_2b_channels * in_2b_height * in_2b_width;
|
||||
|
||||
if ((in_2b_batch > 1) || (in_4b_batch > 1)) {
|
||||
THROW_GNA_LAYER_EXCEPTION(layer) << " Inputs with batch size that not equals 1 is not supported";
|
||||
}
|
||||
|
||||
if (in_4b_total_size != in_2b_total_size) {
|
||||
THROW_GNA_LAYER_EXCEPTION(layer) << " Inputs size mismatch " << in_4b_total_size << " != " << in_2b_total_size;
|
||||
}
|
||||
|
||||
uint32_t num_rows_in = in_4b_channels * in_4b_height * in_4b_width;
|
||||
uint32_t num_columns_in = in_4b_batch;
|
||||
uint32_t num_rows_out = num_rows_in;
|
||||
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
|
||||
|
||||
|
||||
@@ -384,6 +384,7 @@ void GNAPlugin::LoadNetwork(ICNNNetwork & _network) {
|
||||
passes->registerPass<HandleMultipleActivationsForTheLayerPass>();
|
||||
passes->registerPass<SubstituteScaleShiftBroadCastPass>();
|
||||
passes->registerPass<FuseMultipleIdentitiesPass>();
|
||||
passes->registerPass<BroadcastConstPass>();
|
||||
passIdx = passes->run(passIdx);
|
||||
};
|
||||
|
||||
|
||||
@@ -986,7 +986,6 @@ void InsertSplitAligningFilterPass::run() {
|
||||
CNNNetworkInsertLayer(l, nullptr, filterWithQuant, splitOutIndex);
|
||||
}
|
||||
|
||||
|
||||
// search data that starts from unaligned location
|
||||
currentOffset += outputSize * bytesPerSplitElement;
|
||||
splitOutIndex++;
|
||||
@@ -994,6 +993,22 @@ void InsertSplitAligningFilterPass::run() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Creates a new blob of TileTo elements filled with repeated copies of the source blob.
 * @param blob   source blob to replicate (precision is preserved)
 * @param TileTo total number of elements in the result; must be a whole multiple of
 *               blob->size(), otherwise the tail of the result would stay uninitialized
 * @return newly allocated blob containing TileTo / blob->size() copies of the source data
 * @throws IE exception if the source blob is empty or TileTo is not a multiple of its size
 */
static InferenceEngine::Blob::Ptr tileBlob(Blob::Ptr& blob, size_t TileTo) {
    auto weightsElements = blob->size();
    auto weightsBytes = blob->byteSize();
    if (weightsElements == 0) {
        THROW_IE_EXCEPTION << "Blob size is 0";
    }
    // Reject sizes that do not divide evenly: a partial last copy would leave
    // uninitialized bytes at the end of the tiled blob.
    if (TileTo % weightsElements != 0) {
        THROW_IE_EXCEPTION << "Cannot tile blob of " << weightsElements << " elements to " << TileTo;
    }

    auto tiledBlob = make_plain_blob(blob->getTensorDesc().getPrecision(), { TileTo });
    tiledBlob->allocate();

    // size_t index avoids the signed/unsigned comparison of an int loop counter
    for (size_t i = 0; i < TileTo / weightsElements; ++i) {
        ie_memcpy(tiledBlob->buffer().as<uint8_t*>() + i * weightsBytes, weightsBytes, blob->cbuffer(), weightsBytes);
    }
    return tiledBlob;
}
|
||||
|
||||
void SubstituteScaleShiftBroadCastPass::run() {
|
||||
for (auto & l : *pLayers) {
|
||||
LayerInfo layerInfo(l);
|
||||
@@ -1036,34 +1051,15 @@ void SubstituteScaleShiftBroadCastPass::run() {
|
||||
gnalog() << "Substitution ScaleShift broadcast for layer: " << l->name << "\n";
|
||||
// approach 1 - weights tiling
|
||||
if (getPassManager()->getPolicy().ScaleShiftPolicy == Policy::ScaleShift::WEIGHTS_TILING) {
|
||||
auto tileBlob = [](Blob::Ptr &blob, size_t TileTo){
|
||||
auto weightsElements = blob->size();
|
||||
auto weightsBytes = blob->byteSize();
|
||||
if (weightsElements == 0) {
|
||||
THROW_IE_EXCEPTION << "Blob size is 0";
|
||||
}
|
||||
if (TileTo % weightsElements) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto tiledBlob = make_plain_blob(blob->getTensorDesc().getPrecision(), {TileTo});
|
||||
tiledBlob->allocate();
|
||||
|
||||
|
||||
for (int i=0; i != TileTo / weightsElements; i++) {
|
||||
ie_memcpy(tiledBlob->buffer().as<uint8_t*>() + i * weightsBytes, weightsBytes, blob->cbuffer(), weightsBytes);
|
||||
}
|
||||
blob = tiledBlob;
|
||||
return true;
|
||||
};
|
||||
|
||||
if (!tileBlob(scaleShift->_weights, nElements)) {
|
||||
if (nElements % scaleShift->_weights->size()) {
|
||||
THROW_GNA_EXCEPTION << "Cannot tile weights for layer: " << l->name << ", due to weights size not GCD of dims product";
|
||||
}
|
||||
scaleShift->_weights = tileBlob(scaleShift->_weights, nElements);
|
||||
if (scaleShift->_biases) {
|
||||
if (!tileBlob(scaleShift->_biases, nElements)) {
|
||||
if (nElements % scaleShift->_biases->size()) {
|
||||
THROW_GNA_EXCEPTION << "Cannot tile biases for layer: " << l->name << ", due to biases size not GCD of dims product";
|
||||
}
|
||||
scaleShift->_biases = tileBlob(scaleShift->_biases, nElements);
|
||||
}
|
||||
|
||||
// currently data type no providing reshape method of tensor desc
|
||||
@@ -1076,6 +1072,51 @@ void SubstituteScaleShiftBroadCastPass::run() {
|
||||
}
|
||||
}
|
||||
|
||||
void BroadcastConstPass::run() {
|
||||
for (auto& constLayer : *pLayers) {
|
||||
if (!LayerInfo(constLayer).isConst()) {
|
||||
continue;
|
||||
}
|
||||
auto isNonFunctional = [](CNNLayerPtr l) {
|
||||
return LayerInfo(l).isNonFunctional();
|
||||
};
|
||||
if (!CNNNetHasNextLayerSkipCertain(constLayer, 0, 0, isNonFunctional)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto nextLayer = CNNNetGetNextLayerSkipCertain(constLayer, 0, 0, isNonFunctional).first;
|
||||
|
||||
if (!LayerInfo(nextLayer).isEltwise()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto constDims = constLayer->outData.front()->getTensorDesc().getDims();
|
||||
auto constDimsSize = product(constDims.begin(), constDims.end());
|
||||
auto eltwiseDims = nextLayer->outData.front()->getTensorDesc().getDims();
|
||||
auto eltwiseDimsSize = product(eltwiseDims.begin(), eltwiseDims.end());
|
||||
|
||||
if (constDimsSize == eltwiseDimsSize) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (eltwiseDimsSize % constDimsSize) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (constLayer->blobs.find("custom") == constLayer->blobs.end()) {
|
||||
THROW_GNA_LAYER_EXCEPTION(constLayer) << "Const layer " << constLayer->name << " is missing 'custom' parameter";
|
||||
}
|
||||
|
||||
auto currentConstBlob = constLayer->blobs.find("custom")->second;
|
||||
|
||||
constLayer->blobs.find("custom")->second = tileBlob(currentConstBlob, eltwiseDimsSize);
|
||||
|
||||
constLayer->outData.front()->setDims(nextLayer->outData.front()->getDims());
|
||||
constLayer->outData.front()->setLayout(nextLayer->outData.front()->getLayout());
|
||||
gnalog() << "Const layer '" << constLayer->name << "' was changed to match output of '" << nextLayer->name << "'\n";
|
||||
}
|
||||
}
|
||||
|
||||
void UnrollLSTMCellPass::run() {
|
||||
InferenceEngine::NetPass::UnrollRNN_if(*getPassManager()->getNetwork(), [] (const RNNCellBase& rnn) -> bool {
|
||||
if (rnn.clip != 0.0f)
|
||||
|
||||
@@ -160,6 +160,11 @@ DECL_PASS_BEFORE_COPY(RemoveConst);
|
||||
*/
|
||||
DECL_PASS(FuseMultipleIdentities);
|
||||
|
||||
/**
|
||||
* @brief Broadcast data in Const layer
|
||||
*/
|
||||
DECL_PASS(BroadcastConst);
|
||||
|
||||
struct PassManagerSettings {
|
||||
Policy policy;
|
||||
/// @brief whether to run passes before copy
|
||||
|
||||
@@ -0,0 +1,176 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include <ie_core.hpp>
|
||||
|
||||
#include "common_test_utils/common_utils.hpp"
|
||||
#include "functional_test_utils/plugin_cache.hpp"
|
||||
#include "functional_test_utils/layer_test_utils.hpp"
|
||||
#include "functional_test_utils/blob_utils.hpp"
|
||||
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
#include "ngraph_functions/pass/convert_prc.hpp"
|
||||
|
||||
typedef std::tuple<
|
||||
InferenceEngine::Precision, // Network Precision
|
||||
std::string, // Target Device
|
||||
std::map<std::string, std::string>, // Configuration
|
||||
ngraph::helpers::EltwiseTypes // Type of eltwise
|
||||
> eltwiseParams;
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
// Checks GNA support for a 4d eltwise operation where the second operand is a
// constant that must be broadcast to the shape of the first operand.
class Eltwise4dBroadcast : public testing::WithParamInterface<eltwiseParams>,
    public LayerTestsUtils::LayerTestsCommon {
public:
    // Produces a human-readable test name from the parameter tuple.
    static std::string getTestCaseName(testing::TestParamInfo<eltwiseParams> obj) {
        InferenceEngine::Precision netPrecision;
        std::string targetDevice;
        std::map<std::string, std::string> configuration;
        ngraph::helpers::EltwiseTypes eltwiseType;
        std::tie(netPrecision, targetDevice, configuration, eltwiseType) = obj.param;

        std::ostringstream name;
        name << "netPRC=" << netPrecision.name() << "_";
        name << "targetDevice=" << targetDevice << "_";
        for (auto const& configItem : configuration) {
            name << "_configItem=" << configItem.first << "_" << configItem.second;
        }
        name << "_eltwiseType=" << eltwiseType;
        return name.str();
    }

protected:
    // Builds: Parameter{1,72} -> Reshape{1,1,6,12} -> Eltwise(Const{1,1,1,12}) -> Reshape{1,72} -> Result
    void SetUp() override {
        InferenceEngine::Precision netPrecision;
        ngraph::helpers::EltwiseTypes eltwiseType;
        std::tie(netPrecision, targetDevice, configuration, eltwiseType) = this->GetParam();
        const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        outPrc = InferenceEngine::Precision::FP32;

        auto params = ngraph::builder::makeParams(ngPrc, { {1, 72} });

        const std::vector<size_t> shape4d = { 1, 1, 6, 12 };
        auto toShape4d = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, shape4d);
        auto input4d = std::make_shared<ngraph::opset1::Reshape>(params[0], toShape4d, false);

        // Random constant with a trailing dim that matches the input's - forces broadcast
        auto broadcastConst = ngraph::builder::makeConstant(ngPrc, { 1, 1, 1, 12 }, {}, true);
        auto eltwise = ngraph::builder::makeEltwise(input4d, broadcastConst, eltwiseType);

        const std::vector<size_t> shape2d = { 1, 72 };
        auto toShape2d = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, shape2d);
        auto output2d = std::make_shared<ngraph::opset1::Reshape>(eltwise, toShape2d, false);

        ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(output2d) };
        function = std::make_shared<ngraph::Function>(results, params, "Eltwise4dBroadcast");
    }
};
|
||||
|
||||
// Checks GNA support for a 4d eltwise operation on two same-shaped network inputs.
class Eltwise4dMultipleInput : public testing::WithParamInterface<eltwiseParams>,
    public LayerTestsUtils::LayerTestsCommon {
public:
    // Produces a human-readable test name from the parameter tuple.
    static std::string getTestCaseName(testing::TestParamInfo<eltwiseParams> obj) {
        InferenceEngine::Precision netPrecision;
        std::string targetDevice;
        std::map<std::string, std::string> configuration;
        ngraph::helpers::EltwiseTypes eltwiseType;
        std::tie(netPrecision, targetDevice, configuration, eltwiseType) = obj.param;

        std::ostringstream name;
        name << "netPRC=" << netPrecision.name() << "_";
        name << "targetDevice=" << targetDevice << "_";
        for (auto const& configItem : configuration) {
            name << "_configItem=" << configItem.first << "_" << configItem.second;
        }
        name << "_eltwiseType=" << eltwiseType;
        return name.str();
    }

protected:
    // Builds: two Parameters{1,72} -> Reshape{1,1,6,12} each -> Eltwise -> Reshape{1,72} -> Result
    void SetUp() override {
        InferenceEngine::Precision netPrecision;
        ngraph::helpers::EltwiseTypes eltwiseType;
        std::tie(netPrecision, targetDevice, configuration, eltwiseType) = this->GetParam();
        const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        outPrc = InferenceEngine::Precision::FP32;

        auto params = ngraph::builder::makeParams(ngPrc, { {1, 72}, {1, 72} });

        // One shape constant is shared by both input reshapes
        const std::vector<size_t> shape4d = { 1, 1, 6, 12 };
        auto toShape4d = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, shape4d);
        auto lhs4d = std::make_shared<ngraph::opset1::Reshape>(params[0], toShape4d, false);

        auto rhs4d = std::make_shared<ngraph::opset1::Reshape>(params[1], toShape4d, false);

        auto eltwise = ngraph::builder::makeEltwise(lhs4d, rhs4d, eltwiseType);

        const std::vector<size_t> shape2d = { 1, 72 };
        auto toShape2d = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, shape2d);
        auto output2d = std::make_shared<ngraph::opset1::Reshape>(eltwise, toShape2d, false);

        ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(output2d) };
        function = std::make_shared<ngraph::Function>(results, params, "Eltwise4dMultipleInput");
    }
};
|
||||
|
||||
// Builds the network defined in SetUp() and compares plugin results against
// the reference implementation. (Stray semicolons after the test bodies
// removed - they trigger cpplint / -Wextra-semi warnings.)
TEST_P(Eltwise4dBroadcast, CompareWithRefImpl) {
    Run();
}

TEST_P(Eltwise4dMultipleInput, CompareWithRefImpl) {
    Run();
}
|
||||
|
||||
// Network precisions exercised by both test fixtures.
const std::vector<InferenceEngine::Precision> netPrecisions = {
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16
};

// Plugin configuration for the single-input (broadcast) case:
// software-exact GNA mode with one input scale factor.
const std::vector<std::map<std::string, std::string>> configs = {
    {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
        {"GNA_SCALE_FACTOR_0", "1638.4"}
    }
};

// Plugin configuration for the two-input case: one scale factor per input.
const std::vector<std::map<std::string, std::string>> configsMultiple = {
    {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
        {"GNA_SCALE_FACTOR_0", "1638.4"},
        {"GNA_SCALE_FACTOR_1", "1638.4"}
    }
};

// Eltwise operation kinds covered by the tests.
const std::vector<ngraph::helpers::EltwiseTypes> eltwiseOpTypes = {
    ngraph::helpers::EltwiseTypes::MULTIPLY,
    ngraph::helpers::EltwiseTypes::SUBTRACT,
    ngraph::helpers::EltwiseTypes::ADD
};
|
||||
|
||||
// Instantiates the broadcast fixture over the Cartesian product of
// precision x device x config x eltwise type.
INSTANTIATE_TEST_CASE_P(Eltwise4d, Eltwise4dBroadcast,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::Values(CommonTestUtils::DEVICE_GNA),
        ::testing::ValuesIn(configs),
        ::testing::ValuesIn(eltwiseOpTypes)),
    Eltwise4dBroadcast::getTestCaseName);

// Same product for the two-input fixture, using the per-input scale-factor configs.
INSTANTIATE_TEST_CASE_P(Eltwise4d, Eltwise4dMultipleInput,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::Values(CommonTestUtils::DEVICE_GNA),
        ::testing::ValuesIn(configsMultiple),
        ::testing::ValuesIn(eltwiseOpTypes)),
    Eltwise4dMultipleInput::getTestCaseName);
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
||||
Reference in New Issue
Block a user