[GNA] incorrect diag insertion (#14858)

* [GNA] Create an ngraph implementation of the relu_torch_pot model for further tests. Create a legacy pass fusing the FC-Eltwise-Const layer pattern into a single FC layer with biases

* [GNA] Fix review comments, apply proper code style to the changed code
Marcin Kacprzak 2023-02-22 11:22:55 +01:00 committed by GitHub
parent f41c75b965
commit c8643a9a30
6 changed files with 404 additions and 13 deletions


@@ -445,5 +445,16 @@ inline std::vector<TranspositionInfo> FindTranspositionInfoFromNextLayers(Infere
return findTranspositionInfoRecursive(layer);
}
/**
* @brief Returns true if the layer has at most one dimension greater than 1
* (such a layer can then be treated as one-dimensional)
*/
inline bool IsOneDimLayer(InferenceEngine::CNNLayerPtr layer) {
auto dims = layer->insData[0].lock()->getDims();
return std::count_if(std::begin(dims), std::end(dims), [](size_t dim) {
return dim > 1;
}) <= 1;
}
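// A minimal illustration (hypothetical shapes, not part of this change): a layer whose
// input dims are {1, 1, 64, 1} counts as one-dimensional here, since only one dimension
// exceeds 1; a layer with dims {1, 2, 64} does not, since two of its dimensions exceed 1.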
} // namespace intel_gna
} // namespace ov


@@ -21,6 +21,10 @@ namespace InferenceEngine {
static constexpr size_t invalid_data_idx = std::numeric_limits<size_t>::max();
// Predicate for the CNNNet*SkipCertain helpers that never skips any layer
inline bool DoNotSkip(CNNLayerPtr layer) {
return false;
}
// Compares data between the copied network and the original network
inline bool areEqualDatas(DataPtr source, DataPtr target) {
if (source.get() == target.get()) {
@@ -693,6 +697,47 @@ std::vector<std::pair<CNNLayerPtr, int>> CNNNetGetPrevLayersSkip(CNNLayerPtr ori
return prevLayers;
}
/**
* @brief Removes the 'to_remove' layer from between two other layers, 'prev' and 'next', then connects 'prev' with 'next'
* @param prev Layer before 'to_remove'
* @param to_remove Layer to be removed
* @param prevOutputNo Output number of 'prev' to be connected with 'next'
* @param nextInputNo Input number of 'next' to be connected with 'prev'
* @return true if the layer was removed, false otherwise
*/
inline bool CNNRemoveAndConnect(CNNLayerPtr prev, CNNLayerPtr to_remove, int prevOutputNo = 0, int nextInputNo = 0) {
CNNLayerPtr next = CNNNetCheckNextLayerSkipCertain(to_remove, 0, 0, true, DoNotSkip).first;
if (!prev || !next) {
return false;
}
IE_ASSERT(prev->outData.size() > 0);
IE_ASSERT(next->outData.size() > 0);
if (to_remove->outData.size() != 1) {
// Only a layer with exactly one output can be removed
return false;
}
// Get the output of 'prev' selected by 'prevOutputNo'
auto prevDPtr = prev->outData[prevOutputNo];
// Connect that output of 'prev' to the input of 'next' selected by 'nextInputNo'
next->insData[nextInputNo] = prevDPtr;
// Add 'next' to the inputTo map of 'prev',
// so that 'prev' now points to 'next' as a layer that consumes its output.
auto& prevInputToMap = getInputTo(prevDPtr);
prevInputToMap[next->name] = next;
// Remove the reference to 'to_remove' from the inputTo map of 'prev'
prevInputToMap.erase(to_remove->name);
return true;
}
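// Usage sketch (hypothetical layers, assuming 'identity' has exactly one output and its
// consumer is reachable via CNNNetCheckNextLayerSkipCertain):
//
//   CNNLayerPtr fc = ...;        // producer
//   CNNLayerPtr identity = ...;  // layer to drop
//   if (CNNRemoveAndConnect(fc, identity)) {
//       // output 0 of 'fc' now feeds directly into the layer that followed 'identity'
//   }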
/**
* @brief Removes the given layer from the topology; currently only layers with one input data and one output data are supported
*/


@@ -223,6 +223,7 @@ void TransformationsPipeline::apply_legacy(const InferenceEngine::CNNNetwork& ne
passes->registerPass<HandleMultipleActivationsForTheLayerPass>();
passes->registerPass<ForbidActivationFusingPass>();
passes->registerPass<FuseMultipleIdentitiesPass>();
passes->registerPass<FuseFullyConnectedWithEltwisePass>();
legacy_pass_index = passes->run(legacy_pass_index);
}


@@ -62,6 +62,51 @@ std::shared_ptr<IPassManager> BasePass::getPassManager() {
return sharedMgr;
}
/**
* @brief Adds the values of 'src_blob' element-wise into 'dst_blob'
*/
template <class T>
static void SumBlobs_t(Blob::Ptr& src_blob, Blob::Ptr& dst_blob) {
IE_ASSERT(src_blob != nullptr);
IE_ASSERT(dst_blob != nullptr);
IE_ASSERT(src_blob->size() == dst_blob->size());
IE_ASSERT(src_blob->getTensorDesc().getPrecision() == dst_blob->getTensorDesc().getPrecision());
T* src_blob_buf = src_blob->buffer().as<T*>();
T* dst_blob_buf = dst_blob->buffer().as<T*>();
std::transform(dst_blob_buf, dst_blob_buf + dst_blob->size(), src_blob_buf, dst_blob_buf, std::plus<T>());
}
static void SumBlobs(Blob::Ptr& src_blob, Blob::Ptr& dst_blob) {
IE_ASSERT(src_blob != nullptr);
switch (src_blob->getTensorDesc().getPrecision()) {
#define CASE(x) \
case x: \
return SumBlobs_t<PrecisionTrait<x>::value_type>(src_blob, dst_blob);
CASE(InferenceEngine::Precision::FP32);
CASE(InferenceEngine::Precision::FP64);
CASE(InferenceEngine::Precision::FP16);
CASE(InferenceEngine::Precision::BF16);
CASE(InferenceEngine::Precision::I4);
CASE(InferenceEngine::Precision::I8);
CASE(InferenceEngine::Precision::I16);
CASE(InferenceEngine::Precision::I32);
CASE(InferenceEngine::Precision::I64);
CASE(InferenceEngine::Precision::U4);
CASE(InferenceEngine::Precision::U8);
CASE(InferenceEngine::Precision::U16);
CASE(InferenceEngine::Precision::U32);
CASE(InferenceEngine::Precision::U64);
CASE(InferenceEngine::Precision::Q78);
CASE(InferenceEngine::Precision::BIN);
CASE(InferenceEngine::Precision::BOOL);
#undef CASE
default:
IE_THROW() << "Wrong precision specified: " << src_blob->getTensorDesc().getPrecision().name();
}
}
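// Example of the intended use (a sketch, not part of this change): element-wise
// accumulation of one FP32 blob into another of the same size and precision.
//
//   InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {4}, InferenceEngine::Layout::C);
//   InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>(desc);
//   InferenceEngine::Blob::Ptr dst = InferenceEngine::make_shared_blob<float>(desc);
//   src->allocate();
//   dst->allocate();
//   // ... fill both buffers ...
//   SumBlobs(src, dst);  // dst[i] += src[i] for every element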
static Blob::Ptr convertToRWBlob(const Blob::Ptr& readOnlyBlob, const std::string& name = {}) {
auto blob = Blob::CreateFromData(std::make_shared<Data>(name, readOnlyBlob->getTensorDesc()));
blob->allocate();
@@ -2171,10 +2216,6 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass ::run() {
return;
}
auto donotSkip = [](CNNLayerPtr) {
return false;
};
auto allowFQFuse = [this](CNNLayerPtr layer) -> bool {
auto skipNonFunctionalOrMemory = [](CNNLayerPtr layer) {
return LayerInfo(layer).isNonFunctional() || LayerInfo(layer).isMemory();
@@ -2202,15 +2243,11 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass ::run() {
return;
}
auto donotSkip = [](CNNLayerPtr) {
return false;
};
auto quantParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
IE_ASSERT(quantParams != nullptr);
// Find all output layers connected to FQ
auto nextLayers = CNNNetGetAllNextLayersSkipCertain(layer.get(), -1, donotSkip);
auto nextLayers = CNNNetGetAllNextLayersSkipCertain(layer.get(), -1, DoNotSkip);
if (nextLayers.empty()) {
quantParams->_src_quant.CopyStats(srcQuantParams->_dst_quant);
if (LayerInfo(layer).isNonFunctional()) {
@@ -2264,7 +2301,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass ::run() {
continue;
}
GNAFakeQuantizeLayer fqLayer(l);
auto prevLayer = CNNNetPrevLayerSkipCertain(*fqLayer, 0, donotSkip);
auto prevLayer = CNNNetPrevLayerSkipCertain(*fqLayer, 0, DoNotSkip);
auto prevDataIt = std::find_if(std::begin(prevLayer->outData), std::end(prevLayer->outData), [l](DataPtr data) {
return getInputTo(data).find(l->name) != std::end(getInputTo(data));
});
@@ -2307,8 +2344,8 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass ::run() {
// Propagate destination statistics to the multiply layer if they are set for the next sum/sub layer
// (which is considered a bias)
if (LayerInfo(prevLayer).isEltwiseSum() || LayerInfo(prevLayer).isEltwiseSub()) {
auto eltwPrevLayer = CNNNetPrevLayerSkipCertain(prevLayer, 0, donotSkip);
auto constLayer = CNNNetPrevLayerSkipCertain(prevLayer, 1, donotSkip);
auto eltwPrevLayer = CNNNetPrevLayerSkipCertain(prevLayer, 0, DoNotSkip);
auto constLayer = CNNNetPrevLayerSkipCertain(prevLayer, 1, DoNotSkip);
if (LayerInfo(eltwPrevLayer).isEltwise() && LayerInfo(constLayer).isConst()) {
auto quantParamsEltwLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(eltwPrevLayer);
quantParamsEltwLayer->_dst_quant.CopyStats(quantParamsPrevLayer->_dst_quant);
@@ -2334,7 +2371,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass ::run() {
auto prevData = *prevDataIt;
// Find all output layers connected to FQ
auto nextLayers = CNNNetGetAllNextLayersSkipCertain(*fqLayer, -1, donotSkip);
auto nextLayers = CNNNetGetAllNextLayersSkipCertain(*fqLayer, -1, DoNotSkip);
if (nextLayers.empty()) {
continue;
}
@@ -2582,6 +2619,93 @@ void TransposeWeightsFromNCHWToNHWCPass::run() {
}
}
void FuseFullyConnectedWithEltwisePass::run() {
// This legacy pass removes the Eltwise (only if it performs a SUM op) from between FC and Any.
// The blob data of the Const layer attached to the Eltwise is added to the biases blob of the FC layer.
// Finally, the Const is removed as well.
// Permute and Reshape layers between FC and Eltwise remain in the network in order
// to keep the final data shape unchanged.
//
// This operation can be illustrated as follows:
//
// --
// Original: Result: Removed:
//
// FC FC (Eltwise)
// | | (Const)
// Permute Permute
// (optional) (if exists in Original)
// | |
// Reshape Reshape
// (optional) Const (if exists in Original)
// | / |
// Eltwise(sum) Any
// |
// Any (e.g. ReLU)
// --
//
// NOTE: This pass exists to avoid an unnecessary roundtrip to memory (an additional layer).
// It can be removed entirely once the corresponding LPT transformation is implemented.
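//
// Mathematically, the FC computes y = W * x + b, and the following Eltwise(sum) adds a
// constant vector c, so the subgraph computes y = W * x + (b + c). Folding c into the
// FC biases (b' = b + c) therefore preserves the result exactly, which is why SumBlobs()
// below adds the Const blob into the FC "biases" blob.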
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "FuseFullyConnectedWithEltwisePass");
auto skipPermuteAndReshape = [](CNNLayerPtr layer) {
if (IsOneDimLayer(layer)) {
if (LayerInfo(layer).isPermute() || LayerInfo(layer).isReshape()) {
return true;
}
}
return false;
};
for (auto& layer : *pLayers) {
if (!LayerInfo(layer).isFullyConnected() || layer->outData.empty()) {
continue;
}
auto& fully_connected = layer;
// Find the Eltwise, skipping Permutes and Reshapes
auto eltwise = CNNNetCheckNextLayerSkipCertain(fully_connected, 0, 0, true, skipPermuteAndReshape).first;
// Skip this FC if the found layer is not an Eltwise performing a 'sum' operation
if (!eltwise || !LayerInfo(eltwise).isEltwiseSum()) {
continue;
}
// Get the Eltwise's input layers
CNNLayerPtr eltwise_const = nullptr;
CNNLayerPtr eltwise_input = nullptr;
for (size_t i = 0; i < eltwise->insData.size(); i++) {
// Get Eltwise's prev layer and check its kind
auto before_eltwise =
CNNNetHasPrevLayer(eltwise.get(), i) ? CNNNetPrevLayerSkipCertain(eltwise, i, DoNotSkip) : nullptr;
if (before_eltwise && LayerInfo(before_eltwise).isConst()) {
eltwise_const = before_eltwise;
} else {
eltwise_input = before_eltwise;
}
}
if (!eltwise_const || !eltwise_input) {
continue;
}
// Find (any) layer after Eltwise
auto any_layer = CNNNetCheckNextLayerSkipCertain(eltwise, 0, 0, true, DoNotSkip).first;
if (!any_layer) {
continue;
}
// Connect FC with layer after Eltwise (Eltwise and Const will be removed)
if (CNNRemoveAndConnect(eltwise_input, eltwise)) {
// Add data from Const "custom" blob to FC "biases" blob
auto& const_blob = eltwise_const->blobs.find("custom")->second;
auto& fc_blob = fully_connected->blobs.find("biases")->second;
SumBlobs(const_blob, fc_blob);
}
}
}
int PassManager::run(int index) {
#if defined PLOT || defined ENABLE_V7_SERIALIZE
auto dumpNetworkAfterPass = [&index, this](std::shared_ptr<Pass> pass) {


@@ -222,6 +222,12 @@ DECL_PASS(MoveFakeQuantizeLayerIntoQuantParams);
*/
DECL_PASS(TransposeWeightsFromNCHWToNHWC);
/**
* @brief Fuses FullyConnected and Eltwise layers, also when a Permute or Reshape whose input has only
* one dimension > 1 lies between them
*/
DECL_PASS(FuseFullyConnectedWithEltwise);
struct PassManagerSettings {
/// @brief whether to run passes before copy
bool runBeforeCopy;


@@ -0,0 +1,204 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ie_core.hpp>
#include <memory>
#include <string>
#include <tuple>
#include <vector>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/pass/convert_prc.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
namespace DiagonalInsertionTestNs {
using namespace ngraph;
using namespace ngraph::builder;
using namespace ngraph::element;
using namespace ngraph::op;
using namespace ngraph::opset9;
using namespace std;
using DiagonalInsertionTestParams = tuple<map<string, string>, // Configuration
vector<vector<float>> // FakeQuantize min/max params
>;
constexpr uint16_t fq_levels = numeric_limits<uint16_t>::max();
// This class performs tests on the following network:
// Params
// Const |
// | FakeQuantize
// FakeQuantize |
// | Reshape
// \ /
// MatMul
// |
// FakeQuantize
// |
// Const Reshape
// | /
// FakeQuantize /
// \ /
// Add
// |
// FakeQuantize
// |
// ReLU
// |
// Result
// The above network should cause the FuseFullyConnectedWithEltwisePass to fire
// The final network should have only one functional layer - FullyConnected
class DiagonalInsertionTest : public testing::WithParamInterface<DiagonalInsertionTestParams>,
public LayerTestsUtils::LayerTestsCommon {
const int32_t seed = 7235346;
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override {
return FuncTestUtils::createAndFillBlobFloatNormalDistribution(info.getTensorDesc(), 0.0f, 0.2f, seed);
}
ParameterVector CreateInputVector(const Type& type, const vector<std::size_t>& shapes) {
return makeParams(type, {shapes});
}
shared_ptr<FakeQuantize> CreateFQNode(const Type& type,
const shared_ptr<ov::Node>& node,
float fq_min,
float fq_max,
std::size_t levels) {
auto fq_inp_min = makeConstant<float>(type, {1}, {fq_min});
auto fq_inp_max = makeConstant<float>(type, {1}, {fq_max});
auto fq_out_min = makeConstant<float>(type, {1}, {fq_min});
auto fq_out_max = makeConstant<float>(type, {1}, {fq_max});
return make_shared<FakeQuantize>(node, fq_inp_min, fq_inp_max, fq_out_min, fq_out_max, levels);
}
std::shared_ptr<Reshape> CreateReshapeNode(element::Type in_type,
shared_ptr<Node> input_node,
std::vector<size_t> target_shape_vect) {
const auto target_shape_const = Constant::create(in_type, Shape{target_shape_vect.size()}, target_shape_vect);
return std::make_shared<Reshape>(input_node, target_shape_const, false);
}
bool IsDebugEnabled(map<string, string>& configuration) {
return configuration.find("LOG_LEVEL") != configuration.end() && configuration["LOG_LEVEL"] == "LOG_DEBUG";
}
public:
static string getTestCaseName(testing::TestParamInfo<DiagonalInsertionTestParams> obj) {
map<string, string> configuration;
vector<vector<float>> fq_min_max;
tie(configuration, fq_min_max) = obj.param;
ostringstream result;
for (auto const& config_item : configuration) {
result << "_configItem=" << config_item.first << ":" << config_item.second;
}
for (auto const& fq : fq_min_max) {
result << "_fqMin=" << fq[0] << "_fqMax=" << fq[1];
}
return result.str();
}
protected:
void SetUp() override {
// Loosen threshold because of precision decrease during test
threshold = 0.1;
targetDevice = CommonTestUtils::DEVICE_GNA;
const size_t height = 512;
const size_t width = 1024;
const auto precision = ::ngraph::element::Type_t::f32;
const vector<std::size_t> input_shape = {width};
// Receive test params
vector<vector<float>> fq_min_max;
tie(configuration, fq_min_max) = this->GetParam();
// Create network
auto input_vect = makeParams(precision, {input_shape});
auto input_fq = CreateFQNode(precision, input_vect[0], fq_min_max[0][0], fq_min_max[0][1], fq_levels);
auto reshape = CreateReshapeNode(ngraph::element::Type_t::i32, input_fq, {width, 1});
auto mm_const = makeConstant<float>(precision, {height, width}, {}, true);
auto mm_const_fq = CreateFQNode(precision, mm_const, fq_min_max[1][0], fq_min_max[1][1], fq_levels);
auto matmul = makeMatMul(mm_const_fq, reshape);
auto matmul_fq = CreateFQNode(precision, matmul, fq_min_max[2][0], fq_min_max[2][1], fq_levels);
auto add_mm_reshape = CreateReshapeNode(ngraph::element::Type_t::i32, matmul_fq, {height});
auto add_const = makeConstant<float>(precision, {height}, {}, true);
auto add_const_fq = CreateFQNode(precision, add_const, fq_min_max[3][0], fq_min_max[3][1], fq_levels);
auto add = make_shared<Add>(add_const_fq, add_mm_reshape);
auto add_fq = CreateFQNode(precision, add, fq_min_max[4][0], fq_min_max[4][1], fq_levels);
auto relu = make_shared<Relu>(add_fq);
function = make_shared<ngraph::Function>(relu, input_vect, "DiagonalInsertion");
}
};
TEST_P(DiagonalInsertionTest, CompareWithRefs) {
Run();
}
const vector<map<string, string>> configs = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"GNA_PRECISION", "I16"},
{"GNA_EXEC_TARGET", "GNA_TARGET_3_5"},
},
};
vector<vector<float>> fq_mm1 = {{-19.38653564453125, 19.38653564453125},
{-4.872922897338867, 4.872922897338867},
{-633.115478515625, 633.115478515625},
{-3.2157254219055176, 3.2157254219055176},
{-633.0288696289062, 633.0288696289062}};
vector<vector<float>> fq_mm2 = {{-1.38653564453125, 1.38653564453125},
{-0.872922897338867, 0.872922897338867},
{-63.115478515625, 63.115478515625},
{-0.2157254219055176, 0.2157254219055176},
{-63.0288696289062, 63.0288696289062}};
vector<vector<float>> fq_mm3 = {{-0.1938653564453125, 0.1938653564453125},
{-0.04872922897338867, 0.04872922897338867},
{-6.33115478515625, 6.33115478515625},
{-0.032157254219055176, 0.032157254219055176},
{-6.330288696289062, 6.330288696289062}};
vector<vector<float>> fq_mm4 = {{-4.38653564453125, 4.38653564453125},
{-48.72922897338867, 48.72922897338867},
{-3.115478515625, 3.115478515625},
{-32.157254219055176, 32.157254219055176},
{-30.0288696289062, 30.0288696289062}};
vector<vector<float>> fq_mm5 = {{-390.38653564453125, 390.38653564453125},
{-400.872922897338867, 400.872922897338867},
{-633.115478515625, 633.115478515625},
{-399.2157254219055176, 399.2157254219055176},
{-633.0288696289062, 633.0288696289062}};
vector<vector<vector<float>>> fq_min_max = {fq_mm1, fq_mm2, fq_mm3, fq_mm4, fq_mm5};
INSTANTIATE_TEST_SUITE_P(smoke_DiagonalInsertion,
DiagonalInsertionTest,
::testing::Combine(::testing::ValuesIn(configs), ::testing::ValuesIn(fq_min_max)),
DiagonalInsertionTest::getTestCaseName);
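// To run only this suite locally, a standard gtest filter can be used, e.g. (the test
// binary name is an assumption and depends on the build layout):
//   ./gnaFuncTests --gtest_filter=smoke_DiagonalInsertion*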
} // namespace DiagonalInsertionTestNs