From c0941aba743fa5ed8852b92ec754021b6f171bd0 Mon Sep 17 00:00:00 2001
From: Mikhail Ryzhov
Date: Fri, 25 Nov 2022 09:21:57 +0100
Subject: [PATCH] [GNA] Fixed order of Assign layers (#14066)

---
 .../intel_gna/src/gna_graph_compiler.cpp      |  19 +--
 src/plugins/intel_gna/src/gna_plugin.cpp      |   3 +
 .../intel_gna/src/layers/gna_layer_info.hpp   |  11 ++
 .../intel_gna/src/layers/gna_memory_layer.hpp |   9 +-
 .../intel_gna/src/memory/gna_mem_regions.hpp  |   2 +
 .../pass_tests/concat_memory_param.cpp        | 134 ++++++++++++++++++
 6 files changed, 165 insertions(+), 13 deletions(-)
 create mode 100644 src/plugins/intel_gna/tests/functional/pass_tests/concat_memory_param.cpp

diff --git a/src/plugins/intel_gna/src/gna_graph_compiler.cpp b/src/plugins/intel_gna/src/gna_graph_compiler.cpp
index 7bb49c18051..7ead9b8ed75 100644
--- a/src/plugins/intel_gna/src/gna_graph_compiler.cpp
+++ b/src/plugins/intel_gna/src/gna_graph_compiler.cpp
@@ -2298,10 +2298,9 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
         auto &nextMemoryLayer = nextMemoryLayerIt->second;
         // memory layer not yet initialized
         if (nextMemoryLayer.reserved_size == 0) {
-            auto memorySize = InferenceEngine::details::product(nextMemoryLayer.getDims()) * nextMemoryLayer.elementSizeBytes();
-            gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64);
+            nextMemoryLayer.reserved_size = ALIGN64(nextMemoryLayer.getByteSize());
+            gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, nextMemoryLayer.reserved_size, 64);
             gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
-            nextMemoryLayer.reserved_size = ALIGN64(memorySize);
         } else {
             // We may need to extend memory buffer if connected input size is bigger, for example for concat connection
             gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out));
@@ -2592,27 +2591,23 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
         // TODO: this is duplicate with connect output
         auto& memoryLayer = prevMemoryLayer->second;
         if (memoryLayer.reserved_size == 0) {
-            auto memorySize = InferenceEngine::details::product(memoryLayer.getDims()) * memoryLayer.elementSizeBytes();
-
+            memoryLayer.reserved_size = ALIGN64(memoryLayer.getByteSize());
             // connectTo used for indicate that memory layer should be bound to given buffer
             if (connectTo) {
-                memorySize = std::max(memorySize, num_data_bytes_in);
-                gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &memoryLayer.gna_ptr, ALIGN64(memorySize), 64);
+                memoryLayer.reserved_size = ALIGN64(std::max(memoryLayer.reserved_size, num_data_bytes_in));
+                gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &memoryLayer.gna_ptr, memoryLayer.reserved_size, 64);
                 gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset);
             } else {
-                if (num_data_bytes_in < memorySize + offset) {
+                if (ALIGN64(num_data_bytes_in) < ALIGN64(memoryLayer.reserved_size + offset)) {
                     THROW_GNA_LAYER_EXCEPTION(layer) <<" invalid allocation request of "
-                        << num_data_bytes_in << " is more then state tensor size of: " << memorySize + offset;
+                        << num_data_bytes_in << " is more then state tensor size of: " << memoryLayer.reserved_size + offset;
                 }
                 gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset, ALIGN64(num_data_bytes_in));
             }
-
-            memoryLayer.reserved_size = ALIGN64(memorySize);
         } else {
             // We may need to extend memory buffer if connected input size is bigger, for example for concat connection
             gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
         }
-
         return prevLayer;
     }
 
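Note on the hunks above: the old code computed the state-buffer size twice (once for reserve_ptr, once for reserved_size), so the reservation and the recorded size could drift apart; the rewritten code derives the aligned size once, through the new MemoryLayer::getByteSize() helper, and stores it in reserved_size before reserving. A minimal sketch of that arithmetic, assuming ALIGN64 rounds up to the next 64-byte multiple (an assumption drawn from how the hunk uses it, not the plugin's actual macro):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>

    // Stand-in for the ALIGN64 macro: round n up to a multiple of 64
    // (an assumption for this sketch, not the plugin's real definition).
    static size_t align64(size_t n) {
        return (n + 63) & ~static_cast<size_t>(63);
    }

    int main() {
        // A 1x10 float state tensor: 40 bytes of payload (getByteSize()).
        size_t byte_size = 10 * sizeof(float);
        // reserved_size is now set once, up front, to the aligned size.
        size_t reserved = align64(byte_size);                        // 64
        // connectInput with connectTo: the reservation is extended when the
        // connected buffer (e.g. a concat input) is bigger than the state.
        size_t num_data_bytes_in = 100;
        reserved = align64(std::max(reserved, num_data_bytes_in));   // 128
        std::cout << reserved << "\n";
    }
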
diff --git a/src/plugins/intel_gna/src/gna_plugin.cpp b/src/plugins/intel_gna/src/gna_plugin.cpp
index c4cf65639d6..f3647ca56ec 100644
--- a/src/plugins/intel_gna/src/gna_plugin.cpp
+++ b/src/plugins/intel_gna/src/gna_plugin.cpp
@@ -900,6 +900,9 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
     if (sortedNet.empty()) {
         THROW_GNA_EXCEPTION << "Sorted network is empty";
     }
+    // Copy operations connected to a memory layer (Assign to a state variable) should be executed after all functional layers are calculated.
+    // To simplify, just move these Copy operations to the end of the execution list.
+    std::stable_partition(sortedNet.begin(), sortedNet.end(), [&](CNNLayerPtr layer) { return !LayerInfo(layer).isCopyToMemory(); });
 
     std::vector<InferenceEngine::CNNLayerPtr> sortedNoMem;
     std::unordered_map<std::string, std::vector<InferenceEngine::CNNLayerPtr>> memoryPairs;
diff --git a/src/plugins/intel_gna/src/layers/gna_layer_info.hpp b/src/plugins/intel_gna/src/layers/gna_layer_info.hpp
index b8844b96628..454a080abf2 100644
--- a/src/plugins/intel_gna/src/layers/gna_layer_info.hpp
+++ b/src/plugins/intel_gna/src/layers/gna_layer_info.hpp
@@ -338,6 +338,17 @@ class LayerInfo {
     bool isMemory() const noexcept {
         return isOfType("memory");
     }
+    // @brief verify that it is an Assign layer (Copy -> Memory)
+    bool isCopyToMemory() const {
+        if (isCopy()) {
+            for (auto&& out : getInputTo(layer->outData.front())) {
+                if (LayerInfo(out.second).isMemory()) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
     bool isCrop() const noexcept {
         return isOfType("crop");
     }
diff --git a/src/plugins/intel_gna/src/layers/gna_memory_layer.hpp b/src/plugins/intel_gna/src/layers/gna_memory_layer.hpp
index 7fd524ecf36..73d1bce9206 100644
--- a/src/plugins/intel_gna/src/layers/gna_memory_layer.hpp
+++ b/src/plugins/intel_gna/src/layers/gna_memory_layer.hpp
@@ -4,7 +4,8 @@
 
 #pragma once
 
-#include <legacy/ie_layers.h>
+#include "legacy/ie_layers.h"
+#include "debug.h"
 
 namespace GNAPluginNS {
 /**
@@ -25,6 +26,12 @@ public:
     InferenceEngine::SizeVector getDims() const {
         return inputLayer->outData.front()->getDims();
     }
+    /**
+     * @brief Get the size required for the GNA memory buffer
+     */
+    size_t getByteSize() const {
+        return InferenceEngine::details::product(getDims()) * elementSizeBytes();
+    }
     /**
      * @brief Reset the gna memory
      */
diff --git a/src/plugins/intel_gna/src/memory/gna_mem_regions.hpp b/src/plugins/intel_gna/src/memory/gna_mem_regions.hpp
index 7362e2241b8..a15b5e0a0b4 100644
--- a/src/plugins/intel_gna/src/memory/gna_mem_regions.hpp
+++ b/src/plugins/intel_gna/src/memory/gna_mem_regions.hpp
@@ -7,6 +7,8 @@
 #include <cstdint>
 #include <map>
 
+#include "log/debug.hpp"
+
 namespace GNAPluginNS {
 namespace memory {
 
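The std::stable_partition call added to GNAPlugin::LoadNetwork is what fixes the Assign ordering: every layer for which the new LayerInfo::isCopyToMemory() returns true (a Copy whose output feeds a Memory layer) is moved behind all functional layers, and stable partitioning keeps the topological order within each group intact. A self-contained sketch of that behavior, using toy stand-ins rather than the plugin's CNNLayerPtr and LayerInfo types:

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    // Toy layer: the real predicate is !LayerInfo(layer).isCopyToMemory().
    struct Layer {
        std::string name;
        bool copy_to_memory;  // a Copy feeding a Memory (Assign) layer
    };

    int main() {
        std::vector<Layer> sorted_net = {
            {"copy_to_state", true},  // state write-back: must run last
            {"eltwise", false},
            {"result_copy", false},
        };
        // Functional layers stay in front, in their original (topological)
        // order; Copy-to-memory layers move to the end of the execution list.
        std::stable_partition(sorted_net.begin(), sorted_net.end(),
                              [](const Layer& l) { return !l.copy_to_memory; });
        for (const auto& l : sorted_net)
            std::cout << l.name << "\n";  // eltwise, result_copy, copy_to_state
    }
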
+#include "ngraph_functions/builders.hpp" + +#include "ngraph_functions/pass/convert_prc.hpp" + +typedef std::tuple< + InferenceEngine::Precision, // Network Precision + std::string, // Target Device + std::map, // Configuration + std::vector // shape to split +> concat_memory_test_params; + +namespace LayerTestsDefinitions { + +class ConcatMemoryTest : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { + public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision net_prc; + std::string targetDevice; + std::map configuration; + std::vector input_shape; + std::tie(net_prc, targetDevice, configuration, input_shape) = obj.param; + + std::ostringstream result; + result << "net_prc=" << net_prc.name() << "_"; + result << "device=" << targetDevice << "_"; + for (auto const& config_item : configuration) { + result << "_config_item=" << config_item.first << "_" << config_item.second; + } + result << "_input_shape=" << CommonTestUtils::vec2str(input_shape); + return result.str(); + } + + protected: + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override { + InferenceEngine::Blob::Ptr blob = make_blob_with_precision(info.getTensorDesc()); + blob->allocate(); + + auto* raw_blob_data_ptr = blob->buffer().as(); + std::vector values = CommonTestUtils::generate_float_numbers(blob->size(), -2.f, 2.f); + for (size_t i = 0; i < blob->size(); i++) { + raw_blob_data_ptr[i] = values[i]; + } + return blob; + } + + void SetUp() override { + InferenceEngine::Precision net_prc; + std::vector input_shape; + std::tie(net_prc, targetDevice, configuration, input_shape) = this->GetParam(); + + auto ng_prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(net_prc); + + size_t in_total_dims_size = ov::shape_size(input_shape); + auto params = ngraph::builder::makeParams(ng_prc, {{1, in_total_dims_size}}); + auto reshape_pattern = std::make_shared(ov::element::Type_t::i64, ov::Shape{2}, input_shape); + auto reshape = std::make_shared(params[0], reshape_pattern, false); + + ov::op::util::VariableInfo vi{}; + vi.data_shape = ov::PartialShape(input_shape); + vi.variable_id = "test_variable"; + vi.data_type = ov::element::Type_t::f32; + const auto var = std::make_shared(vi); + std::vector initial_state = CommonTestUtils::generate_float_numbers(in_total_dims_size, -3.f, 3.f); + auto initial_state_node = ngraph::builder::makeConstant(ov::element::Type_t::f32, input_shape, initial_state); + auto readValue = std::make_shared(initial_state_node, var); + + const int axis = 1; + ov::OutputVector to_concat{readValue, reshape}; + auto concat = ngraph::builder::makeConcat(to_concat, axis); + + const auto concat_shape = concat->get_output_shape(0); + const auto concat_shape_size = ov::shape_size(concat_shape); + + auto etlwise_data = CommonTestUtils::generate_float_numbers(concat_shape_size, -1.f, 1.f); + auto etlwise_node = ngraph::builder::makeConstant(ov::element::Type_t::f32, concat_shape, etlwise_data); + auto etlwise_result_node = std::make_shared(concat, etlwise_node); + + ov::ResultVector results{std::make_shared(etlwise_result_node)}; + auto split_node = ngraph::builder::makeSplit(concat, ng_prc, 2, axis); + + auto assign_node = std::make_shared(split_node->output(1), var); + ngraph::SinkVector sinks{assign_node}; + function = std::make_shared(results, sinks, params); + } +}; + +TEST_P(ConcatMemoryTest, CompareWithRefImpl) { + Run(); +}; + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, 
+};
+
+const std::vector<std::map<std::string, std::string>> configs = {
+    {
+        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}
+    },
+    {
+        {"GNA_DEVICE_MODE", "GNA_SW_FP32"}
+    }
+};
+
+const std::vector<std::vector<size_t>> shapes {
+    {1, 64},
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_concat_memory, ConcatMemoryTest,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GNA),
+        ::testing::ValuesIn(configs),
+        ::testing::ValuesIn(shapes)),
+    ConcatMemoryTest::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
\ No newline at end of file
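For context on what the new test exercises: it builds Parameter -> Reshape and ReadValue -> Concat, then feeds the concat both into an eltwise node that drives the Result and into a Split whose second half is written back to the state variable through Assign. If the Copy behind the Assign executed before the eltwise, the consumer would read an already-updated state. A plain-C++ sketch of that ordering hazard, using toy buffers instead of the plugin's memory regions:

    #include <iostream>
    #include <vector>

    int main() {
        std::vector<float> state = {3.f, 3.f};  // initial_state of the variable
        std::vector<float> input = {2.f, 2.f};  // network input

        // Correct order: functional layers consume the *old* state first.
        std::vector<float> concat = {state[0], state[1], input[0], input[1]};
        float eltwise_sum = 0.f;
        for (float v : concat) eltwise_sum += v;  // sees 3,3,2,2 -> 10

        // Only afterwards does the Copy behind Assign overwrite the state
        // (with the second half of the concat, as the test's Split selects).
        state = {concat[2], concat[3]};

        std::cout << eltwise_sum << "\n";  // 10; running the Assign copy first
                                           // would have produced 2+2+2+2 = 8
    }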