From c0941aba743fa5ed8852b92ec754021b6f171bd0 Mon Sep 17 00:00:00 2001
From: Mikhail Ryzhov
Date: Fri, 25 Nov 2022 09:21:57 +0100
Subject: [PATCH] [GNA] Fixed order of Assign layers (#14066)

---
 .../intel_gna/src/gna_graph_compiler.cpp      |  19 +--
 src/plugins/intel_gna/src/gna_plugin.cpp      |   3 +
 .../intel_gna/src/layers/gna_layer_info.hpp   |  11 ++
 .../intel_gna/src/layers/gna_memory_layer.hpp |   9 +-
 .../intel_gna/src/memory/gna_mem_regions.hpp  |   2 +
 .../pass_tests/concat_memory_param.cpp        | 134 ++++++++++++++++++
 6 files changed, 165 insertions(+), 13 deletions(-)
 create mode 100644 src/plugins/intel_gna/tests/functional/pass_tests/concat_memory_param.cpp

diff --git a/src/plugins/intel_gna/src/gna_graph_compiler.cpp b/src/plugins/intel_gna/src/gna_graph_compiler.cpp
index 7bb49c18051..7ead9b8ed75 100644
--- a/src/plugins/intel_gna/src/gna_graph_compiler.cpp
+++ b/src/plugins/intel_gna/src/gna_graph_compiler.cpp
@@ -2298,10 +2298,9 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
         auto &nextMemoryLayer = nextMemoryLayerIt->second;
         // memory layer not yet initialized
         if (nextMemoryLayer.reserved_size == 0) {
-            auto memorySize = InferenceEngine::details::product(nextMemoryLayer.getDims()) * nextMemoryLayer.elementSizeBytes();
-            gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64);
+            nextMemoryLayer.reserved_size = ALIGN64(nextMemoryLayer.getByteSize());
+            gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, nextMemoryLayer.reserved_size, 64);
             gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
-            nextMemoryLayer.reserved_size = ALIGN64(memorySize);
         } else {
             // We may need to extend memory buffer if connected input size is bigger, for example for concat connection
             gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out));
@@ -2592,27 +2591,23 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
         // TODO: this is duplicate with connect output
         auto& memoryLayer = prevMemoryLayer->second;
         if (memoryLayer.reserved_size == 0) {
-            auto memorySize = InferenceEngine::details::product(memoryLayer.getDims()) * memoryLayer.elementSizeBytes();
-
+            memoryLayer.reserved_size = ALIGN64(memoryLayer.getByteSize());
             // connectTo used for indicate that memory layer should be bound to given buffer
             if (connectTo) {
-                memorySize = std::max(memorySize, num_data_bytes_in);
-                gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &memoryLayer.gna_ptr, ALIGN64(memorySize), 64);
+                memoryLayer.reserved_size = ALIGN64(std::max(memoryLayer.reserved_size, num_data_bytes_in));
+                gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &memoryLayer.gna_ptr, memoryLayer.reserved_size, 64);
                 gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset);
             } else {
-                if (num_data_bytes_in < memorySize + offset) {
+                if (ALIGN64(num_data_bytes_in) < ALIGN64(memoryLayer.reserved_size + offset)) {
                     THROW_GNA_LAYER_EXCEPTION(layer) <<" invalid allocation request of "
-                        << num_data_bytes_in << " is more then state tensor size of: " << memorySize + offset;
+                        << num_data_bytes_in << " is more then state tensor size of: " << memoryLayer.reserved_size + offset;
                 }
                 gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset, ALIGN64(num_data_bytes_in));
             }
-
-            memoryLayer.reserved_size = ALIGN64(memorySize);
         } else {
             // We may need to extend memory buffer if connected input size is bigger, for example for concat connection
             gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
         }
-
         return prevLayer;
     }
 
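Note on the hunks above: the old code computed the state-buffer size twice (once for reserve_ptr, once for reserved_size), so the reservation and the recorded size could drift apart; the rewritten code derives the aligned size once, through the new MemoryLayer::getByteSize() helper, and stores it in reserved_size before reserving. A minimal sketch of that arithmetic, assuming ALIGN64 rounds up to the next 64-byte multiple (an assumption drawn from how the hunk uses it, not the plugin's actual macro):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>

    // Stand-in for the ALIGN64 macro: round n up to a multiple of 64
    // (an assumption for this sketch, not the plugin's real definition).
    static size_t align64(size_t n) {
        return (n + 63) & ~static_cast<size_t>(63);
    }

    int main() {
        // A 1x10 float state tensor: 40 bytes of payload (getByteSize()).
        size_t byte_size = 10 * sizeof(float);
        // reserved_size is now set once, up front, to the aligned size.
        size_t reserved = align64(byte_size);                        // 64
        // connectInput with connectTo: the reservation is extended when the
        // connected buffer (e.g. a concat input) is bigger than the state.
        size_t num_data_bytes_in = 100;
        reserved = align64(std::max(reserved, num_data_bytes_in));   // 128
        std::cout << reserved << "\n";
    }
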
diff --git a/src/plugins/intel_gna/src/gna_plugin.cpp b/src/plugins/intel_gna/src/gna_plugin.cpp
index c4cf65639d6..f3647ca56ec 100644
--- a/src/plugins/intel_gna/src/gna_plugin.cpp
+++ b/src/plugins/intel_gna/src/gna_plugin.cpp
@@ -900,6 +900,9 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
     if (sortedNet.empty()) {
         THROW_GNA_EXCEPTION << "Sorted network is empty";
     }
+    // Copy operations connected to a memory layer (Assign to a state variable) should be executed after all functional layers are calculated.
+    // To simplify, just move these Copy operations to the end of the execution list.
+    std::stable_partition(sortedNet.begin(), sortedNet.end(), [&](CNNLayerPtr layer) { return !LayerInfo(layer).isCopyToMemory(); });
 
     std::vector<InferenceEngine::CNNLayerPtr> sortedNoMem;
     std::unordered_map<std::string, std::vector<InferenceEngine::CNNLayerPtr>> memoryPairs;
diff --git a/src/plugins/intel_gna/src/layers/gna_layer_info.hpp b/src/plugins/intel_gna/src/layers/gna_layer_info.hpp
index b8844b96628..454a080abf2 100644
--- a/src/plugins/intel_gna/src/layers/gna_layer_info.hpp
+++ b/src/plugins/intel_gna/src/layers/gna_layer_info.hpp
@@ -338,6 +338,17 @@ class LayerInfo {
     bool isMemory() const noexcept {
         return isOfType("memory");
     }
+    // @brief verify that it is an Assign layer (Copy -> Memory)
+    bool isCopyToMemory() const {
+        if (isCopy()) {
+            for (auto&& out : getInputTo(layer->outData.front())) {
+                if (LayerInfo(out.second).isMemory()) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
     bool isCrop() const noexcept {
         return isOfType("crop");
     }
diff --git a/src/plugins/intel_gna/src/layers/gna_memory_layer.hpp b/src/plugins/intel_gna/src/layers/gna_memory_layer.hpp
index 7fd524ecf36..73d1bce9206 100644
--- a/src/plugins/intel_gna/src/layers/gna_memory_layer.hpp
+++ b/src/plugins/intel_gna/src/layers/gna_memory_layer.hpp
@@ -4,7 +4,8 @@
 
 #pragma once
 
-#include <legacy/ie_layers.h>
+#include "legacy/ie_layers.h"
+#include "debug.h"
 
 namespace GNAPluginNS {
 /**
@@ -25,6 +26,12 @@ public:
     InferenceEngine::SizeVector getDims() const {
         return inputLayer->outData.front()->getDims();
     }
+    /**
+     * @brief Get the size required for the GNA memory buffer
+     */
+    size_t getByteSize() const {
+        return InferenceEngine::details::product(getDims()) * elementSizeBytes();
+    }
     /**
      * @brief Reset the gna memory
      */
diff --git a/src/plugins/intel_gna/src/memory/gna_mem_regions.hpp b/src/plugins/intel_gna/src/memory/gna_mem_regions.hpp
index 7362e2241b8..a15b5e0a0b4 100644
--- a/src/plugins/intel_gna/src/memory/gna_mem_regions.hpp
+++ b/src/plugins/intel_gna/src/memory/gna_mem_regions.hpp
@@ -7,6 +7,8 @@
 #include <cstdint>
 #include <map>
 
+#include "log/debug.hpp"
+
 namespace GNAPluginNS {
 namespace memory {
 
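The std::stable_partition call added to GNAPlugin::LoadNetwork is what fixes the Assign ordering: every layer for which the new LayerInfo::isCopyToMemory() returns true (a Copy whose output feeds a Memory layer) is moved behind all functional layers, and stable partitioning keeps the topological order within each group intact. A self-contained sketch of that behavior, using toy stand-ins rather than the plugin's CNNLayerPtr and LayerInfo types:

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    // Toy layer: the real predicate is !LayerInfo(layer).isCopyToMemory().
    struct Layer {
        std::string name;
        bool copy_to_memory;  // a Copy feeding a Memory (Assign) layer
    };

    int main() {
        std::vector<Layer> sorted_net = {
            {"copy_to_state", true},  // state write-back: must run last
            {"eltwise", false},
            {"result_copy", false},
        };
        // Functional layers stay in front, in their original (topological)
        // order; Copy-to-memory layers move to the end of the execution list.
        std::stable_partition(sorted_net.begin(), sorted_net.end(),
                              [](const Layer& l) { return !l.copy_to_memory; });
        for (const auto& l : sorted_net)
            std::cout << l.name << "\n";  // eltwise, result_copy, copy_to_state
    }
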
+#include "ngraph_functions/builders.hpp" + +#include "ngraph_functions/pass/convert_prc.hpp" + +typedef std::tuple< + InferenceEngine::Precision, // Network Precision + std::string, // Target Device + std::map, // Configuration + std::vector // shape to split +> concat_memory_test_params; + +namespace LayerTestsDefinitions { + +class ConcatMemoryTest : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { + public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision net_prc; + std::string targetDevice; + std::map configuration; + std::vector input_shape; + std::tie(net_prc, targetDevice, configuration, input_shape) = obj.param; + + std::ostringstream result; + result << "net_prc=" << net_prc.name() << "_"; + result << "device=" << targetDevice << "_"; + for (auto const& config_item : configuration) { + result << "_config_item=" << config_item.first << "_" << config_item.second; + } + result << "_input_shape=" << CommonTestUtils::vec2str(input_shape); + return result.str(); + } + + protected: + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override { + InferenceEngine::Blob::Ptr blob = make_blob_with_precision(info.getTensorDesc()); + blob->allocate(); + + auto* raw_blob_data_ptr = blob->buffer().as(); + std::vector values = CommonTestUtils::generate_float_numbers(blob->size(), -2.f, 2.f); + for (size_t i = 0; i < blob->size(); i++) { + raw_blob_data_ptr[i] = values[i]; + } + return blob; + } + + void SetUp() override { + InferenceEngine::Precision net_prc; + std::vector input_shape; + std::tie(net_prc, targetDevice, configuration, input_shape) = this->GetParam(); + + auto ng_prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(net_prc); + + size_t in_total_dims_size = ov::shape_size(input_shape); + auto params = ngraph::builder::makeParams(ng_prc, {{1, in_total_dims_size}}); + auto reshape_pattern = std::make_shared(ov::element::Type_t::i64, ov::Shape{2}, input_shape); + auto reshape = std::make_shared(params[0], reshape_pattern, false); + + ov::op::util::VariableInfo vi{}; + vi.data_shape = ov::PartialShape(input_shape); + vi.variable_id = "test_variable"; + vi.data_type = ov::element::Type_t::f32; + const auto var = std::make_shared(vi); + std::vector initial_state = CommonTestUtils::generate_float_numbers(in_total_dims_size, -3.f, 3.f); + auto initial_state_node = ngraph::builder::makeConstant(ov::element::Type_t::f32, input_shape, initial_state); + auto readValue = std::make_shared(initial_state_node, var); + + const int axis = 1; + ov::OutputVector to_concat{readValue, reshape}; + auto concat = ngraph::builder::makeConcat(to_concat, axis); + + const auto concat_shape = concat->get_output_shape(0); + const auto concat_shape_size = ov::shape_size(concat_shape); + + auto etlwise_data = CommonTestUtils::generate_float_numbers(concat_shape_size, -1.f, 1.f); + auto etlwise_node = ngraph::builder::makeConstant(ov::element::Type_t::f32, concat_shape, etlwise_data); + auto etlwise_result_node = std::make_shared(concat, etlwise_node); + + ov::ResultVector results{std::make_shared(etlwise_result_node)}; + auto split_node = ngraph::builder::makeSplit(concat, ng_prc, 2, axis); + + auto assign_node = std::make_shared(split_node->output(1), var); + ngraph::SinkVector sinks{assign_node}; + function = std::make_shared(results, sinks, params); + } +}; + +TEST_P(ConcatMemoryTest, CompareWithRefImpl) { + Run(); +}; + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, 
+};
+
+const std::vector<std::map<std::string, std::string>> configs = {
+    {
+        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}
+    },
+    {
+        {"GNA_DEVICE_MODE", "GNA_SW_FP32"}
+    }
+};
+
+const std::vector<std::vector<size_t>> shapes {
+    {1, 64},
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_concat_memory, ConcatMemoryTest,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GNA),
+        ::testing::ValuesIn(configs),
+        ::testing::ValuesIn(shapes)),
+    ConcatMemoryTest::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
\ No newline at end of file
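For context on what the new test exercises: it builds Parameter -> Reshape and ReadValue -> Concat, then feeds the concat both into an eltwise node that drives the Result and into a Split whose second half is written back to the state variable through Assign. If the Copy behind the Assign executed before the eltwise, the consumer would read an already-updated state. A plain-C++ sketch of that ordering hazard, using toy buffers instead of the plugin's memory regions:

    #include <iostream>
    #include <vector>

    int main() {
        std::vector<float> state = {3.f, 3.f};  // initial_state of the variable
        std::vector<float> input = {2.f, 2.f};  // network input

        // Correct order: functional layers consume the *old* state first.
        std::vector<float> concat = {state[0], state[1], input[0], input[1]};
        float eltwise_sum = 0.f;
        for (float v : concat) eltwise_sum += v;  // sees 3,3,2,2 -> 10

        // Only afterwards does the Copy behind Assign overwrite the state
        // (with the second half of the concat, as the test's Split selects).
        state = {concat[2], concat[3]};

        std::cout << eltwise_sum << "\n";  // 10; running the Assign copy first
                                           // would have produced 2+2+2+2 = 8
    }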