[GNA] Fixed order of Assign layers (#14066)

Mikhail Ryzhov 2022-11-25 09:21:57 +01:00 committed by GitHub
parent b5557fc211
commit c0941aba74
6 changed files with 165 additions and 13 deletions

View File

@@ -2298,10 +2298,9 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
     auto &nextMemoryLayer = nextMemoryLayerIt->second;
     // memory layer not yet initialized
     if (nextMemoryLayer.reserved_size == 0) {
-        auto memorySize = InferenceEngine::details::product(nextMemoryLayer.getDims()) * nextMemoryLayer.elementSizeBytes();
-        gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64);
+        nextMemoryLayer.reserved_size = ALIGN64(nextMemoryLayer.getByteSize());
+        gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, nextMemoryLayer.reserved_size, 64);
         gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
-        nextMemoryLayer.reserved_size = ALIGN64(memorySize);
     } else {
         // We may need to extend the memory buffer if the connected input size is bigger, for example for a concat connection
         gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out));
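For reference, ALIGN64 rounds a byte count up to the next multiple of 64 so every state buffer starts and ends on a 64-byte boundary. A minimal sketch of the assumed semantics (the plugin's actual macro may be spelled differently):

// Assumed behaviour of ALIGN64: round up to the next multiple of 64 bytes.
#include <cstddef>

constexpr size_t align64(size_t bytes) {
    return ((bytes + 63) / 64) * 64;
}

static_assert(align64(1) == 64, "a single byte still occupies one 64-byte block");
static_assert(align64(64) == 64, "aligned sizes are unchanged");
static_assert(align64(65) == 128, "one byte over spills into a second block");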
@@ -2592,27 +2591,23 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
     // TODO: this is duplicate with connect output
     auto& memoryLayer = prevMemoryLayer->second;
     if (memoryLayer.reserved_size == 0) {
-        auto memorySize = InferenceEngine::details::product(memoryLayer.getDims()) * memoryLayer.elementSizeBytes();
+        memoryLayer.reserved_size = ALIGN64(memoryLayer.getByteSize());
         // connectTo is used to indicate that the memory layer should be bound to the given buffer
         if (connectTo) {
-            memorySize = std::max(memorySize, num_data_bytes_in);
-            gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &memoryLayer.gna_ptr, ALIGN64(memorySize), 64);
+            memoryLayer.reserved_size = ALIGN64(std::max(memoryLayer.reserved_size, num_data_bytes_in));
+            gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &memoryLayer.gna_ptr, memoryLayer.reserved_size, 64);
             gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset);
         } else {
-            if (num_data_bytes_in < memorySize + offset) {
+            if (ALIGN64(num_data_bytes_in) < ALIGN64(memoryLayer.reserved_size + offset)) {
                 THROW_GNA_LAYER_EXCEPTION(layer) << " invalid allocation request of "
-                    << num_data_bytes_in << " is more than state tensor size of: " << memorySize + offset;
+                    << num_data_bytes_in << " is more than state tensor size of: " << memoryLayer.reserved_size + offset;
             }
             gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset, ALIGN64(num_data_bytes_in));
         }
-        memoryLayer.reserved_size = ALIGN64(memorySize);
     } else {
         // We may need to extend the memory buffer if the connected input size is bigger, for example for a concat connection
         gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
     }
     return prevLayer;
 }
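To make the new bookkeeping concrete, here is a small worked example under assumed, illustrative values (a 10-float state and a 100-byte concat request; align64 as sketched above — none of these numbers come from the commit itself):

#include <algorithm>
#include <cstddef>

constexpr size_t align64(size_t bytes) { return ((bytes + 63) / 64) * 64; }

int main() {
    // getByteSize() of a 10-float state: 10 * 4 = 40 bytes, reserved as 64.
    size_t reserved_size = align64(10 * sizeof(float));                   // 64
    // A concat connection then asks for more; the reservation grows.
    size_t num_data_bytes_in = 100;
    reserved_size = align64(std::max(reserved_size, num_data_bytes_in)); // 128
    return reserved_size == 128 ? 0 : 1;
}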

View File

@@ -900,6 +900,9 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
     if (sortedNet.empty()) {
         THROW_GNA_EXCEPTION << "Sorted network is empty";
     }
 
+    // Copy operations connected to a memory layer (Assign to a state variable) should be executed only after all functional layers have been calculated.
+    // To keep it simple, these Copy operations are moved to the end of the execution list.
+    std::stable_partition(sortedNet.begin(), sortedNet.end(), [&](CNNLayerPtr layer) { return !LayerInfo(layer).isCopyToMemory(); });
     std::vector<CNNLayerPtr> sortedNoMem;
     std::unordered_map<std::string, std::vector<InferenceEngine::CNNLayerPtr>> memoryPairs;
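A minimal sketch of the reordering, with plain integers standing in for layers (negative values play the role of Copy-to-memory layers): std::stable_partition keeps every element satisfying the predicate in front, moves the rest to the back, and preserves relative order within both groups, so the functional layers still execute in topological order.

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
    std::vector<int> layers{1, -2, 3, -4, 5};
    // Keep "functional" layers (>= 0) in front, move the rest to the back.
    std::stable_partition(layers.begin(), layers.end(), [](int l) { return l >= 0; });
    assert((layers == std::vector<int>{1, 3, 5, -2, -4}));
    return 0;
}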

View File

@@ -338,6 +338,17 @@ class LayerInfo {
     bool isMemory() const noexcept {
         return isOfType("memory");
     }
+    // @brief verify that this is an Assign layer (Copy -> Memory)
+    bool isCopyToMemory() const {
+        if (isCopy()) {
+            for (auto&& out : getInputTo(layer->outData.front())) {
+                if (LayerInfo(out.second).isMemory()) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
     bool isCrop() const noexcept {
         return isOfType("crop");
     }
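A hypothetical usage sketch (countCopyToMemory is an illustrative helper, not part of this commit; it assumes the CNNLayerPtr and LayerInfo declarations from the headers above): count the Copy layers that LoadNetwork now moves to the end of the execution list.

#include <algorithm>
#include <cstddef>
#include <vector>

// Illustrative helper (hypothetical): counts the layers that the
// stable_partition in LoadNetwork moves to the back of the list.
size_t countCopyToMemory(const std::vector<InferenceEngine::CNNLayerPtr>& sortedNet) {
    return static_cast<size_t>(std::count_if(sortedNet.begin(), sortedNet.end(), [](const InferenceEngine::CNNLayerPtr& layer) {
        return GNAPluginNS::LayerInfo(layer).isCopyToMemory();
    }));
}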

View File

@@ -4,7 +4,8 @@
 #pragma once
 
-#include <legacy/ie_layers.h>
+#include "legacy/ie_layers.h"
+#include "debug.h"
 
 namespace GNAPluginNS {
 /**
@@ -25,6 +26,12 @@ public:
     InferenceEngine::SizeVector getDims() const {
         return inputLayer->outData.front()->getDims();
     }
+    /**
+     * @brief Get the size required for the GNA memory buffer
+     */
+    size_t getByteSize() const {
+        return InferenceEngine::details::product(getDims()) * elementSizeBytes();
+    }
     /**
      * @brief Reset the gna memory
      */
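For example (illustrative shape, assuming 4-byte f32 elements): a state of dims {1, 64} yields 1 * 64 * 4 = 256 bytes, which the connectOutput/connectInput callers above then round up with ALIGN64 before reserving GNA memory. A minimal stand-in for the product-of-dims computation:

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Assumed equivalent of InferenceEngine::details::product over a dims vector.
size_t product(const std::vector<size_t>& dims) {
    return std::accumulate(dims.begin(), dims.end(), size_t{1}, std::multiplies<size_t>{});
}

int main() {
    // {1, 64} f32 elements -> 256 bytes; ALIGN64(256) == 256, so nothing is wasted here.
    return product({1, 64}) * sizeof(float) == 256 ? 0 : 1;
}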

View File

@@ -7,6 +7,8 @@
 #include <cstdint>
 #include <vector>
 
+#include "log/debug.hpp"
+
 namespace GNAPluginNS {
 namespace memory {

View File

@@ -0,0 +1,134 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <memory>
#include <string>
#include <tuple>
#include <vector>
#include <ie_core.hpp>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/pass/convert_prc.hpp"
typedef std::tuple<
InferenceEngine::Precision, // Network Precision
std::string, // Target Device
std::map<std::string, std::string>, // Configuration
std::vector<size_t> // shape to split
> concat_memory_test_params;
namespace LayerTestsDefinitions {
class ConcatMemoryTest : public testing::WithParamInterface<concat_memory_test_params>,
public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<concat_memory_test_params> obj) {
InferenceEngine::Precision net_prc;
std::string targetDevice;
std::map<std::string, std::string> configuration;
std::vector<size_t> input_shape;
std::tie(net_prc, targetDevice, configuration, input_shape) = obj.param;
std::ostringstream result;
result << "net_prc=" << net_prc.name() << "_";
result << "device=" << targetDevice << "_";
for (auto const& config_item : configuration) {
result << "_config_item=" << config_item.first << "_" << config_item.second;
}
result << "_input_shape=" << CommonTestUtils::vec2str(input_shape);
return result.str();
}
protected:
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override {
InferenceEngine::Blob::Ptr blob = make_blob_with_precision(info.getTensorDesc());
blob->allocate();
auto* raw_blob_data_ptr = blob->buffer().as<float*>();
std::vector<float> values = CommonTestUtils::generate_float_numbers(blob->size(), -2.f, 2.f);
for (size_t i = 0; i < blob->size(); i++) {
raw_blob_data_ptr[i] = values[i];
}
return blob;
}
void SetUp() override {
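// Topology under test: a Parameter is reshaped and concatenated with
// ReadValue(state); the Concat output is multiplied elementwise into the
// Result, while a Split of the same Concat feeds an Assign back into the
// state, so the Assign (Copy -> Memory) must be scheduled last.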
InferenceEngine::Precision net_prc;
std::vector<size_t> input_shape;
std::tie(net_prc, targetDevice, configuration, input_shape) = this->GetParam();
auto ng_prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(net_prc);
size_t in_total_dims_size = ov::shape_size(input_shape);
auto params = ngraph::builder::makeParams(ng_prc, {{1, in_total_dims_size}});
auto reshape_pattern = std::make_shared<ngraph::opset9::Constant>(ov::element::Type_t::i64, ov::Shape{2}, input_shape);
auto reshape = std::make_shared<ngraph::opset9::Reshape>(params[0], reshape_pattern, false);
ov::op::util::VariableInfo vi{};
vi.data_shape = ov::PartialShape(input_shape);
vi.variable_id = "test_variable";
vi.data_type = ov::element::Type_t::f32;
const auto var = std::make_shared<ov::op::util::Variable>(vi);
std::vector<float> initial_state = CommonTestUtils::generate_float_numbers(in_total_dims_size, -3.f, 3.f);
auto initial_state_node = ngraph::builder::makeConstant(ov::element::Type_t::f32, input_shape, initial_state);
auto readValue = std::make_shared<ngraph::opset9::ReadValue>(initial_state_node, var);
const int axis = 1;
ov::OutputVector to_concat{readValue, reshape};
auto concat = ngraph::builder::makeConcat(to_concat, axis);
const auto concat_shape = concat->get_output_shape(0);
const auto concat_shape_size = ov::shape_size(concat_shape);
auto eltwise_data = CommonTestUtils::generate_float_numbers(concat_shape_size, -1.f, 1.f);
auto eltwise_node = ngraph::builder::makeConstant(ov::element::Type_t::f32, concat_shape, eltwise_data);
auto eltwise_result_node = std::make_shared<ngraph::opset9::Multiply>(concat, eltwise_node);
ov::ResultVector results{std::make_shared<ngraph::opset9::Result>(eltwise_result_node)};
auto split_node = ngraph::builder::makeSplit(concat, ng_prc, 2, axis);
auto assign_node = std::make_shared<ngraph::opset9::Assign>(split_node->output(1), var);
ngraph::SinkVector sinks{assign_node};
function = std::make_shared<ov::Model>(results, sinks, params);
}
};
TEST_P(ConcatMemoryTest, CompareWithRefImpl) {
Run();
};
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
};
const std::vector<std::map<std::string, std::string>> configs = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}
},
{
{"GNA_DEVICE_MODE", "GNA_SW_FP32"}
}
};
const std::vector<std::vector<size_t>> shapes {
{1, 64},
};
INSTANTIATE_TEST_SUITE_P(smoke_concat_memory, ConcatMemoryTest,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configs),
::testing::ValuesIn(shapes)),
ConcatMemoryTest::getTestCaseName);
} // namespace LayerTestsDefinitions