[GNA] Fixed order of Assign layers (#14066)
This commit is contained in:
parent
b5557fc211
commit
c0941aba74
@ -2298,10 +2298,9 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
|
|||||||
auto &nextMemoryLayer = nextMemoryLayerIt->second;
|
auto &nextMemoryLayer = nextMemoryLayerIt->second;
|
||||||
// memory layer not yet initialized
|
// memory layer not yet initialized
|
||||||
if (nextMemoryLayer.reserved_size == 0) {
|
if (nextMemoryLayer.reserved_size == 0) {
|
||||||
auto memorySize = InferenceEngine::details::product(nextMemoryLayer.getDims()) * nextMemoryLayer.elementSizeBytes();
|
nextMemoryLayer.reserved_size = ALIGN64(nextMemoryLayer.getByteSize());
|
||||||
gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64);
|
gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, nextMemoryLayer.reserved_size, 64);
|
||||||
gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
|
gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
|
||||||
nextMemoryLayer.reserved_size = ALIGN64(memorySize);
|
|
||||||
} else {
|
} else {
|
||||||
// We may need to extend memory buffer if connected input size is bigger, for example for concat connection
|
// We may need to extend memory buffer if connected input size is bigger, for example for concat connection
|
||||||
gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out));
|
gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out));
|
||||||
@ -2592,27 +2591,23 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
|
|||||||
// TODO: this is duplicate with connect output
|
// TODO: this is duplicate with connect output
|
||||||
auto& memoryLayer = prevMemoryLayer->second;
|
auto& memoryLayer = prevMemoryLayer->second;
|
||||||
if (memoryLayer.reserved_size == 0) {
|
if (memoryLayer.reserved_size == 0) {
|
||||||
auto memorySize = InferenceEngine::details::product(memoryLayer.getDims()) * memoryLayer.elementSizeBytes();
|
memoryLayer.reserved_size = ALIGN64(memoryLayer.getByteSize());
|
||||||
|
|
||||||
// connectTo used for indicate that memory layer should be bound to given buffer
|
// connectTo used for indicate that memory layer should be bound to given buffer
|
||||||
if (connectTo) {
|
if (connectTo) {
|
||||||
memorySize = std::max(memorySize, num_data_bytes_in);
|
memoryLayer.reserved_size = ALIGN64(std::max(memoryLayer.reserved_size, num_data_bytes_in));
|
||||||
gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &memoryLayer.gna_ptr, ALIGN64(memorySize), 64);
|
gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &memoryLayer.gna_ptr, memoryLayer.reserved_size, 64);
|
||||||
gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset);
|
gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset);
|
||||||
} else {
|
} else {
|
||||||
if (num_data_bytes_in < memorySize + offset) {
|
if (ALIGN64(num_data_bytes_in) < ALIGN64(memoryLayer.reserved_size + offset)) {
|
||||||
THROW_GNA_LAYER_EXCEPTION(layer) <<" invalid allocation request of "
|
THROW_GNA_LAYER_EXCEPTION(layer) <<" invalid allocation request of "
|
||||||
<< num_data_bytes_in << " is more then state tensor size of: " << memorySize + offset;
|
<< num_data_bytes_in << " is more then state tensor size of: " << memoryLayer.reserved_size + offset;
|
||||||
}
|
}
|
||||||
gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset, ALIGN64(num_data_bytes_in));
|
gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset, ALIGN64(num_data_bytes_in));
|
||||||
}
|
}
|
||||||
|
|
||||||
memoryLayer.reserved_size = ALIGN64(memorySize);
|
|
||||||
} else {
|
} else {
|
||||||
// We may need to extend memory buffer if connected input size is bigger, for example for concat connection
|
// We may need to extend memory buffer if connected input size is bigger, for example for concat connection
|
||||||
gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
|
gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
|
||||||
}
|
}
|
||||||
|
|
||||||
return prevLayer;
|
return prevLayer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -900,6 +900,9 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
|
|||||||
if (sortedNet.empty()) {
|
if (sortedNet.empty()) {
|
||||||
THROW_GNA_EXCEPTION << "Sorted network is empty";
|
THROW_GNA_EXCEPTION << "Sorted network is empty";
|
||||||
}
|
}
|
||||||
|
// Copy operations connected to memory layer (Assign to state variable) should be executed when all functional layers are calculated.
|
||||||
|
// To keep it simple, these Copy operations are just moved to the end of the execution list
|
||||||
|
std::stable_partition(sortedNet.begin(), sortedNet.end(), [&](CNNLayerPtr layer){return !LayerInfo(layer).isCopyToMemory();});
|
||||||
|
|
||||||
std::vector<CNNLayerPtr> sortedNoMem;
|
std::vector<CNNLayerPtr> sortedNoMem;
|
||||||
std::unordered_map<std::string, std::vector<InferenceEngine::CNNLayerPtr>> memoryPairs;
|
std::unordered_map<std::string, std::vector<InferenceEngine::CNNLayerPtr>> memoryPairs;
|
||||||
|
@ -338,6 +338,17 @@ class LayerInfo {
|
|||||||
bool isMemory() const noexcept {
|
bool isMemory() const noexcept {
|
||||||
return isOfType("memory");
|
return isOfType("memory");
|
||||||
}
|
}
|
||||||
|
// @brief verify that it is Assign layer (Copy -> Memory)
|
||||||
|
bool isCopyToMemory() const {
|
||||||
|
if (isCopy()) {
|
||||||
|
for (auto&& out : getInputTo(layer->outData.front())) {
|
||||||
|
if (LayerInfo(out.second).isMemory()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
bool isCrop() const noexcept {
|
bool isCrop() const noexcept {
|
||||||
return isOfType("crop");
|
return isOfType("crop");
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,8 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <legacy/ie_layers.h>
|
#include "legacy/ie_layers.h"
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
namespace GNAPluginNS {
|
namespace GNAPluginNS {
|
||||||
/**
|
/**
|
||||||
@ -25,6 +26,12 @@ public:
|
|||||||
InferenceEngine::SizeVector getDims() const {
|
InferenceEngine::SizeVector getDims() const {
|
||||||
return inputLayer->outData.front()->getDims();
|
return inputLayer->outData.front()->getDims();
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* @brief Get size requred for the gna memory buffer
|
||||||
|
*/
|
||||||
|
size_t getByteSize() const {
|
||||||
|
return InferenceEngine::details::product(getDims()) * elementSizeBytes();
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* @brief Reset the gna memory
|
* @brief Reset the gna memory
|
||||||
*/
|
*/
|
||||||
|
@ -7,6 +7,8 @@
|
|||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "log/debug.hpp"
|
||||||
|
|
||||||
namespace GNAPluginNS {
|
namespace GNAPluginNS {
|
||||||
namespace memory {
|
namespace memory {
|
||||||
|
|
||||||
|
@ -0,0 +1,134 @@
|
|||||||
|
// Copyright (C) 2022 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <memory>
|
||||||
|
#include <tuple>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include <ie_core.hpp>
|
||||||
|
|
||||||
|
#include "common_test_utils/common_utils.hpp"
|
||||||
|
#include "functional_test_utils/plugin_cache.hpp"
|
||||||
|
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||||
|
#include "functional_test_utils/blob_utils.hpp"
|
||||||
|
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||||
|
#include "ngraph_functions/builders.hpp"
|
||||||
|
|
||||||
|
#include "ngraph_functions/pass/convert_prc.hpp"
|
||||||
|
|
||||||
|
typedef std::tuple<
|
||||||
|
InferenceEngine::Precision, // Network Precision
|
||||||
|
std::string, // Target Device
|
||||||
|
std::map<std::string, std::string>, // Configuration
|
||||||
|
std::vector<size_t> // shape to split
|
||||||
|
> concat_memory_test_params;
|
||||||
|
|
||||||
|
namespace LayerTestsDefinitions {
|
||||||
|
|
||||||
|
/**
 * @brief Parameterized test building a model in which a state variable is concatenated with the input.
 *
 * Graph built in SetUp():
 *   Parameter [1, N] -> Reshape(input_shape) --+
 *   ReadValue(constant initial state) ---------+-> Concat(axis 1) -> Multiply(constant) -> Result
 *                                                        |
 *                                                        +-> Split(2 parts, axis 1) -> output(1) -> Assign
 */
class ConcatMemoryTest : public testing::WithParamInterface<concat_memory_test_params>,
                         public LayerTestsUtils::LayerTestsCommon {
public:
    /**
     * @brief Composes the test-case name from network precision, device, configuration and input shape
     * @param obj gtest parameter info holding the test parameter tuple
     * @return generated test-case name
     */
    static std::string getTestCaseName(testing::TestParamInfo<concat_memory_test_params> obj) {
        const InferenceEngine::Precision& precision = std::get<0>(obj.param);
        const std::string& device = std::get<1>(obj.param);
        const std::map<std::string, std::string>& config = std::get<2>(obj.param);
        const std::vector<size_t>& shape = std::get<3>(obj.param);

        std::ostringstream name;
        name << "net_prc=" << precision.name() << "_";
        name << "device=" << device << "_";
        for (const auto& entry : config) {
            name << "_config_item=" << entry.first << "_" << entry.second;
        }
        name << "_input_shape=" << CommonTestUtils::vec2str(shape);
        return name.str();
    }

protected:
    /**
     * @brief Creates an input blob filled with generated float values in the range [-2, 2]
     * @param info description of the network input the blob is created for
     * @return allocated and filled blob
     */
    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override {
        InferenceEngine::Blob::Ptr input_blob = make_blob_with_precision(info.getTensorDesc());
        input_blob->allocate();

        // The blob memory is written as float data.
        float* destination = input_blob->buffer().as<float*>();
        const std::vector<float> generated = CommonTestUtils::generate_float_numbers(input_blob->size(), -2.f, 2.f);
        const size_t element_count = input_blob->size();
        for (size_t idx = 0; idx < element_count; ++idx) {
            destination[idx] = generated[idx];
        }
        return input_blob;
    }

    /**
     * @brief Unpacks the test parameters and builds the tested model (stored in `function`)
     */
    void SetUp() override {
        const auto& test_params = this->GetParam();
        const InferenceEngine::Precision net_precision = std::get<0>(test_params);
        targetDevice = std::get<1>(test_params);
        configuration = std::get<2>(test_params);
        const std::vector<size_t> input_shape = std::get<3>(test_params);

        auto ng_prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(net_precision);

        // Flat [1, N] input, reshaped to the requested shape.
        size_t flat_input_size = ov::shape_size(input_shape);
        auto params = ngraph::builder::makeParams(ng_prc, {{1, flat_input_size}});
        auto target_shape_node =
            std::make_shared<ngraph::opset9::Constant>(ov::element::Type_t::i64, ov::Shape{2}, input_shape);
        auto reshaped_input = std::make_shared<ngraph::opset9::Reshape>(params[0], target_shape_node, false);

        // State variable, read with a constant initial value.
        ov::op::util::VariableInfo variable_info{};
        variable_info.data_shape = ov::PartialShape(input_shape);
        variable_info.variable_id = "test_variable";
        variable_info.data_type = ov::element::Type_t::f32;
        const auto variable = std::make_shared<ov::op::util::Variable>(variable_info);
        std::vector<float> state_values = CommonTestUtils::generate_float_numbers(flat_input_size, -3.f, 3.f);
        auto state_init_node = ngraph::builder::makeConstant(ov::element::Type_t::f32, input_shape, state_values);
        auto read_value = std::make_shared<ngraph::opset9::ReadValue>(state_init_node, variable);

        // Concatenate the state with the reshaped input.
        const int axis = 1;
        ov::OutputVector concat_inputs{read_value, reshaped_input};
        auto concat = ngraph::builder::makeConcat(concat_inputs, axis);

        const auto concat_shape = concat->get_output_shape(0);
        const auto concat_elements = ov::shape_size(concat_shape);

        // Functional branch: element-wise multiply of the concat result by a constant.
        auto eltwise_values = CommonTestUtils::generate_float_numbers(concat_elements, -1.f, 1.f);
        auto eltwise_const = ngraph::builder::makeConstant(ov::element::Type_t::f32, concat_shape, eltwise_values);
        auto eltwise = std::make_shared<ngraph::opset9::Multiply>(concat, eltwise_const);
        ov::ResultVector results{std::make_shared<ngraph::opset9::Result>(eltwise)};

        // State branch: the second part of the split concat is assigned back to the variable.
        auto split = ngraph::builder::makeSplit(concat, ng_prc, 2, axis);
        auto assign = std::make_shared<ngraph::opset9::Assign>(split->output(1), variable);
        ngraph::SinkVector sinks{assign};

        function = std::make_shared<ov::Model>(results, sinks, params);
    }
};
|
||||||
|
|
||||||
|
// Executes the test flow via Run() for every parameter combination of ConcatMemoryTest.
// NOTE(review): Run() is not defined in this file; presumably inherited from LayerTestsCommon - confirm.
TEST_P(ConcatMemoryTest, CompareWithRefImpl) {
    Run();
}
|
||||||
|
|
||||||
|
const std::vector<InferenceEngine::Precision> netPrecisions = {
|
||||||
|
InferenceEngine::Precision::FP32,
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::vector<std::map<std::string, std::string>> configs = {
|
||||||
|
{
|
||||||
|
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{"GNA_DEVICE_MODE", "GNA_SW_FP32"}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::vector<std::vector<size_t>> shapes {
|
||||||
|
{1, 64},
|
||||||
|
};
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_SUITE_P(smoke_concat_memory, ConcatMemoryTest,
|
||||||
|
::testing::Combine(
|
||||||
|
::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||||
|
::testing::ValuesIn(configs),
|
||||||
|
::testing::ValuesIn(shapes)),
|
||||||
|
ConcatMemoryTest::getTestCaseName);
|
||||||
|
|
||||||
|
} // namespace LayerTestsDefinitions
|
Loading…
Reference in New Issue
Block a user