[GNA] Fixed error in delayed copy insertion (#7944)
* [GNA] Fixed error with delayed copy insertion * [GNA] Added test
This commit is contained in:
parent
d21572d7cb
commit
e9bde06f7b
@ -824,6 +824,30 @@ void InsertIdentityLayerPass::run() {
|
||||
|
||||
void InsertCopyLayerPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertCopyLayerPass");
|
||||
using FuncChildrenInfo = std::tuple<
|
||||
CNNLayerPtr, // parent layer
|
||||
CNNLayerPtr, // child layer
|
||||
int32_t // input index
|
||||
>;
|
||||
// Recursively collects the functional layers reachable from currentLayer, walking
// through non-functional layers on the way.
// Returns one FuncChildrenInfo (parent layer, child layer, input index) per layer found;
// parentLayer / input_idx describe the edge through which currentLayer was reached.
// Only outData[0] of a non-functional layer is followed.
|
||||
std::function<std::vector<FuncChildrenInfo>(CNNLayerPtr, CNNLayerPtr, int32_t)> find_func_layers =
|
||||
// The std::function is captured by reference so the lambda can call itself.
[&find_func_layers](CNNLayerPtr currentLayer, CNNLayerPtr parentLayer, int32_t input_idx) {
|
||||
// Stop here when the layer is functional, or when it is non-functional but has
// no outputs / no consumers on its first output; report it with the incoming edge.
if (!LayerInfo(currentLayer).isNonFunctional() ||
|
||||
currentLayer->outData.size() == 0 ||
|
||||
getInputTo(currentLayer->outData[0]).size() == 0) {
|
||||
return std::vector<FuncChildrenInfo>{std::make_tuple(parentLayer, currentLayer, input_idx)};
|
||||
}
|
||||
std::vector<FuncChildrenInfo> results;
|
||||
// Visit every consumer of the first output of this non-functional layer.
for (size_t i = 0; i < getInputTo(currentLayer->outData[0]).size(); ++i) {
|
||||
// The predicate always returns false (presumably: skip nothing, i.e. take the i-th direct consumer).
auto next_layer = CNNNetGetNextLayerSkipCertain(currentLayer, 0, i,
|
||||
[](CNNLayerPtr origin) {return false; }).first;
|
||||
// Recurse with currentLayer as the new parent. The input index passed on is the first
// index returned for the outData[0] -> next_layer connection
// (assumes at least one index is returned — TODO confirm).
auto result = find_func_layers(next_layer, currentLayer,
|
||||
CNNLayerFindInsDataIdxes(currentLayer->outData[0], next_layer)[0]);
|
||||
results.insert(std::end(results), std::begin(result), std::end(result));
|
||||
}
|
||||
return results;
|
||||
};
|
||||
|
||||
// Copy layer insertion happens in a few cases:
|
||||
// Crop output goes to concat layer -> copy layer insertion
|
||||
// A split-off part of the input goes to concat layer -> copy layer insertion
|
||||
@ -854,37 +878,24 @@ void InsertCopyLayerPass::run() {
|
||||
|
||||
// Crop -> Concat, Input -> Split -> Concat and Concat -> Memory cases
|
||||
if ((LayerInfo(l).isCrop() && !LayerInfo(l).isCropAffined()) || LayerInfo(l).isConcat() || LayerInfo(l).isSplit()) {
|
||||
std::vector<std::tuple<CNNLayerPtr, CNNLayerPtr, size_t>> copy_insertion_tuples;
|
||||
std::vector<std::tuple<CNNLayerPtr, CNNLayerPtr, size_t>> delayed_copy_insertion_tuples;
|
||||
std::vector<FuncChildrenInfo> copy_insertion_tuples;
|
||||
std::vector<FuncChildrenInfo> delayed_copy_insertion_tuples;
|
||||
for (auto output : l->outData) {
|
||||
auto& inputTo = getInputTo(output);
|
||||
for (auto& childLayer : inputTo) {
|
||||
auto original_child = childLayer.second;
|
||||
auto original_parent = l;
|
||||
auto current_layer = original_child;
|
||||
std::vector<int> connections = CNNLayerFindInsDataIdxes(output, original_child);
|
||||
|
||||
std::vector<int> connections = CNNLayerFindInsDataIdxes(output, childLayer.second);
|
||||
for (auto input_idx : connections) {
|
||||
while (LayerInfo(current_layer).isNonFunctional()) {
|
||||
if (current_layer->outData.size() == 0) break;
|
||||
if (getInputTo(current_layer->outData[0]).size() == 0) break;
|
||||
|
||||
auto next_layer = CNNNetGetNextLayerSkipCertain(current_layer, 0, 0, [](CNNLayerPtr origin) {return false; }).first;
|
||||
if (current_layer->outData.size() == 1 && getInputTo(current_layer->outData[0]).size() == 1 && original_child == current_layer) {
|
||||
original_child = next_layer;
|
||||
original_parent = current_layer;
|
||||
input_idx = CNNLayerFindInsDataIdxes(original_parent->outData[0], original_child)[0];
|
||||
}
|
||||
current_layer = next_layer;
|
||||
}
|
||||
|
||||
if ((LayerInfo(l).isConcat() || LayerInfo(l).isCrop() || LayerInfo(l).isSplit()) && LayerInfo(current_layer).isMemory()) {
|
||||
auto children_info = find_func_layers(childLayer.second, l, input_idx);
|
||||
for (const auto &child_info : children_info) {
|
||||
CNNLayerPtr child = std::get<1>(child_info);
|
||||
if ((LayerInfo(l).isConcat() || LayerInfo(l).isCrop() || LayerInfo(l).isSplit()) && LayerInfo(child).isMemory()) {
|
||||
// Concat|Split|Crop -> Memory case
|
||||
delayed_copy_insertion_tuples.push_back(std::make_tuple(original_parent, original_child, input_idx));
|
||||
} else if ((LayerInfo(l).isSplit() || LayerInfo(l).isCrop()) && LayerInfo(current_layer).isConcat()) {
|
||||
delayed_copy_insertion_tuples.push_back(child_info);
|
||||
} else if ((LayerInfo(l).isSplit() || LayerInfo(l).isCrop()) && LayerInfo(child).isConcat()) {
|
||||
// Split|Crop -> Concat case
|
||||
// concat may be connected to previous layer with multiple connections
|
||||
copy_insertion_tuples.push_back(std::make_tuple(original_parent, original_child, input_idx));
|
||||
copy_insertion_tuples.push_back(child_info);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -13,14 +13,28 @@ namespace {
|
||||
// Network precisions the test suites in this file are instantiated with (only FP32 is listed).
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
|
||||
};
|
||||
|
||||
// Plugin configurations passed to the delayed-copy test suites.
// NOTE(review): this span looks like rendered-diff residue. The single-map definition
// (GNA_COMPACT_MODE) is never closed and is immediately followed by a vector-of-maps
// definition of the same name (two GNA_DEVICE_MODE variants); presumably the first is
// the removed version and the second its replacement — confirm against the repository.
std::map<std::string, std::string> additional_config = {
|
||||
{"GNA_COMPACT_MODE", "NO"}
|
||||
std::vector<std::map<std::string, std::string>> additional_config = {
|
||||
{{"GNA_DEVICE_MODE", "GNA_SW_FP32"}},
|
||||
{{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}}
|
||||
};
|
||||
|
||||
// Memory layer sizes the suites are parameterized over.
// The fixtures' SetUp() assert memory_size % 2 == 0 and memory_size % 8 == 0 respectively,
// so every value listed here has to be a multiple of 8.
std::vector<size_t> memory_sizes = {
|
||||
128, 256, 32
|
||||
};
|
||||
|
||||
// Instantiates DelayedCopyTest for the GNA device over netPrecisions and the configs above.
// NOTE(review): this span looks like rendered-diff residue. Combine(...) is closed twice
// and two name generators are given (DelayedCopyTest::getTestCaseName and
// DelayedCopyTestBase::getTestCaseName); presumably the Values(additional_config) /
// DelayedCopyTest::getTestCaseName lines are the removed version and the
// ValuesIn(additional_config) / ValuesIn(memory_sizes) / DelayedCopyTestBase lines
// the replacement — confirm against the repository.
INSTANTIATE_TEST_SUITE_P(smoke_delayed_copy_layer, DelayedCopyTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
::testing::Values(additional_config)),
|
||||
DelayedCopyTest::getTestCaseName);
|
||||
::testing::ValuesIn(additional_config),
|
||||
::testing::ValuesIn(memory_sizes)),
|
||||
DelayedCopyTestBase::getTestCaseName);
|
||||
|
||||
// Instantiates DelayedCopyAfterReshapeWithMultipleConnTest on the GNA device for every
// combination of network precision, plugin configuration and memory size listed in this file.
// Test names are produced by DelayedCopyTestBase::getTestCaseName.
INSTANTIATE_TEST_SUITE_P(smoke_delayed_copy_layer, DelayedCopyAfterReshapeWithMultipleConnTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
::testing::ValuesIn(additional_config),
|
||||
::testing::ValuesIn(memory_sizes)),
|
||||
DelayedCopyTestBase::getTestCaseName);
|
||||
} // namespace
|
||||
|
@ -12,4 +12,8 @@ TEST_P(DelayedCopyTest, CompareWithRefs) {
|
||||
Run();
|
||||
};
|
||||
|
||||
// Parameterized test body: the whole scenario is driven by the fixture's Run().
// The fixture class declares no Run() of its own, so this is presumably
// DelayedCopyTestBase::Run() — confirm against the header.
TEST_P(DelayedCopyAfterReshapeWithMultipleConnTest, CompareWithRefs) {
|
||||
Run();
|
||||
};
|
||||
|
||||
} // namespace SubgraphTestsDefinitions
|
||||
|
@ -18,18 +18,35 @@ namespace SubgraphTestsDefinitions {
|
||||
// Parameter tuple of the delayed-copy test fixtures.
// NOTE(review): this span looks like rendered-diff residue. One typedef is closed twice,
// first as ConcatSplitReluTuple (three elements) and then as DelayedCopyTuple (with an
// additional size_t memory layer size); presumably the former is the removed version and
// the latter its replacement — confirm against the repository.
typedef std::tuple<
|
||||
InferenceEngine::Precision, // Network precision
|
||||
std::string, // Device name
|
||||
std::map<std::string, std::string> //Configuration
|
||||
> ConcatSplitReluTuple;
|
||||
std::map<std::string, std::string>, // Configuration
|
||||
size_t // Memory layer size
|
||||
> DelayedCopyTuple;
|
||||
|
||||
// Common base of the delayed-copy fixtures: declares the memory initialisation helper,
// the Run() override, the stored initial memory values and the test-name generator, and
// leaves switchToNgraphFriendlyModel() to the derived fixtures (pure virtual).
// NOTE(review): this span looks like rendered-diff residue. It carries two class heads
// (DelayedCopyTest / DelayedCopyTestBase), two declarations of switchToNgraphFriendlyModel()
// and two getTestCaseName() signatures; presumably the ConcatSplitReluTuple-based lines are
// the removed version — confirm against the repository.
class DelayedCopyTest
|
||||
: public testing::WithParamInterface<ConcatSplitReluTuple>,
|
||||
class DelayedCopyTestBase
|
||||
: public testing::WithParamInterface<DelayedCopyTuple>,
|
||||
public LayerTestsUtils::LayerTestsCommon {
|
||||
private:
|
||||
void switchToNgraphFriendlyModel();
|
||||
// Loads memory_init into the executable network's memory states.
void InitMemory();
|
||||
// Implemented by each fixture; called by Run() between Infer() and Validate().
virtual void switchToNgraphFriendlyModel() = 0;
|
||||
protected:
|
||||
void Run() override;
|
||||
// Initial memory contents, generated in the fixtures' SetUp().
std::vector<float> memory_init;
|
||||
public:
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<ConcatSplitReluTuple> &obj);
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<DelayedCopyTuple> &obj);
|
||||
};
|
||||
|
||||
// Delayed-copy fixture built on DelayedCopyTestBase; provides SetUp() and the
// switchToNgraphFriendlyModel() override required by the base class.
// NOTE(review): the `void Run() override;` declaration below may be a removed diff line —
// the only DelayedCopyTest::Run() definition visible in this rendering is a bare header
// line with no body of its own. Confirm against the repository.
class DelayedCopyTest : public DelayedCopyTestBase {
|
||||
private:
|
||||
void switchToNgraphFriendlyModel() override;
|
||||
protected:
|
||||
void SetUp() override;
|
||||
void Run() override;
|
||||
};
|
||||
|
||||
// Delayed-copy fixture for the graph in which the memory value is reshaped and the
// reshaped concat output feeds more than one consumer (the Assign and a Sigmoid).
// Provides SetUp() and the switchToNgraphFriendlyModel() override required by the base class.
class DelayedCopyAfterReshapeWithMultipleConnTest : public DelayedCopyTestBase {
|
||||
private:
|
||||
void switchToNgraphFriendlyModel() override;
|
||||
protected:
|
||||
void SetUp() override;
|
||||
};
|
||||
|
||||
} // namespace SubgraphTestsDefinitions
|
||||
|
@ -5,35 +5,73 @@
|
||||
#include "shared_test_classes/subgraph/delayed_copy_layer.hpp"
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
std::string DelayedCopyTest::getTestCaseName(const testing::TestParamInfo<ConcatSplitReluTuple> &obj) {
|
||||
// Writes memory_init into the memory states of the loaded executable network.
// Every state whose name contains "id" (the variable id used by the ReadValue/Assign
// nodes built in the fixtures) gets a blob filled from memory_init; any other state
// fails the test.
void DelayedCopyTestBase::InitMemory() {
|
||||
// Presumably silences deprecation warnings for the state API used below — confirm.
IE_SUPPRESS_DEPRECATED_START
|
||||
auto states = executableNetwork.QueryState();
|
||||
for (auto& state : states) {
|
||||
auto name = state.GetName();
|
||||
// Substring match, not equality: any state name containing "id" is accepted.
if (name.find("id") != std::string::npos) {
|
||||
// The blob takes its tensor descriptor from the state's current blob and its
// values from memory_init.
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetState()->getTensorDesc(),
|
||||
memory_init.data(), memory_init.size());
|
||||
state.SetState(blob);
|
||||
} else {
|
||||
GTEST_FAIL() << "unknown memory state";
|
||||
}
|
||||
}
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
}
|
||||
|
||||
// Test driver shared by the delayed-copy fixtures: load the network, seed its memory
// states, infer, then swap in the fixture's memory-free model before validating.
void DelayedCopyTestBase::Run() {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
|
||||
LoadNetwork();
|
||||
// Memory states must be seeded after the network is loaded and before inference.
InitMemory();
|
||||
GenerateInputs();
|
||||
Infer();
|
||||
// Replaces `function` with the fixture's variant built on a Constant instead of
// ReadValue/Assign; presumably Validate() then computes the reference from it — confirm.
switchToNgraphFriendlyModel();
|
||||
Validate();
|
||||
}
|
||||
|
||||
// Builds the printable test name from the parameter tuple:
//   netPRC=<precision>_targetDevice=<device>_memorySize=<size>
// followed by _configItem=<key>_<value> for every configuration entry.
// NOTE(review): this span looks like rendered-diff residue. obj.param is unpacked twice,
// once into three variables and once into four; presumably the three-element std::tie is
// the removed line — confirm against the repository.
std::string DelayedCopyTestBase::getTestCaseName(const testing::TestParamInfo<DelayedCopyTuple> &obj) {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
std::string targetName;
|
||||
std::map<std::string, std::string> additional_config;
|
||||
std::tie(netPrecision, targetName, additional_config) = obj.param;
|
||||
size_t memory_size;
|
||||
std::tie(netPrecision, targetName, additional_config, memory_size) = obj.param;
|
||||
std::ostringstream results;
|
||||
|
||||
results << "netPRC=" << netPrecision.name() << "_";
|
||||
results << "targetDevice=" << targetName << "_";
|
||||
results << "memorySize=" << memory_size;
|
||||
// Config entries are appended in the map's iteration order.
for (auto const& configItem : additional_config) {
|
||||
results << "_configItem=" << configItem.first << "_" << configItem.second;
|
||||
}
|
||||
return results.str();
|
||||
}
|
||||
|
||||
void DelayedCopyTest::SetUp() {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
std::map<std::string, std::string> additional_config;
|
||||
std::tie(netPrecision, targetDevice, additional_config) = this->GetParam();
|
||||
size_t memory_size;
|
||||
std::tie(netPrecision, targetDevice, additional_config, memory_size) = this->GetParam();
|
||||
configuration.insert(additional_config.begin(), additional_config.end());
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
auto input = ngraph::builder::makeParams(ngPrc, {{1, 384}});
|
||||
|
||||
auto mem_c = std::make_shared<ngraph::op::Constant>(ngPrc, ngraph::Shape{1, 128}, std::vector<float>{0});
|
||||
ASSERT_EQ(memory_size % 2, 0);
|
||||
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
auto input = ngraph::builder::makeParams(ngPrc, {{1, 3 * memory_size}});
|
||||
|
||||
memory_init = CommonTestUtils::generate_float_numbers(memory_size, -0.2f, 0.2f);
|
||||
|
||||
auto mem_c = std::make_shared<ngraph::op::Constant>(ngPrc, ngraph::Shape{1, memory_size}, memory_init);
|
||||
|
||||
auto mem_r = std::make_shared<ngraph::opset3::ReadValue>(mem_c, "id");
|
||||
|
||||
auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{mem_r, input[0]}, 1);
|
||||
auto split = ngraph::builder::makeVariadicSplit(concat, {384, 128}, 1);
|
||||
auto split = ngraph::builder::makeVariadicSplit(concat, {3 * memory_size, memory_size}, 1);
|
||||
auto mem_w = std::make_shared<ngraph::opset3::Assign>(split->output(1), "id");
|
||||
|
||||
auto VariadicSplit = ngraph::builder::makeVariadicSplit(concat, {64, 448}, 1);
|
||||
auto VariadicSplit = ngraph::builder::makeVariadicSplit(concat, {memory_size / 2, 3 * memory_size + memory_size / 2}, 1);
|
||||
auto relu2 = std::make_shared<ngraph::opset1::Sigmoid>(VariadicSplit->output(1));
|
||||
|
||||
mem_w->add_control_dependency(mem_r);
|
||||
@ -46,29 +84,85 @@ namespace SubgraphTestsDefinitions {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
std::map<std::string, std::string> config;
|
||||
std::map<std::string, std::string> additional_config;
|
||||
std::tie(netPrecision, targetDevice, additional_config) = this->GetParam();
|
||||
size_t memory_size;
|
||||
std::tie(netPrecision, targetDevice, additional_config, memory_size) = this->GetParam();
|
||||
|
||||
ASSERT_EQ(memory_size % 2, 0);
|
||||
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
auto input = ngraph::builder::makeParams(ngPrc, {{1, 384}});
|
||||
auto input = ngraph::builder::makeParams(ngPrc, {{1, 3 * memory_size}});
|
||||
|
||||
auto mem_c = std::make_shared<ngraph::op::Constant>(ngPrc, ngraph::Shape{1, 128}, std::vector<float>{0});
|
||||
auto mem_c = std::make_shared<ngraph::op::Constant>(ngPrc, ngraph::Shape{1, memory_size}, memory_init);
|
||||
auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{mem_c, input[0]}, 1);
|
||||
auto split = ngraph::builder::makeVariadicSplit(concat, {384, 128}, 1);
|
||||
auto split = ngraph::builder::makeVariadicSplit(concat, {3 * memory_size, memory_size}, 1);
|
||||
|
||||
auto VariadicSplit = ngraph::builder::makeVariadicSplit(concat, {64, 448}, 1);
|
||||
auto VariadicSplit = ngraph::builder::makeVariadicSplit(concat, {memory_size / 2, 3 * memory_size + memory_size / 2}, 1);
|
||||
auto relu2 = std::make_shared<ngraph::opset1::Sigmoid>(VariadicSplit->output(1));
|
||||
|
||||
function = std::make_shared<ngraph::Function>(relu2, input, "delayed_copy_layer_nonmemory");
|
||||
functionRefs = ngraph::clone_function(*function);
|
||||
}
|
||||
|
||||
void DelayedCopyTest::Run() {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
// Builds the memory-based test graph:
//   Constant{8, N/8} -> ReadValue("id") -> Reshape{1, N} -> Split -> output(0)
//   Concat(split output 0, input{1, N/2}) -> Reshape{8, N/8} -> Assign("id")
//                                                            -> Sigmoid -> Reshape{1, N}
// where N is memory_size. The second reshape feeds both the Assign and the Sigmoid.
// NOTE(review): the LoadNetwork() ... Validate() calls in the middle of this span look like
// rendered-diff residue (presumably the removed body of the old DelayedCopyTest::Run());
// confirm against the repository.
void DelayedCopyAfterReshapeWithMultipleConnTest::SetUp() {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
std::map<std::string, std::string> additional_config;
|
||||
size_t memory_size;
|
||||
std::tie(netPrecision, targetDevice, additional_config, memory_size) = this->GetParam();
|
||||
configuration.insert(additional_config.begin(), additional_config.end());
|
||||
|
||||
LoadNetwork();
|
||||
GenerateInputs();
|
||||
Infer();
|
||||
switchToNgraphFriendlyModel();
|
||||
Validate();
|
||||
// The graph below uses memory_size / 8 and memory_size / 2 as dimensions.
ASSERT_EQ(memory_size % 8, 0);
|
||||
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
auto input = ngraph::builder::makeParams(ngPrc, {{1, memory_size / 2}});
|
||||
|
||||
// Initial memory contents; kept in the fixture so the other fixture methods can reuse them.
memory_init = CommonTestUtils::generate_float_numbers(memory_size, -0.2f, 0.2f);
|
||||
|
||||
auto mem_c = ngraph::builder::makeConstant(ngPrc, ngraph::Shape{8, memory_size / 8}, memory_init);
|
||||
auto mem_r = std::make_shared<ngraph::opset3::ReadValue>(mem_c, "id");
|
||||
auto reshape_pattern1 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, memory_size});
|
||||
auto reshape1 = std::make_shared<ngraph::opset1::Reshape>(mem_r, reshape_pattern1, false);
|
||||
// Presumably two equal parts along axis 1 — confirm against the makeSplit signature.
auto split = ngraph::builder::makeSplit(reshape1, ngPrc, 2, 1);
|
||||
|
||||
auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{split->output(0), input[0]}, 1);
|
||||
auto reshape_pattern2 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{8, memory_size / 8});
|
||||
auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(concat, reshape_pattern2, false);
|
||||
|
||||
// First consumer of reshape2: the value written back to variable "id".
auto mem_w = std::make_shared<ngraph::opset3::Assign>(reshape2, "id");
|
||||
|
||||
// Second consumer of reshape2 (the node is a Sigmoid despite the variable name).
auto relu = std::make_shared<ngraph::opset1::Sigmoid>(reshape2);
|
||||
auto reshape_pattern3 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, memory_size});
|
||||
auto reshape3 = std::make_shared<ngraph::opset1::Reshape>(relu, reshape_pattern3, false);
|
||||
|
||||
// Control dependencies tie the Assign to the ReadValue and to the graph result.
mem_w->add_control_dependency(mem_r);
|
||||
reshape3->add_control_dependency(mem_w);
|
||||
|
||||
function = std::make_shared<ngraph::Function>(reshape3, input, "delayed_copy_layer_reshape_memory");
|
||||
}
|
||||
|
||||
// Rebuilds `function` without memory nodes: the ReadValue is replaced by a Constant of
// shape {1, memory_size} holding memory_init, and no Assign is created. The rest of the
// graph (Reshape -> Split -> Concat with the input -> Reshape -> Sigmoid -> Reshape) has
// the same structure as the graph built in this fixture's SetUp().
void DelayedCopyAfterReshapeWithMultipleConnTest::switchToNgraphFriendlyModel() {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
// NOTE(review): `config` is never used in this function.
std::map<std::string, std::string> config;
|
||||
std::map<std::string, std::string> additional_config;
|
||||
size_t memory_size;
|
||||
// Re-reads the test parameters; this also assigns targetDevice again.
std::tie(netPrecision, targetDevice, additional_config, memory_size) = this->GetParam();
|
||||
|
||||
ASSERT_EQ(memory_size % 8, 0);
|
||||
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
auto input = ngraph::builder::makeParams(ngPrc, {{1, memory_size / 2}});
|
||||
|
||||
// Uses the memory_init values stored in the fixture as the constant's data.
auto mem_c = ngraph::builder::makeConstant(ngPrc, ngraph::Shape{1, memory_size}, memory_init);
|
||||
auto reshape_pattern1 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, memory_size});
|
||||
auto reshape1 = std::make_shared<ngraph::opset1::Reshape>(mem_c, reshape_pattern1, false);
|
||||
auto split = ngraph::builder::makeSplit(reshape1, ngPrc, 2, 1);
|
||||
|
||||
auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{split->output(0), input[0]}, 1);
|
||||
auto reshape_pattern2 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{8, memory_size / 8});
|
||||
auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(concat, reshape_pattern2, false);
|
||||
|
||||
// The node is a Sigmoid despite the variable name.
auto relu = std::make_shared<ngraph::opset1::Sigmoid>(reshape2);
|
||||
auto reshape_pattern3 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, memory_size});
|
||||
auto reshape3 = std::make_shared<ngraph::opset1::Reshape>(relu, reshape_pattern3, false);
|
||||
|
||||
function = std::make_shared<ngraph::Function>(reshape3, input, "delayed_copy_layer_reshape_nonmemory");
|
||||
}
|
||||
} // namespace SubgraphTestsDefinitions
|
||||
|
Loading…
Reference in New Issue
Block a user