[GNA] Fixed an error in delayed copy insertion (#7944)

* [GNA] Fixed error with delayed copy insertion

* [GNA] Added test
This commit is contained in:
Elizaveta Lobanova 2021-10-14 19:04:15 +03:00 committed by GitHub
parent d21572d7cb
commit e9bde06f7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 201 additions and 61 deletions

View File

@ -824,6 +824,30 @@ void InsertIdentityLayerPass::run() {
void InsertCopyLayerPass::run() { void InsertCopyLayerPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertCopyLayerPass"); OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertCopyLayerPass");
using FuncChildrenInfo = std::tuple<
CNNLayerPtr, // parent layer
CNNLayerPtr, // child layer
int32_t // input index
>;
// recursively searches for children functional layers skipping non-functional ones
std::function<std::vector<FuncChildrenInfo>(CNNLayerPtr, CNNLayerPtr, int32_t)> find_func_layers =
[&find_func_layers](CNNLayerPtr currentLayer, CNNLayerPtr parentLayer, int32_t input_idx) {
if (!LayerInfo(currentLayer).isNonFunctional() ||
currentLayer->outData.size() == 0 ||
getInputTo(currentLayer->outData[0]).size() == 0) {
return std::vector<FuncChildrenInfo>{std::make_tuple(parentLayer, currentLayer, input_idx)};
}
std::vector<FuncChildrenInfo> results;
for (size_t i = 0; i < getInputTo(currentLayer->outData[0]).size(); ++i) {
auto next_layer = CNNNetGetNextLayerSkipCertain(currentLayer, 0, i,
[](CNNLayerPtr origin) {return false; }).first;
auto result = find_func_layers(next_layer, currentLayer,
CNNLayerFindInsDataIdxes(currentLayer->outData[0], next_layer)[0]);
results.insert(std::end(results), std::begin(result), std::end(result));
}
return results;
};
// Copy layer insertion happens in few cases: // Copy layer insertion happens in few cases:
// Crop output goes to concat layer -> copy layer insertion // Crop output goes to concat layer -> copy layer insertion
// Splitted part of input goes to concat layer -> copy layer insertion // Splitted part of input goes to concat layer -> copy layer insertion
@ -854,37 +878,24 @@ void InsertCopyLayerPass::run() {
// Crop -> Concat, Input -> Split -> Concat and Concat -> Memory cases // Crop -> Concat, Input -> Split -> Concat and Concat -> Memory cases
if ((LayerInfo(l).isCrop() && !LayerInfo(l).isCropAffined()) || LayerInfo(l).isConcat() || LayerInfo(l).isSplit()) { if ((LayerInfo(l).isCrop() && !LayerInfo(l).isCropAffined()) || LayerInfo(l).isConcat() || LayerInfo(l).isSplit()) {
std::vector<std::tuple<CNNLayerPtr, CNNLayerPtr, size_t>> copy_insertion_tuples; std::vector<FuncChildrenInfo> copy_insertion_tuples;
std::vector<std::tuple<CNNLayerPtr, CNNLayerPtr, size_t>> delayed_copy_insertion_tuples; std::vector<FuncChildrenInfo> delayed_copy_insertion_tuples;
for (auto output : l->outData) { for (auto output : l->outData) {
auto& inputTo = getInputTo(output); auto& inputTo = getInputTo(output);
for (auto& childLayer : inputTo) { for (auto& childLayer : inputTo) {
auto original_child = childLayer.second; std::vector<int> connections = CNNLayerFindInsDataIdxes(output, childLayer.second);
auto original_parent = l;
auto current_layer = original_child;
std::vector<int> connections = CNNLayerFindInsDataIdxes(output, original_child);
for (auto input_idx : connections) { for (auto input_idx : connections) {
while (LayerInfo(current_layer).isNonFunctional()) { auto children_info = find_func_layers(childLayer.second, l, input_idx);
if (current_layer->outData.size() == 0) break; for (const auto &child_info : children_info) {
if (getInputTo(current_layer->outData[0]).size() == 0) break; CNNLayerPtr child = std::get<1>(child_info);
if ((LayerInfo(l).isConcat() || LayerInfo(l).isCrop() || LayerInfo(l).isSplit()) && LayerInfo(child).isMemory()) {
auto next_layer = CNNNetGetNextLayerSkipCertain(current_layer, 0, 0, [](CNNLayerPtr origin) {return false; }).first; // Concat|Split|Crop -> Memory case
if (current_layer->outData.size() == 1 && getInputTo(current_layer->outData[0]).size() == 1 && original_child == current_layer) { delayed_copy_insertion_tuples.push_back(child_info);
original_child = next_layer; } else if ((LayerInfo(l).isSplit() || LayerInfo(l).isCrop()) && LayerInfo(child).isConcat()) {
original_parent = current_layer; // Split|Crop -> Concat case
input_idx = CNNLayerFindInsDataIdxes(original_parent->outData[0], original_child)[0]; // concat may be connected to previous layer with multiple connections
copy_insertion_tuples.push_back(child_info);
} }
current_layer = next_layer;
}
if ((LayerInfo(l).isConcat() || LayerInfo(l).isCrop() || LayerInfo(l).isSplit()) && LayerInfo(current_layer).isMemory()) {
// Concat|Split|Crop -> Memory case
delayed_copy_insertion_tuples.push_back(std::make_tuple(original_parent, original_child, input_idx));
} else if ((LayerInfo(l).isSplit() || LayerInfo(l).isCrop()) && LayerInfo(current_layer).isConcat()) {
// Split|Crop -> Concat case
// concat may be connected to previous layer with multiple connections
copy_insertion_tuples.push_back(std::make_tuple(original_parent, original_child, input_idx));
} }
} }
} }

View File

@ -13,14 +13,28 @@ namespace {
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32, std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
}; };
std::map<std::string, std::string> additional_config = { std::vector<std::map<std::string, std::string>> additional_config = {
{"GNA_COMPACT_MODE", "NO"} {{"GNA_DEVICE_MODE", "GNA_SW_FP32"}},
{{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}}
};
std::vector<size_t> memory_sizes = {
128, 256, 32
}; };
INSTANTIATE_TEST_SUITE_P(smoke_delayed_copy_layer, DelayedCopyTest, INSTANTIATE_TEST_SUITE_P(smoke_delayed_copy_layer, DelayedCopyTest,
::testing::Combine( ::testing::Combine(
::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GNA), ::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::Values(additional_config)), ::testing::ValuesIn(additional_config),
DelayedCopyTest::getTestCaseName); ::testing::ValuesIn(memory_sizes)),
DelayedCopyTestBase::getTestCaseName);
// Instantiates DelayedCopyAfterReshapeWithMultipleConnTest on the GNA device for every
// combination of network precision, device-mode configuration and memory layer size
// taken from netPrecisions, additional_config and memory_sizes.
INSTANTIATE_TEST_SUITE_P(smoke_delayed_copy_layer, DelayedCopyAfterReshapeWithMultipleConnTest,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(additional_config),
::testing::ValuesIn(memory_sizes)),
DelayedCopyTestBase::getTestCaseName);
} // namespace } // namespace

View File

@ -12,4 +12,8 @@ TEST_P(DelayedCopyTest, CompareWithRefs) {
Run(); Run();
}; };
// Parameterized test body: delegates entirely to the fixture's Run().
TEST_P(DelayedCopyAfterReshapeWithMultipleConnTest, CompareWithRefs) {
Run();
};
} // namespace SubgraphTestsDefinitions } // namespace SubgraphTestsDefinitions

View File

@ -16,20 +16,37 @@
namespace SubgraphTestsDefinitions { namespace SubgraphTestsDefinitions {
typedef std::tuple< typedef std::tuple<
InferenceEngine::Precision, //Network precision InferenceEngine::Precision, // Network precision
std::string, //Device name std::string, // Device name
std::map<std::string, std::string> //Configuration std::map<std::string, std::string>, // Configuration
> ConcatSplitReluTuple; size_t // Memory layer size
> DelayedCopyTuple;
class DelayedCopyTest class DelayedCopyTestBase
: public testing::WithParamInterface<ConcatSplitReluTuple>, : public testing::WithParamInterface<DelayedCopyTuple>,
public LayerTestsUtils::LayerTestsCommon { public LayerTestsUtils::LayerTestsCommon {
private: private:
void switchToNgraphFriendlyModel(); void InitMemory();
virtual void switchToNgraphFriendlyModel() = 0;
protected:
void Run() override;
std::vector<float> memory_init;
public: public:
static std::string getTestCaseName(const testing::TestParamInfo<ConcatSplitReluTuple> &obj); static std::string getTestCaseName(const testing::TestParamInfo<DelayedCopyTuple> &obj);
};
class DelayedCopyTest : public DelayedCopyTestBase {
private:
void switchToNgraphFriendlyModel() override;
protected: protected:
void SetUp() override; void SetUp() override;
void Run() override;
}; };
// Delayed-copy test in which the memory (ReadValue/Assign) layers are separated from the
// Split/Concat pair by Reshape layers.
class DelayedCopyAfterReshapeWithMultipleConnTest : public DelayedCopyTestBase {
protected:
    // Builds the stateful graph under test.
    void SetUp() override;

private:
    // Rebuilds `function` without memory layers, using a Constant in place of the state.
    void switchToNgraphFriendlyModel() override;
};
} // namespace SubgraphTestsDefinitions } // namespace SubgraphTestsDefinitions

View File

@ -5,35 +5,73 @@
#include "shared_test_classes/subgraph/delayed_copy_layer.hpp" #include "shared_test_classes/subgraph/delayed_copy_layer.hpp"
namespace SubgraphTestsDefinitions { namespace SubgraphTestsDefinitions {
std::string DelayedCopyTest::getTestCaseName(const testing::TestParamInfo<ConcatSplitReluTuple> &obj) { void DelayedCopyTestBase::InitMemory() {
IE_SUPPRESS_DEPRECATED_START
auto states = executableNetwork.QueryState();
for (auto& state : states) {
auto name = state.GetName();
if (name.find("id") != std::string::npos) {
auto blob = FuncTestUtils::createAndFillBlobWithFloatArray(state.GetState()->getTensorDesc(),
memory_init.data(), memory_init.size());
state.SetState(blob);
} else {
GTEST_FAIL() << "unknown memory state";
}
}
IE_SUPPRESS_DEPRECATED_END
}
// Common test flow for the delayed-copy tests: load the network, initialize its memory
// states, run inference, then swap in the subclass-provided memory-free model and validate.
void DelayedCopyTestBase::Run() {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
LoadNetwork();
// InitMemory() reads the states from executableNetwork, so it is called after LoadNetwork()
// (presumably the call that populates executableNetwork - confirm in LayerTestsCommon).
InitMemory();
GenerateInputs();
Infer();
// Implemented by each subclass; the visible overrides rebuild `function` with a Constant in
// place of the ReadValue/Assign pair. NOTE(review): presumably Validate() computes its
// reference results from that rebuilt model - confirm against LayerTestsCommon.
switchToNgraphFriendlyModel();
Validate();
}
std::string DelayedCopyTestBase::getTestCaseName(const testing::TestParamInfo<DelayedCopyTuple> &obj) {
InferenceEngine::Precision netPrecision; InferenceEngine::Precision netPrecision;
std::string targetName; std::string targetName;
std::map<std::string, std::string> additional_config; std::map<std::string, std::string> additional_config;
std::tie(netPrecision, targetName, additional_config) = obj.param; size_t memory_size;
std::tie(netPrecision, targetName, additional_config, memory_size) = obj.param;
std::ostringstream results; std::ostringstream results;
results << "netPRC=" << netPrecision.name() << "_"; results << "netPRC=" << netPrecision.name() << "_";
results << "targetDevice=" << targetName << "_"; results << "targetDevice=" << targetName << "_";
results << "memorySize=" << memory_size;
for (auto const& configItem : additional_config) {
results << "_configItem=" << configItem.first << "_" << configItem.second;
}
return results.str(); return results.str();
} }
void DelayedCopyTest::SetUp() { void DelayedCopyTest::SetUp() {
InferenceEngine::Precision netPrecision; InferenceEngine::Precision netPrecision;
std::map<std::string, std::string> additional_config; std::map<std::string, std::string> additional_config;
std::tie(netPrecision, targetDevice, additional_config) = this->GetParam(); size_t memory_size;
std::tie(netPrecision, targetDevice, additional_config, memory_size) = this->GetParam();
configuration.insert(additional_config.begin(), additional_config.end()); configuration.insert(additional_config.begin(), additional_config.end());
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto input = ngraph::builder::makeParams(ngPrc, {{1, 384}});
auto mem_c = std::make_shared<ngraph::op::Constant>(ngPrc, ngraph::Shape{1, 128}, std::vector<float>{0}); ASSERT_EQ(memory_size % 2, 0);
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto input = ngraph::builder::makeParams(ngPrc, {{1, 3 * memory_size}});
memory_init = CommonTestUtils::generate_float_numbers(memory_size, -0.2f, 0.2f);
auto mem_c = std::make_shared<ngraph::op::Constant>(ngPrc, ngraph::Shape{1, memory_size}, memory_init);
auto mem_r = std::make_shared<ngraph::opset3::ReadValue>(mem_c, "id"); auto mem_r = std::make_shared<ngraph::opset3::ReadValue>(mem_c, "id");
auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{mem_r, input[0]}, 1); auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{mem_r, input[0]}, 1);
auto split = ngraph::builder::makeVariadicSplit(concat, {384, 128}, 1); auto split = ngraph::builder::makeVariadicSplit(concat, {3 * memory_size, memory_size}, 1);
auto mem_w = std::make_shared<ngraph::opset3::Assign>(split->output(1), "id"); auto mem_w = std::make_shared<ngraph::opset3::Assign>(split->output(1), "id");
auto VariadicSplit = ngraph::builder::makeVariadicSplit(concat, {64, 448}, 1); auto VariadicSplit = ngraph::builder::makeVariadicSplit(concat, {memory_size / 2, 3 * memory_size + memory_size / 2}, 1);
auto relu2 = std::make_shared<ngraph::opset1::Sigmoid>(VariadicSplit->output(1)); auto relu2 = std::make_shared<ngraph::opset1::Sigmoid>(VariadicSplit->output(1));
mem_w->add_control_dependency(mem_r); mem_w->add_control_dependency(mem_r);
@ -46,29 +84,85 @@ namespace SubgraphTestsDefinitions {
InferenceEngine::Precision netPrecision; InferenceEngine::Precision netPrecision;
std::map<std::string, std::string> config; std::map<std::string, std::string> config;
std::map<std::string, std::string> additional_config; std::map<std::string, std::string> additional_config;
std::tie(netPrecision, targetDevice, additional_config) = this->GetParam(); size_t memory_size;
std::tie(netPrecision, targetDevice, additional_config, memory_size) = this->GetParam();
ASSERT_EQ(memory_size % 2, 0);
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto input = ngraph::builder::makeParams(ngPrc, {{1, 384}}); auto input = ngraph::builder::makeParams(ngPrc, {{1, 3 * memory_size}});
auto mem_c = std::make_shared<ngraph::op::Constant>(ngPrc, ngraph::Shape{1, 128}, std::vector<float>{0}); auto mem_c = std::make_shared<ngraph::op::Constant>(ngPrc, ngraph::Shape{1, memory_size}, memory_init);
auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{mem_c, input[0]}, 1); auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{mem_c, input[0]}, 1);
auto split = ngraph::builder::makeVariadicSplit(concat, {384, 128}, 1); auto split = ngraph::builder::makeVariadicSplit(concat, {3 * memory_size, memory_size}, 1);
auto VariadicSplit = ngraph::builder::makeVariadicSplit(concat, {64, 448}, 1); auto VariadicSplit = ngraph::builder::makeVariadicSplit(concat, {memory_size / 2, 3 * memory_size + memory_size / 2}, 1);
auto relu2 = std::make_shared<ngraph::opset1::Sigmoid>(VariadicSplit->output(1)); auto relu2 = std::make_shared<ngraph::opset1::Sigmoid>(VariadicSplit->output(1));
function = std::make_shared<ngraph::Function>(relu2, input, "delayed_copy_layer_nonmemory"); function = std::make_shared<ngraph::Function>(relu2, input, "delayed_copy_layer_nonmemory");
functionRefs = ngraph::clone_function(*function);
} }
void DelayedCopyTest::Run() { void DelayedCopyAfterReshapeWithMultipleConnTest::SetUp() {
SKIP_IF_CURRENT_TEST_IS_DISABLED() InferenceEngine::Precision netPrecision;
std::map<std::string, std::string> additional_config;
size_t memory_size;
std::tie(netPrecision, targetDevice, additional_config, memory_size) = this->GetParam();
configuration.insert(additional_config.begin(), additional_config.end());
LoadNetwork(); ASSERT_EQ(memory_size % 8, 0);
GenerateInputs();
Infer(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
switchToNgraphFriendlyModel(); auto input = ngraph::builder::makeParams(ngPrc, {{1, memory_size / 2}});
Validate();
memory_init = CommonTestUtils::generate_float_numbers(memory_size, -0.2f, 0.2f);
auto mem_c = ngraph::builder::makeConstant(ngPrc, ngraph::Shape{8, memory_size / 8}, memory_init);
auto mem_r = std::make_shared<ngraph::opset3::ReadValue>(mem_c, "id");
auto reshape_pattern1 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, memory_size});
auto reshape1 = std::make_shared<ngraph::opset1::Reshape>(mem_r, reshape_pattern1, false);
auto split = ngraph::builder::makeSplit(reshape1, ngPrc, 2, 1);
auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{split->output(0), input[0]}, 1);
auto reshape_pattern2 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{8, memory_size / 8});
auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(concat, reshape_pattern2, false);
auto mem_w = std::make_shared<ngraph::opset3::Assign>(reshape2, "id");
auto relu = std::make_shared<ngraph::opset1::Sigmoid>(reshape2);
auto reshape_pattern3 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, memory_size});
auto reshape3 = std::make_shared<ngraph::opset1::Reshape>(relu, reshape_pattern3, false);
mem_w->add_control_dependency(mem_r);
reshape3->add_control_dependency(mem_w);
function = std::make_shared<ngraph::Function>(reshape3, input, "delayed_copy_layer_reshape_memory");
} }
void DelayedCopyAfterReshapeWithMultipleConnTest::switchToNgraphFriendlyModel() {
InferenceEngine::Precision netPrecision;
std::map<std::string, std::string> config;
std::map<std::string, std::string> additional_config;
size_t memory_size;
std::tie(netPrecision, targetDevice, additional_config, memory_size) = this->GetParam();
ASSERT_EQ(memory_size % 8, 0);
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto input = ngraph::builder::makeParams(ngPrc, {{1, memory_size / 2}});
auto mem_c = ngraph::builder::makeConstant(ngPrc, ngraph::Shape{1, memory_size}, memory_init);
auto reshape_pattern1 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, memory_size});
auto reshape1 = std::make_shared<ngraph::opset1::Reshape>(mem_c, reshape_pattern1, false);
auto split = ngraph::builder::makeSplit(reshape1, ngPrc, 2, 1);
auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{split->output(0), input[0]}, 1);
auto reshape_pattern2 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{8, memory_size / 8});
auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(concat, reshape_pattern2, false);
auto relu = std::make_shared<ngraph::opset1::Sigmoid>(reshape2);
auto reshape_pattern3 = ngraph::builder::makeConstant(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, memory_size});
auto reshape3 = std::make_shared<ngraph::opset1::Reshape>(relu, reshape_pattern3, false);
function = std::make_shared<ngraph::Function>(reshape3, input, "delayed_copy_layer_reshape_nonmemory");
}
} // namespace SubgraphTestsDefinitions } // namespace SubgraphTestsDefinitions