[GNA] fix InsertCopyLayerPass (#3058)

* [GNA] fix InsertCopyLayerPass

* style

* [GNA] fixed Concat -> Memory case for InsertCopyLayer

* Style

* [GNA] fix some FWL size mismatch cases

* [GNA] fix crop|split -> memory cases

* [GNA] fix tests for the one layer -> multiple concats case

* style

* [GNA] move copy insertion after trivial non-functional layers
Author: Kamil Magierski, 2020-11-24 18:08:09 +01:00 (committed by GitHub)
parent 42478ff0ce
commit 860fae2f27
6 changed files with 233 additions and 37 deletions


@@ -1563,8 +1563,12 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
     }
     if (dnn->new_num_conv_columns) {
-        num_rows = dnn->new_num_conv_columns;
-        if (inputs->getDims().size() == 4) num_rows /= num_columns;
+        if (dnn->new_num_conv_columns % num_columns == 0) {
+            num_rows = dnn->new_num_conv_columns / num_columns;
+        } else {
+            num_columns = dnn->new_num_conv_columns;
+            num_rows = 1;
+        }
         dnn->new_num_conv_columns = 0;
     }
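
As a standalone illustration of the shape fix above (not plugin code; the helper name is made up), the patched branch keeps the padded element count reported by the convolution stage intact: it either spreads it across the existing columns or folds everything into a single row.

// Minimal sketch of the recomputation above; pwlShape is an illustrative name.
#include <cstdint>
#include <utility>

std::pair<uint32_t, uint32_t> pwlShape(uint32_t new_num_conv_columns,
                                       uint32_t num_columns,
                                       uint32_t num_rows) {
    if (new_num_conv_columns) {
        if (new_num_conv_columns % num_columns == 0) {
            // the padded element count splits evenly across the existing columns
            num_rows = new_num_conv_columns / num_columns;
        } else {
            // otherwise treat the whole padded count as one row
            num_columns = new_num_conv_columns;
            num_rows = 1;
        }
    }
    // e.g. pwlShape(96, 8, 0) -> {12, 8}; pwlShape(100, 8, 0) -> {1, 100}
    return {num_rows, num_columns};
}
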
@@ -2031,7 +2035,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
             // if request for allocation less that realTensorInput - we need to extend request
             auto minInput = inputDesc->minBytesRequiredForStoreInput(prevLayer);
             if (num_data_bytes_in < minInput) {
-                gnalog() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to" << ALIGN(minInput, 8) << "\n";
+                gnalog() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to" << ALIGN(minInput, 8);
                 num_data_bytes_in = ALIGN(minInput, 8);
             }
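
For reference, a self-contained sketch of the extension logic (assuming, as the name suggests, that ALIGN(x, n) rounds x up to the next multiple of n): a request smaller than the minimum input size is bumped up to the 8-byte-aligned minimum.

#include <cstddef>
#include <iostream>

// align_up stands in for the plugin's ALIGN macro (assumed round-up-to-multiple semantics)
static size_t align_up(size_t x, size_t n) { return ((x + n - 1) / n) * n; }

int main() {
    size_t num_data_bytes_in = 70;   // bytes requested by the consumer
    size_t minInput = 100;           // minimum bytes needed to store the real input tensor
    if (num_data_bytes_in < minInput) {
        num_data_bytes_in = align_up(minInput, 8);
    }
    std::cout << num_data_bytes_in << "\n";  // prints 104
    return 0;
}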


@@ -779,51 +779,99 @@ void InsertIdentityLayerPass::run() {
 }
 
 void InsertCopyLayerPass::run() {
+    // Copy layer insertion happens in few cases:
+    // Crop output goes to concat layer -> copy layer insertion
+    // Concat|Split|Crop layer goes to memory layer -> delayed copy layer insertion
+    // One output goes to multiple concat and/or memory layers -> delayed copies before memory layers
+    // and copies before concay layers (one less copy than outputs)
     for (auto & l : *pLayers) {
-        if (l->insData.empty()) continue;
-        auto prevLayers = CNNNetGetPrevLayersSkip(l, [](CNNLayerPtr origin){
-            return !LayerInfo(origin).isNonFunctional();
-        });
+        if (LayerInfo(l).isNonFunctional()) continue;
+        // Crop -> Concat and Concat -> Memory cases
+        if ((LayerInfo(l).isCrop() && !LayerInfo(l).isCropAffined()) || LayerInfo(l).isConcat()) {
+            std::vector<std::tuple<CNNLayerPtr, CNNLayerPtr, size_t>> copy_insertion_tuples;
+            std::vector<std::tuple<CNNLayerPtr, CNNLayerPtr, size_t>> delayed_copy_insertion_tuples;
 
-        for (int i=0; i != prevLayers.size(); i++) {
-            auto & prevIndirectLayer = prevLayers[i].first;
-            bool bInsert = false;
-            /// Delayed copy layers need to be moved to the very end of processing
-            bool bInsertDelayed = false;
+            for (auto output : l->outData) {
+                auto& inputTo = getInputTo(output);
+                for (auto& childLayer : inputTo) {
+                    auto original_child = childLayer.second;
+                    auto original_parent = l;
+                    auto current_layer = original_child;
+                    size_t input_idx = CNNLayerFindInsDataIdxes(output, original_child)[0];
 
-            auto isInserted = [&bInsertDelayed, &bInsert]() {
-                return bInsert || bInsertDelayed;
-            };
-            if (LayerInfo(l).isMemory()) {
-                if (LayerInfo(prevIndirectLayer).isConcat() || LayerInfo(prevIndirectLayer).isCrop()
-                    || LayerInfo(prevIndirectLayer).isSplit()) { bInsertDelayed = true;}
-                // memory usualy preceded by either activation or split, or other layers in order to have 2b precision
-                for (auto && inputto : getInputTo(prevLayers[i].first->outData[prevLayers[i].second])) {
-                    auto current_layer = inputto.second;
-                    while (LayerInfo(current_layer).isNonFunctional() || LayerInfo(current_layer).isSplit()) {
+                    while (LayerInfo(current_layer).isNonFunctional()) {
                         if (current_layer->outData.size() == 0) break;
                         if (getInputTo(current_layer->outData[0]).size() == 0) break;
-                        auto new_layer = CNNNetGetNextLayerSkipCertain(current_layer, 0, 0, [](CNNLayerPtr origin){return false;}).first;
-                        current_layer = new_layer;
+                        auto next_layer = CNNNetGetNextLayerSkipCertain(current_layer, 0, 0, [](CNNLayerPtr origin){return false;}).first;
+                        if (current_layer->outData.size() == 1 && getInputTo(current_layer->outData[0]).size() == 1 && original_child == current_layer) {
+                            original_child = next_layer;
+                            original_parent = current_layer;
+                            input_idx = CNNLayerFindInsDataIdxes(original_parent->outData[0], original_child)[0];
+                        }
+                        current_layer = next_layer;
                     }
-                    // if preceding layer is common for memory and concat
-                    if (LayerInfo(current_layer).isConcat()) {
-                        bInsertDelayed = true;
-                        break;
+                    if ((LayerInfo(l).isConcat() || LayerInfo(l).isCrop() || LayerInfo(l).isSplit()) && LayerInfo(current_layer).isMemory()) {
+                        // Concat|Split|Crop -> Memory case
+                        delayed_copy_insertion_tuples.push_back(std::make_tuple(original_parent, original_child, input_idx));
+                    } else if (LayerInfo(l).isCrop() && LayerInfo(current_layer).isConcat()) {
+                        // Crop -> Concat case
+                        copy_insertion_tuples.push_back(std::make_tuple(original_parent, original_child, input_idx));
                     }
                 }
             }
-            if (!isInserted() && LayerInfo(l).isConcat() && LayerInfo(prevIndirectLayer).isCrop()) { bInsert = true; }
-            if (isInserted()) {
-                if (LayerInfo(prevIndirectLayer).isCropAffined()) {
-                    // The crop will be replaced by affine.
-                    // Copy layer insertion is not required
-                    continue;
+            for (auto& tuple : delayed_copy_insertion_tuples) {
+                // Concat -> Memory case
+                InsertCopyLayer(std::get<0>(tuple), std::get<1>(tuple), std::get<2>(tuple), this->getPassManager(), DelayedCopyLayerName);
+            }
+            for (auto& tuple : copy_insertion_tuples) {
+                // Crop -> Concat case
+                InsertCopyLayer(std::get<0>(tuple), std::get<1>(tuple), std::get<2>(tuple), this->getPassManager(), CopyLayerName);
+            }
+        }
+        // Layer -> multiple concat/memory case
+        for (auto output : l->outData) {
+            std::vector<std::pair<CNNLayerPtr, size_t>> MemoryLayers;
+            std::vector<std::pair<CNNLayerPtr, size_t>> ConcatLayers;
+            auto& inputTo = getInputTo(output);
+            if (inputTo.size() < 2) continue;
+            for (auto& childLayer : inputTo) {
+                auto layer_to_insert = childLayer.second;
+                auto current_layer = childLayer.second;
+                auto previous_layer = l;
+                size_t input_idx = CNNLayerFindInsDataIdxes(output, current_layer)[0];
+                while (LayerInfo(current_layer).isNonFunctional()) {
+                    if (current_layer->outData.size() == 0) break;
+                    if (getInputTo(current_layer->outData[0]).size() == 0) break;
+                    previous_layer = current_layer;
+                    current_layer = CNNNetGetNextLayerSkipCertain(current_layer, 0, 0, [](CNNLayerPtr origin){return false;}).first;
                 }
-                auto prevLayer = CNNNetPrevLayer(l, i);
-                InsertCopyLayer(prevLayer, l, i, getPassManager(), bInsertDelayed ? DelayedCopyLayerName : CopyLayerName);
+                if (LayerInfo(current_layer).isConcat()) {
+                    ConcatLayers.push_back(make_pair(layer_to_insert, input_idx));
+                } else if (LayerInfo(current_layer).isMemory()) {
+                    MemoryLayers.push_back(make_pair(layer_to_insert, input_idx));
+                }
             }
+            if (MemoryLayers.empty() && ConcatLayers.empty()) continue;
+            auto toCopyCount = MemoryLayers.size() + ConcatLayers.size() - 1;
+            size_t currentCopyIdx = 0;
+            while (currentCopyIdx < toCopyCount) {
+                if (currentCopyIdx < MemoryLayers.size()) {
+                    size_t memoryIdx = currentCopyIdx;
+                    auto memoryLayer = MemoryLayers[memoryIdx].first;
+                    auto inputIdx = MemoryLayers[memoryIdx].second;
+                    InsertCopyLayer(l, memoryLayer, inputIdx, this->getPassManager(), DelayedCopyLayerName);
+                } else {
+                    size_t concatIdx = currentCopyIdx - MemoryLayers.size();
+                    auto concatLayer = ConcatLayers[concatIdx].first;
+                    auto inputIdx = ConcatLayers[concatIdx].second;
+                    InsertCopyLayer(l, concatLayer, inputIdx, this->getPassManager(), CopyLayerName);
+                }
+                currentCopyIdx++;
+            }
         }
     }
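
To illustrate the new "one layer -> multiple concat/memory" rule above, here is a small standalone sketch (layer names are made up, not plugin code): with N concat/memory consumers of the same output, N - 1 of them get a copy, memory consumers first (as delayed copies), so exactly one consumer keeps reading the original buffer.

#include <iostream>
#include <string>
#include <vector>

int main() {
    // consumers of one output; names are illustrative only
    std::vector<std::string> memoryConsumers = {"memory_0", "memory_1"};
    std::vector<std::string> concatConsumers = {"concat_0"};

    // mirror of the loop above: N consumers -> N - 1 copies, memory consumers served first
    size_t toCopyCount = memoryConsumers.size() + concatConsumers.size() - 1;
    for (size_t idx = 0; idx < toCopyCount; ++idx) {
        if (idx < memoryConsumers.size()) {
            std::cout << "delayed copy before " << memoryConsumers[idx] << "\n";
        } else {
            std::cout << "copy before " << concatConsumers[idx - memoryConsumers.size()] << "\n";
        }
    }
    // the last concat consumer (here concat_0) is left without a copy
    return 0;
}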


@@ -42,6 +42,7 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*IEClassHeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK.*)",
         // TODO: Issue 39358
         R"(.*unaligned.*MultipleConcatTest.*)",
+        R"(.*ActivationConcatsEltwise.*CS=35.*)",
         // TODO: Issue: 40960
         R"(.*(ConstantResultSubgraphTest).*)",
     };


@@ -0,0 +1,43 @@
// Copyright (C) 2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "subgraph_tests/activation_concats_eltwise.hpp"
#include "common_test_utils/test_constants.hpp"

using namespace LayerTestsDefinitions;

namespace {
std::vector<size_t> input_sizes = {
    7,
    16,
    35,
    64
};

std::vector<size_t> concat_const_sizes = {
    7,
    16,
    35,
    64
};

const std::vector<InferenceEngine::Precision> netPrecisions = {
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16
};

std::map<std::string, std::string> additional_config = {};

INSTANTIATE_TEST_CASE_P(smoke_CompareRefs, ActivationConcatsEltwise,
                        ::testing::Combine(
                            ::testing::ValuesIn(input_sizes),
                            ::testing::ValuesIn(concat_const_sizes),
                            ::testing::ValuesIn(netPrecisions),
                            ::testing::Values(CommonTestUtils::DEVICE_GNA),
                            ::testing::Values(additional_config)),
                        ActivationConcatsEltwise::getTestCaseName);
} // namespace


@@ -0,0 +1,31 @@
// Copyright (C) 2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <string>

#include "functional_test_utils/layer_test_utils.hpp"

namespace LayerTestsDefinitions {

using ActivationConcatsEltwiseParamsTuple = typename std::tuple<
    size_t,                             // input size
    size_t,                             // concat const size
    InferenceEngine::Precision,         // precision
    std::string,                        // device name
    std::map<std::string, std::string>  // configuration
>;

class ActivationConcatsEltwise : public testing::WithParamInterface<ActivationConcatsEltwiseParamsTuple>,
                                 public LayerTestsUtils::LayerTestsCommon {
public:
    static std::string getTestCaseName(testing::TestParamInfo<ParamType> obj);

protected:
    void SetUp() override;
};

} // namespace LayerTestsDefinitions


@@ -0,0 +1,69 @@
// Copyright (C) 2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include <debug.h>

#include "common_test_utils/common_utils.hpp"
#include "common_test_utils/data_utils.hpp"
#include "functional_test_utils/precision_utils.hpp"
#include "functional_test_utils/skip_tests_config.hpp"
#include "subgraph_tests/activation_concats_eltwise.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"

namespace LayerTestsDefinitions {

using namespace CommonTestUtils;
using namespace InferenceEngine;

std::string ActivationConcatsEltwise::getTestCaseName(testing::TestParamInfo<ParamType> obj) {
    InferenceEngine::Precision netPrecision;
    size_t inputSize;
    size_t concatSize;
    std::string targetDevice;
    std::map<std::string, std::string> configuration;
    std::tie(inputSize, concatSize, netPrecision, targetDevice, configuration) = obj.param;

    std::ostringstream result;
    result << "IS=" << inputSize << "_";
    result << "CS=" << concatSize << "_";
    result << "PRC=" << netPrecision.name() << "_";
    result << "dev=" << targetDevice;
    return result.str();
}

void ActivationConcatsEltwise::SetUp() {
    InferenceEngine::Precision netPrecision;
    size_t inputSize;
    size_t concatSize;
    std::map<std::string, std::string> config;
    std::tie(inputSize, concatSize, netPrecision, targetDevice, config) = this->GetParam();
    configuration.insert(config.begin(), config.end());

    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
    auto input = ngraph::builder::makeParams(ngPrc, { {1, inputSize} });

    auto relu = ngraph::builder::makeActivation(input[0], ngPrc, ngraph::helpers::ActivationTypes::Relu);

    auto concat_vals_1 = CommonTestUtils::generate_float_numbers(concatSize, 14, 14);
    auto concat_vals_2 = CommonTestUtils::generate_float_numbers(concatSize, 14, 14);
    auto concat_const_1 = ngraph::builder::makeConstant(ngPrc, {1, concatSize}, concat_vals_1);
    auto concat_const_2 = ngraph::builder::makeConstant(ngPrc, {1, concatSize}, concat_vals_2);

    auto concat_1 = ngraph::builder::makeConcat({concat_const_1, relu}, 1);
    auto concat_2 = ngraph::builder::makeConcat({concat_const_2, relu}, 1);

    auto eltw = ngraph::builder::makeEltwise(concat_1, concat_2, ngraph::helpers::EltwiseTypes::ADD);

    auto reshape_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<size_t>({1, inputSize + concatSize}));
    auto final_reshape = std::make_shared<ngraph::op::v1::Reshape>(eltw, reshape_pattern, false);
    function = std::make_shared<ngraph::Function>(final_reshape, input, "ActivationConcatsEltwise");
}

TEST_P(ActivationConcatsEltwise, CompareWithRefs) {
    Run();
}
} // namespace LayerTestsDefinitions