[GNA] fix InsertCopyLayerPass (#3058)
* [GNA] fix InsertCopyLayerPass * style * [GNA] fixed Concat -> Memory case for InserCopyLayer * Style * [GNA] fix some FWL size mismatch cases * [GNA] fix crop|split -> memory cases * [GNA] One layer -> multiple concats fix tests * style * [GNA] move copy insertion after trivial non-functional layers
This commit is contained in:
parent
42478ff0ce
commit
860fae2f27
@ -1563,8 +1563,12 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
}
|
||||
|
||||
if (dnn->new_num_conv_columns) {
|
||||
num_rows = dnn->new_num_conv_columns;
|
||||
if (inputs->getDims().size() == 4) num_rows /= num_columns;
|
||||
if (dnn->new_num_conv_columns % num_columns == 0) {
|
||||
num_rows = dnn->new_num_conv_columns / num_columns;
|
||||
} else {
|
||||
num_columns = dnn->new_num_conv_columns;
|
||||
num_rows = 1;
|
||||
}
|
||||
dnn->new_num_conv_columns = 0;
|
||||
}
|
||||
|
||||
@ -2031,7 +2035,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
|
||||
// if request for allocation less that realTensorInput - we need to extend request
|
||||
auto minInput = inputDesc->minBytesRequiredForStoreInput(prevLayer);
|
||||
if (num_data_bytes_in < minInput) {
|
||||
gnalog() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to" << ALIGN(minInput, 8) << "\n";
|
||||
gnalog() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to" << ALIGN(minInput, 8);
|
||||
num_data_bytes_in = ALIGN(minInput, 8);
|
||||
}
|
||||
|
||||
|
@ -779,51 +779,99 @@ void InsertIdentityLayerPass::run() {
|
||||
}
|
||||
|
||||
void InsertCopyLayerPass::run() {
|
||||
// Copy layer insertion happens in few cases:
|
||||
// Crop output goes to concat layer -> copy layer insertion
|
||||
// Concat|Split|Crop layer goes to memory layer -> delayed copy layer insertion
|
||||
// One output goes to multiple concat and/or memory layers -> delayed copies before memory layers
|
||||
// and copies before concay layers (one less copy than outputs)
|
||||
for (auto & l : *pLayers) {
|
||||
if (l->insData.empty()) continue;
|
||||
auto prevLayers = CNNNetGetPrevLayersSkip(l, [](CNNLayerPtr origin){
|
||||
return !LayerInfo(origin).isNonFunctional();
|
||||
});
|
||||
if (LayerInfo(l).isNonFunctional()) continue;
|
||||
// Crop -> Concat and Concat -> Memory cases
|
||||
if ((LayerInfo(l).isCrop() && !LayerInfo(l).isCropAffined()) || LayerInfo(l).isConcat()) {
|
||||
std::vector<std::tuple<CNNLayerPtr, CNNLayerPtr, size_t>> copy_insertion_tuples;
|
||||
std::vector<std::tuple<CNNLayerPtr, CNNLayerPtr, size_t>> delayed_copy_insertion_tuples;
|
||||
|
||||
for (int i=0; i != prevLayers.size(); i++) {
|
||||
auto & prevIndirectLayer = prevLayers[i].first;
|
||||
bool bInsert = false;
|
||||
/// Delayed copy layers need to be moved to the very end of processing
|
||||
bool bInsertDelayed = false;
|
||||
for (auto output : l->outData) {
|
||||
auto& inputTo = getInputTo(output);
|
||||
for (auto& childLayer : inputTo) {
|
||||
auto original_child = childLayer.second;
|
||||
auto original_parent = l;
|
||||
auto current_layer = original_child;
|
||||
size_t input_idx = CNNLayerFindInsDataIdxes(output, original_child)[0];
|
||||
|
||||
auto isInserted = [&bInsertDelayed, &bInsert]() {
|
||||
return bInsert || bInsertDelayed;
|
||||
};
|
||||
|
||||
if (LayerInfo(l).isMemory()) {
|
||||
if (LayerInfo(prevIndirectLayer).isConcat() || LayerInfo(prevIndirectLayer).isCrop()
|
||||
|| LayerInfo(prevIndirectLayer).isSplit()) { bInsertDelayed = true;}
|
||||
// memory usualy preceded by either activation or split, or other layers in order to have 2b precision
|
||||
for (auto && inputto : getInputTo(prevLayers[i].first->outData[prevLayers[i].second])) {
|
||||
auto current_layer = inputto.second;
|
||||
while (LayerInfo(current_layer).isNonFunctional() || LayerInfo(current_layer).isSplit()) {
|
||||
while (LayerInfo(current_layer).isNonFunctional()) {
|
||||
if (current_layer->outData.size() == 0) break;
|
||||
if (getInputTo(current_layer->outData[0]).size() == 0) break;
|
||||
auto new_layer = CNNNetGetNextLayerSkipCertain(current_layer, 0, 0, [](CNNLayerPtr origin){return false;}).first;
|
||||
current_layer = new_layer;
|
||||
|
||||
auto next_layer = CNNNetGetNextLayerSkipCertain(current_layer, 0, 0, [](CNNLayerPtr origin){return false;}).first;
|
||||
if (current_layer->outData.size() == 1 && getInputTo(current_layer->outData[0]).size() == 1 && original_child == current_layer) {
|
||||
original_child = next_layer;
|
||||
original_parent = current_layer;
|
||||
input_idx = CNNLayerFindInsDataIdxes(original_parent->outData[0], original_child)[0];
|
||||
}
|
||||
current_layer = next_layer;
|
||||
}
|
||||
// if preceding layer is common for memory and concat
|
||||
if (LayerInfo(current_layer).isConcat()) {
|
||||
bInsertDelayed = true;
|
||||
break;
|
||||
|
||||
if ((LayerInfo(l).isConcat() || LayerInfo(l).isCrop() || LayerInfo(l).isSplit()) && LayerInfo(current_layer).isMemory()) {
|
||||
// Concat|Split|Crop -> Memory case
|
||||
delayed_copy_insertion_tuples.push_back(std::make_tuple(original_parent, original_child, input_idx));
|
||||
} else if (LayerInfo(l).isCrop() && LayerInfo(current_layer).isConcat()) {
|
||||
// Crop -> Concat case
|
||||
copy_insertion_tuples.push_back(std::make_tuple(original_parent, original_child, input_idx));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!isInserted() && LayerInfo(l).isConcat() && LayerInfo(prevIndirectLayer).isCrop()) { bInsert = true; }
|
||||
|
||||
if (isInserted()) {
|
||||
if (LayerInfo(prevIndirectLayer).isCropAffined()) {
|
||||
// The crop will be replaced by affine.
|
||||
// Copy layer insertion is not required
|
||||
continue;
|
||||
for (auto& tuple : delayed_copy_insertion_tuples) {
|
||||
// Concat -> Memory case
|
||||
InsertCopyLayer(std::get<0>(tuple), std::get<1>(tuple), std::get<2>(tuple), this->getPassManager(), DelayedCopyLayerName);
|
||||
}
|
||||
for (auto& tuple : copy_insertion_tuples) {
|
||||
// Crop -> Concat case
|
||||
InsertCopyLayer(std::get<0>(tuple), std::get<1>(tuple), std::get<2>(tuple), this->getPassManager(), CopyLayerName);
|
||||
}
|
||||
}
|
||||
|
||||
// Layer -> multiple concat/memory case
|
||||
for (auto output : l->outData) {
|
||||
std::vector<std::pair<CNNLayerPtr, size_t>> MemoryLayers;
|
||||
std::vector<std::pair<CNNLayerPtr, size_t>> ConcatLayers;
|
||||
auto& inputTo = getInputTo(output);
|
||||
if (inputTo.size() < 2) continue;
|
||||
for (auto& childLayer : inputTo) {
|
||||
auto layer_to_insert = childLayer.second;
|
||||
auto current_layer = childLayer.second;
|
||||
auto previous_layer = l;
|
||||
size_t input_idx = CNNLayerFindInsDataIdxes(output, current_layer)[0];
|
||||
|
||||
while (LayerInfo(current_layer).isNonFunctional()) {
|
||||
if (current_layer->outData.size() == 0) break;
|
||||
if (getInputTo(current_layer->outData[0]).size() == 0) break;
|
||||
previous_layer = current_layer;
|
||||
current_layer = CNNNetGetNextLayerSkipCertain(current_layer, 0, 0, [](CNNLayerPtr origin){return false;}).first;
|
||||
}
|
||||
auto prevLayer = CNNNetPrevLayer(l, i);
|
||||
InsertCopyLayer(prevLayer, l, i, getPassManager(), bInsertDelayed ? DelayedCopyLayerName : CopyLayerName);
|
||||
if (LayerInfo(current_layer).isConcat()) {
|
||||
ConcatLayers.push_back(make_pair(layer_to_insert, input_idx));
|
||||
} else if (LayerInfo(current_layer).isMemory()) {
|
||||
MemoryLayers.push_back(make_pair(layer_to_insert, input_idx));
|
||||
}
|
||||
}
|
||||
if (MemoryLayers.empty() && ConcatLayers.empty()) continue;
|
||||
auto toCopyCount = MemoryLayers.size() + ConcatLayers.size() - 1;
|
||||
size_t currentCopyIdx = 0;
|
||||
while (currentCopyIdx < toCopyCount) {
|
||||
if (currentCopyIdx < MemoryLayers.size()) {
|
||||
size_t memoryIdx = currentCopyIdx;
|
||||
auto memoryLayer = MemoryLayers[memoryIdx].first;
|
||||
auto inputIdx = MemoryLayers[memoryIdx].second;
|
||||
InsertCopyLayer(l, memoryLayer, inputIdx, this->getPassManager(), DelayedCopyLayerName);
|
||||
} else {
|
||||
size_t concatIdx = currentCopyIdx - MemoryLayers.size();
|
||||
auto concatLayer = ConcatLayers[concatIdx].first;
|
||||
auto inputIdx = ConcatLayers[concatIdx].second;
|
||||
InsertCopyLayer(l, concatLayer, inputIdx, this->getPassManager(), CopyLayerName);
|
||||
}
|
||||
currentCopyIdx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -42,6 +42,7 @@ std::vector<std::string> disabledTestPatterns() {
|
||||
R"(.*IEClassHeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK.*)",
|
||||
// TODO: Issue 39358
|
||||
R"(.*unaligned.*MultipleConcatTest.*)",
|
||||
R"(.*ActivationConcatsEltwise.*CS=35.*)",
|
||||
// TODO: Issue: 40960
|
||||
R"(.*(ConstantResultSubgraphTest).*)",
|
||||
};
|
||||
|
@ -0,0 +1,43 @@
|
||||
// Copyright (C) 2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "subgraph_tests/activation_concats_eltwise.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
namespace {
|
||||
std::vector<size_t> input_sizes = {
|
||||
7,
|
||||
16,
|
||||
35,
|
||||
64
|
||||
};
|
||||
|
||||
std::vector<size_t> concat_const_sizes = {
|
||||
7,
|
||||
16,
|
||||
35,
|
||||
64
|
||||
};
|
||||
|
||||
|
||||
const std::vector<InferenceEngine::Precision> netPrecisions = {
|
||||
InferenceEngine::Precision::FP32,
|
||||
InferenceEngine::Precision::FP16
|
||||
};
|
||||
|
||||
std::map<std::string, std::string> additional_config = {};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_CompareRefs, ActivationConcatsEltwise,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(input_sizes),
|
||||
::testing::ValuesIn(concat_const_sizes),
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
::testing::Values(additional_config)),
|
||||
ActivationConcatsEltwise::getTestCaseName);
|
||||
|
||||
} // namespace
|
@ -0,0 +1,31 @@
|
||||
// Copyright (C) 2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "functional_test_utils/layer_test_utils.hpp"
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
using ActivationConcatsEltwiseParamsTuple = typename std::tuple<
|
||||
size_t, // input size
|
||||
size_t, // concat const size
|
||||
InferenceEngine::Precision, // precision
|
||||
std::string, // device name
|
||||
std::map<std::string, std::string> // configuration
|
||||
>;
|
||||
|
||||
|
||||
class ActivationConcatsEltwise : public testing::WithParamInterface<ActivationConcatsEltwiseParamsTuple>,
|
||||
public LayerTestsUtils::LayerTestsCommon {
|
||||
public:
|
||||
static std::string getTestCaseName(testing::TestParamInfo<ParamType> obj);
|
||||
|
||||
protected:
|
||||
void SetUp() override;
|
||||
};
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
@ -0,0 +1,69 @@
|
||||
// Copyright (C) 2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <debug.h>
|
||||
#include "common_test_utils/common_utils.hpp"
|
||||
#include "common_test_utils/data_utils.hpp"
|
||||
#include "functional_test_utils/precision_utils.hpp"
|
||||
#include "functional_test_utils/skip_tests_config.hpp"
|
||||
#include "subgraph_tests/activation_concats_eltwise.hpp"
|
||||
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
using namespace CommonTestUtils;
|
||||
using namespace InferenceEngine;
|
||||
|
||||
std::string ActivationConcatsEltwise::getTestCaseName(testing::TestParamInfo<ParamType> obj) {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
size_t inputSize;
|
||||
size_t concatSize;
|
||||
std::string targetDevice;
|
||||
std::map<std::string, std::string> configuration;
|
||||
std::tie(inputSize, concatSize, netPrecision, targetDevice, configuration) = obj.param;
|
||||
|
||||
std::ostringstream result;
|
||||
result << "IS=" << inputSize << "_";
|
||||
result << "CS=" << concatSize << "_";
|
||||
result << "PRC=" << netPrecision.name() << "_";
|
||||
result << "dev=" << targetDevice;
|
||||
return result.str();
|
||||
}
|
||||
|
||||
void ActivationConcatsEltwise::SetUp() {
|
||||
InferenceEngine::Precision netPrecision;
|
||||
size_t inputSize;
|
||||
size_t concatSize;
|
||||
std::map<std::string, std::string> config;
|
||||
std::tie(inputSize, concatSize, netPrecision, targetDevice, config) = this->GetParam();
|
||||
configuration.insert(config.begin(), config.end());
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
|
||||
auto input = ngraph::builder::makeParams(ngPrc, { {1, inputSize} });
|
||||
|
||||
auto relu = ngraph::builder::makeActivation(input[0], ngPrc, ngraph::helpers::ActivationTypes::Relu);
|
||||
|
||||
auto concat_vals_1 = CommonTestUtils::generate_float_numbers(concatSize, 14, 14);
|
||||
auto concat_vals_2 = CommonTestUtils::generate_float_numbers(concatSize, 14, 14);
|
||||
auto concat_const_1 = ngraph::builder::makeConstant(ngPrc, {1, concatSize}, concat_vals_1);
|
||||
auto concat_const_2 = ngraph::builder::makeConstant(ngPrc, {1, concatSize}, concat_vals_2);
|
||||
|
||||
auto concat_1 = ngraph::builder::makeConcat({concat_const_1, relu}, 1);
|
||||
auto concat_2 = ngraph::builder::makeConcat({concat_const_2, relu}, 1);
|
||||
|
||||
auto eltw = ngraph::builder::makeEltwise(concat_1, concat_2, ngraph::helpers::EltwiseTypes::ADD);
|
||||
|
||||
auto reshape_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<size_t>({1, inputSize + concatSize}));
|
||||
auto final_reshape = std::make_shared<ngraph::op::v1::Reshape>(eltw, reshape_pattern, false);
|
||||
function = std::make_shared<ngraph::Function>(final_reshape, input, "ActivationConcatsEltwise");
|
||||
}
|
||||
|
||||
TEST_P(ActivationConcatsEltwise, CompareWithRefs) {
|
||||
Run();
|
||||
}
|
||||
} // namespace LayerTestsDefinitions
|
Loading…
Reference in New Issue
Block a user