[GNA]: Split eltwise using ngraph (#13176)

* [GNA]: Split eltwise over channel using ngraph

* Update src/plugins/intel_gna/layers/gna_split_layer.hpp

Co-authored-by: Szymon Irzabek <szymon.jakub.irzabek@intel.com>

* Update src/plugins/intel_gna/transformations/split_eltwise_over_channel.hpp

Co-authored-by: Szymon Irzabek <szymon.jakub.irzabek@intel.com>

* Review comments

Co-authored-by: Szymon Irzabek <szymon.jakub.irzabek@intel.com>
This commit is contained in:
Nadezhda Ageeva
2022-09-27 21:13:42 +04:00
committed by GitHub
parent 8ad0992050
commit 9d206b6956
8 changed files with 320 additions and 22 deletions

View File

@@ -222,6 +222,7 @@ static std::vector<std::string> skipConstInfer = {
"Copy",
"FullyConnected",
"Squeeze",
"Split",
"TensorIterator",
"LSTMSequence",
"MVN"};

View File

@@ -91,6 +91,7 @@
#include "transformations/convert_precision.hpp"
#include "transformations/unfuse_reshape_and_transpose.hpp"
#include "transformations/insert_copy_layer.hpp"
#include "transformations/split_eltwise.hpp"
#include <ngraph/opsets/opset7.hpp>
@@ -732,6 +733,11 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
transformations
*/
manager.register_pass<ov::intel_gna::pass::BroadcastAddMultiplyConst>();
/*
SplitEltwise depends on BroadcastAddMultiplyConst for the case when a Constant
input is being split
*/
manager.register_pass<ov::intel_gna::pass::SplitEltwise>();
if (!config.gnaFlags.sw_fp32 && !config.gnaFlags.uniformPwlDesign) {
manager.register_pass<ov::intel_gna::pass::PWLApproximationWithFq>(config.gnaFlags.pwlMaxErrorPercent);
manager.register_pass<ov::intel_gna::pass::PWLApproximation>(config.gnaFlags.pwlMaxErrorPercent);
@@ -823,9 +829,9 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
if (!isNgraphPassesUsed) {
passes->registerPass<ReorderMaxPoolPass>();
passes->registerPass<EltwiseSplitOverChannelsPass>();
}
passes->registerPass<EltwiseSplitOverChannelsPass>();
passes->registerPass<InsertSplitAligningFilterPass>();
if (!isNgraphPassesUsed) {

View File

@@ -60,4 +60,29 @@ static std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t m
return splitSizes;
}
// @brief Returns the split axis and the sizes of the split outputs needed to cut the
// input tensor into aligned parts, taking GNA HW limitations into account.
// An empty size vector in the result means the tensor cannot be split in a HW-friendly way.
static std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(InferenceEngine::SizeVector dims) {
    const auto totalElements = InferenceEngine::details::product(std::begin(dims), std::end(dims));
    // The split axis is the first dimension holding more than one element.
    const auto axisIt = std::find_if(std::begin(dims), std::end(dims), [](size_t dim) { return dim > 1; });
    IE_ASSERT(axisIt != std::end(dims));
    const auto axisSize = *axisIt;
    const auto axisIndex = std::distance(std::begin(dims), axisIt);
    auto alignment = GNALimitations::inputByteAlignment;
    // Split output size should be a multiple of 64 to avoid align-filter insertion.
    // When the dimension itself does not exceed 64 we can still split,
    // but only if the remaining size is already aligned.
    if (axisSize <= alignment) {
        if ((totalElements / axisSize) % alignment != 0) {
            // Not splittable: report the axis with an empty size list.
            return {axisIndex, std::vector<uint32_t>{}};
        }
        alignment = 1;
    }
    const auto sizes = GetAlignedSplitSizes(axisSize,
                                            GNALimitations::bufferMaxSize * axisSize / totalElements,
                                            alignment);
    return {axisIndex, sizes};
}
} // namespace GNAPluginNS

View File

@@ -1495,27 +1495,12 @@ void EltwiseSplitOverChannelsPass::run() {
if (totalElementsSize <= GNALimitations::bufferMaxSize) {
continue;
}
auto splitSizesPerAxis = AlignedSplitSizesPerAxis(oDims);
auto firstValuableDim = std::find_if(std::begin(oDims), std::end(oDims), [](size_t val) { return val > 1; });
IE_ASSERT(firstValuableDim != std::end(oDims));
auto splittedElementsSize = *firstValuableDim;
auto splittedDimIx = std::distance(std::begin(oDims), firstValuableDim);
auto alignment = GNALimitations::inputByteAlignment;
// Split output size should be multiple by 64 to avoid align filters insertion,
// but we need to check if our input size to split exceeds 64; if not we can always
// split if the remaining size is aligned
if (splittedElementsSize <= 64) {
if ((totalElementsSize / splittedElementsSize) % alignment == 0) {
alignment = 1;
} else {
THROW_GNA_LAYER_EXCEPTION(l) << "splitting didn't succeed\n";
}
if (0 == splitSizesPerAxis.second.size()) {
THROW_GNA_LAYER_EXCEPTION(l) << "splitting didn't succeed\n";
}
auto splitSizes = GetAlignedSplitSizes(splittedElementsSize,
GNALimitations::bufferMaxSize * splittedElementsSize / totalElementsSize, alignment);
pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n";
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(l);
@@ -1532,9 +1517,9 @@ void EltwiseSplitOverChannelsPass::run() {
auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc();
// create split layer outputs
for (auto elementsNum : splitSizes) {
for (auto elementsNum : splitSizesPerAxis.second) {
auto newDims = oDims;
newDims[splittedDimIx] = elementsNum;
newDims[splitSizesPerAxis.first] = elementsNum;
auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout());
auto data = std::make_shared<Data>(l->name + "/" + std::to_string(kThEltwiseInput) + "/1", newDesc);
getCreatorLayer(data) = split;
@@ -1558,7 +1543,7 @@ void EltwiseSplitOverChannelsPass::run() {
concat->outData.push_back(masterEltwise->outData.front());
getCreatorLayer(masterEltwise->outData.front()) = concat;
for (size_t k = 0; k != splitSizes.size(); k++) {
for (size_t k = 0; k != splitSizesPerAxis.second.size(); k++) {
auto eltwiseRaw = std::make_shared<EltwiseLayer>(
LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32});
IE_ASSERT(eltwiseRaw != nullptr);

View File

@@ -0,0 +1,87 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <openvino/cc/ngraph/itt.hpp>
#include "transformations/split_eltwise.hpp"
#include <ngraph/opsets/opset9.hpp>
#include <ngraph/pattern/op/or.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include "legacy/ngraph_ops/eltwise.hpp"
#include "ops/util/util.hpp"
#include "backend/gna_limitations.hpp"
#include "layers/gna_split_layer.hpp"
using namespace ov::intel_gna::pass;
using namespace ov::intel_gna::ngraph_util;
namespace {
// Returns true when the Eltwise output tensor is larger than a single GNA buffer,
// i.e. the operation has to be split into smaller parts.
inline bool is_eltwise_has_to_be_splitted(const ngraph::Output<ngraph::Node>& node) {
    auto eltwise = std::dynamic_pointer_cast<ngraph::op::Eltwise>(node.get_node_shared_ptr());
    if (!eltwise) return false;
    auto o_dims = eltwise->get_output_shape(0);
    // Accumulate in size_t: with the int literal `1` std::accumulate deduces an int
    // accumulator, which can overflow/truncate for large output shapes.
    auto total_elem_size = std::accumulate(std::begin(o_dims), std::end(o_dims), size_t{1}, std::multiplies<size_t>());
    return (total_elem_size > GNAPluginNS::GNALimitations::bufferMaxSize);
}
// Wraps `node` into a VariadicSplit using the precomputed (axis, part sizes) pair.
std::shared_ptr<ngraph::opset9::VariadicSplit> split_input(const std::shared_ptr<ov::Node>& node,
        const std::pair<int64_t, std::vector<uint32_t>>& split_sizes_per_axis) {
    const auto& axis = split_sizes_per_axis.first;
    const auto& sizes = split_sizes_per_axis.second;
    auto axis_const = ngraph::opset9::Constant::create(ngraph::element::i64,
                                                       ngraph::Shape({1}),
                                                       std::vector<int64_t>{axis});
    auto sizes_const = ngraph::opset9::Constant::create(ngraph::element::i64,
                                                        ngraph::Shape({sizes.size()}),
                                                        sizes);
    auto split = std::make_shared<ngraph::opset9::VariadicSplit>(node, axis_const, sizes_const);
    split->set_friendly_name(node->get_friendly_name() + "/split");
    ngraph::copy_runtime_info(node, split);
    return split;
}
// Builds the index-th partial Eltwise from the matching outputs of both splits,
// copying the operation kind and output element type from the original node.
std::shared_ptr<ngraph::op::Eltwise> create_eltwise(const std::shared_ptr<ov::Node>& node, const std::shared_ptr<ov::Node>& split0,
        const std::shared_ptr<ov::Node>& split1, size_t index) {
    const auto root = std::dynamic_pointer_cast<ngraph::op::Eltwise>(node);
    auto part = std::make_shared<ngraph::op::Eltwise>(split0->output(index),
                                                      split1->output(index),
                                                      root->eltwise_type,
                                                      root->get_output_element_type(0));
    part->set_friendly_name(root->get_friendly_name() + "/partition" + std::to_string(index));
    ngraph::copy_runtime_info(root, part);
    return part;
}
} // namespace
// Matcher pass: replaces an Eltwise whose output exceeds the GNA buffer size with
//   VariadicSplit(input0) / VariadicSplit(input1) -> partial Eltwise ops -> Concat
// so that each partial operation fits into the HW buffer.
SplitEltwise::SplitEltwise() {
    MATCHER_SCOPE(SplitEltwise);
    // Match any two-input Eltwise; the predicate filters to oversized outputs only.
    auto eltwise = ngraph::pattern::wrap_type<ngraph::op::Eltwise>({ngraph::pattern::any_input(), ngraph::pattern::any_input()},
                                                                   is_eltwise_has_to_be_splitted);
    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
        const auto& pattern_map = m.get_pattern_value_map();
        auto eltwise_node = pattern_map.at(eltwise).get_node_shared_ptr();
        // Capture the consumers up front so they can be re-pointed to the concat later.
        auto consumers = eltwise_node->output(0).get_target_inputs();
        auto o_dims = eltwise_node->get_output_shape(0);
        // Compute split axis and aligned part sizes; an empty size list means
        // no HW-friendly split exists, so leave the graph unchanged.
        auto split_sizes_per_axis = GNAPluginNS::AlignedSplitSizesPerAxis(o_dims);
        if (0 == split_sizes_per_axis.second.size()) {
            gnalog() << "Splitting didn't succeed for layer " << eltwise_node->get_friendly_name()
                     << " on axis " << split_sizes_per_axis.first << std::endl;
            return false;
        }
        // Split both eltwise inputs identically.
        auto split_node0 = split_input(eltwise_node->get_input_node_shared_ptr(0), split_sizes_per_axis);
        auto split_node1 = split_input(eltwise_node->get_input_node_shared_ptr(1), split_sizes_per_axis);
        ov::NodeVector concat_inputs;
        // One partial eltwise per split chunk, fed from the matching split outputs.
        for (size_t i = 0; i < split_sizes_per_axis.second.size(); i++) {
            auto eltwise_node_part = create_eltwise(eltwise_node, split_node0, split_node1, i);
            concat_inputs.push_back(eltwise_node_part);
        }
        // Re-assemble along the split axis; keep the original friendly name so the
        // replacement is transparent to the rest of the graph.
        auto concat = std::make_shared<ngraph::opset9::Concat>(concat_inputs, split_sizes_per_axis.first);
        concat->set_friendly_name(eltwise_node->get_friendly_name());
        ngraph::copy_runtime_info(eltwise_node, concat);
        for (auto&& input : consumers) {
            input.replace_source_output(concat);
        }
        return true;
    };
    auto m = std::make_shared<ngraph::pattern::Matcher>(eltwise, matcher_name);
    this->register_matcher(m, callback);
}

View File

@@ -0,0 +1,23 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <openvino/pass/graph_rewrite.hpp>
namespace ov {
namespace intel_gna {
namespace pass {
/**
 * @brief Splits an oversized Eltwise over channels (VariadicSplit -> partial
 * eltwise ops -> Concat) to satisfy the GNA-HW bufferMaxSize limitation per eltwise.
 */
class SplitEltwise : public ov::pass::MatcherPass {
public:
    OPENVINO_RTTI("SplitEltwise", "0");
    // Registers the matcher that detects oversized eltwise nodes and rewrites them.
    SplitEltwise();
};
} // namespace pass
} // namespace intel_gna
} // namespace ov

View File

@@ -72,6 +72,13 @@ const std::vector<std::map<std::string, std::string>> configs = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"GNA_COMPACT_MODE", "NO"}
},
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"GNA_COMPACT_MODE", "YES"}
},
{
{"GNA_DEVICE_MODE", "GNA_SW_FP32"},
}
};

View File

@@ -0,0 +1,164 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include "transformations/split_eltwise.hpp"
#include "common_test_utils/common_utils.hpp"
#include "common_test_utils/ngraph_test_utils.hpp"
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset9.hpp>
#include <ngraph/pass/manager.hpp>
#include <transformations/init_node_info.hpp>
#include <legacy/ngraph_ops/eltwise.hpp>
#include <layers/gna_split_layer.hpp>
namespace testing {
namespace {
// Builds a test ngraph::Function: either a single Eltwise (split == false) or the
// expected reference graph VariadicSplit -> partial Eltwises -> Concat (split == true).
// @param input_shape shape of both eltwise inputs
// @param with_const  use a Constant as the second input instead of a Parameter
// @param with_fq     wrap both eltwise inputs into FakeQuantize
// @param type        eltwise operation kind (Sum/Sub/Prod)
// @param split       build the split (reference) graph variant
static std::shared_ptr<ngraph::Function> createFunction(const ngraph::Shape& input_shape,
                                                        bool with_const,
                                                        bool with_fq,
                                                        ELTWISE_TYPE type,
                                                        bool split) {
    // Fixed: removed the unused `last_node` and the outer `input1` declaration
    // that was shadowed by the `auto input1` inside the else branch.
    std::shared_ptr<ngraph::Node> last_node0, last_node1;
    ngraph::ParameterVector parameters;
    auto input0 = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape);
    parameters.push_back(input0);
    last_node0 = input0;
    if (with_const) {
        auto const_input = ngraph::opset9::Constant::create(ngraph::element::f32, input_shape, {1});
        last_node1 = const_input;
    } else {
        auto input1 = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape);
        last_node1 = input1;
        parameters.push_back(input1);
    }
    // Helper inserting an 11-level FakeQuantize with fixed i64 range constants.
    auto add_fake_quantize = [&](const std::shared_ptr<ngraph::Node>& node) {
        auto input_low = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1});
        auto input_high = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {5});
        auto output_low = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0});
        auto output_high = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {10});
        return std::make_shared<ngraph::opset9::FakeQuantize>(node, input_low, input_high, output_low, output_high, 11);
    };
    if (with_fq) {
        auto fq_eltwise_input0 = add_fake_quantize(last_node0);
        last_node0 = fq_eltwise_input0;
        auto fq_eltwise_input1 = add_fake_quantize(last_node1);
        last_node1 = fq_eltwise_input1;
    }
    if (split) {
        // Mirror the transformation: both inputs are split with the same axis/sizes.
        auto split_sizes_per_axis = GNAPluginNS::AlignedSplitSizesPerAxis(input_shape);
        auto split0 = std::make_shared<ngraph::opset9::VariadicSplit>(last_node0,
            ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{split_sizes_per_axis.first}),
            ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes_per_axis.second.size()}), split_sizes_per_axis.second));
        auto split1 = std::make_shared<ngraph::opset9::VariadicSplit>(last_node1,
            ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{split_sizes_per_axis.first}),
            ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes_per_axis.second.size()}), split_sizes_per_axis.second));
        ov::NodeVector concat_inputs;
        for (size_t i = 0; i < split_sizes_per_axis.second.size(); i++) {
            auto eltwise_node_part = std::make_shared<ngraph::op::Eltwise>(split0->output(i), split1->output(i), type);
            concat_inputs.push_back(eltwise_node_part);
        }
        auto concat = std::make_shared<ngraph::opset9::Concat>(concat_inputs, split_sizes_per_axis.first);
        auto result = std::make_shared<ngraph::opset9::Result>(concat);
        return std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, parameters);
    } else {
        auto eltwise = std::make_shared<ngraph::op::Eltwise>(last_node0, last_node1, type);
        auto result = std::make_shared<ngraph::opset9::Result>(eltwise);
        return std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, parameters);
    }
}
// Test parameters: input shape, second-input-is-const, wrap-inputs-in-FQ, eltwise kind.
using EltwiseSplitParams = std::tuple<
    ngraph::Shape,
    bool,           // with const
    bool,           // with fq
    ELTWISE_TYPE>;  // eltwise type
// Produces a readable test-case name from the parameter tuple,
// e.g. "IS=(1.67000)_wConst=1_wFQ=0_type=sum".
static std::string getTestCaseName(testing::TestParamInfo<EltwiseSplitParams> obj) {
    ngraph::Shape shape;
    bool with_const;
    bool with_fq;
    ELTWISE_TYPE type;
    std::tie(shape, with_const, with_fq, type) = obj.param;
    // Map the eltwise kind to its short label; unknown kinds yield an empty label.
    const char* type_label = "";
    if (type == ELTWISE_TYPE::Sum) {
        type_label = "sum";
    } else if (type == ELTWISE_TYPE::Sub) {
        type_label = "sub";
    } else if (type == ELTWISE_TYPE::Prod) {
        type_label = "prod";
    }
    std::ostringstream name;
    name << "IS=" << CommonTestUtils::vec2str(shape) << "_";
    name << "wConst=" << with_const << "_";
    name << "wFQ=" << with_fq << "_";
    name << "type=" << type_label;
    return name.str();
}
// Parameterized fixture: builds the function under test and the expected
// reference function from an EltwiseSplitParams tuple.
class SplitEltwiseTestSuiteFixture: public CommonTestUtils::TestsCommon,
                                    public ::testing::WithParamInterface<EltwiseSplitParams> {
public:
    void SetUp() override;
public:
    // `function` is transformed by the pass; `reference_function` is the expected result.
    std::shared_ptr<ngraph::Function> function, reference_function;
};
void SplitEltwiseTestSuiteFixture::SetUp() {
ngraph::Shape shape;
bool with_const;
bool with_fq;
ELTWISE_TYPE type;
std::tie(shape, with_const, with_fq, type) = this->GetParam();
function = createFunction(shape, with_const, with_fq, type, false);
reference_function = createFunction(shape, with_const, with_fq, type, true);
}
// Runs the SplitEltwise pass on `function` and asserts it matches
// `reference_function`, comparing node attributes as well as topology.
void execute_test(std::shared_ptr<ngraph::Function> function,
                  std::shared_ptr<ngraph::Function> reference_function) {
    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::InitNodeInfo>();
    manager.register_pass<ov::intel_gna::pass::SplitEltwise>();
    manager.run_passes(function);
    const FunctionsComparator func_comparator = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES);
    const FunctionsComparator::Result result = func_comparator(function, reference_function);
    ASSERT_TRUE(result.valid) << result.message;
}
// Applies the pass to the generated function and compares against the reference graph.
TEST_P(SplitEltwiseTestSuiteFixture, CompareFunctions) {
    execute_test(function, reference_function);
}

// Input shapes under test — presumably all exceed GNA bufferMaxSize so that
// splitting is triggered; verify against GNALimitations::bufferMaxSize.
const std::vector<ov::Shape> inputShape = {
    {1, 67000},
    {1, 500000},
    {1, 936, 513},
    {1, 64, 64, 64}
};

// Cross-product over shapes, const/parameter second input, FQ wrapping, and eltwise kind.
INSTANTIATE_TEST_SUITE_P(SplitEltwiseTestSuite, SplitEltwiseTestSuiteFixture,
    ::testing::Combine(
        ::testing::ValuesIn(inputShape),
        ::testing::ValuesIn(std::vector<bool>{true, false}), // with const
        ::testing::ValuesIn(std::vector<bool>{true, false}), // with fq
        ::testing::ValuesIn(std::vector<ELTWISE_TYPE>{ELTWISE_TYPE::Sum, ELTWISE_TYPE::Sub, ELTWISE_TYPE::Prod})), // eltwise type
    getTestCaseName);
} // namespace
} // namespace testing