[GNA]: Split eltwise using ngraph (#13176)

* [GNA]: Split eltwise over channel using ngraph

* Update src/plugins/intel_gna/layers/gna_split_layer.hpp

Co-authored-by: Szymon Irzabek <szymon.jakub.irzabek@intel.com>

* Update src/plugins/intel_gna/transformations/split_eltwise_over_channel.hpp

Co-authored-by: Szymon Irzabek <szymon.jakub.irzabek@intel.com>

* Review comments

Co-authored-by: Szymon Irzabek <szymon.jakub.irzabek@intel.com>
This commit is contained in:
Nadezhda Ageeva
2022-09-27 21:13:42 +04:00
committed by GitHub
parent 8ad0992050
commit 9d206b6956
8 changed files with 320 additions and 22 deletions

View File

@@ -222,6 +222,7 @@ static std::vector<std::string> skipConstInfer = {
"Copy",
"FullyConnected",
"Squeeze",
"Split",
"TensorIterator",
"LSTMSequence",
"MVN"};

View File

@@ -91,6 +91,7 @@
#include "transformations/convert_precision.hpp"
#include "transformations/unfuse_reshape_and_transpose.hpp"
#include "transformations/insert_copy_layer.hpp"
#include "transformations/split_eltwise.hpp"
#include <ngraph/opsets/opset7.hpp>
@@ -732,6 +733,11 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
transformations
*/
manager.register_pass<ov::intel_gna::pass::BroadcastAddMultiplyConst>();
/*
SplitEltwise depends on BroadcastAddMultiplyConst for the case when a Constant
input is being split
*/
manager.register_pass<ov::intel_gna::pass::SplitEltwise>();
if (!config.gnaFlags.sw_fp32 && !config.gnaFlags.uniformPwlDesign) {
manager.register_pass<ov::intel_gna::pass::PWLApproximationWithFq>(config.gnaFlags.pwlMaxErrorPercent);
manager.register_pass<ov::intel_gna::pass::PWLApproximation>(config.gnaFlags.pwlMaxErrorPercent);
@@ -823,9 +829,9 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
if (!isNgraphPassesUsed) {
passes->registerPass<ReorderMaxPoolPass>();
passes->registerPass<EltwiseSplitOverChannelsPass>();
}
passes->registerPass<EltwiseSplitOverChannelsPass>();
passes->registerPass<InsertSplitAligningFilterPass>();
if (!isNgraphPassesUsed) {

View File

@@ -60,4 +60,29 @@ static std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t m
return splitSizes;
}
// @brief Returns the split axis and the sizes of the split outputs needed to cut the
// input tensor into aligned parts, taking GNA HW limitations into account.
// An empty size vector in the result means the tensor cannot be split in a HW-friendly way.
static std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(InferenceEngine::SizeVector dims) {
    const auto totalElements = InferenceEngine::details::product(std::begin(dims), std::end(dims));
    // The split axis is the first dimension holding more than one element.
    const auto axisIt = std::find_if(std::begin(dims), std::end(dims), [](size_t dim) { return dim > 1; });
    IE_ASSERT(axisIt != std::end(dims));
    const auto axisSize = *axisIt;
    const auto axisIndex = std::distance(std::begin(dims), axisIt);
    auto alignment = GNALimitations::inputByteAlignment;
    // Split output size should be a multiple of 64 to avoid align-filter insertion.
    // When the dimension itself does not exceed 64 we can still split,
    // but only if the remaining size is already aligned.
    if (axisSize <= alignment) {
        if ((totalElements / axisSize) % alignment != 0) {
            // Not splittable: report the axis with an empty size list.
            return {axisIndex, std::vector<uint32_t>{}};
        }
        alignment = 1;
    }
    const auto sizes = GetAlignedSplitSizes(axisSize,
                                            GNALimitations::bufferMaxSize * axisSize / totalElements,
                                            alignment);
    return {axisIndex, sizes};
}
} // namespace GNAPluginNS

View File

@@ -1495,27 +1495,12 @@ void EltwiseSplitOverChannelsPass::run() {
if (totalElementsSize <= GNALimitations::bufferMaxSize) {
continue;
}
auto splitSizesPerAxis = AlignedSplitSizesPerAxis(oDims);
auto firstValuableDim = std::find_if(std::begin(oDims), std::end(oDims), [](size_t val) { return val > 1; });
IE_ASSERT(firstValuableDim != std::end(oDims));
auto splittedElementsSize = *firstValuableDim;
auto splittedDimIx = std::distance(std::begin(oDims), firstValuableDim);
auto alignment = GNALimitations::inputByteAlignment;
// Split output size should be multiple by 64 to avoid align filters insertion,
// but we need to check if our input size to split exceeds 64; if not we can always
// split if the remaining size is aligned
if (splittedElementsSize <= 64) {
if ((totalElementsSize / splittedElementsSize) % alignment == 0) {
alignment = 1;
} else {
THROW_GNA_LAYER_EXCEPTION(l) << "splitting didn't succeed\n";
}
if (0 == splitSizesPerAxis.second.size()) {
THROW_GNA_LAYER_EXCEPTION(l) << "splitting didn't succeed\n";
}
auto splitSizes = GetAlignedSplitSizes(splittedElementsSize,
GNALimitations::bufferMaxSize * splittedElementsSize / totalElementsSize, alignment);
pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n";
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(l);
@@ -1532,9 +1517,9 @@ void EltwiseSplitOverChannelsPass::run() {
auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc();
// create split layer outputs
for (auto elementsNum : splitSizes) {
for (auto elementsNum : splitSizesPerAxis.second) {
auto newDims = oDims;
newDims[splittedDimIx] = elementsNum;
newDims[splitSizesPerAxis.first] = elementsNum;
auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout());
auto data = std::make_shared<Data>(l->name + "/" + std::to_string(kThEltwiseInput) + "/1", newDesc);
getCreatorLayer(data) = split;
@@ -1558,7 +1543,7 @@ void EltwiseSplitOverChannelsPass::run() {
concat->outData.push_back(masterEltwise->outData.front());
getCreatorLayer(masterEltwise->outData.front()) = concat;
for (size_t k = 0; k != splitSizes.size(); k++) {
for (size_t k = 0; k != splitSizesPerAxis.second.size(); k++) {
auto eltwiseRaw = std::make_shared<EltwiseLayer>(
LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32});
IE_ASSERT(eltwiseRaw != nullptr);

View File

@@ -0,0 +1,87 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <openvino/cc/ngraph/itt.hpp>
#include "transformations/split_eltwise.hpp"
#include <ngraph/opsets/opset9.hpp>
#include <ngraph/pattern/op/or.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include "legacy/ngraph_ops/eltwise.hpp"
#include "ops/util/util.hpp"
#include "backend/gna_limitations.hpp"
#include "layers/gna_split_layer.hpp"
using namespace ov::intel_gna::pass;
using namespace ov::intel_gna::ngraph_util;
namespace {
// Returns true when the Eltwise output tensor is larger than a single GNA buffer,
// i.e. the operation has to be split into smaller parts.
inline bool is_eltwise_has_to_be_splitted(const ngraph::Output<ngraph::Node>& node) {
    auto eltwise = std::dynamic_pointer_cast<ngraph::op::Eltwise>(node.get_node_shared_ptr());
    if (!eltwise) return false;
    auto o_dims = eltwise->get_output_shape(0);
    // Accumulate in size_t: with the int literal `1` std::accumulate deduces an int
    // accumulator, which can overflow/truncate for large output shapes.
    auto total_elem_size = std::accumulate(std::begin(o_dims), std::end(o_dims), size_t{1}, std::multiplies<size_t>());
    return (total_elem_size > GNAPluginNS::GNALimitations::bufferMaxSize);
}
// Wraps `node` into a VariadicSplit using the precomputed (axis, part sizes) pair.
std::shared_ptr<ngraph::opset9::VariadicSplit> split_input(const std::shared_ptr<ov::Node>& node,
        const std::pair<int64_t, std::vector<uint32_t>>& split_sizes_per_axis) {
    const auto& axis = split_sizes_per_axis.first;
    const auto& sizes = split_sizes_per_axis.second;
    auto axis_const = ngraph::opset9::Constant::create(ngraph::element::i64,
                                                       ngraph::Shape({1}),
                                                       std::vector<int64_t>{axis});
    auto sizes_const = ngraph::opset9::Constant::create(ngraph::element::i64,
                                                        ngraph::Shape({sizes.size()}),
                                                        sizes);
    auto split = std::make_shared<ngraph::opset9::VariadicSplit>(node, axis_const, sizes_const);
    split->set_friendly_name(node->get_friendly_name() + "/split");
    ngraph::copy_runtime_info(node, split);
    return split;
}
// Builds the index-th partial Eltwise from the matching outputs of both splits,
// copying the operation kind and output element type from the original node.
std::shared_ptr<ngraph::op::Eltwise> create_eltwise(const std::shared_ptr<ov::Node>& node, const std::shared_ptr<ov::Node>& split0,
        const std::shared_ptr<ov::Node>& split1, size_t index) {
    const auto root = std::dynamic_pointer_cast<ngraph::op::Eltwise>(node);
    auto part = std::make_shared<ngraph::op::Eltwise>(split0->output(index),
                                                      split1->output(index),
                                                      root->eltwise_type,
                                                      root->get_output_element_type(0));
    part->set_friendly_name(root->get_friendly_name() + "/partition" + std::to_string(index));
    ngraph::copy_runtime_info(root, part);
    return part;
}
} // namespace
// Matcher pass: replaces an Eltwise whose output exceeds the GNA buffer size with
//   VariadicSplit(input0) / VariadicSplit(input1) -> partial Eltwise ops -> Concat
// so that each partial operation fits into the HW buffer.
SplitEltwise::SplitEltwise() {
    MATCHER_SCOPE(SplitEltwise);
    // Match any two-input Eltwise; the predicate filters to oversized outputs only.
    auto eltwise = ngraph::pattern::wrap_type<ngraph::op::Eltwise>({ngraph::pattern::any_input(), ngraph::pattern::any_input()},
                                                                   is_eltwise_has_to_be_splitted);
    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
        const auto& pattern_map = m.get_pattern_value_map();
        auto eltwise_node = pattern_map.at(eltwise).get_node_shared_ptr();
        // Capture the consumers up front so they can be re-pointed to the concat later.
        auto consumers = eltwise_node->output(0).get_target_inputs();
        auto o_dims = eltwise_node->get_output_shape(0);
        // Compute split axis and aligned part sizes; an empty size list means
        // no HW-friendly split exists, so leave the graph unchanged.
        auto split_sizes_per_axis = GNAPluginNS::AlignedSplitSizesPerAxis(o_dims);
        if (0 == split_sizes_per_axis.second.size()) {
            gnalog() << "Splitting didn't succeed for layer " << eltwise_node->get_friendly_name()
                     << " on axis " << split_sizes_per_axis.first << std::endl;
            return false;
        }
        // Split both eltwise inputs identically.
        auto split_node0 = split_input(eltwise_node->get_input_node_shared_ptr(0), split_sizes_per_axis);
        auto split_node1 = split_input(eltwise_node->get_input_node_shared_ptr(1), split_sizes_per_axis);
        ov::NodeVector concat_inputs;
        // One partial eltwise per split chunk, fed from the matching split outputs.
        for (size_t i = 0; i < split_sizes_per_axis.second.size(); i++) {
            auto eltwise_node_part = create_eltwise(eltwise_node, split_node0, split_node1, i);
            concat_inputs.push_back(eltwise_node_part);
        }
        // Re-assemble along the split axis; keep the original friendly name so the
        // replacement is transparent to the rest of the graph.
        auto concat = std::make_shared<ngraph::opset9::Concat>(concat_inputs, split_sizes_per_axis.first);
        concat->set_friendly_name(eltwise_node->get_friendly_name());
        ngraph::copy_runtime_info(eltwise_node, concat);
        for (auto&& input : consumers) {
            input.replace_source_output(concat);
        }
        return true;
    };
    auto m = std::make_shared<ngraph::pattern::Matcher>(eltwise, matcher_name);
    this->register_matcher(m, callback);
}

View File

@@ -0,0 +1,23 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <openvino/pass/graph_rewrite.hpp>
namespace ov {
namespace intel_gna {
namespace pass {
/**
 * @brief Splits an oversized Eltwise over channels (VariadicSplit -> partial
 * eltwise ops -> Concat) to satisfy the GNA-HW bufferMaxSize limitation per eltwise.
 */
class SplitEltwise : public ov::pass::MatcherPass {
public:
    OPENVINO_RTTI("SplitEltwise", "0");
    // Registers the matcher that detects oversized eltwise nodes and rewrites them.
    SplitEltwise();
};
} // namespace pass
} // namespace intel_gna
} // namespace ov

View File

@@ -72,6 +72,13 @@ const std::vector<std::map<std::string, std::string>> configs = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"GNA_COMPACT_MODE", "NO"}
},
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"GNA_COMPACT_MODE", "YES"}
},
{
{"GNA_DEVICE_MODE", "GNA_SW_FP32"},
}
};

View File

@@ -0,0 +1,164 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include "transformations/split_eltwise.hpp"
#include "common_test_utils/common_utils.hpp"
#include "common_test_utils/ngraph_test_utils.hpp"
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset9.hpp>
#include <ngraph/pass/manager.hpp>
#include <transformations/init_node_info.hpp>
#include <legacy/ngraph_ops/eltwise.hpp>
#include <layers/gna_split_layer.hpp>
namespace testing {
namespace {
// Builds a test ngraph::Function: either a single Eltwise (split == false) or the
// expected reference graph VariadicSplit -> partial Eltwises -> Concat (split == true).
// @param input_shape shape of both eltwise inputs
// @param with_const  use a Constant as the second input instead of a Parameter
// @param with_fq     wrap both eltwise inputs into FakeQuantize
// @param type        eltwise operation kind (Sum/Sub/Prod)
// @param split       build the split (reference) graph variant
static std::shared_ptr<ngraph::Function> createFunction(const ngraph::Shape& input_shape,
                                                        bool with_const,
                                                        bool with_fq,
                                                        ELTWISE_TYPE type,
                                                        bool split) {
    // Fixed: removed the unused `last_node` and the outer `input1` declaration
    // that was shadowed by the `auto input1` inside the else branch.
    std::shared_ptr<ngraph::Node> last_node0, last_node1;
    ngraph::ParameterVector parameters;
    auto input0 = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape);
    parameters.push_back(input0);
    last_node0 = input0;
    if (with_const) {
        auto const_input = ngraph::opset9::Constant::create(ngraph::element::f32, input_shape, {1});
        last_node1 = const_input;
    } else {
        auto input1 = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape);
        last_node1 = input1;
        parameters.push_back(input1);
    }
    // Helper inserting an 11-level FakeQuantize with fixed i64 range constants.
    auto add_fake_quantize = [&](const std::shared_ptr<ngraph::Node>& node) {
        auto input_low = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1});
        auto input_high = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {5});
        auto output_low = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0});
        auto output_high = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {10});
        return std::make_shared<ngraph::opset9::FakeQuantize>(node, input_low, input_high, output_low, output_high, 11);
    };
    if (with_fq) {
        auto fq_eltwise_input0 = add_fake_quantize(last_node0);
        last_node0 = fq_eltwise_input0;
        auto fq_eltwise_input1 = add_fake_quantize(last_node1);
        last_node1 = fq_eltwise_input1;
    }
    if (split) {
        // Mirror the transformation: both inputs are split with the same axis/sizes.
        auto split_sizes_per_axis = GNAPluginNS::AlignedSplitSizesPerAxis(input_shape);
        auto split0 = std::make_shared<ngraph::opset9::VariadicSplit>(last_node0,
            ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{split_sizes_per_axis.first}),
            ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes_per_axis.second.size()}), split_sizes_per_axis.second));
        auto split1 = std::make_shared<ngraph::opset9::VariadicSplit>(last_node1,
            ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{split_sizes_per_axis.first}),
            ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes_per_axis.second.size()}), split_sizes_per_axis.second));
        ov::NodeVector concat_inputs;
        for (size_t i = 0; i < split_sizes_per_axis.second.size(); i++) {
            auto eltwise_node_part = std::make_shared<ngraph::op::Eltwise>(split0->output(i), split1->output(i), type);
            concat_inputs.push_back(eltwise_node_part);
        }
        auto concat = std::make_shared<ngraph::opset9::Concat>(concat_inputs, split_sizes_per_axis.first);
        auto result = std::make_shared<ngraph::opset9::Result>(concat);
        return std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, parameters);
    } else {
        auto eltwise = std::make_shared<ngraph::op::Eltwise>(last_node0, last_node1, type);
        auto result = std::make_shared<ngraph::opset9::Result>(eltwise);
        return std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, parameters);
    }
}
// Test parameters: input shape, second-input-is-const, wrap-inputs-in-FQ, eltwise kind.
using EltwiseSplitParams = std::tuple<
    ngraph::Shape,
    bool,           // with const
    bool,           // with fq
    ELTWISE_TYPE>;  // eltwise type
// Produces a readable test-case name from the parameter tuple,
// e.g. "IS=(1.67000)_wConst=1_wFQ=0_type=sum".
static std::string getTestCaseName(testing::TestParamInfo<EltwiseSplitParams> obj) {
    ngraph::Shape shape;
    bool with_const;
    bool with_fq;
    ELTWISE_TYPE type;
    std::tie(shape, with_const, with_fq, type) = obj.param;
    // Map the eltwise kind to its short label; unknown kinds yield an empty label.
    const char* type_label = "";
    if (type == ELTWISE_TYPE::Sum) {
        type_label = "sum";
    } else if (type == ELTWISE_TYPE::Sub) {
        type_label = "sub";
    } else if (type == ELTWISE_TYPE::Prod) {
        type_label = "prod";
    }
    std::ostringstream name;
    name << "IS=" << CommonTestUtils::vec2str(shape) << "_";
    name << "wConst=" << with_const << "_";
    name << "wFQ=" << with_fq << "_";
    name << "type=" << type_label;
    return name.str();
}
// Parameterized fixture: builds the function under test and the expected
// reference function from an EltwiseSplitParams tuple.
class SplitEltwiseTestSuiteFixture: public CommonTestUtils::TestsCommon,
                                    public ::testing::WithParamInterface<EltwiseSplitParams> {
public:
    void SetUp() override;
public:
    // `function` is transformed by the pass; `reference_function` is the expected result.
    std::shared_ptr<ngraph::Function> function, reference_function;
};
void SplitEltwiseTestSuiteFixture::SetUp() {
ngraph::Shape shape;
bool with_const;
bool with_fq;
ELTWISE_TYPE type;
std::tie(shape, with_const, with_fq, type) = this->GetParam();
function = createFunction(shape, with_const, with_fq, type, false);
reference_function = createFunction(shape, with_const, with_fq, type, true);
}
// Runs the SplitEltwise pass on `function` and asserts it matches
// `reference_function`, comparing node attributes as well as topology.
void execute_test(std::shared_ptr<ngraph::Function> function,
                  std::shared_ptr<ngraph::Function> reference_function) {
    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::InitNodeInfo>();
    manager.register_pass<ov::intel_gna::pass::SplitEltwise>();
    manager.run_passes(function);
    const FunctionsComparator func_comparator = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES);
    const FunctionsComparator::Result result = func_comparator(function, reference_function);
    ASSERT_TRUE(result.valid) << result.message;
}
// Applies the pass to the generated function and compares against the reference graph.
TEST_P(SplitEltwiseTestSuiteFixture, CompareFunctions) {
    execute_test(function, reference_function);
}

// Input shapes under test — presumably all exceed GNA bufferMaxSize so that
// splitting is triggered; verify against GNALimitations::bufferMaxSize.
const std::vector<ov::Shape> inputShape = {
    {1, 67000},
    {1, 500000},
    {1, 936, 513},
    {1, 64, 64, 64}
};

// Cross-product over shapes, const/parameter second input, FQ wrapping, and eltwise kind.
INSTANTIATE_TEST_SUITE_P(SplitEltwiseTestSuite, SplitEltwiseTestSuiteFixture,
    ::testing::Combine(
        ::testing::ValuesIn(inputShape),
        ::testing::ValuesIn(std::vector<bool>{true, false}), // with const
        ::testing::ValuesIn(std::vector<bool>{true, false}), // with fq
        ::testing::ValuesIn(std::vector<ELTWISE_TYPE>{ELTWISE_TYPE::Sum, ELTWISE_TYPE::Sub, ELTWISE_TYPE::Prod})), // eltwise type
    getTestCaseName);
} // namespace
} // namespace testing