[GPU] Added handle permute (#9853)

This commit is contained in:
Roman Lyamin 2022-01-27 14:37:55 +03:00 committed by GitHub
parent f6162ed657
commit cf80eda8e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 121 additions and 7 deletions

View File

@ -44,6 +44,7 @@ struct program {
friend class prepare_conv_eltw_fusing; // to be removed when possible
friend class reorder_inputs; // to be removed when possible
friend class remove_redundant_reorders; // to be removed when possible
friend class handle_permute; // to be removed when possible
friend class program_wrapper; // this class is intended to extend the interface of program for
// the usage within tests_core_internal project only
public:

View File

@ -0,0 +1,42 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "pass_manager.h"
#include "program_helpers.h"
#include "permute_inst.h"
#include "program_node.h"
#include "intel_gpu/graph/program.hpp"
#include <iterator>
#include <vector>
#include <memory>
using namespace cldnn;
// Graph pass: rewrites a permute node into an equivalent reorder node when the
// permutation is a pure layout change. A permute with order {0, 2, 3, 1}
// applied to a byxf-formatted input performs the same data movement as a
// byxf -> bfyx reorder, so the node is re-expressed as a reorder, which the
// rest of the pipeline has dedicated handling for.
void handle_permute::run(program& p) {
auto itr = p.get_processing_order().begin();
while (itr != p.get_processing_order().end()) {
// Advance before any mutation so the iterator no longer refers to the node
// that may be replaced below.
auto& node = (*itr++);
if (!node->is_type<permute>())
continue;
auto& perm_node = node->as<permute>();
// Dependency 0 is the permute's data input.
auto& prev_node = perm_node.get_dependencies().front();
if (prev_node->get_output_layout().format == format::byxf &&
perm_node.get_permute_order() == std::vector<uint16_t>{ 0, 2, 3, 1 }) {
// Keep the permute's output shape but express the transformation as a
// format conversion to bfyx instead of an axis permutation.
layout reorder_layout = perm_node.get_output_layout();
reorder_layout.format = format::bfyx;
std::string reorder_name = perm_node.id() + "_converted_to_reorder";
auto new_reorder = std::make_shared<reorder>(reorder_name, prev_node->id(), reorder_layout);
// Swap the permute out of the graph for the new reorder node, then rename
// so the node id matches the primitive id and recompute its layout.
// NOTE(review): assumes program::replace() keeps the processing-order
// iteration valid after the node already advanced past — confirm against
// the program implementation.
auto& new_reorder_node = p.get_or_create(new_reorder);
p.replace(perm_node, new_reorder_node);
p.rename(new_reorder_node, reorder_name);
new_reorder_node.recalc_output_layout();
}
}
}

View File

@ -12,6 +12,7 @@
#include "quantize_inst.h"
#include "eltwise_inst.h"
#include "convolution_inst.h"
#include "permute_inst.h"
#include <string>
#include <vector>
#include <memory>
@ -131,6 +132,14 @@ private:
void run(program& p) override;
};
// Optimization pass that converts eligible permute nodes into equivalent
// reorder nodes (a {0, 2, 3, 1} permute of a byxf input is a byxf -> bfyx
// reorder). Implemented in handle_permute.cpp.
class handle_permute : public base_pass {
public:
handle_permute() : base_pass("handle_permute") {}
private:
void run(program& p) override;
};
class mark_nodes : public base_pass {
public:
mark_nodes() : base_pass("analyzed_graph") {}

View File

@ -23,6 +23,7 @@ public:
using parent::parent;
// The permute's single data input (dependency 0).
program_node& input() const { return get_dependency(0); }
// Axis permutation order, taken verbatim from the wrapped permute primitive.
std::vector<uint16_t> get_permute_order() const { return get_primitive()->permute_order; }
};
using permute_node = typed_program_node<permute>;

View File

@ -475,6 +475,8 @@ void program::pre_optimize_graph(bool is_internal) {
// handle symmetric and asymmetric padding for input
apply_opt_pass<handle_input_padding>();
apply_opt_pass<handle_permute>();
processing_order.calculate_BFS_processing_order(); // this method makes sense only for OOOQ (out of order execution queue)
apply_opt_pass<reverse_optional_nodes_outputs>();

View File

@ -178,13 +178,26 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::
break;
}
bool is_convert_color_input = false;
for (auto& node : op->get_users()) {
is_convert_color_input |= ngraph::is_type<ngraph::op::v8::NV12toRGB>(node) ||
ngraph::is_type<ngraph::op::v8::NV12toBGR>(node) ||
ngraph::is_type<ngraph::op::v8::I420toRGB>(node) ||
ngraph::is_type<ngraph::op::v8::I420toBGR>(node);
}
auto is_convert_color_type = [](const std::shared_ptr<ov::Node> &node) {
return ngraph::is_type<ngraph::op::v8::NV12toRGB>(node) ||
ngraph::is_type<ngraph::op::v8::NV12toBGR>(node) ||
ngraph::is_type<ngraph::op::v8::I420toRGB>(node) ||
ngraph::is_type<ngraph::op::v8::I420toBGR>(node);
};
std::function<bool(const std::shared_ptr<ov::Node>&, size_t)> recursive_search_convert_color =
[&](const std::shared_ptr<ov::Node> &node, size_t curr_depth) -> bool {
bool convert_color_found = is_convert_color_type(node);
if (curr_depth != 0) {
for (auto& user : node->get_users()) {
convert_color_found |= recursive_search_convert_color(user, curr_depth - 1);
}
}
return convert_color_found;
};
size_t search_depth = 3;
bool is_convert_color_input = recursive_search_convert_color(op, search_depth);
if (is_convert_color_input) {
networkInputLayout.format = cldnn::format::byxf;

View File

@ -0,0 +1,46 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/convert_color.hpp>
#include <intel_gpu/primitives/permute.hpp>
#include <intel_gpu/primitives/resample.hpp>
using namespace cldnn;
using namespace ::tests;
// Verifies that a byxf {0, 2, 3, 1} permute following a convert_color node
// (the exact pattern the handle_permute pass rewrites into a reorder) still
// produces a network whose final output has the expected bfyx shape.
TEST(handle_permute, convert_permute_to_reorder) {
    auto& engine = get_test_engine();

    int32_t width = 224;
    int32_t height = 448;
    // NV12 input buffer: full-height Y plane plus a half-height interleaved
    // UV plane stacked below it.
    int32_t input_height = height + height / 2;

    auto input = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width, input_height } });
    std::vector<float> input_data = generate_random_1d<float>(width * input_height, 0, 255);
    set_values(input, input_data);

    // RGB result of the color conversion: 3 feature channels, byxf layout.
    layout output_layout(data_types::f32, cldnn::format::byxf, { 1, 3, width, height });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(convert_color("convert_color", { "input" }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB,
                               cldnn::convert_color::memory_type::buffer, output_layout));
    // byxf input + {0, 2, 3, 1} order is the pattern handle_permute targets.
    topology.add(permute("permute", "convert_color", { 0, 2, 3, 1 }));
    topology.add(resample("resample", "permute", { 1, 3, width, height }));

    network network(engine, topology);
    network.set_input_data("input", input);
    auto outputs = network.execute();

    std::vector<int32_t> expected_shape = { 1, 3, width, height };
    std::vector<int32_t> output_shape = outputs.at("resample").get_memory()->get_layout().size.sizes();

    // Fail fast (and avoid out-of-bounds indexing below) if the output rank
    // ever differs from the expected one.
    ASSERT_EQ(output_shape.size(), expected_shape.size());
    for (size_t i = 0; i < expected_shape.size(); ++i) {
        EXPECT_EQ(output_shape[i], expected_shape[i]);
    }
}