[GPU] fix strided_slice_optimize for 5d new axis output, typo in strided_slice cl kernel, avoid handle_reshape when user is only one (#14812)

* fix strided_slice_optimize for 5d new axis output, typo in strided_slice cl kernel, avoid handle_reshape when user is only one * fix unit test of resample_eltwise_fusing_through/12
2023-01-05 10:36:16 +09:00 · 2023-01-05 10:36:16 +09:00 · e422b5acb4
commit e422b5acb4
parent e07bd74f38
4 changed files with 68 additions and 8 deletions
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp
@ -93,15 +93,15 @@ void handle_reshape::run(program& p) {
                // vector for storing reshape nodes to connect to new reorder nodes (if needed)
                std::vector<program_node*> reorder_reshape_nodes;

-                bool skip_first_user = false;
+                bool found_one = false;
                auto reshape_users = node->get_users();
                for (const auto& user : reshape_users) {
                    // reshape node for first user will be the orginal reshape from the graph
-                    if (!skip_first_user) {
-                        if (std::find(reorder_node_to_split.begin(), reorder_node_to_split.end(), user) !=
-                            reorder_node_to_split.end())
+                    if (!found_one) {
+                        if ((std::find(reorder_node_to_split.begin(), reorder_node_to_split.end(), user) !=
+                            reorder_node_to_split.end()) && (user->get_output_layout().get_rank() == node->get_output_layout().get_rank()))
                            reorder_reshape_nodes.push_back(node);
-                        skip_first_user = true;
+                        found_one = true;
                        continue;
                    }

@ -118,6 +118,9 @@ void handle_reshape::run(program& p) {
                    }
                }

+                if (reorder_reshape_nodes.size() == 0)
+                    continue;
+
                // add new reorder nodes to proper reshape node
                auto reshape_reorder_id = 0;
                for (const auto& reorder_node : reorder_node_to_split) {
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/strided_slice_optimize.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/strided_slice_optimize.cpp
@ -26,12 +26,16 @@ void strided_slice_optimize::run(program& p) {
            if (std::find(new_axis_mask.begin(), new_axis_mask.end(), 1) == new_axis_mask.end())
                continue;

+            auto node_layout = strided_slice_node.get_output_layout();
+            // only 4D or less dimension output runs optimization
+            if (node_layout.get_rank() > 4)
+                continue;
+
            auto& deps = node->get_dependencies();
            for (size_t i = deps.size(); i--;)
                if (deps[i].first->is_type<data>())
                    node->remove_dependency(i);

-            auto node_layout = strided_slice_node.get_output_layout();
            auto node_size = node_layout.get_tensor().sizes(format::bfyx);

            auto is_shift_possible = [&](const std::vector<int32_t>& dims) -> bool {
--- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl
+++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl
@ -18,8 +18,8 @@ KERNEL(strided_slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYP
 #elif OUTPUT_LAYOUT_BFZYX
    const uint yx_input = (uint)get_global_id(2) % (INPUT0_SIZE_X * INPUT0_SIZE_Y);
    const uint z_input = (uint)get_global_id(2) / (INPUT0_SIZE_X * INPUT0_SIZE_Y);
-    const uint y_input = yx / INPUT0_SIZE_X;
-    const uint x_input = yx % INPUT0_SIZE_X;
+    const uint y_input = yx_input / INPUT0_SIZE_X;
+    const uint x_input = yx_input % INPUT0_SIZE_X;
 #endif
    const uint input_index = INPUT0_OFFSET +
        batch * INPUT0_BATCH_PITCH +
--- a/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp
@ -1240,6 +1240,59 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2) {
    }
 }

+TEST(strided_slice_gpu_f32_i32, test_1x1x1x8x1_new_axis_5d) {
+    // Input (BFYX): 1x8x1x1
+    // Output (BFZYX): 1x1x1x8x1
+
+    auto& engine = get_test_engine();
+    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 8, 1, 1 } });
+    auto begin = engine.allocate_memory({ ov::PartialShape{ 5 }, data_types::i32, format::bfzyx });
+    auto end = engine.allocate_memory({ ov::PartialShape{ 5 }, data_types::i32, format::bfzyx });
+    auto strides = engine.allocate_memory({ ov::PartialShape{ 5 }, data_types::i32, format::bfzyx });
+
+    set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
+    });
+    set_values(begin, {
+            0, 0, 0, 0, 0
+    });
+    set_values(end, {
+            0, 0, 0, 0, 0
+    });
+    set_values(strides, {
+            1, 1, 1, 1, 1
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input->get_layout()));
+    topology.add(data("input2", begin));
+    topology.add(data("input3", end));
+    topology.add(data("input4", strides));
+    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {0, 1, 1, 0, 1}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {1, 1, 1, 8, 1}));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector<float> answers = {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
+    };
+
+    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+    for (size_t i = 0; i < answers.size(); ++i)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
 TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full_negative_stride) {
    // Input (BFYX): 2x2x2x2
    // Begin (BFYX): 0x0x0x0