diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp
index 0dcf0df9771..ebb0f0d1a4f 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp
@@ -93,15 +93,15 @@ void handle_reshape::run(program& p) {
 
             // vector for storing reshape nodes to connect to new reorder nodes (if needed)
             std::vector<program_node*> reorder_reshape_nodes;
 
-            bool skip_first_user = false;
+            bool found_one = false;
             auto reshape_users = node->get_users();
             for (const auto& user : reshape_users) {
                 // reshape node for first user will be the orginal reshape from the graph
-                if (!skip_first_user) {
-                    if (std::find(reorder_node_to_split.begin(), reorder_node_to_split.end(), user) !=
-                        reorder_node_to_split.end())
+                if (!found_one) {
+                    if ((std::find(reorder_node_to_split.begin(), reorder_node_to_split.end(), user) !=
+                        reorder_node_to_split.end()) && (user->get_output_layout().get_rank() == node->get_output_layout().get_rank()))
                         reorder_reshape_nodes.push_back(node);
-                    skip_first_user = true;
+                    found_one = true;
                     continue;
                 }
@@ -118,6 +118,9 @@ void handle_reshape::run(program& p) {
                 }
             }
 
+            if (reorder_reshape_nodes.size() == 0)
+                continue;
+
             // add new reorder nodes to proper reshape node
             auto reshape_reorder_id = 0;
             for (const auto& reorder_node : reorder_node_to_split) {
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/strided_slice_optimize.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/strided_slice_optimize.cpp
index a824874c07c..10926ab55c6 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/strided_slice_optimize.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/strided_slice_optimize.cpp
@@ -26,12 +26,16 @@ void strided_slice_optimize::run(program& p) {
             if (std::find(new_axis_mask.begin(), new_axis_mask.end(), 1) == new_axis_mask.end())
                 continue;
 
+            auto node_layout = strided_slice_node.get_output_layout();
+            // only 4D or less dimension output runs optimization
+            if (node_layout.get_rank() > 4)
+                continue;
+
             auto& deps = node->get_dependencies();
             for (size_t i = deps.size(); i--;)
                 if (deps[i].first->is_type<data>())
                     node->remove_dependency(i);
 
-            auto node_layout = strided_slice_node.get_output_layout();
             auto node_size = node_layout.get_tensor().sizes(format::bfyx);
 
             auto is_shift_possible = [&](const std::vector<int32_t>& dims) -> bool {
diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl
index 4da872d2f72..55f73c6d31b 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl
+++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl
@@ -18,8 +18,8 @@ KERNEL(strided_slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYP
 #elif OUTPUT_LAYOUT_BFZYX
     const uint yx_input = (uint)get_global_id(2) % (INPUT0_SIZE_X * INPUT0_SIZE_Y);
     const uint z_input = (uint)get_global_id(2) / (INPUT0_SIZE_X * INPUT0_SIZE_Y);
-    const uint y_input = yx / INPUT0_SIZE_X;
-    const uint x_input = yx % INPUT0_SIZE_X;
+    const uint y_input = yx_input / INPUT0_SIZE_X;
+    const uint x_input = yx_input % INPUT0_SIZE_X;
 #endif
 
     const uint input_index = INPUT0_OFFSET + batch * INPUT0_BATCH_PITCH +
diff --git a/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp
index 9ec0d8a15b2..453fd101e01 100644
--- a/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp
@@ -1240,6 +1240,59 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2) {
     }
 }
 
+TEST(strided_slice_gpu_f32_i32, test_1x1x1x8x1_new_axis_5d) {
+    // Input (BFYX): 1x8x1x1
+    // Output (BFZYX): 1x1x1x8x1
+
+    auto& engine = get_test_engine();
+    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 8, 1, 1 } });
+    auto begin = engine.allocate_memory({ ov::PartialShape{ 5 }, data_types::i32, format::bfzyx });
+    auto end = engine.allocate_memory({ ov::PartialShape{ 5 }, data_types::i32, format::bfzyx });
+    auto strides = engine.allocate_memory({ ov::PartialShape{ 5 }, data_types::i32, format::bfzyx });
+
+    set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
+    });
+    set_values(begin, {
+            0, 0, 0, 0, 0
+    });
+    set_values(end, {
+            0, 0, 0, 0, 0
+    });
+    set_values(strides, {
+            1, 1, 1, 1, 1
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input->get_layout()));
+    topology.add(data("input2", begin));
+    topology.add(data("input3", end));
+    topology.add(data("input4", strides));
+    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {0, 1, 1, 0, 1}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {1, 1, 1, 8, 1}));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector<float> answers = {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
+    };
+
+    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+    for (size_t i = 0; i < answers.size(); ++i)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
 TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full_negative_stride) {
     // Input (BFYX): 2x2x2x2
     // Begin (BFYX): 0x0x0x0