[GPU] fix strided_slice_optimize for 5d new axis output, typo in strided_slice cl kernel, avoid handle_reshape when user is only one (#14812)

* fix strided_slice_optimize for 5d new axis output, typo in strided_slice cl kernel, avoid handle_reshape when user is only one

* fix unit test of resample_eltwise_fusing_through/12
This commit is contained in:
Wilson Seok 2023-01-05 10:36:16 +09:00 committed by GitHub
parent e07bd74f38
commit e422b5acb4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 68 additions and 8 deletions

View File

@ -93,15 +93,15 @@ void handle_reshape::run(program& p) {
// vector for storing reshape nodes to connect to new reorder nodes (if needed)
std::vector<program_node*> reorder_reshape_nodes;
bool skip_first_user = false;
bool found_one = false;
auto reshape_users = node->get_users();
for (const auto& user : reshape_users) {
// reshape node for first user will be the original reshape from the graph
if (!skip_first_user) {
if (std::find(reorder_node_to_split.begin(), reorder_node_to_split.end(), user) !=
reorder_node_to_split.end())
if (!found_one) {
if ((std::find(reorder_node_to_split.begin(), reorder_node_to_split.end(), user) !=
reorder_node_to_split.end()) && (user->get_output_layout().get_rank() == node->get_output_layout().get_rank()))
reorder_reshape_nodes.push_back(node);
skip_first_user = true;
found_one = true;
continue;
}
@ -118,6 +118,9 @@ void handle_reshape::run(program& p) {
}
}
if (reorder_reshape_nodes.size() == 0)
continue;
// add new reorder nodes to proper reshape node
auto reshape_reorder_id = 0;
for (const auto& reorder_node : reorder_node_to_split) {

View File

@ -26,12 +26,16 @@ void strided_slice_optimize::run(program& p) {
if (std::find(new_axis_mask.begin(), new_axis_mask.end(), 1) == new_axis_mask.end())
continue;
auto node_layout = strided_slice_node.get_output_layout();
// only 4D or less dimension output runs optimization
if (node_layout.get_rank() > 4)
continue;
auto& deps = node->get_dependencies();
for (size_t i = deps.size(); i--;)
if (deps[i].first->is_type<data>())
node->remove_dependency(i);
auto node_layout = strided_slice_node.get_output_layout();
auto node_size = node_layout.get_tensor().sizes(format::bfyx);
auto is_shift_possible = [&](const std::vector<int32_t>& dims) -> bool {

View File

@ -18,8 +18,8 @@ KERNEL(strided_slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYP
#elif OUTPUT_LAYOUT_BFZYX
const uint yx_input = (uint)get_global_id(2) % (INPUT0_SIZE_X * INPUT0_SIZE_Y);
const uint z_input = (uint)get_global_id(2) / (INPUT0_SIZE_X * INPUT0_SIZE_Y);
const uint y_input = yx / INPUT0_SIZE_X;
const uint x_input = yx % INPUT0_SIZE_X;
const uint y_input = yx_input / INPUT0_SIZE_X;
const uint x_input = yx_input % INPUT0_SIZE_X;
#endif
const uint input_index = INPUT0_OFFSET +
batch * INPUT0_BATCH_PITCH +

View File

@ -1240,6 +1240,59 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2) {
}
}
// Checks strided_slice on a 4D (bfyx) input whose result is declared as a 5D
// 1x1x1x8x1 output: the eight input values must reach the output unchanged
// and in the same order.
TEST(strided_slice_gpu_f32_i32, test_1x1x1x8x1_new_axis_5d) {
    // Input (BFYX): 1x8x1x1
    // Output (BFZYX): 1x1x1x8x1
    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 8, 1, 1 } });
    // begin / end / strides each carry five i32 values, one per output axis.
    auto begin = engine.allocate_memory({ ov::PartialShape{ 5 }, data_types::i32, format::bfzyx });
    auto end = engine.allocate_memory({ ov::PartialShape{ 5 }, data_types::i32, format::bfzyx });
    auto strides = engine.allocate_memory({ ov::PartialShape{ 5 }, data_types::i32, format::bfzyx });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
    });
    set_values(begin, {
        0, 0, 0, 0, 0
    });
    set_values(end, {
        0, 0, 0, 0, 0
    });
    set_values(strides, {
        1, 1, 1, 1, 1
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(data("input2", begin));
    topology.add(data("input3", end));
    topology.add(data("input4", strides));
    // NOTE(review): the five brace lists are presumably begin_mask, end_mask,
    // new_axis_mask, shrink_axis_mask and ellipsis_mask, followed by the output
    // shape - confirm against the strided_slice primitive declaration.
    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {0, 1, 1, 0, 1}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {1, 1, 1, 8, 1}));

    network network(engine, topology);
    network.set_input_data("input", input);
    auto outputs = network.execute();

    // Fatal assertions: the statements below dereference outputs.begin() and
    // read the output buffer, so continuing after a failure here is unsafe.
    ASSERT_EQ(outputs.size(), size_t(1));
    ASSERT_EQ(outputs.begin()->first, "strided_slice");

    auto output = outputs.at("strided_slice").get_memory();

    std::vector<float> answers = {
        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
    };

    cldnn::mem_lock<float> output_ptr(output, get_test_stream());

    // Non-fatal per-element check so that every mismatching index is reported.
    for (size_t i = 0; i < answers.size(); ++i)
    {
        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
    }
}
TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full_negative_stride) {
// Input (BFYX): 2x2x2x2
// Begin (BFYX): 0x0x0x0