[GPU] fix strided_slice_optimize for 5d new axis output, typo in strided_slice cl kernel, avoid handle_reshape when user is only one (#14812)
* fix strided_slice_optimize for 5d new axis output, typo in strided_slice cl kernel, avoid handle_reshape when user is only one
* fix unit test of resample_eltwise_fusing_through/12
parent e07bd74f38
commit e422b5acb4
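For context on the "5d new axis output" part of this commit: the new unit test below feeds a 1x8x1x1 bfyx input through strided_slice with new_axis_mask {0, 1, 1, 0, 1}, which yields the 5D bfzyx output 1x1x1x8x1, and that is why strided_slice_optimize now skips its shift optimization for ranks above 4. The following stand-alone C++ sketch only illustrates how a new_axis_mask grows the output rank; it is not the plugin code, and apply_new_axis_mask is a hypothetical helper name.

// Illustrative sketch only (not OpenVINO plugin code): each 1 in the
// new_axis_mask inserts a size-1 axis at that output position; the
// remaining output positions are filled with the input dims in order.
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<int64_t> apply_new_axis_mask(const std::vector<int64_t>& input_dims,
                                         const std::vector<int64_t>& new_axis_mask) {
    std::vector<int64_t> out_dims;
    size_t in_pos = 0;
    for (size_t i = 0; i < new_axis_mask.size(); ++i) {
        if (new_axis_mask[i] == 1)
            out_dims.push_back(1);  // inserted (new) axis
        else
            out_dims.push_back(in_pos < input_dims.size() ? input_dims[in_pos++] : 1);
    }
    return out_dims;
}

int main() {
    // The shapes from the new unit test: bfyx 1x8x1x1 with mask {0, 1, 1, 0, 1}
    // becomes the 5D output 1x1x1x8x1, so the 4D-only optimization must bail out.
    for (auto d : apply_new_axis_mask({1, 8, 1, 1}, {0, 1, 1, 0, 1}))
        std::cout << d << ' ';  // prints: 1 1 1 8 1
    std::cout << '\n';
}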
@@ -93,15 +93,15 @@ void handle_reshape::run(program& p) {
             // vector for storing reshape nodes to connect to new reorder nodes (if needed)
             std::vector<program_node*> reorder_reshape_nodes;

-            bool skip_first_user = false;
+            bool found_one = false;
             auto reshape_users = node->get_users();
             for (const auto& user : reshape_users) {
                 // reshape node for first user will be the orginal reshape from the graph
-                if (!skip_first_user) {
-                    if (std::find(reorder_node_to_split.begin(), reorder_node_to_split.end(), user) !=
-                        reorder_node_to_split.end())
+                if (!found_one) {
+                    if ((std::find(reorder_node_to_split.begin(), reorder_node_to_split.end(), user) !=
+                        reorder_node_to_split.end()) && (user->get_output_layout().get_rank() == node->get_output_layout().get_rank()))
                         reorder_reshape_nodes.push_back(node);
-                    skip_first_user = true;
+                    found_one = true;
                     continue;
                 }

@@ -118,6 +118,9 @@ void handle_reshape::run(program& p) {
                 }
             }

+            if (reorder_reshape_nodes.size() == 0)
+                continue;
+
             // add new reorder nodes to proper reshape node
             auto reshape_reorder_id = 0;
             for (const auto& reorder_node : reorder_node_to_split) {
@@ -26,12 +26,16 @@ void strided_slice_optimize::run(program& p) {
             if (std::find(new_axis_mask.begin(), new_axis_mask.end(), 1) == new_axis_mask.end())
                 continue;

+            auto node_layout = strided_slice_node.get_output_layout();
+            // only 4D or less dimension output runs optimization
+            if (node_layout.get_rank() > 4)
+                continue;
+
             auto& deps = node->get_dependencies();
             for (size_t i = deps.size(); i--;)
                 if (deps[i].first->is_type<data>())
                     node->remove_dependency(i);

-            auto node_layout = strided_slice_node.get_output_layout();
             auto node_size = node_layout.get_tensor().sizes(format::bfyx);

             auto is_shift_possible = [&](const std::vector<int32_t>& dims) -> bool {
@@ -18,8 +18,8 @@ KERNEL(strided_slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYP
 #elif OUTPUT_LAYOUT_BFZYX
     const uint yx_input = (uint)get_global_id(2) % (INPUT0_SIZE_X * INPUT0_SIZE_Y);
     const uint z_input = (uint)get_global_id(2) / (INPUT0_SIZE_X * INPUT0_SIZE_Y);
-    const uint y_input = yx / INPUT0_SIZE_X;
-    const uint x_input = yx % INPUT0_SIZE_X;
+    const uint y_input = yx_input / INPUT0_SIZE_X;
+    const uint x_input = yx_input % INPUT0_SIZE_X;
 #endif
     const uint input_index = INPUT0_OFFSET +
                              batch * INPUT0_BATCH_PITCH +
@@ -1240,6 +1240,59 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2) {
     }
 }

+TEST(strided_slice_gpu_f32_i32, test_1x1x1x8x1_new_axis_5d) {
+    // Input (BFYX): 1x8x1x1
+    // Output (BFZYX): 1x1x1x8x1
+
+    auto& engine = get_test_engine();
+    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 8, 1, 1 } });
+    auto begin = engine.allocate_memory({ ov::PartialShape{ 5 }, data_types::i32, format::bfzyx });
+    auto end = engine.allocate_memory({ ov::PartialShape{ 5 }, data_types::i32, format::bfzyx });
+    auto strides = engine.allocate_memory({ ov::PartialShape{ 5 }, data_types::i32, format::bfzyx });
+
+    set_values(input, {
+        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
+    });
+    set_values(begin, {
+        0, 0, 0, 0, 0
+    });
+    set_values(end, {
+        0, 0, 0, 0, 0
+    });
+    set_values(strides, {
+        1, 1, 1, 1, 1
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input->get_layout()));
+    topology.add(data("input2", begin));
+    topology.add(data("input3", end));
+    topology.add(data("input4", strides));
+    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {0, 1, 1, 0, 1}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {1, 1, 1, 8, 1}));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector<float> answers = {
+        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
+    };
+
+    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+    for (size_t i = 0; i < answers.size(); ++i)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
 TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full_negative_stride) {
     // Input (BFYX): 2x2x2x2
     // Begin (BFYX): 0x0x0x0