[GPU] if the reorder which is an user of reshpae has truncation mode, it should not split the reorder node (#20749)

This commit is contained in:
Kelvin Choi 2023-10-31 19:21:56 +09:00 committed by Alexander Nesterov
parent 0a42b17178
commit 201b467111
2 changed files with 51 additions and 1 deletions

View File

@ -98,7 +98,8 @@ void handle_reshape::run(program& p) {
// find the users of reshape that are reorder type, if none present then skip the current node
// find users who are onednn impl
for (const auto& user : node->get_users()) {
if (user->is_type<reorder>())
if (user->is_type<reorder>() &&
(*user).as<reorder>().get_primitive()->truncate == false) // not to split conversion only reorder
reorder_node_to_split.push_back(user);
if (user->get_preferred_impl_type() == cldnn::impl_types::onednn)
onednn_users.push_back(user);

View File

@ -734,6 +734,55 @@ TEST(reshape_gpu_f32, shrink_chain_out) {
test_shrink_chain_out<float>(false);
}
template <typename T>
void test_shrink_chain_partial_reorder_truncate(bool is_caching_test) {
auto& engine = get_test_engine();
auto batch_num = 2;
auto feature_num = 2;
auto x_size = 1;
auto y_size = 1;
auto input = engine.allocate_memory({data_types::f32, format::bfyx, {tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num))}});
auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
std::vector<T> scale_vals = {0.f, 1.f, 2.f, 3.f};
std::vector<T> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
set_values(scale_in, scale_vals);
set_values(shift_in, scale_shifts);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(data("scale_in", scale_in));
topology.add(data("shift_in", shift_in));
topology.add(activation("relu", input_info("input"), activation_func::relu));
topology.add(reshape("reshape", input_info("relu"), tensor(spatial(2, 2))));
topology.add(reorder("reorder", input_info("reshape"), format::bfyx, data_types::f32, {}, reorder_mean_mode::subtract, padding(), true));
topology.add(reshape("reshape1", input_info("reorder"), tensor(feature(4))));
topology.add(eltwise("scale", { input_info("reshape1"), input_info("scale_in") }, eltwise_mode::prod));
topology.add(eltwise("shift", { input_info("scale"), input_info("shift_in") }, eltwise_mode::sum));
topology.add(reorder("out_reorder", input_info("shift"), format::yxfb, data_types::f32));
std::vector<T> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<T> out = {5.f, 12.f, 15.f, 32.0f};
set_values(input, input_vec);
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
auto output = outputs.at("out_reorder").get_memory();
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
for (size_t i = 0; i < out.size(); i++)
ASSERT_EQ(output_ptr[i], out[i]) << " i=" << i;
}
TEST(reshape_gpu_f32, shrink_chain_partial_reorder_truncate) {
test_shrink_chain_partial_reorder_truncate<float>(false);
}
TEST(reshape_gpu_f32, basic_runtime_static_shape) {
// input: bfwzyx, (3, 3, 2, 2, 1, 1)
// reshape: (1, 1, 2, 2, 3, 3), pad (0, 0, 0, 0, 0, 1)