[GPU] if the reorder which is an user of reshpae has truncation mode, it should not split the reorder node (#20749)

2023-10-31 19:21:56 +09:00 · 2023-10-31 19:21:56 +09:00 · 201b467111
commit 201b467111
parent 0a42b17178
2 changed files with 51 additions and 1 deletions
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_reshape.cpp
@ -98,7 +98,8 @@ void handle_reshape::run(program& p) {
            // find the users of reshape that are reorder type, if none present then skip the current node
            // find users who are onednn impl
            for (const auto& user : node->get_users()) {
-                if (user->is_type<reorder>())
+                if (user->is_type<reorder>() &&
+                    (*user).as<reorder>().get_primitive()->truncate == false)   // not to split conversion only reorder
                    reorder_node_to_split.push_back(user);
                if (user->get_preferred_impl_type() == cldnn::impl_types::onednn)
                    onednn_users.push_back(user);
--- a/src/plugins/intel_gpu/tests/unit/test_cases/reshape_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/reshape_gpu_test.cpp
@ -734,6 +734,55 @@ TEST(reshape_gpu_f32, shrink_chain_out) {
    test_shrink_chain_out<float>(false);
 }

+template <typename T>
+void test_shrink_chain_partial_reorder_truncate(bool is_caching_test) {
+    auto& engine = get_test_engine();
+    auto batch_num = 2;
+    auto feature_num = 2;
+    auto x_size = 1;
+    auto y_size = 1;
+    auto input = engine.allocate_memory({data_types::f32, format::bfyx, {tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num))}});
+    auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
+    auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
+
+    std::vector<T> scale_vals = {0.f, 1.f, 2.f, 3.f};
+    std::vector<T> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
+    set_values(scale_in, scale_vals);
+    set_values(shift_in, scale_shifts);
+
+    topology topology;
+    topology.add(input_layout("input", input->get_layout()));
+    topology.add(data("scale_in", scale_in));
+    topology.add(data("shift_in", shift_in));
+    topology.add(activation("relu", input_info("input"), activation_func::relu));
+    topology.add(reshape("reshape", input_info("relu"), tensor(spatial(2, 2))));
+    topology.add(reorder("reorder", input_info("reshape"), format::bfyx, data_types::f32, {}, reorder_mean_mode::subtract, padding(), true));
+    topology.add(reshape("reshape1", input_info("reorder"), tensor(feature(4))));
+    topology.add(eltwise("scale", { input_info("reshape1"), input_info("scale_in") }, eltwise_mode::prod));
+    topology.add(eltwise("shift", { input_info("scale"), input_info("shift_in") }, eltwise_mode::sum));
+    topology.add(reorder("out_reorder", input_info("shift"), format::yxfb, data_types::f32));
+
+    std::vector<T> input_vec = {-1.f, 2.f, -3.f, 4.f};
+    std::vector<T> out = {5.f, 12.f, 15.f, 32.0f};
+    set_values(input, input_vec);
+
+    ExecutionConfig config = get_test_default_config(engine);
+    config.set_property(ov::intel_gpu::optimize_data(true));
+    cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
+    network->set_input_data("input", input);
+    auto outputs = network->execute();
+
+    auto output = outputs.at("out_reorder").get_memory();
+    cldnn::mem_lock<T> output_ptr(output, get_test_stream());
+
+    for (size_t i = 0; i < out.size(); i++)
+        ASSERT_EQ(output_ptr[i], out[i]) << " i=" << i;
+}
+
+TEST(reshape_gpu_f32, shrink_chain_partial_reorder_truncate) {
+    test_shrink_chain_partial_reorder_truncate<float>(false);
+}
+
 TEST(reshape_gpu_f32, basic_runtime_static_shape) {
    // input:  bfwzyx, (3, 3, 2, 2, 1, 1)
    // reshape: (1, 1, 2, 2, 3, 3), pad (0, 0, 0, 0, 0, 1)