[GPU] Fixed safe index func for per-channel case (#10136)

Co-authored-by: Ilya Znamenskiy <ilya.znamenskiy@intel.com>
2022-02-07 09:59:52 +03:00
parent a2ca1d4499
commit 12746efbe5
3 changed files with 73 additions and 4 deletions
--- a/src/plugins/intel_gpu/src/kernel_selector/core/common/jitter.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/core/common/jitter.cpp
@@ -454,15 +454,18 @@ JitDefinitions DataTensorJitConstant::GetDefinitions() const {
        // We support broadcast only if corresponding dimension is equal to 1.
        // Otherwise, dimensions should be equal and using "f" should be safe.
        if (_tensor.PitchesDifferFromLogicalDims() && _tensor.SimpleLayout()) {
-            std::string f_pitch = toCodeString(_tensor.Feature().pitch);
-            definitions.push_back({ safe_index_func_name, "(" + offset + " + (f) * " + f_pitch + ")" });
+            auto f_pitch = toCodeString(_tensor.Feature().pitch);
+            auto f_size = toCodeString(_tensor.Feature().v);
+            definitions.push_back({ safe_index_func_name, "(" + offset + " + ((f) % " + f_size + ")  * " + f_pitch + ")" });
            definitions.push_back({ index_func_name, "(" + offset + " + (f) * " + f_pitch + ")" });
        } else if (_tensor.PitchesDifferFromLogicalDims()) {
            // TODO This should be solved differently, by setting the macro arguments to zero
            definitions.push_back({ safe_index_func_name, safe_index_func_val });
            definitions.push_back({ index_func_name, index_func_val });
        } else {
-            definitions.push_back({ safe_index_func_name, "(" + toCodeString(_tensor.Feature().pad.before) + " + (f))" });
+            auto f_pad = toCodeString(_tensor.Feature().pad.before);
+            auto f_size = toCodeString(_tensor.Feature().v);
+            definitions.push_back({ safe_index_func_name, "((" + f_pad + " + (f)) % " + f_size + ")" });
            definitions.push_back({ index_func_name, "(" + toCodeString(_tensor.Feature().pad.before) + " + (f))" });
        }
    } else {
--- a/src/plugins/intel_gpu/src/plugin/plugin.cpp
+++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -28,6 +28,8 @@

 #include <transformations/rt_info/fused_names_attribute.hpp>

+#include "openvino/pass/serialize.hpp"
+
 #include "intel_gpu/runtime/device_query.hpp"
 #include "intel_gpu/runtime/debug_configuration.hpp"
 #include <performance_heuristics.hpp>
@@ -103,7 +105,8 @@ InferenceEngine::CNNNetwork Plugin::CloneAndTransformNetwork(const InferenceEngi

    GPU_DEBUG_GET_INSTANCE(debug_config);
    GPU_DEBUG_IF(!debug_config->dump_graphs.empty()) {
-        clonedNetwork.serialize(debug_config->dump_graphs + "/" + network.getName() + "_" +  "transformed_func.xml");
+        auto path_base = debug_config->dump_graphs + "/" + network.getName() + "_" +  "transformed_func";
+        ov::pass::Serialize(path_base + ".xml", path_base + ".bin").run_on_model(clonedNetwork.getFunction());
    }
    return clonedNetwork;
 }
--- a/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp
@@ -2578,6 +2578,69 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_broadcast)
    }
 }

+TEST(eltwise_gpu_f16, fs_b_yx_fsv32_broadcast_bfyx)
+{
+    auto& engine = get_test_engine();
+    bool f16_supported = engine.get_device_info().supports_fp16;
+    if (!f16_supported) {
+        std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." << std::endl;
+        return;
+    }
+
+    size_t input_b = 2;
+    size_t input_f = 72;
+    size_t input1_y = 10, input1_x = 10;
+
+    tensor input1_tensor(input_b, input_f, input1_x, input1_y);
+    tensor input2_tensor(1, input_f, 1, 1);
+
+    VVVVF<FLOAT16> input1_rnd = generate_random_4d<FLOAT16>(input_b, input_f, input1_y, input1_x, 1, 3);
+    VVVVF<FLOAT16> input2_rnd = generate_random_4d<FLOAT16>(1, input_f, 1, 1, 1, 3);
+
+    VF<FLOAT16> input1_flatten = flatten_4d<FLOAT16>(format::bfyx, input1_rnd);
+    VF<FLOAT16> input2_flatten = flatten_4d<FLOAT16>(format::bfyx, input2_rnd);
+
+    auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, input1_tensor });
+    auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, input2_tensor });
+
+    set_values(input1, input1_flatten);
+    set_values(input2, input2_flatten);
+
+    topology ref_topology;
+    ref_topology.add(input_layout("input1", input1->get_layout()));
+    ref_topology.add(input_layout("input2", input2->get_layout()));
+    ref_topology.add(eltwise("eltwise", "input1", "input2", eltwise_mode::prod));
+
+    network ref_network(engine, ref_topology);
+    ref_network.set_input_data("input1", input1);
+    ref_network.set_input_data("input2", input2);
+
+    auto ref_outputs = ref_network.execute();
+    auto ref_output = ref_outputs.at("eltwise").get_memory();
+    cldnn::mem_lock<FLOAT16> ref_ptr(ref_output, get_test_stream());
+
+    topology fsv32_topology;
+    fsv32_topology.add(input_layout("input1", input1->get_layout()));
+    fsv32_topology.add(input_layout("input2", input2->get_layout()));
+    fsv32_topology.add(reorder("reorder1", "input1", layout(data_types::f16, format::fs_b_yx_fsv32, input1_tensor)));
+    fsv32_topology.add(eltwise("eltwise", "reorder1", "input2", eltwise_mode::prod));
+    fsv32_topology.add(reorder("reorder_bfyx", "eltwise", layout(data_types::f16, format::bfyx, input1_tensor)));
+
+    network fsv32_network(engine, fsv32_topology);
+    fsv32_network.set_input_data("input1", input1);
+    fsv32_network.set_input_data("input2", input2);
+
+    auto fsv32_outputs = fsv32_network.execute();
+    auto fsv32_output = fsv32_outputs.at("reorder_bfyx").get_memory();
+    cldnn::mem_lock<FLOAT16> fsv32_ptr(fsv32_output, get_test_stream());
+
+    ASSERT_EQ(ref_ptr.size(), fsv32_ptr.size());
+
+    for (size_t i = 0; i < ref_ptr.size(); i++) {
+        ASSERT_EQ(float(ref_ptr[i]), float(fsv32_ptr[i]));
+    }
+}
+
 TEST(eltwise_gpu_f32, broadcast_test_in4x4x2x2x2) {
    //  Input  : 2x2x2x2x1
    //  Input2 : 2x2x1x1x2