[GPU] enable validate after adding reorder (#9594)

Kelvin Choi 2022-01-27 13:08:35 +09:00 committed by GitHub
parent c7eeda0247
commit 00b7f58152
4 changed files with 75 additions and 34 deletions


@@ -30,6 +30,7 @@ void add_required_reorders::add_reorder(program& p, program_node* node, program_
     auto new_reorder = std::make_shared<reorder>(node->id() + "_reorder_" + usr->id(), node->id(), reorder_layout);
     auto& new_reorder_node = p.get_or_create(new_reorder);
+    new_reorder_node.set_output_layout(reorder_layout, false);
     // ToDo: add a method to program class which adds an intermediate node given a node and its user
     auto it = std::find(usr->get_dependencies().begin(), usr->get_dependencies().end(), node);
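The fix here is an ordering one: node validation reads the node's output layout, and a reorder node created through get_or_create() starts without one, so the layout has to be assigned before the node is spliced into the user's dependency list. A minimal, self-contained sketch of that constraint, with all types invented here for illustration (none of this is the clDNN API):

#include <cassert>
#include <optional>
#include <string>
#include <utility>

// Toy stand-ins for clDNN's layout / program_node; invented for illustration.
struct toy_layout {
    std::string format;
};

struct toy_node {
    std::string id;
    std::optional<toy_layout> output_layout;  // empty until explicitly set

    void set_output_layout(toy_layout l) { output_layout = std::move(l); }

    // Validation needs a concrete output layout; a freshly created node has none.
    bool validate() const { return output_layout.has_value(); }
};

int main() {
    toy_node new_reorder_node{"node_reorder_usr", std::nullopt};

    // Before set_output_layout, validating the just-created node fails,
    // which is why the pass assigns the layout right after get_or_create().
    assert(!new_reorder_node.validate());

    new_reorder_node.set_output_layout(toy_layout{"fs_b_yx_fsv32"});
    assert(new_reorder_node.validate());
    return 0;
}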


@@ -56,4 +56,13 @@ KernelsData ActivationKernelRef::GetKernelsData(const Params& params, const opti
 KernelsPriority ActivationKernelRef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const {
     return DONT_USE_IF_HAVE_SOMETHING_ELSE;
 }
+bool ActivationKernelRef::Validate(const Params& p, const optional_params& o) const {
+    if (!Parent::Validate(p, o)) return false;
+
+    const auto& params = static_cast<const activation_params&>(p);
+    if (params.inputs[0].GetDims().size() != params.output.GetDims().size())
+        return false;
+
+    return true;
+}
 } // namespace kernel_selector
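The new Validate() simply rejects parameter sets whose input and output tensors disagree in rank, so the reference activation kernel is skipped for them and selection falls through to another implementation. The same predicate over plain dimension vectors, as a self-contained toy (names invented):

#include <cassert>
#include <cstddef>
#include <vector>

// Toy mirror of the rank check added to ActivationKernelRef::Validate:
// a parameter set is rejected when input and output tensor ranks differ.
bool ranks_match(const std::vector<std::size_t>& input_dims,
                 const std::vector<std::size_t>& output_dims) {
    return input_dims.size() == output_dims.size();
}

int main() {
    assert(ranks_match({1, 32, 4, 5}, {1, 32, 2, 3}));      // both 4-D: accepted
    assert(!ranks_match({1, 32, 4, 5}, {1, 32, 2, 3, 1}));  // 4-D vs 5-D: rejected
    return 0;
}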


@@ -11,6 +11,9 @@
 namespace kernel_selector {
 class ActivationKernelRef : public ActivationKernelBase {
 public:
+    using Parent = ActivationKernelBase;
+    using Parent::Parent;
+
     ActivationKernelRef() : ActivationKernelBase("activation_ref") {}
     virtual ~ActivationKernelRef() {}
@@ -23,5 +26,7 @@ public:
                 FusedOpType::SCALE,
                 FusedOpType::ACTIVATION};
     }
+
+    bool Validate(const Params& p, const optional_params& o) const override;
 };
 } // namespace kernel_selector
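The Parent alias plus using Parent::Parent is the usual way these kernel classes forward to their base: the override can call Parent::Validate() without spelling out the base class name, and the base constructors are inherited. A self-contained illustration of the idiom with toy classes (not the real kernel_selector hierarchy):

#include <cassert>
#include <string>
#include <utility>

struct params {
    int input_rank = 4;
    int output_rank = 4;
};

class KernelBase {
public:
    explicit KernelBase(std::string name) : name_(std::move(name)) {}
    virtual ~KernelBase() = default;
    virtual bool Validate(const params&) const { return true; }  // base-class checks
private:
    std::string name_;
};

class KernelRef : public KernelBase {
public:
    using Parent = KernelBase;  // same aliasing pattern as in the header above
    using Parent::Parent;       // inherit KernelBase's constructor

    bool Validate(const params& p) const override {
        if (!Parent::Validate(p))  // run the base validation first
            return false;
        return p.input_rank == p.output_rank;  // then the derived-class check
    }
};

int main() {
    KernelRef kernel("activation_ref");
    assert(kernel.Validate(params{}));
    assert(!kernel.Validate(params{4, 5}));  // rank mismatch rejected
    return 0;
}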


@@ -113,6 +113,29 @@ public:
     }
 };
 
+class ConvReorderFusingTest : public BaseFusingTest<convolution_test_params> {
+public:
+    void execute(convolution_test_params& p) {
+        auto input_prim = get_mem(get_input_layout(p));
+        network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
+        network network_fused(this->engine, this->topology_fused, bo_fused);
+        network_fused.set_input_data("input", input_prim);
+        network_not_fused.set_input_data("input", input_prim);
+
+        compare(network_not_fused, network_fused, p, true);
+    }
+
+    layout get_input_layout(convolution_test_params& p) {
+        auto pad = p.pad;
+        std::vector<int> pad_ = { 0, 0, pad.spatial[0], pad.spatial[1] };
+        return layout{ p.data_type, p.input_format, p.in_shape, padding{ pad_ } };
+    }
+
+    layout get_per_channel_layout(convolution_test_params& p) {
+        return layout{ p.default_type, p.default_format, tensor{1, p.out_shape.feature[0], 1, 1} };
+    }
+};
+
 class ConvEltwTest : public ::BaseFusingTest<conv_eltw_test_params> {
 public:
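The new fixture builds the same topology twice, once with the fusing build options (bo_fused) and once without (bo_not_fused), feeds both networks identical input, and compares the results; the trailing true passed to compare() is presumably what makes the check also count reorder primitives, which is the point of these tests. A toy version of that A/B pattern (all names and the comparison rule invented here):

#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

// "Non-fused" pipeline: convolution-like scaling and activation as two steps.
std::vector<float> run_not_fused(const std::vector<float>& input) {
    std::vector<float> conv_out;
    for (float v : input) conv_out.push_back(v * 2.0f);         // stand-in convolution
    std::vector<float> act_out;
    for (float v : conv_out) act_out.push_back(std::fabs(v));   // stand-in activation
    return act_out;
}

// "Fused" pipeline: both steps folded into a single pass.
std::vector<float> run_fused(const std::vector<float>& input) {
    std::vector<float> out;
    for (float v : input) out.push_back(std::fabs(v * 2.0f));
    return out;
}

int main() {
    const std::vector<float> input = {-1.5f, 0.0f, 2.25f};
    const auto reference = run_not_fused(input);
    const auto fused = run_fused(input);

    // Same input, elementwise-close outputs: the fixture's core contract.
    assert(reference.size() == fused.size());
    for (std::size_t i = 0; i < reference.size(); ++i)
        assert(std::fabs(reference[i] - fused[i]) < 1e-5f);
    return 0;
}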
@@ -2543,10 +2566,14 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp16_scale, ::testing::ValuesIn(std::
     bc_force_kernel_params{ CASE_CONV_FP16_13, 2, 3, "convolution_gpu_fs_byx_fsv32" },
 }));
 
-// reorder(bfyx to fs_b_yx_fsv32) + conv
+/* ----------------------------------------------------------------------------------------------------- */
+/* ---------------------- reorder(bfyx to fs_b_yx_fsv32) + convolution kernel cases -------------------- */
+/* ----------------------------------------------------------------------------------------------------- */
 #define FSV32_CASE_CONV_FP32_1 { 1, 32, 4, 5 }, { 1, 32, 2, 3 }, { 1, 1, 3, 3 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::oiyx, data_types::f32, format::bfyx
 
-class conv_fp32_reorder_bfyx_to_fsv32_conv_basic : public ConvFusingTest {};
+// 'reorder_fsv32' is being removed from "remove_redundant_reorders" in the current impl
+class conv_fp32_reorder_bfyx_to_fsv32_conv_basic : public ConvReorderFusingTest {};
 TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_basic, basic) {
     auto p = GetParam();
@@ -2554,21 +2581,22 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_basic, basic) {
         input_layout("input", get_input_layout(p)),
         data("weights", get_mem(get_weights_layout(p), -127, 127)),
         reorder("reorder_fsv32", "input", format::fs_b_yx_fsv32, data_types::f32),
-        convolution("conv_output", "reorder_fsv32", { "weights" }, 1, tensor{ 0, 0, 1, 1 }, p.pad, p.dilation),
-        activation("activation", "conv_output", activation_func::abs)
+        convolution("conv_prim", "reorder_fsv32", { "weights" }, 1, tensor{ 0, 0, 1, 1 }, p.pad, p.dilation),
+        activation("activation", "conv_prim", activation_func::abs),
+        reorder("reorder_out", "activation", format::bfyx, data_types::f32)
     );
 
     implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
-    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));
 
     execute(p);
 }
 
 INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_basic, ::testing::ValuesIn(std::vector<convolution_test_params>{
-    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 3, 3 }
+    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 3, 4 }
 }));
 
-class conv_fp32_reorder_bfyx_to_fsv32_conv_mean : public ConvFusingTest {};
+// 'reorder_fsv32' is not being fused in the current impl, since it has 'mean'
+class conv_fp32_reorder_bfyx_to_fsv32_conv_mean : public ConvReorderFusingTest {};
 TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_mean, have_mean) {
     auto p = GetParam();
     memory::ptr mul = engine.allocate_memory({ data_types::f32, format::bfyx, tensor{ 1, 3, 1, 2 } });
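The two integers after the case macro appear to be the expected executed-primitive counts for the fused and the non-fused network, in that order, and they shift throughout this file because reorders now contribute to the totals: the non-fused graph keeps the explicit output reorder as its own primitive, so 3, 3 becomes 3, 4 above. A toy sketch of a reorder-aware count under that assumption (the real counting lives in BaseFusingTest::compare; graphs invented here):

#include <cassert>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

using executed_node = std::pair<std::string, std::string>;  // (id, primitive type)

// Count executed primitives, optionally including reorders in the total.
std::size_t count_executed(const std::vector<executed_node>& nodes, bool count_reorder) {
    std::size_t n = 0;
    for (const auto& node : nodes)
        if (count_reorder || node.second != "reorder")
            ++n;
    return n;
}

int main() {
    // Fused graph: the activation has been folded into the convolution.
    const std::vector<executed_node> fused = {
        {"reorder_fsv32", "reorder"},
        {"conv_prim", "convolution"},
        {"reorder_out", "reorder"}};
    // Non-fused graph: the activation still runs as its own primitive.
    const std::vector<executed_node> not_fused = {
        {"reorder_fsv32", "reorder"},
        {"conv_prim", "convolution"},
        {"activation", "activation"},
        {"reorder_out", "reorder"}};

    assert(count_executed(fused, /*count_reorder=*/true) == 3);      // expected fused count
    assert(count_executed(not_fused, /*count_reorder=*/true) == 4);  // expected non-fused count
    return 0;
}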
@@ -2579,21 +2607,21 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_mean, have_mean) {
         data("mul", mul),
         data("weights", get_mem(get_weights_layout(p), -127, 127)),
         reorder("reorder_fsv32", "input", format::fs_b_yx_fsv32, data_types::f32, "mul", reorder_mean_mode::mul),
-        convolution("conv_output", "reorder_fsv32", { "weights" }, 1, tensor{ 0, 0, 1, 1 }, p.pad, p.dilation),
-        activation("activation", "conv_output", activation_func::abs)
+        convolution("conv_prim", "reorder_fsv32", { "weights" }, 1, tensor{ 0, 0, 1, 1 }, p.pad, p.dilation),
+        activation("activation", "conv_prim", activation_func::abs)
     );
 
     implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
-    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));
 
     execute(p);
 }
 
 INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_mean, ::testing::ValuesIn(std::vector<convolution_test_params>{
-    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 3, 3 }
+    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 4, 4 }
 }));
 
-class conv_fp32_reorder_bfyx_to_fsv32_conv_subtract : public ConvFusingTest {};
+// 'reorder_fsv32' is not being fused in the current impl, since it has 'subtract'
+class conv_fp32_reorder_bfyx_to_fsv32_conv_subtract : public ConvReorderFusingTest {};
 TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature) {
     auto p = GetParam();
     const std::vector<float>& values_to_subtract = {
@@ -2603,7 +2631,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature)
         0.1f, 0.2f, 0.1f, 0.1f, 0.1f, 0.2f, 0.1f, 0.1f
     };
 
-    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(3, 3));
+    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(2, 2));
     auto dw_weights_layout = layout{ p.default_type, format::goiyx, dw_tensor };
     auto dw_stride = tensor{ 0, 0, 1, 1 };
@@ -2613,12 +2641,11 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature)
         data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
         convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation),
         reorder("reorder_fsv32", "conv_prim", format::fs_b_yx_fsv32, data_types::f32, values_to_subtract),
-        convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
-        activation("activation", "conv_output", activation_func::abs)
+        convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation)
     );
 
     implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
-    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));
 
     execute(p);
 }
@@ -2626,12 +2653,12 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_subtr
     convolution_test_params{ FSV32_CASE_CONV_FP32_1, 4, 4 }
 }));
 
-class conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation : public ConvFusingTest {};
+// 'reorder_fsv32' is not being fused in the current impl, since it has 'fused_activation'
+class conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation : public ConvReorderFusingTest {};
 TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, have_fused_activation) {
     auto p = GetParam();
 
-    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(3, 3));
+    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(2, 2));
     auto dw_weights_layout = layout{ p.default_type, format::goiyx, dw_tensor };
     auto dw_stride = tensor{ 0, 0, 1, 1 };
@@ -2642,25 +2669,26 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, have_fused_activat
         convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation),
         reorder("reorder_fsv32", "conv_prim", format::fs_b_yx_fsv32, data_types::f32),
         activation("activation_quantize", "reorder_fsv32", activation_func::relu),
-        convolution("conv_output", "activation_quantize", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
-        activation("activation", "conv_output", activation_func::abs)
+        convolution("conv_prim2", "activation_quantize", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
+        activation("activation", "conv_prim2", activation_func::abs)
     );
 
     implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
-    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "conv_prim2", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "activation", conv_impl } }));
 
     execute(p);
 }
 
 INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, ::testing::ValuesIn(std::vector<convolution_test_params>{
-    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 4, 5 }
+    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 5, 6 }
 }));
 
-class conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding : public ConvFusingTest {};
+// 'reorder_fsv32' is being fused even if it has 'padding'
+class conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding : public ConvReorderFusingTest {};
 TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, have_data_padding) {
     auto p = GetParam();
 
-    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(3, 3));
+    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(2, 2));
     auto dw_weights_layout = layout{ p.default_type, format::goiyx, dw_tensor };
     auto dw_stride = tensor{ 0, 0, 1, 1 };
@@ -2670,19 +2698,17 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, have_data_padding) {
         data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
         convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation),
         reorder("reorder_fsv32", "conv_prim", layout(data_types::f32, format::fs_b_yx_fsv32, dw_tensor, padding{ { 0, 0, 1, 1 }, 0 })),
-        convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
-        activation("activation", "conv_output", activation_func::abs),
-        activation("activation2", "conv_prim", activation_func::abs),
-        eltwise("add_bias", { "activation", "activation2" }, eltwise_mode::sum)
+        convolution("conv_prim2", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
+        reorder("reorder_out", "conv_prim2", format::fs_b_yx_fsv32, data_types::f32)
     );
 
     implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
-    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "conv_prim2", conv_impl } }));
 
    execute(p);
 }
 
 INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, ::testing::ValuesIn(std::vector<convolution_test_params>{
-    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 5, 6 }
+    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 5, 5 }
 }));
 
 #ifdef ENABLE_ONEDNN_FOR_GPU