From 00b7f58152ce6458bc35a28984e54132bf641037 Mon Sep 17 00:00:00 2001
From: Kelvin Choi
Date: Thu, 27 Jan 2022 13:08:35 +0900
Subject: [PATCH] [GPU] enable validate after adding reorder (#9594)

---
 .../graph_optimizer/add_required_reorders.cpp |  1 +
 .../activation/activation_kernel_ref.cpp      |  9 ++
 .../activation/activation_kernel_ref.h        |  5 +
 .../tests/fusions/convolution_fusion_test.cpp | 94 ++++++++++++-------
 4 files changed, 75 insertions(+), 34 deletions(-)

diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp
index df7a9fbea3a..b88ffc92fe6 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp
@@ -30,6 +30,7 @@ void add_required_reorders::add_reorder(program& p, program_node* node, program_node* usr) {
 
     auto new_reorder = std::make_shared<reorder>(node->id() + "_reorder_" + usr->id(), node->id(), reorder_layout);
     auto& new_reorder_node = p.get_or_create(new_reorder);
+    new_reorder_node.set_output_layout(reorder_layout, false);
 
     // ToDo: add a method to program class which adds an intermediate node given a node and its user
     auto it = std::find(usr->get_dependencies().begin(), usr->get_dependencies().end(), node);
diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.cpp
index 34da9a3a4cf..eb49ae0db5c 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.cpp
@@ -56,4 +56,13 @@ KernelsData ActivationKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
 KernelsPriority ActivationKernelRef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const {
     return DONT_USE_IF_HAVE_SOMETHING_ELSE;
 }
+
+bool ActivationKernelRef::Validate(const Params& p, const optional_params& o) const {
+    if (!Parent::Validate(p, o)) return false;
+    const auto& params = static_cast<const activation_params&>(p);
+    if (params.inputs[0].GetDims().size() != params.output.GetDims().size())
+        return false;
+
+    return true;
+}
 }  // namespace kernel_selector
diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.h
index d7696784f6d..1f6c3075c19 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.h
+++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.h
@@ -11,6 +11,9 @@ namespace kernel_selector {
 
 class ActivationKernelRef : public ActivationKernelBase {
 public:
+    using Parent = ActivationKernelBase;
+    using Parent::Parent;
+
     ActivationKernelRef() : ActivationKernelBase("activation_ref") {}
     virtual ~ActivationKernelRef() {}
 
@@ -23,5 +26,7 @@ public:
             FusedOpType::SCALE,
             FusedOpType::ACTIVATION };
     }
+
+    bool Validate(const Params& p, const optional_params& o) const override;
 };
 }  // namespace kernel_selector
diff --git a/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp
index 0e29fc74354..e0114e8f27c 100644
--- a/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp
+++ b/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp
@@ -113,6 +113,29 @@ public:
     }
 };
 
+class ConvReorderFusingTest : public BaseFusingTest<convolution_test_params> {
+public:
+    void execute(convolution_test_params& p) {
+        auto input_prim = get_mem(get_input_layout(p));
+        network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
+        network network_fused(this->engine, this->topology_fused, bo_fused);
+        network_fused.set_input_data("input", input_prim);
+        network_not_fused.set_input_data("input", input_prim);
+
+        compare(network_not_fused, network_fused, p, true);
+    }
+
+    layout get_input_layout(convolution_test_params& p) {
+        auto pad = p.pad;
+        std::vector<int> pad_ = { 0, 0, pad.spatial[0], pad.spatial[1] };
+        return layout{ p.data_type, p.input_format, p.in_shape, padding{ pad_ } };
+    }
+
+    layout get_per_channel_layout(convolution_test_params& p) {
+        return layout{ p.default_type, p.default_format, tensor{ 1, p.out_shape.feature[0], 1, 1 } };
+    }
+};
+
 class ConvEltwTest : public ::BaseFusingTest<conv_eltw_test_params> {
 public:
@@ -2543,10 +2566,14 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp16_scale, ::testing::ValuesIn(std::vector<bc_force_kernel_params>{
     bc_force_kernel_params{ CASE_CONV_FP16_13, 2, 3, "convolution_gpu_fs_byx_fsv32" },
 }));
 
-// reorder(bfyx to fs_b_yx_fsv32) + conv
+
+/* ----------------------------------------------------------------------------------------------------- */
+/* ---------------------- reorder(bfyx to fs_b_yx_fsv32) + convolution kernel cases -------------------- */
+/* ----------------------------------------------------------------------------------------------------- */
 
 #define FSV32_CASE_CONV_FP32_1 { 1, 32, 4, 5 }, { 1, 32, 2, 3 }, { 1, 1, 3, 3 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::oiyx, data_types::f32, format::bfyx
 
-class conv_fp32_reorder_bfyx_to_fsv32_conv_basic : public ConvFusingTest {};
+// 'reorder_fsv32' is removed by "remove_redundant_reorders" in the current implementation
+class conv_fp32_reorder_bfyx_to_fsv32_conv_basic : public ConvReorderFusingTest {};
 TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_basic, basic) {
     auto p = GetParam();
@@ -2554,21 +2581,22 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_basic, basic) {
         input_layout("input", get_input_layout(p)),
         data("weights", get_mem(get_weights_layout(p), -127, 127)),
         reorder("reorder_fsv32", "input", format::fs_b_yx_fsv32, data_types::f32),
-        convolution("conv_output", "reorder_fsv32", { "weights" }, 1, tensor{ 0, 0, 1, 1 }, p.pad, p.dilation),
-        activation("activation", "conv_output", activation_func::abs)
+        convolution("conv_prim", "reorder_fsv32", { "weights" }, 1, tensor{ 0, 0, 1, 1 }, p.pad, p.dilation),
+        activation("activation", "conv_prim", activation_func::abs),
+        reorder("reorder_out", "activation", format::bfyx, data_types::f32)
     );
 
     implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
-    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));
 
     execute(p);
 }
 
 INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_basic, ::testing::ValuesIn(std::vector<convolution_test_params>{
-    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 3, 3 }
+    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 3, 4 }
 }));
 
-
-class conv_fp32_reorder_bfyx_to_fsv32_conv_mean : public ConvFusingTest {};
+// 'reorder_fsv32' is not fused in the current implementation because it uses 'mean'
+class conv_fp32_reorder_bfyx_to_fsv32_conv_mean : public ConvReorderFusingTest {};
 TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_mean, have_mean) {
     auto p = GetParam();
     memory::ptr mul = engine.allocate_memory({ data_types::f32, format::bfyx, tensor{ 1, 3, 1, 2 } });
@@ -2579,21 +2607,21 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_mean, have_mean) {
         data("mul", mul),
         data("weights", get_mem(get_weights_layout(p), -127, 127)),
         reorder("reorder_fsv32", "input", format::fs_b_yx_fsv32, data_types::f32, "mul", reorder_mean_mode::mul),
-        convolution("conv_output", "reorder_fsv32", { "weights" }, 1, tensor{ 0, 0, 1, 1 }, p.pad, p.dilation),
-        activation("activation", "conv_output", activation_func::abs)
+        convolution("conv_prim", "reorder_fsv32", { "weights" }, 1, tensor{ 0, 0, 1, 1 }, p.pad, p.dilation),
+        activation("activation", "conv_prim", activation_func::abs)
     );
 
     implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
-    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));
 
     execute(p);
 }
 
 INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_mean, ::testing::ValuesIn(std::vector<convolution_test_params>{
-    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 3, 3 }
+    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 4, 4 }
 }));
 
-
-class conv_fp32_reorder_bfyx_to_fsv32_conv_subtract : public ConvFusingTest {};
+// 'reorder_fsv32' is not fused in the current implementation because it uses 'subtract'
+class conv_fp32_reorder_bfyx_to_fsv32_conv_subtract : public ConvReorderFusingTest {};
 TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature) {
     auto p = GetParam();
     const std::vector<float>& values_to_subtract = {
@@ -2603,7 +2631,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature) {
         0.1f, 0.2f, 0.1f, 0.1f,
         0.1f, 0.2f, 0.1f, 0.1f
     };
-    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(3, 3));
+    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(2, 2));
     auto dw_weights_layout = layout{ p.default_type, format::goiyx, dw_tensor };
 
     auto dw_stride = tensor{ 0, 0, 1, 1 };
@@ -2613,12 +2641,11 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature) {
         data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
         convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation),
         reorder("reorder_fsv32", "conv_prim", format::fs_b_yx_fsv32, data_types::f32, values_to_subtract),
-        convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
-        activation("activation", "conv_output", activation_func::abs)
+        convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation)
     );
 
     implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
-    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));
 
     execute(p);
 }
@@ -2626,12 +2653,12 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, ::testing::ValuesIn(std::vector<convolution_test_params>{
     convolution_test_params{ FSV32_CASE_CONV_FP32_1, 4, 4 }
 }));
 
-
-class conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation : public ConvFusingTest {};
+// 'reorder_fsv32' is not fused in the current implementation because it has a fused activation
+class conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation : public ConvReorderFusingTest {};
 TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, have_fused_activation) {
     auto p = GetParam();
 
-    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(3, 3));
+    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(2, 2));
     auto dw_weights_layout = layout{ p.default_type, format::goiyx, dw_tensor };
 
     auto dw_stride = tensor{ 0, 0, 1, 1 };
@@ -2642,25 +2669,26 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, have_fused_activation) {
         convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation),
         reorder("reorder_fsv32", "conv_prim", format::fs_b_yx_fsv32, data_types::f32),
         activation("activation_quantize", "reorder_fsv32", activation_func::relu),
-        convolution("conv_output", "activation_quantize", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
-        activation("activation", "conv_output", activation_func::abs)
+        convolution("conv_prim2", "activation_quantize", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
+        activation("activation", "conv_prim2", activation_func::abs)
     );
 
     implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
-    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "conv_prim2", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "activation", conv_impl } }));
 
     execute(p);
 }
 
 INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, ::testing::ValuesIn(std::vector<convolution_test_params>{
-    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 4, 5 }
+    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 5, 6 }
 }));
 
-
-class conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding : public ConvFusingTest {};
+// 'reorder_fsv32' is fused even though it has 'padding'
+class conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding : public ConvReorderFusingTest {};
 TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, have_data_padding) {
     auto p = GetParam();
 
-    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(3, 3));
+    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(2, 2));
     auto dw_weights_layout = layout{ p.default_type, format::goiyx, dw_tensor };
 
     auto dw_stride = tensor{ 0, 0, 1, 1 };
@@ -2670,19 +2698,17 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, have_data_padding) {
         data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
         convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation),
         reorder("reorder_fsv32", "conv_prim", layout(data_types::f32, format::fs_b_yx_fsv32, dw_tensor, padding{ { 0, 0, 1, 1 }, 0 })),
-        convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
-        activation("activation", "conv_output", activation_func::abs),
-        activation("activation2", "conv_prim", activation_func::abs),
-        eltwise("add_bias", { "activation", "activation2" }, eltwise_mode::sum)
+        convolution("conv_prim2", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
+        reorder("reorder_out", "conv_prim2", format::fs_b_yx_fsv32, data_types::f32)
    );
 
     implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
-    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "conv_prim2", conv_impl } }));
 
     execute(p);
 }
 
 INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, ::testing::ValuesIn(std::vector<convolution_test_params>{
-    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 5, 6 }
+    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 5, 5 }
 }));
 
 #ifdef ENABLE_ONEDNN_FOR_GPU