[GPU] Support for PReLU with multiple dims slope tensor for GPU (#11782)

* reshape a slope tensor of channel-wise prelu

* changed to follow prelu spec

* added unittests for prelu with multiple dims slope

* Update constant.cpp

Blank lines were added for readability.

* added comments about PReLU slope reshape policy

* added int8 prelu fusion tests
This commit is contained in:
Eddy Kim
2022-06-02 23:01:01 +09:00
committed by GitHub
parent fc61b001c0
commit 04b69af0f5
5 changed files with 190 additions and 12 deletions

View File

@@ -75,7 +75,7 @@ activation_inst::typed_primitive_inst(network& network, activation_node const& n
/// Slope input x dimension should be equal to input feature size (one slope per channel).
auto slope_layout = node.slope_input().get_output_layout();
auto slope_input_size = slope_layout.size;
auto input_feature_size = slope_layout.feature();
auto input_feature_size = input_layout.feature();
CLDNN_ERROR_LESS_THAN(node.id(),
"Slope x size",
@@ -84,14 +84,6 @@ activation_inst::typed_primitive_inst(network& network, activation_node const& n
input_feature_size,
"Dimensions mismatch between input and slope input in Activation layer(slope x size "
"should be equal to input feature size)!");
// All other dimensions should be 1
CLDNN_ERROR_NOT_EQUAL(node.id(),
"Slope input size count",
slope_input_size.count(),
"Slope input size x",
slope_input_size.feature[0],
"Dimensions mismatch of slope input in Activation layer!");
}
}
} // namespace cldnn

View File

@@ -92,7 +92,8 @@ KERNEL(activation)(
#define NL_M_PARAMETERIZED (float)params[2*feature + 0]
#define NL_N_PARAMETERIZED (float)params[2*feature + 1]
#elif PARAMS_NUM == 1
#define NL_M_PARAMETERIZED (float)params[feature]
const unsigned param_index = GET_INDEX(ADDITIONAL_PARAMS,,ORDER);
#define NL_M_PARAMETERIZED (float)params[param_index]
#define NL_N_PARAMETERIZED (float)NL_N
#else
#define NL_M_PARAMETERIZED (float)NL_M

View File

@@ -60,7 +60,7 @@ struct ConstProperties {
static void createClDnnConstant(Program& p, const ngraph::Shape& constDims, const std::shared_ptr<ngraph::op::v0::Constant>& op, const ConstProperties& props);
static void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant>& op) {
const auto& constDims = op->get_shape();
ngraph::Shape constDims = op->get_shape();
auto constUsers = op->get_output_target_inputs(0);
size_t numConstUsers = constUsers.size();
@@ -115,6 +115,25 @@ static void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::C
handleConvWeights(outOp, consts, numConstUsers, false);
} else if (ngraph::is_type<ngraph::op::v1::GroupConvolutionBackpropData>(outOp) && node.get_index() == 1) {
handleConvWeights(outOp, consts, numConstUsers, true);
} else if (ngraph::is_type<ngraph::op::v0::PRelu>(outOp) && node.get_index() == 1) {
// PReLU slope tensor reshape policy
//
// 1. 1-dim slope is handled by 'getConstTensor'.
// ex) [1] --> [1, 1, 1, 1]
// [N] --> [1, N, 1, 1]
//
// 2. Multi-dims slope tensor is handled by the numpy broadcasting rule that is defined at
// 'https://docs.openvino.ai/latest/openvino_docs_ops_broadcast_rules.html'.
// ex) [N, 1, 1] --> [1, N, 1, 1]
// [N, M, 1] --> [1, N, M, 1]
auto input_shape = outOp->get_input_shape(0);
if (constDims.size() != 1 && constDims.size() < input_shape.size()) {
// Reshape 'constDims' according to the numpy broadcasting rule.
ngraph::Shape slope_shape(input_shape.size(), 1);
for (int j = 1; j <= constDims.size(); j++)
slope_shape[slope_shape.size() - j] = constDims[constDims.size() - j];
constDims = slope_shape;
}
}
}

View File

@@ -85,7 +85,7 @@ static void CreatePReluOp(Program& p, const std::shared_ptr<ngraph::op::v0::PRel
if (!ngraph::op::util::get_single_value(slope_node, slope))
IE_THROW() << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::relu_negative_slope, {slope});
} else if (out_shape.size() >= 2 && ngraph::shape_size(slope_shape) == out_shape[1]) {
} else if (out_shape.size() >= 2) {
auto inputs = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op);
auto activationPrimitive = cldnn::activation(layerName,

View File

@@ -112,6 +112,10 @@ public:
layout get_per_channel_layout(convolution_test_params& p) {
return layout{ p.default_type, p.default_format, tensor{1, p.out_shape.feature[0], 1, 1} };
}
layout get_prelu_slope_layout(convolution_test_params& p) {
return layout{ p.default_type, p.input_format, tensor{1, p.out_shape.feature[0], p.out_shape.spatial[0], 1} };
}
};
class ConvReorderFusingTest : public BaseFusingTest<convolution_test_params> {
@@ -563,6 +567,24 @@ TEST_P(conv_fp32_prelu_eltwise, basic_sum) {
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, basic_sum_slope_2) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(get_prelu_slope_layout(p))),
data("eltwise_data", get_mem(get_output_layout(p))),
convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::sum),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
tolerance = 1e-5f;
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, basic_prod) {
auto p = GetParam();
create_topologies(
@@ -581,6 +603,24 @@ TEST_P(conv_fp32_prelu_eltwise, basic_prod) {
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, basic_prod_slope_2) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(get_prelu_slope_layout(p))),
data("eltwise_data", get_mem(get_output_layout(p))),
convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::prod),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
tolerance = 1e-5f;
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_sum) {
auto p = GetParam();
tensor eltw_shape = p.default_format.spatial_num() == 2 ? tensor{ 1, 1, 1, 1 } : tensor{ 1, 1, 1, 1, 1 };
@@ -600,6 +640,25 @@ TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_sum) {
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_sum_slope_2) {
auto p = GetParam();
tensor eltw_shape = p.default_format.spatial_num() == 2 ? tensor{ 1, 1, 1, 1 } : tensor{ 1, 1, 1, 1, 1 };
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(get_prelu_slope_layout(p))),
data("eltwise_data", get_mem(layout{ p.data_type, p.input_format, eltw_shape })),
convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::sum),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
tolerance = 1e-5f;
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_prod) {
auto p = GetParam();
tensor eltw_shape = p.default_format.spatial_num() == 2 ? tensor{ 1, 1, 1, 1 } : tensor{ 1, 1, 1, 1, 1 };
@@ -619,6 +678,26 @@ TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_prod) {
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_prod_slope_2) {
auto p = GetParam();
tensor eltw_shape = p.default_format.spatial_num() == 2 ? tensor{ 1, 1, 1, 1 } : tensor{ 1, 1, 1, 1, 1 };
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(get_prelu_slope_layout(p))),
data("eltwise_data", get_mem(layout{ p.data_type, p.input_format, eltw_shape })),
convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::prod),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
tolerance = 1e-5f;
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, vector_ops) {
auto p = GetParam();
create_topologies(
@@ -640,6 +719,27 @@ TEST_P(conv_fp32_prelu_eltwise, vector_ops) {
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, vector_ops_slope_2) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(get_prelu_slope_layout(p))),
data("eltwise_data", get_mem(get_output_layout(p))),
convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::sum),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" };
bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));
tolerance = 1e-5f;
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, vector_ops_mixed_types) {
auto p = GetParam();
auto slope_type = p.default_type == data_types::f32 ? data_types::f16 : data_types::f32;
@@ -662,6 +762,28 @@ TEST_P(conv_fp32_prelu_eltwise, vector_ops_mixed_types) {
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, vector_ops_mixed_types_slope_2) {
auto p = GetParam();
auto slope_type = p.default_type == data_types::f32 ? data_types::f16 : data_types::f32;
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(layout{ slope_type, p.input_format, tensor{ 1, p.out_shape.feature[0], p.out_shape.spatial[0], 1 } })),
data("eltwise_data", get_mem(get_output_layout(p))),
convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::sum),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" };
bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));
tolerance = 1e-5f;
execute(p);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_prelu_eltwise, ::testing::ValuesIn(std::vector<convolution_test_params>{
// convolution_test_params{ CASE_CONV_FP32_1, 2, 4 },
convolution_test_params{ CASE_CONV_FP32_2, 2, 4 },
@@ -1599,6 +1721,24 @@ TEST_P(conv_int8_prelu_eltwise, basic) {
execute(p);
}
TEST_P(conv_int8_prelu_eltwise, basic_slope_2) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(get_prelu_slope_layout(p))),
data("eltwise_data", get_mem(get_output_layout(p))),
convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::sum),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
tolerance = 1e-5f;
execute(p);
}
TEST_P(conv_int8_prelu_eltwise, fsv16) {
auto p = GetParam();
create_topologies(
@@ -1625,6 +1765,32 @@ TEST_P(conv_int8_prelu_eltwise, fsv16) {
execute(p);
}
TEST_P(conv_int8_prelu_eltwise, fsv16_slope_2) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(get_prelu_slope_layout(p))),
data("eltwise_data", get_mem(get_output_layout(p))),
convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::sum),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
if (p.default_format.dimension() == 4) {
implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" };
bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));
} else {
// TODO Add 5D int8 optimized convolution implementations
return;
}
tolerance = 1e-5f;
execute(p);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_int8_prelu_eltwise, ::testing::ValuesIn(std::vector<convolution_test_params>{
convolution_test_params{ CASE_CONV_U8S8_1, 2, 4 },
convolution_test_params{ CASE_CONV_U8S8_2, 2, 4 },