[GPU] Support for PReLU with multiple dims slope tensor for GPU (#11782)

* reshape a slope tensor of channel-wise prelu

* changed to follow prelu spec

* added unittests for prelu with multiple dims slope

* Update constant.cpp

Blank lines were added for readability.

* added comments about PReLU slope reshape policy

* added int8 prelu fusion tests
This commit is contained in:
Eddy Kim
2022-06-02 23:01:01 +09:00
committed by GitHub
parent fc61b001c0
commit 04b69af0f5
5 changed files with 190 additions and 12 deletions

View File

@@ -75,7 +75,7 @@ activation_inst::typed_primitive_inst(network& network, activation_node const& n
/// Slope input x dimension should be equal to input feature size (one slope per channel).
auto slope_layout = node.slope_input().get_output_layout();
auto slope_input_size = slope_layout.size;
auto input_feature_size = slope_layout.feature();
auto input_feature_size = input_layout.feature();
CLDNN_ERROR_LESS_THAN(node.id(),
"Slope x size",
@@ -84,14 +84,6 @@ activation_inst::typed_primitive_inst(network& network, activation_node const& n
input_feature_size,
"Dimensions mismatch between input and slope input in Activation layer(slope x size "
"should be equal to input feature size)!");
// All other dimensions should be 1
CLDNN_ERROR_NOT_EQUAL(node.id(),
"Slope input size count",
slope_input_size.count(),
"Slope input size x",
slope_input_size.feature[0],
"Dimensions mismatch of slope input in Activation layer!");
}
}
} // namespace cldnn

View File

@@ -92,7 +92,8 @@ KERNEL(activation)(
#define NL_M_PARAMETERIZED (float)params[2*feature + 0]
#define NL_N_PARAMETERIZED (float)params[2*feature + 1]
#elif PARAMS_NUM == 1
#define NL_M_PARAMETERIZED (float)params[feature]
const unsigned param_index = GET_INDEX(ADDITIONAL_PARAMS,,ORDER);
#define NL_M_PARAMETERIZED (float)params[param_index]
#define NL_N_PARAMETERIZED (float)NL_N
#else
#define NL_M_PARAMETERIZED (float)NL_M

View File

@@ -60,7 +60,7 @@ struct ConstProperties {
static void createClDnnConstant(Program& p, const ngraph::Shape& constDims, const std::shared_ptr<ngraph::op::v0::Constant>& op, const ConstProperties& props);
static void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant>& op) {
const auto& constDims = op->get_shape();
ngraph::Shape constDims = op->get_shape();
auto constUsers = op->get_output_target_inputs(0);
size_t numConstUsers = constUsers.size();
@@ -115,6 +115,25 @@ static void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::C
handleConvWeights(outOp, consts, numConstUsers, false);
} else if (ngraph::is_type<ngraph::op::v1::GroupConvolutionBackpropData>(outOp) && node.get_index() == 1) {
handleConvWeights(outOp, consts, numConstUsers, true);
} else if (ngraph::is_type<ngraph::op::v0::PRelu>(outOp) && node.get_index() == 1) {
// PReLU slope tensor reshape policy
//
// 1. 1-dim slope is handled by 'getConstTensor'.
// ex) [1] --> [1, 1, 1, 1]
// [N] --> [1, N, 1, 1]
//
// 2. Multi-dims slope tensor is handled by the numpy broadcasting rule that is defined at
// 'https://docs.openvino.ai/latest/openvino_docs_ops_broadcast_rules.html'.
// ex) [N, 1, 1] --> [1, N, 1, 1]
// [N, M, 1] --> [1, N, M, 1]
auto input_shape = outOp->get_input_shape(0);
if (constDims.size() != 1 && constDims.size() < input_shape.size()) {
// Reshape 'constDims' according to the numpy broadcasting rule.
ngraph::Shape slope_shape(input_shape.size(), 1);
for (int j = 1; j <= constDims.size(); j++)
slope_shape[slope_shape.size() - j] = constDims[constDims.size() - j];
constDims = slope_shape;
}
}
}

View File

@@ -85,7 +85,7 @@ static void CreatePReluOp(Program& p, const std::shared_ptr<ngraph::op::v0::PRel
if (!ngraph::op::util::get_single_value(slope_node, slope))
IE_THROW() << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::relu_negative_slope, {slope});
} else if (out_shape.size() >= 2 && ngraph::shape_size(slope_shape) == out_shape[1]) {
} else if (out_shape.size() >= 2) {
auto inputs = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op);
auto activationPrimitive = cldnn::activation(layerName,

View File

@@ -112,6 +112,10 @@ public:
layout get_per_channel_layout(convolution_test_params& p) {
return layout{ p.default_type, p.default_format, tensor{1, p.out_shape.feature[0], 1, 1} };
}
layout get_prelu_slope_layout(convolution_test_params& p) {
return layout{ p.default_type, p.input_format, tensor{1, p.out_shape.feature[0], p.out_shape.spatial[0], 1} };
}
};
class ConvReorderFusingTest : public BaseFusingTest<convolution_test_params> {
@@ -563,6 +567,24 @@ TEST_P(conv_fp32_prelu_eltwise, basic_sum) {
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, basic_sum_slope_2) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(get_prelu_slope_layout(p))),
data("eltwise_data", get_mem(get_output_layout(p))),
convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::sum),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
tolerance = 1e-5f;
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, basic_prod) {
auto p = GetParam();
create_topologies(
@@ -581,6 +603,24 @@ TEST_P(conv_fp32_prelu_eltwise, basic_prod) {
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, basic_prod_slope_2) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(get_prelu_slope_layout(p))),
data("eltwise_data", get_mem(get_output_layout(p))),
convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::prod),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
tolerance = 1e-5f;
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_sum) {
auto p = GetParam();
tensor eltw_shape = p.default_format.spatial_num() == 2 ? tensor{ 1, 1, 1, 1 } : tensor{ 1, 1, 1, 1, 1 };
@@ -600,6 +640,25 @@ TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_sum) {
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_sum_slope_2) {
auto p = GetParam();
tensor eltw_shape = p.default_format.spatial_num() == 2 ? tensor{ 1, 1, 1, 1 } : tensor{ 1, 1, 1, 1, 1 };
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(get_prelu_slope_layout(p))),
data("eltwise_data", get_mem(layout{ p.data_type, p.input_format, eltw_shape })),
convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::sum),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
tolerance = 1e-5f;
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_prod) {
auto p = GetParam();
tensor eltw_shape = p.default_format.spatial_num() == 2 ? tensor{ 1, 1, 1, 1 } : tensor{ 1, 1, 1, 1, 1 };
@@ -619,6 +678,26 @@ TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_prod) {
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_prod_slope_2) {
auto p = GetParam();
tensor eltw_shape = p.default_format.spatial_num() == 2 ? tensor{ 1, 1, 1, 1 } : tensor{ 1, 1, 1, 1, 1 };
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(get_prelu_slope_layout(p))),
data("eltwise_data", get_mem(layout{ p.data_type, p.input_format, eltw_shape })),
convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::prod),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
tolerance = 1e-5f;
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, vector_ops) {
auto p = GetParam();
create_topologies(
@@ -640,6 +719,27 @@ TEST_P(conv_fp32_prelu_eltwise, vector_ops) {
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, vector_ops_slope_2) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(get_prelu_slope_layout(p))),
data("eltwise_data", get_mem(get_output_layout(p))),
convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::sum),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" };
bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));
tolerance = 1e-5f;
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, vector_ops_mixed_types) {
auto p = GetParam();
auto slope_type = p.default_type == data_types::f32 ? data_types::f16 : data_types::f32;
@@ -662,6 +762,28 @@ TEST_P(conv_fp32_prelu_eltwise, vector_ops_mixed_types) {
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, vector_ops_mixed_types_slope_2) {
auto p = GetParam();
auto slope_type = p.default_type == data_types::f32 ? data_types::f16 : data_types::f32;
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(layout{ slope_type, p.input_format, tensor{ 1, p.out_shape.feature[0], p.out_shape.spatial[0], 1 } })),
data("eltwise_data", get_mem(get_output_layout(p))),
convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::sum),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" };
bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));
tolerance = 1e-5f;
execute(p);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_prelu_eltwise, ::testing::ValuesIn(std::vector<convolution_test_params>{
// convolution_test_params{ CASE_CONV_FP32_1, 2, 4 },
convolution_test_params{ CASE_CONV_FP32_2, 2, 4 },
@@ -1599,6 +1721,24 @@ TEST_P(conv_int8_prelu_eltwise, basic) {
execute(p);
}
TEST_P(conv_int8_prelu_eltwise, basic_slope_2) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(get_prelu_slope_layout(p))),
data("eltwise_data", get_mem(get_output_layout(p))),
convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::sum),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
tolerance = 1e-5f;
execute(p);
}
TEST_P(conv_int8_prelu_eltwise, fsv16) {
auto p = GetParam();
create_topologies(
@@ -1625,6 +1765,32 @@ TEST_P(conv_int8_prelu_eltwise, fsv16) {
execute(p);
}
TEST_P(conv_int8_prelu_eltwise, fsv16_slope_2) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(get_prelu_slope_layout(p))),
data("eltwise_data", get_mem(get_output_layout(p))),
convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::sum),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
if (p.default_format.dimension() == 4) {
implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" };
bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));
} else {
// TODO Add 5D int8 optimized convolution implementations
return;
}
tolerance = 1e-5f;
execute(p);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_int8_prelu_eltwise, ::testing::ValuesIn(std::vector<convolution_test_params>{
convolution_test_params{ CASE_CONV_U8S8_1, 2, 4 },
convolution_test_params{ CASE_CONV_U8S8_2, 2, 4 },