[GPU][DG2] Fix fusings_gpu/gemm_2in_act_scale_eltwise.basic/4 (#14888)

* Handle hsigmoid via hard_sigmoid
* Add onednn_post_op_type::eltwise_hardsigmoid
Dohyun Kim (Felix) authored 2023-02-14 17:28:33 +09:00, committed by GitHub
parent dd0bf817b7
commit 5853c78b7f
7 changed files with 121 additions and 21 deletions
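
For reference, the whole fix rests on the identity hsigmoid(x) = clamp(x + 3, 0, 6) / 6 = clamp(x/6 + 1/2, 0, 1) = hard_sigmoid(x, 1/6, 1/2). A minimal standalone C++ check of that identity (reference scalar definitions, not part of the commit):

#include <algorithm>
#include <cassert>
#include <cmath>

// Reference definitions: hsigmoid(x) = clamp(x + 3, 0, 6) / 6,
// hard_sigmoid(x, a, b) = clamp(a*x + b, 0, 1).
static float hsigmoid_ref(float x) {
    return std::min(std::max(x + 3.0f, 0.0f), 6.0f) / 6.0f;
}
static float hard_sigmoid_ref(float x, float a, float b) {
    return std::min(std::max(a * x + b, 0.0f), 1.0f);
}

int main() {
    // hsigmoid(x) == hard_sigmoid(x, 1/6, 1/2) for all x (up to rounding).
    for (float x = -8.0f; x <= 8.0f; x += 0.25f)
        assert(std::fabs(hsigmoid_ref(x) - hard_sigmoid_ref(x, 1.0f / 6.0f, 0.5f)) < 1e-6f);
    return 0;
}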


@@ -380,6 +380,7 @@ protected:
             case onednn_post_op_type::eltwise_clip:
             case onednn_post_op_type::eltwise_linear:
             case onednn_post_op_type::eltwise_round:
+            case onednn_post_op_type::eltwise_hardsigmoid:
             {
                 // onednn eltwise doesn't need any data from memory buffers
                 break;
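
The shared fall-through above holds because an eltwise post-op is fully described by its (algorithm, alpha, beta) triple at attribute-build time; only binary post-ops reference an extra tensor that must be bound at execution. A minimal oneDNN sketch of that difference (illustrative memory descriptor, not from the commit):

#include <oneapi/dnnl/dnnl.hpp>

// Eltwise post-ops carry their parameters inline, so execution has nothing to
// bind for them; binary post-ops need their second operand supplied at run
// time via DNNL_ARG_ATTR_MULTIPLE_POST_OP(idx) | DNNL_ARG_SRC_1.
void append_post_ops_sketch(dnnl::post_ops& p_ops, const dnnl::memory::desc& scale_md) {
    p_ops.append_eltwise(dnnl::algorithm::eltwise_hardsigmoid, 1.0f / 6.0f, 1.0f / 2.0f); // self-contained
    p_ops.append_binary(dnnl::algorithm::binary_mul, scale_md);                           // needs a runtime arg
}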


@@ -397,7 +397,11 @@ dnnl::algorithm convert_activation_func(cldnn::activation_func func) {
         case cldnn::activation_func::hyperbolic_tan: return dnnl::algorithm::eltwise_tanh;
         case cldnn::activation_func::pow: return dnnl::algorithm::eltwise_pow;
         case cldnn::activation_func::sqrt: return dnnl::algorithm::eltwise_sqrt;
         case cldnn::activation_func::square: return dnnl::algorithm::eltwise_square;
+        case cldnn::activation_func::hard_sigmoid: return dnnl::algorithm::eltwise_hardsigmoid;
+        // Activations that are undef algorithms must be converted to other activations before pushing to post-op.
+        case cldnn::activation_func::hsigmoid: return dnnl::algorithm::undef;
+        case cldnn::activation_func::negative: return dnnl::algorithm::undef;
         default: throw std::runtime_error("Unsupported activation func for onednn primitive " + std::to_string(static_cast<int>(func)));
     }
 }
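
Since undef is used as a sentinel here, any caller that forwards the result into post-ops needs a guard; a hypothetical helper (not in the commit) mirroring the check added to init_onednn_primitive_attributes() later in this commit:

#include <oneapi/dnnl/dnnl.hpp>
#include <stdexcept>

// Activations that convert to dnnl::algorithm::undef (hsigmoid, negative)
// must already have been decomposed into supported post-ops before this point.
void append_converted_activation(dnnl::post_ops& p_ops, dnnl::algorithm alg, float alpha, float beta) {
    if (alg == dnnl::algorithm::undef)
        throw std::runtime_error("undef activation must be converted before pushing to post-op");
    p_ops.append_eltwise(alg, alpha, beta);
}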


@@ -50,6 +50,7 @@ enum class onednn_post_op_type : uint32_t {
     eltwise_clip,
     eltwise_linear,
     eltwise_round,
+    eltwise_hardsigmoid,
     binary_mul,
     binary_add,
     binary_sub,
@@ -72,6 +73,7 @@ static inline std::ostream& operator<< (std::ostream& os, onednn_post_op_type& t
         case onednn_post_op_type::eltwise_clip: os << "eltwise_clip"; break;
         case onednn_post_op_type::eltwise_linear: os << "eltwise_linear"; break;
         case onednn_post_op_type::eltwise_round: os << "eltwise_round"; break;
+        case onednn_post_op_type::eltwise_hardsigmoid: os << "eltwise_hardsigmoid"; break;
         case onednn_post_op_type::binary_mul: os << "binary_mul"; break;
         case onednn_post_op_type::binary_add: os << "binary_add"; break;
         case onednn_post_op_type::binary_sub: os << "binary_sub"; break;


@@ -118,6 +118,7 @@ inline std::string onednn_post_op_type_to_str(onednn_post_op_type type) {
         case onednn_post_op_type::eltwise_clip: return "eltwise_clip";
         case onednn_post_op_type::eltwise_linear: return "eltwise_linear";
         case onednn_post_op_type::eltwise_round: return "eltwise_round";
+        case onednn_post_op_type::eltwise_hardsigmoid: return "eltwise_hardsigmoid";
         case onednn_post_op_type::binary_mul: return "binary_mul";
         case onednn_post_op_type::binary_add: return "binary_add";
         case onednn_post_op_type::binary_sub: return "binary_add";


@@ -490,6 +490,7 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const
         case onednn_post_op_type::eltwise_clip:
         case onednn_post_op_type::eltwise_linear:
         case onednn_post_op_type::eltwise_round:
+        case onednn_post_op_type::eltwise_hardsigmoid:
         {
             dnnl::algorithm alg;
             float alpha, beta;
@@ -930,14 +931,21 @@ void program_node::init_onednn_primitive_attributes() {
                 post_ops.append_prelu(1 << oc_dim);
                 update_onednn_post_op_list(onednn_post_op_type::binary_relu, dep_idx);
             } else if (fused_desc->activation_function == cldnn::activation_func::hard_sigmoid) {
-                // Splits hard_sigmoid activation into eltwise_linear, min and max.
-                post_ops.append_eltwise(dnnl::algorithm::eltwise_linear,
-                                        fused_desc->additional_params.a, fused_desc->additional_params.b);
-                post_ops.append_eltwise(dnnl::algorithm::eltwise_clip, 0.0f, 1.0f);
-                update_onednn_post_op_list(onednn_post_op_type::eltwise_linear, empty_mem);
-                update_onednn_post_op_list(onednn_post_op_type::eltwise_clip, empty_mem);
+                post_ops.append_eltwise(dnnl::algorithm::eltwise_hardsigmoid, fused_desc->additional_params.a, fused_desc->additional_params.b);
+                update_onednn_post_op_list(onednn_post_op_type::eltwise_hardsigmoid, empty_mem);
+            } else if (fused_desc->activation_function == cldnn::activation_func::hsigmoid) {
+                // hard_sigmoid(x,a,b) = clamp(ax+b, 0, 1)
+                // hsigmoid(x) = clamp(x+3, 0, 6) / 6 = clamp(x/6+0.5, 0, 1) = hard_sigmoid(x, 1/6, 1/2)
+                post_ops.append_eltwise(dnnl::algorithm::eltwise_hardsigmoid, 1./6, 1./2);
+                update_onednn_post_op_list(onednn_post_op_type::eltwise_hardsigmoid, empty_mem);
+            } else if (fused_desc->activation_function == cldnn::activation_func::negative) {
+                post_ops.append_eltwise(dnnl::algorithm::eltwise_linear, -1, 0);
+                update_onednn_post_op_list(onednn_post_op_type::eltwise_linear, empty_mem);
             } else {
                 dnnl::algorithm alg = onednn::convert_activation_func(fused_desc->activation_function);
+                if (alg == dnnl::algorithm::undef)
+                    IE_THROW() << "Activations that are undef algorithms must be converted to other activations before "
+                                  "pushing to post-op.";
                 // Usage of alpha and beta differs between cldnn::pow and dnnl::eltwise_pow: d = pow(src, a) vs. d = a * pow(src, b).
                 if (alg == dnnl::algorithm::eltwise_pow)
                     post_ops.append_eltwise(alg, 1.0f, fused_desc->additional_params.a);
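
The hunk above replaces the old two-post-op decomposition of hard_sigmoid (eltwise_linear followed by eltwise_clip) with the single eltwise_hardsigmoid algorithm; both evaluate clamp(a·x + b, 0, 1), and negative(x) = -x stays expressible as eltwise_linear with alpha = -1, beta = 0. A reference-only scalar check of both equivalences (assumed per-element semantics, not oneDNN code):

#include <algorithm>
#include <cassert>

static float eltwise_linear_ref(float x, float a, float b) { return a * x + b; }
static float eltwise_clip_ref(float x, float lo, float hi) { return std::min(std::max(x, lo), hi); }
// oneDNN hardsigmoid semantics: max(0, min(1, a*x + b))
static float eltwise_hardsigmoid_ref(float x, float a, float b) {
    return std::max(0.0f, std::min(1.0f, a * x + b));
}

int main() {
    const float a = 0.2f, b = 0.5f; // arbitrary hard_sigmoid parameters
    for (float x = -10.0f; x <= 10.0f; x += 0.5f) {
        // old pair of post-ops == new single post-op
        assert(eltwise_clip_ref(eltwise_linear_ref(x, a, b), 0.0f, 1.0f) == eltwise_hardsigmoid_ref(x, a, b));
        // negative(x) == eltwise_linear(x, -1, 0)
        assert(-x == eltwise_linear_ref(x, -1.0f, 0.0f));
    }
    return 0;
}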


@@ -79,6 +79,26 @@ struct conv_eltw_test_params {
     size_t expected_not_fused_primitives;
 };
 
+struct conv_activation_onednn_test_params {
+    tensor in_shape;
+    tensor out_shape;
+    tensor kernel;
+    ov::Strides stride;
+    ov::CoordinateDiff pad;
+    ov::Strides dilation;
+    uint32_t groups;
+    data_types data_type;
+    format input_format;
+    data_types weights_type;
+    format weights_format;
+    data_types default_type;
+    format default_format;
+    activation_func activation_function_type;
+    size_t expected_fused_primitives;
+    size_t expected_fused_primitives_onednn;
+    size_t expected_not_fused_primitives;
+};
+
 class ConvFusingTest : public BaseFusingTest<convolution_test_params> {
 public:
     void execute(convolution_test_params& p, int min=0, int max=0) {
@@ -101,7 +121,6 @@ public:
                 return true;
             return false;
         };
-
         auto pi_fused = network_fused.get_primitives_info();
         auto info_fused = std::find_if(pi_fused.begin(), pi_fused.end(), find_conv);
         if (info_fused != pi_fused.end())
@@ -281,6 +300,49 @@ public:
 };
 #endif // ENABLE_ONEDNN_FOR_GPU
 
+class ConvActivationTestOnednn : public BaseFusingTest<conv_activation_onednn_test_params> {
+public:
+    void execute(conv_activation_onednn_test_params& p, int min=0, int max=0) {
+        if (engine.get_device_info().supports_immad)
+            p.expected_fused_primitives = p.expected_fused_primitives_onednn;
+        cldnn::memory::ptr input_prim;
+        if (min == max) {
+            input_prim = get_mem(get_input_layout(p));
+        } else {
+            input_prim = get_mem(get_input_layout(p), min, max);
+        }
+        network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused);
+        network network_fused(this->engine, this->topology_fused, cfg_fused);
+        network_fused.set_input_data("input", input_prim);
+        network_not_fused.set_input_data("input", input_prim);
+        compare(network_not_fused, network_fused, p);
+        auto find_conv = [](primitive_info& p) -> bool {
+            if (p.original_id == "conv_prim")
+                return true;
+            return false;
+        };
+        auto pi_fused = network_fused.get_primitives_info();
+        auto info_fused = std::find_if(pi_fused.begin(), pi_fused.end(), find_conv);
+        if (info_fused != pi_fused.end())
+            std::cout << "kernel: " << info_fused->kernel_id << std::endl;
+    }
+
+    layout get_input_layout(conv_activation_onednn_test_params& p) {
+        auto pad = p.pad;
+        std::vector<int> pad_ = { 0, 0, static_cast<int>(pad[1]), static_cast<int>(pad[0]) };
+        return layout{ p.data_type, p.input_format, p.in_shape, padding{ pad_ } };
+    }
+
+    layout get_per_channel_layout(conv_activation_onednn_test_params& p) {
+        return layout{ p.default_type, p.default_format, tensor{1, p.out_shape.feature[0], 1, 1} };
+    }
+
+    layout get_prelu_slope_layout(conv_activation_onednn_test_params& p) {
+        return layout{ p.default_type, p.input_format, tensor{1, p.out_shape.feature[0], p.out_shape.spatial[0], 1} };
+    }
+};
+
 } // namespace
 
 // in_shape; out_shape; kernel; stride; pad; dilation; groups; data_type; input_format; weights_type; weights_format; default_type; default_format;
@@ -2908,6 +2970,30 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp16_scale, ::testing::ValuesIn(std::
     bc_force_kernel_params{ CASE_CONV_FP16_15, 2, 3, "convolution_gpu_bfyx_f16_depthwise" },
 }));
 
+class conv_activation_onednn : public ConvActivationTestOnednn {};
+TEST_P(conv_activation_onednn, basic) {
+    if (!engine.get_device_info().supports_immad)
+        return;
+    auto p = GetParam();
+    create_topologies(
+        input_layout("input", get_input_layout(p)),
+        data("weights", get_mem(get_weights_layout(p))),
+        data("bias", get_mem(get_bias_layout(p))),
+        convolution("conv_prim", input_info("input"), { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
+        activation("activation", input_info("conv_prim"), p.activation_function_type),
+        reorder("reorder_bfyx", input_info("activation"), p.default_format, data_types::f32)
+    );
+    execute(p);
+}
+INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_activation_onednn, ::testing::ValuesIn(std::vector<conv_activation_onednn_test_params>{
+    conv_activation_onednn_test_params{ CASE_CONV_U8S8_1, activation_func::relu, 2, 2, 3 },
+    conv_activation_onednn_test_params{ CASE_CONV_U8S8_2, activation_func::relu_negative_slope, 2, 2, 3 },
+    conv_activation_onednn_test_params{ CASE_CONV_U8S8_3, activation_func::hard_sigmoid, 2, 2, 3 },
+    conv_activation_onednn_test_params{ CASE_CONV_S8S8_1, activation_func::hsigmoid, 2, 2, 3 },
+    conv_activation_onednn_test_params{ CASE_CONV_S8S8_2, activation_func::negative, 2, 2, 3 },
+    conv_activation_onednn_test_params{ CASE_CONV_S8S8_3, activation_func::sqrt, 2, 2, 3 },
+}));
+
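In the instantiation above, each CASE_CONV_* macro expands to the first thirteen fields of conv_activation_onednn_test_params (in_shape through default_format, matching the legend comment earlier in the file), and the trailing literals fill the remaining fields in declaration order. A reduced, self-contained stand-in for just that tail (stub types; the real struct is in the hunk above):

#include <cassert>
#include <cstddef>

// Stand-in for the tail of conv_activation_onednn_test_params; the leading
// thirteen fields would come from the CASE_CONV_* macro and are omitted here.
enum class activation_func_stub { relu, hard_sigmoid, hsigmoid, negative };
struct params_tail {
    activation_func_stub activation_function_type;
    std::size_t expected_fused_primitives;        // expectation for the non-onednn path
    std::size_t expected_fused_primitives_onednn; // used when supports_immad is true
    std::size_t expected_not_fused_primitives;
};

int main() {
    // Mirrors { CASE_CONV_U8S8_1, activation_func::relu, 2, 2, 3 } above.
    params_tail p{activation_func_stub::relu, 2, 2, 3};
    assert(p.expected_fused_primitives_onednn == 2 && p.expected_not_fused_primitives == 3);
    return 0;
}
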
/* ----------------------------------------------------------------------------------------------------- */
/* ---------------------- reorder(bfyx to fs_b_yx_fsv32) + convolution kernel cases -------------------- */


@@ -359,9 +359,6 @@ TEST_P(gemm_2in_act_scale_eltwise, basic) {
         eltwise("sum", { input_info("activation"), input_info("eltwise_data") }, eltwise_mode::sum, data_types::f32),
         reorder("reorder_bfyx", input_info("sum"), p.default_format, data_types::f32)
     );
 
-    // Activation won't be fused because onednn doesn't support negative activation
-    if (engine.get_device_info().supports_immad && !p.kernel_name.empty())
-        p.expected_fused_primitives += 2;
     tolerance = default_tolerance(p.default_type);
     execute(p);
@@ -380,20 +377,21 @@ TEST_P(gemm_2in_act_scale_eltwise, broadcast_eltwise) {
         eltwise("sum", { input_info("activation"), input_info("eltwise_data") }, eltwise_mode::sum, data_types::f32),
         reorder("reorder_bfyx", input_info("sum"), p.default_format, data_types::f32)
     );
 
-    // Activation won't be fused because onednn doesn't support negative activation
-    if (engine.get_device_info().supports_immad && !p.kernel_name.empty())
-        p.expected_fused_primitives += 2;
     tolerance = default_tolerance(p.default_type);
     execute(p);
 }
 
-INSTANTIATE_TEST_SUITE_P(fusings_gpu, gemm_2in_act_scale_eltwise, ::testing::ValuesIn(std::vector<gemm_test_params>{
-    gemm_test_params{ CASE_GEMM_ELTWISE_2IN_FP32_1, 3, 6 },
-    gemm_test_params{ CASE_GEMM_ELTWISE_2IN_FP16_1, 3, 6 },
-    gemm_test_params{ CASE_GEMM_ELTWISE_2IN_U8S8_1, 3, 6 },
-    gemm_test_params{ CASE_GEMM_ELTWISE_2IN_S8U8_1, 3, 6 },
-    gemm_test_params{ CASE_GEMM_ELTWISE_2IN_U8S8_2, 3, 3, "gemm_mmad_int8" },
-    // gemm_test_params{ CASE_GEMM_ELTWISE_2IN_U8S8_2, 3, 3, "gemm_mmad_int8_slm" }, // tolerance issue
-    gemm_test_params{ CASE_GEMM_ELTWISE_2IN_FP16_2, 3, 3, "gemm_tiled_opt" },
-}));
+INSTANTIATE_TEST_SUITE_P(
+    fusings_gpu,
+    gemm_2in_act_scale_eltwise,
+    ::testing::ValuesIn(std::vector<gemm_test_params>{
+        gemm_test_params{CASE_GEMM_ELTWISE_2IN_FP32_1, 3, 6},
+        gemm_test_params{CASE_GEMM_ELTWISE_2IN_FP16_1, 3, 6},
+        gemm_test_params{CASE_GEMM_ELTWISE_2IN_U8S8_1, 3, 6},
+        gemm_test_params{CASE_GEMM_ELTWISE_2IN_S8U8_1, 3, 6},
+        // The reference graph can also fuse here because force_implementation enables optimize_data(true) in program::set_options()
+        gemm_test_params{CASE_GEMM_ELTWISE_2IN_U8S8_2, 3, 3, "gemm_mmad_int8"},
+        // gemm_test_params{ CASE_GEMM_ELTWISE_2IN_U8S8_2, 3, 3, "gemm_mmad_int8_slm" }, // tolerance issue
+        gemm_test_params{CASE_GEMM_ELTWISE_2IN_FP16_2, 3, 3, "gemm_tiled_opt"},
+    }));