[GPU][DG2] Fix fusings_gpu/gemm_2in_act_scale_eltwise.basic/4 (#14888)
* Handle hsigmoid via hard_sigmoid
* Add onednn_post_op_type::eltwise_hardsigmoid
This commit is contained in:
parent
dd0bf817b7
commit
5853c78b7f
@ -380,6 +380,7 @@ protected:
|
||||
case onednn_post_op_type::eltwise_clip:
|
||||
case onednn_post_op_type::eltwise_linear:
|
||||
case onednn_post_op_type::eltwise_round:
|
||||
case onednn_post_op_type::eltwise_hardsigmoid:
|
||||
{
|
||||
// onednn eltwise doesn't need any data from memory buffers
|
||||
break;
|
||||
|
@ -397,7 +397,11 @@ dnnl::algorithm convert_activation_func(cldnn::activation_func func) {
|
||||
case cldnn::activation_func::hyperbolic_tan: return dnnl::algorithm::eltwise_tanh;
|
||||
case cldnn::activation_func::pow: return dnnl::algorithm::eltwise_pow;
|
||||
case cldnn::activation_func::sqrt: return dnnl::algorithm::eltwise_sqrt;
|
||||
case cldnn::activation_func::square: return dnnl::algorithm::eltwise_square;
|
||||
case cldnn::activation_func::hard_sigmoid: return dnnl::algorithm::eltwise_hardsigmoid;
|
||||
// Activations that are undef algorithms must be converted to other activations before pushing to post-op.
|
||||
case cldnn::activation_func::hsigmoid: return dnnl::algorithm::undef;
|
||||
case cldnn::activation_func::negative: return dnnl::algorithm::undef;
|
||||
default: throw std::runtime_error("Unsupported activation func for onednn primitive " + std::to_string(static_cast<int>(func)));
|
||||
}
|
||||
}
|
||||
|
@ -50,6 +50,7 @@ enum class onednn_post_op_type : uint32_t {
|
||||
eltwise_clip,
|
||||
eltwise_linear,
|
||||
eltwise_round,
|
||||
eltwise_hardsigmoid,
|
||||
binary_mul,
|
||||
binary_add,
|
||||
binary_sub,
|
||||
@ -72,6 +73,7 @@ static inline std::ostream& operator<< (std::ostream& os, onednn_post_op_type& t
|
||||
case onednn_post_op_type::eltwise_clip: os << "eltwise_clip"; break;
|
||||
case onednn_post_op_type::eltwise_linear: os << "eltwise_linear"; break;
|
||||
case onednn_post_op_type::eltwise_round: os << "eltwise_round"; break;
|
||||
case onednn_post_op_type::eltwise_hardsigmoid: os << "eltwise_hardsigmoid"; break;
|
||||
case onednn_post_op_type::binary_mul: os << "binary_mul"; break;
|
||||
case onednn_post_op_type::binary_add: os << "binary_add"; break;
|
||||
case onednn_post_op_type::binary_sub: os << "binary_sub"; break;
|
||||
|
@ -118,6 +118,7 @@ inline std::string onednn_post_op_type_to_str(onednn_post_op_type type) {
|
||||
case onednn_post_op_type::eltwise_clip: return "eltwise_clip";
|
||||
case onednn_post_op_type::eltwise_linear: return "eltwise_linear";
|
||||
case onednn_post_op_type::eltwise_round: return "eltwise_round";
|
||||
case onednn_post_op_type::eltwise_hardsigmoid: return "eltwise_hardsigmoid";
|
||||
case onednn_post_op_type::binary_mul: return "binary_mul";
|
||||
case onednn_post_op_type::binary_add: return "binary_add";
|
||||
case onednn_post_op_type::binary_sub: return "binary_add";
|
||||
|
@ -490,6 +490,7 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const
|
||||
case onednn_post_op_type::eltwise_clip:
|
||||
case onednn_post_op_type::eltwise_linear:
|
||||
case onednn_post_op_type::eltwise_round:
|
||||
case onednn_post_op_type::eltwise_hardsigmoid:
|
||||
{
|
||||
dnnl::algorithm alg;
|
||||
float alpha, beta;
|
||||
@ -930,14 +931,21 @@ void program_node::init_onednn_primitive_attributes() {
|
||||
post_ops.append_prelu(1 << oc_dim);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_relu, dep_idx);
|
||||
} else if (fused_desc->activation_function == cldnn::activation_func::hard_sigmoid) {
|
||||
// Splits hard_sigmoid activation into eltwise_linear, min and max.
|
||||
post_ops.append_eltwise(dnnl::algorithm::eltwise_linear,
|
||||
fused_desc->additional_params.a, fused_desc->additional_params.b);
|
||||
post_ops.append_eltwise(dnnl::algorithm::eltwise_clip, 0.0f, 1.0f);
|
||||
post_ops.append_eltwise(dnnl::algorithm::eltwise_hardsigmoid, fused_desc->additional_params.a, fused_desc->additional_params.b);
|
||||
update_onednn_post_op_list(onednn_post_op_type::eltwise_hardsigmoid, empty_mem);
|
||||
} else if (fused_desc->activation_function == cldnn::activation_func::hsigmoid) {
|
||||
// hard_sigmoid(x,a,b) = clamp(ax+b, 0, 1)
|
||||
// hsigmoid(x) = clamp(val+3, 0, 6) / 6 = clamp(val/6+0.5, 0, 1) = hard_sigmoid(val, 1/6, 1/2)
|
||||
post_ops.append_eltwise(dnnl::algorithm::eltwise_hardsigmoid, 1./6, 1./2);
|
||||
update_onednn_post_op_list(onednn_post_op_type::eltwise_hardsigmoid, empty_mem);
|
||||
} else if (fused_desc->activation_function == cldnn::activation_func::negative) {
|
||||
post_ops.append_eltwise(dnnl::algorithm::eltwise_linear, -1, 0);
|
||||
update_onednn_post_op_list(onednn_post_op_type::eltwise_linear, empty_mem);
|
||||
update_onednn_post_op_list(onednn_post_op_type::eltwise_clip, empty_mem);
|
||||
} else {
|
||||
dnnl::algorithm alg = onednn::convert_activation_func(fused_desc->activation_function);
|
||||
if (alg == dnnl::algorithm::undef)
|
||||
IE_THROW() << "Activations that are undef algorithms must be converted to other activations before "
|
||||
"pushing to post-op.";
|
||||
// Usage of alpha and beta between cldnn::pow and dnnl::eltwise::pow is different : d = pow(src, a) / d = a * pow(src, b)
|
||||
if (alg == dnnl::algorithm::eltwise_pow)
|
||||
post_ops.append_eltwise(alg, 1.0f, fused_desc->additional_params.a);
|
||||
|
@ -79,6 +79,26 @@ struct conv_eltw_test_params {
|
||||
size_t expected_not_fused_primitives;
|
||||
};
|
||||
|
||||
struct conv_activation_onednn_test_params {
|
||||
tensor in_shape;
|
||||
tensor out_shape;
|
||||
tensor kernel;
|
||||
ov::Strides stride;
|
||||
ov::CoordinateDiff pad;
|
||||
ov::Strides dilation;
|
||||
uint32_t groups;
|
||||
data_types data_type;
|
||||
format input_format;
|
||||
data_types weights_type;
|
||||
format weights_format;
|
||||
data_types default_type;
|
||||
format default_format;
|
||||
activation_func activation_function_type;
|
||||
size_t expected_fused_primitives;
|
||||
size_t expected_fused_primitives_onednn;
|
||||
size_t expected_not_fused_primitives;
|
||||
};
|
||||
|
||||
class ConvFusingTest : public BaseFusingTest<convolution_test_params> {
|
||||
public:
|
||||
void execute(convolution_test_params& p, int min=0, int max=0) {
|
||||
@ -101,7 +121,6 @@ public:
|
||||
return true;
|
||||
return false;
|
||||
};
|
||||
|
||||
auto pi_fused = network_fused.get_primitives_info();
|
||||
auto info_fused = std::find_if(pi_fused.begin(), pi_fused.end(), find_conv);
|
||||
if (info_fused != pi_fused.end())
|
||||
@ -281,6 +300,49 @@ public:
|
||||
};
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
|
||||
// Fixture for convolution + activation fusing tests driven by conv_activation_onednn_test_params.
class ConvActivationTestOnednn : public BaseFusingTest<conv_activation_onednn_test_params> {
public:
    // Builds the non-fused and the fused network from the prepared topologies, feeds the same
    // input memory to both and hands them to compare().
    //
    // p        - test parameters; expected_fused_primitives is overwritten with the onednn
    //            expectation when the device reports supports_immad.
    // min, max - when equal, the input comes from get_mem(layout); otherwise from
    //            get_mem(layout, min, max).
    void execute(conv_activation_onednn_test_params& p, int min = 0, int max = 0) {
        if (engine.get_device_info().supports_immad) {
            p.expected_fused_primitives = p.expected_fused_primitives_onednn;
        }

        cldnn::memory::ptr input_prim = (min == max) ? get_mem(get_input_layout(p))
                                                     : get_mem(get_input_layout(p), min, max);

        network network_not_fused(engine, topology_non_fused, cfg_not_fused);
        network network_fused(engine, topology_fused, cfg_fused);
        network_fused.set_input_data("input", input_prim);
        network_not_fused.set_input_data("input", input_prim);

        compare(network_not_fused, network_fused, p);

        // Report which kernel was picked for the convolution in the fused network, if it is listed.
        auto is_conv_prim = [](primitive_info& info) -> bool {
            return info.original_id == "conv_prim";
        };
        auto fused_infos = network_fused.get_primitives_info();
        auto conv_info = std::find_if(fused_infos.begin(), fused_infos.end(), is_conv_prim);
        if (conv_info != fused_infos.end()) {
            std::cout << "kernel: " << conv_info->kernel_id << std::endl;
        }
    }

    // Input layout of the test case: data_type / input_format / in_shape plus a padding built
    // from { 0, 0, pad[1], pad[0] }.
    layout get_input_layout(conv_activation_onednn_test_params& p) {
        auto conv_pad = p.pad;
        std::vector<int> padding_sizes = { 0, 0, static_cast<int>(conv_pad[1]), static_cast<int>(conv_pad[0]) };
        return layout{ p.data_type, p.input_format, p.in_shape, padding{ padding_sizes } };
    }

    // Layout of shape {1, out_shape.feature[0], 1, 1} in default_type / default_format.
    layout get_per_channel_layout(conv_activation_onednn_test_params& p) {
        return layout{ p.default_type,
                       p.default_format,
                       tensor{1, p.out_shape.feature[0], 1, 1} };
    }

    // Layout of shape {1, out_shape.feature[0], out_shape.spatial[0], 1} in default_type,
    // using the input format.
    layout get_prelu_slope_layout(conv_activation_onednn_test_params& p) {
        return layout{ p.default_type,
                       p.input_format,
                       tensor{1, p.out_shape.feature[0], p.out_shape.spatial[0], 1} };
    }
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// in_shape; out_shape; kernel; stride; pad; dilation; groups; data_type; input_format; weights_type; weights_format; default_type; default_format;
|
||||
@ -2908,6 +2970,30 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp16_scale, ::testing::ValuesIn(std::
|
||||
bc_force_kernel_params{ CASE_CONV_FP16_15, 2, 3, "convolution_gpu_bfyx_f16_depthwise" },
|
||||
}));
|
||||
|
||||
class conv_activation_onednn : public ConvActivationTestOnednn {};
|
||||
TEST_P(conv_activation_onednn, basic) {
|
||||
if (!engine.get_device_info().supports_immad)
|
||||
return;
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
input_layout("input", get_input_layout(p)),
|
||||
data("weights", get_mem(get_weights_layout(p))),
|
||||
data("bias", get_mem(get_bias_layout(p))),
|
||||
convolution("conv_prim", input_info("input"), { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
|
||||
activation("activation", input_info("conv_prim"), p.activation_function_type),
|
||||
reorder("reorder_bfyx", input_info("activation"), p.default_format, data_types::f32)
|
||||
);
|
||||
|
||||
execute(p);
|
||||
}
|
||||
// Columns after the CASE_CONV_* macro (which presumably expands to the leading shape/type/format
// members of conv_activation_onednn_test_params - verify against the macro definitions):
//   activation_function_type, expected_fused_primitives, expected_fused_primitives_onednn,
//   expected_not_fused_primitives
INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_activation_onednn, ::testing::ValuesIn(std::vector<conv_activation_onednn_test_params>{
    conv_activation_onednn_test_params{ CASE_CONV_U8S8_1, activation_func::relu, 2, 2, 3 },
    conv_activation_onednn_test_params{ CASE_CONV_U8S8_2, activation_func::relu_negative_slope, 2, 2, 3 },
    conv_activation_onednn_test_params{ CASE_CONV_U8S8_3, activation_func::hard_sigmoid, 2, 2, 3 },
    conv_activation_onednn_test_params{ CASE_CONV_S8S8_1, activation_func::hsigmoid, 2, 2, 3 },
    conv_activation_onednn_test_params{ CASE_CONV_S8S8_2, activation_func::negative, 2, 2, 3 },
    conv_activation_onednn_test_params{ CASE_CONV_S8S8_3, activation_func::sqrt, 2, 2, 3 },
}));
|
||||
|
||||
/* ----------------------------------------------------------------------------------------------------- */
|
||||
/* ---------------------- reorder(bfyx to fs_b_yx_fsv32) + convolution kernel cases -------------------- */
|
||||
|
@ -359,9 +359,6 @@ TEST_P(gemm_2in_act_scale_eltwise, basic) {
|
||||
eltwise("sum", { input_info("activation"), input_info("eltwise_data") }, eltwise_mode::sum, data_types::f32),
|
||||
reorder("reorder_bfyx", input_info("sum"), p.default_format, data_types::f32)
|
||||
);
|
||||
// Activation won't be fused because onednn doesn't support negative activation
|
||||
if (engine.get_device_info().supports_immad && !p.kernel_name.empty())
|
||||
p.expected_fused_primitives += 2;
|
||||
|
||||
tolerance = default_tolerance(p.default_type);
|
||||
execute(p);
|
||||
@ -380,20 +377,21 @@ TEST_P(gemm_2in_act_scale_eltwise, broadcast_eltwise) {
|
||||
eltwise("sum", { input_info("activation"), input_info("eltwise_data") }, eltwise_mode::sum, data_types::f32),
|
||||
reorder("reorder_bfyx", input_info("sum"), p.default_format, data_types::f32)
|
||||
);
|
||||
// Activation won't be fused because onednn doesn't support negative activation
|
||||
if (engine.get_device_info().supports_immad && !p.kernel_name.empty())
|
||||
p.expected_fused_primitives += 2;
|
||||
|
||||
tolerance = default_tolerance(p.default_type);
|
||||
execute(p);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, gemm_2in_act_scale_eltwise, ::testing::ValuesIn(std::vector<gemm_test_params>{
|
||||
gemm_test_params{ CASE_GEMM_ELTWISE_2IN_FP32_1, 3, 6 },
|
||||
gemm_test_params{ CASE_GEMM_ELTWISE_2IN_FP16_1, 3, 6 },
|
||||
gemm_test_params{ CASE_GEMM_ELTWISE_2IN_U8S8_1, 3, 6 },
|
||||
gemm_test_params{ CASE_GEMM_ELTWISE_2IN_S8U8_1, 3, 6 },
|
||||
gemm_test_params{ CASE_GEMM_ELTWISE_2IN_U8S8_2, 3, 3, "gemm_mmad_int8" },
|
||||
// gemm_test_params{ CASE_GEMM_ELTWISE_2IN_U8S8_2, 3, 3 , "gemm_mmad_int8_slm" }, // tolerance issue
|
||||
gemm_test_params{ CASE_GEMM_ELTWISE_2IN_FP16_2, 3, 3, "gemm_tiled_opt" },
|
||||
}));
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
fusings_gpu,
|
||||
gemm_2in_act_scale_eltwise,
|
||||
::testing::ValuesIn(std::vector<gemm_test_params>{
|
||||
gemm_test_params{CASE_GEMM_ELTWISE_2IN_FP32_1, 3, 6},
|
||||
gemm_test_params{CASE_GEMM_ELTWISE_2IN_FP16_1, 3, 6},
|
||||
gemm_test_params{CASE_GEMM_ELTWISE_2IN_U8S8_1, 3, 6},
|
||||
gemm_test_params{CASE_GEMM_ELTWISE_2IN_S8U8_1, 3, 6},
|
||||
// Reference graph can be fused because force_implementation leads optimize_data(true) in program::set_options()
|
||||
gemm_test_params{CASE_GEMM_ELTWISE_2IN_U8S8_2, 3, 3, "gemm_mmad_int8"},
|
||||
// gemm_test_params{ CASE_GEMM_ELTWISE_2IN_U8S8_2, 3, 3, "gemm_mmad_int8_slm" }, // tolerance issue
|
||||
gemm_test_params{CASE_GEMM_ELTWISE_2IN_FP16_2, 3, 3, "gemm_tiled_opt"},
|
||||
}));
|
||||
|
Loading…
Reference in New Issue
Block a user