[GPU] Fix failed onednn tests (#16410)

* Fix failing unit tests on dGPU

+ Modified fully_connected_random_test_i8_3d so that its test dimensions are not ambiguous
+ oneDNN does NOT support the i64 type for reorder; added an exception for it
+ Fixed a bug in prepare_primitive_fusing's exception handling for unsupported activation functions (see the sketch after this list)
+ Added exception logic for dynamic shapes so that is_node_for_onednn selects the ocl impl type
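
Below is a minimal, self-contained sketch of the fallback pattern these fixes share. The enums and the convert_activation_func/choose_impl stand-ins are hypothetical, not the actual cldnn/oneDNN API: the idea is that a node is routed to the OCL (clDNN) implementation whenever oneDNN cannot handle its data type or its activation function.

#include <iostream>
#include <stdexcept>

// Hypothetical stand-ins for the real cldnn/oneDNN types -- illustration only.
enum class data_types { f16, f32, i8, i64 };
enum class impl_types { onednn, ocl };
enum class activation_func { relu, swish, softsign };

// Stand-in for onednn::convert_activation_func, which throws when oneDNN
// has no matching algorithm for the activation (softsign here, for example).
void convert_activation_func(activation_func f) {
    if (f == activation_func::softsign)
        throw std::runtime_error("unsupported activation");
}

// The shared guard pattern: fall back to the OCL impl on any oneDNN limitation.
impl_types choose_impl(data_types in_dt, data_types out_dt, activation_func f) {
    // oneDNN does not support i64 for reorder, so reject it up front.
    if (in_dt == data_types::i64 || out_dt == data_types::i64)
        return impl_types::ocl;
    // Probe activation support the way the fusing pass does: attempt the
    // conversion and treat an exception as "not supported".
    try {
        convert_activation_func(f);
    } catch (...) {
        return impl_types::ocl;
    }
    return impl_types::onednn;
}

int main() {
    std::cout << (choose_impl(data_types::f32, data_types::f32, activation_func::relu) == impl_types::onednn)   // 1: oneDNN path
              << (choose_impl(data_types::i64, data_types::f32, activation_func::relu) == impl_types::ocl)      // 1: i64 forces OCL
              << (choose_impl(data_types::f32, data_types::f32, activation_func::softsign) == impl_types::ocl)  // 1: activation forces OCL
              << '\n';  // prints "111"
}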

Signed-off-by: Min, Byungil <byungil.min@intel.com>
Min, Byungil 2023-03-29 15:50:09 +09:00 committed by GitHub
parent 966c47e7cd
commit ea6e3481cd
7 changed files with 130 additions and 23 deletions


@@ -704,14 +704,17 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
if (_lo.get_optimization_attributes().use_onednn_impls) {
if (input.is_type<reshape>() || input.is_type<concatenation>())
return;
#ifdef ENABLE_ONEDNN_FOR_GPU
// Activation should not fused if it isn't supported in onednn
try {
onednn::convert_activation_func(activation_node.get_primitive()->activation_function);
} catch (...) {
return;
// Activation should not be fused if oneDNN does NOT support it
if (_lo.is_primitive_implemented_for_onednn(input)) {
#ifdef ENABLE_ONEDNN_FOR_GPU
try {
onednn::convert_activation_func(activation_node.get_primitive()->activation_function);
} catch (...) {
return;
}
#endif
}
#endif
}
bool should_fuse = input.is_type<binary_convolution>();
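
Note that the ENABLE_ONEDNN_FOR_GPU block is now nested inside the new is_primitive_implemented_for_onednn() check, so the unsupported-activation probe (convert_activation_func() throwing) only blocks fusion when the input primitive actually has a oneDNN implementation.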


@@ -189,6 +189,7 @@ public:
impl_types get_forced_impl_type_by_config(program_node& node);
static bool are_data_types_suitable_for_onednn(program_node& node);
bool are_layouts_suitable_for_onednn(program_node& node);
bool is_primitive_implemented_for_onednn(program_node& node);
bool is_format_supported(program_node& node, format::type fmt);
// Returns whether reorder between "prev" with format fmt_prev and "next" with format fmt_next


@@ -825,7 +825,8 @@ static bool is_node_for_onednn(deconvolution_node const& node) {
static bool is_node_for_onednn(fully_connected_node const& node) {
auto fc_prim = node.get_primitive();
auto ps = node.get_output_layout().get_partial_shape();
auto output_layout = node.get_output_layout();
auto ps = output_layout.get_partial_shape();
size_t non_spatial_count = 2 + (fc_prim->input_size == 3 ? 1 : 0);
size_t rank = ps.size();
@@ -1178,6 +1179,9 @@ bool layout_optimizer::are_data_types_suitable_for_onednn(program_node& node) {
if (in_dt == data_types::f32 && (!node.is_type<fully_connected>() && !node.is_type<convolution>()))
return false;
if (in_dt == data_types::i64 || out_dt == data_types::i64)
return false;
if (node.is_type<pooling>()) {
if (!data_type_traits::is_floating_point(in_dt) && in_dt != out_dt)
return false;
@@ -1259,6 +1263,16 @@ bool layout_optimizer::are_layouts_suitable_for_onednn(program_node& node) {
return true;
}
bool layout_optimizer::is_primitive_implemented_for_onednn(program_node& node) {
if (node.is_type<fully_connected>() || node.is_type<gemm>() || node.is_type<pooling>() ||
node.is_type<convolution>() || node.is_type<deconvolution>() ||
node.is_type<reduce>() || node.is_type<reorder>() || node.is_type<concatenation>()) {
return true;
}
return false;
}
impl_types layout_optimizer::get_forced_impl_type_by_config(program_node& node) {
#ifdef GPU_DEBUG_CONFIG
GPU_DEBUG_GET_INSTANCE(debug_config);
@@ -1419,6 +1433,10 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
preferred_impl = impl_types::ocl;
}
if (!are_data_types_suitable_for_onednn(node)) {
preferred_impl = impl_types::ocl;
}
// For mixed precision case, onednn is slower than cldnn
if (input_fmt == format::b_fs_yx_fsv16 && data_type_traits::is_i8_u8(input_dt))
preferred_impl = impl_types::ocl;
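
With the added check, get_preferred_impl_type() now falls back to impl_types::ocl whenever are_data_types_suitable_for_onednn() rejects the node, which together with the new i64 rule above keeps i64 reorders off oneDNN.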


@@ -172,7 +172,7 @@ TEST_P(activation_eltwise_activation_quantize_u8, basic) {
data("out_low", get_mem(get_single_element_layout(p), -127)),
data("out_high", get_mem(get_single_element_layout(p), 127)),
eltwise("eltwise", { input_info("act"), input_info("eltwise_data") }, eltwise_mode::prod, p.default_type),
activation("act2", input_info("eltwise"), activation_func::softsign),
activation("act2", input_info("eltwise"), activation_func::swish),
quantize("quant", input_info("act2"), input_info("in_low"), input_info("in_high"),
input_info("out_low"), input_info("out_high"), 256, data_types::u8),
reorder("reorder_bfyx", input_info("quant"), p.default_format, data_types::f32)
@@ -193,7 +193,7 @@ TEST_P(activation_eltwise_activation_quantize_u8, per_channel) {
data("out_low", get_mem(get_single_element_layout(p), -127)),
data("out_high", get_mem(get_single_element_layout(p), 127)),
eltwise("eltwise", { input_info("act"), input_info("eltwise_data") }, eltwise_mode::prod, p.default_type),
activation("act2", input_info("eltwise"), activation_func::softsign),
activation("act2", input_info("eltwise"), activation_func::pow),
quantize("quant", input_info("act2"), input_info("in_low"), input_info("in_high"),
input_info("out_low"), input_info("out_high"), 256, data_types::u8),
reorder("reorder_bfyx", input_info("quant"), p.default_format, data_types::f32)
@@ -223,6 +223,42 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_eltwise_activation_quantize_u8,
activation_test_params{ CASE_ACTIVATION_3D_F32_2, 3, 5, "activation_ref" },
}));
class activation_eltwise_activation_quantize_u8_onendnn : public ActivationFusingTest {};
TEST_P(activation_eltwise_activation_quantize_u8_onendnn, same_behavior) {
// Case : activation function is NOT supported on oneDNN and an input primitive selects clDNN execution
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
activation("act", input_info("input"), activation_func::relu),
data("eltwise_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
data("in_low", get_mem(get_single_element_layout(p), 0)),
data("in_high", get_mem(get_single_element_layout(p), 1, max_random)),
data("out_low", get_mem(get_single_element_layout(p), -127)),
data("out_high", get_mem(get_single_element_layout(p), 127)),
eltwise("eltwise", { input_info("act"), input_info("eltwise_data") }, eltwise_mode::prod, p.default_type),
activation("act2", input_info("eltwise"), activation_func::softsign),
quantize("quant", input_info("act2"), input_info("in_low"), input_info("in_high"),
input_info("out_low"), input_info("out_high"), 256, data_types::u8),
reorder("reorder_bfyx", input_info("quant"), p.default_format, data_types::f32)
);
tolerance = 1.f;
execute(p);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_eltwise_activation_quantize_u8_onendnn, ::testing::ValuesIn(std::vector<activation_test_params>{
// InputDataType = FP32
activation_test_params{ CASE_ACTIVATION_F32_0, 3, 5, "activation_opt" },
activation_test_params{ CASE_ACTIVATION_F32_1, 3, 5, "activation_opt" },
activation_test_params{ CASE_ACTIVATION_3D_F32_0, 3, 5, "activation_opt" },
activation_test_params{ CASE_ACTIVATION_3D_F32_1, 3, 5, "activation_opt" },
activation_test_params{ CASE_ACTIVATION_F32_0, 3, 5, "activation_ref" },
activation_test_params{ CASE_ACTIVATION_F32_1, 3, 5, "activation_ref" },
activation_test_params{ CASE_ACTIVATION_3D_F32_0, 3, 5, "activation_ref" },
activation_test_params{ CASE_ACTIVATION_3D_F32_1, 3, 5, "activation_ref" },
}));
INSTANTIATE_TEST_SUITE_P(DISABLED_fusings_gpu, activation_eltwise_activation_quantize_u8, ::testing::ValuesIn(std::vector<activation_test_params>{
activation_test_params{ CASE_ACTIVATION_3D_F32_5, 3, 5, "activation_ref" }, // FIXME - accuracy bug
}));
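
In the updated fusion tests, softsign (which oneDNN cannot fuse) is replaced with swish and pow so the oneDNN path is still exercised, while the new *_onendnn suite deliberately keeps softsign to verify that an unsupported activation falls back to clDNN execution with the same results.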


@@ -306,9 +306,6 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_int8_eltwise, ::testing::ValuesIn(std::
fully_connected_test_params{ CASE_FC_U8S8_1, 2, 3 },
fully_connected_test_params{ CASE_FC_U8S8_2, 2, 3 },
fully_connected_test_params{ CASE_FC_U8S8_3, 2, 3 },
fully_connected_test_params{ CASE_FC_U8S8_3D_1, 2, 3 },
fully_connected_test_params{ CASE_FC_U8S8_3D_2, 2, 3 },
fully_connected_test_params{ CASE_FC_U8S8_3D_3, 2, 3 },
}));
class fc_int8_quantize_u8 : public FullyConnectedFusingTest {};


@@ -118,7 +118,7 @@ TEST_P(lrn_fp32_quantize_u8_eltwise_activation, basic) {
quantize("quantize", input_info("lrn_norm"), input_info("in_lo"), input_info("in_hi"),
input_info("out_lo"), input_info("out_hi"), 256, data_types::u8),
eltwise("eltwise", { input_info("quantize"), input_info("eltwise_data") }, eltwise_mode::prod),
activation("activation", input_info("eltwise"), activation_func::floor),
activation("activation", input_info("eltwise"), activation_func::relu),
reorder("reorder", input_info("activation"), p.default_format, data_types::f32)
);
@@ -176,6 +176,47 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, lrn_fp32_quantize_u8_eltwise_activation, :
lrn_test_params{ CASE_LRN_FP32_TO_FP16_5, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features_fsv16" },
}));
class lrn_fp32_quantize_u8_eltwise_activation_onednn : public LrnFusingTest {};
TEST_P(lrn_fp32_quantize_u8_eltwise_activation_onednn, same_behavior) {
// Case : activation function is NOT supported on oneDNN and an input primitive selects clDNN execution
auto p = GetParam();
uint32_t size = 5;
float k = 1.0f;
float alpha = (float)9.9e-05;
float beta = 0.75;
create_topologies(
input_layout("input", get_input_layout(p)),
data("in_lo", get_mem(get_single_element_layout(p), min_random, 0)),
data("in_hi", get_mem(get_single_element_layout(p), 1, max_random)),
data("out_lo", get_mem(get_single_element_layout(p), 0)),
data("out_hi", get_mem(get_single_element_layout(p), 255)),
data("eltwise_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
lrn("lrn_norm", input_info("input"), size, k, alpha, beta, p.lrn_type),
quantize("quantize", input_info("lrn_norm"), input_info("in_lo"), input_info("in_hi"),
input_info("out_lo"), input_info("out_hi"), 256, data_types::u8),
eltwise("eltwise", { input_info("quantize"), input_info("eltwise_data") }, eltwise_mode::prod),
activation("activation", input_info("eltwise"), activation_func::floor),
reorder("reorder", input_info("activation"), p.default_format, data_types::f32)
);
tolerance = default_tolerance(data_types::u8);
execute(p);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu, lrn_fp32_quantize_u8_eltwise_activation_onednn, ::testing::ValuesIn(std::vector<lrn_test_params>{
// InputDataType = FP32 OutputDataType = FP32
lrn_test_params{ CASE_LRN_FP32_1, 2, 5, lrn_norm_region_across_channel, "lrn_ref" },
lrn_test_params{ CASE_LRN_FP32_1, 2, 5, lrn_norm_region_within_channel, "lrn_gpu_within_channel_opt" },
lrn_test_params{ CASE_LRN_FP32_1, 2, 5, lrn_norm_region_within_channel, "lrn_gpu_within_channel" },
// InputDataType = FP32 OutputDataType = FP16
lrn_test_params{ CASE_LRN_FP32_TO_FP16_1, 2, 5, lrn_norm_region_across_channel, "lrn_ref" },
lrn_test_params{ CASE_LRN_FP32_TO_FP16_1, 2, 5, lrn_norm_region_within_channel, "lrn_gpu_within_channel_opt" },
lrn_test_params{ CASE_LRN_FP32_TO_FP16_3, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_yxfb_b8_opt" },
lrn_test_params{ CASE_LRN_FP32_TO_FP16_5, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features_fsv16" },
}));
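
The LRN tests follow the same pattern: floor, which oneDNN does not support as a fused activation, is replaced with relu in the existing test, and the new *_onednn suite keeps floor to pin down the clDNN fallback behavior.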
class lrn_fp32_quantize_i8_eltwise_activation : public LrnFusingTest {};
TEST_P(lrn_fp32_quantize_i8_eltwise_activation, basic) {
auto p = GetParam();


@@ -1140,6 +1140,7 @@ using fully_connected_random_test_f32_3d = fully_connected_random_test_3d<float,
using fully_connected_random_test_f16_3d = fully_connected_random_test_3d<FLOAT16, FLOAT16, FLOAT16, FLOAT16>;
using fully_connected_random_test_i8_3d = fully_connected_random_test_3d<int8_t, int8_t, int8_t, float>;
TEST_P(fully_connected_random_test_f32_3d, basic) {
run_test();
}
@@ -1149,9 +1150,9 @@
fully_connected_random_test_f32_3d,
::testing::Combine(
::testing::Values(1, 3),
::testing::Values(shared_dims{1, 1, 1},
::testing::Values(shared_dims{1, 1, 2},
shared_dims{1, 1, 3},
shared_dims{3, 1, 1},
shared_dims{3, 1, 2},
shared_dims{3, 1, 3}),
::testing::Values(1, 3, 16),
::testing::Values(format::bfyx),
@@ -1201,9 +1202,9 @@
fully_connected_random_test_f16_3d,
::testing::Combine(
::testing::Values(1, 3),
::testing::Values(shared_dims{1, 1, 1},
::testing::Values(shared_dims{1, 1, 2},
shared_dims{1, 1, 16},
shared_dims{3, 1, 1},
shared_dims{3, 1, 2},
shared_dims{3, 1, 16}),
::testing::Values(1, 3, 16),
::testing::Values(format::bfyx),
@@ -1221,9 +1222,9 @@
fully_connected_random_test_i8_3d,
::testing::Combine(
::testing::Values(1, 3),
::testing::Values(shared_dims{1, 1, 1},
::testing::Values(shared_dims{1, 1, 2},
shared_dims{1, 1, 16},
shared_dims{3, 1, 1},
shared_dims{3, 1, 2},
shared_dims{3, 1, 16}),
::testing::Values(1, 3, 16),
::testing::Values(format::bfyx),
@@ -2169,9 +2170,19 @@ struct dynamic_fully_connected_gpu : ::testing::TestWithParam<fully_connected_dy
input_data_vec,
weights_data_vec,
bias_data_vec);
for (int b = 0; b < batch_size; b++) {
for (int ofm = 0; ofm < output_f; ofm++) {
ASSERT_EQ(ref_result[b * output_f + ofm], output_ptr[b * output_f + ofm]);
if (engine.get_device_info().supports_immad) {
for (int b = 0; b < batch_size; b++) {
for (int ofm = 0; ofm < output_f; ofm++) {
EXPECT_NEAR(ref_result[b * output_f + ofm], output_ptr[b * output_f + ofm],
default_tolerance(input_dt));
}
}
} else {
for (int b = 0; b < batch_size; b++) {
for (int ofm = 0; ofm < output_f; ofm++) {
ASSERT_EQ(ref_result[b * output_f + ofm], output_ptr[b * output_f + ofm]);
}
}
}
}
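
On devices with immad support, where the fully connected primitive may run through oneDNN, the test now compares against the reference with EXPECT_NEAR and a data-type-based tolerance instead of exact equality, since the oneDNN kernel is not guaranteed to be bit-identical to the reference computation.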