[GPU] Fix failed onednn tests (#16410)
* Fix failed unit tests on dGPU
+ Modified fully_connected_random_test_i8_3d so that its cases are not ambiguous
+ oneDNN does NOT support the i64 type for reorder; added an exception for it
+ Fixed a bug in prepare_primitive_fusing in the activation-function exception handling
+ Added exception logic for dynamic shapes to select the ocl impl type in is_node_for_onednn

Signed-off-by: Min, Byungil <byungil.min@intel.com>
parent 966c47e7cd
commit ea6e3481cd
@@ -704,14 +704,17 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
         if (_lo.get_optimization_attributes().use_onednn_impls) {
             if (input.is_type<reshape>() || input.is_type<concatenation>())
                 return;
-#ifdef ENABLE_ONEDNN_FOR_GPU
-            // Activation should not fused if it isn't supported in onednn
-            try {
-                onednn::convert_activation_func(activation_node.get_primitive()->activation_function);
-            } catch (...) {
-                return;
-            }
-#endif
+
+            // Activation should not be fused if oneDNN does NOT support it
+            if (_lo.is_primitive_implemented_for_onednn(input)) {
+#ifdef ENABLE_ONEDNN_FOR_GPU
+                try {
+                    onednn::convert_activation_func(activation_node.get_primitive()->activation_function);
+                } catch (...) {
+                    return;
+                }
+#endif
+            }
         }
 
         bool should_fuse = input.is_type<binary_convolution>();
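For readers outside the codebase: the gate above relies on onednn::convert_activation_func throwing for activation functions oneDNN cannot map. Below is a minimal standalone sketch of that probe pattern, with illustrative stand-ins for the plugin's enum and converter (not the real cldnn/oneDNN API):

    #include <stdexcept>

    // Illustrative stand-in for the plugin's activation enum (subset only).
    enum class activation_func { relu, swish, softsign };

    // Hypothetical converter mirroring onednn::convert_activation_func:
    // maps the plugin enum to a backend value and throws when oneDNN
    // has no matching post-op.
    int convert_activation_func(activation_func f) {
        switch (f) {
            case activation_func::relu:  return 0;
            case activation_func::swish: return 1;
            default: throw std::runtime_error("unsupported activation");
        }
    }

    // The fusing pass treats "converter throws" as "skip the fusion".
    bool fusable_on_onednn(activation_func f) {
        try {
            convert_activation_func(f);
            return true;
        } catch (...) {
            return false;
        }
    }

The behavioral change in the hunk is the outer is_primitive_implemented_for_onednn(input) guard: the probe now only vetoes fusion when the producer could actually run through oneDNN.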
@@ -189,6 +189,7 @@ public:
     impl_types get_forced_impl_type_by_config(program_node& node);
     static bool are_data_types_suitable_for_onednn(program_node& node);
     bool are_layouts_suitable_for_onednn(program_node& node);
+    bool is_primitive_implemented_for_onednn(program_node& node);
     bool is_format_supported(program_node& node, format::type fmt);
 
     // Returns whether reorder between "prev" with format fmt_prev and "next" with format fmt_next
@@ -825,7 +825,8 @@ static bool is_node_for_onednn(deconvolution_node const& node) {
 
 static bool is_node_for_onednn(fully_connected_node const& node) {
     auto fc_prim = node.get_primitive();
-    auto ps = node.get_output_layout().get_partial_shape();
+    auto output_layout = node.get_output_layout();
+    auto ps = output_layout.get_partial_shape();
     size_t non_spatial_count = 2 + (fc_prim->input_size == 3 ? 1 : 0);
     size_t rank = ps.size();
@@ -1178,6 +1179,9 @@ bool layout_optimizer::are_data_types_suitable_for_onednn(program_node& node) {
     if (in_dt == data_types::f32 && (!node.is_type<fully_connected>() && !node.is_type<convolution>()))
         return false;
 
+    if (in_dt == data_types::i64 || out_dt == data_types::i64)
+        return false;
+
     if (node.is_type<pooling>()) {
         if (!data_type_traits::is_floating_point(in_dt) && in_dt != out_dt)
             return false;
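A condensed sketch of the data-type gate added here, with illustrative enums rather than the plugin's real headers: since oneDNN has no i64 support, any primitive with an i64 input or output must fall back to the OCL implementation.

    enum class data_types { f16, f32, i8, u8, i32, i64 };
    enum class impl_types { ocl, onednn };

    // Assumption for illustration: all other suitability checks have passed.
    impl_types pick_impl(data_types in_dt, data_types out_dt) {
        // oneDNN cannot reorder or compute on i64 tensors; use OCL kernels.
        if (in_dt == data_types::i64 || out_dt == data_types::i64)
            return impl_types::ocl;
        return impl_types::onednn;
    }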
@@ -1259,6 +1263,16 @@ bool layout_optimizer::are_layouts_suitable_for_onednn(program_node& node) {
     return true;
 }
 
+bool layout_optimizer::is_primitive_implemented_for_onednn(program_node& node) {
+    if (node.is_type<fully_connected>() || node.is_type<gemm>() || node.is_type<pooling>() ||
+        node.is_type<convolution>() || node.is_type<deconvolution>() ||
+        node.is_type<reduce>() || node.is_type<reorder>() || node.is_type<concatenation>()) {
+        return true;
+    }
+
+    return false;
+}
+
 impl_types layout_optimizer::get_forced_impl_type_by_config(program_node& node) {
 #ifdef GPU_DEBUG_CONFIG
     GPU_DEBUG_GET_INSTANCE(debug_config);
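The new helper is a plain type whitelist. A standalone equivalent over an illustrative kind enum (the real code dispatches on node types via is_type<T>()):

    enum class prim_kind {
        fully_connected, gemm, pooling, convolution, deconvolution,
        reduce, reorder, concatenation, eltwise, activation, lrn
    };

    // Mirrors layout_optimizer::is_primitive_implemented_for_onednn:
    // only these node kinds have oneDNN implementations in the plugin;
    // everything else always runs through clDNN/OCL kernels.
    bool is_primitive_implemented_for_onednn(prim_kind k) {
        switch (k) {
            case prim_kind::fully_connected:
            case prim_kind::gemm:
            case prim_kind::pooling:
            case prim_kind::convolution:
            case prim_kind::deconvolution:
            case prim_kind::reduce:
            case prim_kind::reorder:
            case prim_kind::concatenation:
                return true;
            default:
                return false;
        }
    }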
@@ -1419,6 +1433,10 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
         preferred_impl = impl_types::ocl;
     }
 
+    if (!are_data_types_suitable_for_onednn(node)) {
+        preferred_impl = impl_types::ocl;
+    }
+
     // For mixed precision case, onednn is slower than cldnn
     if (input_fmt == format::b_fs_yx_fsv16 && data_type_traits::is_i8_u8(input_dt))
         preferred_impl = impl_types::ocl;
@@ -172,7 +172,7 @@ TEST_P(activation_eltwise_activation_quantize_u8, basic) {
         data("out_low", get_mem(get_single_element_layout(p), -127)),
         data("out_high", get_mem(get_single_element_layout(p), 127)),
         eltwise("eltwise", { input_info("act"), input_info("eltwise_data") }, eltwise_mode::prod, p.default_type),
-        activation("act2", input_info("eltwise"), activation_func::softsign),
+        activation("act2", input_info("eltwise"), activation_func::swish),
         quantize("quant", input_info("act2"), input_info("in_low"), input_info("in_high"),
                  input_info("out_low"), input_info("out_high"), 256, data_types::u8),
         reorder("reorder_bfyx", input_info("quant"), p.default_format, data_types::f32)
@@ -193,7 +193,7 @@ TEST_P(activation_eltwise_activation_quantize_u8, per_channel) {
         data("out_low", get_mem(get_single_element_layout(p), -127)),
         data("out_high", get_mem(get_single_element_layout(p), 127)),
         eltwise("eltwise", { input_info("act"), input_info("eltwise_data") }, eltwise_mode::prod, p.default_type),
-        activation("act2", input_info("eltwise"), activation_func::softsign),
+        activation("act2", input_info("eltwise"), activation_func::pow),
         quantize("quant", input_info("act2"), input_info("in_low"), input_info("in_high"),
                  input_info("out_low"), input_info("out_high"), 256, data_types::u8),
         reorder("reorder_bfyx", input_info("quant"), p.default_format, data_types::f32)
@@ -223,6 +223,42 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_eltwise_activation_quantize_u8,
     activation_test_params{ CASE_ACTIVATION_3D_F32_2, 3, 5, "activation_ref" },
 }));
 
+class activation_eltwise_activation_quantize_u8_onendnn : public ActivationFusingTest {};
+TEST_P(activation_eltwise_activation_quantize_u8_onendnn, same_behavior) {
+    // Case : activation function is NOT supported on oneDNN and an input primitive selects clDNN execution
+    auto p = GetParam();
+    create_topologies(
+        input_layout("input", get_input_layout(p)),
+        activation("act", input_info("input"), activation_func::relu),
+        data("eltwise_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
+        data("in_low", get_mem(get_single_element_layout(p), 0)),
+        data("in_high", get_mem(get_single_element_layout(p), 1, max_random)),
+        data("out_low", get_mem(get_single_element_layout(p), -127)),
+        data("out_high", get_mem(get_single_element_layout(p), 127)),
+        eltwise("eltwise", { input_info("act"), input_info("eltwise_data") }, eltwise_mode::prod, p.default_type),
+        activation("act2", input_info("eltwise"), activation_func::softsign),
+        quantize("quant", input_info("act2"), input_info("in_low"), input_info("in_high"),
+                 input_info("out_low"), input_info("out_high"), 256, data_types::u8),
+        reorder("reorder_bfyx", input_info("quant"), p.default_format, data_types::f32)
+    );
+
+    tolerance = 1.f;
+    execute(p);
+}
+
+INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_eltwise_activation_quantize_u8_onendnn, ::testing::ValuesIn(std::vector<activation_test_params>{
+    // InputDataType = FP32
+    activation_test_params{ CASE_ACTIVATION_F32_0, 3, 5, "activation_opt" },
+    activation_test_params{ CASE_ACTIVATION_F32_1, 3, 5, "activation_opt" },
+    activation_test_params{ CASE_ACTIVATION_3D_F32_0, 3, 5, "activation_opt" },
+    activation_test_params{ CASE_ACTIVATION_3D_F32_1, 3, 5, "activation_opt" },
+
+    activation_test_params{ CASE_ACTIVATION_F32_0, 3, 5, "activation_ref" },
+    activation_test_params{ CASE_ACTIVATION_F32_1, 3, 5, "activation_ref" },
+    activation_test_params{ CASE_ACTIVATION_3D_F32_0, 3, 5, "activation_ref" },
+    activation_test_params{ CASE_ACTIVATION_3D_F32_1, 3, 5, "activation_ref" },
+}));
+
 INSTANTIATE_TEST_SUITE_P(DISABLED_fusings_gpu, activation_eltwise_activation_quantize_u8, ::testing::ValuesIn(std::vector<activation_test_params>{
     activation_test_params{ CASE_ACTIVATION_3D_F32_5, 3, 5, "activation_ref" }, // FIXME - accuracy bug
 }));
@@ -306,9 +306,6 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_int8_eltwise, ::testing::ValuesIn(std::
     fully_connected_test_params{ CASE_FC_U8S8_1, 2, 3 },
     fully_connected_test_params{ CASE_FC_U8S8_2, 2, 3 },
     fully_connected_test_params{ CASE_FC_U8S8_3, 2, 3 },
-    fully_connected_test_params{ CASE_FC_U8S8_3D_1, 2, 3 },
-    fully_connected_test_params{ CASE_FC_U8S8_3D_2, 2, 3 },
-    fully_connected_test_params{ CASE_FC_U8S8_3D_3, 2, 3 },
 }));
 
 class fc_int8_quantize_u8 : public FullyConnectedFusingTest {};
@@ -118,7 +118,7 @@ TEST_P(lrn_fp32_quantize_u8_eltwise_activation, basic) {
         quantize("quantize", input_info("lrn_norm"), input_info("in_lo"), input_info("in_hi"),
                  input_info("out_lo"), input_info("out_hi"), 256, data_types::u8),
         eltwise("eltwise", { input_info("quantize"), input_info("eltwise_data") }, eltwise_mode::prod),
-        activation("activation", input_info("eltwise"), activation_func::floor),
+        activation("activation", input_info("eltwise"), activation_func::relu),
         reorder("reorder", input_info("activation"), p.default_format, data_types::f32)
     );
 
@@ -176,6 +176,47 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, lrn_fp32_quantize_u8_eltwise_activation, ::
     lrn_test_params{ CASE_LRN_FP32_TO_FP16_5, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features_fsv16" },
 }));
 
+class lrn_fp32_quantize_u8_eltwise_activation_onednn : public LrnFusingTest {};
+TEST_P(lrn_fp32_quantize_u8_eltwise_activation_onednn, same_behavior) {
+    // Case : activation function is NOT supported on oneDNN and an input primitive selects clDNN execution
+    auto p = GetParam();
+    uint32_t size = 5;
+    float k = 1.0f;
+    float alpha = (float)9.9e-05;
+    float beta = 0.75;
+
+    create_topologies(
+        input_layout("input", get_input_layout(p)),
+        data("in_lo", get_mem(get_single_element_layout(p), min_random, 0)),
+        data("in_hi", get_mem(get_single_element_layout(p), 1, max_random)),
+        data("out_lo", get_mem(get_single_element_layout(p), 0)),
+        data("out_hi", get_mem(get_single_element_layout(p), 255)),
+        data("eltwise_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
+        lrn("lrn_norm", input_info("input"), size, k, alpha, beta, p.lrn_type),
+        quantize("quantize", input_info("lrn_norm"), input_info("in_lo"), input_info("in_hi"),
+                 input_info("out_lo"), input_info("out_hi"), 256, data_types::u8),
+        eltwise("eltwise", { input_info("quantize"), input_info("eltwise_data") }, eltwise_mode::prod),
+        activation("activation", input_info("eltwise"), activation_func::floor),
+        reorder("reorder", input_info("activation"), p.default_format, data_types::f32)
+    );
+
+    tolerance = default_tolerance(data_types::u8);
+    execute(p);
+}
+
+INSTANTIATE_TEST_SUITE_P(fusings_gpu, lrn_fp32_quantize_u8_eltwise_activation_onednn, ::testing::ValuesIn(std::vector<lrn_test_params>{
+    // InputDataType = FP32   OutputDataType = FP32
+    lrn_test_params{ CASE_LRN_FP32_1, 2, 5, lrn_norm_region_across_channel, "lrn_ref" },
+    lrn_test_params{ CASE_LRN_FP32_1, 2, 5, lrn_norm_region_within_channel, "lrn_gpu_within_channel_opt" },
+    lrn_test_params{ CASE_LRN_FP32_1, 2, 5, lrn_norm_region_within_channel, "lrn_gpu_within_channel" },
+
+    // InputDataType = FP32   OutputDataType = FP16
+    lrn_test_params{ CASE_LRN_FP32_TO_FP16_1, 2, 5, lrn_norm_region_across_channel, "lrn_ref" },
+    lrn_test_params{ CASE_LRN_FP32_TO_FP16_1, 2, 5, lrn_norm_region_within_channel, "lrn_gpu_within_channel_opt" },
+    lrn_test_params{ CASE_LRN_FP32_TO_FP16_3, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_yxfb_b8_opt" },
+    lrn_test_params{ CASE_LRN_FP32_TO_FP16_5, 2, 5, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features_fsv16" },
+}));
+
 class lrn_fp32_quantize_i8_eltwise_activation : public LrnFusingTest {};
 TEST_P(lrn_fp32_quantize_i8_eltwise_activation, basic) {
     auto p = GetParam();
@@ -1140,6 +1140,7 @@ using fully_connected_random_test_f32_3d = fully_connected_random_test_3d<float,
 using fully_connected_random_test_f16_3d = fully_connected_random_test_3d<FLOAT16, FLOAT16, FLOAT16, FLOAT16>;
 using fully_connected_random_test_i8_3d = fully_connected_random_test_3d<int8_t, int8_t, int8_t, float>;
 
+
 TEST_P(fully_connected_random_test_f32_3d, basic) {
     run_test();
 }
@@ -1149,9 +1150,9 @@ INSTANTIATE_TEST_SUITE_P(
     fully_connected_random_test_f32_3d,
     ::testing::Combine(
         ::testing::Values(1, 3),
-        ::testing::Values(shared_dims{1, 1, 1},
+        ::testing::Values(shared_dims{1, 1, 2},
                           shared_dims{1, 1, 3},
                           shared_dims{3, 1, 1},
                           shared_dims{3, 1, 2},
                           shared_dims{3, 1, 3}),
         ::testing::Values(1, 3, 16),
         ::testing::Values(format::bfyx),
@@ -1201,9 +1202,9 @@ INSTANTIATE_TEST_SUITE_P(
     fully_connected_random_test_f16_3d,
     ::testing::Combine(
         ::testing::Values(1, 3),
-        ::testing::Values(shared_dims{1, 1, 1},
+        ::testing::Values(shared_dims{1, 1, 2},
                           shared_dims{1, 1, 16},
                           shared_dims{3, 1, 1},
                           shared_dims{3, 1, 2},
                           shared_dims{3, 1, 16}),
         ::testing::Values(1, 3, 16),
         ::testing::Values(format::bfyx),
@@ -1221,9 +1222,9 @@ INSTANTIATE_TEST_SUITE_P(
     fully_connected_random_test_i8_3d,
     ::testing::Combine(
         ::testing::Values(1, 3),
-        ::testing::Values(shared_dims{1, 1, 1},
+        ::testing::Values(shared_dims{1, 1, 2},
                           shared_dims{1, 1, 16},
                           shared_dims{3, 1, 1},
                           shared_dims{3, 1, 2},
                           shared_dims{3, 1, 16}),
         ::testing::Values(1, 3, 16),
         ::testing::Values(format::bfyx),
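On the "ambiguous" wording from the commit message: a plausible reading (an interpretation, not a statement about the exact failing case) is that random i8 inputs with degenerate {1, 1, 1} dims can produce accumulated results that land on a rounding tie, where two correct implementations legitimately round to adjacent integers. A toy illustration of such a tie:

    #include <cmath>
    #include <cstdio>

    int main() {
        float acc = 2.5f;  // accumulated FC result landing exactly on a rounding tie
        // Round-half-away-from-zero vs. round-half-to-even give different integer
        // outputs, yet both are defensible; exact-equality tests then fail spuriously.
        std::printf("half-away: %d, half-even: %d\n",
                    (int)std::lround(acc),     // 3
                    (int)std::nearbyint(acc)); // 2 under the default rounding mode
        return 0;
    }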
@@ -2169,9 +2170,19 @@ struct dynamic_fully_connected_gpu : ::testing::TestWithParam<fully_connected_dy
             input_data_vec,
             weights_data_vec,
             bias_data_vec);
-        for (int b = 0; b < batch_size; b++) {
-            for (int ofm = 0; ofm < output_f; ofm++) {
-                ASSERT_EQ(ref_result[b * output_f + ofm], output_ptr[b * output_f + ofm]);
+
+        if (engine.get_device_info().supports_immad) {
+            for (int b = 0; b < batch_size; b++) {
+                for (int ofm = 0; ofm < output_f; ofm++) {
+                    EXPECT_NEAR(ref_result[b * output_f + ofm], output_ptr[b * output_f + ofm],
+                                default_tolerance(input_dt));
+                }
+            }
+        } else {
+            for (int b = 0; b < batch_size; b++) {
+                for (int ofm = 0; ofm < output_f; ofm++) {
+                    ASSERT_EQ(ref_result[b * output_f + ofm], output_ptr[b * output_f + ofm]);
+                }
             }
         }
     }
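This hunk swaps exact equality for a tolerance when the device reports supports_immad (i.e. the result may come from oneDNN kernels). A self-contained sketch of that comparison policy; default_tolerance_for and the bound values are assumptions for illustration, not the test framework's actual constants:

    #include <cmath>

    // Assumed stand-in for the framework's default_tolerance(data_type).
    float default_tolerance_for(bool quantized) {
        return quantized ? 1.0f : 1e-3f;
    }

    // Exact match on clDNN-only devices; bounded difference where oneDNN
    // kernels may accumulate or round differently from the reference.
    bool outputs_match(const float* ref, const float* out, int n,
                       bool supports_immad, bool quantized) {
        for (int i = 0; i < n; ++i) {
            if (supports_immad) {
                if (std::fabs(ref[i] - out[i]) > default_tolerance_for(quantized))
                    return false;
            } else if (ref[i] != out[i]) {
                return false;
            }
        }
        return true;
    }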