[GPU] Add fused operation test for concat and pooling (#7900)

* [GPU] Add fused operation test for concat.

Add a fused test and enable post ops in the concat oneDNN integration.

Signed-off-by: hyunback <hyunback.kim@intel.com>

* Add guard code for machines that do not support imad.

oneDNN post operations have an issue on such machines.

Signed-off-by: hyunback <hyunback.kim@intel.com>
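For context (not part of this patch), fusing an activation such as ReLU into a oneDNN primitive is typically declared through post operations on the primitive attributes. A minimal sketch, assuming the oneDNN 2.x append_eltwise signature:

#include <oneapi/dnnl/dnnl.hpp>

// Build primitive attributes that fuse a ReLU post-op into the primitive.
dnnl::primitive_attr make_relu_post_op_attr() {
    dnnl::post_ops ops;
    // oneDNN 2.x signature: scale, algorithm, alpha, beta.
    ops.append_eltwise(1.0f, dnnl::algorithm::eltwise_relu, 0.0f, 0.0f);
    dnnl::primitive_attr attr;
    attr.set_post_ops(ops);
    return attr;
}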
hyunback kim 2021-10-19 14:02:08 +09:00 committed by GitHub
parent 3f8856862d
commit 88cab67833
2 changed files with 234 additions and 2 deletions


@@ -40,8 +40,7 @@ protected:
args.insert({DNNL_ARG_DST, output.get_onednn_memory(_pd.dst_desc())});
}
- // TODO post operation
- // configure_post_ops_arguments(instance, args);
+ configure_post_ops_arguments(instance, args);
return args;
}
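The configure_post_ops_arguments call enabled above supplies the extra inputs that fused post operations need at execution time. A minimal sketch of the idea, not the plugin's actual helper (the function name is hypothetical; DNNL_ARG_ATTR_MULTIPLE_POST_OP and DNNL_ARG_SRC_1 are oneDNN argument keys):

#include <oneapi/dnnl/dnnl.hpp>
#include <unordered_map>

// Hypothetical helper: each fused binary post-op (e.g. an eltwise that takes a
// second tensor) receives its extra input under a per-post-op argument slot.
void set_binary_post_op_arg(std::unordered_map<int, dnnl::memory>& args,
                            int post_op_index,
                            const dnnl::memory& extra_input) {
    args.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(post_op_index) | DNNL_ARG_SRC_1, extra_input});
}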


@@ -5070,6 +5070,137 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, deconv_activation_eltwise_diff_sizes,
conv_eltw_test_params{CASE_DECONV_ELTW_i8_5, 2, 4},
}));
#ifdef ENABLE_ONEDNN_FOR_GPU
/* ----------------------------------------------------------------------------------------------------- */
/* --------------------------------------- Concat cases ------------------------------------------------ */
/* ----------------------------------------------------------------------------------------------------- */
struct concat_test_params {
tensor in_shape;
data_types data_type;
format input_format;
data_types default_type;
format default_format;
size_t expected_fused_primitives;
size_t expected_not_fused_primitives;
std::string kernel_name;
};
#define CASE_CONCAT_F32_1 {1, 8, 4, 4}, data_types::f32, format::bfyx, data_types::f32, format::bfyx
#define CASE_CONCAT_F16_1 {1, 8, 4, 4}, data_types::f16, format::bfyx, data_types::f16, format::bfyx
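Each CASE_CONCAT_* macro supplies the first five fields of concat_test_params; the remaining three (expected_fused_primitives, expected_not_fused_primitives, kernel_name) come from the INSTANTIATE_TEST_SUITE_P lists below. For example, concat_test_params{CASE_CONCAT_F16_1, 3, 3, ""} expands to roughly:

concat_test_params{ tensor{1, 8, 4, 4},  // in_shape
                    data_types::f16,     // data_type
                    format::bfyx,        // input_format
                    data_types::f16,     // default_type
                    format::bfyx,        // default_format
                    3,                   // expected_fused_primitives
                    3,                   // expected_not_fused_primitives
                    "" };                // kernel_name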
class ConcatOneDNNFusingTest : public ::BaseFusingTest<concat_test_params> {
public:
void execute(concat_test_params& p) {
// oneDNN post operations have an issue on machines that do not support imad.
if (!engine.get_device_info().supports_imad)
return;
auto input0_prim = get_mem(get_input_layout(p));
auto input1_prim = get_mem(get_input_layout(p));
build_options onednn_options;
build_options cldnn_options;
onednn_options.set_option(build_option::optimize_data(true));
cldnn_options.set_option(build_option::optimize_data(true));
implementation_desc onednn_impl = {p.input_format, "", impl_types::onednn};
implementation_desc cldnn_impl = {p.input_format, "", impl_types::ocl};
onednn_options.set_option(build_option::force_implementations({{"concat", onednn_impl}}));
cldnn_options.set_option(build_option::force_implementations({{"concat", cldnn_impl}}));
// For the oneDNN fusing tests, topology_non_fused is built with clDNN and topology_fused with oneDNN.
network network_fused_cldnn(this->engine, this->topology_non_fused, cldnn_options);
network network_fused_onednn(this->engine, this->topology_fused, onednn_options);
network_fused_cldnn.set_input_data("input0", input0_prim);
network_fused_cldnn.set_input_data("input1", input1_prim);
network_fused_onednn.set_input_data("input0", input0_prim);
network_fused_onednn.set_input_data("input1", input1_prim);
ASSERT_FALSE(network_fused_cldnn.get_primitives_info().empty());
ASSERT_FALSE(network_fused_onednn.get_primitives_info().empty());
auto find_and_check = [&](primitive_info& p) -> bool {
if (p.original_id == "concat" || p.original_id == "reorder_bfyx")
return true;
return false;
};
auto pi_fused_onednn = network_fused_onednn.get_primitives_info();
auto pi_fused_cldnn = network_fused_cldnn.get_primitives_info();
auto info_fused_onednn = std::find_if(pi_fused_onednn.begin(), pi_fused_onednn.end(), find_and_check);
auto info_fused_cldnn = std::find_if(pi_fused_cldnn.begin(), pi_fused_cldnn.end(), find_and_check);
ASSERT_TRUE(info_fused_onednn != pi_fused_onednn.end());
ASSERT_TRUE(info_fused_cldnn != pi_fused_cldnn.end());
compare(network_fused_cldnn, network_fused_onednn, p);
}
layout get_input_layout(concat_test_params& p) { return layout{p.data_type, p.input_format, p.in_shape}; }
layout get_per_channel_layout(concat_test_params& p) {
return layout{p.default_type, p.default_format, tensor{1, p.in_shape.feature[0], 1, 1}};
}
};
class concat_onednn_activation : public ConcatOneDNNFusingTest {};
TEST_P(concat_onednn_activation, along_f) {
auto p = GetParam();
create_topologies(
input_layout("input0", get_input_layout(p)),
input_layout("input1", get_input_layout(p)),
concatenation("concat",
{ "input0", "input1" },
concatenation::concatenation_axis::along_f,
data_types::f16,
"",
padding{ { 0, 0, 0, 0 }, 0 }),
activation("act", "concat", activation_func::relu),
reorder("reorder_bfyx", "act", cldnn::format::bfyx, p.default_type)
);
tolerance = 1.f;
execute(p);
}
class concat_onednn_eltwise : public ConcatOneDNNFusingTest {};
TEST_P(concat_onednn_eltwise, along_f) {
auto p = GetParam();
layout data_layout(p.default_type, p.default_format, tensor{1, p.in_shape.feature[0]*2, 1, 1});
create_topologies(
input_layout("input0", get_input_layout(p)),
input_layout("input1", get_input_layout(p)),
data("scale_data", get_mem(data_layout, 1.0f / tensor{1, 1, 4, 4}.count())),
concatenation("concat",
{ "input0", "input1" },
concatenation::concatenation_axis::along_f,
data_types::f16,
"",
padding{ { 0, 0, 0, 0 }, 0 }),
eltwise("scale", {"concat", "scale_data"}, eltwise_mode::prod, p.default_type),
reorder("reorder_bfyx", "scale", cldnn::format::bfyx, p.default_type)
);
tolerance = 1.f;
execute(p);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu,
concat_onednn_activation,
::testing::ValuesIn(std::vector<concat_test_params>{
concat_test_params{CASE_CONCAT_F16_1, 3, 3, ""},
}));
INSTANTIATE_TEST_SUITE_P(fusings_gpu,
concat_onednn_eltwise,
::testing::ValuesIn(std::vector<concat_test_params>{
concat_test_params{CASE_CONCAT_F32_1, 4, 4, ""},
concat_test_params{CASE_CONCAT_F16_1, 4, 4, ""},
}));
#endif
/* ----------------------------------------------------------------------------------------------------- */
/* --------------------------------------- Pooling cases ----------------------------------------------- */
/* ----------------------------------------------------------------------------------------------------- */
@@ -5095,6 +5226,7 @@ struct pooling_test_params {
#define CASE_POOLING_F32_8 {16, 32, 10, 10}, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
#define CASE_POOLING_F32_9 {16, 32, 10, 10}, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::bfyx
#define CASE_POOLING_F32_10 {16, 32, 10, 10, 10}, data_types::f32, format::bs_fs_zyx_bsv16_fsv16, data_types::f32, format::bfyx
#define CASE_POOLING_F32_11 {1, 1, 3, 3}, data_types::f32, format::bfyx, data_types::f32, format::bfyx
#define CASE_POOLING_F32_F16_1 {1, 16, 8, 8}, data_types::f32, format::bfyx, data_types::f16, format::bfyx
#define CASE_POOLING_F32_F16_2 {2, 16, 8, 8}, data_types::f32, format::bfyx, data_types::f16, format::bfyx
@@ -5511,6 +5643,107 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu,
pooling_test_params{CASE_POOLING_U8_FP16_6, 2, 4, pooling_mode::max, "pooling_gpu_int8_ref"},
}));
#ifdef ENABLE_ONEDNN_FOR_GPU
class PoolingOneDNNFusingTest : public ::BaseFusingTest<pooling_test_params> {
public:
void execute(pooling_test_params& p) {
// oneDNN post operations have an issue on machines that do not support imad.
if (!engine.get_device_info().supports_imad)
return;
auto input_prim = get_mem(get_input_layout(p));
build_options onednn_options;
build_options cldnn_options;
onednn_options.set_option(build_option::optimize_data(true));
cldnn_options.set_option(build_option::optimize_data(true));
implementation_desc onednn_impl = {p.input_format, "", impl_types::onednn};
implementation_desc cldnn_impl = {p.input_format, "", impl_types::ocl};
onednn_options.set_option(build_option::force_implementations({{"pooling", onednn_impl}}));
cldnn_options.set_option(build_option::force_implementations({{"pooling", cldnn_impl}}));
// For the oneDNN fusing tests, topology_non_fused is built with clDNN and topology_fused with oneDNN.
network network_fused_cldnn(this->engine, this->topology_non_fused, cldnn_options);
network network_fused_onednn(this->engine, this->topology_fused, onednn_options);
network_fused_cldnn.set_input_data("input", input_prim);
network_fused_onednn.set_input_data("input", input_prim);
ASSERT_FALSE(network_fused_cldnn.get_primitives_info().empty());
ASSERT_FALSE(network_fused_onednn.get_primitives_info().empty());
auto find_and_check = [&](primitive_info& p) -> bool {
if (p.original_id == "pooling" || p.original_id == "output_reorder")
return true;
return false;
};
auto pi_fused_onednn = network_fused_onednn.get_primitives_info();
auto pi_fused_cldnn = network_fused_cldnn.get_primitives_info();
auto info_fused_onednn = std::find_if(pi_fused_onednn.begin(), pi_fused_onednn.end(), find_and_check);
auto info_fused_cldnn = std::find_if(pi_fused_cldnn.begin(), pi_fused_cldnn.end(), find_and_check);
ASSERT_TRUE(info_fused_onednn != pi_fused_onednn.end());
ASSERT_TRUE(info_fused_cldnn != pi_fused_cldnn.end());
compare(network_fused_cldnn, network_fused_onednn, p);
}
layout get_input_layout(pooling_test_params& p) { return layout{p.data_type, p.input_format, p.in_shape}; }
layout get_per_channel_layout(pooling_test_params& p) {
return layout{p.default_type, p.default_format, tensor{1, p.in_shape.feature[0], 1, 1}};
}
};
class pooling_onednn_activation1 : public PoolingOneDNNFusingTest {};
TEST_P(pooling_onednn_activation1, basic) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
pooling("pooling", "input", p.pool_mode, tensor{1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}),
activation("act", "pooling", activation_func::relu),
reorder("output_reorder", "act", format::bfyx, data_types::f32));
tolerance = 1e-05f;
execute(p);
}
class pooling_onednn_activation2 : public PoolingOneDNNFusingTest {};
TEST_P(pooling_onednn_activation2, basic) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
pooling("pooling", "input", p.pool_mode, { 1, 1, 3, 3 }, { 1, 1, 1, 1 }),
activation("act", "pooling", activation_func::relu),
reorder("output_reorder", "act", format::bfyx, data_types::f32));
tolerance = 1e-05f;
execute(p);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu,
pooling_onednn_activation1,
::testing::ValuesIn(std::vector<pooling_test_params>{
// pooling_test_params{CASE_POOLING_F32_1, 2, 2, pooling_mode::max, ""},
pooling_test_params{CASE_POOLING_F16_1, 2, 2, pooling_mode::max, ""},
pooling_test_params{CASE_POOLING_I8_1, 2, 2, pooling_mode::max, ""},
pooling_test_params{CASE_POOLING_U8_1, 2, 2, pooling_mode::max, ""},
pooling_test_params{CASE_POOLING_U8_2, 2, 2, pooling_mode::max, ""},
pooling_test_params{CASE_POOLING_I8_1, 2, 2, pooling_mode::max, ""},
pooling_test_params{CASE_POOLING_I8_2, 2, 2, pooling_mode::max, ""},
}));
INSTANTIATE_TEST_SUITE_P(fusings_gpu,
pooling_onednn_activation2,
::testing::ValuesIn(std::vector<pooling_test_params>{
pooling_test_params{CASE_POOLING_F32_11, 2, 2, pooling_mode::max, ""},
pooling_test_params{CASE_POOLING_F32_11, 2, 2, pooling_mode::average, ""},
pooling_test_params{CASE_POOLING_F32_11, 2, 2, pooling_mode::average_no_padding, ""},
}));
#endif
/* ----------------------------------------------------------------------------------------------------- */
/* -------------------------------- DepthToSpace cases ------------------------------------------------- */
/* ----------------------------------------------------------------------------------------------------- */