[GPU] Add fused operation test for concat and pooling (#7900)
* [GPU] Add fused operation test for concat. And fused test and enable post ops in concat onednn integration. Signed-off-by: hyunback <hyunback.kim@intel.com> * Add depence code when machine does not support imad. oneDNN post operation has issue. Signed-off-by: hyunback <hyunback.kim@intel.com>
This commit is contained in:
parent
3f8856862d
commit
88cab67833
@ -40,8 +40,7 @@ protected:
|
||||
args.insert({DNNL_ARG_DST, output.get_onednn_memory(_pd.dst_desc())});
|
||||
}
|
||||
|
||||
// TODO post operation
|
||||
// configure_post_ops_arguments(instance, args);
|
||||
configure_post_ops_arguments(instance, args);
|
||||
|
||||
return args;
|
||||
}
|
||||
|
@ -5070,6 +5070,137 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, deconv_activation_eltwise_diff_sizes,
|
||||
conv_eltw_test_params{CASE_DECONV_ELTW_i8_5, 2, 4},
|
||||
}));
|
||||
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
/* ----------------------------------------------------------------------------------------------------- */
|
||||
/* --------------------------------------- Concat cases ------------------------------------------------ */
|
||||
/* ----------------------------------------------------------------------------------------------------- */
|
||||
struct concat_test_params {
|
||||
tensor in_shape;
|
||||
data_types data_type;
|
||||
format input_format;
|
||||
data_types default_type;
|
||||
format default_format;
|
||||
size_t expected_fused_primitives;
|
||||
size_t expected_not_fused_primitives;
|
||||
std::string kernel_name;
|
||||
};
|
||||
|
||||
#define CASE_CONCAT_F32_1 {1, 8, 4, 4}, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_CONCAT_F16_1 {1, 8, 4, 4}, data_types::f16, format::bfyx, data_types::f16, format::bfyx
|
||||
|
||||
class ConcatOneDNNFusingTest : public ::BaseFusingTest<concat_test_params> {
|
||||
public:
|
||||
void execute(concat_test_params& p) {
|
||||
// Onednn post operation has issue in a machine that does not support imad.
|
||||
if (!engine.get_device_info().supports_imad)
|
||||
return;
|
||||
|
||||
auto input0_prim = get_mem(get_input_layout(p));
|
||||
auto input1_prim = get_mem(get_input_layout(p));
|
||||
|
||||
build_options onednn_options;
|
||||
build_options cldnn_options;
|
||||
|
||||
onednn_options.set_option(build_option::optimize_data(true));
|
||||
cldnn_options.set_option(build_option::optimize_data(true));
|
||||
|
||||
implementation_desc onednn_impl = {p.input_format, "", impl_types::onednn};
|
||||
implementation_desc cldnn_impl = {p.input_format, "", impl_types::ocl};
|
||||
onednn_options.set_option(build_option::force_implementations({{"concat", onednn_impl}}));
|
||||
cldnn_options.set_option(build_option::force_implementations({{"concat", cldnn_impl}}));
|
||||
|
||||
// for onednn fusing test, topology_non_fused means cldnn, topology_fused is onednn
|
||||
network network_fused_cldnn(this->engine, this->topology_non_fused, cldnn_options);
|
||||
network network_fused_onednn(this->engine, this->topology_fused, onednn_options);
|
||||
|
||||
network_fused_cldnn.set_input_data("input0", input0_prim);
|
||||
network_fused_cldnn.set_input_data("input1", input1_prim);
|
||||
network_fused_onednn.set_input_data("input0", input0_prim);
|
||||
network_fused_onednn.set_input_data("input1", input1_prim);
|
||||
|
||||
ASSERT_FALSE(network_fused_cldnn.get_primitives_info().empty());
|
||||
ASSERT_FALSE(network_fused_onednn.get_primitives_info().empty());
|
||||
|
||||
auto find_and_check = [&](primitive_info& p) -> bool {
|
||||
if (p.original_id == "concat" || p.original_id == "reorder_bfyx")
|
||||
return true;
|
||||
return false;
|
||||
};
|
||||
|
||||
auto pi_fused_onednn = network_fused_onednn.get_primitives_info();
|
||||
auto pi_fused_cldnn = network_fused_cldnn.get_primitives_info();
|
||||
auto info_fused_onednn = std::find_if(pi_fused_onednn.begin(), pi_fused_onednn.end(), find_and_check);
|
||||
auto info_fused_cldnn = std::find_if(pi_fused_cldnn.begin(), pi_fused_cldnn.end(), find_and_check);
|
||||
|
||||
ASSERT_TRUE(info_fused_onednn != pi_fused_onednn.end());
|
||||
ASSERT_TRUE(info_fused_cldnn != pi_fused_cldnn.end());
|
||||
|
||||
compare(network_fused_cldnn, network_fused_onednn, p);
|
||||
}
|
||||
|
||||
layout get_input_layout(concat_test_params& p) { return layout{p.data_type, p.input_format, p.in_shape}; }
|
||||
layout get_per_channel_layout(concat_test_params& p) {
|
||||
return layout{p.default_type, p.default_format, tensor{1, p.in_shape.feature[0], 1, 1}};
|
||||
}
|
||||
};
|
||||
|
||||
class concat_onednn_activation : public ConcatOneDNNFusingTest {};
|
||||
TEST_P(concat_onednn_activation, along_f) {
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
input_layout("input0", get_input_layout(p)),
|
||||
input_layout("input1", get_input_layout(p)),
|
||||
concatenation("concat",
|
||||
{ "input0", "input1" },
|
||||
concatenation::concatenation_axis::along_f,
|
||||
data_types::f16,
|
||||
"",
|
||||
padding{ { 0, 0, 0, 0 }, 0 }),
|
||||
activation("act", "concat", activation_func::relu),
|
||||
reorder("reorder_bfyx", "act", cldnn::format::bfyx, p.default_type)
|
||||
);
|
||||
|
||||
tolerance = 1.f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
class concat_onednn_eltwise : public ConcatOneDNNFusingTest {};
|
||||
TEST_P(concat_onednn_eltwise, along_f) {
|
||||
auto p = GetParam();
|
||||
layout data_layout(p.default_type, p.default_format, tensor{1, p.in_shape.feature[0]*2, 1, 1});
|
||||
|
||||
create_topologies(
|
||||
input_layout("input0", get_input_layout(p)),
|
||||
input_layout("input1", get_input_layout(p)),
|
||||
data("scale_data", get_mem(data_layout, 1.0f / tensor{1, 1, 4, 4}.count())),
|
||||
concatenation("concat",
|
||||
{ "input0", "input1" },
|
||||
concatenation::concatenation_axis::along_f,
|
||||
data_types::f16,
|
||||
"",
|
||||
padding{ { 0, 0, 0, 0 }, 0 }),
|
||||
eltwise("scale", {"concat", "scale_data"}, eltwise_mode::prod, p.default_type),
|
||||
reorder("reorder_bfyx", "scale", cldnn::format::bfyx, p.default_type)
|
||||
);
|
||||
|
||||
tolerance = 1.f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu,
|
||||
concat_onednn_activation,
|
||||
::testing::ValuesIn(std::vector<concat_test_params>{
|
||||
concat_test_params{CASE_CONCAT_F16_1, 3, 3, ""},
|
||||
}));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu,
|
||||
concat_onednn_eltwise,
|
||||
::testing::ValuesIn(std::vector<concat_test_params>{
|
||||
concat_test_params{CASE_CONCAT_F32_1, 4, 4, ""},
|
||||
concat_test_params{CASE_CONCAT_F16_1, 4, 4, ""},
|
||||
}));
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------------------------------------- */
|
||||
/* --------------------------------------- Pooling cases ----------------------------------------------- */
|
||||
/* ----------------------------------------------------------------------------------------------------- */
|
||||
@ -5095,6 +5226,7 @@ struct pooling_test_params {
|
||||
#define CASE_POOLING_F32_8 {16, 32, 10, 10}, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_POOLING_F32_9 {16, 32, 10, 10}, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_POOLING_F32_10 {16, 32, 10, 10, 10}, data_types::f32, format::bs_fs_zyx_bsv16_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_POOLING_F32_11 {1, 1, 3, 3}, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
|
||||
#define CASE_POOLING_F32_F16_1 {1, 16, 8, 8}, data_types::f32, format::bfyx, data_types::f16, format::bfyx
|
||||
#define CASE_POOLING_F32_F16_2 {2, 16, 8, 8}, data_types::f32, format::bfyx, data_types::f16, format::bfyx
|
||||
@ -5511,6 +5643,107 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu,
|
||||
pooling_test_params{CASE_POOLING_U8_FP16_6, 2, 4, pooling_mode::max, "pooling_gpu_int8_ref"},
|
||||
}));
|
||||
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
class PoolingOneDNNFusingTest : public ::BaseFusingTest<pooling_test_params> {
|
||||
public:
|
||||
void execute(pooling_test_params& p) {
|
||||
// Onednn post operation has issue in a machine that does not support imad.
|
||||
if (!engine.get_device_info().supports_imad)
|
||||
return;
|
||||
|
||||
auto input_prim = get_mem(get_input_layout(p));
|
||||
|
||||
build_options onednn_options;
|
||||
build_options cldnn_options;
|
||||
|
||||
onednn_options.set_option(build_option::optimize_data(true));
|
||||
cldnn_options.set_option(build_option::optimize_data(true));
|
||||
|
||||
implementation_desc onednn_impl = {p.input_format, "", impl_types::onednn};
|
||||
implementation_desc cldnn_impl = {p.input_format, "", impl_types::ocl};
|
||||
onednn_options.set_option(build_option::force_implementations({{"pooling", onednn_impl}}));
|
||||
cldnn_options.set_option(build_option::force_implementations({{"pooling", cldnn_impl}}));
|
||||
|
||||
// for onednn fusing test, topology_non_fused means cldnn, topology_fused is onednn
|
||||
network network_fused_cldnn(this->engine, this->topology_non_fused, cldnn_options);
|
||||
network network_fused_onednn(this->engine, this->topology_fused, onednn_options);
|
||||
|
||||
network_fused_cldnn.set_input_data("input", input_prim);
|
||||
network_fused_onednn.set_input_data("input", input_prim);
|
||||
|
||||
ASSERT_FALSE(network_fused_cldnn.get_primitives_info().empty());
|
||||
ASSERT_FALSE(network_fused_onednn.get_primitives_info().empty());
|
||||
|
||||
auto find_and_check = [&](primitive_info& p) -> bool {
|
||||
if (p.original_id == "pooling" || p.original_id == "output_reorder")
|
||||
return true;
|
||||
return false;
|
||||
};
|
||||
|
||||
auto pi_fused_onednn = network_fused_onednn.get_primitives_info();
|
||||
auto pi_fused_cldnn = network_fused_onednn.get_primitives_info();
|
||||
auto info_fused_onednn = std::find_if(pi_fused_onednn.begin(), pi_fused_onednn.end(), find_and_check);
|
||||
auto info_fused_cldnn = std::find_if(pi_fused_cldnn.begin(), pi_fused_cldnn.end(), find_and_check);
|
||||
|
||||
ASSERT_TRUE(info_fused_onednn != pi_fused_onednn.end());
|
||||
ASSERT_TRUE(info_fused_cldnn != pi_fused_cldnn.end());
|
||||
|
||||
compare(network_fused_cldnn, network_fused_onednn, p);
|
||||
}
|
||||
|
||||
layout get_input_layout(pooling_test_params& p) { return layout{p.data_type, p.input_format, p.in_shape}; }
|
||||
layout get_per_channel_layout(pooling_test_params& p) {
|
||||
return layout{p.default_type, p.default_format, tensor{1, p.in_shape.feature[0], 1, 1}};
|
||||
}
|
||||
};
|
||||
|
||||
class pooling_onednn_activation1 : public PoolingOneDNNFusingTest {};
|
||||
TEST_P(pooling_onednn_activation1, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
input_layout("input", get_input_layout(p)),
|
||||
pooling("pooling", "input", p.pool_mode, tensor{1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}),
|
||||
activation("act", "pooling", activation_func::relu),
|
||||
reorder("output_reorder", "act", format::bfyx, data_types::f32));
|
||||
|
||||
tolerance = 1e-05f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
class pooling_onednn_activation2 : public PoolingOneDNNFusingTest {};
|
||||
TEST_P(pooling_onednn_activation2, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
input_layout("input", get_input_layout(p)),
|
||||
pooling("pooling", "input", p.pool_mode, { 1, 1, 3, 3 }, { 1, 1, 1, 1 }),
|
||||
activation("act", "pooling", activation_func::relu),
|
||||
reorder("output_reorder", "act", format::bfyx, data_types::f32));
|
||||
|
||||
tolerance = 1e-05f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu,
|
||||
pooling_onednn_activation1,
|
||||
::testing::ValuesIn(std::vector<pooling_test_params>{
|
||||
// pooling_test_params{CASE_POOLING_F32_1, 2, 2, pooling_mode::max, ""},
|
||||
pooling_test_params{CASE_POOLING_F16_1, 2, 2, pooling_mode::max, ""},
|
||||
pooling_test_params{CASE_POOLING_I8_1, 2, 2, pooling_mode::max, ""},
|
||||
pooling_test_params{CASE_POOLING_U8_1, 2, 2, pooling_mode::max, ""},
|
||||
pooling_test_params{CASE_POOLING_U8_2, 2, 2, pooling_mode::max, ""},
|
||||
pooling_test_params{CASE_POOLING_I8_1, 2, 2, pooling_mode::max, ""},
|
||||
pooling_test_params{CASE_POOLING_I8_2, 2, 2, pooling_mode::max, ""},
|
||||
}));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu,
|
||||
pooling_onednn_activation2,
|
||||
::testing::ValuesIn(std::vector<pooling_test_params>{
|
||||
pooling_test_params{CASE_POOLING_F32_11, 2, 2, pooling_mode::max, ""},
|
||||
pooling_test_params{CASE_POOLING_F32_11, 2, 2, pooling_mode::average, ""},
|
||||
pooling_test_params{CASE_POOLING_F32_11, 2, 2, pooling_mode::average_no_padding, ""},
|
||||
}));
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------------------------------------- */
|
||||
/* -------------------------------- DepthToSpace cases ------------------------------------------------- */
|
||||
/* ----------------------------------------------------------------------------------------------------- */
|
||||
|
Loading…
Reference in New Issue
Block a user