[GPU] added optimized blocked formats support for 5d interpolation linear onnx and nearest modes (#12977)

This commit contains two features which are split into two commits:
1) linear onnx interpolation of 5d formats
2) Optimize blocked formats for linear_onnx and nearest modes
   - contains corresponding unit and single-layer tests
This commit is contained in:
OlehKravchyshyn
2022-10-26 15:53:38 +03:00
committed by GitHub
parent bdc037adcf
commit dc1a8b7795
9 changed files with 350 additions and 44 deletions

View File

@@ -23,7 +23,8 @@ layout resample_inst::calc_output_layout(resample_node const& node, kernel_impl_
auto output_type = input_layout.data_type;
if ((input_layout.data_type == data_types::i8 || input_layout.data_type == data_types::u8)
&& desc->operation_type != resample::InterpolateOp::InterpolateMode::NEAREST) {
&& desc->operation_type != resample::InterpolateOp::InterpolateMode::NEAREST
&& desc->operation_type != resample::InterpolateOp::InterpolateMode::LINEAR_ONNX) {
output_type = data_types::f32;
}
if (impl_param.has_fused_primitives()) {

View File

@@ -24,6 +24,7 @@
#define TO_OUT_VEC_TYPE(x) CAT(convert_, OUT_VEC_TYPE)(x)
#if defined(SAMPLE_TYPE_CAFFE_INTERP)
inline uint FUNC(get_input_index)(uint b, uint f, uint y, uint x)
{
#if INPUT0_DIMS < 5
@@ -41,6 +42,7 @@ inline uint FUNC(get_output_index)(uint b, uint f, uint y, uint x)
#error [clDNN resample_ref.cl]: output format - not supported
#endif
}
#endif
inline float FUNC(get_original_coordinate)(float num, float scale, int length_resized, int length_original)
{
@@ -232,9 +234,17 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
#endif
)
{
#if OUTPUT_DIMS == 5
const int xyz = get_global_id(0);
const int z = xyz / (OUTPUT_SIZE_Y * X_BLOCKS);
const int xy = xyz % (OUTPUT_SIZE_Y * X_BLOCKS);
const int x = (xy % X_BLOCKS) * OUTPUT_X_BLOCK_SIZE;
const int y = (xy / X_BLOCKS);
#else
const int xy = get_global_id(0);
const int x = (xy % X_BLOCKS) * OUTPUT_X_BLOCK_SIZE;
const int y = (xy / X_BLOCKS);
#endif
const int f_block = get_group_id(1);
const int b = get_global_id(2);
const int feature_num = f_block * FEATURE_SLICE_SIZE + get_sub_group_local_id();
@@ -251,7 +261,12 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
const int ix = floor((x + out_x) * SCALES[4]);
const int iy = floor(y * SCALES[3]);
#if OUTPUT_DIMS == 5
const int iz = floor(z * SCALES[2]);
in_vec_t res = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, iz, iy, ix));
#else
in_vec_t res = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, iy, ix));
#endif
#elif defined(SAMPLE_TYPE_INTERP)
unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) {
const ACCUMULATOR_TYPE ix = TO_ACCUMULATOR_TYPE(SCALES[4]) * (x + out_x);
@@ -265,10 +280,17 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
const ACCUMULATOR_TYPE dx = ix - left_x_index;
const ACCUMULATOR_TYPE dy = iy - top_y_index;
#if OUTPUT_DIMS == 5
const in_vec_t top_left = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, top_y_index, left_x_index));
const in_vec_t top_right = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, top_y_index, right_x_index));
const in_vec_t bottom_left = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, bottom_y_index, left_x_index));
const in_vec_t bottom_right = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, bottom_y_index, right_x_index));
#else
const in_vec_t top_left = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, top_y_index, left_x_index));
const in_vec_t top_right = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, top_y_index, right_x_index));
const in_vec_t bottom_left = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, bottom_y_index, left_x_index));
const in_vec_t bottom_right = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, bottom_y_index, right_x_index));
#endif
const acc_vec_t top = TO_ACC_VEC_TYPE(top_left) + (TO_ACC_VEC_TYPE(top_right) - TO_ACC_VEC_TYPE(top_left)) * dx;
const acc_vec_t bottom = TO_ACC_VEC_TYPE(bottom_left) + (TO_ACC_VEC_TYPE(bottom_right) - TO_ACC_VEC_TYPE(bottom_left)) * dx;
@@ -302,10 +324,19 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
bool blOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
bool brOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
#endif // PADDING_USED == 1
#if OUTPUT_DIMS == 5
const acc_vec_t top_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y1, in_x1)));
const acc_vec_t top_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y1, in_x2)));
const acc_vec_t bottom_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y2, in_x1)));
const acc_vec_t bottom_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y2, in_x2)));
#else
const acc_vec_t top_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y1, in_x1)));
const acc_vec_t top_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y1, in_x2)));
const acc_vec_t bottom_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y2, in_x1)));
const acc_vec_t bottom_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y2, in_x2)));
#endif
#if PADDING_USED == 1
if (tlOutOfBounds)
top_left = INPUT0_VAL_ZERO;
@@ -328,7 +359,11 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
OUT_VEC_TYPE out = TO_OUT_VEC_TYPE(ACTIVATION(res, ACTIVATION_PARAMS));
#endif
#if OUTPUT_DIMS == 5
WRITE_FUNC(output, OUTPUT_GET_INDEX(b, feature_block, z, y, (x + out_x)), out);
#else
WRITE_FUNC(output, OUTPUT_GET_INDEX(b, feature_block, y, (x + out_x)), out);
#endif
}
}
#endif // !SAMPLE_TYPE_CAFFE_INTERP

View File

@@ -405,9 +405,7 @@ KERNEL (resample_gpu_ref)(__global INPUT0_TYPE* input,
#else
OUTPUT_TYPE res = ACTIVATION(TO_OUTPUT_TYPE(interp_val), ACTIVATION_PARAMS);
#endif
output[OUTPUT_GET_INDEX(batch, feature, oz, oy, ox)] = res;
#endif // #if OUTPUT_DIMS == 5
#elif defined(SAMPLE_TYPE_INTERP) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE

View File

@@ -37,6 +37,7 @@ ParamsKey ResampleKernelOpt::GetSupportedKey() const {
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
@@ -51,6 +52,21 @@ ParamsKey ResampleKernelOpt::GetSupportedKey() const {
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv32_fsv16);
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv32_fsv32);
k.EnableOutputLayout(DataLayout::fs_b_yx_fsv32);
// 5d formats
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv32_fsv16);
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv32_fsv16);
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv32);
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv32);
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv16_fsv32);
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv16_fsv32);
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv32_fsv32);
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv32_fsv32);
k.EnableDifferentTypes();
k.EnableTensorOffset();
k.EnableTensorPitches();
@@ -70,6 +86,7 @@ ResampleKernelBase::DispatchData ResampleKernelOpt::SetDefault(const kernel_sele
auto out_layout = arg.outputs[0].GetLayout();
std::vector<std::vector<Tensor::DataChannelName>> dims_by_gws;
size_t dims = arg.outputs[0].Dimentions();
const auto& out = arg.outputs[0];
if (arg.resampleType == ResampleType::CAFFE_BILINEAR_INTERP) {
@@ -87,7 +104,11 @@ ResampleKernelBase::DispatchData ResampleKernelOpt::SetDefault(const kernel_sele
opt_x_block_size = GetOptimalDivisor(out.X().v, 32);
}
dispatchData.gws[0] = CeilDiv(out.X().v, opt_x_block_size) * out.Y().v;
if (dims == 5) {
dispatchData.gws[0] = CeilDiv(out.X().v, opt_x_block_size) * out.Y().v * out.Z().v;
} else {
dispatchData.gws[0] = CeilDiv(out.X().v, opt_x_block_size) * out.Y().v;
}
dispatchData.gws[1] = Align(out.Feature().v, sub_group_size);
dispatchData.gws[2] = arg.outputs[0].Batch().v;
@@ -95,8 +116,12 @@ ResampleKernelBase::DispatchData ResampleKernelOpt::SetDefault(const kernel_sele
dispatchData.lws[1] = sub_group_size;
dispatchData.lws[2] = 1;
if (arg.outputs[0].GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv16 || arg.outputs[0].GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv32 ||
arg.outputs[0].GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv16 || arg.outputs[0].GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv32) {
if (arg.outputs[0].GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv16 ||
arg.outputs[0].GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv32 ||
arg.outputs[0].GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv16 ||
arg.outputs[0].GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv32 ||
arg.outputs[0].GetLayout() == DataLayout::bs_fs_zyx_bsv32_fsv16 ||
arg.outputs[0].GetLayout() == DataLayout::bs_fs_zyx_bsv32_fsv32) {
dispatchData.lws[2] = GetOptimalDivisor(dispatchData.gws[2]);
}
}
@@ -120,21 +145,24 @@ bool ResampleKernelOpt::Validate(const Params& p, const optional_params& o) cons
return false;
const auto& input = params.inputs[0];
const auto & output = params.outputs[0];
if ((input.GetDType() == Datatype::UINT8 || input.GetDType() == Datatype::INT8) &&
params.resampleType != ResampleType::NEAREST_NEIGHBOR &&
params.resampleType != ResampleType::BILINEAR_INTERP)
params.resampleType != ResampleType::BILINEAR_INTERP &&
params.resampleType != ResampleType::LINEAR_ONNX)
return false;
if (input.GetLayout() != DataLayout::fs_b_yx_fsv32 &&
input.GetLayout() != DataLayout::b_fs_yx_fsv16 &&
input.GetLayout() != DataLayout::b_fs_yx_fsv32 &&
input.GetLayout() != DataLayout::bs_fs_yx_bsv16_fsv16 &&
input.GetLayout() != DataLayout::bs_fs_yx_bsv16_fsv32 &&
input.GetLayout() != DataLayout::bs_fs_yx_bsv32_fsv16 &&
input.GetLayout() != DataLayout::bs_fs_yx_bsv32_fsv32)
// In the 5D case only NEAREST_NEIGHBOR is fully supported; LINEAR_ONNX is supported
// partially (interpolation over the X and Y axes only — Z must remain unchanged)
if (input.Dimentions() == 5 &&
params.resampleType != ResampleType::NEAREST_NEIGHBOR &&
!(params.resampleType == ResampleType::LINEAR_ONNX &&
input.Batch().v == output.Batch().v &&
input.Feature().v == output.Feature().v &&
input.Z().v == output.Z().v))
return false;
return true;
}

View File

@@ -50,6 +50,7 @@ static void CreateInterpolateOp(Program& p, const std::shared_ptr<ngraph::op::v4
if (axes.size() != scales.size())
IE_THROW() << op->get_friendly_name() << " Incorrect axes and scales should be the same size";
// TODO shouldn't be all this checking done in ngraph::op::v4::Interpolate?
auto interpolateMode = attrs.mode;
if (interpolateMode == ov::op::v4::Interpolate::InterpolateMode::LINEAR_ONNX) {
if (inputRank != 2 && inputRank != 4 && inputRank != 5)
@@ -57,7 +58,7 @@ static void CreateInterpolateOp(Program& p, const std::shared_ptr<ngraph::op::v4
if (axes.size() != 2 && axes.size() != 3 && inputRank != axes.size())
IE_THROW() << "mode 'linear_onnx' supports only axes with size 2, 3 or equal to input rank";
bool correctAxes =
(((axes.size() == 2 || axes.size() == 4) && inputRank != 5) &&
(((axes.size() == 2 || axes.size() == 4) && inputRank < 5) &&
((axes[0] == 0 && axes[1] == 1) ||
(axes[0] == 1 && axes[1] == 0) ||
(axes[0] == 2 && axes[1] == 3) ||

View File

@@ -1930,8 +1930,6 @@ struct resample_opt_random_test_params {
struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_test_params>
{
bool enable_profiling = false;
template <typename T>
void fill_random_typed(memory::ptr mem, int min, int max, int k) {
auto l = mem->get_layout();
@@ -1939,16 +1937,19 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
size_t f = l.feature();
size_t x = l.spatial(0);
size_t y = l.spatial(1);
size_t z = l.spatial(2);
auto data = generate_random_4d<T>(b, f, y, x, min, max, k);
auto data = generate_random_5d<T>(b, f, z, y, x, min, max, k);
mem_lock<T> ptr{mem, get_test_stream()};
for (size_t bi = 0; bi < b; ++bi) {
for (size_t fi = 0; fi < f; ++fi) {
for (size_t yi = 0; yi < y; ++yi) {
for (size_t xi = 0; xi < x; ++xi) {
auto coords = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0));
auto offset = mem->get_layout().get_linear_offset(coords);
ptr[offset] = data[bi][fi][yi][xi];
for (size_t zi = 0; zi < z; ++zi) {
for (size_t yi = 0; yi < y; ++yi) {
for (size_t xi = 0; xi < x; ++xi) {
auto coords = tensor(batch(bi), feature(fi), spatial(xi, yi, zi, 0));
auto offset = mem->get_layout().get_linear_offset(coords);
ptr[offset] = data[bi][fi][zi][yi][xi];
}
}
}
}
@@ -1984,24 +1985,25 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
size_t f = output_lay.feature();
size_t x = output_lay.spatial(0);
size_t y = output_lay.spatial(1);
size_t z = output_lay.spatial(2);
mem_lock<T> ref_ptr{out_ref, get_test_stream()};
mem_lock<T> opt_ptr{out_opt, get_test_stream()};
for (size_t bi = 0; bi < b; ++bi) {
for (size_t fi = 0; fi < f; ++fi) {
for (size_t yi = 0; yi < y; ++yi) {
for (size_t xi = 0; xi < x; ++xi) {
auto ref_out_coords = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0));
auto ref_out_offset = output_lay.get_linear_offset(ref_out_coords);
auto ref_out_val = ref_ptr[ref_out_offset];
auto opt_out_offset = opt_output_lay.get_linear_offset(ref_out_coords);
auto opt_out_val = opt_ptr[opt_out_offset];
EXPECT_EQ(ref_out_offset, opt_out_offset);
if (std::is_same<T, FLOAT16>::value) {
EXPECT_NEAR(static_cast<float>(opt_out_val), static_cast<float>(ref_out_val), 1.e-1f);
} else {
EXPECT_EQ(opt_out_val, ref_out_val);
for (size_t zi = 0; zi < z; ++zi) {
for (size_t yi = 0; yi < y; ++yi) {
for (size_t xi = 0; xi < x; ++xi) {
auto ref_out_coords = tensor(batch(bi), feature(fi), spatial(xi, yi, zi, 0));
auto ref_out_offset = output_lay.get_linear_offset(ref_out_coords);
auto ref_out_val = ref_ptr[ref_out_offset];
auto opt_out_offset = opt_output_lay.get_linear_offset(ref_out_coords);
auto opt_out_val = opt_ptr[opt_out_offset];
EXPECT_EQ(ref_out_offset, opt_out_offset);
if (std::is_same<T, FLOAT16>::value) {
EXPECT_NEAR(static_cast<float>(opt_out_val), static_cast<float>(ref_out_val), 1.e-1f);
} else {
EXPECT_EQ(opt_out_val, ref_out_val);
}
}
}
}
@@ -2011,14 +2013,15 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
return true;
}
void execute_compare(const resample_opt_random_test_params& params, bool check_result) {
void execute_compare(const resample_opt_random_test_params& params, bool check_result, const std::string& kernel = "resample_opt") {
auto& engine = get_test_engine();
auto in_layout = layout(params.input_type, format::bfyx, params.input_size);
const format origin_format = format::dimension(params.in_format) == 4 ? format::bfyx : format::bfzyx;
auto in_layout = layout(params.input_type, origin_format, params.input_size);
auto in_mem = engine.allocate_memory(in_layout);
fill_random(in_mem);
/// bfyx
/// bfyx or bfzyx
cldnn::topology topo;
topo.add(input_layout("in", in_layout));
auto prim = resample("resample", "in", params.output_size, params.num_filter, params.operation_type);
@@ -2043,11 +2046,11 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
prim_opt.pads_begin = params.pads_begin;
prim_opt.pads_end = params.pads_end;
topo_opt.add(prim_opt);
topo_opt.add(reorder("to_output_type", "resample_opt", params.out_format, params.input_type));
topo_opt.add(reorder("res_to_bfyx", "to_output_type", format::bfyx, params.input_type));
topo_opt.add(reorder("res_to_bfyx", "resample_opt", origin_format, params.input_type));
auto build_opts_opt = build_options();
build_opts_opt.set_option(build_option::outputs({"resample_opt", "to_output_type", "res_to_bfyx"}));
build_opts_opt.set_option(build_option::outputs({"resample_opt", "res_to_bfyx"}));
build_opts_opt.set_option(build_option::force_implementations({ {"resample_opt", {params.in_format, kernel}} }));
network net_opt(engine, topo_opt, build_opts_opt);
@@ -2075,6 +2078,123 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
}
}
}
};
// Extended perf-measurement variant of resample_opt_random_test.
// Builds a network with profiling enabled, runs the resample primitive many
// times, and prints the average kernel execution time. Used only by
// manually-enabled (DISABLED_) benchmark tests below.
struct resample_opt_random_test_ext : resample_opt_random_test
{
    // Returns a profiled time (in microseconds) for the given primitive taken
    // from a single network execution.
    static double get_exectime(const std::map<cldnn::primitive_id, cldnn::network_output>& outputs,
                               const std::string& primitive_id)
    {
        using namespace std::chrono;
        std::shared_ptr<event> e = outputs.at(primitive_id).get_event();
        e->wait(); // should ensure execution completion, if not segfault will occur
        double avg_time = 0.0;
        auto intervals = e->get_profiling_info();
        // NOTE(review): this skips the 'executing' stage and reports the first
        // *other* profiling interval — confirm this is the intended stage to measure.
        for (const auto& q : intervals)
        {
            if (q.stage == instrumentation::profiling_stage::executing) {
                continue;
            }
            avg_time = duration_cast<duration<double, microseconds::period>>(q.value->value()).count();
            break;
        }
        return avg_time;
    }

    // Prints the profiled time of every primitive from the last execution.
    static void print_all_perf(const std::map<primitive_id, network_output>& outputs)
    {
        std::cout << "Print last run time" << std::endl;
        using namespace std::chrono;
        for (const auto& n : outputs) {
            std::shared_ptr<event> e = n.second.get_event();
            auto intervals = e->get_profiling_info();
            double time = 0.0;
            // NOTE(review): same stage-selection logic as get_exectime — the
            // 'executing' stage is skipped; confirm intent.
            for (const auto& q : intervals)
            {
                if (q.stage == instrumentation::profiling_stage::executing) {
                    continue;
                }
                time = duration_cast<duration<double, microseconds::period>>(q.value->value()).count();
                break;
            }
            std::cout << n.first << ":" << time << std::endl;
        }
        std::cout << std::endl;
    }

    // Engine configuration with profiling enabled (first constructor argument)
    // so that per-primitive execution times can be queried from events.
    cldnn::engine_configuration get_profiling_config() {
        std::string sources_dumps_dir = "";
        cldnn::queue_types queue_type = cldnn::queue_types::out_of_order;
        priority_mode_types priority_mode = priority_mode_types::disabled;
        throttle_mode_types throttle_mode = throttle_mode_types::disabled;
        bool use_memory_pool = true;
        bool use_unified_shared_memory = true;
        return engine_configuration(true, queue_type, sources_dumps_dir, priority_mode, throttle_mode, use_memory_pool, use_unified_shared_memory);
    }

    // Builds a topology around the "resample_opt" primitive, forces the
    // requested kernel implementation, executes it 100 times, and prints the
    // average profiled execution time. When do_planar is true the primitive
    // runs in the planar layout (bfyx/bfzyx) instead of the blocked one from
    // params — this serves as the unoptimized baseline.
    void execute_perf_test(const resample_opt_random_test_params& params, const std::string& kernel, const bool do_planar = false) {
        auto& engine = get_test_engine(get_profiling_config());

        const format origin_format = format::dimension(params.in_format) == 4 ? format::bfyx : format::bfzyx;
        auto in_layout = layout(params.input_type, origin_format, params.input_size);
        auto in_mem = engine.allocate_memory(in_layout);
        fill_random(in_mem);

        format working_format = do_planar ? origin_format : format(params.in_format);

        cldnn::topology topo_opt;
        topo_opt.add(input_layout("in", in_layout));
        topo_opt.add(reorder("in_to_input_type", "in", working_format, params.input_type));
        auto prim_opt = resample("resample_opt", "in_to_input_type", params.output_size, params.num_filter, params.operation_type);
        prim_opt.pads_begin = params.pads_begin;
        prim_opt.pads_end = params.pads_end;
        topo_opt.add(prim_opt);
        topo_opt.add(reorder("res_to_bfyx", "resample_opt", origin_format, params.input_type));

        auto build_opts_opt = build_options();
        build_opts_opt.set_option(build_option::outputs({"res_to_bfyx"}));
        build_opts_opt.set_option(build_option::force_implementations({ {"resample_opt", {working_format, kernel}} }));
        // NOTE(review): the original comment claimed optimize_data is enabled to
        // test cross-layout execution, but only build_option::debug is set here.
        build_opts_opt.set_option(build_option::debug(true));

        network net_opt(engine, topo_opt, build_opts_opt);

        // Use in_mem from ref network
        net_opt.set_input_data("in", in_mem);

        // Average the profiled kernel time over a fixed number of runs.
        std::map<primitive_id, network_output> result_opt;
        const int runs = 100;
        double exectime = 0.0;
        for (int i = 0; i < runs; ++i) {
            result_opt = net_opt.execute();
            exectime += get_exectime(result_opt, "resample_opt");
        }
        exectime /= runs;

        std::string frm_str = format(working_format).to_string();
        std::string input_type = data_type_traits::name(params.input_type);
        std::string is_opt = do_planar ? " not optimized " : " optimized ";
        std::string mode;
        switch (params.operation_type) {
        case resample::InterpolateOp::InterpolateMode::NEAREST:
            mode = "nearest";
            break;
        case resample::InterpolateOp::InterpolateMode::LINEAR_ONNX:
            mode = "onnx";
            break;
        default:
            mode = "unknown";
        }

        std::cout << "Executed time " << "" << mode << " " << is_opt << " " << kernel << " " << " input(" << params.input_size.to_string()
                  << ") output(" << params.output_size.to_string() << ") "
                  << frm_str << " " << input_type << " " << exectime << std::endl;

        // Uncomment the line below to see the latencies of all operations from the last iteration.
        //print_all_perf(result_opt);
    }
};
TEST_P(resample_opt_random_test, random) {
@@ -2082,6 +2202,16 @@ TEST_P(resample_opt_random_test, random) {
execute_compare(param, true);
}
// Manual perf benchmark (disabled by default): measures the optimized kernel
// and the reference kernel in both blocked and planar layouts.
TEST_P(resample_opt_random_test_ext, DISABLED_random) {
    auto param = GetParam();
    // Comparison tests (2 lines below) are disabled because they took too much time on big shapes
    // execute_compare(param, true, "resample_opt");
    // execute_compare(param, true, "resample_ref");
    execute_perf_test(param, "resample_opt");          // optimized kernel, blocked layout
    execute_perf_test(param, "resample_ref", false);   // reference kernel, blocked layout
    execute_perf_test(param, "resample_ref", true);    // reference kernel, planar layout
}
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_nearest,
resample_opt_random_test,
testing::ValuesIn(
@@ -2091,6 +2221,12 @@ INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_nearest,
{ data_types::i8, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {}, {}},
{ data_types::i8, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, {}, {}},
{ data_types::i8, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_yx_bsv16_fsv16, format::bs_fs_yx_bsv16_fsv16, {}, {}},
{ data_types::u8, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16, {}, {}},
{ data_types::u8, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {}, {}},
{ data_types::u8, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {}, {}},
{ data_types::u8, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, {}, {}},
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16, {}, {}},
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {}, {}},
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {}, {}},
@@ -2111,3 +2247,78 @@ INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx,
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv32, {}, {}},
}
));
// 5D nearest-neighbor smoke tests: covers i8/u8/f16 over the blocked zyx layouts.
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_5d_nearest,
resample_opt_random_test,
testing::ValuesIn(
std::vector<resample_opt_random_test_params>{
{ data_types::i8, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
{ data_types::i8, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv32, format::b_fs_zyx_fsv32, {}, {}},
{ data_types::i8, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_zyx_bsv16_fsv32, format::bs_fs_zyx_bsv16_fsv32, {}, {}},
{ data_types::i8, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_zyx_bsv32_fsv32, format::bs_fs_zyx_bsv32_fsv32, {}, {}},
{ data_types::u8, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
{ data_types::u8, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv32, format::b_fs_zyx_fsv32, {}, {}},
{ data_types::u8, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_zyx_bsv16_fsv32, format::bs_fs_zyx_bsv16_fsv32, {}, {}},
{ data_types::u8, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_zyx_bsv32_fsv32, format::bs_fs_zyx_bsv32_fsv32, {}, {}},
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv32, format::b_fs_zyx_fsv32, {}, {}},
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_zyx_bsv16_fsv32, format::bs_fs_zyx_bsv16_fsv32, {}, {}},
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_zyx_bsv32_fsv32, format::bs_fs_zyx_bsv32_fsv32, {}, {}},
}
));
// 5D linear_onnx smoke tests. Note: Z stays unchanged in every case (5 -> 5),
// matching the kernel's partial 5D support (X/Y-axis interpolation only).
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_5d_onnx,
resample_opt_random_test,
testing::ValuesIn(
std::vector<resample_opt_random_test_params>{
{ data_types::f16, {1, 16, 13, 13, 5}, {1, 16, 26, 26, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
{ data_types::f32, {1, 16, 13, 13, 5}, {1, 16, 26, 26, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
{ data_types::f16, {16, 16, 7, 7, 5}, {16, 16, 14, 14, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv16_fsv16, format::bs_fs_zyx_bsv16_fsv16, {}, {}},
{ data_types::f32, {16, 16, 7, 7, 5}, {16, 16, 14, 14, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv16_fsv16, format::bs_fs_zyx_bsv16_fsv16, {}, {}},
{ data_types::f16, {32, 16, 7, 7, 5}, {32, 16, 14, 14, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv16, format::bs_fs_zyx_bsv32_fsv16, {}, {}},
{ data_types::f32, {32, 16, 7, 7, 5}, {32, 16, 14, 14, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv16, format::bs_fs_zyx_bsv32_fsv16, {}, {}},
{ data_types::i8, {1, 16, 13, 13, 5}, {1, 16, 26, 26, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv32, format::b_fs_zyx_fsv32, {}, {}},
{ data_types::u8, {1, 16, 13, 13, 5}, {1, 16, 26, 26, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv32, format::b_fs_zyx_fsv32, {}, {}},
{ data_types::i8, {16, 16, 7, 7, 5}, {16, 16, 14, 14, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv16_fsv32, format::bs_fs_zyx_bsv16_fsv32, {}, {}},
{ data_types::u8, {16, 16, 7, 7, 5}, {16, 16, 14, 14, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv16_fsv32, format::bs_fs_zyx_bsv16_fsv32, {}, {}},
{ data_types::i8, {32, 16, 7, 7, 5}, {32, 16, 14, 14, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv32, format::bs_fs_zyx_bsv32_fsv32, {}, {}},
{ data_types::u8, {32, 16, 7, 7, 5}, {32, 16, 14, 14, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv32, format::bs_fs_zyx_bsv32_fsv32, {}, {}},
}
));
// The perf-test parameter sets below feed the DISABLED_ benchmark test above;
// they are for manual benchmarking only and should stay disabled (or be removed) in CI.
// Manual linear_onnx perf-test shapes (large tensors; runs only via the DISABLED_ test).
INSTANTIATE_TEST_SUITE_P(resample_opt_perf_linear_5_onnx,
resample_opt_random_test_ext,
testing::ValuesIn(
std::vector<resample_opt_random_test_params>{
{ data_types::f16, {1, 32, 64, 64, 5}, {1, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
{ data_types::f32, {1, 32, 64, 64, 5}, {1, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
{ data_types::f16, {16, 32, 64, 64, 5}, {16, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv16_fsv16, format::bs_fs_zyx_bsv16_fsv16, {}, {}},
{ data_types::f32, {16, 32, 64, 64, 5}, {16, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv16_fsv16, format::bs_fs_zyx_bsv16_fsv16, {}, {}},
{ data_types::f16, {32, 32, 64, 64, 5}, {32, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv16, format::bs_fs_zyx_bsv32_fsv16, {}, {}},
{ data_types::f32, {32, 32, 64, 64, 5}, {32, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv16, format::bs_fs_zyx_bsv32_fsv16, {}, {}},
{ data_types::i8, {1, 32, 64, 64, 5}, {1, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv32, format::b_fs_zyx_fsv32, {}, {}},
{ data_types::u8, {1, 32, 64, 64, 5}, {1, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv32, format::b_fs_zyx_fsv32, {}, {}},
{ data_types::i8, {16, 32, 64, 64, 5}, {16, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv16_fsv32, format::bs_fs_zyx_bsv16_fsv32, {}, {}},
{ data_types::u8, {16, 32, 64, 64, 5}, {16, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv16_fsv32, format::bs_fs_zyx_bsv16_fsv32, {}, {}},
{ data_types::i8, {32, 32, 64, 64, 5}, {32, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv32, format::bs_fs_zyx_bsv32_fsv32, {}, {}},
{ data_types::u8, {32, 32, 64, 64, 5}, {32, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv32, format::bs_fs_zyx_bsv32_fsv32, {}, {}},
{ data_types::f16, {32, 32, 256, 256, 1}, {32, 32, 512, 512, 1}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv16, format::bs_fs_zyx_bsv32_fsv16, {}, {}},
{ data_types::f16, {1, 32, 64, 64, 32}, {1, 32, 128, 128, 32}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
}
));
// Manual nearest-neighbor perf-test shapes (large f16 volumes; runs only via the DISABLED_ test).
INSTANTIATE_TEST_SUITE_P(resample_opt_perf_linear_5_nearest,
resample_opt_random_test_ext,
testing::ValuesIn(
std::vector<resample_opt_random_test_params>{
{ data_types::f16, {1, 128, 16, 16, 16}, {1, 128, 32, 32, 32}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
{ data_types::f16, {1, 128, 32, 32, 32}, {1, 128, 64, 64, 64}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
{ data_types::f16, {1, 128, 64, 64, 64}, {1, 128, 128, 128, 128}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
}
));

View File

@@ -310,6 +310,14 @@ cldnn::engine& get_test_engine() {
return *test_engine;
}
// Returns a process-wide OCL engine created with the given configuration.
// NOTE: the engine is created lazily on the first call only; on every
// subsequent call the passed configuration is ignored and the cached
// engine is returned unchanged.
cldnn::engine& get_test_engine(const cldnn::engine_configuration& configuration) {
    static std::shared_ptr<cldnn::engine> test_engine = nullptr;
    if (!test_engine) {
        test_engine = cldnn::engine::create(engine_types::ocl, runtime_types::ocl, configuration);
    }
    return *test_engine;
}
#ifdef ENABLE_ONEDNN_FOR_GPU
cldnn::engine& get_onednn_test_engine() {
static std::shared_ptr<cldnn::engine> test_engine = nullptr;

View File

@@ -54,6 +54,7 @@ namespace tests {
std::shared_ptr<cldnn::engine> create_test_engine(cldnn::queue_types queue_type = cldnn::queue_types::out_of_order);
cldnn::engine& get_test_engine();
cldnn::engine& get_test_engine(const cldnn::engine_configuration& configuration);
#ifdef ENABLE_ONEDNN_FOR_GPU
cldnn::engine& get_onednn_test_engine();
#endif