[GPU] Added optimized blocked-format support for 5D interpolation, linear_onnx and nearest modes (#12977)
This commit contains two features, split into two commits: 1) linear_onnx interpolation for 5D formats; 2) optimized blocked formats for the linear_onnx and nearest modes. Corresponding unit and single-layer tests are included.
This commit is contained in:
@@ -23,7 +23,8 @@ layout resample_inst::calc_output_layout(resample_node const& node, kernel_impl_
|
||||
|
||||
auto output_type = input_layout.data_type;
|
||||
if ((input_layout.data_type == data_types::i8 || input_layout.data_type == data_types::u8)
|
||||
&& desc->operation_type != resample::InterpolateOp::InterpolateMode::NEAREST) {
|
||||
&& desc->operation_type != resample::InterpolateOp::InterpolateMode::NEAREST
|
||||
&& desc->operation_type != resample::InterpolateOp::InterpolateMode::LINEAR_ONNX) {
|
||||
output_type = data_types::f32;
|
||||
}
|
||||
if (impl_param.has_fused_primitives()) {
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#define TO_OUT_VEC_TYPE(x) CAT(convert_, OUT_VEC_TYPE)(x)
|
||||
|
||||
|
||||
#if defined(SAMPLE_TYPE_CAFFE_INTERP)
|
||||
inline uint FUNC(get_input_index)(uint b, uint f, uint y, uint x)
|
||||
{
|
||||
#if INPUT0_DIMS < 5
|
||||
@@ -41,6 +42,7 @@ inline uint FUNC(get_output_index)(uint b, uint f, uint y, uint x)
|
||||
#error [clDNN resample_ref.cl]: output format - not supported
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
inline float FUNC(get_original_coordinate)(float num, float scale, int length_resized, int length_original)
|
||||
{
|
||||
@@ -232,9 +234,17 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
|
||||
#endif
|
||||
)
|
||||
{
|
||||
#if OUTPUT_DIMS == 5
|
||||
const int xyz = get_global_id(0);
|
||||
const int z = xyz / (OUTPUT_SIZE_Y * X_BLOCKS);
|
||||
const int xy = xyz % (OUTPUT_SIZE_Y * X_BLOCKS);
|
||||
const int x = (xy % X_BLOCKS) * OUTPUT_X_BLOCK_SIZE;
|
||||
const int y = (xy / X_BLOCKS);
|
||||
#else
|
||||
const int xy = get_global_id(0);
|
||||
const int x = (xy % X_BLOCKS) * OUTPUT_X_BLOCK_SIZE;
|
||||
const int y = (xy / X_BLOCKS);
|
||||
#endif
|
||||
const int f_block = get_group_id(1);
|
||||
const int b = get_global_id(2);
|
||||
const int feature_num = f_block * FEATURE_SLICE_SIZE + get_sub_group_local_id();
|
||||
@@ -251,7 +261,12 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
|
||||
const int ix = floor((x + out_x) * SCALES[4]);
|
||||
const int iy = floor(y * SCALES[3]);
|
||||
|
||||
#if OUTPUT_DIMS == 5
|
||||
const int iz = floor(z * SCALES[2]);
|
||||
in_vec_t res = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, iz, iy, ix));
|
||||
#else
|
||||
in_vec_t res = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, iy, ix));
|
||||
#endif
|
||||
#elif defined(SAMPLE_TYPE_INTERP)
|
||||
unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) {
|
||||
const ACCUMULATOR_TYPE ix = TO_ACCUMULATOR_TYPE(SCALES[4]) * (x + out_x);
|
||||
@@ -265,10 +280,17 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
|
||||
const ACCUMULATOR_TYPE dx = ix - left_x_index;
|
||||
const ACCUMULATOR_TYPE dy = iy - top_y_index;
|
||||
|
||||
#if OUTPUT_DIMS == 5
|
||||
const in_vec_t top_left = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, top_y_index, left_x_index));
|
||||
const in_vec_t top_right = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, top_y_index, right_x_index));
|
||||
const in_vec_t bottom_left = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, bottom_y_index, left_x_index));
|
||||
const in_vec_t bottom_right = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, bottom_y_index, right_x_index));
|
||||
#else
|
||||
const in_vec_t top_left = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, top_y_index, left_x_index));
|
||||
const in_vec_t top_right = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, top_y_index, right_x_index));
|
||||
const in_vec_t bottom_left = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, bottom_y_index, left_x_index));
|
||||
const in_vec_t bottom_right = READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, bottom_y_index, right_x_index));
|
||||
#endif
|
||||
|
||||
const acc_vec_t top = TO_ACC_VEC_TYPE(top_left) + (TO_ACC_VEC_TYPE(top_right) - TO_ACC_VEC_TYPE(top_left)) * dx;
|
||||
const acc_vec_t bottom = TO_ACC_VEC_TYPE(bottom_left) + (TO_ACC_VEC_TYPE(bottom_right) - TO_ACC_VEC_TYPE(bottom_left)) * dx;
|
||||
@@ -302,10 +324,19 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
|
||||
bool blOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
|
||||
bool brOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
|
||||
#endif // PADDING_USED == 1
|
||||
|
||||
#if OUTPUT_DIMS == 5
|
||||
const acc_vec_t top_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y1, in_x1)));
|
||||
const acc_vec_t top_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y1, in_x2)));
|
||||
const acc_vec_t bottom_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y2, in_x1)));
|
||||
const acc_vec_t bottom_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y2, in_x2)));
|
||||
#else
|
||||
const acc_vec_t top_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y1, in_x1)));
|
||||
const acc_vec_t top_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y1, in_x2)));
|
||||
const acc_vec_t bottom_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y2, in_x1)));
|
||||
const acc_vec_t bottom_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y2, in_x2)));
|
||||
#endif
|
||||
|
||||
#if PADDING_USED == 1
|
||||
if (tlOutOfBounds)
|
||||
top_left = INPUT0_VAL_ZERO;
|
||||
@@ -328,7 +359,11 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
|
||||
OUT_VEC_TYPE out = TO_OUT_VEC_TYPE(ACTIVATION(res, ACTIVATION_PARAMS));
|
||||
#endif
|
||||
|
||||
#if OUTPUT_DIMS == 5
|
||||
WRITE_FUNC(output, OUTPUT_GET_INDEX(b, feature_block, z, y, (x + out_x)), out);
|
||||
#else
|
||||
WRITE_FUNC(output, OUTPUT_GET_INDEX(b, feature_block, y, (x + out_x)), out);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#endif // !SAMPLE_TYPE_CAFFE_INTERP
|
||||
|
||||
@@ -405,9 +405,7 @@ KERNEL (resample_gpu_ref)(__global INPUT0_TYPE* input,
|
||||
#else
|
||||
OUTPUT_TYPE res = ACTIVATION(TO_OUTPUT_TYPE(interp_val), ACTIVATION_PARAMS);
|
||||
#endif
|
||||
|
||||
output[OUTPUT_GET_INDEX(batch, feature, oz, oy, ox)] = res;
|
||||
|
||||
#endif // #if OUTPUT_DIMS == 5
|
||||
|
||||
#elif defined(SAMPLE_TYPE_INTERP) // defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
|
||||
|
||||
@@ -37,6 +37,7 @@ ParamsKey ResampleKernelOpt::GetSupportedKey() const {
|
||||
k.EnableOutputDataType(Datatype::F32);
|
||||
k.EnableOutputDataType(Datatype::UINT8);
|
||||
k.EnableOutputDataType(Datatype::INT8);
|
||||
|
||||
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
|
||||
k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
|
||||
@@ -51,6 +52,21 @@ ParamsKey ResampleKernelOpt::GetSupportedKey() const {
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv32_fsv16);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv32_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::fs_b_yx_fsv32);
|
||||
|
||||
// 5d formats
|
||||
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
|
||||
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv32_fsv16);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv32_fsv16);
|
||||
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv32);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv16_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv16_fsv32);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv32_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv32_fsv32);
|
||||
|
||||
k.EnableDifferentTypes();
|
||||
k.EnableTensorOffset();
|
||||
k.EnableTensorPitches();
|
||||
@@ -70,6 +86,7 @@ ResampleKernelBase::DispatchData ResampleKernelOpt::SetDefault(const kernel_sele
|
||||
auto out_layout = arg.outputs[0].GetLayout();
|
||||
std::vector<std::vector<Tensor::DataChannelName>> dims_by_gws;
|
||||
|
||||
size_t dims = arg.outputs[0].Dimentions();
|
||||
const auto& out = arg.outputs[0];
|
||||
|
||||
if (arg.resampleType == ResampleType::CAFFE_BILINEAR_INTERP) {
|
||||
@@ -87,7 +104,11 @@ ResampleKernelBase::DispatchData ResampleKernelOpt::SetDefault(const kernel_sele
|
||||
opt_x_block_size = GetOptimalDivisor(out.X().v, 32);
|
||||
}
|
||||
|
||||
dispatchData.gws[0] = CeilDiv(out.X().v, opt_x_block_size) * out.Y().v;
|
||||
if (dims == 5) {
|
||||
dispatchData.gws[0] = CeilDiv(out.X().v, opt_x_block_size) * out.Y().v * out.Z().v;
|
||||
} else {
|
||||
dispatchData.gws[0] = CeilDiv(out.X().v, opt_x_block_size) * out.Y().v;
|
||||
}
|
||||
dispatchData.gws[1] = Align(out.Feature().v, sub_group_size);
|
||||
dispatchData.gws[2] = arg.outputs[0].Batch().v;
|
||||
|
||||
@@ -95,8 +116,12 @@ ResampleKernelBase::DispatchData ResampleKernelOpt::SetDefault(const kernel_sele
|
||||
dispatchData.lws[1] = sub_group_size;
|
||||
dispatchData.lws[2] = 1;
|
||||
|
||||
if (arg.outputs[0].GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv16 || arg.outputs[0].GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv32 ||
|
||||
arg.outputs[0].GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv16 || arg.outputs[0].GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv32) {
|
||||
if (arg.outputs[0].GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv16 ||
|
||||
arg.outputs[0].GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv32 ||
|
||||
arg.outputs[0].GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv16 ||
|
||||
arg.outputs[0].GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv32 ||
|
||||
arg.outputs[0].GetLayout() == DataLayout::bs_fs_zyx_bsv32_fsv16 ||
|
||||
arg.outputs[0].GetLayout() == DataLayout::bs_fs_zyx_bsv32_fsv32) {
|
||||
dispatchData.lws[2] = GetOptimalDivisor(dispatchData.gws[2]);
|
||||
}
|
||||
}
|
||||
@@ -120,21 +145,24 @@ bool ResampleKernelOpt::Validate(const Params& p, const optional_params& o) cons
|
||||
return false;
|
||||
|
||||
const auto& input = params.inputs[0];
|
||||
const auto & output = params.outputs[0];
|
||||
|
||||
if ((input.GetDType() == Datatype::UINT8 || input.GetDType() == Datatype::INT8) &&
|
||||
params.resampleType != ResampleType::NEAREST_NEIGHBOR &&
|
||||
params.resampleType != ResampleType::BILINEAR_INTERP)
|
||||
params.resampleType != ResampleType::BILINEAR_INTERP &&
|
||||
params.resampleType != ResampleType::LINEAR_ONNX)
|
||||
return false;
|
||||
|
||||
if (input.GetLayout() != DataLayout::fs_b_yx_fsv32 &&
|
||||
input.GetLayout() != DataLayout::b_fs_yx_fsv16 &&
|
||||
input.GetLayout() != DataLayout::b_fs_yx_fsv32 &&
|
||||
input.GetLayout() != DataLayout::bs_fs_yx_bsv16_fsv16 &&
|
||||
input.GetLayout() != DataLayout::bs_fs_yx_bsv16_fsv32 &&
|
||||
input.GetLayout() != DataLayout::bs_fs_yx_bsv32_fsv16 &&
|
||||
input.GetLayout() != DataLayout::bs_fs_yx_bsv32_fsv32)
|
||||
// in the case of 5D support only NEAREST_NEIGHBOR and partially LINEAR_ONNX (interpolate X and Y axes)
|
||||
if (input.Dimentions() == 5 &&
|
||||
params.resampleType != ResampleType::NEAREST_NEIGHBOR &&
|
||||
!(params.resampleType == ResampleType::LINEAR_ONNX &&
|
||||
input.Batch().v == output.Batch().v &&
|
||||
input.Feature().v == output.Feature().v &&
|
||||
input.Z().v == output.Z().v))
|
||||
return false;
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -50,6 +50,7 @@ static void CreateInterpolateOp(Program& p, const std::shared_ptr<ngraph::op::v4
|
||||
if (axes.size() != scales.size())
|
||||
IE_THROW() << op->get_friendly_name() << " Incorrect axes and scales should be the same size";
|
||||
|
||||
// TODO shouldn't be all this checking done in ngraph::op::v4::Interpolate?
|
||||
auto interpolateMode = attrs.mode;
|
||||
if (interpolateMode == ov::op::v4::Interpolate::InterpolateMode::LINEAR_ONNX) {
|
||||
if (inputRank != 2 && inputRank != 4 && inputRank != 5)
|
||||
@@ -57,7 +58,7 @@ static void CreateInterpolateOp(Program& p, const std::shared_ptr<ngraph::op::v4
|
||||
if (axes.size() != 2 && axes.size() != 3 && inputRank != axes.size())
|
||||
IE_THROW() << "mode 'linear_onnx' supports only axes with size 2, 3 or equal to input rank";
|
||||
bool correctAxes =
|
||||
(((axes.size() == 2 || axes.size() == 4) && inputRank != 5) &&
|
||||
(((axes.size() == 2 || axes.size() == 4) && inputRank < 5) &&
|
||||
((axes[0] == 0 && axes[1] == 1) ||
|
||||
(axes[0] == 1 && axes[1] == 0) ||
|
||||
(axes[0] == 2 && axes[1] == 3) ||
|
||||
|
||||
@@ -1930,8 +1930,6 @@ struct resample_opt_random_test_params {
|
||||
|
||||
struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_test_params>
|
||||
{
|
||||
bool enable_profiling = false;
|
||||
|
||||
template <typename T>
|
||||
void fill_random_typed(memory::ptr mem, int min, int max, int k) {
|
||||
auto l = mem->get_layout();
|
||||
@@ -1939,16 +1937,19 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
|
||||
size_t f = l.feature();
|
||||
size_t x = l.spatial(0);
|
||||
size_t y = l.spatial(1);
|
||||
size_t z = l.spatial(2);
|
||||
|
||||
auto data = generate_random_4d<T>(b, f, y, x, min, max, k);
|
||||
auto data = generate_random_5d<T>(b, f, z, y, x, min, max, k);
|
||||
mem_lock<T> ptr{mem, get_test_stream()};
|
||||
for (size_t bi = 0; bi < b; ++bi) {
|
||||
for (size_t fi = 0; fi < f; ++fi) {
|
||||
for (size_t yi = 0; yi < y; ++yi) {
|
||||
for (size_t xi = 0; xi < x; ++xi) {
|
||||
auto coords = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0));
|
||||
auto offset = mem->get_layout().get_linear_offset(coords);
|
||||
ptr[offset] = data[bi][fi][yi][xi];
|
||||
for (size_t zi = 0; zi < z; ++zi) {
|
||||
for (size_t yi = 0; yi < y; ++yi) {
|
||||
for (size_t xi = 0; xi < x; ++xi) {
|
||||
auto coords = tensor(batch(bi), feature(fi), spatial(xi, yi, zi, 0));
|
||||
auto offset = mem->get_layout().get_linear_offset(coords);
|
||||
ptr[offset] = data[bi][fi][zi][yi][xi];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1984,24 +1985,25 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
|
||||
size_t f = output_lay.feature();
|
||||
size_t x = output_lay.spatial(0);
|
||||
size_t y = output_lay.spatial(1);
|
||||
size_t z = output_lay.spatial(2);
|
||||
mem_lock<T> ref_ptr{out_ref, get_test_stream()};
|
||||
mem_lock<T> opt_ptr{out_opt, get_test_stream()};
|
||||
for (size_t bi = 0; bi < b; ++bi) {
|
||||
for (size_t fi = 0; fi < f; ++fi) {
|
||||
for (size_t yi = 0; yi < y; ++yi) {
|
||||
for (size_t xi = 0; xi < x; ++xi) {
|
||||
auto ref_out_coords = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0));
|
||||
auto ref_out_offset = output_lay.get_linear_offset(ref_out_coords);
|
||||
auto ref_out_val = ref_ptr[ref_out_offset];
|
||||
|
||||
auto opt_out_offset = opt_output_lay.get_linear_offset(ref_out_coords);
|
||||
auto opt_out_val = opt_ptr[opt_out_offset];
|
||||
|
||||
EXPECT_EQ(ref_out_offset, opt_out_offset);
|
||||
if (std::is_same<T, FLOAT16>::value) {
|
||||
EXPECT_NEAR(static_cast<float>(opt_out_val), static_cast<float>(ref_out_val), 1.e-1f);
|
||||
} else {
|
||||
EXPECT_EQ(opt_out_val, ref_out_val);
|
||||
for (size_t zi = 0; zi < z; ++zi) {
|
||||
for (size_t yi = 0; yi < y; ++yi) {
|
||||
for (size_t xi = 0; xi < x; ++xi) {
|
||||
auto ref_out_coords = tensor(batch(bi), feature(fi), spatial(xi, yi, zi, 0));
|
||||
auto ref_out_offset = output_lay.get_linear_offset(ref_out_coords);
|
||||
auto ref_out_val = ref_ptr[ref_out_offset];
|
||||
auto opt_out_offset = opt_output_lay.get_linear_offset(ref_out_coords);
|
||||
auto opt_out_val = opt_ptr[opt_out_offset];
|
||||
EXPECT_EQ(ref_out_offset, opt_out_offset);
|
||||
if (std::is_same<T, FLOAT16>::value) {
|
||||
EXPECT_NEAR(static_cast<float>(opt_out_val), static_cast<float>(ref_out_val), 1.e-1f);
|
||||
} else {
|
||||
EXPECT_EQ(opt_out_val, ref_out_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2011,14 +2013,15 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
|
||||
return true;
|
||||
}
|
||||
|
||||
void execute_compare(const resample_opt_random_test_params& params, bool check_result) {
|
||||
void execute_compare(const resample_opt_random_test_params& params, bool check_result, const std::string& kernel = "resample_opt") {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
auto in_layout = layout(params.input_type, format::bfyx, params.input_size);
|
||||
const format origin_format = format::dimension(params.in_format) == 4 ? format::bfyx : format::bfzyx;
|
||||
auto in_layout = layout(params.input_type, origin_format, params.input_size);
|
||||
auto in_mem = engine.allocate_memory(in_layout);
|
||||
fill_random(in_mem);
|
||||
|
||||
/// bfyx
|
||||
/// bfyx or bfzyx
|
||||
cldnn::topology topo;
|
||||
topo.add(input_layout("in", in_layout));
|
||||
auto prim = resample("resample", "in", params.output_size, params.num_filter, params.operation_type);
|
||||
@@ -2043,11 +2046,11 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
|
||||
prim_opt.pads_begin = params.pads_begin;
|
||||
prim_opt.pads_end = params.pads_end;
|
||||
topo_opt.add(prim_opt);
|
||||
topo_opt.add(reorder("to_output_type", "resample_opt", params.out_format, params.input_type));
|
||||
topo_opt.add(reorder("res_to_bfyx", "to_output_type", format::bfyx, params.input_type));
|
||||
topo_opt.add(reorder("res_to_bfyx", "resample_opt", origin_format, params.input_type));
|
||||
|
||||
auto build_opts_opt = build_options();
|
||||
build_opts_opt.set_option(build_option::outputs({"resample_opt", "to_output_type", "res_to_bfyx"}));
|
||||
build_opts_opt.set_option(build_option::outputs({"resample_opt", "res_to_bfyx"}));
|
||||
build_opts_opt.set_option(build_option::force_implementations({ {"resample_opt", {params.in_format, kernel}} }));
|
||||
|
||||
network net_opt(engine, topo_opt, build_opts_opt);
|
||||
|
||||
@@ -2075,6 +2078,123 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
struct resample_opt_random_test_ext : resample_opt_random_test
|
||||
{
|
||||
static double get_exectime(const std::map<cldnn::primitive_id, cldnn::network_output>& outputs,
|
||||
const std::string& primitive_id)
|
||||
{
|
||||
using namespace std::chrono;
|
||||
std::shared_ptr<event> e = outputs.at(primitive_id).get_event();
|
||||
e->wait(); // should ensure execution completion, if not segfault will occur
|
||||
double avg_time = 0.0;
|
||||
auto intervals = e->get_profiling_info();
|
||||
for (const auto& q : intervals)
|
||||
{
|
||||
if (q.stage == instrumentation::profiling_stage::executing) {
|
||||
continue;
|
||||
}
|
||||
avg_time = duration_cast<duration<double, microseconds::period>>(q.value->value()).count();
|
||||
break;
|
||||
}
|
||||
return avg_time;
|
||||
}
|
||||
|
||||
static void print_all_perf(std::map<primitive_id, network_output> outputs)
|
||||
{
|
||||
std::cout << "Print last run time" << std::endl;
|
||||
using namespace std::chrono;
|
||||
for( const auto &n : outputs ) {
|
||||
std::shared_ptr<event> e = n.second.get_event();
|
||||
auto intervals = e->get_profiling_info();
|
||||
double time = 0.0;
|
||||
for (const auto& q : intervals)
|
||||
{
|
||||
if (q.stage == instrumentation::profiling_stage::executing) {
|
||||
continue;
|
||||
}
|
||||
time = duration_cast<duration<double, microseconds::period>>(q.value->value()).count();
|
||||
break;
|
||||
}
|
||||
std::cout << n.first << ":" << time << std::endl;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
cldnn::engine_configuration get_profiling_config() {
|
||||
//const bool enable_profiling = true;
|
||||
std::string sources_dumps_dir = "";
|
||||
cldnn::queue_types queue_type = cldnn::queue_types::out_of_order;
|
||||
priority_mode_types priority_mode = priority_mode_types::disabled;
|
||||
throttle_mode_types throttle_mode = throttle_mode_types::disabled;
|
||||
bool use_memory_pool = true;
|
||||
bool use_unified_shared_memory = true;
|
||||
return engine_configuration(true, queue_type, sources_dumps_dir, priority_mode, throttle_mode, use_memory_pool, use_unified_shared_memory);
|
||||
}
|
||||
|
||||
void execute_perf_test(const resample_opt_random_test_params& params, const std::string& kernel, const bool do_planar = false) {
|
||||
auto& engine = get_test_engine(get_profiling_config());
|
||||
|
||||
const format origin_format = format::dimension(params.in_format) == 4 ? format::bfyx : format::bfzyx;
|
||||
auto in_layout = layout(params.input_type, origin_format, params.input_size);
|
||||
auto in_mem = engine.allocate_memory(in_layout);
|
||||
fill_random(in_mem);
|
||||
|
||||
format working_format = do_planar == true ? origin_format : format(params.in_format);
|
||||
|
||||
cldnn::topology topo_opt;
|
||||
topo_opt.add(input_layout("in", in_layout));
|
||||
topo_opt.add(reorder("in_to_input_type", "in", working_format, params.input_type));
|
||||
auto prim_opt = resample("resample_opt", "in_to_input_type", params.output_size, params.num_filter, params.operation_type);
|
||||
prim_opt.pads_begin = params.pads_begin;
|
||||
prim_opt.pads_end = params.pads_end;
|
||||
topo_opt.add(prim_opt);
|
||||
topo_opt.add(reorder("res_to_bfyx", "resample_opt", origin_format, params.input_type));
|
||||
|
||||
auto build_opts_opt = build_options();
|
||||
build_opts_opt.set_option(build_option::outputs({"res_to_bfyx"}));
|
||||
build_opts_opt.set_option(build_option::force_implementations({ {"resample_opt", {working_format, kernel}} }));
|
||||
build_opts_opt.set_option(build_option::debug(true));
|
||||
// optimize_data is turned on to test cross-layout
|
||||
|
||||
network net_opt(engine, topo_opt, build_opts_opt);
|
||||
|
||||
// Use in_mem from ref network
|
||||
net_opt.set_input_data("in", in_mem);
|
||||
|
||||
// first execution of opt
|
||||
std::map<primitive_id, network_output> result_opt;
|
||||
auto r = 100;
|
||||
double exectime = 0.f;
|
||||
for (int i = 0; i < r; ++i) {
|
||||
result_opt = net_opt.execute();
|
||||
exectime += get_exectime(result_opt, "resample_opt");
|
||||
}
|
||||
exectime /= r;
|
||||
std::string frm_str = format(working_format).to_string();
|
||||
std::string input_type = data_type_traits::name(params.input_type);
|
||||
std::string is_opt = (do_planar == true) ? " not optimazed " : " optimized ";
|
||||
std::string mode;
|
||||
switch (params.operation_type) {
|
||||
case resample::InterpolateOp::InterpolateMode::NEAREST:
|
||||
mode = "nearest";
|
||||
break;
|
||||
case resample::InterpolateOp::InterpolateMode::LINEAR_ONNX:
|
||||
mode = "onnx";
|
||||
break;
|
||||
default:
|
||||
mode = "unknown";
|
||||
}
|
||||
|
||||
std::cout << "Exectued time " << "" << mode << " " << is_opt << " " << kernel << " " << " input(" << params.input_size.to_string()
|
||||
<< ") output(" << params.output_size.to_string() << ") "
|
||||
<< frm_str << " " << input_type << " " << exectime << std::endl;
|
||||
|
||||
// Uncomment line below if you like to see the latencies of all operations from last iteration
|
||||
//print_all_perf(result_opt);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(resample_opt_random_test, random) {
|
||||
@@ -2082,6 +2202,16 @@ TEST_P(resample_opt_random_test, random) {
|
||||
execute_compare(param, true);
|
||||
}
|
||||
|
||||
TEST_P(resample_opt_random_test_ext, DISABLED_random) {
|
||||
auto param = GetParam();
|
||||
// Comparison tests (2 lines below) are disabled because they took too much time on big shapes
|
||||
// execute_compare(param, true, "resample_opt");
|
||||
// execute_compare(param, true, "resample_ref");
|
||||
execute_perf_test(param, "resample_opt");
|
||||
execute_perf_test(param, "resample_ref", false);
|
||||
execute_perf_test(param, "resample_ref", true);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_nearest,
|
||||
resample_opt_random_test,
|
||||
testing::ValuesIn(
|
||||
@@ -2091,6 +2221,12 @@ INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_nearest,
|
||||
{ data_types::i8, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {}, {}},
|
||||
{ data_types::i8, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, {}, {}},
|
||||
{ data_types::i8, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_yx_bsv16_fsv16, format::bs_fs_yx_bsv16_fsv16, {}, {}},
|
||||
|
||||
{ data_types::u8, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16, {}, {}},
|
||||
{ data_types::u8, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {}, {}},
|
||||
{ data_types::u8, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {}, {}},
|
||||
{ data_types::u8, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, {}, {}},
|
||||
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16, {}, {}},
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {}, {}},
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {}, {}},
|
||||
@@ -2111,3 +2247,78 @@ INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx,
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv32, {}, {}},
|
||||
}
|
||||
));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_5d_nearest,
|
||||
resample_opt_random_test,
|
||||
testing::ValuesIn(
|
||||
std::vector<resample_opt_random_test_params>{
|
||||
{ data_types::i8, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
|
||||
{ data_types::i8, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv32, format::b_fs_zyx_fsv32, {}, {}},
|
||||
{ data_types::i8, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_zyx_bsv16_fsv32, format::bs_fs_zyx_bsv16_fsv32, {}, {}},
|
||||
{ data_types::i8, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_zyx_bsv32_fsv32, format::bs_fs_zyx_bsv32_fsv32, {}, {}},
|
||||
|
||||
{ data_types::u8, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
|
||||
{ data_types::u8, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv32, format::b_fs_zyx_fsv32, {}, {}},
|
||||
{ data_types::u8, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_zyx_bsv16_fsv32, format::bs_fs_zyx_bsv16_fsv32, {}, {}},
|
||||
{ data_types::u8, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_zyx_bsv32_fsv32, format::bs_fs_zyx_bsv32_fsv32, {}, {}},
|
||||
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv32, format::b_fs_zyx_fsv32, {}, {}},
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_zyx_bsv16_fsv32, format::bs_fs_zyx_bsv16_fsv32, {}, {}},
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::bs_fs_zyx_bsv32_fsv32, format::bs_fs_zyx_bsv32_fsv32, {}, {}},
|
||||
}
|
||||
));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_5d_onnx,
|
||||
resample_opt_random_test,
|
||||
testing::ValuesIn(
|
||||
std::vector<resample_opt_random_test_params>{
|
||||
{ data_types::f16, {1, 16, 13, 13, 5}, {1, 16, 26, 26, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
|
||||
{ data_types::f32, {1, 16, 13, 13, 5}, {1, 16, 26, 26, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
|
||||
{ data_types::f16, {16, 16, 7, 7, 5}, {16, 16, 14, 14, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv16_fsv16, format::bs_fs_zyx_bsv16_fsv16, {}, {}},
|
||||
{ data_types::f32, {16, 16, 7, 7, 5}, {16, 16, 14, 14, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv16_fsv16, format::bs_fs_zyx_bsv16_fsv16, {}, {}},
|
||||
{ data_types::f16, {32, 16, 7, 7, 5}, {32, 16, 14, 14, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv16, format::bs_fs_zyx_bsv32_fsv16, {}, {}},
|
||||
{ data_types::f32, {32, 16, 7, 7, 5}, {32, 16, 14, 14, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv16, format::bs_fs_zyx_bsv32_fsv16, {}, {}},
|
||||
|
||||
{ data_types::i8, {1, 16, 13, 13, 5}, {1, 16, 26, 26, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv32, format::b_fs_zyx_fsv32, {}, {}},
|
||||
{ data_types::u8, {1, 16, 13, 13, 5}, {1, 16, 26, 26, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv32, format::b_fs_zyx_fsv32, {}, {}},
|
||||
{ data_types::i8, {16, 16, 7, 7, 5}, {16, 16, 14, 14, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv16_fsv32, format::bs_fs_zyx_bsv16_fsv32, {}, {}},
|
||||
{ data_types::u8, {16, 16, 7, 7, 5}, {16, 16, 14, 14, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv16_fsv32, format::bs_fs_zyx_bsv16_fsv32, {}, {}},
|
||||
{ data_types::i8, {32, 16, 7, 7, 5}, {32, 16, 14, 14, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv32, format::bs_fs_zyx_bsv32_fsv32, {}, {}},
|
||||
{ data_types::u8, {32, 16, 7, 7, 5}, {32, 16, 14, 14, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv32, format::bs_fs_zyx_bsv32_fsv32, {}, {}},
|
||||
}
|
||||
));
|
||||
|
||||
// those tests should be disabled or deleted
|
||||
INSTANTIATE_TEST_SUITE_P(resample_opt_perf_linear_5_onnx,
|
||||
resample_opt_random_test_ext,
|
||||
testing::ValuesIn(
|
||||
std::vector<resample_opt_random_test_params>{
|
||||
{ data_types::f16, {1, 32, 64, 64, 5}, {1, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
|
||||
{ data_types::f32, {1, 32, 64, 64, 5}, {1, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
|
||||
{ data_types::f16, {16, 32, 64, 64, 5}, {16, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv16_fsv16, format::bs_fs_zyx_bsv16_fsv16, {}, {}},
|
||||
{ data_types::f32, {16, 32, 64, 64, 5}, {16, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv16_fsv16, format::bs_fs_zyx_bsv16_fsv16, {}, {}},
|
||||
{ data_types::f16, {32, 32, 64, 64, 5}, {32, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv16, format::bs_fs_zyx_bsv32_fsv16, {}, {}},
|
||||
{ data_types::f32, {32, 32, 64, 64, 5}, {32, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv16, format::bs_fs_zyx_bsv32_fsv16, {}, {}},
|
||||
|
||||
{ data_types::i8, {1, 32, 64, 64, 5}, {1, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv32, format::b_fs_zyx_fsv32, {}, {}},
|
||||
{ data_types::u8, {1, 32, 64, 64, 5}, {1, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv32, format::b_fs_zyx_fsv32, {}, {}},
|
||||
{ data_types::i8, {16, 32, 64, 64, 5}, {16, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv16_fsv32, format::bs_fs_zyx_bsv16_fsv32, {}, {}},
|
||||
{ data_types::u8, {16, 32, 64, 64, 5}, {16, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv16_fsv32, format::bs_fs_zyx_bsv16_fsv32, {}, {}},
|
||||
{ data_types::i8, {32, 32, 64, 64, 5}, {32, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv32, format::bs_fs_zyx_bsv32_fsv32, {}, {}},
|
||||
{ data_types::u8, {32, 32, 64, 64, 5}, {32, 32, 128, 128, 5}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv32, format::bs_fs_zyx_bsv32_fsv32, {}, {}},
|
||||
|
||||
{ data_types::f16, {32, 32, 256, 256, 1}, {32, 32, 512, 512, 1}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_zyx_bsv32_fsv16, format::bs_fs_zyx_bsv32_fsv16, {}, {}},
|
||||
{ data_types::f16, {1, 32, 64, 64, 32}, {1, 32, 128, 128, 32}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
|
||||
}
|
||||
));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(resample_opt_perf_linear_5_nearest,
|
||||
resample_opt_random_test_ext,
|
||||
testing::ValuesIn(
|
||||
std::vector<resample_opt_random_test_params>{
|
||||
{ data_types::f16, {1, 128, 16, 16, 16}, {1, 128, 32, 32, 32}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
|
||||
{ data_types::f16, {1, 128, 32, 32, 32}, {1, 128, 64, 64, 64}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
|
||||
{ data_types::f16, {1, 128, 64, 64, 64}, {1, 128, 128, 128, 128}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
|
||||
}
|
||||
));
|
||||
|
||||
@@ -310,6 +310,14 @@ cldnn::engine& get_test_engine() {
|
||||
return *test_engine;
|
||||
}
|
||||
|
||||
cldnn::engine& get_test_engine(const cldnn::engine_configuration& configuration) {
|
||||
static std::shared_ptr<cldnn::engine> test_engine = nullptr;
|
||||
if (!test_engine) {
|
||||
test_engine = cldnn::engine::create(engine_types::ocl, runtime_types::ocl, configuration);
|
||||
}
|
||||
return *test_engine;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
cldnn::engine& get_onednn_test_engine() {
|
||||
static std::shared_ptr<cldnn::engine> test_engine = nullptr;
|
||||
|
||||
@@ -54,6 +54,7 @@ namespace tests {
|
||||
|
||||
std::shared_ptr<cldnn::engine> create_test_engine(cldnn::queue_types queue_type = cldnn::queue_types::out_of_order);
|
||||
cldnn::engine& get_test_engine();
|
||||
cldnn::engine& get_test_engine(const cldnn::engine_configuration& configuration);
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
cldnn::engine& get_onednn_test_engine();
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user