[GPU] Feature/intepolate 3 axes onnx 5d (#13796)

added 3-axis interpolation for the linear-onnx mode
fixed resample_opt for the onnx mode; it did not work when padding was used
added tests for the new implementation and the fix

@OlehKravchyshyn
OlehKravchyshyn 2023-01-31 08:45:53 +02:00 committed by GitHub
parent d1397b7b48
commit 4700207af0
8 changed files with 532 additions and 78 deletions
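
For context (not part of the diff): the new resample_onnx kernel computes each output value by linear-onnx trilinear interpolation: every output coordinate is mapped back onto the padded input grid, clamped, and blended from the eight surrounding samples, with reads that fall into the padding contributing zero. Below is a minimal host-side C++ sketch of that computation for a single output point, assuming the half_pixel coordinate transformation mode; the names (resolve_axis, interp_point, ...) are illustrative and do not appear in the sources.

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

struct Axis { int i1, i2; float d1, d2; };

// Map an output index onto the padded input grid (half_pixel transform) and
// pick the two neighbouring indices plus their interpolation distances.
static Axis resolve_axis(int out_idx, int out_len, int padded_len) {
    float scale = static_cast<float>(padded_len) / out_len;   // output -> padded input
    float c = (out_idx + 0.5f) * scale - 0.5f;                // half_pixel
    c = std::max(0.0f, std::min(c, padded_len - 1.0f));
    Axis a;
    a.i1 = std::min(static_cast<int>(c), padded_len - 1);
    a.i2 = std::min(a.i1 + 1, padded_len - 1);
    a.d1 = (a.i1 != a.i2) ? std::fabs(c - a.i1) : 0.5f;       // weight of the sample at i2
    a.d2 = (a.i1 != a.i2) ? std::fabs(c - a.i2) : 0.5f;       // weight of the sample at i1
    return a;
}

// Padded positions lie outside the real tensor and contribute zero.
static float at(const std::vector<float>& in, int D, int H, int W, int z, int y, int x) {
    if (z < 0 || z >= D || y < 0 || y >= H || x < 0 || x >= W) return 0.0f;
    return in[(z * H + y) * W + x];
}

// Trilinear linear-onnx interpolation of one output point (oz, oy, ox).
static float interp_point(const std::vector<float>& in, int D, int H, int W,
                          const int pads_begin[3], const int pads_end[3],
                          int oz, int oy, int ox, int OD, int OH, int OW) {
    Axis z = resolve_axis(oz, OD, D + pads_begin[0] + pads_end[0]);
    Axis y = resolve_axis(oy, OH, H + pads_begin[1] + pads_end[1]);
    Axis x = resolve_axis(ox, OW, W + pads_begin[2] + pads_end[2]);
    // Shift padded-grid indices back into the real input tensor.
    const int z1 = z.i1 - pads_begin[0], z2 = z.i2 - pads_begin[0];
    const int y1 = y.i1 - pads_begin[1], y2 = y.i2 - pads_begin[1];
    const int x1 = x.i1 - pads_begin[2], x2 = x.i2 - pads_begin[2];
    auto v = [&](int zz, int yy, int xx) { return at(in, D, H, W, zz, yy, xx); };
    return z.d2 * (y.d2 * (x.d2 * v(z1, y1, x1) + x.d1 * v(z1, y1, x2)) +
                   y.d1 * (x.d2 * v(z1, y2, x1) + x.d1 * v(z1, y2, x2))) +
           z.d1 * (y.d2 * (x.d2 * v(z2, y1, x1) + x.d1 * v(z2, y1, x2)) +
                   y.d1 * (x.d2 * v(z2, y2, x1) + x.d1 * v(z2, y2, x2)));
}

int main() {
    // 2x2x2 input upscaled to 4x4x4 with no padding; the far corner reproduces in[7].
    std::vector<float> in = {0, 1, 2, 3, 4, 5, 6, 7};
    const int pb[3] = {0, 0, 0}, pe[3] = {0, 0, 0};
    std::printf("%f\n", interp_point(in, 2, 2, 2, pb, pe, 3, 3, 3, 4, 4, 4)); // prints 7.000000
    return 0;
}

The kernel below vectorizes this over a sub-group of feature channels and an X block, but the per-point arithmetic is the same.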

View File

@@ -0,0 +1,229 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "include/fetch_utils.cl"
#include "include/batch_headers/sub_group_block_read.cl"
#include "include/batch_headers/sub_group_block_write.cl"
#define READ_FUNC(ptr, offset) BLOCK_READN(INPUT0_TYPE, VEC_SIZE, ptr, offset)
#define WRITE_FUNC(ptr, offset, val) BLOCK_WRITEN(OUTPUT_TYPE, VEC_SIZE, ptr, offset, val)
#define IN_VEC_TYPE MAKE_VECTOR_TYPE(INPUT0_TYPE, VEC_SIZE)
#define TO_IN_VEC_TYPE(x) CAT(convert_, IN_VEC_TYPE)(x)
#define ACC_VEC_TYPE MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, VEC_SIZE)
#define TO_ACC_VEC_TYPE(x) CAT(convert_, ACC_VEC_TYPE)(x)
#define OUT_VEC_TYPE MAKE_VECTOR_TYPE(OUTPUT_TYPE, VEC_SIZE)
#define TO_OUT_VEC_TYPE(x) CAT(convert_, OUT_VEC_TYPE)(x)
inline float FUNC(get_original_coordinate)(float num, float scale, int length_resized, int length_original)
{
if (scale == 1.0f)
return num;
#if defined(COORD_TRANS_MODE_HALF_PIXEL)
return (num + 0.5f) * scale - 0.5f;
#elif defined(COORD_TRANS_MODE_PYTORCH_HALF_PIXEL)
return (length_resized > 1) ? (num + 0.5f) * scale - 0.5f : 0.f;
#elif defined(COORD_TRANS_MODE_ASYMMETRIC)
return num * scale;
#elif defined(COORD_TRANS_MODE_TF_HALF_PIXEL_FOR_NN)
return (num + 0.5f) * scale;
#elif defined(COORD_TRANS_MODE_ALIGN_CORNERS)
return (length_resized != 1) ? num * (length_original - 1) / (length_resized - 1) : 0.f;
#else
#error [clDNN resample_onnx.cl]: coordinate transformation mode - not supported
#endif
}
REQD_SUB_GROUP_SIZE(SUB_GROUP_SIZE)
KERNEL (resample_onnx)(__global INPUT0_TYPE* input,
__global OUTPUT_TYPE* output
#if HAS_FUSED_OPS_DECLS
, FUSED_OPS_DECLS
#endif // #if HAS_FUSED_OPS_DECLS
)
{
const int xyz = get_global_id(0);
const int z = xyz / (OUTPUT_SIZE_Y * X_BLOCKS);
const int xy = xyz % (OUTPUT_SIZE_Y * X_BLOCKS);
const int x = (xy % X_BLOCKS) * OUTPUT_X_BLOCK_SIZE;
const int y = (xy / X_BLOCKS);
const int f_block = get_group_id(1);
const int b = get_global_id(2);
int feature_num = f_block * FEATURE_SLICE_SIZE + get_sub_group_local_id();
const uint feature_block = f_block * FEATURE_SLICE_SIZE;
typedef IN_VEC_TYPE in_vec_t;
typedef ACC_VEC_TYPE acc_vec_t;
const int in_size[5] = { INPUT0_BATCH_NUM, INPUT0_FEATURE_NUM, INPUT0_SIZE_Z, INPUT0_SIZE_Y, INPUT0_SIZE_X };
if (feature_num >= OUTPUT_FEATURE_NUM)
return;
const int PADDED_Y = INPUT0_SIZE_Y + PADS_BEGIN[3] + PADS_END[3];
const int PADDED_X = INPUT0_SIZE_X + PADS_BEGIN[4] + PADS_END[4];
const ACCUMULATOR_TYPE iy = FUNC_CALL(get_original_coordinate)(y, SCALES[3], OUTPUT_SIZE_Y, PADDED_Y);
float in_y = fmax(0, fmin(iy, PADDED_Y - 1));
int in_y1 = min((int)in_y, PADDED_Y - 1);
int in_y2 = min(in_y1 + 1, PADDED_Y - 1);
const ACCUMULATOR_TYPE dy1 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y1)) : 0.5f;
const ACCUMULATOR_TYPE dy2 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y2)) : 0.5f;
#if defined (THREE_SPATIAL_RESAMPLE)
const int PADDED_Z = INPUT0_SIZE_Z + PADS_BEGIN[2] + PADS_END[2];
const ACCUMULATOR_TYPE iz = FUNC_CALL(get_original_coordinate)(z, SCALES[2], OUTPUT_SIZE_Z, PADDED_Z);
float in_z = fmax(0, fmin(iz, PADDED_Z - 1));
int in_z1 = min((int)in_z, PADDED_Z - 1);
int in_z2 = min(in_z1 + 1, PADDED_Z - 1);
const ACCUMULATOR_TYPE dz1 = (in_z1 != in_z2) ? TO_ACCUMULATOR_TYPE(fabs(in_z - in_z1)) : 0.5f;
const ACCUMULATOR_TYPE dz2 = (in_z1 != in_z2) ? TO_ACCUMULATOR_TYPE(fabs(in_z - in_z2)) : 0.5f;
#if PADDING_USED == 1
const int saved_in_z1 = in_z1;
const int saved_in_z2 = in_z2;
const int saved_in_y1 = in_y1;
const int saved_in_y2 = in_y2;
#endif // PADDING_USED == 1
unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) {
const ACCUMULATOR_TYPE ix = FUNC_CALL(get_original_coordinate)(x + out_x, SCALES[4], OUTPUT_SIZE_X, PADDED_X);
float in_x = fmax(0, fmin(ix, PADDED_X - 1));
int in_x1 = min((int)in_x, PADDED_X - 1);
int in_x2 = min(in_x1 + 1, PADDED_X - 1);
const ACCUMULATOR_TYPE dx1 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x1)) : 0.5f;
const ACCUMULATOR_TYPE dx2 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x2)) : 0.5f;
#if PADDING_USED == 1
in_z1 = saved_in_z1;
in_z2 = saved_in_z2;
in_y1 = saved_in_y1;
in_y2 = saved_in_y2;
in_z1 -= PADS_BEGIN[2];
in_z2 -= PADS_BEGIN[2];
in_y1 -= PADS_BEGIN[3];
in_y2 -= PADS_BEGIN[3];
in_x1 -= PADS_BEGIN[4];
in_x2 -= PADS_BEGIN[4];
bool BackTopLOutOfBounds = in_z1 < 0 || in_z1 >= INPUT0_SIZE_Z || in_y1 < 0 || in_y1 >= INPUT0_SIZE_Y || in_x1 < 0|| in_x1 >= INPUT0_SIZE_X;
bool BackTopROutOfBounds = in_z1 < 0 || in_z1 >= INPUT0_SIZE_Z || in_y1 < 0 || in_y1 >= INPUT0_SIZE_Y || in_x2 < 0 || in_x2 >= INPUT0_SIZE_X;
bool BackBottomLOutOfBounds = in_z1 < 0 || in_z1 >= INPUT0_SIZE_Z || in_y2 < 0 || in_y2 >= INPUT0_SIZE_Y || in_x1 < 0 || in_x1 >= INPUT0_SIZE_X;
bool BackBottomROutOfBounds = in_z1 < 0 || in_z1 >= INPUT0_SIZE_Z || in_y2 < 0 || in_y2 >= INPUT0_SIZE_Y || in_x2 < 0 || in_x2 >= INPUT0_SIZE_X;
bool FrontTopLOutOfBounds = in_z2 < 0 || in_z2 >= INPUT0_SIZE_Z || in_y1 < 0 || in_y1 >= INPUT0_SIZE_Y || in_x1 < 0 || in_x1 >= INPUT0_SIZE_X;
bool FrontTopROutOfBounds = in_z2 < 0 || in_z2 >= INPUT0_SIZE_Z || in_y1 < 0 || in_y1 >= INPUT0_SIZE_Y || in_x2 < 0 || in_x2 >= INPUT0_SIZE_X;
bool FrontBottomLOutOfBounds = in_z2 < 0 || in_z2 >= INPUT0_SIZE_Z || in_y2 < 0 || in_y2 >= INPUT0_SIZE_Y || in_x1 < 0 || in_x1 >= INPUT0_SIZE_X;
bool FrontBottomROutOfBounds = in_z2 < 0 || in_z2 >= INPUT0_SIZE_Z || in_y2 < 0 || in_y2 >= INPUT0_SIZE_Y || in_x2 < 0 || in_x2 >= INPUT0_SIZE_X;
const acc_vec_t x111 = BackTopLOutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y1, in_x1)));
const acc_vec_t x211 = BackTopROutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y1, in_x2)));
const acc_vec_t x121 = BackBottomLOutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y2, in_x1)));
const acc_vec_t x221 = BackBottomROutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y2, in_x2)));
const acc_vec_t x112 = FrontTopLOutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y1, in_x1)));
const acc_vec_t x212 = FrontTopROutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y1, in_x2)));
const acc_vec_t x122 = FrontBottomLOutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y2, in_x1)));
const acc_vec_t x222 = FrontBottomROutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y2, in_x2)));
#else
const acc_vec_t x111 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y1, in_x1)));
const acc_vec_t x211 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y1, in_x2)));
const acc_vec_t x121 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y2, in_x1)));
const acc_vec_t x221 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y2, in_x2)));
const acc_vec_t x112 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y1, in_x1)));
const acc_vec_t x212 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y1, in_x2)));
const acc_vec_t x122 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y2, in_x1)));
const acc_vec_t x222 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y2, in_x2)));
#endif // PADDING_USED == 1
acc_vec_t res = TO_ACC_VEC_TYPE(dx2 * dy2 * dz2 * x111) + TO_ACC_VEC_TYPE(dx1 * dy2 * dz2 * x211);
res += TO_ACC_VEC_TYPE(dx2 * dy1 * dz2 * x121) + TO_ACC_VEC_TYPE(dx1 * dy1 * dz2 * x221);
res += TO_ACC_VEC_TYPE(dx2 * dy2 * dz1 * x112) + TO_ACC_VEC_TYPE(dx1 * dy2 * dz1 * x212);
res += TO_ACC_VEC_TYPE(dx2 * dy1 * dz1 * x122) + TO_ACC_VEC_TYPE(dx1 * dy1 * dz1 * x222);
#if HAS_FUSED_OPS
FUSED_OPS;
OUT_VEC_TYPE out = FUSED_OPS_RESULT;
#else
OUT_VEC_TYPE out = TO_OUT_VEC_TYPE(ACTIVATION(res, ACTIVATION_PARAMS));
#endif // #if HAS_FUSED_OPS
WRITE_FUNC(output, OUTPUT_GET_INDEX(b, feature_block, z, y, (x + out_x)), out);
}
#else // #if defined (THREE_SPATIAL_RESAMPLE)
#if PADDING_USED == 1
const int saved_in_y1 = in_y1;
const int saved_in_y2 = in_y2;
#endif
unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) {
const ACCUMULATOR_TYPE ix = FUNC_CALL(get_original_coordinate)(x + out_x, SCALES[4], OUTPUT_SIZE_X, PADDED_X);
float in_x = fmax(0, fmin(ix, PADDED_X - 1));
int in_x1 = min((int)in_x, PADDED_X - 1);
int in_x2 = min(in_x1 + 1, PADDED_X - 1);
const ACCUMULATOR_TYPE dx1 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x1)) : 0.5f;
const ACCUMULATOR_TYPE dx2 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x2)) : 0.5f;
#if PADDING_USED == 1
in_y1 = saved_in_y1;
in_y2 = saved_in_y2;
in_y1 -= PADS_BEGIN[3];
in_y2 -= PADS_BEGIN[3];
in_x1 -= PADS_BEGIN[4];
in_x2 -= PADS_BEGIN[4];
bool tlOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
bool trOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
bool blOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
bool brOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
#endif // PADDING_USED == 1
#if OUTPUT_DIMS == 5
acc_vec_t top_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y1, in_x1)));
acc_vec_t top_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y1, in_x2)));
acc_vec_t bottom_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y2, in_x1)));
acc_vec_t bottom_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y2, in_x2)));
#else
acc_vec_t top_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y1, in_x1)));
acc_vec_t top_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y1, in_x2)));
acc_vec_t bottom_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y2, in_x1)));
acc_vec_t bottom_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y2, in_x2)));
#endif
#if PADDING_USED == 1
if (tlOutOfBounds)
top_left = TO_OUT_VEC_TYPE(INPUT0_VAL_ZERO);
if (trOutOfBounds)
top_right = TO_OUT_VEC_TYPE(INPUT0_VAL_ZERO);
if (blOutOfBounds)
bottom_left = TO_OUT_VEC_TYPE(INPUT0_VAL_ZERO);
if (brOutOfBounds)
bottom_right = TO_OUT_VEC_TYPE(INPUT0_VAL_ZERO);
#endif // PADDING_USED == 1
acc_vec_t res = TO_ACC_VEC_TYPE(dx2 * dy2 * top_left) +
TO_ACC_VEC_TYPE(dx1 * dy2 * top_right) +
TO_ACC_VEC_TYPE(dx2 * dy1 * bottom_left) +
TO_ACC_VEC_TYPE(dx1 * dy1 * bottom_right);
#if HAS_FUSED_OPS
FUSED_OPS;
OUT_VEC_TYPE out = FUSED_OPS_RESULT;
#else
OUT_VEC_TYPE out = TO_OUT_VEC_TYPE(ACTIVATION(res, ACTIVATION_PARAMS));
#endif
#if OUTPUT_DIMS == 5
WRITE_FUNC(output, OUTPUT_GET_INDEX(b, feature_block, z, y, (x + out_x)), out);
#else
WRITE_FUNC(output, OUTPUT_GET_INDEX(b, feature_block, y, (x + out_x)), out);
#endif
}
#endif // #if defined (THREE_SPATIAL_RESAMPLE)
}
#undef TRIANGLE_COEFF
#undef READ_FUNC
#undef WRITE_FUNC

View File

@@ -24,6 +24,8 @@
inline float FUNC(get_original_coordinate)(float num, float scale, int length_resized, int length_original)
{
if (scale == 1.0f)
return num;
#if defined(COORD_TRANS_MODE_HALF_PIXEL)
return (num + 0.5f) * scale - 0.5f;
#elif defined(COORD_TRANS_MODE_PYTORCH_HALF_PIXEL)
@@ -231,6 +233,7 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
typedef IN_VEC_TYPE in_vec_t;
typedef ACC_VEC_TYPE acc_vec_t;
const int in_size[5] = { INPUT0_BATCH_NUM, INPUT0_FEATURE_NUM, INPUT0_SIZE_Z, INPUT0_SIZE_Y, INPUT0_SIZE_X };
#ifdef SAMPLE_TYPE_NEAREST
unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) {
@@ -272,68 +275,15 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
const acc_vec_t bottom = TO_ACC_VEC_TYPE(bottom_left) + (TO_ACC_VEC_TYPE(bottom_right) - TO_ACC_VEC_TYPE(bottom_left)) * dx;
acc_vec_t res = top + (bottom - top) * dy;
#else // defined(SAMPLE_TYPE_LINEAR_ONNX)
const int PADDED_Y = INPUT0_SIZE_Y + PADS_BEGIN[3] + PADS_END[3];
const int PADDED_X = INPUT0_SIZE_X + PADS_BEGIN[4] + PADS_END[4];
const ACCUMULATOR_TYPE iy = FUNC_CALL(get_original_coordinate)(y, SCALES[3], OUTPUT_SIZE_Y, PADDED_Y);
float in_y = fmax(0, fmin(iy, PADDED_Y - 1));
int in_y1 = min((int)in_y, PADDED_Y - 1);
int in_y2 = min(in_y1 + 1, PADDED_Y - 1);
const ACCUMULATOR_TYPE dy1 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y1)) : 0.5f;
const ACCUMULATOR_TYPE dy2 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y2)) : 0.5f;
unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) {
const ACCUMULATOR_TYPE ix = FUNC_CALL(get_original_coordinate)(x + out_x, SCALES[4], OUTPUT_SIZE_X, PADDED_X);
float in_x = fmax(0, fmin(ix, PADDED_X - 1));
int in_x1 = min((int)in_x, PADDED_X - 1);
int in_x2 = min(in_x1 + 1, PADDED_X - 1);
const ACCUMULATOR_TYPE dx1 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x1)) : 0.5f;
const ACCUMULATOR_TYPE dx2 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x2)) : 0.5f;
#if PADDING_USED == 1
in_y1 -= PADS_BEGIN[3];
in_y2 -= PADS_BEGIN[3];
in_x1 -= PADS_BEGIN[4];
in_x2 -= PADS_BEGIN[4];
bool tlOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
bool trOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
bool blOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
bool brOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
#endif // PADDING_USED == 1
#if OUTPUT_DIMS == 5
const acc_vec_t top_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y1, in_x1)));
const acc_vec_t top_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y1, in_x2)));
const acc_vec_t bottom_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y2, in_x1)));
const acc_vec_t bottom_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y2, in_x2)));
#else
const acc_vec_t top_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y1, in_x1)));
const acc_vec_t top_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y1, in_x2)));
const acc_vec_t bottom_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y2, in_x1)));
const acc_vec_t bottom_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y2, in_x2)));
#error [clDNN resample_opt.cl]: unsupported resample type
#endif
#if PADDING_USED == 1
if (tlOutOfBounds)
top_left = INPUT0_VAL_ZERO;
if (trOutOfBounds)
top_right = INPUT0_VAL_ZERO;
if (blOutOfBounds)
bottom_left = INPUT0_VAL_ZERO;
if (brOutOfBounds)
bottom_right = INPUT0_VAL_ZERO;
#endif // PADDING_USED == 1
acc_vec_t res = TO_ACC_VEC_TYPE(dx2 * dy2 * top_left) +
TO_ACC_VEC_TYPE(dx1 * dy2 * top_right) +
TO_ACC_VEC_TYPE(dx2 * dy1 * bottom_left) +
TO_ACC_VEC_TYPE(dx1 * dy1 * bottom_right);
#endif
#if HAS_FUSED_OPS
FUSED_OPS;
OUT_VEC_TYPE out = FUSED_OPS_RESULT;
#else
OUT_VEC_TYPE out = TO_OUT_VEC_TYPE(ACTIVATION(res, ACTIVATION_PARAMS));
#endif
#endif // #if HAS_FUSED_OPS
#if OUTPUT_DIMS == 5
WRITE_FUNC(output, OUTPUT_GET_INDEX(b, feature_block, z, y, (x + out_x)), out);

View File

@@ -0,0 +1,187 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "resample_kernel_onnx.h"
#include <kernel_selector_utils.h>
#include <vector>
namespace kernel_selector {
static constexpr size_t sub_group_size = 16;
size_t ResampleKernelOnnx::GetOptimalBlockSize(const resample_params& params) const {
std::vector<size_t> block_width = {16, 8, 4, 2, 1};
for (auto& w : block_width) {
if (params.outputs[0].X().v % w == 0) {
return w;
}
}
return 1;
}
static size_t GetOptimalDivisor(const size_t input_size, size_t max_val = 16) {
for (size_t s = max_val; s > 0; --s) {
if (input_size % s == 0) {
return s;
}
}
return 1;
}
ParamsKey ResampleKernelOnnx::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::UINT8);
k.EnableInputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT8);
// 4d formats
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv32);
k.EnableInputLayout(DataLayout::bs_fs_yx_bsv32_fsv16);
k.EnableInputLayout(DataLayout::bs_fs_yx_bsv32_fsv32);
k.EnableInputLayout(DataLayout::fs_b_yx_fsv32);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv16_fsv32);
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv32_fsv16);
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv32_fsv32);
k.EnableOutputLayout(DataLayout::fs_b_yx_fsv32);
// 5d formats
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv32_fsv16);
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv32_fsv16);
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv32);
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv32);
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv16_fsv32);
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv16_fsv32);
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv32_fsv32);
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv32_fsv32);
k.EnableDifferentTypes();
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
k.EnableReampleType(ResampleType::LINEAR_ONNX);
return k;
}
DeviceFeaturesKey ResampleKernelOnnx::get_required_device_features_key(const Params& params,
const optional_params& options) const {
return get_common_subgroups_device_features_key(params, options);
}
ResampleKernelBase::DispatchData ResampleKernelOnnx::SetDefault(const kernel_selector::resample_params& arg) const {
DispatchData dispatchData;
std::vector<std::vector<Tensor::DataChannelName>> dims_by_gws;
const auto& out = arg.outputs[0];
auto opt_x_block_size = GetOptimalBlockSize(arg);
if (out.X().v > 32 && opt_x_block_size == 1) {
opt_x_block_size = GetOptimalDivisor(out.X().v, 32);
}
dispatchData.gws[0] = CeilDiv(out.X().v, opt_x_block_size) * out.Y().v * out.Z().v;
dispatchData.gws[1] = Align(out.Feature().v, sub_group_size);
dispatchData.gws[2] = arg.outputs[0].Batch().v;
dispatchData.lws[0] = 1;
dispatchData.lws[1] = sub_group_size;
dispatchData.lws[2] = 1;
if (arg.outputs[0].GetLayout() == DataLayout::bs_fs_zyx_bsv32_fsv16 ||
arg.outputs[0].GetLayout() == DataLayout::bs_fs_zyx_bsv32_fsv32) {
dispatchData.lws[2] = GetOptimalDivisor(dispatchData.gws[2]);
}
return dispatchData;
}
KernelsPriority ResampleKernelOnnx::GetKernelsPriority(const Params& /*params*/,
const optional_params& /*options*/) const {
return FORCE_PRIORITY_4;
}
bool ResampleKernelOnnx::Validate(const Params& p, const optional_params& o) const {
const resample_params& params = static_cast<const resample_params&>(p);
if (!Parent::Validate(p, o))
return false;
const auto& input = params.inputs[0];
const auto& output = params.outputs[0];
if (input.Batch().v != output.Batch().v || input.Feature().v != output.Feature().v)
return false;
return true;
}
static bool IsThreeSpatialResample(const resample_params& params) {
const auto& input = params.inputs[0];
const auto& output = params.outputs[0];
if (input.Dimentions() == 5 && input.Z().v != output.Z().v)
return true;
return false;
}
JitConstants ResampleKernelOnnx::GetJitConstants(const resample_params& params) const {
auto jit = Parent::GetJitConstants(params);
auto opt_x_block_size = GetOptimalBlockSize(params);
if (params.outputs[0].X().v > 32 && opt_x_block_size == 1) {
opt_x_block_size = GetOptimalDivisor(params.outputs[0].X().v, 32);
}
jit.AddConstant(MakeJitConstant("OUTPUT_X_BLOCK_SIZE", opt_x_block_size));
jit.AddConstant(MakeJitConstant("X_BLOCKS", CeilDiv(params.outputs[0].X().v, opt_x_block_size)));
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size));
size_t vec_size = 0;
if (params.inputs[0].GetLayout() == DataLayout::fs_b_yx_fsv32) {
vec_size = 2;
jit.AddConstant(MakeJitConstant("FEATURE_SLICE_SIZE", 32));
} else {
vec_size = 1;
jit.AddConstant(MakeJitConstant("FEATURE_SLICE_SIZE", 16));
}
if (IsThreeSpatialResample(params))
jit.AddConstant(MakeJitConstant("THREE_SPATIAL_RESAMPLE", ""));
jit.AddConstant(MakeJitConstant("VEC_SIZE", vec_size));
if (!params.fused_ops.empty()) {
std::vector<std::string> idx_order;
if (params.inputs[0].Dimentions() == 5)
idx_order = {"b", "feature_block", "z", "y", "(x + out_x)"};
else
idx_order = {"b", "feature_block", "y", "(x + out_x)"};
FusedOpsConfiguration conf =
{"", idx_order, "res", GetAccumulatorType(params), vec_size, LoadType::LT_ALIGNED_READ};
conf.SetVectorAxis(Tensor::DataChannelName::FEATURE);
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
}
return jit;
}
KernelsData ResampleKernelOnnx::GetKernelsData(const Params& params, const optional_params& options) const {
return GetCommonKernelsData(params, options);
}
} // namespace kernel_selector
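
A side note on the dispatch (a sketch, not code from the commit): SetDefault above packs the output Z, Y and X-block coordinates into gws[0] and pads the feature dimension in gws[1] to the sub-group size, and the kernel unpacks gws[0] in the same order in its first lines. Roughly, with illustrative names (WorkItemCoords, unpack_gws0) that do not exist in the sources:

// X_BLOCKS = CeilDiv(OUTPUT_SIZE_X, OUTPUT_X_BLOCK_SIZE) in the JIT constants.
struct WorkItemCoords { int z; int y; int x_start; };

static WorkItemCoords unpack_gws0(int xyz, int output_size_y, int x_blocks, int x_block_size) {
    WorkItemCoords c;
    c.z = xyz / (output_size_y * x_blocks);        // slowest-varying: output Z slice
    const int xy = xyz % (output_size_y * x_blocks);
    c.y = xy / x_blocks;                           // output row
    c.x_start = (xy % x_blocks) * x_block_size;    // first X column handled by this work item
    return c;
}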

View File

@@ -0,0 +1,33 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "resample_kernel_base.h"
namespace kernel_selector {
class ResampleKernelOnnx : public ResampleKernelBase {
public:
using Parent = ResampleKernelBase;
ResampleKernelOnnx() : ResampleKernelBase("resample_onnx") {}
virtual ~ResampleKernelOnnx() = default;
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
DeviceFeaturesKey get_required_device_features_key(const Params& params,
const optional_params& /*options*/) const override;
protected:
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const resample_params& params) const override;
DispatchData SetDefault(const resample_params& arg) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return {FusedOpType::QUANTIZE, FusedOpType::ELTWISE, FusedOpType::ACTIVATION};
}
private:
size_t GetOptimalBlockSize(const resample_params& params) const;
};
} // namespace kernel_selector

View File

@@ -73,7 +73,6 @@ ParamsKey ResampleKernelOpt::GetSupportedKey() const {
k.EnableBatching();
k.EnableReampleType(ResampleType::BILINEAR_INTERP);
k.EnableReampleType(ResampleType::NEAREST_NEIGHBOR);
k.EnableReampleType(ResampleType::LINEAR_ONNX);
k.EnableReampleType(ResampleType::CAFFE_BILINEAR_INTERP);
return k;
}
@@ -140,31 +139,17 @@ bool ResampleKernelOpt::Validate(const Params& p, const optional_params& o) cons
if (!Parent::Validate(p, o))
return false;
if (p.GetType() != KernelType::RESAMPLE || o.GetType() != KernelType::RESAMPLE)
return false;
if (params.inputs.empty())
return false;
const auto& input = params.inputs[0];
const auto & output = params.outputs[0];
if ((input.GetDType() == Datatype::UINT8 || input.GetDType() == Datatype::INT8) &&
params.resampleType != ResampleType::NEAREST_NEIGHBOR &&
params.resampleType != ResampleType::BILINEAR_INTERP &&
params.resampleType != ResampleType::LINEAR_ONNX)
params.resampleType != ResampleType::BILINEAR_INTERP)
return false;
// in the case of 5D support only NEAREST_NEIGHBOR and partially LINEAR_ONNX (interpolate X and Y axes)
if (input.Dimentions() == 5 &&
params.resampleType != ResampleType::NEAREST_NEIGHBOR &&
!(params.resampleType == ResampleType::LINEAR_ONNX &&
input.Batch().v == output.Batch().v &&
input.Feature().v == output.Feature().v &&
input.Z().v == output.Z().v))
// in the case of 5D support only NEAREST_NEIGHBOR
if (input.Dimentions() == 5 && params.resampleType != ResampleType::NEAREST_NEIGHBOR)
return false;
return true;
}
@@ -192,7 +177,11 @@ JitConstants ResampleKernelOpt::GetJitConstants(const resample_params &params) c
if (!params.fused_ops.empty()) {
if (params.resampleType != ResampleType::CAFFE_BILINEAR_INTERP) {
std::vector<std::string> idx_order = {"b", "feature_block", "y", "(x + out_x)"};
std::vector<std::string> idx_order;
if (params.inputs[0].Dimentions() == 5)
idx_order = {"b", "feature_block", "z", "y", "(x + out_x)"};
else
idx_order = {"b", "feature_block", "y", "(x + out_x)"};
FusedOpsConfiguration conf = {"", idx_order, "res", GetAccumulatorType(params), vec_size, LoadType::LT_ALIGNED_READ};
conf.SetVectorAxis(Tensor::DataChannelName::FEATURE);
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));

View File

@@ -5,11 +5,13 @@
#include "resample_kernel_selector.h"
#include "resample_kernel_ref.h"
#include "resample_kernel_opt.h"
#include "resample_kernel_onnx.h"
namespace kernel_selector {
resample_kernel_selector::resample_kernel_selector() {
Attach<ResampleKernelRef>();
Attach<ResampleKernelOpt>();
Attach<ResampleKernelOnnx>();
}
KernelsData resample_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {

View File

@@ -30,6 +30,7 @@ struct fusing_test_params {
size_t expected_not_fused_primitives;
};
#define CASE_RESAMPLE_FSV16_1_5D { 1, 16, 4, 32, 32 }, { 1, 16, 4, 64, 64 }, data_types::f16, format::b_fs_zyx_fsv16, format::bfzyx, resample::InterpolateOp::InterpolateMode::NEAREST, data_types::f16, format::bfzyx
#define CASE_RESAMPLE_FSV16_1 { 1, 16, 64, 64 }, { 1, 16, 128, 128 }, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, resample::InterpolateOp::InterpolateMode::NEAREST, data_types::f16, format::bfyx
#define CASE_RESAMPLE_FSV16_2 { 1, 2, 32, 32 }, { 1, 2, 64, 64 }, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, resample::InterpolateOp::InterpolateMode::NEAREST, data_types::f16, format::bfyx
#define CASE_RESAMPLE_FSV32_1 { 1, 16, 32, 32 }, { 1, 16, 64, 64 }, data_types::i8, format::b_fs_yx_fsv32, format::bfyx, resample::InterpolateOp::InterpolateMode::NEAREST, data_types::i8, format::bfyx
@@ -67,6 +68,7 @@ public:
// This test is to validate fused operation when a fused post-ops has a planar format input while its data input is a blocked format.
// It is expected to replace LT_ALIGNED_READ with LT_UNALIGNED if fused input is planar while generating FUSED_OPS_LOAD in jitter.
class format_mismatch_fusing : public PrimitiveFusingTest {};
TEST_P(format_mismatch_fusing, single_fused_node) {
auto p = GetParam();
create_topologies(
@@ -93,7 +95,8 @@ INSTANTIATE_TEST_SUITE_P(validate_fusings_gpu, format_mismatch_fusing, ::testing
fusing_test_params{ CASE_RESAMPLE_FSV16_1, 3, 4 },
fusing_test_params{ CASE_RESAMPLE_FSV16_2, 3, 4 },
fusing_test_params{ CASE_RESAMPLE_FSV32_1, 3, 4 },
fusing_test_params{ CASE_RESAMPLE_FSV32_2, 3, 4 }
fusing_test_params{ CASE_RESAMPLE_FSV32_2, 3, 4 },
fusing_test_params{ CASE_RESAMPLE_FSV16_1_5D, 3, 4 },
}));
// This test is to check the replace result of mismatched fusing is valid when multiple nodes are fused.
@@ -127,3 +130,35 @@ INSTANTIATE_TEST_SUITE_P(validate_fusings_gpu, format_mismatch_multiple_fusing,
fusing_test_params{ CASE_RESAMPLE_FSV16_1, 3, 4 },
fusing_test_params{ CASE_RESAMPLE_FSV16_2, 3, 4 }
}));
#define CASE_RESAMPLE_ONNX_4D_FSV16_1 { 1, 16, 64, 64 }, { 1, 16, 128, 128 }, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, data_types::f16, format::bfyx
#define CASE_RESAMPLE_ONNX_5D_FSV16_1 { 1, 16, 16, 16, 16 }, { 1, 16, 32, 32, 32 }, data_types::f16, format::b_fs_zyx_fsv16, format::bfzyx, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, data_types::f16, format::bfzyx
class format_mismatch_onnx_fusing : public PrimitiveFusingTest {};
TEST_P(format_mismatch_onnx_fusing, single_fused_node) {
auto p = GetParam();
create_topologies(
// Fused eltwise contains format mismatch between data input of resample(input_format) and fused eltwise input(default_format)
input_layout("input", get_input_layout(p)),
data("eltwise_data", get_mem(get_default_layout(p), -10, 10)),
resample("resample_opt", input_info("input"), p.out_shape, 1, p.type),
eltwise("eltwise", { input_info("eltwise_data"), input_info("resample_opt") }, eltwise_mode::sum),
reorder("reorder_bfyx", input_info("eltwise"), p.output_format, data_types::f32)
);
ov::intel_gpu::ImplementationDesc resample_impl = { p.input_format, "resample_onnx" };
cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "resample_opt", resample_impl } }));
ov::intel_gpu::ImplementationDesc ref_resample_impl = { p.input_format, "resample_ref" };
cfg_not_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "resample_opt", ref_resample_impl } }));
ov::intel_gpu::ImplementationDesc ref_eltwise = { p.input_format, "" };
cfg_not_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "eltwise_data", ref_eltwise } }));
tolerance = 1e-5f;
execute(p);
}
INSTANTIATE_TEST_SUITE_P(validate_fusings_onnx_gpu, format_mismatch_onnx_fusing, ::testing::ValuesIn(std::vector<fusing_test_params>{
fusing_test_params{ CASE_RESAMPLE_ONNX_4D_FSV16_1, 3, 4 },
fusing_test_params{ CASE_RESAMPLE_ONNX_5D_FSV16_1, 3, 4 }
}));

View File

@@ -2041,7 +2041,6 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
ExecutionConfig config_opt;
config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"resample_opt", "res_to_bfyx"}));
config_opt.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample_opt", {params.in_format, kernel}} }));
network net_opt(engine, topo_opt, config_opt);
@@ -2213,11 +2212,20 @@ INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_nearest,
}
));
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx,
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx_4d_padding,
resample_opt_random_test,
testing::ValuesIn(
std::vector<resample_opt_random_test_params>{
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {0, 0, 1, 1}, {0, 0, 1, 1}},
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {0, 0, 0, 0}, {0, 0, 1, 1}},
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, {0, 0, 1, 1}, {0, 0, 0, 0}},
}
));
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx_4d_simple,
resample_opt_random_test,
testing::ValuesIn(
std::vector<resample_opt_random_test_params>{
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16, {}, {}},
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {}, {}},
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {}, {}},
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, {}, {}},
@@ -2300,3 +2308,24 @@ INSTANTIATE_TEST_SUITE_P(resample_opt_perf_linear_5_nearest,
{ data_types::f16, {1, 128, 64, 64, 64}, {1, 128, 128, 128, 128}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
}
));
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx_5d_3axes_padding,
resample_opt_random_test,
testing::ValuesIn(
std::vector<resample_opt_random_test_params>{
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {0, 0, 1, 1, 1}, {0, 0, 1, 1, 1}},
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {0, 0, 0, 0, 0}, {0, 0, 1, 1, 1}},
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {0, 0, 1, 1, 1}, {0, 0, 0, 0, 0}},
}
));
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx_5d_3axes_simple,
resample_opt_random_test,
testing::ValuesIn(
std::vector<resample_opt_random_test_params>{
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {}, {}},
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {}, {}},
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, {}, {}},
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv32, {}, {}},
}
));