[GPU] Feature/interpolate 3 axes onnx 5d (#13796)
Added 3-axis interpolation for the linear-onnx mode. Fixed resample_opt for the onnx mode, which did not work in the case of padding. Added tests for the new implementation and the fix. @OlehKravchyshyn
This commit is contained in:
parent
d1397b7b48
commit
4700207af0
@ -0,0 +1,229 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/fetch_utils.cl"
|
||||
#include "include/batch_headers/sub_group_block_read.cl"
|
||||
#include "include/batch_headers/sub_group_block_write.cl"
|
||||
|
||||
#define READ_FUNC(ptr, offset) BLOCK_READN(INPUT0_TYPE, VEC_SIZE, ptr, offset)
|
||||
#define WRITE_FUNC(ptr, offset, val) BLOCK_WRITEN(OUTPUT_TYPE, VEC_SIZE, ptr, offset, val)
|
||||
|
||||
#define IN_VEC_TYPE MAKE_VECTOR_TYPE(INPUT0_TYPE, VEC_SIZE)
|
||||
#define TO_IN_VEC_TYPE(x) CAT(convert_, IN_VEC_TYPE)(x)
|
||||
#define ACC_VEC_TYPE MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, VEC_SIZE)
|
||||
#define TO_ACC_VEC_TYPE(x) CAT(convert_, ACC_VEC_TYPE)(x)
|
||||
#define OUT_VEC_TYPE MAKE_VECTOR_TYPE(OUTPUT_TYPE, VEC_SIZE)
|
||||
#define TO_OUT_VEC_TYPE(x) CAT(convert_, OUT_VEC_TYPE)(x)
|
||||
|
||||
inline float FUNC(get_original_coordinate)(float num, float scale, int length_resized, int length_original)
|
||||
{
|
||||
if (scale == 1.0f)
|
||||
return num;
|
||||
#if defined(COORD_TRANS_MODE_HALF_PIXEL)
|
||||
return (num + 0.5f) * scale - 0.5f;
|
||||
#elif defined(COORD_TRANS_MODE_PYTORCH_HALF_PIXEL)
|
||||
return (length_resized > 1) ? (num + 0.5f) * scale - 0.5f : 0.f;
|
||||
#elif defined(COORD_TRANS_MODE_ASYMMETRIC)
|
||||
return num * scale;
|
||||
#elif defined(COORD_TRANS_MODE_TF_HALF_PIXEL_FOR_NN)
|
||||
return (num + 0.5f) * scale;
|
||||
#elif defined(COORD_TRANS_MODE_ALIGN_CORNERS)
|
||||
return (length_resized != 1) ? num * (length_original - 1) / (length_resized - 1) : 0.f;
|
||||
#else
|
||||
#error [clDNN resample_onnx.cl]: coordinate transformation mode - not supported
|
||||
#endif
|
||||
}
|
||||
|
||||
REQD_SUB_GROUP_SIZE(SUB_GROUP_SIZE)
|
||||
KERNEL (resample_onnx)(__global INPUT0_TYPE* input,
|
||||
__global OUTPUT_TYPE* output
|
||||
#if HAS_FUSED_OPS_DECLS
|
||||
, FUSED_OPS_DECLS
|
||||
#endif // #if HAS_FUSED_OPS_DECLS
|
||||
)
|
||||
{
|
||||
const int xyz = get_global_id(0);
|
||||
const int z = xyz / (OUTPUT_SIZE_Y * X_BLOCKS);
|
||||
const int xy = xyz % (OUTPUT_SIZE_Y * X_BLOCKS);
|
||||
const int x = (xy % X_BLOCKS) * OUTPUT_X_BLOCK_SIZE;
|
||||
const int y = (xy / X_BLOCKS);
|
||||
|
||||
const int f_block = get_group_id(1);
|
||||
const int b = get_global_id(2);
|
||||
int feature_num = f_block * FEATURE_SLICE_SIZE + get_sub_group_local_id();
|
||||
const uint feature_block = f_block * FEATURE_SLICE_SIZE;
|
||||
|
||||
typedef IN_VEC_TYPE in_vec_t;
|
||||
typedef ACC_VEC_TYPE acc_vec_t;
|
||||
|
||||
const int in_size[5] = { INPUT0_BATCH_NUM, INPUT0_FEATURE_NUM, INPUT0_SIZE_Z, INPUT0_SIZE_Y, INPUT0_SIZE_X };
|
||||
|
||||
if (feature_num >= OUTPUT_FEATURE_NUM)
|
||||
return;
|
||||
|
||||
const int PADDED_Y = INPUT0_SIZE_Y + PADS_BEGIN[3] + PADS_END[3];
|
||||
const int PADDED_X = INPUT0_SIZE_X + PADS_BEGIN[4] + PADS_END[4];
|
||||
const ACCUMULATOR_TYPE iy = FUNC_CALL(get_original_coordinate)(y, SCALES[3], OUTPUT_SIZE_Y, PADDED_Y);
|
||||
|
||||
float in_y = fmax(0, fmin(iy, PADDED_Y - 1));
|
||||
int in_y1 = min((int)in_y, PADDED_Y - 1);
|
||||
int in_y2 = min(in_y1 + 1, PADDED_Y - 1);
|
||||
const ACCUMULATOR_TYPE dy1 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y1)) : 0.5f;
|
||||
const ACCUMULATOR_TYPE dy2 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y2)) : 0.5f;
|
||||
|
||||
#if defined (THREE_SPATIAL_RESAMPLE)
|
||||
|
||||
const int PADDED_Z = INPUT0_SIZE_Z + PADS_BEGIN[2] + PADS_END[2];
|
||||
const ACCUMULATOR_TYPE iz = FUNC_CALL(get_original_coordinate)(z, SCALES[2], OUTPUT_SIZE_Z, PADDED_Z);
|
||||
float in_z = fmax(0, fmin(iz, PADDED_Z - 1));
|
||||
int in_z1 = min((int)in_z, PADDED_Z - 1);
|
||||
int in_z2 = min(in_z1 + 1, PADDED_Z - 1);
|
||||
const ACCUMULATOR_TYPE dz1 = (in_z1 != in_z2) ? TO_ACCUMULATOR_TYPE(fabs(in_z - in_z1)) : 0.5f;
|
||||
const ACCUMULATOR_TYPE dz2 = (in_z1 != in_z2) ? TO_ACCUMULATOR_TYPE(fabs(in_z - in_z2)) : 0.5f;
|
||||
|
||||
#if PADDING_USED == 1
|
||||
const int saved_in_z1 = in_z1;
|
||||
const int saved_in_z2 = in_z2;
|
||||
const int saved_in_y1 = in_y1;
|
||||
const int saved_in_y2 = in_y2;
|
||||
#endif // PADDING_USED == 1
|
||||
|
||||
unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) {
|
||||
const ACCUMULATOR_TYPE ix = FUNC_CALL(get_original_coordinate)(x + out_x, SCALES[4], OUTPUT_SIZE_X, PADDED_X);
|
||||
float in_x = fmax(0, fmin(ix, PADDED_X - 1));
|
||||
int in_x1 = min((int)in_x, PADDED_X - 1);
|
||||
int in_x2 = min(in_x1 + 1, PADDED_X - 1);
|
||||
const ACCUMULATOR_TYPE dx1 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x1)) : 0.5f;
|
||||
const ACCUMULATOR_TYPE dx2 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x2)) : 0.5f;
|
||||
#if PADDING_USED == 1
|
||||
in_z1 = saved_in_z1;
|
||||
in_z2 = saved_in_z2;
|
||||
in_y1 = saved_in_y1;
|
||||
in_y2 = saved_in_y2;
|
||||
|
||||
in_z1 -= PADS_BEGIN[2];
|
||||
in_z2 -= PADS_BEGIN[2];
|
||||
in_y1 -= PADS_BEGIN[3];
|
||||
in_y2 -= PADS_BEGIN[3];
|
||||
in_x1 -= PADS_BEGIN[4];
|
||||
in_x2 -= PADS_BEGIN[4];
|
||||
|
||||
bool BackTopLOutOfBounds = in_z1 < 0 || in_z1 >= INPUT0_SIZE_Z || in_y1 < 0 || in_y1 >= INPUT0_SIZE_Y || in_x1 < 0|| in_x1 >= INPUT0_SIZE_X;
|
||||
bool BackTopROutOfBounds = in_z1 < 0 || in_z1 >= INPUT0_SIZE_Z || in_y1 < 0 || in_y1 >= INPUT0_SIZE_Y || in_x2 < 0 || in_x2 >= INPUT0_SIZE_X;
|
||||
bool BackBottomLOutOfBounds = in_z1 < 0 || in_z1 >= INPUT0_SIZE_Z || in_y2 < 0 || in_y2 >= INPUT0_SIZE_Y || in_x1 < 0 || in_x1 >= INPUT0_SIZE_X;
|
||||
bool BackBottomROutOfBounds = in_z1 < 0 || in_z1 >= INPUT0_SIZE_Z || in_y2 < 0 || in_y2 >= INPUT0_SIZE_Y || in_x2 < 0 || in_x2 >= INPUT0_SIZE_X;
|
||||
|
||||
bool FrontTopLOutOfBounds = in_z2 < 0 || in_z2 >= INPUT0_SIZE_Z || in_y1 < 0 || in_y1 >= INPUT0_SIZE_Y || in_x1 < 0 || in_x1 >= INPUT0_SIZE_X;
|
||||
bool FrontTopROutOfBounds = in_z2 < 0 || in_z2 >= INPUT0_SIZE_Z || in_y1 < 0 || in_y1 >= INPUT0_SIZE_Y || in_x2 < 0 || in_x2 >= INPUT0_SIZE_X;
|
||||
bool FrontBottomLOutOfBounds = in_z2 < 0 || in_z2 >= INPUT0_SIZE_Z || in_y2 < 0 || in_y2 >= INPUT0_SIZE_Y || in_x1 < 0 || in_x1 >= INPUT0_SIZE_X;
|
||||
bool FrontBottomROutOfBounds = in_z2 < 0 || in_z2 >= INPUT0_SIZE_Z || in_y2 < 0 || in_y2 >= INPUT0_SIZE_Y || in_x2 < 0 || in_x2 >= INPUT0_SIZE_X;
|
||||
|
||||
const acc_vec_t x111 = BackTopLOutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y1, in_x1)));
|
||||
const acc_vec_t x211 = BackTopROutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y1, in_x2)));
|
||||
const acc_vec_t x121 = BackBottomLOutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y2, in_x1)));
|
||||
const acc_vec_t x221 = BackBottomROutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y2, in_x2)));
|
||||
const acc_vec_t x112 = FrontTopLOutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y1, in_x1)));
|
||||
const acc_vec_t x212 = FrontTopROutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y1, in_x2)));
|
||||
const acc_vec_t x122 = FrontBottomLOutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y2, in_x1)));
|
||||
const acc_vec_t x222 = FrontBottomROutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y2, in_x2)));
|
||||
#else
|
||||
const acc_vec_t x111 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y1, in_x1)));
|
||||
const acc_vec_t x211 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y1, in_x2)));
|
||||
const acc_vec_t x121 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y2, in_x1)));
|
||||
const acc_vec_t x221 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y2, in_x2)));
|
||||
const acc_vec_t x112 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y1, in_x1)));
|
||||
const acc_vec_t x212 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y1, in_x2)));
|
||||
const acc_vec_t x122 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y2, in_x1)));
|
||||
const acc_vec_t x222 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y2, in_x2)));
|
||||
#endif // PADDING_USED == 1
|
||||
|
||||
acc_vec_t res = TO_ACC_VEC_TYPE(dx2 * dy2 * dz2 * x111) + TO_ACC_VEC_TYPE(dx1 * dy2 * dz2 * x211);
|
||||
res += TO_ACC_VEC_TYPE(dx2 * dy1 * dz2 * x121) + TO_ACC_VEC_TYPE(dx1 * dy1 * dz2 * x221);
|
||||
res += TO_ACC_VEC_TYPE(dx2 * dy2 * dz1 * x112) + TO_ACC_VEC_TYPE(dx1 * dy2 * dz1 * x212);
|
||||
res += TO_ACC_VEC_TYPE(dx2 * dy1 * dz1 * x122) + TO_ACC_VEC_TYPE(dx1 * dy1 * dz1 * x222);
|
||||
|
||||
#if HAS_FUSED_OPS
|
||||
FUSED_OPS;
|
||||
OUT_VEC_TYPE out = FUSED_OPS_RESULT;
|
||||
#else
|
||||
OUT_VEC_TYPE out = TO_OUT_VEC_TYPE(ACTIVATION(res, ACTIVATION_PARAMS));
|
||||
#endif // #if HAS_FUSED_OPS
|
||||
|
||||
WRITE_FUNC(output, OUTPUT_GET_INDEX(b, feature_block, z, y, (x + out_x)), out);
|
||||
}
|
||||
#else // #if defined (THREE_SPATIAL_RESAMPLE)
|
||||
|
||||
#if PADDING_USED == 1
|
||||
const int saved_in_y1 = in_y1;
|
||||
const int saved_in_y2 = in_y2;
|
||||
#endif
|
||||
|
||||
unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) {
|
||||
const ACCUMULATOR_TYPE ix = FUNC_CALL(get_original_coordinate)(x + out_x, SCALES[4], OUTPUT_SIZE_X, PADDED_X);
|
||||
float in_x = fmax(0, fmin(ix, PADDED_X - 1));
|
||||
int in_x1 = min((int)in_x, PADDED_X - 1);
|
||||
int in_x2 = min(in_x1 + 1, PADDED_X - 1);
|
||||
const ACCUMULATOR_TYPE dx1 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x1)) : 0.5f;
|
||||
const ACCUMULATOR_TYPE dx2 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x2)) : 0.5f;
|
||||
|
||||
#if PADDING_USED == 1
|
||||
in_y1 = saved_in_y1;
|
||||
in_y2 = saved_in_y2;
|
||||
|
||||
in_y1 -= PADS_BEGIN[3];
|
||||
in_y2 -= PADS_BEGIN[3];
|
||||
in_x1 -= PADS_BEGIN[4];
|
||||
in_x2 -= PADS_BEGIN[4];
|
||||
|
||||
bool tlOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
|
||||
bool trOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
|
||||
bool blOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
|
||||
bool brOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
|
||||
#endif // PADDING_USED == 1
|
||||
|
||||
#if OUTPUT_DIMS == 5
|
||||
acc_vec_t top_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y1, in_x1)));
|
||||
acc_vec_t top_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y1, in_x2)));
|
||||
acc_vec_t bottom_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y2, in_x1)));
|
||||
acc_vec_t bottom_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y2, in_x2)));
|
||||
#else
|
||||
acc_vec_t top_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y1, in_x1)));
|
||||
acc_vec_t top_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y1, in_x2)));
|
||||
acc_vec_t bottom_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y2, in_x1)));
|
||||
acc_vec_t bottom_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y2, in_x2)));
|
||||
#endif
|
||||
|
||||
#if PADDING_USED == 1
|
||||
if (tlOutOfBounds)
|
||||
top_left = TO_OUT_VEC_TYPE(INPUT0_VAL_ZERO);
|
||||
if (trOutOfBounds)
|
||||
top_right = TO_OUT_VEC_TYPE(INPUT0_VAL_ZERO);
|
||||
if (blOutOfBounds)
|
||||
bottom_left = TO_OUT_VEC_TYPE(INPUT0_VAL_ZERO);
|
||||
if (brOutOfBounds)
|
||||
bottom_right = TO_OUT_VEC_TYPE(INPUT0_VAL_ZERO);
|
||||
#endif // PADDING_USED == 1
|
||||
acc_vec_t res = TO_ACC_VEC_TYPE(dx2 * dy2 * top_left) +
|
||||
TO_ACC_VEC_TYPE(dx1 * dy2 * top_right) +
|
||||
TO_ACC_VEC_TYPE(dx2 * dy1 * bottom_left) +
|
||||
TO_ACC_VEC_TYPE(dx1 * dy1 * bottom_right);
|
||||
#if HAS_FUSED_OPS
|
||||
FUSED_OPS;
|
||||
OUT_VEC_TYPE out = FUSED_OPS_RESULT;
|
||||
#else
|
||||
OUT_VEC_TYPE out = TO_OUT_VEC_TYPE(ACTIVATION(res, ACTIVATION_PARAMS));
|
||||
#endif
|
||||
|
||||
#if OUTPUT_DIMS == 5
|
||||
WRITE_FUNC(output, OUTPUT_GET_INDEX(b, feature_block, z, y, (x + out_x)), out);
|
||||
#else
|
||||
WRITE_FUNC(output, OUTPUT_GET_INDEX(b, feature_block, y, (x + out_x)), out);
|
||||
#endif
|
||||
}
|
||||
#endif // #if defined (THREE_SPATIAL_RESAMPLE)
|
||||
}
|
||||
|
||||
#undef TRIANGLE_COEFF
|
||||
#undef READ_FUNC
|
||||
#undef WRITE_FUNC
|
@ -24,6 +24,8 @@
|
||||
|
||||
inline float FUNC(get_original_coordinate)(float num, float scale, int length_resized, int length_original)
|
||||
{
|
||||
if (scale == 1.0f)
|
||||
return num;
|
||||
#if defined(COORD_TRANS_MODE_HALF_PIXEL)
|
||||
return (num + 0.5f) * scale - 0.5f;
|
||||
#elif defined(COORD_TRANS_MODE_PYTORCH_HALF_PIXEL)
|
||||
@ -231,6 +233,7 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
|
||||
typedef IN_VEC_TYPE in_vec_t;
|
||||
typedef ACC_VEC_TYPE acc_vec_t;
|
||||
|
||||
const int in_size[5] = { INPUT0_BATCH_NUM, INPUT0_FEATURE_NUM, INPUT0_SIZE_Z, INPUT0_SIZE_Y, INPUT0_SIZE_X };
|
||||
|
||||
#ifdef SAMPLE_TYPE_NEAREST
|
||||
unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) {
|
||||
@ -272,68 +275,15 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
|
||||
const acc_vec_t bottom = TO_ACC_VEC_TYPE(bottom_left) + (TO_ACC_VEC_TYPE(bottom_right) - TO_ACC_VEC_TYPE(bottom_left)) * dx;
|
||||
acc_vec_t res = top + (bottom - top) * dy;
|
||||
#else // defined(SAMPLE_TYPE_LINEAR_ONNX)
|
||||
const int PADDED_Y = INPUT0_SIZE_Y + PADS_BEGIN[3] + PADS_END[3];
|
||||
const int PADDED_X = INPUT0_SIZE_X + PADS_BEGIN[4] + PADS_END[4];
|
||||
|
||||
const ACCUMULATOR_TYPE iy = FUNC_CALL(get_original_coordinate)(y, SCALES[3], OUTPUT_SIZE_Y, PADDED_Y);
|
||||
float in_y = fmax(0, fmin(iy, PADDED_Y - 1));
|
||||
int in_y1 = min((int)in_y, PADDED_Y - 1);
|
||||
int in_y2 = min(in_y1 + 1, PADDED_Y - 1);
|
||||
const ACCUMULATOR_TYPE dy1 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y1)) : 0.5f;
|
||||
const ACCUMULATOR_TYPE dy2 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y2)) : 0.5f;
|
||||
|
||||
unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) {
|
||||
const ACCUMULATOR_TYPE ix = FUNC_CALL(get_original_coordinate)(x + out_x, SCALES[4], OUTPUT_SIZE_X, PADDED_X);
|
||||
float in_x = fmax(0, fmin(ix, PADDED_X - 1));
|
||||
int in_x1 = min((int)in_x, PADDED_X - 1);
|
||||
int in_x2 = min(in_x1 + 1, PADDED_X - 1);
|
||||
const ACCUMULATOR_TYPE dx1 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x1)) : 0.5f;
|
||||
const ACCUMULATOR_TYPE dx2 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x2)) : 0.5f;
|
||||
#if PADDING_USED == 1
|
||||
in_y1 -= PADS_BEGIN[3];
|
||||
in_y2 -= PADS_BEGIN[3];
|
||||
in_x1 -= PADS_BEGIN[4];
|
||||
in_x2 -= PADS_BEGIN[4];
|
||||
|
||||
bool tlOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
|
||||
bool trOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
|
||||
bool blOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
|
||||
bool brOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
|
||||
#endif // PADDING_USED == 1
|
||||
|
||||
#if OUTPUT_DIMS == 5
|
||||
const acc_vec_t top_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y1, in_x1)));
|
||||
const acc_vec_t top_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y1, in_x2)));
|
||||
const acc_vec_t bottom_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y2, in_x1)));
|
||||
const acc_vec_t bottom_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y2, in_x2)));
|
||||
#else
|
||||
const acc_vec_t top_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y1, in_x1)));
|
||||
const acc_vec_t top_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y1, in_x2)));
|
||||
const acc_vec_t bottom_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y2, in_x1)));
|
||||
const acc_vec_t bottom_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y2, in_x2)));
|
||||
#error [clDNN resample_opt.cl]: unsupported resample type
|
||||
#endif
|
||||
|
||||
#if PADDING_USED == 1
|
||||
if (tlOutOfBounds)
|
||||
top_left = INPUT0_VAL_ZERO;
|
||||
if (trOutOfBounds)
|
||||
top_right = INPUT0_VAL_ZERO;
|
||||
if (blOutOfBounds)
|
||||
bottom_left = INPUT0_VAL_ZERO;
|
||||
if (brOutOfBounds)
|
||||
bottom_right = INPUT0_VAL_ZERO;
|
||||
#endif // PADDING_USED == 1
|
||||
acc_vec_t res = TO_ACC_VEC_TYPE(dx2 * dy2 * top_left) +
|
||||
TO_ACC_VEC_TYPE(dx1 * dy2 * top_right) +
|
||||
TO_ACC_VEC_TYPE(dx2 * dy1 * bottom_left) +
|
||||
TO_ACC_VEC_TYPE(dx1 * dy1 * bottom_right);
|
||||
#endif
|
||||
#if HAS_FUSED_OPS
|
||||
FUSED_OPS;
|
||||
OUT_VEC_TYPE out = FUSED_OPS_RESULT;
|
||||
#else
|
||||
OUT_VEC_TYPE out = TO_OUT_VEC_TYPE(ACTIVATION(res, ACTIVATION_PARAMS));
|
||||
#endif
|
||||
#endif // #if HAS_FUSED_OPS
|
||||
|
||||
#if OUTPUT_DIMS == 5
|
||||
WRITE_FUNC(output, OUTPUT_GET_INDEX(b, feature_block, z, y, (x + out_x)), out);
|
||||
|
@ -0,0 +1,187 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "resample_kernel_onnx.h"
|
||||
|
||||
#include <kernel_selector_utils.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
static constexpr size_t sub_group_size = 16;
|
||||
|
||||
size_t ResampleKernelOnnx::GetOptimalBlockSize(const resample_params& params) const {
|
||||
std::vector<size_t> block_width = {16, 8, 4, 2, 1};
|
||||
for (auto& w : block_width) {
|
||||
if (params.outputs[0].X().v % w == 0) {
|
||||
return w;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static size_t GetOptimalDivisor(const size_t input_size, size_t max_val = 16) {
|
||||
for (size_t s = max_val; s > 0; --s) {
|
||||
if (input_size % s == 0) {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
ParamsKey ResampleKernelOnnx::GetSupportedKey() const {
|
||||
ParamsKey k;
|
||||
k.EnableInputDataType(Datatype::F16);
|
||||
k.EnableInputDataType(Datatype::F32);
|
||||
k.EnableInputDataType(Datatype::UINT8);
|
||||
k.EnableInputDataType(Datatype::INT8);
|
||||
k.EnableOutputDataType(Datatype::F16);
|
||||
k.EnableOutputDataType(Datatype::F32);
|
||||
k.EnableOutputDataType(Datatype::UINT8);
|
||||
k.EnableOutputDataType(Datatype::INT8);
|
||||
|
||||
// 4d formats
|
||||
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
|
||||
k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv32);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_yx_bsv32_fsv16);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_yx_bsv32_fsv32);
|
||||
k.EnableInputLayout(DataLayout::fs_b_yx_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
|
||||
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv16_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv32_fsv16);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv32_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::fs_b_yx_fsv32);
|
||||
|
||||
// 5d formats
|
||||
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
|
||||
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv32_fsv16);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv32_fsv16);
|
||||
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv32);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv16_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv16_fsv32);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv32_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv32_fsv32);
|
||||
|
||||
k.EnableDifferentTypes();
|
||||
k.EnableTensorOffset();
|
||||
k.EnableTensorPitches();
|
||||
k.EnableBatching();
|
||||
k.EnableReampleType(ResampleType::LINEAR_ONNX);
|
||||
return k;
|
||||
}
|
||||
|
||||
DeviceFeaturesKey ResampleKernelOnnx::get_required_device_features_key(const Params& params,
|
||||
const optional_params& options) const {
|
||||
return get_common_subgroups_device_features_key(params, options);
|
||||
}
|
||||
|
||||
ResampleKernelBase::DispatchData ResampleKernelOnnx::SetDefault(const kernel_selector::resample_params& arg) const {
|
||||
DispatchData dispatchData;
|
||||
std::vector<std::vector<Tensor::DataChannelName>> dims_by_gws;
|
||||
|
||||
const auto& out = arg.outputs[0];
|
||||
|
||||
auto opt_x_block_size = GetOptimalBlockSize(arg);
|
||||
if (out.X().v > 32 && opt_x_block_size == 1) {
|
||||
opt_x_block_size = GetOptimalDivisor(out.X().v, 32);
|
||||
}
|
||||
|
||||
dispatchData.gws[0] = CeilDiv(out.X().v, opt_x_block_size) * out.Y().v * out.Z().v;
|
||||
dispatchData.gws[1] = Align(out.Feature().v, sub_group_size);
|
||||
dispatchData.gws[2] = arg.outputs[0].Batch().v;
|
||||
|
||||
dispatchData.lws[0] = 1;
|
||||
dispatchData.lws[1] = sub_group_size;
|
||||
dispatchData.lws[2] = 1;
|
||||
|
||||
if (arg.outputs[0].GetLayout() == DataLayout::bs_fs_zyx_bsv32_fsv16 ||
|
||||
arg.outputs[0].GetLayout() == DataLayout::bs_fs_zyx_bsv32_fsv32) {
|
||||
dispatchData.lws[2] = GetOptimalDivisor(dispatchData.gws[2]);
|
||||
}
|
||||
|
||||
return dispatchData;
|
||||
}
|
||||
|
||||
KernelsPriority ResampleKernelOnnx::GetKernelsPriority(const Params& /*params*/,
|
||||
const optional_params& /*options*/) const {
|
||||
return FORCE_PRIORITY_4;
|
||||
}
|
||||
|
||||
bool ResampleKernelOnnx::Validate(const Params& p, const optional_params& o) const {
|
||||
const resample_params& params = static_cast<const resample_params&>(p);
|
||||
|
||||
if (!Parent::Validate(p, o))
|
||||
return false;
|
||||
|
||||
const auto& input = params.inputs[0];
|
||||
const auto& output = params.outputs[0];
|
||||
if (input.Batch().v != output.Batch().v || input.Feature().v != output.Feature().v)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool IsThreeSpatialResample(const resample_params& params) {
|
||||
const auto& input = params.inputs[0];
|
||||
const auto& output = params.outputs[0];
|
||||
|
||||
if (input.Dimentions() == 5 && input.Z().v != output.Z().v)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
JitConstants ResampleKernelOnnx::GetJitConstants(const resample_params& params) const {
|
||||
auto jit = Parent::GetJitConstants(params);
|
||||
|
||||
auto opt_x_block_size = GetOptimalBlockSize(params);
|
||||
if (params.outputs[0].X().v > 32 && opt_x_block_size == 1) {
|
||||
opt_x_block_size = GetOptimalDivisor(params.outputs[0].X().v, 32);
|
||||
}
|
||||
|
||||
jit.AddConstant(MakeJitConstant("OUTPUT_X_BLOCK_SIZE", opt_x_block_size));
|
||||
jit.AddConstant(MakeJitConstant("X_BLOCKS", CeilDiv(params.outputs[0].X().v, opt_x_block_size)));
|
||||
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size));
|
||||
|
||||
size_t vec_size = 0;
|
||||
if (params.inputs[0].GetLayout() == DataLayout::fs_b_yx_fsv32) {
|
||||
vec_size = 2;
|
||||
jit.AddConstant(MakeJitConstant("FEATURE_SLICE_SIZE", 32));
|
||||
} else {
|
||||
vec_size = 1;
|
||||
jit.AddConstant(MakeJitConstant("FEATURE_SLICE_SIZE", 16));
|
||||
}
|
||||
if (IsThreeSpatialResample(params))
|
||||
jit.AddConstant(MakeJitConstant("THREE_SPATIAL_RESAMPLE", ""));
|
||||
|
||||
jit.AddConstant(MakeJitConstant("VEC_SIZE", vec_size));
|
||||
|
||||
if (!params.fused_ops.empty()) {
|
||||
std::vector<std::string> idx_order;
|
||||
if (params.inputs[0].Dimentions() == 5)
|
||||
idx_order = {"b", "feature_block", "z", "y", "(x + out_x)"};
|
||||
else
|
||||
idx_order = {"b", "feature_block", "y", "(x + out_x)"};
|
||||
FusedOpsConfiguration conf =
|
||||
{"", idx_order, "res", GetAccumulatorType(params), vec_size, LoadType::LT_ALIGNED_READ};
|
||||
conf.SetVectorAxis(Tensor::DataChannelName::FEATURE);
|
||||
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
|
||||
}
|
||||
|
||||
return jit;
|
||||
}
|
||||
|
||||
KernelsData ResampleKernelOnnx::GetKernelsData(const Params& params, const optional_params& options) const {
|
||||
return GetCommonKernelsData(params, options);
|
||||
}
|
||||
} // namespace kernel_selector
|
@ -0,0 +1,33 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "resample_kernel_base.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
class ResampleKernelOnnx : public ResampleKernelBase {
|
||||
public:
|
||||
using Parent = ResampleKernelBase;
|
||||
ResampleKernelOnnx() : ResampleKernelBase("resample_onnx") {}
|
||||
virtual ~ResampleKernelOnnx() = default;
|
||||
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
DeviceFeaturesKey get_required_device_features_key(const Params& params,
|
||||
const optional_params& /*options*/) const override;
|
||||
|
||||
protected:
|
||||
bool Validate(const Params& p, const optional_params& o) const override;
|
||||
JitConstants GetJitConstants(const resample_params& params) const override;
|
||||
DispatchData SetDefault(const resample_params& arg) const override;
|
||||
std::vector<FusedOpType> GetSupportedFusedOps() const override {
|
||||
return {FusedOpType::QUANTIZE, FusedOpType::ELTWISE, FusedOpType::ACTIVATION};
|
||||
}
|
||||
|
||||
private:
|
||||
size_t GetOptimalBlockSize(const resample_params& params) const;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -73,7 +73,6 @@ ParamsKey ResampleKernelOpt::GetSupportedKey() const {
|
||||
k.EnableBatching();
|
||||
k.EnableReampleType(ResampleType::BILINEAR_INTERP);
|
||||
k.EnableReampleType(ResampleType::NEAREST_NEIGHBOR);
|
||||
k.EnableReampleType(ResampleType::LINEAR_ONNX);
|
||||
k.EnableReampleType(ResampleType::CAFFE_BILINEAR_INTERP);
|
||||
return k;
|
||||
}
|
||||
@ -140,31 +139,17 @@ bool ResampleKernelOpt::Validate(const Params& p, const optional_params& o) cons
|
||||
if (!Parent::Validate(p, o))
|
||||
return false;
|
||||
|
||||
if (p.GetType() != KernelType::RESAMPLE || o.GetType() != KernelType::RESAMPLE)
|
||||
return false;
|
||||
|
||||
if (params.inputs.empty())
|
||||
return false;
|
||||
|
||||
const auto& input = params.inputs[0];
|
||||
const auto & output = params.outputs[0];
|
||||
|
||||
if ((input.GetDType() == Datatype::UINT8 || input.GetDType() == Datatype::INT8) &&
|
||||
params.resampleType != ResampleType::NEAREST_NEIGHBOR &&
|
||||
params.resampleType != ResampleType::BILINEAR_INTERP &&
|
||||
params.resampleType != ResampleType::LINEAR_ONNX)
|
||||
params.resampleType != ResampleType::BILINEAR_INTERP)
|
||||
return false;
|
||||
|
||||
// in the case of 5D support only NEAREST_NEIGHBOR and partially LINEAR_ONNX (interpolate X and Y axes)
|
||||
if (input.Dimentions() == 5 &&
|
||||
params.resampleType != ResampleType::NEAREST_NEIGHBOR &&
|
||||
!(params.resampleType == ResampleType::LINEAR_ONNX &&
|
||||
input.Batch().v == output.Batch().v &&
|
||||
input.Feature().v == output.Feature().v &&
|
||||
input.Z().v == output.Z().v))
|
||||
// in the case of 5D support only NEAREST_NEIGHBOR
|
||||
if (input.Dimentions() == 5 && params.resampleType != ResampleType::NEAREST_NEIGHBOR)
|
||||
return false;
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -192,7 +177,11 @@ JitConstants ResampleKernelOpt::GetJitConstants(const resample_params ¶ms) c
|
||||
|
||||
if (!params.fused_ops.empty()) {
|
||||
if (params.resampleType != ResampleType::CAFFE_BILINEAR_INTERP) {
|
||||
std::vector<std::string> idx_order = {"b", "feature_block", "y", "(x + out_x)"};
|
||||
std::vector<std::string> idx_order;
|
||||
if (params.inputs[0].Dimentions() == 5)
|
||||
idx_order = {"b", "feature_block", "z", "y", "(x + out_x)"};
|
||||
else
|
||||
idx_order = {"b", "feature_block", "y", "(x + out_x)"};
|
||||
FusedOpsConfiguration conf = {"", idx_order, "res", GetAccumulatorType(params), vec_size, LoadType::LT_ALIGNED_READ};
|
||||
conf.SetVectorAxis(Tensor::DataChannelName::FEATURE);
|
||||
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
|
||||
|
@ -5,11 +5,13 @@
|
||||
#include "resample_kernel_selector.h"
|
||||
#include "resample_kernel_ref.h"
|
||||
#include "resample_kernel_opt.h"
|
||||
#include "resample_kernel_onnx.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
resample_kernel_selector::resample_kernel_selector() {
|
||||
Attach<ResampleKernelRef>();
|
||||
Attach<ResampleKernelOpt>();
|
||||
Attach<ResampleKernelOnnx>();
|
||||
}
|
||||
|
||||
KernelsData resample_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
|
||||
|
@ -30,6 +30,7 @@ struct fusing_test_params {
|
||||
size_t expected_not_fused_primitives;
|
||||
};
|
||||
|
||||
#define CASE_RESAMPLE_FSV16_1_5D { 1, 16, 4, 32, 32 }, { 1, 16, 4, 64, 64 }, data_types::f16, format::b_fs_zyx_fsv16, format::bfzyx, resample::InterpolateOp::InterpolateMode::NEAREST, data_types::f16, format::bfzyx
|
||||
#define CASE_RESAMPLE_FSV16_1 { 1, 16, 64, 64 }, { 1, 16, 128, 128 }, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, resample::InterpolateOp::InterpolateMode::NEAREST, data_types::f16, format::bfyx
|
||||
#define CASE_RESAMPLE_FSV16_2 { 1, 2, 32, 32 }, { 1, 2, 64, 64 }, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, resample::InterpolateOp::InterpolateMode::NEAREST, data_types::f16, format::bfyx
|
||||
#define CASE_RESAMPLE_FSV32_1 { 1, 16, 32, 32 }, { 1, 16, 64, 64 }, data_types::i8, format::b_fs_yx_fsv32, format::bfyx, resample::InterpolateOp::InterpolateMode::NEAREST, data_types::i8, format::bfyx
|
||||
@ -67,6 +68,7 @@ public:
|
||||
// This test is to validate fused operation when a fused post-ops has a planar format input while its data input is a blocked format.
|
||||
// It is expected to replace LT_ALIGNED_READ with LT_UNALIGNED if fused input is planar while generating FUSED_OPS_LOAD in jitter.
|
||||
class format_mismatch_fusing : public PrimitiveFusingTest {};
|
||||
|
||||
TEST_P(format_mismatch_fusing, single_fused_node) {
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
@ -93,7 +95,8 @@ INSTANTIATE_TEST_SUITE_P(validate_fusings_gpu, format_mismatch_fusing, ::testing
|
||||
fusing_test_params{ CASE_RESAMPLE_FSV16_1, 3, 4 },
|
||||
fusing_test_params{ CASE_RESAMPLE_FSV16_2, 3, 4 },
|
||||
fusing_test_params{ CASE_RESAMPLE_FSV32_1, 3, 4 },
|
||||
fusing_test_params{ CASE_RESAMPLE_FSV32_2, 3, 4 }
|
||||
fusing_test_params{ CASE_RESAMPLE_FSV32_2, 3, 4 },
|
||||
fusing_test_params{ CASE_RESAMPLE_FSV16_1_5D, 3, 4 },
|
||||
}));
|
||||
|
||||
// This test is to check the replace result of mismatched fusing is valid when multiple nodes are fused.
|
||||
@ -127,3 +130,35 @@ INSTANTIATE_TEST_SUITE_P(validate_fusings_gpu, format_mismatch_multiple_fusing,
|
||||
fusing_test_params{ CASE_RESAMPLE_FSV16_1, 3, 4 },
|
||||
fusing_test_params{ CASE_RESAMPLE_FSV16_2, 3, 4 }
|
||||
}));
|
||||
|
||||
#define CASE_RESAMPLE_ONNX_4D_FSV16_1 { 1, 16, 64, 64 }, { 1, 16, 128, 128 }, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, data_types::f16, format::bfyx
|
||||
#define CASE_RESAMPLE_ONNX_5D_FSV16_1 { 1, 16, 16, 16, 16 }, { 1, 16, 32, 32, 32 }, data_types::f16, format::b_fs_zyx_fsv16, format::bfzyx, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, data_types::f16, format::bfzyx
|
||||
|
||||
class format_mismatch_onnx_fusing : public PrimitiveFusingTest {};
|
||||
|
||||
TEST_P(format_mismatch_onnx_fusing, single_fused_node) {
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
// Fused eltwise contains format mismatch between data input of resample(input_format) and fused eltwise input(default_format)
|
||||
input_layout("input", get_input_layout(p)),
|
||||
data("eltwise_data", get_mem(get_default_layout(p), -10, 10)),
|
||||
resample("resample_opt", input_info("input"), p.out_shape, 1, p.type),
|
||||
eltwise("eltwise", { input_info("eltwise_data"), input_info("resample_opt") }, eltwise_mode::sum),
|
||||
reorder("reorder_bfyx", input_info("eltwise"), p.output_format, data_types::f32)
|
||||
);
|
||||
|
||||
ov::intel_gpu::ImplementationDesc resample_impl = { p.input_format, "resample_onnx" };
|
||||
cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "resample_opt", resample_impl } }));
|
||||
ov::intel_gpu::ImplementationDesc ref_resample_impl = { p.input_format, "resample_ref" };
|
||||
cfg_not_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "resample_opt", ref_resample_impl } }));
|
||||
ov::intel_gpu::ImplementationDesc ref_eltwise = { p.input_format, "" };
|
||||
cfg_not_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "eltwise_data", ref_eltwise } }));
|
||||
|
||||
tolerance = 1e-5f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(validate_fusings_onnx_gpu, format_mismatch_onnx_fusing, ::testing::ValuesIn(std::vector<fusing_test_params>{
|
||||
fusing_test_params{ CASE_RESAMPLE_ONNX_4D_FSV16_1, 3, 4 },
|
||||
fusing_test_params{ CASE_RESAMPLE_ONNX_5D_FSV16_1, 3, 4 }
|
||||
}));
|
||||
|
@ -2041,7 +2041,6 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
|
||||
|
||||
ExecutionConfig config_opt;
|
||||
config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"resample_opt", "res_to_bfyx"}));
|
||||
config_opt.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample_opt", {params.in_format, kernel}} }));
|
||||
|
||||
network net_opt(engine, topo_opt, config_opt);
|
||||
|
||||
@ -2213,11 +2212,20 @@ INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_nearest,
|
||||
}
|
||||
));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx,
|
||||
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx_4d_padding,
|
||||
resample_opt_random_test,
|
||||
testing::ValuesIn(
|
||||
std::vector<resample_opt_random_test_params>{
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {0, 0, 1, 1}, {0, 0, 1, 1}},
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {0, 0, 0, 0}, {0, 0, 1, 1}},
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, {0, 0, 1, 1}, {0, 0, 0, 0}},
|
||||
}
|
||||
));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx_4d_simple,
|
||||
resample_opt_random_test,
|
||||
testing::ValuesIn(
|
||||
std::vector<resample_opt_random_test_params>{
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16, {}, {}},
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {}, {}},
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {}, {}},
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, {}, {}},
|
||||
@ -2300,3 +2308,24 @@ INSTANTIATE_TEST_SUITE_P(resample_opt_perf_linear_5_nearest,
|
||||
{ data_types::f16, {1, 128, 64, 64, 64}, {1, 128, 128, 128, 128}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
|
||||
}
|
||||
));
|
||||
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx_5d_3axes_padding,
|
||||
resample_opt_random_test,
|
||||
testing::ValuesIn(
|
||||
std::vector<resample_opt_random_test_params>{
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {0, 0, 1, 1, 1}, {0, 0, 1, 1, 1}},
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {0, 0, 0, 0, 0}, {0, 0, 1, 1, 1}},
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {0, 0, 1, 1, 1}, {0, 0, 0, 0, 0}},
|
||||
}
|
||||
));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx_5d_3axes_simple,
|
||||
resample_opt_random_test,
|
||||
testing::ValuesIn(
|
||||
std::vector<resample_opt_random_test_params>{
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {}, {}},
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {}, {}},
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, {}, {}},
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv32, {}, {}},
|
||||
}
|
||||
));
|
||||
|
Loading…
Reference in New Issue
Block a user