[GPU] Feature/interpolate 3 axes onnx 5d (#13796)
Added 3-axis interpolation for the linear-onnx mode. Fixed resample_opt for the onnx mode, which did not work in the case of padding. Added tests for the new implementation and the fix. @OlehKravchyshyn
This commit is contained in:
parent
d1397b7b48
commit
4700207af0
@ -0,0 +1,229 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/fetch_utils.cl"
|
||||
#include "include/batch_headers/sub_group_block_read.cl"
|
||||
#include "include/batch_headers/sub_group_block_write.cl"
|
||||
|
||||
#define READ_FUNC(ptr, offset) BLOCK_READN(INPUT0_TYPE, VEC_SIZE, ptr, offset)
|
||||
#define WRITE_FUNC(ptr, offset, val) BLOCK_WRITEN(OUTPUT_TYPE, VEC_SIZE, ptr, offset, val)
|
||||
|
||||
#define IN_VEC_TYPE MAKE_VECTOR_TYPE(INPUT0_TYPE, VEC_SIZE)
|
||||
#define TO_IN_VEC_TYPE(x) CAT(convert_, IN_VEC_TYPE)(x)
|
||||
#define ACC_VEC_TYPE MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, VEC_SIZE)
|
||||
#define TO_ACC_VEC_TYPE(x) CAT(convert_, ACC_VEC_TYPE)(x)
|
||||
#define OUT_VEC_TYPE MAKE_VECTOR_TYPE(OUTPUT_TYPE, VEC_SIZE)
|
||||
#define TO_OUT_VEC_TYPE(x) CAT(convert_, OUT_VEC_TYPE)(x)
|
||||
|
||||
inline float FUNC(get_original_coordinate)(float num, float scale, int length_resized, int length_original)
|
||||
{
|
||||
if (scale == 1.0f)
|
||||
return num;
|
||||
#if defined(COORD_TRANS_MODE_HALF_PIXEL)
|
||||
return (num + 0.5f) * scale - 0.5f;
|
||||
#elif defined(COORD_TRANS_MODE_PYTORCH_HALF_PIXEL)
|
||||
return (length_resized > 1) ? (num + 0.5f) * scale - 0.5f : 0.f;
|
||||
#elif defined(COORD_TRANS_MODE_ASYMMETRIC)
|
||||
return num * scale;
|
||||
#elif defined(COORD_TRANS_MODE_TF_HALF_PIXEL_FOR_NN)
|
||||
return (num + 0.5f) * scale;
|
||||
#elif defined(COORD_TRANS_MODE_ALIGN_CORNERS)
|
||||
return (length_resized != 1) ? num * (length_original - 1) / (length_resized - 1) : 0.f;
|
||||
#else
|
||||
#error [clDNN resample_onnx.cl]: coordinate transformation mode - not supported
|
||||
#endif
|
||||
}
|
||||
|
||||
REQD_SUB_GROUP_SIZE(SUB_GROUP_SIZE)
|
||||
KERNEL (resample_onnx)(__global INPUT0_TYPE* input,
|
||||
__global OUTPUT_TYPE* output
|
||||
#if HAS_FUSED_OPS_DECLS
|
||||
, FUSED_OPS_DECLS
|
||||
#endif // #if HAS_FUSED_OPS_DECLS
|
||||
)
|
||||
{
|
||||
const int xyz = get_global_id(0);
|
||||
const int z = xyz / (OUTPUT_SIZE_Y * X_BLOCKS);
|
||||
const int xy = xyz % (OUTPUT_SIZE_Y * X_BLOCKS);
|
||||
const int x = (xy % X_BLOCKS) * OUTPUT_X_BLOCK_SIZE;
|
||||
const int y = (xy / X_BLOCKS);
|
||||
|
||||
const int f_block = get_group_id(1);
|
||||
const int b = get_global_id(2);
|
||||
int feature_num = f_block * FEATURE_SLICE_SIZE + get_sub_group_local_id();
|
||||
const uint feature_block = f_block * FEATURE_SLICE_SIZE;
|
||||
|
||||
typedef IN_VEC_TYPE in_vec_t;
|
||||
typedef ACC_VEC_TYPE acc_vec_t;
|
||||
|
||||
const int in_size[5] = { INPUT0_BATCH_NUM, INPUT0_FEATURE_NUM, INPUT0_SIZE_Z, INPUT0_SIZE_Y, INPUT0_SIZE_X };
|
||||
|
||||
if (feature_num >= OUTPUT_FEATURE_NUM)
|
||||
return;
|
||||
|
||||
const int PADDED_Y = INPUT0_SIZE_Y + PADS_BEGIN[3] + PADS_END[3];
|
||||
const int PADDED_X = INPUT0_SIZE_X + PADS_BEGIN[4] + PADS_END[4];
|
||||
const ACCUMULATOR_TYPE iy = FUNC_CALL(get_original_coordinate)(y, SCALES[3], OUTPUT_SIZE_Y, PADDED_Y);
|
||||
|
||||
float in_y = fmax(0, fmin(iy, PADDED_Y - 1));
|
||||
int in_y1 = min((int)in_y, PADDED_Y - 1);
|
||||
int in_y2 = min(in_y1 + 1, PADDED_Y - 1);
|
||||
const ACCUMULATOR_TYPE dy1 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y1)) : 0.5f;
|
||||
const ACCUMULATOR_TYPE dy2 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y2)) : 0.5f;
|
||||
|
||||
#if defined (THREE_SPATIAL_RESAMPLE)
|
||||
|
||||
const int PADDED_Z = INPUT0_SIZE_Z + PADS_BEGIN[2] + PADS_END[2];
|
||||
const ACCUMULATOR_TYPE iz = FUNC_CALL(get_original_coordinate)(z, SCALES[2], OUTPUT_SIZE_Z, PADDED_Z);
|
||||
float in_z = fmax(0, fmin(iz, PADDED_Z - 1));
|
||||
int in_z1 = min((int)in_z, PADDED_Z - 1);
|
||||
int in_z2 = min(in_z1 + 1, PADDED_Z - 1);
|
||||
const ACCUMULATOR_TYPE dz1 = (in_z1 != in_z2) ? TO_ACCUMULATOR_TYPE(fabs(in_z - in_z1)) : 0.5f;
|
||||
const ACCUMULATOR_TYPE dz2 = (in_z1 != in_z2) ? TO_ACCUMULATOR_TYPE(fabs(in_z - in_z2)) : 0.5f;
|
||||
|
||||
#if PADDING_USED == 1
|
||||
const int saved_in_z1 = in_z1;
|
||||
const int saved_in_z2 = in_z2;
|
||||
const int saved_in_y1 = in_y1;
|
||||
const int saved_in_y2 = in_y2;
|
||||
#endif // PADDING_USED == 1
|
||||
|
||||
unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) {
|
||||
const ACCUMULATOR_TYPE ix = FUNC_CALL(get_original_coordinate)(x + out_x, SCALES[4], OUTPUT_SIZE_X, PADDED_X);
|
||||
float in_x = fmax(0, fmin(ix, PADDED_X - 1));
|
||||
int in_x1 = min((int)in_x, PADDED_X - 1);
|
||||
int in_x2 = min(in_x1 + 1, PADDED_X - 1);
|
||||
const ACCUMULATOR_TYPE dx1 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x1)) : 0.5f;
|
||||
const ACCUMULATOR_TYPE dx2 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x2)) : 0.5f;
|
||||
#if PADDING_USED == 1
|
||||
in_z1 = saved_in_z1;
|
||||
in_z2 = saved_in_z2;
|
||||
in_y1 = saved_in_y1;
|
||||
in_y2 = saved_in_y2;
|
||||
|
||||
in_z1 -= PADS_BEGIN[2];
|
||||
in_z2 -= PADS_BEGIN[2];
|
||||
in_y1 -= PADS_BEGIN[3];
|
||||
in_y2 -= PADS_BEGIN[3];
|
||||
in_x1 -= PADS_BEGIN[4];
|
||||
in_x2 -= PADS_BEGIN[4];
|
||||
|
||||
bool BackTopLOutOfBounds = in_z1 < 0 || in_z1 >= INPUT0_SIZE_Z || in_y1 < 0 || in_y1 >= INPUT0_SIZE_Y || in_x1 < 0|| in_x1 >= INPUT0_SIZE_X;
|
||||
bool BackTopROutOfBounds = in_z1 < 0 || in_z1 >= INPUT0_SIZE_Z || in_y1 < 0 || in_y1 >= INPUT0_SIZE_Y || in_x2 < 0 || in_x2 >= INPUT0_SIZE_X;
|
||||
bool BackBottomLOutOfBounds = in_z1 < 0 || in_z1 >= INPUT0_SIZE_Z || in_y2 < 0 || in_y2 >= INPUT0_SIZE_Y || in_x1 < 0 || in_x1 >= INPUT0_SIZE_X;
|
||||
bool BackBottomROutOfBounds = in_z1 < 0 || in_z1 >= INPUT0_SIZE_Z || in_y2 < 0 || in_y2 >= INPUT0_SIZE_Y || in_x2 < 0 || in_x2 >= INPUT0_SIZE_X;
|
||||
|
||||
bool FrontTopLOutOfBounds = in_z2 < 0 || in_z2 >= INPUT0_SIZE_Z || in_y1 < 0 || in_y1 >= INPUT0_SIZE_Y || in_x1 < 0 || in_x1 >= INPUT0_SIZE_X;
|
||||
bool FrontTopROutOfBounds = in_z2 < 0 || in_z2 >= INPUT0_SIZE_Z || in_y1 < 0 || in_y1 >= INPUT0_SIZE_Y || in_x2 < 0 || in_x2 >= INPUT0_SIZE_X;
|
||||
bool FrontBottomLOutOfBounds = in_z2 < 0 || in_z2 >= INPUT0_SIZE_Z || in_y2 < 0 || in_y2 >= INPUT0_SIZE_Y || in_x1 < 0 || in_x1 >= INPUT0_SIZE_X;
|
||||
bool FrontBottomROutOfBounds = in_z2 < 0 || in_z2 >= INPUT0_SIZE_Z || in_y2 < 0 || in_y2 >= INPUT0_SIZE_Y || in_x2 < 0 || in_x2 >= INPUT0_SIZE_X;
|
||||
|
||||
const acc_vec_t x111 = BackTopLOutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y1, in_x1)));
|
||||
const acc_vec_t x211 = BackTopROutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y1, in_x2)));
|
||||
const acc_vec_t x121 = BackBottomLOutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y2, in_x1)));
|
||||
const acc_vec_t x221 = BackBottomROutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y2, in_x2)));
|
||||
const acc_vec_t x112 = FrontTopLOutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y1, in_x1)));
|
||||
const acc_vec_t x212 = FrontTopROutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y1, in_x2)));
|
||||
const acc_vec_t x122 = FrontBottomLOutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y2, in_x1)));
|
||||
const acc_vec_t x222 = FrontBottomROutOfBounds ? INPUT0_VAL_ZERO : TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y2, in_x2)));
|
||||
#else
|
||||
const acc_vec_t x111 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y1, in_x1)));
|
||||
const acc_vec_t x211 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y1, in_x2)));
|
||||
const acc_vec_t x121 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y2, in_x1)));
|
||||
const acc_vec_t x221 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z1, in_y2, in_x2)));
|
||||
const acc_vec_t x112 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y1, in_x1)));
|
||||
const acc_vec_t x212 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y1, in_x2)));
|
||||
const acc_vec_t x122 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y2, in_x1)));
|
||||
const acc_vec_t x222 = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_z2, in_y2, in_x2)));
|
||||
#endif // PADDING_USED == 1
|
||||
|
||||
acc_vec_t res = TO_ACC_VEC_TYPE(dx2 * dy2 * dz2 * x111) + TO_ACC_VEC_TYPE(dx1 * dy2 * dz2 * x211);
|
||||
res += TO_ACC_VEC_TYPE(dx2 * dy1 * dz2 * x121) + TO_ACC_VEC_TYPE(dx1 * dy1 * dz2 * x221);
|
||||
res += TO_ACC_VEC_TYPE(dx2 * dy2 * dz1 * x112) + TO_ACC_VEC_TYPE(dx1 * dy2 * dz1 * x212);
|
||||
res += TO_ACC_VEC_TYPE(dx2 * dy1 * dz1 * x122) + TO_ACC_VEC_TYPE(dx1 * dy1 * dz1 * x222);
|
||||
|
||||
#if HAS_FUSED_OPS
|
||||
FUSED_OPS;
|
||||
OUT_VEC_TYPE out = FUSED_OPS_RESULT;
|
||||
#else
|
||||
OUT_VEC_TYPE out = TO_OUT_VEC_TYPE(ACTIVATION(res, ACTIVATION_PARAMS));
|
||||
#endif // #if HAS_FUSED_OPS
|
||||
|
||||
WRITE_FUNC(output, OUTPUT_GET_INDEX(b, feature_block, z, y, (x + out_x)), out);
|
||||
}
|
||||
#else // #if defined (THREE_SPATIAL_RESAMPLE)
|
||||
|
||||
#if PADDING_USED == 1
|
||||
const int saved_in_y1 = in_y1;
|
||||
const int saved_in_y2 = in_y2;
|
||||
#endif
|
||||
|
||||
unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) {
|
||||
const ACCUMULATOR_TYPE ix = FUNC_CALL(get_original_coordinate)(x + out_x, SCALES[4], OUTPUT_SIZE_X, PADDED_X);
|
||||
float in_x = fmax(0, fmin(ix, PADDED_X - 1));
|
||||
int in_x1 = min((int)in_x, PADDED_X - 1);
|
||||
int in_x2 = min(in_x1 + 1, PADDED_X - 1);
|
||||
const ACCUMULATOR_TYPE dx1 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x1)) : 0.5f;
|
||||
const ACCUMULATOR_TYPE dx2 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x2)) : 0.5f;
|
||||
|
||||
#if PADDING_USED == 1
|
||||
in_y1 = saved_in_y1;
|
||||
in_y2 = saved_in_y2;
|
||||
|
||||
in_y1 -= PADS_BEGIN[3];
|
||||
in_y2 -= PADS_BEGIN[3];
|
||||
in_x1 -= PADS_BEGIN[4];
|
||||
in_x2 -= PADS_BEGIN[4];
|
||||
|
||||
bool tlOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
|
||||
bool trOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
|
||||
bool blOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
|
||||
bool brOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
|
||||
#endif // PADDING_USED == 1
|
||||
|
||||
#if OUTPUT_DIMS == 5
|
||||
acc_vec_t top_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y1, in_x1)));
|
||||
acc_vec_t top_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y1, in_x2)));
|
||||
acc_vec_t bottom_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y2, in_x1)));
|
||||
acc_vec_t bottom_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y2, in_x2)));
|
||||
#else
|
||||
acc_vec_t top_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y1, in_x1)));
|
||||
acc_vec_t top_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y1, in_x2)));
|
||||
acc_vec_t bottom_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y2, in_x1)));
|
||||
acc_vec_t bottom_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y2, in_x2)));
|
||||
#endif
|
||||
|
||||
#if PADDING_USED == 1
|
||||
if (tlOutOfBounds)
|
||||
top_left = TO_OUT_VEC_TYPE(INPUT0_VAL_ZERO);
|
||||
if (trOutOfBounds)
|
||||
top_right = TO_OUT_VEC_TYPE(INPUT0_VAL_ZERO);
|
||||
if (blOutOfBounds)
|
||||
bottom_left = TO_OUT_VEC_TYPE(INPUT0_VAL_ZERO);
|
||||
if (brOutOfBounds)
|
||||
bottom_right = TO_OUT_VEC_TYPE(INPUT0_VAL_ZERO);
|
||||
#endif // PADDING_USED == 1
|
||||
acc_vec_t res = TO_ACC_VEC_TYPE(dx2 * dy2 * top_left) +
|
||||
TO_ACC_VEC_TYPE(dx1 * dy2 * top_right) +
|
||||
TO_ACC_VEC_TYPE(dx2 * dy1 * bottom_left) +
|
||||
TO_ACC_VEC_TYPE(dx1 * dy1 * bottom_right);
|
||||
#if HAS_FUSED_OPS
|
||||
FUSED_OPS;
|
||||
OUT_VEC_TYPE out = FUSED_OPS_RESULT;
|
||||
#else
|
||||
OUT_VEC_TYPE out = TO_OUT_VEC_TYPE(ACTIVATION(res, ACTIVATION_PARAMS));
|
||||
#endif
|
||||
|
||||
#if OUTPUT_DIMS == 5
|
||||
WRITE_FUNC(output, OUTPUT_GET_INDEX(b, feature_block, z, y, (x + out_x)), out);
|
||||
#else
|
||||
WRITE_FUNC(output, OUTPUT_GET_INDEX(b, feature_block, y, (x + out_x)), out);
|
||||
#endif
|
||||
}
|
||||
#endif // #if defined (THREE_SPATIAL_RESAMPLE)
|
||||
}
|
||||
|
||||
#undef TRIANGLE_COEFF
|
||||
#undef READ_FUNC
|
||||
#undef WRITE_FUNC
|
@ -24,6 +24,8 @@
|
||||
|
||||
inline float FUNC(get_original_coordinate)(float num, float scale, int length_resized, int length_original)
|
||||
{
|
||||
if (scale == 1.0f)
|
||||
return num;
|
||||
#if defined(COORD_TRANS_MODE_HALF_PIXEL)
|
||||
return (num + 0.5f) * scale - 0.5f;
|
||||
#elif defined(COORD_TRANS_MODE_PYTORCH_HALF_PIXEL)
|
||||
@ -231,6 +233,7 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
|
||||
typedef IN_VEC_TYPE in_vec_t;
|
||||
typedef ACC_VEC_TYPE acc_vec_t;
|
||||
|
||||
const int in_size[5] = { INPUT0_BATCH_NUM, INPUT0_FEATURE_NUM, INPUT0_SIZE_Z, INPUT0_SIZE_Y, INPUT0_SIZE_X };
|
||||
|
||||
#ifdef SAMPLE_TYPE_NEAREST
|
||||
unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) {
|
||||
@ -272,68 +275,15 @@ KERNEL (resample_opt)(__global INPUT0_TYPE* input,
|
||||
const acc_vec_t bottom = TO_ACC_VEC_TYPE(bottom_left) + (TO_ACC_VEC_TYPE(bottom_right) - TO_ACC_VEC_TYPE(bottom_left)) * dx;
|
||||
acc_vec_t res = top + (bottom - top) * dy;
|
||||
#else // defined(SAMPLE_TYPE_LINEAR_ONNX)
|
||||
const int PADDED_Y = INPUT0_SIZE_Y + PADS_BEGIN[3] + PADS_END[3];
|
||||
const int PADDED_X = INPUT0_SIZE_X + PADS_BEGIN[4] + PADS_END[4];
|
||||
|
||||
const ACCUMULATOR_TYPE iy = FUNC_CALL(get_original_coordinate)(y, SCALES[3], OUTPUT_SIZE_Y, PADDED_Y);
|
||||
float in_y = fmax(0, fmin(iy, PADDED_Y - 1));
|
||||
int in_y1 = min((int)in_y, PADDED_Y - 1);
|
||||
int in_y2 = min(in_y1 + 1, PADDED_Y - 1);
|
||||
const ACCUMULATOR_TYPE dy1 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y1)) : 0.5f;
|
||||
const ACCUMULATOR_TYPE dy2 = (in_y1 != in_y2) ? TO_ACCUMULATOR_TYPE(fabs(in_y - in_y2)) : 0.5f;
|
||||
|
||||
unroll_for (uint out_x = 0; out_x < OUTPUT_X_BLOCK_SIZE; out_x++) {
|
||||
const ACCUMULATOR_TYPE ix = FUNC_CALL(get_original_coordinate)(x + out_x, SCALES[4], OUTPUT_SIZE_X, PADDED_X);
|
||||
float in_x = fmax(0, fmin(ix, PADDED_X - 1));
|
||||
int in_x1 = min((int)in_x, PADDED_X - 1);
|
||||
int in_x2 = min(in_x1 + 1, PADDED_X - 1);
|
||||
const ACCUMULATOR_TYPE dx1 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x1)) : 0.5f;
|
||||
const ACCUMULATOR_TYPE dx2 = (in_x1 != in_x2) ? TO_ACCUMULATOR_TYPE(fabs(in_x - in_x2)) : 0.5f;
|
||||
#if PADDING_USED == 1
|
||||
in_y1 -= PADS_BEGIN[3];
|
||||
in_y2 -= PADS_BEGIN[3];
|
||||
in_x1 -= PADS_BEGIN[4];
|
||||
in_x2 -= PADS_BEGIN[4];
|
||||
|
||||
bool tlOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
|
||||
bool trOutOfBounds = in_y1 < 0 || in_y1 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
|
||||
bool blOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x1 < 0 || in_x1 >= in_size[4];
|
||||
bool brOutOfBounds = in_y2 < 0 || in_y2 >= in_size[3] || in_x2 < 0 || in_x2 >= in_size[4];
|
||||
#endif // PADDING_USED == 1
|
||||
|
||||
#if OUTPUT_DIMS == 5
|
||||
const acc_vec_t top_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y1, in_x1)));
|
||||
const acc_vec_t top_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y1, in_x2)));
|
||||
const acc_vec_t bottom_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y2, in_x1)));
|
||||
const acc_vec_t bottom_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, z, in_y2, in_x2)));
|
||||
#else
|
||||
const acc_vec_t top_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y1, in_x1)));
|
||||
const acc_vec_t top_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y1, in_x2)));
|
||||
const acc_vec_t bottom_left = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y2, in_x1)));
|
||||
const acc_vec_t bottom_right = TO_ACC_VEC_TYPE(READ_FUNC(input, INPUT0_GET_INDEX(b, feature_block, in_y2, in_x2)));
|
||||
#error [clDNN resample_opt.cl]: unsupported resample type
|
||||
#endif
|
||||
|
||||
#if PADDING_USED == 1
|
||||
if (tlOutOfBounds)
|
||||
top_left = INPUT0_VAL_ZERO;
|
||||
if (trOutOfBounds)
|
||||
top_right = INPUT0_VAL_ZERO;
|
||||
if (blOutOfBounds)
|
||||
bottom_left = INPUT0_VAL_ZERO;
|
||||
if (brOutOfBounds)
|
||||
bottom_right = INPUT0_VAL_ZERO;
|
||||
#endif // PADDING_USED == 1
|
||||
acc_vec_t res = TO_ACC_VEC_TYPE(dx2 * dy2 * top_left) +
|
||||
TO_ACC_VEC_TYPE(dx1 * dy2 * top_right) +
|
||||
TO_ACC_VEC_TYPE(dx2 * dy1 * bottom_left) +
|
||||
TO_ACC_VEC_TYPE(dx1 * dy1 * bottom_right);
|
||||
#endif
|
||||
#if HAS_FUSED_OPS
|
||||
FUSED_OPS;
|
||||
OUT_VEC_TYPE out = FUSED_OPS_RESULT;
|
||||
#else
|
||||
OUT_VEC_TYPE out = TO_OUT_VEC_TYPE(ACTIVATION(res, ACTIVATION_PARAMS));
|
||||
#endif
|
||||
#endif // #if HAS_FUSED_OPS
|
||||
|
||||
#if OUTPUT_DIMS == 5
|
||||
WRITE_FUNC(output, OUTPUT_GET_INDEX(b, feature_block, z, y, (x + out_x)), out);
|
||||
|
@ -0,0 +1,187 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "resample_kernel_onnx.h"
|
||||
|
||||
#include <kernel_selector_utils.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
static constexpr size_t sub_group_size = 16;
|
||||
|
||||
size_t ResampleKernelOnnx::GetOptimalBlockSize(const resample_params& params) const {
|
||||
std::vector<size_t> block_width = {16, 8, 4, 2, 1};
|
||||
for (auto& w : block_width) {
|
||||
if (params.outputs[0].X().v % w == 0) {
|
||||
return w;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static size_t GetOptimalDivisor(const size_t input_size, size_t max_val = 16) {
|
||||
for (size_t s = max_val; s > 0; --s) {
|
||||
if (input_size % s == 0) {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
ParamsKey ResampleKernelOnnx::GetSupportedKey() const {
|
||||
ParamsKey k;
|
||||
k.EnableInputDataType(Datatype::F16);
|
||||
k.EnableInputDataType(Datatype::F32);
|
||||
k.EnableInputDataType(Datatype::UINT8);
|
||||
k.EnableInputDataType(Datatype::INT8);
|
||||
k.EnableOutputDataType(Datatype::F16);
|
||||
k.EnableOutputDataType(Datatype::F32);
|
||||
k.EnableOutputDataType(Datatype::UINT8);
|
||||
k.EnableOutputDataType(Datatype::INT8);
|
||||
|
||||
// 4d formats
|
||||
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
|
||||
k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv32);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_yx_bsv32_fsv16);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_yx_bsv32_fsv32);
|
||||
k.EnableInputLayout(DataLayout::fs_b_yx_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
|
||||
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv16_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv32_fsv16);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv32_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::fs_b_yx_fsv32);
|
||||
|
||||
// 5d formats
|
||||
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
|
||||
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv32_fsv16);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv32_fsv16);
|
||||
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv32);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv16_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv16_fsv32);
|
||||
k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv32_fsv32);
|
||||
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv32_fsv32);
|
||||
|
||||
k.EnableDifferentTypes();
|
||||
k.EnableTensorOffset();
|
||||
k.EnableTensorPitches();
|
||||
k.EnableBatching();
|
||||
k.EnableReampleType(ResampleType::LINEAR_ONNX);
|
||||
return k;
|
||||
}
|
||||
|
||||
DeviceFeaturesKey ResampleKernelOnnx::get_required_device_features_key(const Params& params,
|
||||
const optional_params& options) const {
|
||||
return get_common_subgroups_device_features_key(params, options);
|
||||
}
|
||||
|
||||
ResampleKernelBase::DispatchData ResampleKernelOnnx::SetDefault(const kernel_selector::resample_params& arg) const {
|
||||
DispatchData dispatchData;
|
||||
std::vector<std::vector<Tensor::DataChannelName>> dims_by_gws;
|
||||
|
||||
const auto& out = arg.outputs[0];
|
||||
|
||||
auto opt_x_block_size = GetOptimalBlockSize(arg);
|
||||
if (out.X().v > 32 && opt_x_block_size == 1) {
|
||||
opt_x_block_size = GetOptimalDivisor(out.X().v, 32);
|
||||
}
|
||||
|
||||
dispatchData.gws[0] = CeilDiv(out.X().v, opt_x_block_size) * out.Y().v * out.Z().v;
|
||||
dispatchData.gws[1] = Align(out.Feature().v, sub_group_size);
|
||||
dispatchData.gws[2] = arg.outputs[0].Batch().v;
|
||||
|
||||
dispatchData.lws[0] = 1;
|
||||
dispatchData.lws[1] = sub_group_size;
|
||||
dispatchData.lws[2] = 1;
|
||||
|
||||
if (arg.outputs[0].GetLayout() == DataLayout::bs_fs_zyx_bsv32_fsv16 ||
|
||||
arg.outputs[0].GetLayout() == DataLayout::bs_fs_zyx_bsv32_fsv32) {
|
||||
dispatchData.lws[2] = GetOptimalDivisor(dispatchData.gws[2]);
|
||||
}
|
||||
|
||||
return dispatchData;
|
||||
}
|
||||
|
||||
KernelsPriority ResampleKernelOnnx::GetKernelsPriority(const Params& /*params*/,
|
||||
const optional_params& /*options*/) const {
|
||||
return FORCE_PRIORITY_4;
|
||||
}
|
||||
|
||||
bool ResampleKernelOnnx::Validate(const Params& p, const optional_params& o) const {
|
||||
const resample_params& params = static_cast<const resample_params&>(p);
|
||||
|
||||
if (!Parent::Validate(p, o))
|
||||
return false;
|
||||
|
||||
const auto& input = params.inputs[0];
|
||||
const auto& output = params.outputs[0];
|
||||
if (input.Batch().v != output.Batch().v || input.Feature().v != output.Feature().v)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool IsThreeSpatialResample(const resample_params& params) {
|
||||
const auto& input = params.inputs[0];
|
||||
const auto& output = params.outputs[0];
|
||||
|
||||
if (input.Dimentions() == 5 && input.Z().v != output.Z().v)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
JitConstants ResampleKernelOnnx::GetJitConstants(const resample_params& params) const {
|
||||
auto jit = Parent::GetJitConstants(params);
|
||||
|
||||
auto opt_x_block_size = GetOptimalBlockSize(params);
|
||||
if (params.outputs[0].X().v > 32 && opt_x_block_size == 1) {
|
||||
opt_x_block_size = GetOptimalDivisor(params.outputs[0].X().v, 32);
|
||||
}
|
||||
|
||||
jit.AddConstant(MakeJitConstant("OUTPUT_X_BLOCK_SIZE", opt_x_block_size));
|
||||
jit.AddConstant(MakeJitConstant("X_BLOCKS", CeilDiv(params.outputs[0].X().v, opt_x_block_size)));
|
||||
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size));
|
||||
|
||||
size_t vec_size = 0;
|
||||
if (params.inputs[0].GetLayout() == DataLayout::fs_b_yx_fsv32) {
|
||||
vec_size = 2;
|
||||
jit.AddConstant(MakeJitConstant("FEATURE_SLICE_SIZE", 32));
|
||||
} else {
|
||||
vec_size = 1;
|
||||
jit.AddConstant(MakeJitConstant("FEATURE_SLICE_SIZE", 16));
|
||||
}
|
||||
if (IsThreeSpatialResample(params))
|
||||
jit.AddConstant(MakeJitConstant("THREE_SPATIAL_RESAMPLE", ""));
|
||||
|
||||
jit.AddConstant(MakeJitConstant("VEC_SIZE", vec_size));
|
||||
|
||||
if (!params.fused_ops.empty()) {
|
||||
std::vector<std::string> idx_order;
|
||||
if (params.inputs[0].Dimentions() == 5)
|
||||
idx_order = {"b", "feature_block", "z", "y", "(x + out_x)"};
|
||||
else
|
||||
idx_order = {"b", "feature_block", "y", "(x + out_x)"};
|
||||
FusedOpsConfiguration conf =
|
||||
{"", idx_order, "res", GetAccumulatorType(params), vec_size, LoadType::LT_ALIGNED_READ};
|
||||
conf.SetVectorAxis(Tensor::DataChannelName::FEATURE);
|
||||
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
|
||||
}
|
||||
|
||||
return jit;
|
||||
}
|
||||
|
||||
KernelsData ResampleKernelOnnx::GetKernelsData(const Params& params, const optional_params& options) const {
|
||||
return GetCommonKernelsData(params, options);
|
||||
}
|
||||
} // namespace kernel_selector
|
@ -0,0 +1,33 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "resample_kernel_base.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
class ResampleKernelOnnx : public ResampleKernelBase {
|
||||
public:
|
||||
using Parent = ResampleKernelBase;
|
||||
ResampleKernelOnnx() : ResampleKernelBase("resample_onnx") {}
|
||||
virtual ~ResampleKernelOnnx() = default;
|
||||
|
||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
|
||||
ParamsKey GetSupportedKey() const override;
|
||||
DeviceFeaturesKey get_required_device_features_key(const Params& params,
|
||||
const optional_params& /*options*/) const override;
|
||||
|
||||
protected:
|
||||
bool Validate(const Params& p, const optional_params& o) const override;
|
||||
JitConstants GetJitConstants(const resample_params& params) const override;
|
||||
DispatchData SetDefault(const resample_params& arg) const override;
|
||||
std::vector<FusedOpType> GetSupportedFusedOps() const override {
|
||||
return {FusedOpType::QUANTIZE, FusedOpType::ELTWISE, FusedOpType::ACTIVATION};
|
||||
}
|
||||
|
||||
private:
|
||||
size_t GetOptimalBlockSize(const resample_params& params) const;
|
||||
};
|
||||
} // namespace kernel_selector
|
@ -73,7 +73,6 @@ ParamsKey ResampleKernelOpt::GetSupportedKey() const {
|
||||
k.EnableBatching();
|
||||
k.EnableReampleType(ResampleType::BILINEAR_INTERP);
|
||||
k.EnableReampleType(ResampleType::NEAREST_NEIGHBOR);
|
||||
k.EnableReampleType(ResampleType::LINEAR_ONNX);
|
||||
k.EnableReampleType(ResampleType::CAFFE_BILINEAR_INTERP);
|
||||
return k;
|
||||
}
|
||||
@ -140,31 +139,17 @@ bool ResampleKernelOpt::Validate(const Params& p, const optional_params& o) cons
|
||||
if (!Parent::Validate(p, o))
|
||||
return false;
|
||||
|
||||
if (p.GetType() != KernelType::RESAMPLE || o.GetType() != KernelType::RESAMPLE)
|
||||
return false;
|
||||
|
||||
if (params.inputs.empty())
|
||||
return false;
|
||||
|
||||
const auto& input = params.inputs[0];
|
||||
const auto & output = params.outputs[0];
|
||||
|
||||
if ((input.GetDType() == Datatype::UINT8 || input.GetDType() == Datatype::INT8) &&
|
||||
params.resampleType != ResampleType::NEAREST_NEIGHBOR &&
|
||||
params.resampleType != ResampleType::BILINEAR_INTERP &&
|
||||
params.resampleType != ResampleType::LINEAR_ONNX)
|
||||
params.resampleType != ResampleType::BILINEAR_INTERP)
|
||||
return false;
|
||||
|
||||
// in the case of 5D support only NEAREST_NEIGHBOR and partially LINEAR_ONNX (interpolate X and Y axes)
|
||||
if (input.Dimentions() == 5 &&
|
||||
params.resampleType != ResampleType::NEAREST_NEIGHBOR &&
|
||||
!(params.resampleType == ResampleType::LINEAR_ONNX &&
|
||||
input.Batch().v == output.Batch().v &&
|
||||
input.Feature().v == output.Feature().v &&
|
||||
input.Z().v == output.Z().v))
|
||||
// in the case of 5D support only NEAREST_NEIGHBOR
|
||||
if (input.Dimentions() == 5 && params.resampleType != ResampleType::NEAREST_NEIGHBOR)
|
||||
return false;
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -192,7 +177,11 @@ JitConstants ResampleKernelOpt::GetJitConstants(const resample_params ¶ms) c
|
||||
|
||||
if (!params.fused_ops.empty()) {
|
||||
if (params.resampleType != ResampleType::CAFFE_BILINEAR_INTERP) {
|
||||
std::vector<std::string> idx_order = {"b", "feature_block", "y", "(x + out_x)"};
|
||||
std::vector<std::string> idx_order;
|
||||
if (params.inputs[0].Dimentions() == 5)
|
||||
idx_order = {"b", "feature_block", "z", "y", "(x + out_x)"};
|
||||
else
|
||||
idx_order = {"b", "feature_block", "y", "(x + out_x)"};
|
||||
FusedOpsConfiguration conf = {"", idx_order, "res", GetAccumulatorType(params), vec_size, LoadType::LT_ALIGNED_READ};
|
||||
conf.SetVectorAxis(Tensor::DataChannelName::FEATURE);
|
||||
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
|
||||
|
@ -5,11 +5,13 @@
|
||||
#include "resample_kernel_selector.h"
|
||||
#include "resample_kernel_ref.h"
|
||||
#include "resample_kernel_opt.h"
|
||||
#include "resample_kernel_onnx.h"
|
||||
|
||||
namespace kernel_selector {
|
||||
resample_kernel_selector::resample_kernel_selector() {
|
||||
Attach<ResampleKernelRef>();
|
||||
Attach<ResampleKernelOpt>();
|
||||
Attach<ResampleKernelOnnx>();
|
||||
}
|
||||
|
||||
KernelsData resample_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
|
||||
|
@ -30,6 +30,7 @@ struct fusing_test_params {
|
||||
size_t expected_not_fused_primitives;
|
||||
};
|
||||
|
||||
#define CASE_RESAMPLE_FSV16_1_5D { 1, 16, 4, 32, 32 }, { 1, 16, 4, 64, 64 }, data_types::f16, format::b_fs_zyx_fsv16, format::bfzyx, resample::InterpolateOp::InterpolateMode::NEAREST, data_types::f16, format::bfzyx
|
||||
#define CASE_RESAMPLE_FSV16_1 { 1, 16, 64, 64 }, { 1, 16, 128, 128 }, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, resample::InterpolateOp::InterpolateMode::NEAREST, data_types::f16, format::bfyx
|
||||
#define CASE_RESAMPLE_FSV16_2 { 1, 2, 32, 32 }, { 1, 2, 64, 64 }, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, resample::InterpolateOp::InterpolateMode::NEAREST, data_types::f16, format::bfyx
|
||||
#define CASE_RESAMPLE_FSV32_1 { 1, 16, 32, 32 }, { 1, 16, 64, 64 }, data_types::i8, format::b_fs_yx_fsv32, format::bfyx, resample::InterpolateOp::InterpolateMode::NEAREST, data_types::i8, format::bfyx
|
||||
@ -67,6 +68,7 @@ public:
|
||||
// This test is to validate fused operation when a fused post-ops has a planar format input while its data input is a blocked format.
|
||||
// It is expected to replace LT_ALIGNED_READ with LT_UNALIGNED if fused input is planar while generating FUSED_OPS_LOAD in jitter.
|
||||
class format_mismatch_fusing : public PrimitiveFusingTest {};
|
||||
|
||||
TEST_P(format_mismatch_fusing, single_fused_node) {
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
@ -93,7 +95,8 @@ INSTANTIATE_TEST_SUITE_P(validate_fusings_gpu, format_mismatch_fusing, ::testing
|
||||
fusing_test_params{ CASE_RESAMPLE_FSV16_1, 3, 4 },
|
||||
fusing_test_params{ CASE_RESAMPLE_FSV16_2, 3, 4 },
|
||||
fusing_test_params{ CASE_RESAMPLE_FSV32_1, 3, 4 },
|
||||
fusing_test_params{ CASE_RESAMPLE_FSV32_2, 3, 4 }
|
||||
fusing_test_params{ CASE_RESAMPLE_FSV32_2, 3, 4 },
|
||||
fusing_test_params{ CASE_RESAMPLE_FSV16_1_5D, 3, 4 },
|
||||
}));
|
||||
|
||||
// This test is to check the replace result of mismatched fusing is valid when multiple nodes are fused.
|
||||
@ -127,3 +130,35 @@ INSTANTIATE_TEST_SUITE_P(validate_fusings_gpu, format_mismatch_multiple_fusing,
|
||||
fusing_test_params{ CASE_RESAMPLE_FSV16_1, 3, 4 },
|
||||
fusing_test_params{ CASE_RESAMPLE_FSV16_2, 3, 4 }
|
||||
}));
|
||||
|
||||
#define CASE_RESAMPLE_ONNX_4D_FSV16_1 { 1, 16, 64, 64 }, { 1, 16, 128, 128 }, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, data_types::f16, format::bfyx
|
||||
#define CASE_RESAMPLE_ONNX_5D_FSV16_1 { 1, 16, 16, 16, 16 }, { 1, 16, 32, 32, 32 }, data_types::f16, format::b_fs_zyx_fsv16, format::bfzyx, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, data_types::f16, format::bfzyx
|
||||
|
||||
class format_mismatch_onnx_fusing : public PrimitiveFusingTest {};
|
||||
|
||||
TEST_P(format_mismatch_onnx_fusing, single_fused_node) {
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
// Fused eltwise contains format mismatch between data input of resample(input_format) and fused eltwise input(default_format)
|
||||
input_layout("input", get_input_layout(p)),
|
||||
data("eltwise_data", get_mem(get_default_layout(p), -10, 10)),
|
||||
resample("resample_opt", input_info("input"), p.out_shape, 1, p.type),
|
||||
eltwise("eltwise", { input_info("eltwise_data"), input_info("resample_opt") }, eltwise_mode::sum),
|
||||
reorder("reorder_bfyx", input_info("eltwise"), p.output_format, data_types::f32)
|
||||
);
|
||||
|
||||
ov::intel_gpu::ImplementationDesc resample_impl = { p.input_format, "resample_onnx" };
|
||||
cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "resample_opt", resample_impl } }));
|
||||
ov::intel_gpu::ImplementationDesc ref_resample_impl = { p.input_format, "resample_ref" };
|
||||
cfg_not_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "resample_opt", ref_resample_impl } }));
|
||||
ov::intel_gpu::ImplementationDesc ref_eltwise = { p.input_format, "" };
|
||||
cfg_not_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "eltwise_data", ref_eltwise } }));
|
||||
|
||||
tolerance = 1e-5f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(validate_fusings_onnx_gpu, format_mismatch_onnx_fusing, ::testing::ValuesIn(std::vector<fusing_test_params>{
|
||||
fusing_test_params{ CASE_RESAMPLE_ONNX_4D_FSV16_1, 3, 4 },
|
||||
fusing_test_params{ CASE_RESAMPLE_ONNX_5D_FSV16_1, 3, 4 }
|
||||
}));
|
||||
|
@ -2041,7 +2041,6 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
|
||||
|
||||
ExecutionConfig config_opt;
|
||||
config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"resample_opt", "res_to_bfyx"}));
|
||||
config_opt.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample_opt", {params.in_format, kernel}} }));
|
||||
|
||||
network net_opt(engine, topo_opt, config_opt);
|
||||
|
||||
@ -2213,11 +2212,20 @@ INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_nearest,
|
||||
}
|
||||
));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx,
|
||||
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx_4d_padding,
|
||||
resample_opt_random_test,
|
||||
testing::ValuesIn(
|
||||
std::vector<resample_opt_random_test_params>{
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {0, 0, 1, 1}, {0, 0, 1, 1}},
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {0, 0, 0, 0}, {0, 0, 1, 1}},
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, {0, 0, 1, 1}, {0, 0, 0, 0}},
|
||||
}
|
||||
));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx_4d_simple,
|
||||
resample_opt_random_test,
|
||||
testing::ValuesIn(
|
||||
std::vector<resample_opt_random_test_params>{
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16, {}, {}},
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {}, {}},
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {}, {}},
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, {}, {}},
|
||||
@ -2300,3 +2308,24 @@ INSTANTIATE_TEST_SUITE_P(resample_opt_perf_linear_5_nearest,
|
||||
{ data_types::f16, {1, 128, 64, 64, 64}, {1, 128, 128, 128, 128}, 1, resample::InterpolateOp::InterpolateMode::NEAREST, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
|
||||
}
|
||||
));
|
||||
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx_5d_3axes_padding,
|
||||
resample_opt_random_test,
|
||||
testing::ValuesIn(
|
||||
std::vector<resample_opt_random_test_params>{
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {0, 0, 1, 1, 1}, {0, 0, 1, 1, 1}},
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {0, 0, 0, 0, 0}, {0, 0, 1, 1, 1}},
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {0, 0, 1, 1, 1}, {0, 0, 0, 0, 0}},
|
||||
}
|
||||
));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx_5d_3axes_simple,
|
||||
resample_opt_random_test,
|
||||
testing::ValuesIn(
|
||||
std::vector<resample_opt_random_test_params>{
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16, {}, {}},
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {}, {}},
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {}, {}},
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, {}, {}},
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv32, {}, {}},
|
||||
}
|
||||
));
|
||||
|
Loading…
Reference in New Issue
Block a user