[IE CLDNN] Add asymmetric quantization support to fsv16 imad general convolution kernel (#2778)
parent 9c509e5f41
commit fbae10a235
@@ -320,6 +320,9 @@ ParamsKey Convolution_kernel_b_fs_zyx_fsv16_imad::GetSupportedKey() const {
k.EnableBatching();
k.EnableGroupedConvolution();
k.EnableQuantization(QuantizationType::SYMMETRIC);
k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA);
k.EnableQuantization(QuantizationType::ASYMMETRIC_WEIGHTS);
k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS);
k.EnableDilation();
k.DisableTuning();
return k;
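The hunk above advertises asymmetric-quantization support in the kernel's ParamsKey. As a minimal scalar sketch of the identity the rest of this diff implements (illustrative only, not code from the repository; all names are made up): sum((a - azp) * (w - wzp)) = sum(a*w) - sum(a*wzp) - sum(azp*w) + sum(azp*wzp), where azp and wzp are the activation and weights zero points.

// Illustrative scalar sketch of the asymmetric-quantization identity.
#include <cstdint>
#include <vector>

int32_t asymmetric_dot(const std::vector<int8_t>& a, const std::vector<int8_t>& w,
                       int8_t azp, int8_t wzp) {
    int32_t acc = 0, a_wzp = 0, azp_w = 0, azp_wzp = 0;
    for (size_t i = 0; i < a.size(); ++i) {
        acc     += int32_t(a[i]) * int32_t(w[i]);   // plain IMAD-style accumulation
        a_wzp   += int32_t(a[i]) * int32_t(wzp);    // correction for asymmetric weights
        azp_w   += int32_t(azp)  * int32_t(w[i]);   // correction for asymmetric data
        azp_wzp += int32_t(azp)  * int32_t(wzp);    // combined zero-point term
    }
    // sum((a - azp) * (w - wzp)) expanded into four partial sums.
    return acc - a_wzp - azp_w + azp_wzp;
}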
@@ -422,11 +425,31 @@ bool Convolution_kernel_b_fs_zyx_fsv16_imad::Validate(const Params& params, cons
}

KernelData kd = KernelData::Default<convolution_params>(params);
convolution_params& newParams = *static_cast<convolution_params*>(kd.params.get());
convolution_params& conv_params = *static_cast<convolution_params*>(kd.params.get());

if (newParams.split != 1)
if (conv_params.split != 1)
return false;

if (conv_params.quantization == QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS) {
if ((conv_params.activations_zero_points.empty() || conv_params.weights_zero_points.empty()) &&
(conv_params.compensation.empty()))
return false;
}
else if (conv_params.quantization == QuantizationType::ASYMMETRIC_DATA) {
if ((conv_params.activations_zero_points.empty()) &&
(conv_params.compensation.empty()))
return false;
}
else if (conv_params.quantization == QuantizationType::ASYMMETRIC_WEIGHTS) {
if (conv_params.weights_zero_points.empty())
return false;
} else {
if (!conv_params.activations_zero_points.empty() ||
!conv_params.weights_zero_points.empty() ||
!conv_params.compensation.empty())
return false;
}

return true;
}
} // namespace kernel_selector
@@ -18,10 +18,42 @@
#include "include/mmad.cl"
#include "include/data_types.cl"

#define TYPE_N_(type, n) type##n
#define TYPE_N(type, n) TYPE_N_(type, n)
#define AS_TYPE_N_(type, n, x) as_##type##n(x)
#define AS_TYPE_N(type, n, x) AS_TYPE_N_(type, n, x)
#define INPUT0_TYPE_4 TYPE_N(INPUT0_TYPE, 4)
#define AS_INPUT0_TYPE_4(x) AS_TYPE_N(INPUT0_TYPE, 4, x)

#if INPUT0_PAD_BEFORE_SIZE_X != 0 || \
    INPUT0_PAD_BEFORE_SIZE_Y != 0 || \
    INPUT0_PAD_BEFORE_SIZE_Z != 0
#define NON_ZERO_INPUT0_PAD_BEFORE
#endif

#if !defined COMPENSATION_TERM || \
    (defined COMPENSATION_TERM && defined NON_ZERO_INPUT0_PAD_BEFORE)
#define SHOULD_BALANCE_COMPENSATION
#endif

#if defined ASYMMETRIC_DATA_QUANTIZATION && defined SHOULD_BALANCE_COMPENSATION
#define SHOULD_USE_DATA_ZP
#endif

#if defined ASYMMETRIC_DATA_QUANTIZATION && \
    defined ASYMMETRIC_WEIGHTS_QUANTIZATION && \
    defined SHOULD_BALANCE_COMPENSATION
#define SHOULD_USE_DATA_AND_WEIGHTS_ZP
#endif

#ifdef SHOULD_USE_DATA_AND_WEIGHTS_ZP
#define ACCUMULATOR_TYPE_4 TYPE_N(ACCUMULATOR_TYPE, 4)
#endif

#ifdef ASYMMETRIC_WEIGHTS_QUANTIZATION
#define FILTER_TYPE_16 TYPE_N(FILTER_TYPE, 16)
#endif

#define AS_FILTER_TYPE_4(x) AS_TYPE_N(FILTER_TYPE, 4, x)

#define CEIL_DIV(a, b) (((a) + (b) - 1)/(b))
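The new macros above decide when the kernel must "balance" the correction terms itself: if there is no precomputed COMPENSATION_TERM, or there is one but the input has non-zero padding before the data, padded positions must read back the activation zero point so that (a - azp) evaluates to zero there. A minimal scalar sketch of that idea (illustrative names only, not the OpenCL code):

#include <cstdint>

int8_t load_with_zp(const int8_t* data, int x, int size_x, int8_t azp) {
    // Outside the real tensor the convolution must behave as if the dequantized
    // value (a - azp) were zero, i.e. as if a == azp, so padded positions return
    // the activation zero point instead of reading padded memory.
    const bool on_padding = (x < 0) || (x >= size_x);
    return on_padding ? azp : data[x];
}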
@@ -41,6 +73,15 @@ KERNEL(convolution_gpu_b_fs_zyx_fsv16_imad)(
#if BIAS_TERM
    const __global BIAS_TYPE *biases,
#endif
#ifdef ASYMMETRIC_WEIGHTS_QUANTIZATION
    const __global WEIGHTS_ZERO_POINTS_TYPE *weights_zp,
#endif
#ifdef ASYMMETRIC_DATA_QUANTIZATION
    const __global ACTIVATIONS_ZERO_POINTS_TYPE *activations_zp,
#endif
#ifdef COMPENSATION_TERM
    const __global COMPENSATION_TYPE *compensation,
#endif
#if HAS_FUSED_OPS_DECLS
    FUSED_OPS_DECLS,
#endif
@@ -92,8 +133,67 @@ KERNEL(convolution_gpu_b_fs_zyx_fsv16_imad)(

uint4 input_val[IN_BLOCK_DEPTH][IN_BLOCK_HEIGHT][CEIL_DIV(IN_BLOCK_WIDTH, SIMD)];

#ifdef SHOULD_USE_DATA_ZP
uint data_zp_idx = g * FILTER_IFM_NUM + in_f_start;
uint4 data_zp_val;
#endif

#ifdef ASYMMETRIC_WEIGHTS_QUANTIZATION
uint4 weights_zp_val[OFM_BLOCKS_PER_SIMD];
__attribute__((opencl_unroll_hint))
for (uint ofb = 0; ofb < OFM_BLOCKS_PER_SIMD; ++ofb) {
weights_zp_val[ofb] = as_uint4((FILTER_TYPE_16)weights_zp[out_f + ofb * FSV]);
}
#if FILTER_IFM_NUM % FSV != 0
uint4 weights_zp_vec_partial[OFM_BLOCKS_PER_SIMD];
__attribute__((opencl_unroll_hint))
for (uint ofb = 0; ofb < OFM_BLOCKS_PER_SIMD; ++ofb) {
weights_zp_vec_partial[ofb] = weights_zp_val[ofb];
FILTER_TYPE* wzp_p = (FILTER_TYPE*)&weights_zp_vec_partial[ofb];
__attribute__((opencl_unroll_hint))
for (uint f = FILTER_IFM_NUM % FSV; f < FSV; f++) {
wzp_p[f] = 0;
}
}
#endif
#endif

__attribute__((opencl_unroll_hint(1)))
for (uint k = 0; k < CEIL_DIV(FILTER_IFM_NUM, FSV) / FEATURE_SLM_SPLIT; k++) {
#ifdef ASYMMETRIC_WEIGHTS_QUANTIZATION
#if FILTER_IFM_NUM % FSV != 0
if (in_f_start + (k + 1) * FSV >= ALIGN(FILTER_IFM_NUM, FSV)) {
__attribute__((opencl_unroll_hint))
for (uint ofb = 0; ofb < OFM_BLOCKS_PER_SIMD; ++ofb) {
weights_zp_val[ofb] = weights_zp_vec_partial[ofb];
}
}
#endif
#endif

#ifdef SHOULD_USE_DATA_ZP
#if ((FILTER_GROUPS_NUM > 1) && (FILTER_IFM_NUM % FSV != 0))
data_zp_val = as_uint4(vload16(0, activations_zp + data_zp_idx));
#else
data_zp_val = vload4(0, (__global uint *)(activations_zp + data_zp_idx));
#endif
#endif

#ifdef SHOULD_USE_DATA_AND_WEIGHTS_ZP
ACCUMULATOR_TYPE_4 dotProdAZPxWZP[OFM_BLOCKS_PER_SIMD];
__attribute__((opencl_unroll_hint))
for (uint ofb = 0; ofb < OFM_BLOCKS_PER_SIMD; ++ofb) {
dotProdAZPxWZP[ofb] = 0;
__attribute__((opencl_unroll_hint))
for (uint ive = 0; ive < 4; ive++) {
dotProdAZPxWZP[ofb][ive] = TO_ACCUMULATOR_TYPE(
IMAD(dotProdAZPxWZP[ofb][ive],
AS_INPUT0_TYPE_4(data_zp_val[ive]),
AS_FILTER_TYPE_4(weights_zp_val[ofb][ive])));
}
}
#endif

__attribute__((opencl_unroll_hint(1)))
for (uint fzn = 0; fzn < FILTER_SIZE_Z / FILTER_SIZE_Z_UNROLL; fzn++) {
__attribute__((opencl_unroll_hint(1)))
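In the hunk above the weights zero points for the current output-feature block are loaded once per SIMD lane, and when FILTER_IFM_NUM is not a multiple of FSV (16) a masked copy zeroes the tail lanes so that nonexistent input channels do not contribute to the azp*wzp correction. A scalar sketch of the masking (illustrative names only):

#include <array>
#include <cstdint>

constexpr int FSV = 16;  // feature slice size used by the b_fs_zyx_fsv16 layout

std::array<int8_t, FSV> mask_tail_zero_points(std::array<int8_t, FSV> wzp, int ifm_remainder) {
    // ifm_remainder corresponds to FILTER_IFM_NUM % FSV; channels at or beyond it
    // do not exist, so their zero points are forced to 0 before the azp*wzp
    // pre-accumulation.
    if (ifm_remainder != 0) {
        for (int f = ifm_remainder; f < FSV; ++f)
            wzp[f] = 0;
    }
    return wzp;
}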
@@ -106,48 +206,103 @@ KERNEL(convolution_gpu_b_fs_zyx_fsv16_imad)(
__attribute__((opencl_unroll_hint))
for (uint ixb = 0; ixb < CEIL_DIV(IN_BLOCK_WIDTH, SIMD); ++ixb) {
uint input_idx = input_start_idx + izb * INPUT0_Z_PITCH * FSV + iyb * INPUT0_Y_PITCH * FSV + ixb * SIMD * FSV;

#ifdef SHOULD_USE_DATA_ZP
const int y_idx = input_y + fyn * DILATION_SIZE_Y + iyb;
const int z_idx = input_z + fzn * DILATION_SIZE_Z + izb;
#endif
if (ixb != CEIL_DIV(IN_BLOCK_WIDTH, SIMD) - 1) {
#ifdef SHOULD_USE_DATA_ZP
const int x_idx = input_x + ixb * SIMD + get_sub_group_local_id();
const bool input_on_padding = (((x_idx < 0) || (x_idx >= INPUT0_SIZE_X)) ||
((y_idx < 0) || (y_idx >= INPUT0_SIZE_Y)) ||
((z_idx < 0) || (z_idx >= INPUT0_SIZE_Z)));
#endif

#if ((FILTER_GROUPS_NUM > 1) && (FILTER_IFM_NUM % FSV != 0))
if (in_f_offset == 0) {
#endif
input_val[izb][iyb][ixb] = vload4(0, (__global uint *)(conv_input + input_idx + get_sub_group_local_id() * FSV));
#ifdef SHOULD_USE_DATA_ZP
if (input_on_padding) {
input_val[izb][iyb][ixb] = data_zp_val;
} else {
#endif
input_val[izb][iyb][ixb] = vload4(0, (__global uint *)(conv_input + input_idx + get_sub_group_local_id() * FSV));
#ifdef SHOULD_USE_DATA_ZP
}
#endif
#if ((FILTER_GROUPS_NUM > 1) && (FILTER_IFM_NUM % FSV != 0))
} else {
INPUT0_TYPE* input_int8_arr = (INPUT0_TYPE*) &input_val[izb][iyb][ixb];
#ifdef SHOULD_USE_DATA_ZP
INPUT0_TYPE* input_zp_int8_arr = (INPUT0_TYPE*) &data_zp_val;
#endif
__attribute__((opencl_unroll_hint(FSV)))
for (uint v = 0; v < FSV; v++) {
if (v + in_f_offset < FSV) {
input_int8_arr[v] = conv_input[input_idx + get_sub_group_local_id() * FSV + v];
} else {
const uint addr = input_idx + get_sub_group_local_id() * FSV + v +
((INPUT0_SIZE_X + INPUT0_PAD_BEFORE_SIZE_X + INPUT0_PAD_AFTER_SIZE_X) *
(INPUT0_SIZE_Y + INPUT0_PAD_BEFORE_SIZE_Y + INPUT0_PAD_AFTER_SIZE_Y) *
(INPUT0_SIZE_Z + INPUT0_PAD_BEFORE_SIZE_Z + INPUT0_PAD_AFTER_SIZE_Z) - 1) * FSV;
input_int8_arr[v] = conv_input[addr];
}
#ifdef SHOULD_USE_DATA_ZP
if (input_on_padding) {
input_int8_arr[v] = input_zp_int8_arr[v];
} else {
#endif
if (v + in_f_offset < FSV) {
input_int8_arr[v] = conv_input[input_idx + get_sub_group_local_id() * FSV + v];
} else {
const uint addr = input_idx + get_sub_group_local_id() * FSV + v +
((INPUT0_SIZE_X + INPUT0_PAD_BEFORE_SIZE_X + INPUT0_PAD_AFTER_SIZE_X) *
(INPUT0_SIZE_Y + INPUT0_PAD_BEFORE_SIZE_Y + INPUT0_PAD_AFTER_SIZE_Y) *
(INPUT0_SIZE_Z + INPUT0_PAD_BEFORE_SIZE_Z + INPUT0_PAD_AFTER_SIZE_Z) - 1) * FSV;
input_int8_arr[v] = conv_input[addr];
}
#ifdef SHOULD_USE_DATA_ZP
}
#endif
}
}
#endif
} else {
#ifdef SHOULD_USE_DATA_ZP
const int x_idx = input_x + ixb * SIMD + tmp;
const bool input_on_padding = (((x_idx < 0) || (x_idx >= INPUT0_SIZE_X)) ||
((y_idx < 0) || (y_idx >= INPUT0_SIZE_Y)) ||
((z_idx < 0) || (z_idx >= INPUT0_SIZE_Z)));
#endif

#if ((FILTER_GROUPS_NUM > 1) && (FILTER_IFM_NUM % FSV != 0))
if (in_f_offset == 0) {
#endif
input_val[izb][iyb][ixb] = vload4(0, (__global uint*)(conv_input + input_idx + tmp * FSV));
#ifdef SHOULD_USE_DATA_ZP
if (input_on_padding) {
input_val[izb][iyb][ixb] = data_zp_val;
} else {
#endif
input_val[izb][iyb][ixb] = vload4(0, (__global uint *)(conv_input + input_idx + tmp * FSV));
#ifdef SHOULD_USE_DATA_ZP
}
#endif
#if ((FILTER_GROUPS_NUM > 1) && (FILTER_IFM_NUM % FSV != 0))
} else {
INPUT0_TYPE* input_int8_arr = (INPUT0_TYPE*) &input_val[izb][iyb][ixb];
#ifdef SHOULD_USE_DATA_ZP
INPUT0_TYPE* input_zp_int8_arr = (INPUT0_TYPE*) &data_zp_val;
#endif
__attribute__((opencl_unroll_hint(FSV)))
for (uint v = 0; v < FSV; v++) {
if (v + in_f_offset < FSV) {
input_int8_arr[v] = conv_input[input_idx + tmp * FSV + v];
} else {
const uint addr = input_idx + tmp * FSV + v +
((INPUT0_SIZE_X + INPUT0_PAD_BEFORE_SIZE_X + INPUT0_PAD_AFTER_SIZE_X) *
(INPUT0_SIZE_Y + INPUT0_PAD_BEFORE_SIZE_Y + INPUT0_PAD_AFTER_SIZE_Y) *
(INPUT0_SIZE_Z + INPUT0_PAD_BEFORE_SIZE_Z + INPUT0_PAD_AFTER_SIZE_Z) - 1) * FSV;
input_int8_arr[v] = conv_input[addr];
}
#ifdef SHOULD_USE_DATA_ZP
if (input_on_padding) {
input_int8_arr[v] = input_zp_int8_arr[v];
} else {
#endif
if (v + in_f_offset < FSV) {
input_int8_arr[v] = conv_input[input_idx + tmp * FSV + v];
} else {
const uint addr = input_idx + tmp * FSV + v +
((INPUT0_SIZE_X + INPUT0_PAD_BEFORE_SIZE_X + INPUT0_PAD_AFTER_SIZE_X) *
(INPUT0_SIZE_Y + INPUT0_PAD_BEFORE_SIZE_Y + INPUT0_PAD_AFTER_SIZE_Y) *
(INPUT0_SIZE_Z + INPUT0_PAD_BEFORE_SIZE_Z + INPUT0_PAD_AFTER_SIZE_Z) - 1) * FSV;
input_int8_arr[v] = conv_input[addr];
}
#ifdef SHOULD_USE_DATA_ZP
}
#endif
}
}
#endif
@@ -173,6 +328,14 @@ KERNEL(convolution_gpu_b_fs_zyx_fsv16_imad)(
for (uint ive = 0; ive < 4; ive++) {
__attribute__((opencl_unroll_hint))
for (uint ofb = 0; ofb < OFM_BLOCKS_PER_SIMD; ++ofb) {
#ifdef SHOULD_USE_DATA_ZP
ACCUMULATOR_TYPE dotProdAZPxW = 0;
dotProdAZPxW = TO_ACCUMULATOR_TYPE(
IMAD(dotProdAZPxW,
AS_INPUT0_TYPE_4(data_zp_val[ive]),
AS_FILTER_TYPE_4(weights_val[ofb][ive])));
#endif

__attribute__((opencl_unroll_hint(OUT_BLOCK_DEPTH)))
for (uint od = 0; od < OUT_BLOCK_DEPTH; ++od) {
__attribute__((opencl_unroll_hint(OUT_BLOCK_HEIGHT)))
@@ -185,11 +348,32 @@ KERNEL(convolution_gpu_b_fs_zyx_fsv16_imad)(
const uint shuffle_wi = x_block_idx % SIMD;
const uint shuffle_idx = x_block_idx / SIMD;

INPUT0_TYPE_4 inputs = AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val[z_block_idx][y_block_idx][shuffle_idx][ive],
shuffle_wi));

dotProd[ofb][od][oh][ow] = TO_ACCUMULATOR_TYPE(
IMAD(dotProd[ofb][od][oh][ow],
AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val[z_block_idx][y_block_idx][shuffle_idx][ive],
shuffle_wi)),
inputs,
AS_FILTER_TYPE_4(weights_val[ofb][ive])));

#ifdef ASYMMETRIC_WEIGHTS_QUANTIZATION
ACCUMULATOR_TYPE dotProdAxWZP = 0;
dotProdAxWZP = TO_ACCUMULATOR_TYPE(
IMAD(dotProdAxWZP,
inputs,
AS_FILTER_TYPE_4(weights_zp_val[ofb][ive])));
dotProd[ofb][od][oh][ow] -= dotProdAxWZP;
#endif

#if !defined COMPENSATION_TERM && defined ASYMMETRIC_DATA_QUANTIZATION
dotProd[ofb][od][oh][ow] -= dotProdAZPxW;
#endif

#if (!defined COMPENSATION_TERM && \
defined ASYMMETRIC_DATA_QUANTIZATION && \
defined ASYMMETRIC_WEIGHTS_QUANTIZATION)
dotProd[ofb][od][oh][ow] += dotProdAZPxWZP[ofb][ive];
#endif
}
}
}
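The inner loop above applies the correction terms per four-element IMAD group: the a*wzp term (dotProdAxWZP) is always subtracted when weights are asymmetric, while the azp*w and azp*wzp terms are applied here only when no precomputed compensation tensor is attached; otherwise they are folded into the compensation value added later. A scalar sketch of that decision (illustrative names only, not kernel code):

#include <cstdint>

int32_t apply_zero_point_corrections(int32_t acc,
                                     int32_t a_dot_wzp,    // corresponds to dotProdAxWZP
                                     int32_t azp_dot_w,    // corresponds to dotProdAZPxW
                                     int32_t azp_dot_wzp,  // corresponds to dotProdAZPxWZP
                                     bool asymmetric_weights,
                                     bool asymmetric_data,
                                     bool has_compensation) {
    if (asymmetric_weights)
        acc -= a_dot_wzp;
    if (asymmetric_data && !has_compensation) {
        acc -= azp_dot_w;
        if (asymmetric_weights)
            acc += azp_dot_wzp;  // both zero points present: add the cross term back
    }
    return acc;
}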
@@ -207,6 +391,10 @@ KERNEL(convolution_gpu_b_fs_zyx_fsv16_imad)(
input_start_idx += INPUT0_FEATURE_PITCH * FSV * FEATURE_SLM_SPLIT - (FILTER_SIZE_Z / FILTER_SIZE_Z_UNROLL) * DILATION_SIZE_Z * INPUT0_Z_PITCH * FSV;

filter_idx += FSV * FSV * FILTER_SIZE_X * FILTER_SIZE_Y * FILTER_SIZE_Z * (FEATURE_SLM_SPLIT - 1);

#ifdef SHOULD_USE_DATA_ZP
data_zp_idx += FSV;
#endif
}

#if FEATURE_SLM_SPLIT != 1
@@ -339,6 +527,14 @@ KERNEL(convolution_gpu_b_fs_zyx_fsv16_imad)(
}
#endif

#ifdef COMPENSATION_TERM
COMPENSATION_TYPE comp[OFM_VALUES_PER_WI];
__attribute__((opencl_unroll_hint))
for (uint ofb = 0; ofb < OFM_VALUES_PER_WI; ++ofb) {
comp[ofb] = compensation[out_f + ofb * SIMD];
}
#endif

ACTIVATION_TYPE dequantized[OFM_VALUES_PER_WI][OUT_BLOCK_DEPTH][OUT_BLOCK_HEIGHT][OUT_BLOCK_WIDTH];
__attribute__((opencl_unroll_hint))
for (uint ofb = 0; ofb < OFM_VALUES_PER_WI; ++ofb) {
@@ -351,6 +547,9 @@ KERNEL(convolution_gpu_b_fs_zyx_fsv16_imad)(
dequantized[ofb][od][oh][ow] = TO_ACTIVATION_TYPE(dotProd[ofb][od][oh][ow]);
#if BIAS_TERM
dequantized[ofb][od][oh][ow] += bias[ofb];
#endif
#ifdef COMPENSATION_TERM
dequantized[ofb][od][oh][ow] += comp[ofb];
#endif
}
}
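When COMPENSATION_TERM is defined, the per-output-channel compensation loaded above replaces the azp*w and azp*wzp corrections; it can be precomputed on the host as comp = -(sum(w * azp) - sum(azp * wzp)) over all taps of the channel, which mirrors the reference computation in the test further below. A host-side sketch of that precomputation (illustrative names only, not code from the repository):

#include <cstdint>
#include <vector>

// w:   weights of one output channel, indexed [input_channel][tap]
// azp: per-input-channel activation zero points
// wzp: weights zero point of this output channel (0 if weights are symmetric)
float compensation_for_channel(const std::vector<std::vector<int8_t>>& w,
                               const std::vector<int8_t>& azp,
                               int8_t wzp) {
    float c = 0.f;
    for (size_t ic = 0; ic < w.size(); ++ic) {
        for (int8_t wv : w[ic]) {
            c += float(wv) * float(azp[ic]);   // accumulates sum(w * azp)
            c -= float(azp[ic]) * float(wzp);  // accumulates sum(azp * wzp)
        }
    }
    return -c;  // added to the accumulator by the COMPENSATION_TERM path
}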
@@ -498,9 +697,38 @@ KERNEL(convolution_gpu_b_fs_zyx_fsv16_imad)(
#endif
}

#undef AS_INPUT0_TYPE_4
#undef TYPE_N_
#undef TYPE_N
#undef AS_TYPE_N
#undef AS_TYPE_N_

#undef INPUT0_TYPE_4
#undef AS_INPUT0_TYPE_4

#ifdef NON_ZERO_INPUT0_PAD_BEFORE
#undef NON_ZERO_INPUT0_PAD_BEFORE
#endif

#ifdef SHOULD_BALANCE_COMPENSATION
#undef SHOULD_BALANCE_COMPENSATION
#endif

#ifdef SHOULD_USE_DATA_ZP
#undef SHOULD_USE_DATA_ZP
#endif

#ifdef SHOULD_USE_DATA_AND_WEIGHTS_ZP
#undef SHOULD_USE_DATA_AND_WEIGHTS_ZP
#endif

#ifdef ACCUMULATOR_TYPE_4
#undef ACCUMULATOR_TYPE_4
#endif

#ifdef FILTER_TYPE_16
#undef FILTER_TYPE_16
#endif

#undef AS_FILTER_TYPE_4

#undef CEIL_DIV
@@ -193,7 +193,9 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next,

if (next.is_type<convolution>() &&
fmt_prev == format::bfyx &&
fmt_next == format::b_fs_yx_fsv16 && next_output_layout.size.feature[0] >= 16 && prev_output_layout.size.feature[0] <= 4)
fmt_next == format::b_fs_yx_fsv16 && next_output_layout.size.feature[0] >= 16 && prev_output_layout.size.feature[0] <= 4 &&
next.as<convolution>().get_primitive()->activations_zero_points.empty() &&
next.as<convolution>().get_primitive()->weights_zero_points.empty())
return true;

if (next.is_type<convolution>() &&
@@ -366,9 +368,7 @@ bool layout_optimizer::convolution_b_fs_yx_fsv16_opt(layout const &input_layout,
weights_layout.size.batch[0] >= 16 &&
((conv->groups == 1 && conv->split() == 1) ||
conv->groups == static_cast<uint32_t>(input_layout.size.feature[0]) ||
conv->split() == static_cast<int32_t>(input_layout.size.feature[0])) &&
((conv->activations_zero_points.empty() && conv->weights_zero_points.empty()) ||
(input_layout.size.feature[0] <= 4))) // only bfyx -> fsv16 kernel supports asymmetric quantization in fsv16 format
conv->split() == static_cast<int32_t>(input_layout.size.feature[0])))
return true;
// Check for grouped convolution
else if (input_layout.format.dimension() == 4 && input_layout.size.batch[0] < 16 &&
@@ -380,7 +380,6 @@ bool layout_optimizer::convolution_b_fs_yx_fsv16_opt(layout const &input_layout,
return true;
// Check for fsv16 imad kernel
else if ((input_layout.format.dimension() == 4) &&
(conv->activations_zero_points.empty() && conv->weights_zero_points.empty()) &&
((in_features_per_group > 8) || (out_features_per_group >= 4)))
return true;
return false;
@@ -447,7 +446,6 @@ bool layout_optimizer::convolution_b_fs_zyx_fsv16_opt(layout const &input_layout

// Check for fsv16 imad kernel
if ((input_layout.format.dimension() == 5) &&
(conv->activations_zero_points.empty() && conv->weights_zero_points.empty()) &&
(input_layout.data_type == data_types::i8 || input_layout.data_type == data_types::u8) &&
(weights_layout.data_type == data_types::i8 || weights_layout.data_type == data_types::u8) &&
((in_features_per_group > 8) || (out_features_per_group >= 4)))
@@ -4894,9 +4894,12 @@ using TestParamType_grouped_convolution_gpu = ::testing::tuple< int, // 0 -
int,            // 7 - Kernel sizeZ
int,            // 8 - Groups number
int,            // 9 - Stride
int,            // 10 - Batch
format,         // 11 - Input data format
std::string>;   // 12 - Implementation name
int,            // 10 - Batch
bool,           // 11 - Zero points for activations
bool,           // 12 - Zero points for weights
bool,           // 13 - Compensation
format,         // 14 - Input data format
std::string>;   // 15 - Implementation name

using TestParamType_general_convolution_gpu = ::testing::tuple< int, // 0 - Input X size
int,            // 1 - Input Y size
@@ -4996,10 +4999,13 @@ struct convolution_grouped_gpu : public ::testing::TestWithParam<TestParamType_g
"_groups" + std::to_string(testing::get<8>(param_info.param)) +
"_stride" + std::to_string(testing::get<9>(param_info.param)) +
"_batch" + std::to_string(testing::get<10>(param_info.param)) +
"_format" + std::to_string(testing::get<11>(param_info.param));
"_data_zp" + std::to_string(testing::get<11>(param_info.param)) +
"_weights_zp" + std::to_string(testing::get<12>(param_info.param)) +
"_comp" + std::to_string(testing::get<13>(param_info.param)) +
"_format" + std::to_string(testing::get<14>(param_info.param));

if (testing::get<12>(param_info.param) != "") {
res += "_impl_" + testing::get<12>(param_info.param);
if (testing::get<15>(param_info.param) != "") {
res += "_impl_" + testing::get<15>(param_info.param);
}

return res;
@@ -7205,57 +7211,60 @@ INSTANTIATE_TEST_CASE_P(convolution_grouped_fsv4_fsv16,
::testing::Values(
// Input X size, Input Y size, Input Z size, Input features, Output features,
// Kernel size X, Kernel size Y, Kernel size Z, Groups number, Stride, Batch,
// Activation zero points, Weights zero points, Compensation,
// Input data format, Implementation name

// Format: b_fs_yx_fsv4
TestParamType_grouped_convolution_gpu(4, 4, 1, 16, 17, 3, 3, 1, 1, 1, 1, format::b_fs_yx_fsv4, ""),
TestParamType_grouped_convolution_gpu(4, 4, 1, 16, 16, 3, 3, 1, 4, 1, 1, format::b_fs_yx_fsv4, ""),
TestParamType_grouped_convolution_gpu(4, 4, 1, 8, 4, 2, 2, 1, 2, 1, 4, format::b_fs_yx_fsv4, ""),
TestParamType_grouped_convolution_gpu(8, 8, 1, 16, 16, 4, 4, 1, 4, 1, 1, format::b_fs_yx_fsv4, ""),
TestParamType_grouped_convolution_gpu(17, 17, 1, 32, 96, 3, 3, 1, 2, 2, 2, format::b_fs_yx_fsv4, ""),
TestParamType_grouped_convolution_gpu(16, 16, 1, 8, 48, 2, 2, 1, 2, 2, 1, format::b_fs_yx_fsv4, ""),
TestParamType_grouped_convolution_gpu(3, 3, 1, 48, 96, 2, 2, 1, 2, 8, 1, format::b_fs_yx_fsv4, ""),
TestParamType_grouped_convolution_gpu(6, 6, 1, 8, 26, 3, 3, 1, 2, 4, 1, format::b_fs_yx_fsv4, ""),
TestParamType_grouped_convolution_gpu(4, 4, 1, 16, 17, 3, 3, 1, 1, 1, 1, false, false, false, format::b_fs_yx_fsv4, ""),
TestParamType_grouped_convolution_gpu(4, 4, 1, 16, 16, 3, 3, 1, 4, 1, 1, false, false, false, format::b_fs_yx_fsv4, ""),
TestParamType_grouped_convolution_gpu(4, 4, 1, 8, 4, 2, 2, 1, 2, 1, 4, false, false, false, format::b_fs_yx_fsv4, ""),
TestParamType_grouped_convolution_gpu(8, 8, 1, 16, 16, 4, 4, 1, 4, 1, 1, false, false, false, format::b_fs_yx_fsv4, ""),
TestParamType_grouped_convolution_gpu(17, 17, 1, 32, 96, 3, 3, 1, 2, 2, 2, false, false, false, format::b_fs_yx_fsv4, ""),
TestParamType_grouped_convolution_gpu(16, 16, 1, 8, 48, 2, 2, 1, 2, 2, 1, false, false, false, format::b_fs_yx_fsv4, ""),
TestParamType_grouped_convolution_gpu(3, 3, 1, 48, 96, 2, 2, 1, 2, 8, 1, false, false, false, format::b_fs_yx_fsv4, ""),
TestParamType_grouped_convolution_gpu(6, 6, 1, 8, 26, 3, 3, 1, 2, 4, 1, false, false, false, format::b_fs_yx_fsv4, ""),

// Format: b_fs_yx_fsv16
TestParamType_grouped_convolution_gpu(12, 12, 1, 96, 96, 3, 3, 1, 32, 1, 1, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 4, 1, 8, 16, 3, 3, 1, 2, 1, 1, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(7, 7, 1, 8, 4, 3, 3, 1, 4, 1, 1, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(5, 5, 1, 34, 12, 3, 3, 1, 2, 1, 1, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(8, 8, 1, 34, 24, 3, 3, 1, 2, 1, 1, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(2, 2, 1, 12, 12, 3, 3, 1, 4, 1, 1, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(3, 3, 1, 8, 8, 3, 3, 1, 2, 1, 1, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 4, 1, 8, 4, 2, 2, 1, 2, 2, 4, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 4, 1, 16, 17, 3, 3, 1, 1, 1, 1, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 4, 1, 16, 16, 3, 3, 1, 4, 1, 1, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 4, 1, 8, 4, 2, 2, 1, 2, 1, 4, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(8, 8, 1, 16, 16, 4, 4, 1, 4, 1, 1, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(17, 17, 1, 32, 96, 3, 3, 1, 2, 2, 2, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(16, 16, 1, 8, 48, 2, 2, 1, 2, 2, 1, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(3, 3, 1, 48, 96, 2, 2, 1, 2, 8, 1, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(6, 6, 1, 8, 26, 3, 3, 1, 2, 4, 1, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(12, 12, 1, 96, 96, 3, 3, 1, 32, 1, 1, true, true, true, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 4, 1, 8, 16, 3, 3, 1, 2, 1, 1, true, true, true, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(7, 7, 1, 8, 4, 3, 3, 1, 4, 1, 1, true, true, true, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(5, 5, 1, 34, 12, 3, 3, 1, 2, 1, 1, true, true, true, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(8, 8, 1, 34, 24, 3, 3, 1, 2, 1, 1, true, true, true, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(2, 2, 1, 12, 12, 3, 3, 1, 4, 1, 1, true, true, true, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(3, 3, 1, 8, 8, 3, 3, 1, 2, 1, 1, true, true, true, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 4, 1, 8, 4, 2, 2, 1, 2, 2, 4, true, true, true, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 4, 1, 16, 17, 3, 3, 1, 1, 1, 1, true, true, true, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 4, 1, 16, 16, 3, 3, 1, 4, 1, 1, true, true, true, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 4, 1, 8, 4, 2, 2, 1, 2, 1, 4, true, true, true, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(8, 8, 1, 16, 16, 4, 4, 1, 4, 1, 1, true, true, true, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(17, 17, 1, 32, 96, 3, 3, 1, 2, 2, 2, true, true, true, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(16, 16, 1, 8, 48, 2, 2, 1, 2, 2, 1, true, true, true, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(3, 3, 1, 48, 96, 2, 2, 1, 2, 8, 1, true, true, true, format::b_fs_yx_fsv16, ""),
TestParamType_grouped_convolution_gpu(6, 6, 1, 8, 26, 3, 3, 1, 2, 4, 1, true, true, true, format::b_fs_yx_fsv16, ""),

// Format: b_fs_zyx_fsv16
TestParamType_grouped_convolution_gpu(4, 4, 4, 16, 17, 3, 3, 3, 1, 1, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 4, 4, 16, 16, 3, 3, 3, 4, 1, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 4, 4, 8, 4, 2, 2, 2, 2, 1, 4, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(8, 8, 8, 16, 16, 4, 4, 4, 4, 1, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(17, 17, 17, 32, 96, 3, 3, 3, 2, 2, 2, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(16, 16, 16, 8, 48, 2, 2, 2, 2, 2, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(3, 3, 3, 48, 96, 2, 2, 2, 2, 8, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(6, 6, 6, 8, 26, 3, 3, 3, 2, 4, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(7, 5, 3, 51, 99, 3, 3, 3, 3, 1, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(8, 6, 4, 32, 64, 2, 2, 2, 2, 1, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(33, 6, 4, 16, 32, 4, 3, 2, 2, 1, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(33, 1, 1, 30, 62, 1, 1, 1, 2, 1, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(80, 1, 1, 48, 96, 33, 1, 1, 2, 8, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(2, 1, 1, 18, 32, 3, 1, 1, 2, 2, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(3, 1, 5, 196, 252, 3, 1, 3, 4, 1, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 1, 6, 256, 256, 2, 1, 2, 4, 1, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 1, 6, 256, 512, 2, 1, 3, 16, 1, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(1, 3, 1, 18, 2, 1, 3, 1, 2, 1, 1, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(2, 3, 4, 3, 18, 3, 3, 3, 1, 1, 1, format::b_fs_zyx_fsv16, "convolution_gpu_mmad_bfyx_to_b_fs_yx_fsv32"),
TestParamType_grouped_convolution_gpu(79, 224, 224, 3, 64, 3, 3, 3, 1, 2, 1, format::b_fs_zyx_fsv16, "convolution_gpu_mmad_bfyx_to_b_fs_yx_fsv32")
TestParamType_grouped_convolution_gpu(7, 5, 3, 51, 99, 3, 3, 3, 3, 1, 1, true, true, true, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(8, 6, 4, 32, 64, 2, 2, 2, 2, 1, 1, true, true, true, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(33, 6, 4, 16, 32, 4, 3, 2, 2, 1, 1, true, true, true, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(33, 1, 1, 30, 62, 1, 1, 1, 2, 1, 1, true, true, true, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(2, 1, 1, 18, 32, 3, 1, 1, 2, 2, 1, true, true, true, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(80, 1, 1, 48, 96, 33, 1, 1, 2, 8, 1, false, false, false, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(80, 1, 1, 48, 96, 33, 1, 1, 2, 8, 1, false, true, false, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(80, 1, 1, 48, 96, 33, 1, 1, 2, 8, 1, true, false, false, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(80, 1, 1, 48, 96, 33, 1, 1, 2, 8, 1, true, true, false, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(80, 1, 1, 48, 96, 33, 1, 1, 2, 8, 1, true, false, true, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(80, 1, 1, 48, 96, 33, 1, 1, 2, 8, 1, true, true, true, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(3, 1, 5, 196, 252, 3, 1, 3, 4, 1, 1, false, false, false, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(3, 1, 5, 196, 252, 3, 1, 3, 4, 1, 1, false, true, false, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(3, 1, 5, 196, 252, 3, 1, 3, 4, 1, 1, true, false, false, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(3, 1, 5, 196, 252, 3, 1, 3, 4, 1, 1, true, true, false, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(3, 1, 5, 196, 252, 3, 1, 3, 4, 1, 1, true, false, true, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(3, 1, 5, 196, 252, 3, 1, 3, 4, 1, 1, true, true, true, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 1, 6, 256, 256, 2, 1, 2, 4, 1, 1, true, true, true, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(4, 1, 6, 256, 512, 2, 1, 3, 16, 1, 1, true, true, true, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(1, 3, 1, 18, 2, 1, 3, 1, 2, 1, 1, true, true, true, format::b_fs_zyx_fsv16, ""),
TestParamType_grouped_convolution_gpu(2, 3, 4, 3, 18, 3, 3, 3, 1, 1, 1, false, false, false, format::b_fs_zyx_fsv16, "convolution_gpu_mmad_bfyx_to_b_fs_yx_fsv32"),
TestParamType_grouped_convolution_gpu(79, 224, 224, 3, 64, 3, 3, 3, 1, 2, 1, false, false, false, format::b_fs_zyx_fsv16, "convolution_gpu_mmad_bfyx_to_b_fs_yx_fsv32")
),
convolution_grouped_gpu::PrintToStringParamName);
@@ -7273,23 +7282,28 @@ TEST_P(convolution_grouped_gpu, base) {
groups = testing::get<8>(GetParam()),
stride = testing::get<9>(GetParam()),
batch_num = testing::get<10>(GetParam()),
output_padding = 0,
input_offset_z = (filter_z - 1) / 2,
input_offset_y = (filter_y - 1) / 2,
input_offset_x = (filter_x - 1) / 2;
auto input_data_format = testing::get<11>(GetParam());
auto impl_name = testing::get<12>(GetParam());
const auto has_input_zp = testing::get<11>(GetParam());
const auto has_weights_zp = testing::get<12>(GetParam());
const auto has_comp = testing::get<13>(GetParam());
const auto input_data_format = testing::get<14>(GetParam());
const auto impl_name = testing::get<15>(GetParam());

// can use compensation term only if data zero points are available
ASSERT_TRUE(has_input_zp || !has_comp);

auto num_in_spatial_dims = input_data_format.spatial_num();

auto input_size = tensor(batch(batch_num), feature(input_f), spatial(input_x, input_y, input_z));
auto input_rnd = generate_random_5d<uint8_t>(batch_num, input_f, input_z, input_y, input_x, 0, 255);
auto input_rnd = generate_random_5d<int8_t>(batch_num, input_f, input_z, input_y, input_x, -127, 127);

auto input_lay = layout(data_types::u8, format::bfzyx, input_size);
auto input_lay = layout(data_types::i8, format::bfzyx, input_size);
if (num_in_spatial_dims == 2) {
input_lay = layout(data_types::u8, format::bfyx, input_size);
input_lay = layout(data_types::i8, format::bfyx, input_size);
}
std::vector<uint8_t> input_flat(input_lay.get_linear_size());
std::vector<int8_t> input_flat(input_lay.get_linear_size());
for (int b = 0; b < batch_num; b++)
for (int f = 0; f < input_f; f++)
for (int z = 0; z < input_z; z++)
@@ -7302,6 +7316,16 @@ TEST_P(convolution_grouped_gpu, base) {
auto input = memory::allocate(engine, input_lay);
set_values(input, input_flat);

auto input_zp_rnd = std::vector<int8_t>(input_f);
auto input_zp_prim_name = std::vector<primitive_id>(0);
if (has_input_zp) {
input_zp_rnd = generate_random_1d<int8_t>(input_f, -127, 127);
input_zp_prim_name = { "input_zp" };
}
auto input_zp_lay = layout(data_types::i8, format::bfyx, tensor(feature(input_f)));
auto input_zp = memory::allocate(engine, input_zp_lay);
set_values(input_zp, input_zp_rnd);

auto weights_size = tensor(group(groups), batch(output_f / groups), feature(input_f / groups), spatial(filter_x, filter_y, filter_z));

VVVVVVF<int8_t> weights_rnd = generate_random_6d<int8_t>(groups, output_f / groups, input_f / groups, filter_z, filter_y, filter_x, -127, 127);
@@ -7323,6 +7347,16 @@ TEST_P(convolution_grouped_gpu, base) {
auto weights = memory::allocate(engine, weights_lay);
set_values(weights, weights_flat);

auto weights_zp_rnd = std::vector<int8_t>(output_f);
auto weights_zp_prim_name = std::vector<primitive_id>(0);
if (has_weights_zp) {
weights_zp_rnd = generate_random_1d<int8_t>(output_f, -127, 127);
weights_zp_prim_name = { "weights_zp" };
}
auto weights_zp_lay = layout(data_types::i8, format::bfyx, tensor(batch(output_f)));
auto weights_zp = memory::allocate(engine, weights_zp_lay);
set_values(weights_zp, weights_zp_rnd);

VVVVVF<float> expected_result(batch_num, VVVVF<float>(output_f));

// Calculate reference values without bias
@@ -7333,36 +7367,94 @@ TEST_P(convolution_grouped_gpu, base) {
int f_begin = gi * input_f / groups;
int f_end = gi * input_f / groups + input_f / groups;

expected_result[bi][ofi + gi * output_f / groups] = reference_convolve<uint8_t, float, int8_t>(
input_rnd[bi], weights_rnd[gi][ofi], // input, weights
stride, stride, stride, // strides
0, // bias
1, 1, 1, // dilation
input_offset_z, input_offset_y, input_offset_x, // input padding
0, 0, 0, // output_padding
f_begin, f_end, // f_begin, f_end
false, // depthwise
grouped); // grouped
expected_result[bi][ofi + gi * output_f / groups] = reference_convolve<int8_t, float, int8_t>(
input_rnd[bi], weights_rnd[gi][ofi], // input, weights
stride, stride, stride, // strides
0, // bias
1, 1, 1, // dilation
input_offset_z, input_offset_y, input_offset_x, // input padding
0, 0, 0, // output_padding
f_begin, f_end, // f_begin, f_end
false, // depthwise
grouped, // grouped
input_zp_rnd, // input zero points
weights_zp_rnd[gi * (int)weights_rnd[0].size() + ofi]); // weights zero points
}

auto ref_conv_out_size = tensor(batch(expected_result.size()),
feature(expected_result[0].size()),
spatial(expected_result[0][0][0][0].size(),
expected_result[0][0][0].size(),
expected_result[0][0].size()));

auto comp_val = std::vector<float>(output_f);
auto comp_prim_name = std::vector<primitive_id>(0);
if (has_comp) {
for (int g = 0; g < groups; g++) {
for (int oc = 0; oc < output_f / groups; oc++) {
float c = 0.f;
for (int ic = 0; ic < input_f / groups; ic++) {
for (int zi = 0; zi < filter_z; zi++) {
for (int yi = 0; yi < filter_y; yi++) {
for (int xi = 0; xi < filter_x; xi++) {
int azp_idx = g*(input_f / groups) + ic;
int wzp_idx = g*(output_f / groups) + oc;
c += weights_rnd[g][oc][ic][zi][yi][xi] * input_zp_rnd[azp_idx];
if (has_weights_zp) {
c -= input_zp_rnd[azp_idx] * weights_zp_rnd[wzp_idx];
}
}
}
}
}

comp_val[g*(output_f / groups) + oc] = -c;
}
}
comp_prim_name = { "compensation" };
}
auto comp_lay = layout(data_types::f32, format::bfyx, tensor(batch(output_f)));
auto comp = memory::allocate(engine, comp_lay);
set_values(comp, comp_val);

auto stride_tensor = tensor(batch(1), feature(1), spatial(stride, stride, stride, 1));
if (num_in_spatial_dims == 2) {
stride_tensor = tensor(batch(1), feature(1), spatial(stride, stride, 1, 1));
}

topology topology(input_layout("input", input.get_layout()),
data("weights", weights),
reorder("input_fsv", "input", {data_types::u8, input_data_format, input_size}),
reorder("input_fsv", "input", {data_types::i8, input_data_format, input_size}),
convolution("conv",
"input_fsv",
{"weights"},
std::vector<primitive_id>(0),
weights_zp_prim_name,
input_zp_prim_name,
comp_prim_name,
groups,
tensor(batch(1), feature(1), spatial(stride, stride, stride, 1)),
data_types::f32,
stride_tensor,
tensor(batch(0), feature(0), spatial(-input_offset_x, -input_offset_y, -input_offset_z, 0)),
tensor(batch(1), feature(1), spatial(1, 1, 1, 1)),
padding({0, 0, output_padding, output_padding, output_padding}, 0.f)));
ref_conv_out_size),
reorder("out", "conv", {data_types::f32, format::bfzyx, ref_conv_out_size}));

if (has_input_zp)
topology.add(data(input_zp_prim_name[0], input_zp));

if (has_weights_zp)
topology.add(data(weights_zp_prim_name[0], weights_zp));

if (has_comp)
topology.add(data(comp_prim_name[0], comp));

build_options options;
options.set_option(build_option::optimize_data(true));
implementation_desc conv_impl = {input_data_format, impl_name};
options.set_option(build_option::force_implementations({{"conv", conv_impl}}));

network network(engine, topology, options);
cldnn::network network(engine, topology, options);
network.set_input_data("input", input);
network.execute();

@@ -8231,8 +8323,11 @@ INSTANTIATE_TEST_CASE_P(
.smoke_test_params(format::b_fs_yx_fsv32, false, true)
.smoke_test_params(format::b_fs_yx_fsv32, true, false)
.smoke_test_params(format::b_fs_yx_fsv32, false, false, true)
.smoke_test_params(format::b_fs_yx_fsv16, false, false, true)
.smoke_test_params(format::b_fs_yx_fsv16)
.smoke_test_params(format::b_fs_yx_fsv16, true, true)
.smoke_test_params(format::b_fs_yx_fsv16, false, true)
.smoke_test_params(format::b_fs_yx_fsv16, true, false)
.smoke_test_params(format::b_fs_yx_fsv16, false, false, true)
.bs_test_params(format::bs_fs_yx_bsv16_fsv16)
),
to_string_convolution_all_params