[IE CLDNN] Changed weights layout used in the plugin (#3858)

Before this patch, a constant with weights could go undetected if
it wasn't directly connected to a Conv/Deconv layer.
Now weights always use the common data format (bfzyx) in the plugin, which is
converted into a weights format later (goiyx, oiyx, etc.), so the weights sub-graph
can now contain anything.
This commit is contained in:
Vladimir Paramuzov 2021-01-19 13:06:20 +03:00 committed by GitHub
parent e88de8f822
commit 9cc58fc66a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
28 changed files with 506 additions and 199 deletions

View File

@ -139,7 +139,6 @@ inline cldnn::format ImageFormatFromLayout(InferenceEngine::Layout l) {
}
}
inline cldnn::format DefaultFormatForDims(size_t dimensions) {
switch (dimensions) {
case 0:
@ -159,4 +158,29 @@ inline cldnn::format DefaultFormatForDims(size_t dimensions) {
return cldnn::format::bfyx; // Should not get here
}
// This helper function is needed to convert permute order from IE format (bfyx) into cldnn format (bfxy).
// @param ie_order  Permute order in IE dimension layout (b, f, spatial outer->inner).
// @param rank      Desired minimum rank of the resulting order; clamped to at least 4
//                  because cldnn formats always carry at least 4 dimensions.
// @return          Equivalent permute order expressed in cldnn dimension layout.
inline std::vector<uint16_t> ConvertPermuteOrder(const std::vector<uint16_t>& ie_order, size_t rank = 0) {
    std::vector<uint16_t> ie_order_aligned = ie_order;
    // if order size is less than 4 - fill the rest with just copy (identity mapping)
    rank = std::max(rank, static_cast<size_t>(4));
    for (auto o = ie_order_aligned.size(); o < rank; o++)
        ie_order_aligned.push_back((uint16_t)o);

    std::vector<uint16_t> cldnn_order;
    // 1. Switch permute order values for spatial dims: IE lists spatial dims outer->inner,
    //    cldnn lists them inner->outer, so values >= 2 are mirrored.
    for (auto const& o : ie_order_aligned) {
        if (o >= 2)
            cldnn_order.push_back(1 + ie_order_aligned.size() - o);
        else
            cldnn_order.push_back(o);
    }

    // 2. Swap spatial positions so the order itself is laid out in cldnn dimension order.
    //    size_t index avoids a signed/unsigned comparison with the unsigned bound.
    for (size_t i = 0; i < (cldnn_order.size() - 2) / 2; i++) {
        std::swap(cldnn_order[2 + i], cldnn_order[1 + cldnn_order.size() - (2 + i)]);
    }

    return cldnn_order;
}
} // namespace CLDNNPlugin

View File

@ -327,7 +327,7 @@ void Program::InitProfileInfo(const std::string& layerName,
bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node) {
std::list<std::shared_ptr<ngraph::Node>> nodes_to_process = { node };
while (!nodes_to_process.empty()) {
auto& current_node = nodes_to_process.front();
auto current_node = nodes_to_process.front();
nodes_to_process.pop_front();
for (size_t i = 0; i < current_node->get_input_size(); i++) {

View File

@ -33,25 +33,24 @@ static ConstProperties getConstProperties(const std::shared_ptr<ngraph::op::Cons
for (auto& t : outTensors) {
auto outOp = t.get_node();
if (dynamic_cast<ngraph::op::v1::Convolution*>(outOp)) {
return {true, false, false};
return {t.get_index() == 1, false, false};
} else if (dynamic_cast<ngraph::op::v1::BinaryConvolution*>(outOp)) {
return {true, false, false};
return {t.get_index() == 1, false, false};
} else if (auto castedOp = dynamic_cast<ngraph::op::v1::DeformableConvolution*>(outOp)) {
return {true, castedOp->get_group() > 1, false};
return {t.get_index() == 2, castedOp->get_group() > 1, false};
} else if (dynamic_cast<ngraph::op::v1::GroupConvolution*>(outOp)) {
return {true, true, false};
return {t.get_index() == 1, true, false};
} else if (dynamic_cast<ngraph::op::v1::ConvolutionBackpropData*>(outOp)) {
return {true, false, true};
return {t.get_index() == 1, false, true};
} else if (dynamic_cast<ngraph::op::v1::GroupConvolutionBackpropData*>(outOp)) {
return {true, true, true};
return {t.get_index() == 1, true, true};
}
}
}
return {false, false, false};
}
void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant>& op) {
auto constDims = op->get_shape();
static cldnn::tensor getConstTensor(const ngraph::Shape constDims) {
cldnn::tensor constTensor;
switch (constDims.size()) {
case 6: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]),
@ -75,6 +74,12 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
break;
default: THROW_IE_EXCEPTION << "Invalid constant blob dimensions";
}
return constTensor;
}
void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant>& op) {
auto constDims = op->get_shape();
cldnn::tensor constTensor = getConstTensor(constDims);
// WA to inconsistency between input and const 1d tensors
// For Concat along batch we go with batch interpretation
@ -119,45 +124,37 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
auto constFormat = DefaultFormatForDims(op->get_output_shape(0).size());
auto prop = getConstProperties(op);
if (prop.isWeights) {
// Deconvolution has reversed channels order (io instead of oi)
if (prop.reversedChannelsOrder) {
if (prop.hasGroupDimension) {
switch (op->get_output_shape(0).size()) {
case 5: constFormat = cldnn::format::gioyx; break;
case 6: constFormat = cldnn::format::giozyx; break;
}
} else {
switch (op->get_output_shape(0).size()) {
case 4: constFormat = cldnn::format::ioyx; break;
case 5: constFormat = cldnn::format::iozyx; break;
}
}
} else {
if (prop.hasGroupDimension) {
switch (op->get_output_shape(0).size()) {
case 5: constFormat = cldnn::format::goiyx; break;
case 6: constFormat = cldnn::format::goizyx; break;
}
} else {
switch (op->get_output_shape(0).size()) {
case 4: constFormat = cldnn::format::oiyx; break;
case 5: constFormat = cldnn::format::oizyx; break;
}
}
}
std::vector<cldnn::tensor::value_type> dims(constDims.begin(), constDims.end());
for (size_t i = dims.size(); i < 4; i++) {
dims.push_back(1);
}
constTensor = cldnn::tensor(constFormat, dims);
}
// If constDims has a dimension = 0, then create tensor with single value
// TODO: check if dim=0 is a valid case
if (std::accumulate(constDims.begin(), constDims.end(), 1, std::multiplies<size_t>()) == 0)
constTensor = cldnn::tensor{1};
// Swap O and I dimensions to match expected deconvolution weights format
bool swap_oi = prop.isWeights && prop.reversedChannelsOrder;
size_t inputFeatureElements = 1;
size_t outputFeatureElements = 1;
size_t groups = 1;
if (swap_oi) {
size_t expected_min_rank = 2 + (prop.hasGroupDimension ? 1 : 0);
if (expected_min_rank > constDims.size())
THROW_IE_EXCEPTION << "Invalid constant properties or shape";
auto newDims = constDims;
if (prop.hasGroupDimension) {
std::swap(newDims[2], newDims[1]);
inputFeatureElements = newDims[2];
outputFeatureElements = newDims[1];
groups = newDims[0];
} else {
std::swap(newDims[1], newDims[0]);
inputFeatureElements = newDims[1];
outputFeatureElements = newDims[0];
groups = 1;
}
constTensor = getConstTensor(newDims);
}
cldnn::layout constLayout = cldnn::layout(DataTypeFromPrecision(op->get_output_element_type(0)),
constFormat,
constTensor);
@ -176,7 +173,30 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
auto buf = tmpPointer.data();
auto bufSize = constLayout.bytes_count();
std::memcpy(&buf[0], &data[0], bufSize);
// Do actual weights reorder and change O and I channels order
if (swap_oi) {
auto elementSize = cldnn::data_type_traits::size_of(constLayout.data_type);
size_t spatial_dim_off = prop.hasGroupDimension ? 3 : 2;
size_t featureSize = elementSize;
for (size_t i = spatial_dim_off; i < constDims.size(); i++) {
featureSize *= constDims[i];
}
for (size_t g = 0; g < groups; g++) {
for (size_t i = 0; i < inputFeatureElements; i++) {
for (size_t o = 0; o < outputFeatureElements; o++) {
size_t outputShift = ((g*outputFeatureElements + o)*inputFeatureElements + i)*featureSize;
size_t inputShift = ((g*inputFeatureElements + i)*outputFeatureElements + o)*featureSize;
for (size_t b = 0; b < featureSize; b++) {
buf[outputShift + b] = data[inputShift + b];
}
}
}
}
} else {
std::memcpy(&buf[0], &data[0], bufSize);
}
p.AddPrimitive(cldnn::data(initialconstPrimID, mem));
p.blobMemCache[data] = initialconstPrimID;
constPrimID = initialconstPrimID;

View File

@ -16,7 +16,7 @@
#include "api/convolution.hpp"
#include "api/deconvolution.hpp"
#include "api/binary_convolution.hpp"
#include "api/reshape.hpp"
#include "api/permute.hpp"
#include "api/reorder.hpp"
namespace CLDNNPlugin {
@ -71,40 +71,9 @@ void CreateGroupConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::
auto outDims = op->get_output_shape(0);
auto outPrecision = op->get_output_element_type(0);
auto weightsName = inputs[1];
std::vector<cldnn::primitive_id> weights = {inputs[1]};
const bool weights_have_group_dim = true;
// WA: For the case with FakeQuantize op on weights that are not folded by constant propagation pass for some reason.
// Dimensions order is GOIYZ, but
// the selected format is OIZYX by default.
if (std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1)) == nullptr) {
std::string reshapeName = layerName + "_cldnn_weights_reshape";
std::string reorderName = layerName + "_cldnn_weights_reorder";
auto weights_shape = op->get_input_shape(1);
std::vector<size_t> new_weights_shape;
new_weights_shape.push_back(weights_shape[0] * weights_shape[1]); // Merged G and O dims
for (size_t i = 2; i < weights_shape.size(); i++) {
new_weights_shape.push_back(weights_shape[i]);
}
auto reshapePrim = cldnn::reshape(reshapeName,
weightsName,
CldnnTensorFromIEDims(new_weights_shape));
p.AddPrimitive(reshapePrim);
p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
auto reorderPrim = cldnn::reorder(reorderName,
reshapeName,
DefaultFormatForDims(new_weights_shape.size()),
DataTypeFromPrecision(op->get_input_element_type(1)));
p.AddPrimitive(reorderPrim);
p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);
weightsName = reorderName;
}
std::vector<cldnn::primitive_id> weights = {weightsName};
auto convPrim = cldnn::convolution(layerName,
inputs[0],
weights,
@ -114,7 +83,8 @@ void CreateGroupConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::
params.padding,
params.dilation,
CldnnTensorFromIEDims(outDims),
DataTypeFromPrecision(outPrecision));
DataTypeFromPrecision(outPrecision),
weights_have_group_dim);
p.AddPrimitive(convPrim);
p.AddPrimitiveToProfiler(op);
@ -130,6 +100,8 @@ void CreateConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::Convo
auto outPrecision = op->get_output_element_type(0);
std::vector<cldnn::primitive_id> weights = {inputs[1]};
const bool weights_have_group_dim = false;
auto convPrim = cldnn::convolution(layerName,
inputs[0],
weights,
@ -139,7 +111,8 @@ void CreateConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::Convo
params.padding,
params.dilation,
CldnnTensorFromIEDims(outDims),
DataTypeFromPrecision(outPrecision));
DataTypeFromPrecision(outPrecision),
weights_have_group_dim);
p.AddPrimitive(convPrim);
p.AddPrimitiveToProfiler(op);
@ -159,25 +132,30 @@ void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngraph::o
}
auto weightsName = inputs[1];
// WA: For the case with FakeQuantize op on weights that are not folded by constant propagation pass for some reason.
auto weights_node = op->get_input_node_shared_ptr(1);
// WA: For the cases like Const(weights)->Sub(zp)->Deconv.
// Dimensions order of weights blob is IOYX, but
// the selected format is OIYX by default. So we need to swap I and O dimensions to match the format
if (IsNodeOnConstPath(op->get_input_node_shared_ptr(1))) {
std::string reshapeName = layerName + "_cldnn_weights_reshape";
auto weights_shape = op->get_input_shape(1);
std::swap(weights_shape[0], weights_shape[1]);
auto reshapePrim = cldnn::reshape(reshapeName,
// the selected format is OIYX by default. So we need to swap (and transpose) I and O dimensions to match the format
// For Constant node on input transpose is not needed, because the data is transposed on const node creation
if (IsNodeOnConstPath(weights_node) && std::dynamic_pointer_cast<ngraph::op::v0::Constant>(weights_node) == nullptr) {
std::string permuteName = layerName + "_cldnn_weights_permute";
auto weights_rank = op->get_input_shape(1).size();
std::vector<uint16_t> permute_order(weights_rank);
std::iota(std::begin(permute_order), std::end(permute_order), 0);
// Should be 1, 0, 2, 3 {, 4} to swap O and I
std::swap(permute_order[1], permute_order[0]);
auto permutePrim = cldnn::permute(permuteName,
weightsName,
CldnnTensorFromIEDims(weights_shape));
ConvertPermuteOrder(permute_order, weights_rank));
p.AddPrimitive(reshapePrim);
p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
p.AddPrimitive(permutePrim);
p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
weightsName = reshapeName;
weightsName = permuteName;
}
std::vector<cldnn::primitive_id> weights = {weightsName};
const bool weights_have_group_dim = false;
auto params = GetConvolutionParameters(op->get_pads_begin(), op->get_dilations(), op->get_strides(), 1);
auto deconvPrim = cldnn::deconvolution(layerName,
@ -187,9 +165,10 @@ void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngraph::o
params.groups,
params.stride,
params.padding,
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()));
p.AddPrimitive(deconvPrim);
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()),
weights_have_group_dim);
p.AddPrimitive(deconvPrim);
p.AddPrimitiveToProfiler(op);
}
@ -201,13 +180,38 @@ void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngra
auto dilations = op->get_dilations();
for (auto d : dilations) {
if (d != 1) {
THROW_IE_EXCEPTION << "Unsupported dilation in ConvolutionBackpropData " << op->get_friendly_name();
THROW_IE_EXCEPTION << "Unsupported dilation in GroupConvolutionBackpropData " << op->get_friendly_name();
}
}
uint32_t groups = op->get_input_shape(1)[0];
auto params = GetConvolutionParameters(op->get_pads_begin(), op->get_dilations(), op->get_strides(), groups);
std::vector<cldnn::primitive_id> weights = {inputs[1]};
auto weightsName = inputs[1];
auto weights_node = op->get_input_node_shared_ptr(1);
// WA: For the cases like Const(weights)->Sub(zp)->Deconv.
// Dimensions order of weights blob is IOYX, but
// the selected format is OIYX by default. So we need to swap I and O dimensions to match the format.
// For Constant node on input transpose is not needed, because the data is transposed on const node creation
if (IsNodeOnConstPath(weights_node) && std::dynamic_pointer_cast<ngraph::op::v0::Constant>(weights_node) == nullptr) {
std::string permuteName = layerName + "_cldnn_weights_permute";
auto weights_rank = op->get_input_shape(1).size();
std::vector<uint16_t> permute_order(weights_rank);
std::iota(std::begin(permute_order), std::end(permute_order), 0);
// Should be 0, 2, 1, 3, 4 {, 5} to swap O and I
std::swap(permute_order[2], permute_order[1]);
auto permutePrim = cldnn::permute(permuteName,
weightsName,
ConvertPermuteOrder(permute_order, weights_rank));
p.AddPrimitive(permutePrim);
p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
weightsName = permuteName;
}
std::vector<cldnn::primitive_id> weights = {weightsName};
const bool weights_have_group_dim = true;
auto deconvPrim = cldnn::deconvolution(layerName,
inputs[0],
@ -216,9 +220,10 @@ void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngra
params.groups,
params.stride,
params.padding,
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()));
p.AddPrimitive(deconvPrim);
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()),
weights_have_group_dim);
p.AddPrimitive(deconvPrim);
p.AddPrimitiveToProfiler(op);
}

View File

@ -3,6 +3,7 @@
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/transpose.hpp"
#include "ngraph/op/constant.hpp"
@ -11,23 +12,6 @@
namespace CLDNNPlugin {
// Re-aligns a permute order to cldnn's dimension layout:
// pads the order up to rank 4 with value_to_align, then mirrors the spatial positions.
// @param ie_order        Permute order in IE dimension layout.
// @param value_to_align  Filler value used to pad the order up to rank 4.
// @return                The order padded to at least rank 4 with spatial dims swapped.
template<class Type>
std::vector<Type> GetPermuteOrder(const std::vector<Type>& ie_order, Type value_to_align = 0) {
    static_assert(std::is_integral<Type>::value, "Integral required.");
    std::vector<Type> cldnn_order = ie_order;

    // 1. Align to min. 4 sizes.
    //    'while' (not 'if') so inputs shorter than 4 are fully padded; otherwise a size-2
    //    input would stay at size 3 and an empty input would make the unsigned bound
    //    (size() - 2) / 2 below underflow.
    while (cldnn_order.size() < 4)
        cldnn_order.push_back(value_to_align);

    // 2. Swap spatial positions (cldnn spatial dims are listed in reverse order vs IE).
    for (size_t i = 0; i < (cldnn_order.size() - 2) / 2; i++) {
        std::swap(cldnn_order[2 + i], cldnn_order[1 + cldnn_order.size() - (2 + i)]);
    }

    return cldnn_order;
}
void CreateTransposeOp(Program& p, const std::shared_ptr<ngraph::op::v1::Transpose>& op) {
p.ValidateInputs(op, {1, 2});
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
@ -49,23 +33,7 @@ void CreateTransposeOp(Program& p, const std::shared_ptr<ngraph::op::v1::Transpo
ie_order.push_back((uint16_t)o);
}
// if order size is less than 4 - fill the rest with just copy
for (auto o = ie_order.size(); o < rank; o++)
ie_order.push_back((uint16_t)o);
/*
Because of the cldnn ordering: bfxy, and IE ordering: bfyx
we need to adjust the permute order.
*/
std::vector<uint16_t> cldnn_permute_order;
// 1. Switch permute order values for spatial dims
for (auto const& o : ie_order) {
if (o >= 2)
cldnn_permute_order.push_back(1 + ie_order.size() - o);
else
cldnn_permute_order.push_back(o);
}
cldnn_permute_order = GetPermuteOrder(cldnn_permute_order);
std::vector<uint16_t> cldnn_permute_order = ConvertPermuteOrder(ie_order, rank);
auto permutePrim = cldnn::permute(layerName,
inputPrimitives[0],

View File

@ -34,6 +34,8 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*EltwiseLayerTest.*IS=\(1.4.3.2.1.3\).*)",
R"(.*EltwiseLayerTest.*IS=\(2\).*OpType=Mod.*opType=VECTOR.*)",
R"(.*EltwiseLayerTest.*OpType=FloorMod.*netPRC=I64.*)",
// TODO: Issue: 46841
R"(.*(QuantGroupConvBackpropData3D).*)",
// These tests might fail due to accuracy loss a bit bigger than threshold
R"(.*(GRUCellTest).*)",

View File

@ -0,0 +1,80 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "subgraph_tests/quantized_convolution_backprop_data.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace SubgraphTestsDefinitions;
using namespace ngraph::helpers;
namespace {
// Network precisions to run the quantized ConvolutionBackpropData subgraph with.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32
};
// Output channel counts of the deconvolution under test.
const std::vector<size_t> numOutChannels = {16, 32};
// Number of quantization levels (256 == 8-bit quantization).
const std::vector<size_t > levels = {256};
// Both per-tensor and per-channel quantization granularities are covered.
const std::vector<QuantizationGranularity > granularity = {Pertensor, Perchannel};
/* ============= 2D ConvolutionBackpropData ============= */
const std::vector<std::vector<size_t >> inputShapes2D = {{1, 16, 10, 10}, {1, 32, 10, 10}};
const std::vector<std::vector<size_t >> kernels2D = {{1, 1}, {3, 3}};
const std::vector<std::vector<size_t >> strides2D = {{1, 1}};
const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{0, 0}};
const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{0, 0}};
const std::vector<std::vector<size_t >> dilations2D = {{1, 1}};
// Cartesian product of all 2D deconvolution + quantization parameters.
const auto quantConvBackpropData2DParams = ::testing::Combine(
::testing::ValuesIn(kernels2D),
::testing::ValuesIn(strides2D),
::testing::ValuesIn(padBegins2D),
::testing::ValuesIn(padEnds2D),
::testing::ValuesIn(dilations2D),
::testing::ValuesIn(numOutChannels),
::testing::Values(ngraph::op::PadType::AUTO),
::testing::ValuesIn(levels),
::testing::ValuesIn(granularity)
);
INSTANTIATE_TEST_CASE_P(smoke_QuantConvBackpropData2D, QuantConvBackpropDataLayerTest,
::testing::Combine(
quantConvBackpropData2DParams,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(inputShapes2D),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
QuantConvBackpropDataLayerTest::getTestCaseName);
/* ============= 3D ConvolutionBackpropData ============= */
const std::vector<std::vector<size_t >> inputShapes3D = {{1, 16, 5, 5, 5}, {1, 32, 5, 5, 5}};
const std::vector<std::vector<size_t >> kernels3D = {{1, 1, 1}, {3, 3, 3}};
const std::vector<std::vector<size_t >> strides3D = {{1, 1, 1}};
const std::vector<std::vector<ptrdiff_t>> padBegins3D = {{0, 0, 0}};
const std::vector<std::vector<ptrdiff_t>> padEnds3D = {{0, 0, 0}};
const std::vector<std::vector<size_t >> dilations3D = {{1, 1, 1}};
// Cartesian product of all 3D deconvolution + quantization parameters.
const auto quantConvBackpropData3DParams = ::testing::Combine(
::testing::ValuesIn(kernels3D),
::testing::ValuesIn(strides3D),
::testing::ValuesIn(padBegins3D),
::testing::ValuesIn(padEnds3D),
::testing::ValuesIn(dilations3D),
::testing::ValuesIn(numOutChannels),
::testing::Values(ngraph::op::PadType::AUTO),
::testing::ValuesIn(levels),
::testing::ValuesIn(granularity)
);
INSTANTIATE_TEST_CASE_P(smoke_QuantConvBackpropData3D, QuantConvBackpropDataLayerTest,
::testing::Combine(
quantConvBackpropData3DParams,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(inputShapes3D),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
QuantConvBackpropDataLayerTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,83 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "subgraph_tests/quantized_group_convolution_backprop_data.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace SubgraphTestsDefinitions;
using namespace ngraph::helpers;
namespace {
// Network precisions to run the quantized GroupConvolutionBackpropData subgraph with.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32
};
// Output channel counts of the grouped deconvolution under test.
const std::vector<size_t> numOutChannels = {16, 32};
// Group counts; must divide both input and output channel counts.
const std::vector<size_t> numGroups = {2, 8, 16};
// Number of quantization levels (256 == 8-bit quantization).
const std::vector<size_t > levels = {256};
// Both per-tensor and per-channel quantization granularities are covered.
const std::vector<QuantizationGranularity > granularity = {Pertensor, Perchannel};
/* ============= 2D GroupConvolutionBackpropData ============= */
const std::vector<std::vector<size_t >> inputShapes2D = {{1, 16, 10, 10}, {1, 32, 10, 10}};
const std::vector<std::vector<size_t >> kernels2D = {{1, 1}, {3, 3}};
const std::vector<std::vector<size_t >> strides2D = {{1, 1}};
const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{0, 0}};
const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{0, 0}};
const std::vector<std::vector<size_t >> dilations2D = {{1, 1}};
// Cartesian product of all 2D grouped deconvolution + quantization parameters.
const auto quantGroupConvBackpropData2DParams = ::testing::Combine(
::testing::ValuesIn(kernels2D),
::testing::ValuesIn(strides2D),
::testing::ValuesIn(padBegins2D),
::testing::ValuesIn(padEnds2D),
::testing::ValuesIn(dilations2D),
::testing::ValuesIn(numOutChannels),
::testing::ValuesIn(numGroups),
::testing::Values(ngraph::op::PadType::AUTO),
::testing::ValuesIn(levels),
::testing::ValuesIn(granularity)
);
INSTANTIATE_TEST_CASE_P(smoke_QuantGroupConvBackpropData2D, QuantGroupConvBackpropDataLayerTest,
::testing::Combine(
quantGroupConvBackpropData2DParams,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(inputShapes2D),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
QuantGroupConvBackpropDataLayerTest::getTestCaseName);
/* ============= 3D GroupConvolutionBackpropData ============= */
const std::vector<std::vector<size_t >> inputShapes3D = {{1, 16, 5, 5, 5}, {1, 32, 5, 5, 5}};
const std::vector<std::vector<size_t >> kernels3D = {{3, 3, 3}};
const std::vector<std::vector<size_t >> strides3D = {{1, 1, 1}};
const std::vector<std::vector<ptrdiff_t>> padBegins3D = {{0, 0, 0}};
const std::vector<std::vector<ptrdiff_t>> padEnds3D = {{0, 0, 0}};
const std::vector<std::vector<size_t >> dilations3D = {{1, 1, 1}};
// Cartesian product of all 3D grouped deconvolution + quantization parameters.
const auto quantGroupConvBackpropData3DParams = ::testing::Combine(
::testing::ValuesIn(kernels3D),
::testing::ValuesIn(strides3D),
::testing::ValuesIn(padBegins3D),
::testing::ValuesIn(padEnds3D),
::testing::ValuesIn(dilations3D),
::testing::ValuesIn(numOutChannels),
::testing::ValuesIn(numGroups),
::testing::Values(ngraph::op::PadType::AUTO),
::testing::ValuesIn(levels),
::testing::ValuesIn(granularity)
);
INSTANTIATE_TEST_CASE_P(smoke_QuantGroupConvBackpropData3D, QuantGroupConvBackpropDataLayerTest,
::testing::Combine(
quantGroupConvBackpropData3DParams,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(inputShapes3D),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
QuantGroupConvBackpropDataLayerTest::getTestCaseName);
} // namespace

View File

@ -48,6 +48,12 @@ struct convolution : public primitive_base<convolution> {
/// @param with_activation Enable Relu activation.
/// @param activation_slp Relu activation slope.
/// @param output_size User-defined output data size of the primitive (w/o padding).
/// @param output_type User-defined output data type of the primitive.
/// @param grouped_weights_shape Defines if weights tensor has explicit group dimension.
/// This parameter affects how bfzyx and bfwzyx format on weights is converted:
/// bfzyx -> oizyx (grouped_weights_shape=false) or goiyx (grouped_weights_shape=true)
/// bfwzyx -> error (grouped_weights_shape=false) or goizyx (grouped_weights_shape=true)
/// If weights already have (g)oi(z)yx format, then this flag has no effect
convolution(const primitive_id& id,
const primitive_id& input,
const std::vector<primitive_id>& weights,
@ -58,6 +64,7 @@ struct convolution : public primitive_base<convolution> {
tensor dilation,
tensor output_size,
data_types output_type,
bool grouped_weights_shape,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding, optional_data_type{output_type}),
input_offset(input_offset),
@ -70,6 +77,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(grouped_weights_shape),
weights(weights),
bias(bias),
weights_zero_points(std::vector<primitive_id>(0)),
@ -107,6 +115,7 @@ struct convolution : public primitive_base<convolution> {
tensor input_offset,
tensor dilation,
tensor output_size,
bool grouped_weights_shape,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding, optional_data_type{output_data_type}),
input_offset(input_offset),
@ -119,6 +128,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(grouped_weights_shape),
weights(weights),
bias(bias),
weights_zero_points(w_zero_point),
@ -163,6 +173,7 @@ struct convolution : public primitive_base<convolution> {
tensor input_offset,
tensor dilation,
tensor output_size,
bool grouped_weights_shape,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding, optional_data_type{output_data_type}),
input_offset(input_offset),
@ -175,6 +186,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(grouped_weights_shape),
weights(weights),
bias(bias),
weights_zero_points(w_zero_point),
@ -220,6 +232,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(false),
weights(weights),
bias(bias),
weights_zero_points(std::vector<primitive_id>(0)),
@ -264,6 +277,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(padding_above),
padding_below(padding_below),
deformable_mode(false),
grouped_weights_shape(false),
weights(weights),
bias(bias),
weights_zero_points(std::vector<primitive_id>(0)),
@ -310,6 +324,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(padding_above),
padding_below(padding_below),
deformable_mode(false),
grouped_weights_shape(false),
weights(weights),
bias(bias),
weights_zero_points(std::vector<primitive_id>(0)),
@ -343,6 +358,7 @@ struct convolution : public primitive_base<convolution> {
tensor stride = {1, 1, 1, 1},
tensor input_offset = tensor(0),
tensor dilation = {1, 1, 1, 1},
bool grouped_weights_shape = false,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
input_offset(input_offset),
@ -354,6 +370,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(grouped_weights_shape),
weights(weights),
bias(bias),
weights_zero_points(std::vector<primitive_id>(0)),
@ -385,6 +402,7 @@ struct convolution : public primitive_base<convolution> {
tensor stride = {1, 1, 1, 1},
tensor input_offset = tensor(0),
tensor dilation = {1, 1, 1, 1},
bool grouped_weights_shape = false,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
input_offset(input_offset),
@ -396,6 +414,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(grouped_weights_shape),
weights(weights),
bias(std::vector<primitive_id>(0)),
weights_zero_points(std::vector<primitive_id>(0)),
@ -437,6 +456,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(padding_above),
padding_below(padding_below),
deformable_mode(false),
grouped_weights_shape(false),
weights(weights),
bias(std::vector<primitive_id>(0)),
weights_zero_points(std::vector<primitive_id>(0)),
@ -480,6 +500,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(padding_above),
padding_below(padding_below),
deformable_mode(false),
grouped_weights_shape(false),
weights(weights),
bias(std::vector<primitive_id>(0)),
weights_zero_points(std::vector<primitive_id>(0)),
@ -508,6 +529,7 @@ struct convolution : public primitive_base<convolution> {
tensor stride = {1, 1, 1, 1},
tensor input_offset = tensor(0),
tensor dilation = {1, 1, 1, 1},
bool grouped_weights_shape = false,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
input_offset(input_offset),
@ -519,6 +541,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(grouped_weights_shape),
weights(weights),
bias(std::vector<primitive_id>(0)),
weights_zero_points(std::vector<primitive_id>(0)),
@ -561,6 +584,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(false),
weights(weights),
bias(bias),
weights_zero_points(std::vector<primitive_id>(0)),
@ -604,6 +628,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(false),
weights(weights),
bias(std::vector<primitive_id>(0)),
weights_zero_points(std::vector<primitive_id>(0)),
@ -651,6 +676,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(true),
grouped_weights_shape(false),
weights(weights),
bias(bias),
weights_zero_points(std::vector<primitive_id>(0)),
@ -756,6 +782,8 @@ struct convolution : public primitive_base<convolution> {
tensor padding_below;
/// @param deformable_mode.
bool deformable_mode;
/// @param grouped_weights_shape Defines if weights tensor has explicit group dimension.
bool grouped_weights_shape;
/// @brief List of primitive ids containing weights data.
const primitive_id_arr weights;
/// @brief List of primitive ids containing bias data.

View File

@ -55,6 +55,7 @@ struct deconvolution : public primitive_base<deconvolution> {
stride(stride),
with_output_size(false),
groups(1),
grouped_weights_shape(false),
weights(weights),
bias(bias) {}
/// @brief Constructs deconvolution primitive.
@ -81,6 +82,7 @@ struct deconvolution : public primitive_base<deconvolution> {
stride(stride),
with_output_size(false),
groups(groups),
grouped_weights_shape(false),
weights(weights),
bias(bias) {}
@ -104,6 +106,7 @@ struct deconvolution : public primitive_base<deconvolution> {
stride(stride),
with_output_size(false),
groups(1),
grouped_weights_shape(false),
weights(weights),
bias(std::vector<primitive_id>(0)) {}
@ -129,6 +132,7 @@ struct deconvolution : public primitive_base<deconvolution> {
stride(stride),
with_output_size(false),
groups(groups),
grouped_weights_shape(false),
weights(weights),
bias(std::vector<primitive_id>(0)) {}
@ -157,6 +161,7 @@ struct deconvolution : public primitive_base<deconvolution> {
with_output_size(true),
output_size(output_size),
groups(1),
grouped_weights_shape(false),
weights(weights),
bias(bias) {}
@ -180,6 +185,7 @@ struct deconvolution : public primitive_base<deconvolution> {
tensor stride,
tensor input_offset,
tensor output_size,
bool grouped_weights_shape,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
input_offset(input_offset),
@ -187,6 +193,7 @@ struct deconvolution : public primitive_base<deconvolution> {
with_output_size(true),
output_size(output_size),
groups(groups),
grouped_weights_shape(grouped_weights_shape),
weights(weights),
bias(bias) {}
@ -213,6 +220,7 @@ struct deconvolution : public primitive_base<deconvolution> {
with_output_size(true),
output_size(output_size),
groups(1),
grouped_weights_shape(false),
weights(weights),
bias(std::vector<primitive_id>(0)) {}
@ -283,6 +291,8 @@ struct deconvolution : public primitive_base<deconvolution> {
tensor output_size;
/// @brief Number of feature groups (grouped convolution). If more than 1 then weights/bias count needs to be 1.
uint32_t groups;
/// @param grouped_weights_shape Defines if weights tensor has explicit group dimension.
bool grouped_weights_shape;
/// @brief List of primitive ids containing weights data.
const primitive_id_arr weights;
/// @brief List of primitive ids containing bias data.

View File

@ -615,13 +615,13 @@ public:
*
* @endcode
*/
tensor(value_type batch_num, value_type feature_num, value_type width, value_type height)
tensor(value_type batch_num, value_type feature_num, value_type x, value_type y)
: tensor(1) {
_sizes[0] = batch_num;
_sizes[tensor_batch_dim_max] = feature_num;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max] = width;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 1] = height;
if (batch_num == 0 && feature_num == 0 && width == 0 && height == 0)
_sizes[tensor_batch_dim_max + tensor_feature_dim_max] = x;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 1] = y;
if (batch_num == 0 && feature_num == 0 && x == 0 && y == 0)
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 2] = 0;
}
@ -629,7 +629,7 @@ public:
/// @details Example:
/*! @code
*
tensor my_tensor( 2, 3, 4, 5, 6 ); // b=2, f=3, x=4, y=5, z =6
tensor my_tensor( 2, 3, 4, 5, 6 ); // b=2, f=3, x=4, y=5, z=6
cout << my_tensor.batch[0] << endl; // 2
cout << my_tensor.feature[0] << endl; // 3
cout << "x=" << my_tensor.spatial[0] << endl; // x=4
@ -638,38 +638,37 @@ public:
*
* @endcode
*/
tensor(value_type batch_num, value_type feature_num, value_type width, value_type height, value_type depth)
tensor(value_type batch_num, value_type feature_num, value_type x, value_type y, value_type z)
: tensor(1) {
_sizes[0] = batch_num;
_sizes[tensor_batch_dim_max] = feature_num;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max] = width;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 1] = height;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 2] = depth;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max] = x;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 1] = y;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 2] = z;
}
/// @brief Constructs @p tensor.
/// @details Example:
/*! @code
*
tensor my_tensor( 2, 3, 4, 5, 6, 7 ); // b=2, f=3, x=4, y=5, lx= 6, ly =7
tensor my_tensor( 2, 3, 4, 5, 6, 7 ); // b=2, f=3, x=4, y=5, z=6, w=7
cout << my_tensor.batch[0] << endl; // 2
cout << my_tensor.feature[0] << endl; // 3
cout << "x=" << my_tensor.spatial[0] << endl; // x=4
cout << "y=" << my_tensor.spatial[1] << endl; // y=5
cout << "local x=" << my_tensor.local[0] << endl; // local x=6
cout << "loxal y=" << my_tensor.local[1] << endl; // local y=7
cout << "z=" << my_tensor.spatial[2] << endl; // z=6
cout << "w=" << my_tensor.spatial[3] << endl; // w=7
*
* @endcode
*/
tensor(value_type batch_num, value_type feature_num, value_type width,
value_type height, value_type local_x, value_type local_y)
tensor(value_type batch_num, value_type feature_num, value_type x, value_type y, value_type z, value_type w)
: tensor(1) {
_sizes[0] = batch_num;
_sizes[tensor_batch_dim_max] = feature_num;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max] = width;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 1] = height;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + tensor_spatial_dim_max] = local_x;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + tensor_spatial_dim_max + 1] = local_y;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max] = x;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 1] = y;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 2] = z;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 3] = w;
}
/// @brief Constructs @p tensor using vector of sizes.

View File

@ -810,6 +810,11 @@ WeightsTensor WeightsTensor::TransformIgnorePadding(WeightsLayout l, WeightsType
vec[Channelndex(l, WeightsChannelName::Z)] = 1;
vec[Channelndex(l, WeightsChannelName::IFM)] = IFM().v;
vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v;
} else if (g > 1 && src_channels == 5 && dst_channels == 4) {
vec[Channelndex(l, WeightsChannelName::X)] = X().v;
vec[Channelndex(l, WeightsChannelName::Y)] = Y().v;
vec[Channelndex(l, WeightsChannelName::IFM)] = Z().v;
vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v * IFM().v;
} else {
assert(0);
}

View File

@ -54,7 +54,7 @@ CommonDispatchData QuantizeKernelRef::SetDefault(const quantize_params& params,
} else {
dispatchData.gws[0] = output.Batch().v;
dispatchData.gws[1] = params.packed_binary_output ? CeilDiv(output.Feature().v, 32) : output.Feature().v;
dispatchData.gws[2] = Align(output.X().v * output.Y().v * output.Z().v, 16);
dispatchData.gws[2] = Align(output.X().v * output.Y().v * output.Z().v * output.W().v, 16);
dispatchData.lws[0] = 1;
dispatchData.lws[1] = 1;

View File

@ -35,6 +35,7 @@ ParamsKey QuantizeKernelScaleShift::GetSupportedKey() const {
k.EnableInputLayout(DataLayout::yxfb);
k.EnableInputLayout(DataLayout::byxf);
k.EnableInputLayout(DataLayout::bfzyx);
k.EnableInputLayout(DataLayout::bfwzyx);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
@ -45,6 +46,7 @@ ParamsKey QuantizeKernelScaleShift::GetSupportedKey() const {
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfzyx);
k.EnableOutputLayout(DataLayout::bfwzyx);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);

View File

@ -39,6 +39,12 @@ KERNEL(quantize_ref)(const __global INPUT0_TYPE* input,
const int x = zyx % OUTPUT_SIZE_X;
const int y = (zyx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
const int z = (zyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y;
#elif OUTPUT_DIMS == 6
const int wzyx = get_global_id(2);
const int x = wzyx % OUTPUT_SIZE_X;
const int y = (wzyx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
const int z = ((wzyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z;
const int w = ((wzyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y) / OUTPUT_SIZE_Z;
#endif
#if PACKED_BINARY_OUTPUT
@ -77,37 +83,49 @@ KERNEL(quantize_ref)(const __global INPUT0_TYPE* input,
#else
#if INPUT0_DIMS == 5
#if INPUT0_DIMS == 6
const int input_offset = INPUT0_GET_INDEX(b, of, w, z, y, x);
#elif INPUT0_DIMS == 5
const int input_offset = INPUT0_GET_INDEX(b, of, z, y, x);
#elif INPUT0_DIMS <= 4
const int input_offset = INPUT0_GET_INDEX(b, of, y, x);
#endif
#if OUTPUT_DIMS == 5
#if OUTPUT_DIMS == 6
const int output_offset = OUTPUT_GET_INDEX(b, of, w, z, y, x);
#elif OUTPUT_DIMS == 5
const int output_offset = OUTPUT_GET_INDEX(b, of, z, y, x);
#elif OUTPUT_DIMS <= 4
const int output_offset = OUTPUT_GET_INDEX(b, of, y, x);
#endif
#if INPUT1_DIMS == 5
#if INPUT1_DIMS == 6
const int input_low_offset = INPUT1_GET_INDEX_SAFE(b, of, w, z, y, x);
#elif INPUT1_DIMS == 5
const int input_low_offset = INPUT1_GET_INDEX_SAFE(b, of, z, y, x);
#elif INPUT1_DIMS <= 4
const int input_low_offset = INPUT1_GET_INDEX_SAFE(b, of, y, x);
#endif
#if INPUT2_DIMS == 5
#if INPUT2_DIMS == 6
const int input_high_offset = INPUT2_GET_INDEX_SAFE(b, of, w, z, y, x);
#elif INPUT2_DIMS == 5
const int input_high_offset = INPUT2_GET_INDEX_SAFE(b, of, z, y, x);
#elif INPUT2_DIMS <= 4
const int input_high_offset = INPUT2_GET_INDEX_SAFE(b, of, y, x);
#endif
#if INPUT3_DIMS == 5
#if INPUT3_DIMS == 6
const int output_low_offset = INPUT3_GET_INDEX_SAFE(b, of, w, z, y, x);
#elif INPUT3_DIMS == 5
const int output_low_offset = INPUT3_GET_INDEX_SAFE(b, of, z, y, x);
#elif INPUT3_DIMS <= 4
const int output_low_offset = INPUT3_GET_INDEX_SAFE(b, of, y, x);
#endif
#if INPUT4_DIMS == 5
#if INPUT4_DIMS == 6
const int output_high_offset = INPUT4_GET_INDEX_SAFE(b, of, w, z, y, x);
#elif INPUT4_DIMS == 5
const int output_high_offset = INPUT4_GET_INDEX_SAFE(b, of, z, y, x);
#elif INPUT4_DIMS <= 4
const int output_high_offset = INPUT4_GET_INDEX_SAFE(b, of, y, x);

View File

@ -43,14 +43,25 @@ KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input,
const int x = zyx % OUTPUT_SIZE_X;
const int y = (zyx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
const int z = (zyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y;
#elif OUTPUT_DIMS == 6
const int wzyx = get_global_id(GWS_YX);
const int x = wzyx % OUTPUT_SIZE_X;
const int y = (wzyx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
const int z = ((wzyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z;
const int w = ((wzyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y) / OUTPUT_SIZE_Z;
#endif
#if INPUT0_DIMS == 5
#if INPUT0_DIMS == 6
const int input_offset = INPUT0_GET_INDEX(b, of, w, z, y, x);
#elif INPUT0_DIMS == 5
const int input_offset = INPUT0_GET_INDEX(b, of, z, y, x);
#elif INPUT0_DIMS <= 4
const int input_offset = INPUT0_GET_INDEX(b, of, y, x);
#endif
#if OUTPUT_DIMS == 5
#if OUTPUT_DIMS == 6
const int output_offset = OUTPUT_GET_INDEX(b, of, w, z, y, x);
#elif OUTPUT_DIMS == 5
const int output_offset = OUTPUT_GET_INDEX(b, of, z, y, x);
#elif OUTPUT_DIMS <= 4
const int output_offset = OUTPUT_GET_INDEX(b, of, y, x);
@ -61,6 +72,8 @@ KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input,
const int in_range_offset = INPUT1_GET_INDEX_SAFE(b, of, y, x);
#elif INPUT1_DIMS == 5
const int in_range_offset = INPUT1_GET_INDEX_SAFE(b, of, z, y, x);
#elif INPUT1_DIMS == 6
const int in_range_offset = INPUT1_GET_INDEX_SAFE(b, of, w, z, y, x);
#endif
#endif
@ -68,6 +81,8 @@ KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input,
const int scales_offset = INPUT7_GET_INDEX_SAFE(b, of, y, x);
#elif INPUT7_DIMS == 5
const int scales_offset = INPUT7_GET_INDEX_SAFE(b, of, z, y, x);
#elif INPUT7_DIMS == 6
const int scales_offset = INPUT7_GET_INDEX_SAFE(b, of, w, z, y, x);
#endif
#if PER_TENSOR_INPUT_SCALE

View File

@ -217,7 +217,17 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
// get output feature map from weights. It should be the same as number of biases. Will be verifed in
// convolution::create()
auto number_of_features = weights_layout.size.batch[0] * weights_layout.size.group[0];
auto group = desc->groups;
int32_t number_of_features = 0;
if (desc->grouped_weights_shape && !format::is_grouped(weights_layout.format)) {
number_of_features = weights_layout.size.feature[0] * static_cast<int32_t>(group);
} else {
if (format::is_grouped(weights_layout.format)) {
number_of_features = weights_layout.size.batch[0] * static_cast<int32_t>(group);
} else {
number_of_features = weights_layout.size.batch[0];
}
}
if (desc->with_output_size) {
CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(),
@ -336,6 +346,11 @@ convolution_inst::typed_primitive_inst(network_impl& network, convolution_node c
auto split = node.get_split();
for (decltype(split) j = 0; j < split; j++) {
auto filter_inst = node.weights(j).get_output_layout(); // convolution filter
auto weights_ifm = filter_inst.size.feature[0];
if (argument.grouped_weights_shape && !format::is_grouped(filter_inst.format)) {
weights_ifm = filter_inst.size.spatial[filter_inst.format.spatial_num() - 1] * argument.groups;
}
if (bias_term()) {
auto bias_inst = node.bias(j).get_output_layout();
CLDNN_ERROR_NOT_EQUAL(node.id(),
@ -406,7 +421,7 @@ convolution_inst::typed_primitive_inst(network_impl& network, convolution_node c
"Weights feature maps number",
(input_inst.size.feature[0] - input_offset.feature[0]) / split,
"input feature maps number",
filter_inst.size.feature[0],
weights_ifm,
"Weights/ifm mismatch");
}
}

View File

@ -49,7 +49,16 @@ layout deconvolution_inst::calc_output_layout(deconvolution_node const& node) {
auto strd = desc->stride;
auto group = desc->groups;
auto number_of_features = weights_layout.size.batch[0] * static_cast<int32_t>(group);
int32_t number_of_features = 0;
if (desc->grouped_weights_shape && !format::is_grouped(weights_layout.format)) {
number_of_features = weights_layout.size.feature[0] * static_cast<int32_t>(group);
} else {
if (format::is_grouped(weights_layout.format)) {
number_of_features = weights_layout.size.batch[0] * static_cast<int32_t>(group);
} else {
number_of_features = weights_layout.size.batch[0];
}
}
if (desc->with_output_size) {
CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(),
@ -173,6 +182,10 @@ deconvolution_inst::typed_primitive_inst(network_impl& network, deconvolution_no
for (decltype(split) j = 0; j < split; j++) {
auto filter_inst = node.weights(j).get_output_layout(); // deconvolution filter
auto input_offset = argument.input_offset;
auto weights_ifm = filter_inst.size.feature[0];
if (argument.grouped_weights_shape && !format::is_grouped(filter_inst.format)) {
weights_ifm = filter_inst.size.spatial[filter_inst.format.spatial_num() - 1] * argument.groups;
}
if (argument.bias.size() != 0) {
auto bias_inst = node.bias(j).get_output_layout();
@ -241,7 +254,7 @@ deconvolution_inst::typed_primitive_inst(network_impl& network, deconvolution_no
"Weights feature maps number",
(input_inst.size.feature[0] - input_offset.feature[0]) / split,
"input feature maps number",
filter_inst.size.feature[0],
weights_ifm,
"Weights/ifm mimsmatch");
}
}

View File

@ -88,10 +88,8 @@ public:
const auto& deformable_groups = primitive->deformable_groups;
const auto transposed = arg.get_transposed();
assert(arg.get_output_layout().size.feature[0] == weights_layout.size.batch[0] * weights_layout.size.group[0]);
auto conv_params = get_weight_bias_zero_point_default_params<kernel_selector::convolution_params>(
arg, split, 1);
arg, split, 1, primitive->grouped_weights_shape);
auto conv_optional_params =
get_default_weights_bias_optional_params<kernel_selector::convolution_optional_params>(arg.get_program());

View File

@ -81,7 +81,8 @@ public:
auto deconv_params = get_weights_bias_default_params<kernel_selector::deconvolution_params>(
arg,
(groups > 1) ? 1 : actual_split,
1);
1,
primitive->grouped_weights_shape);
auto deconv_optional_params =
get_default_weights_bias_optional_params<kernel_selector::deconvolution_optional_params>(arg.get_program());

View File

@ -164,6 +164,12 @@ attach_quantize_gpu::attach_quantize_gpu() {
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw);

View File

@ -90,6 +90,7 @@ void pre_replace_deconv::run(program_impl& p) {
bias_vec.push_back(bias_id);
auto input_offset = deconv_prim->input_offset;
auto output_padding = deconv_prim->output_padding;
auto grouped_weights_shape = deconv_prim->grouped_weights_shape;
// remove deconvolution node and its connections to weights and biases, rename it and move to the optimized
// list
@ -136,6 +137,7 @@ void pre_replace_deconv::run(program_impl& p) {
stride,
input_offset,
tensor{ 1, 1, 1, 1 },
grouped_weights_shape,
output_padding);
p.get_or_create(conv_prim);
} else {
@ -146,6 +148,7 @@ void pre_replace_deconv::run(program_impl& p) {
stride,
input_offset,
tensor{ 1, 1, 1, 1 },
grouped_weights_shape,
output_padding);
p.get_or_create(conv_prim);
}
@ -231,6 +234,7 @@ void pre_replace_deconv::run(program_impl& p) {
tensor stride = { 1, 1, 1, 1 };
tensor input_offset = { 0, 0, -scale_factor, -scale_factor };
auto output_padding = deconv_prim->output_padding;
auto grouped_weights_shape = deconv_prim->grouped_weights_shape;
// remove deconvolution node and its connections to weights and biases,
// rename it and move to the optimized list
@ -309,6 +313,7 @@ void pre_replace_deconv::run(program_impl& p) {
stride,
input_offset,
tensor{ 1, 1, 1, 1 },
grouped_weights_shape,
output_padding);
p.get_or_create(conv_prim);

View File

@ -312,7 +312,8 @@ void prepare_primitive_fusing::fuse_bias(program_impl &p) {
desc->input_offset,
desc->dilation,
conv.get_output_layout().size,
conv.get_output_layout().data_type);
conv.get_output_layout().data_type,
desc->grouped_weights_shape);
conv_with_bias_prim->activations_zero_points = desc->activations_zero_points;
conv_with_bias_prim->weights_zero_points = desc->weights_zero_points;
@ -334,7 +335,8 @@ void prepare_primitive_fusing::fuse_bias(program_impl &p) {
desc->groups,
desc->stride,
desc->input_offset,
deconv.get_output_layout().size);
deconv.get_output_layout().size,
desc->grouped_weights_shape);
auto& new_deconv_node = p.get_or_create(deconv_with_bias_prim);
fuse_bias_f(deconv, new_deconv_node, bias_node, eltw_node);

View File

@ -658,6 +658,7 @@ void prepare_quantization::prepare_asymmetric_quantization(program_impl &p) {
old_conv_prim->input_offset,
old_conv_prim->dilation,
output_size,
old_conv_prim->grouped_weights_shape,
old_conv_prim->output_padding);
auto& new_conv_node = p.get_or_create(new_conv_prim);

View File

@ -106,11 +106,11 @@ kernel_selector::weights_type to_weights_type(data_types dt);
data_types from_weights_type(kernel_selector::weights_type dt);
kernel_selector::data_layout to_data_layout(format f);
cldnn::format from_data_layout(kernel_selector::data_layout l);
kernel_selector::weights_layout to_weights_layout(format f);
kernel_selector::weights_layout to_weights_layout(format f, bool is_grouped);
cldnn::format::type from_weights_layout(kernel_selector::weights_layout l);
kernel_selector::tuning_mode to_tuning_mode(cldnn::tuning_mode mode);
kernel_selector::data_tensor convert_data_tensor(const layout& l, uint32_t split = 1, const tensor view_offset = tensor {});
kernel_selector::weights_tensor convert_weights_tensor(const layout& l);
kernel_selector::weights_tensor convert_weights_tensor(const layout& l, bool is_grouped = false);
layout from_weights_tensor(const kernel_selector::weights_tensor& t);
kernel_selector::activation_function get_kernel_selector_activation_param(activation_func activation_func);
@ -192,10 +192,10 @@ inline params_t get_default_params(const arg_t& arg, uint32_t split = 1) {
}
template <typename params_t, typename arg_t>
inline params_t get_weights_bias_default_params(const arg_t& arg, uint32_t split = 1, uint32_t groups = 1) {
inline params_t get_weights_bias_default_params(const arg_t& arg, uint32_t split = 1, uint32_t groups = 1, bool has_group_dimension = false) {
params_t params = get_default_params<params_t>(arg, split);
const auto& weights_layout = arg.weights().get_output_layout();
params.weights = convert_weights_tensor(weights_layout);
params.weights = convert_weights_tensor(weights_layout, has_group_dimension);
if (arg.bias_term()) {
auto bias_layout = arg.bias().get_output_layout();
@ -210,8 +210,8 @@ inline params_t get_weights_bias_default_params(const arg_t& arg, uint32_t split
}
template <typename params_t, typename arg_t>
params_t get_weight_bias_zero_point_default_params(const arg_t& arg, uint32_t split = 1, uint32_t groups = 1) {
params_t params = get_weights_bias_default_params<params_t>(arg, split, groups);
params_t get_weight_bias_zero_point_default_params(const arg_t& arg, uint32_t split = 1, uint32_t groups = 1, bool has_group_dimension = false) {
params_t params = get_weights_bias_default_params<params_t>(arg, split, groups, has_group_dimension);
if (arg.weights_zero_points_term()) {
params.weights_zero_points.push_back(

View File

@ -200,7 +200,7 @@ cldnn::format from_data_layout(kernel_selector::data_layout l) {
}
}
kernel_selector::weights_layout to_weights_layout(format f) {
kernel_selector::weights_layout to_weights_layout(format f, bool is_grouped) {
switch (f) {
case format::bfyx:
case format::oiyx:
@ -271,6 +271,12 @@ kernel_selector::weights_layout to_weights_layout(format f) {
case format::os_is_y_x8_osv8_isv4_swizzled_by_4:
return kernel_selector::weights_layout::os_is_y_x8_osv8_isv4_swizzled_by_4;
case format::bfzyx:
return is_grouped ? kernel_selector::weights_layout::goiyx : kernel_selector::weights_layout::oizyx;
case format::bfwzyx: {
if (!is_grouped)
throw std::runtime_error("Invalid conversion of data format to weights format. bfwzyx can't be non-grouped as 4D spatials are not supported");
return kernel_selector::weights_layout::goizyx;
}
case format::oizyx:
return kernel_selector::weights_layout::oizyx;
case format::iozyx:
@ -593,10 +599,10 @@ kernel_selector::data_tensor convert_data_tensor(const layout& l, uint32_t split
return kernel_selector::data_tensor(vec, to_data_type(l.data_type), ks_layout);
}
kernel_selector::weights_tensor convert_weights_tensor(const layout& l) {
kernel_selector::weights_tensor convert_weights_tensor(const layout& l, bool is_grouped) {
const auto& t = l.size.sizes(l.format);
const auto ks_type = to_weights_type(l.data_type);
const auto ks_layout = to_weights_layout(l.format);
const auto ks_layout = to_weights_layout(l.format, is_grouped);
std::vector<size_t> vec(kernel_selector::WeightsTensor::ChannelsCount(ks_layout));
for (size_t i = 0; i < vec.size(); i++) {

View File

@ -1132,7 +1132,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) {
input_layout("input", input.get_layout()),
data("weights_1", weights_1),
data("biases_1", biases_1),
convolution("conv", "input", { "weights_1" }, { "biases_1" }, 2, tensor(1), tensor(0), tensor(1), tensor{1, 2, 3, 3, 3}, data_types::f32));
convolution("conv", "input", { "weights_1" }, { "biases_1" }, 2, tensor(1), tensor(0), tensor(1), tensor{1, 2, 3, 3, 3}, data_types::f32, true));
network network(engine, topology);
network.set_input_data("input", input);
@ -4295,7 +4295,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weight_an
data("a_zp", a_zp),
data("w_zp", w_zp),
convolution("conv", "input", { "weights" }, { "biases" }, { "w_zp" }, { "a_zp" }, 1, data_types::f32,
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}),
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}, false),
reorder("out", "conv", format::bfyx, data_types::f32));
build_options opts;
@ -4366,7 +4366,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio
data("biases", biases),
data("a_zp", a_zp),
convolution("conv", "input", { "weights" }, { "biases" }, { }, { "a_zp" }, 1, data_types::f32,
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}),
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}, false),
reorder("out", "conv", format::bfyx, data_types::f32));
build_options opts;
@ -4451,7 +4451,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio
data("biases", biases),
data("a_zp", a_zp),
convolution("conv", "input", { "weights" }, { "biases" }, { }, { "a_zp" }, 1, data_types::f32,
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}),
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}, false),
reorder("out", "conv", format::bfyx, data_types::f32));
build_options opts;
@ -4552,7 +4552,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio
activation("activation", "input", activation_func::relu), // needed just to add padding
eltwise("in", {"activation", "a_zp"}, eltwise_mode::sub, data_types::f32),
convolution("conv", "in", { "weights" }, { "biases" }, 1,
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}, data_types::f32),
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}, data_types::f32, false),
reorder("out", "conv", format::bfyx, data_types::f32));
build_options opts;
@ -4623,7 +4623,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weights_p
data("biases", biases),
data("w_zp", w_zp),
convolution("conv", "input", { "weights" }, { "biases" }, { "w_zp" }, { }, 1, data_types::f32,
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}),
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}, false),
reorder("out", "conv", format::bfyx, data_types::f32));
build_options opts;
@ -7018,7 +7018,7 @@ TEST(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16_in_feature_pa
reorder("input_reordered", "input", reordered_input_layout),
data("weights", weights),
data("bias", bias),
convolution("conv", "input_reordered", { "weights" }, { "bias" }, num_groups, stride, input_offset, dilation, output_size, data_types::f32),
convolution("conv", "input_reordered", { "weights" }, { "bias" }, num_groups, stride, input_offset, dilation, output_size, data_types::f32, true),
reorder("out", "conv", format::bfyx, data_types::f32));
build_options options;
@ -7440,7 +7440,8 @@ TEST_P(convolution_grouped_gpu, base) {
stride_tensor,
tensor(batch(0), feature(0), spatial(-input_offset_x, -input_offset_y, -input_offset_z, 0)),
tensor(batch(1), feature(1), spatial(1, 1, 1, 1)),
ref_conv_out_size),
ref_conv_out_size,
true),
reorder("out", "conv", {data_types::f32, format::bfzyx, ref_conv_out_size}));
if (has_input_zp)

View File

@ -512,9 +512,9 @@ public:
#define CASE_CONV_FP32_8 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_9 {1, 32, 4, 5, 4}, {1, 32, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_10 {32, 16, 4, 5, 4}, {32, 32, 4, 5, 4}, {1, 1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::bs_fs_zyx_bsv16_fsv16, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_11 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_12 {1, 16, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_13 {1, 16, 18, 5, 4}, {1, 16, 16, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_11 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_12 {1, 16, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_13 {1, 16, 18, 5, 4}, {1, 16, 16, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_14 {1, 3, 4, 5}, {1, 30, 2, 3}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::bfyx, data_types::f32, format::bfyx, data_types::f32, format::bfyx
#define CASE_CONV_FP16_1 {1, 15, 4, 5}, {1, 30, 2, 3}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::bfyx, data_types::f16, format::bfyx, data_types::f16, format::bfyx
@ -527,8 +527,8 @@ public:
#define CASE_CONV_FP16_8 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::g_os_is_zyx_isv16_osv16, data_types::f16, format::bfzyx
#define CASE_CONV_FP16_9 {1, 32, 4, 5, 4}, {1, 32, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::g_os_is_zyx_isv16_osv16, data_types::f16, format::bfzyx
#define CASE_CONV_FP16_10 {32, 16, 4, 5, 4}, {32, 32, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::bs_fs_zyx_bsv16_fsv16, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
#define CASE_CONV_FP16_11 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::os_is_zyx_isv16_osv16, data_types::f16, format::bfzyx
#define CASE_CONV_FP16_12 {1, 16, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::os_is_zyx_isv16_osv16, data_types::f16, format::bfzyx
#define CASE_CONV_FP16_11 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::g_os_is_zyx_isv16_osv16, data_types::f16, format::bfzyx
#define CASE_CONV_FP16_12 {1, 16, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::g_os_is_zyx_isv16_osv16, data_types::f16, format::bfzyx
#define CASE_CONV_FP16_13 {16, 32, 4, 5}, {16, 64, 2, 3}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::fs_b_yx_fsv32, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_CONV_U8S8_1 {1, 15, 4, 5}, {1, 30, 2, 3}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::bfyx, data_types::f32, format::bfyx
@ -571,10 +571,10 @@ public:
#define CASE_CONV_ELTW_FP32_2 {1, 16, 4, 5}, {1, 32, 2, 3}, {1, 1, 1, 1}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::os_is_yx_isv16_osv16, data_types::f32, format::bfyx
#define CASE_CONV_ELTW_FP32_3 {1, 16, 4, 5}, {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::os_is_yx_isv16_osv16, data_types::f32, format::bfyx
#define CASE_CONV_ELTW_FP32_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 32, 1, 1}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 32, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::gs_oiyx_gsv16, data_types::f32, format::bfyx
#define CASE_CONV_ELTW_FP32_5 {1, 32, 4, 5, 4}, {1, 32, 2, 3, 2}, {1, 32, 2, 1, 1}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_ELTW_FP32_6 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 16, 2, 1, 1}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_ELTW_FP32_5 {1, 32, 4, 5, 4}, {1, 32, 2, 3, 2}, {1, 32, 2, 1, 1}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_ELTW_FP32_6 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 16, 2, 1, 1}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_ELTW_FP32_7 {1, 16, 3, 5}, {1, 32, 1, 3}, {1, 32, 3, 1}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::os_is_yx_isv16_osv16, data_types::f32, format::bfyx
#define CASE_CONV_ELTW_FP32_8 {1, 32, 3, 5, 4}, {1, 16, 1, 3, 2}, {1, 1, 2, 1, 1}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_ELTW_FP32_8 {1, 32, 3, 5, 4}, {1, 16, 1, 3, 2}, {1, 1, 2, 1, 1}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_ELTW_i8_1 {1, 16, 3, 5}, {1, 32, 1, 3}, {1, 32, 3, 1}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::os_is_yx_osv16_isv16, data_types::f32, format::bfyx
#define CASE_CONV_ELTW_i8_2 {1, 16, 3, 5, 3}, {1, 32, 2, 4, 2}, {1, 1, 2, 4, 2}, {1, 1, 2, 2, 2}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfzyx, data_types::i8, format::oiyx, data_types::f32, format::bfzyx
@ -2214,7 +2214,7 @@ TEST_P(conv_int8_asymmetric_weights, basic) {
data("bias", get_mem(get_bias_layout(p))),
data("w_zp", get_mem(get_weights_zp_layout(p), 1, 127)),
eltwise("w_sub", {"weights", "w_zp"}, eltwise_mode::sub, data_types::f32),
convolution("conv_prim", "input", {"w_sub"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation, p.out_shape, data_types::f32),
convolution("conv_prim", "input", {"w_sub"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation, p.out_shape, data_types::f32, false),
reorder("reorder_bfyx", "conv_prim", p.default_format, data_types::f32)
);
tolerance = 1.f;
@ -2281,7 +2281,7 @@ TEST_P(conv_int8_asymmetric_data, basic) {
data("bias", get_mem(get_bias_layout(p))),
data("a_zp", get_mem(get_activations_zp_layout(p), 1, 127)),
eltwise("a_sub", {"input", "a_zp"}, eltwise_mode::sub, data_types::f32),
convolution("conv_prim", "a_sub", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation, p.out_shape, data_types::f32),
convolution("conv_prim", "a_sub", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation, p.out_shape, data_types::f32, false),
reorder("reorder_bfyx", "conv_prim", p.default_format, data_types::f32)
);
tolerance = 1.f;
@ -2352,7 +2352,7 @@ TEST_P(conv_int8_asymmetric_data_and_weights, basic) {
data("w_zp", get_mem(get_weights_zp_layout(p), 1, 127)),
eltwise("a_sub", {"input", "a_zp"}, eltwise_mode::sub, data_types::f32),
eltwise("w_sub", {"weights", "w_zp"}, eltwise_mode::sub, data_types::f32),
convolution("conv_prim", "a_sub", {"w_sub"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation, p.out_shape, data_types::f32),
convolution("conv_prim", "a_sub", {"w_sub"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation, p.out_shape, data_types::f32, false),
reorder("reorder_bfyx", "conv_prim", p.default_format, data_types::f32)
);
tolerance = 1.f;
@ -3980,7 +3980,7 @@ using deconv_test_params = bc_test_params;
#define CASE_DECONV_S8S8_1 {1, 15, 4, 5}, {1, 30, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_S8S8_2 {1, 16, 4, 5}, {1, 32, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_S8S8_3 {1, 16, 4, 5}, {1, 32, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_S8S8_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::goiyx, data_types::f32, format::bfyx
#define CASE_DECONV_S8S8_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1}, tensor{1}, 32, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::goiyx, data_types::f32, format::bfyx
#define CASE_DECONV_S8S8_5 {1, 15, 4, 5}, {1, 30, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_S8S8_6 {1, 16, 4, 5}, {1, 32, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_S8S8_7 {1, 16, 4, 5}, {1, 32, 7, 9}, {1, 1, 1, 1}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
@ -3989,7 +3989,7 @@ using deconv_test_params = bc_test_params;
#define CASE_DECONV_U8S8_1 {1, 15, 4, 5}, {1, 30, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_U8S8_2 {1, 16, 4, 5}, {1, 32, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_U8S8_3 {1, 16, 4, 5}, {1, 32, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_U8S8_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::goiyx, data_types::f32, format::bfyx
#define CASE_DECONV_U8S8_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1}, tensor{1}, 32, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::goiyx, data_types::f32, format::bfyx
#define CASE_DECONV_U8S8_5 {1, 15, 4, 5}, {1, 30, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_U8S8_6 {1, 16, 4, 5}, {1, 32, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_U8S8_7 {1, 16, 4, 5}, {1, 32, 7, 9}, {1, 1, 1, 1}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx