From d972a71b4cad93edf9d2bbb780ac6512c1c62daa Mon Sep 17 00:00:00 2001
From: Eddy Kim <eddy.kim@intel.com>
Date: Tue, 25 Apr 2023 06:21:50 +0900
Subject: [PATCH] [GPU] Fixed the prepare_quantization pass to support
 grouped_weights_shape (#17093)

* Fixed fill_compensation and the OFM computation in prepare_asymmetric_quantization to support grouped_weights_shape

* Added grouped-weights convolution unit tests and the flatten_5d test helper
---
 .../graph_optimizer/prepare_quantization.cpp  |  20 ++-
 .../tests/test_cases/convolution_gpu_test.cpp | 115 ++++++++++++------
 .../intel_gpu/tests/test_utils/test_utils.h   |  26 ++++
 3 files changed, 115 insertions(+), 46 deletions(-)
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp
index ed40aaf1442..452334495fa 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp
@@ -510,12 +510,16 @@ void prepare_quantization::prepare_asymmetric_quantization(program &p, convoluti
     };
 
     const auto& stream = p.get_stream();
-    auto fill_compensation = [&](int groups, const memory::ptr w, const memory::ptr azp, const memory::ptr wzp, memory::ptr compensation) {
-        const auto& wl = w->get_layout();
+    auto fill_compensation = [&](int groups, const memory::ptr w, const memory::ptr azp, const memory::ptr wzp, memory::ptr compensation,
+                                 bool grouped_weights_shape) {
+        auto wl = w->get_layout();
+        if (!format::is_weights_format(wl.format)) {
+            wl = wl.convert_to_weights_layout(grouped_weights_shape);
+        }
 
         const int GS = groups;
-        const int OC = wl.batch() / GS;
-        const int IC = wl.feature();  // already divided by GS
+        const int OC = grouped_weights_shape ? wl.ofm() : (wl.ofm() / GS);
+        const int IC = wl.ifm();
         const int KS = wl.spatial(0)*wl.spatial(1)*wl.spatial(2);
 
         const auto& w_dt = wl.data_type;
@@ -595,7 +599,11 @@ void prepare_quantization::prepare_asymmetric_quantization(program &p, convoluti
     bool need_compensation = false;
 
     auto output_size = convolution_node.get_output_layout().get_tensor();
-    int ofm = in1.get_output_layout().batch();
+    auto wl = in1.get_output_layout();
+    if (!format::is_weights_format(wl.format)) {
+        wl = wl.convert_to_weights_layout(convolution_node.typed_desc()->grouped_weights_shape);
+    }
+    int ofm = wl.group() * wl.ofm();
     int ifm = in0.get_output_layout().feature();
     int ofm_aligned = ((ofm + 31) / 32) * 32;
     int ifm_aligned = ((ifm + 31) / 32) * 32;
@@ -655,7 +663,7 @@ void prepare_quantization::prepare_asymmetric_quantization(program &p, convoluti
         auto azp = asymmetric_data ? new_a_zp->as<data>().get_attached_memory_ptr() : nullptr;
         auto wzp = asymmetric_weights ? new_w_zp->as<data>().get_attached_memory_ptr() : nullptr;
         int groups = static_cast<int>(convolution_node.get_groups());
-        fill_compensation(groups, w, azp, wzp, data_to_allocate);
+        fill_compensation(groups, w, azp, wzp, data_to_allocate, convolution_node.typed_desc()->grouped_weights_shape);
 
         auto compensation_prim = std::make_shared<data>(convolution_node.id() + "_compensation", data_to_allocate);
         new_compenstation = &p.get_or_create(compensation_prim);
diff --git a/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp
index 280b6da1fa4..b9daa823396 100644
--- a/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp
@@ -7911,10 +7911,10 @@ class convolution_test_base {
 public:
     virtual topology build_topology(cldnn::engine& engine) {
         auto input_lay = layout(input_type(), format::bfyx, input_size(), padding_size());
-        auto wei_lay = layout(weights_type(), format::bfyx, weights_size());
+        auto wei_lay = grouped_weights_shape() ? layout(weights_type(), format::bfzyx, grouped_weights_size()) : layout(weights_type(), format::bfyx, weights_size());
 
         auto wei_mem = engine.allocate_memory(wei_lay);
-        auto weights_flat = flatten_4d(format::bfyx, _weights);
+        auto weights_flat = grouped_weights_shape() ? flatten_5d(format::bfzyx, _grouped_weights) : flatten_4d(format::bfyx, _weights);
         set_values(wei_mem, weights_flat);
         layout reordered_layout = layout{ input_type(), input_format(), input_size(), padding_size() };
         auto topo = topology();
@@ -7949,6 +7949,7 @@ public:
                 {static_cast<std::ptrdiff_t>(_offset_y), static_cast<std::ptrdiff_t>(_offset_x)},
                 {static_cast<uint64_t>(_dilation_y), static_cast<uint64_t>(_dilation_x)});
             conv_prim.output_data_types = {output_type()};
+            conv_prim.grouped_weights_shape = grouped_weights_shape();
             topo.add(conv_prim);
         } else {
             auto bias_lay = layout(output_type(), format::bfyx, tensor(feature(output_features())));
@@ -7965,6 +7966,7 @@ public:
                 {static_cast<std::ptrdiff_t>(_offset_y), static_cast<std::ptrdiff_t>(_offset_x)},
                 {static_cast<uint64_t>(_dilation_y), static_cast<uint64_t>(_dilation_x)});
             conv_prim.output_data_types = {output_type()};
+            conv_prim.grouped_weights_shape = grouped_weights_shape();
             topo.add(conv_prim);
         }
 
@@ -8047,6 +8049,10 @@ public:
         _weights = std::move(weights);
     }
 
+    void set_grouped_weights(VVVVVF<WeightsT> grouped_weights) {  // takes the 5D weights by value and moves them in
+        _grouped_weights = std::move(grouped_weights);
+    }
+
     void set_bias(VF<OutputT> bias) {
         _bias = std::move(bias);
     }
@@ -8082,9 +8088,14 @@ public:
         _bigger_pad = bigger_pad;
     }
 
+    void set_grouped_weights_shape(bool grouped_weights_shape) {  // stores the flag returned by grouped_weights_shape()
+        _grouped_weights_shape = grouped_weights_shape;
+    }
+
 protected:
     VVVVF<InputT> _input;
     VVVVF<WeightsT> _weights;
+    VVVVVF<WeightsT> _grouped_weights;
     VF<OutputT> _bias;
     VF<InputT> _input_zp;
     VF<WeightsT> _weights_zp;
@@ -8094,15 +8105,16 @@ protected:
     int _dilation_x, _dilation_y;
     bool _padded_input;
     bool _bigger_pad;
+    bool _grouped_weights_shape = false;  // true: shape accessors read the 5D _grouped_weights instead of _weights
 
     size_t batch_num() const { return _input.size(); }
     size_t input_features() const { return _input[0].size(); }
     size_t input_x() const { return _input[0][0][0].size(); }
     size_t input_y() const { return _input[0][0].size(); }
-    size_t output_features() const { return _weights.size(); }
-    size_t weights_input_features() const { return _weights[0].size(); }
-    size_t filter_x() const { return _weights[0][0][0].size(); }
-    size_t filter_y() const { return _weights[0][0].size(); }
+    size_t output_features() const { return _grouped_weights_shape ? _grouped_weights.size() * _grouped_weights[0].size() : _weights.size(); }  // total over all groups
+    size_t weights_input_features() const { return _grouped_weights_shape ? _grouped_weights[0][0].size() : _weights[0].size(); }
+    size_t filter_x() const { return _grouped_weights_shape ? _grouped_weights[0][0][0][0].size() : _weights[0][0][0].size(); }
+    size_t filter_y() const { return _grouped_weights_shape ? _grouped_weights[0][0][0].size() : _weights[0][0].size(); }
     size_t groups() const { return input_features() / weights_input_features(); }
 
     bool has_bias() { return _bias.size() > 0; }
@@ -8110,6 +8122,7 @@ protected:
     bool has_weights_zp() { return _weights_zp.size() > 0; }
     bool need_padded_input() { return _padded_input; }
     bool bigger_pad() { return _bigger_pad; }
+    bool grouped_weights_shape() { return _grouped_weights_shape; }
 
     data_types input_type() const { return type_to_data_type<InputT>::value; }
     format input_format() const { return _input_fmt; }
@@ -8127,6 +8140,13 @@ protected:
                       TensorValue(filter_x()),
                       TensorValue(filter_y()));
     }
+    // Shape of the 5D grouped weights: groups, per-group OFM, filter x/y, per-group IFM.
+    tensor grouped_weights_size() const {
+        return tensor(TensorValue(groups()),
+                      TensorValue(output_features() / groups()),  // output_features() is the total OFM
+                      TensorValue(filter_x()), TensorValue(filter_y()),
+                      TensorValue(weights_input_features()));
+    }
     padding padding_size() const {
         if (_padded_input) {
             if (_bigger_pad) {
@@ -8163,6 +8183,7 @@ struct convolution_random_test_all_params {
     bool asymmetric_data;
     bool need_padded_input;
     bool bigger_pad;
+    bool grouped_weights_shape;
 };
 
 template <typename InputT, typename WeightsT, typename OutputT>
@@ -8172,6 +8193,7 @@ public:
         VVVVF<OutputT> expected = VVVVF<OutputT>(this->batch_num(), VVVF<OutputT>(this->output_features()));
         bool depthwise = this->groups() == this->input_features();
         bool grouped = (this->groups() > 1 && !depthwise) ? true : false;
+        size_t group_size = this->output_features() / this->groups();
         for (size_t bi = 0; bi < this->batch_num(); ++bi)
         for (size_t fi = 0; fi < this->output_features(); ++fi) {
             size_t f_begin = depthwise ? fi : 0;
@@ -8180,7 +8202,7 @@ public:
             auto weights_zp = this->has_weights_zp() ? this->_weights_zp[fi] : static_cast<WeightsT>(0);
             expected[bi][fi] = reference_convolve<InputT, OutputT, WeightsT>(
                 this->_input[bi],
-                this->_weights[fi],
+                (this->_grouped_weights_shape ? this->_grouped_weights[fi / group_size][fi % group_size] : this->_weights[fi]),
                 this->_stride_y,
                 this->_stride_x,
                 static_cast<float>(bias),
@@ -8205,14 +8227,20 @@ public:
 
         auto input_data = generate_random_4d<InputT>(
             params.batch, params.input_features, params.input_xy[1], params.input_xy[0], -256, 256);
-        auto weights_data = generate_random_4d<WeightsT>(
-            params.output_features, wei_in_f, params.filter_xy[1], params.filter_xy[0], -256, 256);
+        if (params.grouped_weights_shape) {
+            auto weights_data = generate_random_5d<WeightsT>(
+                params.groups, (params.output_features / params.groups), wei_in_f, params.filter_xy[1], params.filter_xy[0], -256, 256);
+            this->set_grouped_weights(std::move(weights_data));
+        } else {
+            auto weights_data = generate_random_4d<WeightsT>(
+                params.output_features, wei_in_f, params.filter_xy[1], params.filter_xy[0], -256, 256);
+            this->set_weights(std::move(weights_data));
+        }
         auto bias_data = params.with_bias ? generate_random_1d<OutputT>(params.output_features, -256, 256) : VF<OutputT>();
         auto weights_zp_data = params.asymmetric_weights ? generate_random_1d<WeightsT>(params.output_features, -256, 256) : VF<WeightsT>();
         auto input_zp_data = params.asymmetric_data ? generate_random_1d<InputT>(params.input_features, -256, 256) : VF<InputT>();
 
         this->set_input(params.input_format, std::move(input_data));
-        this->set_weights(std::move(weights_data));
         this->set_bias(std::move(bias_data));
         this->set_strides(params.stride_xy[0], params.stride_xy[1]);
         this->set_offsets(params.offset_xy[0], params.offset_xy[1]);
@@ -8221,6 +8249,7 @@ public:
         this->set_input_zp(std::move(input_zp_data));
         this->set_padded_input(params.need_padded_input);
         this->set_bigger_pad(params.bigger_pad);
+        this->set_grouped_weights_shape(params.grouped_weights_shape);
     }
 
     void run_random(const convolution_random_test_all_params& params) {
@@ -8250,6 +8279,7 @@ static std::string to_string_convolution_all_params(const testing::TestParamInfo
     bool asymm_input = params.asymmetric_data;
     bool padded_input = params.need_padded_input;
     bool bigger_pad = params.bigger_pad;
+    bool grouped_weights_shape = params.grouped_weights_shape;
     // Wrapper for negative walues as ex. "-1" will generate invalid gtest param string
     auto to_string_neg = [](int val) {
         if (val >= 0)
@@ -8267,7 +8297,7 @@ static std::string to_string_convolution_all_params(const testing::TestParamInfo
         "_g" + std::to_string(groups) +
         (Bias ? "_bias" : "") + (asymm_weights ? "_wzp" : "") + (asymm_input ? "_izp" : "") +
         (padded_input ? "_in_pad" : "") +
-        (bigger_pad ? "_bigger_pad" : "");
+        (bigger_pad ? "_bigger_pad" : "") + (grouped_weights_shape ? "_grouped_weights" : "");
 }
 
 template <typename InputT, typename WeightsT, typename OutputT>
@@ -8276,10 +8306,11 @@ public:
     using parent = convolution_random_test_base<InputT, WeightsT, OutputT>;
     topology build_topology(cldnn::engine& engine) override {
         auto input_lay = layout(this->input_type(), format::b_fs_yx_fsv4, this->input_size(), this->padding_size());
-        auto wei_lay = layout(this->weights_type(), format::bfyx, this->weights_size());
+        auto wei_lay = this->grouped_weights_shape() ? layout(this->weights_type(), format::bfzyx, this->grouped_weights_size()) :
+                       layout(this->weights_type(), format::bfyx, this->weights_size());
 
         auto wei_mem = engine.allocate_memory(wei_lay);
-        auto wei_flat = flatten_4d(format::bfyx, this->_weights);
+        auto wei_flat = this->grouped_weights_shape() ? flatten_5d(format::bfzyx, this->_grouped_weights) : flatten_4d(format::bfyx, this->_weights);
         set_values(wei_mem, wei_flat);
         layout reordered_layout = layout{ this->input_type(), this->input_format(), this->input_size(), this->padding_size() };
         auto topo = topology();
@@ -8314,6 +8345,7 @@ public:
                 {static_cast<std::ptrdiff_t>(this->_offset_y), static_cast<std::ptrdiff_t>(this->_offset_x)},
                 {static_cast<uint64_t>(this->_dilation_y), static_cast<uint64_t>(this->_dilation_x)});
             conv_prim.output_data_types = {this->output_type()};
+            conv_prim.grouped_weights_shape = this->grouped_weights_shape();
             topo.add(conv_prim);
         } else {
             auto bias_lay = layout(this->output_type(), format::bfyx, tensor(feature(this->output_features())));
@@ -8330,6 +8362,7 @@ public:
                 {static_cast<std::ptrdiff_t>(this->_offset_y), static_cast<std::ptrdiff_t>(this->_offset_x)},
                 {static_cast<uint64_t>(this->_dilation_y), static_cast<uint64_t>(this->_dilation_x)});
             conv_prim.output_data_types = {this->output_type()};
+            conv_prim.grouped_weights_shape = this->grouped_weights_shape();
             topo.add(conv_prim);
         }
 
@@ -8471,43 +8504,43 @@ struct params_generator : std::vector<convolution_random_test_all_params> {
         for (auto b : batches) {
             // first conv
             push_back(convolution_random_test_all_params{
-                b, 3, 32, { 28, 28 }, { 7, 7 }, { 2, 2 }, { 3, 3 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 3, 32, { 28, 28 }, { 7, 7 }, { 2, 2 }, { 3, 3 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             push_back(convolution_random_test_all_params{
-                b, 3, 64, { 1024, 10 }, { 5, 5 }, { 2, 2 }, { 2, 2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 3, 64, { 1024, 10 }, { 5, 5 }, { 2, 2 }, { 2, 2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             push_back(convolution_random_test_all_params{
-                b, 3, 15, { 10, 10 }, { 5, 5 }, { 1, 1 }, { 2, 2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 3, 15, { 10, 10 }, { 5, 5 }, { 1, 1 }, { 2, 2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             push_back(convolution_random_test_all_params{
-                b, 4, 18, { 10, 10 }, { 5, 5 }, { 1, 1 }, { 2, 2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 4, 18, { 10, 10 }, { 5, 5 }, { 1, 1 }, { 2, 2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             // 3x3
             push_back(convolution_random_test_all_params{
-                b, 32, 48, { 14, 14 }, { 3, 3 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 32, 48, { 14, 14 }, { 3, 3 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             push_back(convolution_random_test_all_params{
-                b, 32, 48, { 14, 14 }, { 3, 3 }, { 2, 2 }, { 1, 1 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 32, 48, { 14, 14 }, { 3, 3 }, { 2, 2 }, { 1, 1 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             // 1x1
             push_back(convolution_random_test_all_params{
-                b, 32, 48, { 28, 28 }, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 32, 48, { 28, 28 }, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             push_back(convolution_random_test_all_params{
-                b, 32, 48, { 28, 28 }, { 1, 1 }, { 2, 2 }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 32, 48, { 28, 28 }, { 1, 1 }, { 2, 2 }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             // 5x5
             push_back(convolution_random_test_all_params{
-                b, 32, 48, { 28, 28 }, { 5, 5 }, { 1, 1 }, { 2, 2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 32, 48, { 28, 28 }, { 5, 5 }, { 1, 1 }, { 2, 2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             push_back(convolution_random_test_all_params{
-                b, 32, 48, { 28, 28 }, { 5, 5 }, { 2, 2 }, { 2, 2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 32, 48, { 28, 28 }, { 5, 5 }, { 2, 2 }, { 2, 2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             // depthwise
             push_back(convolution_random_test_all_params{
-                b, 64, 64, { 19, 19 }, { 3, 3 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, true, 64, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 64, 64, { 19, 19 }, { 3, 3 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, true, 64, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             push_back(convolution_random_test_all_params{
-                b, 64, 64, { 19, 19 }, { 3, 3 }, { 2, 2 }, { 1, 1 }, { 1, 1 }, true, 64, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 64, 64, { 19, 19 }, { 3, 3 }, { 2, 2 }, { 1, 1 }, { 1, 1 }, true, 64, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             // dilation
             push_back(convolution_random_test_all_params{
-                b, 32, 24, { 19, 19 }, { 3, 3 }, { 1, 1 }, { 1, 1 }, { 2, 2 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 32, 24, { 19, 19 }, { 3, 3 }, { 1, 1 }, { 1, 1 }, { 2, 2 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             push_back(convolution_random_test_all_params{
-                b, 32, 24, { 19, 19 }, { 3, 3 }, { 2, 2 }, { 1, 1 }, { 2, 2 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 32, 24, { 19, 19 }, { 3, 3 }, { 2, 2 }, { 1, 1 }, { 2, 2 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             // depthwise + dilation
             push_back(convolution_random_test_all_params{
-                b, 64, 64, { 19, 19 }, { 3, 3 }, { 1, 1 }, { 1, 1 }, { 2, 2 }, true, 64, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 64, 64, { 19, 19 }, { 3, 3 }, { 1, 1 }, { 1, 1 }, { 2, 2 }, true, 64, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             push_back(convolution_random_test_all_params{
-                b, 64, 64, { 19, 19 }, { 3, 3 }, { 2, 2 }, { 1, 1 }, { 2, 2 }, true, 64, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 64, 64, { 19, 19 }, { 3, 3 }, { 2, 2 }, { 1, 1 }, { 2, 2 }, true, 64, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
         }
         return *this;
     }
@@ -8521,19 +8554,19 @@ struct params_generator : std::vector<convolution_random_test_all_params> {
         for (auto b : batches) {
             // 1x1
             push_back(convolution_random_test_all_params{
-                b, 23, 41, { 19, 19 }, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 23, 41, { 19, 19 }, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             push_back(convolution_random_test_all_params{
-                b, 23, 41, { 19, 19 }, { 1, 1 }, { 2, 2 }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 23, 41, { 19, 19 }, { 1, 1 }, { 2, 2 }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             // 3x3
             push_back(convolution_random_test_all_params{
-                b, 16, 28, { 14, 14 }, { 3, 3 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 16, 28, { 14, 14 }, { 3, 3 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             push_back(convolution_random_test_all_params{
-                b, 23, 41, { 19, 17 }, { 3, 3 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 23, 41, { 19, 17 }, { 3, 3 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             // 5x5
             push_back(convolution_random_test_all_params{
-                b, 16, 28, { 14, 14 }, { 5, 5 }, { 1, 1 }, { 2, 2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 16, 28, { 14, 14 }, { 5, 5 }, { 1, 1 }, { 2, 2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
             push_back(convolution_random_test_all_params{
-                b, 23, 41, { 19, 17 }, { 5, 5 }, { 1, 1 }, { 2, 2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                b, 23, 41, { 19, 17 }, { 5, 5 }, { 1, 1 }, { 2, 2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, false });
         }
         return *this;
     }
@@ -8542,21 +8575,22 @@ struct params_generator : std::vector<convolution_random_test_all_params> {
                                      bool asymm_weights = false,
                                      bool asymm_data = false,
                                      bool padded_input = false,
-                                     bool bigger_pad = false) {
+                                     bool bigger_pad = false,
+                                     bool grouped_weights_shape = false) {
         std::vector<int> strides = { 1, 2 };
         for (auto s : strides) {
             // 1x1
             push_back(convolution_random_test_all_params{
             //      feature   input     filter    stride    offset  dilation  bias  groups
             //batch in  out   x  y      x  y      x  y      x  y      x  y
-                16, 32, 32, { 4, 4 }, { 1, 1 }, { s, s }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                16, 32, 32, { 4, 4 }, { 1, 1 }, { s, s }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, grouped_weights_shape });
             push_back(convolution_random_test_all_params{
-                16, 32, 32, { 9, 9 }, { 1, 1 }, { s, s }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                16, 32, 32, { 9, 9 }, { 1, 1 }, { s, s }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, grouped_weights_shape });
             // 3x3
             push_back(convolution_random_test_all_params{
-                16, 32, 32, { 4, 4 }, { 3, 3 }, { s, s }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                16, 32, 32, { 4, 4 }, { 3, 3 }, { s, s }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, grouped_weights_shape });
             push_back(convolution_random_test_all_params{
-                16, 32, 32, { 9, 9 }, { 3, 3 }, { s, s }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad });
+                16, 32, 32, { 9, 9 }, { 3, 3 }, { s, s }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data, padded_input, bigger_pad, grouped_weights_shape });
         }
         return *this;
     }
@@ -8615,6 +8649,7 @@ INSTANTIATE_TEST_SUITE_P(
         .smoke_test_params(format::b_fs_yx_fsv16, false, false, true)
         .smoke_test_params(format::b_fs_yx_fsv16, false, false, true, true)
         .bs_test_params(format::bs_fs_yx_bsv16_fsv16)
+        .bs_test_params(format::b_fs_yx_fsv16, false, true, false, false, true)
     ),
     to_string_convolution_all_params
 );
diff --git a/src/plugins/intel_gpu/tests/test_utils/test_utils.h b/src/plugins/intel_gpu/tests/test_utils/test_utils.h
index a5d7779a66d..9d6e5099766 100644
--- a/src/plugins/intel_gpu/tests/test_utils/test_utils.h
+++ b/src/plugins/intel_gpu/tests/test_utils/test_utils.h
@@ -166,6 +166,32 @@ inline VF<T> flatten_4d(cldnn::format input_format, VVVVF<T> &data) {
     return vec;
 }
 
+// Flattens rectangular 5D nested data [b][f][z][y][x] into one dense vector.
+// Only format::bfzyx is handled; any other format hits the assert. Extents
+// are read from the first element of each nesting level.
+template<typename T>
+inline VF<T> flatten_5d(cldnn::format input_format, VVVVVF<T> &data) {
+    const size_t dim_b = data.size();
+    const size_t dim_f = data[0].size();
+    const size_t dim_z = data[0][0].size();
+    const size_t dim_y = data[0][0][0].size();
+    const size_t dim_x = data[0][0][0][0].size();
+    VF<T> flat(dim_b * dim_f * dim_z * dim_y * dim_x, static_cast<T>(0.0f));
+
+    if (input_format.value != cldnn::format::bfzyx) {
+        assert(0);  // unsupported layout; with NDEBUG the zero-filled buffer is returned
+        return flat;
+    }
+    auto out = flat.begin();
+    for (size_t b = 0; b < dim_b; ++b)
+        for (size_t f = 0; f < dim_f; ++f)
+            for (size_t z = 0; z < dim_z; ++z)
+                for (size_t y = 0; y < dim_y; ++y)
+                    for (size_t x = 0; x < dim_x; ++x)
+                        *out++ = data[b][f][z][y][x];
+    return flat;
+}
+
 template<typename T>
 inline VF<T> flatten_6d(cldnn::format input_format, VVVVVVF<T> &data) {
     size_t a = data.size();