[GPU] Pooling refactoring (#13664)
parent: c11389fa18
commit: 2b5dd4fac8
@@ -1,82 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "primitive.hpp"
#include <vector>

namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{

/// @brief Performs "max_unpooling" operation.
/// @details Reverse operation of max pooling, based on the argmax data where indices of each max pooling region are stored.
struct max_unpooling : public primitive_base<max_unpooling> {
    CLDNN_DECLARE_PRIMITIVE(max_unpooling)

    /// @brief Constructs max_unpooling primitive.
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param argmax Primitive id which contains indices of each max pooling region.
    /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
    /// Used only for output size computation.
    /// @param size Pooling kernel size. Used only for output size computation.
    /// @param pad Defines logical pad value added to input tensor. Used only for output size computation.
    max_unpooling(const primitive_id& id,
                  const primitive_id& input,
                  const primitive_id& argmax,
                  const tensor& size,
                  const tensor& stride,
                  const tensor& pad = {0, 0, 0, 0},
                  const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding),
          argmax(argmax),
          pad(pad),
          stride(stride),
          size(size),
          with_output_size(false) {}

    /// @brief Constructs max_unpooling primitive (with provided output size)
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param argmax Primitive id which contains indices of each max pooling region.
    /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
    /// @param output_size User-defined output data size of the primitive (w/o padding).
    max_unpooling(const primitive_id& id,
                  const primitive_id& input,
                  const primitive_id& argmax,
                  tensor output_size,
                  const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding),
          argmax(argmax),
          with_output_size(true),
          output_size(output_size) {}

    /// @brief Primitive id which contains indices of each max pooling region.
    /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
    primitive_id argmax;
    /// @brief Defines logical pad value added to input tensor.
    tensor pad;
    /// @brief Defines shift in input buffer between adjacent calculations of output values. Used only for output size computation.
    tensor stride;
    /// @brief Pooling kernel size. Used only for output size computation.
    tensor size;
    /// @brief Indicates that the primitive has user-defined output size (non-zero value). Used only for output size computation.
    bool with_output_size;
    /// @brief User-defined output data size of the primitive (w/o padding).
    tensor output_size;

protected:
    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override { return {argmax}; }
};
/// @}
/// @}
/// @}
} // namespace cldnn
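For orientation, a minimal sketch of how the removed primitive pair used to be wired together, based only on the constructors listed above. The ids, the in_layout/argmax_mem objects, and the use of mutable_data for the shared fp32 index buffer are hypothetical illustrations, not code from this commit:

    // Hypothetical sketch: max pooling records flattened fp32 argmax indices in
    // "argmax_buf"; max_unpooling reads them back to scatter values.
    // in_layout (cldnn::layout) and argmax_mem (cldnn::memory) are assumed to be
    // prepared elsewhere.
    topology topo;
    topo.add(input_layout("input", in_layout));
    topo.add(mutable_data("argmax_buf", argmax_mem));
    topo.add(pooling("pool", "input", "argmax_buf", pooling_mode::max_with_argmax,
                     ov::Shape{2, 2}, ov::Strides{2, 2}));
    topo.add(max_unpooling("unpool", "pool", "argmax_buf",
                           tensor{1, 1, 2, 2} /*size*/, tensor{1, 1, 2, 2} /*stride*/));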
@@ -26,8 +26,6 @@ enum class pooling_mode : int32_t {
    average,
    /// @brief Average-pooling method without values which are outside of the input.
    average_no_padding,
    /// @brief Maximum-pooling method with additional buffer to store argmax indices.
    max_with_argmax,
    /// @brief Pooling with bilinear interpolation.
    bilinear,
    /// @brief Deformable pooling with bilinear interpolation.
@@ -51,46 +49,18 @@ struct pooling : public primitive_base<pooling> {
            pooling_mode mode,
            const ov::Shape& size,
            const ov::Strides& stride,
            const ov::Shape& pad = {0, 0},
            const ov::Shape& pads_begin = {0, 0},
            const ov::Shape& pads_end = {0, 0},
            const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding),
          argmax(""),
          mode(static_cast<pooling_mode>(mode)),
          global_pooling(false),
          pad(pad),
          pads_begin(pads_begin),
          pads_end(pads_end),
          stride(stride),
          size(size),
          with_output_size(false),
          pad_end(size.size(), 0) {}
          with_output_size(false) {}

    /// @brief Constructs pooling primitive with argmax.
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param argmax Primitive id which contains indices of each max pooling region.
    /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
    /// @param mode Pooling mode.
    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
    /// @param size Pooling kernel size.
    /// @param pad Defines logical pad value added to input tensor
    pooling(const primitive_id& id,
            const primitive_id& input,
            const primitive_id& argmax,
            pooling_mode mode,
            const ov::Shape& size,
            const ov::Strides& stride,
            const ov::Shape& pad = {0, 0},
            const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding),
          argmax(argmax),
          mode(static_cast<pooling_mode>(mode)),
          global_pooling(false),
          pad(pad),
          stride(stride),
          size(size),
          with_output_size(false),
          pad_end(size.size(), 0) {}

    /// @brief Constructs pooling primitive (computes input paddings to match output size).
    /// @brief Constructs pooling primitive with known output shape.
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param mode Pooling mode.
@@ -103,68 +73,19 @@ struct pooling : public primitive_base<pooling> {
            pooling_mode mode,
            const ov::Shape& size,
            const ov::Strides& stride,
            const ov::Shape& pad,
            const ov::Shape& pads_begin,
            const ov::Shape& pads_end,
            tensor output_size,
            const data_types output_data_type,
            const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding, optional_data_type{output_data_type}),
          argmax(""),
          mode(static_cast<pooling_mode>(mode)),
          global_pooling(false),
          pad(pad),
          pads_begin(pads_begin),
          pads_end(pads_end),
          stride(stride),
          size(size),
          with_output_size(true),
          output_size(output_size),
          pad_end(size.size(), 0) {}

    /// @brief Constructs pooling primitive with argmax (computes input paddings to match output size).
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param argmax Primitive id which contains indices of each max pooling region.
    /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
    /// @param mode Pooling mode.
    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
    /// @param size Pooling kernel size.
    /// @param pad Defines logical pad value added to input tensor.
    /// @param output_size User-defined output data size of the primitive (w/o padding).
    pooling(const primitive_id& id,
            const primitive_id& input,
            const primitive_id& argmax,
            pooling_mode mode,
            const ov::Shape& size,
            const ov::Strides& stride,
            const ov::Shape& pad,
            tensor output_size,
            const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding),
          argmax(argmax),
          mode(static_cast<pooling_mode>(mode)),
          global_pooling(false),
          pad(pad),
          stride(stride),
          size(size),
          with_output_size(true),
          output_size(output_size),
          pad_end(size.size(), 0) {}

    /// @brief Constructs pooling primitive with kernel size equal to the spatial dimension of input tensor.
    /// @param id This primitive id.
    /// @param input Input primitive id.
    /// @param mode Pooling mode.
    pooling(const primitive_id& id,
            const primitive_id& input,
            pooling_mode mode,
            const padding& output_padding = padding())
        : primitive_base(id, {input}, output_padding),
          argmax(""),
          mode(static_cast<pooling_mode>(mode)),
          global_pooling(true),
          pad({0, 0}),
          stride({1, 1}),
          size({0, 0}),
          with_output_size(false),
          pad_end(size.size(), 0) {}
          output_size(output_size) {}

    /// @brief Constructs pooling primitive that supports MaxPool features from opset8 (dilation and indices output).
    /// @param id This primitive id.
@@ -184,41 +105,35 @@ struct pooling : public primitive_base<pooling> {
            const ov::Shape& size,
            const ov::Strides& stride,
            const ov::Strides& dilation,
            const ov::Shape& pad,
            const ov::Shape& pad_end,
            const ov::Shape& pads_begin,
            const ov::Shape& pads_end,
            int64_t axis,
            data_types index_element_type,
            tensor output_size,
            const data_types output_data_type,
            const padding& output_padding = padding())
        : primitive_base(id, {input, indices_output}, output_padding, optional_data_type{output_data_type}),
          argmax(""),
          indices_output(indices_output),
          mode(pooling_mode::max),
          global_pooling(false),
          pad(pad),
          pads_begin(pads_begin),
          pads_end(pads_end),
          stride(stride),
          dilation(dilation),
          size(size),
          with_output_size(true),
          output_size(output_size),
          pad_end(pad_end),
          axis(axis),
          index_element_type(index_element_type),
          maxPoolOpset8Features(true)
    {}
          maxPoolOpset8Features(true) {}

    /// @brief Primitive id which contains indices of each max pooling region.
    /// Indices must be in flattened bfyx format with no padding. Needs to be fp32 data type.
    primitive_id argmax;
    /// @brief Primitive id which contains indices output.
    primitive_id indices_output;
    /// @brief Pooling mode.
    pooling_mode mode;
    /// @brief Global pooling (kernel size is equal to the spatial dimension of input tensor)
    bool global_pooling;
    /// @brief Defines logical pad value added to input tensor.
    ov::Shape pad;
    ov::Shape pads_begin;
    /// @brief Defines a shift, relative to the end of padding shape.
    ov::Shape pads_end;
    /// @brief Defines shift in input buffer between adjacent calculations of output values.
    ov::Strides stride;
    /// @brief Defines index of next pixel to select when pooling
@@ -229,8 +144,6 @@ struct pooling : public primitive_base<pooling> {
    bool with_output_size;
    /// @brief User-defined output data size of the primitive (w/o padding).
    tensor output_size;
    /// @brief Defines a shift, relative to the end of padding shape.
    ov::Shape pad_end;
    /// @brief first dimension of input that should be used to calculate the upper bound of index output
    int64_t axis = 0;
    /// @brief type of index output
@@ -240,8 +153,6 @@ struct pooling : public primitive_base<pooling> {
protected:
    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
        std::vector<std::reference_wrapper<const primitive_id>> ret;
        if (!argmax.empty())
            ret.push_back(argmax);
        if (!indices_output.empty())
            ret.push_back(indices_output);
        return ret;
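A usage note on the refactored signature: callers now pass explicit begin/end padding instead of the removed pad parameter and pad_end member. A hypothetical call site (values invented for illustration, not taken from this commit):

    // 3x3 average pooling, stride 1, symmetric padding of 1.
    auto pool_prim = pooling("pool", "input", pooling_mode::average,
                             ov::Shape{3, 3},    // kernel size
                             ov::Strides{1, 1},  // stride
                             ov::Shape{1, 1},    // pads_begin
                             ov::Shape{1, 1});   // pads_end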
@@ -111,7 +111,7 @@ void prepare_padding::run(program& p) {
            needed_padding = calc_sliding_window_needed_input_padding(prim_node.input().get_output_layout(),
                                                                      prim->output_size,
                                                                      size,
                                                                      ov::CoordinateDiff(prim->pad.begin(), prim->pad.end()),
                                                                      ov::CoordinateDiff(prim->pads_begin.begin(), prim->pads_begin.end()),
                                                                      prim->stride,
                                                                      ov::Strides(prim->size.size(), 1),
                                                                      false,
@@ -649,11 +649,6 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
        return data_type_traits::is_i8_u8(in_dt);
    };

    auto pooling_supports_fusings = [](pooling_node& node) -> bool {
        auto pooling_mode = node.get_primitive()->mode;
        return pooling_mode != cldnn::pooling_mode::max_with_argmax;
    };

    auto dts_supports_fusings = [](depth_to_space_node& node) -> bool {
        bool input_conv = node.get_dependency(0).is_type<convolution>();
        bool out_eltw = node.get_users().front()->is_type<eltwise>();
@@ -772,7 +767,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {

        should_fuse |= input_data.is_type<gemm>() && gemm_supports_fusings(input_data.as<gemm>());

        should_fuse |= input_data.is_type<pooling>() && pooling_supports_fusings(input_data.as<pooling>());
        should_fuse |= input_data.is_type<pooling>();

        should_fuse |= input_data.is_type<resample>();

@@ -861,8 +856,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
                              _lo.get_optimization_attributes().use_onednn_impls ||
                              (in_dt_is_i8_u8 && out_dt_is_i8_u8));

        should_fuse |= input_data.is_type<pooling>() && quantize_node.get_scale_shift_opt() &&
                       pooling_supports_fusings(input_data.as<pooling>());
        should_fuse |= input_data.is_type<pooling>() && quantize_node.get_scale_shift_opt();

        should_fuse |= input_data.is_type<fully_connected>() && quantize_node.get_scale_shift_opt();

@@ -960,7 +954,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
                (parents[i]->is_type<gather_elements>()) ||
                (parents[i]->is_type<scatter_nd_update>()) ||
                (parents[i]->is_type<scatter_elements_update>()) ||
                (parents[i]->is_type<pooling>() && pooling_supports_fusings(parents[i]->as<pooling>())) ||
                (parents[i]->is_type<pooling>()) ||
                (parents[i]->is_type<depth_to_space>() && dts_supports_fusings(parents[i]->as<depth_to_space>())) ||
                (parents[i]->is_type<gather>()) ||
                (parents[i]->is_type<reduce>() && reduce_supports_fusings(parents[i]->as<reduce>())) ||
@@ -8,7 +8,6 @@

// ToDo: remove those include with the appropriate code below once we will have support for multiple outputs of a
// primitive
#include "max_unpooling_inst.h"
#include "pooling_inst.h"
#include <vector>
#include <queue>

@@ -27,13 +26,10 @@ void trim_to_outputs::run(program& p) {
    queue.push(&p.get_outputs());

    std::vector<program_node*> special_nodes;
    for (auto& node : p.get_processing_order()) {
        if (node->is_type<input_layout>() ||  // input layout may become disconnected during prior boxes calculations so
                                              // it may have not been marked at this place but we don't want to remove it
            node->is_type<max_unpooling>() ||  // ToDo: remove this after support for multi-outputs in primitives will
                                               // be implemented.
            (node->is_type<pooling>() && node->as<pooling>().get_primitive()->mode == pooling_mode::max_with_argmax))
            special_nodes.push_back(node);
    for (auto& node : p.get_processing_order()) {  // input layout may become disconnected during prior boxes calculations so
        if (node->is_type<input_layout>()) {       // it may have not been marked at this place but we don't want to remove it
            special_nodes.push_back(node);         // ToDo: remove this after support for multi-outputs in primitives will
        }                                          // be implemented.
    }
    queue.push(&special_nodes);
@@ -1,83 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "max_unpooling_inst.h"
#include "primitive_base.hpp"
#include "impls/implementation_map.hpp"
#include "intel_gpu/runtime/error_handler.hpp"
#include "intel_gpu/graph/network.hpp"
#include "kernel_selector_helper.h"
#include "max_unpooling/max_unpooling_kernel_selector.h"
#include "max_unpooling/max_unpooling_kernel_base.h"
#include <vector>

namespace cldnn {
namespace ocl {

struct max_unpooling_impl : typed_primitive_impl_ocl<max_unpooling> {
    using parent = typed_primitive_impl_ocl<max_unpooling>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<max_unpooling_impl>(*this);
    }

protected:
    kernel_arguments_data get_arguments(typed_primitive_inst<max_unpooling>& instance, int32_t split) const override {
        kernel_arguments_data args = parent::get_arguments(instance, split);
        args.inputs.push_back(instance.dep_memory_ptr(1));
        return args;
    }

public:
    event::ptr execute_impl(const std::vector<event::ptr>& events, max_unpooling_inst& instance) override {
        // clear output buffer
        std::vector<event::ptr> tmp_events(events);
        auto& stream = instance.get_network().get_stream();
        auto ev = instance.output_memory().fill(stream);
        tmp_events.push_back(ev);
        return parent::execute_impl(tmp_events, instance);
    }

    static primitive_impl* create(const max_unpooling_node& arg, const kernel_impl_params& impl_param) {
        auto max_unpooling_params = get_default_params<kernel_selector::max_unpooling_params>(impl_param);
        auto max_unpooling_optional_params =
            get_default_optional_params<kernel_selector::max_unpooling_optional_params>(arg.get_program());

        const auto max_idx = 1;
        max_unpooling_params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[max_idx]));

        auto& kernel_selector = kernel_selector::max_unpooling_kernel_selector::Instance();
        auto best_kernels = kernel_selector.GetBestKernels(max_unpooling_params, max_unpooling_optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

        auto max_unpool = new max_unpooling_impl(arg, best_kernels[0]);

        return max_unpool;
    }
};

namespace detail {

attach_max_unpooling_impl::attach_max_unpooling_impl() {
    implementation_map<max_unpooling>::add(impl_types::ocl, max_unpooling_impl::create, {
        std::make_tuple(data_types::f32, format::yxfb),
        std::make_tuple(data_types::f16, format::yxfb),
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        std::make_tuple(data_types::i8, format::yxfb),
        std::make_tuple(data_types::f32, format::byxf),
        std::make_tuple(data_types::f16, format::byxf),
        std::make_tuple(data_types::i8, format::byxf),
    });
}

} // namespace detail
} // namespace ocl
} // namespace cldnn
@@ -21,12 +21,10 @@ void validate_args(const pooling_node& arg) {
    auto stride_rank = arg.get_primitive()->stride.size();
    auto window_rank = arg.get_primitive()->size.size();

    if (!arg.get_primitive()->global_pooling) {
        CLDNN_ERROR_NOT_EQUAL(arg.id(), "input dimensions", input_rank, "output dimensions", output_rank, "");
        CLDNN_ERROR_NOT_EQUAL(arg.id(), "stride dimensions", stride_rank, "output dimensions", output_rank, "");
        CLDNN_ERROR_NOT_EQUAL(arg.id(), "window dimensions", window_rank, "output dimensions", output_rank, "");
    }
}

kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode) {
    switch (mode) {
@@ -36,8 +34,6 @@ kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode) {
            return kernel_selector::pool_type::AVG;
        case pooling_mode::average_no_padding:
            return kernel_selector::pool_type::AVG;
        case pooling_mode::max_with_argmax:
            return kernel_selector::pool_type::MAX_WITH_ARGMAX;
        default:
            assert(0);
            return kernel_selector::pool_type::MAX;
@@ -47,8 +43,6 @@
kernel_selector::kernel_divider_mode cldnn_2_kernel_divider_mode(pooling_mode mode) {
    switch (mode) {
        case pooling_mode::max:
        case pooling_mode::max_with_argmax:
            return kernel_selector::kernel_divider_mode::DONT_CARE;
        case pooling_mode::average:
            return kernel_selector::kernel_divider_mode::FIXED;
        case pooling_mode::average_no_padding:
@@ -71,8 +65,6 @@ struct pooling_impl : typed_primitive_impl_ocl<pooling> {
protected:
    kernel_arguments_data get_arguments(typed_primitive_inst<pooling>& instance, int32_t split) const override {
        kernel_arguments_data args = parent::get_arguments(instance, split);
        if (!instance.argument->argmax.empty())
            args.inputs.push_back(instance.dep_memory_ptr(1));
        return args;
    }

@@ -102,7 +94,9 @@
        }

        const auto& stride = primitive->stride;
        const auto& pad = primitive->pad;
        const auto& pads_begin = primitive->pads_begin;
        const auto& pads_end = primitive->pads_end;

        const auto& dilation = primitive->dilation;
        auto kernel = primitive->size;
        const auto& input_layout = impl_param.input_layouts[0];
@@ -114,19 +108,12 @@
        pp.poolType = cldnn_2_pool_type(primitive->mode);
        pp.remainderAction = kernel_selector::pool_remainder::CEIL;

        if (primitive->global_pooling) {
            kernel = ov::Shape(spatial_rank, 1);
            for (size_t i = 0; i < spatial_rank; i++) {
                kernel[i] = input_layout.spatial(spatial_rank - i - 1);
            }
        }

        // check if last pooling window goes outside of input size + padding. If so the avg pooling size will be
        // adjusted to that, to work properly this calculation must take pad_end into account.
        auto dynamic_mode = false;
        for (size_t i = 0; i < spatial_rank; i++) {
            dynamic_mode |= (((output_layout.spatial(i) - 1) * stride[spatial_rank - i - 1]) + primitive->size[spatial_rank - i - 1]) >
                                (primitive->pad_end[spatial_rank - i - 1] + pad[spatial_rank - i - 1]) + input_layout.spatial(i);
                                (pads_end[spatial_rank - i - 1] + pads_begin[spatial_rank - i - 1]) + input_layout.spatial(i);
        }

        if (primitive->mode == pooling_mode::average && dynamic_mode)
@@ -134,17 +121,14 @@
        else
            pp.divMode = cldnn_2_kernel_divider_mode(primitive->mode);

        if (primitive->mode == pooling_mode::max_with_argmax)
            pool_params.inputs.push_back(convert_data_tensor(arg.argmax().get_output_layout()));

        uint32_t kernel_z = kernel.size() >= 3 ? kernel[kernel.size() - 3] : 1;
        uint32_t kernel_y = kernel.size() >= 2 ? kernel[kernel.size() - 2] : 1;
        uint32_t kernel_x = kernel.size() >= 1 ? kernel[kernel.size() - 1] : 1;
        pp.poolSize = {kernel_x, kernel_y, kernel_z};

        uint32_t pad_z = std::max<std::ptrdiff_t>(pad.size() >= 3 ? pad[pad.size() - 3] : 0, 0);
        uint32_t pad_y = std::max<std::ptrdiff_t>(pad.size() >= 2 ? pad[pad.size() - 2] : 0, 0);
        uint32_t pad_x = std::max<std::ptrdiff_t>(pad.size() >= 1 ? pad[pad.size() - 1] : 0, 0);
        uint32_t pad_z = std::max<std::ptrdiff_t>(pads_begin.size() >= 3 ? pads_begin[pads_begin.size() - 3] : 0, 0);
        uint32_t pad_y = std::max<std::ptrdiff_t>(pads_begin.size() >= 2 ? pads_begin[pads_begin.size() - 2] : 0, 0);
        uint32_t pad_x = std::max<std::ptrdiff_t>(pads_begin.size() >= 1 ? pads_begin[pads_begin.size() - 1] : 0, 0);
        pp.poolPad = {pad_x, pad_y, pad_z};

        uint32_t stride_z = stride.size() >= 3 ? stride[stride.size() - 3] : 1;
@@ -46,7 +46,6 @@ void register_implementations() {
    REGISTER_OCL(lrn);
    REGISTER_OCL(lstm_gemm);
    REGISTER_OCL(lstm_elt);
    REGISTER_OCL(max_unpooling);
    REGISTER_OCL(mutable_data);
    REGISTER_OCL(mvn);
    REGISTER_OCL(non_max_suppression);

@@ -43,7 +43,6 @@
#include "intel_gpu/primitives/lstm_dynamic.hpp"
#include "intel_gpu/primitives/lstm_dynamic_input.hpp"
#include "intel_gpu/primitives/lstm_dynamic_timeloop.hpp"
#include "intel_gpu/primitives/max_unpooling.hpp"
#include "intel_gpu/primitives/mutable_data.hpp"
#include "intel_gpu/primitives/mvn.hpp"
#include "intel_gpu/primitives/non_max_suppression.hpp"
@@ -127,7 +126,6 @@ REGISTER_OCL(grid_sample);
REGISTER_OCL(lrn);
REGISTER_OCL(lstm_gemm);
REGISTER_OCL(lstm_elt);
REGISTER_OCL(max_unpooling);
REGISTER_OCL(mutable_data);
REGISTER_OCL(mvn);
REGISTER_OCL(non_max_suppression);
@@ -32,17 +32,12 @@ protected:

        dnnl::memory::dims stride(prim->stride.begin(), prim->stride.end());
        dnnl::memory::dims kernel(prim->size.begin(), prim->size.end());
        dnnl::memory::dims pad_l(prim->pad.begin(), prim->pad.end());
        dnnl::memory::dims pad_r(prim->pad_end.begin(), prim->pad_end.end());
        dnnl::memory::dims pad_l(prim->pads_begin.begin(), prim->pads_begin.end());
        dnnl::memory::dims pad_r(prim->pads_end.begin(), prim->pads_end.end());

        auto input_md = onednn::layout_to_memory_desc(input_layout);
        auto output_md = onednn::layout_to_memory_desc(output_layout);

        if (prim->global_pooling) {
            for (size_t i = 0; i < kernel.size(); i++)
                kernel[i] = input_md.dims()[2 + i];
        }

        for (size_t i = 0; i < kernel.size(); i++) {
            pad_r[i] = (output_md.dims()[2 + i] - 1) * stride[i] - input_md.dims()[2 + i] + kernel[i] - pad_l[i];
        }
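The pad_r loop above inverts the usual pooling size relation output = (input + pad_l + pad_r - kernel) / stride + 1, so the right padding handed to oneDNN always makes the last window end exactly at the output extent. A self-contained sketch of that arithmetic with invented numbers (not OpenVINO code):

    #include <cstdio>

    int main() {
        // Hypothetical 1-D case: input extent 7, window 3, stride 2, left pad 1.
        const int input = 7, kernel = 3, stride = 2, pad_l = 1;
        const int output = (input + pad_l - kernel) / stride + 1;          // 3, with pad_r taken as 0
        const int pad_r = (output - 1) * stride - input + kernel - pad_l;  // -1: last window already fits
        std::printf("output=%d pad_r=%d\n", output, pad_r);
        return 0;
    }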
@@ -1,40 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "intel_gpu/primitives/max_unpooling.hpp"
#include "primitive_inst.h"

#include <string>
#include <memory>

namespace cldnn {

template <>
struct typed_program_node<max_unpooling> : public typed_program_node_base<max_unpooling> {
    using parent = typed_program_node_base<max_unpooling>;
    typed_program_node(const std::shared_ptr<max_unpooling> prim, program& prog);

public:
    using parent::parent;
    program_node& input() const { return get_dependency(0); }
    program_node& argmax() const { return get_dependency(1); }
};

using max_unpooling_node = typed_program_node<max_unpooling>;

template <>
class typed_primitive_inst<max_unpooling> : public typed_primitive_inst_base<max_unpooling> {
    using parent = typed_primitive_inst_base<max_unpooling>;

public:
    typed_primitive_inst(network& network, max_unpooling_node const& desc);
    static layout calc_output_layout(max_unpooling_node const& node, kernel_impl_params const& impl_param);
    static std::string to_string(max_unpooling_node const& node);
};

using max_unpooling_inst = typed_primitive_inst<max_unpooling>;

} // namespace cldnn
@@ -1,112 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "max_unpooling_inst.h"
#include "primitive_type_base.h"
#include "sliding_window_utils_legacy.h"
#include "intel_gpu/runtime/error_handler.hpp"
#include "json_object.h"
#include <string>
#include <memory>

namespace cldnn {
primitive_type_id max_unpooling::type_id() {
    static primitive_type_base<max_unpooling> instance;
    return &instance;
}

max_unpooling_node::typed_program_node(const std::shared_ptr<max_unpooling> prim, program& prog)
    : parent(prim, prog) {
    can_share_buffer(false);  // for max_unpooling initial zero values are significant
}

layout max_unpooling_inst::calc_output_layout(max_unpooling_node const& node, kernel_impl_params const& impl_param) {
    assert(static_cast<bool>(impl_param.desc->output_data_type) == false &&
           "Output data type forcing is not supported for max_unpooling_node!");
    auto desc = impl_param.typed_desc<max_unpooling>();

    auto input_layout = impl_param.get_input_layout(0);
    auto argmax_layout = impl_param.get_input_layout(1);

    CLDNN_ERROR_NOT_EQUAL(desc->id,
                          "Argmax data type",
                          static_cast<size_t>(argmax_layout.data_type),
                          "expected to be fp32",
                          static_cast<size_t>(data_types::f32),
                          "Argmax data type is not fp32.");

    if (desc->with_output_size) {
        tensor output_size(input_layout.batch(),
                           input_layout.feature(),
                           desc->output_size.spatial[0],
                           desc->output_size.spatial[1]);
        return {input_layout.data_type, input_layout.format, output_size};
    }

    auto pad = desc->pad;
    auto stride = desc->stride;
    auto window_size = desc->size;

    CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id,
                                   "stride spatial X",
                                   stride.spatial[0],
                                   "",
                                   0,
                                   "Stride spatial X must be positive (>= 1)");
    CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id,
                                   "stride spatial Y",
                                   stride.spatial[1],
                                   "",
                                   0,
                                   "Stride spatial Y must be positive (>= 1)");
    CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id,
                                   "window size spatial X",
                                   window_size.spatial[0],
                                   "",
                                   0,
                                   "Size X (of pooling window) must be positive (>= 1)");
    CLDNN_ERROR_LESS_OR_EQUAL_THAN(desc->id,
                                   "window size spatial Y",
                                   window_size.spatial[1],
                                   "",
                                   0,
                                   "Size Y (of pooling window) must be positive (>= 1)");

    auto output_range = calc_sliding_window_needed_input_range(input_layout.get_tensor(),
                                                               window_size,
                                                               pad,
                                                               stride,
                                                               {1, 1, 1, 1},
                                                               true,
                                                               1);

    tensor output_size(input_layout.batch(),
                       input_layout.feature(),
                       output_range.spatial[0],
                       output_range.spatial[1]);
    return {input_layout.data_type, input_layout.format, output_size};
}

std::string max_unpooling_inst::to_string(max_unpooling_node const& node) {
    auto desc = node.get_primitive();
    auto node_info = node.desc_to_json();
    auto& input = node.input();
    auto& argmax = node.argmax();

    std::stringstream primitive_description;

    json_composite max_unmax_unpooling_info;
    max_unmax_unpooling_info.add("input", input.id());
    max_unmax_unpooling_info.add("argmax", argmax.id());

    node_info->add("max unmax_unpooling info", max_unmax_unpooling_info);
    node_info->dump(primitive_description);

    return primitive_description.str();
}

max_unpooling_inst::typed_primitive_inst(network& network, max_unpooling_node const& node)
    : parent(network, node) {}

} // namespace cldnn
@@ -22,7 +22,7 @@ layout pooling_inst::calc_output_layout(parent::typed_node const& node, kernel_i

    auto input_layout = impl_param.get_input_layout();

    auto pad = desc->pad;
    auto pad = desc->pads_begin;
    auto stride = desc->stride;
    auto window_size = desc->size;

@@ -46,43 +46,6 @@ layout pooling_inst::calc_output_layout(parent::typed_node const& node, kernel_i
        }
    }

    if (!desc->argmax.empty())
        CLDNN_ERROR_NOT_EQUAL(desc->id,
                              "Pooling mode",
                              static_cast<size_t>(desc->mode),
                              "should be max_with_argmax",
                              static_cast<size_t>(pooling_mode::max_with_argmax),
                              "Pooling mode should be set to max_with_argmax when argmax primitive is present.");

    if (desc->mode == pooling_mode::max_with_argmax) {
        CLDNN_ERROR_NOT_EQUAL(desc->id,
                              "Argmax primitive",
                              static_cast<size_t>(desc->argmax.empty()),
                              "should not be empty",
                              static_cast<size_t>(0),
                              "Argmax primitive not present despite max_with_argmax mode.");

        auto argmax_layout = impl_param.get_input_layout(1);
        CLDNN_ERROR_NOT_EQUAL(desc->id,
                              "Argmax data type",
                              static_cast<size_t>(argmax_layout.data_type),
                              "expected to be fp32",
                              static_cast<size_t>(data_types::f32),
                              "Argmax data type is not fp32.");
        CLDNN_ERROR_NOT_PROPER_FORMAT(desc->id,
                                      "Input_layout.format",
                                      input_layout.format.value,
                                      "argmax_layout.format",
                                      argmax_layout.format);
    }

    if (desc->global_pooling) {
        window_size = ov::Shape(input_layout.get_spatial_rank(), 1);
        for (size_t i = 0; i < input_layout.get_spatial_rank(); i++) {
            window_size[i] = input_layout.spatial(input_layout.get_spatial_rank() - i - 1);
        }
    }

    uint32_t stride_z = stride.size() >= 3 ? stride[stride.size() - 3] : 1;
    uint32_t stride_y = stride.size() >= 2 ? stride[stride.size() - 2] : 1;
    uint32_t stride_x = stride.size() >= 1 ? stride[stride.size() - 1] : 1;
@@ -190,13 +153,10 @@ std::string pooling_inst::to_string(pooling_node const& node) {

    std::stringstream primitive_description;

    bool is_global = desc->global_pooling;

    json_composite pooling_info;
    pooling_info.add("mode", mode);
    pooling_info.add("stride", cldnn::to_string(strd));
    pooling_info.add("kernel size", cldnn::to_string(kernel_size));
    pooling_info.add("is global", is_global ? "true" : "false");
    if (desc->with_output_size) {
        json_composite ud_out_size_info;
        ud_out_size_info.add("size", desc->output_size.to_string());
@@ -327,7 +327,7 @@ bool program::analyze_output_size_handling_need() {
            auto calc_output_range = calc_sliding_window_output_range<swor_mode::exceed_once_data>(
                primInputSize,
                size,
                ov::CoordinateDiff(prim->pad.begin(), prim->pad.end()),
                ov::CoordinateDiff(prim->pads_begin.begin(), prim->pads_begin.end()),
                prim->stride,
                ov::Strides(prim->stride.size(), 1),
                true,
@@ -1,44 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "include/batch_headers/data_types.cl"
#include "include/batch_headers/fetch_data.cl"

KERNEL(pooling_gpu)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output, const __global float* arg_max)
{
#if OUTPUT_LAYOUT_BFYX || OUTPUT_LAYOUT_BYXF
    const uint x = (uint)get_global_id(0);
    const uint y = (uint)get_global_id(1);
    const uint bf = (uint)get_global_id(2);
    const uint f = bf % INPUT0_FEATURE_NUM;
    const uint b = bf / INPUT0_FEATURE_NUM;

    if (x >= INPUT0_SIZE_X)
    {
        return;
    }
#elif OUTPUT_LAYOUT_YXFB
    const uint x = (uint)get_global_id(1);
    const uint y = (uint)get_global_id(2);
    const uint bf = (uint)get_global_id(0);
    const uint f = bf / INPUT0_BATCH_NUM;
    const uint b = bf % INPUT0_BATCH_NUM;
#endif

    const uint input_id = GET_DATA_INDEX(INPUT0, b, f, y, x);
    const uint arg_max_id = GET_DATA_INDEX(INPUT1, b, f, y, x);
    const uint pool_idx = convert_uint(arg_max[arg_max_id]);

#if OUTPUT_PADDED
    const uint x_output = pool_idx % OUTPUT_SIZE_X;
    const uint y_output = (pool_idx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
    const uint f_output = (pool_idx / OUTPUT_SIZE_X / OUTPUT_SIZE_Y) % OUTPUT_FEATURE_NUM;
    const uint b_output = pool_idx / OUTPUT_SIZE_X / OUTPUT_SIZE_Y / OUTPUT_FEATURE_NUM;

    const uint output_pos = GET_DATA_INDEX(OUTPUT, b_output, f_output, y_output, x_output);
    output[output_pos] += ACTIVATION(input[input_id], ACTIVATION_PARAMS);
#else
    output[pool_idx] += ACTIVATION(input[input_id], ACTIVATION_PARAMS);
#endif
}
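For readers without the kernel context: the deleted kernel scatter-adds each pooled value into a zero-filled output at the flat position stored in the fp32 argmax buffer; the host side zero-fills in execute_impl and sets can_share_buffer(false) so those zeros are preserved. An illustrative plain-C++ restatement of that semantics, written for this note rather than taken from OpenVINO:

    #include <cstddef>

    // Reference loop: unpool by scattering each pooled value to the flat index
    // recorded in the fp32 argmax buffer (mirrors output[pool_idx] += input[input_id]).
    void max_unpool_reference(const float* pooled, const float* arg_max, float* unpooled,
                              std::size_t pooled_count, std::size_t unpooled_count) {
        for (std::size_t i = 0; i < unpooled_count; ++i)
            unpooled[i] = 0.0f;  // zero init matters: positions never written must stay 0
        for (std::size_t i = 0; i < pooled_count; ++i)
            unpooled[static_cast<std::size_t>(arg_max[i])] += pooled[i];
    }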
@@ -5,7 +5,7 @@
#include "include/batch_headers/data_types.cl"
#include "include/batch_headers/fetch_data.cl"

#if MAX_POOLING || MAX_WITH_ARGMAX_POOLING
#if MAX_POOLING
    #define INIT_VAL ACCUMULATOR_VAL_MIN
#elif defined AVG_POOLING
    #define INIT_VAL ACCUMULATOR_VAL_ZERO
@@ -15,7 +15,7 @@

inline ACCUMULATOR_TYPE FUNC(apply_pooling)(ACCUMULATOR_TYPE tmp, ACCUMULATOR_TYPE in)
{
#if MAX_POOLING || MAX_WITH_ARGMAX_POOLING
#if MAX_POOLING
    return ACCUMULATOR_MAX_FUNC(tmp, in);
#elif AVG_POOLING
    return tmp + in;
@@ -25,9 +25,6 @@ inline ACCUMULATOR_TYPE FUNC(apply_pooling)(ACCUMULATOR_TYPE tmp, ACCUMULATOR_TY
KERNEL(pooling_gpu)(
    const __global INPUT0_TYPE* input,
    __global OUTPUT_TYPE* output
#if MAX_WITH_ARGMAX_POOLING
    , __global float* arg_max
#endif
#if HAS_FUSED_OPS_DECLS
    , FUSED_OPS_DECLS
#endif
@@ -49,12 +46,6 @@ KERNEL(pooling_gpu)(
    ACCUMULATOR_TYPE max_x[BLOCK_SIZE_Y];
    ACCUMULATOR_TYPE result[POOL_SIZE_Y];

#if MAX_WITH_ARGMAX_POOLING
    uint arg_max_x[BLOCK_SIZE_Y] = { 0 };
    uint arg_max_result[POOL_SIZE_Y] = { 0 };
    uint input_idx_bfyx_no_padding = offset_x + INPUT0_SIZE_X * (offset_y + INPUT0_SIZE_Y * (f + INPUT0_FEATURE_NUM * b));
#endif

    for(uint i = 0; i < BLOCK_SIZE_Y; i++)
    {
        max_x[i] = INIT_VAL;
@@ -65,33 +56,15 @@ KERNEL(pooling_gpu)(
    {
        for(uint i = 0; i < POOL_SIZE_X; i++)
        {

#if MAX_WITH_ARGMAX_POOLING
            if(input[input_idx] > max_x[j])
                arg_max_x[j] = input_idx_bfyx_no_padding;
#endif
            max_x[j] = FUNC_CALL(apply_pooling)(max_x[j], TO_ACCUMULATOR_TYPE(input[input_idx]));
            input_idx += INPUT0_X_PITCH;

#if MAX_WITH_ARGMAX_POOLING
            input_idx_bfyx_no_padding++;
#endif

        }
        input_idx += (INPUT0_Y_PITCH - POOL_SIZE_X*INPUT0_X_PITCH);

#if MAX_WITH_ARGMAX_POOLING
        input_idx_bfyx_no_padding += (INPUT0_SIZE_X - POOL_SIZE_X);
#endif
    }

    for(uint i = 0; i < POOL_SIZE_Y; i++)
    {
        result[i] = max_x[i * STRIDE_SIZE_Y];

#if MAX_WITH_ARGMAX_POOLING
        arg_max_result[i] = arg_max_x[i * STRIDE_SIZE_Y];
#endif
    }

    // now we do max in "y" dimension
@@ -100,10 +73,6 @@ KERNEL(pooling_gpu)(
        for(uint j = 1; j < POOL_SIZE_Y; j++)
        {

#if MAX_WITH_ARGMAX_POOLING
            if(max_x[j + i * STRIDE_SIZE_Y] > result[i])
                arg_max_result[i] = arg_max_x[j + i * STRIDE_SIZE_Y];
#endif

            result[i] = FUNC_CALL(apply_pooling)(result[i], max_x[j + i * STRIDE_SIZE_Y]);
        }
@@ -111,10 +80,6 @@ KERNEL(pooling_gpu)(

    uint output_pos = GET_DATA_INDEX(OUTPUT, b, f, y, x);

#if MAX_WITH_ARGMAX_POOLING
    uint arg_max_pos = GET_DATA_INDEX(INPUT1, b, f, y, x);
#endif

    OUTPUT_TYPE final_result;
    ACTIVATION_TYPE pool_result;

@@ -134,10 +99,6 @@ KERNEL(pooling_gpu)(
#endif
        output[output_pos] = final_result;
        output_pos += OUTPUT_Y_PITCH;
#if MAX_WITH_ARGMAX_POOLING
        arg_max[arg_max_pos] = arg_max_result[i];
        arg_max_pos += INPUT1_Y_PITCH;
#endif
    }
    }
}
@@ -5,7 +5,7 @@
#include "include/batch_headers/data_types.cl"
#include "include/batch_headers/fetch_data.cl"

#if MAX_POOLING || MAX_WITH_ARGMAX_POOLING
#if MAX_POOLING
    #define INIT_VAL ACCUMULATOR_VAL_MIN
#elif AVG_POOLING
    #define INIT_VAL ACCUMULATOR_VAL_ZERO
@@ -15,7 +15,7 @@

inline ACCUMULATOR_TYPE FUNC(apply_pooling)(ACCUMULATOR_TYPE tmp, ACCUMULATOR_TYPE in)
{
#if MAX_POOLING || MAX_WITH_ARGMAX_POOLING
#if MAX_POOLING
    return ACCUMULATOR_MAX_FUNC(tmp, in);
#elif AVG_POOLING
    return tmp + in;
@@ -25,9 +25,6 @@ inline ACCUMULATOR_TYPE FUNC(apply_pooling)(ACCUMULATOR_TYPE tmp, ACCUMULATOR_TY
KERNEL(pooling_gpu)(
    const __global INPUT0_TYPE* input,
    __global OUTPUT_TYPE* output
#if MAX_WITH_ARGMAX_POOLING
    , __global float* arg_max
#endif
#ifdef SELECTED_INDICES_TYPE
    , __global SELECTED_INDICES_TYPE* indices
#endif
@@ -98,10 +95,6 @@ KERNEL(pooling_gpu)(
    uint result_idx = 0;
#endif

#if MAX_WITH_ARGMAX_POOLING
    uint arg_max_idx = 0;
#endif

#ifdef CHECK_BOUNDARY
    if (offset_x + POOL_SIZE_X < 0 || offset_x >= INPUT0_SIZE_X ||
        offset_y + POOL_SIZE_Y < 0 || offset_y >= INPUT0_SIZE_Y ||
@@ -163,19 +156,6 @@ KERNEL(pooling_gpu)(
    const uint input_idx = batch_and_feature_offset + input_offset_y*INPUT0_Y_PITCH + input_offset_x*INPUT0_X_PITCH;
#endif
#endif

#if MAX_WITH_ARGMAX_POOLING
    if(input[input_idx] > result)
    {
#if OUTPUT_DIMS < 5
        const uint input_idx_bfyx_no_padding = input_offset_x + INPUT0_SIZE_X * (input_offset_y + INPUT0_SIZE_Y * (f + INPUT0_FEATURE_NUM * b));
#else
        const uint input_idx_bfyx_no_padding = input_offset_x + INPUT0_SIZE_X * (input_offset_y + INPUT0_SIZE_Y *
                                               (input_offset_z + INPUT0_SIZE_Z * (f + INPUT0_FEATURE_NUM * b)));
#endif
        arg_max_idx = input_idx_bfyx_no_padding;
    }
#endif
    const ACCUMULATOR_TYPE casted_input = TO_ACCUMULATOR_TYPE(input[input_idx]);
#ifdef SELECTED_INDICES_TYPE
    if (casted_input > result)
@@ -220,14 +200,6 @@ KERNEL(pooling_gpu)(
    uint input_idx = INPUT0_GET_INDEX(b, f, offset_y, offset_x);
#endif

#if MAX_WITH_ARGMAX_POOLING
#if OUTPUT_DIMS < 5
    uint input_idx_bfyx_no_padding = offset_x + INPUT0_SIZE_X * (offset_y + INPUT0_SIZE_Y * (f + INPUT0_FEATURE_NUM * b));
#else
    uint input_idx_bfyx_no_padding = offset_x + INPUT0_SIZE_X * (offset_y + INPUT0_SIZE_Y * (offset_z + INPUT0_SIZE_Z *(f + INPUT0_FEATURE_NUM * b)));
#endif
#endif

#if OUTPUT_DIMS == 5
    for(uint l = 0; l < POOL_SIZE_Z; l++)
    {
@@ -236,11 +208,6 @@ KERNEL(pooling_gpu)(
        {
            for(uint i = 0; i < POOL_SIZE_X; i++)
            {
#if MAX_WITH_ARGMAX_POOLING
                if(input[input_idx] > result)
                    arg_max_idx = input_idx_bfyx_no_padding;
#endif

#if OUTPUT_DIMS == 5
#if !INPUT0_SIMPLE
                uint input_idx = INPUT0_GET_INDEX(b, f, offset_z + l, offset_y + j, offset_x + i);
@@ -267,21 +234,11 @@ KERNEL(pooling_gpu)(
                input_idx += INPUT0_X_PITCH;
#endif
#endif

#if MAX_WITH_ARGMAX_POOLING
                input_idx_bfyx_no_padding++;
#endif
            }
            input_idx += (INPUT0_Y_PITCH - POOL_SIZE_X*INPUT0_X_PITCH);
#if MAX_WITH_ARGMAX_POOLING
            input_idx_bfyx_no_padding += (INPUT0_SIZE_X - POOL_SIZE_X);
#endif
        }
#if OUTPUT_DIMS == 5 // 3D
        input_idx += (INPUT0_Z_PITCH - POOL_SIZE_Y*INPUT0_Y_PITCH);
#if MAX_WITH_ARGMAX_POOLING
        input_idx_bfyx_no_padding += (INPUT0_SIZE_Y - POOL_SIZE_Y);
#endif
    }
#endif

@@ -322,12 +279,6 @@ KERNEL(pooling_gpu)(
#endif
    indices[output_pos] = TO_SELECTED_INDICES_TYPE(result_idx);
#endif

#if MAX_WITH_ARGMAX_POOLING
    //INPUT1 macro stands for Argmax
    const uint arg_max_pos = GET_DATA_INDEX_5D(INPUT1, b, f, z, y, x);
    arg_max[arg_max_pos] = convert_float(arg_max_idx);
#endif
}

#undef INIT_VAL
@@ -38,7 +38,6 @@ enum class KernelType {
    RESAMPLE,
    REGION_YOLO,
    REORG_YOLO,
    MAX_UNPOOLING,
    MVN,
    LSTM_GEMM,
    LSTM_ELT,
@@ -184,7 +183,6 @@ enum class ActivationFunction {
enum class PoolType {
    MAX,
    AVG,
    MAX_WITH_ARGMAX,
    BILINEAR,
    DEFORMABLE_BILINEAR
};

@@ -242,7 +242,6 @@ std::string toString(PoolType mode) {
    switch (mode) {
        case PoolType::MAX: return "MAX";
        case PoolType::AVG: return "AVG";
        case PoolType::MAX_WITH_ARGMAX: return "MAX_WITH_ARGMAX";
        case PoolType::BILINEAR: return "BILINEAR";
        case PoolType::DEFORMABLE_BILINEAR: return "DEFORMABLE_BILINEAR";
        default: return "";

@@ -213,9 +213,6 @@ void ParamsKey::EnablePoolType(PoolType t) {
        case PoolType::AVG:
            key.restrict.val.dedicated.pooling.avg = 1;
            break;
        case PoolType::MAX_WITH_ARGMAX:
            key.restrict.val.dedicated.pooling.max_with_argmax = 1;
            break;
        case PoolType::BILINEAR:
            key.restrict.val.dedicated.pooling.bilinear = 1;
            break;

@@ -106,7 +106,6 @@ public:
    uint32_t max : 1;
    uint32_t avg : 1;
    uint32_t floor : 1;
    uint32_t max_with_argmax : 1;
    uint32_t ceil : 1;
    uint32_t bilinear : 1;
    uint32_t deformable_bilinear : 1;
@@ -1,77 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "max_unpooling_kernel_base.h"
#include <algorithm>

namespace kernel_selector {
bool MaxUnpoolingKernelBase::Validate(const Params& p, const optional_params& o) const {
    if (p.GetType() != KernelType::MAX_UNPOOLING || o.GetType() != KernelType::MAX_UNPOOLING) {
        return false;
    }

    return true;
}

JitConstants MaxUnpoolingKernelBase::GetJitConstants(const max_unpooling_params& params) const {
    JitConstants jit = MakeBaseParamsJitConstants(params);
    if (params.outputs[0].PitchesDifferFromLogicalDims())
        jit.AddConstant(MakeJitConstant("OUTPUT_PADDED", 1));
    return jit;
}

MaxUnpoolingKernelBase::DispatchData MaxUnpoolingKernelBase::SetDefault(const max_unpooling_params& params) const {
    const auto& input = params.inputs[0];

    DispatchData dispatchData;

    if (input.GetLayout() == DataLayout::bfyx || input.GetLayout() == DataLayout::byxf) {
        // Determine global work sizes.
        dispatchData.gws[2] = input.Batch().v * input.Feature().v;  // B, F
        dispatchData.gws[0] = Align(input.X().v, 32);               // X
        dispatchData.gws[1] = input.Y().v;                          // Y

        dispatchData.lws[0] = 32;
        dispatchData.lws[1] = 1;
        dispatchData.lws[2] = 1;
    } else {
        // Determine global work sizes.
        dispatchData.gws[0] = input.Batch().v * input.Feature().v;  // B, F
        dispatchData.gws[1] = input.X().v;                          // X
        dispatchData.gws[2] = input.Y().v;                          // Y

        dispatchData.lws[0] = std::min(std::max(dispatchData.gws[0], static_cast<size_t>(1)), static_cast<size_t>(32));
        while (dispatchData.gws[0] % dispatchData.lws[0] != 0) {
            --dispatchData.lws[0];
        }
        dispatchData.lws[1] = 1;
        dispatchData.lws[2] = 1;
    }

    return dispatchData;
}

KernelsData MaxUnpoolingKernelBase::GetCommonKernelsData(const Params& params,
                                                         const optional_params& options) const {
    if (!Validate(params, options)) {
        return {};
    }

    const max_unpooling_params& orgParams = static_cast<const max_unpooling_params&>(params);

    DispatchData dispatchData = SetDefault(orgParams);

    KernelData kd = KernelData::Default<max_unpooling_params>(params);

    auto cldnn_jit = GetJitConstants(orgParams);
    auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
    auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

    auto& kernel = kd.kernels[0];
    FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point);
    kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});

    return {kd};
}
} // namespace kernel_selector
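The local-work-size selection in SetDefault above simply counts down from a cap of 32 until it divides gws[0], i.e. it picks the largest local size not exceeding 32 that evenly tiles the global size. A standalone illustration with invented numbers:

    #include <cstdio>
    #include <cstddef>

    int main() {
        std::size_t gws0 = 48, lws0 = 32;  // hypothetical global size; 32 is the cap
        while (gws0 % lws0 != 0)           // 32 down through 25 do not divide 48
            --lws0;                        // stops at 24, the largest divisor of 48 that is <= 32
        std::printf("lws0=%zu\n", lws0);   // prints lws0=24
        return 0;
    }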
@@ -1,43 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "kernel_base_opencl.h"
#include "kernel_selector_params.h"

namespace kernel_selector {
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// max_unpooling_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct max_unpooling_params : public base_params {
    max_unpooling_params() : base_params(KernelType::MAX_UNPOOLING) {}
};

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// max_unpooling_optional_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct max_unpooling_optional_params : optional_params {
    max_unpooling_optional_params() : optional_params(KernelType::MAX_UNPOOLING) {}
};

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// MaxUnpoolingKernelBase
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class MaxUnpoolingKernelBase : public KernelBaseOpenCL {
public:
    using KernelBaseOpenCL::KernelBaseOpenCL;
    virtual ~MaxUnpoolingKernelBase() {}

    struct DispatchData : public CommonDispatchData {
        bool needsBoundary = false;
    };

protected:
    bool Validate(const Params&, const optional_params&) const override;
    virtual JitConstants GetJitConstants(const max_unpooling_params& params) const;
    virtual DispatchData SetDefault(const max_unpooling_params& params) const;
    KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
};
} // namespace kernel_selector
@@ -1,36 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "max_unpooling_kernel_gpu_ref.h"

namespace kernel_selector {
ParamsKey MaxUnpoolingKernelGPURef::GetSupportedKey() const {
    ParamsKey k;
    k.EnableInputDataType(Datatype::F16);
    k.EnableInputDataType(Datatype::F32);
    k.EnableInputDataType(Datatype::INT8);
    k.EnableOutputDataType(Datatype::F16);
    k.EnableOutputDataType(Datatype::F32);
    k.EnableOutputDataType(Datatype::INT8);
    k.EnableInputLayout(DataLayout::bfyx);
    k.EnableInputLayout(DataLayout::yxfb);
    k.EnableInputLayout(DataLayout::byxf);
    k.EnableOutputLayout(DataLayout::bfyx);
    k.EnableOutputLayout(DataLayout::yxfb);
    k.EnableOutputLayout(DataLayout::byxf);
    k.EnableTensorOffset();
    k.EnableTensorPitches();
    k.EnableBatching();
    k.EnableDifferentTypes();
    return k;
}

KernelsData MaxUnpoolingKernelGPURef::GetKernelsData(const Params& params, const optional_params& options) const {
    return GetCommonKernelsData(params, options);
}

KernelsPriority MaxUnpoolingKernelGPURef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const {
    return FORCE_PRIORITY_9;
}
} // namespace kernel_selector
@@ -1,19 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "max_unpooling_kernel_base.h"

namespace kernel_selector {
class MaxUnpoolingKernelGPURef : public MaxUnpoolingKernelBase {
public:
    MaxUnpoolingKernelGPURef() : MaxUnpoolingKernelBase("max_unpooling_gpu_ref") {}
    virtual ~MaxUnpoolingKernelGPURef() {}

    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
    KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
    ParamsKey GetSupportedKey() const override;
};
} // namespace kernel_selector
@@ -1,15 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "max_unpooling_kernel_selector.h"
#include "max_unpooling_kernel_gpu_ref.h"

namespace kernel_selector {

max_unpooling_kernel_selector::max_unpooling_kernel_selector() { Attach<MaxUnpoolingKernelGPURef>(); }

KernelsData max_unpooling_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
    return GetNaiveBestKernel(params, options, KernelType::MAX_UNPOOLING);
}
} // namespace kernel_selector
@@ -1,23 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "kernel_selector.h"

namespace kernel_selector {
class max_unpooling_kernel_selector : public kernel_selector_base {
public:
    static max_unpooling_kernel_selector& Instance() {
        static max_unpooling_kernel_selector instance_;
        return instance_;
    }

    max_unpooling_kernel_selector();

    virtual ~max_unpooling_kernel_selector() {}

    KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector
@ -203,8 +203,6 @@ KernelsData PoolingKernelBase::GetCommonKernelsData(const Params& params,
|
||||
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, 1,
|
||||
GetFusedPrimitiveInputsCount(params));
|
||||
uint32_t param_idx = 1;
|
||||
if (orgParams.poolType == PoolType::MAX_WITH_ARGMAX)
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, param_idx++});
|
||||
|
||||
if (orgParams.maxPoolOpset8Features) {
|
||||
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, param_idx++});
|
||||
|
@ -20,7 +20,6 @@ ParamsKey PoolingKernel_b_fs_yx_fsv16::GetSupportedKey() const {
|
||||
k.EnableBatching();
|
||||
k.EnablePoolType(PoolType::MAX);
|
||||
k.EnablePoolType(PoolType::AVG);
|
||||
k.EnablePoolType(PoolType::MAX_WITH_ARGMAX);
|
||||
k.EnablePoolRemainder(PoolRemainder::FLOOR);
|
||||
k.EnablePoolRemainder(PoolRemainder::CEIL);
|
||||
k.EnablePoolKernelDividerMode(KernelDividerMode::FIXED);
|
||||
|
@ -20,7 +20,6 @@ ParamsKey PoolingKernelGPUBfyxBlockOpt::GetSupportedKey() const {
|
||||
k.EnableBatching();
|
||||
k.EnablePoolType(PoolType::MAX);
|
||||
k.EnablePoolType(PoolType::AVG);
|
||||
k.EnablePoolType(PoolType::MAX_WITH_ARGMAX);
|
||||
k.EnablePoolRemainder(PoolRemainder::FLOOR);
|
||||
k.EnablePoolRemainder(PoolRemainder::CEIL);
|
||||
k.EnablePoolKernelDividerMode(KernelDividerMode::FIXED);
|
||||
|
@ -90,8 +90,8 @@ bool PoolingKernelGPUInt8Ref::Validate(const Params& params, const optional_para

    if (p.inputs[0].GetDType() == Datatype::INT8 || p.inputs[0].GetDType() == Datatype::UINT8) {
        // Max pooling doesn't change quantization ranges, so output data type should be the same as input
        if ((p.poolType == PoolType::MAX || p.poolType == PoolType::MAX_WITH_ARGMAX)
            && (p.outputs[0].GetDType() != p.inputs[0].GetDType()) && p.quantization == QuantizationType::NONE)
        if (p.poolType == PoolType::MAX && p.outputs[0].GetDType() != p.inputs[0].GetDType() &&
            p.quantization == QuantizationType::NONE)
            return false;
        // Average pooling should produce FP by default. (u)int8 is possible when quantize op is fused.
        // if (p.poolType == PoolType::AVG &&
@ -20,7 +20,6 @@ ParamsKey PoolingKernelGPURef::GetSupportedKey() const {
    k.EnableBatching();
    k.EnablePoolType(PoolType::MAX);
    k.EnablePoolType(PoolType::AVG);
    k.EnablePoolType(PoolType::MAX_WITH_ARGMAX);
    k.EnablePoolRemainder(PoolRemainder::FLOOR);
    k.EnablePoolRemainder(PoolRemainder::CEIL);
    k.EnablePoolKernelDividerMode(KernelDividerMode::FIXED);
@ -36,9 +36,9 @@ static void CreateAvgPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::Av
                           kernel,
                           strides,
                           pads_begin,
                           pads_end,
                           tensor_from_dims(op->get_output_shape(0)),
                           cldnn::element_type_to_data_type(op->get_output_element_type(0)));
    poolPrim.pad_end = pads_end;
    p.add_primitive(*op, poolPrim);
}
@ -64,9 +64,9 @@ static void CreateMaxPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::Ma
                           kernel,
                           strides,
                           pads_begin,
                           pads_end,
                           tensor_from_dims(op->get_output_shape(0)),
                           cldnn::element_type_to_data_type(op->get_output_element_type(0)));
    poolPrim.pad_end = pads_end;
    p.add_primitive(*op, poolPrim);
}
@ -148,11 +148,12 @@ TEST_P(pooling_f32_activation, basic) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 3);
    ov::Strides stride(r, 1);
    ov::Shape pad(r, 1);
    ov::Shape pads_begin(r, 1);
    ov::Shape pads_end(r, 1);

    create_topologies(
        input_layout("input", get_input_layout(p)),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        activation("act", "pooling", activation_func::relu),
        reorder("output_reorder", "act", format::bfyx, data_types::f32)
    );
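// Reading of the change above (inferred from this diff, not quoted from the
// refactored header): the pooling primitive drops the single symmetric `pad`
// argument in favour of explicit begin/end paddings, i.e.
//
//   ov::Shape pads_begin(r, 1);
//   ov::Shape pads_end(r, 1);
//   pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end);
//
// which mirrors the OpenVINO pooling ops, where pads_begin and pads_end may
// differ (e.g. with CEIL rounding) — something one shared pad value could not
// express. The same substitution repeats throughout the hunks below.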
@ -185,12 +186,13 @@ TEST_P(pooling_f32_scale, basic) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 3);
    ov::Strides stride(r, 1);
    ov::Shape pad(r, 1);
    ov::Shape pads_begin(r, 1);
    ov::Shape pads_end(r, 1);

    create_topologies(
        input_layout("input", get_input_layout(p)),
        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 9.0f)),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
        reorder("output_reorder", "scale", format::bfyx, data_types::f32)
    );
@ -205,12 +207,13 @@ TEST_P(pooling_f32_scale, fp16_scale_out) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 3);
    ov::Strides stride(r, 1);
    ov::Shape pad(r, 1);
    ov::Shape pads_begin(r, 1);
    ov::Shape pads_end(r, 1);

    create_topologies(
        input_layout("input", get_input_layout(p)),
        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 9.0f)),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, data_types::f16),
        reorder("output_reorder", "scale", format::bfyx, data_types::f32)
    );
@ -241,7 +244,8 @@ TEST_P(pooling_scale_activation_quantize, basic) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 4);
    ov::Strides stride(r, 2);
    ov::Shape pad(r, 0);
    ov::Shape pads_begin(r, 0);
    ov::Shape pads_end(r, 0);

    create_topologies(
        input_layout("input", get_input_layout(p)),
@ -250,7 +254,7 @@ TEST_P(pooling_scale_activation_quantize, basic) {
        data("out_lo", get_mem(get_single_element_layout(p), 0)),
        data("out_hi", get_mem(get_single_element_layout(p), 255)),
        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 16.0f)),
        pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
        activation("activation", "scale", activation_func::relu),
        quantize("quantize", "activation", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::u8),
@ -267,7 +271,8 @@ TEST_P(pooling_scale_activation_quantize, i8_output_data_type) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 4);
    ov::Strides stride(r, 2);
    ov::Shape pad(r, 0);
    ov::Shape pads_begin(r, 0);
    ov::Shape pads_end(r, 0);

    create_topologies(
        input_layout("input", get_input_layout(p)),
@ -276,7 +281,7 @@ TEST_P(pooling_scale_activation_quantize, i8_output_data_type) {
        data("out_lo", get_mem(get_single_element_layout(p), -127, 127)),
        data("out_hi", get_mem(get_single_element_layout(p), -127, 127)),
        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 16.0f)),
        pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
        activation("activation", "scale", activation_func::relu),
        quantize("quantize", "activation", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::i8),
@ -293,7 +298,8 @@ TEST_P(pooling_scale_activation_quantize, per_channel) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 4);
    ov::Strides stride(r, 2);
    ov::Shape pad(r, 0);
    ov::Shape pads_begin(r, 0);
    ov::Shape pads_end(r, 0);

    create_topologies(
        input_layout("input", get_input_layout(p)),
@ -302,7 +308,7 @@ TEST_P(pooling_scale_activation_quantize, per_channel) {
        data("out_lo", get_mem(get_single_element_layout(p), 0)),
        data("out_hi", get_mem(get_single_element_layout(p), 255)),
        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 16.0f)),
        pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
        activation("activation", "scale", activation_func::hyperbolic_tan),
        quantize("quantize", "activation", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::u8),
@ -362,12 +368,13 @@ TEST_P(pooling_scale_activation, basic) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 4);
    ov::Strides stride(r, 2);
    ov::Shape pad(r, 0);
    ov::Shape pads_begin(r, 0);
    ov::Shape pads_end(r, 0);

    create_topologies(
        input_layout("input", get_input_layout(p)),
        data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / 16.0f)),
        pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
        activation("activation", "scale", activation_func::relu),
        reorder("output_reorder", "activation", p.default_format, data_types::f32)
@ -383,12 +390,13 @@ TEST_P(pooling_scale_activation, eltwise_mul) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 4);
    ov::Strides stride(r, 2);
    ov::Shape pad(r, 0);
    ov::Shape pads_begin(r, 0);
    ov::Shape pads_end(r, 0);

    create_topologies(
        input_layout("input", get_input_layout(p)),
        data("scale_data", get_mem(get_per_channel_layout(p))),
        pooling("pooling", "input", "", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        eltwise("scale", { "pooling", "scale_data" }, eltwise_mode::prod, p.default_type),
        activation("activation", "scale", activation_func::relu),
        reorder("output_reorder", "activation", p.default_format, data_types::f32)
@ -577,11 +585,12 @@ TEST_P(pooling_onednn_activation1, basic) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 3);
    ov::Strides stride(r, 1);
    ov::Shape pad(r, 1);
    ov::Shape pads_begin(r, 1);
    ov::Shape pads_end(r, 1);

    create_topologies(
        input_layout("input", get_input_layout(p)),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pad),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        activation("act", "pooling", activation_func::relu),
        reorder("output_reorder", "act", format::bfyx, data_types::f32)
    );
@ -597,10 +606,12 @@ TEST_P(pooling_onednn_activation2, basic) {
    auto r = get_input_layout(p).get_spatial_rank();
    ov::Shape kernel(r, 3);
    ov::Strides stride(r, 1);
    ov::Shape pads_begin(r, 0);
    ov::Shape pads_end(r, 0);

    create_topologies(
        input_layout("input", get_input_layout(p)),
        pooling("pooling", "input", p.pool_mode, kernel, stride),
        pooling("pooling", "input", p.pool_mode, kernel, stride, pads_begin, pads_end),
        activation("act", "pooling", activation_func::relu),
        reorder("output_reorder", "act", format::bfyx, data_types::f32)
    );
@ -1,442 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////

#include "test_utils.h"

#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/max_unpooling.hpp>
#include <intel_gpu/primitives/reorder.hpp>
#include <intel_gpu/primitives/data.hpp>
#include <intel_gpu/primitives/mutable_data.hpp>
#include <intel_gpu/primitives/pooling.hpp>

using namespace cldnn;
using namespace ::tests;

TEST(max_unpooling_gpu, basic_in2x3x2x2) {
    // Input : 2x2x2x1
    // Argmax : 2x2x2x1
    // Output : 2x2x3x2

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Input:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23
    //
    // Output:
    // f0: b0: 0 0 0 b1: 0 0 0
    // f0: b0: 0 4 0 b1: 0.5 0 0
    // f1: b0: 0 0 0 b1: 0 0 0
    // f1: b0: 0 8 16 b1: 12 0 17
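    // A worked decode of the argmax values above (illustrative, not part of
    // the original test): each argmax entry is a flat bfyx offset into the
    // forward-pooling input of shape b=2, f=2, y=2, x=3. For the value 21:
    //
    //   size_t idx = 21;
    //   size_t x =  idx % 3;              // -> 0
    //   size_t y = (idx / 3) % 2;         // -> 1
    //   size_t f = (idx / (3 * 2)) % 2;   // -> 1
    //   size_t b =  idx / (3 * 2 * 2);    // -> 1
    //
    // so the max value 12 is scattered back to b1/f1, row 1, column 0 — exactly
    // where it appears in the expected output above.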
    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });

    set_values(input, {
        4.0f, 4.0f,
        8.0f, 16.0f,
        0.5f, 0.0f,
        12.0f, 17.0f
    });

    set_values(arg_max, {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(data("arg_max", arg_max));
    topology.add(max_unpooling("max_unpooling", "input", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("max_unpooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 2);
    EXPECT_EQ(output_layout.spatial(0), 3);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_output_vec = {
        0.f, 0.f, 0.f,
        0.f, 4.f, 0.f,
        0.f, 0.f, 0.f,
        0.f, 8.f, 16.f,
        0.f, 0.f, 0.f,
        0.5f, 0.f, 0.f,
        0.f, 0.f, 0.f,
        12.f, 0.f, 17.f,
    };

    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
    }
}

TEST(max_unpooling_gpu, basic_in2x3x2x2_output_padding) {
    // Input : 2x2x2x1
    // Argmax : 2x2x2x1
    // Output : 2x2x3x2
    // Output Padding : 0x0x1x1

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Input:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23
    //
    // Output:
    // f0: b0: 0 0 0 b1: 0 0 0
    // f0: b0: 0 4 0 b1: 0.5 0 0
    // f1: b0: 0 0 0 b1: 0 0 0
    // f1: b0: 0 8 16 b1: 12 0 17

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });

    set_values(input, {
        4.0f, 4.0f,
        8.0f, 16.0f,
        0.5f, 0.0f,
        12.0f, 17.0f
    });

    set_values(arg_max, {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(data("arg_max", arg_max));
    topology.add(max_unpooling("max_unpooling", "input", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, padding({ 0, 0, 1, 1 }, 0)));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("max_unpooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 2);
    EXPECT_EQ(output_layout.spatial(0), 3);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_output_vec = {
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 4.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 8.f, 16.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.5f, 0.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
        0.f, 12.f, 0.f, 17.f, 0.f,
        0.f, 0.f, 0.f, 0.f, 0.f,
    };
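    // Layout check for the expected data above (derived from the test, stated
    // here for clarity): with output padding {0, 0, 1, 1} the 3x2 spatial
    // plane is embedded in a (3 + 2*1) x (2 + 2*1) = 5x4 buffer, so the vector
    // holds 2 (batch) * 2 (feature) * 4 * 5 = 80 elements — zeros in the halo
    // and the unpooled values at the interior offsets.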
    std::vector<float> out;
    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        out.push_back(output_ptr[i]);
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
    }
}

TEST(max_unpooling_gpu, basic_in2x3x2x2_output_size) {
    // Input : 2x2x2x1
    // Argmax : 2x2x2x1
    // Output : 2x2x3x2
    // Output size explicitly provided

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Input:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23
    //
    // Output:
    // f0: b0: 0 0 0 b1: 0 0 0
    // f0: b0: 0 4 0 b1: 0.5 0 0
    // f1: b0: 0 0 0 b1: 0 0 0
    // f1: b0: 0 8 16 b1: 12 0 17

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });

    set_values(input, {
        4.0f, 4.0f,
        8.0f, 16.0f,
        0.5f, 0.0f,
        12.0f, 17.0f
    });

    set_values(arg_max, {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(data("arg_max", arg_max));
    topology.add(max_unpooling("max_unpooling", "input", "arg_max", {2, 2, 3, 2}));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("max_unpooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 2);
    EXPECT_EQ(output_layout.spatial(0), 3);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_output_vec = {
        0.f, 0.f, 0.f,
        0.f, 4.f, 0.f,
        0.f, 0.f, 0.f,
        0.f, 8.f, 16.f,
        0.f, 0.f, 0.f,
        0.5f, 0.f, 0.f,
        0.f, 0.f, 0.f,
        12.f, 0.f, 17.f,
    };
    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
    }
}

TEST(max_unpooling_gpu, basic_in2x3x2x2_fp16) {
    // Input : 2x2x2x1
    // Argmax : 2x2x2x1
    // Output : 2x2x3x2
    // Input values in fp16

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Input:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23
    //
    // Output:
    // f0: b0: 0 0 0 b1: 0 0 0
    // f0: b0: 0 4 0 b1: 0.5 0 0
    // f1: b0: 0 0 0 b1: 0 0 0
    // f1: b0: 0 8 16 b1: 12 0 17

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f16, format::bfyx,{ 2, 2, 2, 1 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });

    set_values(input, {
        FLOAT16(4.0f), FLOAT16(4.0f),
        FLOAT16(8.0f), FLOAT16(16.0f),
        FLOAT16(0.5f), FLOAT16(0.0f),
        FLOAT16(12.0f), FLOAT16(17.0f)
    });

    set_values(arg_max, {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(data("arg_max", arg_max));
    topology.add(max_unpooling("max_unpooling", "input", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("max_unpooling").get_memory();
    cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 2);
    EXPECT_EQ(output_layout.spatial(0), 3);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_output_vec = {
        0.f, 0.f, 0.f,
        0.f, 4.f, 0.f,
        0.f, 0.f, 0.f,
        0.f, 8.f, 16.f,
        0.f, 0.f, 0.f,
        0.5f, 0.f, 0.f,
        0.f, 0.f, 0.f,
        12.f, 0.f, 17.f,
    };
    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], float16_to_float32(output_ptr[i]));
    }
}

TEST(max_unpooling_gpu, basic_in2x2x3x2_max_with_argmax_pooling_unpooling) {
    // Input : 2x2x3x2
    // Argmax : 2x2x2x1
    // Output : 2x2x2x2

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Forward Max Pooling Output:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23
    //
    // Unpooling output:
    // f0: b0: 0 0 0 b1: 0 0 0
    // f0: b0: 0 4 0 b1: 0.5 0 0
    // f1: b0: 0 0 0 b1: 0 0 0
    // f1: b0: 0 8 16 b1: 12 0 17

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });

    set_values(input, {
        1.0f, 2.0f, -10.f,
        3.0f, 4.0f, -14.f,
        5.0f, 6.0f, -12.f,
        7.0f, 8.0f, 16.0f,
        0.f, 0.f, -11.f,
        0.5f, -0.5f, -15.f,
        1.5f, 5.2f, -13.f,
        12.f, 9.f, 17.f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(mutable_data("arg_max", arg_max));
    topology.add(pooling("pooling_max_with_argmax", "input", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }));
    topology.add(max_unpooling("max_unpooling", "pooling_max_with_argmax", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("max_unpooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();
    cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 2);
    EXPECT_EQ(output_layout.spatial(0), 3);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_argmax_vec = {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    };

    std::vector<float> expected_output_vec = {
        0.f, 0.f, 0.f,
        0.f, 4.f, 0.f,
        0.f, 0.f, 0.f,
        0.f, 8.f, 16.f,
        0.f, 0.f, 0.f,
        0.5f, 0.f, 0.f,
        0.f, 0.f, 0.f,
        12.f, 0.f, 17.f,
    };

    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
    }

    for (size_t i = 0; i < expected_argmax_vec.size(); ++i) {
        EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
    }
}
@ -125,18 +125,12 @@ VVVF<typename pooling_mode_output<InputT, Mode>::type> reference_pooling(const V
                        int stride_z,
                        int offset_x,
                        int offset_y,
                        int offset_z,
                        bool global_pooling) {
                        int offset_z) {
    using output_t = typename pooling_mode_output<InputT, Mode>::type;
    VVVF<output_t> result;
    auto size_x = input[0][0].size();
    auto size_y = input[0].size();
    auto size_z = input.size();
    if (global_pooling) {
        pool_z = size_z;
        pool_y = size_y;
        pool_x = size_x;
    }

    auto accumulator = pooling_accumulator<InputT, Mode>();
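// Context for the removal above (a sketch reflecting the deleted branch, not
// new API): global pooling was simply regular pooling with the window
// stretched over the whole spatial extent, so the reference can still express
// it without a dedicated flag by passing
//
//   pool_x = size_x;  // input[0][0].size()
//   pool_y = size_y;  // input[0].size()
//   pool_z = size_z;  // input.size()
//
// together with stride 1 and zero offsets.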
@ -279,141 +273,6 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz3x3_wstr1x1_i3x3x1x1_nopad) {
    EXPECT_EQ(2.0f, output_ptr[0]);
}

TEST(pooling_forward_gpu, basic_max_yxfb_f32_global_i3x3x1x1_nopad) {
    // Brief test description.
    //
    // Pool mode: max
    // Global pooling: true
    // Padding: none
    //
    // Input data:
    // [-0.5, 1.0, 0.5]
    // [ 2.0, 1.5, -0.5]
    // [ 0.0, -1.0, 0.5]
    //
    // Expected output:
    // [ 2.0]

    auto& engine = get_test_engine();

    auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 3, 3 } });

    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(pooling("pool_prim", "input_prim", pooling_mode::max));

    network network(engine, topology);
    set_values(input_prim, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f });
    network.set_input_data("input_prim", input_prim);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "pool_prim");

    auto output_prim = outputs.begin()->second.get_memory();

    cldnn::mem_lock<float> output_ptr (output_prim, get_test_stream());

    EXPECT_EQ(2.0f, output_ptr[0]);
}

TEST(pooling_forward_gpu, basic_max_b_fs_yx_fsv16_i8_global_i3x3x1x1_nopad) {
    // Brief test description.
    //
    // Pool mode: max
    // Global pooling: true
    // Padding: none

    auto& engine = get_test_engine();

    auto input_prim = engine.allocate_memory({ data_types::i8, format::b_fs_yx_fsv16, { 1, 16, 3, 3 } });

    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(pooling("pool_prim", "input_prim", pooling_mode::max));

    network network(engine, topology);
    std::vector<char> vals = {
        0, 3, 2, -1, 6, 8, 3, -9, 6, -1, 1, 7, -1, 6, 18, 3,
        -9, 5, -2, 2, 6, -1, 6, 7, 3, -9, 6, -3, 3, 5, -1, 16,
        8, 3, -9, 6, -4, 4, 3, -1, 6, 8, 33, -9, 6, -5, 5, 21,
        -1, 6, 8, 3, -9, 6, -5, 36, 2, -1, 6, 8, 3, -9, 6, -6,
        6, 1, -1, 6, 8, 3, -9, 66, -7, 7, 29, -1, 6, 8, 3, -9,
        6, 44, 8, -2, -1, 6, 8, 3, -9, 6, -8, 9, -1, 10, 6, 8,
        3, -9, 6, -9, 10, -3, -1, 6, 8, 3, 99, 6, -10, 11, -4, -1,
        6, 8, 3, -9, 64, -11, 12, -5, -1, 6, 8, 38, -9, 6, -12, 13,
        -2, -1, 6, 81, 3, -9, 6, -13, 14, -2, -1, 64, 8, 3, -9, 6,
    };
    set_values(input_prim, vals);
    network.set_input_data("input_prim", input_prim);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "pool_prim");

    auto output_prim = outputs.begin()->second.get_memory();

    cldnn::mem_lock<char> output_ptr(output_prim, get_test_stream());

    std::vector<char> answers = { 8, 44, 8, 81, 64, 8, 12, 66, 14, 8, 99, 64, 8, 11, 18, 21 };

    ASSERT_EQ(answers.size(), output_ptr.size());
    for (size_t i = 0; i < output_ptr.size(); ++i) {
        EXPECT_EQ(answers[i], output_ptr[i]);
    }
}

TEST(pooling_forward_gpu, basic_avg_b_fs_yx_fsv16_i8_global_i3x3x1x1_nopad) {
    // Brief test description.
    //
    // Pool mode: avg
    // Global pooling: true
    // Padding: none

    auto& engine = get_test_engine();

    auto input_prim = engine.allocate_memory({ data_types::i8, format::b_fs_yx_fsv16, { 1, 16, 3, 3 } });

    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(pooling("pool_prim", "input_prim", pooling_mode::average));

    network network(engine, topology);
    std::vector<char> vals = {
        0, 3, 2, -1, 6, 8, 3, -9, 6, -1, 1, 7, -1, 6, 18, 3,
        -9, 5, -2, 2, 6, -1, 6, 7, 3, -9, 6, -3, 3, 5, -1, 16,
        8, 3, -9, 6, -4, 4, 3, -1, 6, 8, 33, -9, 6, -5, 5, 21,
        -1, 6, 8, 3, -9, 6, -5, 36, 2, -1, 6, 8, 3, -9, 6, -6,
        6, 1, -1, 6, 8, 3, -9, 66, -7, 7, 29, -1, 6, 8, 3, -9,
        6, 44, 8, -2, -1, 6, 8, 3, -9, 6, -8, 9, -1, 10, 6, 8,
        3, -9, 6, -9, 10, -3, -1, 6, 8, 3, 99, 6, -10, 11, -4, -1,
        6, 8, 3, -9, 64, -11, 12, -5, -1, 6, 8, 38, -9, 6, -12, 13,
        -2, -1, 6, 81, 3, -9, 6, -13, 14, -2, -1, 64, 8, 3, -9, 6,
    };
    set_values(input_prim, vals);
    network.set_input_data("input_prim", input_prim);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "pool_prim");

    auto output_prim = outputs.begin()->second.get_memory();

    cldnn::mem_lock<uint8_t> output_ptr(output_prim, get_test_stream());

    std::vector<uint8_t> answers = {
        29, 199, 241, 63, 85, 85, 213, 64, 85, 85, 21, 64, 142, 227, 8, 65,
        57, 142, 19, 65, 171, 170, 170, 62, 57, 142, 35, 64, 0, 0, 32, 65,
        199, 113, 28, 64, 29, 199, 241, 63, 29, 199, 153, 65, 57, 142, 83, 65,
        228, 56, 14, 63, 142, 227, 120, 64, 171, 170, 170, 63, 85, 85, 181, 64,
    };
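    // Note on the expected values above (an observation, not original
    // commentary): the output is locked as uint8_t, so each group of four
    // bytes is the little-endian bit pattern of one fp32 average. For example
    // the first group {29, 199, 241, 63} is 0x3FF1C71D, i.e. roughly 1.8889f,
    // consistent with an average of nine small int8 inputs.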

    ASSERT_EQ(answers.size(), output_ptr.size());
    for (size_t i = 0; i < output_ptr.size(); ++i) {
        EXPECT_EQ(answers[i], output_ptr[i]) << i;
    }
}

TEST(pooling_forward_gpu, basic_max_pooling_int8) {

    auto& engine = get_test_engine();
@ -966,7 +825,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_out

    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(pooling("pool_prim", "input_prim", pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 2, 2}, 0}));
    topology.add(pooling("pool_prim", "input_prim", pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 2, 2}, 0}));

    network network(engine, topology);
    set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f });
@ -1027,7 +886,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_out

    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
    topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0}));

    network network(engine, topology);
@ -1098,7 +957,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inp
    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ {0,0,1,2}, 0 })));
    topology.add(pooling("pool_prim", "reorder", pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 2, 2}, 0}));
    topology.add(pooling("pool_prim", "reorder", pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 2, 2}, 0}));

    network network(engine, topology);
    set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f });
@ -1161,7 +1020,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inp
    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 })));
    topology.add(pooling("pool_prim", "reorder", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
    topology.add(pooling("pool_prim", "reorder", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0}));

    network network(engine, topology);
@ -1232,7 +1091,7 @@ TEST(pooling_forward_gpu, avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inpad2x1_ou
    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })));
    topology.add(pooling("pool_prim", "reorder", pooling_mode::average, { 2, 2 }, { 2, 2 }, { 0, 0 }, padding{ { 0, 0, 2, 2 }, 0 }));
    topology.add(pooling("pool_prim", "reorder", pooling_mode::average, { 2, 2 }, { 2, 2 }, { 0, 0 }, { 0, 0 }, padding{ { 0, 0, 2, 2 }, 0 }));

    network network(engine, topology);
    set_values(input_prim, {
@ -1300,7 +1159,7 @@ TEST(pooling_forward_gpu, max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inpad2x1_ou
    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })));
    topology.add(pooling("pool_prim", "reorder", pooling_mode::max, { 2, 2}, { 2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
    topology.add(pooling("pool_prim", "reorder", pooling_mode::max, { 2, 2}, { 2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0}));

    network network(engine, topology);
@ -1337,407 +1196,6 @@ TEST(pooling_forward_gpu, max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inpad2x1_ou
    }
}
TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax) {
    // Input : 2x2x3x2
    // Argmax : 2x2x2x1
    // Output : 2x2x2x2

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Output:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 3, 2 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 1 } });

    set_values(input, {
        1.0f, 2.0f, -10.f,
        3.0f, 4.0f, -14.f,
        5.0f, 6.0f, -12.f,
        7.0f, 8.0f, 16.0f,
        0.f, 0.f, -11.f,
        0.5f, -0.5f, -15.f,
        1.5f, 5.2f, -13.f,
        12.f, 9.f, 17.f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(mutable_data("arg_max", arg_max));
    topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("pooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();
    cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 1);
    EXPECT_EQ(output_layout.spatial(0), 2);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_argmax_vec = {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    };

    std::vector<float> expected_output_vec = {
        4.0f, 4.0f,
        8.0f, 16.0f,
        0.5f, 0.0f,
        12.0f, 17.0f
    };

    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
        EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
    }
}
TEST(pooling_forward_gpu, basic_in2x2x3x2x1_max_with_argmax) {
    // Input : 2x2x3x2x1
    // Argmax : 2x2x2x1x1
    // Output : 2x2x2x2x1

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Output:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 3, 2, 1 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 2, 1, 1 } });

    set_values(input, {
        1.0f, 2.0f, -10.f,
        3.0f, 4.0f, -14.f,
        5.0f, 6.0f, -12.f,
        7.0f, 8.0f, 16.0f,
        0.f, 0.f, -11.f,
        0.5f, -0.5f, -15.f,
        1.5f, 5.2f, -13.f,
        12.f, 9.f, 17.f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(mutable_data("arg_max", arg_max));
    topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 1, 2, 2 }, { 1, 1, 1 }, {0, 0, 0}));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("pooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();
    cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());

    EXPECT_EQ(output_layout.format, format::bfzyx);
    EXPECT_EQ(output_layout.spatial(2), 1);
    EXPECT_EQ(output_layout.spatial(1), 1);
    EXPECT_EQ(output_layout.spatial(0), 2);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_argmax_vec = {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    };

    std::vector<float> expected_output_vec = {
        4.0f, 4.0f,
        8.0f, 16.0f,
        0.5f, 0.0f,
        12.0f, 17.0f
    };

    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
        EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
    }
}
TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_input_padding) {
    // Input : 2x2x3x2
    // Argmax : 2x2x2x1
    // Output : 2x2x2x2
    // Input Padding : 2x2

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Output:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });

    set_values(input, {
        1.0f, 2.0f, -10.f,
        3.0f, 4.0f, -14.f,
        5.0f, 6.0f, -12.f,
        7.0f, 8.0f, 16.0f,
        0.f, 0.f, -11.f,
        0.5f, -0.5f, -15.f,
        1.5f, 5.2f, -13.f,
        12.f, 9.f, 17.f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 2, 2 }, 0 })));
    topology.add(mutable_data("arg_max", arg_max));
    topology.add(pooling("pooling", "reorder", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("pooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();
    cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 1);
    EXPECT_EQ(output_layout.spatial(0), 2);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_argmax_vec = {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    };

    std::vector<float> expected_output_vec = {
        4.0f, 4.0f,
        8.0f, 16.0f,
        0.5f, 0.0f,
        12.0f, 17.0f
    };

    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
        EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
    }
}
TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_output_padding) {
    // Input : 2x2x3x2
    // Argmax : 2x2x2x1
    // Output : 2x2x2x2
    // Output Padding : 2x2

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Output:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });

    set_values(input, {
        1.0f, 2.0f, -10.f,
        3.0f, 4.0f, -14.f,
        5.0f, 6.0f, -12.f,
        7.0f, 8.0f, 16.0f,
        0.f, 0.f, -11.f,
        0.5f, -0.5f, -15.f,
        1.5f, 5.2f, -13.f,
        12.f, 9.f, 17.f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 2, 2 }, 0 })));
    topology.add(mutable_data("arg_max", arg_max));
    topology.add(pooling("pooling", "reorder", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }, { 0, 0 }, padding({ 0, 0, 1, 1 }, 0)));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("pooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();
    cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 1);
    EXPECT_EQ(output_layout.spatial(0), 2);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_argmax_vec = {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    };

    std::vector<float> expected_output_vec = {
        0.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 4.0f, 4.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 8.0f, 16.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 0.5f, 0.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 12.0f, 17.0f, 0.0f,
        0.0f, 0.0f, 0.0f, 0.0f,
    };

    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
    }

    for (size_t i = 0; i < expected_argmax_vec.size(); ++i) {
        EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
    }
}
TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_with_output_size) {
    // Input : 2x2x3x2
    // Argmax : 2x2x2x1
    // Output : 2x2x2x2

    // Forward Max Pooling Input:
    // f0: b0: 1 2 -10 b1: 0 0 -11
    // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
    // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
    // f1: b0: 7 8 16 b1: 12 9 17
    //
    // Output:
    // f0: b0: 4 4 b1: 0.5 0
    // f1: b0: 8 16 b1: 12 17
    //
    // Argmax:
    // f0: b0: 4 4 b1: 15 13
    // f1: b0: 10 11 b1: 21 23

    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
    auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });

    set_values(input, {
        1.0f, 2.0f, -10.f,
        3.0f, 4.0f, -14.f,
        5.0f, 6.0f, -12.f,
        7.0f, 8.0f, 16.0f,
        0.f, 0.f, -11.f,
        0.5f, -0.5f, -15.f,
        1.5f, 5.2f, -13.f,
        12.f, 9.f, 17.f
    });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(mutable_data("arg_max", arg_max));
    topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 2, 2 }, { 1, 1 }, { 0, 0 }, { 2, 2, 2, 1 }));

    network network(engine, topology);

    network.set_input_data("input", input);

    auto outputs = network.execute();

    auto output = outputs.at("pooling").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    auto output_layout = output->get_layout();
    cldnn::mem_lock<float> argmax_ptr(arg_max, get_test_stream());

    EXPECT_EQ(output_layout.format, format::bfyx);
    EXPECT_EQ(output_layout.spatial(1), 1);
    EXPECT_EQ(output_layout.spatial(0), 2);
    EXPECT_EQ(output_layout.feature(), 2);
    EXPECT_EQ(output_layout.batch(), 2);

    std::vector<float> expected_argmax_vec = {
        4.0f, 4.0f,
        10.0f, 11.0f,
        15.0f, 13.0f,
        21.0f, 23.0f
    };

    std::vector<float> expected_output_vec = {
        4.0f, 4.0f,
        8.0f, 16.0f,
        0.5f, 0.0f,
        12.0f, 17.0f
    };

    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
        EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
    }
}

template <class DataType>
static void generic_average_wo_padding_test(format fmt, tensor output, tensor input, ov::Shape window, ov::Strides stride, ov::Shape offset)
{
@ -2259,7 +1717,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x3x3_input_2x2_pool_2x2_stride_2x
    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(reorder("reorder_input", "input_prim", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor)));
    topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
    topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
    topology.add(reorder("reorder_pooling", "pool_prim", layout(data_types::f16, format::bfyx, { 1,1,4,4 }, padding{ { 0, 0, 1, 1 }, 0 })));

    network network(engine, topology);
@ -2332,7 +1790,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x5x5_input_2x2_pool_2x2_stride_2x
    topology topology;
    topology.add(input_layout("input_prim", input_prim->get_layout()));
    topology.add(reorder("reorder_input", "input_prim", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor, padding{ { 0, 0, 2, 1 } , 0 })));
    topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
    topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, padding{{0, 0, 1, 1}, 0}));
    topology.add(reorder("reorder_pooling", "pool_prim", layout(data_types::f16, format::bfyx, input_tensor, padding{ { 0, 0, 1, 1 }, 0 })));

    network network(engine, topology);
@ -2409,7 +1867,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3
    topology golden_topology;
    golden_topology.add(input_layout("input", input_prim->get_layout()));
    golden_topology.add(reorder("reorder_input", "input", input_prim->get_layout().with_padding(padding{ {0,0,x_in_pad,y_in_pad},0 })));
    golden_topology.add(pooling("golden_pooling", "reorder_input", pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 }));
    golden_topology.add(pooling("golden_pooling", "reorder_input", pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, { 0, 0 }, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 }));

    network golden_network(engine, golden_topology);
    golden_network.set_input_data("input", input_prim);
@ -2426,7 +1884,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3
    topology golden_topology;
    golden_topology.add(input_layout("input", input_prim->get_layout()));
    golden_topology.add(reorder("reorder_input", "input", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor, padding{ {0, 0, x_in_pad, y_in_pad}, 0 })));
    golden_topology.add(pooling("fsv32_pooling", "reorder_input", pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 }));
    golden_topology.add(pooling("fsv32_pooling", "reorder_input", pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, { 0, 0 }, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 }));
    golden_topology.add(reorder("reorder_pooling", "fsv32_pooling", layout(data_types::f16, format::bfyx, input_tensor, padding{ { 0,0,x_out_pad,y_out_pad },0 })));

    network fsv32_network(engine, golden_topology);
@ -2468,15 +1926,8 @@ public:
            stride.insert(stride.begin(), stride_z());
            pad.insert(pad.begin(), offset_z());
        }
        if (global_pooling())
            topo.add(pooling("pool", "input", pool_mode()));
        else
            topo.add(pooling("pool",
                             "input",
                             pool_mode(),
                             kernel,
                             stride,
                             pad));

        topo.add(pooling("pool", "input", pool_mode(), kernel, stride, pad));
        return topo;
    }
@ -2579,7 +2030,6 @@ public:
    size_t offset_x() { return _offset_x; }
    size_t offset_y() { return _offset_y; }
    size_t offset_z() { return _offset_z; }
    bool global_pooling() { return _global_pooling; }

    void set_input(format::type input_fmt, VVVVVF<InputT> input_data) {
        _input_fmt = input_fmt;
@ -2604,16 +2054,11 @@ public:
        _offset_z = z;
    }

    void set_global_pooling(bool global_pooling) {
        _global_pooling = global_pooling;
    }

    VVVVVF<InputT> _input;
    format::type _input_fmt;
    size_t _pool_x, _pool_y, _pool_z;
    int _stride_x, _stride_y, _stride_z;
    int _offset_x, _offset_y, _offset_z;
    bool _global_pooling;
};

using pooling_random_test_params = std::tuple<
@ -2623,8 +2068,7 @@ using pooling_random_test_params = std::tuple<
    std::tuple<size_t, size_t, size_t>, // pool x, y, z
    std::tuple<int, int, int>,          // stride x, y, z
    std::tuple<int, int, int>,          // offset x, y, z
    format::type,                       // input format
    bool                                // global pooling
    format::type                        // input format
>;

template <typename InputT, pooling_mode Mode>
@ -2647,8 +2091,7 @@ public:
                    this->stride_z(),
                    this->offset_x(),
                    this->offset_y(),
                    this->offset_z(),
                    this->global_pooling());
                    this->offset_z());
            }
        }
        return reference;
@ -2658,7 +2101,6 @@ public:
        size_t b, f, in_x, in_y, in_z, p_x, p_y, p_z;
        int s_x, s_y, s_z, o_x, o_y, o_z;
        format::type in_fmt;
        bool global_pooling;

        std::forward_as_tuple(
            b,
@ -2667,24 +2109,16 @@ public:
            std::forward_as_tuple(p_x, p_y, p_z),
            std::forward_as_tuple(s_x, s_y, s_z),
            std::forward_as_tuple(o_x, o_y, o_z),
            in_fmt,
            global_pooling
            in_fmt
        ) = params;

        auto input_data = generate_random_5d<InputT>(b, f, in_z, in_y, in_x, -256, 256);

        this->set_input(in_fmt, std::move(input_data));
        if (global_pooling) {
            this->set_pool_size(0, 0, 0);
            this->set_strides(1, 1, 1);
            this->set_offsets(0, 0, 0);
        } else {
            this->set_pool_size(p_x, p_y, p_z);
            this->set_strides(s_x, s_y, s_z);
            this->set_offsets(o_x, o_y, o_z);
        }
        this->set_global_pooling(global_pooling);
    }

    void run_random(const pooling_random_test_params& params) {
        param_set_up(params);
@ -2733,8 +2167,7 @@ INSTANTIATE_TEST_SUITE_P(
        format::bfyx,
        format::b_fs_yx_fsv4,
        format::b_fs_yx_fsv16,
        format::b_fs_yx_fsv32),
        testing::Values(false, true)),
        format::b_fs_yx_fsv32)),
    testing::internal::DefaultParamName<pooling_random_test_params>);

INSTANTIATE_TEST_SUITE_P(
@ -2747,8 +2180,7 @@ INSTANTIATE_TEST_SUITE_P(
    testing::Values(std::tuple<int, int, int>(2, 2, 2)),
    testing::Values(std::tuple<int, int, int>(0, 0, 0)),
    testing::Values(format::bfzyx,
                    format::b_fs_zyx_fsv16),
    testing::Values(false, true)),
                    format::b_fs_zyx_fsv16)),
    testing::internal::DefaultParamName<pooling_random_test_params>);

INSTANTIATE_TEST_SUITE_P(
@ -2761,9 +2193,7 @@ INSTANTIATE_TEST_SUITE_P(
    testing::Values(std::tuple<size_t, size_t, size_t>(1, 1, 1), std::tuple<size_t, size_t, size_t>(3, 3, 1)),
    testing::Values(std::tuple<int, int, int>(1, 1, 1)),
    testing::Values(std::tuple<int, int, int>(0, 0, 0)),
    testing::Values(format::bs_fs_yx_bsv16_fsv16),
    testing::Values(false, true)
    ),
    testing::Values(format::bs_fs_yx_bsv16_fsv16)),
    testing::internal::DefaultParamName<pooling_random_test_params>);

template <typename InputT, pooling_mode Mode>
@ -2851,8 +2281,7 @@ INSTANTIATE_TEST_SUITE_P(
        format::b_fs_yx_fsv16,
        format::fs_b_yx_fsv32,
        format::b_fs_yx_fsv32,
        format::b_fs_yx_fsv4),
        testing::Values(false)),
        format::b_fs_yx_fsv4)),
    testing::internal::DefaultParamName<pooling_random_test_params>);

TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x8x8_input_2x2_pool_2x2_stride)
@ -3485,10 +2914,10 @@ public:
                all_layer_params.emplace_back(new pooling("pooling", "reorder0", pooling_mode, size, stride));

                // Output padding
                all_layer_params.emplace_back(new pooling("pooling", "input0", pooling_mode, size, stride, generate_pad(2, 3, size), { { 0, 0, 1, 5 }, { 0, 0, 19, 4 } }));
                all_layer_params.emplace_back(new pooling("pooling", "input0", pooling_mode, size, stride, generate_pad(2, 3, size), generate_pad(2, 3, size), { { 0, 0, 1, 5 }, { 0, 0, 19, 4 } }));

                // Input + output padding
                all_layer_params.emplace_back(new pooling("pooling", "reorder0", pooling_mode, size, stride, generate_pad(2, 3, size), { { 0, 0, 2, 1 }, { 0, 0, 3, 4 } }));
                all_layer_params.emplace_back(new pooling("pooling", "reorder0", pooling_mode, size, stride, generate_pad(2, 3, size), generate_pad(2, 3, size), { { 0, 0, 2, 1 }, { 0, 0, 3, 4 } }));
            }
        }
    }
@ -3545,8 +2974,8 @@ public:
        int height = generic_params->input_layouts[0].spatial(1);
        int width = generic_params->input_layouts[0].spatial(0);

        auto pad_height = pooling->pad[0];
        auto pad_width = pooling->pad[1];
        auto pad_height = pooling->pads_begin[0];
        auto pad_width = pooling->pads_begin[1];

        auto kernel_height = pooling->size[0];
        auto kernel_width = pooling->size[1];
@ -3581,8 +3010,8 @@ public:

        cldnn::pooling_mode pooling_mode = pooling->mode;

        int pad_width = pooling->pad[1];
        int pad_height = pooling->pad[0];
        int pad_width = pooling->pads_begin[1];
        int pad_height = pooling->pads_begin[0];

        int kernel_width = pooling->size[1];
        int kernel_height = pooling->size[0];
@ -552,7 +552,8 @@ inline void PrintTupleTo(const std::tuple<std::shared_ptr<test_params>, std::sha
        auto pooling = std::static_pointer_cast<cldnn::pooling>(primitive);
        std::string pooling_mode = (pooling->mode == cldnn::pooling_mode::max) ? "max" : "average";
        str << "Pooling mode: " << pooling_mode
            << " Pad x: " << pooling->pad[1] << " Pad y: " << pooling->pad[0]
            << " Pads_begin x: " << pooling->pads_begin[1] << " Pads_begin y: " << pooling->pads_begin[0]
            << " Pads_end x: " << pooling->pads_end[1] << " Pads_end y: " << pooling->pads_end[0]
            << " Stride x: " << pooling->stride[1] << " Stride y: " << pooling->stride[0]
            << " Size x: " << pooling->size[1] << " Size y: " << pooling->size[0];
    } else {