From efb602e13b7edf2ca617edc5593fb658f069bf94 Mon Sep 17 00:00:00 2001 From: Andrew Kwangwoong Park Date: Thu, 5 Jan 2023 19:31:32 +0900 Subject: [PATCH] [GPU] Update strided slice impl for non-constant begin/end/stride support (#14619) * Add strided slice dynamic TCs for gpuFunctests Signed-off-by: Andrew Park * Update StridedSliceLayerGPUTest for rest inputs' type (parameter) Signed-off-by: Andrew Park * remove W/A for strided slice Signed-off-by: Andrew Park * primitive API updates for dynamic begin/end/stride inputs support Signed-off-by: Andrew Park * Update ocl impl and kernel param, and strided_slice_ref kernel w/o memory_deps Signed-off-by: Andrew Park * Update TCs for new API w/ vectors on strided_slice_gpu unittests Signed-off-by: Andrew Park * Apply code review Signed-off-by: Andrew Park * Apply padding when the size of rest input data is smaller than actual input rank Signed-off-by: Andrew Park * Update strided_slice_optimize pass to run only when all deps are constant Signed-off-by: Andrew Park Signed-off-by: Andrew Park --- .../intel_gpu/primitives/strided_slice.hpp | 44 ++ .../strided_slice_optimize.cpp | 9 + .../src/graph/impls/ocl/strided_slice.cpp | 121 +++-- .../intel_gpu/src/graph/primitive_inst.cpp | 16 +- .../intel_gpu/src/graph/program_node.cpp | 22 - .../intel_gpu/src/graph/strided_slice.cpp | 68 ++- .../cl_kernels/strided_slice_ref.cl | 283 ++++++++++- .../strided_slice_kernel_ref.cpp | 44 +- .../strided_slice/strided_slice_kernel_ref.h | 30 ++ .../src/plugin/ops/strided_slice.cpp | 61 ++- .../shape_infer/strided_slice_si_test.cpp | 53 +- .../test_cases/strided_slice_gpu_test.cpp | 455 ++++++++---------- .../dynamic/strided_slice.cpp | 251 ++++++++++ 13 files changed, 1065 insertions(+), 392 deletions(-) create mode 100644 src/tests/functional/plugin/gpu/single_layer_tests/dynamic/strided_slice.cpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/strided_slice.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/strided_slice.hpp index 2ae712be600..1609d2ac6b8 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/strided_slice.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/strided_slice.hpp @@ -46,6 +46,9 @@ struct strided_slice : public primitive_base { const ov::Shape out_size, const padding& output_padding = padding()) : primitive_base(id, {input, begin_id, end_id, strides_id}, {output_padding}), + begin({}), + end({}), + strides({}), begin_mask(begin_mask), end_mask(end_mask), new_axis_mask(new_axis_mask), @@ -53,6 +56,47 @@ struct strided_slice : public primitive_base { ellipsis_mask(ellipsis_mask), out_size(out_size) {} + /// @brief Constructs strided_slice primitive with constant begin/end/stride + /// @param id This primitive id. + /// @param input Input data primitive id. + /// @param begin Begin indexes for input. + /// @param end End indexes for input. + /// @param strides Strides for input. + /// @param begin_mask Array of bits, that provide replace begin[i] to max possible range in that dimension. + /// @param end_mask Array of bits, that provide replace end[i] to max possible range in that dimension. + /// @param new_axis_mask Array of bits, that provide adding a new length 1 dimension at ith position in the output tensor. + /// @param shrink_axis_mask Array of bits, that provide shrinks the dimensionality by 1, taking on the value at index begin[i]. + /// @param ellipsis_mask Array of bits, that provide inserts missing dimensions on a position of a non-zero bit.
+ /// @param out_size Size of output tensor + strided_slice(const primitive_id& id, + const input_info& input, + const std::vector& begin, + const std::vector& end, + const std::vector& strides, + const std::vector& begin_mask, + const std::vector& end_mask, + const std::vector& new_axis_mask, + const std::vector& shrink_axis_mask, + const std::vector& ellipsis_mask, + const ov::Shape out_size, + const padding& output_padding = padding()) + : primitive_base(id, {input}, {output_padding}), + begin(begin), + end(end), + strides(strides), + begin_mask(begin_mask), + end_mask(end_mask), + new_axis_mask(new_axis_mask), + shrink_axis_mask(shrink_axis_mask), + ellipsis_mask(ellipsis_mask), + out_size(out_size) {} + + /// @brief Begin indexes for input + std::vector begin; + /// @brief End indexes for input + std::vector end; + /// @brief Strides for input + std::vector strides; /// @brief Array of bits, that provide replace begin[i] to max possible range in that dimension. std::vector begin_mask; /// @brief Array of bits, that provide replace end[i] to max possible range in that dimension. diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/strided_slice_optimize.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/strided_slice_optimize.cpp index 10926ab55c6..79af41e6a58 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/strided_slice_optimize.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/strided_slice_optimize.cpp @@ -32,6 +32,15 @@ void strided_slice_optimize::run(program& p) { continue; auto& deps = node->get_dependencies(); + auto is_other_deps_constant = [deps]() { + for (size_t i = 1; i < deps.size(); i++) { + if (!deps[i].first->is_type()) return false; + } + return true; + }; + if (!is_other_deps_constant()) + continue; + for (size_t i = deps.size(); i--;) if (deps[i].first->is_type()) node->remove_dependency(i); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp index 10f181f17fd..03f78d1bc14 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp @@ -26,7 +26,7 @@ std::vector& pad_vector_to_size(std::vector& data, size_t size, DT value) template std::vector& vector_assign_if_not_mask(std::vector& dst, const T& src, const std::vector& mask) { for (size_t i = 0; i < dst.size(); ++i) { - if (!mask[i]) + if (mask[i]) dst[i] = src; } return dst; @@ -35,7 +35,7 @@ std::vector& vector_assign_if_not_mask(std::vector& dst, const T& src, con template std::vector& vector_assign_if_not_mask(std::vector& dst, const std::vector& src, const std::vector& mask) { for (size_t i = 0; i < dst.size(); ++i) { - if (!mask[i]) + if (mask[i]) dst[i] = src[i]; } return dst; @@ -64,13 +64,53 @@ public: auto op_params = get_default_optional_params(impl_param.get_program()); const size_t dims_num = params.inputs[0].Dimentions(); + std::vector begin(prim->begin.begin(), prim->begin.end()); + std::vector end(prim->end.begin(), prim->end.end()); + std::vector strides(prim->strides.begin(), prim->strides.end()); // Getting data from constant inputs. 
There are 3 args: Begin, End, Stride - for (size_t i = 1; i < arg.get_dependencies().size(); ++i) { - OPENVINO_ASSERT(impl_param.memory_deps.count(i) > 0, "[GPU] Can't find StridedSlice memory dependency"); - auto mem = impl_param.memory_deps.at(i); - std::vector sizes = read_vector(mem, impl_param.prog->get_stream()); - pad_vector_to_size(sizes, dims_num, i != 1); // for "begin" completion used 0 value, for other - 1 - params.striding_params.push_back(sizes); + if (!begin.empty()) { + pad_vector_to_size(begin, dims_num, 0); + params.begin_type = kernel_selector::StridedSliceArgType::Constant; + params.striding_params.push_back(begin); + } else { + params.begin_type = kernel_selector::StridedSliceArgType::Input; + auto begin_layout = impl_param.get_input_layout(1); + params.inputs.push_back(convert_data_tensor(begin_layout)); + params.begin_dims = begin_layout.count(); + } + + auto get_index_end = [&]() { + size_t offset = 1; + if (begin.empty() && params.begin_type == kernel_selector::StridedSliceArgType::Input) + offset++; + return offset; + }; + if (!end.empty()) { + pad_vector_to_size(end, dims_num, 1); + params.end_type = kernel_selector::StridedSliceArgType::Constant; + params.striding_params.push_back(end); + } else { + params.end_type = kernel_selector::StridedSliceArgType::Input; + auto end_layout = impl_param.get_input_layout(get_index_end()); + params.inputs.push_back(convert_data_tensor(end_layout)); + params.end_dims = end_layout.count(); + } + + auto get_index_stride = [&]() { + size_t offset = get_index_end(); + if (end.empty() && params.end_type == kernel_selector::StridedSliceArgType::Input) + offset++; + return offset; + }; + if (!strides.empty()) { + pad_vector_to_size(strides, dims_num, 1); + params.stride_type = kernel_selector::StridedSliceArgType::Constant; + params.striding_params.push_back(strides); + } else { + params.stride_type = kernel_selector::StridedSliceArgType::Input; + auto stride_layout = impl_param.get_input_layout(get_index_stride()); + params.inputs.push_back(convert_data_tensor(stride_layout)); + params.stride_dims = stride_layout.count(); } auto begin_mask_ = prim->begin_mask; @@ -82,17 +122,10 @@ public: std::vector end_mask(end_mask_.begin(), end_mask_.end()); std::vector new_axis_mask(new_axis_mask_.begin(), new_axis_mask_.end()); std::vector shrink_axis_mask(shrink_axis_mask_.begin(), shrink_axis_mask_.end()); - // Plugin requires inverted mask values. Consider changing primitive impl to be aligned with the spec. - for (auto& b : begin_mask) { - b = 1 - b; - } - for (auto& e : end_mask) { - e = 1 - e; - } params.end_mask = end_mask; - pad_vector_to_size(params.end_mask, dims_num, 1); + pad_vector_to_size(params.end_mask, dims_num, 0); params.begin_mask = begin_mask; - pad_vector_to_size(params.begin_mask, dims_num, 1); + pad_vector_to_size(params.begin_mask, dims_num, 0); params.new_axis_mask = new_axis_mask; params.shrink_axis_mask = shrink_axis_mask; @@ -103,34 +136,36 @@ public: std::vector out_shape; for (const auto& dim : logical_dims) out_shape.push_back(static_cast(dim)); - // If the ith bit of begin_mask is not set, begin[i] is ignored and the range of the appropriate dimension starts from 0. - vector_assign_if_not_mask(params.striding_params[0], 0, params.begin_mask); - // If the ith bit of end_mask is not set, end[i] is ignored and the fullest possible range in that dimension is used - // instead. 
- vector_assign_if_not_mask(params.striding_params[1], out_shape, params.end_mask); - for (size_t dim = 0; dim < params.striding_params[2].size(); dim++) { - if (params.striding_params[0][dim] < 0) - params.striding_params[0][dim] = std::max(out_shape[dim] + params.striding_params[0][dim], (int32_t)0); - if (params.striding_params[1][dim] < 0) - params.striding_params[1][dim] = std::max(out_shape[dim] + params.striding_params[1][dim], (int32_t)0); + if (params.striding_params.size() == 3) { + // If the ith bit of begin_mask is not set, begin[i] is ignored and the range of the appropriate dimension starts from 0. + vector_assign_if_not_mask(params.striding_params[0], 0, params.begin_mask); + // If the ith bit of end_mask is not set, end[i] is ignored and the fullest possible range in that dimension is used + // instead. + vector_assign_if_not_mask(params.striding_params[1], out_shape, params.end_mask); + for (size_t dim = 0; dim < params.striding_params[2].size(); dim++) { + if (params.striding_params[0][dim] < 0) + params.striding_params[0][dim] = std::max(out_shape[dim] + params.striding_params[0][dim], (int32_t)0); + if (params.striding_params[1][dim] < 0) + params.striding_params[1][dim] = std::max(out_shape[dim] + params.striding_params[1][dim], (int32_t)0); - params.striding_params[0][dim] = std::min(params.striding_params[0][dim], out_shape[dim]); - params.striding_params[1][dim] = std::min(params.striding_params[1][dim], out_shape[dim]); + params.striding_params[0][dim] = std::min(params.striding_params[0][dim], out_shape[dim]); + params.striding_params[1][dim] = std::min(params.striding_params[1][dim], out_shape[dim]); - auto& begin = params.striding_params[0][dim]; - auto& end = params.striding_params[1][dim]; - auto& stride = params.striding_params[2][dim]; - bool is_reverse = stride < 0; - // If begin > end && is_reverse, then we don't need to adjust begin/end values, the kernel will process it correctly - // If begin <= end, then we swap begin/end values and subtruct 1 from each of them - // E.g. out_shape[dim] = 100; begin=0; end=100; stride=-1 - // swap: begin=100; end=0; - // sub: begin=99; end=-1; - // So the kernel will put the slices [99, 0] in reversed order as expected. - if (is_reverse && begin <= end) { - std::swap(begin, end); - begin--; - end--; + auto& begin = params.striding_params[0][dim]; + auto& end = params.striding_params[1][dim]; + auto& stride = params.striding_params[2][dim]; + bool is_reverse = stride < 0; + // If begin > end && is_reverse, then we don't need to adjust begin/end values, the kernel will process it correctly + // If begin <= end, then we swap begin/end values and subtruct 1 from each of them + // E.g. out_shape[dim] = 100; begin=0; end=100; stride=-1 + // swap: begin=100; end=0; + // sub: begin=99; end=-1; + // So the kernel will put the slices [99, 0] in reversed order as expected. 
+ if (is_reverse && begin <= end) { + std::swap(begin, end); + begin--; + end--; + } } } diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 99d32622d8c..1a24b526ad6 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -10,11 +10,9 @@ #include "arg_max_min_inst.h" #include "fully_connected_inst.h" #include "convolution_inst.h" -#include "strided_slice_inst.h" #include "crop_inst.h" #include "deconvolution_inst.h" #include "shape_of_inst.h" -#include "strided_slice_inst.h" #include "gemm_inst.h" #include "experimental_detectron_roi_feature_extractor_inst.hpp" #include "compilation_context.hpp" @@ -170,19 +168,7 @@ void primitive_inst::update_shape() { if (_node->is_type() && !input_shape_changed) return; - // Strided slice loads data from {1,2,3} dependencies in impl::create method. - // It means that this data must be put into impl_params map - // Thus we treat it as "dynamic" case - // TODO: Remove once strided slice impl support runtime tensors for begin/end/stride - bool strided_slice_wa = false; - if (_node->is_type()) { - for (size_t i = 1; i < _node->get_dependencies().size(); i++) { - if (!_node->get_dependency(i).is_type()) - strided_slice_wa = true; - } - } - - if (!strided_slice_wa && !input_shape_changed && !_node->generates_dynamic_output() && _impl_params->get_output_layout().is_static()) + if (!input_shape_changed && !_node->generates_dynamic_output() && _impl_params->get_output_layout().is_static()) return; std::vector dependencies_events; diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 2b90dc3c947..640c520f540 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -6,7 +6,6 @@ #include "program_helpers.h" #include "primitive_inst.h" #include "loop_inst.h" -#include "strided_slice_inst.h" #include "intel_gpu/runtime/debug_configuration.hpp" #ifdef ENABLE_ONEDNN_FOR_GPU #include "convolution_inst.h" @@ -348,16 +347,6 @@ bool program_node::recalc_output_layouts(bool invalidate_users_if_changed) { } bool program_node::is_dynamic() const { - // Strided slice loads data from {1,2,3} dependencies in impl::create method. - // It means that this data must be put into impl_params map - // Thus we treat it as "dynamic" case - // TODO: Remove once strided slice impl support runtime tensors for begin/end/stride - if (is_type()) { - for (size_t i = 1; i < get_dependencies().size(); i++) { - if (!get_dependency(i).is_type()) - return true; - } - } for (const auto& input : get_dependencies()) { if (input.first->is_dynamic_output_layout()) return true; @@ -371,17 +360,6 @@ bool program_node::is_dynamic() const { } bool program_node::is_dynamic() { - // Strided slice loads data from {1,2,3} dependencies in impl::create method. 
- // It means that this data must be put into impl_params map - // Thus we treat it as "dynamic" case - // TODO: Remove once strided slice impl support runtime tensors for begin/end/stride - if (is_type()) { - for (size_t i = 1; i < get_dependencies().size(); i++) { - if (!get_dependency(i).is_type()) - return true; - } - } - for (auto& input : get_dependencies()) { if (input.first->is_dynamic_output_layout()) return true; diff --git a/src/plugins/intel_gpu/src/graph/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/strided_slice.cpp index 341e22ead86..55e7e3c0ec6 100644 --- a/src/plugins/intel_gpu/src/graph/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/strided_slice.cpp @@ -33,21 +33,33 @@ template std::vector strided_slice_inst::calc_output_layouts(strided_slice_node const& /*node*/, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); auto input0_layout = impl_param.get_input_layout(0); + auto input0_shape = input0_layout.get(); + auto input0_rank = input0_shape.size(); auto& constant_mem = impl_param.memory_deps; + auto begin_data = desc->begin; + auto end_data = desc->end; + auto strides_data = desc->strides; - if (!constant_mem.count(1) || !constant_mem.count(2) || !constant_mem.count(3)) { + if ((begin_data.empty() && !constant_mem.count(1)) + || (end_data.empty() && !constant_mem.count(2)) + || (strides_data.empty() && !constant_mem.count(3))) { auto out_shape = ov::PartialShape::dynamic(input0_layout.get_partial_shape().size()); return { layout{out_shape, input0_layout.data_type, format::get_default_format(out_shape.rank().get_length())} }; } ov::op::v1::StridedSlice op; + + ShapeType begin_shape = begin_data.empty() ? ov::Shape{ input0_rank } : ov::Shape{ begin_data.size() }; + ShapeType end_shape = end_data.empty() ? ov::Shape{ input0_rank } : ov::Shape{ end_data.size() }; + ShapeType strides_shape = strides_data.empty() ? 
ov::Shape{ input0_rank } : ov::Shape{ strides_data.size() }; + std::vector output_shapes = {ShapeType{}}; std::vector input_shapes = { - input0_layout.get(), - impl_param.get_input_layout(1).get(), - impl_param.get_input_layout(2).get(), - impl_param.get_input_layout(3).get() + input0_shape, + begin_shape, + end_shape, + strides_shape }; op.set_begin_mask(desc->begin_mask); @@ -56,24 +68,37 @@ std::vector strided_slice_inst::calc_output_layouts(strided_slice_node c op.set_shrink_axis_mask(desc->shrink_axis_mask); op.set_ellipsis_mask_mask(desc->ellipsis_mask); - auto mem1 = constant_mem.at(1); - auto mem2 = constant_mem.at(2); - auto mem3 = constant_mem.at(3); + std::map const_data; + if (!begin_data.empty() && !end_data.empty() && !strides_data.empty()) { + auto begin_tensor = make_host_tensor({ begin_shape, data_types::i64, format::bfyx }, static_cast(begin_data.data())); + auto end_tensor = make_host_tensor({ end_shape, data_types::i64, format::bfyx }, static_cast(end_data.data())); + auto strides_tensor = make_host_tensor({ strides_shape, data_types::i64, format::bfyx }, static_cast(strides_data.data())); - cldnn::mem_lock lock1(mem1, impl_param.prog->get_stream()); - cldnn::mem_lock lock2(mem2, impl_param.prog->get_stream()); - cldnn::mem_lock lock3(mem3, impl_param.prog->get_stream()); + const_data.emplace(1, begin_tensor); + const_data.emplace(2, end_tensor); + const_data.emplace(3, strides_tensor); - auto tensor1 = make_host_tensor(mem1->get_layout(), lock1.data()); - auto tensor2 = make_host_tensor(mem2->get_layout(), lock2.data()); - auto tensor3 = make_host_tensor(mem3->get_layout(), lock3.data()); + ov::op::v1::shape_infer(&op, input_shapes, output_shapes, const_data); + } else { + auto begin_mem = constant_mem.at(1); + auto end_mem = constant_mem.at(2); + auto strides_mem = constant_mem.at(3); + + cldnn::mem_lock lock1(begin_mem, impl_param.prog->get_stream()); + cldnn::mem_lock lock2(end_mem, impl_param.prog->get_stream()); + cldnn::mem_lock lock3(strides_mem, impl_param.prog->get_stream()); + + auto begin_tensor = make_host_tensor(begin_mem->get_layout(), lock1.data()); + auto end_tensor = make_host_tensor(end_mem->get_layout(), lock2.data()); + auto strides_tensor = make_host_tensor(strides_mem->get_layout(), lock3.data()); + + const_data.emplace(1, begin_tensor); + const_data.emplace(2, end_tensor); + const_data.emplace(3, strides_tensor); + + ov::op::v1::shape_infer(&op, input_shapes, output_shapes, const_data); + } - std::map> const_data = { - {1, tensor1}, - {2, tensor2}, - {3, tensor3}, - }; - ov::op::v1::shape_infer(&op, input_shapes, output_shapes, const_data); auto output_format = format::get_default_format(output_shapes[0].size()); return { layout{output_shapes[0], input0_layout.data_type, output_format} }; @@ -93,6 +118,9 @@ std::string strided_slice_inst::to_string(strided_slice_node const& node) { strided_slice_info.add("begin_param id", node.get_dependency(1).id()); strided_slice_info.add("end_param id", node.get_dependency(2).id()); strided_slice_info.add("stride_param id", node.get_dependency(3).id()); + strided_slice_info.add("begin", node.get_primitive()->begin); + strided_slice_info.add("end", node.get_primitive()->end); + strided_slice_info.add("strides", node.get_primitive()->strides); strided_slice_info.add("begin mask", node.get_primitive()->begin_mask); strided_slice_info.add("end mask", node.get_primitive()->end_mask); strided_slice_info.add("new axis mask", node.get_primitive()->new_axis_mask); diff --git 
a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl index 55f73c6d31b..21e3f90e8c5 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl @@ -4,10 +4,263 @@ #include "include/batch_headers/fetch_data.cl" -KERNEL(strided_slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output) +#ifdef STRIDE_TYPE +inline void FUNC(get_slice_step)(const __global STRIDE_TYPE* stride, + int* step_batch, int* step_feature, + int* step_z, int* step_y, int* step_x) +{ +#ifdef OUTPUT_LAYOUT_BFYX + const uint batch_index = STRIDE_GET_INDEX(0, 0, 0, 0); + const uint feature_index = STRIDE_GET_INDEX(1, 0, 0, 0); + const uint y_index = STRIDE_GET_INDEX(2, 0, 0, 0); + const uint x_index = STRIDE_GET_INDEX(3, 0, 0, 0); +#elif OUTPUT_LAYOUT_BFZYX + const uint batch_index = STRIDE_GET_INDEX(0, 0, 0, 0, 0); + const uint feature_index = STRIDE_GET_INDEX(1, 0, 0, 0, 0); + const uint z_index = STRIDE_GET_INDEX(2, 0, 0, 0, 0); + const uint y_index = STRIDE_GET_INDEX(3, 0, 0, 0, 0); + const uint x_index = STRIDE_GET_INDEX(4, 0, 0, 0, 0); +#endif + + *step_batch = batch_index < STRIDE_DIMS ? stride[batch_index] : 1; + *step_feature = feature_index < STRIDE_DIMS ? stride[feature_index] : 1; +#ifdef OUTPUT_LAYOUT_BFYX + *step_z = 0; +#elif OUTPUT_LAYOUT_BFZYX + *step_z = z_index < STRIDE_DIMS ? stride[z_index] : 1; +#endif + *step_y = y_index < STRIDE_DIMS ? stride[y_index] : 1; + *step_x = x_index < STRIDE_DIMS ? stride[x_index] : 1; +} +#endif // STRIDE_TYPE + +#ifdef END_TYPE +inline int FUNC(check_end_bound)(const end_num, + const uint out_num) +{ + int num; + if (end_num < 0) { + num = max(TO_END_TYPE(out_num) + end_num, TO_END_TYPE(0)); + } else { + num = end_num; + } + num = min(num, (int)out_num); + return num; +} + +inline void FUNC(get_slice_end)(const __global END_TYPE* end, + int* end_batch, int* end_feature, + int* end_z, int* end_y, int* end_x) +{ + const uint out_batch_num = INPUT0_BATCH_NUM; + const uint out_feature_num = INPUT0_FEATURE_NUM; + const uint out_z_num = INPUT0_SIZE_Z; + const uint out_y_num = INPUT0_SIZE_Y; + const uint out_x_num = INPUT0_SIZE_X; +#ifdef OUTPUT_LAYOUT_BFYX + const uint batch_index = END_GET_INDEX(0, 0, 0, 0); + const uint feature_index = END_GET_INDEX(1, 0, 0, 0); + const uint y_index = END_GET_INDEX(2, 0, 0, 0); + const uint x_index = END_GET_INDEX(3, 0, 0, 0); +#elif OUTPUT_LAYOUT_BFZYX + const uint batch_index = END_GET_INDEX(0, 0, 0, 0, 0); + const uint feature_index = END_GET_INDEX(1, 0, 0, 0, 0); + const uint z_index = END_GET_INDEX(2, 0, 0, 0, 0); + const uint y_index = END_GET_INDEX(3, 0, 0, 0, 0); + const uint x_index = END_GET_INDEX(4, 0, 0, 0, 0); +#endif + END_TYPE batch = batch_index < END_DIMS ? end[batch_index] : 0; + END_TYPE feature = feature_index < END_DIMS ? end[feature_index] : 0; +#ifdef OUTPUT_LAYOUT_BFZYX + END_TYPE z = z_index < END_DIMS ? end[z_index] : 0; +#endif + END_TYPE y = y_index < END_DIMS ? end[y_index] : 0; + END_TYPE x = x_index < END_DIMS ? end[x_index] : 0; + + batch = (END_BATCH == 0) ? batch : TO_END_TYPE(out_batch_num); + feature = (END_FEATURE == 0) ? feature : TO_END_TYPE(out_feature_num); +#ifdef OUTPUT_LAYOUT_BFZYX + z = (END_Z == 0) ? z: TO_END_TYPE(out_z_num); +#endif + y = (END_Y == 0) ? y : TO_END_TYPE(out_y_num); + x = (END_X == 0) ? 
x : TO_END_TYPE(out_x_num); + + *end_batch = FUNC_CALL(check_end_bound)(batch, out_batch_num); + *end_feature = FUNC_CALL(check_end_bound)(feature, out_feature_num); +#ifdef OUTPUT_LAYOUT_BFYX + *end_z = 0; +#elif OUTPUT_LAYOUT_BFZYX + *end_z = FUNC_CALL(check_end_bound)(z, out_z_num); +#endif + *end_y = FUNC_CALL(check_end_bound)(y, out_y_num); + *end_x = FUNC_CALL(check_end_bound)(x, out_x_num); +} + +inline void FUNC(check_negative_stride)(const __global END_TYPE* end, + const int steps_batch, const int steps_feature, + const int steps_z, const int steps_y, const int steps_x, + int* begin_batch, int* begin_feature, + int* begin_z, int* begin_y, int* begin_x) +{ + bool is_negative = (steps_batch < 0) || (steps_feature < 0) || (steps_z < 0) || (steps_y < 0) || (steps_x < 0); + if (is_negative) { + int end_batch, end_feature, end_z, end_y, end_x; + FUNC_CALL(get_slice_end)(end, &end_batch, &end_feature, &end_z, &end_y, &end_x); + const int slice_end_batch = end_batch; + const int slice_end_feature = end_feature; + const int slice_end_z = end_z; + const int slice_end_y = end_y; + const int slice_end_x = end_x; + + if ((steps_batch < 0) && (*begin_batch <= slice_end_batch)) + *begin_batch = slice_end_batch - 1; + if ((steps_feature < 0) && (*begin_feature <= slice_end_feature)) + *begin_feature = slice_end_feature - 1; + if ((steps_z < 0) && (*begin_z <= slice_end_z)) + *begin_z = slice_end_z - 1; + if ((steps_y < 0) && (*begin_y <= slice_end_y)) + *begin_y = slice_end_y - 1; + if ((steps_x < 0) && (*begin_x <= slice_end_x)) + *begin_x = slice_end_x - 1; + } +} +#else // END_TYPE +inline void FUNC(check_negative_stride)(const int steps_batch, const int steps_feature, + const int steps_z, const int steps_y, const int steps_x, + int* begin_batch, int* begin_feature, + int* begin_z, int* begin_y, int* begin_x) +{ + const int slice_end_batch = SLICE_END_BATCH; + const int slice_end_feature = SLICE_END_FEATURE; + const int slice_end_z = SLICE_END_Z; + const int slice_end_y = SLICE_END_Y; + const int slice_end_x = SLICE_END_X; + + if ((steps_batch < 0) && (*begin_batch <= slice_end_batch)) + *begin_batch = slice_end_batch - 1; + if ((steps_feature < 0) && (*begin_feature <= slice_end_feature)) + *begin_feature = slice_end_feature - 1; + if ((steps_z < 0) && (*begin_z <= slice_end_z)) + *begin_z = slice_end_z - 1; + if ((steps_y < 0) && (*begin_y <= slice_end_y)) + *begin_y = slice_end_y - 1; + if ((steps_x < 0) && (*begin_x <= slice_end_x)) + *begin_x = slice_end_x - 1; +} +#endif // END_TYPE + +#ifdef BEGIN_TYPE +inline int FUNC(check_begin_bound)(BEGIN_TYPE begin_num, + const uint out_num) +{ + int num; + if (begin_num < 0) { + num = max(TO_BEGIN_TYPE(out_num) + begin_num, TO_BEGIN_TYPE(0)); + } else { + num = begin_num; + } + num = min(num, (int)out_num); + return num; +} + +inline void FUNC(get_slice_begin)(const __global BEGIN_TYPE* begin, + int* begin_batch, int* begin_feature, + int* begin_z, int* begin_y, int* begin_x) +{ + const uint out_batch_num = INPUT0_BATCH_NUM; + const uint out_feature_num = INPUT0_FEATURE_NUM; + const uint out_z_num = INPUT0_SIZE_Z; + const uint out_y_num = INPUT0_SIZE_Y; + const uint out_x_num = INPUT0_SIZE_X; +#ifdef OUTPUT_LAYOUT_BFYX + const uint batch_index = STRIDE_GET_INDEX(0, 0, 0, 0); + const uint feature_index = STRIDE_GET_INDEX(1, 0, 0, 0); + const uint y_index = STRIDE_GET_INDEX(2, 0, 0, 0); + const uint x_index = STRIDE_GET_INDEX(3, 0, 0, 0); +#elif OUTPUT_LAYOUT_BFZYX + const uint batch_index = STRIDE_GET_INDEX(0, 0, 0, 0, 0); + const uint 
feature_index = STRIDE_GET_INDEX(1, 0, 0, 0, 0); + const uint z_index = STRIDE_GET_INDEX(2, 0, 0, 0, 0); + const uint y_index = STRIDE_GET_INDEX(3, 0, 0, 0, 0); + const uint x_index = STRIDE_GET_INDEX(4, 0, 0, 0, 0); +#endif + BEGIN_TYPE batch = batch_index < BEGIN_DIMS ? begin[batch_index] : 0; + BEGIN_TYPE feature = feature_index < BEGIN_DIMS ? begin[feature_index] : 0; +#ifdef OUTPUT_LAYOUT_BFZYX + BEGIN_TYPE z = z_index < BEGIN_DIMS ? begin[z_index] : 0; +#endif + BEGIN_TYPE y = y_index < BEGIN_DIMS ? begin[y_index] : 0; + BEGIN_TYPE x = x_index < BEGIN_DIMS ? begin[x_index] : 0; + + batch = (BEGIN_BATCH == 0) ? batch : 0; + feature = (BEGIN_FEATURE == 0) ? feature : 0; +#ifdef OUTPUT_LAYOUT_BFZYX + z = (BEGIN_Z == 0) ? z: 0; +#endif + y = (BEGIN_Y == 0) ? y : 0; + x = (BEGIN_X == 0) ? x : 0; + + *begin_batch = FUNC_CALL(check_begin_bound)(batch, out_batch_num); + *begin_feature = FUNC_CALL(check_begin_bound)(feature, out_feature_num); +#ifdef OUTPUT_LAYOUT_BFYX + *begin_z = 0; +#elif OUTPUT_LAYOUT_BFZYX + *begin_z = FUNC_CALL(check_begin_bound)(z, out_z_num); +#endif + *begin_y = FUNC_CALL(check_begin_bound)(y, out_y_num); + *begin_x = FUNC_CALL(check_begin_bound)(x, out_x_num); +} +#endif // BEGIN_TYPE + +KERNEL(strided_slice_ref)(const __global INPUT0_TYPE* input, +#ifdef BEGIN_TYPE + const __global BEGIN_TYPE* begin, +#endif +#ifdef END_TYPE + const __global END_TYPE* end, +#endif +#ifdef STRIDE_TYPE + const __global STRIDE_TYPE* stride, +#endif + __global OUTPUT_TYPE* output) { const uint batch = get_global_id(0); const uint feature = get_global_id(1); +#ifdef STRIDE_TYPE + int step_batch, step_feature, step_z, step_y, step_x; + FUNC_CALL(get_slice_step)(stride, &step_batch, &step_feature, &step_z, &step_y, &step_x); + const int slice_steps_batch = step_batch; + const int slice_steps_feature = step_feature; + const int slice_steps_z = step_z; + const int slice_steps_y = step_y; + const int slice_steps_x = step_x; +#else // STRIDE_TYPE + const int slice_steps_batch = SLICE_STEPS_BATCH; + const int slice_steps_feature = SLICE_STEPS_FEATURE; + const int slice_steps_z = SLICE_STEPS_Z; + const int slice_steps_y = SLICE_STEPS_Y; + const int slice_steps_x = SLICE_STEPS_X; +#endif // STRIDE_TYPE +#ifdef BEGIN_TYPE + int begin_batch, begin_feature, begin_z, begin_y, begin_x; + FUNC_CALL(get_slice_begin)(begin, &begin_batch, &begin_feature, &begin_z, &begin_y, &begin_x); +#ifdef END_TYPE + FUNC_CALL(check_negative_stride)(end, slice_steps_batch, slice_steps_feature, slice_steps_z, slice_steps_y, slice_steps_x, &begin_batch, &begin_feature, &begin_z, &begin_y, &begin_x); +#else // END_TYPE + FUNC_CALL(check_negative_stride)(slice_steps_batch, slice_steps_feature, slice_steps_z, slice_steps_y, slice_steps_x, &begin_batch, &begin_feature, &begin_z, &begin_y, &begin_x); +#endif // END_TYPE + const int slice_begin_batch = begin_batch; + const int slice_begin_feature = begin_feature; + const int slice_begin_z = begin_z; + const int slice_begin_y = begin_y; + const int slice_begin_x = begin_x; +#else // BEGIN_TYPE + const int slice_begin_batch = SLICE_BEGIN_BATCH; + const int slice_begin_feature = SLICE_BEGIN_FEATURE; + const int slice_begin_z = SLICE_BEGIN_Z; + const int slice_begin_y = SLICE_BEGIN_Y; + const int slice_begin_x = SLICE_BEGIN_X; +#endif // BEGIN_TYPE #if NEW_AXIS_MODE // If NEW_AXIS_MODE that just copy input to output @@ -28,7 +281,7 @@ KERNEL(strided_slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYP y_input * INPUT0_Y_PITCH + x_input * INPUT0_X_PITCH; 
output[input_index] = input[input_index]; -#else +#else // NEW_AXIS_MODE #ifdef OUTPUT_LAYOUT_BFYX const uint z = 0; const uint y = get_global_id(2) / OUTPUT_SIZE_X; @@ -43,23 +296,23 @@ KERNEL(strided_slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYP #if SHRINK_MODE const uint in_indices[] = {INPUT_INDICES_ORDER}; const uint input_index = INPUT0_OFFSET + - (SLICE_BEGIN_BATCH + in_indices[0] * SLICE_STEPS_BATCH) * INPUT0_BATCH_PITCH + - (SLICE_BEGIN_FEATURE + in_indices[1] * SLICE_STEPS_FEATURE) * INPUT0_FEATURE_PITCH + + (slice_begin_batch + in_indices[0] * slice_steps_batch) * INPUT0_BATCH_PITCH + + (slice_begin_feature + in_indices[1] * slice_steps_feature) * INPUT0_FEATURE_PITCH + #if INPUT0_LAYOUT_BFZYX - (SLICE_BEGIN_Z + in_indices[2] * SLICE_STEPS_Z) * INPUT0_Z_PITCH + - (SLICE_BEGIN_Y + in_indices[3] * SLICE_STEPS_Y) * INPUT0_Y_PITCH + - (SLICE_BEGIN_X + in_indices[4] * SLICE_STEPS_X) * INPUT0_X_PITCH; + (slice_begin_z + in_indices[2] * slice_steps_z) * INPUT0_Z_PITCH + + (slice_begin_y + in_indices[3] * slice_steps_y) * INPUT0_Y_PITCH + + (slice_begin_x + in_indices[4] * slice_steps_x) * INPUT0_X_PITCH; #else - (SLICE_BEGIN_Y + in_indices[2] * SLICE_STEPS_Y) * INPUT0_Y_PITCH + - (SLICE_BEGIN_X + in_indices[3] * SLICE_STEPS_X) * INPUT0_X_PITCH; + (slice_begin_y + in_indices[2] * slice_steps_y) * INPUT0_Y_PITCH + + (slice_begin_x + in_indices[3] * slice_steps_x) * INPUT0_X_PITCH; #endif #else // SHRINK_MODE const uint input_index = INPUT0_OFFSET + - (SLICE_BEGIN_BATCH + batch * SLICE_STEPS_BATCH) * INPUT0_BATCH_PITCH + - (SLICE_BEGIN_FEATURE + feature * SLICE_STEPS_FEATURE) * INPUT0_FEATURE_PITCH + - (SLICE_BEGIN_Z + z * SLICE_STEPS_Z) * INPUT0_Z_PITCH + - (SLICE_BEGIN_Y + y * SLICE_STEPS_Y) * INPUT0_Y_PITCH + - (SLICE_BEGIN_X + x * SLICE_STEPS_X) * INPUT0_X_PITCH; + (slice_begin_batch + batch * slice_steps_batch) * INPUT0_BATCH_PITCH + + (slice_begin_feature + feature * slice_steps_feature) * INPUT0_FEATURE_PITCH + + (slice_begin_z + z * slice_steps_z) * INPUT0_Z_PITCH + + (slice_begin_y + y * slice_steps_y) * INPUT0_Y_PITCH + + (slice_begin_x + x * slice_steps_x) * INPUT0_X_PITCH; #endif // SHRINK_MODE const uint output_index = OUTPUT_OFFSET + @@ -70,5 +323,5 @@ KERNEL(strided_slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYP x * OUTPUT_X_PITCH; output[output_index] = ACTIVATION(input[input_index], ACTIVATION_PARAMS); -#endif +#endif // NEW_AXIS_MODE } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.cpp index 41d09bf9a50..8dbd3d6364f 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.cpp @@ -56,6 +56,7 @@ ParamsKey StridedSliceKernelRef::GetSupportedKey() const { k.EnableTensorOffset(); k.EnableTensorPitches(); k.EnableBatching(); + k.EnableDifferentTypes(); return k; } @@ -106,13 +107,46 @@ CommonDispatchData StridedSliceKernelRef::SetDefault(const strided_slice_params& return dispatchData; } +inline std::string GetInputTypeStr(uint32_t idx) { + return "INPUT" + std::to_string(idx) + "_TYPE"; +} + +inline std::string GetToInputTypeStr(uint32_t idx) { + return "TO_" + GetInputTypeStr(idx); +} + +inline std::string GetInputIndexStr(uint32_t idx) { + return "INPUT" + std::to_string(idx) + "_GET_INDEX"; +} + JitConstants 
StridedSliceKernelRef::GetJitConstants(const strided_slice_params& params) const { JitConstants jit = MakeBaseParamsJitConstants(params); - makeJitConstForParam(jit, "SLICE_BEGIN", params.striding_params[0]); - makeJitConstForParam(jit, "SLICE_END", params.striding_params[1]); - makeJitConstForParam(jit, "SLICE_STEPS", params.striding_params[2]); - + if (params.begin_type == StridedSliceArgType::Input) { + jit.AddConstant(MakeJitConstant("BEGIN_TYPE", GetInputTypeStr(params.GetIndexBegin()))); + jit.AddConstant(MakeJitConstant("TO_BEGIN_TYPE", GetToInputTypeStr(params.GetIndexBegin()))); + jit.AddConstant(MakeJitConstant("BEGIN_GET_INDEX", GetInputIndexStr(params.GetIndexBegin()))); + jit.AddConstant(MakeJitConstant("BEGIN_DIMS", params.begin_dims)); + makeJitConstForParam(jit, "BEGIN", params.begin_mask); + } else { + makeJitConstForParam(jit, "SLICE_BEGIN", params.striding_params[0]); + } + if (params.end_type == StridedSliceArgType::Input) { + jit.AddConstant(MakeJitConstant("END_TYPE", GetInputTypeStr(params.GetIndexEnd()))); + jit.AddConstant(MakeJitConstant("TO_END_TYPE", GetToInputTypeStr(params.GetIndexEnd()))); + jit.AddConstant(MakeJitConstant("END_GET_INDEX", GetInputIndexStr(params.GetIndexEnd()))); + jit.AddConstant(MakeJitConstant("END_DIMS", params.end_dims)); + makeJitConstForParam(jit, "END", params.end_mask); + } else { + makeJitConstForParam(jit, "SLICE_END", params.striding_params[1]); + } + if (params.stride_type == StridedSliceArgType::Input) { + jit.AddConstant(MakeJitConstant("STRIDE_TYPE", GetInputTypeStr(params.GetIndexStride()))); + jit.AddConstant(MakeJitConstant("STRIDE_GET_INDEX", GetInputIndexStr(params.GetIndexStride()))); + jit.AddConstant(MakeJitConstant("STRIDE_DIMS", params.stride_dims)); + } else { + makeJitConstForParam(jit, "SLICE_STEPS", params.striding_params[2]); + } jit.AddConstant(MakeJitConstant( "NEW_AXIS_MODE", std::find(params.new_axis_mask.begin(), params.new_axis_mask.end(), 1) != params.new_axis_mask.end())); @@ -170,7 +204,7 @@ KernelsData StridedSliceKernelRef::GetKernelsData(const Params& params, const op auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, "", false, false, static_cast(newParams.inputs.size())); return {kd}; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.h index 76f0610650e..0458061d021 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.h @@ -8,6 +8,12 @@ #include namespace kernel_selector { + +enum class StridedSliceArgType { + Input, + Constant +}; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // strided_slice_params //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -20,6 +26,30 @@ struct strided_slice_params : public base_params { std::vector ellipsis_mask; std::vector new_axis_mask; std::vector shrink_axis_mask; + StridedSliceArgType begin_type; + StridedSliceArgType end_type; + StridedSliceArgType stride_type; + size_t begin_dims; + size_t end_dims; + size_t stride_dims; + + uint32_t GetIndexBegin() const { + uint32_t 
input_idx = 0; + if (begin_type == StridedSliceArgType::Input) input_idx++; + return input_idx; + } + + uint32_t GetIndexEnd() const { + uint32_t input_idx = GetIndexBegin(); + if (end_type == StridedSliceArgType::Input) input_idx++; + return input_idx; + } + + uint32_t GetIndexStride() const { + uint32_t input_idx = GetIndexEnd(); + if (stride_type == StridedSliceArgType::Input) input_idx++; + return input_idx; + } }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/plugins/intel_gpu/src/plugin/ops/strided_slice.cpp b/src/plugins/intel_gpu/src/plugin/ops/strided_slice.cpp index d0e3b0bc12d..0e626f8cce2 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/strided_slice.cpp @@ -21,15 +21,17 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptrget_output_partial_shape(0); + auto input_pshape = op->get_input_partial_shape(0); + + auto begin_constant = std::dynamic_pointer_cast(op->input_value(1).get_node_shared_ptr()); + std::vector begin = begin_constant ? begin_constant->cast_vector() : std::vector{}; + auto end_constant = std::dynamic_pointer_cast(op->input_value(2).get_node_shared_ptr()); + std::vector end = end_constant ? end_constant->cast_vector() : std::vector{}; + auto stride_constant = std::dynamic_pointer_cast(op->input_value(3).get_node_shared_ptr()); + std::vector strides = stride_constant ? stride_constant->cast_vector() : std::vector{}; + do { - auto data_output = op->input_value(0); - auto begin_node = std::dynamic_pointer_cast(op->input_value(1).get_node_shared_ptr()); - auto end_node = std::dynamic_pointer_cast(op->input_value(2).get_node_shared_ptr()); - auto stride_node = std::dynamic_pointer_cast(op->input_value(3).get_node_shared_ptr()); - - auto partial_input_shape = op->get_input_partial_shape(0); - - if (!begin_node || !end_node || !stride_node || partial_input_shape.is_dynamic()) { + if (!begin_constant || !end_constant || !stride_constant || input_pshape.is_dynamic()) { break; } @@ -41,10 +43,6 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptrcast_vector(); - auto end = end_node->cast_vector(); - auto strides = stride_node->cast_vector(); - bool ones_stride = true; for (auto & s : strides) { if (s != 1) @@ -201,6 +199,7 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptrinput_value(0); auto data_node_shape = data_output.get_shape(); std::vector offset_tensor{ 0, 0, 0, 0 }; @@ -245,18 +244,32 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptrget_begin_mask(), - op->get_end_mask(), - op->get_new_axis_mask(), - op->get_shrink_axis_mask(), - op->get_ellipsis_mask(), - output_shape); - + std::shared_ptr stridedSlicePrim = nullptr; + if (begin_constant && end_constant && stride_constant) { + stridedSlicePrim = std::make_shared(layerName, + inputs[0], + begin, + end, + strides, + op->get_begin_mask(), + op->get_end_mask(), + op->get_new_axis_mask(), + op->get_shrink_axis_mask(), + op->get_ellipsis_mask(), + output_shape); + } else { + stridedSlicePrim = std::make_shared(layerName, + inputs[0], + inputs[1], + inputs[2], + inputs[3], + op->get_begin_mask(), + op->get_end_mask(), + op->get_new_axis_mask(), + op->get_shrink_axis_mask(), + op->get_ellipsis_mask(), + output_shape); + } p.add_primitive(*op, stridedSlicePrim); } diff --git a/src/plugins/intel_gpu/tests/shape_infer/strided_slice_si_test.cpp 
b/src/plugins/intel_gpu/tests/shape_infer/strided_slice_si_test.cpp index 6a2994781d4..a01a39e77bf 100644 --- a/src/plugins/intel_gpu/tests/shape_infer/strided_slice_si_test.cpp +++ b/src/plugins/intel_gpu/tests/shape_infer/strided_slice_si_test.cpp @@ -43,6 +43,57 @@ TEST_P(strided_slice_test, shape_infer) { auto& engine = get_test_engine(); + auto input_prim = std::make_shared("input", p.in_layout); + auto strided_slice_prim = std::make_shared("output", + input_info("input"), + p.begin_data, + p.end_data, + p.strides_data, + p.begin_mask, + p.end_mask, + p.new_axis_mask, + p.shrink_axis_mask, + p.ellipsis_mask, + ov::Shape{}); + + cldnn::program prog(engine); + + auto begin_mem = engine.allocate_memory(p.begin_layout); + auto end_mem = engine.allocate_memory(p.end_layout); + auto strides_mem = engine.allocate_memory(p.strides_layout); + set_values(begin_mem, p.begin_data); + set_values(end_mem, p.end_data); + set_values(strides_mem, p.strides_data); + + auto& input_node = prog.get_or_create(input_prim); + auto& strided_slice_node = prog.get_or_create(strided_slice_prim); + program_wrapper::add_connection(prog, input_node, strided_slice_node); + auto params = strided_slice_node.get_kernel_impl_params(); + auto res = strided_slice_inst::calc_output_layouts(strided_slice_node, *params); + + ASSERT_EQ(res.size(), 1); + ASSERT_EQ(res[0], p.expected_layout); +} + +INSTANTIATE_TEST_SUITE_P(smoke, strided_slice_test, + testing::ValuesIn(std::vector{ + { + layout{ov::PartialShape{1, 128, 1024}, data_types::i64, format::bfyx}, + layout{ov::PartialShape{3}, data_types::i64, format::bfyx}, {0, 0, 0}, + layout{ov::PartialShape{3}, data_types::i64, format::bfyx}, {0, 1, 0}, + layout{ov::PartialShape{3}, data_types::i64, format::bfyx}, {1, 1, 1}, + {1, 0, 1}, {1, 0, 1}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, + layout{ov::PartialShape{1, 1, 1024}, data_types::i64, format::bfyx} + }, + })); + +class strided_slice_test_four_inputs : public testing::TestWithParam { }; + +TEST_P(strided_slice_test_four_inputs, shape_infer) { + auto p = GetParam(); + + auto& engine = get_test_engine(); + auto input_prim = std::make_shared("input", p.in_layout); auto begin_prim = std::make_shared("begin", p.begin_layout); auto end_prim = std::make_shared("end", p.end_layout); @@ -85,7 +136,7 @@ TEST_P(strided_slice_test, shape_infer) { ASSERT_EQ(res[0], p.expected_layout); } -INSTANTIATE_TEST_SUITE_P(smoke, strided_slice_test, +INSTANTIATE_TEST_SUITE_P(smoke, strided_slice_test_four_inputs, testing::ValuesIn(std::vector{ { layout{ov::PartialShape{1, 128, 1024}, data_types::i64, format::bfyx}, diff --git a/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp index 453fd101e01..8523cac4b1e 100644 --- a/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp @@ -13,7 +13,7 @@ using namespace cldnn; using namespace ::tests; -TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full) { +TEST(strided_slice_gpu, test_2x2x2x2_full) { // Input (BFYX): 2x2x2x2 // Begin (BFYX): 0x0x0x0 // End (BFYX): 2x2x2x2 @@ -21,31 +21,19 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full) { // Output (BFYX): 2x2x2x2 auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2, 2 }, data_types::f32, format::bfyx, }); - auto begin = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i32, format::bfyx, }); - auto end = engine.allocate_memory({ 
ov::PartialShape{ 4 }, data_types::i32, format::bfyx }); - auto strides = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i32, format::bfyx }); + auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2, 2 }, data_types::f32, format::bfyx }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f }); - set_values(begin, { - 0, 0, 0, 0 - }); - set_values(end, { - 2, 2, 2, 2 - }); - set_values(strides, { - 1, 1, 1, 1 - }); + std::vector begin_data = { 0, 0, 0, 0 }; + std::vector end_data = { 2, 2, 2, 2 }; + std::vector strides_data = { 1, 1, 1, 1 }; topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(data("input2", begin)); - topology.add(data("input3", end)); - topology.add(data("input4", strides)); - topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {2, 2, 2, 2})); + topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 2, 2, 2})); network network(engine, topology); @@ -70,7 +58,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full) { } } -TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_full) { +TEST(strided_slice_gpu_constants, test_2x2x2x2_full) { // Input (BFYX): 2x2x2x2 // Begin (BFYX): 0x0x0x0 // End (BFYX): 2x2x2x2 @@ -127,7 +115,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_full) { } } -TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_ignore) { +TEST(strided_slice_gpu, test_2x2x2x2_ignore) { // Input (BFYX): 2x2x2x2 // Begin (BFYX): 1x1x1x1 // End (BFYX): 2x2x2x2 @@ -136,30 +124,18 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_ignore) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f }); - set_values(begin, { - 1, 1, 1, 1 - }); - set_values(end, { - 2, 2, 2, 2 - }); - set_values(strides, { - 1, 1, 1, 1 - }); + std::vector begin_data = { 1, 1, 1, 1 }; + std::vector end_data = { 2, 2, 2, 2 }; + std::vector strides_data = { 1, 1, 1, 1 }; topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(data("input2", begin)); - topology.add(data("input3", end)); - topology.add(data("input4", strides)); - topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}, {}, {2, 2, 2, 2})); + topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}, {}, {2, 2, 2, 2})); network network(engine, topology); @@ -186,7 +162,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_ignore) { } } -TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_ignore) { +TEST(strided_slice_gpu_constants, test_2x2x2x2_ignore) { // Input (BFYX): 2x2x2x2 // Begin (BFYX): 1x1x1x1 // End (BFYX): 2x2x2x2 @@ -245,7 +221,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_ignore) { } } -TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_single) { +TEST(strided_slice_gpu, 
test_2x2x2x2_single) { // Input (BFYX): 2x2x2x2 // Begin (BFYX): 1x1x1x1 // End (BFYX): 2x2x2x2 @@ -254,30 +230,18 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_single) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f - }); - set_values(begin, { - 1, 1, 1, 1 - }); - set_values(end, { - 2, 2, 2, 2 - }); - set_values(strides, { - 1, 1, 1, 1 - }); + }); + std::vector begin_data = { 1, 1, 1, 1 }; + std::vector end_data = { 2, 2, 2, 2 }; + std::vector strides_data = { 1, 1, 1, 1 }; topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(data("input2", begin)); - topology.add(data("input3", end)); - topology.add(data("input4", strides)); - topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {1, 1, 1, 1})); + topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {1, 1, 1, 1})); network network(engine, topology); @@ -301,7 +265,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_single) { } } -TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_single) { +TEST(strided_slice_gpu_constants, test_2x2x2x2_single) { // Input (BFYX): 2x2x2x2 // Begin (BFYX): 1x1x1x1 // End (BFYX): 2x2x2x2 @@ -357,7 +321,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_single) { } } -TEST(strided_slice_gpu_f32_i32, test_2x2x4x3_stride) { +TEST(strided_slice_gpu, test_2x2x4x3_stride) { // Input (BFYX): 2x2x4x3 // Begin (BFYX): 0x0x0x0 // End (BFYX): 2x2x4x3 @@ -366,9 +330,6 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x3_stride) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 3, 4 } }); - auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, @@ -378,22 +339,13 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x3_stride) { 36.f, 37.f, 38.f, 39.f, 40.f, 41.f, 42.f, 43.f, 44.f, 45.f, 46.f, 47.f }); - set_values(begin, { - 0, 0, 0, 0 - }); - set_values(end, { - 2, 2, 4, 3 - }); - set_values(strides, { - 1, 1, 2, 1 - }); + std::vector begin_data = { 0, 0, 0, 0 }; + std::vector end_data = { 2, 2, 4, 3 }; + std::vector strides_data = { 1, 1, 2, 1 }; topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(data("input2", begin)); - topology.add(data("input3", end)); - topology.add(data("input4", strides)); - topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}, {}, {2, 2, 2, 3})); + topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}, {}, {2, 2, 2, 3})); network network(engine, topology); @@ -420,7 +372,7 @@ 
TEST(strided_slice_gpu_f32_i32, test_2x2x4x3_stride) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x4x3_stride) {
+TEST(strided_slice_gpu_constants, test_2x2x4x3_stride) {
    // Input (BFYX): 2x2x4x3
    // Begin (BFYX): 0x0x0x0
    // End (BFYX): 2x2x4x3
@@ -483,7 +435,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x3_stride) {
    }
}

-TEST(strided_slice_gpu_f32_i32, test_2x2x4x4_part_stride) {
+TEST(strided_slice_gpu, test_2x2x4x4_part_stride) {
    // Input (BFYX): 2x2x4x4
    // Begin (BFYX): 1x0x0x1
    // End (BFYX): 2x2x4x4
@@ -492,9 +444,6 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x4_part_stride) {
    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 4, 4 } });
-    auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
-    auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
-    auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f,
@@ -517,22 +466,13 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x4_part_stride) {
        56.0f, 57.0f, 58.0f, 59.0f,
        60.0f, 61.0f, 62.0f, 63.0f
    });
-    set_values(begin, {
-        1, 0, 0, 1
-    });
-    set_values(end, {
-        2, 2, 4, 4
-    });
-    set_values(strides, {
-        1, 1, 1, 2
-    });
+    std::vector begin_data = { 1, 0, 0, 1 };
+    std::vector end_data = { 2, 2, 4, 4 };
+    std::vector strides_data = { 1, 1, 1, 2 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {0, 1, 1, 0}, {}, {}, {}, {}, {1, 2, 4, 2}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {0, 1, 1, 0}, {}, {}, {}, {}, {1, 2, 4, 2}));

    network network(engine, topology);

@@ -566,7 +506,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x4_part_stride) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x4x4_part_stride) {
+TEST(strided_slice_gpu_constants, test_2x2x4x4_part_stride) {
    // Input (BFYX): 2x2x4x4
    // Begin (BFYX): 1x0x0x1
    // End (BFYX): 2x2x4x4
@@ -612,14 +552,17 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x4_part_stride) {
    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
+    topology.add(input_layout("input2", begin->get_layout()));
+    topology.add(input_layout("input3", end->get_layout()));
+    topology.add(input_layout("input4", strides->get_layout()));
    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {0, 1, 1, 0}, {}, {}, {}, {}, {1, 2, 4, 2}));

    network network(engine, topology);

    network.set_input_data("input", input);
+    network.set_input_data("input2", begin);
+    network.set_input_data("input3", end);
+    network.set_input_data("input4", strides);

    auto outputs = network.execute();

@@ -649,37 +592,25 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x4_part_stride) {
    }
}

-TEST(strided_slice_gpu_f32_i32, test_2x2x4x1_new_axis_mask) {
+TEST(strided_slice_gpu, test_2x2x4x1_new_axis_mask) {
    // Input (BFYX): 2x2x4x1
    // New_axis_mask: 1
    // Output (BFYX): 1x2x2x4

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 4 } });
-    auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
-    auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
-    auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
        10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f
    });
-    set_values(begin, {
-        1, 0, 1, 0
-    });
-    set_values(end, {
-        2, 2, 4, 4
-    });
-    set_values(strides, {
-        1, 1, 1, 2
-    });
+    std::vector begin_data = { 1, 0, 1, 0 };
+    std::vector end_data = { 2, 2, 4, 4 };
+    std::vector strides_data = { 1, 1, 1, 2 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1 }, {}, {}, {2, 2, 4, 1}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, { 1 }, {}, {}, {2, 2, 4, 1}));

    network network(engine, topology);

@@ -705,7 +636,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x1_new_axis_mask) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x4x1_new_axis_mask) {
+TEST(strided_slice_gpu_constants, test_2x2x4x1_new_axis_mask) {
    // Input (BFYX): 2x2x4x1
    // New_axis_mask: 1
    // Output (BFYX): 1x2x2x4
@@ -761,36 +692,83 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x1_new_axis_mask) {
    }
}

-TEST(strided_slice_gpu_f32_i32, test_2x2x1x1_new_axis_mask_2) {
+TEST(strided_slice_gpu_four_inputs, test_2x2x4x1_new_axis_mask) {
+    // Input (BFYX): 2x2x4x1
+    // New_axis_mask: 1
+    // Output (BFYX): 1x2x2x4
+
+    auto& engine = get_test_engine();
+    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 4 } });
+    auto begin = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i64, format::bfyx });
+    auto end = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i64, format::bfyx });
+    auto strides = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i64, format::bfyx });
+
+    set_values(input, {
+        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+        10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f
+    });
+    set_values(begin, {
+        1, 0, 1, 0
+    });
+    set_values(end, {
+        2, 2, 4, 4
+    });
+    set_values(strides, {
+        1, 1, 1, 2
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input->get_layout()));
+    topology.add(input_layout("input2", begin->get_layout()));
+    topology.add(input_layout("input3", end->get_layout()));
+    topology.add(input_layout("input4", strides->get_layout()));
+    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1 }, {}, {}, {2, 2, 4, 1}));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", begin);
+    network.set_input_data("input3", end);
+    network.set_input_data("input4", strides);
+
+    auto outputs = network.execute();
+
+    ASSERT_EQ(outputs.size(), size_t(1));
+    ASSERT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector answers = {
+        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+        10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f
+    };
+
+    cldnn::mem_lock output_ptr(output, get_test_stream());
+
+    for (size_t i = 0; i < answers.size(); ++i)
+    {
+        ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(strided_slice_gpu, test_2x2x1x1_new_axis_mask_2) {
    // Input (BFYX): 2x2x1x1
    // New_axis_mask: 101
    // Output (BFYX): 1x2x1x2

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
-    auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
-    auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
-    auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f
    });
-    set_values(begin, {
-        1, 0, 1, 0
-    });
-    set_values(end, {
-        2, 2, 4, 4
-    });
-    set_values(strides, {
-        1, 1, 1, 2
-    });
+    std::vector begin_data = { 1, 0, 1, 0 };
+    std::vector end_data = { 2, 2, 4, 4 };
+    std::vector strides_data = { 1, 1, 1, 2 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1, 0, 1 }, {}, {}, {2, 2, 1, 1}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, { 1, 0, 1 }, {}, {}, {2, 2, 1, 1}));

    network network(engine, topology);

@@ -815,7 +793,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x1x1_new_axis_mask_2) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x1x1_new_axis_mask_2) {
+TEST(strided_slice_gpu_constants, test_2x2x1x1_new_axis_mask_2) {
    // Input (BFYX): 2x2x1x1
    // New_axis_mask: 101
    // Output (BFYX): 1x2x1x2
@@ -869,35 +847,80 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x1x1_new_axis_mask_2) {
    }
}

-TEST(strided_slice_gpu_f32_i32, test_2x2x1x1) {
+TEST(strided_slice_gpu_four_inputs, test_2x2x1x1_new_axis_mask_2) {
+    // Input (BFYX): 2x2x1x1
+    // New_axis_mask: 101
+    // Output (BFYX): 1x2x1x2
+
+    auto& engine = get_test_engine();
+    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
+    auto begin = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i64, format::bfyx });
+    auto end = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i64, format::bfyx });
+    auto strides = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i64, format::bfyx });
+
+    set_values(input, {
+        0.0f, 1.0f, 2.0f, 3.0f
+    });
+    set_values(begin, {
+        1, 0, 1, 0
+    });
+    set_values(end, {
+        2, 2, 4, 4
+    });
+    set_values(strides, {
+        1, 1, 1, 2
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input->get_layout()));
+    topology.add(input_layout("input2", begin->get_layout()));
+    topology.add(input_layout("input3", end->get_layout()));
+    topology.add(input_layout("input4", strides->get_layout()));
+    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1, 0, 1 }, {}, {}, {2, 2, 1, 1}));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", begin);
+    network.set_input_data("input3", end);
+    network.set_input_data("input4", strides);
+
+    auto outputs = network.execute();
+
+    ASSERT_EQ(outputs.size(), size_t(1));
+    ASSERT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector answers = {
+        0.0f, 1.0f, 2.0f, 3.0f
+    };
+
+    cldnn::mem_lock output_ptr(output, get_test_stream());
+
+    for (size_t i = 0; i < answers.size(); ++i)
+    {
+        ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(strided_slice_gpu, test_2x2x1x1) {
    // Input (BFYX): 2x2x1x1
    // Output (BFYX): 2x2x1x1

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
-    auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 1, 1, 1 } });
-    auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 1, 1, 1 } });
-    auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 1, 1, 1 } });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f
    });
-    set_values(begin, {
-        0, 0
-    });
-    set_values(end, {
-        2, 2
-    });
-    set_values(strides, {
-        1, 1
-    });
+    std::vector begin_data = { 0, 0 };
+    std::vector end_data = { 2, 2 };
+    std::vector strides_data = { 1, 1 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 0}, {}, {}, {}, {}, {2, 2, 1, 1}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {1, 0}, {}, {}, {}, {}, {2, 2, 1, 1}));

    network network(engine, topology);

@@ -922,7 +945,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x1x1) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x1x1) {
+TEST(strided_slice_gpu_constants, test_2x2x1x1) {
    // Input (BFYX): 2x2x1x1
    // Output (BFYX): 2x2x1x1
@@ -935,6 +958,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x1x1) {
    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f
    });
+
    set_values(begin, {
        0, 0
    });
@@ -975,35 +999,23 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x1x1) {
    }
}

-TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1) {
+TEST(strided_slice_gpu, test_2x2x2x1x1) {
    // Input (BFZYX): 2x2x2x1x1
    // Output (BFZYX): 1x2x2x1x1

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } });
-    auto begin = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });
-    auto end = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });
-    auto strides = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
    });
-    set_values(begin, {
-        0, 0, 0
-    });
-    set_values(end, {
-        1, 2, 2
-    });
-    set_values(strides, {
-        1, 1, 1
-    });
+    std::vector begin_data = { 0, 0, 0 };
+    std::vector end_data = { 1, 2, 2 };
+    std::vector strides_data = { 1, 1, 1 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {1, 2, 2, 1}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {1, 2, 2, 1}));

    network network(engine, topology);

@@ -1028,7 +1040,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1) {
+TEST(strided_slice_gpu_constants, test_2x2x2x1x1) {
    // Input (BFZYX): 2x2x2x1x1
    // Output (BFZYX): 1x2x2x1x1
@@ -1081,36 +1093,23 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1) {
    }
}

-
-TEST(strided_slice_gpu_i8_i64, test_2x2x2x1x1) {
+TEST(strided_slice_gpu_i8, test_2x2x2x1x1) {
    // Input (BFZYX): 2x2x2x1x1
    // Output (BFZYX): 1x2x2x1x1

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::i8, format::bfzyx, { 2, 2, 1, 1, 2 } });
-    auto begin = engine.allocate_memory({ data_types::i64, format::bfyx, { 3, 1, 1, 1 } });
-    auto end = engine.allocate_memory({ data_types::i64, format::bfyx, { 3, 1, 1, 1 } });
-    auto strides = engine.allocate_memory({ data_types::i64, format::bfyx, { 3, 1, 1, 1 } });

    set_values(input, {
        0, 1, 2, 3, 4, 5, 6, 7
    });
-    set_values(begin, {
-        0, 0, 0
-    });
-    set_values(end, {
-        1, 2, 2
-    });
-    set_values(strides, {
-        1, 1, 1
-    });
+    std::vector begin_data = { 0, 0, 0 };
+    std::vector end_data = { 1, 2, 2 };
+    std::vector strides_data = { 1, 1, 1 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {1, 2, 2, 1}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {1, 2, 2, 1}));

    network network(engine, topology);

@@ -1134,35 +1133,23 @@ TEST(strided_slice_gpu_i8_i64, test_2x2x2x1x1) {
    }
}

-TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1_2) {
+TEST(strided_slice_gpu, test_2x2x2x1x1_2) {
    // Input (BFZYX): 2x2x2x1x1
    // Output (BFZYX): 2x1x1x1x1

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } });
-    auto begin = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });
-    auto end = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });
-    auto strides = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
    });
-    set_values(begin, {
-        0, 0, 0
-    });
-    set_values(end, {
-        2, 2, 2
-    });
-    set_values(strides, {
-        1, 2, 2
-    });
+    std::vector begin_data = { 0, 0, 0 };
+    std::vector end_data = { 2, 2, 2 };
+    std::vector strides_data = { 1, 2, 2 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {2, 1, 1, 1}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 1, 1, 1}));

    network network(engine, topology);

@@ -1187,7 +1174,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1_2) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2) {
+TEST(strided_slice_gpu_constants, test_2x2x2x1x1_2) {
    // Input (BFZYX): 2x2x2x1x1
    // Output (BFZYX): 2x1x1x1x1
@@ -1293,39 +1280,27 @@ TEST(strided_slice_gpu_f32_i32, test_1x1x1x8x1_new_axis_5d) {
    }
}

-TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full_negative_stride) {
+TEST(strided_slice_gpu, test_2x2x2x2_full_negative_stride) {
    // Input (BFYX): 2x2x2x2
    // Begin (BFYX): 0x0x0x0
    // End (BFYX): 2x2x2x2
-    // Stride (BFYX): -1x1x1x1
+    // Stride (BFYX): -1x-1x1x1
    // Output (BFYX): 2x2x2x2

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2, 2 }, data_types::f32, format::bfyx });
-    auto begin = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i32, format::bfyx });
-    auto end = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i32, format::bfyx });
-    auto strides = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i32, format::bfyx });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
        10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f
    });
-    set_values(begin, {
-        0, 0, 0, 0
-    });
-    set_values(end, {
-        2, 2, 2, 2
-    });
-    set_values(strides, {
-        -1, -1, 1, 1
-    });
+    std::vector begin_data = { 0, 0, 0, 0 };
+    std::vector end_data = { 2, 2, 2, 2 };
+    std::vector strides_data = { -1, -1, 1, 1 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {2, 2, 2, 2}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 2, 2, 2}));

    network network(engine, topology);

@@ -1350,11 +1325,11 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full_negative_stride) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_full_negative_stride) {
+TEST(strided_slice_gpu_constants, test_2x2x2x2_full_negative_stride) {
    // Input (BFYX): 2x2x2x2
    // Begin (BFYX): 0x0x0x0
    // End (BFYX): 2x2x2x2
-    // Stride (BFYX): -1x1x1x1
+    // Stride (BFYX): -1x-1x1x1
    // Output (BFYX): 2x2x2x2

    auto& engine = get_test_engine();
@@ -1407,35 +1382,23 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_full_negative_stride) {
    }
}

-TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1_2_negative_all) {
+TEST(strided_slice_gpu, test_2x2x2x1x1_2_negative_all) {
    // Input (BFZYX): 2x2x2x1x1
    // Output (BFZYX): 2x1x1x1x1

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } });
-    auto begin = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });
-    auto end = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });
-    auto strides = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
    });
-    set_values(begin, {
-        0, 0, 0
-    });
-    set_values(end, {
-        2, 2, 2
-    });
-    set_values(strides, {
-        1, 2, 2
-    });
+    std::vector begin_data = { 0, 0, 0 };
+    std::vector end_data = { 2, 2, 2 };
+    std::vector strides_data = { 1, 2, 2 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {2, 1, 1, 1}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 1, 1, 1}));

    network network(engine, topology);

@@ -1460,7 +1423,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1_2_negative_all) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all) {
+TEST(strided_slice_gpu_constants, test_2x2x2x1x1_2_negative_all) {
    // Input (BFZYX): 2x2x2x1x1
    // Output (BFZYX): 2x1x1x1x1
@@ -1513,15 +1476,13 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all) {
    }
}

-
-
-TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all_dynamic) {
+TEST(strided_slice_gpu, test_2x2x2x1x1_2_negative_all_dynamic) {
    // Input (BFZYX): 2x2x2x1x1
    // Output (BFZYX): 2x1x1x1x1

    auto& engine = get_test_engine();
    auto input_lay = layout{ ov::PartialShape::dynamic(3), data_types::f32, format::bfyx };
-    auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2 }, data_types::f32, format::bfyx, });
+    auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2 }, data_types::f32, format::bfyx });
    auto begin = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
    auto end = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
    auto strides = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
@@ -1563,9 +1524,9 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all_dynamic) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all_dynamic_begin) {
+TEST(strided_slice_gpu, test_2x2x2x1x1_2_negative_all_dynamic_begin) {
    auto& engine = get_test_engine();
-    auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2 }, data_types::f32, format::bfyx, });
+    auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2 }, data_types::f32, format::bfyx });
    auto begin = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
    auto end = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
    auto strides = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
diff --git a/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/strided_slice.cpp b/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/strided_slice.cpp
new file mode 100644
index 00000000000..5c20fcc35a5
--- /dev/null
+++ b/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/strided_slice.cpp
@@ -0,0 +1,251 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "shared_test_classes/single_layer/strided_slice.hpp"
+#include "shared_test_classes/base/ov_subgraph.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "common_test_utils/test_constants.hpp"
+#include "common_test_utils/ov_tensor_utils.hpp"
+
+using namespace InferenceEngine;
+using namespace ov::test;
+
+namespace GPULayerTestsDefinitions {
+
+struct StridedSliceParams {
+    std::vector begin;
+    std::vector end;
+    std::vector stride;
+    std::vector beginMask;
+    std::vector endMask;
+    std::vector newAxisMask;
+    std::vector shrinkAxisMask;
+    std::vector ellipsisAxisMask;
+};
+
+typedef std::tuple<
+        InputShape,                        // Input shapes
+        StridedSliceParams,
+        ElementType,                       // Element type
+        ngraph::helpers::InputLayerType,   // begin/end/stride input type
+        std::map                           // Additional network configuration
+> StridedSliceLayerParamSet;
+
+class StridedSliceLayerGPUTest : public testing::WithParamInterface,
+                                 virtual public SubgraphBaseTest {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo& obj) {
+        InputShape shapes;
+        StridedSliceParams params;
+        ElementType elementType;
+        ngraph::helpers::InputLayerType restInputType;
+        TargetDevice targetDevice;
+        std::map additionalConfig;
+        std::tie(shapes, params, elementType, restInputType, additionalConfig) = obj.param;
+
+        std::ostringstream results;
+        results << "IS=" << CommonTestUtils::partialShape2str({shapes.first}) << "_";
+        results << "TS=";
+        for (const auto& item : shapes.second) {
+            results << CommonTestUtils::vec2str(item) << "_";
+        }
+        results << "netPRC=" << elementType << "_";
+        results << "begin=" << CommonTestUtils::vec2str(params.begin) << "_";
+        results << "end=" << CommonTestUtils::vec2str(params.end) << "_";
+        results << "stride=" << CommonTestUtils::vec2str(params.stride) << "_";
+        results << "begin_m=" << CommonTestUtils::vec2str(params.beginMask) << "_";
+        results << "end_m=" << CommonTestUtils::vec2str(params.endMask) << "_";
+        results << "new_axis_m=" << (params.newAxisMask.empty() ? "def" : CommonTestUtils::vec2str(params.newAxisMask)) << "_";
+        results << "shrink_m=" << (params.shrinkAxisMask.empty() ? "def" : CommonTestUtils::vec2str(params.shrinkAxisMask)) << "_";
+        results << "ellipsis_m=" << (params.ellipsisAxisMask.empty() ? "def" : CommonTestUtils::vec2str(params.ellipsisAxisMask)) << "_";
+        results << "restInputType=" << restInputType << "_";
+        results << "config=(";
+        for (const auto configEntry : additionalConfig) {
+            results << configEntry.first << ", " << configEntry.second << ":";
+        }
+        results << ")";
+
+        return results.str();
+    }
+
+    void generate_inputs(const std::vector& targetInputStaticShapes) override {
+        inputs.clear();
+        const auto& funcInputs = function->inputs();
+        for (int i = 0; i < funcInputs.size(); ++i) {
+            const auto& funcInput = funcInputs[i];
+            ov::Tensor tensor;
+            if (i == 1) {
+                tensor = ov::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
+                auto *dataPtr = tensor.data();
+                for (size_t i = 0; i < begin.size(); i++) {
+                    dataPtr[i] = static_cast(begin[i]);
+                }
+            } else if (i == 2) {
+                tensor = ov::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
+                auto *dataPtr = tensor.data();
+                for (size_t i = 0; i < end.size(); i++) {
+                    dataPtr[i] = static_cast(end[i]);
+                }
+            } else if (i == 3) {
+                tensor = ov::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
+                auto *dataPtr = tensor.data();
+                for (size_t i = 0; i < stride.size(); i++) {
+                    dataPtr[i] = static_cast(stride[i]);
+                }
+            } else {
+                tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
+            }
+            inputs.insert({funcInput.get_node_shared_ptr(), tensor});
+        }
+        inferRequestNum++;
+    }
+
+protected:
+    std::vector begin;
+    std::vector end;
+    std::vector stride;
+    size_t inferRequestNum = 0;
+
+    void SetUp() override {
+        InputShape shapes;
+        StridedSliceParams ssParams;
+        ngraph::helpers::InputLayerType restInputType;
+        std::map additionalConfig;
+        std::tie(shapes, ssParams, inType, restInputType, additionalConfig) = this->GetParam();
+
+        begin = ssParams.begin;
+        end = ssParams.end;
+        stride = ssParams.stride;
+
+        targetDevice = CommonTestUtils::DEVICE_GPU;
+
+        std::vector inputShapes;
+        inputShapes.push_back(shapes);
+        if (restInputType == ngraph::helpers::InputLayerType::PARAMETER) {
+            inputShapes.push_back(InputShape({static_cast(begin.size())}, std::vector(shapes.second.size(), {begin.size()})));
+            inputShapes.push_back(InputShape({static_cast(end.size())}, std::vector(shapes.second.size(), {end.size()})));
+            inputShapes.push_back(InputShape({static_cast(stride.size())}, std::vector(shapes.second.size(), {stride.size()})));
+        }
+
+        init_input_shapes(inputShapes);
+
+        auto params = ngraph::builder::makeDynamicParams(inType, {inputDynamicShapes.front()});
+        // auto paramNode = std::make_shared(type, ngraph::Shape(shape));
+        std::shared_ptr beginInput, endInput, strideInput;
+        if (restInputType == ngraph::helpers::InputLayerType::PARAMETER) {
+            auto beginNode = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{begin.size()});
+            auto endNode = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{end.size()});
+            auto strideNode = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{stride.size()});
+            params.push_back(beginNode);
+            params.push_back(endNode);
+            params.push_back(strideNode);
+            beginInput = beginNode;
+            endInput = endNode;
+            strideInput = strideNode;
+        } else {
+            beginInput = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{begin.size()}, begin);
+            endInput = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{end.size()}, end);
+            strideInput = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{stride.size()}, stride);
+        }
+        auto ss = std::make_shared(params[0], beginInput, endInput, strideInput, ssParams.beginMask, ssParams.endMask,
+                                   ssParams.newAxisMask, ssParams.shrinkAxisMask, ssParams.ellipsisAxisMask);
+
+        ngraph::ResultVector results;
+        for (size_t i = 0; i < ss->get_output_size(); i++) {
+            results.push_back(std::make_shared(ss->output(i)));
+        }
+
+        function = std::make_shared(results, params, "StridedSlice");
+    }
+};
+
+TEST_P(StridedSliceLayerGPUTest, CompareWithRefs) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    run();
+}
+
+namespace {
+
+std::map emptyAdditionalConfig;
+
+const std::vector inputPrecisions = {
+    ElementType::f32
+};
+
+const std::vector restInputTypes = {
+    ngraph::helpers::InputLayerType::CONSTANT,
+    ngraph::helpers::InputLayerType::PARAMETER
+};
+
+const std::vector inputShapesDynamic2D = {
+    {{-1, -1},
+     {{32, 20}, {16, 16}, {24, 16}}},
+
+    {{-1, 16},
+     {{16, 16}, {20, 16}, {32, 16}}},
+
+    {{{16, 32}, {16, 32}},
+     {{16, 32}, {32, 16}, {24, 24}}},
+};
+
+const std::vector paramsPlain2D = {
+    StridedSliceParams{ { 0, 10 }, { 16, 16 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 2, 5 }, { 16, 8 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 2, 5 }, { 16, 16 }, { 1, 2 }, { 0, 1 }, { 1, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 0 }, { 16, 16 }, { 2, 1 }, { 0, 0 }, { 1, 0 }, { }, { }, { } },
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Plain_Static_2D, StridedSliceLayerGPUTest,
+                         ::testing::Combine(
+                             ::testing::ValuesIn(static_shapes_to_test_representation({{32, 20}})),
+                             ::testing::ValuesIn(paramsPlain2D),
+                             ::testing::ValuesIn(inputPrecisions),
+                             ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
+                             ::testing::Values(emptyAdditionalConfig)),
+                         StridedSliceLayerGPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Plain_Dynamic_2D, StridedSliceLayerGPUTest,
+                         ::testing::Combine(
+                             ::testing::ValuesIn(inputShapesDynamic2D),
+                             ::testing::ValuesIn(paramsPlain2D),
+                             ::testing::ValuesIn(inputPrecisions),
+                             ::testing::ValuesIn(restInputTypes),
+                             ::testing::Values(emptyAdditionalConfig)),
+                         StridedSliceLayerGPUTest::getTestCaseName);
+
+const std::vector testCasesCommon4D = {
+    StridedSliceParams{ { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 1, 0, 0 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 0, 10, 0 }, { 1, 3, 20, 20 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, { 0, 1, 0, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 0, 20, 20 }, { 1, 5, 25, 26 }, { 1, 1, 1, 2 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 0, 0, 20 }, { 1, 2, 30, 30 }, { 1, 1, 2, 1 }, { 0, 0, 0, 1 }, { 0, 1, 0, 1 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 0, 2, 10 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 0, 1, 1 }, { 0, 0, 0, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 1, 0, 10 }, { 1, 5, 32, 30 }, { 1, 1, 1, 1 }, { 0, 1, 0, 0 }, { 0, 0, 0, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 1, 2, 10 }, { 1, 5, 32, 18 }, { 1, 1, 1, 2 }, { 0, 0, 1, 0 }, { 0, 0, 0, 1 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 0, 2, 10 }, { 1, 8, 32, 18 }, { 1, 2, 1, 2 }, { 0, 0, 1, 0 }, { 0, 0, 0, 1 }, { }, { }, { } },
+};
+
+const std::vector inputShapesDynamic4D = {
+    {{-1, -1, -1, -1},
+     {{ 1, 5, 32, 32 }, { 2, 5, 32, 32 }, { 1, 5, 64, 64 }}},
+
+    {{-1, 5, -1, -1},
+     {{ 1, 5, 32, 32 }, { 2, 5, 32, 32 }, { 3, 5, 32, 36 }}},
+
+    {{{1, 5}, 5, {32, 64}, {32, 64}},
+     {{ 2, 5, 32, 32 }, { 1, 5, 48, 32 }, { 5, 5, 32, 32 }}},
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_4D, StridedSliceLayerGPUTest,
+                         ::testing::Combine(
+                             ::testing::ValuesIn(inputShapesDynamic4D),
+                             ::testing::ValuesIn(testCasesCommon4D),
+                             ::testing::ValuesIn(inputPrecisions),
+                             ::testing::ValuesIn(restInputTypes),
+                             ::testing::Values(emptyAdditionalConfig)),
+                         StridedSliceLayerGPUTest::getTestCaseName);
+
+} // namespace
+} // namespace GPULayerTestsDefinitions