[GPU] Slice operation (#8267)

This commit is contained in:
Yaroslav Torzuk
2021-12-17 11:41:05 +02:00
committed by GitHub
parent 062d86f71f
commit 3f5fca80db
17 changed files with 674 additions and 1 deletion

View File

@@ -0,0 +1,37 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "primitive.hpp"
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Slice primitive.
/// @details Extracts a sub-tensor from the data input. The start/stop/step
/// (and optional axes) values are supplied as additional data inputs, while
/// the resulting shape is fixed up-front via @p output_shape.
struct slice : public primitive_base<slice> {
    CLDNN_DECLARE_PRIMITIVE(slice)

    /// @brief Constructs slice primitive.
    /// @param id This primitive id.
    /// @param inputs List of primitive ids: data, start, stop, step and,
    ///               optionally, axes.
    /// @param output_shape Shape of the output tensor.
    /// @param ext_prim_id Optional external primitive id.
    /// @param output_padding Optional output padding.
    slice(const primitive_id& id,
          const std::vector<primitive_id>& inputs,
          const tensor& output_shape,  // by const-ref: avoids copying the tensor argument
          const primitive_id& ext_prim_id = "",
          const padding& output_padding = padding())
        : primitive_base{id, inputs, ext_prim_id, output_padding},
          output_shape{output_shape}
    {}

    /// @brief Shape of the produced output tensor.
    tensor output_shape;
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@@ -55,6 +55,7 @@ enum class KernelType {
DEPTH_TO_SPACE,
BATCH_TO_SPACE,
SHUFFLE_CHANNELS,
SLICE,
STRIDED_SLICE,
REVERSE_SEQUENCE,
BINARY_CONVOLUTION,

View File

@@ -0,0 +1,111 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include"slice_kernel_ref.h"
#include <kernel_selector_utils.h>
#include <vector>
namespace {

// Emits one JIT constant per dimension of a slice attribute (start/end/step),
// e.g. SLICE_BEGIN_BATCH, SLICE_BEGIN_FEATURE, ..., SLICE_BEGIN_X.
// The attribute is expected in planar order: 5 values for BFZYX, otherwise BFYX.
void addJitConstantsForAttribute(kernel_selector::JitConstants &jit,
        const std::string &name, const std::vector<std::int32_t> &attribute) {
    using kernel_selector::MakeJitConstant;
    const bool is_5d = attribute.size() == 5;
    jit.AddConstant(MakeJitConstant(name + "_BATCH", attribute[0]));
    jit.AddConstant(MakeJitConstant(name + "_FEATURE", attribute[1]));
    if (is_5d)
        jit.AddConstant(MakeJitConstant(name + "_Z", attribute[2]));
    // Y/X positions shift by one when a Z component is present.
    jit.AddConstant(MakeJitConstant(name + "_Y", attribute[is_5d ? 3 : 2]));
    jit.AddConstant(MakeJitConstant(name + "_X", attribute[is_5d ? 4 : 3]));
}

} // anonymous namespace
namespace kernel_selector {
// Builds kernel data for the slice reference kernel: validates the params,
// derives the dispatch sizes, generates the JIT constants and fills in the
// OpenCL kernel arguments. Returns an empty list when validation fails.
KernelsData SliceKernelRef::GetKernelsData(const Params &params,
const optional_params &options) const {
if (!Validate(params, options)) {
return {};
}
KernelData kernel_data = KernelData::Default<slice_params>(params);
slice_params &new_params =
dynamic_cast<slice_params&>(*kernel_data.params.get());
// Global/local work sizes are derived from the output shape (see SetDefault).
auto dispatch_data = SetDefault(new_params, options);
auto entry_point = GetEntryPoint(kernelName, new_params.layerID, params, options);
// Slice-specific constants: SLICE_BEGIN/END/STEP per dimension.
auto slice_specific_jit = GetJitConstants(new_params);
auto jit = CreateJit(kernelName, slice_specific_jit, entry_point);
FillCLKernelData(kernel_data.kernels[0], dispatch_data, params.engineInfo,
kernelName, jit, entry_point);
return {kernel_data};
}
// Reference implementation: use only when no optimized kernel is available.
KernelsPriority SliceKernelRef::GetKernelsPriority(const Params&/*params*/,
const optional_params&/*options*/) const {
return DONT_USE_IF_HAVE_SOMETHING_ELSE;
}
// Describes the input/output datatypes and layouts this kernel supports.
ParamsKey SliceKernelRef::GetSupportedKey() const {
    ParamsKey k;
    k.EnableInputDataType(Datatype::INT8);
    k.EnableInputDataType(Datatype::UINT8);
    k.EnableInputDataType(Datatype::F16);
    k.EnableInputDataType(Datatype::F32);
    k.EnableInputDataType(Datatype::INT32);
    k.EnableInputDataType(Datatype::INT64);
    // Slice preserves the element type, so every enabled input datatype must
    // also be enabled for the output; i8/u8 are registered by the OCL impl
    // (attach_slice_impl) and were missing here.
    k.EnableOutputDataType(Datatype::INT8);
    k.EnableOutputDataType(Datatype::UINT8);
    k.EnableOutputDataType(Datatype::F16);
    k.EnableOutputDataType(Datatype::F32);
    k.EnableOutputDataType(Datatype::INT32);
    k.EnableOutputDataType(Datatype::INT64);
    // Only plain planar layouts (4D and 5D).
    k.EnableInputLayout(DataLayout::bfyx);
    k.EnableInputLayout(DataLayout::bfzyx);
    k.EnableOutputLayout(DataLayout::bfyx);
    k.EnableOutputLayout(DataLayout::bfzyx);
    k.EnableTensorOffset();
    k.EnableTensorPitches();
    k.EnableBatching();
    return k;
}
bool SliceKernelRef::Validate(const Params &p, const optional_params &o) const {
if (p.GetType() != KernelType::SLICE || o.GetType() != KernelType::SLICE) {
return false;
}
const slice_params &params = dynamic_cast<const slice_params&>(p);
if (params.inputs.empty())
return false;
if (params.output.Dimentions() > 5 || params.inputs[0].Dimentions() > 5)
return false;
return true;
}
// Base JIT constants (tensor layouts etc.) plus the per-dimension
// SLICE_BEGIN/END/STEP values consumed by slice_ref.cl.
JitConstants SliceKernelRef::GetJitConstants(const slice_params &params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
addJitConstantsForAttribute(jit, "SLICE_BEGIN", params.start);
addJitConstantsForAttribute(jit, "SLICE_END", params.end);
addJitConstantsForAttribute(jit, "SLICE_STEP", params.step);
return jit;
}
// One work item per output element: gws = {batch, feature, z*y*x}.
CommonDispatchData SliceKernelRef::SetDefault(const slice_params &params,
        const optional_params&) const {
    CommonDispatchData dd;
    const auto &out = params.output;
    dd.gws = { out.Batch().v, out.Feature().v,
               out.Z().v * out.Y().v * out.X().v };
    dd.lws = GetOptimalLocalWorkGroupSizes(dd.gws, params.engineInfo);
    return dd;
}
} // namespace kernel_selector

View File

@@ -0,0 +1,42 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "kernel_base_opencl.h"
#include <vector>
namespace kernel_selector {
// Parameters of the slice kernel. start/end/step are normalized to one
// int32 entry per input dimension, in planar (b, f, [z,] y, x) order.
struct slice_params: public base_params {
slice_params() : base_params(KernelType::SLICE) {}
std::vector<std::int32_t> start;
std::vector<std::int32_t> end;
std::vector<std::int32_t> step;
};

// Optional params carry no slice-specific fields; they only tag the kernel type.
struct slice_optional_params : optional_params {
slice_optional_params() : optional_params(KernelType::SLICE) {}
};
// Reference (non-optimized) OpenCL implementation of the Slice operation,
// backed by the slice_ref.cl kernel.
class SliceKernelRef: public KernelBaseOpenCL {
public:
SliceKernelRef() :
KernelBaseOpenCL { "slice_ref" } {
}
// Builds KernelData for the given params; empty result if Validate fails.
KernelsData GetKernelsData(const Params &params,
const optional_params &options) const override;
KernelsPriority GetKernelsPriority(const Params &params,
const optional_params &options) const override;
ParamsKey GetSupportedKey() const override;
bool Validate(const Params &p, const optional_params &o) const override;
private:
// Slice-specific JIT constants (SLICE_BEGIN/END/STEP per dimension).
JitConstants GetJitConstants(const slice_params &params) const;
// Dispatch sizes: one work item per output element.
CommonDispatchData SetDefault(const slice_params &params,
const optional_params&) const;
};
} // namespace kernel_selector

View File

@@ -0,0 +1,18 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "slice_kernel_selector.h"
#include "slice_kernel_ref.h"
namespace kernel_selector {
// Registers all available slice kernel implementations (currently only the
// reference one).
slice_kernel_selector::slice_kernel_selector() {
Attach<SliceKernelRef>();
}

// Picks the best attached kernel for the given SLICE params.
KernelsData slice_kernel_selector::GetBestKernels(const Params &params,
const optional_params &options) const {
return GetNaiveBestKernel(params, options, KernelType::SLICE);
}
} // namespace kernel_selector

View File

@@ -0,0 +1,23 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <kernel_selector.h>
namespace kernel_selector {
// Singleton selector choosing among the registered slice kernels.
class slice_kernel_selector : public kernel_selector_base {
public:
// Meyers singleton: constructed on first use, shared by all callers.
static slice_kernel_selector& Instance() {
static slice_kernel_selector instance_;
return instance_;
}
slice_kernel_selector();
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@@ -0,0 +1,36 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "include/batch_headers/fetch_data.cl"
// Reference slice kernel: each work item copies a single output element from
// input position (SLICE_BEGIN + out_index * SLICE_STEP) per dimension.
// gws = {batch, feature, z*y*x} -- see SliceKernelRef::SetDefault.
// Note: only the SLICE_BEGIN_*/SLICE_STEP_* constants are consumed here; the
// SLICE_END_* constants are defined but unused (output size fixes the extent).
KERNEL(slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output)
{
const uint batch = get_global_id(0);
const uint feature = get_global_id(1);
#if INPUT0_DIMS <= 4
// 4D (BFYX): decode y/x from the flattened third global dimension.
const uint xy = get_global_id(2);
const uint y = xy / OUTPUT_SIZE_X;
const uint x = xy % OUTPUT_SIZE_X;
const uint output_index = OUTPUT_GET_INDEX(batch, feature, y, x);
const uint input_index = INPUT0_GET_INDEX(
SLICE_BEGIN_BATCH + batch * SLICE_STEP_BATCH,
SLICE_BEGIN_FEATURE + feature * SLICE_STEP_FEATURE,
SLICE_BEGIN_Y + y * SLICE_STEP_Y,
SLICE_BEGIN_X + x * SLICE_STEP_X);
#elif INPUT0_DIMS == 5
// 5D (BFZYX): decode z, then y/x, from the flattened third global dimension.
const uint xyz = get_global_id(2);
const uint yx = xyz % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
const uint z = xyz / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
const uint y = yx / OUTPUT_SIZE_X;
const uint x = yx % OUTPUT_SIZE_X;
const uint output_index = OUTPUT_GET_INDEX(batch, feature, z, y, x);
const uint input_index = INPUT0_GET_INDEX(
SLICE_BEGIN_BATCH + batch * SLICE_STEP_BATCH,
SLICE_BEGIN_FEATURE + feature * SLICE_STEP_FEATURE,
SLICE_BEGIN_Z + z * SLICE_STEP_Z,
SLICE_BEGIN_Y + y * SLICE_STEP_Y,
SLICE_BEGIN_X + x * SLICE_STEP_X);
#endif
// ACTIVATION is a fused-activation hook; identity when none is fused.
output[output_index] = ACTIVATION(input[input_index], ACTIVATION_PARAMS);
}

View File

@@ -65,6 +65,7 @@ void register_implementations() {
REGISTER_OCL(softmax);
REGISTER_OCL(space_to_batch);
REGISTER_OCL(space_to_depth);
REGISTER_OCL(slice);
REGISTER_OCL(strided_slice);
REGISTER_OCL(tile);
REGISTER_OCL(lstm_dynamic_input);

View File

@@ -53,6 +53,7 @@
#include "intel_gpu/primitives/scatter_nd_update.hpp"
#include "intel_gpu/primitives/select.hpp"
#include "intel_gpu/primitives/shuffle_channels.hpp"
#include "intel_gpu/primitives/slice.hpp"
#include "intel_gpu/primitives/softmax.hpp"
#include "intel_gpu/primitives/space_to_batch.hpp"
#include "intel_gpu/primitives/strided_slice.hpp"
@@ -73,7 +74,7 @@ void register_implementations();
namespace detail {
#define REGISTER_OCL(prim) \
#define REGISTER_OCL(prim) \
struct attach_##prim##_impl { \
attach_##prim##_impl(); \
}
@@ -130,6 +131,7 @@ REGISTER_OCL(scatter_elements_update);
REGISTER_OCL(scatter_nd_update);
REGISTER_OCL(select);
REGISTER_OCL(shuffle_channels);
REGISTER_OCL(slice);
REGISTER_OCL(softmax);
REGISTER_OCL(space_to_batch);
REGISTER_OCL(space_to_depth);

View File

@@ -0,0 +1,138 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <slice_inst.h>
#include <slice/slice_kernel_ref.h>
#include <data_inst.h>
#include <intel_gpu/runtime/error_handler.hpp>
#include <impls/implementation_map.hpp>
#include <slice/slice_kernel_selector.h>
#include "primitive_base.hpp"

#include <algorithm>
#include <cstddef>
#include <numeric>  // std::iota (was relied on transitively)
#include <vector>
namespace cldnn {
namespace ocl {
namespace {
// Reads the constant data attached to `node` and converts every element to
// int32. Enabled only for integral element types T.
template<typename T, class = typename std::enable_if<std::is_integral<T>::value>::type>
std::vector<std::int32_t> extractIntegerData(const data_node& node, const stream& stream) {
    mem_lock<T> lock{node.get_attached_memory_ptr(), stream};
    const T* begin = lock.data();
    const auto count = node.get_output_layout().count();
    std::vector<std::int32_t> result;
    result.reserve(count);
    std::copy(begin, begin + count, std::back_inserter(result));
    return result;
}
// Runtime dispatch over the node's element type to the typed reader above.
// Slice parameter inputs must be integral; any other type is a usage error.
std::vector<std::int32_t> extractIntegerData(const data_node& node, const stream& stream) {
switch (node.get_output_layout().data_type) {
case data_types::u8:
return extractIntegerData<std::uint8_t>(node, stream);
case data_types::i8:
return extractIntegerData<std::int8_t>(node, stream);
case data_types::i32:
return extractIntegerData<std::int32_t>(node, stream);
case data_types::i64:
return extractIntegerData<std::int64_t>(node, stream);
default:
// NOTE(review): presumably this macro throws/aborts; the trailing
// return {} below is only reached if it does not.
CLDNN_ERROR_DATA_TYPES_MISMATCH(node.id(), "Slice parameter",
node.get_output_layout().data_type, "Any integral type",
data_types::i32, "Slice parameters should be of integral type.");
}
return {};
}
// Returns the tensor's dimensions in planar (b, f, ...) order.
// DataTensor::LogicalDims() reports them reversed (x-first), so flip them.
std::vector<std::int32_t> extractShape(kernel_selector::Tensor::DataTensor& tensor) {
    const auto logical_dims = tensor.LogicalDims();
    std::vector<std::int32_t> shape{logical_dims.rbegin(), logical_dims.rend()};
    return shape;
}
} // namespace
// OpenCL implementation of the slice primitive.
struct slice_impl : typed_primitive_impl_ocl<slice> {
using parent = typed_primitive_impl_ocl<slice>;
using parent::parent;

// Positions of the primitive's dependencies. kAxes is optional: it is only
// present when the node has kInputsNum dependencies.
enum InputIndices {
kData,
kStart,
kEnd,
kStep,
kAxes,
kInputsNum
};

std::unique_ptr<primitive_impl> clone() const override {
return make_unique<slice_impl>(*this);
}

// Reads the constant start/stop/step (and optional axes) inputs, normalizes
// them to one entry per input dimension, and selects the best kernel.
static primitive_impl* create(const slice_node& arg) {
auto params = get_default_params<kernel_selector::slice_params>(
arg);
auto op_params = get_default_optional_params<
kernel_selector::slice_optional_params>(
arg.get_program());
const auto& inputs = arg.get_dependencies();
const stream& stream = arg.get_program().get_stream();
// Slice parameters must be constant (data) nodes to be read at build time.
auto start_elts = extractIntegerData(inputs[InputIndices::kStart]->as<data>(), stream);
auto end_elts = extractIntegerData(inputs[InputIndices::kEnd]->as<data>(), stream);
auto step_elts = extractIntegerData(inputs[InputIndices::kStep]->as<data>(), stream);
auto data_shape = extractShape(params.inputs[0]);
// Default axes = [0, 1, ..., rank-1] when the axes input is absent.
std::vector<std::int32_t> axes(data_shape.size());
if (inputs.size() == InputIndices::kInputsNum)
// NOTE(review): std::move on this rvalue is redundant.
axes = std::move(extractIntegerData(inputs[InputIndices::kAxes]->as<data>(), stream));
else
std::iota(axes.begin(), axes.end(), 0);
// Dimensions not mentioned in `axes` keep the full range with step 1.
std::vector<std::int32_t> selected_start(data_shape.size(), 0);
std::vector<std::int32_t> selected_step(data_shape.size(), 1);
std::vector<std::int32_t> selected_end(data_shape);
for (int axe = 0; axe < axes.size(); axe++) {
// Negative axes count from the last dimension.
auto transformed_axe = axes[axe] < 0 ? data_shape.size() + axes[axe] : axes[axe];
auto start = axes[axe] < 0 ? start_elts[axe] : start_elts[axe];
auto end = end_elts[axe];
auto dim_size = data_shape[transformed_axe];
// Negative start/end count from the dimension's end; both are clamped to
// [0, dim_size-1]. NOTE(review): end is clamped to dim_size-1 too, but
// slice_ref.cl only consumes begin/step plus the fixed output shape, so
// the end value is effectively informational here.
selected_start[transformed_axe] = std::max(std::min(start < 0 ? dim_size + start : start, dim_size - 1), 0);
selected_end[transformed_axe] = std::max(std::min(end < 0 ? dim_size + end : end, dim_size - 1), 0);
selected_step[transformed_axe] = step_elts[axe];
}
params.start = std::move(selected_start);
params.end = std::move(selected_end);
params.step = std::move(selected_step);
auto &kernel_selector =
kernel_selector::slice_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(params, op_params);
CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
return new slice_impl(arg, best_kernels[0]);
}
};
namespace detail {
// Registers the OCL slice implementation for every supported datatype/layout
// pair; this list must stay in sync with SliceKernelRef::GetSupportedKey.
attach_slice_impl::attach_slice_impl() {
    implementation_map<slice>::add(impl_types::ocl, slice_impl::create, {
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::u8, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        std::make_tuple(data_types::i32, format::bfyx),
        std::make_tuple(data_types::i64, format::bfyx),
        std::make_tuple(data_types::f16, format::bfzyx),
        std::make_tuple(data_types::f32, format::bfzyx),
        // u8/i8 bfzyx were previously missing: the bfyx tuples were listed
        // twice instead, leaving 5D u8/i8 slices unregistered.
        std::make_tuple(data_types::u8, format::bfzyx),
        std::make_tuple(data_types::i8, format::bfzyx),
        std::make_tuple(data_types::i32, format::bfzyx),
        std::make_tuple(data_types::i64, format::bfzyx),
    });
}
} // namespace detail
} // namespace ocl
} // namespace cldnn

View File

@@ -0,0 +1,38 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <intel_gpu/primitives/slice.hpp>
#include "primitive_inst.h"
#include <intel_gpu/runtime/error_handler.hpp>
namespace cldnn {
// Program-graph node for the slice primitive.
template <>
struct typed_program_node<slice> : public typed_program_node_base<slice> {
using parent = typed_program_node_base<slice>;
public:
using parent::parent;
// Dependency accessor; index 0 is the data input, 1..4 are the
// start/stop/step/axes parameter inputs.
program_node& input(std::size_t index = 0) const { return get_dependency(index); }
};
using slice_node = typed_program_node<slice>;
// Runtime instance of the slice primitive.
template <>
class typed_primitive_inst<slice> : public typed_primitive_inst_base<slice> {
using parent = typed_primitive_inst_base<slice>;
public:
// Output layout comes from the primitive's fixed output_shape.
static layout calc_output_layout(slice_node const& node);
static std::string to_string(slice_node const& node);
public:
typed_primitive_inst(network& network, slice_node const& desc);
};
using slice_inst = typed_primitive_inst<slice>;
} // namespace cldnn

View File

@@ -0,0 +1,40 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <slice_inst.h>
#include "primitive_type_base.h"
#include <sstream>
#include <json_object.h>
namespace cldnn {
// Returns the singleton type descriptor shared by all slice primitives.
primitive_type_id slice::type_id() {
static primitive_type_base<slice> instance;
return &instance;
}
// Nothing slice-specific to initialize; forwards to the generic base.
slice_inst::typed_primitive_inst(network& network, slice_node const& node)
: parent(network, node) {}
// Output layout: data type and format inherited from the data input, shape
// fixed at primitive creation time via output_shape.
layout slice_inst::calc_output_layout(slice_node const& node) {
    const auto desc = node.get_primitive();
    const auto in_layout = node.input(0).get_output_layout();
    return layout{in_layout.data_type, in_layout.format, desc->output_shape};
}
// Human-readable JSON dump of the node and its parameter inputs.
std::string slice_inst::to_string(slice_node const& node) {
    auto node_info = node.desc_to_json();
    json_composite slice_info;
    slice_info.add("input id", node.input().id());
    slice_info.add("begin_param id", node.get_dependency(1).id());
    slice_info.add("end_param id", node.get_dependency(2).id());
    slice_info.add("step_param id", node.get_dependency(3).id());
    // The axes input is optional (a slice may be built from only
    // data/start/stop/step); dereferencing dependency 4 unconditionally
    // would fail for 4-input nodes.
    if (node.get_dependencies().size() > 4)
        slice_info.add("axis_param id", node.get_dependency(4).id());
    node_info->add("slice info", slice_info);
    std::stringstream primitive_description;
    node_info->dump(primitive_description);
    return primitive_description.str();
}
} // namespace cldnn

View File

@@ -0,0 +1,144 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/slice.hpp>
#include <intel_gpu/primitives/data.hpp>
#include <random>
#include <algorithm>
#include <vector>
using namespace cldnn;
using namespace ::tests;
namespace {
// Typed fixture for slice tests. Each TYPED_TEST only fills in the fixture
// fields (shapes, start/stop/step memories, expected output); the network is
// built and executed in TearDown(), which runs after the test body.
template<typename T>
class SliceTest : public ::testing::Test {
public:
// Generates [0, 1, ..., size-1] as the input tensor contents, so expected
// outputs can be read off as linear input indices.
static std::vector<T> GenInput(int size) {
std::vector<T> result;
for (int i = 0; i < size; i++)
result.push_back(i);
return result;
}
// Builds the topology from the fixture fields, runs it and compares the
// produced buffer against expected_output_.
void TearDown() override {
assert(input_shape_.size() == 4 || input_shape_.size() == 5);
format input_format = input_shape_.size() == 4 ? format::bfyx : format::bfzyx;
layout data_layout ( input_type_, input_format, tensor{input_shape_} );
std::vector<T> input_vals = GenInput(data_layout.get_linear_size());
memory::ptr input = engine_.allocate_memory(data_layout);
set_values(input, input_vals);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(data("start", start_));
topology.add(data("stop", stop_));
topology.add(data("step", step_));
// The axes input is optional; only added when a test sets axes_.
std::vector<primitive_id> inputs {"input", "start", "stop", "step"};
if (axes_) {
topology.add(data("axes", axes_));
inputs.push_back("axes");
}
topology.add(slice("slice", inputs, tensor{output_shape_}));
network network(engine_, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
EXPECT_EQ(outputs.size(), size_t(1));
EXPECT_EQ(outputs.begin()->first, "slice");
auto output = outputs.at("slice").get_memory();
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
ASSERT_EQ(output_ptr.size(), expected_output_.size());
for (size_t i = 0; i < output_ptr.size(); ++i)
EXPECT_TRUE(are_equal(expected_output_[i], output_ptr[i], 2e-3));
}
// Maps the element type T to the cldnn data type (specialized below).
data_types DataType() const;
protected:
engine& engine_ = get_test_engine();
std::vector<std::int32_t> input_shape_;   // 4D (bfyx) or 5D (bfzyx)
data_types input_type_ {DataType()};
memory::ptr start_;                       // slice start indices
memory::ptr stop_;                        // slice stop indices
memory::ptr step_;                        // slice steps (may be negative)
memory::ptr axes_;                        // optional; null when unused
std::vector<std::int32_t> output_shape_;
std::vector<T> expected_output_;
};
// Per-element-type mapping used by the fixture's input_type_ field.
template<>
data_types SliceTest<float>::DataType() const {return data_types::f32;}
template<>
data_types SliceTest<int>::DataType() const { return data_types::i32; }
template<>
data_types SliceTest<long long>::DataType() const { return data_types::i64; }
using testing::Types;
typedef Types<float, int, long long> DataTypes;
TYPED_TEST_SUITE(SliceTest, DataTypes);
// 4D slice with positive steps; expected values are linear input indices
// (input is 0..N-1, see GenInput).
TYPED_TEST(SliceTest, bfyx_positive_step) {
this->input_shape_ = { 1, 2, 100, 12 };
this->start_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
set_values<int64_t>(this->start_, {0, 1, 0, 1});
this->stop_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
set_values<int64_t>(this->stop_, { 1, 2, 5, 100 });
this->step_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
set_values<int64_t>(this->step_, { 1, 1, 1, 10 });
this->output_shape_ = { 1, 1, 5, 10 };
this->expected_output_ = {
1201, 1211, 1221, 1231, 1241, 1301, 1311, 1321, 1331, 1341,
1401, 1411, 1421, 1431, 1441, 1501, 1511, 1521, 1531, 1541,
1601, 1611, 1621, 1631, 1641, 1701, 1711, 1721, 1731, 1741,
1801, 1811, 1821, 1831, 1841, 1901, 1911, 1921, 1931, 1941,
2001, 2011, 2021, 2031, 2041, 2101, 2111, 2121, 2131, 2141
};
}
// 4D slice with negative steps: start/stop are swapped relative to the
// positive-step test and traversal runs backwards through each dimension.
TYPED_TEST(SliceTest, bfyx_negative_step) {
this->input_shape_ = { 1, 2, 100, 12 };
this->start_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
set_values<int64_t>(this->start_, { 1, 2, 5, 100 });
this->stop_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
set_values<int64_t>(this->stop_, {0, 1, 0, 1});
this->step_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
set_values<int64_t>(this->step_, { -1, -1, -1, -10 });
this->output_shape_ = { 1, 1, 5, 10 };
this->expected_output_ = {
1799, 1789, 1779, 1769, 1759, 1699, 1689, 1679, 1669, 1659,
1599, 1589, 1579, 1569, 1559, 1499, 1489, 1479, 1469, 1459,
1399, 1389, 1379, 1369, 1359, 1299, 1289, 1279, 1269, 1259,
1199, 1189, 1179, 1169, 1159, 1099, 1089, 1079, 1069, 1059,
999, 989, 979, 969, 959, 899, 889, 879, 869, 859
};
}
// 5D (bfzyx) slice with unit steps: takes the leading 1x2x2x2x2 corner of
// the input tensor.
TYPED_TEST(SliceTest, bfzyx) {
this->input_shape_ = { 2, 3, 10, 12, 5 };
this->start_ = this->engine_.allocate_memory({ data_types::i64, format::bfzyx, { 5, 1, 1, 1 } });
set_values<int64_t>(this->start_, { 0, 0, 0, 0, 0 });
this->stop_ = this->engine_.allocate_memory({ data_types::i64, format::bfzyx, { 5, 1, 1, 1 } });
set_values<int64_t>(this->stop_, {1, 2, 2, 2, 2});
this->step_ = this->engine_.allocate_memory({ data_types::i64, format::bfzyx, { 5, 1, 1, 1 } });
set_values<int64_t>(this->step_, { 1, 1, 1, 1, 1 });
this->output_shape_ = { 1, 2, 2, 2, 2 };
this->expected_output_ = {
0, 1, 10, 11, 120, 121, 130, 131,
600, 601, 610, 611, 720, 721, 730, 731
};
}
} // anonymous namespace