From efb602e13b7edf2ca617edc5593fb658f069bf94 Mon Sep 17 00:00:00 2001 From: Andrew Kwangwoong Park Date: Thu, 5 Jan 2023 19:31:32 +0900 Subject: [PATCH] [GPU] Update strided slice impl for non-constant begin/end/stride support (#14619) * Add strided slice dynamic TCs for gpuFunctests Signed-off-by: Andrew Park * Update StridedSliceLayerGPUTest for rest inputs' type (parameter) Signed-off-by: Andrew Park * remove W/A for strided slice Signed-off-by: Andrew Park * primitive API updates for dynamic begin/end/stride inputs support Signed-off-by: Andrew Park * Update ocl impl and kernel param, and strided_slice_ref kernel w/o memory_deps Signed-off-by: Andrew Park * Update TCs for new API w/ vectors on strided_slice_gpu unittests Signed-off-by: Andrew Park * Apply code review Signed-off-by: Andrew Park * Apply padding when the size of rest input data is smaller than actual input rank Signed-off-by: Andrew Park * Update strided_slice_optimize pass to run only when all deps are constant Signed-off-by: Andrew Park Signed-off-by: Andrew Park --- .../intel_gpu/primitives/strided_slice.hpp | 44 ++ .../strided_slice_optimize.cpp | 9 + .../src/graph/impls/ocl/strided_slice.cpp | 121 +++-- .../intel_gpu/src/graph/primitive_inst.cpp | 16 +- .../intel_gpu/src/graph/program_node.cpp | 22 - .../intel_gpu/src/graph/strided_slice.cpp | 68 ++- .../cl_kernels/strided_slice_ref.cl | 283 ++++++++++- .../strided_slice_kernel_ref.cpp | 44 +- .../strided_slice/strided_slice_kernel_ref.h | 30 ++ .../src/plugin/ops/strided_slice.cpp | 61 ++- .../shape_infer/strided_slice_si_test.cpp | 53 +- .../test_cases/strided_slice_gpu_test.cpp | 455 ++++++++---------- .../dynamic/strided_slice.cpp | 251 ++++++++++ 13 files changed, 1065 insertions(+), 392 deletions(-) create mode 100644 src/tests/functional/plugin/gpu/single_layer_tests/dynamic/strided_slice.cpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/strided_slice.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/strided_slice.hpp index 2ae712be600..1609d2ac6b8 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/strided_slice.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/strided_slice.hpp @@ -46,6 +46,9 @@ struct strided_slice : public primitive_base { const ov::Shape out_size, const padding& output_padding = padding()) : primitive_base(id, {input, begin_id, end_id, strides_id}, {output_padding}), + begin({}), + end({}), + strides({}), begin_mask(begin_mask), end_mask(end_mask), new_axis_mask(new_axis_mask), @@ -53,6 +56,47 @@ struct strided_slice : public primitive_base { ellipsis_mask(ellipsis_mask), out_size(out_size) {} + /// @brief Constructs strided_slice primitive with constant begin/end/stride + /// @param id This primitive id. + /// @param input Input data primitive id. + /// @param begin Begin indexes for input. + /// @param end End indexes for input. + /// @param strides Strides for input. + /// @param begin_mask Array of bits, that provide replace begin[i] to max possible range in that dimension. + /// @param end_mask Array of bits, that provide replace end[i] to max possible range in that dimension. + /// @param new_axis_mask Array of bits, that provide adding a new length 1 dimension at ith position in the output tensor. + /// @param shrink_axis_mask Array of bits, that provide shrinks the dimensionality by 1, taking on the value at index begin[i]. + /// @param ellipsis_mask Array of bits, that provide inserts missing dimensions on a position of a non-zero bit.
+ /// @param out_size Size of output tensor + strided_slice(const primitive_id& id, + const input_info& input, + const std::vector& begin, + const std::vector& end, + const std::vector& strides, + const std::vector& begin_mask, + const std::vector& end_mask, + const std::vector& new_axis_mask, + const std::vector& shrink_axis_mask, + const std::vector& ellipsis_mask, + const ov::Shape out_size, + const padding& output_padding = padding()) + : primitive_base(id, {input}, {output_padding}), + begin(begin), + end(end), + strides(strides), + begin_mask(begin_mask), + end_mask(end_mask), + new_axis_mask(new_axis_mask), + shrink_axis_mask(shrink_axis_mask), + ellipsis_mask(ellipsis_mask), + out_size(out_size) {} + + /// @brief Begin indexes for input + std::vector begin; + /// @brief End indexes for input + std::vector end; + /// @brief Strides for input + std::vector strides; /// @brief Array of bits, that provide replace begin[i] to max possible range in that dimension. std::vector begin_mask; /// @brief Array of bits, that provide replace end[i] to max possible range in that dimension. diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/strided_slice_optimize.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/strided_slice_optimize.cpp index 10926ab55c6..79af41e6a58 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/strided_slice_optimize.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/strided_slice_optimize.cpp @@ -32,6 +32,15 @@ void strided_slice_optimize::run(program& p) { continue; auto& deps = node->get_dependencies(); + auto is_other_deps_constant = [deps]() { + for (size_t i = 1; i < deps.size(); i++) { + if (!deps[i].first->is_type()) return false; + } + return true; + }; + if (!is_other_deps_constant()) + continue; + for (size_t i = deps.size(); i--;) if (deps[i].first->is_type()) node->remove_dependency(i); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp index 10f181f17fd..03f78d1bc14 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp @@ -26,7 +26,7 @@ std::vector& pad_vector_to_size(std::vector& data, size_t size, DT value) template std::vector& vector_assign_if_not_mask(std::vector& dst, const T& src, const std::vector& mask) { for (size_t i = 0; i < dst.size(); ++i) { - if (!mask[i]) + if (mask[i]) dst[i] = src; } return dst; @@ -35,7 +35,7 @@ std::vector& vector_assign_if_not_mask(std::vector& dst, const T& src, con template std::vector& vector_assign_if_not_mask(std::vector& dst, const std::vector& src, const std::vector& mask) { for (size_t i = 0; i < dst.size(); ++i) { - if (!mask[i]) + if (mask[i]) dst[i] = src[i]; } return dst; @@ -64,13 +64,53 @@ public: auto op_params = get_default_optional_params(impl_param.get_program()); const size_t dims_num = params.inputs[0].Dimentions(); + std::vector begin(prim->begin.begin(), prim->begin.end()); + std::vector end(prim->end.begin(), prim->end.end()); + std::vector strides(prim->strides.begin(), prim->strides.end()); // Getting data from constant inputs. 
There are 3 args: Begin, End, Stride - for (size_t i = 1; i < arg.get_dependencies().size(); ++i) { - OPENVINO_ASSERT(impl_param.memory_deps.count(i) > 0, "[GPU] Can't find StridedSlice memory dependency"); - auto mem = impl_param.memory_deps.at(i); - std::vector sizes = read_vector(mem, impl_param.prog->get_stream()); - pad_vector_to_size(sizes, dims_num, i != 1); // for "begin" completion used 0 value, for other - 1 - params.striding_params.push_back(sizes); + if (!begin.empty()) { + pad_vector_to_size(begin, dims_num, 0); + params.begin_type = kernel_selector::StridedSliceArgType::Constant; + params.striding_params.push_back(begin); + } else { + params.begin_type = kernel_selector::StridedSliceArgType::Input; + auto begin_layout = impl_param.get_input_layout(1); + params.inputs.push_back(convert_data_tensor(begin_layout)); + params.begin_dims = begin_layout.count(); + } + + auto get_index_end = [&]() { + size_t offset = 1; + if (begin.empty() && params.begin_type == kernel_selector::StridedSliceArgType::Input) + offset++; + return offset; + }; + if (!end.empty()) { + pad_vector_to_size(end, dims_num, 1); + params.end_type = kernel_selector::StridedSliceArgType::Constant; + params.striding_params.push_back(end); + } else { + params.end_type = kernel_selector::StridedSliceArgType::Input; + auto end_layout = impl_param.get_input_layout(get_index_end()); + params.inputs.push_back(convert_data_tensor(end_layout)); + params.end_dims = end_layout.count(); + } + + auto get_index_stride = [&]() { + size_t offset = get_index_end(); + if (end.empty() && params.end_type == kernel_selector::StridedSliceArgType::Input) + offset++; + return offset; + }; + if (!strides.empty()) { + pad_vector_to_size(strides, dims_num, 1); + params.stride_type = kernel_selector::StridedSliceArgType::Constant; + params.striding_params.push_back(strides); + } else { + params.stride_type = kernel_selector::StridedSliceArgType::Input; + auto stride_layout = impl_param.get_input_layout(get_index_stride()); + params.inputs.push_back(convert_data_tensor(stride_layout)); + params.stride_dims = stride_layout.count(); } auto begin_mask_ = prim->begin_mask; @@ -82,17 +122,10 @@ public: std::vector end_mask(end_mask_.begin(), end_mask_.end()); std::vector new_axis_mask(new_axis_mask_.begin(), new_axis_mask_.end()); std::vector shrink_axis_mask(shrink_axis_mask_.begin(), shrink_axis_mask_.end()); - // Plugin requires inverted mask values. Consider changing primitive impl to be aligned with the spec. - for (auto& b : begin_mask) { - b = 1 - b; - } - for (auto& e : end_mask) { - e = 1 - e; - } params.end_mask = end_mask; - pad_vector_to_size(params.end_mask, dims_num, 1); + pad_vector_to_size(params.end_mask, dims_num, 0); params.begin_mask = begin_mask; - pad_vector_to_size(params.begin_mask, dims_num, 1); + pad_vector_to_size(params.begin_mask, dims_num, 0); params.new_axis_mask = new_axis_mask; params.shrink_axis_mask = shrink_axis_mask; @@ -103,34 +136,36 @@ public: std::vector out_shape; for (const auto& dim : logical_dims) out_shape.push_back(static_cast(dim)); - // If the ith bit of begin_mask is not set, begin[i] is ignored and the range of the appropriate dimension starts from 0. - vector_assign_if_not_mask(params.striding_params[0], 0, params.begin_mask); - // If the ith bit of end_mask is not set, end[i] is ignored and the fullest possible range in that dimension is used - // instead. 
- vector_assign_if_not_mask(params.striding_params[1], out_shape, params.end_mask); - for (size_t dim = 0; dim < params.striding_params[2].size(); dim++) { - if (params.striding_params[0][dim] < 0) - params.striding_params[0][dim] = std::max(out_shape[dim] + params.striding_params[0][dim], (int32_t)0); - if (params.striding_params[1][dim] < 0) - params.striding_params[1][dim] = std::max(out_shape[dim] + params.striding_params[1][dim], (int32_t)0); + if (params.striding_params.size() == 3) { + // If the ith bit of begin_mask is not set, begin[i] is ignored and the range of the appropriate dimension starts from 0. + vector_assign_if_not_mask(params.striding_params[0], 0, params.begin_mask); + // If the ith bit of end_mask is not set, end[i] is ignored and the fullest possible range in that dimension is used + // instead. + vector_assign_if_not_mask(params.striding_params[1], out_shape, params.end_mask); + for (size_t dim = 0; dim < params.striding_params[2].size(); dim++) { + if (params.striding_params[0][dim] < 0) + params.striding_params[0][dim] = std::max(out_shape[dim] + params.striding_params[0][dim], (int32_t)0); + if (params.striding_params[1][dim] < 0) + params.striding_params[1][dim] = std::max(out_shape[dim] + params.striding_params[1][dim], (int32_t)0); - params.striding_params[0][dim] = std::min(params.striding_params[0][dim], out_shape[dim]); - params.striding_params[1][dim] = std::min(params.striding_params[1][dim], out_shape[dim]); + params.striding_params[0][dim] = std::min(params.striding_params[0][dim], out_shape[dim]); + params.striding_params[1][dim] = std::min(params.striding_params[1][dim], out_shape[dim]); - auto& begin = params.striding_params[0][dim]; - auto& end = params.striding_params[1][dim]; - auto& stride = params.striding_params[2][dim]; - bool is_reverse = stride < 0; - // If begin > end && is_reverse, then we don't need to adjust begin/end values, the kernel will process it correctly - // If begin <= end, then we swap begin/end values and subtruct 1 from each of them - // E.g. out_shape[dim] = 100; begin=0; end=100; stride=-1 - // swap: begin=100; end=0; - // sub: begin=99; end=-1; - // So the kernel will put the slices [99, 0] in reversed order as expected. - if (is_reverse && begin <= end) { - std::swap(begin, end); - begin--; - end--; + auto& begin = params.striding_params[0][dim]; + auto& end = params.striding_params[1][dim]; + auto& stride = params.striding_params[2][dim]; + bool is_reverse = stride < 0; + // If begin > end && is_reverse, then we don't need to adjust begin/end values, the kernel will process it correctly + // If begin <= end, then we swap begin/end values and subtruct 1 from each of them + // E.g. out_shape[dim] = 100; begin=0; end=100; stride=-1 + // swap: begin=100; end=0; + // sub: begin=99; end=-1; + // So the kernel will put the slices [99, 0] in reversed order as expected. 
+ if (is_reverse && begin <= end) { + std::swap(begin, end); + begin--; + end--; + } } } diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 99d32622d8c..1a24b526ad6 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -10,11 +10,9 @@ #include "arg_max_min_inst.h" #include "fully_connected_inst.h" #include "convolution_inst.h" -#include "strided_slice_inst.h" #include "crop_inst.h" #include "deconvolution_inst.h" #include "shape_of_inst.h" -#include "strided_slice_inst.h" #include "gemm_inst.h" #include "experimental_detectron_roi_feature_extractor_inst.hpp" #include "compilation_context.hpp" @@ -170,19 +168,7 @@ void primitive_inst::update_shape() { if (_node->is_type() && !input_shape_changed) return; - // Strided slice loads data from {1,2,3} dependencies in impl::create method. - // It means that this data must be put into impl_params map - // Thus we treat it as "dynamic" case - // TODO: Remove once strided slice impl support runtime tensors for begin/end/stride - bool strided_slice_wa = false; - if (_node->is_type()) { - for (size_t i = 1; i < _node->get_dependencies().size(); i++) { - if (!_node->get_dependency(i).is_type()) - strided_slice_wa = true; - } - } - - if (!strided_slice_wa && !input_shape_changed && !_node->generates_dynamic_output() && _impl_params->get_output_layout().is_static()) + if (!input_shape_changed && !_node->generates_dynamic_output() && _impl_params->get_output_layout().is_static()) return; std::vector dependencies_events; diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 2b90dc3c947..640c520f540 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -6,7 +6,6 @@ #include "program_helpers.h" #include "primitive_inst.h" #include "loop_inst.h" -#include "strided_slice_inst.h" #include "intel_gpu/runtime/debug_configuration.hpp" #ifdef ENABLE_ONEDNN_FOR_GPU #include "convolution_inst.h" @@ -348,16 +347,6 @@ bool program_node::recalc_output_layouts(bool invalidate_users_if_changed) { } bool program_node::is_dynamic() const { - // Strided slice loads data from {1,2,3} dependencies in impl::create method. - // It means that this data must be put into impl_params map - // Thus we treat it as "dynamic" case - // TODO: Remove once strided slice impl support runtime tensors for begin/end/stride - if (is_type()) { - for (size_t i = 1; i < get_dependencies().size(); i++) { - if (!get_dependency(i).is_type()) - return true; - } - } for (const auto& input : get_dependencies()) { if (input.first->is_dynamic_output_layout()) return true; @@ -371,17 +360,6 @@ bool program_node::is_dynamic() const { } bool program_node::is_dynamic() { - // Strided slice loads data from {1,2,3} dependencies in impl::create method. 
- // It means that this data must be put into impl_params map - // Thus we treat it as "dynamic" case - // TODO: Remove once strided slice impl support runtime tensors for begin/end/stride - if (is_type()) { - for (size_t i = 1; i < get_dependencies().size(); i++) { - if (!get_dependency(i).is_type()) - return true; - } - } - for (auto& input : get_dependencies()) { if (input.first->is_dynamic_output_layout()) return true; diff --git a/src/plugins/intel_gpu/src/graph/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/strided_slice.cpp index 341e22ead86..55e7e3c0ec6 100644 --- a/src/plugins/intel_gpu/src/graph/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/strided_slice.cpp @@ -33,21 +33,33 @@ template std::vector strided_slice_inst::calc_output_layouts(strided_slice_node const& /*node*/, const kernel_impl_params& impl_param) { auto desc = impl_param.typed_desc(); auto input0_layout = impl_param.get_input_layout(0); + auto input0_shape = input0_layout.get(); + auto input0_rank = input0_shape.size(); auto& constant_mem = impl_param.memory_deps; + auto begin_data = desc->begin; + auto end_data = desc->end; + auto strides_data = desc->strides; - if (!constant_mem.count(1) || !constant_mem.count(2) || !constant_mem.count(3)) { + if ((begin_data.empty() && !constant_mem.count(1)) + || (end_data.empty() && !constant_mem.count(2)) + || (strides_data.empty() && !constant_mem.count(3))) { auto out_shape = ov::PartialShape::dynamic(input0_layout.get_partial_shape().size()); return { layout{out_shape, input0_layout.data_type, format::get_default_format(out_shape.rank().get_length())} }; } ov::op::v1::StridedSlice op; + + ShapeType begin_shape = begin_data.empty() ? ov::Shape{ input0_rank } : ov::Shape{ begin_data.size() }; + ShapeType end_shape = end_data.empty() ? ov::Shape{ input0_rank } : ov::Shape{ end_data.size() }; + ShapeType strides_shape = strides_data.empty() ? 
ov::Shape{ input0_rank } : ov::Shape{ strides_data.size() }; + std::vector output_shapes = {ShapeType{}}; std::vector input_shapes = { - input0_layout.get(), - impl_param.get_input_layout(1).get(), - impl_param.get_input_layout(2).get(), - impl_param.get_input_layout(3).get() + input0_shape, + begin_shape, + end_shape, + strides_shape }; op.set_begin_mask(desc->begin_mask); @@ -56,24 +68,37 @@ std::vector strided_slice_inst::calc_output_layouts(strided_slice_node c op.set_shrink_axis_mask(desc->shrink_axis_mask); op.set_ellipsis_mask_mask(desc->ellipsis_mask); - auto mem1 = constant_mem.at(1); - auto mem2 = constant_mem.at(2); - auto mem3 = constant_mem.at(3); + std::map const_data; + if (!begin_data.empty() && !end_data.empty() && !strides_data.empty()) { + auto begin_tensor = make_host_tensor({ begin_shape, data_types::i64, format::bfyx }, static_cast(begin_data.data())); + auto end_tensor = make_host_tensor({ end_shape, data_types::i64, format::bfyx }, static_cast(end_data.data())); + auto strides_tensor = make_host_tensor({ strides_shape, data_types::i64, format::bfyx }, static_cast(strides_data.data())); - cldnn::mem_lock lock1(mem1, impl_param.prog->get_stream()); - cldnn::mem_lock lock2(mem2, impl_param.prog->get_stream()); - cldnn::mem_lock lock3(mem3, impl_param.prog->get_stream()); + const_data.emplace(1, begin_tensor); + const_data.emplace(2, end_tensor); + const_data.emplace(3, strides_tensor); - auto tensor1 = make_host_tensor(mem1->get_layout(), lock1.data()); - auto tensor2 = make_host_tensor(mem2->get_layout(), lock2.data()); - auto tensor3 = make_host_tensor(mem3->get_layout(), lock3.data()); + ov::op::v1::shape_infer(&op, input_shapes, output_shapes, const_data); + } else { + auto begin_mem = constant_mem.at(1); + auto end_mem = constant_mem.at(2); + auto strides_mem = constant_mem.at(3); + + cldnn::mem_lock lock1(begin_mem, impl_param.prog->get_stream()); + cldnn::mem_lock lock2(end_mem, impl_param.prog->get_stream()); + cldnn::mem_lock lock3(strides_mem, impl_param.prog->get_stream()); + + auto begin_tensor = make_host_tensor(begin_mem->get_layout(), lock1.data()); + auto end_tensor = make_host_tensor(end_mem->get_layout(), lock2.data()); + auto strides_tensor = make_host_tensor(strides_mem->get_layout(), lock3.data()); + + const_data.emplace(1, begin_tensor); + const_data.emplace(2, end_tensor); + const_data.emplace(3, strides_tensor); + + ov::op::v1::shape_infer(&op, input_shapes, output_shapes, const_data); + } - std::map> const_data = { - {1, tensor1}, - {2, tensor2}, - {3, tensor3}, - }; - ov::op::v1::shape_infer(&op, input_shapes, output_shapes, const_data); auto output_format = format::get_default_format(output_shapes[0].size()); return { layout{output_shapes[0], input0_layout.data_type, output_format} }; @@ -93,6 +118,9 @@ std::string strided_slice_inst::to_string(strided_slice_node const& node) { strided_slice_info.add("begin_param id", node.get_dependency(1).id()); strided_slice_info.add("end_param id", node.get_dependency(2).id()); strided_slice_info.add("stride_param id", node.get_dependency(3).id()); + strided_slice_info.add("begin", node.get_primitive()->begin); + strided_slice_info.add("end", node.get_primitive()->end); + strided_slice_info.add("strides", node.get_primitive()->strides); strided_slice_info.add("begin mask", node.get_primitive()->begin_mask); strided_slice_info.add("end mask", node.get_primitive()->end_mask); strided_slice_info.add("new axis mask", node.get_primitive()->new_axis_mask); diff --git 
a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl index 55f73c6d31b..21e3f90e8c5 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/strided_slice_ref.cl @@ -4,10 +4,263 @@ #include "include/batch_headers/fetch_data.cl" -KERNEL(strided_slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output) +#ifdef STRIDE_TYPE +inline void FUNC(get_slice_step)(const __global STRIDE_TYPE* stride, + int* step_batch, int* step_feature, + int* step_z, int* step_y, int* step_x) +{ +#ifdef OUTPUT_LAYOUT_BFYX + const uint batch_index = STRIDE_GET_INDEX(0, 0, 0, 0); + const uint feature_index = STRIDE_GET_INDEX(1, 0, 0, 0); + const uint y_index = STRIDE_GET_INDEX(2, 0, 0, 0); + const uint x_index = STRIDE_GET_INDEX(3, 0, 0, 0); +#elif OUTPUT_LAYOUT_BFZYX + const uint batch_index = STRIDE_GET_INDEX(0, 0, 0, 0, 0); + const uint feature_index = STRIDE_GET_INDEX(1, 0, 0, 0, 0); + const uint z_index = STRIDE_GET_INDEX(2, 0, 0, 0, 0); + const uint y_index = STRIDE_GET_INDEX(3, 0, 0, 0, 0); + const uint x_index = STRIDE_GET_INDEX(4, 0, 0, 0, 0); +#endif + + *step_batch = batch_index < STRIDE_DIMS ? stride[batch_index] : 1; + *step_feature = feature_index < STRIDE_DIMS ? stride[feature_index] : 1; +#ifdef OUTPUT_LAYOUT_BFYX + *step_z = 0; +#elif OUTPUT_LAYOUT_BFZYX + *step_z = z_index < STRIDE_DIMS ? stride[z_index] : 1; +#endif + *step_y = y_index < STRIDE_DIMS ? stride[y_index] : 1; + *step_x = x_index < STRIDE_DIMS ? stride[x_index] : 1; +} +#endif // STRIDE_TYPE + +#ifdef END_TYPE +inline int FUNC(check_end_bound)(const end_num, + const uint out_num) +{ + int num; + if (end_num < 0) { + num = max(TO_END_TYPE(out_num) + end_num, TO_END_TYPE(0)); + } else { + num = end_num; + } + num = min(num, (int)out_num); + return num; +} + +inline void FUNC(get_slice_end)(const __global END_TYPE* end, + int* end_batch, int* end_feature, + int* end_z, int* end_y, int* end_x) +{ + const uint out_batch_num = INPUT0_BATCH_NUM; + const uint out_feature_num = INPUT0_FEATURE_NUM; + const uint out_z_num = INPUT0_SIZE_Z; + const uint out_y_num = INPUT0_SIZE_Y; + const uint out_x_num = INPUT0_SIZE_X; +#ifdef OUTPUT_LAYOUT_BFYX + const uint batch_index = END_GET_INDEX(0, 0, 0, 0); + const uint feature_index = END_GET_INDEX(1, 0, 0, 0); + const uint y_index = END_GET_INDEX(2, 0, 0, 0); + const uint x_index = END_GET_INDEX(3, 0, 0, 0); +#elif OUTPUT_LAYOUT_BFZYX + const uint batch_index = END_GET_INDEX(0, 0, 0, 0, 0); + const uint feature_index = END_GET_INDEX(1, 0, 0, 0, 0); + const uint z_index = END_GET_INDEX(2, 0, 0, 0, 0); + const uint y_index = END_GET_INDEX(3, 0, 0, 0, 0); + const uint x_index = END_GET_INDEX(4, 0, 0, 0, 0); +#endif + END_TYPE batch = batch_index < END_DIMS ? end[batch_index] : 0; + END_TYPE feature = feature_index < END_DIMS ? end[feature_index] : 0; +#ifdef OUTPUT_LAYOUT_BFZYX + END_TYPE z = z_index < END_DIMS ? end[z_index] : 0; +#endif + END_TYPE y = y_index < END_DIMS ? end[y_index] : 0; + END_TYPE x = x_index < END_DIMS ? end[x_index] : 0; + + batch = (END_BATCH == 0) ? batch : TO_END_TYPE(out_batch_num); + feature = (END_FEATURE == 0) ? feature : TO_END_TYPE(out_feature_num); +#ifdef OUTPUT_LAYOUT_BFZYX + z = (END_Z == 0) ? z: TO_END_TYPE(out_z_num); +#endif + y = (END_Y == 0) ? y : TO_END_TYPE(out_y_num); + x = (END_X == 0) ? 
x : TO_END_TYPE(out_x_num); + + *end_batch = FUNC_CALL(check_end_bound)(batch, out_batch_num); + *end_feature = FUNC_CALL(check_end_bound)(feature, out_feature_num); +#ifdef OUTPUT_LAYOUT_BFYX + *end_z = 0; +#elif OUTPUT_LAYOUT_BFZYX + *end_z = FUNC_CALL(check_end_bound)(z, out_z_num); +#endif + *end_y = FUNC_CALL(check_end_bound)(y, out_y_num); + *end_x = FUNC_CALL(check_end_bound)(x, out_x_num); +} + +inline void FUNC(check_negative_stride)(const __global END_TYPE* end, + const int steps_batch, const int steps_feature, + const int steps_z, const int steps_y, const int steps_x, + int* begin_batch, int* begin_feature, + int* begin_z, int* begin_y, int* begin_x) +{ + bool is_negative = (steps_batch < 0) || (steps_feature < 0) || (steps_z < 0) || (steps_y < 0) || (steps_x < 0); + if (is_negative) { + int end_batch, end_feature, end_z, end_y, end_x; + FUNC_CALL(get_slice_end)(end, &end_batch, &end_feature, &end_z, &end_y, &end_x); + const int slice_end_batch = end_batch; + const int slice_end_feature = end_feature; + const int slice_end_z = end_z; + const int slice_end_y = end_y; + const int slice_end_x = end_x; + + if ((steps_batch < 0) && (*begin_batch <= slice_end_batch)) + *begin_batch = slice_end_batch - 1; + if ((steps_feature < 0) && (*begin_feature <= slice_end_feature)) + *begin_feature = slice_end_feature - 1; + if ((steps_z < 0) && (*begin_z <= slice_end_z)) + *begin_z = slice_end_z - 1; + if ((steps_y < 0) && (*begin_y <= slice_end_y)) + *begin_y = slice_end_y - 1; + if ((steps_x < 0) && (*begin_x <= slice_end_x)) + *begin_x = slice_end_x - 1; + } +} +#else // END_TYPE +inline void FUNC(check_negative_stride)(const int steps_batch, const int steps_feature, + const int steps_z, const int steps_y, const int steps_x, + int* begin_batch, int* begin_feature, + int* begin_z, int* begin_y, int* begin_x) +{ + const int slice_end_batch = SLICE_END_BATCH; + const int slice_end_feature = SLICE_END_FEATURE; + const int slice_end_z = SLICE_END_Z; + const int slice_end_y = SLICE_END_Y; + const int slice_end_x = SLICE_END_X; + + if ((steps_batch < 0) && (*begin_batch <= slice_end_batch)) + *begin_batch = slice_end_batch - 1; + if ((steps_feature < 0) && (*begin_feature <= slice_end_feature)) + *begin_feature = slice_end_feature - 1; + if ((steps_z < 0) && (*begin_z <= slice_end_z)) + *begin_z = slice_end_z - 1; + if ((steps_y < 0) && (*begin_y <= slice_end_y)) + *begin_y = slice_end_y - 1; + if ((steps_x < 0) && (*begin_x <= slice_end_x)) + *begin_x = slice_end_x - 1; +} +#endif // END_TYPE + +#ifdef BEGIN_TYPE +inline int FUNC(check_begin_bound)(BEGIN_TYPE begin_num, + const uint out_num) +{ + int num; + if (begin_num < 0) { + num = max(TO_BEGIN_TYPE(out_num) + begin_num, TO_BEGIN_TYPE(0)); + } else { + num = begin_num; + } + num = min(num, (int)out_num); + return num; +} + +inline void FUNC(get_slice_begin)(const __global BEGIN_TYPE* begin, + int* begin_batch, int* begin_feature, + int* begin_z, int* begin_y, int* begin_x) +{ + const uint out_batch_num = INPUT0_BATCH_NUM; + const uint out_feature_num = INPUT0_FEATURE_NUM; + const uint out_z_num = INPUT0_SIZE_Z; + const uint out_y_num = INPUT0_SIZE_Y; + const uint out_x_num = INPUT0_SIZE_X; +#ifdef OUTPUT_LAYOUT_BFYX + const uint batch_index = STRIDE_GET_INDEX(0, 0, 0, 0); + const uint feature_index = STRIDE_GET_INDEX(1, 0, 0, 0); + const uint y_index = STRIDE_GET_INDEX(2, 0, 0, 0); + const uint x_index = STRIDE_GET_INDEX(3, 0, 0, 0); +#elif OUTPUT_LAYOUT_BFZYX + const uint batch_index = STRIDE_GET_INDEX(0, 0, 0, 0, 0); + const uint 
feature_index = STRIDE_GET_INDEX(1, 0, 0, 0, 0); + const uint z_index = STRIDE_GET_INDEX(2, 0, 0, 0, 0); + const uint y_index = STRIDE_GET_INDEX(3, 0, 0, 0, 0); + const uint x_index = STRIDE_GET_INDEX(4, 0, 0, 0, 0); +#endif + BEGIN_TYPE batch = batch_index < BEGIN_DIMS ? begin[batch_index] : 0; + BEGIN_TYPE feature = feature_index < BEGIN_DIMS ? begin[feature_index] : 0; +#ifdef OUTPUT_LAYOUT_BFZYX + BEGIN_TYPE z = z_index < BEGIN_DIMS ? begin[z_index] : 0; +#endif + BEGIN_TYPE y = y_index < BEGIN_DIMS ? begin[y_index] : 0; + BEGIN_TYPE x = x_index < BEGIN_DIMS ? begin[x_index] : 0; + + batch = (BEGIN_BATCH == 0) ? batch : 0; + feature = (BEGIN_FEATURE == 0) ? feature : 0; +#ifdef OUTPUT_LAYOUT_BFZYX + z = (BEGIN_Z == 0) ? z: 0; +#endif + y = (BEGIN_Y == 0) ? y : 0; + x = (BEGIN_X == 0) ? x : 0; + + *begin_batch = FUNC_CALL(check_begin_bound)(batch, out_batch_num); + *begin_feature = FUNC_CALL(check_begin_bound)(feature, out_feature_num); +#ifdef OUTPUT_LAYOUT_BFYX + *begin_z = 0; +#elif OUTPUT_LAYOUT_BFZYX + *begin_z = FUNC_CALL(check_begin_bound)(z, out_z_num); +#endif + *begin_y = FUNC_CALL(check_begin_bound)(y, out_y_num); + *begin_x = FUNC_CALL(check_begin_bound)(x, out_x_num); +} +#endif // BEGIN_TYPE + +KERNEL(strided_slice_ref)(const __global INPUT0_TYPE* input, +#ifdef BEGIN_TYPE + const __global BEGIN_TYPE* begin, +#endif +#ifdef END_TYPE + const __global END_TYPE* end, +#endif +#ifdef STRIDE_TYPE + const __global STRIDE_TYPE* stride, +#endif + __global OUTPUT_TYPE* output) { const uint batch = get_global_id(0); const uint feature = get_global_id(1); +#ifdef STRIDE_TYPE + int step_batch, step_feature, step_z, step_y, step_x; + FUNC_CALL(get_slice_step)(stride, &step_batch, &step_feature, &step_z, &step_y, &step_x); + const int slice_steps_batch = step_batch; + const int slice_steps_feature = step_feature; + const int slice_steps_z = step_z; + const int slice_steps_y = step_y; + const int slice_steps_x = step_x; +#else // STRIDE_TYPE + const int slice_steps_batch = SLICE_STEPS_BATCH; + const int slice_steps_feature = SLICE_STEPS_FEATURE; + const int slice_steps_z = SLICE_STEPS_Z; + const int slice_steps_y = SLICE_STEPS_Y; + const int slice_steps_x = SLICE_STEPS_X; +#endif // STRIDE_TYPE +#ifdef BEGIN_TYPE + int begin_batch, begin_feature, begin_z, begin_y, begin_x; + FUNC_CALL(get_slice_begin)(begin, &begin_batch, &begin_feature, &begin_z, &begin_y, &begin_x); +#ifdef END_TYPE + FUNC_CALL(check_negative_stride)(end, slice_steps_batch, slice_steps_feature, slice_steps_z, slice_steps_y, slice_steps_x, &begin_batch, &begin_feature, &begin_z, &begin_y, &begin_x); +#else // END_TYPE + FUNC_CALL(check_negative_stride)(slice_steps_batch, slice_steps_feature, slice_steps_z, slice_steps_y, slice_steps_x, &begin_batch, &begin_feature, &begin_z, &begin_y, &begin_x); +#endif // END_TYPE + const int slice_begin_batch = begin_batch; + const int slice_begin_feature = begin_feature; + const int slice_begin_z = begin_z; + const int slice_begin_y = begin_y; + const int slice_begin_x = begin_x; +#else // BEGIN_TYPE + const int slice_begin_batch = SLICE_BEGIN_BATCH; + const int slice_begin_feature = SLICE_BEGIN_FEATURE; + const int slice_begin_z = SLICE_BEGIN_Z; + const int slice_begin_y = SLICE_BEGIN_Y; + const int slice_begin_x = SLICE_BEGIN_X; +#endif // BEGIN_TYPE #if NEW_AXIS_MODE // If NEW_AXIS_MODE that just copy input to output @@ -28,7 +281,7 @@ KERNEL(strided_slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYP y_input * INPUT0_Y_PITCH + x_input * INPUT0_X_PITCH; 
output[input_index] = input[input_index]; -#else +#else // NEW_AXIS_MODE #ifdef OUTPUT_LAYOUT_BFYX const uint z = 0; const uint y = get_global_id(2) / OUTPUT_SIZE_X; @@ -43,23 +296,23 @@ KERNEL(strided_slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYP #if SHRINK_MODE const uint in_indices[] = {INPUT_INDICES_ORDER}; const uint input_index = INPUT0_OFFSET + - (SLICE_BEGIN_BATCH + in_indices[0] * SLICE_STEPS_BATCH) * INPUT0_BATCH_PITCH + - (SLICE_BEGIN_FEATURE + in_indices[1] * SLICE_STEPS_FEATURE) * INPUT0_FEATURE_PITCH + + (slice_begin_batch + in_indices[0] * slice_steps_batch) * INPUT0_BATCH_PITCH + + (slice_begin_feature + in_indices[1] * slice_steps_feature) * INPUT0_FEATURE_PITCH + #if INPUT0_LAYOUT_BFZYX - (SLICE_BEGIN_Z + in_indices[2] * SLICE_STEPS_Z) * INPUT0_Z_PITCH + - (SLICE_BEGIN_Y + in_indices[3] * SLICE_STEPS_Y) * INPUT0_Y_PITCH + - (SLICE_BEGIN_X + in_indices[4] * SLICE_STEPS_X) * INPUT0_X_PITCH; + (slice_begin_z + in_indices[2] * slice_steps_z) * INPUT0_Z_PITCH + + (slice_begin_y + in_indices[3] * slice_steps_y) * INPUT0_Y_PITCH + + (slice_begin_x + in_indices[4] * slice_steps_x) * INPUT0_X_PITCH; #else - (SLICE_BEGIN_Y + in_indices[2] * SLICE_STEPS_Y) * INPUT0_Y_PITCH + - (SLICE_BEGIN_X + in_indices[3] * SLICE_STEPS_X) * INPUT0_X_PITCH; + (slice_begin_y + in_indices[2] * slice_steps_y) * INPUT0_Y_PITCH + + (slice_begin_x + in_indices[3] * slice_steps_x) * INPUT0_X_PITCH; #endif #else // SHRINK_MODE const uint input_index = INPUT0_OFFSET + - (SLICE_BEGIN_BATCH + batch * SLICE_STEPS_BATCH) * INPUT0_BATCH_PITCH + - (SLICE_BEGIN_FEATURE + feature * SLICE_STEPS_FEATURE) * INPUT0_FEATURE_PITCH + - (SLICE_BEGIN_Z + z * SLICE_STEPS_Z) * INPUT0_Z_PITCH + - (SLICE_BEGIN_Y + y * SLICE_STEPS_Y) * INPUT0_Y_PITCH + - (SLICE_BEGIN_X + x * SLICE_STEPS_X) * INPUT0_X_PITCH; + (slice_begin_batch + batch * slice_steps_batch) * INPUT0_BATCH_PITCH + + (slice_begin_feature + feature * slice_steps_feature) * INPUT0_FEATURE_PITCH + + (slice_begin_z + z * slice_steps_z) * INPUT0_Z_PITCH + + (slice_begin_y + y * slice_steps_y) * INPUT0_Y_PITCH + + (slice_begin_x + x * slice_steps_x) * INPUT0_X_PITCH; #endif // SHRINK_MODE const uint output_index = OUTPUT_OFFSET + @@ -70,5 +323,5 @@ KERNEL(strided_slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYP x * OUTPUT_X_PITCH; output[output_index] = ACTIVATION(input[input_index], ACTIVATION_PARAMS); -#endif +#endif // NEW_AXIS_MODE } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.cpp index 41d09bf9a50..8dbd3d6364f 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.cpp @@ -56,6 +56,7 @@ ParamsKey StridedSliceKernelRef::GetSupportedKey() const { k.EnableTensorOffset(); k.EnableTensorPitches(); k.EnableBatching(); + k.EnableDifferentTypes(); return k; } @@ -106,13 +107,46 @@ CommonDispatchData StridedSliceKernelRef::SetDefault(const strided_slice_params& return dispatchData; } +inline std::string GetInputTypeStr(uint32_t idx) { + return "INPUT" + std::to_string(idx) + "_TYPE"; +} + +inline std::string GetToInputTypeStr(uint32_t idx) { + return "TO_" + GetInputTypeStr(idx); +} + +inline std::string GetInputIndexStr(uint32_t idx) { + return "INPUT" + std::to_string(idx) + "_GET_INDEX"; +} + JitConstants 
StridedSliceKernelRef::GetJitConstants(const strided_slice_params& params) const { JitConstants jit = MakeBaseParamsJitConstants(params); - makeJitConstForParam(jit, "SLICE_BEGIN", params.striding_params[0]); - makeJitConstForParam(jit, "SLICE_END", params.striding_params[1]); - makeJitConstForParam(jit, "SLICE_STEPS", params.striding_params[2]); - + if (params.begin_type == StridedSliceArgType::Input) { + jit.AddConstant(MakeJitConstant("BEGIN_TYPE", GetInputTypeStr(params.GetIndexBegin()))); + jit.AddConstant(MakeJitConstant("TO_BEGIN_TYPE", GetToInputTypeStr(params.GetIndexBegin()))); + jit.AddConstant(MakeJitConstant("BEGIN_GET_INDEX", GetInputIndexStr(params.GetIndexBegin()))); + jit.AddConstant(MakeJitConstant("BEGIN_DIMS", params.begin_dims)); + makeJitConstForParam(jit, "BEGIN", params.begin_mask); + } else { + makeJitConstForParam(jit, "SLICE_BEGIN", params.striding_params[0]); + } + if (params.end_type == StridedSliceArgType::Input) { + jit.AddConstant(MakeJitConstant("END_TYPE", GetInputTypeStr(params.GetIndexEnd()))); + jit.AddConstant(MakeJitConstant("TO_END_TYPE", GetToInputTypeStr(params.GetIndexEnd()))); + jit.AddConstant(MakeJitConstant("END_GET_INDEX", GetInputIndexStr(params.GetIndexEnd()))); + jit.AddConstant(MakeJitConstant("END_DIMS", params.end_dims)); + makeJitConstForParam(jit, "END", params.end_mask); + } else { + makeJitConstForParam(jit, "SLICE_END", params.striding_params[1]); + } + if (params.stride_type == StridedSliceArgType::Input) { + jit.AddConstant(MakeJitConstant("STRIDE_TYPE", GetInputTypeStr(params.GetIndexStride()))); + jit.AddConstant(MakeJitConstant("STRIDE_GET_INDEX", GetInputIndexStr(params.GetIndexStride()))); + jit.AddConstant(MakeJitConstant("STRIDE_DIMS", params.stride_dims)); + } else { + makeJitConstForParam(jit, "SLICE_STEPS", params.striding_params[2]); + } jit.AddConstant(MakeJitConstant( "NEW_AXIS_MODE", std::find(params.new_axis_mask.begin(), params.new_axis_mask.end(), 1) != params.new_axis_mask.end())); @@ -170,7 +204,7 @@ KernelsData StridedSliceKernelRef::GetKernelsData(const Params& params, const op auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, "", false, false, static_cast(newParams.inputs.size())); return {kd}; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.h index 76f0610650e..0458061d021 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/strided_slice/strided_slice_kernel_ref.h @@ -8,6 +8,12 @@ #include namespace kernel_selector { + +enum class StridedSliceArgType { + Input, + Constant +}; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // strided_slice_params //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -20,6 +26,30 @@ struct strided_slice_params : public base_params { std::vector ellipsis_mask; std::vector new_axis_mask; std::vector shrink_axis_mask; + StridedSliceArgType begin_type; + StridedSliceArgType end_type; + StridedSliceArgType stride_type; + size_t begin_dims; + size_t end_dims; + size_t stride_dims; + + uint32_t GetIndexBegin() const { + uint32_t 
input_idx = 0; + if (begin_type == StridedSliceArgType::Input) input_idx++; + return input_idx; + } + + uint32_t GetIndexEnd() const { + uint32_t input_idx = GetIndexBegin(); + if (end_type == StridedSliceArgType::Input) input_idx++; + return input_idx; + } + + uint32_t GetIndexStride() const { + uint32_t input_idx = GetIndexEnd(); + if (stride_type == StridedSliceArgType::Input) input_idx++; + return input_idx; + } }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/plugins/intel_gpu/src/plugin/ops/strided_slice.cpp b/src/plugins/intel_gpu/src/plugin/ops/strided_slice.cpp index d0e3b0bc12d..0e626f8cce2 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/strided_slice.cpp @@ -21,15 +21,17 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptrget_output_partial_shape(0); + auto input_pshape = op->get_input_partial_shape(0); + + auto begin_constant = std::dynamic_pointer_cast(op->input_value(1).get_node_shared_ptr()); + std::vector begin = begin_constant ? begin_constant->cast_vector() : std::vector{}; + auto end_constant = std::dynamic_pointer_cast(op->input_value(2).get_node_shared_ptr()); + std::vector end = end_constant ? end_constant->cast_vector() : std::vector{}; + auto stride_constant = std::dynamic_pointer_cast(op->input_value(3).get_node_shared_ptr()); + std::vector strides = stride_constant ? stride_constant->cast_vector() : std::vector{}; + do { - auto data_output = op->input_value(0); - auto begin_node = std::dynamic_pointer_cast(op->input_value(1).get_node_shared_ptr()); - auto end_node = std::dynamic_pointer_cast(op->input_value(2).get_node_shared_ptr()); - auto stride_node = std::dynamic_pointer_cast(op->input_value(3).get_node_shared_ptr()); - - auto partial_input_shape = op->get_input_partial_shape(0); - - if (!begin_node || !end_node || !stride_node || partial_input_shape.is_dynamic()) { + if (!begin_constant || !end_constant || !stride_constant || input_pshape.is_dynamic()) { break; } @@ -41,10 +43,6 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptrcast_vector(); - auto end = end_node->cast_vector(); - auto strides = stride_node->cast_vector(); - bool ones_stride = true; for (auto & s : strides) { if (s != 1) @@ -201,6 +199,7 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptrinput_value(0); auto data_node_shape = data_output.get_shape(); std::vector offset_tensor{ 0, 0, 0, 0 }; @@ -245,18 +244,32 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptrget_begin_mask(), - op->get_end_mask(), - op->get_new_axis_mask(), - op->get_shrink_axis_mask(), - op->get_ellipsis_mask(), - output_shape); - + std::shared_ptr stridedSlicePrim = nullptr; + if (begin_constant && end_constant && stride_constant) { + stridedSlicePrim = std::make_shared(layerName, + inputs[0], + begin, + end, + strides, + op->get_begin_mask(), + op->get_end_mask(), + op->get_new_axis_mask(), + op->get_shrink_axis_mask(), + op->get_ellipsis_mask(), + output_shape); + } else { + stridedSlicePrim = std::make_shared(layerName, + inputs[0], + inputs[1], + inputs[2], + inputs[3], + op->get_begin_mask(), + op->get_end_mask(), + op->get_new_axis_mask(), + op->get_shrink_axis_mask(), + op->get_ellipsis_mask(), + output_shape); + } p.add_primitive(*op, stridedSlicePrim); } diff --git a/src/plugins/intel_gpu/tests/shape_infer/strided_slice_si_test.cpp 
b/src/plugins/intel_gpu/tests/shape_infer/strided_slice_si_test.cpp index 6a2994781d4..a01a39e77bf 100644 --- a/src/plugins/intel_gpu/tests/shape_infer/strided_slice_si_test.cpp +++ b/src/plugins/intel_gpu/tests/shape_infer/strided_slice_si_test.cpp @@ -43,6 +43,57 @@ TEST_P(strided_slice_test, shape_infer) { auto& engine = get_test_engine(); + auto input_prim = std::make_shared("input", p.in_layout); + auto strided_slice_prim = std::make_shared("output", + input_info("input"), + p.begin_data, + p.end_data, + p.strides_data, + p.begin_mask, + p.end_mask, + p.new_axis_mask, + p.shrink_axis_mask, + p.ellipsis_mask, + ov::Shape{}); + + cldnn::program prog(engine); + + auto begin_mem = engine.allocate_memory(p.begin_layout); + auto end_mem = engine.allocate_memory(p.end_layout); + auto strides_mem = engine.allocate_memory(p.strides_layout); + set_values(begin_mem, p.begin_data); + set_values(end_mem, p.end_data); + set_values(strides_mem, p.strides_data); + + auto& input_node = prog.get_or_create(input_prim); + auto& strided_slice_node = prog.get_or_create(strided_slice_prim); + program_wrapper::add_connection(prog, input_node, strided_slice_node); + auto params = strided_slice_node.get_kernel_impl_params(); + auto res = strided_slice_inst::calc_output_layouts(strided_slice_node, *params); + + ASSERT_EQ(res.size(), 1); + ASSERT_EQ(res[0], p.expected_layout); +} + +INSTANTIATE_TEST_SUITE_P(smoke, strided_slice_test, + testing::ValuesIn(std::vector{ + { + layout{ov::PartialShape{1, 128, 1024}, data_types::i64, format::bfyx}, + layout{ov::PartialShape{3}, data_types::i64, format::bfyx}, {0, 0, 0}, + layout{ov::PartialShape{3}, data_types::i64, format::bfyx}, {0, 1, 0}, + layout{ov::PartialShape{3}, data_types::i64, format::bfyx}, {1, 1, 1}, + {1, 0, 1}, {1, 0, 1}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, + layout{ov::PartialShape{1, 1, 1024}, data_types::i64, format::bfyx} + }, + })); + +class strided_slice_test_four_inputs : public testing::TestWithParam { }; + +TEST_P(strided_slice_test_four_inputs, shape_infer) { + auto p = GetParam(); + + auto& engine = get_test_engine(); + auto input_prim = std::make_shared("input", p.in_layout); auto begin_prim = std::make_shared("begin", p.begin_layout); auto end_prim = std::make_shared("end", p.end_layout); @@ -85,7 +136,7 @@ TEST_P(strided_slice_test, shape_infer) { ASSERT_EQ(res[0], p.expected_layout); } -INSTANTIATE_TEST_SUITE_P(smoke, strided_slice_test, +INSTANTIATE_TEST_SUITE_P(smoke, strided_slice_test_four_inputs, testing::ValuesIn(std::vector{ { layout{ov::PartialShape{1, 128, 1024}, data_types::i64, format::bfyx}, diff --git a/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp index 453fd101e01..8523cac4b1e 100644 --- a/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp @@ -13,7 +13,7 @@ using namespace cldnn; using namespace ::tests; -TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full) { +TEST(strided_slice_gpu, test_2x2x2x2_full) { // Input (BFYX): 2x2x2x2 // Begin (BFYX): 0x0x0x0 // End (BFYX): 2x2x2x2 @@ -21,31 +21,19 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full) { // Output (BFYX): 2x2x2x2 auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2, 2 }, data_types::f32, format::bfyx, }); - auto begin = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i32, format::bfyx, }); - auto end = engine.allocate_memory({ 
ov::PartialShape{ 4 }, data_types::i32, format::bfyx }); - auto strides = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i32, format::bfyx }); + auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2, 2 }, data_types::f32, format::bfyx }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f }); - set_values(begin, { - 0, 0, 0, 0 - }); - set_values(end, { - 2, 2, 2, 2 - }); - set_values(strides, { - 1, 1, 1, 1 - }); + std::vector begin_data = { 0, 0, 0, 0 }; + std::vector end_data = { 2, 2, 2, 2 }; + std::vector strides_data = { 1, 1, 1, 1 }; topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(data("input2", begin)); - topology.add(data("input3", end)); - topology.add(data("input4", strides)); - topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {2, 2, 2, 2})); + topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 2, 2, 2})); network network(engine, topology); @@ -70,7 +58,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full) { } } -TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_full) { +TEST(strided_slice_gpu_constants, test_2x2x2x2_full) { // Input (BFYX): 2x2x2x2 // Begin (BFYX): 0x0x0x0 // End (BFYX): 2x2x2x2 @@ -127,7 +115,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_full) { } } -TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_ignore) { +TEST(strided_slice_gpu, test_2x2x2x2_ignore) { // Input (BFYX): 2x2x2x2 // Begin (BFYX): 1x1x1x1 // End (BFYX): 2x2x2x2 @@ -136,30 +124,18 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_ignore) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f }); - set_values(begin, { - 1, 1, 1, 1 - }); - set_values(end, { - 2, 2, 2, 2 - }); - set_values(strides, { - 1, 1, 1, 1 - }); + std::vector begin_data = { 1, 1, 1, 1 }; + std::vector end_data = { 2, 2, 2, 2 }; + std::vector strides_data = { 1, 1, 1, 1 }; topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(data("input2", begin)); - topology.add(data("input3", end)); - topology.add(data("input4", strides)); - topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}, {}, {2, 2, 2, 2})); + topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}, {}, {2, 2, 2, 2})); network network(engine, topology); @@ -186,7 +162,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_ignore) { } } -TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_ignore) { +TEST(strided_slice_gpu_constants, test_2x2x2x2_ignore) { // Input (BFYX): 2x2x2x2 // Begin (BFYX): 1x1x1x1 // End (BFYX): 2x2x2x2 @@ -245,7 +221,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_ignore) { } } -TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_single) { +TEST(strided_slice_gpu, 
test_2x2x2x2_single) { // Input (BFYX): 2x2x2x2 // Begin (BFYX): 1x1x1x1 // End (BFYX): 2x2x2x2 @@ -254,30 +230,18 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_single) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f - }); - set_values(begin, { - 1, 1, 1, 1 - }); - set_values(end, { - 2, 2, 2, 2 - }); - set_values(strides, { - 1, 1, 1, 1 - }); + }); + std::vector begin_data = { 1, 1, 1, 1 }; + std::vector end_data = { 2, 2, 2, 2 }; + std::vector strides_data = { 1, 1, 1, 1 }; topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(data("input2", begin)); - topology.add(data("input3", end)); - topology.add(data("input4", strides)); - topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {1, 1, 1, 1})); + topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {1, 1, 1, 1})); network network(engine, topology); @@ -301,7 +265,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_single) { } } -TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_single) { +TEST(strided_slice_gpu_constants, test_2x2x2x2_single) { // Input (BFYX): 2x2x2x2 // Begin (BFYX): 1x1x1x1 // End (BFYX): 2x2x2x2 @@ -357,7 +321,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_single) { } } -TEST(strided_slice_gpu_f32_i32, test_2x2x4x3_stride) { +TEST(strided_slice_gpu, test_2x2x4x3_stride) { // Input (BFYX): 2x2x4x3 // Begin (BFYX): 0x0x0x0 // End (BFYX): 2x2x4x3 @@ -366,9 +330,6 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x3_stride) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 3, 4 } }); - auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, @@ -378,22 +339,13 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x3_stride) { 36.f, 37.f, 38.f, 39.f, 40.f, 41.f, 42.f, 43.f, 44.f, 45.f, 46.f, 47.f }); - set_values(begin, { - 0, 0, 0, 0 - }); - set_values(end, { - 2, 2, 4, 3 - }); - set_values(strides, { - 1, 1, 2, 1 - }); + std::vector begin_data = { 0, 0, 0, 0 }; + std::vector end_data = { 2, 2, 4, 3 }; + std::vector strides_data = { 1, 1, 2, 1 }; topology topology; topology.add(input_layout("input", input->get_layout())); - topology.add(data("input2", begin)); - topology.add(data("input3", end)); - topology.add(data("input4", strides)); - topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}, {}, {2, 2, 2, 3})); + topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}, {}, {2, 2, 2, 3})); network network(engine, topology); @@ -420,7 +372,7 @@ 
TEST(strided_slice_gpu_f32_i32, test_2x2x4x3_stride) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x4x3_stride) {
+TEST(strided_slice_gpu_constants, test_2x2x4x3_stride) {
    // Input (BFYX): 2x2x4x3
    // Begin (BFYX): 0x0x0x0
    // End (BFYX): 2x2x4x3
@@ -483,7 +435,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x3_stride) {
    }
}

-TEST(strided_slice_gpu_f32_i32, test_2x2x4x4_part_stride) {
+TEST(strided_slice_gpu, test_2x2x4x4_part_stride) {
    // Input (BFYX): 2x2x4x4
    // Begin (BFYX): 1x0x0x1
    // End (BFYX): 2x2x4x4
@@ -492,9 +444,6 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x4_part_stride) {
    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 4, 4 } });
-    auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
-    auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
-    auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f,
@@ -517,22 +466,13 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x4_part_stride) {
        56.0f, 57.0f, 58.0f, 59.0f,
        60.0f, 61.0f, 62.0f, 63.0f
    });
-    set_values(begin, {
-        1, 0, 0, 1
-    });
-    set_values(end, {
-        2, 2, 4, 4
-    });
-    set_values(strides, {
-        1, 1, 1, 2
-    });
+    std::vector begin_data = { 1, 0, 0, 1 };
+    std::vector end_data = { 2, 2, 4, 4 };
+    std::vector strides_data = { 1, 1, 1, 2 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {0, 1, 1, 0}, {}, {}, {}, {}, {1, 2, 4, 2}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {0, 1, 1, 0}, {}, {}, {}, {}, {1, 2, 4, 2}));

    network network(engine, topology);

@@ -566,7 +506,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x4_part_stride) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x4x4_part_stride) {
+TEST(strided_slice_gpu_constants, test_2x2x4x4_part_stride) {
    // Input (BFYX): 2x2x4x4
    // Begin (BFYX): 1x0x0x1
    // End (BFYX): 2x2x4x4
@@ -612,14 +552,17 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x4_part_stride) {
    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
+    topology.add(input_layout("input2", begin->get_layout()));
+    topology.add(input_layout("input3", end->get_layout()));
+    topology.add(input_layout("input4", strides->get_layout()));
    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {0, 1, 1, 0}, {}, {}, {}, {}, {1, 2, 4, 2}));

    network network(engine, topology);

    network.set_input_data("input", input);
+    network.set_input_data("input2", begin);
+    network.set_input_data("input3", end);
+    network.set_input_data("input4", strides);

    auto outputs = network.execute();

@@ -649,37 +592,25 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x4_part_stride) {
    }
}

-TEST(strided_slice_gpu_f32_i32, test_2x2x4x1_new_axis_mask) {
+TEST(strided_slice_gpu, test_2x2x4x1_new_axis_mask) {
    // Input (BFYX): 2x2x4x1
    // New_axis_mask: 1
    // Output (BFYX): 1x2x2x4

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 4 } });
-    auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
-    auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
-    auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
        10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f
    });
-    set_values(begin, {
-        1, 0, 1, 0
-    });
-    set_values(end, {
-        2, 2, 4, 4
-    });
-    set_values(strides, {
-        1, 1, 1, 2
-    });
+    std::vector begin_data = { 1, 0, 1, 0 };
+    std::vector end_data = { 2, 2, 4, 4 };
+    std::vector strides_data = { 1, 1, 1, 2 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1 }, {}, {}, {2, 2, 4, 1}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, { 1 }, {}, {}, {2, 2, 4, 1}));

    network network(engine, topology);

@@ -705,7 +636,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x1_new_axis_mask) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x4x1_new_axis_mask) {
+TEST(strided_slice_gpu_constants, test_2x2x4x1_new_axis_mask) {
    // Input (BFYX): 2x2x4x1
    // New_axis_mask: 1
    // Output (BFYX): 1x2x2x4
@@ -761,36 +692,83 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x1_new_axis_mask) {
    }
}

-TEST(strided_slice_gpu_f32_i32, test_2x2x1x1_new_axis_mask_2) {
+TEST(strided_slice_gpu_four_inputs, test_2x2x4x1_new_axis_mask) {
+    // Input (BFYX): 2x2x4x1
+    // New_axis_mask: 1
+    // Output (BFYX): 1x2x2x4
+
+    auto& engine = get_test_engine();
+    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 4 } });
+    auto begin = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i64, format::bfyx });
+    auto end = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i64, format::bfyx });
+    auto strides = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i64, format::bfyx });
+
+    set_values(input, {
+        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+        10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f
+    });
+    set_values(begin, {
+        1, 0, 1, 0
+    });
+    set_values(end, {
+        2, 2, 4, 4
+    });
+    set_values(strides, {
+        1, 1, 1, 2
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input->get_layout()));
+    topology.add(input_layout("input2", begin->get_layout()));
+    topology.add(input_layout("input3", end->get_layout()));
+    topology.add(input_layout("input4", strides->get_layout()));
+    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1 }, {}, {}, {2, 2, 4, 1}));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", begin);
+    network.set_input_data("input3", end);
+    network.set_input_data("input4", strides);
+
+    auto outputs = network.execute();
+
+    ASSERT_EQ(outputs.size(), size_t(1));
+    ASSERT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector answers = {
+        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+        10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f
+    };
+
+    cldnn::mem_lock output_ptr(output, get_test_stream());
+
+    for (size_t i = 0; i < answers.size(); ++i)
+    {
+        ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(strided_slice_gpu, test_2x2x1x1_new_axis_mask_2) {
    // Input (BFYX): 2x2x1x1
    // New_axis_mask: 101
    // Output (BFYX): 1x2x1x2

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
-    auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
-    auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
-    auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f
    });
-    set_values(begin, {
-        1, 0, 1, 0
-    });
-    set_values(end, {
-        2, 2, 4, 4
-    });
-    set_values(strides, {
-        1, 1, 1, 2
-    });
+    std::vector begin_data = { 1, 0, 1, 0 };
+    std::vector end_data = { 2, 2, 4, 4 };
+    std::vector strides_data = { 1, 1, 1, 2 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1, 0, 1 }, {}, {}, {2, 2, 1, 1}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, { 1, 0, 1 }, {}, {}, {2, 2, 1, 1}));

    network network(engine, topology);

@@ -815,7 +793,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x1x1_new_axis_mask_2) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x1x1_new_axis_mask_2) {
+TEST(strided_slice_gpu_constants, test_2x2x1x1_new_axis_mask_2) {
    // Input (BFYX): 2x2x1x1
    // New_axis_mask: 101
    // Output (BFYX): 1x2x1x2
@@ -869,35 +847,80 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x1x1_new_axis_mask_2) {
    }
}

-TEST(strided_slice_gpu_f32_i32, test_2x2x1x1) {
+TEST(strided_slice_gpu_four_inputs, test_2x2x1x1_new_axis_mask_2) {
+    // Input (BFYX): 2x2x1x1
+    // New_axis_mask: 101
+    // Output (BFYX): 1x2x1x2
+
+    auto& engine = get_test_engine();
+    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
+    auto begin = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i64, format::bfyx });
+    auto end = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i64, format::bfyx });
+    auto strides = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i64, format::bfyx });
+
+    set_values(input, {
+        0.0f, 1.0f, 2.0f, 3.0f
+    });
+    set_values(begin, {
+        1, 0, 1, 0
+    });
+    set_values(end, {
+        2, 2, 4, 4
+    });
+    set_values(strides, {
+        1, 1, 1, 2
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input->get_layout()));
+    topology.add(input_layout("input2", begin->get_layout()));
+    topology.add(input_layout("input3", end->get_layout()));
+    topology.add(input_layout("input4", strides->get_layout()));
+    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1, 0, 1 }, {}, {}, {2, 2, 1, 1}));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", begin);
+    network.set_input_data("input3", end);
+    network.set_input_data("input4", strides);
+
+    auto outputs = network.execute();
+
+    ASSERT_EQ(outputs.size(), size_t(1));
+    ASSERT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector answers = {
+        0.0f, 1.0f, 2.0f, 3.0f
+    };
+
+    cldnn::mem_lock output_ptr(output, get_test_stream());
+
+    for (size_t i = 0; i < answers.size(); ++i)
+    {
+        ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(strided_slice_gpu, test_2x2x1x1) {
    // Input (BFYX): 2x2x1x1
    // Output (BFYX): 2x2x1x1

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
-    auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 1, 1, 1 } });
-    auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 1, 1, 1 } });
-    auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 1, 1, 1 } });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f
    });
-    set_values(begin, {
-        0, 0
-    });
-    set_values(end, {
-        2, 2
-    });
-    set_values(strides, {
-        1, 1
-    });
+    std::vector begin_data = { 0, 0 };
+    std::vector end_data = { 2, 2 };
+    std::vector strides_data = { 1, 1 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 0}, {}, {}, {}, {}, {2, 2, 1, 1}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {1, 0}, {}, {}, {}, {}, {2, 2, 1, 1}));

    network network(engine, topology);

@@ -922,7 +945,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x1x1) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x1x1) {
+TEST(strided_slice_gpu_constants, test_2x2x1x1) {
    // Input (BFYX): 2x2x1x1
    // Output (BFYX): 2x2x1x1
@@ -935,6 +958,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x1x1) {
    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f
    });
+
    set_values(begin, {
        0, 0
    });
@@ -975,35 +999,23 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x1x1) {
    }
}

-TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1) {
+TEST(strided_slice_gpu, test_2x2x2x1x1) {
    // Input (BFZYX): 2x2x2x1x1
    // Output (BFZYX): 1x2x2x1x1

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } });
-    auto begin = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });
-    auto end = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });
-    auto strides = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
    });
-    set_values(begin, {
-        0, 0, 0
-    });
-    set_values(end, {
-        1, 2, 2
-    });
-    set_values(strides, {
-        1, 1, 1
-    });
+    std::vector begin_data = { 0, 0, 0 };
+    std::vector end_data = { 1, 2, 2 };
+    std::vector strides_data = { 1, 1, 1 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {1, 2, 2, 1}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {1, 2, 2, 1}));

    network network(engine, topology);

@@ -1028,7 +1040,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1) {
+TEST(strided_slice_gpu_constants, test_2x2x2x1x1) {
    // Input (BFZYX): 2x2x2x1x1
    // Output (BFZYX): 1x2x2x1x1
@@ -1081,36 +1093,23 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1) {
    }
}

-
-TEST(strided_slice_gpu_i8_i64, test_2x2x2x1x1) {
+TEST(strided_slice_gpu_i8, test_2x2x2x1x1) {
    // Input (BFZYX): 2x2x2x1x1
    // Output (BFZYX): 1x2x2x1x1

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::i8, format::bfzyx, { 2, 2, 1, 1, 2 } });
-    auto begin = engine.allocate_memory({ data_types::i64, format::bfyx, { 3, 1, 1, 1 } });
-    auto end = engine.allocate_memory({ data_types::i64, format::bfyx, { 3, 1, 1, 1 } });
-    auto strides = engine.allocate_memory({ data_types::i64, format::bfyx, { 3, 1, 1, 1 } });

    set_values(input, {
        0, 1, 2, 3, 4, 5, 6, 7
    });
-    set_values(begin, {
-        0, 0, 0
-    });
-    set_values(end, {
-        1, 2, 2
-    });
-    set_values(strides, {
-        1, 1, 1
-    });
+    std::vector begin_data = { 0, 0, 0 };
+    std::vector end_data = { 1, 2, 2 };
+    std::vector strides_data = { 1, 1, 1 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {1, 2, 2, 1}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {1, 2, 2, 1}));

    network network(engine, topology);

@@ -1134,35 +1133,23 @@ TEST(strided_slice_gpu_i8_i64, test_2x2x2x1x1) {
    }
}

-TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1_2) {
+TEST(strided_slice_gpu, test_2x2x2x1x1_2) {
    // Input (BFZYX): 2x2x2x1x1
    // Output (BFZYX): 2x1x1x1x1

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } });
-    auto begin = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });
-    auto end = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });
-    auto strides = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
    });
-    set_values(begin, {
-        0, 0, 0
-    });
-    set_values(end, {
-        2, 2, 2
-    });
-    set_values(strides, {
-        1, 2, 2
-    });
+    std::vector begin_data = { 0, 0, 0 };
+    std::vector end_data = { 2, 2, 2 };
+    std::vector strides_data = { 1, 2, 2 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {2, 1, 1, 1}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 1, 1, 1}));

    network network(engine, topology);

@@ -1187,7 +1174,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1_2) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2) {
+TEST(strided_slice_gpu_constants, test_2x2x2x1x1_2) {
    // Input (BFZYX): 2x2x2x1x1
    // Output (BFZYX): 2x1x1x1x1
@@ -1293,39 +1280,27 @@ TEST(strided_slice_gpu_f32_i32, test_1x1x1x8x1_new_axis_5d) {
    }
}

-TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full_negative_stride) {
+TEST(strided_slice_gpu, test_2x2x2x2_full_negative_stride) {
    // Input (BFYX): 2x2x2x2
    // Begin (BFYX): 0x0x0x0
    // End (BFYX): 2x2x2x2
-    // Stride (BFYX): -1x1x1x1
+    // Stride (BFYX): -1x-1x1x1
    // Output (BFYX): 2x2x2x2

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2, 2 }, data_types::f32, format::bfyx });
-    auto begin = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i32, format::bfyx });
-    auto end = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i32, format::bfyx });
-    auto strides = engine.allocate_memory({ ov::PartialShape{ 4 }, data_types::i32, format::bfyx });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
        10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f
    });
-    set_values(begin, {
-        0, 0, 0, 0
-    });
-    set_values(end, {
-        2, 2, 2, 2
-    });
-    set_values(strides, {
-        -1, -1, 1, 1
-    });
+    std::vector begin_data = { 0, 0, 0, 0 };
+    std::vector end_data = { 2, 2, 2, 2 };
+    std::vector strides_data = { -1, -1, 1, 1 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {2, 2, 2, 2}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 2, 2, 2}));

    network network(engine, topology);

@@ -1350,11 +1325,11 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full_negative_stride) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_full_negative_stride) {
+TEST(strided_slice_gpu_constants, test_2x2x2x2_full_negative_stride) {
    // Input (BFYX): 2x2x2x2
    // Begin (BFYX): 0x0x0x0
    // End (BFYX): 2x2x2x2
-    // Stride (BFYX): -1x1x1x1
+    // Stride (BFYX): -1x-1x1x1
    // Output (BFYX): 2x2x2x2

    auto& engine = get_test_engine();
@@ -1407,35 +1382,23 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_full_negative_stride) {
    }
}

-TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1_2_negative_all) {
+TEST(strided_slice_gpu, test_2x2x2x1x1_2_negative_all) {
    // Input (BFZYX): 2x2x2x1x1
    // Output (BFZYX): 2x1x1x1x1

    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } });
-    auto begin = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });
-    auto end = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });
-    auto strides = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i32, format::bfyx });

    set_values(input, {
        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
    });
-    set_values(begin, {
-        0, 0, 0
-    });
-    set_values(end, {
-        2, 2, 2
-    });
-    set_values(strides, {
-        1, 2, 2
-    });
+    std::vector begin_data = { 0, 0, 0 };
+    std::vector end_data = { 2, 2, 2 };
+    std::vector strides_data = { 1, 2, 2 };

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
-    topology.add(data("input2", begin));
-    topology.add(data("input3", end));
-    topology.add(data("input4", strides));
-    topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {2, 1, 1, 1}));
+    topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 1, 1, 1}));

    network network(engine, topology);

@@ -1460,7 +1423,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1_2_negative_all) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all) {
+TEST(strided_slice_gpu_constants, test_2x2x2x1x1_2_negative_all) {
    // Input (BFZYX): 2x2x2x1x1
    // Output (BFZYX): 2x1x1x1x1
@@ -1513,15 +1476,13 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all) {
    }
}

-
-
-TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all_dynamic) {
+TEST(strided_slice_gpu, test_2x2x2x1x1_2_negative_all_dynamic) {
    // Input (BFZYX): 2x2x2x1x1
    // Output (BFZYX): 2x1x1x1x1

    auto& engine = get_test_engine();
    auto input_lay = layout{ ov::PartialShape::dynamic(3), data_types::f32, format::bfyx };
-    auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2 }, data_types::f32, format::bfyx, });
+    auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2 }, data_types::f32, format::bfyx });
    auto begin = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
    auto end = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
    auto strides = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
@@ -1563,9 +1524,9 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all_dynamic) {
    }
}

-TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all_dynamic_begin) {
+TEST(strided_slice_gpu, test_2x2x2x1x1_2_negative_all_dynamic_begin) {
    auto& engine = get_test_engine();
-    auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2 }, data_types::f32, format::bfyx, });
+    auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2 }, data_types::f32, format::bfyx });
    auto begin = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
    auto end = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
    auto strides = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
diff --git a/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/strided_slice.cpp b/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/strided_slice.cpp
new file mode 100644
index 00000000000..5c20fcc35a5
--- /dev/null
+++ b/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/strided_slice.cpp
@@ -0,0 +1,251 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "shared_test_classes/single_layer/strided_slice.hpp"
+#include "shared_test_classes/base/ov_subgraph.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "common_test_utils/test_constants.hpp"
+#include "common_test_utils/ov_tensor_utils.hpp"
+
+using namespace InferenceEngine;
+using namespace ov::test;
+
+namespace GPULayerTestsDefinitions {
+
+struct StridedSliceParams {
+    std::vector begin;
+    std::vector end;
+    std::vector stride;
+    std::vector beginMask;
+    std::vector endMask;
+    std::vector newAxisMask;
+    std::vector shrinkAxisMask;
+    std::vector ellipsisAxisMask;
+};
+
+typedef std::tuple<
+        InputShape,                        // Input shapes
+        StridedSliceParams,
+        ElementType,                       // Element type
+        ngraph::helpers::InputLayerType,   // begin/end/stride input type
+        std::map                           // Additional network configuration
+> StridedSliceLayerParamSet;
+
+class StridedSliceLayerGPUTest : public testing::WithParamInterface,
+                                 virtual public SubgraphBaseTest {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo& obj) {
+        InputShape shapes;
+        StridedSliceParams params;
+        ElementType elementType;
+        ngraph::helpers::InputLayerType restInputType;
+        TargetDevice targetDevice;
+        std::map additionalConfig;
+        std::tie(shapes, params, elementType, restInputType, additionalConfig) = obj.param;
+
+        std::ostringstream results;
+        results << "IS=" << CommonTestUtils::partialShape2str({shapes.first}) << "_";
+        results << "TS=";
+        for (const auto& item : shapes.second) {
+            results << CommonTestUtils::vec2str(item) << "_";
+        }
+        results << "netPRC=" << elementType << "_";
+        results << "begin=" << CommonTestUtils::vec2str(params.begin) << "_";
+        results << "end=" << CommonTestUtils::vec2str(params.end) << "_";
+        results << "stride=" << CommonTestUtils::vec2str(params.stride) << "_";
+        results << "begin_m=" << CommonTestUtils::vec2str(params.beginMask) << "_";
+        results << "end_m=" << CommonTestUtils::vec2str(params.endMask) << "_";
+        results << "new_axis_m=" << (params.newAxisMask.empty() ? "def" : CommonTestUtils::vec2str(params.newAxisMask)) << "_";
+        results << "shrink_m=" << (params.shrinkAxisMask.empty() ? "def" : CommonTestUtils::vec2str(params.shrinkAxisMask)) << "_";
+        results << "ellipsis_m=" << (params.ellipsisAxisMask.empty() ? "def" : CommonTestUtils::vec2str(params.ellipsisAxisMask)) << "_";
+        results << "restInputType=" << restInputType << "_";
+        results << "config=(";
+        for (const auto configEntry : additionalConfig) {
+            results << configEntry.first << ", " << configEntry.second << ":";
+        }
+        results << ")";
+
+        return results.str();
+    }
+
+    void generate_inputs(const std::vector& targetInputStaticShapes) override {
+        inputs.clear();
+        const auto& funcInputs = function->inputs();
+        for (int i = 0; i < funcInputs.size(); ++i) {
+            const auto& funcInput = funcInputs[i];
+            ov::Tensor tensor;
+            if (i == 1) {
+                tensor = ov::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
+                auto *dataPtr = tensor.data();
+                for (size_t i = 0; i < begin.size(); i++) {
+                    dataPtr[i] = static_cast(begin[i]);
+                }
+            } else if (i == 2) {
+                tensor = ov::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
+                auto *dataPtr = tensor.data();
+                for (size_t i = 0; i < end.size(); i++) {
+                    dataPtr[i] = static_cast(end[i]);
+                }
+            } else if (i == 3) {
+                tensor = ov::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
+                auto *dataPtr = tensor.data();
+                for (size_t i = 0; i < stride.size(); i++) {
+                    dataPtr[i] = static_cast(stride[i]);
+                }
+            } else {
+                tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
+            }
+            inputs.insert({funcInput.get_node_shared_ptr(), tensor});
+        }
+        inferRequestNum++;
+    }
+
+protected:
+    std::vector begin;
+    std::vector end;
+    std::vector stride;
+    size_t inferRequestNum = 0;
+
+    void SetUp() override {
+        InputShape shapes;
+        StridedSliceParams ssParams;
+        ngraph::helpers::InputLayerType restInputType;
+        std::map additionalConfig;
+        std::tie(shapes, ssParams, inType, restInputType, additionalConfig) = this->GetParam();
+
+        begin = ssParams.begin;
+        end = ssParams.end;
+        stride = ssParams.stride;
+
+        targetDevice = CommonTestUtils::DEVICE_GPU;
+
+        std::vector inputShapes;
+        inputShapes.push_back(shapes);
+        if (restInputType == ngraph::helpers::InputLayerType::PARAMETER) {
+            inputShapes.push_back(InputShape({static_cast(begin.size())}, std::vector(shapes.second.size(), {begin.size()})));
+            inputShapes.push_back(InputShape({static_cast(end.size())}, std::vector(shapes.second.size(), {end.size()})));
+            inputShapes.push_back(InputShape({static_cast(stride.size())}, std::vector(shapes.second.size(), {stride.size()})));
+        }
+
+        init_input_shapes(inputShapes);
+
+        auto params = ngraph::builder::makeDynamicParams(inType, {inputDynamicShapes.front()});
+        // auto paramNode = std::make_shared(type, ngraph::Shape(shape));
+        std::shared_ptr beginInput, endInput, strideInput;
+        if (restInputType == ngraph::helpers::InputLayerType::PARAMETER) {
+            auto beginNode = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{begin.size()});
+            auto endNode = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{end.size()});
+            auto strideNode = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{stride.size()});
+            params.push_back(beginNode);
+            params.push_back(endNode);
+            params.push_back(strideNode);
+            beginInput = beginNode;
+            endInput = endNode;
+            strideInput = strideNode;
+        } else {
+            beginInput = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{begin.size()}, begin);
+            endInput = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{end.size()}, end);
+            strideInput = std::make_shared(ngraph::element::Type_t::i64, ov::Shape{stride.size()}, stride);
+        }
+        auto ss = std::make_shared(params[0], beginInput, endInput, strideInput, ssParams.beginMask, ssParams.endMask,
+                                   ssParams.newAxisMask, ssParams.shrinkAxisMask, ssParams.ellipsisAxisMask);
+
+        ngraph::ResultVector results;
+        for (size_t i = 0; i < ss->get_output_size(); i++) {
+            results.push_back(std::make_shared(ss->output(i)));
+        }
+
+        function = std::make_shared(results, params, "StridedSlice");
+    }
+};
+
+TEST_P(StridedSliceLayerGPUTest, CompareWithRefs) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    run();
+}
+
+namespace {
+
+std::map emptyAdditionalConfig;
+
+const std::vector inputPrecisions = {
+    ElementType::f32
+};
+
+const std::vector restInputTypes = {
+    ngraph::helpers::InputLayerType::CONSTANT,
+    ngraph::helpers::InputLayerType::PARAMETER
+};
+
+const std::vector inputShapesDynamic2D = {
+    {{-1, -1},
+     {{32, 20}, {16, 16}, {24, 16}}},
+
+    {{-1, 16},
+     {{16, 16}, {20, 16}, {32, 16}}},
+
+    {{{16, 32}, {16, 32}},
+     {{16, 32}, {32, 16}, {24, 24}}},
+};
+
+const std::vector paramsPlain2D = {
+    StridedSliceParams{ { 0, 10 }, { 16, 16 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 2, 5 }, { 16, 8 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 2, 5 }, { 16, 16 }, { 1, 2 }, { 0, 1 }, { 1, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 0 }, { 16, 16 }, { 2, 1 }, { 0, 0 }, { 1, 0 }, { }, { }, { } },
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Plain_Static_2D, StridedSliceLayerGPUTest,
+                         ::testing::Combine(
+                             ::testing::ValuesIn(static_shapes_to_test_representation({{32, 20}})),
+                             ::testing::ValuesIn(paramsPlain2D),
+                             ::testing::ValuesIn(inputPrecisions),
+                             ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
+                             ::testing::Values(emptyAdditionalConfig)),
+                         StridedSliceLayerGPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Plain_Dynamic_2D, StridedSliceLayerGPUTest,
+                         ::testing::Combine(
+                             ::testing::ValuesIn(inputShapesDynamic2D),
+                             ::testing::ValuesIn(paramsPlain2D),
+                             ::testing::ValuesIn(inputPrecisions),
+                             ::testing::ValuesIn(restInputTypes),
+                             ::testing::Values(emptyAdditionalConfig)),
+                         StridedSliceLayerGPUTest::getTestCaseName);
+
+const std::vector testCasesCommon4D = {
+    StridedSliceParams{ { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 1, 0, 0 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 0, 10, 0 }, { 1, 3, 20, 20 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, { 0, 1, 0, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 0, 20, 20 }, { 1, 5, 25, 26 }, { 1, 1, 1, 2 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 0, 0, 20 }, { 1, 2, 30, 30 }, { 1, 1, 2, 1 }, { 0, 0, 0, 1 }, { 0, 1, 0, 1 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 0, 2, 10 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 0, 1, 1 }, { 0, 0, 0, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 1, 0, 10 }, { 1, 5, 32, 30 }, { 1, 1, 1, 1 }, { 0, 1, 0, 0 }, { 0, 0, 0, 0 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 1, 2, 10 }, { 1, 5, 32, 18 }, { 1, 1, 1, 2 }, { 0, 0, 1, 0 }, { 0, 0, 0, 1 }, { }, { }, { } },
+    StridedSliceParams{ { 0, 0, 2, 10 }, { 1, 8, 32, 18 }, { 1, 2, 1, 2 }, { 0, 0, 1, 0 }, { 0, 0, 0, 1 }, { }, { }, { } },
+};
+
+const std::vector inputShapesDynamic4D = {
+    {{-1, -1, -1, -1},
+     {{ 1, 5, 32, 32 }, { 2, 5, 32, 32 }, { 1, 5, 64, 64 }}},
+
+    {{-1, 5, -1, -1},
+     {{ 1, 5, 32, 32 }, { 2, 5, 32, 32 }, { 3, 5, 32, 36 }}},
+
+    {{{1, 5}, 5, {32, 64}, {32, 64}},
+     {{ 2, 5, 32, 32 }, { 1, 5, 48, 32 }, { 5, 5, 32, 32 }}},
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_4D, StridedSliceLayerGPUTest,
+                         ::testing::Combine(
+                             ::testing::ValuesIn(inputShapesDynamic4D),
+                             ::testing::ValuesIn(testCasesCommon4D),
+                             ::testing::ValuesIn(inputPrecisions),
+                             ::testing::ValuesIn(restInputTypes),
+                             ::testing::Values(emptyAdditionalConfig)),
+                         StridedSliceLayerGPUTest::getTestCaseName);
+
+} // namespace
+} // namespace GPULayerTestsDefinitions