[GPU] Fix strided_slice for dynamic cases (#12979)

This commit is contained in:
Vladimir Paramuzov 2022-09-13 09:17:50 +04:00 committed by GitHub
parent 3c24ee6cda
commit a73fc2dce1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 165 additions and 54 deletions

View File

@ -14,6 +14,34 @@
using namespace cldnn;
namespace {
template <typename T, typename DT, typename = typename std::enable_if<std::is_convertible<DT, T>::value>::type>
std::vector<T>& pad_vector_to_size(std::vector<T>& data, size_t size, DT value) {
    // Grow `data` up to `size` elements by appending `value`; never shrinks.
    if (data.size() < size)
        data.resize(size, static_cast<T>(value));
    return data;
}
template <typename T, typename MT>
std::vector<T>& vector_assign_if_not_mask(std::vector<T>& dst, const T& src, const std::vector<MT>& mask) {
    // Overwrite every dst element whose mask entry is falsy with `src`.
    // NOTE(review): assumes mask.size() >= dst.size() — confirm at call sites.
    size_t idx = 0;
    for (auto& element : dst) {
        if (!mask[idx])
            element = src;
        ++idx;
    }
    return dst;
}
template <typename T, typename MT>
std::vector<T>& vector_assign_if_not_mask(std::vector<T>& dst, const std::vector<T>& src, const std::vector<MT>& mask) {
    // Element-wise: copy src[i] into dst[i] wherever mask[i] is falsy.
    // NOTE(review): assumes src and mask are at least dst.size() long — confirm at call sites.
    auto src_it = src.begin();
    auto mask_it = mask.begin();
    for (auto& element : dst) {
        if (!*mask_it)
            element = *src_it;
        ++src_it;
        ++mask_it;
    }
    return dst;
}
} // namespace
namespace cldnn {
namespace ocl {
@ -27,16 +55,16 @@ struct strided_slice_impl : typed_primitive_impl_ocl<strided_slice> {
public:
static primitive_impl* create(const strided_slice_node& arg, const kernel_impl_params& impl_param) {
const auto& prim = arg.get_primitive();
const auto& prim = impl_param.typed_desc<strided_slice>();
auto params = get_default_params<kernel_selector::strided_slice_params>(impl_param);
auto op_params = get_default_optional_params<kernel_selector::strided_slice_optional_params>(arg.get_program());
const size_t dims_num = params.inputs[0].Dimentions();
// Getting data from constant inputs. There are 3 args: Begin, End, Stride
for (size_t i = 1; i < arg.get_dependencies().size(); ++i) {
auto& input = arg.get_dependency(i).as<data>();
auto mem = input.get_attached_memory_ptr();
std::vector<int32_t> sizes = read_vector<int32_t>(mem, arg.get_program().get_stream());
OPENVINO_ASSERT(impl_param.memory_deps.count(i) > 0, "[GPU] Can't find StridedSlice memory dependency");
auto mem = impl_param.memory_deps.at(i);
std::vector<int32_t> sizes = read_vector<int32_t>(mem, impl_param.prog.get_stream());
pad_vector_to_size(sizes, dims_num, i != 1); // for "begin" completion used 0 value, for other - 1
params.striding_params.push_back(sizes);
}

View File

@ -14,53 +14,6 @@
namespace cldnn {
template <typename T, typename DT, typename = typename std::enable_if<std::is_convertible<DT, T>::value>::type>
std::vector<T>& pad_vector_to_size(std::vector<T>& data, size_t size, DT value) {
    // Pad `data` with copies of `value` until it holds `size` elements;
    // a vector already at or beyond `size` is returned untouched.
    while (data.size() < size)
        data.push_back(static_cast<T>(value));
    return data;
}
template <typename T, typename MT>
std::vector<T>& vector_assign_if_not_mask(std::vector<T>& dst, const T& src, const std::vector<MT>& mask) {
    // Broadcast-assign `src` into the positions of dst whose mask entry is falsy.
    // NOTE(review): mask is indexed over dst's length — confirm mask.size() >= dst.size().
    const size_t count = dst.size();
    for (size_t pos = 0; pos != count; ++pos) {
        if (!mask[pos]) {
            dst[pos] = src;
        }
    }
    return dst;
}
template <typename T, typename MT>
std::vector<T>& vector_assign_if_not_mask(std::vector<T>& dst, const std::vector<T>& src, const std::vector<MT>& mask) {
    // Merge src into dst element-wise, keeping dst[i] where mask[i] is truthy.
    // NOTE(review): src and mask must cover dst's length — confirm at call sites.
    const size_t count = dst.size();
    for (size_t pos = 0; pos != count; ++pos) {
        if (!mask[pos]) {
            dst[pos] = src[pos];
        }
    }
    return dst;
}
// Map a tensor rank to the plain (non-blocked) cldnn format able to hold it:
// ranks 1..4 -> bfyx, 5 -> bfzyx, 6 -> bfwzyx.
// Any other rank reports an error; the bfyx default is still returned in
// case the error macro does not terminate execution.
inline format get_default_format_for_dim(size_t dimension) {
    format result = format::bfyx;
    if (dimension == 5) {
        result = format::bfzyx;
    } else if (dimension == 6) {
        result = format::bfwzyx;
    } else if (dimension < 1 || dimension > 4) {
        CLDNN_ERROR_MESSAGE("Function get_default_format_for_dim", "Unsupported dimension number: " + std::to_string(dimension));
    }
    return result;
}
template <>
struct typed_program_node<strided_slice> : public typed_program_node_base<strided_slice> {
using parent = typed_program_node_base<strided_slice>;
@ -69,6 +22,7 @@ public:
using parent::parent;
program_node& input(size_t index = 0) const { return get_dependency(index); }
std::vector<size_t> get_shape_infer_dependencies() const override { return {1, 2, 3}; }
};
using strided_slice_node = typed_program_node<strided_slice>;

View File

@ -12,6 +12,7 @@
#include "convolution_inst.h"
#include "deconvolution_inst.h"
#include "shape_of_inst.h"
#include "strided_slice_inst.h"
#include "experimental_detectron_roi_feature_extractor_inst.hpp"
#include "intel_gpu/graph/network.hpp"
@ -156,7 +157,19 @@ void primitive_inst::update_shape() {
if (_node.is_type<shape_of>())
return;
if (!input_shape_changed && !_node.generates_dynamic_output() && _impl_params->output_layout.is_static())
// Strided slice loads data from {1,2,3} dependencies in impl::create method.
// It means that this data must be put into impl_params map
// Thus we treat it as "dynamic" case
// TODO: Remove once strided slice impl support runtime tensors for begin/end/stride
bool strided_slice_wa = false;
if (_node.is_type<strided_slice>()) {
for (size_t i = 1; i < _node.get_dependencies().size(); i++) {
if (!_node.get_dependency(i).is_type<data>())
strided_slice_wa = true;
}
}
if (!strided_slice_wa && !input_shape_changed && !_node.generates_dynamic_output() && _impl_params->output_layout.is_static())
return;
auto memory_deps = _node.get_const_memory_deps();

View File

@ -6,6 +6,7 @@
#include "program_helpers.h"
#include "primitive_inst.h"
#include "loop_inst.h"
#include "strided_slice_inst.h"
#ifdef ENABLE_ONEDNN_FOR_GPU
#include "intel_gpu/runtime/debug_configuration.hpp"
#include "convolution_inst.h"
@ -281,6 +282,16 @@ bool program_node::recalc_output_layout(bool invalidate_users_if_changed) {
}
bool program_node::is_dynamic() const {
// Strided slice loads data from {1,2,3} dependencies in impl::create method.
// It means that this data must be put into impl_params map
// Thus we treat it as "dynamic" case
// TODO: Remove once strided slice impl support runtime tensors for begin/end/stride
if (is_type<strided_slice>()) {
for (size_t i = 1; i < get_dependencies().size(); i++) {
if (!get_dependency(i).is_type<data>())
return true;
}
}
for (const auto* input : get_dependencies()) {
if (input->get_output_layout().is_dynamic())
return true;
@ -290,6 +301,17 @@ bool program_node::is_dynamic() const {
}
bool program_node::is_dynamic() {
// Strided slice loads data from {1,2,3} dependencies in impl::create method.
// It means that this data must be put into impl_params map
// Thus we treat it as "dynamic" case
// TODO: Remove once strided slice impl support runtime tensors for begin/end/stride
if (is_type<strided_slice>()) {
for (size_t i = 1; i < get_dependencies().size(); i++) {
if (!get_dependency(i).is_type<data>())
return true;
}
}
for (auto& input : get_dependencies()) {
if (input->get_output_layout(true).is_dynamic())
return true;

View File

@ -39,8 +39,8 @@ std::vector<layout> strided_slice_inst::calc_output_layouts(strided_slice_node c
auto& constant_mem = impl_param.memory_deps;
if (constant_mem.empty()) {
auto out_shape = ov::PartialShape::dynamic(input0_layout.get_rank());
if (!constant_mem.count(1) || !constant_mem.count(2) || !constant_mem.count(3)) {
auto out_shape = ov::PartialShape::dynamic(input0_layout.get_partial_shape().size());
return { layout{out_shape, input0_layout.data_type, format::get_default_format(out_shape.rank().get_length())} };
}

View File

@ -1459,3 +1459,97 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all) {
EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
}
}
TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all_dynamic) {
    // Dynamic-shape case: the input layout is declared with a fully dynamic
    // rank-3 shape, while the actual buffer is 2x2x2.
    // Slice spec begin={0,0,0}, end={2,2,2}, strides={1,2,2} keeps
    // elements at flat offsets 0 and 4.
    auto& engine = get_test_engine();

    auto input_lay = layout{ ov::PartialShape::dynamic(3), data_types::f32, format::bfyx };
    auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2 }, data_types::f32, format::bfyx, });
    auto begin = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
    auto end = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
    auto strides = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });

    set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f});
    set_values<int64_t>(begin, {0, 0, 0});
    set_values<int64_t>(end, {2, 2, 2});
    set_values<int64_t>(strides, {1, 2, 2});

    topology tpl;
    tpl.add(input_layout("input", input_lay));
    tpl.add(data("input2", begin));
    tpl.add(data("input3", end));
    tpl.add(data("input4", strides));
    tpl.add(strided_slice("strided_slice", "input", "input2", "input3", "input4", {}, {}, {}, {}, {}, {}));

    build_options opts;
    opts.set_option(build_option::allow_new_shape_infer(true));

    network net(engine, tpl, opts);
    net.set_input_data("input", input);
    auto outputs = net.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "strided_slice");

    auto output = outputs.at("strided_slice").get_memory();
    const std::vector<float> expected = { 0.0f, 4.0f };

    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    for (size_t idx = 0; idx < expected.size(); ++idx) {
        EXPECT_TRUE(are_equal(expected[idx], output_ptr[idx]));
    }
}
TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all_dynamic_begin) {
    // Runtime-tensor case: "begin" is fed through input_layout at execution
    // time instead of being a constant, exercising the non-constant
    // dependency path of strided_slice. Expected result matches the
    // constant-input variant: elements 0 and 4.
    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2 }, data_types::f32, format::bfyx, });
    auto begin = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
    auto end = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
    auto strides = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });

    set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f});
    set_values<int64_t>(begin, {0, 0, 0});
    set_values<int64_t>(end, {2, 2, 2});
    set_values<int64_t>(strides, {1, 2, 2});

    topology tpl;
    tpl.add(data("input", input));
    tpl.add(input_layout("input2", begin->get_layout()));
    tpl.add(data("input3", end));
    tpl.add(data("input4", strides));
    tpl.add(strided_slice("strided_slice", "input", "input2", "input3", "input4", {}, {}, {}, {}, {}, {}));

    build_options opts;
    opts.set_option(build_option::allow_new_shape_infer(true));

    network net(engine, tpl, opts);
    net.set_input_data("input2", begin);
    auto outputs = net.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "strided_slice");

    auto output = outputs.at("strided_slice").get_memory();
    const std::vector<float> expected = { 0.0f, 4.0f };

    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    for (size_t idx = 0; idx < expected.size(); ++idx) {
        EXPECT_TRUE(are_equal(expected[idx], output_ptr[idx]));
    }
}