[GPU] Fix strided_slice for dynamic cases (#12979)

parent 3c24ee6cda
commit a73fc2dce1
@@ -14,6 +14,34 @@
 using namespace cldnn;
 
+namespace {
+template <typename T, typename DT, typename = typename std::enable_if<std::is_convertible<DT, T>::value>::type>
+std::vector<T>& pad_vector_to_size(std::vector<T>& data, size_t size, DT value) {
+    for (size_t i = data.size(); i < size; ++i) {
+        data.push_back(static_cast<T>(value));
+    }
+    return data;
+}
+
+template <typename T, typename MT>
+std::vector<T>& vector_assign_if_not_mask(std::vector<T>& dst, const T& src, const std::vector<MT>& mask) {
+    for (size_t i = 0; i < dst.size(); ++i) {
+        if (!mask[i])
+            dst[i] = src;
+    }
+    return dst;
+}
+
+template <typename T, typename MT>
+std::vector<T>& vector_assign_if_not_mask(std::vector<T>& dst, const std::vector<T>& src, const std::vector<MT>& mask) {
+    for (size_t i = 0; i < dst.size(); ++i) {
+        if (!mask[i])
+            dst[i] = src[i];
+    }
+    return dst;
+}
+} // namespace
+
 namespace cldnn {
 namespace ocl {
 
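The two helpers above only normalize the begin/end/stride vectors to the kernel rank and apply the per-dimension masks. A minimal standalone sketch of their behaviour follows; the template definitions are copied from the hunk above, and the example values are purely illustrative, not taken from the patch.

#include <cstdint>
#include <cstdio>
#include <type_traits>
#include <vector>

template <typename T, typename DT, typename = typename std::enable_if<std::is_convertible<DT, T>::value>::type>
std::vector<T>& pad_vector_to_size(std::vector<T>& data, size_t size, DT value) {
    // Append the fill value until the requested rank is reached.
    for (size_t i = data.size(); i < size; ++i) {
        data.push_back(static_cast<T>(value));
    }
    return data;
}

template <typename T, typename MT>
std::vector<T>& vector_assign_if_not_mask(std::vector<T>& dst, const T& src, const std::vector<MT>& mask) {
    // Overwrite only the positions whose mask entry is not set.
    for (size_t i = 0; i < dst.size(); ++i) {
        if (!mask[i])
            dst[i] = src;
    }
    return dst;
}

int main() {
    std::vector<int32_t> begin = {2, 3};
    pad_vector_to_size(begin, 4, 0);       // "begin" is padded with 0 -> {2, 3, 0, 0}

    std::vector<int32_t> stride = {1, 2};
    pad_vector_to_size(stride, 4, 1);      // "end"/"stride" are padded with 1 -> {1, 2, 1, 1}

    std::vector<uint8_t> begin_mask = {1, 0, 1, 0};
    vector_assign_if_not_mask(begin, int32_t(0), begin_mask);  // unmasked dims reset to 0 -> {2, 0, 0, 0}

    for (auto v : begin) printf("%d ", v); // prints: 2 0 0 0
    printf("\n");
    return 0;
}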
@@ -27,16 +55,16 @@ struct strided_slice_impl : typed_primitive_impl_ocl<strided_slice> {
 
 public:
     static primitive_impl* create(const strided_slice_node& arg, const kernel_impl_params& impl_param) {
-        const auto& prim = arg.get_primitive();
+        const auto& prim = impl_param.typed_desc<strided_slice>();
         auto params = get_default_params<kernel_selector::strided_slice_params>(impl_param);
         auto op_params = get_default_optional_params<kernel_selector::strided_slice_optional_params>(arg.get_program());
         const size_t dims_num = params.inputs[0].Dimentions();
 
         // Getting data from constant inputs. There are 3 args: Begin, End, Stride
         for (size_t i = 1; i < arg.get_dependencies().size(); ++i) {
-            auto& input = arg.get_dependency(i).as<data>();
-            auto mem = input.get_attached_memory_ptr();
-            std::vector<int32_t> sizes = read_vector<int32_t>(mem, arg.get_program().get_stream());
+            OPENVINO_ASSERT(impl_param.memory_deps.count(i) > 0, "[GPU] Can't find StridedSlice memory dependency");
+            auto mem = impl_param.memory_deps.at(i);
+            std::vector<int32_t> sizes = read_vector<int32_t>(mem, impl_param.prog.get_stream());
             pad_vector_to_size(sizes, dims_num, i != 1); // "begin" is completed with 0, the others with 1
             params.striding_params.push_back(sizes);
         }
 
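The `i != 1` argument encodes the completion convention: dependency 1 ("begin") is padded with 0, dependencies 2 and 3 ("end"/"stride") with 1, so missing trailing dimensions default to "take the full extent with unit stride". Below is a minimal standalone sketch of that loop using plain std::vector in place of the cldnn types; the input values are hypothetical.

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    // Hypothetical constant inputs read from memory_deps: rank-2 values for a rank-4 kernel.
    std::vector<std::vector<int32_t>> deps = {{0, 1},   // i == 1: begin
                                              {2, 3},   // i == 2: end
                                              {1, 2}};  // i == 3: stride
    const size_t dims_num = 4;

    std::vector<std::vector<int32_t>> striding_params;
    for (size_t i = 1; i <= deps.size(); ++i) {
        std::vector<int32_t> sizes = deps[i - 1];
        // Mirrors pad_vector_to_size(sizes, dims_num, i != 1):
        // "begin" is padded with 0, "end"/"stride" with 1.
        const int32_t fill = (i != 1) ? 1 : 0;
        while (sizes.size() < dims_num)
            sizes.push_back(fill);
        striding_params.push_back(sizes);
    }

    for (const auto& p : striding_params) {
        for (auto v : p) printf("%d ", v);
        printf("\n");
    }
    // Expected output:
    // 0 1 0 0
    // 2 3 1 1
    // 1 2 1 1
    return 0;
}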
@@ -14,53 +14,6 @@
 
 namespace cldnn {
 
-template <typename T, typename DT, typename = typename std::enable_if<std::is_convertible<DT, T>::value>::type>
-std::vector<T>& pad_vector_to_size(std::vector<T>& data, size_t size, DT value) {
-    for (size_t i = data.size(); i < size; ++i) {
-        data.push_back(static_cast<T>(value));
-    }
-    return data;
-}
-
-template <typename T, typename MT>
-std::vector<T>& vector_assign_if_not_mask(std::vector<T>& dst, const T& src, const std::vector<MT>& mask) {
-    for (size_t i = 0; i < dst.size(); ++i) {
-        if (!mask[i])
-            dst[i] = src;
-    }
-    return dst;
-}
-
-template <typename T, typename MT>
-std::vector<T>& vector_assign_if_not_mask(std::vector<T>& dst, const std::vector<T>& src, const std::vector<MT>& mask) {
-    for (size_t i = 0; i < dst.size(); ++i) {
-        if (!mask[i])
-            dst[i] = src[i];
-    }
-    return dst;
-}
-
-inline format get_default_format_for_dim(size_t dimension) {
-    format dimensionFormat = format::bfyx;
-    switch (dimension) {
-    case 1:
-    case 2:
-    case 3:
-    case 4:
-        dimensionFormat = format::bfyx;
-        break;
-    case 5:
-        dimensionFormat = format::bfzyx;
-        break;
-    case 6:
-        dimensionFormat = format::bfwzyx;
-        break;
-    default:
-        CLDNN_ERROR_MESSAGE("Function get_default_format_for_dim", "Unsupported dimension number: " + std::to_string(dimension));
-    }
-    return dimensionFormat;
-}
-
 template <>
 struct typed_program_node<strided_slice> : public typed_program_node_base<strided_slice> {
     using parent = typed_program_node_base<strided_slice>;
@@ -69,6 +22,7 @@ public:
     using parent::parent;
 
     program_node& input(size_t index = 0) const { return get_dependency(index); }
+    std::vector<size_t> get_shape_infer_dependencies() const override { return {1, 2, 3}; }
 };
 
 using strided_slice_node = typed_program_node<strided_slice>;
@@ -12,6 +12,7 @@
 #include "convolution_inst.h"
 #include "deconvolution_inst.h"
 #include "shape_of_inst.h"
+#include "strided_slice_inst.h"
 #include "experimental_detectron_roi_feature_extractor_inst.hpp"
 
 #include "intel_gpu/graph/network.hpp"
@@ -156,7 +157,19 @@ void primitive_inst::update_shape() {
     if (_node.is_type<shape_of>())
         return;
 
-    if (!input_shape_changed && !_node.generates_dynamic_output() && _impl_params->output_layout.is_static())
+    // Strided slice loads data from the {1,2,3} dependencies in the impl::create method.
+    // It means that this data must be put into the impl_params map.
+    // Thus we treat it as a "dynamic" case.
+    // TODO: Remove once the strided slice impl supports runtime tensors for begin/end/stride
+    bool strided_slice_wa = false;
+    if (_node.is_type<strided_slice>()) {
+        for (size_t i = 1; i < _node.get_dependencies().size(); i++) {
+            if (!_node.get_dependency(i).is_type<data>())
+                strided_slice_wa = true;
+        }
+    }
+
+    if (!strided_slice_wa && !input_shape_changed && !_node.generates_dynamic_output() && _impl_params->output_layout.is_static())
        return;
 
     auto memory_deps = _node.get_const_memory_deps();
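The workaround only flags the node when any of its begin/end/stride dependencies is not a constant data node; in that case update_shape() is not skipped, so the runtime memories end up in impl_params.memory_deps before impl::create runs. A minimal standalone sketch of that check with a hypothetical dependency model (not the cldnn API):

#include <cstdio>
#include <vector>

// Hypothetical stand-in for a dependency node: true means "constant data node".
struct dep { bool is_constant_data; };

// Mirrors the strided_slice_wa check: any non-constant begin/end/stride triggers the workaround.
static bool needs_strided_slice_wa(const std::vector<dep>& deps) {
    bool wa = false;
    for (size_t i = 1; i < deps.size(); i++) {
        if (!deps[i].is_constant_data)
            wa = true;
    }
    return wa;
}

int main() {
    // deps[0] is the data input; deps[1..3] are begin/end/stride.
    std::vector<dep> all_const     = {{false}, {true}, {true}, {true}};
    std::vector<dep> runtime_begin = {{false}, {false}, {true}, {true}};

    printf("%d\n", needs_strided_slice_wa(all_const));     // 0: shape update can be skipped as before
    printf("%d\n", needs_strided_slice_wa(runtime_begin)); // 1: force update_shape so memory_deps is filled
    return 0;
}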
@@ -6,6 +6,7 @@
 #include "program_helpers.h"
 #include "primitive_inst.h"
 #include "loop_inst.h"
+#include "strided_slice_inst.h"
 #ifdef ENABLE_ONEDNN_FOR_GPU
 #include "intel_gpu/runtime/debug_configuration.hpp"
 #include "convolution_inst.h"
@@ -281,6 +282,16 @@ bool program_node::recalc_output_layout(bool invalidate_users_if_changed) {
 }
 
 bool program_node::is_dynamic() const {
+    // Strided slice loads data from the {1,2,3} dependencies in the impl::create method.
+    // It means that this data must be put into the impl_params map.
+    // Thus we treat it as a "dynamic" case.
+    // TODO: Remove once the strided slice impl supports runtime tensors for begin/end/stride
+    if (is_type<strided_slice>()) {
+        for (size_t i = 1; i < get_dependencies().size(); i++) {
+            if (!get_dependency(i).is_type<data>())
+                return true;
+        }
+    }
     for (const auto* input : get_dependencies()) {
         if (input->get_output_layout().is_dynamic())
             return true;
@@ -290,6 +301,17 @@ bool program_node::is_dynamic() const {
 }
 
 bool program_node::is_dynamic() {
+    // Strided slice loads data from the {1,2,3} dependencies in the impl::create method.
+    // It means that this data must be put into the impl_params map.
+    // Thus we treat it as a "dynamic" case.
+    // TODO: Remove once the strided slice impl supports runtime tensors for begin/end/stride
+    if (is_type<strided_slice>()) {
+        for (size_t i = 1; i < get_dependencies().size(); i++) {
+            if (!get_dependency(i).is_type<data>())
+                return true;
+        }
+    }
+
     for (auto& input : get_dependencies()) {
         if (input->get_output_layout(true).is_dynamic())
             return true;
@@ -39,8 +39,8 @@ std::vector<layout> strided_slice_inst::calc_output_layouts(strided_slice_node c
 
     auto& constant_mem = impl_param.memory_deps;
 
-    if (constant_mem.empty()) {
-        auto out_shape = ov::PartialShape::dynamic(input0_layout.get_rank());
+    if (!constant_mem.count(1) || !constant_mem.count(2) || !constant_mem.count(3)) {
+        auto out_shape = ov::PartialShape::dynamic(input0_layout.get_partial_shape().size());
         return { layout{out_shape, input0_layout.data_type, format::get_default_format(out_shape.rank().get_length())} };
     }
 
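The early-out now requires all three shape-defining inputs (indices 1, 2, 3) to be present in memory_deps; if any of them is missing, a fully dynamic shape of the input rank is returned instead of a static layout. A minimal sketch of that decision using ov::PartialShape and a hypothetical stand-in for the memory_deps map; the shape-inference body is elided and only the fallback path is shown.

#include <iostream>
#include <map>

#include <openvino/core/partial_shape.hpp>

// Hypothetical stand-in for impl_param.memory_deps: dependency index -> constant memory handle.
using memory_deps_t = std::map<size_t, const void*>;

// Mirrors the new early-out: without all of begin (1), end (2) and stride (3),
// fall back to a fully dynamic shape of the input rank.
ov::PartialShape strided_slice_out_shape(const memory_deps_t& constant_mem,
                                         const ov::PartialShape& input_shape) {
    if (!constant_mem.count(1) || !constant_mem.count(2) || !constant_mem.count(3))
        return ov::PartialShape::dynamic(input_shape.size());
    // A real implementation would run strided_slice shape inference here;
    // the input shape is returned only as a placeholder for the static result.
    return input_shape;
}

int main() {
    ov::PartialShape in{2, 2, 2};
    int dummy = 0;
    std::cout << strided_slice_out_shape({{1, &dummy}}, in) << std::endl;                           // rank-3 dynamic shape
    std::cout << strided_slice_out_shape({{1, &dummy}, {2, &dummy}, {3, &dummy}}, in) << std::endl; // placeholder static result
    return 0;
}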
@@ -1459,3 +1459,97 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all) {
         EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
     }
 }
+
+
+TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all_dynamic) {
+    // Input (BFZYX): 2x2x2x1x1
+    // Output (BFZYX): 2x1x1x1x1
+
+    auto& engine = get_test_engine();
+    auto input_lay = layout{ ov::PartialShape::dynamic(3), data_types::f32, format::bfyx };
+    auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2 }, data_types::f32, format::bfyx });
+    auto begin = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
+    auto end = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
+    auto strides = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
+
+    set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f});
+    set_values<int64_t>(begin, {0, 0, 0});
+    set_values<int64_t>(end, {2, 2, 2});
+    set_values<int64_t>(strides, {1, 2, 2});
+
+    topology topology;
+    topology.add(input_layout("input", input_lay));
+    topology.add(data("input2", begin));
+    topology.add(data("input3", end));
+    topology.add(data("input4", strides));
+    topology.add(strided_slice("strided_slice", "input", "input2", "input3", "input4", {}, {}, {}, {}, {}, {}));
+
+    build_options bo;
+    bo.set_option(build_option::allow_new_shape_infer(true));
+    network network(engine, topology, bo);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector<float> answers = {
+        0.0f, 4.0f
+    };
+
+    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all_dynamic_begin) {
+    auto& engine = get_test_engine();
+    auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2 }, data_types::f32, format::bfyx });
+    auto begin = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
+    auto end = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
+    auto strides = engine.allocate_memory({ ov::PartialShape{ 3 }, data_types::i64, format::bfyx });
+
+    set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f});
+    set_values<int64_t>(begin, {0, 0, 0});
+    set_values<int64_t>(end, {2, 2, 2});
+    set_values<int64_t>(strides, {1, 2, 2});
+
+    topology topology;
+    topology.add(data("input", input));
+    topology.add(input_layout("input2", begin->get_layout()));
+    topology.add(data("input3", end));
+    topology.add(data("input4", strides));
+    topology.add(strided_slice("strided_slice", "input", "input2", "input3", "input4", {}, {}, {}, {}, {}, {}));
+
+    build_options bo;
+    bo.set_option(build_option::allow_new_shape_infer(true));
+    network network(engine, topology, bo);
+
+    network.set_input_data("input2", begin);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector<float> answers = {
+        0.0f, 4.0f
+    };
+
+    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}