[GPU] Allow StridedSlice as predecessor for in place concat (#18836)

* Allow StridedSlice as predecessor for in place concat
* Enable padding support for strided slice
* Add prepare_buffer_fusing TC for ov_gpu_unit_tests

Signed-off-by: Andrew Park <andrew.park@intel.com>
This commit is contained in:
parent 8846b5ddd0
commit 1501e29cfe
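For context, here is a minimal sketch of the graph pattern this change targets: a Concat with a StridedSlice producer. Previously the available_pred check in prepare_buffer_fusing rejected StridedSlice, so such a Concat was never implemented in place; with this commit the predecessor can write straight into the shared concat buffer, with padding on the slice handled via support_padding_all(true). The sketch uses the public OpenVINO C++ API; the helper name and shapes are illustrative only (they mirror the new unit test below) and are not part of this commit.

#include <memory>
#include <vector>
#include <openvino/openvino.hpp>
#include <openvino/op/ops.hpp>

// Illustrative helper, not part of this commit: builds Concat(in0, StridedSlice(in1)).
std::shared_ptr<ov::Model> make_concat_after_strided_slice() {
    auto in0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{2, 2, 2, 2});
    auto in1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{2, 2, 2, 2});

    // Full-range slice, mirroring the begin/end/strides of the new unit test.
    auto begin   = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{4}, {0, 0, 0, 0});
    auto end     = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{4}, {2, 2, 2, 2});
    auto strides = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, 1, 1});
    auto slice = std::make_shared<ov::op::v1::StridedSlice>(
        in1, begin, end, strides,
        std::vector<int64_t>{0, 0, 0, 0},   // begin_mask
        std::vector<int64_t>{0, 0, 0, 0});  // end_mask

    // Concat over the batch axis; on GPU its StridedSlice predecessor may now
    // be fused to write directly into the concat output buffer.
    auto concat = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{in0, slice}, 0);
    return std::make_shared<ov::Model>(ov::OutputVector{concat}, ov::ParameterVector{in0, in1});
}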
@@ -12,6 +12,7 @@
 #include "depth_to_space_inst.h"
 #include "resample_inst.h"
 #include "loop_inst.h"
+#include "strided_slice_inst.h"
 #include "non_max_suppression_inst.h"
 #include "experimental_detectron_roi_feature_extractor_inst.hpp"
 #include "border_inst.h"
@@ -58,7 +59,8 @@ auto available_pred = [](const program_node& input) {
     if (!input.is_type<pooling>() && !input.is_type<convolution>() && !input.is_type<quantize>() &&
         !input.is_type<activation>() && !input.is_type<deconvolution>() && !input.is_type<concatenation>() &&
         !input.is_type<crop>() && !input.is_type<eltwise>() && !input.is_type<resample>() &&
-        !input.is_type<reorder>() && !(input.is_type<permute>() && !input.as<permute>().is_rotating_except_batch()))
+        !input.is_type<reorder>() && !(input.is_type<permute>() && !input.as<permute>().is_rotating_except_batch()) &&
+        !input.is_type<strided_slice>())
         return false;
     return true;
 };
@@ -15,6 +15,9 @@ namespace cldnn {
 template <>
 struct typed_program_node<strided_slice> : public typed_program_node_base<strided_slice> {
     using parent = typed_program_node_base<strided_slice>;
+    typed_program_node(const std::shared_ptr<strided_slice> prim, program& prog) : parent(prim, prog) {
+        support_padding_all(true);
+    }

 public:
     using parent::parent;
@@ -18,6 +18,7 @@
 #include "reorder_inst.h"
 #include "shape_of_inst.h"
 #include "gather_inst.h"
+#include "strided_slice_inst.h"
 #include "intel_gpu/graph/network.hpp"
 #include "pass_manager.h"
 #include "to_string_utils.h"
@@ -264,6 +265,92 @@ TEST(prepare_buffer_fusing, in_place_concat_dynamic) {
     }
 }

+TEST(prepare_buffer_fusing, in_place_concat_strided_slice_dyn) {
+    auto& engine = get_test_engine();
+    auto in_layout1_0 = layout{ ov::PartialShape::dynamic(4), data_types::f32, format::bfyx };
+    auto in_layout2_0 = layout{ ov::PartialShape::dynamic(4), data_types::f32, format::bfyx };
+    auto in_layout3_0 = layout{ ov::PartialShape::dynamic(4), data_types::f32, format::bfyx };
+    auto in_layout1 = layout{ ov::PartialShape{2, 2, 2, 2}, data_types::f32, format::bfyx };
+    auto in_layout2 = layout{ ov::PartialShape{2, 2, 2, 2}, data_types::f32, format::bfyx };
+    auto in_layout3 = layout{ ov::PartialShape{2, 2, 2, 2}, data_types::f32, format::bfyx };
+    auto begin = engine.allocate_memory({ ov::PartialShape{4}, data_types::i64, format::bfyx });
+    auto end = engine.allocate_memory({ ov::PartialShape{4}, data_types::i64, format::bfyx });
+    auto strides = engine.allocate_memory({ ov::PartialShape{4}, data_types::i64, format::bfyx });
+    set_values<int64_t>(begin, {0, 0, 0, 0});
+    set_values<int64_t>(end, {2, 2, 2, 2});
+    set_values<int64_t>(strides, {1, 1, 1, 1});
+
+    topology topology;
+    topology.add(input_layout("input1", in_layout1_0));
+    topology.add(input_layout("input2", in_layout2_0));
+    topology.add(input_layout("input3", in_layout3_0));
+    topology.add(data("input4", begin));
+    topology.add(data("input5", end));
+    topology.add(data("input6", strides));
+    topology.add(reorder("reorder1", input_info("input1"), format::bfyx, data_types::f16));
+    topology.add(reorder("reorder2", input_info("input2"), format::bfyx, data_types::f16));
+    topology.add(reorder("reorder3", input_info("input3"), format::bfyx, data_types::f16));
+    topology.add(eltwise("eltwise", { input_info("reorder1"), input_info("reorder2") }, eltwise_mode::prod));
+    topology.add(strided_slice("strided_slice", input_info("reorder3"), input_info("input4"),
+                               input_info("input5"), input_info("input6"), {}, {}, {}, {}, {}, {}));
+    topology.add(concatenation("concat", { input_info("eltwise"), input_info("strided_slice") }, 0));
+    topology.add(reorder("output", input_info("concat"), format::bfyx, data_types::f32));
+
+    ExecutionConfig config = get_test_default_config(engine);
+    config.set_property(ov::intel_gpu::optimize_data(true));
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    auto prog = program::build_program(engine, topology, config, false, false);
+    ASSERT_NE(prog, nullptr);
+    cldnn::network net(prog, 0);
+
+    auto input_memory1 = engine.allocate_memory(in_layout1);
+    auto input_memory2 = engine.allocate_memory(in_layout2);
+    auto input_memory3 = engine.allocate_memory(in_layout3);
+    set_values<float>(input_memory1, {
+        1.f, 0.f, 5.f, 1.f, 2.f, 0.f, 6.f, 3.f,
+        3.f, 0.5f, 7.f, 12.f, 4.f, -0.5f, 8.f, 7.5f
+    });
+    set_values<float>(input_memory2, {
+        0.5f, 5.f, 15.f, 6.f, 0.5f, 2.f, 8.f, -0.5f,
+        2.5f, 7.f, 17.f, 8.f, 2.5f, 4.f, 10.f, -2.5f
+    });
+    set_values<float>(input_memory3, {
+        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f,
+        8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f
+    });
+
+    net.set_input_data("input1", input_memory1);
+    net.set_input_data("input2", input_memory2);
+    net.set_input_data("input3", input_memory3);
+
+    std::map<cldnn::primitive_id, cldnn::network_output> output;
+    EXPECT_NO_THROW(output = net.execute());
+
+    const auto& concat_node = net.get_primitive("concat")->get_node();
+    auto concat_mem = net.get_primitive("concat")->output_memory_ptr();
+    auto eltwise_mem = net.get_primitive("eltwise")->output_memory_ptr();
+    auto strided_slice_mem = net.get_primitive("strided_slice")->output_memory_ptr();
+
+    ASSERT_TRUE(concat_node.can_be_optimized());
+    ASSERT_EQ(concat_mem, eltwise_mem);
+    ASSERT_EQ(concat_mem, strided_slice_mem);
+
+    auto out_lay = net.get_output_layout("output");
+    auto out_mem = output.at("output").get_memory();
+    cldnn::mem_lock<float> output_ptr(out_mem, get_test_stream());
+
+    std::vector<float> ref_output = {
+        0.5f, 0.0f, 75.f, 6.0f, 1.0f, 0.0f, 48.f, -1.5f,
+        7.5f, 3.5f, 119.f, 96.0f, 10.0f, -2.0f, 80.f, -18.75f,
+        0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f,
+        9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f
+    };
+
+    for (size_t x = 0; x < out_lay.count(); ++x) {
+        ASSERT_EQ(ref_output[x], output_ptr[x]);
+    }
+}
+
 TEST(prepare_buffer_fusing, in_place_concat_dynamic_onednn_batch1) {
     auto& engine = get_test_engine();
     if (!engine.get_device_info().supports_immad)
@@ -118,6 +118,55 @@ public:
         }
     }

+    void test_2x2x2x2_full_pad(bool is_caching_test) {
+        // Input (BFYX): 2x2x2x2
+        // Begin (BFYX): 0x0x0x0
+        // End (BFYX): 2x2x2x2
+        // Stride (BFYX): 1x1x1x1
+        // Output (BFYX): 2x2x2x2
+
+        auto& engine = get_test_engine();
+        auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2, 2 }, data_types::f32, format::bfyx });
+
+        set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+            9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f
+        });
+        std::vector<int64_t> begin_data = { 0, 0, 0, 0 };
+        std::vector<int64_t> end_data = { 2, 2, 2, 2 };
+        std::vector<int64_t> strides_data = { 1, 1, 1, 1 };
+
+        padding in_pad({0, 0, 1, 1}, {0, 0, 1, 1});
+        auto padded_layout = input->get_layout().with_padding(in_pad);
+
+        topology topology;
+        topology.add(input_layout("input", input->get_layout()));
+        topology.add(reorder("input_reorder", input_info("input"), padded_layout));
+        topology.add(strided_slice("strided_slice", input_info("input_reorder"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 2, 2, 2}));
+
+        cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
+
+        network->set_input_data("input", input);
+
+        auto outputs = network->execute();
+
+        ASSERT_EQ(outputs.size(), size_t(1));
+        ASSERT_EQ(outputs.begin()->first, "strided_slice");
+
+        auto output = outputs.at("strided_slice").get_memory();
+
+        std::vector<float> answers = {
+            0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f };
+
+        cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+        ASSERT_EQ(output_ptr.size(), answers.size());
+        for (size_t i = 0; i < answers.size(); ++i)
+        {
+            ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
+        }
+    }
+
     void test_2x2x2x2_ignore(bool is_caching_test) {
         // Input (BFYX): 2x2x2x2
         // Begin (BFYX): 1x1x1x1
@@ -597,6 +646,54 @@ public:
         }
     }

+    void test_2x2x2x2_full_negative_stride_pad(bool is_caching_test) {
+        // Input (BFYX): 2x2x2x2
+        // Begin (BFYX): 0x0x0x0
+        // End (BFYX): 2x2x2x2
+        // Stride (BFYX): -1x-1x1x1
+        // Output (BFYX): 2x2x2x2
+
+        auto& engine = get_test_engine();
+        auto input = engine.allocate_memory({ ov::PartialShape{ 2, 2, 2, 2 }, data_types::f32, format::bfyx });
+
+        set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+            9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f
+        });
+        std::vector<int64_t> begin_data = { 0, 0, 0, 0 };
+        std::vector<int64_t> end_data = { 2, 2, 2, 2 };
+        std::vector<int64_t> strides_data = { -1, -1, 1, 1 };
+
+        padding in_pad({0, 0, 1, 1}, {0, 0, 1, 1});
+        auto padded_layout = input->get_layout().with_padding(in_pad);
+
+        topology topology;
+        topology.add(input_layout("input", input->get_layout()));
+        topology.add(reorder("input_reorder", input_info("input"), padded_layout));
+        topology.add(strided_slice("strided_slice", input_info("input_reorder"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 2, 2, 2}));
+
+        cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
+
+        network->set_input_data("input", input);
+
+        auto outputs = network->execute();
+
+        ASSERT_EQ(outputs.size(), size_t(1));
+        ASSERT_EQ(outputs.begin()->first, "strided_slice");
+
+        auto output = outputs.at("strided_slice").get_memory();
+
+        std::vector<float> answers = {
+            12.f, 13.f, 14.f, 15.f, 8.f, 9.f, 10.f, 11.f, 4.f, 5.f, 6.f, 7.f, 0.f, 1.f, 2.f, 3.f };
+
+        cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+        for (size_t i = 0; i < answers.size(); ++i)
+        {
+            ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
+        }
+    }
+
     void test_2x2x2x1x1_2_negative_all(bool is_caching_test) {
         // Input (BFZYX): 2x2x2x1x1
         // Output (BFZYX): 2x1x1x1x1
@@ -1902,6 +1999,10 @@ TEST_F(strided_slice_gpu_constants, test_2x2x2x2_full) {
     this->test_2x2x2x2_full(false);
 }

+TEST_F(strided_slice_gpu, test_2x2x2x2_full_pad) {
+    this->test_2x2x2x2_full_pad(false);
+}
+
 TEST_F(strided_slice_gpu, test_2x2x2x2_ignore) {
     this->test_2x2x2x2_ignore(false);
 }
@@ -1998,6 +2099,10 @@ TEST_F(strided_slice_gpu_constants, test_2x2x2x2_full_negative_stride) {
     this->test_2x2x2x2_full_negative_stride(false);
 }

+TEST_F(strided_slice_gpu, test_2x2x2x2_full_negative_stride_pad) {
+    this->test_2x2x2x2_full_negative_stride_pad(false);
+}
+
 TEST_F(strided_slice_gpu_constants, test_2x2x2x2_full_negative_stride_f_axis) {
     this->test_2x2x2x2_full_negative_stride_f_axis(false);
 }
@@ -2081,6 +2186,10 @@ TEST_F(strided_slice_gpu_constants, test_2x2x2x2_full_cached) {
     this->test_2x2x2x2_full(true);
 }

+TEST_F(strided_slice_gpu, test_2x2x2x2_full_pad_cached) {
+    this->test_2x2x2x2_full_pad(true);
+}
+
 TEST_F(strided_slice_gpu, test_2x2x2x2_ignore_cached) {
     this->test_2x2x2x2_ignore(true);
 }
@@ -2177,6 +2286,10 @@ TEST_F(strided_slice_gpu_constants, test_2x2x2x2_full_negative_stride_cached) {
     this->test_2x2x2x2_full_negative_stride(true);
 }

+TEST_F(strided_slice_gpu, test_2x2x2x2_full_negative_stride_pad_cached) {
+    this->test_2x2x2x2_full_negative_stride_pad(true);
+}
+
 TEST_F(strided_slice_gpu_constants, test_2x2x2x2_full_negative_stride_f_axis_cached) {
     this->test_2x2x2x2_full_negative_stride_f_axis(true);
 }