[GPU] Fix loop issues (#21091)
* [GPU] Fix loop issues
* If the loop does not run the inner body, return a zero-dimension layout for dynamic dimension layouts
* Support dynamically sliced input in the inner body in create_concat_memory_map
* Modify update_shape to call reset_shape_change() when the inputs of loop are not changed
* Add unit test
* Follow-up code review
This commit is contained in: parent 60730f61a4, commit 8b5b7a627b
@@ -23,7 +23,6 @@ std::map<size_t, memory::ptr> loop_node::get_memory_deps() const {
     auto memory_deps = get_const_memory_deps();
     for (auto& i : get_shape_infer_dependencies()) {
         auto& dep = get_dependency(i);
-        auto dep_id = dep.id();
         if (memory_deps.count(i) > 0 || i >= get_dependencies().size()) {
             continue;
         }
@@ -91,6 +90,17 @@ static std::vector<layout> get_output_layouts(kernel_impl_params const& impl_par
             auto shape = loop_output_layout.get_partial_shape();
             shape[axis_to_iterate_through] = static_cast<int32_t>(num_iterations);
             loop_output_layout.set_partial_shape(shape);
+        } else {
+            // if num_iterations is zero, the loop does not run the inner body network.
+            // in that case, for a dynamic output layout, dynamic dimensions are replaced with zero.
+            if (num_iterations == 0) {
+                auto shape = loop_output_layout.get_partial_shape();
+                for (size_t i = 0; i < shape.size(); i++) {
+                    if (shape[i].is_dynamic())
+                        shape[i] = 0;
+                }
+                loop_output_layout.set_partial_shape(shape);
+            }
         }
         output_layouts.push_back(loop_output_layout);
     }
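For context, the fix-up above can be read as a standalone helper. A minimal sketch using plain ov::PartialShape; the function name zero_out_dynamic_dims is chosen here for illustration and is not part of the plugin:

#include <openvino/core/partial_shape.hpp>

// Sketch of the zero-iteration fix-up: when the loop body never runs,
// every dynamic dimension collapses to 0 so consumers see an empty tensor
// instead of an unresolved dynamic one.
ov::PartialShape zero_out_dynamic_dims(ov::PartialShape shape) {
    for (size_t i = 0; i < shape.size(); i++) {
        if (shape[i].is_dynamic())
            shape[i] = 0;
    }
    return shape;
}
// e.g. {1, ?, 4} (with ? dynamic) becomes {1, 0, 4}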
@@ -348,6 +358,7 @@ event::ptr loop_inst::set_output_memory(memory::ptr mem, bool check, size_t idx)
 loop_inst::concatenated_memory_mapping::ptr loop_inst::create_concat_memory_map(const cldnn::loop::io_primitive_map& io_prim_map,
                                                                                 memory::ptr mem_ptr,
                                                                                 const int64_t num_iterations) {
+    OPENVINO_ASSERT(io_prim_map.axis >= 0, "axis should not be negative");
     const auto& external_id = io_prim_map.external_id;
     const auto& internal_id = io_prim_map.internal_id;
     auto& engine = body_network->get_engine();
@@ -360,16 +371,37 @@ loop_inst::concatenated_memory_mapping::ptr loop_inst::create_concat_memory_map(
     // In dynamic model, we can't calculate num_element_iteration, start, and sliced_layout.
     // Those parameters will be recalculated in the backedge preprocessing map after the first execution.
     if (mem_ptr != nullptr) {
-        auto& out_mem = prim->output_memory(internal_id.idx);
-        layout sliced_layout = out_mem.get_layout();
+        layout sliced_layout = prim->get_output_layout(internal_id.idx);
+        auto out_mem_ptr = prim->output_memory_ptr(internal_id.idx);
+        if (out_mem_ptr != nullptr) {
+            sliced_layout = out_mem_ptr->get_layout();
+        } else {
+            // if the inner body prim has no output memory because its shape is dynamic,
+            // calculate the inner body prim layout using concat_mem's layout.
+            auto updated_sliced_layout = sliced_layout.get_partial_shape();
+            OPENVINO_ASSERT(updated_sliced_layout[io_prim_map.axis].is_static() || num_iterations > 0,
+                            "Dynamic dimension is not allowed for axis when num_iterations is negative");
+            auto concat_mem_pshape = mem_ptr->get_layout().get_partial_shape();
+            const auto shape_size = concat_mem_pshape.size();
+            for (size_t i = 0; i < shape_size; i++) {
+                if (updated_sliced_layout[i].is_dynamic()) {
+                    updated_sliced_layout[i] = concat_mem_pshape[i];
+                }
+            }
+            GPU_DEBUG_LOG << "output pshape for [" << prim->id() << "] is changed from "
+                          << sliced_layout.get_partial_shape().to_string()
+                          << " to " << updated_sliced_layout.to_string() << std::endl;
+            sliced_layout.set_partial_shape(updated_sliced_layout);
+            out_mem_ptr = engine.allocate_memory(sliced_layout);
+        }

-        // When trip_count is -1, allocate first sliced_mem and allocate sliced memory if additional sliced mem is required
+        // When num_iterations is -1, allocate the first sliced_mem and allocate additional sliced memory when required
         if (num_iterations < 0) {
-            memory::ptr sliced_mem = engine.allocate_memory(sliced_layout);
-            sliced_mems.push_back(sliced_mem);
+            sliced_mems.push_back(out_mem_ptr);
         } else {
             sliced_mems.reserve(num_iterations);
-            for (int j=0; j < num_iterations; ++j) {
+            sliced_mems.push_back(out_mem_ptr);
+            for (int j=1; j < num_iterations; ++j) {
                 memory::ptr sliced_mem = engine.allocate_memory(sliced_layout);
                 sliced_mems.push_back(sliced_mem);
             }
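The second half of this hunk changes the allocation strategy: the inner body's existing output buffer is reused as the slice for iteration 0, and fresh buffers are allocated only for the remaining iterations (or lazily when the iteration count is unknown). A self-contained sketch of that pattern with stand-in types; memory, memory_ptr, and make_sliced_mems are illustrative, not cldnn's API:

#include <cstdint>
#include <memory>
#include <vector>

// Illustrative stand-ins for the cldnn memory types.
struct memory { /* device buffer */ };
using memory_ptr = std::shared_ptr<memory>;

// Reuse the body's existing output buffer as iteration 0's slice, and
// allocate fresh buffers only for iterations 1..N-1; when the trip count
// is unknown (num_iterations < 0), keep just the first slice and allocate
// more on demand during execution.
std::vector<memory_ptr> make_sliced_mems(memory_ptr out_mem, int64_t num_iterations) {
    std::vector<memory_ptr> sliced_mems;
    if (num_iterations < 0) {
        sliced_mems.push_back(out_mem);  // further slices allocated lazily
    } else {
        sliced_mems.reserve(num_iterations);
        sliced_mems.push_back(out_mem);
        for (int64_t j = 1; j < num_iterations; ++j)
            sliced_mems.push_back(std::make_shared<memory>());  // fresh buffer per iteration
    }
    return sliced_mems;
}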
@@ -17,6 +17,7 @@
 #include "reshape_inst.h"
 #include "reorder_inst.h"
 #include "eltwise_inst.h"
+#include "loop_inst.h"
 #include "deconvolution_inst.h"
 #include "shape_of_inst.h"
 #include "softmax_inst.h"
@@ -272,6 +273,13 @@ void primitive_inst::update_shape() {
         return;
     }

+    // if the input shape is not changed, loop doesn't need to update anything,
+    // because the actual output layout will be calculated after the body network finishes execution.
+    if (_node->is_type<loop>() && !input_shape_changed) {
+        reset_shape_change();
+        return;
+    }
+
     // Do not update shapes in a shape_of subgraph if shape_of's input shape is not changed
     if (_node->is_in_shape_of_subgraph()) {
         bool subgraph_input_changed = false;
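The guard added here is a small decision rule; a hedged sketch of just that rule, with illustrative names rather than the plugin's actual types:

// Sketch: a loop node whose inputs did not change skips speculative shape
// inference, because its real output layout is only known after the body
// network executes; all other nodes proceed to normal shape inference.
enum class shape_update_action { skip, infer };

shape_update_action decide_shape_update(bool is_loop_node, bool input_shape_changed) {
    if (is_loop_node && !input_shape_changed)
        return shape_update_action::skip;  // the real code also resets the shape-changed flag
    return shape_update_action::infer;
}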
@@ -432,11 +432,11 @@ TEST(loop_gpu, basic_concat_nested_cached) {
     test_loop_gpu_basic_concat_nested<float>(true);
 }

-static void test_loop_gpu_wo_trip_count(bool is_caching_test) {
+static void test_loop_gpu_wo_trip_count(ov::PartialShape body_input_layout, bool is_caching_test = false) {
     auto& engine = get_test_engine();

     auto e_input_layout = cldnn::layout{ { 1, 1, 5, 4 }, data_types::f32, format::bfyx };
-    auto b_input_layout = cldnn::layout{ { 1, 1, 1, 4 }, data_types::f32, format::bfyx };
+    auto b_input_layout = cldnn::layout{ body_input_layout, data_types::f32, format::bfyx };
     auto const_layout = cldnn::layout{ {}, data_types::i64, format::bfyx };

     auto e_input_mem = engine.allocate_memory(e_input_layout); // b,f,x,y
@@ -547,5 +547,9 @@ static void test_loop_gpu_wo_trip_count(bool is_caching_test) {
 }

 TEST(loop_gpu, support_dynamic_tensoriterator) {
-    test_loop_gpu_wo_trip_count(false);
+    test_loop_gpu_wo_trip_count({ 1, 1, 1, 4 });
 }
+
+TEST(loop_gpu, support_loop_w_dynamic_body_input) {
+    test_loop_gpu_wo_trip_count({ 1, -1, 1, 4 });
+}
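For readers unfamiliar with the shorthand in the tests: a -1 entry in an ov::PartialShape initializer denotes a dynamic dimension, so the new test drives the loop body with a feature dimension that is unknown until runtime. A small sanity check, assuming the standard ov::PartialShape API:

#include <openvino/core/partial_shape.hpp>
#include <cassert>

int main() {
    ov::PartialShape static_shape{1, 1, 1, 4};    // fully static body input
    ov::PartialShape dynamic_shape{1, -1, 1, 4};  // -1 marks a dynamic dimension
    assert(static_shape.is_static());
    assert(dynamic_shape[1].is_dynamic());
    return 0;
}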