[GPU] Allocate reorder & permute's output memory for bounded dynamic shape (#18793)

* Previously reorder / permute was not allocating its memory at build time even though the shape has an upper bound

* Update src/plugins/intel_gpu/src/graph/permute.cpp

Co-authored-by: Sergey Shlyapnikov <Sergeishlyapnikov@gmail.com>

* Fix as review comment

---------

Co-authored-by: Sergey Shlyapnikov <Sergeishlyapnikov@gmail.com>
This commit is contained in:
Taylor Yeonbok Lee 2023-07-26 17:08:58 -07:00 committed by GitHub
parent 2afcd950fa
commit c145d8f1e9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 57 additions and 7 deletions

View File

@ -108,7 +108,8 @@ std::string permute_inst::to_string(permute_node const& node) {
}
permute_inst::typed_primitive_inst(network& network, permute_node const& node) :
parent(network, node, (!node.can_be_optimized() && node.get_output_layout().is_static()) ? true : false) {
parent(network, node, !node.can_be_optimized()
&& (node.get_output_layout().is_static() || node.get_output_layout().has_upper_bound())) {
auto permute_order = argument->permute_order;
auto required_order_values_size = static_cast<uint32_t>(permute_order.size());

View File

@ -1147,13 +1147,12 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, memory_pool& pool,
allocation_type type, bool reusable_across_network, bool reset = true, memory* curr_memory = nullptr) {
OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate output for dynamic layout without upper bound");
// Use layout with max tensor for dynamic shape with upper bound
auto static_layout = cldnn::layout(layout.get_partial_shape().get_max_shape(), layout.data_type, layout.format, layout.data_padding);
if (_node.get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) {
if (curr_memory != nullptr)
pool.release_memory(curr_memory, id, net_id);
return pool.get_memory(static_layout, id, net_id, dependencies, type, reusable_across_network, reset);
return pool.get_memory(layout, id, net_id, dependencies, type, reusable_across_network, reset);
}
return pool.get_memory(static_layout, type, reset);
return pool.get_memory(layout, type, reset);
};
auto layout = impl_params.get_output_layout(idx);
@ -1167,6 +1166,7 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, memory_pool& pool,
return a;
};
layout = cldnn::layout(layout.get_partial_shape().get_max_shape(), layout.data_type, layout.format, layout.data_padding);
bool usm_device_allocatable = true;
const auto& total_device_input_mem_size = std::accumulate(impl_params.input_layouts.begin(), impl_params.input_layouts.end(), (uint64_t)0, device_mem_acc);
if (total_device_input_mem_size > _engine.get_device_info().max_global_mem_size)

View File

@ -209,9 +209,10 @@ reorder_inst::typed_primitive_inst(network& network) : parent(network) {
_type = reorder::type_id();
}
reorder_inst::typed_primitive_inst(network& network, reorder_node const& node)
: parent(network, node, (!node.can_be_optimized() && node.get_output_layout().is_static()) ? true : false)
, _req_reinterpr(node.requires_reinterpret()) {
reorder_inst::typed_primitive_inst(network& network, reorder_node const& node) :
parent(network, node, !node.can_be_optimized()
&& (node.get_output_layout().is_static() || node.get_output_layout().has_upper_bound()))
, _req_reinterpr(node.requires_reinterpret()) {
if (node.can_be_optimized())
reuse_input();

View File

@ -0,0 +1,48 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/reorder.hpp>
#include <intel_gpu/primitives/data.hpp>
#include "program_wrapper.h"
#include <cmath>
#include <algorithm>
using namespace cldnn;
using namespace ::tests;
namespace bounded_shape_mem_alloc_tests {
// Verifies that a reorder with a bounded dynamic input shape gets its output
// memory allocated at network build time, sized by the shape's upper bound.
TEST(dyn_shape_bounded_shape_mem, reorder) {
    auto& engine = get_test_engine();
    // Bounded dynamic input: both dimensions have a finite upper bound (10, 20).
    const layout in_layout{ov::PartialShape{ov::Dimension(1, 10), ov::Dimension(1, 20)},
                           data_types::f32, format::bfyx};

    topology topo(input_layout("input", in_layout),
                  reorder("reorder", input_info("input"), format::bfyx, data_types::f16));

    ExecutionConfig cfg = get_test_default_config(engine);
    cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));

    network net(engine, topo, cfg);

    const auto out_mem = net.get_primitive("reorder")->output_memory_ptr();
    // Memory must already exist (allocated at build time, not deferred)...
    ASSERT_NE(out_mem, nullptr);
    // ...and must cover the maximum (upper-bound) element count: 10 * 20.
    ASSERT_EQ(out_mem->get_layout().count(), 10 * 20);
}
// Verifies that a permute with a bounded dynamic input shape gets its output
// memory allocated at network build time, sized by the shape's upper bound.
TEST(dyn_shape_bounded_shape_mem, permute) {
    auto& engine = get_test_engine();
    // 4D bounded dynamic input with upper bounds (10, 15, 5, 20).
    const layout in_layout{ov::PartialShape{ov::Dimension(1, 10), ov::Dimension(1, 15),
                                            ov::Dimension(1, 5), ov::Dimension(1, 20)},
                           data_types::f32, format::bfyx};

    topology topo(input_layout("input", in_layout),
                  permute("permute", input_info("input"), {0, 2, 3, 1}));

    ExecutionConfig cfg = get_test_default_config(engine);
    cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));

    network net(engine, topo, cfg);

    const auto out_mem = net.get_primitive("permute")->output_memory_ptr();
    // Memory must already exist (allocated at build time, not deferred)...
    ASSERT_NE(out_mem, nullptr);
    // ...and must cover the maximum element count: 10 * 15 * 5 * 20
    // (permutation reorders axes but preserves the total count).
    ASSERT_EQ(out_mem->get_layout().count(), 10 * 15 * 5 * 20);
}
}