Fix memory realloc for fake alignment (#13996)
This commit is contained in:
parent
21ead7cbc6
commit
58f8f1a70e
@ -127,6 +127,7 @@ public:
|
||||
|
||||
memory::ptr get_output_memory(const primitive_id& output_id);
|
||||
layout get_node_output_layout(const primitive_id& output_id) const;
|
||||
layout get_output_layout(const primitive_id& output_id) const;
|
||||
std::vector<layout> get_input_layouts() const;
|
||||
|
||||
/// @brief Returns the list of primitive ids before and after graph optimization.
|
||||
|
@ -295,7 +295,7 @@ protected:
|
||||
|
||||
size_t max_output_layout_size = 0;
|
||||
|
||||
std::vector<memory::ptr> allocate_outputs();
|
||||
std::vector<memory::ptr> allocate_outputs(kernel_impl_params* updated_params = nullptr);
|
||||
static std::vector<std::shared_ptr<primitive_inst>> build_exec_deps(
|
||||
std::vector<std::shared_ptr<primitive_inst>> const& mem_deps);
|
||||
void convert_args(const kernel_arguments_data& args, kernel_arguments_data_idx& args_idx) const;
|
||||
|
@ -718,6 +718,10 @@ memory::ptr network::get_output_memory(const primitive_id& output_id) {
|
||||
return get_primitive(output_id)->output_memory_ptr();
|
||||
}
|
||||
|
||||
// Looks up the primitive instance for output_id via get_primitive() and
// returns whatever that instance's get_output_layout() reports.
// NOTE(review): the tests in this change compare this value against the
// layout of the allocated output memory, which they expect to be padded
// ("fake alignment") - presumably this accessor yields the actual, unpadded
// layout; confirm against primitive_inst::get_output_layout().
layout network::get_output_layout(const primitive_id& output_id) const {
|
||||
return get_primitive(output_id)->get_output_layout();
|
||||
}
|
||||
|
||||
layout network::get_node_output_layout(const primitive_id& output_id) const {
|
||||
auto res = std::find_if(_outputs.begin(), _outputs.end(), [&](const std::shared_ptr<primitive_inst>& v) {
|
||||
return v->id() == output_id;
|
||||
|
@ -272,8 +272,9 @@ void primitive_inst::realloc_if_needed() {
|
||||
<< " Current buffer_size=" << max_output_layout_size
|
||||
<< " Requested buffer_size=" << actual_layout.count() << std::endl;
|
||||
}
|
||||
_outputs = allocate_outputs();
|
||||
max_output_layout_size = _outputs[0]->get_layout().count();
|
||||
_outputs = allocate_outputs(&updated_params);
|
||||
// TODO : need to handle multiple outputs
|
||||
max_output_layout_size = updated_params.output_layouts[0].count();
|
||||
}
|
||||
// intermediate memory allocation is required for primitives consisting of multiple kernels in dynamic case
|
||||
allocate_internal_buffers();
|
||||
@ -388,11 +389,22 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
|
||||
|
||||
for (auto& d : _deps) {
|
||||
if (!d->get_node().is_type<data>()) {
|
||||
subgraph->set_input_data(d->id(), d->output_memory_ptr());
|
||||
auto allocated_mem = d->output_memory_ptr();
|
||||
auto actual_input_layout = d->get_output_layout();
|
||||
auto& engine = _network.get_engine();
|
||||
// Need to use actual layout, not the fake aligned memory layout
|
||||
auto actual_mem = engine.reinterpret_buffer(*allocated_mem, actual_input_layout);
|
||||
subgraph->set_input_data(d->id(), actual_mem);
|
||||
}
|
||||
}
|
||||
GPU_DEBUG_IF(debug_config->verbose >= 4) {
|
||||
GPU_DEBUG_COUT << "[Start] Executing unfused subgraph of " << id() << std::endl;
|
||||
}
|
||||
|
||||
auto outputs = subgraph->execute(events);
|
||||
GPU_DEBUG_IF(debug_config->verbose >= 4) {
|
||||
GPU_DEBUG_COUT << "[End] Finished executing unfused subgraph of " << id() << std::endl;
|
||||
}
|
||||
|
||||
auto last_fd = _impl_params->fused_desc.back();
|
||||
auto last_prim_id = last_fd.desc->id;
|
||||
@ -401,7 +413,7 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
|
||||
|
||||
_outputs[0] = outputs.at(last_prim_id).get_memory();
|
||||
|
||||
_impl_params->output_layouts[0] = _outputs[0]->get_layout();
|
||||
_impl_params->output_layouts[0] = subgraph->get_output_layout(last_prim_id);
|
||||
return outputs.at(last_prim_id).get_event();
|
||||
}
|
||||
|
||||
@ -837,11 +849,12 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, memory_pool& pool,
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<memory::ptr> primitive_inst::allocate_outputs() {
|
||||
std::vector<memory::ptr> primitive_inst::allocate_outputs(kernel_impl_params* updated_params) {
|
||||
std::vector<memory::ptr> outputs;
|
||||
for (size_t i = 0; i < get_node().get_outputs_count() ; ++i) {
|
||||
outputs.push_back(allocate_output(get_network().get_engine(), _network.get_memory_pool(), *_node, *_impl_params,
|
||||
get_network_id(), _network.is_internal(), i));
|
||||
outputs.push_back(allocate_output(get_network().get_engine(), _network.get_memory_pool(),
|
||||
*_node, (updated_params != nullptr) ? *updated_params : *_impl_params,
|
||||
get_network_id(), _network.is_internal(), i));
|
||||
}
|
||||
return outputs;
|
||||
}
|
||||
|
@ -359,11 +359,14 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal_2) {
|
||||
net.set_input_data("extra_input", extra_input_memory);
|
||||
|
||||
auto output = net.execute();
|
||||
auto out_l = net.get_output_layout("reorder");
|
||||
auto out_mem = output.at("reorder").get_memory();
|
||||
|
||||
ASSERT_NE(out_mem, nullptr);
|
||||
|
||||
ASSERT_EQ(out_mem->count(),16);
|
||||
ASSERT_EQ(out_l.batch(), 4);
|
||||
ASSERT_EQ(out_l.feature(), 4);
|
||||
ASSERT_EQ(out_mem->count(), 16);
|
||||
ASSERT_EQ(out_mem->size(), 16 * sizeof(float));
|
||||
|
||||
mem_lock<float> lock(out_mem, net.get_stream());
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
#include "test_utils.h"
|
||||
#include "network_test.h"
|
||||
|
||||
#include <intel_gpu/runtime/utils.hpp>
|
||||
#include <intel_gpu/primitives/input_layout.hpp>
|
||||
#include "intel_gpu/primitives/fully_connected.hpp"
|
||||
#include <intel_gpu/primitives/quantize.hpp>
|
||||
@ -1759,15 +1759,16 @@ TEST(fully_connected_gpu, dynamic) {
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "fc");
|
||||
|
||||
auto output_prim = outputs.begin()->second.get_memory();
|
||||
auto output_prim_mem = outputs.begin()->second.get_memory();
|
||||
|
||||
auto out_l = output_prim->get_layout();
|
||||
auto out_l = network.get_output_layout(outputs.begin()->first);
|
||||
ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(input_b, 16)); // fake_alignment
|
||||
ASSERT_EQ(out_l.batch(), input_b);
|
||||
ASSERT_EQ(out_l.feature(), weight_b);
|
||||
ASSERT_EQ(out_l.spatial(0), 1);
|
||||
ASSERT_EQ(out_l.spatial(1), 1);
|
||||
|
||||
cldnn::mem_lock<float> output_ptr (output_prim, get_test_stream());
|
||||
cldnn::mem_lock<float> output_ptr (output_prim_mem, get_test_stream());
|
||||
|
||||
ASSERT_EQ(1.5f, output_ptr[0]);
|
||||
ASSERT_EQ(0.75f, output_ptr[1]);
|
||||
@ -1810,15 +1811,16 @@ TEST(fully_connected_gpu, dynamic_multi_inference_same_shape) {
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "fc");
|
||||
|
||||
auto output_prim = outputs.begin()->second.get_memory();
|
||||
auto output_prim_mem = outputs.begin()->second.get_memory();
|
||||
|
||||
auto out_l = output_prim->get_layout();
|
||||
auto out_l = network.get_output_layout(outputs.begin()->first);
|
||||
ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(input_b, 16)); // fake_alignment
|
||||
ASSERT_EQ(out_l.batch(), input_b);
|
||||
ASSERT_EQ(out_l.feature(), weight_b);
|
||||
ASSERT_EQ(out_l.spatial(0), 1);
|
||||
ASSERT_EQ(out_l.spatial(1), 1);
|
||||
|
||||
cldnn::mem_lock<float> output_ptr (output_prim, get_test_stream());
|
||||
cldnn::mem_lock<float> output_ptr (output_prim_mem, get_test_stream());
|
||||
|
||||
ASSERT_EQ(-1.5f, output_ptr[0]);
|
||||
ASSERT_EQ(-0.75f, output_ptr[1]);
|
||||
@ -1833,15 +1835,16 @@ TEST(fully_connected_gpu, dynamic_multi_inference_same_shape) {
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "fc");
|
||||
|
||||
auto output_prim = outputs.begin()->second.get_memory();
|
||||
auto output_prim_mem = outputs.begin()->second.get_memory();
|
||||
|
||||
auto out_l = output_prim->get_layout();
|
||||
auto out_l = network.get_output_layout(outputs.begin()->first);
|
||||
ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(input_b, 16)); // fake_alignment
|
||||
ASSERT_EQ(out_l.batch(), input_b);
|
||||
ASSERT_EQ(out_l.feature(), weight_b);
|
||||
ASSERT_EQ(out_l.spatial(0), 1);
|
||||
ASSERT_EQ(out_l.spatial(1), 1);
|
||||
|
||||
cldnn::mem_lock<float> output_ptr (output_prim, get_test_stream());
|
||||
cldnn::mem_lock<float> output_ptr (output_prim_mem, get_test_stream());
|
||||
|
||||
ASSERT_EQ(1.5f, output_ptr[0]);
|
||||
ASSERT_EQ(0.75f, output_ptr[1]);
|
||||
@ -1888,15 +1891,16 @@ TEST(fully_connected_gpu, dynamic_multi_inference_different_shape) {
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "fc");
|
||||
|
||||
auto output_prim = outputs.begin()->second.get_memory();
|
||||
auto output_prim_mem = outputs.begin()->second.get_memory();
|
||||
|
||||
auto out_l = output_prim->get_layout();
|
||||
auto out_l = network.get_output_layout(outputs.begin()->first);
|
||||
ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(2, 16)); // fake_alignment
|
||||
ASSERT_EQ(out_l.batch(), 2);
|
||||
ASSERT_EQ(out_l.feature(), weight_b);
|
||||
ASSERT_EQ(out_l.spatial(0), 1);
|
||||
ASSERT_EQ(out_l.spatial(1), 1);
|
||||
|
||||
cldnn::mem_lock<float> output_ptr (output_prim, get_test_stream());
|
||||
cldnn::mem_lock<float> output_ptr (output_prim_mem, get_test_stream());
|
||||
|
||||
ASSERT_EQ(-1.5f, output_ptr[0]);
|
||||
ASSERT_EQ(-0.75f, output_ptr[1]);
|
||||
@ -1916,15 +1920,16 @@ TEST(fully_connected_gpu, dynamic_multi_inference_different_shape) {
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "fc");
|
||||
|
||||
auto output_prim = outputs.begin()->second.get_memory();
|
||||
auto output_prim_mem = outputs.begin()->second.get_memory();
|
||||
|
||||
auto out_l = output_prim->get_layout();
|
||||
auto out_l = network.get_output_layout(outputs.begin()->first);
|
||||
ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(1, 16)); // fake_alignment
|
||||
ASSERT_EQ(out_l.batch(), 1);
|
||||
ASSERT_EQ(out_l.feature(), weight_b);
|
||||
ASSERT_EQ(out_l.spatial(0), 1);
|
||||
ASSERT_EQ(out_l.spatial(1), 1);
|
||||
|
||||
cldnn::mem_lock<float> output_ptr (output_prim, get_test_stream());
|
||||
cldnn::mem_lock<float> output_ptr (output_prim_mem, get_test_stream());
|
||||
|
||||
ASSERT_EQ(1.5f, output_ptr[0]);
|
||||
ASSERT_EQ(0.75f, output_ptr[1]);
|
||||
@ -1973,15 +1978,16 @@ TEST(fully_connected_gpu, dynamic_multi_inference_multiple_shapes) {
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "fc");
|
||||
|
||||
auto output_prim = outputs.begin()->second.get_memory();
|
||||
auto output_prim_mem = outputs.begin()->second.get_memory();
|
||||
|
||||
auto out_l = output_prim->get_layout();
|
||||
ASSERT_EQ(out_l.batch(), 2);
|
||||
auto out_l = network.get_output_layout(outputs.begin()->first);
|
||||
ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(2, 16)); // fake_alignment
|
||||
ASSERT_EQ(out_l.batch(), 2); // actual batch reported by the network, not the fake-aligned memory batch
|
||||
ASSERT_EQ(out_l.feature(), weight_b);
|
||||
ASSERT_EQ(out_l.spatial(0), 1);
|
||||
ASSERT_EQ(out_l.spatial(1), 1);
|
||||
|
||||
cldnn::mem_lock<float> output_ptr (output_prim, get_test_stream());
|
||||
cldnn::mem_lock<float> output_ptr (output_prim_mem, get_test_stream());
|
||||
|
||||
ASSERT_EQ(-1.5f, output_ptr[0]);
|
||||
ASSERT_EQ(-0.75f, output_ptr[1]);
|
||||
@ -2001,15 +2007,16 @@ TEST(fully_connected_gpu, dynamic_multi_inference_multiple_shapes) {
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "fc");
|
||||
|
||||
auto output_prim = outputs.begin()->second.get_memory();
|
||||
auto output_prim_mem = outputs.begin()->second.get_memory();
|
||||
|
||||
auto out_l = output_prim->get_layout();
|
||||
ASSERT_EQ(out_l.batch(), 1);
|
||||
auto out_l = network.get_output_layout(outputs.begin()->first);
|
||||
ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(1, 16)); // fake_alignment
|
||||
ASSERT_EQ(out_l.batch(), 1); // actual batch reported by the network, not the fake-aligned memory batch
|
||||
ASSERT_EQ(out_l.feature(), weight_b);
|
||||
ASSERT_EQ(out_l.spatial(0), 1);
|
||||
ASSERT_EQ(out_l.spatial(1), 1);
|
||||
|
||||
cldnn::mem_lock<float> output_ptr (output_prim, get_test_stream());
|
||||
cldnn::mem_lock<float> output_ptr (output_prim_mem, get_test_stream());
|
||||
|
||||
ASSERT_EQ(1.5f, output_ptr[0]);
|
||||
ASSERT_EQ(0.75f, output_ptr[1]);
|
||||
|
Loading…
Reference in New Issue
Block a user