[GPU] Allow simple attached mem as input memory for network (#19419)

This commit is contained in:
Vladimir Paramuzov 2023-09-01 09:54:30 +04:00 committed by GitHub
parent dd258f9607
commit 38cad619af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 199 additions and 20 deletions

View File

@ -42,7 +42,7 @@ struct memory {
size_t size() const { return _bytes_count; }
size_t count() const { return _layout.count(); }
virtual shared_mem_params get_internal_params() const = 0;
virtual bool is_allocated_by(const engine& engine) const { return &engine == _engine; }
virtual bool is_allocated_by(const engine& engine) const { return &engine == _engine && _type != allocation_type::unknown; }
engine* get_engine() const { return _engine; }
const layout& get_layout() const { return _layout; }
allocation_type get_allocation_type() const { return _type; }
@ -95,7 +95,7 @@ private:
struct simple_attached_memory : memory {
simple_attached_memory(const layout& layout, void* pointer)
: memory(nullptr, layout, allocation_type::unknown), _pointer(pointer) {}
: memory(nullptr, layout, allocation_type::unknown, true), _pointer(pointer) {}
void* lock(const stream& /* stream */, mem_lock_type /* type */) override { return _pointer; }
void unlock(const stream& /* stream */) override {}
@ -109,11 +109,18 @@ struct simple_attached_memory : memory {
#endif
0}; };
event::ptr copy_from(stream& /* stream */, const memory& /* other */, bool /* blocking */) override { return nullptr; };
event::ptr copy_from(stream& /* stream */, const void* /* host_ptr */, bool /* blocking */) override { return nullptr; }
event::ptr copy_to(stream& /* stream */, memory& /* other */, bool /* blocking */) override { return nullptr; };
event::ptr copy_to(stream& /* stream */, void* /* host_ptr */, bool /* blocking */) override { return nullptr; }
event::ptr copy_from(stream& /* stream */, const memory& /* other */, bool /* blocking */) override {
OPENVINO_THROW("[GPU] copy_from is not implemented for simple_attached_memory");
}
event::ptr copy_from(stream& /* stream */, const void* /* host_ptr */, bool /* blocking */) override {
OPENVINO_THROW("[GPU] copy_from is not implemented for simple_attached_memory");
}
event::ptr copy_to(stream& /* stream */, memory& /* other */, bool /* blocking */) override {
OPENVINO_THROW("[GPU] copy_to is not implemented for simple_attached_memory");
}
event::ptr copy_to(stream& /* stream */, void* /* host_ptr */, bool /* blocking */) override {
OPENVINO_THROW("[GPU] copy_to is not implemented for simple_attached_memory");
}
private:
void* _pointer;

View File

@ -40,20 +40,24 @@ event::ptr input_layout_inst::set_data(memory::ptr mem) {
check_memory_to_set(*mem, ol);
event::ptr ev = nullptr;
if (mem->is_allocated_by(get_network().get_engine())) {
auto& engine = get_network().get_engine();
auto& stream = get_network().get_stream();
if (mem->is_allocated_by(engine)) {
OPENVINO_ASSERT(!_outputs.empty(), "[GPU] Can't set data for empty input memory");
_outputs[0] = mem;
ev = get_network().get_stream().create_user_event(true);
ev = stream.create_user_event(true);
} else {
if ((mem->get_allocation_type() == allocation_type::usm_host) ||
(mem->get_allocation_type() == allocation_type::usm_device)) {
ev = _outputs[0]->copy_from(get_network().get_stream(), *mem, false);
} else {
mem_lock<char, mem_lock_type::read> src(mem, get_network().get_stream());
mem_lock<char, mem_lock_type::write> dst(_outputs[0], get_network().get_stream());
std::copy(src.begin(), src.end(), dst.begin());
ev = get_network().get_stream().create_user_event(true);
if (_outputs.empty() || !_outputs[0]) {
_outputs.resize(1);
_outputs[0] = engine.allocate_memory(mem->get_layout(), engine.get_preferred_memory_allocation_type(), false);
}
if (ol.is_dynamic() && _outputs[0]->size() < mem->size()) {
_outputs[0] = engine.allocate_memory(mem->get_layout(), engine.get_preferred_memory_allocation_type(), false);
}
mem_lock<uint8_t> src(mem, stream);
ev = _outputs[0]->copy_from(stream, src.data(), false);
}
_has_valid_input = true;
_output_changed = true;

View File

@ -0,0 +1,55 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/runtime/memory_caps.hpp"
#include "test_utils.h"
#include "runtime/ocl/ocl_engine.hpp"
#include "runtime/ocl/ocl_memory.hpp"
#include <memory>
using namespace cldnn;
using namespace ::tests;
// Verifies engine memory creation for every supported allocation type and
// checks that attached (externally owned) host memory is NOT reported as
// allocated by the engine, while locking it exposes the original pointer.
TEST(engine, memory_creation) {
    auto& engine = get_test_engine();
    layout layout_to_allocate = {{2, 4}, data_types::u8, format::bfyx};

    // Common checks shared by all engine-owned allocations.
    auto expect_engine_owned = [&](const std::shared_ptr<memory>& m) {
        ASSERT_NE(m, nullptr);
        ASSERT_EQ(m->get_layout(), layout_to_allocate);
        ASSERT_TRUE(m->is_allocated_by(engine));
    };

    std::shared_ptr<memory> mem = nullptr;

    // Engine-default allocation type.
    ASSERT_NO_THROW(mem = engine.allocate_memory(layout_to_allocate));
    expect_engine_owned(mem);

    // Explicit OpenCL buffer allocation must produce an ocl::gpu_buffer.
    ASSERT_NO_THROW(mem = engine.allocate_memory(layout_to_allocate, allocation_type::cl_mem));
    expect_engine_owned(mem);
    ASSERT_NE(std::dynamic_pointer_cast<ocl::gpu_buffer>(mem), nullptr);

    // USM allocations are optional and depend on device capabilities.
    if (engine.supports_allocation(allocation_type::usm_host)) {
        ASSERT_NO_THROW(mem = engine.allocate_memory(layout_to_allocate, allocation_type::usm_host));
        expect_engine_owned(mem);
        ASSERT_NE(std::dynamic_pointer_cast<ocl::gpu_usm>(mem), nullptr);
    }

    if (engine.supports_allocation(allocation_type::usm_device)) {
        ASSERT_NO_THROW(mem = engine.allocate_memory(layout_to_allocate, allocation_type::usm_device));
        expect_engine_owned(mem);
        ASSERT_NE(std::dynamic_pointer_cast<ocl::gpu_usm>(mem), nullptr);
    }

    // Attached memory wraps a caller-owned host buffer: it is a
    // simple_attached_memory, it must not claim engine ownership, and
    // locking it must return the very pointer that was attached.
    std::vector<uint8_t> host_data(2 * 4);
    ASSERT_NO_THROW(mem = engine.attach_memory(layout_to_allocate, host_data.data()));
    ASSERT_NE(mem, nullptr);
    ASSERT_EQ(mem->get_layout(), layout_to_allocate);
    auto attached = std::dynamic_pointer_cast<simple_attached_memory>(mem);
    ASSERT_NE(attached, nullptr);
    ASSERT_FALSE(mem->is_allocated_by(engine));
    ASSERT_EQ(attached->lock(get_test_stream(), mem_lock_type::read), host_data.data());
}

View File

@ -0,0 +1,111 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/runtime/internal_properties.hpp"
#include "intel_gpu/runtime/memory_caps.hpp"
#include "test_utils.h"
#include "random_generator.hpp"
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/data.hpp>
#include <intel_gpu/primitives/fully_connected.hpp>
#include <cstddef>
using namespace cldnn;
using namespace ::tests;
// Runs a small fully-connected network whose weights live in a host buffer
// wrapped via engine::attach_memory() (simple_attached_memory) and verifies
// the output against precomputed reference values.
TEST(data_gpu, attach_host_buffer) {
    auto& engine = get_test_engine();

    auto act_layout = layout{ov::PartialShape{6, 4}, data_types::f32, format::bfyx};
    auto wei_layout = layout{ov::PartialShape{2, 4}, data_types::f32, format::bfyx};

    std::vector<float> host_weights = {1.5f, 1.0f, -1.0f, 0.0f,
                                       0.5f, -0.5f, -0.5f, 1.0f };

    auto act_mem = engine.allocate_memory(act_layout);
    // No copy happens here: the weights memory merely wraps the host vector,
    // which must therefore outlive the network execution.
    auto wei_mem = engine.attach_memory(wei_layout, host_weights.data());

    set_values(act_mem, {-0.5f, 2.0f, 0.5f, 1.f, -1.5f, 2.0f, 0.5f, 1.f,
                         -0.5f, 2.5f, 0.5f, 1.f, -0.5f, 3.0f, 0.5f, 1.f,
                         -0.5f, 2.0f, 0.5f, 1.f, -0.5f, 2.0f, 2.5f, 1.f});

    cldnn::topology topology{
        input_layout("input", act_layout),
        data("weights", wei_mem),
        fully_connected("fc", input_info("input"), "weights", "", cldnn::padding(), act_layout.get_partial_shape().size()),
    };

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));

    network net(engine, topology, config);
    net.set_input_data("input", act_mem);

    auto outputs = net.execute();
    ASSERT_EQ(outputs.size(), size_t(1));
    auto first = outputs.begin();
    ASSERT_EQ(first->first, "fc");

    auto out_mem = first->second.get_memory();
    auto out_layout = net.get_output_layout(first->first);

    ov::Shape expected_shape = {6, 2};
    ASSERT_EQ(out_mem->get_layout().get_shape(), expected_shape);

    std::vector<float> reference = { 0.75, -0.5, -0.75, -1, 1.25, -0.75, 1.75, -1, 0.75, -0.5, -1.25, -1.5 };

    cldnn::mem_lock<float> out_ptr(out_mem, get_test_stream());
    for (size_t i = 0; i < out_layout.get_linear_size(); ++i) {
        ASSERT_EQ(reference[i], out_ptr[i]);
    }
}
// Same fully-connected network as attach_host_buffer, but the weights are
// first uploaded into a usm_device allocation, so the constant data resides
// in device-only memory rather than a wrapped host buffer.
TEST(data_gpu, usm_device_buffer) {
    auto& engine = get_test_engine();
    if (!engine.supports_allocation(allocation_type::usm_device))
        GTEST_SKIP();

    auto act_layout = layout{ov::PartialShape{6, 4}, data_types::f32, format::bfyx};
    auto wei_layout = layout{ov::PartialShape{2, 4}, data_types::f32, format::bfyx};

    std::vector<float> host_weights = {1.5f, 1.0f, -1.0f, 0.0f,
                                       0.5f, -0.5f, -0.5f, 1.0f };

    auto act_mem = engine.allocate_memory(act_layout);
    auto wei_mem = engine.allocate_memory(wei_layout, allocation_type::usm_device, false);
    // Blocking copy: host weights must be fully uploaded before the network
    // starts consuming the device-only allocation.
    wei_mem->copy_from(get_test_stream(), host_weights.data(), true);

    set_values(act_mem, {-0.5f, 2.0f, 0.5f, 1.f, -1.5f, 2.0f, 0.5f, 1.f,
                         -0.5f, 2.5f, 0.5f, 1.f, -0.5f, 3.0f, 0.5f, 1.f,
                         -0.5f, 2.0f, 0.5f, 1.f, -0.5f, 2.0f, 2.5f, 1.f});

    cldnn::topology topology{
        input_layout("input", act_layout),
        data("weights", wei_mem),
        fully_connected("fc", input_info("input"), "weights", "", cldnn::padding(), act_layout.get_partial_shape().size()),
    };

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));

    network net(engine, topology, config);
    net.set_input_data("input", act_mem);

    auto outputs = net.execute();
    ASSERT_EQ(outputs.size(), size_t(1));
    auto first = outputs.begin();
    ASSERT_EQ(first->first, "fc");

    auto out_mem = first->second.get_memory();
    auto out_layout = net.get_output_layout(first->first);

    ov::Shape expected_shape = {6, 2};
    ASSERT_EQ(out_mem->get_layout().get_shape(), expected_shape);

    std::vector<float> reference = { 0.75, -0.5, -0.75, -1, 1.25, -0.75, 1.75, -1, 0.75, -0.5, -1.25, -1.5 };

    cldnn::mem_lock<float> out_ptr(out_mem, get_test_stream());
    for (size_t i = 0; i < out_layout.get_linear_size(); ++i) {
        ASSERT_EQ(reference[i], out_ptr[i]);
    }
}

View File

@ -122,9 +122,11 @@ memory::ptr TestRunnerProposal<Dtype, ImInfoType>::Run(std::vector<Dtype>& cls_s
memory::ptr image_info = engine.allocate_memory(_image_info_layout);
tests::set_values(image_info, image_info_vals);
_network->set_input_data(cls_scores_name, cls_scores);
_network->set_input_data(bbox_pred_name, bbox_pred);
_network->set_input_data(image_info_name, image_info);
std::vector<event::ptr> events;
events.push_back(_network->set_input_data(cls_scores_name, cls_scores));
events.push_back(_network->set_input_data(bbox_pred_name, bbox_pred));
events.push_back(_network->set_input_data(image_info_name, image_info));
_network->get_stream().wait_for_events(events);
std::map<primitive_id, network_output> network_output = _network->execute();
EXPECT_EQ(network_output.begin()->first, layer_name);