[GPU] Allow simple attached mem as input memory for network (#19419)
This commit is contained in:
parent
dd258f9607
commit
38cad619af
@ -42,7 +42,7 @@ struct memory {
|
||||
size_t size() const { return _bytes_count; }
|
||||
size_t count() const { return _layout.count(); }
|
||||
virtual shared_mem_params get_internal_params() const = 0;
|
||||
virtual bool is_allocated_by(const engine& engine) const { return &engine == _engine; }
|
||||
virtual bool is_allocated_by(const engine& engine) const { return &engine == _engine && _type != allocation_type::unknown; }
|
||||
engine* get_engine() const { return _engine; }
|
||||
const layout& get_layout() const { return _layout; }
|
||||
allocation_type get_allocation_type() const { return _type; }
|
||||
@ -95,7 +95,7 @@ private:
|
||||
|
||||
struct simple_attached_memory : memory {
|
||||
simple_attached_memory(const layout& layout, void* pointer)
|
||||
: memory(nullptr, layout, allocation_type::unknown), _pointer(pointer) {}
|
||||
: memory(nullptr, layout, allocation_type::unknown, true), _pointer(pointer) {}
|
||||
|
||||
void* lock(const stream& /* stream */, mem_lock_type /* type */) override { return _pointer; }
|
||||
void unlock(const stream& /* stream */) override {}
|
||||
@ -109,11 +109,18 @@ struct simple_attached_memory : memory {
|
||||
#endif
|
||||
0}; };
|
||||
|
||||
event::ptr copy_from(stream& /* stream */, const memory& /* other */, bool /* blocking */) override { return nullptr; };
|
||||
event::ptr copy_from(stream& /* stream */, const void* /* host_ptr */, bool /* blocking */) override { return nullptr; }
|
||||
|
||||
event::ptr copy_to(stream& /* stream */, memory& /* other */, bool /* blocking */) override { return nullptr; };
|
||||
event::ptr copy_to(stream& /* stream */, void* /* host_ptr */, bool /* blocking */) override { return nullptr; }
|
||||
event::ptr copy_from(stream& /* stream */, const memory& /* other */, bool /* blocking */) override {
|
||||
OPENVINO_THROW("[GPU] copy_from is not implemented for simple_attached_memory");
|
||||
}
|
||||
event::ptr copy_from(stream& /* stream */, const void* /* host_ptr */, bool /* blocking */) override {
|
||||
OPENVINO_THROW("[GPU] copy_from is not implemented for simple_attached_memory");
|
||||
}
|
||||
event::ptr copy_to(stream& /* stream */, memory& /* other */, bool /* blocking */) override {
|
||||
OPENVINO_THROW("[GPU] copy_to is not implemented for simple_attached_memory");
|
||||
}
|
||||
event::ptr copy_to(stream& /* stream */, void* /* host_ptr */, bool /* blocking */) override {
|
||||
OPENVINO_THROW("[GPU] copy_to is not implemented for simple_attached_memory");
|
||||
}
|
||||
|
||||
private:
|
||||
void* _pointer;
|
||||
|
@ -40,20 +40,24 @@ event::ptr input_layout_inst::set_data(memory::ptr mem) {
|
||||
|
||||
check_memory_to_set(*mem, ol);
|
||||
event::ptr ev = nullptr;
|
||||
if (mem->is_allocated_by(get_network().get_engine())) {
|
||||
auto& engine = get_network().get_engine();
|
||||
auto& stream = get_network().get_stream();
|
||||
|
||||
if (mem->is_allocated_by(engine)) {
|
||||
OPENVINO_ASSERT(!_outputs.empty(), "[GPU] Can't set data for empty input memory");
|
||||
_outputs[0] = mem;
|
||||
ev = get_network().get_stream().create_user_event(true);
|
||||
ev = stream.create_user_event(true);
|
||||
} else {
|
||||
if ((mem->get_allocation_type() == allocation_type::usm_host) ||
|
||||
(mem->get_allocation_type() == allocation_type::usm_device)) {
|
||||
ev = _outputs[0]->copy_from(get_network().get_stream(), *mem, false);
|
||||
} else {
|
||||
mem_lock<char, mem_lock_type::read> src(mem, get_network().get_stream());
|
||||
mem_lock<char, mem_lock_type::write> dst(_outputs[0], get_network().get_stream());
|
||||
std::copy(src.begin(), src.end(), dst.begin());
|
||||
ev = get_network().get_stream().create_user_event(true);
|
||||
if (_outputs.empty() || !_outputs[0]) {
|
||||
_outputs.resize(1);
|
||||
_outputs[0] = engine.allocate_memory(mem->get_layout(), engine.get_preferred_memory_allocation_type(), false);
|
||||
}
|
||||
|
||||
if (ol.is_dynamic() && _outputs[0]->size() < mem->size()) {
|
||||
_outputs[0] = engine.allocate_memory(mem->get_layout(), engine.get_preferred_memory_allocation_type(), false);
|
||||
}
|
||||
mem_lock<uint8_t> src(mem, stream);
|
||||
ev = _outputs[0]->copy_from(stream, src.data(), false);
|
||||
}
|
||||
_has_valid_input = true;
|
||||
_output_changed = true;
|
||||
|
@ -0,0 +1,55 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "intel_gpu/runtime/memory.hpp"
|
||||
#include "intel_gpu/runtime/memory_caps.hpp"
|
||||
#include "test_utils.h"
|
||||
|
||||
#include "runtime/ocl/ocl_engine.hpp"
|
||||
#include "runtime/ocl/ocl_memory.hpp"
|
||||
#include <memory>
|
||||
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
// Checks memory objects produced by the test engine for every allocation path
// exercised here: the default allocation, an explicit cl_mem buffer, both USM
// kinds (only when the engine reports support), and a host pointer attached
// through attach_memory(). Engine-allocated memory must report
// is_allocated_by(engine) == true; attached host memory must report false and
// lock() must hand back the very pointer that was attached.
TEST(engine, memory_creation) {
    auto& engine = get_test_engine();

    const layout requested_layout = {{2, 4}, data_types::u8, format::bfyx};
    std::shared_ptr<memory> buffer = nullptr;

    // Default allocation type chosen by the engine.
    ASSERT_NO_THROW(buffer = engine.allocate_memory(requested_layout));
    ASSERT_NE(buffer, nullptr);
    ASSERT_EQ(buffer->get_layout(), requested_layout);
    ASSERT_TRUE(buffer->is_allocated_by(engine));

    // Explicit OpenCL buffer.
    ASSERT_NO_THROW(buffer = engine.allocate_memory(requested_layout, allocation_type::cl_mem));
    ASSERT_NE(buffer, nullptr);
    ASSERT_EQ(buffer->get_layout(), requested_layout);
    ASSERT_NE(std::dynamic_pointer_cast<ocl::gpu_buffer>(buffer), nullptr);
    ASSERT_TRUE(buffer->is_allocated_by(engine));

    // USM host first, then USM device; each is checked only if the engine supports it.
    for (const auto usm_kind : {allocation_type::usm_host, allocation_type::usm_device}) {
        if (!engine.supports_allocation(usm_kind))
            continue;

        ASSERT_NO_THROW(buffer = engine.allocate_memory(requested_layout, usm_kind));
        ASSERT_NE(buffer, nullptr);
        ASSERT_EQ(buffer->get_layout(), requested_layout);
        ASSERT_NE(std::dynamic_pointer_cast<ocl::gpu_usm>(buffer), nullptr);
        ASSERT_TRUE(buffer->is_allocated_by(engine));
    }

    // Plain host storage attached to the engine: it is not engine-allocated.
    std::vector<uint8_t> host_storage(2 * 4);
    ASSERT_NO_THROW(buffer = engine.attach_memory(requested_layout, host_storage.data()));
    ASSERT_NE(buffer, nullptr);
    ASSERT_EQ(buffer->get_layout(), requested_layout);

    const auto attached = std::dynamic_pointer_cast<simple_attached_memory>(buffer);
    ASSERT_NE(attached, nullptr);
    ASSERT_FALSE(buffer->is_allocated_by(engine));
    ASSERT_EQ(attached->lock(get_test_stream(), mem_lock_type::read), host_storage.data());
}
|
111
src/plugins/intel_gpu/tests/unit/test_cases/data_gpu_test.cpp
Normal file
111
src/plugins/intel_gpu/tests/unit/test_cases/data_gpu_test.cpp
Normal file
@ -0,0 +1,111 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "intel_gpu/runtime/internal_properties.hpp"
|
||||
#include "intel_gpu/runtime/memory_caps.hpp"
|
||||
#include "test_utils.h"
|
||||
#include "random_generator.hpp"
|
||||
|
||||
#include <intel_gpu/primitives/input_layout.hpp>
|
||||
#include <intel_gpu/primitives/data.hpp>
|
||||
#include <intel_gpu/primitives/fully_connected.hpp>
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
// Runs a fully_connected primitive whose weights come from a `data` primitive
// backed by host memory attached via engine.attach_memory() (i.e. memory that
// is not allocated by the engine), and compares the result against a
// precomputed reference.
//
// Input is 6x4, weights are 2x4, so the expected output is 6x2 where
// out[r][c] is the dot product of input row r and weights row c.
TEST(data_gpu, attach_host_buffer) {
    auto& engine = get_test_engine();

    auto in_layout = layout{ov::PartialShape{6, 4}, data_types::f32, format::bfyx};
    auto weights_layout = layout{ov::PartialShape{2, 4}, data_types::f32, format::bfyx};
    std::vector<float> weights_data = {1.5f, 1.0f, -1.0f, 0.0f,
                                       0.5f, -0.5f, -0.5f, 1.0f };
    auto input_memory = engine.allocate_memory(in_layout);
    // weights_data must outlive the network: the attached memory only wraps the pointer.
    auto weights_memory = engine.attach_memory(weights_layout, weights_data.data());

    set_values(input_memory, {-0.5f, 2.0f, 0.5f, 1.f, -1.5f, 2.0f, 0.5f, 1.f,
                              -0.5f, 2.5f, 0.5f, 1.f, -0.5f, 3.0f, 0.5f, 1.f,
                              -0.5f, 2.0f, 0.5f, 1.f, -0.5f, 2.0f, 2.5f, 1.f});

    cldnn::topology topology{
        input_layout("input", in_layout),
        data("weights", weights_memory),
        fully_connected("fc", input_info("input"), "weights", "", cldnn::padding(), in_layout.get_partial_shape().size()),
    };

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    network network(engine, topology, config);
    network.set_input_data("input", input_memory);

    auto outputs = network.execute();
    ASSERT_EQ(outputs.size(), size_t(1));
    ASSERT_EQ(outputs.begin()->first, "fc");

    auto output_prim_mem = outputs.begin()->second.get_memory();

    auto out_l = network.get_output_layout(outputs.begin()->first);
    ov::Shape expected_shape = {6, 2};
    ASSERT_EQ(output_prim_mem->get_layout().get_shape(), expected_shape);

    std::vector<float> expected_output = { 0.75, -0.5, -0.75, -1, 1.25, -0.75, 1.75, -1, 0.75, -0.5, -1.25, -1.5 };

    // Guard the comparison loop: without this check a layout reporting more
    // elements than the reference holds would index expected_output out of bounds.
    ASSERT_EQ(out_l.get_linear_size(), expected_output.size());

    cldnn::mem_lock<float> output_ptr(output_prim_mem, get_test_stream());

    for (size_t i = 0; i < expected_output.size(); i++) {
        ASSERT_EQ(expected_output[i], output_ptr[i]);
    }
}
|
||||
|
||||
// Runs a fully_connected primitive whose weights come from a `data` primitive
// backed by a usm_device allocation filled from host via copy_from(), and
// compares the result against a precomputed reference. Skipped when the engine
// does not support usm_device allocations.
//
// Input is 6x4, weights are 2x4, so the expected output is 6x2 where
// out[r][c] is the dot product of input row r and weights row c.
TEST(data_gpu, usm_device_buffer) {
    auto& engine = get_test_engine();
    if (!engine.supports_allocation(allocation_type::usm_device))
        GTEST_SKIP();

    auto in_layout = layout{ov::PartialShape{6, 4}, data_types::f32, format::bfyx};
    auto weights_layout = layout{ov::PartialShape{2, 4}, data_types::f32, format::bfyx};
    std::vector<float> weights_data = {1.5f, 1.0f, -1.0f, 0.0f,
                                       0.5f, -0.5f, -0.5f, 1.0f };
    auto input_memory = engine.allocate_memory(in_layout);
    auto weights_memory = engine.allocate_memory(weights_layout, allocation_type::usm_device, false);

    // Blocking copy so the weights are in place before the network is built.
    weights_memory->copy_from(get_test_stream(), weights_data.data(), true);

    set_values(input_memory, {-0.5f, 2.0f, 0.5f, 1.f, -1.5f, 2.0f, 0.5f, 1.f,
                              -0.5f, 2.5f, 0.5f, 1.f, -0.5f, 3.0f, 0.5f, 1.f,
                              -0.5f, 2.0f, 0.5f, 1.f, -0.5f, 2.0f, 2.5f, 1.f});

    cldnn::topology topology{
        input_layout("input", in_layout),
        data("weights", weights_memory),
        fully_connected("fc", input_info("input"), "weights", "", cldnn::padding(), in_layout.get_partial_shape().size()),
    };

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    network network(engine, topology, config);
    network.set_input_data("input", input_memory);

    auto outputs = network.execute();
    ASSERT_EQ(outputs.size(), size_t(1));
    ASSERT_EQ(outputs.begin()->first, "fc");

    auto output_prim_mem = outputs.begin()->second.get_memory();

    auto out_l = network.get_output_layout(outputs.begin()->first);
    ov::Shape expected_shape = {6, 2};
    ASSERT_EQ(output_prim_mem->get_layout().get_shape(), expected_shape);

    std::vector<float> expected_output = { 0.75, -0.5, -0.75, -1, 1.25, -0.75, 1.75, -1, 0.75, -0.5, -1.25, -1.5 };

    // Guard the comparison loop: without this check a layout reporting more
    // elements than the reference holds would index expected_output out of bounds.
    ASSERT_EQ(out_l.get_linear_size(), expected_output.size());

    cldnn::mem_lock<float> output_ptr(output_prim_mem, get_test_stream());

    for (size_t i = 0; i < expected_output.size(); i++) {
        ASSERT_EQ(expected_output[i], output_ptr[i]);
    }
}
|
@ -122,9 +122,11 @@ memory::ptr TestRunnerProposal<Dtype, ImInfoType>::Run(std::vector<Dtype>& cls_s
|
||||
memory::ptr image_info = engine.allocate_memory(_image_info_layout);
|
||||
tests::set_values(image_info, image_info_vals);
|
||||
|
||||
_network->set_input_data(cls_scores_name, cls_scores);
|
||||
_network->set_input_data(bbox_pred_name, bbox_pred);
|
||||
_network->set_input_data(image_info_name, image_info);
|
||||
std::vector<event::ptr> events;
|
||||
events.push_back(_network->set_input_data(cls_scores_name, cls_scores));
|
||||
events.push_back(_network->set_input_data(bbox_pred_name, bbox_pred));
|
||||
events.push_back(_network->set_input_data(image_info_name, image_info));
|
||||
_network->get_stream().wait_for_events(events);
|
||||
|
||||
std::map<primitive_id, network_output> network_output = _network->execute();
|
||||
EXPECT_EQ(network_output.begin()->first, layer_name);
|
||||
|
Loading…
Reference in New Issue
Block a user