diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp
index 647a10581b4..62595de3bee 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp
@@ -42,7 +42,7 @@ struct memory {
     size_t size() const { return _bytes_count; }
     size_t count() const { return _layout.count(); }
     virtual shared_mem_params get_internal_params() const = 0;
-    virtual bool is_allocated_by(const engine& engine) const { return &engine == _engine; }
+    virtual bool is_allocated_by(const engine& engine) const { return &engine == _engine && _type != allocation_type::unknown; }  // additionally false whenever _type is allocation_type::unknown
     engine* get_engine() const { return _engine; }
     const layout& get_layout() const { return _layout; }
     allocation_type get_allocation_type() const { return _type; }
@@ -95,7 +95,7 @@ private:
 
 struct simple_attached_memory : memory {
     simple_attached_memory(const layout& layout, void* pointer)
-        : memory(nullptr, layout, allocation_type::unknown), _pointer(pointer) {}
+        : memory(nullptr, layout, allocation_type::unknown, true), _pointer(pointer) {}  // NOTE(review): meaning of the added 4th base-constructor argument is not visible in this patch - confirm
 
     void* lock(const stream& /* stream */, mem_lock_type /* type */) override { return _pointer; }
     void unlock(const stream& /* stream */) override {}
@@ -109,11 +109,18 @@ struct simple_attached_memory : memory {
 #endif
             0};
     };
-    event::ptr copy_from(stream& /* stream */, const memory& /* other */, bool /* blocking */) override { return nullptr; };
-    event::ptr copy_from(stream& /* stream */, const void* /* host_ptr */, bool /* blocking */) override { return nullptr; }
-
-    event::ptr copy_to(stream& /* stream */, memory& /* other */, bool /* blocking */) override { return nullptr; };
-    event::ptr copy_to(stream& /* stream */, void* /* host_ptr */, bool /* blocking */) override { return nullptr; }
+    event::ptr copy_from(stream& /* stream */, const memory& /* other */, bool /* blocking */) override {  // previously returned nullptr silently; now throws
+        OPENVINO_THROW("[GPU] copy_from is not implemented for simple_attached_memory");
+    }
+    event::ptr copy_from(stream& /* stream */, const void* /* host_ptr */, bool /* blocking */) override {  // previously returned nullptr silently; now throws
+        OPENVINO_THROW("[GPU] copy_from is not implemented for simple_attached_memory");
+    }
+    event::ptr copy_to(stream& /* stream */, memory& /* other */, bool /* blocking */) override {  // previously returned nullptr silently; now throws
+        OPENVINO_THROW("[GPU] copy_to is not implemented for simple_attached_memory");
+    }
+    event::ptr copy_to(stream& /* stream */, void* /* host_ptr */, bool /* blocking */) override {  // previously returned nullptr silently; now throws
+        OPENVINO_THROW("[GPU] copy_to is not implemented for simple_attached_memory");
+    }
 
 private:
     void* _pointer;
diff --git a/src/plugins/intel_gpu/src/graph/input_layout.cpp b/src/plugins/intel_gpu/src/graph/input_layout.cpp
index 1868e1bfb85..07b26b9323c 100644
--- a/src/plugins/intel_gpu/src/graph/input_layout.cpp
+++ b/src/plugins/intel_gpu/src/graph/input_layout.cpp
@@ -40,20 +40,24 @@ event::ptr input_layout_inst::set_data(memory::ptr mem) {
 
     check_memory_to_set(*mem, ol);
     event::ptr ev = nullptr;
-    if (mem->is_allocated_by(get_network().get_engine())) {
+    auto& engine = get_network().get_engine();  // fetched once and reused by both branches below
+    auto& stream = get_network().get_stream();
+
+    if (mem->is_allocated_by(engine)) {  // this branch stores mem itself as the output, without copying
         OPENVINO_ASSERT(!_outputs.empty(), "[GPU] Can't set data for empty input memory");
         _outputs[0] = mem;
-        ev = get_network().get_stream().create_user_event(true);
+        ev = stream.create_user_event(true);
     } else {
-        if ((mem->get_allocation_type() == allocation_type::usm_host) ||
-            (mem->get_allocation_type() == allocation_type::usm_device)) {
-            ev = _outputs[0]->copy_from(get_network().get_stream(), *mem, false);
-        } else {
-            mem_lock src(mem, get_network().get_stream());
-            mem_lock dst(_outputs[0], get_network().get_stream());
-            std::copy(src.begin(), src.end(), dst.begin());
-            ev = get_network().get_stream().create_user_event(true);
+        if (_outputs.empty() || !_outputs[0]) {  // no destination buffer yet: allocate one using mem's layout
+            _outputs.resize(1);
+            _outputs[0] = engine.allocate_memory(mem->get_layout(), engine.get_preferred_memory_allocation_type(), false);
         }
+
+        if (ol.is_dynamic() && _outputs[0]->size() < mem->size()) {  // dynamic layout and the existing buffer is smaller than mem: allocate a new one
+            _outputs[0] = engine.allocate_memory(mem->get_layout(), engine.get_preferred_memory_allocation_type(), false);
+        }
+        mem_lock src(mem, stream);  // lock the source to obtain a raw pointer for the copy
+        ev = _outputs[0]->copy_from(stream, src.data(), false);  // blocking=false; NOTE(review): src leaves scope at the end of this branch - confirm the pointer stays valid until ev completes
     }
     _has_valid_input = true;
     _output_changed = true;
diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/engine_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/engine_test.cpp
new file mode 100644
index 00000000000..17d0cae90dd
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/module_tests/engine_test.cpp
@@ -0,0 +1,55 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "intel_gpu/runtime/memory.hpp"
+#include "intel_gpu/runtime/memory_caps.hpp"
+#include "test_utils.h"
+
+#include "runtime/ocl/ocl_engine.hpp"
+#include "runtime/ocl/ocl_memory.hpp"
+#include
+
+using namespace cldnn;
+using namespace ::tests;
+
+TEST(engine, memory_creation) {  // checks layout and is_allocated_by() for allocate_memory() and attach_memory() results
+    auto& engine = get_test_engine();
+
+    std::shared_ptr mem = nullptr;
+    layout layout_to_allocate = {{2, 4}, data_types::u8, format::bfyx};
+    ASSERT_NO_THROW(mem = engine.allocate_memory(layout_to_allocate));  // allocation type left to the engine
+    ASSERT_NE(mem, nullptr);
+    ASSERT_EQ(mem->get_layout(), layout_to_allocate);
+    ASSERT_TRUE(mem->is_allocated_by(engine));
+
+    ASSERT_NO_THROW(mem = engine.allocate_memory(layout_to_allocate, allocation_type::cl_mem));
+    ASSERT_NE(mem, nullptr);
+    ASSERT_EQ(mem->get_layout(), layout_to_allocate);
+    ASSERT_NE(std::dynamic_pointer_cast(mem), nullptr);
+    ASSERT_TRUE(mem->is_allocated_by(engine));
+
+    if (engine.supports_allocation(allocation_type::usm_host)) {  // only exercised when the engine reports support for usm_host
+        ASSERT_NO_THROW(mem = engine.allocate_memory(layout_to_allocate, allocation_type::usm_host));
+        ASSERT_NE(mem, nullptr);
+        ASSERT_EQ(mem->get_layout(), layout_to_allocate);
+        ASSERT_NE(std::dynamic_pointer_cast(mem), nullptr);
+        ASSERT_TRUE(mem->is_allocated_by(engine));
+    }
+
+    if (engine.supports_allocation(allocation_type::usm_device)) {  // only exercised when the engine reports support for usm_device
+        ASSERT_NO_THROW(mem = engine.allocate_memory(layout_to_allocate, allocation_type::usm_device));
+        ASSERT_NE(mem, nullptr);
+        ASSERT_EQ(mem->get_layout(), layout_to_allocate);
+        ASSERT_NE(std::dynamic_pointer_cast(mem), nullptr);
+        ASSERT_TRUE(mem->is_allocated_by(engine));
+    }
+
+    std::vector host_data(2*4);  // 2*4 elements, matching the {2, 4} u8 layout
+    ASSERT_NO_THROW(mem = engine.attach_memory(layout_to_allocate, host_data.data()));
+    ASSERT_NE(mem, nullptr);
+    ASSERT_EQ(mem->get_layout(), layout_to_allocate);
+    ASSERT_NE(std::dynamic_pointer_cast(mem), nullptr);
+    ASSERT_FALSE(mem->is_allocated_by(engine));  // attached host memory must not be reported as engine-allocated
+    ASSERT_EQ(std::dynamic_pointer_cast(mem)->lock(get_test_stream(), mem_lock_type::read), host_data.data());  // lock() must return the attached host pointer itself
+}
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/data_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/data_gpu_test.cpp
new file mode 100644
index 00000000000..2a02b135161
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/data_gpu_test.cpp
@@ -0,0 +1,111 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "intel_gpu/runtime/internal_properties.hpp"
+#include "intel_gpu/runtime/memory_caps.hpp"
+#include "test_utils.h"
+#include "random_generator.hpp"
+
+#include
+#include
+#include
+
+#include
+
+using namespace cldnn;
+using namespace ::tests;
+
+TEST(data_gpu, attach_host_buffer) {  // fully_connected whose weights come from attach_memory() over a host vector
+    auto& engine = get_test_engine();
+
+    auto in_layout = layout{ov::PartialShape{6, 4}, data_types::f32, format::bfyx};
+    auto weights_layout = layout{ov::PartialShape{2, 4}, data_types::f32, format::bfyx};
+    std::vector weights_data = {1.5f, 1.0f, -1.0f, 0.0f,
+                                0.5f, -0.5f, -0.5f, 1.0f };
+    auto input_memory = engine.allocate_memory(in_layout);
+    auto weights_memory = engine.attach_memory(weights_layout, weights_data.data());  // wraps the host vector's storage
+
+    set_values(input_memory, {-0.5f, 2.0f, 0.5f, 1.f, -1.5f, 2.0f, 0.5f, 1.f,
+                              -0.5f, 2.5f, 0.5f, 1.f, -0.5f, 3.0f, 0.5f, 1.f,
+                              -0.5f, 2.0f, 0.5f, 1.f, -0.5f, 2.0f, 2.5f, 1.f});
+
+    cldnn::topology topology{
+        input_layout("input", in_layout),
+        data("weights", weights_memory),
+        fully_connected("fc", input_info("input"), "weights", "", cldnn::padding(), in_layout.get_partial_shape().size()),
+    };
+
+    ExecutionConfig config = get_test_default_config(engine);
+    config.set_property(ov::intel_gpu::optimize_data(true));
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    network network(engine, topology, config);
+    network.set_input_data("input", input_memory);
+
+    auto outputs = network.execute();
+    ASSERT_EQ(outputs.size(), size_t(1));
+    ASSERT_EQ(outputs.begin()->first, "fc");
+
+    auto output_prim_mem = outputs.begin()->second.get_memory();
+
+    auto out_l = network.get_output_layout(outputs.begin()->first);
+    ov::Shape expected_shape = {6, 2};
+    ASSERT_EQ(output_prim_mem->get_layout().get_shape(), expected_shape);
+
+    std::vector expected_output = { 0.75, -0.5, -0.75, -1, 1.25, -0.75, 1.75, -1, 0.75, -0.5, -1.25, -1.5 };
+
+    cldnn::mem_lock output_ptr(output_prim_mem, get_test_stream());
+
+    for (size_t i = 0 ; i < out_l.get_linear_size(); i++) {  // exact element-wise comparison against the reference values
+        ASSERT_EQ(expected_output[i], output_ptr[i]);
+    }
+}
+
+TEST(data_gpu, usm_device_buffer) {  // same network as attach_host_buffer, with the weights held in usm_device memory
+    auto& engine = get_test_engine();
+    if (!engine.supports_allocation(allocation_type::usm_device))
+        GTEST_SKIP();  // nothing to test when the engine cannot allocate usm_device memory
+
+    auto in_layout = layout{ov::PartialShape{6, 4}, data_types::f32, format::bfyx};
+    auto weights_layout = layout{ov::PartialShape{2, 4}, data_types::f32, format::bfyx};
+    std::vector weights_data = {1.5f, 1.0f, -1.0f, 0.0f,
+                                0.5f, -0.5f, -0.5f, 1.0f };
+    auto input_memory = engine.allocate_memory(in_layout);
+    auto weights_memory = engine.allocate_memory(weights_layout, allocation_type::usm_device, false);
+
+    weights_memory->copy_from(get_test_stream(), weights_data.data(), true);  // blocking copy of the host weights into the usm_device buffer
+
+    set_values(input_memory, {-0.5f, 2.0f, 0.5f, 1.f, -1.5f, 2.0f, 0.5f, 1.f,
+                              -0.5f, 2.5f, 0.5f, 1.f, -0.5f, 3.0f, 0.5f, 1.f,
+                              -0.5f, 2.0f, 0.5f, 1.f, -0.5f, 2.0f, 2.5f, 1.f});
+
+    cldnn::topology topology{
+        input_layout("input", in_layout),
+        data("weights", weights_memory),
+        fully_connected("fc", input_info("input"), "weights", "", cldnn::padding(), in_layout.get_partial_shape().size()),
+    };
+
+    ExecutionConfig config = get_test_default_config(engine);
+    config.set_property(ov::intel_gpu::optimize_data(true));
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    network network(engine, topology, config);
+    network.set_input_data("input", input_memory);
+
+    auto outputs = network.execute();
+    ASSERT_EQ(outputs.size(), size_t(1));
+    ASSERT_EQ(outputs.begin()->first, "fc");
+
+    auto output_prim_mem = outputs.begin()->second.get_memory();
+
+    auto out_l = network.get_output_layout(outputs.begin()->first);
+    ov::Shape expected_shape = {6, 2};
+    ASSERT_EQ(output_prim_mem->get_layout().get_shape(), expected_shape);
+
+    std::vector expected_output = { 0.75, -0.5, -0.75, -1, 1.25, -0.75, 1.75, -1, 0.75, -0.5, -1.25, -1.5 };
+
+    cldnn::mem_lock output_ptr(output_prim_mem, get_test_stream());
+
+    for (size_t i = 0 ; i < out_l.get_linear_size(); i++) {  // exact element-wise comparison against the reference values
+        ASSERT_EQ(expected_output[i], output_ptr[i]);
+    }
+}
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/proposal_cpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/proposal_cpu_test.cpp
index 3dddfcf6c60..7e9953b64a8 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/proposal_cpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/proposal_cpu_test.cpp
@@ -122,9 +122,11 @@ memory::ptr TestRunnerProposal::Run(std::vector& cls_s
     memory::ptr image_info = engine.allocate_memory(_image_info_layout);
     tests::set_values(image_info, image_info_vals);
 
-    _network->set_input_data(cls_scores_name, cls_scores);
-    _network->set_input_data(bbox_pred_name, bbox_pred);
-    _network->set_input_data(image_info_name, image_info);
+    std::vector events;  // collects the events returned by set_input_data()
+    events.push_back(_network->set_input_data(cls_scores_name, cls_scores));
+    events.push_back(_network->set_input_data(bbox_pred_name, bbox_pred));
+    events.push_back(_network->set_input_data(image_info_name, image_info));
+    _network->get_stream().wait_for_events(events);  // wait on all three input events before execute() runs
 
     std::map network_output = _network->execute();
     EXPECT_EQ(network_output.begin()->first, layer_name);