[GPU] Allow simple attached mem as input memory for network (#19419)

This commit is contained in:
Vladimir Paramuzov 2023-09-01 09:54:30 +04:00 committed by GitHub
parent dd258f9607
commit 38cad619af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 199 additions and 20 deletions

View File

@ -42,7 +42,7 @@ struct memory {
size_t size() const { return _bytes_count; }
size_t count() const { return _layout.count(); }
virtual shared_mem_params get_internal_params() const = 0;
virtual bool is_allocated_by(const engine& engine) const { return &engine == _engine; }
virtual bool is_allocated_by(const engine& engine) const { return &engine == _engine && _type != allocation_type::unknown; }
engine* get_engine() const { return _engine; }
const layout& get_layout() const { return _layout; }
allocation_type get_allocation_type() const { return _type; }
@ -95,7 +95,7 @@ private:
struct simple_attached_memory : memory {
simple_attached_memory(const layout& layout, void* pointer)
: memory(nullptr, layout, allocation_type::unknown), _pointer(pointer) {}
: memory(nullptr, layout, allocation_type::unknown, true), _pointer(pointer) {}
void* lock(const stream& /* stream */, mem_lock_type /* type */) override { return _pointer; }
void unlock(const stream& /* stream */) override {}
@ -109,11 +109,18 @@ struct simple_attached_memory : memory {
#endif
0}; };
event::ptr copy_from(stream& /* stream */, const memory& /* other */, bool /* blocking */) override { return nullptr; };
event::ptr copy_from(stream& /* stream */, const void* /* host_ptr */, bool /* blocking */) override { return nullptr; }
event::ptr copy_to(stream& /* stream */, memory& /* other */, bool /* blocking */) override { return nullptr; };
event::ptr copy_to(stream& /* stream */, void* /* host_ptr */, bool /* blocking */) override { return nullptr; }
event::ptr copy_from(stream& /* stream */, const memory& /* other */, bool /* blocking */) override {
OPENVINO_THROW("[GPU] copy_from is not implemented for simple_attached_memory");
}
event::ptr copy_from(stream& /* stream */, const void* /* host_ptr */, bool /* blocking */) override {
OPENVINO_THROW("[GPU] copy_from is not implemented for simple_attached_memory");
}
event::ptr copy_to(stream& /* stream */, memory& /* other */, bool /* blocking */) override {
OPENVINO_THROW("[GPU] copy_to is not implemented for simple_attached_memory");
}
event::ptr copy_to(stream& /* stream */, void* /* host_ptr */, bool /* blocking */) override {
OPENVINO_THROW("[GPU] copy_to is not implemented for simple_attached_memory");
}
private:
void* _pointer;

View File

@ -40,20 +40,24 @@ event::ptr input_layout_inst::set_data(memory::ptr mem) {
check_memory_to_set(*mem, ol);
event::ptr ev = nullptr;
if (mem->is_allocated_by(get_network().get_engine())) {
auto& engine = get_network().get_engine();
auto& stream = get_network().get_stream();
if (mem->is_allocated_by(engine)) {
OPENVINO_ASSERT(!_outputs.empty(), "[GPU] Can't set data for empty input memory");
_outputs[0] = mem;
ev = get_network().get_stream().create_user_event(true);
ev = stream.create_user_event(true);
} else {
if ((mem->get_allocation_type() == allocation_type::usm_host) ||
(mem->get_allocation_type() == allocation_type::usm_device)) {
ev = _outputs[0]->copy_from(get_network().get_stream(), *mem, false);
} else {
mem_lock<char, mem_lock_type::read> src(mem, get_network().get_stream());
mem_lock<char, mem_lock_type::write> dst(_outputs[0], get_network().get_stream());
std::copy(src.begin(), src.end(), dst.begin());
ev = get_network().get_stream().create_user_event(true);
if (_outputs.empty() || !_outputs[0]) {
_outputs.resize(1);
_outputs[0] = engine.allocate_memory(mem->get_layout(), engine.get_preferred_memory_allocation_type(), false);
}
if (ol.is_dynamic() && _outputs[0]->size() < mem->size()) {
_outputs[0] = engine.allocate_memory(mem->get_layout(), engine.get_preferred_memory_allocation_type(), false);
}
mem_lock<uint8_t> src(mem, stream);
ev = _outputs[0]->copy_from(stream, src.data(), false);
}
_has_valid_input = true;
_output_changed = true;

View File

@ -0,0 +1,55 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/runtime/memory_caps.hpp"
#include "test_utils.h"
#include "runtime/ocl/ocl_engine.hpp"
#include "runtime/ocl/ocl_memory.hpp"
#include <memory>
using namespace cldnn;
using namespace ::tests;
// Verifies engine memory creation for every supported allocation type and
// checks that attached (externally owned) host memory is NOT reported as
// allocated by the engine, while locking it exposes the original pointer.
TEST(engine, memory_creation) {
    auto& engine = get_test_engine();
    layout layout_to_allocate = {{2, 4}, data_types::u8, format::bfyx};

    // Common checks shared by all engine-owned allocations.
    auto expect_engine_owned = [&](const std::shared_ptr<memory>& m) {
        ASSERT_NE(m, nullptr);
        ASSERT_EQ(m->get_layout(), layout_to_allocate);
        ASSERT_TRUE(m->is_allocated_by(engine));
    };

    std::shared_ptr<memory> mem = nullptr;

    // Engine-default allocation type.
    ASSERT_NO_THROW(mem = engine.allocate_memory(layout_to_allocate));
    expect_engine_owned(mem);

    // Explicit OpenCL buffer allocation must produce an ocl::gpu_buffer.
    ASSERT_NO_THROW(mem = engine.allocate_memory(layout_to_allocate, allocation_type::cl_mem));
    expect_engine_owned(mem);
    ASSERT_NE(std::dynamic_pointer_cast<ocl::gpu_buffer>(mem), nullptr);

    // USM allocations are optional and depend on device capabilities.
    if (engine.supports_allocation(allocation_type::usm_host)) {
        ASSERT_NO_THROW(mem = engine.allocate_memory(layout_to_allocate, allocation_type::usm_host));
        expect_engine_owned(mem);
        ASSERT_NE(std::dynamic_pointer_cast<ocl::gpu_usm>(mem), nullptr);
    }

    if (engine.supports_allocation(allocation_type::usm_device)) {
        ASSERT_NO_THROW(mem = engine.allocate_memory(layout_to_allocate, allocation_type::usm_device));
        expect_engine_owned(mem);
        ASSERT_NE(std::dynamic_pointer_cast<ocl::gpu_usm>(mem), nullptr);
    }

    // Attached memory wraps a caller-owned host buffer: it is a
    // simple_attached_memory, it must not claim engine ownership, and
    // locking it must return the very pointer that was attached.
    std::vector<uint8_t> host_data(2 * 4);
    ASSERT_NO_THROW(mem = engine.attach_memory(layout_to_allocate, host_data.data()));
    ASSERT_NE(mem, nullptr);
    ASSERT_EQ(mem->get_layout(), layout_to_allocate);
    auto attached = std::dynamic_pointer_cast<simple_attached_memory>(mem);
    ASSERT_NE(attached, nullptr);
    ASSERT_FALSE(mem->is_allocated_by(engine));
    ASSERT_EQ(attached->lock(get_test_stream(), mem_lock_type::read), host_data.data());
}

View File

@ -0,0 +1,111 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/runtime/internal_properties.hpp"
#include "intel_gpu/runtime/memory_caps.hpp"
#include "test_utils.h"
#include "random_generator.hpp"
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/data.hpp>
#include <intel_gpu/primitives/fully_connected.hpp>
#include <cstddef>
using namespace cldnn;
using namespace ::tests;
// Runs a small fully-connected network whose weights live in a host buffer
// wrapped via engine::attach_memory() (simple_attached_memory) and verifies
// the output against precomputed reference values.
TEST(data_gpu, attach_host_buffer) {
    auto& engine = get_test_engine();

    auto act_layout = layout{ov::PartialShape{6, 4}, data_types::f32, format::bfyx};
    auto wei_layout = layout{ov::PartialShape{2, 4}, data_types::f32, format::bfyx};

    std::vector<float> host_weights = {1.5f, 1.0f, -1.0f, 0.0f,
                                       0.5f, -0.5f, -0.5f, 1.0f };

    auto act_mem = engine.allocate_memory(act_layout);
    // No copy happens here: the weights memory merely wraps the host vector,
    // which must therefore outlive the network execution.
    auto wei_mem = engine.attach_memory(wei_layout, host_weights.data());

    set_values(act_mem, {-0.5f, 2.0f, 0.5f, 1.f, -1.5f, 2.0f, 0.5f, 1.f,
                         -0.5f, 2.5f, 0.5f, 1.f, -0.5f, 3.0f, 0.5f, 1.f,
                         -0.5f, 2.0f, 0.5f, 1.f, -0.5f, 2.0f, 2.5f, 1.f});

    cldnn::topology topology{
        input_layout("input", act_layout),
        data("weights", wei_mem),
        fully_connected("fc", input_info("input"), "weights", "", cldnn::padding(), act_layout.get_partial_shape().size()),
    };

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));

    network net(engine, topology, config);
    net.set_input_data("input", act_mem);

    auto outputs = net.execute();
    ASSERT_EQ(outputs.size(), size_t(1));
    auto first = outputs.begin();
    ASSERT_EQ(first->first, "fc");

    auto out_mem = first->second.get_memory();
    auto out_layout = net.get_output_layout(first->first);

    ov::Shape expected_shape = {6, 2};
    ASSERT_EQ(out_mem->get_layout().get_shape(), expected_shape);

    std::vector<float> reference = { 0.75, -0.5, -0.75, -1, 1.25, -0.75, 1.75, -1, 0.75, -0.5, -1.25, -1.5 };

    cldnn::mem_lock<float> out_ptr(out_mem, get_test_stream());
    for (size_t i = 0; i < out_layout.get_linear_size(); ++i) {
        ASSERT_EQ(reference[i], out_ptr[i]);
    }
}
// Same fully-connected network as attach_host_buffer, but the weights are
// first uploaded into a usm_device allocation, so the constant data resides
// in device-only memory rather than a wrapped host buffer.
TEST(data_gpu, usm_device_buffer) {
    auto& engine = get_test_engine();
    if (!engine.supports_allocation(allocation_type::usm_device))
        GTEST_SKIP();

    auto act_layout = layout{ov::PartialShape{6, 4}, data_types::f32, format::bfyx};
    auto wei_layout = layout{ov::PartialShape{2, 4}, data_types::f32, format::bfyx};

    std::vector<float> host_weights = {1.5f, 1.0f, -1.0f, 0.0f,
                                       0.5f, -0.5f, -0.5f, 1.0f };

    auto act_mem = engine.allocate_memory(act_layout);
    auto wei_mem = engine.allocate_memory(wei_layout, allocation_type::usm_device, false);
    // Blocking copy: host weights must be fully uploaded before the network
    // starts consuming the device-only allocation.
    wei_mem->copy_from(get_test_stream(), host_weights.data(), true);

    set_values(act_mem, {-0.5f, 2.0f, 0.5f, 1.f, -1.5f, 2.0f, 0.5f, 1.f,
                         -0.5f, 2.5f, 0.5f, 1.f, -0.5f, 3.0f, 0.5f, 1.f,
                         -0.5f, 2.0f, 0.5f, 1.f, -0.5f, 2.0f, 2.5f, 1.f});

    cldnn::topology topology{
        input_layout("input", act_layout),
        data("weights", wei_mem),
        fully_connected("fc", input_info("input"), "weights", "", cldnn::padding(), act_layout.get_partial_shape().size()),
    };

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));

    network net(engine, topology, config);
    net.set_input_data("input", act_mem);

    auto outputs = net.execute();
    ASSERT_EQ(outputs.size(), size_t(1));
    auto first = outputs.begin();
    ASSERT_EQ(first->first, "fc");

    auto out_mem = first->second.get_memory();
    auto out_layout = net.get_output_layout(first->first);

    ov::Shape expected_shape = {6, 2};
    ASSERT_EQ(out_mem->get_layout().get_shape(), expected_shape);

    std::vector<float> reference = { 0.75, -0.5, -0.75, -1, 1.25, -0.75, 1.75, -1, 0.75, -0.5, -1.25, -1.5 };

    cldnn::mem_lock<float> out_ptr(out_mem, get_test_stream());
    for (size_t i = 0; i < out_layout.get_linear_size(); ++i) {
        ASSERT_EQ(reference[i], out_ptr[i]);
    }
}

View File

@ -122,9 +122,11 @@ memory::ptr TestRunnerProposal<Dtype, ImInfoType>::Run(std::vector<Dtype>& cls_s
memory::ptr image_info = engine.allocate_memory(_image_info_layout);
tests::set_values(image_info, image_info_vals);
_network->set_input_data(cls_scores_name, cls_scores);
_network->set_input_data(bbox_pred_name, bbox_pred);
_network->set_input_data(image_info_name, image_info);
std::vector<event::ptr> events;
events.push_back(_network->set_input_data(cls_scores_name, cls_scores));
events.push_back(_network->set_input_data(bbox_pred_name, bbox_pred));
events.push_back(_network->set_input_data(image_info_name, image_info));
_network->get_stream().wait_for_events(events);
std::map<primitive_id, network_output> network_output = _network->execute();
EXPECT_EQ(network_output.begin()->first, layer_name);