From e5e944ccc2abc9753eb508a7c2ad831809a909f0 Mon Sep 17 00:00:00 2001
From: Jade Cho
Date: Tue, 8 Aug 2023 11:47:22 +0900
Subject: [PATCH] [dGPU] Copy data from gpu_buffer to gpu_usm (#19010)

* [dGPU] Copy data from gpu_buffer to gpu_usm

* Add a unit test.

* Unit test: read the result back from the destination memory (not from
  the source) so that the comparison actually validates copy_from().
---
 .../intel_gpu/src/runtime/ocl/ocl_memory.cpp  | 26 +++---
 .../unit/module_tests/usm_memory_test.cpp     | 82 +++++++++++++++++++
 2 files changed, 98 insertions(+), 10 deletions(-)

diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp
index 793027dcf5c..fcab83a4be4 100644
--- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp
+++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp
@@ -436,18 +436,24 @@ event::ptr gpu_usm::fill(stream& stream) {
 
 event::ptr gpu_usm::copy_from(stream& stream, const memory& other, bool blocking) {
     auto& cl_stream = downcast<const ocl_stream>(stream);
-    auto& casted = downcast<const gpu_usm>(other);
-    auto dst_ptr = get_buffer().get();
-    auto src_ptr = casted.get_buffer().get();
     auto ev = blocking ? stream.create_user_event(true) : stream.create_base_event();
     cl::Event* ev_ocl = blocking ? nullptr : &downcast<ocl_event>(ev.get())->get();
-    cl_stream.get_usm_helper().enqueue_memcpy(cl_stream.get_cl_queue(),
-                                              dst_ptr,
-                                              src_ptr,
-                                              _bytes_count,
-                                              blocking,
-                                              nullptr,
-                                              ev_ocl);
+    if (other.get_allocation_type() == allocation_type::cl_mem) {
+        // Copy cl_mem to usm_memory by cl::CommandQueue::enqueueReadBuffer()
+        auto& mem_inst = downcast<const gpu_buffer>(other);
+        cl_stream.get_cl_queue().enqueueReadBuffer(mem_inst.get_buffer(), blocking, 0, size(), this->buffer_ptr(), nullptr, ev_ocl);
+    } else {
+        auto& casted = downcast<const gpu_usm>(other);
+        auto dst_ptr = get_buffer().get();
+        auto src_ptr = casted.get_buffer().get();
+        cl_stream.get_usm_helper().enqueue_memcpy(cl_stream.get_cl_queue(),
+                                                  dst_ptr,
+                                                  src_ptr,
+                                                  _bytes_count,
+                                                  blocking,
+                                                  nullptr,
+                                                  ev_ocl);
+    }
     return ev;
 }
 
diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/usm_memory_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/usm_memory_test.cpp
index 7f40e9976e5..844e09a0eeb 100644
--- a/src/plugins/intel_gpu/tests/unit/module_tests/usm_memory_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/module_tests/usm_memory_test.cpp
@@ -259,3 +259,85 @@ INSTANTIATE_TEST_SUITE_P(cldnn_usm, fill_buffer, ::testing::ValuesIn(std::vector<usm_test_params>{
     // usm_test_params{ allocation_type::usm_shared }, // Unsupported
     usm_test_params{ allocation_type::usm_device },
 }));
+
+
+class copy_between_gpu_buffer_and_gpu_usm : public BaseUSMTest {};
+TEST_P(copy_between_gpu_buffer_and_gpu_usm, basic) {
+    auto p = GetParam();
+    if (!supports_usm()) {
+        return;
+    }
+    try {
+        ocl::ocl_stream stream(*_engine, {});
+
+        size_t values_count = 100;
+        size_t values_bytes_count = values_count * sizeof(float);
+        std::vector<float> src_buffer(values_count);
+        std::iota(src_buffer.begin(), src_buffer.end(), 0.0f);
+
+        cldnn::layout linear_layout = cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, cldnn::tensor(1, 1, int32_t(values_count), 1));
+        auto usm_host_src = _engine->allocate_memory(linear_layout, allocation_type::usm_host);
+
+        // Fill usm_host_src memory.
+        cldnn::mem_lock<float> lock(usm_host_src, stream);
+        std::copy(src_buffer.begin(), src_buffer.end(), lock.data());
+
+        // Create dst memory
+        auto mem_dst = _engine->allocate_memory(linear_layout, p.type);
+
+        // Fill dst memory
+        switch (p.type) {
+            case allocation_type::usm_host:
+            case allocation_type::usm_shared:
+            case allocation_type::usm_device:
+            {
+                auto casted = std::dynamic_pointer_cast<ocl::gpu_usm>(mem_dst);
+                auto ev = casted->copy_from(stream, *usm_host_src, true);
+                ev->wait();
+                break;
+            }
+            case allocation_type::cl_mem: {
+                auto casted = std::dynamic_pointer_cast<ocl::gpu_buffer>(mem_dst);
+                auto ev = casted->copy_from(stream, *usm_host_src, true);
+                ev->wait();
+                break;
+            }
+            default:
+                FAIL() << "Not supported allocation type!";
+        }
+
+        // Read back from dst memory (never from the source, or the check below is vacuous)
+        std::vector<float> dst_buffer(values_count);
+        switch (p.type) {
+            case allocation_type::usm_host:
+            case allocation_type::usm_shared: {
+                cldnn::mem_lock<float> lock(mem_dst, stream);
+                std::memcpy(dst_buffer.data(), lock.data(), values_bytes_count);
+                break;
+            }
+            case allocation_type::usm_device:
+            case allocation_type::cl_mem: {
+                auto host_buf = _engine->allocate_memory(linear_layout, allocation_type::usm_host);
+                host_buf->copy_from(stream, *mem_dst);
+                {
+                    cldnn::mem_lock<float> lock(host_buf, stream);
+                    std::memcpy(dst_buffer.data(), lock.data(), values_bytes_count);
+                }
+                break;
+            }
+            default:
+                FAIL() << "Not supported allocation type!";
+        }
+        bool are_equal = std::equal(src_buffer.begin(), src_buffer.end(), dst_buffer.begin());
+        ASSERT_TRUE(are_equal);
+    } catch (const char* msg) {
+        FAIL() << msg;
+    }
+
+}
+
+INSTANTIATE_TEST_SUITE_P(cldnn_usm, copy_between_gpu_buffer_and_gpu_usm, ::testing::ValuesIn(std::vector<usm_test_params>{
+    usm_test_params{ allocation_type::cl_mem },
+    usm_test_params{ allocation_type::usm_host },
+    usm_test_params{ allocation_type::usm_device },
+}));