[dGPU] Copy data from gpu_buffer to gpu_usm (#19010)

* [dGPU] Copy data from gpu_buffer to gpu_usm

* Add a unit test.
This commit is contained in:
Jade Cho 2023-08-08 11:47:22 +09:00 committed by GitHub
parent a2807f1edb
commit e5e944ccc2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 98 additions and 10 deletions

View File

@ -436,18 +436,24 @@ event::ptr gpu_usm::fill(stream& stream) {
// Copies the contents of `other` into this USM allocation.
//
// stream   - must be an ocl_stream; its OpenCL command queue performs the transfer.
// other    - source memory. A cl_mem allocation is treated as a gpu_buffer; any other
//            allocation type is downcast to gpu_usm.
// blocking - forwarded to the OpenCL enqueue call as its blocking flag.
//
// Returns the event created for this operation. In the blocking case no OpenCL event is
// requested from the enqueue call (ev_ocl is nullptr) and a user event is returned instead.
event::ptr gpu_usm::copy_from(stream& stream, const memory& other, bool blocking) {
    auto& cl_stream = downcast<const ocl_stream>(stream);
    auto ev = blocking ? stream.create_user_event(true) : stream.create_base_event();
    cl::Event* ev_ocl = blocking ? nullptr : &downcast<ocl_event>(ev.get())->get();

    // The source type must be checked before any downcast: casting a cl_mem-backed buffer
    // to gpu_usm (or issuing a USM memcpy from it) is invalid, and issuing the USM memcpy
    // unconditionally would duplicate the copy for USM sources.
    if (other.get_allocation_type() == allocation_type::cl_mem) {
        // Copy cl_mem to usm_memory by cl::CommandQueue::enqueueReadBuffer()
        auto& mem_inst = downcast<const gpu_buffer>(other);
        cl_stream.get_cl_queue().enqueueReadBuffer(mem_inst.get_buffer(), blocking, 0, size(), this->buffer_ptr(), nullptr, ev_ocl);
    } else {
        // USM to USM copy through the USM helper.
        auto& casted = downcast<const gpu_usm>(other);
        auto dst_ptr = get_buffer().get();
        auto src_ptr = casted.get_buffer().get();
        cl_stream.get_usm_helper().enqueue_memcpy(cl_stream.get_cl_queue(),
                                                  dst_ptr,
                                                  src_ptr,
                                                  _bytes_count,
                                                  blocking,
                                                  nullptr,
                                                  ev_ocl);
    }
    return ev;
}

View File

@ -259,3 +259,85 @@ INSTANTIATE_TEST_SUITE_P(cldnn_usm, fill_buffer, ::testing::ValuesIn(std::vector
// usm_test_params{ allocation_type::usm_shared }, // Unsupported
usm_test_params{ allocation_type::usm_device },
}));
// Test fixture for the copy_between_gpu_buffer_and_gpu_usm parameterized tests;
// it adds nothing on top of BaseUSMTest.
class copy_between_gpu_buffer_and_gpu_usm : public BaseUSMTest {};
// Copies 100 floats from a usm_host allocation into memory of the parameterized allocation
// type, reads the destination back to the host and checks that the values are unchanged.
// The test returns early (without failing) when supports_usm() is false.
TEST_P(copy_between_gpu_buffer_and_gpu_usm, basic) {
    auto p = GetParam();
    if (!supports_usm()) {
        return;
    }
    try {
        ocl::ocl_stream stream(*_engine, {});

        size_t values_count = 100;
        size_t values_bytes_count = values_count * sizeof(float);
        std::vector<float> src_buffer(values_count);
        std::iota(src_buffer.begin(), src_buffer.end(), 0.0f);

        cldnn::layout linear_layout = cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, cldnn::tensor(1, 1, int32_t(values_count), 1));
        auto usm_host_src = _engine->allocate_memory(linear_layout, allocation_type::usm_host);

        // Fill usm_host_src memory. The lock is scoped so that it is not held while the
        // copies below are enqueued.
        {
            cldnn::mem_lock<float> src_lock(usm_host_src, stream);
            std::copy(src_buffer.begin(), src_buffer.end(), src_lock.data());
        }

        // Create dst memory
        auto mem_dst = _engine->allocate_memory(linear_layout, p.type);

        // Fill dst memory from the usm_host source through the concrete copy_from overload.
        switch (p.type) {
        case allocation_type::usm_host:
        case allocation_type::usm_shared:
        case allocation_type::usm_device: {
            auto casted = std::dynamic_pointer_cast<ocl::gpu_usm>(mem_dst);
            ASSERT_TRUE(casted != nullptr);
            auto ev = casted->copy_from(stream, *usm_host_src, true);
            ev->wait();
            break;
        }
        case allocation_type::cl_mem: {
            auto casted = std::dynamic_pointer_cast<ocl::gpu_buffer>(mem_dst);
            ASSERT_TRUE(casted != nullptr);
            auto ev = casted->copy_from(stream, *usm_host_src, true);
            ev->wait();
            break;
        }
        default:
            FAIL() << "Not supported allocation type!";
        }

        // Read back from dst memory. Reading the source here would make the comparison
        // below pass regardless of whether the copy happened.
        std::vector<float> dst_buffer(values_count);
        switch (p.type) {
        case allocation_type::usm_host:
        case allocation_type::usm_shared: {
            cldnn::mem_lock<float> dst_lock(mem_dst, stream);
            std::memcpy(dst_buffer.data(), dst_lock.data(), values_bytes_count);
            break;
        }
        case allocation_type::usm_device:
        case allocation_type::cl_mem: {
            // Stage the destination through a usm_host allocation before locking it;
            // for cl_mem this goes through the gpu_buffer -> gpu_usm copy path.
            auto host_buf = _engine->allocate_memory(linear_layout, allocation_type::usm_host);
            host_buf->copy_from(stream, *mem_dst);
            {
                cldnn::mem_lock<float> host_lock(host_buf, stream);
                std::memcpy(dst_buffer.data(), host_lock.data(), values_bytes_count);
            }
            break;
        }
        default:
            FAIL() << "Not supported allocation type!";
        }

        bool are_equal = std::equal(src_buffer.begin(), src_buffer.end(), dst_buffer.begin());
        ASSERT_TRUE(are_equal);
    } catch (const char* msg) {
        FAIL() << msg;
    }
}
// Runs the copy test with cl_mem, usm_host and usm_device destinations.
// usm_shared is handled by the test body but is not instantiated here.
INSTANTIATE_TEST_SUITE_P(cldnn_usm, copy_between_gpu_buffer_and_gpu_usm, ::testing::ValuesIn(std::vector<usm_test_params>{
usm_test_params{ allocation_type::cl_mem },
usm_test_params{ allocation_type::usm_host },
usm_test_params{ allocation_type::usm_device },
}));