[GPU] Add blocking option for cldnn::memory::copy_*() functions (#14012)
This commit is contained in:
parent
cc219d085e
commit
d464d079b2
@ -68,11 +68,11 @@ struct memory {
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual event::ptr copy_from(stream& /* stream */, const memory& /* other */) = 0;
|
||||
virtual event::ptr copy_from(stream& /* stream */, const void* /* host_ptr */) = 0;
|
||||
virtual event::ptr copy_from(stream& /* stream */, const memory& /* other */, bool blocking = true) = 0;
|
||||
virtual event::ptr copy_from(stream& /* stream */, const void* /* host_ptr */, bool blocking = true) = 0;
|
||||
|
||||
virtual event::ptr copy_to(stream& stream, memory& other) { return other.copy_from(stream, *this); }
|
||||
virtual event::ptr copy_to(stream& /* stream */, void* /* host_ptr */) = 0;
|
||||
virtual event::ptr copy_to(stream& stream, memory& other, bool blocking = true) { return other.copy_from(stream, *this, blocking); }
|
||||
virtual event::ptr copy_to(stream& /* stream */, void* /* host_ptr */, bool blocking = true) = 0;
|
||||
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
virtual dnnl::memory get_onednn_memory(dnnl::memory::desc /* desc */, int64_t offset = 0) {
|
||||
@ -108,11 +108,11 @@ struct simple_attached_memory : memory {
|
||||
#endif
|
||||
0}; };
|
||||
|
||||
event::ptr copy_from(stream& /* stream */, const memory& /* other */) override { return nullptr; };
|
||||
event::ptr copy_from(stream& /* stream */, const void* /* host_ptr */) override { return nullptr; }
|
||||
event::ptr copy_from(stream& /* stream */, const memory& /* other */, bool /* blocking */) override { return nullptr; };
|
||||
event::ptr copy_from(stream& /* stream */, const void* /* host_ptr */, bool /* blocking */) override { return nullptr; }
|
||||
|
||||
event::ptr copy_to(stream& /* stream */, memory& /* other */) override { return nullptr; };
|
||||
event::ptr copy_to(stream& /* stream */, void* /* host_ptr */) override { return nullptr; }
|
||||
event::ptr copy_to(stream& /* stream */, memory& /* other */, bool /* blocking */) override { return nullptr; };
|
||||
event::ptr copy_to(stream& /* stream */, void* /* host_ptr */, bool /* blocking */) override { return nullptr; }
|
||||
|
||||
private:
|
||||
void* _pointer;
|
||||
|
@ -647,8 +647,7 @@ void InferRequest::copy_output_data(cldnn::memory::ptr src, Blob::Ptr dst) {
|
||||
|
||||
OPENVINO_ASSERT(intermediate_output_blob, "[GPU] Intermediate blob for outputs precessing is not allocated");
|
||||
|
||||
auto event = src->copy_to(stream, intermediate_output_blob->buffer());
|
||||
event->wait();
|
||||
src->copy_to(stream, intermediate_output_blob->buffer());
|
||||
|
||||
switch (dst->getTensorDesc().getPrecision()) {
|
||||
#define CASE(PRC, SRC_DT, DST_DT) \
|
||||
@ -670,8 +669,7 @@ void InferRequest::copy_output_data(cldnn::memory::ptr src, Blob::Ptr dst) {
|
||||
}
|
||||
} else {
|
||||
auto dst_ptr = dst->buffer().as<void*>();
|
||||
auto event = src->copy_to(stream, dst_ptr);
|
||||
event->wait();
|
||||
src->copy_to(stream, dst_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -959,7 +957,7 @@ void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr
|
||||
auto src_lock = inputBlob->cbuffer();
|
||||
auto src_ptr = src_lock.as<uint8_t*>();
|
||||
if (!same_host_mem(inputMem, src_ptr)) {
|
||||
auto ev = inputMem->copy_from(stream, src_ptr);
|
||||
auto ev = inputMem->copy_from(stream, src_ptr, false);
|
||||
dependencies.push_back(ev);
|
||||
}
|
||||
}
|
||||
|
@ -25,7 +25,7 @@ struct ocl_base_event : public event {
|
||||
public:
|
||||
explicit ocl_base_event(uint64_t queue_stamp = 0) : event(), _queue_stamp(queue_stamp) { }
|
||||
uint64_t get_queue_stamp() const { return _queue_stamp; }
|
||||
virtual cl::Event get() = 0;
|
||||
virtual cl::Event& get() = 0;
|
||||
|
||||
protected:
|
||||
uint64_t _queue_stamp = 0;
|
||||
|
@ -20,7 +20,7 @@ public:
|
||||
: ocl_base_event(queue_stamp)
|
||||
, _event(ev) {}
|
||||
|
||||
cl::Event get() override { return _event; }
|
||||
cl::Event& get() override { return _event; }
|
||||
|
||||
private:
|
||||
bool _callback_set = false;
|
||||
@ -47,7 +47,7 @@ public:
|
||||
process_events(ev);
|
||||
}
|
||||
|
||||
cl::Event get() override { return _last_ocl_event; }
|
||||
cl::Event& get() override { return _last_ocl_event; }
|
||||
|
||||
void reset() override {
|
||||
event::reset();
|
||||
|
@ -70,7 +70,7 @@ event::ptr gpu_buffer::fill(stream& stream) {
|
||||
event::ptr gpu_buffer::fill(stream& stream, unsigned char pattern) {
|
||||
auto& cl_stream = downcast<ocl_stream>(stream);
|
||||
auto ev = stream.create_base_event();
|
||||
cl::Event ev_ocl = std::dynamic_pointer_cast<ocl_event>(ev)->get();
|
||||
cl::Event& ev_ocl = downcast<ocl_event>(ev.get())->get();
|
||||
cl_stream.get_cl_queue().enqueueFillBuffer<unsigned char>(_buffer, pattern, 0, size(), nullptr, &ev_ocl);
|
||||
|
||||
// TODO: do we need sync here?
|
||||
@ -91,30 +91,33 @@ shared_mem_params gpu_buffer::get_internal_params() const {
|
||||
0};
|
||||
}
|
||||
|
||||
event::ptr gpu_buffer::copy_from(stream& stream, const memory& other) {
|
||||
event::ptr gpu_buffer::copy_from(stream& stream, const memory& other, bool blocking) {
|
||||
auto& cl_stream = downcast<ocl_stream>(stream);
|
||||
auto& mem_inst = downcast<const gpu_buffer>(other);
|
||||
auto ev = stream.create_base_event();
|
||||
cl::Event ev_ocl = std::dynamic_pointer_cast<ocl_event>(ev)->get();
|
||||
cl::Event& ev_ocl = downcast<ocl_event>(ev.get())->get();
|
||||
cl_stream.get_cl_queue().enqueueCopyBuffer(mem_inst.get_buffer(), get_buffer(), 0, 0, other.size(), nullptr, &ev_ocl);
|
||||
|
||||
return ev;
|
||||
}
|
||||
|
||||
event::ptr gpu_buffer::copy_from(stream& stream, const void* host_ptr) {
|
||||
auto& cl_stream = downcast<ocl_stream>(stream);
|
||||
auto ev = stream.create_base_event();
|
||||
cl::Event ev_ocl = std::dynamic_pointer_cast<ocl_event>(ev)->get();
|
||||
cl_stream.get_cl_queue().enqueueWriteBuffer(_buffer, false, 0, size(), host_ptr, nullptr, &ev_ocl);
|
||||
if (blocking)
|
||||
ev->wait();
|
||||
|
||||
return ev;
|
||||
}
|
||||
|
||||
event::ptr gpu_buffer::copy_to(stream& stream, void* host_ptr) {
|
||||
event::ptr gpu_buffer::copy_from(stream& stream, const void* host_ptr, bool blocking) {
|
||||
auto& cl_stream = downcast<ocl_stream>(stream);
|
||||
auto ev = stream.create_base_event();
|
||||
cl::Event ev_ocl = downcast<ocl_event>(ev.get())->get();
|
||||
cl_stream.get_cl_queue().enqueueReadBuffer(_buffer, false, 0, size(), host_ptr, nullptr, &ev_ocl);
|
||||
auto ev = blocking ? stream.create_user_event(true) : stream.create_base_event();
|
||||
cl::Event* ev_ocl = blocking ? nullptr : &downcast<ocl_event>(ev.get())->get();
|
||||
cl_stream.get_cl_queue().enqueueWriteBuffer(_buffer, blocking, 0, size(), host_ptr, nullptr, ev_ocl);
|
||||
|
||||
return ev;
|
||||
}
|
||||
|
||||
event::ptr gpu_buffer::copy_to(stream& stream, void* host_ptr, bool blocking) {
|
||||
auto& cl_stream = downcast<ocl_stream>(stream);
|
||||
auto ev = blocking ? stream.create_user_event(true) : stream.create_base_event();
|
||||
cl::Event* ev_ocl = blocking ? nullptr : &downcast<ocl_event>(ev.get())->get();
|
||||
cl_stream.get_cl_queue().enqueueReadBuffer(_buffer, blocking, 0, size(), host_ptr, nullptr, ev_ocl);
|
||||
|
||||
return ev;
|
||||
}
|
||||
@ -195,7 +198,7 @@ event::ptr gpu_image2d::fill(stream& stream) {
|
||||
event::ptr gpu_image2d::fill(stream& stream, unsigned char pattern) {
|
||||
auto& cl_stream = downcast<ocl_stream>(stream);
|
||||
auto ev = stream.create_base_event();
|
||||
cl::Event ev_ocl = downcast<ocl_event>(ev.get())->get();
|
||||
cl::Event& ev_ocl = downcast<ocl_event>(ev.get())->get();
|
||||
cl_uint4 pattern_uint4 = {pattern, pattern, pattern, pattern};
|
||||
cl_stream.get_cl_queue().enqueueFillImage(_buffer, pattern_uint4, {0, 0, 0}, {_width, _height, 1}, 0, &ev_ocl);
|
||||
|
||||
@ -245,19 +248,19 @@ shared_mem_params gpu_image2d::get_internal_params() const {
|
||||
0};
|
||||
}
|
||||
|
||||
event::ptr gpu_image2d::copy_from(stream& /* stream */, const memory& /* other */) {
|
||||
event::ptr gpu_image2d::copy_from(stream& /* stream */, const memory& /* other */, bool /* blocking */) {
|
||||
throw std::runtime_error("[GPU] copy_from is not implemented for gpu_image2d");
|
||||
}
|
||||
|
||||
event::ptr gpu_image2d::copy_from(stream& /* stream */, const void* /* host_ptr */) {
|
||||
event::ptr gpu_image2d::copy_from(stream& /* stream */, const void* /* host_ptr */, bool /* blocking */) {
|
||||
throw std::runtime_error("[GPU] copy_from is not implemented for gpu_image2d");
|
||||
}
|
||||
|
||||
event::ptr gpu_image2d::copy_to(stream& /* stream */, memory& /* other */) {
|
||||
event::ptr gpu_image2d::copy_to(stream& /* stream */, memory& /* other */, bool /* blocking */) {
|
||||
throw std::runtime_error("[GPU] copy_to is not implemented for gpu_image2d");
|
||||
}
|
||||
|
||||
event::ptr gpu_image2d::copy_to(stream& /* stream */, void* /* host_ptr */) {
|
||||
event::ptr gpu_image2d::copy_to(stream& /* stream */, void* /* host_ptr */, bool /* blocking */) {
|
||||
throw std::runtime_error("[GPU] copy_to is not implemented for gpu_image2d");
|
||||
}
|
||||
|
||||
@ -364,7 +367,7 @@ void gpu_usm::unlock(const stream& /* stream */) {
|
||||
event::ptr gpu_usm::fill(stream& stream, unsigned char pattern) {
|
||||
auto& cl_stream = downcast<ocl_stream>(stream);
|
||||
auto ev = stream.create_base_event();
|
||||
cl::Event ev_ocl = downcast<ocl_event>(ev.get())->get();
|
||||
cl::Event& ev_ocl = downcast<ocl_event>(ev.get())->get();
|
||||
// The enqueueFillUsm call never finishes. Driver bug? Uncomment when fixed. Some older drivers don't support the enqueueFillUsm call at all.
|
||||
// cl_stream.get_usm_helper().enqueue_fill_mem<unsigned char>(cl_stream.get_cl_queue(), _buffer.get(), pattern, _bytes_count, nullptr, &ev_ocl)
|
||||
// Worked around with enqueue_memcpy. TODO: remove the code below and uncomment the call above.
|
||||
@ -386,39 +389,51 @@ event::ptr gpu_usm::fill(stream& stream) {
|
||||
return fill(stream, 0);
|
||||
}
|
||||
|
||||
event::ptr gpu_usm::copy_from(stream& stream, const memory& other) {
|
||||
event::ptr gpu_usm::copy_from(stream& stream, const memory& other, bool blocking) {
|
||||
auto& cl_stream = downcast<const ocl_stream>(stream);
|
||||
auto& casted = downcast<const gpu_usm>(other);
|
||||
auto dst_ptr = get_buffer().get();
|
||||
auto src_ptr = casted.get_buffer().get();
|
||||
auto ev = blocking ? stream.create_user_event(true) : stream.create_base_event();
|
||||
cl::Event* ev_ocl = blocking ? nullptr : &downcast<ocl_event>(ev.get())->get();
|
||||
cl_stream.get_usm_helper().enqueue_memcpy(cl_stream.get_cl_queue(),
|
||||
dst_ptr,
|
||||
src_ptr,
|
||||
_bytes_count,
|
||||
true);
|
||||
return stream.create_user_event(true);
|
||||
blocking,
|
||||
nullptr,
|
||||
ev_ocl);
|
||||
return ev;
|
||||
}
|
||||
|
||||
event::ptr gpu_usm::copy_from(stream& stream, const void* host_ptr) {
|
||||
event::ptr gpu_usm::copy_from(stream& stream, const void* host_ptr, bool blocking) {
|
||||
auto& cl_stream = downcast<ocl_stream>(stream);
|
||||
auto dst_ptr = get_buffer().get();
|
||||
auto ev = blocking ? stream.create_user_event(true) : stream.create_base_event();
|
||||
cl::Event* ev_ocl = blocking ? nullptr : &downcast<ocl_event>(ev.get())->get();
|
||||
cl_stream.get_usm_helper().enqueue_memcpy(cl_stream.get_cl_queue(),
|
||||
dst_ptr,
|
||||
host_ptr,
|
||||
_bytes_count,
|
||||
true);
|
||||
blocking,
|
||||
nullptr,
|
||||
ev_ocl);
|
||||
return stream.create_user_event(true);
|
||||
}
|
||||
|
||||
event::ptr gpu_usm::copy_to(stream& stream, void* host_ptr) {
|
||||
event::ptr gpu_usm::copy_to(stream& stream, void* host_ptr, bool blocking) {
|
||||
auto& cl_stream = downcast<ocl_stream>(stream);
|
||||
auto ev = blocking ? stream.create_user_event(true) : stream.create_base_event();
|
||||
cl::Event* ev_ocl = blocking ? nullptr : &downcast<ocl_event>(ev.get())->get();
|
||||
auto src_ptr = get_buffer().get();
|
||||
cl_stream.get_usm_helper().enqueue_memcpy(cl_stream.get_cl_queue(),
|
||||
host_ptr,
|
||||
src_ptr,
|
||||
_bytes_count,
|
||||
true);
|
||||
return stream.create_user_event(true);
|
||||
blocking,
|
||||
nullptr,
|
||||
ev_ocl);
|
||||
return ev;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
|
@ -40,10 +40,10 @@ struct gpu_buffer : public lockable_gpu_mem, public memory {
|
||||
return _buffer;
|
||||
}
|
||||
|
||||
event::ptr copy_from(stream& stream, const memory& other) override;
|
||||
event::ptr copy_from(stream& stream, const void* host_ptr) override;
|
||||
event::ptr copy_from(stream& stream, const memory& other, bool blocking) override;
|
||||
event::ptr copy_from(stream& stream, const void* host_ptr, bool blocking) override;
|
||||
|
||||
event::ptr copy_to(stream& /* stream */, void* /* other */) override;
|
||||
event::ptr copy_to(stream& stream, void* other , bool blocking) override;
|
||||
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
dnnl::memory get_onednn_memory(dnnl::memory::desc /* desc */, int64_t offset = 0) override;
|
||||
@ -67,11 +67,11 @@ struct gpu_image2d : public lockable_gpu_mem, public memory {
|
||||
return _buffer;
|
||||
}
|
||||
|
||||
event::ptr copy_from(stream& /* stream */, const memory& /* other */) override;
|
||||
event::ptr copy_from(stream& /* stream */, const void* /* other */) override;
|
||||
event::ptr copy_from(stream& /* stream */, const memory& /* other */, bool /* blocking */) override;
|
||||
event::ptr copy_from(stream& /* stream */, const void* /* other */, bool /* blocking */) override;
|
||||
|
||||
event::ptr copy_to(stream& /* stream */, memory& /* other */) override;
|
||||
event::ptr copy_to(stream& /* stream */, void* /* other */) override;
|
||||
event::ptr copy_to(stream& /* stream */, memory& /* other */, bool /* blocking */) override;
|
||||
event::ptr copy_to(stream& /* stream */, void* /* other */, bool /* blocking */) override;
|
||||
|
||||
protected:
|
||||
cl::Image2D _buffer;
|
||||
@ -119,10 +119,10 @@ struct gpu_usm : public lockable_gpu_mem, public memory {
|
||||
event::ptr fill(stream& stream) override;
|
||||
shared_mem_params get_internal_params() const override;
|
||||
|
||||
event::ptr copy_from(stream& stream, const memory& other) override;
|
||||
event::ptr copy_from(stream& stream, const void* host_ptr) override;
|
||||
event::ptr copy_from(stream& stream, const memory& other, bool blocking) override;
|
||||
event::ptr copy_from(stream& stream, const void* host_ptr, bool blocking) override;
|
||||
|
||||
event::ptr copy_to(stream& stream, void* host_ptr) override;
|
||||
event::ptr copy_to(stream& stream, void* host_ptr, bool blocking) override;
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
dnnl::memory get_onednn_memory(dnnl::memory::desc /* desc */, int64_t offset = 0) override;
|
||||
#endif
|
||||
|
@ -30,7 +30,7 @@ struct ocl_user_event : public ocl_base_event {
|
||||
|
||||
void set_impl() override;
|
||||
bool get_profiling_info_impl(std::list<instrumentation::profiling_interval>& info) override;
|
||||
cl::Event get() override { return _event; };
|
||||
cl::Event& get() override { return _event; };
|
||||
|
||||
protected:
|
||||
cldnn::instrumentation::timer<> _timer;
|
||||
|
@ -135,7 +135,7 @@ TEST_P(copy_and_read_buffer, basic) {
|
||||
cldnn::mem_lock<float> lock(host_buf, stream);
|
||||
std::copy(src_buffer.begin(), src_buffer.end(), lock.data());
|
||||
}
|
||||
casted->copy_from(stream, *host_buf);
|
||||
casted->copy_from(stream, *host_buf, true);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
Loading…
Reference in New Issue
Block a user