[GPU] Support usm_device dump. (#9103)

* [GPU] Support usm_device dump.

On lock, the usm_device buffer is copied to a host buffer; on unlock, that host buffer is freed.

Signed-off-by: hyunback <hyunback.kim@intel.com>

* Remove dependency check in dump with usm.

Signed-off-by: hyunback <hyunback.kim@intel.com>

* Apply code review feedback.

Signed-off-by: hyunback <hyunback.kim@intel.com>
This commit is contained in:
hyunback kim 2021-12-14 14:58:52 +09:00 committed by GitHub
parent 7335387db9
commit d09bbb498e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 39 additions and 12 deletions

View File

@ -124,10 +124,6 @@ debug_configuration::debug_configuration()
get_gpu_debug_env_var("DryRunPath", dry_run_path);
get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation);
if (dump_layers_path.length() > 0 && !disable_usm) {
disable_usm = 1;
GPU_DEBUG_COUT << "DisableUsm=1 because of DumpLayersPath" << std::endl;
}
if (dump_layers.length() > 0)
dump_layers = " " + dump_layers + " "; // Insert delimiter for easier parsing when used
#endif

View File

@ -659,13 +659,17 @@ public:
, _shared_memory(shared_memory) { }
void* ptr() { return _ptr; }
~UsmHolder() {
// Releases the held USM pointer and resets it to nullptr.
// Also invoked from the destructor.
void memFree() {
try {
// free_mem is only called when _shared_memory is false.
if (!_shared_memory)
_usmHelper.free_mem(_ptr);
} catch (...) {
// Exception may happen only when clMemFreeINTEL function is unavailable, thus can't free memory properly
}
// Cleared unconditionally, so ptr() returns nullptr after this call.
_ptr = nullptr;
}
// Frees the held pointer on destruction by delegating to memFree().
~UsmHolder() {
memFree();
}
private:
const cl::UsmHelper& _usmHelper;
@ -710,6 +714,12 @@ public:
detail::errHandler(error, "[CL_EXT] UsmDevice in cl extensions constructor failed");
}
// Frees the memory held by _usm_pointer by calling its memFree().
// Throws std::runtime_error when _usm_pointer is empty.
void freeMem() {
if (!_usm_pointer)
throw std::runtime_error("[CL ext] Can not free memory of empty UsmHolder");
_usm_pointer->memFree();
}
virtual ~UsmMemory() = default;
protected:

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/runtime/debug_configuration.hpp"
#include "intel_gpu/runtime/error_handler.hpp"
#include "intel_gpu/runtime/utils.hpp"
#include "ocl_memory.hpp"
@ -276,19 +277,22 @@ shared_mem_params gpu_dx_buffer::get_internal_params() const {
gpu_usm::gpu_usm(ocl_engine* engine, const layout& new_layout, const cl::UsmMemory& buffer, allocation_type type)
: lockable_gpu_mem()
, memory(engine, new_layout, type, true)
, _buffer(buffer) {
, _buffer(buffer)
, _host_buffer(engine->get_usm_helper()) {
}
gpu_usm::gpu_usm(ocl_engine* engine, const layout& new_layout, const cl::UsmMemory& buffer)
: lockable_gpu_mem()
, memory(engine, new_layout, detect_allocation_type(engine, buffer), true)
, _buffer(buffer) {
, _buffer(buffer)
, _host_buffer(engine->get_usm_helper()) {
}
gpu_usm::gpu_usm(ocl_engine* engine, const layout& layout, allocation_type type)
: lockable_gpu_mem()
, memory(engine, layout, type, false)
, _buffer(engine->get_usm_helper()) {
, _buffer(engine->get_usm_helper())
, _host_buffer(engine->get_usm_helper()) {
switch (get_allocation_type()) {
case allocation_type::usm_host:
_buffer.allocateHost(_bytes_count);
@ -305,12 +309,25 @@ gpu_usm::gpu_usm(ocl_engine* engine, const layout& layout, allocation_type type)
}
}
void* gpu_usm::lock(const stream& stream, mem_lock_type /*type*/) {
assert(get_allocation_type() != allocation_type::usm_device && "Can't lock usm device memory!");
void* gpu_usm::lock(const stream& stream, mem_lock_type type) {
std::lock_guard<std::mutex> locker(_mutex);
if (0 == _lock_count) {
stream.finish(); // Synchronization needed for OOOQ.
_mapped_ptr = _buffer.get();
auto& cl_stream = downcast<const ocl_stream>(stream);
cl_stream.finish(); // Synchronization needed for OOOQ.
if (get_allocation_type() == allocation_type::usm_device) {
if (type != mem_lock_type::read) {
throw std::runtime_error("Unable to lock allocation_type::usm_device with write lock_type.");
}
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(debug_config->verbose >= 2) {
GPU_DEBUG_COUT << "Copy usm_device buffer to host buffer." << std::endl;
}
_host_buffer.allocateHost(_bytes_count);
cl_stream.get_usm_helper().enqueue_memcpy(cl_stream.get_cl_queue(), _host_buffer.get(), _buffer.get(), _bytes_count, CL_TRUE);
_mapped_ptr = _host_buffer.get();
} else {
_mapped_ptr = _buffer.get();
}
}
_lock_count++;
return _mapped_ptr;
@ -320,6 +337,9 @@ void gpu_usm::unlock(const stream& /* stream */) {
// Drops one lock reference while holding _mutex; the cleanup below runs only
// when the lock count reaches zero.
std::lock_guard<std::mutex> locker(_mutex);
_lock_count--;
if (0 == _lock_count) {
if (get_allocation_type() == allocation_type::usm_device) {
// lock() allocates _host_buffer for usm_device memory, so release it here.
_host_buffer.freeMem();
}
_mapped_ptr = nullptr;
}
}

View File

@ -121,6 +121,7 @@ struct gpu_usm : public lockable_gpu_mem, public memory {
protected:
cl::UsmMemory _buffer;
cl::UsmMemory _host_buffer;
static allocation_type detect_allocation_type(ocl_engine* engine, const cl::UsmMemory& buffer);
};