[GPU] Support usm_device dump. (#9103)
* [GPU] Support usm_device dump. On lock, the device buffer is copied to a host buffer; on unlock, the host buffer is freed. Signed-off-by: hyunback <hyunback.kim@intel.com> * Remove dependency check in dump with usm. Signed-off-by: hyunback <hyunback.kim@intel.com> * Apply code review feedback. Signed-off-by: hyunback <hyunback.kim@intel.com>
This commit is contained in:
parent
7335387db9
commit
d09bbb498e
@ -124,10 +124,6 @@ debug_configuration::debug_configuration()
|
||||
get_gpu_debug_env_var("DryRunPath", dry_run_path);
|
||||
get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation);
|
||||
|
||||
if (dump_layers_path.length() > 0 && !disable_usm) {
|
||||
disable_usm = 1;
|
||||
GPU_DEBUG_COUT << "DisableUsm=1 because of DumpLayersPath" << std::endl;
|
||||
}
|
||||
if (dump_layers.length() > 0)
|
||||
dump_layers = " " + dump_layers + " "; // Insert delimiter for easier parsing when used
|
||||
#endif
|
||||
|
@ -659,13 +659,17 @@ public:
|
||||
, _shared_memory(shared_memory) { }
|
||||
|
||||
void* ptr() { return _ptr; }
|
||||
~UsmHolder() {
|
||||
// Frees the held pointer through _usmHelper.free_mem() and then clears it.
// The free is skipped when _shared_memory is set; any exception thrown by the
// free is swallowed. _ptr is set to nullptr in every case.
// NOTE(review): ~UsmHolder() also calls memFree(), so after an explicit call the
// destructor passes a null _ptr to free_mem() — confirm free_mem tolerates nullptr.
void memFree() {
|
||||
try {
|
||||
if (!_shared_memory)
|
||||
_usmHelper.free_mem(_ptr);
|
||||
} catch (...) {
|
||||
// Exception may happen only when clMemFreeINTEL function is unavailable, thus can't free memory properly
|
||||
}
|
||||
// Drop the pointer so ptr() no longer returns the address passed to free_mem().
_ptr = nullptr;
|
||||
}
|
||||
// Destructor: releases the held pointer by delegating to memFree().
~UsmHolder() {
|
||||
memFree();
|
||||
}
|
||||
private:
|
||||
const cl::UsmHelper& _usmHelper;
|
||||
@ -710,6 +714,12 @@ public:
|
||||
detail::errHandler(error, "[CL_EXT] UsmDevice in cl extensions constructor failed");
|
||||
}
|
||||
|
||||
// Releases the memory held by _usm_pointer by calling its memFree().
// Throws std::runtime_error when _usm_pointer is null (nothing to free).
void freeMem() {
|
||||
if (!_usm_pointer)
|
||||
throw std::runtime_error("[CL ext] Can not free memory of empty UsmHolder");
|
||||
_usm_pointer->memFree();
|
||||
}
|
||||
|
||||
virtual ~UsmMemory() = default;
|
||||
|
||||
protected:
|
||||
|
@ -2,6 +2,7 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "intel_gpu/runtime/debug_configuration.hpp"
|
||||
#include "intel_gpu/runtime/error_handler.hpp"
|
||||
#include "intel_gpu/runtime/utils.hpp"
|
||||
#include "ocl_memory.hpp"
|
||||
@ -276,19 +277,22 @@ shared_mem_params gpu_dx_buffer::get_internal_params() const {
|
||||
gpu_usm::gpu_usm(ocl_engine* engine, const layout& new_layout, const cl::UsmMemory& buffer, allocation_type type)
|
||||
: lockable_gpu_mem()
|
||||
, memory(engine, new_layout, type, true)
|
||||
, _buffer(buffer) {
|
||||
, _buffer(buffer)
|
||||
, _host_buffer(engine->get_usm_helper()) {
|
||||
}
|
||||
|
||||
gpu_usm::gpu_usm(ocl_engine* engine, const layout& new_layout, const cl::UsmMemory& buffer)
|
||||
: lockable_gpu_mem()
|
||||
, memory(engine, new_layout, detect_allocation_type(engine, buffer), true)
|
||||
, _buffer(buffer) {
|
||||
, _buffer(buffer)
|
||||
, _host_buffer(engine->get_usm_helper()) {
|
||||
}
|
||||
|
||||
gpu_usm::gpu_usm(ocl_engine* engine, const layout& layout, allocation_type type)
|
||||
: lockable_gpu_mem()
|
||||
, memory(engine, layout, type, false)
|
||||
, _buffer(engine->get_usm_helper()) {
|
||||
, _buffer(engine->get_usm_helper())
|
||||
, _host_buffer(engine->get_usm_helper()) {
|
||||
switch (get_allocation_type()) {
|
||||
case allocation_type::usm_host:
|
||||
_buffer.allocateHost(_bytes_count);
|
||||
@ -305,12 +309,25 @@ gpu_usm::gpu_usm(ocl_engine* engine, const layout& layout, allocation_type type)
|
||||
}
|
||||
}
|
||||
|
||||
void* gpu_usm::lock(const stream& stream, mem_lock_type /*type*/) {
|
||||
assert(get_allocation_type() != allocation_type::usm_device && "Can't lock usm device memory!");
|
||||
void* gpu_usm::lock(const stream& stream, mem_lock_type type) {
|
||||
std::lock_guard<std::mutex> locker(_mutex);
|
||||
if (0 == _lock_count) {
|
||||
stream.finish(); // Synchronization needed for OOOQ.
|
||||
_mapped_ptr = _buffer.get();
|
||||
auto& cl_stream = downcast<const ocl_stream>(stream);
|
||||
cl_stream.finish(); // Synchronization needed for OOOQ.
|
||||
if (get_allocation_type() == allocation_type::usm_device) {
|
||||
if (type != mem_lock_type::read) {
|
||||
throw std::runtime_error("Unable to lock allocation_type::usm_device with write lock_type.");
|
||||
}
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
GPU_DEBUG_IF(debug_config->verbose >= 2) {
|
||||
GPU_DEBUG_COUT << "Copy usm_device buffer to host buffer." << std::endl;
|
||||
}
|
||||
_host_buffer.allocateHost(_bytes_count);
|
||||
cl_stream.get_usm_helper().enqueue_memcpy(cl_stream.get_cl_queue(), _host_buffer.get(), _buffer.get(), _bytes_count, CL_TRUE);
|
||||
_mapped_ptr = _host_buffer.get();
|
||||
} else {
|
||||
_mapped_ptr = _buffer.get();
|
||||
}
|
||||
}
|
||||
_lock_count++;
|
||||
return _mapped_ptr;
|
||||
@ -320,6 +337,9 @@ void gpu_usm::unlock(const stream& /* stream */) {
|
||||
std::lock_guard<std::mutex> locker(_mutex);
|
||||
_lock_count--;
|
||||
if (0 == _lock_count) {
|
||||
if (get_allocation_type() == allocation_type::usm_device) {
|
||||
_host_buffer.freeMem();
|
||||
}
|
||||
_mapped_ptr = nullptr;
|
||||
}
|
||||
}
|
||||
|
@ -121,6 +121,7 @@ struct gpu_usm : public lockable_gpu_mem, public memory {
|
||||
|
||||
protected:
|
||||
cl::UsmMemory _buffer;
|
||||
cl::UsmMemory _host_buffer;
|
||||
|
||||
static allocation_type detect_allocation_type(ocl_engine* engine, const cl::UsmMemory& buffer);
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user