From c33856b31f76f22dd7c8cad5c75967a371ca700b Mon Sep 17 00:00:00 2001 From: Andrew Kwangwoong Park Date: Thu, 9 Sep 2021 13:19:07 +0900 Subject: [PATCH] [GPU] Improve memory usage management to distinguish allocation type (#7318) Signed-off-by: Andrew Kwangwoong Park --- .../clDNN/api/cldnn/runtime/engine.hpp | 21 +++++---- .../thirdparty/clDNN/runtime/engine.cpp | 47 +++++++++++++++---- .../thirdparty/clDNN/runtime/memory.cpp | 30 ++++++------ .../clDNN/tests/test_cases/memory_test.cpp | 2 +- 4 files changed, 65 insertions(+), 35 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp index 8aa53a14fe2..fb79a20a785 100644 --- a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp @@ -96,17 +96,20 @@ public: /// Returns user context handle which was used to create the engine virtual void* get_user_context() const = 0; - /// Returns the maximum amount of GPU memory that engine allocated in current process + /// Returns the total maximum amount of GPU memory allocated by engine in current process for all allocation types uint64_t get_max_used_device_memory() const; - /// Returns the amount of GPU memory currently used by the engine - uint64_t get_used_device_memory() const; + /// Returns the maximum amount of GPU memory allocated by engine in current process for the specified allocation @p type + uint64_t get_max_used_device_memory(allocation_type type) const; - /// Adds @p bytes count to currently used memory size - void add_memory_used(uint64_t bytes); + /// Returns the amount of GPU memory of the specified allocation @p type that is currently used by the engine + uint64_t get_used_device_memory(allocation_type type) const; - /// Subtracts @p bytes count from currently used memory size - void subtract_memory_used(uint64_t bytes); + /// Adds @p bytes count to currently used memory size of the 
specified allocation @p type + void add_memory_used(uint64_t bytes, allocation_type type); + + /// Subtracts @p bytes count from currently used memory size of the specified allocation @p type + void subtract_memory_used(uint64_t bytes, allocation_type type); /// Returns true if USM is enabled in engine config and device/driver supports required features bool use_unified_shared_memory() const; @@ -142,8 +145,8 @@ protected: const device::ptr _device; engine_configuration _configuration; - std::atomic<uint64_t> memory_usage = {0}; - std::atomic<uint64_t> peak_memory_usage = {0}; + std::map<allocation_type, std::atomic<uint64_t>> memory_usage_map; + std::map<allocation_type, std::atomic<uint64_t>> peak_memory_usage_map; }; } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/engine.cpp b/inference-engine/thirdparty/clDNN/runtime/engine.cpp index 976e7bae595..3738ec2ae9d 100644 --- a/inference-engine/thirdparty/clDNN/runtime/engine.cpp +++ b/inference-engine/thirdparty/clDNN/runtime/engine.cpp @@ -120,22 +120,51 @@ memory_ptr engine::share_surface(const layout& layout, shared_surface surf, uint #endif // _WIN32 uint64_t engine::get_max_used_device_memory() const { - return peak_memory_usage.load(); + uint64_t total_peak_memory_usage {0}; + for (auto const& m : peak_memory_usage_map) { + total_peak_memory_usage += m.second.load(); + } + return total_peak_memory_usage; } -uint64_t engine::get_used_device_memory() const { - return memory_usage.load(); +uint64_t engine::get_max_used_device_memory(allocation_type type) const { + uint64_t peak_memory_usage {0}; + auto iter = peak_memory_usage_map.find(type); + if (iter != peak_memory_usage_map.end()) { + peak_memory_usage = iter->second.load(); + } + return peak_memory_usage; } -void engine::add_memory_used(size_t bytes) { - memory_usage += bytes; - if (memory_usage > peak_memory_usage) { - peak_memory_usage = memory_usage.load(); +uint64_t engine::get_used_device_memory(allocation_type type) const { + uint64_t memory_usage {0}; + auto iter = memory_usage_map.find(type); + if (iter != 
memory_usage_map.end()) { + memory_usage = iter->second.load(); + } + return memory_usage; +} + +void engine::add_memory_used(size_t bytes, allocation_type type) { + if (!memory_usage_map.count(type) && !peak_memory_usage_map.count(type)) { + static std::mutex m; + std::lock_guard<std::mutex> guard(m); + memory_usage_map[type] = 0; + peak_memory_usage_map[type] = 0; + } + memory_usage_map[type] += bytes; + if (memory_usage_map[type] > peak_memory_usage_map[type]) { + peak_memory_usage_map[type] = memory_usage_map[type].load(); } } -void engine::subtract_memory_used(size_t bytes) { - memory_usage -= bytes; +void engine::subtract_memory_used(size_t bytes, allocation_type type) { + auto iter = memory_usage_map.find(type); + if (iter != memory_usage_map.end()) { + memory_usage_map[type] -= bytes; + } else { + throw std::runtime_error("Attempt to free unallocated memory"); + } } std::shared_ptr<cldnn::engine> engine::create(engine_types engine_type, diff --git a/inference-engine/thirdparty/clDNN/runtime/memory.cpp b/inference-engine/thirdparty/clDNN/runtime/memory.cpp index 80a6ee980ed..9a22d3a2ae9 100644 --- a/inference-engine/thirdparty/clDNN/runtime/memory.cpp +++ b/inference-engine/thirdparty/clDNN/runtime/memory.cpp @@ -20,27 +20,25 @@ namespace cldnn { memory::memory(engine* engine, const layout& layout, allocation_type type, bool reused) : _engine(engine), _layout(layout), _bytes_count(_layout.bytes_count()), _type(type), _reused(reused) { if (!_reused && _engine) { - _engine->add_memory_used(_bytes_count); - } - - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 1) { - GPU_DEBUG_COUT << "Allocate " << _bytes_count << " bytes of " << type << " allocation type" - << " (current=" << _engine->get_used_device_memory() << ";" - << " max=" << _engine->get_max_used_device_memory() << ")" << std::endl; + _engine->add_memory_used(_bytes_count, type); + GPU_DEBUG_GET_INSTANCE(debug_config); + GPU_DEBUG_IF(debug_config->verbose >= 1) { + GPU_DEBUG_COUT << "Allocate " << 
_bytes_count << " bytes of " << type << " allocation type" + << " (current=" << _engine->get_used_device_memory(type) << ";" + << " max=" << _engine->get_max_used_device_memory(type) << ")" << std::endl; + } } } memory::~memory() { if (!_reused && _engine) { - _engine->subtract_memory_used(_bytes_count); - } - - GPU_DEBUG_GET_INSTANCE(debug_config); - GPU_DEBUG_IF(debug_config->verbose >= 1) { - GPU_DEBUG_COUT << "Free " << _bytes_count << " bytes" - << " (current=" << _engine->get_used_device_memory() << ";" - << " max=" << _engine->get_max_used_device_memory() << ")" << std::endl; + _engine->subtract_memory_used(_bytes_count, _type); + GPU_DEBUG_GET_INSTANCE(debug_config); + GPU_DEBUG_IF(debug_config->verbose >= 1) { + GPU_DEBUG_COUT << "Free " << _bytes_count << " bytes of " << _type << " allocation type" + << " (current=" << _engine->get_used_device_memory(_type) << ";" + << " max=" << _engine->get_max_used_device_memory(_type) << ")" << std::endl; + } } } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp index 4582f2ad063..e5e8bd01f09 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp @@ -403,7 +403,7 @@ TEST(memory_pool, shared_mem_pool_diff_batches) { network network_second(*engine, topo, bo); network_second.set_input_data("input", input_1); auto outputs_second = network_second.execute(); - EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)3928); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)4328); } TEST(memory_pool, shared_dep_two_output) {