[GPU] Improve memory usage management to distinguish allocation type (#7318)
Signed-off-by: Andrew Kwangwoong Park <andrew.kwangwoong.park@intel.com>
This commit is contained in:
parent
1c1401b069
commit
c33856b31f
@ -96,17 +96,20 @@ public:
|
||||
/// Returns user context handle which was used to create the engine
|
||||
virtual void* get_user_context() const = 0;
|
||||
|
||||
/// Returns the maximum amount of GPU memory that engine allocated in current process
|
||||
/// Returns the total maximum amount of GPU memory allocated by engine in current process for all allocation types
|
||||
uint64_t get_max_used_device_memory() const;
|
||||
|
||||
/// Returns the amount of GPU memory currently used by the engine
|
||||
uint64_t get_used_device_memory() const;
|
||||
/// Returns the maximum amount of GPU memory allocated by engine in current process for the specified allocation @p type
|
||||
uint64_t get_max_used_device_memory(allocation_type type) const;
|
||||
|
||||
/// Adds @p bytes count to currently used memory size
|
||||
void add_memory_used(uint64_t bytes);
|
||||
/// Returns the amount of GPU memory specified allocation @p type that currently used by the engine
|
||||
uint64_t get_used_device_memory(allocation_type type) const;
|
||||
|
||||
/// Subtracts @p bytes count from currently used memory size
|
||||
void subtract_memory_used(uint64_t bytes);
|
||||
/// Adds @p bytes count to currently used memory size of the specified allocation @p type
|
||||
void add_memory_used(uint64_t bytes, allocation_type type);
|
||||
|
||||
/// Subtracts @p bytes count from currently used memory size of the specified allocation @p type
|
||||
void subtract_memory_used(uint64_t bytes, allocation_type type);
|
||||
|
||||
/// Returns true if USM is enabled in engine config and device/driver supports required features
|
||||
bool use_unified_shared_memory() const;
|
||||
@ -142,8 +145,8 @@ protected:
|
||||
const device::ptr _device;
|
||||
engine_configuration _configuration;
|
||||
|
||||
std::atomic<uint64_t> memory_usage = {0};
|
||||
std::atomic<uint64_t> peak_memory_usage = {0};
|
||||
std::map<allocation_type, std::atomic<uint64_t>> memory_usage_map;
|
||||
std::map<allocation_type, std::atomic<uint64_t>> peak_memory_usage_map;
|
||||
};
|
||||
|
||||
} // namespace cldnn
|
||||
|
@ -120,22 +120,51 @@ memory_ptr engine::share_surface(const layout& layout, shared_surface surf, uint
|
||||
#endif // _WIN32
|
||||
|
||||
uint64_t engine::get_max_used_device_memory() const {
|
||||
return peak_memory_usage.load();
|
||||
uint64_t total_peak_memory_usage {0};
|
||||
for (auto const& m : peak_memory_usage_map) {
|
||||
total_peak_memory_usage += m.second.load();
|
||||
}
|
||||
return total_peak_memory_usage;
|
||||
}
|
||||
|
||||
uint64_t engine::get_used_device_memory() const {
|
||||
return memory_usage.load();
|
||||
uint64_t engine::get_max_used_device_memory(allocation_type type) const {
|
||||
uint64_t peak_memory_usage {0};
|
||||
auto iter = peak_memory_usage_map.find(type);
|
||||
if (iter != peak_memory_usage_map.end()) {
|
||||
peak_memory_usage = iter->second.load();
|
||||
}
|
||||
return peak_memory_usage;
|
||||
}
|
||||
|
||||
void engine::add_memory_used(size_t bytes) {
|
||||
memory_usage += bytes;
|
||||
if (memory_usage > peak_memory_usage) {
|
||||
peak_memory_usage = memory_usage.load();
|
||||
uint64_t engine::get_used_device_memory(allocation_type type) const {
|
||||
uint64_t memory_usage {0};
|
||||
auto iter = memory_usage_map.find(type);
|
||||
if (iter != memory_usage_map.end()) {
|
||||
memory_usage = iter->second.load();
|
||||
}
|
||||
return memory_usage;
|
||||
}
|
||||
|
||||
void engine::add_memory_used(size_t bytes, allocation_type type) {
|
||||
if (!memory_usage_map.count(type) && !peak_memory_usage_map.count(type)) {
|
||||
static std::mutex m;
|
||||
std::lock_guard<std::mutex> guard(m);
|
||||
memory_usage_map[type] = 0;
|
||||
peak_memory_usage_map[type] = 0;
|
||||
}
|
||||
memory_usage_map[type] += bytes;
|
||||
if (memory_usage_map[type] > peak_memory_usage_map[type]) {
|
||||
peak_memory_usage_map[type] = memory_usage_map[type].load();
|
||||
}
|
||||
}
|
||||
|
||||
void engine::subtract_memory_used(size_t bytes) {
|
||||
memory_usage -= bytes;
|
||||
void engine::subtract_memory_used(size_t bytes, allocation_type type) {
|
||||
auto iter = memory_usage_map.find(type);
|
||||
if (iter != memory_usage_map.end()) {
|
||||
memory_usage_map[type] -= bytes;
|
||||
} else {
|
||||
throw std::runtime_error("Attempt to free unallocated memory");
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<cldnn::engine> engine::create(engine_types engine_type,
|
||||
|
@ -20,27 +20,25 @@ namespace cldnn {
|
||||
memory::memory(engine* engine, const layout& layout, allocation_type type, bool reused)
    : _engine(engine), _layout(layout), _bytes_count(_layout.bytes_count()), _type(type), _reused(reused) {
    // A reused buffer was already accounted for when it was first allocated,
    // so only fresh allocations update the engine's usage counters.
    if (!_reused && _engine) {
        _engine->add_memory_used(_bytes_count, type);
        GPU_DEBUG_GET_INSTANCE(debug_config);
        GPU_DEBUG_IF(debug_config->verbose >= 1) {
            GPU_DEBUG_COUT << "Allocate " << _bytes_count << " bytes of " << type << " allocation type"
                           << " (current=" << _engine->get_used_device_memory(type) << ";"
                           << " max=" << _engine->get_max_used_device_memory(type) << ")" << std::endl;
        }
    }
}
|
||||
|
||||
memory::~memory() {
    // Mirror the constructor: only non-reused, engine-tracked buffers were
    // ever added to the usage counters.
    if (!_reused && _engine) {
        // subtract_memory_used() throws std::runtime_error when the type was
        // never registered. A destructor is implicitly noexcept, so letting
        // that propagate would call std::terminate — swallow it instead and
        // keep the accounting best-effort during teardown.
        try {
            _engine->subtract_memory_used(_bytes_count, _type);
        } catch (...) {
        }
        GPU_DEBUG_GET_INSTANCE(debug_config);
        GPU_DEBUG_IF(debug_config->verbose >= 1) {
            GPU_DEBUG_COUT << "Free " << _bytes_count << " bytes of " << _type << " allocation type"
                           << " (current=" << _engine->get_used_device_memory(_type) << ";"
                           << " max=" << _engine->get_max_used_device_memory(_type) << ")" << std::endl;
        }
    }
}
|
||||
|
||||
|
@ -403,7 +403,7 @@ TEST(memory_pool, shared_mem_pool_diff_batches) {
|
||||
network network_second(*engine, topo, bo);
|
||||
network_second.set_input_data("input", input_1);
|
||||
auto outputs_second = network_second.execute();
|
||||
EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)3928);
|
||||
EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)4328);
|
||||
}
|
||||
|
||||
TEST(memory_pool, shared_dep_two_output) {
|
||||
|
Loading…
Reference in New Issue
Block a user