[GPU] Improve memory usage management to distinguish allocation type (#7318)

Signed-off-by: Andrew Kwangwoong Park <andrew.kwangwoong.park@intel.com>
This commit is contained in:
Andrew Kwangwoong Park 2021-09-09 13:19:07 +09:00 committed by GitHub
parent 1c1401b069
commit c33856b31f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 65 additions and 35 deletions

View File

@ -96,17 +96,20 @@ public:
/// Returns user context handle which was used to create the engine
virtual void* get_user_context() const = 0;
/// Returns the total maximum amount of GPU memory allocated by the engine in the current process, summed over all allocation types
uint64_t get_max_used_device_memory() const;
/// Returns the amount of GPU memory currently used by the engine
// NOTE(review): the per-allocation-type overloads below appear to supersede
// the no-argument/single-argument variants of get_used_device_memory,
// add_memory_used and subtract_memory_used (the .cpp hunks only show
// per-type definitions) — confirm the old variants still have definitions
// before relying on them.
uint64_t get_used_device_memory() const;
/// Returns the maximum amount of GPU memory allocated by the engine in the current process for the specified allocation @p type
uint64_t get_max_used_device_memory(allocation_type type) const;
/// Adds @p bytes count to currently used memory size
void add_memory_used(uint64_t bytes);
/// Returns the amount of GPU memory of the specified allocation @p type that is currently used by the engine
uint64_t get_used_device_memory(allocation_type type) const;
/// Subtracts @p bytes count from currently used memory size
void subtract_memory_used(uint64_t bytes);
/// Adds @p bytes count to the currently used memory size of the specified allocation @p type
void add_memory_used(uint64_t bytes, allocation_type type);
/// Subtracts @p bytes count from the currently used memory size of the specified allocation @p type
void subtract_memory_used(uint64_t bytes, allocation_type type);
/// Returns true if USM is enabled in engine config and device/driver supports required features
bool use_unified_shared_memory() const;
@ -142,8 +145,8 @@ protected:
const device::ptr _device;
engine_configuration _configuration;
// NOTE(review): the two scalar counters below appear to be superseded by the
// per-allocation-type maps that follow (no .cpp code shown still updates
// them) — confirm nothing else reads them before removing.
std::atomic<uint64_t> memory_usage = {0};
std::atomic<uint64_t> peak_memory_usage = {0};
// Current and peak used-memory byte counts, tracked per allocation type.
// Entries are inserted lazily on first allocation of a given type
// (see engine::add_memory_used).
std::map<allocation_type, std::atomic<uint64_t>> memory_usage_map;
std::map<allocation_type, std::atomic<uint64_t>> peak_memory_usage_map;
};
} // namespace cldnn

View File

@ -120,22 +120,51 @@ memory_ptr engine::share_surface(const layout& layout, shared_surface surf, uint
#endif // _WIN32
uint64_t engine::get_max_used_device_memory() const {
    // Total peak usage across all allocation types. Fix: removed the stale
    // pre-refactor line `return peak_memory_usage.load();` (diff residue)
    // that made the summation loop unreachable.
    //
    // NOTE(review): this sums per-type peaks, which may exceed the true
    // simultaneous process-wide peak when the per-type peaks occurred at
    // different points in time.
    uint64_t total_peak_memory_usage{0};
    for (auto const& entry : peak_memory_usage_map) {
        total_peak_memory_usage += entry.second.load();
    }
    return total_peak_memory_usage;
}
uint64_t engine::get_used_device_memory() const {
return memory_usage.load();
uint64_t engine::get_max_used_device_memory(allocation_type type) const {
uint64_t peak_memory_usage {0};
auto iter = peak_memory_usage_map.find(type);
if (iter != peak_memory_usage_map.end()) {
peak_memory_usage = iter->second.load();
}
return peak_memory_usage;
}
void engine::add_memory_used(size_t bytes) {
memory_usage += bytes;
if (memory_usage > peak_memory_usage) {
peak_memory_usage = memory_usage.load();
uint64_t engine::get_used_device_memory(allocation_type type) const {
uint64_t memory_usage {0};
auto iter = memory_usage_map.find(type);
if (iter != memory_usage_map.end()) {
memory_usage = iter->second.load();
}
return memory_usage;
}
void engine::add_memory_used(size_t bytes, allocation_type type) {
    // Records an allocation of @p bytes against @p type and refreshes the
    // per-type peak.
    //
    // Fix: the original checked `count(type)` BEFORE taking the mutex
    // (broken double-checked locking). Two threads allocating the first
    // buffer of a type could both see the key missing and race the
    // std::map insertions — a data race / UB. The existence check must
    // happen under the lock that serializes insertion.
    //
    // NOTE(review): the header declares this parameter as uint64_t while the
    // definition uses size_t — identical on LP64/LLP64 targets, but worth
    // unifying. Readers iterating these maps concurrently with a first-time
    // insertion are still unsynchronized; confirm allocation types are
    // pre-populated or accept the residual race as statistics-only.
    static std::mutex m;
    {
        std::lock_guard<std::mutex> guard(m);
        if (!memory_usage_map.count(type))
            memory_usage_map[type] = 0;
        if (!peak_memory_usage_map.count(type))
            peak_memory_usage_map[type] = 0;
    }
    memory_usage_map[type] += bytes;
    // The compare-and-store pair below is not atomic as a unit; concurrent
    // adders may record a slightly stale peak. Acceptable for debug counters.
    if (memory_usage_map[type] > peak_memory_usage_map[type]) {
        peak_memory_usage_map[type] = memory_usage_map[type].load();
    }
}
void engine::subtract_memory_used(size_t bytes) {
memory_usage -= bytes;
void engine::subtract_memory_used(size_t bytes, allocation_type type) {
auto iter = memory_usage_map.find(type);
if (iter != memory_usage_map.end()) {
memory_usage_map[type] -= bytes;
} else {
throw std::runtime_error("Attempt to free unallocated memory");
}
}
std::shared_ptr<cldnn::engine> engine::create(engine_types engine_type,

View File

@ -20,27 +20,25 @@ namespace cldnn {
memory::memory(engine* engine, const layout& layout, allocation_type type, bool reused)
    : _engine(engine), _layout(layout), _bytes_count(_layout.bytes_count()), _type(type), _reused(reused) {
    // Register this buffer's footprint with the owning engine unless the
    // storage is reused (already accounted) or there is no engine.
    // Fix: resolved interleaved old/new diff lines — the stale pre-change
    // call `add_memory_used(_bytes_count)` and its early closing brace are
    // dropped; per the closing-brace sequence in the diff, the debug logging
    // now nests inside the guard, so it cannot dereference a null _engine.
    if (!_reused && _engine) {
        _engine->add_memory_used(_bytes_count, type);
        GPU_DEBUG_GET_INSTANCE(debug_config);
        GPU_DEBUG_IF(debug_config->verbose >= 1) {
            GPU_DEBUG_COUT << "Allocate " << _bytes_count << " bytes of " << type << " allocation type"
                           << " (current=" << _engine->get_used_device_memory(type) << ";"
                           << " max=" << _engine->get_max_used_device_memory(type) << ")" << std::endl;
        }
    }
}
memory::~memory() {
    // Return this buffer's footprint to the owning engine, mirroring the
    // constructor's guard. Fix: resolved interleaved old/new diff lines —
    // the stale pre-change call `subtract_memory_used(_bytes_count)`, its
    // early closing brace, and the old "Free ... bytes" log lines are
    // dropped; the debug logging nests inside the guard per the diff's
    // closing-brace sequence.
    if (!_reused && _engine) {
        _engine->subtract_memory_used(_bytes_count, _type);
        GPU_DEBUG_GET_INSTANCE(debug_config);
        GPU_DEBUG_IF(debug_config->verbose >= 1) {
            GPU_DEBUG_COUT << "Free " << _bytes_count << " bytes of " << _type << " allocation type"
                           << " (current=" << _engine->get_used_device_memory(_type) << ";"
                           << " max=" << _engine->get_max_used_device_memory(_type) << ")" << std::endl;
        }
    }
}

View File

@ -403,7 +403,7 @@ TEST(memory_pool, shared_mem_pool_diff_batches) {
network network_second(*engine, topo, bo);
network_second.set_input_data("input", input_1);
auto outputs_second = network_second.execute();
EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)3928);
EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)4328);
}
TEST(memory_pool, shared_dep_two_output) {