[GPU] Improve memory usage management to distinguish allocation type (#7318)
Signed-off-by: Andrew Kwangwoong Park <andrew.kwangwoong.park@intel.com>
This commit is contained in:
parent
1c1401b069
commit
c33856b31f
@ -96,17 +96,20 @@ public:
|
||||
/// Returns user context handle which was used to create the engine
|
||||
virtual void* get_user_context() const = 0;
|
||||
|
||||
/// Returns the maximum amount of GPU memory that engine allocated in current process
|
||||
/// Returns the total maximum amount of GPU memory allocated by engine in current process for all allocation types
|
||||
uint64_t get_max_used_device_memory() const;
|
||||
|
||||
/// Returns the amount of GPU memory currently used by the engine
|
||||
uint64_t get_used_device_memory() const;
|
||||
/// Returns the maximum amount of GPU memory allocated by engine in current process for the specified allocation @p type
|
||||
uint64_t get_max_used_device_memory(allocation_type type) const;
|
||||
|
||||
/// Adds @p bytes count to currently used memory size
|
||||
void add_memory_used(uint64_t bytes);
|
||||
/// Returns the amount of GPU memory specified allocation @p type that currently used by the engine
|
||||
uint64_t get_used_device_memory(allocation_type type) const;
|
||||
|
||||
/// Subtracts @p bytes count from currently used memory size
|
||||
void subtract_memory_used(uint64_t bytes);
|
||||
/// Adds @p bytes count to currently used memory size of the specified allocation @p type
|
||||
void add_memory_used(uint64_t bytes, allocation_type type);
|
||||
|
||||
/// Subtracts @p bytes count from currently used memory size of the specified allocation @p type
|
||||
void subtract_memory_used(uint64_t bytes, allocation_type type);
|
||||
|
||||
/// Returns true if USM is enabled in engine config and device/driver supports required features
|
||||
bool use_unified_shared_memory() const;
|
||||
@ -142,8 +145,8 @@ protected:
|
||||
const device::ptr _device;
|
||||
engine_configuration _configuration;
|
||||
|
||||
std::atomic<uint64_t> memory_usage = {0};
|
||||
std::atomic<uint64_t> peak_memory_usage = {0};
|
||||
std::map<allocation_type, std::atomic<uint64_t>> memory_usage_map;
|
||||
std::map<allocation_type, std::atomic<uint64_t>> peak_memory_usage_map;
|
||||
};
|
||||
|
||||
} // namespace cldnn
|
||||
|
@ -120,22 +120,51 @@ memory_ptr engine::share_surface(const layout& layout, shared_surface surf, uint
|
||||
#endif // _WIN32
|
||||
|
||||
uint64_t engine::get_max_used_device_memory() const {
|
||||
return peak_memory_usage.load();
|
||||
uint64_t total_peak_memory_usage {0};
|
||||
for (auto const& m : peak_memory_usage_map) {
|
||||
total_peak_memory_usage += m.second.load();
|
||||
}
|
||||
return total_peak_memory_usage;
|
||||
}
|
||||
|
||||
uint64_t engine::get_used_device_memory() const {
|
||||
return memory_usage.load();
|
||||
uint64_t engine::get_max_used_device_memory(allocation_type type) const {
|
||||
uint64_t peak_memory_usage {0};
|
||||
auto iter = peak_memory_usage_map.find(type);
|
||||
if (iter != peak_memory_usage_map.end()) {
|
||||
peak_memory_usage = iter->second.load();
|
||||
}
|
||||
return peak_memory_usage;
|
||||
}
|
||||
|
||||
void engine::add_memory_used(size_t bytes) {
|
||||
memory_usage += bytes;
|
||||
if (memory_usage > peak_memory_usage) {
|
||||
peak_memory_usage = memory_usage.load();
|
||||
uint64_t engine::get_used_device_memory(allocation_type type) const {
|
||||
uint64_t memory_usage {0};
|
||||
auto iter = memory_usage_map.find(type);
|
||||
if (iter != memory_usage_map.end()) {
|
||||
memory_usage = iter->second.load();
|
||||
}
|
||||
return memory_usage;
|
||||
}
|
||||
|
||||
void engine::add_memory_used(size_t bytes, allocation_type type) {
|
||||
if (!memory_usage_map.count(type) && !peak_memory_usage_map.count(type)) {
|
||||
static std::mutex m;
|
||||
std::lock_guard<std::mutex> guard(m);
|
||||
memory_usage_map[type] = 0;
|
||||
peak_memory_usage_map[type] = 0;
|
||||
}
|
||||
memory_usage_map[type] += bytes;
|
||||
if (memory_usage_map[type] > peak_memory_usage_map[type]) {
|
||||
peak_memory_usage_map[type] = memory_usage_map[type].load();
|
||||
}
|
||||
}
|
||||
|
||||
void engine::subtract_memory_used(size_t bytes) {
|
||||
memory_usage -= bytes;
|
||||
void engine::subtract_memory_used(size_t bytes, allocation_type type) {
|
||||
auto iter = memory_usage_map.find(type);
|
||||
if (iter != memory_usage_map.end()) {
|
||||
memory_usage_map[type] -= bytes;
|
||||
} else {
|
||||
throw std::runtime_error("Attempt to free unallocated memory");
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<cldnn::engine> engine::create(engine_types engine_type,
|
||||
|
@ -20,27 +20,25 @@ namespace cldnn {
|
||||
memory::memory(engine* engine, const layout& layout, allocation_type type, bool reused)
    : _engine(engine), _layout(layout), _bytes_count(_layout.bytes_count()), _type(type), _reused(reused) {
    // A reused buffer was already accounted for when it was first allocated,
    // so only fresh allocations update the engine's usage counters.
    if (!_reused && _engine) {
        _engine->add_memory_used(_bytes_count, type);
        GPU_DEBUG_GET_INSTANCE(debug_config);
        GPU_DEBUG_IF(debug_config->verbose >= 1) {
            GPU_DEBUG_COUT << "Allocate " << _bytes_count << " bytes of " << type << " allocation type"
                           << " (current=" << _engine->get_used_device_memory(type) << ";"
                           << " max=" << _engine->get_max_used_device_memory(type) << ")" << std::endl;
        }
    }
}
|
||||
|
||||
memory::~memory() {
    // Mirror the constructor: only non-reused, engine-tracked buffers were
    // ever added to the usage counters.
    if (!_reused && _engine) {
        // subtract_memory_used() throws std::runtime_error when the type was
        // never registered. A destructor is implicitly noexcept, so letting
        // that propagate would call std::terminate — swallow it instead and
        // keep the accounting best-effort during teardown.
        try {
            _engine->subtract_memory_used(_bytes_count, _type);
        } catch (...) {
        }
        GPU_DEBUG_GET_INSTANCE(debug_config);
        GPU_DEBUG_IF(debug_config->verbose >= 1) {
            GPU_DEBUG_COUT << "Free " << _bytes_count << " bytes of " << _type << " allocation type"
                           << " (current=" << _engine->get_used_device_memory(_type) << ";"
                           << " max=" << _engine->get_max_used_device_memory(_type) << ")" << std::endl;
        }
    }
}
|
||||
|
||||
|
@ -403,7 +403,7 @@ TEST(memory_pool, shared_mem_pool_diff_batches) {
|
||||
network network_second(*engine, topo, bo);
|
||||
network_second.set_input_data("input", input_1);
|
||||
auto outputs_second = network_second.execute();
|
||||
EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)3928);
|
||||
EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)4328);
|
||||
}
|
||||
|
||||
TEST(memory_pool, shared_dep_two_output) {
|
||||
|
Loading…
Reference in New Issue
Block a user