* Fix memory leak issue Co-authored-by: Taylor Yeonbok Lee <taylor.lee@intel.com>
This commit is contained in:
parent
b449481439
commit
03b0199521
@ -247,6 +247,8 @@ public:
|
|||||||
// returns {-1, -1} if it failed to estimate by allocating given batch size
|
// returns {-1, -1} if it failed to estimate by allocating given batch size
|
||||||
std::pair<int64_t/*const alloc*/, int64_t/*general alloc*/> get_estimated_device_mem_usage();
|
std::pair<int64_t/*const alloc*/, int64_t/*general alloc*/> get_estimated_device_mem_usage();
|
||||||
|
|
||||||
|
void remove_kernel(kernel_id id);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
uint32_t prog_id = 0;
|
uint32_t prog_id = 0;
|
||||||
engine& _engine;
|
engine& _engine;
|
||||||
|
167
src/plugins/intel_gpu/include/intel_gpu/runtime/lru_cache.hpp
Normal file
167
src/plugins/intel_gpu/include/intel_gpu/runtime/lru_cache.hpp
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
// Copyright (C) 2018-2022 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <list>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <functional>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
namespace cldnn {
|
||||||
|
|
||||||
|
struct primitive_impl;
|
||||||
|
|
||||||
|
/// @brief LRU (least-recently-used) cache: when the cache is full, adding a
///        new entry evicts the entry that was touched longest ago.
template<typename Key, typename Value>
class LruCache {
public:
    using data_type = std::pair<Key, Value>;

public:
    /// @param caps maximum number of entries kept at once. A capacity of 0
    ///        disables eviction entirely (the cache grows without bound).
    explicit LruCache(size_t caps) : _capacity(caps) {}

    ~LruCache() {
        clear();
    }

    /**
     * @brief Get the least recently used element in the cache.
     *
     * @return Value the LRU value, or a default-constructed Value when the
     *         cache is empty.
     */
    Value get_lru_element() const {
        return _entries.empty() ? Value() : _entries.back().second;
    }

    /**
     * @brief Add a new value under the given key. If the key already exists,
     *        its value is overwritten and the entry becomes most recently used.
     *
     * @param key   lookup key
     * @param value payload to store
     * @return true when the cache was full and the least recently used entry
     *         was evicted to make room.
     * @return false otherwise.
     */
    bool add(const Key& key, const Value& value) {
        auto found = _index.find(key);
        if (found != _index.end()) {
            // Existing entry: promote it to most-recently-used and replace
            // the stored payload.
            touch_data(found->second);
            found->second->second = value;
            return false;
        }

        const bool evicted = (_capacity > 0 && _index.size() == _capacity);
        if (evicted)
            pop();
        _entries.push_front({key, value});
        _index.emplace(key, _entries.begin());
        return evicted;
    }

    /**
     * @brief Check whether a value associated with the key exists in the cache.
     *
     * @param key
     * @return true if a value associated with the key exists.
     * @return false otherwise.
     */
    bool has(const Key& key) const {
        return _index.count(key) != 0;
    }

    /**
     * @brief Find the value associated with a key and mark it most recently
     *        used.
     *
     * @param key
     * @return Value the stored value; on a miss, a default-constructed Value
     *         (e.g. nullptr for smart-pointer payloads).
     */
    Value get(const Key& key) {
        auto found = _index.find(key);
        if (found == _index.end()) {
            return Value();
        }
        touch_data(found->second);
        // touch_data moved the entry to the front of the recency list.
        return _entries.front().second;
    }

    /**
     * @brief Remove all entries.
     */
    void clear() {
        _entries.clear();
        _index.clear();
    }

    /**
     * @brief Return the current number of entries in the cache.
     *
     * @return size_t
     */
    size_t size() const {
        return _entries.size();
    }

    /**
     * @brief Return the capacity of the cache.
     *
     * @return size_t
     */
    size_t capacity() const {
        return _capacity;
    }

    /**
     * @brief Get all keys currently stored, ordered from most to least
     *        recently used.
     *
     * @return std::vector<Key>
     */
    std::vector<Key> get_all_keys() const {
        std::vector<Key> keys;
        keys.reserve(_entries.size());
        for (const auto& entry : _entries) {
            keys.push_back(entry.first);
        }
        return keys;
    }

private:
    using lru_data_list_type = std::list<data_type>;
    using lru_data_list_iter = typename lru_data_list_type::iterator;

    lru_data_list_type _entries;                         // front = MRU, back = LRU
    std::unordered_map<Key, lru_data_list_iter> _index;  // key -> node in _entries
    const size_t _capacity;

    /**
     * @brief Move touched data to the front of the recency list.
     *
     * @param iter iterator of the touched entry
     */
    void touch_data(lru_data_list_iter iter) {
        _entries.splice(_entries.begin(), _entries, iter);
    }

    /**
     * @brief Pop the n least recently used cache entries.
     *
     * @param n number of entries to be popped
     */
    void pop(size_t n = 1) {
        while (n-- > 0 && !_entries.empty()) {
            _index.erase(_entries.back().first);
            _entries.pop_back();
        }
    }
};
|
||||||
|
|
||||||
|
using ImplementationsCache = cldnn::LruCache<std::string, std::shared_ptr<primitive_impl>>;
|
||||||
|
} // namespace cldnn
|
@ -73,6 +73,10 @@ struct custom_gpu_primitive_impl : typed_primitive_impl<custom_gpu_primitive> {
|
|||||||
args.outputs = { instance.output_memory_ptr() };
|
args.outputs = { instance.output_memory_ptr() };
|
||||||
return stream.enqueue_kernel(*_kernels.front(), cl_kernel.get()->params, args, events, instance.node.is_output());
|
return stream.enqueue_kernel(*_kernels.front(), cl_kernel.get()->params, args, events, instance.node.is_output());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<std::string> get_kernel_ids() override {
|
||||||
|
return {_kernel_id};
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static kernel_selector::kernel_argument_element get_arg(custom_gpu_primitive::arg_desc arg) {
|
static kernel_selector::kernel_argument_element get_arg(custom_gpu_primitive::arg_desc arg) {
|
||||||
|
@ -108,6 +108,10 @@ protected:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<std::string> get_kernel_ids() override {
|
||||||
|
return _kernel_ids;
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<layout> get_internal_buffer_layouts_impl() const override {
|
std::vector<layout> get_internal_buffer_layouts_impl() const override {
|
||||||
if (_kernel_data.internalBufferSizes.empty())
|
if (_kernel_data.internalBufferSizes.empty())
|
||||||
return {};
|
return {};
|
||||||
|
@ -49,6 +49,9 @@ struct primitive_impl {
|
|||||||
virtual bool is_cpu() const { return true; }
|
virtual bool is_cpu() const { return true; }
|
||||||
virtual void init_kernels() = 0;
|
virtual void init_kernels() = 0;
|
||||||
virtual std::unique_ptr<primitive_impl> clone() const = 0;
|
virtual std::unique_ptr<primitive_impl> clone() const = 0;
|
||||||
|
virtual std::vector<std::string> get_kernel_ids() {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::string _kernel_name;
|
std::string _kernel_name;
|
||||||
|
@ -1556,3 +1556,7 @@ std::pair<int64_t, int64_t> program::get_estimated_device_mem_usage() {
|
|||||||
|
|
||||||
return std::make_pair(const_sum, get_engine().get_used_device_memory(allocation_type::usm_device));
|
return std::make_pair(const_sum, get_engine().get_used_device_memory(allocation_type::usm_device));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void program::remove_kernel(kernel_id id) {
|
||||||
|
_kernels_cache->remove_kernel(id);
|
||||||
|
}
|
||||||
|
@ -148,7 +148,7 @@ kernel_id kernels_cache::set_kernel_source(
|
|||||||
bool dump_custom_program) {
|
bool dump_custom_program) {
|
||||||
std::lock_guard<std::mutex> lock(_mutex);
|
std::lock_guard<std::mutex> lock(_mutex);
|
||||||
// we need unique id in order to avoid conflict across topologies.
|
// we need unique id in order to avoid conflict across topologies.
|
||||||
const auto kernel_num = _kernels.size() + _kernels_code.size();
|
const auto kernel_num = _kernels.size() + (_kernel_idx++);
|
||||||
kernel_id id = kernel_string->entry_point + "_" + std::to_string(kernel_num);
|
kernel_id id = kernel_string->entry_point + "_" + std::to_string(kernel_num);
|
||||||
|
|
||||||
auto res = _kernels_code.emplace(kernel_string, id, dump_custom_program);
|
auto res = _kernels_code.emplace(kernel_string, id, dump_custom_program);
|
||||||
|
@ -75,6 +75,7 @@ private:
|
|||||||
engine& _engine;
|
engine& _engine;
|
||||||
uint32_t _prog_id = 0;
|
uint32_t _prog_id = 0;
|
||||||
kernels_code _kernels_code;
|
kernels_code _kernels_code;
|
||||||
|
size_t _kernel_idx = 0;
|
||||||
std::atomic<bool> _pending_compilation{false};
|
std::atomic<bool> _pending_compilation{false};
|
||||||
std::map<const std::string, kernel::ptr> _kernels;
|
std::map<const std::string, kernel::ptr> _kernels;
|
||||||
std::vector<std::string> batch_header_str;
|
std::vector<std::string> batch_header_str;
|
||||||
@ -97,6 +98,9 @@ public:
|
|||||||
// forces compilation of all pending kernels/programs
|
// forces compilation of all pending kernels/programs
|
||||||
void build_all();
|
void build_all();
|
||||||
void reset();
|
void reset();
|
||||||
|
void remove_kernel(kernel_id id) {
|
||||||
|
_kernels.erase(id);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace cldnn
|
} // namespace cldnn
|
||||||
|
124
src/plugins/intel_gpu/tests/test_cases/lru_caches_gpu_test.cpp
Normal file
124
src/plugins/intel_gpu/tests/test_cases/lru_caches_gpu_test.cpp
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
// Copyright (C) 2018-2022 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "test_utils.h"
|
||||||
|
|
||||||
|
#include "intel_gpu/runtime/lru_cache.hpp"
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
using namespace cldnn;
|
||||||
|
using namespace ::tests;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Exercises LruCache with a primitive value type (int -> int), including the
// eviction path: capacity is 4 entries while 5 distinct keys are inserted.
TEST(lru_cache, basic_data_type)
{
    const size_t cap = 4;
    // Capacity is a number of entries, not a byte size: the previous
    // `cap * sizeof(int)` (== 16) made the cache so large that eviction never
    // happened, contradicting the expected key order below (// 5, 1, 2, 4).
    LruCache<int, int> ca(cap);

    std::vector<int> inputs = {1, 2, 3, 4, 2, 1, 5};
    std::vector<std::pair<int, int>> input_values;
    for (auto i : inputs) {
        input_values.push_back(std::make_pair(i, i + 10));
    }

    // An empty cache reports a default-constructed value as its LRU element.
    EXPECT_EQ(ca.get_lru_element(), int());

    std::vector<bool> expected_hitted = {false, false, false, false, true, true, false};
    for (size_t i = 0; i < input_values.size(); i++) {
        auto& in = input_values[i];
        int data = 0;
        bool hitted = ca.has(in.first);
        if (hitted) {
            data = ca.get(in.first);
        } else {
            ca.add(in.first, in.second);
            data = ca.get(in.first);
        }
        EXPECT_EQ(data, in.second);
        EXPECT_EQ(hitted, (bool)expected_hitted[i]);
    }

    // With capacity 4 the cache now holds exactly the last four touched keys.
    std::vector<std::pair<int, int>> expected_value;
    for (size_t i = ca.size(); i > 0; i--) {  // 5, 1, 2, 4
        int idx = input_values.size() - i;
        expected_value.push_back(input_values[idx]);
    }

    // get_all_keys() returns keys MRU-first, so walk expectations backwards.
    int idx = expected_value.size() - 1;
    for (auto key : ca.get_all_keys()) {
        EXPECT_EQ(key, expected_value[idx--].first);
    }
}
|
||||||
|
|
||||||
|
// Simple value type used to exercise LruCache with a user-defined payload.
// The key member is derived from the three coordinates at construction time.
class lru_cache_test_data {
public:
    lru_cache_test_data(int a, int b, int c) : x(a), y(b), z(c) {
        key = "key_" + std::to_string(a) + "_" + std::to_string(b) + "_" + std::to_string(c);
    }

    // Two objects are equal iff all three coordinates match.
    bool operator==(const lru_cache_test_data& rhs) {
        return x == rhs.x && y == rhs.y && z == rhs.z;
    }

    bool operator!=(const lru_cache_test_data& rhs) {
        return !(x == rhs.x && y == rhs.y && z == rhs.z);
    }

    // Human-readable "(x,y,z)" form, handy for failure messages.
    operator std::string() {
        return "(" + std::to_string(x) + "," + std::to_string(y) + "," + std::to_string(z) + ")";
    }

    std::string key;
    int x;
    int y;
    int z;
};
|
||||||
|
|
||||||
|
using test_impl_cache = LruCache<std::string, std::shared_ptr<lru_cache_test_data>>;
|
||||||
|
|
||||||
|
TEST(lru_cache, custom_data_type) {
|
||||||
|
const size_t cap = 4;
|
||||||
|
test_impl_cache ca(cap);
|
||||||
|
|
||||||
|
std::vector<std::shared_ptr<lru_cache_test_data>> inputs;
|
||||||
|
inputs.push_back(std::make_shared<lru_cache_test_data>(1, 21, 11));
|
||||||
|
inputs.push_back(std::make_shared<lru_cache_test_data>(2, 22, 12));
|
||||||
|
inputs.push_back(std::make_shared<lru_cache_test_data>(3, 23, 13));
|
||||||
|
inputs.push_back(std::make_shared<lru_cache_test_data>(4, 24, 14));
|
||||||
|
inputs.push_back(std::make_shared<lru_cache_test_data>(2, 22, 12));
|
||||||
|
inputs.push_back(std::make_shared<lru_cache_test_data>(1, 21, 11));
|
||||||
|
inputs.push_back(std::make_shared<lru_cache_test_data>(3, 23, 13));
|
||||||
|
inputs.push_back(std::make_shared<lru_cache_test_data>(5, 25, 15));
|
||||||
|
|
||||||
|
std::vector<bool> expected_hitted = {false, false, false, false, true, true, true, false};
|
||||||
|
|
||||||
|
EXPECT_EQ(ca.get_lru_element(), std::shared_ptr<lru_cache_test_data>());
|
||||||
|
for (size_t i = 0; i < inputs.size(); i++) {
|
||||||
|
auto& in = inputs[i];
|
||||||
|
std::shared_ptr<lru_cache_test_data> p_data;
|
||||||
|
bool hitted = ca.has(in->key);
|
||||||
|
if (hitted) {
|
||||||
|
p_data = ca.get(in->key);
|
||||||
|
} else {
|
||||||
|
ca.add(in->key, in);
|
||||||
|
p_data = ca.get(in->key);
|
||||||
|
}
|
||||||
|
EXPECT_EQ(p_data->key, in->key);
|
||||||
|
EXPECT_EQ(hitted, (bool)expected_hitted[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPECT_EQ(cap, ca.size());
|
||||||
|
|
||||||
|
std::vector<std::string> expected_keys;
|
||||||
|
for (size_t i = cap; i > 0; i--) {
|
||||||
|
expected_keys.push_back(inputs[inputs.size() - i]->key);
|
||||||
|
}
|
||||||
|
|
||||||
|
int idx = expected_keys.size() - 1;
|
||||||
|
for (auto key : ca.get_all_keys()) {
|
||||||
|
EXPECT_EQ(key, expected_keys[idx--]);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user