* Fix memory leak issue Co-authored-by: Taylor Yeonbok Lee <taylor.lee@intel.com>
This commit is contained in:
parent
b449481439
commit
03b0199521
@ -247,6 +247,8 @@ public:
|
||||
// returns {-1, -1} if it failed to estimate by allocating given batch size
|
||||
std::pair<int64_t/*const alloc*/, int64_t/*general alloc*/> get_estimated_device_mem_usage();
|
||||
|
||||
void remove_kernel(kernel_id id);
|
||||
|
||||
private:
|
||||
uint32_t prog_id = 0;
|
||||
engine& _engine;
|
||||
|
167
src/plugins/intel_gpu/include/intel_gpu/runtime/lru_cache.hpp
Normal file
167
src/plugins/intel_gpu/include/intel_gpu/runtime/lru_cache.hpp
Normal file
@ -0,0 +1,167 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma once
|
||||
|
||||
#include <functional>
#include <iostream>
#include <list>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
|
||||
|
||||
namespace cldnn {

struct primitive_impl;

/// @brief Fixed-capacity LRU cache: when the cache is full, adding a new entry
///        evicts the least recently used one.
///
/// Recency is tracked by a doubly linked list ordered from most- to
/// least-recently used; an unordered_map gives O(1) lookup from key to its
/// list node, and std::list::splice moves a touched node to the front in O(1)
/// without invalidating the iterators stored in the map.
template<typename Key, typename Value>
class LruCache {
public:
    using data_type = std::pair<Key, Value>;

public:
    /// @param caps maximum number of entries; 0 means unbounded (no eviction).
    explicit LruCache(size_t caps) : _capacity(caps) {}

    ~LruCache() {
        clear();
    }

    /**
     * @brief Get the least recently used element in the cache.
     *
     * @return Value of the LRU entry, or a default-constructed Value if the
     *         cache is empty.
     */
    Value get_lru_element() const {
        if (_lru_data_list.size()) {
            return _lru_data_list.back().second;
        } else {
            return Value();
        }
    }

    /**
     * @brief Add a new value with its associated key into the LRU cache.
     *
     * @param key if the same key already exists in the cache, its value is
     *            overwritten and the entry becomes the most recently used.
     * @param value value to store.
     * @return true if the cache was full and the least recently used entry
     *         was evicted to make room for the new one.
     * @return false otherwise (insert without eviction, or update in place).
     */
    bool add(const Key& key, const Value& value) {
        auto map_iter = _key_map.find(key);
        if (map_iter != _key_map.end()) {
            // Existing key: refresh its recency and overwrite the value.
            touch_data(map_iter->second);
            map_iter->second->second = value;
            return false;
        }

        bool popped_last_element = false;
        // _capacity == 0 disables eviction, so the cache grows unbounded.
        if (_capacity > 0 && _capacity == _key_map.size()) {
            pop();
            popped_last_element = true;
        }
        auto iter = _lru_data_list.insert(_lru_data_list.begin(), {key, value});
        _key_map.insert({key, iter});
        return popped_last_element;
    }

    /**
     * @brief Check whether a value associated with the key exists in the cache.
     *
     * Lookup only: does NOT update the entry's recency.
     *
     * @param key key to look up.
     * @return true if a value associated with the key exists.
     * @return false otherwise.
     */
    bool has(const Key& key) const {
        return (_key_map.find(key) != _key_map.end());
    }

    /**
     * @brief Find the value associated with a key and mark it most recently used.
     *
     * @param key key to look up.
     * @return Value the value associated with the key; a default-constructed
     *         Value (e.g. nullptr for smart pointers) if the key is absent.
     */
    Value get(const Key& key) {
        auto iter = _key_map.find(key);
        if (iter == _key_map.end()) {
            return Value();
        }
        // touch_data splices the node to the front, so front() is this entry.
        touch_data(iter->second);
        return _lru_data_list.front().second;
    }

    /**
     * @brief Remove all entries.
     */
    void clear() {
        _lru_data_list.clear();
        _key_map.clear();
    }

    /**
     * @brief Return the current number of entries in the cache.
     *
     * @return size_t
     */
    size_t size() const {
        return _lru_data_list.size();
    }

    /**
     * @brief Return the capacity of the cache.
     *
     * @return size_t
     */
    size_t capacity() const {
        return _capacity;
    }

    /**
     * @brief Get all keys, ordered from most- to least-recently used.
     *
     * @return std::vector<Key>
     */
    std::vector<Key> get_all_keys() const {
        std::vector<Key> key_list;
        for (auto& iter : _lru_data_list) {
            key_list.push_back(iter.first);
        }
        return key_list;
    }

private:
    using lru_data_list_type = std::list<data_type>;
    using lru_data_list_iter = typename lru_data_list_type::iterator;

    // Front = most recently used, back = least recently used.
    std::list<data_type> _lru_data_list;
    // Maps key -> node in _lru_data_list; list iterators stay valid on splice.
    std::unordered_map<Key, lru_data_list_iter> _key_map;
    const size_t _capacity;

    /**
     * @brief Move touched data to the front of the list (most recently used).
     *
     * @param iter iterator of the touched entry.
     */
    void touch_data(lru_data_list_iter iter) {
        _lru_data_list.splice(_lru_data_list.begin(), _lru_data_list, iter);
    }

    /**
     * @brief Pop the n least recently used cache entries.
     *
     * @param n number of entries to be popped (default 1).
     */
    void pop(size_t n = 1) {
        for (size_t i = 0; i < n && !_lru_data_list.empty(); ++i) {
            _key_map.erase(_lru_data_list.back().first);
            _lru_data_list.pop_back();
        }
    }
};

using ImplementationsCache = cldnn::LruCache<std::string, std::shared_ptr<primitive_impl>>;
} // namespace cldnn
|
@ -73,6 +73,10 @@ struct custom_gpu_primitive_impl : typed_primitive_impl<custom_gpu_primitive> {
|
||||
args.outputs = { instance.output_memory_ptr() };
|
||||
return stream.enqueue_kernel(*_kernels.front(), cl_kernel.get()->params, args, events, instance.node.is_output());
|
||||
}
|
||||
|
||||
std::vector<std::string> get_kernel_ids() override {
|
||||
return {_kernel_id};
|
||||
}
|
||||
};
|
||||
|
||||
static kernel_selector::kernel_argument_element get_arg(custom_gpu_primitive::arg_desc arg) {
|
||||
|
@ -108,6 +108,10 @@ protected:
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::string> get_kernel_ids() override {
|
||||
return _kernel_ids;
|
||||
}
|
||||
|
||||
std::vector<layout> get_internal_buffer_layouts_impl() const override {
|
||||
if (_kernel_data.internalBufferSizes.empty())
|
||||
return {};
|
||||
|
@ -49,6 +49,9 @@ struct primitive_impl {
|
||||
virtual bool is_cpu() const { return true; }
|
||||
virtual void init_kernels() = 0;
|
||||
virtual std::unique_ptr<primitive_impl> clone() const = 0;
|
||||
virtual std::vector<std::string> get_kernel_ids() {
|
||||
return {};
|
||||
}
|
||||
|
||||
protected:
|
||||
std::string _kernel_name;
|
||||
|
@ -1556,3 +1556,7 @@ std::pair<int64_t, int64_t> program::get_estimated_device_mem_usage() {
|
||||
|
||||
return std::make_pair(const_sum, get_engine().get_used_device_memory(allocation_type::usm_device));
|
||||
}
|
||||
|
||||
void program::remove_kernel(kernel_id id) {
|
||||
_kernels_cache->remove_kernel(id);
|
||||
}
|
||||
|
@ -148,7 +148,7 @@ kernel_id kernels_cache::set_kernel_source(
|
||||
bool dump_custom_program) {
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
// we need unique id in order to avoid conflict across topologies.
|
||||
const auto kernel_num = _kernels.size() + _kernels_code.size();
|
||||
const auto kernel_num = _kernels.size() + (_kernel_idx++);
|
||||
kernel_id id = kernel_string->entry_point + "_" + std::to_string(kernel_num);
|
||||
|
||||
auto res = _kernels_code.emplace(kernel_string, id, dump_custom_program);
|
||||
|
@ -75,6 +75,7 @@ private:
|
||||
engine& _engine;
|
||||
uint32_t _prog_id = 0;
|
||||
kernels_code _kernels_code;
|
||||
size_t _kernel_idx = 0;
|
||||
std::atomic<bool> _pending_compilation{false};
|
||||
std::map<const std::string, kernel::ptr> _kernels;
|
||||
std::vector<std::string> batch_header_str;
|
||||
@ -97,6 +98,9 @@ public:
|
||||
// forces compilation of all pending kernels/programs
|
||||
void build_all();
|
||||
void reset();
|
||||
void remove_kernel(kernel_id id) {
|
||||
_kernels.erase(id);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace cldnn
|
||||
|
124
src/plugins/intel_gpu/tests/test_cases/lru_caches_gpu_test.cpp
Normal file
124
src/plugins/intel_gpu/tests/test_cases/lru_caches_gpu_test.cpp
Normal file
@ -0,0 +1,124 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "test_utils.h"
|
||||
|
||||
#include "intel_gpu/runtime/lru_cache.hpp"
|
||||
#include <vector>
|
||||
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
|
||||
|
||||
TEST(lru_cache, basic_data_type)
{
    const size_t cap = 4;
    // Capacity is a number of entries, not bytes: the previous
    // `cap * sizeof(int)` over-sized the cache to 16 entries, so the eviction
    // path was never exercised and the final-keys comment below was wrong.
    LruCache<int, int> ca(cap);

    std::vector<int> inputs = {1, 2, 3, 4, 2, 1, 5};
    std::vector<std::pair<int, int>> input_values;
    for (auto i : inputs) {
        input_values.push_back(std::make_pair(i, i + 10));
    }

    // Empty cache: LRU element is a default-constructed value.
    EXPECT_EQ(ca.get_lru_element(), int());

    // 1..4 fill the cache; 2 and 1 hit; 5 misses and evicts the LRU key (3).
    std::vector<bool> expected_hitted = {false, false, false, false, true, true, false};
    for (size_t i = 0; i < input_values.size(); i++) {
        auto& in = input_values[i];
        int data = 0;
        bool hitted = ca.has(in.first);
        if (hitted) {
            data = ca.get(in.first);
        } else {
            ca.add(in.first, in.second);
            data = ca.get(in.first);
        }
        EXPECT_EQ(data, in.second);
        EXPECT_EQ(hitted, (bool)expected_hitted[i]);
    }

    // The survivors are the last `ca.size()` accessed keys: 4, 2, 1, 5.
    std::vector<std::pair<int, int>> expected_value;
    for (size_t i = ca.size(); i > 0; i--) {  // 5, 1, 2, 4
        int idx = input_values.size() - i;
        expected_value.push_back(input_values[idx]);
    }

    // get_all_keys() returns most-recently-used first.
    int idx = expected_value.size() - 1;
    for (auto key : ca.get_all_keys()) {
        EXPECT_EQ(key, expected_value[idx--].first);
    }
}
|
||||
|
||||
// Simple value type used to exercise the LRU cache with a non-trivial payload.
// `key` is a stable string identifier derived from the three coordinates.
class lru_cache_test_data {
public:
    lru_cache_test_data(int a, int b, int c) : x(a), y(b), z(c) {
        key = "key_" + std::to_string(a) + "_" + std::to_string(b) + "_" + std::to_string(c);
    }

    // const-qualified: comparison must not mutate and must be callable on
    // const references (the originals were non-const member functions).
    bool operator==(const lru_cache_test_data& rhs) const {
        return (this->x == rhs.x && this->y == rhs.y && this->z == rhs.z);
    }

    bool operator!=(const lru_cache_test_data& rhs) const {
        return (this->x != rhs.x || this->y != rhs.y || this->z != rhs.z);
    }

    // Human-readable form "(x,y,z)" for diagnostics.
    operator std::string() const {
        return "(" + std::to_string(x) + "," + std::to_string(y) + "," + std::to_string(z) + ")";
    }

    std::string key;
    int x;
    int y;
    int z;
};
|
||||
|
||||
using test_impl_cache = LruCache<std::string, std::shared_ptr<lru_cache_test_data>>;
|
||||
|
||||
TEST(lru_cache, custom_data_type) {
    const size_t cache_capacity = 4;
    test_impl_cache cache(cache_capacity);

    // Access pattern: four distinct keys, three repeats, then a fifth key
    // that forces eviction of the least recently used entry.
    std::vector<std::shared_ptr<lru_cache_test_data>> entries;
    entries.push_back(std::make_shared<lru_cache_test_data>(1, 21, 11));
    entries.push_back(std::make_shared<lru_cache_test_data>(2, 22, 12));
    entries.push_back(std::make_shared<lru_cache_test_data>(3, 23, 13));
    entries.push_back(std::make_shared<lru_cache_test_data>(4, 24, 14));
    entries.push_back(std::make_shared<lru_cache_test_data>(2, 22, 12));
    entries.push_back(std::make_shared<lru_cache_test_data>(1, 21, 11));
    entries.push_back(std::make_shared<lru_cache_test_data>(3, 23, 13));
    entries.push_back(std::make_shared<lru_cache_test_data>(5, 25, 15));

    std::vector<bool> expected_cache_hits = {false, false, false, false, true, true, true, false};

    // An empty cache yields a null shared_ptr as its LRU element.
    EXPECT_EQ(cache.get_lru_element(), std::shared_ptr<lru_cache_test_data>());
    for (size_t idx = 0; idx < entries.size(); idx++) {
        auto& entry = entries[idx];
        bool was_cached = cache.has(entry->key);
        if (!was_cached) {
            cache.add(entry->key, entry);
        }
        std::shared_ptr<lru_cache_test_data> fetched = cache.get(entry->key);
        EXPECT_EQ(fetched->key, entry->key);
        EXPECT_EQ(was_cached, (bool)expected_cache_hits[idx]);
    }

    EXPECT_EQ(cache_capacity, cache.size());

    // Survivors are the keys of the last `cache_capacity` accesses.
    std::vector<std::string> surviving_keys;
    for (size_t i = cache_capacity; i > 0; i--) {
        surviving_keys.push_back(entries[entries.size() - i]->key);
    }

    // get_all_keys() walks from most- to least-recently used, so compare
    // against the survivor list in reverse.
    int pos = surviving_keys.size() - 1;
    for (auto key : cache.get_all_keys()) {
        EXPECT_EQ(key, surviving_keys[pos--]);
    }
}
|
Loading…
Reference in New Issue
Block a user