[GPU] implement lru_cache (#12349)

* Fix memory leak issue

Co-authored-by: Taylor Yeonbok Lee <taylor.lee@intel.com>

Paul Youngsoo Ahn authored 2022-08-03 15:25:44 +09:00, committed by GitHub
parent b449481439
commit 03b0199521
9 changed files with 313 additions and 1 deletion

@@ -247,6 +247,8 @@ public:
// returns {-1, -1} if it failed to estimate memory usage by allocating the given batch size
std::pair<int64_t/*const alloc*/, int64_t/*general alloc*/> get_estimated_device_mem_usage();
void remove_kernel(kernel_id id);
private:
uint32_t prog_id = 0;
engine& _engine;

@@ -0,0 +1,167 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <list>
#include <unordered_map>
#include <functional>
#include <iostream>
namespace cldnn {
struct primitive_impl;
/// @brief LRU cache, which removes the least recently used entry when the cache is full.
template<typename Key, typename Value>
class LruCache {
public:
using data_type = std::pair<Key, Value>;
explicit LruCache(size_t caps) : _capacity(caps) {}
~LruCache() {
clear();
}
/**
 * @brief Get the least recently used element in the cache
 *
 * @return Value the LRU value, or a default-constructed Value if the cache is empty
*/
Value get_lru_element() const {
if (!_lru_data_list.empty()) {
return _lru_data_list.back().second;
} else {
return Value();
}
}
/**
* @brief Add new value with associated key into the LRU cache
*
 * @param key if the same key already exists in the cache, its value is updated with the new entry.
 * @param value
 * @return true if the cache was full and the least recently used entry was evicted to make room.
 * @return false otherwise
*/
bool add(const Key& key, const Value& value) {
auto map_iter = _key_map.find(key);
if (map_iter != _key_map.end()) {
touch_data(map_iter->second);
map_iter->second->second = value;
return false;
}
bool popped_last_element = false;
if (_capacity > 0 && _capacity == _key_map.size()) {
pop();
popped_last_element = true;
}
auto iter = _lru_data_list.insert(_lru_data_list.begin(), {key, value});
_key_map.insert({key, iter});
return popped_last_element;
}
/**
 * @brief Check whether a value associated with the key exists in the cache
 *
 * @param key
 * @return true if a value associated with the key exists.
 * @return false otherwise
*/
bool has(const Key& key) const {
return (_key_map.find(key) != _key_map.end());
}
/**
* @brief Find a value associated with a key
*
* @param key
 * @return Value the value associated with the given key; if the key does not exist in the cache, a default-constructed Value is returned
*/
Value get(const Key& key) {
auto iter = _key_map.find(key);
if (iter == _key_map.end()) {
return Value();
}
touch_data(iter->second);
return _lru_data_list.front().second;
}
/**
* @brief Remove all entries
*
*/
void clear() {
_lru_data_list.clear();
_key_map.clear();
}
/**
* @brief Return current size of cache
*
* @return size_t
*/
size_t size() const {
return _lru_data_list.size();
}
/**
* @brief Return capacity of the cache
*
* @return size_t
*/
size_t capacity() const {
return _capacity;
}
/**
 * @brief Get all keys in the cache, ordered from most to least recently used
*
* @return std::vector<Key>
*/
std::vector<Key> get_all_keys() const {
std::vector<Key> key_list;
for (auto& iter : _lru_data_list) {
key_list.push_back(iter.first);
}
return key_list;
}
private:
using lru_data_list_type = std::list<data_type>;
using lru_data_list_iter = typename lru_data_list_type::iterator;
lru_data_list_type _lru_data_list;
std::unordered_map<Key, lru_data_list_iter> _key_map;
const size_t _capacity;
/**
 * @brief Move the touched entry to the front of the list.
 *
 * @param iter iterator to the entry that was just accessed
*/
void touch_data(lru_data_list_iter iter) {
_lru_data_list.splice(_lru_data_list.begin(), _lru_data_list, iter);
}
/**
 * @brief Pop the n least recently used entries.
 *
 * @param n number of entries to pop
*/
void pop(size_t n = 1) {
for (size_t i = 0; i < n && !_lru_data_list.empty(); ++i) {
_key_map.erase(_lru_data_list.back().first);
_lru_data_list.pop_back();
}
}
};
using ImplementationsCache = cldnn::LruCache<std::string, std::shared_ptr<primitive_impl>>;
} // namespace cldnn
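
For orientation, here is a minimal usage sketch of the cache above (not part of the diff; the key type, values, and capacity are illustrative):

#include "intel_gpu/runtime/lru_cache.hpp"
#include <string>

void lru_cache_usage_sketch() {
    cldnn::LruCache<std::string, int> cache(2);  // room for two entries
    cache.add("a", 1);                 // returns false: cache not yet full
    cache.add("b", 2);                 // returns false
    cache.get("a");                    // touches "a"; "b" becomes the LRU entry
    bool evicted = cache.add("c", 3);  // returns true: "b" was evicted to make room
    (void)evicted;
    // cache.get_all_keys() now yields {"c", "a"}, most to least recently used
}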

@@ -73,6 +73,10 @@ struct custom_gpu_primitive_impl : typed_primitive_impl<custom_gpu_primitive> {
args.outputs = { instance.output_memory_ptr() };
return stream.enqueue_kernel(*_kernels.front(), cl_kernel.get()->params, args, events, instance.node.is_output());
}
std::vector<std::string> get_kernel_ids() override {
return {_kernel_id};
}
};
static kernel_selector::kernel_argument_element get_arg(custom_gpu_primitive::arg_desc arg) {

@@ -108,6 +108,10 @@ protected:
}
}
std::vector<std::string> get_kernel_ids() override {
return _kernel_ids;
}
std::vector<layout> get_internal_buffer_layouts_impl() const override {
if (_kernel_data.internalBufferSizes.empty())
return {};

@@ -49,6 +49,9 @@ struct primitive_impl {
virtual bool is_cpu() const { return true; }
virtual void init_kernels() = 0;
virtual std::unique_ptr<primitive_impl> clone() const = 0;
virtual std::vector<std::string> get_kernel_ids() {
return {};
}
protected:
std::string _kernel_name;

@@ -1556,3 +1556,7 @@ std::pair<int64_t, int64_t> program::get_estimated_device_mem_usage() {
return std::make_pair(const_sum, get_engine().get_used_device_memory(allocation_type::usm_device));
}
void program::remove_kernel(kernel_id id) {
_kernels_cache->remove_kernel(id);
}
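
A sketch of how get_kernel_ids() and program::remove_kernel() are intended to work together to fix the leak: when an implementation is evicted from the ImplementationsCache, its compiled kernels can be removed from the kernels cache. The names cache, key, impl, and prog below are hypothetical; this wiring is not part of the diff itself:

// Hypothetical wiring: release the kernels of an impl evicted from the LRU cache.
std::shared_ptr<primitive_impl> lru = cache.get_lru_element();
if (cache.add(key, impl)) {
    // add() returned true, so the cache was full and lru was just evicted;
    // erasing its kernels from the kernels_cache keeps them from leaking.
    for (auto& id : lru->get_kernel_ids())
        prog.remove_kernel(id);
}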

@@ -148,7 +148,7 @@ kernel_id kernels_cache::set_kernel_source(
bool dump_custom_program) {
std::lock_guard<std::mutex> lock(_mutex);
// we need a unique id in order to avoid conflicts across topologies.
- const auto kernel_num = _kernels.size() + _kernels_code.size();
+ const auto kernel_num = _kernels.size() + (_kernel_idx++);
kernel_id id = kernel_string->entry_point + "_" + std::to_string(kernel_num);
auto res = _kernels_code.emplace(kernel_string, id, dump_custom_program);
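
The switch from _kernels_code.size() to a monotonically increasing _kernel_idx matters once kernels can be removed: a size-based suffix can repeat after a removal and collide with an existing id. A small self-contained illustration (the entry-point name "ep" is made up):

#include <cassert>
#include <set>
#include <string>

int main() {
    std::set<std::string> ids;  // stands in for the existing kernel ids
    ids.insert("ep_" + std::to_string(ids.size()));  // mints "ep_0"
    ids.insert("ep_" + std::to_string(ids.size()));  // mints "ep_1"
    ids.erase("ep_0");                               // a kernel is removed
    // size() is 1 again, so a size-based scheme would mint "ep_1" a second time:
    assert(ids.count("ep_" + std::to_string(ids.size())) == 1);  // collision
    // A counter that only grows mints "ep_2" next, which stays unique:
    size_t kernel_idx = 2;
    assert(ids.count("ep_" + std::to_string(kernel_idx)) == 0);
    return 0;
}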

@@ -75,6 +75,7 @@ private:
engine& _engine;
uint32_t _prog_id = 0;
kernels_code _kernels_code;
size_t _kernel_idx = 0;
std::atomic<bool> _pending_compilation{false};
std::map<const std::string, kernel::ptr> _kernels;
std::vector<std::string> batch_header_str;
@@ -97,6 +98,9 @@ public:
// forces compilation of all pending kernels/programs
void build_all();
void reset();
void remove_kernel(kernel_id id) {
_kernels.erase(id);
}
};
} // namespace cldnn

@@ -0,0 +1,124 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include "intel_gpu/runtime/lru_cache.hpp"
#include <vector>
using namespace cldnn;
using namespace ::tests;
TEST(lru_cache, basic_data_type) {
const size_t cap = 4;
LruCache<int, int> ca(cap);  // capacity counts entries, so pass cap directly
std::vector<int> inputs = {1, 2, 3, 4, 2, 1, 5};
std::vector<std::pair<int, int>> input_values;
for (auto i : inputs) {
input_values.push_back(std::make_pair(i, i + 10));
}
EXPECT_EQ(ca.get_lru_element(), int());
std::vector<bool> expected_hitted = {false, false, false, false, true, true, false};
for (size_t i = 0; i < input_values.size(); i++) {
auto& in = input_values[i];
int data = 0;
bool hitted = ca.has(in.first);
if (hitted) {
data = ca.get(in.first);
} else {
ca.add(in.first, in.second);
data = ca.get(in.first);
}
EXPECT_EQ(data, in.second);
EXPECT_EQ(hitted, (bool)expected_hitted[i]);
}
std::vector<std::pair<int, int>> expected_value;
for (size_t i = ca.size(); i > 0; i--) { // 5, 1, 2, 4
int idx = input_values.size() - i;
expected_value.push_back(input_values[idx]);
}
int idx = expected_value.size() - 1;
for (auto key : ca.get_all_keys()) {
EXPECT_EQ(key, expected_value[idx--].first);
}
}
class lru_cache_test_data {
public:
lru_cache_test_data(int a, int b, int c) : x(a), y(b), z(c) {
key = "key_" + std::to_string(a) + "_" + std::to_string(b) + "_" + std::to_string(c);
}
bool operator==(const lru_cache_test_data& rhs) const {
    return (this->x == rhs.x && this->y == rhs.y && this->z == rhs.z);
}
bool operator!=(const lru_cache_test_data& rhs) const {
    return (this->x != rhs.x || this->y != rhs.y || this->z != rhs.z);
}
operator std::string() {
return "(" + std::to_string(x) + "," + std::to_string(y) + "," + std::to_string(z) + ")";
}
std::string key;
int x;
int y;
int z;
};
using test_impl_cache = LruCache<std::string, std::shared_ptr<lru_cache_test_data>>;
TEST(lru_cache, custom_data_type) {
const size_t cap = 4;
test_impl_cache ca(cap);
std::vector<std::shared_ptr<lru_cache_test_data>> inputs;
inputs.push_back(std::make_shared<lru_cache_test_data>(1, 21, 11));
inputs.push_back(std::make_shared<lru_cache_test_data>(2, 22, 12));
inputs.push_back(std::make_shared<lru_cache_test_data>(3, 23, 13));
inputs.push_back(std::make_shared<lru_cache_test_data>(4, 24, 14));
inputs.push_back(std::make_shared<lru_cache_test_data>(2, 22, 12));
inputs.push_back(std::make_shared<lru_cache_test_data>(1, 21, 11));
inputs.push_back(std::make_shared<lru_cache_test_data>(3, 23, 13));
inputs.push_back(std::make_shared<lru_cache_test_data>(5, 25, 15));
std::vector<bool> expected_hitted = {false, false, false, false, true, true, true, false};
EXPECT_EQ(ca.get_lru_element(), std::shared_ptr<lru_cache_test_data>());
for (size_t i = 0; i < inputs.size(); i++) {
auto& in = inputs[i];
std::shared_ptr<lru_cache_test_data> p_data;
bool hitted = ca.has(in->key);
if (hitted) {
p_data = ca.get(in->key);
} else {
ca.add(in->key, in);
p_data = ca.get(in->key);
}
EXPECT_EQ(p_data->key, in->key);
EXPECT_EQ(hitted, (bool)expected_hitted[i]);
}
EXPECT_EQ(cap, ca.size());
std::vector<std::string> expected_keys;
for (size_t i = cap; i > 0; i--) {
expected_keys.push_back(inputs[inputs.size() - i]->key);
}
int idx = expected_keys.size() - 1;
for (auto key : ca.get_all_keys()) {
EXPECT_EQ(key, expected_keys[idx--]);
}
}