From 65f62945ddf7808598174d988534f62b283d640b Mon Sep 17 00:00:00 2001 From: Mikhail Letavin Date: Mon, 1 Jun 2020 12:01:28 +0300 Subject: [PATCH] [IE CLDNN] Free up first copy of weights/biases that were transferred to USM device memory (#561) --- .../thirdparty/clDNN/api/memory.hpp | 2 + .../clDNN/src/include/memory_pool.h | 2 + .../thirdparty/clDNN/src/memory.cpp | 35 +++++++--- .../thirdparty/clDNN/src/memory_pool.cpp | 69 +++++++++++++++++++ .../thirdparty/clDNN/src/network.cpp | 2 + .../thirdparty/clDNN/src/program.cpp | 3 +- .../clDNN/tests/test_cases/memory_test.cpp | 12 +--- 7 files changed, 104 insertions(+), 21 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/api/memory.hpp b/inference-engine/thirdparty/clDNN/api/memory.hpp index ed17461f516..b54ed4cf595 100644 --- a/inference-engine/thirdparty/clDNN/api/memory.hpp +++ b/inference-engine/thirdparty/clDNN/api/memory.hpp @@ -167,6 +167,8 @@ struct memory { /// C API memory handle memory_impl* get() const { return _impl; } + void reset(); + private: friend struct engine; memory_impl* _impl; diff --git a/inference-engine/thirdparty/clDNN/src/include/memory_pool.h b/inference-engine/thirdparty/clDNN/src/include/memory_pool.h index 4b4db977855..3baf69e0855 100644 --- a/inference-engine/thirdparty/clDNN/src/include/memory_pool.h +++ b/inference-engine/thirdparty/clDNN/src/include/memory_pool.h @@ -140,6 +140,8 @@ public: allocation_type type); void clear_pool(); void clear_pool_for_network(uint32_t network_id); + void release_memory(memory_impl* memory, + const primitive_id& id); void color_graph(const program_impl&); void dump_memory_pool(const program_impl&, std::string&, std::string&); diff --git a/inference-engine/thirdparty/clDNN/src/memory.cpp b/inference-engine/thirdparty/clDNN/src/memory.cpp index fe42649b950..876adcfe484 100644 --- a/inference-engine/thirdparty/clDNN/src/memory.cpp +++ b/inference-engine/thirdparty/clDNN/src/memory.cpp @@ -72,26 +72,34 @@ memory memory::share_surface(const 
engine& engine, const layout& layout, shared_ #endif size_t memory::count() const { - return get_layout().count(); + if (_impl) return get_layout().count(); + else return 0; } size_t memory::size() const { - return _impl->size(); + if (_impl) return _impl->size(); + else return 0; } const layout& memory::get_layout() const { - return _impl->get_layout(); + if (_impl) return _impl->get_layout(); + else throw std::runtime_error("empty memory object"); } int memory::get_net_id() const { - return _impl->get_net_id(); + if (_impl) return _impl->get_net_id(); + else throw std::runtime_error("empty memory object"); } bool memory::is_allocated_by(const engine& engine) const { - return _impl->is_allocated_by(*engine.get()); + if (_impl) return _impl->is_allocated_by(*engine.get()); + else return false; } bool memory::is_the_same_buffer(const memory& other) const { + if (_impl == nullptr) + return false; + if (_impl == other.get()) return true; @@ -107,7 +115,8 @@ bool memory::is_the_same_buffer(const memory& other) const { } shared_mem_params memory::get_internal_params() const { - return _impl->get_internal_params(); + if (_impl) return _impl->get_internal_params(); + else throw std::runtime_error("empty memory object"); } memory memory::attach_impl(const cldnn::layout& layout, void* ptr, uint32_t net_id) { @@ -115,18 +124,24 @@ memory memory::attach_impl(const cldnn::layout& layout, void* ptr, uint32_t net_ } void* memory::lock_impl() const { - return _impl->lock(); + if (_impl) return _impl->lock(); + else return nullptr; } void memory::unlock() const { - _impl->unlock(); + if (_impl) _impl->unlock(); } void memory::retain() { - _impl->add_ref(); + if (_impl) _impl->add_ref(); } void memory::release() { - _impl->release(); + if (_impl) _impl->release(); +} + +void memory::reset() { + release(); + _impl = nullptr; } } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/memory_pool.cpp b/inference-engine/thirdparty/clDNN/src/memory_pool.cpp index 
d49178d8af9..fc6cec3f5a6 100644 --- a/inference-engine/thirdparty/clDNN/src/memory_pool.cpp +++ b/inference-engine/thirdparty/clDNN/src/memory_pool.cpp @@ -141,6 +141,75 @@ bool memory_pool::has_conflict(const memory_set& a, return !intersection.empty(); } +void memory_pool::release_memory(memory_impl* mem, + const primitive_id& id) { + // check nonpadded pool first + auto _layout = mem->get_layout(); + auto type = mem->get_allocation_type(); + auto network_id = mem->get_net_id(); + + { + auto range = _non_padded_pool.equal_range(_layout.bytes_count()); + auto it = range.first; + + while (it != range.second && it != _non_padded_pool.end()) { + if (it->second._network_id == network_id && + it->second._type == type && + it->second._memory.get() == mem) { + auto user_it = it->second._users.find({ id, network_id }); + + // normally there should be only one entry + if (user_it != it->second._users.end()) { + user_it = it->second._users.erase(user_it); + } + if (it->second._users.empty()) { + // if this was the only user of the memory, then free it up + it = _non_padded_pool.erase(it); + } + + //entry found and processed - so return + return; + } else { + ++it; + } + } + } + { + auto itr = _padded_pool.find(_layout); + + if (itr != _padded_pool.end()) { + auto& list = itr->second; + auto list_itr = list.begin(); + + while (list_itr != list.end()) { + if (list_itr->_memory.get() == mem && + list_itr->_network_id == network_id && + list_itr->_type == type) { + auto user_it = list_itr->_users.find({ id, network_id }); + + // normally there should be only one entry + if (user_it != list_itr->_users.end()) { + user_it = list_itr->_users.erase(user_it); + } + if (list_itr->_users.empty()) { + // if this was the only user of the memory, then free it up + list.erase(list_itr); + } + + //entry found and processed - so return + break; + } else { + list_itr++; + } + } + + if (list.empty()) { + _padded_pool.erase(itr); + } + } + } +} + memory_impl::ptr 
memory_pool::get_from_non_padded_pool(const layout& layout, const primitive_id& id, uint32_t network_id, diff --git a/inference-engine/thirdparty/clDNN/src/network.cpp b/inference-engine/thirdparty/clDNN/src/network.cpp index 4711a1beed5..944f55e8026 100644 --- a/inference-engine/thirdparty/clDNN/src/network.cpp +++ b/inference-engine/thirdparty/clDNN/src/network.cpp @@ -736,11 +736,13 @@ void network_impl::transfer_memory_to_device(std::shared_ptr<primitive_inst> ins if (alloc_type == allocation_type::usm_host || alloc_type == allocation_type::usm_shared) { // Allocate and transfer memory + auto& mem_pool = inst_mem.get_engine()->get_memory_pool(); auto device_mem = inst_mem.get_engine()->allocate_memory( inst_mem.get_layout(), allocation_type::usm_device, inst_mem.get_net_id()); dynamic_cast<gpu::gpu_usm&>(*device_mem).copy_from_other(dynamic_cast<gpu::gpu_usm&>(inst_mem)); + mem_pool.release_memory(&inst_mem, node.id()); instance->set_output_memory(*device_mem); } } diff --git a/inference-engine/thirdparty/clDNN/src/program.cpp b/inference-engine/thirdparty/clDNN/src/program.cpp index bade80bc005..2359e928ed1 100644 --- a/inference-engine/thirdparty/clDNN/src/program.cpp +++ b/inference-engine/thirdparty/clDNN/src/program.cpp @@ -367,7 +367,7 @@ void program_impl::build_program(bool is_internal) { if (!is_internal) prim_info = get_current_stage_info(); - transfer_memory_to_device(); + if (!is_internal) transfer_memory_to_device(); cleanup(); } @@ -523,6 +523,7 @@ void program_impl::transfer_memory_to_device() { mem.get_net_id()); dynamic_cast<gpu::gpu_usm&>(*device_mem).copy_from_other(dynamic_cast<gpu::gpu_usm&>(mem)); data_node.attach_memory(*device_mem); + const_cast<memory&>(data_node.get_primitive()->mem).reset(); } } } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp index c4456fa08fe..c1bde3306bb 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp +++
b/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp @@ -408,22 +408,14 @@ TEST(memory_pool, shared_mem_pool_diff_batches) { auto outputs = network_first.execute(); auto dev_info = engine.get_info(); - if (dev_info.supports_usm) { - EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)4312); - } else { - EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)3928); - } + EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)3928); topo.change_input_layout("input", input_1.get_layout());//change input layout to batch=1 network network_second(engine, topo, bo); network_second.set_input_data("input", input_1); auto outputs_second = network_second.execute(); - if (dev_info.supports_usm) { - EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)4312); - } else { - EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)3928); - } + EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)3928); } TEST(memory_pool, shared_dep_two_output) {