diff --git a/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp b/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp
index 76ab15b79bf..de4e8dc6111 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp
@@ -105,12 +105,12 @@ kernels_cache::sorted_code kernels_cache::get_program_source(const kernels_code&
     sorted_code scode;
 
     for (const auto& code : kernels_source_code) {
-        const source_code org_source_code = {code.second.kernel_strings->jit, code.second.kernel_strings->str};
-        std::string entry_point = code.second.kernel_strings->entry_point;
-        std::string options = code.second.kernel_strings->options;
-        bool batch_compilation = code.second.kernel_strings->batch_compilation;
-        bool dump_custom_program = code.second.dump_custom_program;
-        bool one_time_kernel = code.second.one_time_kernel;
+        const source_code org_source_code = {code.kernel_strings->jit, code.kernel_strings->str};
+        std::string entry_point = code.kernel_strings->entry_point;
+        std::string options = code.kernel_strings->options;
+        bool batch_compilation = code.kernel_strings->batch_compilation;
+        bool dump_custom_program = code.dump_custom_program;
+        bool one_time_kernel = code.one_time_kernel;
 
         batch_compilation &= does_options_support_batch_compilation(options);
 
@@ -144,7 +144,7 @@
             current_bucket.source.push_back({});
         }
 
-        current_bucket.entry_point_to_id[entry_point] = code.second.id;
+        current_bucket.entry_point_to_id[entry_point] = code.id;
 
         source_code new_source_code = org_source_code;
 
@@ -168,26 +168,18 @@ kernels_cache::kernel_id kernels_cache::set_kernel_source(
     const std::shared_ptr<kernel_selector::kernel_string>& kernel_string,
     bool dump_custom_program,
     bool one_time_kernel) {
-    kernels_cache::kernel_id id;
-
-    // same kernel_string == same kernel
-    const auto key = kernel_string.get()->get_hash();
-
     std::lock_guard<std::mutex> lock(_context.get_cache_mutex());
-    const auto it = _kernels_code.find(key);
+    // we need unique id in order to avoid conflict across topologies.
+    const auto kernel_num = _kernels.size() + _kernels_code.size();
+    kernels_cache::kernel_id id = kernel_string->entry_point + "_" + std::to_string(kernel_num);
 
-    if (it == _kernels_code.end()) {
-        // we need unique id in order to avoid conflict across topologies.
-        const auto kernel_num = _kernels.size() + _kernels_code.size();
-        id = kernel_string->entry_point + "_" + std::to_string(kernel_num);
-        _kernels_code[key] = {kernel_string, id, dump_custom_program, one_time_kernel};
-    } else {
-        id = it->second.id;
-    }
+    auto res = _kernels_code.emplace(kernel_string, id, dump_custom_program, one_time_kernel);
 
     assert(_kernels.find(id) == _kernels.end());
-    _pending_compilation = true;
+    if (res.second) {
+        _pending_compilation = true;
+    }
     return id;
 }
 
@@ -227,8 +219,6 @@ kernels_cache::kernels_map kernels_cache::build_program(const program_code& prog
     try {
         cl::Program program(_context.context(), sources);
         program.build({_context.device()}, program_source.options.c_str());
-        // Store kernels for serialization process.
-        _context.store_binaries(program.getInfo<CL_PROGRAM_BINARIES>(), _prog_id);
 
         if (dump_sources && dump_file.good()) {
             dump_file << "\n/* Build Log:\n";
@@ -240,7 +230,6 @@ kernels_cache::kernels_map kernels_cache::build_program(const program_code& prog
 
         cl::vector<cl::Kernel> kernels;
         program.createKernels(&kernels);
-
        for (auto& k : kernels) {
             auto kernel_name = k.getInfo<CL_KERNEL_FUNCTION_NAME>();
             kmap.emplace(kernel_name, kernels_cache::kernel_type(k, _context.get_device_info().supports_usm));
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.h b/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.h
index bb1daecdcb5..e9667ac1d27 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.h
+++ b/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.h
@@ -22,16 +22,14 @@
 #include <memory>
 #include <atomic>
 #include <string>
+#include <unordered_set>
+#include <kernel_selector_common.h>
 
 namespace cl {
 class Kernel;
 class KernelIntel;
 }
 
-namespace kernel_selector {
-struct KernelString;
-}
-
 namespace kernel_selector {
 using kernel_string = kernel_selector::KernelString;
 }
@@ -59,13 +57,32 @@ public:
         std::string id;
         bool dump_custom_program;
         bool one_time_kernel;
+
+        kernel_code(const std::shared_ptr<kernel_selector::kernel_string>& _kernel_strings,
+                    const std::string& _id,
+                    bool _dump_custom_program,
+                    bool _one_time_kernel)
+            : kernel_strings(_kernel_strings),
+              id(_id),
+              dump_custom_program(_dump_custom_program),
+              one_time_kernel(_one_time_kernel) {}
+
+        bool operator == (const kernel_code& c2) const {
+            return kernel_strings->get_hash() == c2.kernel_strings->get_hash();
+        };
     };
+
+    struct hash_kernel_code {
+        size_t operator()(const kernel_code& x) const {
+            return std::hash<std::string>()(x.kernel_strings->get_hash());
+        }
+    };
 
     typedef std::string kernel_id;
     typedef cl::KernelIntel kernel_type;
     using sorted_code = std::map<std::string, program_code>;
     using kernels_map = std::map<std::string, kernel_type>;
-    using kernels_code = std::map<std::string, kernel_code>;
+    using kernels_code = std::unordered_set<kernel_code, hash_kernel_code>;
 
 private:
     gpu_toolkit& _context;
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp b/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp
index c3da4f20027..dc8ea532467 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp
@@ -166,10 +166,6 @@ kernels_cache& gpu_toolkit::get_kernels_cache(uint32_t prog_id) {
     return get_program_state(prog_id)._kernels_cache;
 }
 
-void gpu_toolkit::store_binaries(kernels_binaries_vector binaries, uint32_t prog_id) {
-    get_program_state(prog_id)._binaries.push_back(binaries);
-}
-
 void gpu_toolkit::add_network(uint32_t net_id) {
     std::lock_guard<std::mutex> lock(toolkit_mutex);
     command_queues_builder queue_builder(context(), device(), _device->get_platform());
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.h b/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.h
index b009f2ea394..fdc8378afe9 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.h
+++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.h
@@ -62,7 +62,6 @@ protected:
     struct gpu_program_state {
         kernels_cache _kernels_cache;
-        kernels_binaries_container _binaries;
 
         gpu_program_state(gpu_toolkit& context, uint32_t prog_id) :
             _kernels_cache(context, prog_id) {}
@@ -87,7 +86,6 @@
     device_info_internal get_device_info() const { return _device->get_info(); }
     std::shared_ptr<rapidjson::Document> get_device_cache() const { return _device_cache; }
     kernels_cache& get_kernels_cache(uint32_t prog_id);
-    void store_binaries(kernels_binaries_vector binaries, uint32_t prog_id);
     bool get_serialization_flag() { return _serialize; }
     void set_serialization_flag(bool serialization_flag) { _serialize = serialization_flag; }
@@ -136,7 +134,6 @@
     std::map<uint32_t, std::shared_ptr<gpu_program_state>> _program_states;
     std::map<uint32_t, gpu_queue> _command_queues_w;
     std::shared_ptr<rapidjson::Document> _device_cache;
-    kernels_binaries_container _binaries;
     bool _serialize = false;
     std::string _extensions;
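
A minimal standalone sketch of the deduplication scheme the patch introduces: kernels_code changes from a map keyed by the kernel-string hash to a std::unordered_set with a custom hash and equality, and set_kernel_source() keys recompilation off the bool returned by emplace(). KernelString below is a simplified stand-in for the type in kernel_selector_common.h (its get_hash() is assumed to return std::string, as the std::hash<std::string> in hash_kernel_code implies), and the two-argument kernel_code constructor abbreviates the four-argument one from the patch:

#include <cassert>
#include <memory>
#include <string>
#include <unordered_set>

// Stand-in for kernel_selector::KernelString (assumption: get_hash() returns std::string).
struct KernelString {
    std::string str, jit, options, entry_point;
    std::string get_hash() const { return str + jit + options + entry_point; }
};

struct kernel_code {
    std::shared_ptr<KernelString> kernel_strings;
    std::string id;

    kernel_code(const std::shared_ptr<KernelString>& _kernel_strings, const std::string& _id)
        : kernel_strings(_kernel_strings), id(_id) {}

    // Entries are equal when their kernel sources hash identically.
    bool operator==(const kernel_code& c2) const {
        return kernel_strings->get_hash() == c2.kernel_strings->get_hash();
    }
};

struct hash_kernel_code {
    size_t operator()(const kernel_code& x) const {
        return std::hash<std::string>()(x.kernel_strings->get_hash());
    }
};

int main() {
    std::unordered_set<kernel_code, hash_kernel_code> kernels_code;

    auto ks = std::make_shared<KernelString>();
    ks->entry_point = "eltwise";

    // First insertion succeeds; in set_kernel_source() res.second == true
    // is what raises _pending_compilation.
    auto res1 = kernels_code.emplace(ks, "eltwise_0");
    assert(res1.second);

    // The same source under a new id hashes and compares equal, so the
    // duplicate is rejected and no recompilation is scheduled for it.
    auto res2 = kernels_code.emplace(ks, "eltwise_1");
    assert(!res2.second);
    return 0;
}

This relies on hash_kernel_code and operator== agreeing on what counts as a duplicate, which holds here because both are derived from the same get_hash() value.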