[GPU] Fix network loading time related to onednn engine creation (#12375)
+ Creation of onednn_engine in ocl_engine is changed to on-demand.
+ Loading a network with the benchmark cache_dir option took longer than with the cl_cache_dir env variable.
+ For clDNN execution, onednn_engine used to be created whenever just the ONEDNN_ENABLE config was ON.

Signed-off-by: Min, Byungil <byungil.min@intel.com>
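The core of the change is lazy initialization: instead of building the oneDNN engine eagerly in the ocl_engine constructor, the getter creates it on first use under a mutex. A minimal standalone sketch of the pattern, using hypothetical stand-in names rather than the actual cldnn/dnnl types:

    #include <memory>
    #include <mutex>

    struct heavy_engine {};  // stand-in for dnnl::engine

    class lazy_engine_holder {
    public:
        // const getter: callers observe no logical change, so the cached
        // state below is declared mutable to allow mutation from here
        heavy_engine& get() const {
            const std::lock_guard<std::mutex> lock(_mutex);
            if (!_engine)  // only the first caller pays the construction cost
                _engine = std::make_shared<heavy_engine>();
            return *_engine;
        }

    private:
        mutable std::mutex _mutex;
        mutable std::shared_ptr<heavy_engine> _engine;
    };

Callers that never touch oneDNN (the pure clDNN path) never trigger the construction, which is where the network-loading time is saved.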
@@ -44,7 +44,7 @@ public:
     static queue_types detect_queue_type(engine_types engine_type, void* queue_handle);

 #ifdef ENABLE_ONEDNN_FOR_GPU
-    virtual dnnl::stream& get_onednn_stream() = 0;
+    virtual dnnl::stream& get_onednn_stream() const = 0;
 #endif

 protected:
@@ -51,16 +51,20 @@ ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type,

     _usm_helper.reset(new cl::UsmHelper(get_cl_context(), get_cl_device(), use_unified_shared_memory()));

-#ifdef ENABLE_ONEDNN_FOR_GPU
-    _onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
-#endif
     _program_stream.reset(new ocl_stream(*this));
 }

 #ifdef ENABLE_ONEDNN_FOR_GPU
 dnnl::engine& ocl_engine::get_onednn_engine() const {
-    if (!_onednn_engine)
-        throw ov::Exception("[GPU] onednn engine is nullptr");
+    const std::lock_guard<std::mutex> lock(onednn_mutex);
+    if (!_onednn_engine) {
+        auto casted = std::dynamic_pointer_cast<ocl_device>(_device);
+        if (!casted)
+            throw ov::Exception("[GPU] Invalid device type stored in ocl_engine");
+
+        _onednn_engine = std::make_shared<dnnl::engine>(dnnl::ocl_interop::make_engine(casted->get_device().get(), casted->get_context().get()));
+    }
+
     return *_onednn_engine;
 }
 #endif
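The lock-then-check above is correct but takes the mutex on every call. A hypothetical alternative, not what this commit does, is std::call_once, which degrades to a cheap flag check once the engine exists:

    #include <memory>
    #include <mutex>

    struct engine_t {};  // stand-in for dnnl::engine

    class holder {
    public:
        engine_t& get() const {
            // the lambda runs exactly once even under concurrent callers;
            // if it throws, the next call retries the initialization
            std::call_once(_once, [this] { _engine = std::make_shared<engine_t>(); });
            return *_engine;
        }

    private:
        mutable std::once_flag _once;
        mutable std::shared_ptr<engine_t> _engine;
    };

For a getter that is not on a hot path, the plain mutex the commit uses is just as good and keeps the device-cast error handling in ordinary control flow.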
@@ -44,7 +44,8 @@ public:
     stream& get_program_stream() const override;

 #ifdef ENABLE_ONEDNN_FOR_GPU
-    /// Returns onednn engine object which shares device and context with current engine
+    // Returns onednn engine object which shares device and context with current engine.
+    // If the onednn engine has not been created yet, it is created on demand.
     dnnl::engine& get_onednn_engine() const override;
 #endif
@@ -57,7 +58,8 @@ private:
     std::unique_ptr<cl::UsmHelper> _usm_helper;

 #ifdef ENABLE_ONEDNN_FOR_GPU
-    std::shared_ptr<dnnl::engine> _onednn_engine;
+    mutable std::mutex onednn_mutex;
+    mutable std::shared_ptr<dnnl::engine> _onednn_engine;
 #endif
 };
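Both new members are mutable because get_onednn_engine() is const: inside a const member function every data member is const, so locking onednn_mutex or assigning _onednn_engine would otherwise be ill-formed. A reduced illustration with a hypothetical class:

    #include <mutex>

    struct counter {
        mutable std::mutex m;  // without 'mutable', the lock below fails to
                               // compile: lock_guard needs a non-const mutex
        mutable int hits = 0;  // bookkeeping a const method may still update
        int count() const {
            const std::lock_guard<std::mutex> lock(m);
            return ++hits;
        }
    };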

@@ -289,6 +289,7 @@ ocl_stream::ocl_stream(const ocl_engine &engine)
     queue_builder.set_supports_queue_families(queue_families_extension);

     _command_queue = queue_builder.build(context, device);

 #ifdef ENABLE_ONEDNN_FOR_GPU
     if (config.queue_type == queue_types::in_order) {
         auto onednn_engine = engine.get_onednn_engine();
@@ -317,9 +318,10 @@ ocl_stream::ocl_stream(const ocl_engine &engine, void *handle)
 }

 #ifdef ENABLE_ONEDNN_FOR_GPU
-dnnl::stream& ocl_stream::get_onednn_stream() {
+dnnl::stream& ocl_stream::get_onednn_stream() const {
     if (!_onednn_stream)
         throw std::runtime_error("[GPU] onednn stream is nullptr");

     return *_onednn_stream;
 }
 #endif
@@ -83,7 +83,7 @@ public:
     static queue_types detect_queue_type(void* queue_handle);

 #ifdef ENABLE_ONEDNN_FOR_GPU
-    dnnl::stream& get_onednn_stream() override;
+    dnnl::stream& get_onednn_stream() const override;
 #endif

 private:
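The const qualifier has to change in lockstep across the pure-virtual declaration, this override declaration, and the out-of-line definition: a member function that differs only in constness does not override, so the override keyword turns a missed spot into a compile error rather than a silent extra overload. A hypothetical reduction of the situation:

    struct stream {
        virtual int& get_onednn_stream() const = 0;
        virtual ~stream() = default;
    };

    struct ocl_stream_like : stream {
        // dropping 'const' here would declare a new function instead of
        // overriding; 'override' makes that mismatch a hard compile error
        int& get_onednn_stream() const override { return _s; }

    private:
        mutable int _s = 0;
    };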