Files
openvino/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_engine.cpp
Vladimir Paramuzov 6f754052cf [GPU] Extended remote context to accept user queue (#6235)
* [GPU] extended remote context to accept user queues for each stream

* [GPU] OV2.0 API for queue sharing. Removed deviceName arg for context creation
2021-10-21 10:45:25 +03:00

234 lines
9.2 KiB
C++

// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ocl_engine.hpp"
#include "ocl_common.hpp"
#include "ocl_memory.hpp"
#include "ocl_stream.hpp"
#include <string>
#include <vector>
#include <memory>
#include <set>
#include <stdexcept>
// NOTE: Due to buggy scope transition of warnings, we need to disable the warning at the point of
// use/instantiation of some types (even though we already disabled it in the scope where these types
// are defined). Moreover, this warning is now pretty much only an annoyance: it is generated due to the
// lack of proper support for mangling custom GCC attributes into type names (usually when used
// with templates, even from the standard library).
#if defined __GNUC__ && __GNUC__ >= 6
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif
// Static class members: pointers to dynamically obtained OpenCL extension functions.
// Each pointer starts out null.
// NOTE(review): presumably these are resolved at runtime elsewhere before first use - confirm.
cl::PFN_clEnqueueAcquireMediaSurfacesINTEL cl::SharedSurfLock::pfn_acquire = nullptr;
cl::PFN_clEnqueueReleaseMediaSurfacesINTEL cl::SharedSurfLock::pfn_release = nullptr;
cl::PFN_clCreateFromMediaSurfaceINTEL cl::ImageVA::pfn_clCreateFromMediaSurfaceINTEL = nullptr;
#ifdef _WIN32
// Direct3D 11 buffer sharing entry point; only defined for Windows builds.
cl::PFN_clCreateFromD3D11Buffer cl::BufferDX::pfn_clCreateFromD3D11Buffer = nullptr;
#endif
#ifdef ENABLE_ONEDNN_FOR_GPU
#include <oneapi/dnnl/dnnl_ocl.hpp>
#endif
namespace cldnn {
namespace ocl {
// Wraps an OpenCL C++ binding exception into an ocl_error.
// The message is the original what() text followed by ", error code: " and the numeric error code.
ocl_error::ocl_error(cl::Error const& err)
    : std::runtime_error(std::string(err.what()) + ", error code: " + std::to_string(err.err())) {}
// Constructs an OpenCL engine on top of the given device.
//
// Throws std::runtime_error when runtime_type is not runtime_types::ocl, or when dev does not
// point to an ocl_device. After validation the constructor queries CL_DEVICE_EXTENSIONS into
// _extensions, creates the USM helper, creates the oneDNN engine (only when ENABLE_ONEDNN_FOR_GPU
// is defined) and finally creates the internal program stream.
ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type, const engine_configuration& conf)
    : engine(dev, conf) {
    const bool is_ocl_runtime = (runtime_type == runtime_types::ocl);
    if (!is_ocl_runtime) {
        throw std::runtime_error("Invalid runtime type specified for OCL engine. Only OCL runtime is supported");
    }

    ocl_device* const ocl_dev = dynamic_cast<ocl_device*>(dev.get());
    if (ocl_dev == nullptr) {
        throw std::runtime_error("[CLDNN] Invalid device type passed to ocl engine");
    }

    ocl_dev->get_device().getInfo(CL_DEVICE_EXTENSIONS, &_extensions);
    _usm_helper.reset(new cl::UsmHelper(get_cl_context(), get_cl_device(), use_unified_shared_memory()));

#ifdef ENABLE_ONEDNN_FOR_GPU
    _onednn_engine = std::make_shared<dnnl::engine>(
        dnnl::ocl_interop::make_engine(ocl_dev->get_device().get(), ocl_dev->get_context().get()));
#endif

    // The program stream is created last, once the rest of the engine state is set up.
    _program_stream.reset(new ocl_stream(*this));
}
#ifdef ENABLE_ONEDNN_FOR_GPU
// Returns the oneDNN engine held by this engine.
// Throws std::runtime_error when the stored engine pointer is empty.
dnnl::engine& ocl_engine::get_onednn_engine() const {
    if (_onednn_engine) {
        return *_onednn_engine;
    }
    throw std::runtime_error("[GPU] onednn engine is nullptr");
}
#endif
// Returns the cl::Context exposed by the engine's device.
// Throws std::runtime_error when the stored device is not an ocl_device.
const cl::Context& ocl_engine::get_cl_context() const {
    ocl_device* const ocl_dev = dynamic_cast<ocl_device*>(_device.get());
    if (ocl_dev == nullptr) {
        throw std::runtime_error("Invalid device type for ocl_engine");
    }
    return ocl_dev->get_context();
}
// Returns the cl::Device exposed by the engine's device.
// Throws std::runtime_error when the stored device is not an ocl_device.
const cl::Device& ocl_engine::get_cl_device() const {
    ocl_device* const ocl_dev = dynamic_cast<ocl_device*>(_device.get());
    if (ocl_dev == nullptr) {
        throw std::runtime_error("Invalid device type for ocl_engine");
    }
    return ocl_dev->get_device();
}
// Gives read-only access to the USM helper stored in this engine.
const cl::UsmHelper& ocl_engine::get_usm_helper() const {
    const cl::UsmHelper& helper = *_usm_helper;
    return helper;
}
// Allocates a memory object for the given layout.
//
// layout - describes the data to allocate; 2D image formats produce a gpu_image2d.
// type   - requested allocation type; cl_mem produces a gpu_buffer, any other type a gpu_usm.
// reset  - when true the new memory is filled via the program stream; it is also filled when
//          the allocated object reports is_memory_reset_needed(layout).
//
// Throws std::runtime_error when the layout is larger than the device's max_alloc_mem_size,
// when the allocation type is not cl_mem and not supported, or when an OpenCL error is raised
// during allocation (the cl::Error is translated to one of two fixed messages).
memory::ptr ocl_engine::allocate_memory(const layout& layout, allocation_type type, bool reset) {
    const bool exceeds_device_limit = layout.bytes_count() > get_device_info().max_alloc_mem_size;
    if (exceeds_device_limit) {
        throw std::runtime_error("exceeded max size of memory object allocation");
    }

    const bool is_cl_mem = (type == allocation_type::cl_mem);
    if (!is_cl_mem && !supports_allocation(type)) {
        std::ostringstream type_name;
        type_name << type;
        throw std::runtime_error("Unsupported allocation type " + type_name.str());
    }

    try {
        memory::ptr allocated;
        if (layout.format.is_image_2d()) {
            allocated = std::make_shared<ocl::gpu_image2d>(this, layout);
        } else if (is_cl_mem) {
            allocated = std::make_shared<ocl::gpu_buffer>(this, layout);
        } else {
            allocated = std::make_shared<ocl::gpu_usm>(this, layout, type);
        }

        const bool must_fill = reset || allocated->is_memory_reset_needed(layout);
        if (must_fill) {
            allocated->fill(get_program_stream());
        }
        return allocated;
    } catch (const cl::Error& cl_err) {
        // Resource-exhaustion style codes share one message; everything else is a generic failure.
        const auto code = cl_err.err();
        const bool out_of_resources = code == CL_MEM_OBJECT_ALLOCATION_FAILURE ||
                                      code == CL_OUT_OF_RESOURCES ||
                                      code == CL_OUT_OF_HOST_MEMORY ||
                                      code == CL_INVALID_BUFFER_SIZE;
        if (out_of_resources) {
            throw std::runtime_error("out of GPU resources");
        }
        throw std::runtime_error("GPU buffer allocation failed");
    }
}
// Creates a new memory object with new_layout that wraps the buffer of an existing memory object.
//
// Throws std::runtime_error when memory belongs to a different engine or when exactly one of the
// old and new layouts is an image format. A cl::Error raised while creating the new object is
// rethrown as ocl::ocl_error.
memory::ptr ocl_engine::reinterpret_buffer(const memory& memory, const layout& new_layout) {
    if (memory.get_engine() != this) {
        throw std::runtime_error("trying to reinterpret buffer allocated by a different engine");
    }

    const bool target_is_image = new_layout.format.is_image();
    const bool source_is_image = memory.get_layout().format.is_image();
    if (target_is_image && !source_is_image) {
        throw std::runtime_error("trying to reinterpret non-image buffer as image");
    }
    if (!target_is_image && source_is_image) {
        throw std::runtime_error("trying to reinterpret image buffer as non-image buffer");
    }

    try {
        if (new_layout.format.is_image_2d()) {
            const auto& source_image = reinterpret_cast<const ocl::gpu_image2d&>(memory);
            return std::make_shared<ocl::gpu_image2d>(this, new_layout, source_image.get_buffer());
        }

        if (memory_capabilities::is_usm_type(memory.get_allocation_type())) {
            const auto& source_usm = reinterpret_cast<const ocl::gpu_usm&>(memory);
            return std::make_shared<ocl::gpu_usm>(this,
                                                  new_layout,
                                                  source_usm.get_buffer(),
                                                  memory.get_allocation_type());
        }

        const auto& source_buffer = reinterpret_cast<const ocl::gpu_buffer&>(memory);
        return std::make_shared<ocl::gpu_buffer>(this, new_layout, source_buffer.get_buffer());
    } catch (cl::Error const& err) {
        throw ocl::ocl_error(err);
    }
}
// Wraps an externally provided handle (described by params) into a memory object with new_layout.
//
// The concrete memory class is picked from params.mem_type, and for image / VA surface handles
// also requires new_layout to be a 2D image format. Throws std::runtime_error when no combination
// matches, or when an OpenCL error is raised (the cl::Error is translated to one of two fixed
// messages).
memory::ptr ocl_engine::reinterpret_handle(const layout& new_layout, shared_mem_params params) {
    try {
        const bool layout_is_image_2d = new_layout.format.is_image_2d();

        if (layout_is_image_2d && params.mem_type == shared_mem_type::shared_mem_image) {
            cl::Image2D shared_image(static_cast<cl_mem>(params.mem), true);
            return std::make_shared<ocl::gpu_image2d>(this, new_layout, shared_image);
        }
        if (layout_is_image_2d && params.mem_type == shared_mem_type::shared_mem_vasurface) {
            return std::make_shared<ocl::gpu_media_buffer>(this, new_layout, params);
        }
#ifdef _WIN32
        if (params.mem_type == shared_mem_type::shared_mem_dxbuffer) {
            return std::make_shared<ocl::gpu_dx_buffer>(this, new_layout, params);
        }
#endif
        if (params.mem_type == shared_mem_type::shared_mem_buffer) {
            cl::Buffer shared_buffer(static_cast<cl_mem>(params.mem), true);
            return std::make_shared<ocl::gpu_buffer>(this, new_layout, shared_buffer);
        }

        throw std::runtime_error("unknown shared object fromat or type");
    } catch (const cl::Error& cl_err) {
        // Resource-exhaustion style codes share one message; everything else is a generic failure.
        const auto code = cl_err.err();
        const bool out_of_resources = code == CL_MEM_OBJECT_ALLOCATION_FAILURE ||
                                      code == CL_OUT_OF_RESOURCES ||
                                      code == CL_OUT_OF_HOST_MEMORY ||
                                      code == CL_INVALID_BUFFER_SIZE;
        if (out_of_resources) {
            throw std::runtime_error("out of GPU resources");
        }
        throw std::runtime_error("GPU buffer allocation failed");
    }
}
// Reports whether two memory objects refer to the same underlying buffer.
//
// Returns false when either object belongs to another engine or when their allocation types
// differ. Returns true when both references name the same object. Otherwise the underlying
// buffers are compared, viewing the objects as gpu_usm for USM allocation types and as
// gpu_buffer for everything else.
bool ocl_engine::is_the_same_buffer(const memory& mem1, const memory& mem2) {
    const bool both_from_this_engine = (mem1.get_engine() == this) && (mem2.get_engine() == this);
    if (!both_from_this_engine) {
        return false;
    }

    if (mem1.get_allocation_type() != mem2.get_allocation_type()) {
        return false;
    }

    if (&mem1 == &mem2) {
        return true;
    }

    if (memory_capabilities::is_usm_type(mem1.get_allocation_type())) {
        const auto& first_usm = reinterpret_cast<const ocl::gpu_usm&>(mem1);
        const auto& second_usm = reinterpret_cast<const ocl::gpu_usm&>(mem2);
        return first_usm.get_buffer() == second_usm.get_buffer();
    }

    const auto& first_buffer = reinterpret_cast<const ocl::gpu_buffer&>(mem1);
    const auto& second_buffer = reinterpret_cast<const ocl::gpu_buffer&>(mem2);
    return first_buffer.get_buffer() == second_buffer.get_buffer();
}
void* ocl_engine::get_user_context() const {
auto& cl_device = downcast<ocl_device>(*_device);
return static_cast<void*>(cl_device.get_context().get());
}
// Reports whether the given extension name occurs in the extension string queried from the device.
// NOTE(review): this is a plain substring search, so a name that is a prefix of a longer
// extension name also matches - confirm callers are fine with that.
bool ocl_engine::extension_supported(std::string extension) const {
    const auto position = _extensions.find(extension);
    const bool found = (position != std::string::npos);
    return found;
}
// Creates a new ocl_stream bound to this engine.
stream::ptr ocl_engine::create_stream() const {
    stream::ptr created = std::make_shared<ocl_stream>(*this);
    return created;
}
// Creates a new ocl_stream bound to this engine, forwarding the caller-provided handle to the
// stream constructor.
stream::ptr ocl_engine::create_stream(void* handle) const {
    stream::ptr created = std::make_shared<ocl_stream>(*this, handle);
    return created;
}
// Returns the engine's internal program stream.
stream& ocl_engine::get_program_stream() const {
    stream& program_stream = *_program_stream;
    return program_stream;
}
// Factory: constructs an ocl_engine from the given arguments and returns it as a generic engine pointer.
std::shared_ptr<cldnn::engine> ocl_engine::create(const device::ptr device, runtime_types runtime_type, const engine_configuration& configuration) {
    std::shared_ptr<cldnn::engine> created = std::make_shared<ocl::ocl_engine>(device, runtime_type, configuration);
    return created;
}
// Free-function entry point that forwards to ocl_engine::create.
std::shared_ptr<cldnn::engine> create_ocl_engine(const device::ptr device, runtime_types runtime_type, const engine_configuration& configuration) {
    std::shared_ptr<cldnn::engine> created = ocl_engine::create(device, runtime_type, configuration);
    return created;
}
} // namespace ocl
} // namespace cldnn