Files
openvino/docs/snippets/gpu/context_sharing.cpp
2022-11-03 18:02:23 +04:00

79 lines
2.6 KiB
C++

#include <openvino/runtime/core.hpp>
#define OV_GPU_USE_OPENCL_HPP
#include <openvino/runtime/intel_gpu/ocl/ocl.hpp>
// Returns the OpenCL context that was created on the application side.
// Only the declaration is visible here; main() below calls it to obtain
// the context that is then shared with the GPU plugin.
cl::Context get_ocl_context();
int main() {
{
//! [context_sharing_get_from_ov]
// ...
// initialize the core and load the network
ov::Core core;
auto model = core.read_model("model.xml");
auto compiled_model = core.compile_model(model, "GPU");
auto infer_request = compiled_model.create_infer_request();
// obtain the RemoteContext from the compiled model object and cast it to ClContext
auto gpu_context = compiled_model.get_context().as<ov::intel_gpu::ocl::ClContext>();
// obtain the OpenCL context handle from the RemoteContext,
// get device info and create a queue
cl::Context cl_context = gpu_context;
cl::Device device = cl::Device(cl_context.getInfo<CL_CONTEXT_DEVICES>()[0].get(), true);
cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
cl::CommandQueue queue = cl::CommandQueue(cl_context, device, props);
// create the OpenCL buffer within the obtained context
auto input = model->get_parameters().at(0);
auto input_size = ov::shape_size(input->get_shape());
cl_int err;
cl::Buffer shared_buffer(cl_context, CL_MEM_READ_WRITE, input_size, NULL, &err);
// wrap the buffer into RemoteBlob
auto shared_blob = gpu_context.create_tensor(input->get_element_type(), input->get_shape(), shared_buffer);
// ...
// execute user kernel
cl::Program program;
cl::Kernel kernel(program, "user_kernel");
kernel.setArg(0, shared_buffer);
queue.enqueueNDRangeKernel(kernel,
cl::NDRange(0),
cl::NDRange(input_size),
cl::NDRange(1),
nullptr,
nullptr);
queue.finish();
// ...
// pass results to the inference
infer_request.set_tensor(input, shared_blob);
infer_request.infer();
//! [context_sharing_get_from_ov]
}
{
//! [context_sharing_user_handle]
cl::Context ctx = get_ocl_context();
ov::Core core;
auto model = core.read_model("model.xml");
// share the context with GPU plugin and compile ExecutableNetwork
auto remote_context = ov::intel_gpu::ocl::ClContext(core, ctx.get());
auto exec_net_shared = core.compile_model(model, remote_context);
auto inf_req_shared = exec_net_shared.create_infer_request();
// ...
// do OpenCL processing stuff
// ...
// run the inference
inf_req_shared.infer();
//! [context_sharing_user_handle]
}
return 0;
}