#define CL_HPP_MINIMUM_OPENCL_VERSION 120 #define CL_HPP_TARGET_OPENCL_VERSION 120 #include #include #include int main() { using namespace InferenceEngine; //! [part0] // ... // initialize the core and load the network InferenceEngine::Core ie; auto net = ie.ReadNetwork("network.xml"); auto exec_net = ie.LoadNetwork(net, "GPU"); // obtain the RemoteContext pointer from the executable network object auto cldnn_context = exec_net.GetContext(); // obtain the OpenCL context handle from the RemoteContext, // get device info and create a queue cl::Context ctx = std::dynamic_pointer_cast(cldnn_context); _device = cl::Device(_context.getInfo()[0].get(), true); cl::CommandQueue _queue; cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; _queue = cl::CommandQueue(_context, _device, props); // create the OpenCL buffer within the obtained context cl::Buffer shared_buffer(ctx, CL_MEM_READ_WRITE, image_size * num_channels, NULL, &err); // wrap the buffer into RemoteBlob auto shared_blob = gpu::make_shared_blob(input_info->getTensorDesc(), cldnn_context, shared_buffer); // ... // execute user kernel cl::Kernel kernel(program, kernelName.c_str()); kernel.setArg(0, shared_buffer); queue.enqueueNDRangeKernel(kernel, cl::NDRange(0), cl::NDRange(image_size), cl::NDRange(1), 0, // wait events * &profileEvent); queue.finish(); // ... // pass results to the inference inf_req_shared.SetBlob(input_name, shared_blob); inf_req_shared.Infer(); //! [part0] return 0; }