61 lines
1.7 KiB
C++
61 lines
1.7 KiB
C++
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
|
|
#define CL_HPP_TARGET_OPENCL_VERSION 120
|
|
|
|
#include <ie_core.hpp>
|
|
#include <CL/cl2.hpp>
|
|
#include <gpu/gpu_context_api_ocl.hpp>
|
|
|
|
|
|
int main() {
|
|
using namespace InferenceEngine;
|
|
//! [part0]
|
|
|
|
|
|
// ...
|
|
|
|
|
|
// initialize the core and load the network
|
|
InferenceEngine::Core ie;
|
|
auto net = ie.ReadNetwork("network.xml");
|
|
auto exec_net = ie.LoadNetwork(net, "GPU");
|
|
|
|
|
|
// obtain the RemoteContext pointer from the executable network object
|
|
auto cldnn_context = exec_net.GetContext();
|
|
// obtain the OpenCL context handle from the RemoteContext,
|
|
// get device info and create a queue
|
|
cl::Context ctx = std::dynamic_pointer_cast<cl::Context>(cldnn_context);
|
|
_device = cl::Device(_context.getInfo<CL_CONTEXT_DEVICES>()[0].get(), true);
|
|
cl::CommandQueue _queue;
|
|
cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
|
|
_queue = cl::CommandQueue(_context, _device, props);
|
|
|
|
|
|
// create the OpenCL buffer within the obtained context
|
|
cl::Buffer shared_buffer(ctx, CL_MEM_READ_WRITE, image_size * num_channels, NULL, &err);
|
|
// wrap the buffer into RemoteBlob
|
|
auto shared_blob = gpu::make_shared_blob(input_info->getTensorDesc(), cldnn_context, shared_buffer);
|
|
|
|
|
|
// ...
|
|
// execute user kernel
|
|
cl::Kernel kernel(program, kernelName.c_str());
|
|
kernel.setArg(0, shared_buffer);
|
|
queue.enqueueNDRangeKernel(kernel,
|
|
cl::NDRange(0),
|
|
cl::NDRange(image_size),
|
|
cl::NDRange(1),
|
|
0, // wait events *
|
|
&profileEvent);
|
|
queue.finish();
|
|
// ...
|
|
|
|
|
|
// pass results to the inference
|
|
inf_req_shared.SetBlob(input_name, shared_blob);
|
|
inf_req_shared.Infer();
|
|
//! [part0]
|
|
|
|
return 0;
|
|
}
|