Update md files. Add cpp in docs/examples (#1769)
* Update md files. Add cpp in docs/examples * Normalize all the line endings * Fix block_id in snippets * Fix utf-8 encoding * Add new folder for snippets * Fix issues with compiling code from snippets * Added container iterator fix
This commit is contained in:
@@ -102,124 +102,15 @@ Refer to the sections below to see pseudo-code of usage examples.
|
||||
|
||||
This example uses the OpenCL context obtained from an executable network object.
|
||||
|
||||
```cpp
|
||||
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION 120
|
||||
|
||||
#include <CL/cl2.hpp>
|
||||
#include <gpu/gpu_context_api_ocl.hpp>
|
||||
|
||||
...
|
||||
|
||||
// initialize the plugin and load the network
|
||||
InferenceEngine::Core ie;
|
||||
auto exec_net = ie.LoadNetwork(net, "GPU", config);
|
||||
|
||||
// obtain the RemoteContext pointer from the executable network object
|
||||
auto cldnn_context = exec_net.GetContext();
|
||||
// obtain the OpenCL context handle from the RemoteContext,
|
||||
// get device info and create a queue
|
||||
cl::Context ctx = std::dynamic_pointer_cast<ClContext>(cldnn_context);
|
||||
_device = cl::Device(_context.getInfo<CL_CONTEXT_DEVICES>()[0].get(), true);
|
||||
cl::CommandQueue _queue;
|
||||
cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
|
||||
_queue = cl::CommandQueue(_context, _device, props);
|
||||
|
||||
// create the OpenCL buffer within the obtained context
|
||||
cl::Buffer shared_buffer(ctx, CL_MEM_READ_WRITE, image_size * num_channels, NULL, &err);
|
||||
// wrap the buffer into RemoteBlob
|
||||
auto shared_blob = gpu::make_shared_blob(input_info->getTensorDesc(), cldnn_context, shared_buffer);
|
||||
|
||||
...
|
||||
// execute user kernel
|
||||
cl::Kernel kernel(program, kernelName.c_str());
|
||||
kernel.setArg(0, shared_buffer);
|
||||
queue.enqueueNDRangeKernel(kernel,
|
||||
cl::NDRange(0),
|
||||
cl::NDRange(image_size),
|
||||
cl::NDRange(1),
|
||||
0, // wait events *
|
||||
&profileEvent);
|
||||
queue.finish();
|
||||
...
|
||||
|
||||
// pass results to the inference
|
||||
inf_req_shared.SetBlob(input_name, shared_blob);
|
||||
inf_req_shared.Infer();
|
||||
|
||||
```
|
||||
@snippet openvino/docs/snippets/GPU_RemoteBlob_API0.cpp part0
|
||||
|
||||
### Running GPU Plugin Inference within User-Supplied Shared Context
|
||||
|
||||
```cpp
|
||||
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION 120
|
||||
@snippet openvino/docs/snippets/GPU_RemoteBlob_API1.cpp part1
|
||||
|
||||
#include <CL/cl2.hpp>
|
||||
#include <gpu/gpu_context_api_ocl.hpp>
|
||||
|
||||
...
|
||||
|
||||
cl::Context ctx = get_my_OpenCL_context();
|
||||
|
||||
// share the context with GPU plugin and compile ExecutableNetwork
|
||||
auto remote_context = gpu::make_shared_context(ie, "GPU", ocl_instance->_context.get());
|
||||
auto exec_net_shared = ie.LoadNetwork(net, remote_context);
|
||||
auto inf_req_shared = exec_net_shared.CreateInferRequest();
|
||||
|
||||
...
|
||||
// do OpenCL processing stuff
|
||||
...
|
||||
|
||||
// run the inference
|
||||
inf_req_shared.Infer();
|
||||
|
||||
```
|
||||
### Direct Consuming of the NV12 VAAPI Video Decoder Surface on Linux
|
||||
|
||||
```cpp
|
||||
#include <gpu/gpu_context_api_va.hpp>
|
||||
#include <cldnn/cldnn_config.hpp>
|
||||
|
||||
...
|
||||
|
||||
// initialize the objects
|
||||
CNNNetwork network = ie.ReadNetwork(xmlFileName, binFileName);
|
||||
|
||||
...
|
||||
|
||||
auto inputInfoItem = *inputInfo.begin();
|
||||
inputInfoItem.second->setPrecision(Precision::U8);
|
||||
inputInfoItem.second->setLayout(Layout::NCHW);
|
||||
inputInfoItem.second->getPreProcess().setColorFormat(ColorFormat::NV12);
|
||||
|
||||
VADisplay disp = get_VA_Device();
|
||||
// create the shared context object
|
||||
auto shared_va_context = gpu::make_shared_context(ie, "GPU", disp);
|
||||
// compile network within a shared context
|
||||
ExecutableNetwork executable_network = ie.LoadNetwork(network,
|
||||
shared_va_context,
|
||||
{ { CLDNNConfigParams::KEY_CLDNN_NV12_TWO_INPUTS,
|
||||
PluginConfigParams::YES } });
|
||||
|
||||
// decode/inference loop
|
||||
for (int i = 0; i < nframes; i++) {
|
||||
...
|
||||
// execute decoding and obtain decoded surface handle
|
||||
decoder.DecodeFrame();
|
||||
VASurfaceID va_surface = decoder.get_VA_output_surface();
|
||||
...
|
||||
//wrap decoder output into RemoteBlobs and set it as inference input
|
||||
auto nv12_blob = gpu::make_shared_blob_nv12(ieInHeight,
|
||||
ieInWidth,
|
||||
shared_va_context,
|
||||
va_surface
|
||||
);
|
||||
inferRequests[currentFrame].SetBlob(input_name, nv12_blob);
|
||||
inferRequests[currentFrame].StartAsync();
|
||||
inferRequests[prevFrame].Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
|
||||
}
|
||||
```
|
||||
@snippet openvino/docs/snippets/GPU_RemoteBlob_API2.cpp part2
|
||||
|
||||
## See Also
|
||||
|
||||
|
||||
@@ -28,43 +28,15 @@ Default fallback policy decides which layer goes to which device automatically a
|
||||
|
||||
Another way to annotate a network is to set affinity manually using <code>ngraph::Node::get_rt_info</code> with key `"affinity"`:
|
||||
|
||||
```cpp
|
||||
for (auto && op : function->get_ops())
|
||||
op->get_rt_info()["affinity"] = std::shared_ptr<ngraph::VariantWrapper<std::string>>("CPU");
|
||||
```
|
||||
@snippet openvino/docs/snippets/HETERO0.cpp part0
|
||||
|
||||
The fallback policy does not work if even one layer has an initialized affinity. The correct sequence is to apply the automatic affinity settings first and then fix affinities manually.
|
||||
```cpp
|
||||
InferenceEngine::Core core
|
||||
auto network = core.ReadNetwork("Model.xml");
|
||||
|
||||
// This example demonstrates how to perform default affinity initialization and then
|
||||
// correct affinity manually for some layers
|
||||
const std::string device = "HETERO:FPGA,CPU";
|
||||
|
||||
// QueryNetworkResult object contains map layer -> device
|
||||
InferenceEngine::QueryNetworkResult res = core.QueryNetwork(network, device, { });
|
||||
|
||||
// update default affinities
|
||||
res.supportedLayersMap["layerName"] = "CPU";
|
||||
|
||||
// set affinities to network
|
||||
for (auto&& node : function->get_ops()) {
|
||||
auto& affinity = res.supportedLayersMap[node->get_friendly_name()];
|
||||
// Store affinity mapping using node runtime information
|
||||
node->get_rt_info()["affinity"] = std::make_shared<ngraph::VariantWrapper<std::string>>(affinity);
|
||||
}
|
||||
|
||||
// load network with affinities set before
|
||||
auto executable_network = core.LoadNetwork(network, device);
|
||||
```
|
||||
@snippet openvino/docs/snippets/HETERO1.cpp part1
|
||||
|
||||
If you rely on the default affinity distribution, you can avoid calling <code>InferenceEngine::Core::QueryNetwork</code> and just call <code>InferenceEngine::Core::LoadNetwork</code> instead:
|
||||
```cpp
|
||||
InferenceEngine::Core core
|
||||
auto network = core.ReadNetwork("Model.xml");
|
||||
auto executable_network = core.LoadNetwork(network, "HETERO:FPGA,CPU");
|
||||
```
|
||||
|
||||
@snippet openvino/docs/snippets/HETERO2.cpp part2
|
||||
|
||||
> **NOTE**: `InferenceEngine::Core::QueryNetwork` does not depend on affinities set by a user, but queries for layer support based on device capabilities.
|
||||
|
||||
@@ -100,16 +72,7 @@ Heterogeneous plugin can generate two files:
|
||||
* `hetero_affinity_<network name>.dot` - annotation of affinities per layer. This file is written to the disk only if default fallback policy was executed
|
||||
* `hetero_subgraphs_<network name>.dot` - annotation of affinities per graph. This file is written to the disk during execution of <code>ICNNNetwork::LoadNetwork()</code> for heterogeneous plugin
|
||||
|
||||
```cpp
|
||||
#include "ie_plugin_config.hpp"
|
||||
#include "hetero/hetero_plugin_config.hpp"
|
||||
using namespace InferenceEngine::PluginConfigParams;
|
||||
using namespace InferenceEngine::HeteroConfigParams;
|
||||
|
||||
...
|
||||
InferenceEngine::Core core;
|
||||
core.SetConfig({ { KEY_HETERO_DUMP_GRAPH_DOT, YES } }, "HETERO");
|
||||
```
|
||||
@snippet openvino/docs/snippets/HETERO3.cpp part3
|
||||
|
||||
You can use GraphViz* utility or converters to `.png` formats. On Ubuntu* operating system, you can use the following utilities:
|
||||
* `sudo apt-get install xdot`
|
||||
|
||||
@@ -31,33 +31,13 @@ The only configuration option for the multi-device is prioritized list of device
|
||||
You can use name of the configuration directly as a string, or use MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES from the multi/multi_device_config.hpp that defines the same string.
|
||||
|
||||
Basically, there are three ways to specify the devices to be used by the "MULTI":
|
||||
```cpp
|
||||
Core ie;
|
||||
//NEW IE-CENTRIC API, the "MULTI" plugin is (globally) pre-configured with the explicit option:
|
||||
ie.SetConfig({{"MULTI_DEVICE_PRIORITIES", "HDDL,GPU"}}, "MULTI");
|
||||
ExecutableNetwork exec0 = ie.LoadNetwork(network, "MULTI", {});
|
||||
|
||||
//NEW IE-CENTRIC API, configuration of the "MULTI" is part of the network configuration (and hence specific to the network):
|
||||
ExecutableNetwork exec1 = ie.LoadNetwork(network, "MULTI", {{"MULTI_DEVICE_PRIORITIES", "HDDL,GPU"}});
|
||||
//NEW IE-CENTRIC API, same as previous, but configuration of the "MULTI" is part of the name (so config is empty), also network-specific:
|
||||
ExecutableNetwork exec2 = ie.LoadNetwork(network, "MULTI:HDDL,GPU", {});
|
||||
```
|
||||
@snippet openvino/docs/snippets/MULTI0.cpp part0
|
||||
|
||||
Notice that the priorities of the devices can be changed in real-time for the executable network:
|
||||
```cpp
|
||||
Core ie;
|
||||
ExecutableNetwork exec = ie.LoadNetwork(network, "MULTI:HDDL,GPU", {});
|
||||
//...
|
||||
exec.SetConfig({{"MULTI_DEVICE_PRIORITIES", "GPU,HDDL"}});
|
||||
// you can even exclude some device
|
||||
exec.SetConfig({{"MULTI_DEVICE_PRIORITIES", "GPU"}});
|
||||
//...
|
||||
// and then return it back
|
||||
exec.SetConfig({{"MULTI_DEVICE_PRIORITIES", "GPU,HDDL"}});
|
||||
//but you cannot add new devices on the fly, the next line will trigger the following exception:
|
||||
//[ ERROR ] [NOT_FOUND] You can only change device priorities but not add new devices with the Network's SetConfig(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES.
|
||||
//CPU device was not in the original device list!
|
||||
exec.SetConfig({{"MULTI_DEVICE_PRIORITIES", "CPU,GPU,HDDL"}});
|
||||
```
|
||||
|
||||
@snippet openvino/docs/snippets/MULTI1.cpp part1
|
||||
|
||||
Finally, there is a way to specify number of requests that the multi-device will internally keep for each device.
|
||||
Say your original app was running 4 cameras with 4 inference requests. Now you would probably want to share these 4 requests between the 2 devices used in the MULTI. The easiest way is to specify a number of requests for each device using parentheses: "MULTI:CPU(2),GPU(2)" and use the same 4 requests in your app. However, such an explicit configuration is not performance portable and hence not recommended. Instead, the better way is to configure the individual devices and query the resulting number of requests to be used in the application level (see [Configuring the Individual Devices and Creating the Multi-Device On Top](#configuring-the-individual-devices-and-creating-the-multi-device-on-top)).
|
||||
|
||||
@@ -74,16 +54,9 @@ Available devices:
|
||||
Device: HDDL
|
||||
```
|
||||
Simple programmatic way to enumerate the devices and use with the multi-device is as follows:
|
||||
```cpp
|
||||
Core ie;
|
||||
std::string allDevices = "MULTI:";
|
||||
std::vector<std::string> availableDevices = ie.GetAvailableDevices();
|
||||
for (auto && device : availableDevices) {
|
||||
allDevices += device;
|
||||
allDevices += ((device == availableDevices[availableDevices.size()-1]) ? "" : ",");
|
||||
}
|
||||
ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, allDevices, {});
|
||||
```
|
||||
|
||||
@snippet openvino/docs/snippets/MULTI2.cpp part2
|
||||
|
||||
Beyond trivial "CPU", "GPU", "HDDL" and so on, when multiple instances of a device are available the names are more qualified.
|
||||
For example this is how two Intel® Movidius™ Myriad™ X sticks are listed with the hello_query_sample:
|
||||
```
|
||||
@@ -94,33 +67,15 @@ For example this is how two Intel® Movidius™ Myriad™ X sticks are listed wi
|
||||
```
|
||||
So the explicit configuration to use both would be "MULTI:MYRIAD.1.2-ma2480,MYRIAD.1.4-ma2480".
|
||||
Accordingly, the code that loops over all available devices of "MYRIAD" type only is below:
|
||||
```cpp
|
||||
Core ie;
|
||||
std::string allDevices = "MULTI:";
|
||||
std::vector<std::string> myriadDevices = ie->GetMetric("MYRIAD", METRIC_KEY(myriadDevices)));
|
||||
for (int i = 0; i < myriadDevices.size(); ++i) {
|
||||
allDevices += std::string("MYRIAD.")
|
||||
+ myriadDevices[i]
|
||||
+ std::string(i < (myriadDevices.size() -1) ? "," : "");
|
||||
}
|
||||
ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, allDevices, {});
|
||||
```
|
||||
|
||||
@snippet openvino/docs/snippets/MULTI3.cpp part3
|
||||
|
||||
|
||||
## Configuring the Individual Devices and Creating the Multi-Device On Top
|
||||
As discussed in the first section, you shall configure each individual device as usual and then just create the "MULTI" device on top:
|
||||
```cpp
|
||||
#include <multi/multi_device_config.hpp>
|
||||
// configure the HDDL device first
|
||||
Core ie;
|
||||
ie.SetConfig(hddl_config, "HDDL");
|
||||
// configure the GPU device
|
||||
ie.SetConfig(gpu_config, "GPU");
|
||||
// load the network to the multi-device, while specifying the configuration (devices along with priorities):
|
||||
ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, "MULTI", {{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "HDDL,GPU"}});
|
||||
// new metric allows to query the optimal number of requests:
|
||||
uint32_t nireq = exeNetwork.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
|
||||
```
|
||||
|
||||
@snippet openvino/docs/snippets/MULTI4.cpp part4
|
||||
|
||||
Alternatively, you can combine all the individual device settings into single config and load that, allowing the multi-device plugin to parse and apply that to the right devices. See code example in the next section.
|
||||
|
||||
Notice that while the performance of accelerators combines really well with multi-device, the CPU+GPU execution poses some performance caveats, as these devices share the power, bandwidth and other resources. For example it is recommended to enable the GPU throttling hint (which saves another CPU thread for CPU inference).
|
||||
@@ -128,12 +83,8 @@ See section of the [Using the multi-device with OpenVINO samples and benchmarkin
|
||||
|
||||
## Querying the Optimal Number of Inference Requests
|
||||
Notice that until R2 you had to calculate number of requests in your application for any device, e.g. you had to know that Intel® Vision Accelerator Design with Intel® Movidius™ VPUs required at least 32 inference requests to perform well. Now you can use the new GetMetric API to query the optimal number of requests. Similarly, when using the multi-device you don't need to sum over included devices yourself, you can query metric directly:
|
||||
```cpp
|
||||
// 'device_name' can be "MULTI:HDDL,GPU" to configure the multi-device to use HDDL and GPU
|
||||
ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, device_name, full_config);
|
||||
// new metric allows to query the optimal number of requests:
|
||||
uint32_t nireq = exeNetwork.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
|
||||
```
|
||||
|
||||
@snippet openvino/docs/snippets/MULTI5.cpp part5
|
||||
|
||||
## Using the Multi-Device with OpenVINO Samples and Benchmarking the Performance
|
||||
Notice that every OpenVINO sample that supports the "-d" (which stands for "device") command-line option transparently accepts the multi-device.
|
||||
|
||||
Reference in New Issue
Block a user