From 79290a7dc0b4b26bf1b9b03225d790614bd26b5c Mon Sep 17 00:00:00 2001
From: Xie Zhengtian
Date: Fri, 13 Aug 2021 19:23:44 +0800
Subject: [PATCH] [Doc] Add doc for Auto-Device Plugin 2021.4 (#5982)

* [Doc] Add doc for Auto-Device Plugin 2021.4 (#6190)

* Add doc for Auto-Device Plugin

Signed-off-by: Zhengtian Xie

* Update doc for auto-device plugin

Signed-off-by: Zhengtian Xie

* Update auto-device plugin doc

* Add openvino_docs_IE_DG_supported_plugins_AUTO into web page

Signed-off-by: Zhengtian Xie

* Update AUTO.md

Co-authored-by: Maxim Shevtsov
---
 docs/IE_DG/supported_plugins/AUTO.md          | 128 ++++++++++++++++++
 .../supported_plugins/Supported_Devices.md    |   3 +-
 docs/doxygen/ie_docs.xml                      |   1 +
 docs/snippets/AUTO0.cpp                       |  12 ++
 docs/snippets/AUTO1.cpp                       |  15 ++
 docs/snippets/AUTO2.cpp                       |  10 ++
 docs/snippets/AUTO3.cpp                       |  10 ++
 docs/snippets/AUTO4.cpp                       |  19 +++
 docs/snippets/AUTO5.cpp                       |  15 ++
 9 files changed, 212 insertions(+), 1 deletion(-)
 create mode 100644 docs/IE_DG/supported_plugins/AUTO.md
 create mode 100644 docs/snippets/AUTO0.cpp
 create mode 100644 docs/snippets/AUTO1.cpp
 create mode 100644 docs/snippets/AUTO2.cpp
 create mode 100644 docs/snippets/AUTO3.cpp
 create mode 100644 docs/snippets/AUTO4.cpp
 create mode 100644 docs/snippets/AUTO5.cpp

diff --git a/docs/IE_DG/supported_plugins/AUTO.md b/docs/IE_DG/supported_plugins/AUTO.md
new file mode 100644
index 00000000000..55a5e01f212
--- /dev/null
+++ b/docs/IE_DG/supported_plugins/AUTO.md
@@ -0,0 +1,128 @@
+# Auto-Device Plugin {#openvino_docs_IE_DG_supported_plugins_AUTO}
+
+## Auto-Device Plugin Execution
+
+Auto-device is a new special "virtual" or "proxy" device in the OpenVINO™ toolkit.
+
+Use "AUTO" as the device name to delegate selection of an actual accelerator to OpenVINO.
+With the 2021.4 release, Auto-device internally recognizes and selects devices from CPU,
+integrated GPU and discrete Intel GPUs (when available) depending on the device capabilities and the characteristics of CNN models,
+for example, precision. Then Auto-device assigns inference requests to the selected device.
+
+From the application point of view, this is just another device that handles all accelerators in the system.
+
+With the 2021.4 release, Auto-device setup is done in three major steps:
+* Step 1: Configure each device as usual (for example, via the conventional SetConfig method)
+* Step 2: Load a network to the Auto-device plugin. This is the only change needed in your application
+* Step 3: Just like with any other executable network (resulting from LoadNetwork), create as many requests as needed to saturate the devices (see the sketch right after this list)
+These steps are covered below in detail.
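A minimal sketch of step 3, assuming a model at "sample.xml" and an arbitrary count of 4 requests; this is illustrative only and not one of the snippets added by this patch:

```cpp
#include <inference_engine.hpp>
#include <vector>

int main() {
    InferenceEngine::Core ie;
    InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
    // Step 2: load the network to the Auto-device plugin
    InferenceEngine::ExecutableNetwork exec = ie.LoadNetwork(network, "AUTO");
    // Step 3: create several infer requests to saturate the selected device(s);
    // the count of 4 is an arbitrary example value
    std::vector<InferenceEngine::InferRequest> requests;
    for (int i = 0; i < 4; ++i) {
        requests.push_back(exec.CreateInferRequest());
    }
    // (in a real application the input blobs would be filled here)
    // run all requests asynchronously and wait for the results
    for (auto& request : requests) {
        request.StartAsync();
    }
    for (auto& request : requests) {
        request.Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY);
    }
    return 0;
}
```

In a real application, the request count would typically be derived from the executable network's OPTIMAL_NUMBER_OF_INFER_REQUESTS metric rather than hard-coded.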
+
+## Defining and Configuring the Auto-Device Plugin
+Following the OpenVINO notion of "devices", the Auto-device is named "AUTO". Its only configuration option is a limited device list:
+
+| Parameter name     | Parameter values                            | Default | Description                          |
+| :---               | :---                                        | :---    | :----------------------------------- |
+| "AUTO_DEVICE_LIST" | comma-separated device names with no spaces | N/A     | Device candidate list to be selected |
+
+You can use the configuration name directly as a string or use IE::KEY_AUTO_DEVICE_LIST from ie_plugin_config.hpp,
+which defines the same string.
+
+There are two ways to use the Auto-device:
+1. Directly specify the device by "AUTO" or an empty string:
+
+@snippet snippets/AUTO0.cpp part0
+
+2. Use the Auto-device configuration to limit the list of device candidates to be selected from:
+
+@snippet snippets/AUTO1.cpp part1
+
+The Auto-device supports querying the device optimization capabilities as a metric:
+
+| Parameter name              | Parameter values         |
+| :---                        | :---                     |
+| "OPTIMIZATION_CAPABILITIES" | Auto-Device capabilities |
+
+## Enumerating Available Devices and Auto-Device Selection Logic
+
+### Enumerating Available Devices
+
+The Inference Engine now features a dedicated API to enumerate devices and their capabilities.
+See the [Hello Query Device C++ Sample](../../../inference-engine/samples/hello_query_device/README.md).
+This is example output from the sample (truncated to the devices' names only):
+
+```sh
+./hello_query_device
+Available devices:
+    Device: CPU
+...
+    Device: GPU.0
+...
+    Device: GPU.1
+```
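The same device names can also be obtained programmatically. A minimal sketch using InferenceEngine::Core::GetAvailableDevices (illustrative only, not one of the snippets in this patch):

```cpp
#include <inference_engine.hpp>
#include <iostream>
#include <string>
#include <vector>

int main() {
    InferenceEngine::Core ie;
    // returns names such as "CPU", "GPU.0", "GPU.1", depending on the system
    std::vector<std::string> devices = ie.GetAvailableDevices();
    for (const std::string& device : devices) {
        std::cout << "Device: " << device << std::endl;
    }
    return 0;
}
```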
+
+### Default Auto-Device Selection Logic
+
+With the 2021.4 release, the Auto-device selects the most suitable device with the following default logic:
+1. Check whether dGPU, iGPU and CPU devices are available
+2. Get the precision of the input model, for example FP32
+3. Following the priority of dGPU, iGPU and CPU (in this order), select the first device that supports the precision of the input network as the most suitable device
+
+For example, CPU, dGPU and iGPU can support the following precisions and optimization capabilities:
+
+| Device | OPTIMIZATION_CAPABILITIES       |
+| :---   | :---                            |
+| CPU    | WINOGRAD FP32 FP16 INT8 BIN     |
+| dGPU   | FP32 BIN BATCHED_BLOB FP16 INT8 |
+| iGPU   | FP32 BIN BATCHED_BLOB FP16 INT8 |
+
+When an application uses the Auto-device to run an FP16 IR on a system with CPU, dGPU and iGPU, the Auto-device offloads this workload to the dGPU.
+
+When an application uses the Auto-device to run an FP16 IR on a system with CPU and iGPU, the Auto-device offloads this workload to the iGPU.
+
+When an application uses the Auto-device to run a WINOGRAD-enabled IR on a system with CPU, dGPU and iGPU, the Auto-device offloads this workload to the CPU.
+
+In any case, when loading the network to the dGPU or iGPU fails, the network falls back to the CPU as the last choice.
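For illustration only, the priority-and-precision matching described above could be sketched as follows. This is not the plugin's actual implementation; the helper name, the assumed mapping of GPU.1/GPU.0 to dGPU/iGPU, and the FP16 example are all assumptions:

```cpp
#include <inference_engine.hpp>
#include <ie_plugin_config.hpp>
#include <iostream>
#include <string>
#include <vector>

// Illustrative helper, not an OpenVINO API: pick the first device, in the
// dGPU -> iGPU -> CPU priority order, whose OPTIMIZATION_CAPABILITIES metric
// lists the required precision. A real implementation must also handle
// devices that are absent (querying a missing device throws).
std::string select_device_for_precision(InferenceEngine::Core& ie, const std::string& precision) {
    const std::vector<std::string> priority = {"GPU.1", "GPU.0", "CPU"};  // assumed: GPU.1 = dGPU, GPU.0 = iGPU
    for (const std::string& device : priority) {
        const auto capabilities =
            ie.GetMetric(device, METRIC_KEY(OPTIMIZATION_CAPABILITIES)).as<std::vector<std::string>>();
        for (const std::string& capability : capabilities) {
            if (capability == precision) {
                return device;
            }
        }
    }
    return "CPU";  // fall back to the CPU as the last choice
}

int main() {
    InferenceEngine::Core ie;
    // an FP16 IR would land on the dGPU if present, otherwise the iGPU, otherwise the CPU
    const std::string device = select_device_for_precision(ie, "FP16");
    std::cout << "Selected device: " << device << std::endl;
    return 0;
}
```

The real plugin performs this selection internally when you load a network to "AUTO"; the sketch only mirrors the documented priority order.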
+
+### Limiting Auto Target Devices
+
+Following the Auto-device selection logic from the previous section,
+the most suitable device is selected from all of the available devices and the model is loaded to it as follows:
+
+@snippet snippets/AUTO2.cpp part2
+
+Another way to load the model to a device from a limited choice of devices is to specify the candidates for the Auto-device:
+
+@snippet snippets/AUTO3.cpp part3
+
+## Configuring the Individual Devices and Creating the Auto-Device on Top
+
+As described in the first section, configure each individual device as usual and then just create the "AUTO" device on top:
+
+@snippet snippets/AUTO4.cpp part4
+
+Alternatively, you can combine all the individual device settings into a single config and load it,
+allowing the Auto-device plugin to parse and apply it to the right devices. See the code example here:
+
+@snippet snippets/AUTO5.cpp part5
+
+## Using the Auto-Device with OpenVINO Samples and Benchmark App
+
+Note that every OpenVINO sample that supports the "-d" (which stands for "device") command-line option transparently accepts the Auto-device.
+The Benchmark Application is the best example of the optimal usage of the Auto-device.
+You do not need to set the number of requests and CPU threads, as the application provides optimal out-of-the-box performance.
+Below is an example command line to evaluate the performance of the Auto-device with the Benchmark App:
+
+```sh
+./benchmark_app -d AUTO -m <model> -i <input> -niter 1000
+```
+You can also use the Auto-device with a limited device choice:
+
+```sh
+./benchmark_app -d AUTO:CPU,GPU -m <model> -i <input> -niter 1000
+```
+Note that the default number of CPU streams is 1 when using "-d AUTO".
+
+Note that you can use the FP16 IR to work with the Auto-device.
+Also note that no demos are (yet) fully optimized for the Auto-device, by means of selecting the most suitable device,
+using the GPU streams/throttling, and so on.
diff --git a/docs/IE_DG/supported_plugins/Supported_Devices.md b/docs/IE_DG/supported_plugins/Supported_Devices.md
index e1140ae4b74..5c003fc86bb 100644
--- a/docs/IE_DG/supported_plugins/Supported_Devices.md
+++ b/docs/IE_DG/supported_plugins/Supported_Devices.md
@@ -13,7 +13,8 @@ The Inference Engine provides unique capabilities to infer deep learning models
 |[CPU plugin](CPU.md)              |Intel® Xeon® with Intel® Advanced Vector Extensions 2 (Intel® AVX2), Intel® Advanced Vector Extensions 512 (Intel® AVX-512), and AVX512_BF16, Intel® Core™ Processors with Intel® AVX2, Intel® Atom® Processors with Intel® Streaming SIMD Extensions (Intel® SSE) |
 |[VPU plugins](VPU.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs |
 |[GNA plugin](GNA.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Speech Enabling Developer Kit, Amazon Alexa* Premium Far-Field Developer Kit, Intel® Pentium® Silver J5005 Processor, Intel® Pentium® Silver N5000 Processor, Intel® Celeron® J4005 Processor, Intel® Celeron® J4105 Processor, Intel® Celeron® Processor N4100, Intel® Celeron® Processor N4000, Intel® Core™ i3-8121U Processor, Intel® Core™ i7-1065G7 Processor, Intel® Core™ i7-1060G7 Processor, Intel® Core™ i5-1035G4 Processor, Intel® Core™ i5-1035G7 Processor, Intel® Core™ i5-1035G1 Processor, Intel® Core™ i5-1030G7 Processor, Intel® Core™ i5-1030G4 Processor, Intel® Core™ i3-1005G1 Processor, Intel® Core™ i3-1000G1 Processor, Intel® Core™ i3-1000G4 Processor|
-|[Multi-Device plugin](MULTI.md) |Multi-Device plugin enables simultaneous inference of the same network on several Intel® devices in parallel |
+|[Multi-Device plugin](MULTI.md) |Multi-Device plugin enables simultaneous inference of the same network on several Intel® devices in parallel |
+|[Auto-Device plugin](AUTO.md) |Auto-Device plugin enables automatic selection of an Intel® device for inference |
 |[Heterogeneous plugin](HETERO.md) |Heterogeneous plugin enables automatic inference splitting between several Intel® devices (for example if a device doesn't [support certain layers](#supported-layers)). |
 
 Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/).
diff --git a/docs/doxygen/ie_docs.xml b/docs/doxygen/ie_docs.xml
index f5ef147751f..ee07308a19a 100644
--- a/docs/doxygen/ie_docs.xml
+++ b/docs/doxygen/ie_docs.xml
@@ -326,6 +326,7 @@ limitations under the License.
+
diff --git a/docs/snippets/AUTO0.cpp b/docs/snippets/AUTO0.cpp
new file mode 100644
index 00000000000..b546e61a1c6
--- /dev/null
+++ b/docs/snippets/AUTO0.cpp
@@ -0,0 +1,12 @@
+#include <inference_engine.hpp>
+
+int main() {
+//! [part0]
+    InferenceEngine::Core ie;
+    InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
+    // the 2 lines below are equivalent
+    InferenceEngine::ExecutableNetwork exec0 = ie.LoadNetwork(network, "AUTO");
+    InferenceEngine::ExecutableNetwork exec1 = ie.LoadNetwork(network, "");
+//! [part0]
+    return 0;
+}
diff --git a/docs/snippets/AUTO1.cpp b/docs/snippets/AUTO1.cpp
new file mode 100644
index 00000000000..22487b5aeb0
--- /dev/null
+++ b/docs/snippets/AUTO1.cpp
@@ -0,0 +1,15 @@
+#include <inference_engine.hpp>
+
+int main() {
+//! [part1]
+    InferenceEngine::Core ie;
+    InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
+    // the "AUTO" plugin is (globally) pre-configured with the explicit option:
+    ie.SetConfig({{"AUTO_DEVICE_LIST", "CPU,GPU"}}, "AUTO");
+    // the 3 lines below are equivalent (the first leverages the pre-configured AUTO, while the second and third explicitly pass the same settings)
+    InferenceEngine::ExecutableNetwork exec0 = ie.LoadNetwork(network, "AUTO", {});
+    InferenceEngine::ExecutableNetwork exec1 = ie.LoadNetwork(network, "AUTO", {{"AUTO_DEVICE_LIST", "CPU,GPU"}});
+    InferenceEngine::ExecutableNetwork exec2 = ie.LoadNetwork(network, "AUTO:CPU,GPU");
+//! [part1]
+    return 0;
+}
diff --git a/docs/snippets/AUTO2.cpp b/docs/snippets/AUTO2.cpp
new file mode 100644
index 00000000000..c70e2923af7
--- /dev/null
+++ b/docs/snippets/AUTO2.cpp
@@ -0,0 +1,10 @@
+#include <inference_engine.hpp>
+
+int main() {
+//! [part2]
+    InferenceEngine::Core ie;
+    InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
+    InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "AUTO");
+//! [part2]
+    return 0;
+}
diff --git a/docs/snippets/AUTO3.cpp b/docs/snippets/AUTO3.cpp
new file mode 100644
index 00000000000..37e8e350768
--- /dev/null
+++ b/docs/snippets/AUTO3.cpp
@@ -0,0 +1,10 @@
+#include <inference_engine.hpp>
+
+int main() {
+//! [part3]
+    InferenceEngine::Core ie;
+    InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
+    InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "AUTO:CPU,GPU");
+//! [part3]
+    return 0;
+}
diff --git a/docs/snippets/AUTO4.cpp b/docs/snippets/AUTO4.cpp
new file mode 100644
index 00000000000..ee39e7103d7
--- /dev/null
+++ b/docs/snippets/AUTO4.cpp
@@ -0,0 +1,19 @@
+#include <inference_engine.hpp>
+
+int main() {
+    const std::map<std::string, std::string> cpu_config = { { InferenceEngine::PluginConfigParams::KEY_PERF_COUNT, InferenceEngine::PluginConfigParams::YES } };
+    const std::map<std::string, std::string> gpu_config = { { InferenceEngine::PluginConfigParams::KEY_PERF_COUNT, InferenceEngine::PluginConfigParams::YES } };
+    //! [part4]
+    InferenceEngine::Core ie;
+    InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
+    // configure the CPU device first
+    ie.SetConfig(cpu_config, "CPU");
+    // configure the GPU device
+    ie.SetConfig(gpu_config, "GPU");
+    // load the network to the Auto-device
+    InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "AUTO");
+    // the new metric allows querying the optimization capabilities
+    std::vector<std::string> device_cap = exeNetwork.GetMetric(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
+    //! [part4]
+    return 0;
+}
diff --git a/docs/snippets/AUTO5.cpp b/docs/snippets/AUTO5.cpp
new file mode 100644
index 00000000000..e0678b4e0de
--- /dev/null
+++ b/docs/snippets/AUTO5.cpp
@@ -0,0 +1,15 @@
+#include <inference_engine.hpp>
+
+int main() {
+    std::string device_name = "AUTO:CPU,GPU";
+    const std::map<std::string, std::string> full_config = {};
+    //! [part5]
+    InferenceEngine::Core ie;
+    InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
+    // 'device_name' can be "AUTO:CPU,GPU" to configure the Auto-device to use CPU and GPU
+    InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, device_name, full_config);
+    // the new metric allows querying the optimization capabilities
+    std::vector<std::string> device_cap = exeNetwork.GetMetric(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
+    //! [part5]
+    return 0;
+}