Documentation updates (#1433)

@ -19,6 +19,114 @@ Starting with the OpenVINO™ toolkit 2020.2 release, all of the features previo

Therefore, ONNX RT Execution Provider for nGraph will be deprecated starting June 1, 2020 and will be completely removed on December 1, 2020. Users are recommended to migrate to the ONNX RT Execution Provider for OpenVINO™ toolkit as the unified solution for all AI inferencing on Intel® hardware.

## 2021.1

### Removed API

**Plugin API:**

* InferenceEngine::InferencePlugin C++ plugin wrapper class
* InferenceEngine::IInferencePlugin plugin interface
* InferenceEngine::PluginDispatcher class
* InferenceEngine::InferenceEnginePluginPtr typedef
* InferenceEngine::ICNNNetReader reader interface
* InferenceEngine::CNNNetReader class
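
The removed wrapper and reader classes are superseded by `InferenceEngine::Core`. Below is a minimal migration sketch; the model paths and the "CPU" device name are illustrative placeholders, not part of this changelog:

```cpp
#include <inference_engine.hpp>

int main() {
    // Previously: PluginDispatcher/InferencePlugin loaded a device plugin and
    // CNNNetReader parsed the IR. Now a single Core object covers both steps.
    InferenceEngine::Core ie;
    InferenceEngine::CNNNetwork network = ie.ReadNetwork("model.xml", "model.bin");  // placeholder paths
    InferenceEngine::ExecutableNetwork executable = ie.LoadNetwork(network, "CPU");  // placeholder device
    InferenceEngine::InferRequest request = executable.CreateInferRequest();
    request.Infer();
    return 0;
}
```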

**Extensibility API:**

* InferenceEngine::ILayerImplFactory class
* InferenceEngine::IShapeInferImpl class
* InferenceEngine::IShapeInferExtension class
* InferenceEngine::IExtension::getFactoryFor(ILayerImplFactory\*& factory, const CNNLayer\* cnnLayer, ResponseDesc\* resp) noexcept method
* InferenceEngine::IExtension::getPrimitiveTypes(char\*\*& types, unsigned int& size, ResponseDesc\* resp) noexcept method
* InferenceEngine::ShapeInferImpl class
* InferenceEngine::Extension::getFactoryFor(ILayerImplFactory\*& factory, const CNNLayer\* cnnLayer, ResponseDesc\* resp) noexcept method
* InferenceEngine::Extension::getPrimitiveTypes(char\*\*& types, unsigned int& size, ResponseDesc\* resp) noexcept method
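
The factory-based extension points above are replaced by the nGraph-based extension mechanism. The sketch below lists only the replacement methods and omits the remaining `IExtension` plumbing; the class name is hypothetical and the exact signatures should be checked against `ie_iextension.h` for your release:

```cpp
#include <ie_iextension.h>
#include <ngraph/ngraph.hpp>

// Sketch: an extension now describes custom operations as nGraph opsets and
// returns per-node kernel implementations instead of CNNLayer factories.
class MyNgraphExtension : public InferenceEngine::IExtension {
public:
    std::map<std::string, ngraph::OpSet> getOpSets() override;                                  // custom opsets
    std::vector<std::string> getImplTypes(const std::shared_ptr<ngraph::Node>& node) override;  // e.g. "CPU"
    InferenceEngine::ILayerImpl::Ptr getImplementation(const std::shared_ptr<ngraph::Node>& node,
                                                       const std::string& implType) override;
    // GetVersion/Unload and other required overrides are omitted in this sketch.
};
```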

**Network API:**

* InferenceEngine::details::CNNNetworkIterator class
* InferenceEngine::CNNNetwork::getPrecision() const method
* InferenceEngine::CNNNetwork::getLayerByName(const char\* layerName) const method
* InferenceEngine::CNNNetwork::size() const method
* InferenceEngine::CNNNetwork::begin() const method
* InferenceEngine::CNNNetwork::end() const method
* InferenceEngine::CNNNetwork::AddExtension(const IShapeInferExtensionPtr& extension) method
* InferenceEngine::ICNNNetwork::getPrecision() const noexcept method
* InferenceEngine::ICNNNetwork::getName(char\* pName, size_t len) const noexcept method
* InferenceEngine::ICNNNetwork::getData(const char\* dname) noexcept method
* InferenceEngine::ICNNNetwork::addLayer(const CNNLayerPtr& layer) noexcept method
* InferenceEngine::ICNNNetwork::getLayerByName(const char\* layerName, CNNLayerPtr& out, ResponseDesc\* resp) const noexcept method
* InferenceEngine::ICNNNetwork::AddExtension(const IShapeInferExtensionPtr& extension, ResponseDesc\* resp) noexcept method
* InferenceEngine::ICNNNetwork::getStats(ICNNNetworkStats\*\* stats, ResponseDesc\* resp) const noexcept method
* InferenceEngine::ICNNNetworkStats class
* InferenceEngine::NetworkNodeStats class
* InferenceEngine::Data::getCreatorLayer() method
* InferenceEngine::Data::getInputTo() method
* InferenceEngine::LayerParams class
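
With the `CNNLayer`-based accessors gone, network contents are inspected through the nGraph function. A short sketch, assuming a `CNNNetwork` object named `network` already exists:

```cpp
// Traverse operations via the nGraph representation instead of
// CNNNetworkIterator / getLayerByName / Data::getCreatorLayer.
std::shared_ptr<ngraph::Function> function = network.getFunction();
if (function) {  // may be null for networks without an nGraph representation
    for (const auto& op : function->get_ordered_ops()) {
        std::cout << op->get_friendly_name() << " : " << op->get_type_name() << std::endl;
    }
}
```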

**Layer API:**

* InferenceEngine::CNNLayer class
* InferenceEngine::WeightableLayer class
* InferenceEngine::BatchNormalizationLayer class
* InferenceEngine::BatchToSpaceLayer class
* InferenceEngine::BinaryConvolutionLayer class
* InferenceEngine::BroadcastLayer class
* InferenceEngine::BucketizeLayer class
* InferenceEngine::ClampLayer class
* InferenceEngine::ConcatLayer class
* InferenceEngine::ConvolutionLayer class
* InferenceEngine::CropLayer class
* InferenceEngine::DeconvolutionLayer class
* InferenceEngine::DeformableConvolutionLayer class
* InferenceEngine::DepthToSpaceLayer class
* InferenceEngine::EltwiseLayer class
* InferenceEngine::ExperimentalDetectronPriorGridGenerator class
* InferenceEngine::ExperimentalDetectronPriorGridGeneratorLayer class
* InferenceEngine::ExperimentalSparseWeightedReduceLayer class
* InferenceEngine::FillLayer class
* InferenceEngine::FullyConnectedLayer class
* InferenceEngine::GRNLayer class
* InferenceEngine::GRUCell class
* InferenceEngine::GatherLayer class
* InferenceEngine::GemmLayer class
* InferenceEngine::LSTMCell class
* InferenceEngine::MVNLayer class
* InferenceEngine::MathLayer class
* InferenceEngine::NonMaxSuppression class
* InferenceEngine::NormLayer class
* InferenceEngine::OneHotLayer class
* InferenceEngine::PReLULayer class
* InferenceEngine::PadLayer class
* InferenceEngine::PoolingLayer class
* InferenceEngine::PowerLayer class
* InferenceEngine::QuantizeLayer class
* InferenceEngine::RNNCell class
* InferenceEngine::RNNCellBase class
* InferenceEngine::RNNSequenceLayer class
* InferenceEngine::RangeLayer class
* InferenceEngine::ReLU6Layer class
* InferenceEngine::ReLULayer class
* InferenceEngine::ReduceLayer class
* InferenceEngine::ReshapeLayer class
* InferenceEngine::ReverseSequenceLayer class
* InferenceEngine::ScaleShiftLayer class
* InferenceEngine::ScatterLayer class
* InferenceEngine::SelectLayer class
* InferenceEngine::ShuffleChannelsLayer class
* InferenceEngine::SoftMaxLayer class
* InferenceEngine::SpaceToBatchLayer class
* InferenceEngine::SpaceToDepthLayer class
* InferenceEngine::SparseFillEmptyRowsLayer class
* InferenceEngine::SparseSegmentReduceLayer class
* InferenceEngine::SparseToDenseLayer class
* InferenceEngine::SplitLayer class
* InferenceEngine::StridedSliceLayer class
* InferenceEngine::TensorIterator class
* InferenceEngine::TileLayer class
* InferenceEngine::TopKLayer class
* InferenceEngine::UniqueLayer class
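
The removed layer classes map onto operations from the standard nGraph opsets, so type checks are now performed on nGraph nodes. A sketch, assuming `op` is a `std::shared_ptr<ngraph::Node>` obtained from the traversal shown earlier:

```cpp
#include <ngraph/opsets/opset3.hpp>

// Instead of casting to InferenceEngine::ReLULayer, ConvolutionLayer, and so on,
// inspect the nGraph node type directly.
if (auto relu = std::dynamic_pointer_cast<ngraph::opset3::Relu>(op)) {
    // ReLU-specific handling
} else if (auto conv = std::dynamic_pointer_cast<ngraph::opset3::Convolution>(op)) {
    // Convolution-specific handling
}
```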

## 2020.4

@ -33,6 +141,75 @@ Therefore, ONNX RT Execution Provider for nGraph will be deprecated starting Jun

* METRIC_KEY(OPTIMIZATION_CAPABILITIES)
* METRIC_VALUE(BF16)
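
A short usage sketch for the new metric; the "CPU" device name is an illustrative choice:

```cpp
#include <algorithm>

InferenceEngine::Core ie;
// Devices with native bfloat16 support report "BF16" among their optimization capabilities.
auto capabilities = ie.GetMetric("CPU", METRIC_KEY(OPTIMIZATION_CAPABILITIES))
                        .as<std::vector<std::string>>();
bool bf16Supported = std::find(capabilities.begin(), capabilities.end(),
                               METRIC_VALUE(BF16)) != capabilities.end();
```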

### Deprecated API

**Myriad Plugin API:**

* VPU_CONFIG_KEY(IGNORE_IR_STATISTIC)

### Removed API

**Inference Engine NN Builder API:**

* InferenceEngine::Builder::EltwiseLayer
* InferenceEngine::Builder::MemoryLayer
* InferenceEngine::Builder::ROIPoolingLayer
* InferenceEngine::Builder::DeconvolutionLayer
* InferenceEngine::Builder::ReLULayer
* InferenceEngine::Builder::TanHLayer
* InferenceEngine::Builder::InputLayer
* InferenceEngine::Builder::PoolingLayer
* InferenceEngine::Builder::CropLayer
* InferenceEngine::Builder::GRUSequenceLayer
* InferenceEngine::Builder::NormLayer
* InferenceEngine::Builder::LSTMSequenceLayer
* InferenceEngine::Builder::ClampLayer
* InferenceEngine::Builder::PSROIPoolingLayer
* InferenceEngine::Builder::Layer
* InferenceEngine::Builder::RNNSequenceLayer
* InferenceEngine::Builder::ReorgYoloLayer
* InferenceEngine::Builder::NormalizeLayer
* InferenceEngine::Builder::PriorBoxClusteredLayer
* InferenceEngine::Builder::MVNLayer
* InferenceEngine::Builder::PermuteLayer
* InferenceEngine::Builder::SimplerNMSLayer
* InferenceEngine::Builder::ConstLayer
* InferenceEngine::Builder::DeformableConvolutionLayer
* InferenceEngine::Builder::FullyConnectedLayer
* InferenceEngine::Builder::PriorBoxLayer
* InferenceEngine::Builder::SoftMaxLayer
* InferenceEngine::Builder::OutputLayer
* InferenceEngine::Builder::TileLayer
* InferenceEngine::Builder::SplitLayer
* InferenceEngine::Builder::PReLULayer
* InferenceEngine::Builder::RegionYoloLayer
* InferenceEngine::Builder::ReshapeLayer
* InferenceEngine::Builder::ConvolutionLayer
* InferenceEngine::Builder::DetectionOutputLayer
* InferenceEngine::Builder::ConcatLayer
* InferenceEngine::Builder::ELULayer
* InferenceEngine::Builder::GRNLayer
* InferenceEngine::Builder::LRNLayer
* InferenceEngine::Builder::ArgMaxLayer
* InferenceEngine::Builder::ReLU6Layer
* InferenceEngine::Builder::ScaleShiftLayer
* InferenceEngine::Builder::ProposalLayer
* InferenceEngine::Builder::SigmoidLayer
* InferenceEngine::Builder::ResampleLayer
* InferenceEngine::Builder::CTCGreedyDecoderLayer
* InferenceEngine::Builder::BatchNormalizationLayer
* InferenceEngine::Builder::LayerDecorator
* InferenceEngine::Builder::PowerLayer
* InferenceEngine::Builder::Network
* InferenceEngine::Builder::PortInfo
* InferenceEngine::Builder::Connection
* InferenceEngine::Builder::PortData
* InferenceEngine::Builder::Port
* InferenceEngine::Builder::ILayer
* InferenceEngine::Builder::INetworkIterator
* InferenceEngine::Builder::INetwork
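
Networks that were previously assembled with the NN Builder API are now constructed directly as an nGraph function and wrapped into a `CNNNetwork`. A minimal sketch; the layer choice and names are illustrative:

```cpp
#include <inference_engine.hpp>
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset3.hpp>

// Build a trivial Parameter -> ReLU -> Result graph instead of using
// the removed InferenceEngine::Builder classes.
auto param  = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32,
                                                          ngraph::Shape{1, 3, 224, 224});
auto relu   = std::make_shared<ngraph::opset3::Relu>(param);
auto result = std::make_shared<ngraph::opset3::Result>(relu);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
                                                   ngraph::ParameterVector{param},
                                                   "simple_net");
InferenceEngine::CNNNetwork network(function);
```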

## 2020.2

### New API

@ -273,7 +450,6 @@ Therefore, ONNX RT Execution Provider for nGraph will be deprecated starting Jun

* InferenceEngine::Builder::INetwork
* InferenceEngine::Builder::ILayer

**Plugin API:**

* InferenceEngine::InferencePlugin C++ plugin wrapper class

@ -40,13 +40,6 @@ The following pages describe how to integrate custom _kernels_ into the Inferenc

* [Introduction to development of custom GPU kernels](GPU_Kernel.md)
* [Introduction to development of custom VPU kernels](VPU_Kernel.md)

## Deprecated Extensibility API

The Shape Inference API and some methods of the extensibility mechanism were deprecated and will be removed soon.
The old extensibility mechanism consists of two parts: shape inference and execution kernels.
* [Shape Inference](deprecated/ShapeInfer.md)
* [Execution Kernel](deprecated/Factory.md)

## Additional Resources

* [Build an extension library using CMake*](Building.md)

@ -1,96 +0,0 @@

# Deprecated API for CPU kernels creation {#openvino_docs_IE_DG_Extensibility_DG_deprecated_Factory}

List of deprecated APIs for kernel development:
* `InferenceEngine::IExtension::getPrimitiveTypes(char**& types, unsigned int& size, ResponseDesc* resp)` method
* `InferenceEngine::IExtension::getFactoryFor(ILayerImplFactory *&factory, const CNNLayer *cnnLayer, ResponseDesc *resp)` method
* `InferenceEngine::ILayerImplFactory` class

>**NOTE**: This guide demonstrates how to use the deprecated API for kernel creation. However, keep in mind that this API will be removed soon.

1. Create your custom layer factory `CustomLayerFactory` class:
```cpp
// custom_layer.h
// The CustomLayerFactory class is an example factory for a layer that squares its input and does not change dimensions
class CustomLayerFactory {

};
```
2. Inherit it from the abstract `InferenceEngine::ILayerImplFactory` class:
```cpp
// custom_layer.h
class CustomLayerFactory: public InferenceEngine::ILayerImplFactory {

};
```

3. Create a constructor, a virtual destructor, and a data member to keep the layer info:
```cpp
// custom_layer.h
class CustomLayerFactory: public InferenceEngine::ILayerImplFactory {
public:
    explicit CustomLayerFactory(const CNNLayer *layer): cnnLayer(*layer) {}
private:
    CNNLayer cnnLayer;
};
```

4. Overload and implement the abstract methods `getShapes` and `getImplementations` of the `InferenceEngine::ILayerImplFactory` class:
```cpp
// custom_layer.h
class CustomLayerFactory: public InferenceEngine::ILayerImplFactory {
public:
    // ... constructor and destructor

    StatusCode getShapes(const std::vector<TensorDesc>& inShapes, std::vector<TensorDesc>& outShapes, ResponseDesc *resp) noexcept override {
        // cnnLayer is copied by value in the constructor, so no null check is needed here
        if (inShapes.size() != 1) {
            std::string errorMsg = "Incorrect input shapes!";
            errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
            return GENERAL_ERROR;
        }
        outShapes.clear();
        outShapes.emplace_back(inShapes[0]);
        return OK;
    }

    StatusCode getImplementations(std::vector<ILayerImpl::Ptr>& impls, ResponseDesc *resp) noexcept override {
        // You can pass cnnLayer to the implementation if it is necessary
        impls.push_back(ILayerImpl::Ptr(new CustomLayerImpl()));
        return OK;
    }
};
```
5. Create your custom layer implementation `CustomLayerImpl` class using the [instruction](../CPU_Kernel.md).
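For reference, a bare-bones skeleton of such a class is sketched below. It assumes the deprecated `InferenceEngine::ILayerExecImpl` interface described in that instruction; only the method signatures are shown:
```cpp
// custom_layer_impl.h -- sketch only; see ../CPU_Kernel.md for a complete implementation
class CustomLayerImpl : public InferenceEngine::ILayerExecImpl {
public:
    StatusCode getSupportedConfigurations(std::vector<LayerConfig>& conf, ResponseDesc* resp) noexcept override;
    StatusCode init(LayerConfig& config, ResponseDesc* resp) noexcept override;
    // The actual math of the example layer (squaring the input) goes into execute().
    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc* resp) noexcept override;
};
```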

6. Implement methods in the `Extension` class:
```cpp
// custom_extension.h
class CustomExtension : public InferenceEngine::IExtension {
public:
    // ... utility methods
    // Returns the list of supported kernels/layers
    StatusCode getPrimitiveTypes(char**& types, unsigned int& size, ResponseDesc* resp) noexcept override {
        std::string type_name = "CustomLayer";
        types = new char *[1];
        size = 1;
        types[0] = new char[type_name.size() + 1];
        std::copy(type_name.begin(), type_name.end(), types[0]);
        types[0][type_name.size()] = '\0';
        return OK;
    }
    // Main function
    StatusCode getFactoryFor(ILayerImplFactory *&factory, const CNNLayer *cnnLayer, ResponseDesc *resp) noexcept override {
        if (cnnLayer->type != "CustomLayer") {
            std::string errorMsg = std::string("Factory for ") + cnnLayer->type + " wasn't found!";
            errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
            return NOT_FOUND;
        }
        factory = new CustomLayerFactory(cnnLayer);
        return OK;
    }
};
```

@ -1,18 +0,0 @@

# Old ShapeInference Extensibility API {#openvino_docs_IE_DG_Extensibility_DG_deprecated_ShapeInfer}

The new approach to shape inference suggests creating a custom nGraph operation that contains a dedicated shape inference method.
The following classes and methods were deprecated:

* `InferenceEngine::IShapeInferExtension` class
* `InferenceEngine::IShapeInferExtension::getShapeInferTypes(char**&, unsigned int&, ResponseDesc*)` method
* `InferenceEngine::IShapeInferExtension::getShapeInferImpl(IShapeInferImpl::Ptr&, const char*, ResponseDesc*)` method

However, the old approach with the `InferenceEngine::IShapeInferExtension` interface still works for existing custom layers.
Custom shape inference functions are registered by calling `InferenceEngine::ICNNNetwork::AddExtension` with an implemented `InferenceEngine::IShapeInferExtension`, which is a holder of custom implementations.
The holder must implement two key methods:
* `InferenceEngine::IShapeInferExtension::getShapeInferImpl` - Returns the custom shape inference implementation for the given type.
* `InferenceEngine::IShapeInferExtension::getShapeInferTypes` - Provides all custom types.

A custom shape inference implementation is represented by the `InferenceEngine::IShapeInferImpl::inferShapes` method.

It is impossible to overwrite built-in shape inference functions. Custom type must be different from the supported ones.
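
For comparison, the new approach implements shape propagation inside the custom nGraph operation itself. A rough sketch is shown below; the class name is hypothetical and the exact base-class API may differ between releases:

```cpp
#include <ngraph/ngraph.hpp>

// Sketch of a custom nGraph operation whose shape inference lives in
// validate_and_infer_types() instead of an IShapeInferImpl.
class CustomOp : public ngraph::op::Op {
public:
    static constexpr ngraph::NodeTypeInfo type_info{"CustomOp", 0};
    const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; }

    CustomOp() = default;
    explicit CustomOp(const ngraph::Output<ngraph::Node>& arg) : Op({arg}) {
        constructor_validate_and_infer_types();
    }

    void validate_and_infer_types() override {
        // This example keeps the input element type and shape unchanged.
        set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
    }

    std::shared_ptr<ngraph::Node> clone_with_new_inputs(const ngraph::OutputVector& new_args) const override {
        return std::make_shared<CustomOp>(new_args.at(0));
    }
};

constexpr ngraph::NodeTypeInfo CustomOp::type_info;
```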

@ -68,13 +68,11 @@ Glossary of terms used in the Inference Engine

| Extensibility mechanism, Custom layers | The mechanism that provides you with capabilities to extend the Inference Engine and Model Optimizer so that they can work with topologies containing layers that are not yet supported |
| <code>ICNNNetwork</code> | An interface of the convolutional neural network that the Inference Engine reads from the IR. Consists of topology, weights and biases |
| <code>IExecutableNetwork</code> | An instance of the loaded network which allows the Inference Engine to request (several) infer requests and perform inference synchronously or asynchronously |
| <code>IHeteroInferencePlugin</code> | Interface that is implemented by the heterogeneity plugin to allow the Inference Engine to set the default affinities for layers by devices before loading the network to the heterogeneous plugin. You can modify affinities manually before loading to the plugin. |
| <code>IInferencePlugin</code> | Interface provided by each plugin to allow the Inference Engine to load <code>ICNNNetwork</code> to the plugin, create an executable network and set special dedicated options for the plugin |
| <code>IInferRequest</code> | Interface that represents the end point of inference on the model loaded to the plugin and represented by the executable network. Inputs are set here, and outputs should be requested from this interface as well |
| <code>InferenceEngineProfileInfo</code> | Represents basic inference profiling information per layer |
| Inference Engine | A C++ library with a set of classes that you can use in your application to infer input data (images) and get the result |
| Inference Engine API | The basic default API for all supported devices, which allows you to load a model from Intermediate Representation, set input and output formats, and execute the model on various devices |
| Inference Engine Plugin | Inference Engine plugin is a software component that contains complete implementation for inference on a certain Intel(R) hardware device: CPU, GPU, VPU, FPGA, etc. Each plugin implements the unified API and provides additional hardware-specific APIs. |
| Inference Engine <code>Core</code> | Inference Engine Core is a software component that manages inference on certain Intel(R) hardware devices: CPU, GPU, MYRIAD, GNA, etc. |
| Layer catalog or Operations specification | A list of supported layers or operations and their parameters. Sets of supported layers are different for different plugins; check the documentation on plugins to verify whether the Inference Engine supports a certain layer on the dedicated hardware |
| <code>Layout</code> | Image data layout refers to the representation of an image batch. The layout shows a sequence of 4D or 5D tensor data in memory. A typical NCHW format represents pixels in the horizontal direction, rows in the vertical dimension, planes by channel, and images in the batch |
| <code>OutputsDataMap</code> | Structure which contains information about output precisions and layouts |

@ -21,21 +21,30 @@ Modules in the Inference Engine component

### Core Inference Engine Libraries ###

Your application must link to the core Inference Engine libraries:
* Linux* OS:
    - `libinference_engine.so`, which depends on `libinference_engine_transformations.so` and `libngraph.so`
    - `libinference_engine_legacy.so`, which depends on `libtbb.so`
* Windows* OS:
    - `inference_engine.dll`, which depends on `inference_engine_transformations.dll` and `ngraph.dll`
    - `inference_engine_legacy.dll`, which depends on `tbb.dll`

The required C++ header files are located in the `include` directory.

These libraries contain the classes to (a short usage sketch follows the list):
* Read the network (InferenceEngine::CNNNetReader)
* Create Inference Engine Core object to work with devices and read network (InferenceEngine::Core)
* Manipulate network information (InferenceEngine::CNNNetwork)
* Create Inference Engine Core object to work with devices (InferenceEngine::Core)
* Execute and pass inputs and outputs (InferenceEngine::ExecutableNetwork and InferenceEngine::InferRequest)
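
A short usage sketch tying these classes together; the model path, input precision, and device name are placeholders:

```cpp
InferenceEngine::Core ie;                                           // work with devices
InferenceEngine::CNNNetwork network = ie.ReadNetwork("model.xml");  // read a network
network.getInputsInfo().begin()->second->setPrecision(InferenceEngine::Precision::U8);  // manipulate network information
InferenceEngine::ExecutableNetwork executable = ie.LoadNetwork(network, "CPU");
InferenceEngine::InferRequest request = executable.CreateInferRequest();                // execute
request.Infer();
```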

### Plugin Libraries to read a network object ###

Starting from the 2020.4 release, the Inference Engine introduced the concept of `CNNNetwork` reader plugins. Such plugins are loaded by the Inference Engine dynamically at runtime, depending on the file format (a short sketch follows the list):
* Linux* OS:
    - `libinference_engine_ir_reader.so` to read a network from IR
    - `libinference_engine_onnx_reader.so` to read a network from ONNX model format
* Windows* OS:
    - `inference_engine_ir_reader.dll` to read a network from IR
- `inference_engine_onnx_reader.dll` to read a network from ONNX model format
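
A minimal sketch of how this looks from the API side; the file names are placeholders:

```cpp
InferenceEngine::Core ie;
// The IR reader plugin is loaded automatically for IR (.xml/.bin) models...
auto irNetwork   = ie.ReadNetwork("model.xml", "model.bin");
// ...and the ONNX reader plugin for ONNX models.
auto onnxNetwork = ie.ReadNetwork("model.onnx");
```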

### Device-specific Plugin Libraries ###

For each supported target device, Inference Engine provides a plugin — a DLL/shared library that contains complete implementation for inference on this particular device. The following plugins are available:

@ -43,7 +43,6 @@ Thus the OpenVINO IR becomes a new serialization format for the nGraph IR, and i

> **IMPORTANT**: Conventional interfaces are used (`CNNNetwork`, the reader), so no changes are required in most applications.

> **NOTE**: While you can still use the old APIs, there is an independent process of continuous improvement of the Inference Engine API.
> For example, the `Core::ReadNetwork` API is recommended instead of `CNNNetReader`.
> These changes are independent of nGraph integration and do not enable or disable new features.

Interpretation of the IR version 10 differs from the old IR version.

@ -116,7 +116,7 @@ When specifying key values as raw strings (that is, when using Python API), omit

## How to Interpret Performance Counters

As a result of collecting performance counters using `InferenceEngine::IInferencePlugin::GetPerformanceCounts`, you can find various performance data about execution on GNA.
As a result of collecting performance counters using `InferenceEngine::InferRequest::GetPerformanceCounts`, you can find various performance data about execution on GNA.
The returned map stores a counter description as a key, and the counter value is stored in the `realTime_uSec` field of the `InferenceEngineProfileInfo` structure. The current GNA implementation calculates counters for whole-utterance scoring and does not provide per-layer information. The API reports counter units in cycles, which can be converted to seconds as follows:

```

@ -41,17 +41,6 @@ Basically, there are three ways to specify the devices to be use by the "MULTI":

ExecutableNetwork exec1 = ie.LoadNetwork(network, "MULTI", {{"MULTI_DEVICE_PRIORITIES", "HDDL,GPU"}});
//NEW IE-CENTRIC API, same as previous, but configuration of the "MULTI" is part of the name (so config is empty), also network-specific:
ExecutableNetwork exec2 = ie.LoadNetwork(network, "MULTI:HDDL,GPU", {});

//Similarly for the deprecated (plugin-centric) API
//for example globally pre-configuring the plugin with the explicit option:
//auto plugin0 = PluginDispatcher().getPluginByDevice("MULTI");
//plugin0.SetConfig({{"MULTI_DEVICE_PRIORITIES", "HDDL,GPU"}});
//ExecutableNetwork exec3 = plugin0.LoadNetwork(network, {});
// part of the config for the LoadNetwork or device name
//ExecutableNetwork exec4 = plugin0.LoadNetwork(network, {{"MULTI_DEVICE_PRIORITIES", "HDDL,GPU"}});
// part of the device name
//auto plugin1 = PluginDispatcher().getPluginByDevice("MULTI:HDDL,GPU");
//ExecutableNetwork exec5 = plugin1.LoadNetwork(network, {});
```
Notice that the priorities of the devices can be changed in real time for the executable network:
```cpp

@ -260,8 +260,6 @@

<tab type="user" title="GPU Kernels Extensibility" url="@ref openvino_docs_IE_DG_Extensibility_DG_GPU_Kernel"/>
<tab type="user" title="VPU Kernels Extensibility" url="@ref openvino_docs_IE_DG_Extensibility_DG_VPU_Kernel"/>
<tab type="user" title="Build Extension Library Using CMake" url="@ref openvino_docs_IE_DG_Extensibility_DG_Building"/>
<tab type="user" title="[Deprecated] Shape Infer API" url="@ref openvino_docs_IE_DG_Extensibility_DG_deprecated_ShapeInfer"/>
<tab type="user" title="[Deprecated] CPU Kernels Extensibility API" url="@ref openvino_docs_IE_DG_Extensibility_DG_deprecated_Factory"/>
</tab>
<tab type="user" title="Integrate the Inference Engine with Your Application" url="@ref openvino_docs_IE_DG_Integrate_with_customer_application_new_API"/>
<tab type="user" title="Migration from Inference Engine Plugin API to Core API" url="@ref openvino_docs_IE_DG_Migration_CoreAPI"/>

@ -272,15 +272,13 @@ using namespace InferenceEngine::PluginConfigParams;

using namespace InferenceEngine::HeteroConfigParams;

...
enginePtr = dispatcher.getPluginByDevice("HETERO:FPGA,CPU");
InferencePlugin plugin(enginePtr);
plugin.SetConfig({ {KEY_HETERO_DUMP_GRAPH_DOT, YES} });
auto execNetwork = ie.LoadNetwork(network, "HETERO:FPGA,CPU", { {KEY_HETERO_DUMP_GRAPH_DOT, YES} });
```

After enabling the configuration key, the heterogeneous plugin generates two files:

- `hetero_affinity.dot` - per-layer affinities. This file is generated only if the default fallback policy was executed (otherwise you have already set the affinities yourself, so you know them).
- `hetero_subgraphs.dot` - affinities per sub-graph. This file is written to the disk during execution of `ICNNNetwork::LoadNetwork` for the heterogeneous plugin.
- `hetero_subgraphs.dot` - affinities per sub-graph. This file is written to the disk during execution of `Core::LoadNetwork` for the heterogeneous flow.

You can use the GraphViz\* utility or `.dot` converters (for example, to `.png` or `.pdf`), such as xdot\*, available on Linux\* OS with `sudo apt-get install xdot`. Below is an example of the output trimmed to the two last layers (one executed on the FPGA and another on the CPU):

@ -439,16 +437,11 @@ Infer Request based API offers two types of request: Sync and Async. The Sync is

More importantly, an infer request encapsulates the reference to the “executable” network and actual inputs/outputs. Now, when you load the network to the plugin, you get a reference to the executable network (you may consider that as a queue). Actual infer requests are created by the executable network:

```cpp
CNNNetReader network_reader;
network_reader.ReadNetwork("Model.xml");
network_reader.ReadWeights("Model.bin");
auto network = network_reader.getNetwork();
Core ie;
auto network = ie.ReadNetwork("Model.xml", "Model.bin");
InferenceEngine::InputsDataMap input_info(network.getInputsInfo());

InferenceEnginePluginPtr engine_ptr = PluginDispatcher(pluginDirs).getSuitablePlugin(TargetDevice::eGPU);
InferencePlugin plugin(engine_ptr);

auto executable_network = plugin.LoadNetwork(network, {/*opt config*/});
auto executable_network = ie.LoadNetwork(network, "GPU");
auto infer_request = executable_network.CreateInferRequest();

for (auto & item : input_info) {

@ -175,8 +175,6 @@ InferenceEngine::Parameter Plugin::GetMetric(const std::string& name, const std:

}
// ! [plugin:get_metric]

IE_SUPPRESS_DEPRECATED_START

// ! [plugin:create_plugin_engine]
INFERENCE_PLUGIN_API(StatusCode) CreatePluginEngine(IInferencePlugin *&plugin, ResponseDesc *resp) noexcept {
try {

@ -189,5 +187,3 @@ INFERENCE_PLUGIN_API(StatusCode) CreatePluginEngine(IInferencePlugin *&plugin, R

}
}
// ! [plugin:create_plugin_engine]

IE_SUPPRESS_DEPRECATED_END