Compare commits
42 Commits
2022.3.1
...
releases/v
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
45d159094b | ||
|
|
6f860ddcf2 | ||
|
|
a86ae42aed | ||
|
|
cef0696ef7 | ||
|
|
e57a96474d | ||
|
|
ed052022d3 | ||
|
|
6eda5c39c6 | ||
|
|
0100810dd6 | ||
|
|
882e377ef9 | ||
|
|
c9d5d95e2c | ||
|
|
d77bc36dcd | ||
|
|
19e1b6002e | ||
|
|
6bcd0f6072 | ||
|
|
0e8534a4a9 | ||
|
|
08d7c3e75f | ||
|
|
821d513150 | ||
|
|
38a48b9cbf | ||
|
|
c6d8905a88 | ||
|
|
3a80652d70 | ||
|
|
120d3a596d | ||
|
|
25af83db81 | ||
|
|
03c6f4e3fe | ||
|
|
5d3d323bed | ||
|
|
a53524a554 | ||
|
|
02d2dbd0fa | ||
|
|
bfe0748b4c | ||
|
|
d78577aecb | ||
|
|
e09f0e4808 | ||
|
|
ff73955354 | ||
|
|
18cb230af4 | ||
|
|
9067a25616 | ||
|
|
c4ff0ffa9d | ||
|
|
4675a12c8f | ||
|
|
3cd5da0797 | ||
|
|
9b402f226f | ||
|
|
784adca70a | ||
|
|
8e1603f7fd | ||
|
|
66ede40e4e | ||
|
|
40a29a7aa3 | ||
|
|
a7e00dae54 | ||
|
|
4c40494605 | ||
|
|
3e2a4a5df1 |
@@ -4,11 +4,13 @@ resources:
|
||||
type: github
|
||||
endpoint: openvinotoolkit
|
||||
name: openvinotoolkit/openvino_contrib
|
||||
ref: releases/2021/3
|
||||
|
||||
- repository: testdata
|
||||
type: github
|
||||
endpoint: openvinotoolkit
|
||||
name: openvinotoolkit/testdata
|
||||
ref: releases/2021/3
|
||||
|
||||
jobs:
|
||||
- job: Lin
|
||||
|
||||
@@ -64,13 +64,13 @@ jobs:
|
||||
|
||||
- task: CMake@1
|
||||
inputs:
|
||||
#-DENABLE_PROFILING_ITT=ON
|
||||
#-DSELECTIVE_BUILD=COLLECT
|
||||
cmakeArgs: >
|
||||
-GNinja
|
||||
-DVERBOSE_BUILD=ON
|
||||
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
|
||||
-DENABLE_FASTER_BUILD=ON
|
||||
-DENABLE_PROFILING_ITT=ON
|
||||
-DSELECTIVE_BUILD=COLLECT
|
||||
$(REPO_DIR)
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
|
||||
|
||||
@@ -4,11 +4,13 @@ resources:
|
||||
type: github
|
||||
endpoint: openvinotoolkit
|
||||
name: openvinotoolkit/openvino_contrib
|
||||
ref: releases/2021/3
|
||||
|
||||
- repository: testdata
|
||||
type: github
|
||||
endpoint: openvinotoolkit
|
||||
name: openvinotoolkit/testdata
|
||||
ref: releases/2021/3
|
||||
|
||||
jobs:
|
||||
- job: Mac
|
||||
|
||||
@@ -4,11 +4,13 @@ resources:
|
||||
type: github
|
||||
endpoint: openvinotoolkit
|
||||
name: openvinotoolkit/openvino_contrib
|
||||
ref: releases/2021/3
|
||||
|
||||
- repository: testdata
|
||||
type: github
|
||||
endpoint: openvinotoolkit
|
||||
name: openvinotoolkit/testdata
|
||||
ref: releases/2021/3
|
||||
|
||||
jobs:
|
||||
- job: Win
|
||||
|
||||
89
.ci/azure/windows_conditional_compilation.yml
Normal file
89
.ci/azure/windows_conditional_compilation.yml
Normal file
@@ -0,0 +1,89 @@
|
||||
jobs:
|
||||
- job: WinCC
|
||||
# About 150% of total time
|
||||
timeoutInMinutes: 120
|
||||
|
||||
pool:
|
||||
name: WIN_VMSS_VENV_F8S_WU2
|
||||
|
||||
variables:
|
||||
system.debug: true
|
||||
VSTS_HTTP_RETRY: 5
|
||||
VSTS_HTTP_TIMEOUT: 200
|
||||
WORKERS_NUMBER: 8
|
||||
BUILD_TYPE: Release
|
||||
REPO_DIR: $(Build.Repository.LocalPath)
|
||||
OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)\..\openvino_contrib
|
||||
MODELS_PATH: $(REPO_DIR)\..\testdata
|
||||
WORK_DIR: $(Pipeline.Workspace)\_w
|
||||
BUILD_DIR: D:\build
|
||||
BIN_DIR: $(REPO_DIR)\bin\intel64
|
||||
MSVS_VARS_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
MSVC_COMPILER_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\bin\Hostx64\x64\cl.exe
|
||||
INSTALL_DIR: $(WORK_DIR)\install_pkg
|
||||
SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat
|
||||
IB_DIR: C:\Program Files (x86)\IncrediBuild
|
||||
IB_TESTCONSOLE: $(IB_DIR)\IBTestConsole.exe
|
||||
TEST_ENV_PATH: $(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.1\opencv\bin;$(IB_DIR);%PATH%
|
||||
|
||||
steps:
|
||||
- script: |
|
||||
powershell -command "Invoke-RestMethod -Headers @{\"Metadata\"=\"true\"} -Method GET -Uri http://169.254.169.254/metadata/instance/compute?api-version=2019-06-01 | format-custom"
|
||||
where python3
|
||||
where python
|
||||
python --version
|
||||
where java
|
||||
java -version
|
||||
wmic computersystem get TotalPhysicalMemory
|
||||
wmic cpu list
|
||||
wmic logicaldisk get description,name
|
||||
wmic VOLUME list
|
||||
set
|
||||
displayName: 'System info'
|
||||
|
||||
- script: |
|
||||
rd /Q /S $(WORK_DIR) & mkdir $(WORK_DIR)
|
||||
rd /Q /S $(BUILD_DIR) & mkdir $(BUILD_DIR)
|
||||
displayName: 'Make dir'
|
||||
|
||||
- script: |
|
||||
certutil -urlcache -split -f https://incredibuilddiag1wu2.blob.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
|
||||
call install_ib_console.bat
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'Install IncrediBuild'
|
||||
|
||||
- checkout: self
|
||||
clean: true
|
||||
lfs: false
|
||||
submodules: recursive
|
||||
path: openvino
|
||||
|
||||
- script: |
|
||||
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-win.zip ninja-win.zip
|
||||
powershell -command "Expand-Archive -Force ninja-win.zip"
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'Install dependencies'
|
||||
|
||||
- script: |
|
||||
set PATH=$(WORK_DIR)\ninja-win;%PATH%
|
||||
call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_FASTER_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'CMake'
|
||||
|
||||
- script: |
|
||||
set PATH=$(WORK_DIR)\ninja-win;%PATH%
|
||||
call "$(MSVS_VARS_PATH)" && "C:\Program Files (x86)\IncrediBuild\BuildConsole.exe" /COMMAND="ninja"
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Build Win'
|
||||
|
||||
- script: dir $(REPO_DIR)\bin\ /s
|
||||
displayName: 'List files'
|
||||
|
||||
- script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Install'
|
||||
|
||||
- script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
|
||||
displayName: Stop IncrediBuild
|
||||
continueOnError: true
|
||||
enabled: false
|
||||
15
README.md
15
README.md
@@ -1,4 +1,4 @@
|
||||
# [OpenVINO™ Toolkit](https://01.org/openvinotoolkit) - Deep Learning Deployment Toolkit repository
|
||||
# OpenVINO™ Toolkit
|
||||
[Release 2021.2](https://github.com/openvinotoolkit/openvino/releases/tag/2021.2)
|
||||
[License](LICENSE)
|
||||

|
||||
@@ -7,7 +7,7 @@
|
||||
This toolkit allows developers to deploy pre-trained deep learning models
|
||||
through a high-level C++ Inference Engine API integrated with application logic.
|
||||
|
||||
This open source version includes several components: namely [Model Optimizer], [ngraph] and
|
||||
This open source version includes several components: namely [Model Optimizer], [nGraph] and
|
||||
[Inference Engine], as well as CPU, GPU, MYRIAD, multi device and heterogeneous plugins to accelerate deep learning inferencing on Intel® CPUs and Intel® Processor Graphics.
|
||||
It supports pre-trained models from the [Open Model Zoo], along with 100+ open
|
||||
source and public models in popular formats such as Caffe\*, TensorFlow\*,
|
||||
@@ -15,7 +15,7 @@ MXNet\* and ONNX\*.
|
||||
|
||||
## Repository components:
|
||||
* [Inference Engine]
|
||||
* [ngraph]
|
||||
* [nGraph]
|
||||
* [Model Optimizer]
|
||||
|
||||
## License
|
||||
@@ -27,9 +27,10 @@ and release your contribution under these terms.
|
||||
* Docs: https://docs.openvinotoolkit.org/
|
||||
* Wiki: https://github.com/openvinotoolkit/openvino/wiki
|
||||
* Issue tracking: https://github.com/openvinotoolkit/openvino/issues
|
||||
* Additional OpenVINO modules: https://github.com/openvinotoolkit/openvino_contrib
|
||||
* [HomePage](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html)
|
||||
* [OpenVINO™ Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes)
|
||||
* Storage: https://storage.openvinotoolkit.org/
|
||||
* Additional OpenVINO™ modules: https://github.com/openvinotoolkit/openvino_contrib
|
||||
* [Intel® Distribution of OpenVINO™ toolkit Product Page](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html)
|
||||
* [Intel® Distribution of OpenVINO™ toolkit Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes)
|
||||
|
||||
## Support
|
||||
Please report questions, issues and suggestions using:
|
||||
@@ -45,4 +46,4 @@ Please report questions, issues and suggestions using:
|
||||
[Inference Engine]:https://software.intel.com/en-us/articles/OpenVINO-InferEngine
|
||||
[Model Optimizer]:https://software.intel.com/en-us/articles/OpenVINO-ModelOptimizer
|
||||
[tag on StackOverflow]:https://stackoverflow.com/search?q=%23openvino
|
||||
[ngraph]:https://docs.openvinotoolkit.org/latest/openvino_docs_nGraph_DG_DevGuide.html
|
||||
[nGraph]:https://docs.openvinotoolkit.org/latest/openvino_docs_nGraph_DG_DevGuide.html
|
||||
|
||||
@@ -2,6 +2,19 @@
|
||||
|
||||
The sections below contain a detailed list of changes made to the Inference Engine API in recent releases.
|
||||
|
||||
## 2021.3
|
||||
|
||||
### New API
|
||||
|
||||
* InferenceEngine::InferRequest::Cancel to cancel inference request execution
|
||||
* InferenceEngine::Layout::HWC to support HWC layout for input or output blobs
|
||||
* InferenceEngine::Precision::F64 data precision for f64 data type
|
||||
* InferenceEngine::CNNNetwork::getOVNameForTensor to map framework tensor names to OpenVINO internal tensor names
|
||||
|
||||
### Deprecated API
|
||||
|
||||
* InferenceEngine::IVariableState interface is deprecated; use the InferenceEngine::VariableState wrapper instead
|
||||
|
||||
## 2021.2
|
||||
|
||||
### New API
|
||||
|
||||
@@ -6,7 +6,7 @@ Inference Engine Extension API allows to register operation sets (opsets) with c
|
||||
|
||||
To add your custom nGraph operation, create a new class that extends `ngraph::Op`, which is in turn derived from `ngraph::Node`, the base class for all graph operations in nGraph. Follow the steps below:
|
||||
|
||||
1. Define a `NodeTypeInfo` object that identifies the type of the operation to the graph users and helps with dynamic type resolution. The type info of an nGraph operation currently consists of a string identifier and a version number, but this may change in the future.
|
||||
1. Add the `NGRAPH_RTTI_DECLARATION` and `NGRAPH_RTTI_DEFINITION` macros which define a `NodeTypeInfo` object that identifies the type of the operation to the graph users and helps with dynamic type resolution. The type info of an nGraph operation currently consists of a string identifier and a version number, but this may change in the future.
|
||||
|
||||
2. Implement constructors that can optionally take the operation inputs and attributes as parameters.
|
||||
|
||||
|
||||
@@ -113,8 +113,8 @@ CPU-specific settings:
|
||||
| Parameter name | Parameter values | Default | Description |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| KEY_CPU_THREADS_NUM | positive integer values| 0 | Specifies the number of threads that CPU plugin should use for inference. Zero (default) means using all (logical) cores|
|
||||
| KEY_CPU_BIND_THREAD | YES/NUMA/NO | YES | Binds inference threads to CPU cores. 'YES' (default) binding option maps threads to cores - this works best for static/synthetic scenarios like benchmarks. The 'NUMA' binding is more relaxed, binding inference threads only to NUMA nodes, leaving further scheduling to specific cores to the OS. This option might perform better in the real-life/contended scenarios. Note that for the latency-oriented cases (single execution stream, see below) both YES and NUMA options limit number of inference threads to the number of hardware cores (ignoring hyper-threading) on the multi-socket machines. |
|
||||
| KEY_CPU_THROUGHPUT_STREAMS | KEY_CPU_THROUGHPUT_NUMA, KEY_CPU_THROUGHPUT_AUTO, or positive integer values| 1 | Specifies number of CPU "execution" streams for the throughput mode. Upper bound for the number of inference requests that can be executed simultaneously. All available CPU cores are evenly distributed between the streams. The default value is 1, which implies latency-oriented behavior with all available cores processing requests one by one.<br>KEY_CPU_THROUGHPUT_NUMA creates as many streams as needed to accommodate NUMA and avoid associated penalties.<br>KEY_CPU_THROUGHPUT_AUTO creates bare minimum of streams to improve the performance; this is the most portable option if you don't know how many cores your target machine has (and what would be the optimal number of streams). Note that your application should provide enough parallel slack (for example, run many inference requests) to leverage the throughput mode. <br> Non-negative integer value creates the requested number of streams. If a number of streams is 0, no internal streams are created and user threads are interpreted as stream master threads.|
|
||||
| KEY_CPU_BIND_THREAD | YES/NUMA/NO | YES | Binds inference threads to CPU cores. 'YES' (default) binding option maps threads to cores - this works best for static/synthetic scenarios like benchmarks. The 'NUMA' binding is more relaxed, binding inference threads only to NUMA nodes, leaving further scheduling to specific cores to the OS. This option might perform better in the real-life/contended scenarios. Note that for the latency-oriented cases (the number of streams is less than or equal to the number of NUMA nodes, see below) both YES and NUMA options limit the number of inference threads to the number of hardware cores (ignoring hyper-threading) on multi-socket machines. |
|
||||
| KEY_CPU_THROUGHPUT_STREAMS | KEY_CPU_THROUGHPUT_NUMA, KEY_CPU_THROUGHPUT_AUTO, or positive integer values| 1 | Specifies the number of CPU "execution" streams for the throughput mode. Upper bound for the number of inference requests that can be executed simultaneously. All available CPU cores are evenly distributed between the streams. The default value is 1, which implies latency-oriented behavior for a single NUMA-node machine, with all available cores processing requests one by one. On a multi-socket (multiple NUMA nodes) machine, the best latency numbers are usually achieved with a number of streams matching the number of NUMA-nodes. <br>KEY_CPU_THROUGHPUT_NUMA creates as many streams as needed to accommodate NUMA and avoid associated penalties.<br>KEY_CPU_THROUGHPUT_AUTO creates the bare minimum of streams to improve the performance; this is the most portable option if you don't know how many cores your target machine has (and what would be the optimal number of streams). Note that your application should provide enough parallel slack (for example, run many inference requests) to leverage the throughput mode. <br> A non-negative integer value creates the requested number of streams. If the number of streams is 0, no internal streams are created and user threads are interpreted as stream master threads.|
|
||||
| KEY_ENFORCE_BF16 | YES/NO| YES | The name of the setting to execute in bfloat16 precision whenever possible. This option lets the plugin know to downscale the precision where it sees performance benefits from bfloat16 execution. This option does not guarantee the accuracy of the network; you need to verify the accuracy in this mode separately, based on performance and accuracy results. It should be your decision whether to use this option or not. |
|
||||
|
||||
> **NOTE**: To disable all internal threading, use the following set of configuration parameters: `KEY_CPU_THROUGHPUT_STREAMS=0`, `KEY_CPU_THREADS_NUM=1`, `KEY_CPU_BIND_THREAD=NO`.
|
||||
|
||||
@@ -69,7 +69,7 @@ Limitations include:
|
||||
- Only 1D convolutions are natively supported.
|
||||
- The number of output channels for convolutions must be a multiple of 4.
|
||||
- Permute layer support is limited to the cases where no data reordering is needed or when reordering is happening for two dimensions, at least one of which is not greater than 8.
|
||||
- Concatinations and splittings are supported only along the channel dimension (axis=1).
|
||||
- Splits and concatenations are supported for contiguous portions of memory (e.g., split of 1,2,3,4 to 1,1,3,4 and 1,1,3,4 or concats of 1,2,3,4 and 1,2,3,4 to 2,2,3,4).
|
||||
|
||||
#### Experimental Support for 2D Convolutions
|
||||
|
||||
@@ -77,7 +77,7 @@ The Intel® GNA hardware natively supports only 1D convolution.
|
||||
|
||||
However, 2D convolutions can be mapped to 1D when a convolution kernel moves in a single direction. GNA Plugin performs such a transformation for Kaldi `nnet1` convolution. From this perspective, the Intel® GNA hardware convolution operation accepts an `NHWC` input and produces an `NHWC` output. Because OpenVINO™ only supports the `NCHW` layout, you may need to insert `Permute` layers before or after convolutions.
|
||||
|
||||
For example, the Kaldi model optimizer inserts such a permute after convolution for the [rm_cnn4a network](https://download.01.org/openvinotoolkit/models_contrib/speech/kaldi/rm_cnn4a_smbr/). This `Permute` layer is automatically removed by the GNA Plugin, because the Intel® GNA hardware convolution layer already produces the required `NHWC` result.
|
||||
For example, the Kaldi model optimizer inserts such a permute after convolution for the [rm_cnn4a network](https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/rm_cnn4a_smbr/). This `Permute` layer is automatically removed by the GNA Plugin, because the Intel® GNA hardware convolution layer already produces the required `NHWC` result.
|
||||
|
||||
## Operation Precision
|
||||
|
||||
|
||||
@@ -88,7 +88,7 @@ the supported output precision depends on the actual underlying devices. _Gener
|
||||
|:-------------|:------------:|:------------:|:------------:|:------------:|
|
||||
|CPU plugin |Supported |Supported |Supported |Supported |
|
||||
|GPU plugin |Supported |Supported |Supported |Supported |
|
||||
|VPU plugins |Not supported |Supported |Supported |Supported |
|
||||
|VPU plugins |Supported |Supported |Supported |Supported |
|
||||
|GNA plugin |Not supported |Supported |Supported |Supported |
|
||||
|
||||
### Supported Output Layout
|
||||
@@ -111,9 +111,9 @@ The following layers are supported by the plugins and by [Shape Inference featur
|
||||
| Acosh | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Activation-Clamp | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Activation-ELU | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Activation-Exp | Supported |Supported\*\*\*| Not Supported | Supported | Supported |
|
||||
| Activation-Exp | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Activation-Leaky ReLU | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Activation-Not | Supported |Supported\*\*\*| Not Supported | Not Supported | Supported |
|
||||
| Activation-Not | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Activation-PReLU | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Activation-ReLU | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Activation-ReLU6 | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
@@ -127,7 +127,7 @@ The following layers are supported by the plugins and by [Shape Inference featur
|
||||
| BatchNormalization | Supported | Supported | Supported | Not Supported | Supported |
|
||||
| BinaryConvolution | Supported | Supported | Not Supported | Not Supported | Supported |
|
||||
| Broadcast | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Ceil | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Ceil | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Concat | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Const | Supported | Supported | Supported | Supported | Not Supported |
|
||||
| Convolution-Dilated | Supported | Supported | Supported | Not Supported | Supported |
|
||||
@@ -145,8 +145,8 @@ The following layers are supported by the plugins and by [Shape Inference featur
|
||||
| DeformableConvolution | Supported | Supported | Not Supported | Not Supported | Supported |
|
||||
| DepthToSpace | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| DetectionOutput | Supported | Supported\*\* | Supported\* | Not Supported | Supported |
|
||||
| Eltwise-And | Supported |Supported\*\*\*| Not Supported | Not Supported | Supported |
|
||||
| Eltwise-Add | Supported |Supported\*\*\*| Not Supported | Not Supported | Supported |
|
||||
| Eltwise-And | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Eltwise-Add | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Eltwise-Div | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Eltwise-Equal | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Eltwise-FloorMod | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
@@ -166,12 +166,12 @@ The following layers are supported by the plugins and by [Shape Inference featur
|
||||
| Eltwise-SquaredDiff | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Eltwise-Sub | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Eltwise-Sum | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Erf | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Exp | Supported | Supported | Not Supported | Supported | Supported |
|
||||
| Erf | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Exp | Supported | Supported | Supported | Supported | Supported |
|
||||
| FakeQuantize | Not Supported | Supported | Not Supported | Not Supported | Supported |
|
||||
| Fill | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Flatten | Supported | Supported | Supported | Not Supported | Supported |
|
||||
| Floor | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Floor | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| FullyConnected (Inner Product) | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Gather | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| GatherTree | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
@@ -191,9 +191,9 @@ The following layers are supported by the plugins and by [Shape Inference featur
|
||||
| Memory | Not Supported | Supported | Not Supported | Supported | Supported |
|
||||
| MVN | Supported | Supported\*\* | Supported\* | Not Supported | Supported |
|
||||
| Neg | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| NonMaxSuppression | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| NonMaxSuppression | Not Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Normalize | Supported | Supported\*\* | Supported\* | Not Supported | Supported |
|
||||
| OneHot | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| OneHot | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Pad | Supported | Supported\*\* | Supported\* | Not Supported | Supported |
|
||||
| Permute | Supported | Supported | Supported | Supported\* | Supported |
|
||||
| Pooling(AVG,MAX) | Supported | Supported | Supported | Supported | Supported |
|
||||
@@ -206,17 +206,17 @@ The following layers are supported by the plugins and by [Shape Inference featur
|
||||
| PSROIPooling | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Range | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Reciprocal | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceAnd | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceAnd | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| ReduceL1 | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceL2 | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceLogSum | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceLogSumExp | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceMax | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceMean | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceMin | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceMax | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| ReduceMean | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| ReduceMin | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| ReduceOr | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceProd | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceSum | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceSum | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| ReduceSumSquare | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| RegionYolo | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| ReorgYolo | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
@@ -226,7 +226,7 @@ The following layers are supported by the plugins and by [Shape Inference featur
|
||||
| RNN | Not Supported | Supported | Supported | Not Supported | Not Supported |
|
||||
| ROIPooling | Supported\* | Supported | Supported | Not Supported | Supported |
|
||||
| ScaleShift | Supported |Supported\*\*\*| Supported\* | Supported | Supported |
|
||||
| ScatterUpdate | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ScatterUpdate | Not Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Select | Supported | Supported | Supported | Not Supported | Supported |
|
||||
| Selu | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ShuffleChannels | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
@@ -236,17 +236,17 @@ The following layers are supported by the plugins and by [Shape Inference featur
|
||||
| SimplerNMS | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Slice | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| SoftMax | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Softplus | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Softplus | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Softsign | Supported | Supported\*\* | Not Supported | Supported | Supported |
|
||||
| SpaceToDepth | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| SpatialTransformer | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Split | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Squeeze | Supported | Supported\*\* | Supported | Supported | Supported |
|
||||
| StridedSlice | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| StridedSlice | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Tan | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| TensorIterator | Not Supported | Supported | Supported | Supported | Not Supported |
|
||||
| Tile | Supported\*\* |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| TopK | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| TopK | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Unpooling | Supported | Not Supported | Not Supported | Not Supported | Not Supported |
|
||||
| Unsqueeze | Supported | Supported\*\* | Supported | Supported | Supported |
|
||||
| Upsampling | Supported | Not Supported | Not Supported | Not Supported | Not Supported |
|
||||
|
||||
@@ -255,6 +255,89 @@ Standard TensorFlow\* operations:
|
||||
| ZerosLike | No |
|
||||
|
||||
|
||||
## TensorFlow 2 Keras\* Supported Operations
|
||||
|
||||
Standard TensorFlow 2 Keras\* operations:
|
||||
|
||||
| Operation Name in TensorFlow 2 Keras\* | Limitations|
|
||||
| :----------| :----------|
|
||||
| ActivityRegularization | No |
|
||||
| Add | No |
|
||||
| AdditiveAttention | No |
|
||||
| AlphaDropout | No |
|
||||
| Attention | No |
|
||||
| Average | No |
|
||||
| AveragePooling1D | No |
|
||||
| AveragePooling2D | No |
|
||||
| AveragePooling3D | No |
|
||||
| BatchNormalization | No |
|
||||
| Bidirectional | No |
|
||||
| Concatenate | No |
|
||||
| Conv1D | No |
|
||||
| Conv1DTranspose | Not supported if dilation is not equal to 1 |
|
||||
| Conv2D | No |
|
||||
| Conv2DTranspose | No |
|
||||
| Conv3D | No |
|
||||
| Conv3DTranspose | No |
|
||||
| Cropping1D | No |
|
||||
| Cropping2D | No |
|
||||
| Cropping3D | No |
|
||||
| Dense | No |
|
||||
| DenseFeatures | Not supported for categorical and crossed features |
|
||||
| DepthwiseConv2D | No |
|
||||
| Dot | No |
|
||||
| Dropout | No |
|
||||
| ELU | No |
|
||||
| Embedding | No |
|
||||
| Flatten | No |
|
||||
| GRU | No |
|
||||
| GRUCell | No |
|
||||
| GaussianDropout | No |
|
||||
| GaussianNoise | No |
|
||||
| GlobalAveragePooling1D | No |
|
||||
| GlobalAveragePooling2D | No |
|
||||
| GlobalAveragePooling3D | No |
|
||||
| GlobalMaxPool1D | No |
|
||||
| GlobalMaxPool2D | No |
|
||||
| GlobalMaxPool3D | No |
|
||||
| LSTM | No |
|
||||
| LSTMCell | No |
|
||||
| Lambda | No |
|
||||
| LayerNormalization | No |
|
||||
| LeakyReLU | No |
|
||||
| LocallyConnected1D | No |
|
||||
| LocallyConnected2D | No |
|
||||
| MaxPool1D | No |
|
||||
| MaxPool2D | No |
|
||||
| MaxPool3D | No |
|
||||
| Maximum | No |
|
||||
| Minimum | No |
|
||||
| Multiply | No |
|
||||
| PReLU | No |
|
||||
| Permute | No |
|
||||
| RNN | Not supported for some custom cells |
|
||||
| ReLU | No |
|
||||
| RepeatVector | No |
|
||||
| Reshape | No |
|
||||
| SeparableConv1D | No |
|
||||
| SeparableConv2D | No |
|
||||
| SimpleRNN | No |
|
||||
| SimpleRNNCell | No |
|
||||
| Softmax | No |
|
||||
| SpatialDropout1D | No |
|
||||
| SpatialDropout2D | No |
|
||||
| SpatialDropout3D | No |
|
||||
| StackedRNNCells | No |
|
||||
| Subtract | No |
|
||||
| ThresholdedReLU | No |
|
||||
| TimeDistributed | No |
|
||||
| UpSampling1D | No |
|
||||
| UpSampling2D | No |
|
||||
| UpSampling3D | No |
|
||||
| ZeroPadding1D | No |
|
||||
| ZeroPadding2D | No |
|
||||
| ZeroPadding3D | No |
|
||||
|
||||
## Kaldi\* Supported Layers
|
||||
|
||||
Standard Kaldi\* Layers:
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
| VGG19 | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/vgg19.tar.gz) |
|
||||
| zfnet512 | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/zfnet512.tar.gz) |
|
||||
| GPT-2 | [model archive](https://github.com/onnx/models/blob/master/text/machine_comprehension/gpt-2/model/gpt2-10.tar.gz) |
|
||||
| YOLOv3 | [model archive](https://github.com/onnx/models/blob/master/vision/object_detection_segmentation/yolov3/model/yolov3-10.tar.gz) |
|
||||
|
||||
Listed models are built with the operation set version 8 except the GPT-2 model. Models that are upgraded to higher operation set versions may not be supported.
|
||||
|
||||
|
||||
@@ -115,6 +115,7 @@ Where `HEIGHT` and `WIDTH` are the input images height and width for which the m
|
||||
| Keras-TCN | [Repo](https://github.com/philipperemy/keras-tcn) |
|
||||
| PRNet | [Repo](https://github.com/YadiraF/PRNet) |
|
||||
| YOLOv4 | [Repo](https://github.com/Ma-Dan/keras-yolo4) |
|
||||
| STN | [Repo](https://github.com/oarriaga/STN.keras) |
|
||||
|
||||
* YOLO topologies from DarkNet* can be converted using [instruction](tf_specific/Convert_YOLO_From_Tensorflow.md),
|
||||
* FaceNet topologies can be converted using [instruction](tf_specific/Convert_FaceNet_From_Tensorflow.md).
|
||||
@@ -342,11 +343,9 @@ model = tf.keras.models.load_model('model.h5', custom_objects={'CustomLayer': Cu
|
||||
tf.saved_model.save(model,'model')
|
||||
```
|
||||
|
||||
Then follow the above instructions for the SavedModel format.
|
||||
Then follow the above instructions for the SavedModel format.
|
||||
|
||||
> **NOTE:** Do not use other hacks to resave TensorFlow* 2 models into TensorFlow* 1 formats.
|
||||
|
||||
> **NOTE**: Currently, OpenVINO™ support for TensorFlow* 2 models is in preview (aka Beta), which means limited and not of production quality yet. OpenVINO™ does not support models with Keras RNN and Embedding layers.
|
||||
> **NOTE:** Do not use other hacks to resave TensorFlow* 2 models into TensorFlow* 1 formats.
|
||||
|
||||
|
||||
## Custom Layer Definition
|
||||
@@ -360,7 +359,7 @@ See [Custom Layers in the Model Optimizer](../customize_model_optimizer/Customiz
|
||||
* Custom layer implementation details
|
||||
|
||||
|
||||
## Supported TensorFlow\* Layers
|
||||
## Supported TensorFlow\* and TensorFlow 2 Keras\* Layers
|
||||
Refer to [Supported Framework Layers](../Supported_Frameworks_Layers.md) for the list of supported standard layers.
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
# Convert PyTorch* QuartzNet to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_QuartzNet}
|
||||
|
||||
[NeMo project](https://github.com/NVIDIA/NeMo) provides the QuartzNet model.
|
||||
|
||||
## Download the Pre-Trained QuartzNet Model
|
||||
|
||||
To download the pre-trained model, refer to the [NeMo Speech Models Catalog](https://ngc.nvidia.com/catalog/models/nvidia:nemospeechmodels).
|
||||
Here are the instructions on how to obtain QuartzNet in ONNX* format.
|
||||
```python
|
||||
import nemo
|
||||
import nemo.collections.asr as nemo_asr
|
||||
|
||||
quartznet = nemo_asr.models.ASRConvCTCModel.from_pretrained(model_info='QuartzNet15x5-En')
|
||||
# Export QuartzNet model to ONNX* format
|
||||
quartznet.export('qn.onnx')
|
||||
```
|
||||
This code produces 3 ONNX* model files: `encoder_qt.onnx`, `decoder_qt.onnx`, `qn.onnx`.
|
||||
They are the `encoder`, the `decoder`, and a combined `decoder(encoder(x))` model, respectively.
|
||||
|
||||
## Convert ONNX* QuartzNet model to IR
|
||||
|
||||
If using a combined model:
|
||||
```sh
|
||||
./mo.py --input_model <MODEL_DIR>/qn.onnx --input_shape [B,64,X]
|
||||
```
|
||||
If using separate models:
|
||||
```sh
|
||||
./mo.py --input_model <MODEL_DIR>/encoder_qt.onnx --input_shape [B,64,X]
|
||||
./mo.py --input_model <MODEL_DIR>/decoder_qt.onnx --input_shape [B,1024,Y]
|
||||
```
|
||||
|
||||
Where shape is determined by the audio file Mel-Spectrogram length: B - batch dimension, X - dimension based on the input length, Y - determined by encoder output, usually `X / 2`.
|
||||
@@ -53,6 +53,7 @@ limitations under the License.
|
||||
<tab type="user" title="Convert ONNX* Faster R-CNN Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_Faster_RCNN"/>
|
||||
<tab type="user" title="Convert ONNX* Mask R-CNN Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_Mask_RCNN"/>
|
||||
<tab type="user" title="Converting DLRM ONNX* Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_DLRM"/>
|
||||
<tab type="user" title="Convert PyTorch* QuartzNet Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_QuartzNet"/>
|
||||
</tab>
|
||||
<tab type="user" title="Model Optimizations Techniques" url="@ref openvino_docs_MO_DG_prepare_model_Model_Optimization_Techniques"/>
|
||||
<tab type="user" title="Cutting off Parts of a Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Cutting_Model"/>
|
||||
|
||||
@@ -30,7 +30,7 @@ Now the dependencies are installed and you are ready to use the Intel® Vision A
|
||||
|
||||
## Optional Steps
|
||||
|
||||
* For advanced configuration steps for your IEI Mustang-V100-MX8 accelerator, see [Intel® Movidius™ VPUs Setup Guide for Use with Intel® Distribution of OpenVINO™ toolkit](movidius-setup-guide.md).
|
||||
* For advanced configuration steps for your **IEI Mustang-V100-MX8-R10** accelerator, see [Intel® Movidius™ VPUs Setup Guide for Use with Intel® Distribution of OpenVINO™ toolkit](movidius-setup-guide.md). **IEI Mustang-V100-MX8-R11** accelerator doesn't require any additional steps.
|
||||
|
||||
* After you've configured your Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, see [Intel® Movidius™ VPUs Programming Guide for Use with Intel® Distribution of OpenVINO™ toolkit](movidius-programming-guide.md) to learn how to distribute a model across all 8 VPUs to maximize performance.
|
||||
|
||||
|
||||
@@ -16,7 +16,8 @@ Your installation is complete when these are all completed:
|
||||
|
||||
2. Install the dependencies:
|
||||
|
||||
- [Microsoft Visual Studio* with C++ **2019 or 2017** with MSBuild](http://visualstudio.microsoft.com/downloads/)
|
||||
- [Microsoft Visual Studio* with C++ **2019 or 2017** with MSBuild](http://visualstudio.microsoft.com/downloads/)
|
||||
> **NOTE**: Clicking this link will directly download Visual Studio 2019 for Windows that has been validated with OpenVINO™.
|
||||
- [CMake **3.10 or higher** 64-bit](https://cmake.org/download/)
|
||||
> **NOTE**: If you want to use Microsoft Visual Studio 2019, you are required to install CMake 3.14.
|
||||
- [Python **3.6** - **3.8** 64-bit](https://www.python.org/downloads/windows/)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
# Intel® Movidius™ VPUs Setup Guide for Use with Intel® Distribution of OpenVINO™ toolkit {#openvino_docs_install_guides_movidius_setup_guide}
|
||||
|
||||
> **NOTE**: These steps are only required for **IEI Mustang-V100-MX8-R10** card. **IEI Mustang-V100-MX8-R11** card doesn't require any additional steps and it's completely configured using the [general guidance](installing-openvino-linux-ivad-vpu.md).
|
||||
|
||||
## See Also
|
||||
|
||||
- [Intel® Movidius™ VPUs Programming Guide for use with the Intel® Distribution of OpenVINO™](movidius-programming-guide.md)
|
||||
@@ -9,7 +11,7 @@
|
||||
- <a class="download" href="<domain_placeholder>/downloads/Intel Vision Accelerator Design with Intel Movidius™ VPUs Errata.pdf">Intel® Vision Accelerator Design with Intel® Movidius™ VPUs Errata</a>
|
||||
|
||||
The IEI Mustang-V100-MX8 is an OEM version of the Intel® Vision Accelerator Design with Intel® Movidius™ VPUs.
|
||||
This guide assumes you have installed the [Mustang-V100-MX8](https://download.ieiworld.com/) and the [Intel® Distribution of OpenVINO™ Toolkit](https://software.intel.com/en-us/openvino-toolkit).
|
||||
This guide assumes you have installed the [Mustang-V100-MX8](https://download.ieiworld.com/) and the [Intel® Distribution of OpenVINO™ Toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html).
|
||||
|
||||
Instructions in this guide for configuring your accelerator include:
|
||||
1. Installing the required IEI\* BSL reset software
|
||||
|
||||
@@ -24,14 +24,19 @@ OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applicatio
|
||||
|
||||
## System Requirements
|
||||
|
||||
* [Python* distribution](https://www.python.org/) 3.6, 3.7, 3.8
|
||||
* Supported Operating Systems:
|
||||
- Ubuntu* 18.04 long-term support (LTS), 64-bit (python 3.6 or 3.7)
|
||||
- Ubuntu* 20.04 long-term support (LTS), 64-bit (python 3.6 or 3.7)
|
||||
- macOS* 10.15.x versions
|
||||
- Windows 10*, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or higher) editions
|
||||
- Windows Server* 2016 or higher
|
||||
> NOTE: This package can be installed on other versions of Linux and Windows OSes, but only the specific versions above are fully validated.
|
||||
The table below lists the supported operating systems and Python* versions required to run the installation.
|
||||
|
||||
| Supported Operating System | [Python* Version (64-bit)](https://www.python.org/) |
|
||||
| :------------------------------------------------------------| :---------------------------------------------------|
|
||||
| Ubuntu* 18.04 long-term support (LTS), 64-bit | 3.6, 3.7 |
|
||||
| Ubuntu* 20.04 long-term support (LTS), 64-bit | 3.6, 3.7 |
|
||||
| Red Hat* Enterprise Linux* 8.2, 64-bit | 3.6, 3.7 |
|
||||
| CentOS* 7.4, 64-bit | 3.6, 3.7 |
|
||||
| macOS* 10.15.x versions | 3.6, 3.7, 3.8 |
|
||||
| Windows 10*, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or higher) editions | 3.6, 3.7, 3.8 |
|
||||
| Windows Server* 2016 or higher | 3.6, 3.7, 3.8 |
|
||||
|
||||
> **NOTE**: This package can be installed on other versions of Linux and Windows OSes, but only the specific versions above are fully validated.
|
||||
|
||||
## Install the Developer Package
|
||||
|
||||
|
||||
@@ -21,14 +21,19 @@ The Intel® Distribution of OpenVINO™ toolkit for Linux\*:
|
||||
|
||||
## System Requirements
|
||||
|
||||
* [Python* distribution](https://www.python.org/) 3.6, 3.7, 3.8
|
||||
* Supported Operating Systems:
|
||||
- Ubuntu* 18.04 long-term support (LTS), 64-bit (python 3.6 or 3.7)
|
||||
- Ubuntu* 20.04 long-term support (LTS), 64-bit (python 3.6 or 3.7)
|
||||
- macOS* 10.15.x version
|
||||
- Windows 10*, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or higher) editions
|
||||
- Windows Server* 2016 or higher
|
||||
> NOTE: This package can be installed on other versions of Linux and Windows OSes, but only the specific versions above are fully validated
|
||||
The table below lists the supported operating systems and Python* versions required to run the installation.
|
||||
|
||||
| Supported Operating System | [Python* Version (64-bit)](https://www.python.org/) |
|
||||
| :------------------------------------------------------------| :---------------------------------------------------|
|
||||
| Ubuntu* 18.04 long-term support (LTS), 64-bit | 3.6, 3.7 |
|
||||
| Ubuntu* 20.04 long-term support (LTS), 64-bit | 3.6, 3.7 |
|
||||
| Red Hat* Enterprise Linux* 8.2, 64-bit | 3.6, 3.7 |
|
||||
| CentOS* 7.4, 64-bit | 3.6, 3.7 |
|
||||
| macOS* 10.15.x versions | 3.6, 3.7, 3.8 |
|
||||
| Windows 10*, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or higher) editions | 3.6, 3.7, 3.8 |
|
||||
| Windows Server* 2016 or higher | 3.6, 3.7, 3.8 |
|
||||
|
||||
> **NOTE**: This package can be installed on other versions of Linux and Windows OSes, but only the specific versions above are fully validated.
|
||||
|
||||
## Install the Runtime Package
|
||||
|
||||
|
||||
@@ -9,9 +9,9 @@
|
||||
**Detailed description**: For each element from the input tensor calculates corresponding
|
||||
element in the output tensor with the following formula:
|
||||
|
||||
\f[
|
||||
HSigmoid(x) = \frac{min(max(x + 3, 0), 6)}{6}
|
||||
\f]
|
||||
\f[
|
||||
HSigmoid(x) = \frac{min(max(x + 3, 0), 6)}{6}
|
||||
\f]
|
||||
|
||||
The HSigmoid operation is introduced in the following [article](https://arxiv.org/pdf/1905.02244.pdf).
|
||||
|
||||
|
||||
@@ -13,11 +13,11 @@ Deep Learning Inference Engine is a part of Intel® Deep Learning Deployment
|
||||
Below, there are the three main steps of the deployment process:
|
||||
|
||||
1. **Conversion**<br>
|
||||
Trained models are converted from a specific framework (like Caffe\* or TensorFlow\*) to a framework-agnostic Intermediate Representation (IR) format.
|
||||
Trained models are converted from a specific framework, like TensorFlow\*, or format, like ONNX\*, to the framework-agnostic Intermediate Representation (IR) format.
|
||||
|
||||
- *Performance flow*: This is an offline step where general topology-level optimizations happen automatically (see <a href="#mo-knobs-related-to-performance">Model Optimizer Knobs Related to Performance</a>).
|
||||
|
||||
- *Tools*: Intel DL Deployment Toolkit features the Model Optimizer that enables automatic and seamless transition from the training environment to the deployment environment.
|
||||
- *Tools*: OpenVINO™ features the Model Optimizer that enables automatic and seamless transition from a training to deployment environment.
|
||||
|
||||
2. **Model Inference/Execution**<br>
|
||||
After conversion, Inference Engine consumes the IR to perform inference. While Inference Engine API itself is target-agnostic, internally, it has a notion of plugins, which are device-specific libraries facilitating the hardware-assisted acceleration.
|
||||
@@ -55,14 +55,16 @@ In contrast, for the latency-oriented tasks, the time to a single frame is more
|
||||
|
||||
Refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample, which allows latency vs. throughput measuring.
|
||||
|
||||
> **NOTE**: Most samples also support batching (automatically packing multiple input images into a single request). However, high batch size results in a latency penalty. So for more real-time oriented usages, lower batch sizes (as low as a single input) are usually used. However, devices like CPU, Intel® Movidius™ Myriad™ 2 VPU, Intel® Movidius™ Myriad™ X VPU, or Intel® Vision Accelerator Design with Intel® Movidius™ VPU require a number of parallel requests instead of batching to leverage the performance.
|
||||
> **NOTE**: The [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample also supports batching, that is, automatically packing multiple input images into a single request. However, high batch size results in a latency penalty. So for more real-time oriented usages, batch sizes that are as low as a single input are usually used. Still, devices like CPU, Intel® Movidius™ Myriad™ 2 VPU, Intel® Movidius™ Myriad™ X VPU, or Intel® Vision Accelerator Design with Intel® Movidius™ VPU require a number of parallel requests instead of batching to leverage the performance. Running multiple requests should be coupled with a device configured to the corresponding number of streams. See <a href="#cpu-streams">details on CPU streams</a> for an example.
|
||||
|
||||
[OpenVINO™ Deep Learning Workbench tool](https://docs.openvinotoolkit.org/latest/workbench_docs_Workbench_DG_Introduction.html) provides throughput versus latency charts for different numbers of streams, requests, and batch sizes to find the performance sweet spot.
|
||||
|
||||
### Comparing Performance with Native/Framework Code <a name="comparing-performance-with-native-framework-code"></a>
|
||||
|
||||
When comparing the Inference Engine performance with the framework or another reference code, make sure that both versions are as similar as possible:
|
||||
|
||||
- Wrap exactly the inference execution (refer to the [Inference Engine Samples](../IE_DG/Samples_Overview.md) for examples).
|
||||
- Do not include model loading time.
|
||||
- Wrap exactly the inference execution (refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample for an example).
|
||||
- Track model loading time separately.
|
||||
- Ensure the inputs are identical for the Inference Engine and the framework. For example, Caffe\* allows you to auto-populate the input with random values. Notice that this might give different performance than real images do.
|
||||
- Similarly, for correct performance comparison, make sure the access pattern, for example, input layouts, is optimal for Inference Engine (currently, it is NCHW).
|
||||
- Any user-side pre-processing should be tracked separately.
|
||||
@@ -77,7 +79,7 @@ You need to build your performance conclusions on reproducible data. Do the perf
|
||||
- If the warm-up run does not help or execution time still varies, you can try running a large number of iterations and then average or find a mean of the results.
|
||||
- For time values that range too much, use geomean.
|
||||
|
||||
Refer to the [Inference Engine Samples](../IE_DG/Samples_Overview.md) for code examples for the performance measurements. Almost every sample, except interactive demos, has a `-ni` option to specify the number of iterations.
|
||||
Refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) for code examples of performance measurements. Almost every sample, except interactive demos, has the `-ni` option to specify the number of iterations.
|
||||
|
||||
## Model Optimizer Knobs Related to Performance <a name="mo-knobs-related-to-performance"></a>
|
||||
|
||||
|
||||
@@ -606,7 +606,7 @@ This example uses `curl` to download the `face-detection-retail-004` model from
|
||||
2. Download a model from the Model Zoo:
|
||||
```sh
|
||||
cd $OVSA_DEV_ARTEFACTS
|
||||
curl --create-dirs https://download.01.org/opencv/2021/openvinotoolkit/2021.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.xml https:// download.01.org/opencv/2021/openvinotoolkit/2021.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.bin -o model/face-detection-retail-0004.xml -o model/face-detection-retail-0004.bin
|
||||
curl --create-dirs https://storage.openvinotoolkit.org/repositories/open_model_zoo/2021.3/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.xml https://storage.openvinotoolkit.org/repositories/open_model_zoo/2021.3/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.bin -o model/face-detection-retail-0004.xml -o model/face-detection-retail-0004.bin
|
||||
```
|
||||
The model is downloaded to the `OVSA_DEV_ARTEFACTS/model` directory.
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
|
||||
using namespace TemplateExtension;
|
||||
|
||||
constexpr ngraph::NodeTypeInfo Operation::type_info;
|
||||
|
||||
//! [op:ctor]
|
||||
NGRAPH_RTTI_DEFINITION(TemplateExtension::Operation, "Template", 0);
|
||||
|
||||
Operation::Operation(const ngraph::Output<ngraph::Node> &arg, int64_t add) : Op({arg}), add(add) {
|
||||
constructor_validate_and_infer_types();
|
||||
}
|
||||
|
||||
@@ -11,8 +11,7 @@ namespace TemplateExtension {
|
||||
|
||||
class Operation : public ngraph::op::Op {
|
||||
public:
|
||||
static constexpr ngraph::NodeTypeInfo type_info{"Template", 0};
|
||||
const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; }
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
|
||||
Operation() = default;
|
||||
Operation(const ngraph::Output<ngraph::Node>& arg, int64_t add);
|
||||
|
||||
@@ -6,14 +6,14 @@ include_guard(GLOBAL)
|
||||
|
||||
set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma2x8x)
|
||||
set(VPU_SUPPORTED_FIRMWARES_HASH
|
||||
"87389cef2aff63197f7787fb9b0ef7bfc74119200ef6b9f0c2c763b3ea4aabe9"
|
||||
"eba4fabfd71f9c81db12886b05f559f1c6092f9b65dfb4493c205f493d816fab")
|
||||
"cfba5fc0895a564fa51a1438f1c4d4f06198be982b1c2fb973c5cb9ab0a3c1f3"
|
||||
"4176456c96b151470de3a723b603503306cff2e52975b739927e37d730c053be")
|
||||
|
||||
#
|
||||
# Default packages
|
||||
#
|
||||
|
||||
set(FIRMWARE_PACKAGE_VERSION 1633)
|
||||
set(FIRMWARE_PACKAGE_VERSION 1639)
|
||||
set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.2")
|
||||
|
||||
#
|
||||
|
||||
@@ -31,8 +31,13 @@ int image_read(const char *img_path, c_mat_t *img) {
|
||||
img->mat_width = mat.size().width;
|
||||
img->mat_height = mat.size().height;
|
||||
img->mat_type = mat.type();
|
||||
img->mat_data_size = img->mat_channels * img->mat_width * img->mat_height;
|
||||
img->mat_data_size = mat.elemSize() * img->mat_width * img->mat_height;
|
||||
img->mat_data = (unsigned char *)malloc(sizeof(unsigned char) * img->mat_data_size);
|
||||
|
||||
if (img->mat_data == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < img->mat_data_size; ++i) {
|
||||
img->mat_data[i] = mat.data[i];
|
||||
}
|
||||
@@ -54,8 +59,13 @@ int image_resize(const c_mat_t *src_img, c_mat_t *dst_img, const int width, cons
|
||||
dst_img->mat_width = mat_dst.size().width;
|
||||
dst_img->mat_height = mat_dst.size().height;
|
||||
dst_img->mat_type = mat_dst.type();
|
||||
dst_img->mat_data_size = dst_img->mat_channels * dst_img->mat_width * dst_img->mat_height;
|
||||
dst_img->mat_data_size = mat_dst.elemSize() * dst_img->mat_width * dst_img->mat_height;
|
||||
dst_img->mat_data = (unsigned char *)malloc(sizeof(unsigned char) * dst_img->mat_data_size);
|
||||
|
||||
if (dst_img->mat_data == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < dst_img->mat_data_size; ++i) {
|
||||
dst_img->mat_data[i] = mat_dst.data[i];
|
||||
}
|
||||
|
||||
@@ -39,6 +39,9 @@ struct classify_res *output_blob_to_classify_res(ie_blob_t *blob, size_t *n) {
|
||||
*n = output_dim.dims[1];
|
||||
|
||||
struct classify_res *cls = (struct classify_res *)malloc(sizeof(struct classify_res) * (*n));
|
||||
if (!cls) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ie_blob_buffer_t blob_cbuffer;
|
||||
status = ie_blob_get_cbuffer(blob, &blob_cbuffer);
|
||||
|
||||
@@ -38,6 +38,9 @@ struct classify_res *output_blob_to_classify_res(ie_blob_t *blob, size_t *n) {
|
||||
*n = output_dim.dims[1];
|
||||
|
||||
struct classify_res *cls = (struct classify_res *)malloc(sizeof(struct classify_res) * (*n));
|
||||
if (!cls) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ie_blob_buffer_t blob_cbuffer;
|
||||
status = ie_blob_get_cbuffer(blob, &blob_cbuffer);
|
||||
@@ -76,8 +79,8 @@ size_t read_image_from_file(const char *img_path, unsigned char *img_data, size_
|
||||
fseek(fp, 0, SEEK_SET);
|
||||
read_size = fread(img_data, 1, size, fp);
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
fclose(fp);
|
||||
return read_size;
|
||||
}
|
||||
|
||||
|
||||
@@ -122,6 +122,7 @@ void readInputFilesArgument(const char *arg) {
|
||||
for (i = 0; i < file_num; ++i) {
|
||||
free(file_paths[i]);
|
||||
}
|
||||
free(file_path);
|
||||
free(file_paths);
|
||||
file_num = 0;
|
||||
}
|
||||
@@ -279,6 +280,10 @@ int main(int argc, char **argv) {
|
||||
ie_version_free(&version);
|
||||
|
||||
char **argv_temp =(char **)calloc(argc, sizeof(char *));
|
||||
if (!argv_temp) {
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
int i, j;
|
||||
for (i = 0; i < argc; ++i) {
|
||||
argv_temp[i] = argv[i];
|
||||
@@ -419,6 +424,10 @@ int main(int argc, char **argv) {
|
||||
/** Collect images data **/
|
||||
c_mat_t *originalImages = (c_mat_t *)calloc(file_num, sizeof(c_mat_t));
|
||||
c_mat_t *images = (c_mat_t *)calloc(file_num, sizeof(c_mat_t));
|
||||
|
||||
if (!originalImages || !images)
|
||||
goto err;
|
||||
|
||||
int image_num = 0;
|
||||
for (i = 0; i < file_num; ++i) {
|
||||
c_mat_t img = {NULL, 0, 0, 0, 0, 0};
|
||||
@@ -435,20 +444,27 @@ int main(int argc, char **argv) {
|
||||
resized_img.mat_height = img.mat_height;
|
||||
resized_img.mat_type = img.mat_type;
|
||||
resized_img.mat_data = calloc(1, resized_img.mat_data_size);
|
||||
if (resized_img.mat_data == NULL) {
|
||||
image_free(&img);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (j = 0; j < resized_img.mat_data_size; ++j)
|
||||
resized_img.mat_data[j] = img.mat_data[j];
|
||||
} else {
|
||||
printf("%sImage is resized from (%d, %d) to (%zu, %zu)\n", \
|
||||
warn, img.mat_width, img.mat_height, input_width, input_height);
|
||||
warn, img.mat_width, img.mat_height, input_width, input_height);
|
||||
|
||||
image_resize(&img, &resized_img, (int)input_width, (int)input_height);
|
||||
if (image_resize(&img, &resized_img, (int)input_width, (int)input_height) == -1) {
|
||||
printf("%sImage %s cannot be resized!\n", warn, file_paths[i]);
|
||||
image_free(&img);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (resized_img.mat_data) {
|
||||
originalImages[image_num] = img;
|
||||
images[image_num] = resized_img;
|
||||
++image_num;
|
||||
}
|
||||
originalImages[image_num] = img;
|
||||
images[image_num] = resized_img;
|
||||
++image_num;
|
||||
}
|
||||
|
||||
if (!image_num) {
|
||||
@@ -523,8 +539,8 @@ int main(int argc, char **argv) {
|
||||
if (config_msg) {
|
||||
ie_config_t * config = parseConfig(config_msg, '#');
|
||||
status = ie_core_load_network(core, network, device_name, config, &exe_network);
|
||||
config_free(config);
|
||||
if (status != OK) {
|
||||
config_free(config);
|
||||
goto err;
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -53,8 +53,8 @@ size_t read_image_from_file(const char* img_path, unsigned char *img_data, size_
|
||||
fseek(fp, 0, SEEK_SET);
|
||||
read_size = fread(img_data, 1, size, fp);
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
fclose(fp);
|
||||
return read_size;
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ networks like SSD-VGG. The sample shows how to use [Shape Inference feature](../
|
||||
|
||||
## Running
|
||||
|
||||
To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
|
||||
To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](@ref omz_tools_downloader_README).
|
||||
|
||||
> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
|
||||
>
|
||||
|
||||
@@ -9,7 +9,7 @@ networkx==2.2
|
||||
tqdm==4.31.1
|
||||
texttable==1.6.3
|
||||
py-cpuinfo!=5.0,!=6.0
|
||||
PyYAML>=5.4.2
|
||||
PyYAML>=5.4.1
|
||||
pillow>=8.1.0
|
||||
scikit-image
|
||||
scikit-learn
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
[options]
|
||||
py_modules =
|
||||
mo
|
||||
mo_tf
|
||||
mo_caffe
|
||||
mo_mxnet
|
||||
|
||||
@@ -85,6 +85,9 @@ public:
|
||||
* `InferenceEngine::Core::ReadNetwork(const std::string& model, const Blob::CPtr& weights) const`
|
||||
* function overload which takes a filesystem path to the model.
|
||||
* For ONNX case the second parameter should contain empty blob.
|
||||
* @note Created InferenceEngine::CNNNetwork object shares the weights with `weights` object.
|
||||
* So, do not create `weights` on temporary data which can be later freed, since the network
|
||||
* constant data will then point to invalid memory.
|
||||
* @return CNNNetwork
|
||||
*/
|
||||
CNNNetwork ReadNetwork(const std::string& model, const Blob::CPtr& weights) const;
|
||||
|
||||
@@ -10,3 +10,4 @@ ie_add_sample(NAME benchmark_app
|
||||
HEADERS ${HDR}
|
||||
DEPENDENCIES format_reader
|
||||
OPENCV_DEPENDENCIES imgcodecs)
|
||||
|
||||
|
||||
@@ -105,6 +105,9 @@ Options:
|
||||
-nthreads "<integer>" Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases).
|
||||
-enforcebf16 Optional. Enforcing of floating point operations execution in bfloat16 precision on platforms with native bfloat16 support. By default, this key sets "true" on platforms with native bfloat16 support and "false" for other platforms. Use "-enforcebf16=false" to disable this feature.
|
||||
-pin "YES"/"NO"/"NUMA" Optional. Enable threads->cores ("YES", default), threads->(NUMA)nodes ("NUMA") or completely disable ("NO") CPU threads pinning for CPU-involved inference.
|
||||
-ip "U8"/"FP16"/"FP32" Optional. Specifies precision for all input layers of the network.
|
||||
-op "U8"/"FP16"/"FP32" Optional. Specifies precision for all output layers of the network.
|
||||
-iop Optional. Specifies precision for input and output layers by name. Example: -iop "input:FP16, output:FP16". Notice that quotes are required. Overwrites precision from ip and op options for specified layers.
|
||||
|
||||
|
||||
Statistics dumping options:
|
||||
|
||||
@@ -108,6 +108,19 @@ static const char layout_message[] = "Optional. Prompts how network layouts shou
|
||||
// @brief message for quantization bits
|
||||
static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8 or 16 (default)";
|
||||
|
||||
// TODO: duplicate options from compile_tool
|
||||
static constexpr char inputs_precision_message[] =
|
||||
"Optional. Specifies precision for all input layers of the network.";
|
||||
|
||||
static constexpr char outputs_precision_message[] =
|
||||
"Optional. Specifies precision for all output layers of the network.";
|
||||
|
||||
static constexpr char iop_message[] =
|
||||
"Optional. Specifies precision for input and output layers by name.\n"
|
||||
" Example: -iop \"input:FP16, output:FP16\".\n"
|
||||
" Notice that quotes are required.\n"
|
||||
" Overwrites precision from ip and op options for specified layers.";
|
||||
|
||||
/// @brief Define flag for showing help message <br>
|
||||
DEFINE_bool(h, false, help_message);
|
||||
|
||||
@@ -198,6 +211,18 @@ DEFINE_string(layout, "", layout_message);
|
||||
/// @brief Define flag for quantization bits (default 16)
|
||||
DEFINE_int32(qb, 16, gna_qb_message);
|
||||
|
||||
/// @brief Specify precision for all input layers of the network
|
||||
DEFINE_string(ip, "", inputs_precision_message);
|
||||
|
||||
/// @brief Specify precision for all output layers of the network
|
||||
DEFINE_string(op, "", outputs_precision_message);
|
||||
|
||||
/// @brief Specify precision for input and output layers by name.
|
||||
/// Example: -iop "input:FP16, output:FP16".
|
||||
/// Notice that quotes are required.
|
||||
/// Overwrites precision from ip and op options for specified layers.
|
||||
DEFINE_string(iop, "", iop_message);
|
||||
|
||||
/**
|
||||
* @brief This function shows a help message
|
||||
*/
|
||||
@@ -237,4 +262,7 @@ static void showUsage() {
|
||||
std::cout << " -load_config " << load_config_message << std::endl;
|
||||
#endif
|
||||
std::cout << " -qb " << gna_qb_message << std::endl;
|
||||
std::cout << " -ip <value> " << inputs_precision_message << std::endl;
|
||||
std::cout << " -op <value> " << outputs_precision_message << std::endl;
|
||||
std::cout << " -iop \"<value>\" " << iop_message << std::endl;
|
||||
}
|
||||
|
||||
@@ -67,6 +67,14 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) {
|
||||
throw std::logic_error("only " + std::string(detailedCntReport) + " report type is supported for MULTI device");
|
||||
}
|
||||
|
||||
bool isNetworkCompiled = fileExt(FLAGS_m) == "blob";
|
||||
bool isPrecisionSet = !(FLAGS_ip.empty() && FLAGS_op.empty() && FLAGS_iop.empty());
|
||||
if (isNetworkCompiled && isPrecisionSet) {
|
||||
std::string err = std::string("Cannot set precision for a compiled network. ") +
|
||||
std::string("Please re-compile your network with required precision using compile_tool");
|
||||
|
||||
throw std::logic_error(err);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -380,6 +388,10 @@ int main(int argc, char *argv[]) {
|
||||
item.second->setPrecision(app_inputs_info.at(item.first).precision);
|
||||
}
|
||||
}
|
||||
|
||||
processPrecision(cnnNetwork, FLAGS_ip, FLAGS_op, FLAGS_iop);
|
||||
|
||||
printInputAndOutputsInfo(cnnNetwork);
|
||||
// ----------------- 7. Loading the model to the device --------------------------------------------------------
|
||||
next_step();
|
||||
startTime = Time::now();
|
||||
|
||||
@@ -85,3 +85,240 @@ void parseInputFilesArguments(std::vector<std::string> &files) {
|
||||
readInputFilesArguments(files, args.at(i));
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/**
 * @brief Splits a string into its non-empty tokens.
 * @param str   Text to split.
 * @param out   Receives the tokens; any previous content is discarded.
 * @param delim Character that separates tokens.
 *
 * Empty tokens (produced by leading, trailing or consecutive delimiters) are dropped.
 */
void splitStringList(const std::string& str, std::vector<std::string>& out, char delim) {
    out.clear();

    std::string::size_type tokenBegin = 0;
    while (tokenBegin < str.size()) {
        std::string::size_type tokenEnd = str.find(delim, tokenBegin);
        if (tokenEnd == std::string::npos) {
            tokenEnd = str.size();
        }

        // Only keep tokens that actually contain characters.
        if (tokenEnd > tokenBegin) {
            out.emplace_back(str, tokenBegin, tokenEnd - tokenBegin);
        }

        tokenBegin = tokenEnd + 1;
    }
}
|
||||
|
||||
/**
 * @brief Parses a "<key>:<value>,<key>:<value>,..." option string into a map.
 * @param argMap Raw option text; taken by value because whitespace is stripped in place.
 * @return Map from key to value. If a key is repeated, the last occurrence wins.
 * @throws std::invalid_argument if a comma-separated item does not consist of exactly
 *         two non-empty ':'-separated parts.
 */
std::map<std::string, std::string> parseArgMap(std::string argMap) {
    // The <cctype> classifiers have undefined behaviour for negative arguments other than EOF,
    // so every character is converted to unsigned char before it is classified.
    argMap.erase(std::remove_if(argMap.begin(), argMap.end(),
                                [](unsigned char ch) { return ::isspace(ch) != 0; }),
                 argMap.end());

    std::vector<std::string> pairs;
    splitStringList(argMap, pairs, ',');

    std::map<std::string, std::string> parsedMap;
    for (auto&& pair : pairs) {
        std::vector<std::string> keyValue;
        splitStringList(pair, keyValue, ':');
        if (keyValue.size() != 2) {
            throw std::invalid_argument("Invalid key/value pair " + pair + ". Expected <layer_name>:<value>");
        }

        parsedMap[keyValue[0]] = keyValue[1];
    }

    return parsedMap;
}
|
||||
|
||||
|
||||
using supported_precisions_t = std::unordered_map<std::string, InferenceEngine::Precision>;
|
||||
|
||||
/**
 * @brief Looks up a precision by name, ignoring the case of the name.
 * @param value                Precision name; upper-cased locally before the lookup.
 * @param supported_precisions Table mapping upper-case names to precisions.
 * @return The precision registered under the upper-cased name.
 * @throws std::logic_error if the name is not present in the table.
 */
InferenceEngine::Precision getPrecision(std::string value,
                                        const supported_precisions_t& supported_precisions) {
    // ::toupper has undefined behaviour for negative arguments other than EOF, so each
    // character goes through unsigned char first.
    std::transform(value.begin(), value.end(), value.begin(),
                   [](unsigned char ch) { return static_cast<char>(::toupper(ch)); });

    const auto precision = supported_precisions.find(value);
    if (precision == supported_precisions.end()) {
        throw std::logic_error("\"" + value + "\"" + " is not a valid precision");
    }

    return precision->second;
}
|
||||
|
||||
/**
 * @brief Resolves a precision name against the built-in table of accepted precisions.
 * @param value Precision name; the lookup itself is done by the two-argument overload.
 * @return The matching InferenceEngine::Precision.
 */
InferenceEngine::Precision getPrecision(const std::string& value) {
    // Function-local static: the table is constructed on the first call only.
    static const supported_precisions_t knownPrecisions{
        {"FP32", InferenceEngine::Precision::FP32},
        {"FP16", InferenceEngine::Precision::FP16},
        {"BF16", InferenceEngine::Precision::BF16},
        {"U64", InferenceEngine::Precision::U64},
        {"I64", InferenceEngine::Precision::I64},
        {"U32", InferenceEngine::Precision::U32},
        {"I32", InferenceEngine::Precision::I32},
        {"U16", InferenceEngine::Precision::U16},
        {"I16", InferenceEngine::Precision::I16},
        {"U8", InferenceEngine::Precision::U8},
        {"I8", InferenceEngine::Precision::I8},
        {"BOOL", InferenceEngine::Precision::BOOL},
    };

    return getPrecision(value, knownPrecisions);
}
|
||||
|
||||
/**
 * @brief Applies per-layer precisions given as "<layer_name>:<precision>,..." to a network.
 * @param network Network whose input/output descriptors are updated.
 * @param iop     Option text, parsed with parseArgMap.
 * @throws std::logic_error if a named layer is neither an input nor an output of the network.
 *
 * A name is matched against the inputs first and against the outputs only if no input matches.
 */
void setPrecisions(const InferenceEngine::CNNNetwork& network, const std::string &iop) {
    const auto requested = parseArgMap(iop);

    auto networkInputs = network.getInputsInfo();
    auto networkOutputs = network.getOutputsInfo();

    for (const auto& entry : requested) {
        const std::string& name = entry.first;
        const std::string& precisionName = entry.second;

        const auto inputIt = networkInputs.find(name);
        if (inputIt != networkInputs.end()) {
            inputIt->second->setPrecision(getPrecision(precisionName));
            continue;
        }

        const auto outputIt = networkOutputs.find(name);
        if (outputIt == networkOutputs.end()) {
            throw std::logic_error(name + " is not an input neither output");
        }

        outputIt->second->setPrecision(getPrecision(precisionName));
    }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
 * @brief Applies the user-requested precisions to a network.
 * @param network Network to update.
 * @param ip      Precision name for every input; ignored when empty.
 * @param op      Precision name for every output; ignored when empty.
 * @param iop     Per-layer "<layer_name>:<precision>,..." settings; ignored when empty.
 *
 * The per-layer settings are applied last, so they take effect on top of ip/op.
 */
void processPrecision(InferenceEngine::CNNNetwork& network, const std::string &ip, const std::string &op,
                      const std::string &iop) {
    if (!ip.empty()) {
        const auto inputPrecision = getPrecision(ip);
        for (auto&& inputEntry : network.getInputsInfo()) {
            inputEntry.second->setPrecision(inputPrecision);
        }
    }

    if (!op.empty()) {
        const auto outputPrecision = getPrecision(op);
        for (auto&& outputEntry : network.getOutputsInfo()) {
            outputEntry.second->setPrecision(outputPrecision);
        }
    }

    if (iop.empty()) {
        return;
    }
    setPrecisions(network, iop);
}
|
||||
|
||||
namespace {
|
||||
|
||||
using supported_layouts_t = std::unordered_map<std::string, InferenceEngine::Layout>;
|
||||
using matchLayoutToDims_t = std::unordered_map<size_t, size_t>;
|
||||
|
||||
/**
 * @brief Looks up a layout by name, ignoring the case of the name.
 * @param value             Layout name; upper-cased locally before the lookup.
 * @param supported_layouts Table mapping upper-case names to layouts.
 * @return The layout registered under the upper-cased name.
 * @throws std::logic_error if the name is not present in the table.
 */
InferenceEngine::Layout getLayout(std::string value,
                                  const supported_layouts_t& supported_layouts) {
    // ::toupper has undefined behaviour for negative arguments other than EOF, so each
    // character goes through unsigned char first.
    std::transform(value.begin(), value.end(), value.begin(),
                   [](unsigned char ch) { return static_cast<char>(::toupper(ch)); });

    const auto layout = supported_layouts.find(value);
    if (layout == supported_layouts.end()) {
        throw std::logic_error("\"" + value + "\"" + " is not a valid layout");
    }

    return layout->second;
}
|
||||
|
||||
/**
 * @brief Resolves a layout name against the built-in table of accepted layouts.
 * @param value Layout name; the lookup itself is done by the two-argument overload.
 * @return The matching InferenceEngine::Layout.
 */
InferenceEngine::Layout getLayout(const std::string& value) {
    // Function-local static: the table is constructed on the first call only.
    static const supported_layouts_t knownLayouts{
        {"NCDHW", InferenceEngine::Layout::NCDHW},
        {"NDHWC", InferenceEngine::Layout::NDHWC},
        {"NCHW", InferenceEngine::Layout::NCHW},
        {"NHWC", InferenceEngine::Layout::NHWC},
        {"CHW", InferenceEngine::Layout::CHW},
        {"NC", InferenceEngine::Layout::NC},
        {"C", InferenceEngine::Layout::C},
    };

    return getLayout(value, knownLayouts);
}
|
||||
|
||||
/**
 * @brief Checks whether a layout is applicable to a tensor with the given number of dimensions.
 * @param layout    Layout to check.
 * @param dimension Number of dimensions of the tensor.
 * @return true when the layout's rank equals @p dimension.
 * @throws std::logic_error for any layout other than NCDHW, NDHWC, NCHW, NHWC, CHW, NC and C.
 */
bool isMatchLayoutToDims(InferenceEngine::Layout layout, size_t dimension) {
    size_t expectedRank = 0;

    switch (layout) {
    case InferenceEngine::Layout::NCDHW:
    case InferenceEngine::Layout::NDHWC:
        expectedRank = 5;
        break;
    case InferenceEngine::Layout::NCHW:
    case InferenceEngine::Layout::NHWC:
        expectedRank = 4;
        break;
    case InferenceEngine::Layout::CHW:
        expectedRank = 3;
        break;
    case InferenceEngine::Layout::NC:
        expectedRank = 2;
        break;
    case InferenceEngine::Layout::C:
        expectedRank = 1;
        break;
    default:
        throw std::logic_error("Layout is not valid.");
    }

    return dimension == expectedRank;
}
|
||||
|
||||
/**
 * @brief Applies per-layer layouts given as "<layer_name>:<layout>,..." to a network.
 * @param network Network whose input/output descriptors are updated.
 * @param iol     Option text, parsed with parseArgMap. Taken by const reference (it was
 *                previously copied by value), matching setPrecisions.
 * @throws std::logic_error if a layout name is not recognised, if the layout's rank does not
 *         match the number of dimensions of the named layer, or if the named layer is neither
 *         an input nor an output of the network.
 *
 * A name is matched against the inputs first and against the outputs only if no input matches.
 */
void setLayouts(const InferenceEngine::CNNNetwork& network, const std::string& iol) {
    const auto user_layouts_map = parseArgMap(iol);

    auto inputs = network.getInputsInfo();
    auto outputs = network.getOutputsInfo();

    for (auto&& item : user_layouts_map) {
        const auto& layer_name = item.first;
        // The layout name is validated before the layer lookup, so an unknown layout is
        // reported even when the layer name is wrong as well.
        const auto user_layout = getLayout(item.second);

        const auto input = inputs.find(layer_name);
        const auto output = outputs.find(layer_name);

        if (input != inputs.end()) {
            if (!isMatchLayoutToDims(user_layout, input->second->getTensorDesc().getDims().size())) {
                throw std::logic_error(item.second + " layout is not applicable to " + layer_name);
            }

            input->second->setLayout(user_layout);
        } else if (output != outputs.end()) {
            if (!isMatchLayoutToDims(user_layout, output->second->getTensorDesc().getDims().size())) {
                throw std::logic_error(item.second + " layout is not applicable to " + layer_name);
            }

            output->second->setLayout(user_layout);
        } else {
            throw std::logic_error(layer_name + " is not an input neither output");
        }
    }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
 * @brief Applies the user-requested layouts to a network.
 * @param network Network to update.
 * @param il      Layout name for the inputs; ignored when empty.
 * @param ol      Layout name for the outputs; ignored when empty.
 * @param iol     Per-layer "<layer_name>:<layout>,..." settings; ignored when empty.
 *
 * For il/ol, layers whose number of dimensions does not match the layout are left unchanged
 * rather than reported as errors. The per-layer settings are applied last.
 */
void processLayout(InferenceEngine::CNNNetwork& network, const std::string& il, const std::string& ol, const std::string& iol) {
    if (!il.empty()) {
        const auto inputLayout = getLayout(il);
        for (auto&& inputEntry : network.getInputsInfo()) {
            const auto rank = inputEntry.second->getTensorDesc().getDims().size();
            if (!isMatchLayoutToDims(inputLayout, rank)) {
                continue;
            }
            inputEntry.second->setLayout(inputLayout);
        }
    }

    if (!ol.empty()) {
        const auto outputLayout = getLayout(ol);
        for (auto&& outputEntry : network.getOutputsInfo()) {
            const auto rank = outputEntry.second->getTensorDesc().getDims().size();
            if (!isMatchLayoutToDims(outputLayout, rank)) {
                continue;
            }
            outputEntry.second->setLayout(outputLayout);
        }
    }

    if (iol.empty()) {
        return;
    }
    setLayouts(network, iol);
}
|
||||
|
||||
/**
 * @brief Prints the name, precision and layout of every network input and output to std::cout.
 * @param network Network to describe.
 */
void printInputAndOutputsInfo(const InferenceEngine::CNNNetwork& network) {
    std::cout << "Network inputs:" << std::endl;
    for (auto&& inputEntry : network.getInputsInfo()) {
        const auto& info = inputEntry.second;
        std::cout << " " << inputEntry.first << " : "
                  << info->getPrecision() << " / " << info->getLayout() << std::endl;
    }

    std::cout << "Network outputs:" << std::endl;
    for (auto&& outputEntry : network.getOutputsInfo()) {
        const auto& info = outputEntry.second;
        std::cout << " " << outputEntry.first << " : "
                  << info->getPrecision() << " / " << info->getLayout() << std::endl;
    }
}
|
||||
|
||||
@@ -32,7 +32,7 @@ The package contains the following components:
|
||||
|
||||
* [Kaldi Statistical Language Model Conversion Tool](Kaldi_SLM_conversion_tool.md), which converts custom language models to use in the decoder
|
||||
|
||||
Additionally, [new acoustic and language models](http://download.01.org/opencv/2020/openvinotoolkit/2020.1/models_contrib/speech/kaldi/librispeech_s5/) to be used by new demos are located at [download.01.org](https://01.org/).
|
||||
Additionally, new acoustic and language models are available in the OpenVINO™ [storage](https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/librispeech_s5/).
|
||||
|
||||
## <a name="run-demos">Run Speech Recognition Demos with Pretrained Models</a>
|
||||
|
||||
|
||||
@@ -109,6 +109,10 @@ Options:
|
||||
If you use the cw_l or cw_r flag, then batch size and nthreads arguments are ignored.
|
||||
-cw_r "<integer>" Optional. Number of frames for right context windows (default is 0). Works only with context window networks.
|
||||
If you use the cw_r or cw_l flag, then batch size and nthreads arguments are ignored.
|
||||
-oname "<outputs>" Optional. Layer names for output blobs. The names are separated with ",". Allows to change the order of output layers for -o flag.
|
||||
Example: Output1:port,Output2:port.
|
||||
-iname "<inputs>" Optional. Layer names for input blobs. The names are separated with ",". Allows to change the order of input layers for -i flag.
|
||||
Example: Input1,Input2
|
||||
|
||||
```
|
||||
|
||||
@@ -136,7 +140,7 @@ The following pre-trained models are available:
|
||||
* rm\_lstm4f
|
||||
* rm\_cnn4a\_smbr
|
||||
|
||||
All of them can be downloaded from [https://download.01.org/openvinotoolkit/models_contrib/speech/kaldi](https://download.01.org/openvinotoolkit/models_contrib/speech/kaldi) or using the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) .
|
||||
All of them can be downloaded from [https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/](https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/) or using the OpenVINO [Model Downloader](@ref omz_tools_downloader_README).
|
||||
|
||||
|
||||
### Speech Inference
|
||||
|
||||
@@ -536,7 +536,12 @@ QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network,
|
||||
CLDNNPlugin::Config conf = _impl->m_config;
|
||||
UpdateConfig(conf, network, config);
|
||||
|
||||
Program prog;
|
||||
if (m_defaultContext == nullptr) {
|
||||
m_defaultContext.reset(new CLDNNRemoteCLContext(
|
||||
std::const_pointer_cast<InferenceEngine::IInferencePlugin>(shared_from_this()),
|
||||
ParamMap(), conf));
|
||||
}
|
||||
Program prog(m_defaultContext->getImpl()->GetEngine(), conf);
|
||||
auto function = network.getFunction();
|
||||
if (function == nullptr) {
|
||||
THROW_IE_EXCEPTION << "CNNetworkImpl representation is not supported anymore";
|
||||
|
||||
@@ -24,7 +24,7 @@ class clDNNEngine : public InferenceEngine::InferencePluginInternal,
|
||||
std::map<std::string, cldnn::device> device_map;
|
||||
std::mutex engine_mutex;
|
||||
|
||||
CLDNNRemoteCLContext::Ptr m_defaultContext;
|
||||
mutable CLDNNRemoteCLContext::Ptr m_defaultContext;
|
||||
|
||||
cldnn::device_info GetDeviceInfo(const std::map<std::string, std::string> &config) const;
|
||||
InferenceEngine::CNNNetwork CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network,
|
||||
|
||||
@@ -71,6 +71,8 @@ public:
|
||||
class Program {
|
||||
public:
|
||||
Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<const cldnn::engine> engine, const Config& config);
|
||||
Program(std::shared_ptr<const cldnn::engine> engine, const Config& config) : m_config(config), m_engine(engine),
|
||||
m_curBatch(-1), queryMode(false), m_max_batch(1) {}
|
||||
Program() : m_config({}), m_engine(nullptr), m_curBatch(-1), queryMode(false), m_max_batch(1) {}
|
||||
|
||||
static const cldnn::primitive_id m_preProcessTag;
|
||||
|
||||
@@ -21,7 +21,7 @@ CLDNNRemoteBlobImpl::CLDNNRemoteBlobImpl(ClContext::Ptr context,
|
||||
uint32_t plane,
|
||||
BlobType mem_type) :
|
||||
m_context(context), m_layout(layout), m_mem_type(mem_type), m_mem(mem), m_surf(surf), m_plane(plane),
|
||||
_handle(nullptr) {
|
||||
_handle(nullptr), _allocator(nullptr), m_memObject(nullptr), lockedHolder(nullptr) {
|
||||
}
|
||||
|
||||
ParamMap CLDNNRemoteBlobImpl::getParams() const {
|
||||
|
||||
@@ -287,7 +287,27 @@ public:
|
||||
QueryNetworkResult QueryNetwork(const CNNNetwork& network, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config) const override {
|
||||
auto parsed = parseDeviceNameIntoConfig(deviceName, config);
|
||||
return GetCPPPluginByName(parsed._deviceName).QueryNetwork(network, parsed._config);
|
||||
auto res = GetCPPPluginByName(parsed._deviceName).QueryNetwork(network, parsed._config);
|
||||
if (!network.getFunction() || res.supportedLayersMap.empty())
|
||||
return res;
|
||||
|
||||
const auto& func = network.getFunction();
|
||||
auto specialized_function = ngraph::clone_function(*func);
|
||||
|
||||
std::string defDevice = res.supportedLayersMap.begin()->second;
|
||||
ngraph::pass::ConstantFolding().run_on_function(specialized_function);
|
||||
std::unordered_set<std::string> opNames;
|
||||
|
||||
for (const auto& op : specialized_function->get_ops())
|
||||
opNames.emplace(op->get_friendly_name());
|
||||
|
||||
for (const auto& op : func->get_ops()) {
|
||||
if (opNames.find(op->get_friendly_name()) == opNames.end() ||
|
||||
(!res.supportedLayersMap.count(op->get_friendly_name()) &&
|
||||
std::dynamic_pointer_cast<ngraph::op::Constant>(op)))
|
||||
res.supportedLayersMap[op->get_friendly_name()] = defDevice;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
Parameter GetMetric(const std::string& deviceName, const std::string& name) const override {
|
||||
|
||||
@@ -325,7 +325,7 @@ BlockingDesc::BlockingDesc(const SizeVector& dims, Layout layout): offsetPadding
|
||||
case Layout::HWC:
|
||||
checkDims(dims.size(), 3);
|
||||
l_order = {1, 2, 0};
|
||||
l_dims = dims;
|
||||
l_dims = {dims[1], dims[2], dims[0]};
|
||||
break;
|
||||
case Layout::CN:
|
||||
checkDims(dims.size(), 2);
|
||||
|
||||
@@ -169,9 +169,9 @@ void MKLDNNEdge::allocate(const void* mem_ptr) {
|
||||
}
|
||||
|
||||
std::string MKLDNNEdge::name() const {
|
||||
auto childPtr = getChild();
|
||||
auto parentPtr = getParent();
|
||||
return childPtr->getName() + "<->" + parentPtr->getName();
|
||||
auto childPtr = getChild();
|
||||
return parentPtr->getName() + std::to_string(parent_port) + "<->" + childPtr->getName() + std::to_string(child_port);
|
||||
}
|
||||
|
||||
void MKLDNNEdge::externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache) {
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
#include <unordered_set>
|
||||
#include <limits>
|
||||
#include <fstream>
|
||||
@@ -67,6 +68,8 @@ using namespace InferenceEngine::details;
|
||||
typedef std::unordered_set<MKLDNNEdgePtr> edge_cluster_t;
|
||||
typedef std::vector<edge_cluster_t> edge_clusters_t;
|
||||
|
||||
mkldnn::engine MKLDNNGraph::eng(mkldnn::engine::kind::cpu, 0);
|
||||
|
||||
template<typename NET>
|
||||
void MKLDNNGraph::ApplyUnrollPasses(NET &net) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::ApplyUnrollPasses");
|
||||
@@ -453,15 +456,24 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() {
|
||||
|
||||
auto acquireSharedOutputs = [this](MKLDNNNodePtr & graphNode) {
|
||||
std::vector<shared_memory_ptr> outputs;
|
||||
bool hasLocalAllocatedEdges = false;
|
||||
bool hasExternalInvalidEdges = false;
|
||||
|
||||
for (size_t i = 0; i < graphNode->getChildEdges().size(); ++i) {
|
||||
auto edgePtr = graphNode->getChildEdgeAt(i);
|
||||
if (edgePtr && edgePtr->isUseExternalMemory()) {
|
||||
outputs.emplace_back(weightsCache->get(edgePtr->name()));
|
||||
if (edgePtr) {
|
||||
if (edgePtr->isUseExternalMemory()) {
|
||||
auto ptr = weightsCache->get(edgePtr->name());
|
||||
outputs.emplace_back(ptr);
|
||||
if (!ptr->isValid())
|
||||
hasExternalInvalidEdges = true;
|
||||
} else {
|
||||
hasLocalAllocatedEdges = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return outputs;
|
||||
return std::make_tuple(hasExternalInvalidEdges, hasLocalAllocatedEdges, outputs);
|
||||
};
|
||||
|
||||
for (auto &graphNode : graphNodes) {
|
||||
@@ -471,12 +483,10 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() {
|
||||
if (weightsCache) {
|
||||
auto sharedOutputs = acquireSharedOutputs(graphNode);
|
||||
|
||||
if (std::find_if(sharedOutputs.begin(), sharedOutputs.end(),
|
||||
[](const shared_memory_ptr & ptr) {
|
||||
return !ptr->isValid();
|
||||
}) != sharedOutputs.end()) {
|
||||
if (std::get<0>(sharedOutputs) || std::get<1>(sharedOutputs)) {
|
||||
graphNode->execute(stream);
|
||||
for (auto & output : sharedOutputs)
|
||||
|
||||
for (auto & output : std::get<2>(sharedOutputs))
|
||||
output->valid(true);
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -30,7 +30,7 @@ public:
|
||||
Ready = 1,
|
||||
};
|
||||
|
||||
MKLDNNGraph(mkldnn::engine eng = mkldnn::engine(mkldnn::engine::kind::cpu, 0)) : status(NotReady), eng(eng) {}
|
||||
MKLDNNGraph() = default;
|
||||
|
||||
Status GetStatus() {
|
||||
return status;
|
||||
@@ -172,7 +172,7 @@ protected:
|
||||
graphEdges.clear();
|
||||
_meanImages.clear();
|
||||
}
|
||||
Status status;
|
||||
Status status { NotReady };
|
||||
Config config;
|
||||
|
||||
// For dumping purposes. -1 - no counting, all other positive
|
||||
@@ -191,7 +191,7 @@ protected:
|
||||
std::map<std::string, MeanImage> _meanImages;
|
||||
std::string _name;
|
||||
|
||||
mkldnn::engine eng;
|
||||
static mkldnn::engine eng;
|
||||
|
||||
void Replicate(const InferenceEngine::CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr);
|
||||
void Replicate(const InferenceEngine::TensorIterator::Body &subgraph, const MKLDNNExtensionManager::Ptr& extMgr);
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <nodes/mkldnn_concat_node.h>
|
||||
#include <nodes/mkldnn_split_node.h>
|
||||
#include <ie_compound_blob.h>
|
||||
#include <ie_common.h>
|
||||
#include "mkldnn_exec_network.h"
|
||||
#include "mkldnn_itt.h"
|
||||
#include "nodes/common/cpu_convert.h"
|
||||
@@ -128,6 +129,13 @@ void MKLDNNPlugin::MKLDNNInferRequest::PushInputData() {
|
||||
default:
|
||||
THROW_IE_EXCEPTION << "Unsupported input precision " << input.second->getTensorDesc().getPrecision();
|
||||
}
|
||||
|
||||
// User can initialize input via setBlob API using tensorDesc with default (ANY) layout.
|
||||
// Currently IE doesn't specify behavior in such scenario, so we assume real layout is equal to the network input.
|
||||
if (input.second->getTensorDesc().getLayout() == InferenceEngine::ANY) {
|
||||
input.second->getTensorDesc().setLayout(_networkInputs[input.first]->getLayout());
|
||||
}
|
||||
|
||||
pushInput(input.first, input.second, inPrec);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -91,7 +91,7 @@ TensorDescCreator::makeFilteredRange(const CreatorsMap &map, unsigned int rank)
|
||||
|
||||
std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
|
||||
TensorDescCreator::makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector<TensorDescCreatorTypes>& supportedTypes) {
|
||||
size_t bitMask = 0ul;
|
||||
unsigned bitMask = 0ul;
|
||||
for (auto& item : supportedTypes) {
|
||||
bitMask |= 1 << static_cast<unsigned>(item);
|
||||
}
|
||||
|
||||
@@ -205,7 +205,7 @@ private:
|
||||
void parallelItInit(size_t start, std::vector<size_t>& counters, const std::vector<size_t>& iterationRange) {
|
||||
auto itCounter = counters.rbegin();
|
||||
auto itWork = iterationRange.rbegin();
|
||||
while (itCounter != counters.rend()) {
|
||||
while (itCounter != counters.rend() && itWork != iterationRange.rend()) {
|
||||
*itCounter = start % *itWork;
|
||||
start /= *itWork;
|
||||
++itCounter;
|
||||
@@ -217,7 +217,7 @@ private:
|
||||
auto itCounter = counters.rbegin();
|
||||
auto itWork = iterationRange.rbegin();
|
||||
|
||||
while (itCounter != counters.rend()) {
|
||||
while (itCounter != counters.rend() && itWork != iterationRange.rend()) {
|
||||
*itCounter = (*itCounter + 1) % *itWork;
|
||||
if (*itCounter != 0) {
|
||||
break;
|
||||
|
||||
@@ -991,13 +991,17 @@ void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() {
|
||||
}
|
||||
|
||||
void MKLDNNBinaryConvolutionNode::createPrimitive() {
|
||||
auto config = getSelectedPrimitiveDescriptor()->getConfig();
|
||||
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
|
||||
if (!selectedPrimitiveDescriptor)
|
||||
THROW_IE_EXCEPTION << "CPU binary convolution with name '" << getName() << "' doesn't have primitive descriptors.";
|
||||
|
||||
auto config = selectedPrimitiveDescriptor->getConfig();
|
||||
|
||||
auto srcDims = config.inConfs[0].desc.getDims();
|
||||
auto weiDims = config.inConfs[1].desc.getDims();
|
||||
auto dstDims = config.outConfs[0].desc.getDims();
|
||||
|
||||
auto implType = getSelectedPrimitiveDescriptor()->getImplementationType();
|
||||
auto implType = selectedPrimitiveDescriptor->getImplementationType();
|
||||
|
||||
jcp.ngroups = group;
|
||||
jcp.mb = srcDims[0];
|
||||
@@ -1295,7 +1299,11 @@ void MKLDNNBinaryConvolutionNode::execute(mkldnn::stream strm) {
|
||||
auto weights = reinterpret_cast<const uint8_t*>(weightsMemory->GetPtr());
|
||||
auto dst = reinterpret_cast<uint8_t*>(dstMemory->GetPtr());
|
||||
|
||||
auto config = getSelectedPrimitiveDescriptor()->getConfig();
|
||||
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
|
||||
if (!selectedPrimitiveDescriptor)
|
||||
THROW_IE_EXCEPTION << "CPU binary convolution with name '" << getName() << "' doesn't have primitive descriptors.";
|
||||
|
||||
auto config = selectedPrimitiveDescriptor->getConfig();
|
||||
|
||||
auto srcBlockDesc = config.inConfs[0].desc.getBlockingDesc();
|
||||
std::vector<size_t> srcStride(srcBlockDesc.getStrides().size());
|
||||
@@ -1315,7 +1323,7 @@ void MKLDNNBinaryConvolutionNode::execute(mkldnn::stream strm) {
|
||||
dstStride[dstBlockDesc.getOrder()[i]] = dstBlockDesc.getStrides()[i];
|
||||
}
|
||||
|
||||
auto implType = getSelectedPrimitiveDescriptor()->getImplementationType();
|
||||
auto implType = selectedPrimitiveDescriptor->getImplementationType();
|
||||
if (implType != impl_desc_type::ref) {
|
||||
executeOptimized(src, weights, dst, srcStride, weightsStride, dstStride);
|
||||
} else {
|
||||
|
||||
@@ -854,7 +854,10 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
|
||||
}
|
||||
|
||||
void MKLDNNDeformableConvolutionNode::createPrimitive() {
|
||||
auto config = getSelectedPrimitiveDescriptor()->getConfig();
|
||||
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
|
||||
if (!selectedPrimitiveDescriptor)
|
||||
THROW_IE_EXCEPTION << "CPU deformable convolution with name '" << getName() << "' doesn't have primitive descriptors.";
|
||||
auto config = selectedPrimitiveDescriptor->getConfig();
|
||||
|
||||
auto srcDims = config.inConfs[0].desc.getDims();
|
||||
auto weiDims = config.inConfs[2].desc.getDims();
|
||||
@@ -1057,7 +1060,10 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) {
|
||||
const auto *weights = reinterpret_cast<const float *>(srcMemory2.GetPtr());
|
||||
float *dst = reinterpret_cast<float *>(dstMemory.GetPtr());
|
||||
|
||||
auto config = getSelectedPrimitiveDescriptor()->getConfig();
|
||||
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
|
||||
if (!selectedPrimitiveDescriptor)
|
||||
THROW_IE_EXCEPTION << "CPU deformable convolution with name '" << getName() << "' doesn't have primitive descriptors.";
|
||||
auto config = selectedPrimitiveDescriptor->getConfig();
|
||||
|
||||
auto src_block_desc = config.inConfs[0].desc.getBlockingDesc();
|
||||
std::vector<size_t> src_strides(src_block_desc.getStrides().size());
|
||||
|
||||
@@ -942,7 +942,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si
|
||||
arg.src_stride = src_stride_size;
|
||||
arg.dst_stride = dst_stride_size;
|
||||
arg.work_amount = static_cast<size_t>(C2 / blk_size); // work amount for vector part
|
||||
arg.oc_off = static_cast<size_t>(c * sizeof(float));
|
||||
arg.oc_off = sizeof(float) * c;
|
||||
(*mvn_kernel)(&arg);
|
||||
});
|
||||
} else {
|
||||
@@ -956,7 +956,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si
|
||||
arg.src_stride = src_stride_size;
|
||||
arg.dst_stride = dst_stride_size;
|
||||
arg.work_amount = static_cast<size_t>(C2 / blk_size);
|
||||
arg.oc_off = static_cast<size_t>(c * sizeof(float));
|
||||
arg.oc_off = sizeof(float) * c;
|
||||
(*mvn_kernel)(&arg);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -252,7 +252,10 @@ void MKLDNNPadNode::padConstant() {
|
||||
return;
|
||||
}
|
||||
|
||||
InferenceEngine::Precision precision = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision();
|
||||
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
|
||||
if (!selectedPrimitiveDescriptor)
|
||||
THROW_IE_EXCEPTION << "CPU Pad node with name '" << getName() << "' doesn't have primitive descriptors.";
|
||||
InferenceEngine::Precision precision = selectedPrimitiveDescriptor->getConfig().inConfs[0].desc.getPrecision();
|
||||
OV_SWITCH(MKLDNNPlugin, PadConstantEmitter, this, precision,
|
||||
OV_CASE(InferenceEngine::Precision::FP32, float),
|
||||
OV_CASE(InferenceEngine::Precision::I32, int32_t),
|
||||
|
||||
@@ -49,11 +49,11 @@ private:
|
||||
InferenceEngine::SizeVector srcStrides;
|
||||
InferenceEngine::SizeVector dstStrides;
|
||||
InferenceEngine::SizeVector srcDimsForReflectOrSymmetric;
|
||||
size_t nDimsForWork;
|
||||
size_t workAmount;
|
||||
size_t lastDstDim;
|
||||
size_t shift;
|
||||
uint8_t sizeData;
|
||||
size_t nDimsForWork = 0lu;
|
||||
size_t workAmount = 0lu;
|
||||
size_t lastDstDim = 1lu;
|
||||
size_t shift = 0lu;
|
||||
uint8_t sizeData = 1;
|
||||
} params;
|
||||
|
||||
template<typename T>
|
||||
|
||||
@@ -279,8 +279,8 @@ private:
|
||||
Reg64 reg_output_scale = rbx;
|
||||
Reg64 reg_output_shift = rdx;
|
||||
|
||||
bool do_rounding;
|
||||
bool do_dequantization;
|
||||
bool do_rounding = true;
|
||||
bool do_dequantization = true;
|
||||
|
||||
inline void compute_planar() {
|
||||
int src_type_size = jqp_.src_prc.size();
|
||||
@@ -1209,7 +1209,11 @@ void MKLDNNQuantizeNode::createPrimitive() {
|
||||
|
||||
jqp.op_type = quantizeOpType;
|
||||
|
||||
if (getSelectedPrimitiveDescriptor()->getImplementationType() != impl_desc_type::ref) {
|
||||
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
|
||||
if (!selectedPrimitiveDescriptor)
|
||||
THROW_IE_EXCEPTION << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors.";
|
||||
|
||||
if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) {
|
||||
if (mayiuse(cpu::x64::avx512_common)) {
|
||||
if (isBinarization())
|
||||
quantize_kernel.reset(new jit_uni_binarization_kernel<cpu::x64::avx512_common>(jqp));
|
||||
@@ -1523,7 +1527,11 @@ void MKLDNNQuantizeNode::executeQuantization() {
|
||||
}
|
||||
|
||||
void MKLDNNQuantizeNode::execute(mkldnn::stream strm) {
|
||||
if (getSelectedPrimitiveDescriptor()->getImplementationType() != impl_desc_type::ref) {
|
||||
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
|
||||
if (!selectedPrimitiveDescriptor)
|
||||
THROW_IE_EXCEPTION << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors.";
|
||||
|
||||
if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) {
|
||||
if (jqp.op_type == QuantizeOpType::Binarization)
|
||||
executeBinarization();
|
||||
else
|
||||
|
||||
@@ -332,7 +332,10 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() {
|
||||
}
|
||||
|
||||
void MKLDNNROIPoolingNode::createPrimitive() {
|
||||
auto config = getSelectedPrimitiveDescriptor()->getConfig();
|
||||
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
|
||||
if (!selectedPrimitiveDescriptor)
|
||||
THROW_IE_EXCEPTION << "CPU ROI Pooling node with name '" << getName() << "' doesn't have primitive descriptors.";
|
||||
auto config = selectedPrimitiveDescriptor->getConfig();
|
||||
|
||||
const int simd_w = mayiuse(cpu::x64::avx512_common) ? 16 : 8;
|
||||
jpp.c_block = simd_w;
|
||||
@@ -378,7 +381,10 @@ void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) {
|
||||
const auto *src_roi = reinterpret_cast<const float *>(srcMemory1.GetPtr());
|
||||
float *dst = reinterpret_cast<float *>(dstMemory.GetPtr());
|
||||
|
||||
auto config = getSelectedPrimitiveDescriptor()->getConfig();
|
||||
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
|
||||
if (!selectedPrimitiveDescriptor)
|
||||
THROW_IE_EXCEPTION << "CPU ROI Pooling node with name '" << getName() << "' doesn't have primitive descriptors.";
|
||||
auto config = selectedPrimitiveDescriptor->getConfig();
|
||||
|
||||
auto src_strides = config.inConfs[0].desc.getBlockingDesc().getStrides();
|
||||
auto dst_strides = config.outConfs[0].desc.getBlockingDesc().getStrides();
|
||||
@@ -526,8 +532,8 @@ void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) {
|
||||
arg.xf = in_x - left_x_index;
|
||||
arg.yf = in_y - top_y_index;
|
||||
|
||||
arg.xoff = (size_t) ((right_x_index - left_x_index) * jpp.c_block * sizeof(float));
|
||||
arg.yoff = (size_t) ((bottom_y_index - top_y_index) * jpp.iw * jpp.c_block * sizeof(float));
|
||||
arg.xoff = sizeof(float) * (right_x_index - left_x_index) * jpp.c_block;
|
||||
arg.yoff = sizeof(float) * (bottom_y_index - top_y_index) * jpp.iw * jpp.c_block;
|
||||
|
||||
arg.src = &src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] +
|
||||
top_y_index * src_strides[2] + left_x_index * src_strides[3]];
|
||||
|
||||
@@ -458,7 +458,10 @@ void MKLDNNSplitNode::setDynamicBatchLim(int lim) {
|
||||
}
|
||||
|
||||
void MKLDNNSplitNode::prepareOptimizedParams() {
|
||||
const auto& inpTensorDesc = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc;
|
||||
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
|
||||
if (!selectedPrimitiveDescriptor)
|
||||
THROW_IE_EXCEPTION << "CPU Split node with name '" << getName() << "' doesn't have primitive descriptors.";
|
||||
const auto& inpTensorDesc = selectedPrimitiveDescriptor->getConfig().inConfs[0].desc;
|
||||
const auto outputPortsCount = outDims.size();
|
||||
|
||||
//find axis order position
|
||||
|
||||
@@ -187,8 +187,7 @@ private:
|
||||
} // namespace MKLDNNPlugin
|
||||
|
||||
MKLDNNTensorIteratorNode::MKLDNNTensorIteratorNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
|
||||
MKLDNNNode(layer, eng, cache),
|
||||
sub_graph(eng) {}
|
||||
MKLDNNNode(layer, eng, cache) {}
|
||||
|
||||
void MKLDNNTensorIteratorNode::getSupportedDescriptors() {
|
||||
auto *ti = dynamic_cast<class InferenceEngine::TensorIterator*>(getCnnLayer().get());
|
||||
|
||||
@@ -433,11 +433,11 @@ private:
|
||||
size_t num_boxes;
|
||||
size_t num_classes;
|
||||
|
||||
size_t max_output_boxes_per_class;
|
||||
float iou_threshold;
|
||||
float score_threshold;
|
||||
float soft_nms_sigma;
|
||||
float scale;
|
||||
size_t max_output_boxes_per_class = 0lu;
|
||||
float iou_threshold = 0.0f;
|
||||
float score_threshold = 0.0f;
|
||||
float soft_nms_sigma = 0.0f;
|
||||
float scale = 1.f;
|
||||
|
||||
std::vector<std::vector<size_t>> numFiltBox;
|
||||
const std::string inType = "input", outType = "output";
|
||||
|
||||
@@ -40,8 +40,8 @@ struct jit_args_logistic {
|
||||
struct jit_logistic_config_params {
|
||||
InferenceEngine::Precision src_dt;
|
||||
InferenceEngine::Precision dst_dt;
|
||||
unsigned src_data_size;
|
||||
unsigned dst_data_size;
|
||||
unsigned src_data_size = 0;
|
||||
unsigned dst_data_size = 0;
|
||||
};
|
||||
|
||||
struct jit_uni_logistic_kernel {
|
||||
|
||||
@@ -131,6 +131,9 @@ void FrontEnd::parseCTCGreedyDecoderSeqLen(const Model& model, const ie::CNNLaye
|
||||
"provided {} outputs",
|
||||
layer->type, layer->name, outputs.size());
|
||||
|
||||
DataVector conditionalOutputs(2);
|
||||
conditionalOutputs[0] = outputs[0];
|
||||
conditionalOutputs[1] = outputs[1] != nullptr ? outputs[1] : model->addFakeData();
|
||||
|
||||
const auto mergeRepeated = layer->GetParamAsBool("merge_repeated");
|
||||
const auto blankIndex = [&] {
|
||||
@@ -167,7 +170,7 @@ void FrontEnd::parseCTCGreedyDecoderSeqLen(const Model& model, const ie::CNNLaye
|
||||
sequenceLengthType);
|
||||
|
||||
_stageBuilder->addCTCGreedyDecoderSeqLenStage(model, layer->name, layer,
|
||||
inputs, outputs, mergeRepeated, blankIndex);
|
||||
inputs, conditionalOutputs, mergeRepeated, blankIndex);
|
||||
}
|
||||
|
||||
} // namespace vpu
|
||||
|
||||
@@ -41,6 +41,11 @@ target_link_libraries(${TARGET_NAME}
|
||||
PRIVATE
|
||||
mvnc inference_engine inference_engine_legacy vpu_graph_transformer)
|
||||
|
||||
# MyriadPlugin is not safe to unload it at runtime
|
||||
if(LINUX AND LINUX_OS_NAME MATCHES "Ubuntu")
|
||||
set_target_properties(${TARGET_NAME} PROPERTIES LINK_OPTIONS "-Wl,-z,nodelete")
|
||||
endif()
|
||||
|
||||
ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME})
|
||||
|
||||
# LTO
|
||||
|
||||
@@ -44,6 +44,19 @@ TEST_F(TensorDescTests, CreateBlockedBlobNCDHW) {
|
||||
ASSERT_EQ(Layout::BLOCKED, blockedBlob->getTensorDesc().getLayout());
|
||||
}
|
||||
|
||||
TEST_F(TensorDescTests, CompareHWCandCHWLayouts) {
|
||||
TensorDesc descCHW(Precision::FP32, {1, 3, 4}, Layout::CHW);
|
||||
TensorDesc descHWC(Precision::FP32, {1, 3, 4}, Layout::HWC);
|
||||
SizeVector chw = {0, 1, 2};
|
||||
SizeVector hwc = {1, 2, 0};
|
||||
|
||||
ASSERT_NE(descCHW, descHWC);
|
||||
ASSERT_NE(descCHW.getBlockingDesc(), descHWC.getBlockingDesc());
|
||||
ASSERT_NE(descCHW.getBlockingDesc().getOrder(), descHWC.getBlockingDesc().getOrder());
|
||||
ASSERT_EQ(descCHW.getBlockingDesc().getOrder(), chw);
|
||||
ASSERT_EQ(descHWC.getBlockingDesc().getOrder(), hwc);
|
||||
}
|
||||
|
||||
TEST_F(TensorDescTests, CompareNHWCandNCHWLayouts) {
|
||||
TensorDesc descNCHW(Precision::FP32, {1, 3, 4, 2}, Layout::NCHW);
|
||||
TensorDesc descNHWC(Precision::FP32, {1, 3, 4, 2}, Layout::NHWC);
|
||||
|
||||
@@ -572,6 +572,58 @@ TEST_P(IEClassNetworkTestP, QueryNetworkWithKSO) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(IEClassNetworkTestP, SetAffinityWithConstantBranches) {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
Core ie;
|
||||
|
||||
try {
|
||||
std::shared_ptr<ngraph::Function> func;
|
||||
{
|
||||
ngraph::PartialShape shape({1, 84});
|
||||
ngraph::element::Type type(ngraph::element::Type_t::f32);
|
||||
auto param = std::make_shared<ngraph::opset6::Parameter>(type, shape);
|
||||
auto matMulWeights =
|
||||
ngraph::opset6::Constant::create(ngraph::element::Type_t::f32, {10, 84}, {1});
|
||||
auto shapeOf = std::make_shared<ngraph::opset6::ShapeOf>(matMulWeights);
|
||||
auto gConst1 = ngraph::opset6::Constant::create(ngraph::element::Type_t::i32, {1}, {1});
|
||||
auto gConst2 = ngraph::opset6::Constant::create(ngraph::element::Type_t::i64, {}, {0});
|
||||
auto gather = std::make_shared<ngraph::opset6::Gather>(shapeOf, gConst1, gConst2);
|
||||
auto concatConst = ngraph::opset6::Constant::create(ngraph::element::Type_t::i64, {1}, {1});
|
||||
auto concat =
|
||||
std::make_shared<ngraph::opset6::Concat>(ngraph::NodeVector{concatConst, gather}, 0);
|
||||
auto relu = std::make_shared<ngraph::opset6::Relu>(param);
|
||||
auto reshape = std::make_shared<ngraph::opset6::Reshape>(relu, concat, false);
|
||||
auto matMul = std::make_shared<ngraph::opset6::MatMul>(reshape, matMulWeights, false, true);
|
||||
auto matMulBias =
|
||||
ngraph::opset6::Constant::create(ngraph::element::Type_t::f32, {1, 10}, {1});
|
||||
auto addBias = std::make_shared<ngraph::opset6::Add>(matMul, matMulBias);
|
||||
auto result = std::make_shared<ngraph::opset6::Result>(addBias);
|
||||
|
||||
ngraph::ParameterVector params = {param};
|
||||
ngraph::ResultVector results = {result};
|
||||
|
||||
func = std::make_shared<ngraph::Function>(results, params);
|
||||
}
|
||||
CNNNetwork net(func);
|
||||
|
||||
auto rres = ie.QueryNetwork(net, deviceName);
|
||||
auto rl_map = rres.supportedLayersMap;
|
||||
for (const auto & op : func->get_ops()) {
|
||||
if (!rl_map.count(op->get_friendly_name())) {
|
||||
FAIL() << "Op " << op->get_friendly_name() << " is not supported by " << deviceName;
|
||||
}
|
||||
}
|
||||
for (const auto & op : net.getFunction()->get_ops()) {
|
||||
std::string affinity = rl_map[op->get_friendly_name()];
|
||||
op->get_rt_info()["affinity"] = std::make_shared<ngraph::VariantWrapper<std::string>>(affinity);
|
||||
}
|
||||
ExecutableNetwork exeNetwork = ie.LoadNetwork(ksoNetwork, deviceName);
|
||||
} catch (const NotImplementedException& ex) {
|
||||
std::string message = ex.what();
|
||||
ASSERT_STR_CONTAINS(message, "[NOT_IMPLEMENTED] ngraph::Function is not supported natively");
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(IEClassNetworkTestP, SetAffinityWithKSO) {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
Core ie;
|
||||
|
||||
40
inference-engine/thirdparty/clDNN/README.md
vendored
40
inference-engine/thirdparty/clDNN/README.md
vendored
@@ -21,7 +21,7 @@ clDNN uses 3<sup>rd</sup>-party components licensed under following licenses:
|
||||
- *RapidJSON* under [Tencent\* License](https://github.com/Tencent/rapidjson/blob/master/license.txt)
|
||||
|
||||
## Documentation
|
||||
There is inline documentation available that can be [generated with Doxygen](#generating-documentation).
|
||||
There is inline documentation available that can be generated with Doxygen.
|
||||
|
||||
Accelerate Deep Learning Inference with Intel® Processor Graphics whitepaper [link](https://software.intel.com/en-us/articles/accelerating-deep-learning-inference-with-intel-processor-graphics).
|
||||
|
||||
@@ -45,7 +45,7 @@ request will be merged into our GitHub repository.
|
||||
## System Requirements
|
||||
clDNN supports Intel® HD Graphics and Intel® Iris® Graphics and is optimized for Gen9-Gen12LP architectures
|
||||
|
||||
clDNN currently uses OpenCL™ with multiple Intel® OpenCL™ extensions and requires Intel® Graphics Driver to run.
|
||||
clDNN currently uses OpenCL™ with multiple Intel OpenCL™ extensions and requires Intel® Graphics Driver to run.
|
||||
|
||||
clDNN requires CPU with Intel® SSE/Intel® AVX support.
|
||||
|
||||
@@ -62,32 +62,20 @@ The software dependencies are:
|
||||
> Intel® CPU intrinsics header (`<immintrin.h>`) must be available during compilation.
|
||||
|
||||
- [python™](https://www.python.org/downloads/) 2.7 or later (scripts are both compatible with python™ 2.7.x and python™ 3.x)
|
||||
- *(optional)* [Doxygen\*](http://www.stack.nl/~dimitri/doxygen/download.html) 1.8.13 or later
|
||||
Needed for manual generation of documentation from inline comments or running `docs` custom target which will generate it automatically.
|
||||
|
||||
> [GraphViz\*](http://www.graphviz.org/Download..php) (2.38 or later) is also recommended to generate documentation with all embedded diagrams.
|
||||
(Make sure that `dot` application is visible in the `PATH` environment variable.)
|
||||
|
||||
|
||||
### Generating documentation
|
||||
|
||||
Documentation is provided inline and can be generated in HTML format with Doxygen. We recommend to use latest
|
||||
[Doxygen\*](http://www.stack.nl/~dimitri/doxygen/download.html) and [GraphViz\*](http://www.graphviz.org/Download..php).
|
||||
|
||||
Documentation templates and configuration files are stored in `docs` subdirectory. You can simply call:
|
||||
|
||||
```shellscript
|
||||
cd docs && doxygen
|
||||
```
|
||||
to generate HTML documentation in `docs/html` subdirectory.
|
||||
|
||||
There is also custom CMake target named `docs` which will generate documentation in `CLDNN__OUTPUT_BIN_DIR/html` directory. For example, when using Unix makefiles, you can run:
|
||||
```
|
||||
make docs
|
||||
```
|
||||
in order to create it.
|
||||
# Trademark Information
|
||||
|
||||
Intel, the Intel logo, Intel Atom, Intel Core, Intel Xeon Phi, Iris, OpenVINO,
|
||||
the OpenVINO logo, Pentium, VTune, and Xeon are trademarks
|
||||
of Intel Corporation or its subsidiaries.
|
||||
|
||||
\* Other names and brands may be claimed as the property of others.
|
||||
|
||||
Copyright © 2020, Intel® Corporation
|
||||
Microsoft, Windows, and the Windows logo are trademarks, or registered
|
||||
trademarks of Microsoft Corporation in the United States and/or other
|
||||
countries.
|
||||
|
||||
OpenCL and the OpenCL logo are trademarks of Apple Inc. used by permission
|
||||
by Khronos.
|
||||
|
||||
Copyright © 2021, Intel Corporation
|
||||
|
||||
@@ -25,7 +25,7 @@ struct ctc_greedy_decoder_params : public base_params {
|
||||
ctc_greedy_decoder_params() : base_params(KernelType::CTC_GREEDY_DECODER) {}
|
||||
|
||||
bool merge_repeated = true;
|
||||
uint32_t blank_index;
|
||||
uint32_t blank_index = 0;
|
||||
uint32_t outputs_num = 1;
|
||||
};
|
||||
|
||||
|
||||
@@ -27,9 +27,9 @@ struct mvn_params : public base_params {
|
||||
mvn_params() : base_params(KernelType::MVN) {}
|
||||
|
||||
MVNMode mvnMode = MVNMode::WITHIN_CHANNELS;
|
||||
bool mvnNormalizeVariance;
|
||||
float epsilon;
|
||||
MVNEpsMode mvnEpsMode;
|
||||
bool mvnNormalizeVariance = false;
|
||||
float epsilon = 0.0f;
|
||||
MVNEpsMode mvnEpsMode = MVNEpsMode::INSIDE_SQRT;
|
||||
|
||||
virtual ParamsKey GetParamsKey() const {
|
||||
ParamsKey k = base_params::GetParamsKey();
|
||||
|
||||
@@ -393,19 +393,19 @@ bool layout_optimizer::convolution_b_fs_yx_fsv16_opt(const layout& input_layout,
|
||||
}
|
||||
// A set of rules that define when b_fs_yx_fsv16 mem format can be used for fp16/fp32 case
|
||||
int32_t feature_block_size = 16;
|
||||
int32_t correct_data_type = input_layout.data_type == data_types::f16 || input_layout.data_type == data_types::f32;
|
||||
correct_data_type &= weights_layout.data_type == input_layout.data_type;
|
||||
int32_t correct_batch = (input_layout.size.batch[0] == 1) || (input_layout.size.batch[0] > 1 && input_layout.data_type == data_types::f32);
|
||||
int32_t correct_spatial_dims = input_layout.size.spatial[2] == 1 && input_layout.size.spatial[3] == 1;
|
||||
bool correct_data_type = (input_layout.data_type == data_types::f16 || input_layout.data_type == data_types::f32) &&
|
||||
(weights_layout.data_type == input_layout.data_type);
|
||||
bool correct_batch = (input_layout.size.batch[0] == 1) || (input_layout.size.batch[0] > 1 && input_layout.data_type == data_types::f32);
|
||||
bool correct_spatial_dims = input_layout.size.spatial[2] == 1 && input_layout.size.spatial[3] == 1;
|
||||
int32_t required_feature_num = weak_restrictions ? feature_block_size / 2 : feature_block_size;
|
||||
int32_t correct_in_feature = (input_layout.size.feature[0] >= required_feature_num &&
|
||||
bool correct_in_feature = (input_layout.size.feature[0] >= required_feature_num &&
|
||||
output_layout.size.feature[0] >= required_feature_num);
|
||||
int32_t in_features_per_group = input_layout.size.feature[0] / conv->groups;
|
||||
int32_t out_features_per_group = output_layout.size.feature[0] / conv->groups;
|
||||
if (!correct_in_feature && input_layout.size.feature[0] <= 4 && out_features_per_group >= feature_block_size)
|
||||
correct_in_feature = true;
|
||||
int32_t depthwise = conv->groups == static_cast<uint32_t>(input_layout.size.feature[0]); // depthwise conv
|
||||
int32_t grouped = ((feature_block_size % out_features_per_group == 0) &&
|
||||
bool depthwise = conv->groups == static_cast<uint32_t>(input_layout.size.feature[0]); // depthwise conv
|
||||
bool grouped = ((feature_block_size % out_features_per_group == 0) &&
|
||||
(feature_block_size % in_features_per_group == 0) &&
|
||||
(feature_block_size / out_features_per_group > 1) &&
|
||||
(feature_block_size / in_features_per_group > 1) &&
|
||||
|
||||
@@ -1,20 +1,6 @@
|
||||
/*
|
||||
* Copyright 2017-2019 Intel Corporation.
|
||||
* The source code, information and material ("Material") contained herein is
|
||||
* owned by Intel Corporation or its suppliers or licensors, and title to such
|
||||
* Material remains with Intel Corporation or its suppliers or licensors.
|
||||
* The Material contains proprietary information of Intel or its suppliers and
|
||||
* licensors. The Material is protected by worldwide copyright laws and treaty
|
||||
* provisions.
|
||||
* No part of the Material may be used, copied, reproduced, modified, published,
|
||||
* uploaded, posted, transmitted, distributed or disclosed in any way without
|
||||
* Intel's prior express written permission. No license under any patent,
|
||||
* copyright or other intellectual property rights in the Material is granted to
|
||||
* or conferred upon you, either expressly, by implication, inducement, estoppel
|
||||
* or otherwise.
|
||||
* Any license under such intellectual property rights must be express and
|
||||
* approved by Intel in writing.
|
||||
*/
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "XLinkStringUtils.h"
|
||||
|
||||
|
||||
@@ -1,20 +1,6 @@
|
||||
/*
|
||||
* Copyright 2017-2019 Intel Corporation.
|
||||
* The source code, information and material ("Material") contained herein is
|
||||
* owned by Intel Corporation or its suppliers or licensors, and title to such
|
||||
* Material remains with Intel Corporation or its suppliers or licensors.
|
||||
* The Material contains proprietary information of Intel or its suppliers and
|
||||
* licensors. The Material is protected by worldwide copyright laws and treaty
|
||||
* provisions.
|
||||
* No part of the Material may be used, copied, reproduced, modified, published,
|
||||
* uploaded, posted, transmitted, distributed or disclosed in any way without
|
||||
* Intel's prior express written permission. No license under any patent,
|
||||
* copyright or other intellectual property rights in the Material is granted to
|
||||
* or conferred upon you, either expressly, by implication, inducement, estoppel
|
||||
* or otherwise.
|
||||
* Any license under such intellectual property rights must be express and
|
||||
* approved by Intel in writing.
|
||||
*/
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "mvnc_data.h"
|
||||
#include "mvnc_tool.h"
|
||||
|
||||
@@ -18,7 +18,9 @@
|
||||
#include <vpu/vpu_plugin_config.hpp>
|
||||
#include <vpu/private_plugin_config.hpp>
|
||||
#include <vpu/utils/string.hpp>
|
||||
|
||||
#include "samples/common.hpp"
|
||||
#include "samples/args_helper.hpp"
|
||||
|
||||
static constexpr char help_message[] =
|
||||
"Optional. Print the usage message.";
|
||||
@@ -208,106 +210,6 @@ IE_SUPPRESS_DEPRECATED_END
|
||||
return config;
|
||||
}
|
||||
|
||||
static std::map<std::string, std::string> parseArgMap(std::string argMap) {
|
||||
argMap.erase(std::remove_if(argMap.begin(), argMap.end(), ::isspace), argMap.end());
|
||||
|
||||
std::vector<std::string> pairs;
|
||||
vpu::splitStringList(argMap, pairs, ',');
|
||||
|
||||
std::map<std::string, std::string> parsedMap;
|
||||
for (auto&& pair : pairs) {
|
||||
std::vector<std::string> keyValue;
|
||||
vpu::splitStringList(pair, keyValue, ':');
|
||||
if (keyValue.size() != 2) {
|
||||
throw std::invalid_argument("Invalid key/value pair " + pair + ". Expected <layer_name>:<value>");
|
||||
}
|
||||
|
||||
parsedMap[keyValue[0]] = keyValue[1];
|
||||
}
|
||||
|
||||
return parsedMap;
|
||||
}
|
||||
|
||||
using supported_precisions_t = std::unordered_map<std::string, InferenceEngine::Precision>;
|
||||
using supported_layouts_t = std::unordered_map<std::string, InferenceEngine::Layout>;
|
||||
using matchLayoutToDims_t = std::unordered_map<size_t, size_t>;
|
||||
|
||||
static InferenceEngine::Layout getLayout(std::string value,
|
||||
const supported_layouts_t& supported_layouts) {
|
||||
std::transform(value.begin(), value.end(), value.begin(), ::toupper);
|
||||
|
||||
const auto layout = supported_layouts.find(value);
|
||||
if (layout == supported_layouts.end()) {
|
||||
throw std::logic_error("\"" + value + "\"" + " is not a valid layout");
|
||||
}
|
||||
|
||||
return layout->second;
|
||||
}
|
||||
|
||||
static InferenceEngine::Layout getLayout(const std::string& value) {
|
||||
static const supported_layouts_t supported_layouts = {
|
||||
{ "NCDHW", InferenceEngine::Layout::NCDHW },
|
||||
{ "NDHWC", InferenceEngine::Layout::NDHWC },
|
||||
{ "NCHW", InferenceEngine::Layout::NCHW },
|
||||
{ "NHWC", InferenceEngine::Layout::NHWC },
|
||||
{ "CHW", InferenceEngine::Layout::CHW },
|
||||
{ "NC", InferenceEngine::Layout::NC },
|
||||
{ "C", InferenceEngine::Layout::C },
|
||||
};
|
||||
|
||||
return getLayout(value, supported_layouts);
|
||||
}
|
||||
|
||||
static bool isMatchLayoutToDims(InferenceEngine::Layout layout, size_t dimension) {
|
||||
static const matchLayoutToDims_t matchLayoutToDims = {
|
||||
{static_cast<size_t>(InferenceEngine::Layout::NCDHW), 5 },
|
||||
{static_cast<size_t>(InferenceEngine::Layout::NDHWC), 5 },
|
||||
{static_cast<size_t>(InferenceEngine::Layout::NCHW), 4 },
|
||||
{static_cast<size_t>(InferenceEngine::Layout::NHWC), 4 },
|
||||
{static_cast<size_t>(InferenceEngine::Layout::CHW), 3 },
|
||||
{static_cast<size_t>(InferenceEngine::Layout::NC), 2 },
|
||||
{static_cast<size_t>(InferenceEngine::Layout::C), 1 }
|
||||
};
|
||||
|
||||
const auto dims = matchLayoutToDims.find(static_cast<size_t>(layout));
|
||||
if (dims == matchLayoutToDims.end()) {
|
||||
throw std::logic_error("Layout is not valid.");
|
||||
}
|
||||
|
||||
return dimension == dims->second;
|
||||
}
|
||||
|
||||
static InferenceEngine::Precision getPrecision(std::string value,
|
||||
const supported_precisions_t& supported_precisions) {
|
||||
std::transform(value.begin(), value.end(), value.begin(), ::toupper);
|
||||
|
||||
const auto precision = supported_precisions.find(value);
|
||||
if (precision == supported_precisions.end()) {
|
||||
throw std::logic_error("\"" + value + "\"" + " is not a valid precision");
|
||||
}
|
||||
|
||||
return precision->second;
|
||||
}
|
||||
|
||||
static InferenceEngine::Precision getPrecision(const std::string& value) {
|
||||
static const supported_precisions_t supported_precisions = {
|
||||
{ "FP32", InferenceEngine::Precision::FP32 },
|
||||
{ "FP16", InferenceEngine::Precision::FP16 },
|
||||
{ "BF16", InferenceEngine::Precision::BF16 },
|
||||
{ "U64", InferenceEngine::Precision::U64 },
|
||||
{ "I64", InferenceEngine::Precision::I64 },
|
||||
{ "U32", InferenceEngine::Precision::U32 },
|
||||
{ "I32", InferenceEngine::Precision::I32 },
|
||||
{ "U16", InferenceEngine::Precision::U16 },
|
||||
{ "I16", InferenceEngine::Precision::I16 },
|
||||
{ "U8", InferenceEngine::Precision::U8 },
|
||||
{ "I8", InferenceEngine::Precision::I8 },
|
||||
{ "BOOL", InferenceEngine::Precision::BOOL },
|
||||
};
|
||||
|
||||
return getPrecision(value, supported_precisions);
|
||||
}
|
||||
|
||||
bool isFP16(InferenceEngine::Precision precision) {
|
||||
return precision == InferenceEngine::Precision::FP16;
|
||||
}
|
||||
@@ -320,29 +222,6 @@ bool isFloat(InferenceEngine::Precision precision) {
|
||||
return isFP16(precision) || isFP32(precision);
|
||||
}
|
||||
|
||||
static void setPrecisions(const InferenceEngine::CNNNetwork& network) {
|
||||
const auto user_precisions_map = parseArgMap(FLAGS_iop);
|
||||
|
||||
auto inputs = network.getInputsInfo();
|
||||
auto outputs = network.getOutputsInfo();
|
||||
|
||||
for (auto&& item : user_precisions_map) {
|
||||
const auto& layer_name = item.first;
|
||||
const auto& user_precision = item.second;
|
||||
|
||||
const auto input = inputs.find(layer_name);
|
||||
const auto output = outputs.find(layer_name);
|
||||
|
||||
if (input != inputs.end()) {
|
||||
input->second->setPrecision(getPrecision(user_precision));
|
||||
} else if (output != outputs.end()) {
|
||||
output->second->setPrecision(getPrecision(user_precision));
|
||||
} else {
|
||||
throw std::logic_error(layer_name + " is not an input neither output");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void setDefaultIO(InferenceEngine::CNNNetwork& network) {
|
||||
const bool isMYRIAD = FLAGS_d.find("MYRIAD") != std::string::npos;
|
||||
const bool isVPUX = FLAGS_d.find("VPUX") != std::string::npos;
|
||||
@@ -377,81 +256,6 @@ static void setDefaultIO(InferenceEngine::CNNNetwork& network) {
|
||||
}
|
||||
}
|
||||
|
||||
static void processPrecisions(InferenceEngine::CNNNetwork& network) {
|
||||
if (!FLAGS_ip.empty()) {
|
||||
const auto user_precision = getPrecision(FLAGS_ip);
|
||||
for (auto&& layer : network.getInputsInfo()) {
|
||||
layer.second->setPrecision(user_precision);
|
||||
}
|
||||
}
|
||||
|
||||
if (!FLAGS_op.empty()) {
|
||||
auto user_precision = getPrecision(FLAGS_op);
|
||||
for (auto&& layer : network.getOutputsInfo()) {
|
||||
layer.second->setPrecision(user_precision);
|
||||
}
|
||||
}
|
||||
|
||||
if (!FLAGS_iop.empty()) {
|
||||
setPrecisions(network);
|
||||
}
|
||||
}
|
||||
|
||||
static void setLayouts(const InferenceEngine::CNNNetwork& network) {
|
||||
const auto user_layouts_map = parseArgMap(FLAGS_iol);
|
||||
|
||||
auto inputs = network.getInputsInfo();
|
||||
auto outputs = network.getOutputsInfo();
|
||||
|
||||
for (auto&& item : user_layouts_map) {
|
||||
const auto& layer_name = item.first;
|
||||
const auto& user_layout = getLayout(item.second);
|
||||
|
||||
const auto input = inputs.find(layer_name);
|
||||
const auto output = outputs.find(layer_name);
|
||||
|
||||
if (input != inputs.end()) {
|
||||
if (!isMatchLayoutToDims(user_layout, input->second->getTensorDesc().getDims().size())) {
|
||||
throw std::logic_error(item.second + " layout is not applicable to " + layer_name);
|
||||
}
|
||||
|
||||
input->second->setLayout(user_layout);
|
||||
} else if (output != outputs.end()) {
|
||||
if (!isMatchLayoutToDims(user_layout, output->second->getTensorDesc().getDims().size())) {
|
||||
throw std::logic_error(item.second + " layout is not applicable to " + layer_name);
|
||||
}
|
||||
|
||||
output->second->setLayout(user_layout);
|
||||
} else {
|
||||
throw std::logic_error(layer_name + " is not an input neither output");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void processLayout(InferenceEngine::CNNNetwork& network) {
|
||||
if (!FLAGS_il.empty()) {
|
||||
const auto layout = getLayout(FLAGS_il);
|
||||
for (auto&& layer : network.getInputsInfo()) {
|
||||
if (isMatchLayoutToDims(layout, layer.second->getTensorDesc().getDims().size())) {
|
||||
layer.second->setLayout(layout);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!FLAGS_ol.empty()) {
|
||||
const auto layout = getLayout(FLAGS_ol);
|
||||
for (auto&& layer : network.getOutputsInfo()) {
|
||||
if (isMatchLayoutToDims(layout, layer.second->getTensorDesc().getDims().size())) {
|
||||
layer.second->setLayout(layout);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!FLAGS_iol.empty()) {
|
||||
setLayouts(network);
|
||||
}
|
||||
}
|
||||
|
||||
std::string getFileNameFromPath(const std::string& path,
|
||||
#if defined(_WIN32)
|
||||
const std::string& sep = "\\") {
|
||||
@@ -487,18 +291,10 @@ int main(int argc, char* argv[]) {
|
||||
auto network = ie.ReadNetwork(FLAGS_m);
|
||||
|
||||
setDefaultIO(network);
|
||||
processPrecisions(network);
|
||||
processLayout(network);
|
||||
processPrecision(network, FLAGS_ip, FLAGS_op, FLAGS_iop);
|
||||
processLayout(network, FLAGS_il, FLAGS_ol, FLAGS_iol);
|
||||
|
||||
std::cout << "Network inputs:" << std::endl;
|
||||
for (auto&& layer : network.getInputsInfo()) {
|
||||
std::cout << " " << layer.first << " : " << layer.second->getPrecision() << " / " << layer.second->getLayout() << std::endl;
|
||||
}
|
||||
std::cout << "Network outputs:" << std::endl;
|
||||
for (auto&& layer : network.getOutputsInfo()) {
|
||||
std::cout << " " << layer.first << " : " << layer.second->getPrecision() << " / " << layer.second->getLayout() << std::endl;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
printInputAndOutputsInfo(network);
|
||||
|
||||
auto timeBeforeLoadNetwork = std::chrono::steady_clock::now();
|
||||
auto executableNetwork = ie.LoadNetwork(network, FLAGS_d, configure());
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
"""
|
||||
Copyright (C) 2018-2020 Intel Corporation
|
||||
Copyright (C) 2018-2021 Intel Corporation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -16,68 +16,20 @@
|
||||
|
||||
import logging as log
|
||||
|
||||
import networkx as nx
|
||||
|
||||
from mo.front.common.replacement import FrontReplacementOp
|
||||
from mo.front.common.replacement import FrontReplacementPattern
|
||||
from mo.graph.graph import Graph
|
||||
from mo.utils.error import Error
|
||||
|
||||
|
||||
class AssignElimination(FrontReplacementOp):
|
||||
op = "Assign"
|
||||
class AssignAndAssertElimination(FrontReplacementPattern):
|
||||
# The solution with removal of Assign and Assert operations is temporary.
|
||||
# The proper solution is to keep these operations until the partial inference
|
||||
# phase when control flow edges are properly handled and later unnecessary ones are eliminated.
|
||||
# In order to achieve this we need to implement control flow inference function
|
||||
# for these operations similar to "Merge" and "Switch" operations.
|
||||
enabled = True
|
||||
|
||||
def replace_sub_graph(self, graph: Graph, match: dict):
|
||||
node = match['op']
|
||||
# here we request all data flow output edges (control flow edges will not be listed)
|
||||
out_edges = node.out_edges()
|
||||
if len(out_edges) == 0:
|
||||
graph.remove_node(node.id)
|
||||
log.debug('Assign op was removed {}'.format(node.id))
|
||||
else:
|
||||
raise Error('Data flow edge coming out of Assign node {}'.format(node.id))
|
||||
|
||||
|
||||
class AssignSubElimination(FrontReplacementOp):
|
||||
op = "AssignSub"
|
||||
enabled = True
|
||||
|
||||
def replace_sub_graph(self, graph: Graph, match: dict):
|
||||
node = match['op']
|
||||
# here we request all data flow output edges (control flow edges will not be listed)
|
||||
out_edges = node.out_edges()
|
||||
if len(out_edges) == 0:
|
||||
graph.remove_node(node.id)
|
||||
log.debug('AssignSub op was removed {}'.format(node.id))
|
||||
else:
|
||||
raise Error('Data flow edge coming out of AssignSub node {}'.format(node.id))
|
||||
|
||||
|
||||
class AssignAddElimination(FrontReplacementOp):
|
||||
op = "AssignAdd"
|
||||
enabled = True
|
||||
|
||||
def replace_sub_graph(self, graph: Graph, match: dict):
|
||||
node = match['op']
|
||||
# here we request all data flow output edges (control flow edges will not be listed)
|
||||
out_edges = node.out_edges()
|
||||
if len(out_edges) == 0:
|
||||
graph.remove_node(node.id)
|
||||
log.debug('AssignAdd op was removed {}'.format(node.id))
|
||||
else:
|
||||
raise Error('Data flow edge coming out of AssignAdd node {}'.format(node.id))
|
||||
|
||||
|
||||
class AssertElimination(FrontReplacementOp):
|
||||
op = "Assert"
|
||||
enabled = True
|
||||
|
||||
def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
|
||||
node = match['op']
|
||||
# here we request all data flow output edges (control flow edges will not be listed)
|
||||
out_edges = node.out_edges()
|
||||
if len(out_edges) == 0:
|
||||
graph.remove_node(node.id)
|
||||
log.debug('Assert op was removed {}'.format(node.id))
|
||||
else:
|
||||
raise Error('Data flow edge coming out of Assert node {}'.format(node.id))
|
||||
def find_and_replace_pattern(self, graph: Graph):
|
||||
for node in graph.get_op_nodes():
|
||||
if node.soft_get('op') in ["Assign", "AssignSub", "AssignAdd", "Assert"]:
|
||||
log.debug('"{}" op with id="{}" was removed'.format(node.op, node.id))
|
||||
graph.remove_node(node.id)
|
||||
|
||||
@@ -51,6 +51,12 @@ def update_body_graph(body_graph: Graph, subgraph_proto: dict,
|
||||
# add incoming edges based on data_nodes_map
|
||||
for dst_port, inp in enumerate(pb_node.input):
|
||||
orig_src_id = inp.split(":")[0]
|
||||
|
||||
# TODO: avoid this temporal workaround for TF 2.4 or higher RNN layers:
|
||||
# skip control flow dependency
|
||||
if orig_src_id[0] == '^':
|
||||
continue
|
||||
|
||||
src_id = map_original_name[orig_src_id]
|
||||
src_port = 0 if len(inp.split(":")) == 1 else int(inp.split(":")[-1])
|
||||
assert (body_graph.has_node(src_id))
|
||||
|
||||
@@ -20,6 +20,7 @@ import numpy as np
|
||||
from extensions.ops.tensor_iterator import TensorIterator
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.graph.graph import Node, Graph
|
||||
from mo.middle.passes.fusing.helpers import common_bfs
|
||||
from mo.middle.passes.infer import partial_infer
|
||||
from mo.ops.const import Const
|
||||
|
||||
@@ -312,14 +313,52 @@ class Loop(TensorIterator):
|
||||
'from_port': 0,
|
||||
'to_port': 0})
|
||||
|
||||
@staticmethod
|
||||
def parameter_unchanged_after_iteration(loop_node: Node, body_parameter: Node):
|
||||
"""
|
||||
Checks if the body Parameter node is connected to some body Result and the data provided to Result is not
|
||||
changed between iterations. The data is considered unchanged if:
|
||||
1. There is no back edge for this Parameter OR
|
||||
2. There is a back edge from some Result to Parameter and there are only Identity ops in between or
|
||||
Parameter is connected to Result directly.
|
||||
|
||||
:param loop_node: the Loop node to check
|
||||
:param body_parameter: the body Parameter node
|
||||
:return: the result of the check
|
||||
"""
|
||||
assert body_parameter.id in loop_node.body
|
||||
assert body_parameter.soft_get('op') == 'Parameter'
|
||||
if not any([attr['to_layer'] == body_parameter.soft_get('internal_layer_id') for attr in loop_node.back_edges]):
|
||||
return True
|
||||
|
||||
for back_edge_attrs in loop_node.back_edges:
|
||||
if back_edge_attrs['to_layer'] == body_parameter.soft_get('internal_layer_id'):
|
||||
result_internal_id = back_edge_attrs['from_layer']
|
||||
result_nodes = loop_node.body.get_op_nodes(internal_layer_id=result_internal_id)
|
||||
assert len(result_nodes) == 1, 'There should be exactly one node with id {}, but there are {}' \
|
||||
''.format(result_internal_id, len(result_nodes))
|
||||
result_node = result_nodes[0]
|
||||
# check that the Result node consumes data from Parameter node directly or through Identity operations
|
||||
parameters = common_bfs(result_node, ['Identity'], ['Parameter'], is_backward=True, attr_to_check='op',
|
||||
follow_multi_consumer_data_nodes=True)
|
||||
if any([node.soft_get('internal_layer_id') == body_parameter.internal_layer_id for node in parameters]):
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
def pull_constant_inputs_into_body(loop_node: Node):
    """
    Moves constant inputs of the Loop into its body.

    For every connected input port with index greater than 1 whose producer has type 'Const', a copy of that
    Const is created inside the body graph, the consumers of the matching body Parameter are reconnected to the
    copy, the Parameter is removed from the body and the Loop input port is deleted. Inputs whose body Parameter
    may change between iterations are left untouched.

    :param loop_node: the Loop node to transform; the node and its body graph are modified in place
    """
    # NOTE(review): ports are visited in reverse order, presumably so that deleting a port does not affect the
    # ports that are still to be visited - confirm
    for port_idx, in_port in reversed(loop_node.in_ports().items()):
        if port_idx > 1 and not in_port.disconnected() and in_port.get_source().node.soft_get('type') == 'Const':
            body_parameter = Loop.external_port_id_to_body_node(loop_node, port_idx, loop_node.input_port_map)
            # if there is a back edge into a body Parameter then we cannot replace it with a Const if the value
            # is updated during each iteration. So we need to check that the tensor is passed to the next iteration
            # unchanged
            if not Loop.parameter_unchanged_after_iteration(loop_node, body_parameter):
                continue

            original_const_node = in_port.get_source().node
            new_const_node = Const(loop_node.body, original_const_node.attrs()).create_node()

            # body_parameter resolved above is reused here; looking it up a second time is unnecessary
            body_parameter.out_port(0).get_connection().set_source(new_const_node.out_port(0))
            loop_node.body.remove_nodes_from([body_parameter.id])
            loop_node.delete_input_port(port_idx)
|
||||
@@ -336,7 +375,7 @@ class Loop(TensorIterator):
|
||||
|
||||
@staticmethod
|
||||
def update_port_map_value_ext(port_map: dict, layer_id_attr: str, layer_id_value: int,
|
||||
updated_attr: str, new_attr_value: int):
|
||||
updated_attr: str, new_attr_value: int):
|
||||
"""
|
||||
Updates a value of requested attribute for a certain layer id in a port map
|
||||
:param port_map: a map of external ports to internal layer ids
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
"""
|
||||
Copyright (C) 2018-2020 Intel Corporation
|
||||
Copyright (C) 2018-2021 Intel Corporation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -59,7 +59,8 @@ def get_value_in_port(node) -> Port:
|
||||
return None if len(value_ports) != 1 else value_ports[0]
|
||||
|
||||
|
||||
def common_bfs(start_node: Node, allowed_ops: list, op_name: list, is_backward: bool = True, allowed_all: bool = False):
|
||||
def common_bfs(start_node: Node, allowed_ops: list, op_name: list, is_backward: bool = True, allowed_all: bool = False,
|
||||
attr_to_check='type', follow_multi_consumer_data_nodes=False):
|
||||
"""
|
||||
The purpose of this algorithm is to find layers with 'op_name' located in given direction.
|
||||
In case of branching algorithm goes into each branch, but if it can't find layer in one of them it returns
|
||||
@@ -70,6 +71,8 @@ def common_bfs(start_node: Node, allowed_ops: list, op_name: list, is_backward:
|
||||
:param op_name: The list with names of operations for searching
|
||||
:param is_backward: The direction of BFS algorithm
|
||||
:param allowed_all: Bool flag meaning we can jump over all operations
|
||||
:param attr_to_check: the attribute to check when looking if the node is in "op_name" list
|
||||
:param follow_multi_consumer_data_nodes: for backward traversal allow to follow data nodes with multiple consumers
|
||||
"""
|
||||
ret = []
|
||||
q = deque([start_node])
|
||||
@@ -83,8 +86,8 @@ def common_bfs(start_node: Node, allowed_ops: list, op_name: list, is_backward:
|
||||
in_nodes_size = len(node.in_nodes()) if is_backward else len(node.out_nodes())
|
||||
for id in range(in_nodes_size): # in_nodes() can return either list or dict
|
||||
pnode = node.in_node(id) if is_backward else node.out_node(id)
|
||||
if pnode.has_valid('type'):
|
||||
if pnode.type in op_name:
|
||||
if pnode.has_valid(attr_to_check):
|
||||
if pnode[attr_to_check] in op_name:
|
||||
if pnode.id not in ret:
|
||||
ret.append(pnode.id)
|
||||
elif allowed_all or pnode.op in allowed_ops:
|
||||
@@ -93,7 +96,7 @@ def common_bfs(start_node: Node, allowed_ops: list, op_name: list, is_backward:
|
||||
return []
|
||||
elif pnode.kind == 'data' and pnode.value is None:
|
||||
# If we go backward we don't use data node that have more than one consumer
|
||||
if not is_backward or (is_backward and len(pnode.out_nodes()) == 1):
|
||||
if not is_backward or (len(pnode.out_nodes()) == 1 or follow_multi_consumer_data_nodes):
|
||||
q.append(pnode)
|
||||
return [Node(start_node.graph, x) for x in ret]
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
"""
|
||||
Copyright (C) 2018-2020 Intel Corporation
|
||||
Copyright (C) 2018-2021 Intel Corporation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -16,9 +16,11 @@
|
||||
|
||||
import unittest
|
||||
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.graph.graph import Node
|
||||
from mo.middle.passes.fusing.helpers import forward_bfs, backward_bfs, get_next_operation
|
||||
from mo.utils.unittest.graph import build_graph
|
||||
from mo.middle.passes.fusing.helpers import forward_bfs, backward_bfs, get_next_operation, common_bfs
|
||||
from mo.utils.unittest.graph import build_graph, regular_op_with_shaped_data, connect, const, result, \
|
||||
valued_const_with_data, connect_data
|
||||
|
||||
nodes_attributes = {
|
||||
'placeholder_1': {'shape': None, 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
|
||||
@@ -256,6 +258,67 @@ class BFSTests(unittest.TestCase):
|
||||
res = backward_bfs(Node(graph, 'add_1_data'), ['Add', 'ScaleShift', 'Mul', 'Parameter'], ['Conv2D'])
|
||||
self.assertTrue(len(res) == 0, 'Sholdn\'t find any nodes due to cycle in graph')
|
||||
|
||||
def test_backward_bfs_check_op_instead_of_type(self):
    """Runs backward common_bfs from concat_1 with attr_to_check='op' and checks which nodes are reported."""
    # Two branches meet in concat_1:
    #   scaleshift_1_w -> scaleshift_1 -> mul_1 -> add_1 ---> concat_1 -> op_output
    #   placeholder_1 ------------------> add_2 -> mul_2 --'
    edges = [
        ('placeholder_1', 'placeholder_1_data'),
        ('placeholder_1_data', 'add_2'),
        ('scaleshift_1_w', 'scaleshift_1'),
        ('scaleshift_1', 'scaleshift_1_data'),
        ('scaleshift_1_data', 'mul_1'),
        ('mul_1', 'mul_1_data'),
        ('mul_1_data', 'add_1'),
        ('add_1', 'add_1_data'),
        ('add_2', 'add_2_data'),
        ('add_2_data', 'mul_2'),
        ('mul_2', 'mul_2_data'),
        ('add_1_data', 'concat_1'),
        ('mul_2_data', 'concat_1'),
        ('concat_1', 'concat_1_data'),
        ('concat_1_data', 'op_output'),
    ]
    graph = build_graph(nodes_attributes, edges)

    def bfs_from_concat(allowed_ops, wanted_ops, **extra):
        # every search starts from a fresh Node wrapper around concat_1
        return common_bfs(Node(graph, 'concat_1'), allowed_ops, wanted_ops, is_backward=True,
                          attr_to_check='op', **extra)

    def only_adds(found_nodes):
        return len(found_nodes) == 2 and all(node.id in ['add_1', 'add_2'] for node in found_nodes)

    found = bfs_from_concat(['Mul', 'Add'], ['Parameter'])
    self.assertTrue(len(found) == 0, 'Smth went wrong with bfs')

    found = bfs_from_concat(['Mul'], ['Add'])
    self.assertTrue(only_adds(found), 'Add operations was not found by bfs')

    found = bfs_from_concat(['ScaleShift'], ['Add'])
    self.assertTrue(len(found) == 0, 'BFS shouldn\'t find any operations')

    found = bfs_from_concat([], ['Add'], allowed_all=True)
    self.assertTrue(only_adds(found), 'Add operations was not found by bfs')

    found = bfs_from_concat(['ScaleShift'], ['ScaleShift'])
    self.assertTrue(len(found) == 0, 'No one node should be found! But bfs found {} nodes'.format(len(found)))
|
||||
|
||||
def test_backward_bfs_multi_consumer_data_nodes(self):
    """Checks that follow_multi_consumer_data_nodes controls whether a shared data node is traversed."""
    # parameter -> mul -> result
    # const ----/     \-> result2     (the output data node of mul has two consumers)
    nodes = {}
    nodes.update(regular_op_with_shaped_data('parameter', [1], {'op': 'Parameter'}))
    nodes.update(valued_const_with_data('const', int64_array([5])))
    nodes.update(regular_op_with_shaped_data('mul', [1], {'op': 'Mul'}))
    nodes.update(result('result'))
    nodes.update(result('result2'))

    edges = []
    edges.extend(connect('parameter', '0:mul'))
    edges.extend(connect('const', '1:mul'))
    edges.extend(connect('mul:0', 'result'))
    edges.extend(connect_data('mul', 'result2'))

    graph = build_graph(nodes, edges)

    # with the flag set the search is allowed to pass through the shared data node
    found = common_bfs(Node(graph, 'result'), ['Mul'], ['Parameter'], is_backward=True, attr_to_check='op',
                       follow_multi_consumer_data_nodes=True)
    self.assertTrue(len(found) == 1, 'The multi-consumer data node "mul_d" was not followed')

    # with the default value of the flag the shared data node stops the search
    found = common_bfs(Node(graph, 'result'), ['Mul'], ['Parameter'], is_backward=True, attr_to_check='op')
    self.assertTrue(len(found) == 0, 'The multi-consumer data node "mul_d" was followed')
|
||||
|
||||
|
||||
# Unit tests for get_next_operation
|
||||
class GetNextOperationTests(unittest.TestCase):
|
||||
|
||||
@@ -37,7 +37,7 @@ py_modules = []
|
||||
for name in os.listdir():
|
||||
if re.match('requirements(.*)\.txt', name):
|
||||
requirements_txt.append(name)
|
||||
if re.match('mo_(.*)\.py', name):
|
||||
if re.match('mo(.*)\.py', name):
|
||||
py_modules.append(name.split('.')[0])
|
||||
|
||||
# Minimal set of dependencies
|
||||
|
||||
@@ -194,8 +194,10 @@ namespace ngraph
|
||||
|
||||
// Check execution condition
|
||||
bool body_exec_condition(false);
|
||||
body_outputs[special_ports.body_condition_output_idx]->read(
|
||||
&body_exec_condition, sizeof(bool));
|
||||
if (body_outputs.size() > special_ports.body_condition_output_idx &&
|
||||
body_outputs[special_ports.body_condition_output_idx])
|
||||
body_outputs[special_ports.body_condition_output_idx]->read(
|
||||
&body_exec_condition, sizeof(bool));
|
||||
if (!body_exec_condition)
|
||||
break;
|
||||
|
||||
|
||||
@@ -161,6 +161,8 @@ namespace ngraph
|
||||
for (size_t i = 0; i < concat_outputs.size(); ++i)
|
||||
{
|
||||
const auto& concat_desc = concat_outputs[i];
|
||||
if (!concat_desc)
|
||||
continue;
|
||||
auto shape =
|
||||
func->get_results().at(concat_desc->m_body_value_index)->get_shape();
|
||||
std::vector<Shape> shapes_to_concat(values_to_concat[i].size(), shape);
|
||||
|
||||
@@ -1248,7 +1248,19 @@ void propagate_rt_info(Node* node, const Output<Node>& final_port)
|
||||
if (stop_nodes.count(in.get_node()))
|
||||
continue;
|
||||
auto consumer = in.get_node()->shared_from_this();
|
||||
// FIXME: Here we have a WA in order to save some original fields
|
||||
// if we have conflicts because Variant merge doesn't work.
|
||||
// We can restore original fields because we don't change the operation
|
||||
auto orig_rt_info = consumer->get_rt_info();
|
||||
|
||||
copy_runtime_info({curr_node, consumer}, consumer);
|
||||
|
||||
auto& rt_info = consumer->get_rt_info();
|
||||
for (const auto& it : orig_rt_info)
|
||||
{
|
||||
if (rt_info.find(it.first) == rt_info.end())
|
||||
rt_info[it.first] = it.second;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1497,17 +1497,17 @@ def lstm_sequence(
|
||||
Shape: [batch_size]. Integer type.
|
||||
@param W: Tensor with weights for matrix multiplication operation with input portion of data.
|
||||
Shape: [num_directions, 4*hidden_size, input_size].
|
||||
:param R: The tensor with weights for matrix multiplication operation with hidden state.
|
||||
@param R: The tensor with weights for matrix multiplication operation with hidden state.
|
||||
Shape: [num_directions, 4*hidden_size, hidden_size].
|
||||
:param B: The tensor with biases.
|
||||
@param B: The tensor with biases.
|
||||
Shape: [num_directions, 4*hidden_size].
|
||||
:param hidden_size: Specifies hidden state size.
|
||||
:param direction: Specifies if the RNN is forward, reverse, or bidirectional.
|
||||
:param activations: The list of three activation functions for gates.
|
||||
:param activations_alpha: The list of alpha parameters for activation functions.
|
||||
:param activations_beta: The list of beta parameters for activation functions.
|
||||
:param clip: Specifies bound values [-C, C] for tensor clipping performed before activations.
|
||||
:param name: An optional name of the output node.
|
||||
@param hidden_size: Specifies hidden state size.
|
||||
@param direction: Specifies if the RNN is forward, reverse, or bidirectional.
|
||||
@param activations: The list of three activation functions for gates.
|
||||
@param activations_alpha: The list of alpha parameters for activation functions.
|
||||
@param activations_beta: The list of beta parameters for activation functions.
|
||||
@param clip: Specifies bound values [-C, C] for tensor clipping performed before activations.
|
||||
@param name: An optional name of the output node.
|
||||
|
||||
@return The new node represents LSTMSequence. Node outputs count: 3.
|
||||
"""
|
||||
@@ -2800,11 +2800,11 @@ def tensor_iterator(
|
||||
"""
|
||||
attributes = {
|
||||
"body": graph_body.serialize(),
|
||||
"slice_input_desc": [desc.serialize() for desc in slice_input_desc],
|
||||
"merged_input_desc": [desc.serialize() for desc in merged_input_desc],
|
||||
"invariant_input_desc": [desc.serialize() for desc in invariant_input_desc],
|
||||
"body_output_desc": [desc.serialize() for desc in body_output_desc],
|
||||
"concat_output_desc": [desc.serialize() for desc in concat_output_desc],
|
||||
"input_descriptions": {"slice_input_desc": [desc.serialize() for desc in slice_input_desc],
|
||||
"merged_input_desc": [desc.serialize() for desc in merged_input_desc],
|
||||
"invariant_input_desc": [desc.serialize() for desc in invariant_input_desc]},
|
||||
"output_descriptions": {"body_output_desc": [desc.serialize() for desc in body_output_desc],
|
||||
"concat_output_desc": [desc.serialize() for desc in concat_output_desc]}
|
||||
}
|
||||
|
||||
return _get_node_factory_opset1().create("TensorIterator", as_nodes(*inputs), attributes)
|
||||
|
||||
@@ -385,16 +385,56 @@ def rnn_sequence(
|
||||
def loop(
    trip_count: NodeInput,
    execution_condition: NodeInput,
    inputs: List[Node],
    graph_body: GraphBody,
    slice_input_desc: List[TensorIteratorSliceInputDesc],
    merged_input_desc: List[TensorIteratorMergedInputDesc],
    invariant_input_desc: List[TensorIteratorInvariantInputDesc],
    body_output_desc: List[TensorIteratorBodyOutputDesc],
    concat_output_desc: List[TensorIteratorConcatOutputDesc],
    body_condition_output_idx: int,
    current_iteration_input_idx: int = -1,
    name: Optional[str] = None,
) -> Node:
    """Perform recurrent execution of the network described in the body, iterating through the data.

    @param trip_count: A scalar or 1D tensor with 1 element specifying
                       maximum number of iterations.
    @param execution_condition: A scalar or 1D tensor with 1 element
                                specifying whether to execute the first iteration or not.
    @param inputs: The inputs provided to the Loop operator.
    @param graph_body: The graph representing the body we execute.
    @param slice_input_desc: The descriptors describing sliced inputs, that is nodes
                             representing tensors we iterate through, processing single
                             data slice in one iteration.
    @param merged_input_desc: The descriptors describing merged inputs, that is nodes
                              representing variables with initial value at first iteration,
                              which may be changing through iterations.
    @param invariant_input_desc: The descriptors describing invariant inputs, that is nodes
                                 representing variable with persistent value through all
                                 iterations.
    @param body_output_desc: The descriptors describing body outputs from specified
                             iteration.
    @param concat_output_desc: The descriptors describing specified output values through
                               all the iterations concatenated into one node.
    @param body_condition_output_idx: Determines the purpose of the corresponding result in
                                      the graph_body. This result will determine the dynamic
                                      exit condition. If the value of this result is False,
                                      then iterations stop.
    @param current_iteration_input_idx: Determines the purpose of the corresponding parameter
                                        in the graph_body. This parameter will be used as
                                        an iteration counter. Optional.
    @param name: Optional name of the output node. It is not read in this body.
                 NOTE(review): presumably consumed by a decorator outside this view - confirm.
    @return: The new node which performs Loop.
    """
    # every descriptor is serialized and grouped under the attribute key it belongs to
    attributes = {
        "body": graph_body.serialize(),
        "input_descriptions": {"slice_input_desc": [desc.serialize() for desc in slice_input_desc],
                               "merged_input_desc": [desc.serialize() for desc in merged_input_desc],
                               "invariant_input_desc": [desc.serialize() for desc in invariant_input_desc]},
        "output_descriptions": {"body_output_desc": [desc.serialize() for desc in body_output_desc],
                                "concat_output_desc": [desc.serialize() for desc in concat_output_desc]},
        "special_body_ports": {"body_condition_output_idx": body_condition_output_idx,
                               "current_iteration_input_idx": current_iteration_input_idx}
    }
    # trip_count and execution_condition come first among the node inputs, followed by the user inputs;
    # the `inputs` parameter must not be overwritten before this point
    return _get_node_factory_opset5().create("Loop", as_nodes(trip_count, execution_condition, *inputs),
                                             attributes)
|
||||
|
||||
@@ -22,7 +22,7 @@ from ngraph.opset4.ops import acosh
|
||||
from ngraph.opset1.ops import add
|
||||
from ngraph.opset1.ops import asin
|
||||
from ngraph.opset4.ops import asinh
|
||||
from ngraph.opset3.ops import assign
|
||||
from ngraph.opset6.ops import assign
|
||||
from ngraph.opset1.ops import atan
|
||||
from ngraph.opset4.ops import atanh
|
||||
from ngraph.opset1.ops import avg_pool
|
||||
@@ -114,7 +114,7 @@ from ngraph.opset1.ops import prior_box_clustered
|
||||
from ngraph.opset1.ops import psroi_pooling
|
||||
from ngraph.opset4.ops import proposal
|
||||
from ngraph.opset1.ops import range
|
||||
from ngraph.opset3.ops import read_value
|
||||
from ngraph.opset6.ops import read_value
|
||||
from ngraph.opset4.ops import reduce_l1
|
||||
from ngraph.opset4.ops import reduce_l2
|
||||
from ngraph.opset1.ops import reduce_logical_and
|
||||
|
||||
@@ -142,3 +142,35 @@ def mvn(
|
||||
}
|
||||
|
||||
return _get_node_factory_opset6().create("MVN", inputs, attributes)
|
||||
|
||||
|
||||
@nameable_op
def assign(new_value: NodeInput, variable_id: str, name: Optional[str] = None) -> Node:
    """Create a node which performs the Assign operation.

    @param new_value: Node producing a value to be assigned to a variable.
    @param variable_id: Id of a variable to be updated.
    @param name: Optional name for output node.
    @return Assign node
    """
    factory = _get_node_factory_opset6()
    node_inputs = [as_node(new_value)]
    node_attributes = {"variable_id": variable_id}
    return factory.create("Assign", node_inputs, node_attributes)
|
||||
|
||||
|
||||
@nameable_op
def read_value(init_value: NodeInput, variable_id: str, name: Optional[str] = None) -> Node:
    """Return a node which produces the ReadValue operation.

    @param init_value: Node producing a value to be returned instead of an unassigned variable.
    @param variable_id: Id of a variable to be read.
    @param name: Optional name for output node.
    @return ReadValue node
    """
    return _get_node_factory_opset6().create(
        "ReadValue",
        [as_node(init_value)],
        {"variable_id": variable_id}
    )
|
||||
|
||||
@@ -21,11 +21,16 @@
|
||||
#include <pybind11/stl.h>
|
||||
|
||||
#include "dict_attribute_visitor.hpp"
|
||||
#include "ngraph/op/loop.hpp"
|
||||
#include "ngraph/op/util/sub_graph_base.hpp"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
/// \brief Builds a deserializer over a Python attribute dictionary and a variable registry.
///
/// \param attributes Dictionary holding the attribute values, keyed by attribute name.
/// \param variables  Map from variable id to ngraph::Variable; the deserializer looks variables up in it
///                   and adds new entries to it.
///
/// Both arguments are stored as references, so they have to outlive the deserializer.
util::DictAttributeDeserializer::DictAttributeDeserializer(
    const py::dict& attributes,
    std::unordered_map<std::string, std::shared_ptr<ngraph::Variable>>& variables)
    : m_attributes(attributes)
    , m_variables(variables)
{
}
|
||||
|
||||
@@ -34,7 +39,116 @@ void util::DictAttributeDeserializer::on_adapter(const std::string& name,
|
||||
{
|
||||
if (m_attributes.contains(name))
|
||||
{
|
||||
NGRAPH_CHECK(false, "No AttributeVisitor support for accessing attribute named: ", name);
|
||||
if (const auto& a = ngraph::as_type<ngraph::AttributeAdapter<
|
||||
std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::InputDescription>>>>(
|
||||
&adapter))
|
||||
{
|
||||
std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::InputDescription>>
|
||||
input_descs;
|
||||
const py::dict& input_desc = m_attributes[name.c_str()].cast<py::dict>();
|
||||
const auto& merged_input_desc = input_desc["merged_input_desc"].cast<py::list>();
|
||||
const auto& slice_input_desc = input_desc["slice_input_desc"].cast<py::list>();
|
||||
const auto& invariant_input_desc = input_desc["invariant_input_desc"].cast<py::list>();
|
||||
for (py::handle h : slice_input_desc)
|
||||
{
|
||||
const py::dict& desc = h.cast<py::dict>();
|
||||
auto slice_in =
|
||||
std::make_shared<ngraph::op::util::SubGraphOp::SliceInputDescription>(
|
||||
desc["input_idx"].cast<int64_t>(),
|
||||
desc["body_parameter_idx"].cast<int64_t>(),
|
||||
desc["start"].cast<int64_t>(),
|
||||
desc["stride"].cast<int64_t>(),
|
||||
desc["part_size"].cast<int64_t>(),
|
||||
desc["end"].cast<int64_t>(),
|
||||
desc["axis"].cast<int64_t>());
|
||||
input_descs.push_back(slice_in);
|
||||
}
|
||||
|
||||
for (py::handle h : merged_input_desc)
|
||||
{
|
||||
const py::dict& desc = h.cast<py::dict>();
|
||||
auto merged_in =
|
||||
std::make_shared<ngraph::op::util::SubGraphOp::MergedInputDescription>(
|
||||
desc["input_idx"].cast<int64_t>(),
|
||||
desc["body_parameter_idx"].cast<int64_t>(),
|
||||
desc["body_value_idx"].cast<int64_t>());
|
||||
input_descs.push_back(merged_in);
|
||||
}
|
||||
|
||||
for (py::handle h : invariant_input_desc)
|
||||
{
|
||||
const py::dict& desc = h.cast<py::dict>();
|
||||
auto invariant_in =
|
||||
std::make_shared<ngraph::op::util::SubGraphOp::InvariantInputDescription>(
|
||||
desc["input_idx"].cast<int64_t>(),
|
||||
desc["body_parameter_idx"].cast<int64_t>());
|
||||
input_descs.push_back(invariant_in);
|
||||
}
|
||||
a->set(input_descs);
|
||||
}
|
||||
else if (const auto& a = ngraph::as_type<ngraph::AttributeAdapter<std::vector<
|
||||
std::shared_ptr<ngraph::op::util::SubGraphOp::OutputDescription>>>>(&adapter))
|
||||
{
|
||||
std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::OutputDescription>>
|
||||
output_descs;
|
||||
const py::dict& output_desc = m_attributes[name.c_str()].cast<py::dict>();
|
||||
const auto& body_output_desc = output_desc["body_output_desc"].cast<py::list>();
|
||||
const auto& concat_output_desc = output_desc["concat_output_desc"].cast<py::list>();
|
||||
for (py::handle h : body_output_desc)
|
||||
{
|
||||
const py::dict& desc = h.cast<py::dict>();
|
||||
auto body_output =
|
||||
std::make_shared<ngraph::op::util::SubGraphOp::BodyOutputDescription>(
|
||||
desc["body_value_idx"].cast<int64_t>(),
|
||||
desc["output_idx"].cast<int64_t>(),
|
||||
desc["iteration"].cast<int64_t>());
|
||||
output_descs.push_back(body_output);
|
||||
}
|
||||
|
||||
for (py::handle h : concat_output_desc)
|
||||
{
|
||||
const py::dict& desc = h.cast<py::dict>();
|
||||
auto concat_output =
|
||||
std::make_shared<ngraph::op::util::SubGraphOp::ConcatOutputDescription>(
|
||||
desc["body_value_idx"].cast<int64_t>(),
|
||||
desc["output_idx"].cast<int64_t>(),
|
||||
desc["start"].cast<int64_t>(),
|
||||
desc["stride"].cast<int64_t>(),
|
||||
desc["part_size"].cast<int64_t>(),
|
||||
desc["end"].cast<int64_t>(),
|
||||
desc["axis"].cast<int64_t>());
|
||||
output_descs.push_back(concat_output);
|
||||
}
|
||||
a->set(output_descs);
|
||||
}
|
||||
else if (const auto& a = ngraph::as_type<
|
||||
ngraph::AttributeAdapter<ngraph::op::v5::Loop::SpecialBodyPorts>>(&adapter))
|
||||
{
|
||||
ngraph::op::v5::Loop::SpecialBodyPorts special_body_ports;
|
||||
const py::dict& special_ports_dict = m_attributes[name.c_str()].cast<py::dict>();
|
||||
special_body_ports.body_condition_output_idx =
|
||||
special_ports_dict["body_condition_output_idx"].cast<int64_t>();
|
||||
special_body_ports.current_iteration_input_idx =
|
||||
special_ports_dict["current_iteration_input_idx"].cast<int64_t>();
|
||||
a->set(special_body_ports);
|
||||
}
|
||||
else if (const auto& a =
|
||||
ngraph::as_type<ngraph::AttributeAdapter<std::shared_ptr<ngraph::Variable>>>(
|
||||
&adapter))
|
||||
{
|
||||
std::string variable_id = m_attributes[name.c_str()].cast<std::string>();
|
||||
if (!m_variables.count(variable_id))
|
||||
{
|
||||
m_variables[variable_id] = std::make_shared<ngraph::Variable>(ngraph::VariableInfo{
|
||||
ngraph::PartialShape::dynamic(), ngraph::element::dynamic, variable_id});
|
||||
}
|
||||
a->set(m_variables[variable_id]);
|
||||
}
|
||||
else
|
||||
{
|
||||
NGRAPH_CHECK(
|
||||
false, "No AttributeVisitor support for accessing attribute named: ", name);
|
||||
}
|
||||
}
|
||||
}
|
||||
void util::DictAttributeDeserializer::on_adapter(const std::string& name,
|
||||
@@ -222,6 +336,28 @@ void util::DictAttributeDeserializer::on_adapter(
|
||||
}
|
||||
}
|
||||
|
||||
void util::DictAttributeDeserializer::on_adapter(
|
||||
const std::string& name, ngraph::ValueAccessor<std::shared_ptr<ngraph::Function>>& adapter)
|
||||
{
|
||||
if (m_attributes.contains(name))
|
||||
{
|
||||
if (name == "body")
|
||||
{
|
||||
const py::dict& body_attrs = m_attributes[name.c_str()].cast<py::dict>();
|
||||
const auto& body_outputs =
|
||||
as_output_vector(body_attrs["results"].cast<ngraph::NodeVector>());
|
||||
const auto& body_parameters = body_attrs["parameters"].cast<ngraph::ParameterVector>();
|
||||
auto body = std::make_shared<ngraph::Function>(body_outputs, body_parameters);
|
||||
adapter.set(body);
|
||||
}
|
||||
else
|
||||
{
|
||||
NGRAPH_CHECK(
|
||||
false, "No AttributeVisitor support for accessing attribute named: ", name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
util::DictAttributeSerializer::DictAttributeSerializer(const std::shared_ptr<ngraph::Node>& node)
|
||||
{
|
||||
node->visit_attributes(*this);
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
#include "ngraph/attribute_visitor.hpp"
|
||||
#include "ngraph/node.hpp"
|
||||
#include "ngraph/op/util/variable.hpp"
|
||||
|
||||
#include <pybind11/pybind11.h>
|
||||
|
||||
@@ -32,114 +33,96 @@ namespace util
|
||||
class DictAttributeDeserializer : public ngraph::AttributeVisitor
|
||||
{
|
||||
public:
|
||||
DictAttributeDeserializer(const py::dict& attributes);
|
||||
DictAttributeDeserializer(
|
||||
const py::dict& attributes,
|
||||
std::unordered_map<std::string, std::shared_ptr<ngraph::Variable>>& variables);
|
||||
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<void>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<bool>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::string>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<int8_t>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<int16_t>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<int32_t>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<int64_t>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<uint8_t>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<uint16_t>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<uint32_t>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<uint64_t>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<float>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<double>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<std::string>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<int8_t>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<int16_t>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<int32_t>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<int64_t>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<uint8_t>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<uint16_t>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<uint32_t>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<uint64_t>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<float>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<double>>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<void>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<bool>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::string>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<int8_t>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<int16_t>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<int32_t>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<int64_t>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<uint8_t>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<uint16_t>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<uint32_t>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<uint64_t>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<float>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<double>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<std::string>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<int8_t>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<int16_t>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<int32_t>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<int64_t>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<uint8_t>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<uint16_t>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<uint32_t>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<uint64_t>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<float>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<double>>& adapter) override;
|
||||
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::shared_ptr<ngraph::Function>>& adapter) override;
|
||||
|
||||
protected:
|
||||
const py::dict& m_attributes;
|
||||
std::unordered_map<std::string, std::shared_ptr<ngraph::Variable>>& m_variables;
|
||||
};
|
||||
|
||||
class DictAttributeSerializer : public ngraph::AttributeVisitor
|
||||
{
|
||||
public:
|
||||
DictAttributeSerializer(const std::shared_ptr<ngraph::Node>& node);
|
||||
explicit DictAttributeSerializer(const std::shared_ptr<ngraph::Node>& node);
|
||||
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<void>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<bool>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::string>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<int8_t>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<int16_t>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<int32_t>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<int64_t>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<uint8_t>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<uint16_t>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<uint32_t>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<uint64_t>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<float>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<double>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<std::string>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<int8_t>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<int16_t>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<int32_t>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<int64_t>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<uint8_t>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<uint16_t>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<uint32_t>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<uint64_t>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<float>>& adapter) override;
|
||||
virtual void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<double>>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<void>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<bool>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::string>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<int8_t>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<int16_t>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<int32_t>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<int64_t>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<uint8_t>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<uint16_t>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<uint32_t>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<uint64_t>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<float>& adapter) override;
|
||||
void on_adapter(const std::string& name, ngraph::ValueAccessor<double>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<std::string>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<int8_t>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<int16_t>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<int32_t>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<int64_t>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<uint8_t>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<uint16_t>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<uint32_t>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<uint64_t>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<float>>& adapter) override;
|
||||
void on_adapter(const std::string& name,
|
||||
ngraph::ValueAccessor<std::vector<double>>& adapter) override;
|
||||
|
||||
template <typename T>
|
||||
T get_attribute(const std::string& name)
|
||||
|
||||
@@ -117,7 +117,8 @@ void regclass_pyngraph_Node(py::module m)
|
||||
[](std::shared_ptr<ngraph::Node>& self, const std::string& atr_name, py::object value) {
|
||||
py::dict attr_dict;
|
||||
attr_dict[atr_name.c_str()] = value;
|
||||
util::DictAttributeDeserializer dict_deserializer(attr_dict);
|
||||
std::unordered_map<std::string, std::shared_ptr<ngraph::Variable>> variables;
|
||||
util::DictAttributeDeserializer dict_deserializer(attr_dict, variables);
|
||||
self->visit_attributes(dict_deserializer);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -31,9 +31,9 @@
|
||||
#include "ngraph/except.hpp"
|
||||
#include "ngraph/node.hpp"
|
||||
#include "ngraph/op/util/op_types.hpp"
|
||||
#include "ngraph/op/util/variable.hpp"
|
||||
#include "ngraph/opsets/opset.hpp"
|
||||
#include "node_factory.hpp"
|
||||
#include "tensor_iterator_builder.hpp"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
@@ -60,14 +60,7 @@ namespace
|
||||
"Currently NodeFactory doesn't support Constant node: ",
|
||||
op_type_name);
|
||||
|
||||
if (op_type_name == "TensorIterator")
|
||||
{
|
||||
// XXX: How to differentiate opsets?
|
||||
return util::TensorIteratorBuilder(as_node_vector(arguments), attributes)
|
||||
.configure(std::static_pointer_cast<ngraph::op::TensorIterator>(op_node));
|
||||
}
|
||||
|
||||
util::DictAttributeDeserializer visitor(attributes);
|
||||
util::DictAttributeDeserializer visitor(attributes, m_variables);
|
||||
|
||||
op_node->set_arguments(arguments);
|
||||
op_node->visit_attributes(visitor);
|
||||
@@ -104,6 +97,7 @@ namespace
|
||||
}
|
||||
|
||||
const ngraph::OpSet& m_opset = ngraph::get_opset6();
|
||||
std::unordered_map<std::string, std::shared_ptr<ngraph::Variable>> m_variables;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
|
||||
@@ -1,224 +0,0 @@
|
||||
//*****************************************************************************
|
||||
// Copyright 2017-2021 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//*****************************************************************************
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "ngraph/check.hpp"
|
||||
#include "ngraph/except.hpp"
|
||||
#include "tensor_iterator_builder.hpp"
|
||||
|
||||
/// Collects everything needed to configure a TensorIterator from the Python attributes.
///
/// The body graph is extracted first (see get_graph_body()), then the input descriptor
/// lists are stored and every output descriptor is indexed by its "output_idx" value.
/// Both constructor arguments are kept by reference.
util::TensorIteratorBuilder::TensorIteratorBuilder(const ngraph::NodeVector& arguments,
                                                   const py::dict& attributes)
    : m_arguments(arguments)
    , m_attributes(attributes)
{
    get_graph_body();

    // Inputs: the sliced descriptor list is mandatory, the remaining two are optional.
    NGRAPH_CHECK(m_attributes.contains("slice_input_desc"),
                 "The required \"slice_input_desc\" attribute is missing. Can't build "
                 "TensorIterator operator.");
    m_slice_input_desc = m_attributes["slice_input_desc"].cast<py::list>();

    if (m_attributes.contains("merged_input_desc"))
    {
        m_merged_input_desc = m_attributes["merged_input_desc"].cast<py::list>();
    }

    if (m_attributes.contains("invariant_input_desc"))
    {
        m_invariant_input_desc = m_attributes["invariant_input_desc"].cast<py::list>();
    }

    // Outputs: both descriptor kinds are handled the same way. Each descriptor is tagged
    // with its kind under the "type" key and stored in m_outputs keyed by "output_idx".
    const auto collect_outputs = [this](const char* list_key, const char* desc_type) {
        if (!m_attributes.contains(list_key))
        {
            return;
        }
        const py::list descriptors = m_attributes[list_key].cast<py::list>();
        for (py::handle item : descriptors)
        {
            py::dict desc = item.cast<py::dict>();
            desc["type"] = desc_type;
            check_attribute(desc, "output_idx", desc_type);
            m_outputs.emplace(desc["output_idx"].cast<int64_t>(), desc);
        }
    };

    collect_outputs("body_output_desc", "BodyOutputDesc");
    collect_outputs("concat_output_desc", "ConcatOutputDesc");
}
|
||||
|
||||
std::shared_ptr<ngraph::op::TensorIterator>
|
||||
util::TensorIteratorBuilder::configure(std::shared_ptr<ngraph::op::TensorIterator>&& ti_node)
|
||||
{
|
||||
ti_node->set_body(m_body);
|
||||
set_tensor_iterator_sliced_inputs(ti_node);
|
||||
set_tensor_iterator_merged_inputs(ti_node);
|
||||
set_tensor_iterator_invariant_inputs(ti_node);
|
||||
set_tensor_iterator_outputs(ti_node);
|
||||
ti_node->constructor_validate_and_infer_types();
|
||||
|
||||
return std::move(ti_node);
|
||||
}
|
||||
|
||||
/// Fails with an NGRAPH_CHECK error when `attr_name` is not a key of `attrs`.
///
/// \param attrs      Descriptor dictionary to inspect.
/// \param attr_name  Key that must be present.
/// \param desc_name  Descriptor kind, used only to build the error message.
void util::TensorIteratorBuilder::check_attribute(const py::dict& attrs,
                                                  std::string attr_name,
                                                  std::string desc_name) const
{
    // The message is assembled from the pieces below only when the check fails.
    NGRAPH_CHECK(attrs.contains(attr_name),
                 "The required \"",
                 attr_name,
                 "\" attribute is missing. Can't build TensorIterator's ",
                 desc_name,
                 ".");
}
|
||||
|
||||
void util::TensorIteratorBuilder::get_graph_body()
|
||||
{
|
||||
NGRAPH_CHECK(m_attributes.contains("body"),
|
||||
"The required \"body\" attribute is missing. Can't build TensorIterator "
|
||||
"operator.");
|
||||
|
||||
const py::dict& body_attrs = m_attributes["body"].cast<py::dict>();
|
||||
|
||||
NGRAPH_CHECK(body_attrs.contains("parameters"),
|
||||
"The required body's \"parameters\" "
|
||||
"attribute is missing. Can't build TensorIterator's body.");
|
||||
NGRAPH_CHECK(body_attrs.contains("results"),
|
||||
"The required body's \"results\" "
|
||||
"attribute is missing. Can't build TensorIterator's body.");
|
||||
|
||||
m_body_outputs = as_output_vector(body_attrs["results"].cast<ngraph::NodeVector>());
|
||||
m_body_parameters = body_attrs["parameters"].cast<ngraph::ParameterVector>();
|
||||
m_body = std::make_shared<ngraph::Function>(m_body_outputs, m_body_parameters);
|
||||
}
|
||||
|
||||
void util::TensorIteratorBuilder::set_tensor_iterator_sliced_inputs(
|
||||
std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const
|
||||
{
|
||||
for (py::handle h : m_slice_input_desc)
|
||||
{
|
||||
const py::dict& desc = h.cast<py::dict>();
|
||||
check_attribute(desc, "input_idx", "SliceInputDesc");
|
||||
check_attribute(desc, "body_parameter_idx", "SliceInputDesc");
|
||||
check_attribute(desc, "start", "SliceInputDesc");
|
||||
check_attribute(desc, "stride", "SliceInputDesc");
|
||||
check_attribute(desc, "part_size", "SliceInputDesc");
|
||||
check_attribute(desc, "end", "SliceInputDesc");
|
||||
check_attribute(desc, "axis", "SliceInputDesc");
|
||||
|
||||
ti_node->set_sliced_input(m_body_parameters.at(desc["body_parameter_idx"].cast<int64_t>()),
|
||||
m_arguments.at(desc["input_idx"].cast<int64_t>()),
|
||||
desc["start"].cast<int64_t>(),
|
||||
desc["stride"].cast<int64_t>(),
|
||||
desc["part_size"].cast<int64_t>(),
|
||||
desc["end"].cast<int64_t>(),
|
||||
desc["axis"].cast<int64_t>());
|
||||
}
|
||||
}
|
||||
|
||||
void util::TensorIteratorBuilder::set_tensor_iterator_merged_inputs(
|
||||
std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const
|
||||
{
|
||||
for (py::handle h : m_merged_input_desc)
|
||||
{
|
||||
const py::dict& desc = h.cast<py::dict>();
|
||||
check_attribute(desc, "input_idx", "MergedInputDesc");
|
||||
check_attribute(desc, "body_parameter_idx", "MergedInputDesc");
|
||||
check_attribute(desc, "body_value_idx", "MergedInputDesc");
|
||||
|
||||
ti_node->set_merged_input(m_body_parameters.at(desc["body_parameter_idx"].cast<int64_t>()),
|
||||
m_arguments.at(desc["input_idx"].cast<int64_t>()),
|
||||
m_body_outputs.at(desc["body_value_idx"].cast<int64_t>()));
|
||||
}
|
||||
}
|
||||
|
||||
void util::TensorIteratorBuilder::set_tensor_iterator_invariant_inputs(
|
||||
std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const
|
||||
{
|
||||
for (py::handle h : m_invariant_input_desc)
|
||||
{
|
||||
const py::dict& desc = h.cast<py::dict>();
|
||||
check_attribute(desc, "input_idx", "InvariantInputDesc");
|
||||
check_attribute(desc, "body_parameter_idx", "InvariantInputDesc");
|
||||
|
||||
ti_node->set_invariant_input(
|
||||
m_body_parameters.at(desc["body_parameter_idx"].cast<int64_t>()),
|
||||
m_arguments.at(desc["input_idx"].cast<int64_t>()));
|
||||
}
|
||||
}
|
||||
|
||||
void util::TensorIteratorBuilder::set_tensor_iterator_outputs(
|
||||
std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const
|
||||
{
|
||||
for (const auto& elem : m_outputs)
|
||||
{
|
||||
const py::dict& desc = elem.second.cast<py::dict>();
|
||||
if (desc["type"].cast<std::string>() == "BodyOutputDesc")
|
||||
{
|
||||
set_tensor_iterator_body_output(desc, ti_node);
|
||||
}
|
||||
else if (desc["type"].cast<std::string>() == "ConcatOutputDesc")
|
||||
{
|
||||
set_tensor_iterator_concatenated_body_output(desc, ti_node);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw ngraph::ngraph_error("Unrecognized TensorIterator output type.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void util::TensorIteratorBuilder::set_tensor_iterator_body_output(
|
||||
const py::dict& desc, std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const
|
||||
{
|
||||
check_attribute(desc, "body_value_idx", "BodyOutputDesc");
|
||||
check_attribute(desc, "iteration", "BodyOutputDesc");
|
||||
|
||||
NGRAPH_CHECK(desc["output_idx"].cast<size_t>() == ti_node->get_output_size(),
|
||||
"Descriptor output idx value is different from currently configured "
|
||||
"TensorIterator output.");
|
||||
|
||||
ti_node->get_iter_value(m_body_outputs.at(desc["body_value_idx"].cast<int64_t>()),
|
||||
desc["iteration"].cast<int64_t>());
|
||||
}
|
||||
|
||||
void util::TensorIteratorBuilder::set_tensor_iterator_concatenated_body_output(
|
||||
const py::dict& desc, std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const
|
||||
{
|
||||
check_attribute(desc, "body_value_idx", "ConcatOutputDesc");
|
||||
check_attribute(desc, "start", "ConcatOutputDesc");
|
||||
check_attribute(desc, "stride", "ConcatOutputDesc");
|
||||
check_attribute(desc, "part_size", "ConcatOutputDesc");
|
||||
check_attribute(desc, "end", "ConcatOutputDesc");
|
||||
check_attribute(desc, "axis", "ConcatOutputDesc");
|
||||
|
||||
NGRAPH_CHECK(desc["output_idx"].cast<size_t>() == ti_node->get_output_size(),
|
||||
"Descriptor output idx value is different from currently configured "
|
||||
"TensorIterator output.");
|
||||
|
||||
ti_node->get_concatenated_slices(m_body_outputs.at(desc["body_value_idx"].cast<int64_t>()),
|
||||
desc["start"].cast<int64_t>(),
|
||||
desc["stride"].cast<int64_t>(),
|
||||
desc["part_size"].cast<int64_t>(),
|
||||
desc["end"].cast<int64_t>(),
|
||||
desc["axis"].cast<int64_t>());
|
||||
}
|
||||
@@ -1,135 +0,0 @@
|
||||
//*****************************************************************************
|
||||
// Copyright 2017-2021 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//*****************************************************************************
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cctype>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
||||
#include <pybind11/numpy.h>
|
||||
#include <pybind11/stl.h>
|
||||
|
||||
#include "ngraph/node.hpp"
|
||||
#include "ngraph/op/parameter.hpp"
|
||||
#include "ngraph/op/tensor_iterator.hpp"
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
namespace util
|
||||
{
|
||||
class TensorIteratorBuilder
|
||||
{
|
||||
public:
|
||||
///
|
||||
/// \brief Initialize TensorIterator node builder.
|
||||
///
|
||||
/// \param[in] arguments The arguments passed to TensorIterator node.
|
||||
/// \param[in] attributes The TensorIterator's attributes. This
|
||||
/// py::dict contains all descriptors for
|
||||
/// plethora of TensorIterator available inputs
|
||||
/// and outputs.
|
||||
///
|
||||
TensorIteratorBuilder(const ngraph::NodeVector& arguments, const py::dict& attributes);
|
||||
|
||||
///
|
||||
/// \brief Configure instance of TensorIterator node with set-up parameters.
|
||||
///
|
||||
/// \param ti_node The TensorIterator node instance to configure.
|
||||
///
|
||||
/// \return TensorIterator node.
|
||||
///
|
||||
std::shared_ptr<ngraph::op::TensorIterator>
|
||||
configure(std::shared_ptr<ngraph::op::TensorIterator>&& ti_node);
|
||||
|
||||
private:
|
||||
///
|
||||
/// \brief Helper to conduct attribute presence.
|
||||
///
|
||||
/// \param[in] attrs The attributes
|
||||
/// \param[in] attr_name The attribute name
|
||||
/// \param[in] desc_name The description name
|
||||
///
|
||||
inline void check_attribute(const py::dict& attrs,
|
||||
std::string attr_name,
|
||||
std::string desc_name) const;
|
||||
|
||||
///
|
||||
/// \brief Retrieve the TI graph body.
|
||||
///
|
||||
void get_graph_body();
|
||||
|
||||
///
|
||||
/// \brief Sets the tensor iterator sliced inputs.
|
||||
///
|
||||
/// \param ti_node The TI node we will set input to.
|
||||
///
|
||||
void set_tensor_iterator_sliced_inputs(
|
||||
std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const;
|
||||
|
||||
///
|
||||
/// \brief Sets the tensor iterator merged inputs.
|
||||
///
|
||||
/// \param ti_node The TI node we will set inputs to.
|
||||
///
|
||||
void set_tensor_iterator_merged_inputs(
|
||||
std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const;
|
||||
|
||||
///
|
||||
/// \brief Sets the tensor iterator invariant inputs.
|
||||
///
|
||||
/// \param ti_node The TI node we will set inputs to.
|
||||
///
|
||||
void set_tensor_iterator_invariant_inputs(
|
||||
std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const;
|
||||
|
||||
///
|
||||
/// \brief Sets the tensor iterator outputs.
|
||||
///
|
||||
/// \param ti_node The TI node we will set outputs to.
|
||||
///
|
||||
void
|
||||
set_tensor_iterator_outputs(std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const;
|
||||
|
||||
///
|
||||
/// \brief Sets the tensor iterator body output.
|
||||
///
|
||||
/// \param[in] desc The descriptor of the TI body output.
|
||||
/// \param ti_node The TI node we will set output to.
|
||||
///
|
||||
void set_tensor_iterator_body_output(
|
||||
const py::dict& desc, std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const;
|
||||
|
||||
///
|
||||
/// \brief Sets the tensor iterator concatenated body output.
|
||||
///
|
||||
/// \param[in] desc The descriptor of the TI body output.
|
||||
/// \param ti_node The TI node we will set output to.
|
||||
///
|
||||
void set_tensor_iterator_concatenated_body_output(
|
||||
const py::dict& desc, std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const;
|
||||
|
||||
const ngraph::NodeVector& m_arguments;
|
||||
const py::dict& m_attributes;
|
||||
ngraph::OutputVector m_body_outputs;
|
||||
ngraph::ParameterVector m_body_parameters;
|
||||
std::shared_ptr<ngraph::Function> m_body;
|
||||
py::list m_slice_input_desc;
|
||||
py::list m_merged_input_desc;
|
||||
py::list m_invariant_input_desc;
|
||||
std::map<int64_t, const py::dict> m_outputs;
|
||||
};
|
||||
} // namespace util
|
||||
@@ -19,10 +19,9 @@ from _pyngraph import PartialShape
|
||||
|
||||
import ngraph as ng
|
||||
import ngraph.opset1 as ng_opset1
|
||||
import ngraph.opset5 as ng_opset5
|
||||
from ngraph.impl import Type
|
||||
|
||||
from tests import skip_segfault
|
||||
|
||||
np_types = [np.float32, np.int32]
|
||||
integral_np_types = [
|
||||
np.int8,
|
||||
@@ -718,14 +717,89 @@ def test_rnn_sequence():
|
||||
assert list(node_param.get_output_shape(1)) == expected_shape_h
|
||||
|
||||
|
||||
@skip_segfault
|
||||
def test_loop():
|
||||
trip_count = 8
|
||||
condition = True
|
||||
from ngraph.utils.tensor_iterator_types import (
|
||||
GraphBody,
|
||||
TensorIteratorSliceInputDesc,
|
||||
TensorIteratorMergedInputDesc,
|
||||
TensorIteratorInvariantInputDesc,
|
||||
TensorIteratorBodyOutputDesc,
|
||||
TensorIteratorConcatOutputDesc,
|
||||
)
|
||||
|
||||
node_default = ng.loop(trip_count, condition)
|
||||
condition = ng.constant(True, dtype=np.bool)
|
||||
trip_count = ng.constant(16, dtype=np.int32)
|
||||
# Body parameters
|
||||
body_timestep = ng.parameter([], np.int32, "timestep")
|
||||
body_data_in = ng.parameter([1, 2, 2], np.float32, "body_in")
|
||||
body_prev_cma = ng.parameter([2, 2], np.float32, "body_prev_cma")
|
||||
body_const_one = ng.parameter([], np.int32, "body_const_one")
|
||||
|
||||
assert node_default.get_type_name() == "Loop"
|
||||
# CMA = cumulative moving average
|
||||
prev_cum_sum = ng.multiply(ng.convert(body_timestep, "f32"), body_prev_cma)
|
||||
curr_cum_sum = ng.add(prev_cum_sum, ng.squeeze(body_data_in, [0]))
|
||||
elem_cnt = ng.add(body_const_one, body_timestep)
|
||||
curr_cma = ng.divide(curr_cum_sum, ng.convert(elem_cnt, "f32"))
|
||||
cma_hist = ng.unsqueeze(curr_cma, [0])
|
||||
|
||||
# TI inputs
|
||||
data = ng.parameter([16, 2, 2], np.float32, "data")
|
||||
# Iterations count
|
||||
zero = ng.constant(0, dtype=np.int32)
|
||||
one = ng.constant(1, dtype=np.int32)
|
||||
initial_cma = ng.constant(np.zeros([2, 2], dtype=np.float32), dtype=np.float32)
|
||||
iter_cnt = ng.range(zero, np.int32(16), np.int32(1))
|
||||
ti_inputs = [iter_cnt, data, initial_cma, one]
|
||||
body_const_condition = ng.constant(True, dtype=np.bool)
|
||||
|
||||
graph_body = GraphBody([body_timestep, body_data_in, body_prev_cma, body_const_one],
|
||||
[curr_cma, cma_hist, body_const_condition])
|
||||
ti_slice_input_desc = [
|
||||
# timestep
|
||||
# input_idx, body_param_idx, start, stride, part_size, end, axis
|
||||
TensorIteratorSliceInputDesc(2, 0, 0, 1, 1, -1, 0),
|
||||
# data
|
||||
TensorIteratorSliceInputDesc(3, 1, 0, 1, 1, -1, 0),
|
||||
]
|
||||
ti_merged_input_desc = [
|
||||
# body prev/curr_cma
|
||||
TensorIteratorMergedInputDesc(4, 2, 0),
|
||||
]
|
||||
ti_invariant_input_desc = [
|
||||
# body const one
|
||||
TensorIteratorInvariantInputDesc(5, 3),
|
||||
]
|
||||
|
||||
# TI outputs
|
||||
ti_body_output_desc = [
|
||||
# final average
|
||||
TensorIteratorBodyOutputDesc(0, 0, -1),
|
||||
]
|
||||
ti_concat_output_desc = [
|
||||
# history of cma
|
||||
TensorIteratorConcatOutputDesc(1, 1, 0, 1, 1, -1, 0),
|
||||
]
|
||||
|
||||
node = ng.loop(
|
||||
trip_count,
|
||||
condition,
|
||||
ti_inputs,
|
||||
graph_body,
|
||||
ti_slice_input_desc,
|
||||
ti_merged_input_desc,
|
||||
ti_invariant_input_desc,
|
||||
ti_body_output_desc,
|
||||
ti_concat_output_desc,
|
||||
2,
|
||||
-1,
|
||||
)
|
||||
|
||||
assert node.get_type_name() == "Loop"
|
||||
assert node.get_output_size() == 2
|
||||
# final average
|
||||
assert list(node.get_output_shape(0)) == [2, 2]
|
||||
# cma history
|
||||
assert list(node.get_output_shape(1)) == [16, 2, 2]
|
||||
|
||||
|
||||
def test_roi_pooling():
|
||||
@@ -1096,6 +1170,28 @@ def test_tensor_iterator():
|
||||
assert list(node.get_output_shape(1)) == [16, 2, 2]
|
||||
|
||||
|
||||
def test_read_value_opset5():
    """Check that opset5 read_value builds a ReadValue node mirroring its input."""
    expected_shape = [2, 2]
    init_value = ng_opset5.parameter(expected_shape, name="init_value", dtype=np.int32)

    read_node = ng_opset5.read_value(init_value, "var_id_667")

    assert read_node.get_type_name() == "ReadValue"
    assert read_node.get_output_size() == 1
    assert list(read_node.get_output_shape(0)) == [2, 2]
    assert read_node.get_output_element_type(0) == Type.i32
|
||||
|
||||
|
||||
def test_assign_opset5():
    """Check that opset5 assign builds an Assign node on top of a ReadValue node."""
    variable_id = "var_id_667"
    input_data = ng_opset5.parameter([5, 7], name="input_data", dtype=np.int32)

    read_node = ng_opset5.read_value(input_data, variable_id)
    assign_node = ng_opset5.assign(read_node, variable_id)

    assert assign_node.get_type_name() == "Assign"
    assert assign_node.get_output_size() == 1
    assert list(assign_node.get_output_shape(0)) == [5, 7]
    assert assign_node.get_output_element_type(0) == Type.i32
|
||||
|
||||
|
||||
def test_read_value():
|
||||
init_value = ng.parameter([2, 2], name="init_value", dtype=np.int32)
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "ngraph/graph_util.hpp"
|
||||
#include "ngraph/ngraph.hpp"
|
||||
#include "ngraph/op/util/op_annotations.hpp"
|
||||
#include "ngraph/opsets/opset6.hpp"
|
||||
#include "ngraph/pass/manager.hpp"
|
||||
#include "ngraph/pass/visualize_tree.hpp"
|
||||
#include "util/all_close.hpp"
|
||||
@@ -261,6 +262,61 @@ TEST(graph_util, clone_multiple_results)
|
||||
auto copy = clone_function(*f);
|
||||
}
|
||||
|
||||
// Verifies that ngraph::clone_function() carries each node's "affinity" rt_info entry
// over to the cloned function.
TEST(graph_util, clone_rt_info)
{
    const std::string testAffinity = "CPU";
    std::shared_ptr<ngraph::Function> original_f;
    {
        ngraph::PartialShape shape({1, 84});
        ngraph::element::Type type(ngraph::element::Type_t::f32);
        auto param = std::make_shared<ngraph::opset6::Parameter>(type, shape);
        auto matMulWeights =
            ngraph::opset6::Constant::create(ngraph::element::Type_t::f32, {10, 84}, {1});
        auto shapeOf = std::make_shared<ngraph::opset6::ShapeOf>(matMulWeights);
        auto gConst1 = ngraph::opset6::Constant::create(ngraph::element::Type_t::i32, {1}, {1});
        auto gConst2 = ngraph::opset6::Constant::create(ngraph::element::Type_t::i64, {}, {0});
        auto gather = std::make_shared<ngraph::opset6::Gather>(shapeOf, gConst1, gConst2);
        auto concatConst = ngraph::opset6::Constant::create(ngraph::element::Type_t::i64, {1}, {1});
        auto concat =
            std::make_shared<ngraph::opset6::Concat>(ngraph::NodeVector{concatConst, gather}, 0);
        auto relu = std::make_shared<ngraph::opset6::Relu>(param);
        auto reshape = std::make_shared<ngraph::opset6::Reshape>(relu, concat, false);
        auto matMul = std::make_shared<ngraph::opset6::MatMul>(reshape, matMulWeights, false, true);
        auto matMulBias =
            ngraph::opset6::Constant::create(ngraph::element::Type_t::f32, {1, 10}, {1});
        auto addBias = std::make_shared<ngraph::opset6::Add>(matMul, matMulBias);
        auto result = std::make_shared<ngraph::opset6::Result>(addBias);

        ngraph::ParameterVector params = {param};
        ngraph::ResultVector results = {result};

        original_f = std::make_shared<ngraph::Function>(results, params);
    }

    // Friendly name -> affinity value that was attached to the original node.
    std::unordered_map<std::string, std::string> affinity;

    for (auto&& node : original_f->get_ordered_ops())
    {
        auto& nodeInfo = node->get_rt_info();

        nodeInfo["affinity"] = std::make_shared<ngraph::VariantWrapper<std::string>>(testAffinity);
        affinity[node->get_friendly_name()] = testAffinity;
    }

    auto clonedFunction = ngraph::clone_function(*original_f);

    for (auto&& node : clonedFunction->get_ordered_ops())
    {
        auto& nodeInfo = node->get_rt_info();
        auto itInfo = nodeInfo.find("affinity");
        ASSERT_TRUE(itInfo != nodeInfo.end());

        // as_type_ptr returns nullptr on a type mismatch; assert before dereferencing so
        // that a wrong variant type fails this test instead of crashing the test binary.
        auto variant =
            ngraph::as_type_ptr<ngraph::VariantWrapper<std::string>>(itInfo->second);
        ASSERT_TRUE(variant != nullptr);
        auto value = variant->get();

        // Single lookup: the name must be known and its recorded affinity must match.
        auto itExpected = affinity.find(node->get_friendly_name());
        ASSERT_TRUE(itExpected != affinity.end());
        ASSERT_TRUE(itExpected->second == value);
    }
}
|
||||
|
||||
TEST(util, round_up)
|
||||
{
|
||||
EXPECT_EQ(0, round_up(0, 4));
|
||||
|
||||
@@ -70,8 +70,9 @@
|
||||
#include <openvino/pp.hpp>
|
||||
#include <openvino/itt.hpp>
|
||||
|
||||
#define OV_CC_EXPAND(...) OV_PP_EXPAND(__VA_ARGS__)
|
||||
#define OV_CC_CAT(_0, _1) OV_PP_CAT(_0, _1)
|
||||
#define OV_CC_EXPAND OV_PP_EXPAND
|
||||
#define OV_CC_CAT OV_PP_CAT
|
||||
#define OV_CC_TOSTRING OV_PP_TOSTRING
|
||||
|
||||
#ifdef SELECTIVE_BUILD_ANALYZER
|
||||
# include <string>
|
||||
|
||||
@@ -38,7 +38,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
||||
try {
|
||||
InferenceEngine::Core ie;
|
||||
InferenceEngine::CNNNetwork network = ie.ReadNetwork(net, weights_blob);
|
||||
} catch (const InferenceEngine::details::InferenceEngineException& error) {
|
||||
} catch (const std::exception&) {
|
||||
return 0; // fail gracefully on expected exceptions
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user