Compare commits

...

42 Commits

Author SHA1 Message Date
Alexey Suhov
45d159094b Fix license header in Movidius sources 2021-06-04 19:59:17 +03:00
Artemy Skrebkov
6f860ddcf2 Restore precision setting for benchmark (#4859)
* Add -ip,-op,-iop parameters to benchmark_app

* Revert not related changes

* Revert extension usage
2021-03-19 10:20:18 +03:00
Andrew Bakalin
a86ae42aed [VPU] Limit dlclose() WA to be used for Ubuntu only (#4806) 2021-03-16 14:54:40 +03:00
Andrey Dmitriev
cef0696ef7 [DOCS] added iname/oname (#4735) 2021-03-16 12:41:54 +03:00
Andrey Zaytsev
e57a96474d Feature/vpu doc fixes 2021 3 (#4635)
* Documentation fixes and updates for VPU

* minor correction

* minor correction

* Fixed links

* updated supported layers list for vpu
2021-03-15 16:40:17 +03:00
Ilya Lavrenov
ed052022d3 Fixed some klockwork issues in C API samples (#4767) 2021-03-15 11:00:07 +03:00
Alina Alborova
6eda5c39c6 Inserted a disclaimer (#4760) 2021-03-12 18:38:58 +03:00
Mikhail Ryzhov
0100810dd6 Added mo.py to wheel packages (#4731) 2021-03-12 15:57:12 +03:00
Elizaveta Lobanova
882e377ef9 [GNA] Update documentation regarding splits and concatenations support (#4740) 2021-03-12 14:00:15 +03:00
Ilya Lavrenov
c9d5d95e2c Blocked dims hwc 2021/3 (#4729)
* Fix for BlockedDims

* Added test for HWC layout
2021-03-11 16:02:25 +03:00
Maxim Shevtsov
d77bc36dcd Docs update (#4626)
* Updated latency case desc to cover multi-socket machines

* updated opt guide a bit

* avoiding '#' which is interpreted as ref

* Update CPU.md

* Update docs/optimization_guide/dldt_optimization_guide.md

Co-authored-by: Alina Alborova <alina.alborova@intel.com>

* Update docs/optimization_guide/dldt_optimization_guide.md

Co-authored-by: Alina Alborova <alina.alborova@intel.com>

* Update docs/optimization_guide/dldt_optimization_guide.md

Co-authored-by: Alina Alborova <alina.alborova@intel.com>

* Update docs/optimization_guide/dldt_optimization_guide.md

Co-authored-by: Alina Alborova <alina.alborova@intel.com>

* Update docs/optimization_guide/dldt_optimization_guide.md

Co-authored-by: Alina Alborova <alina.alborova@intel.com>

Co-authored-by: Alina Alborova <alina.alborova@intel.com>
2021-03-11 15:33:08 +03:00
Andrey Somsikov
19e1b6002e Catch std::except in fuzz tests (#4695)
Fuzz tests must catch all expected exceptions from IE. IE uses the C++ standard
library, which may raise standard exceptions that IE passes through.
2021-03-11 14:09:36 +03:00
Ivan Tikhonov
6bcd0f6072 Fix python API for Loop/TensorIterator/Assign/ReadValue operations 2021-03-11 13:22:50 +03:00
Anton Chetverikov
0e8534a4a9 Add STN to list of supported models (#4728) 2021-03-11 11:55:45 +03:00
Ilya Churaev
08d7c3e75f Restored folded Operations for QueryNetwork (#4685)
* Restored folded Operations for QueryNetwork

* Fixed comment

* Add unfolded constant operations to supported layers map
2021-03-11 10:41:57 +03:00
Evgeny Lazarev
821d513150 Updated documentation about the supported YOLOv3 model from ONNX (#4722) (#4726) 2021-03-11 10:32:13 +03:00
Mikhail Letavin
38a48b9cbf Fix NormalizeL2 creation in QueryNetwork (cherry pick from master PR 4310) (#4651) 2021-03-10 19:41:35 +03:00
Mikhail Letavin
c6d8905a88 [IE CLDNN] Fix missing variable initializations and types (#4669) 2021-03-10 16:23:10 +03:00
Ilya Churaev
3a80652d70 Updated nGraph custom op documentation (#4604)
* Updated nGraph custom op documentation

* Fixed comments
2021-03-10 16:16:57 +03:00
Roman Kazantsev
120d3a596d Document TensorFlow 2* Update: Layers Support and Remove Beta Status (#4474) (#4711)
* Document TensorFlow 2* Update: Layers Support and Remove Beta Status

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Update documentation based on latest test results and feedback

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Remove ConvLSTM2D from supported layers list

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Document Dot layer without limitation

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Address feedback upon DenseFeatures and RNN operations

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Do a grammar correction

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>

* Do a grammar correction based on feedback

Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>
2021-03-10 13:35:46 +03:00
Andrew Bakalin
25af83db81 [VPU] WA for Segmentation fault on dlclose() issue (#4645) 2021-03-10 12:09:55 +03:00
Maksim Doronin
03c6f4e3fe [IE][VPU]: Fix empty output of CTCGreedyDecoderSeqLen (#4653)
* Allow the second output of CTCGreedyDecoderSeqLen to be nullptr in cases when it is not used but calculated in the Myriad plugin. In this case, parse the second output as FakeData
* It is a cherry-pick of #4652
* Update the firmware to release version
2021-03-10 11:48:13 +03:00
Mikhail Ryzhov
5d3d323bed [doc] Updated PyPI support OSes (#4643) (#4662)
* [doc] Updated PyPI support OSes (#4643)

* Updated PyPI support OSes

* Added python versions for win and mac

* Update pypi-openvino-dev.md

* Update pypi-openvino-dev.md

* Update pypi-openvino-rt.md

* Update pypi-openvino-dev.md

Co-authored-by: Andrey Zaytsev <andrey.zaytsev@intel.com>
2021-03-09 17:54:29 +03:00
Anastasia Popova
a53524a554 Fixed bug in assign elimination transformation. (#4644) 2021-03-09 14:58:12 +03:00
Maxim Vafin
02d2dbd0fa Add documentation on how to convert QuartzNet model (#4664)
* Add documentation on how to convert QuartzNet model (#4422)

* Add documentation on how to convert QuartzNet model

* Apply review feedback

* Small fix

* Apply review feedback

* Apply suggestions from code review

Co-authored-by: Anastasiya Ageeva <anastasiya.ageeva@intel.com>

Co-authored-by: Anastasiya Ageeva <anastasiya.ageeva@intel.com>

* Add reference to file

Co-authored-by: Anastasiya Ageeva <anastasiya.ageeva@intel.com>
2021-03-09 13:55:46 +03:00
Ilya Lavrenov
bfe0748b4c Docs api (#4657)
* Updated API changes document

* Comment for CVS-49440
2021-03-09 12:50:37 +03:00
Nikolay Shchegolev
d78577aecb [CPU] Statically analyzed issues. (#4637) 2021-03-09 12:26:32 +03:00
Mikhail Ryzhov
e09f0e4808 Corrected PyYAML dependency (#4598) (#4620)
5.4.2 is absent on PyPI
2021-03-05 17:24:36 +03:00
Roman Kazantsev
ff73955354 Add workaround for control edges to support TF 2.4 RNN (#4634)
Signed-off-by: Roman Kazantsev <roman.kazantsev@intel.com>
2021-03-05 16:41:36 +03:00
Vladislav Volkov
18cb230af4 Fix for MKLDNN constant layers execution (#4642)
* Fix for MKLDNN constant layers execution

* Single mkldnn::engine for all MKLDNN graphs
2021-03-05 16:28:10 +03:00
Alina Kladieva
9067a25616 [.ci/azure] Add windows_conditional_compilation.yml (#4648) (#4655) 2021-03-05 15:59:59 +03:00
Gorokhov Dmitriy
c4ff0ffa9d [CPU] Supported ANY layout for inputs in inferRequest (#4621) 2021-03-05 12:30:26 +03:00
Ilya Churaev
4675a12c8f Fixed KW hits (#4638) 2021-03-05 11:13:11 +03:00
Evgeny Lazarev
3cd5da0797 Fixed transformation to pull constants into Loop body (cherry-pick of PR 4591) (#4607)
* Cherry-pick of PR 4591

* Fixed typo

* Moved a check into the parameter_unchanged_after_iteration function
2021-03-04 17:56:54 +03:00
Andrey Zaytsev
9b402f226f Formula fix (#4624) 2021-03-04 16:24:56 +03:00
Alina Kladieva
784adca70a [.ci/azure] Enable CC build (#4619) 2021-03-04 14:49:50 +03:00
Ilya Churaev
8e1603f7fd Fixed clone rt info (#4597) 2021-03-04 13:02:11 +03:00
azhogov
66ede40e4e Merge branch 'releases/2021/3' of https://github.com/openvinotoolkit/openvino into releases/2021/3 2021-03-04 12:22:59 +03:00
azhogov
40a29a7aa3 Azure CI: Add "ref: releases/2021/3" 2021-03-04 12:22:31 +03:00
Vladislav Volkov
a7e00dae54 Fix for broken CC in CPU plugin (#4595) 2021-03-04 12:22:15 +03:00
Andrey Zaytsev
4c40494605 Feature/azaytsev/gna model link fixes (#4599)
* Added info on DockerHub CI Framework

* Feature/azaytsev/change layout (#3295)

* Changes according to feedback comments

* Replaced @ref's with html links

* Fixed links, added a title page for installing from repos and images, fixed formatting issues

* Added links

* minor fix

* Added DL Streamer to the list of components installed by default

* Link fixes

* Link fixes

* ovms doc fix (#2988)

* added OpenVINO Model Server

* ovms doc fixes

Co-authored-by: Trawinski, Dariusz <dariusz.trawinski@intel.com>

* Updated openvino_docs.xml

* Link Fixes

Co-authored-by: Trawinski, Dariusz <dariusz.trawinski@intel.com>
2021-03-04 12:00:07 +03:00
Andrey Zaytsev
3e2a4a5df1 Feature/azaytsev/cldnn doc fixes (#4600)
* Legal fixes, removed the Generating docs section

* Removed info regarding generating docs

Co-authored-by: Trawinski, Dariusz <dariusz.trawinski@intel.com>
2021-03-04 11:50:02 +03:00
100 changed files with 1524 additions and 998 deletions

View File

@@ -4,11 +4,13 @@ resources:
type: github
endpoint: openvinotoolkit
name: openvinotoolkit/openvino_contrib
ref: releases/2021/3
- repository: testdata
type: github
endpoint: openvinotoolkit
name: openvinotoolkit/testdata
ref: releases/2021/3
jobs:
- job: Lin

View File

@@ -64,13 +64,13 @@ jobs:
- task: CMake@1
inputs:
#-DENABLE_PROFILING_ITT=ON
#-DSELECTIVE_BUILD=COLLECT
cmakeArgs: >
-GNinja
-DVERBOSE_BUILD=ON
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
-DENABLE_FASTER_BUILD=ON
-DENABLE_PROFILING_ITT=ON
-DSELECTIVE_BUILD=COLLECT
$(REPO_DIR)
workingDirectory: $(BUILD_DIR)

View File

@@ -4,11 +4,13 @@ resources:
type: github
endpoint: openvinotoolkit
name: openvinotoolkit/openvino_contrib
ref: releases/2021/3
- repository: testdata
type: github
endpoint: openvinotoolkit
name: openvinotoolkit/testdata
ref: releases/2021/3
jobs:
- job: Mac

View File

@@ -4,11 +4,13 @@ resources:
type: github
endpoint: openvinotoolkit
name: openvinotoolkit/openvino_contrib
ref: releases/2021/3
- repository: testdata
type: github
endpoint: openvinotoolkit
name: openvinotoolkit/testdata
ref: releases/2021/3
jobs:
- job: Win

View File

@@ -0,0 +1,89 @@
jobs:
- job: WinCC
# About 150% of total time
timeoutInMinutes: 120
pool:
name: WIN_VMSS_VENV_F8S_WU2
variables:
system.debug: true
VSTS_HTTP_RETRY: 5
VSTS_HTTP_TIMEOUT: 200
WORKERS_NUMBER: 8
BUILD_TYPE: Release
REPO_DIR: $(Build.Repository.LocalPath)
OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)\..\openvino_contrib
MODELS_PATH: $(REPO_DIR)\..\testdata
WORK_DIR: $(Pipeline.Workspace)\_w
BUILD_DIR: D:\build
BIN_DIR: $(REPO_DIR)\bin\intel64
MSVS_VARS_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat
MSVC_COMPILER_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\bin\Hostx64\x64\cl.exe
INSTALL_DIR: $(WORK_DIR)\install_pkg
SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat
IB_DIR: C:\Program Files (x86)\IncrediBuild
IB_TESTCONSOLE: $(IB_DIR)\IBTestConsole.exe
TEST_ENV_PATH: $(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.1\opencv\bin;$(IB_DIR);%PATH%
steps:
- script: |
powershell -command "Invoke-RestMethod -Headers @{\"Metadata\"=\"true\"} -Method GET -Uri http://169.254.169.254/metadata/instance/compute?api-version=2019-06-01 | format-custom"
where python3
where python
python --version
where java
java -version
wmic computersystem get TotalPhysicalMemory
wmic cpu list
wmic logicaldisk get description,name
wmic VOLUME list
set
displayName: 'System info'
- script: |
rd /Q /S $(WORK_DIR) & mkdir $(WORK_DIR)
rd /Q /S $(BUILD_DIR) & mkdir $(BUILD_DIR)
displayName: 'Make dir'
- script: |
certutil -urlcache -split -f https://incredibuilddiag1wu2.blob.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
call install_ib_console.bat
workingDirectory: $(WORK_DIR)
displayName: 'Install IncrediBuild'
- checkout: self
clean: true
lfs: false
submodules: recursive
path: openvino
- script: |
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-win.zip ninja-win.zip
powershell -command "Expand-Archive -Force ninja-win.zip"
workingDirectory: $(WORK_DIR)
displayName: 'Install dependencies'
- script: |
set PATH=$(WORK_DIR)\ninja-win;%PATH%
call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_FASTER_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
workingDirectory: $(BUILD_DIR)
displayName: 'CMake'
- script: |
set PATH=$(WORK_DIR)\ninja-win;%PATH%
call "$(MSVS_VARS_PATH)" && "C:\Program Files (x86)\IncrediBuild\BuildConsole.exe" /COMMAND="ninja"
workingDirectory: $(BUILD_DIR)
displayName: 'Build Win'
- script: dir $(REPO_DIR)\bin\ /s
displayName: 'List files'
- script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake
workingDirectory: $(BUILD_DIR)
displayName: 'Install'
- script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
displayName: Stop IncrediBuild
continueOnError: true
enabled: false

View File

@@ -1,4 +1,4 @@
# [OpenVINO™ Toolkit](https://01.org/openvinotoolkit) - Deep Learning Deployment Toolkit repository
# OpenVINO™ Toolkit
[![Stable release](https://img.shields.io/badge/version-2021.2-green.svg)](https://github.com/openvinotoolkit/openvino/releases/tag/2021.2)
[![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE)
![GitHub branch checks state](https://img.shields.io/github/checks-status/openvinotoolkit/openvino/master?label=GitHub%20checks)
@@ -7,7 +7,7 @@
This toolkit allows developers to deploy pre-trained deep learning models
through a high-level C++ Inference Engine API integrated with application logic.
This open source version includes several components: namely [Model Optimizer], [ngraph] and
This open source version includes several components: namely [Model Optimizer], [nGraph] and
[Inference Engine], as well as CPU, GPU, MYRIAD, multi device and heterogeneous plugins to accelerate deep learning inferencing on Intel® CPUs and Intel® Processor Graphics.
It supports pre-trained models from the [Open Model Zoo], along with 100+ open
source and public models in popular formats such as Caffe\*, TensorFlow\*,
@@ -15,7 +15,7 @@ MXNet\* and ONNX\*.
## Repository components:
* [Inference Engine]
* [ngraph]
* [nGraph]
* [Model Optimizer]
## License
@@ -27,9 +27,10 @@ and release your contribution under these terms.
* Docs: https://docs.openvinotoolkit.org/
* Wiki: https://github.com/openvinotoolkit/openvino/wiki
* Issue tracking: https://github.com/openvinotoolkit/openvino/issues
* Additional OpenVINO modules: https://github.com/openvinotoolkit/openvino_contrib
* [HomePage](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html)
* [OpenVINO™ Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes)
* Storage: https://storage.openvinotoolkit.org/
* Additional OpenVINO™ modules: https://github.com/openvinotoolkit/openvino_contrib
* [Intel® Distribution of OpenVINO™ toolkit Product Page](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html)
* [Intel® Distribution of OpenVINO™ toolkit Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes)
## Support
Please report questions, issues and suggestions using:
@@ -45,4 +46,4 @@ Please report questions, issues and suggestions using:
[Inference Engine]:https://software.intel.com/en-us/articles/OpenVINO-InferEngine
[Model Optimizer]:https://software.intel.com/en-us/articles/OpenVINO-ModelOptimizer
[tag on StackOverflow]:https://stackoverflow.com/search?q=%23openvino
[ngraph]:https://docs.openvinotoolkit.org/latest/openvino_docs_nGraph_DG_DevGuide.html
[nGraph]:https://docs.openvinotoolkit.org/latest/openvino_docs_nGraph_DG_DevGuide.html

View File

@@ -2,6 +2,19 @@
The sections below contain detailed list of changes made to the Inference Engine API in recent releases.
## 2021.3
### New API
* InferenceEngine::InferRequest::Cancel to cancel inference request execution
* InferenceEngine::Layout::HWC to support HWC layout for input or output blobs
* InferenceEngine::Precision::F64 data precision for f64 data type
* InferenceEngine::CNNNetwork::getOVNameForTensor to map frameworks tensor names to OpenVINO internal tensor names
### Deprecated API
* InferenceEngine::IVariableState interface is deprecated, use InferenceEngine::VariableState wrapper
## 2021.2
### New API

View File

@@ -6,7 +6,7 @@ Inference Engine Extension API allows to register operation sets (opsets) with c
To add your custom nGraph operation, create a new class that extends `ngraph::Op`, which is in turn derived from `ngraph::Node`, the base class for all graph operations in nGraph. Follow the steps below:
1. Define a `NodeTypeInfo` object that identifies the type of the operation to the graph users and helps with dynamic type resolution. The type info of an nGraph operation currently consists of a string identifier and a version number, but this may change in the future.
1. Add the `NGRAPH_RTTI_DECLARATION` and `NGRAPH_RTTI_DEFINITION` macros which define a `NodeTypeInfo` object that identifies the type of the operation to the graph users and helps with dynamic type resolution. The type info of an nGraph operation currently consists of a string identifier and a version number, but this may change in the future.
2. Implement constructors that can optionally take the operation inputs and attributes as parameters.

View File

@@ -113,8 +113,8 @@ CPU-specific settings:
| Parameter name | Parameter values | Default | Description |
| :--- | :--- | :--- | :--- |
| KEY_CPU_THREADS_NUM | positive integer values| 0 | Specifies the number of threads that CPU plugin should use for inference. Zero (default) means using all (logical) cores|
| KEY_CPU_BIND_THREAD | YES/NUMA/NO | YES | Binds inference threads to CPU cores. 'YES' (default) binding option maps threads to cores - this works best for static/synthetic scenarios like benchmarks. The 'NUMA' binding is more relaxed, binding inference threads only to NUMA nodes, leaving further scheduling to specific cores to the OS. This option might perform better in the real-life/contended scenarios. Note that for the latency-oriented cases (single execution stream, see below) both YES and NUMA options limit number of inference threads to the number of hardware cores (ignoring hyper-threading) on the multi-socket machines. |
| KEY_CPU_THROUGHPUT_STREAMS | KEY_CPU_THROUGHPUT_NUMA, KEY_CPU_THROUGHPUT_AUTO, or positive integer values| 1 | Specifies number of CPU "execution" streams for the throughput mode. Upper bound for the number of inference requests that can be executed simultaneously. All available CPU cores are evenly distributed between the streams. The default value is 1, which implies latency-oriented behavior with all available cores processing requests one by one.<br>KEY_CPU_THROUGHPUT_NUMA creates as many streams as needed to accommodate NUMA and avoid associated penalties.<br>KEY_CPU_THROUGHPUT_AUTO creates bare minimum of streams to improve the performance; this is the most portable option if you don't know how many cores your target machine has (and what would be the optimal number of streams). Note that your application should provide enough parallel slack (for example, run many inference requests) to leverage the throughput mode. <br> Non-negative integer value creates the requested number of streams. If a number of streams is 0, no internal streams are created and user threads are interpreted as stream master threads.|
| KEY_CPU_BIND_THREAD | YES/NUMA/NO | YES | Binds inference threads to CPU cores. 'YES' (default) binding option maps threads to cores - this works best for static/synthetic scenarios like benchmarks. The 'NUMA' binding is more relaxed, binding inference threads only to NUMA nodes, leaving further scheduling to specific cores to the OS. This option might perform better in the real-life/contended scenarios. Note that for the latency-oriented cases (number of the streams is less or equal to the number of NUMA nodes, see below) both YES and NUMA options limit number of inference threads to the number of hardware cores (ignoring hyper-threading) on the multi-socket machines. |
| KEY_CPU_THROUGHPUT_STREAMS | KEY_CPU_THROUGHPUT_NUMA, KEY_CPU_THROUGHPUT_AUTO, or positive integer values| 1 | Specifies number of CPU "execution" streams for the throughput mode. Upper bound for the number of inference requests that can be executed simultaneously. All available CPU cores are evenly distributed between the streams. The default value is 1, which implies latency-oriented behavior for single NUMA-node machine, with all available cores processing requests one by one. On the multi-socket (multiple NUMA nodes) machine, the best latency numbers usually achieved with a number of streams matching the number of NUMA-nodes. <br>KEY_CPU_THROUGHPUT_NUMA creates as many streams as needed to accommodate NUMA and avoid associated penalties.<br>KEY_CPU_THROUGHPUT_AUTO creates bare minimum of streams to improve the performance; this is the most portable option if you don't know how many cores your target machine has (and what would be the optimal number of streams). Note that your application should provide enough parallel slack (for example, run many inference requests) to leverage the throughput mode. <br> Non-negative integer value creates the requested number of streams. If a number of streams is 0, no internal streams are created and user threads are interpreted as stream master threads.|
| KEY_ENFORCE_BF16 | YES/NO| YES | The name for setting to execute in bfloat16 precision whenever it is possible. This option lets plugin know to downscale the precision where it sees performance benefits from bfloat16 execution. Such option does not guarantee accuracy of the network, you need to verify the accuracy in this mode separately, based on performance and accuracy results. It should be your decision whether to use this option or not. |
> **NOTE**: To disable all internal threading, use the following set of configuration parameters: `KEY_CPU_THROUGHPUT_STREAMS=0`, `KEY_CPU_THREADS_NUM=1`, `KEY_CPU_BIND_THREAD=NO`.

View File

@@ -69,7 +69,7 @@ Limitations include:
- Only 1D convolutions are natively supported.
- The number of output channels for convolutions must be a multiple of 4.
- Permute layer support is limited to the cases where no data reordering is needed or when reordering is happening for two dimensions, at least one of which is not greater than 8.
- Concatinations and splittings are supported only along the channel dimension (axis=1).
- Splits and concatenations are supported for continuous portions of memory (e.g., split of 1,2,3,4 to 1,1,3,4 and 1,1,3,4 or concats of 1,2,3,4 and 1,2,3,5 to 2,2,3,4).
#### Experimental Support for 2D Convolutions
@@ -77,7 +77,7 @@ The Intel® GNA hardware natively supports only 1D convolution.
However, 2D convolutions can be mapped to 1D when a convolution kernel moves in a single direction. GNA Plugin performs such a transformation for Kaldi `nnet1` convolution. From this perspective, the Intel® GNA hardware convolution operation accepts an `NHWC` input and produces an `NHWC` output. Because OpenVINO™ only supports the `NCHW` layout, you may need to insert `Permute` layers before or after convolutions.
For example, the Kaldi model optimizer inserts such a permute after convolution for the [rm_cnn4a network](https://download.01.org/openvinotoolkit/models_contrib/speech/kaldi/rm_cnn4a_smbr/). This `Permute` layer is automatically removed by the GNA Plugin, because the Intel® GNA hardware convolution layer already produces the required `NHWC` result.
For example, the Kaldi model optimizer inserts such a permute after convolution for the [rm_cnn4a network](https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/rm_cnn4a_smbr/). This `Permute` layer is automatically removed by the GNA Plugin, because the Intel® GNA hardware convolution layer already produces the required `NHWC` result.
## Operation Precision

View File

@@ -88,7 +88,7 @@ the supported output precision depends on the actual underlying devices. _Gener
|:-------------|:------------:|:------------:|:------------:|:------------:|
|CPU plugin |Supported |Supported |Supported |Supported |
|GPU plugin |Supported |Supported |Supported |Supported |
|VPU plugins |Not supported |Supported |Supported |Supported |
|VPU plugins |Supported |Supported |Supported |Supported |
|GNA plugin |Not supported |Supported |Supported |Supported |
### Supported Output Layout
@@ -111,9 +111,9 @@ The following layers are supported by the plugins and by [Shape Inference featur
| Acosh | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| Activation-Clamp | Supported |Supported\*\*\*| Supported | Supported | Supported |
| Activation-ELU | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
| Activation-Exp | Supported |Supported\*\*\*| Not Supported | Supported | Supported |
| Activation-Exp | Supported |Supported\*\*\*| Supported | Supported | Supported |
| Activation-Leaky ReLU | Supported |Supported\*\*\*| Supported | Supported | Supported |
| Activation-Not | Supported |Supported\*\*\*| Not Supported | Not Supported | Supported |
| Activation-Not | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
| Activation-PReLU | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
| Activation-ReLU | Supported |Supported\*\*\*| Supported | Supported | Supported |
| Activation-ReLU6 | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
@@ -127,7 +127,7 @@ The following layers are supported by the plugins and by [Shape Inference featur
| BatchNormalization | Supported | Supported | Supported | Not Supported | Supported |
| BinaryConvolution | Supported | Supported | Not Supported | Not Supported | Supported |
| Broadcast | Supported | Supported\*\* | Supported | Not Supported | Supported |
| Ceil | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| Ceil | Supported | Supported\*\* | Supported | Not Supported | Supported |
| Concat | Supported |Supported\*\*\*| Supported | Supported | Supported |
| Const | Supported | Supported | Supported | Supported | Not Supported |
| Convolution-Dilated | Supported | Supported | Supported | Not Supported | Supported |
@@ -145,8 +145,8 @@ The following layers are supported by the plugins and by [Shape Inference featur
| DeformableConvolution | Supported | Supported | Not Supported | Not Supported | Supported |
| DepthToSpace | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| DetectionOutput | Supported | Supported\*\* | Supported\* | Not Supported | Supported |
| Eltwise-And | Supported |Supported\*\*\*| Not Supported | Not Supported | Supported |
| Eltwise-Add | Supported |Supported\*\*\*| Not Supported | Not Supported | Supported |
| Eltwise-And | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
| Eltwise-Add | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
| Eltwise-Div | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
| Eltwise-Equal | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
| Eltwise-FloorMod | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
@@ -166,12 +166,12 @@ The following layers are supported by the plugins and by [Shape Inference featur
| Eltwise-SquaredDiff | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
| Eltwise-Sub | Supported |Supported\*\*\*| Supported | Supported | Supported |
| Eltwise-Sum | Supported |Supported\*\*\*| Supported | Supported | Supported |
| Erf | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| Exp | Supported | Supported | Not Supported | Supported | Supported |
| Erf | Supported | Supported\*\* | Supported | Not Supported | Supported |
| Exp | Supported | Supported | Supported | Supported | Supported |
| FakeQuantize | Not Supported | Supported | Not Supported | Not Supported | Supported |
| Fill | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| Flatten | Supported | Supported | Supported | Not Supported | Supported |
| Floor | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| Floor | Supported | Supported\*\* | Supported | Not Supported | Supported |
| FullyConnected (Inner Product) | Supported |Supported\*\*\*| Supported | Supported | Supported |
| Gather | Supported | Supported\*\* | Supported | Not Supported | Supported |
| GatherTree | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
@@ -191,9 +191,9 @@ The following layers are supported by the plugins and by [Shape Inference featur
| Memory | Not Supported | Supported | Not Supported | Supported | Supported |
| MVN | Supported | Supported\*\* | Supported\* | Not Supported | Supported |
| Neg | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| NonMaxSuppression | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| NonMaxSuppression | Not Supported | Supported\*\* | Supported | Not Supported | Supported |
| Normalize | Supported | Supported\*\* | Supported\* | Not Supported | Supported |
| OneHot | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| OneHot | Supported | Supported\*\* | Supported | Not Supported | Supported |
| Pad | Supported | Supported\*\* | Supported\* | Not Supported | Supported |
| Permute | Supported | Supported | Supported | Supported\* | Supported |
| Pooling(AVG,MAX) | Supported | Supported | Supported | Supported | Supported |
@@ -206,17 +206,17 @@ The following layers are supported by the plugins and by [Shape Inference featur
| PSROIPooling | Supported | Supported\*\* | Supported | Not Supported | Supported |
| Range | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| Reciprocal | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| ReduceAnd | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| ReduceAnd | Supported | Supported\*\* | Supported | Not Supported | Supported |
| ReduceL1 | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| ReduceL2 | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| ReduceLogSum | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| ReduceLogSumExp | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| ReduceMax | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| ReduceMean | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| ReduceMin | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| ReduceMax | Supported | Supported\*\* | Supported | Not Supported | Supported |
| ReduceMean | Supported | Supported\*\* | Supported | Not Supported | Supported |
| ReduceMin | Supported | Supported\*\* | Supported | Not Supported | Supported |
| ReduceOr | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| ReduceProd | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| ReduceSum | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| ReduceSum | Supported | Supported\*\* | Supported | Not Supported | Supported |
| ReduceSumSquare | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| RegionYolo | Supported | Supported\*\* | Supported | Not Supported | Supported |
| ReorgYolo | Supported | Supported\*\* | Supported | Not Supported | Supported |
@@ -226,7 +226,7 @@ The following layers are supported by the plugins and by [Shape Inference featur
| RNN | Not Supported | Supported | Supported | Not Supported | Not Supported |
| ROIPooling | Supported\* | Supported | Supported | Not Supported | Supported |
| ScaleShift | Supported |Supported\*\*\*| Supported\* | Supported | Supported |
| ScatterUpdate | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| ScatterUpdate | Not Supported | Supported\*\* | Supported | Not Supported | Supported |
| Select | Supported | Supported | Supported | Not Supported | Supported |
| Selu | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| ShuffleChannels | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
@@ -236,17 +236,17 @@ The following layers are supported by the plugins and by [Shape Inference featur
| SimplerNMS | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| Slice | Supported |Supported\*\*\*| Supported | Supported | Supported |
| SoftMax | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
| Softplus | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| Softplus | Supported | Supported\*\* | Supported | Not Supported | Supported |
| Softsign | Supported | Supported\*\* | Not Supported | Supported | Supported |
| SpaceToDepth | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| SpatialTransformer | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| Split | Supported |Supported\*\*\*| Supported | Supported | Supported |
| Squeeze | Supported | Supported\*\* | Supported | Supported | Supported |
| StridedSlice | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| StridedSlice | Supported | Supported\*\* | Supported | Not Supported | Supported |
| Tan | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| TensorIterator | Not Supported | Supported | Supported | Supported | Not Supported |
| Tile | Supported\*\* |Supported\*\*\*| Supported | Not Supported | Supported |
| TopK | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
| TopK | Supported | Supported\*\* | Supported | Not Supported | Supported |
| Unpooling | Supported | Not Supported | Not Supported | Not Supported | Not Supported |
| Unsqueeze | Supported | Supported\*\* | Supported | Supported | Supported |
| Upsampling | Supported | Not Supported | Not Supported | Not Supported | Not Supported |

View File

@@ -255,6 +255,89 @@ Standard TensorFlow\* operations:
| ZerosLike | No |
## TensorFlow 2 Keras\* Supported Operations
Standard TensorFlow 2 Keras\* operations:
| Operation Name in TensorFlow 2 Keras\* | Limitations|
| :----------| :----------|
| ActivityRegularization | No |
| Add | No |
| AdditiveAttention | No |
| AlphaDropout | No |
| Attention | No |
| Average | No |
| AveragePooling1D | No |
| AveragePooling2D | No |
| AveragePooling3D | No |
| BatchNormalization | No |
| Bidirectional | No |
| Concatenate | No |
| Conv1D | No |
| Conv1DTranspose | Not supported if dilation is not equal to 1 |
| Conv2D | No |
| Conv2DTranspose | No |
| Conv3D | No |
| Conv3DTranspose | No |
| Cropping1D | No |
| Cropping2D | No |
| Cropping3D | No |
| Dense | No |
| DenseFeatures | Not supported for categorical and crossed features |
| DepthwiseConv2D | No |
| Dot | No |
| Dropout | No |
| ELU | No |
| Embedding | No |
| Flatten | No |
| GRU | No |
| GRUCell | No |
| GaussianDropout | No |
| GaussianNoise | No |
| GlobalAveragePooling1D | No |
| GlobalAveragePooling2D | No |
| GlobalAveragePooling3D | No |
| GlobalMaxPool1D | No |
| GlobalMaxPool2D | No |
| GlobalMaxPool3D | No |
| LSTM | No |
| LSTMCell | No |
| Lambda | No |
| LayerNormalization | No |
| LeakyReLU | No |
| LocallyConnected1D | No |
| LocallyConnected2D | No |
| MaxPool1D | No |
| MaxPool2D | No |
| MaxPool3D | No |
| Maximum | No |
| Minimum | No |
| Multiply | No |
| PReLU | No |
| Permute | No |
| RNN | Not supported for some custom cells |
| ReLU | No |
| RepeatVector | No |
| Reshape | No |
| SeparableConv1D | No |
| SeparableConv2D | No |
| SimpleRNN | No |
| SimpleRNNCell | No |
| Softmax | No |
| SpatialDropout1D | No |
| SpatialDropout2D | No |
| SpatialDropout3D | No |
| StackedRNNCells | No |
| Subtract | No |
| ThresholdedReLU | No |
| TimeDistributed | No |
| UpSampling1D | No |
| UpSampling2D | No |
| UpSampling3D | No |
| ZeroPadding1D | No |
| ZeroPadding2D | No |
| ZeroPadding3D | No |
## Kaldi\* Supported Layers
Standard Kaldi\* Layers:

View File

@@ -23,6 +23,7 @@
| VGG19 | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/vgg19.tar.gz) |
| zfnet512 | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/zfnet512.tar.gz) |
| GPT-2 | [model archive](https://github.com/onnx/models/blob/master/text/machine_comprehension/gpt-2/model/gpt2-10.tar.gz) |
| YOLOv3 | [model archive](https://github.com/onnx/models/blob/master/vision/object_detection_segmentation/yolov3/model/yolov3-10.tar.gz) |
Listed models are built with the operation set version 8 except the GPT-2 model. Models that are upgraded to higher operation set versions may not be supported.

View File

@@ -115,6 +115,7 @@ Where `HEIGHT` and `WIDTH` are the input images height and width for which the m
| Keras-TCN | [Repo](https://github.com/philipperemy/keras-tcn) |
| PRNet | [Repo](https://github.com/YadiraF/PRNet) |
| YOLOv4 | [Repo](https://github.com/Ma-Dan/keras-yolo4) |
| STN | [Repo](https://github.com/oarriaga/STN.keras) |
* YOLO topologies from DarkNet* can be converted using [instruction](tf_specific/Convert_YOLO_From_Tensorflow.md),
* FaceNet topologies can be converted using [instruction](tf_specific/Convert_FaceNet_From_Tensorflow.md).
@@ -342,11 +343,9 @@ model = tf.keras.models.load_model('model.h5', custom_objects={'CustomLayer': Cu
tf.saved_model.save(model,'model')
```
Then follow the above instructions for the SavedModel format.
Then follow the above instructions for the SavedModel format.
> **NOTE:** Do not use other hacks to resave TensorFlow* 2 models into TensorFlow* 1 formats.
> **NOTE**: Currently, OpenVINO™ support for TensorFlow* 2 models is in preview (aka Beta), which means limited and not of production quality yet. OpenVINO™ does not support models with Keras RNN and Embedding layers.
> **NOTE:** Do not use other hacks to resave TensorFlow* 2 models into TensorFlow* 1 formats.
## Custom Layer Definition
@@ -360,7 +359,7 @@ See [Custom Layers in the Model Optimizer](../customize_model_optimizer/Customiz
* Custom layer implementation details
## Supported TensorFlow\* Layers
## Supported TensorFlow\* and TensorFlow 2 Keras\* Layers
Refer to [Supported Framework Layers ](../Supported_Frameworks_Layers.md) for the list of supported standard layers.

View File

@@ -0,0 +1,32 @@
# Convert PyTorch* QuartzNet to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_QuartzNet}
[NeMo project](https://github.com/NVIDIA/NeMo) provides the QuartzNet model.
## Download the Pre-Trained QuartzNet Model
To download the pre-trained model, refer to the [NeMo Speech Models Catalog](https://ngc.nvidia.com/catalog/models/nvidia:nemospeechmodels).
Here are the instructions on how to obtain QuartzNet in ONNX* format.
```python
import nemo
import nemo.collections.asr as nemo_asr
quartznet = nemo_asr.models.ASRConvCTCModel.from_pretrained(model_info='QuartzNet15x5-En')
# Export QuartzNet model to ONNX* format
quartznet.export('qn.onnx')
```
This code produces 3 ONNX* model files: `encoder_qt.onnx`, `decoder_qt.onnx`, `qn.onnx`.
They are `encoder`, `decoder` and a combined `decoder(encoder(x))` models, respectively.
## Convert ONNX* QuartzNet model to IR
If using a combined model:
```sh
./mo.py --input_model <MODEL_DIR>/qn.onnx --input_shape [B,64,X]
```
If using separate models:
```sh
./mo.py --input_model <MODEL_DIR>/encoder_qt.onnx --input_shape [B,64,X]
./mo.py --input_model <MODEL_DIR>/decoder_qt.onnx --input_shape [B,1024,Y]
```
Where shape is determined by the audio file Mel-Spectrogram length: B - batch dimension, X - dimension based on the input length, Y - determined by encoder output, usually `X / 2`.

View File

@@ -53,6 +53,7 @@ limitations under the License.
<tab type="user" title="Convert ONNX* Faster R-CNN Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_Faster_RCNN"/>
<tab type="user" title="Convert ONNX* Mask R-CNN Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_Mask_RCNN"/>
<tab type="user" title="Converting DLRM ONNX* Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_DLRM"/>
<tab type="user" title="Convert PyTorch* QuartzNet Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_QuartzNet"/>
</tab>
<tab type="user" title="Model Optimizations Techniques" url="@ref openvino_docs_MO_DG_prepare_model_Model_Optimization_Techniques"/>
<tab type="user" title="Cutting off Parts of a Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Cutting_Model"/>

View File

@@ -30,7 +30,7 @@ Now the dependencies are installed and you are ready to use the Intel® Vision A
## Optional Steps
* For advanced configuration steps for your IEI Mustang-V100-MX8 accelerator, see [Intel® Movidius™ VPUs Setup Guide for Use with Intel® Distribution of OpenVINO™ toolkit](movidius-setup-guide.md).
* For advanced configuration steps for your **IEI Mustang-V100-MX8-R10** accelerator, see [Intel® Movidius™ VPUs Setup Guide for Use with Intel® Distribution of OpenVINO™ toolkit](movidius-setup-guide.md). **IEI Mustang-V100-MX8-R11** accelerator doesn't require any additional steps.
* After you've configured your Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, see [Intel® Movidius™ VPUs Programming Guide for Use with Intel® Distribution of OpenVINO™ toolkit](movidius-programming-guide.md) to learn how to distribute a model across all 8 VPUs to maximize performance.

View File

@@ -16,7 +16,8 @@ Your installation is complete when these are all completed:
2. Install the dependencies:
- [Microsoft Visual Studio* with C++ **2019 or 2017** with MSBuild](http://visualstudio.microsoft.com/downloads/)
- [Microsoft Visual Studio* with C++ **2019 or 2017** with MSBuild](http://visualstudio.microsoft.com/downloads/)
> **NOTE**: Clicking this link will directly download Visual Studio 2019 for Windows that has been validated with OpenVINO™.
- [CMake **3.10 or higher** 64-bit](https://cmake.org/download/)
> **NOTE**: If you want to use Microsoft Visual Studio 2019, you are required to install CMake 3.14.
- [Python **3.6** - **3.8** 64-bit](https://www.python.org/downloads/windows/)

View File

@@ -1,5 +1,7 @@
# Intel® Movidius™ VPUs Setup Guide for Use with Intel® Distribution of OpenVINO™ toolkit {#openvino_docs_install_guides_movidius_setup_guide}
> **NOTE**: These steps are only required for **IEI Mustang-V100-MX8-R10** card. **IEI Mustang-V100-MX8-R11** card doesn't require any additional steps and it's completely configured using the [general guidance](installing-openvino-linux-ivad-vpu.md).
## See Also
- [Intel® Movidius™ VPUs Programming Guide for use with the Intel® Distribution of OpenVINO™](movidius-programming-guide.md)
@@ -9,7 +11,7 @@
- <a class="download" href="<domain_placeholder>/downloads/Intel Vision Accelerator Design with Intel Movidius™ VPUs Errata.pdf">Intel® Vision Accelerator Design with Intel® Movidius™ VPUs Errata</a>
The IEI Mustang-V100-MX8 is an OEM version of the Intel® Vision Accelerator Design with Intel® Movidius™ VPUs.
This guide assumes you have installed the [Mustang-V100-MX8](https://download.ieiworld.com/) and the [Intel® Distribution of OpenVINO™ Toolkit](https://software.intel.com/en-us/openvino-toolkit).
This guide assumes you have installed the [Mustang-V100-MX8](https://download.ieiworld.com/) and the [Intel® Distribution of OpenVINO™ Toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html).
Instructions in this guide for configuring your accelerator include:
1. Installing the required IEI\* BSL reset software

View File

@@ -24,14 +24,19 @@ OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applicatio
## System Requirements
* [Python* distribution](https://www.python.org/) 3.6, 3.7, 3.8
* Supported Operating Systems:
- Ubuntu* 18.04 long-term support (LTS), 64-bit (python 3.6 or 3.7)
- Ubuntu* 20.04 long-term support (LTS), 64-bit (python 3.6 or 3.7)
- macOS* 10.15.x versions
- Windows 10*, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or higher) editions
- Windows Server* 2016 or higher
> NOTE: This package can be installed on other versions of Linux and Windows OSes, but only the specific versions above are fully validated.
The table below lists the supported operating systems and Python* versions required to run the installation.
| Supported Operating System | [Python* Version (64-bit)](https://www.python.org/) |
| :------------------------------------------------------------| :---------------------------------------------------|
| Ubuntu* 18.04 long-term support (LTS), 64-bit | 3.6, 3.7 |
| Ubuntu* 20.04 long-term support (LTS), 64-bit | 3.6, 3.7 |
| Red Hat* Enterprise Linux* 8.2, 64-bit | 3.6, 3.7 |
| CentOS* 7.4, 64-bit | 3.6, 3.7 |
| macOS* 10.15.x versions | 3.6, 3.7, 3.8 |
| Windows 10*, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or higher) editions | 3.6, 3.7, 3.8 |
| Windows Server* 2016 or higher | 3.6, 3.7, 3.8 |
> **NOTE**: This package can be installed on other versions of Linux and Windows OSes, but only the specific versions above are fully validated.
## Install the Developer Package

View File

@@ -21,14 +21,19 @@ The Intel® Distribution of OpenVINO™ toolkit for Linux\*:
## System Requirements
* [Python* distribution](https://www.python.org/) 3.6, 3.7, 3.8
* Supported Operating Systems:
- Ubuntu* 18.04 long-term support (LTS), 64-bit (python 3.6 or 3.7)
- Ubuntu* 20.04 long-term support (LTS), 64-bit (python 3.6 or 3.7)
- macOS* 10.15.x version
- Windows 10*, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or higher) editions
- Windows Server* 2016 or higher
> NOTE: This package can be installed on other versions of Linux and Windows OSes, but only the specific versions above are fully validated
The table below lists the supported operating systems and Python* versions required to run the installation.
| Supported Operating System | [Python* Version (64-bit)](https://www.python.org/) |
| :------------------------------------------------------------| :---------------------------------------------------|
| Ubuntu* 18.04 long-term support (LTS), 64-bit | 3.6, 3.7 |
| Ubuntu* 20.04 long-term support (LTS), 64-bit | 3.6, 3.7 |
| Red Hat* Enterprise Linux* 8.2, 64-bit | 3.6, 3.7 |
| CentOS* 7.4, 64-bit | 3.6, 3.7 |
| macOS* 10.15.x versions | 3.6, 3.7, 3.8 |
| Windows 10*, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or higher) editions | 3.6, 3.7, 3.8 |
| Windows Server* 2016 or higher | 3.6, 3.7, 3.8 |
> **NOTE**: This package can be installed on other versions of Linux and Windows OSes, but only the specific versions above are fully validated.
## Install the Runtime Package

View File

@@ -9,9 +9,9 @@
**Detailed description**: For each element from the input tensor calculates corresponding
element in the output tensor with the following formula:
\f[
HSigmoid(x) = \frac{min(max(x + 3, 0), 6)}{6}
\f]
\f[
HSigmoid(x) = \frac{min(max(x + 3, 0), 6)}{6}
\f]
The HSigmoid operation is introduced in the following [article](https://arxiv.org/pdf/1905.02244.pdf).

View File

@@ -13,11 +13,11 @@ Deep Learning Inference Engine is a part of Intel&reg; Deep Learning Deployment
Below, there are the three main steps of the deployment process:
1. **Conversion**<br>
Trained models are converted from a specific framework (like Caffe\* or TensorFlow\*) to a framework-agnostic Intermediate Representation (IR) format.
Trained models are converted from a specific framework, like TensorFlow\*, or format, like ONNX\*, to the framework-agnostic Intermediate Representation (IR) format.
- *Performance flow*: This is an offline step where general topology-level optimizations happen automatically (see <a href="#mo-knobs-related-to-performance">Model Optimizer Knobs Related to Performance</a>).
- *Tools*: Intel DL Deployment Toolkit features the Model Optimizer that enables automatic and seamless transition from the training environment to the deployment environment.
- *Tools*: OpenVINO™ features the Model Optimizer that enables automatic and seamless transition from a training to deployment environment.
2. **Model Inference/Execution**<br>
After conversion, Inference Engine consumes the IR to perform inference. While Inference Engine API itself is target-agnostic, internally, it has a notion of plugins, which are device-specific libraries facilitating the hardware-assisted acceleration.
@@ -55,14 +55,16 @@ In contrast, for the latency-oriented tasks, the time to a single frame is more
Refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample, which allows latency vs. throughput measuring.
> **NOTE**: Most samples also support batching (automatically packing multiple input images into a single request). However, high batch size results in a latency penalty. So for more real-time oriented usages, lower batch sizes (as low as a single input) are usually used. However, devices like CPU, Intel&reg; Movidius&trade; Myriad&trade; 2 VPU, Intel&reg; Movidius&trade; Myriad&trade; X VPU, or Intel® Vision Accelerator Design with Intel® Movidius™ VPU require a number of parallel requests instead of batching to leverage the performance.
> **NOTE**: The [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample also supports batching, that is automatically packing multiple input images into a single request. However, high batch size results in a latency penalty. So for more real-time oriented usages, batch sizes that are as low as a single input are usually used. Still, devices like CPU, Intel®Movidius™ Myriad™ 2 VPU, Intel® Movidius™ Myriad™ X VPU, or Intel® Vision Accelerator Design with Intel® Movidius™ VPU require a number of parallel requests instead of batching to leverage the performance. Running multiple requests should be coupled with a device configured to the corresponding number of streams. See <a href="#cpu-streams">details on CPU streams</a> for an example.
[OpenVINO™ Deep Learning Workbench tool](https://docs.openvinotoolkit.org/latest/workbench_docs_Workbench_DG_Introduction.html) provides throughput versus latency charts for different numbers of streams, requests, and batch sizes to find the performance sweet spot.
### Comparing Performance with Native/Framework Code <a name="comparing-performance-with-native-framework-code"></a>
When comparing the Inference Engine performance with the framework or another reference code, make sure that both versions are as similar as possible:
- Wrap exactly the inference execution (refer to the [Inference Engine Samples](../IE_DG/Samples_Overview.md) for examples).
- Do not include model loading time.
- Wrap exactly the inference execution (refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample for an example).
- Track model loading time separately.
- Ensure the inputs are identical for the Inference Engine and the framework. For example, Caffe\* allows to auto-populate the input with random values. Notice that it might give different performance than on real images.
- Similarly, for correct performance comparison, make sure the access pattern, for example, input layouts, is optimal for Inference Engine (currently, it is NCHW).
- Any user-side pre-processing should be tracked separately.
@@ -77,7 +79,7 @@ You need to build your performance conclusions on reproducible data. Do the perf
- If the warm-up run does not help or execution time still varies, you can try running a large number of iterations and then average or find a mean of the results.
- For time values that range too much, use geomean.
Refer to the [Inference Engine Samples](../IE_DG/Samples_Overview.md) for code examples for the performance measurements. Almost every sample, except interactive demos, has a `-ni` option to specify the number of iterations.
Refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) for code examples of performance measurements. Almost every sample, except interactive demos, has the `-ni` option to specify the number of iterations.
## Model Optimizer Knobs Related to Performance <a name="mo-knobs-related-to-performance"></a>

View File

@@ -606,7 +606,7 @@ This example uses `curl` to download the `face-detection-retail-004` model from
2. Download a model from the Model Zoo:
```sh
cd $OVSA_DEV_ARTEFACTS
curl --create-dirs https://download.01.org/opencv/2021/openvinotoolkit/2021.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.xml https:// download.01.org/opencv/2021/openvinotoolkit/2021.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.bin -o model/face-detection-retail-0004.xml -o model/face-detection-retail-0004.bin
curl --create-dirs https://storage.openvinotoolkit.org/repositories/open_model_zoo/2021.3/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.xml https:// storage.openvinotoolkit.org/repositories/open_model_zoo/2021.3/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.bin -o model/face-detection-retail-0004.xml -o model/face-detection-retail-0004.bin
```
The model is downloaded to the `OVSA_DEV_ARTEFACTS/model` directory.

View File

@@ -5,9 +5,9 @@
using namespace TemplateExtension;
constexpr ngraph::NodeTypeInfo Operation::type_info;
//! [op:ctor]
NGRAPH_RTTI_DEFINITION(TemplateExtension::Operation, "Template", 0);
Operation::Operation(const ngraph::Output<ngraph::Node> &arg, int64_t add) : Op({arg}), add(add) {
constructor_validate_and_infer_types();
}

View File

@@ -11,8 +11,7 @@ namespace TemplateExtension {
class Operation : public ngraph::op::Op {
public:
static constexpr ngraph::NodeTypeInfo type_info{"Template", 0};
const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; }
NGRAPH_RTTI_DECLARATION;
Operation() = default;
Operation(const ngraph::Output<ngraph::Node>& arg, int64_t add);

View File

@@ -6,14 +6,14 @@ include_guard(GLOBAL)
set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma2x8x)
set(VPU_SUPPORTED_FIRMWARES_HASH
"87389cef2aff63197f7787fb9b0ef7bfc74119200ef6b9f0c2c763b3ea4aabe9"
"eba4fabfd71f9c81db12886b05f559f1c6092f9b65dfb4493c205f493d816fab")
"cfba5fc0895a564fa51a1438f1c4d4f06198be982b1c2fb973c5cb9ab0a3c1f3"
"4176456c96b151470de3a723b603503306cff2e52975b739927e37d730c053be")
#
# Default packages
#
set(FIRMWARE_PACKAGE_VERSION 1633)
set(FIRMWARE_PACKAGE_VERSION 1639)
set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.2")
#

View File

@@ -31,8 +31,13 @@ int image_read(const char *img_path, c_mat_t *img) {
img->mat_width = mat.size().width;
img->mat_height = mat.size().height;
img->mat_type = mat.type();
img->mat_data_size = img->mat_channels * img->mat_width * img->mat_height;
img->mat_data_size = mat.elemSize() * img->mat_width * img->mat_height;
img->mat_data = (unsigned char *)malloc(sizeof(unsigned char) * img->mat_data_size);
if (img->mat_data == NULL) {
return -1;
}
for (int i = 0; i < img->mat_data_size; ++i) {
img->mat_data[i] = mat.data[i];
}
@@ -54,8 +59,13 @@ int image_resize(const c_mat_t *src_img, c_mat_t *dst_img, const int width, cons
dst_img->mat_width = mat_dst.size().width;
dst_img->mat_height = mat_dst.size().height;
dst_img->mat_type = mat_dst.type();
dst_img->mat_data_size = dst_img->mat_channels * dst_img->mat_width * dst_img->mat_height;
dst_img->mat_data_size = mat_dst.elemSize() * dst_img->mat_width * dst_img->mat_height;
dst_img->mat_data = (unsigned char *)malloc(sizeof(unsigned char) * dst_img->mat_data_size);
if (dst_img->mat_data == NULL) {
return -1;
}
for (int i = 0; i < dst_img->mat_data_size; ++i) {
dst_img->mat_data[i] = mat_dst.data[i];
}

View File

@@ -39,6 +39,9 @@ struct classify_res *output_blob_to_classify_res(ie_blob_t *blob, size_t *n) {
*n = output_dim.dims[1];
struct classify_res *cls = (struct classify_res *)malloc(sizeof(struct classify_res) * (*n));
if (!cls) {
return NULL;
}
ie_blob_buffer_t blob_cbuffer;
status = ie_blob_get_cbuffer(blob, &blob_cbuffer);

View File

@@ -38,6 +38,9 @@ struct classify_res *output_blob_to_classify_res(ie_blob_t *blob, size_t *n) {
*n = output_dim.dims[1];
struct classify_res *cls = (struct classify_res *)malloc(sizeof(struct classify_res) * (*n));
if (!cls) {
return NULL;
}
ie_blob_buffer_t blob_cbuffer;
status = ie_blob_get_cbuffer(blob, &blob_cbuffer);
@@ -76,8 +79,8 @@ size_t read_image_from_file(const char *img_path, unsigned char *img_data, size_
fseek(fp, 0, SEEK_SET);
read_size = fread(img_data, 1, size, fp);
}
fclose(fp);
}
fclose(fp);
return read_size;
}

View File

@@ -122,6 +122,7 @@ void readInputFilesArgument(const char *arg) {
for (i = 0; i < file_num; ++i) {
free(file_paths[i]);
}
free(file_path);
free(file_paths);
file_num = 0;
}
@@ -279,6 +280,10 @@ int main(int argc, char **argv) {
ie_version_free(&version);
char **argv_temp =(char **)calloc(argc, sizeof(char *));
if (!argv_temp) {
return EXIT_FAILURE;
}
int i, j;
for (i = 0; i < argc; ++i) {
argv_temp[i] = argv[i];
@@ -419,6 +424,10 @@ int main(int argc, char **argv) {
/** Collect images data **/
c_mat_t *originalImages = (c_mat_t *)calloc(file_num, sizeof(c_mat_t));
c_mat_t *images = (c_mat_t *)calloc(file_num, sizeof(c_mat_t));
if (!originalImages || !images)
goto err;
int image_num = 0;
for (i = 0; i < file_num; ++i) {
c_mat_t img = {NULL, 0, 0, 0, 0, 0};
@@ -435,20 +444,27 @@ int main(int argc, char **argv) {
resized_img.mat_height = img.mat_height;
resized_img.mat_type = img.mat_type;
resized_img.mat_data = calloc(1, resized_img.mat_data_size);
if (resized_img.mat_data == NULL) {
image_free(&img);
continue;
}
for (j = 0; j < resized_img.mat_data_size; ++j)
resized_img.mat_data[j] = img.mat_data[j];
} else {
printf("%sImage is resized from (%d, %d) to (%zu, %zu)\n", \
warn, img.mat_width, img.mat_height, input_width, input_height);
warn, img.mat_width, img.mat_height, input_width, input_height);
image_resize(&img, &resized_img, (int)input_width, (int)input_height);
if (image_resize(&img, &resized_img, (int)input_width, (int)input_height) == -1) {
printf("%sImage %s cannot be resized!\n", warn, file_paths[i]);
image_free(&img);
continue;
}
}
if (resized_img.mat_data) {
originalImages[image_num] = img;
images[image_num] = resized_img;
++image_num;
}
originalImages[image_num] = img;
images[image_num] = resized_img;
++image_num;
}
if (!image_num) {
@@ -523,8 +539,8 @@ int main(int argc, char **argv) {
if (config_msg) {
ie_config_t * config = parseConfig(config_msg, '#');
status = ie_core_load_network(core, network, device_name, config, &exe_network);
config_free(config);
if (status != OK) {
config_free(config);
goto err;
}
} else {

View File

@@ -53,8 +53,8 @@ size_t read_image_from_file(const char* img_path, unsigned char *img_data, size_
fseek(fp, 0, SEEK_SET);
read_size = fread(img_data, 1, size, fp);
}
fclose(fp);
}
fclose(fp);
return read_size;
}

View File

@@ -7,7 +7,7 @@ networks like SSD-VGG. The sample shows how to use [Shape Inference feature](../
## Running
To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](@ref omz_tools_downloader_README).
> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
>

View File

@@ -9,7 +9,7 @@ networkx==2.2
tqdm==4.31.1
texttable==1.6.3
py-cpuinfo!=5.0,!=6.0
PyYAML>=5.4.2
PyYAML>=5.4.1
pillow>=8.1.0
scikit-image
scikit-learn

View File

@@ -1,5 +1,6 @@
[options]
py_modules =
mo
mo_tf
mo_caffe
mo_mxnet

View File

@@ -85,6 +85,9 @@ public:
* `InferenceEngine::Core::ReadNetwork(const std::string& model, const Blob::CPtr& weights) const`
* function overload which takes a filesystem path to the model.
* For ONNX case the second parameter should contain empty blob.
* @note Created InferenceEngine::CNNNetwork object shares the weights with `weights` object.
* So, do not create `weights` on temporary data which can be later freed, since the network
* constant data will point to invalid memory.
* @return CNNNetwork
*/
CNNNetwork ReadNetwork(const std::string& model, const Blob::CPtr& weights) const;

View File

@@ -10,3 +10,4 @@ ie_add_sample(NAME benchmark_app
HEADERS ${HDR}
DEPENDENCIES format_reader
OPENCV_DEPENDENCIES imgcodecs)

View File

@@ -105,6 +105,9 @@ Options:
-nthreads "<integer>" Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases).
-enforcebf16 Optional. Enforcing of floating point operations execution in bfloat16 precision on platforms with native bfloat16 support. By default, this key sets "true" on platforms with native bfloat16 support and "false" for other platforms. Use "-enforcebf16=false" to disable this feature.
-pin "YES"/"NO"/"NUMA" Optional. Enable threads->cores ("YES", default), threads->(NUMA)nodes ("NUMA") or completely disable ("NO") CPU threads pinning for CPU-involved inference.
-ip "U8"/"FP16"/"FP32" Optional. Specifies precision for all input layers of the network.
-op "U8"/"FP16"/"FP32" Optional. Specifies precision for all output layers of the network.
-iop Optional. Specifies precision for input and output layers by name. Example: -iop "input:FP16, output:FP16". Notice that quotes are required. Overwrites precision from ip and op options for specified layers.
Statistics dumping options:

View File

@@ -108,6 +108,19 @@ static const char layout_message[] = "Optional. Prompts how network layouts shou
// @brief message for quantization bits
static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8 or 16 (default)";
// TODO: duplicate options from compile_tool
static constexpr char inputs_precision_message[] =
"Optional. Specifies precision for all input layers of the network.";
static constexpr char outputs_precision_message[] =
"Optional. Specifies precision for all output layers of the network.";
static constexpr char iop_message[] =
"Optional. Specifies precision for input and output layers by name.\n"
" Example: -iop \"input:FP16, output:FP16\".\n"
" Notice that quotes are required.\n"
" Overwrites precision from ip and op options for specified layers.";
/// @brief Define flag for showing help message <br>
DEFINE_bool(h, false, help_message);
@@ -198,6 +211,18 @@ DEFINE_string(layout, "", layout_message);
/// @brief Define flag for quantization bits (default 16)
DEFINE_int32(qb, 16, gna_qb_message);
/// @brief Specify precision for all input layers of the network
DEFINE_string(ip, "", inputs_precision_message);
/// @brief Specify precision for all output layers of the network
DEFINE_string(op, "", outputs_precision_message);
/// @brief Specify precision for input and output layers by name.\n"
/// Example: -iop \"input:FP16, output:FP16\".\n"
/// Notice that quotes are required.\n"
/// Overwrites precision from ip and op options for specified layers.
DEFINE_string(iop, "", iop_message);
/**
* @brief This function show a help message
*/
@@ -237,4 +262,7 @@ static void showUsage() {
std::cout << " -load_config " << load_config_message << std::endl;
#endif
std::cout << " -qb " << gna_qb_message << std::endl;
std::cout << " -ip <value> " << inputs_precision_message << std::endl;
std::cout << " -op <value> " << outputs_precision_message << std::endl;
std::cout << " -iop \"<value>\" " << iop_message << std::endl;
}

View File

@@ -67,6 +67,14 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) {
throw std::logic_error("only " + std::string(detailedCntReport) + " report type is supported for MULTI device");
}
bool isNetworkCompiled = fileExt(FLAGS_m) == "blob";
bool isPrecisionSet = !(FLAGS_ip.empty() && FLAGS_op.empty() && FLAGS_iop.empty());
if (isNetworkCompiled && isPrecisionSet) {
std::string err = std::string("Cannot set precision for a compiled network. ") +
std::string("Please re-compile your network with required precision using compile_tool");
throw std::logic_error(err);
}
return true;
}
@@ -380,6 +388,10 @@ int main(int argc, char *argv[]) {
item.second->setPrecision(app_inputs_info.at(item.first).precision);
}
}
processPrecision(cnnNetwork, FLAGS_ip, FLAGS_op, FLAGS_iop);
printInputAndOutputsInfo(cnnNetwork);
// ----------------- 7. Loading the model to the device --------------------------------------------------------
next_step();
startTime = Time::now();

View File

@@ -85,3 +85,240 @@ void parseInputFilesArguments(std::vector<std::string> &files) {
readInputFilesArguments(files, args.at(i));
}
}
namespace {
// Splits `str` on `delim` into `out`, discarding empty tokens.
// `out` is cleared first, so the result fully replaces any previous content.
void splitStringList(const std::string& str, std::vector<std::string>& out, char delim) {
    out.clear();
    if (str.empty())
        return;

    std::istringstream stream(str);
    for (std::string token; std::getline(stream, token, delim);) {
        if (!token.empty()) {
            out.push_back(std::move(token));
        }
    }
}
// Parses a "<key>:<value>[,<key>:<value>...]" argument string into a map.
// All whitespace is stripped first, so "in : FP16, out : FP32" is accepted.
// @throws std::invalid_argument if any comma-separated element is not a
//         single <key>:<value> pair.
std::map<std::string, std::string> parseArgMap(std::string argMap) {
    // Pass the character as unsigned char: calling ::isspace with a plain
    // (possibly negative) char is undefined behavior.
    argMap.erase(std::remove_if(argMap.begin(), argMap.end(),
                                [](unsigned char c) { return ::isspace(c) != 0; }),
                 argMap.end());

    std::vector<std::string> pairs;
    splitStringList(argMap, pairs, ',');

    std::map<std::string, std::string> parsedMap;
    for (auto&& pair : pairs) {
        std::vector<std::string> keyValue;
        splitStringList(pair, keyValue, ':');
        if (keyValue.size() != 2) {
            throw std::invalid_argument("Invalid key/value pair " + pair + ". Expected <layer_name>:<value>");
        }
        parsedMap[keyValue[0]] = keyValue[1];
    }

    return parsedMap;
}
using supported_precisions_t = std::unordered_map<std::string, InferenceEngine::Precision>;
// Looks up `value` (case-insensitively) in `supported_precisions`.
// @throws std::logic_error if the name is not a supported precision.
InferenceEngine::Precision getPrecision(std::string value,
                                        const supported_precisions_t& supported_precisions) {
    // Uppercase via unsigned char: calling ::toupper with a plain (possibly
    // negative) char is undefined behavior.
    std::transform(value.begin(), value.end(), value.begin(),
                   [](unsigned char c) { return static_cast<char>(::toupper(c)); });

    const auto precision = supported_precisions.find(value);
    if (precision == supported_precisions.end()) {
        throw std::logic_error("\"" + value + "\"" + " is not a valid precision");
    }

    return precision->second;
}
// Maps a case-insensitive precision name (e.g. "fp32", "U8") onto the
// corresponding InferenceEngine::Precision enumerator.
// @throws std::logic_error for an unrecognized precision name.
InferenceEngine::Precision getPrecision(const std::string& value) {
    static const supported_precisions_t supported_precisions = {
        { "FP32", InferenceEngine::Precision::FP32 }, { "FP16", InferenceEngine::Precision::FP16 },
        { "BF16", InferenceEngine::Precision::BF16 }, { "U64",  InferenceEngine::Precision::U64 },
        { "I64",  InferenceEngine::Precision::I64 },  { "U32",  InferenceEngine::Precision::U32 },
        { "I32",  InferenceEngine::Precision::I32 },  { "U16",  InferenceEngine::Precision::U16 },
        { "I16",  InferenceEngine::Precision::I16 },  { "U8",   InferenceEngine::Precision::U8 },
        { "I8",   InferenceEngine::Precision::I8 },   { "BOOL", InferenceEngine::Precision::BOOL },
    };

    return getPrecision(value, supported_precisions);
}
// Applies per-layer precisions parsed from `iop` ("<layer_name>:<precision>"
// pairs) to the matching input/output layers of `network`.
// @throws std::logic_error if a layer name is neither an input nor an output.
void setPrecisions(const InferenceEngine::CNNNetwork& network, const std::string &iop) {
    auto inputs = network.getInputsInfo();
    auto outputs = network.getOutputsInfo();

    for (auto&& entry : parseArgMap(iop)) {
        const auto& layer_name = entry.first;
        const auto& user_precision = entry.second;

        const auto input_it = inputs.find(layer_name);
        if (input_it != inputs.end()) {
            input_it->second->setPrecision(getPrecision(user_precision));
            continue;
        }
        const auto output_it = outputs.find(layer_name);
        if (output_it != outputs.end()) {
            output_it->second->setPrecision(getPrecision(user_precision));
            continue;
        }
        throw std::logic_error(layer_name + " is not an input neither output");
    }
}
} // namespace
// Applies command-line precision options to the network:
//  - `ip`:  one precision for every input,
//  - `op`:  one precision for every output,
//  - `iop`: per-layer "<name>:<precision>" overrides (applied last, so they
//    win over `ip`/`op` for the named layers).
void processPrecision(InferenceEngine::CNNNetwork& network, const std::string &ip, const std::string &op,
                      const std::string &iop) {
    if (!ip.empty()) {
        const auto precision = getPrecision(ip);
        for (auto&& input : network.getInputsInfo()) {
            input.second->setPrecision(precision);
        }
    }

    if (!op.empty()) {
        const auto precision = getPrecision(op);
        for (auto&& output : network.getOutputsInfo()) {
            output.second->setPrecision(precision);
        }
    }

    if (!iop.empty()) {
        setPrecisions(network, iop);
    }
}
namespace {
using supported_layouts_t = std::unordered_map<std::string, InferenceEngine::Layout>;
using matchLayoutToDims_t = std::unordered_map<size_t, size_t>;
// Looks up `value` (case-insensitively) in `supported_layouts`.
// @throws std::logic_error if the name is not a supported layout.
InferenceEngine::Layout getLayout(std::string value,
                                  const supported_layouts_t& supported_layouts) {
    // Uppercase via unsigned char: calling ::toupper with a plain (possibly
    // negative) char is undefined behavior.
    std::transform(value.begin(), value.end(), value.begin(),
                   [](unsigned char c) { return static_cast<char>(::toupper(c)); });

    const auto layout = supported_layouts.find(value);
    if (layout == supported_layouts.end()) {
        throw std::logic_error("\"" + value + "\"" + " is not a valid layout");
    }

    return layout->second;
}
// Maps a case-insensitive layout name (e.g. "nchw") onto the corresponding
// InferenceEngine::Layout enumerator.
// @throws std::logic_error for an unrecognized layout name.
InferenceEngine::Layout getLayout(const std::string& value) {
    static const supported_layouts_t supported_layouts = {
        { "NCDHW", InferenceEngine::Layout::NCDHW }, { "NDHWC", InferenceEngine::Layout::NDHWC },
        { "NCHW",  InferenceEngine::Layout::NCHW },  { "NHWC",  InferenceEngine::Layout::NHWC },
        { "CHW",   InferenceEngine::Layout::CHW },   { "NC",    InferenceEngine::Layout::NC },
        { "C",     InferenceEngine::Layout::C },
    };

    return getLayout(value, supported_layouts);
}
// Returns true if `layout` is applicable to a tensor of rank `dimension`.
// @throws std::logic_error if `layout` is not one of the supported layouts.
bool isMatchLayoutToDims(InferenceEngine::Layout layout, size_t dimension) {
    // A switch on the enum replaces the former static unordered_map keyed by
    // casted enum values: no casts, no lookup, no static-init guard.
    size_t expectedRank = 0;
    switch (layout) {
    case InferenceEngine::Layout::NCDHW:
    case InferenceEngine::Layout::NDHWC:
        expectedRank = 5;
        break;
    case InferenceEngine::Layout::NCHW:
    case InferenceEngine::Layout::NHWC:
        expectedRank = 4;
        break;
    case InferenceEngine::Layout::CHW:
        expectedRank = 3;
        break;
    case InferenceEngine::Layout::NC:
        expectedRank = 2;
        break;
    case InferenceEngine::Layout::C:
        expectedRank = 1;
        break;
    default:
        throw std::logic_error("Layout is not valid.");
    }

    return dimension == expectedRank;
}
// Applies per-layer layouts parsed from `iol` ("<layer_name>:<layout>" pairs)
// to the matching input/output layers of `network`.
// @throws std::logic_error if a layer name is neither an input nor an output,
//         or if the requested layout rank does not match the layer's rank.
// Note: `iol` is taken by const reference (was by value), matching
// setPrecisions and avoiding a needless string copy.
void setLayouts(const InferenceEngine::CNNNetwork& network, const std::string& iol) {
    const auto user_layouts_map = parseArgMap(iol);

    auto inputs = network.getInputsInfo();
    auto outputs = network.getOutputsInfo();

    for (auto&& item : user_layouts_map) {
        const auto& layer_name = item.first;
        const auto& user_layout = getLayout(item.second);

        const auto input = inputs.find(layer_name);
        const auto output = outputs.find(layer_name);

        if (input != inputs.end()) {
            if (!isMatchLayoutToDims(user_layout, input->second->getTensorDesc().getDims().size())) {
                throw std::logic_error(item.second + " layout is not applicable to " + layer_name);
            }
            input->second->setLayout(user_layout);
        } else if (output != outputs.end()) {
            if (!isMatchLayoutToDims(user_layout, output->second->getTensorDesc().getDims().size())) {
                throw std::logic_error(item.second + " layout is not applicable to " + layer_name);
            }
            output->second->setLayout(user_layout);
        } else {
            throw std::logic_error(layer_name + " is not an input neither output");
        }
    }
}
} // namespace
// Applies command-line layout options to the network:
//  - `il`:  layout for every input whose rank matches the layout,
//  - `ol`:  layout for every output whose rank matches the layout,
//  - `iol`: per-layer "<name>:<layout>" overrides (these throw on a rank
//    mismatch, whereas `il`/`ol` silently skip non-matching layers).
void processLayout(InferenceEngine::CNNNetwork& network, const std::string& il, const std::string& ol, const std::string& iol) {
    if (!il.empty()) {
        const auto user_layout = getLayout(il);
        for (auto&& input : network.getInputsInfo()) {
            if (isMatchLayoutToDims(user_layout, input.second->getTensorDesc().getDims().size()))
                input.second->setLayout(user_layout);
        }
    }

    if (!ol.empty()) {
        const auto user_layout = getLayout(ol);
        for (auto&& output : network.getOutputsInfo()) {
            if (isMatchLayoutToDims(user_layout, output.second->getTensorDesc().getDims().size()))
                output.second->setLayout(user_layout);
        }
    }

    if (!iol.empty()) {
        setLayouts(network, iol);
    }
}
// Prints the name, precision and layout of every network input and output.
void printInputAndOutputsInfo(const InferenceEngine::CNNNetwork& network) {
    std::cout << "Network inputs:" << std::endl;
    for (const auto& input : network.getInputsInfo()) {
        std::cout << "    " << input.first << " : " << input.second->getPrecision() << " / " << input.second->getLayout() << std::endl;
    }
    std::cout << "Network outputs:" << std::endl;
    for (const auto& output : network.getOutputsInfo()) {
        std::cout << "    " << output.first << " : " << output.second->getPrecision() << " / " << output.second->getLayout() << std::endl;
    }
}

View File

@@ -32,7 +32,7 @@ The package contains the following components:
* [Kaldi Statistical Language Model Conversion Tool](Kaldi_SLM_conversion_tool.md), which converts custom language models to use in the decoder
Additionally, [new acoustic and language models](http://download.01.org/opencv/2020/openvinotoolkit/2020.1/models_contrib/speech/kaldi/librispeech_s5/) to be used by new demos are located at [download.01.org](https://01.org/).
Additionally, new acoustic and language models are available in the OpenVINO&trade; [storage](https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/librispeech_s5/).
## <a name="run-demos">Run Speech Recognition Demos with Pretrained Models</a>

View File

@@ -109,6 +109,10 @@ Options:
If you use the cw_l or cw_r flag, then batch size and nthreads arguments are ignored.
-cw_r "<integer>" Optional. Number of frames for right context windows (default is 0). Works only with context window networks.
If you use the cw_r or cw_l flag, then batch size and nthreads arguments are ignored.
-oname "<outputs>" Optional. Layer names for output blobs. The names are separated with ",". Allows to change the order of output layers for -o flag.
Example: Output1:port,Output2:port.
-iname "<inputs>" Optional. Layer names for input blobs. The names are separated with ",". Allows to change the order of input layers for -i flag.
Example: Input1,Input2
```
@@ -136,7 +140,7 @@ The following pre-trained models are available:
* rm\_lstm4f
* rm\_cnn4a\_smbr
All of them can be downloaded from [https://download.01.org/openvinotoolkit/models_contrib/speech/kaldi](https://download.01.org/openvinotoolkit/models_contrib/speech/kaldi) or using the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) .
All of them can be downloaded from [https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/](https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/) or using the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) .
### Speech Inference

View File

@@ -536,7 +536,12 @@ QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network,
CLDNNPlugin::Config conf = _impl->m_config;
UpdateConfig(conf, network, config);
Program prog;
if (m_defaultContext == nullptr) {
m_defaultContext.reset(new CLDNNRemoteCLContext(
std::const_pointer_cast<InferenceEngine::IInferencePlugin>(shared_from_this()),
ParamMap(), conf));
}
Program prog(m_defaultContext->getImpl()->GetEngine(), conf);
auto function = network.getFunction();
if (function == nullptr) {
THROW_IE_EXCEPTION << "CNNetworkImpl representation is not supported anymore";

View File

@@ -24,7 +24,7 @@ class clDNNEngine : public InferenceEngine::InferencePluginInternal,
std::map<std::string, cldnn::device> device_map;
std::mutex engine_mutex;
CLDNNRemoteCLContext::Ptr m_defaultContext;
mutable CLDNNRemoteCLContext::Ptr m_defaultContext;
cldnn::device_info GetDeviceInfo(const std::map<std::string, std::string> &config) const;
InferenceEngine::CNNNetwork CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network,

View File

@@ -71,6 +71,8 @@ public:
class Program {
public:
Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<const cldnn::engine> engine, const Config& config);
Program(std::shared_ptr<const cldnn::engine> engine, const Config& config) : m_config(config), m_engine(engine),
m_curBatch(-1), queryMode(false), m_max_batch(1) {}
Program() : m_config({}), m_engine(nullptr), m_curBatch(-1), queryMode(false), m_max_batch(1) {}
static const cldnn::primitive_id m_preProcessTag;

View File

@@ -21,7 +21,7 @@ CLDNNRemoteBlobImpl::CLDNNRemoteBlobImpl(ClContext::Ptr context,
uint32_t plane,
BlobType mem_type) :
m_context(context), m_layout(layout), m_mem_type(mem_type), m_mem(mem), m_surf(surf), m_plane(plane),
_handle(nullptr) {
_handle(nullptr), _allocator(nullptr), m_memObject(nullptr), lockedHolder(nullptr) {
}
ParamMap CLDNNRemoteBlobImpl::getParams() const {

View File

@@ -287,7 +287,27 @@ public:
QueryNetworkResult QueryNetwork(const CNNNetwork& network, const std::string& deviceName,
const std::map<std::string, std::string>& config) const override {
auto parsed = parseDeviceNameIntoConfig(deviceName, config);
return GetCPPPluginByName(parsed._deviceName).QueryNetwork(network, parsed._config);
auto res = GetCPPPluginByName(parsed._deviceName).QueryNetwork(network, parsed._config);
if (!network.getFunction() || res.supportedLayersMap.empty())
return res;
const auto& func = network.getFunction();
auto specialized_function = ngraph::clone_function(*func);
std::string defDevice = res.supportedLayersMap.begin()->second;
ngraph::pass::ConstantFolding().run_on_function(specialized_function);
std::unordered_set<std::string> opNames;
for (const auto& op : specialized_function->get_ops())
opNames.emplace(op->get_friendly_name());
for (const auto& op : func->get_ops()) {
if (opNames.find(op->get_friendly_name()) == opNames.end() ||
(!res.supportedLayersMap.count(op->get_friendly_name()) &&
std::dynamic_pointer_cast<ngraph::op::Constant>(op)))
res.supportedLayersMap[op->get_friendly_name()] = defDevice;
}
return res;
}
Parameter GetMetric(const std::string& deviceName, const std::string& name) const override {

View File

@@ -325,7 +325,7 @@ BlockingDesc::BlockingDesc(const SizeVector& dims, Layout layout): offsetPadding
case Layout::HWC:
checkDims(dims.size(), 3);
l_order = {1, 2, 0};
l_dims = dims;
l_dims = {dims[1], dims[2], dims[0]};
break;
case Layout::CN:
checkDims(dims.size(), 2);

View File

@@ -169,9 +169,9 @@ void MKLDNNEdge::allocate(const void* mem_ptr) {
}
std::string MKLDNNEdge::name() const {
auto childPtr = getChild();
auto parentPtr = getParent();
return childPtr->getName() + "<->" + parentPtr->getName();
auto childPtr = getChild();
return parentPtr->getName() + std::to_string(parent_port) + "<->" + childPtr->getName() + std::to_string(child_port);
}
void MKLDNNEdge::externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache) {

View File

@@ -6,6 +6,7 @@
#include <string>
#include <map>
#include <vector>
#include <tuple>
#include <unordered_set>
#include <limits>
#include <fstream>
@@ -67,6 +68,8 @@ using namespace InferenceEngine::details;
typedef std::unordered_set<MKLDNNEdgePtr> edge_cluster_t;
typedef std::vector<edge_cluster_t> edge_clusters_t;
mkldnn::engine MKLDNNGraph::eng(mkldnn::engine::kind::cpu, 0);
template<typename NET>
void MKLDNNGraph::ApplyUnrollPasses(NET &net) {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::ApplyUnrollPasses");
@@ -453,15 +456,24 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() {
auto acquireSharedOutputs = [this](MKLDNNNodePtr & graphNode) {
std::vector<shared_memory_ptr> outputs;
bool hasLocalAllocatedEdges = false;
bool hasExternalInvalidEdges = false;
for (size_t i = 0; i < graphNode->getChildEdges().size(); ++i) {
auto edgePtr = graphNode->getChildEdgeAt(i);
if (edgePtr && edgePtr->isUseExternalMemory()) {
outputs.emplace_back(weightsCache->get(edgePtr->name()));
if (edgePtr) {
if (edgePtr->isUseExternalMemory()) {
auto ptr = weightsCache->get(edgePtr->name());
outputs.emplace_back(ptr);
if (!ptr->isValid())
hasExternalInvalidEdges = true;
} else {
hasLocalAllocatedEdges = true;
}
}
}
return outputs;
return std::make_tuple(hasExternalInvalidEdges, hasLocalAllocatedEdges, outputs);
};
for (auto &graphNode : graphNodes) {
@@ -471,12 +483,10 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() {
if (weightsCache) {
auto sharedOutputs = acquireSharedOutputs(graphNode);
if (std::find_if(sharedOutputs.begin(), sharedOutputs.end(),
[](const shared_memory_ptr & ptr) {
return !ptr->isValid();
}) != sharedOutputs.end()) {
if (std::get<0>(sharedOutputs) || std::get<1>(sharedOutputs)) {
graphNode->execute(stream);
for (auto & output : sharedOutputs)
for (auto & output : std::get<2>(sharedOutputs))
output->valid(true);
}
} else {

View File

@@ -30,7 +30,7 @@ public:
Ready = 1,
};
MKLDNNGraph(mkldnn::engine eng = mkldnn::engine(mkldnn::engine::kind::cpu, 0)) : status(NotReady), eng(eng) {}
MKLDNNGraph() = default;
Status GetStatus() {
return status;
@@ -172,7 +172,7 @@ protected:
graphEdges.clear();
_meanImages.clear();
}
Status status;
Status status { NotReady };
Config config;
// For dumping purposes. -1 - no counting, all other positive
@@ -191,7 +191,7 @@ protected:
std::map<std::string, MeanImage> _meanImages;
std::string _name;
mkldnn::engine eng;
static mkldnn::engine eng;
void Replicate(const InferenceEngine::CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr);
void Replicate(const InferenceEngine::TensorIterator::Body &subgraph, const MKLDNNExtensionManager::Ptr& extMgr);

View File

@@ -11,6 +11,7 @@
#include <nodes/mkldnn_concat_node.h>
#include <nodes/mkldnn_split_node.h>
#include <ie_compound_blob.h>
#include <ie_common.h>
#include "mkldnn_exec_network.h"
#include "mkldnn_itt.h"
#include "nodes/common/cpu_convert.h"
@@ -128,6 +129,13 @@ void MKLDNNPlugin::MKLDNNInferRequest::PushInputData() {
default:
THROW_IE_EXCEPTION << "Unsupported input precision " << input.second->getTensorDesc().getPrecision();
}
// User can initialize input via setBlob API using tensorDesc with default (ANY) layout.
// Currently IE doesn't specify behavior in such scenario, so we assume real layout is equal to the network input.
if (input.second->getTensorDesc().getLayout() == InferenceEngine::ANY) {
input.second->getTensorDesc().setLayout(_networkInputs[input.first]->getLayout());
}
pushInput(input.first, input.second, inPrec);
}
}

View File

@@ -91,7 +91,7 @@ TensorDescCreator::makeFilteredRange(const CreatorsMap &map, unsigned int rank)
std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
TensorDescCreator::makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector<TensorDescCreatorTypes>& supportedTypes) {
size_t bitMask = 0ul;
unsigned bitMask = 0ul;
for (auto& item : supportedTypes) {
bitMask |= 1 << static_cast<unsigned>(item);
}

View File

@@ -205,7 +205,7 @@ private:
void parallelItInit(size_t start, std::vector<size_t>& counters, const std::vector<size_t>& iterationRange) {
auto itCounter = counters.rbegin();
auto itWork = iterationRange.rbegin();
while (itCounter != counters.rend()) {
while (itCounter != counters.rend() && itWork != iterationRange.rend()) {
*itCounter = start % *itWork;
start /= *itWork;
++itCounter;
@@ -217,7 +217,7 @@ private:
auto itCounter = counters.rbegin();
auto itWork = iterationRange.rbegin();
while (itCounter != counters.rend()) {
while (itCounter != counters.rend() && itWork != iterationRange.rend()) {
*itCounter = (*itCounter + 1) % *itWork;
if (*itCounter != 0) {
break;

View File

@@ -991,13 +991,17 @@ void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() {
}
void MKLDNNBinaryConvolutionNode::createPrimitive() {
auto config = getSelectedPrimitiveDescriptor()->getConfig();
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
THROW_IE_EXCEPTION << "CPU binary convolution with name '" << getName() << "' doesn't have primitive descriptors.";
auto config = selectedPrimitiveDescriptor->getConfig();
auto srcDims = config.inConfs[0].desc.getDims();
auto weiDims = config.inConfs[1].desc.getDims();
auto dstDims = config.outConfs[0].desc.getDims();
auto implType = getSelectedPrimitiveDescriptor()->getImplementationType();
auto implType = selectedPrimitiveDescriptor->getImplementationType();
jcp.ngroups = group;
jcp.mb = srcDims[0];
@@ -1295,7 +1299,11 @@ void MKLDNNBinaryConvolutionNode::execute(mkldnn::stream strm) {
auto weights = reinterpret_cast<const uint8_t*>(weightsMemory->GetPtr());
auto dst = reinterpret_cast<uint8_t*>(dstMemory->GetPtr());
auto config = getSelectedPrimitiveDescriptor()->getConfig();
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
THROW_IE_EXCEPTION << "CPU binary convolution with name '" << getName() << "' doesn't have primitive descriptors.";
auto config = selectedPrimitiveDescriptor->getConfig();
auto srcBlockDesc = config.inConfs[0].desc.getBlockingDesc();
std::vector<size_t> srcStride(srcBlockDesc.getStrides().size());
@@ -1315,7 +1323,7 @@ void MKLDNNBinaryConvolutionNode::execute(mkldnn::stream strm) {
dstStride[dstBlockDesc.getOrder()[i]] = dstBlockDesc.getStrides()[i];
}
auto implType = getSelectedPrimitiveDescriptor()->getImplementationType();
auto implType = selectedPrimitiveDescriptor->getImplementationType();
if (implType != impl_desc_type::ref) {
executeOptimized(src, weights, dst, srcStride, weightsStride, dstStride);
} else {

View File

@@ -854,7 +854,10 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
}
void MKLDNNDeformableConvolutionNode::createPrimitive() {
auto config = getSelectedPrimitiveDescriptor()->getConfig();
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
THROW_IE_EXCEPTION << "CPU deformable convolution with name '" << getName() << "' doesn't have primitive descriptors.";
auto config = selectedPrimitiveDescriptor->getConfig();
auto srcDims = config.inConfs[0].desc.getDims();
auto weiDims = config.inConfs[2].desc.getDims();
@@ -1057,7 +1060,10 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) {
const auto *weights = reinterpret_cast<const float *>(srcMemory2.GetPtr());
float *dst = reinterpret_cast<float *>(dstMemory.GetPtr());
auto config = getSelectedPrimitiveDescriptor()->getConfig();
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
THROW_IE_EXCEPTION << "CPU deformable convolution with name '" << getName() << "' doesn't have primitive descriptors.";
auto config = selectedPrimitiveDescriptor->getConfig();
auto src_block_desc = config.inConfs[0].desc.getBlockingDesc();
std::vector<size_t> src_strides(src_block_desc.getStrides().size());

View File

@@ -942,7 +942,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si
arg.src_stride = src_stride_size;
arg.dst_stride = dst_stride_size;
arg.work_amount = static_cast<size_t>(C2 / blk_size); // work amount for vector part
arg.oc_off = static_cast<size_t>(c * sizeof(float));
arg.oc_off = sizeof(float) * c;
(*mvn_kernel)(&arg);
});
} else {
@@ -956,7 +956,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si
arg.src_stride = src_stride_size;
arg.dst_stride = dst_stride_size;
arg.work_amount = static_cast<size_t>(C2 / blk_size);
arg.oc_off = static_cast<size_t>(c * sizeof(float));
arg.oc_off = sizeof(float) * c;
(*mvn_kernel)(&arg);
});
}

View File

@@ -252,7 +252,10 @@ void MKLDNNPadNode::padConstant() {
return;
}
InferenceEngine::Precision precision = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision();
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
THROW_IE_EXCEPTION << "CPU Pad node with name '" << getName() << "' doesn't have primitive descriptors.";
InferenceEngine::Precision precision = selectedPrimitiveDescriptor->getConfig().inConfs[0].desc.getPrecision();
OV_SWITCH(MKLDNNPlugin, PadConstantEmitter, this, precision,
OV_CASE(InferenceEngine::Precision::FP32, float),
OV_CASE(InferenceEngine::Precision::I32, int32_t),

View File

@@ -49,11 +49,11 @@ private:
InferenceEngine::SizeVector srcStrides;
InferenceEngine::SizeVector dstStrides;
InferenceEngine::SizeVector srcDimsForReflectOrSymmetric;
size_t nDimsForWork;
size_t workAmount;
size_t lastDstDim;
size_t shift;
uint8_t sizeData;
size_t nDimsForWork = 0lu;
size_t workAmount = 0lu;
size_t lastDstDim = 1lu;
size_t shift = 0lu;
uint8_t sizeData = 1;
} params;
template<typename T>

View File

@@ -279,8 +279,8 @@ private:
Reg64 reg_output_scale = rbx;
Reg64 reg_output_shift = rdx;
bool do_rounding;
bool do_dequantization;
bool do_rounding = true;
bool do_dequantization = true;
inline void compute_planar() {
int src_type_size = jqp_.src_prc.size();
@@ -1209,7 +1209,11 @@ void MKLDNNQuantizeNode::createPrimitive() {
jqp.op_type = quantizeOpType;
if (getSelectedPrimitiveDescriptor()->getImplementationType() != impl_desc_type::ref) {
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
THROW_IE_EXCEPTION << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors.";
if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) {
if (mayiuse(cpu::x64::avx512_common)) {
if (isBinarization())
quantize_kernel.reset(new jit_uni_binarization_kernel<cpu::x64::avx512_common>(jqp));
@@ -1523,7 +1527,11 @@ void MKLDNNQuantizeNode::executeQuantization() {
}
void MKLDNNQuantizeNode::execute(mkldnn::stream strm) {
if (getSelectedPrimitiveDescriptor()->getImplementationType() != impl_desc_type::ref) {
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
THROW_IE_EXCEPTION << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors.";
if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) {
if (jqp.op_type == QuantizeOpType::Binarization)
executeBinarization();
else

View File

@@ -332,7 +332,10 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() {
}
void MKLDNNROIPoolingNode::createPrimitive() {
auto config = getSelectedPrimitiveDescriptor()->getConfig();
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
THROW_IE_EXCEPTION << "CPU ROI Pooling node with name '" << getName() << "' doesn't have primitive descriptors.";
auto config = selectedPrimitiveDescriptor->getConfig();
const int simd_w = mayiuse(cpu::x64::avx512_common) ? 16 : 8;
jpp.c_block = simd_w;
@@ -378,7 +381,10 @@ void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) {
const auto *src_roi = reinterpret_cast<const float *>(srcMemory1.GetPtr());
float *dst = reinterpret_cast<float *>(dstMemory.GetPtr());
auto config = getSelectedPrimitiveDescriptor()->getConfig();
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
THROW_IE_EXCEPTION << "CPU ROI Pooling node with name '" << getName() << "' doesn't have primitive descriptors.";
auto config = selectedPrimitiveDescriptor->getConfig();
auto src_strides = config.inConfs[0].desc.getBlockingDesc().getStrides();
auto dst_strides = config.outConfs[0].desc.getBlockingDesc().getStrides();
@@ -526,8 +532,8 @@ void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) {
arg.xf = in_x - left_x_index;
arg.yf = in_y - top_y_index;
arg.xoff = (size_t) ((right_x_index - left_x_index) * jpp.c_block * sizeof(float));
arg.yoff = (size_t) ((bottom_y_index - top_y_index) * jpp.iw * jpp.c_block * sizeof(float));
arg.xoff = sizeof(float) * (right_x_index - left_x_index) * jpp.c_block;
arg.yoff = sizeof(float) * (bottom_y_index - top_y_index) * jpp.iw * jpp.c_block;
arg.src = &src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] +
top_y_index * src_strides[2] + left_x_index * src_strides[3]];

View File

@@ -458,7 +458,10 @@ void MKLDNNSplitNode::setDynamicBatchLim(int lim) {
}
void MKLDNNSplitNode::prepareOptimizedParams() {
const auto& inpTensorDesc = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc;
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
THROW_IE_EXCEPTION << "CPU Split node with name '" << getName() << "' doesn't have primitive descriptors.";
const auto& inpTensorDesc = selectedPrimitiveDescriptor->getConfig().inConfs[0].desc;
const auto outputPortsCount = outDims.size();
//find axis order position

View File

@@ -187,8 +187,7 @@ private:
} // namespace MKLDNNPlugin
MKLDNNTensorIteratorNode::MKLDNNTensorIteratorNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
MKLDNNNode(layer, eng, cache),
sub_graph(eng) {}
MKLDNNNode(layer, eng, cache) {}
void MKLDNNTensorIteratorNode::getSupportedDescriptors() {
auto *ti = dynamic_cast<class InferenceEngine::TensorIterator*>(getCnnLayer().get());

View File

@@ -433,11 +433,11 @@ private:
size_t num_boxes;
size_t num_classes;
size_t max_output_boxes_per_class;
float iou_threshold;
float score_threshold;
float soft_nms_sigma;
float scale;
size_t max_output_boxes_per_class = 0lu;
float iou_threshold = 0.0f;
float score_threshold = 0.0f;
float soft_nms_sigma = 0.0f;
float scale = 1.f;
std::vector<std::vector<size_t>> numFiltBox;
const std::string inType = "input", outType = "output";

View File

@@ -40,8 +40,8 @@ struct jit_args_logistic {
struct jit_logistic_config_params {
InferenceEngine::Precision src_dt;
InferenceEngine::Precision dst_dt;
unsigned src_data_size;
unsigned dst_data_size;
unsigned src_data_size = 0;
unsigned dst_data_size = 0;
};
struct jit_uni_logistic_kernel {

View File

@@ -131,6 +131,9 @@ void FrontEnd::parseCTCGreedyDecoderSeqLen(const Model& model, const ie::CNNLaye
"provided {} outputs",
layer->type, layer->name, outputs.size());
DataVector conditionalOutputs(2);
conditionalOutputs[0] = outputs[0];
conditionalOutputs[1] = outputs[1] != nullptr ? outputs[1] : model->addFakeData();
const auto mergeRepeated = layer->GetParamAsBool("merge_repeated");
const auto blankIndex = [&] {
@@ -167,7 +170,7 @@ void FrontEnd::parseCTCGreedyDecoderSeqLen(const Model& model, const ie::CNNLaye
sequenceLengthType);
_stageBuilder->addCTCGreedyDecoderSeqLenStage(model, layer->name, layer,
inputs, outputs, mergeRepeated, blankIndex);
inputs, conditionalOutputs, mergeRepeated, blankIndex);
}
} // namespace vpu

View File

@@ -41,6 +41,11 @@ target_link_libraries(${TARGET_NAME}
PRIVATE
mvnc inference_engine inference_engine_legacy vpu_graph_transformer)
# MyriadPlugin is not safe to unload it at runtime
if(LINUX AND LINUX_OS_NAME MATCHES "Ubuntu")
set_target_properties(${TARGET_NAME} PROPERTIES LINK_OPTIONS "-Wl,-z,nodelete")
endif()
ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME})
# LTO

View File

@@ -44,6 +44,19 @@ TEST_F(TensorDescTests, CreateBlockedBlobNCDHW) {
ASSERT_EQ(Layout::BLOCKED, blockedBlob->getTensorDesc().getLayout());
}
// Verifies that CHW and HWC tensor descriptors built from the same dims are
// distinct, and that each blocking descriptor carries the layout-specific
// dimension order (CHW -> {0, 1, 2}, HWC -> {1, 2, 0}).
TEST_F(TensorDescTests, CompareHWCandCHWLayouts) {
    TensorDesc descCHW(Precision::FP32, {1, 3, 4}, Layout::CHW);
    TensorDesc descHWC(Precision::FP32, {1, 3, 4}, Layout::HWC);
    // Expected blocking orders for each layout.
    SizeVector chw = {0, 1, 2};
    SizeVector hwc = {1, 2, 0};
    ASSERT_NE(descCHW, descHWC);
    ASSERT_NE(descCHW.getBlockingDesc(), descHWC.getBlockingDesc());
    ASSERT_NE(descCHW.getBlockingDesc().getOrder(), descHWC.getBlockingDesc().getOrder());
    ASSERT_EQ(descCHW.getBlockingDesc().getOrder(), chw);
    ASSERT_EQ(descHWC.getBlockingDesc().getOrder(), hwc);
}
TEST_F(TensorDescTests, CompareNHWCandNCHWLayouts) {
TensorDesc descNCHW(Precision::FP32, {1, 3, 4, 2}, Layout::NCHW);
TensorDesc descNHWC(Precision::FP32, {1, 3, 4, 2}, Layout::NHWC);

View File

@@ -572,6 +572,58 @@ TEST_P(IEClassNetworkTestP, QueryNetworkWithKSO) {
}
}
// Checks that QueryNetwork reports every op of a function that contains a
// constant-foldable branch (ShapeOf -> Gather -> Concat feeding a Reshape),
// and that the resulting per-op affinities can be written into rt_info.
TEST_P(IEClassNetworkTestP, SetAffinityWithConstantBranches) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Core ie;

    try {
        std::shared_ptr<ngraph::Function> func;
        {
            // Build: Relu(param) -> Reshape(shape derived from the MatMul
            // weights via ShapeOf/Gather/Concat) -> MatMul -> Add(bias).
            // The shape sub-graph is constant-foldable.
            ngraph::PartialShape shape({1, 84});
            ngraph::element::Type type(ngraph::element::Type_t::f32);
            auto param = std::make_shared<ngraph::opset6::Parameter>(type, shape);
            auto matMulWeights =
                ngraph::opset6::Constant::create(ngraph::element::Type_t::f32, {10, 84}, {1});
            auto shapeOf = std::make_shared<ngraph::opset6::ShapeOf>(matMulWeights);
            auto gConst1 = ngraph::opset6::Constant::create(ngraph::element::Type_t::i32, {1}, {1});
            auto gConst2 = ngraph::opset6::Constant::create(ngraph::element::Type_t::i64, {}, {0});
            auto gather = std::make_shared<ngraph::opset6::Gather>(shapeOf, gConst1, gConst2);
            auto concatConst = ngraph::opset6::Constant::create(ngraph::element::Type_t::i64, {1}, {1});
            auto concat =
                std::make_shared<ngraph::opset6::Concat>(ngraph::NodeVector{concatConst, gather}, 0);
            auto relu = std::make_shared<ngraph::opset6::Relu>(param);
            auto reshape = std::make_shared<ngraph::opset6::Reshape>(relu, concat, false);
            auto matMul = std::make_shared<ngraph::opset6::MatMul>(reshape, matMulWeights, false, true);
            auto matMulBias =
                ngraph::opset6::Constant::create(ngraph::element::Type_t::f32, {1, 10}, {1});
            auto addBias = std::make_shared<ngraph::opset6::Add>(matMul, matMulBias);
            auto result = std::make_shared<ngraph::opset6::Result>(addBias);

            ngraph::ParameterVector params = {param};
            ngraph::ResultVector results = {result};

            func = std::make_shared<ngraph::Function>(results, params);
        }

        CNNNetwork net(func);
        auto rres = ie.QueryNetwork(net, deviceName);
        auto rl_map = rres.supportedLayersMap;
        // Every op — including the constant-folded ones — must be reported.
        for (const auto & op : func->get_ops()) {
            if (!rl_map.count(op->get_friendly_name())) {
                FAIL() << "Op " << op->get_friendly_name() << " is not supported by " << deviceName;
            }
        }
        // Propagate the reported affinities into the function's rt_info.
        for (const auto & op : net.getFunction()->get_ops()) {
            std::string affinity = rl_map[op->get_friendly_name()];
            op->get_rt_info()["affinity"] = std::make_shared<ngraph::VariantWrapper<std::string>>(affinity);
        }
        // NOTE(review): affinities were set on 'net', but 'ksoNetwork'
        // (presumably a fixture member) is what gets loaded here — confirm
        // this is intentional and not meant to be 'net'.
        ExecutableNetwork exeNetwork = ie.LoadNetwork(ksoNetwork, deviceName);
    } catch (const NotImplementedException& ex) {
        std::string message = ex.what();
        ASSERT_STR_CONTAINS(message, "[NOT_IMPLEMENTED] ngraph::Function is not supported natively");
    }
}
TEST_P(IEClassNetworkTestP, SetAffinityWithKSO) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Core ie;

View File

@@ -21,7 +21,7 @@ clDNN uses 3<sup>rd</sup>-party components licensed under following licenses:
- *RapidJSON* under [Tencent\* License](https://github.com/Tencent/rapidjson/blob/master/license.txt)
## Documentation
There is inline documentation available that can be [generated with Doxygen](#generating-documentation).
There is inline documentation available that can be generated with Doxygen.
Accelerate Deep Learning Inference with Intel® Processor Graphics whitepaper [link](https://software.intel.com/en-us/articles/accelerating-deep-learning-inference-with-intel-processor-graphics).
@@ -45,7 +45,7 @@ request will be merged into our GitHub repository.
## System Requirements
clDNN supports Intel® HD Graphics and Intel® Iris® Graphics and is optimized for Gen9-Gen12LP architectures
clDNN currently uses OpenCL™ with multiple Intel® OpenCL™ extensions and requires Intel® Graphics Driver to run.
clDNN currently uses OpenCL™ with multiple Intel OpenCL™ extensions and requires Intel® Graphics Driver to run.
clDNN requires CPU with Intel® SSE/Intel® AVX support.
@@ -62,32 +62,20 @@ The software dependencies are:
> Intel® CPU intrinsics header (`<immintrin.h>`) must be available during compilation.
- [python™](https://www.python.org/downloads/) 2.7 or later (scripts are both compatible with python™ 2.7.x and python™ 3.x)
- *(optional)* [Doxygen\*](http://www.stack.nl/~dimitri/doxygen/download.html) 1.8.13 or later
Needed for manual generation of documentation from inline comments or running `docs` custom target which will generate it automatically.
> [GraphViz\*](http://www.graphviz.org/Download..php) (2.38 or later) is also recommended to generate documentation with all embedded diagrams.
(Make sure that `dot` application is visible in the `PATH` environment variable.)
### Generating documentation
Documentation is provided inline and can be generated in HTML format with Doxygen. We recommend using the latest
[Doxygen\*](http://www.stack.nl/~dimitri/doxygen/download.html) and [GraphViz\*](http://www.graphviz.org/Download..php).
Documentation templates and configuration files are stored in `docs` subdirectory. You can simply call:
```shellscript
cd docs && doxygen
```
to generate HTML documentation in `docs/html` subdirectory.
There is also custom CMake target named `docs` which will generate documentation in `CLDNN__OUTPUT_BIN_DIR/html` directory. For example, when using Unix makefiles, you can run:
```
make docs
```
in order to create it.
# Trademark Information
Intel, the Intel logo, Intel Atom, Intel Core, Intel Xeon Phi, Iris, OpenVINO,
the OpenVINO logo, Pentium, VTune, and Xeon are trademarks
of Intel Corporation or its subsidiaries.
\* Other names and brands may be claimed as the property of others.
Copyright © 2020, Intel® Corporation
Microsoft, Windows, and the Windows logo are trademarks, or registered
trademarks of Microsoft Corporation in the United States and/or other
countries.
OpenCL and the OpenCL logo are trademarks of Apple Inc. used by permission
by Khronos.
Copyright © 2021, Intel Corporation

View File

@@ -25,7 +25,7 @@ struct ctc_greedy_decoder_params : public base_params {
ctc_greedy_decoder_params() : base_params(KernelType::CTC_GREEDY_DECODER) {}
bool merge_repeated = true;
uint32_t blank_index;
uint32_t blank_index = 0;
uint32_t outputs_num = 1;
};

View File

@@ -27,9 +27,9 @@ struct mvn_params : public base_params {
mvn_params() : base_params(KernelType::MVN) {}
MVNMode mvnMode = MVNMode::WITHIN_CHANNELS;
bool mvnNormalizeVariance;
float epsilon;
MVNEpsMode mvnEpsMode;
bool mvnNormalizeVariance = false;
float epsilon = 0.0f;
MVNEpsMode mvnEpsMode = MVNEpsMode::INSIDE_SQRT;
virtual ParamsKey GetParamsKey() const {
ParamsKey k = base_params::GetParamsKey();

View File

@@ -393,19 +393,19 @@ bool layout_optimizer::convolution_b_fs_yx_fsv16_opt(const layout& input_layout,
}
// A set of rules that define when b_fs_yx_fsv16 mem format can be used for fp16/fp32 case
int32_t feature_block_size = 16;
int32_t correct_data_type = input_layout.data_type == data_types::f16 || input_layout.data_type == data_types::f32;
correct_data_type &= weights_layout.data_type == input_layout.data_type;
int32_t correct_batch = (input_layout.size.batch[0] == 1) || (input_layout.size.batch[0] > 1 && input_layout.data_type == data_types::f32);
int32_t correct_spatial_dims = input_layout.size.spatial[2] == 1 && input_layout.size.spatial[3] == 1;
bool correct_data_type = (input_layout.data_type == data_types::f16 || input_layout.data_type == data_types::f32) &&
(weights_layout.data_type == input_layout.data_type);
bool correct_batch = (input_layout.size.batch[0] == 1) || (input_layout.size.batch[0] > 1 && input_layout.data_type == data_types::f32);
bool correct_spatial_dims = input_layout.size.spatial[2] == 1 && input_layout.size.spatial[3] == 1;
int32_t required_feature_num = weak_restrictions ? feature_block_size / 2 : feature_block_size;
int32_t correct_in_feature = (input_layout.size.feature[0] >= required_feature_num &&
bool correct_in_feature = (input_layout.size.feature[0] >= required_feature_num &&
output_layout.size.feature[0] >= required_feature_num);
int32_t in_features_per_group = input_layout.size.feature[0] / conv->groups;
int32_t out_features_per_group = output_layout.size.feature[0] / conv->groups;
if (!correct_in_feature && input_layout.size.feature[0] <= 4 && out_features_per_group >= feature_block_size)
correct_in_feature = true;
int32_t depthwise = conv->groups == static_cast<uint32_t>(input_layout.size.feature[0]); // depthwise conv
int32_t grouped = ((feature_block_size % out_features_per_group == 0) &&
bool depthwise = conv->groups == static_cast<uint32_t>(input_layout.size.feature[0]); // depthwise conv
bool grouped = ((feature_block_size % out_features_per_group == 0) &&
(feature_block_size % in_features_per_group == 0) &&
(feature_block_size / out_features_per_group > 1) &&
(feature_block_size / in_features_per_group > 1) &&

View File

@@ -1,20 +1,6 @@
/*
* Copyright 2017-2019 Intel Corporation.
* The source code, information and material ("Material") contained herein is
* owned by Intel Corporation or its suppliers or licensors, and title to such
* Material remains with Intel Corporation or its suppliers or licensors.
* The Material contains proprietary information of Intel or its suppliers and
* licensors. The Material is protected by worldwide copyright laws and treaty
* provisions.
* No part of the Material may be used, copied, reproduced, modified, published,
* uploaded, posted, transmitted, distributed or disclosed in any way without
* Intel's prior express written permission. No license under any patent,
* copyright or other intellectual property rights in the Material is granted to
* or conferred upon you, either expressly, by implication, inducement, estoppel
* or otherwise.
* Any license under such intellectual property rights must be express and
* approved by Intel in writing.
*/
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "XLinkStringUtils.h"

View File

@@ -1,20 +1,6 @@
/*
* Copyright 2017-2019 Intel Corporation.
* The source code, information and material ("Material") contained herein is
* owned by Intel Corporation or its suppliers or licensors, and title to such
* Material remains with Intel Corporation or its suppliers or licensors.
* The Material contains proprietary information of Intel or its suppliers and
* licensors. The Material is protected by worldwide copyright laws and treaty
* provisions.
* No part of the Material may be used, copied, reproduced, modified, published,
* uploaded, posted, transmitted, distributed or disclosed in any way without
* Intel's prior express written permission. No license under any patent,
* copyright or other intellectual property rights in the Material is granted to
* or conferred upon you, either expressly, by implication, inducement, estoppel
* or otherwise.
* Any license under such intellectual property rights must be express and
* approved by Intel in writing.
*/
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "mvnc_data.h"
#include "mvnc_tool.h"

View File

@@ -18,7 +18,9 @@
#include <vpu/vpu_plugin_config.hpp>
#include <vpu/private_plugin_config.hpp>
#include <vpu/utils/string.hpp>
#include "samples/common.hpp"
#include "samples/args_helper.hpp"
static constexpr char help_message[] =
"Optional. Print the usage message.";
@@ -208,106 +210,6 @@ IE_SUPPRESS_DEPRECATED_END
return config;
}
static std::map<std::string, std::string> parseArgMap(std::string argMap) {
argMap.erase(std::remove_if(argMap.begin(), argMap.end(), ::isspace), argMap.end());
std::vector<std::string> pairs;
vpu::splitStringList(argMap, pairs, ',');
std::map<std::string, std::string> parsedMap;
for (auto&& pair : pairs) {
std::vector<std::string> keyValue;
vpu::splitStringList(pair, keyValue, ':');
if (keyValue.size() != 2) {
throw std::invalid_argument("Invalid key/value pair " + pair + ". Expected <layer_name>:<value>");
}
parsedMap[keyValue[0]] = keyValue[1];
}
return parsedMap;
}
using supported_precisions_t = std::unordered_map<std::string, InferenceEngine::Precision>;
using supported_layouts_t = std::unordered_map<std::string, InferenceEngine::Layout>;
using matchLayoutToDims_t = std::unordered_map<size_t, size_t>;
static InferenceEngine::Layout getLayout(std::string value,
const supported_layouts_t& supported_layouts) {
std::transform(value.begin(), value.end(), value.begin(), ::toupper);
const auto layout = supported_layouts.find(value);
if (layout == supported_layouts.end()) {
throw std::logic_error("\"" + value + "\"" + " is not a valid layout");
}
return layout->second;
}
static InferenceEngine::Layout getLayout(const std::string& value) {
static const supported_layouts_t supported_layouts = {
{ "NCDHW", InferenceEngine::Layout::NCDHW },
{ "NDHWC", InferenceEngine::Layout::NDHWC },
{ "NCHW", InferenceEngine::Layout::NCHW },
{ "NHWC", InferenceEngine::Layout::NHWC },
{ "CHW", InferenceEngine::Layout::CHW },
{ "NC", InferenceEngine::Layout::NC },
{ "C", InferenceEngine::Layout::C },
};
return getLayout(value, supported_layouts);
}
static bool isMatchLayoutToDims(InferenceEngine::Layout layout, size_t dimension) {
static const matchLayoutToDims_t matchLayoutToDims = {
{static_cast<size_t>(InferenceEngine::Layout::NCDHW), 5 },
{static_cast<size_t>(InferenceEngine::Layout::NDHWC), 5 },
{static_cast<size_t>(InferenceEngine::Layout::NCHW), 4 },
{static_cast<size_t>(InferenceEngine::Layout::NHWC), 4 },
{static_cast<size_t>(InferenceEngine::Layout::CHW), 3 },
{static_cast<size_t>(InferenceEngine::Layout::NC), 2 },
{static_cast<size_t>(InferenceEngine::Layout::C), 1 }
};
const auto dims = matchLayoutToDims.find(static_cast<size_t>(layout));
if (dims == matchLayoutToDims.end()) {
throw std::logic_error("Layout is not valid.");
}
return dimension == dims->second;
}
static InferenceEngine::Precision getPrecision(std::string value,
const supported_precisions_t& supported_precisions) {
std::transform(value.begin(), value.end(), value.begin(), ::toupper);
const auto precision = supported_precisions.find(value);
if (precision == supported_precisions.end()) {
throw std::logic_error("\"" + value + "\"" + " is not a valid precision");
}
return precision->second;
}
static InferenceEngine::Precision getPrecision(const std::string& value) {
static const supported_precisions_t supported_precisions = {
{ "FP32", InferenceEngine::Precision::FP32 },
{ "FP16", InferenceEngine::Precision::FP16 },
{ "BF16", InferenceEngine::Precision::BF16 },
{ "U64", InferenceEngine::Precision::U64 },
{ "I64", InferenceEngine::Precision::I64 },
{ "U32", InferenceEngine::Precision::U32 },
{ "I32", InferenceEngine::Precision::I32 },
{ "U16", InferenceEngine::Precision::U16 },
{ "I16", InferenceEngine::Precision::I16 },
{ "U8", InferenceEngine::Precision::U8 },
{ "I8", InferenceEngine::Precision::I8 },
{ "BOOL", InferenceEngine::Precision::BOOL },
};
return getPrecision(value, supported_precisions);
}
bool isFP16(InferenceEngine::Precision precision) {
return precision == InferenceEngine::Precision::FP16;
}
@@ -320,29 +222,6 @@ bool isFloat(InferenceEngine::Precision precision) {
return isFP16(precision) || isFP32(precision);
}
static void setPrecisions(const InferenceEngine::CNNNetwork& network) {
const auto user_precisions_map = parseArgMap(FLAGS_iop);
auto inputs = network.getInputsInfo();
auto outputs = network.getOutputsInfo();
for (auto&& item : user_precisions_map) {
const auto& layer_name = item.first;
const auto& user_precision = item.second;
const auto input = inputs.find(layer_name);
const auto output = outputs.find(layer_name);
if (input != inputs.end()) {
input->second->setPrecision(getPrecision(user_precision));
} else if (output != outputs.end()) {
output->second->setPrecision(getPrecision(user_precision));
} else {
throw std::logic_error(layer_name + " is not an input neither output");
}
}
}
static void setDefaultIO(InferenceEngine::CNNNetwork& network) {
const bool isMYRIAD = FLAGS_d.find("MYRIAD") != std::string::npos;
const bool isVPUX = FLAGS_d.find("VPUX") != std::string::npos;
@@ -377,81 +256,6 @@ static void setDefaultIO(InferenceEngine::CNNNetwork& network) {
}
}
static void processPrecisions(InferenceEngine::CNNNetwork& network) {
if (!FLAGS_ip.empty()) {
const auto user_precision = getPrecision(FLAGS_ip);
for (auto&& layer : network.getInputsInfo()) {
layer.second->setPrecision(user_precision);
}
}
if (!FLAGS_op.empty()) {
auto user_precision = getPrecision(FLAGS_op);
for (auto&& layer : network.getOutputsInfo()) {
layer.second->setPrecision(user_precision);
}
}
if (!FLAGS_iop.empty()) {
setPrecisions(network);
}
}
static void setLayouts(const InferenceEngine::CNNNetwork& network) {
const auto user_layouts_map = parseArgMap(FLAGS_iol);
auto inputs = network.getInputsInfo();
auto outputs = network.getOutputsInfo();
for (auto&& item : user_layouts_map) {
const auto& layer_name = item.first;
const auto& user_layout = getLayout(item.second);
const auto input = inputs.find(layer_name);
const auto output = outputs.find(layer_name);
if (input != inputs.end()) {
if (!isMatchLayoutToDims(user_layout, input->second->getTensorDesc().getDims().size())) {
throw std::logic_error(item.second + " layout is not applicable to " + layer_name);
}
input->second->setLayout(user_layout);
} else if (output != outputs.end()) {
if (!isMatchLayoutToDims(user_layout, output->second->getTensorDesc().getDims().size())) {
throw std::logic_error(item.second + " layout is not applicable to " + layer_name);
}
output->second->setLayout(user_layout);
} else {
throw std::logic_error(layer_name + " is not an input neither output");
}
}
}
static void processLayout(InferenceEngine::CNNNetwork& network) {
if (!FLAGS_il.empty()) {
const auto layout = getLayout(FLAGS_il);
for (auto&& layer : network.getInputsInfo()) {
if (isMatchLayoutToDims(layout, layer.second->getTensorDesc().getDims().size())) {
layer.second->setLayout(layout);
}
}
}
if (!FLAGS_ol.empty()) {
const auto layout = getLayout(FLAGS_ol);
for (auto&& layer : network.getOutputsInfo()) {
if (isMatchLayoutToDims(layout, layer.second->getTensorDesc().getDims().size())) {
layer.second->setLayout(layout);
}
}
}
if (!FLAGS_iol.empty()) {
setLayouts(network);
}
}
std::string getFileNameFromPath(const std::string& path,
#if defined(_WIN32)
const std::string& sep = "\\") {
@@ -487,18 +291,10 @@ int main(int argc, char* argv[]) {
auto network = ie.ReadNetwork(FLAGS_m);
setDefaultIO(network);
processPrecisions(network);
processLayout(network);
processPrecision(network, FLAGS_ip, FLAGS_op, FLAGS_iop);
processLayout(network, FLAGS_il, FLAGS_ol, FLAGS_iol);
std::cout << "Network inputs:" << std::endl;
for (auto&& layer : network.getInputsInfo()) {
std::cout << " " << layer.first << " : " << layer.second->getPrecision() << " / " << layer.second->getLayout() << std::endl;
}
std::cout << "Network outputs:" << std::endl;
for (auto&& layer : network.getOutputsInfo()) {
std::cout << " " << layer.first << " : " << layer.second->getPrecision() << " / " << layer.second->getLayout() << std::endl;
}
std::cout << std::endl;
printInputAndOutputsInfo(network);
auto timeBeforeLoadNetwork = std::chrono::steady_clock::now();
auto executableNetwork = ie.LoadNetwork(network, FLAGS_d, configure());

View File

@@ -1,5 +1,5 @@
"""
Copyright (C) 2018-2020 Intel Corporation
Copyright (C) 2018-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -16,68 +16,20 @@
import logging as log
import networkx as nx
from mo.front.common.replacement import FrontReplacementOp
from mo.front.common.replacement import FrontReplacementPattern
from mo.graph.graph import Graph
from mo.utils.error import Error
class AssignElimination(FrontReplacementOp):
op = "Assign"
class AssignAndAssertElimination(FrontReplacementPattern):
# The solution with removal of Assign and Assert operations is temporary.
# The proper solution is to keep these operations until the partial inference
# phase when control flow edges are properly handled and later unnecessary ones are eliminated.
# In order to achieve this we need to implement control flow inference function
# for these operations similar to "Merge" and "Switch" operations.
enabled = True
def replace_sub_graph(self, graph: Graph, match: dict):
node = match['op']
# here we request all data flow output edges (control flow edges will not be listed)
out_edges = node.out_edges()
if len(out_edges) == 0:
graph.remove_node(node.id)
log.debug('Assign op was removed {}'.format(node.id))
else:
raise Error('Data flow edge coming out of Assign node {}'.format(node.id))
class AssignSubElimination(FrontReplacementOp):
op = "AssignSub"
enabled = True
def replace_sub_graph(self, graph: Graph, match: dict):
node = match['op']
# here we request all data flow output edges (control flow edges will not be listed)
out_edges = node.out_edges()
if len(out_edges) == 0:
graph.remove_node(node.id)
log.debug('AssignSub op was removed {}'.format(node.id))
else:
raise Error('Data flow edge coming out of AssignSub node {}'.format(node.id))
class AssignAddElimination(FrontReplacementOp):
op = "AssignAdd"
enabled = True
def replace_sub_graph(self, graph: Graph, match: dict):
node = match['op']
# here we request all data flow output edges (control flow edges will not be listed)
out_edges = node.out_edges()
if len(out_edges) == 0:
graph.remove_node(node.id)
log.debug('AssignAdd op was removed {}'.format(node.id))
else:
raise Error('Data flow edge coming out of AssignAdd node {}'.format(node.id))
class AssertElimination(FrontReplacementOp):
op = "Assert"
enabled = True
def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
node = match['op']
# here we request all data flow output edges (control flow edges will not be listed)
out_edges = node.out_edges()
if len(out_edges) == 0:
graph.remove_node(node.id)
log.debug('Assert op was removed {}'.format(node.id))
else:
raise Error('Data flow edge coming out of Assert node {}'.format(node.id))
def find_and_replace_pattern(self, graph: Graph):
for node in graph.get_op_nodes():
if node.soft_get('op') in ["Assign", "AssignSub", "AssignAdd", "Assert"]:
log.debug('"{}" op with id="{}" was removed'.format(node.op, node.id))
graph.remove_node(node.id)

View File

@@ -51,6 +51,12 @@ def update_body_graph(body_graph: Graph, subgraph_proto: dict,
# add incoming edges based on data_nodes_map
for dst_port, inp in enumerate(pb_node.input):
orig_src_id = inp.split(":")[0]
# TODO: avoid this temporal workaround for TF 2.4 or higher RNN layers:
# skip control flow dependency
if orig_src_id[0] == '^':
continue
src_id = map_original_name[orig_src_id]
src_port = 0 if len(inp.split(":")) == 1 else int(inp.split(":")[-1])
assert (body_graph.has_node(src_id))

View File

@@ -20,6 +20,7 @@ import numpy as np
from extensions.ops.tensor_iterator import TensorIterator
from mo.front.common.partial_infer.utils import int64_array
from mo.graph.graph import Node, Graph
from mo.middle.passes.fusing.helpers import common_bfs
from mo.middle.passes.infer import partial_infer
from mo.ops.const import Const
@@ -312,14 +313,52 @@ class Loop(TensorIterator):
'from_port': 0,
'to_port': 0})
@staticmethod
def parameter_unchanged_after_iteration(loop_node: Node, body_parameter: Node):
"""
Checks if the body Parameter node is connected to some body Result and the data provided to Result is not
changed between iterations. The data is considered unchanged if:
1. There is no back edge for this Parameter OR
2. There is a back edge from some Result to Parameter and there are only Identity ops in between or
Parameter is connected to Result directly.
:param loop_node: the Loop node to check
:param body_parameter: the body Parameter node
:return: the result of the check
"""
assert body_parameter.id in loop_node.body
assert body_parameter.soft_get('op') == 'Parameter'
if not any([attr['to_layer'] == body_parameter.soft_get('internal_layer_id') for attr in loop_node.back_edges]):
return True
for back_edge_attrs in loop_node.back_edges:
if back_edge_attrs['to_layer'] == body_parameter.soft_get('internal_layer_id'):
result_internal_id = back_edge_attrs['from_layer']
result_nodes = loop_node.body.get_op_nodes(internal_layer_id=result_internal_id)
assert len(result_nodes) == 1, 'There should be exactly one node with id {}, but there are {}' \
''.format(result_internal_id, len(result_nodes))
result_node = result_nodes[0]
# check that the Result node consumes data from Parameter node directly or through Identity operations
parameters = common_bfs(result_node, ['Identity'], ['Parameter'], is_backward=True, attr_to_check='op',
follow_multi_consumer_data_nodes=True)
if any([node.soft_get('internal_layer_id') == body_parameter.internal_layer_id for node in parameters]):
return True
return False
@staticmethod
def pull_constant_inputs_into_body(loop_node: Node):
for port_idx, in_port in reversed(loop_node.in_ports().items()):
if port_idx > 1 and not in_port.disconnected() and in_port.get_source().node.soft_get('type') == 'Const':
body_parameter = Loop.external_port_id_to_body_node(loop_node, port_idx, loop_node.input_port_map)
# if there is a back edge into a body Parameter then we cannot replace it with a Const if the value
# is updated during each iteration. So we need to check that the tensor is passed to the next iteration
# unchanged
if not Loop.parameter_unchanged_after_iteration(loop_node, body_parameter):
continue
original_const_node = in_port.get_source().node
new_const_node = Const(loop_node.body, original_const_node.attrs()).create_node()
body_parameter = Loop.external_port_id_to_body_node(loop_node, port_idx, loop_node.input_port_map)
body_parameter.out_port(0).get_connection().set_source(new_const_node.out_port(0))
loop_node.body.remove_nodes_from([body_parameter.id])
loop_node.delete_input_port(port_idx)
@@ -336,7 +375,7 @@ class Loop(TensorIterator):
@staticmethod
def update_port_map_value_ext(port_map: dict, layer_id_attr: str, layer_id_value: int,
updated_attr: str, new_attr_value: int):
updated_attr: str, new_attr_value: int):
"""
Updates a value of requested attribute for a certain layer id in a port map
:param port_map: a map of external ports to internal layer ids

View File

@@ -1,5 +1,5 @@
"""
Copyright (C) 2018-2020 Intel Corporation
Copyright (C) 2018-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -59,7 +59,8 @@ def get_value_in_port(node) -> Port:
return None if len(value_ports) != 1 else value_ports[0]
def common_bfs(start_node: Node, allowed_ops: list, op_name: list, is_backward: bool = True, allowed_all: bool = False):
def common_bfs(start_node: Node, allowed_ops: list, op_name: list, is_backward: bool = True, allowed_all: bool = False,
attr_to_check='type', follow_multi_consumer_data_nodes=False):
"""
The purpose of this algorithm is to find layers with 'op_name' located in given direction.
In case of branching, the algorithm goes into each branch, but if it cannot find the layer in one of them it returns
@@ -70,6 +71,8 @@ def common_bfs(start_node: Node, allowed_ops: list, op_name: list, is_backward:
:param op_name: The list with names of operations for searching
:param is_backward: The direction of BFS algorithm
:param allowed_all: Bool flag meaning we can jump over all operations
:param attr_to_check: the attribute to check when looking if the node is in "op_name" list
:param follow_multi_consumer_data_nodes: for backward traversal allow to follow data nodes with multiple consumers
"""
ret = []
q = deque([start_node])
@@ -83,8 +86,8 @@ def common_bfs(start_node: Node, allowed_ops: list, op_name: list, is_backward:
in_nodes_size = len(node.in_nodes()) if is_backward else len(node.out_nodes())
for id in range(in_nodes_size): # in_nodes() can return either list or dict
pnode = node.in_node(id) if is_backward else node.out_node(id)
if pnode.has_valid('type'):
if pnode.type in op_name:
if pnode.has_valid(attr_to_check):
if pnode[attr_to_check] in op_name:
if pnode.id not in ret:
ret.append(pnode.id)
elif allowed_all or pnode.op in allowed_ops:
@@ -93,7 +96,7 @@ def common_bfs(start_node: Node, allowed_ops: list, op_name: list, is_backward:
return []
elif pnode.kind == 'data' and pnode.value is None:
# If we go backward we don't use data node that have more than one consumer
if not is_backward or (is_backward and len(pnode.out_nodes()) == 1):
if not is_backward or (len(pnode.out_nodes()) == 1 or follow_multi_consumer_data_nodes):
q.append(pnode)
return [Node(start_node.graph, x) for x in ret]

View File

@@ -1,5 +1,5 @@
"""
Copyright (C) 2018-2020 Intel Corporation
Copyright (C) 2018-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -16,9 +16,11 @@
import unittest
from mo.front.common.partial_infer.utils import int64_array
from mo.graph.graph import Node
from mo.middle.passes.fusing.helpers import forward_bfs, backward_bfs, get_next_operation
from mo.utils.unittest.graph import build_graph
from mo.middle.passes.fusing.helpers import forward_bfs, backward_bfs, get_next_operation, common_bfs
from mo.utils.unittest.graph import build_graph, regular_op_with_shaped_data, connect, const, result, \
valued_const_with_data, connect_data
nodes_attributes = {
'placeholder_1': {'shape': None, 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
@@ -256,6 +258,67 @@ class BFSTests(unittest.TestCase):
res = backward_bfs(Node(graph, 'add_1_data'), ['Add', 'ScaleShift', 'Mul', 'Parameter'], ['Conv2D'])
self.assertTrue(len(res) == 0, 'Sholdn\'t find any nodes due to cycle in graph')
def test_backward_bfs_check_op_instead_of_type(self):
# Placeholder->ScaleShift->Mul1->Add1---->Concat
# `----------->Add2->Mul2--'
graph = build_graph(nodes_attributes,
[('placeholder_1', 'placeholder_1_data'),
('placeholder_1_data', 'add_2'),
('scaleshift_1_w', 'scaleshift_1'),
('scaleshift_1', 'scaleshift_1_data'),
('scaleshift_1_data', 'mul_1'),
('mul_1', 'mul_1_data'),
('mul_1_data', 'add_1'),
('add_1', 'add_1_data'),
('add_2', 'add_2_data'),
('add_2_data', 'mul_2'),
('mul_2', 'mul_2_data'),
('add_1_data', 'concat_1'),
('mul_2_data', 'concat_1'),
('concat_1', 'concat_1_data'),
('concat_1_data', 'op_output')
])
res = common_bfs(Node(graph, 'concat_1'), ['Mul', 'Add'], ['Parameter'], is_backward=True, attr_to_check='op')
self.assertTrue(len(res) == 0, 'Smth went wrong with bfs')
res = common_bfs(Node(graph, 'concat_1'), ['Mul'], ['Add'], is_backward=True, attr_to_check='op')
self.assertTrue(len(res) == 2 and all([res[x].id in ['add_1', 'add_2'] for x in range(len(res))]),
'Add operations was not found by bfs')
res = common_bfs(Node(graph, 'concat_1'), ['ScaleShift'], ['Add'], is_backward=True, attr_to_check='op')
self.assertTrue(len(res) == 0, 'BFS shouldn\'t find any operations')
res = common_bfs(Node(graph, 'concat_1'), [], ['Add'], allowed_all=True, is_backward=True, attr_to_check='op')
self.assertTrue(len(res) == 2 and all([res[x].id in ['add_1', 'add_2'] for x in range(len(res))]),
'Add operations was not found by bfs')
res = common_bfs(Node(graph, 'concat_1'), ['ScaleShift'], ['ScaleShift'], is_backward=True, attr_to_check='op')
self.assertTrue(len(res) == 0, 'No one node should be found! But bfs found {} nodes'.format(len(res)))
def test_backward_bfs_multi_consumer_data_nodes(self):
# Placeholder-> Mul -> Result
# Const -/ \- Result2
graph = build_graph({**regular_op_with_shaped_data('parameter', [1], {'op': 'Parameter'}),
**valued_const_with_data('const', int64_array([5])),
**regular_op_with_shaped_data('mul', [1], {'op': 'Mul'}),
**result('result'),
**result('result2'),
},
[*connect('parameter', '0:mul'),
*connect('const', '1:mul'),
*connect('mul:0', 'result'),
*connect_data('mul', 'result2'),
])
res = common_bfs(Node(graph, 'result'), ['Mul'], ['Parameter'], is_backward=True, attr_to_check='op',
follow_multi_consumer_data_nodes=True)
self.assertTrue(len(res) == 1, 'The multi-consumer data node "mul_d" was not followed')
res = common_bfs(Node(graph, 'result'), ['Mul'], ['Parameter'], is_backward=True, attr_to_check='op')
self.assertTrue(len(res) == 0, 'The multi-consumer data node "mul_d" was followed')
# Unit tests for get_next_operation
class GetNextOperationTests(unittest.TestCase):

View File

@@ -37,7 +37,7 @@ py_modules = []
for name in os.listdir():
if re.match('requirements(.*)\.txt', name):
requirements_txt.append(name)
if re.match('mo_(.*)\.py', name):
if re.match('mo(.*)\.py', name):
py_modules.append(name.split('.')[0])
# Minimal set of dependencies

View File

@@ -194,8 +194,10 @@ namespace ngraph
// Check execution condition
bool body_exec_condition(false);
body_outputs[special_ports.body_condition_output_idx]->read(
&body_exec_condition, sizeof(bool));
if (body_outputs.size() > special_ports.body_condition_output_idx &&
body_outputs[special_ports.body_condition_output_idx])
body_outputs[special_ports.body_condition_output_idx]->read(
&body_exec_condition, sizeof(bool));
if (!body_exec_condition)
break;

View File

@@ -161,6 +161,8 @@ namespace ngraph
for (size_t i = 0; i < concat_outputs.size(); ++i)
{
const auto& concat_desc = concat_outputs[i];
if (!concat_desc)
continue;
auto shape =
func->get_results().at(concat_desc->m_body_value_index)->get_shape();
std::vector<Shape> shapes_to_concat(values_to_concat[i].size(), shape);

View File

@@ -1248,7 +1248,19 @@ void propagate_rt_info(Node* node, const Output<Node>& final_port)
if (stop_nodes.count(in.get_node()))
continue;
auto consumer = in.get_node()->shared_from_this();
// FIXME: Here we have a WA in order to save some original fields
// if we have conflicts because Variant merge doesn't work.
// We can restore original fields because we don't change the operation
auto orig_rt_info = consumer->get_rt_info();
copy_runtime_info({curr_node, consumer}, consumer);
auto& rt_info = consumer->get_rt_info();
for (const auto& it : orig_rt_info)
{
if (rt_info.find(it.first) == rt_info.end())
rt_info[it.first] = it.second;
}
}
}
}

View File

@@ -1497,17 +1497,17 @@ def lstm_sequence(
Shape: [batch_size]. Integer type.
@param W: Tensor with weights for matrix multiplication operation with input portion of data.
Shape: [num_directions, 4*hidden_size, input_size].
:param R: The tensor with weights for matrix multiplication operation with hidden state.
@param R: The tensor with weights for matrix multiplication operation with hidden state.
Shape: [num_directions, 4*hidden_size, hidden_size].
:param B: The tensor with biases.
@param B: The tensor with biases.
Shape: [num_directions, 4*hidden_size].
:param hidden_size: Specifies hidden state size.
:param direction: Specifies if the RNN is forward, reverse, or bidirectional.
:param activations: The list of three activation functions for gates.
:param activations_alpha: The list of alpha parameters for activation functions.
:param activations_beta: The list of beta parameters for activation functions.
:param clip: Specifies bound values [-C, C] for tensor clipping performed before activations.
:param name: An optional name of the output node.
@param hidden_size: Specifies hidden state size.
@param direction: Specifies if the RNN is forward, reverse, or bidirectional.
@param activations: The list of three activation functions for gates.
@param activations_alpha: The list of alpha parameters for activation functions.
@param activations_beta: The list of beta parameters for activation functions.
@param clip: Specifies bound values [-C, C] for tensor clipping performed before activations.
@param name: An optional name of the output node.
@return The new node represents LSTMSequence. Node outputs count: 3.
"""
@@ -2800,11 +2800,11 @@ def tensor_iterator(
"""
attributes = {
"body": graph_body.serialize(),
"slice_input_desc": [desc.serialize() for desc in slice_input_desc],
"merged_input_desc": [desc.serialize() for desc in merged_input_desc],
"invariant_input_desc": [desc.serialize() for desc in invariant_input_desc],
"body_output_desc": [desc.serialize() for desc in body_output_desc],
"concat_output_desc": [desc.serialize() for desc in concat_output_desc],
"input_descriptions": {"slice_input_desc": [desc.serialize() for desc in slice_input_desc],
"merged_input_desc": [desc.serialize() for desc in merged_input_desc],
"invariant_input_desc": [desc.serialize() for desc in invariant_input_desc]},
"output_descriptions": {"body_output_desc": [desc.serialize() for desc in body_output_desc],
"concat_output_desc": [desc.serialize() for desc in concat_output_desc]}
}
return _get_node_factory_opset1().create("TensorIterator", as_nodes(*inputs), attributes)

View File

@@ -385,16 +385,56 @@ def rnn_sequence(
def loop(
trip_count: NodeInput,
execution_condition: NodeInput,
inputs: List[Node],
graph_body: GraphBody,
slice_input_desc: List[TensorIteratorSliceInputDesc],
merged_input_desc: List[TensorIteratorMergedInputDesc],
invariant_input_desc: List[TensorIteratorInvariantInputDesc],
body_output_desc: List[TensorIteratorBodyOutputDesc],
concat_output_desc: List[TensorIteratorConcatOutputDesc],
body_condition_output_idx: int,
current_iteration_input_idx: int = -1,
name: Optional[str] = None,
) -> Node:
"""Return a node which performs Loop.
"""Perform recurrent execution of the network described in the body, iterating through the data.
@param trip_count: A scalar or 1D tensor with 1 element specifying
maximum number of iterations.
@param execution_condition: A scalar or 1D tensor with 1 element
specifying whether to execute the first iteration or not.
@param inputs: The inputs provided to the Loop operator.
@param graph_body: The graph representing the body we execute.
@param slice_input_desc: The descriptors describing sliced inputs, that is nodes
representing tensors we iterate through, processing single
data slice in one iteration.
@param merged_input_desc: The descriptors describing merged inputs, that is nodes
representing variables with initial value at first iteration,
which may be changing through iterations.
@param invariant_input_desc: The descriptors describing invariant inputs, that is nodes
representing variable with persistent value through all
iterations.
@param body_output_desc: The descriptors describing body outputs from specified
iteration.
@param concat_output_desc: The descriptors describing specified output values through
all the iterations concatenated into one node.
@param body_condition_output_idx: Determines the purpose of the corresponding result in
the graph_body. This result will determine the dynamic
exit condition. If the value of this result is False,
then iterations stop.
@param current_iteration_input_idx: Determines the purpose of the corresponding parameter
in the graph_body. This parameter will be used as
an iteration counter. Optional.
@return: The new node which performs Loop.
"""
inputs = as_nodes(trip_count, execution_condition)
return _get_node_factory_opset5().create("Loop", inputs)
attributes = {
"body": graph_body.serialize(),
"input_descriptions": {"slice_input_desc": [desc.serialize() for desc in slice_input_desc],
"merged_input_desc": [desc.serialize() for desc in merged_input_desc],
"invariant_input_desc": [desc.serialize() for desc in invariant_input_desc]},
"output_descriptions": {"body_output_desc": [desc.serialize() for desc in body_output_desc],
"concat_output_desc": [desc.serialize() for desc in concat_output_desc]},
"special_body_ports": {"body_condition_output_idx": body_condition_output_idx,
"current_iteration_input_idx": current_iteration_input_idx}
}
return _get_node_factory_opset5().create("Loop", as_nodes(trip_count, execution_condition, *inputs),
attributes)

View File

@@ -22,7 +22,7 @@ from ngraph.opset4.ops import acosh
from ngraph.opset1.ops import add
from ngraph.opset1.ops import asin
from ngraph.opset4.ops import asinh
from ngraph.opset3.ops import assign
from ngraph.opset6.ops import assign
from ngraph.opset1.ops import atan
from ngraph.opset4.ops import atanh
from ngraph.opset1.ops import avg_pool
@@ -114,7 +114,7 @@ from ngraph.opset1.ops import prior_box_clustered
from ngraph.opset1.ops import psroi_pooling
from ngraph.opset4.ops import proposal
from ngraph.opset1.ops import range
from ngraph.opset3.ops import read_value
from ngraph.opset6.ops import read_value
from ngraph.opset4.ops import reduce_l1
from ngraph.opset4.ops import reduce_l2
from ngraph.opset1.ops import reduce_logical_and

View File

@@ -142,3 +142,35 @@ def mvn(
}
return _get_node_factory_opset6().create("MVN", inputs, attributes)
@nameable_op
def assign(new_value: NodeInput, variable_id: str, name: Optional[str] = None) -> Node:
    """Return a node which produces the Assign operation.

    @param new_value: Node producing a value to be assigned to a variable.
    @param variable_id: Id of a variable to be updated.
    @param name: Optional name for output node.
    @return Assign node
    """
    node_inputs = [as_node(new_value)]
    attributes = {"variable_id": variable_id}
    return _get_node_factory_opset6().create("Assign", node_inputs, attributes)
@nameable_op
def read_value(init_value: NodeInput, variable_id: str, name: Optional[str] = None) -> Node:
    """Return a node which produces the ReadValue operation.

    @param init_value: Node producing a value to be returned instead of an unassigned variable.
    @param variable_id: Id of a variable to be read.
    @param name: Optional name for output node.
    @return ReadValue node
    """
    return _get_node_factory_opset6().create(
        "ReadValue",
        [as_node(init_value)],
        {"variable_id": variable_id}
    )

View File

@@ -21,11 +21,16 @@
#include <pybind11/stl.h>
#include "dict_attribute_visitor.hpp"
#include "ngraph/op/loop.hpp"
#include "ngraph/op/util/sub_graph_base.hpp"
namespace py = pybind11;
util::DictAttributeDeserializer::DictAttributeDeserializer(const py::dict& attributes)
util::DictAttributeDeserializer::DictAttributeDeserializer(
const py::dict& attributes,
std::unordered_map<std::string, std::shared_ptr<ngraph::Variable>>& variables)
: m_attributes(attributes)
, m_variables(variables)
{
}
@@ -34,7 +39,116 @@ void util::DictAttributeDeserializer::on_adapter(const std::string& name,
{
if (m_attributes.contains(name))
{
NGRAPH_CHECK(false, "No AttributeVisitor support for accessing attribute named: ", name);
if (const auto& a = ngraph::as_type<ngraph::AttributeAdapter<
std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::InputDescription>>>>(
&adapter))
{
std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::InputDescription>>
input_descs;
const py::dict& input_desc = m_attributes[name.c_str()].cast<py::dict>();
const auto& merged_input_desc = input_desc["merged_input_desc"].cast<py::list>();
const auto& slice_input_desc = input_desc["slice_input_desc"].cast<py::list>();
const auto& invariant_input_desc = input_desc["invariant_input_desc"].cast<py::list>();
for (py::handle h : slice_input_desc)
{
const py::dict& desc = h.cast<py::dict>();
auto slice_in =
std::make_shared<ngraph::op::util::SubGraphOp::SliceInputDescription>(
desc["input_idx"].cast<int64_t>(),
desc["body_parameter_idx"].cast<int64_t>(),
desc["start"].cast<int64_t>(),
desc["stride"].cast<int64_t>(),
desc["part_size"].cast<int64_t>(),
desc["end"].cast<int64_t>(),
desc["axis"].cast<int64_t>());
input_descs.push_back(slice_in);
}
for (py::handle h : merged_input_desc)
{
const py::dict& desc = h.cast<py::dict>();
auto merged_in =
std::make_shared<ngraph::op::util::SubGraphOp::MergedInputDescription>(
desc["input_idx"].cast<int64_t>(),
desc["body_parameter_idx"].cast<int64_t>(),
desc["body_value_idx"].cast<int64_t>());
input_descs.push_back(merged_in);
}
for (py::handle h : invariant_input_desc)
{
const py::dict& desc = h.cast<py::dict>();
auto invariant_in =
std::make_shared<ngraph::op::util::SubGraphOp::InvariantInputDescription>(
desc["input_idx"].cast<int64_t>(),
desc["body_parameter_idx"].cast<int64_t>());
input_descs.push_back(invariant_in);
}
a->set(input_descs);
}
else if (const auto& a = ngraph::as_type<ngraph::AttributeAdapter<std::vector<
std::shared_ptr<ngraph::op::util::SubGraphOp::OutputDescription>>>>(&adapter))
{
std::vector<std::shared_ptr<ngraph::op::util::SubGraphOp::OutputDescription>>
output_descs;
const py::dict& output_desc = m_attributes[name.c_str()].cast<py::dict>();
const auto& body_output_desc = output_desc["body_output_desc"].cast<py::list>();
const auto& concat_output_desc = output_desc["concat_output_desc"].cast<py::list>();
for (py::handle h : body_output_desc)
{
const py::dict& desc = h.cast<py::dict>();
auto body_output =
std::make_shared<ngraph::op::util::SubGraphOp::BodyOutputDescription>(
desc["body_value_idx"].cast<int64_t>(),
desc["output_idx"].cast<int64_t>(),
desc["iteration"].cast<int64_t>());
output_descs.push_back(body_output);
}
for (py::handle h : concat_output_desc)
{
const py::dict& desc = h.cast<py::dict>();
auto concat_output =
std::make_shared<ngraph::op::util::SubGraphOp::ConcatOutputDescription>(
desc["body_value_idx"].cast<int64_t>(),
desc["output_idx"].cast<int64_t>(),
desc["start"].cast<int64_t>(),
desc["stride"].cast<int64_t>(),
desc["part_size"].cast<int64_t>(),
desc["end"].cast<int64_t>(),
desc["axis"].cast<int64_t>());
output_descs.push_back(concat_output);
}
a->set(output_descs);
}
else if (const auto& a = ngraph::as_type<
ngraph::AttributeAdapter<ngraph::op::v5::Loop::SpecialBodyPorts>>(&adapter))
{
ngraph::op::v5::Loop::SpecialBodyPorts special_body_ports;
const py::dict& special_ports_dict = m_attributes[name.c_str()].cast<py::dict>();
special_body_ports.body_condition_output_idx =
special_ports_dict["body_condition_output_idx"].cast<int64_t>();
special_body_ports.current_iteration_input_idx =
special_ports_dict["current_iteration_input_idx"].cast<int64_t>();
a->set(special_body_ports);
}
else if (const auto& a =
ngraph::as_type<ngraph::AttributeAdapter<std::shared_ptr<ngraph::Variable>>>(
&adapter))
{
std::string variable_id = m_attributes[name.c_str()].cast<std::string>();
if (!m_variables.count(variable_id))
{
m_variables[variable_id] = std::make_shared<ngraph::Variable>(ngraph::VariableInfo{
ngraph::PartialShape::dynamic(), ngraph::element::dynamic, variable_id});
}
a->set(m_variables[variable_id]);
}
else
{
NGRAPH_CHECK(
false, "No AttributeVisitor support for accessing attribute named: ", name);
}
}
}
void util::DictAttributeDeserializer::on_adapter(const std::string& name,
@@ -222,6 +336,28 @@ void util::DictAttributeDeserializer::on_adapter(
}
}
void util::DictAttributeDeserializer::on_adapter(
const std::string& name, ngraph::ValueAccessor<std::shared_ptr<ngraph::Function>>& adapter)
{
if (m_attributes.contains(name))
{
if (name == "body")
{
const py::dict& body_attrs = m_attributes[name.c_str()].cast<py::dict>();
const auto& body_outputs =
as_output_vector(body_attrs["results"].cast<ngraph::NodeVector>());
const auto& body_parameters = body_attrs["parameters"].cast<ngraph::ParameterVector>();
auto body = std::make_shared<ngraph::Function>(body_outputs, body_parameters);
adapter.set(body);
}
else
{
NGRAPH_CHECK(
false, "No AttributeVisitor support for accessing attribute named: ", name);
}
}
}
util::DictAttributeSerializer::DictAttributeSerializer(const std::shared_ptr<ngraph::Node>& node)
{
node->visit_attributes(*this);

View File

@@ -22,6 +22,7 @@
#include "ngraph/attribute_visitor.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/util/variable.hpp"
#include <pybind11/pybind11.h>
@@ -32,114 +33,96 @@ namespace util
class DictAttributeDeserializer : public ngraph::AttributeVisitor
{
public:
DictAttributeDeserializer(const py::dict& attributes);
DictAttributeDeserializer(
const py::dict& attributes,
std::unordered_map<std::string, std::shared_ptr<ngraph::Variable>>& variables);
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<void>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<bool>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::string>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<int8_t>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<int16_t>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<int32_t>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<int64_t>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<uint8_t>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<uint16_t>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<uint32_t>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<uint64_t>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<float>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<double>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<std::string>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<int8_t>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<int16_t>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<int32_t>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<int64_t>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<uint8_t>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<uint16_t>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<uint32_t>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<uint64_t>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<float>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<double>>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<void>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<bool>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::string>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<int8_t>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<int16_t>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<int32_t>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<int64_t>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<uint8_t>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<uint16_t>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<uint32_t>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<uint64_t>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<float>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<double>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<std::string>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<int8_t>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<int16_t>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<int32_t>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<int64_t>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<uint8_t>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<uint16_t>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<uint32_t>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<uint64_t>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<float>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<double>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::shared_ptr<ngraph::Function>>& adapter) override;
protected:
const py::dict& m_attributes;
std::unordered_map<std::string, std::shared_ptr<ngraph::Variable>>& m_variables;
};
class DictAttributeSerializer : public ngraph::AttributeVisitor
{
public:
DictAttributeSerializer(const std::shared_ptr<ngraph::Node>& node);
explicit DictAttributeSerializer(const std::shared_ptr<ngraph::Node>& node);
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<void>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<bool>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::string>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<int8_t>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<int16_t>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<int32_t>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<int64_t>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<uint8_t>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<uint16_t>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<uint32_t>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<uint64_t>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<float>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<double>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<std::string>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<int8_t>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<int16_t>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<int32_t>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<int64_t>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<uint8_t>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<uint16_t>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<uint32_t>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<uint64_t>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<float>>& adapter) override;
virtual void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<double>>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<void>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<bool>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::string>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<int8_t>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<int16_t>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<int32_t>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<int64_t>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<uint8_t>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<uint16_t>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<uint32_t>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<uint64_t>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<float>& adapter) override;
void on_adapter(const std::string& name, ngraph::ValueAccessor<double>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<std::string>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<int8_t>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<int16_t>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<int32_t>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<int64_t>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<uint8_t>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<uint16_t>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<uint32_t>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<uint64_t>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<float>>& adapter) override;
void on_adapter(const std::string& name,
ngraph::ValueAccessor<std::vector<double>>& adapter) override;
template <typename T>
T get_attribute(const std::string& name)

View File

@@ -117,7 +117,8 @@ void regclass_pyngraph_Node(py::module m)
[](std::shared_ptr<ngraph::Node>& self, const std::string& atr_name, py::object value) {
py::dict attr_dict;
attr_dict[atr_name.c_str()] = value;
util::DictAttributeDeserializer dict_deserializer(attr_dict);
std::unordered_map<std::string, std::shared_ptr<ngraph::Variable>> variables;
util::DictAttributeDeserializer dict_deserializer(attr_dict, variables);
self->visit_attributes(dict_deserializer);
});
}

View File

@@ -31,9 +31,9 @@
#include "ngraph/except.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/util/op_types.hpp"
#include "ngraph/op/util/variable.hpp"
#include "ngraph/opsets/opset.hpp"
#include "node_factory.hpp"
#include "tensor_iterator_builder.hpp"
namespace py = pybind11;
@@ -60,14 +60,7 @@ namespace
"Currently NodeFactory doesn't support Constant node: ",
op_type_name);
if (op_type_name == "TensorIterator")
{
// XXX: How to differentiate opsets?
return util::TensorIteratorBuilder(as_node_vector(arguments), attributes)
.configure(std::static_pointer_cast<ngraph::op::TensorIterator>(op_node));
}
util::DictAttributeDeserializer visitor(attributes);
util::DictAttributeDeserializer visitor(attributes, m_variables);
op_node->set_arguments(arguments);
op_node->visit_attributes(visitor);
@@ -104,6 +97,7 @@ namespace
}
const ngraph::OpSet& m_opset = ngraph::get_opset6();
std::unordered_map<std::string, std::shared_ptr<ngraph::Variable>> m_variables;
};
} // namespace

View File

@@ -1,224 +0,0 @@
//*****************************************************************************
// Copyright 2017-2021 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <string>
#include "ngraph/check.hpp"
#include "ngraph/except.hpp"
#include "tensor_iterator_builder.hpp"
util::TensorIteratorBuilder::TensorIteratorBuilder(const ngraph::NodeVector& arguments,
const py::dict& attributes)
: m_arguments(arguments)
, m_attributes(attributes)
{
get_graph_body();
// Set-up TI inputs.
NGRAPH_CHECK(m_attributes.contains("slice_input_desc"),
"The required \"slice_input_desc\" attribute is missing. Can't build "
"TensorIterator operator.");
m_slice_input_desc = m_attributes["slice_input_desc"].cast<py::list>();
if (m_attributes.contains("merged_input_desc"))
{
m_merged_input_desc = m_attributes["merged_input_desc"].cast<py::list>();
}
if (m_attributes.contains("invariant_input_desc"))
{
m_invariant_input_desc = m_attributes["invariant_input_desc"].cast<py::list>();
}
if (m_attributes.contains("body_output_desc"))
{
py::list body_output_desc = m_attributes["body_output_desc"].cast<py::list>();
for (py::handle h : body_output_desc)
{
py::dict desc = h.cast<py::dict>();
desc["type"] = "BodyOutputDesc";
check_attribute(desc, "output_idx", "BodyOutputDesc");
m_outputs.emplace(desc["output_idx"].cast<int64_t>(), desc);
}
}
if (m_attributes.contains("concat_output_desc"))
{
py::list concat_output_desc = m_attributes["concat_output_desc"].cast<py::list>();
for (py::handle h : concat_output_desc)
{
py::dict desc = h.cast<py::dict>();
desc["type"] = "ConcatOutputDesc";
check_attribute(desc, "output_idx", "ConcatOutputDesc");
m_outputs.emplace(desc["output_idx"].cast<int64_t>(), desc);
}
}
}
std::shared_ptr<ngraph::op::TensorIterator>
util::TensorIteratorBuilder::configure(std::shared_ptr<ngraph::op::TensorIterator>&& ti_node)
{
ti_node->set_body(m_body);
set_tensor_iterator_sliced_inputs(ti_node);
set_tensor_iterator_merged_inputs(ti_node);
set_tensor_iterator_invariant_inputs(ti_node);
set_tensor_iterator_outputs(ti_node);
ti_node->constructor_validate_and_infer_types();
return std::move(ti_node);
}
void util::TensorIteratorBuilder::check_attribute(const py::dict& attrs,
std::string attr_name,
std::string desc_name) const
{
NGRAPH_CHECK(attrs.contains(attr_name),
"The required \"",
attr_name,
"\" attribute is missing. Can't build TensorIterator's ",
desc_name,
".");
}
void util::TensorIteratorBuilder::get_graph_body()
{
NGRAPH_CHECK(m_attributes.contains("body"),
"The required \"body\" attribute is missing. Can't build TensorIterator "
"operator.");
const py::dict& body_attrs = m_attributes["body"].cast<py::dict>();
NGRAPH_CHECK(body_attrs.contains("parameters"),
"The required body's \"parameters\" "
"attribute is missing. Can't build TensorIterator's body.");
NGRAPH_CHECK(body_attrs.contains("results"),
"The required body's \"results\" "
"attribute is missing. Can't build TensorIterator's body.");
m_body_outputs = as_output_vector(body_attrs["results"].cast<ngraph::NodeVector>());
m_body_parameters = body_attrs["parameters"].cast<ngraph::ParameterVector>();
m_body = std::make_shared<ngraph::Function>(m_body_outputs, m_body_parameters);
}
void util::TensorIteratorBuilder::set_tensor_iterator_sliced_inputs(
std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const
{
for (py::handle h : m_slice_input_desc)
{
const py::dict& desc = h.cast<py::dict>();
check_attribute(desc, "input_idx", "SliceInputDesc");
check_attribute(desc, "body_parameter_idx", "SliceInputDesc");
check_attribute(desc, "start", "SliceInputDesc");
check_attribute(desc, "stride", "SliceInputDesc");
check_attribute(desc, "part_size", "SliceInputDesc");
check_attribute(desc, "end", "SliceInputDesc");
check_attribute(desc, "axis", "SliceInputDesc");
ti_node->set_sliced_input(m_body_parameters.at(desc["body_parameter_idx"].cast<int64_t>()),
m_arguments.at(desc["input_idx"].cast<int64_t>()),
desc["start"].cast<int64_t>(),
desc["stride"].cast<int64_t>(),
desc["part_size"].cast<int64_t>(),
desc["end"].cast<int64_t>(),
desc["axis"].cast<int64_t>());
}
}
void util::TensorIteratorBuilder::set_tensor_iterator_merged_inputs(
std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const
{
for (py::handle h : m_merged_input_desc)
{
const py::dict& desc = h.cast<py::dict>();
check_attribute(desc, "input_idx", "MergedInputDesc");
check_attribute(desc, "body_parameter_idx", "MergedInputDesc");
check_attribute(desc, "body_value_idx", "MergedInputDesc");
ti_node->set_merged_input(m_body_parameters.at(desc["body_parameter_idx"].cast<int64_t>()),
m_arguments.at(desc["input_idx"].cast<int64_t>()),
m_body_outputs.at(desc["body_value_idx"].cast<int64_t>()));
}
}
void util::TensorIteratorBuilder::set_tensor_iterator_invariant_inputs(
std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const
{
for (py::handle h : m_invariant_input_desc)
{
const py::dict& desc = h.cast<py::dict>();
check_attribute(desc, "input_idx", "InvariantInputDesc");
check_attribute(desc, "body_parameter_idx", "InvariantInputDesc");
ti_node->set_invariant_input(
m_body_parameters.at(desc["body_parameter_idx"].cast<int64_t>()),
m_arguments.at(desc["input_idx"].cast<int64_t>()));
}
}
void util::TensorIteratorBuilder::set_tensor_iterator_outputs(
    std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const
{
    // Dispatch every registered output descriptor to the matching setter.
    // m_outputs is an ordered map, so outputs are configured in key order.
    for (const auto& elem : m_outputs)
    {
        const py::dict& desc = elem.second.cast<py::dict>();
        // Validate the discriminator key before use, consistent with how every
        // other descriptor key in this builder is checked via check_attribute.
        check_attribute(desc, "type", "TensorIterator output");
        // Cast once instead of once per comparison.
        const auto output_type = desc["type"].cast<std::string>();
        if (output_type == "BodyOutputDesc")
        {
            set_tensor_iterator_body_output(desc, ti_node);
        }
        else if (output_type == "ConcatOutputDesc")
        {
            set_tensor_iterator_concatenated_body_output(desc, ti_node);
        }
        else
        {
            throw ngraph::ngraph_error("Unrecognized TensorIterator output type.");
        }
    }
}
void util::TensorIteratorBuilder::set_tensor_iterator_body_output(
    const py::dict& desc, std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const
{
    // "output_idx" is read by the NGRAPH_CHECK below, so validate its presence
    // first (it was previously accessed unchecked, unlike every other key).
    check_attribute(desc, "output_idx", "BodyOutputDesc");
    check_attribute(desc, "body_value_idx", "BodyOutputDesc");
    check_attribute(desc, "iteration", "BodyOutputDesc");
    // Outputs must be appended in order: the descriptor's index has to match
    // the next free output slot on the node.
    NGRAPH_CHECK(desc["output_idx"].cast<size_t>() == ti_node->get_output_size(),
                 "Descriptor output idx value is different from currently configured "
                 "TensorIterator output.");
    ti_node->get_iter_value(m_body_outputs.at(desc["body_value_idx"].cast<int64_t>()),
                            desc["iteration"].cast<int64_t>());
}
void util::TensorIteratorBuilder::set_tensor_iterator_concatenated_body_output(
    const py::dict& desc, std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const
{
    // "output_idx" is read by the NGRAPH_CHECK below, so validate its presence
    // first (it was previously accessed unchecked, unlike every other key).
    check_attribute(desc, "output_idx", "ConcatOutputDesc");
    check_attribute(desc, "body_value_idx", "ConcatOutputDesc");
    check_attribute(desc, "start", "ConcatOutputDesc");
    check_attribute(desc, "stride", "ConcatOutputDesc");
    check_attribute(desc, "part_size", "ConcatOutputDesc");
    check_attribute(desc, "end", "ConcatOutputDesc");
    check_attribute(desc, "axis", "ConcatOutputDesc");
    // Outputs must be appended in order: the descriptor's index has to match
    // the next free output slot on the node.
    NGRAPH_CHECK(desc["output_idx"].cast<size_t>() == ti_node->get_output_size(),
                 "Descriptor output idx value is different from currently configured "
                 "TensorIterator output.");
    ti_node->get_concatenated_slices(m_body_outputs.at(desc["body_value_idx"].cast<int64_t>()),
                                     desc["start"].cast<int64_t>(),
                                     desc["stride"].cast<int64_t>(),
                                     desc["part_size"].cast<int64_t>(),
                                     desc["end"].cast<int64_t>(),
                                     desc["axis"].cast<int64_t>());
}

View File

@@ -1,135 +0,0 @@
//*****************************************************************************
// Copyright 2017-2021 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cctype>
#include <map>
#include <memory>
#include <pybind11/numpy.h>
#include <pybind11/stl.h>
#include "ngraph/node.hpp"
#include "ngraph/op/parameter.hpp"
#include "ngraph/op/tensor_iterator.hpp"
namespace py = pybind11;
namespace util
{
///
/// \brief Builds an ngraph TensorIterator node from Python-side descriptors.
///
/// The builder holds references to the caller's arguments/attributes, so the
/// caller must keep those objects alive for the builder's whole lifetime
/// (both are stored as references below, not copies).
///
class TensorIteratorBuilder
{
public:
///
/// \brief Initialize TensorIterator node builder.
///
/// \param[in] arguments The arguments passed to TensorIterator node.
/// \param[in] attributes The TensorIterator's attributes. This
/// py::dict contains all descriptors for
/// plethora of TensorIterator available inputs
/// and outputs.
///
TensorIteratorBuilder(const ngraph::NodeVector& arguments, const py::dict& attributes);
///
/// \brief Configure instance of TensorIterator node with set-up parameters.
///
/// \param ti_node The TensorIterator node instance to configure.
///
/// \return TensorIterator node.
///
std::shared_ptr<ngraph::op::TensorIterator>
configure(std::shared_ptr<ngraph::op::TensorIterator>&& ti_node);
private:
///
/// \brief Helper to conduct attribute presence.
///
/// NOTE(review): definition not shown here; presumably raises an error when
/// attr_name is missing from attrs — confirm against the implementation.
///
/// \param[in] attrs The attributes
/// \param[in] attr_name The attribute name
/// \param[in] desc_name The description name
///
inline void check_attribute(const py::dict& attrs,
std::string attr_name,
std::string desc_name) const;
///
/// \brief Retrieve the TI graph body.
///
void get_graph_body();
///
/// \brief Sets the tensor iterator sliced inputs.
///
/// \param ti_node The TI node we will set input to.
///
void set_tensor_iterator_sliced_inputs(
std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const;
///
/// \brief Sets the tensor iterator merged inputs.
///
/// \param ti_node The TI node we will set inputs to.
///
void set_tensor_iterator_merged_inputs(
std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const;
///
/// \brief Sets the tensor iterator invariant inputs.
///
/// \param ti_node The TI node we will set inputs to.
///
void set_tensor_iterator_invariant_inputs(
std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const;
///
/// \brief Sets the tensor iterator outputs.
///
/// \param ti_node The TI node we will set outputs to.
///
void
set_tensor_iterator_outputs(std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const;
///
/// \brief Sets the tensor iterator body output.
///
/// \param[in] desc The descriptor of the TI body output.
/// \param ti_node The TI node we will set output to.
///
void set_tensor_iterator_body_output(
const py::dict& desc, std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const;
///
/// \brief Sets the tensor iterator concatenated body output.
///
/// \param[in] desc The descriptor of the TI body output.
/// \param ti_node The TI node we will set output to.
///
void set_tensor_iterator_concatenated_body_output(
const py::dict& desc, std::shared_ptr<ngraph::op::TensorIterator>& ti_node) const;
// Reference to the caller-owned TI input nodes (not owned, must outlive us).
const ngraph::NodeVector& m_arguments;
// Reference to the caller-owned attribute dict (not owned, must outlive us).
const py::dict& m_attributes;
// Outputs of the TI body graph.
ngraph::OutputVector m_body_outputs;
// Parameters of the TI body graph.
ngraph::ParameterVector m_body_parameters;
// The TI body as an ngraph Function.
std::shared_ptr<ngraph::Function> m_body;
// Raw per-kind descriptor lists taken from m_attributes.
py::list m_slice_input_desc;
py::list m_merged_input_desc;
py::list m_invariant_input_desc;
// Output descriptors; presumably keyed by the descriptor's output_idx
// (the .cpp checks output_idx against the node's output count) — verify
// against the constructor.
std::map<int64_t, const py::dict> m_outputs;
};
} // namespace util

View File

@@ -19,10 +19,9 @@ from _pyngraph import PartialShape
import ngraph as ng
import ngraph.opset1 as ng_opset1
import ngraph.opset5 as ng_opset5
from ngraph.impl import Type
from tests import skip_segfault
np_types = [np.float32, np.int32]
integral_np_types = [
np.int8,
@@ -718,14 +717,89 @@ def test_rnn_sequence():
assert list(node_param.get_output_shape(1)) == expected_shape_h
@skip_segfault
def test_loop():
# NOTE(review): this block looks like a flattened diff rendering that merges
# the REMOVED and the ADDED versions of test_loop into one body: trip_count
# is set to 8 and then reassigned to a constant node; node_default is built
# from the pre-reassignment values and asserted mid-function. Verify against
# the actual repository file before relying on this text.
trip_count = 8
condition = True
from ngraph.utils.tensor_iterator_types import (
GraphBody,
TensorIteratorSliceInputDesc,
TensorIteratorMergedInputDesc,
TensorIteratorInvariantInputDesc,
TensorIteratorBodyOutputDesc,
TensorIteratorConcatOutputDesc,
)
# Loop with only trip count and condition (no body) — from the old test.
node_default = ng.loop(trip_count, condition)
condition = ng.constant(True, dtype=np.bool)
trip_count = ng.constant(16, dtype=np.int32)
# Body parameters
body_timestep = ng.parameter([], np.int32, "timestep")
body_data_in = ng.parameter([1, 2, 2], np.float32, "body_in")
body_prev_cma = ng.parameter([2, 2], np.float32, "body_prev_cma")
body_const_one = ng.parameter([], np.int32, "body_const_one")
assert node_default.get_type_name() == "Loop"
# CMA = cumulative moving average
prev_cum_sum = ng.multiply(ng.convert(body_timestep, "f32"), body_prev_cma)
curr_cum_sum = ng.add(prev_cum_sum, ng.squeeze(body_data_in, [0]))
elem_cnt = ng.add(body_const_one, body_timestep)
curr_cma = ng.divide(curr_cum_sum, ng.convert(elem_cnt, "f32"))
cma_hist = ng.unsqueeze(curr_cma, [0])
# TI inputs
data = ng.parameter([16, 2, 2], np.float32, "data")
# Iterations count
zero = ng.constant(0, dtype=np.int32)
one = ng.constant(1, dtype=np.int32)
initial_cma = ng.constant(np.zeros([2, 2], dtype=np.float32), dtype=np.float32)
iter_cnt = ng.range(zero, np.int32(16), np.int32(1))
ti_inputs = [iter_cnt, data, initial_cma, one]
body_const_condition = ng.constant(True, dtype=np.bool)
# Body graph: 4 parameters in, 3 values out (cma, its history, loop condition).
graph_body = GraphBody([body_timestep, body_data_in, body_prev_cma, body_const_one],
[curr_cma, cma_hist, body_const_condition])
ti_slice_input_desc = [
# timestep
# input_idx, body_param_idx, start, stride, part_size, end, axis
TensorIteratorSliceInputDesc(2, 0, 0, 1, 1, -1, 0),
# data
TensorIteratorSliceInputDesc(3, 1, 0, 1, 1, -1, 0),
]
ti_merged_input_desc = [
# body prev/curr_cma
TensorIteratorMergedInputDesc(4, 2, 0),
]
ti_invariant_input_desc = [
# body const one
TensorIteratorInvariantInputDesc(5, 3),
]
# TI outputs
ti_body_output_desc = [
# final average
TensorIteratorBodyOutputDesc(0, 0, -1),
]
ti_concat_output_desc = [
# history of cma
TensorIteratorConcatOutputDesc(1, 1, 0, 1, 1, -1, 0),
]
# Full Loop: trailing args 2 and -1 — presumably the special body port
# indices (current iteration / condition); confirm against ng.loop's docs.
node = ng.loop(
trip_count,
condition,
ti_inputs,
graph_body,
ti_slice_input_desc,
ti_merged_input_desc,
ti_invariant_input_desc,
ti_body_output_desc,
ti_concat_output_desc,
2,
-1,
)
assert node.get_type_name() == "Loop"
assert node.get_output_size() == 2
# final average
assert list(node.get_output_shape(0)) == [2, 2]
# cma history
assert list(node.get_output_shape(1)) == [16, 2, 2]
def test_roi_pooling():
@@ -1096,6 +1170,28 @@ def test_tensor_iterator():
assert list(node.get_output_shape(1)) == [16, 2, 2]
def test_read_value_opset5():
    """ReadValue (opset5) must preserve its input's shape and element type."""
    variable_init = ng_opset5.parameter([2, 2], name="init_value", dtype=np.int32)
    read_node = ng_opset5.read_value(variable_init, "var_id_667")

    assert read_node.get_type_name() == "ReadValue"
    assert read_node.get_output_size() == 1
    assert list(read_node.get_output_shape(0)) == [2, 2]
    assert read_node.get_output_element_type(0) == Type.i32
def test_assign_opset5():
    """Assign (opset5) fed by a ReadValue keeps the variable's shape and type."""
    source = ng_opset5.parameter([5, 7], name="input_data", dtype=np.int32)
    variable = ng_opset5.read_value(source, "var_id_667")
    assign_node = ng_opset5.assign(variable, "var_id_667")

    assert assign_node.get_type_name() == "Assign"
    assert assign_node.get_output_size() == 1
    assert list(assign_node.get_output_shape(0)) == [5, 7]
    assert assign_node.get_output_element_type(0) == Type.i32
def test_read_value():
init_value = ng.parameter([2, 2], name="init_value", dtype=np.int32)

View File

@@ -26,6 +26,7 @@
#include "ngraph/graph_util.hpp"
#include "ngraph/ngraph.hpp"
#include "ngraph/op/util/op_annotations.hpp"
#include "ngraph/opsets/opset6.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/visualize_tree.hpp"
#include "util/all_close.hpp"
@@ -261,6 +262,61 @@ TEST(graph_util, clone_multiple_results)
auto copy = clone_function(*f);
}
TEST(graph_util, clone_rt_info)
{
    // clone_function() must carry per-node runtime info ("affinity" entries)
    // over to the cloned graph.
    const std::string testAffinity = "CPU";

    // Build the source function: Relu -> Reshape (target shape assembled from
    // the MatMul weights via ShapeOf/Gather/Concat) -> MatMul -> bias Add.
    std::shared_ptr<ngraph::Function> original_f;
    {
        const ngraph::PartialShape inputShape({1, 84});
        const ngraph::element::Type elemType(ngraph::element::Type_t::f32);
        auto inputParam = std::make_shared<ngraph::opset6::Parameter>(elemType, inputShape);

        auto weights =
            ngraph::opset6::Constant::create(ngraph::element::Type_t::f32, {10, 84}, {1});
        auto weightsShape = std::make_shared<ngraph::opset6::ShapeOf>(weights);
        auto gatherIdx = ngraph::opset6::Constant::create(ngraph::element::Type_t::i32, {1}, {1});
        auto gatherAxis = ngraph::opset6::Constant::create(ngraph::element::Type_t::i64, {}, {0});
        auto outDim = std::make_shared<ngraph::opset6::Gather>(weightsShape, gatherIdx, gatherAxis);
        auto batchDim = ngraph::opset6::Constant::create(ngraph::element::Type_t::i64, {1}, {1});
        auto targetShape =
            std::make_shared<ngraph::opset6::Concat>(ngraph::NodeVector{batchDim, outDim}, 0);

        auto relu = std::make_shared<ngraph::opset6::Relu>(inputParam);
        auto reshape = std::make_shared<ngraph::opset6::Reshape>(relu, targetShape, false);
        auto matMul = std::make_shared<ngraph::opset6::MatMul>(reshape, weights, false, true);
        auto bias = ngraph::opset6::Constant::create(ngraph::element::Type_t::f32, {1, 10}, {1});
        auto biasAdd = std::make_shared<ngraph::opset6::Add>(matMul, bias);
        auto result = std::make_shared<ngraph::opset6::Result>(biasAdd);

        original_f = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
                                                        ngraph::ParameterVector{inputParam});
    }

    // Tag every node with an "affinity" rt_info entry, remembering what was set.
    std::unordered_map<std::string, std::string> affinity;
    for (auto&& node : original_f->get_ordered_ops())
    {
        node->get_rt_info()["affinity"] =
            std::make_shared<ngraph::VariantWrapper<std::string>>(testAffinity);
        affinity[node->get_friendly_name()] = testAffinity;
    }

    // Every cloned node must expose the same "affinity" value.
    auto clonedFunction = ngraph::clone_function(*original_f);
    for (auto&& node : clonedFunction->get_ordered_ops())
    {
        auto& rtInfo = node->get_rt_info();
        auto found = rtInfo.find("affinity");
        ASSERT_TRUE(found != rtInfo.end());
        auto value =
            ngraph::as_type_ptr<ngraph::VariantWrapper<std::string>>(found->second)->get();
        ASSERT_TRUE(affinity.find(node->get_friendly_name()) != affinity.end());
        ASSERT_TRUE(affinity[node->get_friendly_name()] == value);
    }
}
TEST(util, round_up)
{
EXPECT_EQ(0, round_up(0, 4));

View File

@@ -70,8 +70,9 @@
#include <openvino/pp.hpp>
#include <openvino/itt.hpp>
#define OV_CC_EXPAND(...) OV_PP_EXPAND(__VA_ARGS__)
#define OV_CC_CAT(_0, _1) OV_PP_CAT(_0, _1)
#define OV_CC_EXPAND OV_PP_EXPAND
#define OV_CC_CAT OV_PP_CAT
#define OV_CC_TOSTRING OV_PP_TOSTRING
#ifdef SELECTIVE_BUILD_ANALYZER
# include <string>

View File

@@ -38,7 +38,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
try {
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork network = ie.ReadNetwork(net, weights_blob);
} catch (const InferenceEngine::details::InferenceEngineException& error) {
} catch (const std::exception&) {
return 0; // fail gracefully on expected exceptions
}