Merge remote-tracking branch 'upstream/master'

Steve Yoo 2021-08-20 13:32:53 +09:00
commit ef937a5a52
446 changed files with 10885 additions and 5977 deletions

View File

@ -4,17 +4,13 @@ jobs:
matrix: matrix:
Release: Release:
BUILD_TYPE: 'Release' BUILD_TYPE: 'Release'
PROTOBUF_LITE: 'OFF' PROTOBUF_LITE: 'ON'
TOX_COMMAND: 'tox && tox -e zoo_models' TOX_COMMAND: 'tox && tox -e zoo_models'
Debug: Debug:
BUILD_TYPE: 'Debug' BUILD_TYPE: 'Debug'
PROTOBUF_LITE: 'OFF'
TOX_COMMAND: 'tox'
Protobuf_lite:
BUILD_TYPE: 'Release'
PROTOBUF_LITE: 'ON' PROTOBUF_LITE: 'ON'
TOX_COMMAND: 'tox && tox -e zoo_models' TOX_COMMAND: 'tox'
maxParallel: 3 maxParallel: 2
# About 300% of total time # About 300% of total time
timeoutInMinutes: 90 timeoutInMinutes: 90
@ -56,10 +52,10 @@ jobs:
- script: | - script: |
rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR) rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
sudo rm -rf $(TMP_DIR) ; sudo mkdir $(TMP_DIR) ; sudo chmod 777 -R $(TMP_DIR)
sudo mkdir -p $(MODELS_DIR) sudo mkdir -p $(MODELS_DIR)
sudo apt --assume-yes install nfs-common sudo apt --assume-yes install nfs-common
sudo mount -vvv -t nfs cinfsshare.file.core.windows.net:/cinfsshare/onnxtestdata $(MODELS_DIR) -o vers=4,minorversion=1,sec=sys sudo mount -vvv -t nfs cinfsshare.file.core.windows.net:/cinfsshare/onnxtestdata $(MODELS_DIR) -o vers=4,minorversion=1,sec=sys
mkdir -p $(MODELS_DIR)/models_data
displayName: 'Make dirs' displayName: 'Make dirs'
- checkout: self - checkout: self
@ -76,15 +72,15 @@ jobs:
workingDirectory: $(WORK_DIR) workingDirectory: $(WORK_DIR)
displayName: 'Install dependencies' displayName: 'Install dependencies'
- script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(MODELS_DIR)/models_data -o -s "$(ONNX_MODEL_ZOO_SHA)"
displayName: 'Update models'
condition: ne(variables['BUILD_TYPE'], 'Debug')
- script: sudo docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg PROTOBUF_LITE=$(PROTOBUF_LITE) . - script: sudo docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg PROTOBUF_LITE=$(PROTOBUF_LITE) .
displayName: 'Docker build $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)' displayName: 'Docker build $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)'
- script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(TMP_DIR) -o -s "$(ONNX_MODEL_ZOO_SHA)"
displayName: 'Get models'
condition: ne(variables['BUILD_TYPE'], 'Debug')
- script: sudo fallocate -l 64G /swapfile ; sudo mkswap /swapfile ; sudo swapon /swapfile ; df ; free -h - script: sudo fallocate -l 64G /swapfile ; sudo mkswap /swapfile ; sudo swapon /swapfile ; df ; free -h
displayName: 'Create swap' displayName: 'Create swap'
- script: sudo docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "$(TOX_COMMAND)" - script: sudo docker run --name openvino-onnx-ci-container --volume $(MODELS_DIR)/models_data/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "$(TOX_COMMAND)"
displayName: 'Docker run $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)' displayName: 'Docker run $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)'

View File

@ -16,7 +16,7 @@ jobs:
timeoutInMinutes: 120 timeoutInMinutes: 120
pool: pool:
name: WIN_VMSS_VENV_F8S_WU2 name: WIN_VMSS_VENV_F16S_WU2
variables: variables:
system.debug: true system.debug: true
@ -34,8 +34,6 @@ jobs:
INSTALL_DIR: $(WORK_DIR)\install_pkg INSTALL_DIR: $(WORK_DIR)\install_pkg
INSTALL_TEST_DIR: $(INSTALL_DIR)\tests INSTALL_TEST_DIR: $(INSTALL_DIR)\tests
SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat
IB_DIR: C:\Program Files (x86)\IncrediBuild
IB_TESTCONSOLE: $(IB_DIR)\IBTestConsole.exe
steps: steps:
- script: | - script: |
@ -59,12 +57,6 @@ jobs:
rd /Q /S $(BUILD_SAMPLES_DIR) & mkdir $(BUILD_SAMPLES_DIR) rd /Q /S $(BUILD_SAMPLES_DIR) & mkdir $(BUILD_SAMPLES_DIR)
displayName: 'Make dir' displayName: 'Make dir'
- script: |
certutil -urlcache -split -f https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
call install_ib_console.bat
workingDirectory: $(WORK_DIR)
displayName: 'Install IncrediBuild'
- checkout: self - checkout: self
clean: true clean: true
lfs: false lfs: false
@ -109,9 +101,7 @@ jobs:
- script: dir $(REPO_DIR)\inference-engine\temp\ /s - script: dir $(REPO_DIR)\inference-engine\temp\ /s
displayName: 'List temp SDKs' displayName: 'List temp SDKs'
- script: | - script: call "$(MSVS_VARS_PATH)" && $(WORK_DIR)\ninja-win\ninja
set PATH=$(WORK_DIR)\ninja-win;%PATH%
call "$(MSVS_VARS_PATH)" && "C:\Program Files (x86)\IncrediBuild\BuildConsole.exe" /COMMAND="ninja"
workingDirectory: $(BUILD_DIR) workingDirectory: $(BUILD_DIR)
displayName: 'Build Win' displayName: 'Build Win'
@ -153,10 +143,8 @@ jobs:
displayName: 'PaddlePaddle Frontend UT' displayName: 'PaddlePaddle Frontend UT'
continueOnError: false continueOnError: false
- script: | - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests.xml
set PATH=$(IB_DIR);%PATH% displayName: 'IE UT old'
call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests-IB.xml
displayName: 'IE UT old - IB'
continueOnError: false continueOnError: false
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ieUnitTests --gtest_output=xml:TEST-ieUnitTests.xml - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ieUnitTests --gtest_output=xml:TEST-ieUnitTests.xml
@ -187,11 +175,8 @@ jobs:
displayName: 'TEMPLATE FuncTests' displayName: 'TEMPLATE FuncTests'
continueOnError: false continueOnError: false
# call $(SETUPVARS) && $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-cpuFuncTests.xml - script: $(SETUPVARS) && $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-cpuFuncTests.xml
- script: | displayName: 'CPU FuncTests'
set PATH=$(IB_DIR);%PATH%
call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke*:-*CompareWithRefs/base_size=16_pre_nms_topn=100_post_nms_topn=100_nms_thresh=0.7_feat_stride=1_min_size=1_ratio*:*smoke_GRUSequenceCommonZeroClip/GRUSequenceTest.CompareWithRefs/mode=CONVERT_TO_TI_MAX_SEQ_LEN_CONST_seq_lengths* --gtest_output=xml:TEST-cpuFuncTests-IB.xml /testlevel=24
displayName: 'CPU FuncTests - IB'
continueOnError: false continueOnError: false
- script: | - script: |
@ -213,8 +198,3 @@ jobs:
buildPlatform: 'x64' # Optional buildPlatform: 'x64' # Optional
buildConfiguration: 'Windows' # Optional buildConfiguration: 'Windows' # Optional
#publishRunAttachments: true # Optional #publishRunAttachments: true # Optional
- script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
displayName: Stop IncrediBuild
continueOnError: true
enabled: false

View File

@ -1,7 +1,7 @@
jobs: jobs:
- job: WinCC - job: WinCC
# About 150% of total time # About 150% of total time
timeoutInMinutes: 120 timeoutInMinutes: 60
pool: pool:
name: WIN_VMSS_VENV_F8S_WU2 name: WIN_VMSS_VENV_F8S_WU2
@ -10,26 +10,22 @@ jobs:
system.debug: true system.debug: true
VSTS_HTTP_RETRY: 5 VSTS_HTTP_RETRY: 5
VSTS_HTTP_TIMEOUT: 200 VSTS_HTTP_TIMEOUT: 200
WORKERS_NUMBER: 8
BUILD_TYPE: Release BUILD_TYPE: Release
REPO_DIR: $(Build.Repository.LocalPath) REPO_DIR: $(Build.Repository.LocalPath)
OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)\..\openvino_contrib OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)\..\openvino_contrib
MODELS_PATH: $(REPO_DIR)\..\testdata MODELS_PATH: $(REPO_DIR)\..\testdata
WORK_DIR: $(Pipeline.Workspace)\_w WORK_DIR: $(Pipeline.Workspace)\_w
BUILD_DIR: D:\build BUILD_DIR: D:\build
BIN_DIR: $(REPO_DIR)\bin\intel64
MSVS_VARS_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat MSVS_VARS_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat
MSVC_COMPILER_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\bin\Hostx64\x64\cl.exe MSVC_COMPILER_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\bin\Hostx64\x64\cl.exe
INSTALL_DIR: $(WORK_DIR)\install_pkg INSTALL_DIR: $(WORK_DIR)\install_pkg
SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat
IB_DIR: C:\Program Files (x86)\IncrediBuild
IB_TESTCONSOLE: $(IB_DIR)\IBTestConsole.exe
TEST_ENV_PATH: $(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.2\opencv\bin;$(IB_DIR);%PATH%
steps: steps:
- script: | - script: |
powershell -command "Invoke-RestMethod -Headers @{\"Metadata\"=\"true\"} -Method GET -Uri http://169.254.169.254/metadata/instance/compute?api-version=2019-06-01 | format-custom" powershell -command "Invoke-RestMethod -Headers @{\"Metadata\"=\"true\"} -Method GET -Uri http://169.254.169.254/metadata/instance/compute?api-version=2019-06-01 | format-custom"
where python3 where python3
python3 --version
where python where python
python --version python --version
where java where java
@ -46,12 +42,6 @@ jobs:
rd /Q /S $(BUILD_DIR) & mkdir $(BUILD_DIR) rd /Q /S $(BUILD_DIR) & mkdir $(BUILD_DIR)
displayName: 'Make dir' displayName: 'Make dir'
- script: |
certutil -urlcache -split -f https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
call install_ib_console.bat
workingDirectory: $(WORK_DIR)
displayName: 'Install IncrediBuild'
- checkout: self - checkout: self
clean: true clean: true
lfs: false lfs: false
@ -59,7 +49,8 @@ jobs:
path: openvino path: openvino
- script: | - script: |
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-win.zip ninja-win.zip rem Speed up build
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip ninja-win.zip
powershell -command "Expand-Archive -Force ninja-win.zip" powershell -command "Expand-Archive -Force ninja-win.zip"
workingDirectory: $(WORK_DIR) workingDirectory: $(WORK_DIR)
displayName: 'Install dependencies' displayName: 'Install dependencies'
@ -70,20 +61,19 @@ jobs:
workingDirectory: $(BUILD_DIR) workingDirectory: $(BUILD_DIR)
displayName: 'CMake' displayName: 'CMake'
- script: | - script: dir $(REPO_DIR)\inference-engine\temp\ /s
set PATH=$(WORK_DIR)\ninja-win;%PATH% displayName: 'List temp SDKs'
call "$(MSVS_VARS_PATH)" && "C:\Program Files (x86)\IncrediBuild\BuildConsole.exe" /COMMAND="ninja"
- script: call "$(MSVS_VARS_PATH)" && $(WORK_DIR)\ninja-win\ninja
workingDirectory: $(BUILD_DIR) workingDirectory: $(BUILD_DIR)
displayName: 'Build Win' displayName: 'Build Win CC'
- script: dir $(REPO_DIR)\bin\ /s - script: dir $(REPO_DIR)\bin\ /s
displayName: 'List files' displayName: 'List bin files'
- script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake
workingDirectory: $(BUILD_DIR) workingDirectory: $(BUILD_DIR)
displayName: 'Install' displayName: 'Install'
- script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent - script: dir $(INSTALL_DIR) /s
displayName: Stop IncrediBuild displayName: 'List install files'
continueOnError: true
enabled: false

View File

@ -4,7 +4,7 @@ LABEL version=2021.03.30.1
# Build configuration arguments # Build configuration arguments
ARG BUILD_TYPE=Release ARG BUILD_TYPE=Release
ARG PROTOBUF_LITE=OFF ARG PROTOBUF_LITE=ON
ARG http_proxy ARG http_proxy
ARG https_proxy ARG https_proxy

View File

@ -75,6 +75,6 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins
*.md @openvinotoolkit/openvino-docs-maintainers *.md @openvinotoolkit/openvino-docs-maintainers
# Control 3d party dependencies # Control 3d party dependencies
*requirements* @openvino-configuration-mgmt **/*requirements*.* @openvino-configuration-mgmt
*setup.py @openvino-configuration-mgmt **/setup.py @openvino-configuration-mgmt
/scripts/install_dependencies/ @openvino-configuration-mgmt /scripts/install_dependencies/ @openvino-configuration-mgmt

View File

@ -18,9 +18,11 @@ FunctionTemplate: '^(operator.+|\w+)$'
TypeAliasName: '^\w+$' TypeAliasName: '^\w+$'
VariableReference: '^\w+$' VariableReference: '^\w+$'
EnumName: '^[A-Z][\w]+$'
# excepts element_type
# TODO: Fix interpolate
EnumConstantName: '^([A-Z\d_]+|undefined|dynamic|boolean|bf16|f16|f32|f64|i4|i8|i16|i32|i64|u1|u4|u8|u16|u32|u64|asymmetric|align_corners|round_prefer_floor|round_prefer_ceil|floor|ceil|simple|nearest|linear|linear_onnx|cubic|area|scales|sizes|half_pixel|tf_half_pixel_for_nn|pytorch_half_pixel|asymetric)$'
# TODO: align # TODO: align
EnumConstantName: '^.*$'
EnumName: '^.*$'
UsingDeclaration: '^.*$' UsingDeclaration: '^.*$'
TypedefName: '^.*$' TypedefName: '^.*$'

View File

@ -0,0 +1,34 @@
# Paddle Support in the OpenVINO™ {#openvino_docs_IE_DG_Paddle_Support}
Starting from the 2022.1 release, OpenVINO™ supports reading native Paddle models.
The `Core::ReadNetwork()` method provides a uniform way to read models from either the IR or the Paddle format and is the recommended approach to reading models.
## Read Paddle Models from IR
After [Converting a Paddle Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md) to the [Intermediate Representation (IR)](../MO_DG/IR_and_opsets.md), the model can be read with the recommended approach. Example:
```cpp
InferenceEngine::Core core;
auto network = core.ReadNetwork("model.xml");
```
## Read Paddle Models from Paddle Format (Paddle `inference model` model type)
**Example:**
```cpp
InferenceEngine::Core core;
auto network = core.ReadNetwork("model.pdmodel");
```
**Reshape feature:**
OpenVINO™ does not provide a mechanism to specify pre-processing, such as mean value subtraction or reverse input channels, for the Paddle format.
If a Paddle model contains dynamic input shapes, use the `CNNNetwork::reshape` method for shape specialization, as shown in the sketch below.
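The following is a minimal sketch of that reshape call, assuming a Paddle model with a single input named `x`; the input name and the `1x3x224x224` target shape are placeholders, not taken from a real model:
```cpp
#include <ie_core.hpp>

int main() {
    InferenceEngine::Core core;
    InferenceEngine::CNNNetwork network = core.ReadNetwork("model.pdmodel");
    // Collect the current input shapes and overwrite the dynamic one
    // with a fully defined shape before loading the network.
    InferenceEngine::ICNNNetwork::InputShapes shapes = network.getInputShapes();
    shapes["x"] = {1, 3, 224, 224};  // placeholder input name and shape
    network.reshape(shapes);
    return 0;
}
```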
## NOTE
* A Paddle [`inference model`](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/inference_en.md) mainly consists of two kinds of files, `model.pdmodel` (model file) and `model.pdiparams` (params file), which are used for inference.
* The list of supported Paddle models and instructions on how to export them are described in [Convert a Paddle Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md).
* For `Normalize` Paddle models, the input data should be in FP32 format.
* When reading Paddle models from the Paddle format, make sure that `model.pdmodel` and `model.pdiparams` are in the same directory.

View File

@ -0,0 +1,128 @@
# Auto-Device Plugin {#openvino_docs_IE_DG_supported_plugins_AUTO}
## Auto-Device Plugin Execution
Auto-device is a new, special "virtual" or "proxy" device in the OpenVINO™ toolkit.
Use "AUTO" as the device name to delegate selection of an actual accelerator to OpenVINO.
With the 2021.4 release, Auto-device internally recognizes and selects devices from CPU,
integrated GPU and discrete Intel GPUs (when available) depending on the device capabilities and the characteristics of the CNN model,
for example, precision. Auto-device then assigns inference requests to the selected device.
From the application's point of view, this is just another device that handles all accelerators in the system.
With the 2021.4 release, Auto-device setup is done in three major steps:
* Step 1: Configure each device as usual (for example, via the conventional <code>SetConfig</code> method)
* Step 2: Load a network to the Auto-device plugin. This is the only change needed in your application
* Step 3: Just like with any other executable network (resulting from <code>LoadNetwork</code>), create as many requests as needed to saturate the devices.
These steps are covered below in detail.
## Defining and Configuring the Auto-Device Plugin
Following the OpenVINO notion of “devices”, the Auto-device is named “AUTO”. The only configuration option for the Auto-device is a limited device list:
| Parameter name | Parameter values | Default | Description |
| :--- | :--- | :--- |:-----------------------------------------------------------------------------|
| "AUTO_DEVICE_LIST" | comma-separated device names <span style="color:red">with no spaces</span>| N/A | Device candidate list to be selected |
You can use the configuration name directly as a string or use <code>IE::KEY_AUTO_DEVICE_LIST</code> from <code>ie_plugin_config.hpp</code>,
which defines the same string.
There are two ways to use Auto-device:
1. Directly indicate the device with “AUTO” or an empty string:
@snippet snippets/AUTO0.cpp part0
2. Use the Auto-device configuration to limit the list of device candidates to be selected:
@snippet snippets/AUTO1.cpp part1
Auto-device supports querying device optimization capabilities as a metric:
| Parameter name | Parameter values |
| :--- | :--- |
| "OPTIMIZATION_CAPABILITIES" | Auto-Device capabilities |
## Enumerating Available Devices and Auto-Device Selecting Logic
### Enumerating Available Devices
Inference Engine now features a dedicated API to enumerate devices and their capabilities.
See [Hello Query Device C++ Sample](../../../inference-engine/samples/hello_query_device/README.md).
This is the example output from the sample (truncated to the devices' names only):
```sh
./hello_query_device
Available devices:
Device: CPU
...
Device: GPU.0
...
Device: GPU.1
```
### Default Auto-Device selecting logic
With the 2021.4 release, Auto-Device selects the most suitable device with the following default logic:
1. Check if dGPU, iGPU and CPU devices are available
2. Get the precision of the input model, such as FP32
3. According to the priority of dGPU, iGPU and CPU (in this order), if the device supports the precision of the input network, select it as the most suitable device
For example, CPU, dGPU and iGPU can support the following precisions and optimization capabilities:
| Device | OPTIMIZATION_CAPABILITIES |
| :--- | :--- |
| CPU | WINOGRAD FP32 FP16 INT8 BIN |
| dGPU | FP32 BIN BATCHED_BLOB FP16 INT8 |
| iGPU | FP32 BIN BATCHED_BLOB FP16 INT8 |
When an application uses Auto-device to run an FP16 IR on a system with CPU, dGPU and iGPU, Auto-device offloads the workload to the dGPU.
When an application uses Auto-device to run an FP16 IR on a system with CPU and iGPU, Auto-device offloads the workload to the iGPU.
When an application uses Auto-device to run a WINOGRAD-enabled IR on a system with CPU, dGPU and iGPU, Auto-device offloads the workload to the CPU.
In any case, when loading the network to the dGPU or iGPU fails, the network falls back to the CPU as the last choice.
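The logic above can be summarized with the following illustrative sketch. It is plain pseudocode in C++, not the actual plugin implementation; the `SelectDevice` name, the device labels, and the availability/precision-support checks are all assumptions supplied by the caller:
```cpp
#include <algorithm>
#include <functional>
#include <string>
#include <vector>

// Illustrative only: restates the documented 2021.4 default selection logic.
std::string SelectDevice(const std::vector<std::string>& available,   // e.g. {"CPU", "iGPU", "dGPU"}
                         const std::string& model_precision,          // e.g. "FP16"
                         const std::function<bool(const std::string&, const std::string&)>& supports) {
    // Priority order: discrete GPU, then integrated GPU, then CPU.
    const std::vector<std::string> priority = {"dGPU", "iGPU", "CPU"};
    for (const std::string& device : priority) {
        const bool present = std::find(available.begin(), available.end(), device) != available.end();
        if (present && supports(device, model_precision)) {
            return device;  // first device (by priority) that supports the model precision
        }
    }
    return "CPU";  // fallback: if loading to dGPU/iGPU fails, the network goes to CPU
}
```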
### Limit Auto Target Devices Logic
According to the Auto-device selection logic from the previous section,
the most suitable device is selected from the available devices to load the model, as follows:
@snippet snippets/AUTO2.cpp part2
Another way to load the model to a device from a limited choice of devices is with the Auto-device:
@snippet snippets/AUTO3.cpp part3
## Configuring the Individual Devices and Creating the Auto-Device on Top
As described in the first section, configure each individual device as usual and then just create the "AUTO" device on top:
@snippet snippets/AUTO4.cpp part4
Alternatively, you can combine all the individual device settings into a single config and load it,
allowing the Auto-device plugin to parse and apply the settings to the right devices. See the code example here:
@snippet snippets/AUTO5.cpp part5
## Using the Auto-Device with OpenVINO Samples and Benchmark App
Note that every OpenVINO sample that supports the "-d" (which stands for "device") command-line option transparently accepts the Auto-device.
The Benchmark Application is the best example of the optimal usage of the Auto-device.
You do not need to set the number of requests and CPU threads, as the application provides optimal out-of-the-box performance.
Below is an example command line to evaluate AUTO performance with the Benchmark App:
```sh
./benchmark_app -d AUTO -m <model> -i <input> -niter 1000
```
You can also use the Auto-device with a limited device choice:
```sh
./benchmark_app -d AUTO:CPU,GPU -m <model> -i <input> -niter 1000
```
Note that the default number of CPU streams is 1 when using “-d AUTO”.
Note that you can use the FP16 IR to work with the Auto-device.
Also note that no demos are (yet) fully optimized for the Auto-device in terms of selecting the most suitable device,
using GPU streams/throttling, and so on.

View File

@ -66,10 +66,8 @@ In addition to common parameters, the MYRIAD plugin accepts the following option
| Parameter Name | Parameter Values | Default | Description | | Parameter Name | Parameter Values | Default | Description |
| :--- | :--- | :--- | :--- | | :--- | :--- | :--- | :--- |
| `KEY_VPU_MYRIAD_PLATFORM` | empty string/`VPU_MYRIAD_2450`/`VPU_MYRIAD_2480` | empty string | If set, the plugin will use a device with specific platform to allocate a network. |
| `KEY_VPU_MYRIAD_PROTOCOL` | empty string/`VPU_MYRIAD_USB`/`VPU_MYRIAD_PCIE` | empty string | If set, the plugin will use a device with specific protocol to allocate a network. | | `KEY_VPU_MYRIAD_PROTOCOL` | empty string/`VPU_MYRIAD_USB`/`VPU_MYRIAD_PCIE` | empty string | If set, the plugin will use a device with specific protocol to allocate a network. |
| `KEY_VPU_MYRIAD_FORCE_RESET` | `YES`/`NO` | `NO` | Enables force reset of all booted devices when new ExecutableNetwork is created.<br />This is a plugin scope option and must be used with the plugin's SetConfig method only.<br />See <a href="#MYRIAD_DEVICE_ALLOC">Device allocation</a> section for details. | | `KEY_VPU_MYRIAD_FORCE_RESET` | `YES`/`NO` | `NO` | Enables force reset of all booted devices when new ExecutableNetwork is created.<br />This is a plugin scope option and must be used with the plugin's SetConfig method only.<br />See <a href="#MYRIAD_DEVICE_ALLOC">Device allocation</a> section for details. |
| `KEY_VPU_PLATFORM` | empty string/`VPU_2450`/`VPU_2480` | empty string | **Deprecated** Use `KEY_VPU_MYRIAD_PLATFORM` instead. <br />If set, the plugin will use a device with specific platform to allocate a network. |
| `KEY_VPU_FORCE_RESET` | `YES`/`NO` | `NO` | **Deprecated** Use `KEY_VPU_MYRIAD_FORCE_RESET` instead. <br />Enables force reset of all booted devices when new ExecutableNetwork is created.<br />This is a plugin scope option and must be used with the plugin's SetConfig method only.<br />See <a href="#MYRIAD_DEVICE_ALLOC">Device allocation</a> section for details. | | `KEY_VPU_FORCE_RESET` | `YES`/`NO` | `NO` | **Deprecated** Use `KEY_VPU_MYRIAD_FORCE_RESET` instead. <br />Enables force reset of all booted devices when new ExecutableNetwork is created.<br />This is a plugin scope option and must be used with the plugin's SetConfig method only.<br />See <a href="#MYRIAD_DEVICE_ALLOC">Device allocation</a> section for details. |
## Device allocation <a name="MYRIAD_DEVICE_ALLOC">&nbsp;</a> ## Device allocation <a name="MYRIAD_DEVICE_ALLOC">&nbsp;</a>

View File

@ -14,6 +14,7 @@ The Inference Engine provides unique capabilities to infer deep learning models
|[VPU plugins](VPU.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs | |[VPU plugins](VPU.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs |
|[GNA plugin](GNA.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel&reg; Speech Enabling Developer Kit, Amazon Alexa* Premium Far-Field Developer Kit, Intel&reg; Pentium&reg; Silver J5005 Processor, Intel&reg; Pentium&reg; Silver N5000 Processor, Intel&reg; Celeron&reg; J4005 Processor, Intel&reg; Celeron&reg; J4105 Processor, Intel&reg; Celeron&reg; Processor N4100, Intel&reg; Celeron&reg; Processor N4000, Intel&reg; Core&trade; i3-8121U Processor, Intel&reg; Core&trade; i7-1065G7 Processor, Intel&reg; Core&trade; i7-1060G7 Processor, Intel&reg; Core&trade; i5-1035G4 Processor, Intel&reg; Core&trade; i5-1035G7 Processor, Intel&reg; Core&trade; i5-1035G1 Processor, Intel&reg; Core&trade; i5-1030G7 Processor, Intel&reg; Core&trade; i5-1030G4 Processor, Intel&reg; Core&trade; i3-1005G1 Processor, Intel&reg; Core&trade; i3-1000G1 Processor, Intel&reg; Core&trade; i3-1000G4 Processor| |[GNA plugin](GNA.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel&reg; Speech Enabling Developer Kit, Amazon Alexa* Premium Far-Field Developer Kit, Intel&reg; Pentium&reg; Silver J5005 Processor, Intel&reg; Pentium&reg; Silver N5000 Processor, Intel&reg; Celeron&reg; J4005 Processor, Intel&reg; Celeron&reg; J4105 Processor, Intel&reg; Celeron&reg; Processor N4100, Intel&reg; Celeron&reg; Processor N4000, Intel&reg; Core&trade; i3-8121U Processor, Intel&reg; Core&trade; i7-1065G7 Processor, Intel&reg; Core&trade; i7-1060G7 Processor, Intel&reg; Core&trade; i5-1035G4 Processor, Intel&reg; Core&trade; i5-1035G7 Processor, Intel&reg; Core&trade; i5-1035G1 Processor, Intel&reg; Core&trade; i5-1030G7 Processor, Intel&reg; Core&trade; i5-1030G4 Processor, Intel&reg; Core&trade; i3-1005G1 Processor, Intel&reg; Core&trade; i3-1000G1 Processor, Intel&reg; Core&trade; i3-1000G4 Processor|
|[Multi-Device plugin](MULTI.md) |Multi-Device plugin enables simultaneous inference of the same network on several Intel&reg; devices in parallel | |[Multi-Device plugin](MULTI.md) |Multi-Device plugin enables simultaneous inference of the same network on several Intel&reg; devices in parallel |
|[Auto-Device plugin](AUTO.md) |Auto-Device plugin enables selecting Intel&reg; device for inference automatically |
|[Heterogeneous plugin](HETERO.md) |Heterogeneous plugin enables automatic inference splitting between several Intel&reg; devices (for example if a device doesn't [support certain layers](#supported-layers)). | |[Heterogeneous plugin](HETERO.md) |Heterogeneous plugin enables automatic inference splitting between several Intel&reg; devices (for example if a device doesn't [support certain layers](#supported-layers)). |
Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/). Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/).

View File

@ -0,0 +1,62 @@
# Converting a Paddle* Model {#openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Paddle}
A summary of the steps for optimizing and deploying a model that was trained with Paddle\*:
1. [Configure the Model Optimizer](../Config_Model_Optimizer.md) for Paddle\*.
2. [Convert a Paddle\* Model](#Convert_From_Paddle) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values.
3. Test the model in the Intermediate Representation format using the [Inference Engine](../../../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) in the target environment via provided Inference Engine [sample applications](../../../IE_DG/Samples_Overview.md).
4. [Integrate](../../../IE_DG/Samples_Overview.md) the [Inference Engine](../../../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) in your application to deploy the model in the target environment.
## Supported Topologies
| Model Name| Model Type| Description|
| ------------- | ------------ | ------------- |
|ppocr-det| optical character recognition| Models are exported from [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.1/). Refer to [README.md](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.1/#pp-ocr-20-series-model-listupdate-on-dec-15).|
|ppocr-rec| optical character recognition| Models are exported from [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.1/). Refer to [README.md](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.1/#pp-ocr-20-series-model-listupdate-on-dec-15).|
|ResNet-50| classification| Models are exported from [PaddleClas](https://github.com/PaddlePaddle/PaddleClas/tree/release/2.1/). Refer to [getting_started_en.md](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.1/docs/en/tutorials/getting_started_en.md#4-use-the-inference-model-to-predict)|
|MobileNet v2| classification| Models are exported from [PaddleClas](https://github.com/PaddlePaddle/PaddleClas/tree/release/2.1/). Refer to [getting_started_en.md](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.1/docs/en/tutorials/getting_started_en.md#4-use-the-inference-model-to-predict)|
|MobileNet v3| classification| Models are exported from [PaddleClas](https://github.com/PaddlePaddle/PaddleClas/tree/release/2.1/). Refer to [getting_started_en.md](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.1/docs/en/tutorials/getting_started_en.md#4-use-the-inference-model-to-predict)|
|BiSeNet v2| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/docs/model_export.md#)|
|DeepLab v3 plus| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/docs/model_export.md#)|
|Faster-SCNN| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/docs/model_export.md#)|
|OCRNET| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/docs/model_export.md#)|
|Yolo v3| detection| Models are exported from [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.1). Refer to [EXPORT_MODEL.md](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/deploy/EXPORT_MODEL.md#).|
|ppyolo| detection| Models are exported from [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.1). Refer to [EXPORT_MODEL.md](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/deploy/EXPORT_MODEL.md#).|
> **NOTE:** The verified models are exported from the repository of branch release/2.1.
## Convert a Paddle* Model <a name="Convert_From_Paddle"></a>
To convert a Paddle\* model:
1. Go to the `$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer` directory.
2. Use the `mo.py` script to convert a model, specifying the framework, the path to the input model `.pdmodel` file, and the path to an output directory with write permissions:
```sh
python3 mo.py --input_model <INPUT_MODEL>.pdmodel --output_dir <OUTPUT_MODEL_DIR> --framework=paddle
```
Parameters to convert your model:
* [Framework-agnostic parameters](Converting_Model_General.md): These parameters are used to convert a model trained with any supported framework.
> **NOTE:** `--scale`, `--scale_values`, `--mean_values`, `--mean_file` are unsupported in the current version of mo_paddle.
### Example of Converting a Paddle* Model
Below is an example command to convert a Yolo v3 Paddle\* network to an OpenVINO IR network with the Model Optimizer.
```sh
python3 mo.py --model_name yolov3_darknet53_270e_coco --output_dir <OUTPUT_MODEL_DIR> --framework=paddle --data_type=FP32 --reverse_input_channels --input_shape=[2,3,608,608],[1,2],[1,2] --input=image,im_shape,scale_factor --output=save_infer_model/scale_0.tmp_1,save_infer_model/scale_1.tmp_1 --input_model=yolov3.pdmodel
```
## Supported Paddle\* Layers
Refer to [Supported Framework Layers](../Supported_Frameworks_Layers.md) for the list of supported standard layers.
## Frequently Asked Questions (FAQ)
The Model Optimizer provides explanatory messages if it is unable to run to completion due to issues like typographical errors, incorrectly used options, or other issues. The message describes the potential cause of the problem and gives a link to the [Model Optimizer FAQ](../Model_Optimizer_FAQ.md). The FAQ has instructions on how to resolve most issues. The FAQ also includes links to relevant sections in the Model Optimizer Developer Guide to help you understand what went wrong.
## Summary
In this document, you learned:
* Basic information about how the Model Optimizer works with Paddle\* models
* Which Paddle\* models are supported
* How to convert a trained Paddle\* model using the Model Optimizer with framework-agnostic command-line options

View File

@ -16,7 +16,7 @@ The <code>mo.py</code> script is the universal entry point that can deduce the f
* `.onnx` - ONNX\* models * `.onnx` - ONNX\* models
* `.nnet` - Kaldi\* models. * `.nnet` - Kaldi\* models.
If the model files do not have standard extensions, you can use the ``--framework {tf,caffe,kaldi,onnx,mxnet}`` option to specify the framework type explicitly. If the model files do not have standard extensions, you can use the ``--framework {tf,caffe,kaldi,onnx,mxnet,paddle}`` option to specify the framework type explicitly.
For example, the following commands are equivalent: For example, the following commands are equivalent:
```sh ```sh
@ -33,6 +33,7 @@ Framework-specific parameters for:
* [MXNet](Convert_Model_From_MxNet.md), * [MXNet](Convert_Model_From_MxNet.md),
* [ONNX](Convert_Model_From_ONNX.md), * [ONNX](Convert_Model_From_ONNX.md),
* [Kaldi](Convert_Model_From_Kaldi.md). * [Kaldi](Convert_Model_From_Kaldi.md).
* [Paddle](Convert_Model_From_Paddle.md).
## See Also ## See Also

View File

@ -326,6 +326,7 @@ limitations under the License.
</tab> </tab>
<tab type="user" title="Heterogeneous Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_HETERO"/> <tab type="user" title="Heterogeneous Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_HETERO"/>
<tab type="user" title="Multi-Device Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_MULTI"/> <tab type="user" title="Multi-Device Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_MULTI"/>
<tab type="user" title="Auto-Device Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_AUTO"/>
<tab type="user" title="GNA Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_GNA"/> <tab type="user" title="GNA Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_GNA"/>
</tab> </tab>
<tab type="user" title="Known Issues" url="@ref openvino_docs_IE_DG_Known_Issues_Limitations"/> <tab type="user" title="Known Issues" url="@ref openvino_docs_IE_DG_Known_Issues_Limitations"/>

View File

@ -4,7 +4,16 @@
**Category**: Comparison binary operation **Category**: Comparison binary operation
**Short description**: *Greater* performs element-wise comparison operation with two given tensors applying multi-directional broadcast rules. **Short description**: *Greater* performs element-wise comparison operation with two given tensors applying broadcast rules specified in the `auto_broadcast` attribute.
**Detailed description**
Before performing the comparison operation, input tensors *a* and *b* are broadcasted if their shapes are different and the `auto_broadcast` attribute is not `none`. Broadcasting is performed according to the `auto_broadcast` value.
After broadcasting, *Greater* does the following with the input tensors *a* and *b*:
\f[
o_{i} = a_{i} > b_{i}
\f]
**Attributes**: **Attributes**:
@ -13,39 +22,33 @@
* **Description**: specifies rules used for auto-broadcasting of input tensors. * **Description**: specifies rules used for auto-broadcasting of input tensors.
* **Range of values**: * **Range of values**:
* *none* - no auto-broadcasting is allowed, all input shapes should match * *none* - no auto-broadcasting is allowed, all input shapes should match
* *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in <a href="https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md">ONNX docs</a>. * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md),
* *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md).
* **Type**: string * **Type**: string
* **Default value**: "numpy" * **Default value**: "numpy"
* **Required**: *no* * **Required**: *no*
**Inputs** **Inputs**
* **1**: A tensor of type *T*. **Required.** * **1**: A tensor of type *T* and arbitrary shape. **Required.**
* **2**: A tensor of type *T*. **Required.** * **2**: A tensor of type *T* and arbitrary shape. **Required.**
**Outputs** **Outputs**
* **1**: The result of element-wise comparison operation. A tensor of type boolean. * **1**: The result of element-wise comparison operation applied to the input tensors. A tensor of type *T_BOOL* and shape equal to broadcasted shape of two inputs.
**Types** **Types**
* *T*: arbitrary supported type. * *T*: arbitrary supported type.
* *T_BOOL*: `boolean`.
**Detailed description**
Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value.
After broadcasting *Greater* does the following with the input tensors *a* and *b*:
\f[
o_{i} = a_{i} > b_{i}
\f]
**Examples** **Examples**
*Example 1* *Example 1: no broadcast*
```xml ```xml
<layer ... type="Greater"> <layer ... type="Greater">
<data auto_broadcast="none"/>
<input> <input>
<port id="0"> <port id="0">
<dim>256</dim> <dim>256</dim>
@ -65,9 +68,10 @@ o_{i} = a_{i} > b_{i}
</layer> </layer>
``` ```
*Example 2: broadcast* *Example 2: numpy broadcast*
```xml ```xml
<layer ... type="Greater"> <layer ... type="Greater">
<data auto_broadcast="numpy"/>
<input> <input>
<port id="0"> <port id="0">
<dim>8</dim> <dim>8</dim>

View File

@ -4,7 +4,18 @@
**Category**: Comparison binary operation **Category**: Comparison binary operation
**Short description**: *NotEqual* performs element-wise comparison operation with two given tensors applying multi-directional broadcast rules. **Short description**: *NotEqual* performs element-wise comparison operation with two given tensors applying
multi-directional broadcast rules specified in the `auto_broadcast` attribute.
**Detailed description**
Before performing the comparison operation, input tensors *a* and *b* are broadcasted if their shapes are different.
Broadcasting is performed according to the `auto_broadcast` value.
After broadcasting, *NotEqual* does the following with the input tensors *a* and *b*:
\f[
o_{i} = a_{i} != b_{i}
\f]
**Attributes**: **Attributes**:
@ -13,7 +24,8 @@
* **Description**: specifies rules used for auto-broadcasting of input tensors. * **Description**: specifies rules used for auto-broadcasting of input tensors.
* **Range of values**: * **Range of values**:
* *none* - no auto-broadcasting is allowed, all input shapes should match * *none* - no auto-broadcasting is allowed, all input shapes should match
* *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in <a href="https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md">ONNX docs</a>. * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md),
* *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md).
* **Type**: string * **Type**: string
* **Default value**: "numpy" * **Default value**: "numpy"
* **Required**: *no* * **Required**: *no*
@ -31,15 +43,6 @@
* *T*: arbitrary supported type. * *T*: arbitrary supported type.
**Detailed description**
Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value.
After broadcasting *NotEqual* does the following with the input tensors *a* and *b*:
\f[
o_{i} = a_{i} \neq b_{i}
\f]
**Examples** **Examples**
*Example 1* *Example 1*

View File

@ -6,33 +6,7 @@
**Short description**: *LogicalXor* performs element-wise logical XOR operation with two given tensors applying multi-directional broadcast rules. **Short description**: *LogicalXor* performs element-wise logical XOR operation with two given tensors applying multi-directional broadcast rules.
**Attributes**: **Detailed description**: Before performing logical operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value.
* *auto_broadcast*
* **Description**: specifies rules used for auto-broadcasting of input tensors.
* **Range of values**:
* *none* - no auto-broadcasting is allowed, all input shapes should match
* *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in <a href="https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md">ONNX docs</a>.
* **Type**: string
* **Default value**: "numpy"
* **Required**: *no*
**Inputs**
* **1**: A tensor of type *T*. **Required.**
* **2**: A tensor of type *T*. **Required.**
**Outputs**
* **1**: The result of element-wise logical XOR operation. A tensor of type *T*.
**Types**
* *T*: boolean type.
**Detailed description**
Before performing logical operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value.
After broadcasting *LogicalXor* does the following with the input tensors *a* and *b*: After broadcasting *LogicalXor* does the following with the input tensors *a* and *b*:
@ -40,9 +14,35 @@ After broadcasting *LogicalXor* does the following with the input tensors *a* an
o_{i} = a_{i} \oplus b_{i} o_{i} = a_{i} \oplus b_{i}
\f] \f]
**Attributes**:
* *auto_broadcast*
* **Description**: specifies rules used for auto-broadcasting of input tensors.
* **Range of values**:
* *none* - no auto-broadcasting is allowed, all input shapes must match
* *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md),
* *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md).
* **Type**: string
* **Default value**: "numpy"
* **Required**: *no*
**Inputs**
* **1**: A tensor of type *T_BOOL* and arbitrary shape. **Required.**
* **2**: A tensor of type *T_BOOL* and arbitrary shape. **Required.**
**Outputs**
* **1**: The result of the element-wise *LogicalXor* operation. A tensor of type *T_BOOL* with shape equal to the broadcasted shape of the two inputs.
**Types**
* *T_BOOL*: `boolean`.
**Examples** **Examples**
*Example 1* *Example 1: no broadcast*
```xml ```xml
<layer ... type="LogicalXor"> <layer ... type="LogicalXor">
@ -65,7 +65,7 @@ o_{i} = a_{i} \oplus b_{i}
</layer> </layer>
``` ```
*Example 2: broadcast* *Example 2: numpy broadcast*
```xml ```xml
<layer ... type="LogicalXor"> <layer ... type="LogicalXor">
<input> <input>

View File

@ -163,7 +163,7 @@ strides = [1, 1]
kernel = [2, 2] kernel = [2, 2]
rounding_type = "floor" rounding_type = "floor"
auto_pad = "same_upper" auto_pad = "same_upper"
output = [[[[5, 5, -6], output = [[[[5, 5, 3],
[8, 9, 9] [8, 9, 9]
[8, 9, 9]], [8, 9, 9]],
[[6, 5, 5], [[6, 5, 5],

View File

@ -0,0 +1,360 @@
## MaxPool <a name="MaxPool"></a> {#openvino_docs_ops_pooling_MaxPool_8}
**Versioned name**: *MaxPool-8*
**Category**: *Pooling*
**Short description**: Performs the max pooling operation on input.
**Detailed description**: Input shape can be either 3D, 4D, or 5D. The max pooling operation is performed with respect to input shape from the third dimension to the last dimension. If paddings are used, during the pooling calculation their values are `-inf`. The max pooling operation involves sliding a filter over each channel of a feature map and downsampling by choosing the largest value within the region covered by the filter.
**Attributes**: *Pooling* attributes are specified in the `data` node, which is a child of the layer node.
* *strides*
* **Description**: *strides* is a distance (in pixels) to slide the window on the feature map over the (z, y, x) axes for 3D poolings and (y, x) axes for 2D poolings. For example, *strides* equal to "4,2,1" means sliding the window 4 pixels at a time over depth dimension, 2 over height dimension, and 1 over width dimension.
* **Range of values**: integer values starting from 0
* **Type**: int[]
* **Required**: *yes*
* *dilations*
* **Description**: *dilations* specify the index of the next pixel to select when pooling. If not present, the dilation defaults to 1, meaning the adjacent pixel is chosen. A value of 2 indicates that one pixel is skipped and every other pixel is considered. Dilations specify one value for each spatial axis of the kernel: `(z, y, x)` for 3D poolings and `(y, x)` for 2D poolings.
* **Range of values**: integer values starting from 0
* **Type**: int[]
* **Default value**: `[1,1,...]`
* **Required**: *no*
* *pads_begin*
* **Description**: *pads_begin* is a number of pixels to add to the beginning along each axis. For example, *pads_begin* equal to "1,2" means adding 1 pixel to the top of the input and 2 to the left of the input. All added padding values are equal to negative infinity.
* **Range of values**: integer values starting from 0
* **Type**: int[]
* **Required**: *yes*
* **Note**: the attribute is ignored when *auto_pad* attribute is specified.
* *pads_end*
* **Description**: *pads_end* is a number of pixels to add to the ending along each axis. For example, *pads_end* equal to "1,2" means adding 1 pixel to the bottom of the input and 2 to the right of the input. All added padding values are equal to negative infinity.
* **Range of values**: integer values starting from 0
* **Type**: int[]
* **Required**: *yes*
* **Note**: the attribute is ignored when the *auto_pad* attribute is specified.
* *kernel*
* **Description**: *kernel* is a size of each filter. For example, *kernel* equal to (2, 3) means that each filter has height equal to 2 and width equal to 3.
* **Range of values**: integer values starting from 1
* **Type**: int[]
* **Required**: *yes*
* *rounding_type*
* **Description**: *rounding_type* is a type of rounding to be used to compute output shape.
* **Range of values**:
* *ceil*
* *floor*
* **Type**: string
* **Default value**: *floor*
* **Required**: *no*
* *auto_pad*
* **Description**: *auto_pad* specifies how the padding is calculated. Possible values:
* *explicit*: explicit padding values from `pads_begin` and `pads_end` are used.
* *same_upper (same_lower)* the input is padded to match the output size. In case of odd padding value, an extra padding is added at the end (at the beginning).
* *valid* padding is not used.
* **Type**: string
* **Default value**: *explicit*
* **Required**: *no*
* **Note**: *pads_begin* and *pads_end* attributes are ignored when *auto_pad* is not equal to explicit.
* *index_element_type*
* **Description**: the type of output tensor with indices
* **Range of values**: "i64" or "i32"
* **Type**: string
* **Default value**: "i64"
* **Required**: *No*
* *axis*
* **Description**: indicator of the first dimension in the input shape that should be used to calculate the upper bound of allowed index output values. The upper bound is the product of dimensions starting from the one pointed by the 'axis' attribute until the end of the input shape.
* **Range of values**: integer number. Negative value means counting dimension from the end. The range is `[-R, R - 1]`, where `R` is the rank of the input tensor.
* **Type**: int
* **Default value**: 0
* **Required**: *No*
**Inputs**:
* **1**: 3D, 4D, or 5D input tensor of type *T*. **Required.**
**Outputs**:
* **1**: Input shape can be either `[N, C, H]`, `[N, C, H, W]`, or `[N, C, H, W, D]`. The corresponding output shape is `[N, C, H_out]`, `[N, C, H_out, W_out]` or `[N, C, H_out, W_out, D_out]`. Output tensor has the same data type as the input tensor.
* **2**: Output tensor of type *T_IND* with indices of values selected by the pooling operation.
Shape of this output matches the first output. The type of this output can be specified using the `index_element_type` attribute.
Values are computed as indices in a tensor flattened to 1D, not considering padding. Examples for a 5D input tensor:
* When `axis == 0`, the values are in the range `[0, N * C * H * W * D)`.
* When `axis == 2`, the values are in the range `[0, H * W * D)`.
Note: the values of this output can only be calculated correctly if `pads_value` is set to `-infinity`.
**Types**
* *T*: floating point or integer type.
* *T_IND*: `int64` or `int32`.
**Mathematical Formulation**
Output shape calculation based on `auto_pad` and `rounding_type`:
* `auto_pad = explicit` and `rounding_type = floor`
`H_out = floor((H + pads_begin[0] + pads_end[0] - ((kernel[0] - 1) * dilations[0] + 1)) / strides[0] + 1)`
`W_out = floor((W + pads_begin[1] + pads_end[1] - ((kernel[1] - 1) * dilations[1] + 1)) / strides[1] + 1)`
`D_out = floor((D + pads_begin[2] + pads_end[2] - ((kernel[2] - 1) * dilations[2] + 1)) / strides[2] + 1)`
* `auto_pad = explicit` and `rounding_type = ceil`
`H_out = ceil((H + pads_begin[0] + pads_end[0] - ((kernel[0] - 1) * dilations[0] + 1)) / strides[0] + 1)`
`W_out = ceil((W + pads_begin[1] + pads_end[1] - ((kernel[1] - 1) * dilations[1] + 1)) / strides[1] + 1)`
`D_out = ceil((D + pads_begin[2] + pads_end[2] - ((kernel[2] - 1) * dilations[2] + 1)) / strides[2] + 1)`
* `auto_pad = valid`
`H_out = ceil((H - ((kernel[0] - 1) * dilations[0] + 1) + 1) / strides[0])`
`W_out = ceil((W - ((kernel[1] - 1) * dilations[1] + 1) + 1) / strides[1])`
`D_out = ceil((D - ((kernel[2] - 1) * dilations[2] + 1) + 1) / strides[2])`
* `auto_pad = same_upper / same_lower`
`H_out = H`
`W_out = W`
`D_out = D`
If `H + pads_begin[i] + pads_end[i] - kernel[i]` is not divisible by `strides[i]` evenly, the result is rounded with respect to the `rounding_type` attribute.
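For reference, the explicit-padding formulas above can be expressed as a small helper. This is an illustrative sketch, not part of OpenVINO; the function name and parameters are chosen for this example only:
```cpp
#include <cmath>
#include <cstdint>

// Computes one spatial output dimension of MaxPool-8 for auto_pad = "explicit",
// following the formulas above; ceil_mode selects rounding_type = "ceil" vs "floor".
int64_t max_pool_out_dim(int64_t in, int64_t pad_begin, int64_t pad_end,
                         int64_t kernel, int64_t stride, int64_t dilation, bool ceil_mode) {
    const double out =
        static_cast<double>(in + pad_begin + pad_end - ((kernel - 1) * dilation + 1)) / stride + 1;
    return static_cast<int64_t>(ceil_mode ? std::ceil(out) : std::floor(out));
}

// Example 1 below: max_pool_out_dim(3, 1, 1, 2, 1, 1, false) == 4, matching the 4x4 output.
```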
Example 1 shows how *MaxPool* operates with 4D input using 2D kernel and `auto_pad = explicit`.
```
input = [[[[-1, 2, 3],
[4, 5, -6],
[-7, 8, 9]]]]
strides = [1, 1]
pads_begin = [1, 1]
pads_end = [1, 1]
kernel = [2, 2]
rounding_type = "floor"
auto_pad = "explicit"
output0 = [[[[-1, 2, 3, 3],
[4, 5, 5, -6],
[4, 8, 9, 9],
[-7, 8, 9, 9]]]]
output1 = [[[[0, 1, 2, 2],
[3, 4, 4, 5],
[3, 7, 8, 8],
[6, 7, 8, 8]]]]
```
Example 2 shows how *MaxPool* operates with 3D input using 1D kernel and `auto_pad = valid`.
```
input = [[[-1, 2, 3, 5, -7, 9, 1]]]
strides = [1]
kernel = [3]
rounding_type = "floor"
auto_pad = "valid"
output0 = [[[3, 5, 5, 9, 9]]]
output1 = [[[2, 3, 3, 5, 5]]]
```
Example 3 shows how *MaxPool* operates with 4D input using 2D kernel and `auto_pad = same_lower`.
```
input = [[[[-1, 2, 3],
[4, 5, -6],
[-7, 8, 9]]]]
strides = [1, 1]
kernel = [2, 2]
rounding_type = "floor"
auto_pad = "same_lower"
output0 = [[[[-1, 2, 3],
[4, 5, 5]
[4, 8, 9]]]]
output1 = [[[[0, 1, 2],
[3, 4, 4]
[3, 7, 8]]]]
```
Example 4 shows how *MaxPool* operates with 4D input using 2D kernel and `auto_pad = same_upper`.
```
input = [[[[-1, 2, 3],
[4, 5, -6],
[-7, 8, 9]],
[[2, -1, 5],
[6, -7, 1],
[8, 2, -3]]]]
strides = [1, 1]
kernel = [2, 2]
rounding_type = "floor"
auto_pad = "same_upper"
output0 = [[[[5, 5, 3],
[8, 9, 9]
[8, 9, 9]],
[[6, 5, 5],
[8, 2, 1],
[8, 2, -3]]]]
output1 = [[[[4, 4, 2],
[7, 8, 8]
[7, 8, 8]],
[[12, 11, 11],
[15, 16, 14],
[15, 16, 17]]]]
```
Example 5 shows how *MaxPool* operates with 4D input using 2D kernel, `auto_pad = valid` and `rounding_type = ceil`.
```
input = [[[[-1, 2, 3],
[4, 5, -6],
[-7, 8, 9]]]]
strides = [2, 2]
kernel = [2, 2]
rounding_type = "ceil"
auto_pad = "valid"
output0 = [[[[5, 3],
[8, 9]]]]
output1 = [[[[4, 2],
[7, 8]]]]
```
Example 6 shows how *MaxPool* operates on 4D input using dilated 2D kernel, `auto_pad = explicit` and `rounding_type = floor`.
```
input = [[[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]]]]
strides = [1, 1]
kernel = [2, 2]
dilations = [2, 2]
rounding_type = "floor"
auto_pad = "explicit"
pads_begin = [1, 1]
pads_end = [1, 1]
output0 = [[[[5, 6, 5],
[8, 9, 8],
[5, 6, 5]]]]
output1 = [[[[4, 5, 4],
[7, 8, 7],
[4, 5, 4]]]]
```
Example 7 shows how *MaxPool* operates on 4D input using 2D kernel, with non-default `axis` value.
```
input = [[[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]],
[[10, 11, 12],
[13, 14, 15],
[16, 17, 18]]
]]
strides = [1, 1]
kernel = [2, 2]
dilations = [1, 1]
rounding_type = "floor"
auto_pad = "explicit"
pads_begin = [0, 0]
pads_end = [0, 0]
axis = 2
output0 = [[[[5, 6],
[8, 9]],
[[14, 15],
[17, 18]]]]
output1 = [[[[4, 5],
[7, 8]],
[[4, 5],
[7, 8]]]]
```
**Examples**
```xml
<layer ... type="MaxPool" ... >
<data auto_pad="same_upper" kernel="2,2" pads_begin="1,1" pads_end="1,1" strides="2,2"/>
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>32</dim>
<dim>32</dim>
</port>
</input>
<output>
<port id="1">
<dim>1</dim>
<dim>3</dim>
<dim>32</dim>
<dim>32</dim>
</port>
<port id="2">
<dim>1</dim>
<dim>3</dim>
<dim>32</dim>
<dim>32</dim>
</port>
</output>
</layer>
<layer ... type="MaxPool" ... >
<data auto_pad="explicit" kernel="2,2" pads_begin="1,1" pads_end="1,1" strides="2,2"/>
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>32</dim>
<dim>32</dim>
</port>
</input>
<output>
<port id="1">
<dim>1</dim>
<dim>3</dim>
<dim>17</dim>
<dim>17</dim>
</port>
<port id="2">
<dim>1</dim>
<dim>3</dim>
<dim>17</dim>
<dim>17</dim>
</port>
</output>
</layer>
<layer ... type="MaxPool" ... >
<data auto_pad="valid" kernel="2,2" pads_begin="1,1" pads_end="1,1" strides="2,2"/>
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>32</dim>
<dim>32</dim>
</port>
</input>
<output>
<port id="1">
<dim>1</dim>
<dim>3</dim>
<dim>16</dim>
<dim>16</dim>
</port>
<port id="2">
<dim>1</dim>
<dim>3</dim>
<dim>16</dim>
<dim>16</dim>
</port>
</output>
</layer>
```

docs/snippets/AUTO0.cpp (new file, 12 lines)
View File

@ -0,0 +1,12 @@
#include <ie_core.hpp>
int main() {
//! [part0]
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
// these 2 lines below are equivalent
InferenceEngine::ExecutableNetwork exec0 = ie.LoadNetwork(network, "AUTO");
InferenceEngine::ExecutableNetwork exec1 = ie.LoadNetwork(network, "");
//! [part0]
return 0;
}

docs/snippets/AUTO1.cpp (new file, 15 lines)
View File

@ -0,0 +1,15 @@
#include <ie_core.hpp>
int main() {
//! [part1]
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
// "AUTO" plugin is (globally) pre-configured with the explicit option:
ie.SetConfig({{"AUTO_DEVICE_LIST", "CPU,GPU"}}, "AUTO");
// the below 3 lines are equivalent (the first line leverages the pre-configured AUTO, while second and third explicitly pass the same settings)
InferenceEngine::ExecutableNetwork exec0 = ie.LoadNetwork(network, "AUTO", {});
InferenceEngine::ExecutableNetwork exec1 = ie.LoadNetwork(network, "AUTO", {{"AUTO_DEVICE_LIST", "CPU,GPU"}});
InferenceEngine::ExecutableNetwork exec2 = ie.LoadNetwork(network, "AUTO:CPU,GPU");
//! [part1]
return 0;
}

docs/snippets/AUTO2.cpp (new file, 10 lines)
View File

@ -0,0 +1,10 @@
#include <ie_core.hpp>
int main() {
//! [part2]
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "AUTO");
//! [part2]
return 0;
}

docs/snippets/AUTO3.cpp (new file, 10 lines)
View File

@ -0,0 +1,10 @@
#include <ie_core.hpp>
int main() {
//! [part3]
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "AUTO:CPU,GPU");
//! [part3]
return 0;
}

docs/snippets/AUTO4.cpp (new file, 19 lines)
View File

@ -0,0 +1,19 @@
#include <ie_core.hpp>
int main() {
const std::map<std::string, std::string> cpu_config = { { InferenceEngine::PluginConfigParams::KEY_PERF_COUNT, InferenceEngine::PluginConfigParams::YES } };
const std::map<std::string, std::string> gpu_config = { { InferenceEngine::PluginConfigParams::KEY_PERF_COUNT, InferenceEngine::PluginConfigParams::YES } };
//! [part4]
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
// configure the CPU device first
ie.SetConfig(cpu_config, "CPU");
// configure the GPU device
ie.SetConfig(gpu_config, "GPU");
// load the network to the auto-device
InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "AUTO");
// the new metric allows querying the optimization capabilities
std::vector<std::string> device_cap = exeNetwork.GetMetric(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
//! [part4]
return 0;
}

docs/snippets/AUTO5.cpp

@ -0,0 +1,15 @@
#include <ie_core.hpp>
int main() {
std::string device_name = "AUTO:CPU,GPU";
const std::map< std::string, std::string > full_config = {};
//! [part5]
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
// 'device_name' can be "AUTO:CPU,GPU" to configure the auto-device to use CPU and GPU
InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, device_name, full_config);
// the new metric allows querying the optimization capabilities
std::vector<std::string> device_cap = exeNetwork.GetMetric(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
//! [part5]
return 0;
}


@ -0,0 +1,84 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ie_core.hpp>
#include <ie_ngraph_utils.hpp>
#include <ngraph/ngraph.hpp>
#include <shared_test_classes/base/layer_test_utils.hpp>
#include "comparison.hpp"
using namespace ngraph;
using namespace InferenceEngine;
using ComparisonTypes = ngraph::helpers::ComparisonTypes;
namespace reference_tests {
namespace ComparisonOpsRefTestDefinitions {
namespace {
TEST_P(ReferenceComparisonLayerTest, GreaterCompareWithHardcodedRefs) {
Exec();
}
template <element::Type_t IN_ET>
std::vector<RefComparisonParams> generateComparisonParams(const element::Type& type) {
using T = typename element_type_traits<IN_ET>::value_type;
std::vector<RefComparisonParams> compParams {
// 1D // 2D // 3D // 4D
Builder {}
.compType(ComparisonTypes::GREATER)
.input1({{2, 2}, type, std::vector<T> {0, 12, 23, 0}})
.input2({{2, 2}, type, std::vector<T> {0, 12, 23, 0}})
.expected({{2, 2}, element::boolean, std::vector<char> {0, 0, 0, 0}}),
Builder {}
.compType(ComparisonTypes::GREATER)
.input1({{2, 3}, type, std::vector<T> {0, 6, 45, 1, 21, 21}})
.input2({{2, 3}, type, std::vector<T> {1, 18, 23, 1, 19, 21}})
.expected({{2, 3}, element::boolean, std::vector<char> {0, 0, 1, 0, 1, 0}}),
Builder {}
.compType(ComparisonTypes::GREATER)
.input1({{1}, type, std::vector<T> {53}})
.input2({{1}, type, std::vector<T> {53}})
.expected({{1}, element::boolean, std::vector<char> {0}}),
Builder {}
.compType(ComparisonTypes::GREATER)
.input1({{2, 4}, type, std::vector<T> {0, 12, 23, 0, 1, 5, 12, 8}})
.input2({{2, 4}, type, std::vector<T> {0, 12, 23, 0, 10, 5, 11, 8}})
.expected({{2, 4}, element::boolean, std::vector<char> {0, 0, 0, 0, 0, 0, 1, 0}}),
Builder {}
.compType(ComparisonTypes::GREATER)
.input1({{3, 1, 2}, type, std::vector<T> {2, 1, 4, 1, 3, 1}})
.input2({{1, 2, 1}, type, std::vector<T> {1, 1}})
.expected({{3, 2, 2}, element::boolean, std::vector<char> {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0}}),
Builder {}
.compType(ComparisonTypes::GREATER)
.input1({{2, 1, 2, 1}, type, std::vector<T> {2, 1, 4, 1}})
.input2({{1, 2, 1}, type, std::vector<T> {1, 1}})
.expected({{2, 1, 2, 1}, element::boolean, std::vector<char> {1, 0, 1, 0}})};
return compParams;
}
std::vector<RefComparisonParams> generateComparisonCombinedParams() {
const std::vector<std::vector<RefComparisonParams>> compTypeParams {
generateComparisonParams<element::Type_t::f32>(element::f32),
generateComparisonParams<element::Type_t::f16>(element::f16),
generateComparisonParams<element::Type_t::i32>(element::i32),
generateComparisonParams<element::Type_t::i64>(element::i64),
generateComparisonParams<element::Type_t::u32>(element::u32),
generateComparisonParams<element::Type_t::u64>(element::u64),
generateComparisonParams<element::Type_t::boolean>(element::boolean)};
std::vector<RefComparisonParams> combinedParams;
for (const auto& params : compTypeParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
} // namespace
INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()),
ReferenceComparisonLayerTest::getTestCaseName);
} // namespace ComparisonOpsRefTestDefinitions
} // namespace reference_tests


@ -0,0 +1,48 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ie_core.hpp>
#include <ie_ngraph_utils.hpp>
#include <ngraph/ngraph.hpp>
#include <shared_test_classes/base/layer_test_utils.hpp>
#include <tuple>
#include "logical.hpp"
using namespace ngraph;
using namespace InferenceEngine;
using LogicalTypes = ngraph::helpers::LogicalTypes;
namespace reference_tests {
namespace LogicalOpsRefTestDefinitions {
namespace {
std::vector<RefLogicalParams> generateLogicalParams() {
std::vector<RefLogicalParams> logicalParams {
Builder {}
.opType(LogicalTypes::LOGICAL_XOR)
.input1({{2, 2}, element::boolean, std::vector<char> {true, false, true, false}})
.input2({{2, 2}, element::boolean, std::vector<char> {false, true, true, false}})
.expected({{2, 2}, element::boolean, std::vector<char> {true, true, false, false}}),
Builder {}
.opType(LogicalTypes::LOGICAL_XOR)
.input1({{2, 1, 2, 1}, element::boolean, std::vector<char> {true, false, true, false}})
.input2({{1, 1, 2, 1}, element::boolean, std::vector<char> {true, false}})
.expected({{2, 1, 2, 1}, element::boolean, std::vector<char> {false, false, false, false}}),
Builder {}
.opType(LogicalTypes::LOGICAL_XOR)
.input1({{3, 4}, element::boolean, std::vector<char> {true, true, true, true, true, false, true, false, false, true, true, true}})
.input2({{3, 4}, element::boolean, std::vector<char> {true, true, true, true, true, true, true, false, false, true, true, false}})
.expected({{3, 4}, element::boolean, std::vector<char> {false, false, false, false, false, true, false, false, false, false, false, true}})};
return logicalParams;
}
INSTANTIATE_TEST_SUITE_P(smoke_LogicalOr_With_Hardcoded_Refs, ReferenceLogicalLayerTest, ::testing::ValuesIn(generateLogicalParams()),
ReferenceLogicalLayerTest::getTestCaseName);
} // namespace
} // namespace LogicalOpsRefTestDefinitions
} // namespace reference_tests
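The second parameter set above appears to rely on numpy-style broadcasting of the `{1, 1, 2, 1}` input across the first dimension of the `{2, 1, 2, 1}` input, so every element pairs with an equal value and the XOR comes out all-false:
```
input1, slice by slice:           [true, false] | [true, false]
input2 broadcast to both slices:  [true, false] | [true, false]
element-wise XOR:                 [false, false] | [false, false]
```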


@ -7,10 +7,10 @@ import sys
import errno import errno
import subprocess # nosec import subprocess # nosec
import typing import typing
import multiprocessing
from fnmatch import fnmatchcase from fnmatch import fnmatchcase
from pathlib import Path from pathlib import Path
from shutil import copyfile, rmtree from shutil import copyfile, rmtree
from distutils.command.install import install
from distutils.command.build import build from distutils.command.build import build
from distutils.command.clean import clean from distutils.command.clean import clean
from distutils.errors import DistutilsSetupError from distutils.errors import DistutilsSetupError
@ -27,11 +27,11 @@ PYTHON_VERSION = f'python{sys.version_info.major}.{sys.version_info.minor}'
# The following variables can be defined in environment or .env file # The following variables can be defined in environment or .env file
CMAKE_BUILD_DIR = config('CMAKE_BUILD_DIR', '.') CMAKE_BUILD_DIR = config('CMAKE_BUILD_DIR', '.')
CORE_LIBS_DIR = config('CORE_LIBS_DIR', '') CORE_LIBS_DIR = config('CORE_LIBS_DIR', 'deployment_tools/inference_engine/lib/intel64')
PLUGINS_LIBS_DIR = config('PLUGINS_LIBS_DIR', '') PLUGINS_LIBS_DIR = config('PLUGINS_LIBS_DIR', 'deployment_tools/inference_engine/lib/intel64')
NGRAPH_LIBS_DIR = config('NGRAPH_LIBS_DIR', '') NGRAPH_LIBS_DIR = config('NGRAPH_LIBS_DIR', 'deployment_tools/ngraph/lib')
TBB_LIBS_DIR = config('TBB_LIBS_DIR', '') TBB_LIBS_DIR = config('TBB_LIBS_DIR', 'deployment_tools/inference_engine/external/tbb/lib')
PY_PACKAGES_DIR = config('PY_PACKAGES_DIR', '') PY_PACKAGES_DIR = config('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}')
LIBS_RPATH = '$ORIGIN' if sys.platform == 'linux' else '@loader_path' LIBS_RPATH = '$ORIGIN' if sys.platform == 'linux' else '@loader_path'
LIB_INSTALL_CFG = { LIB_INSTALL_CFG = {
@ -118,7 +118,66 @@ class PrebuiltExtension(Extension):
class CustomBuild(build): class CustomBuild(build):
"""Custom implementation of build_clib""" """Custom implementation of build_clib"""
cmake_build_types = ['Release', 'Debug', 'RelWithDebInfo', 'MinSizeRel']
user_options = [
('config=', None, 'Build configuration [{types}].'.format(types='|'.join(cmake_build_types))),
('jobs=', None, 'Specifies the number of jobs to use with make.'),
('cmake-args=', None, 'Additional options to be passed to CMake.'),
]
def initialize_options(self):
"""Set default values for all the options that this command supports."""
super().initialize_options()
self.build_base = 'build'
self.config = None
self.jobs = None
self.cmake_args = None
def finalize_options(self):
"""Set final values for all the options that this command supports."""
super().finalize_options()
if not self.config:
if self.debug:
self.config = 'Debug'
else:
self.announce('Set default value for CMAKE_BUILD_TYPE = Release.', level=4)
self.config = 'Release'
else:
build_types = [item.lower() for item in self.cmake_build_types]
try:
i = build_types.index(str(self.config).lower())
self.config = self.cmake_build_types[i]
self.debug = True if 'Debug' == self.config else False
except ValueError:
self.announce('Unsupported CMAKE_BUILD_TYPE value: ' + self.config, level=4)
self.announce('Supported values: {types}'.format(types=', '.join(self.cmake_build_types)), level=4)
sys.exit(1)
if self.jobs is None and os.getenv('MAX_JOBS') is not None:
self.jobs = os.getenv('MAX_JOBS')
self.jobs = multiprocessing.cpu_count() if self.jobs is None else int(self.jobs)
def run(self): def run(self):
global CMAKE_BUILD_DIR
self.jobs = multiprocessing.cpu_count()
plat_specifier = '.{0}-{1}.{2}'.format(self.plat_name, *sys.version_info[:2])
self.build_temp = os.path.join(self.build_base, 'temp' + plat_specifier, self.config)
# if setup.py is directly called use CMake to build product
if CMAKE_BUILD_DIR == '.':
openvino_root_dir = os.path.normpath(os.path.join(CMAKE_BUILD_DIR, '../../../../'))
self.announce('Configuring cmake project', level=3)
self.spawn(['cmake', '-H' + openvino_root_dir, '-B' + self.build_temp,
'-DCMAKE_BUILD_TYPE={type}'.format(type=self.config),
'-DENABLE_PYTHON=ON',
'-DNGRAPH_ONNX_FRONTEND_ENABLE=ON'])
self.announce('Building binaries', level=3)
self.spawn(['cmake', '--build', self.build_temp,
'--config', self.config, '-j', str(self.jobs)])
CMAKE_BUILD_DIR = self.build_temp
self.run_command('build_clib') self.run_command('build_clib')
build.run(self) build.run(self)
# Copy extra package_data content filtered by find_packages # Copy extra package_data content filtered by find_packages
@ -133,14 +192,6 @@ class CustomBuild(build):
copyfile(path, dst / path_rel) copyfile(path, dst / path_rel)
class CustomInstall(install):
"""Enable build_clib during the installation"""
def run(self):
self.run_command('build_clib')
install.run(self)
class PrepareLibs(build_clib): class PrepareLibs(build_clib):
"""Prepare prebuilt libraries""" """Prepare prebuilt libraries"""
@ -369,6 +420,7 @@ if os.path.exists(package_license):
packages = find_namespace_packages(get_package_dir(PY_INSTALL_CFG)) packages = find_namespace_packages(get_package_dir(PY_INSTALL_CFG))
package_data: typing.Dict[str, list] = {} package_data: typing.Dict[str, list] = {}
setup( setup(
version=config('WHEEL_VERSION', '0.0.0'), version=config('WHEEL_VERSION', '0.0.0'),
author_email=config('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'), author_email=config('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'),
@ -376,14 +428,13 @@ setup(
license=config('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'), license=config('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'),
author=config('WHEEL_AUTHOR', 'Intel Corporation'), author=config('WHEEL_AUTHOR', 'Intel Corporation'),
description=config('WHEEL_DESC', 'Inference Engine Python* API'), description=config('WHEEL_DESC', 'Inference Engine Python* API'),
install_requires=get_dependencies(config('WHEEL_REQUIREMENTS', 'requirements.txt')), install_requires=get_dependencies(config('WHEEL_REQUIREMENTS', 'meta/openvino.requirements.txt')),
long_description=get_description(config('WHEEL_OVERVIEW', 'pypi_overview.md')), long_description=get_description(config('WHEEL_OVERVIEW', 'meta/pypi_overview.md')),
long_description_content_type='text/markdown', long_description_content_type='text/markdown',
download_url=config('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'), download_url=config('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'),
url=config('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'), url=config('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'),
cmdclass={ cmdclass={
'build': CustomBuild, 'build': CustomBuild,
'install': CustomInstall,
'build_clib': PrepareLibs, 'build_clib': PrepareLibs,
'build_ext': CopyExt, 'build_ext': CopyExt,
'clean': CustomClean, 'clean': CustomClean,
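Taken together, the new `CustomBuild` options make the wheel build configurable from the command line, e.g. `python setup.py build --config=Debug --jobs=8` (an illustrative invocation; the option names come from `user_options` above), with the `MAX_JOBS` environment variable acting as a fallback for `--jobs`.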


@ -212,6 +212,9 @@ int main(int argc, char* argv[]) {
bool perf_counts = false; bool perf_counts = false;
// Update config per device according to command line parameters // Update config per device according to command line parameters
for (auto& device : devices) { for (auto& device : devices) {
if (device == "AUTO") {
continue;
}
if (!config.count(device)) if (!config.count(device))
config[device] = {}; config[device] = {};
std::map<std::string, std::string>& device_config = config.at(device); std::map<std::string, std::string>& device_config = config.at(device);


@ -627,10 +627,9 @@ int main(int argc, char* argv[]) {
if (FLAGS_q.compare("user") == 0) { if (FLAGS_q.compare("user") == 0) {
if (!FLAGS_rg.empty()) { if (!FLAGS_rg.empty()) {
slog::warn slog::warn << "Custom scale factor will be used for imported gna model: " << FLAGS_rg << slog::endl;
<< "Custom scale factor will be ignored - using scale factor from provided imported gna model: " }
<< FLAGS_rg << slog::endl;
} else {
auto scaleFactorInput = ParseScaleFactors(FLAGS_sf); auto scaleFactorInput = ParseScaleFactors(FLAGS_sf);
if (numInputFiles != scaleFactorInput.size()) { if (numInputFiles != scaleFactorInput.size()) {
std::string errMessage( std::string errMessage(
@ -641,11 +640,9 @@ int main(int argc, char* argv[]) {
for (size_t i = 0; i < scaleFactorInput.size(); ++i) { for (size_t i = 0; i < scaleFactorInput.size(); ++i) {
slog::info << "For input " << i << " using scale factor of " << scaleFactorInput[i] << slog::endl; slog::info << "For input " << i << " using scale factor of " << scaleFactorInput[i] << slog::endl;
std::string scaleFactorConfigKey = std::string scaleFactorConfigKey = GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_") + std::to_string(i);
GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_") + std::to_string(i);
gnaPluginConfig[scaleFactorConfigKey] = scaleFactorInput[i]; gnaPluginConfig[scaleFactorConfigKey] = scaleFactorInput[i];
} }
}
} else { } else {
// "static" quantization with calculated scale factor // "static" quantization with calculated scale factor
if (!FLAGS_rg.empty()) { if (!FLAGS_rg.empty()) {


@ -1,136 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include <memory>
#include <map>
#include "ie_metric_helpers.hpp"
#include "auto_exec_network.hpp"
#include "auto_infer_request.hpp"
namespace AutoPlugin {
using namespace InferenceEngine;
AutoExecutableNetwork::AutoExecutableNetwork(NetworkFuture cpuFuture,
NetworkFuture acceleratorFuture,
bool enablePerfCount)
: _cpuFuture(std::move(cpuFuture))
, _acceleratorFuture(std::move(acceleratorFuture))
, _enablePerfCount(enablePerfCount) {
// both are valid, like AUTO:CPU,GPU
if (_cpuFuture.valid() && _acceleratorFuture.valid()) {
try {
_networkFirstReady = _cpuFuture.get();
_alreadyActualNetwork = false;
} catch (const std::exception& e) {
printf("Warning: load network to CPU failed: %s\n", e.what());
_networkActualNeeded = _acceleratorFuture.get();
_alreadyActualNetwork = true;
}
} else if (_acceleratorFuture.valid()) { // only accelerator is valid, like AUTO:GPU
_networkActualNeeded = _acceleratorFuture.get();
_alreadyActualNetwork = true;
} else if (_cpuFuture.valid()) { // only CPU is valid, like AUTO:CPU
_networkActualNeeded = _cpuFuture.get();
_alreadyActualNetwork = true;
} else {
IE_THROW() << "No device task available";
}
}
AutoExecutableNetwork::~AutoExecutableNetwork() = default;
InferenceEngine::IInferRequestInternal::Ptr AutoExecutableNetwork::CreateInferRequestImpl(InputsDataMap networkInputs,
OutputsDataMap networkOutputs) {
InferenceEngine::SoExecutableNetworkInternal network;
SoIInferRequestInternal inferRequest;
if (TryGetActualNetwork(network)) {
inferRequest = {_networkActualNeeded, _networkActualNeeded->CreateInferRequest()};
} else {
inferRequest = {_networkFirstReady, _networkFirstReady->CreateInferRequest()};
}
return std::make_shared<AutoInferRequest>(_networkInputs, _networkOutputs, inferRequest,
shared_from_this(), _alreadyActualNetwork,
_enablePerfCount);
}
bool AutoExecutableNetwork::TryGetActualNetwork(InferenceEngine::SoExecutableNetworkInternal& soExecNetwork) {
// try to get actual network
if (_acceleratorFuture.valid() && _acceleratorFuture.wait_for(std::chrono::nanoseconds(0)) == std::future_status::ready) {
soExecNetwork = _acceleratorFuture.get();
_alreadyActualNetwork = true;
_networkActualNeeded = soExecNetwork;
// reapply config to actual network
// fixme: GPU doesn't support SetConfig and throw exception
try {
_networkActualNeeded->SetConfig(_cacheConfig);
} catch (...) {
}
return true;
}
// if already get actual network
if (_alreadyActualNetwork) {
soExecNetwork = _networkActualNeeded;
return true;
}
return false;
}
void AutoExecutableNetwork::WaitForActualDevice() const {
if (_alreadyActualNetwork) {
return;
}
if (_acceleratorFuture.valid()) {
_networkActualNeeded = _acceleratorFuture.get();
_alreadyActualNetwork = true;
} else {
IE_THROW() << "Export failed due to no valid executable network";
}
}
void AutoExecutableNetwork::Export(std::ostream& networkModel) {
//fixme: the Export should work with actual device, so we have to wait!!!
WaitForActualDevice();
_networkActualNeeded->Export(networkModel);
}
RemoteContext::Ptr AutoExecutableNetwork::GetContext() const {
// fixme: the GetContext should work with actual device, so we have to wait!!!
WaitForActualDevice();
return _networkActualNeeded->GetContext();
}
InferenceEngine::CNNNetwork AutoExecutableNetwork::GetExecGraphInfo() {
WaitForActualDevice();
return _networkActualNeeded->GetExecGraphInfo();
}
Parameter AutoExecutableNetwork::GetMetric(const std::string &name) const {
// fixme: should we wait actual device? meanwhile it will block inference, how to fix?
// WaitForActualDevice();
if (_alreadyActualNetwork) {
return _networkActualNeeded->GetMetric(name);
} else {
return _networkFirstReady->GetMetric(name);
}
}
void AutoExecutableNetwork::SetConfig(const std::map<std::string, Parameter>& config) {
//fixme: have to store the config and reapply when the networks swapped
_cacheConfig = config;
if (_alreadyActualNetwork) {
_networkActualNeeded->SetConfig(config);
} else {
_networkFirstReady->SetConfig(config);
}
}
Parameter AutoExecutableNetwork::GetConfig(const std::string& name) const {
//fixme: carefuly select between FirstLoaded and ActuallyNeeded
return _cacheConfig;
}
} // namespace AutoPlugin


@ -1,56 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <atomic>
#include <mutex>
#include <queue>
#include <unordered_map>
#include <map>
#include <vector>
#include <string>
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
#include <threading/ie_itask_executor.hpp>
namespace AutoPlugin {
using DeviceName = std::string;
using NetworkFuture = std::future<InferenceEngine::SoExecutableNetworkInternal>;
class AutoExecutableNetwork : public InferenceEngine::IExecutableNetworkInternal {
public:
using Ptr = std::shared_ptr<AutoExecutableNetwork>;
explicit AutoExecutableNetwork(NetworkFuture cpuTask,
NetworkFuture acceleratorTask,
bool enablePerfCount);
void Export(std::ostream& networkModel) override;
InferenceEngine::RemoteContext::Ptr GetContext() const override;
InferenceEngine::CNNNetwork GetExecGraphInfo() override;
InferenceEngine::Parameter GetMetric(const std::string &name) const override;
void SetConfig(const std::map<std::string, InferenceEngine::Parameter>& config) override;
InferenceEngine::Parameter GetConfig(const std::string& name) const override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) override;
bool TryGetActualNetwork(InferenceEngine::SoExecutableNetworkInternal& soExecNetwork);
~AutoExecutableNetwork();
private:
void WaitForActualDevice() const;
private:
InferenceEngine::SoExecutableNetworkInternal _networkFirstReady;
mutable InferenceEngine::SoExecutableNetworkInternal _networkActualNeeded;
NetworkFuture _cpuFuture;
mutable NetworkFuture _acceleratorFuture;
bool _enablePerfCount;
mutable std::atomic<bool> _alreadyActualNetwork = {false};
std::map<std::string, InferenceEngine::Parameter> _cacheConfig;
};
} // namespace AutoPlugin


@ -1,103 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <iostream>
#include "auto_infer_request.hpp"
#include <ie_input_info.hpp>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
namespace AutoPlugin {
using namespace InferenceEngine;
AutoInferRequest::AutoInferRequest(const InputsDataMap& networkInputs,
const OutputsDataMap& networkOutputs,
const SoIInferRequestInternal& inferRequest,
const InferenceEngine::IExecutableNetworkInternal::Ptr autoExecutableNetwork,
bool alreadyActualNetwork,
bool enablePerfCount)
: IInferRequestInternal(networkInputs, networkOutputs)
, _inferRequest(inferRequest)
, _autoExecutableNetwork(std::dynamic_pointer_cast<AutoPlugin::AutoExecutableNetwork>(autoExecutableNetwork))
, _alreadyActualNetwork(alreadyActualNetwork)
, _enablePerfCount(enablePerfCount) {
IE_ASSERT(_autoExecutableNetwork != nullptr);
for (const auto &it : _networkInputs)
_inputs[it.first] = _inferRequest->GetBlob(it.first);
for (const auto &it : _networkOutputs)
_outputs[it.first] = _inferRequest->GetBlob(it.first);
}
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> AutoInferRequest::GetPerformanceCounts() const {
if (_enablePerfCount) {
try {
return _inferRequest->GetPerformanceCounts();
} catch (...) {
return {};
}
} else {
return {};
}
}
void AutoInferRequest::InferImpl() {
HotSwapRequests(); //safe to call here (before actual inference started)
SetBlobsToDeviceRequest();
_inferRequest->Infer();
}
void AutoInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) {
IInferRequestInternal::SetBlob(name, data);
}
Blob::Ptr AutoInferRequest::GetBlob(const std::string& name) {
return IInferRequestInternal::GetBlob(name);
}
void AutoInferRequest::Cancel() {
_inferRequest->Cancel();
}
void AutoInferRequest::StartAsync() {
HotSwapRequests(); //safe to call here (before actual inference started)
SetBlobsToDeviceRequest();
_inferRequest->StartAsync();
}
InferenceEngine::StatusCode AutoInferRequest::Wait(int64_t millis_timeout) {
return _inferRequest->Wait(millis_timeout);
}
void AutoInferRequest::SetCallback(Callback callback) {
_callback = callback;
_inferRequest->SetCallback(callback);
}
void AutoInferRequest::HotSwapRequests() {
if (!_alreadyActualNetwork) {
InferenceEngine::SoExecutableNetworkInternal tempSoExecNetwork;
if (_autoExecutableNetwork->TryGetActualNetwork(tempSoExecNetwork)) {
_alreadyActualNetwork = true;
_inferRequest = {tempSoExecNetwork, tempSoExecNetwork->CreateInferRequest()};
_inferRequest->SetCallback(_callback);
}
}
}
void AutoInferRequest::SetBlobsToDeviceRequest() {
for (const auto &it : _networkInputs) {
const auto &name = it.first;
// this assumes the request is already in BUSY state
auto blob = GetBlob(name);
if (_inferRequest->GetBlob(name) != blob)
_inferRequest->SetBlob(name, blob);
}
for (const auto &it : _networkOutputs) {
const auto &name = it.first;
// this assumes the request is already in BUSY state
auto blob = GetBlob(name);
if (_inferRequest->GetBlob(name) != blob)
_inferRequest->SetBlob(name, blob);
}
}
} // namespace AutoPlugin


@ -1,55 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <atomic>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
#include <ie_blob.h>
#include <ie_common.h>
#include <map>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "auto_exec_network.hpp"
namespace AutoPlugin {
class AutoInferRequest : public InferenceEngine::IInferRequestInternal {
public:
using Ptr = std::shared_ptr<AutoInferRequest>;
explicit AutoInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
const InferenceEngine::OutputsDataMap& networkOutputs,
const InferenceEngine::SoIInferRequestInternal& inferRequest,
const InferenceEngine::IExecutableNetworkInternal::Ptr executeNetwork,
bool alreadyActualNetwork,
bool enablePerfCount);
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
void InferImpl() override;
void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) override;
InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
void Cancel() override;
//async impl
void StartAsync() override;
InferenceEngine::StatusCode Wait(int64_t millis_timeout) override;
void SetCallback(Callback callback) override;
private:
void HotSwapRequests();
void SetBlobsToDeviceRequest();
private:
InferenceEngine::SoIInferRequestInternal _inferRequest;
AutoPlugin::AutoExecutableNetwork::Ptr _autoExecutableNetwork;
Callback _callback; // need to save the callback for hot-swap of the requests
bool _alreadyActualNetwork{ false };
bool _enablePerfCount { false };
};
} // namespace AutoPlugin


@ -2,397 +2,10 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// //
#include <string>
#include <vector>
#include <memory>
#include <map>
#include <unordered_set>
#include <ie_metric_helpers.hpp>
#include <threading/ie_executor_manager.hpp>
#include <ie_algorithm.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <transformations/utils/utils.hpp>
#include <ie_icore.hpp>
#include "auto_plugin.hpp" #include "auto_plugin.hpp"
#include "ngraph_ops/convolution_ie.hpp"
#include "ngraph_ops/deconvolution_ie.hpp"
namespace AutoPlugin { namespace AutoPlugin {
namespace {
std::string GetNetworkPrecision(const InferenceEngine::CNNNetwork &network) {
auto nGraphFunc = network.getFunction();
bool isINTModel = ngraph::op::util::has_op_with_type<ngraph::op::FakeQuantize>(nGraphFunc);
if (isINTModel) {
return METRIC_VALUE(INT8);
}
for (auto & node : nGraphFunc->get_ordered_ops()) {
if (std::dynamic_pointer_cast<ngraph::opset1::Convolution>(node) ||
std::dynamic_pointer_cast<ngraph::opset1::GroupConvolution>(node) ||
std::dynamic_pointer_cast<ngraph::opset1::GroupConvolutionBackpropData>(node) ||
std::dynamic_pointer_cast<ngraph::opset1::ConvolutionBackpropData>(node) ||
std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node) ||
std::dynamic_pointer_cast<ngraph::op::DeconvolutionIE>(node)) {
auto layerType = node->input(1).get_element_type().get_type_name();
if (layerType == "f32")
return METRIC_VALUE(FP32);
if (layerType == "f16")
return METRIC_VALUE(FP16);
}
}
return METRIC_VALUE(FP32);
}
} // namespace
AutoInferencePlugin::AutoInferencePlugin() {
_pluginName = "AUTO";
}
IE::IExecutableNetworkInternal::Ptr AutoInferencePlugin::LoadNetwork(const std::string& fileName,
const ConfigType& config) {
return LoadNetworkImpl(fileName, {}, config);
}
IE::IExecutableNetworkInternal::Ptr AutoInferencePlugin::LoadExeNetworkImpl(const IE::CNNNetwork& network,
const ConfigType& config) {
if (network.getFunction() == nullptr) {
IE_THROW() << "AUTO device supports just ngraph network representation";
}
auto networkPrecision = GetNetworkPrecision(network);
return LoadNetworkImpl({}, network, config, networkPrecision);
}
std::shared_ptr<AutoExecutableNetwork> AutoInferencePlugin::LoadNetworkImpl(const std::string& modelPath,
const InferenceEngine::CNNNetwork& network,
const ConfigType& config,
const std::string& networkPrecision) {
if (GetCore() == nullptr) {
IE_THROW() << "Please, work with AUTO device via InferencEngine::Core object";
}
if (modelPath.empty() && network.getFunction() == nullptr) {
IE_THROW() << "AUTO device supports just ngraph network representation";
}
auto fullConfig = mergeConfigs(_config, config);
CheckConfig(fullConfig);
auto metaDevices = GetDeviceList(fullConfig);
auto core = GetCore(); // shared_ptr that holds the Core while the lambda below (which captures that by val) works
auto LoadNetworkAsync =
[core, modelPath, network](const std::string& device)
-> IE::SoExecutableNetworkInternal {
IE::SoExecutableNetworkInternal executableNetwork;
if (!modelPath.empty()) {
executableNetwork = core->LoadNetwork(modelPath, device, {});
} else {
executableNetwork = core->LoadNetwork(network, device, {});
}
return executableNetwork;
};
NetworkFuture cpuFuture;
NetworkFuture acceleratorFuture;
// start CPU task
const auto CPUIter = std::find_if(metaDevices.begin(), metaDevices.end(),
[=](const std::string& d)->bool{return d.find("CPU") != std::string::npos;});
if (CPUIter != metaDevices.end()) {
cpuFuture = std::async(std::launch::async, LoadNetworkAsync, *CPUIter);
}
// start accelerator task, like GPU
const auto accelerator = SelectDevice(metaDevices, networkPrecision);
bool isAccelerator = accelerator.find("CPU") == std::string::npos;
if (isAccelerator) {
acceleratorFuture = std::async(std::launch::async, LoadNetworkAsync, accelerator);
}
bool enablePerfCount = fullConfig.find(IE::PluginConfigParams::KEY_PERF_COUNT) != fullConfig.end();
return std::make_shared<AutoExecutableNetwork>(std::move(cpuFuture), std::move(acceleratorFuture), enablePerfCount);
}
IE::QueryNetworkResult AutoInferencePlugin::QueryNetwork(const IE::CNNNetwork& network, const ConfigType& config) const {
IE::QueryNetworkResult queryResult = {};
if (GetCore() == nullptr) {
IE_THROW() << "Please, work with AUTO device via InferencEngine::Core object";
}
if (network.getFunction() == nullptr) {
IE_THROW() << "AUTO device supports just ngraph network representation";
}
auto fullConfig = mergeConfigs(_config, config);
auto metaDevices = GetDeviceList(fullConfig);
std::unordered_set<std::string> supportedLayers;
for (auto&& value : metaDevices) {
try {
auto deviceQr = GetCore()->QueryNetwork(network, value, {});
std::unordered_set<std::string> deviceSupportedLayers;
for (auto &&layerQr : deviceQr.supportedLayersMap) {
deviceSupportedLayers.emplace(layerQr.first);
}
supportedLayers = supportedLayers.empty()
? deviceSupportedLayers : (deviceSupportedLayers.empty()
? supportedLayers : IE::details::Intersection(
supportedLayers, deviceSupportedLayers));
break;
} catch (...) {
}
}
for (auto&& supportedLayer : supportedLayers) {
queryResult.supportedLayersMap[supportedLayer] = GetName();
}
return queryResult;
}
IE::Parameter AutoInferencePlugin::GetConfig(const std::string& name,
const std::map<std::string, IE::Parameter> & options) const {
auto it = _config.find(name);
if (it == _config.end()) {
IE_THROW() << "Unsupported config key: " << name;
} else {
return { it->second };
}
}
void AutoInferencePlugin::SetConfig(const ConfigType& config) {
for (auto && kvp : config) {
if (kvp.first.find("AUTO_") == 0) {
_config[kvp.first] = kvp.second;
} else if (kvp.first == IE::PluginConfigParams::KEY_PERF_COUNT) {
if (kvp.second == IE::PluginConfigParams::YES ||
kvp.second == IE::PluginConfigParams::NO) {
_config[kvp.first] = kvp.second;
} else {
IE_THROW() << "Unsupported config value: " << kvp.second
<< " for key: " << kvp.first;
}
} else {
IE_THROW() << "Unsupported config key: " << kvp.first;
}
}
}
IE::Parameter AutoInferencePlugin::GetMetric(const std::string& name,
const std::map<std::string, IE::Parameter> & options) const {
if (name == METRIC_KEY(SUPPORTED_METRICS)) {
std::vector<std::string> metrics;
metrics.emplace_back(METRIC_KEY(SUPPORTED_METRICS));
metrics.emplace_back(METRIC_KEY(FULL_DEVICE_NAME));
metrics.emplace_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
metrics.emplace_back(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
} else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
std::string device_name = {"Inference Engine AUTO device"};
IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, device_name);
} else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
std::vector<std::string> configKeys = {
IE::KEY_AUTO_DEVICE_LIST,
IE::PluginConfigParams::KEY_PERF_COUNT
};
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
} else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) {
std::vector<std::string> capabilities = GetOptimizationCapabilities(options);
IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
} else {
IE_THROW() << "Unsupported metric key " << name;
}
}
//////////////////////////////////// private & protected functions ///////////////////
std::vector<DeviceName> AutoInferencePlugin::GetDeviceList(const ConfigType& config) const {
std::vector<DeviceName> deviceList;
auto deviceListConfig = config.find(IE::KEY_AUTO_DEVICE_LIST);
if (deviceListConfig == config.end()) {
deviceList = GetCore()->GetAvailableDevices();
} else {
deviceList = IE::DeviceIDParser::getHeteroDevices(deviceListConfig->second);
}
if (deviceList.empty()) {
IE_THROW() << "Please, check environment due to no supported devices can be used";
}
return deviceList;
}
std::vector<std::string> AutoInferencePlugin::GetOptimizationCapabilities(const std::map<std::string, IE::Parameter> & options) const {
// FIXME: workaround to get devicelist.
std::unordered_set<std::string> capabilities;
std::vector<std::string> queryDeviceLists{"CPU", "GPU"};
if (options.find(IE::KEY_AUTO_DEVICE_LIST) != options.end()) {
auto deviceListConfig = options.at(IE::KEY_AUTO_DEVICE_LIST).as<std::string>();
queryDeviceLists = IE::DeviceIDParser::getHeteroDevices(deviceListConfig);
} else if (_config.find(IE::KEY_AUTO_DEVICE_LIST) != _config.end()) {
auto deviceListConfig = _config.at(IE::KEY_AUTO_DEVICE_LIST);
queryDeviceLists = IE::DeviceIDParser::getHeteroDevices(deviceListConfig);
}
for (auto &item : queryDeviceLists) {
try {
std::vector<std::string> device_cap =
GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
for (auto &cap : device_cap) {
capabilities.insert(cap);
}
} catch (...) {
}
}
return {capabilities.begin(), capabilities.end()};
}
void AutoInferencePlugin::CheckConfig(const ConfigType& config) {
std::vector<std::string> supportedConfigKeys = GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS), {});
for (auto&& kvp : config) {
if (kvp.first.find("AUTO_") == 0) {
continue;
} else if (kvp.first == IE::PluginConfigParams::KEY_PERF_COUNT) {
if (kvp.second == IE::PluginConfigParams::YES ||
kvp.second == IE::PluginConfigParams::NO) {
continue;
} else {
IE_THROW() << "Unsupported config value: " << kvp.second
<< " for key: " << kvp.first;
}
} else {
IE_THROW() << "Unsupported config key: " << kvp.first;
}
}
}
DeviceName AutoInferencePlugin::SelectDevice(const std::vector<DeviceName>& metaDevices, const std::string& networkPrecision) {
if (metaDevices.empty()) {
IE_THROW(NotFound) << "No available device to select in AUTO plugin";
}
if (metaDevices.size() == 1) {
return metaDevices.at(0);
}
std::vector<DeviceName> CPU;
std::vector<DeviceName> dGPU;
std::vector<DeviceName> iGPU;
std::vector<DeviceName> MYRIAD;
std::vector<DeviceName> VPUX;
for (auto& item : metaDevices) {
if (item.find("CPU") == 0) {
CPU.push_back(item);
continue;
}
if (item.find("MYRIAD") == 0) {
MYRIAD.push_back(item);
continue;
}
if (item.find("VPUX") == 0) {
VPUX.push_back(item);
continue;
}
if (item.find("GPU") == 0) {
auto gpuFullDeviceName = GetCore()->GetMetric(item, METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>();
if (gpuFullDeviceName.find("iGPU") != std::string::npos) {
iGPU.push_back(item);
} else if (gpuFullDeviceName.find("dGPU") != std::string::npos) {
dGPU.push_back(item);
}
continue;
}
}
if (CPU.empty() && dGPU.empty() && iGPU.empty() && MYRIAD.empty() && VPUX.empty()) {
IE_THROW(NotFound) << "No available device found";
}
// Priority of selecting device: dGPU > VPUX > iGPU > MYRIAD > CPU
if (!dGPU.empty()) {
for (auto&& item : dGPU) {
std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!VPUX.empty()) {
for (auto&& item : VPUX) {
std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!iGPU.empty()) {
for (auto&& item : iGPU) {
std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!MYRIAD.empty()) {
for (auto&& item : MYRIAD) {
std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
if (supportNetwork != capability.end()) {
return item;
}
}
}
// If network is FP32 but there is no device support FP32, offload FP32 network to device support FP16.
if (networkPrecision == "FP32") {
if (!dGPU.empty()) {
for (auto&& item : dGPU) {
std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!VPUX.empty()) {
for (auto&& item : VPUX) {
std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!iGPU.empty()) {
for (auto&& item : iGPU) {
std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!MYRIAD.empty()) {
for (auto&& item : MYRIAD) {
std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
if (supportNetwork != capability.end()) {
return item;
}
}
}
}
if (CPU.empty()) {
IE_THROW() << "Cannot select any device";
}
return CPU[0];
}
ConfigType AutoInferencePlugin::mergeConfigs(ConfigType config, const ConfigType& local) {
for (auto && kvp : local) {
config[kvp.first] = kvp.second;
}
return config;
}
// define CreatePluginEngine to create plugin instance // define CreatePluginEngine to create plugin instance
static const IE::Version version = {{2, 1}, CI_BUILD_NUMBER, "AutoPlugin"}; static const InferenceEngine::Version version = {{2, 1}, CI_BUILD_NUMBER, "AutoPlugin"};
IE_DEFINE_PLUGIN_CREATE_FUNCTION(AutoInferencePlugin, version) IE_DEFINE_PLUGIN_CREATE_FUNCTION(AutoInferencePlugin, version)
} // namespace AutoPlugin } // namespace AutoPlugin


@ -4,43 +4,14 @@
#pragma once #pragma once
#include <map>
#include <vector>
#include <string>
#include <unordered_set>
#include <type_traits>
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp> #include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp> #include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
#include <threading/ie_executor_manager.hpp>
#include "auto_exec_network.hpp"
namespace AutoPlugin { namespace AutoPlugin {
namespace IE = InferenceEngine; class AutoInferencePlugin : public InferenceEngine::IInferencePlugin {
using ConfigType = std::map<std::string, std::string>;
class AutoInferencePlugin : public IE::IInferencePlugin {
public: public:
AutoInferencePlugin(); AutoInferencePlugin() = default;
~AutoInferencePlugin() = default; ~AutoInferencePlugin() = default;
IE::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const IE::CNNNetwork& network, const ConfigType& config) override;
IE::IExecutableNetworkInternal::Ptr LoadNetwork(const std::string& fileName, const ConfigType& config) override;
IE::QueryNetworkResult QueryNetwork(const IE::CNNNetwork& network, const ConfigType& config) const override;
IE::Parameter GetMetric(const std::string& name, const std::map<std::string, IE::Parameter>& options) const override;
IE::Parameter GetConfig(const std::string& name, const std::map<std::string, IE::Parameter> & options) const override;
void SetConfig(const ConfigType& config) override;
private:
std::shared_ptr<AutoExecutableNetwork> LoadNetworkImpl(const std::string& modelPath,
const InferenceEngine::CNNNetwork& network,
const ConfigType &config,
const std::string &networkPrecision = METRIC_VALUE(FP32));
std::vector<DeviceName> GetDeviceList(const ConfigType& config) const;
std::vector<std::string> GetOptimizationCapabilities(const std::map<std::string, IE::Parameter>& options) const;
DeviceName SelectDevice(const std::vector<DeviceName>& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32));
void CheckConfig(const ConfigType& config);
static ConfigType mergeConfigs(ConfigType config, const ConfigType& local);
}; };
} // namespace AutoPlugin } // namespace AutoPlugin


@ -60,6 +60,7 @@
#include <transformations/op_conversions/convert_previous_nms_to_nms_5.hpp> #include <transformations/op_conversions/convert_previous_nms_to_nms_5.hpp>
#include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp> #include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp>
#include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp> #include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
#include <transformations/op_conversions/convert_gather_downgrade.hpp>
#include <transformations/op_conversions/convert_gather_0d.hpp> #include <transformations/op_conversions/convert_gather_0d.hpp>
#include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp> #include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp>
#include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp> #include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp>
@ -362,6 +363,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
pass_config->disable<ngraph::pass::ConvertBroadcast3>(); pass_config->disable<ngraph::pass::ConvertBroadcast3>();
pass_config->disable<ngraph::pass::WeightsDequantizeToFakeQuantize>(); pass_config->disable<ngraph::pass::WeightsDequantizeToFakeQuantize>();
pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>(); pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
pass_config->enable<ngraph::pass::ConvertGather8ToGather7>();
if (!config.enable_loop_unrolling) { if (!config.enable_loop_unrolling) {
pass_config->disable<ngraph::pass::ConvertTensorIteratorToRNNSequence>(); pass_config->disable<ngraph::pass::ConvertTensorIteratorToRNNSequence>();
@ -388,11 +390,12 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork::LPT"); OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork::LPT");
using namespace ngraph::pass::low_precision; using namespace ngraph::pass::low_precision;
ngraph::pass::Manager manager;
// Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers // Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers
// With this key users can work-around such issues // With this key users can work-around such issues
if (!config.enable_fp16_for_quantized_models) { if (!config.enable_fp16_for_quantized_models) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }}); manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }});
manager.run_passes(nGraphFunc);
} }
auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({ auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({


@ -15,15 +15,15 @@ namespace CLDNNPlugin {
static cldnn::coordinate_transformation_mode GetCoordinateTransformationMode(ngraph::op::v4::Interpolate::CoordinateTransformMode mode) { static cldnn::coordinate_transformation_mode GetCoordinateTransformationMode(ngraph::op::v4::Interpolate::CoordinateTransformMode mode) {
switch (mode) { switch (mode) {
case ngraph::op::v4::Interpolate::CoordinateTransformMode::half_pixel: case ngraph::op::v4::Interpolate::CoordinateTransformMode::HALF_PIXEL:
return cldnn::coordinate_transformation_mode::half_pixel; return cldnn::coordinate_transformation_mode::half_pixel;
case ngraph::op::v4::Interpolate::CoordinateTransformMode::pytorch_half_pixel: case ngraph::op::v4::Interpolate::CoordinateTransformMode::PYTORCH_HALF_PIXEL:
return cldnn::coordinate_transformation_mode::pytorch_half_pixel; return cldnn::coordinate_transformation_mode::pytorch_half_pixel;
case ngraph::op::v4::Interpolate::CoordinateTransformMode::asymmetric: case ngraph::op::v4::Interpolate::CoordinateTransformMode::ASYMMETRIC:
return cldnn::coordinate_transformation_mode::asymmetric; return cldnn::coordinate_transformation_mode::asymmetric;
case ngraph::op::v4::Interpolate::CoordinateTransformMode::tf_half_pixel_for_nn: case ngraph::op::v4::Interpolate::CoordinateTransformMode::TF_HALF_PIXEL_FOR_NN:
return cldnn::coordinate_transformation_mode::tf_half_pixel_for_nn; return cldnn::coordinate_transformation_mode::tf_half_pixel_for_nn;
case ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners: case ngraph::op::v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS:
return cldnn::coordinate_transformation_mode::align_corners; return cldnn::coordinate_transformation_mode::align_corners;
} }
@ -32,15 +32,15 @@ static cldnn::coordinate_transformation_mode GetCoordinateTransformationMode(ngr
static cldnn::nearest_mode GetNearestMode(ngraph::op::v4::Interpolate::NearestMode mode) { static cldnn::nearest_mode GetNearestMode(ngraph::op::v4::Interpolate::NearestMode mode) {
switch (mode) { switch (mode) {
case ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor: case ngraph::op::v4::Interpolate::NearestMode::ROUND_PREFER_FLOOR:
return cldnn::nearest_mode::round_prefer_floor; return cldnn::nearest_mode::round_prefer_floor;
case ngraph::op::v4::Interpolate::NearestMode::round_prefer_ceil: case ngraph::op::v4::Interpolate::NearestMode::ROUND_PREFER_CEIL:
return cldnn::nearest_mode::round_prefer_ceil; return cldnn::nearest_mode::round_prefer_ceil;
case ngraph::op::v4::Interpolate::NearestMode::floor: case ngraph::op::v4::Interpolate::NearestMode::FLOOR:
return cldnn::nearest_mode::floor; return cldnn::nearest_mode::floor;
case ngraph::op::v4::Interpolate::NearestMode::ceil: case ngraph::op::v4::Interpolate::NearestMode::CEIL:
return cldnn::nearest_mode::ceil; return cldnn::nearest_mode::ceil;
case ngraph::op::v4::Interpolate::NearestMode::simple: case ngraph::op::v4::Interpolate::NearestMode::SIMPLE:
return cldnn::nearest_mode::simple; return cldnn::nearest_mode::simple;
} }
@ -49,18 +49,18 @@ static cldnn::nearest_mode GetNearestMode(ngraph::op::v4::Interpolate::NearestMo
static cldnn::shape_calculation_mode GetShapeCalculationMode(ngraph::op::v4::Interpolate::ShapeCalcMode mode) { static cldnn::shape_calculation_mode GetShapeCalculationMode(ngraph::op::v4::Interpolate::ShapeCalcMode mode) {
switch (mode) { switch (mode) {
case ngraph::op::v4::Interpolate::ShapeCalcMode::sizes: return cldnn::shape_calculation_mode::sizes; case ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES: return cldnn::shape_calculation_mode::sizes;
case ngraph::op::v4::Interpolate::ShapeCalcMode::scales: return cldnn::shape_calculation_mode::scales; case ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES: return cldnn::shape_calculation_mode::scales;
} }
IE_THROW() << "Unknown shape calculation mode: " << static_cast<int>(mode); IE_THROW() << "Unknown shape calculation mode: " << static_cast<int>(mode);
} }
static cldnn::resample_type GetResampleType(ngraph::op::v4::Interpolate::InterpolateMode mode) { static cldnn::resample_type GetResampleType(ngraph::op::v4::Interpolate::InterpolateMode mode) {
switch (mode) { switch (mode) {
case ngraph::op::v4::Interpolate::InterpolateMode::nearest: return cldnn::resample_type::nearest; case ngraph::op::v4::Interpolate::InterpolateMode::NEAREST: return cldnn::resample_type::nearest;
case ngraph::op::v4::Interpolate::InterpolateMode::linear: return cldnn::resample_type::caffe_bilinear; case ngraph::op::v4::Interpolate::InterpolateMode::LINEAR: return cldnn::resample_type::caffe_bilinear;
case ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx: return cldnn::resample_type::linear_onnx; case ngraph::op::v4::Interpolate::InterpolateMode::LINEAR_ONNX: return cldnn::resample_type::linear_onnx;
case ngraph::op::v4::Interpolate::InterpolateMode::cubic: return cldnn::resample_type::cubic; case ngraph::op::v4::Interpolate::InterpolateMode::CUBIC: return cldnn::resample_type::cubic;
} }
IE_THROW() << "Unknown interpolation mode: " << static_cast<int>(mode); IE_THROW() << "Unknown interpolation mode: " << static_cast<int>(mode);
} }


@ -7,6 +7,7 @@
#include "dnn_types.h" #include "dnn_types.h"
#include <cstdint> #include <cstdint>
#include <cpp/ie_cnn_network.h> #include <cpp/ie_cnn_network.h>
#include <ie_algorithm.hpp>
namespace GNAPluginNS { namespace GNAPluginNS {
namespace GNALimitations { namespace GNALimitations {
@ -114,5 +115,10 @@ public:
bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage); bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage);
inline size_t GetMinBatchToFitInBuffer(InferenceEngine::DataPtr input) {
auto total_size = InferenceEngine::details::product(std::begin(input->getDims()), std::end(input->getDims()));
return total_size / bufferMaxSize + 1;
}
} // namespace GNALimitations } // namespace GNALimitations
} // namespace GNAPluginNS } // namespace GNAPluginNS


@ -19,6 +19,7 @@
#include "gna_slope_scale.h" #include "gna_slope_scale.h"
#include "runtime/pwl.h" #include "runtime/pwl.h"
#include "gna_data_types.hpp" #include "gna_data_types.hpp"
#include "round_float_define.hpp"
namespace GNAPluginNS { namespace GNAPluginNS {
namespace frontend { namespace frontend {
@ -41,8 +42,8 @@ struct ScaleFactorUpdateResult {
* @param p2 Second float value * @param p2 Second float value
* @return Returns true if two float values are equal * @return Returns true if two float values are equal
*/ */
static bool fp32eq(float p1, float p2) { static bool fp32eq(float p1, float p2, float accuracy = 0.00001f) {
return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2))); return (std::abs(p1 - p2) <= accuracy * std::min(std::abs(p1), std::abs(p2)));
} }
/** /**
@ -73,14 +74,14 @@ static float selectBestOutputScaleFactors(float inScale, std::vector<float> outS
auto sd = 0.0; auto sd = 0.0;
for (size_t j = 0; j < slopes.size(); ++j) { for (size_t j = 0; j < slopes.size(); ++j) {
auto s = gna_slope(slopes[j], inScale, outScale); auto s = gna_slope(slopes[j], inScale, outScale);
auto slope = static_cast<uint32_t>(s.slope * s.slope_scale); auto slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
if (slope < static_cast<uint32_t>(std::numeric_limits<int16_t>::min()) && slope > static_cast<uint32_t>(std::numeric_limits<int16_t>::max())) { if (slope < std::numeric_limits<int16_t>::min() || slope > std::numeric_limits<int16_t>::max()) {
sd += std::numeric_limits<int8_t>::max(); sd += std::numeric_limits<int8_t>::max();
continue; continue;
} }
auto testSlope = static_cast<double>(slope) / s.slope_scale * inScale / outScale; auto testSlope = static_cast<double>(slope) / s.slope_scale * inScale / outScale;
if (fp32eq(testSlope, slopes[j])) { if (fp32eq(testSlope, slopes[j], 1.0E-6)) {
return outScale; return outScale;
} }


@ -683,7 +683,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto input = layer->insData[0].lock(); auto input = layer->insData[0].lock();
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(input, 8)->getDims(); auto reshaped_dims = Get2DReshapedData(input, GNALimitations::GetMinBatchToFitInBuffer(input), 8)->getDims();
const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ? const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor; GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_rows_in = reshaped_dims[1];
@ -908,7 +908,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto inputs = layer->insData.begin()->lock(); auto inputs = layer->insData.begin()->lock();
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(inputs, 8)->getDims(); auto reshaped_dims = Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims();
uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_rows_in = reshaped_dims[1];
uint32_t num_columns_in = reshaped_dims[0]; uint32_t num_columns_in = reshaped_dims[0];
uint32_t num_rows_out = num_rows_in; uint32_t num_rows_out = num_rows_in;
@ -1410,7 +1410,8 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
noOfInputsDivisor = GNALimitations::noOfInputsLowPrecDivisor; noOfInputsDivisor = GNALimitations::noOfInputsLowPrecDivisor;
} }
auto input_data = HasTo2DReshapeData(layer) ? Get2DReshapedData(inputs, 8) : inputs; auto input_data = HasTo2DReshapeData(layer) ?
Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8) : inputs;
auto in_dims = input_data->getDims(); auto in_dims = input_data->getDims();
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front(); auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size; uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size;
@ -2212,8 +2213,8 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
nextMemoryLayer.reserved_size = ALIGN64(memorySize); nextMemoryLayer.reserved_size = ALIGN64(memorySize);
} else { } else {
IE_ASSERT(nextMemoryLayer.reserved_size >= ALIGN64(num_data_bytes_out)); // We may need to extend memory buffer if connected input size is bigger, for example for concat connection
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer)); gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out));
} }
return; return;
} }
@ -2498,8 +2499,8 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
memoryLayer.reserved_size = ALIGN64(memorySize); memoryLayer.reserved_size = ALIGN64(memorySize);
} else { } else {
IE_ASSERT(memoryLayer.reserved_size >= ALIGN64(num_data_bytes_in)); // We may need to extend memory buffer if connected input size is bigger, for example for concat connection
gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset); gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
} }
return prevLayer; return prevLayer;
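
Both bind_ptr changes in this file replace a hard assertion on the pre-reserved size with a request that carries the consumer's size, so the memory layer's buffer can be extended when a larger consumer such as a concat input is attached. A toy sketch of that policy, using a made-up BoundRegion type rather than the plugin's allocator:

#include <algorithm>
#include <cstddef>

// Toy illustration only: record the largest requested size instead of asserting
// that the initially reserved size is already big enough.
struct BoundRegion {
    std::size_t reservedBytes = 0;
    void bind(std::size_t requestedBytes) {
        reservedBytes = std::max(reservedBytes, requestedBytes);  // grow on demand
    }
};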


@ -15,7 +15,9 @@ namespace GNAPluginNS {
* @param input a pointer to data to be reshaped * @param input a pointer to data to be reshaped
* @param maxZeroDimSize the maximum size of zero dimension * @param maxZeroDimSize the maximum size of zero dimension
*/ */
inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input, size_t maxZeroDimSize) { inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input, size_t minZeroDimSize,
size_t maxZeroDimSize) {
IE_ASSERT(minZeroDimSize > 0);
auto dims = input->getDims(); auto dims = input->getDims();
uint32_t numRowsIn = InferenceEngine::details::product(begin(dims), end(dims)); uint32_t numRowsIn = InferenceEngine::details::product(begin(dims), end(dims));
uint32_t numColumnsIn = 1; uint32_t numColumnsIn = 1;
@ -23,7 +25,7 @@ inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input
if (numRowsIn % 8 == 0) { if (numRowsIn % 8 == 0) {
if (dims.size() >= 2 || dims[0] >= maxZeroDimSize) { if (dims.size() >= 2 || dims[0] >= maxZeroDimSize) {
size_t indexDivide = maxZeroDimSize; size_t indexDivide = maxZeroDimSize;
while (indexDivide > 1) { while (indexDivide > minZeroDimSize) {
if ((numRowsIn / 8) % indexDivide == 0) break; if ((numRowsIn / 8) % indexDivide == 0) break;
--indexDivide; --indexDivide;
} }
@ -55,4 +57,5 @@ inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) {
// Don't reshape diagonal layers with bias connection // Don't reshape diagonal layers with bias connection
return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput(); return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput();
} }
} // namespace GNAPluginNS } // namespace GNAPluginNS
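
The updated helper searches for the largest batch (zero-dimension) size in the range (minZeroDimSize, maxZeroDimSize] that divides numRowsIn / 8 evenly, instead of always walking down to 1. A standalone re-implementation of just that divisor search, for illustration:

#include <cstdint>

// Illustrative only: pick the largest candidate in (minZeroDimSize, maxZeroDimSize]
// that divides totalElements / 8, falling back to minZeroDimSize otherwise.
static uint32_t pickZeroDimDivisor(uint32_t totalElements,
                                   uint32_t minZeroDimSize,
                                   uint32_t maxZeroDimSize) {
    uint32_t divisor = maxZeroDimSize;
    while (divisor > minZeroDimSize) {
        if ((totalElements / 8) % divisor == 0) break;
        --divisor;
    }
    return divisor;
}
// e.g. pickZeroDimDivisor(512, 1, 8) == 8, pickZeroDimDivisor(168, 1, 8) == 7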


@ -54,6 +54,7 @@
#include <transformations/common_optimizations/pull_transpose_through_fq.hpp> #include <transformations/common_optimizations/pull_transpose_through_fq.hpp>
#include <transformations/common_optimizations/relu_fake_quantize_fusion.hpp> #include <transformations/common_optimizations/relu_fake_quantize_fusion.hpp>
#include <transformations/common_optimizations/add_fake_quantize_fusion.hpp> #include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
#include <transformations/common_optimizations/transpose_sinking.hpp>
#include <transformations/utils/utils.hpp> #include <transformations/utils/utils.hpp>
#include "transformations/remove_extra_reshapes.hpp" #include "transformations/remove_extra_reshapes.hpp"
@ -703,9 +704,9 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
manager.register_pass<SplitConvolutionWithBias>(); manager.register_pass<SplitConvolutionWithBias>();
manager.register_pass<SplitConvolution>(); manager.register_pass<SplitConvolution>();
manager.register_pass<HandleTransposesAroundMatMul>(); manager.register_pass<HandleTransposesAroundMatMul>();
manager.register_pass<SwapInputMatMul>();
manager.register_pass<SwapInputMatMulWithBias>();
manager.register_pass<SwapInputMatMulWithFq>(); manager.register_pass<SwapInputMatMulWithFq>();
manager.register_pass<SwapInputMatMulWithBias>();
manager.register_pass<SwapInputMatMul>();
manager.register_pass<InsertTransposeAfterConvOrPool>(); manager.register_pass<InsertTransposeAfterConvOrPool>();
manager.register_pass<ReorderActivationAndPooling>(); manager.register_pass<ReorderActivationAndPooling>();
manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>(); manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
@ -727,6 +728,8 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
pass_config->disable<ngraph::pass::ReluFakeQuantizeFusion>(); pass_config->disable<ngraph::pass::ReluFakeQuantizeFusion>();
// Consider to enable after per-channel quantization on FakeQuantize layer is supported in GNAPlugin, see issue 52034 // Consider to enable after per-channel quantization on FakeQuantize layer is supported in GNAPlugin, see issue 52034
pass_config->disable<ngraph::pass::AddFakeQuantizeFusion>(); pass_config->disable<ngraph::pass::AddFakeQuantizeFusion>();
// TransposeReduction can be enabled when Transpose-Conv-Transpose patterns will be handled in ngraph transformations
pass_config->disable<ngraph::pass::TransposeReduction>();
manager.run_passes(graph); manager.run_passes(graph);
convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(graph, clonedNetwork); convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(graph, clonedNetwork);
} }
@ -1576,6 +1579,18 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i
transpose_inputs_info, transpose_inputs_info,
transpose_outputs_info); transpose_outputs_info);
// If scale factors are defined in configuration we still need to use them instead of imported values,
// for example to change the scale factors for the old models.
if (!config.inputScaleFactors.empty()) {
IE_ASSERT(config.inputScaleFactors.size() == inputsDesc->inputScaleFactors.size());
for (size_t i = 0; i < config.inputScaleFactors.size(); ++i) {
if (config.inputScaleFactors[i] != GNAPluginNS::kScaleFactorDefault) {
gnalog() << "[Import Network] Using input scale factor defined in configuration for input " << i << std::endl;
inputsDesc->inputScaleFactors[i] = config.inputScaleFactors[i];
}
}
}
#if GNA_LIB_VER == 2 #if GNA_LIB_VER == 2
auto getOrientation = [](Gna2Operation & gnaOperation) { auto getOrientation = [](Gna2Operation & gnaOperation) {
return gnaOperation.Type == Gna2OperationTypeConvolution ? return gnaOperation.Type == Gna2OperationTypeConvolution ?
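
The added block lets scale factors from the configuration win over the ones stored in an imported model, with kScaleFactorDefault marking "not set". A minimal standalone sketch of that override rule, assuming the default is 1.0f as in the Config change further below:

#include <algorithm>
#include <cstddef>
#include <vector>

// Minimal sketch: values still equal to the default are treated as "not configured"
// and the imported scale factor is kept; anything else overrides it.
static void overrideImportedScaleFactors(std::vector<float>& imported,
                                         const std::vector<float>& configured,
                                         float notSetDefault = 1.0f) {
    const std::size_t n = std::min(imported.size(), configured.size());
    for (std::size_t i = 0; i < n; ++i) {
        if (configured[i] != notSetDefault) {
            imported[i] = configured[i];
        }
    }
}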


@ -95,7 +95,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
} }
// missing scale factors are set to be 1.0f // missing scale factors are set to be 1.0f
if (inputScaleFactors.size() <= input_index) { if (inputScaleFactors.size() <= input_index) {
inputScaleFactors.resize(input_index + 1, 1.f); inputScaleFactors.resize(input_index + 1, GNAPluginNS::kScaleFactorDefault);
} }
inputScaleFactors[input_index] = InferenceEngine::CNNLayer::ie_parse_float(value); inputScaleFactors[input_index] = InferenceEngine::CNNLayer::ie_parse_float(value);
} else if (key == GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE)) { } else if (key == GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE)) {


@ -18,6 +18,8 @@
namespace GNAPluginNS { namespace GNAPluginNS {
static const float kScaleFactorDefault = 1.f;
struct Config { struct Config {
Config() { Config() {
AdjustKeyMapValues(); AdjustKeyMapValues();


@ -45,4 +45,18 @@ public:
}; };
std::vector<SplitConnectedLayerInfo> splitOutputLayers; std::vector<SplitConnectedLayerInfo> splitOutputLayers;
}; };
// @brief Returns the sizes of split outputs so that the input tensor is split into aligned parts not greater than the specified size
static std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = 64) {
std::vector<uint32_t> splitSizes;
uint32_t maxAlignedSplitSize = maxSplitSize - maxSplitSize % alignment;
uint32_t usedSize = 0;
while (usedSize < totalSize) {
uint32_t partSize = std::min(totalSize - usedSize, maxAlignedSplitSize);
splitSizes.push_back(partSize);
usedSize += partSize;
}
return splitSizes;
}
} // namespace GNAPluginNS } // namespace GNAPluginNS
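
Running the same arithmetic standalone shows what GetAlignedSplitSizes produces; the sizes below are hypothetical and the loop simply mirrors the helper rather than calling it:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    const uint32_t totalSize = 70000;      // hypothetical tensor size
    const uint32_t maxSplitSize = 65536;   // hypothetical buffer limit
    const uint32_t alignment = 64;

    std::vector<uint32_t> splitSizes;
    const uint32_t maxAligned = maxSplitSize - maxSplitSize % alignment;  // 65536
    uint32_t used = 0;
    while (used < totalSize) {
        const uint32_t part = std::min(totalSize - used, maxAligned);
        splitSizes.push_back(part);
        used += part;
    }
    for (auto s : splitSizes) std::cout << s << ' ';  // prints: 65536 4464
    std::cout << std::endl;
    return 0;
}

Only the last chunk may end up unaligned; every earlier chunk is a multiple of the alignment.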


@ -87,7 +87,7 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
}); });
IE_ASSERT(inputLayer != nullptr); IE_ASSERT(inputLayer != nullptr);
size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? nextLayer->outData[0]->getDims().back() : size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? nextLayer->outData[0]->getDims().back() :
Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1]; Get2DReshapedData(nextLayer->outData[0], GNALimitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8)->getDims()[1];
std::vector<float> weightsValues(weightsSize, fillValue); std::vector<float> weightsValues(weightsSize, fillValue);
IE_ASSERT(diagLayer != nullptr); IE_ASSERT(diagLayer != nullptr);
diagLayer->_weights = make_shared_blob<float>( diagLayer->_weights = make_shared_blob<float>(
@ -1113,6 +1113,9 @@ void InsertConcatAligningFilterPass::run() {
SizeVector({filterWeights.size()}), SizeVector({filterWeights.size()}),
Layout::C)); Layout::C));
concatAligningFilter->_weights->allocate(); concatAligningFilter->_weights->allocate();
if (!concatAligningFilter->_weights->buffer().as<float*>()) {
THROW_GNA_EXCEPTION << "Failed to allocate weights of size " << filterWeights.size() << " for " << filterName;
}
CopyVectorToBlob(concatAligningFilter->_weights, filterWeights); CopyVectorToBlob(concatAligningFilter->_weights, filterWeights);
@ -1395,15 +1398,20 @@ void EltwiseSplitOverChannelsPass::run() {
THROW_GNA_LAYER_EXCEPTION(l) << "number of outputs expected to be 1"; THROW_GNA_LAYER_EXCEPTION(l) << "number of outputs expected to be 1";
} }
auto oData = l->outData.front(); auto oData = l->outData.front();
auto out_width = GetDataDimSize(oData, DataDimName::W); auto oDims = oData->getDims();
auto totalElementsForOutput = details::product(oData->getDims().begin(), oData->getDims().end()); auto totalElementsSize = details::product(std::begin(oDims), std::end(oDims));
// gna limit this to be OxFFFF if (totalElementsSize <= GNALimitations::bufferMaxSize) {
auto maxAffineElements = 65536 - 64;
if (totalElementsForOutput <= maxAffineElements) {
continue; continue;
} }
auto totalSplits = 1 + totalElementsForOutput / maxAffineElements; auto firstValuableDim = std::find_if(std::begin(oDims), std::end(oDims), [](size_t val) { return val > 1; });
IE_ASSERT(firstValuableDim != std::end(oDims));
auto splittedElementsSize = *firstValuableDim;
auto splittedDimIx = std::distance(std::begin(oDims), firstValuableDim);
// Split output size should be a multiple of 64 to avoid align filters insertion
auto splitSizes = GetAlignedSplitSizes(splittedElementsSize,
GNALimitations::bufferMaxSize * splittedElementsSize / totalElementsSize);
pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n"; pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n";
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(l); auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(l);
@ -1421,27 +1429,13 @@ void EltwiseSplitOverChannelsPass::run() {
auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc(); auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc();
// create split layer outputs // create split layer outputs
size_t usedElements = 0; for (auto elementsNum : splitSizes) {
for (size_t i = 0; i < totalSplits; i++) { auto newDims = oDims;
SizeVector newDims; newDims[splittedDimIx] = elementsNum;
size_t elements_num = std::min(totalElementsForOutput - usedElements,
static_cast<size_t>(maxAffineElements));
if (inputDesc.getDims().size() == 2) {
newDims = SizeVector{1, elements_num};
} else {
elements_num = elements_num - elements_num % out_width;
newDims = SizeVector{1, elements_num / out_width, out_width};
}
auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout()); auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout());
auto data = std::make_shared<Data>(l->name + "/" + std::to_string(kThEltwiseInput) + "/1", newDesc); auto data = std::make_shared<Data>(l->name + "/" + std::to_string(kThEltwiseInput) + "/1", newDesc);
getCreatorLayer(data) = split; getCreatorLayer(data) = split;
split->outData.push_back(data); split->outData.push_back(data);
usedElements += elements_num;
if (usedElements == totalElementsForOutput) {
break;
}
} }
// replacing connection X->eltwise to X->split // replacing connection X->eltwise to X->split
auto oData = CNNLayerFindOutData(l, kThEltwiseInput); auto oData = CNNLayerFindOutData(l, kThEltwiseInput);
@ -1461,7 +1455,7 @@ void EltwiseSplitOverChannelsPass::run() {
concat->outData.push_back(masterEltwise->outData.front()); concat->outData.push_back(masterEltwise->outData.front());
getCreatorLayer(masterEltwise->outData.front()) = concat; getCreatorLayer(masterEltwise->outData.front()) = concat;
for (size_t k = 0; k != totalSplits; k++) { for (size_t k = 0; k != splitSizes.size(); k++) {
auto eltwiseRaw = std::make_shared<EltwiseLayer>( auto eltwiseRaw = std::make_shared<EltwiseLayer>(
LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32}); LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32});
IE_ASSERT(eltwiseRaw != nullptr); IE_ASSERT(eltwiseRaw != nullptr);
@ -1521,7 +1515,9 @@ void SubstituteScaleShiftBroadCastPass::run() {
if (was_reshaped) { if (was_reshaped) {
dataDims = reshaped_data[insData->getName()]; dataDims = reshaped_data[insData->getName()];
} else { } else {
dataDims = HasTo2DReshapeData(l) ? Get2DReshapedData(insData, 8)->getDims() : insData->getDims(); dataDims = HasTo2DReshapeData(l) ?
Get2DReshapedData(insData, GNALimitations::GetMinBatchToFitInBuffer(insData), 8)->getDims() :
insData->getDims();
} }
if (dataDims.size() <= 2) { if (dataDims.size() <= 2) {
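
For the eltwise split above, the split axis is the first output dimension greater than 1, and its element budget is the buffer limit scaled by that dimension's share of the whole tensor. A standalone walk-through with assumed dimensions and an assumed buffer limit:

#include <algorithm>
#include <cstdint>
#include <vector>

int main() {
    const std::vector<uint32_t> oDims = {1, 1024, 128};  // assumed eltwise output dims
    const uint32_t bufferMaxSize = 65528;                // assumed GNA buffer limit
    const uint32_t total = 1 * 1024 * 128;               // 131072 elements

    // first dimension greater than 1 becomes the split axis
    const auto firstValuable = std::find_if(oDims.begin(), oDims.end(),
                                            [](uint32_t v) { return v > 1; });
    const uint32_t splitDimSize = *firstValuable;        // 1024

    // budget for that single dimension: scale the buffer limit by its share
    const uint32_t perDimBudget = bufferMaxSize * splitDimSize / total;  // 511

    // GetAlignedSplitSizes(1024, 511) would then yield {448, 448, 128}
    (void)perDimBudget;
    return 0;
}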


@ -12,6 +12,7 @@
#include <ngraph/pattern/op/wrap_type.hpp> #include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp> #include <ngraph/rt_info.hpp>
#include "backend/gna_limitations.hpp" #include "backend/gna_limitations.hpp"
#include "layers/gna_split_layer.hpp"
using namespace GNAPluginNS; using namespace GNAPluginNS;
@ -19,22 +20,6 @@ NGRAPH_RTTI_DEFINITION(SplitConvolution, "SplitConvolution", 0);
NGRAPH_RTTI_DEFINITION(SplitConvolutionWithBias, "SplitConvolutionWithBias", 0); NGRAPH_RTTI_DEFINITION(SplitConvolutionWithBias, "SplitConvolutionWithBias", 0);
NGRAPH_RTTI_DEFINITION(SplitConvolutionWithFq, "SplitConvolutionWithFq", 0); NGRAPH_RTTI_DEFINITION(SplitConvolutionWithFq, "SplitConvolutionWithFq", 0);
static std::vector<int64_t> GetConvSplitSizes(std::shared_ptr<ngraph::Node> conv) {
uint32_t width = conv->get_input_shape(0).back();
uint32_t in_channels = conv->get_input_shape(0).at(1);
uint32_t usedWidth = 0;
std::vector<int64_t> split_sizes;
uint32_t width_max_size = GNALimitations::bufferMaxSize / in_channels;
width_max_size = width_max_size - width_max_size % 64;
while (usedWidth < width) {
uint32_t width_part = std::min(width - usedWidth, width_max_size);
split_sizes.push_back(width_part);
usedWidth += width_part;
}
IE_ASSERT(usedWidth == width);
return split_sizes;
}
static bool Convert(std::shared_ptr<ngraph::Node> conv, static bool Convert(std::shared_ptr<ngraph::Node> conv,
std::shared_ptr<ngraph::Node> add, std::shared_ptr<ngraph::Node> add,
std::shared_ptr<ngraph::Node> bias, std::shared_ptr<ngraph::Node> bias,
@ -45,15 +30,21 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
return false; return false;
} }
auto split_sizes = GetConvSplitSizes(conv); uint32_t width = conv->get_input_shape(0).back();
uint32_t in_channels = conv->get_input_shape(0).at(1);
auto split_sizes = GetAlignedSplitSizes(width, GNALimitations::bufferMaxSize / in_channels);
IE_ASSERT(split_sizes.size() > 1); IE_ASSERT(split_sizes.size() > 1);
std::vector<int64_t> split_sizes_casted(split_sizes.size());
std::transform(std::begin(split_sizes), std::end(split_sizes), std::begin(split_sizes_casted), [](uint32_t size) {
return static_cast<int64_t>(size);
});
/* TODO check if it's NHWC convolution wrapped with transposes or all input dimensions except of width == 1, /* TODO check if it's NHWC convolution wrapped with transposes or all input dimensions except of width == 1,
otherwise this split axis isn't supported */ otherwise this split axis isn't supported */
const int64_t width_axis = conv->get_input_shape(0).size() - 1; const int64_t width_axis = conv->get_input_shape(0).size() - 1;
auto split_node = std::make_shared<ngraph::opset7::VariadicSplit>(conv->input_value(0), auto split_node = std::make_shared<ngraph::opset7::VariadicSplit>(conv->input_value(0),
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{width_axis}), ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{width_axis}),
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes.size()}), split_sizes)); ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes_casted.size()}), split_sizes_casted));
ngraph::copy_runtime_info(conv, split_node); ngraph::copy_runtime_info(conv, split_node);
split_node->set_friendly_name(conv->get_friendly_name() + "/split"); split_node->set_friendly_name(conv->get_friendly_name() + "/split");
ngraph::OutputVector convOutputs; ngraph::OutputVector convOutputs;


@ -41,23 +41,6 @@ namespace VPUConfigParams {
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::MYRIAD_ENABLE_FORCE_RESET instead") INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::MYRIAD_ENABLE_FORCE_RESET instead")
DECLARE_VPU_MYRIAD_CONFIG_KEY(FORCE_RESET); DECLARE_VPU_MYRIAD_CONFIG_KEY(FORCE_RESET);
/**
* @deprecated
* @brief This option allows to specify device.
* If specified device is not available then creating infer request will throw an exception.
*/
INFERENCE_ENGINE_DEPRECATED("")
DECLARE_VPU_MYRIAD_CONFIG_KEY(PLATFORM);
/**
* @deprecated
* @brief Supported keys definition for VPU_MYRIAD_CONFIG_KEY(PLATFORM) option.
*/
INFERENCE_ENGINE_DEPRECATED("")
DECLARE_VPU_MYRIAD_CONFIG_VALUE(2450);
INFERENCE_ENGINE_DEPRECATED("")
DECLARE_VPU_MYRIAD_CONFIG_VALUE(2480);
/** /**
* @deprecated Use InferenceEngine::MYRIAD_DDR_TYPE instead * @deprecated Use InferenceEngine::MYRIAD_DDR_TYPE instead
* @brief This option allows to specify device memory type. * @brief This option allows to specify device memory type.


@ -19,10 +19,6 @@
#include "ie_plugin_config.hpp" #include "ie_plugin_config.hpp"
#include "ie_version.hpp" #include "ie_version.hpp"
namespace ngraph {
class Function;
} // namespace ngraph
namespace InferenceEngine { namespace InferenceEngine {
class IExtension; class IExtension;
class Blob; class Blob;
@ -30,6 +26,9 @@ class RemoteContext;
} // namespace InferenceEngine } // namespace InferenceEngine
namespace ov { namespace ov {
class Function;
namespace runtime { namespace runtime {
/** /**
@ -72,7 +71,7 @@ public:
* * binPath parameter is not used. * * binPath parameter is not used.
* @return Function * @return Function
*/ */
std::shared_ptr<ngraph::Function> read_model(const std::wstring& modelPath, const std::wstring& binPath = {}) const; std::shared_ptr<ov::Function> read_model(const std::wstring& modelPath, const std::wstring& binPath = {}) const;
#endif #endif
/** /**
@ -86,7 +85,7 @@ public:
* * binPath parameter is not used. * * binPath parameter is not used.
* @return Function * @return Function
*/ */
std::shared_ptr<ngraph::Function> read_model(const std::string& modelPath, const std::string& binPath = {}) const; std::shared_ptr<ov::Function> read_model(const std::string& modelPath, const std::string& binPath = {}) const;
/** /**
* @brief Reads models from IR and ONNX formats * @brief Reads models from IR and ONNX formats
* @param model string with model in IR or ONNX format * @param model string with model in IR or ONNX format
@ -101,7 +100,7 @@ public:
* constant data becomes to point to invalid memory. * constant data becomes to point to invalid memory.
* @return Function * @return Function
*/ */
std::shared_ptr<ngraph::Function> read_model(const std::string& model, std::shared_ptr<ov::Function> read_model(const std::string& model,
const std::shared_ptr<const InferenceEngine::Blob>& weights) const; const std::shared_ptr<const InferenceEngine::Blob>& weights) const;
/** /**
@ -116,7 +115,7 @@ public:
* operation * operation
* @return An executable network reference * @return An executable network reference
*/ */
InferenceEngine::ExecutableNetwork compile_model(const std::shared_ptr<const ngraph::Function>& network, InferenceEngine::ExecutableNetwork compile_model(const std::shared_ptr<const ov::Function>& network,
const std::string& deviceName, const std::string& deviceName,
const std::map<std::string, std::string>& config = {}); const std::map<std::string, std::string>& config = {});
@ -145,7 +144,7 @@ public:
* operation * operation
* @return An executable network object * @return An executable network object
*/ */
InferenceEngine::ExecutableNetwork compile_model(const std::shared_ptr<const ngraph::Function>& network, InferenceEngine::ExecutableNetwork compile_model(const std::shared_ptr<const ov::Function>& network,
const std::shared_ptr<InferenceEngine::RemoteContext>& context, const std::shared_ptr<InferenceEngine::RemoteContext>& context,
const std::map<std::string, std::string>& config = {}); const std::map<std::string, std::string>& config = {});
@ -189,7 +188,7 @@ public:
* @param config Optional map of pairs: (config parameter name, config parameter value) * @param config Optional map of pairs: (config parameter name, config parameter value)
* @return An object containing a map of pairs a layer name -> a device name supporting this layer. * @return An object containing a map of pairs a layer name -> a device name supporting this layer.
*/ */
InferenceEngine::QueryNetworkResult query_model(const std::shared_ptr<const ngraph::Function>& network, InferenceEngine::QueryNetworkResult query_model(const std::shared_ptr<const ov::Function>& network,
const std::string& deviceName, const std::string& deviceName,
const std::map<std::string, std::string>& config = {}) const; const std::map<std::string, std::string>& config = {}) const;
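
A hypothetical usage sketch of the updated signatures; the enclosing class name and header path are not visible in this hunk and are assumed to be ov::runtime::Core and openvino/runtime/core.hpp, and the model path and device name are placeholders:

#include <memory>
#include <string>
// assumed header for the class these declarations belong to
#include "openvino/runtime/core.hpp"

int main() {
    ov::runtime::Core core;                                     // assumed class name
    std::shared_ptr<ov::Function> model = core.read_model("model.xml");       // IR or ONNX
    InferenceEngine::ExecutableNetwork compiled = core.compile_model(model, "CPU");
    (void)compiled;
    return 0;
}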


@ -62,18 +62,17 @@ Parsed<T> parseDeviceNameIntoConfig(const std::string& deviceName, const std::ma
} else if (deviceName_.find("MULTI:") == 0) { } else if (deviceName_.find("MULTI:") == 0) {
deviceName_ = "MULTI"; deviceName_ = "MULTI";
config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = deviceName.substr(6); config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = deviceName.substr(6);
} else if (deviceName_.find("AUTO") == 0) { } else if (deviceName.find("AUTO") == 0) {
deviceName_ = "AUTO"; deviceName_ = "MULTI";
if (deviceName.size() > std::string("AUTO").size()) { if (deviceName.find("AUTO:") == 0) {
std::string deviceList = deviceName.substr(std::string("AUTO:").size()); config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] =
if (deviceList.find("AUTO") != std::string::npos) { deviceName.substr(std::string("AUTO:").size());
IE_THROW() << "Device list for AUTO should not be AUTO";
}
config_[InferenceEngine::KEY_AUTO_DEVICE_LIST] = deviceName.substr(std::string("AUTO:").size());
} }
config_.insert({CONFIG_KEY_INTERNAL(WORK_MODE), ""});
} else { } else {
if (deviceName_.empty()) { if (deviceName_ == "AUTO") {
deviceName_ = "AUTO"; deviceName_ = "MULTI";
config_.insert({CONFIG_KEY_INTERNAL(WORK_MODE), ""});
} }
InferenceEngine::DeviceIDParser parser(deviceName_); InferenceEngine::DeviceIDParser parser(deviceName_);
deviceName_ = parser.getDeviceName(); deviceName_ = parser.getDeviceName();
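
A standalone re-implementation of just the renaming rule above; the real code also inserts the internal WORK_MODE key and uses InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, so the plain string key below is only a placeholder:

#include <map>
#include <string>
#include <utility>

// Sketch only: "AUTO" and "AUTO:<devices>" are redirected to the MULTI plugin,
// and everything after the colon becomes the MULTI device priority list.
static std::pair<std::string, std::map<std::string, std::string>>
resolveAutoAlias(const std::string& deviceName) {
    std::map<std::string, std::string> config;
    if (deviceName.rfind("AUTO", 0) == 0) {                 // starts with "AUTO"
        if (deviceName.rfind("AUTO:", 0) == 0) {
            config["MULTI_DEVICE_PRIORITIES"] = deviceName.substr(5);  // e.g. "CPU,GPU"
        }
        return {"MULTI", std::move(config)};
    }
    return {deviceName, std::move(config)};
}
// resolveAutoAlias("AUTO:CPU,GPU") -> {"MULTI", {{"MULTI_DEVICE_PRIORITIES", "CPU,GPU"}}}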
@ -579,7 +578,21 @@ public:
} }
} }
auto parsed = parseDeviceNameIntoConfig(deviceName); // AUTO case
{
if (deviceName.find("AUTO:") == 0) {
IE_THROW()
<< "You can get specific metrics with the GetMetric only for the MULTI itself (without devices). "
"To get individual devices's metrics call GetMetric for each device separately";
}
}
std::string pluginName = deviceName;
if (pluginName == "AUTO") {
pluginName = "MULTI";
}
auto parsed = parseDeviceNameIntoConfig(pluginName);
// we need to return a copy of Parameter object which is created on Core side, // we need to return a copy of Parameter object which is created on Core side,
// not in InferenceEngine plugin side, which can be unloaded from Core in a parallel thread // not in InferenceEngine plugin side, which can be unloaded from Core in a parallel thread
@ -629,11 +642,14 @@ public:
* @param deviceName A name of device * @param deviceName A name of device
* @return Reference to a CPP plugin wrapper * @return Reference to a CPP plugin wrapper
*/ */
InferenceEngine::InferencePlugin GetCPPPluginByName(const std::string& deviceName) const { InferenceEngine::InferencePlugin GetCPPPluginByName(const std::string& pluginName) const {
OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "CoreImpl::GetCPPPluginByName"); OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "CoreImpl::GetCPPPluginByName");
std::lock_guard<std::mutex> lock(pluginsMutex); std::lock_guard<std::mutex> lock(pluginsMutex);
auto deviceName = pluginName;
if (deviceName == "AUTO") {
deviceName = "MULTI";
}
auto it = pluginRegistry.find(deviceName); auto it = pluginRegistry.find(deviceName);
if (it == pluginRegistry.end()) { if (it == pluginRegistry.end()) {
IE_THROW() << "Device with \"" << deviceName << "\" name is not registered in the InferenceEngine"; IE_THROW() << "Device with \"" << deviceName << "\" name is not registered in the InferenceEngine";
@ -856,9 +872,9 @@ public:
} else if (deviceName.find("AUTO") == 0) { } else if (deviceName.find("AUTO") == 0) {
auto pos = deviceName.find_first_of(":"); auto pos = deviceName.find_first_of(":");
if (pos != std::string::npos) { if (pos != std::string::npos) {
deviceNames = InferenceEngine::DeviceIDParser::getHeteroDevices(deviceName.substr(pos + 1)); deviceNames = InferenceEngine::DeviceIDParser::getMultiDevices(deviceName.substr(pos + 1));
} }
deviceNames.emplace_back("AUTO"); deviceNames.emplace_back("MULTI");
} else { } else {
deviceNames.push_back(deviceName); deviceNames.push_back(deviceName);
} }


@ -0,0 +1,301 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "threading/ie_tbb_streams_executor.hpp"
#include <atomic>
#include <list>
#include <memory>
#include <queue>
#include <thread>
#include <tuple>
#include <utility>
#include "details/ie_exception.hpp"
#include "ie_parallel.hpp"
#include "ie_parallel_custom_arena.hpp"
#include "ie_system_conf.h"
#include "threading/ie_thread_affinity.hpp"
#if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
# include <tbb/concurrent_queue.h>
# include <tbb/enumerable_thread_specific.h>
# include <tbb/global_control.h>
# include <tbb/task_group.h>
# include <tbb/task_scheduler_observer.h>
namespace InferenceEngine {
struct TBBStreamsExecutor::Impl {
struct Stream;
using TaskQueue = tbb::concurrent_queue<Task>;
using StreamQueue = tbb::concurrent_bounded_queue<Stream*>;
using LocalStreams = tbb::enumerable_thread_specific<Stream*>;
struct Shared : public std::enable_shared_from_this<Shared> {
using Ptr = std::shared_ptr<Shared>;
TaskQueue _taskQueue;
StreamQueue _streamQueue;
};
struct Stream {
struct Observer : tbb::task_scheduler_observer {
Stream* _thisStream = nullptr;
LocalStreams* _localStream = nullptr;
CpuSet _mask;
int _ncpus = 0;
int _threadBindingStep = 0;
int _offset = 0;
Observer(custom::task_arena& arena,
Stream* thisStream,
LocalStreams* localStream,
const bool pinToCores,
const int streamId,
const int threadsPerStream,
const int threadBindingStep,
const int threadBindingOffset)
: tbb::task_scheduler_observer{static_cast<tbb::task_arena&>(arena)},
_thisStream{thisStream},
_localStream{localStream},
_threadBindingStep{threadBindingStep},
_offset{streamId * threadsPerStream + threadBindingOffset} {
if (pinToCores) {
std::tie(_mask, _ncpus) = GetProcessMask();
}
}
void on_scheduler_entry(bool) override {
_localStream->local() = _thisStream;
if (nullptr != _mask) {
PinThreadToVacantCore(_offset + tbb::this_task_arena::current_thread_index(),
_threadBindingStep,
_ncpus,
_mask);
}
}
void on_scheduler_exit(bool) override {
_localStream->local() = nullptr;
if (nullptr != _mask) {
PinCurrentThreadByMask(_ncpus, _mask);
}
}
~Observer() override = default;
};
explicit Stream(Impl* impl, const bool externStream = false) : _impl{impl} {
{
std::lock_guard<std::mutex> lock{_impl->_streamIdMutex};
if (_impl->_streamIdQueue.empty()) {
_streamId = _impl->_streamId++;
} else {
_streamId = _impl->_streamIdQueue.front();
_impl->_streamIdQueue.pop();
}
}
_numaNodeId = _impl->_config._streams
? _impl->_usedNumaNodes.at((_streamId % _impl->_config._streams) /
((_impl->_config._streams + _impl->_usedNumaNodes.size() - 1) /
_impl->_usedNumaNodes.size()))
: _impl->_usedNumaNodes.at(_streamId % _impl->_usedNumaNodes.size());
auto concurrency =
(0 == _impl->_config._threadsPerStream) ? tbb::task_arena::automatic : _impl->_config._threadsPerStream;
auto masterThreads = externStream ? 1u : 0u;
if (ThreadBindingType::HYBRID_AWARE == _impl->_config._threadBindingType) {
if (Config::PreferredCoreType::ROUND_ROBIN != _impl->_config._threadPreferredCoreType) {
if (Config::PreferredCoreType::ANY == _impl->_config._threadPreferredCoreType) {
_arena.initialize(concurrency);
} else {
const auto selected_core_type =
Config::PreferredCoreType::BIG == _impl->_config._threadPreferredCoreType
? custom::info::core_types().back() // running on Big cores only
: custom::info::core_types().front(); // running on Little cores only
_arena.initialize(custom::task_arena::constraints{}
.set_core_type(selected_core_type)
.set_max_concurrency(concurrency));
}
} else {
// assigning the stream to the core type in the round-robin fashion
// wrapping around total_streams (i.e. how many streams all different core types can handle
// together)
const auto total_streams = _impl->_totalSreamsOnCoreTypes.back().second;
const auto streamId_wrapped = _streamId % total_streams;
const auto& selected_core_type =
std::find_if(_impl->_totalSreamsOnCoreTypes.cbegin(),
_impl->_totalSreamsOnCoreTypes.cend(),
[streamId_wrapped](const decltype(_impl->_totalSreamsOnCoreTypes)::value_type& p) {
return p.second > streamId_wrapped;
})
->first;
_arena.initialize(custom::task_arena::constraints{}
.set_core_type(selected_core_type)
.set_max_concurrency(concurrency));
}
} else if (ThreadBindingType::NUMA == _impl->_config._threadBindingType) {
_arena.initialize(custom::task_arena::constraints{_numaNodeId, concurrency});
} else {
_arena.initialize(concurrency, masterThreads);
}
_observer.reset(new Observer{_arena,
this,
&(_impl->_localStream),
(ThreadBindingType::CORES == _impl->_config._threadBindingType),
_streamId,
_impl->_config._threadsPerStream,
_impl->_config._threadBindingStep,
_impl->_config._threadBindingOffset});
_observer->observe(true);
}
~Stream() {
static_cast<tbb::task_arena&>(_arena).terminate();
_observer->observe(false);
{
std::lock_guard<std::mutex> lock{_impl->_streamIdMutex};
_impl->_streamIdQueue.push(_streamId);
}
}
Impl* _impl = nullptr;
int _streamId = 0;
int _numaNodeId = 0;
custom::task_arena _arena;
std::unique_ptr<Observer> _observer;
};
using Streams = std::list<Stream>;
using ExternStreams = tbb::enumerable_thread_specific<Stream>;
explicit Impl(const Config& config)
: _config{config},
_shared{std::make_shared<Shared>()},
_localStream{nullptr},
_externStreams{this, true} {
if (_config._streams * _config._threadsPerStream >= static_cast<int>(std::thread::hardware_concurrency())) {
_maxTbbThreads.reset(
new tbb::global_control{tbb::global_control::max_allowed_parallelism,
static_cast<std::size_t>(_config._streams * _config._threadsPerStream + 1)});
}
auto numaNodes = getAvailableNUMANodes();
if (_config._streams != 0) {
std::copy_n(std::begin(numaNodes),
std::min(static_cast<std::size_t>(_config._streams), numaNodes.size()),
std::back_inserter(_usedNumaNodes));
} else {
_usedNumaNodes = numaNodes;
}
if (ThreadBindingType::HYBRID_AWARE == config._threadBindingType) {
const auto core_types = custom::info::core_types();
const int threadsPerStream =
(0 == config._threadsPerStream) ? std::thread::hardware_concurrency() : config._threadsPerStream;
int sum = 0;
// reversed order, so BIG cores are first
for (auto iter = core_types.rbegin(); iter < core_types.rend(); iter++) {
const auto& type = *iter;
// calculating the #streams per core type
const int num_streams_for_core_type =
std::max(1,
custom::info::default_concurrency(custom::task_arena::constraints{}.set_core_type(type)) /
threadsPerStream);
sum += num_streams_for_core_type;
// prefix sum, so the core type for a given stream id will be deduced just as an upper_bound
// (notice that the map keeps the elements in the descending order, so the big cores are populated
// first)
_totalSreamsOnCoreTypes.emplace_back(type, sum);
}
}
_shared->_streamQueue.set_capacity(_config._streams);
for (int streamId = 0; streamId < _config._streams; ++streamId) {
_streams.emplace_back(this);
_shared->_streamQueue.push(&(_streams.back()));
}
}
~Impl() {
for (int streamId = 0; streamId < _config._streams; ++streamId) {
Stream* stream = nullptr;
_shared->_streamQueue.pop(stream);
(void)stream;
}
}
static void Schedule(Shared::Ptr& shared, Task task) {
Stream* stream = nullptr;
if (shared->_streamQueue.try_pop(stream)) {
struct TryPop {
void operator()() const {
try {
do {
Task task = std::move(_task);
task();
} while (_shared->_taskQueue.try_pop(_task));
} catch (...) {
}
if (_shared->_streamQueue.try_push(_stream)) {
if (_shared->_taskQueue.try_pop(_task)) {
Schedule(_shared, std::move(_task));
}
}
}
Stream* _stream;
mutable Shared::Ptr _shared;
mutable Task _task;
};
stream->_arena.enqueue(TryPop{stream, shared->shared_from_this(), std::move(task)});
} else {
shared->_taskQueue.push(std::move(task));
}
}
Config _config;
std::unique_ptr<tbb::global_control> _maxTbbThreads;
std::mutex _streamIdMutex;
int _streamId = 0;
std::queue<int> _streamIdQueue;
std::vector<int> _usedNumaNodes;
Shared::Ptr _shared;
LocalStreams _localStream;
ExternStreams _externStreams;
Streams _streams;
using StreamIdToCoreTypes = std::vector<std::pair<custom::core_type_id, int>>;
StreamIdToCoreTypes _totalSreamsOnCoreTypes;
};
TBBStreamsExecutor::TBBStreamsExecutor(const Config& config) : _impl{new TBBStreamsExecutor::Impl{config}} {}
TBBStreamsExecutor::~TBBStreamsExecutor() {
_impl.reset();
}
int TBBStreamsExecutor::GetStreamId() {
auto stream = _impl->_localStream.local();
if (nullptr == stream) {
stream = &(_impl->_externStreams.local());
}
return stream->_streamId;
}
int TBBStreamsExecutor::GetNumaNodeId() {
auto stream = _impl->_localStream.local();
if (nullptr == stream) {
stream = &(_impl->_externStreams.local());
}
return stream->_numaNodeId;
}
void TBBStreamsExecutor::run(Task task) {
if (_impl->_config._streams == 0) {
Execute(std::move(task));
} else {
Impl::Schedule(_impl->_shared, std::move(task));
}
}
void TBBStreamsExecutor::Execute(Task task) {
auto stream = _impl->_localStream.local();
if (nullptr == stream) {
_impl->_externStreams.local()._arena.execute(std::move(task));
} else {
stream->_arena.execute(std::move(task));
}
}
} // namespace InferenceEngine
#endif // ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
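
A hypothetical usage sketch for the new executor, not taken from the sources: it assumes the constructor's Config is the inherited InferenceEngine::IStreamsExecutor::Config whose _streams and _threadsPerStream fields appear above, and that Task is a std::function<void()>:

#include <atomic>
#include "threading/ie_tbb_streams_executor.hpp"

int main() {
    InferenceEngine::IStreamsExecutor::Config config;  // assumed base Config type
    config._streams = 2;            // two streams backed by the stream queue above
    config._threadsPerStream = 2;   // arena concurrency per stream

    InferenceEngine::TBBStreamsExecutor executor{config};

    std::atomic<int> done{0};
    for (int i = 0; i < 8; ++i) {
        executor.run([&done] { ++done; });  // scheduled on a free stream or queued
    }
    executor.Execute([] { /* runs inline inside a stream arena */ });
    return 0;  // note: this sketch does not wait for queued tasks to finish
}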


@ -43,7 +43,9 @@ ngraph::pass::FullyConnectedBiasFusion::FullyConnectedBiasFusion() {
Shape bias_shape(bias->get_shape()); Shape bias_shape(bias->get_shape());
Shape output_shape(fc->get_shape()); Shape output_shape(fc->get_shape());
size_t bias_size = std::accumulate(bias_shape.begin(), bias_shape.end(), size_t{1}, std::multiplies<int64_t>()); size_t bias_size = std::accumulate(bias_shape.begin(), bias_shape.end(), size_t{1}, std::multiplies<int64_t>());
if (bias_shape.empty() || bias_shape.back() != output_shape.back() || bias_shape.back() != bias_size) { if (bias_shape.empty() ||
(bias_shape.back() != output_shape.back() && bias_shape.back() != 1) ||
bias_shape.back() != bias_size) {
return false; return false;
} }
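
The relaxed condition above accepts a bias that is effectively one-dimensional and whose last dimension either matches the FC output channels or is 1. A standalone restatement with plain shape vectors:

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Standalone restatement: the bias must be effectively one-dimensional (its last
// dim equals its total size) and its last dim must match the output channels or be 1.
static bool biasIsFusable(const std::vector<std::size_t>& biasShape,
                          const std::vector<std::size_t>& outputShape) {
    if (biasShape.empty() || outputShape.empty()) return false;
    const std::size_t biasSize = std::accumulate(biasShape.begin(), biasShape.end(),
                                                 std::size_t{1}, std::multiplies<std::size_t>());
    return (biasShape.back() == outputShape.back() || biasShape.back() == 1) &&
           biasShape.back() == biasSize;
}
// e.g. bias {1, 256} with output {8, 256} fuses; bias {1, 1} fuses via broadcast;
// bias {256, 1} does not, because its last dim is 1 but its total size is 256.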


@ -131,7 +131,7 @@ public:
const float dequantizationMul, const float dequantizationMul,
const float dequantizationSub, const float dequantizationSub,
const ngraph::element::Type originalPrecision, const ngraph::element::Type originalPrecision,
const ngraph::PartialShape dataNodeOutputShape, const ngraph::PartialShape& dataNodeOutputShape,
element::Type precision, element::Type precision,
const element::Type deqPrecision = element::f32, const element::Type deqPrecision = element::f32,
std::shared_ptr<ngraph::Node> input = nullptr); std::shared_ptr<ngraph::Node> input = nullptr);


@ -0,0 +1,26 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <ngraph/ngraph.hpp>
#include "layer_transformation.hpp"
namespace ngraph {
namespace pass {
namespace low_precision {
class LP_TRANSFORMATIONS_API PadTransformation : public LayerTransformation {
public:
NGRAPH_RTTI_DECLARATION;
PadTransformation(const Params& params = Params());
bool transform(TransformationContext& context, pattern::Matcher& m) override;
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
};
} // namespace low_precision
} // namespace pass
} // namespace ngraph


@ -17,11 +17,13 @@ class LP_TRANSFORMATIONS_API AvgPoolPrecisionPreservedAttribute : public Precisi
}; };
using AvgPoolPrecisionPreservedAttributePtr = std::shared_ptr<AvgPoolPrecisionPreservedAttribute>; using AvgPoolPrecisionPreservedAttributePtr = std::shared_ptr<AvgPoolPrecisionPreservedAttribute>;
} // namespace ngraph
extern template class LP_TRANSFORMATIONS_API VariantImpl<AvgPoolPrecisionPreservedAttributePtr>; namespace ov {
extern template class LP_TRANSFORMATIONS_API VariantImpl<ngraph::AvgPoolPrecisionPreservedAttributePtr>;
template<> template<>
class LP_TRANSFORMATIONS_API VariantWrapper<AvgPoolPrecisionPreservedAttributePtr> : public VariantImpl<AvgPoolPrecisionPreservedAttributePtr> { class LP_TRANSFORMATIONS_API VariantWrapper<ngraph::AvgPoolPrecisionPreservedAttributePtr> : public VariantImpl<ngraph::AvgPoolPrecisionPreservedAttributePtr> {
public: public:
static constexpr VariantTypeInfo type_info{ "LowPrecision::AvgPoolPrecisionPreserved", 0 }; static constexpr VariantTypeInfo type_info{ "LowPrecision::AvgPoolPrecisionPreserved", 0 };
@ -31,9 +33,9 @@ public:
VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {} VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
AvgPoolPrecisionPreservedAttributePtr get() { return this->m_value; } ngraph::AvgPoolPrecisionPreservedAttributePtr get() { return this->m_value; }
void merge(std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AvgPoolPrecisionPreservedAttribute>>>>& attributes); void merge(std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<ngraph::AvgPoolPrecisionPreservedAttribute>>>>& attributes);
std::string to_string() override; std::string to_string() override;
}; };
} // namespace ngraph } // namespace ov


@ -62,12 +62,15 @@ public:
}; };
using IntervalsAlignmentAttributePtr = std::shared_ptr<IntervalsAlignmentAttribute>; using IntervalsAlignmentAttributePtr = std::shared_ptr<IntervalsAlignmentAttribute>;
} // namespace ngraph
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<IntervalsAlignmentAttributePtr>; namespace ov {
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<ngraph::IntervalsAlignmentAttributePtr>;
template<> template<>
class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>> : class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<ngraph::IntervalsAlignmentAttribute>> :
public VariantImpl<std::shared_ptr<IntervalsAlignmentAttribute>> { public VariantImpl<std::shared_ptr<ngraph::IntervalsAlignmentAttribute>> {
public: public:
static constexpr VariantTypeInfo type_info{ "LowPrecision::IntervalsAlignment", 0 }; static constexpr VariantTypeInfo type_info{ "LowPrecision::IntervalsAlignment", 0 };
@ -77,12 +80,13 @@ public:
VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {} VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
std::shared_ptr<IntervalsAlignmentAttribute> get() const { return this->m_value; } std::shared_ptr<ngraph::IntervalsAlignmentAttribute> get() const { return this->m_value; }
static std::shared_ptr<VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>>> create( static std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::IntervalsAlignmentAttribute>>> create(
const std::shared_ptr<ngraph::Node>& node, const std::shared_ptr<ngraph::Node>& node,
const AttributeParameters& params); const AttributeParameters& params);
void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>>>>& attributes); void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::IntervalsAlignmentAttribute>>>>& attributes);
std::string to_string() override; std::string to_string() override;
}; };
} // namespace ngraph
} // namespace ov


@ -16,11 +16,14 @@
namespace ngraph { namespace ngraph {
class LP_TRANSFORMATIONS_API PerTensorQuantizationAttribute { class LP_TRANSFORMATIONS_API PerTensorQuantizationAttribute {
}; };
} // namespace ngraph
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<PerTensorQuantizationAttribute>; namespace ov {
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<ngraph::PerTensorQuantizationAttribute>;
template<> template<>
class LP_TRANSFORMATIONS_API VariantWrapper<PerTensorQuantizationAttribute> : public VariantImpl<PerTensorQuantizationAttribute> { class LP_TRANSFORMATIONS_API VariantWrapper<ngraph::PerTensorQuantizationAttribute> : public VariantImpl<ngraph::PerTensorQuantizationAttribute> {
public: public:
static constexpr VariantTypeInfo type_info { "LowPrecision::PerTensorQuantization", 0 }; static constexpr VariantTypeInfo type_info { "LowPrecision::PerTensorQuantization", 0 };
@ -30,4 +33,5 @@ public:
return type_info; return type_info;
} }
}; };
} // namespace ngraph
} // namespace ov


@ -31,10 +31,14 @@ public:
using PrecisionPreservedAttributePtr = std::shared_ptr<PrecisionPreservedAttribute>; using PrecisionPreservedAttributePtr = std::shared_ptr<PrecisionPreservedAttribute>;
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<PrecisionPreservedAttributePtr>; } // namespace ngraph
namespace ov {
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<ngraph::PrecisionPreservedAttributePtr>;
template<> template<>
class LP_TRANSFORMATIONS_API VariantWrapper<PrecisionPreservedAttributePtr> : public VariantImpl<PrecisionPreservedAttributePtr> { class LP_TRANSFORMATIONS_API VariantWrapper<ngraph::PrecisionPreservedAttributePtr> : public VariantImpl<ngraph::PrecisionPreservedAttributePtr> {
public: public:
static constexpr VariantTypeInfo type_info{ "LowPrecision::PrecisionPreserved", 0 }; static constexpr VariantTypeInfo type_info{ "LowPrecision::PrecisionPreserved", 0 };
@ -44,8 +48,9 @@ public:
VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {} VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
PrecisionPreservedAttributePtr get() { return this->m_value; } ngraph::PrecisionPreservedAttributePtr get() { return this->m_value; }
std::string to_string() override; std::string to_string() override;
}; };
} // namespace ngraph
} // namespace ov


@ -34,11 +34,14 @@ public:
static const std::vector<ngraph::element::Type> defaultPrecisions; static const std::vector<ngraph::element::Type> defaultPrecisions;
PrecisionsAttribute(const std::vector<ngraph::element::Type>& precisions = defaultPrecisions); PrecisionsAttribute(const std::vector<ngraph::element::Type>& precisions = defaultPrecisions);
}; };
} // namespace ngraph
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<std::shared_ptr<PrecisionsAttribute>>; namespace ov {
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<std::shared_ptr<ngraph::PrecisionsAttribute>>;
template<> template<>
class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<PrecisionsAttribute>> : public VariantImpl<std::shared_ptr<PrecisionsAttribute>> { class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<ngraph::PrecisionsAttribute>> : public VariantImpl<std::shared_ptr<ngraph::PrecisionsAttribute>> {
public: public:
static constexpr VariantTypeInfo type_info{ "LowPrecision::Precisions", 0 }; static constexpr VariantTypeInfo type_info{ "LowPrecision::Precisions", 0 };
@ -50,15 +53,16 @@ public:
std::shared_ptr<Variant> init(const std::shared_ptr<ngraph::Node>& node) override; std::shared_ptr<Variant> init(const std::shared_ptr<ngraph::Node>& node) override;
std::shared_ptr<PrecisionsAttribute> get() { return this->m_value; } std::shared_ptr<ngraph::PrecisionsAttribute> get() { return this->m_value; }
// create attribute instance for node // create attribute instance for node
static std::shared_ptr<VariantWrapper<std::shared_ptr<PrecisionsAttribute>>> create( static std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::PrecisionsAttribute>>> create(
const std::shared_ptr<ngraph::Node>& node, const std::shared_ptr<ngraph::Node>& node,
const AttributeParameters& params); const AttributeParameters& params);
// merge attribute instances which can be got from different sources: node, input port or output port // merge attribute instances which can be got from different sources: node, input port or output port
void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>>& attributes); void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::PrecisionsAttribute>>>>& attributes);
// vizualize shared attributes details in VizualizeTree pass // vizualize shared attributes details in VizualizeTree pass
std::string to_string() override; std::string to_string() override;
}; };
} // namespace ngraph
} // namespace ov


@ -32,12 +32,15 @@ public:
}; };
using QuantizationAlignmentAttributePtr = std::shared_ptr<QuantizationAlignmentAttribute>; using QuantizationAlignmentAttributePtr = std::shared_ptr<QuantizationAlignmentAttribute>;
} // namespace ngraph
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<QuantizationAlignmentAttributePtr>; namespace ov {
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<ngraph::QuantizationAlignmentAttributePtr>;
template<> template<>
class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>> : class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<ngraph::QuantizationAlignmentAttribute>> :
public VariantImpl<std::shared_ptr<QuantizationAlignmentAttribute>> { public VariantImpl<std::shared_ptr<ngraph::QuantizationAlignmentAttribute>> {
public: public:
static constexpr VariantTypeInfo type_info{ "LowPrecision::QuantizationAlignment", 0 }; static constexpr VariantTypeInfo type_info{ "LowPrecision::QuantizationAlignment", 0 };
@ -49,12 +52,12 @@ public:
std::shared_ptr<Variant> init(const std::shared_ptr<ngraph::Node>& node) override; std::shared_ptr<Variant> init(const std::shared_ptr<ngraph::Node>& node) override;
std::shared_ptr<QuantizationAlignmentAttribute> get() { return this->m_value; } std::shared_ptr<ngraph::QuantizationAlignmentAttribute> get() { return this->m_value; }
static std::shared_ptr<VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>>> create( static std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::QuantizationAlignmentAttribute>>> create(
const std::shared_ptr<ngraph::Node>& node, const std::shared_ptr<ngraph::Node>& node,
const AttributeParameters& params); const AttributeParameters& params);
void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>>>>& attributes); void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::QuantizationAlignmentAttribute>>>>& attributes);
std::string to_string() override; std::string to_string() override;
}; };
} // namespace ngraph } // namespace ov


@ -55,8 +55,8 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
auto convolution = m.get_match_root(); auto convolution = m.get_match_root();
if (!canConvolutionBeTransformed(context, convolution)) { if (!canConvolutionBeTransformed(context, convolution)) {
auto weightInput = convolution->get_input_node_shared_ptr(1); const auto weightInput = convolution->get_input_node_shared_ptr(1);
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightInput); const auto reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightInput);
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ? FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
NetworkHelper::getDequantization(convolution, 1ul) : NetworkHelper::getDequantization(convolution, 1ul) :
NetworkHelper::getDequantization(reshapeFromWeights); NetworkHelper::getDequantization(reshapeFromWeights);
@ -69,7 +69,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
reshapeFromWeights->input_value(1), reshapeFromWeights->input_value(1),
false); false);
} }
if (as_type_ptr<opset1::Constant>(resultConstant)) { if (is_type<opset1::Constant>(resultConstant)) {
replace_node(weightInput, resultConstant); replace_node(weightInput, resultConstant);
} }
} else { } else {
@ -84,10 +84,9 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
{ {
std::shared_ptr<opset1::Subtract> subtract; std::shared_ptr<opset1::Subtract> subtract;
if (dequantization.subtract != nullptr) { if (dequantization.subtract != nullptr) {
std::shared_ptr<ngraph::Node> layer = dequantization.subtract; NetworkHelper::cleanRunTimeInfo(dequantization.subtract->shared_from_this());
ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(layer);
auto optimizedSubtract = NetworkHelper::optimizeSubtract(dequantization.subtract); auto optimizedSubtract = NetworkHelper::optimizeSubtract(dequantization.subtract);
if (optimizedSubtract == nullptr) { if (optimizedSubtract == nullptr) {
optimizedSubtract = dequantization.subtract; optimizedSubtract = dequantization.subtract;
} }
@ -99,7 +98,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
size_t length = subtract->get_output_partial_shape(0).rank().get_length(); size_t length = subtract->get_output_partial_shape(0).rank().get_length();
// Insert explicit broadcast for channel dimension [1] and immediately fold it // Insert explicit broadcast for channel dimension [1] and immediately fold it
Shape broadcastShape(subtract->get_output_partial_shape(0).rank().get_length(), 1); Shape broadcastShape(length, 1);
broadcastShape[1] = subtract->get_output_partial_shape(0)[1].get_length(); broadcastShape[1] = subtract->get_output_partial_shape(0)[1].get_length();
std::shared_ptr<Node> newShift = fold<opset1::Broadcast>( std::shared_ptr<Node> newShift = fold<opset1::Broadcast>(
@ -122,11 +121,9 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
const size_t groupsCount = NetworkHelper::getGroupsCount(convolution); const size_t groupsCount = NetworkHelper::getGroupsCount(convolution);
std::shared_ptr<Node> newMultiplyAfterConst; std::shared_ptr<Node> newMultiplyAfterConst;
if (groupsCount > 1ul) { if (groupsCount > 1ul) {
std::shared_ptr<opset1::Constant> multiplyConst = as_type_ptr<opset1::Constant>(dequantization.multiply->get_input_node_shared_ptr(1)); const std::vector<float> scales = dequantization.multiplyConstant->cast_vector<float>();
const std::vector<float> scales = multiplyConst->cast_vector<float>();
if (scales.size() == 1ul) { if (scales.size() == 1ul) {
newMultiplyAfterConst = dequantization.multiply->input_value(1).get_node_shared_ptr()->clone_with_new_inputs({}); newMultiplyAfterConst = dequantization.multiplyConstant->clone_with_new_inputs({});
} else { } else {
const ngraph::PartialShape inputPShape = convolution->get_input_partial_shape(0); const ngraph::PartialShape inputPShape = convolution->get_input_partial_shape(0);
const size_t inputChannelsInGroup = inputPShape[1].get_length() / groupsCount; const size_t inputChannelsInGroup = inputPShape[1].get_length() / groupsCount;
@ -150,17 +147,15 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
} }
newMultiplyAfterConst = std::make_shared<opset1::Constant>( newMultiplyAfterConst = std::make_shared<opset1::Constant>(
dequantization.multiply->get_input_element_type(1), dequantization.multiplyConstant->get_element_type(),
newMulShape, newMulShape,
outputScales); outputScales);
} }
} else { } else {
std::shared_ptr<opset1::Constant> reducedConstant = as_type_ptr<opset1::Constant>(
dequantization.multiply->input_value(1).get_node_shared_ptr());
newMultiplyAfterConst = std::make_shared<opset1::Constant>( newMultiplyAfterConst = std::make_shared<opset1::Constant>(
reducedConstant->get_output_element_type(0), dequantization.multiplyConstant->get_element_type(),
Shape{ 1 }, Shape{ 1 },
reducedConstant->cast_vector<float>()[0]); dequantization.multiplyConstant->cast_vector<float>()[0]);
} }
const auto copyNode = convolution->clone_with_new_inputs({ dequantization.multiply->input_value(0), convolution->input_value(1) }); const auto copyNode = convolution->clone_with_new_inputs({ dequantization.multiply->input_value(0), convolution->input_value(1) });
@ -190,7 +185,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
if (is_type<opset1::Convert>(convolution->get_input_node_ptr(0))) { if (is_type<opset1::Convert>(convolution->get_input_node_ptr(0))) {
auto newConvolution = convolution->clone_with_new_inputs({ auto newConvolution = convolution->clone_with_new_inputs({
convolution->get_input_node_ptr(0)->get_input_source_output(0), convolution->get_input_node_ptr(0)->input_value(0),
convolution->input_value(1)}); convolution->input_value(1)});
replace_node(convolution, newConvolution); replace_node(convolution, newConvolution);
NetworkHelper::copyInfo(convolution, newConvolution); NetworkHelper::copyInfo(convolution, newConvolution);
@ -206,7 +201,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
return false; return false;
} }
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(convolution->input_value(1).get_node_shared_ptr()); std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(convolution->get_input_node_shared_ptr(1));
dequantization = reshapeFromWeights == nullptr ? dequantization = reshapeFromWeights == nullptr ?
NetworkHelper::getDequantization(convolution, 1ul) : NetworkHelper::getDequantization(convolution, 1ul) :
@ -221,12 +216,15 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
std::shared_ptr<opset1::Multiply> multiplyFromWeights = as_type_ptr<opset1::Multiply>( std::shared_ptr<opset1::Multiply> multiplyFromWeights = as_type_ptr<opset1::Multiply>(
reshapeFromWeights == nullptr ? reshapeFromWeights == nullptr ?
convolution->input_value(1).get_node_shared_ptr() : convolution->get_input_node_shared_ptr(1) :
convolution->get_input_node_ptr(1)->get_input_node_shared_ptr(0)); convolution->get_input_node_ptr(1)->get_input_node_shared_ptr(0));
std::shared_ptr<opset1::Subtract> subtractFromWeights = as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0)); std::shared_ptr<opset1::Subtract> subtractFromWeights = as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0));
{ {
Shape newScaleShape = multiplyFromWeights->get_input_shape(1); const auto newScalePShape = multiplyFromWeights->get_input_partial_shape(1);
assert(newScalePShape.is_static());
Shape newScaleShape = newScalePShape.to_shape();
if (!newScaleShape.empty()) { if (!newScaleShape.empty()) {
// that's all we need: [C, 1, 1, 1] => [C, 1, 1] // that's all we need: [C, 1, 1, 1] => [C, 1, 1]
newScaleShape.pop_back(); newScaleShape.pop_back();
@ -268,9 +266,12 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
} else { } else {
subtractFromWeights = as_type_ptr<opset1::Subtract>(optimizedSubtract); subtractFromWeights = as_type_ptr<opset1::Subtract>(optimizedSubtract);
const Shape weightsShape = subtractFromWeights->input(0).get_shape(); const auto weightsPShape = subtractFromWeights->get_input_partial_shape(0);
Shape zeroPointShape(weightsShape.size(), 1ul); assert(weightsPShape.is_static());
zeroPointShape[0] = weightsShape[0];
const size_t weightsRankValue = weightsPShape.rank().get_length();
Shape zeroPointShape(weightsRankValue, 1ul);
zeroPointShape[0] = static_cast<size_t>(weightsPShape[0].get_length());
auto zeroPointConstant = fold<opset1::Broadcast>( auto zeroPointConstant = fold<opset1::Broadcast>(
subtractFromWeights->input_value(1), subtractFromWeights->input_value(1),
@ -288,7 +289,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
std::shared_ptr<Node> childNode = reshapeFromWeights == nullptr ? convolution : reshapeFromWeights; std::shared_ptr<Node> childNode = reshapeFromWeights == nullptr ? convolution : reshapeFromWeights;
auto newConvolution = convolution->clone_with_new_inputs({ auto newConvolution = convolution->clone_with_new_inputs({
convolution->get_input_source_output(0), convolution->input_value(0),
childNode.get() == convolution.get() ? childNode.get() == convolution.get() ?
convolution->get_input_node_ptr(1)->input_value(0) : convolution->get_input_node_ptr(1)->input_value(0) :
childNode->copy_with_new_inputs({convertFromWeights->input_value(0), childNode->input_value(1)})}); childNode->copy_with_new_inputs({convertFromWeights->input_value(0), childNode->input_value(1)})});
@ -311,7 +312,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
std::shared_ptr<ngraph::opset1::Multiply> finalDequantization = NetworkHelper::optimizeMultipliesAfter( std::shared_ptr<ngraph::opset1::Multiply> finalDequantization = NetworkHelper::optimizeMultipliesAfter(
convolution->output(0).get_target_inputs().begin()->get_node()->shared_from_this()); convolution->output(0).get_target_inputs().begin()->get_node()->shared_from_this());
ngraph::copy_runtime_info({ convolution, finalDequantization }, finalDequantization); copy_runtime_info({ convolution, finalDequantization }, finalDequantization);
updateOutput(context, finalDequantization, convolution); updateOutput(context, finalDequantization, convolution);
// [C, 1, 1] -> [1, C, 1, 1] // [C, 1, 1] -> [1, C, 1, 1]
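The zero-point hunk above rebuilds the broadcast shape for the weights' zero point from the weights' partial shape: a vector of ones with the weights' rank, where only the output-channel dimension keeps its extent (index 0 for Convolution weights, index 1 for ConvolutionBackpropData). A minimal standalone sketch of that shape computation, using plain std::vector instead of the ngraph Shape/PartialShape types and assuming the weights shape is fully static:

#include <cassert>
#include <cstddef>
#include <vector>

// Broadcast shape for a per-channel zero point: all ones except the channel
// dimension, which keeps the weights' extent along that axis.
std::vector<std::size_t> zeroPointShapeFor(const std::vector<std::size_t>& weightsShape,
                                           std::size_t channelIdx) {
    assert(channelIdx < weightsShape.size());
    std::vector<std::size_t> zeroPointShape(weightsShape.size(), 1u);
    zeroPointShape[channelIdx] = weightsShape[channelIdx];
    return zeroPointShape;
}

int main() {
    // e.g. Convolution weights [64, 3, 7, 7] -> zero-point shape [64, 1, 1, 1]
    const auto shape = zeroPointShapeFor({64, 3, 7, 7}, 0);
    assert((shape == std::vector<std::size_t>{64, 1, 1, 1}));
    return 0;
}

In the transformation itself this shape feeds fold<opset1::Broadcast> so the scalar zero point becomes an explicit per-channel constant.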

View File

@ -87,7 +87,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
reshapeFromWeights->input_value(1), reshapeFromWeights->input_value(1),
false); false);
} }
if (as_type_ptr<opset1::Constant>(resultConstant)) { if (is_type<opset1::Constant>(resultConstant)) {
replace_node(weightsInput, resultConstant); replace_node(weightsInput, resultConstant);
} }
} else { } else {
@ -100,16 +100,14 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData); FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData);
{ {
if (dequantization.subtract != nullptr) { if (dequantization.subtract != nullptr) {
std::shared_ptr<ngraph::Node> layer = dequantization.subtract; NetworkHelper::cleanRunTimeInfo(dequantization.subtract->shared_from_this());
ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(layer);
NetworkHelper::optimizeSubtract(dequantization.subtract); NetworkHelper::optimizeSubtract(dequantization.subtract);
} }
std::shared_ptr<opset1::Constant> reducedConstant = as_type_ptr<opset1::Constant>(dequantization.multiplyConstant);
std::shared_ptr<Node> newMultiplyAfterConst = std::make_shared<opset1::Constant>( std::shared_ptr<Node> newMultiplyAfterConst = std::make_shared<opset1::Constant>(
reducedConstant->get_output_element_type(0), dequantization.multiplyConstant->get_element_type(),
Shape{ 1 }, Shape{ 1 },
reducedConstant->cast_vector<float>()[0]); dequantization.multiplyConstant->cast_vector<float>()[0]);
auto inputs = convolutionBackpropData->input_values(); auto inputs = convolutionBackpropData->input_values();
inputs[0] = dequantization.multiply->input_value(0); inputs[0] = dequantization.multiply->input_value(0);
const auto copyNode = convolutionBackpropData->copy_with_new_inputs(inputs); const auto copyNode = convolutionBackpropData->copy_with_new_inputs(inputs);
@ -126,7 +124,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get()); ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get());
replace_node(convolutionBackpropData, newMultiplyAfter); replace_node(convolutionBackpropData, newMultiplyAfter);
convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr(); convolutionBackpropData = newMultiplyAfter->get_input_node_shared_ptr(0);
inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0); inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0);
if (is_type<opset1::Convert>(convolutionBackpropData->get_input_node_ptr(0))) { if (is_type<opset1::Convert>(convolutionBackpropData->get_input_node_ptr(0))) {
auto newConvolution = convolutionBackpropData->copy_with_new_inputs(inputs); auto newConvolution = convolutionBackpropData->copy_with_new_inputs(inputs);
@ -137,7 +135,6 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
{ {
decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul); decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul);
dequantization = NetworkHelper::getDequantization(convolutionBackpropData, 1ul); dequantization = NetworkHelper::getDequantization(convolutionBackpropData, 1ul);
if (is_type<opset1::FakeQuantize>(dequantization.data.get_node())) { if (is_type<opset1::FakeQuantize>(dequantization.data.get_node())) {
@ -152,7 +149,10 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
std::shared_ptr<opset1::Subtract> subtractFromWeights = as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0)); std::shared_ptr<opset1::Subtract> subtractFromWeights = as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0));
{ {
Shape newScaleShape = multiplyFromWeights->get_input_shape(1); const auto newScalePShape = multiplyFromWeights->get_input_partial_shape(1);
assert(newScalePShape.is_static());
Shape newScaleShape = newScalePShape.to_shape();
auto inputs = convolutionBackpropData->input_values(); auto inputs = convolutionBackpropData->input_values();
inputs[1] = multiplyFromWeights->input_value(0); inputs[1] = multiplyFromWeights->input_value(0);
auto newMultiplyAfter = std::make_shared<DequantizationMultiply>( auto newMultiplyAfter = std::make_shared<DequantizationMultiply>(
@ -164,7 +164,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
false), false),
convolutionBackpropData->get_output_element_type(0))); convolutionBackpropData->get_output_element_type(0)));
replace_node(convolutionBackpropData, newMultiplyAfter); replace_node(convolutionBackpropData, newMultiplyAfter);
convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr(); convolutionBackpropData = newMultiplyAfter->get_input_node_shared_ptr(0);
} }
if (subtractFromWeights != nullptr) { if (subtractFromWeights != nullptr) {
@ -175,9 +175,12 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
} else { } else {
subtractFromWeights = as_type_ptr<opset1::Subtract>(optimizedSubtract); subtractFromWeights = as_type_ptr<opset1::Subtract>(optimizedSubtract);
const Shape weightsShape = subtractFromWeights->input(0).get_shape(); const auto weightsPShape = subtractFromWeights->get_input_partial_shape(0);
Shape zeroPointShape(weightsShape.size(), 1ul); assert(weightsPShape.is_static());
zeroPointShape[1] = weightsShape[1];
const size_t weightsRankValue = weightsPShape.rank().get_length();
Shape zeroPointShape(weightsRankValue, 1ul);
zeroPointShape[1] = static_cast<size_t>(weightsPShape[1].get_length());
auto zeroPointConstant = fold<opset1::Broadcast>( auto zeroPointConstant = fold<opset1::Broadcast>(
subtractFromWeights->get_input_node_shared_ptr(1), subtractFromWeights->get_input_node_shared_ptr(1),
@ -215,7 +218,6 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
rt["DISABLED_CONSTANT_FOLDING"] = std::make_shared<ngraph::VariantWrapper<std::string>>(""); rt["DISABLED_CONSTANT_FOLDING"] = std::make_shared<ngraph::VariantWrapper<std::string>>("");
} }
return true; return true;
} }
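Both convolution hunks above repeat the same refactoring pattern: query get_input_partial_shape instead of get_input_shape, assert that the shape is static at this point of the pipeline, and only then convert it to a concrete Shape. A hedged standalone sketch of that pattern with ngraph types (expectStaticShape is an illustrative helper, not part of the library):

#include <cassert>
#include <ngraph/ngraph.hpp>

// Illustrative helper: convert a PartialShape that is expected to be static
// into a concrete Shape, asserting that expectation.
ngraph::Shape expectStaticShape(const ngraph::PartialShape& pshape) {
    assert(pshape.is_static());
    return pshape.to_shape();
}

int main() {
    const ngraph::PartialShape staticShape{1, 3, 224, 224};
    const ngraph::Shape shape = expectStaticShape(staticShape);
    assert(shape.size() == 4);

    // A shape with a dynamic batch dimension fails the assertion above, which
    // is why the transformations check is_static() before calling to_shape().
    const ngraph::PartialShape dynamicShape{ngraph::Dimension::dynamic(), 3, 224, 224};
    assert(!dynamicShape.is_static());
    return 0;
}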

View File

@ -56,8 +56,10 @@ bool FakeQuantizeTransformation::transform(TransformationContext& context, ngrap
namespace fq { namespace fq {
static std::shared_ptr<Node> updateShape(std::shared_ptr<Node> constantOp, const PartialShape& targetShape) { static std::shared_ptr<Node> updateShape(std::shared_ptr<Node> constantOp, const PartialShape& targetShape) {
assert(constantOp->get_output_partial_shape(0).is_static());
const Shape shape = constantOp->get_output_shape(0); const Shape shape = constantOp->get_output_shape(0);
if ((shape.size() < static_cast<size_t>(targetShape.rank().get_length())) && (shape.size() > 1ul)) {
if ((shape.size() > 1ul) && (shape.size() < static_cast<size_t>(targetShape.rank().get_length()))) {
constantOp = fold<opset1::Unsqueeze>( constantOp = fold<opset1::Unsqueeze>(
constantOp, constantOp,
std::make_shared<opset1::Constant>(ngraph::element::i32, Shape{ 1 }, std::vector<size_t>({ 0ul }))); std::make_shared<opset1::Constant>(ngraph::element::i32, Shape{ 1 }, std::vector<size_t>({ 0ul })));
@ -93,19 +95,19 @@ static std::shared_ptr<opset1::Constant> getConstant(const std::shared_ptr<Node>
} // namespace fq } // namespace fq
bool FakeQuantizeTransformation::checkElementwise(const std::shared_ptr<Node>& eltwise) { bool FakeQuantizeTransformation::checkElementwise(const std::shared_ptr<Node>& eltwise) {
const std::shared_ptr<opset1::Constant> constant = fq::getConstant(eltwise);
if (constant == nullptr) {
return false;
}
Shape shape = constant->get_shape();
if (shape_size(shape) != 1ul) {
const auto eltwiseInputPShape = eltwise->get_input_partial_shape(0); const auto eltwiseInputPShape = eltwise->get_input_partial_shape(0);
const auto eltwiseOutputPShape = eltwise->get_output_partial_shape(0); const auto eltwiseOutputPShape = eltwise->get_output_partial_shape(0);
if (eltwiseInputPShape != eltwiseOutputPShape || eltwiseInputPShape.rank().is_dynamic() || eltwiseOutputPShape.rank().is_dynamic()) { if (eltwiseInputPShape != eltwiseOutputPShape || eltwiseInputPShape.rank().is_dynamic() || eltwiseOutputPShape.rank().is_dynamic()) {
return false; return false;
} }
std::shared_ptr<opset1::Constant> constant = fq::getConstant(eltwise);
if (constant == nullptr) {
return false;
}
Shape shape = constant->get_output_shape(0);
if ((!shape.empty()) && (shape_size(shape) != 1ul)) {
if ((eltwiseOutputPShape.rank().get_length() - shape.size()) > 1) { if ((eltwiseOutputPShape.rank().get_length() - shape.size()) > 1) {
return false; return false;
} }
@ -179,8 +181,8 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
inputHighConst_f32 = fq::updateShape(fold<opset1::Subtract>(inputHighConst_f32, value), fakeQuantize->get_output_partial_shape(0)); inputHighConst_f32 = fq::updateShape(fold<opset1::Subtract>(inputHighConst_f32, value), fakeQuantize->get_output_partial_shape(0));
} else if (is_type<opset1::Convert>(eltwise)) { } else if (is_type<opset1::Convert>(eltwise)) {
// issue #40611 // issue #40611
if ((eltwise->input(0).get_element_type() == element::i32) && if ((eltwise->get_input_element_type(0) == element::i32) &&
((eltwise->output(0).get_element_type() == element::f16) || (eltwise->output(0).get_element_type() == element::f32))) { ((eltwise->get_output_element_type(0) == element::f16) || (eltwise->get_output_element_type(0) == element::f32))) {
return nullptr; return nullptr;
} }
} else { } else {
@ -190,7 +192,7 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
const auto data = fq::getData(eltwise); const auto data = fq::getData(eltwise);
const size_t outputIdx = NetworkHelper::getParentOutputIndex(data, eltwise); const size_t outputIdx = NetworkHelper::getParentOutputIndex(data, eltwise);
std::shared_ptr<opset1::FakeQuantize> newFakeQuantize = as_type_ptr<opset1::FakeQuantize>(fakeQuantize->clone_with_new_inputs({ const auto newFakeQuantize = as_type_ptr<opset1::FakeQuantize>(fakeQuantize->clone_with_new_inputs({
data->output(outputIdx), data->output(outputIdx),
inputLowConst_f32, inputLowConst_f32,
inputHighConst_f32, inputHighConst_f32,

View File

@ -90,7 +90,7 @@ bool FakeQuantizeDequantization::checkShape(const std::shared_ptr<ngraph::Node>&
if (!inPShape.rank().is_dynamic()) { if (!inPShape.rank().is_dynamic()) {
for (int i = 0; i < inPShape.rank().get_length(); ++i) { for (int i = 0; i < inPShape.rank().get_length(); ++i) {
if (inPShape[i] != outPShape[i] && !inPShape.is_dynamic()) { if (inPShape[i] != outPShape[i] && !inPShape[i].is_dynamic()) {
return false; return false;
} }
} }
@ -108,7 +108,7 @@ bool FakeQuantizeDequantization::checkElementwise(const std::shared_ptr<ngraph::
return false; return false;
} }
const ngraph::Shape constShape = constant->get_output_shape(0); const ngraph::Shape constShape = constant->get_shape();
if ((constShape.size() > 5ul)) { if ((constShape.size() > 5ul)) {
return false; return false;
} }
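The checkShape change above matters because a PartialShape with one dynamic dimension is dynamic as a whole, while its remaining dimensions can still be compared. A small sketch of that distinction, assuming only the batch dimension is dynamic:

#include <cassert>
#include <ngraph/ngraph.hpp>

int main() {
    using ngraph::Dimension;
    using ngraph::PartialShape;

    const PartialShape inShape{Dimension::dynamic(), 3, 16, 16};
    const PartialShape outShape{Dimension::dynamic(), 3, 32, 32};

    // The shape-level check is true here, so it would skip every comparison ...
    assert(inShape.is_dynamic());

    // ... while the per-dimension check still catches the spatial mismatch
    // and correctly ignores the dynamic batch dimension.
    assert(inShape[0].is_dynamic());
    assert(!inShape[2].is_dynamic());
    assert(inShape[2] != outShape[2]);
    return 0;
}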

View File

@ -40,8 +40,12 @@ bool FuseFakeQuantizeTransformation::transform(TransformationContext& context, n
namespace fuse_fq { namespace fuse_fq {
std::shared_ptr<Node> updateShape(std::shared_ptr<Node> op, const Shape& targetShape) { std::shared_ptr<Node> updateShape(std::shared_ptr<Node> op, const PartialShape& targetPShape) {
assert(targetPShape.is_static());
assert(op->get_output_partial_shape(0).is_static());
const Shape targetShape = targetPShape.to_shape();
const Shape shape = op->get_output_shape(0); const Shape shape = op->get_output_shape(0);
if ((shape.size() < targetShape.size()) && (shape.size() > 1ul)) { if ((shape.size() < targetShape.size()) && (shape.size() > 1ul)) {
op = fold<opset1::Unsqueeze>( op = fold<opset1::Unsqueeze>(
op, op,
@ -81,14 +85,19 @@ bool eltwiseWithConstant(const std::shared_ptr<Node>& eltwise) {
return false; return false;
} }
Shape shape = constant->get_output_shape(0); Shape shape = constant->get_shape();
if ((!shape.empty()) && (shape_size(shape) != 1ul)) { if ((!shape.empty()) && (shape_size(shape) != 1ul)) {
const Shape eltwiseShape = eltwise->get_output_shape(0); const auto eltwisePShape = eltwise->get_output_partial_shape(0);
if ((eltwiseShape.size() - shape.size()) > 1) { if (eltwisePShape.rank().is_dynamic()) {
return false; return false;
} }
if ((eltwiseShape.size() - shape.size()) == 1ul) { const size_t eltwiseOutRank = eltwisePShape.rank().get_length();
if ((eltwiseOutRank - shape.size()) > 1) {
return false;
}
if ((eltwiseOutRank - shape.size()) == 1ul) {
shape.insert(shape.begin(), 1ul); shape.insert(shape.begin(), 1ul);
} }
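eltwiseWithConstant above tolerates a constant whose rank is exactly one less than the eltwise output rank by prepending a batch dimension of 1, and rejects any larger rank gap. A standalone sketch of that rank-alignment rule on plain vectors (alignConstantRank is an illustrative name):

#include <cstddef>
#include <vector>

// Illustrative rank alignment: reject constants that are more than one rank
// short of the eltwise output, otherwise pad the shape with a leading 1.
bool alignConstantRank(std::vector<std::size_t>& constShape, std::size_t eltwiseOutRank) {
    if (constShape.size() > eltwiseOutRank || eltwiseOutRank - constShape.size() > 1) {
        return false;
    }
    if (eltwiseOutRank - constShape.size() == 1) {
        constShape.insert(constShape.begin(), 1u);  // e.g. [C, 1, 1] -> [1, C, 1, 1]
    }
    return true;
}

int main() {
    std::vector<std::size_t> perChannel{64, 1, 1};
    const bool ok = alignConstantRank(perChannel, 4);  // 4-D eltwise output
    return (ok && perChannel.size() == 4) ? 0 : 1;
}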
@ -118,22 +127,22 @@ std::shared_ptr<opset1::FakeQuantize> FuseFakeQuantizeTransformation::handle(
constant : constant :
foldConvert(constant, eltwise->get_output_element_type(0)); foldConvert(constant, eltwise->get_output_element_type(0));
inputLowConst = fuse_fq::updateShape(fold<opset1::Divide>(inputLowConst, value), fakeQuantize->get_output_shape(0)); inputLowConst = fuse_fq::updateShape(fold<opset1::Divide>(inputLowConst, value), fakeQuantize->get_output_partial_shape(0));
inputHightConst = fuse_fq::updateShape(fold<opset1::Divide>(inputHightConst, value), fakeQuantize->get_output_shape(0)); inputHightConst = fuse_fq::updateShape(fold<opset1::Divide>(inputHightConst, value), fakeQuantize->get_output_partial_shape(0));
} else if (is_type<opset1::Divide>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) { } else if (is_type<opset1::Divide>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) {
const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ? const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ?
constant : constant :
foldConvert(constant, eltwise->get_output_element_type(0)); foldConvert(constant, eltwise->get_output_element_type(0));
inputLowConst = fuse_fq::updateShape(fold<opset1::Multiply>(inputLowConst, value), fakeQuantize->get_output_shape(0)); inputLowConst = fuse_fq::updateShape(fold<opset1::Multiply>(inputLowConst, value), fakeQuantize->get_output_partial_shape(0));
inputHightConst = fuse_fq::updateShape(fold<opset1::Multiply>(inputHightConst, value), fakeQuantize->get_output_shape(0)); inputHightConst = fuse_fq::updateShape(fold<opset1::Multiply>(inputHightConst, value), fakeQuantize->get_output_partial_shape(0));
} else if (is_type<opset1::Subtract>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) { } else if (is_type<opset1::Subtract>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) {
const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ? const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ?
constant : constant :
foldConvert(constant, eltwise->get_output_element_type(0)); foldConvert(constant, eltwise->get_output_element_type(0));
inputLowConst = fuse_fq::updateShape(fold<opset1::Add>(inputLowConst, value), fakeQuantize->get_output_shape(0)); inputLowConst = fuse_fq::updateShape(fold<opset1::Add>(inputLowConst, value), fakeQuantize->get_output_partial_shape(0));
inputHightConst = fuse_fq::updateShape(fold<opset1::Add>(inputHightConst, value), fakeQuantize->get_output_shape(0)); inputHightConst = fuse_fq::updateShape(fold<opset1::Add>(inputHightConst, value), fakeQuantize->get_output_partial_shape(0));
} else if (is_type<opset1::Add>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) { } else if (is_type<opset1::Add>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) {
if (is_type<opset1::Convolution>(fuse_fq::getData(eltwise)) || if (is_type<opset1::Convolution>(fuse_fq::getData(eltwise)) ||
is_type<opset1::GroupConvolution>(fuse_fq::getData(eltwise))) { is_type<opset1::GroupConvolution>(fuse_fq::getData(eltwise))) {
@ -144,8 +153,8 @@ std::shared_ptr<opset1::FakeQuantize> FuseFakeQuantizeTransformation::handle(
constant : constant :
foldConvert(constant, eltwise->get_output_element_type(0)); foldConvert(constant, eltwise->get_output_element_type(0));
inputLowConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputLowConst, value), fakeQuantize->get_output_shape(0)); inputLowConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputLowConst, value), fakeQuantize->get_output_partial_shape(0));
inputHightConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputHightConst, value), fakeQuantize->get_output_shape(0)); inputHightConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputHightConst, value), fakeQuantize->get_output_partial_shape(0));
} else if (is_type<opset1::Convert>(eltwise)) { } else if (is_type<opset1::Convert>(eltwise)) {
// issue #40611 // issue #40611
if ((eltwise->input(0).get_element_type() == element::i32) && (eltwise->output(0).get_element_type() == element::f32)) { if ((eltwise->input(0).get_element_type() == element::i32) && (eltwise->output(0).get_element_type() == element::f32)) {

View File

@ -72,7 +72,7 @@ bool InterpolateTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer
std::shared_ptr<opset4::Interpolate> interpolate4 = as_type_ptr<opset4::Interpolate>(layer); std::shared_ptr<opset4::Interpolate> interpolate4 = as_type_ptr<opset4::Interpolate>(layer);
if (interpolate4) { if (interpolate4) {
const auto attrs = interpolate4->get_attrs(); const auto attrs = interpolate4->get_attrs();
return attrs.mode == op::v4::Interpolate::InterpolateMode::nearest; return attrs.mode == op::v4::Interpolate::InterpolateMode::NEAREST;
} }
return false; return false;
@ -108,7 +108,7 @@ bool InterpolateTransformation::canBeTransformed(const TransformationContext& co
if (interpolate4) { if (interpolate4) {
const auto interpAttrs = interpolate4->get_attrs(); const auto interpAttrs = interpolate4->get_attrs();
if (interpAttrs.mode != op::v4::Interpolate::InterpolateMode::nearest) { if (interpAttrs.mode != op::v4::Interpolate::InterpolateMode::NEAREST) {
return false; return false;
} }
@ -126,7 +126,7 @@ bool InterpolateTransformation::canBeTransformed(const TransformationContext& co
} }
} }
if (interpAttrs.coordinate_transformation_mode == op::v4::Interpolate::CoordinateTransformMode::align_corners) { if (interpAttrs.coordinate_transformation_mode == op::v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS) {
return false; return false;
} }
} }

View File

@ -50,6 +50,7 @@
#include "low_precision/multiply.hpp" #include "low_precision/multiply.hpp"
#include "low_precision/mvn.hpp" #include "low_precision/mvn.hpp"
#include "low_precision/normalize_l2.hpp" #include "low_precision/normalize_l2.hpp"
#include "low_precision/pad.hpp"
#include "low_precision/prelu.hpp" #include "low_precision/prelu.hpp"
#include "low_precision/reduce_max.hpp" #include "low_precision/reduce_max.hpp"
#include "low_precision/reduce_mean.hpp" #include "low_precision/reduce_mean.hpp"
@ -219,6 +220,7 @@ bool ngraph::pass::low_precision::LowPrecision::run_on_function(std::shared_ptr<
common->add_matcher<ngraph::pass::low_precision::MultiplyTransformation>(params); common->add_matcher<ngraph::pass::low_precision::MultiplyTransformation>(params);
common->add_matcher<ngraph::pass::low_precision::MVNTransformation>(params); common->add_matcher<ngraph::pass::low_precision::MVNTransformation>(params);
common->add_matcher<ngraph::pass::low_precision::NormalizeL2Transformation>(params); common->add_matcher<ngraph::pass::low_precision::NormalizeL2Transformation>(params);
common->add_matcher<ngraph::pass::low_precision::PadTransformation>(params);
common->add_matcher<ngraph::pass::low_precision::PReluTransformation>(params); common->add_matcher<ngraph::pass::low_precision::PReluTransformation>(params);
common->add_matcher<ngraph::pass::low_precision::ReduceMaxTransformation>(params); common->add_matcher<ngraph::pass::low_precision::ReduceMaxTransformation>(params);
common->add_matcher<ngraph::pass::low_precision::ReduceMeanTransformation>(params); common->add_matcher<ngraph::pass::low_precision::ReduceMeanTransformation>(params);

View File

@ -141,6 +141,7 @@ bool ngraph::pass::low_precision::MarkupPrecisions::isPrecisionPreserved(const s
{ name<opset1::ReduceMin>() }, { name<opset1::ReduceMin>() },
{ name<opset1::Relu>() }, { name<opset1::Relu>() },
// TODO: there are conditions // TODO: there are conditions
{ name<opset1::Pad>() },
{ name<opset1::Reshape>() }, { name<opset1::Reshape>() },
{ name<opset1::Squeeze>() }, { name<opset1::Squeeze>() },
{ name<opset1::Split>() }, { name<opset1::Split>() },
@ -166,7 +167,7 @@ bool ngraph::pass::low_precision::MarkupPrecisions::isPrecisionPreserved(const s
std::shared_ptr<opset4::Interpolate> interpolate4 = as_type_ptr<opset4::Interpolate>(node); std::shared_ptr<opset4::Interpolate> interpolate4 = as_type_ptr<opset4::Interpolate>(node);
if (interpolate4) { if (interpolate4) {
const auto attrs = interpolate4->get_attrs(); const auto attrs = interpolate4->get_attrs();
return attrs.mode == op::v4::Interpolate::InterpolateMode::nearest; return attrs.mode == op::v4::Interpolate::InterpolateMode::NEAREST;
} }
} }
@ -194,6 +195,7 @@ bool ngraph::pass::low_precision::MarkupPrecisions::isSupported(const std::share
{ name<ngraph::op::MVN>() }, { name<ngraph::op::MVN>() },
{ name<opset6::MVN>() }, { name<opset6::MVN>() },
{ name<opset1::NormalizeL2>() }, { name<opset1::NormalizeL2>() },
{ name<opset1::Pad>() },
{ name<opset1::PRelu>() }, { name<opset1::PRelu>() },
{ name<opset1::ReduceMax>() }, { name<opset1::ReduceMax>() },
{ name<opset1::ReduceMean>() }, { name<opset1::ReduceMean>() },

View File

@ -94,7 +94,10 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
Shape(dequantization1.subtract->get_output_partial_shape(0).rank().get_length(), 1) : Shape(dequantization1.subtract->get_output_partial_shape(0).rank().get_length(), 1) :
dequantization1.subtractConstant->get_shape(); dequantization1.subtractConstant->get_shape();
const auto weightsShape = newMatMul->get_input_shape(1); const auto weightsPShape = newMatMul->get_input_partial_shape(1);
assert(weightsPShape.is_static());
const auto weightsShape = weightsPShape.to_shape();
const size_t firstWeightsIdx = matMul->get_transpose_b() ? weightsShape.size() - 1ul : weightsShape.size() - 2ul; const size_t firstWeightsIdx = matMul->get_transpose_b() ? weightsShape.size() - 1ul : weightsShape.size() - 2ul;
const size_t lastDataIdx = matMul->get_transpose_a() ? broadcastShape.size() - 2 : broadcastShape.size() - 1; const size_t lastDataIdx = matMul->get_transpose_a() ? broadcastShape.size() - 2 : broadcastShape.size() - 1;
broadcastShape[lastDataIdx] = weightsShape[firstWeightsIdx]; broadcastShape[lastDataIdx] = weightsShape[firstWeightsIdx];
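In the hunk above, firstWeightsIdx and lastDataIdx both address the contraction (K) axis: transpose_b moves K to the last weights dimension, transpose_a moves K to the second-to-last data dimension, and the broadcast shape for the data-side subtract constant is widened along that axis to the weights' K extent. A tiny index-selection sketch, assuming static ranks:

#include <cassert>
#include <cstddef>

// Index of the contraction (K) axis in each MatMul input, mirroring how the
// transformation derives firstWeightsIdx / lastDataIdx from the transpose flags.
std::size_t weightsContractionIndex(std::size_t weightsRank, bool transposeB) {
    return transposeB ? weightsRank - 1 : weightsRank - 2;  // [N, K] vs [K, N]
}
std::size_t dataContractionIndex(std::size_t dataRank, bool transposeA) {
    return transposeA ? dataRank - 2 : dataRank - 1;        // [K, M] vs [M, K]
}

int main() {
    // data [B, M, K] x weights [K, N]: both indices point at K.
    assert(dataContractionIndex(3, /*transposeA=*/false) == 2);
    assert(weightsContractionIndex(2, /*transposeB=*/false) == 0);
    return 0;
}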
@ -118,8 +121,8 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
parent = newSubtract; parent = newSubtract;
} }
auto transpose = [](const std::shared_ptr<Node>& node) -> std::shared_ptr<Node> { auto transpose = [](const std::shared_ptr<opset1::Constant>& node) -> std::shared_ptr<Node> {
const Shape outputShape = node->get_output_shape(0); const Shape outputShape = node->get_shape();
if (outputShape.size() < 2ul) { if (outputShape.size() < 2ul) {
return node; return node;
} }
@ -153,7 +156,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
} }
} }
const auto newMulConst = NetworkHelper::toScalarIfPossible(fold<ngraph::opset1::Multiply>( const auto newMulConst = NetworkHelper::toScalarIfPossible(fold<opset1::Multiply>(
mulConst1, mulConst1,
foldConvert(mulConst2, element::f32))); foldConvert(mulConst2, element::f32)));

View File

@ -164,17 +164,17 @@ bool MultiplyToGroupConvolutionTransformation::canBeTransformed(const Transforma
Shape constShape; Shape constShape;
int inputIndex; int inputIndex;
if (is_type<opset1::Constant>(operation->get_input_node_shared_ptr(1))) { if (const auto constant = as_type_ptr<opset1::Constant>(operation->get_input_node_shared_ptr(1))) {
inputIndex = 0; inputIndex = 0;
constShape = operation->get_input_shape(1); constShape = constant->get_shape();
if (is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0)) || if (is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0)) ||
(is_type<opset1::Subtract>(operation->get_input_node_shared_ptr(0)) && (is_type<opset1::Subtract>(operation->get_input_node_shared_ptr(0)) &&
is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0)->get_input_node_shared_ptr(0)))) { is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0)->get_input_node_shared_ptr(0)))) {
return false; return false;
} }
} else if (is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0))) { } else if (const auto constant = as_type_ptr<opset1::Constant>(operation->get_input_node_shared_ptr(0))) {
inputIndex = 1; inputIndex = 1;
constShape = operation->get_input_shape(0); constShape = constant->get_shape();
} else { } else {
return false; return false;
} }

View File

@ -191,12 +191,12 @@ size_t NetworkHelper::getInputChannelsCount(std::shared_ptr<Node> layer) {
} }
size_t NetworkHelper::getGroupsCount(std::shared_ptr<Node> layer) { size_t NetworkHelper::getGroupsCount(std::shared_ptr<Node> layer) {
if (as_type_ptr<opset1::Convolution>(layer)) { if (is_type<opset1::Convolution>(layer)) {
return 1; return 1;
} else if (auto group_convolution = as_type_ptr<opset1::GroupConvolution>(layer)) { } else if (is_type<opset1::GroupConvolution>(layer)) {
return layer->get_input_shape(1)[0]; // input weights for opset1::GC is in format GOI..., see the specification return layer->get_input_partial_shape(1)[0].get_length(); // input weights for opset1::GC is in format GOI..., see the specification
} else { } else {
THROW_TRANSFORMATION_EXCEPTION << "Invalid layer type of " << layer->get_friendly_name() << "; expected Convolutino or GroupConvolution"; THROW_TRANSFORMATION_EXCEPTION << "Invalid layer type of " << layer->get_friendly_name() << "; expected Convolution or GroupConvolution";
} }
} }
@ -239,9 +239,15 @@ std::shared_ptr<Node> NetworkHelper::swapMultiplyAndAdd(std::shared_ptr<opset1::
auto b = addAfterMultiply->get_input_node_shared_ptr(multiplyBranch == 0 ? 1 : 0); auto b = addAfterMultiply->get_input_node_shared_ptr(multiplyBranch == 0 ? 1 : 0);
std::shared_ptr<Node> bDivA; std::shared_ptr<Node> bDivA;
if (shape_size(b->get_output_shape(0)) == 1 || const auto aPShape = a->get_output_partial_shape(0);
shape_size(a->get_output_shape(0)) == 1 || assert(aPShape.is_static());
shape_size(b->get_output_shape(0)) == shape_size(a->get_output_shape(0))) { const auto aShape = aPShape.to_shape();
const auto bPShape = b->get_output_partial_shape(0);
assert(bPShape.is_static());
const auto bShape = bPShape.to_shape();
if ((shape_size(bShape) == 1) || (shape_size(aShape) == 1) || (shape_size(bShape) == shape_size(aShape))) {
// safe division to avoid NaN // safe division to avoid NaN
const std::vector<float> bValues = as_type_ptr<opset1::Constant>(b)->cast_vector<float>(); const std::vector<float> bValues = as_type_ptr<opset1::Constant>(b)->cast_vector<float>();
const std::vector<float> aValues = as_type_ptr<opset1::Constant>(a)->cast_vector<float>(); const std::vector<float> aValues = as_type_ptr<opset1::Constant>(a)->cast_vector<float>();
@ -263,7 +269,7 @@ std::shared_ptr<Node> NetworkHelper::swapMultiplyAndAdd(std::shared_ptr<opset1::
auto aPrecision = a->get_output_element_type(0); auto aPrecision = a->get_output_element_type(0);
bDivA = std::make_shared<opset1::Constant>( bDivA = std::make_shared<opset1::Constant>(
aPrecision, aPrecision,
aBroadcasted ? b->get_output_shape(0) : a->get_output_shape(0), aBroadcasted ? bShape : aShape,
bDivAValues); bDivAValues);
} else { } else {
b = foldConvert(b, element::f32); b = foldConvert(b, element::f32);
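The swapMultiplyAndAdd hunks above rely on the elementwise identity (x * a) + b = (x + b / a) * a, applied when the two constants are scalar-like or of equal size, with the division guarded so a zero scale cannot produce NaN (see the safe-division comment above). A minimal numeric sketch of the rewrite on scalars:

#include <cassert>
#include <cmath>

int main() {
    const float x = 2.5f;    // data value
    const float a = 0.125f;  // multiply constant (dequantization scale)
    const float b = -3.0f;   // add constant (shift)

    // Multiply-then-add ...
    const float original = x * a + b;

    // ... equals add-then-multiply once the shift is divided by the scale,
    // which is exactly the new Add constant the transformation materializes.
    const float bDivA = b / a;  // the source guards this division against a zero scale
    const float swapped = (x + bDivA) * a;

    assert(std::fabs(original - swapped) < 1e-6f);
    return 0;
}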
@ -463,7 +469,14 @@ std::shared_ptr<ngraph::opset1::Multiply> NetworkHelper::optimizeMultipliesAfter
} }
auto newInput = multiply->input_value(1 - constant1->output(0).get_target_inputs().begin()->get_index()); auto newInput = multiply->input_value(1 - constant1->output(0).get_target_inputs().begin()->get_index());
auto newConst = fold<opset1::Multiply>(constant1, constant2); auto multiplyResult = fold<opset1::Multiply>(constant1, constant2);
{
// optimize constant shape: used in rfcn-resnet101-coco
const auto multiplyResultConstant = as_type_ptr<opset1::Constant>(multiplyResult);
if ((multiplyResultConstant != nullptr) && NetworkHelper::isScalarLike(multiplyResultConstant)) {
multiplyResult = NetworkHelper::toScalar(multiplyResultConstant);
}
}
auto inputPrecision0 = nextMultiply->get_origin_input_type(0); auto inputPrecision0 = nextMultiply->get_origin_input_type(0);
auto inputPrecision1 = nextMultiply->get_origin_input_type(1); auto inputPrecision1 = nextMultiply->get_origin_input_type(1);
auto outputPrecision = nextMultiply->get_overridden_output_type(0); auto outputPrecision = nextMultiply->get_overridden_output_type(0);
@ -472,7 +485,7 @@ std::shared_ptr<ngraph::opset1::Multiply> NetworkHelper::optimizeMultipliesAfter
std::vector<element::Type>{ inputPrecision0, inputPrecision1 }, std::vector<element::Type>{ inputPrecision0, inputPrecision1 },
std::vector<element::Type>{ outputPrecision }, std::vector<element::Type>{ outputPrecision },
ngraph::op::TemporaryReplaceOutputType(newInput, inputPrecision0).get(), ngraph::op::TemporaryReplaceOutputType(newInput, inputPrecision0).get(),
ngraph::op::TemporaryReplaceOutputType(newConst, inputPrecision1).get()); ngraph::op::TemporaryReplaceOutputType(multiplyResult, inputPrecision1).get());
copy_runtime_info(multiply, newMultiply); copy_runtime_info(multiply, newMultiply);
replace_node(nextMultiply, newMultiply); replace_node(nextMultiply, newMultiply);
return newMultiply; return newMultiply;
@ -734,9 +747,12 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
auto constant = as_type_ptr<opset1::Constant>(fq->get_input_node_shared_ptr(0)); auto constant = as_type_ptr<opset1::Constant>(fq->get_input_node_shared_ptr(0));
if (constant) { if (constant) {
const bool roundValues = roundValuesWasSet ? roundValuesArg : fq->output(0).get_element_type().is_integral(); const bool roundValues = roundValuesWasSet ? roundValuesArg : fq->get_output_element_type(0).is_integral();
const auto constPShape = fq->get_output_partial_shape(0);
assert(constPShape.is_static());
const Shape constShape = constPShape.to_shape();
Shape constShape = fq->get_output_shape(0);
if (constShape.empty() || constShape.size() > 5lu) { if (constShape.empty() || constShape.size() > 5lu) {
THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected dimensions count " << constShape.size(); THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected dimensions count " << constShape.size();
} }
@ -1117,7 +1133,7 @@ FakeQuantizeDequantization NetworkHelper::makeDequantization(
const float dequantizationMul, const float dequantizationMul,
const float dequantizationSub, const float dequantizationSub,
const ngraph::element::Type originalPrecision, const ngraph::element::Type originalPrecision,
const ngraph::PartialShape dataNodeOutputShape, const ngraph::PartialShape& dataNodeOutputShape,
element::Type precision, element::Type precision,
const ngraph::element::Type deqPrecision, const ngraph::element::Type deqPrecision,
std::shared_ptr<ngraph::Node> input) { std::shared_ptr<ngraph::Node> input) {
@ -1767,7 +1783,9 @@ std::vector<element::Type> NetworkHelper::precisionIntersection(
bool NetworkHelper::isFQByDynamicDimension(const std::shared_ptr<opset1::FakeQuantize>& fq) { bool NetworkHelper::isFQByDynamicDimension(const std::shared_ptr<opset1::FakeQuantize>& fq) {
const auto pInputShape = fq->get_input_partial_shape(0); const auto pInputShape = fq->get_input_partial_shape(0);
auto olShape = fq->get_input_shape(3); const auto olPShape = fq->get_input_partial_shape(3);
assert(olPShape.is_static());
auto olShape = olPShape.to_shape();
if (shape_size(olShape) > 1ul) { if (shape_size(olShape) > 1ul) {
if (pInputShape.rank().is_dynamic()) { if (pInputShape.rank().is_dynamic()) {

View File

@ -78,12 +78,12 @@ bool NormalizeL2Transformation::canBeTransformed(const TransformationContext& co
const std::vector<int64_t> axesByChannels = { 1, 2, 3 }; const std::vector<int64_t> axesByChannels = { 1, 2, 3 };
std::vector<int64_t> axesValues = axes->cast_vector<int64_t>(); std::vector<int64_t> axesValues = axes->cast_vector<int64_t>();
if (!(axesValues == axesAcrossSpatial || axesValues == axesByChannels)) { if ((axesValues != axesAcrossSpatial) && (axesValues != axesByChannels)) {
return false; return false;
} }
const ngraph::Shape outputShape = scalesConst->get_output_shape(0); const Shape outputShape = scalesConst->get_shape();
const size_t size = ngraph::shape_size(outputShape); const size_t size = shape_size(outputShape);
if (size != 1ul) { if (size != 1ul) {
const auto channelsInterval = operation->get_output_partial_shape(0)[1]; const auto channelsInterval = operation->get_output_partial_shape(0)[1];
if (channelsInterval.is_dynamic() || static_cast<size_t>(channelsInterval.get_length()) != size) { if (channelsInterval.is_dynamic() || static_cast<size_t>(channelsInterval.get_length()) != size) {

View File

@ -0,0 +1,277 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision/pad.hpp"
#include <memory>
#include <ngraph/ngraph.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include "low_precision/network_helper.hpp"
namespace ngraph {
namespace pass {
namespace low_precision {
NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::PadTransformation, "PadTransformation", 0);
PadTransformation::PadTransformation(const Params& params) : LayerTransformation(params) {
auto mul = pattern::wrap_type<opset1::Multiply>();
auto padsBegin = pattern::wrap_type<opset1::Constant>();
auto padsEnd = pattern::wrap_type<opset1::Constant>();
auto padsValue = pattern::wrap_type<opset1::Constant>();
auto matcher = pattern::wrap_type<opset1::Pad>({ mul, padsBegin, padsEnd, padsValue });
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
auto op = m.get_match_root();
if (transformation_callback(op)) {
return false;
}
return transform(*context, m);
};
auto m = std::make_shared<ngraph::pattern::Matcher>(matcher, "PadTransformation");
this->register_matcher(m, callback);
}
bool PadTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) {
if (!canBeTransformed(context, m.get_match_root())) {
return false;
}
const auto pad = as_type_ptr<opset1::Pad>(NetworkHelper::separateInStandaloneBranch(m.get_match_root()));
const auto padConstant = as_type_ptr<opset1::Constant>(pad->get_input_node_shared_ptr(3));
const auto padConstantValue = padConstant->cast_vector<float>()[0];
const auto padsBegin = pad->get_pads_begin();
const auto padsEnd = pad->get_pads_end();
const auto padMode = pad->get_pad_mode();
auto dequantization = NetworkHelper::getDequantization(pad);
if (padMode == op::PadMode::CONSTANT) {
auto bcastConstant = [&](const std::shared_ptr<opset1::Constant> &constant) {
size_t padIdx = 0;
for (size_t i = 0; i < padsBegin.size(); ++i) {
if (padsBegin[i] != 0 || padsEnd[i] != 0) {
padIdx = i;
break;
}
}
const auto inputPShape = pad->get_input_partial_shape(0);
assert(inputPShape[padIdx].is_static());
assert(inputPShape.rank().is_static());
auto bcastedShape = Shape(inputPShape.rank().get_length(), 1ul);
bcastedShape[padIdx] = inputPShape[padIdx].get_length();
const auto bCastConst = opset1::Constant::create(element::i32, Shape{bcastedShape.size()}, bcastedShape);
return as_type_ptr<opset1::Constant>(fold<opset1::Broadcast>(constant, bCastConst));
};
if (dequantization.subtract && shape_size(dequantization.subtractConstant->get_shape()) == 1ul) {
const auto broadcastedConstant = bcastConstant(dequantization.subtractConstant);
replace_node(dequantization.subtractConstant, broadcastedConstant);
dequantization.subtractConstant = broadcastedConstant;
}
if (padConstantValue != 0.f && shape_size(dequantization.multiplyConstant->get_shape()) == 1ul) {
const auto broadcastedConstant = bcastConstant(dequantization.multiplyConstant);
replace_node(dequantization.multiplyConstant, broadcastedConstant);
dequantization.multiplyConstant = broadcastedConstant;
}
}
auto foldConstantIfNecessary = [&padMode, &padsBegin, &padsEnd](
const std::shared_ptr<opset1::Constant>& constant,
const std::shared_ptr<opset1::Pad>& pad,
float padVal) {
const auto constantShape = constant->get_shape();
if (shape_size(constantShape) == 1ul) {
return NetworkHelper::toScalar(constant);
}
std::vector<size_t> padsForConstantBegin(constantShape.size(), 0ul);
std::vector<size_t> padsForConstantEnd(constantShape.size(), 0ul);
bool foldingIsNecessary = false;
// folding is necessary when dequantization and padding are applied along the same dimension
for (size_t i = 0; i < constantShape.size(); ++i) {
if (padsBegin[i] != 0ul && constantShape[i] != 1ul) {
foldingIsNecessary = true;
padsForConstantBegin[i] = padsBegin[i];
}
if (padsEnd[i] != 0ul && constantShape[i] != 1ul) {
foldingIsNecessary = true;
padsForConstantEnd[i] = padsEnd[i];
}
}
if (foldingIsNecessary) {
const auto beginConst = opset1::Constant::create(element::u32, { padsForConstantBegin.size() }, padsForConstantBegin);
const auto endConst = opset1::Constant::create(element::u32, { padsForConstantEnd.size() }, padsForConstantEnd);
const auto padValueConstant = opset1::Constant::create(constant->get_element_type(), Shape{}, { padVal });
const auto foldedConstant = fold<opset1::Pad>(constant, beginConst, endConst, padValueConstant, padMode);
return as_type_ptr<opset1::Constant>(foldedConstant);
} else {
return constant;
}
};
if (dequantization.subtract) {
const auto normalizedSubConst = NetworkHelper::normalizeDequantizationShape(dequantization.subtract);
float padValueForSub = padConstantValue;
if (padMode == op::PadMode::CONSTANT) {
padValueForSub = 0.f;
}
const auto newSubConstant = foldConstantIfNecessary(normalizedSubConst, pad, padValueForSub);
replace_node(normalizedSubConst, newSubConstant);
dequantization.subtractConstant = newSubConstant;
}
{
const auto normalizedMulConst = NetworkHelper::normalizeDequantizationShape(dequantization.multiply);
float padValueForMul = padConstantValue;
if (padMode == op::PadMode::CONSTANT) {
padValueForMul = 1.f;
}
const auto newMulConstant = foldConstantIfNecessary(normalizedMulConst, pad, padValueForMul);
replace_node(normalizedMulConst, newMulConstant);
dequantization.multiplyConstant = newMulConstant;
}
// we must convert the pad value to low precision
const auto convertedZero = opset1::Constant::create(dequantization.data.get_element_type(), Shape{}, { padConstantValue });
pad->set_argument(3, convertedZero);
moveDequantizationAfter(context, pad, dequantization, true);
return true;
}
bool PadTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const {
if (!LayerTransformation::canBeTransformedSpatialDimension(context, op)) {
return false;
}
const auto pad = as_type_ptr<opset1::Pad>(op);
if (!pad) {
return false;
}
const auto dequantization = NetworkHelper::getDequantization(op);
if (dequantization.empty()) {
return false;
}
const auto mode = pad->get_pad_mode();
if (mode == op::PadMode::CONSTANT) {
auto padAndDqByTheSameDimension = [&](const std::shared_ptr<opset1::Constant>& deqConst) {
const auto padsBegin = pad->get_pads_begin();
const auto padsEnd = pad->get_pads_end();
int beginNonZeroIdx = -1;
for (size_t i = 0; i < padsBegin.size(); ++i) {
const bool padDimensionNotUnique = (beginNonZeroIdx != -1) && (padsBegin[i] != 0);
if (padDimensionNotUnique) {
return false;
}
if (padsBegin[i] != 0) {
beginNonZeroIdx = i;
}
}
int endNonZeroIdx = -1;
for (size_t i = 0; i < padsEnd.size(); ++i) {
const bool padDimensionNotUnique = (endNonZeroIdx != -1) && (padsEnd[i] != 0);
if (padDimensionNotUnique) {
return false;
}
if (padsEnd[i] != 0) {
endNonZeroIdx = i;
}
}
if ((beginNonZeroIdx != endNonZeroIdx) && (beginNonZeroIdx != -1) && (endNonZeroIdx != -1)) {
return false;
}
const size_t paddingDimension = beginNonZeroIdx != -1 ? beginNonZeroIdx : endNonZeroIdx;
const auto padInputPShape = pad->get_input_partial_shape(0);
const auto padInputRank = padInputPShape.rank();
if (padInputRank.is_dynamic() || padInputPShape[paddingDimension].is_dynamic()) {
return false;
}
const size_t inputRankValue = padInputRank.get_length();
auto deqShape = deqConst->get_shape();
if (shape_size(deqShape) > 1ul) {
while (deqShape.size() < inputRankValue) {
deqShape.insert(deqShape.begin(), 1ul);
}
for (size_t i = 0; i < deqShape.size(); ++i) {
const bool deqAndPadDimensionsMismatched = (deqShape[i] > 1ul) && (i != paddingDimension);
if (deqAndPadDimensionsMismatched) {
return false;
}
}
}
return true;
};
if (dequantization.subtract && !padAndDqByTheSameDimension(dequantization.subtractConstant)) {
return false;
}
const auto constant = as_type_ptr<opset1::Constant>(pad->get_input_node_shared_ptr(3));
const auto constantValue = constant->cast_vector<float>()[0];
if (constantValue != 0.f && !padAndDqByTheSameDimension(dequantization.multiplyConstant)) {
return false;
}
}
if (mode == op::PadMode::REFLECT) {
auto deqShape = dequantization.multiplyConstant->get_shape();
if (shape_size(deqShape) == 1ul) {
return true;
} else {
const auto padInputRank = pad->get_input_partial_shape(0).rank();
if (padInputRank.is_dynamic()) {
return false;
}
const size_t inputRankValue = padInputRank.get_length();
while (deqShape.size() < inputRankValue) {
deqShape.insert(deqShape.begin(), 1ul);
}
const auto padsBegin = pad->get_pads_begin();
const auto padsEnd = pad->get_pads_end();
// PadTransformation with "REFLECT" mode doesn't support dequantization and padding along the same dimension
for (size_t i = 0; i < deqShape.size(); ++i) {
if (deqShape[i] != 1ul && (padsBegin[i] != 0ul || padsEnd[i] != 0ul)) {
return false;
}
}
}
}
return true;
}
bool PadTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept {
return true;
}
} // namespace low_precision
} // namespace pass
} // namespace ngraph
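For PadMode::CONSTANT the transformation above pads the dequantization constants themselves with the neutral value of each operation (0 for the Subtract constant, 1 for the Multiply constant) along the padded dimension, so the dequantization can be moved behind the Pad while the padded channels stay untouched by scale and shift. A standalone 1-D sketch of that constant folding (padDequantizationConstant is an illustrative name):

#include <cstddef>
#include <vector>

// Illustrative 1-D version of foldConstantIfNecessary: extend a per-channel
// dequantization constant with a neutral value on both ends of the padded axis.
std::vector<float> padDequantizationConstant(const std::vector<float>& constant,
                                             std::size_t padsBegin,
                                             std::size_t padsEnd,
                                             float neutralValue) {
    std::vector<float> result(padsBegin, neutralValue);
    result.insert(result.end(), constant.begin(), constant.end());
    result.insert(result.end(), padsEnd, neutralValue);
    return result;
}

int main() {
    const std::vector<float> perChannelScale{0.5f, 0.25f, 0.125f};
    // Multiply constant: pad with 1 so the new channels are left unscaled;
    // the Subtract constant would be padded with 0 instead.
    const auto paddedScale = padDequantizationConstant(perChannelScale, 1, 1, 1.f);
    return (paddedScale.size() == 5) ? 0 : 1;
}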

View File

@ -47,7 +47,7 @@ void reshapeDequantizationConstant(const std::shared_ptr<opset1::Reshape>& resha
auto replaceConstant = [](const std::shared_ptr<opset1::Reshape>& reshape, const std::shared_ptr<opset1::Constant>& originalConstant) { auto replaceConstant = [](const std::shared_ptr<opset1::Reshape>& reshape, const std::shared_ptr<opset1::Constant>& originalConstant) {
// reshape for element-wise constant is not required // reshape for element-wise constant is not required
auto constantShape = originalConstant->get_shape(); auto constantShape = originalConstant->get_shape();
if (shape_size(constantShape) == 1ul) { if (NetworkHelper::isScalarLike(originalConstant)) {
if (!constantShape.empty()) { if (!constantShape.empty()) {
const auto newConstant = NetworkHelper::toScalar(originalConstant); const auto newConstant = NetworkHelper::toScalar(originalConstant);
replace_node(originalConstant, newConstant); replace_node(originalConstant, newConstant);
@ -75,19 +75,28 @@ void reshapeDequantizationConstant(const std::shared_ptr<opset1::Reshape>& resha
return; return;
} }
Shape newOperationConstantBroadcastedShape = originalConstant->output(0).get_shape(); auto getBCastedConst = [](const std::shared_ptr<opset1::Constant>& constant, size_t dimensionsToBroadcast) -> std::shared_ptr<Node> {
if (dimensionsToBroadcast == 1ul) {
return constant;
}
Shape newOperationConstantBroadcastedShape = constant->get_shape();
// add dimensions to broadcast values // add dimensions to broadcast values
if (newOperationConstantBroadcastedShape.size() == 2ul) { if (newOperationConstantBroadcastedShape.size() == 2ul) {
newOperationConstantBroadcastedShape.push_back(dimensionsToBroadcast); newOperationConstantBroadcastedShape.push_back(dimensionsToBroadcast);
} else { } else {
newOperationConstantBroadcastedShape[2] = dimensionsToBroadcast; newOperationConstantBroadcastedShape[2] = dimensionsToBroadcast;
} }
const std::shared_ptr<Node> broadcastedConstant = fold<opset1::Broadcast>(
originalConstant, const auto targetShapeConstant = opset1::Constant::create(
std::make_shared<opset1::Constant>(
element::i32, element::i32,
Shape({ newOperationConstantBroadcastedShape.size() }), Shape{ newOperationConstantBroadcastedShape.size() },
newOperationConstantBroadcastedShape)); newOperationConstantBroadcastedShape);
return fold<opset1::Broadcast>(constant, targetShapeConstant);
};
const std::shared_ptr<Node> broadcastedConstant = getBCastedConst(originalConstant, dimensionsToBroadcast);
std::vector<int> newReshapeConstValues(reshapeOutputRank.get_length(), 1ul); std::vector<int> newReshapeConstValues(reshapeOutputRank.get_length(), 1ul);
newReshapeConstValues[1] = reshapeOutputPShape[1].get_length(); newReshapeConstValues[1] = reshapeOutputPShape[1].get_length();
@ -190,7 +199,7 @@ bool ReshapeTransformation::canBeTransformed(const TransformationContext& contex
subtractShapeWithBatch.insert(subtractShapeWithBatch.begin(), 1ul); subtractShapeWithBatch.insert(subtractShapeWithBatch.begin(), 1ul);
} }
const Shape multiplyShape = dequantization.multiply == nullptr ? Shape{} : dequantization.multiply->input(1).get_shape(); const Shape multiplyShape = dequantization.multiply == nullptr ? Shape{} : dequantization.multiplyConstant->get_shape();
Shape multiplyShapeWithBatch = multiplyShape; Shape multiplyShapeWithBatch = multiplyShape;
if ((dequantization.multiply != nullptr) && if ((dequantization.multiply != nullptr) &&
(multiplyShapeWithBatch.size() > 1ul) && (multiplyShapeWithBatch.size() > 1ul) &&

View File

@ -9,6 +9,7 @@
#include <ngraph/variant.hpp> #include <ngraph/variant.hpp>
using namespace ngraph; using namespace ngraph;
using namespace ov;
template class ngraph::VariantImpl<AvgPoolPrecisionPreservedAttributePtr>; template class ngraph::VariantImpl<AvgPoolPrecisionPreservedAttributePtr>;

View File

@ -12,6 +12,7 @@
#include "low_precision/network_helper.hpp" #include "low_precision/network_helper.hpp"
using namespace ngraph; using namespace ngraph;
using namespace ov;
using namespace ngraph::pass::low_precision; using namespace ngraph::pass::low_precision;
IntervalsAlignmentAttribute::IntervalsAlignmentAttribute( IntervalsAlignmentAttribute::IntervalsAlignmentAttribute(

View File

@ -5,6 +5,7 @@
#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp" #include "low_precision/rt_info/per_tensor_quantization_attribute.hpp"
using namespace ngraph; using namespace ngraph;
using namespace ov;
template class ngraph::VariantImpl<PerTensorQuantizationAttribute>; template class ngraph::VariantImpl<PerTensorQuantizationAttribute>;
constexpr VariantTypeInfo VariantWrapper<PerTensorQuantizationAttribute>::type_info; constexpr VariantTypeInfo VariantWrapper<PerTensorQuantizationAttribute>::type_info;

View File

@ -8,6 +8,7 @@
#include <string> #include <string>
using namespace ngraph; using namespace ngraph;
using namespace ov;
PrecisionPreservedAttribute::PrecisionPreservedAttribute(const bool value) { PrecisionPreservedAttribute::PrecisionPreservedAttribute(const bool value) {
sharedValue->value = value; sharedValue->value = value;

View File

@ -14,6 +14,7 @@
#include "low_precision/network_helper.hpp" #include "low_precision/network_helper.hpp"
using namespace ngraph; using namespace ngraph;
using namespace ov;
// order defines default precision // order defines default precision
const std::vector<ngraph::element::Type> PrecisionsAttribute::defaultPrecisions = { ngraph::element::u8, ngraph::element::i8 }; const std::vector<ngraph::element::Type> PrecisionsAttribute::defaultPrecisions = { ngraph::element::u8, ngraph::element::i8 };

View File

@ -12,6 +12,7 @@
#include <ngraph/opsets/opset1.hpp> #include <ngraph/opsets/opset1.hpp>
#include "low_precision/network_helper.hpp" #include "low_precision/network_helper.hpp"
using namespace ov;
using namespace ngraph; using namespace ngraph;
using namespace ngraph::pass::low_precision; using namespace ngraph::pass::low_precision;

View File

@ -42,47 +42,40 @@ void transposeDequantizationConstant(std::shared_ptr<Node>& transpose) {
return; return;
} }
if (dequantization.multiply->get_input_node_ptr(1)->get_output_shape(0).size() > 1ul) {
auto transposeDeqConstant = []( auto transposeDeqConstant = [](
std::shared_ptr<Node> dequantizationConstant, const std::shared_ptr<opset1::Constant>& dequantizationConstant,
const PartialShape& transposeOutputShape, const PartialShape& transposeOutputPShape,
const std::shared_ptr<Node>& transposeConstant) -> std::shared_ptr<Node> { const std::shared_ptr<Node>& transposeConstant) -> std::shared_ptr<Node> {
const auto dequantizationShape = dequantizationConstant->get_output_shape(0); const auto constantShape = dequantizationConstant->get_shape();
if (dequantizationShape.empty() || (dequantizationShape.size() == 1ul)) { if (shape_size(constantShape) == 1ul) {
return nullptr; return NetworkHelper::toScalar(dequantizationConstant);
} }
if (dequantizationShape.size() != static_cast<size_t>(transposeOutputShape.rank().get_length())) { assert(transposeOutputPShape.rank().is_static());
dequantizationConstant = fold<opset1::Unsqueeze>( const size_t transposeOutRank = transposeOutputPShape.rank().get_length();
dequantizationConstant, if (constantShape.size() != transposeOutRank) {
std::make_shared<opset1::Constant>(element::i32, Shape{ 1 }, std::vector<size_t>{0})); const auto unsqueezeConst = opset1::Constant::create(element::i32, Shape{ 1 }, std::vector<size_t>{ 0 });
} const auto deqConstantWithBatch = fold<opset1::Unsqueeze>(dequantizationConstant, unsqueezeConst);
return fold<opset1::Transpose>(deqConstantWithBatch, transposeConstant);
} else {
return fold<opset1::Transpose>(dequantizationConstant, transposeConstant); return fold<opset1::Transpose>(dequantizationConstant, transposeConstant);
}
}; };
if (dequantization.subtract != nullptr) { if (dequantization.subtract != nullptr) {
auto constant = transposeDeqConstant( const auto constant = transposeDeqConstant(
dequantization.subtractConstant, dequantization.subtractConstant,
transpose->get_output_partial_shape(0), transpose->get_output_partial_shape(0),
transpose->get_input_node_shared_ptr(1)); transpose->get_input_node_shared_ptr(1));
if (constant != nullptr) { replace_node(dequantization.subtractConstant, constant);
replace_node(
dequantization.subtract->get_input_node_shared_ptr(1),
constant);
}
} }
if (dequantization.multiply != nullptr) { if (dequantization.multiply != nullptr) {
auto constant = transposeDeqConstant( const auto constant = transposeDeqConstant(
dequantization.multiplyConstant, dequantization.multiplyConstant,
transpose->get_output_partial_shape(0), transpose->get_output_partial_shape(0),
transpose->get_input_node_shared_ptr(1)); transpose->get_input_node_shared_ptr(1));
if (constant != nullptr) { replace_node(dequantization.multiplyConstant, constant);
replace_node(
dequantization.multiply->get_input_node_shared_ptr(1),
constant);
}
}
} }
} }
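The reworked transposeDeqConstant above unsqueezes a non-scalar dequantization constant to the transpose output rank and then applies the same permutation the Transpose applies to the data, so the per-channel axis follows the new layout. A small sketch of applying such a permutation to a constant's shape (plain vectors, permuteShape is an illustrative name):

#include <cstddef>
#include <vector>

// Permute a shape by the same axis order a Transpose applies to its input,
// e.g. NCHW -> NHWC with order {0, 2, 3, 1}.
std::vector<std::size_t> permuteShape(const std::vector<std::size_t>& shape,
                                      const std::vector<std::size_t>& order) {
    std::vector<std::size_t> result(order.size());
    for (std::size_t i = 0; i < order.size(); ++i) {
        result[i] = shape[order[i]];
    }
    return result;
}

int main() {
    // Per-channel constant already unsqueezed to rank 4: [1, C, 1, 1]
    const std::vector<std::size_t> constShape{1, 64, 1, 1};
    const std::vector<std::size_t> order{0, 2, 3, 1};         // NCHW -> NHWC
    const auto transposed = permuteShape(constShape, order);  // [1, 1, 1, 64]
    return (transposed == std::vector<std::size_t>({1, 1, 1, 64})) ? 0 : 1;
}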

View File

@ -74,14 +74,13 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
return false; return false;
} }
const std::shared_ptr<opset1::Constant> multiplyConst = as_type_ptr<opset1::Constant>(dequantization.multiply->get_input_node_shared_ptr(1)); const Shape multiplyConstShape = dequantization.multiplyConstant->get_shape();
const Shape multiplyConstShape = multiplyConst->get_output_shape(0);
if (!multiplyConstShape.empty() && (shape_size(multiplyConstShape) != 1ul)) { if (!multiplyConstShape.empty() && (shape_size(multiplyConstShape) != 1ul)) {
const size_t groupsCount = NetworkHelper::getGroupsCount(layer); const size_t groupsCount = NetworkHelper::getGroupsCount(layer);
const ngraph::PartialShape inputPShape = layer->get_input_partial_shape(0); const PartialShape inputPShape = layer->get_input_partial_shape(0);
const size_t inputChannelsInGroup = inputPShape[1].get_length() / groupsCount; const size_t inputChannelsInGroup = inputPShape[1].get_length() / groupsCount;
const std::vector<float> scales = multiplyConst->cast_vector<float>(); const std::vector<float> scales = dequantization.multiplyConstant->cast_vector<float>();
for (size_t group = 0; group < groupsCount; ++group) { for (size_t group = 0; group < groupsCount; ++group) {
for (size_t i = 0; i < inputChannelsInGroup; ++i) { for (size_t i = 0; i < inputChannelsInGroup; ++i) {
if (scales[group * inputChannelsInGroup] != scales[group * inputChannelsInGroup + i]) { if (scales[group * inputChannelsInGroup] != scales[group * inputChannelsInGroup + i]) {
@ -90,30 +89,33 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
} }
} }
const ngraph::PartialShape outputPShape = layer->get_output_partial_shape(0); const PartialShape outputPShape = layer->get_output_partial_shape(0);
const auto rank = outputPShape.rank().get_length(); const auto rank = outputPShape.rank();
if ((rank != 4) && (rank != 5)) { if (rank.is_dynamic()) {
return false;
}
const auto rankVal = rank.get_length();
if ((rankVal != 4) && (rankVal != 5)) {
return false; return false;
} }
} }
} else { } else {
const std::shared_ptr<opset1::Multiply> multiply = as_type_ptr<opset1::Multiply>(layer->input_value(0).get_node_shared_ptr()); const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(layer);
if (multiply == nullptr) { if (dequantization.multiply == nullptr) {
return false; return false;
} }
// SS takes inputs [0: data, 1: scales, 2: shifts], takes scales (index = 1) if (dequantization.multiplyConstant == nullptr) {
const std::shared_ptr<opset1::Constant> multiplyConst = as_type_ptr<opset1::Constant>(multiply->input_value(1).get_node_shared_ptr());
if (multiplyConst == nullptr) {
return false; return false;
} }
// exactly cast vector as original code has a conversion; // exactly cast vector as original code has a conversion;
// optimize cast: // optimize cast:
// two branches depending on real type of the constant? // two branches depending on real type of the constant?
const auto scalesBuffer = multiplyConst->cast_vector<float>(); const auto scalesBuffer = dequantization.multiplyConstant->cast_vector<float>();
size_t scalesBufferSize = shape_size(multiplyConst->get_output_shape(0)); size_t scalesBufferSize = shape_size(dequantization.multiplyConstant->get_shape());
for (size_t i = 1lu; i < scalesBufferSize; ++i) { for (size_t i = 1ul; i < scalesBufferSize; ++i) {
if (scalesBuffer[i - 1] != scalesBuffer[i]) { if (scalesBuffer[i - 1] != scalesBuffer[i]) {
return false; return false;
} }
@ -132,11 +134,11 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
// TODO Implement similar checks in other weightable operations // TODO Implement similar checks in other weightable operations
const std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(layer->input_value(1).get_node_shared_ptr()); const std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(layer->get_input_node_shared_ptr(1));
std::shared_ptr<opset1::FakeQuantize> fqFromWeights; std::shared_ptr<opset1::FakeQuantize> fqFromWeights;
if (reshapeFromWeights == nullptr) { if (reshapeFromWeights == nullptr) {
fqFromWeights = as_type_ptr<opset1::FakeQuantize>(layer->input_value(1).get_node_shared_ptr()); fqFromWeights = as_type_ptr<opset1::FakeQuantize>(layer->get_input_node_shared_ptr(1));
if (fqFromWeights == nullptr) { if (fqFromWeights == nullptr) {
const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(layer, 1ul); const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(layer, 1ul);
fqFromWeights = as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr()); fqFromWeights = as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr());
@ -154,23 +156,29 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
return false; return false;
} }
const Shape constOutputShape = fqFromWeights->get_input_node_ptr(3)->get_output_shape(0); const auto olPShape = fqFromWeights->get_input_partial_shape(3);
if (fqFromWeights->get_input_node_ptr(4)->get_output_shape(0) != constOutputShape) { const auto ohPShape = fqFromWeights->get_input_partial_shape(4);
if (olPShape.is_dynamic() || ohPShape.is_dynamic() || olPShape != ohPShape) {
return false; return false;
} }
const size_t outChannelsShapeIndex = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul;
if ( const auto fqOutPShape = fqFromWeights->get_output_partial_shape(0);
// expected, it's ok: return true const size_t outChannelsIdx = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul;
(shape_size(constOutputShape) != 1ul) && if (fqOutPShape.rank().is_dynamic() || fqOutPShape[outChannelsIdx].is_dynamic()) {
// not expected, something wrong: return false
((constOutputShape.size() <= outChannelsShapeIndex) ||
// Check if all dimensions of scale except the output channels are all ones
(shape_size(constOutputShape) != constOutputShape[outChannelsShapeIndex]) ||
((constOutputShape[outChannelsShapeIndex] != 1ul) &&
(fqFromWeights->get_output_shape(0)[outChannelsShapeIndex] != constOutputShape[outChannelsShapeIndex])))) {
return false; return false;
} }
const Shape constShape = olPShape.to_shape();
if (shape_size(constShape) != 1ul) {
const size_t constChannels = constShape[outChannelsIdx];
const size_t fqOutChannels = fqOutPShape[outChannelsIdx].get_length();
const bool constChannelsAndFqChannelsMismatched = (constChannels != 1ul) && (fqOutChannels != constChannels);
if ((constShape.size() <= outChannelsIdx) || (shape_size(constShape) != constChannels) || constChannelsAndFqChannelsMismatched) {
return false;
}
}
} else { } else {
// TODO: LPT: is it possible to share with isQuantized? // TODO: LPT: is it possible to share with isQuantized?
const FakeQuantizeDequantization dequantizationOnWeights = reshapeFromWeights == nullptr ? const FakeQuantizeDequantization dequantizationOnWeights = reshapeFromWeights == nullptr ?
@ -180,33 +188,33 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
return false; return false;
} }
const opset1::Constant* weightsData = as_type<opset1::Constant>(dequantizationOnWeights.data.get_node()); const auto weightsData = as_type_ptr<opset1::Constant>(dequantizationOnWeights.data.get_node_shared_ptr());
if (weightsData == nullptr) { if (weightsData == nullptr) {
return false; return false;
} }
const ngraph::element::Type weightsDataPrecision = weightsData->output(0).get_element_type(); const auto weightsDataPrecision = weightsData->get_element_type();
if (!DataPrecision::isSupported(weightsDataPrecision)) { if (!DataPrecision::isSupported(weightsDataPrecision)) {
return false; return false;
} }
if ((dequantizationOnWeights.subtract != nullptr) && (dequantizationOnWeights.subtractConvert != nullptr)) { if ((dequantizationOnWeights.subtract != nullptr) && (dequantizationOnWeights.subtractConvert != nullptr)) {
const auto subtractConstantType = dequantizationOnWeights.subtractConstant->output(0).get_element_type(); const auto subtractConstantType = dequantizationOnWeights.subtractConstant->get_element_type();
if (subtractConstantType != weightsDataPrecision) { if (subtractConstantType != weightsDataPrecision) {
return false; return false;
} }
} }
const size_t outChannelsShapeIndex = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul; const size_t outChannelsIdx = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul;
if (dequantizationOnWeights.subtract) { if (dequantizationOnWeights.subtract) {
const auto subConstShape = dequantizationOnWeights.subtractConstant->get_shape(); const auto subConstShape = dequantizationOnWeights.subtractConstant->get_shape();
if (shape_size(subConstShape) > 1ul && shape_size(subConstShape) != subConstShape[outChannelsShapeIndex]) { if (shape_size(subConstShape) > 1ul && shape_size(subConstShape) != subConstShape[outChannelsIdx]) {
return false; return false;
} }
} }
if (dequantizationOnWeights.multiply) { if (dequantizationOnWeights.multiply) {
const auto mulConstShape = dequantizationOnWeights.multiplyConstant->get_shape(); const auto mulConstShape = dequantizationOnWeights.multiplyConstant->get_shape();
if (shape_size(mulConstShape) > 1ul && shape_size(mulConstShape) != mulConstShape[outChannelsShapeIndex]) { if (shape_size(mulConstShape) > 1ul && shape_size(mulConstShape) != mulConstShape[outChannelsIdx]) {
return false; return false;
} }
} }
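The subtract and multiply constant checks above boil down to one rule: a dequantization constant on the weights path must be a scalar or carry exactly one value per output channel (dimension 1 for ConvolutionBackpropData, 0 otherwise). A minimal standalone sketch of that rule, assuming plain std::vector shapes instead of the ngraph types (the helper names are illustrative only, not part of the plugin):

#include <cstddef>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

// Total number of elements in a shape (analogue of ngraph's shape_size).
static std::size_t shape_size(const std::vector<std::size_t>& shape) {
    return std::accumulate(shape.begin(), shape.end(), std::size_t{1},
                           std::multiplies<std::size_t>());
}

// A constant is acceptable on the weights path if it is a scalar or if it
// carries exactly one value per output channel of the weights.
static bool is_per_tensor_or_per_channel(const std::vector<std::size_t>& constShape,
                                         std::size_t outChannelsIdx) {
    const std::size_t total = shape_size(constShape);
    if (total == 1) {
        return true;                                 // per-tensor (scalar) constant
    }
    if (constShape.size() <= outChannelsIdx) {
        return false;                                // no output-channel dimension at all
    }
    return total == constShape[outChannelsIdx];      // all non-channel dims must be 1
}

int main() {
    // A regular convolution keeps output channels in dimension 0 of the constant.
    std::cout << is_per_tensor_or_per_channel({64, 1, 1, 1}, 0) << '\n';  // 1: per-channel
    std::cout << is_per_tensor_or_per_channel({1, 1, 1, 1}, 0) << '\n';   // 1: per-tensor
    std::cout << is_per_tensor_or_per_channel({64, 3, 1, 1}, 0) << '\n';  // 0: rejected
}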
@ -321,7 +329,7 @@ bool WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const st
} }
bool WeightableLayerTransformation::isGroup(const std::shared_ptr<Node>& layer) { bool WeightableLayerTransformation::isGroup(const std::shared_ptr<Node>& layer) {
if (!as_type_ptr<opset1::Convolution>(layer) && !as_type_ptr<opset1::GroupConvolution>(layer)) { if (!is_type<opset1::Convolution>(layer) && !is_type<opset1::GroupConvolution>(layer)) {
return false; return false;
} }
@ -341,7 +349,7 @@ bool WeightableLayerTransformation::isDepthwise(const std::shared_ptr<Node>& lay
} }
std::shared_ptr<opset1::FakeQuantize> WeightableLayerTransformation::getFakeQuantizeOnWeights(const std::shared_ptr<Node>& node) { std::shared_ptr<opset1::FakeQuantize> WeightableLayerTransformation::getFakeQuantizeOnWeights(const std::shared_ptr<Node>& node) {
auto fq = as_type_ptr<opset1::FakeQuantize>(node->input_value(1).get_node_shared_ptr()); auto fq = as_type_ptr<opset1::FakeQuantize>(node->get_input_node_shared_ptr(1));
// TODO: temporary workaround // TODO: temporary workaround
if (fq == nullptr) { if (fq == nullptr) {
fq = as_type_ptr<opset1::FakeQuantize>(node->get_input_node_ptr(1)->get_input_node_shared_ptr(0)); fq = as_type_ptr<opset1::FakeQuantize>(node->get_input_node_ptr(1)->get_input_node_shared_ptr(0));

View File

@ -26,8 +26,13 @@ Config::Config() {
// for the TBB code-path, additional configuration depending on the OS and CPU types // for the TBB code-path, additional configuration depending on the OS and CPU types
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
#if defined(__APPLE__) || defined(_WIN32) #if defined(__APPLE__) || defined(_WIN32)
// 'CORES' is not implemented for Win/MacOS; so the 'NUMA' is default // 'CORES' is not implemented for Win/MacOS; so 'NONE' or 'NUMA' is the default
auto numaNodes = getAvailableNUMANodes();
if (numaNodes.size() > 1) {
streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NUMA; streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NUMA;
} else {
streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NONE;
}
#endif #endif
if (getAvailableCoresTypes().size() > 1 /*Hybrid CPU*/) { if (getAvailableCoresTypes().size() > 1 /*Hybrid CPU*/) {
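For reference, the block above only changes the macOS/Windows default from always-NUMA to NUMA-only-on-multi-socket machines. A hedged sketch of that decision, with a stubbed NUMA query standing in for getAvailableNUMANodes(), which is not reproduced here:

#include <iostream>
#include <vector>

enum class ThreadBindingType { NONE, CORES, NUMA };

// Stand-in for the real NUMA discovery; a real build would query the OS.
static std::vector<int> available_numa_nodes() { return {0}; }

// Pick the default binding the way the patched Config constructor does on
// platforms where binding to cores is not implemented: bind per NUMA node
// only when there is more than one node, otherwise do not bind at all.
static ThreadBindingType default_binding_for_non_core_platforms() {
    return available_numa_nodes().size() > 1 ? ThreadBindingType::NUMA
                                             : ThreadBindingType::NONE;
}

int main() {
    std::cout << static_cast<int>(default_binding_for_non_core_platforms()) << '\n';
}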

View File

@ -12,8 +12,11 @@
#include "mkldnn_itt.h" #include "mkldnn_itt.h"
#include "nodes/mkldnn_memory_node.hpp" #include "nodes/mkldnn_memory_node.hpp"
#include <threading/ie_executor_manager.hpp> #include <threading/ie_executor_manager.hpp>
#if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
#include <threading/ie_tbb_streams_executor.hpp>
#else
#include <threading/ie_cpu_streams_executor.hpp> #include <threading/ie_cpu_streams_executor.hpp>
#endif
#include <ie_system_conf.h> #include <ie_system_conf.h>
#include <algorithm> #include <algorithm>
#include <unordered_set> #include <unordered_set>
@ -32,6 +35,14 @@ MKLDNNExecNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap network
return std::make_shared<MKLDNNInferRequest>(networkInputs, networkOutputs, std::static_pointer_cast<MKLDNNExecNetwork>(shared_from_this())); return std::make_shared<MKLDNNInferRequest>(networkInputs, networkOutputs, std::static_pointer_cast<MKLDNNExecNetwork>(shared_from_this()));
} }
struct ImmediateSerialExecutor : public ITaskExecutor {
void run(InferenceEngine::Task task) override {
std::lock_guard<std::mutex> l{_mutex};
task();
}
std::mutex _mutex;
};
MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
const Config &cfg, const Config &cfg,
const MKLDNNExtensionManager::Ptr& extMgr, const MKLDNNExtensionManager::Ptr& extMgr,
@ -61,11 +72,20 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
} else { } else {
auto streamsExecutorConfig = InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg.streamExecutorConfig, isFloatModel); auto streamsExecutorConfig = InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg.streamExecutorConfig, isFloatModel);
streamsExecutorConfig._name = "CPUStreamsExecutor"; streamsExecutorConfig._name = "CPUStreamsExecutor";
_taskExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamsExecutorConfig); #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
_taskExecutor = std::make_shared<TBBStreamsExecutor>(streamsExecutorConfig);
#else
_taskExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamsExecutorConfig);
#endif
} }
if (0 != cfg.streamExecutorConfig._streams) { if (0 != cfg.streamExecutorConfig._streams) {
_callbackExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor( #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
// There are no additional threads, but we still need to serialize callback execution to preserve the legacy behaviour
_callbackExecutor = std::make_shared<ImmediateSerialExecutor>();
#else
_callbackExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(
IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0, IStreamsExecutor::ThreadBindingType::NONE}); IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0, IStreamsExecutor::ThreadBindingType::NONE});
#endif
} else { } else {
_callbackExecutor = _taskExecutor; _callbackExecutor = _taskExecutor;
} }
@ -146,6 +166,19 @@ MKLDNNExecNetwork::Graph::Lock MKLDNNExecNetwork::GetGraph() {
return graphLock; return graphLock;
} }
MKLDNNExecNetwork::Graph::Lock MKLDNNExecNetwork::GetGraph() const {
int streamId = 0;
int numaNodeId = 0;
auto streamsExecutor = dynamic_cast<InferenceEngine::IStreamsExecutor*>(_taskExecutor.get());
if (nullptr != streamsExecutor) {
streamId = streamsExecutor->GetStreamId();
numaNodeId = streamsExecutor->GetNumaNodeId();
}
auto graphLock = Graph::Lock(_graphs[streamId % _graphs.size()]);
IE_ASSERT(graphLock._graph.IsReady());
return graphLock;
}
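The const GetGraph() overload added here picks a graph exactly like the non-const one: query the current stream id from the streams executor, then index the graph pool modulo its size. A simplified sketch of that lookup, with a plain struct in place of the MKLDNN graph and executor types:

#include <cstddef>
#include <deque>
#include <iostream>

struct Graph { int id = -1; };

// Map the id of the currently executing stream to one of the per-stream
// graphs; stream ids beyond the pool size wrap around.
static Graph& graph_for_stream(std::deque<Graph>& graphs, int streamId) {
    return graphs[static_cast<std::size_t>(streamId) % graphs.size()];
}

int main() {
    std::deque<Graph> graphs(4);
    for (int i = 0; i < 4; ++i) graphs[i].id = i;
    std::cout << graph_for_stream(graphs, 0).id << ' '   // 0
              << graph_for_stream(graphs, 5).id << '\n'; // 1 (5 % 4)
}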
void MKLDNNExecNetwork::setProperty(const std::map<std::string, std::string> &properties) { void MKLDNNExecNetwork::setProperty(const std::map<std::string, std::string> &properties) {
{ {
std::lock_guard<std::mutex> lock{_cfgMutex}; std::lock_guard<std::mutex> lock{_cfgMutex};
@ -171,9 +204,8 @@ InferenceEngine::CNNNetwork MKLDNNExecNetwork::GetExecGraphInfo() {
} }
Parameter MKLDNNExecNetwork::GetConfig(const std::string &name) const { Parameter MKLDNNExecNetwork::GetConfig(const std::string &name) const {
if (_graphs.size() == 0) if (_graphs.size() == 0) IE_THROW() << "No graph was found";
IE_THROW() << "No graph was found"; Config engConfig = GetGraph()._graph.getProperty();
Config engConfig = const_cast<MKLDNNExecNetwork*>(this)->GetGraph()._graph.getProperty();
auto option = engConfig._config.find(name); auto option = engConfig._config.find(name);
if (option != engConfig._config.end()) { if (option != engConfig._config.end()) {
return option->second; return option->second;
@ -187,8 +219,7 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name)
IE_THROW() << "No graph was found"; IE_THROW() << "No graph was found";
if (name == METRIC_KEY(NETWORK_NAME)) { if (name == METRIC_KEY(NETWORK_NAME)) {
IE_SET_METRIC_RETURN(NETWORK_NAME, IE_SET_METRIC_RETURN(NETWORK_NAME, GetGraph()._graph.dump().getName());
const_cast<MKLDNNExecNetwork*>(this)->GetGraph()._graph.dump().getName());
} else if (name == METRIC_KEY(SUPPORTED_METRICS)) { } else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
std::vector<std::string> metrics; std::vector<std::string> metrics;
metrics.push_back(METRIC_KEY(NETWORK_NAME)); metrics.push_back(METRIC_KEY(NETWORK_NAME));
@ -198,12 +229,12 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name)
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics); IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
} else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) { } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
std::vector<std::string> configKeys; std::vector<std::string> configKeys;
for (auto && key : const_cast<MKLDNNExecNetwork*>(this)->GetGraph()._graph.getProperty()._config) { for (auto && key : GetGraph()._graph.getProperty()._config) {
configKeys.push_back(key.first); configKeys.push_back(key.first);
} }
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys); IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
} else if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) { } else if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
Config engConfig = const_cast<MKLDNNExecNetwork*>(this)->GetGraph()._graph.getProperty(); Config engConfig = GetGraph()._graph.getProperty();
auto option = engConfig._config.find(CONFIG_KEY(CPU_THROUGHPUT_STREAMS)); auto option = engConfig._config.find(CONFIG_KEY(CPU_THROUGHPUT_STREAMS));
IE_ASSERT(option != engConfig._config.end()); IE_ASSERT(option != engConfig._config.end());
auto streams = std::stoi(option->second); auto streams = std::stoi(option->second);

View File

@ -59,8 +59,9 @@ protected:
Graph& _graph; Graph& _graph;
}; };
}; };
// WARNING: Do not use _graphs directly. // WARNING: Do not use _graphs directly.
std::deque<Graph> _graphs; mutable std::deque<Graph> _graphs;
NumaNodesWeights& _numaNodesWeights; NumaNodesWeights& _numaNodesWeights;
/* WARNING: Use GetGraph() function to get access to graph in current stream. /* WARNING: Use GetGraph() function to get access to graph in current stream.
@ -68,6 +69,8 @@ protected:
* even from main thread * even from main thread
*/ */
Graph::Lock GetGraph(); Graph::Lock GetGraph();
Graph::Lock GetGraph() const;
bool CanProcessDynBatch(const InferenceEngine::CNNNetwork &network) const; bool CanProcessDynBatch(const InferenceEngine::CNNNetwork &network) const;
}; };

View File

@ -24,22 +24,20 @@
#include <transformations/common_optimizations/common_optimizations.hpp> #include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp> #include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp>
#include "transformations/common_optimizations/convert_quantize_dequantize.hpp" #include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
#include <transformations/common_optimizations/depth_to_space_fusion.hpp>
#include <transformations/common_optimizations/softmax_fusion.hpp> #include <transformations/common_optimizations/softmax_fusion.hpp>
#include <transformations/common_optimizations/normalize_l2_fusion.hpp>
#include <transformations/op_conversions/convert_depth_to_space.hpp> #include <transformations/op_conversions/convert_depth_to_space.hpp>
#include <transformations/op_conversions/convert_shuffle_channels3.hpp> #include <transformations/op_conversions/convert_shuffle_channels3.hpp>
#include <transformations/op_conversions/convert_space_to_depth.hpp> #include <transformations/op_conversions/convert_space_to_depth.hpp>
#include <transformations/op_conversions/convert_gelu.hpp> #include <transformations/op_conversions/convert_gelu.hpp>
#include <transformations/op_conversions/convert_gather_v7_to_gather_v1.hpp> #include <transformations/op_conversions/convert_gather_downgrade.hpp>
#include <transformations/op_conversions/convert_gather_v1_to_gather_v7.hpp> #include <transformations/op_conversions/convert_gather_upgrade.hpp>
#include <transformations/op_conversions/gelu7_downgrade.hpp> #include <transformations/op_conversions/gelu7_downgrade.hpp>
#include <transformations/op_conversions/hswish_decomposition.hpp> #include <transformations/op_conversions/hswish_decomposition.hpp>
#include <transformations/op_conversions/hsigmoid_decomposition.hpp> #include <transformations/op_conversions/hsigmoid_decomposition.hpp>
#include <transformations/op_conversions/mvn6_decomposition.hpp> #include <transformations/op_conversions/mvn6_decomposition.hpp>
#include <transformations/op_conversions/normalize_l2_decomposition.hpp>
#include <transformations/op_conversions/reduce_l1_decomposition.hpp> #include <transformations/op_conversions/reduce_l1_decomposition.hpp>
#include <transformations/op_conversions/reduce_l2_decomposition.hpp> #include <transformations/op_conversions/reduce_l2_decomposition.hpp>
#include <transformations/op_conversions/convert_pad_to_group_conv.hpp>
#include <transformations/op_conversions/softplus_decomposition.hpp> #include <transformations/op_conversions/softplus_decomposition.hpp>
#include <transformations/op_conversions/convert_space_to_batch.hpp> #include <transformations/op_conversions/convert_space_to_batch.hpp>
#include <transformations/op_conversions/convert_batch_to_space.hpp> #include <transformations/op_conversions/convert_batch_to_space.hpp>
@ -53,7 +51,6 @@
#include <transformations/op_conversions/gru_cell_decomposition.hpp> #include <transformations/op_conversions/gru_cell_decomposition.hpp>
#include <transformations/op_conversions/log_softmax_decomposition.hpp> #include <transformations/op_conversions/log_softmax_decomposition.hpp>
#include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp> #include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
#include <transformations/op_conversions/convert_shuffle_channels3.hpp>
#include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp> #include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp>
#include <transformations/op_conversions/convert_previous_nms_to_nms_5.hpp> #include <transformations/op_conversions/convert_previous_nms_to_nms_5.hpp>
#include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp> #include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp>
@ -249,7 +246,8 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
return false; return false;
}; };
pass_config->set_callback<ngraph::pass::ConvertRNNSequenceToTensorIterator, ngraph::pass::ConvertGRUSequenceToTensorIterator, pass_config->set_callback<ngraph::pass::ConvertRNNSequenceToTensorIterator,
ngraph::pass::ConvertGRUSequenceToTensorIterator,
ngraph::pass::ConvertLSTMSequenceToTensorIterator>( ngraph::pass::ConvertLSTMSequenceToTensorIterator>(
[isSequencePrimitiveSupported](const_node_ptr &node) -> bool { [isSequencePrimitiveSupported](const_node_ptr &node) -> bool {
return isSequencePrimitiveSupported(node); return isSequencePrimitiveSupported(node);
@ -280,18 +278,17 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
return MKLDNNMVNNode::isSupportedOperation(node, errorMessage); return MKLDNNMVNNode::isSupportedOperation(node, errorMessage);
}); });
pass_config->set_callback<ngraph::pass::NormalizeL2Decomposition>(
[](const_node_ptr &node) -> bool {
std::string errorMsg;
return MKLDNNNormalizeL2Node::isSupportedOperation(node, errorMsg);
});
pass_config->set_callback<ngraph::pass::SoftmaxFusion>( pass_config->set_callback<ngraph::pass::SoftmaxFusion>(
[](const_node_ptr &node) -> bool { [](const_node_ptr &node) -> bool {
return node->input_value(0).get_partial_shape().rank().get_length() > 5; return node->input_value(0).get_partial_shape().rank().get_length() > 5;
}); });
auto normalizeL2FusionCallback = [](const_node_ptr &node) -> bool {
std::string errorMsg;
return !MKLDNNNormalizeL2Node::isSupportedOperation(node, errorMsg);
};
pass_config->set_callback<ngraph::pass::NormalizeL2FusionWithAdd>(normalizeL2FusionCallback);
pass_config->set_callback<ngraph::pass::NormalizeL2FusionWithMax>(normalizeL2FusionCallback);
// List of enabled/disabled transformations // List of enabled/disabled transformations
pass_config->disable<ngraph::pass::ConvertGELU>(); pass_config->disable<ngraph::pass::ConvertGELU>();
pass_config->disable<ngraph::pass::ConvertShuffleChannels3>(); pass_config->disable<ngraph::pass::ConvertShuffleChannels3>();
@ -307,10 +304,11 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
pass_config->disable<ngraph::pass::WeightsDequantizeToFakeQuantize>(); pass_config->disable<ngraph::pass::WeightsDequantizeToFakeQuantize>();
pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>(); pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
pass_config->disable<ngraph::pass::ConvertGather7ToGather1>(); pass_config->disable<ngraph::pass::ConvertGather7ToGather1>();
pass_config->disable<ngraph::pass::ConvertDeformableConv8To1>();
pass_config->enable<ngraph::pass::NormalizeL2Decomposition>();
pass_config->enable<ngraph::pass::ConvertInterpolate1ToInterpolate4>(); pass_config->enable<ngraph::pass::ConvertInterpolate1ToInterpolate4>();
pass_config->enable<ngraph::pass::ConvertGather1ToGather7>(); pass_config->enable<ngraph::pass::ConvertGather1ToGather7>();
pass_config->enable<ngraph::pass::ConvertGather8ToGather7>();
if (useLpt) { if (useLpt) {
pass_config->set_callback<ngraph::pass::ConvertQuantizeDequantize>([](const_node_ptr &node) -> bool { pass_config->set_callback<ngraph::pass::ConvertQuantizeDequantize>([](const_node_ptr &node) -> bool {

View File

@ -741,9 +741,10 @@ private:
bool MKLDNNDeformableConvolutionNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept { bool MKLDNNDeformableConvolutionNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try { try {
const auto defConvNode = ngraph::as_type_ptr<const ngraph::op::v1::DeformableConvolution>(op); if (!one_of(op->get_type_info(),
if (!defConvNode) { ngraph::op::v1::DeformableConvolution::type_info,
errorMessage = "Node is not an instance of DeformableConvolution form the operation set v1."; ngraph::op::v8::DeformableConvolution::type_info)) {
errorMessage = "Node is not an instance of DeformableConvolution form the operation set v1 or v8.";
return false; return false;
} }
} catch (...) { } catch (...) {
@ -759,28 +760,35 @@ MKLDNNDeformableConvolutionNode::MKLDNNDeformableConvolutionNode(const std::shar
if (!isSupportedOperation(op, errorMessage)) { if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage; IE_THROW(NotImplemented) << errorMessage;
} }
auto defConvNode = ngraph::as_type_ptr<const ngraph::op::v1::DeformableConvolution>(op); auto defConvNodeBase = std::dynamic_pointer_cast<ngraph::op::util::DeformableConvolutionBase>(op);
group = defConvNode->get_group(); group = defConvNodeBase->get_group();
deformable_group = defConvNode->get_deformable_group(); deformable_group = defConvNodeBase->get_deformable_group();
auto& strides = defConvNodeBase->get_strides();
auto& strides = defConvNode->get_strides();
for (int i = 0; i < strides.size(); i++) { for (int i = 0; i < strides.size(); i++) {
stride.push_back(strides[i]); stride.push_back(strides[i]);
} }
auto& dilations = defConvNode->get_dilations(); auto& dilations = defConvNodeBase->get_dilations();
for (int i = 1; i <= dilations.size(); i++) { for (int i = 1; i <= dilations.size(); i++) {
dilation.push_back(dilations[dilations.size() - i] - 1); dilation.push_back(dilations[dilations.size() - i] - 1);
} }
paddingL = defConvNode->get_pads_begin(); paddingL = defConvNodeBase->get_pads_begin();
if (op->get_type_info() == ngraph::op::v8::DeformableConvolution::type_info) {
auto defConvNode = std::dynamic_pointer_cast<ngraph::op::v8::DeformableConvolution>(op);
with_bilinear_pad = defConvNode->get_bilinear_interpolation_pad();
} else {
with_bilinear_pad = false;
}
enforceRef = (op->get_type_info() == ngraph::op::v8::DeformableConvolution::type_info);
} }
void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() { void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() {
std::string errorPrefix = "DeformableConvolution layer with name '" + getName() + "' "; std::string errorPrefix = "DeformableConvolution layer with name '" + getName() + "' ";
if (getParentEdges().size() != 3) if (getParentEdges().size() != 3 && getParentEdges().size() != 4)
IE_THROW() << errorPrefix << "has incorrect number of input edges"; IE_THROW() << errorPrefix << "has incorrect number of input edges";
if (getChildEdges().empty()) if (getChildEdges().empty())
IE_THROW() << errorPrefix << "has incorrect number of output edges"; IE_THROW() << errorPrefix << "has incorrect number of output edges";
@ -806,22 +814,29 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty()) if (!supportedPrimitiveDescriptors.empty())
return; return;
size_t inputsNumber = getOriginalInputsNumber();
NodeConfig config; NodeConfig config;
config.dynBatchSupport = false; config.dynBatchSupport = false;
config.inConfs.resize(3); config.inConfs.resize(inputsNumber);
config.inConfs[0].constant = false; config.inConfs[0].constant = false;
config.inConfs[0].inPlace = -1; config.inConfs[0].inPlace = -1;
config.inConfs[1].constant = false; config.inConfs[1].constant = false;
config.inConfs[1].inPlace = -1; config.inConfs[1].inPlace = -1;
config.inConfs[1].constant = false; config.inConfs[2].constant = false;
config.inConfs[1].inPlace = -1; config.inConfs[2].inPlace = -1;
if (inputsNumber > 3) {
config.inConfs[3].constant = false;
config.inConfs[3].inPlace = -1;
}
config.outConfs.resize(1); config.outConfs.resize(1);
config.outConfs[0].constant = false; config.outConfs[0].constant = false;
config.outConfs[0].inPlace = -1; config.outConfs[0].inPlace = -1;
impl_desc_type impl_type; impl_desc_type impl_type;
if (mayiuse(cpu::x64::avx512_common)) { if (enforceRef) {
impl_type = impl_desc_type::ref;
} else if (mayiuse(cpu::x64::avx512_common)) {
impl_type = impl_desc_type::jit_avx512; impl_type = impl_desc_type::jit_avx512;
} else if (mayiuse(cpu::x64::avx2)) { } else if (mayiuse(cpu::x64::avx2)) {
impl_type = impl_desc_type::jit_avx2; impl_type = impl_desc_type::jit_avx2;
@ -831,8 +846,8 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
impl_type = impl_desc_type::ref; impl_type = impl_desc_type::ref;
} }
if (mayiuse(cpu::x64::sse41)) { if (!enforceRef && mayiuse(cpu::x64::sse41)) {
// optimzed implementation // optimized implementation
auto dataFormat = memory::format_tag::nhwc; auto dataFormat = memory::format_tag::nhwc;
auto offFormat = memory::format_tag::nchw; auto offFormat = memory::format_tag::nchw;
auto weiFormat = group > 1 ? mayiuse(avx512_common) ? memory::format_tag::gOIhw16i16o : memory::format_tag::gOIhw8i8o auto weiFormat = group > 1 ? mayiuse(avx512_common) ? memory::format_tag::gOIhw16i16o : memory::format_tag::gOIhw8i8o
@ -842,8 +857,25 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
memory::data_type::f32, dataFormat); memory::data_type::f32, dataFormat);
config.inConfs[1].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(1)->getShape().getStaticDims(), config.inConfs[1].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(1)->getShape().getStaticDims(),
memory::data_type::f32, offFormat); memory::data_type::f32, offFormat);
auto& wDims = getParentEdgeAt(2)->getShape().getStaticDims();
if (group > 1 && wDims.size() != 5) {
auto new_dims = InferenceEngine::SizeVector({group, div_up(wDims[0], group)});
for (int i = 1; i < wDims.size(); i++) {
new_dims.push_back(wDims[i]);
}
config.inConfs[2].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(2)->getShape().getStaticDims(), config.inConfs[2].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(2)->getShape().getStaticDims(),
memory::data_type::f32, weiFormat); memory::data_type::f32, weiFormat);
} else {
config.inConfs[2].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(2)->getShape().getStaticDims(),
memory::data_type::f32, weiFormat);
}
if (inputsNumber > 3) {
config.inConfs[3].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(3)->getShape().getStaticDims(),
memory::data_type::f32, memory::format_tag::nchw);
}
config.outConfs[0].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getChildEdgeAt(0)->getShape().getStaticDims(), config.outConfs[0].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getChildEdgeAt(0)->getShape().getStaticDims(),
memory::data_type::f32, dataFormat); memory::data_type::f32, dataFormat);
supportedPrimitiveDescriptors.push_back({config, impl_type}); supportedPrimitiveDescriptors.push_back({config, impl_type});
@ -855,6 +887,10 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
memory::format_tag::nchw); memory::format_tag::nchw);
config.inConfs[2].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::f32, config.inConfs[2].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::f32,
memory::format_tag::oihw); memory::format_tag::oihw);
if (inputsNumber > 3) {
config.inConfs[3].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(3)->getShape().getStaticDims(), memory::data_type::f32,
memory::format_tag::nchw);
}
config.outConfs[0].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getChildEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32, config.outConfs[0].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getChildEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32,
memory::format_tag::nchw); memory::format_tag::nchw);
supportedPrimitiveDescriptors.push_back({config, impl_type}); supportedPrimitiveDescriptors.push_back({config, impl_type});
@ -874,6 +910,7 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() {
jcp.dg = deformable_group; jcp.dg = deformable_group;
jcp.ngroups = group; jcp.ngroups = group;
jcp.mb = srcDims[0]; jcp.mb = srcDims[0];
jcp.oc = dstDims[1] / jcp.ngroups; jcp.oc = dstDims[1] / jcp.ngroups;
@ -884,9 +921,8 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() {
jcp.oh = dstDims[2]; jcp.oh = dstDims[2];
jcp.ow = dstDims[3]; jcp.ow = dstDims[3];
bool with_groups = group > 1; jcp.kh = weiDims[2];
jcp.kh = weiDims[with_groups + 2]; jcp.kw = weiDims[3];
jcp.kw = weiDims[with_groups + 3];
jcp.t_pad = paddingL[0]; jcp.t_pad = paddingL[0];
jcp.l_pad = paddingL[1]; jcp.l_pad = paddingL[1];
@ -898,6 +934,8 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() {
jcp.dilate_w = dilation[1]; jcp.dilate_w = dilation[1];
jcp.with_bias = false; jcp.with_bias = false;
jcp.with_bi_pad = with_bilinear_pad;
jcp.with_modulation = getParentEdges().size() > 3;
const int simd_w = mayiuse(cpu::x64::avx512_common) ? 16 : 8; const int simd_w = mayiuse(cpu::x64::avx512_common) ? 16 : 8;
jcp.ic_block = simd_w; jcp.ic_block = simd_w;
@ -910,13 +948,16 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() {
jcp.typesize_in = sizeof(float); jcp.typesize_in = sizeof(float);
jcp.typesize_off = sizeof(float); jcp.typesize_off = sizeof(float);
jcp.typesize_out = sizeof(float); jcp.typesize_out = sizeof(float);
jcp.typesize_modulation = sizeof(float);
jcp.ur_w = mayiuse(cpu::x64::avx512_common) ? 6 : 3; jcp.ur_w = mayiuse(cpu::x64::avx512_common) ? 6 : 3;
jcp.nb_oc_blocking = !mayiuse(cpu::x64::avx2) ? 2 : 4; jcp.nb_oc_blocking = !mayiuse(cpu::x64::avx2) ? 2 : 4;
jcp.nthr = dnnl_get_max_threads(); jcp.nthr = dnnl_get_max_threads();
if (mayiuse(cpu::x64::avx512_common)) { if (enforceRef) {
return;
} else if (mayiuse(cpu::x64::avx512_common)) {
def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32<cpu::x64::avx512_common>(jcp)); def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32<cpu::x64::avx512_common>(jcp));
} else if (mayiuse(cpu::x64::avx2)) { } else if (mayiuse(cpu::x64::avx2)) {
def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32<cpu::x64::avx2>(jcp)); def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32<cpu::x64::avx2>(jcp));
@ -930,9 +971,9 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() {
void MKLDNNDeformableConvolutionNode::executeReference(const float* src, const float* offsets, const float* weights, float* dst, void MKLDNNDeformableConvolutionNode::executeReference(const float* src, const float* offsets, const float* weights, float* dst,
const std::vector<size_t>& src_strides, const std::vector<size_t>& off_strides, const std::vector<size_t>& src_strides, const std::vector<size_t>& off_strides,
const std::vector<size_t>& wei_strides, const std::vector<size_t>& dst_strides) { const std::vector<size_t>& wei_strides, const std::vector<size_t>& dst_strides,
const float* modulation, const std::vector<size_t>& modulation_strides) {
const bool with_groups = jcp.ngroups > 1; const bool with_groups = jcp.ngroups > 1;
const int G = jcp.ngroups; const int G = jcp.ngroups;
const int MB = jcp.mb; const int MB = jcp.mb;
const int OH = jcp.oh; const int OH = jcp.oh;
@ -956,65 +997,79 @@ void MKLDNNDeformableConvolutionNode::executeReference(const float* src, const f
const int DG = jcp.dg; const int DG = jcp.dg;
const int channel_per_deformable_group = IC * G / DG; const int channel_per_deformable_group = (IC * G) / DG;
const bool with_bi_pad = jcp.with_bi_pad;
auto ker = [=](int g, int mb, int oc, int oh, int ow) { auto ker = [=](int g, int mb, int oc, int oh, int ow) {
float d = 0; float d = 0;
const int h_in = oh * KSH - padT; const int h_in = oh * KSH - padT;
const int w_in = ow * KSW - padL; const int w_in = ow * KSW - padL;
for (int ic = 0; ic < IC; ic++) { for (int ic = 0; ic < IC; ic++) {
const float *data_im_ptr = src + mb * src_strides[0] + (g * IC + ic) * src_strides[1] + h_in * src_strides[2] + w_in * src_strides[3]; const float *data_im_ptr = src + mb * src_strides[0] + (g * IC + ic) * src_strides[1];
const int deformable_group_index = ic / channel_per_deformable_group; const int deformable_group_index = (IC * g + ic) / channel_per_deformable_group;
const float *data_offset_ptr = offsets + mb * off_strides[0] + (deformable_group_index * 2 * KH * KW) * off_strides[1]; const float *data_offset_ptr = offsets + mb * off_strides[0] + (deformable_group_index * 2 * KH * KW) * off_strides[1];
const float *modulation_offset_ptr = nullptr;
if (modulation != nullptr) {
modulation_offset_ptr = modulation + mb * modulation_strides[0] + (deformable_group_index * KH * KW) * modulation_strides[1];
}
for (int kh = 0; kh < KH; kh++) { for (int kh = 0; kh < KH; kh++) {
for (int kw = 0; kw < KW; kw++) { for (int kw = 0; kw < KW; kw++) {
const size_t data_offset_h_index = 2 * (kh * KW + kw) * off_strides[1] + oh * off_strides[2] + ow * off_strides[3]; const size_t data_offset_h_index = 2 * (kh * KW + kw) * off_strides[1] + oh * off_strides[2] + ow * off_strides[3];
const size_t data_offset_w_index = (2 * (kh * KW + kw) + 1) * off_strides[1] + oh * off_strides[2] + ow * off_strides[3]; const size_t data_offset_w_index = (2 * (kh * KW + kw) + 1) * off_strides[1] + oh * off_strides[2] + ow * off_strides[3];
const float offset_h = data_offset_ptr[data_offset_h_index]; const float offset_h = data_offset_ptr[data_offset_h_index];
const float offset_w = data_offset_ptr[data_offset_w_index]; const float offset_w = data_offset_ptr[data_offset_w_index];
float val = 0.0f; float map_h = h_in + kh * (KDH + 1) + offset_h;
const float h_im = h_in + kh * (KDH + 1) + offset_h; float map_w = w_in + kw * (KDW + 1) + offset_w;
const float w_im = w_in + kw * (KDW + 1) + offset_w; bool skip_compute;
if (with_bilinear_pad) {
if (h_im >= 0 && w_im >= 0 && h_im < IH && w_im < IW) { skip_compute = !(static_cast<int>(map_w) > -1 &&
float map_h = kh * (KDH + 1) + offset_h; static_cast<int>(map_w) < IW &&
float map_w = kw * (KDW + 1) + offset_w; static_cast<int>(map_h) > -1 &&
const int cur_height = IH - h_in; static_cast<int>(map_h) < IH);
const int cur_width = IW - w_in;
int h_low = static_cast<int>(floorf(map_h));
int w_low = static_cast<int>(floorf(map_w));
int h_high;
int w_high;
if (h_low >= cur_height - 1) {
h_high = h_low = cur_height - 1;
map_h = static_cast<float>(h_low);
} else { } else {
h_high = h_low + 1; skip_compute = !(map_w >= 0 &&
} map_w < IW &&
map_h >= 0 &&
if (w_low >= cur_width - 1) { map_h < IH);
w_high = w_low = cur_width - 1;
map_w = static_cast<float>(w_low);
} else {
w_high = w_low + 1;
} }
if (!skip_compute) {
const int cur_h_end = IH;
const int cur_w_end = IW;
int h_low = with_bi_pad ? static_cast<int>(floorf(map_h)) :
std::max(static_cast<int>(floorf(map_h)), 0);
int w_low = with_bi_pad ? static_cast<int>(floorf(map_w)) :
std::max(static_cast<int>(floorf(map_w)), 0);
const int cur_h_start = h_low;
const int cur_w_start = w_low;
int h_high = with_bi_pad ? h_low + 1 : std::min(static_cast<int>(ceilf(map_h)), cur_h_end - 1);
int w_high = with_bi_pad ? w_low + 1 : std::min(static_cast<int>(ceilf(map_w)), cur_w_end - 1);
float lh = map_h - h_low; float lh = map_h - h_low;
float lw = map_w - w_low; float lw = map_w - w_low;
float hh = 1 - lh, hw = 1 - lw; float hh = 1 - lh, hw = 1 - lw;
float v1 = data_im_ptr[h_low * src_strides[2] + w_low * src_strides[3]]; float v1 = (cur_w_start >= 0 && cur_h_start >= 0) ? data_im_ptr[h_low * src_strides[2] + w_low * src_strides[3]] : 0.0f;
float v2 = data_im_ptr[h_low * src_strides[2] + w_high * src_strides[3]]; float v2 = (w_high < cur_w_end && cur_h_start >= 0) ? data_im_ptr[h_low * src_strides[2] + w_high * src_strides[3]] : 0.0f;
float v3 = data_im_ptr[h_high * src_strides[2] + w_low * src_strides[3]]; float v3 = (cur_w_start >= 0 && h_high < cur_h_end) ? data_im_ptr[h_high * src_strides[2] + w_low * src_strides[3]] : 0.0f;
float v4 = data_im_ptr[h_high * src_strides[2] + w_high * src_strides[3]]; float v4 = (w_high < cur_w_end && h_high < cur_h_end) ? data_im_ptr[h_high * src_strides[2] + w_high * src_strides[3]] : 0.0f;
float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
float modulation_scalar = 1.0f;
if (modulation_offset_ptr != nullptr) {
size_t modulation_index = (kh * KW + kw) * modulation_strides[1] + oh * modulation_strides[2] + ow * modulation_strides[3];
modulation_scalar = modulation_offset_ptr[modulation_index];
}
const float weight = with_groups ? weights[(g + oc / G) * wei_strides[0] + ic * wei_strides[1] + kh * wei_strides[2] +
kw * wei_strides[3]]
: weights[oc * wei_strides[0] + ic * wei_strides[1] + kh * wei_strides[2] + kw * wei_strides[3]];
d += val * weight * modulation_scalar;
} }
d += val * (with_groups ? weights[g * wei_strides[0] + oc * wei_strides[1] + ic * wei_strides[2] + kh * wei_strides[3] +
kw * wei_strides[4]]
: weights[oc * wei_strides[0] + ic * wei_strides[1] + kh * wei_strides[2] + kw * wei_strides[3]]);
} }
} }
} }
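The reference path above is a bilinear sample of the input feature map at the fractional position (map_h, map_w); taps that fall outside the image contribute zero, and the result is optionally scaled by a modulation value before being weighted. A self-contained sketch of just the sampling step on a single-channel row-major image (simplified: it always zero-pads, ignoring the with_bilinear_pad clamping variant):

#include <cmath>
#include <iostream>
#include <vector>

// Bilinearly sample img (H x W, row-major) at fractional position (h, w).
// Neighbours outside the image contribute 0, matching the zero contribution
// the reference kernel gives to out-of-range taps.
static float bilinear_sample(const std::vector<float>& img, int H, int W,
                             float h, float w) {
    const int h_low = static_cast<int>(std::floor(h));
    const int w_low = static_cast<int>(std::floor(w));
    const int h_high = h_low + 1;
    const int w_high = w_low + 1;

    const float lh = h - h_low, lw = w - w_low;
    const float hh = 1.0f - lh, hw = 1.0f - lw;

    auto at = [&](int y, int x) -> float {
        return (y >= 0 && y < H && x >= 0 && x < W) ? img[y * W + x] : 0.0f;
    };

    const float v1 = at(h_low, w_low),  v2 = at(h_low, w_high);
    const float v3 = at(h_high, w_low), v4 = at(h_high, w_high);
    // The four neighbour weights sum to 1.
    return hh * hw * v1 + hh * lw * v2 + lh * hw * v3 + lh * lw * v4;
}

int main() {
    // 2x2 image; sampling in the middle averages all four pixels.
    std::vector<float> img = {1.0f, 2.0f, 3.0f, 4.0f};
    std::cout << bilinear_sample(img, 2, 2, 0.5f, 0.5f) << '\n';  // 2.5
}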
@ -1058,6 +1113,8 @@ void MKLDNNDeformableConvolutionNode::executeOptimized(const float* src, const f
} }
void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) { void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) {
const size_t inputsNumber = getOriginalInputsNumber();
auto &srcMemory0 = getParentEdgeAt(0)->getMemory(); auto &srcMemory0 = getParentEdgeAt(0)->getMemory();
auto &srcMemory1 = getParentEdgeAt(1)->getMemory(); auto &srcMemory1 = getParentEdgeAt(1)->getMemory();
auto &srcMemory2 = getParentEdgeAt(2)->getMemory(); auto &srcMemory2 = getParentEdgeAt(2)->getMemory();
@ -1066,8 +1123,18 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) {
const auto *src = reinterpret_cast<const float *>(srcMemory0.GetPtr()); const auto *src = reinterpret_cast<const float *>(srcMemory0.GetPtr());
const auto *offsets = reinterpret_cast<const float *>(srcMemory1.GetPtr()); const auto *offsets = reinterpret_cast<const float *>(srcMemory1.GetPtr());
const auto *weights = reinterpret_cast<const float *>(srcMemory2.GetPtr()); const auto *weights = reinterpret_cast<const float *>(srcMemory2.GetPtr());
float* modulation = nullptr;
if (inputsNumber > 3) {
modulation = reinterpret_cast<float *>(getParentEdgeAt(3)->getMemory().GetPtr());
}
float *dst = reinterpret_cast<float *>(dstMemory.GetPtr()); float *dst = reinterpret_cast<float *>(dstMemory.GetPtr());
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
IE_THROW() << "CPU deformable convolution with name '" << getName() << "' doesn't have primitive descriptors.";
auto config = selectedPrimitiveDescriptor->getConfig();
auto src_block_desc = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>(); auto src_block_desc = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
std::vector<size_t> src_strides(src_block_desc.getStrides().size()); std::vector<size_t> src_strides(src_block_desc.getStrides().size());
for (int i = 0; i < src_strides.size(); i++) { for (int i = 0; i < src_strides.size(); i++) {
@ -1080,13 +1147,19 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) {
dst_strides[dst_block_desc.getOrder()[i]] = dst_block_desc.getStrides()[i]; dst_strides[dst_block_desc.getOrder()[i]] = dst_block_desc.getStrides()[i];
} }
auto off_strides = getParentEdgeAt(1)->getMemory().GetDescWithType<BlockedMemoryDesc>().getStrides(); auto off_strides = getParentEdgeAt(1)->getMemory().GetDescWithType<BlockedMemoryDesc>().getStrides();
auto wei_strides = getParentEdgeAt(2)->getMemory().GetDescWithType<BlockedMemoryDesc>().getStrides(); auto wei_strides = getParentEdgeAt(2)->getMemory().GetDescWithType<BlockedMemoryDesc>().getStrides();
InferenceEngine::SizeVector modulation_strides;
if (inputsNumber > 3) {
modulation_strides = getParentEdgeAt(3)->getMemory().GetDescWithType<BlockedMemoryDesc>().getStrides();
}
if (def_conv_kernel) { if (def_conv_kernel) {
executeOptimized(src, offsets, weights, dst, src_strides, off_strides, dst_strides); executeOptimized(src, offsets, weights, dst, src_strides, off_strides, dst_strides);
} else { } else {
executeReference(src, offsets, weights, dst, src_strides, off_strides, wei_strides, dst_strides); executeReference(src, offsets, weights, dst, src_strides, off_strides, wei_strides, dst_strides, modulation, modulation_strides);
} }
} }

View File

@ -22,8 +22,6 @@ struct jit_def_conv_params {
int kd, kh, kw; int kd, kh, kw;
int stride_d, stride_h, stride_w; int stride_d, stride_h, stride_w;
int dilate_d, dilate_h, dilate_w; int dilate_d, dilate_h, dilate_w;
bool with_bias;
bool with_sum;
int nthr; int nthr;
int nb_ic, ic_block; int nb_ic, ic_block;
int nb_oc, oc_block; int nb_oc, oc_block;
@ -32,13 +30,19 @@ struct jit_def_conv_params {
int ur_w_tail; int ur_w_tail;
int typesize_in; int typesize_in;
int typesize_off; int typesize_off;
int typesize_modulation;
int typesize_bia; int typesize_bia;
int typesize_out; int typesize_out;
bool with_bias;
bool with_sum;
bool with_modulation;
bool with_bi_pad;
}; };
struct jit_def_conv_call_args { struct jit_def_conv_call_args {
const void *src; const void *src;
const void *off; const void *off;
const void *modulation;
const void *filt; const void *filt;
const void *bias; const void *bias;
const void *dst; const void *dst;
@ -75,11 +79,13 @@ public:
bool canBeInPlace() const override { bool canBeInPlace() const override {
return false; return false;
} }
bool enforceRef = false;
InferenceEngine::Precision getRuntimePrecision() const override; InferenceEngine::Precision getRuntimePrecision() const override;
private: private:
size_t group = 1; size_t group = 1;
bool with_bilinear_pad = false;
std::vector<ptrdiff_t> stride = {}; std::vector<ptrdiff_t> stride = {};
std::vector<ptrdiff_t> dilation = {}; std::vector<ptrdiff_t> dilation = {};
std::vector<ptrdiff_t> paddingL = {}; std::vector<ptrdiff_t> paddingL = {};
@ -92,10 +98,10 @@ private:
void executeReference(const float* src, const float* offsets, const float* weights, float* dst, void executeReference(const float* src, const float* offsets, const float* weights, float* dst,
const std::vector<size_t>& src_strides, const std::vector<size_t>& off_strides, const std::vector<size_t>& src_strides, const std::vector<size_t>& off_strides,
const std::vector<size_t>& wei_strides, const std::vector<size_t>& dst_strides); const std::vector<size_t>& wei_strides, const std::vector<size_t>& dst_strides,
const float* modulation = nullptr, const std::vector<size_t>& modulation_strides = {});
void executeOptimized(const float* src, const float* offsets, const float* weights, float* dst, void executeOptimized(const float* src, const float* offsets, const float* weights, float* dst,
const std::vector<size_t>& src_strides, const std::vector<size_t>& off_strides, const std::vector<size_t>& src_strides, const std::vector<size_t>& off_strides, const std::vector<size_t>& dst_strides);
const std::vector<size_t>& dst_strides);
}; };
} // namespace MKLDNNPlugin } // namespace MKLDNNPlugin

View File

@ -272,6 +272,8 @@ void MKLDNNStridedSliceNode::createPrimitive() {
auto srcOrder = srcBlockingDesc.getOrder(); auto srcOrder = srcBlockingDesc.getOrder();
params.srcDims = srcBlockingDesc.getBlockDims(); params.srcDims = srcBlockingDesc.getBlockDims();
params.dstDims = dstBlockingDesc.getBlockDims(); params.dstDims = dstBlockingDesc.getBlockDims();
params.srcMemPtr = srcMemPtr;
params.dstMemPtr = dstMemPtr;
params.dataSize = getSelectedPrimitiveDescriptor()->getConfig().inConfs[DATA_ID].desc->getPrecision().size(); params.dataSize = getSelectedPrimitiveDescriptor()->getConfig().inConfs[DATA_ID].desc->getPrecision().size();
if (params.parametersAreConstant) { if (params.parametersAreConstant) {
@ -282,8 +284,6 @@ void MKLDNNStridedSliceNode::createPrimitive() {
SizeVector newSrcDims, newDstDims; SizeVector newSrcDims, newDstDims;
dimsNormalization(newSrcDims, newDstDims); dimsNormalization(newSrcDims, newDstDims);
dimsGluing(realNDims, newSrcDims, newDstDims); dimsGluing(realNDims, newSrcDims, newDstDims);
if (params.dstDims.size() == 1 || params.nDimsForWork != 1)
indicesCalculation(); indicesCalculation();
} }
} }
@ -510,14 +510,35 @@ void MKLDNNStridedSliceNode::dimsGluing(const size_t realNDims, const SizeVector
if (params.dstDims.size() > 2) if (params.dstDims.size() > 2)
params.lastDstDim /= newDstDims[secondDim.first]; params.lastDstDim /= newDstDims[secondDim.first];
} }
// some parameter calculations for common execution
params.isOptimized = params.nDimsForWork == 1 && params.dstDims.size() > 1;
if (params.isOptimized) {
if (params.dstDims.size() == 2)
params.dstDims[1] = 1;
params.workAmount = params.dstDims[0] * params.dstDims[1];
params.srcShift = (begin[0] * params.srcStrides[0] + begin[1] * params.srcStrides[1]) * params.dataSize;
} else {
params.srcShift = stride.back() == 1 && stride.size() > 1 ?
begin[params.nDimsForWork] * params.srcStrides[params.nDimsForWork] * params.dataSize : 0;
}
} }
void MKLDNNStridedSliceNode::indicesCalculation() { void MKLDNNStridedSliceNode::indicesCalculation() {
// indices calculation before execution for the best performance // indices calculation before execution for the best performance
params.nThreads = parallel_get_max_threads();
params.srcIndices.resize(params.workAmount, 0); params.srcIndices.resize(params.workAmount, 0);
params.dstIndices.resize(params.workAmount, 0); params.dstIndices.resize(params.workAmount, 0);
// should choose a more suitable thread count
const size_t nthr = parallel_get_max_threads();
params.nThreads = nthr > params.workAmount ? params.workAmount : nthr;
if (params.isOptimized) {
indicesCalculationForOptimized();
return;
}
auto getSrcIdx = [this](const SizeVector& indexes){ auto getSrcIdx = [this](const SizeVector& indexes){
size_t srcIdx = 0; size_t srcIdx = 0;
for (int i = 0; i < params.nDimsForWork; ++i) for (int i = 0; i < params.nDimsForWork; ++i)
@ -542,11 +563,11 @@ void MKLDNNStridedSliceNode::indicesCalculation() {
if (coords[k] < params.dstDims[k]) { if (coords[k] < params.dstDims[k]) {
srcIdx += stride[k] * params.srcStrides[k] * params.dataSize; srcIdx += stride[k] * params.srcStrides[k] * params.dataSize;
break; break;
} else { }
coords[k] = 0; coords[k] = 0;
out = true; out = true;
} }
}
if (out) if (out)
srcIdx = getSrcIdx(coords); srcIdx = getSrcIdx(coords);
@ -554,6 +575,25 @@ void MKLDNNStridedSliceNode::indicesCalculation() {
}); });
} }
void MKLDNNStridedSliceNode::indicesCalculationForOptimized() {
const size_t dstIdx0 = params.dstStrides[0] * params.dataSize;
const size_t dstIdx1 = params.dstStrides[1] * params.dataSize;
const size_t srcIdx0 = stride[0] * params.srcStrides[0] * params.dataSize;
const size_t srcIdx1 = stride[1] * params.srcStrides[1] * params.dataSize;
for (size_t i0 = 0; i0 < params.dstDims[0]; i0++) {
const size_t idx = i0 * params.dstDims[1];
params.dstIndices[idx] = i0 * dstIdx0;
params.srcIndices[idx] = i0 * srcIdx0;
for (size_t i1 = 1; i1 < params.dstDims[1]; i1++) {
params.dstIndices[idx + i1] = params.dstIndices[idx] + i1 * dstIdx1;
params.srcIndices[idx + i1] = params.srcIndices[idx] + i1 * srcIdx1;
}
}
}
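indicesCalculationForOptimized precomputes a flat table of source and destination byte offsets so the execution loop degenerates to one memcpy per work item. A hedged sketch of the same precomputation for a 2-D slice, with all strides passed explicitly (names and signature are illustrative, not the node's API):

#include <cstddef>
#include <iostream>
#include <vector>

// Precompute flat src/dst byte offsets for a 2-D strided copy:
// dst[i0][i1] <- src[i0 * stride0][i1 * stride1] (tensor strides in elements).
static void precompute_offsets(std::size_t d0, std::size_t d1,
                               std::size_t srcStride0, std::size_t srcStride1,
                               std::size_t dstStride0, std::size_t dstStride1,
                               std::size_t stride0, std::size_t stride1,
                               std::size_t dataSize,
                               std::vector<std::size_t>& srcOff,
                               std::vector<std::size_t>& dstOff) {
    srcOff.resize(d0 * d1);
    dstOff.resize(d0 * d1);
    const std::size_t srcStep0 = stride0 * srcStride0 * dataSize;
    const std::size_t srcStep1 = stride1 * srcStride1 * dataSize;
    const std::size_t dstStep0 = dstStride0 * dataSize;
    const std::size_t dstStep1 = dstStride1 * dataSize;
    for (std::size_t i0 = 0; i0 < d0; ++i0) {
        const std::size_t base = i0 * d1;
        srcOff[base] = i0 * srcStep0;
        dstOff[base] = i0 * dstStep0;
        for (std::size_t i1 = 1; i1 < d1; ++i1) {
            srcOff[base + i1] = srcOff[base] + i1 * srcStep1;
            dstOff[base + i1] = dstOff[base] + i1 * dstStep1;
        }
    }
}

int main() {
    std::vector<std::size_t> s, d;
    // Take every second column of a 4x8 float tensor into a 4x4 tensor.
    precompute_offsets(4, 4, /*srcStride0=*/8, /*srcStride1=*/1,
                       /*dstStride0=*/4, /*dstStride1=*/1,
                       /*stride0=*/1, /*stride1=*/2, sizeof(float), s, d);
    std::cout << s[5] << ' ' << d[5] << '\n';  // 40 20
}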
void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) { void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) {
if (!params.parametersAreConstant) { if (!params.parametersAreConstant) {
auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims();
@ -586,42 +626,15 @@ void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) {
SizeVector newSrcDims, newDstDims; SizeVector newSrcDims, newDstDims;
dimsNormalization(newSrcDims, newDstDims); dimsNormalization(newSrcDims, newDstDims);
dimsGluing(dstDims.size(), newSrcDims, newDstDims); dimsGluing(dstDims.size(), newSrcDims, newDstDims);
if (params.dstDims.size() == 1 || params.nDimsForWork != 1)
indicesCalculation(); indicesCalculation();
} }
if (params.dstDims.size() > 1 && params.nDimsForWork == 1)
stridedSliceV();
else
stridedSlice(); stridedSlice();
} }
void MKLDNNStridedSliceNode::stridedSliceV() { inline void MKLDNNStridedSliceNode::stridedSlice() {
const uint8_t* srcData = reinterpret_cast<const uint8_t*>(this->getParentEdgeAt(DATA_ID)->getMemoryPtr()->GetPtr()) + const uint8_t* srcData = reinterpret_cast<const uint8_t*>(params.srcMemPtr->GetPtr()) + params.srcShift;
(begin[0] * params.srcStrides[0] + begin[1] * params.srcStrides[1]) * params.dataSize; uint8_t* dstData = reinterpret_cast<uint8_t*>(params.dstMemPtr->GetPtr());
uint8_t* dstData = reinterpret_cast<uint8_t*>(this->getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
const size_t dstIdx = params.dstStrides[0] * params.dataSize;
const size_t srcIdx = stride[0] * params.srcStrides[0] * params.dataSize;
const size_t dstShift = params.dstStrides[1] * params.dataSize;
const size_t srcShift = stride[1] * params.srcStrides[1] * params.dataSize;
if (params.dstDims.size() > 2) {
parallel_for2d(params.dstDims[0], params.dstDims[1], [&](const size_t i, const size_t j) {
cpu_memcpy(&dstData[i * dstIdx + j * dstShift], &srcData[i * srcIdx + j * srcShift], params.lastDstDim);
});
} else {
parallel_for(params.dstDims[0], [&](const size_t i) {
cpu_memcpy(&dstData[i * dstIdx], &srcData[i * srcIdx], params.lastDstDim);
});
}
}
void MKLDNNStridedSliceNode::stridedSlice() {
const uint8_t* srcData = reinterpret_cast<const uint8_t*>(this->getParentEdgeAt(DATA_ID)->getMemoryPtr()->GetPtr()) +
(stride.back() == 1 && stride.size() > 1 ? begin[params.nDimsForWork] * params.srcStrides[params.nDimsForWork] * params.dataSize : 0);
uint8_t* dstData = reinterpret_cast<uint8_t*>(this->getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
parallel_nt(params.nThreads, [&](const int ithr, const int nthr) { parallel_nt(params.nThreads, [&](const int ithr, const int nthr) {
size_t start = 0, end = 0; size_t start = 0, end = 0;

View File

@ -27,14 +27,14 @@ public:
static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept; static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept;
private: private:
void stridedSliceV(); inline void stridedSlice();
void stridedSlice();
void addHiddenDims(const size_t nSrcDims); void addHiddenDims(const size_t nSrcDims);
void orderParametersByLayouts(); void orderParametersByLayouts();
void dimsNormalization(InferenceEngine::SizeVector& newSrcDims, InferenceEngine::SizeVector& newDstDims); void dimsNormalization(InferenceEngine::SizeVector& newSrcDims, InferenceEngine::SizeVector& newDstDims);
void dimsGluing(const size_t realNDims, const InferenceEngine::SizeVector& newSrcDims, const InferenceEngine::SizeVector& newDstDims); void dimsGluing(const size_t realNDims, const InferenceEngine::SizeVector& newSrcDims, const InferenceEngine::SizeVector& newDstDims);
void indicesCalculation(); void indicesCalculation();
void indicesCalculationForOptimized();
const size_t DATA_ID = 0; const size_t DATA_ID = 0;
const size_t BEGIN_ID = 1; const size_t BEGIN_ID = 1;
@ -56,6 +56,8 @@ private:
InferenceEngine::SizeVector strideDims; InferenceEngine::SizeVector strideDims;
struct { struct {
MKLDNNMemoryPtr srcMemPtr = nullptr;
MKLDNNMemoryPtr dstMemPtr = nullptr;
InferenceEngine::SizeVector srcDims; InferenceEngine::SizeVector srcDims;
InferenceEngine::SizeVector dstDims; InferenceEngine::SizeVector dstDims;
InferenceEngine::SizeVector srcStrides; InferenceEngine::SizeVector srcStrides;
@ -69,6 +71,8 @@ private:
size_t workAmount = 0; size_t workAmount = 0;
size_t lastDstDim = 0; size_t lastDstDim = 0;
size_t dataSize = 0; size_t dataSize = 0;
size_t srcShift = 0;
bool isOptimized = false;
bool equalDims = false; bool equalDims = false;
bool parametersAreConstant = true; bool parametersAreConstant = true;
} params; } params;

View File

@ -8,20 +8,20 @@
#include "memory_formats_attribute.hpp" #include "memory_formats_attribute.hpp"
namespace ngraph { using namespace ngraph;
using namespace ov;
template class ngraph::MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>; template class ov::MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>;
constexpr VariantTypeInfo VariantWrapper<MLKDNNInputMemoryFormats>::type_info; constexpr VariantTypeInfo VariantWrapper<MLKDNNInputMemoryFormats>::type_info;
std::string getMLKDNNInputMemoryFormats(const std::shared_ptr<ngraph::Node> & node) { std::string ngraph::getMLKDNNInputMemoryFormats(const std::shared_ptr<ngraph::Node> & node) {
return MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>::getMemoryFormats(node); return MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>::getMemoryFormats(node);
} }
template class ngraph::MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>; template class ov::MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>;
constexpr VariantTypeInfo VariantWrapper<MLKDNNOutputMemoryFormats>::type_info; constexpr VariantTypeInfo VariantWrapper<MLKDNNOutputMemoryFormats>::type_info;
std::string getMLKDNNOutputMemoryFormats(const std::shared_ptr<ngraph::Node> & node) { std::string ngraph::getMLKDNNOutputMemoryFormats(const std::shared_ptr<ngraph::Node> & node) {
return MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>::getMemoryFormats(node); return MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>::getMemoryFormats(node);
} }
} // namespace ngraph


@ -25,6 +25,25 @@ public:
std::string getMemoryFormats() const { return memory_format; } std::string getMemoryFormats() const { return memory_format; }
}; };
class MLKDNNInputMemoryFormats : public MLKDNNMemoryFormats {
public:
MLKDNNInputMemoryFormats() = default;
explicit MLKDNNInputMemoryFormats(const std::string &_memory_format) : MLKDNNMemoryFormats(_memory_format) {}
};
std::string getMLKDNNInputMemoryFormats(const std::shared_ptr<ngraph::Node>& node);
class MLKDNNOutputMemoryFormats : public MLKDNNMemoryFormats {
public:
MLKDNNOutputMemoryFormats() = default;
explicit MLKDNNOutputMemoryFormats(const std::string &_memory_format) : MLKDNNMemoryFormats(_memory_format) {}
};
std::string getMLKDNNOutputMemoryFormats(const std::shared_ptr<ngraph::Node>& node);
} // namespace ngraph
namespace ov {
template <typename MemoryFormatsType> template <typename MemoryFormatsType>
class MLKDNNMemoryFormatsHelper : public VariantImpl<MemoryFormatsType> { class MLKDNNMemoryFormatsHelper : public VariantImpl<MemoryFormatsType> {
public: public:
@ -35,7 +54,7 @@ public:
using MemoryFormatsWrapper = VariantWrapper<MemoryFormatsType>; using MemoryFormatsWrapper = VariantWrapper<MemoryFormatsType>;
if (!rtInfo.count(MemoryFormatsWrapper::type_info.name)) return ""; if (!rtInfo.count(MemoryFormatsWrapper::type_info.name)) return "";
const auto &attr = rtInfo.at(MemoryFormatsWrapper::type_info.name); const auto &attr = rtInfo.at(MemoryFormatsWrapper::type_info.name);
MemoryFormatsType mem_format = as_type_ptr<MemoryFormatsWrapper>(attr)->get(); MemoryFormatsType mem_format = ngraph::as_type_ptr<MemoryFormatsWrapper>(attr)->get();
return mem_format.getMemoryFormats(); return mem_format.getMemoryFormats();
} }
@ -48,7 +67,7 @@ public:
} }
if (unique_mem_format.size() > 1) { if (unique_mem_format.size() > 1) {
throw ngraph_error(std::string(VariantWrapper<MemoryFormatsType>::type_info.name) + " no rule defined for multiple values."); throw ngraph::ngraph_error(std::string(VariantWrapper<MemoryFormatsType>::type_info.name) + " no rule defined for multiple values.");
} }
std::string final_mem_format; std::string final_mem_format;
@ -59,46 +78,29 @@ public:
} }
std::shared_ptr<ngraph::Variant> init(const std::shared_ptr<ngraph::Node> & node) override { std::shared_ptr<ngraph::Variant> init(const std::shared_ptr<ngraph::Node> & node) override {
throw ngraph_error(std::string(VariantWrapper<MemoryFormatsType>::type_info.name) + " has no default initialization."); throw ngraph::ngraph_error(std::string(VariantWrapper<MemoryFormatsType>::type_info.name) + " has no default initialization.");
} }
}; };
extern template class MLKDNNMemoryFormatsHelper<ngraph::MLKDNNInputMemoryFormats>;
class MLKDNNInputMemoryFormats : public MLKDNNMemoryFormats {
public:
MLKDNNInputMemoryFormats() = default;
explicit MLKDNNInputMemoryFormats(const std::string &_memory_format) : MLKDNNMemoryFormats(_memory_format) {}
};
extern template class MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>;
template<> template<>
class VariantWrapper<MLKDNNInputMemoryFormats> : public MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats> { class VariantWrapper<ngraph::MLKDNNInputMemoryFormats> : public MLKDNNMemoryFormatsHelper<ngraph::MLKDNNInputMemoryFormats> {
public: public:
static constexpr VariantTypeInfo type_info{MLKDNNInputMemoryFormatsAttr, 0}; static constexpr VariantTypeInfo type_info{ngraph::MLKDNNInputMemoryFormatsAttr, 0};
const VariantTypeInfo &get_type_info() const override { return type_info; } const VariantTypeInfo &get_type_info() const override { return type_info; }
VariantWrapper(const MLKDNNInputMemoryFormats &value) : MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>(value) {} VariantWrapper(const ngraph::MLKDNNInputMemoryFormats &value) : MLKDNNMemoryFormatsHelper<ngraph::MLKDNNInputMemoryFormats>(value) {}
}; };
std::string getMLKDNNInputMemoryFormats(const std::shared_ptr<ngraph::Node>& node); extern template class MLKDNNMemoryFormatsHelper<ngraph::MLKDNNOutputMemoryFormats>;
class MLKDNNOutputMemoryFormats : public MLKDNNMemoryFormats {
public:
MLKDNNOutputMemoryFormats() = default;
explicit MLKDNNOutputMemoryFormats(const std::string &_memory_format) : MLKDNNMemoryFormats(_memory_format) {}
};
extern template class MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>;
template<> template<>
class VariantWrapper<MLKDNNOutputMemoryFormats> : public MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats> { class VariantWrapper<ngraph::MLKDNNOutputMemoryFormats> : public MLKDNNMemoryFormatsHelper<ngraph::MLKDNNOutputMemoryFormats> {
public: public:
static constexpr VariantTypeInfo type_info{MLKDNNOutputMemoryFormatsAttr, 0}; static constexpr VariantTypeInfo type_info{ngraph::MLKDNNOutputMemoryFormatsAttr, 0};
const VariantTypeInfo &get_type_info() const override { return type_info; } const VariantTypeInfo &get_type_info() const override { return type_info; }
VariantWrapper(const MLKDNNOutputMemoryFormats &value) : MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>(value) {} VariantWrapper(const ngraph::MLKDNNOutputMemoryFormats &value) : MLKDNNMemoryFormatsHelper<ngraph::MLKDNNOutputMemoryFormats>(value) {}
}; };
std::string getMLKDNNOutputMemoryFormats(const std::shared_ptr<ngraph::Node>& node); } // namespace ov
} // namespace ngraph
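The getters above only read hints that a pass has previously stored in a node's rt_info under the wrapper's type_info key. A rough usage sketch, where node is an assumed std::shared_ptr<ngraph::Node> and the "nChw16c" layout string is purely illustrative:

auto& rtInfo = node->get_rt_info();
rtInfo[ov::VariantWrapper<ngraph::MLKDNNInputMemoryFormats>::type_info.name] =
    std::make_shared<ov::VariantWrapper<ngraph::MLKDNNInputMemoryFormats>>(
        ngraph::MLKDNNInputMemoryFormats("nChw16c"));

// later, e.g. inside the MKLDNN plugin:
std::string inputFormats = ngraph::getMLKDNNInputMemoryFormats(node);   // "nChw16c"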


@ -12,7 +12,7 @@ ie_add_plugin(NAME ${TARGET_NAME}
SOURCES ${SOURCES} ${HEADERS} SOURCES ${SOURCES} ${HEADERS}
VERSION_DEFINES_FOR multi_device_plugin.cpp) VERSION_DEFINES_FOR multi_device_plugin.cpp)
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine) target_link_libraries(${TARGET_NAME} PRIVATE inference_engine ngraph inference_engine_transformations)
set_ie_threading_interface_for(${TARGET_NAME}) set_ie_threading_interface_for(${TARGET_NAME})


@ -10,6 +10,10 @@
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include <ngraph/opsets/opset1.hpp>
#include <transformations/utils/utils.hpp>
#include "ngraph_ops/convolution_ie.hpp"
#include "ngraph_ops/deconvolution_ie.hpp"
#include <ie_metric_helpers.hpp> #include <ie_metric_helpers.hpp>
#include <threading/ie_executor_manager.hpp> #include <threading/ie_executor_manager.hpp>
@ -21,6 +25,30 @@
namespace MultiDevicePlugin { namespace MultiDevicePlugin {
using namespace InferenceEngine; using namespace InferenceEngine;
namespace { namespace {
std::string GetNetworkPrecision(const InferenceEngine::CNNNetwork &network) {
auto nGraphFunc = network.getFunction();
bool isINTModel = ngraph::op::util::has_op_with_type<ngraph::op::FakeQuantize>(nGraphFunc);
if (isINTModel) {
return METRIC_VALUE(INT8);
}
for (auto & node : nGraphFunc->get_ordered_ops()) {
if (std::dynamic_pointer_cast<ngraph::opset1::Convolution>(node) ||
std::dynamic_pointer_cast<ngraph::opset1::GroupConvolution>(node) ||
std::dynamic_pointer_cast<ngraph::opset1::GroupConvolutionBackpropData>(node) ||
std::dynamic_pointer_cast<ngraph::opset1::ConvolutionBackpropData>(node) ||
std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node) ||
std::dynamic_pointer_cast<ngraph::op::DeconvolutionIE>(node)) {
auto layerType = node->input(1).get_element_type().get_type_name();
if (layerType == "f32")
return METRIC_VALUE(FP32);
if (layerType == "f16")
return METRIC_VALUE(FP16);
}
}
return METRIC_VALUE(FP32);
}
std::map<std::string, std::string> mergeConfigs(std::map<std::string, std::string> config, std::map<std::string, std::string> mergeConfigs(std::map<std::string, std::string> config,
const std::map<std::string, std::string> & local) { const std::map<std::string, std::string> & local) {
for (auto && kvp : local) { for (auto && kvp : local) {
@ -28,7 +56,10 @@ namespace {
} }
return config; return config;
} }
std::vector<std::string> supported_configKeys = {MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES}; std::vector<std::string> supported_configKeys = {
MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES,
CONFIG_KEY_INTERNAL(WORK_MODE)
};
} // namespace } // namespace
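GetNetworkPrecision above reduces a model to a single tag: INT8 when a FakeQuantize is present, otherwise the weight element type of the first convolution-like op, with FP32 as the fallback. A short sketch of how that tag feeds the device choice; network is an assumed CNNNetwork already verified to carry an ngraph function:

std::string networkPrecision = GetNetworkPrecision(network);   // "INT8", "FP16" or "FP32"
if (networkPrecision == METRIC_VALUE(INT8)) {
    // quantized model: SelectDevice will look for INT8 in OPTIMIZATION_CAPABILITIES
}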
std::map<std::string, std::string> MultiDeviceInferencePlugin::GetSupportedConfig( std::map<std::string, std::string> MultiDeviceInferencePlugin::GetSupportedConfig(
@ -98,8 +129,8 @@ std::vector<DeviceInformation> MultiDeviceInferencePlugin::ParseMetaDevices(cons
InferenceEngine::Parameter MultiDeviceInferencePlugin::GetConfig(const std::string& name, InferenceEngine::Parameter MultiDeviceInferencePlugin::GetConfig(const std::string& name,
const std::map<std::string, InferenceEngine::Parameter> & options) const { const std::map<std::string, InferenceEngine::Parameter> & options) const {
if (name == MULTI_CONFIG_KEY(DEVICE_PRIORITIES)) { if (supported_configKeys.end() != std::find(supported_configKeys.begin(), supported_configKeys.end(), name)) {
auto it = _config.find(MULTI_CONFIG_KEY(DEVICE_PRIORITIES)); auto it = _config.find(name);
if (it == _config.end()) { if (it == _config.end()) {
IE_THROW() << "Value for KEY_MULTI_DEVICE_PRIORITIES is not set"; IE_THROW() << "Value for KEY_MULTI_DEVICE_PRIORITIES is not set";
} else { } else {
@ -148,17 +179,23 @@ InferenceEngine::Parameter MultiDeviceInferencePlugin::GetMetric(const std::stri
// Is called only when caching is enabled // Is called only when caching is enabled
IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadNetwork(const std::string& modelPath, IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadNetwork(const std::string& modelPath,
const std::map<std::string, std::string>& config) { const std::map<std::string, std::string>& config) {
return LoadExeNetworkImpl(modelPath, {}, config); return LoadNetworkImpl(modelPath, {}, config);
} }
IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadExeNetworkImpl(const CNNNetwork &network, IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadExeNetworkImpl(const CNNNetwork &network,
const std::map<std::string, std::string>& config) { const std::map<std::string, std::string>& config) {
return LoadExeNetworkImpl({}, network, config); if (network.getFunction() == nullptr) {
IE_THROW() << "MULTI device supports just ngraph network representation";
}
auto networkPrecision = GetNetworkPrecision(network);
return LoadNetworkImpl({}, network, config, networkPrecision);
} }
IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadExeNetworkImpl(const std::string& modelPath, IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadNetworkImpl(const std::string& modelPath,
CNNNetwork network, CNNNetwork network,
const std::map<std::string, std::string>& config) { const std::map<std::string, std::string>& config,
const std::string &networkPrecision) {
if (GetCore() == nullptr) { if (GetCore() == nullptr) {
IE_THROW() << "Please, work with MULTI device via InferenceEngine::Core object"; IE_THROW() << "Please, work with MULTI device via InferenceEngine::Core object";
} }
@ -168,16 +205,39 @@ IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadExeNetworkImpl(c
} }
auto fullConfig = mergeConfigs(_config, config); auto fullConfig = mergeConfigs(_config, config);
auto priorities = fullConfig.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
if (priorities == fullConfig.end()) {
IE_THROW() << "KEY_MULTI_DEVICE_PRIORITIES key is not set for MULTI device";
}
auto metaDevices = ParseMetaDevices(priorities->second, fullConfig);
// collect the settings that are applicable to the devices we are loading the network to // collect the settings that are applicable to the devices we are loading the network to
std::unordered_map<std::string, InferenceEngine::Parameter> multiNetworkConfig; std::unordered_map<std::string, InferenceEngine::Parameter> multiNetworkConfig;
std::vector<DeviceInformation> metaDevices;
auto workMode = fullConfig.find(CONFIG_KEY_INTERNAL(WORK_MODE));
auto priorities = fullConfig.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
// no device priorities were provided, which corresponds to the -d AUTO use case
if (priorities == fullConfig.end()) {
if (workMode != fullConfig.end()) {
std::string allDevices;
auto availableDevices = GetCore()->GetAvailableDevices();
if (availableDevices.empty()) {
IE_THROW(NotFound) << "No available device found";
}
for (auto&& device : availableDevices) {
allDevices += device;
allDevices += ((device == availableDevices[availableDevices.size()-1]) ? "" : ",");
}
metaDevices = ParseMetaDevices(allDevices, fullConfig);
multiNetworkConfig.insert({MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, allDevices});
} else {
IE_THROW() << "KEY_MULTI_DEVICE_PRIORITIES key is not set for MULTI device";
}
} else { // for use case -d MULTI:xPU or -d AUTO:xPU
metaDevices = ParseMetaDevices(priorities->second, fullConfig);
multiNetworkConfig.insert(*priorities); multiNetworkConfig.insert(*priorities);
}
// check if it is -d AUTO or -d AUTO:xPU use case
if (workMode != fullConfig.end()) {
auto targetDevice = SelectDevice(metaDevices, networkPrecision);
// std::cout << "!!! DEBUG: select device is " << targetDevice.deviceName << std::endl;
metaDevices = { targetDevice };
}
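The loop above that assembles allDevices joins names with a comma via a ternary on the last element. A hypothetical helper, not part of this change, that expresses the same join more directly:

#include <string>
#include <vector>

static std::string JoinDeviceNames(const std::vector<std::string>& devices) {
    std::string joined;
    for (size_t i = 0; i < devices.size(); ++i) {
        joined += devices[i];
        if (i + 1 < devices.size())
            joined += ",";   // separator between neighbours, none after the last name
    }
    return joined;
}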
DeviceMap<SoExecutableNetworkInternal> executableNetworkPerDevice; DeviceMap<SoExecutableNetworkInternal> executableNetworkPerDevice;
std::mutex load_mutex; std::mutex load_mutex;
@ -275,4 +335,125 @@ QueryNetworkResult MultiDeviceInferencePlugin::QueryNetwork(const CNNNetwork&
return queryResult; return queryResult;
} }
DeviceInformation MultiDeviceInferencePlugin::SelectDevice(const std::vector<DeviceInformation>& metaDevices, const std::string& networkPrecision) {
if (metaDevices.empty()) {
IE_THROW(NotFound) << "No available device to select in AUTO plugin";
}
if (metaDevices.size() == 1) {
return metaDevices.at(0);
}
std::vector<DeviceInformation> CPU;
std::vector<DeviceInformation> dGPU;
std::vector<DeviceInformation> iGPU;
std::vector<DeviceInformation> MYRIAD;
std::vector<DeviceInformation> VPUX;
for (auto& item : metaDevices) {
if (item.deviceName.find("CPU") == 0) {
CPU.push_back(item);
continue;
}
if (item.deviceName.find("MYRIAD") == 0) {
MYRIAD.push_back(item);
continue;
}
if (item.deviceName.find("VPUX") == 0) {
VPUX.push_back(item);
continue;
}
if (item.deviceName.find("GPU") == 0) {
auto gpuFullDeviceName = GetCore()->GetMetric(item.deviceName, METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>();
if (gpuFullDeviceName.find("iGPU") != std::string::npos) {
iGPU.push_back(item);
} else if (gpuFullDeviceName.find("dGPU") != std::string::npos) {
dGPU.push_back(item);
}
continue;
}
}
if (CPU.empty() && dGPU.empty() && iGPU.empty() && MYRIAD.empty() && VPUX.empty()) {
IE_THROW(NotFound) << "No available device found";
}
// Priority of selecting device: dGPU > VPUX > iGPU > MYRIAD > CPU
if (!dGPU.empty()) {
for (auto&& item : dGPU) {
std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!VPUX.empty()) {
for (auto&& item : VPUX) {
std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!iGPU.empty()) {
for (auto&& item : iGPU) {
std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!MYRIAD.empty()) {
for (auto&& item : MYRIAD) {
std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
if (supportNetwork != capability.end()) {
return item;
}
}
}
// If the network is FP32 but no device supports FP32, offload the FP32 network to a device that supports FP16.
if (networkPrecision == "FP32") {
if (!dGPU.empty()) {
for (auto&& item : dGPU) {
std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!VPUX.empty()) {
for (auto&& item : VPUX) {
std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!iGPU.empty()) {
for (auto&& item : iGPU) {
std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!MYRIAD.empty()) {
for (auto&& item : MYRIAD) {
std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
if (supportNetwork != capability.end()) {
return item;
}
}
}
}
if (CPU.empty()) {
IE_THROW() << "Cannot select any device";
}
return CPU[0];
}
} // namespace MultiDevicePlugin } // namespace MultiDevicePlugin
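SelectDevice above repeats the same capability probe for every device class before falling through the dGPU > VPUX > iGPU > MYRIAD > CPU priority chain. A hypothetical helper, not introduced by this change, that the branches could share; it mirrors the GetMetric calls already used in the function:

// Returns the first candidate whose OPTIMIZATION_CAPABILITIES metric lists `precision`,
// or nullptr when none of them does.
const DeviceInformation* FindDeviceSupporting(const std::vector<DeviceInformation>& candidates,
                                              const std::string& precision) {
    for (const auto& item : candidates) {
        std::vector<std::string> capability =
            GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
        if (std::find(capability.begin(), capability.end(), precision) != capability.end())
            return &item;
    }
    return nullptr;
}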


@ -41,9 +41,11 @@ protected:
const MultiDevicePlugin::DeviceName & deviceName) const; const MultiDevicePlugin::DeviceName & deviceName) const;
private: private:
InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const std::string& modelPath, InferenceEngine::IExecutableNetworkInternal::Ptr LoadNetworkImpl(const std::string& modelPath,
InferenceEngine::CNNNetwork network, InferenceEngine::CNNNetwork network,
const std::map<std::string, std::string>& config); const std::map<std::string, std::string>& config,
const std::string &networkPrecision = METRIC_VALUE(FP32));
DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32));
}; };
} // namespace MultiDevicePlugin } // namespace MultiDevicePlugin


@ -192,24 +192,6 @@ private:
std::ostream & operator<< (std::ostream & out, const Mask & mask); std::ostream & operator<< (std::ostream & out, const Mask & mask);
extern template class VariantImpl<Mask::Ptr>;
template<>
class VariantWrapper<Mask::Ptr> : public VariantImpl<Mask::Ptr> {
public:
static constexpr VariantTypeInfo type_info{"Variant::RuntimeAttribute::Mask", 0};
const VariantTypeInfo &get_type_info() const override {
return type_info;
}
static std::shared_ptr<VariantWrapper<Mask::Ptr>> create(const value_type & value) {
return std::make_shared<VariantWrapper<Mask::Ptr>>(value);
}
explicit VariantWrapper(const value_type &value) : VariantImpl<value_type>(value) {}
};
Mask::Ptr getMask(const Output<const Node> & output); Mask::Ptr getMask(const Output<const Node> & output);
Mask::Ptr getMask(const Output<Node> & output); Mask::Ptr getMask(const Output<Node> & output);
@ -217,3 +199,25 @@ Mask::Ptr getMask(const Output<Node> & output);
void setMask(Output<Node> output, const Mask::Ptr & mask); void setMask(Output<Node> output, const Mask::Ptr & mask);
} // namespace ngraph } // namespace ngraph
namespace ov {
extern template class VariantImpl<ngraph::Mask::Ptr>;
template<>
class VariantWrapper<ngraph::Mask::Ptr> : public VariantImpl<ngraph::Mask::Ptr> {
public:
static constexpr VariantTypeInfo type_info{"Variant::RuntimeAttribute::Mask", 0};
const VariantTypeInfo &get_type_info() const override {
return type_info;
}
static std::shared_ptr<VariantWrapper<ngraph::Mask::Ptr>> create(const value_type & value) {
return std::make_shared<VariantWrapper<ngraph::Mask::Ptr>>(value);
}
explicit VariantWrapper(const value_type &value) : VariantImpl<value_type>(value) {}
};
} // namespace ov
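For context, the attribute wrapped above is consumed through the getMask/setMask helpers that remain in the ngraph namespace. A brief sketch, where conv is an assumed std::shared_ptr<ngraph::Node> inside a pruning pass:

ngraph::Mask::Ptr mask = ngraph::getMask(conv->output(0));
if (mask) {
    // the mask lists, per output dimension, the indices a pruning pass may remove
    ngraph::setMask(conv->output(0), mask);   // store the (possibly updated) mask back into rt_info
}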


@ -35,6 +35,8 @@
#include <transformations/common_optimizations/conv_mul_fusion.hpp> #include <transformations/common_optimizations/conv_mul_fusion.hpp>
#include <transformations/common_optimizations/nop_elimination.hpp> #include <transformations/common_optimizations/nop_elimination.hpp>
#include <transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp> #include <transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp>
#include <transformations/common_optimizations/leaky_relu_fusion.hpp>
#include <transformations/common_optimizations/normalize_l2_fusion.hpp>
NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0); NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0);
@ -79,11 +81,13 @@ bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr<ngraph::F
common_fusions->add_matcher<ngraph::pass::SwishFusion>(); common_fusions->add_matcher<ngraph::pass::SwishFusion>();
common_fusions->add_matcher<ngraph::pass::HSwishFusion>(); common_fusions->add_matcher<ngraph::pass::HSwishFusion>();
common_fusions->add_matcher<ngraph::pass::HSigmoidFusion>(); common_fusions->add_matcher<ngraph::pass::HSigmoidFusion>();
common_fusions->add_matcher<ngraph::pass::NormalizeL2Fusion>();
common_fusions->add_matcher<ngraph::pass::ClampFusion>(); common_fusions->add_matcher<ngraph::pass::ClampFusion>();
common_fusions->add_matcher<ngraph::pass::PadFusion>(); common_fusions->add_matcher<ngraph::pass::PadFusion>();
common_fusions->add_matcher<ngraph::pass::MVNFusion>(); common_fusions->add_matcher<ngraph::pass::MVNFusion>();
common_fusions->add_matcher<ngraph::pass::DilatedConvolutionConverter>(); common_fusions->add_matcher<ngraph::pass::DilatedConvolutionConverter>();
common_fusions->add_matcher<ngraph::pass::GeluFusion>(); common_fusions->add_matcher<ngraph::pass::GeluFusion>();
common_fusions->add_matcher<ngraph::pass::LeakyReluFusion>();
common_fusions->set_name("ngraph::pass::CommonFusions"); common_fusions->set_name("ngraph::pass::CommonFusions");
manager.register_pass<ngraph::pass::BinarizeWeights>(); manager.register_pass<ngraph::pass::BinarizeWeights>();
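Outside of MOCTransformations, the two newly added fusions can be exercised on their own. A minimal sketch over an arbitrary function f, mirroring the registration calls above:

#include <ngraph/pass/manager.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <transformations/common_optimizations/leaky_relu_fusion.hpp>
#include <transformations/common_optimizations/normalize_l2_fusion.hpp>

void RunNewFusions(const std::shared_ptr<ngraph::Function>& f) {
    ngraph::pass::Manager manager;
    auto fusions = manager.register_pass<ngraph::pass::GraphRewrite>();
    fusions->add_matcher<ngraph::pass::NormalizeL2Fusion>();
    fusions->add_matcher<ngraph::pass::LeakyReluFusion>();
    manager.run_passes(f);   // applies both fusions to the function
}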


@ -12,10 +12,6 @@
namespace ngraph { namespace ngraph {
template class ngraph::VariantImpl<Mask::Ptr>;
constexpr VariantTypeInfo VariantWrapper<Mask::Ptr>::type_info;
Mask::Ptr getMask(const Output<const Node> & output) { Mask::Ptr getMask(const Output<const Node> & output) {
auto &rtInfo = output.get_rt_info(); auto &rtInfo = output.get_rt_info();
using MaskWrapper = VariantWrapper<Mask::Ptr>; using MaskWrapper = VariantWrapper<Mask::Ptr>;
@ -57,6 +53,12 @@ std::ostream & operator<< (std::ostream & out, const Mask & mask) {
return out; return out;
} }
} // namespace ngraph } // namespace ngraph
namespace ov {
template class ngraph::VariantImpl<ngraph::Mask::Ptr>;
constexpr VariantTypeInfo VariantWrapper<ngraph::Mask::Ptr>::type_info;
} // namespace ov
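The pairing above follows the usual extern-template pattern: the header promises the instantiation and exactly one translation unit provides it, keeping the template out of every includer's object file. In simplified, illustrative form (the actual diff spells the instantiation through the ngraph alias):

// header (declaration only):
namespace ov {
extern template class VariantImpl<ngraph::Mask::Ptr>;
}   // namespace ov

// one .cpp (the single definition the linker uses):
namespace ov {
template class VariantImpl<ngraph::Mask::Ptr>;
}   // namespace ov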


@ -45,6 +45,15 @@ DECLARE_CONFIG_KEY(CPU_THREADS_PER_STREAM);
*/ */
DECLARE_CONFIG_KEY(FORCE_DISABLE_CACHE); DECLARE_CONFIG_KEY(FORCE_DISABLE_CACHE);
/**
* @brief The internal option name for setting the work mode of the MULTI device plugin.
*
* This option should only be used with one of the following values:
* PluginConfigInternalParams::MULTI_MODE_AUTO or PluginConfigInternalParams::MULTI_MODE_LEGACY
*/
DECLARE_CONFIG_KEY(WORK_MODE);
DECLARE_CONFIG_VALUE(MULTI_MODE_AUTO);
} // namespace PluginConfigInternalParams } // namespace PluginConfigInternalParams
} // namespace InferenceEngine } // namespace InferenceEngine
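A rough sketch of how the new key could be exercised when loading through the MULTI plugin; whether application code is expected to set this internal key directly is an assumption here, and "model.xml" is a placeholder path:

InferenceEngine::Core core;
auto network = core.ReadNetwork("model.xml");
std::map<std::string, std::string> config = {
    {CONFIG_KEY_INTERNAL(WORK_MODE), InferenceEngine::PluginConfigInternalParams::MULTI_MODE_AUTO}};
auto exeNetwork = core.LoadNetwork(network, "MULTI", config);   // MULTI then behaves as AUTO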


@ -0,0 +1,33 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <string>
#include "ie_api.h"
#include "ie_parallel.hpp"
#include "threading/ie_istreams_executor.hpp"
namespace InferenceEngine {
/**
* @class TBBStreamsExecutor
* @brief CPU Streams executor implementation that uses the TBB thread pool to run tasks
*/
class INFERENCE_ENGINE_API_CLASS(TBBStreamsExecutor) : public IStreamsExecutor {
public:
using Ptr = std::shared_ptr<TBBStreamsExecutor>;
explicit TBBStreamsExecutor(const Config& config = {});
~TBBStreamsExecutor() override;
void run(Task task) override;
void Execute(Task task) override;
int GetStreamId() override;
int GetNumaNodeId() override;
private:
struct Impl;
std::unique_ptr<Impl> _impl;
};
} // namespace InferenceEngine
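A minimal usage sketch for the new executor; the header path and the stream count are assumptions for illustration, and Task is the usual std::function<void()> alias:

#include <threading/ie_tbb_streams_executor.hpp>   // assumed install path of the header above

int main() {
    InferenceEngine::IStreamsExecutor::Config config{"TBBStreams", /*streams*/ 2};
    InferenceEngine::TBBStreamsExecutor executor{config};
    executor.run([] {
        // the task body runs on one of the TBB worker threads managed by the executor
    });
    return 0;   // the executor joins its streams on destruction
}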
