Merge remote-tracking branch 'upstream/master'
Commit: ef937a5a52
@@ -4,17 +4,13 @@ jobs:
   matrix:
     Release:
       BUILD_TYPE: 'Release'
-      PROTOBUF_LITE: 'OFF'
+      PROTOBUF_LITE: 'ON'
       TOX_COMMAND: 'tox && tox -e zoo_models'
     Debug:
       BUILD_TYPE: 'Debug'
-      PROTOBUF_LITE: 'OFF'
-      TOX_COMMAND: 'tox'
-    Protobuf_lite:
-      BUILD_TYPE: 'Release'
       PROTOBUF_LITE: 'ON'
-      TOX_COMMAND: 'tox && tox -e zoo_models'
+      TOX_COMMAND: 'tox'
-  maxParallel: 3
+  maxParallel: 2

   # About 300% of total time
   timeoutInMinutes: 90
@@ -56,10 +52,10 @@ jobs:

   - script: |
       rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
-      sudo rm -rf $(TMP_DIR) ; sudo mkdir $(TMP_DIR) ; sudo chmod 777 -R $(TMP_DIR)
       sudo mkdir -p $(MODELS_DIR)
       sudo apt --assume-yes install nfs-common
       sudo mount -vvv -t nfs cinfsshare.file.core.windows.net:/cinfsshare/onnxtestdata $(MODELS_DIR) -o vers=4,minorversion=1,sec=sys
+      mkdir -p $(MODELS_DIR)/models_data
     displayName: 'Make dirs'

   - checkout: self
@@ -76,15 +72,15 @@ jobs:
     workingDirectory: $(WORK_DIR)
     displayName: 'Install dependencies'

+  - script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(MODELS_DIR)/models_data -o -s "$(ONNX_MODEL_ZOO_SHA)"
+    displayName: 'Update models'
+    condition: ne(variables['BUILD_TYPE'], 'Debug')

   - script: sudo docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg PROTOBUF_LITE=$(PROTOBUF_LITE) .
     displayName: 'Docker build $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)'

-  - script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(TMP_DIR) -o -s "$(ONNX_MODEL_ZOO_SHA)"
-    displayName: 'Get models'
-    condition: ne(variables['BUILD_TYPE'], 'Debug')

   - script: sudo fallocate -l 64G /swapfile ; sudo mkswap /swapfile ; sudo swapon /swapfile ; df ; free -h
     displayName: 'Create swap'

-  - script: sudo docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "$(TOX_COMMAND)"
+  - script: sudo docker run --name openvino-onnx-ci-container --volume $(MODELS_DIR)/models_data/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "$(TOX_COMMAND)"
     displayName: 'Docker run $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)'
@@ -16,7 +16,7 @@ jobs:
   timeoutInMinutes: 120

   pool:
-    name: WIN_VMSS_VENV_F8S_WU2
+    name: WIN_VMSS_VENV_F16S_WU2

   variables:
     system.debug: true
@@ -34,8 +34,6 @@ jobs:
     INSTALL_DIR: $(WORK_DIR)\install_pkg
     INSTALL_TEST_DIR: $(INSTALL_DIR)\tests
     SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat
-    IB_DIR: C:\Program Files (x86)\IncrediBuild
-    IB_TESTCONSOLE: $(IB_DIR)\IBTestConsole.exe

   steps:
   - script: |
@@ -59,12 +57,6 @@ jobs:
       rd /Q /S $(BUILD_SAMPLES_DIR) & mkdir $(BUILD_SAMPLES_DIR)
     displayName: 'Make dir'

-  - script: |
-      certutil -urlcache -split -f https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
-      call install_ib_console.bat
-    workingDirectory: $(WORK_DIR)
-    displayName: 'Install IncrediBuild'

   - checkout: self
     clean: true
     lfs: false
@@ -109,9 +101,7 @@ jobs:
   - script: dir $(REPO_DIR)\inference-engine\temp\ /s
     displayName: 'List temp SDKs'

-  - script: |
-      set PATH=$(WORK_DIR)\ninja-win;%PATH%
-      call "$(MSVS_VARS_PATH)" && "C:\Program Files (x86)\IncrediBuild\BuildConsole.exe" /COMMAND="ninja"
+  - script: call "$(MSVS_VARS_PATH)" && $(WORK_DIR)\ninja-win\ninja
     workingDirectory: $(BUILD_DIR)
     displayName: 'Build Win'
@@ -153,10 +143,8 @@ jobs:
     displayName: 'PaddlePaddle Frontend UT'
     continueOnError: false

-  - script: |
-      set PATH=$(IB_DIR);%PATH%
-      call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests-IB.xml
-    displayName: 'IE UT old - IB'
+  - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests.xml
+    displayName: 'IE UT old'
     continueOnError: false

   - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ieUnitTests --gtest_output=xml:TEST-ieUnitTests.xml
@@ -187,11 +175,8 @@ jobs:
     displayName: 'TEMPLATE FuncTests'
     continueOnError: false

-  # call $(SETUPVARS) && $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-cpuFuncTests.xml
-  - script: |
-      set PATH=$(IB_DIR);%PATH%
-      call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke*:-*CompareWithRefs/base_size=16_pre_nms_topn=100_post_nms_topn=100_nms_thresh=0.7_feat_stride=1_min_size=1_ratio*:*smoke_GRUSequenceCommonZeroClip/GRUSequenceTest.CompareWithRefs/mode=CONVERT_TO_TI_MAX_SEQ_LEN_CONST_seq_lengths* --gtest_output=xml:TEST-cpuFuncTests-IB.xml /testlevel=24
-    displayName: 'CPU FuncTests - IB'
+  - script: $(SETUPVARS) && $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-cpuFuncTests.xml
+    displayName: 'CPU FuncTests'
     continueOnError: false

   - script: |
@@ -213,8 +198,3 @@ jobs:
     buildPlatform: 'x64' # Optional
     buildConfiguration: 'Windows' # Optional
     #publishRunAttachments: true # Optional

-  - script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
-    displayName: Stop IncrediBuild
-    continueOnError: true
-    enabled: false
@@ -1,7 +1,7 @@
 jobs:
 - job: WinCC
   # About 150% of total time
-  timeoutInMinutes: 120
+  timeoutInMinutes: 60

   pool:
     name: WIN_VMSS_VENV_F8S_WU2
@@ -10,26 +10,22 @@ jobs:
     system.debug: true
     VSTS_HTTP_RETRY: 5
     VSTS_HTTP_TIMEOUT: 200
-    WORKERS_NUMBER: 8
     BUILD_TYPE: Release
     REPO_DIR: $(Build.Repository.LocalPath)
     OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)\..\openvino_contrib
     MODELS_PATH: $(REPO_DIR)\..\testdata
     WORK_DIR: $(Pipeline.Workspace)\_w
     BUILD_DIR: D:\build
-    BIN_DIR: $(REPO_DIR)\bin\intel64
     MSVS_VARS_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat
     MSVC_COMPILER_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\bin\Hostx64\x64\cl.exe
     INSTALL_DIR: $(WORK_DIR)\install_pkg
     SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat
-    IB_DIR: C:\Program Files (x86)\IncrediBuild
-    IB_TESTCONSOLE: $(IB_DIR)\IBTestConsole.exe
-    TEST_ENV_PATH: $(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.2\opencv\bin;$(IB_DIR);%PATH%

   steps:
   - script: |
       powershell -command "Invoke-RestMethod -Headers @{\"Metadata\"=\"true\"} -Method GET -Uri http://169.254.169.254/metadata/instance/compute?api-version=2019-06-01 | format-custom"
       where python3
+      python3 --version
       where python
       python --version
       where java
|
|||||||
rd /Q /S $(BUILD_DIR) & mkdir $(BUILD_DIR)
|
rd /Q /S $(BUILD_DIR) & mkdir $(BUILD_DIR)
|
||||||
displayName: 'Make dir'
|
displayName: 'Make dir'
|
||||||
|
|
||||||
- script: |
|
|
||||||
certutil -urlcache -split -f https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
|
|
||||||
call install_ib_console.bat
|
|
||||||
workingDirectory: $(WORK_DIR)
|
|
||||||
displayName: 'Install IncrediBuild'
|
|
||||||
|
|
||||||
- checkout: self
|
- checkout: self
|
||||||
clean: true
|
clean: true
|
||||||
lfs: false
|
lfs: false
|
||||||
@@ -59,7 +49,8 @@ jobs:
     path: openvino

   - script: |
-      certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-win.zip ninja-win.zip
+      rem Speed up build
+      certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip ninja-win.zip
       powershell -command "Expand-Archive -Force ninja-win.zip"
     workingDirectory: $(WORK_DIR)
     displayName: 'Install dependencies'
@@ -70,20 +61,19 @@ jobs:
     workingDirectory: $(BUILD_DIR)
     displayName: 'CMake'

-  - script: |
-      set PATH=$(WORK_DIR)\ninja-win;%PATH%
-      call "$(MSVS_VARS_PATH)" && "C:\Program Files (x86)\IncrediBuild\BuildConsole.exe" /COMMAND="ninja"
+  - script: dir $(REPO_DIR)\inference-engine\temp\ /s
+    displayName: 'List temp SDKs'
+
+  - script: call "$(MSVS_VARS_PATH)" && $(WORK_DIR)\ninja-win\ninja
     workingDirectory: $(BUILD_DIR)
-    displayName: 'Build Win'
+    displayName: 'Build Win CC'

   - script: dir $(REPO_DIR)\bin\ /s
-    displayName: 'List files'
+    displayName: 'List bin files'

   - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake
     workingDirectory: $(BUILD_DIR)
     displayName: 'Install'

-  - script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
-    displayName: Stop IncrediBuild
-    continueOnError: true
-    enabled: false
+  - script: dir $(INSTALL_DIR) /s
+    displayName: 'List install files'
@@ -4,7 +4,7 @@ LABEL version=2021.03.30.1

 # Build configuration arguments
 ARG BUILD_TYPE=Release
-ARG PROTOBUF_LITE=OFF
+ARG PROTOBUF_LITE=ON

 ARG http_proxy
 ARG https_proxy
@@ -75,6 +75,6 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins
 *.md @openvinotoolkit/openvino-docs-maintainers

 # Control 3d party dependencies
-*requirements* @openvino-configuration-mgmt
+**/*requirements*.* @openvino-configuration-mgmt
-*setup.py @openvino-configuration-mgmt
+**/setup.py @openvino-configuration-mgmt
 /scripts/install_dependencies/ @openvino-configuration-mgmt
@@ -18,9 +18,11 @@ FunctionTemplate: '^(operator.+|\w+)$'
 TypeAliasName: '^\w+$'
 VariableReference: '^\w+$'

+EnumName: '^[A-Z][\w]+$'
+# excepts element_type
+# TODO: Fix interpolate
+EnumConstantName: '^([A-Z\d_]+|undefined|dynamic|boolean|bf16|f16|f32|f64|i4|i8|i16|i32|i64|u1|u4|u8|u16|u32|u64|asymmetric|align_corners|round_prefer_floor|round_prefer_ceil|floor|ceil|simple|nearest|linear|linear_onnx|cubic|area|scales|sizes|half_pixel|tf_half_pixel_for_nn|pytorch_half_pixel|asymetric)$'
 # TODO: align
-EnumConstantName: '^.*$'
-EnumName: '^.*$'
 UsingDeclaration: '^.*$'
 TypedefName: '^.*$'
docs/IE_DG/Paddle_Support.md (new file, 34 lines)
@@ -0,0 +1,34 @@
# Paddle Support in OpenVINO™ {#openvino_docs_IE_DG_Paddle_Support}

Starting from the 2022.1 release, OpenVINO™ supports reading native Paddle models.
The `Core::ReadNetwork()` method provides a uniform way to read models from either the IR or the Paddle format; it is the recommended approach for reading models.

## Read Paddle Models from IR

After [Converting a Paddle Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md) to the [Intermediate Representation (IR)](../MO_DG/IR_and_opsets.md), it can be read as recommended. Example:

```cpp
InferenceEngine::Core core;
auto network = core.ReadNetwork("model.xml");
```

## Read Paddle Models from Paddle Format (Paddle `inference model` model type)

**Example:**

```cpp
InferenceEngine::Core core;
auto network = core.ReadNetwork("model.pdmodel");
```

**Reshape feature:**

OpenVINO™ does not provide a mechanism to specify pre-processing, such as mean values subtraction or reverse input channels, for the Paddle format.
If a Paddle model contains dynamic input shapes, use the `CNNNetwork::reshape` method for shape specialization.
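As a rough sketch of that flow, using the same API as the examples above (the input name `image` and the target shape are made up for illustration):

```cpp
InferenceEngine::Core core;
auto network = core.ReadNetwork("model.pdmodel");

// Specialize a dynamic input shape before loading the network to a device.
auto shapes = network.getInputShapes();  // map: input name -> dimensions
shapes["image"] = {1, 3, 608, 608};      // hypothetical input name and shape
network.reshape(shapes);
```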

## NOTE

* A Paddle [`inference model`](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/inference_en.md) mainly contains two kinds of files, `model.pdmodel` (model file) and `model.pdiparams` (params file), which are used for inference.
* The list of supported Paddle models and how to export them are described in [Convert a Paddle Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md).
* For `Normalize` Paddle models, the input data should be in FP32 format.
* When reading Paddle models from the Paddle format, make sure that `model.pdmodel` and `model.pdiparams` are in the same folder.
docs/IE_DG/supported_plugins/AUTO.md (new file, 128 lines)
@@ -0,0 +1,128 @@
# Auto-Device Plugin {#openvino_docs_IE_DG_supported_plugins_AUTO}

## Auto-Device Plugin Execution

Auto-device is a new, special "virtual" or "proxy" device in the OpenVINO™ toolkit.

Use "AUTO" as the device name to delegate selection of an actual accelerator to OpenVINO.
With the 2021.4 release, Auto-device internally recognizes and selects devices from among CPU,
integrated GPU and discrete Intel GPUs (when available) depending on the device capabilities and the characteristics of CNN models,
for example, precision. Then Auto-device assigns inference requests to the selected device.

From the application point of view, this is just another device that handles all accelerators in the full system.

With the 2021.4 release, Auto-device setup is done in three major steps:
* Step 1: Configure each device as usual (for example, via the conventional <code>SetConfig</code> method).
* Step 2: Load a network to the Auto-device plugin. This is the only change needed in your application.
* Step 3: Just like with any other executable network (resulting from <code>LoadNetwork</code>), create as many requests as needed to saturate the devices.

These steps are covered below in detail.

## Defining and Configuring the Auto-Device Plugin
Following the OpenVINO notion of "devices", the Auto-device has the name "AUTO". The only configuration option for Auto-device is a limited device list:

| Parameter name | Parameter values | Default | Description |
| :--- | :--- | :--- | :--- |
| "AUTO_DEVICE_LIST" | comma-separated device names <span style="color:red">with no spaces</span> | N/A | Device candidate list to be selected |

You can use the configuration name directly as a string or use <code>IE::KEY_AUTO_DEVICE_LIST</code> from <code>ie_plugin_config.hpp</code>,
which defines the same string.

There are two ways to use Auto-device:
1. Directly indicate the device by "AUTO" or an empty string:

@snippet snippets/AUTO0.cpp part0

2. Use the Auto-device configuration to limit the device candidate list to be selected:

@snippet snippets/AUTO1.cpp part1
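For orientation, a minimal sketch of both approaches, assuming an IR file `model.xml`; the snippets referenced above remain the authoritative examples:

```cpp
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core ie;
    auto network = ie.ReadNetwork("model.xml");

    // 1. Let AUTO select the actual device.
    auto exec_auto = ie.LoadNetwork(network, "AUTO");

    // 2. Limit the candidate device list via AUTO_DEVICE_LIST (no spaces).
    auto exec_limited = ie.LoadNetwork(network, "AUTO",
                                       {{"AUTO_DEVICE_LIST", "CPU,GPU"}});
    return 0;
}
```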

Auto-device supports querying device optimization capabilities in metric:

| Parameter name | Parameter values |
| :--- | :--- |
| "OPTIMIZATION_CAPABILITIES" | Auto-Device capabilities |

## Enumerating Available Devices and Auto-Device Selecting Logic

### Enumerating Available Devices

The Inference Engine features a dedicated API to enumerate devices and their capabilities.
See [Hello Query Device C++ Sample](../../../inference-engine/samples/hello_query_device/README.md).
This is the example output from the sample (truncated to the devices' names only):

```sh
./hello_query_device
Available devices:
    Device: CPU
...
    Device: GPU.0
...
    Device: GPU.1
```

### Default Auto-Device Selecting Logic

With the 2021.4 release, Auto-Device selects the most suitable device with the following default logic:
1. Check if dGPU, iGPU and CPU devices are available.
2. Get the precision of the input model, for example FP32.
3. According to the priority of dGPU, iGPU and CPU (in this order), if the device supports the precision of the input network, select it as the most suitable device.

For example, CPU, dGPU and iGPU can support the precisions and optimization capabilities below:

| Device | OPTIMIZATION_CAPABILITIES |
| :--- | :--- |
| CPU | WINOGRAD FP32 FP16 INT8 BIN |
| dGPU | FP32 BIN BATCHED_BLOB FP16 INT8 |
| iGPU | FP32 BIN BATCHED_BLOB FP16 INT8 |

When an application uses Auto-device to run an FP16 IR on a system with CPU, dGPU and iGPU, Auto-device offloads the workload to the dGPU.

When an application uses Auto-device to run an FP16 IR on a system with CPU and iGPU, Auto-device offloads the workload to the iGPU.

When an application uses Auto-device to run a WINOGRAD-enabled IR on a system with CPU, dGPU and iGPU, Auto-device offloads the workload to the CPU.

In any case, when loading the network to dGPU or iGPU fails, the network falls back to the CPU as the last choice.

### Limit Auto Target Devices Logic

According to the Auto-device selection logic from the previous section,
the most suitable device from the available devices is used to load the model, as follows:

@snippet snippets/AUTO2.cpp part2

Another way to load the model to a device from a limited choice of devices is with the Auto-device:

@snippet snippets/AUTO3.cpp part3

## Configuring the Individual Devices and Creating the Auto-Device on Top

As described in the first section, configure each individual device as usual and then just create the "AUTO" device on top:

@snippet snippets/AUTO4.cpp part4

Alternatively, you can combine all the individual device settings into a single config and load it,
allowing the Auto-device plugin to parse and apply it to the right devices. See the code example here:

@snippet snippets/AUTO5.cpp part5
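A rough illustration of that flow, with assumed per-device options used only as placeholders (`CPU_THREADS_NUM`, `GPU_THROUGHPUT_STREAMS`); the AUTO4/AUTO5 snippets remain the reference:

```cpp
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core ie;
    auto network = ie.ReadNetwork("model.xml");

    // Configure each individual device as usual (placeholder options)...
    ie.SetConfig({{"CPU_THREADS_NUM", "8"}}, "CPU");
    ie.SetConfig({{"GPU_THROUGHPUT_STREAMS", "1"}}, "GPU");

    // ...then create the "AUTO" device on top and load the network through it.
    auto exec = ie.LoadNetwork(network, "AUTO");
    return 0;
}
```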

## Using the Auto-Device with OpenVINO Samples and Benchmark App

Note that every OpenVINO sample that supports the "-d" (which stands for "device") command-line option transparently accepts the Auto-device.
The Benchmark Application is the best example of the optimal usage of the Auto-device.
You do not need to set the number of requests and CPU threads, as the application provides optimal out-of-the-box performance.
Below is an example command line to evaluate AUTO performance:

```sh
./benchmark_app -d AUTO -m <model> -i <input> -niter 1000
```

You can also use the Auto-device with a limited device choice:

```sh
./benchmark_app -d AUTO:CPU,GPU -m <model> -i <input> -niter 1000
```

Note that the default number of CPU streams is 1 when using "-d AUTO".

Note that you can use the FP16 IR to work with the Auto-device.
Also note that no demos are (yet) fully optimized for the Auto-device, by means of selecting the most suitable device,
using the GPU streams/throttling, and so on.
@@ -66,10 +66,8 @@ In addition to common parameters, the MYRIAD plugin accepts the following option

 | Parameter Name | Parameter Values | Default | Description |
 | :--- | :--- | :--- | :--- |
-| `KEY_VPU_MYRIAD_PLATFORM` | empty string/`VPU_MYRIAD_2450`/`VPU_MYRIAD_2480` | empty string | If set, the plugin will use a device with specific platform to allocate a network. |
 | `KEY_VPU_MYRIAD_PROTOCOL` | empty string/`VPU_MYRIAD_USB`/`VPU_MYRIAD_PCIE` | empty string | If set, the plugin will use a device with specific protocol to allocate a network. |
 | `KEY_VPU_MYRIAD_FORCE_RESET` | `YES`/`NO` | `NO` | Enables force reset of all booted devices when new ExecutableNetwork is created.<br />This is a plugin scope option and must be used with the plugin's SetConfig method only.<br />See <a href="#MYRIAD_DEVICE_ALLOC">Device allocation</a> section for details. |
-| `KEY_VPU_PLATFORM` | empty string/`VPU_2450`/`VPU_2480` | empty string | **Deprecated** Use `KEY_VPU_MYRIAD_PLATFORM` instead. <br />If set, the plugin will use a device with specific platform to allocate a network. |
 | `KEY_VPU_FORCE_RESET` | `YES`/`NO` | `NO` | **Deprecated** Use `KEY_VPU_MYRIAD_FORCE_RESET` instead. <br />Enables force reset of all booted devices when new ExecutableNetwork is created.<br />This is a plugin scope option and must be used with the plugin's SetConfig method only.<br />See <a href="#MYRIAD_DEVICE_ALLOC">Device allocation</a> section for details. |

 ## Device allocation <a name="MYRIAD_DEVICE_ALLOC"> </a>
@@ -14,6 +14,7 @@ The Inference Engine provides unique capabilities to infer deep learning models
 |[VPU plugins](VPU.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs |
 |[GNA plugin](GNA.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Speech Enabling Developer Kit, Amazon Alexa* Premium Far-Field Developer Kit, Intel® Pentium® Silver J5005 Processor, Intel® Pentium® Silver N5000 Processor, Intel® Celeron® J4005 Processor, Intel® Celeron® J4105 Processor, Intel® Celeron® Processor N4100, Intel® Celeron® Processor N4000, Intel® Core™ i3-8121U Processor, Intel® Core™ i7-1065G7 Processor, Intel® Core™ i7-1060G7 Processor, Intel® Core™ i5-1035G4 Processor, Intel® Core™ i5-1035G7 Processor, Intel® Core™ i5-1035G1 Processor, Intel® Core™ i5-1030G7 Processor, Intel® Core™ i5-1030G4 Processor, Intel® Core™ i3-1005G1 Processor, Intel® Core™ i3-1000G1 Processor, Intel® Core™ i3-1000G4 Processor|
 |[Multi-Device plugin](MULTI.md) |Multi-Device plugin enables simultaneous inference of the same network on several Intel® devices in parallel |
+|[Auto-Device plugin](AUTO.md) |Auto-Device plugin enables selecting Intel® device for inference automatically |
 |[Heterogeneous plugin](HETERO.md) |Heterogeneous plugin enables automatic inference splitting between several Intel® devices (for example if a device doesn't [support certain layers](#supported-layers)). |

 Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/).
@@ -0,0 +1,62 @@ (new file)
# Converting a Paddle* Model {#openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Paddle}

A summary of the steps for optimizing and deploying a model that was trained with Paddle\*:

1. [Configure the Model Optimizer](../Config_Model_Optimizer.md) for Paddle\*.
2. [Convert a Paddle\* Model](#Convert_From_Paddle) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values.
3. Test the model in the Intermediate Representation format using the [Inference Engine](../../../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) in the target environment via provided Inference Engine [sample applications](../../../IE_DG/Samples_Overview.md).
4. [Integrate](../../../IE_DG/Samples_Overview.md) the [Inference Engine](../../../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) in your application to deploy the model in the target environment.

## Supported Topologies

| Model Name| Model Type| Description|
| ------------- | ------------ | ------------- |
|ppocr-det| optical character recognition| Models are exported from [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.1/). Refer to [READ.md](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.1/#pp-ocr-20-series-model-listupdate-on-dec-15).|
|ppocr-rec| optical character recognition| Models are exported from [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.1/). Refer to [READ.md](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.1/#pp-ocr-20-series-model-listupdate-on-dec-15).|
|ResNet-50| classification| Models are exported from [PaddleClas](https://github.com/PaddlePaddle/PaddleClas/tree/release/2.1/). Refer to [getting_started_en.md](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.1/docs/en/tutorials/getting_started_en.md#4-use-the-inference-model-to-predict)|
|MobileNet v2| classification| Models are exported from [PaddleClas](https://github.com/PaddlePaddle/PaddleClas/tree/release/2.1/). Refer to [getting_started_en.md](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.1/docs/en/tutorials/getting_started_en.md#4-use-the-inference-model-to-predict)|
|MobileNet v3| classification| Models are exported from [PaddleClas](https://github.com/PaddlePaddle/PaddleClas/tree/release/2.1/). Refer to [getting_started_en.md](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.1/docs/en/tutorials/getting_started_en.md#4-use-the-inference-model-to-predict)|
|BiSeNet v2| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/docs/model_export.md#)|
|DeepLab v3 plus| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/docs/model_export.md#)|
|Faster-SCNN| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/docs/model_export.md#)|
|OCRNET| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/docs/model_export.md#)|
|Yolo v3| detection| Models are exported from [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.1). Refer to [EXPORT_MODEL.md](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/deploy/EXPORT_MODEL.md#).|
|ppyolo| detection| Models are exported from [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.1). Refer to [EXPORT_MODEL.md](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/deploy/EXPORT_MODEL.md#).|

> **NOTE:** The verified models are exported from the repository of branch release/2.1.

## Convert a Paddle* Model <a name="Convert_From_Paddle"></a>

To convert a Paddle\* model:

1. Go to the `$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer` directory.
2. Use the `mo.py` script to simply convert a model, specifying the framework, the path to the input model `.pdmodel` file and the path to an output directory with write permissions:
```sh
python3 mo.py --input_model <INPUT_MODEL>.pdmodel --output_dir <OUTPUT_MODEL_DIR> --framework=paddle
```

Parameters to convert your model:

* [Framework-agnostic parameters](Converting_Model_General.md): These parameters are used to convert a model trained with any supported framework.
> **NOTE:** `--scale`, `--scale_values`, `--mean_values`, `--mean_file` are unsupported in the current version of mo_paddle.

### Example of Converting a Paddle* Model
Below is the example command to convert yolo v3 Paddle\* network to OpenVINO IR network with Model Optimizer.
```sh
python3 mo.py --model_name yolov3_darknet53_270e_coco --output_dir <OUTPUT_MODEL_DIR> --framework=paddle --data_type=FP32 --reverse_input_channels --input_shape=[2,3,608,608],[1,2],[1,2] --input=image,im_shape,scale_factor --output=save_infer_model/scale_0.tmp_1,save_infer_model/scale_1.tmp_1 --input_model=yolov3.pdmodel
```

## Supported Paddle\* Layers
Refer to [Supported Framework Layers](../Supported_Frameworks_Layers.md) for the list of supported standard layers.

## Frequently Asked Questions (FAQ)

The Model Optimizer provides explanatory messages if it is unable to run to completion due to issues like typographical errors, incorrectly used options, or other issues. The message describes the potential cause of the problem and gives a link to the [Model Optimizer FAQ](../Model_Optimizer_FAQ.md). The FAQ has instructions on how to resolve most issues. The FAQ also includes links to relevant sections in the Model Optimizer Developer Guide to help you understand what went wrong.

## Summary

In this document, you learned:

* Basic information about how the Model Optimizer works with Paddle\* models
* Which Paddle\* models are supported
* How to convert a trained Paddle\* model using the Model Optimizer with framework-agnostic command-line options
@@ -16,7 +16,7 @@ The <code>mo.py</code> script is the universal entry point that can deduce the f
 * `.onnx` - ONNX\* models
 * `.nnet` - Kaldi\* models.

-If the model files do not have standard extensions, you can use the ``--framework {tf,caffe,kaldi,onnx,mxnet}`` option to specify the framework type explicitly.
+If the model files do not have standard extensions, you can use the ``--framework {tf,caffe,kaldi,onnx,mxnet,paddle}`` option to specify the framework type explicitly.

 For example, the following commands are equivalent:
 ```sh
@@ -33,6 +33,7 @@ Framework-specific parameters for:
 * [MXNet](Convert_Model_From_MxNet.md),
 * [ONNX](Convert_Model_From_ONNX.md),
 * [Kaldi](Convert_Model_From_Kaldi.md).
+* [Paddle](Convert_Model_From_Paddle.md).


 ## See Also
@@ -326,6 +326,7 @@ limitations under the License.
 </tab>
 <tab type="user" title="Heterogeneous Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_HETERO"/>
 <tab type="user" title="Multi-Device Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_MULTI"/>
+<tab type="user" title="Auto-Device Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_AUTO"/>
 <tab type="user" title="GNA Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_GNA"/>
 </tab>
 <tab type="user" title="Known Issues" url="@ref openvino_docs_IE_DG_Known_Issues_Limitations"/>
@@ -4,7 +4,16 @@

 **Category**: Comparison binary operation

-**Short description**: *Greater* performs element-wise comparison operation with two given tensors applying multi-directional broadcast rules.
+**Short description**: *Greater* performs element-wise comparison operation with two given tensors applying broadcast rules specified in the `auto_broadcast` attribute.
+
+**Detailed description**
+Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attribute is not `none`. Broadcasting is performed according to `auto_broadcast` value.
+
+After broadcasting, *Greater* does the following with the input tensors *a* and *b*:
+
+\f[
+o_{i} = a_{i} > b_{i}
+\f]
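As an informal illustration of the element-wise rule above (plain C++, with the second input already broadcast along the first axis under the *numpy* rules):

```cpp
#include <cstdio>

int main() {
    // a: shape (2, 3); b: shape (1, 3), broadcast over the first axis.
    const int a[2][3] = {{1, 4, 9}, {5, 2, 7}};
    const int b[3]    = {3, 3, 8};

    for (int i = 0; i < 2; ++i) {
        for (int j = 0; j < 3; ++j)
            std::printf("%d ", a[i][j] > b[j] ? 1 : 0);  // o_i = a_i > b_i
        std::printf("\n");
    }
    // Prints:
    // 0 1 1
    // 1 0 0
}
```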
 **Attributes**:

@@ -13,39 +22,33 @@
     * **Description**: specifies rules used for auto-broadcasting of input tensors.
     * **Range of values**:
         * *none* - no auto-broadcasting is allowed, all input shapes should match
-        * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in <a href="https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md">ONNX docs</a>.
+        * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md),
+        * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md).
     * **Type**: string
     * **Default value**: "numpy"
     * **Required**: *no*

 **Inputs**

-* **1**: A tensor of type *T*. **Required.**
+* **1**: A tensor of type *T* and arbitrary shape. **Required.**
-* **2**: A tensor of type *T*. **Required.**
+* **2**: A tensor of type *T* and arbitrary shape. **Required.**

 **Outputs**

-* **1**: The result of element-wise comparison operation. A tensor of type boolean.
+* **1**: The result of element-wise comparison operation applied to the input tensors. A tensor of type *T_BOOL* and shape equal to broadcasted shape of two inputs.

 **Types**

 * *T*: arbitrary supported type.
+* *T_BOOL*: `boolean`.

-**Detailed description**
-Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value.
-
-After broadcasting *Greater* does the following with the input tensors *a* and *b*:
-
-\f[
-o_{i} = a_{i} > b_{i}
-\f]

 **Examples**

-*Example 1*
+*Example 1: no broadcast*

 ```xml
 <layer ... type="Greater">
+    <data auto_broadcast="none"/>
     <input>
         <port id="0">
             <dim>256</dim>
@@ -65,9 +68,10 @@ o_{i} = a_{i} > b_{i}
 </layer>
 ```

-*Example 2: broadcast*
+*Example 2: numpy broadcast*
 ```xml
 <layer ... type="Greater">
+    <data auto_broadcast="numpy"/>
     <input>
         <port id="0">
             <dim>8</dim>
@@ -4,7 +4,18 @@

 **Category**: Comparison binary operation

-**Short description**: *NotEqual* performs element-wise comparison operation with two given tensors applying multi-directional broadcast rules.
+**Short description**: *NotEqual* performs element-wise comparison operation with two given tensors applying
+multi-directional broadcast rules specified in the `auto_broadcast` attribute.
+
+**Detailed description**
+Before performing comparison operation, input tensors *a* and *b* are broadcasted if their shapes are different.
+Broadcasting is performed according to `auto_broadcast` value.
+
+After broadcasting, *NotEqual* does the following with the input tensors *a* and *b*:
+
+\f[
+o_{i} = a_{i} \neq b_{i}
+\f]

 **Attributes**:

@@ -13,7 +24,8 @@
     * **Description**: specifies rules used for auto-broadcasting of input tensors.
     * **Range of values**:
         * *none* - no auto-broadcasting is allowed, all input shapes should match
-        * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in <a href="https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md">ONNX docs</a>.
+        * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md),
+        * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md).
     * **Type**: string
     * **Default value**: "numpy"
     * **Required**: *no*

@@ -31,15 +43,6 @@

 * *T*: arbitrary supported type.

-**Detailed description**
-Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value.
-
-After broadcasting *NotEqual* does the following with the input tensors *a* and *b*:
-
-\f[
-o_{i} = a_{i} \neq b_{i}
-\f]

 **Examples**

 *Example 1*
@@ -6,33 +6,7 @@

 **Short description**: *LogicalXor* performs element-wise logical XOR operation with two given tensors applying multi-directional broadcast rules.

-**Attributes**:
-
-* *auto_broadcast*
-
-    * **Description**: specifies rules used for auto-broadcasting of input tensors.
-    * **Range of values**:
-        * *none* - no auto-broadcasting is allowed, all input shapes should match
-        * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in <a href="https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md">ONNX docs</a>.
-    * **Type**: string
-    * **Default value**: "numpy"
-    * **Required**: *no*
-
-**Inputs**
-
-* **1**: A tensor of type *T*. **Required.**
-* **2**: A tensor of type *T*. **Required.**
-
-**Outputs**
-
-* **1**: The result of element-wise logical XOR operation. A tensor of type *T*.
-
-**Types**
-
-* *T*: boolean type.
-
-**Detailed description**
-Before performing logical operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value.
+**Detailed description**: Before performing logical operation, input tensors *a* and *b* are broadcasted if their shapes are different and the `auto_broadcast` attribute is not `none`. Broadcasting is performed according to `auto_broadcast` value.

 After broadcasting *LogicalXor* does the following with the input tensors *a* and *b*:

@@ -40,9 +14,35 @@ After broadcasting *LogicalXor* does the following with the input tensors *a* an
 o_{i} = a_{i} \oplus b_{i}
 \f]

+**Attributes**:
+
+* *auto_broadcast*
+
+    * **Description**: specifies rules used for auto-broadcasting of input tensors.
+    * **Range of values**:
+        * *none* - no auto-broadcasting is allowed, all input shapes must match
+        * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md),
+        * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md).
+    * **Type**: string
+    * **Default value**: "numpy"
+    * **Required**: *no*
+
+**Inputs**
+
+* **1**: A tensor of type *T_BOOL* and arbitrary shape. **Required.**
+* **2**: A tensor of type *T_BOOL* and arbitrary shape. **Required.**
+
+**Outputs**
+
+* **1**: The result of element-wise *LogicalXor* operation. A tensor of type *T_BOOL* and a shape equal to the broadcasted shape of the two inputs.
+
+**Types**
+
+* *T_BOOL*: `boolean`.

 **Examples**

-*Example 1*
+*Example 1: no broadcast*

 ```xml
 <layer ... type="LogicalXor">

@@ -65,7 +65,7 @@ o_{i} = a_{i} \oplus b_{i}
 </layer>
 ```

-*Example 2: broadcast*
+*Example 2: numpy broadcast*
 ```xml
 <layer ... type="LogicalXor">
     <input>
@@ -163,7 +163,7 @@ strides = [1, 1]
 kernel = [2, 2]
 rounding_type = "floor"
 auto_pad = "same_upper"
-output = [[[[5, 5, -6],
+output = [[[[5, 5, 3],
             [8, 9, 9]
             [8, 9, 9]],
            [[6, 5, 5],
360
docs/ops/pooling/MaxPool_8.md
Normal file
360
docs/ops/pooling/MaxPool_8.md
Normal file
@ -0,0 +1,360 @@
|
|||||||
|
## MaxPool <a name="MaxPool"></a> {#openvino_docs_ops_pooling_MaxPool_8}
|
||||||
|
|
||||||
|
**Versioned name**: *MaxPool-8*
|
||||||
|
|
||||||
|
**Category**: *Pooling*
|
||||||
|
|
||||||
|
**Short description**: Performs the max pooling operation on input.
|
||||||
|
|
||||||
|
**Detailed description**: Input shape can be either 3D, 4D, or 5D. The max pooling operation is performed with respect to input shape from the third dimension to the last dimension. If paddings are used, during the pooling calculation their values are `-inf`. The max pooling operation involves sliding a filter over each channel of a feature map and downsampling by choosing the largest value within the region covered by the filter.
|
||||||
|
|
||||||
|
**Attributes**: *Pooling* attributes are specified in the `data` node, which is a child of the layer node.
|
||||||
|
|
||||||
|
* *strides*
|
||||||
|
|
||||||
|
* **Description**: *strides* is a distance (in pixels) to slide the window on the feature map over the (z, y, x) axes for 3D poolings and (y, x) axes for 2D poolings. For example, *strides* equal to "4,2,1" means sliding the window 4 pixels at a time over depth dimension, 2 over height dimension, and 1 over width dimension.
|
||||||
|
* **Range of values**: integer values starting from 0
|
||||||
|
* **Type**: int[]
|
||||||
|
* **Required**: *yes*
|
||||||
|
|
||||||
|
* *dilations*
|
||||||
|
|
||||||
|
* **Description**: *dilations* specify the index of the next pixel to select when pooling. If not present, the dilation defaults to 1, meaning the adjacent pixel is chosen. A value of 2 indicates that one pixel is skipped and every other pixel is considered. Dilations specify one value for each spatial axis of the kernel: `(z, y, x)` for 3D poolings and `(y, x)` for 2D poolings.
|
||||||
|
* **Range of values**: integer values starting from 0
|
||||||
|
* **Type**: int[]
|
||||||
|
* **Default value**: `[1,1,...]`
|
||||||
|
* **Required**: *no*
|
||||||
|
|
||||||
|
* *pads_begin*
|
||||||
|
|
||||||
|
* **Description**: *pads_begin* is a number of pixels to add to the beginning along each axis. For example, *pads_begin* equal to "1,2" means adding 1 pixel to the top of the input and 2 to the left of the input. All added padding values are equal to negative infinity.
|
||||||
|
* **Range of values**: integer values starting from 0
|
||||||
|
* **Type**: int[]
|
||||||
|
* **Required**: *yes*
|
||||||
|
* **Note**: the attribute is ignored when *auto_pad* attribute is specified.
|
||||||
|
|
||||||
|
* *pads_end*
|
||||||
|
|
||||||
|
* **Description**: *pads_end* is a number of pixels to add to the ending along each axis. For example, *pads_end* equal to "1,2" means adding 1 pixel to the bottom of the input and 2 to the right of the input. All added padding values are equal to negative infinity.
|
||||||
|
* **Range of values**: integer values starting from 0
|
||||||
|
* **Type**: int[]
|
||||||
|
* **Required**: *yes*
|
||||||
|
* **Note**: the attribute is ignored when the *auto_pad* attribute is specified.
|
||||||
|
|
||||||
|
* *kernel*
|
||||||
|
|
||||||
|
* **Description**: *kernel* is a size of each filter. For example, *kernel* equal to (2, 3) means that each filter has height equal to 2 and width equal to 3.
|
||||||
|
* **Range of values**: integer values starting from 1
|
||||||
|
* **Type**: int[]
|
||||||
|
* **Required**: *yes*
|
||||||
|
|
||||||
|
* *rounding_type*
|
||||||
|
|
||||||
|
* **Description**: *rounding_type* is a type of rounding to be used to compute output shape.
|
||||||
|
* **Range of values**:
|
||||||
|
* *ceil*
|
||||||
|
* *floor*
|
||||||
|
* **Type**: string
|
||||||
|
* **Default value**: *floor*
|
||||||
|
* **Required**: *no*
|
||||||
|
|
||||||
|
* *auto_pad*
|
||||||
|
|
||||||
|
* **Description**: *auto_pad* how the padding is calculated. Possible values:
|
||||||
|
* *explicit*: explicit padding values from `pads_begin` and `pads_end` are used.
|
||||||
|
* *same_upper (same_lower)* the input is padded to match the output size. In case of odd padding value, an extra padding is added at the end (at the beginning).
|
||||||
|
* *valid* padding is not used.
|
||||||
|
* **Type**: string
|
||||||
|
* **Default value**: *explicit*
|
||||||
|
* **Required**: *no*
|
||||||
|
* **Note**: *pads_begin* and *pads_end* attributes are ignored when *auto_pad* is not equal to explicit.
|
||||||
|
|
||||||
|
* *index_element_type*
|
||||||
|
|
||||||
|
* **Description**: the type of output tensor with indices
|
||||||
|
* **Range of values**: "i64" or "i32"
|
||||||
|
* **Type**: string
|
||||||
|
* **Default value**: "i64"
|
||||||
|
* **Required**: *No*
|
||||||
|
|
||||||
|
* *axis*
|
||||||
|
|
||||||
|
* **Description**: indicator of the first dimension in the input shape that should be used to calculate the upper bound of allowed index output values. The upper bound is the product of dimensions starting from the one pointed by the 'axis' attribute until the end of the input shape.
|
||||||
|
* **Range of values**: integer number. Negative value means counting dimension from the end. The range is `[-R, R - 1]`, where `R` is the rank of the input tensor.
|
||||||
|
* **Type**: int
|
||||||
|
* **Default value**: 0
|
||||||
|
* **Required**: *no*
|
||||||
|
|
||||||
|
**Inputs**:
|
||||||
|
|
||||||
|
* **1**: 3D, 4D, or 5D input tensor of type T. Required.
|
||||||
|
|
||||||
|
**Outputs**:
|
||||||
|
* **1**: Output tensor of type *T* with the maximum values selected by the pooling operation. For an input of shape `[N, C, H]`, `[N, C, H, W]`, or `[N, C, H, W, D]`, the corresponding output shape is `[N, C, H_out]`, `[N, C, H_out, W_out]`, or `[N, C, H_out, W_out, D_out]`, respectively. The output tensor has the same data type as the input tensor.
|
||||||
|
|
||||||
|
* **2**: Output tensor of type *T_IND* with indices of values selected by the pooling operation.
|
||||||
|
Shape of this output matches the first output. The type of this output can be specified using the `index_element_type` attribute.
|
||||||
|
Values are computed as indices in the input tensor flattened to 1D, not taking padding into account (see the sketch below for an illustration). Examples for a 5D input tensor:
|
||||||
|
* When `axis == 0`, the values are in the range `[0, N * C * H * W * D)`.
|
||||||
|
* When `axis == 2`, the values are in the range `[0, H * W * D)`.
|
||||||
|
|
||||||
|
Note: the values of this output can only be calculated correctly if `pads_value` is set to `-infinity`.
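As an informal illustration, the sketch below (assumed helper name `flattened_index`, not part of any OpenVINO API) shows how such an index can be computed for an element of the input tensor given the `axis` attribute.

```cpp
#include <cstdint>
#include <vector>

// Illustrative only: index reported in the second MaxPool output for the input
// element at the given coordinate. The index is taken in the tensor flattened
// to 1D, ignoring padding; a non-zero axis restricts it to the trailing dims.
int64_t flattened_index(const std::vector<int64_t>& shape,   // e.g. {N, C, H, W, D}
                        const std::vector<int64_t>& coord,   // e.g. {n, c, h, w, d}
                        size_t axis) {
    int64_t index = 0;
    for (size_t d = axis; d < shape.size(); ++d) {
        index = index * shape[d] + coord[d];
    }
    // axis == 0 yields values in [0, N*C*H*W*D); axis == 2 yields [0, H*W*D).
    return index;
}
```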
|
||||||
|
|
||||||
|
|
||||||
|
**Types**
|
||||||
|
|
||||||
|
* *T*: floating point or integer type.
|
||||||
|
|
||||||
|
* *T_IND*: `int64` or `int32`.
|
||||||
|
|
||||||
|
|
||||||
|
**Mathematical Formulation**
|
||||||
|
Output shape calculation based on `auto_pad` and `rounding_type`:
|
||||||
|
* `auto_pad = explicit` and `rounding_type = floor`
|
||||||
|
`H_out = floor((H + pads_begin[0] + pads_end[0] - ((kernel[0] - 1) * dilations[0] + 1)) / strides[0] + 1)`
|
||||||
|
`W_out = floor((W + pads_begin[1] + pads_end[1] - ((kernel[1] - 1) * dilations[1] + 1)) / strides[1] + 1)`
|
||||||
|
`D_out = floor((D + pads_begin[2] + pads_end[2] - ((kernel[2] - 1) * dilations[2] + 1)) / strides[2] + 1)`
|
||||||
|
|
||||||
|
* `auto_pad = explicit` and `rounding_type = ceil`
|
||||||
|
`H_out = ceil((H + pads_begin[0] + pads_end[0] - ((kernel[0] - 1) * dilations[0] + 1)) / strides[0] + 1)`
|
||||||
|
`W_out = ceil((W + pads_begin[1] + pads_end[1] - ((kernel[1] - 1) * dilations[1] + 1)) / strides[1] + 1)`
|
||||||
|
`D_out = ceil((D + pads_begin[2] + pads_end[2] - ((kernel[2] - 1) * dilations[2] + 1)) / strides[2] + 1)`
|
||||||
|
|
||||||
|
* `auto_pad = valid`
|
||||||
|
`H_out = ceil((H - ((kernel[0] - 1) * dilations[0] + 1) + 1) / strides[0])`
|
||||||
|
`W_out = ceil((W - ((kernel[1] - 1) * dilations[1] + 1) + 1) / strides[1])`
|
||||||
|
`D_out = ceil((D - ((kernel[2] - 1) * dilations[2] + 1) + 1) / strides[2])`
|
||||||
|
|
||||||
|
* `auto_pad = same_upper / same_lower`
|
||||||
|
`H_out = H`
|
||||||
|
`W_out = W`
|
||||||
|
`D_out = D`
|
||||||
|
|
||||||
|
|
||||||
|
If `H + pads_begin[i] + pads_end[i] - ((kernel[i] - 1) * dilations[i] + 1)` is not evenly divisible by `strides[i]`, the result is rounded according to the `rounding_type` attribute.
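A minimal sketch of the shape formulas above for a single spatial axis is given below; the helper name `pooled_output_size` is hypothetical and the code is not part of any OpenVINO API.

```cpp
#include <cmath>
#include <string>

// Illustrative only: output size of one spatial axis according to the
// formulas above, driven by the auto_pad and rounding_type attributes.
int pooled_output_size(int input, int stride, int kernel, int dilation,
                       int pad_begin, int pad_end,
                       const std::string& auto_pad,
                       const std::string& rounding_type) {
    const int effective_kernel = (kernel - 1) * dilation + 1;
    if (auto_pad == "same_upper" || auto_pad == "same_lower") {
        return input;  // output spatial size equals the input size
    }
    if (auto_pad == "valid") {
        return static_cast<int>(
            std::ceil(static_cast<double>(input - effective_kernel + 1) / stride));
    }
    // auto_pad == "explicit": pads_begin / pads_end are used directly.
    const double out =
        static_cast<double>(input + pad_begin + pad_end - effective_kernel) / stride + 1;
    return static_cast<int>(rounding_type == "ceil" ? std::ceil(out) : std::floor(out));
}
```

For instance, with `input = 32`, `kernel = 2`, `strides = 2`, `pads_begin = pads_end = 1` and `rounding_type = floor` this yields 17, matching the second XML example at the end of this section.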
|
||||||
|
|
||||||
|
Example 1 shows how *MaxPool* operates with 4D input using 2D kernel and `auto_pad = explicit`.
|
||||||
|
|
||||||
|
```
|
||||||
|
input = [[[[-1, 2, 3],
|
||||||
|
[4, 5, -6],
|
||||||
|
[-7, 8, 9]]]]
|
||||||
|
strides = [1, 1]
|
||||||
|
pads_begin = [1, 1]
|
||||||
|
pads_end = [1, 1]
|
||||||
|
kernel = [2, 2]
|
||||||
|
rounding_type = "floor"
|
||||||
|
auto_pad = "explicit"
|
||||||
|
output0 = [[[[-1, 2, 3, 3],
|
||||||
|
[4, 5, 5, 3],
|
||||||
|
[4, 8, 9, 9],
|
||||||
|
[-7, 8, 9, 9]]]]
|
||||||
|
output1 = [[[[0, 1, 2, 2],
|
||||||
|
[3, 4, 4, 2],
|
||||||
|
[3, 7, 8, 8],
|
||||||
|
[6, 7, 8, 8]]]]
|
||||||
|
```
|
||||||
|
|
||||||
|
Example 2 shows how *MaxPool* operates with 3D input using 1D kernel and `auto_pad = valid`.
|
||||||
|
|
||||||
|
```
|
||||||
|
input = [[[-1, 2, 3, 5, -7, 9, 1]]]
|
||||||
|
strides = [1]
|
||||||
|
kernel = [3]
|
||||||
|
rounding_type = "floor"
|
||||||
|
auto_pad = "valid"
|
||||||
|
output0 = [[[3, 5, 5, 9, 9]]]
|
||||||
|
output1 = [[[2, 3, 3, 5, 5]]]
|
||||||
|
```
|
||||||
|
|
||||||
|
Example 3 shows how *MaxPool* operates with 4D input using 2D kernel and `auto_pad = same_lower`.
|
||||||
|
|
||||||
|
```
|
||||||
|
input = [[[[-1, 2, 3],
|
||||||
|
[4, 5, -6],
|
||||||
|
[-7, 8, 9]]]]
|
||||||
|
strides = [1, 1]
|
||||||
|
kernel = [2, 2]
|
||||||
|
rounding_type = "floor"
|
||||||
|
auto_pad = "same_lower"
|
||||||
|
output0 = [[[[-1, 2, 3],
|
||||||
|
[4, 5, 5],
|
||||||
|
[4, 8, 9]]]]
|
||||||
|
output1 = [[[[0, 1, 2],
|
||||||
|
[3, 4, 4],
|
||||||
|
[3, 7, 8]]]]
|
||||||
|
```
|
||||||
|
|
||||||
|
Example 4 shows how *MaxPool* operates with 4D input using 2D kernel and `auto_pad = same_upper`.
|
||||||
|
|
||||||
|
```
|
||||||
|
input = [[[[-1, 2, 3],
|
||||||
|
[4, 5, -6],
|
||||||
|
[-7, 8, 9]],
|
||||||
|
[[2, -1, 5],
|
||||||
|
[6, -7, 1],
|
||||||
|
[8, 2, -3]]]]
|
||||||
|
strides = [1, 1]
|
||||||
|
kernel = [2, 2]
|
||||||
|
rounding_type = "floor"
|
||||||
|
auto_pad = "same_upper"
|
||||||
|
output0 = [[[[5, 5, 3],
|
||||||
|
[8, 9, 9],
|
||||||
|
[8, 9, 9]],
|
||||||
|
[[6, 5, 5],
|
||||||
|
[8, 2, 1],
|
||||||
|
[8, 2, -3]]]]
|
||||||
|
output1 = [[[[4, 4, 2],
|
||||||
|
[7, 8, 8],
|
||||||
|
[7, 8, 8]],
|
||||||
|
[[12, 11, 11],
|
||||||
|
[15, 16, 14],
|
||||||
|
[15, 16, 17]]]]
|
||||||
|
```
|
||||||
|
|
||||||
|
Example 5 shows how *MaxPool* operates with 4D input using 2D kernel, `auto_pad = valid` and `rounding_type = ceil`.
|
||||||
|
|
||||||
|
```
|
||||||
|
input = [[[[-1, 2, 3],
|
||||||
|
[4, 5, -6],
|
||||||
|
[-7, 8, 9]]]]
|
||||||
|
strides = [2, 2]
|
||||||
|
kernel = [2, 2]
|
||||||
|
rounding_type = "ceil"
|
||||||
|
auto_pad = "valid"
|
||||||
|
output0 = [[[[5, 3],
|
||||||
|
[8, 9]]]]
|
||||||
|
output1 = [[[[4, 2],
|
||||||
|
[7, 8]]]]
|
||||||
|
```
|
||||||
|
|
||||||
|
Example 6 shows how *MaxPool* operates on 4D input using dilated 2D kernel, `auto_pad = explicit` and `rounding_type = floor`.
|
||||||
|
|
||||||
|
```
|
||||||
|
input = [[[[1, 2, 3],
|
||||||
|
[4, 5, 6],
|
||||||
|
[7, 8, 9]]]]
|
||||||
|
strides = [1, 1]
|
||||||
|
kernel = [2, 2]
|
||||||
|
dilations = [2, 2]
|
||||||
|
rounding_type = "floor"
|
||||||
|
auto_pad = "explicit"
|
||||||
|
pads_begin = [1, 1]
|
||||||
|
pads_end = [1, 1]
|
||||||
|
output0 = [[[[5, 6, 5],
|
||||||
|
[8, 9, 8],
|
||||||
|
[5, 6, 5]]]]
|
||||||
|
output1 = [[[[4, 5, 4],
|
||||||
|
[7, 8, 7],
|
||||||
|
[4, 5, 4]]]]
|
||||||
|
```
|
||||||
|
|
||||||
|
Example 7 shows how *MaxPool* operates on 4D input using 2D kernel, with non-default `axis` value.
|
||||||
|
|
||||||
|
```
|
||||||
|
input = [[[[1, 2, 3],
|
||||||
|
[4, 5, 6],
|
||||||
|
[7, 8, 9]],
|
||||||
|
[[10, 11, 12],
|
||||||
|
[13, 14, 15],
|
||||||
|
[16, 17, 18]]
|
||||||
|
]]
|
||||||
|
strides = [1, 1]
|
||||||
|
kernel = [2, 2]
|
||||||
|
dilations = [1, 1]
|
||||||
|
rounding_type = "floor"
|
||||||
|
auto_pad = "explicit"
|
||||||
|
pads_begin = [0, 0]
|
||||||
|
pads_end = [0, 0]
|
||||||
|
axis = 2
|
||||||
|
output0 = [[[[5, 6],
|
||||||
|
[8, 9]],
|
||||||
|
[[14, 15],
|
||||||
|
[17, 18]]]]
|
||||||
|
output1 = [[[[4, 5],
|
||||||
|
[7, 8]],
|
||||||
|
[[4, 5],
|
||||||
|
[7, 8]]]]
|
||||||
|
```
|
||||||
|
|
||||||
|
**Examples**
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<layer ... type="MaxPool" ... >
|
||||||
|
<data auto_pad="same_upper" kernel="2,2" pads_begin="1,1" pads_end="1,1" strides="2,2"/>
|
||||||
|
<input>
|
||||||
|
<port id="0">
|
||||||
|
<dim>1</dim>
|
||||||
|
<dim>3</dim>
|
||||||
|
<dim>32</dim>
|
||||||
|
<dim>32</dim>
|
||||||
|
</port>
|
||||||
|
</input>
|
||||||
|
<output>
|
||||||
|
<port id="1">
|
||||||
|
<dim>1</dim>
|
||||||
|
<dim>3</dim>
|
||||||
|
<dim>32</dim>
|
||||||
|
<dim>32</dim>
|
||||||
|
</port>
|
||||||
|
<port id="2">
|
||||||
|
<dim>1</dim>
|
||||||
|
<dim>3</dim>
|
||||||
|
<dim>32</dim>
|
||||||
|
<dim>32</dim>
|
||||||
|
</port>
|
||||||
|
</output>
|
||||||
|
</layer>
|
||||||
|
|
||||||
|
<layer ... type="MaxPool" ... >
|
||||||
|
<data auto_pad="explicit" kernel="2,2" pads_begin="1,1" pads_end="1,1" strides="2,2"/>
|
||||||
|
<input>
|
||||||
|
<port id="0">
|
||||||
|
<dim>1</dim>
|
||||||
|
<dim>3</dim>
|
||||||
|
<dim>32</dim>
|
||||||
|
<dim>32</dim>
|
||||||
|
</port>
|
||||||
|
</input>
|
||||||
|
<output>
|
||||||
|
<port id="1">
|
||||||
|
<dim>1</dim>
|
||||||
|
<dim>3</dim>
|
||||||
|
<dim>17</dim>
|
||||||
|
<dim>17</dim>
|
||||||
|
</port>
|
||||||
|
<port id="2">
|
||||||
|
<dim>1</dim>
|
||||||
|
<dim>3</dim>
|
||||||
|
<dim>17</dim>
|
||||||
|
<dim>17</dim>
|
||||||
|
</port>
|
||||||
|
</output>
|
||||||
|
</layer>
|
||||||
|
|
||||||
|
<layer ... type="MaxPool" ... >
|
||||||
|
<data auto_pad="valid" kernel="2,2" pads_begin="1,1" pads_end="1,1" strides="2,2"/>
|
||||||
|
<input>
|
||||||
|
<port id="0">
|
||||||
|
<dim>1</dim>
|
||||||
|
<dim>3</dim>
|
||||||
|
<dim>32</dim>
|
||||||
|
<dim>32</dim>
|
||||||
|
</port>
|
||||||
|
</input>
|
||||||
|
<output>
|
||||||
|
<port id="1">
|
||||||
|
<dim>1</dim>
|
||||||
|
<dim>3</dim>
|
||||||
|
<dim>16</dim>
|
||||||
|
<dim>16</dim>
|
||||||
|
</port>
|
||||||
|
<port id="2">
|
||||||
|
<dim>1</dim>
|
||||||
|
<dim>3</dim>
|
||||||
|
<dim>16</dim>
|
||||||
|
<dim>16</dim>
|
||||||
|
</port>
|
||||||
|
</output>
|
||||||
|
</layer>
|
||||||
|
```
|
12
docs/snippets/AUTO0.cpp
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#include <ie_core.hpp>
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
//! [part0]
|
||||||
|
InferenceEngine::Core ie;
|
||||||
|
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
|
||||||
|
// the 2 lines below are equivalent
|
||||||
|
InferenceEngine::ExecutableNetwork exec0 = ie.LoadNetwork(network, "AUTO");
|
||||||
|
InferenceEngine::ExecutableNetwork exec1 = ie.LoadNetwork(network, "");
|
||||||
|
//! [part0]
|
||||||
|
return 0;
|
||||||
|
}
|
15
docs/snippets/AUTO1.cpp
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
#include <ie_core.hpp>
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
//! [part1]
|
||||||
|
InferenceEngine::Core ie;
|
||||||
|
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
|
||||||
|
// "AUTO" plugin is (globally) pre-configured with the explicit option:
|
||||||
|
ie.SetConfig({{"AUTO_DEVICE_LIST", "CPU,GPU"}}, "AUTO");
|
||||||
|
// the 3 lines below are equivalent (the first leverages the pre-configured AUTO, while the second and third explicitly pass the same settings)
|
||||||
|
InferenceEngine::ExecutableNetwork exec0 = ie.LoadNetwork(network, "AUTO", {});
|
||||||
|
InferenceEngine::ExecutableNetwork exec1 = ie.LoadNetwork(network, "AUTO", {{"AUTO_DEVICE_LIST", "CPU,GPU"}});
|
||||||
|
InferenceEngine::ExecutableNetwork exec2 = ie.LoadNetwork(network, "AUTO:CPU,GPU");
|
||||||
|
//! [part1]
|
||||||
|
return 0;
|
||||||
|
}
|
10
docs/snippets/AUTO2.cpp
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#include <ie_core.hpp>
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
//! [part2]
|
||||||
|
InferenceEngine::Core ie;
|
||||||
|
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
|
||||||
|
InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "AUTO");
|
||||||
|
//! [part2]
|
||||||
|
return 0;
|
||||||
|
}
|
10
docs/snippets/AUTO3.cpp
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#include <ie_core.hpp>
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
//! [part3]
|
||||||
|
InferenceEngine::Core ie;
|
||||||
|
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
|
||||||
|
InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "AUTO:CPU,GPU");
|
||||||
|
//! [part3]
|
||||||
|
return 0;
|
||||||
|
}
|
19
docs/snippets/AUTO4.cpp
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
#include <ie_core.hpp>
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
const std::map<std::string, std::string> cpu_config = { { InferenceEngine::PluginConfigParams::KEY_PERF_COUNT, InferenceEngine::PluginConfigParams::YES } };
|
||||||
|
const std::map<std::string, std::string> gpu_config = { { InferenceEngine::PluginConfigParams::KEY_PERF_COUNT, InferenceEngine::PluginConfigParams::YES } };
|
||||||
|
//! [part4]
|
||||||
|
InferenceEngine::Core ie;
|
||||||
|
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
|
||||||
|
// configure the CPU device first
|
||||||
|
ie.SetConfig(cpu_config, "CPU");
|
||||||
|
// configure the GPU device
|
||||||
|
ie.SetConfig(gpu_config, "GPU");
|
||||||
|
// load the network to the auto-device
|
||||||
|
InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "AUTO");
|
||||||
|
// the new metric allows querying the optimization capabilities
|
||||||
|
std::vector<std::string> device_cap = exeNetwork.GetMetric(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
|
||||||
|
//! [part4]
|
||||||
|
return 0;
|
||||||
|
}
|
15
docs/snippets/AUTO5.cpp
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
#include <ie_core.hpp>
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
std::string device_name = "AUTO:CPU,GPU";
|
||||||
|
const std::map< std::string, std::string > full_config = {};
|
||||||
|
//! [part5]
|
||||||
|
InferenceEngine::Core ie;
|
||||||
|
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
|
||||||
|
// 'device_name' can be "AUTO:CPU,GPU" to configure the auto-device to use CPU and GPU
|
||||||
|
InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, device_name, full_config);
|
||||||
|
// the new metric allows querying the optimization capabilities
|
||||||
|
std::vector<std::string> device_cap = exeNetwork.GetMetric(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
|
||||||
|
//! [part5]
|
||||||
|
return 0;
|
||||||
|
}
|
@ -0,0 +1,84 @@
|
|||||||
|
// Copyright (C) 2018-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <ie_core.hpp>
|
||||||
|
#include <ie_ngraph_utils.hpp>
|
||||||
|
#include <ngraph/ngraph.hpp>
|
||||||
|
#include <shared_test_classes/base/layer_test_utils.hpp>
|
||||||
|
|
||||||
|
#include "comparison.hpp"
|
||||||
|
|
||||||
|
using namespace ngraph;
|
||||||
|
using namespace InferenceEngine;
|
||||||
|
using ComparisonTypes = ngraph::helpers::ComparisonTypes;
|
||||||
|
|
||||||
|
namespace reference_tests {
|
||||||
|
namespace ComparisonOpsRefTestDefinitions {
|
||||||
|
namespace {
|
||||||
|
TEST_P(ReferenceComparisonLayerTest, GreaterCompareWithHardcodedRefs) {
|
||||||
|
Exec();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <element::Type_t IN_ET>
|
||||||
|
std::vector<RefComparisonParams> generateComparisonParams(const element::Type& type) {
|
||||||
|
using T = typename element_type_traits<IN_ET>::value_type;
|
||||||
|
std::vector<RefComparisonParams> compParams {
|
||||||
|
// 1D // 2D // 3D // 4D
|
||||||
|
Builder {}
|
||||||
|
.compType(ComparisonTypes::GREATER)
|
||||||
|
.input1({{2, 2}, type, std::vector<T> {0, 12, 23, 0}})
|
||||||
|
.input2({{2, 2}, type, std::vector<T> {0, 12, 23, 0}})
|
||||||
|
.expected({{2, 2}, element::boolean, std::vector<char> {0, 0, 0, 0}}),
|
||||||
|
Builder {}
|
||||||
|
.compType(ComparisonTypes::GREATER)
|
||||||
|
.input1({{2, 3}, type, std::vector<T> {0, 6, 45, 1, 21, 21}})
|
||||||
|
.input2({{2, 3}, type, std::vector<T> {1, 18, 23, 1, 19, 21}})
|
||||||
|
.expected({{2, 3}, element::boolean, std::vector<char> {0, 0, 1, 0, 1, 0}}),
|
||||||
|
Builder {}
|
||||||
|
.compType(ComparisonTypes::GREATER)
|
||||||
|
.input1({{1}, type, std::vector<T> {53}})
|
||||||
|
.input2({{1}, type, std::vector<T> {53}})
|
||||||
|
.expected({{1}, element::boolean, std::vector<char> {0}}),
|
||||||
|
Builder {}
|
||||||
|
.compType(ComparisonTypes::GREATER)
|
||||||
|
.input1({{2, 4}, type, std::vector<T> {0, 12, 23, 0, 1, 5, 12, 8}})
|
||||||
|
.input2({{2, 4}, type, std::vector<T> {0, 12, 23, 0, 10, 5, 11, 8}})
|
||||||
|
.expected({{2, 4}, element::boolean, std::vector<char> {0, 0, 0, 0, 0, 0, 1, 0}}),
|
||||||
|
Builder {}
|
||||||
|
.compType(ComparisonTypes::GREATER)
|
||||||
|
.input1({{3, 1, 2}, type, std::vector<T> {2, 1, 4, 1, 3, 1}})
|
||||||
|
.input2({{1, 2, 1}, type, std::vector<T> {1, 1}})
|
||||||
|
.expected({{3, 2, 2}, element::boolean, std::vector<char> {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0}}),
|
||||||
|
Builder {}
|
||||||
|
.compType(ComparisonTypes::GREATER)
|
||||||
|
.input1({{2, 1, 2, 1}, type, std::vector<T> {2, 1, 4, 1}})
|
||||||
|
.input2({{1, 2, 1}, type, std::vector<T> {1, 1}})
|
||||||
|
.expected({{2, 1, 2, 1}, element::boolean, std::vector<char> {1, 0, 1, 0}})};
|
||||||
|
return compParams;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<RefComparisonParams> generateComparisonCombinedParams() {
|
||||||
|
const std::vector<std::vector<RefComparisonParams>> compTypeParams {
|
||||||
|
generateComparisonParams<element::Type_t::f32>(element::f32),
|
||||||
|
generateComparisonParams<element::Type_t::f16>(element::f16),
|
||||||
|
generateComparisonParams<element::Type_t::i32>(element::i32),
|
||||||
|
generateComparisonParams<element::Type_t::i64>(element::i64),
|
||||||
|
generateComparisonParams<element::Type_t::u32>(element::u32),
|
||||||
|
generateComparisonParams<element::Type_t::u64>(element::u64),
|
||||||
|
generateComparisonParams<element::Type_t::boolean>(element::boolean)};
|
||||||
|
std::vector<RefComparisonParams> combinedParams;
|
||||||
|
|
||||||
|
for (const auto& params : compTypeParams) {
|
||||||
|
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
|
||||||
|
}
|
||||||
|
return combinedParams;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()),
|
||||||
|
ReferenceComparisonLayerTest::getTestCaseName);
|
||||||
|
} // namespace ComparisonOpsRefTestDefinitions
|
||||||
|
} // namespace reference_tests
|
@ -0,0 +1,48 @@
|
|||||||
|
// Copyright (C) 2018-2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <ie_core.hpp>
|
||||||
|
#include <ie_ngraph_utils.hpp>
|
||||||
|
#include <ngraph/ngraph.hpp>
|
||||||
|
#include <shared_test_classes/base/layer_test_utils.hpp>
|
||||||
|
#include <tuple>
|
||||||
|
|
||||||
|
#include "logical.hpp"
|
||||||
|
|
||||||
|
using namespace ngraph;
|
||||||
|
using namespace InferenceEngine;
|
||||||
|
using LogicalTypes = ngraph::helpers::LogicalTypes;
|
||||||
|
|
||||||
|
namespace reference_tests {
|
||||||
|
namespace LogicalOpsRefTestDefinitions {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
std::vector<RefLogicalParams> generateLogicalParams() {
|
||||||
|
std::vector<RefLogicalParams> logicalParams {
|
||||||
|
Builder {}
|
||||||
|
.opType(LogicalTypes::LOGICAL_XOR)
|
||||||
|
.input1({{2, 2}, element::boolean, std::vector<char> {true, false, true, false}})
|
||||||
|
.input2({{2, 2}, element::boolean, std::vector<char> {false, true, true, false}})
|
||||||
|
.expected({{2, 2}, element::boolean, std::vector<char> {true, true, false, false}}),
|
||||||
|
Builder {}
|
||||||
|
.opType(LogicalTypes::LOGICAL_XOR)
|
||||||
|
.input1({{2, 1, 2, 1}, element::boolean, std::vector<char> {true, false, true, false}})
|
||||||
|
.input2({{1, 1, 2, 1}, element::boolean, std::vector<char> {true, false}})
|
||||||
|
.expected({{2, 1, 2, 1}, element::boolean, std::vector<char> {false, false, false, false}}),
|
||||||
|
Builder {}
|
||||||
|
.opType(LogicalTypes::LOGICAL_XOR)
|
||||||
|
.input1({{3, 4}, element::boolean, std::vector<char> {true, true, true, true, true, false, true, false, false, true, true, true}})
|
||||||
|
.input2({{3, 4}, element::boolean, std::vector<char> {true, true, true, true, true, true, true, false, false, true, true, false}})
|
||||||
|
.expected({{3, 4}, element::boolean, std::vector<char> {false, false, false, false, false, true, false, false, false, false, false, true}})};
|
||||||
|
return logicalParams;
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_SUITE_P(smoke_LogicalOr_With_Hardcoded_Refs, ReferenceLogicalLayerTest, ::testing::ValuesIn(generateLogicalParams()),
|
||||||
|
ReferenceLogicalLayerTest::getTestCaseName);
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
} // namespace LogicalOpsRefTestDefinitions
|
||||||
|
} // namespace reference_tests
|
@ -7,10 +7,10 @@ import sys
|
|||||||
import errno
|
import errno
|
||||||
import subprocess # nosec
|
import subprocess # nosec
|
||||||
import typing
|
import typing
|
||||||
|
import multiprocessing
|
||||||
from fnmatch import fnmatchcase
|
from fnmatch import fnmatchcase
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from shutil import copyfile, rmtree
|
from shutil import copyfile, rmtree
|
||||||
from distutils.command.install import install
|
|
||||||
from distutils.command.build import build
|
from distutils.command.build import build
|
||||||
from distutils.command.clean import clean
|
from distutils.command.clean import clean
|
||||||
from distutils.errors import DistutilsSetupError
|
from distutils.errors import DistutilsSetupError
|
||||||
@ -27,11 +27,11 @@ PYTHON_VERSION = f'python{sys.version_info.major}.{sys.version_info.minor}'
|
|||||||
|
|
||||||
# The following variables can be defined in environment or .env file
|
# The following variables can be defined in environment or .env file
|
||||||
CMAKE_BUILD_DIR = config('CMAKE_BUILD_DIR', '.')
|
CMAKE_BUILD_DIR = config('CMAKE_BUILD_DIR', '.')
|
||||||
CORE_LIBS_DIR = config('CORE_LIBS_DIR', '')
|
CORE_LIBS_DIR = config('CORE_LIBS_DIR', 'deployment_tools/inference_engine/lib/intel64')
|
||||||
PLUGINS_LIBS_DIR = config('PLUGINS_LIBS_DIR', '')
|
PLUGINS_LIBS_DIR = config('PLUGINS_LIBS_DIR', 'deployment_tools/inference_engine/lib/intel64')
|
||||||
NGRAPH_LIBS_DIR = config('NGRAPH_LIBS_DIR', '')
|
NGRAPH_LIBS_DIR = config('NGRAPH_LIBS_DIR', 'deployment_tools/ngraph/lib')
|
||||||
TBB_LIBS_DIR = config('TBB_LIBS_DIR', '')
|
TBB_LIBS_DIR = config('TBB_LIBS_DIR', 'deployment_tools/inference_engine/external/tbb/lib')
|
||||||
PY_PACKAGES_DIR = config('PY_PACKAGES_DIR', '')
|
PY_PACKAGES_DIR = config('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}')
|
||||||
LIBS_RPATH = '$ORIGIN' if sys.platform == 'linux' else '@loader_path'
|
LIBS_RPATH = '$ORIGIN' if sys.platform == 'linux' else '@loader_path'
|
||||||
|
|
||||||
LIB_INSTALL_CFG = {
|
LIB_INSTALL_CFG = {
|
||||||
@ -118,7 +118,66 @@ class PrebuiltExtension(Extension):
|
|||||||
class CustomBuild(build):
|
class CustomBuild(build):
|
||||||
"""Custom implementation of build_clib"""
|
"""Custom implementation of build_clib"""
|
||||||
|
|
||||||
|
cmake_build_types = ['Release', 'Debug', 'RelWithDebInfo', 'MinSizeRel']
|
||||||
|
user_options = [
|
||||||
|
('config=', None, 'Build configuration [{types}].'.format(types='|'.join(cmake_build_types))),
|
||||||
|
('jobs=', None, 'Specifies the number of jobs to use with make.'),
|
||||||
|
('cmake-args=', None, 'Additional options to be passed to CMake.'),
|
||||||
|
]
|
||||||
|
|
||||||
|
def initialize_options(self):
|
||||||
|
"""Set default values for all the options that this command supports."""
|
||||||
|
super().initialize_options()
|
||||||
|
self.build_base = 'build'
|
||||||
|
self.config = None
|
||||||
|
self.jobs = None
|
||||||
|
self.cmake_args = None
|
||||||
|
|
||||||
|
def finalize_options(self):
|
||||||
|
"""Set final values for all the options that this command supports."""
|
||||||
|
super().finalize_options()
|
||||||
|
|
||||||
|
if not self.config:
|
||||||
|
if self.debug:
|
||||||
|
self.config = 'Debug'
|
||||||
|
else:
|
||||||
|
self.announce('Set default value for CMAKE_BUILD_TYPE = Release.', level=4)
|
||||||
|
self.config = 'Release'
|
||||||
|
else:
|
||||||
|
build_types = [item.lower() for item in self.cmake_build_types]
|
||||||
|
try:
|
||||||
|
i = build_types.index(str(self.config).lower())
|
||||||
|
self.config = self.cmake_build_types[i]
|
||||||
|
self.debug = True if 'Debug' == self.config else False
|
||||||
|
except ValueError:
|
||||||
|
self.announce('Unsupported CMAKE_BUILD_TYPE value: ' + self.config, level=4)
|
||||||
|
self.announce('Supported values: {types}'.format(types=', '.join(self.cmake_build_types)), level=4)
|
||||||
|
sys.exit(1)
|
||||||
|
if self.jobs is None and os.getenv('MAX_JOBS') is not None:
|
||||||
|
self.jobs = os.getenv('MAX_JOBS')
|
||||||
|
self.jobs = multiprocessing.cpu_count() if self.jobs is None else int(self.jobs)
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
|
global CMAKE_BUILD_DIR
|
||||||
|
self.jobs = multiprocessing.cpu_count()
|
||||||
|
plat_specifier = '.{0}-{1}.{2}'.format(self.plat_name, *sys.version_info[:2])
|
||||||
|
self.build_temp = os.path.join(self.build_base, 'temp' + plat_specifier, self.config)
|
||||||
|
|
||||||
|
# if setup.py is directly called use CMake to build product
|
||||||
|
if CMAKE_BUILD_DIR == '.':
|
||||||
|
openvino_root_dir = os.path.normpath(os.path.join(CMAKE_BUILD_DIR, '../../../../'))
|
||||||
|
self.announce('Configuring cmake project', level=3)
|
||||||
|
|
||||||
|
self.spawn(['cmake', '-H' + openvino_root_dir, '-B' + self.build_temp,
|
||||||
|
'-DCMAKE_BUILD_TYPE={type}'.format(type=self.config),
|
||||||
|
'-DENABLE_PYTHON=ON',
|
||||||
|
'-DNGRAPH_ONNX_FRONTEND_ENABLE=ON'])
|
||||||
|
|
||||||
|
self.announce('Building binaries', level=3)
|
||||||
|
self.spawn(['cmake', '--build', self.build_temp,
|
||||||
|
'--config', self.config, '-j', str(self.jobs)])
|
||||||
|
CMAKE_BUILD_DIR = self.build_temp
|
||||||
|
|
||||||
self.run_command('build_clib')
|
self.run_command('build_clib')
|
||||||
build.run(self)
|
build.run(self)
|
||||||
# Copy extra package_data content filtered by find_packages
|
# Copy extra package_data content filtered by find_packages
|
||||||
@ -133,14 +192,6 @@ class CustomBuild(build):
|
|||||||
copyfile(path, dst / path_rel)
|
copyfile(path, dst / path_rel)
|
||||||
|
|
||||||
|
|
||||||
class CustomInstall(install):
|
|
||||||
"""Enable build_clib during the installation"""
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
self.run_command('build_clib')
|
|
||||||
install.run(self)
|
|
||||||
|
|
||||||
|
|
||||||
class PrepareLibs(build_clib):
|
class PrepareLibs(build_clib):
|
||||||
"""Prepare prebuilt libraries"""
|
"""Prepare prebuilt libraries"""
|
||||||
|
|
||||||
@ -369,6 +420,7 @@ if os.path.exists(package_license):
|
|||||||
packages = find_namespace_packages(get_package_dir(PY_INSTALL_CFG))
|
packages = find_namespace_packages(get_package_dir(PY_INSTALL_CFG))
|
||||||
package_data: typing.Dict[str, list] = {}
|
package_data: typing.Dict[str, list] = {}
|
||||||
|
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
version=config('WHEEL_VERSION', '0.0.0'),
|
version=config('WHEEL_VERSION', '0.0.0'),
|
||||||
author_email=config('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'),
|
author_email=config('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'),
|
||||||
@ -376,14 +428,13 @@ setup(
|
|||||||
license=config('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'),
|
license=config('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'),
|
||||||
author=config('WHEEL_AUTHOR', 'Intel Corporation'),
|
author=config('WHEEL_AUTHOR', 'Intel Corporation'),
|
||||||
description=config('WHEEL_DESC', 'Inference Engine Python* API'),
|
description=config('WHEEL_DESC', 'Inference Engine Python* API'),
|
||||||
install_requires=get_dependencies(config('WHEEL_REQUIREMENTS', 'requirements.txt')),
|
install_requires=get_dependencies(config('WHEEL_REQUIREMENTS', 'meta/openvino.requirements.txt')),
|
||||||
long_description=get_description(config('WHEEL_OVERVIEW', 'pypi_overview.md')),
|
long_description=get_description(config('WHEEL_OVERVIEW', 'meta/pypi_overview.md')),
|
||||||
long_description_content_type='text/markdown',
|
long_description_content_type='text/markdown',
|
||||||
download_url=config('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'),
|
download_url=config('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'),
|
||||||
url=config('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'),
|
url=config('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'),
|
||||||
cmdclass={
|
cmdclass={
|
||||||
'build': CustomBuild,
|
'build': CustomBuild,
|
||||||
'install': CustomInstall,
|
|
||||||
'build_clib': PrepareLibs,
|
'build_clib': PrepareLibs,
|
||||||
'build_ext': CopyExt,
|
'build_ext': CopyExt,
|
||||||
'clean': CustomClean,
|
'clean': CustomClean,
|
||||||
|
@ -212,6 +212,9 @@ int main(int argc, char* argv[]) {
|
|||||||
bool perf_counts = false;
|
bool perf_counts = false;
|
||||||
// Update config per device according to command line parameters
|
// Update config per device according to command line parameters
|
||||||
for (auto& device : devices) {
|
for (auto& device : devices) {
|
||||||
|
if (device == "AUTO") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (!config.count(device))
|
if (!config.count(device))
|
||||||
config[device] = {};
|
config[device] = {};
|
||||||
std::map<std::string, std::string>& device_config = config.at(device);
|
std::map<std::string, std::string>& device_config = config.at(device);
|
||||||
|
@ -627,10 +627,9 @@ int main(int argc, char* argv[]) {
|
|||||||
|
|
||||||
if (FLAGS_q.compare("user") == 0) {
|
if (FLAGS_q.compare("user") == 0) {
|
||||||
if (!FLAGS_rg.empty()) {
|
if (!FLAGS_rg.empty()) {
|
||||||
slog::warn
|
slog::warn << "Custom scale factor will be used for imported gna model: " << FLAGS_rg << slog::endl;
|
||||||
<< "Custom scale factor will be ignored - using scale factor from provided imported gna model: "
|
}
|
||||||
<< FLAGS_rg << slog::endl;
|
|
||||||
} else {
|
|
||||||
auto scaleFactorInput = ParseScaleFactors(FLAGS_sf);
|
auto scaleFactorInput = ParseScaleFactors(FLAGS_sf);
|
||||||
if (numInputFiles != scaleFactorInput.size()) {
|
if (numInputFiles != scaleFactorInput.size()) {
|
||||||
std::string errMessage(
|
std::string errMessage(
|
||||||
@ -641,11 +640,9 @@ int main(int argc, char* argv[]) {
|
|||||||
|
|
||||||
for (size_t i = 0; i < scaleFactorInput.size(); ++i) {
|
for (size_t i = 0; i < scaleFactorInput.size(); ++i) {
|
||||||
slog::info << "For input " << i << " using scale factor of " << scaleFactorInput[i] << slog::endl;
|
slog::info << "For input " << i << " using scale factor of " << scaleFactorInput[i] << slog::endl;
|
||||||
std::string scaleFactorConfigKey =
|
std::string scaleFactorConfigKey = GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_") + std::to_string(i);
|
||||||
GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_") + std::to_string(i);
|
|
||||||
gnaPluginConfig[scaleFactorConfigKey] = scaleFactorInput[i];
|
gnaPluginConfig[scaleFactorConfigKey] = scaleFactorInput[i];
|
||||||
}
|
}
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// "static" quantization with calculated scale factor
|
// "static" quantization with calculated scale factor
|
||||||
if (!FLAGS_rg.empty()) {
|
if (!FLAGS_rg.empty()) {
|
||||||
|
@ -1,136 +0,0 @@
|
|||||||
// Copyright (C) 2018-2021 Intel Corporation
|
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
|
||||||
//
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <memory>
|
|
||||||
#include <map>
|
|
||||||
|
|
||||||
#include "ie_metric_helpers.hpp"
|
|
||||||
#include "auto_exec_network.hpp"
|
|
||||||
#include "auto_infer_request.hpp"
|
|
||||||
|
|
||||||
namespace AutoPlugin {
|
|
||||||
using namespace InferenceEngine;
|
|
||||||
|
|
||||||
AutoExecutableNetwork::AutoExecutableNetwork(NetworkFuture cpuFuture,
|
|
||||||
NetworkFuture acceleratorFuture,
|
|
||||||
bool enablePerfCount)
|
|
||||||
: _cpuFuture(std::move(cpuFuture))
|
|
||||||
, _acceleratorFuture(std::move(acceleratorFuture))
|
|
||||||
, _enablePerfCount(enablePerfCount) {
|
|
||||||
// both are valid, like AUTO:CPU,GPU
|
|
||||||
if (_cpuFuture.valid() && _acceleratorFuture.valid()) {
|
|
||||||
try {
|
|
||||||
_networkFirstReady = _cpuFuture.get();
|
|
||||||
_alreadyActualNetwork = false;
|
|
||||||
} catch (const std::exception& e) {
|
|
||||||
printf("Warning: load network to CPU failed: %s\n", e.what());
|
|
||||||
_networkActualNeeded = _acceleratorFuture.get();
|
|
||||||
_alreadyActualNetwork = true;
|
|
||||||
}
|
|
||||||
} else if (_acceleratorFuture.valid()) { // only accelerator is valid, like AUTO:GPU
|
|
||||||
_networkActualNeeded = _acceleratorFuture.get();
|
|
||||||
_alreadyActualNetwork = true;
|
|
||||||
} else if (_cpuFuture.valid()) { // only CPU is valid, like AUTO:CPU
|
|
||||||
_networkActualNeeded = _cpuFuture.get();
|
|
||||||
_alreadyActualNetwork = true;
|
|
||||||
} else {
|
|
||||||
IE_THROW() << "No device task available";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
AutoExecutableNetwork::~AutoExecutableNetwork() = default;
|
|
||||||
|
|
||||||
InferenceEngine::IInferRequestInternal::Ptr AutoExecutableNetwork::CreateInferRequestImpl(InputsDataMap networkInputs,
|
|
||||||
OutputsDataMap networkOutputs) {
|
|
||||||
InferenceEngine::SoExecutableNetworkInternal network;
|
|
||||||
SoIInferRequestInternal inferRequest;
|
|
||||||
if (TryGetActualNetwork(network)) {
|
|
||||||
inferRequest = {_networkActualNeeded, _networkActualNeeded->CreateInferRequest()};
|
|
||||||
} else {
|
|
||||||
inferRequest = {_networkFirstReady, _networkFirstReady->CreateInferRequest()};
|
|
||||||
}
|
|
||||||
return std::make_shared<AutoInferRequest>(_networkInputs, _networkOutputs, inferRequest,
|
|
||||||
shared_from_this(), _alreadyActualNetwork,
|
|
||||||
_enablePerfCount);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AutoExecutableNetwork::TryGetActualNetwork(InferenceEngine::SoExecutableNetworkInternal& soExecNetwork) {
|
|
||||||
// try to get actual network
|
|
||||||
if (_acceleratorFuture.valid() && _acceleratorFuture.wait_for(std::chrono::nanoseconds(0)) == std::future_status::ready) {
|
|
||||||
soExecNetwork = _acceleratorFuture.get();
|
|
||||||
_alreadyActualNetwork = true;
|
|
||||||
_networkActualNeeded = soExecNetwork;
|
|
||||||
// reapply config to actual network
|
|
||||||
// fixme: GPU doesn't support SetConfig and throw exception
|
|
||||||
try {
|
|
||||||
_networkActualNeeded->SetConfig(_cacheConfig);
|
|
||||||
} catch (...) {
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
// if already get actual network
|
|
||||||
if (_alreadyActualNetwork) {
|
|
||||||
soExecNetwork = _networkActualNeeded;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void AutoExecutableNetwork::WaitForActualDevice() const {
|
|
||||||
if (_alreadyActualNetwork) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (_acceleratorFuture.valid()) {
|
|
||||||
_networkActualNeeded = _acceleratorFuture.get();
|
|
||||||
_alreadyActualNetwork = true;
|
|
||||||
} else {
|
|
||||||
IE_THROW() << "Export failed due to no valid executable network";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void AutoExecutableNetwork::Export(std::ostream& networkModel) {
|
|
||||||
//fixme: the Export should work with actual device, so we have to wait!!!
|
|
||||||
WaitForActualDevice();
|
|
||||||
_networkActualNeeded->Export(networkModel);
|
|
||||||
}
|
|
||||||
|
|
||||||
RemoteContext::Ptr AutoExecutableNetwork::GetContext() const {
|
|
||||||
// fixme: the GetContext should work with actual device, so we have to wait!!!
|
|
||||||
WaitForActualDevice();
|
|
||||||
return _networkActualNeeded->GetContext();
|
|
||||||
}
|
|
||||||
|
|
||||||
InferenceEngine::CNNNetwork AutoExecutableNetwork::GetExecGraphInfo() {
|
|
||||||
WaitForActualDevice();
|
|
||||||
return _networkActualNeeded->GetExecGraphInfo();
|
|
||||||
}
|
|
||||||
|
|
||||||
Parameter AutoExecutableNetwork::GetMetric(const std::string &name) const {
|
|
||||||
// fixme: should we wait actual device? meanwhile it will block inference, how to fix?
|
|
||||||
// WaitForActualDevice();
|
|
||||||
if (_alreadyActualNetwork) {
|
|
||||||
return _networkActualNeeded->GetMetric(name);
|
|
||||||
} else {
|
|
||||||
return _networkFirstReady->GetMetric(name);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void AutoExecutableNetwork::SetConfig(const std::map<std::string, Parameter>& config) {
|
|
||||||
//fixme: have to store the config and reapply when the networks swapped
|
|
||||||
_cacheConfig = config;
|
|
||||||
if (_alreadyActualNetwork) {
|
|
||||||
_networkActualNeeded->SetConfig(config);
|
|
||||||
} else {
|
|
||||||
_networkFirstReady->SetConfig(config);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Parameter AutoExecutableNetwork::GetConfig(const std::string& name) const {
|
|
||||||
//fixme: carefuly select between FirstLoaded and ActuallyNeeded
|
|
||||||
return _cacheConfig;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace AutoPlugin
|
|
@ -1,56 +0,0 @@
|
|||||||
// Copyright (C) 2018-2021 Intel Corporation
|
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
|
||||||
//
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <atomic>
|
|
||||||
#include <mutex>
|
|
||||||
#include <queue>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include <map>
|
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
|
|
||||||
#include <threading/ie_itask_executor.hpp>
|
|
||||||
|
|
||||||
namespace AutoPlugin {
|
|
||||||
|
|
||||||
using DeviceName = std::string;
|
|
||||||
using NetworkFuture = std::future<InferenceEngine::SoExecutableNetworkInternal>;
|
|
||||||
|
|
||||||
class AutoExecutableNetwork : public InferenceEngine::IExecutableNetworkInternal {
|
|
||||||
public:
|
|
||||||
using Ptr = std::shared_ptr<AutoExecutableNetwork>;
|
|
||||||
|
|
||||||
explicit AutoExecutableNetwork(NetworkFuture cpuTask,
|
|
||||||
NetworkFuture acceleratorTask,
|
|
||||||
bool enablePerfCount);
|
|
||||||
|
|
||||||
void Export(std::ostream& networkModel) override;
|
|
||||||
InferenceEngine::RemoteContext::Ptr GetContext() const override;
|
|
||||||
InferenceEngine::CNNNetwork GetExecGraphInfo() override;
|
|
||||||
InferenceEngine::Parameter GetMetric(const std::string &name) const override;
|
|
||||||
void SetConfig(const std::map<std::string, InferenceEngine::Parameter>& config) override;
|
|
||||||
InferenceEngine::Parameter GetConfig(const std::string& name) const override;
|
|
||||||
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
|
|
||||||
InferenceEngine::OutputsDataMap networkOutputs) override;
|
|
||||||
bool TryGetActualNetwork(InferenceEngine::SoExecutableNetworkInternal& soExecNetwork);
|
|
||||||
|
|
||||||
~AutoExecutableNetwork();
|
|
||||||
|
|
||||||
private:
|
|
||||||
void WaitForActualDevice() const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
InferenceEngine::SoExecutableNetworkInternal _networkFirstReady;
|
|
||||||
mutable InferenceEngine::SoExecutableNetworkInternal _networkActualNeeded;
|
|
||||||
NetworkFuture _cpuFuture;
|
|
||||||
mutable NetworkFuture _acceleratorFuture;
|
|
||||||
bool _enablePerfCount;
|
|
||||||
mutable std::atomic<bool> _alreadyActualNetwork = {false};
|
|
||||||
std::map<std::string, InferenceEngine::Parameter> _cacheConfig;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace AutoPlugin
|
|
@ -1,103 +0,0 @@
|
|||||||
// Copyright (C) 2018-2021 Intel Corporation
|
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
|
||||||
//
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include "auto_infer_request.hpp"
|
|
||||||
#include <ie_input_info.hpp>
|
|
||||||
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
|
|
||||||
|
|
||||||
namespace AutoPlugin {
|
|
||||||
using namespace InferenceEngine;
|
|
||||||
|
|
||||||
AutoInferRequest::AutoInferRequest(const InputsDataMap& networkInputs,
|
|
||||||
const OutputsDataMap& networkOutputs,
|
|
||||||
const SoIInferRequestInternal& inferRequest,
|
|
||||||
const InferenceEngine::IExecutableNetworkInternal::Ptr autoExecutableNetwork,
|
|
||||||
bool alreadyActualNetwork,
|
|
||||||
bool enablePerfCount)
|
|
||||||
: IInferRequestInternal(networkInputs, networkOutputs)
|
|
||||||
, _inferRequest(inferRequest)
|
|
||||||
, _autoExecutableNetwork(std::dynamic_pointer_cast<AutoPlugin::AutoExecutableNetwork>(autoExecutableNetwork))
|
|
||||||
, _alreadyActualNetwork(alreadyActualNetwork)
|
|
||||||
, _enablePerfCount(enablePerfCount) {
|
|
||||||
IE_ASSERT(_autoExecutableNetwork != nullptr);
|
|
||||||
for (const auto &it : _networkInputs)
|
|
||||||
_inputs[it.first] = _inferRequest->GetBlob(it.first);
|
|
||||||
for (const auto &it : _networkOutputs)
|
|
||||||
_outputs[it.first] = _inferRequest->GetBlob(it.first);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> AutoInferRequest::GetPerformanceCounts() const {
|
|
||||||
if (_enablePerfCount) {
|
|
||||||
try {
|
|
||||||
return _inferRequest->GetPerformanceCounts();
|
|
||||||
} catch (...) {
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void AutoInferRequest::InferImpl() {
|
|
||||||
HotSwapRequests(); //safe to call here (before actual inference started)
|
|
||||||
SetBlobsToDeviceRequest();
|
|
||||||
_inferRequest->Infer();
|
|
||||||
}
|
|
||||||
|
|
||||||
void AutoInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) {
|
|
||||||
IInferRequestInternal::SetBlob(name, data);
|
|
||||||
}
|
|
||||||
|
|
||||||
Blob::Ptr AutoInferRequest::GetBlob(const std::string& name) {
|
|
||||||
return IInferRequestInternal::GetBlob(name);
|
|
||||||
}
|
|
||||||
|
|
||||||
void AutoInferRequest::Cancel() {
|
|
||||||
_inferRequest->Cancel();
|
|
||||||
}
|
|
||||||
|
|
||||||
void AutoInferRequest::StartAsync() {
|
|
||||||
HotSwapRequests(); //safe to call here (before actual inference started)
|
|
||||||
SetBlobsToDeviceRequest();
|
|
||||||
_inferRequest->StartAsync();
|
|
||||||
}
|
|
||||||
|
|
||||||
InferenceEngine::StatusCode AutoInferRequest::Wait(int64_t millis_timeout) {
|
|
||||||
return _inferRequest->Wait(millis_timeout);
|
|
||||||
}
|
|
||||||
|
|
||||||
void AutoInferRequest::SetCallback(Callback callback) {
|
|
||||||
_callback = callback;
|
|
||||||
_inferRequest->SetCallback(callback);
|
|
||||||
}
|
|
||||||
|
|
||||||
void AutoInferRequest::HotSwapRequests() {
|
|
||||||
if (!_alreadyActualNetwork) {
|
|
||||||
InferenceEngine::SoExecutableNetworkInternal tempSoExecNetwork;
|
|
||||||
if (_autoExecutableNetwork->TryGetActualNetwork(tempSoExecNetwork)) {
|
|
||||||
_alreadyActualNetwork = true;
|
|
||||||
_inferRequest = {tempSoExecNetwork, tempSoExecNetwork->CreateInferRequest()};
|
|
||||||
_inferRequest->SetCallback(_callback);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void AutoInferRequest::SetBlobsToDeviceRequest() {
|
|
||||||
for (const auto &it : _networkInputs) {
|
|
||||||
const auto &name = it.first;
|
|
||||||
// this assumes the request is already in BUSY state
|
|
||||||
auto blob = GetBlob(name);
|
|
||||||
if (_inferRequest->GetBlob(name) != blob)
|
|
||||||
_inferRequest->SetBlob(name, blob);
|
|
||||||
}
|
|
||||||
for (const auto &it : _networkOutputs) {
|
|
||||||
const auto &name = it.first;
|
|
||||||
// this assumes the request is already in BUSY state
|
|
||||||
auto blob = GetBlob(name);
|
|
||||||
if (_inferRequest->GetBlob(name) != blob)
|
|
||||||
_inferRequest->SetBlob(name, blob);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} // namespace AutoPlugin
|
|
@ -1,55 +0,0 @@
|
|||||||
// Copyright (C) 2018-2021 Intel Corporation
|
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
|
||||||
//
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <atomic>
|
|
||||||
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
|
|
||||||
#include <ie_blob.h>
|
|
||||||
#include <ie_common.h>
|
|
||||||
#include <map>
|
|
||||||
#include <memory>
|
|
||||||
#include <mutex>
|
|
||||||
#include <queue>
|
|
||||||
#include <string>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include <utility>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "auto_exec_network.hpp"
|
|
||||||
|
|
||||||
namespace AutoPlugin {
|
|
||||||
|
|
||||||
class AutoInferRequest : public InferenceEngine::IInferRequestInternal {
|
|
||||||
public:
|
|
||||||
using Ptr = std::shared_ptr<AutoInferRequest>;
|
|
||||||
explicit AutoInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
|
|
||||||
const InferenceEngine::OutputsDataMap& networkOutputs,
|
|
||||||
const InferenceEngine::SoIInferRequestInternal& inferRequest,
|
|
||||||
const InferenceEngine::IExecutableNetworkInternal::Ptr executeNetwork,
|
|
||||||
bool alreadyActualNetwork,
|
|
||||||
bool enablePerfCount);
|
|
||||||
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
|
|
||||||
void InferImpl() override;
|
|
||||||
void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) override;
|
|
||||||
InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
|
|
||||||
void Cancel() override;
|
|
||||||
//async impl
|
|
||||||
void StartAsync() override;
|
|
||||||
InferenceEngine::StatusCode Wait(int64_t millis_timeout) override;
|
|
||||||
void SetCallback(Callback callback) override;
|
|
||||||
|
|
||||||
private:
|
|
||||||
void HotSwapRequests();
|
|
||||||
void SetBlobsToDeviceRequest();
|
|
||||||
|
|
||||||
private:
|
|
||||||
InferenceEngine::SoIInferRequestInternal _inferRequest;
|
|
||||||
AutoPlugin::AutoExecutableNetwork::Ptr _autoExecutableNetwork;
|
|
||||||
Callback _callback; // need to save the callback for hot-swap of the requests
|
|
||||||
bool _alreadyActualNetwork{ false };
|
|
||||||
bool _enablePerfCount { false };
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace AutoPlugin
|
|
@ -2,397 +2,10 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
//
|
//
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
#include <memory>
|
|
||||||
#include <map>
|
|
||||||
#include <unordered_set>
|
|
||||||
|
|
||||||
#include <ie_metric_helpers.hpp>
|
|
||||||
#include <threading/ie_executor_manager.hpp>
|
|
||||||
#include <ie_algorithm.hpp>
|
|
||||||
#include <ngraph/opsets/opset1.hpp>
|
|
||||||
#include <transformations/utils/utils.hpp>
|
|
||||||
#include <ie_icore.hpp>
|
|
||||||
|
|
||||||
#include "auto_plugin.hpp"
|
#include "auto_plugin.hpp"
|
||||||
#include "ngraph_ops/convolution_ie.hpp"
|
|
||||||
#include "ngraph_ops/deconvolution_ie.hpp"
|
|
||||||
|
|
||||||
namespace AutoPlugin {
|
namespace AutoPlugin {
|
||||||
namespace {
|
|
||||||
std::string GetNetworkPrecision(const InferenceEngine::CNNNetwork &network) {
|
|
||||||
auto nGraphFunc = network.getFunction();
|
|
||||||
bool isINTModel = ngraph::op::util::has_op_with_type<ngraph::op::FakeQuantize>(nGraphFunc);
|
|
||||||
if (isINTModel) {
|
|
||||||
return METRIC_VALUE(INT8);
|
|
||||||
}
|
|
||||||
for (auto & node : nGraphFunc->get_ordered_ops()) {
|
|
||||||
if (std::dynamic_pointer_cast<ngraph::opset1::Convolution>(node) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::opset1::GroupConvolution>(node) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::opset1::GroupConvolutionBackpropData>(node) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::opset1::ConvolutionBackpropData>(node) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node) ||
|
|
||||||
std::dynamic_pointer_cast<ngraph::op::DeconvolutionIE>(node)) {
|
|
||||||
auto layerType = node->input(1).get_element_type().get_type_name();
|
|
||||||
if (layerType == "f32")
|
|
||||||
return METRIC_VALUE(FP32);
|
|
||||||
if (layerType == "f16")
|
|
||||||
return METRIC_VALUE(FP16);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return METRIC_VALUE(FP32);
|
|
||||||
}
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
AutoInferencePlugin::AutoInferencePlugin() {
|
|
||||||
_pluginName = "AUTO";
|
|
||||||
}
|
|
||||||
|
|
||||||
IE::IExecutableNetworkInternal::Ptr AutoInferencePlugin::LoadNetwork(const std::string& fileName,
|
|
||||||
const ConfigType& config) {
|
|
||||||
return LoadNetworkImpl(fileName, {}, config);
|
|
||||||
}
|
|
||||||
|
|
||||||
IE::IExecutableNetworkInternal::Ptr AutoInferencePlugin::LoadExeNetworkImpl(const IE::CNNNetwork& network,
|
|
||||||
const ConfigType& config) {
|
|
||||||
if (network.getFunction() == nullptr) {
|
|
||||||
IE_THROW() << "AUTO device supports just ngraph network representation";
|
|
||||||
}
|
|
||||||
|
|
||||||
auto networkPrecision = GetNetworkPrecision(network);
|
|
||||||
return LoadNetworkImpl({}, network, config, networkPrecision);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::shared_ptr<AutoExecutableNetwork> AutoInferencePlugin::LoadNetworkImpl(const std::string& modelPath,
|
|
||||||
const InferenceEngine::CNNNetwork& network,
|
|
||||||
const ConfigType& config,
|
|
||||||
const std::string& networkPrecision) {
|
|
||||||
if (GetCore() == nullptr) {
|
|
||||||
IE_THROW() << "Please, work with AUTO device via InferencEngine::Core object";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (modelPath.empty() && network.getFunction() == nullptr) {
|
|
||||||
IE_THROW() << "AUTO device supports just ngraph network representation";
|
|
||||||
}
|
|
||||||
|
|
||||||
auto fullConfig = mergeConfigs(_config, config);
|
|
||||||
CheckConfig(fullConfig);
|
|
||||||
auto metaDevices = GetDeviceList(fullConfig);
|
|
||||||
auto core = GetCore(); // shared_ptr that holds the Core while the lambda below (which captures that by val) works
|
|
||||||
auto LoadNetworkAsync =
|
|
||||||
[core, modelPath, network](const std::string& device)
|
|
||||||
-> IE::SoExecutableNetworkInternal {
|
|
||||||
IE::SoExecutableNetworkInternal executableNetwork;
|
|
||||||
if (!modelPath.empty()) {
|
|
||||||
executableNetwork = core->LoadNetwork(modelPath, device, {});
|
|
||||||
} else {
|
|
||||||
executableNetwork = core->LoadNetwork(network, device, {});
|
|
||||||
}
|
|
||||||
return executableNetwork;
|
|
||||||
};
|
|
||||||
|
|
||||||
NetworkFuture cpuFuture;
|
|
||||||
NetworkFuture acceleratorFuture;
|
|
||||||
|
|
||||||
// start CPU task
|
|
||||||
const auto CPUIter = std::find_if(metaDevices.begin(), metaDevices.end(),
|
|
||||||
[=](const std::string& d)->bool{return d.find("CPU") != std::string::npos;});
|
|
||||||
if (CPUIter != metaDevices.end()) {
|
|
||||||
cpuFuture = std::async(std::launch::async, LoadNetworkAsync, *CPUIter);
|
|
||||||
}
|
|
||||||
|
|
||||||
// start accelerator task, like GPU
|
|
||||||
const auto accelerator = SelectDevice(metaDevices, networkPrecision);
|
|
||||||
bool isAccelerator = accelerator.find("CPU") == std::string::npos;
|
|
||||||
if (isAccelerator) {
|
|
||||||
acceleratorFuture = std::async(std::launch::async, LoadNetworkAsync, accelerator);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool enablePerfCount = fullConfig.find(IE::PluginConfigParams::KEY_PERF_COUNT) != fullConfig.end();
|
|
||||||
|
|
||||||
return std::make_shared<AutoExecutableNetwork>(std::move(cpuFuture), std::move(acceleratorFuture), enablePerfCount);
|
|
||||||
}
|
|
||||||
|
|
||||||
IE::QueryNetworkResult AutoInferencePlugin::QueryNetwork(const IE::CNNNetwork& network, const ConfigType& config) const {
|
|
||||||
IE::QueryNetworkResult queryResult = {};
|
|
||||||
if (GetCore() == nullptr) {
|
|
||||||
IE_THROW() << "Please, work with AUTO device via InferencEngine::Core object";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (network.getFunction() == nullptr) {
|
|
||||||
IE_THROW() << "AUTO device supports just ngraph network representation";
|
|
||||||
}
|
|
||||||
|
|
||||||
auto fullConfig = mergeConfigs(_config, config);
|
|
||||||
auto metaDevices = GetDeviceList(fullConfig);
|
|
||||||
std::unordered_set<std::string> supportedLayers;
|
|
||||||
for (auto&& value : metaDevices) {
|
|
||||||
try {
|
|
||||||
auto deviceQr = GetCore()->QueryNetwork(network, value, {});
|
|
||||||
std::unordered_set<std::string> deviceSupportedLayers;
|
|
||||||
for (auto &&layerQr : deviceQr.supportedLayersMap) {
|
|
||||||
deviceSupportedLayers.emplace(layerQr.first);
|
|
||||||
}
|
|
||||||
supportedLayers = supportedLayers.empty()
|
|
||||||
? deviceSupportedLayers : (deviceSupportedLayers.empty()
|
|
||||||
? supportedLayers : IE::details::Intersection(
|
|
||||||
supportedLayers, deviceSupportedLayers));
|
|
||||||
break;
|
|
||||||
} catch (...) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (auto&& supportedLayer : supportedLayers) {
|
|
||||||
queryResult.supportedLayersMap[supportedLayer] = GetName();
|
|
||||||
}
|
|
||||||
return queryResult;
|
|
||||||
}
|
|
||||||
|
|
||||||
IE::Parameter AutoInferencePlugin::GetConfig(const std::string& name,
|
|
||||||
const std::map<std::string, IE::Parameter> & options) const {
|
|
-    auto it = _config.find(name);
-    if (it == _config.end()) {
-        IE_THROW() << "Unsupported config key: " << name;
-    } else {
-        return { it->second };
-    }
-}
-
-void AutoInferencePlugin::SetConfig(const ConfigType& config) {
-    for (auto && kvp : config) {
-        if (kvp.first.find("AUTO_") == 0) {
-            _config[kvp.first] = kvp.second;
-        } else if (kvp.first == IE::PluginConfigParams::KEY_PERF_COUNT) {
-            if (kvp.second == IE::PluginConfigParams::YES ||
-                kvp.second == IE::PluginConfigParams::NO) {
-                _config[kvp.first] = kvp.second;
-            } else {
-                IE_THROW() << "Unsupported config value: " << kvp.second
-                           << " for key: " << kvp.first;
-            }
-        } else {
-            IE_THROW() << "Unsupported config key: " << kvp.first;
-        }
-    }
-}
-
-IE::Parameter AutoInferencePlugin::GetMetric(const std::string& name,
-                                             const std::map<std::string, IE::Parameter> & options) const {
-    if (name == METRIC_KEY(SUPPORTED_METRICS)) {
-        std::vector<std::string> metrics;
-        metrics.emplace_back(METRIC_KEY(SUPPORTED_METRICS));
-        metrics.emplace_back(METRIC_KEY(FULL_DEVICE_NAME));
-        metrics.emplace_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
-        metrics.emplace_back(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-        IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
-    } else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
-        std::string device_name = {"Inference Engine AUTO device"};
-        IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, device_name);
-    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
-        std::vector<std::string> configKeys = {
-            IE::KEY_AUTO_DEVICE_LIST,
-            IE::PluginConfigParams::KEY_PERF_COUNT
-        };
-        IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
-    } else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) {
-        std::vector<std::string> capabilities = GetOptimizationCapabilities(options);
-        IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
-    } else {
-        IE_THROW() << "Unsupported metric key " << name;
-    }
-}
-
-//////////////////////////////////// private & protected functions ///////////////////
-std::vector<DeviceName> AutoInferencePlugin::GetDeviceList(const ConfigType& config) const {
-    std::vector<DeviceName> deviceList;
-
-    auto deviceListConfig = config.find(IE::KEY_AUTO_DEVICE_LIST);
-    if (deviceListConfig == config.end()) {
-        deviceList = GetCore()->GetAvailableDevices();
-    } else {
-        deviceList = IE::DeviceIDParser::getHeteroDevices(deviceListConfig->second);
-    }
-
-    if (deviceList.empty()) {
-        IE_THROW() << "Please, check environment due to no supported devices can be used";
-    }
-
-    return deviceList;
-}
-
-std::vector<std::string> AutoInferencePlugin::GetOptimizationCapabilities(const std::map<std::string, IE::Parameter> & options) const {
-    // FIXME: workaround to get devicelist.
-    std::unordered_set<std::string> capabilities;
-    std::vector<std::string> queryDeviceLists{"CPU", "GPU"};
-
-    if (options.find(IE::KEY_AUTO_DEVICE_LIST) != options.end()) {
-        auto deviceListConfig = options.at(IE::KEY_AUTO_DEVICE_LIST).as<std::string>();
-        queryDeviceLists = IE::DeviceIDParser::getHeteroDevices(deviceListConfig);
-    } else if (_config.find(IE::KEY_AUTO_DEVICE_LIST) != _config.end()) {
-        auto deviceListConfig = _config.at(IE::KEY_AUTO_DEVICE_LIST);
-        queryDeviceLists = IE::DeviceIDParser::getHeteroDevices(deviceListConfig);
-    }
-    for (auto &item : queryDeviceLists) {
-        try {
-            std::vector<std::string> device_cap =
-                GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-            for (auto &cap : device_cap) {
-                capabilities.insert(cap);
-            }
-        } catch (...) {
-        }
-    }
-    return {capabilities.begin(), capabilities.end()};
-}
-
-void AutoInferencePlugin::CheckConfig(const ConfigType& config) {
-    std::vector<std::string> supportedConfigKeys = GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS), {});
-    for (auto&& kvp : config) {
-        if (kvp.first.find("AUTO_") == 0) {
-            continue;
-        } else if (kvp.first == IE::PluginConfigParams::KEY_PERF_COUNT) {
-            if (kvp.second == IE::PluginConfigParams::YES ||
-                kvp.second == IE::PluginConfigParams::NO) {
-                continue;
-            } else {
-                IE_THROW() << "Unsupported config value: " << kvp.second
-                           << " for key: " << kvp.first;
-            }
-        } else {
-            IE_THROW() << "Unsupported config key: " << kvp.first;
-        }
-    }
-}
-
-DeviceName AutoInferencePlugin::SelectDevice(const std::vector<DeviceName>& metaDevices, const std::string& networkPrecision) {
-    if (metaDevices.empty()) {
-        IE_THROW(NotFound) << "No available device to select in AUTO plugin";
-    }
-    if (metaDevices.size() == 1) {
-        return metaDevices.at(0);
-    }
-
-    std::vector<DeviceName> CPU;
-    std::vector<DeviceName> dGPU;
-    std::vector<DeviceName> iGPU;
-    std::vector<DeviceName> MYRIAD;
-    std::vector<DeviceName> VPUX;
-
-    for (auto& item : metaDevices) {
-        if (item.find("CPU") == 0) {
-            CPU.push_back(item);
-            continue;
-        }
-        if (item.find("MYRIAD") == 0) {
-            MYRIAD.push_back(item);
-            continue;
-        }
-        if (item.find("VPUX") == 0) {
-            VPUX.push_back(item);
-            continue;
-        }
-        if (item.find("GPU") == 0) {
-            auto gpuFullDeviceName = GetCore()->GetMetric(item, METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>();
-            if (gpuFullDeviceName.find("iGPU") != std::string::npos) {
-                iGPU.push_back(item);
-            } else if (gpuFullDeviceName.find("dGPU") != std::string::npos) {
-                dGPU.push_back(item);
-            }
-            continue;
-        }
-    }
-
-    if (CPU.empty() && dGPU.empty() && iGPU.empty() && MYRIAD.empty() && VPUX.empty()) {
-        IE_THROW(NotFound) << "No available device found";
-    }
-
-    // Priority of selecting device: dGPU > VPUX > iGPU > MYRIAD > CPU
-    if (!dGPU.empty()) {
-        for (auto&& item : dGPU) {
-            std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-            auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
-            if (supportNetwork != capability.end()) {
-                return item;
-            }
-        }
-    } else if (!VPUX.empty()) {
-        for (auto&& item : VPUX) {
-            std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-            auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
-            if (supportNetwork != capability.end()) {
-                return item;
-            }
-        }
-    } else if (!iGPU.empty()) {
-        for (auto&& item : iGPU) {
-            std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-            auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
-            if (supportNetwork != capability.end()) {
-                return item;
-            }
-        }
-    } else if (!MYRIAD.empty()) {
-        for (auto&& item : MYRIAD) {
-            std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-            auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
-            if (supportNetwork != capability.end()) {
-                return item;
-            }
-        }
-    }
-
-    // If network is FP32 but there is no device support FP32, offload FP32 network to device support FP16.
-    if (networkPrecision == "FP32") {
-        if (!dGPU.empty()) {
-            for (auto&& item : dGPU) {
-                std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
-                if (supportNetwork != capability.end()) {
-                    return item;
-                }
-            }
-        } else if (!VPUX.empty()) {
-            for (auto&& item : VPUX) {
-                std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
-                if (supportNetwork != capability.end()) {
-                    return item;
-                }
-            }
-        } else if (!iGPU.empty()) {
-            for (auto&& item : iGPU) {
-                std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
-                if (supportNetwork != capability.end()) {
-                    return item;
-                }
-            }
-        } else if (!MYRIAD.empty()) {
-            for (auto&& item : MYRIAD) {
-                std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
-                if (supportNetwork != capability.end()) {
-                    return item;
-                }
-            }
-        }
-    }
-
-    if (CPU.empty()) {
-        IE_THROW() << "Cannot select any device";
-    }
-    return CPU[0];
-}
-
-ConfigType AutoInferencePlugin::mergeConfigs(ConfigType config, const ConfigType& local) {
-    for (auto && kvp : local) {
-        config[kvp.first] = kvp.second;
-    }
-    return config;
-}
-
 // define CreatePluginEngine to create plugin instance
-static const IE::Version version = {{2, 1}, CI_BUILD_NUMBER, "AutoPlugin"};
+static const InferenceEngine::Version version = {{2, 1}, CI_BUILD_NUMBER, "AutoPlugin"};
 IE_DEFINE_PLUGIN_CREATE_FUNCTION(AutoInferencePlugin, version)
 } // namespace AutoPlugin
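For orientation, a minimal self-contained sketch of the priority rule that the SelectDevice logic above encodes (dGPU > VPUX > iGPU > MYRIAD > CPU, with an FP16 fallback for FP32 networks). The Candidate type, bucket layout, and function name are illustrative assumptions, not part of the plugin sources:

    #include <algorithm>
    #include <cstdio>
    #include <string>
    #include <vector>

    struct Candidate {
        std::string name;                       // e.g. "GPU.1"
        std::vector<std::string> capabilities;  // e.g. {"FP16", "INT8"}
    };

    // buckets are ordered by priority (dGPU, VPUX, iGPU, MYRIAD); cpu is the last resort.
    static std::string selectByPriority(const std::vector<std::vector<Candidate>>& buckets,
                                        const std::vector<Candidate>& cpu,
                                        const std::string& networkPrecision) {
        auto supports = [](const Candidate& c, const std::string& p) {
            return std::find(c.capabilities.begin(), c.capabilities.end(), p) != c.capabilities.end();
        };
        // Only the highest-priority non-empty bucket is scanned for the requested precision.
        for (const auto& bucket : buckets) {
            if (bucket.empty())
                continue;
            for (const auto& c : bucket)
                if (supports(c, networkPrecision))
                    return c.name;
            break;
        }
        // FP32 networks may be offloaded to an FP16-capable device from that same bucket order.
        if (networkPrecision == "FP32") {
            for (const auto& bucket : buckets) {
                if (bucket.empty())
                    continue;
                for (const auto& c : bucket)
                    if (supports(c, "FP16"))
                        return c.name;
                break;
            }
        }
        return cpu.empty() ? std::string{} : cpu.front().name;  // CPU is the final fallback
    }

    int main() {
        std::vector<std::vector<Candidate>> buckets = {
            {},                            // dGPU: none present
            {},                            // VPUX: none present
            {{"GPU.0", {"FP16"}}},         // iGPU supports FP16 only
            {{"MYRIAD", {"FP16"}}},
        };
        std::vector<Candidate> cpu = {{"CPU", {"FP32", "INT8"}}};
        // FP32 network: the iGPU is chosen through the FP16 fallback instead of dropping to CPU.
        std::printf("%s\n", selectByPriority(buckets, cpu, "FP32").c_str());  // prints: GPU.0
    }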
@@ -4,43 +4,14 @@
 
 #pragma once
 
-#include <map>
-#include <vector>
-#include <string>
-#include <unordered_set>
-#include <type_traits>
-
 #include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
 #include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
-#include <threading/ie_executor_manager.hpp>
-
-#include "auto_exec_network.hpp"
 
 namespace AutoPlugin {
-namespace IE = InferenceEngine;
-using ConfigType = std::map<std::string, std::string>;
-
-class AutoInferencePlugin : public IE::IInferencePlugin {
+class AutoInferencePlugin : public InferenceEngine::IInferencePlugin {
 public:
-    AutoInferencePlugin();
+    AutoInferencePlugin() = default;
     ~AutoInferencePlugin() = default;
-    IE::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const IE::CNNNetwork& network, const ConfigType& config) override;
-    IE::IExecutableNetworkInternal::Ptr LoadNetwork(const std::string& fileName, const ConfigType& config) override;
-    IE::QueryNetworkResult QueryNetwork(const IE::CNNNetwork& network, const ConfigType& config) const override;
-    IE::Parameter GetMetric(const std::string& name, const std::map<std::string, IE::Parameter>& options) const override;
-    IE::Parameter GetConfig(const std::string& name, const std::map<std::string, IE::Parameter> & options) const override;
-    void SetConfig(const ConfigType& config) override;
-
-private:
-    std::shared_ptr<AutoExecutableNetwork> LoadNetworkImpl(const std::string& modelPath,
-                                                           const InferenceEngine::CNNNetwork& network,
-                                                           const ConfigType &config,
-                                                           const std::string &networkPrecision = METRIC_VALUE(FP32));
-    std::vector<DeviceName> GetDeviceList(const ConfigType& config) const;
-    std::vector<std::string> GetOptimizationCapabilities(const std::map<std::string, IE::Parameter>& options) const;
-    DeviceName SelectDevice(const std::vector<DeviceName>& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32));
-    void CheckConfig(const ConfigType& config);
-    static ConfigType mergeConfigs(ConfigType config, const ConfigType& local);
 };
 
 } // namespace AutoPlugin
@@ -60,6 +60,7 @@
 #include <transformations/op_conversions/convert_previous_nms_to_nms_5.hpp>
 #include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp>
 #include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
+#include <transformations/op_conversions/convert_gather_downgrade.hpp>
 #include <transformations/op_conversions/convert_gather_0d.hpp>
 #include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp>
 #include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp>
@@ -362,6 +363,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
         pass_config->disable<ngraph::pass::ConvertBroadcast3>();
         pass_config->disable<ngraph::pass::WeightsDequantizeToFakeQuantize>();
         pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
+        pass_config->enable<ngraph::pass::ConvertGather8ToGather7>();
 
         if (!config.enable_loop_unrolling) {
             pass_config->disable<ngraph::pass::ConvertTensorIteratorToRNNSequence>();
@@ -388,11 +390,12 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
             OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork::LPT");
             using namespace ngraph::pass::low_precision;
 
-            ngraph::pass::Manager manager;
             // Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers
             // With this key users can work-around such issues
             if (!config.enable_fp16_for_quantized_models) {
+                ngraph::pass::Manager manager;
                 manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }});
+                manager.run_passes(nGraphFunc);
             }
 
             auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
@@ -15,15 +15,15 @@ namespace CLDNNPlugin {
 
 static cldnn::coordinate_transformation_mode GetCoordinateTransformationMode(ngraph::op::v4::Interpolate::CoordinateTransformMode mode) {
     switch (mode) {
-    case ngraph::op::v4::Interpolate::CoordinateTransformMode::half_pixel:
+    case ngraph::op::v4::Interpolate::CoordinateTransformMode::HALF_PIXEL:
         return cldnn::coordinate_transformation_mode::half_pixel;
-    case ngraph::op::v4::Interpolate::CoordinateTransformMode::pytorch_half_pixel:
+    case ngraph::op::v4::Interpolate::CoordinateTransformMode::PYTORCH_HALF_PIXEL:
         return cldnn::coordinate_transformation_mode::pytorch_half_pixel;
-    case ngraph::op::v4::Interpolate::CoordinateTransformMode::asymmetric:
+    case ngraph::op::v4::Interpolate::CoordinateTransformMode::ASYMMETRIC:
         return cldnn::coordinate_transformation_mode::asymmetric;
-    case ngraph::op::v4::Interpolate::CoordinateTransformMode::tf_half_pixel_for_nn:
+    case ngraph::op::v4::Interpolate::CoordinateTransformMode::TF_HALF_PIXEL_FOR_NN:
         return cldnn::coordinate_transformation_mode::tf_half_pixel_for_nn;
-    case ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners:
+    case ngraph::op::v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS:
         return cldnn::coordinate_transformation_mode::align_corners;
     }
 
@@ -32,15 +32,15 @@ static cldnn::coordinate_transformation_mode GetCoordinateTransformationMode(ngr
 
 static cldnn::nearest_mode GetNearestMode(ngraph::op::v4::Interpolate::NearestMode mode) {
     switch (mode) {
-    case ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor:
+    case ngraph::op::v4::Interpolate::NearestMode::ROUND_PREFER_FLOOR:
        return cldnn::nearest_mode::round_prefer_floor;
-    case ngraph::op::v4::Interpolate::NearestMode::round_prefer_ceil:
+    case ngraph::op::v4::Interpolate::NearestMode::ROUND_PREFER_CEIL:
        return cldnn::nearest_mode::round_prefer_ceil;
-    case ngraph::op::v4::Interpolate::NearestMode::floor:
+    case ngraph::op::v4::Interpolate::NearestMode::FLOOR:
        return cldnn::nearest_mode::floor;
-    case ngraph::op::v4::Interpolate::NearestMode::ceil:
+    case ngraph::op::v4::Interpolate::NearestMode::CEIL:
        return cldnn::nearest_mode::ceil;
-    case ngraph::op::v4::Interpolate::NearestMode::simple:
+    case ngraph::op::v4::Interpolate::NearestMode::SIMPLE:
        return cldnn::nearest_mode::simple;
     }
 
@@ -49,18 +49,18 @@ static cldnn::nearest_mode GetNearestMode(ngraph::op::v4::Interpolate::NearestMo
 
 static cldnn::shape_calculation_mode GetShapeCalculationMode(ngraph::op::v4::Interpolate::ShapeCalcMode mode) {
     switch (mode) {
-    case ngraph::op::v4::Interpolate::ShapeCalcMode::sizes: return cldnn::shape_calculation_mode::sizes;
+    case ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES: return cldnn::shape_calculation_mode::sizes;
-    case ngraph::op::v4::Interpolate::ShapeCalcMode::scales: return cldnn::shape_calculation_mode::scales;
+    case ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES: return cldnn::shape_calculation_mode::scales;
     }
     IE_THROW() << "Unknown shape calculation mode: " << static_cast<int>(mode);
 }
 
 static cldnn::resample_type GetResampleType(ngraph::op::v4::Interpolate::InterpolateMode mode) {
     switch (mode) {
-    case ngraph::op::v4::Interpolate::InterpolateMode::nearest: return cldnn::resample_type::nearest;
+    case ngraph::op::v4::Interpolate::InterpolateMode::NEAREST: return cldnn::resample_type::nearest;
-    case ngraph::op::v4::Interpolate::InterpolateMode::linear: return cldnn::resample_type::caffe_bilinear;
+    case ngraph::op::v4::Interpolate::InterpolateMode::LINEAR: return cldnn::resample_type::caffe_bilinear;
-    case ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx: return cldnn::resample_type::linear_onnx;
+    case ngraph::op::v4::Interpolate::InterpolateMode::LINEAR_ONNX: return cldnn::resample_type::linear_onnx;
-    case ngraph::op::v4::Interpolate::InterpolateMode::cubic: return cldnn::resample_type::cubic;
+    case ngraph::op::v4::Interpolate::InterpolateMode::CUBIC: return cldnn::resample_type::cubic;
     }
     IE_THROW() << "Unknown interpolation mode: " << static_cast<int>(mode);
 }
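All four helpers above follow the same pattern: map one enum onto another with an exhaustive switch and throw on any unexpected value. A generic standalone sketch of that pattern, with stand-in enum names rather than the ngraph or clDNN types:

    #include <stdexcept>

    enum class SourceMode { NEAREST, LINEAR };          // stand-in for the (now upper-case) ngraph values
    enum class TargetMode { nearest, caffe_bilinear };  // stand-in for the clDNN values

    static TargetMode toTargetMode(SourceMode mode) {
        switch (mode) {
        case SourceMode::NEAREST: return TargetMode::nearest;
        case SourceMode::LINEAR:  return TargetMode::caffe_bilinear;
        }
        throw std::runtime_error("Unknown interpolation mode");  // mirrors the IE_THROW fallback above
    }

    int main() { return toTargetMode(SourceMode::LINEAR) == TargetMode::caffe_bilinear ? 0 : 1; }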
@@ -7,6 +7,7 @@
 #include "dnn_types.h"
 #include <cstdint>
 #include <cpp/ie_cnn_network.h>
+#include <ie_algorithm.hpp>
 
 namespace GNAPluginNS {
 namespace GNALimitations {
@@ -114,5 +115,10 @@ public:
 
 bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage);
 
+inline size_t GetMinBatchToFitInBuffer(InferenceEngine::DataPtr input) {
+    auto total_size = InferenceEngine::details::product(std::begin(input->getDims()), std::end(input->getDims()));
+    return total_size / bufferMaxSize + 1;
+}
+
 } // namespace GNALimitations
 } // namespace GNAPluginNS
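A small worked example of the arithmetic in GetMinBatchToFitInBuffer; the bufferMaxSize value and the input shape below are assumptions made only for this illustration:

    #include <cstdio>
    #include <functional>
    #include <numeric>
    #include <vector>

    int main() {
        const size_t bufferMaxSize = 65528;            // assumed GNA buffer limit, for illustration only
        std::vector<size_t> dims = {1, 8, 128, 128};   // hypothetical input dimensions

        size_t total = std::accumulate(dims.begin(), dims.end(), size_t{1}, std::multiplies<size_t>());
        size_t minBatch = total / bufferMaxSize + 1;   // same formula as the helper above

        std::printf("total elements: %zu, min batch to fit: %zu\n", total, minBatch);  // 131072 -> 3
    }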
@@ -19,6 +19,7 @@
 #include "gna_slope_scale.h"
 #include "runtime/pwl.h"
 #include "gna_data_types.hpp"
+#include "round_float_define.hpp"
 
 namespace GNAPluginNS {
 namespace frontend {
@@ -41,8 +42,8 @@ struct ScaleFactorUpdateResult {
  * @param p2 Second float value
  * @return Returns true if two float values are equal
  */
-static bool fp32eq(float p1, float p2) {
-    return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2)));
+static bool fp32eq(float p1, float p2, float accuracy = 0.00001f) {
+    return (std::abs(p1 - p2) <= accuracy * std::min(std::abs(p1), std::abs(p2)));
 }
 
 /**
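A standalone illustration of the relative-tolerance comparison that fp32eq now exposes through its accuracy parameter; the sample values are arbitrary:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    // Same arithmetic as fp32eq above: the tolerance scales with the smaller magnitude.
    static bool almostEqual(float p1, float p2, float accuracy = 0.00001f) {
        return std::abs(p1 - p2) <= accuracy * std::min(std::abs(p1), std::abs(p2));
    }

    int main() {
        std::printf("%d\n", almostEqual(1000.0f, 1000.001f));           // 1: within the default 1e-5 relative tolerance
        std::printf("%d\n", almostEqual(1000.0f, 1000.001f, 1.0e-7f));  // 0: a tighter tolerance rejects the same pair
    }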
|||||||
auto sd = 0.0;
|
auto sd = 0.0;
|
||||||
for (size_t j = 0; j < slopes.size(); ++j) {
|
for (size_t j = 0; j < slopes.size(); ++j) {
|
||||||
auto s = gna_slope(slopes[j], inScale, outScale);
|
auto s = gna_slope(slopes[j], inScale, outScale);
|
||||||
auto slope = static_cast<uint32_t>(s.slope * s.slope_scale);
|
auto slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
|
||||||
if (slope < static_cast<uint32_t>(std::numeric_limits<int16_t>::min()) && slope > static_cast<uint32_t>(std::numeric_limits<int16_t>::max())) {
|
if (slope < std::numeric_limits<int16_t>::min() || slope > std::numeric_limits<int16_t>::max()) {
|
||||||
sd += std::numeric_limits<int8_t>::max();
|
sd += std::numeric_limits<int8_t>::max();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto testSlope = static_cast<double>(slope) / s.slope_scale * inScale / outScale;
|
auto testSlope = static_cast<double>(slope) / s.slope_scale * inScale / outScale;
|
||||||
if (fp32eq(testSlope, slopes[j])) {
|
if (fp32eq(testSlope, slopes[j], 1.0E-6)) {
|
||||||
return outScale;
|
return outScale;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -683,7 +683,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
     auto input = layer->insData[0].lock();
 
     auto outputs = *layer->outData.begin();
-    auto reshaped_dims = Get2DReshapedData(input, 8)->getDims();
+    auto reshaped_dims = Get2DReshapedData(input, GNALimitations::GetMinBatchToFitInBuffer(input), 8)->getDims();
     const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ?
         GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
     uint32_t num_rows_in = reshaped_dims[1];
@@ -908,7 +908,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
     auto inputs = layer->insData.begin()->lock();
     auto outputs = *layer->outData.begin();
 
-    auto reshaped_dims = Get2DReshapedData(inputs, 8)->getDims();
+    auto reshaped_dims = Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims();
     uint32_t num_rows_in = reshaped_dims[1];
     uint32_t num_columns_in = reshaped_dims[0];
     uint32_t num_rows_out = num_rows_in;
@@ -1410,7 +1410,8 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
         noOfInputsDivisor = GNALimitations::noOfInputsLowPrecDivisor;
     }
 
-    auto input_data = HasTo2DReshapeData(layer) ? Get2DReshapedData(inputs, 8) : inputs;
+    auto input_data = HasTo2DReshapeData(layer) ?
+        Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8) : inputs;
     auto in_dims = input_data->getDims();
     auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
     uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size;
@@ -2212,8 +2213,8 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
 
             nextMemoryLayer.reserved_size = ALIGN64(memorySize);
         } else {
-            IE_ASSERT(nextMemoryLayer.reserved_size >= ALIGN64(num_data_bytes_out));
-            gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
+            // We may need to extend memory buffer if connected input size is bigger, for example for concat connection
+            gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out));
         }
         return;
     }
@@ -2498,8 +2499,8 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
 
             memoryLayer.reserved_size = ALIGN64(memorySize);
         } else {
-            IE_ASSERT(memoryLayer.reserved_size >= ALIGN64(num_data_bytes_in));
-            gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset);
+            // We may need to extend memory buffer if connected input size is bigger, for example for concat connection
+            gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
         }
 
         return prevLayer;
@@ -15,7 +15,9 @@ namespace GNAPluginNS {
  * @param input a pointer to data to be reshaped
  * @param maxZeroDimSize the maximum size of zero dimension
  */
-inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input, size_t maxZeroDimSize) {
+inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input, size_t minZeroDimSize,
+                                                  size_t maxZeroDimSize) {
+    IE_ASSERT(minZeroDimSize > 0);
     auto dims = input->getDims();
     uint32_t numRowsIn = InferenceEngine::details::product(begin(dims), end(dims));
     uint32_t numColumnsIn = 1;
@@ -23,7 +25,7 @@ inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input
     if (numRowsIn % 8 == 0) {
         if (dims.size() >= 2 || dims[0] >= maxZeroDimSize) {
             size_t indexDivide = maxZeroDimSize;
-            while (indexDivide > 1) {
+            while (indexDivide > minZeroDimSize) {
                 if ((numRowsIn / 8) % indexDivide == 0) break;
                 --indexDivide;
             }
@@ -55,4 +57,5 @@ inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) {
     // Don't reshape diagonallayers with bias connection
     return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput();
 }
+
 } // namespace GNAPluginNS
@@ -54,6 +54,7 @@
 #include <transformations/common_optimizations/pull_transpose_through_fq.hpp>
 #include <transformations/common_optimizations/relu_fake_quantize_fusion.hpp>
 #include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
+#include <transformations/common_optimizations/transpose_sinking.hpp>
 #include <transformations/utils/utils.hpp>
 
 #include "transformations/remove_extra_reshapes.hpp"
@@ -703,9 +704,9 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
         manager.register_pass<SplitConvolutionWithBias>();
         manager.register_pass<SplitConvolution>();
         manager.register_pass<HandleTransposesAroundMatMul>();
-        manager.register_pass<SwapInputMatMul>();
-        manager.register_pass<SwapInputMatMulWithBias>();
         manager.register_pass<SwapInputMatMulWithFq>();
+        manager.register_pass<SwapInputMatMulWithBias>();
+        manager.register_pass<SwapInputMatMul>();
         manager.register_pass<InsertTransposeAfterConvOrPool>();
         manager.register_pass<ReorderActivationAndPooling>();
         manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
@@ -727,6 +728,8 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
         pass_config->disable<ngraph::pass::ReluFakeQuantizeFusion>();
         // Consider to enable after per-channel quantization on FakeQuantize layer is supported in GNAPlugin, see issue 52034
         pass_config->disable<ngraph::pass::AddFakeQuantizeFusion>();
+        // TransposeReduction can be enabled when Transpose-Conv-Transpose patterns will be handled in ngraph transformations
+        pass_config->disable<ngraph::pass::TransposeReduction>();
         manager.run_passes(graph);
         convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(graph, clonedNetwork);
     }
@@ -1576,6 +1579,18 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i
                  transpose_inputs_info,
                  transpose_outputs_info);
 
+    // If scale factors are defined in configuration we still need to use them instead of imported values,
+    // for example to change the scale factors for the old models.
+    if (!config.inputScaleFactors.empty()) {
+        IE_ASSERT(config.inputScaleFactors.size() == inputsDesc->inputScaleFactors.size());
+        for (size_t i = 0; i < config.inputScaleFactors.size(); ++i) {
+            if (config.inputScaleFactors[i] != GNAPluginNS::kScaleFactorDefault) {
+                gnalog() << "[Import Network] Using input scale factor defined in configuration for input " << i << std::endl;
+                inputsDesc->inputScaleFactors[i] = config.inputScaleFactors[i];
+            }
+        }
+    }
+
 #if GNA_LIB_VER == 2
     auto getOrientation = [](Gna2Operation & gnaOperation) {
         return gnaOperation.Type == Gna2OperationTypeConvolution ?
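A hedged usage sketch of the behaviour added above: a scale factor supplied in the import configuration overrides the value stored in the exported blob. The per-input config key spelling and the blob path are assumptions for the example, not taken from this diff:

    #include <fstream>
    #include <map>
    #include <string>

    #include <inference_engine.hpp>

    int main() {
        InferenceEngine::Core ie;

        // Assumed per-input scale factor key and blob file name; adjust to your setup.
        std::map<std::string, std::string> config = {{"GNA_SCALE_FACTOR_0", "2048"}};

        std::ifstream blob("exported_gna_model.blob", std::ios::binary);
        // With the change above, the configured scale factor wins over the one stored in the blob.
        auto execNetwork = ie.ImportNetwork(blob, "GNA", config);
        (void)execNetwork;
    }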
@@ -95,7 +95,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
             }
             // missing scale factors are set to be 1.0f
             if (inputScaleFactors.size() <= input_index) {
-                inputScaleFactors.resize(input_index + 1, 1.f);
+                inputScaleFactors.resize(input_index + 1, GNAPluginNS::kScaleFactorDefault);
             }
             inputScaleFactors[input_index] = InferenceEngine::CNNLayer::ie_parse_float(value);
         } else if (key == GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE)) {
@@ -18,6 +18,8 @@
 
 namespace GNAPluginNS {
 
+static const float kScaleFactorDefault = 1.f;
+
 struct Config {
     Config() {
         AdjustKeyMapValues();
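A tiny standalone sketch of the padding behaviour that kScaleFactorDefault enables in Config::UpdateFromMap above: slots for inputs that were not configured explicitly are filled with the default. The index and value are arbitrary:

    #include <cstdio>
    #include <vector>

    int main() {
        const float kScaleFactorDefault = 1.f;   // same default as introduced above
        std::vector<float> inputScaleFactors;

        size_t input_index = 2;                  // only input #2 is configured by the user
        if (inputScaleFactors.size() <= input_index)
            inputScaleFactors.resize(input_index + 1, kScaleFactorDefault);
        inputScaleFactors[input_index] = 4096.f;

        for (float sf : inputScaleFactors)
            std::printf("%g ", sf);              // prints: 1 1 4096
        std::printf("\n");
    }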
@@ -45,4 +45,18 @@ public:
     };
     std::vector<SplitConnectedLayerInfo> splitOutputLayers;
 };
 
+// @brief Returns sizes of split outputs to split the input tensor to aligned parts not greater than the specified size
+static std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = 64) {
+    std::vector<uint32_t> splitSizes;
+    uint32_t maxAlignedSplitSize = maxSplitSize - maxSplitSize % alignment;
+    uint32_t usedSize = 0;
+    while (usedSize < totalSize) {
+        uint32_t partSize = std::min(totalSize - usedSize, maxAlignedSplitSize);
+        splitSizes.push_back(partSize);
+        usedSize += partSize;
+    }
+    return splitSizes;
+}
+
 } // namespace GNAPluginNS
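A worked example of GetAlignedSplitSizes, which is reused further down by EltwiseSplitOverChannelsPass and by the convolution splitting transformation; the helper is copied here so the snippet stands alone, and the sizes are arbitrary sample values:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Standalone copy of the helper above, kept only for the example.
    static std::vector<uint32_t> alignedSplitSizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = 64) {
        std::vector<uint32_t> splitSizes;
        uint32_t maxAlignedSplitSize = maxSplitSize - maxSplitSize % alignment;
        uint32_t usedSize = 0;
        while (usedSize < totalSize) {
            uint32_t partSize = std::min(totalSize - usedSize, maxAlignedSplitSize);
            splitSizes.push_back(partSize);
            usedSize += partSize;
        }
        return splitSizes;
    }

    int main() {
        // 100000 elements with a 65000-element limit: the limit is rounded down to a multiple of 64 (64960),
        // so the result is {64960, 35040}.
        for (uint32_t part : alignedSplitSizes(100000, 65000))
            std::printf("%u ", part);
        std::printf("\n");
    }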
@@ -87,7 +87,7 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
     });
     IE_ASSERT(inputLayer != nullptr);
     size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? nextLayer->outData[0]->getDims().back() :
-        Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1];
+        Get2DReshapedData(nextLayer->outData[0], GNALimitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8)->getDims()[1];
     std::vector<float> weightsValues(weightsSize, fillValue);
     IE_ASSERT(diagLayer != nullptr);
     diagLayer->_weights = make_shared_blob<float>(
@@ -1113,6 +1113,9 @@ void InsertConcatAligningFilterPass::run() {
                 SizeVector({filterWeights.size()}),
                 Layout::C));
             concatAligningFilter->_weights->allocate();
+            if (!concatAligningFilter->_weights->buffer().as<float*>()) {
+                THROW_GNA_EXCEPTION << "Failed to allocate weights of size " << filterWeights.size() << " for " << filterName;
+            }
 
             CopyVectorToBlob(concatAligningFilter->_weights, filterWeights);
 
@@ -1395,15 +1398,20 @@ void EltwiseSplitOverChannelsPass::run() {
             THROW_GNA_LAYER_EXCEPTION(l) << "number of outputs expected to be 1";
         }
         auto oData = l->outData.front();
-        auto out_width = GetDataDimSize(oData, DataDimName::W);
-        auto totalElementsForOutput = details::product(oData->getDims().begin(), oData->getDims().end());
-        // gna limit this to be OxFFFF
-        auto maxAffineElements = 65536 - 64;
-        if (totalElementsForOutput <= maxAffineElements) {
+        auto oDims = oData->getDims();
+        auto totalElementsSize = details::product(std::begin(oDims), std::end(oDims));
+        if (totalElementsSize <= GNALimitations::bufferMaxSize) {
             continue;
         }
 
-        auto totalSplits = 1 + totalElementsForOutput / maxAffineElements;
+        auto firstValuableDim = std::find_if(std::begin(oDims), std::end(oDims), [](size_t val) { return val > 1; });
+        IE_ASSERT(firstValuableDim != std::end(oDims));
+        auto splittedElementsSize = *firstValuableDim;
+        auto splittedDimIx = std::distance(std::begin(oDims), firstValuableDim);
+
+        // Split output size should be multiple by 64 to avoid align filters insertion
+        auto splitSizes = GetAlignedSplitSizes(splittedElementsSize,
+            GNALimitations::bufferMaxSize * splittedElementsSize / totalElementsSize);
 
         pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n";
         auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(l);
@@ -1421,27 +1429,13 @@ void EltwiseSplitOverChannelsPass::run() {
             auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc();
 
             // create split layer outputs
-            size_t usedElements = 0;
-            for (size_t i = 0; i < totalSplits; i++) {
-                SizeVector newDims;
-                size_t elements_num = std::min(totalElementsForOutput - usedElements,
-                        static_cast<size_t>(maxAffineElements));
-                if (inputDesc.getDims().size() == 2) {
-                    newDims = SizeVector{1, elements_num};
-                } else {
-                    elements_num = elements_num - elements_num % out_width;
-                    newDims = SizeVector{1, elements_num / out_width, out_width};
-                }
-
+            for (auto elementsNum : splitSizes) {
+                auto newDims = oDims;
+                newDims[splittedDimIx] = elementsNum;
                 auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout());
                 auto data = std::make_shared<Data>(l->name + "/" + std::to_string(kThEltwiseInput) + "/1", newDesc);
                 getCreatorLayer(data) = split;
                 split->outData.push_back(data);
-
-                usedElements += elements_num;
-                if (usedElements == totalElementsForOutput) {
-                    break;
-                }
             }
             // replacing connection X->eltwise to X->split
             auto oData = CNNLayerFindOutData(l, kThEltwiseInput);
@@ -1461,7 +1455,7 @@ void EltwiseSplitOverChannelsPass::run() {
             concat->outData.push_back(masterEltwise->outData.front());
             getCreatorLayer(masterEltwise->outData.front()) = concat;
 
-            for (size_t k = 0; k != totalSplits; k++) {
+            for (size_t k = 0; k != splitSizes.size(); k++) {
                 auto eltwiseRaw = std::make_shared<EltwiseLayer>(
                     LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32});
                 IE_ASSERT(eltwiseRaw != nullptr);
@@ -1521,7 +1515,9 @@ void SubstituteScaleShiftBroadCastPass::run() {
         if (was_reshaped) {
             dataDims = reshaped_data[insData->getName()];
         } else {
-            dataDims = HasTo2DReshapeData(l) ? Get2DReshapedData(insData, 8)->getDims() : insData->getDims();
+            dataDims = HasTo2DReshapeData(l) ?
+                Get2DReshapedData(insData, GNALimitations::GetMinBatchToFitInBuffer(insData), 8)->getDims() :
+                insData->getDims();
         }
 
         if (dataDims.size() <= 2) {
@@ -12,6 +12,7 @@
 #include <ngraph/pattern/op/wrap_type.hpp>
 #include <ngraph/rt_info.hpp>
 #include "backend/gna_limitations.hpp"
+#include "layers/gna_split_layer.hpp"
 
 using namespace GNAPluginNS;
 
@@ -19,22 +20,6 @@ NGRAPH_RTTI_DEFINITION(SplitConvolution, "SplitConvolution", 0);
 NGRAPH_RTTI_DEFINITION(SplitConvolutionWithBias, "SplitConvolutionWithBias", 0);
 NGRAPH_RTTI_DEFINITION(SplitConvolutionWithFq, "SplitConvolutionWithFq", 0);
 
-static std::vector<int64_t> GetConvSplitSizes(std::shared_ptr<ngraph::Node> conv) {
-    uint32_t width = conv->get_input_shape(0).back();
-    uint32_t in_channels = conv->get_input_shape(0).at(1);
-    uint32_t usedWidth = 0;
-    std::vector<int64_t> split_sizes;
-    uint32_t width_max_size = GNALimitations::bufferMaxSize / in_channels;
-    width_max_size = width_max_size - width_max_size % 64;
-    while (usedWidth < width) {
-        uint32_t width_part = std::min(width - usedWidth, width_max_size);
-        split_sizes.push_back(width_part);
-        usedWidth += width_part;
-    }
-    IE_ASSERT(usedWidth == width);
-    return split_sizes;
-}
-
 static bool Convert(std::shared_ptr<ngraph::Node> conv,
                     std::shared_ptr<ngraph::Node> add,
                     std::shared_ptr<ngraph::Node> bias,
@@ -45,15 +30,21 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
         return false;
     }
 
-    auto split_sizes = GetConvSplitSizes(conv);
+    uint32_t width = conv->get_input_shape(0).back();
+    uint32_t in_channels = conv->get_input_shape(0).at(1);
+    auto split_sizes = GetAlignedSplitSizes(width, GNALimitations::bufferMaxSize / in_channels);
     IE_ASSERT(split_sizes.size() > 1);
+    std::vector<int64_t> split_sizes_casted(split_sizes.size());
+    std::transform(std::begin(split_sizes), std::end(split_sizes), std::begin(split_sizes_casted), [](uint32_t size) {
+        return static_cast<int64_t>(size);
+    });
 
     /* TODO check if it's NHWC convolution wrapped with transposes or all input dimensions except of width == 1,
        otherwise this split axis isn't supported */
     const int64_t width_axis = conv->get_input_shape(0).size() - 1;
     auto split_node = std::make_shared<ngraph::opset7::VariadicSplit>(conv->input_value(0),
         ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{width_axis}),
-        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes.size()}), split_sizes));
+        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes_casted.size()}), split_sizes_casted));
     ngraph::copy_runtime_info(conv, split_node);
     split_node->set_friendly_name(conv->get_friendly_name() + "/split");
     ngraph::OutputVector convOutputs;
|
@ -41,23 +41,6 @@ namespace VPUConfigParams {
|
|||||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::MYRIAD_ENABLE_FORCE_RESET instead")
|
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::MYRIAD_ENABLE_FORCE_RESET instead")
|
||||||
DECLARE_VPU_MYRIAD_CONFIG_KEY(FORCE_RESET);
|
DECLARE_VPU_MYRIAD_CONFIG_KEY(FORCE_RESET);
|
||||||
|
|
||||||
/**
|
|
||||||
* @deprecated
|
|
||||||
* @brief This option allows to specify device.
|
|
||||||
* If specified device is not available then creating infer request will throw an exception.
|
|
||||||
*/
|
|
||||||
INFERENCE_ENGINE_DEPRECATED("")
|
|
||||||
DECLARE_VPU_MYRIAD_CONFIG_KEY(PLATFORM);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @deprecated
|
|
||||||
* @brief Supported keys definition for VPU_MYRIAD_CONFIG_KEY(PLATFORM) option.
|
|
||||||
*/
|
|
||||||
INFERENCE_ENGINE_DEPRECATED("")
|
|
||||||
DECLARE_VPU_MYRIAD_CONFIG_VALUE(2450);
|
|
||||||
INFERENCE_ENGINE_DEPRECATED("")
|
|
||||||
DECLARE_VPU_MYRIAD_CONFIG_VALUE(2480);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @deprecated Use InferenceEngine::MYRIAD_DDR_TYPE instead
|
* @deprecated Use InferenceEngine::MYRIAD_DDR_TYPE instead
|
||||||
* @brief This option allows to specify device memory type.
|
* @brief This option allows to specify device memory type.
|
||||||
|
@@ -19,10 +19,6 @@
 #include "ie_plugin_config.hpp"
 #include "ie_version.hpp"
 
-namespace ngraph {
-class Function;
-} // namespace ngraph
-
 namespace InferenceEngine {
 class IExtension;
 class Blob;
@@ -30,6 +26,9 @@ class RemoteContext;
 } // namespace InferenceEngine
 
 namespace ov {
+
+class Function;
+
 namespace runtime {
 
 /**
@@ -72,7 +71,7 @@ public:
     * * binPath parameter is not used.
    * @return Function
    */
-    std::shared_ptr<ngraph::Function> read_model(const std::wstring& modelPath, const std::wstring& binPath = {}) const;
+    std::shared_ptr<ov::Function> read_model(const std::wstring& modelPath, const std::wstring& binPath = {}) const;
 #endif
 
    /**
@@ -86,7 +85,7 @@ public:
     * * binPath parameter is not used.
    * @return Function
    */
-    std::shared_ptr<ngraph::Function> read_model(const std::string& modelPath, const std::string& binPath = {}) const;
+    std::shared_ptr<ov::Function> read_model(const std::string& modelPath, const std::string& binPath = {}) const;
    /**
    * @brief Reads models from IR and ONNX formats
    * @param model string with model in IR or ONNX format
@@ -101,7 +100,7 @@ public:
    * constant data becomes to point to invalid memory.
    * @return Function
    */
-    std::shared_ptr<ngraph::Function> read_model(const std::string& model,
+    std::shared_ptr<ov::Function> read_model(const std::string& model,
                                              const std::shared_ptr<const InferenceEngine::Blob>& weights) const;
 
    /**
@@ -116,7 +115,7 @@ public:
    * operation
    * @return An executable network reference
    */
-    InferenceEngine::ExecutableNetwork compile_model(const std::shared_ptr<const ngraph::Function>& network,
+    InferenceEngine::ExecutableNetwork compile_model(const std::shared_ptr<const ov::Function>& network,
                                                      const std::string& deviceName,
                                                      const std::map<std::string, std::string>& config = {});
 
@@ -145,7 +144,7 @@ public:
    * operation
    * @return An executable network object
   */
-    InferenceEngine::ExecutableNetwork compile_model(const std::shared_ptr<const ngraph::Function>& network,
+    InferenceEngine::ExecutableNetwork compile_model(const std::shared_ptr<const ov::Function>& network,
                                                      const std::shared_ptr<InferenceEngine::RemoteContext>& context,
                                                      const std::map<std::string, std::string>& config = {});
 
@@ -189,7 +188,7 @@ public:
    * @param config Optional map of pairs: (config parameter name, config parameter value)
    * @return An object containing a map of pairs a layer name -> a device name supporting this layer.
   */
-    InferenceEngine::QueryNetworkResult query_model(const std::shared_ptr<const ngraph::Function>& network,
+    InferenceEngine::QueryNetworkResult query_model(const std::shared_ptr<const ov::Function>& network,
                                                     const std::string& deviceName,
                                                     const std::map<std::string, std::string>& config = {}) const;
 
@@ -62,18 +62,17 @@ Parsed<T> parseDeviceNameIntoConfig(const std::string& deviceName, const std::ma
     } else if (deviceName_.find("MULTI:") == 0) {
         deviceName_ = "MULTI";
         config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = deviceName.substr(6);
-    } else if (deviceName_.find("AUTO") == 0) {
-        deviceName_ = "AUTO";
-        if (deviceName.size() > std::string("AUTO").size()) {
-            std::string deviceList = deviceName.substr(std::string("AUTO:").size());
-            if (deviceList.find("AUTO") != std::string::npos) {
-                IE_THROW() << "Device list for AUTO should not be AUTO";
-            }
-            config_[InferenceEngine::KEY_AUTO_DEVICE_LIST] = deviceName.substr(std::string("AUTO:").size());
+    } else if (deviceName.find("AUTO") == 0) {
+        deviceName_ = "MULTI";
+        if (deviceName.find("AUTO:") == 0) {
+            config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] =
+                deviceName.substr(std::string("AUTO:").size());
         }
+        config_.insert({CONFIG_KEY_INTERNAL(WORK_MODE), ""});
     } else {
-        if (deviceName_.empty()) {
-            deviceName_ = "AUTO";
+        if (deviceName_ == "AUTO") {
+            deviceName_ = "MULTI";
+            config_.insert({CONFIG_KEY_INTERNAL(WORK_MODE), ""});
         }
         InferenceEngine::DeviceIDParser parser(deviceName_);
         deviceName_ = parser.getDeviceName();
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto parsed = parseDeviceNameIntoConfig(deviceName);
|
// AUTO case
|
||||||
|
{
|
||||||
|
if (deviceName.find("AUTO:") == 0) {
|
||||||
|
IE_THROW()
|
||||||
|
<< "You can get specific metrics with the GetMetric only for the MULTI itself (without devices). "
|
||||||
|
"To get individual devices's metrics call GetMetric for each device separately";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string pluginName = deviceName;
|
||||||
|
if (pluginName == "AUTO") {
|
||||||
|
pluginName = "MULTI";
|
||||||
|
}
|
||||||
|
|
||||||
|
auto parsed = parseDeviceNameIntoConfig(pluginName);
|
||||||
|
|
||||||
// we need to return a copy of Parameter object which is created on Core side,
|
// we need to return a copy of Parameter object which is created on Core side,
|
||||||
// not in InferenceEngine plugin side, which can be unloaded from Core in a parallel thread
|
// not in InferenceEngine plugin side, which can be unloaded from Core in a parallel thread
|
||||||
@ -629,11 +642,14 @@ public:
|
|||||||
* @param deviceName A name of device
|
* @param deviceName A name of device
|
||||||
* @return Reference to a CPP plugin wrapper
|
* @return Reference to a CPP plugin wrapper
|
||||||
*/
|
*/
|
||||||
InferenceEngine::InferencePlugin GetCPPPluginByName(const std::string& deviceName) const {
|
InferenceEngine::InferencePlugin GetCPPPluginByName(const std::string& pluginName) const {
|
||||||
OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "CoreImpl::GetCPPPluginByName");
|
OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "CoreImpl::GetCPPPluginByName");
|
||||||
|
|
||||||
std::lock_guard<std::mutex> lock(pluginsMutex);
|
std::lock_guard<std::mutex> lock(pluginsMutex);
|
||||||
|
auto deviceName = pluginName;
|
||||||
|
if (deviceName == "AUTO") {
|
||||||
|
deviceName = "MULTI";
|
||||||
|
}
|
||||||
auto it = pluginRegistry.find(deviceName);
|
auto it = pluginRegistry.find(deviceName);
|
||||||
if (it == pluginRegistry.end()) {
|
if (it == pluginRegistry.end()) {
|
||||||
IE_THROW() << "Device with \"" << deviceName << "\" name is not registered in the InferenceEngine";
|
IE_THROW() << "Device with \"" << deviceName << "\" name is not registered in the InferenceEngine";
|
||||||
@ -856,9 +872,9 @@ public:
|
|||||||
} else if (deviceName.find("AUTO") == 0) {
|
} else if (deviceName.find("AUTO") == 0) {
|
||||||
auto pos = deviceName.find_first_of(":");
|
auto pos = deviceName.find_first_of(":");
|
||||||
if (pos != std::string::npos) {
|
if (pos != std::string::npos) {
|
||||||
deviceNames = InferenceEngine::DeviceIDParser::getHeteroDevices(deviceName.substr(pos + 1));
|
deviceNames = InferenceEngine::DeviceIDParser::getMultiDevices(deviceName.substr(pos + 1));
|
||||||
}
|
}
|
||||||
deviceNames.emplace_back("AUTO");
|
deviceNames.emplace_back("MULTI");
|
||||||
} else {
|
} else {
|
||||||
deviceNames.push_back(deviceName);
|
deviceNames.push_back(deviceName);
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,301 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "threading/ie_tbb_streams_executor.hpp"

#include <atomic>
#include <list>
#include <memory>
#include <queue>
#include <thread>
#include <tuple>
#include <utility>

#include "details/ie_exception.hpp"
#include "ie_parallel.hpp"
#include "ie_parallel_custom_arena.hpp"
#include "ie_system_conf.h"
#include "threading/ie_thread_affinity.hpp"

#if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
# include <tbb/concurrent_queue.h>
# include <tbb/enumerable_thread_specific.h>
# include <tbb/global_control.h>
# include <tbb/task_group.h>
# include <tbb/task_scheduler_observer.h>

namespace InferenceEngine {
struct TBBStreamsExecutor::Impl {
    struct Stream;
    using TaskQueue = tbb::concurrent_queue<Task>;
    using StreamQueue = tbb::concurrent_bounded_queue<Stream*>;
    using LocalStreams = tbb::enumerable_thread_specific<Stream*>;
    struct Shared : public std::enable_shared_from_this<Shared> {
        using Ptr = std::shared_ptr<Shared>;
        TaskQueue _taskQueue;
        StreamQueue _streamQueue;
    };
    struct Stream {
        struct Observer : tbb::task_scheduler_observer {
            Stream* _thisStream = nullptr;
            LocalStreams* _localStream = nullptr;
            CpuSet _mask;
            int _ncpus = 0;
            int _threadBindingStep = 0;
            int _offset = 0;

            Observer(custom::task_arena& arena,
                     Stream* thisStream,
                     LocalStreams* localStream,
                     const bool pinToCores,
                     const int streamId,
                     const int threadsPerStream,
                     const int threadBindingStep,
                     const int threadBindingOffset)
                : tbb::task_scheduler_observer{static_cast<tbb::task_arena&>(arena)},
                  _thisStream{thisStream},
                  _localStream{localStream},
                  _threadBindingStep{threadBindingStep},
                  _offset{streamId * threadsPerStream + threadBindingOffset} {
                if (pinToCores) {
                    std::tie(_mask, _ncpus) = GetProcessMask();
                }
            }
            void on_scheduler_entry(bool) override {
                _localStream->local() = _thisStream;
                if (nullptr != _mask) {
                    PinThreadToVacantCore(_offset + tbb::this_task_arena::current_thread_index(),
                                          _threadBindingStep,
                                          _ncpus,
                                          _mask);
                }
            }
            void on_scheduler_exit(bool) override {
                _localStream->local() = nullptr;
                if (nullptr != _mask) {
                    PinCurrentThreadByMask(_ncpus, _mask);
                }
            }
            ~Observer() override = default;
        };

        explicit Stream(Impl* impl, const bool externStream = false) : _impl{impl} {
            {
                std::lock_guard<std::mutex> lock{_impl->_streamIdMutex};
                if (_impl->_streamIdQueue.empty()) {
                    _streamId = _impl->_streamId++;
                } else {
                    _streamId = _impl->_streamIdQueue.front();
                    _impl->_streamIdQueue.pop();
                }
            }
            _numaNodeId = _impl->_config._streams
                              ? _impl->_usedNumaNodes.at((_streamId % _impl->_config._streams) /
                                                         ((_impl->_config._streams + _impl->_usedNumaNodes.size() - 1) /
                                                          _impl->_usedNumaNodes.size()))
                              : _impl->_usedNumaNodes.at(_streamId % _impl->_usedNumaNodes.size());
            auto concurrency =
                (0 == _impl->_config._threadsPerStream) ? tbb::task_arena::automatic : _impl->_config._threadsPerStream;
            auto masterThreads = externStream ? 1u : 0u;
            if (ThreadBindingType::HYBRID_AWARE == _impl->_config._threadBindingType) {
                if (Config::PreferredCoreType::ROUND_ROBIN != _impl->_config._threadPreferredCoreType) {
                    if (Config::PreferredCoreType::ANY == _impl->_config._threadPreferredCoreType) {
                        _arena.initialize(concurrency);
                    } else {
                        const auto selected_core_type =
                            Config::PreferredCoreType::BIG == _impl->_config._threadPreferredCoreType
                                ? custom::info::core_types().back()    // running on Big cores only
                                : custom::info::core_types().front();  // running on Little cores only
                        _arena.initialize(custom::task_arena::constraints{}
                                              .set_core_type(selected_core_type)
                                              .set_max_concurrency(concurrency));
                    }
                } else {
                    // assigning the stream to the core type in the round-robin fashion
                    // wrapping around total_streams (i.e. how many streams all different core types can handle
                    // together)
                    const auto total_streams = _impl->_totalSreamsOnCoreTypes.back().second;
                    const auto streamId_wrapped = _streamId % total_streams;
                    const auto& selected_core_type =
                        std::find_if(_impl->_totalSreamsOnCoreTypes.cbegin(),
                                     _impl->_totalSreamsOnCoreTypes.cend(),
                                     [streamId_wrapped](const decltype(_impl->_totalSreamsOnCoreTypes)::value_type& p) {
                                         return p.second > streamId_wrapped;
                                     })
                            ->first;
                    _arena.initialize(custom::task_arena::constraints{}
                                          .set_core_type(selected_core_type)
                                          .set_max_concurrency(concurrency));
                }
            } else if (ThreadBindingType::NUMA == _impl->_config._threadBindingType) {
                _arena.initialize(custom::task_arena::constraints{_numaNodeId, concurrency});
            } else {
                _arena.initialize(concurrency, masterThreads);
            }
            _observer.reset(new Observer{_arena,
                                         this,
                                         &(_impl->_localStream),
                                         (ThreadBindingType::CORES == _impl->_config._threadBindingType),
                                         _streamId,
                                         _impl->_config._threadsPerStream,
                                         _impl->_config._threadBindingStep,
                                         _impl->_config._threadBindingOffset});
            _observer->observe(true);
        }

        ~Stream() {
            static_cast<tbb::task_arena&>(_arena).terminate();
            _observer->observe(false);
            {
                std::lock_guard<std::mutex> lock{_impl->_streamIdMutex};
                _impl->_streamIdQueue.push(_streamId);
            }
        }

        Impl* _impl = nullptr;
        int _streamId = 0;
        int _numaNodeId = 0;
        custom::task_arena _arena;
        std::unique_ptr<Observer> _observer;
    };

    using Streams = std::list<Stream>;
    using ExternStreams = tbb::enumerable_thread_specific<Stream>;

    explicit Impl(const Config& config)
        : _config{config},
          _shared{std::make_shared<Shared>()},
          _localStream{nullptr},
          _externStreams{this, true} {
        if (_config._streams * _config._threadsPerStream >= static_cast<int>(std::thread::hardware_concurrency())) {
            _maxTbbThreads.reset(
                new tbb::global_control{tbb::global_control::max_allowed_parallelism,
                                        static_cast<std::size_t>(_config._streams * _config._threadsPerStream + 1)});
        }
        auto numaNodes = getAvailableNUMANodes();
        if (_config._streams != 0) {
            std::copy_n(std::begin(numaNodes),
                        std::min(static_cast<std::size_t>(_config._streams), numaNodes.size()),
                        std::back_inserter(_usedNumaNodes));
        } else {
            _usedNumaNodes = numaNodes;
        }
        if (ThreadBindingType::HYBRID_AWARE == config._threadBindingType) {
            const auto core_types = custom::info::core_types();
            const int threadsPerStream =
                (0 == config._threadsPerStream) ? std::thread::hardware_concurrency() : config._threadsPerStream;
            int sum = 0;
            // reversed order, so BIG cores are first
            for (auto iter = core_types.rbegin(); iter < core_types.rend(); iter++) {
                const auto& type = *iter;
                // calculating the #streams per core type
                const int num_streams_for_core_type =
                    std::max(1,
                             custom::info::default_concurrency(custom::task_arena::constraints{}.set_core_type(type)) /
                                 threadsPerStream);
                sum += num_streams_for_core_type;
                // prefix sum, so the core type for a given stream id will be deduced just as a upper_bound
                // (notice that the map keeps the elements in the descending order, so the big cores are populated
                // first)
                _totalSreamsOnCoreTypes.emplace_back(type, sum);
            }
        }
        _shared->_streamQueue.set_capacity(_config._streams);
        for (int streamId = 0; streamId < _config._streams; ++streamId) {
            _streams.emplace_back(this);
            _shared->_streamQueue.push(&(_streams.back()));
        }
    }

    ~Impl() {
        for (int streamId = 0; streamId < _config._streams; ++streamId) {
            Stream* stream = nullptr;
            _shared->_streamQueue.pop(stream);
            (void)stream;
        }
    }

    static void Schedule(Shared::Ptr& shared, Task task) {
        Stream* stream = nullptr;
        if (shared->_streamQueue.try_pop(stream)) {
            struct TryPop {
                void operator()() const {
                    try {
                        do {
                            Task task = std::move(_task);
                            task();
                        } while (_shared->_taskQueue.try_pop(_task));
                    } catch (...) {
                    }
                    if (_shared->_streamQueue.try_push(_stream)) {
                        if (_shared->_taskQueue.try_pop(_task)) {
                            Schedule(_shared, std::move(_task));
                        }
                    }
                }
                Stream* _stream;
                mutable Shared::Ptr _shared;
                mutable Task _task;
            };
            stream->_arena.enqueue(TryPop{stream, shared->shared_from_this(), std::move(task)});
        } else {
            shared->_taskQueue.push(std::move(task));
        }
    }

    Config _config;
    std::unique_ptr<tbb::global_control> _maxTbbThreads;
    std::mutex _streamIdMutex;
    int _streamId = 0;
    std::queue<int> _streamIdQueue;
    std::vector<int> _usedNumaNodes;
    Shared::Ptr _shared;
    LocalStreams _localStream;
    ExternStreams _externStreams;
    Streams _streams;
    using StreamIdToCoreTypes = std::vector<std::pair<custom::core_type_id, int>>;
    StreamIdToCoreTypes _totalSreamsOnCoreTypes;
};

TBBStreamsExecutor::TBBStreamsExecutor(const Config& config) : _impl{new TBBStreamsExecutor::Impl{config}} {}

TBBStreamsExecutor::~TBBStreamsExecutor() {
    _impl.reset();
}

int TBBStreamsExecutor::GetStreamId() {
    auto stream = _impl->_localStream.local();
    if (nullptr == stream) {
        stream = &(_impl->_externStreams.local());
    }
    return stream->_streamId;
}

int TBBStreamsExecutor::GetNumaNodeId() {
    auto stream = _impl->_localStream.local();
    if (nullptr == stream) {
        stream = &(_impl->_externStreams.local());
    }
    return stream->_numaNodeId;
}

void TBBStreamsExecutor::run(Task task) {
    if (_impl->_config._streams == 0) {
        Execute(std::move(task));
    } else {
        Impl::Schedule(_impl->_shared, std::move(task));
    }
}

void TBBStreamsExecutor::Execute(Task task) {
    auto stream = _impl->_localStream.local();
    if (nullptr == stream) {
        _impl->_externStreams.local()._arena.execute(std::move(task));
    } else {
        stream->_arena.execute(std::move(task));
    }
}

} // namespace InferenceEngine
#endif // ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
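The executor above keeps a bounded queue of idle streams and an unbounded queue of pending tasks: run() hands a task to an idle stream's task_arena when one is available, otherwise the task is parked in _taskQueue; a stream that finishes its work drains the task queue before pushing itself back as idle, and re-schedules if a task raced in meanwhile. A minimal usage sketch, assuming the executor is configured through the usual streams-executor Config (the field names are taken from the code above; that Config is default-constructible here is an assumption):

    InferenceEngine::IStreamsExecutor::Config config;
    config._streams = 4;           // four independent streams
    config._threadsPerStream = 2;  // two TBB threads per stream

    InferenceEngine::TBBStreamsExecutor executor{config};
    executor.run([] {
        // work item: runs inside an idle stream's arena, or waits in the
        // shared task queue until a stream becomes free
    });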
@ -43,7 +43,9 @@ ngraph::pass::FullyConnectedBiasFusion::FullyConnectedBiasFusion() {
Shape bias_shape(bias->get_shape());
Shape output_shape(fc->get_shape());
size_t bias_size = std::accumulate(bias_shape.begin(), bias_shape.end(), size_t{1}, std::multiplies<int64_t>());
if (bias_shape.empty() || bias_shape.back() != output_shape.back() || bias_shape.back() != bias_size) {
if (bias_shape.empty() ||
    (bias_shape.back() != output_shape.back() && bias_shape.back() != 1) ||
    bias_shape.back() != bias_size) {
    return false;
}
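The relaxed check accepts a bias whose trailing dimension either matches the fully-connected output channels or is 1, provided the bias is effectively one-dimensional (its element count equals that trailing dimension). For an FC output of shape [N, 768], for example, biases of shape [768], [1, 768] or [1] now qualify for the fusion, while a bias of shape [2, 768] is still rejected because its size (1536) differs from its last dimension.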
@ -131,7 +131,7 @@ public:
const float dequantizationMul,
const float dequantizationSub,
const ngraph::element::Type originalPrecision,
const ngraph::PartialShape dataNodeOutputShape,
const ngraph::PartialShape& dataNodeOutputShape,
element::Type precision,
const element::Type deqPrecision = element::f32,
std::shared_ptr<ngraph::Node> input = nullptr);
@ -0,0 +1,26 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <ngraph/ngraph.hpp>
#include "layer_transformation.hpp"

namespace ngraph {
namespace pass {
namespace low_precision {

class LP_TRANSFORMATIONS_API PadTransformation : public LayerTransformation {
public:
    NGRAPH_RTTI_DECLARATION;
    PadTransformation(const Params& params = Params());
    bool transform(TransformationContext& context, pattern::Matcher& m) override;
    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
    bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
};

} // namespace low_precision
} // namespace pass
} // namespace ngraph
@ -17,11 +17,13 @@ class LP_TRANSFORMATIONS_API AvgPoolPrecisionPreservedAttribute : public Precisi
};

using AvgPoolPrecisionPreservedAttributePtr = std::shared_ptr<AvgPoolPrecisionPreservedAttribute>;
} // namespace ngraph

extern template class LP_TRANSFORMATIONS_API VariantImpl<AvgPoolPrecisionPreservedAttributePtr>;
namespace ov {
extern template class LP_TRANSFORMATIONS_API VariantImpl<ngraph::AvgPoolPrecisionPreservedAttributePtr>;

template<>
class LP_TRANSFORMATIONS_API VariantWrapper<AvgPoolPrecisionPreservedAttributePtr> : public VariantImpl<AvgPoolPrecisionPreservedAttributePtr> {
class LP_TRANSFORMATIONS_API VariantWrapper<ngraph::AvgPoolPrecisionPreservedAttributePtr> : public VariantImpl<ngraph::AvgPoolPrecisionPreservedAttributePtr> {
public:
static constexpr VariantTypeInfo type_info{ "LowPrecision::AvgPoolPrecisionPreserved", 0 };

@ -31,9 +33,9 @@ public:

VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}

AvgPoolPrecisionPreservedAttributePtr get() { return this->m_value; }
ngraph::AvgPoolPrecisionPreservedAttributePtr get() { return this->m_value; }

void merge(std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AvgPoolPrecisionPreservedAttribute>>>>& attributes);
void merge(std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<ngraph::AvgPoolPrecisionPreservedAttribute>>>>& attributes);
std::string to_string() override;
};
} // namespace ngraph
} // namespace ov
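This header and the attribute headers that follow all share one layout: the attribute class and its shared_ptr alias are declared in namespace ngraph, while the extern VariantImpl instantiation and the VariantWrapper specialization sit in namespace ov and refer to the attribute through its ngraph::-qualified name; the registered type_info strings ("LowPrecision::...") are identical in both layouts.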
@ -62,12 +62,15 @@ public:
};

using IntervalsAlignmentAttributePtr = std::shared_ptr<IntervalsAlignmentAttribute>;
} // namespace ngraph

extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<IntervalsAlignmentAttributePtr>;
namespace ov {

extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<ngraph::IntervalsAlignmentAttributePtr>;

template<>
class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>> :
class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<ngraph::IntervalsAlignmentAttribute>> :
    public VariantImpl<std::shared_ptr<IntervalsAlignmentAttribute>> {
    public VariantImpl<std::shared_ptr<ngraph::IntervalsAlignmentAttribute>> {
public:
static constexpr VariantTypeInfo type_info{ "LowPrecision::IntervalsAlignment", 0 };

@ -77,12 +80,13 @@ public:

VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}

std::shared_ptr<IntervalsAlignmentAttribute> get() const { return this->m_value; }
std::shared_ptr<ngraph::IntervalsAlignmentAttribute> get() const { return this->m_value; }

static std::shared_ptr<VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>>> create(
static std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::IntervalsAlignmentAttribute>>> create(
    const std::shared_ptr<ngraph::Node>& node,
    const AttributeParameters& params);
void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>>>>& attributes);
void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::IntervalsAlignmentAttribute>>>>& attributes);
std::string to_string() override;
};
} // namespace ngraph

} // namespace ov
@ -16,11 +16,14 @@
namespace ngraph {
class LP_TRANSFORMATIONS_API PerTensorQuantizationAttribute {
};
} // namespace ngraph

extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<PerTensorQuantizationAttribute>;
namespace ov {

extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<ngraph::PerTensorQuantizationAttribute>;

template<>
class LP_TRANSFORMATIONS_API VariantWrapper<PerTensorQuantizationAttribute> : public VariantImpl<PerTensorQuantizationAttribute> {
class LP_TRANSFORMATIONS_API VariantWrapper<ngraph::PerTensorQuantizationAttribute> : public VariantImpl<ngraph::PerTensorQuantizationAttribute> {
public:
static constexpr VariantTypeInfo type_info { "LowPrecision::PerTensorQuantization", 0 };

@ -30,4 +33,5 @@ public:
    return type_info;
}
};
} // namespace ngraph

} // namespace ov
@ -31,10 +31,14 @@ public:

using PrecisionPreservedAttributePtr = std::shared_ptr<PrecisionPreservedAttribute>;

extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<PrecisionPreservedAttributePtr>;
} // namespace ngraph

namespace ov {

extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<ngraph::PrecisionPreservedAttributePtr>;

template<>
class LP_TRANSFORMATIONS_API VariantWrapper<PrecisionPreservedAttributePtr> : public VariantImpl<PrecisionPreservedAttributePtr> {
class LP_TRANSFORMATIONS_API VariantWrapper<ngraph::PrecisionPreservedAttributePtr> : public VariantImpl<ngraph::PrecisionPreservedAttributePtr> {
public:
static constexpr VariantTypeInfo type_info{ "LowPrecision::PrecisionPreserved", 0 };

@ -44,8 +48,9 @@ public:

VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}

PrecisionPreservedAttributePtr get() { return this->m_value; }
ngraph::PrecisionPreservedAttributePtr get() { return this->m_value; }

std::string to_string() override;
};
} // namespace ngraph

} // namespace ov
@ -34,11 +34,14 @@ public:
static const std::vector<ngraph::element::Type> defaultPrecisions;
PrecisionsAttribute(const std::vector<ngraph::element::Type>& precisions = defaultPrecisions);
};
} // namespace ngraph

extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<std::shared_ptr<PrecisionsAttribute>>;
namespace ov {

extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<std::shared_ptr<ngraph::PrecisionsAttribute>>;

template<>
class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<PrecisionsAttribute>> : public VariantImpl<std::shared_ptr<PrecisionsAttribute>> {
class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<ngraph::PrecisionsAttribute>> : public VariantImpl<std::shared_ptr<ngraph::PrecisionsAttribute>> {
public:
static constexpr VariantTypeInfo type_info{ "LowPrecision::Precisions", 0 };

@ -50,15 +53,16 @@ public:

std::shared_ptr<Variant> init(const std::shared_ptr<ngraph::Node>& node) override;

std::shared_ptr<PrecisionsAttribute> get() { return this->m_value; }
std::shared_ptr<ngraph::PrecisionsAttribute> get() { return this->m_value; }

// create attribute instance for node
static std::shared_ptr<VariantWrapper<std::shared_ptr<PrecisionsAttribute>>> create(
static std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::PrecisionsAttribute>>> create(
    const std::shared_ptr<ngraph::Node>& node,
    const AttributeParameters& params);
// merge attribute instances which can be got from different sources: node, input port or output port
void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>>& attributes);
void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::PrecisionsAttribute>>>>& attributes);
// vizualize shared attributes details in VizualizeTree pass
std::string to_string() override;
};
} // namespace ngraph

} // namespace ov
@ -32,12 +32,15 @@ public:
};

using QuantizationAlignmentAttributePtr = std::shared_ptr<QuantizationAlignmentAttribute>;
} // namespace ngraph

extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<QuantizationAlignmentAttributePtr>;
namespace ov {

extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<ngraph::QuantizationAlignmentAttributePtr>;

template<>
class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>> :
class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<ngraph::QuantizationAlignmentAttribute>> :
    public VariantImpl<std::shared_ptr<QuantizationAlignmentAttribute>> {
    public VariantImpl<std::shared_ptr<ngraph::QuantizationAlignmentAttribute>> {
public:
static constexpr VariantTypeInfo type_info{ "LowPrecision::QuantizationAlignment", 0 };

@ -49,12 +52,12 @@ public:

std::shared_ptr<Variant> init(const std::shared_ptr<ngraph::Node>& node) override;

std::shared_ptr<QuantizationAlignmentAttribute> get() { return this->m_value; }
std::shared_ptr<ngraph::QuantizationAlignmentAttribute> get() { return this->m_value; }

static std::shared_ptr<VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>>> create(
static std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::QuantizationAlignmentAttribute>>> create(
    const std::shared_ptr<ngraph::Node>& node,
    const AttributeParameters& params);
void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>>>>& attributes);
void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::QuantizationAlignmentAttribute>>>>& attributes);
std::string to_string() override;
};
} // namespace ngraph
} // namespace ov
@ -55,8 +55,8 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
auto convolution = m.get_match_root();

if (!canConvolutionBeTransformed(context, convolution)) {
auto weightInput = convolution->get_input_node_shared_ptr(1);
const auto weightInput = convolution->get_input_node_shared_ptr(1);
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightInput);
const auto reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightInput);
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
NetworkHelper::getDequantization(convolution, 1ul) :
NetworkHelper::getDequantization(reshapeFromWeights);
@ -69,7 +69,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
reshapeFromWeights->input_value(1),
false);
}
if (as_type_ptr<opset1::Constant>(resultConstant)) {
if (is_type<opset1::Constant>(resultConstant)) {
replace_node(weightInput, resultConstant);
}
} else {
@ -84,10 +84,9 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
{
std::shared_ptr<opset1::Subtract> subtract;
if (dequantization.subtract != nullptr) {
std::shared_ptr<ngraph::Node> layer = dequantization.subtract;
NetworkHelper::cleanRunTimeInfo(dequantization.subtract->shared_from_this());
ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(layer);

auto optimizedSubtract = NetworkHelper::optimizeSubtract(dequantization.subtract);

if (optimizedSubtract == nullptr) {
optimizedSubtract = dequantization.subtract;
}
@ -99,7 +98,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
size_t length = subtract->get_output_partial_shape(0).rank().get_length();

// Insert explicit broadcast for channel dimension [1] and immediately fold it
Shape broadcastShape(subtract->get_output_partial_shape(0).rank().get_length(), 1);
Shape broadcastShape(length, 1);
broadcastShape[1] = subtract->get_output_partial_shape(0)[1].get_length();

std::shared_ptr<Node> newShift = fold<opset1::Broadcast>(
@ -122,11 +121,9 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
const size_t groupsCount = NetworkHelper::getGroupsCount(convolution);
std::shared_ptr<Node> newMultiplyAfterConst;
if (groupsCount > 1ul) {
std::shared_ptr<opset1::Constant> multiplyConst = as_type_ptr<opset1::Constant>(dequantization.multiply->get_input_node_shared_ptr(1));
const std::vector<float> scales = dequantization.multiplyConstant->cast_vector<float>();

const std::vector<float> scales = multiplyConst->cast_vector<float>();
if (scales.size() == 1ul) {
newMultiplyAfterConst = dequantization.multiply->input_value(1).get_node_shared_ptr()->clone_with_new_inputs({});
newMultiplyAfterConst = dequantization.multiplyConstant->clone_with_new_inputs({});
} else {
const ngraph::PartialShape inputPShape = convolution->get_input_partial_shape(0);
const size_t inputChannelsInGroup = inputPShape[1].get_length() / groupsCount;
@ -150,17 +147,15 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
}

newMultiplyAfterConst = std::make_shared<opset1::Constant>(
dequantization.multiply->get_input_element_type(1),
dequantization.multiplyConstant->get_element_type(),
newMulShape,
outputScales);
}
} else {
std::shared_ptr<opset1::Constant> reducedConstant = as_type_ptr<opset1::Constant>(
dequantization.multiply->input_value(1).get_node_shared_ptr());
newMultiplyAfterConst = std::make_shared<opset1::Constant>(
reducedConstant->get_output_element_type(0),
dequantization.multiplyConstant->get_element_type(),
Shape{ 1 },
reducedConstant->cast_vector<float>()[0]);
dequantization.multiplyConstant->cast_vector<float>()[0]);
}

const auto copyNode = convolution->clone_with_new_inputs({ dequantization.multiply->input_value(0), convolution->input_value(1) });
@ -190,7 +185,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph

if (is_type<opset1::Convert>(convolution->get_input_node_ptr(0))) {
auto newConvolution = convolution->clone_with_new_inputs({
convolution->get_input_node_ptr(0)->get_input_source_output(0),
convolution->get_input_node_ptr(0)->input_value(0),
convolution->input_value(1)});
replace_node(convolution, newConvolution);
NetworkHelper::copyInfo(convolution, newConvolution);
@ -206,7 +201,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
return false;
}

std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(convolution->input_value(1).get_node_shared_ptr());
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(convolution->get_input_node_shared_ptr(1));

dequantization = reshapeFromWeights == nullptr ?
NetworkHelper::getDequantization(convolution, 1ul) :
@ -221,12 +216,15 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph

std::shared_ptr<opset1::Multiply> multiplyFromWeights = as_type_ptr<opset1::Multiply>(
reshapeFromWeights == nullptr ?
convolution->input_value(1).get_node_shared_ptr() :
convolution->get_input_node_shared_ptr(1) :
convolution->get_input_node_ptr(1)->get_input_node_shared_ptr(0));
std::shared_ptr<opset1::Subtract> subtractFromWeights = as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0));

{
Shape newScaleShape = multiplyFromWeights->get_input_shape(1);
const auto newScalePShape = multiplyFromWeights->get_input_partial_shape(1);
assert(newScalePShape.is_static());
Shape newScaleShape = newScalePShape.to_shape();

if (!newScaleShape.empty()) {
// that's all we need: [C, 1, 1, 1] => [C, 1, 1]
newScaleShape.pop_back();
@ -268,9 +266,12 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
} else {
subtractFromWeights = as_type_ptr<opset1::Subtract>(optimizedSubtract);

const Shape weightsShape = subtractFromWeights->input(0).get_shape();
const auto weightsPShape = subtractFromWeights->get_input_partial_shape(0);
Shape zeroPointShape(weightsShape.size(), 1ul);
assert(weightsPShape.is_static());
zeroPointShape[0] = weightsShape[0];

const size_t weightsRankValue = weightsPShape.rank().get_length();
Shape zeroPointShape(weightsRankValue, 1ul);
zeroPointShape[0] = static_cast<size_t>(weightsPShape[0].get_length());

auto zeroPointConstant = fold<opset1::Broadcast>(
subtractFromWeights->input_value(1),
@ -288,7 +289,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
std::shared_ptr<Node> childNode = reshapeFromWeights == nullptr ? convolution : reshapeFromWeights;

auto newConvolution = convolution->clone_with_new_inputs({
convolution->get_input_source_output(0),
convolution->input_value(0),
childNode.get() == convolution.get() ?
convolution->get_input_node_ptr(1)->input_value(0) :
childNode->copy_with_new_inputs({convertFromWeights->input_value(0), childNode->input_value(1)})});
@ -311,7 +312,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph

std::shared_ptr<ngraph::opset1::Multiply> finalDequantization = NetworkHelper::optimizeMultipliesAfter(
convolution->output(0).get_target_inputs().begin()->get_node()->shared_from_this());
ngraph::copy_runtime_info({ convolution, finalDequantization }, finalDequantization);
copy_runtime_info({ convolution, finalDequantization }, finalDequantization);
updateOutput(context, finalDequantization, convolution);

// [C, 1, 1] -> [1, C, 1, 1]
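The reworked zero-point handling derives the broadcast shape from the weights' partial shape instead of a fully static Shape: the zero point is expanded to [O, 1, ..., 1], where the rank is taken from the weights and dimension 0 carries the output-channel count, before being folded into a constant. A short sketch with illustrative dimensions (the shape values below are made up for the example):

    // assuming statically-shaped 4-D weights [O, I, H, W], e.g. [64, 3, 3, 3]
    const auto weightsPShape = ngraph::PartialShape{64, 3, 3, 3};
    const size_t weightsRankValue = weightsPShape.rank().get_length();
    ngraph::Shape zeroPointShape(weightsRankValue, 1ul);                     // {1, 1, 1, 1}
    zeroPointShape[0] = static_cast<size_t>(weightsPShape[0].get_length()); // {64, 1, 1, 1}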
@ -87,7 +87,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
reshapeFromWeights->input_value(1),
false);
}
if (as_type_ptr<opset1::Constant>(resultConstant)) {
if (is_type<opset1::Constant>(resultConstant)) {
replace_node(weightsInput, resultConstant);
}
} else {
@ -100,16 +100,14 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData);
{
if (dequantization.subtract != nullptr) {
std::shared_ptr<ngraph::Node> layer = dequantization.subtract;
NetworkHelper::cleanRunTimeInfo(dequantization.subtract->shared_from_this());
ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(layer);

NetworkHelper::optimizeSubtract(dequantization.subtract);
}
std::shared_ptr<opset1::Constant> reducedConstant = as_type_ptr<opset1::Constant>(dequantization.multiplyConstant);
std::shared_ptr<Node> newMultiplyAfterConst = std::make_shared<opset1::Constant>(
reducedConstant->get_output_element_type(0),
dequantization.multiplyConstant->get_element_type(),
Shape{ 1 },
reducedConstant->cast_vector<float>()[0]);
dequantization.multiplyConstant->cast_vector<float>()[0]);
auto inputs = convolutionBackpropData->input_values();
inputs[0] = dequantization.multiply->input_value(0);
const auto copyNode = convolutionBackpropData->copy_with_new_inputs(inputs);
@ -126,7 +124,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get());

replace_node(convolutionBackpropData, newMultiplyAfter);
convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr();
convolutionBackpropData = newMultiplyAfter->get_input_node_shared_ptr(0);
inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0);
if (is_type<opset1::Convert>(convolutionBackpropData->get_input_node_ptr(0))) {
auto newConvolution = convolutionBackpropData->copy_with_new_inputs(inputs);
@ -137,7 +135,6 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con

{
decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul);

dequantization = NetworkHelper::getDequantization(convolutionBackpropData, 1ul);

if (is_type<opset1::FakeQuantize>(dequantization.data.get_node())) {
@ -152,7 +149,10 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
std::shared_ptr<opset1::Subtract> subtractFromWeights = as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0));

{
Shape newScaleShape = multiplyFromWeights->get_input_shape(1);
const auto newScalePShape = multiplyFromWeights->get_input_partial_shape(1);
assert(newScalePShape.is_static());
Shape newScaleShape = newScalePShape.to_shape();

auto inputs = convolutionBackpropData->input_values();
inputs[1] = multiplyFromWeights->input_value(0);
auto newMultiplyAfter = std::make_shared<DequantizationMultiply>(
@ -164,7 +164,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
false),
convolutionBackpropData->get_output_element_type(0)));
replace_node(convolutionBackpropData, newMultiplyAfter);
convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr();
convolutionBackpropData = newMultiplyAfter->get_input_node_shared_ptr(0);
}

if (subtractFromWeights != nullptr) {
@ -175,9 +175,12 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
} else {
subtractFromWeights = as_type_ptr<opset1::Subtract>(optimizedSubtract);

const Shape weightsShape = subtractFromWeights->input(0).get_shape();
const auto weightsPShape = subtractFromWeights->get_input_partial_shape(0);
Shape zeroPointShape(weightsShape.size(), 1ul);
assert(weightsPShape.is_static());
zeroPointShape[1] = weightsShape[1];

const size_t weightsRankValue = weightsPShape.rank().get_length();
Shape zeroPointShape(weightsRankValue, 1ul);
zeroPointShape[1] = static_cast<size_t>(weightsPShape[1].get_length());

auto zeroPointConstant = fold<opset1::Broadcast>(
subtractFromWeights->get_input_node_shared_ptr(1),
@ -215,7 +218,6 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
rt["DISABLED_CONSTANT_FOLDING"] = std::make_shared<ngraph::VariantWrapper<std::string>>("");
}

return true;
}
@ -56,8 +56,10 @@ bool FakeQuantizeTransformation::transform(TransformationContext& context, ngrap
namespace fq {

static std::shared_ptr<Node> updateShape(std::shared_ptr<Node> constantOp, const PartialShape& targetShape) {
assert(constantOp->get_output_partial_shape(0).is_static());
const Shape shape = constantOp->get_output_shape(0);
if ((shape.size() < static_cast<size_t>(targetShape.rank().get_length())) && (shape.size() > 1ul)) {
if ((shape.size() > 1ul) && (shape.size() < static_cast<size_t>(targetShape.rank().get_length()))) {
constantOp = fold<opset1::Unsqueeze>(
constantOp,
std::make_shared<opset1::Constant>(ngraph::element::i32, Shape{ 1 }, std::vector<size_t>({ 0ul })));
@ -93,19 +95,19 @@ static std::shared_ptr<opset1::Constant> getConstant(const std::shared_ptr<Node>
} // namespace fq

bool FakeQuantizeTransformation::checkElementwise(const std::shared_ptr<Node>& eltwise) {
const std::shared_ptr<opset1::Constant> constant = fq::getConstant(eltwise);
if (constant == nullptr) {
return false;
}

Shape shape = constant->get_shape();
if (shape_size(shape) != 1ul) {
const auto eltwiseInputPShape = eltwise->get_input_partial_shape(0);
const auto eltwiseOutputPShape = eltwise->get_output_partial_shape(0);
if (eltwiseInputPShape != eltwiseOutputPShape || eltwiseInputPShape.rank().is_dynamic() || eltwiseOutputPShape.rank().is_dynamic()) {
return false;
}

std::shared_ptr<opset1::Constant> constant = fq::getConstant(eltwise);
if (constant == nullptr) {
return false;
}

Shape shape = constant->get_output_shape(0);
if ((!shape.empty()) && (shape_size(shape) != 1ul)) {
if ((eltwiseOutputPShape.rank().get_length() - shape.size()) > 1) {
return false;
}
@ -179,8 +181,8 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
inputHighConst_f32 = fq::updateShape(fold<opset1::Subtract>(inputHighConst_f32, value), fakeQuantize->get_output_partial_shape(0));
} else if (is_type<opset1::Convert>(eltwise)) {
// issue #40611
if ((eltwise->input(0).get_element_type() == element::i32) &&
if ((eltwise->get_input_element_type(0) == element::i32) &&
((eltwise->output(0).get_element_type() == element::f16) || (eltwise->output(0).get_element_type() == element::f32))) {
((eltwise->get_output_element_type(0) == element::f16) || (eltwise->get_output_element_type(0) == element::f32))) {
return nullptr;
}
} else {
@ -190,7 +192,7 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
const auto data = fq::getData(eltwise);
const size_t outputIdx = NetworkHelper::getParentOutputIndex(data, eltwise);

std::shared_ptr<opset1::FakeQuantize> newFakeQuantize = as_type_ptr<opset1::FakeQuantize>(fakeQuantize->clone_with_new_inputs({
const auto newFakeQuantize = as_type_ptr<opset1::FakeQuantize>(fakeQuantize->clone_with_new_inputs({
data->output(outputIdx),
inputLowConst_f32,
inputHighConst_f32,
@ -90,7 +90,7 @@ bool FakeQuantizeDequantization::checkShape(const std::shared_ptr<ngraph::Node>&

if (!inPShape.rank().is_dynamic()) {
for (int i = 0; i < inPShape.rank().get_length(); ++i) {
if (inPShape[i] != outPShape[i] && !inPShape.is_dynamic()) {
if (inPShape[i] != outPShape[i] && !inPShape[i].is_dynamic()) {
return false;
}
}
@ -108,7 +108,7 @@ bool FakeQuantizeDequantization::checkElementwise(const std::shared_ptr<ngraph::
return false;
}

const ngraph::Shape constShape = constant->get_output_shape(0);
const ngraph::Shape constShape = constant->get_shape();
if ((constShape.size() > 5ul)) {
return false;
}
@ -40,8 +40,12 @@ bool FuseFakeQuantizeTransformation::transform(TransformationContext& context, n

namespace fuse_fq {

std::shared_ptr<Node> updateShape(std::shared_ptr<Node> op, const Shape& targetShape) {
std::shared_ptr<Node> updateShape(std::shared_ptr<Node> op, const PartialShape& targetPShape) {
assert(targetPShape.is_static());
assert(op->get_output_partial_shape(0).is_static());
const Shape targetShape = targetPShape.to_shape();
const Shape shape = op->get_output_shape(0);

if ((shape.size() < targetShape.size()) && (shape.size() > 1ul)) {
op = fold<opset1::Unsqueeze>(
op,
@ -81,14 +85,19 @@ bool eltwiseWithConstant(const std::shared_ptr<Node>& eltwise) {
return false;
}

Shape shape = constant->get_output_shape(0);
Shape shape = constant->get_shape();
if ((!shape.empty()) && (shape_size(shape) != 1ul)) {
const Shape eltwiseShape = eltwise->get_output_shape(0);
const auto eltwisePShape = eltwise->get_output_partial_shape(0);
if ((eltwiseShape.size() - shape.size()) > 1) {
if (eltwisePShape.rank().is_dynamic()) {
return false;
}

if ((eltwiseShape.size() - shape.size()) == 1ul) {
const size_t eltwiseOutRank = eltwisePShape.rank().get_length();
if ((eltwiseOutRank - shape.size()) > 1) {
return false;
}

if ((eltwiseOutRank - shape.size()) == 1ul) {
shape.insert(shape.begin(), 1ul);
}

@ -118,22 +127,22 @@ std::shared_ptr<opset1::FakeQuantize> FuseFakeQuantizeTransformation::handle(
constant :
foldConvert(constant, eltwise->get_output_element_type(0));

inputLowConst = fuse_fq::updateShape(fold<opset1::Divide>(inputLowConst, value), fakeQuantize->get_output_shape(0));
inputLowConst = fuse_fq::updateShape(fold<opset1::Divide>(inputLowConst, value), fakeQuantize->get_output_partial_shape(0));
inputHightConst = fuse_fq::updateShape(fold<opset1::Divide>(inputHightConst, value), fakeQuantize->get_output_shape(0));
inputHightConst = fuse_fq::updateShape(fold<opset1::Divide>(inputHightConst, value), fakeQuantize->get_output_partial_shape(0));
} else if (is_type<opset1::Divide>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) {
const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ?
constant :
foldConvert(constant, eltwise->get_output_element_type(0));

inputLowConst = fuse_fq::updateShape(fold<opset1::Multiply>(inputLowConst, value), fakeQuantize->get_output_shape(0));
inputLowConst = fuse_fq::updateShape(fold<opset1::Multiply>(inputLowConst, value), fakeQuantize->get_output_partial_shape(0));
inputHightConst = fuse_fq::updateShape(fold<opset1::Multiply>(inputHightConst, value), fakeQuantize->get_output_shape(0));
inputHightConst = fuse_fq::updateShape(fold<opset1::Multiply>(inputHightConst, value), fakeQuantize->get_output_partial_shape(0));
} else if (is_type<opset1::Subtract>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) {
const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ?
constant :
foldConvert(constant, eltwise->get_output_element_type(0));

inputLowConst = fuse_fq::updateShape(fold<opset1::Add>(inputLowConst, value), fakeQuantize->get_output_shape(0));
inputLowConst = fuse_fq::updateShape(fold<opset1::Add>(inputLowConst, value), fakeQuantize->get_output_partial_shape(0));
inputHightConst = fuse_fq::updateShape(fold<opset1::Add>(inputHightConst, value), fakeQuantize->get_output_shape(0));
inputHightConst = fuse_fq::updateShape(fold<opset1::Add>(inputHightConst, value), fakeQuantize->get_output_partial_shape(0));
} else if (is_type<opset1::Add>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) {
if (is_type<opset1::Convolution>(fuse_fq::getData(eltwise)) ||
is_type<opset1::GroupConvolution>(fuse_fq::getData(eltwise))) {
@ -144,8 +153,8 @@ std::shared_ptr<opset1::FakeQuantize> FuseFakeQuantizeTransformation::handle(
constant :
foldConvert(constant, eltwise->get_output_element_type(0));

inputLowConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputLowConst, value), fakeQuantize->get_output_shape(0));
inputLowConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputLowConst, value), fakeQuantize->get_output_partial_shape(0));
inputHightConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputHightConst, value), fakeQuantize->get_output_shape(0));
inputHightConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputHightConst, value), fakeQuantize->get_output_partial_shape(0));
} else if (is_type<opset1::Convert>(eltwise)) {
// issue #40611
if ((eltwise->input(0).get_element_type() == element::i32) && (eltwise->output(0).get_element_type() == element::f32)) {
@ -72,7 +72,7 @@ bool InterpolateTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer
|
|||||||
std::shared_ptr<opset4::Interpolate> interpolate4 = as_type_ptr<opset4::Interpolate>(layer);
|
std::shared_ptr<opset4::Interpolate> interpolate4 = as_type_ptr<opset4::Interpolate>(layer);
|
||||||
if (interpolate4) {
|
if (interpolate4) {
|
||||||
const auto attrs = interpolate4->get_attrs();
|
const auto attrs = interpolate4->get_attrs();
|
||||||
return attrs.mode == op::v4::Interpolate::InterpolateMode::nearest;
|
return attrs.mode == op::v4::Interpolate::InterpolateMode::NEAREST;
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
@ -108,7 +108,7 @@ bool InterpolateTransformation::canBeTransformed(const TransformationContext& co
|
|||||||
if (interpolate4) {
|
if (interpolate4) {
|
||||||
const auto interpAttrs = interpolate4->get_attrs();
|
const auto interpAttrs = interpolate4->get_attrs();
|
||||||
|
|
||||||
if (interpAttrs.mode != op::v4::Interpolate::InterpolateMode::nearest) {
|
if (interpAttrs.mode != op::v4::Interpolate::InterpolateMode::NEAREST) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -126,7 +126,7 @@ bool InterpolateTransformation::canBeTransformed(const TransformationContext& co
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (interpAttrs.coordinate_transformation_mode == op::v4::Interpolate::CoordinateTransformMode::align_corners) {
|
if (interpAttrs.coordinate_transformation_mode == op::v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -50,6 +50,7 @@
|
|||||||
#include "low_precision/multiply.hpp"
|
#include "low_precision/multiply.hpp"
|
||||||
#include "low_precision/mvn.hpp"
|
#include "low_precision/mvn.hpp"
|
||||||
#include "low_precision/normalize_l2.hpp"
|
#include "low_precision/normalize_l2.hpp"
|
||||||
|
#include "low_precision/pad.hpp"
|
||||||
#include "low_precision/prelu.hpp"
|
#include "low_precision/prelu.hpp"
|
||||||
#include "low_precision/reduce_max.hpp"
|
#include "low_precision/reduce_max.hpp"
|
||||||
#include "low_precision/reduce_mean.hpp"
|
#include "low_precision/reduce_mean.hpp"
|
||||||
@ -219,6 +220,7 @@ bool ngraph::pass::low_precision::LowPrecision::run_on_function(std::shared_ptr<
|
|||||||
common->add_matcher<ngraph::pass::low_precision::MultiplyTransformation>(params);
|
common->add_matcher<ngraph::pass::low_precision::MultiplyTransformation>(params);
|
||||||
common->add_matcher<ngraph::pass::low_precision::MVNTransformation>(params);
|
common->add_matcher<ngraph::pass::low_precision::MVNTransformation>(params);
|
||||||
common->add_matcher<ngraph::pass::low_precision::NormalizeL2Transformation>(params);
|
common->add_matcher<ngraph::pass::low_precision::NormalizeL2Transformation>(params);
|
||||||
|
common->add_matcher<ngraph::pass::low_precision::PadTransformation>(params);
|
||||||
common->add_matcher<ngraph::pass::low_precision::PReluTransformation>(params);
|
common->add_matcher<ngraph::pass::low_precision::PReluTransformation>(params);
|
||||||
common->add_matcher<ngraph::pass::low_precision::ReduceMaxTransformation>(params);
|
common->add_matcher<ngraph::pass::low_precision::ReduceMaxTransformation>(params);
|
||||||
common->add_matcher<ngraph::pass::low_precision::ReduceMeanTransformation>(params);
|
common->add_matcher<ngraph::pass::low_precision::ReduceMeanTransformation>(params);
|
||||||
|
@ -141,6 +141,7 @@ bool ngraph::pass::low_precision::MarkupPrecisions::isPrecisionPreserved(const s
|
|||||||
{ name<opset1::ReduceMin>() },
|
{ name<opset1::ReduceMin>() },
|
||||||
{ name<opset1::Relu>() },
|
{ name<opset1::Relu>() },
|
||||||
// TODO: there are conditions
|
// TODO: there are conditions
|
||||||
|
{ name<opset1::Pad>() },
|
||||||
{ name<opset1::Reshape>() },
|
{ name<opset1::Reshape>() },
|
||||||
{ name<opset1::Squeeze>() },
|
{ name<opset1::Squeeze>() },
|
||||||
{ name<opset1::Split>() },
|
{ name<opset1::Split>() },
|
||||||
@ -166,7 +167,7 @@ bool ngraph::pass::low_precision::MarkupPrecisions::isPrecisionPreserved(const s
|
|||||||
std::shared_ptr<opset4::Interpolate> interpolate4 = as_type_ptr<opset4::Interpolate>(node);
|
std::shared_ptr<opset4::Interpolate> interpolate4 = as_type_ptr<opset4::Interpolate>(node);
|
||||||
if (interpolate4) {
|
if (interpolate4) {
|
||||||
const auto attrs = interpolate4->get_attrs();
|
const auto attrs = interpolate4->get_attrs();
|
||||||
return attrs.mode == op::v4::Interpolate::InterpolateMode::nearest;
|
return attrs.mode == op::v4::Interpolate::InterpolateMode::NEAREST;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -194,6 +195,7 @@ bool ngraph::pass::low_precision::MarkupPrecisions::isSupported(const std::share
|
|||||||
{ name<ngraph::op::MVN>() },
|
{ name<ngraph::op::MVN>() },
|
||||||
{ name<opset6::MVN>() },
|
{ name<opset6::MVN>() },
|
||||||
{ name<opset1::NormalizeL2>() },
|
{ name<opset1::NormalizeL2>() },
|
||||||
|
{ name<opset1::Pad>() },
|
||||||
{ name<opset1::PRelu>() },
|
{ name<opset1::PRelu>() },
|
||||||
{ name<opset1::ReduceMax>() },
|
{ name<opset1::ReduceMax>() },
|
||||||
{ name<opset1::ReduceMean>() },
|
{ name<opset1::ReduceMean>() },
|
||||||
|
@ -94,7 +94,10 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
|
|||||||
Shape(dequantization1.subtract->get_output_partial_shape(0).rank().get_length(), 1) :
|
Shape(dequantization1.subtract->get_output_partial_shape(0).rank().get_length(), 1) :
|
||||||
dequantization1.subtractConstant->get_shape();
|
dequantization1.subtractConstant->get_shape();
|
||||||
|
|
||||||
const auto weightsShape = newMatMul->get_input_shape(1);
|
const auto weightsPShape = newMatMul->get_input_partial_shape(1);
|
||||||
|
assert(weightsPShape.is_static());
|
||||||
|
const auto weightsShape = weightsPShape.to_shape();
|
||||||
|
|
||||||
const size_t firstWeightsIdx = matMul->get_transpose_b() ? weightsShape.size() - 1ul : weightsShape.size() - 2ul;
|
const size_t firstWeightsIdx = matMul->get_transpose_b() ? weightsShape.size() - 1ul : weightsShape.size() - 2ul;
|
||||||
const size_t lastDataIdx = matMul->get_transpose_a() ? broadcastShape.size() - 2 : broadcastShape.size() - 1;
|
const size_t lastDataIdx = matMul->get_transpose_a() ? broadcastShape.size() - 2 : broadcastShape.size() - 1;
|
||||||
broadcastShape[lastDataIdx] = weightsShape[firstWeightsIdx];
|
broadcastShape[lastDataIdx] = weightsShape[firstWeightsIdx];
|
||||||
@ -118,8 +121,8 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
|
|||||||
parent = newSubtract;
|
parent = newSubtract;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto transpose = [](const std::shared_ptr<Node>& node) -> std::shared_ptr<Node> {
|
auto transpose = [](const std::shared_ptr<opset1::Constant>& node) -> std::shared_ptr<Node> {
|
||||||
const Shape outputShape = node->get_output_shape(0);
|
const Shape outputShape = node->get_shape();
|
||||||
if (outputShape.size() < 2ul) {
|
if (outputShape.size() < 2ul) {
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
@ -153,7 +156,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto newMulConst = NetworkHelper::toScalarIfPossible(fold<ngraph::opset1::Multiply>(
|
const auto newMulConst = NetworkHelper::toScalarIfPossible(fold<opset1::Multiply>(
|
||||||
mulConst1,
|
mulConst1,
|
||||||
foldConvert(mulConst2, element::f32)));
|
foldConvert(mulConst2, element::f32)));
|
||||||
|
|
||||||
|
@ -164,17 +164,17 @@ bool MultiplyToGroupConvolutionTransformation::canBeTransformed(const Transforma
|
|||||||
|
|
||||||
Shape constShape;
|
Shape constShape;
|
||||||
int inputIndex;
|
int inputIndex;
|
||||||
if (is_type<opset1::Constant>(operation->get_input_node_shared_ptr(1))) {
|
if (const auto constant = as_type_ptr<opset1::Constant>(operation->get_input_node_shared_ptr(1))) {
|
||||||
inputIndex = 0;
|
inputIndex = 0;
|
||||||
constShape = operation->get_input_shape(1);
|
constShape = constant->get_shape();
|
||||||
if (is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0)) ||
|
if (is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0)) ||
|
||||||
(is_type<opset1::Subtract>(operation->get_input_node_shared_ptr(0)) &&
|
(is_type<opset1::Subtract>(operation->get_input_node_shared_ptr(0)) &&
|
||||||
is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0)->get_input_node_shared_ptr(0)))) {
|
is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0)->get_input_node_shared_ptr(0)))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else if (is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0))) {
|
} else if (const auto constant = as_type_ptr<opset1::Constant>(operation->get_input_node_shared_ptr(0))) {
|
||||||
inputIndex = 1;
|
inputIndex = 1;
|
||||||
constShape = operation->get_input_shape(0);
|
constShape = constant->get_shape();
|
||||||
} else {
|
} else {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -191,12 +191,12 @@ size_t NetworkHelper::getInputChannelsCount(std::shared_ptr<Node> layer) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
size_t NetworkHelper::getGroupsCount(std::shared_ptr<Node> layer) {
|
size_t NetworkHelper::getGroupsCount(std::shared_ptr<Node> layer) {
|
||||||
if (as_type_ptr<opset1::Convolution>(layer)) {
|
if (is_type<opset1::Convolution>(layer)) {
|
||||||
return 1;
|
return 1;
|
||||||
} else if (auto group_convolution = as_type_ptr<opset1::GroupConvolution>(layer)) {
|
} else if (is_type<opset1::GroupConvolution>(layer)) {
|
||||||
return layer->get_input_shape(1)[0]; // input weights for opset1::GC is in format GOI..., see the specification
|
return layer->get_input_partial_shape(1)[0].get_length(); // input weights for opset1::GC is in format GOI..., see the specification
|
||||||
} else {
|
} else {
|
||||||
THROW_TRANSFORMATION_EXCEPTION << "Invalid layer type of " << layer->get_friendly_name() << "; expected Convolutino or GroupConvolution";
|
THROW_TRANSFORMATION_EXCEPTION << "Invalid layer type of " << layer->get_friendly_name() << "; expected Convolution or GroupConvolution";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -239,9 +239,15 @@ std::shared_ptr<Node> NetworkHelper::swapMultiplyAndAdd(std::shared_ptr<opset1::
|
|||||||
auto b = addAfterMultiply->get_input_node_shared_ptr(multiplyBranch == 0 ? 1 : 0);
|
auto b = addAfterMultiply->get_input_node_shared_ptr(multiplyBranch == 0 ? 1 : 0);
|
||||||
std::shared_ptr<Node> bDivA;
|
std::shared_ptr<Node> bDivA;
|
||||||
|
|
||||||
if (shape_size(b->get_output_shape(0)) == 1 ||
|
const auto aPShape = a->get_output_partial_shape(0);
|
||||||
shape_size(a->get_output_shape(0)) == 1 ||
|
assert(aPShape.is_static());
|
||||||
shape_size(b->get_output_shape(0)) == shape_size(a->get_output_shape(0))) {
|
const auto aShape = aPShape.to_shape();
|
||||||
|
|
||||||
|
const auto bPShape = b->get_output_partial_shape(0);
|
||||||
|
assert(bPShape.is_static());
|
||||||
|
const auto bShape = bPShape.to_shape();
|
||||||
|
|
||||||
|
if ((shape_size(bShape) == 1) || (shape_size(aShape) == 1) || (shape_size(bShape) == shape_size(aShape))) {
|
||||||
// safe division to avoid NaN
|
// safe division to avoid NaN
|
||||||
const std::vector<float> bValues = as_type_ptr<opset1::Constant>(b)->cast_vector<float>();
|
const std::vector<float> bValues = as_type_ptr<opset1::Constant>(b)->cast_vector<float>();
|
||||||
const std::vector<float> aValues = as_type_ptr<opset1::Constant>(a)->cast_vector<float>();
|
const std::vector<float> aValues = as_type_ptr<opset1::Constant>(a)->cast_vector<float>();
|
||||||
@ -263,7 +269,7 @@ std::shared_ptr<Node> NetworkHelper::swapMultiplyAndAdd(std::shared_ptr<opset1::
|
|||||||
auto aPrecision = a->get_output_element_type(0);
|
auto aPrecision = a->get_output_element_type(0);
|
||||||
bDivA = std::make_shared<opset1::Constant>(
|
bDivA = std::make_shared<opset1::Constant>(
|
||||||
aPrecision,
|
aPrecision,
|
||||||
aBroadcasted ? b->get_output_shape(0) : a->get_output_shape(0),
|
aBroadcasted ? bShape : aShape,
|
||||||
bDivAValues);
|
bDivAValues);
|
||||||
} else {
|
} else {
|
||||||
b = foldConvert(b, element::f32);
|
b = foldConvert(b, element::f32);
|
||||||
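In swapMultiplyAndAdd the bias constant b is divided element-wise by the scale constant a so the Add can be hoisted in front of the Multiply; the safe-division comment refers to leaving entries untouched where the scale is zero. A rough standalone illustration of that computation, assuming the two buffers either have equal length or one of them is a single broadcast value (the function name is mine, not from the sources):

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Element-wise b / a with scalar-to-vector broadcasting; entries with a zero
// scale stay at 0 instead of producing NaN or Inf.
std::vector<float> divideBiasByScale(const std::vector<float>& b, const std::vector<float>& a) {
    const size_t n = std::max(a.size(), b.size());
    std::vector<float> result(n, 0.0f);
    for (size_t i = 0; i < n; ++i) {
        const float scale = a[a.size() == 1 ? 0 : i];
        const float bias = b[b.size() == 1 ? 0 : i];
        result[i] = (scale == 0.0f) ? 0.0f : bias / scale;
    }
    return result;
}
```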
@ -463,7 +469,14 @@ std::shared_ptr<ngraph::opset1::Multiply> NetworkHelper::optimizeMultipliesAfter
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto newInput = multiply->input_value(1 - constant1->output(0).get_target_inputs().begin()->get_index());
|
auto newInput = multiply->input_value(1 - constant1->output(0).get_target_inputs().begin()->get_index());
|
||||||
auto newConst = fold<opset1::Multiply>(constant1, constant2);
|
auto multiplyResult = fold<opset1::Multiply>(constant1, constant2);
|
||||||
|
{
|
||||||
|
// optimize constant shape: used in rfcn-resnet101-coco
|
||||||
|
const auto multiplyResultConstant = as_type_ptr<opset1::Constant>(multiplyResult);
|
||||||
|
if ((multiplyResultConstant != nullptr) && NetworkHelper::isScalarLike(multiplyResultConstant)) {
|
||||||
|
multiplyResult = NetworkHelper::toScalar(multiplyResultConstant);
|
||||||
|
}
|
||||||
|
}
|
||||||
auto inputPrecision0 = nextMultiply->get_origin_input_type(0);
|
auto inputPrecision0 = nextMultiply->get_origin_input_type(0);
|
||||||
auto inputPrecision1 = nextMultiply->get_origin_input_type(1);
|
auto inputPrecision1 = nextMultiply->get_origin_input_type(1);
|
||||||
auto outputPrecision = nextMultiply->get_overridden_output_type(0);
|
auto outputPrecision = nextMultiply->get_overridden_output_type(0);
|
||||||
@ -472,7 +485,7 @@ std::shared_ptr<ngraph::opset1::Multiply> NetworkHelper::optimizeMultipliesAfter
|
|||||||
std::vector<element::Type>{ inputPrecision0, inputPrecision1 },
|
std::vector<element::Type>{ inputPrecision0, inputPrecision1 },
|
||||||
std::vector<element::Type>{ outputPrecision },
|
std::vector<element::Type>{ outputPrecision },
|
||||||
ngraph::op::TemporaryReplaceOutputType(newInput, inputPrecision0).get(),
|
ngraph::op::TemporaryReplaceOutputType(newInput, inputPrecision0).get(),
|
||||||
ngraph::op::TemporaryReplaceOutputType(newConst, inputPrecision1).get());
|
ngraph::op::TemporaryReplaceOutputType(multiplyResult, inputPrecision1).get());
|
||||||
copy_runtime_info(multiply, newMultiply);
|
copy_runtime_info(multiply, newMultiply);
|
||||||
replace_node(nextMultiply, newMultiply);
|
replace_node(nextMultiply, newMultiply);
|
||||||
return newMultiply;
|
return newMultiply;
|
||||||
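optimizeMultipliesAfter now folds the two constants first and, when the folded result is scalar-like, collapses it to a true scalar so the fused Multiply carries the smallest possible constant (the rfcn-resnet101-coco note above). A toy version of the scalar-like test, operating on a plain value buffer rather than an opset1::Constant:

```cpp
#include <vector>

// A constant is "scalar-like" when every element holds the same value, so it
// can be replaced by a single scalar without changing the computation.
bool isScalarLike(const std::vector<float>& values) {
    if (values.empty()) {
        return false;
    }
    for (const float v : values) {
        if (v != values.front()) {
            return false;
        }
    }
    return true;
}
```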
@ -734,9 +747,12 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
|
|||||||
auto constant = as_type_ptr<opset1::Constant>(fq->get_input_node_shared_ptr(0));
|
auto constant = as_type_ptr<opset1::Constant>(fq->get_input_node_shared_ptr(0));
|
||||||
|
|
||||||
if (constant) {
|
if (constant) {
|
||||||
const bool roundValues = roundValuesWasSet ? roundValuesArg : fq->output(0).get_element_type().is_integral();
|
const bool roundValues = roundValuesWasSet ? roundValuesArg : fq->get_output_element_type(0).is_integral();
|
||||||
|
|
||||||
|
const auto constPShape = fq->get_output_partial_shape(0);
|
||||||
|
assert(constPShape.is_static());
|
||||||
|
const Shape constShape = constPShape.to_shape();
|
||||||
|
|
||||||
Shape constShape = fq->get_output_shape(0);
|
|
||||||
if (constShape.empty() || constShape.size() > 5lu) {
|
if (constShape.empty() || constShape.size() > 5lu) {
|
||||||
THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected dimensions count " << constShape.size();
|
THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected dimensions count " << constShape.size();
|
||||||
}
|
}
|
||||||
@ -1117,7 +1133,7 @@ FakeQuantizeDequantization NetworkHelper::makeDequantization(
|
|||||||
const float dequantizationMul,
|
const float dequantizationMul,
|
||||||
const float dequantizationSub,
|
const float dequantizationSub,
|
||||||
const ngraph::element::Type originalPrecision,
|
const ngraph::element::Type originalPrecision,
|
||||||
const ngraph::PartialShape dataNodeOutputShape,
|
const ngraph::PartialShape& dataNodeOutputShape,
|
||||||
element::Type precision,
|
element::Type precision,
|
||||||
const ngraph::element::Type deqPrecision,
|
const ngraph::element::Type deqPrecision,
|
||||||
std::shared_ptr<ngraph::Node> input) {
|
std::shared_ptr<ngraph::Node> input) {
|
||||||
@ -1767,7 +1783,9 @@ std::vector<element::Type> NetworkHelper::precisionIntersection(
|
|||||||
|
|
||||||
bool NetworkHelper::isFQByDynamicDimension(const std::shared_ptr<opset1::FakeQuantize>& fq) {
|
bool NetworkHelper::isFQByDynamicDimension(const std::shared_ptr<opset1::FakeQuantize>& fq) {
|
||||||
const auto pInputShape = fq->get_input_partial_shape(0);
|
const auto pInputShape = fq->get_input_partial_shape(0);
|
||||||
auto olShape = fq->get_input_shape(3);
|
const auto olPShape = fq->get_input_partial_shape(3);
|
||||||
|
assert(olPShape.is_static());
|
||||||
|
auto olShape = olPShape.to_shape();
|
||||||
|
|
||||||
if (shape_size(olShape) > 1ul) {
|
if (shape_size(olShape) > 1ul) {
|
||||||
if (pInputShape.rank().is_dynamic()) {
|
if (pInputShape.rank().is_dynamic()) {
|
||||||
|
@ -78,12 +78,12 @@ bool NormalizeL2Transformation::canBeTransformed(const TransformationContext& co
|
|||||||
const std::vector<int64_t> axesByChannels = { 1, 2, 3 };
|
const std::vector<int64_t> axesByChannels = { 1, 2, 3 };
|
||||||
|
|
||||||
std::vector<int64_t> axesValues = axes->cast_vector<int64_t>();
|
std::vector<int64_t> axesValues = axes->cast_vector<int64_t>();
|
||||||
if (!(axesValues == axesAcrossSpatial || axesValues == axesByChannels)) {
|
if ((axesValues != axesAcrossSpatial) && (axesValues != axesByChannels)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const ngraph::Shape outputShape = scalesConst->get_output_shape(0);
|
const Shape outputShape = scalesConst->get_shape();
|
||||||
const size_t size = ngraph::shape_size(outputShape);
|
const size_t size = shape_size(outputShape);
|
||||||
if (size != 1ul) {
|
if (size != 1ul) {
|
||||||
const auto channelsInterval = operation->get_output_partial_shape(0)[1];
|
const auto channelsInterval = operation->get_output_partial_shape(0)[1];
|
||||||
if (channelsInterval.is_dynamic() || static_cast<size_t>(channelsInterval.get_length()) != size) {
|
if (channelsInterval.is_dynamic() || static_cast<size_t>(channelsInterval.get_length()) != size) {
|
||||||
|
277
inference-engine/src/low_precision_transformations/src/pad.cpp
Normal file
@ -0,0 +1,277 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "low_precision/pad.hpp"
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <ngraph/ngraph.hpp>
|
||||||
|
|
||||||
|
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||||
|
#include "low_precision/network_helper.hpp"
|
||||||
|
|
||||||
|
namespace ngraph {
|
||||||
|
namespace pass {
|
||||||
|
namespace low_precision {
|
||||||
|
|
||||||
|
NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::PadTransformation, "PadTransformation", 0);
|
||||||
|
|
||||||
|
PadTransformation::PadTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
|
auto mul = pattern::wrap_type<opset1::Multiply>();
|
||||||
|
auto padsBegin = pattern::wrap_type<opset1::Constant>();
|
||||||
|
auto padsEnd = pattern::wrap_type<opset1::Constant>();
|
||||||
|
auto padsValue = pattern::wrap_type<opset1::Constant>();
|
||||||
|
auto matcher = pattern::wrap_type<opset1::Pad>({ mul, padsBegin, padsEnd, padsValue });
|
||||||
|
|
||||||
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
|
auto op = m.get_match_root();
|
||||||
|
if (transformation_callback(op)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return transform(*context, m);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto m = std::make_shared<ngraph::pattern::Matcher>(matcher, "PadTransformation");
|
||||||
|
this->register_matcher(m, callback);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool PadTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) {
|
||||||
|
if (!canBeTransformed(context, m.get_match_root())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto pad = as_type_ptr<opset1::Pad>(NetworkHelper::separateInStandaloneBranch(m.get_match_root()));
|
||||||
|
const auto padConstant = as_type_ptr<opset1::Constant>(pad->get_input_node_shared_ptr(3));
|
||||||
|
const auto padConstantValue = padConstant->cast_vector<float>()[0];
|
||||||
|
|
||||||
|
const auto padsBegin = pad->get_pads_begin();
|
||||||
|
const auto padsEnd = pad->get_pads_end();
|
||||||
|
const auto padMode = pad->get_pad_mode();
|
||||||
|
|
||||||
|
auto dequantization = NetworkHelper::getDequantization(pad);
|
||||||
|
|
||||||
|
if (padMode == op::PadMode::CONSTANT) {
|
||||||
|
auto bcastConstant = [&](const std::shared_ptr<opset1::Constant> &constant) {
|
||||||
|
size_t padIdx = 0;
|
||||||
|
for (size_t i = 0; i < padsBegin.size(); ++i) {
|
||||||
|
if (padsBegin[i] != 0 || padsEnd[i] != 0) {
|
||||||
|
padIdx = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto inputPShape = pad->get_input_partial_shape(0);
|
||||||
|
assert(inputPShape[padIdx].is_static());
|
||||||
|
assert(inputPShape.rank().is_static());
|
||||||
|
auto bcastedShape = Shape(inputPShape.rank().get_length(), 1ul);
|
||||||
|
bcastedShape[padIdx] = inputPShape[padIdx].get_length();
|
||||||
|
|
||||||
|
const auto bCastConst = opset1::Constant::create(element::i32, Shape{bcastedShape.size()}, bcastedShape);
|
||||||
|
return as_type_ptr<opset1::Constant>(fold<opset1::Broadcast>(constant, bCastConst));
|
||||||
|
};
|
||||||
|
|
||||||
|
if (dequantization.subtract && shape_size(dequantization.subtractConstant->get_shape()) == 1ul) {
|
||||||
|
const auto broadcastedConstant = bcastConstant(dequantization.subtractConstant);
|
||||||
|
replace_node(dequantization.subtractConstant, broadcastedConstant);
|
||||||
|
dequantization.subtractConstant = broadcastedConstant;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (padConstantValue != 0.f && shape_size(dequantization.multiplyConstant->get_shape()) == 1ul) {
|
||||||
|
const auto broadcastedConstant = bcastConstant(dequantization.multiplyConstant);
|
||||||
|
replace_node(dequantization.multiplyConstant, broadcastedConstant);
|
||||||
|
dequantization.multiplyConstant = broadcastedConstant;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto foldConstantIfNecessary = [&padMode, &padsBegin, &padsEnd](
|
||||||
|
const std::shared_ptr<opset1::Constant>& constant,
|
||||||
|
const std::shared_ptr<opset1::Pad>& pad,
|
||||||
|
float padVal) {
|
||||||
|
const auto constantShape = constant->get_shape();
|
||||||
|
if (shape_size(constantShape) == 1ul) {
|
||||||
|
return NetworkHelper::toScalar(constant);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<size_t> padsForConstantBegin(constantShape.size(), 0ul);
|
||||||
|
std::vector<size_t> padsForConstantEnd(constantShape.size(), 0ul);
|
||||||
|
bool foldingIsNecessary = false;
|
||||||
|
|
||||||
|
// folding is necessary when dequantization and padding are applied along the same dimension
|
||||||
|
for (size_t i = 0; i < constantShape.size(); ++i) {
|
||||||
|
if (padsBegin[i] != 0ul && constantShape[i] != 1ul) {
|
||||||
|
foldingIsNecessary = true;
|
||||||
|
padsForConstantBegin[i] = padsBegin[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (padsEnd[i] != 0ul && constantShape[i] != 1ul) {
|
||||||
|
foldingIsNecessary = true;
|
||||||
|
padsForConstantEnd[i] = padsEnd[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (foldingIsNecessary) {
|
||||||
|
const auto beginConst = opset1::Constant::create(element::u32, { padsForConstantBegin.size() }, padsForConstantBegin);
|
||||||
|
const auto endConst = opset1::Constant::create(element::u32, { padsForConstantEnd.size() }, padsForConstantEnd);
|
||||||
|
const auto padValueConstant = opset1::Constant::create(constant->get_element_type(), Shape{}, { padVal });
|
||||||
|
const auto foldedConstant = fold<opset1::Pad>(constant, beginConst, endConst, padValueConstant, padMode);
|
||||||
|
return as_type_ptr<opset1::Constant>(foldedConstant);
|
||||||
|
} else {
|
||||||
|
return constant;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if (dequantization.subtract) {
|
||||||
|
const auto normalizedSubConst = NetworkHelper::normalizeDequantizationShape(dequantization.subtract);
|
||||||
|
float padValueForSub = padConstantValue;
|
||||||
|
if (padMode == op::PadMode::CONSTANT) {
|
||||||
|
padValueForSub = 0.f;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto newSubConstant = foldConstantIfNecessary(normalizedSubConst, pad, padValueForSub);
|
||||||
|
replace_node(normalizedSubConst, newSubConstant);
|
||||||
|
dequantization.subtractConstant = newSubConstant;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const auto normalizedMulConst = NetworkHelper::normalizeDequantizationShape(dequantization.multiply);
|
||||||
|
float padValueForMul = padConstantValue;
|
||||||
|
if (padMode == op::PadMode::CONSTANT) {
|
||||||
|
padValueForMul = 1.f;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto newMulConstant = foldConstantIfNecessary(normalizedMulConst, pad, padValueForMul);
|
||||||
|
replace_node(normalizedMulConst, newMulConstant);
|
||||||
|
dequantization.multiplyConstant = newMulConstant;
|
||||||
|
}
|
||||||
|
|
||||||
|
// we must convert the pad value to low precision
|
||||||
|
const auto convertedZero = opset1::Constant::create(dequantization.data.get_element_type(), Shape{}, { padConstantValue });
|
||||||
|
pad->set_argument(3, convertedZero);
|
||||||
|
|
||||||
|
moveDequantizationAfter(context, pad, dequantization, true);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool PadTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const {
|
||||||
|
if (!LayerTransformation::canBeTransformedSpatialDimension(context, op)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto pad = as_type_ptr<opset1::Pad>(op);
|
||||||
|
if (!pad) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto dequantization = NetworkHelper::getDequantization(op);
|
||||||
|
if (dequantization.empty()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto mode = pad->get_pad_mode();
|
||||||
|
if (mode == op::PadMode::CONSTANT) {
|
||||||
|
auto padAndDqByTheSameDimension = [&](const std::shared_ptr<opset1::Constant>& deqConst) {
|
||||||
|
const auto padsBegin = pad->get_pads_begin();
|
||||||
|
const auto padsEnd = pad->get_pads_end();
|
||||||
|
|
||||||
|
int beginNonZeroIdx = -1;
|
||||||
|
for (size_t i = 0; i < padsBegin.size(); ++i) {
|
||||||
|
const bool padDimensionNotUnique = (beginNonZeroIdx != -1) && (padsBegin[i] != 0);
|
||||||
|
if (padDimensionNotUnique) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (padsBegin[i] != 0) {
|
||||||
|
beginNonZeroIdx = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int endNonZeroIdx = -1;
|
||||||
|
for (size_t i = 0; i < padsEnd.size(); ++i) {
|
||||||
|
const bool padDimensionNotUnique = (endNonZeroIdx != -1) && (padsEnd[i] != 0);
|
||||||
|
if (padDimensionNotUnique) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (padsEnd[i] != 0) {
|
||||||
|
endNonZeroIdx = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((beginNonZeroIdx != endNonZeroIdx) && (beginNonZeroIdx != -1) && (endNonZeroIdx != -1)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t paddingDimension = beginNonZeroIdx != -1 ? beginNonZeroIdx : endNonZeroIdx;
|
||||||
|
const auto padInputPShape = pad->get_input_partial_shape(0);
|
||||||
|
const auto padInputRank = padInputPShape.rank();
|
||||||
|
if (padInputRank.is_dynamic() || padInputPShape[paddingDimension].is_dynamic()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
const size_t inputRankValue = padInputRank.get_length();
|
||||||
|
auto deqShape = deqConst->get_shape();
|
||||||
|
if (shape_size(deqShape) > 1ul) {
|
||||||
|
while (deqShape.size() < inputRankValue) {
|
||||||
|
deqShape.insert(deqShape.begin(), 1ul);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < deqShape.size(); ++i) {
|
||||||
|
const bool deqAndPadDimensionsMismatched = (deqShape[i] > 1ul) && (i != paddingDimension);
|
||||||
|
if (deqAndPadDimensionsMismatched) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (dequantization.subtract && !padAndDqByTheSameDimension(dequantization.subtractConstant)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto constant = as_type_ptr<opset1::Constant>(pad->get_input_node_shared_ptr(3));
|
||||||
|
const auto constantValue = constant->cast_vector<float>()[0];
|
||||||
|
if (constantValue != 0.f && !padAndDqByTheSameDimension(dequantization.multiplyConstant)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mode == op::PadMode::REFLECT) {
|
||||||
|
auto deqShape = dequantization.multiplyConstant->get_shape();
|
||||||
|
if (shape_size(deqShape) == 1ul) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
const auto padInputRank = pad->get_input_partial_shape(0).rank();
|
||||||
|
if (padInputRank.is_dynamic()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t inputRankValue = padInputRank.get_length();
|
||||||
|
while (deqShape.size() < inputRankValue) {
|
||||||
|
deqShape.insert(deqShape.begin(), 1ul);
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto padsBegin = pad->get_pads_begin();
|
||||||
|
const auto padsEnd = pad->get_pads_end();
|
||||||
|
|
||||||
|
// PadTransformation with "REFLECT" mode doesn't support dequantization and padding along the same dimension
|
||||||
|
for (size_t i = 0; i < deqShape.size(); ++i) {
|
||||||
|
if (deqShape[i] != 1ul && (padsBegin[i] != 0ul || padsEnd[i] != 0ul)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool PadTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace low_precision
|
||||||
|
} // namespace pass
|
||||||
|
} // namespace ngraph
|
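The new PadTransformation above only moves dequantization through a Pad when the padding touches a single axis; in CONSTANT mode a scalar Subtract or Multiply constant is first broadcast along that axis so it can later be padded together with the data. A standalone sketch of the broadcast target shape, with std::vector<size_t> in place of ngraph shapes, an illustrative helper name, and the assumption that padsBegin/padsEnd have the same rank as the input:

```cpp
#include <cstddef>
#include <vector>

// Build the broadcast shape for a scalar dequantization constant: all ones
// except the first padded axis, which takes the input extent on that axis.
std::vector<size_t> broadcastShapeForPad(const std::vector<size_t>& inputShape,
                                         const std::vector<size_t>& padsBegin,
                                         const std::vector<size_t>& padsEnd) {
    size_t padIdx = 0;
    for (size_t i = 0; i < padsBegin.size(); ++i) {
        if (padsBegin[i] != 0 || padsEnd[i] != 0) {
            padIdx = i;  // canBeTransformed guarantees a single padded dimension
            break;
        }
    }
    std::vector<size_t> bcastedShape(inputShape.size(), 1);
    bcastedShape[padIdx] = inputShape[padIdx];
    return bcastedShape;
}
```

For an input of shape {1, 3, 16, 16} padded along axis 1, a scalar constant is broadcast to {1, 3, 1, 1} before the per-channel folding.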
@ -47,7 +47,7 @@ void reshapeDequantizationConstant(const std::shared_ptr<opset1::Reshape>& resha
|
|||||||
auto replaceConstant = [](const std::shared_ptr<opset1::Reshape>& reshape, const std::shared_ptr<opset1::Constant>& originalConstant) {
|
auto replaceConstant = [](const std::shared_ptr<opset1::Reshape>& reshape, const std::shared_ptr<opset1::Constant>& originalConstant) {
|
||||||
// reshape for element-wise constant is not required
|
// reshape for element-wise constant is not required
|
||||||
auto constantShape = originalConstant->get_shape();
|
auto constantShape = originalConstant->get_shape();
|
||||||
if (shape_size(constantShape) == 1ul) {
|
if (NetworkHelper::isScalarLike(originalConstant)) {
|
||||||
if (!constantShape.empty()) {
|
if (!constantShape.empty()) {
|
||||||
const auto newConstant = NetworkHelper::toScalar(originalConstant);
|
const auto newConstant = NetworkHelper::toScalar(originalConstant);
|
||||||
replace_node(originalConstant, newConstant);
|
replace_node(originalConstant, newConstant);
|
||||||
@ -75,19 +75,28 @@ void reshapeDequantizationConstant(const std::shared_ptr<opset1::Reshape>& resha
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Shape newOperationConstantBroadcastedShape = originalConstant->output(0).get_shape();
|
auto getBCastedConst = [](const std::shared_ptr<opset1::Constant>& constant, size_t dimensionsToBroadcast) -> std::shared_ptr<Node> {
|
||||||
|
if (dimensionsToBroadcast == 1ul) {
|
||||||
|
return constant;
|
||||||
|
}
|
||||||
|
|
||||||
|
Shape newOperationConstantBroadcastedShape = constant->get_shape();
|
||||||
// add dimensions to broadcast values
|
// add dimensions to broadcast values
|
||||||
if (newOperationConstantBroadcastedShape.size() == 2ul) {
|
if (newOperationConstantBroadcastedShape.size() == 2ul) {
|
||||||
newOperationConstantBroadcastedShape.push_back(dimensionsToBroadcast);
|
newOperationConstantBroadcastedShape.push_back(dimensionsToBroadcast);
|
||||||
} else {
|
} else {
|
||||||
newOperationConstantBroadcastedShape[2] = dimensionsToBroadcast;
|
newOperationConstantBroadcastedShape[2] = dimensionsToBroadcast;
|
||||||
}
|
}
|
||||||
const std::shared_ptr<Node> broadcastedConstant = fold<opset1::Broadcast>(
|
|
||||||
originalConstant,
|
const auto targetShapeConstant = opset1::Constant::create(
|
||||||
std::make_shared<opset1::Constant>(
|
|
||||||
element::i32,
|
element::i32,
|
||||||
Shape({ newOperationConstantBroadcastedShape.size() }),
|
Shape{ newOperationConstantBroadcastedShape.size() },
|
||||||
newOperationConstantBroadcastedShape));
|
newOperationConstantBroadcastedShape);
|
||||||
|
|
||||||
|
return fold<opset1::Broadcast>(constant, targetShapeConstant);
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::shared_ptr<Node> broadcastedConstant = getBCastedConst(originalConstant, dimensionsToBroadcast);
|
||||||
|
|
||||||
std::vector<int> newReshapeConstValues(reshapeOutputRank.get_length(), 1ul);
|
std::vector<int> newReshapeConstValues(reshapeOutputRank.get_length(), 1ul);
|
||||||
newReshapeConstValues[1] = reshapeOutputPShape[1].get_length();
|
newReshapeConstValues[1] = reshapeOutputPShape[1].get_length();
|
||||||
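The new getBCastedConst helper above materializes the spatial elements of a dequantization constant so it can follow the data through the Reshape: a {1, C} constant gains a trailing spatial axis, while a higher-rank constant gets the spatial count written into axis 2. A small standalone sketch of just the target-shape computation (the name is mine; it assumes the non-2D constant has at least three axes, as in the transformation):

```cpp
#include <cstddef>
#include <vector>

// Compute the broadcast target shape used before reshaping the constant.
std::vector<size_t> broadcastTargetShape(std::vector<size_t> constShape, size_t dimensionsToBroadcast) {
    if (dimensionsToBroadcast == 1) {
        return constShape;  // nothing to materialize, the constant is reused as-is
    }
    if (constShape.size() == 2) {
        constShape.push_back(dimensionsToBroadcast);   // {1, C} -> {1, C, D}
    } else {
        constShape[2] = dimensionsToBroadcast;         // {1, C, 1, 1} -> {1, C, D, 1}
    }
    return constShape;
}
```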
@ -190,7 +199,7 @@ bool ReshapeTransformation::canBeTransformed(const TransformationContext& contex
|
|||||||
subtractShapeWithBatch.insert(subtractShapeWithBatch.begin(), 1ul);
|
subtractShapeWithBatch.insert(subtractShapeWithBatch.begin(), 1ul);
|
||||||
}
|
}
|
||||||
|
|
||||||
const Shape multiplyShape = dequantization.multiply == nullptr ? Shape{} : dequantization.multiply->input(1).get_shape();
|
const Shape multiplyShape = dequantization.multiply == nullptr ? Shape{} : dequantization.multiplyConstant->get_shape();
|
||||||
Shape multiplyShapeWithBatch = multiplyShape;
|
Shape multiplyShapeWithBatch = multiplyShape;
|
||||||
if ((dequantization.multiply != nullptr) &&
|
if ((dequantization.multiply != nullptr) &&
|
||||||
(multiplyShapeWithBatch.size() > 1ul) &&
|
(multiplyShapeWithBatch.size() > 1ul) &&
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
#include <ngraph/variant.hpp>
|
#include <ngraph/variant.hpp>
|
||||||
|
|
||||||
using namespace ngraph;
|
using namespace ngraph;
|
||||||
|
using namespace ov;
|
||||||
|
|
||||||
template class ngraph::VariantImpl<AvgPoolPrecisionPreservedAttributePtr>;
|
template class ngraph::VariantImpl<AvgPoolPrecisionPreservedAttributePtr>;
|
||||||
|
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
#include "low_precision/network_helper.hpp"
|
#include "low_precision/network_helper.hpp"
|
||||||
|
|
||||||
using namespace ngraph;
|
using namespace ngraph;
|
||||||
|
using namespace ov;
|
||||||
using namespace ngraph::pass::low_precision;
|
using namespace ngraph::pass::low_precision;
|
||||||
|
|
||||||
IntervalsAlignmentAttribute::IntervalsAlignmentAttribute(
|
IntervalsAlignmentAttribute::IntervalsAlignmentAttribute(
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp"
|
#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp"
|
||||||
|
|
||||||
using namespace ngraph;
|
using namespace ngraph;
|
||||||
|
using namespace ov;
|
||||||
|
|
||||||
template class ngraph::VariantImpl<PerTensorQuantizationAttribute>;
|
template class ngraph::VariantImpl<PerTensorQuantizationAttribute>;
|
||||||
constexpr VariantTypeInfo VariantWrapper<PerTensorQuantizationAttribute>::type_info;
|
constexpr VariantTypeInfo VariantWrapper<PerTensorQuantizationAttribute>::type_info;
|
@ -8,6 +8,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
using namespace ngraph;
|
using namespace ngraph;
|
||||||
|
using namespace ov;
|
||||||
|
|
||||||
PrecisionPreservedAttribute::PrecisionPreservedAttribute(const bool value) {
|
PrecisionPreservedAttribute::PrecisionPreservedAttribute(const bool value) {
|
||||||
sharedValue->value = value;
|
sharedValue->value = value;
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
#include "low_precision/network_helper.hpp"
|
#include "low_precision/network_helper.hpp"
|
||||||
|
|
||||||
using namespace ngraph;
|
using namespace ngraph;
|
||||||
|
using namespace ov;
|
||||||
|
|
||||||
// order defines default precision
|
// order defines default precision
|
||||||
const std::vector<ngraph::element::Type> PrecisionsAttribute::defaultPrecisions = { ngraph::element::u8, ngraph::element::i8 };
|
const std::vector<ngraph::element::Type> PrecisionsAttribute::defaultPrecisions = { ngraph::element::u8, ngraph::element::i8 };
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
#include <ngraph/opsets/opset1.hpp>
|
#include <ngraph/opsets/opset1.hpp>
|
||||||
#include "low_precision/network_helper.hpp"
|
#include "low_precision/network_helper.hpp"
|
||||||
|
|
||||||
|
using namespace ov;
|
||||||
using namespace ngraph;
|
using namespace ngraph;
|
||||||
using namespace ngraph::pass::low_precision;
|
using namespace ngraph::pass::low_precision;
|
||||||
|
|
||||||
|
@ -42,47 +42,40 @@ void transposeDequantizationConstant(std::shared_ptr<Node>& transpose) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dequantization.multiply->get_input_node_ptr(1)->get_output_shape(0).size() > 1ul) {
|
|
||||||
auto transposeDeqConstant = [](
|
auto transposeDeqConstant = [](
|
||||||
std::shared_ptr<Node> dequantizationConstant,
|
const std::shared_ptr<opset1::Constant>& dequantizationConstant,
|
||||||
const PartialShape& transposeOutputShape,
|
const PartialShape& transposeOutputPShape,
|
||||||
const std::shared_ptr<Node>& transposeConstant) -> std::shared_ptr<Node> {
|
const std::shared_ptr<Node>& transposeConstant) -> std::shared_ptr<Node> {
|
||||||
const auto dequantizationShape = dequantizationConstant->get_output_shape(0);
|
const auto constantShape = dequantizationConstant->get_shape();
|
||||||
if (dequantizationShape.empty() || (dequantizationShape.size() == 1ul)) {
|
if (shape_size(constantShape) == 1ul) {
|
||||||
return nullptr;
|
return NetworkHelper::toScalar(dequantizationConstant);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dequantizationShape.size() != static_cast<size_t>(transposeOutputShape.rank().get_length())) {
|
assert(transposeOutputPShape.rank().is_static());
|
||||||
dequantizationConstant = fold<opset1::Unsqueeze>(
|
const size_t transposeOutRank = transposeOutputPShape.rank().get_length();
|
||||||
dequantizationConstant,
|
if (constantShape.size() != transposeOutRank) {
|
||||||
std::make_shared<opset1::Constant>(element::i32, Shape{ 1 }, std::vector<size_t>{0}));
|
const auto unsqueezeConst = opset1::Constant::create(element::i32, Shape{ 1 }, std::vector<size_t>{ 0 });
|
||||||
}
|
const auto deqConstantWithBatch = fold<opset1::Unsqueeze>(dequantizationConstant, unsqueezeConst);
|
||||||
|
return fold<opset1::Transpose>(deqConstantWithBatch, transposeConstant);
|
||||||
|
} else {
|
||||||
return fold<opset1::Transpose>(dequantizationConstant, transposeConstant);
|
return fold<opset1::Transpose>(dequantizationConstant, transposeConstant);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if (dequantization.subtract != nullptr) {
|
if (dequantization.subtract != nullptr) {
|
||||||
auto constant = transposeDeqConstant(
|
const auto constant = transposeDeqConstant(
|
||||||
dequantization.subtractConstant,
|
dequantization.subtractConstant,
|
||||||
transpose->get_output_partial_shape(0),
|
transpose->get_output_partial_shape(0),
|
||||||
transpose->get_input_node_shared_ptr(1));
|
transpose->get_input_node_shared_ptr(1));
|
||||||
if (constant != nullptr) {
|
replace_node(dequantization.subtractConstant, constant);
|
||||||
replace_node(
|
|
||||||
dequantization.subtract->get_input_node_shared_ptr(1),
|
|
||||||
constant);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dequantization.multiply != nullptr) {
|
if (dequantization.multiply != nullptr) {
|
||||||
auto constant = transposeDeqConstant(
|
const auto constant = transposeDeqConstant(
|
||||||
dequantization.multiplyConstant,
|
dequantization.multiplyConstant,
|
||||||
transpose->get_output_partial_shape(0),
|
transpose->get_output_partial_shape(0),
|
||||||
transpose->get_input_node_shared_ptr(1));
|
transpose->get_input_node_shared_ptr(1));
|
||||||
if (constant != nullptr) {
|
replace_node(dequantization.multiplyConstant, constant);
|
||||||
replace_node(
|
|
||||||
dequantization.multiply->get_input_node_shared_ptr(1),
|
|
||||||
constant);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
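The rewritten transposeDeqConstant lambda above collapses single-element constants to scalars and otherwise unsqueezes a leading batch axis before applying the same permutation as the data. A standalone sketch of the resulting shape arithmetic, using plain vectors instead of ngraph constants (the helper name is illustrative):

```cpp
#include <cstddef>
#include <vector>

// Apply a transpose permutation to a constant's shape, unsqueezing leading
// 1-axes first when the constant's rank is below the permutation rank.
std::vector<size_t> transposeConstantShape(std::vector<size_t> constShape,
                                           const std::vector<size_t>& permutation) {
    while (constShape.size() < permutation.size()) {
        constShape.insert(constShape.begin(), 1);  // unsqueeze at axis 0
    }
    std::vector<size_t> transposed(permutation.size());
    for (size_t i = 0; i < permutation.size(); ++i) {
        transposed[i] = constShape[permutation[i]];
    }
    return transposed;
}
```

For a per-channel constant of shape {3, 1, 1} and a data permutation {0, 2, 3, 1}, the constant is first unsqueezed to {1, 3, 1, 1} and ends up as {1, 1, 1, 3}, matching the transposed data layout.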
@ -74,14 +74,13 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::shared_ptr<opset1::Constant> multiplyConst = as_type_ptr<opset1::Constant>(dequantization.multiply->get_input_node_shared_ptr(1));
|
const Shape multiplyConstShape = dequantization.multiplyConstant->get_shape();
|
||||||
const Shape multiplyConstShape = multiplyConst->get_output_shape(0);
|
|
||||||
if (!multiplyConstShape.empty() && (shape_size(multiplyConstShape) != 1ul)) {
|
if (!multiplyConstShape.empty() && (shape_size(multiplyConstShape) != 1ul)) {
|
||||||
const size_t groupsCount = NetworkHelper::getGroupsCount(layer);
|
const size_t groupsCount = NetworkHelper::getGroupsCount(layer);
|
||||||
const ngraph::PartialShape inputPShape = layer->get_input_partial_shape(0);
|
const PartialShape inputPShape = layer->get_input_partial_shape(0);
|
||||||
const size_t inputChannelsInGroup = inputPShape[1].get_length() / groupsCount;
|
const size_t inputChannelsInGroup = inputPShape[1].get_length() / groupsCount;
|
||||||
|
|
||||||
const std::vector<float> scales = multiplyConst->cast_vector<float>();
|
const std::vector<float> scales = dequantization.multiplyConstant->cast_vector<float>();
|
||||||
for (size_t group = 0; group < groupsCount; ++group) {
|
for (size_t group = 0; group < groupsCount; ++group) {
|
||||||
for (size_t i = 0; i < inputChannelsInGroup; ++i) {
|
for (size_t i = 0; i < inputChannelsInGroup; ++i) {
|
||||||
if (scales[group * inputChannelsInGroup] != scales[group * inputChannelsInGroup + i]) {
|
if (scales[group * inputChannelsInGroup] != scales[group * inputChannelsInGroup + i]) {
|
||||||
@ -90,30 +89,33 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const ngraph::PartialShape outputPShape = layer->get_output_partial_shape(0);
|
const PartialShape outputPShape = layer->get_output_partial_shape(0);
|
||||||
const auto rank = outputPShape.rank().get_length();
|
const auto rank = outputPShape.rank();
|
||||||
if ((rank != 4) && (rank != 5)) {
|
if (rank.is_dynamic()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto rankVal = rank.get_length();
|
||||||
|
if ((rankVal != 4) && (rankVal != 5)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const std::shared_ptr<opset1::Multiply> multiply = as_type_ptr<opset1::Multiply>(layer->input_value(0).get_node_shared_ptr());
|
const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(layer);
|
||||||
if (multiply == nullptr) {
|
if (dequantization.multiply == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// SS takes inputs [0: data, 1: scales, 2: shifts], takes scales (index = 1)
|
if (dequantization.multiplyConstant == nullptr) {
|
||||||
const std::shared_ptr<opset1::Constant> multiplyConst = as_type_ptr<opset1::Constant>(multiply->input_value(1).get_node_shared_ptr());
|
|
||||||
if (multiplyConst == nullptr) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// exactly cast vector as original code has a conversion;
|
// exactly cast vector as original code has a conversion;
|
||||||
// optimize cast:
|
// optimize cast:
|
||||||
// two branches depending on real type of the constant?
|
// two branches depending on real type of the constant?
|
||||||
const auto scalesBuffer = multiplyConst->cast_vector<float>();
|
const auto scalesBuffer = dequantization.multiplyConstant->cast_vector<float>();
|
||||||
size_t scalesBufferSize = shape_size(multiplyConst->get_output_shape(0));
|
size_t scalesBufferSize = shape_size(dequantization.multiplyConstant->get_shape());
|
||||||
for (size_t i = 1lu; i < scalesBufferSize; ++i) {
|
for (size_t i = 1ul; i < scalesBufferSize; ++i) {
|
||||||
if (scalesBuffer[i - 1] != scalesBuffer[i]) {
|
if (scalesBuffer[i - 1] != scalesBuffer[i]) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -132,11 +134,11 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
|
|||||||
|
|
||||||
// TODO Implement similar checks in other weightable operations
|
// TODO Implement similar checks in other weightable operations
|
||||||
|
|
||||||
const std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(layer->input_value(1).get_node_shared_ptr());
|
const std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(layer->get_input_node_shared_ptr(1));
|
||||||
|
|
||||||
std::shared_ptr<opset1::FakeQuantize> fqFromWeights;
|
std::shared_ptr<opset1::FakeQuantize> fqFromWeights;
|
||||||
if (reshapeFromWeights == nullptr) {
|
if (reshapeFromWeights == nullptr) {
|
||||||
fqFromWeights = as_type_ptr<opset1::FakeQuantize>(layer->input_value(1).get_node_shared_ptr());
|
fqFromWeights = as_type_ptr<opset1::FakeQuantize>(layer->get_input_node_shared_ptr(1));
|
||||||
if (fqFromWeights == nullptr) {
|
if (fqFromWeights == nullptr) {
|
||||||
const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(layer, 1ul);
|
const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(layer, 1ul);
|
||||||
fqFromWeights = as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr());
|
fqFromWeights = as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr());
|
||||||
@ -154,23 +156,29 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const Shape constOutputShape = fqFromWeights->get_input_node_ptr(3)->get_output_shape(0);
|
const auto olPShape = fqFromWeights->get_input_partial_shape(3);
|
||||||
if (fqFromWeights->get_input_node_ptr(4)->get_output_shape(0) != constOutputShape) {
|
const auto ohPShape = fqFromWeights->get_input_partial_shape(4);
|
||||||
|
if (olPShape.is_dynamic() || ohPShape.is_dynamic() || olPShape != ohPShape) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const size_t outChannelsShapeIndex = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul;
|
|
||||||
if (
|
const auto fqOutPShape = fqFromWeights->get_output_partial_shape(0);
|
||||||
// expected, it's ok: return true
|
const size_t outChannelsIdx = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul;
|
||||||
(shape_size(constOutputShape) != 1ul) &&
|
if (fqOutPShape.rank().is_dynamic() || fqOutPShape[outChannelsIdx].is_dynamic()) {
|
||||||
// not expected, something wrong: return false
|
|
||||||
((constOutputShape.size() <= outChannelsShapeIndex) ||
|
|
||||||
// Check if all dimensions of scale except the output channels are all ones
|
|
||||||
(shape_size(constOutputShape) != constOutputShape[outChannelsShapeIndex]) ||
|
|
||||||
((constOutputShape[outChannelsShapeIndex] != 1ul) &&
|
|
||||||
(fqFromWeights->get_output_shape(0)[outChannelsShapeIndex] != constOutputShape[outChannelsShapeIndex])))) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const Shape constShape = olPShape.to_shape();
|
||||||
|
if (shape_size(constShape) != 1ul) {
|
||||||
|
const size_t constChannels = constShape[outChannelsIdx];
|
||||||
|
const size_t fqOutChannels = fqOutPShape[outChannelsIdx].get_length();
|
||||||
|
const bool constChannelsAndFqChannelsMismatched = (constChannels != 1ul) && (fqOutChannels != constChannels);
|
||||||
|
|
||||||
|
if ((constShape.size() <= outChannelsIdx) || (shape_size(constShape) != constChannels) || constChannelsAndFqChannelsMismatched) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// TODO: LPT: is it possible to share with isQuantized?
|
// TODO: LPT: is it possible to share with isQuantized?
|
||||||
const FakeQuantizeDequantization dequantizationOnWeights = reshapeFromWeights == nullptr ?
|
const FakeQuantizeDequantization dequantizationOnWeights = reshapeFromWeights == nullptr ?
|
||||||
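The reworked FakeQuantize-on-weights check above accepts the quantization limits only when they are per-tensor, or strictly per-output-channel and consistent with the FQ output channel count. A compact standalone restatement of that predicate, assuming the relevant shapes are already static (names are mine, not from the sources):

```cpp
#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// True when the FQ limit constant is a scalar or a pure per-output-channel
// vector whose channel count matches the FakeQuantize output.
bool limitsArePerOutputChannel(const std::vector<size_t>& limitShape,
                               size_t outChannelsIdx,
                               size_t fqOutChannels) {
    const size_t elements = std::accumulate(limitShape.begin(), limitShape.end(),
                                            static_cast<size_t>(1), std::multiplies<size_t>());
    if (elements == 1) {
        return true;  // per-tensor limits are always acceptable
    }
    if (limitShape.size() <= outChannelsIdx) {
        return false;
    }
    const size_t constChannels = limitShape[outChannelsIdx];
    // every element must sit on the output-channel axis and match the FQ output
    return elements == constChannels && (constChannels == 1 || constChannels == fqOutChannels);
}
```

For limits of shape {256, 1, 1, 1} on a Convolution (outChannelsIdx 0) with 256 FQ output channels the predicate holds; limits of shape {1, 64, 1, 1} on the same weights are rejected.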
@ -180,33 +188,33 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const opset1::Constant* weightsData = as_type<opset1::Constant>(dequantizationOnWeights.data.get_node());
|
const auto weightsData = as_type_ptr<opset1::Constant>(dequantizationOnWeights.data.get_node_shared_ptr());
|
||||||
if (weightsData == nullptr) {
|
if (weightsData == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const ngraph::element::Type weightsDataPrecision = weightsData->output(0).get_element_type();
|
const auto weightsDataPrecision = weightsData->get_element_type();
|
||||||
if (!DataPrecision::isSupported(weightsDataPrecision)) {
|
if (!DataPrecision::isSupported(weightsDataPrecision)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((dequantizationOnWeights.subtract != nullptr) && (dequantizationOnWeights.subtractConvert != nullptr)) {
|
if ((dequantizationOnWeights.subtract != nullptr) && (dequantizationOnWeights.subtractConvert != nullptr)) {
|
||||||
const auto subtractConstantType = dequantizationOnWeights.subtractConstant->output(0).get_element_type();
|
const auto subtractConstantType = dequantizationOnWeights.subtractConstant->get_element_type();
|
||||||
if (subtractConstantType != weightsDataPrecision) {
|
if (subtractConstantType != weightsDataPrecision) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const size_t outChannelsShapeIndex = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul;
|
const size_t outChannelsIdx = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul;
|
||||||
if (dequantizationOnWeights.subtract) {
|
if (dequantizationOnWeights.subtract) {
|
||||||
const auto subConstShape = dequantizationOnWeights.subtractConstant->get_shape();
|
const auto subConstShape = dequantizationOnWeights.subtractConstant->get_shape();
|
||||||
if (shape_size(subConstShape) > 1ul && shape_size(subConstShape) != subConstShape[outChannelsShapeIndex]) {
|
if (shape_size(subConstShape) > 1ul && shape_size(subConstShape) != subConstShape[outChannelsIdx]) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (dequantizationOnWeights.multiply) {
|
if (dequantizationOnWeights.multiply) {
|
||||||
const auto mulConstShape = dequantizationOnWeights.multiplyConstant->get_shape();
|
const auto mulConstShape = dequantizationOnWeights.multiplyConstant->get_shape();
|
||||||
if (shape_size(mulConstShape) > 1ul && shape_size(mulConstShape) != mulConstShape[outChannelsShapeIndex]) {
|
if (shape_size(mulConstShape) > 1ul && shape_size(mulConstShape) != mulConstShape[outChannelsIdx]) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -321,7 +329,7 @@ bool WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const st
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool WeightableLayerTransformation::isGroup(const std::shared_ptr<Node>& layer) {
|
bool WeightableLayerTransformation::isGroup(const std::shared_ptr<Node>& layer) {
|
||||||
if (!as_type_ptr<opset1::Convolution>(layer) && !as_type_ptr<opset1::GroupConvolution>(layer)) {
|
if (!is_type<opset1::Convolution>(layer) && !is_type<opset1::GroupConvolution>(layer)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -341,7 +349,7 @@ bool WeightableLayerTransformation::isDepthwise(const std::shared_ptr<Node>& lay
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<opset1::FakeQuantize> WeightableLayerTransformation::getFakeQuantizeOnWeights(const std::shared_ptr<Node>& node) {
|
std::shared_ptr<opset1::FakeQuantize> WeightableLayerTransformation::getFakeQuantizeOnWeights(const std::shared_ptr<Node>& node) {
|
||||||
auto fq = as_type_ptr<opset1::FakeQuantize>(node->input_value(1).get_node_shared_ptr());
|
auto fq = as_type_ptr<opset1::FakeQuantize>(node->get_input_node_shared_ptr(1));
|
||||||
// TODO: temporary workaround
|
// TODO: temporary workaround
|
||||||
if (fq == nullptr) {
|
if (fq == nullptr) {
|
||||||
fq = as_type_ptr<opset1::FakeQuantize>(node->get_input_node_ptr(1)->get_input_node_shared_ptr(0));
|
fq = as_type_ptr<opset1::FakeQuantize>(node->get_input_node_ptr(1)->get_input_node_shared_ptr(0));
|
||||||
|
@ -26,8 +26,13 @@ Config::Config() {
|
|||||||
// for the TBB code-path, additional configuration depending on the OS and CPU types
|
// for the TBB code-path, additional configuration depending on the OS and CPU types
|
||||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||||
#if defined(__APPLE__) || defined(_WIN32)
|
#if defined(__APPLE__) || defined(_WIN32)
|
||||||
// 'CORES' is not implemented for Win/MacOS; so the 'NUMA' is default
|
// 'CORES' is not implemented for Win/MacOS; so 'NONE' or 'NUMA' is the default
|
||||||
|
auto numaNodes = getAvailableNUMANodes();
|
||||||
|
if (numaNodes.size() > 1) {
|
||||||
streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NUMA;
|
streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NUMA;
|
||||||
|
} else {
|
||||||
|
streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NONE;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (getAvailableCoresTypes().size() > 1 /*Hybrid CPU*/) {
|
if (getAvailableCoresTypes().size() > 1 /*Hybrid CPU*/) {
|
||||||
|
@@ -12,8 +12,11 @@
 #include "mkldnn_itt.h"
 #include "nodes/mkldnn_memory_node.hpp"
 #include <threading/ie_executor_manager.hpp>
+#if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
+#include <threading/ie_tbb_streams_executor.hpp>
+#else
 #include <threading/ie_cpu_streams_executor.hpp>
+#endif
 #include <ie_system_conf.h>
 #include <algorithm>
 #include <unordered_set>
@@ -32,6 +35,14 @@ MKLDNNExecNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap network
     return std::make_shared<MKLDNNInferRequest>(networkInputs, networkOutputs, std::static_pointer_cast<MKLDNNExecNetwork>(shared_from_this()));
 }
 
+struct ImmediateSerialExecutor : public ITaskExecutor {
+    void run(InferenceEngine::Task task) override {
+        std::lock_guard<std::mutex> l{_mutex};
+        task();
+    }
+    std::mutex _mutex;
+};
+
 MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
                                      const Config &cfg,
                                      const MKLDNNExtensionManager::Ptr& extMgr,
@@ -61,11 +72,20 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
     } else {
         auto streamsExecutorConfig = InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg.streamExecutorConfig, isFloatModel);
         streamsExecutorConfig._name = "CPUStreamsExecutor";
-        _taskExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamsExecutorConfig);
+#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
+        _taskExecutor = std::make_shared<TBBStreamsExecutor>(streamsExecutorConfig);
+#else
+        _taskExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamsExecutorConfig);
+#endif
     }
     if (0 != cfg.streamExecutorConfig._streams) {
-        _callbackExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(
-            IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0, IStreamsExecutor::ThreadBindingType::NONE});
+#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
+        // There is no additional threads but we still need serialize callback execution to preserve legacy behaviour
+        _callbackExecutor = std::make_shared<ImmediateSerialExecutor>();
+#else
+        _callbackExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(
+            IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0, IStreamsExecutor::ThreadBindingType::NONE});
+#endif
     } else {
         _callbackExecutor = _taskExecutor;
     }
@@ -146,6 +166,19 @@ MKLDNNExecNetwork::Graph::Lock MKLDNNExecNetwork::GetGraph() {
     return graphLock;
 }
 
+MKLDNNExecNetwork::Graph::Lock MKLDNNExecNetwork::GetGraph() const {
+    int streamId = 0;
+    int numaNodeId = 0;
+    auto streamsExecutor = dynamic_cast<InferenceEngine::IStreamsExecutor*>(_taskExecutor.get());
+    if (nullptr != streamsExecutor) {
+        streamId = streamsExecutor->GetStreamId();
+        numaNodeId = streamsExecutor->GetNumaNodeId();
+    }
+    auto graphLock = Graph::Lock(_graphs[streamId % _graphs.size()]);
+    IE_ASSERT(graphLock._graph.IsReady());
+    return graphLock;
+}
+
 void MKLDNNExecNetwork::setProperty(const std::map<std::string, std::string> &properties) {
     {
         std::lock_guard<std::mutex> lock{_cfgMutex};
@@ -171,9 +204,8 @@ InferenceEngine::CNNNetwork MKLDNNExecNetwork::GetExecGraphInfo() {
 }
 
 Parameter MKLDNNExecNetwork::GetConfig(const std::string &name) const {
-    if (_graphs.size() == 0)
-        IE_THROW() << "No graph was found";
-    Config engConfig = const_cast<MKLDNNExecNetwork*>(this)->GetGraph()._graph.getProperty();
+    if (_graphs.size() == 0) IE_THROW() << "No graph was found";
+    Config engConfig = GetGraph()._graph.getProperty();
     auto option = engConfig._config.find(name);
     if (option != engConfig._config.end()) {
         return option->second;
@@ -187,8 +219,7 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name)
         IE_THROW() << "No graph was found";
 
     if (name == METRIC_KEY(NETWORK_NAME)) {
-        IE_SET_METRIC_RETURN(NETWORK_NAME,
-                             const_cast<MKLDNNExecNetwork*>(this)->GetGraph()._graph.dump().getName());
+        IE_SET_METRIC_RETURN(NETWORK_NAME, GetGraph()._graph.dump().getName());
     } else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
         std::vector<std::string> metrics;
         metrics.push_back(METRIC_KEY(NETWORK_NAME));
@@ -198,12 +229,12 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name)
         IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
     } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
         std::vector<std::string> configKeys;
-        for (auto && key : const_cast<MKLDNNExecNetwork*>(this)->GetGraph()._graph.getProperty()._config) {
+        for (auto && key : GetGraph()._graph.getProperty()._config) {
             configKeys.push_back(key.first);
         }
         IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
     } else if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
-        Config engConfig = const_cast<MKLDNNExecNetwork*>(this)->GetGraph()._graph.getProperty();
+        Config engConfig = GetGraph()._graph.getProperty();
         auto option = engConfig._config.find(CONFIG_KEY(CPU_THROUGHPUT_STREAMS));
         IE_ASSERT(option != engConfig._config.end());
         auto streams = std::stoi(option->second);
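A standalone sketch of the ImmediateSerialExecutor idea introduced above (not the InferenceEngine class itself): run each callback inline on the caller's thread, but under a mutex, so callbacks arriving from several worker threads never interleave. Only the standard library is used here so the snippet compiles on its own.

    #include <functional>
    #include <iostream>
    #include <mutex>
    #include <thread>
    #include <vector>

    struct SerialInlineExecutor {
        void run(std::function<void()> task) {
            std::lock_guard<std::mutex> lock(_mutex);
            task();                       // executed immediately, one task at a time
        }
        std::mutex _mutex;
    };

    int main() {
        SerialInlineExecutor exec;
        int counter = 0;                  // unsynchronized on purpose; the executor serializes access
        std::vector<std::thread> workers;
        for (int i = 0; i < 4; ++i)
            workers.emplace_back([&] { for (int j = 0; j < 1000; ++j) exec.run([&] { ++counter; }); });
        for (auto& w : workers) w.join();
        std::cout << counter << "\n";     // always 4000: increments were serialized
    }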
@@ -59,8 +59,9 @@ protected:
         Graph& _graph;
     };
     };
 
     // WARNING: Do not use _graphs directly.
-    std::deque<Graph> _graphs;
+    mutable std::deque<Graph> _graphs;
     NumaNodesWeights& _numaNodesWeights;
 
     /* WARNING: Use GetGraph() function to get access to graph in current stream.
@@ -68,6 +69,8 @@ protected:
      * even from main thread
      */
     Graph::Lock GetGraph();
+    Graph::Lock GetGraph() const;
+
 
     bool CanProcessDynBatch(const InferenceEngine::CNNNetwork &network) const;
 };
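Why _graphs becomes mutable in the header above: the new const-qualified GetGraph() still has to hand out a non-const reference to a per-stream graph (locking and lazy state live inside it). A minimal sketch of that pattern, with a made-up ExecNetwork/Graph pair rather than the real classes:

    #include <deque>
    #include <iostream>

    struct Graph { bool ready = true; };

    class ExecNetwork {
    public:
        Graph& graphForStream(size_t streamId) const {
            // allowed despite 'const' because the member is declared mutable
            return _graphs[streamId % _graphs.size()];
        }
    private:
        mutable std::deque<Graph> _graphs{Graph{}, Graph{}};
    };

    int main() {
        const ExecNetwork net;
        std::cout << net.graphForStream(5).ready << "\n";  // 1
    }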
@@ -24,22 +24,20 @@
 #include <transformations/common_optimizations/common_optimizations.hpp>
 #include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp>
 #include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
-#include <transformations/common_optimizations/depth_to_space_fusion.hpp>
 #include <transformations/common_optimizations/softmax_fusion.hpp>
-#include <transformations/common_optimizations/normalize_l2_fusion.hpp>
 #include <transformations/op_conversions/convert_depth_to_space.hpp>
 #include <transformations/op_conversions/convert_shuffle_channels3.hpp>
 #include <transformations/op_conversions/convert_space_to_depth.hpp>
 #include <transformations/op_conversions/convert_gelu.hpp>
-#include <transformations/op_conversions/convert_gather_v7_to_gather_v1.hpp>
-#include <transformations/op_conversions/convert_gather_v1_to_gather_v7.hpp>
+#include <transformations/op_conversions/convert_gather_downgrade.hpp>
+#include <transformations/op_conversions/convert_gather_upgrade.hpp>
 #include <transformations/op_conversions/gelu7_downgrade.hpp>
 #include <transformations/op_conversions/hswish_decomposition.hpp>
 #include <transformations/op_conversions/hsigmoid_decomposition.hpp>
 #include <transformations/op_conversions/mvn6_decomposition.hpp>
+#include <transformations/op_conversions/normalize_l2_decomposition.hpp>
 #include <transformations/op_conversions/reduce_l1_decomposition.hpp>
 #include <transformations/op_conversions/reduce_l2_decomposition.hpp>
-#include <transformations/op_conversions/convert_pad_to_group_conv.hpp>
 #include <transformations/op_conversions/softplus_decomposition.hpp>
 #include <transformations/op_conversions/convert_space_to_batch.hpp>
 #include <transformations/op_conversions/convert_batch_to_space.hpp>
@@ -53,7 +51,6 @@
 #include <transformations/op_conversions/gru_cell_decomposition.hpp>
 #include <transformations/op_conversions/log_softmax_decomposition.hpp>
 #include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
-#include <transformations/op_conversions/convert_shuffle_channels3.hpp>
 #include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp>
 #include <transformations/op_conversions/convert_previous_nms_to_nms_5.hpp>
 #include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp>
@@ -249,7 +246,8 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
         return false;
     };
 
-    pass_config->set_callback<ngraph::pass::ConvertRNNSequenceToTensorIterator, ngraph::pass::ConvertGRUSequenceToTensorIterator,
+    pass_config->set_callback<ngraph::pass::ConvertRNNSequenceToTensorIterator,
+                              ngraph::pass::ConvertGRUSequenceToTensorIterator,
                               ngraph::pass::ConvertLSTMSequenceToTensorIterator>(
             [isSequencePrimitiveSupported](const_node_ptr &node) -> bool {
                 return isSequencePrimitiveSupported(node);
@@ -280,18 +278,17 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
                 return MKLDNNMVNNode::isSupportedOperation(node, errorMessage);
             });
 
+    pass_config->set_callback<ngraph::pass::NormalizeL2Decomposition>(
+            [](const_node_ptr &node) -> bool {
+                std::string errorMsg;
+                return MKLDNNNormalizeL2Node::isSupportedOperation(node, errorMsg);
+            });
+
     pass_config->set_callback<ngraph::pass::SoftmaxFusion>(
             [](const_node_ptr &node) -> bool {
                 return node->input_value(0).get_partial_shape().rank().get_length() > 5;
             });
 
-    auto normalizeL2FusionCallback = [](const_node_ptr &node) -> bool {
-        std::string errorMsg;
-        return !MKLDNNNormalizeL2Node::isSupportedOperation(node, errorMsg);
-    };
-    pass_config->set_callback<ngraph::pass::NormalizeL2FusionWithAdd>(normalizeL2FusionCallback);
-    pass_config->set_callback<ngraph::pass::NormalizeL2FusionWithMax>(normalizeL2FusionCallback);
-
     // List of enabled/disabled transformations
     pass_config->disable<ngraph::pass::ConvertGELU>();
     pass_config->disable<ngraph::pass::ConvertShuffleChannels3>();
@@ -307,10 +304,11 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
     pass_config->disable<ngraph::pass::WeightsDequantizeToFakeQuantize>();
     pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
     pass_config->disable<ngraph::pass::ConvertGather7ToGather1>();
-    pass_config->disable<ngraph::pass::ConvertDeformableConv8To1>();
 
+    pass_config->enable<ngraph::pass::NormalizeL2Decomposition>();
     pass_config->enable<ngraph::pass::ConvertInterpolate1ToInterpolate4>();
     pass_config->enable<ngraph::pass::ConvertGather1ToGather7>();
+    pass_config->enable<ngraph::pass::ConvertGather8ToGather7>();
 
     if (useLpt) {
         pass_config->set_callback<ngraph::pass::ConvertQuantizeDequantize>([](const_node_ptr &node) -> bool {
@@ -741,9 +741,10 @@ private:
 
 bool MKLDNNDeformableConvolutionNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     try {
-        const auto defConvNode = ngraph::as_type_ptr<const ngraph::op::v1::DeformableConvolution>(op);
-        if (!defConvNode) {
-            errorMessage = "Node is not an instance of DeformableConvolution form the operation set v1.";
+        if (!one_of(op->get_type_info(),
+                ngraph::op::v1::DeformableConvolution::type_info,
+                ngraph::op::v8::DeformableConvolution::type_info)) {
+            errorMessage = "Node is not an instance of DeformableConvolution form the operation set v1 or v8.";
             return false;
         }
     } catch (...) {
@@ -759,28 +760,35 @@ MKLDNNDeformableConvolutionNode::MKLDNNDeformableConvolutionNode(const std::shar
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
     }
-    auto defConvNode = ngraph::as_type_ptr<const ngraph::op::v1::DeformableConvolution>(op);
+    auto defConvNodeBase = std::dynamic_pointer_cast<ngraph::op::util::DeformableConvolutionBase>(op);
 
-    group = defConvNode->get_group();
-    deformable_group = defConvNode->get_deformable_group();
-    auto& strides = defConvNode->get_strides();
+    group = defConvNodeBase->get_group();
+    deformable_group = defConvNodeBase->get_deformable_group();
+    auto& strides = defConvNodeBase->get_strides();
     for (int i = 0; i < strides.size(); i++) {
         stride.push_back(strides[i]);
     }
 
-    auto& dilations = defConvNode->get_dilations();
+    auto& dilations = defConvNodeBase->get_dilations();
     for (int i = 1; i <= dilations.size(); i++) {
         dilation.push_back(dilations[dilations.size() - i] - 1);
     }
 
-    paddingL = defConvNode->get_pads_begin();
+    paddingL = defConvNodeBase->get_pads_begin();
+
+    if (op->get_type_info() == ngraph::op::v8::DeformableConvolution::type_info) {
+        auto defConvNode = std::dynamic_pointer_cast<ngraph::op::v8::DeformableConvolution>(op);
+        with_bilinear_pad = defConvNode->get_bilinear_interpolation_pad();
+    } else {
+        with_bilinear_pad = false;
+    }
+    enforceRef = (op->get_type_info() == ngraph::op::v8::DeformableConvolution::type_info);
 }
 
 void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() {
     std::string errorPrefix = "DeformableConvolution layer with name '" + getName() + "' ";
 
-    if (getParentEdges().size() != 3)
+    if (getParentEdges().size() != 3 && getParentEdges().size() != 4)
         IE_THROW() << errorPrefix << "has incorrect number of input edges";
     if (getChildEdges().empty())
         IE_THROW() << errorPrefix << "has incorrect number of output edges";
@@ -806,22 +814,29 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
+    size_t inputsNumber = getOriginalInputsNumber();
     NodeConfig config;
     config.dynBatchSupport = false;
-    config.inConfs.resize(3);
+    config.inConfs.resize(inputsNumber);
     config.inConfs[0].constant = false;
     config.inConfs[0].inPlace = -1;
     config.inConfs[1].constant = false;
     config.inConfs[1].inPlace = -1;
-    config.inConfs[1].constant = false;
-    config.inConfs[1].inPlace = -1;
+    config.inConfs[2].constant = false;
+    config.inConfs[2].inPlace = -1;
+    if (inputsNumber > 3) {
+        config.inConfs[3].constant = false;
+        config.inConfs[3].inPlace = -1;
+    }
 
     config.outConfs.resize(1);
     config.outConfs[0].constant = false;
     config.outConfs[0].inPlace = -1;
 
     impl_desc_type impl_type;
-    if (mayiuse(cpu::x64::avx512_common)) {
+    if (enforceRef) {
+        impl_type = impl_desc_type::ref;
+    } else if (mayiuse(cpu::x64::avx512_common)) {
         impl_type = impl_desc_type::jit_avx512;
     } else if (mayiuse(cpu::x64::avx2)) {
         impl_type = impl_desc_type::jit_avx2;
@@ -831,8 +846,8 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
         impl_type = impl_desc_type::ref;
     }
 
-    if (mayiuse(cpu::x64::sse41)) {
-        // optimzed implementation
+    if (!enforceRef && mayiuse(cpu::x64::sse41)) {
+        // optimized implementation
         auto dataFormat = memory::format_tag::nhwc;
         auto offFormat = memory::format_tag::nchw;
         auto weiFormat = group > 1 ? mayiuse(avx512_common) ? memory::format_tag::gOIhw16i16o : memory::format_tag::gOIhw8i8o
@@ -842,8 +857,25 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
                 memory::data_type::f32, dataFormat);
         config.inConfs[1].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(1)->getShape().getStaticDims(),
                 memory::data_type::f32, offFormat);
+
+        auto& wDims = getParentEdgeAt(2)->getShape().getStaticDims();
+        if (group > 1 && wDims.size() != 5) {
+            auto new_dims = InferenceEngine::SizeVector({group, div_up(wDims[0], group)});
+            for (int i = 1; i < wDims.size(); i++) {
+                new_dims.push_back(wDims[i]);
+            }
         config.inConfs[2].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(2)->getShape().getStaticDims(),
                 memory::data_type::f32, weiFormat);
+        } else {
+            config.inConfs[2].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(2)->getShape().getStaticDims(),
+                    memory::data_type::f32, weiFormat);
+        }
+
+        if (inputsNumber > 3) {
+            config.inConfs[3].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(3)->getShape().getStaticDims(),
+                    memory::data_type::f32, memory::format_tag::nchw);
+        }
         config.outConfs[0].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getChildEdgeAt(0)->getShape().getStaticDims(),
                 memory::data_type::f32, dataFormat);
         supportedPrimitiveDescriptors.push_back({config, impl_type});
@@ -855,6 +887,10 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
                 memory::format_tag::nchw);
         config.inConfs[2].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::f32,
                 memory::format_tag::oihw);
+        if (inputsNumber > 3) {
+            config.inConfs[3].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(3)->getShape().getStaticDims(), memory::data_type::f32,
+                    memory::format_tag::nchw);
+        }
         config.outConfs[0].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getChildEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32,
                 memory::format_tag::nchw);
         supportedPrimitiveDescriptors.push_back({config, impl_type});
@@ -874,6 +910,7 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() {
     jcp.dg = deformable_group;
 
     jcp.ngroups = group;
 
     jcp.mb = srcDims[0];
 
     jcp.oc = dstDims[1] / jcp.ngroups;
@@ -884,9 +921,8 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() {
     jcp.oh = dstDims[2];
     jcp.ow = dstDims[3];
 
-    bool with_groups = group > 1;
-    jcp.kh = weiDims[with_groups + 2];
-    jcp.kw = weiDims[with_groups + 3];
+    jcp.kh = weiDims[2];
+    jcp.kw = weiDims[3];
 
     jcp.t_pad = paddingL[0];
     jcp.l_pad = paddingL[1];
@@ -898,6 +934,8 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() {
     jcp.dilate_w = dilation[1];
 
     jcp.with_bias = false;
+    jcp.with_bi_pad = with_bilinear_pad;
+    jcp.with_modulation = getParentEdges().size() > 3;
 
     const int simd_w = mayiuse(cpu::x64::avx512_common) ? 16 : 8;
     jcp.ic_block = simd_w;
@@ -910,13 +948,16 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() {
     jcp.typesize_in = sizeof(float);
     jcp.typesize_off = sizeof(float);
    jcp.typesize_out = sizeof(float);
+    jcp.typesize_modulation = sizeof(float);
 
     jcp.ur_w = mayiuse(cpu::x64::avx512_common) ? 6 : 3;
     jcp.nb_oc_blocking = !mayiuse(cpu::x64::avx2) ? 2 : 4;
 
     jcp.nthr = dnnl_get_max_threads();
 
-    if (mayiuse(cpu::x64::avx512_common)) {
+    if (enforceRef) {
+        return;
+    } else if (mayiuse(cpu::x64::avx512_common)) {
         def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32<cpu::x64::avx512_common>(jcp));
     } else if (mayiuse(cpu::x64::avx2)) {
         def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32<cpu::x64::avx2>(jcp));
@@ -930,9 +971,9 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() {
 
 void MKLDNNDeformableConvolutionNode::executeReference(const float* src, const float* offsets, const float* weights, float* dst,
                                                        const std::vector<size_t>& src_strides, const std::vector<size_t>& off_strides,
-                                                       const std::vector<size_t>& wei_strides, const std::vector<size_t>& dst_strides) {
+                                                       const std::vector<size_t>& wei_strides, const std::vector<size_t>& dst_strides,
+                                                       const float* modulation, const std::vector<size_t>& modulation_strides) {
     const bool with_groups = jcp.ngroups > 1;
 
     const int G = jcp.ngroups;
     const int MB = jcp.mb;
     const int OH = jcp.oh;
@@ -956,65 +997,79 @@ void MKLDNNDeformableConvolutionNode::executeReference(const f
 
     const int DG = jcp.dg;
 
-    const int channel_per_deformable_group = IC * G / DG;
+    const int channel_per_deformable_group = (IC * G) / DG;
 
+    const bool with_bi_pad = jcp.with_bi_pad;
     auto ker = [=](int g, int mb, int oc, int oh, int ow) {
         float d = 0;
         const int h_in = oh * KSH - padT;
         const int w_in = ow * KSW - padL;
 
         for (int ic = 0; ic < IC; ic++) {
-            const float *data_im_ptr = src + mb * src_strides[0] + (g * IC + ic) * src_strides[1] + h_in * src_strides[2] + w_in * src_strides[3];
-            const int deformable_group_index = ic / channel_per_deformable_group;
+            const float *data_im_ptr = src + mb * src_strides[0] + (g * IC + ic) * src_strides[1];
+            const int deformable_group_index = (IC * g + ic) / channel_per_deformable_group;
             const float *data_offset_ptr = offsets + mb * off_strides[0] + (deformable_group_index * 2 * KH * KW) * off_strides[1];
+            const float *modulation_offset_ptr = nullptr;
+            if (modulation != nullptr) {
+                modulation_offset_ptr = modulation + mb * modulation_strides[0] + (deformable_group_index * KH * KW) * modulation_strides[1];
+            }
+
             for (int kh = 0; kh < KH; kh++) {
                 for (int kw = 0; kw < KW; kw++) {
                     const size_t data_offset_h_index = 2 * (kh * KW + kw) * off_strides[1] + oh * off_strides[2] + ow * off_strides[3];
                     const size_t data_offset_w_index = (2 * (kh * KW + kw) + 1) * off_strides[1] + oh * off_strides[2] + ow * off_strides[3];
                     const float offset_h = data_offset_ptr[data_offset_h_index];
                     const float offset_w = data_offset_ptr[data_offset_w_index];
-                    float val = 0.0f;
-                    const float h_im = h_in + kh * (KDH + 1) + offset_h;
-                    const float w_im = w_in + kw * (KDW + 1) + offset_w;
+                    float map_h = h_in + kh * (KDH + 1) + offset_h;
+                    float map_w = w_in + kw * (KDW + 1) + offset_w;
+                    bool skip_compute;
+                    if (with_bilinear_pad) {
-                    if (h_im >= 0 && w_im >= 0 && h_im < IH && w_im < IW) {
-                        float map_h = kh * (KDH + 1) + offset_h;
-                        float map_w = kw * (KDW + 1) + offset_w;
-                        const int cur_height = IH - h_in;
-                        const int cur_width = IW - w_in;
-                        int h_low = static_cast<int>(floorf(map_h));
-                        int w_low = static_cast<int>(floorf(map_w));
-                        int h_high;
-                        int w_high;
-                        if (h_low >= cur_height - 1) {
-                            h_high = h_low = cur_height - 1;
-                            map_h = static_cast<float>(h_low);
+                        skip_compute = !(static_cast<int>(map_w) > -1 &&
+                                         static_cast<int>(map_w) < IW &&
+                                         static_cast<int>(map_h) > -1 &&
+                                         static_cast<int>(map_h) < IH);
                     } else {
-                            h_high = h_low + 1;
-                        }
-
-                        if (w_low >= cur_width - 1) {
-                            w_high = w_low = cur_width - 1;
-                            map_w = static_cast<float>(w_low);
-                        } else {
-                            w_high = w_low + 1;
+                        skip_compute = !(map_w >= 0 &&
+                                         map_w < IW &&
+                                         map_h >= 0 &&
+                                         map_h < IH);
                     }
+                    if (!skip_compute) {
+                        const int cur_h_end = IH;
+                        const int cur_w_end = IW;
+                        int h_low = with_bi_pad ? static_cast<int>(floorf(map_h)) :
+                                                  std::max(static_cast<int>(floorf(map_h)), 0);
+                        int w_low = with_bi_pad ? static_cast<int>(floorf(map_w)) :
+                                                  std::max(static_cast<int>(floorf(map_w)), 0);
+                        const int cur_h_start = h_low;
+                        const int cur_w_start = w_low;
+                        int h_high = with_bi_pad ? h_low + 1 : std::min(static_cast<int>(ceilf(map_h)), cur_h_end - 1);
+                        int w_high = with_bi_pad ? w_low + 1 : std::min(static_cast<int>(ceilf(map_w)), cur_w_end - 1);
+
                         float lh = map_h - h_low;
                         float lw = map_w - w_low;
                         float hh = 1 - lh, hw = 1 - lw;
 
-                        float v1 = data_im_ptr[h_low * src_strides[2] + w_low * src_strides[3]];
-                        float v2 = data_im_ptr[h_low * src_strides[2] + w_high * src_strides[3]];
-                        float v3 = data_im_ptr[h_high * src_strides[2] + w_low * src_strides[3]];
-                        float v4 = data_im_ptr[h_high * src_strides[2] + w_high * src_strides[3]];
+                        float v1 = (cur_w_start >= 0 && cur_h_start >= 0) ? data_im_ptr[h_low * src_strides[2] + w_low * src_strides[3]] : 0.0f;
+                        float v2 = (w_high < cur_w_end && cur_h_start >= 0) ? data_im_ptr[h_low * src_strides[2] + w_high * src_strides[3]] : 0.0f;
+                        float v3 = (cur_w_start >= 0 && h_high < cur_h_end) ? data_im_ptr[h_high * src_strides[2] + w_low * src_strides[3]] : 0.0f;
+                        float v4 = (w_high < cur_w_end && h_high < cur_h_end) ? data_im_ptr[h_high * src_strides[2] + w_high * src_strides[3]] : 0.0f;
                         float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
 
-                        val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+                        float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+
+                        float modulation_scalar = 1.0f;
+
+                        if (modulation_offset_ptr != nullptr) {
+                            size_t modulation_index = (kh * KW + kw) * modulation_strides[1] + oh * modulation_strides[2] + ow * modulation_strides[3];
+                            modulation_scalar = modulation_offset_ptr[modulation_index];
+                        }
+
+                        const float weight = with_groups ? weights[(g + oc / G) * wei_strides[0] + ic * wei_strides[1] + kh * wei_strides[2] +
+                                                                   kw * wei_strides[3]]
+                                                         : weights[oc * wei_strides[0] + ic * wei_strides[1] + kh * wei_strides[2] + kw * wei_strides[3]];
+                        d += val * weight * modulation_scalar;
                     }
-                    d += val * (with_groups ? weights[g * wei_strides[0] + oc * wei_strides[1] + ic * wei_strides[2] + kh * wei_strides[3] +
-                                              kw * wei_strides[4]]
-                                            : weights[oc * wei_strides[0] + ic * wei_strides[1] + kh * wei_strides[2] + kw * wei_strides[3]]);
                 }
             }
         }
@@ -1058,6 +1113,8 @@ void MKLDNNDeformableConvolutionNode::executeOptimized(const f
 }
 
 void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) {
+    const size_t inputsNumber = getOriginalInputsNumber();
+
     auto &srcMemory0 = getParentEdgeAt(0)->getMemory();
     auto &srcMemory1 = getParentEdgeAt(1)->getMemory();
     auto &srcMemory2 = getParentEdgeAt(2)->getMemory();
@@ -1066,8 +1123,18 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) {
     const auto *src = reinterpret_cast<const float *>(srcMemory0.GetPtr());
     const auto *offsets = reinterpret_cast<const float *>(srcMemory1.GetPtr());
     const auto *weights = reinterpret_cast<const float *>(srcMemory2.GetPtr());
+    float* modulation = nullptr;
+    if (inputsNumber > 3) {
+        modulation = reinterpret_cast<float *>(getParentEdgeAt(3)->getMemory().GetPtr());
+    }
+
     float *dst = reinterpret_cast<float *>(dstMemory.GetPtr());
 
+    auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
+    if (!selectedPrimitiveDescriptor)
+        IE_THROW() << "CPU deformable convolution with name '" << getName() << "' doesn't have primitive descriptors.";
+    auto config = selectedPrimitiveDescriptor->getConfig();
+
     auto src_block_desc = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
     std::vector<size_t> src_strides(src_block_desc.getStrides().size());
     for (int i = 0; i < src_strides.size(); i++) {
@@ -1080,13 +1147,19 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) {
         dst_strides[dst_block_desc.getOrder()[i]] = dst_block_desc.getStrides()[i];
     }
 
     auto off_strides = getParentEdgeAt(1)->getMemory().GetDescWithType<BlockedMemoryDesc>().getStrides();
     auto wei_strides = getParentEdgeAt(2)->getMemory().GetDescWithType<BlockedMemoryDesc>().getStrides();
+    InferenceEngine::SizeVector modulation_strides;
+    if (inputsNumber > 3) {
+        modulation_strides = getParentEdgeAt(3)->getMemory().GetDescWithType<BlockedMemoryDesc>().getStrides();
+    }
 
     if (def_conv_kernel) {
         executeOptimized(src, offsets, weights, dst, src_strides, off_strides, dst_strides);
     } else {
-        executeReference(src, offsets, weights, dst, src_strides, off_strides, wei_strides, dst_strides);
+        executeReference(src, offsets, weights, dst, src_strides, off_strides, wei_strides, dst_strides, modulation, modulation_strides);
     }
 }
 
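A standalone sketch of the sampling step the reference path above performs for each kernel tap (not the plugin code): bilinear interpolation at a fractional (map_h, map_w) position, with out-of-range corners contributing zero, scaled by an optional modulation factor as in DeformableConvolution-8. Only the standard library is used, so the numbers can be checked directly.

    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <vector>

    float sampleBilinear(const std::vector<float>& img, int H, int W,
                         float map_h, float map_w, float modulation = 1.0f) {
        if (map_h <= -1.0f || map_h >= H || map_w <= -1.0f || map_w >= W)
            return 0.0f;                                  // the whole tap falls outside the image
        const int h_low = static_cast<int>(std::floor(map_h));
        const int w_low = static_cast<int>(std::floor(map_w));
        const int h_high = h_low + 1;
        const int w_high = w_low + 1;
        const float lh = map_h - h_low, lw = map_w - w_low;
        const float hh = 1.0f - lh, hw = 1.0f - lw;
        auto at = [&](int h, int w) {                     // zero-pad outside the image
            return (h >= 0 && h < H && w >= 0 && w < W) ? img[h * W + w] : 0.0f;
        };
        const float v1 = at(h_low, w_low),  v2 = at(h_low, w_high);
        const float v3 = at(h_high, w_low), v4 = at(h_high, w_high);
        return modulation * (hh * hw * v1 + hh * lw * v2 + lh * hw * v3 + lh * lw * v4);
    }

    int main() {
        std::vector<float> img = {0, 1, 2, 3};            // 2x2 image
        std::cout << sampleBilinear(img, 2, 2, 0.5f, 0.5f) << "\n";        // 1.5
        std::cout << sampleBilinear(img, 2, 2, 0.5f, 0.5f, 0.25f) << "\n"; // 0.375
    }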
@@ -22,8 +22,6 @@ struct jit_def_conv_params {
     int kd, kh, kw;
     int stride_d, stride_h, stride_w;
     int dilate_d, dilate_h, dilate_w;
-    bool with_bias;
-    bool with_sum;
     int nthr;
     int nb_ic, ic_block;
     int nb_oc, oc_block;
@@ -32,13 +30,19 @@ struct jit_def_conv_params {
     int ur_w_tail;
     int typesize_in;
     int typesize_off;
+    int typesize_modulation;
     int typesize_bia;
     int typesize_out;
+    bool with_bias;
+    bool with_sum;
+    bool with_modulation;
+    bool with_bi_pad;
 };
 
 struct jit_def_conv_call_args {
     const void *src;
     const void *off;
+    const void *modulation;
     const void *filt;
     const void *bias;
     const void *dst;
@@ -75,11 +79,13 @@ public:
     bool canBeInPlace() const override {
         return false;
     }
+    bool enforceRef = false;
 
     InferenceEngine::Precision getRuntimePrecision() const override;
 
 private:
     size_t group = 1;
+    bool with_bilinear_pad = false;
     std::vector<ptrdiff_t> stride = {};
     std::vector<ptrdiff_t> dilation = {};
     std::vector<ptrdiff_t> paddingL = {};
@@ -92,10 +98,10 @@ private:
 
     void executeReference(const float* src, const float* offsets, const float* weights, float* dst,
                           const std::vector<size_t>& src_strides, const std::vector<size_t>& off_strides,
-                          const std::vector<size_t>& wei_strides, const std::vector<size_t>& dst_strides);
+                          const std::vector<size_t>& wei_strides, const std::vector<size_t>& dst_strides,
+                          const float* modulation = nullptr, const std::vector<size_t>& modulation_strides = {});
     void executeOptimized(const float* src, const float* offsets, const float* weights, float* dst,
-                          const std::vector<size_t>& src_strides, const std::vector<size_t>& off_strides,
-                          const std::vector<size_t>& dst_strides);
+                          const std::vector<size_t>& src_strides, const std::vector<size_t>& off_strides, const std::vector<size_t>& dst_strides);
 };
 
 } // namespace MKLDNNPlugin
@@ -272,6 +272,8 @@ void MKLDNNStridedSliceNode::createPrimitive() {
     auto srcOrder = srcBlockingDesc.getOrder();
     params.srcDims = srcBlockingDesc.getBlockDims();
     params.dstDims = dstBlockingDesc.getBlockDims();
+    params.srcMemPtr = srcMemPtr;
+    params.dstMemPtr = dstMemPtr;
     params.dataSize = getSelectedPrimitiveDescriptor()->getConfig().inConfs[DATA_ID].desc->getPrecision().size();
 
     if (params.parametersAreConstant) {
@@ -282,8 +284,6 @@ void MKLDNNStridedSliceNode::createPrimitive() {
         SizeVector newSrcDims, newDstDims;
         dimsNormalization(newSrcDims, newDstDims);
         dimsGluing(realNDims, newSrcDims, newDstDims);
-
-        if (params.dstDims.size() == 1 || params.nDimsForWork != 1)
             indicesCalculation();
     }
 }
@@ -510,14 +510,35 @@ void MKLDNNStridedSliceNode::dimsGluing(const size_t realNDims, const SizeVector
         if (params.dstDims.size() > 2)
             params.lastDstDim /= newDstDims[secondDim.first];
     }
 
+    // some parameter calculations for common execution
+    params.isOptimized = params.nDimsForWork == 1 && params.dstDims.size() > 1;
+    if (params.isOptimized) {
+        if (params.dstDims.size() == 2)
+            params.dstDims[1] = 1;
+
+        params.workAmount = params.dstDims[0] * params.dstDims[1];
+        params.srcShift = (begin[0] * params.srcStrides[0] + begin[1] * params.srcStrides[1]) * params.dataSize;
+    } else {
+        params.srcShift = stride.back() == 1 && stride.size() > 1 ?
+                          begin[params.nDimsForWork] * params.srcStrides[params.nDimsForWork] * params.dataSize : 0;
+    }
 }
 
 void MKLDNNStridedSliceNode::indicesCalculation() {
     // indices calculation before execution for the best performance
-    params.nThreads = parallel_get_max_threads();
     params.srcIndices.resize(params.workAmount, 0);
     params.dstIndices.resize(params.workAmount, 0);
 
+    // should choose more optimal thread count
+    const size_t nthr = parallel_get_max_threads();
+    params.nThreads = nthr > params.workAmount ? params.workAmount : nthr;
+
+    if (params.isOptimized) {
+        indicesCalculationForOptimized();
+        return;
+    }
+
     auto getSrcIdx = [this](const SizeVector& indexes){
         size_t srcIdx = 0;
         for (int i = 0; i < params.nDimsForWork; ++i)
@@ -542,11 +563,11 @@ void MKLDNNStridedSliceNode::indicesCalculation() {
             if (coords[k] < params.dstDims[k]) {
                 srcIdx += stride[k] * params.srcStrides[k] * params.dataSize;
                 break;
-            } else {
+            }
+
                 coords[k] = 0;
                 out = true;
             }
-        }
 
         if (out)
             srcIdx = getSrcIdx(coords);
@@ -554,6 +575,25 @@ void MKLDNNStridedSliceNode::indicesCalculation() {
     });
 }
 
+void MKLDNNStridedSliceNode::indicesCalculationForOptimized() {
+    const size_t dstIdx0 = params.dstStrides[0] * params.dataSize;
+    const size_t dstIdx1 = params.dstStrides[1] * params.dataSize;
+    const size_t srcIdx0 = stride[0] * params.srcStrides[0] * params.dataSize;
+    const size_t srcIdx1 = stride[1] * params.srcStrides[1] * params.dataSize;
+
+    for (size_t i0 = 0; i0 < params.dstDims[0]; i0++) {
+        const size_t idx = i0 * params.dstDims[1];
+
+        params.dstIndices[idx] = i0 * dstIdx0;
+        params.srcIndices[idx] = i0 * srcIdx0;
+
+        for (size_t i1 = 1; i1 < params.dstDims[1]; i1++) {
+            params.dstIndices[idx + i1] = params.dstIndices[idx] + i1 * dstIdx1;
+            params.srcIndices[idx + i1] = params.srcIndices[idx] + i1 * srcIdx1;
+        }
+    }
+}
+
 void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) {
     if (!params.parametersAreConstant) {
         auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims();
@@ -586,42 +626,15 @@ void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) {
         SizeVector newSrcDims, newDstDims;
         dimsNormalization(newSrcDims, newDstDims);
         dimsGluing(dstDims.size(), newSrcDims, newDstDims);
-
-        if (params.dstDims.size() == 1 || params.nDimsForWork != 1)
             indicesCalculation();
     }
 
-    if (params.dstDims.size() > 1 && params.nDimsForWork == 1)
-        stridedSliceV();
-    else
         stridedSlice();
 }
 
-void MKLDNNStridedSliceNode::stridedSliceV() {
-    const uint8_t* srcData = reinterpret_cast<const uint8_t*>(this->getParentEdgeAt(DATA_ID)->getMemoryPtr()->GetPtr()) +
-            (begin[0] * params.srcStrides[0] + begin[1] * params.srcStrides[1]) * params.dataSize;
-    uint8_t* dstData = reinterpret_cast<uint8_t*>(this->getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
+inline void MKLDNNStridedSliceNode::stridedSlice() {
+    const uint8_t* srcData = reinterpret_cast<const uint8_t*>(params.srcMemPtr->GetPtr()) + params.srcShift;
+    uint8_t* dstData = reinterpret_cast<uint8_t*>(params.dstMemPtr->GetPtr());
 
-    const size_t dstIdx = params.dstStrides[0] * params.dataSize;
-    const size_t srcIdx = stride[0] * params.srcStrides[0] * params.dataSize;
-    const size_t dstShift = params.dstStrides[1] * params.dataSize;
-    const size_t srcShift = stride[1] * params.srcStrides[1] * params.dataSize;
-
-    if (params.dstDims.size() > 2) {
-        parallel_for2d(params.dstDims[0], params.dstDims[1], [&](const size_t i, const size_t j) {
-            cpu_memcpy(&dstData[i * dstIdx + j * dstShift], &srcData[i * srcIdx + j * srcShift], params.lastDstDim);
-        });
-    } else {
-        parallel_for(params.dstDims[0], [&](const size_t i) {
-            cpu_memcpy(&dstData[i * dstIdx], &srcData[i * srcIdx], params.lastDstDim);
-        });
-    }
-}
-
-void MKLDNNStridedSliceNode::stridedSlice() {
-    const uint8_t* srcData = reinterpret_cast<const uint8_t*>(this->getParentEdgeAt(DATA_ID)->getMemoryPtr()->GetPtr()) +
-            (stride.back() == 1 && stride.size() > 1 ? begin[params.nDimsForWork] * params.srcStrides[params.nDimsForWork] * params.dataSize : 0);
-    uint8_t* dstData = reinterpret_cast<uint8_t*>(this->getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
-
     parallel_nt(params.nThreads, [&](const int ithr, const int nthr) {
         size_t start = 0, end = 0;
@@ -27,14 +27,14 @@ public:
     static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept;
 
 private:
-    void stridedSliceV();
-    void stridedSlice();
+    inline void stridedSlice();
 
     void addHiddenDims(const size_t nSrcDims);
     void orderParametersByLayouts();
     void dimsNormalization(InferenceEngine::SizeVector& newSrcDims, InferenceEngine::SizeVector& newDstDims);
     void dimsGluing(const size_t realNDims, const InferenceEngine::SizeVector& newSrcDims, const InferenceEngine::SizeVector& newDstDims);
     void indicesCalculation();
+    void indicesCalculationForOptimized();
 
     const size_t DATA_ID = 0;
     const size_t BEGIN_ID = 1;
@@ -56,6 +56,8 @@ private:
     InferenceEngine::SizeVector strideDims;
 
     struct {
+        MKLDNNMemoryPtr srcMemPtr = nullptr;
+        MKLDNNMemoryPtr dstMemPtr = nullptr;
         InferenceEngine::SizeVector srcDims;
         InferenceEngine::SizeVector dstDims;
         InferenceEngine::SizeVector srcStrides;
@@ -69,6 +71,8 @@ private:
         size_t workAmount = 0;
         size_t lastDstDim = 0;
        size_t dataSize = 0;
+        size_t srcShift = 0;
+        bool isOptimized = false;
         bool equalDims = false;
         bool parametersAreConstant = true;
     } params;
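A standalone sketch of the indicesCalculationForOptimized() idea introduced above (not the plugin code): when only the two outer dimensions need individual copies, one byte offset per destination row is precomputed up front, and the execute step degenerates to plain memcpy calls over those offsets. The strides and dimensions below are made-up example values.

    #include <cstring>
    #include <iostream>
    #include <vector>

    int main() {
        const size_t D0 = 2, D1 = 3;                    // destination dims
        const size_t srcStride0 = 24, srcStride1 = 8;   // bytes between slices in the source
        const size_t dstStride0 = 12, dstStride1 = 4;   // bytes between slices in the destination
        std::vector<size_t> srcIdx(D0 * D1), dstIdx(D0 * D1);
        for (size_t i0 = 0; i0 < D0; ++i0) {
            const size_t idx = i0 * D1;
            srcIdx[idx] = i0 * srcStride0;
            dstIdx[idx] = i0 * dstStride0;
            for (size_t i1 = 1; i1 < D1; ++i1) {
                srcIdx[idx + i1] = srcIdx[idx] + i1 * srcStride1;
                dstIdx[idx + i1] = dstIdx[idx] + i1 * dstStride1;
            }
        }
        std::vector<char> src(64, 'x'), dst(32, 0);
        const size_t lastDim = 4;                       // bytes copied per precomputed pair
        for (size_t i = 0; i < srcIdx.size(); ++i)      // the hot loop is now just memcpy
            std::memcpy(dst.data() + dstIdx[i], src.data() + srcIdx[i], lastDim);
        std::cout << (dst[0] == 'x') << "\n";           // 1
    }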
@ -8,20 +8,20 @@
|
|||||||
|
|
||||||
#include "memory_formats_attribute.hpp"
|
#include "memory_formats_attribute.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
using namespace ngraph;
|
||||||
|
using namespace ov;
|
||||||
|
|
||||||
template class ngraph::MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>;
|
template class ov::MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>;
|
||||||
constexpr VariantTypeInfo VariantWrapper<MLKDNNInputMemoryFormats>::type_info;
|
constexpr VariantTypeInfo VariantWrapper<MLKDNNInputMemoryFormats>::type_info;
|
||||||
|
|
||||||
std::string getMLKDNNInputMemoryFormats(const std::shared_ptr<ngraph::Node> & node) {
|
std::string ngraph::getMLKDNNInputMemoryFormats(const std::shared_ptr<ngraph::Node> & node) {
|
||||||
return MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>::getMemoryFormats(node);
|
return MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>::getMemoryFormats(node);
|
||||||
}
|
}
|
||||||
|
|
||||||
template class ngraph::MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>;
|
template class ov::MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>;
|
||||||
constexpr VariantTypeInfo VariantWrapper<MLKDNNOutputMemoryFormats>::type_info;
|
constexpr VariantTypeInfo VariantWrapper<MLKDNNOutputMemoryFormats>::type_info;
|
||||||
|
|
||||||
std::string getMLKDNNOutputMemoryFormats(const std::shared_ptr<ngraph::Node> & node) {
|
std::string ngraph::getMLKDNNOutputMemoryFormats(const std::shared_ptr<ngraph::Node> & node) {
|
||||||
return MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>::getMemoryFormats(node);
|
return MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>::getMemoryFormats(node);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ngraph
|
|
||||||
@@ -25,6 +25,25 @@ public:
     std::string getMemoryFormats() const { return memory_format; }
 };
 
+class MLKDNNInputMemoryFormats : public MLKDNNMemoryFormats {
+public:
+    MLKDNNInputMemoryFormats() = default;
+    explicit MLKDNNInputMemoryFormats(const std::string &_memory_format) : MLKDNNMemoryFormats(_memory_format) {}
+};
+
+std::string getMLKDNNInputMemoryFormats(const std::shared_ptr<ngraph::Node>& node);
+
+class MLKDNNOutputMemoryFormats : public MLKDNNMemoryFormats {
+public:
+    MLKDNNOutputMemoryFormats() = default;
+    explicit MLKDNNOutputMemoryFormats(const std::string &_memory_format) : MLKDNNMemoryFormats(_memory_format) {}
+};
+
+std::string getMLKDNNOutputMemoryFormats(const std::shared_ptr<ngraph::Node>& node);
+
+} // namespace ngraph
+
+namespace ov {
 template <typename MemoryFormatsType>
 class MLKDNNMemoryFormatsHelper : public VariantImpl<MemoryFormatsType> {
 public:
@@ -35,7 +54,7 @@ public:
         using MemoryFormatsWrapper = VariantWrapper<MemoryFormatsType>;
         if (!rtInfo.count(MemoryFormatsWrapper::type_info.name)) return "";
         const auto &attr = rtInfo.at(MemoryFormatsWrapper::type_info.name);
-        MemoryFormatsType mem_format = as_type_ptr<MemoryFormatsWrapper>(attr)->get();
+        MemoryFormatsType mem_format = ngraph::as_type_ptr<MemoryFormatsWrapper>(attr)->get();
         return mem_format.getMemoryFormats();
     }
 
@@ -48,7 +67,7 @@ public:
         }
 
         if (unique_mem_format.size() > 1) {
-            throw ngraph_error(std::string(VariantWrapper<MemoryFormatsType>::type_info.name) + " no rule defined for multiple values.");
+            throw ngraph::ngraph_error(std::string(VariantWrapper<MemoryFormatsType>::type_info.name) + " no rule defined for multiple values.");
         }
 
         std::string final_mem_format;
@@ -59,46 +78,29 @@ public:
     }
 
     std::shared_ptr<ngraph::Variant> init(const std::shared_ptr<ngraph::Node> & node) override {
-        throw ngraph_error(std::string(VariantWrapper<MemoryFormatsType>::type_info.name) + " has no default initialization.");
+        throw ngraph::ngraph_error(std::string(VariantWrapper<MemoryFormatsType>::type_info.name) + " has no default initialization.");
     }
 };
 
+extern template class MLKDNNMemoryFormatsHelper<ngraph::MLKDNNInputMemoryFormats>;
-class MLKDNNInputMemoryFormats : public MLKDNNMemoryFormats {
-public:
-    MLKDNNInputMemoryFormats() = default;
-    explicit MLKDNNInputMemoryFormats(const std::string &_memory_format) : MLKDNNMemoryFormats(_memory_format) {}
-};
-
-extern template class MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>;
 
 template<>
-class VariantWrapper<MLKDNNInputMemoryFormats> : public MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats> {
+class VariantWrapper<ngraph::MLKDNNInputMemoryFormats> : public MLKDNNMemoryFormatsHelper<ngraph::MLKDNNInputMemoryFormats> {
 public:
-    static constexpr VariantTypeInfo type_info{MLKDNNInputMemoryFormatsAttr, 0};
+    static constexpr VariantTypeInfo type_info{ngraph::MLKDNNInputMemoryFormatsAttr, 0};
     const VariantTypeInfo &get_type_info() const override { return type_info; }
 
-    VariantWrapper(const MLKDNNInputMemoryFormats &value) : MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>(value) {}
+    VariantWrapper(const ngraph::MLKDNNInputMemoryFormats &value) : MLKDNNMemoryFormatsHelper<ngraph::MLKDNNInputMemoryFormats>(value) {}
 };
 
-std::string getMLKDNNInputMemoryFormats(const std::shared_ptr<ngraph::Node>& node);
+extern template class MLKDNNMemoryFormatsHelper<ngraph::MLKDNNOutputMemoryFormats>;
 
-class MLKDNNOutputMemoryFormats : public MLKDNNMemoryFormats {
-public:
-    MLKDNNOutputMemoryFormats() = default;
-    explicit MLKDNNOutputMemoryFormats(const std::string &_memory_format) : MLKDNNMemoryFormats(_memory_format) {}
-};
-
-extern template class MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>;
 
 template<>
-class VariantWrapper<MLKDNNOutputMemoryFormats> : public MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats> {
+class VariantWrapper<ngraph::MLKDNNOutputMemoryFormats> : public MLKDNNMemoryFormatsHelper<ngraph::MLKDNNOutputMemoryFormats> {
 public:
-    static constexpr VariantTypeInfo type_info{MLKDNNOutputMemoryFormatsAttr, 0};
+    static constexpr VariantTypeInfo type_info{ngraph::MLKDNNOutputMemoryFormatsAttr, 0};
     const VariantTypeInfo &get_type_info() const override { return type_info; }
 
-    VariantWrapper(const MLKDNNOutputMemoryFormats &value) : MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>(value) {}
+    VariantWrapper(const ngraph::MLKDNNOutputMemoryFormats &value) : MLKDNNMemoryFormatsHelper<ngraph::MLKDNNOutputMemoryFormats>(value) {}
 };
 
-std::string getMLKDNNOutputMemoryFormats(const std::shared_ptr<ngraph::Node>& node);
+} // namespace ov
 
-} // namespace ngraph
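
For orientation, a minimal sketch (not part of the diff) of how the two free functions kept in the ngraph namespace can be called after this split; the helper name, the printing, and the assumption that the attribute header above is on the include path are illustrative only.

#include <iostream>
#include <memory>
#include <string>

// Assumes the memory_formats_attribute.hpp shown above has already been included.
void dumpEnforcedFormats(const std::shared_ptr<ngraph::Node>& node) {
    // Both helpers return an empty string when no format was pinned through rt_info.
    const std::string in = ngraph::getMLKDNNInputMemoryFormats(node);
    const std::string out = ngraph::getMLKDNNOutputMemoryFormats(node);
    if (!in.empty() || !out.empty())
        std::cout << node->get_friendly_name() << ": in=" << in << ", out=" << out << std::endl;
}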
@@ -12,7 +12,7 @@ ie_add_plugin(NAME ${TARGET_NAME}
               SOURCES ${SOURCES} ${HEADERS}
               VERSION_DEFINES_FOR multi_device_plugin.cpp)
 
-target_link_libraries(${TARGET_NAME} PRIVATE inference_engine)
+target_link_libraries(${TARGET_NAME} PRIVATE inference_engine ngraph inference_engine_transformations)
 
 set_ie_threading_interface_for(${TARGET_NAME})
 
@@ -10,6 +10,10 @@
 #include <unordered_map>
 #include <unordered_set>
 
+#include <ngraph/opsets/opset1.hpp>
+#include <transformations/utils/utils.hpp>
+#include "ngraph_ops/convolution_ie.hpp"
+#include "ngraph_ops/deconvolution_ie.hpp"
 
 #include <ie_metric_helpers.hpp>
 #include <threading/ie_executor_manager.hpp>
@@ -21,6 +25,30 @@
 namespace MultiDevicePlugin {
     using namespace InferenceEngine;
 namespace {
+
+    std::string GetNetworkPrecision(const InferenceEngine::CNNNetwork &network) {
+        auto nGraphFunc = network.getFunction();
+        bool isINTModel = ngraph::op::util::has_op_with_type<ngraph::op::FakeQuantize>(nGraphFunc);
+        if (isINTModel) {
+            return METRIC_VALUE(INT8);
+        }
+        for (auto & node : nGraphFunc->get_ordered_ops()) {
+            if (std::dynamic_pointer_cast<ngraph::opset1::Convolution>(node) ||
+                std::dynamic_pointer_cast<ngraph::opset1::GroupConvolution>(node) ||
+                std::dynamic_pointer_cast<ngraph::opset1::GroupConvolutionBackpropData>(node) ||
+                std::dynamic_pointer_cast<ngraph::opset1::ConvolutionBackpropData>(node) ||
+                std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node) ||
+                std::dynamic_pointer_cast<ngraph::op::DeconvolutionIE>(node)) {
+                auto layerType = node->input(1).get_element_type().get_type_name();
+                if (layerType == "f32")
+                    return METRIC_VALUE(FP32);
+                if (layerType == "f16")
+                    return METRIC_VALUE(FP16);
+            }
+        }
+        return METRIC_VALUE(FP32);
+    }
+
     std::map<std::string, std::string> mergeConfigs(std::map<std::string, std::string> config,
                                                     const std::map<std::string, std::string> & local) {
         for (auto && kvp : local) {
@@ -28,7 +56,10 @@ namespace {
         }
         return config;
     }
-    std::vector<std::string> supported_configKeys = {MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES};
+    std::vector<std::string> supported_configKeys = {
+        MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES,
+        CONFIG_KEY_INTERNAL(WORK_MODE)
+    };
 }  // namespace
 
 std::map<std::string, std::string> MultiDeviceInferencePlugin::GetSupportedConfig(
@@ -98,8 +129,8 @@ std::vector<DeviceInformation> MultiDeviceInferencePlugin::ParseMetaDevices(cons
 
 InferenceEngine::Parameter MultiDeviceInferencePlugin::GetConfig(const std::string& name,
         const std::map<std::string, InferenceEngine::Parameter> & options) const {
-    if (name == MULTI_CONFIG_KEY(DEVICE_PRIORITIES)) {
-        auto it = _config.find(MULTI_CONFIG_KEY(DEVICE_PRIORITIES));
+    if (supported_configKeys.end() != std::find(supported_configKeys.begin(), supported_configKeys.end(), name)) {
+        auto it = _config.find(name);
         if (it == _config.end()) {
             IE_THROW() << "Value for KEY_MULTI_DEVICE_PRIORITIES is not set";
         } else {
@@ -148,17 +179,23 @@ InferenceEngine::Parameter MultiDeviceInferencePlugin::GetMetric(const std::stri
 // Is called only when caching is enabled
 IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadNetwork(const std::string& modelPath,
                                                                         const std::map<std::string, std::string>& config) {
-    return LoadExeNetworkImpl(modelPath, {}, config);
+    return LoadNetworkImpl(modelPath, {}, config);
 }
 
 IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadExeNetworkImpl(const CNNNetwork &network,
                                                                                const std::map<std::string, std::string>& config) {
-    return LoadExeNetworkImpl({}, network, config);
+    if (network.getFunction() == nullptr) {
+        IE_THROW() << "MULTI device supports just ngraph network representation";
+    }
+
+    auto networkPrecision = GetNetworkPrecision(network);
+    return LoadNetworkImpl({}, network, config, networkPrecision);
 }
 
-IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadExeNetworkImpl(const std::string& modelPath,
+IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadNetworkImpl(const std::string& modelPath,
                                                                              CNNNetwork network,
-                                                                             const std::map<std::string, std::string>& config) {
+                                                                             const std::map<std::string, std::string>& config,
+                                                                             const std::string &networkPrecision) {
     if (GetCore() == nullptr) {
         IE_THROW() << "Please, work with MULTI device via InferenceEngine::Core object";
     }
@@ -168,16 +205,39 @@ IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadExeNetworkImpl(c
     }
 
     auto fullConfig = mergeConfigs(_config, config);
-    auto priorities = fullConfig.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
-    if (priorities == fullConfig.end()) {
-        IE_THROW() << "KEY_MULTI_DEVICE_PRIORITIES key is not set for MULTI device";
-    }
-
-    auto metaDevices = ParseMetaDevices(priorities->second, fullConfig);
-
     // collect the settings that are applicable to the devices we are loading the network to
     std::unordered_map<std::string, InferenceEngine::Parameter> multiNetworkConfig;
+    std::vector<DeviceInformation> metaDevices;
+    auto workMode = fullConfig.find(CONFIG_KEY_INTERNAL(WORK_MODE));
+    auto priorities = fullConfig.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
+
+    // not found device priorities for -d AUTO use case
+    if (priorities == fullConfig.end()) {
+        if (workMode != fullConfig.end()) {
+            std::string allDevices;
+            auto availableDevices = GetCore()->GetAvailableDevices();
+            if (availableDevices.empty()) {
+                IE_THROW(NotFound) << "No available device found";
+            }
+            for (auto&& device : availableDevices) {
+                allDevices += device;
+                allDevices += ((device == availableDevices[availableDevices.size()-1]) ? "" : ",");
+            }
+            metaDevices = ParseMetaDevices(allDevices, fullConfig);
+            multiNetworkConfig.insert({MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, allDevices});
+        } else {
+            IE_THROW() << "KEY_MULTI_DEVICE_PRIORITIES key is not set for MULTI device";
+        }
+    } else {  // for use case -d MULTI:xPU or -d AUTO:xPU
+        metaDevices = ParseMetaDevices(priorities->second, fullConfig);
        multiNetworkConfig.insert(*priorities);
+    }
+    // check if it is -d AUTO or -d AUTO:xPU use case
+    if (workMode != fullConfig.end()) {
+        auto targetDevice = SelectDevice(metaDevices, networkPrecision);
+        // std::cout << "!!! DEBUG: select device is " << targetDevice.deviceName << std::endl;
+        metaDevices = { targetDevice };
+    }
 
     DeviceMap<SoExecutableNetworkInternal> executableNetworkPerDevice;
     std::mutex load_mutex;
@@ -275,4 +335,125 @@ QueryNetworkResult MultiDeviceInferencePlugin::QueryNetwork(const CNNNetwork&
     return queryResult;
 }
 
+DeviceInformation MultiDeviceInferencePlugin::SelectDevice(const std::vector<DeviceInformation>& metaDevices, const std::string& networkPrecision) {
+    if (metaDevices.empty()) {
+        IE_THROW(NotFound) << "No available device to select in AUTO plugin";
+    }
+    if (metaDevices.size() == 1) {
+        return metaDevices.at(0);
+    }
+
+    std::vector<DeviceInformation> CPU;
+    std::vector<DeviceInformation> dGPU;
+    std::vector<DeviceInformation> iGPU;
+    std::vector<DeviceInformation> MYRIAD;
+    std::vector<DeviceInformation> VPUX;
+
+    for (auto& item : metaDevices) {
+        if (item.deviceName.find("CPU") == 0) {
+            CPU.push_back(item);
+            continue;
+        }
+        if (item.deviceName.find("MYRIAD") == 0) {
+            MYRIAD.push_back(item);
+            continue;
+        }
+        if (item.deviceName.find("VPUX") == 0) {
+            VPUX.push_back(item);
+            continue;
+        }
+        if (item.deviceName.find("GPU") == 0) {
+            auto gpuFullDeviceName = GetCore()->GetMetric(item.deviceName, METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>();
+            if (gpuFullDeviceName.find("iGPU") != std::string::npos) {
+                iGPU.push_back(item);
+            } else if (gpuFullDeviceName.find("dGPU") != std::string::npos) {
+                dGPU.push_back(item);
+            }
+            continue;
+        }
+    }
+
+    if (CPU.empty() && dGPU.empty() && iGPU.empty() && MYRIAD.empty() && VPUX.empty()) {
+        IE_THROW(NotFound) << "No available device found";
+    }
+
+    // Priority of selecting device: dGPU > VPUX > iGPU > MYRIAD > CPU
+    if (!dGPU.empty()) {
+        for (auto&& item : dGPU) {
+            std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
+            auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
+            if (supportNetwork != capability.end()) {
+                return item;
+            }
+        }
+    } else if (!VPUX.empty()) {
+        for (auto&& item : VPUX) {
+            std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
+            auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
+            if (supportNetwork != capability.end()) {
+                return item;
+            }
+        }
+    } else if (!iGPU.empty()) {
+        for (auto&& item : iGPU) {
+            std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
+            auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
+            if (supportNetwork != capability.end()) {
+                return item;
+            }
+        }
+    } else if (!MYRIAD.empty()) {
+        for (auto&& item : MYRIAD) {
+            std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
+            auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
+            if (supportNetwork != capability.end()) {
+                return item;
+            }
+        }
+    }
+
+    // If network is FP32 but there is no device support FP32, offload FP32 network to device support FP16.
+    if (networkPrecision == "FP32") {
+        if (!dGPU.empty()) {
+            for (auto&& item : dGPU) {
+                std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
+                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
+                if (supportNetwork != capability.end()) {
+                    return item;
+                }
+            }
+        } else if (!VPUX.empty()) {
+            for (auto&& item : VPUX) {
+                std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
+                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
+                if (supportNetwork != capability.end()) {
+                    return item;
+                }
+            }
+        } else if (!iGPU.empty()) {
+            for (auto&& item : iGPU) {
+                std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
+                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
+                if (supportNetwork != capability.end()) {
+                    return item;
+                }
+            }
+        } else if (!MYRIAD.empty()) {
+            for (auto&& item : MYRIAD) {
+                std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
+                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
+                if (supportNetwork != capability.end()) {
+                    return item;
+                }
+            }
+        }
+    }
+
+    if (CPU.empty()) {
+        IE_THROW() << "Cannot select any device";
+    }
+    return CPU[0];
+}
+
 }  // namespace MultiDevicePlugin
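
To make the user-visible effect of this hunk concrete, here is a hedged usage sketch, not part of the diff, assuming the 2021.x InferenceEngine::Core API and a hypothetical model path; the "AUTO" device name is assumed to be routed to this plugin with the WORK_MODE handling added above.

#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // hypothetical model path

    // Explicit priorities: the MULTI path keeps every listed device and loads on all of them.
    auto multiExec = core.LoadNetwork(network, "MULTI:GPU,CPU");

    // No priorities: the AUTO path enumerates available devices and SelectDevice() picks one
    // that supports the network precision, in the order dGPU > VPUX > iGPU > MYRIAD > CPU.
    auto autoExec = core.LoadNetwork(network, "AUTO");
    return 0;
}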
@@ -41,9 +41,11 @@ protected:
                                                          const MultiDevicePlugin::DeviceName & deviceName) const;
 
 private:
-    InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const std::string& modelPath,
+    InferenceEngine::IExecutableNetworkInternal::Ptr LoadNetworkImpl(const std::string& modelPath,
                                                                      InferenceEngine::CNNNetwork network,
-                                                                     const std::map<std::string, std::string>& config);
+                                                                     const std::map<std::string, std::string>& config,
+                                                                     const std::string &networkPrecision = METRIC_VALUE(FP32));
+    DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32));
 };
 
 }  // namespace MultiDevicePlugin
@@ -192,24 +192,6 @@ private:
 
 std::ostream & operator<< (std::ostream & out, const Mask & mask);
 
-extern template class VariantImpl<Mask::Ptr>;
-
-template<>
-class VariantWrapper<Mask::Ptr> : public VariantImpl<Mask::Ptr> {
-public:
-    static constexpr VariantTypeInfo type_info{"Variant::RuntimeAttribute::Mask", 0};
-
-    const VariantTypeInfo &get_type_info() const override {
-        return type_info;
-    }
-
-    static std::shared_ptr<VariantWrapper<Mask::Ptr>> create(const value_type & value) {
-        return std::make_shared<VariantWrapper<Mask::Ptr>>(value);
-    }
-
-    explicit VariantWrapper(const value_type &value) : VariantImpl<value_type>(value) {}
-};
-
 Mask::Ptr getMask(const Output<const Node> & output);
 
 Mask::Ptr getMask(const Output<Node> & output);
@@ -217,3 +199,25 @@ Mask::Ptr getMask(const Output<Node> & output);
 void setMask(Output<Node> output, const Mask::Ptr & mask);
 
 }  // namespace ngraph
 
+namespace ov {
+
+extern template class VariantImpl<ngraph::Mask::Ptr>;
+
+template<>
+class VariantWrapper<ngraph::Mask::Ptr> : public VariantImpl<ngraph::Mask::Ptr> {
+public:
+    static constexpr VariantTypeInfo type_info{"Variant::RuntimeAttribute::Mask", 0};
+
+    const VariantTypeInfo &get_type_info() const override {
+        return type_info;
+    }
+
+    static std::shared_ptr<VariantWrapper<ngraph::Mask::Ptr>> create(const value_type & value) {
+        return std::make_shared<VariantWrapper<ngraph::Mask::Ptr>>(value);
+    }
+
+    explicit VariantWrapper(const value_type &value) : VariantImpl<value_type>(value) {}
+};
+
+}  // namespace ov
@@ -35,6 +35,8 @@
 #include <transformations/common_optimizations/conv_mul_fusion.hpp>
 #include <transformations/common_optimizations/nop_elimination.hpp>
 #include <transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp>
+#include <transformations/common_optimizations/leaky_relu_fusion.hpp>
+#include <transformations/common_optimizations/normalize_l2_fusion.hpp>
 
 NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0);
 
@@ -79,11 +81,13 @@ bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr<ngraph::F
     common_fusions->add_matcher<ngraph::pass::SwishFusion>();
     common_fusions->add_matcher<ngraph::pass::HSwishFusion>();
     common_fusions->add_matcher<ngraph::pass::HSigmoidFusion>();
+    common_fusions->add_matcher<ngraph::pass::NormalizeL2Fusion>();
     common_fusions->add_matcher<ngraph::pass::ClampFusion>();
     common_fusions->add_matcher<ngraph::pass::PadFusion>();
     common_fusions->add_matcher<ngraph::pass::MVNFusion>();
     common_fusions->add_matcher<ngraph::pass::DilatedConvolutionConverter>();
     common_fusions->add_matcher<ngraph::pass::GeluFusion>();
+    common_fusions->add_matcher<ngraph::pass::LeakyReluFusion>();
     common_fusions->set_name("ngraph::pass::CommonFusions");
 
     manager.register_pass<ngraph::pass::BinarizeWeights>();
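
A minimal sketch, not from the diff, of how the two newly registered fusions could be applied on their own through an ngraph pass manager; the wrapper function name and the standalone setup are assumptions, only the pass class names and headers come from the hunk above.

#include <ngraph/pass/manager.hpp>
#include <transformations/common_optimizations/leaky_relu_fusion.hpp>
#include <transformations/common_optimizations/normalize_l2_fusion.hpp>

// Runs just the two added matchers on a function, outside of MOCTransformations.
void runNewFusions(const std::shared_ptr<ngraph::Function>& f) {
    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::NormalizeL2Fusion>();
    manager.register_pass<ngraph::pass::LeakyReluFusion>();
    manager.run_passes(f);
}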
@@ -12,10 +12,6 @@
 
 namespace ngraph {
 
-template class ngraph::VariantImpl<Mask::Ptr>;
-
-constexpr VariantTypeInfo VariantWrapper<Mask::Ptr>::type_info;
-
 Mask::Ptr getMask(const Output<const Node> & output) {
     auto &rtInfo = output.get_rt_info();
     using MaskWrapper = VariantWrapper<Mask::Ptr>;
@@ -57,6 +53,12 @@ std::ostream & operator<< (std::ostream & out, const Mask & mask) {
     return out;
 }
 
 }  // namespace ngraph
 
+namespace ov {
+
+template class ngraph::VariantImpl<ngraph::Mask::Ptr>;
+
+constexpr VariantTypeInfo VariantWrapper<ngraph::Mask::Ptr>::type_info;
+
+}  // namespace ov
@@ -45,6 +45,15 @@ DECLARE_CONFIG_KEY(CPU_THREADS_PER_STREAM);
  */
 DECLARE_CONFIG_KEY(FORCE_DISABLE_CACHE);
 
+/**
+ * @brief The name for setting work mode internal in MULTI device plugin option.
+ *
+ * This option should be used with value only:
+ * PluginConfigInternalParams::MULTI_MODE_AUTO or PluginConfigInternalParams::MULTI_MODE_LEGACY
+ */
+DECLARE_CONFIG_KEY(WORK_MODE);
+DECLARE_CONFIG_VALUE(MULTI_MODE_AUTO);
 
 }  // namespace PluginConfigInternalParams
 
 }  // namespace InferenceEngine
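
A hedged illustration of how the new internal key and value could be combined in a config map; the key macro and the value constant are taken from the declarations above and from the plugin code earlier in the diff, but the exact engine-side call site is an assumption, not something shown here.

#include <map>
#include <string>

// The MULTI plugin looks the mode up via CONFIG_KEY_INTERNAL(WORK_MODE); a core-side caller
// could request AUTO behaviour roughly like this (header providing the macros is assumed).
std::map<std::string, std::string> internalConfig = {
    { CONFIG_KEY_INTERNAL(WORK_MODE),
      InferenceEngine::PluginConfigInternalParams::MULTI_MODE_AUTO }
};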
@@ -0,0 +1,33 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "ie_api.h"
+#include "ie_parallel.hpp"
+#include "threading/ie_istreams_executor.hpp"
+
+namespace InferenceEngine {
+/**
+ * @class TBBStreamsExecutor
+ * @brief CPU Streams executor implementation. Use TBB thread pool to run tasks
+ */
+class INFERENCE_ENGINE_API_CLASS(TBBStreamsExecutor) : public IStreamsExecutor {
+public:
+    using Ptr = std::shared_ptr<TBBStreamsExecutor>;
+    explicit TBBStreamsExecutor(const Config& config = {});
+    ~TBBStreamsExecutor() override;
+    void run(Task task) override;
+    void Execute(Task task) override;
+    int GetStreamId() override;
+    int GetNumaNodeId() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
+};
+}  // namespace InferenceEngine
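
A usage sketch for the new executor, not part of the diff: the header path, the example function, and the Config field meanings (name, number of streams, following IStreamsExecutor::Config) are assumptions rather than values taken from this change.

#include "threading/ie_tbb_streams_executor.hpp"  // assumed install path of the header above

void exampleUsage() {
    InferenceEngine::IStreamsExecutor::Config config{"example-executor", 2};
    InferenceEngine::TBBStreamsExecutor executor{config};
    executor.run([] { /* queued task; may run later on a TBB worker thread */ });
    executor.Execute([] { /* task executed through the executor's TBB arena */ });
}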