Compare commits
58 Commits
2022.1.1
...
releases/2
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e0568bf261 | ||
|
|
d30183dd18 | ||
|
|
d81d270a4c | ||
|
|
199c05b45e | ||
|
|
b504679eb9 | ||
|
|
e0eb1ae287 | ||
|
|
eab86afdd5 | ||
|
|
0dbaf078d8 | ||
|
|
3c5fa6f4b8 | ||
|
|
31ccf354dc | ||
|
|
bf9b649cdf | ||
|
|
84518964ba | ||
|
|
0b4846cfcc | ||
|
|
950388d9e8 | ||
|
|
f828b16f40 | ||
|
|
261bd3de6b | ||
|
|
31b3e356ab | ||
|
|
607982e79c | ||
|
|
c083e5b146 | ||
|
|
444301a1d6 | ||
|
|
f56ba0daa9 | ||
|
|
cd101085d7 | ||
|
|
2c79f74579 | ||
|
|
d7463eb216 | ||
|
|
74b13a0f74 | ||
|
|
1c8188908e | ||
|
|
86e39a6775 | ||
|
|
2645421df6 | ||
|
|
9b1961502b | ||
|
|
2023a7cd81 | ||
|
|
105cd18d0b | ||
|
|
92d19291c8 | ||
|
|
191e9f7f72 | ||
|
|
126c2600bb | ||
|
|
b922800ae2 | ||
|
|
272b17f5d9 | ||
|
|
b89e7d69dd | ||
|
|
528e6f9328 | ||
|
|
ebf009d1a1 | ||
|
|
d604a03ac0 | ||
|
|
e7e82b9eb7 | ||
|
|
f5bd16990e | ||
|
|
488f2dd916 | ||
|
|
79853baf28 | ||
|
|
6c5e0cfaa4 | ||
|
|
d239b2584c | ||
|
|
28a733b771 | ||
|
|
7bba2a9542 | ||
|
|
9b7e22f49a | ||
|
|
a4dc5c89f3 | ||
|
|
fef1803a86 | ||
|
|
e94393df10 | ||
|
|
2e4f46e1fd | ||
|
|
177906b99a | ||
|
|
6d38488462 | ||
|
|
db5aa551af | ||
|
|
6d90eedbd2 | ||
|
|
a91e256d27 |
118
.ci/azure/linux.yml
Normal file
118
.ci/azure/linux.yml
Normal file
@@ -0,0 +1,118 @@
|
||||
jobs:
|
||||
- job: Lin
|
||||
# About 150% of total time
|
||||
timeoutInMinutes: 85
|
||||
pool:
|
||||
name: LIN_VMSS_VENV_F8S_WU2
|
||||
variables:
|
||||
system.debug: true
|
||||
WORKERS_NUMBER: 8
|
||||
BUILD_TYPE: Release
|
||||
REPO_DIR: $(Build.Repository.LocalPath)
|
||||
WORK_DIR: $(Pipeline.Workspace)/_w
|
||||
BUILD_DIR: $(WORK_DIR)/build
|
||||
BIN_DIR: $(REPO_DIR)/bin/intel64/$(BUILD_TYPE)
|
||||
steps:
|
||||
- checkout: self
|
||||
clean: true
|
||||
fetchDepth: 1
|
||||
lfs: false
|
||||
submodules: recursive
|
||||
path: openvino
|
||||
- script: |
|
||||
curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
|
||||
whoami
|
||||
uname -a
|
||||
which python3
|
||||
python3 --version
|
||||
gcc --version
|
||||
lsb_release
|
||||
env
|
||||
cat /proc/cpuinfo
|
||||
cat /proc/meminfo
|
||||
vmstat -s
|
||||
df
|
||||
displayName: 'System info'
|
||||
- script: |
|
||||
rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
|
||||
rm -rf $(BUILD_DIR) ; mkdir $(BUILD_DIR)
|
||||
displayName: 'Make dir'
|
||||
- script: |
|
||||
sudo apt --assume-yes install libusb-1.0-0-dev
|
||||
python3 -m pip install -r ./inference-engine/ie_bridges/python/requirements.txt
|
||||
# For running Python API tests
|
||||
python3 -m pip install -r ./inference-engine/ie_bridges/python/src/requirements-dev.txt
|
||||
displayName: 'Install dependencies'
|
||||
- script: |
|
||||
wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip
|
||||
unzip ninja-linux.zip
|
||||
sudo cp -v ninja /usr/local/bin/
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'Install Ninja'
|
||||
- task: CMake@1
|
||||
inputs:
|
||||
# CMake must get Python 3.x version by default
|
||||
cmakeArgs: -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE=/usr/bin/python3.6 -DENABLE_TESTS=ON $(REPO_DIR)
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
- script: ninja
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Build Lin'
|
||||
- script: ls -alR $(REPO_DIR)/bin/
|
||||
displayName: 'List files'
|
||||
- script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*
|
||||
displayName: 'nGraph UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/InferenceEngineUnitTests --gtest_print_time=1
|
||||
displayName: 'IE UT old'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/ieUnitTests
|
||||
displayName: 'IE UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/cpuUnitTests
|
||||
displayName: 'CPU UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/gnaUnitTests
|
||||
displayName: 'GNA UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/vpuUnitTests
|
||||
displayName: 'VPU UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/ieFuncTests
|
||||
displayName: 'IE FuncTests'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/cpuFuncTests --gtest_print_time=1
|
||||
displayName: 'CPU FuncTests'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/MklDnnBehaviorTests
|
||||
displayName: 'MklDnnBehaviorTests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
git clone https://github.com/openvinotoolkit/testdata.git
|
||||
git clone https://github.com/google/gtest-parallel.git
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'Clone testdata & gtest-parallel'
|
||||
- script: |
|
||||
export DATA_PATH=$(WORK_DIR)/testdata
|
||||
export MODELS_PATH=$(WORK_DIR)/testdata
|
||||
python3 $(WORK_DIR)/gtest-parallel/gtest-parallel $(BIN_DIR)/MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --print_test_times --dump_json_test_results=MklDnnFunctionalTests.json -- --gtest_print_time=1
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'MklDnnFunctionalTests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
export DATA_PATH=$(WORK_DIR)/testdata
|
||||
export MODELS_PATH=$(WORK_DIR)/testdata
|
||||
$(BIN_DIR)/InferenceEngineCAPITests
|
||||
displayName: 'IE CAPITests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
export DATA_PATH=$(WORK_DIR)/testdata
|
||||
export MODELS_PATH=$(WORK_DIR)/testdata
|
||||
export LD_LIBRARY_PATH=$(BIN_DIR)/lib
|
||||
export PYTHONPATH=$(BIN_DIR)/lib/python_api/python3.6
|
||||
env
|
||||
cd $(REPO_DIR)/inference-engine/ie_bridges/python/tests
|
||||
pytest
|
||||
displayName: 'Python API Tests'
|
||||
continueOnError: false
|
||||
enabled: false
|
||||
|
||||
102
.ci/azure/mac.yml
Normal file
102
.ci/azure/mac.yml
Normal file
@@ -0,0 +1,102 @@
|
||||
jobs:
|
||||
- job: Mac
|
||||
# About 200% of total time (perfomace of Mac hosts is unstable)
|
||||
timeoutInMinutes: 180
|
||||
pool:
|
||||
vmImage: 'macOS-10.15'
|
||||
variables:
|
||||
system.debug: true
|
||||
WORKERS_NUMBER: 3
|
||||
BUILD_TYPE: Release
|
||||
REPO_DIR: $(Build.Repository.LocalPath)
|
||||
WORK_DIR: $(Pipeline.Workspace)/_w
|
||||
BUILD_DIR: $(WORK_DIR)/build
|
||||
BIN_DIR: $(REPO_DIR)/bin/intel64/$(BUILD_TYPE)
|
||||
steps:
|
||||
- checkout: self
|
||||
clean: true
|
||||
fetchDepth: 1
|
||||
lfs: false
|
||||
submodules: recursive
|
||||
path: openvino
|
||||
- script: |
|
||||
whoami
|
||||
uname -a
|
||||
which python3
|
||||
python3 --version
|
||||
gcc --version
|
||||
xcrun --sdk macosx --show-sdk-version
|
||||
env
|
||||
sysctl -a
|
||||
displayName: 'System info'
|
||||
- script: |
|
||||
rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
|
||||
rm -rf $(BUILD_DIR) ; mkdir $(BUILD_DIR)
|
||||
displayName: 'Make dir'
|
||||
- task: UsePythonVersion@0
|
||||
inputs:
|
||||
versionSpec: '3.7'
|
||||
- script: |
|
||||
brew install cython
|
||||
brew install automake
|
||||
displayName: 'Install dependencies'
|
||||
- script: brew install ninja
|
||||
displayName: 'Install Ninja'
|
||||
- script: |
|
||||
export PATH="/usr/local/opt/cython/bin:$PATH"
|
||||
export CC=gcc
|
||||
export CXX=g++
|
||||
# Disable errors with Ninja
|
||||
export CXXFLAGS="-Wno-error=unused-command-line-argument"
|
||||
export CFLAGS="-Wno-error=unused-command-line-argument"
|
||||
cmake -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON $(REPO_DIR)
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'CMake'
|
||||
- script: ninja
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Build Mac'
|
||||
- script: ls -alR $(REPO_DIR)/bin/
|
||||
displayName: 'List files'
|
||||
- script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid
|
||||
displayName: 'nGraph UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/InferenceEngineUnitTests --gtest_print_time=1
|
||||
displayName: 'IE UT old'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/ieUnitTests
|
||||
displayName: 'IE UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/cpuUnitTests
|
||||
displayName: 'CPU UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/vpuUnitTests
|
||||
displayName: 'VPU UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/ieFuncTests
|
||||
displayName: 'IE FuncTests'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/cpuFuncTests --gtest_print_time=1
|
||||
displayName: 'CPU FuncTests'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/MklDnnBehaviorTests
|
||||
displayName: 'MklDnnBehaviorTests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
git clone https://github.com/openvinotoolkit/testdata.git
|
||||
git clone https://github.com/google/gtest-parallel.git
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'Clone testdata & gtest-parallel'
|
||||
- script: |
|
||||
export DATA_PATH=$(WORK_DIR)/testdata
|
||||
export MODELS_PATH=$(WORK_DIR)/testdata
|
||||
python3 $(WORK_DIR)/gtest-parallel/gtest-parallel $(BIN_DIR)/MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --print_test_times --dump_json_test_results=MklDnnFunctionalTests.json --gtest_filter=-smoke_MobileNet/ModelTransformationsTest.LPT/mobilenet_v2_tf_depthwise_batch1_inPluginDisabled_inTestDisabled_asymmetric* -- --gtest_print_time=1
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'MklDnnFunctionalTests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
export DATA_PATH=$(WORK_DIR)/testdata
|
||||
export MODELS_PATH=$(WORK_DIR)/testdata
|
||||
$(BIN_DIR)/InferenceEngineCAPITests
|
||||
displayName: 'IE CAPITests'
|
||||
continueOnError: false
|
||||
|
||||
133
.ci/azure/windows.yml
Normal file
133
.ci/azure/windows.yml
Normal file
@@ -0,0 +1,133 @@
|
||||
jobs:
|
||||
- job: Win
|
||||
# About 150% of total time
|
||||
timeoutInMinutes: 120
|
||||
pool:
|
||||
name: WIN_VMSS_VENV_F8S_WU2
|
||||
variables:
|
||||
system.debug: true
|
||||
WORKERS_NUMBER: 8
|
||||
BUILD_TYPE: Release
|
||||
REPO_DIR: $(Build.Repository.LocalPath)
|
||||
WORK_DIR: $(Pipeline.Workspace)\_w
|
||||
BUILD_DIR: D:\build
|
||||
BIN_DIR: $(REPO_DIR)\bin\intel64
|
||||
MSVS_VARS_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
MSVC_COMPILER_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\bin\Hostx64\x64\cl.exe
|
||||
steps:
|
||||
- checkout: self
|
||||
clean: true
|
||||
fetchDepth: 1
|
||||
lfs: false
|
||||
submodules: recursive
|
||||
path: openvino
|
||||
- script: |
|
||||
powershell -command "Invoke-RestMethod -Headers @{\"Metadata\"=\"true\"} -Method GET -Uri http://169.254.169.254/metadata/instance/compute?api-version=2019-06-01 | format-custom"
|
||||
where python3
|
||||
where python
|
||||
python --version
|
||||
wmic computersystem get TotalPhysicalMemory
|
||||
wmic cpu list
|
||||
wmic logicaldisk get description,name
|
||||
wmic VOLUME list
|
||||
set
|
||||
displayName: 'System info'
|
||||
- script: |
|
||||
rd /Q /S $(WORK_DIR) & mkdir $(WORK_DIR)
|
||||
rd /Q /S $(BUILD_DIR) & mkdir $(BUILD_DIR)
|
||||
displayName: 'Make dir'
|
||||
- script: |
|
||||
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-win.zip ninja-win.zip
|
||||
powershell -command "Expand-Archive -Force ninja-win.zip"
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: Install Ninja
|
||||
- script: |
|
||||
certutil -urlcache -split -f https://incredibuilddiag1wu2.blob.core.windows.net/incredibuild/IBSetupConsole_9_5_0.exe IBSetupConsole_9_5_0.exe
|
||||
call IBSetupConsole_9_5_0.exe /Install /Components=Agent,oneuse /Coordinator=11.1.0.4 /AGENT:OPENFIREWALL=ON /AGENT:AUTOSELECTPORTS=ON /ADDTOPATH=ON /AGENT:INSTALLADDINS=OFF
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: Install IncrediBuild
|
||||
- script: |
|
||||
echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
|
||||
reg add HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Xoreax\IncrediBuild\Builder /f /v LastEnabled /d 0 && echo Start IncrediBuild_Agent && net start IncrediBuild_Agent
|
||||
displayName: Start IncrediBuild
|
||||
- script: |
|
||||
set PATH=$(WORK_DIR)\ninja-win;%PATH%
|
||||
call "$(MSVS_VARS_PATH)" && cmake -GNinja -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'CMake'
|
||||
- script: |
|
||||
set PATH=$(WORK_DIR)\ninja-win;%PATH%
|
||||
call "$(MSVS_VARS_PATH)" && "C:\Program Files (x86)\IncrediBuild\BuildConsole.exe" /COMMAND="ninja" /MaxCPUS=40
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Build Win'
|
||||
- script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
|
||||
displayName: Stop IncrediBuild
|
||||
continueOnError: true
|
||||
- script: dir $(REPO_DIR)\bin\ /s /b
|
||||
displayName: 'List files'
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*
|
||||
displayName: 'nGraph UT'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\InferenceEngineUnitTests --gtest_print_time=1
|
||||
displayName: 'IE UT old'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\ieUnitTests
|
||||
displayName: 'IE UT'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\cpuUnitTests
|
||||
displayName: 'CPU UT'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\gnaUnitTests
|
||||
displayName: 'GNA UT'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\vpuUnitTests
|
||||
displayName: 'VPU UT'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\ieFuncTests
|
||||
displayName: 'IE FuncTests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\cpuFuncTests --gtest_print_time=1
|
||||
displayName: 'CPU FuncTests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\MklDnnBehaviorTests
|
||||
displayName: 'MklDnnBehaviorTests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
git clone https://github.com/openvinotoolkit/testdata.git
|
||||
git clone https://github.com/google/gtest-parallel.git
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Clone testdata & gtest-parallel'
|
||||
# Add for gtest-parallel, it hangs now (CVS-33386)
|
||||
#python $(BUILD_DIR)\gtest-parallel\gtest-parallel $(BIN_DIR)\MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --print_test_times --dump_json_test_results=MklDnnFunctionalTests.json -- --gtest_print_time=1
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.0\opencv\bin;%PATH%
|
||||
set DATA_PATH=$(BUILD_DIR)\testdata
|
||||
set MODELS_PATH=$(BUILD_DIR)\testdata
|
||||
$(BIN_DIR)\MklDnnFunctionalTests --gtest_print_time=1
|
||||
displayName: 'MklDnnFunctionalTests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.0\opencv\bin;%PATH%
|
||||
set DATA_PATH=$(BUILD_DIR)\testdata
|
||||
set MODELS_PATH=$(BUILD_DIR)\testdata
|
||||
$(BIN_DIR)\InferenceEngineCAPITests
|
||||
displayName: 'IE CAPITests'
|
||||
continueOnError: false
|
||||
@@ -79,5 +79,5 @@ ENV NGRAPH_CPP_BUILD_PATH=/openvino/dist
|
||||
ENV LD_LIBRARY_PATH=/openvino/dist/lib
|
||||
ENV NGRAPH_ONNX_IMPORT_ENABLE=TRUE
|
||||
ENV PYTHONPATH=/openvino/bin/intel64/Release/lib/python_api/python3.8:${PYTHONPATH}
|
||||
RUN git clone --recursive https://github.com/pybind/pybind11.git
|
||||
RUN git clone --recursive https://github.com/pybind/pybind11.git -b v2.5.0 --depth 1
|
||||
CMD tox
|
||||
|
||||
6
.ci/openvino-onnx/Jenkinsfile
vendored
6
.ci/openvino-onnx/Jenkinsfile
vendored
@@ -68,7 +68,7 @@ def buildDockerImage() {
|
||||
|
||||
def runTests() {
|
||||
sh """
|
||||
docker run --rm --name ${DOCKER_CONTAINER_NAME} \
|
||||
docker run --name ${DOCKER_CONTAINER_NAME} \
|
||||
--volume ${HOME}/ONNX_CI/onnx_models/.onnx:/root/.onnx ${DOCKER_IMAGE_TAG}
|
||||
"""
|
||||
}
|
||||
@@ -101,6 +101,9 @@ pipeline {
|
||||
}
|
||||
}
|
||||
stage("Run tests") {
|
||||
options {
|
||||
timeout(time: 10, unit: 'MINUTES')
|
||||
}
|
||||
steps{
|
||||
runTests()
|
||||
}
|
||||
@@ -118,6 +121,7 @@ pipeline {
|
||||
deleteDir()
|
||||
sh """
|
||||
docker image prune -f
|
||||
docker rm -f ${DOCKER_CONTAINER_NAME}
|
||||
"""
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,17 +8,7 @@ cmake_policy(SET CMP0054 NEW)
|
||||
# it allows to install targets created outside of current projects
|
||||
# See https://blog.kitware.com/cmake-3-13-0-available-for-download/
|
||||
|
||||
if (APPLE)
|
||||
if(CMAKE_GENERATOR STREQUAL "Xcode")
|
||||
# due to https://gitlab.kitware.com/cmake/cmake/issues/14254
|
||||
cmake_minimum_required(VERSION 3.12.0 FATAL_ERROR)
|
||||
else()
|
||||
# due to https://cmake.org/cmake/help/v3.12/policy/CMP0068.html
|
||||
cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
|
||||
endif()
|
||||
else()
|
||||
cmake_minimum_required(VERSION 3.7.2 FATAL_ERROR)
|
||||
endif()
|
||||
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
||||
|
||||
project(OpenVINO)
|
||||
|
||||
|
||||
@@ -1,351 +0,0 @@
|
||||
jobs:
|
||||
- job: Lin
|
||||
# About 150% of total time
|
||||
timeoutInMinutes: 85
|
||||
pool:
|
||||
name: LIN_VMSS_VENV_F8S_WU2
|
||||
variables:
|
||||
system.debug: true
|
||||
WORKERS_NUMBER: 8
|
||||
BUILD_TYPE: Release
|
||||
REPO_DIR: $(Build.Repository.LocalPath)
|
||||
WORK_DIR: $(Pipeline.Workspace)/_w
|
||||
BUILD_DIR: $(WORK_DIR)/build
|
||||
BIN_DIR: $(REPO_DIR)/bin/intel64/$(BUILD_TYPE)
|
||||
steps:
|
||||
- checkout: self
|
||||
clean: true
|
||||
fetchDepth: 1
|
||||
lfs: false
|
||||
submodules: recursive
|
||||
path: openvino
|
||||
- script: |
|
||||
curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
|
||||
whoami
|
||||
uname -a
|
||||
which python3
|
||||
python3 --version
|
||||
gcc --version
|
||||
lsb_release
|
||||
env
|
||||
cat /proc/cpuinfo
|
||||
cat /proc/meminfo
|
||||
vmstat -s
|
||||
df
|
||||
displayName: 'System properties'
|
||||
- script: |
|
||||
rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
|
||||
rm -rf $(BUILD_DIR) ; mkdir $(BUILD_DIR)
|
||||
displayName: 'Make dir'
|
||||
- script: |
|
||||
sudo apt --assume-yes install libusb-1.0-0-dev
|
||||
python3 -m pip install -r ./inference-engine/ie_bridges/python/requirements.txt
|
||||
# For running Python API tests
|
||||
python3 -m pip install -r ./inference-engine/ie_bridges/python/src/requirements-dev.txt
|
||||
displayName: 'Install dependencies'
|
||||
- script: |
|
||||
wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip
|
||||
unzip ninja-linux.zip
|
||||
sudo cp -v ninja /usr/local/bin/
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'Install Ninja'
|
||||
- task: CMake@1
|
||||
inputs:
|
||||
# CMake must get Python 3.x version by default
|
||||
cmakeArgs: -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE=/usr/bin/python3.6 -DENABLE_TESTS=ON $(REPO_DIR)
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
- script: ninja
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Build Lin'
|
||||
- script: ls -alR $(REPO_DIR)/bin/
|
||||
displayName: 'List files'
|
||||
- script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*
|
||||
displayName: 'nGraph UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/InferenceEngineUnitTests --gtest_print_time=1
|
||||
displayName: 'IE UT old'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/ieUnitTests
|
||||
displayName: 'IE UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/cpuUnitTests
|
||||
displayName: 'CPU UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/gnaUnitTests
|
||||
displayName: 'GNA UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/vpuUnitTests
|
||||
displayName: 'VPU UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/ieFuncTests
|
||||
displayName: 'IE FuncTests'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/cpuFuncTests --gtest_print_time=1
|
||||
displayName: 'CPU FuncTests'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/MklDnnBehaviorTests
|
||||
displayName: 'MklDnnBehaviorTests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
git clone https://github.com/openvinotoolkit/testdata.git
|
||||
git clone https://github.com/google/gtest-parallel.git
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'Clone testdata & gtest-parallel'
|
||||
- script: |
|
||||
export DATA_PATH=$(WORK_DIR)/testdata
|
||||
export MODELS_PATH=$(WORK_DIR)/testdata
|
||||
python3 $(WORK_DIR)/gtest-parallel/gtest-parallel $(BIN_DIR)/MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --print_test_times --dump_json_test_results=MklDnnFunctionalTests.json -- --gtest_print_time=1
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'MklDnnFunctionalTests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
export DATA_PATH=$(WORK_DIR)/testdata
|
||||
export MODELS_PATH=$(WORK_DIR)/testdata
|
||||
$(BIN_DIR)/InferenceEngineCAPITests
|
||||
displayName: 'IE CAPITests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
export DATA_PATH=$(WORK_DIR)/testdata
|
||||
export MODELS_PATH=$(WORK_DIR)/testdata
|
||||
export LD_LIBRARY_PATH=$(BIN_DIR)/lib
|
||||
export PYTHONPATH=$(BIN_DIR)/lib/python_api/python3.6
|
||||
env
|
||||
cd $(REPO_DIR)/inference-engine/ie_bridges/python/tests
|
||||
pytest
|
||||
displayName: 'Python API Tests'
|
||||
continueOnError: false
|
||||
enabled: false
|
||||
|
||||
- job: Mac
|
||||
# About 200% of total time (perfomace of Mac hosts is unstable)
|
||||
timeoutInMinutes: 180
|
||||
pool:
|
||||
vmImage: 'macOS-10.15'
|
||||
variables:
|
||||
system.debug: true
|
||||
WORKERS_NUMBER: 3
|
||||
BUILD_TYPE: Release
|
||||
REPO_DIR: $(Build.Repository.LocalPath)
|
||||
WORK_DIR: $(Pipeline.Workspace)/_w
|
||||
BUILD_DIR: $(WORK_DIR)/build
|
||||
BIN_DIR: $(REPO_DIR)/bin/intel64/$(BUILD_TYPE)
|
||||
steps:
|
||||
- checkout: self
|
||||
clean: true
|
||||
fetchDepth: 1
|
||||
lfs: false
|
||||
submodules: recursive
|
||||
path: openvino
|
||||
- script: |
|
||||
whoami
|
||||
uname -a
|
||||
which python3
|
||||
python3 --version
|
||||
gcc --version
|
||||
xcrun --sdk macosx --show-sdk-version
|
||||
env
|
||||
sysctl -a
|
||||
displayName: 'System properties'
|
||||
- script: |
|
||||
rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
|
||||
rm -rf $(BUILD_DIR) ; mkdir $(BUILD_DIR)
|
||||
displayName: 'Make dir'
|
||||
- task: UsePythonVersion@0
|
||||
inputs:
|
||||
versionSpec: '3.7'
|
||||
- script: |
|
||||
brew install cython
|
||||
brew install automake
|
||||
displayName: 'Install dependencies'
|
||||
- script: brew install ninja
|
||||
displayName: 'Install Ninja'
|
||||
- script: |
|
||||
export PATH="/usr/local/opt/cython/bin:$PATH"
|
||||
export CC=gcc
|
||||
export CXX=g++
|
||||
# Disable errors with Ninja
|
||||
export CXXFLAGS="-Wno-error=unused-command-line-argument"
|
||||
export CFLAGS="-Wno-error=unused-command-line-argument"
|
||||
cmake -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON $(REPO_DIR)
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'CMake'
|
||||
- script: ninja
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Build Mac'
|
||||
- script: ls -alR $(REPO_DIR)/bin/
|
||||
displayName: 'List files'
|
||||
- script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid
|
||||
displayName: 'nGraph UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/InferenceEngineUnitTests --gtest_print_time=1
|
||||
displayName: 'IE UT old'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/ieUnitTests
|
||||
displayName: 'IE UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/cpuUnitTests
|
||||
displayName: 'CPU UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/vpuUnitTests
|
||||
displayName: 'VPU UT'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/ieFuncTests
|
||||
displayName: 'IE FuncTests'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/cpuFuncTests --gtest_print_time=1
|
||||
displayName: 'CPU FuncTests'
|
||||
continueOnError: false
|
||||
- script: $(BIN_DIR)/MklDnnBehaviorTests
|
||||
displayName: 'MklDnnBehaviorTests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
git clone https://github.com/openvinotoolkit/testdata.git
|
||||
git clone https://github.com/google/gtest-parallel.git
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'Clone testdata & gtest-parallel'
|
||||
- script: |
|
||||
export DATA_PATH=$(WORK_DIR)/testdata
|
||||
export MODELS_PATH=$(WORK_DIR)/testdata
|
||||
python3 $(WORK_DIR)/gtest-parallel/gtest-parallel $(BIN_DIR)/MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --print_test_times --dump_json_test_results=MklDnnFunctionalTests.json --gtest_filter=-smoke_MobileNet/ModelTransformationsTest.LPT/mobilenet_v2_tf_depthwise_batch1_inPluginDisabled_inTestDisabled_asymmetric* -- --gtest_print_time=1
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'MklDnnFunctionalTests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
export DATA_PATH=$(WORK_DIR)/testdata
|
||||
export MODELS_PATH=$(WORK_DIR)/testdata
|
||||
$(BIN_DIR)/InferenceEngineCAPITests
|
||||
displayName: 'IE CAPITests'
|
||||
continueOnError: false
|
||||
|
||||
- job: Win
|
||||
# About 150% of total time
|
||||
timeoutInMinutes: 120
|
||||
pool:
|
||||
name: WIN_VMSS_VENV_F8S_WU2
|
||||
variables:
|
||||
system.debug: true
|
||||
WORKERS_NUMBER: 8
|
||||
BUILD_TYPE: Release
|
||||
REPO_DIR: $(Build.Repository.LocalPath)
|
||||
WORK_DIR: $(Pipeline.Workspace)\_w
|
||||
BUILD_DIR: D:\build
|
||||
BIN_DIR: $(REPO_DIR)\bin\intel64
|
||||
MSVS_VARS_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
MSVC_COMPILER_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\bin\Hostx64\x64\cl.exe
|
||||
steps:
|
||||
- checkout: self
|
||||
clean: true
|
||||
fetchDepth: 1
|
||||
lfs: false
|
||||
submodules: recursive
|
||||
path: openvino
|
||||
- script: |
|
||||
powershell -command "Invoke-RestMethod -Headers @{\"Metadata\"=\"true\"} -Method GET -Uri http://169.254.169.254/metadata/instance/compute?api-version=2019-06-01 | format-custom"
|
||||
where python3
|
||||
where python
|
||||
python --version
|
||||
wmic computersystem get TotalPhysicalMemory
|
||||
wmic cpu list
|
||||
wmic logicaldisk get description,name
|
||||
wmic VOLUME list
|
||||
set
|
||||
displayName: 'System properties'
|
||||
- script: |
|
||||
rd /Q /S $(WORK_DIR) & mkdir $(WORK_DIR)
|
||||
rd /Q /S $(BUILD_DIR) & mkdir $(BUILD_DIR)
|
||||
displayName: 'Make dir'
|
||||
- script: |
|
||||
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-win.zip ninja-win.zip
|
||||
powershell -command "Expand-Archive -Force ninja-win.zip"
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: Install Ninja
|
||||
- script: |
|
||||
certutil -urlcache -split -f https://incredibuilddiag1wu2.blob.core.windows.net/incredibuild/IBSetupConsole_9_5_0.exe IBSetupConsole_9_5_0.exe
|
||||
call IBSetupConsole_9_5_0.exe /Install /Components=Agent,oneuse /Coordinator=11.1.0.4 /AGENT:OPENFIREWALL=ON /AGENT:AUTOSELECTPORTS=ON /ADDTOPATH=ON /AGENT:INSTALLADDINS=OFF
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: Install IncrediBuild
|
||||
- script: |
|
||||
echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
|
||||
reg add HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Xoreax\IncrediBuild\Builder /f /v LastEnabled /d 0 && echo Start IncrediBuild_Agent && net start IncrediBuild_Agent
|
||||
displayName: Start IncrediBuild
|
||||
- script: |
|
||||
set PATH=$(WORK_DIR)\ninja-win;%PATH%
|
||||
call "$(MSVS_VARS_PATH)" && cmake -GNinja -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'CMake'
|
||||
- script: |
|
||||
set PATH=$(WORK_DIR)\ninja-win;%PATH%
|
||||
call "$(MSVS_VARS_PATH)" && "C:\Program Files (x86)\IncrediBuild\BuildConsole.exe" /COMMAND="ninja" /MaxCPUS=40
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Build Win'
|
||||
- script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
|
||||
displayName: Stop IncrediBuild
|
||||
continueOnError: true
|
||||
- script: dir $(REPO_DIR)\bin\ /s /b
|
||||
displayName: 'List files'
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*
|
||||
displayName: 'nGraph UT'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\InferenceEngineUnitTests --gtest_print_time=1
|
||||
displayName: 'IE UT old'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\ieUnitTests
|
||||
displayName: 'IE UT'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\cpuUnitTests
|
||||
displayName: 'CPU UT'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\gnaUnitTests
|
||||
displayName: 'GNA UT'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\vpuUnitTests
|
||||
displayName: 'VPU UT'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\ieFuncTests
|
||||
displayName: 'IE FuncTests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\cpuFuncTests --gtest_print_time=1
|
||||
displayName: 'CPU FuncTests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
|
||||
$(BIN_DIR)\MklDnnBehaviorTests
|
||||
displayName: 'MklDnnBehaviorTests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
git clone https://github.com/openvinotoolkit/testdata.git
|
||||
git clone https://github.com/google/gtest-parallel.git
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Clone testdata & gtest-parallel'
|
||||
# Add for gtest-parallel, it hangs now (CVS-33386)
|
||||
#python $(BUILD_DIR)\gtest-parallel\gtest-parallel $(BIN_DIR)\MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --print_test_times --dump_json_test_results=MklDnnFunctionalTests.json -- --gtest_print_time=1
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.3.0\opencv\bin;%PATH%
|
||||
set DATA_PATH=$(BUILD_DIR)\testdata
|
||||
set MODELS_PATH=$(BUILD_DIR)\testdata
|
||||
$(BIN_DIR)\MklDnnFunctionalTests --gtest_print_time=1
|
||||
displayName: 'MklDnnFunctionalTests'
|
||||
continueOnError: false
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.3.0\opencv\bin;%PATH%
|
||||
set DATA_PATH=$(BUILD_DIR)\testdata
|
||||
set MODELS_PATH=$(BUILD_DIR)\testdata
|
||||
$(BIN_DIR)\InferenceEngineCAPITests
|
||||
displayName: 'IE CAPITests'
|
||||
continueOnError: false
|
||||
@@ -57,10 +57,11 @@ The software was validated on:
|
||||
- CentOS\* 7.4 (64-bit) with default GCC\* 4.8.5
|
||||
|
||||
### Software Requirements
|
||||
- [CMake]\* 3.11 or higher
|
||||
- [CMake]\* 3.13 or higher
|
||||
- GCC\* 4.8 or higher to build the Inference Engine
|
||||
- Python 3.5 or higher for Inference Engine Python API wrapper
|
||||
- Python 3.6 or higher for Inference Engine Python API wrapper
|
||||
- (Optional) [Install Intel® Graphics Compute Runtime for OpenCL™ Driver package 19.41.14441].
|
||||
> **NOTE**: Building samples and demos from the Intel® Distribution of OpenVINO™ toolkit package requires CMake\* 3.10 or higher.
|
||||
|
||||
### Build Steps
|
||||
1. Clone submodules:
|
||||
@@ -335,10 +336,11 @@ The software was validated on:
|
||||
Compiler 2018 Update 3
|
||||
|
||||
### Software Requirements
|
||||
- [CMake]\*3.11 or higher
|
||||
- [CMake]\*3.13 or higher
|
||||
- Microsoft\* Visual Studio 2017, 2019 or [Intel® C++ Compiler] 18.0
|
||||
- (Optional) Intel® Graphics Driver for Windows* (26.20) [driver package].
|
||||
- Python 3.5 or higher for Inference Engine Python API wrapper
|
||||
- Python 3.6 or higher for Inference Engine Python API wrapper
|
||||
> **NOTE**: Building samples and demos from the Intel® Distribution of OpenVINO™ toolkit package requires CMake\* 3.10 or higher.
|
||||
|
||||
### Build Steps
|
||||
|
||||
@@ -382,7 +384,7 @@ cmake -G "Visual Studio 15 2017 Win64" -T "Intel C++ Compiler 18.0" ^
|
||||
6. Before running the samples, add paths to the TBB and OpenCV binaries used for
|
||||
the build to the `%PATH%` environment variable. By default, TBB binaries are
|
||||
downloaded by the CMake-based script to the `<openvino_repo>/inference-engine/temp/tbb/bin`
|
||||
folder, OpenCV binaries to the `<openvino_repo>/inference-engine/temp/opencv_4.3.0/opencv/bin`
|
||||
folder, OpenCV binaries to the `<openvino_repo>/inference-engine/temp/opencv_4.5.0/opencv/bin`
|
||||
folder.
|
||||
|
||||
### Additional Build Options
|
||||
@@ -452,9 +454,10 @@ The software was validated on:
|
||||
|
||||
### Software Requirements
|
||||
|
||||
- [CMake]\* 3.11 or higher
|
||||
- [CMake]\* 3.13 or higher
|
||||
- Clang\* compiler from Xcode\* 10.1 or higher
|
||||
- Python\* 3.5 or higher for the Inference Engine Python API wrapper
|
||||
- Python\* 3.6 or higher for the Inference Engine Python API wrapper
|
||||
> **NOTE**: Building samples and demos from the Intel® Distribution of OpenVINO™ toolkit package requires CMake\* 3.10 or higher.
|
||||
|
||||
### Build Steps
|
||||
|
||||
@@ -527,8 +530,9 @@ This section describes how to build Inference Engine for Android x86 (64-bit) op
|
||||
|
||||
### Software Requirements
|
||||
|
||||
- [CMake]\* 3.11 or higher
|
||||
- [CMake]\* 3.13 or higher
|
||||
- Android NDK (this guide has been validated with r20 release)
|
||||
> **NOTE**: Building samples and demos from the Intel® Distribution of OpenVINO™ toolkit package requires CMake\* 3.10 or higher.
|
||||
|
||||
### Build Steps
|
||||
|
||||
|
||||
@@ -44,4 +44,7 @@ ie_dependent_option (ENABLE_AVX2 "Enable AVX2 optimizations" ON "X86_64 OR X86"
|
||||
|
||||
ie_dependent_option (ENABLE_AVX512F "Enable AVX512 optimizations" ON "X86_64 OR X86" OFF)
|
||||
|
||||
ie_dependent_option (ENABLE_PROFILING_ITT "ITT tracing of IE and plugins internals" ON "NOT CMAKE_CROSSCOMPILING" OFF)
|
||||
ie_option (ENABLE_PROFILING_ITT "Build with ITT tracing. Optionally configure pre-built ittnotify library though INTEL_VTUNE_DIR variable." OFF)
|
||||
|
||||
# Documentation build
|
||||
ie_option (ENABLE_DOCS "build docs using Doxygen" OFF)
|
||||
|
||||
@@ -2,59 +2,186 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
add_subdirectory(examples)
|
||||
if(NOT ENABLE_DOCKER)
|
||||
add_subdirectory(examples)
|
||||
|
||||
# Detect nGraph
|
||||
find_package(ngraph QUIET)
|
||||
if(NOT ngraph_FOUND)
|
||||
set(ngraph_DIR ${CMAKE_BINARY_DIR}/ngraph)
|
||||
endif()
|
||||
|
||||
# Detect InferenceEngine
|
||||
find_package(InferenceEngine QUIET)
|
||||
if(NOT InferenceEngine_FOUND)
|
||||
set(InferenceEngine_DIR ${CMAKE_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
add_subdirectory(template_extension)
|
||||
|
||||
set(all_docs_targets
|
||||
ie_docs_examples
|
||||
template_extension
|
||||
templatePlugin TemplateBehaviorTests TemplateFunctionalTests)
|
||||
foreach(target_name IN LISTS all_docs_targets)
|
||||
if (TARGET ${target_name})
|
||||
set_target_properties(${target_name} PROPERTIES FOLDER docs)
|
||||
# Detect nGraph
|
||||
find_package(ngraph QUIET)
|
||||
if(NOT ngraph_FOUND)
|
||||
set(ngraph_DIR ${CMAKE_BINARY_DIR}/ngraph)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# OpenVINO docs
|
||||
# Detect InferenceEngine
|
||||
find_package(InferenceEngine QUIET)
|
||||
if(NOT InferenceEngine_FOUND)
|
||||
set(InferenceEngine_DIR ${CMAKE_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
set(OPENVINO_DOCS_PATH "" CACHE PATH "Path to openvino-documentation local repository")
|
||||
set(args "")
|
||||
add_subdirectory(template_extension)
|
||||
|
||||
if(OPENVINO_DOCS_PATH)
|
||||
set(args "${args} ovinodoc_path:${OPENVINO_DOCS_PATH}")
|
||||
set(all_docs_targets
|
||||
ie_docs_examples
|
||||
template_extension
|
||||
templatePlugin TemplateBehaviorTests TemplateFunctionalTests)
|
||||
foreach(target_name IN LISTS all_docs_targets)
|
||||
if (TARGET ${target_name})
|
||||
set_target_properties(${target_name} PROPERTIES FOLDER docs)
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
file(GLOB_RECURSE docs_files "${OpenVINO_MAIN_SOURCE_DIR}/docs")
|
||||
file(GLOB_RECURSE include_files "${OpenVINO_MAIN_SOURCE_DIR}/inference-engine/include")
|
||||
file(GLOB_RECURSE ovino_files "${OPENVINO_DOCS_PATH}")
|
||||
function(build_docs)
|
||||
|
||||
add_custom_target(ie_docs
|
||||
COMMAND ./build_docs.sh ${args}
|
||||
WORKING_DIRECTORY "${OpenVINO_MAIN_SOURCE_DIR}/docs/build_documentation"
|
||||
COMMENT "Generating OpenVINO documentation"
|
||||
SOURCES ${docs_files} ${include_files} ${ovino_files}
|
||||
VERBATIM)
|
||||
set_target_properties(ie_docs PROPERTIES FOLDER docs)
|
||||
find_package(Doxygen REQUIRED dot)
|
||||
find_package(Python3 COMPONENTS Interpreter)
|
||||
find_package(LATEX)
|
||||
|
||||
find_program(browser NAMES xdg-open)
|
||||
if(browser)
|
||||
add_custom_target(ie_docs_open
|
||||
COMMAND ${browser} "${OpenVINO_MAIN_SOURCE_DIR}/doc/html/index.html"
|
||||
DEPENDS ie_docs
|
||||
COMMENT "Open OpenVINO documentation"
|
||||
VERBATIM)
|
||||
set_target_properties(ie_docs_open PROPERTIES FOLDER docs)
|
||||
if(NOT DOXYGEN_FOUND)
|
||||
message(FATAL_ERROR "Doxygen is required to build the documentation")
|
||||
endif()
|
||||
|
||||
if(NOT Python3_FOUND)
|
||||
message(FATAL_ERROR "Python3 is required to build the documentation")
|
||||
endif()
|
||||
|
||||
if(NOT LATEX_FOUND)
|
||||
message(FATAL_ERROR "LATEX is required to build the documentation")
|
||||
endif()
|
||||
|
||||
set(DOCS_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
|
||||
set(DOXYGEN_DIR ${OpenVINO_MAIN_SOURCE_DIR}/docs/doxygen)
|
||||
set(IE_SOURCE_DIR ${OpenVINO_MAIN_SOURCE_DIR}/inference-engine)
|
||||
set(PYTHON_API_IN ${IE_SOURCE_DIR}/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx)
|
||||
set(PYTHON_API_OUT ${DOCS_BINARY_DIR}/python_api/ie_api.pyx)
|
||||
set(C_API ${IE_SOURCE_DIR}/ie_bridges/c/include)
|
||||
set(PLUGIN_API_DIR ${DOCS_BINARY_DIR}/IE_PLUGIN_DG)
|
||||
|
||||
# Preprocessing scripts
|
||||
set(DOXY_MD_FILTER ${DOXYGEN_DIR}/doxy_md_filter.py)
|
||||
set(PYX_FILTER ${DOXYGEN_DIR}/pyx_filter.py)
|
||||
|
||||
file(GLOB_RECURSE doc_source_files
|
||||
LIST_DIRECTORIES true
|
||||
RELATIVE ${OpenVINO_MAIN_SOURCE_DIR}
|
||||
${OpenVINO_MAIN_SOURCE_DIR}/docs/*.md
|
||||
${OpenVINO_MAIN_SOURCE_DIR}/docs/*.png
|
||||
${OpenVINO_MAIN_SOURCE_DIR}/docs/*.gif
|
||||
${OpenVINO_MAIN_SOURCE_DIR}/docs/*.jpg
|
||||
${OpenVINO_MAIN_SOURCE_DIR}/inference-engine/*.md
|
||||
${OpenVINO_MAIN_SOURCE_DIR}/inference-engine/*.png
|
||||
${OpenVINO_MAIN_SOURCE_DIR}/inference-engine/*.gif
|
||||
${OpenVINO_MAIN_SOURCE_DIR}/inference-engine/*.jpg
|
||||
)
|
||||
|
||||
configure_file(${PYTHON_API_IN} ${PYTHON_API_OUT} @ONLY)
|
||||
|
||||
set(IE_CONFIG_SOURCE ${DOXYGEN_DIR}/ie_docs.config)
|
||||
set(C_CONFIG_SOURCE ${DOXYGEN_DIR}/ie_c_api.config)
|
||||
set(PY_CONFIG_SOURCE ${DOXYGEN_DIR}/ie_py_api.config)
|
||||
set(PLUGIN_CONFIG_SOURCE ${DOXYGEN_DIR}/ie_plugin_api.config)
|
||||
|
||||
set(IE_CONFIG_BINARY ${DOCS_BINARY_DIR}/ie_docs.config)
|
||||
set(C_CONFIG_BINARY ${DOCS_BINARY_DIR}/ie_c_api.config)
|
||||
set(PY_CONFIG_BINARY ${DOCS_BINARY_DIR}/ie_py_api.config)
|
||||
set(PLUGIN_CONFIG_BINARY ${DOCS_BINARY_DIR}/ie_plugin_api.config)
|
||||
|
||||
set(IE_LAYOUT_SOURCE ${DOXYGEN_DIR}/ie_docs.xml)
|
||||
set(C_LAYOUT_SOURCE ${DOXYGEN_DIR}/ie_c_api.xml)
|
||||
set(PY_LAYOUT_SOURCE ${DOXYGEN_DIR}/ie_py_api.xml)
|
||||
set(PLUGIN_LAYOUT_SOURCE ${DOXYGEN_DIR}/ie_plugin_api.xml)
|
||||
|
||||
set(IE_LAYOUT_BINARY ${DOCS_BINARY_DIR}/ie_docs.xml)
|
||||
set(C_LAYOUT_BINARY ${DOCS_BINARY_DIR}/ie_c_api.xml)
|
||||
set(PY_LAYOUT_BINARY ${DOCS_BINARY_DIR}/ie_py_api.xml)
|
||||
set(PLUGIN_LAYOUT_BINARY ${DOCS_BINARY_DIR}/ie_plugin_api.xml)
|
||||
|
||||
# Tables of contents
|
||||
configure_file(${IE_LAYOUT_SOURCE} ${IE_LAYOUT_BINARY} @ONLY)
|
||||
configure_file(${C_LAYOUT_SOURCE} ${C_LAYOUT_BINARY} @ONLY)
|
||||
configure_file(${PY_LAYOUT_SOURCE} ${PY_LAYOUT_BINARY} @ONLY)
|
||||
configure_file(${PLUGIN_LAYOUT_SOURCE} ${PLUGIN_LAYOUT_BINARY} @ONLY)
|
||||
|
||||
# Doxygen config files
|
||||
configure_file(${IE_CONFIG_SOURCE} ${IE_CONFIG_BINARY} @ONLY)
|
||||
configure_file(${C_CONFIG_SOURCE} ${C_CONFIG_BINARY} @ONLY)
|
||||
configure_file(${PY_CONFIG_SOURCE} ${PY_CONFIG_BINARY} @ONLY)
|
||||
configure_file(${PLUGIN_CONFIG_SOURCE} ${PLUGIN_CONFIG_BINARY} @ONLY)
|
||||
|
||||
# Preprocessing scripts
|
||||
set(DOXY_MD_FILTER ${DOXYGEN_DIR}/doxy_md_filter.py)
|
||||
set(PYX_FILTER ${DOXYGEN_DIR}/pyx_filter.py)
|
||||
|
||||
add_custom_target(c_api
|
||||
COMMAND ${DOXYGEN_EXECUTABLE} ${C_CONFIG_BINARY}
|
||||
WORKING_DIRECTORY ${DOCS_BINARY_DIR}
|
||||
COMMENT "Generating C API Reference"
|
||||
VERBATIM
|
||||
)
|
||||
|
||||
add_custom_target(py_api
|
||||
COMMAND ${DOXYGEN_EXECUTABLE} ${PY_CONFIG_BINARY}
|
||||
WORKING_DIRECTORY ${DOCS_BINARY_DIR}
|
||||
COMMENT "Generating Python API Reference"
|
||||
VERBATIM
|
||||
)
|
||||
|
||||
add_custom_target(plugin_api
|
||||
COMMAND ${DOXYGEN_EXECUTABLE} ${PLUGIN_CONFIG_BINARY}
|
||||
WORKING_DIRECTORY ${DOCS_BINARY_DIR}
|
||||
COMMENT "Generating Plugin API Reference"
|
||||
VERBATIM
|
||||
)
|
||||
|
||||
add_custom_target(preprocess_docs
|
||||
COMMENT "Pre-process docs"
|
||||
VERBATIM
|
||||
)
|
||||
|
||||
add_custom_target(ie_docs
|
||||
DEPENDS preprocess_docs
|
||||
COMMAND ${DOXYGEN_EXECUTABLE} ${IE_CONFIG_BINARY}
|
||||
WORKING_DIRECTORY ${DOCS_BINARY_DIR}
|
||||
VERBATIM
|
||||
)
|
||||
|
||||
add_custom_target(openvino_docs
|
||||
DEPENDS c_api py_api ie_docs plugin_api
|
||||
COMMENT "Generating OpenVINO documentation"
|
||||
VERBATIM
|
||||
)
|
||||
|
||||
add_custom_command(TARGET py_api
|
||||
PRE_BUILD
|
||||
COMMAND ${Python3_EXECUTABLE} ${PYX_FILTER} ${PYTHON_API_OUT}
|
||||
COMMENT "Pre-process Python API."
|
||||
)
|
||||
|
||||
foreach(source_file ${doc_source_files})
|
||||
add_custom_command(TARGET preprocess_docs
|
||||
PRE_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${OpenVINO_MAIN_SOURCE_DIR}/${source_file} ${DOCS_BINARY_DIR}/${source_file})
|
||||
endforeach()
|
||||
|
||||
add_custom_command(TARGET preprocess_docs
|
||||
PRE_BUILD
|
||||
COMMAND ${Python3_EXECUTABLE} ${DOXY_MD_FILTER} ${DOCS_BINARY_DIR}
|
||||
COMMENT "Pre-process markdown and image links."
|
||||
)
|
||||
|
||||
set_target_properties(ie_docs PROPERTIES FOLDER docs)
|
||||
|
||||
find_program(browser NAMES xdg-open)
|
||||
if(browser)
|
||||
add_custom_target(ie_docs_open
|
||||
COMMAND ${browser} "${OpenVINO_MAIN_SOURCE_DIR}/doc/html/index.html"
|
||||
DEPENDS ie_docs
|
||||
COMMENT "Open OpenVINO documentation"
|
||||
VERBATIM)
|
||||
set_target_properties(ie_docs_open PROPERTIES FOLDER docs)
|
||||
endif()
|
||||
|
||||
endfunction()
|
||||
|
||||
if (ENABLE_DOCS)
|
||||
build_docs()
|
||||
endif()
|
||||
|
||||
@@ -16,6 +16,8 @@ To add your custom nGraph operation, create a new class that extends `ngraph::Op
|
||||
|
||||
5. Override the `visit_attributes` method, which allows serialization and deserialization of attributes. An `AttributeVisitor` is passed to the method, and the implementation is expected to walk over all the attributes in the op using the type-aware `on_attribute` helper. Helpers are already implemented for standard C++ types like `int64_t`, `float`, `bool`, `vector` and for existing nGraph defined types.
|
||||
|
||||
6. Override `evaluate`, which is an optional method that enables the application of constant folding if there is a custom operation on the constant branch.
|
||||
|
||||
Based on that, declaration of a operation class can look as follows:
|
||||
|
||||
@snippet op.hpp op:header
|
||||
@@ -51,6 +53,12 @@ nGraph operation contains two constructors: a default constructor, which allows
|
||||
|
||||
@snippet op.cpp op:visit_attributes
|
||||
|
||||
### `evaluate()`
|
||||
|
||||
`ngraph::Node::evaluate` method allows to apply constant folding to an operation.
|
||||
|
||||
@snippet op.cpp op:evaluate
|
||||
|
||||
## Register Custom Operations in Extension Class
|
||||
|
||||
To add custom operations to the [Extension](Extension.md) class, create an operation set with custom operations and implement the `InferenceEngine::IExtension::getOpSets` method:
|
||||
|
||||
@@ -1,38 +1,61 @@
|
||||
Using Shape Inference {#openvino_docs_IE_DG_ShapeInference}
|
||||
==========================================
|
||||
|
||||
Inference Engine takes two kinds of model description as an input: [Intermediate Representation (IR)](../MO_DG/IR_and_opsets.md) and [nGraph::Function](nGraph_Flow.md) objects.
|
||||
Both should have fixed input shapes to be successfully loaded to the Inference Engine.
|
||||
To feed input data of a shape that is different from the model input shape, resize the model first.
|
||||
Inference Engine takes three kinds of a model description as an input, which are converted into an `InferenceEngine::CNNNetwork` object:
|
||||
1. [Intermediate Representation (IR)](../MO_DG/IR_and_opsets.md) through `InferenceEngine::Core::ReadNetwork`
|
||||
2. [ONNX model](../IE_DG/OnnxImporterTutorial.md) through `InferenceEngine::Core::ReadNetwork`
|
||||
3. [nGraph::Function](../IE_DG/nGraph_Flow.md) through the constructor of `InferenceEngine::CNNNetwork`
|
||||
|
||||
Model resizing on the stage of <a href="_docs_MO_DG_prepare_model_convert_model_Converting_Model_General.html#when_to_specify_input_shapes">IR generation</a> or [nGraph::Function creation](nGraphTutorial.md) is the recommended approach.
|
||||
OpenVINO™ provides the following experimental methods for runtime model reshaping:
|
||||
`InferenceEngine::CNNNetwork` keeps an `ngraph::Function` object with the model description internally.
|
||||
The object should have fully defined input shapes to be successfully loaded to the Inference Engine plugins.
|
||||
To resolve undefined input dimensions of a model, call the `CNNNetwork::reshape` method providing new input shapes before loading to the Inference Engine plugin.
|
||||
|
||||
1. Setting a new input shape with the `InferenceEngine::CNNNetwork::reshape` method
|
||||
|
||||
`InferenceEngine::CNNNetwork::reshape` method updates input shapes and propagates them down to the outputs of the model through all intermediate layers.
|
||||
|
||||
Shape propagation for `InferenceEngine::CNNNetwork` objects created from `nGraph::Function` or IR of the version 10 works through the `nGraph` shape inference mechanism.
|
||||
`InferenceEngine::CNNNetwork` objects created from lower IR versions are considered deprecated and may be reshaped incorrectly or give unexpected results.
|
||||
|
||||
To keep the v10 IR resizable by the `InferenceEngine::CNNNetwork::reshape` method, convert the model with the additional Model Optimizer key `--keep_shape_ops`.
|
||||
|
||||
2. Setting a new batch dimension value with the `InferenceEngine::CNNNetwork::setBatchSize` method
|
||||
|
||||
The meaning of a model batch may vary depending on choices you made during the model designing.
|
||||
The `InferenceEngine::CNNNetwork::setBatchSize` method deduces index of batch dimension relying only on the input rank.
|
||||
This method does not work for models with a non-zero index batch placement or models with inputs without a batch dimension.
|
||||
Run the following code right after `InferenceEngine::CNNNetwork` creation to explicitly check for model input names and shapes:
|
||||
```cpp
|
||||
CNNNetwork network = ... // read IR / ONNX model or create from nGraph::Function explicitly
|
||||
const auto parameters = network.getFunction()->get_parameters();
|
||||
for (const auto & parameter : parameters) {
|
||||
std::cout << "name: " << parameter->get_friendly_name() << " shape: " << parameter->get_partial_shape() << std::endl;
|
||||
if (parameter->get_partial_shape().is_dynamic())
|
||||
std::cout << "ATTENTION: Input shape is not fully defined. Use the CNNNetwork::reshape method to resolve it." << std::endl;
|
||||
}
|
||||
```
|
||||
|
||||
Batch-setting algorithm does not involve shape inference mechanism.
|
||||
Batch of input and output shapes for all layers is set to a new batch value without layer validation.
|
||||
It may cause both positive and negative side effects.
|
||||
|
||||
Due to the limitations described above, the current method is recommended for simple image processing models only.
|
||||
To feed input data of a shape that is different from the model input shape, reshape the model first.
|
||||
|
||||
OpenVINO™ provides the following methods for runtime model reshaping:
|
||||
|
||||
Practically, some models are not ready to be resized. In this case, a new input shape cannot be set with the Model Optimizer or the `InferenceEngine::CNNNetwork::reshape` method.
|
||||
* **Set a new input shape** with the `InferenceEngine::CNNNetwork::reshape` method.<br>
|
||||
The `InferenceEngine::CNNNetwork::reshape` method updates input shapes and propagates them down to the outputs of the model through all intermediate layers.
|
||||
You can reshape a model multiple times like in this application scheme:
|
||||
```
|
||||
ReadNetwork -> reshape(input_1_shape) -> LoadNetwork -> infer(input_1)
|
||||
\
|
||||
-> reshape(input_2_shape) -> LoadNetwork -> infer(input_2)
|
||||
```
|
||||
> **NOTES**:
|
||||
> - Starting with the 2021.1 release, the Model Optimizer converts topologies keeping shape-calculating sub-graphs by default, which enables correct shape propagation during reshaping.
|
||||
> - Older versions of IRs are not guaranteed to reshape successfully. Please regenerate them with the Model Optimizer of the latest version of OpenVINO™.<br>
|
||||
> - If an ONNX model does not have a fully defined input shape and the model was imported with the ONNX importer, reshape the model before loading it to the plugin.
|
||||
* **Set a new batch dimension value** with the `InferenceEngine::CNNNetwork::setBatchSize` method.<br>
|
||||
The meaning of a model batch may vary depending on the model design.
|
||||
The `InferenceEngine::CNNNetwork::setBatchSize` method deduces the index of a batch dimension based only on the input rank.
|
||||
This method does not work for models with a non-zero index batch placement or models with inputs without a batch dimension.
|
||||
The batch-setting algorithm does not involve the shape inference mechanism.
|
||||
Batch of input and output shapes for all layers is set to a new batch value without layer validation.
|
||||
It may cause both positive and negative side effects.
|
||||
Due to the limitations described above, the current method is not recommended to use.
|
||||
If you need to set a new batch size for the model, use the `CNNNetwork::reshape` method instead.
|
||||
|
||||
## Troubleshooting Resize Errors
|
||||
Do not use runtime reshaping methods simultaneously, especially do not call the `CNNNetwork::reshape` method after you use `InferenceEngine::CNNNetwork::setBatchSize`.
|
||||
The `InferenceEngine::CNNNetwork::setBatchSize` method causes irreversible conversion of the internal model representation into the legacy model representation.
|
||||
The method does not use nGraph for shape inference which leads to reduced reshape opportunities and may affect the performance of the model.
|
||||
|
||||
There are other approaches to reshape the model during the stage of <a href="_docs_MO_DG_prepare_model_convert_model_Converting_Model_General.html#when_to_specify_input_shapes">IR generation</a> or [nGraph::Function creation](../IE_DG/nGraphTutorial.md).
|
||||
|
||||
Practically, some models are not ready to be reshaped. In this case, a new input shape cannot be set with the Model Optimizer or the `InferenceEngine::CNNNetwork::reshape` method.
|
||||
|
||||
## Troubleshooting Reshape Errors
|
||||
|
||||
Operation semantics may impose restrictions on input shapes of the operation.
|
||||
Shape collision during shape propagation may be a sign that a new shape does not satisfy the restrictions.
|
||||
@@ -42,7 +65,7 @@ Examples of such operations:
|
||||
- <a href="_docs_MO_DG_prepare_model_convert_model_IR_V10_opset1.html#Reshape">`Reshape` operation</a> with a hard-coded output shape value
|
||||
- <a href="_docs_MO_DG_prepare_model_convert_model_IR_V10_opset1.html#MatMul">`MatMul` operation</a> with the `Const` second input cannot be resized by spatial dimensions due to operation semantics
|
||||
|
||||
Model structure and logic should not change significantly after resizing.
|
||||
Model structure and logic should not change significantly after model reshaping.
|
||||
- The Global Pooling operation is commonly used to reduce output feature map of classification models output.
|
||||
Having the input of the shape [N, C, H, W], Global Pooling returns the output of the shape [N, C, 1, 1].
|
||||
Model architects usually express Global Pooling with the help of the `Pooling` operation with the fixed kernel size [H, W].
|
||||
@@ -50,12 +73,12 @@ During spatial reshape, having the input of the shape [N, C, H1, W1], Pooling wi
|
||||
It breaks the classification model structure.
|
||||
For example, [publicly available Inception family models from TensorFlow*](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models) have this issue.
|
||||
|
||||
- Resizing the model input shape may significantly affect its accuracy.
|
||||
- Changing the model input shape may significantly affect its accuracy.
|
||||
For example, Object Detection models from TensorFlow have resizing restrictions by design.
|
||||
To keep the model valid after the reshape, choose a new input shape that satisfies conditions listed in the `pipeline.config` file.
|
||||
For details, refer to the <a href="_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models.html#tf_od_custom_input_shape">Tensorflow Object Detection API models resizing techniques</a>.
|
||||
|
||||
## Usage of Reshape Method
|
||||
## Usage of Reshape Method <a name="usage_of_reshape_method"></a>
|
||||
|
||||
The primary method of the feature is `InferenceEngine::CNNNetwork::reshape`.
|
||||
It gets new input shapes and propagates it from input to output for all intermediates layers of the given network.
|
||||
|
||||
@@ -63,6 +63,8 @@ Below you can find examples how `ngraph::Function` can be created:
|
||||
nGraph has tree main transformation types: `ngraph::pass::FunctionPass` - strait forward way to work with `ngraph::Function` directly;
|
||||
`ngraph::pass::MatcherPass` - pattern based transformation approach; `ngraph::pass::GraphRewrite` - container for matcher passes.
|
||||
|
||||
![transformations_structure]
|
||||
|
||||
###1. ngraph::pass::FunctionPass <a name="function_pass"></a>
|
||||
|
||||
`ngraph::pass::FunctionPass` is used for transformations that take entire `ngraph::Function` as input and process it.
|
||||
@@ -131,7 +133,7 @@ The last step is to register Matcher and callback inside MatcherPass pass. And t
|
||||
|
||||
```cpp
|
||||
// Register matcher and callback
|
||||
this->register_matcher(m, callback);
|
||||
register_matcher(m, callback);
|
||||
```
|
||||
### Matcher pass execution
|
||||
MatcherPass has multiple ways to be executed:
|
||||
@@ -154,21 +156,32 @@ In addition GraphRewrite handles nodes that were registered by MatcherPasses dur
|
||||
|
||||
> **Note**: when using `pass::Manager` temporary GraphRewrite is used to execute single MatcherPass.
|
||||
|
||||
GraphRewrite has two algorithms for MatcherPasses execution. First algorithm is a straight-forward. It applies each MatcherPass in registraion order to current node.
|
||||
|
||||
![graph_rewrite_execution]
|
||||
|
||||
But it is nor really efficient when you have a lot of registered passes. So first of all GraphRewrite check that all MatcherPass patterns has type based root node (it means that type of this node is not hidden into predicate).
|
||||
And then creates map from registered MatcherPases. That helps to avoid additional cost of applying each MatcherPass for each node.
|
||||
|
||||
![graph_rewrite_efficient_search]
|
||||
|
||||
## Pattern matching <a name="pattern_matching"></a>
|
||||
|
||||
Sometimes patterns can't be expressed via regular nGraph operations. For example if you want to detect Convolution->Add sub-graph without specifying particular input type for Convolution operation or you want to create pattern where some of operations can have different types.
|
||||
Sometimes patterns can't be expressed via regular nGraph operations or it is too complicated.
|
||||
For example if you want to detect Convolution->Add sub-graph without specifying particular input type for Convolution operation or you want to create pattern where some of operations can have different types.
|
||||
And for these cases nGraph provides additional helpers to construct patterns for GraphRewrite transformations.
|
||||
|
||||
There are two main helpers:
|
||||
1. `ngraph::pattern::op::Label` - helps to express inputs if their type is undefined.
|
||||
2. `ngraph::pattern::op::Any` - helps to express intermediate nodes of pattern if their type is unknown.
|
||||
1. `ngraph::pattern::any_input` - helps to express inputs if their types are undefined.
|
||||
2. `ngraph::pattern::wrap_type<T>` - helps to express nodes of pattern without specifying node attributes.
|
||||
|
||||
Let's go through example to have better understanding how it works:
|
||||
|
||||
> **Note**: node attributes do not participate in pattern matching and needed only for operations creation. Only operation types participate in pattern matching.
|
||||
|
||||
Example below shows basic usage of `pattern::op::Label` class.
|
||||
Here we construct Multiply pattern with arbitrary first input and Constant as a second input.
|
||||
Example below shows basic usage of `pattern::any_input`.
|
||||
Here we construct Multiply pattern with arbitrary first input and Constant as a second input.
|
||||
Also as Multiply is commutative operation it does not matter in which order we set inputs (any_input/Constant or Constant/any_input) because both cases will be matched.
|
||||
|
||||
@snippet example_ngraph_utils.cpp pattern:label_example
|
||||
|
||||
@@ -176,7 +189,7 @@ This example show how we can construct pattern when operation has arbitrary numb
|
||||
|
||||
@snippet example_ngraph_utils.cpp pattern:concat_example
|
||||
|
||||
This example shows how to use predicate to construct pattern where operation has two different types. Also it shows how to match pattern manually on given node.
|
||||
This example shows how to use predicate to construct pattern. Also it shows how to match pattern manually on given node.
|
||||
|
||||
@snippet example_ngraph_utils.cpp pattern:predicate_example
|
||||
|
||||
@@ -321,9 +334,11 @@ ngraph::copy_runtime_info({a, b, c}, {e, f});
|
||||
|
||||
When transformation has multiple fusions or decompositions `ngraph::copy_runtime_info` must be called multiple times for each case.
|
||||
|
||||
> **Note**: copy_runtime_info removes rt_info from destination nodes. If you want to keep it you need to specify them in source nodes like this: copy_runtime_info({a, b, c}, {a, b})
|
||||
|
||||
###5. Constant Folding
|
||||
|
||||
If your transformation inserts constant sub-graphs that needs to be folded do not forget to use `ngraph::pass::ConstantFolding()` after your transformation.
|
||||
If your transformation inserts constant sub-graphs that needs to be folded do not forget to use `ngraph::pass::ConstantFolding()` after your transformation or call constant folding directly for operation.
|
||||
Example below shows how constant sub-graph can be constructed.
|
||||
|
||||
```cpp
|
||||
@@ -334,6 +349,12 @@ auto pow = std::make_shared<ngraph::opset3::Power>(
|
||||
auto mul = std::make_shared<ngraph::opset3::Multiply>(input /* not constant input */, pow);
|
||||
```
|
||||
|
||||
Manual constant folding is more preferable than `ngraph::pass::ConstantFolding()` because it is much faster.
|
||||
|
||||
Below you can find an example of manual constant folding:
|
||||
|
||||
@snippet src/template_pattern_transformation.cpp manual_constant_folding
|
||||
|
||||
## Common mistakes in transformations <a name="common_mistakes"></a>
|
||||
|
||||
In transformation development process
|
||||
@@ -427,4 +448,8 @@ The basic transformation test looks like this:
|
||||
|
||||
|
||||
[ngraph_replace_node]: ../images/ngraph_replace_node.png
|
||||
[ngraph_insert_node]: ../images/ngraph_insert_node.png
|
||||
[ngraph_insert_node]: ../images/ngraph_insert_node.png
|
||||
[transformations_structure]: ../images/transformations_structure.png
|
||||
[register_new_node]: ../images/register_new_node.png
|
||||
[graph_rewrite_execution]: ../images/graph_rewrite_execution.png
|
||||
[graph_rewrite_efficient_search]: ../images/graph_rewrite_efficient_search.png
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:05eb8600d2c905975674f3a0a5dc676107d22f65f2a1f78ee1cfabc1771721ea
|
||||
size 41307
|
||||
3
docs/IE_PLUGIN_DG/images/graph_rewrite_execution.png
Normal file
3
docs/IE_PLUGIN_DG/images/graph_rewrite_execution.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:17cd470c6d04d7aabbdb4a08e31f9c97eab960cf7ef5bbd3a541df92db38f26b
|
||||
size 40458
|
||||
3
docs/IE_PLUGIN_DG/images/register_new_node.png
Normal file
3
docs/IE_PLUGIN_DG/images/register_new_node.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:80297287c81a2f27b7e74895738afd90844354a8dd745757e8321e2fb6ed547e
|
||||
size 31246
|
||||
3
docs/IE_PLUGIN_DG/images/transformations_structure.png
Normal file
3
docs/IE_PLUGIN_DG/images/transformations_structure.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0b206c602626f17ba5787810b9a28f9cde511448c3e63a5c7ba976cee7868bdb
|
||||
size 14907
|
||||
@@ -376,7 +376,7 @@ Standard ONNX\* operators:
|
||||
| ReduceSum | No |
|
||||
| Relu | No |
|
||||
| Reshape | No |
|
||||
| Resize | Opset-10 version is supported |
|
||||
| Resize | transformation mode `tf_crop_and_resize` is not supported, mode `nearest` is not supported for 5D+ inputs. |
|
||||
| ReverseSequence | No |
|
||||
| Scatter | Supported if fuse-able to ScatterUpdate. MYRIAD only |
|
||||
| ScatterND | No |
|
||||
|
||||
@@ -126,8 +126,10 @@ Framework-agnostic parameters:
|
||||
value, for example: "node_name->True". It will be
|
||||
DEPRECATED in future releases. Use --input option to
|
||||
specify a value for freezing.
|
||||
--static_shape Enables `ShapeOf` operation with all children folding to `Constant`.
|
||||
This option makes model not reshapable in Inference Engine
|
||||
--static_shape Enables IR generation for fixed input shape (folding
|
||||
`ShapeOf` operations and shape-calculating sub-graphs
|
||||
to `Constant`). Changing model input shape using
|
||||
the Inference Engine API in runtime may fail for such an IR.
|
||||
--disable_weights_compression
|
||||
Disable compression and store weights with original
|
||||
precision.
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
# Converting TensorFlow* Object Detection API Models {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models}
|
||||
|
||||
> **NOTES**:
|
||||
>
|
||||
> * Starting with the 2019 R1 release, the Model Optimizer supports the `--keep_shape_ops` command line parameter that allows you to convert the TensorFlow\* Object Detection API Faster and Mask RCNNs topologies so they can be re-shaped in the Inference Engine using dedicated reshape API. Refer to [Using Shape Inference](../../../../IE_DG/ShapeInference.md) for more information on how to use this feature. It is possible to change the both spatial dimensions of the input image and batch size.
|
||||
> * Starting with the 2018 R4 release, the Model Optimizer supports the `--input_shape` command line parameter for the TensorFlow\* Object Detection API topologies. Refer to the [Custom Input Shape](#tf_od_custom_input_shape) for more information.
|
||||
> * Starting with the 2021.1 release, the Model Optimizer converts the TensorFlow\* Object Detection API SSDs, Faster and Mask RCNNs topologies keeping shape-calculating sub-graphs by default, so topologies can be re-shaped in the Inference Engine using dedicated reshape API. Refer to [Using Shape Inference](../../../../IE_DG/ShapeInference.md) for more information on how to use this feature. It is possible to change the both spatial dimensions of the input image and batch size.
|
||||
> * To generate IRs for SSD topologies, the Model Optimizer creates a number of `PriorBoxClustered` layers instead of a constant node with prior boxes calculated for the particular input image size. This change allows you to reshape the topology in the Inference Engine using dedicated Inference Engine API. The reshaping is supported for all SSD topologies except FPNs which contain hardcoded shapes for some operations preventing from changing topology input shape.
|
||||
|
||||
## How to Convert a Model
|
||||
|
||||
94
docs/doxygen/doxy_md_filter.py
Normal file
94
docs/doxygen/doxy_md_filter.py
Normal file
@@ -0,0 +1,94 @@
|
||||
import os
|
||||
import re
|
||||
import glob
|
||||
import argparse
|
||||
|
||||
|
||||
def get_label(file):
|
||||
"""
|
||||
Read lines of a file and try to find a doxygen label.
|
||||
If the label is not found return None.
|
||||
"""
|
||||
for line in file:
|
||||
label = re.search(r'\{\#(.+)\}', line)
|
||||
if label:
|
||||
return label.group(1)
|
||||
return
|
||||
|
||||
|
||||
def replace_links(content, items, folder, labels, docs_folder):
|
||||
"""
|
||||
Replace markdown links with doxygen labels.
|
||||
"""
|
||||
for item in items:
|
||||
link = item[0]
|
||||
ext = item[1]
|
||||
link_path = os.path.abspath(os.path.join(folder, link))
|
||||
if os.path.exists(link_path):
|
||||
if ext == 'md':
|
||||
if link_path in labels:
|
||||
label = labels.get(link_path)
|
||||
else:
|
||||
with open(link_path, 'r', encoding='utf-8') as file:
|
||||
lines = []
|
||||
i = 0
|
||||
while i < 5:
|
||||
try:
|
||||
lines.append(next(file))
|
||||
except StopIteration:
|
||||
break
|
||||
i += 1
|
||||
label = get_label(lines)
|
||||
labels[link_path] = label
|
||||
if label:
|
||||
content = content.replace(link, '@ref ' + label)
|
||||
else:
|
||||
rel_path = os.path.relpath(link_path, docs_folder).replace('\\', '/')
|
||||
content = content.replace(link, rel_path)
|
||||
return content
|
||||
|
||||
|
||||
def process_github_md_links(content, items):
|
||||
"""
|
||||
This is a workaround to support github markdown links in doxygen 1.8.12.
|
||||
"""
|
||||
for item in items:
|
||||
orig = item[0]
|
||||
link_name = item[1]
|
||||
link_url = item[2]
|
||||
html_link = '<a href="{}">{}</a>'.format(link_url, link_name)
|
||||
content = content.replace(orig, html_link)
|
||||
return content
|
||||
|
||||
|
||||
def process(docs_folder):
|
||||
"""
|
||||
Recursively find markdown files in docs_folder and
|
||||
replace links to markdown files with doxygen labels (ex. @ref label_name).
|
||||
"""
|
||||
labels = dict() # store labels in dictionary
|
||||
md_files = glob.glob(os.path.join(docs_folder, '**/*.md'), recursive=True)
|
||||
for md_file in md_files:
|
||||
md_folder = os.path.dirname(md_file)
|
||||
with open(md_file, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
inline_links = set(re.findall(r'!?\[.*?\]\(([\w\/\-\.]+\.(md|png|jpg|gif))\)', content, flags=re.IGNORECASE))
|
||||
github_md_links = set(re.findall(r'(\[(.+?)\]\((https:[\w\.\/-]+?\.md)\))', content, flags=re.IGNORECASE))
|
||||
reference_links = set(re.findall(r'\[.+\]\:\s*?([\w\/\-\.]+\.(md|png|jpg|gif))', content, flags=re.IGNORECASE))
|
||||
content = replace_links(content, inline_links, md_folder, labels, docs_folder)
|
||||
content = replace_links(content, reference_links, md_folder, labels, docs_folder)
|
||||
content = process_github_md_links(content, github_md_links)
|
||||
if inline_links or reference_links or github_md_links:
|
||||
with open(md_file, 'w', encoding='utf-8') as f:
|
||||
f.write(content)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('docs', type=str, help='Path to a folder containing .md files.')
|
||||
args = parser.parse_args()
|
||||
process(args.docs)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
25
docs/doxygen/ie_c_api.config
Normal file
25
docs/doxygen/ie_c_api.config
Normal file
@@ -0,0 +1,25 @@
|
||||
@INCLUDE = @IE_CONFIG_BINARY@
|
||||
|
||||
EXCLUDE_SYMBOLS = INFERENCE_ENGINE_C_API_EXTERN \
|
||||
INFERENCE_ENGINE_C_API \
|
||||
IE_NODISCARD
|
||||
|
||||
PREDEFINED = "__attribute__(x)=" \
|
||||
"__VA_ARGS__=" \
|
||||
"INFERENCE_ENGINE_C_API_EXTERN=" \
|
||||
"INFERENCE_ENGINE_C_API=" \
|
||||
"IE_NODISCARD=" \
|
||||
"__cdecl=" \
|
||||
"__declspec(x)=" \
|
||||
"__GNUC__=" \
|
||||
"_WIN32"
|
||||
|
||||
FILE_PATTERNS = *.h
|
||||
|
||||
LAYOUT_FILE = @C_LAYOUT_BINARY@
|
||||
|
||||
INPUT = @C_API@
|
||||
|
||||
HTML_OUTPUT = ie_c_api
|
||||
|
||||
GENERATE_TAGFILE = @DOCS_BINARY_DIR@/ie_c_api.tag
|
||||
@@ -29,6 +29,8 @@
|
||||
</tab>
|
||||
<tab type="user" title="Inference Engine С++ API Reference" url="../annotated.html"/>
|
||||
<tab type="user" title="Inference Engine Python API Reference" url="../ie_python_api/annotated.html"/>
|
||||
<!-- DL Streamer -->
|
||||
<tab type="user" title="DL Streamer API Reference" url="https://openvinotoolkit.github.io/dlstreamer_gst/"/>
|
||||
</tab>
|
||||
<!-- Chinese docs -->
|
||||
<tab type="user" title="中文文件" url="https://docs.openvinotoolkit.org/cn/index.html"/>
|
||||
|
||||
2596
docs/doxygen/ie_docs.config
Normal file
2596
docs/doxygen/ie_docs.config
Normal file
File diff suppressed because it is too large
Load Diff
@@ -102,15 +102,12 @@
|
||||
<tab type="usergroup" title="Operations Specifications" url="">
|
||||
<tab type="user" title="Abs-1" url="@ref openvino_docs_ops_arithmetic_Abs_1"/>
|
||||
<tab type="user" title="Acos-1" url="@ref openvino_docs_ops_arithmetic_Acos_1"/>
|
||||
<tab type="user" title="Acosh-1" url="@ref openvino_docs_ops_arithmetic_Acosh_1"/>
|
||||
<tab type="user" title="Acosh-3" url="@ref openvino_docs_ops_arithmetic_Acosh_3"/>
|
||||
<tab type="user" title="Add-1" url="@ref openvino_docs_ops_arithmetic_Add_1"/>
|
||||
<tab type="user" title="Asin-1" url="@ref openvino_docs_ops_arithmetic_Asin_1"/>
|
||||
<tab type="user" title="Asinh-1" url="@ref openvino_docs_ops_arithmetic_Asinh_1"/>
|
||||
<tab type="user" title="Asinh-3" url="@ref openvino_docs_ops_arithmetic_Asinh_3"/>
|
||||
<tab type="user" title="Assign-3" url="@ref openvino_docs_ops_infrastructure_Assign_3"/>
|
||||
<tab type="user" title="Atan-1" url="@ref openvino_docs_ops_arithmetic_Atan_1"/>
|
||||
<tab type="user" title="Atanh-1" url="@ref openvino_docs_ops_arithmetic_Atanh_1"/>
|
||||
<tab type="user" title="Atanh-3" url="@ref openvino_docs_ops_arithmetic_Atanh_3"/>
|
||||
<tab type="user" title="AvgPool-1" url="@ref openvino_docs_ops_pooling_AvgPool_1"/>
|
||||
<tab type="user" title="BatchNormInference-1" url="@ref openvino_docs_ops_normalization_BatchNormInference_1"/>
|
||||
@@ -158,8 +155,9 @@
|
||||
<tab type="user" title="GroupConvolutionBackpropData-1" url="@ref openvino_docs_ops_convolution_GroupConvolutionBackpropData_1"/>
|
||||
<tab type="user" title="GroupConvolution-1" url="@ref openvino_docs_ops_convolution_GroupConvolution_1"/>
|
||||
<tab type="user" title="HardSigmoid-1" url="@ref openvino_docs_ops_activation_HardSigmoid_1"/>
|
||||
<tab type="user" title="HSwish-4" url="@ref openvino_docs_ops_activation_HSwish_4"/>
|
||||
<tab type="user" title="Interpolate-1" url="@ref openvino_docs_ops_image_Interpolate_1"/>
|
||||
<tab type="user" title="Interpolate-1" url="@ref openvino_docs_ops_image_Interpolate_4"/>
|
||||
<tab type="user" title="Interpolate-4" url="@ref openvino_docs_ops_image_Interpolate_4"/>
|
||||
<tab type="user" title="LRN-1" url="@ref openvino_docs_ops_normalization_LRN_1"/>
|
||||
<tab type="user" title="LSTMCell-1" url="@ref openvino_docs_ops_sequence_LSTMCell_1"/>
|
||||
<tab type="user" title="LSTMSequence-1" url="@ref openvino_docs_ops_sequence_LSTMSequence_1"/>
|
||||
@@ -194,9 +192,13 @@
|
||||
<tab type="user" title="PriorBoxClustered-1" url="@ref openvino_docs_ops_detection_PriorBoxClustered_1"/>
|
||||
<tab type="user" title="PriorBox-1" url="@ref openvino_docs_ops_detection_PriorBox_1"/>
|
||||
<tab type="user" title="Proposal-1" url="@ref openvino_docs_ops_detection_Proposal_1"/>
|
||||
<tab type="user" title="Proposal-4" url="@ref openvino_docs_ops_detection_Proposal_4"/>
|
||||
<tab type="user" title="Range-1" url="@ref openvino_docs_ops_generation_Range_1"/>
|
||||
<tab type="user" title="Range-4" url="@ref openvino_docs_ops_generation_Range_4"/>
|
||||
<tab type="user" title="ReadValue-3" url="@ref openvino_docs_ops_infrastructure_ReadValue_3"/>
|
||||
<tab type="user" title="ReLU-1" url="@ref openvino_docs_ops_activation_ReLU_1"/>
|
||||
<tab type="user" title="ReduceL1-4" url="@ref openvino_docs_ops_reduction_ReduceL1_4"/>
|
||||
<tab type="user" title="ReduceL2-4" url="@ref openvino_docs_ops_reduction_ReduceL2_4"/>
|
||||
<tab type="user" title="ReduceLogicalAnd-1" url="@ref openvino_docs_ops_reduction_ReduceLogicalAnd_1"/>
|
||||
<tab type="user" title="ReduceLogicalOr-1" url="@ref openvino_docs_ops_reduction_ReduceLogicalOr_1"/>
|
||||
<tab type="user" title="ReduceMax-1" url="@ref openvino_docs_ops_reduction_ReduceMax_1"/>
|
||||
@@ -796,7 +798,7 @@
|
||||
<!-- OpenCV -->
|
||||
<tab type="user" title="OpenCV Developer Guide" url="https://docs.opencv.org/master/"/>
|
||||
<!-- IE C -->
|
||||
<tab type="user" title="Inference Engine C API Reference" url="ie_c_api/groups.html"/>
|
||||
<tab type="user" title="Inference Engine C API Reference" url="ie_c_api/modules.html"/>
|
||||
<!-- IE C++-->
|
||||
<tab type="classes" visible="yes" title="Inference Engine С++ API Reference">
|
||||
<tab type="classlist" visible="yes" title=""/>
|
||||
@@ -810,7 +812,7 @@
|
||||
<!-- IE Python -->
|
||||
<tab type="user" title="Inference Engine Python API Reference" url="ie_python_api/annotated.html"/>
|
||||
<!-- DL Streamer -->
|
||||
<tab type="user" title="DL Streamer API Reference" url="https://opencv.github.io/gst-video-analytics/"/>
|
||||
<tab type="user" title="DL Streamer API Reference" url="https://openvinotoolkit.github.io/dlstreamer_gst/"/>
|
||||
|
||||
</tab>
|
||||
<!-- Chinese docs -->
|
||||
|
||||
51
docs/doxygen/ie_plugin_api.config
Normal file
51
docs/doxygen/ie_plugin_api.config
Normal file
@@ -0,0 +1,51 @@
|
||||
@INCLUDE = @IE_CONFIG_BINARY@
|
||||
|
||||
LAYOUT_FILE = @PLUGIN_LAYOUT_BINARY@
|
||||
|
||||
HTML_OUTPUT = ie_plugin_api
|
||||
|
||||
GENERATE_TAGFILE = @DOCS_BINARY_DIR@/ie_plugin_api.tag
|
||||
|
||||
EXTRACT_LOCAL_CLASSES = NO
|
||||
|
||||
INPUT = @DOCS_BINARY_DIR@/docs/IE_PLUGIN_DG \
|
||||
@IE_SOURCE_DIR@/src/plugin_api
|
||||
|
||||
FILE_PATTERNS = *.c \
|
||||
*.cpp \
|
||||
*.c++ \
|
||||
*.h \
|
||||
*.hpp \
|
||||
*.md
|
||||
|
||||
EXCLUDE_PATTERNS = cnn_network_ngraph_impl.hpp \
|
||||
ie_imemory_state_internal.hpp \
|
||||
ie_memory_state_internal.hpp \
|
||||
ie_memory_state_base.hpp \
|
||||
convert_function_to_cnn_network.hpp \
|
||||
generic_ie.hpp
|
||||
|
||||
EXCLUDE_SYMBOLS =
|
||||
|
||||
EXAMPLE_PATH = @CMAKE_CURRENT_SOURCE_DIR@/template_plugin/src \
|
||||
@CMAKE_CURRENT_SOURCE_DIR@/template_plugin/include \
|
||||
@CMAKE_CURRENT_SOURCE_DIR@/template_plugin/src/CMakeLists.txt \
|
||||
@CMAKE_CURRENT_SOURCE_DIR@/template_plugin/tests/functional/CMakeLists.txt \
|
||||
@CMAKE_CURRENT_SOURCE_DIR@/examples
|
||||
|
||||
EXAMPLE_PATTERNS = *.cpp \
|
||||
*.hpp
|
||||
|
||||
ENUM_VALUES_PER_LINE = 1
|
||||
|
||||
EXPAND_ONLY_PREDEF = YES
|
||||
|
||||
PREDEFINED = INFERENCE_ENGINE_API \
|
||||
INFERENCE_ENGINE_API_CPP \
|
||||
INFERENCE_ENGINE_API_CLASS \
|
||||
INFERENCE_ENGINE_DEPRECATED \
|
||||
IE_SUPPRESS_DEPRECATED_START \
|
||||
IE_SUPPRESS_DEPRECATED_END \
|
||||
IE_SUPPRESS_DEPRECATED_START_WIN \
|
||||
IE_SUPPRESS_DEPRECATED_END_WIN \
|
||||
IE_THREAD=IE_THREAD_TBB
|
||||
35
docs/doxygen/ie_py_api.config
Normal file
35
docs/doxygen/ie_py_api.config
Normal file
@@ -0,0 +1,35 @@
|
||||
@INCLUDE = @IE_CONFIG_BINARY@
|
||||
|
||||
EXCLUDE_SYMBOLS = ie_api::BlobBuffer \
|
||||
*impl* \
|
||||
*device_name* \
|
||||
*num_requests* \
|
||||
*exec_net* \
|
||||
*c_config* \
|
||||
*ie_core_impl* \
|
||||
*plugin_impl* \
|
||||
*extension_str* \
|
||||
*buffer* \
|
||||
*__cinit__*
|
||||
|
||||
PREDEFINED = "__attribute__(x)=" \
|
||||
"__VA_ARGS__=" \
|
||||
"INFERENCE_ENGINE_C_API_EXTERN=" \
|
||||
"INFERENCE_ENGINE_C_API=" \
|
||||
"IE_NODISCARD=" \
|
||||
"__cdecl=" \
|
||||
"__declspec(x)=" \
|
||||
"__GNUC__=" \
|
||||
"_WIN32"
|
||||
|
||||
EXTENSION_MAPPING = pyx=Python
|
||||
|
||||
FILE_PATTERNS = *.pyx
|
||||
|
||||
LAYOUT_FILE = @PY_LAYOUT_BINARY@
|
||||
|
||||
INPUT = @PYTHON_API_OUT@
|
||||
|
||||
HTML_OUTPUT = ie_python_api
|
||||
|
||||
GENERATE_TAGFILE = @DOCS_BINARY_DIR@/ie_python_api.tag
|
||||
@@ -14,7 +14,7 @@
|
||||
<!-- OpenCV -->
|
||||
<tab type="user" title="OpenCV Developer Guide" url="https://docs.opencv.org/master/"/>
|
||||
<!-- IE C -->
|
||||
<tab type="usergroup" title="Inference Engine C API Reference" url="../ie_c_api/groups.html"/>
|
||||
<tab type="usergroup" title="Inference Engine C API Reference" url="../ie_c_api/modules.html"/>
|
||||
<!-- IE C++-->
|
||||
<tab type="user" title="Inference Engine С++ API Reference" url="../annotated.html"/>
|
||||
<!-- IE Python -->
|
||||
@@ -27,6 +27,8 @@
|
||||
<tab type="filelist" visible="no"/>
|
||||
<tab type="globals" visible="no"/>
|
||||
</tab>
|
||||
<!-- DL Streamer -->
|
||||
<tab type="user" title="DL Streamer API Reference" url="https://openvinotoolkit.github.io/dlstreamer_gst/"/>
|
||||
</tab>
|
||||
<!-- Chinese docs -->
|
||||
<tab type="user" title="中文文件" url="https://docs.openvinotoolkit.org/cn/index.html"/>
|
||||
|
||||
129
docs/doxygen/pyx_filter.py
Normal file
129
docs/doxygen/pyx_filter.py
Normal file
@@ -0,0 +1,129 @@
|
||||
import re
|
||||
import argparse
|
||||
|
||||
|
||||
def process_pyx(pyx_file):
|
||||
"""
|
||||
Convert .pyx file to a more readable format for doxygen.
|
||||
"""
|
||||
with open(pyx_file, 'r') as f:
|
||||
source = f.readlines()
|
||||
idx = 0
|
||||
while idx < len(source):
|
||||
line = source[idx]
|
||||
striped_line = line.lstrip()
|
||||
tabs = ' ' * (len(line) - len(striped_line)) # Keep indentation
|
||||
striped_line = striped_line.rstrip()
|
||||
if striped_line == '@property': # Python functions wrapped with @property decorator
|
||||
new_getter = convert_getter(source, idx)
|
||||
if new_getter:
|
||||
indent = tabs + ' ' * 4
|
||||
new_func, comments, shift = new_getter
|
||||
func_name = re.search(r'def\s+?([A-Za-z0-9_]+)\s*?\(', new_func).group(1)
|
||||
source[idx + 1] = tabs + new_func + '\n'
|
||||
for i in range(shift):
|
||||
source.pop(idx + 2)
|
||||
# This is a workaround to help Doxygen understand "@property" functions as class properties.
|
||||
for comm in comments:
|
||||
source.insert(idx + 2, '{indent}{comment}\n'.format(indent=indent, comment=comm))
|
||||
idx += 1
|
||||
source.insert(idx + 2, '{indent}self.{func_name} = {func_name}\n'.format(
|
||||
indent=indent,
|
||||
func_name=func_name
|
||||
))
|
||||
idx += 1
|
||||
if re.search(r'c?p?def.+\(', striped_line): # Convert cython functions to python format
|
||||
new_sign = get_signature(source, idx)
|
||||
if new_sign:
|
||||
new_func, shift = new_sign
|
||||
args = re.search(r'\((.+)\)', new_func)
|
||||
if args:
|
||||
new_func = new_func.replace(args.group(1), process_args(args.group(1))).replace('cpdef', 'def')
|
||||
source[idx] = tabs + new_func + '\n'
|
||||
for i in range(shift):
|
||||
source.pop(idx + 1)
|
||||
if '__cinit__' in striped_line: # Doxygen only interprets "__init__" constructors
|
||||
source[idx] = source[idx].replace('__cinit__', '__init__')
|
||||
idx += 1
|
||||
|
||||
with open(pyx_file, 'w') as f:
|
||||
f.writelines(source)
|
||||
|
||||
|
||||
def process_args(str_args):
|
||||
"""
|
||||
Convert function arguments to the doxygen readable format.
|
||||
"""
|
||||
args = re.sub(r'\[.*?\]', r'', str_args)
|
||||
args = re.sub(r'\(.*?\)', r'', args)
|
||||
args = args.split(',')
|
||||
for idx, arg in enumerate(args):
|
||||
arg = arg.replace('&', '').strip()
|
||||
if arg.startswith('const'):
|
||||
arg = arg.replace('const', '').strip()
|
||||
if ':' in arg:
|
||||
arg = arg.split(':')[0]
|
||||
match = re.match(r'^[\w\.]+\s+(\w.+)', arg)
|
||||
if match:
|
||||
arg = match.group(1)
|
||||
args[idx] = arg.strip()
|
||||
return ', '.join(args)
|
||||
|
||||
|
||||
def convert_getter(source, start):
|
||||
"""
|
||||
Process a function that is wrapped with @property decorator
|
||||
"""
|
||||
current = source[start + 1].strip()
|
||||
if not current.startswith('def'): # Base Case
|
||||
return
|
||||
new_sign = get_signature(source, start + 1)
|
||||
if new_sign:
|
||||
new_func, shift = new_sign
|
||||
new_func += ':'
|
||||
# get comments
|
||||
comments = []
|
||||
if start > 1:
|
||||
idx = start - 1
|
||||
while source[idx].lstrip().startswith('#') and idx >= 0:
|
||||
comments.append(source[idx].strip())
|
||||
idx -= 1
|
||||
comments.reverse()
|
||||
return new_func, comments, shift
|
||||
|
||||
|
||||
def get_signature(source, start):
|
||||
"""
|
||||
Get function signature and process it
|
||||
"""
|
||||
match = re.search(r'c?p?def.+\(', source[start].strip())
|
||||
if not match:
|
||||
return
|
||||
start_j = match.span()[1]
|
||||
open_brackets = 1
|
||||
new_sign = match.group()
|
||||
|
||||
for i in range(start, len(source)):
|
||||
line = source[i].strip()
|
||||
for j in range(start_j, len(line)):
|
||||
char = line[j]
|
||||
if char == ')':
|
||||
open_brackets -= 1
|
||||
if char == '(':
|
||||
open_brackets += 1
|
||||
new_sign += char
|
||||
if not open_brackets:
|
||||
return new_sign + ':\n', i - start
|
||||
start_j = 0
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('pyx_file', type=str, nargs='+', help='Path to a .pyx file.')
|
||||
args = parser.parse_args()
|
||||
for pyx in args.pyx_file:
|
||||
process_pyx(pyx)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -4,6 +4,8 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
|
||||
// ! [ngraph:include]
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <ngraph/opsets/opset3.hpp>
|
||||
@@ -89,7 +91,7 @@ ngraph::graph_rewrite_callback callback = [](pattern::Matcher& m) {
|
||||
// ! [pattern:label_example]
|
||||
// Detect Multiply with arbitrary first input and second as Constant
|
||||
// ngraph::pattern::op::Label - represent arbitrary input
|
||||
auto input = std::make_shared<ngraph::pattern::op::Label>(ngraph::element::f32, ngraph::Shape{1});
|
||||
auto input = ngraph::pattern::any_input();
|
||||
auto value = ngraph::opset3::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {0.5});
|
||||
auto mul = std::make_shared<ngraph::opset3::Multiply>(input, value);
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(mul, "MultiplyMatcher");
|
||||
@@ -99,20 +101,17 @@ auto m = std::make_shared<ngraph::pattern::Matcher>(mul, "MultiplyMatcher");
|
||||
{
|
||||
// ! [pattern:concat_example]
|
||||
// Detect Concat operation with arbitrary number of inputs
|
||||
auto concat = std::make_shared<ngraph::pattern::op::Label>(ngraph::element::f32, ngraph::Shape{}, ngraph::pattern::has_class<ngraph::opset3::Concat>());
|
||||
auto concat = ngraph::pattern::wrap_type<ngraph::opset3::Concat>();
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(concat, "ConcatMatcher");
|
||||
// ! [pattern:concat_example]
|
||||
}
|
||||
|
||||
{
|
||||
// ! [pattern:predicate_example]
|
||||
// Detect Multiply or Add operation
|
||||
auto lin_op = std::make_shared<ngraph::pattern::op::Label>(ngraph::element::f32, ngraph::Shape{},
|
||||
[](const std::shared_ptr<ngraph::Node> & node) -> bool {
|
||||
return std::dynamic_pointer_cast<ngraph::opset3::Multiply>(node) ||
|
||||
std::dynamic_pointer_cast<ngraph::opset3::Add>(node);
|
||||
});
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(lin_op, "MultiplyOrAddMatcher");
|
||||
// Detect Multiply->Add sequence where mul has exactly one consumer
|
||||
auto mul = ngraph::pattern::wrap_type<ngraph::opset3::Multiply>(ngraph::pattern::consumers_count(1)/*сheck consumers count*/);
|
||||
auto add = ngraph::pattern::wrap_type<ngraph::opset3::Add>({mul, ngraph::pattern::any_input()});
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(add, "MultiplyAddMatcher");
|
||||
// Matcher can be used to match pattern manually on given node
|
||||
if (m->match(node->output(0))) {
|
||||
// Successfully matched
|
||||
|
||||
@@ -35,7 +35,7 @@ OpenVINO™ toolkit includes the following components:
|
||||
- [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) - A web-based graphical environment that allows you to easily use various sophisticated OpenVINO™ toolkit components
|
||||
- Deep Learning Streamer (DL Streamer) – Streaming analytics framework, based on GStreamer, for constructing graphs of media analytics components. DL Streamer can be installed by the Intel® Distribution of OpenVINO™ toolkit installer. Its open source version is available on [GitHub](https://github.com/opencv/gst-video-analytics). For the DL Streamer documentation, see:
|
||||
- [DL Streamer Samples](IE_DG/Tools_Overview.md)
|
||||
- [API Reference](https://opencv.github.io/gst-video-analytics/)
|
||||
- [API Reference](https://openvinotoolkit.github.io/dlstreamer_gst/)
|
||||
- [Elements](https://github.com/opencv/gst-video-analytics/wiki/Elements)
|
||||
- [Tutorial](https://github.com/opencv/gst-video-analytics/wiki/DL%20Streamer%20Tutorial)
|
||||
- [OpenCV](https://docs.opencv.org/master/) - OpenCV* community version compiled for Intel® hardware
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
## Acosh <a name="Acosh"></a> {#openvino_docs_ops_arithmetic_Acosh_1}
|
||||
|
||||
**Versioned name**: *Acosh-1*
|
||||
|
||||
**Category**: Arithmetic unary operation
|
||||
|
||||
**Short description**: *Acosh* performs element-wise hyperbolic inverse cosine (arccosh) operation with given tensor.
|
||||
|
||||
**Attributes**:
|
||||
|
||||
No attributes available.
|
||||
|
||||
**Inputs**
|
||||
|
||||
* **1**: An tensor of type T. **Required.**
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: The result of element-wise acosh operation. A tensor of type T.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: any numeric type.
|
||||
|
||||
*Acosh* does the following with the input tensor *a*:
|
||||
|
||||
\f[
|
||||
a_{i} = acosh(a_{i})
|
||||
\f]
|
||||
|
||||
**Examples**
|
||||
|
||||
*Example 1*
|
||||
|
||||
```xml
|
||||
<layer ... type="Acosh">
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>256</dim>
|
||||
<dim>56</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="1">
|
||||
<dim>256</dim>
|
||||
<dim>56</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
@@ -1,50 +0,0 @@
|
||||
## Asinh <a name="Asinh"></a> {#openvino_docs_ops_arithmetic_Asinh_1}
|
||||
|
||||
**Versioned name**: *Asinh-1*
|
||||
|
||||
**Category**: Arithmetic unary operation
|
||||
|
||||
**Short description**: *Asinh* performs element-wise hyperbolic inverse sine (arcsinh) operation with given tensor.
|
||||
|
||||
**Attributes**:
|
||||
|
||||
No attributes available.
|
||||
|
||||
**Inputs**
|
||||
|
||||
* **1**: An tensor of type T. **Required.**
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: The result of element-wise asinh operation. A tensor of type T.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: any numeric type.
|
||||
|
||||
*Asinh* does the following with the input tensor *a*:
|
||||
|
||||
\f[
|
||||
a_{i} = asinh(a_{i})
|
||||
\f]
|
||||
|
||||
**Examples**
|
||||
|
||||
*Example 1*
|
||||
|
||||
```xml
|
||||
<layer ... type="Asinh">
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>256</dim>
|
||||
<dim>56</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="1">
|
||||
<dim>256</dim>
|
||||
<dim>56</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
@@ -1,50 +0,0 @@
|
||||
## Atanh <a name="Atanh"></a> {#openvino_docs_ops_arithmetic_Atanh_1}
|
||||
|
||||
**Versioned name**: *Atanh-1*
|
||||
|
||||
**Category**: Arithmetic unary operation
|
||||
|
||||
**Short description**: *Atanh* performs element-wise hyperbolic inverse tangent (arctangenth) operation with given tensor.
|
||||
|
||||
**Attributes**:
|
||||
|
||||
No attributes available.
|
||||
|
||||
**Inputs**
|
||||
|
||||
* **1**: An tensor of type T. **Required.**
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: The result of element-wise atanh operation. A tensor of type T.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: any numeric type.
|
||||
|
||||
*Atanh* does the following with the input tensor *a*:
|
||||
|
||||
\f[
|
||||
a_{i} = atanh(a_{i})
|
||||
\f]
|
||||
|
||||
**Examples**
|
||||
|
||||
*Example 1*
|
||||
|
||||
```xml
|
||||
<layer ... type="Atanh">
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>256</dim>
|
||||
<dim>56</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="1">
|
||||
<dim>256</dim>
|
||||
<dim>56</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
@@ -1,4 +1,4 @@
|
||||
## Proposal <a name="Proposal"></a>
|
||||
## Proposal <a name="Proposal"></a> {#openvino_docs_ops_detection_Proposal_4}
|
||||
|
||||
**Versioned name**: *Proposal-4*
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
## Range<a name="Range"></a>
|
||||
## Range<a name="Range"></a> {#openvino_docs_ops_generation_Range_4}
|
||||
|
||||
**Versioned name**: *Range-4*
|
||||
|
||||
|
||||
@@ -273,6 +273,11 @@ class InterpolateCalculation:
|
||||
else:
|
||||
self.scales = scales
|
||||
|
||||
if self.mode == 'nearest':
|
||||
self.all_scales = np.ones(rank).astype(np.float)
|
||||
for i, axis in enumerate(self.axes):
|
||||
self.all_scales[axis] = self.scales[i]
|
||||
|
||||
self.input_shape = padded_data.shape
|
||||
return self.func(padded_data)
|
||||
|
||||
@@ -446,9 +451,9 @@ class InterpolateCalculation:
|
||||
num_of_axes = len(self.axes)
|
||||
for coordinates in np.ndindex(tuple(self.output_shape)):
|
||||
input_coords = np.array(coordinates, dtype=np.int64)
|
||||
for i, axis in enumerate(self.axes):
|
||||
in_coord = self.get_original_coordinate(coordinates[axis], self.scales[i], self.output_shape[axis], self.input_shape[axis])
|
||||
nearest_pixel = self.get_nearest_pixel(in_coord, self.scales[i] < 1)
|
||||
for axis, scale in enumerate(self.all_scales):
|
||||
in_coord = self.get_original_coordinate(coordinates[axis], scale, self.output_shape[axis], self.input_shape[axis])
|
||||
nearest_pixel = self.get_nearest_pixel(in_coord, scale < 1)
|
||||
input_coords[axis] = max(0, min(nearest_pixel, self.input_shape[axis] - 1))
|
||||
result[coordinates] = input_data[tuple(input_coords)]
|
||||
|
||||
@@ -487,4 +492,4 @@ class InterpolateCalculation:
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
```
|
||||
@@ -8,21 +8,33 @@
|
||||
|
||||
**Detailed description**
|
||||
|
||||
GatherTree operation implements the same algorithm as GatherTree operation in TensorFlow. Please see complete documentation [here](https://www.tensorflow.org/versions/r1.12/api_docs/python/tf/contrib/seq2seq/gather_tree?hl=en).
|
||||
The GatherTree operation implements the same algorithm as the [GatherTree operation in TensorFlow](https://www.tensorflow.org/addons/api_docs/python/tfa/seq2seq/gather_tree).
|
||||
|
||||
Pseudo code:
|
||||
|
||||
```python
|
||||
final_idx[ :, :, :] = end_token
|
||||
for batch in range(BATCH_SIZE):
|
||||
for beam in range(BEAM_WIDTH):
|
||||
max_sequence_in_beam = min(MAX_TIME, max_seq_len[batch])
|
||||
|
||||
parent = parent_idx[max_sequence_in_beam - 1, batch, beam]
|
||||
|
||||
final_idx[max_sequence_in_beam - 1, batch, beam] = step_idx[max_sequence_in_beam - 1, batch, beam]
|
||||
|
||||
for level in reversed(range(max_sequence_in_beam - 1)):
|
||||
final_idx[level, batch, beam] = step_idx[level, batch, parent]
|
||||
|
||||
parent = parent_idx[level, batch, parent]
|
||||
|
||||
# For a given beam, past the time step containing the first decoded end_token
|
||||
# all values are filled in with end_token.
|
||||
finished = False
|
||||
for time in range(max_sequence_in_beam):
|
||||
if(finished):
|
||||
final_idx[time, batch, beam] = end_token
|
||||
elif(final_idx[time, batch, beam] == end_token):
|
||||
finished = True
|
||||
```
|
||||
|
||||
Element data types for all input tensors should match each other.
|
||||
|
||||
@@ -36,3 +36,52 @@ bool Operation::visit_attributes(ngraph::AttributeVisitor &visitor) {
|
||||
return true;
|
||||
}
|
||||
//! [op:visit_attributes]
|
||||
|
||||
//! [op:evaluate]
|
||||
namespace
|
||||
{
|
||||
|
||||
template <class T>
|
||||
void implementation(const T* input,
|
||||
T* output,
|
||||
int64_t add,
|
||||
size_t size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
output[i] = input[i] + add;
|
||||
}
|
||||
}
|
||||
|
||||
template <ngraph::element::Type_t ET>
|
||||
bool evaluate_op(const ngraph::HostTensorPtr& arg0,
|
||||
const ngraph::HostTensorPtr& out, int64_t add)
|
||||
{
|
||||
size_t size = ngraph::shape_size(arg0->get_shape());
|
||||
implementation(arg0->get_data_ptr<ET>(),
|
||||
out->get_data_ptr<ET>(),
|
||||
add,
|
||||
size);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Reference evaluation of the custom operation: output[i] = input[i] + add.
// Dispatches on the element type of the first input; returns false for
// element types that are not supported (e.g. boolean, u1).
bool Operation::evaluate(const ngraph::HostTensorVector& outputs,
                         const ngraph::HostTensorVector& inputs) const {
    switch (inputs[0]->get_element_type())
    {
    case ngraph::element::Type_t::i8: return evaluate_op<ngraph::element::Type_t::i8>(inputs[0], outputs[0], getAddAttr());
    case ngraph::element::Type_t::i16: return evaluate_op<ngraph::element::Type_t::i16>(inputs[0], outputs[0], getAddAttr());
    case ngraph::element::Type_t::i32: return evaluate_op<ngraph::element::Type_t::i32>(inputs[0], outputs[0], getAddAttr());
    case ngraph::element::Type_t::i64: return evaluate_op<ngraph::element::Type_t::i64>(inputs[0], outputs[0], getAddAttr());
    case ngraph::element::Type_t::u8: return evaluate_op<ngraph::element::Type_t::u8>(inputs[0], outputs[0], getAddAttr());
    case ngraph::element::Type_t::u16: return evaluate_op<ngraph::element::Type_t::u16>(inputs[0], outputs[0], getAddAttr());
    case ngraph::element::Type_t::u32: return evaluate_op<ngraph::element::Type_t::u32>(inputs[0], outputs[0], getAddAttr());
    // BUG FIX: the u64 case previously instantiated evaluate_op with u8
    // (copy-paste error), reading/writing the tensors with the wrong element
    // width and producing corrupted results for u64 inputs.
    case ngraph::element::Type_t::u64: return evaluate_op<ngraph::element::Type_t::u64>(inputs[0], outputs[0], getAddAttr());
    case ngraph::element::Type_t::bf16: return evaluate_op<ngraph::element::Type_t::bf16>(inputs[0], outputs[0], getAddAttr());
    case ngraph::element::Type_t::f16: return evaluate_op<ngraph::element::Type_t::f16>(inputs[0], outputs[0], getAddAttr());
    case ngraph::element::Type_t::f32: return evaluate_op<ngraph::element::Type_t::f32>(inputs[0], outputs[0], getAddAttr());
    default: break;
    }
    return false;
}
|
||||
//! [op:evaluate]
|
||||
|
||||
@@ -19,7 +19,9 @@ public:
|
||||
void validate_and_infer_types() override;
|
||||
std::shared_ptr<ngraph::Node> clone_with_new_inputs(const ngraph::OutputVector& new_args) const override;
|
||||
bool visit_attributes(ngraph::AttributeVisitor& visitor) override;
|
||||
int64_t getAddAttr() { return add; }
|
||||
int64_t getAddAttr() const { return add; }
|
||||
bool evaluate(const ngraph::HostTensorVector& outputs,
|
||||
const ngraph::HostTensorVector& inputs) const override;
|
||||
|
||||
private:
|
||||
int64_t add;
|
||||
|
||||
@@ -3,12 +3,7 @@
|
||||
#
|
||||
|
||||
# [cmake:main]
|
||||
if (APPLE)
|
||||
# due to https://cmake.org/cmake/help/v3.12/policy/CMP0068.html
|
||||
cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
|
||||
else()
|
||||
cmake_minimum_required(VERSION 3.7.2 FATAL_ERROR)
|
||||
endif()
|
||||
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
||||
|
||||
project(InferenceEngineTemplatePlugin)
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ using namespace ngraph;
|
||||
// template_function_transformation.cpp
|
||||
bool pass::MyFunctionTransformation::run_on_function(std::shared_ptr<ngraph::Function> f) {
|
||||
// Example transformation code
|
||||
std::vector<std::shared_ptr<Node> > nodes;
|
||||
NodeVector nodes;
|
||||
|
||||
// Traverse nGraph Function in topological order
|
||||
for (auto & node : f->get_ordered_ops()) {
|
||||
|
||||
@@ -18,8 +18,6 @@ class MyFunctionTransformation;
|
||||
// template_function_transformation.hpp
|
||||
class ngraph::pass::MyFunctionTransformation: public ngraph::pass::FunctionPass {
|
||||
public:
|
||||
MyFunctionTransformation() : FunctionPass() {}
|
||||
|
||||
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
|
||||
};
|
||||
// ! [function_pass:template_transformation_hpp]
|
||||
|
||||
@@ -16,8 +16,8 @@ using namespace ngraph;
|
||||
// template_pattern_transformation.cpp
|
||||
ngraph::pass::DecomposeDivideMatcher::DecomposeDivideMatcher() {
|
||||
// Pattern example
|
||||
auto input0 = std::make_shared<pattern::op::Label>(element::f32, Shape{});
|
||||
auto input1 = std::make_shared<pattern::op::Label>(element::f32, Shape{});
|
||||
auto input0 = pattern::any_input();
|
||||
auto input1 = pattern::any_input();
|
||||
auto div = std::make_shared<ngraph::opset3::Divide>(input0, input1);
|
||||
|
||||
ngraph::matcher_pass_callback callback = [](pattern::Matcher& m) {
|
||||
@@ -49,7 +49,7 @@ ngraph::pass::DecomposeDivideMatcher::DecomposeDivideMatcher() {
|
||||
// Register pattern with Divide operation as a pattern root node
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(div, "ConvertDivide");
|
||||
// Register Matcher
|
||||
this->register_matcher(m, callback);
|
||||
register_matcher(m, callback);
|
||||
}
|
||||
// ! [graph_rewrite:template_transformation_cpp]
|
||||
|
||||
@@ -82,7 +82,7 @@ ngraph::pass::ReluReluFusionMatcher::ReluReluFusionMatcher() {
|
||||
// Register pattern with Relu operation as a pattern root node
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(m_relu2, "ReluReluFusion");
|
||||
// Register Matcher
|
||||
this->register_matcher(m, callback);
|
||||
register_matcher(m, callback);
|
||||
}
|
||||
// ! [matcher_pass:relu_fusion]
|
||||
|
||||
@@ -137,3 +137,16 @@ pass.add_matcher<ngraph::pass::ReluReluFusionMatcher>();
|
||||
pass.run_on_function(f);
|
||||
// ! [matcher_pass:graph_rewrite]
|
||||
}
|
||||
|
||||
// ! [manual_constant_folding]
|
||||
template <class T>
|
||||
Output<Node> eltwise_fold(const Output<Node> & input0, const Output<Node> & input1) {
|
||||
auto eltwise = std::make_shared<T>(input0, input1);
|
||||
OutputVector output(eltwise->get_output_size());
|
||||
// If constant folding wasn't successful return eltwise output
|
||||
if (!eltwise->constant_fold(output, {input0, input1})) {
|
||||
return eltwise->output(0);
|
||||
}
|
||||
return output[0];
|
||||
}
|
||||
// ! [manual_constant_folding]
|
||||
|
||||
@@ -17,6 +17,10 @@ class ReluReluFusionMatcher;
|
||||
|
||||
// ! [graph_rewrite:template_transformation_hpp]
|
||||
// template_pattern_transformation.hpp
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief Add transformation description.
|
||||
*/
|
||||
class ngraph::pass::DecomposeDivideMatcher: public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
DecomposeDivideMatcher();
|
||||
|
||||
@@ -7,7 +7,7 @@ if(DEFINED IE_MAIN_SOURCE_DIR AND TARGET inference_engine)
|
||||
inference_engine_c_api)
|
||||
else()
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/targets.cmake")
|
||||
if(NOT WIN32)
|
||||
if(NOT MSVC)
|
||||
set_target_properties(IE::inference_engine PROPERTIES INTERFACE_COMPILE_OPTIONS "-Wno-error=deprecated-declarations")
|
||||
endif()
|
||||
|
||||
@@ -31,4 +31,11 @@ else()
|
||||
get_target_property(InferenceEngine_INCLUDE_DIRS IE::inference_engine INTERFACE_INCLUDE_DIRECTORIES)
|
||||
set(InferenceEngine_LIBRARIES IE::inference_engine_legacy IE::inference_engine
|
||||
IE::inference_engine_c_api)
|
||||
|
||||
foreach(library IN LISTS InferenceEngine_LIBRARIES)
|
||||
if(CMAKE_CROSSCOMPILING AND NOT MSVC)
|
||||
set_property(TARGET ${library} PROPERTY
|
||||
INTERFACE_LINK_OPTIONS "-Wl,--allow-shlib-undefined")
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
@@ -181,9 +181,9 @@ endif ()
|
||||
if (ENABLE_OPENCV)
|
||||
reset_deps_cache(OpenCV_DIR)
|
||||
|
||||
set(OPENCV_VERSION "4.3.0")
|
||||
set(OPENCV_BUILD "060")
|
||||
set(OPENCV_BUILD_YOCTO "073")
|
||||
set(OPENCV_VERSION "4.5.0")
|
||||
set(OPENCV_BUILD "36")
|
||||
set(OPENCV_BUILD_YOCTO "337")
|
||||
|
||||
if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
|
||||
if(DEFINED ENV{THIRDPARTY_SERVER_PATH})
|
||||
|
||||
@@ -2,12 +2,6 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
if(ENABLE_DOCKER)
|
||||
cmake_minimum_required(VERSION 3.3 FATAL_ERROR)
|
||||
else()
|
||||
cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
|
||||
endif()
|
||||
|
||||
cmake_policy(SET CMP0054 NEW)
|
||||
|
||||
find_package(Git REQUIRED)
|
||||
|
||||
@@ -154,6 +154,10 @@ else()
|
||||
else()
|
||||
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
|
||||
INTERFACE_COMPILE_OPTIONS "-Wno-error=deprecated-declarations")
|
||||
if(CMAKE_CROSSCOMPILING AND NOT MSVC)
|
||||
set_property(TARGET IE::inference_engine${ie_library_suffix} PROPERTY
|
||||
INTERFACE_LINK_OPTIONS "-Wl,--allow-shlib-undefined")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
@@ -19,8 +19,8 @@ set(VPU_SUPPORTED_FIRMWARES usb-ma2450 usb-ma2x8x pcie-ma248x)
|
||||
# Default packages
|
||||
#
|
||||
|
||||
set(FIRMWARE_PACKAGE_VERSION 1370)
|
||||
set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.0")
|
||||
set(FIRMWARE_PACKAGE_VERSION 1381)
|
||||
set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.1")
|
||||
|
||||
#
|
||||
# CMake variables to override default firmware files
|
||||
@@ -191,7 +191,7 @@ function(add_vpu_compile_custom_kernels)
|
||||
"SHAVE_MA2X8XLIBS_DIR=${VPU_CLC_MA2X8X}/lib"
|
||||
"SHAVE_MOVIASM_DIR=${VPU_CLC_MA2X8X}/bin"
|
||||
"SHAVE_MYRIAD_LD_DIR=${VPU_CLC_MA2X8X}/bin"
|
||||
${VPU_CLC_MA2X8X_COMMAND} --strip-binary-header ${cl_file} -o ${out_file}
|
||||
${VPU_CLC_MA2X8X_COMMAND} --strip-binary-header -d ma2x8x ${cl_file} -o ${out_file}
|
||||
MAIN_DEPENDENCY ${cl_file}
|
||||
DEPENDS ${VPU_CLC_MA2X8X_COMMAND}
|
||||
COMMENT "[VPU] Compile ${cl_file}"
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Defines the CMake commands/policies
|
||||
cmake_minimum_required (VERSION 3.3)
|
||||
cmake_minimum_required (VERSION 3.13)
|
||||
|
||||
# Set the project name
|
||||
project (ie_python_api)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#distutils: language=c++
|
||||
#cython: embedsignature=True
|
||||
from cython.operator cimport dereference as deref
|
||||
from libcpp.string cimport string
|
||||
from libcpp.vector cimport vector
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
cmake_minimum_required (VERSION 2.8.12)
|
||||
cmake_minimum_required (VERSION 3.10)
|
||||
|
||||
project(Samples)
|
||||
|
||||
@@ -145,6 +145,8 @@ endif()
|
||||
# exactly the same OpenCV_DIR path which was used for the InferenceEngine build
|
||||
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/common/format_reader")
|
||||
add_subdirectory(common/format_reader)
|
||||
elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/common/opencv_c_wraper")
|
||||
add_subdirectory(common/opencv_c_wraper)
|
||||
endif()
|
||||
|
||||
# samples build can be switched off during whole IE build
|
||||
@@ -216,6 +218,7 @@ macro(ie_add_sample)
|
||||
|
||||
set(folder_name cpp_samples)
|
||||
if(IE_SAMPLE_NAME MATCHES ".*_c$")
|
||||
set(c_sample ON)
|
||||
set(folder_name c_samples)
|
||||
endif()
|
||||
|
||||
@@ -228,7 +231,11 @@ macro(ie_add_sample)
|
||||
target_include_directories(${IE_SAMPLE_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../common")
|
||||
|
||||
target_link_libraries(${IE_SAMPLE_NAME} PRIVATE ${OpenCV_LIBRARIES} ${InferenceEngine_LIBRARIES}
|
||||
${IE_SAMPLE_DEPENDENCIES} gflags)
|
||||
${IE_SAMPLE_DEPENDENCIES})
|
||||
|
||||
if(NOT c_sample)
|
||||
target_link_libraries(${IE_SAMPLE_NAME} PRIVATE gflags)
|
||||
endif()
|
||||
|
||||
# create global target with all samples / demo apps
|
||||
if(NOT TARGET ie_samples)
|
||||
@@ -237,7 +244,7 @@ macro(ie_add_sample)
|
||||
add_dependencies(ie_samples ${IE_SAMPLE_NAME})
|
||||
|
||||
if(COMMAND add_cpplint_target AND NOT IE_SAMPLE_EXCLUDE_CPPLINT)
|
||||
if(folder_name STREQUAL "c_samples")
|
||||
if(c_sample)
|
||||
set(custom_filters "-readability/casting")
|
||||
endif()
|
||||
add_cpplint_target(${IE_SAMPLE_NAME}_cpplint FOR_TARGETS ${IE_SAMPLE_NAME}
|
||||
|
||||
@@ -11,6 +11,7 @@ for %%A in ("%GNA%") do set GNA_FILENAME=%%~nxA
|
||||
for %%A in ("%OPENCV%") do set OPENCV_FILENAME=%%~nxA
|
||||
for %%A in ("%MYRIAD%") do set MYRIAD_FILENAME=%%~nxA
|
||||
for %%A in ("%HDDL%") do set HDDL_FILENAME=%%~nxA
|
||||
for %%A in ("%HDDL_UNITE%") do set HDDL_UNITE_FILENAME=%%~nxA
|
||||
for %%A in ("%VPU_FIRMWARE_MA2450%") do set VPU_FIRMWARE_MA2450_FILENAME=%%~nxA
|
||||
for %%A in ("%VPU_FIRMWARE_MA2X8X%") do set VPU_FIRMWARE_MA2X8X_FILENAME=%%~nxA
|
||||
for %%A in ("%TBB%") do set TBB_FILENAME=%%~nxA
|
||||
@@ -86,6 +87,16 @@ if not "%HDDL%"=="" (
|
||||
)
|
||||
)
|
||||
|
||||
if not "%HDDL_UNITE%"=="" (
|
||||
if not exist "%DL_SDK_TEMP%\test_dependencies\HDDL_UNITE\%HDDL_UNITE_FILENAME%" (
|
||||
mkdir "%DL_SDK_TEMP%\test_dependencies\HDDL_UNITE"
|
||||
powershell -command "iwr -outf '%DL_SDK_TEMP%\test_dependencies\HDDL_UNITE\_%HDDL_UNITE_FILENAME%' %HDDL_UNITE%"
|
||||
mkdir "%DL_SDK_TEMP%\test_dependencies\HDDL_UNITE\%HDDL_UNITE_FILENAME%"
|
||||
call "C:\Program Files\7-Zip\7z.exe" x -y %DL_SDK_TEMP%\test_dependencies\HDDL_UNITE\_%HDDL_UNITE_FILENAME% -o%DL_SDK_TEMP%\test_dependencies\HDDL_UNITE\%HDDL_UNITE_FILENAME%
|
||||
del "%DL_SDK_TEMP%\test_dependencies\HDDL_UNITE\_%HDDL_UNITE_FILENAME%" /F /Q
|
||||
)
|
||||
)
|
||||
|
||||
if not "%VPU_FIRMWARE_MA2450%"=="" (
|
||||
if not exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%" (
|
||||
mkdir "%DL_SDK_TEMP%\test_dependencies\VPU"
|
||||
@@ -124,6 +135,7 @@ set PATH=%DL_SDK_TEMP%\test_dependencies\OMP\%OMP_FILENAME%%OMP%;%PATH%
|
||||
set PATH=%DL_SDK_TEMP%\test_dependencies\GNA\%GNA_FILENAME%%GNA%;%PATH%
|
||||
set PATH=%DL_SDK_TEMP%\test_dependencies\OPENCV\%OPENCV_FILENAME%%OPENCV%;%PATH%
|
||||
set PATH=%DL_SDK_TEMP%\test_dependencies\TBB\%TBB_FILENAME%%TBB%;%PATH%
|
||||
set PATH=%DL_SDK_TEMP%\test_dependencies\HDDL_UNITE\%HDDL_UNITE_FILENAME%%HDDL_UNITE%;%PATH%
|
||||
|
||||
set PATH=%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%;%PATH%
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ add_path() {
|
||||
fi
|
||||
}
|
||||
|
||||
runtimes=(MKL CLDNN MYRIAD GNA DLIA OPENCV VPU_FIRMWARE_USB-MA2450 VPU_FIRMWARE_USB-MA2X8X HDDL OMP TBB AOCL_RTE LIBUSB)
|
||||
runtimes=(MKL CLDNN MYRIAD GNA DLIA OPENCV VPU_FIRMWARE_USB-MA2450 VPU_FIRMWARE_USB-MA2X8X HDDL HDDL_UNITE OMP TBB AOCL_RTE LIBUSB)
|
||||
|
||||
export_library_path() {
|
||||
export LD_LIBRARY_PATH=$DL_SDK_TEMP/test_dependencies/$1:$LD_LIBRARY_PATH
|
||||
|
||||
@@ -430,9 +430,8 @@ void GNAPluginNS::backend::AMIntelDNN::Propagate() {
|
||||
break;
|
||||
case kDnnCopyOp:ApplyCopy(comp);
|
||||
break;
|
||||
default:fprintf(stderr, "Bad operation in Propagate!\n");
|
||||
throw -1;
|
||||
break;
|
||||
default:
|
||||
THROW_GNA_EXCEPTION << "Bad operation in Propagate : " << comp->operation;
|
||||
}
|
||||
// PrintOutputs(i); fflush(stdout);
|
||||
}
|
||||
|
||||
@@ -200,22 +200,6 @@ void GNAPluginNS::backend::ApplyCopy(intel_dnn_component_t *component) {
|
||||
}
|
||||
}
|
||||
|
||||
// Checks whether two AMIntelDNN networks are structurally compatible:
// identical component count and, for every component pair, matching
// input/output dimensions and operation kind.
bool GNAPluginNS::backend::isCompatibleDnn(GNAPluginNS::backend::AMIntelDNN dnn1, GNAPluginNS::backend::AMIntelDNN dnn2) {
    // BUG FIX: the original kept iterating over dnn1's components even when
    // the component counts differed, indexing dnn2.component[i] out of range
    // whenever dnn1 had more components than dnn2. Bail out before the loop.
    if (dnn1.num_components() != dnn2.num_components()) {
        return false;
    }

    for (int i = 0; i < dnn1.num_components(); i++) {
        const auto &c1 = dnn1.component[i];
        const auto &c2 = dnn2.component[i];
        // Any structural mismatch makes the networks incompatible; no need to
        // keep scanning (the original accumulated a flag over all components).
        if (c1.num_rows_in != c2.num_rows_in ||
            c1.num_columns_in != c2.num_columns_in ||
            c1.num_rows_out != c2.num_rows_out ||
            c1.num_columns_out != c2.num_columns_out ||
            c1.operation != c2.operation) {
            return false;
        }
    }

    return true;
}
|
||||
|
||||
void GNAPluginNS::backend::ClearScoreError(intel_score_error_t *error) {
|
||||
error->num_scores = 0;
|
||||
error->num_errors = 0;
|
||||
|
||||
@@ -65,7 +65,6 @@ void ApplyTranspose(intel_dnn_component_t *component);
|
||||
void ApplyCopy(intel_dnn_component_t *component);
|
||||
|
||||
void PlotFloatIntDnn(GNAPluginNS::backend::AMIntelDNN *dnn, GNAPluginNS::backend::AMIntelDNN *dnn_int);
|
||||
bool isCompatibleDnn(GNAPluginNS::backend::AMIntelDNN dnn1, GNAPluginNS::backend::AMIntelDNN dnn2);
|
||||
void ClearScoreError(intel_score_error_t *error);
|
||||
void UpdateScoreError(intel_score_error_t *error, intel_score_error_t *total_error);
|
||||
void SoftmaxGoogle(float *ptr_output, float *ptr_input, const uint32_t num_outputs, const uint32_t num_inputs);
|
||||
|
||||
@@ -8,32 +8,62 @@
|
||||
#include <ie_common.h>
|
||||
#include <legacy/ie_layers.h>
|
||||
#include <iomanip>
|
||||
#include <details/caseless.hpp>
|
||||
#include <layers/gna_copy_layer.hpp>
|
||||
#include "backend/dnn_types.h"
|
||||
|
||||
#include "dnn_components.hpp"
|
||||
|
||||
using namespace GNAPluginNS;
|
||||
using namespace GNAPluginNS::backend;
|
||||
|
||||
intel_dnn_component_t & backend::DnnComponents::addComponent(const std::string layerName, const std::string layerMetaType) {
|
||||
components.emplace_back(layerName, intel_dnn_component_t());
|
||||
auto ¤tComponent = components.back().second;
|
||||
intel_dnn_component_t & DnnComponents::addComponent(const std::string layerName, const std::string layerMetaType) {
|
||||
auto isDelayed = InferenceEngine::details::CaselessEq<std::string>()(layerMetaType, DelayedCopyLayerName);
|
||||
delayedOperations += isDelayed ? 1 : 0;
|
||||
components.emplace_back(DnnComponentExtra{layerName, {}, isDelayed});
|
||||
auto ¤tComponent = components.back().dnnComponent;
|
||||
#ifdef PLOT
|
||||
currentComponent.original_layer_name = components.back().first.c_str();
|
||||
currentComponent.original_layer_name = components.back().name.c_str();
|
||||
std::cout << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << components.size() - 1 << std::endl;
|
||||
#endif
|
||||
int execOrder = 0;
|
||||
if (!isDelayed) {
|
||||
execOrder = static_cast<int>(components.size() - 1 - delayedOperations);
|
||||
} else {
|
||||
// todo: not perfect - propose to create mapping table that will be printed out by extra request
|
||||
execOrder = - static_cast<int>(delayedOperations);
|
||||
}
|
||||
|
||||
gnalog() << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << execOrder << std::endl;
|
||||
return currentComponent;
|
||||
return currentComponent;
|
||||
}
|
||||
|
||||
intel_dnn_component_t * backend::DnnComponents::findComponent(InferenceEngine::CNNLayerPtr __layer) {
|
||||
intel_dnn_component_t * DnnComponents::findComponent(InferenceEngine::CNNLayerPtr __layer) {
|
||||
auto component = std::find_if(begin(components),
|
||||
end(components),
|
||||
[&](storage_type ::value_type &comp) {
|
||||
return comp.first == __layer->name;
|
||||
return comp.name == __layer->name;
|
||||
});
|
||||
// check for generic prev layer
|
||||
if (component != components.end()) {
|
||||
return &component->second;
|
||||
return &component->dnnComponent;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
||||
std::vector<intel_dnn_component_t> DnnComponents::getExecutionOrder() {
|
||||
std::vector<intel_dnn_component_t> result(components.size());
|
||||
|
||||
uint32_t direct_id = 0;
|
||||
uint32_t delayed_id = static_cast<uint32_t>(components.size() - delayedOperations);
|
||||
|
||||
for (auto &&c : components) {
|
||||
uint32_t &id = c.isDelayed ? delayed_id : direct_id;
|
||||
result[id] = c.dnnComponent;
|
||||
id++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -12,11 +12,21 @@
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace backend {
|
||||
struct DnnComponentExtra {
|
||||
std::string name;
|
||||
intel_dnn_component_t dnnComponent;
|
||||
bool isDelayed;
|
||||
DnnComponentExtra(std::string name,
|
||||
intel_dnn_component_t dnnComponent,
|
||||
bool isDelayed) :
|
||||
name(name), dnnComponent(dnnComponent), isDelayed(isDelayed) {}
|
||||
};
|
||||
|
||||
/**
|
||||
* maps layer name to dnn.component, in topological sort prev nodes will be initialized
|
||||
* maps layer name to dnn.component, in topological order, or execution order
|
||||
*/
|
||||
struct DnnComponents {
|
||||
using storage_type = std::list<std::pair<std::string, intel_dnn_component_t>>;
|
||||
using storage_type = std::list<DnnComponentExtra>;
|
||||
storage_type components;
|
||||
/**
|
||||
* @brief initializes new empty intel_dnn_component_t object
|
||||
@@ -30,6 +40,14 @@ struct DnnComponents {
|
||||
* @return
|
||||
*/
|
||||
intel_dnn_component_t * findComponent(InferenceEngine::CNNLayerPtr layer);
|
||||
|
||||
/**
|
||||
* @brief extract components in execution order
|
||||
*/
|
||||
std::vector<intel_dnn_component_t> getExecutionOrder();
|
||||
|
||||
private:
|
||||
uint32_t delayedOperations = 0;
|
||||
};
|
||||
} // namespace backend
|
||||
} // namespace GNAPluginNS
|
||||
|
||||
@@ -309,7 +309,17 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
|
||||
case InferenceEngine::EltwiseLayer::Sub:
|
||||
case InferenceEngine::EltwiseLayer::Sum: {
|
||||
// detect which input will be used as biases
|
||||
if (LayerInfo(in0).has32BOutput()) {
|
||||
auto findPrevFunctional = [](InferenceEngine::CNNLayerPtr layer) {
|
||||
auto prev = InferenceEngine::CNNNetPrevLayer(layer, 0);
|
||||
while (CNNNetHasPrevLayer(prev.get(), 0) && LayerInfo(prev).isNonFunctional()) {
|
||||
prev = InferenceEngine::CNNNetPrevLayer(prev, 0);
|
||||
}
|
||||
|
||||
return prev;
|
||||
};
|
||||
|
||||
if (LayerInfo(in0).has32BOutput() ||
|
||||
(LayerInfo(in0).isNonFunctional() && CNNNetHasPrevLayer(in0.get(), 0) && LayerInfo(findPrevFunctional(in0)).has32BOutput())) {
|
||||
std::swap(in0, in1);
|
||||
std::swap(quantParams0, quantParams1);
|
||||
}
|
||||
|
||||
@@ -542,9 +542,11 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
|
||||
|
||||
if (gnaFlags->sw_fp32) {
|
||||
IE_ASSERT(quantized == nullptr);
|
||||
gnamem->readonly().push_value(ptr_weights, power.scale, num_rows_out, 64);
|
||||
gnamem->readonly().push_value(ptr_biases, power.offset, num_rows_out, 64);
|
||||
} else {
|
||||
IE_ASSERT(quantized != nullptr);
|
||||
auto quantizedScale = FLOAT_TO_INT16(std::min(quantized->_weights_quant.scale * power.scale,
|
||||
static_cast<float>(INT16_MAX)));
|
||||
auto quantizedOffset = FLOAT_TO_INT32(std::min(quantized->_dst_quant.scale * power.offset,
|
||||
@@ -704,7 +706,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
void* ptr_outputs = nullptr;
|
||||
auto orientation = kDnnInterleavedOrientation;
|
||||
|
||||
auto& currentComponent = dnnComponents.addComponent(layer->name, "copy");
|
||||
auto ¤tComponent = dnnComponents.addComponent(layer->name, layer->type);
|
||||
|
||||
dnn->InitCopyComponent(currentComponent,
|
||||
orientation,
|
||||
@@ -1293,7 +1295,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
|
||||
|
||||
auto orientation = kDnnInterleavedOrientation;
|
||||
|
||||
auto& copyComponent = dnnComponents.addComponent(layer->name + "_synthetic_copy", "copy");
|
||||
auto& copyComponent = dnnComponents.addComponent(layer->name + "_synthetic_copy", CopyLayerName);
|
||||
|
||||
dnn->InitCopyComponent(copyComponent,
|
||||
orientation,
|
||||
@@ -1772,7 +1774,8 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
|
||||
{{"Reshape"}, SKIP}, // TODO: handled not in GNA but rather in GNA plugin
|
||||
{{"Squeeze"}, SKIP}, // TODO: handled not in GNA but rather in GNA plugin
|
||||
{{"Crop"}, CREATE(CropPrimitive)},
|
||||
{{"Copy"}, CREATE(CopyPrimitive)},
|
||||
{{CopyLayerName}, CREATE(CopyPrimitive)},
|
||||
{{DelayedCopyLayerName}, CREATE(CopyPrimitive)},
|
||||
{{"TensorIterator"}, SKIP},
|
||||
{{"LSTMCell"}, SKIP}
|
||||
};
|
||||
@@ -1784,7 +1787,17 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
|
||||
}
|
||||
}
|
||||
|
||||
void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *ptr, size_t num_data_bytes_out) {
|
||||
void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *ptr,
|
||||
size_t num_data_bytes_out) {
|
||||
auto getOffsetForBinding = [](InferenceEngine::CNNLayerPtr layer) {
|
||||
int32_t output_offset = 0;
|
||||
if (layer->params.find("output_offset") != layer->params.end()) {
|
||||
output_offset = layer->GetParamAsInt("output_offset");
|
||||
}
|
||||
return output_offset;
|
||||
};
|
||||
|
||||
|
||||
gnalog() << "Connecting output " << layer->name << " ...\n";
|
||||
// in case of Memory Layer it's input allocated in meminput layer
|
||||
if (layer->outData.size() == 1) {
|
||||
@@ -1814,12 +1827,12 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
|
||||
auto memorySize = InferenceEngine::details::product(nextMemoryLayer.getDims()) * nextMemoryLayer.elementSizeBytes();
|
||||
|
||||
gnamem->reserve_ptr(&nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64);
|
||||
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, 0);
|
||||
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
|
||||
|
||||
nextMemoryLayer.reserved_size = ALIGN64(memorySize);
|
||||
} else {
|
||||
IE_ASSERT(nextMemoryLayer.reserved_size >= ALIGN64(num_data_bytes_out));
|
||||
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, 0);
|
||||
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -2071,7 +2084,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
|
||||
gnamem->reserve_ptr(&memoryLayer.gna_ptr, ALIGN64(memorySize), 64);
|
||||
gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset);
|
||||
} else {
|
||||
if (num_data_bytes_in > memorySize) {
|
||||
if (num_data_bytes_in > memorySize - offset) {
|
||||
THROW_GNA_LAYER_EXCEPTION(layer) <<" invalid allocation request of "
|
||||
<< num_data_bytes_in << " is more then state tensor size of: " << memorySize;
|
||||
}
|
||||
|
||||
@@ -362,7 +362,9 @@ void GNAPlugin::LoadNetwork(ICNNNetwork & _network) {
|
||||
passes->registerPass<RemoveConstPass>();
|
||||
passes->registerPass<UnrollTIPass>();
|
||||
passes->registerPass<RemoveConstPass>();
|
||||
passes->registerPass<InsertIdentityToLSTMCellPass>();
|
||||
passes->registerPass<UnrollLSTMCellPass>();
|
||||
passes->registerPass<RemoveSingleInputConcatPass>();
|
||||
|
||||
passes->registerPass<SubstitutePReluPass>();
|
||||
passes->registerPass<SubstituteSoftSignPass>();
|
||||
@@ -556,15 +558,15 @@ void GNAPlugin::LoadNetwork(ICNNNetwork & _network) {
|
||||
auto irLayerAvatar = std::find_if(
|
||||
graphCompiler.dnnComponents.components.begin(),
|
||||
graphCompiler.dnnComponents.components.end(),
|
||||
[&layer](std::pair<std::string, intel_dnn_component_t> & value) {
|
||||
return value.first == layer->name;
|
||||
[&layer](const backend::DnnComponents::storage_type::value_type & value) {
|
||||
return value.name == layer->name;
|
||||
});
|
||||
|
||||
gnalog() << "[UFS] from : "<< outPort.first <<" reached: " << layer->name << "\n";
|
||||
|
||||
// probing gna_primitives
|
||||
if (irLayerAvatar != graphCompiler.dnnComponents.components.end()) {
|
||||
initOutput(portId, irLayerAvatar->second, layer);
|
||||
initOutput(portId, irLayerAvatar->dnnComponent, layer);
|
||||
stopSearching = true;
|
||||
}
|
||||
|
||||
@@ -620,9 +622,8 @@ void GNAPlugin::LoadNetwork(ICNNNetwork & _network) {
|
||||
1);
|
||||
|
||||
// TODO: this copy is unneeded; in fact, we can directly create gna structs from list
|
||||
for (auto &element : graphCompiler.dnnComponents.components) {
|
||||
dnn->component.push_back(element.second);
|
||||
}
|
||||
auto execOrder = graphCompiler.dnnComponents.getExecutionOrder();
|
||||
dnn->component.insert(dnn->component.begin(), execOrder.begin(), execOrder.end());
|
||||
|
||||
// in fp32 mode last PWL cannot be computed without that
|
||||
dnn->InitActiveList(NULL);
|
||||
|
||||
@@ -33,7 +33,7 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
|
||||
protected:
|
||||
std::string _pluginName = "GNA";
|
||||
|
||||
Config config;
|
||||
Config config {};
|
||||
std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnn;
|
||||
std::shared_ptr<GNAPluginNS::GNAFlags> gnaFlags;
|
||||
std::shared_ptr<GNAPluginNS::gna_memory_type> gnamem;
|
||||
|
||||
@@ -7,9 +7,9 @@
|
||||
#include <cstdint>
|
||||
|
||||
typedef struct {
|
||||
double slope;
|
||||
double slope {};
|
||||
uint64_t slope_scale = 0;
|
||||
uint32_t slope_scale_index;
|
||||
uint32_t slope_scale_index {};
|
||||
} pwl_gna_slope_scale_t;
|
||||
|
||||
pwl_gna_slope_scale_t gna_slope(const double slope, const double in_scale, const double out_scale);
|
||||
|
||||
17
inference-engine/src/gna_plugin/layers/gna_copy_layer.hpp
Normal file
17
inference-engine/src/gna_plugin/layers/gna_copy_layer.hpp
Normal file
@@ -0,0 +1,17 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace GNAPluginNS {
|
||||
/**
|
||||
* GNA primitive created in sorting order for this copy layer
|
||||
*/
|
||||
static constexpr auto CopyLayerName = "Copy";
|
||||
/**
|
||||
* GNA primitive created at the end of primitives sequence
|
||||
*/
|
||||
static constexpr auto DelayedCopyLayerName = "DelayedCopy";
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "backend/gna_types.h"
|
||||
#include "gna_permute.hpp"
|
||||
#include "gna_lib_ver_selector.hpp"
|
||||
#include "gna_copy_layer.hpp"
|
||||
|
||||
|
||||
namespace GNAPluginNS {
|
||||
@@ -201,13 +202,13 @@ class LayerInfo {
|
||||
return isOfType("concat");
|
||||
}
|
||||
bool isNonFunctional() const noexcept {
|
||||
return isOfType("reshape") || isOfType("squeeze") || isOfType("unsqueeze");
|
||||
return isOfType("reshape") || isOfType("squeeze") || isOfType("unsqueeze") || isTrivialPermute();
|
||||
}
|
||||
bool isPermute() const noexcept {
|
||||
return isOfType("permute");
|
||||
}
|
||||
// @brief this not only mathematically trivial, has some WA for kaldi case
|
||||
bool isTrivialPermute() {
|
||||
bool isTrivialPermute() const {
|
||||
if (!isPermute()) return false;
|
||||
|
||||
auto layerOrder = layer->GetParamAsInts("order");
|
||||
@@ -269,8 +270,13 @@ class LayerInfo {
|
||||
return false;
|
||||
}
|
||||
bool isCopy() const noexcept {
|
||||
return isOfType("copy");
|
||||
return isOfType(CopyLayerName) || isOfType(DelayedCopyLayerName);
|
||||
}
|
||||
|
||||
bool isCopyDelayed() const noexcept {
|
||||
return isOfType(DelayedCopyLayerName);
|
||||
}
|
||||
|
||||
size_t paddingSize() const {
|
||||
static InferenceEngine::details::caseless_set<std::string> layersWithPossiblePadding = {"FullyConnected",
|
||||
"InnerProduct",
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#include <legacy/ie_util_internal.hpp>
|
||||
#include <legacy/graph_tools.hpp>
|
||||
#include <legacy/net_pass.h>
|
||||
#include <layers/gna_copy_layer.hpp>
|
||||
|
||||
#include "gna_plugin_log.hpp"
|
||||
#include "frontend/quantized_layer_params.hpp"
|
||||
@@ -47,6 +48,7 @@ std::shared_ptr<IPassManager> BasePass::getPassManager() {
|
||||
}
|
||||
|
||||
// indexes stored in pass manager
|
||||
static const char identityLayersCounterName[] = "identityLayerCounter";
|
||||
static const char diagonalLayersCounterName[] = "diagonalLayerCounter";
|
||||
static const char copyLayersCounter[] = "numCopyLayers";
|
||||
static const char softSignLayersCounter[] = "numSoftSignLayers";
|
||||
@@ -94,12 +96,13 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
|
||||
* @brief copy layer inserted by several passes
|
||||
* @returns pointer to newly created COPYLayer
|
||||
*/
|
||||
static CNNLayerPtr InsertCopyLayer(CNNLayerPtr prevLayer, CNNLayerPtr nextLayer, int beforeIdx, std::shared_ptr<IPassManager> passmanager) {
|
||||
static CNNLayerPtr InsertCopyLayer(CNNLayerPtr prevLayer, CNNLayerPtr nextLayer, int beforeIdx,
|
||||
std::shared_ptr<IPassManager> passmanager, std::string copyLayerType) {
|
||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(prevLayer);
|
||||
std::string copyName = std::string("copy_") + std::to_string(passmanager->getIntVar(copyLayersCounter)++);
|
||||
std::string copyName = copyLayerType + std::string("_") + std::to_string(passmanager->getIntVar(copyLayersCounter)++);
|
||||
gnalog() << "Inserted " << copyName << " between: " << prevLayer->name << " and " << nextLayer->name << std::endl;
|
||||
|
||||
CNNLayerPtr copyLayer = std::make_shared<GenericLayer>(LayerParams({copyName, "Copy", Precision::FP32}));
|
||||
CNNLayerPtr copyLayer = std::make_shared<GenericLayer>(LayerParams({copyName, copyLayerType, Precision::FP32}));
|
||||
|
||||
auto inputData = nextLayer->insData[beforeIdx].lock();
|
||||
auto dataPtr = std::make_shared<Data>(copyName, inputData->getTensorDesc());
|
||||
@@ -124,7 +127,7 @@ static std::vector<CNNLayerPtr> getCandidatesForIdentityInsertion(const CNNLayer
|
||||
auto PrevFunctionalLayer = [](CNNLayerPtr l, int idx = 0) {
|
||||
auto prevLayer = CNNNetPrevLayerSkipCertain(l, idx, [](CNNLayerPtr ptr) {
|
||||
return LayerInfo(ptr).isNonFunctional();
|
||||
});
|
||||
});
|
||||
gnalog() << "CNNNetPrevLayerSkipCertain for :: " << l->name << "returned: " << prevLayer->name << std::endl;
|
||||
return prevLayer;
|
||||
};
|
||||
@@ -148,35 +151,35 @@ static std::vector<CNNLayerPtr> getCandidatesForIdentityInsertion(const CNNLayer
|
||||
auto prev1 = PrevFunctionalLayer(l, 1);
|
||||
|
||||
switch (eltwise->_operation) {
|
||||
case EltwiseLayer::Sub:
|
||||
case EltwiseLayer::Sum:
|
||||
if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) {
|
||||
return prevLayers;
|
||||
}
|
||||
// TODO: whether there are possibility to select after what layer identity gets inserted
|
||||
prevLayers.push_back(CNNNetPrevLayer(l, 0));
|
||||
break;
|
||||
case EltwiseLayer::Prod: {
|
||||
if (LayerInfo(prev0).has16BOutput() && LayerInfo(prev1).has16BOutput()) {
|
||||
return prevLayers;
|
||||
}
|
||||
|
||||
if (LayerInfo(prev0).has32BOutput()) {
|
||||
prevLayers.push_back(CNNNetPrevLayer(l, 0));
|
||||
}
|
||||
|
||||
// if layers of outdata are different
|
||||
auto prevData0 = l->insData[0].lock();
|
||||
auto prevData1 = l->insData[1].lock();
|
||||
|
||||
if ((prev0 != prev1 || prevData0 != prevData1) && LayerInfo(prev1).has32BOutput()) {
|
||||
prevLayers.push_back(CNNNetPrevLayer(l, 1));
|
||||
}
|
||||
|
||||
break;
|
||||
case EltwiseLayer::Sub:
|
||||
case EltwiseLayer::Sum:
|
||||
if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) {
|
||||
return prevLayers;
|
||||
}
|
||||
default :
|
||||
THROW_GNA_EXCEPTION << "Eltwise Layer of type: " << eltwise->_operation << " not supported";
|
||||
// TODO: whether there are possibility to select after what layer identity gets inserted
|
||||
prevLayers.push_back(CNNNetPrevLayer(l, 0));
|
||||
break;
|
||||
case EltwiseLayer::Prod: {
|
||||
if (LayerInfo(prev0).has16BOutput() && LayerInfo(prev1).has16BOutput()) {
|
||||
return prevLayers;
|
||||
}
|
||||
|
||||
if (LayerInfo(prev0).has32BOutput()) {
|
||||
prevLayers.push_back(CNNNetPrevLayer(l, 0));
|
||||
}
|
||||
|
||||
// if layers of outdata are different
|
||||
auto prevData0 = l->insData[0].lock();
|
||||
auto prevData1 = l->insData[1].lock();
|
||||
|
||||
if ((prev0 != prev1 || prevData0 != prevData1) && LayerInfo(prev1).has32BOutput()) {
|
||||
prevLayers.push_back(CNNNetPrevLayer(l, 1));
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
default :
|
||||
THROW_GNA_EXCEPTION << "Eltwise Layer of type: " << eltwise->_operation << " not supported";
|
||||
}
|
||||
} else if (concat != nullptr) {
|
||||
for (int i = 0; CNNNetHasPrevLayer(l.get(), i); ++i) {
|
||||
@@ -294,6 +297,9 @@ void SubstituteSoftSignPass::run() {
|
||||
};
|
||||
auto getNthChild = [](CNNLayerPtr l, int N) {
|
||||
auto first = getInputTo(l->outData.front()).begin();
|
||||
auto last = getInputTo(l->outData.front()).end();
|
||||
IE_ASSERT(first != last);
|
||||
IE_ASSERT(N <= std::distance(first, last));
|
||||
std::advance(first, N);
|
||||
return first->second;
|
||||
};
|
||||
@@ -621,12 +627,12 @@ void RemovePermutationsNHWCToNCHWPass::run() {
|
||||
}
|
||||
|
||||
void InsertIdentityLayerPass::run() {
|
||||
int numOfIdentityLayers = 0;
|
||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
|
||||
for (auto & l : *pLayers) {
|
||||
for (auto && prev : getCandidatesForIdentityInsertion(l)) {
|
||||
int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++;
|
||||
// actual insertion
|
||||
auto activationName = std::string("identity_") + std::to_string(++numOfIdentityLayers);
|
||||
auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers);
|
||||
|
||||
gnalog() << "Inserted "<< activationName << " between: " << prev->name << " and " << l->name << "\n" << std::flush;
|
||||
|
||||
@@ -689,27 +695,34 @@ void InsertCopyLayerPass::run() {
|
||||
for (int i=0; i != prevLayers.size(); i++) {
|
||||
auto & prevIndirectLayer = prevLayers[i].first;
|
||||
bool bInsert = false;
|
||||
/// Delayed copy layers need to be moved to the very end of processing
|
||||
bool bInsertDelayed = false;
|
||||
|
||||
auto isInserted = [&bInsertDelayed, &bInsert]() {
|
||||
return bInsert || bInsertDelayed;
|
||||
};
|
||||
|
||||
if (LayerInfo(l).isMemory()) {
|
||||
if (LayerInfo(prevIndirectLayer).isConcat()) { bInsert = true;}
|
||||
if (LayerInfo(prevIndirectLayer).isConcat() || LayerInfo(prevIndirectLayer).isCrop()) { bInsertDelayed = true;}
|
||||
// memory usualy preceded by either activation or split, or other layers in order to have 2b precision
|
||||
for (auto && inputto : getInputTo(prevLayers[i].first->outData[prevLayers[i].second])) {
|
||||
// if preceding layer is common for memory and concat
|
||||
if (LayerInfo(inputto.second).isConcat()) {
|
||||
bInsert = true;
|
||||
bInsertDelayed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (LayerInfo(l).isConcat() && LayerInfo(prevIndirectLayer).isCrop()) { bInsert = true; }
|
||||
if (!isInserted() && LayerInfo(l).isConcat() && LayerInfo(prevIndirectLayer).isCrop()) { bInsert = true; }
|
||||
|
||||
if (bInsert) {
|
||||
if (isInserted()) {
|
||||
if (LayerInfo(prevIndirectLayer).isCropAffined()) {
|
||||
// The crop will be replaced by affine.
|
||||
// Copy layer insertion is not required
|
||||
continue;
|
||||
}
|
||||
auto prevLayer = CNNNetPrevLayer(l, i);
|
||||
InsertCopyLayer(prevLayer, l, i, getPassManager());
|
||||
InsertCopyLayer(prevLayer, l, i, getPassManager(), bInsertDelayed ? DelayedCopyLayerName : CopyLayerName);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1119,6 +1132,7 @@ void EltwiseSplitOverChannelsPass::run() {
|
||||
for (size_t k = 0; k != totalSplits; k++) {
|
||||
auto eltwiseRaw = std::make_shared<EltwiseLayer>(
|
||||
LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32});
|
||||
IE_ASSERT(eltwiseRaw != nullptr);
|
||||
eltwiseRaw->_operation = masterEltwise->_operation;
|
||||
eltwiseRaw->coeff = masterEltwise->coeff;
|
||||
auto eltwise = quantized ? InferenceEngine::injectData<QuantizedLayerParams>(eltwiseRaw) : eltwiseRaw;
|
||||
@@ -1249,6 +1263,48 @@ void BroadcastConstPass::run() {
|
||||
}
|
||||
}
|
||||
|
||||
void InsertIdentityToLSTMCellPass::run() {
|
||||
for (auto layer : *pLayers) {
|
||||
if (layer->type == "LSTMCell") {
|
||||
// This fixed the cases when both functional and non-functional outputs are mixed (or not outputs are used)
|
||||
// which results in scratch buffer being used so outputs cannot be used in form of blob or by non-functional layers
|
||||
// downside is scaling down from i32 to i16 which may
|
||||
for (int output_idx = 0; output_idx < layer->outData.size(); output_idx++) {
|
||||
int numOfIdentityLayers = ((this->getPassManager())->getIntVar(identityLayersCounterName))++;
|
||||
auto activationName = std::string("lstm_identity_") + std::to_string(numOfIdentityLayers);
|
||||
auto& output = layer->outData[output_idx];
|
||||
auto& input_to = getInputTo(output);
|
||||
|
||||
CNNLayerPtr activationLayer =
|
||||
std::make_shared<GenericLayer>(LayerParams({activationName, "identity", InferenceEngine::Precision::FP32}));
|
||||
|
||||
auto dataPtr = std::make_shared<Data>("lstm_identity_data_" + std::to_string(numOfIdentityLayers), output->getTensorDesc());
|
||||
|
||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
|
||||
auto activationLayerWithQuant = quantized ? InferenceEngine::injectData<QuantizedLayerParams>(activationLayer) : activationLayer;
|
||||
getCreatorLayer(dataPtr) = activationLayerWithQuant;
|
||||
activationLayerWithQuant->outData.push_back(dataPtr);
|
||||
activationLayerWithQuant->insData.push_back(output);
|
||||
auto& activationInputTo = getInputTo(dataPtr);
|
||||
|
||||
for (auto& input : input_to) {
|
||||
auto& next_layer = input.second;
|
||||
activationInputTo[input.first] = next_layer;
|
||||
for (int i = next_layer->insData.size() -1; i>= 0; i--) {
|
||||
auto ins = next_layer->insData[i].lock();
|
||||
if (ins == output) {
|
||||
next_layer->insData.erase(next_layer->insData.begin() + i);
|
||||
}
|
||||
}
|
||||
next_layer->insData.push_back(dataPtr);
|
||||
}
|
||||
input_to.clear();
|
||||
input_to[activationName] = activationLayerWithQuant;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void UnrollLSTMCellPass::run() {
|
||||
InferenceEngine::NetPass::UnrollRNN_if(*getPassManager()->getNetwork(), [] (const RNNCellBase& rnn) -> bool {
|
||||
if (rnn.clip != 0.0f)
|
||||
@@ -1282,6 +1338,33 @@ void RemoveConstPass::run() {
|
||||
transformer.fullTrim();
|
||||
}
|
||||
|
||||
void RemoveSingleInputConcatPass::run() {
|
||||
for (auto &l : *pLayers) {
|
||||
if (l->type == "Concat") {
|
||||
auto concat = dynamic_cast<ConcatLayer*>(l.get());
|
||||
if (concat->insData.size() == 1 && concat->outData.size() > 0) {
|
||||
auto in = concat->insData[0];
|
||||
auto in_layer = getCreatorLayer(in.lock());
|
||||
|
||||
auto out = concat->outData[0];
|
||||
|
||||
for (auto out_layer : getInputTo(out)) {
|
||||
for (int i = 0; i < out_layer.second->insData.size(); i++) {
|
||||
if (out_layer.second->insData[i].lock() == out) {
|
||||
out_layer.second->insData[i] = in;
|
||||
getInputTo(in.lock())[out_layer.second->name] = out_layer.second;
|
||||
}
|
||||
}
|
||||
}
|
||||
getInputTo(in.lock()).erase(concat->name);
|
||||
getInputTo(out).clear();
|
||||
concat->insData.clear();
|
||||
concat->outData.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FuseMultipleIdentitiesPass::run() {
|
||||
for (auto &l : *pLayers) {
|
||||
if (l->insData.empty()) continue;
|
||||
|
||||
@@ -144,6 +144,8 @@ DECL_PASS(InsertConcatAligningFilter);
|
||||
*/
|
||||
DECL_PASS(ReorderConcatInputs);
|
||||
|
||||
DECL_PASS_BEFORE_COPY(InsertIdentityToLSTMCell);
|
||||
|
||||
/**
|
||||
* @brief unrolled LSTM cell layer in supported GNA primitives
|
||||
*/
|
||||
@@ -159,6 +161,10 @@ DECL_PASS_BEFORE_COPY(UnrollTI);
|
||||
*/
|
||||
DECL_PASS_BEFORE_COPY(RemoveConst);
|
||||
|
||||
/**
|
||||
*/
|
||||
DECL_PASS_BEFORE_COPY(RemoveSingleInputConcat);
|
||||
|
||||
/**
|
||||
* @brief removed extra identity layer for multi-output
|
||||
*/
|
||||
|
||||
@@ -12,6 +12,9 @@
|
||||
#include <ie_core.hpp>
|
||||
#include <multi-device/multi_device_config.hpp>
|
||||
#include <ngraph/opsets/opset.hpp>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <ngraph/graph_util.hpp>
|
||||
#include <ngraph/pass/constant_folding.hpp>
|
||||
|
||||
#include <cpp_interfaces/exception2status.hpp>
|
||||
#include "ie_plugin_cpp.hpp"
|
||||
@@ -294,6 +297,23 @@ public:
|
||||
QueryNetworkResult res;
|
||||
auto parsed = parseDeviceNameIntoConfig(deviceName, config);
|
||||
GetCPPPluginByName(parsed._deviceName).QueryNetwork(network, parsed._config, res);
|
||||
if (!network.getFunction())
|
||||
return res;
|
||||
|
||||
// WA for constant folded operations (plugins should support all folded ops)
|
||||
const auto& func = network.getFunction();
|
||||
auto specialized_function = ngraph::clone_function(*func);
|
||||
|
||||
ngraph::pass::ConstantFolding().run_on_function(specialized_function);
|
||||
std::unordered_set<std::string> operationNames;
|
||||
for (const auto& op : specialized_function->get_ops())
|
||||
operationNames.emplace(op->get_friendly_name());
|
||||
|
||||
for (const auto& op : func->get_ops()) {
|
||||
if (operationNames.find(op->get_friendly_name()) != operationNames.end())
|
||||
continue;
|
||||
res.supportedLayersMap[op->get_friendly_name()] = deviceName;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
@@ -669,6 +669,8 @@ InferenceEngine::details::CNNLayerCreator::CNNLayerCreator(const std::shared_ptr
|
||||
[](const std::shared_ptr<::ngraph::Node>& node, const std::map<std::string, std::string>& params) -> CNNLayerPtr {
|
||||
LayerParams attrs = {node->get_friendly_name(), node->description(), details::convertPrecision(node->get_output_element_type(0))};
|
||||
auto reduce_node = std::dynamic_pointer_cast<ngraph::op::util::ArithmeticReductionKeepDims>(node);
|
||||
if (reduce_node == nullptr)
|
||||
THROW_IE_EXCEPTION << "Node '" << node->get_name() << "' is not an instance of ArithmeticReductionKeepDims.";
|
||||
auto res = std::make_shared<InferenceEngine::ReduceLayer>(attrs);
|
||||
res->params = params;
|
||||
res->params["keep_dims"] = reduce_node->get_keep_dims() ? "True" : "False";
|
||||
@@ -678,6 +680,8 @@ InferenceEngine::details::CNNLayerCreator::CNNLayerCreator(const std::shared_ptr
|
||||
addSpecificCreator({"ReduceLogicalAnd"}, [](const std::shared_ptr<::ngraph::Node>& node, const std::map<std::string, std::string>& params) -> CNNLayerPtr {
|
||||
LayerParams attrs = {node->get_friendly_name(), "ReduceAnd", details::convertPrecision(node->get_output_element_type(0))};
|
||||
auto reduce_node = std::dynamic_pointer_cast<ngraph::op::util::LogicalReductionKeepDims>(node);
|
||||
if (reduce_node == nullptr)
|
||||
THROW_IE_EXCEPTION << "Node '" << node->get_name() << "' is not an instance of LogicalReductionKeepDims.";
|
||||
auto res = std::make_shared<InferenceEngine::ReduceLayer>(attrs);
|
||||
res->params = params;
|
||||
res->params["keep_dims"] = reduce_node->get_keep_dims() ? "True" : "False";
|
||||
@@ -687,6 +691,8 @@ InferenceEngine::details::CNNLayerCreator::CNNLayerCreator(const std::shared_ptr
|
||||
addSpecificCreator({"ReduceLogicalOr"}, [](const std::shared_ptr<::ngraph::Node>& node, const std::map<std::string, std::string>& params) -> CNNLayerPtr {
|
||||
LayerParams attrs = {node->get_friendly_name(), "ReduceOr", details::convertPrecision(node->get_output_element_type(0))};
|
||||
auto reduce_node = std::dynamic_pointer_cast<ngraph::op::util::LogicalReductionKeepDims>(node);
|
||||
if (reduce_node == nullptr)
|
||||
THROW_IE_EXCEPTION << "Node '" << node->get_name() << "' is not an instance of LogicalReductionKeepDims.";
|
||||
auto res = std::make_shared<InferenceEngine::ReduceLayer>(attrs);
|
||||
res->params = params;
|
||||
res->params["keep_dims"] = reduce_node->get_keep_dims() ? "True" : "False";
|
||||
|
||||
@@ -398,8 +398,10 @@ bool convertToRNNSeq(CNNLayerPtr cur, const N& net) {
|
||||
IE_ASSERT(cell->insData.size() == NS + 1); // {data, state1, [state2]}
|
||||
IE_ASSERT(cell->outData.size() == NS); // {state1, [state2]}
|
||||
|
||||
auto outData0InputsTo = getInputTo(cell->outData[0]);
|
||||
if (getCreatorLayer(cell->insData[0].lock()).lock() != rsp1 ||
|
||||
getInputTo(cell->outData[0]).begin()->second != rsp2)
|
||||
outData0InputsTo.empty() ||
|
||||
outData0InputsTo.begin()->second != rsp2)
|
||||
return false;
|
||||
|
||||
// Check port mapping
|
||||
@@ -581,6 +583,12 @@ bool unrollTI(CNNLayerPtr cur, ICNNNetwork& net) {
|
||||
auto& rule = first_class[i];
|
||||
auto out_data = ti->outData[rule.from];
|
||||
|
||||
if (num == 1) {
|
||||
getInputTo(body_list[0].outputs[rule.to]) = getInputTo(out_data);
|
||||
getInputTo(body_list[0].outputs[rule.to]).begin()->second->insData[0] = body_list[0].outputs[rule.to];
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string name = ti->name + ":out_concat_" + std::to_string(i);
|
||||
auto concat = std::make_shared<ConcatLayer>(LayerParams {name, "Concat", cur->precision});
|
||||
concat->_axis = rule.axis;
|
||||
|
||||
@@ -48,16 +48,22 @@ public:
|
||||
|
||||
static Blob::Ptr makeNewBlobPtr(const TensorDesc& desc);
|
||||
|
||||
static void invertFakeQuantize(const CNNLayer& fakeQuantize);
|
||||
|
||||
static void updateBlobs(CNNLayer& layer, const std::string& blobName, float value);
|
||||
|
||||
static void updateBlobs(const CNNLayer& quantizeLayer, int constLayerIndex, float value);
|
||||
|
||||
static void updateBlobs(const CNNLayer& quantizeLayer, int constLayerIndex, const std::vector<float>& values);
|
||||
|
||||
static void updateBlobs(TransformationContext& context, const CNNLayer& quantizeLayer, int constLayerIndex, float value);
|
||||
|
||||
static void updateBlobs(TransformationContext& context, const CNNLayer& quantizeLayer, int constLayerIndex, const std::vector<float>& values);
|
||||
|
||||
static void updateBlobs(CNNLayer& layer, const std::string& blobName, const std::vector<float>& values);
|
||||
|
||||
static CNNLayerPtr copyConstant(
|
||||
TransformationContext& context,
|
||||
const CNNLayer& quantizeLayer,
|
||||
const CNNLayerPtr& blobLayer,
|
||||
const size_t constLayerIndex);
|
||||
|
||||
// return true if at least one child uses layer on weights
|
||||
static bool onWeights(const CNNLayer& layer);
|
||||
|
||||
|
||||
@@ -47,6 +47,7 @@ protected:
|
||||
std::vector<float>& biasesShifts) const;
|
||||
|
||||
void updateWeights(
|
||||
TransformationContext& context,
|
||||
const CNNLayerPtr fakeQuantize,
|
||||
std::vector<float>& outputLowValues,
|
||||
std::vector<float>& outputHighValues) const;
|
||||
@@ -68,6 +69,7 @@ protected:
|
||||
const bool onWeights) const;
|
||||
|
||||
DataPrecision fillDequantizationsForWeightsPath(
|
||||
TransformationContext& context,
|
||||
const CNNLayer& weightableLayer,
|
||||
const bool supportAsymmetricQuantization,
|
||||
std::vector<float>& dequantizationScales,
|
||||
|
||||
@@ -148,10 +148,10 @@ void ConcatTransformation::transform(TransformationContext& context, CNNLayer& c
|
||||
switch (quantizedTensorAlignmentOnActivations) {
|
||||
case QuantizedTensorAlignment::None: {
|
||||
const float updatedOutputLowValue = quantizationDetails.outputLowValues[0] * quantizationScale + quantizationShift;
|
||||
CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 3, updatePrecisions ? roundf(updatedOutputLowValue) : updatedOutputLowValue);
|
||||
CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 3, updatePrecisions ? roundf(updatedOutputLowValue) : updatedOutputLowValue);
|
||||
|
||||
const float updatedOutputHighValue = quantizationDetails.outputHighValues[0] * quantizationScale + quantizationShift;
|
||||
CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 4, updatePrecisions ? roundf(updatedOutputHighValue) : updatedOutputHighValue);
|
||||
CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 4, updatePrecisions ? roundf(updatedOutputHighValue) : updatedOutputHighValue);
|
||||
|
||||
break;
|
||||
}
|
||||
@@ -165,18 +165,18 @@ void ConcatTransformation::transform(TransformationContext& context, CNNLayer& c
|
||||
(outputHighValue / quantizationDetails.outputHighValues[0]))
|
||||
: outputHighValue;
|
||||
|
||||
CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 1, inputLowValue);
|
||||
CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 2, inputHighValue);
|
||||
CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 3, dataPrecision.min);
|
||||
CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 4, dataPrecision.max);
|
||||
CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 1, inputLowValue);
|
||||
CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 2, inputHighValue);
|
||||
CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 3, dataPrecision.min);
|
||||
CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 4, dataPrecision.max);
|
||||
break;
|
||||
}
|
||||
case QuantizedTensorAlignment::UpdateLevel: {
|
||||
const float updatedOutputLowValue = quantizationDetails.outputLowValues[0] * quantizationScale + quantizationShift;
|
||||
CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 3, updatePrecisions ? roundf(updatedOutputLowValue) : updatedOutputLowValue);
|
||||
CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 3, updatePrecisions ? roundf(updatedOutputLowValue) : updatedOutputLowValue);
|
||||
|
||||
const float updatedOutputHighValue = quantizationDetails.outputHighValues[0] * quantizationScale + quantizationShift;
|
||||
CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 4, updatePrecisions ? roundf(updatedOutputHighValue) : updatedOutputHighValue);
|
||||
CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 4, updatePrecisions ? roundf(updatedOutputHighValue) : updatedOutputHighValue);
|
||||
|
||||
const int levels = static_cast<int>(fabs(roundf(updatedOutputHighValue) - roundf(updatedOutputLowValue)) + 1.0);
|
||||
fakeQuantizeLayer.params["levels"] = std::to_string(levels);
|
||||
|
||||
@@ -106,8 +106,8 @@ void ConcatMultiChannelsTransformation::transform(TransformationContext& context
|
||||
dequantizationScalesLayers[fakeQuantizeLayer->name] = dequantizationScales;
|
||||
dequantizationShiftsLayers[fakeQuantizeLayer->name] = dequantizationShifts;
|
||||
|
||||
CNNNetworkHelper::updateBlobs(*fakeQuantizeLayer, 3, dataPrecision.min);
|
||||
CNNNetworkHelper::updateBlobs(*fakeQuantizeLayer, 4, dataPrecision.max);
|
||||
CNNNetworkHelper::updateBlobs(context, *fakeQuantizeLayer, 3, dataPrecision.min);
|
||||
CNNNetworkHelper::updateBlobs(context, *fakeQuantizeLayer, 4, dataPrecision.max);
|
||||
}
|
||||
|
||||
if (updatePrecisions) {
|
||||
|
||||
@@ -105,6 +105,7 @@ void ConvolutionTransformation::transform(TransformationContext& context, CNNLay
|
||||
const CNNLayerPtr parentOnData = CNNNetworkHelper::getParent(layer, 0ul);
|
||||
|
||||
const DataPrecision dataPrecisionOnWeights = fillDequantizationsForWeightsPath(
|
||||
context,
|
||||
layer,
|
||||
supportAsymmetricQuantization,
|
||||
originalWeightsDequantizationScales,
|
||||
|
||||
@@ -34,8 +34,6 @@ void FakeQuantizeTransformation::transform(TransformationContext& context, CNNLa
|
||||
THROW_IE_EXCEPTION << "Layer '" << layer.insData.size() << "' has invalid inputs number. 5 is expected.";
|
||||
}
|
||||
|
||||
// CNNNetworkHelper::invertFakeQuantize(layer);
|
||||
|
||||
// FakeQuantize on weights are used without dequantization ScaleShifts
|
||||
const bool onWeights = CNNNetworkHelper::onConstWeightsPath(layer) && CNNNetworkHelper::onWeights(layer);
|
||||
if (onWeights) {
|
||||
@@ -77,8 +75,8 @@ void FakeQuantizeTransformation::transform(TransformationContext& context, CNNLa
|
||||
printDequantizationValues(dequantizationScales, dequantizationShifts);
|
||||
#endif
|
||||
|
||||
CNNNetworkHelper::updateBlobs(layer, 3, dataPrecision.min);
|
||||
CNNNetworkHelper::updateBlobs(layer, 4, dataPrecision.max);
|
||||
CNNNetworkHelper::updateBlobs(context, layer, 3, dataPrecision.min);
|
||||
CNNNetworkHelper::updateBlobs(context, layer, 4, dataPrecision.max);
|
||||
|
||||
if (updatePrecisions) {
|
||||
CNNNetworkHelper::setOutDataPrecision(layer, dataPrecision.precision);
|
||||
|
||||
@@ -135,6 +135,7 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
|
||||
}
|
||||
|
||||
fillDequantizationsForWeightsPath(
|
||||
context,
|
||||
fullyConnected,
|
||||
supportAsymmetricQuantization,
|
||||
originalWeightsDequantizationScales,
|
||||
|
||||
@@ -183,54 +183,6 @@ Blob::Ptr CNNNetworkHelper::makeNewBlobPtr(const TensorDesc& desc) {
|
||||
return newBlob;
|
||||
}
|
||||
|
||||
void CNNNetworkHelper::updateBlobs(CNNLayer& layer, const std::string& blobName, float value) {
|
||||
const auto existingBlobIt = layer.blobs.find(blobName);
|
||||
if (existingBlobIt == layer.blobs.end()) {
|
||||
THROW_IE_EXCEPTION << "blob '" << blobName << "' was not found in layer " << layer.name;
|
||||
}
|
||||
const auto& existingBlobTensorDesc = existingBlobIt->second->getTensorDesc();
|
||||
Blob::Ptr newBlob = makeNewBlobPtr(existingBlobTensorDesc);
|
||||
|
||||
newBlob->allocate();
|
||||
fillBlobByFP32(newBlob, value);
|
||||
layer.blobs[existingBlobIt->first] = newBlob;
|
||||
}
|
||||
|
||||
void CNNNetworkHelper::invertFakeQuantize(const CNNLayer& fakeQuantize) {
|
||||
if (fakeQuantize.type != "FakeQuantize") {
|
||||
THROW_IE_EXCEPTION << "invalid layer type " << fakeQuantize.type;
|
||||
}
|
||||
const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fakeQuantize);
|
||||
const size_t valuesCount =
|
||||
std::max(quantizationDetails.inputLowValues.size(), quantizationDetails.outputLowValues.size());
|
||||
std::vector<float> inputLowValues(valuesCount);
|
||||
std::vector<float> inputHightValues(valuesCount);
|
||||
std::vector<float> outputLowValues(valuesCount);
|
||||
std::vector<float> outputHighValues(valuesCount);
|
||||
bool wasInverted = false;
|
||||
for (size_t i = 0ul; i < valuesCount; ++i) {
|
||||
if ((quantizationDetails.getInputLowValue(i) > quantizationDetails.getInputHighValue(i)) &&
|
||||
(quantizationDetails.getOutputLowValue(i) > quantizationDetails.getOutputHighValue(i))) {
|
||||
inputLowValues[i] = quantizationDetails.getInputHighValue(i);
|
||||
inputHightValues[i] = quantizationDetails.getInputLowValue(i);
|
||||
outputLowValues[i] = quantizationDetails.getOutputHighValue(i);
|
||||
outputHighValues[i] = quantizationDetails.getOutputLowValue(i);
|
||||
wasInverted = true;
|
||||
} else {
|
||||
inputLowValues[i] = quantizationDetails.getInputLowValue(i);
|
||||
inputHightValues[i] = quantizationDetails.getInputHighValue(i);
|
||||
outputLowValues[i] = quantizationDetails.getOutputLowValue(i);
|
||||
outputHighValues[i] = quantizationDetails.getOutputHighValue(i);
|
||||
}
|
||||
}
|
||||
|
||||
if (wasInverted) {
|
||||
CNNNetworkHelper::updateBlobs(fakeQuantize, 1, inputLowValues);
|
||||
CNNNetworkHelper::updateBlobs(fakeQuantize, 2, inputHightValues);
|
||||
CNNNetworkHelper::updateBlobs(fakeQuantize, 3, outputLowValues);
|
||||
CNNNetworkHelper::updateBlobs(fakeQuantize, 4, outputHighValues);
|
||||
}
|
||||
}
|
||||
void CNNNetworkHelper::updateBlobs(const CNNLayer& quantizeLayer, int constLayerIndex,
|
||||
const std::vector<float>& values) {
|
||||
CNNLayerPtr blobLayer = CNNNetworkHelper::getParent(quantizeLayer, constLayerIndex);
|
||||
@@ -288,6 +240,25 @@ void CNNNetworkHelper::updateBlobs(const CNNLayer& quantizeLayer, int constLayer
|
||||
fillBlobByFP32(newBlob, values.data());
|
||||
}
|
||||
|
||||
void CNNNetworkHelper::updateBlobs(
|
||||
TransformationContext& context,
|
||||
const CNNLayer& quantizeLayer,
|
||||
int constLayerIndex,
|
||||
const std::vector<float>& values) {
|
||||
CNNLayerPtr blobLayer = CNNNetworkHelper::getParent(quantizeLayer, constLayerIndex);
|
||||
if (blobLayer == nullptr) {
|
||||
THROW_IE_EXCEPTION << "layer is absent";
|
||||
}
|
||||
|
||||
const auto existingBlobIt = blobLayer->blobs.find("custom");
|
||||
if (existingBlobIt == blobLayer->blobs.end()) {
|
||||
THROW_IE_EXCEPTION << "custom blob was not found ";
|
||||
}
|
||||
|
||||
blobLayer = copyConstant(context, quantizeLayer, blobLayer, constLayerIndex);
|
||||
updateBlobs(quantizeLayer, constLayerIndex, values);
|
||||
}
|
||||
|
||||
void CNNNetworkHelper::updateBlobs(CNNLayer& layer, const std::string& blobName, const std::vector<float>& values) {
|
||||
const auto existingBlobIt = layer.blobs.find(blobName);
|
||||
if (existingBlobIt == layer.blobs.end()) {
|
||||
@@ -377,6 +348,96 @@ void CNNNetworkHelper::updateBlobs(const CNNLayer& quantizeLayer, int constLayer
|
||||
blobLayer->blobs[existingBlobIt->first] = newBlob;
|
||||
}
|
||||
|
||||
void CNNNetworkHelper::updateBlobs(TransformationContext& context, const CNNLayer& quantizeLayer, int constLayerIndex, float value) {
|
||||
auto inData = quantizeLayer.insData[constLayerIndex].lock();
|
||||
if (inData == nullptr) {
|
||||
THROW_IE_EXCEPTION << "data is absent";
|
||||
}
|
||||
|
||||
CNNLayerPtr blobLayer = getCreatorLayer(inData).lock();
|
||||
if (blobLayer == nullptr) {
|
||||
THROW_IE_EXCEPTION << "layer is absent";
|
||||
}
|
||||
|
||||
if (blobLayer->blobs.size() != 1) {
|
||||
THROW_IE_EXCEPTION << "unexpected blobs size";
|
||||
}
|
||||
|
||||
blobLayer = copyConstant(context, quantizeLayer, blobLayer, constLayerIndex);
|
||||
updateBlobs(quantizeLayer, constLayerIndex, value);
|
||||
}
|
||||
|
||||
CNNLayerPtr CNNNetworkHelper::copyConstant(
|
||||
TransformationContext& context,
|
||||
const CNNLayer& quantizeLayer,
|
||||
const CNNLayerPtr& blobLayer,
|
||||
const size_t constLayerIndex) {
|
||||
size_t repeatsCount = 0ul;
|
||||
for (size_t i = 0; i < quantizeLayer.insData.size(); ++i) {
|
||||
auto parentInData = quantizeLayer.insData[i].lock();
|
||||
if (parentInData == nullptr) {
|
||||
continue;
|
||||
}
|
||||
const auto quantizeLayerParent = getCreatorLayer(parentInData).lock();
|
||||
if (quantizeLayerParent == nullptr) {
|
||||
continue;
|
||||
}
|
||||
if (quantizeLayerParent->name == blobLayer->name) {
|
||||
repeatsCount++;
|
||||
}
|
||||
}
|
||||
|
||||
if (repeatsCount < 2ul) {
|
||||
return blobLayer;
|
||||
}
|
||||
|
||||
details::CNNNetworkImpl* networkImpl = dynamic_cast<details::CNNNetworkImpl*>(&context.network);
|
||||
if (networkImpl == nullptr) {
|
||||
THROW_IE_EXCEPTION << "Unexpected network type";
|
||||
}
|
||||
|
||||
const DataPtr outData = blobLayer->outData[0];
|
||||
const std::map<std::string, CNNLayerPtr>& inputTo = getInputTo(outData);
|
||||
const auto quantizeLayerIt = inputTo.find(quantizeLayer.name);
|
||||
if (quantizeLayerIt == inputTo.end()) {
|
||||
THROW_IE_EXCEPTION << "Layer was not found";
|
||||
}
|
||||
|
||||
const auto blobIt = blobLayer->blobs.find("custom");
|
||||
if (blobIt == blobLayer->blobs.end()) {
|
||||
THROW_IE_EXCEPTION << "Blob was not found";
|
||||
}
|
||||
|
||||
const Blob::Ptr blob = blobIt->second;
|
||||
Blob::Ptr newBlob = makeNewBlobPtr(blob->getTensorDesc());
|
||||
newBlob->allocate();
|
||||
|
||||
const std::shared_ptr<float> blobValues = CNNNetworkHelper::getFloatData(blob);
|
||||
fillBlobByFP32(newBlob, blobValues.get());
|
||||
|
||||
auto newBlobValues = CNNNetworkHelper::getFloatData(newBlob);
|
||||
|
||||
const std::string layerName = blobLayer->name + "/new" + std::to_string(repeatsCount);
|
||||
CNNLayerPtr newBlobLayer = CNNLayerPtr(new CNNLayer({ layerName, "Const", blob->getTensorDesc().getPrecision() }));
|
||||
newBlobLayer->blobs.emplace("custom", newBlob);
|
||||
|
||||
const TensorDesc& tensorDesc = blobLayer->outData[0]->getTensorDesc();
|
||||
DataPtr newEdgeAfterLayer(new Data(newBlobLayer->name, tensorDesc));
|
||||
newEdgeAfterLayer->setName(newBlobLayer->name);
|
||||
newEdgeAfterLayer->setPrecision(blob->getTensorDesc().getPrecision());
|
||||
quantizeLayerIt->second->insData[constLayerIndex] = newEdgeAfterLayer;
|
||||
getInputTo(newEdgeAfterLayer)[quantizeLayer.name] = quantizeLayerIt->second;
|
||||
|
||||
getCreatorLayer(newEdgeAfterLayer) = newBlobLayer;
|
||||
newBlobLayer->outData.push_back(newEdgeAfterLayer);
|
||||
|
||||
CNNNetworkImpl* netImpl = dynamic_cast<CNNNetworkImpl*>(&context.network);
|
||||
netImpl->addData(newBlobLayer->name.c_str(), newEdgeAfterLayer);
|
||||
netImpl->addLayer(newBlobLayer);
|
||||
|
||||
return newBlobLayer;
|
||||
}
|
||||
|
||||
int CNNNetworkHelper::onWeightsInDepth(const CNNLayer& layer) {
|
||||
const std::vector<CNNLayerPtr> children = getChildren(layer);
|
||||
for (const CNNLayerPtr& child : children) {
|
||||
|
||||
@@ -250,14 +250,14 @@ void WeightableLayerTransformation::updateLayerBiasesFcSpecific(
|
||||
CNNNetworkHelper::updateBlobs(*biasesLayer, "custom", biases);
|
||||
}
|
||||
|
||||
void WeightableLayerTransformation::updateWeights(const CNNLayerPtr parent, std::vector<float>& outputLowValues,
|
||||
void WeightableLayerTransformation::updateWeights(TransformationContext& context, const CNNLayerPtr parent, std::vector<float>& outputLowValues,
|
||||
std::vector<float>& outputHighValues) const {
|
||||
const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(*parent);
|
||||
// TODO: refactor: move to standalone method
|
||||
switch (quantizedTensorAlignmentOnWeights) {
|
||||
case LayerTransformation::QuantizedTensorAlignment::None: {
|
||||
CNNNetworkHelper::updateBlobs(*parent, 3, outputLowValues);
|
||||
CNNNetworkHelper::updateBlobs(*parent, 4, outputHighValues);
|
||||
CNNNetworkHelper::updateBlobs(context, *parent, 3, outputLowValues);
|
||||
CNNNetworkHelper::updateBlobs(context, *parent, 4, outputHighValues);
|
||||
break;
|
||||
}
|
||||
case LayerTransformation::QuantizedTensorAlignment::UpdateIntervals:
|
||||
@@ -300,10 +300,10 @@ void WeightableLayerTransformation::updateWeights(const CNNLayerPtr parent, std:
|
||||
outputHighValues[i] = roundf(outputHighValues[i] * maxK);
|
||||
}
|
||||
|
||||
CNNNetworkHelper::updateBlobs(*parent, 1, inputLowValues);
|
||||
CNNNetworkHelper::updateBlobs(*parent, 2, inputHighValues);
|
||||
CNNNetworkHelper::updateBlobs(*parent, 3, outputLowValues);
|
||||
CNNNetworkHelper::updateBlobs(*parent, 4, outputHighValues);
|
||||
CNNNetworkHelper::updateBlobs(context, *parent, 1, inputLowValues);
|
||||
CNNNetworkHelper::updateBlobs(context, *parent, 2, inputHighValues);
|
||||
CNNNetworkHelper::updateBlobs(context, *parent, 3, outputLowValues);
|
||||
CNNNetworkHelper::updateBlobs(context, *parent, 4, outputHighValues);
|
||||
|
||||
const size_t levels = static_cast<size_t>(roundf(minOutputIntervalLowValue + maxOutputIntervalHighValue + 1.0));
|
||||
parent->params["levels"] = std::to_string(levels);
|
||||
@@ -411,6 +411,7 @@ void WeightableLayerTransformation::createAsymmetric(TransformationContext& cont
|
||||
}
|
||||
|
||||
DataPrecision WeightableLayerTransformation::fillDequantizationsForWeightsPath(
|
||||
TransformationContext& context,
|
||||
const CNNLayer& weightableLayer,
|
||||
const bool supportAsymmetricQuantization,
|
||||
std::vector<float>& dequantizationScales,
|
||||
@@ -461,7 +462,7 @@ DataPrecision WeightableLayerTransformation::fillDequantizationsForWeightsPath(
|
||||
}
|
||||
}
|
||||
|
||||
updateWeights(parent, outputLowValues, outputHighValues);
|
||||
updateWeights(context, parent, outputLowValues, outputHighValues);
|
||||
return dataPrecision;
|
||||
}
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network
|
||||
|
||||
MKLDNNGraph::ApplyUnrollPasses(static_cast<ICNNNetwork&>(*_clonedNetwork));
|
||||
|
||||
if (_cfg.enableDynamicBatch) {
|
||||
if (_cfg.batchLimit > 1) {
|
||||
// check topology for applicability
|
||||
if (!CanProcessDynBatch(*_clonedNetwork)) {
|
||||
THROW_IE_EXCEPTION << "MKLDNNGraph::CreateGraph: such topology cannot be compiled for dynamic batch!";
|
||||
@@ -279,8 +279,7 @@ bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::ICNNNetwork &n
|
||||
type != Eltwise &&
|
||||
type != Crop &&
|
||||
type != BatchNormalization &&
|
||||
type != Copy &&
|
||||
type != MVN) {
|
||||
type != Copy) {
|
||||
check_result = false;
|
||||
}
|
||||
}, false);
|
||||
|
||||
@@ -1047,48 +1047,53 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() {
|
||||
auto scalesType = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::FP32);
|
||||
auto axesType = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::I32);
|
||||
|
||||
auto pushDesc = [&](memory::format dataFormat) {
|
||||
auto pushDesc = [&](memory::format dataFormat, impl_desc_type implDetail) {
|
||||
config.inConfs[DATA_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA_ID)->getDims(), inputDataType, dataFormat);
|
||||
config.inConfs[TARGET_SHAPE_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(TARGET_SHAPE_ID)->getDims(), targetShapeType, memory::x);
|
||||
config.inConfs[SCALES_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(SCALES_ID)->getDims(), scalesType, memory::x);
|
||||
if (isAxesSpecified)
|
||||
config.inConfs[AXES_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(AXES_ID)->getDims(), axesType, memory::x);
|
||||
config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, dataFormat);
|
||||
supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, dataFormat});
|
||||
supportedPrimitiveDescriptors.push_back({config, implDetail, dataFormat});
|
||||
};
|
||||
|
||||
if (mode == InterpolateMode::nearest || mode == InterpolateMode::linear_onnx) {
|
||||
// blk and by_channel JIT kernel on sse42 or above machine
|
||||
if (mayiuse(cpu::sse42)) {
|
||||
if (getParentEdgeAt(DATA_ID)->getDims().ndims() == 4) {
|
||||
pushDesc(memory::nhwc);
|
||||
if (mayiuse(cpu::avx512_common)) {
|
||||
pushDesc(memory::nChw16c);
|
||||
pushDesc(memory::nhwc, jit_avx512);
|
||||
pushDesc(memory::nChw16c, jit_avx512);
|
||||
} else if (mayiuse(cpu::avx2)) {
|
||||
pushDesc(memory::nhwc, jit_avx2);
|
||||
pushDesc(memory::nChw8c, jit_avx2);
|
||||
} else {
|
||||
pushDesc(memory::nChw8c);
|
||||
pushDesc(memory::nhwc, jit_sse42);
|
||||
pushDesc(memory::nChw8c, jit_sse42);
|
||||
}
|
||||
} else if (getParentEdgeAt(DATA_ID)->getDims().ndims() == 5 && mode == InterpolateMode::nearest) {
|
||||
pushDesc(memory::ndhwc);
|
||||
if (mayiuse(cpu::avx512_common)) {
|
||||
pushDesc(memory::nCdhw16c);
|
||||
pushDesc(memory::ndhwc, jit_avx512);
|
||||
pushDesc(memory::nCdhw16c, jit_avx512);
|
||||
} else if (mayiuse(cpu::avx2)) {
|
||||
pushDesc(memory::ndhwc, jit_avx2);
|
||||
pushDesc(memory::nCdhw8c, jit_avx2);
|
||||
} else {
|
||||
pushDesc(memory::nCdhw8c);
|
||||
pushDesc(memory::ndhwc, jit_sse42);
|
||||
pushDesc(memory::nCdhw8c, jit_sse42);
|
||||
}
|
||||
}
|
||||
if (fusedWith.empty()) {
|
||||
pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()));
|
||||
}
|
||||
}
|
||||
|
||||
// planar for 1.ref on machine without sse42(no fuse). 2.JIT kernel for f32 && avx2(gather).(with fuse)
|
||||
// planar for 1.ref on machine without sse42(if no sse42, canFuse() is false). 2.JIT kernel for f32 && avx2(gather).(with fuse)
|
||||
if (!mayiuse(cpu::sse42))
|
||||
pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()));
|
||||
pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), ref);
|
||||
|
||||
if (mayiuse(cpu::avx2) && inputPrec == Precision::FP32) {
|
||||
pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()));
|
||||
pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), jit_avx2);
|
||||
}
|
||||
} else if (mode == InterpolateMode::linear || mode == InterpolateMode::cubic) {
|
||||
pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()));
|
||||
pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), ref);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1194,16 +1199,16 @@ int clipCoord(int pos, int length) {
|
||||
void MKLDNNInterpolateNode::buildTblNN(SizeVector& srcDimPad5d, SizeVector& dstDim5d,
|
||||
std::vector<float>& dataScales, InterpolateLayoutType layout) {
|
||||
int dimSize = srcDim.size();
|
||||
float fz = (dimSize == 5) ? (1.f / dataScales[dimSize - 3]) : 1.f;
|
||||
float fy = 1.f / dataScales[dimSize - 2];
|
||||
float fx = 1.f / dataScales[dimSize - 1];
|
||||
float fz = (dimSize == 5) ? dataScales[dimSize - 3] : 1.f;
|
||||
float fy = dataScales[dimSize - 2];
|
||||
float fx = dataScales[dimSize - 1];
|
||||
size_t ID = srcDimPad5d[2], IH = srcDimPad5d[3], IW = srcDimPad5d[4];
|
||||
size_t OD = dstDim5d[2], OH = dstDim5d[3], OW = dstDim5d[4];
|
||||
|
||||
indexTable.resize(OD + OH + OW);
|
||||
bool isDDownsample = (fz > 1) ? true : false;
|
||||
bool isHDownsample = (fy > 1) ? true : false;
|
||||
bool isWDownsample = (fx > 1) ? true : false;
|
||||
bool isDDownsample = (fz < 1) ? true : false;
|
||||
bool isHDownsample = (fy < 1) ? true : false;
|
||||
bool isWDownsample = (fx < 1) ? true : false;
|
||||
for (int oz = 0; oz < OD; oz++) {
|
||||
float iz = coordTransToInput(oz, fz, ID, OD);
|
||||
indexTable[oz] = nearestRound(iz, isDDownsample);
|
||||
@@ -1224,8 +1229,8 @@ void MKLDNNInterpolateNode::buildTblNN(SizeVector& srcDimPad5d, SizeVector& dstD
|
||||
void MKLDNNInterpolateNode::buildTblLinearOnnx(SizeVector& srcDimPad5d, SizeVector& dstDim5d,
|
||||
std::vector<float>& dataScales, InterpolateLayoutType layout) {
|
||||
int dimSize = srcDim.size();
|
||||
float fy = 1.f / dataScales[dimSize - 2];
|
||||
float fx = 1.f / dataScales[dimSize - 1];
|
||||
float fy = dataScales[dimSize - 2];
|
||||
float fx = dataScales[dimSize - 1];
|
||||
int IH = srcDimPad5d[3], IW = srcDimPad5d[4];
|
||||
int OH = dstDim5d[3], OW = dstDim5d[4];
|
||||
if (layout == InterpolateLayoutType::planar) {
|
||||
@@ -1337,20 +1342,20 @@ static inline float triangleCoeff(float x) {
|
||||
void MKLDNNInterpolateNode::buidTblLinear(SizeVector& srcDimPad5d, SizeVector& dstDim5d,
|
||||
std::vector<float>& dataScales, int kernel_width, bool antialias) {
|
||||
int dimSize = srcDim.size();
|
||||
float fz = (dimSize == 5) ? (1.f / dataScales[dimSize - 3]) : 1.f;
|
||||
float fy = 1.f / dataScales[dimSize - 2];
|
||||
float fx = 1.f / dataScales[dimSize - 1];
|
||||
float fz = (dimSize == 5) ? dataScales[dimSize - 3] : 1.f;
|
||||
float fy = dataScales[dimSize - 2];
|
||||
float fx = dataScales[dimSize - 1];
|
||||
size_t ID = srcDimPad5d[2], IH = srcDimPad5d[3], IW = srcDimPad5d[4];
|
||||
size_t OD = dstDim5d[2], OH = dstDim5d[3], OW = dstDim5d[4];
|
||||
|
||||
if (!(IW == OW && IH == OH && ID == OD)) {
|
||||
float ax = 1.0f / (antialias ? fx : 1.0f);
|
||||
float ay = 1.0f / (antialias ? fy : 1.0f);
|
||||
float az = 1.0f / (antialias ? fz : 1.0f);
|
||||
float ax = antialias ? fx : 1.0f;
|
||||
float ay = antialias ? fy : 1.0f;
|
||||
float az = antialias ? fz : 1.0f;
|
||||
|
||||
int rx = (fx < 1.0f) ? 2 : static_cast<int>(ceil(static_cast<float>(kernel_width) / ax));
|
||||
int ry = (fy < 1.0f) ? 2 : static_cast<int>(ceil(static_cast<float>(kernel_width) / ay));
|
||||
int rz = (fz < 1.0f) ? 2 : static_cast<int>(ceil(static_cast<float>(kernel_width) / az));
|
||||
int rx = (fx > 1.0f) ? 2 : static_cast<int>(ceil(static_cast<float>(kernel_width) / ax));
|
||||
int ry = (fy > 1.0f) ? 2 : static_cast<int>(ceil(static_cast<float>(kernel_width) / ay));
|
||||
int rz = (fz > 1.0f) ? 2 : static_cast<int>(ceil(static_cast<float>(kernel_width) / az));
|
||||
|
||||
int diaOD = 2 * rz + 1;
|
||||
int diaOH = 2 * ry + 1;
|
||||
@@ -1427,8 +1432,8 @@ std::vector<float> MKLDNNInterpolateNode::getCubicCoeffs(float mantissa, float a
|
||||
// x_idx x_weight0 x_weight1 x_weight2 x_weight3 y_idx y_weight0 y_weight1 y_weight2 y_weight3
|
||||
void MKLDNNInterpolateNode::buidTblCubic(SizeVector& srcDimPad5d, SizeVector& dstDim5d, std::vector<float>& dataScales, float cubicCoeff) {
|
||||
int dimSize = srcDim.size();
|
||||
float fy = 1.f / dataScales[dimSize - 2];
|
||||
float fx = 1.f / dataScales[dimSize - 1];
|
||||
float fy = dataScales[dimSize - 2];
|
||||
float fx = dataScales[dimSize - 1];
|
||||
int IH = srcDimPad5d[3], IW = srcDimPad5d[4];
|
||||
int OH = dstDim5d[3], OW = dstDim5d[4];
|
||||
|
||||
@@ -1591,7 +1596,7 @@ void MKLDNNInterpolateNode::execute(mkldnn::stream strm) {
|
||||
|
||||
SizeVector inShapeBlock = getBlockND(srcDim5d);
|
||||
SizeVector inShapePadBlock = getBlockND(srcDimPad5d);
|
||||
srcPadded = std::vector<uint8_t>(inShapePadBlock[0] * srcDataSize, 0);
|
||||
srcPadded.resize(inShapePadBlock[0] * srcDataSize, 0);
|
||||
uint8_t *src_data_pad = static_cast<uint8_t *>(&srcPadded[0]);
|
||||
|
||||
parallel_for4d(srcDim5d[0], srcDim5d[1], srcDim5d[2], srcDim5d[3], [&](int n, int c, int d, int h) {
|
||||
@@ -1611,9 +1616,6 @@ void MKLDNNInterpolateNode::execute(mkldnn::stream strm) {
|
||||
if (dimSize > 2 && (dataScales[0] != 1.f || dataScales[1] != 1.f)) {
|
||||
THROW_IE_EXCEPTION << "Interpolate layer only supports resize on spatial dimensions(depth, height and width)";
|
||||
}
|
||||
float fz = (dimSize == 5) ? (1.f / dataScales[dimSize - 3]) : 1.f;
|
||||
float fy = 1.f / dataScales[dimSize - 2];
|
||||
float fx = 1.f / dataScales[dimSize - 1];
|
||||
Layout layout = getParentEdgeAt(DATA_ID)->getDesc().getLayout();
|
||||
bool isPlanar = (layout == NC || layout == NCHW || layout == NCDHW) ? true : false;
|
||||
|
||||
@@ -1621,17 +1623,21 @@ void MKLDNNInterpolateNode::execute(mkldnn::stream strm) {
|
||||
case InterpolateMode::nearest: {
|
||||
if (interpolateKernel) {
|
||||
if (isPlanar) {
|
||||
NNPlanar(src_data, dst_data, N, C, ID, IH, IW, fx, fy, fz, OD, OH, OW);
|
||||
NNPlanar(src_data, dst_data, N, C, ID, IH, IW, OD, OH, OW);
|
||||
} else {
|
||||
NNCGathered(src_data, dst_data, N, C, ID, IH, IW, fx, fy, fz, OD, OH, OW);
|
||||
NNCGathered(src_data, dst_data, N, C, ID, IH, IW, OD, OH, OW);
|
||||
}
|
||||
} else {
|
||||
NNRef(src_data, dst_data, N, C, ID, IH, IW, fx, fy, fz, OD, OH, OW);
|
||||
NNRef(src_data, dst_data, N, C, ID, IH, IW, OD, OH, OW);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case InterpolateMode::linear: {
|
||||
bool isDownsample = (fx > 1) || (fy > 1) || (fz > 1);
|
||||
float fz = (dimSize == 5) ? dataScales[dimSize - 3] : 1.f;
|
||||
float fy = dataScales[dimSize - 2];
|
||||
float fx = dataScales[dimSize - 1];
|
||||
|
||||
bool isDownsample = (fx < 1.f) || (fy < 1.f) || (fz < 1.f);
|
||||
int kernel_width = 2;
|
||||
linearInterpolation(src_data, dst_data, N, C, ID, IH, IW, fx, fy, fz, OD, OH, OW, kernel_width, isDownsample && antialias);
|
||||
break;
|
||||
@@ -1639,17 +1645,17 @@ void MKLDNNInterpolateNode::execute(mkldnn::stream strm) {
|
||||
case InterpolateMode::linear_onnx: {
|
||||
if (interpolateKernel) {
|
||||
if (isPlanar) {
|
||||
linearOnnxPlanar(src_data, dst_data, N, C, IH, IW, fx, fy, OH, OW);
|
||||
linearOnnxPlanar(src_data, dst_data, N, C, IH, IW, OH, OW);
|
||||
} else {
|
||||
linearOnnxCGathered(src_data, dst_data, N, C, IH, IW, fx, fy, OH, OW);
|
||||
linearOnnxCGathered(src_data, dst_data, N, C, IH, IW, OH, OW);
|
||||
}
|
||||
} else {
|
||||
linearOnnxRef(src_data, dst_data, N, C, IH, IW, fx, fy, OH, OW);
|
||||
linearOnnxRef(src_data, dst_data, N, C, IH, IW, OH, OW);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case InterpolateMode::cubic: {
|
||||
cubic(src_data, dst_data, N, C, IH, IW, fx, fy, OH, OW, cubeCoeff);
|
||||
cubic(src_data, dst_data, N, C, IH, IW, OH, OW, cubeCoeff);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
@@ -1660,8 +1666,7 @@ void MKLDNNInterpolateNode::execute(mkldnn::stream strm) {
|
||||
|
||||
// for ndhwc and nCdhw8c[16c]
|
||||
// input may be f32/bf16/int8, fused->output varies
|
||||
void MKLDNNInterpolateNode::NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
|
||||
float fx, float fy, float fz, int OD, int OH, int OW) {
|
||||
void MKLDNNInterpolateNode::NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) {
|
||||
int *index_d = static_cast<int*>(&indexTable[0]);
|
||||
int *index_h = static_cast<int*>(&indexTable[OD]);
|
||||
int *index_w = static_cast<int*>(&indexTable[OD + OH]);
|
||||
@@ -1715,20 +1720,19 @@ void MKLDNNInterpolateNode::NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr
|
||||
} // batch end
|
||||
}
|
||||
|
||||
void MKLDNNInterpolateNode::NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
|
||||
float fx, float fy, float fz, int OD, int OH, int OW) {
|
||||
void MKLDNNInterpolateNode::NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) {
|
||||
int *index_d = static_cast<int*>(&indexTable[0]);
|
||||
int *index_h = static_cast<int*>(&indexTable[OD]);
|
||||
int *index_w = static_cast<int*>(&indexTable[OD + OH]);
|
||||
|
||||
// index_h * IW * srcDataSize
|
||||
std::vector<int> index_kernel(OH + OW);
|
||||
// index_h * IW * srcDataSize to reduce and simplify redundant compute
|
||||
for (int oh = 0; oh < OH; oh++) {
|
||||
index_h[oh] *= IW;
|
||||
index_h[oh] *= srcDataSize;
|
||||
index_kernel[oh] = index_h[oh] * IW * srcDataSize;
|
||||
}
|
||||
// index_w * srcDataSize
|
||||
for (int ow = 0; ow < OW; ow++) {
|
||||
index_w[ow] *= srcDataSize;
|
||||
index_kernel[OH + ow] = index_w[ow] * srcDataSize;
|
||||
}
|
||||
|
||||
parallel_for3d(B, C, OD, [&](size_t b, size_t c, size_t od) {
|
||||
@@ -1738,15 +1742,14 @@ void MKLDNNInterpolateNode::NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_,
|
||||
auto arg = jit_interpolate_call_args();
|
||||
arg.src = in_ptr;
|
||||
arg.dst = out_ptr;
|
||||
arg.index = index_h; // need index_h and index_w in kernel, it's in continous memory so one param
|
||||
arg.index = static_cast<int*>(&index_kernel[0]); // need index_h and index_w in kernel, it's in continous memory so one param
|
||||
arg.oc_off = static_cast<size_t>(c);
|
||||
// work_amount is OH(out loop) and OW(inner loop), can get in kernel from jcp.
|
||||
(*interpolateKernel)(&arg);
|
||||
});
|
||||
}
|
||||
|
||||
void MKLDNNInterpolateNode::NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
|
||||
float fx, float fy, float fz, int OD, int OH, int OW) {
|
||||
void MKLDNNInterpolateNode::NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) {
|
||||
int *index_d = static_cast<int*>(&indexTable[0]);
|
||||
int *index_h = static_cast<int*>(&indexTable[OD]);
|
||||
int *index_w = static_cast<int*>(&indexTable[OD + OH]);
|
||||
@@ -1765,8 +1768,7 @@ void MKLDNNInterpolateNode::NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int
|
||||
});
|
||||
}
|
||||
|
||||
void MKLDNNInterpolateNode::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW,
|
||||
float fx, float fy, int OH, int OW) {
|
||||
void MKLDNNInterpolateNode::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) {
|
||||
int *index = static_cast<int*>(&indexTable[0]);
|
||||
int eltInGrid = 4;
|
||||
int scratchLen = rnd_up(eltInGrid * OW * OH, 16);
|
||||
@@ -1786,8 +1788,7 @@ void MKLDNNInterpolateNode::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *ou
|
||||
});
|
||||
}
|
||||
|
||||
void MKLDNNInterpolateNode::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW,
|
||||
float fx, float fy, int OH, int OW) {
|
||||
void MKLDNNInterpolateNode::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) {
|
||||
// left:OW right:OW Top:OH Bottom:OH
|
||||
size_t scratchLen = rnd_up(OW + OW + OH + OH, 16);
|
||||
int *indexLeft = static_cast<int*>(&indexTable[0]);
|
||||
@@ -1844,8 +1845,7 @@ void MKLDNNInterpolateNode::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNInterpolateNode::linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW,
|
||||
float fx, float fy, int OH, int OW) {
|
||||
void MKLDNNInterpolateNode::linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) {
|
||||
int eltInGrid = 4;
|
||||
int scratchLen = rnd_up(eltInGrid * OW * OH, 16);
|
||||
|
||||
@@ -1896,13 +1896,13 @@ void MKLDNNInterpolateNode::linearInterpolation(const uint8_t *in_ptr_, uint8_t
|
||||
return;
|
||||
}
|
||||
|
||||
float ax = 1.0f / (antialias ? fx : 1.0f);
|
||||
float ay = 1.0f / (antialias ? fy : 1.0f);
|
||||
float az = 1.0f / (antialias ? fz : 1.0f);
|
||||
float ax = antialias ? fx : 1.0f;
|
||||
float ay = antialias ? fy : 1.0f;
|
||||
float az = antialias ? fz : 1.0f;
|
||||
|
||||
int rx = (fx < 1.0f) ? 2 : static_cast<int>(ceil(static_cast<float>(kernel_width) / ax));
|
||||
int ry = (fy < 1.0f) ? 2 : static_cast<int>(ceil(static_cast<float>(kernel_width) / ay));
|
||||
int rz = (fz < 1.0f) ? 2 : static_cast<int>(ceil(static_cast<float>(kernel_width) / az));
|
||||
int rx = (fx > 1.0f) ? 2 : static_cast<int>(ceil(static_cast<float>(kernel_width) / ax));
|
||||
int ry = (fy > 1.0f) ? 2 : static_cast<int>(ceil(static_cast<float>(kernel_width) / ay));
|
||||
int rz = (fz > 1.0f) ? 2 : static_cast<int>(ceil(static_cast<float>(kernel_width) / az));
|
||||
|
||||
int diaOD = 2 * rz + 1;
|
||||
int diaOH = 2 * ry + 1;
|
||||
@@ -1992,9 +1992,7 @@ void MKLDNNInterpolateNode::linearInterpolation(const uint8_t *in_ptr_, uint8_t
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
void MKLDNNInterpolateNode::cubic(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW,
|
||||
float fx, float fy, int OH, int OW, float a) {
|
||||
void MKLDNNInterpolateNode::cubic(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW, float a) {
|
||||
const int idxNum = 1;
|
||||
int *xOrigin = static_cast<int*>(&indexTable[0]);
|
||||
float *xFactor = reinterpret_cast<float*>(&indexTable[OW]);
|
||||
@@ -2082,79 +2080,40 @@ void MKLDNNInterpolateNode::setValue(uint8_t *base, size_t offset, float value,
|
||||
}
|
||||
}
|
||||
|
||||
// scale is float(inShape) / float(outShape)
|
||||
// nearest mode need to be strictly consistent with onnx calc manner(div scale, not multiply inverse),
|
||||
// scale is float(outShape) / float(inShape)
|
||||
// strictly consistent with onnx calc manner(div scale, not multiply inverse),
|
||||
// the slight precison diff can produce obvious wrong value due to "nearest round" behavior for NN mode
|
||||
inline float MKLDNNInterpolateNode::coordTransToInput(int outCoord, float scale, int inShape, int outShape) {
|
||||
if ((scale == 1.f) || (inShape == outShape)) {
|
||||
return static_cast<float>(outCoord);
|
||||
}
|
||||
if (mode == InterpolateMode::nearest) {
|
||||
scale = 1.f / scale;
|
||||
switch (coordTransMode) {
|
||||
case InterpolateCoordTransMode::half_pixel: {
|
||||
return (outCoord + 0.5f) / scale - 0.5f;
|
||||
break;
|
||||
}
|
||||
case InterpolateCoordTransMode::pytorch_half_pixel: {
|
||||
if (outShape > 1)
|
||||
return (outCoord + 0.5f) / scale - 0.5f;
|
||||
else
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
case InterpolateCoordTransMode::asymmetric: {
|
||||
return static_cast<float>(outCoord) / scale;
|
||||
break;
|
||||
}
|
||||
case InterpolateCoordTransMode::tf_half_pixel_for_nn: {
|
||||
return (outCoord + 0.5f) / scale;
|
||||
break;
|
||||
}
|
||||
case InterpolateCoordTransMode::align_corners: {
|
||||
if (outShape > 1)
|
||||
return outCoord * static_cast<float>(inShape - 1) / static_cast<float>(outShape - 1);
|
||||
else
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
THROW_IE_EXCEPTION << "Interpolate layer with name '" << getName() << "' does not support specified coordinate transformation mode";
|
||||
break;
|
||||
}
|
||||
switch (coordTransMode) {
|
||||
case InterpolateCoordTransMode::half_pixel: {
|
||||
return (outCoord + 0.5f) / scale - 0.5f;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (coordTransMode) {
|
||||
case InterpolateCoordTransMode::half_pixel: {
|
||||
return (outCoord + 0.5f) * scale - 0.5f;
|
||||
break;
|
||||
}
|
||||
case InterpolateCoordTransMode::pytorch_half_pixel: {
|
||||
if (outShape > 1)
|
||||
return (outCoord + 0.5f) * scale - 0.5f;
|
||||
else
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
case InterpolateCoordTransMode::asymmetric: {
|
||||
return outCoord * scale;
|
||||
break;
|
||||
}
|
||||
case InterpolateCoordTransMode::tf_half_pixel_for_nn: {
|
||||
return (outCoord + 0.5f) * scale;
|
||||
break;
|
||||
}
|
||||
case InterpolateCoordTransMode::align_corners: {
|
||||
if (outShape > 1)
|
||||
return outCoord * static_cast<float>(inShape - 1) / static_cast<float>(outShape - 1);
|
||||
else
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
THROW_IE_EXCEPTION << "Interpolate layer with name '" << getName() << "' does not support specified coordinate transformation mode";
|
||||
break;
|
||||
}
|
||||
case InterpolateCoordTransMode::pytorch_half_pixel: {
|
||||
if (outShape > 1)
|
||||
return (outCoord + 0.5f) / scale - 0.5f;
|
||||
else
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
case InterpolateCoordTransMode::asymmetric: {
|
||||
return static_cast<float>(outCoord) / scale;
|
||||
break;
|
||||
}
|
||||
case InterpolateCoordTransMode::tf_half_pixel_for_nn: {
|
||||
return (outCoord + 0.5f) / scale;
|
||||
break;
|
||||
}
|
||||
case InterpolateCoordTransMode::align_corners: {
|
||||
if (outShape > 1)
|
||||
return outCoord * static_cast<float>(inShape - 1) / static_cast<float>(outShape - 1);
|
||||
else
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
THROW_IE_EXCEPTION << "Interpolate layer with name '" << getName() << "' does not support specified coordinate transformation mode";
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -101,20 +101,14 @@ public:
|
||||
|
||||
private:
|
||||
// nearest neighbor
|
||||
void NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
|
||||
float fx, float fy, float fz, int OD, int OH, int OW);
|
||||
void NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
|
||||
float fx, float fy, float fz, int OD, int OH, int OW);
|
||||
void NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
|
||||
float fx, float fy, float fz, int OD, int OH, int OW);
|
||||
void NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
|
||||
void NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
|
||||
void NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
|
||||
|
||||
// onnx linear
|
||||
void linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW,
|
||||
float fx, float fy, int OH, int OW);
|
||||
void linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW,
|
||||
float fx, float fy, int OH, int OW);
|
||||
void linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW,
|
||||
float fx, float fy, int OH, int OW);
|
||||
void linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);
|
||||
void linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);
|
||||
void linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);
|
||||
|
||||
// linear
|
||||
void linearInterpolation(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
|
||||
@@ -122,8 +116,7 @@ private:
|
||||
|
||||
// cubic
|
||||
std::vector<float> getCubicCoeffs(float mantissa, float a);
|
||||
void cubic(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW,
|
||||
float fx, float fy, int OH, int OW, float a);
|
||||
void cubic(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW, float a);
|
||||
|
||||
void buildTblNN(SizeVector& srcDimPad5d, SizeVector& dstDim5d, std::vector<float>& dataScales, InterpolateLayoutType layout);
|
||||
void buildTblLinearOnnx(SizeVector& srcDimPad5d, SizeVector& dstDim5d, std::vector<float>& dataScales, InterpolateLayoutType layout);
|
||||
|
||||
@@ -990,8 +990,7 @@ void MKLDNNMVNNode::mvn_blk(const in_data_t* src_data, out_data_t* dst_data, con
|
||||
std::vector<float> mean_buffer(aux_buffer_size * threads_num);
|
||||
std::vector<float> variance_buffer(aux_buffer_size * threads_num);
|
||||
|
||||
int actual_N = batchToProcess();
|
||||
for (size_t b = 0lu; b < actual_N; b++) {
|
||||
for (size_t b = 0lu; b < N; b++) {
|
||||
size_t ccb = is_nhwc ? b * C2 : b * C3;
|
||||
if (across_channels) {
|
||||
// mean for this instance in batch
|
||||
|
||||
@@ -141,7 +141,7 @@ struct IdleGuard {
|
||||
};
|
||||
|
||||
MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const DeviceMap<InferenceEngine::ExecutableNetwork>& networksPerDevice,
|
||||
const DeviceMap<DeviceInformation>& networkDevices,
|
||||
const std::vector<DeviceInformation>& networkDevices,
|
||||
const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
|
||||
const bool needPerfCounters) :
|
||||
InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, std::make_shared<InferenceEngine::ImmediateExecutor>()),
|
||||
@@ -154,7 +154,8 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const DeviceMap<Infer
|
||||
auto& device = networkValue.first;
|
||||
auto& network = networkValue.second;
|
||||
|
||||
auto itNumRequests = _devicePriorities.find(device);
|
||||
auto itNumRequests = std::find_if(_devicePriorities.cbegin(), _devicePriorities.cend(),
|
||||
[&device](const DeviceInformation& d){ return d.deviceName == device;});
|
||||
unsigned int optimalNum = 0;
|
||||
try {
|
||||
optimalNum = network.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
|
||||
@@ -165,7 +166,7 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const DeviceMap<Infer
|
||||
<< "Failed to query the metric for the " << device << " with error:" << iie.what();
|
||||
}
|
||||
const auto numRequests = (_devicePriorities.end() == itNumRequests ||
|
||||
itNumRequests->second.numRequestsPerDevices == -1) ? optimalNum : itNumRequests->second.numRequestsPerDevices;
|
||||
itNumRequests->numRequestsPerDevices == -1) ? optimalNum : itNumRequests->numRequestsPerDevices;
|
||||
auto& workerRequests = _workerRequests[device];
|
||||
auto& idleWorkerRequests = _idleWorkerRequests[device];
|
||||
workerRequests.resize(numRequests);
|
||||
@@ -197,7 +198,7 @@ void MultiDeviceExecutableNetwork::ScheduleToWorkerInferRequest() {
|
||||
return _devicePriorities;
|
||||
}();
|
||||
for (auto&& device : devices) {
|
||||
auto& idleWorkerRequests = _idleWorkerRequests[device.first];
|
||||
auto& idleWorkerRequests = _idleWorkerRequests[device.deviceName];
|
||||
WorkerInferRequest* workerRequestPtr = nullptr;
|
||||
if (idleWorkerRequests.try_pop(workerRequestPtr)) {
|
||||
IdleGuard idleGuard{workerRequestPtr, idleWorkerRequests};
|
||||
@@ -258,8 +259,8 @@ void MultiDeviceExecutableNetwork::SetConfig(const std::map<std::string, Inferen
|
||||
assert(multiPlugin != nullptr);
|
||||
auto metaDevices = multiPlugin->ParseMetaDevices(priorities->second, {});
|
||||
|
||||
if (std::any_of(metaDevices.begin(), metaDevices.end(), [](const std::pair<DeviceName, DeviceInformation> & kvp) {
|
||||
return kvp.second.numRequestsPerDevices != -1;
|
||||
if (std::any_of(metaDevices.begin(), metaDevices.end(), [](const DeviceInformation& kvp) {
|
||||
return kvp.numRequestsPerDevices != -1;
|
||||
})) {
|
||||
THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str << "You can only change device priorities but not number of requests"
|
||||
<<" with the Network's SetConfig(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES!";
|
||||
@@ -268,9 +269,10 @@ void MultiDeviceExecutableNetwork::SetConfig(const std::map<std::string, Inferen
|
||||
{
|
||||
std::lock_guard<std::mutex> lock{_mutex};
|
||||
for (auto && device : metaDevices) {
|
||||
if (_networksPerDevice.find(device.first) == _networksPerDevice.end()) {
|
||||
if (_networksPerDevice.find(device.deviceName) == _networksPerDevice.end()) {
|
||||
THROW_IE_EXCEPTION << NOT_FOUND_str << "You can only change device priorities but not add new devices with"
|
||||
<< " the Network's SetConfig(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES. " << device.first <<
|
||||
<< " the Network's SetConfig(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES. "
|
||||
<< device.deviceName <<
|
||||
" device was not in the original device list!";
|
||||
}
|
||||
}
|
||||
@@ -353,9 +355,9 @@ std::map<std::string, std::string> MultiDeviceInferencePlugin::GetSupportedConfi
|
||||
return supportedConfig;
|
||||
}
|
||||
|
||||
DeviceMap<DeviceInformation> MultiDeviceInferencePlugin::ParseMetaDevices(const std::string& priorities,
|
||||
std::vector<DeviceInformation> MultiDeviceInferencePlugin::ParseMetaDevices(const std::string& priorities,
|
||||
const std::map<std::string, std::string> & config) const {
|
||||
DeviceMap<DeviceInformation> metaDevices;
|
||||
std::vector<DeviceInformation> metaDevices;
|
||||
|
||||
// parsing the string and splitting to tokens
|
||||
std::vector<std::string> devicesWithRequests;
|
||||
@@ -399,12 +401,13 @@ DeviceMap<DeviceInformation> MultiDeviceInferencePlugin::ParseMetaDevices(const
|
||||
}
|
||||
|
||||
// create meta device
|
||||
metaDevices[deviceName] = { getDeviceConfig(deviceName), numRequests };
|
||||
auto cfg = getDeviceConfig(deviceName);
|
||||
std::vector<std::string> supportedConfigKeys = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
|
||||
if (std::find(std::begin(supportedConfigKeys), std::end(supportedConfigKeys), CONFIG_KEY_INTERNAL(AGGREGATED_PLUGIN))
|
||||
!= std::end(supportedConfigKeys)) {
|
||||
metaDevices[deviceName].config.emplace(CONFIG_KEY_INTERNAL(AGGREGATED_PLUGIN), "");
|
||||
cfg.emplace(CONFIG_KEY_INTERNAL(AGGREGATED_PLUGIN), "");
|
||||
}
|
||||
metaDevices.push_back({ deviceName, cfg, numRequests });
|
||||
}
|
||||
|
||||
return metaDevices;
|
||||
@@ -470,7 +473,7 @@ ExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadExeNetworkImpl(co
|
||||
THROW_IE_EXCEPTION << "KEY_MULTI_DEVICE_PRIORITIES key is not set for MULTI device";
|
||||
}
|
||||
|
||||
DeviceMap<DeviceInformation> metaDevices = ParseMetaDevices(priorities->second, fullConfig);
|
||||
auto metaDevices = ParseMetaDevices(priorities->second, fullConfig);
|
||||
|
||||
// collect the settings that are applicable to the devices we are loading the network to
|
||||
std::unordered_map<std::string, InferenceEngine::Parameter> multiNetworkConfig;
|
||||
@@ -478,9 +481,8 @@ ExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadExeNetworkImpl(co
|
||||
|
||||
DeviceMap<ExecutableNetwork> executableNetworkPerDevice;
|
||||
for (auto& p : metaDevices) {
|
||||
auto & deviceName = p.first;
|
||||
auto & metaDevice = p.second;
|
||||
auto & deviceConfig = metaDevice.config;
|
||||
auto & deviceName = p.deviceName;
|
||||
auto & deviceConfig = p.config;
|
||||
auto clonedNetwork = cloneNetwork(network);
|
||||
executableNetworkPerDevice.insert({ deviceName, GetCore()->LoadNetwork(CNNNetwork{clonedNetwork}, deviceName, deviceConfig) });
|
||||
multiNetworkConfig.insert(deviceConfig.begin(), deviceConfig.end());
|
||||
@@ -514,16 +516,14 @@ void MultiDeviceInferencePlugin::QueryNetwork(const ICNNNetwork&
|
||||
THROW_IE_EXCEPTION << "KEY_MULTI_DEVICE_PRIORITIES key is not set for MULTI device";
|
||||
}
|
||||
|
||||
DeviceMap<DeviceInformation> metaDevices = ParseMetaDevices(priorities->second, fullConfig);
|
||||
auto metaDevices = ParseMetaDevices(priorities->second, fullConfig);
|
||||
std::unordered_set<std::string> supportedLayers;
|
||||
|
||||
auto allSupportsNgraph =
|
||||
std::all_of(std::begin(metaDevices), std::end(metaDevices),
|
||||
[&] (const DeviceMap<DeviceInformation>::value_type & value) -> bool {
|
||||
auto& deviceName = value.first;
|
||||
auto& metaDevice = value.second;
|
||||
[&] (const DeviceInformation& value) -> bool {
|
||||
auto clonedNetwork = cloneNetwork(network);
|
||||
try { GetCore()->QueryNetwork(*clonedNetwork, deviceName, metaDevice.config); }
|
||||
try { GetCore()->QueryNetwork(*clonedNetwork, value.deviceName, value.config); }
|
||||
catch (const InferenceEngine::details::InferenceEngineException & ex) {
|
||||
std::string message = ex.what();
|
||||
return message.find(NOT_IMPLEMENTED_str) == std::string::npos;
|
||||
@@ -532,12 +532,9 @@ void MultiDeviceInferencePlugin::QueryNetwork(const ICNNNetwork&
|
||||
});
|
||||
|
||||
for (auto&& value : metaDevices) {
|
||||
auto& deviceName = value.first;
|
||||
auto& metaDevice = value.second;
|
||||
|
||||
auto queryNetwork = [&] (const InferenceEngine::ICNNNetwork & networkObject) {
|
||||
auto clonedNetwork = cloneNetwork(networkObject);
|
||||
auto deviceQr = GetCore()->QueryNetwork(*clonedNetwork, deviceName, metaDevice.config);
|
||||
auto deviceQr = GetCore()->QueryNetwork(*clonedNetwork, value.deviceName, value.config);
|
||||
std::unordered_set<std::string> deviceSupportedLayers;
|
||||
for (auto&& layerQr : deviceQr.supportedLayersMap) {
|
||||
deviceSupportedLayers.emplace(layerQr.first);
|
||||
|
||||
@@ -31,6 +31,7 @@ namespace MultiDevicePlugin {
|
||||
using DeviceName = std::string;
|
||||
|
||||
struct DeviceInformation {
|
||||
DeviceName deviceName;
|
||||
std::map<std::string, std::string> config;
|
||||
int numRequestsPerDevices;
|
||||
};
|
||||
@@ -99,7 +100,7 @@ public:
|
||||
using NotBusyWorkerRequests = ThreadSafeQueue<WorkerInferRequest*>;
|
||||
|
||||
explicit MultiDeviceExecutableNetwork(const DeviceMap<InferenceEngine::ExecutableNetwork>& networksPerDevice,
|
||||
const DeviceMap<DeviceInformation>& networkDevices,
|
||||
const std::vector<DeviceInformation>& networkDevices,
|
||||
const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
|
||||
const bool needPerfCounters = false);
|
||||
|
||||
@@ -117,7 +118,7 @@ public:
|
||||
static thread_local WorkerInferRequest* _thisWorkerInferRequest;
|
||||
std::atomic_bool _terminate = {false};
|
||||
std::mutex _mutex;
|
||||
DeviceMap<DeviceInformation> _devicePriorities;
|
||||
std::vector<DeviceInformation> _devicePriorities;
|
||||
DeviceMap<InferenceEngine::ExecutableNetwork> _networksPerDevice;
|
||||
ThreadSafeQueue<Task> _inferPipelineTasks;
|
||||
DeviceMap<NotBusyWorkerRequests> _idleWorkerRequests;
|
||||
@@ -163,7 +164,7 @@ public:
|
||||
InferenceEngine::Parameter GetMetric(const std::string& name,
|
||||
const std::map<std::string, InferenceEngine::Parameter>& options) const override;
|
||||
|
||||
DeviceMap<DeviceInformation> ParseMetaDevices(const std::string & devicesRequestsCfg,
|
||||
std::vector<DeviceInformation> ParseMetaDevices(const std::string & devicesRequestsCfg,
|
||||
const std::map<std::string, std::string> & config) const;
|
||||
|
||||
protected:
|
||||
|
||||
@@ -591,7 +591,8 @@ std::shared_ptr<ngraph::Node> V10Parser::createNode(const std::vector<ngraph::Ou
|
||||
}
|
||||
|
||||
if (!ngraphNode) {
|
||||
THROW_IE_EXCEPTION << "Cannot create " << params.type << " layer " << params.name << " id:" << params.layerId;
|
||||
THROW_IE_EXCEPTION << "Cannot create " << params.type << " layer " << params.name << " id:" << params.layerId
|
||||
<< " from unsupported opset: " << params.version;
|
||||
}
|
||||
|
||||
// Save run time info
|
||||
|
||||
@@ -269,7 +269,7 @@ void CropValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>&
|
||||
}
|
||||
} else if (!casted->dim.empty()) {
|
||||
int dim = casted->dim[i];
|
||||
if (firstShape[axis] < static_cast<size_t>(offset + dim)) {
|
||||
if (firstShape[axis] < (static_cast<size_t>(offset) + dim)) {
|
||||
THROW_IE_EXCEPTION << "Incorrect crop data! Offset(" << offset << ") + result size of output(" << dim
|
||||
<< ") should be less then input size(" << firstShape[axis] << ") for axis(" << axis
|
||||
<< ")";
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <transformations_visibility.hpp>
|
||||
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
|
||||
class TRANSFORMATIONS_API FakeQuantizeMulFusion;
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief This transformation looks for a FQ + Mul pair in the graph and moves
|
||||
* the Mul operation above the FQ node. The last two inputs of FQ are multiplied
|
||||
* by the value that was originally below the FQ node.
|
||||
*/
|
||||
|
||||
class ngraph::pass::FakeQuantizeMulFusion : public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
FakeQuantizeMulFusion();
|
||||
};
|
||||
@@ -0,0 +1,108 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "transformations/common_optimizations/fq_mul_fusion.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/opsets/opset4.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
|
||||
namespace {
|
||||
std::pair<ngraph::Output<ngraph::Node>, ngraph::Output<ngraph::Node>>
|
||||
get_adjusted_output_range(ngraph::Output<ngraph::Node> out_low,
|
||||
ngraph::Output<ngraph::Node> out_high,
|
||||
ngraph::Output<ngraph::Node> multiplier) {
|
||||
const auto mul_out_low = std::make_shared<ngraph::opset4::Multiply>(out_low, multiplier);
|
||||
const auto mul_out_high = std::make_shared<ngraph::opset4::Multiply>(out_high, multiplier);
|
||||
copy_runtime_info({out_low.get_node_shared_ptr(), multiplier.get_node_shared_ptr()},
|
||||
mul_out_low);
|
||||
copy_runtime_info({out_high.get_node_shared_ptr(), multiplier.get_node_shared_ptr()},
|
||||
mul_out_high);
|
||||
|
||||
ngraph::OutputVector new_out_low(1), new_out_high(1);
|
||||
|
||||
if (!mul_out_low->constant_fold(new_out_low, {out_low, multiplier})) {
|
||||
new_out_low[0] = mul_out_low;
|
||||
}
|
||||
|
||||
if (!mul_out_high->constant_fold(new_out_high, {out_high, multiplier})) {
|
||||
new_out_high[0] = mul_out_high;
|
||||
}
|
||||
|
||||
return {new_out_low[0], new_out_high[0]};
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// This transformation multiplies the "output_low" and "output_high" inputs of the FQ operation
|
||||
// by the constant value that before transormation is used to multiply the output of FQ.
|
||||
// Both output_low and output_high are multiplied by the value represented as C (a constant) below.
|
||||
// In case any of the FQ inputs (out_L, out_H) is constant, it gets constant folded with C.
|
||||
//
|
||||
// data in_L in_H out_L out_H
|
||||
// | | | | |
|
||||
// | | | | | data in_L in_H out_L * C out_H * C
|
||||
// v v v v v | | | | |
|
||||
// +-------------------------+ | | | | |
|
||||
// | FakeQuantize | v v v v v
|
||||
// +-------------------------+ +-----------------------------------+
|
||||
// | =====> | FakeQuantize |
|
||||
// v +-----------------------------------+
|
||||
// +----------+ |
|
||||
// | Multiply | <--- C v
|
||||
// +----+-----+
|
||||
// |
|
||||
// v
|
||||
//
|
||||
|
||||
ngraph::pass::FakeQuantizeMulFusion::FakeQuantizeMulFusion() {
|
||||
const auto fq_output_low_p = ngraph::pattern::any_input();
|
||||
const auto fq_output_high_p = ngraph::pattern::any_input();
|
||||
|
||||
const auto fq_node_p = ngraph::pattern::wrap_type<opset4::FakeQuantize>(
|
||||
{ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input(),
|
||||
fq_output_low_p,
|
||||
fq_output_high_p},
|
||||
pattern::consumers_count(1));
|
||||
|
||||
const auto mul_constant_p = ngraph::pattern::wrap_type<opset4::Constant>();
|
||||
const auto mul_node_p = ngraph::pattern::wrap_type<opset4::Multiply>(
|
||||
{fq_node_p, mul_constant_p}, pattern::consumers_count(1));
|
||||
|
||||
ngraph::matcher_pass_callback callback = [=](pattern::Matcher &m) {
|
||||
const auto& pattern_map = m.get_pattern_value_map();
|
||||
|
||||
const auto fq_node = pattern_map.at(fq_node_p).get_node_shared_ptr();
|
||||
|
||||
const auto original_output_low = pattern_map.at(fq_output_low_p);
|
||||
const auto original_output_high = pattern_map.at(fq_output_high_p);
|
||||
const auto mul_constant = pattern_map.at(mul_constant_p);
|
||||
|
||||
const auto new_output_limits = get_adjusted_output_range(
|
||||
original_output_low, original_output_high, mul_constant);
|
||||
|
||||
const auto new_fq_node = fq_node->clone_with_new_inputs({fq_node->input_value(0),
|
||||
fq_node->input_value(1),
|
||||
fq_node->input_value(2),
|
||||
new_output_limits.first,
|
||||
new_output_limits.second});
|
||||
|
||||
const auto mul_node = pattern_map.at(mul_node_p).get_node_shared_ptr();
|
||||
replace_node(mul_node, new_fq_node);
|
||||
|
||||
new_fq_node->set_friendly_name(fq_node->get_friendly_name());
|
||||
copy_runtime_info({fq_node, mul_node}, new_fq_node);
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(mul_node_p,
|
||||
"FakeQuantizeMulFusion");
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
@@ -51,6 +51,7 @@
|
||||
#include <transformations/hswish_decomposition.hpp>
|
||||
#include <transformations/reduce_l1_decomposition.hpp>
|
||||
#include <transformations/reduce_l2_decomposition.hpp>
|
||||
#include <transformations/common_optimizations/fq_mul_fusion.hpp>
|
||||
|
||||
#include <ngraph/pass/constant_folding.hpp>
|
||||
#include <ngraph/pass/manager.hpp>
|
||||
@@ -109,6 +110,9 @@ bool ngraph::pass::ConvertOpSet1ToLegacy::run_on_function(std::shared_ptr<ngraph
|
||||
manager.register_pass<ngraph::pass::GroupConvolutionBackpropDataMultiplyFusion>();
|
||||
manager.register_pass<ngraph::pass::ConstantFolding>();
|
||||
|
||||
// Multiply the thrird and fourth input instead of the output of FQ with all const inputs
|
||||
manager.register_pass<ngraph::pass::FakeQuantizeMulFusion>();
|
||||
|
||||
// Convolution/Deconvolution/FullyConnected fusions
|
||||
auto convert_convolutions = manager.register_pass<ngraph::pass::GraphRewrite>();
|
||||
convert_convolutions->add_matcher<ngraph::pass::ConvertConvolution>();
|
||||
|
||||
@@ -26,6 +26,8 @@ ngraph::pass::FullyConnectedBiasFusion::FullyConnectedBiasFusion() {
|
||||
|
||||
if (m_fc == nullptr) {
|
||||
m_fc = std::dynamic_pointer_cast<op::FullyConnected>(add_input_1);
|
||||
if (m_fc == nullptr)
|
||||
return false;
|
||||
m_bias = add_input_0;
|
||||
}
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ ngraph::pass::ReduceL1Decomposition::ReduceL1Decomposition() {
|
||||
auto &pattern_to_output = m.get_pattern_value_map();
|
||||
auto reduce_l1_node = std::dynamic_pointer_cast<ngraph::opset4::ReduceL1>(pattern_to_output.at(reduce_l1).get_node_shared_ptr());
|
||||
|
||||
if (m_transformation_callback(reduce_l1_node)) {
|
||||
if (reduce_l1_node == nullptr || m_transformation_callback(reduce_l1_node)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ ngraph::pass::ReduceL2Decomposition::ReduceL2Decomposition() {
|
||||
auto &pattern_to_output = m.get_pattern_value_map();
|
||||
auto reduce_l2_node = std::dynamic_pointer_cast<ngraph::opset4::ReduceL2>(pattern_to_output.at(reduce_l2).get_node_shared_ptr());
|
||||
|
||||
if (m_transformation_callback(reduce_l2_node)) {
|
||||
if (reduce_l2_node == nullptr || m_transformation_callback(reduce_l2_node)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -24,6 +24,4 @@ private:
|
||||
Transformations transformations;
|
||||
};
|
||||
|
||||
void printTo(std::ostream& stream, const ngraph::NodeTypeInfo& object);
|
||||
|
||||
} // namespace vpu
|
||||
|
||||
@@ -7,3 +7,9 @@
|
||||
#include "ngraph/node.hpp"
|
||||
|
||||
std::vector<std::int64_t> evaluateTargetShape(const ngraph::Output<ngraph::Node>& value);
|
||||
|
||||
namespace vpu {
|
||||
|
||||
void printTo(std::ostream& stream, const ngraph::NodeTypeInfo& object);
|
||||
|
||||
} // namespace vpu
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user