Compare commits

..

10 Commits

Author SHA1 Message Date
Alexey Suhov
f26da46e3b Publishing 2020.3.1 LTS content (#3108) 2020-11-12 19:35:17 +03:00
Alexander Zhogov
cd95d8d3bb Azure CI: Disable Ninja on Mac due to errors (#809) 2020-06-06 18:29:31 +03:00
azhogov
5c6a0cb922 Azure: Add Ninja 2020-06-06 16:29:54 +03:00
azhogov
2e634cafc9 Add CODEOWNERS and CONTRIBUTING.md 2020-06-06 16:15:24 +03:00
Alexander Zhogov
28f258e18d Enable public CI (#789)
* Enable public CI

* Exclude failed nGraph UT by *GPU*:*CPU*

* Disable absent tests

* Exclude failed nGraph UT constant.shared_data
2020-06-05 15:55:45 +03:00
Alexey Suhov
2fe9b15230 change repo name to openvino in readme files 2020-06-03 00:08:25 +03:00
Alexey Suhov
9221f41b01 fix permissions for shell scripts 2020-06-02 22:32:00 +03:00
Alexey Suhov
85de6ee857 Publishing 2020.3 content 2020-06-02 21:59:45 +03:00
Moshe David
acad2e01e5 w (#394)
Co-authored-by: modav <modav@microsoft.com>
2020-05-26 00:28:09 +03:00
Ian Hunter
94dd082199 Fix link to Linux Guide (#494) 2020-05-14 13:52:13 +03:00
9851 changed files with 653631 additions and 1024748 deletions

View File

@@ -1,161 +0,0 @@
jobs:
- job: Lin
# About 150% of total time
timeoutInMinutes: 90
pool:
name: LIN_VMSS_VENV_F8S_WU2
variables:
system.debug: true
VSTS_HTTP_RETRY: 5
VSTS_HTTP_TIMEOUT: 200
WORKERS_NUMBER: 8
BUILD_TYPE: Release
REPO_DIR: $(Build.Repository.LocalPath)
WORK_DIR: $(Pipeline.Workspace)/_w
BUILD_DIR: $(WORK_DIR)/build
BIN_DIR: $(REPO_DIR)/bin/intel64/$(BUILD_TYPE)
steps:
- script: |
curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
whoami
uname -a
which python3
python3 --version
which java
java -version
gcc --version
lsb_release
env
cat /proc/cpuinfo
cat /proc/meminfo
cat /etc/fstab
vmstat -s
df
lsblk -o NAME,HCTL,SIZE,MOUNTPOINT | grep -i "sd"
displayName: 'System info'
- script: |
rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
rm -rf $(BUILD_DIR) ; mkdir $(BUILD_DIR)
displayName: 'Make dir'
- checkout: self
clean: true
lfs: false
submodules: recursive
path: openvino
- script: |
sudo apt --assume-yes install libusb-1.0-0-dev
python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt
# For running Python API tests
python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt
# Speed up build
wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip
unzip ninja-linux.zip
sudo cp -v ninja /usr/local/bin/
# Speed up tests
git clone https://github.com/google/gtest-parallel.git
workingDirectory: $(WORK_DIR)
displayName: 'Install dependencies'
- task: CMake@1
inputs:
# CMake must get Python 3.x version by default
cmakeArgs: -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE=/usr/bin/python3.6 -DENABLE_TESTS=ON $(REPO_DIR)
workingDirectory: $(BUILD_DIR)
- script: ninja
workingDirectory: $(BUILD_DIR)
displayName: 'Build Lin'
- script: ls -alR $(REPO_DIR)/bin/
displayName: 'List files'
- script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml
displayName: 'nGraph UT'
continueOnError: false
- script: $(BIN_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_output=xml:TEST-InferenceEngineUnitTests.xml
displayName: 'IE UT old'
continueOnError: false
- script: $(BIN_DIR)/ieUnitTests --gtest_output=xml:TEST-ieUnitTests.xml
displayName: 'IE UT'
continueOnError: false
- script: $(BIN_DIR)/cpuUnitTests --gtest_output=xml:TEST-cpuUnitTests.xml
displayName: 'CPU UT'
continueOnError: false
- script: $(BIN_DIR)/gnaUnitTests --gtest_output=xml:TEST-gnaUnitTests.xml
displayName: 'GNA UT'
continueOnError: false
- script: $(BIN_DIR)/vpuUnitTests --gtest_output=xml:TEST-vpuUnitTests.xml
displayName: 'VPU UT'
continueOnError: false
- script: $(BIN_DIR)/onnxImporterUnitTests --gtest_output=xml:TEST-onnxImporterUnitTests.xml
displayName: 'ONNX Importer UT'
continueOnError: false
- script: $(BIN_DIR)/ieFuncTests --gtest_output=xml:TEST-ieFuncTests.xml
displayName: 'IE FuncTests'
continueOnError: false
- script: $(BIN_DIR)/cpuFuncTests --gtest_filter=*smoke* --gtest_print_time=1 --gtest_output=xml:TEST-cpuFuncTests.xml
displayName: 'CPU FuncTests'
continueOnError: false
- script: $(BIN_DIR)/MklDnnBehaviorTests --gtest_output=xml:TEST-MklDnnBehaviorTests.xml
displayName: 'MklDnnBehaviorTests'
continueOnError: false
- script: |
git clone https://github.com/openvinotoolkit/testdata.git
workingDirectory: $(WORK_DIR)
displayName: 'Clone testdata'
- script: |
export DATA_PATH=$(WORK_DIR)/testdata
export MODELS_PATH=$(WORK_DIR)/testdata
python3 $(WORK_DIR)/gtest-parallel/gtest-parallel $(BIN_DIR)/MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=MklDnnFunctionalTests.json --gtest_filter=*smoke* -- --gtest_print_time=1
workingDirectory: $(WORK_DIR)
displayName: 'MklDnnFunctionalTests'
continueOnError: false
- script: |
export DATA_PATH=$(WORK_DIR)/testdata
export MODELS_PATH=$(WORK_DIR)/testdata
$(BIN_DIR)/InferenceEngineCAPITests --gtest_output=xml:TEST-InferenceEngineCAPITests.xml
displayName: 'IE CAPITests'
continueOnError: false
- script: |
export DATA_PATH=$(WORK_DIR)/testdata
export MODELS_PATH=$(WORK_DIR)/testdata
export LD_LIBRARY_PATH=$(BIN_DIR)/lib
export PYTHONPATH=$(BIN_DIR)/lib/python_api/python3.6
env
cd $(REPO_DIR)/inference-engine/ie_bridges/python/tests
pytest pytest --junitxml=TEST-PythonAPI.xml
displayName: 'Python API Tests'
continueOnError: false
enabled: false
- task: PublishTestResults@2
condition: always()
inputs:
testResultsFormat: 'JUnit' # Options: JUnit, NUnit, VSTest, xUnit, cTest
testResultsFiles: '**/TEST-*.xml'
#searchFolder: '$(BUILD_DIR)'
mergeTestResults: false # Optional
#failTaskOnFailedTests: false # Optional
#testRunTitle: 'Pre/Post-Commit' # Optional
buildPlatform: 'x64' # Optional
buildConfiguration: 'Linux' # Optional
#publishRunAttachments: true # Optional

View File

@@ -1,95 +0,0 @@
jobs:
- job: nGraph_ONNX_Lin
# About 150% of total time
timeoutInMinutes: 60
pool:
name: LIN_VMSS_VENV_F8S_WU2
variables:
system.debug: true
VSTS_HTTP_RETRY: 5
VSTS_HTTP_TIMEOUT: 200
WORKERS_NUMBER: 8
BUILD_TYPE: Release
REPO_DIR: $(Build.Repository.LocalPath)
WORK_DIR: $(Pipeline.Workspace)/_w
BUILD_DIR: $(WORK_DIR)/build
BIN_DIR: $(REPO_DIR)/bin/intel64/$(BUILD_TYPE)
INSTALL_DIR: $(WORK_DIR)/install
steps:
- checkout: self
clean: true
lfs: false
submodules: recursive
path: openvino
- script: |
curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
whoami
uname -a
which python3
python3 --version
gcc --version
lsb_release
env
cat /proc/cpuinfo
cat /proc/meminfo
vmstat -s
df
displayName: 'System info'
- script: |
rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
displayName: 'Make dir'
- script: |
sudo apt --assume-yes install libusb-1.0-0-dev
python3 -m pip install -r ./inference-engine/ie_bridges/python/requirements.txt
# For running Python API tests
python3 -m pip install -r ./inference-engine/ie_bridges/python/src/requirements-dev.txt
displayName: 'Install dependencies'
enabled: false
- script: |
wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip
unzip ninja-linux.zip
sudo cp -v ninja /usr/local/bin/
workingDirectory: $(WORK_DIR)
displayName: 'Install Ninja'
enabled: false
- task: CMake@1
inputs:
# CMake must get Python 3.x version by default
cmakeArgs: -GNinja -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_VPU=OFF -DENABLE_GNA=OFF -DENABLE_OPENCV=OFF -DENABLE_CPPLINT=OFF -DENABLE_TESTS=OFF -DENABLE_BEH_TESTS=OFF -DENABLE_FUNCTIONAL_TESTS=OFF -DENABLE_MKL_DNN=ON -DENABLE_CLDNN=OFF -DENABLE_PROFILING_ITT=OFF -DENABLE_SAMPLES=OFF -DENABLE_SPEECH_DEMO=OFF -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE=/usr/bin/python3.6 -DNGRAPH_ONNX_IMPORT_ENABLE=ON -DNGRAPH_INTERPRETER_ENABLE=ON -DNGRAPH_DEBUG_ENABLE=OFF -DNGRAPH_DYNAMIC_COMPONENTS_ENABLE=ON -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) $(REPO_DIR)
workingDirectory: $(BUILD_DIR)
enabled: false
- script: ninja
workingDirectory: $(BUILD_DIR)
displayName: 'Build'
enabled: false
- script: make install
workingDirectory: $(BUILD_DIR)
displayName: 'Install'
enabled: false
- script: |
ls -alR $(REPO_DIR)/bin/
ls -alR $(INSTALL_DIR)
displayName: 'List files'
enabled: false
- script: docker build --tag=openvino-onnx-ci-image --file=$(REPO_DIR)/.ci/openvino-onnx/Dockerfile .
workingDirectory: $(BUILD_DIR)
displayName: 'Docker build'
enabled: false
- script: docker run --name openvino-onnx-ci-container openvino-onnx-ci-image
workingDirectory: $(BUILD_DIR)
displayName: 'Docker run tests'
enabled: false

View File

@@ -1,144 +0,0 @@
jobs:
- job: Mac
# About 200% of total time (perfomace of Mac hosts is unstable)
timeoutInMinutes: 240
pool:
vmImage: 'macOS-10.15'
variables:
system.debug: true
VSTS_HTTP_RETRY: 5
VSTS_HTTP_TIMEOUT: 200
WORKERS_NUMBER: 3
BUILD_TYPE: Release
REPO_DIR: $(Build.Repository.LocalPath)
WORK_DIR: $(Pipeline.Workspace)/_w
BUILD_DIR: $(WORK_DIR)/build
BIN_DIR: $(REPO_DIR)/bin/intel64/$(BUILD_TYPE)
steps:
- script: |
whoami
uname -a
which python3
python3 --version
which java
java -version
gcc --version
xcrun --sdk macosx --show-sdk-version
env
sysctl -a
displayName: 'System info'
- script: |
rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
rm -rf $(BUILD_DIR) ; mkdir $(BUILD_DIR)
displayName: 'Make dir'
- checkout: self
clean: true
lfs: false
submodules: recursive
path: openvino
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7'
- script: |
brew install cython
brew install automake
# Speed up build
brew install ninja
# Speed up tests
git clone https://github.com/google/gtest-parallel.git
workingDirectory: $(WORK_DIR)
displayName: 'Install dependencies'
- script: |
export PATH="/usr/local/opt/cython/bin:$PATH"
export CC=gcc
export CXX=g++
# Disable errors with Ninja
export CXXFLAGS="-Wno-error=unused-command-line-argument"
export CFLAGS="-Wno-error=unused-command-line-argument"
cmake -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON $(REPO_DIR)
workingDirectory: $(BUILD_DIR)
displayName: 'CMake'
- script: ninja
workingDirectory: $(BUILD_DIR)
displayName: 'Build Mac'
- script: ls -alR $(REPO_DIR)/bin/
displayName: 'List files'
- script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid --gtest_output=xml:TEST-NGraphUT.xml
displayName: 'nGraph UT'
continueOnError: false
- script: $(BIN_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_output=xml:TEST-InferenceEngineUnitTests.xml
displayName: 'IE UT old'
continueOnError: false
- script: $(BIN_DIR)/ieUnitTests --gtest_output=xml:TEST-ieUnitTests.xml
displayName: 'IE UT'
continueOnError: false
- script: $(BIN_DIR)/cpuUnitTests --gtest_output=xml:TEST-cpuUnitTests.xml
displayName: 'CPU UT'
continueOnError: false
- script: $(BIN_DIR)/vpuUnitTests --gtest_output=xml:TEST-vpuUnitTests.xml
displayName: 'VPU UT'
continueOnError: false
- script: $(BIN_DIR)/onnxImporterUnitTests --gtest_output=xml:TEST-onnxImporterUnitTests.xml
displayName: 'ONNX Importer UT'
continueOnError: false
- script: $(BIN_DIR)/ieFuncTests --gtest_output=xml:TEST-ieFuncTests.xml
displayName: 'IE FuncTests'
continueOnError: false
- script: $(BIN_DIR)/cpuFuncTests --gtest_filter=*smoke* --gtest_print_time=1 --gtest_output=xml:TEST-cpuFuncTests.xml
displayName: 'CPU FuncTests'
continueOnError: false
- script: $(BIN_DIR)/MklDnnBehaviorTests --gtest_output=xml:TEST-MklDnnBehaviorTests.xml
displayName: 'MklDnnBehaviorTests'
continueOnError: false
- script: |
git clone --single-branch --branch releases/2021/2 https://github.com/openvinotoolkit/testdata.git
workingDirectory: $(WORK_DIR)
displayName: 'Clone testdata'
- script: |
export DATA_PATH=$(WORK_DIR)/testdata
export MODELS_PATH=$(WORK_DIR)/testdata
python3 $(WORK_DIR)/gtest-parallel/gtest-parallel $(BIN_DIR)/MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=MklDnnFunctionalTests.json --gtest_filter=*smoke*:-smoke_MobileNet/ModelTransformationsTest.LPT/mobilenet_v2_tf_depthwise_batch1_inPluginDisabled_inTestDisabled_asymmetric* -- --gtest_print_time=1
workingDirectory: $(WORK_DIR)
displayName: 'MklDnnFunctionalTests'
continueOnError: false
- script: |
export DATA_PATH=$(WORK_DIR)/testdata
export MODELS_PATH=$(WORK_DIR)/testdata
$(BIN_DIR)/InferenceEngineCAPITests --gtest_output=xml:TEST-InferenceEngineCAPITests.xml
displayName: 'IE CAPITests'
continueOnError: false
- task: PublishTestResults@2
condition: always()
inputs:
testResultsFormat: 'JUnit' # Options: JUnit, NUnit, VSTest, xUnit, cTest
testResultsFiles: '**/TEST-*.xml'
#searchFolder: '$(BUILD_DIR)'
mergeTestResults: false # Optional
#failTaskOnFailedTests: false # Optional
#testRunTitle: 'Pre/Post-Commit' # Optional
buildPlatform: 'x64' # Optional
buildConfiguration: 'Mac' # Optional
#publishRunAttachments: true # Optional

View File

@@ -1,178 +0,0 @@
jobs:
- job: Win
# About 150% of total time
timeoutInMinutes: 120
pool:
name: WIN_VMSS_VENV_F8S_WU2
variables:
system.debug: true
VSTS_HTTP_RETRY: 5
VSTS_HTTP_TIMEOUT: 200
WORKERS_NUMBER: 8
BUILD_TYPE: Release
REPO_DIR: $(Build.Repository.LocalPath)
WORK_DIR: $(Pipeline.Workspace)\_w
BUILD_DIR: D:\build
BIN_DIR: $(REPO_DIR)\bin\intel64
MSVS_VARS_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat
MSVC_COMPILER_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\bin\Hostx64\x64\cl.exe
steps:
- script: |
powershell -command "Invoke-RestMethod -Headers @{\"Metadata\"=\"true\"} -Method GET -Uri http://169.254.169.254/metadata/instance/compute?api-version=2019-06-01 | format-custom"
where python3
where python
python --version
where java
java -version
wmic computersystem get TotalPhysicalMemory
wmic cpu list
wmic logicaldisk get description,name
wmic VOLUME list
set
displayName: 'System info'
- script: |
rd /Q /S $(WORK_DIR) & mkdir $(WORK_DIR)
rd /Q /S $(BUILD_DIR) & mkdir $(BUILD_DIR)
displayName: 'Make dir'
- checkout: self
clean: true
lfs: false
submodules: recursive
path: openvino
- script: |
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-win.zip ninja-win.zip
powershell -command "Expand-Archive -Force ninja-win.zip"
git clone https://github.com/google/gtest-parallel.git
workingDirectory: $(WORK_DIR)
displayName: 'Install dependencies'
- script: |
certutil -urlcache -split -f https://incredibuilddiag1wu2.blob.core.windows.net/incredibuild/IBSetupConsole_9_5_0.exe IBSetupConsole_9_5_0.exe
call IBSetupConsole_9_5_0.exe /Install /Components=Agent,oneuse /Coordinator=11.1.0.4 /AGENT:OPENFIREWALL=ON /AGENT:AUTOSELECTPORTS=ON /ADDTOPATH=ON /AGENT:INSTALLADDINS=OFF
workingDirectory: $(WORK_DIR)
displayName: 'Install IncrediBuild'
- script: |
echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
reg add HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Xoreax\IncrediBuild\Builder /f /v LastEnabled /d 0 && echo Start IncrediBuild_Agent && net start IncrediBuild_Agent
displayName: 'Start IncrediBuild'
- script: |
set PATH=$(WORK_DIR)\ninja-win;%PATH%
call "$(MSVS_VARS_PATH)" && cmake -GNinja -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
workingDirectory: $(BUILD_DIR)
displayName: 'CMake'
- script: |
set PATH=$(WORK_DIR)\ninja-win;%PATH%
call "$(MSVS_VARS_PATH)" && "C:\Program Files (x86)\IncrediBuild\BuildConsole.exe" /COMMAND="ninja" /MaxCPUS=40
workingDirectory: $(BUILD_DIR)
displayName: 'Build Win'
- script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
displayName: Stop IncrediBuild
continueOnError: true
- script: dir $(REPO_DIR)\bin\ /s
displayName: 'List files'
- script: |
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml
displayName: 'nGraph UT'
continueOnError: false
- script: |
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\InferenceEngineUnitTests --gtest_print_time=1 --gtest_output=xml:TEST-InferenceEngineUnitTests.xml
displayName: 'IE UT old'
continueOnError: false
- script: |
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\ieUnitTests --gtest_output=xml:TEST-ieUnitTests.xml
displayName: 'IE UT'
continueOnError: false
- script: |
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\cpuUnitTests --gtest_output=xml:TEST-cpuUnitTests.xml
displayName: 'CPU UT'
continueOnError: false
- script: |
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\gnaUnitTests --gtest_output=xml:TEST-gnaUnitTests.xml
displayName: 'GNA UT'
continueOnError: false
- script: |
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\vpuUnitTests --gtest_output=xml:TEST-vpuUnitTests.xml
displayName: 'VPU UT'
continueOnError: false
- script: |
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\onnxImporterUnitTests --gtest_output=xml:TEST-onnxImporterUnitTests.xml
displayName: 'ONNX Importer UT'
continueOnError: false
- script: |
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\ieFuncTests --gtest_output=xml:TEST-ieFuncTests.xml
displayName: 'IE FuncTests'
continueOnError: false
- script: |
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\cpuFuncTests --gtest_filter=*smoke* --gtest_print_time=1 --gtest_output=xml:TEST-cpuFuncTests.xml
displayName: 'CPU FuncTests'
continueOnError: false
- script: |
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\MklDnnBehaviorTests --gtest_output=xml:TEST-MklDnnBehaviorTests.xml
displayName: 'MklDnnBehaviorTests'
continueOnError: false
- script: |
git clone https://github.com/openvinotoolkit/testdata.git
workingDirectory: $(BUILD_DIR)
displayName: 'Clone testdata'
# Add for gtest-parallel, it hangs now (CVS-33386)
#python $(BUILD_DIR)\gtest-parallel\gtest-parallel $(BIN_DIR)\MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=MklDnnFunctionalTests.json --gtest_filter=*smoke* -- --gtest_print_time=1
- script: |
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.0\opencv\bin;%PATH%
set DATA_PATH=$(BUILD_DIR)\testdata
set MODELS_PATH=$(BUILD_DIR)\testdata
$(BIN_DIR)\MklDnnFunctionalTests --gtest_filter=*smoke* --gtest_print_time=1 --gtest_output=xml:TEST-MklDnnFunctionalTests.xml
displayName: 'MklDnnFunctionalTests'
continueOnError: false
- script: |
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.0\opencv\bin;%PATH%
set DATA_PATH=$(BUILD_DIR)\testdata
set MODELS_PATH=$(BUILD_DIR)\testdata
$(BIN_DIR)\InferenceEngineCAPITests --gtest_output=xml:TEST-InferenceEngineCAPITests.xml
displayName: 'IE CAPITests'
continueOnError: false
- task: PublishTestResults@2
condition: always()
inputs:
testResultsFormat: 'JUnit' # Options: JUnit, NUnit, VSTest, xUnit, cTest
testResultsFiles: '**/TEST-*.xml'
#searchFolder: '$(BUILD_DIR)'
mergeTestResults: false # Optional
#failTaskOnFailedTests: false # Optional
#testRunTitle: 'Pre/Post-Commit' # Optional
buildPlatform: 'x64' # Optional
buildConfiguration: 'Windows' # Optional
#publishRunAttachments: true # Optional

View File

@@ -1,83 +0,0 @@
FROM ubuntu:20.04
LABEL version=2020.07.09.1
ARG http_proxy
ARG https_proxy
ENV http_proxy ${http_proxy}
ENV https_proxy ${https_proxy}
ENV CI=true
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED 1
# Install base dependencies
RUN apt-get update && apt-get install -y locales && apt-get clean autoclean && apt-get autoremove -y
# Set the locale to en_US.UTF-8
RUN locale-gen en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8
RUN apt-get update && apt-get -y --no-install-recommends install \
# OpenVINO dependencies
autoconf \
automake \
build-essential \
cmake \
curl \
git \
libtool \
ocl-icd-opencl-dev \
pkg-config \
unzip \
wget \
# Python dependencies
python3 \
python3-pip \
python3-dev \
python3-virtualenv \
cython3 \
tox \
# ONNX dependencies
git-lfs \
protobuf-compiler \
libprotobuf-dev && \
apt-get clean autoclean && \
apt-get autoremove -y
# Build OpenVINO
COPY . /openvino/
WORKDIR /openvino/build
RUN cmake .. \
-DCMAKE_BUILD_TYPE=Release \
-DENABLE_VPU=OFF \
-DENABLE_GNA=OFF \
-DENABLE_OPENCV=OFF \
-DENABLE_CPPLINT=OFF \
-DENABLE_TESTS=OFF \
-DENABLE_BEH_TESTS=OFF \
-DENABLE_FUNCTIONAL_TESTS=OFF \
-DENABLE_MKL_DNN=ON \
-DENABLE_CLDNN=OFF \
-DENABLE_PROFILING_ITT=OFF \
-DENABLE_SAMPLES=OFF \
-DENABLE_SPEECH_DEMO=OFF \
-DENABLE_PYTHON=ON \
-DPYTHON_EXECUTABLE=/usr/bin/python3 \
-DNGRAPH_ONNX_IMPORT_ENABLE=ON \
-DNGRAPH_INTERPRETER_ENABLE=ON \
-DNGRAPH_DEBUG_ENABLE=OFF \
-DNGRAPH_DYNAMIC_COMPONENTS_ENABLE=ON \
-DCMAKE_INSTALL_PREFIX=/openvino/dist
RUN make -j $(nproc) install
# Run tests via tox
WORKDIR /openvino/ngraph/python
ENV NGRAPH_CPP_BUILD_PATH=/openvino/dist
ENV LD_LIBRARY_PATH=/openvino/dist/lib
ENV NGRAPH_ONNX_IMPORT_ENABLE=TRUE
ENV PYTHONPATH=/openvino/bin/intel64/Release/lib/python_api/python3.8:${PYTHONPATH}
RUN git clone --recursive https://github.com/pybind/pybind11.git -b v2.5.0 --depth 1
CMD tox

View File

@@ -1,152 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
DOCKER_CONTAINER_NAME= "openvino-onnx-ci-container"
DOCKER_IMAGE_TAG = "openvino-onnx-ci-image"
// workaround for aborting previous builds on PR update
@NonCPS
def stopPreviousRunningBuilds() {
def jobname = env.JOB_NAME
if (jobname.startsWith("onnx/openvino_ci/PR")){
def buildnum = env.BUILD_NUMBER.toInteger()
def job = Jenkins.instance.getItemByFullName(jobname)
def job_newest = job.builds.first()
for (build in job.builds.reverse()[0..<-1]) {
if (build.isBuilding()){
echo "Stop task = ${build} because newest #${job_newest} is on the way"
build.doStop();
continue;
}
}
}
}
def getGitPrInfo(String project) {
def gitPrInfo = [
prAuthorEmail : "",
commitAuthorEmail : "",
commitHash : "",
commitSubject : ""
]
try {
dir ("${WORKDIR}/${project}") {
gitPrInfo.prAuthorEmail = sh (script: 'git log -1 --pretty="format:%ae" ', returnStdout: true).trim()
gitPrInfo.commitAuthorEmail = sh (script: 'git log -1 --pretty="format:%ce" ', returnStdout: true).trim()
gitPrInfo.commitSubject = sh (script: 'git log -1 --pretty="format:%H" ', returnStdout: true).trim()
gitPrInfo.commitHash = sh (script: 'git log -1 --pretty="format:%s" ', returnStdout: true).trim()
}
}
catch(e) {
echo "Failed to retrieve ${project} git repository information!"
echo "ERROR: ${e}"
}
return gitPrInfo
}
def notifyByEmail(def gitPrInfo) {
stage('Notify') {
String notifyPeople = "${gitPrInfo.prAuthorEmail}, ${gitPrInfo.commitAuthorEmail}"
emailext (
subject: "OpenVino CI: PR ${CHANGE_ID} ${currentBuild.result}!",
body: """
Status: ${currentBuild.result}
Pull Request Title: ${CHANGE_TITLE}
Pull Request: ${CHANGE_URL}
Branch: ${CHANGE_BRANCH}
Commit Hash: ${gitPrInfo.commitSubject}
Commit Subject: ${gitPrInfo.commitHash}
Jenkins Build: ${RUN_DISPLAY_URL}
""",
to: "${notifyPeople}"
)
}
}
def gitSubmoduleUpdate(String repository_name) {
dir ("${WORKDIR}/${repository_name}") {
sh label: "Init ${repository_name} submodules",
script:
"""
git submodule init && git submodule update \
--init \
--no-fetch \
--recursive
"""
}
}
def buildDockerImage() {
sh """
docker build --tag=${DOCKER_IMAGE_TAG} --file=.ci/openvino-onnx/Dockerfile \
--build-arg http_proxy=http://proxy-chain.intel.com:911/ \
--build-arg https_proxy=http://proxy-chain.intel.com:912/ .
"""
}
def runTests() {
sh """
docker run --name ${DOCKER_CONTAINER_NAME} \
--volume ${HOME}/ONNX_CI/onnx-models-28-Oct/.onnx/model_zoo:/root/.onnx/model_zoo \
--volume ${HOME}/ONNX_CI/onnx-models/.onnx/model_zoo/MSFT:/root/.onnx/model_zoo/MSFT \
${DOCKER_IMAGE_TAG}
"""
}
pipeline {
agent {
label "OpenVino"
}
environment {
PROJECT_NAME = "openvino"
WORKDIR = "${WORKSPACE}/${BUILD_NUMBER}"
}
options {
skipDefaultCheckout true
timeout(activity: true, time: 10, unit: 'MINUTES')
}
stages {
stage("Clone repository") {
steps{
stopPreviousRunningBuilds()
dir("${WORKDIR}") {
checkout scm
}
gitSubmoduleUpdate(PROJECT_NAME)
}
}
stage("Prepare Docker environment") {
steps{
dir("${WORKDIR}") {
buildDockerImage()
}
}
}
stage("Run tests") {
options {
timeout(time: 15, unit: 'MINUTES')
}
steps{
runTests()
}
}
}
post {
failure {
script {
gitPrInfo = getGitPrInfo(PROJECT_NAME)
notifyByEmail(gitPrInfo)
}
}
cleanup {
dir("${WORKDIR}") {
deleteDir()
sh """
docker image prune -f
docker rm -f ${DOCKER_CONTAINER_NAME}
"""
}
}
}
}

View File

@@ -1,65 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
timeout(30)
{
node(LABEL) {
BUILD_WORKSPACE = "$WORKSPACE/$BUILD_NUMBER"
WATCHDOG_ROOT = "$BUILD_WORKSPACE/.ci/openvino-onnx/watchdog"
VENV_PATH = "${BUILD_WORKSPACE}/.wdvenv"
try {
stage("Clone repository") {
dir ("$BUILD_WORKSPACE") {
checkout([$class: 'GitSCM', branches: [[name: "*/$BRANCH"]],
doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'CloneOption', timeout: 30]], submoduleCfg: [],
userRemoteConfigs: [[credentialsId: "${GITHUB_KEY}", url: "${OPEN_VINO_URL}"]]])
}
}
stage("Prepare environment") {
sh """#!/bin/bash
if [ ! -d ${VENV_PATH} ]; then
python3 -m venv ${VENV_PATH}
source ${VENV_PATH}/bin/activate
pip install -r ${WATCHDOG_ROOT}/requirements.txt
fi
"""
}
stage("Run script") {
withCredentials([
usernamePassword(credentialsId: '7157091e-bc04-42f0-99fd-dc4da2922a55',
usernameVariable: 'username',
passwordVariable: 'password')])
{
dir ("$BUILD_WORKSPACE") {
sh """#!/bin/bash
source ${VENV_PATH}/bin/activate
export PYTHONHTTPSVERIFY=0
python ${WATCHDOG_ROOT}/src/main.py \
--msteams-url=${MSTEAMS_URL_FILE} \
--github-credentials '${username}' '${password}' \
--github-org=${GITHUB_ORG} \
--github-project=${GITHUB_PROJECT} \
--jenkins-token=${JENKINS_TOKEN_FILE} \
--jenkins-server=${JENKINS_SERVER} \
--jenkins-user=${JENKINS_USER} \
--ci-job=${CI_JOB_NAME} \
--watchdog-job=${WATCHDOG_JOB_NAME}
"""
}
}
}
} catch (e) {
echo "$e"
currentBuild.result = "FAILURE"
} finally {
stage("Cleanup") {
sh """
cd $BUILD_WORKSPACE
rm -rf ..?* .[!.]* *
"""
}
}
}
}

View File

@@ -1,6 +0,0 @@
python-jenkins==1.7.0
retrying==1.3.3
pygithub==1.51
timeout-decorator==0.4.1
requests==2.23.0
wheel

View File

@@ -1,108 +0,0 @@
#!/usr/bin/python3
# Copyright (C) 2018-2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import logging
import timeout_decorator
from datetime import datetime
from retrying import retry
from github import Github, GithubException
# Logging
logging.basicConfig(format='%(name)s - %(levelname)s - %(message)s')
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
_RETRY_LIMIT = 3
_RETRY_COOLDOWN_MS = 2000
_REQUEST_TIMEOUT_S = 10
class GitWrapper:
"""Class wrapping PyGithub API.
The purpose of this class is to wrap methods from PyGithub API used in Watchdog, for less error-prone and
more convenient use. Docs for used API, including wrapped methods can be found at:
https://pygithub.readthedocs.io/en/latest/introduction.html
:param github_credentials: Credentials used for GitHub
:param repository: GitHub repository name
:param project: GitHub project name
:type github_credentials: String
:type repository: String
:type project: String
"""
def __init__(self, github_credentials, repository, project):
self.git = Github(*github_credentials)
self.repository = repository
self.project = project
self.github_credentials = github_credentials
@retry(stop_max_attempt_number=_RETRY_LIMIT, wait_fixed=_RETRY_COOLDOWN_MS)
def get_git_time(self):
"""Retrieve time from GitHub.
Used to reliably determine time during Watchdog run.
:return: Datetime object describing current time
:rtype: datetime
"""
try:
datetime_object = self._get_git_time()
except ValueError as e:
raise GitWrapperError(str(e))
except GithubException as e:
message = 'GitHub Exception during API status retrieval. Exception: {}'.format(str(e))
raise GitWrapperError(message)
except timeout_decorator.TimeoutError:
message = 'GitHub Exception during API status retrieval. Timeout during API request.'
raise GitWrapperError(message)
return datetime_object
@retry(stop_max_attempt_number=_RETRY_LIMIT, wait_fixed=_RETRY_COOLDOWN_MS)
def get_pull_requests(self):
"""Retrieve paginated list of pull requests from GitHub.
:return: Paginated list of Pull Requests in GitHub repo
:rtype: github.PaginatedList.PaginatedList of github.PullRequest.PullRequest
"""
try:
prs = self._get_pull_requests()
except GithubException as e:
message = 'GitHub Exception during API status retrieval. Exception: {}'.format(str(e))
raise GitWrapperError(message)
return prs
@timeout_decorator.timeout(_REQUEST_TIMEOUT_S)
def _get_git_time(self):
"""Private method retrieving time from GitHub.
:return: Datetime object describing current time
:rtype: datetime
"""
datetime_string = self.git.get_api_status().raw_headers.get('date', '')
datetime_format = '%a, %d %b %Y %H:%M:%S %Z'
datetime_object = datetime.strptime(datetime_string, datetime_format)
return datetime_object
@timeout_decorator.timeout(_REQUEST_TIMEOUT_S)
def _get_pull_requests(self):
"""Private method retrieving pull requests from GitHub.
:return: Paginated list of Pull Requests in GitHub repo
:rtype: github.PaginatedList.PaginatedList of github.PullRequest.PullRequest
"""
return self.git.get_organization(self.repository).get_repo(self.project).get_pulls()
class GitWrapperError(Exception):
"""Base class for exceptions raised in GitWrapper.
:param message Explanation of the error
"""
def __init__(self, message):
self.message = message
log.exception(message)

View File

@@ -1,91 +0,0 @@
#!/usr/bin/python3
# Copyright (C) 2018-2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import requests
import jenkins
import logging
from retrying import retry
# Logging
logging.basicConfig(format='%(name)s - %(levelname)s - %(message)s')
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
_RETRY_LIMIT = 3
_RETRY_COOLDOWN_MS = 5000
class JenkinsWrapper:
"""Class wrapping Python-Jenkins API.
The purpose of this class is to wrap methods from Python-Jenkins API used in Watchdog, for less error-prone and
more convenient use. Docs for used API, including wrapped methods can be found at:
https://python-jenkins.readthedocs.io/en/latest/
:param jenkins_token: Token used for Jenkins
:param jenkins_user: Username used to connect to Jenkins
:param jenkins_server: Jenkins server address
:type jenkins_token: String
:type jenkins_user: String
:type jenkins_server: String
"""
def __init__(self, jenkins_token, jenkins_user, jenkins_server):
self.jenkins_server = jenkins_server
self.jenkins = jenkins.Jenkins(jenkins_server, username=jenkins_user,
password=jenkins_token)
@retry(stop_max_attempt_number=_RETRY_LIMIT, wait_fixed=_RETRY_COOLDOWN_MS)
def get_build_console_output(self, job_name, build_number):
return self.jenkins.get_build_console_output(job_name, build_number)
@retry(stop_max_attempt_number=_RETRY_LIMIT, wait_fixed=_RETRY_COOLDOWN_MS)
def get_job_info(self, job_name):
return self.jenkins.get_job_info(job_name)
@retry(stop_max_attempt_number=_RETRY_LIMIT, wait_fixed=_RETRY_COOLDOWN_MS)
def get_build_info(self, job_name, build_number):
return self.jenkins.get_build_info(job_name, build_number)
@retry(stop_max_attempt_number=_RETRY_LIMIT, wait_fixed=_RETRY_COOLDOWN_MS)
def get_queue_item(self, queue_id):
"""Attempt to retrieve Jenkins job queue item.
Exception communicating queue doesn't exist is expected,
in that case method returns empty dict.
:param queue_id: Jenkins job queue ID number
:type queue_id: int
:return: Dictionary representing Jenkins job queue item
:rtype: dict
"""
try:
return self.jenkins.get_queue_item(queue_id)
except Exception as e:
# Exception 'queue does not exist' is expected behaviour when job is running
if 'queue' in str(e) and 'does not exist' in str(e):
return {}
else:
raise
@retry(stop_max_attempt_number=_RETRY_LIMIT, wait_fixed=_RETRY_COOLDOWN_MS)
def get_idle_ci_hosts(self):
"""Query Jenkins for idle servers.
Send GET request to Jenkins server, querying for idle servers labeled
for OpenVino-ONNX CI job.
:return: Number of idle hosts delegated to OpenVino-ONNX CI
:rtype: int
"""
jenkins_request_url = self.jenkins_server + 'label/ci&&onnx/api/json?pretty=true'
try:
log.info('Sending request to Jenkins: %s', jenkins_request_url)
r = requests.Request(method='GET', url=jenkins_request_url, verify=False)
response = self.jenkins.jenkins_request(r).json()
return int(response['totalExecutors']) - int(response['busyExecutors'])
except Exception as e:
log.exception('Failed to send request to Jenkins!\nException message: %s', str(e))
raise

View File

@@ -1,89 +0,0 @@
#!/usr/bin/python3
# Copyright (C) 2018-2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import sys
from watchdog import Watchdog
DEFAULT_MSTEAMS_URL_FILE = '/home/lab_nerval/tokens/msteams_url'
DEFAULT_GITHUB_ORGANIZATION = 'openvinotoolkit'
DEFAULT_GITHUB_PROJECT = 'openvino'
DEFAULT_JENKINS_TOKEN_FILE = '/home/lab_nerval/tokens/crackerjack'
DEFAULT_JENKINS_SERVER = 'https://crackerjack.intel.com/'
DEFAULT_JENKINS_USER = 'lab_nerval'
DEFAULT_CI_JOB_NAME = 'onnx/OpenVino_CI'
DEFAULT_WATCHDOG_JOB_NAME = 'onnx/ci_watchdog'
def main(args):
"""
Read args passed to script, load tokens and run watchdog.
Keyword arguments:
:param args: arguments parsed by argparse ArgumentParser
:return: returns status code 0 on successful completion
"""
jenkins_server = args.jenkins_server.strip()
jenkins_user = args.jenkins_user.strip()
jenkins_token = open(args.jenkins_token).read().replace('\n', '').strip()
msteams_url = open(args.msteams_url).read().replace('\n', '').strip()
github_credentials = args.github_credentials
github_org = args.github_org
github_project = args.github_project
ci_job = args.ci_job.strip()
watchdog_job = args.watchdog_job.strip()
quiet = args.quiet
wd = Watchdog(jenkins_token=jenkins_token,
jenkins_server=jenkins_server,
jenkins_user=jenkins_user,
github_credentials=github_credentials,
git_org=github_org,
git_project=github_project,
msteams_url=msteams_url,
ci_job_name=ci_job,
watchdog_job_name=watchdog_job)
wd.run(quiet=quiet)
return 0
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--msteams-url', help='Path to MS Teams channel url to communicate messages.',
default=DEFAULT_MSTEAMS_URL_FILE, action='store', required=False)
parser.add_argument('--github-credentials', help='GitHub user credentials to access repo.',
nargs="+", required=True)
parser.add_argument('--github-org', help='Name of organization on GitHub.',
default=DEFAULT_GITHUB_ORGANIZATION, action='store', required=False)
parser.add_argument('--github-project', help='Name of project on GitHub.',
default=DEFAULT_GITHUB_PROJECT, action='store', required=False)
parser.add_argument('--jenkins-token', help='Path to Jenkins user token to access build info.',
default=DEFAULT_JENKINS_TOKEN_FILE, action='store', required=False)
parser.add_argument('--jenkins-server', help='Jenkins server address.',
default=DEFAULT_JENKINS_SERVER, action='store', required=False)
parser.add_argument('--jenkins-user', help='Jenkins user used to log in.',
default=DEFAULT_JENKINS_USER, action='store', required=False)
parser.add_argument('--ci-job', help='Jenkins CI job name.',
default=DEFAULT_CI_JOB_NAME, action='store', required=False)
parser.add_argument('--watchdog-job', help='Jenkins CI Watchdog job name.',
default=DEFAULT_WATCHDOG_JOB_NAME, action='store', required=False)
parser.add_argument('--quiet', help="Quiet mode - doesn\'t send message to communicator.",
action='store_true', required=False)
args = parser.parse_args()
sys.exit(main(args))

View File

@@ -1,128 +0,0 @@
#!/usr/bin/python3
# Copyright (C) 2018-2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import requests
class MSTeamsCommunicator:
"""Class communicating with MSTeams using Incoming Webhook.
The purpose of this class is to use MSTeams API to send message.
Docs for used API, including wrapped methods can be found at:
https://docs.microsoft.com/en-us/outlook/actionable-messages/send-via-connectors
"""
def __init__(self, _ci_alerts_channel_url):
self._ci_alerts_channel_url = _ci_alerts_channel_url
self._queued_messages = {
self._ci_alerts_channel_url: [],
}
@property
def messages(self):
"""
Get list of queued messages.
:return: List of queued messages
:return type: List[String]
"""
return self._queued_messages.values()
def queue_message(self, message):
"""
Queue message to be sent later.
:param message: Message content
:type message: String
"""
self._queued_messages[self._ci_alerts_channel_url].append(message)
def _parse_text(self, watchdog_log, message):
"""
Parse text to display as alert.
:param watchdog_log: Watchdog log content
:param message: Unparsed message content
:type watchdog_log: String
:type message: String
"""
message_split = message.split('\n')
log_url = None
if len(message_split) == 3:
log_url = message_split[-1]
title = message_split[0]
text = message_split[1]
header = watchdog_log.split(' - ')
header_formatted = '{} - [Watchdog Log]({})'.format(header[0], header[1])
return title, log_url, '{}\n\n{}'.format(header_formatted, text)
def _json_request_content(self, title, log_url, text_formatted):
"""
Create final json request to send message to MS Teams channel.
:param title: Title of alert
:param log_url: URL to PR
:param text_formatted: General content of alert - finally formatted
:type title: String
:type title: String
:type title: String
"""
data = {
'@context': 'https://schema.org/extensions',
'@type': 'MessageCard',
'themeColor': '0072C6',
'title': title,
'text': text_formatted,
'potentialAction':
[
{
'@type': 'OpenUri',
'name': 'Open PR',
'targets':
[
{
'os': 'default',
'uri': log_url,
},
],
},
],
}
return data
def _send_to_channel(self, watchdog_log, message_queue, channel_url):
"""
Send MSTeams message to specified channel.
:param watchdog_log: Watchdog log content
:param message_queue: Queued messages to send
:param channel_url: Channel url
:type watchdog_log: String
:type message_queue: String
:type channel_url: String
"""
for message in message_queue:
title, log_url, text_formatted = self._parse_text(watchdog_log, message)
data = self._json_request_content(title, log_url, text_formatted)
try:
requests.post(url=channel_url, json=data)
except Exception as ex:
raise Exception('!!CRITICAL!! MSTeamsCommunicator: Could not send message '
'due to {}'.format(ex))
def send_message(self, watchdog_log, quiet=False):
"""
Send queued messages as single communication.
:param watchdog_log: Watchdog log content
:param quiet: Flag for disabling sending report through MS Teams
:type watchdog_log: String
:type quiet: Boolean
"""
for channel, message_queue in self._queued_messages.items():
if not quiet and message_queue:
self._send_to_channel(watchdog_log, message_queue, channel)

View File

@@ -1,505 +0,0 @@
#!/usr/bin/python3
# Copyright (C) 2018-2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import datetime
import time
import re
import logging
import requests
from ms_teams_communicator import MSTeamsCommunicator
from jenkins_wrapper import JenkinsWrapper
from jenkins import NotFoundException
from git_wrapper import GitWrapper, GitWrapperError
import os
import json
# Logging
logging.basicConfig(format='%(name)s - %(levelname)s - %(message)s')
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
# Watchdog static constant variables
_SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
_BUILD_DURATION_THRESHOLD = datetime.timedelta(minutes=60)
_CI_START_THRESHOLD = datetime.timedelta(minutes=30)
_AWAITING_JENKINS_THRESHOLD = datetime.timedelta(minutes=5)
_WATCHDOG_DIR = os.path.expanduser('~')
_PR_REPORTS_CONFIG_KEY = 'pr_reports'
_CI_BUILD_FAIL_MESSAGE = 'ERROR: py3: commands failed'
_CI_BUILD_SUCCESS_MESSAGE = 'py3: commands succeeded'
_GITHUB_CI_CHECK_NAME = 'OpenVINO-ONNX'
INTERNAL_ERROR_MESSAGE_HEADER = '!!! --- !!! INTERNAL WATCHDOG ERROR !!! --- !!!'
ERROR_MESSAGE_HEADER = '!!! OpenVino-ONNX CI Error !!!'
WARNING_MESSAGE_HEADER = 'OpenVino-ONNX CI WARNING'
INFO_MESSAGE_HEADER = 'OpenVino-ONNX CI INFO'
class Watchdog:
"""Class describing OpenVino-ONNX-CI Watchdog.
Watchdog connects to GitHub and retrieves the list of current pull requests (PRs) in
OpenVino repository. Then it connects to specified Jenkins server to
check CI jobs associated with every PR. Watchdog verifies time durations for Jenkins
initial response, job queue and execution against time treshold constants. Every fail
is logged and reported through MS Teams communicators.
:param jenkins_token: Token used for Jenkins
:param jenkins_server: Jenkins server address
:param jenkins_user: Username used to connect to Jenkins
:param github_credentials: Credentials used to connect to GitHub
:param msteams_url: URL used to connect to MS Teams channel
:param ci_job_name: OpenVino-ONNX CI job name used in Jenkins
:param watchdog_job_name: Watchdog job name used in Jenkins
:type jenkins_token: String
:type jenkins_server: String
:type jenkins_user: String
:type github_credentials: String
:type msteams_url: String
:type ci_job_name: String
:type watchdog_job_name: String
.. note::
Watchdog and OpenVino-ONNX CI job must be placed on the same Jenkins server.
"""
def __init__(self, jenkins_token, jenkins_server, jenkins_user, github_credentials, git_org,
git_project, msteams_url, ci_job_name, watchdog_job_name):
self._config_path = os.path.join(_WATCHDOG_DIR, '{}/.{}_ci_watchdog.json'.format(_WATCHDOG_DIR, git_project))
# Jenkins Wrapper object for CI job
self._jenkins = JenkinsWrapper(jenkins_token,
jenkins_user=jenkins_user,
jenkins_server=jenkins_server)
# Load GitHub token and log in, retrieve pull requests
self._git = GitWrapper(github_credentials, repository=git_org, project=git_project)
# Create MS Teams api object
self._msteams_hook = MSTeamsCommunicator(msteams_url)
self._ci_job_name = ci_job_name.lower()
self._watchdog_job_name = watchdog_job_name
# Read config file
self._config = self._read_config_file()
# Time at Watchdog initiation
self._now_time = datetime.datetime.now()
self._current_prs = {}
self._ms_teams_enabled = True
def run(self, quiet=False):
"""Run main watchdog logic.
Retrieve list of pull requests and pass it to the method responsible for checking them.
:param quiet: Flag for disabling sending report through communicator
:type quiet: Boolean
"""
try:
pull_requests = self._git.get_pull_requests()
except GitWrapperError:
message = 'Failed to retrieve Pull Requests!'
log.exception(message)
self._queue_message(message, message_severity='internal')
# Check all pull requests
for pr in pull_requests:
try:
self._check_pr(pr)
except Exception as e:
log.exception(str(e))
self._queue_message(str(e), message_severity='internal', pr=pr)
self._update_config()
self._send_message(quiet=quiet)
def _read_config_file(self):
"""Read Watchdog config file stored on the system.
The file stores every fail already reported along with timestamp. This
mechanism is used to prevent Watchdog from reporting same failure
multiple times. In case there's no config under the expected path,
appropriate data structure is created and returned.
:return: Returns dict of dicts with reported fails with their timestamps
:rtype: dict of dicts
"""
if os.path.isfile(self._config_path):
log.info('Reading config file in: {}'.format(self._config_path))
file = open(self._config_path, 'r')
data = json.load(file)
else:
log.info('No config file found in: {}'.format(self._config_path))
data = {_PR_REPORTS_CONFIG_KEY: {}}
return data
def _check_pr(self, pr):
"""Check pull request (if there's no reason to skip).
Retrieve list of statuses for every PR's last commit and interpret them. Filters out statuses
unrelated to OpenVino-ONNX Jenkins CI and passes relevant statuses to method that interprets them.
If no commit statuses related to Jenkins are available after time defined by
**_AWAITING_JENKINS_THRESHOLD** calls appropriate method to check for builds waiting in queue.
:param pr: GitHub Pull Requests
:type pr: github.PullRequest.PullRequest
"""
log.info('===============================================')
log.info('Checking PR#{}'.format(pr.number))
# Get last Jenkins status
last_status = self._get_last_status(pr)
# Append PR checked in current run for Watchdog config
self._current_prs[str(pr.number)] = self._get_pr_timestamps(pr, last_status)
if self._should_ignore(pr) or self._updated_since_last_run(pr):
log.info('Ignoring PR#{}'.format(pr.number))
return
# Calculate time passed since PR update (any commit, merge or comment)
pr_time_delta = self._now_time - pr.updated_at
if last_status:
# Interpret found CI statuses
log.info('Last status: {} at {}'.format(last_status.description, last_status.updated_at))
self._interpret_status(last_status, pr)
elif pr_time_delta > _CI_START_THRESHOLD:
# If there's no status after assumed time - check if build is waiting in queue
log.info('CI for PR {}: NO JENKINS STATUS YET'.format(pr.number))
self._check_missing_status(pr)
@staticmethod
def _get_pr_timestamps(pr, last_status):
"""Get dict containing PR timestamp and last status timestamp.
:param pr: Single PR being currently checked
:type pr: github.PullRequest.PullRequest
:return: Dictionary with PR and last status update timestamps
:rtype: dict
"""
pr_timestamp = time.mktime(pr.updated_at.timetuple())
if last_status:
status_timestamp = time.mktime(last_status.updated_at.timetuple())
else:
status_timestamp = None
pr_dict = {'pr_timestamp': pr_timestamp,
'status_timestamp': status_timestamp}
return pr_dict
@staticmethod
def _get_last_status(pr):
"""Get last commit status posted from Jenkins.
:param pr: Single PR being currently checked
:type pr: github.PullRequest.PullRequest
:return: Either last PR status posted from Jenkins or None
:rtype: github.CommitStatus.CommitStatus
"""
# Find last commit in PR
last_commit = pr.get_commits().reversed[0]
# Get statuses and filter them to contain only those related to Jenkins CI
# and check if CI in Jenkins started
statuses = last_commit.get_statuses()
jenk_statuses = [stat for stat in statuses if
_GITHUB_CI_CHECK_NAME in stat.context]
try:
last_status = jenk_statuses[0]
except IndexError:
last_status = None
return last_status
@staticmethod
def _should_ignore(pr):
"""Determine if PR should be ignored.
:param pr: Single PR being currently checked
:type pr: github.PullRequest.PullRequest
:return: Returns True if PR should be ignored
:rtype: Bool
"""
# Ignore PR if it has WIP label or WIP in title
if 'WIP' in pr.title:
log.info('PR#{} should be ignored. WIP tag in title.'.format(pr.number))
return True
label_names = [label.name for label in pr.labels]
if 'WIP' in label_names:
log.info('PR#{} should be ignored. WIP label present.'.format(pr.number))
return True
# Ignore PR if base ref is not master
if 'master' not in pr.base.ref:
log.info('PR#{} should be ignored. Base ref is not master'.format(pr.number))
return True
# Ignore PR if mergeable state is 'dirty' or 'behind'.
# Practically this ignores PR in case of merge conflicts
ignored_mergeable_states = ['behind', 'dirty', 'draft']
if pr.mergeable_state in ignored_mergeable_states:
log.info('PR#{} should be ignored. Mergeable state is {}. '.format(pr.number, pr.mergeable_state))
return True
# If no criteria for ignoring PR are met - return false
return False
def _updated_since_last_run(self, pr):
# Ignore if PR was already checked and there was no update in meantime
pr_number = str(pr.number)
current_pr_timestamps = self._current_prs.get(pr_number)
last_pr_timestamps = self._config[_PR_REPORTS_CONFIG_KEY].get(pr_number)
if current_pr_timestamps == last_pr_timestamps:
log.info('PR#{} - No update since last check'.format(pr.number))
return True
else:
return False
def _check_missing_status(self, pr):
"""Verify if missing status is expected.
This method checks if CI build for last was scheduled and still waits in queue for
executor.
:param pr: Single PR being currently checked
:type pr: github.PullRequest.PullRequest
"""
pr_time_delta = self._now_time - pr.updated_at
try:
build_number = self._build_scheduled(pr)
if self._build_in_queue(pr, build_number):
message = ('PR# {}: build waiting in queue after {} minutes.'
.format(pr.number, pr_time_delta.seconds / 60))
severity = 'warning'
else:
message = ('PR# {}: missing status on GitHub after {} minutes.'
.format(pr.number, pr_time_delta.seconds / 60))
severity = 'error'
self._queue_message(message, message_severity=severity, pr=pr)
except TypeError:
log.info('Committer outside of OpenVino organization')
def _build_scheduled(self, pr):
"""Check if Jenkins build corresponding to PR was scheduled.
This method takes last Jenkins build for given PR and compares hash from Jenkins console output
and sha from PR object to determine if CI build for appropriate commit was scheduled.
:param pr: Single PR being currently checked
:type pr: github.PullRequest.PullRequest
:return: Returns build number or -1 if no build found
:rtype: int
"""
pr_number = str(pr.number)
project_name_full = self._ci_job_name + '/PR-' + pr_number
try:
# Retrieve console output from last Jenkins build for job corresponding to this PR
last_build_number = self._jenkins.get_job_info(project_name_full)['lastBuild']['number']
console_output = self._jenkins.get_build_console_output(project_name_full, last_build_number)
# Check if CI build was scheduled - commit hash on GH must match hash in last Jenkins build console output
# Retrieve hash from Jenkins output
match_string = '(?:Obtained .ci/[a-zA-Z/]+Jenkinsfile from ([a-z0-9]{40}))'
retrieved_sha = re.search(match_string, console_output).group(1)
if retrieved_sha == pr.get_commits().reversed[0].sha:
return last_build_number
else:
return -1
except (NotFoundException, AttributeError, requests.exceptions.HTTPError):
message = ('PR #{}: Jenkins build corresponding to commit {} not found!'
.format(pr_number, pr.get_commits().reversed[0].sha))
self._queue_message(message, message_severity='error', pr=pr)
return -1
def _build_in_queue(self, pr, build_number):
"""Check if Jenkins build waits in queue.
This method verifies if CI build is waiting in queue based on console output.
:param pr: Single PR being currently checked
:param build_number: Jenkins build number to retrieve console output from
:type pr: github.PullRequest.PullRequest
:type build_number: int
:return: Returns True if CI build is waiting in queue
:rtype: Bool
"""
pr_number = str(pr.number)
project_name_full = self._ci_job_name + '/PR-' + pr_number
# Retrieve console output
try:
console_output = self._jenkins.get_build_console_output(project_name_full, build_number)
except NotFoundException:
return False
# Check if build is waiting in queue (and not already running on an executor)
if 'Waiting for next available executor on' in console_output \
and 'Running on' not in console_output:
log.info('CI for PR %s: WAITING IN QUEUE', pr_number)
return True
else:
return False
def _interpret_status(self, status, pr):
"""
Verify GitHub status passed to the method.
This method verifies last commit status for given PR, calling appropriate methods
to further validate the status.
:param status: GitHub commit status
:param pr: Single PR being currently checked
:type status: github.CommitStatus.CommitStatus
:type pr: github.PullRequest.PullRequest
"""
try:
# Retrieve build number for Jenkins build related to this PR
build_number = self._retrieve_build_number(status.target_url)
# CI build finished - verify if expected output is present
finished_statuses = ['Build finished', 'This commit cannot be built', 'This commit looks good']
pending_statuses = ['This commit is being built', 'Testing in progress',
'This commit is scheduled to be built']
if any(phrase in status.description for phrase in finished_statuses):
self._check_finished(pr, build_number)
# CI build in progress - verify timeouts for build queue and duration
elif any(phrase in status.description for phrase in pending_statuses):
self._check_in_progress(pr, build_number)
else:
message = 'ONNX CI job for PR# {}: unrecognized status: {}'.format(pr.number, status.description)
self._queue_message(message, message_severity='error', pr=pr)
except Exception:
# Log Watchdog internal error in case any status can't be properly verified
message = 'Failed to verify status "{}" for PR# {}'.format(status.description, pr.number)
log.exception(message)
self._queue_message(message, message_severity='internal', pr=pr)
def _retrieve_build_number(self, url):
"""Retrieve Jenkins CI job build number from URL address coming from GitHub commit status.
:param url: URL address from GitHub commit status
:type url: String
:return: Returns build number
:rtype: int
"""
# Retrieve the build number from url string
match_obj = re.search('(?:/PR-[0-9]+/)([0-9]+)', url)
try:
number = int(match_obj.group(1))
return number
except Exception:
log.exception('Failed to retrieve build number from url link: %s', url)
raise
def _queue_message(self, message, message_severity='info', pr=None):
"""Add a message to message queue in communicator object.
The queued message is constructed based on message string passed as
a method argument and message header. Message header is mapped to message severity
also passed as an argument.
:param message: Message content
:param message_severity: Message severity level
:type message: String
:type message_severity: int
"""
log.info(message)
internal = False
if 'internal' in message_severity:
message_header = INTERNAL_ERROR_MESSAGE_HEADER
internal = True
elif 'error' in message_severity:
message_header = ERROR_MESSAGE_HEADER
elif 'warning' in message_severity:
message_header = WARNING_MESSAGE_HEADER
else:
message_header = INFO_MESSAGE_HEADER
# If message is related to PR attatch url
if pr:
message = message + '\n' + pr.html_url
send = message_header + '\n' + message
if self._ms_teams_enabled:
self._msteams_hook.queue_message(send)
def _check_finished(self, pr, build_number):
"""Verify if finished build output contains expected string for either fail or success.
:param pr: Single PR being currently checked
:param build_number: Jenkins CI job build number
:type pr: github.PullRequest.PullRequest
:type build_number: int
"""
pr_number = str(pr.number)
log.info('CI for PR %s: FINISHED', pr_number)
# Check if FINISH was valid FAIL / SUCCESS
project_name_full = self._ci_job_name + '/PR-' + pr_number
build_output = self._jenkins.get_build_console_output(project_name_full, build_number)
if _CI_BUILD_FAIL_MESSAGE not in build_output \
and _CI_BUILD_SUCCESS_MESSAGE not in build_output:
message = ('ONNX CI job for PR #{}: finished but no tests success or fail '
'confirmation is present in console output!'.format(pr_number))
self._queue_message(message, message_severity='error', pr=pr)
def _send_message(self, quiet=False):
"""Send messages queued in MS Teams objects to designated channel.
Queued messages are being sent as a single communication.
:param quiet: Flag for disabling sending report through communicator
:type quiet: Boolean
"""
if any(messages for messages in self._msteams_hook.messages):
try:
watchdog_build = self._jenkins.get_job_info(self._watchdog_job_name)['lastBuild']
watchdog_build_number = watchdog_build['number']
watchdog_build_link = watchdog_build['url']
except Exception:
watchdog_build_number = 'UNKNOWN'
watchdog_build_link = self._jenkins.jenkins_server
send = self._watchdog_job_name + '- build ' + str(
watchdog_build_number) + ' - ' + watchdog_build_link
if self._ms_teams_enabled:
self._msteams_hook.send_message(send, quiet=quiet)
else:
log.info('Nothing to report.')
def _check_in_progress(self, pr, build_number):
"""Check if CI build succesfully started.
Checks if build started within designated time threshold, and job is
currently running - it didn't cross the time threshold.
:param pr: Single PR being currently checked
:param build_number: Jenkins CI job build number
:type pr: github.PullRequest.PullRequest
:type build_number: int
"""
pr_number = str(pr.number)
log.info('CI for PR %s: TESTING IN PROGRESS', pr_number)
project_name_full = self._ci_job_name + '/PR-' + pr_number
build_info = self._jenkins.get_build_info(project_name_full, build_number)
build_datetime = datetime.datetime.fromtimestamp(build_info['timestamp'] / 1000.0)
build_delta = self._now_time - build_datetime
log.info('Build %s: IN PROGRESS, started: %s minutes ago', str(build_number),
str(build_delta))
# If build still waiting in queue
if build_delta > _CI_START_THRESHOLD and self._build_in_queue(pr, build_number):
message = ('ONNX CI job build #{}, for PR #{} waiting in queue after {} '
'minutes'.format(build_number, pr_number, str(build_delta.seconds / 60)))
self._queue_message(message, message_severity='warning', pr=pr)
elif build_delta > _BUILD_DURATION_THRESHOLD:
# CI job take too long, possibly froze - communicate failure
message = ('ONNX CI job build #{}, for PR #{} started,'
'but did not finish in designated time of {} '
'minutes!'.format(build_number, pr_number,
str(_BUILD_DURATION_THRESHOLD.seconds / 60)))
self._queue_message(message, message_severity='error', pr=pr)
def _update_config(self):
"""Update Watchdog config file with PRs checked in current Watchdog run, remove old entries.
:param current_prs: List of PR numbers checked during current Watchdog run
:type current_prs: list of ints
"""
# Cleanup config of old reports
log.info('Writing to config file at: {}'.format(self._config_path))
new_config = {_PR_REPORTS_CONFIG_KEY: self._current_prs}
file = open(self._config_path, 'w+')
json.dump(new_config, file)

19
.clang-format Normal file
View File

@@ -0,0 +1,19 @@
BasedOnStyle: Google
IndentWidth: 4
UseTab: Never
---
Language: Cpp
Standard: Cpp11
AccessModifierOffset: -4
AllowAllArgumentsOnNextLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortLambdasOnASingleLine: Empty
AlwaysBreakBeforeMultilineStrings: false
ColumnLimit: 120
DerivePointerAlignment: false
FixNamespaceComments: true
IndentCaseLabels: false
SpaceBeforeCpp11BracedList: true
SpaceBeforeCtorInitializerColon: false
---

View File

@@ -13,7 +13,6 @@ omit =
/usr/*
# omit tests
*/test_*.py
*_test.py
# init scripts
*/__init__.py
@@ -37,4 +36,4 @@ exclude_lines =
ignore_errors = True
[html]
directory = htmlcov
directory = htmlcov

6
.gitattributes vendored
View File

@@ -63,9 +63,3 @@
#*.PDF diff=astextplain
#*.rtf diff=astextplain
#*.RTF diff=astextplain
*.PNG filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
*.jpg filter=lfs diff=lfs merge=lfs -text
*.gif filter=lfs diff=lfs merge=lfs -text
*.vsdx filter=lfs diff=lfs merge=lfs -text

View File

@@ -1,58 +0,0 @@
---
name: Bug
about: Create a report to help us improve
title: "[Bug]"
labels: bug, support_request
assignees: ''
---
##### System information (version)
<!-- Example
- OpenVINO => 2020.4
- Operating System / Platform => Windows 64 Bit
- Compiler => Visual Studio 2017
- Problem classification: Model Conversion
- Framework: TensorFlow (if applicable)
- Model name: ResNet50 (if applicable)
-->
- OpenVINO=> :grey_question:
- Operating System / Platform => :grey_question:
- Compiler => :grey_question:
- Problem classification => :grey_question:
##### Detailed description
<!-- your description -->
##### Steps to reproduce
<!--
Describe your problem and steps you've done before you got to this point.
to add code example fence it with triple backticks and optional file extension
```.cpp
// C++ code example
```
or attach as .txt or .zip file
-->
##### Issue submission checklist
- [ ] I report the issue, it's not a question
<!--
OpenVINO team works with support forum, Stack Overflow and other communities
to discuss problems. Tickets with question without real issue statement will be
closed.
-->
- [ ] I checked the problem with documentation, FAQ, open issues, Stack Overflow, etc and have not found solution
<!--
Places to check:
* OpenVINO documentation: https://docs.openvinotoolkit.org/
* OpenVINO forum: https://community.intel.com/t5/Intel-Distribution-of-OpenVINO/bd-p/distribution-openvino-toolkit
* OpenVINO issue tracker: https://github.com/openvinotoolkit/openvino/issues?q=is%3Aissue
* Stack Overflow branch: https://stackoverflow.com/questions/tagged/openvino
-->
- [ ] There is reproducer code and related data files: images, videos, models, etc.
<!--
The best reproducer -- test case for OpenVINO that we can add to the library.
-->

View File

@@ -1,13 +0,0 @@
version: 2
updates:
- package-ecosystem: pip
directory: "/ngraph/python"
schedule:
interval: weekly
day: monday
time: "13:00"
open-pull-requests-limit: 10
reviewers:
- postrational
labels:
- dependencies

View File

@@ -1,51 +0,0 @@
# Copyright (C) 2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""
Check GitHub organization and invite members
"""
# pylint: disable=fixme,no-member
from argparse import ArgumentParser
import github_api
from configs import Config
def main():
"""The main entry point function"""
arg_parser = ArgumentParser()
arg_parser.add_argument("--cfg-file", metavar="PATH", default=Config.default_cfg_path,
help=f"Path to json configuration file, e.g. {Config.default_cfg_path}")
arg_parser.add_argument("--teams", action="store_true", help="Check GitHub teams")
args, unknown_args = arg_parser.parse_known_args()
Config(args.cfg_file, unknown_args)
gh_api = github_api.GithubOrgApi()
if args.teams:
gh_api.get_org_teams()
else:
dev_emails = github_api.get_dev_emails()
print(f'\nDeveloper emails {len(dev_emails)}:', '; '.join(dev_emails))
org_emails = gh_api.get_org_emails()
print(f'\nOrg emails {len(org_emails)}:', '; '.join(org_emails))
org_pendig_invitation_emails = gh_api.get_org_invitation_emails()
invite_emails = dev_emails.difference(org_emails).difference(org_pendig_invitation_emails)
print(f'\nInvite emails {len(invite_emails)}:', '; '.join(invite_emails))
no_in_dev_emails = org_emails.difference(dev_emails)
print(f'\nOrg members - no in developers list {len(no_in_dev_emails)}:',
'; '.join(no_in_dev_emails))
valid_github_users = gh_api.get_valid_github_users(invite_emails)
gh_api.invite_users(valid_github_users)
if __name__ == '__main__':
main()

View File

@@ -1,149 +0,0 @@
# Copyright (C) 2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""
Check GitHub PRs and set labels by type and categories, e.g. 'ExternalPR', 'category: ci'
"""
# pylint: disable=fixme,no-member
import re
import datetime
from argparse import ArgumentParser
from enum import Enum
import github_api
from configs import Config
class PrType(Enum):
"""Constants for type of GitHub pull request by author membership"""
EXTERNAL = 'ExternalPR'
INTEL = 'ExternalIntelPR'
ORG = 'OpenvinoPR'
BAD = 'BadPR'
def get_pr_labels(pull):
"""Gets PR labels as set"""
pr_lables = set()
for label in pull.labels:
pr_lables.add(label.name)
return pr_lables
def set_pr_labels(pull, labels):
"""Sets PR labels"""
if not labels or Config().DRY_RUN:
return
print(f'Set PR labels:', labels)
pull.set_labels(labels)
def get_pr_type_by_labels(pull):
"""Gets PR type using labels"""
pr_lables = get_pr_labels(pull)
pr_types = set(type.value for type in PrType)
pr_types_labels = pr_lables & pr_types
if not pr_types_labels:
return None
if len(pr_types_labels) > 1:
print(f'Duplicated labels: {pr_types_labels}')
return PrType.BAD
return PrType(PrType(pr_types_labels.pop()))
def get_label_by_team_name_re(team_name):
"""Generates label by PR reviwer team name using regular expressions"""
if 'admins' in team_name:
return 'category: ci'
re_compile_label = re.compile(rf'{Config().GITHUB_REPO}-(.+)-maintainers')
re_label = re_compile_label.match(team_name)
if re_label:
return f'category: {re_label.group(1).strip()}'
return None
def get_label_by_team_name_map(team_name):
"""Generates label by PR reviwer team name using config map"""
return Config().TEAM_TO_LABEL.get(team_name)
def get_category_labels(pull):
"""Gets list of category labels by all PR reviwer teams"""
labels = []
pr_lables = get_pr_labels(pull)
for reviewer_team in pull.get_review_requests()[1]:
reviewer_label = get_label_by_team_name_map(reviewer_team.name)
if reviewer_label and reviewer_label not in pr_lables:
labels.append(reviewer_label)
return labels
def main():
"""The main entry point function"""
arg_parser = ArgumentParser()
arg_parser.add_argument("--cfg-file", metavar="PATH", default=Config.default_cfg_path,
help=f"Path to json configuration file, e.g. {Config.default_cfg_path}")
arg_parser.add_argument("--pr", metavar="NUMBER",
help="Get GitHub pull request with the number")
arg_parser.add_argument("--pr-state", default="open", choices=["open", "closed"],
help="Set GitHub pull request state")
arg_parser.add_argument("--newer", metavar="MINUTES",
help="Get newly created GitHub pull request only")
args, unknown_args = arg_parser.parse_known_args()
Config(args.cfg_file, unknown_args)
gh_api = github_api.GithubOrgApi()
if args.pr:
pulls = [gh_api.repo.get_pull(int(args.pr))]
else:
pulls = gh_api.repo.get_pulls(state=args.pr_state)
print(f'\nPRs count ({args.pr_state}):', pulls.totalCount)
if args.newer:
pr_created_after = datetime.datetime.now() - datetime.timedelta(minutes=int(args.newer))
print('PRs created after:', pr_created_after)
non_org_intel_pr_users = set()
non_org_pr_users = set()
for pull in pulls:
if args.newer and pull.created_at <= pr_created_after:
print(f'\nIGNORE: {pull} - Created: {pull.created_at}')
continue
pr_lables = get_pr_labels(pull)
pr_type_by_labels = get_pr_type_by_labels(pull)
set_labels = []
print(f'\n{pull} - Created: {pull.created_at} - Labels: {pr_lables} -',
f'Type: {pr_type_by_labels}', end='')
# Checks PR source type
if gh_api.is_org_user(pull.user):
print(' - Org user')
elif github_api.is_intel_email(pull.user.email) or \
github_api.is_intel_company(pull.user.company):
print(' - Non org user with Intel email or company')
non_org_intel_pr_users.add(pull.user)
if pr_type_by_labels is not PrType.INTEL:
print(f'NO "{PrType.INTEL.value}" label: ', end='')
github_api.print_users(pull.user)
set_labels.append(PrType.INTEL.value)
else:
print(f' - Non org user with NO Intel email or company')
non_org_pr_users.add(pull.user)
if pr_type_by_labels is not PrType.EXTERNAL:
print(f'NO "{PrType.EXTERNAL.value}" label: ', end='')
github_api.print_users(pull.user)
set_labels.append(PrType.EXTERNAL.value)
set_labels += get_category_labels(pull)
set_pr_labels(pull, set_labels)
print(f'\nNon org user with Intel email or company:')
github_api.print_users(non_org_intel_pr_users)
print(f'\nNon org user with NO Intel email or company:')
github_api.print_users(non_org_pr_users)
if __name__ == '__main__':
main()

View File

@@ -1,36 +0,0 @@
{
"GITHUB_TOKEN": "<Put token here or set as arg or as env variable>",
"GITHUB_ORGANIZATION": "openvinotoolkit",
"GITHUB_REPO": "openvino",
"IGNORE_LOGINS": [
"openvino-ci",
"openvino-pushbot",
"lab-nerval",
"lab-nerval-onnx-ci"
],
"EMAILS_FILE_PATH": "dev_emails-test.txt",
"PROXIES": {
"HTTP_PROXY": null,
"HTTPS_PROXY": null,
"NO_PROXY": "localhost,127.0.0.1,.intel.com"
},
"DRY_RUN": false,
"TEAM_TO_LABEL": {
"openvino-admins": "category: CI",
"openvino-maintainers": "category: IE common",
"openvino-docs-maintainers": "category: docs",
"openvino-ie-maintainers": "category: IE common",
"openvino-ie-cpu-maintainers": "category: CPU",
"openvino-ie-gna-maintainers": "category: GNA",
"openvino-ie-gpu-maintainers": "category: GPU",
"openvino-ie-lpt-maintainers": "category: LP transformations",
"openvino-ie-multi-maintainers": "category: MULTI",
"openvino-ie-python-api-maintainers": "category: python api",
"openvino-ie-tests-maintainers": "category: IE Tests",
"openvino-ie-vpu-maintainers": "category: VPU",
"openvino-mo-maintainers": "category: MO",
"openvino-ngraph-maintainers": "category: nGraph",
"openvino-tests-maintainers": "category: IE Tests",
"openvino-tools-maintainers": "category: tools"
}
}

View File

@@ -1,113 +0,0 @@
# Copyright (C) 2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""
Configurations management
"""
# pylint: disable=fixme,broad-except
import os
import sys
import ast
import json
from pathlib import Path
if sys.hexversion < 0x3060000:
raise Exception('Python version must be >= 3.6')
class ConfigException(Exception):
"""Base configuration exception"""
class Config:
"""Configuration wrapper"""
_instance = None
properties = None
default_cfg_path = Path(__file__).resolve().parent / 'config.json'
def __new__(cls, *_args, **_kwargs):
if not Config._instance:
Config._instance = super(Config, cls).__new__(cls)
return Config._instance
def __init__(self, file_path=None, cli_args=None):
"""
:param file_path: Path to json configuration file
:type file_path: String
:param args: List of argparse arguments with patterns: 'name=value' or 'name'
:type args: list
"""
if Config.properties:
return
self._file_path = file_path or Config.default_cfg_path
self._cli_args = cli_args or []
self._json_cfg = {}
self._args = {}
self._load_cfg()
self._parse_cli_args()
Config.properties = {}
for name, value in self._json_cfg.items():
if hasattr(self, name):
raise ConfigException(f'Duplicating prosperity: {name}')
prosperity_value = self._args.get(name) or os.getenv(name)
if prosperity_value:
# Try to set prosperity_value as Python literal structures, e.g. DRY_RUN=False
try:
prosperity_value = ast.literal_eval(prosperity_value)
except Exception:
pass
if not isinstance(prosperity_value, type(value)):
raise ConfigException(f'Python type of {name} parameter must be {type(value)}')
else:
prosperity_value = value
setattr(self, name, prosperity_value)
Config.properties[name] = prosperity_value
self.set_proxy()
def _load_cfg(self):
"""Load the json configuration file"""
try:
with open(self._file_path) as conf:
self._json_cfg = json.load(conf)
except:
print('Failed to load configuration from:', self._file_path)
raise
def _parse_cli_args(self):
"""Parse argparse arguments with patterns: 'name=value' or 'name'"""
for cli_arg in self._cli_args:
arg = cli_arg.split('=')
if arg[0] not in self._json_cfg:
raise ConfigException(f'Unsupported argument: {arg}')
self._args[arg[0]] = True if len(arg) == 1 else '='.join(arg[1:])
def get_properties(self):
"""Get all properties as Dict"""
return self.properties
def set_proxy(self):
"""Set proxies"""
for proxy_name, url in self.properties['PROXIES'].items():
if url is not None:
print(f'Set proxy: {proxy_name}={url}')
os.environ[proxy_name] = url
def _test():
"""Test and debug"""
print('Config.default_cfg_path:', Config.default_cfg_path)
cfg = Config(cli_args=['DRY_RUN=True'])
print('Config.properties:', cfg.get_properties())
if __name__ == '__main__':
_test()

View File

@@ -1,9 +0,0 @@
# good comment
Last_name, First_name <first_name.last_name@intel.com>
first_name.last_name@intel.com
openvino_pushbot@intel.com
# Wrong emails
foo@foo.com
foo1 foo2
foo1 foo2@intel.com

View File

@@ -1,287 +0,0 @@
# Copyright (C) 2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""
GitHub API for controlling organization
"""
# pylint: disable=fixme,no-member
import re
import time
from github import Github, GithubException, RateLimitExceededException, IncompletableObject
from github import UnknownObjectException
from github.PaginatedList import PaginatedList
from configs import Config
def is_valid_user(user):
"""Checks that user is valid github.Github object"""
try:
return user and user.login
except IncompletableObject:
return False
def is_user_ignored(user):
"""Checks that user should be ignored"""
cfg = Config()
if is_valid_user(user) and user.login.lower() not in cfg.properties['IGNORE_LOGINS']:
return False
return True
def is_valid_name(name):
"""Checks that GitHub user's name is valid"""
return name and len(name) >= 3 and ' ' in name
def is_intel_email(email):
"""Checks that email is valid Intel email"""
return email and len(email) > 10 and ' ' not in email and email.lower().endswith('@intel.com')
def is_intel_company(company):
"""Checks that company contains intel"""
return company and 'intel' in company.lower()
def is_valid_intel_user(user):
"""Checks that user is valid GitHub and Intel user"""
return is_valid_user(user) and (is_valid_name(user.name) and is_intel_email(user.email) or
is_user_ignored(user))
def print_users(users):
"""Print list of users in different formats: list, set, PaginatedList"""
if isinstance(users, (list, set, PaginatedList)):
users_count = users.totalCount if isinstance(users, PaginatedList) else len(users)
print(f'\nGitHub users {users_count} (login - name - company - email - valid):')
else:
users = [users]
for user in users:
if not is_valid_user(user):
print('WRONG GitHub user: ???')
continue
valid_check = 'OK' if is_valid_intel_user(user) else 'FIX'
if not is_intel_email(user.email):
valid_check += ' email'
if not is_valid_name(user.name):
valid_check += ' name'
print(f'{user.login} - "{user.name}" - "{user.company}" - {user.email} - {valid_check}')
def get_dev_emails():
"""
Read a file with developer emails. Supported email formats
first_name.last_name@intel.com
Import from Outlook: Last_name, First_name <first_name.last_name@intel.com>
"""
re_email = re.compile(r'.+<(.+)>')
emails = set()
cfg = Config()
with open(cfg.properties['EMAILS_FILE_PATH']) as file_obj:
for line in file_obj:
line = line.strip().lower()
if not line or line.startswith('#'):
continue
re_outlook_email = re_email.match(line)
if re_outlook_email:
line = re_outlook_email.group(1).strip()
if not is_intel_email(line):
print(f'Wrong email in {cfg.properties["EMAILS_FILE_PATH"]}: {line}')
continue
emails.add(line)
return emails
class GithubOrgApi:
"""Common API for GitHub organization"""
def __init__(self):
self._cfg = Config()
self.github = Github(self._cfg.GITHUB_TOKEN)
self.github_org = self.github.get_organization(self._cfg.GITHUB_ORGANIZATION)
self.repo = self.github.get_repo(f'{self._cfg.GITHUB_ORGANIZATION}/'
f'{self._cfg.GITHUB_REPO}')
def is_org_user(self, user):
"""Checks that user is a member of GitHub organization"""
if is_valid_user(user):
try:
membership = user.get_organization_membership(self.github_org)
# membership.role can be 'member' or 'admin'
if membership.state == 'active' and membership.role:
return True
except UnknownObjectException:
pass
return False
def get_org_emails(self):
"""Gets and prints all emails of GitHub organization members"""
org_members = self.github_org.get_members()
org_emails = set()
org_members_fix = set()
org_emails_fix_name = set()
org_logins_fix_intel_email = set()
print(f'\nOrg members {org_members.totalCount} (login - name - company - email - valid):')
for org_member in org_members:
print_users(org_member)
if is_user_ignored(org_member):
continue
if is_intel_email(org_member.email):
org_emails.add(org_member.email.lower())
if not is_valid_name(org_member.name):
org_members_fix.add(org_member)
org_emails_fix_name.add(org_member.email.lower())
else:
org_members_fix.add(org_member)
org_logins_fix_intel_email.add(org_member.login.lower())
print_users(org_members_fix)
print(f'\nOrg members - no Intel emails {len(org_logins_fix_intel_email)}:',
'; '.join(org_logins_fix_intel_email))
print(f'\nOrg members - no real name {len(org_emails_fix_name)}:',
'; '.join(org_emails_fix_name))
return org_emails
def get_org_invitation_emails(self):
"""Gets GitHub organization teams prints info"""
org_invitations = self.github_org.invitations()
org_invitation_emails = set()
print(f'\nOrg invitations {org_invitations.totalCount} (login - name - email - valid):')
for org_invitation in org_invitations:
# TODO: investigate GithubException while access to user name and enable print_users()
# github.GithubException.IncompletableObject: 400 "Returned object contains no URL"
#print_users(org_invitation)
print(f'{org_invitation.login} - ??? - {org_invitation.email} - ???')
if is_user_ignored(org_invitation):
continue
if is_intel_email(org_invitation.email):
org_invitation_emails.add(org_invitation.email.lower())
else:
print('Strange org invitation:', org_invitation)
print(f'\nOrg invitation emails {len(org_invitation_emails)}:',
'; '.join(org_invitation_emails))
return org_invitation_emails
def get_org_teams(self):
"""Gets GitHub organization teams prints info"""
teams = []
org_teams = self.github_org.get_teams()
print('\nOrg teams count:', org_teams.totalCount)
for team in org_teams:
teams.append(team.name)
print(f'\nTeam: {team.name} - parent: {team.parent}')
repos = team.get_repos()
print('Repos:')
for repo in repos:
print(f' {repo.name} -', team.get_repo_permission(repo))
team_maintainers = team.get_members(role='maintainer')
team_maintainer_logins = set()
for maintainer in team_maintainers:
team_maintainer_logins.add(maintainer.login)
team_members = team.get_members(role='member')
team_member_logins = set()
for member in team_members:
team_member_logins.add(member.login)
members = team.get_members(role='all')
member_emails = []
print('Members (role - login - name - company - email - valid):')
for user in members:
if user.login in team_maintainer_logins:
print(' Maintainer - ', end='')
elif user.login in team_member_logins:
print(' Member - ', end='')
else:
# It is not possible to check child teams members
print(' ??? - ', end='')
print_users(user)
if is_intel_email(user.email) and not is_user_ignored(user):
member_emails.append(user.email.lower())
print(f'Intel emails {len(member_emails)}:', '; '.join(member_emails))
return teams
def get_valid_github_users(self, emails):
"""Gets valid GitHub users by email and prints status"""
valid_users = set()
no_account_emails = set()
print(f'\nGitHub users from {len(emails)} invite emails (email - status):')
for email in emails:
if not is_intel_email(email):
print(f'{email} - Non Intel email')
continue
# You can make up to 30 requests per minute; https://developer.github.com/v3/search/
# Sleep 2.4 sec is about 25 requests per minute
time.sleep(2.4)
try:
users = self.github.search_users(f'{email} in:email')
except RateLimitExceededException:
time.sleep(5)
users = self.github.search_users(f'{email} in:email')
if users.totalCount == 0:
print(f'{email} - No valid GitHub account')
no_account_emails.add(email)
continue
if users.totalCount > 1:
print(f'{email} - Found {users.totalCount} GitHub accounts')
for user in users:
if user.email and user.email.lower() == email:
print(f'{email} - OK')
valid_users.add(user)
else:
print(f'{email} - Non public or wrong email - login: {user.login} - '
f'email: {user.email}')
print('Valid users count:', len(valid_users))
print_users(valid_users)
print(f'\nIntel emails - No valid GitHub account {len(no_account_emails)}:',
'; '.join(no_account_emails))
return valid_users
def invite_users(self, users):
"""Invites users and prints status"""
if isinstance(users, (list, set)):
print(f'\nInvite {len(users)} users:')
else:
users = [users]
for user in users:
if isinstance(user, str):
print(f'Email: {user}')
self.github_org.invite_user(email=user)
else:
print(f'{user.login} - "{user.name}" - {user.email} - ', end='')
try:
if is_user_ignored(user):
print('Ignored')
continue
if not self._cfg.DRY_RUN:
self.github_org.invite_user(user=user)
print('OK')
else:
print('Dry run')
except GithubException as exc:
print(f'FAIL: {exc.data["errors"][0]["message"]}')
def _test():
"""Test and debug"""
Config(cli_args=['DRY_RUN=True'])
dev_emails = get_dev_emails()
print('dev_emails:', dev_emails)
gh_api = GithubOrgApi()
gh_api.get_org_emails()
if __name__ == '__main__':
_test()

View File

@@ -1 +0,0 @@
PyGithub==1.51

View File

@@ -1 +0,0 @@
pylint==2.3.0

View File

@@ -1,92 +0,0 @@
name: Code Style
on: [push, pull_request]
jobs:
nGraph:
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v2
with:
submodules: recursive
- name: Install clang-format-3.9
run: sudo apt --assume-yes install clang-format-3.9
- name: Install dependencies
run: |
sudo apt --assume-yes install libusb-1.0-0-dev
python3 -m pip install -r ./inference-engine/ie_bridges/python/requirements.txt
- name: CMake
run: |
mkdir build
cd build
cmake ..
- name: Check code style
run: make style-check
working-directory: build
- name: Create code style diff
if: failure()
run: |
ngraph/maint/apply-code-format.sh
git diff >ngraph_code_style_diff.patch
- uses: actions/upload-artifact@v2
if: failure()
with:
name: ngraph_code_style_diff
path: ngraph_code_style_diff.patch
ShellCheck:
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v2
with:
submodules: recursive
- name: Install ShellCheck
run: sudo apt --assume-yes install shellcheck
- name: Install dependencies
run: |
sudo apt --assume-yes install libusb-1.0-0-dev
python3 -m pip install -r ./inference-engine/ie_bridges/python/requirements.txt
- name: CMake
run: |
mkdir build
cd build
cmake ..
- name: ShellCheck
run: make ie_shellcheck
working-directory: build
Java:
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v2
- uses: actions/setup-java@v1
with:
java-version: '11'
- name: Install dependencies
run: |
wget -nc https://github.com/google/google-java-format/releases/download/google-java-format-1.9/google-java-format-1.9-all-deps.jar
- name: Check code style
run: |
java -jar google-java-format-1.9-all-deps.jar --set-exit-if-changed -a -i $(find . -type f -name "*.java")
- name: Create code style diff
if: failure()
run: |
git diff >java_code_style_diff.patch
- uses: actions/upload-artifact@v2
if: failure()
with:
name: java_code_style_diff
path: java_code_style_diff.patch

View File

@@ -1,17 +0,0 @@
name: Files Size
on: [push, pull_request]
jobs:
Check-Files-Size:
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v2
- name: git ls-tree
run: |
git ls-tree -r -t -l --full-name HEAD | sort -n -r -k 4
- name: git lfs ls-files
run: |
git lfs ls-files --size

View File

@@ -12,9 +12,6 @@ jobs:
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v2
with:
submodules: recursive
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
with:
@@ -35,25 +32,19 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip setuptools
# For Pylint
pip install tensorflow==1.14.0 tensorboard==1.14.0 tensorflow-estimator==1.14.0
# For UT
pip install unittest-xml-reporting==3.0.2
# MO requirements
pip install -r requirements.txt
pip install -r requirements_dev.txt
# requrements for CMake
sudo apt --assume-yes install libusb-1.0-0-dev
working-directory: model-optimizer
- name: Pylint
run: pylint -d C,R,W mo/ mo.py extensions/
working-directory: model-optimizer
- name: CMake
run: |
mkdir build
cd build
cmake ..
- name: UT
run: |
export PYTHONPATH=$PYTHONPATH:`pwd`

387
.gitignore vendored
View File

@@ -1,71 +1,342 @@
# build/artifact dirs
_*
# but ensure we don't skip __init__.py
!__init__.py
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
# developer tools
*.idea
.vscode
cmake-build-*
# User-specific files
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
[Xx]64/
[Xx]86/
[Bb]uild/
bld/
[Bb]in/
[Oo]bj/
# PY.TEST
*.pyc
tests/integration/report.html
tests/integration/report.xml
tests/integration/assets/
tests/integration/__pycache__/
# Visual Studio 2015 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUNIT
*.VisualState.xml
TestResult.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# DNX
project.lock.json
artifacts/
*_i.c
*_p.c
*_i.h
*.ilk
*.meta
*.obj
*.pch
*.pdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# JustCode is a .NET coding add-in
.JustCode
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# TODO: Un-comment the next line if you do not want to checkin
# your web deploy settings because they may include unencrypted
# passwords
#*.pubxml
*.publishproj
# NuGet Packages
*.nupkg
# The packages folder can be ignored because of Package Restore
**/packages/*
# except build/, which is used as an MSBuild target.
!**/packages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/packages/repositories.config
# NuGet v3's project.json files produces more ignoreable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Microsoft Azure ApplicationInsights config file
ApplicationInsights.config
# Windows Store app package directory
AppPackages/
BundleArtifacts/
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!*.[Cc]ache/
# Others
ClientBin/
[Ss]tyle[Cc]op.*
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.pfx
*.publishsettings
node_modules/
orleans.codegen.cs
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
# SQL Server files
*.mdf
*.ldf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Target VS files:
vsx64
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# LightSwitch generated files
GeneratedArtifacts/
ModelManifest.xml
# Paket dependency manager
.paket/paket.exe
# FAKE - F# Make
.fake/
*.filters
/External
/Output
/InferenceEngineMain/models
/Test
/HTTPClient/*.a
/InferenceEngineMain/newModels
.DS_Store
**/tags
compile_commands.json
bin/
build/
.local_vimrc
.gdb_history
.vimspector.json
doc/
!ngraph/doc
docs/build_documentation/work_dir/
inference-engine/plugins/
inference-engine/temp
inference-engine/report
.repo/
docs/template_plugin/html/
CMakeLists.txt.user
docs/IE_PLUGIN_DG/html/
*.project
*.cproject
*.pydevproject
*.settings
*/gen/
# For IDEA
.idea/
VS/
Xcode/
temp/
report/
.kdev4/
*.kdev4
*.kate-swp
/lin-build
/win-build
/CMakeFiles
*.stamp
*.depend
*.vcxproj
*.sln
/CMakeCache.txt
.vimprj/
build_IA32/
.dir-locals.el
GTAGS
GPATH
GRTAGS
GSYMS
compile_commands.json
service/dot-net-service/Output
**/sublime_build
/.project
.vscode/
/vsx32
/service/dot-net-service/.klocwork/DotNetService
cmake-build-*/
/lin64
.gdb_history
.local_vimrc
.ycm_extra_conf.py
tags
# from Model Optimizer repo
.idea
.project
.cproject
.pydevproject
.settings
/bin/
/gen/
__pycache__
*.swp
/config.xml
# Python-specific
*.env3
.env3
*.pyc
# Tests-specific
*.coverage
*htmlcov
*pylint_report.txt
*pylint_report_comments.txt
.coverage
htmlcov
pylint_report.txt
pylint_report_comments.txt
# Documentation-generated
docs/build
docs/source/_static
docs/source/_templates
docs/source/generated/
# Artifacts
/model-optimizer/*.bin
/model-optimizer/*.xml
/model-optimizer/*.json
/model-optimizer/*.so
/model-optimizer/*.txt
/model-optimizer/*.pb
/model-optimizer/*.pbtxt
/model-optimizer/!CMakeLists.txt
/model-optimizer/*.mapping
/model-optimizer/*.dat
/model-optimizer/*.svg
# ngraph
ngraph/src/CPackConfig.cmake
ngraph/src/CPackSourceConfig.cmake
ngraph/src/VERSION
ngraph/src/gtest/
ngraph/src/json/
ngraph/src/ngraphConfig.cmake
ngraph/src/ngraphConfigVersion.cmake
ngraph/src/protobuf/
ngraph/src/src/
ngraph/src/test/
/*.bin
/*.xml
/*.json
/*.so
/*.txt
/*.mapping
/*.dat
/*.svg

14
.gitmodules vendored
View File

@@ -2,15 +2,7 @@
path = inference-engine/thirdparty/ade
url = https://github.com/opencv/ade.git
ignore = dirty
[submodule "inference-engine/thirdparty/mkl-dnn"]
path = inference-engine/thirdparty/mkl-dnn
url = https://github.com/openvinotoolkit/oneDNN.git
[submodule "ngraph"]
path = ngraph
url = https://github.com/NervanaSystems/ngraph.git
ignore = dirty
[submodule "inference-engine/tests/ie_test_utils/common_test_utils/gtest"]
path = inference-engine/tests/ie_test_utils/common_test_utils/gtest
url = https://github.com/openvinotoolkit/googletest.git
ignore = dirty
[submodule "inference-engine/samples/thirdparty/gflags"]
path = inference-engine/samples/thirdparty/gflags
url = https://github.com/gflags/gflags.git
ignore = dirty

View File

@@ -202,7 +202,7 @@ ignore-mixin-members=yes
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis. It
# supports qualified module names, as well as Unix pattern matching.
ignored-modules=flask_sqlalchemy,app.extensions.flask_sqlalchemy,distutils
ignored-modules=flask_sqlalchemy,app.extensions.flask_sqlalchemy
# List of class names for which member attributes should not be checked (useful
# for classes with dynamically set attributes). This supports the use of

View File

@@ -2,21 +2,34 @@
# SPDX-License-Identifier: Apache-2.0
#
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
cmake_policy(SET CMP0054 NEW)
# TODO: for make instal / package we need to use 3.13.3 version because
# it allows to install targets created outside of current projects
# See https://blog.kitware.com/cmake-3-13-0-available-for-download/
if (APPLE)
# due to https://cmake.org/cmake/help/v3.12/policy/CMP0068.html
cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
else()
cmake_minimum_required(VERSION 3.7.2 FATAL_ERROR)
endif()
project(OpenVINO)
set(OpenVINO_MAIN_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(IE_MAIN_SOURCE_DIR ${OpenVINO_MAIN_SOURCE_DIR}/inference-engine)
list(APPEND CMAKE_MODULE_PATH "${OpenVINO_MAIN_SOURCE_DIR}/cmake")
set(CMAKE_MODULE_PATH "${OpenVINO_MAIN_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
include(CTest)
include(features)
# include developer package
include(developer_package)
include(developer_package NO_POLICY_SCOPE)
# These options are shared with 3rdparty plugins by means of developer package
# These options are shared with 3rdparty plugins
# by means of developer package
include(check_features)
include(dependencies)
@@ -24,7 +37,7 @@ include(dependencies)
message (STATUS "PROJECT ............................... " ${PROJECT_NAME})
message (STATUS "CMAKE_BINARY_DIR ...................... " ${CMAKE_BINARY_DIR})
message (STATUS "OpenVINO_MAIN_SOURCE_DIR .............. " ${OpenVINO_MAIN_SOURCE_DIR})
message (STATUS "IE_MAIN_SOURCE_DIR .................... " ${IE_MAIN_SOURCE_DIR})
message (STATUS "IE_MAIN_SOURCE_DIR .............. " ${IE_MAIN_SOURCE_DIR})
message (STATUS "CMAKE_GENERATOR ....................... " ${CMAKE_GENERATOR})
message (STATUS "CMAKE_C_COMPILER_ID ................... " ${CMAKE_C_COMPILER_ID})
message (STATUS "CMAKE_BUILD_TYPE ...................... " ${CMAKE_BUILD_TYPE})
@@ -33,10 +46,6 @@ message (STATUS "CMAKE_BUILD_TYPE ...................... " ${CMAKE_BUILD_TYPE})
file(REMOVE "${CMAKE_BINARY_DIR}/targets_developer.cmake")
file(REMOVE "${CMAKE_BINARY_DIR}/targets.cmake")
#
# Build
#
function(build_ngraph)
function(ngraph_set option value)
if(NOT DEFINED ${option})
@@ -52,46 +61,32 @@ function(build_ngraph)
else ()
ngraph_set(NGRAPH_ADDRESS_SANITIZER FALSE)
endif ()
ngraph_set(NGRAPH_TOOLS_ENABLE FALSE)
ngraph_set(NGRAPH_CPU_ENABLE FALSE)
ngraph_set(NGRAPH_INTERPRETER_ENABLE TRUE)
ngraph_set(NGRAPH_NOP_ENABLE FALSE)
ngraph_set(NGRAPH_GPUH_ENABLE FALSE)
ngraph_set(NGRAPH_GENERIC_CPU_ENABLE FALSE)
ngraph_set(NGRAPH_ENABLE_CPU_CONV_AUTO FALSE)
ngraph_set(NGRAPH_PYTHON_BUILD_ENABLE FALSE)
ngraph_set(NGRAPH_PLAIDML_ENABLE FALSE)
ngraph_set(NGRAPH_FAST_MATH_ENABLE FALSE)
ngraph_set(NGRAPH_JSON_ENABLE FALSE)
ngraph_set(NGRAPH_DYNAMIC_COMPONENTS_ENABLE FALSE)
ngraph_set(NGRAPH_NATIVE_ARCH_ENABLE FALSE)
if(ENABLE_TESTS AND NOT ANDROID)
if (NOT ANDROID)
ngraph_set(NGRAPH_UNIT_TEST_ENABLE TRUE)
else()
ngraph_set(NGRAPH_UNIT_TEST_ENABLE FALSE)
endif()
if(NOT (ANDROID OR WINDOWS_STORE OR (MSVC AND (ARM OR AARCH64)) ))
ngraph_set(NGRAPH_UNIT_TEST_OPENVINO_ENABLE TRUE)
ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE TRUE)
else()
ngraph_set(NGRAPH_UNIT_TEST_ENABLE FALSE)
ngraph_set(NGRAPH_TEST_UTIL_ENABLE FALSE)
ngraph_set(NGRAPH_UNIT_TEST_OPENVINO_ENABLE FALSE)
ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE FALSE)
endif()
ngraph_set(NGRAPH_INTERPRETER_ENABLE TRUE)
if(TREAT_WARNING_AS_ERROR)
ngraph_set(NGRAPH_WARNINGS_AS_ERRORS ON)
else()
ngraph_set(NGRAPH_WARNINGS_AS_ERRORS OFF)
endif()
if(COVERAGE)
ngraph_set(NGRAPH_CODE_COVERAGE_ENABLE ON)
else()
ngraph_set(NGRAPH_CODE_COVERAGE_ENABLE OFF)
endif()
if(ENABLE_SANITIZER)
ngraph_set(NGRAPH_ADDRESS_SANITIZER_ENABLE ON)
else()
ngraph_set(NGRAPH_ADDRESS_SANITIZER_ENABLE OFF)
endif()
if(ENABLE_THREAD_SANITIZER)
ngraph_set(NGRAPH_THREAD_SANITIZER_ENABLE ON)
else()
ngraph_set(NGRAPH_THREAD_SANITIZER_ENABLE OFF)
endif()
if(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$")
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
ie_add_compiler_flags(-Wno-error=uninitialized -Wno-error=literal-conversion)
elseif(UNIX)
ie_add_compiler_flags(-Wno-error=maybe-uninitialized -Wno-error=return-type -fPIC)
@@ -104,11 +99,11 @@ function(build_ngraph)
if (UNIX)
ie_add_compiler_flags(-Wno-error=return-type -Wno-undef)
elseif(WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4308 /wd4146 /wd4703 /wd4244 /wd4819")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4308 /wd4146 /wd4703 /wd4244")
endif()
if(ENABLE_LTO)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE ON)
ie_enable_lto()
endif()
ie_cpack_add_component(ngraph)
@@ -116,63 +111,15 @@ function(build_ngraph)
set(SDL_cmake_included ON)
# set(NGRAPH_COMPONENT_PREFIX "deployment_tools/ngraph/")
add_subdirectory(ngraph)
set(NGRAPH_LIBRARIES ngraph PARENT_SCOPE)
endfunction()
file(REMOVE "${CMAKE_BINARY_DIR}/openvino_targets_developer.cmake")
unset(OpenVINODeveloperPackageTargets CACHE)
function(openvino_developer_export_targets)
set(OpenVINODeveloperPackageTargets "${OpenVINODeveloperPackageTargets};${ARGV}")
# to allow exporting of aliased targets with the original names
foreach(target_name ${OpenVINODeveloperPackageTargets})
if(TARGET "${target_name}")
get_target_property(original_name ${target_name} ALIASED_TARGET)
if(TARGET "${original_name}")
message(STATUS "The name ${target_name} is an ALIAS for ${original_name}. "
"It will be exported to the InferenceEngineDeveloperPackage with the original name.")
list(REMOVE_ITEM OpenVINODeveloperPackageTargets ${target_name})
list(APPEND OpenVINODeveloperPackageTargets ${original_name})
endif()
endif()
endforeach()
list(REMOVE_DUPLICATES OpenVINODeveloperPackageTargets)
set(OpenVINODeveloperPackageTargets "${OpenVINODeveloperPackageTargets}" CACHE INTERNAL
"Paths to extra Inference Engine plugins" FORCE)
endfunction()
add_subdirectory(openvino)
build_ngraph()
add_subdirectory(inference-engine)
add_subdirectory(model-optimizer)
add_subdirectory(docs)
#
# Shellcheck
#
ie_shellcheck_process(DIRECTORY "${OpenVINO_MAIN_SOURCE_DIR}"
SKIP "${OpenVINO_MAIN_SOURCE_DIR}/bin"
"${OpenVINO_MAIN_SOURCE_DIR}/build"
"${IE_MAIN_SOURCE_DIR}/tests/ie_test_utils/common_test_utils/gtest"
"${IE_MAIN_SOURCE_DIR}/samples/thirdparty"
"${IE_MAIN_SOURCE_DIR}/thirdparty"
"${IE_MAIN_SOURCE_DIR}/temp"
# TODO fix and enable back:
"${OpenVINO_MAIN_SOURCE_DIR}/scripts/install_dependencies"
"${OpenVINO_MAIN_SOURCE_DIR}/scripts/demo"
"${OpenVINO_MAIN_SOURCE_DIR}/ngraph"
"${IE_MAIN_SOURCE_DIR}/scripts")
#
# cpack
#
# install setupvars

View File

@@ -8,18 +8,13 @@ CODEOWNERS @openvinotoolkit/openvino-admins @openvinotoolkit/openvino-maintaine
Jenkinsfile @openvinotoolkit/openvino-admins
azure-pipelines.yml @openvinotoolkit/openvino-admins
/.github/ @openvinotoolkit/openvino-admins
/.ci/ @openvinotoolkit/openvino-admins
# QA Tests:
/tests/ @openvinotoolkit/openvino-tests-maintainers
# OpenVINO Scripts
/scripts/ @openvinotoolkit/openvino-admins @openvinotoolkit/openvino-scripts-maintainers
# IE Core:
/inference-engine/ @openvinotoolkit/openvino-ie-maintainers
/inference-engine/ie_bridges/python @openvinotoolkit/openvino-ie-python-api-maintainers
/inference-engine/src/transformations/ @GlebKazantaev @ilyachur
/inference-engine/src/transformations/ @GlebKazantaev @ichuraev
/inference-engine/src/legacy_api/ @openvinotoolkit/openvino-ngraph-maintainers
/inference-engine/src/readers/ @openvinotoolkit/openvino-ngraph-maintainers
@@ -69,7 +64,3 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins
# Tools
/tools/ @openvinotoolkit/openvino-tools-maintainers
# Documentation
/docs/ @openvinotoolkit/openvino-docs-maintainers
*.md @openvinotoolkit/openvino-docs-maintainers

18
CONTRIBUTING.md Normal file
View File

@@ -0,0 +1,18 @@
# How to Contribute
We welcome community contributions to the OpenVINO™ repository.
If you have an idea how to improve the product, please share it
with us doing the following steps:
* Make sure you can build the product and run all tests and samples with your patch
* In case of a larger feature, provide relevant unit tests and one or more sample
* Submit a pull request at https://github.com/openvinotoolkit/openvino/pulls
## OpenVINO™ Coding Style Guide
We basically use the Google style (https://google.github.io/styleguide/cppguide.html) with some exceptions:
* 4 spaces instead of 2 spaces for indentations
* Limitation of 160 symbols for the line length
* Exceptions are allowed
* Using namespace are allowed in cpp and prohibited in headers
* Underscore symbol before member in classes/structures
* thisStyleForFunctions()
* theSameStyleForVariables

15
Jenkinsfile vendored
View File

@@ -1,15 +0,0 @@
#!groovy
properties([
parameters([
booleanParam(defaultValue: true,
description: 'Cancel the rest of parallel stages if one of them fails and return status immediately',
name: 'failFast'),
string(defaultValue: '',
description: 'Pipeline shared library version (branch/tag/commit). Determined automatically if empty',
name: 'library_version')
])
])
loadOpenVinoLibrary {
entrypoint(this)
}

View File

@@ -1,40 +1,42 @@
# [OpenVINO™ Toolkit](https://01.org/openvinotoolkit) - Deep Learning Deployment Toolkit repository
[![Stable release](https://img.shields.io/badge/version-2021.2-green.svg)](https://github.com/openvinotoolkit/openvino/releases/tag/2021.2)
[![Stable release](https://img.shields.io/badge/version-2020.3-green.svg)](https://github.com/openvinotoolkit/openvino/releases/tag/2020.3.0)
[![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE)
![Azure DevOps builds (branch)](https://img.shields.io/azure-devops/build/openvinoci/b2bab62f-ab2f-4871-a538-86ea1be7d20f/9/master?label=Public%20CI)
This toolkit allows developers to deploy pre-trained deep learning models
through a high-level C++ Inference Engine API integrated with application logic.
This toolkit allows developers to deploy pre-trained deep learning models
through a high-level C++ Inference Engine API integrated with application logic.
This open source version includes several components: namely [Model Optimizer], [ngraph] and
[Inference Engine], as well as CPU, GPU, MYRIAD, multi device and heterogeneous plugins to accelerate deep learning inferencing on Intel® CPUs and Intel® Processor Graphics.
It supports pre-trained models from the [Open Model Zoo], along with 100+ open
source and public models in popular formats such as Caffe\*, TensorFlow\*,
MXNet\* and ONNX\*.
This open source version includes two components: namely [Model Optimizer] and
[Inference Engine], as well as CPU, GPU and heterogeneous plugins to accelerate
deep learning inferencing on Intel® CPUs and Intel® Processor Graphics.
It supports pre-trained models from the [Open Model Zoo], along with 100+ open
source and public models in popular formats such as Caffe\*, TensorFlow\*,
MXNet\* and ONNX\*.
## Repository components:
* [Inference Engine]
* [ngraph]
* [Model Optimizer]
## License
Deep Learning Deployment Toolkit is licensed under [Apache License Version 2.0](LICENSE).
By contributing to the project, you agree to the license and copyright terms therein
By contributing to the project, you agree to the license and copyright terms therein
and release your contribution under these terms.
## Resources:
* Docs: https://docs.openvinotoolkit.org/
* Wiki: https://github.com/openvinotoolkit/openvino/wiki
* Issue tracking: https://github.com/openvinotoolkit/openvino/issues
* Additional OpenVINO modules: https://github.com/openvinotoolkit/openvino_contrib
* [HomePage](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html)
## Documentation
* [OpenVINO™ Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes)
* [OpenVINO™ Inference Engine Build Instructions](build-instruction.md)
* [Get Started with Deep Learning Deployment Toolkit on Linux](get-started-linux.md)\*
* [Introduction to Deep Learning Deployment Toolkit](https://docs.openvinotoolkit.org/latest/_docs_IE_DG_Introduction.html)
* [Inference Engine Developer Guide](https://docs.openvinotoolkit.org/latest/_docs_IE_DG_Deep_Learning_Inference_Engine_DevGuide.html)
* [Model Optimizer Developer Guide](https://docs.openvinotoolkit.org/latest/_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html)
## How to Contribute
See [CONTRIBUTING](./CONTRIBUTING.md) for details. Thank you!
## Support
Please report questions, issues and suggestions using:
* The [`openvino`](https://stackoverflow.com/questions/tagged/openvino) tag on StackOverflow\*
* [GitHub* Issues](https://github.com/openvinotoolkit/openvino/issues)
* The `openvino` [tag on StackOverflow]\*
* [GitHub* Issues](https://github.com/openvinotoolkit/openvino/issues)
* [Forum](https://software.intel.com/en-us/forums/computer-vision)
---
@@ -44,4 +46,3 @@ Please report questions, issues and suggestions using:
[Inference Engine]:https://software.intel.com/en-us/articles/OpenVINO-InferEngine
[Model Optimizer]:https://software.intel.com/en-us/articles/OpenVINO-ModelOptimizer
[tag on StackOverflow]:https://stackoverflow.com/search?q=%23openvino
[ngraph]:https://docs.openvinotoolkit.org/latest/openvino_docs_nGraph_DG_DevGuide.html

View File

@@ -1,12 +0,0 @@
# Security Policy
## Report a Vulnerability
Please report security issues or vulnerabilities to the [Intel® Security Center].
For more information on how Intel® works to resolve security issues, see
[Vulnerability Handling Guidelines].
[Intel® Security Center]:https://www.intel.com/security
[Vulnerability Handling Guidelines]:https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html

345
azure-pipelines.yml Normal file
View File

@@ -0,0 +1,345 @@
jobs:
- job: Lin
# About 150% of total time
timeoutInMinutes: 75
pool:
#vmImage: 'ubuntu-18.04'
name: LIN_VMSS_VENV_F8S_WU2
variables:
BUILD_TYPE: Release
BIN_DIR: ../bin/intel64/$(BUILD_TYPE)
steps:
- script: |
whoami
uname -a
which python3
gcc --version
lsb_release
env
cat /proc/cpuinfo
cat /proc/meminfo
vmstat -s
df
displayName: 'System properties'
- script: |
sudo apt --assume-yes install libusb-1.0-0-dev
python3 -m pip install -r ./inference-engine/ie_bridges/python/requirements.txt
# For running Python API tests
python3 -m pip install -r ./inference-engine/ie_bridges/python/src/requirements-dev.txt
displayName: 'Install dependencies'
- script: |
wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip
unzip ninja-linux.zip
sudo cp -v ninja /usr/local/bin/
displayName: 'Install Ninja'
- script: git submodule update --init --recursive --jobs 8
displayName: 'Clone submodules'
- script: |
mkdir dldt-build
cd dldt-build
displayName: 'Create build directory'
- task: CMake@1
inputs:
workingDirectory: dldt-build
# CMake must get Python 3.x version by default
cmakeArgs: .. -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE=/usr/bin/python3.6 -DENABLE_TESTS=ON
- script: ninja
workingDirectory: dldt-build
displayName: 'Build Lin'
- script: ls -alR ../bin/
workingDirectory: dldt-build
displayName: 'List files'
- script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*GPU*:*CPU*:constant.shared_data
workingDirectory: dldt-build
displayName: 'nGraph UT'
continueOnError: false
- script: $(BIN_DIR)/InferenceEngineUnitTests
workingDirectory: dldt-build
displayName: 'IE UT old'
continueOnError: false
- script: $(BIN_DIR)/ieUnitTests
workingDirectory: dldt-build
displayName: 'IE UT'
continueOnError: false
- script: $(BIN_DIR)/cpuUnitTests
workingDirectory: dldt-build
displayName: 'CPU UT'
continueOnError: false
- script: $(BIN_DIR)/gnaUnitTests
workingDirectory: dldt-build
displayName: 'GNA UT'
continueOnError: false
- script: $(BIN_DIR)/vpuUnitTests
workingDirectory: dldt-build
displayName: 'VPU UT'
continueOnError: false
- script: $(BIN_DIR)/ieFuncTests
workingDirectory: dldt-build
displayName: 'IE FuncTests'
continueOnError: false
- script: $(BIN_DIR)/cpuFuncTests
workingDirectory: dldt-build
displayName: 'CPU FuncTests'
continueOnError: false
- script: $(BIN_DIR)/MklDnnBehaviorTests
workingDirectory: dldt-build
displayName: 'MklDnnBehaviorTests'
continueOnError: false
enabled: false
- script: git clone https://github.com/openvinotoolkit/testdata.git
displayName: 'Clone testdata'
enabled: false
- script: |
export DATA_PATH=`pwd`/../testdata
export MODELS_PATH=`pwd`/../testdata
$(BIN_DIR)/MklDnnFunctionalTests --gtest_filter=*smoke*:-smoke_MobileNet/ModelTransformationsTest.LPT/mobilenet_v2_tf_depthwise_batch1_inPluginDisabled_inTestDisabled_asymmetric*
workingDirectory: dldt-build
displayName: 'MklDnnFunctionalTests'
continueOnError: false
enabled: false
- script: |
export DATA_PATH=`pwd`/../testdata
export MODELS_PATH=`pwd`/../testdata
$(BIN_DIR)/InferenceEngineCAPITests
workingDirectory: dldt-build
displayName: 'IE CAPITests'
continueOnError: false
enabled: false
- script: |
export DATA_PATH=`pwd`/../testdata
export MODELS_PATH=`pwd`/../testdata
export LD_LIBRARY_PATH=`pwd`/$(BIN_DIR)/lib
export PYTHONPATH=`pwd`/$(BIN_DIR)/lib/python_api/python3.6
env
cd ../inference-engine/ie_bridges/python/tests
pytest
workingDirectory: dldt-build
displayName: 'Python API Tests'
continueOnError: false
enabled: false
- job: Mac
# About 150% of total time
timeoutInMinutes: 130
pool:
vmImage: 'macOS-10.15'
variables:
BUILD_TYPE: Release
BIN_DIR: ../bin/intel64/$(BUILD_TYPE)
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7'
- script: |
whoami
uname -a
which python3
gcc --version
xcrun --sdk macosx --show-sdk-version
env
sysctl -a
displayName: 'System properties'
- script: |
brew install cython
brew install automake
displayName: 'Install dependencies'
- script: brew install ninja
displayName: 'Install Ninja'
- script: git submodule update --init --recursive --jobs 8
displayName: 'Clone submodules'
- script: |
mkdir dldt-build
cd dldt-build
displayName: 'Create build directory'
- script: |
export PATH="/usr/local/opt/cython/bin:$PATH"
export CC=gcc
export CXX=g++
# Disable errors with Ninja
#export CXXFLAGS="-Wno-error=unused-command-line-argument"
#export CFLAGS="-Wno-error=unused-command-line-argument"
cmake .. -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON
workingDirectory: dldt-build
displayName: 'CMake'
- script: make -j3
workingDirectory: dldt-build
displayName: 'Build Mac'
- script: ls -alR ../bin/
workingDirectory: dldt-build
displayName: 'List files'
- script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*GPU*:*CPU*:constant.shared_data
workingDirectory: dldt-build
displayName: 'nGraph UT'
continueOnError: false
- script: $(BIN_DIR)/InferenceEngineUnitTests --gtest_filter=-*MKLDNNGraph*
workingDirectory: dldt-build
displayName: 'IE UT old'
continueOnError: false
- script: $(BIN_DIR)/ieUnitTests
workingDirectory: dldt-build
displayName: 'IE UT'
continueOnError: false
- script: $(BIN_DIR)/cpuUnitTests
workingDirectory: dldt-build
displayName: 'CPU UT'
continueOnError: false
- script: $(BIN_DIR)/vpuUnitTests
workingDirectory: dldt-build
displayName: 'VPU UT'
continueOnError: false
- script: $(BIN_DIR)/ieFuncTests
workingDirectory: dldt-build
displayName: 'IE FuncTests'
continueOnError: false
- script: $(BIN_DIR)/cpuFuncTests
workingDirectory: dldt-build
displayName: 'CPU FuncTests'
continueOnError: false
- script: $(BIN_DIR)/MklDnnBehaviorTests
workingDirectory: dldt-build
displayName: 'MklDnnBehaviorTests'
continueOnError: false
enabled: false
- script: git clone https://github.com/openvinotoolkit/testdata.git
displayName: 'Clone testdata'
enabled: false
- script: |
export DATA_PATH=`pwd`/../testdata
export MODELS_PATH=`pwd`/../testdata
$(BIN_DIR)/MklDnnFunctionalTests --gtest_filter=*smoke*:-smoke_MobileNet/ModelTransformationsTest.LPT/mobilenet_v2_tf_depthwise_batch1_inPluginDisabled_inTestDisabled_asymmetric*
workingDirectory: dldt-build
displayName: 'MklDnnFunctionalTests'
continueOnError: false
enabled: false
- script: |
export DATA_PATH=`pwd`/../testdata
export MODELS_PATH=`pwd`/../testdata
$(BIN_DIR)/InferenceEngineCAPITests
workingDirectory: dldt-build
displayName: 'IE CAPITests'
continueOnError: false
enabled: false
- job: Win
# About 150% of total time
timeoutInMinutes: 120
pool:
#vmImage: 'vs2017-win2016'
name: WIN_VMSS_VENV_F8S_WU2
variables:
BUILD_TYPE: Release
BUILD_DIR: D:\dldt-build
BIN_DIR: ..\bin\intel64
MSVS_VARS_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat
MSVC_COMPILER_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\bin\Hostx64\x64\cl.exe
steps:
- script: |
where python3
wmic computersystem get TotalPhysicalMemory
wmic cpu list
wmic logicaldisk get description,name
wmic VOLUME list
set
displayName: 'System properties'
- script: |
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-win.zip ninja-win.zip
powershell -command "Expand-Archive -Force ninja-win.zip"
displayName: Install Ninja
- script: git submodule update --init --recursive --jobs 8
displayName: 'Clone submodules'
- script: |
rd /Q /S $(BUILD_DIR)
mkdir $(BUILD_DIR)\bin
rd /Q /S dldt-build
mkdir dldt-build
displayName: 'Create build directory'
- script: |
set PATH=$(Build.Repository.LocalPath)\ninja-win;%PATH%
call "$(MSVS_VARS_PATH)" && cmake -GNinja -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(Build.Repository.LocalPath)
workingDirectory: $(BUILD_DIR)
displayName: 'CMake'
- script: |
set PATH=$(Build.Repository.LocalPath)\ninja-win;%PATH%
call "$(MSVS_VARS_PATH)" && ninja
workingDirectory: $(BUILD_DIR)
displayName: 'Build Win'
- script: dir ..\bin\ /s /b
workingDirectory: dldt-build
displayName: 'List files'
- script: |
set PATH=$(Build.Repository.LocalPath)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*GPU*:*CPU*:constant.shared_data
workingDirectory: dldt-build
displayName: 'nGraph UT'
continueOnError: false
- script: |
set PATH=$(Build.Repository.LocalPath)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\InferenceEngineUnitTests
workingDirectory: dldt-build
displayName: 'IE UT old'
continueOnError: false
- script: |
set PATH=$(Build.Repository.LocalPath)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\ieUnitTests
workingDirectory: dldt-build
displayName: 'IE UT'
continueOnError: false
- script: |
set PATH=$(Build.Repository.LocalPath)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\cpuUnitTests
workingDirectory: dldt-build
displayName: 'CPU UT'
continueOnError: false
- script: |
set PATH=$(Build.Repository.LocalPath)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\gnaUnitTests
workingDirectory: dldt-build
displayName: 'GNA UT'
continueOnError: false
- script: |
set PATH=$(Build.Repository.LocalPath)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\vpuUnitTests
workingDirectory: dldt-build
displayName: 'VPU UT'
continueOnError: false
- script: |
set PATH=$(Build.Repository.LocalPath)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\ieFuncTests
workingDirectory: dldt-build
displayName: 'IE FuncTests'
continueOnError: false
- script: |
set PATH=$(Build.Repository.LocalPath)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\cpuFuncTests
workingDirectory: dldt-build
displayName: 'CPU FuncTests'
continueOnError: false
- script: |
set PATH=$(Build.Repository.LocalPath)\inference-engine\temp\tbb\bin;%PATH%
$(BIN_DIR)\MklDnnBehaviorTests
workingDirectory: dldt-build
displayName: 'MklDnnBehaviorTests'
continueOnError: false
enabled: false
- script: git clone https://github.com/openvinotoolkit/testdata.git
workingDirectory: $(BUILD_DIR)
displayName: 'Clone testdata'
enabled: false
- script: |
set PATH=$(Build.Repository.LocalPath)\inference-engine\temp\tbb\bin;$(Build.Repository.LocalPath)\inference-engine\temp\opencv_4.3.0\opencv\bin;%PATH%
set DATA_PATH=$(BUILD_DIR)\testdata
set MODELS_PATH=$(BUILD_DIR)\testdata
$(BIN_DIR)\MklDnnFunctionalTests --gtest_filter=*smoke*:-smoke_MobileNet/ModelTransformationsTest.LPT/mobilenet_v2_tf_depthwise_batch1_inPluginDisabled_inTestDisabled_asymmetric*
workingDirectory: dldt-build
displayName: 'MklDnnFunctionalTests'
continueOnError: false
enabled: false
- script: |
set PATH=$(Build.Repository.LocalPath)\inference-engine\temp\tbb\bin;$(Build.Repository.LocalPath)\inference-engine\temp\opencv_4.3.0\opencv\bin;%PATH%
set DATA_PATH=$(BUILD_DIR)\testdata
set MODELS_PATH=$(BUILD_DIR)\testdata
$(BIN_DIR)\InferenceEngineCAPITests
workingDirectory: dldt-build
displayName: 'IE CAPITests'
continueOnError: false
enabled: false

704
build-instruction.md Normal file
View File

@@ -0,0 +1,704 @@
# Build OpenVINO™ Inference Engine
## Contents
- [Introduction](#introduction)
- [Build on Linux\* Systems](#build-on-linux-systems)
- [Software Requirements](#software-requirements)
- [Build Steps](#build-steps)
- [Additional Build Options](#additional-build-options)
- [Build for Raspbian* Stretch OS](#build-for-raspbian-stretch-os)
- [Hardware Requirements](#hardware-requirements)
- [Native Compilation](#native-compilation)
- [Cross Compilation Using Docker\*](#cross-compilation-using-docker)
- [Additional Build Options](#additional-build-options-1)
- [Build on Windows* Systems](#build-on-windows-systems)
- [Software Requirements](#software-requirements-1)
- [Build Steps](#build-steps-1)
- [Additional Build Options](#additional-build-options-2)
- [Building Inference Engine with Ninja* Build System](#building-inference-engine-with-ninja-build-system)
- [Build on macOS\* Systems](#build-on-macos-systems)
- [Software Requirements](#software-requirements-2)
- [Build Steps](#build-steps-2)
- [Additional Build Options](#additional-build-options-3)
- [Build on Android\* Systems](#build-on-android-systems)
- [Software Requirements](#software-requirements-3)
- [Build Steps](#build-steps-3)
- [Use Custom OpenCV Builds for Inference Engine](#use-custom-opencv-builds-for-inference-engine)
- [Add Inference Engine to Your Project](#add-inference-engine-to-your-project)
- [(Optional) Additional Installation Steps for the Intel® Movidius™ Neural Compute Stick and Neural Compute Stick 2](#optional-additional-installation-steps-for-the-intel-movidius-neural-compute-stick-and-neural-compute-stick-2)
- [For Linux, Raspbian Stretch* OS](#for-linux-raspbian-stretch-os)
- [Next Steps](#next-steps)
- [Additional Resources](#additional-resources)
## Introduction
The Inference Engine can infer models in different formats with various input
and output formats.
The open source version of Inference Engine includes the following plugins:
| PLUGIN | DEVICE TYPES |
| ---------------------| -------------|
| CPU plugin | Intel® Xeon® with Intel® AVX2 and AVX512, Intel® Core™ Processors with Intel® AVX2, Intel® Atom® Processors with Intel® SSE |
| GPU plugin | Intel® Processor Graphics, including Intel® HD Graphics and Intel® Iris® Graphics |
| GNA plugin | Intel® Speech Enabling Developer Kit, Amazon Alexa\* Premium Far-Field Developer Kit, Intel® Pentium® Silver processor J5005, Intel® Celeron® processor J4005, Intel® Core™ i3-8121U processor |
| MYRIAD plugin | Intel® Movidius™ Neural Compute Stick powered by the Intel® Movidius™ Myriad™ 2, Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X |
| Heterogeneous plugin | Heterogeneous plugin enables computing for inference on one network on several Intel® devices. |
Inference Engine plugin for Intel® FPGA is distributed only in a binary form,
as a part of [Intel® Distribution of OpenVINO™].
## Build on Linux\* Systems
The software was validated on:
- Ubuntu\* 16.04 (64-bit) with default GCC\* 5.4.0
- CentOS\* 7.4 (64-bit) with default GCC\* 4.8.5
### Software Requirements
- [CMake]\* 3.11 or higher
- GCC\* 4.8 or higher to build the Inference Engine
- Python 2.7 or higher for Inference Engine Python API wrapper
- (Optional) [Install Intel® Graphics Compute Runtime for OpenCL™ Driver package 20.13.16352].
### Build Steps
1. Clone submodules:
```sh
cd openvino
git submodule update --init --recursive
```
2. Install build dependencies using the `install_dependencies.sh` script in the
project root folder.
```sh
chmod +x install_dependencies.sh
```
```sh
./install_dependencies.sh
```
3. By default, the build enables the Inference Engine GPU plugin to infer models
on your Intel® Processor Graphics. This requires you to
[Install Intel® Graphics Compute Runtime for OpenCL™ Driver package 20.13.16352]
before running the build. If you don't want to use the GPU plugin, use the
`-DENABLE_CLDNN=OFF` CMake build option and skip the installation of the
Intel® Graphics Compute Runtime for OpenCL™ Driver.
4. Create a build folder:
```sh
mkdir build && cd build
```
5. Inference Engine uses a CMake-based build system. In the created `build`
directory, run `cmake` to fetch project dependencies and create Unix
makefiles, then run `make` to build the project:
```sh
cmake -DCMAKE_BUILD_TYPE=Release ..
make --jobs=$(nproc --all)
```
### Additional Build Options
You can use the following additional build options:
- The default build uses an internal JIT GEMM implementation.
- To switch to an OpenBLAS\* implementation, use the `GEMM=OPENBLAS` option with
`BLAS_INCLUDE_DIRS` and `BLAS_LIBRARIES` CMake options to specify a path to the
OpenBLAS headers and library. For example, the following options on CentOS\*:
`-DGEMM=OPENBLAS -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DBLAS_LIBRARIES=/usr/lib64/libopenblas.so.0`.
- To switch to the optimized MKL-ML\* GEMM implementation, use `-DGEMM=MKL`
and `-DMKLROOT=<path_to_MKL>` CMake options to specify a path to unpacked
MKL-ML with the `include` and `lib` folders. MKL-ML\* package can be downloaded
from the Intel® [MKL-DNN repository].
- Threading Building Blocks (TBB) is used by default. To build the Inference
Engine with OpenMP\* threading, set the `-DTHREADING=OMP` option.
- Required versions of TBB and OpenCV packages are downloaded automatically by
the CMake-based script. If you want to use the automatically downloaded
packages but you already have installed TBB or OpenCV packages configured in
your environment, you may need to clean the `TBBROOT` and `OpenCV_DIR`
environment variables before running the `cmake` command, otherwise they
will not be downloaded and the build may fail if incompatible versions were
installed.
- If the CMake-based build script can not find and download the OpenCV package
that is supported on your platform, or if you want to use a custom build of
the OpenCV library, refer to the
[Use Custom OpenCV Builds](#use-custom-opencv-builds-for-inference-engine)
section for details.
- To build the Python API wrapper:
1. Install all additional packages listed in the
`/inference-engine/ie_bridges/python/requirements.txt` file:
```sh
pip install -r requirements.txt
```
2. Use the `-DENABLE_PYTHON=ON` option. To specify an exact Python version, use the following
options:
```
-DPYTHON_EXECUTABLE=`which python3.7` \
-DPYTHON_LIBRARY=/usr/lib/x86_64-linux-gnu/libpython3.7m.so \
-DPYTHON_INCLUDE_DIR=/usr/include/python3.7
```
- To switch the CPU and GPU plugins off/on, use the `cmake` options
`-DENABLE_MKL_DNN=ON/OFF` and `-DENABLE_CLDNN=ON/OFF` respectively.
- nGraph-specific compilation options:
`-DNGRAPH_ONNX_IMPORT_ENABLE=ON` enables the building of the nGraph ONNX importer.
`-DNGRAPH_JSON_ENABLE=ON` enables nGraph JSON-based serialization.
`-DNGRAPH_DEBUG_ENABLE=ON` enables additional debug prints.
## Build for Raspbian Stretch* OS
> **NOTE**: Only the MYRIAD plugin is supported.
### Hardware Requirements
* Raspberry Pi\* 2 or 3 with Raspbian\* Stretch OS (32-bit). Check that it's CPU supports ARMv7 instruction set (`uname -m` command returns `armv7l`).
> **NOTE**: Despite the Raspberry Pi\* CPU is ARMv8, 32-bit OS detects ARMv7 CPU instruction set. The default `gcc` compiler applies ARMv6 architecture flag for compatibility with lower versions of boards. For more information, run the `gcc -Q --help=target` command and refer to the description of the `-march=` option.
You can compile the Inference Engine for Raspberry Pi\* in one of the two ways:
* [Native Compilation](#native-compilation), which is the simplest way, but time-consuming
* [Cross Compilation Using Docker*](#cross-compilation-using-docker), which is the recommended way
### Native Compilation
Native compilation of the Inference Engine is the most straightforward solution. However, it might take at least one hour to complete on Raspberry Pi\* 3.
1. Install dependencies:
```bash
sudo apt-get update
sudo apt-get install -y git cmake libusb-1.0-0-dev
```
2. Go to the cloned `openvino` repository:
```bash
cd openvino
```
3. Initialize submodules:
```bash
git submodule update --init --recursive
```
4. Create a build folder:
```bash
mkdir build && cd build
```
5. Build the Inference Engine:
```bash
cmake -DCMAKE_BUILD_TYPE=Release \
-DENABLE_SSE42=OFF \
-DTHREADING=SEQ \
-DENABLE_GNA=OFF .. && make
```
### Cross Compilation Using Docker*
This compilation was tested on the following configuration:
* Host: Ubuntu\* 16.04 (64-bit, Intel® Core™ i7-6700K CPU @ 4.00GHz × 8)
* Target: Raspbian\* Stretch (32-bit, ARMv7, Raspberry Pi\* 3)
1. Install Docker\*:
```bash
sudo apt-get install -y docker.io
```
2. Add a current user to `docker` group:
```bash
sudo usermod -a -G docker $USER
```
Log out and log in for this to take effect.
3. Create a directory named `ie_cross_armhf` and add a text file named `Dockerfile`
with the following content:
```docker
FROM debian:stretch
USER root
RUN dpkg --add-architecture armhf && \
apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
crossbuild-essential-armhf \
git \
wget \
libusb-1.0-0-dev:armhf \
libgtk-3-dev:armhf \
libavcodec-dev:armhf \
libavformat-dev:armhf \
libswscale-dev:armhf \
libgstreamer1.0-dev:armhf \
libgstreamer-plugins-base1.0-dev:armhf \
libpython3-dev:armhf \
python3-pip
RUN wget https://www.cmake.org/files/v3.14/cmake-3.14.3.tar.gz && \
tar xf cmake-3.14.3.tar.gz && \
(cd cmake-3.14.3 && ./bootstrap --parallel=$(nproc --all) && make --jobs=$(nproc --all) && make install) && \
rm -rf cmake-3.14.3 cmake-3.14.3.tar.gz
```
It uses the Debian\* Stretch (Debian 9) OS for compilation because it is a base of the Raspbian\* Stretch.
4. Build a Docker\* image:
```bash
docker image build -t ie_cross_armhf ie_cross_armhf
```
5. Run Docker\* container with mounted source code folder from host:
```bash
docker run -it -v /absolute/path/to/openvino:/openvino ie_cross_armhf /bin/bash
```
6. While in the container:
1. Go to the cloned `openvino` repository:
```bash
cd openvino
```
2. Create a build folder:
```bash
mkdir build && cd build
```
3. Build the Inference Engine:
```bash
cmake -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_TOOLCHAIN_FILE="../cmake/arm.toolchain.cmake" \
-DTHREADS_PTHREAD_ARG="-pthread" \
-DENABLE_SSE42=OFF \
-DTHREADING=SEQ \
-DENABLE_GNA=OFF .. && make --jobs=$(nproc --all)
```
7. Press **Ctrl+D** to exit from Docker. You can find the resulting binaries
in the `openvino/bin/armv7l/` directory and the OpenCV*
installation in the `openvino/inference-engine/temp`.
>**NOTE**: Native applications that link to cross-compiled Inference Engine
library require an extra compilation flag `-march=armv7-a`.
### Additional Build Options
You can use the following additional build options:
- Required versions of OpenCV packages are downloaded automatically by the
CMake-based script. If you want to use the automatically downloaded packages
but you already have installed OpenCV packages configured in your environment,
you may need to clean the `OpenCV_DIR` environment variable before running
the `cmake` command; otherwise they won't be downloaded and the build may
fail if incompatible versions were installed.
- If the CMake-based build script cannot find and download the OpenCV package
that is supported on your platform, or if you want to use a custom build of
the OpenCV library, see: [Use Custom OpenCV Builds](#use-custom-opencv-builds-for-inference-engine)
for details.
- To build Python API wrapper, install `libpython3-dev:armhf` and `python3-pip`
packages using `apt-get`; then install `numpy` and `cython` python modules
via `pip3`, adding the following options:
```sh
-DENABLE_PYTHON=ON \
-DPYTHON_EXECUTABLE=/usr/bin/python3.5 \
-DPYTHON_LIBRARY=/usr/lib/arm-linux-gnueabihf/libpython3.5m.so \
-DPYTHON_INCLUDE_DIR=/usr/include/python3.5
```
- nGraph-specific compilation options:
`-DNGRAPH_ONNX_IMPORT_ENABLE=ON` enables the building of the nGraph ONNX importer.
`-DNGRAPH_JSON_ENABLE=ON` enables nGraph JSON-based serialization.
`-DNGRAPH_DEBUG_ENABLE=ON` enables additional debug prints.
## Build on Windows* Systems
The software was validated on:
- Microsoft\* Windows\* 10 (64-bit) with Visual Studio 2017 and Intel® C++
Compiler 2018 Update 3
### Software Requirements
- [CMake]\*3.11 or higher
- Microsoft\* Visual Studio 2017, 2019 or [Intel® C++ Compiler] 18.0
- (Optional) Intel® Graphics Driver for Windows* (26.20) [driver package].
- Python 3.4 or higher for Inference Engine Python API wrapper
### Build Steps
1. Clone submodules:
```sh
git submodule update --init --recursive
```
2. By default, the build enables the Inference Engine GPU plugin to infer models
on your Intel® Processor Graphics. This requires you to [download and install
the Intel® Graphics Driver for Windows (26.20) [driver package] before
running the build. If you don't want to use the GPU plugin, use the
`-DENABLE_CLDNN=OFF` CMake build option and skip the installation of the
Intel® Graphics Driver.
3. Create build directory:
```sh
mkdir build
```
4. In the `build` directory, run `cmake` to fetch project dependencies and
generate a Visual Studio solution.
For Microsoft\* Visual Studio 2017:
```sh
cmake -G "Visual Studio 15 2017 Win64" -DCMAKE_BUILD_TYPE=Release ..
```
For Microsoft\* Visual Studio 2019:
```sh
cmake -G "Visual Studio 16 2019" -A x64 -DCMAKE_BUILD_TYPE=Release ..
```
For Intel® C++ Compiler 18:
```sh
cmake -G "Visual Studio 15 2017 Win64" -T "Intel C++ Compiler 18.0" ^
-DCMAKE_BUILD_TYPE=Release ^
-DICCLIB="C:\Program Files (x86)\IntelSWTools\compilers_and_libraries_2018\windows\compiler\lib" ..
```
5. Build generated solution in Visual Studio or run
`cmake --build . --config Release` to build from the command line.
6. Before running the samples, add paths to the TBB and OpenCV binaries used for
the build to the `%PATH%` environment variable. By default, TBB binaries are
downloaded by the CMake-based script to the `<openvino_repo>/inference-engine/temp/tbb/bin`
folder, OpenCV binaries to the `<openvino_repo>/inference-engine/temp/opencv_4.3.0/opencv/bin`
folder.
### Additional Build Options
- Internal JIT GEMM implementation is used by default.
- To switch to OpenBLAS GEMM implementation, use the `-DGEMM=OPENBLAS` CMake
option and specify path to OpenBLAS using the `-DBLAS_INCLUDE_DIRS=<OPENBLAS_DIR>\include`
and `-DBLAS_LIBRARIES=<OPENBLAS_DIR>\lib\libopenblas.dll.a` options. Download
a prebuilt OpenBLAS\* package via the [OpenBLAS] link. mingw64* runtime
dependencies can be downloaded via the [mingw64\* runtime dependencies] link.
- To switch to the optimized MKL-ML\* GEMM implementation, use the
`-DGEMM=MKL` and `-DMKLROOT=<path_to_MKL>` CMake options to specify a path to
unpacked MKL-ML with the `include` and `lib` folders. MKL-ML\* package can be
downloaded from the Intel&reg; [MKL-DNN repository for Windows].
- Threading Building Blocks (TBB) is used by default. To build the Inference
Engine with OpenMP* threading, set the `-DTHREADING=OMP` option.
- Required versions of TBB and OpenCV packages are downloaded automatically by
the CMake-based script. If you want to use the automatically-downloaded
packages but you already have installed TBB or OpenCV packages configured in
your environment, you may need to clean the `TBBROOT` and `OpenCV_DIR`
environment variables before running the `cmake` command; otherwise they won't
be downloaded and the build may fail if incompatible versions were installed.
- If the CMake-based build script can not find and download the OpenCV package
that is supported on your platform, or if you want to use a custom build of
the OpenCV library, refer to the [Use Custom OpenCV Builds](#use-custom-opencv-builds-for-inference-engine)
section for details.
- To switch off/on the CPU and GPU plugins, use the `cmake` options
`-DENABLE_MKL_DNN=ON/OFF` and `-DENABLE_CLDNN=ON/OFF` respectively.
- To build the Python API wrapper, use the `-DENABLE_PYTHON=ON` option. To
specify an exact Python version, use the following options:
```sh
-DPYTHON_EXECUTABLE="C:\Program Files\Python37\python.exe" ^
-DPYTHON_LIBRARY="C:\Program Files\Python37\libs\python37.lib" ^
-DPYTHON_INCLUDE_DIR="C:\Program Files\Python37\include"
```
- nGraph-specific compilation options:
`-DNGRAPH_ONNX_IMPORT_ENABLE=ON` enables the building of the nGraph ONNX importer.
`-DNGRAPH_JSON_ENABLE=ON` enables nGraph JSON-based serialization.
`-DNGRAPH_DEBUG_ENABLE=ON` enables additional debug prints.
### Building Inference Engine with Ninja* Build System
```sh
call "C:\Program Files (x86)\IntelSWTools\compilers_and_libraries_2018\windows\bin\ipsxe-comp-vars.bat" intel64 vs2017
set CXX=icl
set CC=icl
:: clean TBBROOT value set by ipsxe-comp-vars.bat, required TBB package will be downloaded by openvino cmake script
set TBBROOT=
cmake -G Ninja -Wno-dev -DCMAKE_BUILD_TYPE=Release ..
cmake --build . --config Release
```
## Build on macOS* Systems
> **NOTE**: The current version of the OpenVINO™ toolkit for macOS* supports
inference on Intel CPUs only.
The software was validated on:
- macOS\* 10.14, 64-bit
### Software Requirements
- [CMake]\* 3.11 or higher
- Clang\* compiler from Xcode\* 10.1 or higher
- Python\* 3.4 or higher for the Inference Engine Python API wrapper
### Build Steps
1. Clone submodules:
```sh
cd openvino
git submodule update --init --recursive
```
2. Install build dependencies using the `install_dependencies.sh` script in the
project root folder:
```sh
chmod +x install_dependencies.sh
```
```sh
./install_dependencies.sh
```
3. Create a build folder:
```sh
mkdir build
```
4. Inference Engine uses a CMake-based build system. In the created `build`
directory, run `cmake` to fetch project dependencies and create Unix makefiles,
then run `make` to build the project:
```sh
cmake -DCMAKE_BUILD_TYPE=Release ..
make --jobs=$(nproc --all)
```
### Additional Build Options
You can use the following additional build options:
- Internal JIT GEMM implementation is used by default.
- To switch to the optimized MKL-ML\* GEMM implementation, use `-DGEMM=MKL` and
`-DMKLROOT=<path_to_MKL>` cmake options to specify a path to unpacked MKL-ML
with the `include` and `lib` folders. MKL-ML\* [package for Mac] can be downloaded
[here](https://github.com/intel/mkl-dnn/releases/download/v0.19/mklml_mac_2019.0.5.20190502.tgz)
- Threading Building Blocks (TBB) is used by default. To build the Inference
Engine with OpenMP* threading, set the `-DTHREADING=OMP` option.
- Required versions of TBB and OpenCV packages are downloaded automatically by
the CMake-based script. If you want to use the automatically downloaded
packages but you already have installed TBB or OpenCV packages configured in
your environment, you may need to clean the `TBBROOT` and `OpenCV_DIR`
environment variables before running the `cmake` command, otherwise they won't
be downloaded and the build may fail if incompatible versions were installed.
- If the CMake-based build script can not find and download the OpenCV package
that is supported on your platform, or if you want to use a custom build of
the OpenCV library, refer to the
[Use Custom OpenCV Builds](#use-custom-opencv-builds-for-inference-engine)
section for details.
- To build the Python API wrapper, use the `-DENABLE_PYTHON=ON` option. To
specify an exact Python version, use the following options:
```sh
-DPYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 \
-DPYTHON_LIBRARY=/Library/Frameworks/Python.framework/Versions/3.7/lib/libpython3.7m.dylib \
-DPYTHON_INCLUDE_DIR=/Library/Frameworks/Python.framework/Versions/3.7/include/python3.7m
```
- nGraph-specific compilation options:
`-DNGRAPH_ONNX_IMPORT_ENABLE=ON` enables the building of the nGraph ONNX importer.
`-DNGRAPH_JSON_ENABLE=ON` enables nGraph JSON-based serialization.
`-DNGRAPH_DEBUG_ENABLE=ON` enables additional debug prints.
## Build on Android* Systems
This section describes how to build Inference Engine for Android x86 (64-bit) operating systems.
### Software Requirements
- [CMake]\* 3.11 or higher
- Android NDK (this guide has been validated with r20 release)
### Build Steps
1. Download and unpack Android NDK: https://developer.android.com/ndk/downloads. Let's assume that `~/Downloads` is used as a working folder.
```sh
cd ~/Downloads
wget https://dl.google.com/android/repository/android-ndk-r20-linux-x86_64.zip
unzip android-ndk-r20-linux-x86_64.zip
mv android-ndk-r20 android-ndk
```
2. Clone submodules
```sh
cd openvino
git submodule update --init --recursive
```
3. Create a build folder:
```sh
mkdir build
```
4. Change working directory to `build` and run `cmake` to create makefiles. Then run `make`.
```sh
cd build
cmake .. \
-DCMAKE_TOOLCHAIN_FILE=~/Downloads/android-ndk/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=x86_64 \
-DANDROID_PLATFORM=21 \
-DANDROID_STL=c++_shared \
-DENABLE_OPENCV=OFF
make --jobs=$(nproc --all)
```
* `ANDROID_ABI` specifies target architecture (`x86_64`)
* `ANDROID_PLATFORM` - Android API version
* `ANDROID_STL` specifies that shared C++ runtime is used. Copy `~/Downloads/android-ndk/sources/cxx-stl/llvm-libc++/libs/x86_64/libc++_shared.so` from Android NDK along with built binaries
## Use Custom OpenCV Builds for Inference Engine
> **NOTE**: The recommended and tested version of OpenCV is 4.3. The minimum
supported version is 3.4.0.
Required versions of OpenCV packages are downloaded automatically during the
building Inference Engine library. If the build script can not find and download
the OpenCV package that is supported on your platform, you can use one of the
following options:
* Download the most suitable version from the list of available pre-build
packages from [https://download.01.org/opencv/2020/openvinotoolkit] from the
`<release_version>/inference_engine` directory.
* Use a system-provided OpenCV package (e.g with running the
`apt install libopencv-dev` command). The following modules must be enabled:
`imgcodecs`, `videoio`, `highgui`.
* Get the OpenCV package using a package manager: pip, conda, conan etc. The
package must have the development components included (header files and CMake
scripts).
* Build OpenCV from source using the [build instructions](https://docs.opencv.org/master/df/d65/tutorial_table_of_content_introduction.html) on the OpenCV site.
After you got the built OpenCV library, perform the following preparation steps
before running the Inference Engine build:
1. Set the `OpenCV_DIR` environment variable to the directory where the
`OpenCVConfig.cmake` file of you custom OpenCV build is located.
2. Disable the package automatic downloading with using the `-DENABLE_OPENCV=OFF`
option for CMake-based build script for Inference Engine.
## Add Inference Engine to Your Project
For CMake projects, set the `InferenceEngine_DIR` environment variable:
```sh
export InferenceEngine_DIR=/path/to/openvino/build/
```
Then you can find Inference Engine by `find_package`:
```cmake
find_package(InferenceEngine)
include_directories(${InferenceEngine_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} ${InferenceEngine_LIBRARIES} dl)
```
## (Optional) Additional Installation Steps for the Intel® Movidius™ Neural Compute Stick and Neural Compute Stick 2
> **NOTE**: These steps are only required if you want to perform inference on
Intel® Movidius™ Neural Compute Stick or the Intel® Neural Compute Stick 2 using
the Inference Engine MYRIAD Plugin. See also [Intel® Neural Compute Stick 2 Get Started].
### For Linux, Raspbian\* Stretch OS
1. Add the current Linux user to the `users` group; you will need to log out and
log in for it to take effect:
```sh
sudo usermod -a -G users "$(whoami)"
```
2. To perform inference on Intel® Movidius™ Neural Compute Stick and Intel®
Neural Compute Stick 2, install the USB rules as follows:
```sh
cat <<EOF > 97-myriad-usbboot.rules
SUBSYSTEM=="usb", ATTRS{idProduct}=="2150", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1"
SUBSYSTEM=="usb", ATTRS{idProduct}=="2485", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1"
SUBSYSTEM=="usb", ATTRS{idProduct}=="f63b", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1"
EOF
```
```sh
sudo cp 97-myriad-usbboot.rules /etc/udev/rules.d/
```
```sh
sudo udevadm control --reload-rules
```
```sh
sudo udevadm trigger
```
```sh
sudo ldconfig
```
```sh
rm 97-myriad-usbboot.rules
```
## Next Steps
Congratulations, you have built the Inference Engine. To get started with the
OpenVINO™, proceed to the Get Started guides:
* [Get Started with Deep Learning Deployment Toolkit on Linux*](get-started-linux.md)
## Notice
To enable some additional nGraph features and use your custom nGraph library with the OpenVINO™ binary package,
make sure the following:
- nGraph library was built with the same version which is used in the Inference Engine.
- nGraph library and the Inference Engine were built with the same compilers. Otherwise you might face application binary interface (ABI) problems.
To prepare your custom nGraph library for distribution, which includes collecting all headers, copy
binaries, and so on, use the `install` CMake target.
This target collects all dependencies, prepares the nGraph package and copies it to a separate directory.
## Additional Resources
* [OpenVINO™ Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes)
* [Introduction to Intel® Deep Learning Deployment Toolkit](https://docs.openvinotoolkit.org/latest/_docs_IE_DG_Introduction.html)
* [Inference Engine Samples Overview](https://docs.openvinotoolkit.org/latest/_docs_IE_DG_Samples_Overview.html)
* [Inference Engine Developer Guide](https://docs.openvinotoolkit.org/latest/_docs_IE_DG_Deep_Learning_Inference_Engine_DevGuide.html)
* [Model Optimizer Developer Guide](https://docs.openvinotoolkit.org/latest/_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html)
---
\* Other names and brands may be claimed as the property of others.
[Intel® Distribution of OpenVINO™]:https://software.intel.com/en-us/openvino-toolkit
[CMake]:https://cmake.org/download/
[Install Intel® Graphics Compute Runtime for OpenCL™ Driver package 20.13.16352]:https://github.com/intel/compute-runtime/releases/tag/20.13.16352
[MKL-DNN repository]:https://github.com/intel/mkl-dnn/releases/download/v0.19/mklml_lnx_2019.0.5.20190502.tgz
[MKL-DNN repository for Windows]:(https://github.com/intel/mkl-dnn/releases/download/v0.19/mklml_win_2019.0.5.20190502.zip)
[OpenBLAS]:https://sourceforge.net/projects/openblas/files/v0.2.14/OpenBLAS-v0.2.14-Win64-int64.zip/download
[mingw64\* runtime dependencies]:https://sourceforge.net/projects/openblas/files/v0.2.14/mingw64_dll.zip/download
[https://download.01.org/opencv/2020/openvinotoolkit]:https://download.01.org/opencv/2020/openvinotoolkit
[build instructions]:https://docs.opencv.org/master/df/d65/tutorial_table_of_content_introduction.html
[driver package]:https://downloadcenter.intel.com/download/29335/Intel-Graphics-Windows-10-DCH-Drivers
[Intel® Neural Compute Stick 2 Get Started]:https://software.intel.com/en-us/neural-compute-stick/get-started
[Intel® C++ Compiler]:https://software.intel.com/en-us/intel-parallel-studio-xe
[OpenBLAS]:https://sourceforge.net/projects/openblas/files/v0.2.14/OpenBLAS-v0.2.14-Win64-int64.zip/download

View File

@@ -1,128 +0,0 @@
# Copyright (C) 2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
if(WIN32)
set(PROGRAMFILES_ENV "ProgramFiles(X86)")
file(TO_CMAKE_PATH $ENV{${PROGRAMFILES_ENV}} PROGRAMFILES)
set(UWP_SDK_PATH "${PROGRAMFILES}/Windows Kits/10/bin/${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION}/x64")
message(STATUS "Trying to find apivalidator in: ${UWP_SDK_PATH}")
find_host_program(UWP_API_VALIDATOR
NAMES apivalidator
PATHS "${UWP_SDK_PATH}"
DOC "ApiValidator for UWP compliance")
if(UWP_API_VALIDATOR)
message(STATUS "Found apivalidator: ${UWP_API_VALIDATOR}")
endif()
endif()
function(_ie_add_api_validator_post_build_step_recursive)
cmake_parse_arguments(API_VALIDATOR "" "TARGET" "" ${ARGN})
list(APPEND API_VALIDATOR_TARGETS ${API_VALIDATOR_TARGET})
set(API_VALIDATOR_TARGETS ${API_VALIDATOR_TARGETS} PARENT_SCOPE)
get_target_property(IS_IMPORTED ${API_VALIDATOR_TARGET} IMPORTED)
if(IS_IMPORTED)
return()
endif()
get_target_property(LIBRARY_TYPE ${API_VALIDATOR_TARGET} TYPE)
if(LIBRARY_TYPE STREQUAL "EXECUTABLE" OR LIBRARY_TYPE STREQUAL "SHARED_LIBRARY")
get_target_property(LINKED_LIBRARIES ${API_VALIDATOR_TARGET} LINK_LIBRARIES)
if(LINKED_LIBRARIES)
foreach(ITEM IN LISTS LINKED_LIBRARIES)
if(NOT TARGET ${ITEM})
continue()
endif()
get_target_property(LIBRARY_TYPE_DEPENDENCY ${ITEM} TYPE)
if(LIBRARY_TYPE_DEPENDENCY STREQUAL "SHARED_LIBRARY")
_ie_add_api_validator_post_build_step_recursive(TARGET ${ITEM})
endif()
endforeach()
endif()
endif()
set(API_VALIDATOR_TARGETS ${API_VALIDATOR_TARGETS} PARENT_SCOPE)
endfunction()
set(VALIDATED_LIBRARIES "" CACHE INTERNAL "")
function(_ie_add_api_validator_post_build_step)
set(UWP_API_VALIDATOR_APIS "${PROGRAMFILES}/Windows Kits/10/build/universalDDIs/x64/UniversalDDIs.xml")
set(UWP_API_VALIDATOR_EXCLUSION "${UWP_SDK_PATH}/BinaryExclusionlist.xml")
if((NOT UWP_API_VALIDATOR) OR (WINDOWS_STORE OR WINDOWS_PHONE))
return()
endif()
cmake_parse_arguments(API_VALIDATOR "" "TARGET" "" ${ARGN})
if(NOT API_VALIDATOR_TARGET)
message(FATAL_ERROR "RunApiValidator requires TARGET to validate!")
endif()
if(NOT TARGET ${API_VALIDATOR_TARGET})
message(FATAL_ERROR "${API_VALIDATOR_TARGET} is not a TARGET in the project tree.")
endif()
# collect targets
_ie_add_api_validator_post_build_step_recursive(TARGET ${API_VALIDATOR_TARGET})
# remove targets which were tested before
foreach(item IN LISTS VALIDATED_LIBRARIES)
list(REMOVE_ITEM API_VALIDATOR_TARGETS ${item})
endforeach()
list(REMOVE_DUPLICATES API_VALIDATOR_TARGETS)
if(NOT API_VALIDATOR_TARGETS)
return()
endif()
# apply check
macro(api_validator_get_target_name)
get_target_property(IS_IMPORTED ${target} IMPORTED)
if(IS_IMPORTED)
get_target_property(target_location ${target} LOCATION)
get_filename_component(target_name "${target_location}" NAME_WE)
else()
set(target_name ${target})
endif()
endmacro()
foreach(target IN LISTS API_VALIDATOR_TARGETS)
api_validator_get_target_name()
set(output_file "${CMAKE_BINARY_DIR}/api_validator/${target_name}.txt")
add_custom_command(TARGET ${API_VALIDATOR_TARGET} POST_BUILD
COMMAND ${CMAKE_COMMAND}
-D UWP_API_VALIDATOR=${UWP_API_VALIDATOR}
-D UWP_API_VALIDATOR_TARGET=$<TARGET_FILE:${target}>
-D UWP_API_VALIDATOR_APIS=${UWP_API_VALIDATOR_APIS}
-D UWP_API_VALIDATOR_EXCLUSION=${UWP_API_VALIDATOR_EXCLUSION}
-D UWP_API_VALIDATOR_OUTPUT=${output_file}
-D CMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
-P "${OpenVINO_MAIN_SOURCE_DIR}/cmake/api_validator/api_validator_run.cmake"
BYPRODUCTS ${output_file}
COMMENT "[apiValidator] Check ${target_name} for OneCore compliance"
VERBATIM)
endforeach()
# update list of validated libraries
list(APPEND VALIDATED_LIBRARIES ${API_VALIDATOR_TARGETS})
set(VALIDATED_LIBRARIES "${VALIDATED_LIBRARIES}" CACHE INTERNAL "" FORCE)
endfunction()
#
# ie_add_api_validator_post_build_step(TARGET <name>)
#
macro(ie_add_api_validator_post_build_step)
_ie_add_api_validator_post_build_step(${ARGV})
endmacro()

View File

@@ -1,73 +0,0 @@
# Copyright (C) 2018-2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
cmake_policy(SET CMP0012 NEW)
foreach(var UWP_API_VALIDATOR UWP_API_VALIDATOR_TARGET
UWP_API_VALIDATOR_APIS UWP_API_VALIDATOR_EXCLUSION
UWP_API_VALIDATOR_OUTPUT CMAKE_TOOLCHAIN_FILE)
if(NOT DEFINED ${var})
message(FATAL_ERROR "Variable ${var} is not defined")
endif()
endforeach()
# create command
if(NOT EXISTS "${UWP_API_VALIDATOR_APIS}")
message(FATAL_ERROR "${UWP_API_VALIDATOR_APIS} does not exist")
endif()
set(command "${UWP_API_VALIDATOR}"
-SupportedApiXmlFiles:${UWP_API_VALIDATOR_APIS}
-DriverPackagePath:${UWP_API_VALIDATOR_TARGET})
if(EXISTS "${UWP_API_VALIDATOR_EXCLUSION}")
list(APPEND command
-BinaryExclusionListXmlFile:${UWP_API_VALIDATOR_EXCLUSION}
-StrictCompliance:TRUE)
set(UWP_HAS_BINARY_EXCLUSION ON)
endif()
# execute
execute_process(COMMAND ${command}
OUTPUT_VARIABLE output_message
ERROR_VARIABLE error_message
RESULT_VARIABLE exit_code
OUTPUT_STRIP_TRAILING_WHITESPACE)
file(WRITE "${UWP_API_VALIDATOR_OUTPUT}" "${output_message}\n\n\n${error_message}")
# post-process output
get_filename_component(name "${UWP_API_VALIDATOR_TARGET}" NAME)
if(NOT UWP_HAS_BINARY_EXCLUSION)
if(CMAKE_TOOLCHAIN_FILE MATCHES "onecoreuap.toolchain.cmake$")
# empty since we compile with static MSVC runtime
else()
set(exclusion_dlls "msvcp140.dll" "vcruntime140.dll")
endif()
# remove exclusions from error_message
foreach(dll IN LISTS exclusion_dlls)
string(REGEX REPLACE
"ApiValidation: Error: ${name} has unsupported API call to \"${dll}![^\"]+\"\n"
"" error_message "${error_message}")
endforeach()
# throw error if error_message still contains any errors
if(error_message)
message(FATAL_ERROR "${error_message}")
endif()
endif()
# write output
if(UWP_HAS_BINARY_EXCLUSION AND NOT exit_code EQUAL 0)
message(FATAL_ERROR "${error_message}")
endif()
message("ApiValidator: ${name} has passed the OneCore compliance")

View File

@@ -15,19 +15,19 @@ else()
SET(ARCH_64 OFF)
endif()
if (NOT ENABLE_MKL_DNN)
set(ENABLE_MKL OFF)
endif()
if(ENABLE_AVX512F)
if ((CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") AND (MSVC_VERSION VERSION_LESS 1920))
if ((CMAKE_CXX_COMPILER_ID MATCHES MSVC) AND (MSVC_VERSION VERSION_LESS 1920))
# 1920 version of MSVC 2019. In MSVC 2017 AVX512F not work
set(ENABLE_AVX512F OFF CACHE BOOL "" FORCE)
endif()
if ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") AND (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6))
if (CMAKE_CXX_COMPILER_ID MATCHES Clang)
set(ENABLE_AVX512F OFF CACHE BOOL "" FORCE)
endif()
if ((CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") AND (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10))
# TBD: clarify which AppleClang version supports avx512
set(ENABLE_AVX512F OFF CACHE BOOL "" FORCE)
endif()
if ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9))
if ((CMAKE_CXX_COMPILER_ID STREQUAL GNU) AND (NOT (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9)))
set(ENABLE_AVX512F OFF CACHE BOOL "" FORCE)
endif()
endif()

View File

@@ -4,17 +4,14 @@
if(NOT TARGET ie_coverage_clean)
add_custom_target(ie_coverage_clean)
set_target_properties(ie_coverage_clean PROPERTIES FOLDER coverage)
endif()
if(NOT TARGET ie_coverage_init)
add_custom_target(ie_coverage_init)
set_target_properties(ie_coverage_init PROPERTIES FOLDER coverage)
endif()
if(NOT TARGET ie_coverage)
add_custom_target(ie_coverage)
set_target_properties(ie_coverage PROPERTIES FOLDER coverage)
endif()
set(IE_COVERAGE_REPORTS "${CMAKE_BINARY_DIR}/coverage")
@@ -29,10 +26,10 @@ function(ie_coverage_clean)
cmake_parse_arguments(IE_COVERAGE "" "REPOSITORY;DIRECTORY" "" ${ARGN})
add_custom_target(ie_coverage_zerocounters_${IE_COVERAGE_REPOSITORY}
COMMAND lcov --zerocounters --quiet
--directory "${IE_COVERAGE_DIRECTORY}"
COMMENT "Add zero counters for coverage for ${IE_COVERAGE_REPOSITORY}"
VERBATIM)
COMMAND lcov --zerocounters --quiet
--directory "${IE_COVERAGE_DIRECTORY}"
COMMENT "Add zero counters for coverage for ${IE_COVERAGE_REPOSITORY}"
VERBATIM)
add_custom_target(ie_coverage_clean_${IE_COVERAGE_REPOSITORY}
COMMAND ${CMAKE_COMMAND}
@@ -45,10 +42,6 @@ function(ie_coverage_clean)
DEPENDS "${IE_COVERAGE_SCRIPT_DIR}/coverage_clean.cmake"
VERBATIM)
set_target_properties(ie_coverage_zerocounters_${IE_COVERAGE_REPOSITORY}
ie_coverage_clean_${IE_COVERAGE_REPOSITORY}
PROPERTIES FOLDER coverage)
add_dependencies(ie_coverage_clean ie_coverage_zerocounters_${IE_COVERAGE_REPOSITORY}
ie_coverage_clean_${IE_COVERAGE_REPOSITORY})
endfunction()
@@ -94,8 +87,6 @@ function(ie_coverage_capture)
add_custom_target(ie_coverage_${IE_COVERAGE_INFO_FILE}_info
DEPENDS ${output_file})
set_target_properties(ie_coverage_${IE_COVERAGE_INFO_FILE}_info
PROPERTIES FOLDER coverage)
endfunction()
#
@@ -120,8 +111,6 @@ function(ie_coverage_extract)
VERBATIM)
add_custom_target(ie_coverage_${IE_COVERAGE_OUTPUT}_info
DEPENDS ${output_file})
set_target_properties(ie_coverage_${IE_COVERAGE_OUTPUT}_info
PROPERTIES FOLDER coverage)
add_dependencies(ie_coverage_${IE_COVERAGE_OUTPUT}_info ie_coverage_${IE_COVERAGE_INPUT}_info)
endfunction()
@@ -148,8 +137,6 @@ function(ie_coverage_remove)
VERBATIM)
add_custom_target(ie_coverage_${IE_COVERAGE_OUTPUT}_info
DEPENDS ${output_file})
set_target_properties(ie_coverage_${IE_COVERAGE_OUTPUT}_info
PROPERTIES FOLDER coverage)
add_dependencies(ie_coverage_${IE_COVERAGE_OUTPUT}_info ie_coverage_${IE_COVERAGE_INPUT}_info)
endfunction()
@@ -177,8 +164,6 @@ function(ie_coverage_merge)
VERBATIM)
add_custom_target(ie_coverage_${IE_COVERAGE_OUTPUT}_info
DEPENDS ${output_file})
set_target_properties(ie_coverage_${IE_COVERAGE_OUTPUT}_info
PROPERTIES FOLDER coverage)
add_dependencies(ie_coverage_${IE_COVERAGE_OUTPUT}_info ${dependencies})
endfunction()
@@ -203,8 +188,6 @@ function(ie_coverage_genhtml)
VERBATIM)
add_custom_target(ie_coverage_${IE_COVERAGE_INFO_FILE}_genhtml
DEPENDS "${output_directory}/index.html")
set_target_properties(ie_coverage_${IE_COVERAGE_INFO_FILE}_genhtml
PROPERTIES FOLDER coverage)
add_dependencies(ie_coverage_${IE_COVERAGE_INFO_FILE}_genhtml ie_coverage_${IE_COVERAGE_INFO_FILE}_info)
add_dependencies(ie_coverage ie_coverage_${IE_COVERAGE_INFO_FILE}_genhtml)

View File

@@ -3,28 +3,28 @@
#
if(NOT DEFINED IE_COVERAGE_REPORTS)
message(FATAL_ERROR "IE_COVERAGE_REPORTS variable is not defined")
return()
message(FATAL_ERROR "IE_COVERAGE_REPORTS variable is not defined")
return()
endif()
file(REMOVE_RECURSE "${IE_COVERAGE_REPORTS}")
if(NOT DEFINED IE_COVERAGE_DIRECTORY)
message(FATAL_ERROR "IE_COVERAGE_DIRECTORY variable is not defined")
return()
message(FATAL_ERROR "IE_COVERAGE_DIRECTORY variable is not defined")
return()
endif()
# remove .gcno files which are kept from the previous build
file(GLOB_RECURSE gcno_files "${IE_COVERAGE_DIRECTORY}/*.gcno")
foreach(file IN LISTS gcno_files)
string(REPLACE ".gcno" "" temp_file "${file}")
string(REGEX REPLACE "CMakeFiles/.+dir/" "" temp_file "${temp_file}")
string(REPLACE "${CMAKE_BINARY_DIRECTORY}" "${CMAKE_SOURCE_DIRECTORY}" source_file "${temp_file}")
string(REPLACE ".gcno" "" temp_file "${file}")
string(REGEX REPLACE "CMakeFiles/.+dir/" "" temp_file "${temp_file}")
string(REPLACE "${CMAKE_BINARY_DIRECTORY}" "${CMAKE_SOURCE_DIRECTORY}" source_file "${temp_file}")
if(NOT EXISTS "${source_file}")
file(REMOVE "${file}")
string(REPLACE "${CMAKE_BINARY_DIRECTORY}/" "" file "${file}")
message("Removing ${file}")
endif()
if(NOT EXISTS "${source_file}")
file(REMOVE "${file}")
string(REPLACE "${CMAKE_BINARY_DIRECTORY}/" "" file "${file}")
message("Removing ${file}")
endif()
endforeach()

View File

@@ -1,105 +0,0 @@
# Copyright (C) 2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
# =================================================================
#
# Generates cpp file with dispatcher for cross compiled function
# Parameters:
# XARCH_API_HEADER -- path to header with function declaration
# XARCH_FUNC_NAME -- name of function to dispatch
# XARCH_NAMESPACES -- full namespace used to keep ODR
# XARCH_DISP_FILE -- dispatcher file name to generate
# XARCH_SET -- set of ARCH supported by dispatcher. space delimited
#
# =================================================================
set(_CPU_CHECK_ANY "true")
set(_CPU_CHECK_SSE42 "with_cpu_x86_sse42()")
set(_CPU_CHECK_AVX "with_cpu_x86_avx()")
set(_CPU_CHECK_AVX2 "with_cpu_x86_avx2()")
set(_CPU_CHECK_AVX512F "with_cpu_x86_avx512f()")
function(_generate_dispatcher)
_find_signature_in_file(${XARCH_API_HEADER} ${XARCH_FUNC_NAME} SIGNATURE)
_generate_call_line_from_signature("${SIGNATURE}" CALL_LINE)
string(REPLACE " " ";" XARCH_SET "${XARCH_SET}")
string(REPLACE "::" ";" XARCH_NAMESPACES "${XARCH_NAMESPACES}")
list(GET XARCH_NAMESPACES -1 XARCH_CURRENT_NAMESPACE)
set(PARENT_NAMESPACES ${XARCH_NAMESPACES})
list(REMOVE_AT PARENT_NAMESPACES -1)
set(DISP_CONTENT
"
//
// Auto generated file by CMake macros cross_compiled_file()
// !! do not modify it !!!
//
#include \"${XARCH_API_HEADER}\"
#include \"ie_system_conf.h\"
")
foreach(_namespace ${PARENT_NAMESPACES})
string(APPEND DISP_CONTENT
"namespace ${_namespace} {\n")
endforeach()
foreach(_arch ${XARCH_SET})
string(APPEND DISP_CONTENT
"namespace ${_arch} {\n ${SIGNATURE}\; \n}\n")
endforeach()
string(APPEND DISP_CONTENT
"namespace ${XARCH_CURRENT_NAMESPACE} {\n\n${SIGNATURE} {\n")
foreach(_arch ${XARCH_SET})
string(APPEND DISP_CONTENT
" if (${_CPU_CHECK_${_arch}}) {\n return ${_arch}::${CALL_LINE}\;\n }\n")
endforeach()
string(APPEND DISP_CONTENT "}\n\n}\n")
foreach(_namespace ${PARENT_NAMESPACES})
string(APPEND DISP_CONTENT "} // namespace ${_namespace}\n")
endforeach()
file(WRITE ${XARCH_DISP_FILE} ${DISP_CONTENT})
endfunction()
function(_find_signature_in_file FILE FUNCTION RESULT_NAME)
file(READ "${FILE}" CONTENT)
set(valid_chars "<>:_*& a-zA-Z0-9\n") ## valid chars for type/var specification (including new line /n)
string(REGEX MATCH "[${valid_chars}]*${FUNCTION}[ ]*[(][=,${valid_chars}]*[)]" SIGNATURE ${CONTENT})
string(STRIP "${SIGNATURE}" SIGNATURE)
set (${RESULT_NAME} "${SIGNATURE}" PARENT_SCOPE)
endfunction()
function(_generate_call_line_from_signature SIGNATURE RESULT_NAME)
## extract func name
set(_name ${SIGNATURE})
string(REGEX REPLACE "[ ]*[(].*[)]" "" _name "${_name}") # remove arguments
string(REGEX MATCH "[a-zA-Z0-9_]*[ ]*$" _name "${_name}") # extract func name
set(nt_chars "[:_*& a-zA-Z0-9\n]*") ## any sequence of chars to describe object type (no template)
## extract arg names
set(_args ${SIGNATURE})
string(REGEX MATCH "[(].*[)]" _args "${_args}") # extract args with types, all inside brackets
string(REGEX REPLACE "<${nt_chars},${nt_chars}>" "" _args "${_args}") # remove template brackets with ','
string(REPLACE "(" "" _args ${_args})
string(REPLACE ")" "" _args ${_args})
string(REPLACE "," ";" _args ${_args}) # now it's list
foreach(_arg_elem ${_args})
string(REGEX MATCH "[a-zA-Z0-9_]*[ ]*$" _arg_elem "${_arg_elem}")
list(APPEND _arg_names ${_arg_elem})
endforeach()
string(REPLACE ";" ", " _arg_names "${_arg_names}") # back to comma separated string
set (${RESULT_NAME} "${_name}(${_arg_names})" PARENT_SCOPE)
endfunction()
_generate_dispatcher()

View File

@@ -1,16 +0,0 @@
# Copyright (C) 2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
# =================================================================
#
# This file is used to add dependency on option value. If the args
# was changes the configure file will be updated. And the dependent
# add_custom_command will rerun.
#
# Otherwise the changing of CMake options will not have affect on
# generated file.
#
# =================================================================
@_GEN_ARGS_LIST@

View File

@@ -1,227 +0,0 @@
# Copyright (C) 2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
## list of available instruction sets
set(_ARCH_LIST ANY SSE42 AVX AVX2 AVX512F)
set(_ACCEPTED_ARCHS_ANY "^(ANY)$")
set(_ACCEPTED_ARCHS_SSE42 "^(ANY|SSE42)$")
set(_ACCEPTED_ARCHS_AVX "^(ANY|SSE42|AVX)$")
set(_ACCEPTED_ARCHS_AVX2 "^(ANY|SSE42|AVX|AVX2)$")
set(_ACCEPTED_ARCHS_AVX512F "^(ANY|SSE42|AVX|AVX2|AVX512F)$")
## Arch specific definitions
set(_DEFINE_ANY "")
set(_DEFINE_SSE42 "-DHAVE_SSE42" ${_DEFINE_ANY})
set(_DEFINE_AVX "-DHAVE_AVX" ${_DEFINE_SSE42})
set(_DEFINE_AVX2 "-DHAVE_AVX2" ${_DEFINE_AVX})
set(_DEFINE_AVX512F "-DHAVE_AVX512F" ${_DEFINE_AVX2})
## Arch specific compile options
ie_avx512_optimization_flags(_FLAGS_AVX512F)
ie_avx2_optimization_flags (_FLAGS_AVX2)
ie_sse42_optimization_flags (_FLAGS_SSE42)
set(_FLAGS_AVX "") ## TBD is not defined for IE project yet
set(_FLAGS_ANY "") ##
## way to duplicate file via cmake tool set
if (UNIX)
## Clone sources via sym link because it allow to modify original file in IDE along with debug
set(TO_DUPLICATE create_symlink)
else()
## Windows and others - just copy
set(TO_DUPLICATE copy)
endif()
set(DISPATCHER_GEN_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/cross_compiled_disp_gen.cmake)
set(DISPATCHER_GEN_OPTIONS_HOLDER ${CMAKE_CURRENT_LIST_DIR}/cross_compiled_disp_gen_options.in)
#######################################
#
# Allow to enable multiple cross compilation of source file inside one module
# with keeping requirements on minimal instruction set. The CPU check performed
# in runtime via common utils declared in "ie_system_conf.h".
#
# Usage example:
# cross_compiled_file(<target>
# ARCH
# ANY <source_file>
# SSE SSE42 <source_file>
# AVX AVX2 <source_file>
# AVX512F <source_file>
# API <header_file>
# NAMESPACE <namespace> # like "IE::Ext::CPU::XARCH"
# NAME <function_name> # like "my_fun"
# )
#
function(cross_compiled_file TARGET)
set(oneValueArgs API ## Header with declaration of cross compiled function
NAMESPACE ## The namespace where cross compiled function was declared
NAME) ## String with function signature to make cross compiled
set(multiValueArgs ARCH) ## List of architecture described in _ARCH_LIST
cmake_parse_arguments(X "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
## verification
if(X_UNPARSED_ARGUMENTS)
message(FATAL_ERROR "Unknown argument: " ${X_UNPARSED_ARGUMENTS})
endif()
if((NOT TARGET) OR (NOT X_NAME) OR (NOT X_NAMESPACE) OR (NOT X_API) OR (NOT X_ARCH))
message(FATAL_ERROR "Missed arguments")
endif()
_currently_requested_top_arch(TOP_ARCH)
set(_CURRENT_ARCH_FILTER "${_ACCEPTED_ARCHS_${TOP_ARCH}}")
## format: ARCH1 ARCH2 <src1> ARCH3 <src2> ...
foreach(_it ${X_ARCH})
if (_it IN_LIST _ARCH_LIST)
## that is arch ID
set(_arch ${_it})
if(_arch MATCHES ${_CURRENT_ARCH_FILTER})
list(APPEND _CUR_ARCH_SET ${_arch})
list(APPEND _FULL_ARCH_SET ${_arch})
endif()
else()
## that is source file name
set(_src_name ${_it})
_remove_source_from_target(${TARGET} ${_src_name})
_clone_source_to_target(${TARGET} ${_src_name} "${_CUR_ARCH_SET}")
set(_CUR_ARCH_SET "")
endif()
endforeach()
_add_dispatcher_to_target(${TARGET} ${X_API} ${X_NAME} "${X_NAMESPACE}" "${_FULL_ARCH_SET}")
endfunction()
##########################################
#
# Add source multiple time per each element in ARCH_SET.
# Also provide corresponding arch specific flags and defines.
#
function(_clone_source_to_target TARGET SOURCE ARCH_SET)
foreach(_arch ${ARCH_SET})
set(_arch_dir cross-compiled/${_arch})
get_filename_component(ARCH_NAME ${SOURCE} NAME)
get_filename_component(ARCH_INCLUDE_DIR ${SOURCE} DIRECTORY)
set(ARCH_SOURCE "${_arch_dir}/${ARCH_NAME}")
add_custom_command(
OUTPUT ${ARCH_SOURCE}
COMMAND ${CMAKE_COMMAND} -E make_directory
${CMAKE_CURRENT_BINARY_DIR}/${_arch_dir}
COMMAND ${CMAKE_COMMAND} -E ${TO_DUPLICATE}
${CMAKE_CURRENT_SOURCE_DIR}/${SOURCE}
${CMAKE_CURRENT_BINARY_DIR}/${ARCH_SOURCE}
DEPENDS ${SOURCE}
)
set(_ARCH_SPECIFIC_FLAGS
${_DEFINE_${_arch}}
${_FLAGS_${_arch}}
"-DXARCH=${_arch}" ## to replace XARCH with direct ARCH name
"-I${CMAKE_CURRENT_SOURCE_DIR}/${ARCH_INCLUDE_DIR}" ## To make valid #include "some.hpp"
)
_add_source_compile_flags(${ARCH_SOURCE} ${_ARCH_SPECIFIC_FLAGS})
list(APPEND _ARCH_SOURCES ${ARCH_SOURCE})
endforeach()
_add_source_to_target(${TARGET} ${_ARCH_SOURCES})
endfunction()
##########################################
#
# Generate dispatcher for provided function
# for archs in ARCH_SET.
#
function(_add_dispatcher_to_target TARGET HEADER FUNC_NAME NAMESPACE ARCH_SET)
get_filename_component(DISPATCHER_NAME ${HEADER} NAME_WE)
get_filename_component(DISPATCHER_INCLUDE_DIR ${HEADER} DIRECTORY)
set(DISPATCHER_SOURCE "cross-compiled/${DISPATCHER_NAME}_disp.cpp")
set(DISPATCHER_OPT_HOLDER "cross-compiled/${DISPATCHER_NAME}_holder.txt")
set(_GEN_ARGS_LIST
-DXARCH_FUNC_NAME="${X_NAME}"
-DXARCH_NAMESPACES="${NAMESPACE}"
-DXARCH_API_HEADER="${CMAKE_CURRENT_SOURCE_DIR}/${HEADER}"
-DXARCH_DISP_FILE="${CMAKE_CURRENT_BINARY_DIR}/${DISPATCHER_SOURCE}"
-DXARCH_SET="${ARCH_SET}"
)
configure_file(${DISPATCHER_GEN_OPTIONS_HOLDER} ${DISPATCHER_OPT_HOLDER})
add_custom_command(
OUTPUT ${DISPATCHER_SOURCE}
COMMAND ${CMAKE_COMMAND} ${_GEN_ARGS_LIST}
-P ${DISPATCHER_GEN_SCRIPT}
DEPENDS ${HEADER}
${DISPATCHER_GEN_SCRIPT}
${CMAKE_CURRENT_BINARY_DIR}/${DISPATCHER_OPT_HOLDER} ## Just to make run dependency on args value
)
_add_source_compile_flags(${DISPATCHER_SOURCE} "-I${DISPATCHER_INCLUDE_DIR}")
_add_source_to_target(${TARGET} ${DISPATCHER_SOURCE})
endfunction()
#######################################
#
# Return currently requested ARCH id
#
function(_currently_requested_top_arch VAR)
if(ENABLE_AVX512F)
set(RES AVX512F)
elseif(ENABLE_AVX2)
set(RES AVX2)
elseif(ENABLE_SSE42)
set(RES SSE42)
else()
set(RES ANY)
endif()
set (${VAR} "${RES}" PARENT_SCOPE)
endfunction()
#####################################
#
# Utils to handle with cmake target
#
function(_remove_source_from_target TARGET SOURCE_FILE)
get_target_property(ORIGINAL_SOURCES ${TARGET} SOURCES)
## To match by file name only. The path is any.
list(FILTER ORIGINAL_SOURCES EXCLUDE REGEX ".*${SOURCE_FILE}$")
set_target_properties(${TARGET}
PROPERTIES
SOURCES "${ORIGINAL_SOURCES}")
endfunction()
function(_add_source_to_target TARGET)
get_target_property(ORIGINAL_SOURCES ${TARGET} SOURCES)
list(APPEND ORIGINAL_SOURCES ${ARGN})
set_target_properties(${TARGET}
PROPERTIES
SOURCES "${ORIGINAL_SOURCES}")
endfunction()
function(_add_source_compile_flags SOURCE)
get_source_file_property(ORIGINAL_FLAGS ${SOURCE} COMPILE_FLAGS)
## Empty list of COMPILE_FLAGS represented as NOTFOUND
if(NOT ORIGINAL_FLAGS)
set(ORIGINAL_FLAGS "")
endif()
string(REPLACE ";" " " NEW_FLAGS "${ARGN}")
string(APPEND ORIGINAL_FLAGS " " ${NEW_FLAGS})
set_source_files_properties(${SOURCE}
PROPERTIES
COMPILE_FLAGS "${ORIGINAL_FLAGS}")
endfunction()

View File

@@ -6,20 +6,20 @@ set_temp_directory(TEMP "${IE_MAIN_SOURCE_DIR}")
include(dependency_solver)
if(CMAKE_CROSSCOMPILING AND NGRAPH_ONNX_IMPORT_ENABLE)
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(HOST_X86_64 ON)
endif()
if(CMAKE_CROSSCOMPILING)
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(HOST_X86_64 ON)
endif()
set(protoc_version "3.7.1")
if(CMAKE_HOST_SYSTEM_NAME MATCHES Linux)
RESOLVE_DEPENDENCY(SYSTEM_PROTOC_ROOT
ARCHIVE_LIN "protoc-${protoc_version}-linux-x86_64.tar.gz"
TARGET_PATH "${TEMP}/protoc-${protoc_version}-linux-x86_64")
debug_message(STATUS "host protoc-${protoc_version} root path = " ${SYSTEM_PROTOC_ROOT})
else()
message(FATAL_ERROR "Unsupported host system (${CMAKE_HOST_SYSTEM_NAME}) and arch (${CMAKE_HOST_SYSTEM_PROCESSOR}) for cross-compilation")
endif()
set(protoc_version "3.7.1")
if(CMAKE_HOST_SYSTEM_NAME MATCHES Linux)
RESOLVE_DEPENDENCY(SYSTEM_PROTOC_ROOT
ARCHIVE_LIN "protoc-${protoc_version}-linux-x86_64.tar.gz"
TARGET_PATH "${TEMP}/protoc-${protoc_version}-linux-x86_64")
debug_message(STATUS "host protoc-${protoc_version} root path = " ${SYSTEM_PROTOC_ROOT})
else()
message(FATAL_ERROR "Unsupported host system (${CMAKE_HOST_SYSTEM_NAME}) and arch (${CMAKE_HOST_SYSTEM_PROCESSOR}) for cross-compilation")
endif()
reset_deps_cache(SYSTEM_PROTOC)

View File

@@ -2,29 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
#
cmake_minimum_required(VERSION 3.13)
# Detect target
include(target_flags)
string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH_FOLDER)
if(X86_64)
set(ARCH_FOLDER intel64)
elseif(X86)
set(ARCH_FOLDER ia32)
elseif(MSVC AND ARM)
set(ARCH_FOLDER arm)
elseif(MSVC AND AARCH64)
set(ARCH_FOLDER arm64)
endif()
list(APPEND CMAKE_MODULE_PATH
"${OpenVINO_MAIN_SOURCE_DIR}/cmake/download"
"${OpenVINO_MAIN_SOURCE_DIR}/cmake/cross_compile")
#
# CPack
#
set(CMAKE_MODULE_PATH "${OpenVINO_MAIN_SOURCE_DIR}/cmake/download" ${CMAKE_MODULE_PATH})
include(CPackComponent)
unset(IE_CPACK_COMPONENTS_ALL CACHE)
@@ -50,14 +28,21 @@ endif()
# Set library directory for cpack
#
function(ie_cpack_set_library_dir)
string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH)
if(ARCH STREQUAL "x86_64" OR ARCH STREQUAL "amd64") # Windows detects Intel's 64-bit CPU as AMD64
set(ARCH intel64)
elseif(ARCH STREQUAL "i386")
set(ARCH ia32)
endif()
if(WIN32)
set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/bin/${ARCH_FOLDER}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/bin/${ARCH}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
else()
set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER} PARENT_SCOPE)
set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER} PARENT_SCOPE)
set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER} PARENT_SCOPE)
set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH} PARENT_SCOPE)
set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH} PARENT_SCOPE)
set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH} PARENT_SCOPE)
endif()
endfunction()
@@ -76,11 +61,12 @@ endmacro()
macro(ie_cpack)
set(CPACK_GENERATOR "TGZ")
string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
if(WIN32)
set(CPACK_PACKAGE_NAME inference-engine_${CMAKE_BUILD_TYPE})
string(REPLACE "\\" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
else()
set(CPACK_PACKAGE_NAME inference-engine)
string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
endif()
set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF)
set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
@@ -119,27 +105,35 @@ function(set_temp_directory temp_variable source_tree_dir)
endif()
endfunction()
#
# Common scripts
#
include(coverage/coverage)
include(shellcheck/shellcheck)
# External dependencies
find_package(Threads)
# Detect target
include(target_flags)
# printing debug messages
include(debug)
# linking libraries without discarding symbols
include(whole_archive)
string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH_FOLDER)
if(ARCH_FOLDER STREQUAL "x86_64" OR ARCH_FOLDER STREQUAL "amd64") # Windows detects Intel's 64-bit CPU as AMD64
set(ARCH_FOLDER intel64)
elseif(ARCH_FOLDER STREQUAL "i386")
set(ARCH_FOLDER ia32)
endif()
if(OS_FOLDER)
message ("**** OS FOLDER IS: [${OS_FOLDER}]")
if("${OS_FOLDER}" STREQUAL "ON")
message ("**** USING OS FOLDER: [${CMAKE_SYSTEM_NAME}]")
set(BIN_FOLDER "bin/${CMAKE_SYSTEM_NAME}/${ARCH_FOLDER}")
else()
set(BIN_FOLDER "bin/${OS_FOLDER}/${ARCH_FOLDER}")
endif()
message ("**** OS FOLDER IS: [${OS_FOLDER}]")
if("${OS_FOLDER}" STREQUAL "ON")
message ("**** USING OS FOLDER: [${CMAKE_SYSTEM_NAME}]")
set(BIN_FOLDER "bin/${CMAKE_SYSTEM_NAME}/${ARCH_FOLDER}")
else()
set(BIN_FOLDER "bin/${OS_FOLDER}/${ARCH_FOLDER}")
endif()
else()
set(BIN_FOLDER "bin/${ARCH_FOLDER}")
endif()
@@ -149,10 +143,7 @@ if("${CMAKE_BUILD_TYPE}" STREQUAL "")
set(CMAKE_BUILD_TYPE "Release")
endif()
# allow to override default OUTPUT_ROOT root
if(NOT DEFINED OUTPUT_ROOT)
set(OUTPUT_ROOT ${OpenVINO_MAIN_SOURCE_DIR})
endif()
set(OUTPUT_ROOT ${OpenVINO_MAIN_SOURCE_DIR})
# Enable postfixes for Debug/Release builds
set(IE_DEBUG_POSTFIX_WIN "d")
@@ -176,8 +167,8 @@ endif()
set(CMAKE_DEBUG_POSTFIX ${IE_DEBUG_POSTFIX})
set(CMAKE_RELEASE_POSTFIX ${IE_RELEASE_POSTFIX})
if (WIN32 OR CMAKE_GENERATOR STREQUAL "Xcode")
# Support CMake multiconfiguration for Visual Studio or Xcode build
if (WIN32)
# Support CMake multiconfiguration for Visual Studio build
set(IE_BUILD_POSTFIX $<$<CONFIG:Debug>:${IE_DEBUG_POSTFIX}>$<$<CONFIG:Release>:${IE_RELEASE_POSTFIX}>)
else ()
if (${CMAKE_BUILD_TYPE} STREQUAL "Debug" )
@@ -191,6 +182,10 @@ message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
add_definitions(-DIE_BUILD_POSTFIX=\"${IE_BUILD_POSTFIX}\")
if(NOT UNIX)
if (WIN32)
# set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT")
# set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd")
endif()
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER})
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER})
set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER})
@@ -205,41 +200,15 @@ else()
endif()
if(APPLE)
# WA for Xcode generator + object libraries issue:
# https://gitlab.kitware.com/cmake/cmake/issues/20260
# http://cmake.3232098.n2.nabble.com/XCODE-DEPEND-HELPER-make-Deletes-Targets-Before-and-While-They-re-Built-td7598277.html
set(CMAKE_XCODE_GENERATE_TOP_LEVEL_PROJECT_ONLY ON)
set(CMAKE_MACOSX_RPATH ON)
endif()
endif(APPLE)
# Use solution folders
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
set(CMAKE_POLICY_DEFAULT_CMP0054 NEW)
# LTO
set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
include(CheckIPOSupported)
check_ipo_supported(RESULT IPO_SUPPORTED
OUTPUT OUTPUT_MESSAGE
LANGUAGES C CXX)
if(NOT IPO_SUPPORTED)
set(ENABLE_LTO "OFF" CACHE STRING "Enable Link Time Optmization" FORCE)
message(WARNING "IPO / LTO is not supported: ${OUTPUT_MESSAGE}")
endif()
# General flags
include(sdl)
include(os_flags)
include(os_flags NO_POLICY_SCOPE)
include(sanitizer)
include(cross_compiled_func)
include(faster_build)
include(whole_archive)
include(api_validator/api_validator)
function(set_ci_build_number)
set(OpenVINO_MAIN_SOURCE_DIR "${CMAKE_SOURCE_DIR}")
@@ -247,5 +216,3 @@ function(set_ci_build_number)
set(CI_BUILD_NUMBER "${CI_BUILD_NUMBER}" PARENT_SCOPE)
endfunction()
set_ci_build_number()
include(vs_version/vs_version)

View File

@@ -22,11 +22,8 @@ function (DownloadAndCheck from to fatal result)
Download(${from} ${to} ${fatal} ${result} output)
list(GET output 0 status_code)
else()
message(STATUS "${WGET_EXECUTABLE} --no-cache --no-check-certificate
--retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=5 ${from}")
execute_process(COMMAND ${WGET_EXECUTABLE} "--no-cache" "--no-check-certificate"
"--retry-connrefused" "--waitretry=1" "--read-timeout=20" "--timeout=15" "--tries=5"
"${from}" "-O" "${to}"
message(STATUS "${WGET_EXECUTABLE} --no-cache ${from}")
execute_process(COMMAND ${WGET_EXECUTABLE} "--no-cache" "--no-check-certificate" "${from}" "-O" "${to}"
TIMEOUT 2000
RESULT_VARIABLE status_code)
endif()

View File

@@ -7,7 +7,7 @@ include ("download_and_check")
function (GetNameAndUrlToDownload name url archive_name_unified archive_name_win archive_name_lin archive_name_mac archive_name_android)
if (archive_name_unified)
set (${url} "thirdparty/unified/${archive_name_unified}" PARENT_SCOPE)
set (${url} "${archive_name_unified}" PARENT_SCOPE)
set (${name} ${archive_name_unified} PARENT_SCOPE)
else()
if(archive_name_lin)
@@ -27,7 +27,7 @@ function (GetNameAndUrlToDownload name url archive_name_unified archive_name_win
endif()
set (${name} ${archive_name} PARENT_SCOPE)
set (${url} "thirdparty/${PLATFORM_FOLDER}/${archive_name}" PARENT_SCOPE)
set (${url} "${archive_name}" PARENT_SCOPE)
endif()
endfunction(GetNameAndUrlToDownload)
@@ -151,12 +151,10 @@ function (CheckOrDownloadAndExtract component RELATIVE_URL archive_name unpacked
set (status "ON")
set (on_master FALSE)
if(DEFINED IE_PATH_TO_DEPS)
set(URL "${IE_PATH_TO_DEPS}/${RELATIVE_URL}")
elseif(DEFINED ENV{IE_PATH_TO_DEPS})
if(DEFINED ENV{IE_PATH_TO_DEPS})
set(URL "$ENV{IE_PATH_TO_DEPS}/${RELATIVE_URL}")
else()
set(URL "https://download.01.org/opencv/master/openvinotoolkit/${RELATIVE_URL}")
set(URL "https://download.01.org/opencv/2020/openvinotoolkit/2020.3/inference_engine/${RELATIVE_URL}")
endif()
#no message on recursive calls

View File

@@ -1,26 +0,0 @@
# Copyright (C) 2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
include(CMakeParseArguments)
function(ie_faster_build TARGET_NAME)
if(NOT ENABLE_FASTER_BUILD)
return()
endif()
cmake_parse_arguments(IE_FASTER_BUILD "UNITY" "" "PCH" ${ARGN})
if(IE_FASTER_BUILD_UNITY)
set_target_properties(${TARGET_NAME}
PROPERTIES
UNITY_BUILD ON
)
endif()
if(IE_FASTER_BUILD_PCH)
target_precompile_headers(${TARGET_NAME}
${IE_FASTER_BUILD_PCH}
)
endif()
endfunction()

View File

@@ -12,24 +12,19 @@ if(X86_64)
else()
set(ENABLE_MKL_DNN_DEFAULT OFF)
endif()
ie_option (ENABLE_TESTS "unit, behavior and functional tests" OFF)
ie_option (ENABLE_MKL_DNN "MKL-DNN plugin for inference engine" ${ENABLE_MKL_DNN_DEFAULT})
ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF)
ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "WIN32 OR X86_64;NOT APPLE;NOT MINGW" OFF)
# FIXME: there are compiler failures with LTO and Cross-Compile toolchains. Disabling for now, but
# this must be addressed in a proper way
ie_dependent_option (ENABLE_LTO "Enable Link Time Optimization" OFF "LINUX;NOT CMAKE_CROSSCOMPILING; CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9" OFF)
ie_dependent_option (ENABLE_LTO "Enable Link Time Optimization" OFF "LINUX OR WIN32;NOT CMAKE_CROSSCOMPILING" OFF)
ie_option (OS_FOLDER "create OS dedicated folder in output" OFF)
# FIXME: ARM cross-compiler generates several "false positive" warnings regarding __builtin_memcpy buffer overflow
ie_dependent_option (TREAT_WARNING_AS_ERROR "Treat build warnings as errors" ON "X86 OR X86_64" OFF)
ie_option (ENABLE_INTEGRITYCHECK "build DLLs with /INTEGRITYCHECK flag" OFF)
ie_option (ENABLE_SANITIZER "enable checking memory errors via AddressSanitizer" OFF)
ie_option (ENABLE_THREAD_SANITIZER "enable checking data races via ThreadSanitizer" OFF)
@@ -43,10 +38,3 @@ ie_dependent_option (ENABLE_SSE42 "Enable SSE4.2 optimizations" ON "X86_64 OR X8
ie_dependent_option (ENABLE_AVX2 "Enable AVX2 optimizations" ON "X86_64 OR X86" OFF)
ie_dependent_option (ENABLE_AVX512F "Enable AVX512 optimizations" ON "X86_64 OR X86" OFF)
ie_option (ENABLE_PROFILING_ITT "Build with ITT tracing. Optionally configure pre-built ittnotify library though INTEL_VTUNE_DIR variable." OFF)
# Documentation build
ie_option (ENABLE_DOCS "build docs using Doxygen" OFF)
ie_dependent_option (ENABLE_FASTER_BUILD "Enable build features (PCH, UNITY) to speed up build time" OFF "CMAKE_VERSION VERSION_GREATER_EQUAL 3.16" OFF)

View File

@@ -1,97 +0,0 @@
# Copyright (C) 2018-2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
#
# Define CMAKE_SYSTEM_VERSION if not defined
#
if(NOT DEFINED CMAKE_SYSTEM_VERSION)
# Sometimes CMAKE_HOST_SYSTEM_VERSION has form 10.x.y while we need
# form 10.x.y.z Adding .0 at the end fixes the issue
if(CMAKE_HOST_SYSTEM_VERSION MATCHES "^10\.0\.[0-9]+$")
set(CMAKE_SYSTEM_VERSION "${CMAKE_HOST_SYSTEM_VERSION}.0")
else()
set(CMAKE_SYSTEM_VERSION "${CMAKE_HOST_SYSTEM_VERSION}")
endif()
endif()
if(NOT DEFINED CMAKE_SYSTEM_PROCESSOR)
set(CMAKE_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR})
endif()
message(STATUS "Building for Windows OneCore compliance (using OneCoreUap.lib, ${CMAKE_SYSTEM_VERSION})")
#
# OneCore flags
#
set(_onecoreuap_arch "x64")
if(CMAKE_GENERATOR_PLATFORM)
set(_onecoreuap_arch ${CMAKE_GENERATOR_PLATFORM})
endif()
if(_onecoreuap_arch STREQUAL "x64")
# Forcefull make VS search for C++ libreries in these folders prior to other c++ standard libraries localizations.
add_link_options("/LIBPATH:\"\$\(VC_LibraryPath_VC_x64_OneCore\)\"")
set(CMAKE_C_STANDARD_LIBRARIES "\$\(UCRTContentRoot\)lib/\$\(TargetUniversalCRTVersion\)/um/\$\(Platform\)/OneCoreUap.lib" CACHE STRING "" FORCE)
set(CMAKE_CXX_STANDARD_LIBRARIES "\$\(UCRTContentRoot\)lib/\$\(TargetUniversalCRTVersion\)/um/\$\(Platform\)/OneCoreUap.lib" CACHE STRING "" FORCE)
elseif(_onecoreuap_arch STREQUAL "X86")
add_link_options("/LIBPATH:\"\$\(VCInstallDir\)lib/onecore\"")
add_link_options("/LIBPATH:\"\$\(VC_LibraryPath_VC_x86_OneCore\)\"")
set(CMAKE_C_STANDARD_LIBRARIES "\$\(UCRTContentRoot\)lib/\$\(TargetUniversalCRTVersion\)/um/x86/OneCoreUap.lib" CACHE STRING "" FORCE)
set(CMAKE_CXX_STANDARD_LIBRARIES "\$\(UCRTContentRoot\)lib/\$\(TargetUniversalCRTVersion\)/um/x86/OneCoreUap.lib" CACHE STRING "" FORCE)
else()
message(FATAL_ERROR "Unsupported architecture ${_onecoreuap_arch}. Only X86 or X86_64 are supported")
endif()
unset(_onecoreuap_arch)
# compile flags
set(includes "/I\"\$\(UniversalCRT_IncludePath\)\"")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${includes}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${includes}")
unset(includes)
# linker flags
foreach(lib kernel32 user32 advapi32 ole32 mscoree combase)
set(linker_flags "/NODEFAULTLIB:${lib}.lib ${linker_flags}")
endforeach()
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${linker_flags}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${linker_flags}")
unset(linker_flags)
#
# Flags for 3rd party projects
#
set(use_static_runtime ON)
if(use_static_runtime)
foreach(lang C CXX)
foreach(build_type "" "_DEBUG" "_MINSIZEREL" "_RELEASE" "_RELWITHDEBINFO")
set(flag_var "CMAKE_${lang}_FLAGS${build_type}")
string(REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endforeach()
endforeach()
endif()
function(onecoreuap_set_runtime var)
set(${var} ${use_static_runtime} CACHE BOOL "" FORCE)
endfunction()
# ONNX
onecoreuap_set_runtime(ONNX_USE_MSVC_STATIC_RUNTIME)
# pugixml
onecoreuap_set_runtime(STATIC_CRT)
# protobuf
onecoreuap_set_runtime(protobuf_MSVC_STATIC_RUNTIME)
# clDNN
onecoreuap_set_runtime(CLDNN__COMPILE_LINK_USE_STATIC_RUNTIME)
unset(use_static_runtime)

View File

@@ -2,21 +2,19 @@
# SPDX-License-Identifier: Apache-2.0
#
include(ProcessorCount)
#
# Disables deprecated warnings generation
# Defines ie_c_cxx_deprecated varaible which contains C / C++ compiler flags
#
macro(disable_deprecated_warnings)
if(WIN32)
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
set(ie_c_cxx_deprecated "/Qdiag-disable:1478,1786")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(ie_c_cxx_deprecated "/wd4996")
endif()
else()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
set(ie_c_cxx_deprecated "-diag-disable=1478,1786")
else()
set(ie_c_cxx_deprecated "-Wno-deprecated-declarations")
@@ -37,13 +35,13 @@ endmacro()
#
macro(ie_deprecated_no_errors)
if(WIN32)
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
set(ie_c_cxx_deprecated "/Qdiag-warning:1478,1786")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(ie_c_cxx_deprecated "/wd4996")
endif()
else()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
set(ie_c_cxx_deprecated_no_errors "-diag-warning=1478,1786")
else()
set(ie_c_cxx_deprecated_no_errors "-Wno-error=deprecated-declarations")
@@ -63,15 +61,15 @@ endmacro()
#
function(ie_sse42_optimization_flags flags)
if(WIN32)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
if(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
# No such option for MSVC 2019
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
set(${flags} "/arch:SSE4.2 /QxSSE4.2" PARENT_SCOPE)
else()
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
endif()
else()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
set(${flags} "-msse4.2 -xSSE4.2" PARENT_SCOPE)
else()
set(${flags} "-msse4.2" PARENT_SCOPE)
@@ -84,15 +82,15 @@ endfunction()
#
function(ie_avx2_optimization_flags flags)
if(WIN32)
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
set(${flags} "/QxCORE-AVX2" PARENT_SCOPE)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(${flags} "/arch:AVX2" PARENT_SCOPE)
else()
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
endif()
else()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
set(${flags} "-march=core-avx2 -xCORE-AVX2 -mtune=core-avx2" PARENT_SCOPE)
else()
set(${flags} "-mavx2 -mfma" PARENT_SCOPE)
@@ -106,47 +104,20 @@ endfunction()
#
function(ie_avx512_optimization_flags flags)
if(WIN32)
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
set(${flags} "/QxCOMMON-AVX512" PARENT_SCOPE)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(${flags} "/arch:AVX512" PARENT_SCOPE)
else()
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
endif()
else()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
set(${flags} "-xCOMMON-AVX512" PARENT_SCOPE)
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if(CMAKE_CXX_COMPILER_ID STREQUAL GNU)
set(${flags} "-mavx512f -mfma" PARENT_SCOPE)
endif()
if(CMAKE_CXX_COMPILER_ID MATCHES "^(Clang|AppleClang)$")
set(${flags} "-mavx512f -mfma" PARENT_SCOPE)
endif()
endif()
endfunction()
function(ie_arm_neon_optimization_flags flags)
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# nothing
elseif(ANDROID)
if(ANDROID_ABI STREQUAL "arm64-v8a")
set(${flags} "-mfpu=neon" PARENT_SCOPE)
elseif(ANDROID_ABI STREQUAL "armeabi-v7a-hard with NEON")
set(${flags} "-march=armv7-a -mfloat-abi=hard -mhard-float -D_NDK_MATH_NO_SOFTFP=1 -mfpu=neon" PARENT_SCOPE)
elseif((ANDROID_ABI STREQUAL "armeabi-v7a with NEON") OR
(ANDROID_ABI STREQUAL "armeabi-v7a" AND
DEFINED CMAKE_ANDROID_ARM_NEON AND CMAKE_ANDROID_ARM_NEON))
set(${flags} "-march=armv7-a -mfloat-abi=softfp -mfpu=neon" PARENT_SCOPE)
endif()
else()
if(AARCH64)
set(${flags} "-O2 -ftree-vectorize" PARENT_SCOPE)
elseif(ARM)
set(${flags} "-mfpu=neon" PARENT_SCOPE)
endif()
endif()
endfunction()
@@ -154,7 +125,27 @@ endfunction()
# Enables Link Time Optimization compilation
#
macro(ie_enable_lto)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE ON)
if(UNIX)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -flto")
# LTO causes issues with gcc 4.8.5 during cmake pthread check
if(NOT CMAKE_C_COMPILER_VERSION VERSION_LESS 4.9)
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -flto")
endif()
# modify linker and ar
if(LINUX)
set(CMAKE_AR "gcc-ar")
set(CMAKE_RANLIB "gcc-ranlib")
endif()
elseif(WIN32)
if(CMAKE_BUILD_TYPE STREQUAL Release)
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GL")
# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /GL")
# set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LTCG:STATUS")
# set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /LTCG:STATUS")
# set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} /LTCG:STATUS")
endif()
endif()
endmacro()
#
@@ -176,7 +167,7 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
# to allows to override CMAKE_CXX_STANDARD from command line
if(NOT DEFINED CMAKE_CXX_STANDARD)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
if(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(CMAKE_CXX_STANDARD 14)
else()
set(CMAKE_CXX_STANDARD 11)
@@ -185,13 +176,14 @@ if(NOT DEFINED CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
endif()
if(ENABLE_COVERAGE)
ie_add_compiler_flags(--coverage)
if(COVERAGE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --coverage")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --coverage")
endif()
if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
ie_add_compiler_flags(-fsigned-char)
if(NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsigned-char")
endif()
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
@@ -206,10 +198,10 @@ if(WIN32)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LARGEADDRESSAWARE")
if (TREAT_WARNING_AS_ERROR)
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
ie_add_compiler_flags(/WX)
ie_add_compiler_flags(/Qdiag-warning:47,1740,1786)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
# ie_add_compiler_flags(/WX) # Too many warnings
endif()
endif()
@@ -217,35 +209,47 @@ if(WIN32)
# Compiler specific flags
ie_add_compiler_flags(/bigobj)
ie_add_compiler_flags(/MP)
# Disable noisy warnings
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
if(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
# C4251 needs to have dll-interface to be used by clients of class
ie_add_compiler_flags(/wd4251)
# C4275 non dll-interface class used as base for dll-interface class
ie_add_compiler_flags(/wd4275)
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
# 161: unrecognized pragma
# 177: variable was declared but never referenced
# 556: not matched type of assigned function pointer
if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
# 161 unrecognized pragma
# 177 variable was declared but never referenced
# 556 not matched type of assigned function pointer
# 1744: field of class type without a DLL interface used in a class with a DLL interface
# 1879: unimplemented pragma ignored
# 2586: decorated name length exceeded, name was truncated
# 2586 decorated name length exceeded, name was truncated
# 2651: attribute does not apply to any entity
# 3180: unrecognized OpenMP pragma
# 3180 unrecognized OpenMP pragma
# 11075: To get full report use -Qopt-report:4 -Qopt-report-phase ipo
# 15335: was not vectorized: vectorization possible but seems inefficient. Use vector always directive or /Qvec-threshold0 to override
ie_add_compiler_flags(/Qdiag-disable:161,177,556,1744,1879,2586,2651,3180,11075,15335)
# 15335 was not vectorized: vectorization possible but seems inefficient. Use vector always directive or /Qvec-threshold0 to override
ie_add_compiler_flags(/Qdiag-disable:161,177,556,1744,2586,2651,3180,11075,15335)
endif()
# Debug information flags
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /Z7")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Z7")
if(ENABLE_DEBUG_SYMBOLS)
ie_add_compiler_flags(/Z7)
set(DEBUG_SYMBOLS_LINKER_FLAGS "/DEBUG")
if (CMAKE_BUILD_TYPE STREQUAL "Release")
# Keep default /OPT values. See /DEBUG reference for details.
set(DEBUG_SYMBOLS_LINKER_FLAGS "${DEBUG_SYMBOLS_LINKER_FLAGS} /OPT:REF /OPT:ICF")
endif()
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${DEBUG_SYMBOLS_LINKER_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${DEBUG_SYMBOLS_LINKER_FLAGS}")
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${DEBUG_SYMBOLS_LINKER_FLAGS}")
endif()
else()
# TODO: enable for C sources as well
# ie_add_compiler_flags(-Werror)
@@ -256,7 +260,6 @@ else()
ie_add_compiler_flags(-ffunction-sections -fdata-sections)
ie_add_compiler_flags(-fdiagnostics-show-option)
ie_add_compiler_flags(-Wundef)
ie_add_compiler_flags(-Wreturn-type)
# Disable noisy warnings
@@ -273,8 +276,6 @@ else()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
ie_add_compiler_flags(-diag-disable=remark)
# noisy warnings from Intel Compiler 19.1.1.217 20200306
ie_add_compiler_flags(-diag-disable=2196)
endif()
# Linker flags

View File

@@ -14,10 +14,6 @@ if (ENABLE_SANITIZER)
set(SANITIZER_LINKER_FLAGS "-fsanitize=address")
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=gold")
elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$" AND NOT WIN32)
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0)
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld")
endif()
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SANITIZER_COMPILER_FLAGS}")
@@ -28,15 +24,10 @@ if (ENABLE_SANITIZER)
endif()
if (ENABLE_THREAD_SANITIZER)
set(SANITIZER_COMPILER_FLAGS "-g -fsanitize=thread -fno-omit-frame-pointer")
set(SANITIZER_COMPILER_FLAGS "-g -fsanitize=thread")
set(SANITIZER_LINKER_FLAGS "-fsanitize=thread")
if(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$" AND NOT WIN32)
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0)
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld")
else()
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -static-libsan")
endif()
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SANITIZER_COMPILER_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SANITIZER_COMPILER_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${SANITIZER_LINKER_FLAGS}")

View File

@@ -14,7 +14,9 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release")
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(IE_LINKER_FLAGS "${IE_LINKER_FLAGS} -z noexecstack -z relro -z now")
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -z noexecstack -z relro -z now")
set(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_MODULE_LINKER_FLAGS_RELEASE} -z noexecstack -z relro -z now")
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -z noexecstack -z relro -z now")
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9)
set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -fstack-protector-all")
else()
@@ -23,28 +25,21 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release")
if (NOT ENABLE_SANITIZER)
set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -s")
endif()
elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$")
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -fstack-protector-all")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if (NOT ENABLE_SANITIZER)
set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -Wl,--strip-all")
endif()
set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -fstack-protector-strong")
set(IE_LINKER_FLAGS "${IE_LINKER_FLAGS} -z noexecstack -z relro -z now")
endif()
else()
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} /sdl")
endif()
set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} /guard:cf")
if(ENABLE_INTEGRITYCHECK)
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /INTEGRITYCHECK")
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} -z noexecstack -z relro -z now")
set(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_MODULE_LINKER_FLAGS_RELEASE} -z noexecstack -z relro -z now")
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -z noexecstack -z relro -z now")
endif()
elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} /sdl")
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${IE_C_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${IE_C_CXX_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} ${IE_LINKER_FLAGS}")
set(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_MODULE_LINKER_FLAGS_RELEASE} ${IE_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} ${IE_LINKER_FLAGS}")
endif()

View File

@@ -1,49 +0,0 @@
# Copyright (C) 2018-2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
include(CMakeParseArguments)
find_host_program(shellcheck_PROGRAM NAMES shellcheck DOC "Path to shellcheck tool")
function(ie_shellcheck_process)
if(NOT shellcheck_PROGRAM)
message(WARNING "shellcheck tool is not found")
return()
endif()
cmake_parse_arguments(IE_SHELLCHECK "" "DIRECTORY" "SKIP" ${ARGN})
set(IE_SHELLCHECK_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/cmake/shellcheck/shellcheck_process.cmake")
file(GLOB_RECURSE scripts "${IE_SHELLCHECK_DIRECTORY}/*.sh")
foreach(script IN LISTS scripts)
# check if we need to skip scripts
unset(skip_script)
foreach(skip_directory IN LISTS IE_SHELLCHECK_SKIP)
if(script MATCHES "${skip_directory}/*")
set(skip_script ON)
endif()
endforeach()
if(skip_script)
continue()
endif()
get_filename_component(dir_name "${script}" DIRECTORY)
string(REPLACE "${IE_SHELLCHECK_DIRECTORY}" "${CMAKE_BINARY_DIR}/shellcheck" output_file ${script})
set(output_file "${output_file}.txt")
get_filename_component(script_name "${script}" NAME)
add_custom_command(OUTPUT ${output_file}
COMMAND ${CMAKE_COMMAND}
-D IE_SHELLCHECK_PROGRAM=${shellcheck_PROGRAM}
-D IE_SHELL_SCRIPT=${script}
-D IE_SHELLCHECK_OUTPUT=${output_file}
-P ${IE_SHELLCHECK_SCRIPT}
DEPENDS ${script} ${IE_SHELLCHECK_SCRIPT}
COMMENT "Check script ${script_name}"
VERBATIM)
list(APPEND outputs ${output_file})
endforeach()
add_custom_target(ie_shellcheck DEPENDS ${outputs})
endfunction()

View File

@@ -1,27 +0,0 @@
# Copyright (C) 2018-2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
if(NOT DEFINED IE_SHELLCHECK_PROGRAM)
message(FATAL_ERROR "IE_SHELLCHECK_PROGRAM is not defined")
endif()
if(NOT DEFINED IE_SHELL_SCRIPT)
message(FATAL_ERROR "IE_SHELL_SCRIPT is not defined")
endif()
if(NOT DEFINED IE_SHELLCHECK_OUTPUT)
message(FATAL_ERROR "IE_SHELLCHECK_OUTPUT is not defined")
endif()
set(rules "SC1091,SC2164,SC2162,SC1090")
execute_process(COMMAND ${IE_SHELLCHECK_PROGRAM} --exclude=${rules} ${IE_SHELL_SCRIPT}
OUTPUT_VARIABLE error_message
RESULT_VARIABLE exit_code
OUTPUT_STRIP_TRAILING_WHITESPACE)
file(WRITE "${IE_SHELLCHECK_OUTPUT}" "${error_message}")
if(NOT exit_code EQUAL 0)
message(FATAL_ERROR "${error_message}")
endif()

View File

@@ -7,7 +7,7 @@ if(CMAKE_CL_64)
set(MSVC64 ON)
endif()
if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if(WIN32 AND CMAKE_CXX_COMPILER_ID MATCHES "GNU")
execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpmachine
OUTPUT_VARIABLE OPENVINO_GCC_TARGET_MACHINE
OUTPUT_STRIP_TRAILING_WHITESPACE)
@@ -16,25 +16,10 @@ if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
endif()
endif()
macro(_ie_process_msvc_generator_platform flag_name)
# if cmake -A <ARM|ARM64> is passed
if(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64")
set(AARCH64 ON)
elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM")
set(ARM ON)
elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "x64")
set(X86_64 ON)
elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "Win32")
set(X86 ON)
else()
set(${flag_name} ON)
endif()
endmacro()
if(MSVC64 OR MINGW64)
_ie_process_msvc_generator_platform(X86_64)
set(X86_64 ON)
elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING))
_ie_process_msvc_generator_platform(X86)
set(X86 ON)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(X86_64 ON)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")
@@ -45,13 +30,6 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
set(AARCH64 ON)
endif()
# in case of cross-compilation (or -m32) CMAKE_SYSTEM_PROCESSOR is equal to
# CMAKE_HOST_SYSTEM_PROCESSOR which is X86_64; patch this until a better solution
if(CMAKE_SIZEOF_VOID_P EQUAL 4 AND X86_64)
unset(X86_64)
set(X86 ON)
endif()
if(UNIX AND NOT APPLE)
set(LINUX ON)
endif()

View File

@@ -1,38 +0,0 @@
# Copyright (C) 2018-2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
set(CMAKE_SYSTEM_NAME WindowsStore)
#
# Define CMAKE_SYSTEM_VERSION if not defined
#
if(NOT DEFINED CMAKE_SYSTEM_VERSION)
# Sometimes CMAKE_HOST_SYSTEM_VERSION has form 10.x.y while we need
# form 10.x.y.z Adding .0 at the end fixes the issue
if(CMAKE_HOST_SYSTEM_VERSION MATCHES "^10\.0\.[0-9]+$")
set(CMAKE_SYSTEM_VERSION "${CMAKE_HOST_SYSTEM_VERSION}.0")
else()
set(CMAKE_SYSTEM_VERSION "${CMAKE_HOST_SYSTEM_VERSION}")
endif()
endif()
if(NOT DEFINED CMAKE_SYSTEM_PROCESSOR)
set(CMAKE_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR})
endif()
#
# Compilation flags
#
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/src/uwp.hpp"
"#ifdef WINAPI_FAMILY\n"
"#undef WINAPI_FAMILY\n"
"#define WINAPI_FAMILY WINAPI_FAMILY_DESKTOP_APP\n"
"#endif\n")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /FI\"${CMAKE_CURRENT_BINARY_DIR}/src/uwp.hpp\"")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /FI\"${CMAKE_CURRENT_BINARY_DIR}/src/uwp.hpp\"")
set(CMAKE_VS_GLOBALS "WindowsTargetPlatformMinVersion=${CMAKE_SYSTEM_VERSION}")

View File

@@ -1,87 +0,0 @@
# Copyright (C) 2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
macro(ie_parse_ci_build_number)
if(CI_BUILD_NUMBER MATCHES "^([0-9]+)\.([0-9]+)\.([0-9]+)\-.*")
set(IE_VERSION_MAJOR ${CMAKE_MATCH_1})
set(IE_VERSION_MINOR ${CMAKE_MATCH_2})
set(IE_VERSION_PATCH ${CMAKE_MATCH_3})
set(IE_VS_VER_HAS_VERSION 1)
else()
set(IE_VS_VER_HAS_VERSION 0)
endif()
endmacro()
ie_parse_ci_build_number()
if(IE_VS_VER_HAS_VERSION)
set(IE_VS_VER_FILEVERSION_QUAD "${IE_VERSION_MAJOR},${IE_VERSION_MINOR},${IE_VERSION_PATCH},0")
set(IE_VS_VER_PRODUCTVERSION_QUAD "${IE_VERSION_MAJOR},${IE_VERSION_MINOR},${IE_VERSION_PATCH},0")
set(IE_VS_VER_FILEVERSION_STR "${IE_VERSION_MAJOR}.${IE_VERSION_MINOR}.${IE_VERSION_PATCH}.0")
endif()
set(IE_VS_VER_PRODUCTVERSION_STR "${CI_BUILD_NUMBER}")
set(IE_VS_VER_PRODUCTNAME_STR "OpenVINO toolkit")
set(IE_VS_VER_COPYRIGHT_STR "Copyright (C) 2018-2020, Intel Corporation")
set(IE_VS_VER_COMMENTS_STR "https://docs.openvinotoolkit.org/")
#
# ie_add_vs_version_file(NAME <name>
# FILEDESCRIPTION <file description>
# [FILEVERSION <file version>]
# [INTERNALNAME <internal name>]
# [COPYRIGHT <name>]
# [PRODUCTNAME <name>]
# [PRODUCTVERSION <name>]
# [COMMENTS <name>]
# [FILEVERSION_QUAD <name>]
# [PRODUCTVERSION_QUAD <name>])
#
function(ie_add_vs_version_file)
if(NOT WIN32)
return()
endif()
cmake_parse_arguments(VS_VER "" "NAME;FILEDESCRIPTION;FILEVERSION;INTERNALNAME;COPYRIGHT;PRODUCTNAME;PRODUCTVERSION;COMMENTS;FILEVERSION_QUAD;PRODUCTVERSION_QUAD" "" ${ARGN})
if(NOT TARGET ${VS_VER_NAME})
message(FATAL_ERROR "${VS_VER_NAME} must define a target")
endif()
macro(_vs_ver_update_variable name)
if(VS_VER_NAME AND DEFINED IE_${VS_VER_NAME}_VS_VER_${name})
set(IE_VS_VER_${name} "${IE_${VS_VER_NAME}_VS_VER_${name}}")
elseif(VS_VER_${name})
set(IE_VS_VER_${name} "${VS_VER_${name}}")
endif()
endmacro()
_vs_ver_update_variable(FILEVERSION_QUAD)
_vs_ver_update_variable(PRODUCTVERSION_QUAD)
macro(_vs_ver_update_str_variable name)
if(VS_VER_NAME AND DEFINED IE_${VS_VER_NAME}_VS_VER_${name})
set(IE_VS_VER_${name}_STR "${IE_${VS_VER_NAME}_VS_VER_${name}}")
elseif(VS_VER_${name})
set(IE_VS_VER_${name}_STR "${VS_VER_${name}}")
endif()
endmacro()
_vs_ver_update_str_variable(FILEDESCRIPTION)
_vs_ver_update_str_variable(FILEVERSION)
_vs_ver_update_str_variable(INTERNALNAME)
_vs_ver_update_str_variable(COPYRIGHT)
_vs_ver_update_str_variable(PRODUCTNAME)
_vs_ver_update_str_variable(PRODUCTVERSION)
_vs_ver_update_str_variable(COMMENTS)
set(IE_VS_VER_ORIGINALFILENAME_STR "${CMAKE_SHARED_LIBRARY_PREFIX}${VS_VER_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}")
set(IE_VS_VER_INTERNALNAME_STR ${VS_VER_NAME})
set(vs_version_output "${CMAKE_CURRENT_BINARY_DIR}/vs_version.rc")
configure_file("${OpenVINO_MAIN_SOURCE_DIR}/cmake/vs_version/vs_version.rc.in" "${vs_version_output}" @ONLY)
source_group("src" FILES ${vs_version_output})
target_sources(${VS_VER_NAME} PRIVATE ${vs_version_output})
endfunction()

View File

@@ -1,38 +0,0 @@
#include <winver.h>
VS_VERSION_INFO VERSIONINFO
#if @IE_VS_VER_HAS_VERSION@
FILEVERSION @IE_VS_VER_FILEVERSION_QUAD@
PRODUCTVERSION @IE_VS_VER_PRODUCTVERSION_QUAD@
#endif
FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
#ifdef _DEBUG
FILEFLAGS 1
#else
FILEFLAGS 0
#endif
FILEOS VOS__WINDOWS32
FILETYPE VFT_DLL
FILESUBTYPE 0
BEGIN
BLOCK "StringFileInfo"
BEGIN
BLOCK "040904E4"
BEGIN
VALUE "FileDescription", "@IE_VS_VER_FILEDESCRIPTION_STR@\0"
#if @IE_VS_VER_HAS_VERSION@
VALUE "FileVersion", "@IE_VS_VER_FILEVERSION_STR@\0"
#endif
VALUE "InternalName", "@IE_VS_VER_INTERNALNAME_STR@\0"
VALUE "LegalCopyright", "@IE_VS_VER_COPYRIGHT_STR@\0"
VALUE "OriginalFilename", "@IE_VS_VER_ORIGINALFILENAME_STR@\0"
VALUE "ProductName", "@IE_VS_VER_PRODUCTNAME_STR@\0"
VALUE "ProductVersion", "@IE_VS_VER_PRODUCTVERSION_STR@\0"
VALUE "Comments", "@IE_VS_VER_COMMENTS_STR@\0"
END
END
BLOCK "VarFileInfo"
BEGIN
VALUE "Translation", 0x0409, 1252
END
END

View File

@@ -3,190 +3,45 @@
#
if(NOT ENABLE_DOCKER)
add_subdirectory(snippets)
add_subdirectory(examples)
# Detect nGraph
find_package(ngraph QUIET)
if(NOT ngraph_FOUND)
set(ngraph_DIR ${CMAKE_BINARY_DIR}/ngraph)
endif()
# Detect InferenceEngine
find_package(InferenceEngine QUIET)
if(NOT InferenceEngine_FOUND)
set(InferenceEngine_DIR ${CMAKE_BINARY_DIR})
endif()
if (NGRAPH_ONNX_IMPORT_ENABLE)
add_subdirectory(onnx_custom_op)
endif()
add_subdirectory(template_extension)
set(all_docs_targets
ie_docs_snippets
template_extension
templatePlugin TemplateBehaviorTests TemplateFunctionalTests)
foreach(target_name IN LISTS all_docs_targets)
if (TARGET ${target_name})
set_target_properties(${target_name} PROPERTIES FOLDER docs)
endif()
endforeach()
endif()
function(build_docs)
find_package(Doxygen REQUIRED dot)
find_package(Python3 COMPONENTS Interpreter)
find_package(LATEX)
# OpenVINO docs
if(NOT DOXYGEN_FOUND)
message(FATAL_ERROR "Doxygen is required to build the documentation")
endif()
set(OPENVINO_DOCS_PATH "" CACHE PATH "Path to openvino-documentation local repository")
set(args "")
if(NOT Python3_FOUND)
message(FATAL_ERROR "Python3 is required to build the documentation")
endif()
if(OPENVINO_DOCS_PATH)
set(args "${args} ovinodoc_path:${OPENVINO_DOCS_PATH}")
endif()
if(NOT LATEX_FOUND)
message(FATAL_ERROR "LATEX is required to build the documentation")
endif()
file(GLOB_RECURSE docs_files "${OpenVINO_MAIN_SOURCE_DIR}/docs")
file(GLOB_RECURSE include_files "${OpenVINO_MAIN_SOURCE_DIR}/inference-engine/include")
file(GLOB_RECURSE ovino_files "${OPENVINO_DOCS_PATH}")
set(DOCS_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}")
set(DOXYGEN_DIR "${OpenVINO_MAIN_SOURCE_DIR}/docs/doxygen")
set(IE_SOURCE_DIR "${OpenVINO_MAIN_SOURCE_DIR}/inference-engine")
set(PYTHON_API_IN "${IE_SOURCE_DIR}/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx")
set(PYTHON_API_OUT "${DOCS_BINARY_DIR}/python_api/ie_api.pyx")
set(C_API "${IE_SOURCE_DIR}/ie_bridges/c/include")
set(PLUGIN_API_DIR "${DOCS_BINARY_DIR}/IE_PLUGIN_DG")
add_custom_target(ie_docs
COMMAND ./build_docs.sh ${args}
WORKING_DIRECTORY "${OpenVINO_MAIN_SOURCE_DIR}/docs/build_documentation"
COMMENT "Generating OpenVINO documentation"
SOURCES ${docs_files} ${include_files} ${ovino_files}
VERBATIM)
# Preprocessing scripts
set(DOXY_MD_FILTER "${DOXYGEN_DIR}/doxy_md_filter.py")
set(PYX_FILTER "${DOXYGEN_DIR}/pyx_filter.py")
file(GLOB_RECURSE doc_source_files
LIST_DIRECTORIES true RELATIVE ${OpenVINO_MAIN_SOURCE_DIR}
"${OpenVINO_MAIN_SOURCE_DIR}/docs/*.md"
"${OpenVINO_MAIN_SOURCE_DIR}/docs/*.png"
"${OpenVINO_MAIN_SOURCE_DIR}/docs/*.gif"
"${OpenVINO_MAIN_SOURCE_DIR}/docs/*.jpg"
"${OpenVINO_MAIN_SOURCE_DIR}/inference-engine/*.md"
"${OpenVINO_MAIN_SOURCE_DIR}/inference-engine/*.png"
"${OpenVINO_MAIN_SOURCE_DIR}/inference-engine/*.gif"
"${OpenVINO_MAIN_SOURCE_DIR}/inference-engine/*.jpg")
configure_file(${PYTHON_API_IN} ${PYTHON_API_OUT} @ONLY)
set(IE_CONFIG_SOURCE "${DOXYGEN_DIR}/ie_docs.config")
set(C_CONFIG_SOURCE "${DOXYGEN_DIR}/ie_c_api.config")
set(PY_CONFIG_SOURCE "${DOXYGEN_DIR}/ie_py_api.config")
set(PLUGIN_CONFIG_SOURCE "${DOXYGEN_DIR}/ie_plugin_api.config")
set(IE_CONFIG_BINARY "${DOCS_BINARY_DIR}/ie_docs.config")
set(C_CONFIG_BINARY "${DOCS_BINARY_DIR}/ie_c_api.config")
set(PY_CONFIG_BINARY "${DOCS_BINARY_DIR}/ie_py_api.config")
set(PLUGIN_CONFIG_BINARY "${DOCS_BINARY_DIR}/ie_plugin_api.config")
set(IE_LAYOUT_SOURCE "${DOXYGEN_DIR}/ie_docs.xml")
set(C_LAYOUT_SOURCE "${DOXYGEN_DIR}/ie_c_api.xml")
set(PY_LAYOUT_SOURCE "${DOXYGEN_DIR}/ie_py_api.xml")
set(PLUGIN_LAYOUT_SOURCE "${DOXYGEN_DIR}/ie_plugin_api.xml")
set(IE_LAYOUT_BINARY "${DOCS_BINARY_DIR}/ie_docs.xml")
set(C_LAYOUT_BINARY "${DOCS_BINARY_DIR}/ie_c_api.xml")
set(PY_LAYOUT_BINARY "${DOCS_BINARY_DIR}/ie_py_api.xml")
set(PLUGIN_LAYOUT_BINARY "${DOCS_BINARY_DIR}/ie_plugin_api.xml")
# Tables of contents
configure_file(${IE_LAYOUT_SOURCE} ${IE_LAYOUT_BINARY} @ONLY)
configure_file(${C_LAYOUT_SOURCE} ${C_LAYOUT_BINARY} @ONLY)
configure_file(${PY_LAYOUT_SOURCE} ${PY_LAYOUT_BINARY} @ONLY)
configure_file(${PLUGIN_LAYOUT_SOURCE} ${PLUGIN_LAYOUT_BINARY} @ONLY)
# Doxygen config files
configure_file(${IE_CONFIG_SOURCE} ${IE_CONFIG_BINARY} @ONLY)
configure_file(${C_CONFIG_SOURCE} ${C_CONFIG_BINARY} @ONLY)
configure_file(${PY_CONFIG_SOURCE} ${PY_CONFIG_BINARY} @ONLY)
configure_file(${PLUGIN_CONFIG_SOURCE} ${PLUGIN_CONFIG_BINARY} @ONLY)
# Preprocessing scripts
set(DOXY_MD_FILTER "${DOXYGEN_DIR}/doxy_md_filter.py")
set(PYX_FILTER "${DOXYGEN_DIR}/pyx_filter.py")
# C API
add_custom_target(c_api
COMMAND ${DOXYGEN_EXECUTABLE} ${C_CONFIG_BINARY}
WORKING_DIRECTORY ${DOCS_BINARY_DIR}
COMMENT "Generating C API Reference"
VERBATIM)
# Python API
add_custom_target(py_api
COMMAND ${DOXYGEN_EXECUTABLE} ${PY_CONFIG_BINARY}
WORKING_DIRECTORY ${DOCS_BINARY_DIR}
COMMENT "Generating Python API Reference"
VERBATIM)
add_custom_command(TARGET py_api
PRE_BUILD
COMMAND ${Python3_EXECUTABLE} ${PYX_FILTER} ${PYTHON_API_OUT}
COMMENT "Pre-process Python API")
# Preprocess docs
add_custom_target(preprocess_docs
COMMENT "Pre-process docs"
VERBATIM)
foreach(source_file ${doc_source_files})
list(APPEND commands COMMAND ${CMAKE_COMMAND} -E copy
"${OpenVINO_MAIN_SOURCE_DIR}/${source_file}" "${DOCS_BINARY_DIR}/${source_file}")
endforeach()
add_custom_command(TARGET preprocess_docs
PRE_BUILD
${commands}
COMMAND ${Python3_EXECUTABLE} ${DOXY_MD_FILTER} ${DOCS_BINARY_DIR}
COMMENT "Pre-process markdown and image links")
# IE dev guide and C++ API
add_custom_target(ie_docs
DEPENDS preprocess_docs
COMMAND ${DOXYGEN_EXECUTABLE} ${IE_CONFIG_BINARY}
WORKING_DIRECTORY ${DOCS_BINARY_DIR}
VERBATIM)
# Plugin API
add_custom_target(plugin_api
find_program(browser NAMES xdg-open)
if(browser)
add_custom_target(ie_docs_open
COMMAND ${browser} "${OpenVINO_MAIN_SOURCE_DIR}/doc/html/index.html"
DEPENDS ie_docs
COMMAND ${DOXYGEN_EXECUTABLE} ${PLUGIN_CONFIG_BINARY}
WORKING_DIRECTORY ${DOCS_BINARY_DIR}
COMMENT "Generating Plugin API Reference"
COMMENT "Open OpenVINO documentation"
VERBATIM)
# Umbrella OpenVINO target
add_custom_target(openvino_docs
DEPENDS c_api py_api ie_docs plugin_api
COMMENT "Generating OpenVINO documentation"
VERBATIM)
set_target_properties(openvino_docs ie_docs c_api py_api preprocess_docs plugin_api
PROPERTIES FOLDER docs)
find_program(browser NAMES xdg-open)
if(browser)
add_custom_target(ie_docs_open
COMMAND ${browser} "${OpenVINO_MAIN_SOURCE_DIR}/docs/html/index.html"
DEPENDS ie_docs
COMMENT "Open OpenVINO documentation"
VERBATIM)
set_target_properties(ie_docs_open PROPERTIES FOLDER docs)
endif()
endfunction()
if(ENABLE_DOCS)
build_docs()
endif()

View File

@@ -1,382 +0,0 @@
# Custom Operations Guide {#openvino_docs_HOWTO_Custom_Layers_Guide}
The Intel® Distribution of OpenVINO™ toolkit supports neural network models trained with multiple frameworks including
TensorFlow*, Caffe*, MXNet*, Kaldi* and ONNX* file format. The list of supported operations (layers) is different for
each of the supported frameworks. To see the operations supported by your framework, refer to
[Supported Framework Layers](../MO_DG/prepare_model/Supported_Frameworks_Layers.md).
Custom operations are operations that are not included in the list of known operations. If your model contains any
operation that is not in the list of known operations, the Model Optimizer is not able to generate an Intermediate
Representation (IR) for this model.
This guide illustrates the workflow for running inference on topologies featuring custom operations, allowing you to
plug in your own implementation for existing or completely new operation.
> **NOTE:** *Layer* — The legacy term for an *operation* which came from Caffe\* framework. Currently it is not used.
> Refer to the [Deep Learning Network Intermediate Representation and Operation Sets in OpenVINO™](../MO_DG/IR_and_opsets.md)
> for more information on the topic.
## Terms Used in This Guide
- *Intermediate Representation (IR)* — Neural Network used only by the Inference Engine in OpenVINO abstracting the
different frameworks and describing the model topology, operations parameters and weights.
- *Operation* — The abstract concept of a math function that is selected for a specific purpose. Operations supported by
OpenVINO™ are listed in the supported operation set provided in the [Available Operations Sets](../ops/opset.md).
Examples of the operations are: [ReLU](../ops/activation/ReLU_1.md), [Convolution](../ops/convolution/Convolution_1.md),
[Add](../ops/arithmetic/Add_1.md), etc.
- *Kernel* — The implementation of a operation function in the OpenVINO™ plugin, in this case, the math programmed (in
C++ and OpenCL) to perform the operation for a target hardware (CPU or GPU).
- *Inference Engine Extension* — Device-specific module implementing custom operations (a set of kernels).
## Custom Operation Support Overview
There are three steps to support inference of a model with custom operation(s):
1. Add support for a custom operation in the [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) so
the Model Optimizer can generate the IR with the operation.
2. Create an operation set and implement a custom nGraph operation in it as described in the
[Custom nGraph Operation](../IE_DG/Extensibility_DG/AddingNGraphOps.md).
3. Implement a customer operation in one of the [Inference Engine](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md)
plugins to support inference of this operation using a particular target hardware (CPU, GPU or VPU).
To see the operations that are supported by each device plugin for the Inference Engine, refer to the
[Supported Devices](../IE_DG/supported_plugins/Supported_Devices.md).
> **NOTE:** If a device doesn't support a particular operation, an alternative to creating a new operation is to target
> an additional device using the HETERO plugin. The [Heterogeneous Plugin](../IE_DG/supported_plugins/HETERO.md) may be
> used to run an inference model on multiple devices allowing the unsupported operations on one device to "fallback" to
> run on another device (e.g., CPU) that does support those operations.
### Custom Operation Support for the Model Optimizer
Model Optimizer model conversion pipeline is described in details in "Model Conversion Pipeline" section on the
[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md).
It is recommended to read that article first for a better understanding of the following material.
Model Optimizer provides extensions mechanism to support new operations and implement custom model transformations to
generate optimized IR. This mechanism is described in the "Model Optimizer Extensions" section on the
[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md).
Two types of the Model Optimizer extensions should be implemented to support custom operation at minimum:
1. Operation class for a new operation. This class stores information about the operation, its attributes, shape
inference function, attributes to be saved to an IR and some others internally used attributes. Refer to the
"Model Optimizer Operation" section on the
[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for the
detailed instruction on how to implement it.
2. Operation attributes extractor. The extractor is responsible for parsing framework-specific representation of the
operation and uses corresponding operation class to update graph node attributes with necessary attributes of the
operation. Refer to the "Operation Extractor" section on the
[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for the
detailed instruction on how to implement it.
> **NOTE:** In some cases you may need to implement some transformation to support the operation. This topic is covered
> in the "Graph Transformation Extensions" section on the
> [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md).
## Custom Operations Extensions for the Inference Engine
Inference Engine provides extensions mechanism to support new operations. This mechanism is described in the
[Inference Engine Extensibility Mechanism](../IE_DG/Extensibility_DG/Intro.md).
Each device plugin includes a library of optimized implementations to execute known operations which must be extended to
execute a custom operation. The custom operation extension is implemented according to the target device:
- Custom Operation CPU Extension
- A compiled shared library (`.so`, `.dylib` or `.dll`) needed by the CPU Plugin for executing the custom operation
on a CPU. Refer to the [How to Implement Custom CPU Operations](../IE_DG/Extensibility_DG/CPU_Kernel.md) for more
details.
- Custom Operation GPU Extension
- OpenCL source code (.cl) for the custom operation kernel that will be compiled to execute on the GPU along with a
operation description file (.xml) needed by the GPU Plugin for the custom operation kernel. Refer to the
[How to Implement Custom GPU Operations](../IE_DG/Extensibility_DG/GPU_Kernel.md) for more details.
- Custom Operation VPU Extension
- OpenCL source code (.cl) for the custom operation kernel that will be compiled to execute on the VPU along with a
operation description file (.xml) needed by the VPU Plugin for the custom operation kernel. Refer to the
[How to Implement Custom Operations for VPU](../IE_DG/Extensibility_DG/VPU_Kernel.md) for more details.
Also, it is necessary to implement nGraph custom operation according to the
[Custom nGraph Operation](../IE_DG/Extensibility_DG/AddingNGraphOps.md) so the Inference Engine can read an IR with this
operation and correctly infer output tensors shape and type.
## Enabling Magnetic Resonance Image Reconstruction Model
This chapter provides a step-by-step instruction on how to enable the magnetic resonance image reconstruction model
implemented in the [repository](https://github.com/rmsouza01/Hybrid-CS-Model-MRI/) using a custom operation on CPU. The
example is prepared for a model generated from the repository with hash `2ede2f96161ce70dcdc922371fe6b6b254aafcc8`.
### Download and Convert the Model to a Frozen TensorFlow\* Model Format
The original pre-trained model is provided in the hdf5 format which is not supported by OpenVINO directly and needs to
be converted to TensorFlow\* frozen model format first.
1. Download repository `https://github.com/rmsouza01/Hybrid-CS-Model-MRI`:<br
```bash
git clone https://github.com/rmsouza01/Hybrid-CS-Model-MRI
git checkout 2ede2f96161ce70dcdc922371fe6b6b254aafcc8
```
2. Convert pre-trained `.hdf5` to a frozen `.pb` graph using the following script (tested with TensorFlow==1.15.0 and
Keras==2.2.4) which should be executed from the root of the cloned repository:<br>
```py
import keras as K
import numpy as np
import Modules.frequency_spatial_network as fsnet
import tensorflow as tf
under_rate = '20'
stats = np.load("Data/stats_fs_unet_norm_" + under_rate + ".npy")
var_sampling_mask = np.load("Data/sampling_mask_" + under_rate + "perc.npy")
model = fsnet.wnet(stats[0], stats[1], stats[2], stats[3], kshape = (5,5), kshape2=(3,3))
model_name = "Models/wnet_" + under_rate + ".hdf5"
model.load_weights(model_name)
inp = np.random.standard_normal([1, 256, 256, 2]).astype(np.float32)
np.save('inp', inp)
sess = K.backend.get_session()
sess.as_default()
graph_def = sess.graph.as_graph_def()
graph_def = tf.graph_util.convert_variables_to_constants(sess, graph_def, ['conv2d_44/BiasAdd'])
with tf.gfile.FastGFile('wnet_20.pb', 'wb') as f:
f.write(graph_def.SerializeToString())
```
As a result the TensorFlow\* frozen model file "wnet_20.pb" is generated.
### Convert the Frozen TensorFlow\* Model to Intermediate Representation
Firstly, open the model in the TensorBoard or other TensorFlow* model visualization tool. The model supports dynamic
batch dimension because the value for the batch dimension is not hardcoded in the model. Model Optimizer need to set all
dynamic dimensions to some specific value to create the IR, therefore specify the command line parameter `-b 1` to set
the batch dimension equal to 1. The actual batch size dimension can be changed at runtime using the Inference Engine API
described in the [Using Shape Inference](../IE_DG/ShapeInference.md). Also refer to
[Converting a Model Using General Conversion Parameters](../MO_DG/prepare_model/convert_model/Converting_Model_General.md)
and [Convert Your TensorFlow* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md)
for more details and command line parameters used for the model conversion.
```bash
./<MO_INSTALL_DIR>/mo.py --input_model <PATH_TO_MODEL>/wnet_20.pb -b 1
```
Model Optimizer produces the following error:
```bash
[ ERROR ] List of operations that cannot be converted to Inference Engine IR:
[ ERROR ] Complex (1)
[ ERROR ] lambda_2/Complex
[ ERROR ] IFFT2D (1)
[ ERROR ] lambda_2/IFFT2D
[ ERROR ] ComplexAbs (1)
[ ERROR ] lambda_2/Abs
[ ERROR ] Part of the nodes was not converted to IR. Stopped.
```
The error means that the Model Optimizer doesn't know how to handle 3 types of TensorFlow\* operations: "Complex",
"IFFT2D" and "ComplexAbs". In order to see more details about the conversion process run the model conversion with
additional parameter `--log_level DEBUG`. It is worth to mention the following lines from the detailed output:
```bash
[ INFO ] Called "tf_native_tf_node_infer" for node "lambda_2/Complex"
[ <TIMESTAMP> ] [ DEBUG ] [ tf:228 ] Added placeholder with name 'lambda_2/lambda_3/strided_slice_port_0_ie_placeholder'
[ <TIMESTAMP> ] [ DEBUG ] [ tf:228 ] Added placeholder with name 'lambda_2/lambda_4/strided_slice_port_0_ie_placeholder'
[ <TIMESTAMP> ] [ DEBUG ] [ tf:241 ] update_input_in_pbs: replace input 'lambda_2/lambda_3/strided_slice' with input 'lambda_2/lambda_3/strided_slice_port_0_ie_placeholder'
[ <TIMESTAMP> ] [ DEBUG ] [ tf:249 ] Replacing input '0' of the node 'lambda_2/Complex' with placeholder 'lambda_2/lambda_3/strided_slice_port_0_ie_placeholder'
[ <TIMESTAMP> ] [ DEBUG ] [ tf:241 ] update_input_in_pbs: replace input 'lambda_2/lambda_4/strided_slice' with input 'lambda_2/lambda_4/strided_slice_port_0_ie_placeholder'
[ <TIMESTAMP> ] [ DEBUG ] [ tf:249 ] Replacing input '1' of the node 'lambda_2/Complex' with placeholder 'lambda_2/lambda_4/strided_slice_port_0_ie_placeholder'
[ <TIMESTAMP> ] [ DEBUG ] [ tf:148 ] Inferred shape of the output tensor with index '0' of the node 'lambda_2/Complex': '[ 1 256 256]'
[ <TIMESTAMP> ] [ DEBUG ] [ infer:145 ] Outputs:
[ <TIMESTAMP> ] [ DEBUG ] [ infer:32 ] output[0]: shape = [ 1 256 256], value = <UNKNOWN>
[ <TIMESTAMP> ] [ DEBUG ] [ infer:129 ] --------------------
[ <TIMESTAMP> ] [ DEBUG ] [ infer:130 ] Partial infer for lambda_2/IFFT2D
[ <TIMESTAMP> ] [ DEBUG ] [ infer:131 ] Op: IFFT2D
[ <TIMESTAMP> ] [ DEBUG ] [ infer:132 ] Inputs:
[ <TIMESTAMP> ] [ DEBUG ] [ infer:32 ] input[0]: shape = [ 1 256 256], value = <UNKNOWN>
```
This is a part of the log of the partial inference phase of the model conversion. See the "Partial Inference" section on
the [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for
more information about this phase. Model Optimizer inferred output shape for the unknown operation of type "Complex"
using a "fallback" to TensorFlow\*. However, it is not enough to generate the IR because Model Optimizer doesn't know
which attributes of the operation should be saved to IR. So it is necessary to implement Model Optimizer extensions to
support these operations.
Before going into the extension development it is necessary to understand what these unsupported operations do according
to the TensorFlow\* framework specification.
* "Complex" - returns a tensor of complex type constructed from two real input tensors specifying real and imaginary
part of a complex number.
* "IFFT2D" - returns a tensor with inverse 2-dimensional discrete Fourier transform over the inner-most 2 dimensions of
an input.
* "ComplexAbs" - returns a tensor with absolute values of input tensor with complex numbers.
The part of the model with all three unsupported operations is depicted below:
![Unsupported sub-graph](img/unsupported_subgraph.png)
This model uses complex numbers during the inference but Inference Engine does not support tensors of this data type. So
it is necessary to find a way how to avoid using tensors of such a type in the model. Fortunately, the complex tensor
appear as a result of "Complex" operation, is used as input in the "IFFT2D" operation then is passed to "ComplexAbs"
which produces real value tensor as output. So there are just 3 operations consuming/producing complex tensors in the
model.
Let's design an OpenVINO operation "FFT" which get a single real number tensor describing the complex number and
produces a single real number tensor describing output complex tensor. This way the fact that the model uses complex
numbers is hidden inside the "FFT" operation implementation. The operation gets a tensor of shape `[N, H, W, 2]` and
produces the output tensor with the same shape, where the innermost dimension contains pairs of real numbers describing
the complex number (its real and imaginary part). As we will see further this operation will allow us to support the
model. The implementation of the Model Optimizer operation should be saved to `mo_extensions/ops/FFT.py` file:
@snippet FFT.py fft:operation
The attribute `inverse` is a flag specifying type of the FFT to apply: forward or inverse.
See the "Model Optimizer Operation" section on the
[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for the
detailed instruction on how to implement the operation.
Now it is necessary to implement extractor for the "IFFT2D" operation according to the
"Operation Extractor" section on the
[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md). The
following snippet provides two extractors: one for "IFFT2D", another one for "FFT2D", however only on of them is used
in this example. The implementation should be saved to the file `mo_extensions/front/tf/FFT_ext.py`.
@snippet FFT_ext.py fft_ext:extractor
> **NOTE:** The graph is in inconsistent state after extracting node attributes because according to original operation
> "IFFT2D" semantic it should have an input consuming a tensor of complex numbers, but the extractor instantiated an
> operation "FFT" which expects a real tensor with specific layout. But the inconsistency will be resolved during
> applying front phase transformations discussed below.
The output shape of the operation "AddV2" from the picture above is `[N, H, W, 2]`. Where the innermost dimension
contains pairs of real numbers describing the complex number (its real and imaginary part). The following "StridedSlice"
operations split the input tensor into 2 parts to get a tensor of real and a tensor of imaginary parts which are then
consumed with the "Complex" operation to produce a tensor of complex numbers. These "StridedSlice" and "Complex"
operations can be removed so the "FFT" operation will get a real value tensor encoding complex numbers. To achieve this
we implement the front phase transformation which searches for a pattern of two "StridedSlice" operations with specific
attributes producing data to "Complex" operation and removes it from the graph. Refer to the
"Pattern-Defined Front Phase Transformations" section on the
[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for more
information on how this type of transformation works. The code snippet should be saved to the file
`mo_extensions/front/tf/Complex.py`.
@snippet Complex.py complex:transformation
> **NOTE:** The graph is in inconsistent state because the "ComplexAbs" operation consumes complex value tensor but
> "FFT" produces real value tensor.
Now lets implement a transformation which replace a "ComplexAbs" operation with a sub-graph of primitive operations
which calculate the result using the following formulae: \f$module(z) = \sqrt{real(z) \cdot real(z) + imag(z) \cdot imag(z)}\f$.
Original "IFFT2D" operation produces tensor of complex values, but the "FFT" operation produces a real value tensor with
the same format and shape as the input for the operation. So the input shape for the "ComplexAbs" will be `[N, H, W, 2]`
with the innermost dimension containing tuple with real and imaginary part of a complex number. In order to calculate
absolute values for the complex tensor we do the following:
1. Raise all elements in the power of 2.
2. Calculate a reduced sum over the innermost dimension.
3. Calculate a square root.
The implementation should be saved to the file `mo_extensions/front/tf/ComplexAbs.py` and provided below:
@snippet ComplexAbs.py complex_abs:transformation
Now it is possible to convert the model using the following command line:
```bash
./<MO_INSTALL_DIR>/mo.py --input_model <PATH_TO_MODEL>/wnet_20.pb -b 1 --extensions mo_extensions/
```
The sub-graph corresponding to the originally non-supported one is depicted on the image below:
![Converted sub-graph](img/converted_subgraph.png)
> **NOTE:** Model Optimizer performed conversion of the model from NHWC to NCHW layout that is why the dimension with
> the value 2 moved to another position.
### Inference Engine Extension Implementation
Now it is necessary to implement the extension for the CPU plugin with operation "FFT" introduced previously. The code
below is based on the template extension described on the
[Inference Engine Extensibility Mechanism](../IE_DG/Extensibility_DG/Intro.md).
#### CMake Build File
The first step is to create a CMake configuration file which builds the extension. The content of the "CMakeLists.txt"
file is the following:
@snippet ie_cpu_extension/CMakeLists.txt fft_cmake_list:cmake
The CPU FFT kernel implementation uses OpenCV to perform the FFT that is why the extension library is linked with
"opencv_core" which comes with the OpenVINO.
#### Custom nGraph Operation "FFT" Implementation
The next step is to create the nGraph operation FFT. The header file "fft_op.hpp" has the following content:
@snippet fft_op.hpp fft_op:header
The operation has just one boolean attribute `inverse`. Implementation of the necessary nGraph operation functions are
in the "fft_op.cpp" file with the following content:
@snippet fft_op.cpp fft_op:implementation
Refer to the [Custom nGraph Operation](../IE_DG/Extensibility_DG/AddingNGraphOps.md) for more details.
#### CPU FFT Kernel Implementation
The operation implementation for CPU plugin uses OpenCV to perform the FFT. The header file "fft_kernel.hpp" has the
following content:
@snippet fft_kernel.hpp fft_kernel:header
The "fft_kernel.cpp" with the implementation of the CPU has the following content:
@snippet fft_kernel.cpp fft_kernel:implementation
Refer to the [How to Implement Custom CPU Operations](../IE_DG/Extensibility_DG/CPU_Kernel.md) for more details.
#### Extension Implementation
The source code of the extension itself contains the "extension.hpp" and "extension.cpp" files.
**extension.hpp**:
@snippet ie_cpu_extension/extension.hpp fft_extension:header
**extension.cpp**:
@snippet ie_cpu_extension/extension.cpp fft_extension:implementation
### Building and Running the Custom Extension
In order to build the extension run the following:<br>
```bash
mkdir build && cd build
source /opt/intel/openvino/bin/setupvars.sh
cmake .. -DCMAKE_BUILD_TYPE=Release
make --jobs=$(nproc)
```
The result of this command is a compiled shared library (`.so`, `.dylib` or `.dll`). It should be loaded in the
application using `Core` class instance method `AddExtension` like this
`core.AddExtension(make_so_pointer<IExtension>(compiled_library_file_name), "CPU");`.
To test that the extension is implemented correctly we can run the [Benchmark App](../../inference-engine/tools/benchmark_tool/README.md)
the following way:
```bash
python3 $INTEL_OPENVINO_DIR/deployment_tools/tools/benchmark_tool/benchmark_app.py \
-m <PATH_TO_IR>/wnet_20.xml \
-l <PATH_TO_BUILD_DIR>/libfft_cpu_extension.so \
-d CPU
```
## Additional Resources
- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit)
- OpenVINO™ toolkit online documentation: [https://docs.openvinotoolkit.org](https://docs.openvinotoolkit.org)
- [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
- [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md)
- [Inference Engine Extensibility Mechanism](../IE_DG/Extensibility_DG/Intro.md)
- [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md)
- [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_intel_index)
- For IoT Libraries and Code Samples see the [Intel® IoT Developer Kit](https://github.com/intel-iot-devkit).
## Converting Models:
- [Convert Your Caffe* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md)
- [Convert Your Kaldi* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md)
- [Convert Your TensorFlow* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md)
- [Convert Your MXNet* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md)
- [Convert Your ONNX* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md)

View File

@@ -1,36 +0,0 @@
#
# Copyright (C) 2018-2019 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# ===============================================================================
# Generated file for building library with user generated CPU extensions
#
# Contains implementation of the basic layer methods
#
# Refer to the section "Adding Your Own Kernels to the Inference Engine" in
# OpenVINO* documentation (either online or offline in
# <INSTALL_DIR>/deployment_tools/documentation/docs/index.html an then navigate
# to the corresponding section).
# ===============================================================================
# [fft_cmake_list:cmake]
set(CPU_EXTENSIONS_BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR} CACHE INTERNAL "")
set(CMAKE_CXX_STANDARD 11)
find_package(ngraph REQUIRED OPTIONAL_COMPONENTS onnx_importer)
find_package(InferenceEngine REQUIRED)
find_package(OpenCV REQUIRED COMPONENTS core)
set(TARGET_NAME fft_cpu_extension)
file(GLOB SRC ${CPU_EXTENSIONS_BASE_DIR}/*.cpp)
add_library(${TARGET_NAME} SHARED ${SRC})
target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_INFERENCE_EXTENSION_API)
target_link_libraries(${TARGET_NAME} PRIVATE ${InferenceEngine_LIBRARIES}
${NGRAPH_LIBRARIES}
opencv_core)
# [fft_cmake_list:cmake]

View File

@@ -1,67 +0,0 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
// source: https://github.com/openvinotoolkit/openvino/tree/master/docs/template_extension
//! [fft_extension:implementation]
#include "extension.hpp"
#include "fft_kernel.hpp"
#include "fft_op.hpp"
#include <ngraph/factory.hpp>
#include <ngraph/opsets/opset.hpp>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
using namespace FFTExtension;
void Extension::GetVersion(const InferenceEngine::Version *&versionInfo) const noexcept {
static InferenceEngine::Version ExtensionDescription = {
{1, 0}, // extension API version
"1.0",
"The CPU plugin extension with FFT operation" // extension description message
};
versionInfo = &ExtensionDescription;
}
std::map<std::string, ngraph::OpSet> Extension::getOpSets() {
std::map<std::string, ngraph::OpSet> opsets;
ngraph::OpSet opset;
opset.insert<FFTOp>();
opsets["fft_extension"] = opset;
return opsets;
}
std::vector<std::string> Extension::getImplTypes(const std::shared_ptr<ngraph::Node> &node) {
if (std::dynamic_pointer_cast<FFTOp>(node)) {
return {"CPU"};
}
return {};
}
InferenceEngine::ILayerImpl::Ptr Extension::getImplementation(const std::shared_ptr<ngraph::Node> &node, const std::string &implType) {
if (std::dynamic_pointer_cast<FFTOp>(node) && implType == "CPU") {
return std::make_shared<FFTImpl>(node);
}
return nullptr;
}
INFERENCE_EXTENSION_API(InferenceEngine::StatusCode) InferenceEngine::CreateExtension(InferenceEngine::IExtension *&ext,
InferenceEngine::ResponseDesc *resp) noexcept {
try {
ext = new Extension();
return OK;
} catch (std::exception &ex) {
if (resp) {
std::string err = ((std::string) "Couldn't create extension: ") + ex.what();
err.copy(resp->msg, 255);
}
return InferenceEngine::GENERAL_ERROR;
}
}
//! [fft_extension:implementation]

View File

@@ -1,32 +0,0 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
// source: https://github.com/openvinotoolkit/openvino/tree/master/docs/template_extension
//! [fft_extension:header]
#pragma once
#include <ie_iextension.h>
#include <ie_api.h>
#include <ngraph/ngraph.hpp>
#include <memory>
#include <vector>
#include <string>
#include <map>
namespace FFTExtension {
class Extension : public InferenceEngine::IExtension {
public:
Extension() = default;
void GetVersion(const InferenceEngine::Version*& versionInfo) const noexcept override;
void Unload() noexcept override {}
void Release() noexcept override { delete this; }
std::map<std::string, ngraph::OpSet> getOpSets() override;
std::vector<std::string> getImplTypes(const std::shared_ptr<ngraph::Node>& node) override;
InferenceEngine::ILayerImpl::Ptr getImplementation(const std::shared_ptr<ngraph::Node>& node, const std::string& implType) override;
};
}
//! [fft_extension:header]

View File

@@ -1,119 +0,0 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
//! [fft_kernel:implementation]
#include "fft_kernel.hpp"
#include "fft_op.hpp"
#include <details/ie_exception.hpp>
#include <ie_layouts.h>
#include <opencv2/opencv.hpp>
using namespace FFTExtension;
FFTImpl::FFTImpl(const std::shared_ptr<ngraph::Node> &node) {
auto castedNode = std::dynamic_pointer_cast<FFTOp>(node);
if (!castedNode)
THROW_IE_EXCEPTION << "Cannot create implementation for unknown operation!";
if (castedNode->inputs().size() != 1 || castedNode->outputs().size() != 1)
THROW_IE_EXCEPTION << "Cannot create implementation for operation with incorrect number of inputs or outputs!";
if (castedNode->get_input_partial_shape(0).is_dynamic() || castedNode->get_output_partial_shape(0).is_dynamic())
THROW_IE_EXCEPTION << "Cannot create implementation for op with dynamic shapes!";
if (castedNode->get_input_element_type(0) != ngraph::element::f32 || castedNode->get_output_element_type(0) != ngraph::element::f32)
THROW_IE_EXCEPTION << "Operation supports only FP32 tensors.";
inpShape = castedNode->get_input_shape(0);
outShape = castedNode->get_output_shape(0);
inverse = castedNode->inverse;
}
InferenceEngine::StatusCode FFTImpl::getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig> &conf,
InferenceEngine::ResponseDesc *resp) noexcept {
std::vector<InferenceEngine::DataConfig> inDataConfig;
std::vector<InferenceEngine::DataConfig> outDataConfig;
InferenceEngine::SizeVector order(inpShape.size());
std::iota(order.begin(), order.end(), 0);
// Allow any offset before data
size_t offset((std::numeric_limits<size_t>::max)());
// Input shape
InferenceEngine::DataConfig inpConf;
inpConf.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, inpShape, {inpShape, order, offset});
inDataConfig.push_back(inpConf);
// Output shape
InferenceEngine::DataConfig outConf;
outConf.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outShape, order, offset});
outDataConfig.push_back(outConf);
InferenceEngine::LayerConfig layerConfig;
layerConfig.inConfs = inDataConfig;
layerConfig.outConfs = outDataConfig;
conf.push_back(layerConfig);
return InferenceEngine::StatusCode::OK;
}
InferenceEngine::StatusCode FFTImpl::init(InferenceEngine::LayerConfig &config, InferenceEngine::ResponseDesc *resp) noexcept {
try {
if (config.inConfs.size() != 1 || config.outConfs.size() != 1) {
THROW_IE_EXCEPTION << "Operation cannot be initialized with incorrect number of inputs/outputs!";
}
if (config.outConfs[0].desc.getPrecision() != InferenceEngine::Precision::FP32 ||
config.inConfs[0].desc.getPrecision() != InferenceEngine::Precision::FP32) {
THROW_IE_EXCEPTION << "Operation supports only FP32 precisions!";
}
} catch (InferenceEngine::details::InferenceEngineException& ex) {
if (resp) {
strncpy(resp->msg, error.c_str(), sizeof(resp->msg) - 1);
resp->msg[sizeof(resp->msg)-1] = 0;
}
return InferenceEngine::GENERAL_ERROR;
}
return InferenceEngine::OK;
}
static cv::Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob)
{
// NOTE: Inference Engine sizes are reversed.
std::vector<size_t> dims = blob->getTensorDesc().getDims();
std::vector<int> size(dims.begin(), dims.end());
auto precision = blob->getTensorDesc().getPrecision();
CV_Assert(precision == InferenceEngine::Precision::FP32);
return cv::Mat(size, CV_32F, (void*)blob->buffer());
}
InferenceEngine::StatusCode FFTImpl::execute(std::vector<InferenceEngine::Blob::Ptr> &inputs,
std::vector<InferenceEngine::Blob::Ptr> &outputs,
InferenceEngine::ResponseDesc *resp) noexcept {
cv::Mat inp = infEngineBlobToMat(inputs[0]);
cv::Mat out = infEngineBlobToMat(outputs[0]);
const int n = inp.size[0];
const int h = inp.size[2];
const int w = inp.size[3];
cv::Mat complex(h, w, CV_32FC2), interleavedOut(h, w, CV_32FC2);
for (int i = 0; i < n; ++i) {
std::vector<cv::Mat> components = {
cv::Mat(h, w, CV_32F, inp.ptr<float>(i, 0)),
cv::Mat(h, w, CV_32F, inp.ptr<float>(i, 1))
};
cv::merge(components, complex);
if (!inverse)
cv::dft(complex, interleavedOut);
else
cv::idft(complex, interleavedOut, cv::DFT_SCALE);
components = {
cv::Mat(h, w, CV_32F, out.ptr<float>(i, 0)),
cv::Mat(h, w, CV_32F, out.ptr<float>(i, 1))
};
cv::split(interleavedOut, components);
}
return InferenceEngine::OK;
}
//! [fft_kernel:implementation]

View File

@@ -1,32 +0,0 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
// source: https://github.com/openvinotoolkit/openvino/tree/master/docs/template_extension
//! [fft_kernel:header]
#pragma once
#include <ie_iextension.h>
#include <ngraph/ngraph.hpp>
namespace FFTExtension {
class FFTImpl : public InferenceEngine::ILayerExecImpl {
public:
explicit FFTImpl(const std::shared_ptr<ngraph::Node>& node);
InferenceEngine::StatusCode getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig> &conf,
InferenceEngine::ResponseDesc *resp) noexcept override;
InferenceEngine::StatusCode init(InferenceEngine::LayerConfig &config,
InferenceEngine::ResponseDesc *resp) noexcept override;
InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr> &inputs,
std::vector<InferenceEngine::Blob::Ptr> &outputs,
InferenceEngine::ResponseDesc *resp) noexcept override;
private:
ngraph::Shape inpShape;
ngraph::Shape outShape;
bool inverse;
std::string error;
};
}
//! [fft_kernel:header]

View File

@@ -1,34 +0,0 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
//! [fft_op:implementation]
#include "fft_op.hpp"
using namespace FFTExtension;
constexpr ngraph::NodeTypeInfo FFTOp::type_info;
FFTOp::FFTOp(const ngraph::Output<ngraph::Node>& inp, bool _inverse) : Op({inp}) {
constructor_validate_and_infer_types();
inverse = _inverse;
}
void FFTOp::validate_and_infer_types() {
auto outShape = get_input_partial_shape(0);
set_output_type(0, get_input_element_type(0), outShape);
}
std::shared_ptr<ngraph::Node> FFTOp::clone_with_new_inputs(const ngraph::OutputVector &new_args) const {
if (new_args.size() != 1) {
throw ngraph::ngraph_error("Incorrect number of new arguments");
}
return std::make_shared<FFTOp>(new_args.at(0), inverse);
}
bool FFTOp::visit_attributes(ngraph::AttributeVisitor &visitor) {
visitor.on_attribute("inverse", inverse);
return true;
}
//! [fft_op:implementation]

View File

@@ -1,28 +0,0 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
//! [fft_op:header]
#pragma once
#include <ngraph/ngraph.hpp>
namespace FFTExtension {
class FFTOp : public ngraph::op::Op {
public:
static constexpr ngraph::NodeTypeInfo type_info{"FFT", 0};
const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; }
FFTOp() = default;
FFTOp(const ngraph::Output<ngraph::Node>& inp, bool inverse);
void validate_and_infer_types() override;
std::shared_ptr<ngraph::Node> clone_with_new_inputs(const ngraph::OutputVector& new_args) const override;
bool visit_attributes(ngraph::AttributeVisitor& visitor) override;
bool inverse;
};
}
//! [fft_op:header]

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f7c8ab4f15874d235968471bcf876c89c795d601e69891208107b8b72aa58eb1
size 70014

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3d5ccf51fe1babb93d96d042494695a6a6e055d1f8ebf7eef5083d54d8987a23
size 58789

View File

@@ -1,57 +0,0 @@
"""
Copyright (C) 2018-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
#! [complex:transformation]
import logging as log
import numpy as np
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.graph.graph import Graph
class Complex(FrontReplacementSubgraph):
enabled = True
def pattern(self):
return dict(
nodes=[
('strided_slice_real', dict(op='StridedSlice')),
('strided_slice_imag', dict(op='StridedSlice')),
('complex', dict(op='Complex')),
],
edges=[
('strided_slice_real', 'complex', {'in': 0}),
('strided_slice_imag', 'complex', {'in': 1}),
])
@staticmethod
def replace_sub_graph(graph: Graph, match: dict):
strided_slice_real = match['strided_slice_real']
strided_slice_imag = match['strided_slice_imag']
complex_node = match['complex']
# make sure that both strided slice operations get the same data as input
assert strided_slice_real.in_port(0).get_source() == strided_slice_imag.in_port(0).get_source()
# identify the output port of the operation producing datat for strided slice nodes
input_node_output_port = strided_slice_real.in_port(0).get_source()
input_node_output_port.disconnect()
# change the connection so now all consumers of "complex_node" get data from input node of strided slice nodes
complex_node.out_port(0).get_connection().set_source(input_node_output_port)
#! [complex:transformation]

View File

@@ -1,40 +0,0 @@
"""
Copyright (C) 2018-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
#! [complex_abs:transformation]
import numpy as np
from extensions.ops.elementwise import Pow
from extensions.ops.ReduceOps import ReduceSum
from mo.front.common.replacement import FrontReplacementOp
from mo.graph.graph import Graph, Node
from mo.ops.const import Const
class ComplexAbs(FrontReplacementOp):
op = "ComplexAbs"
enabled = True
def replace_op(self, graph: Graph, node: Node):
pow_2 = Const(graph, {'value': np.float32(2.0)}).create_node()
reduce_axis = Const(graph, {'value': np.int32(-1)}).create_node()
pow_0_5 = Const(graph, {'value': np.float32(0.5)}).create_node()
sq = Pow(graph, dict(name=node.in_node(0).name + '/sq', power=2.0)).create_node([node.in_node(0), pow_2])
sum = ReduceSum(graph, dict(name=sq.name + '/sum')).create_node([sq, reduce_axis])
sqrt = Pow(graph, dict(name=sum.name + '/sqrt', power=0.5)).create_node([sum, pow_0_5])
return [sqrt.id]
#! [complex_abs:transformation]

View File

@@ -1,47 +0,0 @@
"""
Copyright (C) 2018-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# ! [fft_ext:extractor]
from ...ops.FFT import FFT
from mo.front.extractor import FrontExtractorOp
from mo.utils.error import Error
class FFT2DFrontExtractor(FrontExtractorOp):
op = 'FFT2D'
enabled = True
@classmethod
def extract(cls, node):
attrs = {
'inverse': 0
}
FFT.update_node_stat(node, attrs)
return cls.enabled
class IFFT2DFrontExtractor(FrontExtractorOp):
op = 'IFFT2D'
enabled = True
@classmethod
def extract(cls, node):
attrs = {
'inverse': 1
}
FFT.update_node_stat(node, attrs)
return cls.enabled
# ! [fft_ext:extractor]

View File

@@ -1,40 +0,0 @@
"""
Copyright (C) 2018-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
#! [fft:operation]
from mo.front.common.partial_infer.elemental import copy_shape_infer
from mo.graph.graph import Node, Graph
from mo.ops.op import Op
class FFT(Op):
op = 'FFT'
enabled = False
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
'type': self.op,
'op': self.op,
'version': 'fft_extension',
'inverse': None,
'in_ports_count': 1,
'out_ports_count': 1,
'infer': copy_shape_infer
}, attrs)
def backend_attrs(self):
return ['inverse']
#! [fft:operation]

View File

@@ -1,685 +0,0 @@
# Inference Engine API Changes History {#openvino_docs_IE_DG_API_Changes}
The sections below contain detailed list of changes made to the Inference Engine API in recent releases.
## 2021.2
### New API
**State API**
* InferenceEngine::InferRequest::QueryState query state value of network on current infer request
* InferenceEngine::IVariableState class instead of IMemoryState (rename)
* InferenceEngine::IVariableState::GetState instead of IMemoryState::GetLastState (rename)
**BatchedBlob** - represents a InferenceEngine::BatchedBlob containing other blobs - one per batch.
**Transformations API** - added a new header `ie_transformations.hpp` which contains transformations for InferenceEngine::CNNNetwork object. Such transformations can be called prior to loading network for compilation for particular device:
* InferenceEngine::LowLatency
### Deprecated API
**State API**
* InferenceEngine::ExecutableNetwork::QueryState - use InferenceEngine::InferRequest::QueryState
* InferenceEngine::IVariableState::GetLastState - use InferenceEngine::IVariableState::GetState
## 2021.1
### Deprecated API
**Utility functions to convert Unicode paths**
* InferenceEngine::stringToFileName - use OS-specific native conversion functions
* InferenceEngine::fileNameToString - use OS-specific native conversion functions
### Removed API
**Plugin API:**
* InferenceEngine::InferencePlugin C++ plugin wrapper class
* InferenceEngine::IInferencePlugin plugin interface
* InferenceEngine::PluginDispatcher class
* InferenceEngine::InferenceEnginePluginPtr typedef
* InferenceEngine::ICNNNetReader reader interface
* InferenceEngine::CNNNetReader class
**Extensibility API:**
* InferenceEngine::ILayerImplFactory class
* InferenceEngine::IShapeInferImpl class
* InferenceEngine::IShapeInferExtension class
* InferenceEngine::IExtension::getFactoryFor(ILayerImplFactory\*& factory, const CNNLayer\* cnnLayer, ResponseDesc\* resp) noexcept method
* InferenceEngine::IExtension::getPrimitiveTypes(char\*\*& types, unsigned int& size, ResponseDesc\* resp) noexcept method
* InferenceEngine::ShapeInferImpl class
* InferenceEngine::Extension::getFactoryFor(ILayerImplFactory\*& factory, const CNNLayer\* cnnLayer, ResponseDesc\* resp) noexcept method
* InferenceEngine::Extension::getPrimitiveTypes(char\*\*& types, unsigned int& size, ResponseDesc\* resp) noexcept method
**Network API:**
* InferenceEngine::details::CNNNetworkIterator class
* InferenceEngine::CNNNetwork::getPrecision() const method
* InferenceEngine::CNNNetwork::getLayerByName(const char\* layerName) const method
* InferenceEngine::CNNNetwork::size() const method
* InferenceEngine::CNNNetwork::begin() const method
* InferenceEngine::CNNNetwork::end() const method
* InferenceEngine::CNNNetwork::AddExtension(const IShapeInferExtensionPtr& extension) method
* InferenceEngine::ICNNNetwork::getPrecision() const noexcept method
* InferenceEngine::ICNNNetwork::getName(char\* pName, size_t len) const noexcept method
* InferenceEngine::ICNNNetwork::getData(const char\* dname) noexcept method
* InferenceEngine::ICNNNetwork::addLayer(const CNNLayerPtr& layer) noexcept method
* InferenceEngine::ICNNNetwork::getLayerByName(const char\* layerName, CNNLayerPtr& out, ResponseDesc\* resp) const noexcept method
* InferenceEngine::ICNNNetwork::AddExtension(const IShapeInferExtensionPtr& extension, ResponseDesc\* resp) noexcept method
* InferenceEngine::ICNNNetwork::getStats(ICNNNetworkStats\*\* stats, ResponseDesc\* resp) const noexcept method
* InferenceEngine::ICNNNetworkStats class
* InferenceEngine::NetworkNodeStats class
* InferenceEngine::Data::getCreatorLayer() method
* InferenceEngine::Data::getInputTo() method
* InferenceEngine::LayerParams class
**Layer API:**
* InferenceEngine::CNNLayer class
* InferenceEngine::WeightableLayer class
* InferenceEngine::BatchNormalizationLayer class
* InferenceEngine::BatchToSpaceLayer class
* InferenceEngine::BinaryConvolutionLayer class
* InferenceEngine::BroadcastLayer class
* InferenceEngine::BucketizeLayer class
* InferenceEngine::ClampLayer class
* InferenceEngine::ConcatLayer class
* InferenceEngine::ConvolutionLayer class
* InferenceEngine::CropLayer class
* InferenceEngine::DeconvolutionLayer class
* InferenceEngine::DeformableConvolutionLayer class
* InferenceEngine::DepthToSpaceLayer class
* InferenceEngine::EltwiseLayer class
* InferenceEngine::ExperimentalDetectronPriorGridGenerator class
* InferenceEngine::ExperimentalDetectronPriorGridGeneratorLayer class
* InferenceEngine::ExperimentalSparseWeightedReduceLayer class
* InferenceEngine::FillLayer class
* InferenceEngine::FullyConnectedLayer class
* InferenceEngine::GRNLayer class
* InferenceEngine::GRUCell class
* InferenceEngine::GatherLayer class
* InferenceEngine::GemmLayer class
* InferenceEngine::LSTMCell class
* InferenceEngine::MVNLayer class
* InferenceEngine::MathLayer class
* InferenceEngine::NonMaxSuppression class
* InferenceEngine::NormLayer class
* InferenceEngine::OneHotLayer class
* InferenceEngine::PReLULayer class
* InferenceEngine::PadLayer class
* InferenceEngine::PoolingLayer class
* InferenceEngine::PowerLayer class
* InferenceEngine::QuantizeLayer class
* InferenceEngine::RNNCell class
* InferenceEngine::RNNCellBase class
* InferenceEngine::RNNSequenceLayer class
* InferenceEngine::RangeLayer class
* InferenceEngine::ReLU6Layer class
* InferenceEngine::ReLULayer class
* InferenceEngine::ReduceLayer class
* InferenceEngine::ReshapeLayer class
* InferenceEngine::ReverseSequenceLayer class
* InferenceEngine::ScaleShiftLayer class
* InferenceEngine::ScatterLayer class
* InferenceEngine::SelectLayer class
* InferenceEngine::ShuffleChannelsLayer class
* InferenceEngine::SoftMaxLayer class
* InferenceEngine::SpaceToBatchLayer class
* InferenceEngine::SpaceToDepthLayer class
* InferenceEngine::SparseFillEmptyRowsLayer class
* InferenceEngine::SparseSegmentReduceLayer class
* InferenceEngine::SparseToDenseLayer class
* InferenceEngine::SplitLayer class
* InferenceEngine::StridedSliceLayer class
* InferenceEngine::TensorIterator class
* InferenceEngine::TileLayer class
* InferenceEngine::TopKLayer class
* InferenceEngine::UniqueLayer class
## 2020.4
### New API
**CPU Plugin API:**
* InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16 config key
**Metrics and values for Query API:**
* METRIC_KEY(OPTIMIZATION_CAPABILITIES)
* METRIC_VALUE(BF16)
### Deprecated API
**MYRIAD Plugin API:**
* VPU_CONFIG_KEY(IGNORE_IR_STATISTIC)
### Removed API
**Inference Engine NN Builder API:**
* InferenceEngine::Builder::EltwiseLayer
* InferenceEngine::Builder::MemoryLayer
* InferenceEngine::Builder::ROIPoolingLayer
* InferenceEngine::Builder::DeconvolutionLayer
* InferenceEngine::Builder::ReLULayer
* InferenceEngine::Builder::TanHLayer
* InferenceEngine::Builder::InputLayer
* InferenceEngine::Builder::PoolingLayer
* InferenceEngine::Builder::CropLayer
* InferenceEngine::Builder::GRUSequenceLayer
* InferenceEngine::Builder::NormLayer
* InferenceEngine::Builder::LSTMSequenceLayer
* InferenceEngine::Builder::ClampLayer
* InferenceEngine::Builder::PSROIPoolingLayer
* InferenceEngine::Builder::Layer
* InferenceEngine::Builder::RNNSequenceLayer
* InferenceEngine::Builder::ReorgYoloLayer
* InferenceEngine::Builder::NormalizeLayer
* InferenceEngine::Builder::PriorBoxClusteredLayer
* InferenceEngine::Builder::MVNLayer
* InferenceEngine::Builder::PermuteLayer
* InferenceEngine::Builder::SimplerNMSLayer
* InferenceEngine::Builder::ConstLayer
* InferenceEngine::Builder::DeformableConvolutionLayer
* InferenceEngine::Builder::FullyConnectedLayer
* InferenceEngine::Builder::PriorBoxLayer
* InferenceEngine::Builder::SoftMaxLayer
* InferenceEngine::Builder::OutputLayer
* InferenceEngine::Builder::TileLayer
* InferenceEngine::Builder::SplitLayer
* InferenceEngine::Builder::PReLULayer
* InferenceEngine::Builder::RegionYoloLayer
* InferenceEngine::Builder::ReshapeLayer
* InferenceEngine::Builder::ConvolutionLayer
* InferenceEngine::Builder::DetectionOutputLayer
* InferenceEngine::Builder::ConcatLayer
* InferenceEngine::Builder::ELULayer
* InferenceEngine::Builder::GRNLayer
* InferenceEngine::Builder::LRNLayer
* InferenceEngine::Builder::ArgMaxLayer
* InferenceEngine::Builder::ReLU6Layer
* InferenceEngine::Builder::ScaleShiftLayer
* InferenceEngine::Builder::ProposalLayer
* InferenceEngine::Builder::SigmoidLayer
* InferenceEngine::Builder::ResampleLayer
* InferenceEngine::Builder::CTCGreedyDecoderLayer
* InferenceEngine::Builder::BatchNormalizationLayer
* InferenceEngine::Builder::LayerDecorator
* InferenceEngine::Builder::PowerLayer
* InferenceEngine::Builder::Network
* InferenceEngine::Builder::PortInfo
* InferenceEngine::Builder::Connection
* InferenceEngine::Builder::PortData
* InferenceEngine::Builder::Port
* InferenceEngine::Builder::ILayer
* InferenceEngine::Builder::INetworkIterator
* InferenceEngine::Builder::INetwork
* InferenceEngine::Builder::ILayer
## 2020.2
### New API
**Extensibility API:**
* InferenceEngine::IExtension::getImplTypes(const std::shared_ptr<ngraph::Node>& node) method
* InferenceEngine::IExtension::getImplementation(const std::shared_ptr<ngraph::Node>& node, const std::string& implType) method
### Deprecated API
**Extensibility API:**
* InferenceEngine::ILayerImplFactory class
* InferenceEngine::IShapeInferImpl class
* InferenceEngine::IShapeInferImpl class
* InferenceEngine::IShapeInferExtension class
* InferenceEngine::IExtension::getFactoryFor(ILayerImplFactory\*& factory, const CNNLayer\* cnnLayer, ResponseDesc\* resp) noexcept method
* InferenceEngine::IExtension::getPrimitiveTypes(char\*\*& types, unsigned int& size, ResponseDesc\* resp) noexcept method
* InferenceEngine::ShapeInferImpl class
* InferenceEngine::Extension::getFactoryFor(ILayerImplFactory\*& factory, const CNNLayer\* cnnLayer, ResponseDesc\* resp) noexcept method
* InferenceEngine::Extension::getPrimitiveTypes(char\*\*& types, unsigned int& size, ResponseDesc\* resp) noexcept method
**Network API:**
* InferenceEngine::details::CNNNetworkIterator class
* InferenceEngine::CNNNetwork::getPrecision() const method
* InferenceEngine::CNNNetwork::getLayerByName(const char\* layerName) const method
* InferenceEngine::CNNNetwork::size() const method
* InferenceEngine::CNNNetwork::begin() const method
* InferenceEngine::CNNNetwork::end() const method
* InferenceEngine::CNNNetwork::AddExtension(const IShapeInferExtensionPtr& extension) method
* InferenceEngine::ICNNNetwork::getPrecision() const noexcept method
* InferenceEngine::ICNNNetwork::getName(char\* pName, size_t len) const noexcept method
* InferenceEngine::ICNNNetwork::getData(const char\* dname) noexcept method
* InferenceEngine::ICNNNetwork::addLayer(const CNNLayerPtr& layer) noexcept method
* InferenceEngine::ICNNNetwork::getLayerByName(const char\* layerName, CNNLayerPtr& out, ResponseDesc\* resp) const noexcept method
* InferenceEngine::ICNNNetwork::AddExtension(const IShapeInferExtensionPtr& extension, ResponseDesc\* resp) noexcept method
* InferenceEngine::ICNNNetwork::getStats(ICNNNetworkStats\*\* stats, ResponseDesc\* resp) const noexcept method
* InferenceEngine::ICNNNetworkStats class
* InferenceEngine::NetworkNodeStats class
* InferenceEngine::Data::getCreatorLayer() method
* InferenceEngine::Data::getInputTo() method
* InferenceEngine::LayerParams class
**Layer API:**
* InferenceEngine::CNNLayer class
* InferenceEngine::WeightableLayer class
* InferenceEngine::BatchNormalizationLayer class
* InferenceEngine::BatchToSpaceLayer class
* InferenceEngine::BinaryConvolutionLayer class
* InferenceEngine::BroadcastLayer class
* InferenceEngine::BucketizeLayer class
* InferenceEngine::ClampLayer class
* InferenceEngine::ConcatLayer class
* InferenceEngine::ConvolutionLayer class
* InferenceEngine::CropLayer class
* InferenceEngine::DeconvolutionLayer class
* InferenceEngine::DeformableConvolutionLayer class
* InferenceEngine::DepthToSpaceLayer class
* InferenceEngine::EltwiseLayer class
* InferenceEngine::ExperimentalDetectronPriorGridGenerator class
* InferenceEngine::ExperimentalDetectronPriorGridGeneratorLayer class
* InferenceEngine::ExperimentalSparseWeightedReduceLayer class
* InferenceEngine::FillLayer class
* InferenceEngine::FullyConnectedLayer class
* InferenceEngine::GRNLayer class
* InferenceEngine::GRUCell class
* InferenceEngine::GatherLayer class
* InferenceEngine::GemmLayer class
* InferenceEngine::LSTMCell class
* InferenceEngine::MVNLayer class
* InferenceEngine::MathLayer class
* InferenceEngine::NonMaxSuppression class
* InferenceEngine::NormLayer class
* InferenceEngine::OneHotLayer class
* InferenceEngine::PReLULayer class
* InferenceEngine::PadLayer class
* InferenceEngine::PoolingLayer class
* InferenceEngine::PowerLayer class
* InferenceEngine::QuantizeLayer class
* InferenceEngine::RNNCell class
* InferenceEngine::RNNCellBase class
* InferenceEngine::RNNSequenceLayer class
* InferenceEngine::RangeLayer class
* InferenceEngine::ReLU6Layer class
* InferenceEngine::ReLULayer class
* InferenceEngine::ReduceLayer class
* InferenceEngine::ReshapeLayer class
* InferenceEngine::ReverseSequenceLayer class
* InferenceEngine::ScaleShiftLayer class
* InferenceEngine::ScatterLayer class
* InferenceEngine::SelectLayer class
* InferenceEngine::ShuffleChannelsLayer class
* InferenceEngine::SoftMaxLayer class
* InferenceEngine::SpaceToBatchLayer class
* InferenceEngine::SpaceToDepthLayer class
* InferenceEngine::SparseFillEmptyRowsLayer class
* InferenceEngine::SparseSegmentReduceLayer class
* InferenceEngine::SparseToDenseLayer class
* InferenceEngine::SplitLayer class
* InferenceEngine::StridedSliceLayer class
* InferenceEngine::TensorIterator class
* InferenceEngine::TileLayer class
* InferenceEngine::TopKLayer class
* InferenceEngine::UniqueLayer class
## 2020.1
### New API
**Integration with ngraph API:**
* InferenceEngine::CNNNetwork(const std::shared_ptr<ngraph::Function>& network) ctor from ngraph::Function
* InferenceEngine::CNNNetwork::getFunction() const noexcept method
* InferenceEngine::ICNNNetwork::getFunction() const noexcept method
* InferenceEngine::Parameter(const std::shared_ptr<ngraph::Variant>& var) ctor
* InferenceEngine::Parameter::asVariant() const method
* InferenceEngine::Parameter::operator std::shared_ptr<ngraph::Variant>() const operator
* InferenceEngine::Core::ReadNetwork(const std::wstring& modelPath, const std::wstring& binPath) method
* InferenceEngine::Core::ReadNetwork(const std::string& modelPath, const std::string& binPath = "") method
* InferenceEngine::Core::ReadNetwork(const std::string& model, const Blob::CPtr& weights) method
* InferenceEngine::Code::AddExtension(const IExtensionPtr& extension) method
* InferenceEngine::IExtension::getOpSets() method
**Offline compilation: import / export to std::stream:**
* InferenceEngine::ExecutableNetwork::Export(std::ostream& networkModel) method
* InferenceEngine::Core::ImportNetwork(std::istream& networkModel, const std::string& deviceName = {}, const std::map<std::string, std::string>& config = {}) method
* InferenceEngine::IExecutableNetwork::Export(std::ostream& networkModel, ResponseDesc \*resp) noexcept method
**RemoteBlob accelerator memory sharing API:**
* InferenceEngine::RemoteContext class
* InferenceEngine::RemoteBlob class
* InferenceEngine::Core::CreateContext(const std::string& deviceName, const ParamMap& params) method
* InferenceEngine::Core::GetDefaultContext(const std::string& deviceName) method
* InferenceEngine::Core::LoadNetwork(CNNNetwork network, RemoteContext::Ptr context, const std::map<std::string, std::string>& config = std::map<std::string, std::string>()) method
**GNA firmware model image generation:**
* GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE_GENERATION) config key
* GNA_CONFIG_VALUE(GEN) value
* GNA_CONFIG_VALUE(GEN_EXACT) value
* GNA_CONFIG_VALUE(SSE) value
* GNA_CONFIG_VALUE(SSE_EXACT) value
* GNA_CONFIG_VALUE(AVX1) value
* GNA_CONFIG_VALUE(AVX1_EXACT) value
* GNA_CONFIG_VALUE(AVX2) value
* GNA_CONFIG_VALUE(AVX2_EXACT) value
**MemoryBlob mapping of memory to the user space:**
* InferenceEngine::MemoryBlob::rwmap() noexcept method
* InferenceEngine::MemoryBlob::rmap() noexcept method
* InferenceEngine::MemoryBlob::wmap() noexcept method
**Memory interoperability on acceleration devices. General classes and GPU helper functions**
* InferenceEngine::RemoteBlob class
* InferenceEngine::RemoteContext class
* InferenceEngine::Core::CreateContext(const std::string& deviceName, const ParamMap& params) method
* InferenceEngine::Core::GetDefaultContext(const std::string& deviceName) method
* InferenceEngine::make_shared_blob(const TensorDesc& desc, RemoteContext::Ptr ctx) function
* InferenceEngine::gpu::make_shared_blob_nv12(size_t height, size_t width, RemoteContext::Ptr ctx, VASurfaceID nv12_surf) function
* InferenceEngine::gpu::make_shared_context(Core& core, std::string deviceName, VADisplay device) function
* InferenceEngine::gpu::make_shared_blob(const TensorDesc& desc, RemoteContext::Ptr ctx, VASurfaceID surface, uint32_t plane = 0) function
* InferenceEngine::gpu::make_shared_blob_nv12(RemoteContext::Ptr ctx, cl::Image2D& nv12_image_plane_y, cl::Image2D& nv12_image_plane_uv) function
* InferenceEngine::gpu::make_shared_context(Core& core, std::string deviceName, cl_context ctx) function
* InferenceEngine::gpu::make_shared_blob(const TensorDesc& desc, ClContext::Ptr ctx) function
* InferenceEngine::gpu::make_shared_blob(const TensorDesc& desc, RemoteContext::Ptr ctx, cl::Buffer& buffer) function
* InferenceEngine::gpu::make_shared_blob(const TensorDesc& desc, RemoteContext::Ptr ctx, cl_mem buffer) function
* InferenceEngine::gpu::make_shared_blob(const TensorDesc& desc, RemoteContext::Ptr ctx, cl::Image2D& image) function
### Deprecated API
**Inference Engine NN Builder API:**
* InferenceEngine::Builder::EltwiseLayer
* InferenceEngine::Builder::MemoryLayer
* InferenceEngine::Builder::ROIPoolingLayer
* InferenceEngine::Builder::DeconvolutionLayer
* InferenceEngine::Builder::ReLULayer
* InferenceEngine::Builder::TanHLayer
* InferenceEngine::Builder::InputLayer
* InferenceEngine::Builder::PoolingLayer
* InferenceEngine::Builder::CropLayer
* InferenceEngine::Builder::GRUSequenceLayer
* InferenceEngine::Builder::NormLayer
* InferenceEngine::Builder::LSTMSequenceLayer
* InferenceEngine::Builder::ClampLayer
* InferenceEngine::Builder::PSROIPoolingLayer
* InferenceEngine::Builder::Layer
* InferenceEngine::Builder::RNNSequenceLayer
* InferenceEngine::Builder::ReorgYoloLayer
* InferenceEngine::Builder::NormalizeLayer
* InferenceEngine::Builder::PriorBoxClusteredLayer
* InferenceEngine::Builder::MVNLayer
* InferenceEngine::Builder::PermuteLayer
* InferenceEngine::Builder::SimplerNMSLayer
* InferenceEngine::Builder::ConstLayer
* InferenceEngine::Builder::DeformableConvolutionLayer
* InferenceEngine::Builder::FullyConnectedLayer
* InferenceEngine::Builder::PriorBoxLayer
* InferenceEngine::Builder::SoftMaxLayer
* InferenceEngine::Builder::OutputLayer
* InferenceEngine::Builder::TileLayer
* InferenceEngine::Builder::SplitLayer
* InferenceEngine::Builder::PReLULayer
* InferenceEngine::Builder::RegionYoloLayer
* InferenceEngine::Builder::ReshapeLayer
* InferenceEngine::Builder::ConvolutionLayer
* InferenceEngine::Builder::DetectionOutputLayer
* InferenceEngine::Builder::ConcatLayer
* InferenceEngine::Builder::ELULayer
* InferenceEngine::Builder::GRNLayer
* InferenceEngine::Builder::LRNLayer
* InferenceEngine::Builder::ArgMaxLayer
* InferenceEngine::Builder::ReLU6Layer
* InferenceEngine::Builder::ScaleShiftLayer
* InferenceEngine::Builder::ProposalLayer
* InferenceEngine::Builder::SigmoidLayer
* InferenceEngine::Builder::ResampleLayer
* InferenceEngine::Builder::CTCGreedyDecoderLayer
* InferenceEngine::Builder::BatchNormalizationLayer
* InferenceEngine::Builder::LayerDecorator
* InferenceEngine::Builder::PowerLayer
* InferenceEngine::Builder::Network
* InferenceEngine::Builder::PortInfo
* InferenceEngine::Builder::Connection
* InferenceEngine::Builder::PortData
* InferenceEngine::Builder::Port
* InferenceEngine::Builder::ILayer
* InferenceEngine::Builder::INetworkIterator
* InferenceEngine::Builder::INetwork
* InferenceEngine::Builder::ILayer
**Plugin API:**
* InferenceEngine::InferencePlugin C++ plugin wrapper class
* InferenceEngine::IInferencePlugin plugin interface
* InferenceEngine::PluginDispatcher class
* InferenceEngine::InferenceEnginePluginPtr typedef
* InferenceEngine::ICNNNetReader reader interface
* InferenceEngine::CNNNetReader class
**Blob API:**
* Blob::element_size() const noexcept method
* Blob::buffer() noexcept method
* Blob::cbuffer() noexcept method
* MemoryBlob::buffer() noexcept method
* MemoryBlob::cbuffer() noexcept method
### Removed API
Removed all [Inference Engine API which deprecated in 2019'R2](https://docs.openvinotoolkit.org/2019_R3/_docs_IE_DG_API_Changes.html#deprecated_api)
## 2019 R3
### New API
**New supported layers:**
* InferenceEngine::SparseFillEmptyRowsLayer new class
* InferenceEngine::UniqueLayer new class
* InferenceEngine::NonMaxSuppressionLayer new class
* InferenceEngine::ScatterLayer new class
**FPGA plugin streaming support:**
* DLIA_METRIC_VALUE(INPUT_STREAMING) value to METRIC_KEY(OPTIMIZATION_CAPABILITIES)
* DLIA_CONFIG_KEY(ENABLE_STREAMING) config key
### Removed API
* InferenceEngine::EltwiseLayer::Select from InferenceEngine::EltwiseLayer::eOperation enumeration
## 2019 R2
### New API
**Inference Engine Core API:**
* Introduced InferenceEngine::Core high level class to manage devices
**Query API extensions to InferenceEngine::ExecutableNetwork and InferenceEngine::IExecutableNetwork:**
* InferenceEngine::ExecutableNetwork::SetConfig method
* InferenceEngine::ExecutableNetwork::GetConfig method
* InferenceEngine::ExecutableNetwork::GetMetric method
* InferenceEngine::IExecutableNetwork::SetConfig method
* InferenceEngine::IExecutableNetwork::GetConfig method
* InferenceEngine::IExecutableNetwork::GetMetric method
**Metrics and values for Query API:**
* METRIC_KEY(AVAILABLE_DEVICES)
* METRIC_KEY(SUPPORTED_METRICS)
* METRIC_KEY(SUPPORTED_CONFIG_KEYS)
* METRIC_KEY(FULL_DEVICE_NAME)
* METRIC_KEY(OPTIMIZATION_CAPABILITIES)
* METRIC_VALUE(FP32)
* METRIC_VALUE(FP16)
* METRIC_VALUE(INT8)
* METRIC_VALUE(BIN)
* METRIC_VALUE(WINOGRAD)
* DLIA_METRIC_VALUE(FP11)
* METRIC_KEY(RANGE_FOR_STREAMS)
* METRIC_KEY(NUMBER_OF_WAITING_INFER_REQUESTS)
* METRIC_KEY(NUMBER_OF_EXEC_INFER_REQUESTS)
* METRIC_KEY(DEVICE_THERMAL)
* METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)
* EXEC_NETWORK_METRIC_KEY(NETWORK_NAME)
* EXEC_NETWORK_METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)
**Common API:**
* CLDNN_CONFIG_KEY(INT8_ENABLED) config key
* CONFIG_KEY(GPU_THROUGHPUT_AUTO)
* CONFIG_KEY(GPU_THROUGHPUT_STREAMS)
* DLIA_CONFIG_KEY(IO_TRANSFORMATIONS_NATIVE) config key
* DLIA_CONFIG_KEY(DUMP_SUPPORTED_LAYERS_INFORMATION) config key
* GNA_CONFIG_VALUE(SW_FP32) config value for GNA_CONFIG_KEY(DEVICE_MODE) key
* MULTI_CONFIG_KEY(DEVICE_PRIORITIES) config key for `MULTI` device
* InferenceEngine::CNNNetReader::ReadNetwork(const std::wstring &filepath) new method
* InferenceEngine::CNNNetReader::ReadWeights(const std::wstring &filepath) new method
* InferenceEngine::ExecutableNetwork::ExecutableNetwork(IExecutableNetwork::Ptr actual, InferenceEnginePluginPtr plg) constructor with additional `plg` parameter
* InferenceEngine::InferRequest::InferRequest(IInferRequest::Ptr request, InferenceEnginePluginPtr plg) constructor with additional `plg` parameter
* InferenceEngine::Data::setName method
* InferenceEngine::QueryNetworkResult::supportedLayersMap
* InferenceEngine::Precision::I64 extension to InferenceEngine::Precision::ePrecision enumeration
**New supported primitives:**
* InferenceEngine::Builder::DeformableConvolutionLayer new class
* InferenceEngine::DeformableConvolutionLayer new class
* InferenceEngine::EltwiseLayer::Logical_NOT, InferenceEngine::EltwiseLayer::Mean, InferenceEngine::EltwiseLayer::Select extensions to InferenceEngine::EltwiseLayer::eOperation enumeration
* InferenceEngine::OneHotLayer new class
* InferenceEngine::SelectLayer new class
* InferenceEngine::BroadcastLayer new class
* InferenceEngine::MathLayer new class
* InferenceEngine::ReduceLayer new class
* InferenceEngine::TopKLayer new class
**Extensions to Blob creation API:**
* InferenceEngine::Blob::is method
* InferenceEngine::Blob::is const method
* InferenceEngine::Blob::as method
* InferenceEngine::Blob::as const method
* InferenceEngine::Blob::getAllocator abstract method
* InferenceEngine::Blob::getHandle abstract method
* InferenceEngine::MemoryBlob class
* InferenceEngine::ColorFormat enumeration
* InferenceEngine::PreProcessInfo::setColorFormat method
* InferenceEngine::PreProcessInfo::getColorFormat method
* InferenceEngine::CompoundBlob class to work with blobs consisting of several planes
* InferenceEngine::NV12Blob class representing NV12 blob with two planes
### Deprecated API
The methods listed below are deprecated and will be removed in 2019 R4 release:
**Common API:**
* InferenceEngine::InputInfo::getInputPrecision method
* InferenceEngine::InputInfo::setInputPrecision method
* InferenceEngine::InputInfo::getDims method
* InferenceEngine::CNNLayer::GetParamsAsBool method
* InferenceEngine::CNNNetwork::CNNNetwork(ICNNNetwork* actual) constructor
* InferenceEngine::CNNNetwork::setTargetDevice method
* HETERO_CONFIG_KEY(DUMP_DLA_MESSAGES) config key
* InferenceEngine::ILayerImplFactory::getShapes method
* InferenceEngine::IShapeInferImpl::inferShapes(const std::vector<SizeVector>&, const std::map<std::string, std::string>& , const std::map<std::string, Blob::Ptr>&, std::vector<SizeVector>&, ResponseDesc\*) method
* InferenceEngine::Data::setBatchSize method
* InferenceEngine::QueryNetworkResult::supportedLayers field
* InferenceEngine::ICNNNetwork::setBatchSize(const size_t size) method
* InferenceEngine::Blob::Resize method
* InferenceEngine::Blob::Reshape method
* InferenceEngine::TBlob::set method
**InferenceEngine::IInferencePlugin and InferenceEngine:InferencePlugin obsolete methods:**
* InferenceEngine::InferencePlugin::LoadNetwork(ICNNNetwork &network) method
* InferenceEngine::InferencePlugin::Infer method
* InferenceEngine::InferencePlugin::GetPerformanceCounts method
* InferenceEngine::InferencePlugin::QueryNetwork(const ICNNNetwork &network, QueryNetworkResult &res) const method
* InferenceEngine::IInferencePlugin::LoadNetwork(ICNNNetwork &network, ResponseDesc \*resp) method
* InferenceEngine::IInferencePlugin::Infer(const Blob &input, Blob &result, ResponseDesc \*resp) method
* InferenceEngine::IInferencePlugin::Infer(const BlobMap &input, BlobMap &result, ResponseDesc \*resp) method
* InferenceEngine::IInferencePlugin::GetPerformanceCounts method
* InferenceEngine::IInferencePlugin::QueryNetwork(const ICNNNetwork& network, QueryNetworkResult& res) const method
**Fields in InferenceEngine::Data class are replaced with appropriate methods:**
* InferenceEngine::Data::precision field
* InferenceEngine::Data::layout field
* InferenceEngine::Data::dims field
* InferenceEngine::Data::creatorLayer field
* InferenceEngine::Data::name field
* InferenceEngine::Data::inputTo field
* InferenceEngine::Data::userObject field
**Heterogeneous plugin:**
* InferenceEngine::IHeteroDeviceLoader class
* InferenceEngine::IHeteroInferencePlugin class
* InferenceEngine::HeteroPluginPtr class
* operator InferenceEngine::InferencePlugin::HeteroPluginPtr operator
**Blob creation API with dimensions in reverse order:**
* InferenceEngine::Blob::Blob(Precision p) constructor
* InferenceEngine::Blob::Blob(Precision p, Layout l) constructor
* InferenceEngine::Blob::Blob(Precision p, const SizeVector &dims) constructor
* InferenceEngine::Blob::Blob(Precision p, Layout l, const SizeVector &dims) constructor
* InferenceEngine::TBlob::TBlob(Precision p, Layout l) constructor
* InferenceEngine::TBlob::TBlob(Precision p, Layout l, const SizeVector& dims) constructor
* InferenceEngine::TBlob::TBlob(Precision p, Layout l, const SizeVector& dims, T* ptr, size_t data_size) constructor
* InferenceEngine::TBlob::TBlob(Precision p, Layout l, const SizeVector &dims, std::shared_ptr<IAllocator> alloc) constructor
* InferenceEngine::Blob::type() method
* InferenceEngine::Blob::precision() method
* InferenceEngine::Blob::layout() method
* InferenceEngine::Blob::dims() method
* InferenceEngine::make_shared_blob(Precision p, Layout l, const SizeVector &dims) function
* InferenceEngine::make_shared_blob(Precision p, const SizeVector &dims) function
* InferenceEngine::make_shared_blob(Precision p, Layout l, const TArg &arg) function
* InferenceEngine::make_shared_blob(Precision p, const TArg &arg) function
* InferenceEngine::make_shared_blob(TBlob<TypeTo> &&arg) function
* InferenceEngine::make_shared_blob(Precision p, Layout l) function
* InferenceEngine::make_shared_blob(Precision p, Layout l, SizeVector dims, const std::vector<TypeTo> &arg) function
* InferenceEngine::make_shared_blob(Precision p, Layout l, const std::vector<TypeTo> &arg) function
* InferenceEngine::make_shared_blob(Precision p, const std::vector<TypeTo> &arg) function
* InferenceEngine::make_shared_blob(Precision p, Layout l, const SizeVector &dims, TypeTo * ptr, size_t size) function
* InferenceEngine::make_shared_blob(Precision p, const SizeVector &dims, TypeTo * ptr, size_t size) function
* InferenceEngine::I_N variable
* InferenceEngine::I_C variable
* InferenceEngine::I_H variable
* InferenceEngine::I_W variable
* InferenceEngine::LayoutOffsetCounter class
* InferenceEngine::ConvertLayout function
**API working with device enumeration:**
* InferenceEngine::TargetDevice enumeration
* InferenceEngine::TargetDeviceInfo class
* InferenceEngine::getDeviceName function
* InferenceEngine::FindPluginRequest class
* InferenceEngine::FindPluginResponse class
* InferenceEngine::findPlugin(const FindPluginRequest &req, FindPluginResponse &result, ResponseDesc *resp) function
* InferenceEngine::ICNNNetwork::setTargetDevice method
* InferenceEngine::ICNNNetwork::getTargetDevice method
* InferenceEngine::PluginDispatcher::getPluginByDevice method
* InferenceEngine::PluginDispatcher::getSuitablePlugin method

View File

@@ -1,80 +0,0 @@
# Bfloat16 Inference {#openvino_docs_IE_DG_Bfloat16Inference}
## Disclaimer
Inference Engine with the bfloat16 inference implemented on CPU must support the `avx512_bf16` instruction and therefore the bfloat16 data format.
## Introduction
Bfloat16 computations (referred to as BF16) is the Brain Floating-Point format with 16 bits. This is a truncated 16-bit version of the 32-bit IEEE 754 single-precision floating-point format FP32. BF16 preserves 8 exponent bits as FP32 but reduces precision of the sign and mantissa from 24 bits to 8 bits.
![bf16_format]
Preserving the exponent bits keeps BF16 to the same range as the FP32 (~1e-38 to ~3e38). This simplifies conversion between two data types: you just need to skip or flush to zero 16 low bits.
Truncated mantissa leads to occasionally less precision, but according to [investigations](https://cloud.google.com/blog/products/ai-machine-learning/bfloat16-the-secret-to-high-performance-on-cloud-tpus), neural networks are more sensitive to the size of the exponent than the mantissa size. Also, in lots of models, precision is needed close to zero but not so much at the maximum range.
Another useful feature of BF16 is possibility to encode an INT8 in BF16 without loss of accuracy, because INT8 range completely fits in BF16 mantissa field. It reduces data flow in conversion from INT8 input image data to BF16 directly without intermediate representation in FP32, or in combination of [INT8 inference](Int8Inference.md) and BF16 layers.
See the [Intel's site](https://software.intel.com/sites/default/files/managed/40/8b/bf16-hardware-numerics-definition-white-paper.pdf) for more bfloat16 format details.
There are two ways to check if CPU device can support bfloat16 computations for models:
1. Query the instruction set via system `lscpu | grep avx512_bf16` or `cat /proc/cpuinfo | grep avx512_bf16`.
2. Use [Query API](InferenceEngine_QueryAPI.md) with `METRIC_KEY(OPTIMIZATION_CAPABILITIES)`, which should return `BF16` in the list of CPU optimization options:
@snippet snippets/Bfloat16Inference0.cpp part0
Current Inference Engine solution for bfloat16 inference uses Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN) and supports inference of the following layers in BF16 computation mode:
* Convolution
* FullyConnected
* InnerProduct
* LRN
* Pooling
This means that BF16 inference can only be performed with the CPU plugin on the layers listed above. All other layers are executed in FP32.
## Lowering Inference Precision
Lowering precision to increase performance is [widely used](https://software.intel.com/content/www/us/en/develop/articles/lower-numerical-precision-deep-learning-inference-and-training.html) for optimization of inference. The bfloat16 data type usage on CPU for the first time opens the possibility of default optimization approach.
The embodiment of this approach is to use the optimization capabilities of the current platform to achieve maximum performance while maintaining the accuracy of calculations within the acceptable range.
Bfloat16 data usage provides the following benefits that increase performance:
1. Faster multiplication of two BF16 numbers because of shorter mantissa of bfloat16 data.
2. No need to support denormals and handling exceptions as this is a performance optimization.
3. Fast conversion of float32 to bfloat16 and vice versa.
4. Reduced size of data in memory, as a result, larger models fit in the same memory bounds.
5. Reduced amount of data that must be transferred, as a result, reduced data transition time.
For default optimization on CPU, source model converts from FP32 or FP16 to BF16 and executes internally on platforms with native BF16 support. In that case, `KEY_ENFORCE_BF16` is set to `YES`.
The code below demonstrates how to check if the key is set:
@snippet snippets/Bfloat16Inference1.cpp part1
To disable BF16 internal transformations, set the `KEY_ENFORCE_BF16` to `NO`. In this case, the model infers AS IS without modifications with precisions that were set on each layer edge.
@snippet snippets/Bfloat16Inference2.cpp part2
An exception with message `Platform doesn't support BF16 format` is formed in case of setting `KEY_ENFORCE_BF16` to `YES` on CPU without native BF16 support.
Low-Precision 8-bit integer models do not convert to BF16, even if bfloat16 optimization is set by default.
## Performance Counters
Information about layer precision is stored in the performance counters that are
available from the Inference Engine API. The layers have the following marks:
* Suffix `BF16` for layers that had bfloat16 data type input and were computed in BF16 precision
* Suffix `FP32` for layers computed in 32-bit precision
For example, the performance counters table for the Inception model can look as follows:
```
pool5 EXECUTED layerType: Pooling realTime: 143 cpu: 143 execType: jit_avx512_BF16
fc6 EXECUTED layerType: FullyConnected realTime: 47723 cpu: 47723 execType: jit_gemm_BF16
relu6 NOT_RUN layerType: ReLU realTime: 0 cpu: 0 execType: undef
fc7 EXECUTED layerType: FullyConnected realTime: 7558 cpu: 7558 execType: jit_gemm_BF16
relu7 NOT_RUN layerType: ReLU realTime: 0 cpu: 0 execType: undef
fc8 EXECUTED layerType: FullyConnected realTime: 2193 cpu: 2193 execType: jit_gemm_BF16
prob EXECUTED layerType: SoftMax realTime: 68 cpu: 68 execType: jit_avx512_FP32
```
The `execType` column of the table includes inference primitives with specific suffixes.
[bf16_format]: img/bf16_format.png

View File

@@ -1,298 +0,0 @@
Cross Check Tool {#openvino_docs_IE_DG_Cross_Check_Tool}
================
Cross Check Tool is a console application that enables comparing accuracy and performance metrics for two successive
model inferences that are performed
on two different supported Intel&reg; devices or with different precisions.
The Cross Check Tool can compare metrics per layer or all over the model.
On Linux* OS, before running the Cross Check Tool binary, make sure your application can find the
Deep Learning Inference Engine libraries.
Navigate to the `<INSTALL_DIR>/deployment_tools/inference_engine/bin` folder and run the `setvars.sh` script to
set all necessary environment variables:
```sh
source setvars.sh
```
## Running the Cross Check Tool
Cross Check Tool is distributed as a binary file and there is no need to build it. To run the Cross Check Tool,
execute the tool's binary file with necessary parameters. Please note that the Inference Engine assumes that weights
are in the same folder as the _.xml_ file.
You can get the list of all available options using the -h option:
```sh
$./cross_check_tool -h
InferenceEngine:
API version ............ 1.0
Build .................. ###
[ INFO ] Parsing input parameters
./cross_check_tool [OPTION]
Options:
-h Prints a usage message.
-i "<path>" Optional. Path to an input image file or multi-input file to infer. Generates input(s) from normal distribution if empty
-m "<path>" Required. Path to an .xml file that represents the first IR of the trained model to infer.
-l "<absolute_path>" Required for MKLDNN (CPU)-targeted custom layers. Absolute path to a shared library with the kernels implementation.
Or
-c "<absolute_path>" Required for clDNN (GPU)-targeted custom kernels. Absolute path to the xml file with the kernels description.
-conf "<path>" Optional. Path to config file for -d device plugin
-ref_conf "<path>" Optional. Path to config file for -ref_d device plugin
-pp "<path>" Optional. Path to a plugin folder.
-d "<device>" Required. The first target device to infer the model specified with the -m option. CPU, GPU, HDDL or MYRIAD is acceptable.
-ref_m "<path>" Optional. Path to an .xml file that represents the second IR in different precision to compare the metrics.
-ref_d "<device>" Required. The second target device to infer the model and compare the metrics. CPU, GPU, HDDL or MYRIAD is acceptable.
-layers "<options>" Defines layers to check. Options: all, None - for output layers check, list of comma-separated layer names to check. Default value is None.
-eps "<float>" Optional. Threshold for filtering out those blob statistics that do not statify the condition: max_abs_diff < eps.
-dump Enables blobs statistics dumping
-load "<path>" Path to a file to load blobs from
```
### Examples
1. To check per-layer accuracy and performance of inference in FP32 precision on the CPU against the GPU, run:
```sh
./cross_check_tool -i <path_to_input_image_or_multi_input_file> \
-m <path_to_FP32_xml> \
-d CPU \
-ref_d GPU \
-layers all
```
The output looks as follows:
```
InferenceEngine:
API version ............ 1.0
Build .................. ###
[ INFO ] Parsing input parameters
The same IR on both devices: <path_to_IR>
[ INFO ] No extensions provided
API version ............ 1.0
Build .................. lnx_20180510
Description ....... MKLDNNPlugin
API version ............ 0.1
Build .................. ci-main-03659
Description ....... clDNNPlugin
[ INFO ] Inputs detected: Placeholder
[ INFO ] Statistics will be dumped for X layers: <layer_1_name>, <layer_2_name>, ... , <layer_X_name>
[ INFO ] Layer <layer_1_name> statistics
Max absolute difference: 1.52588e-05
Min absolute difference: 0
Max relative difference: 0.000288028%
Min relative difference: 0%
Blob size: 1000
Devices: CPU_FP32 GPU_FP32
Status: EXECUTED EXECUTED
Layer type: Reshape Reshape
Real time, microsec: 20 154
Execution type: unknown GPU
Number of NAN: 0 0
Number of INF: 0 0
Number of ZERO: 0 0
...
<list_of_layer_statistics>
...
[ INFO ] Overall max absolute difference 2.81334e-05 was reached by <layer_name> layer
[ INFO ] Overall min absolute difference 0 was reached by <layer_name> layer
[ INFO ] Overall max relative difference 0.744893% was reached by <layer_name> layer
[ INFO ] Overall min relative difference -2.47948% was reached by <layer_name> layer
[ INFO ] Execution successful
```
2. To check the overall accuracy and performance of inference on the CPU in FP32 precision against the
Intel&reg; Movidius&trade; Myriad&trade; device in FP16 precision, run:
```sh
./cross_check_tool -i <path_to_input_image_or_multi_input_file> \
-m <path_to_FP16_xml> \
-ref_d CPU \
-ref_m <path_to_FP32_xml>\
-d MYRIAD \
```
The output looks as follows:
```
InferenceEngine:
API version ............ 1.0
Build .................. ###
[ INFO ] Parsing input parameters
[ INFO ] MYRIAD vs CPU
IR for MYRIAD : <path_to_FP16_xml>
IR for CPU : <path_to_FP32_xml>
[ INFO ] No extensions provided
[ INFO ] Loading plugins
API version ............ 0.1
Build .................. ###
Description ....... myriadPlugin
API version ............ 1.0
Build .................. ###
Description ....... MKLDNNPlugin
[ INFO ] Inputs detected: <list_of_input_layers>
[ INFO ] Statistics will be dumped for 1 layers: <output_layer_name(s)>
[ INFO ] Layer <output_layer_name> statistics
Max absolute difference: 0.003889
Min absolute difference: 2.49778e-12
Max relative difference: 290.98%
Min relative difference: 0.0327804%
Devices: MYRIAD_FP16 CPU_FP32
Real time, microsec: 69213.978946 4149.904940
[ INFO ] Execution successful
```
3. To dump layer statistics from specific list of layers, run:
```sh
./cross_check_tool -i <path_to_input_image_or_multi_input_file> \
-m <path_to_FP16_xml> \
-d MYRIAD \
-dump \
-layers <comma_separated_list_of_layers>
```
The output looks as follows:
```
InferenceEngine:
API version ............ 1.0
Build .................. ###
[ INFO ] Blob and statistics dumping enabled
[ INFO ] No extensions provided
API version ............ 0.1
Build .................. custom_releases/cvsdk-2018-r2_e28ec0278fb749d6b999c688a8e90a8a25c0f2b5
Description ....... myriadPlugin
[ INFO ] Inputs detected: <list_of_input_layers>
[ INFO ] Statistics will be dumped for X layers: <comma_separated_list_of_layers>
[ INFO ] Dump path: <path_where_dump_will_be_saved>
[ INFO ] <layer_1_name> layer processing
...
[ INFO ] <layer_X_name> layer processing
[ INFO ] Execution successful
```
If you do not provide the `-i` key, the Cross Check Tool generates an input from normal distributed noise and saves
it in a multi-input file format with the filename `<path_to_xml>_input_layers_dump.txt` in the same folder as the IR.
4. To check the overall accuracy and performance of inference on the CPU in FP32 precision against dumped results, run:
```sh
./cross_check_tool -i <path_to_input_image_or_multi_input_file> \
-m <path_to_FP32_xml> \
-d CPU \
-load <path_to_dump> \
-layers all
```
The output looks as follows:
```
InferenceEngine:
API version ............ 1.0
Build .................. ###
[ INFO ] Blob and statistics loading enabled. File /localdisk/models/FP16/icv_squeezenet_v1.0_MYRIAD_FP16_dump.txt
The same IR on both devices: <path_to_FP32_xml>
[ INFO ] No extensions provided
API version ............ 0.1
Build .................. ###
Description ....... myriadPlugin
[ INFO ] Inputs detected: <list_of_input_layers>
[ INFO ] Statistics will be dumped for X layers: <layer_1_name>, <layer_2_name>, ... , <layer_X_name>
[ INFO ] <layer_1_name> layer processing
[ INFO ] Layer <layer_1_name> statistics
Max absolute difference: 0
Min absolute difference: 0
Max relative difference: 0%
Min relative difference: 0%
Blob size: 1000
Devices: MYRIAD_FP16 MYRIAD_FP16_loaded
Status: EXECUTED EXECUTED
Layer type: SoftMax SoftMax
Real time, microsec: 43 43
Execution type: SoftMax SoftMax
Number of NAN: 0 0
Number of INF: 0 0
Number of ZERO: 0 0
...
<list_of_layer_statistics>
...
[ INFO ] Overall max absolute difference 0
[ INFO ] Overall min absolute difference 0 was reached by <layer_1_name> layer
[ INFO ] Overall max relative difference 0%
[ INFO ] Overall min relative difference 0% was reached by <layer_1_name> layer
[ INFO ] Execution successful
```
### Multi-input and dump file experimental format
Text file contains description of each layer in structure like this:
* 1<sup>st</sup> line is layer name (required)
* 2<sup>nd</sup> line is shape like "(1,224,224,3)" (required)
* 3<sup>rd</sup> line is a device and precision information like "CPU_FP32" (optional for multi-input file)
* 4<sup>th</sup> line is execution status Options are: EXECUTED, OPTIMIZED_OUT (optional for multi-input file)
* 5<sup>th</sup> line is type of layer (optional for multi-input file)
* 6<sup>th</sup> line is execution time in microseconds (optional for multi-input file)
* 7<sup>th</sup> line is type of execution (optional for multi-input file)
* 8<sup>th</sup> line is word "CONTENT" which means that the next line or lines are consisted of blob elements
* Next line or lines are for blob elements. They may be separated with one or several spaces, tabs and new lines.
#### Multi-input file example
```
Input_1
(1,10)
CONTENT
0 0.000628471375 0.00185108185
0.000580787659
0.00137138367
0.000561237335 0.0040473938 0 0 0
Input_2
(1,8)
CONTENT
0 0 0.00194549561 0.0017490387 7.73072243e-05 0.000135779381 0.000186920166 0 7.52806664e-05
```
#### Dump file example
```
Softmax
(1,10)
MYRIAD_FP16
EXECUTED
SoftMax
43
SoftMax
CONTENT
7.44462013e-05
0
0.000810623169
0.000361680984
0
9.14335251e-05
0
0
8.15987587e-05
0
```
### Configuration file
There is an option to pass configuration file to plugin by providing
`-conf` and/or `--ref_conf` keys.
Configuration file is a text file with content of pairs of keys and values.
Structure of configuration file:
```sh
KEY VALUE
ANOTHER_KEY ANOTHER_VALUE,VALUE_1
```

View File

@@ -1,92 +0,0 @@
# Inference Engine Developer Guide {#openvino_docs_IE_DG_Deep_Learning_Inference_Engine_DevGuide}
## Introduction to the OpenVINO™ Toolkit
The OpenVINO™ toolkit is a comprehensive toolkit that you can use to develop and deploy vision-oriented solutions on
Intel® platforms. Vision-oriented means the solutions use images or videos to perform specific tasks.
A few of the solutions use cases include autonomous navigation, digital surveillance cameras, robotics,
and mixed-reality headsets.
The OpenVINO™ toolkit:
* Enables CNN-based deep learning inference on the edge
* Supports heterogeneous execution across an Intel&reg; CPU, Intel&reg; Integrated Graphics, Intel&reg; Neural Compute Stick 2
* Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels
* Includes optimized calls for computer vision standards including OpenCV\*, OpenCL&trade;, and OpenVX\*
The OpenVINO™ toolkit includes the following components:
* Intel® Deep Learning Deployment Toolkit (Intel® DLDT)
- [Deep Learning Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) — A cross-platform command-line tool for importing models and
preparing them for optimal execution with the Deep Learning Inference Engine. The Model Optimizer supports converting Caffe*,
TensorFlow*, MXNet*, Kaldi*, ONNX* models.
- [Deep Learning Inference Engine](inference_engine_intro.md) — A unified API to allow high performance inference on many hardware types
including Intel® CPU, Intel® Processor Graphics, Intel® FPGA, Intel® Neural Compute Stick 2.
- [nGraph](../nGraph_DG/nGraph_dg.md) — graph representation and manipulation engine which is used to represent a model inside Inference Engine and allows the run-time model construction without using Model Optimizer.
* [OpenCV](https://docs.opencv.org/) — OpenCV* community version compiled for Intel® hardware.
Includes PVL libraries for computer vision.
* Drivers and runtimes for OpenCL™ version 2.1
* [Intel® Media SDK](https://software.intel.com/en-us/media-sdk)
* [OpenVX*](https://software.intel.com/en-us/cvsdk-ovx-guide) — Intel's implementation of OpenVX*
optimized for running on Intel® hardware (CPU, GPU, IPU).
* [Demos and samples](Samples_Overview.md).
This Guide provides overview of the Inference Engine describing the typical workflow for performing
inference of a pre-trained and optimized deep learning model and a set of sample applications.
> **NOTES:**
> - Before you perform inference with the Inference Engine, your models should be converted to the Inference Engine format using the Model Optimizer or built directly in run-time using nGraph API. To learn about how to use Model Optimizer, refer to the [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). To learn about the pre-trained and optimized models delivered with the OpenVINO™ toolkit, refer to [Pre-Trained Models](@ref omz_models_intel_index).
> - [Intel® System Studio](https://software.intel.com/en-us/system-studio) is an all-in-one, cross-platform tool suite, purpose-built to simplify system bring-up and improve system and IoT device application performance on Intel® platforms. If you are using the Intel® Distribution of OpenVINO™ with Intel® System Studio, go to [Get Started with Intel® System Studio](https://software.intel.com/en-us/articles/get-started-with-openvino-and-intel-system-studio-2019).
## Table of Contents
* [Inference Engine API Changes History](API_Changes.md)
* [Introduction to Inference Engine](inference_engine_intro.md)
* [Understanding Inference Engine Memory Primitives](Memory_primitives.md)
* [Introduction to Inference Engine Device Query API](InferenceEngine_QueryAPI.md)
* [Adding Your Own Layers to the Inference Engine](Extensibility_DG/Intro.md)
* [Integrating Inference Engine in Your Application](Integrate_with_customer_application_new_API.md)
* [[DEPRECATED] Migration from Inference Engine Plugin API to Core API](Migration_CoreAPI.md)
* [Introduction to Performance Topics](Intro_to_Performance.md)
* [Inference Engine Python API Overview](../../inference-engine/ie_bridges/python/docs/api_overview.md)
* [Using Dynamic Batching feature](DynamicBatching.md)
* [Using Static Shape Infer feature](ShapeInference.md)
* [Using Low-Precision 8-bit Integer Inference](Int8Inference.md)
* [Using Bfloat16 Inference](Bfloat16Inference.md)
* Utilities to Validate Your Converted Model
* [Using Cross Check Tool for Per-Layer Comparison Between Plugins](../../inference-engine/tools/cross_check_tool/README.md)
* [Supported Devices](supported_plugins/Supported_Devices.md)
* [GPU](supported_plugins/CL_DNN.md)
* [CPU](supported_plugins/CPU.md)
* [VPU](supported_plugins/VPU.md)
* [MYRIAD](supported_plugins/MYRIAD.md)
* [HDDL](supported_plugins/HDDL.md)
* [Heterogeneous execution](supported_plugins/HETERO.md)
* [GNA](supported_plugins/GNA.md)
* [MULTI](supported_plugins/MULTI.md)
* [Pre-Trained Models](@ref omz_models_intel_index)
* [Known Issues](Known_Issues_Limitations.md)
**Typical Next Step:** [Introduction to Inference Engine](inference_engine_intro.md)
## Video: Inference Engine Concept
[![](https://img.youtube.com/vi/e6R13V8nbak/0.jpg)](https://www.youtube.com/watch?v=e6R13V8nbak)
<iframe width="560" height="315" src="https://www.youtube.com/embed/e6R13V8nbak" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>

View File

@@ -1,52 +0,0 @@
Using Dynamic Batching {#openvino_docs_IE_DG_DynamicBatching}
======================
Dynamic Batching feature allows you+ to dynamically change batch size for inference calls
within preset batch size limit.
This feature might be useful when batch size is unknown beforehand, and using extra large batch size is
undesired or impossible due to resource limitations.
For example, face detection with person age, gender, or mood recognition is a typical usage scenario.
## Usage
You can activate Dynamic Batching by setting <code>KEY_DYN_BATCH_ENABLED</code> flag to <code>YES</code> in a configuration map that is
passed to the plugin while loading a network.
This configuration creates an <code>ExecutableNetwork</code> object that will allow setting batch size
dynamically in all of its infer requests using <code>SetBatch()</code> method.
The batch size that was set in passed <code>CNNNetwork</code> object will be used as a maximum batch size limit.
Here is a code example:
@snippet snippets/DynamicBatching.cpp part0
## Limitations
Currently, certain limitations for using Dynamic Batching exist:
* Use Dynamic Batching with CPU and GPU plugins only.
* Use Dynamic Batching on topologies that consist of certain layers only:
* Convolution
* Deconvolution
* Activation
* LRN
* Pooling
* FullyConnected
* SoftMax
* Split
* Concatenation
* Power
* Eltwise
* Crop
* BatchNormalization
* Copy
Do not use layers that might arbitrary change tensor shape (such as Flatten, Permute, Reshape),
layers specific to object detection topologies (ROIPooling, ProirBox, DetectionOutput), and
custom layers.
Topology analysis is performed during the process of loading a network into plugin, and if topology is
not applicable, an exception is generated.

View File

@@ -1,79 +0,0 @@
# Custom nGraph Operation {#openvino_docs_IE_DG_Extensibility_DG_AddingNGraphOps}
Inference Engine Extension API allows to register operation sets (opsets) with custom nGraph operations, it allows to support Networks with unknown operations.
## Operation Class
To add your custom nGraph operation, create a new class that extends `ngraph::Op`, which is in turn derived from `ngraph::Node`, the base class for all graph operations in nGraph. Follow the steps below:
1. Define a `NodeTypeInfo` object that identifies the type of the operation to the graph users and helps with dynamic type resolution. The type info of an nGraph operation currently consists of a string identifier and a version number, but this may change in the future.
2. Implement constructors that can optionally take the operation inputs and attributes as parameters.
3. Override the shape inference method `validate_and_infer_types`. This method is called multiple times during graph manipulations to determine the shapes and element types of the outputs of the operations. You can access the input shapes through the `get_input_partial_shape()` method and input element types through the `get_input_element_type()` method of `ngraph::Node`. Set the inferred shape and element type of the output using `set_output_type`.
4. Override the `clone_with_new_inputs` method, which allows graph manipulation routines to create copies of this operation and connect it to different nodes during optimization.
5. Override the `visit_attributes` method, which allows serialization and deserialization of attributes. An `AttributeVisitor` is passed to the method, and the implementation is expected to walk over all the attributes in the op using the type-aware `on_attribute` helper. Helpers are already implemented for standard C++ types like `int64_t`, `float`, `bool`, `vector` and for existing nGraph defined types.
6. Override `evaluate`, which is an optional method that enables the application of constant folding if there is a custom operation on the constant branch.
Based on that, declaration of a operation class can look as follows:
@snippet template_extension/op.hpp op:header
### Class Fields
The provided implementation has several fields:
* `add` of type `int64_t` is an attribute of custom operation
* `type_info` of type `ngraph::NodeTypeInfo` defines the type and version of operation
### Operation Constructors
nGraph operation contains two constructors: a default constructor, which allows to create operation without attributes and a constructor that creates and validates operation with specified inputs and attributes.
@snippet template_extension/op.cpp op:ctor
### `validate_and_infer_types()`
`ngraph::Node::validate_and_infer_types` method validates operation attributes and calculates output shapes using attributes of operation.
@snippet template_extension/op.cpp op:validate
### `clone_with_new_inputs()`
`ngraph::Node::clone_with_new_inputs` method creates a copy of nGraph operation with new inputs.
@snippet template_extension/op.cpp op:copy
### `visit_attributes()`
`ngraph::Node::visit_attributes` method allows to visit all operation attributes.
@snippet template_extension/op.cpp op:visit_attributes
### `evaluate()`
`ngraph::Node::evaluate` method allows to apply constant folding to an operation.
@snippet template_extension/op.cpp op:evaluate
## Register Custom Operations in Extension Class
To add custom operations to the [Extension](Extension.md) class, create an operation set with custom operations and implement the `InferenceEngine::IExtension::getOpSets` method:
@snippet template_extension/extension.cpp extension:getOpSets
This method returns a map of opsets that exist in the extension library.
nGraph provides opsets mechanism for operation versioning. Different opsets distinguish between different versions of one operation.
When specifying opset names, follow the rules below:
* Use unique opset names.
* Do not use the following built-in opset names: `extension`, `experimental`, `opset1`, `opset2`, `opset3`, ... , `opsetN`.
* Make sure that the Model Optimizer and your extension use the same opset names.
* IR v10 operations have the mandatory `version` attribute specifying the opset.
Operations from the default opset cannot be redefined.
Use a custom opset to create a new operation or extend functionality of an existing operation from another opset.

View File

@@ -1,19 +0,0 @@
# Build Extension Library Using CMake* {#openvino_docs_IE_DG_Extensibility_DG_Building}
Inference Engine build infrastructure provides the Inference Engine Package for application development.
To build an extension library, use the following CMake script:
@snippet template_extension/CMakeLists.txt cmake:extension
This CMake script finds the Inference Engine and nGraph using the `find_package` CMake command.
To build an extension library, run the commands below:
```sh
$ cd template_extension
$ mkdir build
$ cd build
$ cmake -DInferenceEngine_DIR=[IE_DIR] -Dngraph_DIR=[NGRAPH_DIR] ../
$ cmake --build .
```

View File

@@ -1,69 +0,0 @@
# How to Implement Custom CPU Operations {#openvino_docs_IE_DG_Extensibility_DG_CPU_Kernel}
The primary vehicle for the performance of the CPU codepath in the Inference Engine is the Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN), and new CPU kernels extend the Inference Engine plugin for the Intel MKL-DNN. Implementing the InferenceEngine::ILayerExecImpl defines a general CPU-side extension. There are no Intel MKL-DNN specifics in the way you need to implement a kernel.
## Implementation Class
All custom kernels for the CPU plugin should be inherited from the InferenceEngine::ILayerExecImpl interface.
Based on that, declaration of a kernel implementation class can look as follows:
@snippet template_extension/cpu_kernel.hpp cpu_implementation:header
### Class Fields
The provided implementation has several fields:
* `add` of the type `int64_t` is an attribute of a custom operation
* `inShape` of the type `ngraph::Shape` is an input shape
* `outShape` of the type `ngraph::Shape` is an output shape
* `error` of the type `std::string` is a field to handle errors from a constructor
### Constructor of Implementation
An implementation constructor checks parameters of nGraph operation, stores needed attributes, and stores an error message in the case of an error.
@snippet template_extension/cpu_kernel.cpp cpu_implementation:ctor
### `getSupportedConfigurations`
InferenceEngine::ILayerExecImpl::getSupportedConfigurations method returns all supported configuration formats (input/output tensor layouts) for your implementation. To specify formats of data, use InferenceEngine::TensorDesc. Refer to the [Memory Primitives](../Memory_primitives.md) section for instructions on how to do it.
@snippet template_extension/cpu_kernel.cpp cpu_implementation:getSupportedConfigurations
### `init`
InferenceEngine::ILayerExecImpl::init method gets a runtime-selected configuration from a vector that is populated from the `getSupportedConfigurations` method and checks the parameters:
@snippet template_extension/cpu_kernel.cpp cpu_implementation:init
### `execute`
InferenceEngine::ILayerExecImpl::execute method accepts and processes the actual tenors as input/output blobs:
@snippet template_extension/cpu_kernel.cpp cpu_implementation:execute
## Register Implementation in `Extension` Class
To register custom kernel implementation in the [Extension](Extension.md) class, implement the following methods:
* <a href="#getImpTypes">getImplTypes</a>
* <a href="#getImplementation">getImplementation</a>
### <a name="getImpTypes"><code>getImplTypes</code></a>
InferenceEngine::IExtension::getImplTypes returns a vector of implementation types for an operation.
@snippet template_extension/extension.cpp extension:getImplTypes
### <a name="getImplementation"><code>getImplementation</code></a>
InferenceEngine::IExtension::getImplementation returns the kernel implementation with a specified type for an operation.
@snippet template_extension/extension.cpp extension:getImplementation
## Load Extension with Executable Kernels to Plugin
Use the `AddExtension` method of the general plugin interface to load your primitives:
@snippet snippets/CPU_Kernel.cpp part0

View File

@@ -1,57 +0,0 @@
# Custom ONNX operators {#openvino_docs_IE_DG_Extensibility_DG_Custom_ONNX_Ops}
ONNX importer provides mechanism to register custom ONNX operators based on predefined or user-defined nGraph operations.
The function responsible for registering a new operator is called `ngraph::onnx_import::register_operator` and is defined in `onnx_import/onnx_utils.hpp`.
## Registering custom ONNX operator based on predefined nGraph operations
The steps below explain how to register a custom ONNX operator, for example, CustomRelu, in a domain called com.example.
CustomRelu is defined as follows:
```
x >= 0 => f(x) = x * alpha
x < 0 => f(x) = x * beta
```
where alpha, beta are float constants.
1. Include headers:
@snippet onnx_custom_op/onnx_custom_op.cpp onnx_custom_op:headers
2. Register the CustomRelu operator in the ONNX importer:
@snippet onnx_custom_op/onnx_custom_op.cpp onnx_custom_op:register_operator
The `register_operator` function takes four arguments: op_type, opset version, domain, and a function object.
The function object is a user-defined function that takes `ngraph::onnx_import::Node` as an input and based on that, returns a graph with nGraph operations.
The `ngraph::onnx_import::Node` class represents a node in ONNX model. It provides functions to fetch input node(s) (`get_ng_inputs`), fetch attribute value (`get_attribute_value`) and many more (please refer to `onnx_import/core/node.hpp` for full class declaration).
New operator registration must happen before the ONNX model is read, for example, if an ONNX model uses the 'CustomRelu' operator, `register_operator("CustomRelu", ...)` must be called before InferenceEngine::Core::ReadNetwork.
Re-registering ONNX operators within the same process is supported. During registration of the existing operator, a warning is printed.
The example below demonstrates an exemplary model that requires previously created 'CustomRelu' operator:
@snippet onnx_custom_op/onnx_custom_op.cpp onnx_custom_op:model
For a reference on how to create a graph with nGraph operations, visit [Custom nGraph Operation](AddingNGraphOps.md).
For a complete list of predefined nGraph operators, visit [available operations sets](../../ops/opset.md).
If operator is no longer needed, it can be unregistered by calling `unregister_operator`. The function takes three arguments `op_type`, `version`, and `domain`.
@snippet onnx_custom_op/onnx_custom_op.cpp onnx_custom_op:unregister_operator
## Registering custom ONNX operator based on custom nGraph operations
The same principles apply when registering custom ONNX operator based on custom nGraph operations.
This example shows how to register custom ONNX operator based on `Operation` presented in [this tutorial](AddingNGraphOps.md), which is used in [TemplateExtension](Extension.md).
@snippet template_extension/extension.cpp extension:ctor
Here, the `register_operator` function is called in Extension's constructor, which makes sure that it is called before InferenceEngine::Core::ReadNetwork (since InferenceEngine::Core::AddExtension must be called before a model with custom operator is read).
The example below demonstrates how to unregister operator from Extension's destructor:
@snippet template_extension/extension.cpp extension:dtor
Note that it is mandatory to unregister custom ONNX operator if it is defined in dynamic shared library.
## Requirements for building with CMake
Program that uses the `register_operator` functionality, requires (in addition to Inference Engine) `ngraph` and `onnx_importer` libraries.
The `onnx_importer` is a component of `ngraph` package , so `find_package(ngraph REQUIRED COMPONENTS onnx_importer)` is sufficient to find both.
The `ngraph` package exposes two variables (`${NGRAPH_LIBRARIES}` and `${ONNX_IMPORTER_LIBRARIES}`), which reference `ngraph` and `onnx_importer` libraries.
Those variables need to be passed to the `target_link_libraries` command in the CMakeLists.txt file.
See below CMakeLists.txt for reference:
@snippet onnx_custom_op/CMakeLists.txt cmake:onnx_custom_op

View File

@@ -1,26 +0,0 @@
# Extension Library {#openvino_docs_IE_DG_Extensibility_DG_Extension}
Inference Engine provides an InferenceEngine::IExtension interface, which defines the interface for Inference Engine Extension libraries.
All extension libraries should be inherited from this interface.
Based on that, declaration of an extension class can look as follows:
@snippet template_extension/extension.hpp extension:header
The extension library should contain and export the method InferenceEngine::CreateExtension, which creates an `Extension` class:
@snippet template_extension/extension.cpp extension:CreateExtension
Also, an `Extension` object should implement the following methods:
* InferenceEngine::IExtension::Release deletes an extension object
* InferenceEngine::IExtension::GetVersion returns information about version of the library
@snippet template_extension/extension.cpp extension:GetVersion
Implement the InferenceEngine::IExtension::getOpSets method if the extension contains custom layers.
Read the [guide about custom operations](AddingNGraphOps.md) for more information.
To understand how integrate execution kernels to the extension library, read the [guide about development of custom CPU kernels](CPU_Kernel.md).
To understand how to register custom ONNX operator to the extension library, read the [guide about custom ONNX operators](Custom_ONNX_Ops.md).

View File

@@ -1,246 +0,0 @@
# How to Implement Custom GPU Operations {#openvino_docs_IE_DG_Extensibility_DG_GPU_Kernel}
The GPU codepath abstracts many details about OpenCL&trade;. You need to provide the kernel code in OpenCL C and the configuration file that connects the kernel and its parameters to the parameters of the operation.
There are two options of using custom operation configuration file:
* Include a section with your kernels into the global automatically-loaded `cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml` file, which is hosted in the `<INSTALL_DIR>/deployment_tools/inference_engine/bin/intel64/{Debug/Release}` folder
* Call the `InferenceEngine::Core::SetConfig()` method from your application with the `InferenceEngine::PluginConfigParams::KEY_CONFIG_FILE` key and the configuration file name as a value before loading the network that uses custom operations to the plugin:
@snippet snippets/GPU_Kernel.cpp part0
All Inference Engine samples, except trivial `hello_classification`,
feature a dedicated command-line option `-c` to load custom kernels. For example, to load custom operations for the classification sample, run the command below:
```sh
$ ./classification_sample -m <path_to_model>/bvlc_alexnet_fp16.xml -i ./validation_set/daily/227x227/apron.bmp -d GPU
-c <absolute_path_to_config>/custom_layer_example.xml
```
## Configuration File Format <a name="config-file-format"></a>
The configuration file is expected to follow the `.xml` file structure
with a node of the type `CustomLayer` for every custom operation you provide.
The definitions described in the sections below use the following notations:
Notation | Description
---|---
(0/1) | Can have 0 or 1 instances of this node/attribute
(1) | Must have only 1 instance of this node/attribute
(0+) | Can have any number of instances of this node/attribute
(1+) | Can have 1 or more instances of this node/attribute
### CustomLayer Node and Sub-node Structure
`CustomLayer` node contains the entire configuration for a single custom operation.
| Attribute Name |\# | Description |
|-----|-----|-----|
| `name` | (1) | The name of the operation type to be used. This name should be identical to the type used in the IR.|
| `type` | (1) | Must be `SimpleGPU`. |
| `version` | (1) | Must be `1`. |
**Sub-nodes**: `Kernel` (1), `Buffers` (1), `CompilerOptions` (0+),
`WorkSizes` (0/1)
### Kernel Node and Sub-node Structure
`Kernel` node contains all kernel source code configuration. No kernel
node structure exists.
**Sub-nodes**: `Source` (1+), `Define` (0+)
### Source Node and Sub-node Structure
`Source` node points to a single OpenCL source file.
| Attribute Name | \# ||
|-----|-----|-----|
| `filename` | (1) | Name of the file containing OpenCL source code. Notice that path is relative to your executable. Multiple source nodes will have their sources concatenated in order. |
**Sub-nodes**: None
### Define Node and Sub-node Structure
`Define` node configures a single `#&zwj;define` instruction to be added to
the sources during compilation (JIT).
| Attribute Name | \# | Description |
|------|-------|------|
| `name` | (1) | The name of the defined JIT. For static constants, this can include the value as well (taken as a string). |
| `param` | (0/1) | This parameter value is used as the value of this JIT definition. |
| `type` | (0/1) | The parameter type. Accepted values: `int`, `float`, and `int[]`, `float[]` for arrays. |
| `default` | (0/1) | The default value to be used if the specified parameters is missing from the operation in the IR. |
**Sub-nodes:** None
The resulting JIT has the following form:
`#&zwj;define [name] [type] [value/default]`.
### Buffers Node and Sub-node Structure
`Buffers` node configures all input/output buffers for the OpenCL entry
function. No buffers node structure exists.
**Sub-nodes:** `Data` (0+), `Tensor` (1+)
### Data Node and Sub-node Structure
`Data` node configures a single input with static data (for example,
weights or biases).
| Attribute Name | \# | Description |
|----|-----|------|
| `name` | (1) | Name of a blob attached to a operation in the IR |
| `arg-index` | (1) | 0-based index in the entry function arguments to be bound to |
**Sub-nodes**: None
### Tensor Node and Sub-node Structure
`Tensor` node configures a single input or output tensor.
| Attribute Name | \# | Description |
|------|-------|-------|
| `arg-index` | (1) | 0-based index in the entry function arguments to be bound to. |
| `type` | (1) | `input` or `output` |
| `port-index` | (1) | 0-based index in the operation input/output ports in the IR |
| `format` | (0/1) | Data layout declaration for the tensor. Accepted values: `BFYX`, `BYXF`, `YXFB`, `FYXB` (also in all lowercase). Default value: `BFYX` |
### CompilerOptions Node and Sub-node Structure
`CompilerOptions` node configures the compilation flags for the OpenCL
sources.
| Attribute Name | \# | Description |
|--------|-----|------|
| `options` | (1) | Options string to be passed to the OpenCL compiler |
**Sub-nodes**: None
### WorkSizes Node and Sub-node Structure
`WorkSizes` node configures the global/local work sizes to be used when
queuing the OpenCL program for execution.
| Attribute Name | \# | Description |
|-----|------|-----|
| `global`<br>`local` | (0/1)<br>(0/1) | An array of up to 3 integers (or formulas) for defining the OpenCL work-sizes to be used during execution.<br> The formulas can use the values of the B,F,Y,X dimensions and contain the operators: +,-,/,\*,% (all evaluated in integer arithmetic). <br>Default value: `global=”B*F*Y*X” local=””` |
| `dim` | (0/1) | A tensor to take the work size from. Accepted values: `input N`, `output`, where `N` is an index of input tensor starting with 0. Default value: `output` |
**Sub-nodes**: None
## Example Configuration File
The following code sample provides an example configuration file (in the
`.xml` format). For information on configuration file structure, see
[Configuration File Format](#config-file-format).
```xml
<CustomLayer name="ReLU" type="SimpleGPU" version="1">
<Kernel entry="example_relu_kernel">
<Source filename="custom_layer_kernel.cl"/>
<Define name="neg_slope" type="float" param="negative_slope" default="0.0"/>
</Kernel>
<Buffers>
<Tensor arg-index="0" type="input" port-index="0" format="BFYX"/>
<Tensor arg-index="1" type="output" port-index="0" format="BFYX"/>
</Buffers>
<CompilerOptions options="-cl-mad-enable"/>
<WorkSizes global="X,Y,B*F"/>
</CustomLayer>
```
## Built-In Defines for Custom Layers
The following table includes definitions that are attached before
the user sources, where `<TENSOR>` is the actual input and output, for
example, `INPUT0` or `OUTPUT0`.
For an example, see [Example Kernel](#example-kernel).
| Name | Value |
|---|---|
| `NUM_INPUTS` | Number of the input tensors bound to this kernel |
| `GLOBAL_WORKSIZE` | An array of global work sizes used to execute this kernel |
| `GLOBAL_WORKSIZE_SIZE` | The size of the `GLOBAL_WORKSIZE` array |
| `LOCAL_WORKSIZE` | An array of local work sizes used to execute this kernel |
| `LOCAL_WORKSIZE_SIZE` | The size of the `LOCAL_WORKSIZE` array |
| `<TENSOR>_DIMS`| An array of the tensor dimension sizes. Always ordered as `BFYX` |
| `<TENSOR>_DIMS_SIZE`| The size of the `<TENSOR>_DIMS` array.|
| `<TENSOR>_TYPE`| The datatype of the tensor: `float`, `half`, or `char`|
| `<TENSOR>_FORMAT_` | The format of the tensor, BFYX, BYXF, YXFB , FYXB, or ANY. The format is concatenated to the defined name. You can use the tensor format to define codepaths in your code with `#&zwj;ifdef/#&zwj;endif`. |
| `<TENSOR>_LOWER_PADDING` | An array of padding elements used for the tensor dimensions before they start. Always ordered as BFYX.|
| `<TENSOR>_ LOWER_PADDING_SIZE` | The size of the `<TENSOR>_LOWER_PADDING` array |
| `<TENSOR>_UPPER_PADDING` | An array of padding elements used for the tensor dimensions after they end. Always ordered as BFYX. |
| `<TENSOR>_UPPER_PADDING_SIZE` | The size of the `<TENSOR>_UPPER_PADDING` array |
| `<TENSOR>_PITCHES` | The number of elements between adjacent elements in each dimension. Always ordered as BFYX.|
| `<TENSOR>_PITCHES_SIZE`| The size of the `<TENSOR>_PITCHES` array |
| `<TENSOR>_OFFSET`| The number of elements from the start of the tensor to the first valid element (bypassing the lower padding) |
All `<TENSOR>` values are automatically defined for every tensor
bound to this operation (`INPUT0`, `INPUT1`, `OUTPUT0`, and so on), as shown
in the following for example:
```sh
#define INPUT0_DIMS_SIZE 4
#define INPUT0_DIMS (int []){ 1,96,55,55, }
```
## Example Kernel<a name="example-kernel"></a>
```c
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void example_relu_kernel(
const __global INPUT0_TYPE* input0,
__global OUTPUT0_TYPE* output)
{
const uint idx = get_global_id(0);
const uint idy = get_global_id(1);
const uint idbf = get_global_id(2);//batches*features, as OpenCL supports 3D nd-ranges only
const uint feature = idbf%OUTPUT0_DIMS[1];
const uint batch = idbf/OUTPUT0_DIMS[1];
//notice that pitches are in elements, not in bytes!
const uint in_id = batch*INPUT0_PITCHES[0] + feature*INPUT0_PITCHES[1] + idy*INPUT0_PITCHES[2] + idx*INPUT0_PITCHES[3] + INPUT0_OFFSET;
const uint out_id = batch*OUTPUT0_PITCHES[0] + feature*OUTPUT0_PITCHES[1] + idy*OUTPUT0_PITCHES[2] + idx*OUTPUT0_PITCHES[3] + OUTPUT0_OFFSET;
INPUT0_TYPE value = input0[in_id];
//neg_slope (which is non-zero for leaky ReLU) is put automatically as #define, refer to the config xml
output[out_id] = value < 0 ? value * neg_slope : value;
}
```
> **NOTE:** As described in the previous section, all the things like
> `INPUT0_TYPE` are actually defined as OpenCL (pre-)compiler inputs by
> the Inference Engine for efficiency reasons. See [Debugging
> Tips](#debugging-tips) for information on debugging the results.
> **NOTE**: Several GPU-targeted kernels are also added to the binaries upon samples compilation
> so that the sample application can easy load them.
> Refer to the `cldnn_global_custom_kernels` folder in the GPU plugin installation directory.
## Debugging Tips<a name="debugging-tips"></a>
* **Dumping the Resulting Kernels**.
It is recommended to get a dump of the kernel with all of
the values set by the Inference Engine, such as tensor sizes,
floating-point, and integer kernel parameters. To get the dump, add the
following line to your code that configures the GPU plugin to output the
custom kernels:
@snippet snippets/GPU_Kernel.cpp part1
When the Inference Engine compiles the kernels for the specific network,
it also outputs the resulting code for the custom kernels. In the
directory of your executable, find files like
`clDNN_program0.cl`, `clDNN_program1.cl`. There are as many files as
distinct sets of parameters for your custom kernel: different input
tensor sizes and kernel parameters.
* **Using `printf` in the OpenCL™ Kernels**.
To debug the specific values, you can use `printf` in your kernels.
However, be careful: for instance, do not output excessively
as it would generate too much data. The `printf` output is typical, so
your output can be truncated to fit the buffer. Also, because of
buffering, you actually get an entire buffer of output when the
execution ends.<br>
For more information, refer to the [printf
Function](https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/printfFunction.html).

View File

@@ -1,52 +0,0 @@
# Inference Engine Extensibility Mechanism {#openvino_docs_IE_DG_Extensibility_DG_Intro}
Inference Engine Extensibility API allows to add support of custom operations to the Inference Engine.
Extension should contain operation sets with custom operations and execution kernels for custom operations.
Physically, an extension library can be represented as a dynamic library exporting the single `CreateExtension` function
that allows to create a new extension instance.
Extensibility library can be loaded to the `InferenceEngine::Core` object using the
`InferenceEngine::Core::AddExtension` method.
## Inference Engine Extension Library
Inference Engine Extension dynamic library contains several components:
* [Extension Library](Extension.md):
- Contains custom operation sets
- Provides CPU implementations for custom operations
* [Custom nGraph Operation](AddingNGraphOps.md):
- Allows to use `InferenceEngine::Core::ReadNetwork` to read Intermediate Representation (IR) with unsupported
operations
- Allows to create `ngraph::Function` with unsupported operations
- Provides shape inference mechanism for custom operations
> **NOTE**: This documentation is written based on the `Template extension`, which demonstrates extension
development details. Find the complete code of the `Template extension`, which is fully compilable and up-to-date,
at `<dldt source tree>/docs/template_extension`.
## Execution Kernels
The Inference Engine workflow involves the creation of custom kernels and either custom or existing operations.
An _Operation_ is a network building block implemented in the training framework, for example, `Convolution` in Caffe*.
A _Kernel_ is defined as the corresponding implementation in the Inference Engine.
Refer to the [Model Optimizer Extensibility](../../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md)
for details on how a mapping between framework operations and Inference Engine kernels is registered.
In short, you can plug your own kernel implementations into the Inference Engine and map them to the operations in the original framework.
The following pages describe how to integrate custom _kernels_ into the Inference Engine:
* [Introduction to development of custom CPU kernels](CPU_Kernel.md)
* [Introduction to development of custom GPU kernels](GPU_Kernel.md)
* [Introduction to development of custom VPU kernels](VPU_Kernel.md)
## Additional Resources
* [Build an extension library using CMake*](Building.md)
## See Also
* [Using Inference Engine Samples](../Samples_Overview.md)
* [Hello Shape Infer SSD sample](../../../inference-engine/samples/hello_reshape_ssd/README.md)

View File

@@ -1,679 +0,0 @@
# How to Implement Custom Layers for VPU (Intel® Neural Compute Stick 2) {#openvino_docs_IE_DG_Extensibility_DG_VPU_Kernel}
> **NOTE:** OpenCL™ custom layer support is available in the preview mode.
> **NOTE:** This section assumes you are familiar with developing kernels using OpenCL™.
To customize your topology with an OpenCL™ layer, follow the steps below:
1. Write and compile you OpenCL™ code with the standalone offline OpenCL™ compiler (`clc`).
2. Write a configuration file to bind the OpenCL™ kernel to the topology file (`.xml`) of the model IR.
3. Pass the configuration file to Inference engine with the model IR.
## Compile OpenCL™ code for VPU (Intel® Neural Compute Stick 2)
> **NOTE:** OpenCL compiler, targeting Intel® Neural Compute Stick 2 for the SHAVE* processor only, is redistributed with OpenVINO.
OpenCL support is provided by ComputeAorta*, and is distributed under a license agreement between Intel® and Codeplay* Software Ltd.
The OpenCL™ toolchain for the Intel® Neural Compute Stick 2 supports offline compilation only, so first compile OpenCL C code using the standalone `clc` compiler. You can find the compiler binary at `<INSTALL_DIR>/deployment_tools/tools/cl_compiler`.
> **NOTE:** By design, custom OpenCL layers support any OpenCL kernels written with 1.2 version assumed. It also supports half float
extension and is optimized for this type, because it is a native type for Intel® Movidius™ VPUs.
1. Prior to running a compilation, make sure that the following variables are set:
* `SHAVE_MA2X8XLIBS_DIR=<INSTALL_DIR>/deployment_tools/tools/cl_compiler/lib/`
* `SHAVE_LDSCRIPT_DIR=<INSTALL_DIR>/deployment_tools/tools/cl_compiler/ldscripts/`
* `SHAVE_MYRIAD_LD_DIR=<INSTALL_DIR>/deployment_tools/tools/cl_compiler/bin/`
* `SHAVE_MOVIASM_DIR=<INSTALL_DIR>/deployment_tools/tools/cl_compiler/bin/`
2. Run the compilation with the command below. You should use `--strip-binary-header` to make an OpenCL runtime-agnostic binary runnable with the Inference Engine.
```bash
cd <INSTALL_DIR>/deployment_tools/tools/cl_compiler/bin
./clc --strip-binary-header custom_layer.cl -o custom_layer.bin
```
## Write a Configuration File
To tie the topology IR for a layer you customize, prepare a configuration file, so that the Inference Engine can find parameters for your kernel and the execution work grid is described.
For example, given the following OpenCL kernel signature:
```cpp
__kernel void reorg_nhwc(__global const half *src, __global half *out, int w, int h, int c, int stride);
```
Configuration file for this kernel might be the following:
```xml
<CustomLayer name="ReorgYolo" type="MVCL" version="1">
<Kernel entry="reorg_nhwc">
<Source filename="reorg.bin"/>
</Kernel>
<Parameters>
<Tensor arg-name="src" type="input" port-index="0" format="BYXF"/>
<Tensor arg-name="out" type="output" port-index="0" format="BYXF"/>
<Scalar arg-name="w" type="int" port-index="0" source="I.X" />
<Scalar arg-name="h" type="int" port-index="0" source="I.Y" />
<Scalar arg-name="c" type="int" port-index="0" source="I.F" />
<Scalar arg-name="stride" type="int" source="stride" />
</Parameters>
<WorkSizes dim="input,0" global="(Y+7)/8*8,1,1" local="8,1,1"/>
</CustomLayer>
```
Each custom layer is described with the `CustomLayer` node. It has the following nodes and attributes:
- Root node `CustomLayer` contains the following attributes:
- `name` (Required) A name of the Inference Engine layer to bind the kernel with.
- `type` and `version` (Required) Reserved for future use. Set them to `MVCL` and `1` respectively.
- `max-shaves` (Optional) The maximum number of SHAVE cores that should be dedicated for the layer. It is useful for debugging concurrency issues or for resource saving if memory bound kernel does not scale well with the number of cores, so more resources can be left for the rest of a topology.
- Sub-node `Kernel` must contain the following attributes:
- `entry` A name of your kernel function as you defined it in a source file (in the example above, it is `reorg_nhwc`).
- Node `Source` must contain the following attributes:
- `filename` A path to a compiled binary relative to the `.xml` binding file.
- Sub-node `Parameters` Describes parameters bindings. For more information, see the description below.
- Sub-node `WorkSizes` Describes local and global work group sizes and the source for dimension deduction as a pair `direction,port`. In the example above, the work group is described relatively to the dimension of the input tensor that comes through port 0 in the IR. `global` and `local` work group configurations support any simple math expressions with +,-,\*,/, and () from `B`(batch), `Y`(height), `X`(width) and `F`(channels).
- Sub-node `Where` Allows to customize bindings with the `key="value"` attribute. For example, to substitute only 3x3 convolutions, write `<Where kernel="3,3"/>` in the binging xml.
Parameter description supports `Tensor` of one of tensor types such as `input`, `output`, `input_buffer`, `output_buffer` or `data`, `Scalar`, or `Data` nodes and has the following format:
- Each `Tensor` node of `input` or `output` type must contain the following attributes:
- `arg-name` A name of a kernel parameter in the kernel signature.
- `type` Node type: `input` or `output` as in the IR.
- `port-index` A number of input/output ports as in the IR.
- `format` The channel order in the tensor. Optional conversion layers are generated if the custom layer format is not compatible with formats of neighboring layers. `BFXY`, `BYXF`, and `ANY` formats are supported currently.
- Each `Tensor` node of `input_buffer` or `output_buffer` type must contain the following attributes:
- `arg-name` A name of a kernel parameter in the kernel signature.
- `type` Node type: `input_buffer` or `output_buffer`. Use the appropriate type to bind multiple kernels that correspond to different stages of the same layer.
- `port-index` The unique identifier to bind by.
- `dim` The dim source with the same `direction,port` format used for `WorkSizes` bindings.
- `size` Amount of bytes needed. Current expression syntax supports only expression over dimensions of over selected input/output tensor or constants and might be expended in the future.
Here is an example of multi-stage MVN layer binding:
```xml
<CustomLayer name="MVN" stage="0" type="MVCL" version="1">
<Kernel entry="reduction_mean">
<Source filename="mvn.bin"/>
</Kernel>
<Parameters>
<Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
<Tensor arg-name="mean" type="output_buffer" port-index="0" dim="output,0" size="Y*F*4"/>
<Tensor arg-name="variance" type="output_buffer" port-index="1" dim="output,0" size="Y*F*4"/>
<!--other parameters -->
</Parameters>
<WorkSizes dim="output,0" global="((Y+7)/8)*8,F,1" local="8,1,1"/>
</CustomLayer>
<CustomLayer name="MVN" stage="1" type="MVCL" version="1">
<Kernel entry="mvn_scale">
<Source filename="mvn_scale_changed_orded.bin"/>
</Kernel>
<Parameters>
<Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
<Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
<Tensor arg-name="mean_part" type="input_buffer" port-index="0" dim="output,0" size="Y*F*4"/>
<Tensor arg-name="power_mean" type="input_buffer" port-index="1" dim="output,0" size="Y*F*4"/>
<!--other parameters -->
</Parameters>
<WorkSizes dim="output,0" global="((Y+7)/8)*8,F,1" local="8,1,1"/>
</CustomLayer>
```
- Each `Tensor` node that has the type `data` must contain the following attributes:
- `source` A name of the blob as it is in the IR (typical example is `weights` for convolution
- `format` Specifies the channel order in the tensor. Optional conversion layers are generated if the custom layer format is not.
```xml
<CustomLayer name="BinaryConvolution" type="MVCL" version="1">
<Kernel entry="binary_convolution">
<Source filename="binary_layers.bin"/>
</Kernel>
<Parameters>
<Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
<Data arg-name="weights_data" type="data" source="weights" format="ANY"/>
<Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
<!--other parameters -->
</Parameters>
<WorkSizes dim="output,0" global="X,Y,F" local="1,1,1"/>
</CustomLayer>
```
- Each `Scalar` node must contain the following attributes:
- `arg-name` A name of a kernel parameter in the kernel signature.
- `type` `int` or `float` value. It is used for correct argument extraction from IR parameters.
- `source` Contains the name of the parameter in the IR file or input/output (`I`/`O`, `In`/`On`, where `n` is a port number)
followed by dimension `B`(batch), `Y`(height), `X`(width), or `F`(channels).
- Each `Data` node must contain the following attributes:
- `arg-name` A name of a kernel parameter in the kernel signature.
- `type` Node type. Currently, `local_data` is the only supported value, which defines buffer allocated in fast local on-chip memory. It is limited to 100K for all `__local` and
`__private` arrays defined inside the kernel as well as all `__local` parameters passed to the kernel. Please, consider that a manual-DMA extension requires double buffering.
If the custom layer is detected to run out of local memory, the inference fails.
- `dim` The dim source with the same `direction,port` format used for `WorkSizes` bindings.
- `size` Amount of bytes needed. The current expression syntax supports only expression over dimensions of over selected input/output tensor or constants and may be extended in the future.
The example binding below illustrates a kernel with two local buffers passed to the kernel.
```xml
<CustomLayer name="GRN" type="MVCL" version="1">
<Kernel entry="grn_NCHW">
<Source filename="grn.bin"/>
</Kernel>
<Parameters>
<Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
<Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
<Data arg-name="src" type="local_data" dim="input,0" size="X*F*2" />
<Data arg-name="dst" type="local_data" dim="input,0" size="X*F*2" />
<Scalar arg-name="C" type="int" port-index="0" source="I.F" />
<Scalar arg-name="bias" type="float" source="bias" />
</Parameters>
<WorkSizes dim="input,0" global="X,Y,1" local="X,1,1"/>
</CustomLayer>
```
## Pass Configuration File to Inference Runtime
> **NOTE**: If both native and custom layer implementations are present, the custom kernel has a priority over the native one.
Before loading the network that features the custom layers, provide a separate configuration file and load it using the InferenceEngine::Core::SetConfig() method with the PluginConfigParams::KEY_CONFIG_FILE key and the configuration file name as a value:
```cpp
InferenceEngine::Core core;
// Load custom layers
core.SetConfig({ { InferenceEngine::PluginConfigParams::KEY_CONFIG_FILE, "<path to the xml file>" } }, "MYRIAD");
```
Optionally, set a path to a custom layers description with a pair of `VPU_CUSTOM_LAYERS` and `/path/to/your/customLayers.xml`
as a network configuration:
```cpp
InferenceEngine::Core core;
std::map<std::string, std::string> networkConfig;
config["VPU_CUSTOM_LAYERS"] = "/path/to/your/customLayers.xml";
// Load custom layers in network config
auto exeNetwork = core.LoadNetwork(cnnNetwork, "MYRIAD", networkConfig);
```
## Optimizing Kernels with OpenCL™ for VPU (Intel® Neural Compute Stick 2)
This section provides optimization guidelines on writing custom layers with OpenCL for VPU devices. Knowledge about general OpenCL
programming model and OpenCL kernel language is assumed and not a subject of this section. The OpenCL model mapping to VPU is described in the table below.
| OpenCL Model | VPU Mapping|
|-----|----|
| Device code | Executed on SHAVE cores |
| Private memory | Mapped to CMX internal memory, limited to 100KB per work group, valid only while the work group is executed |
| Local memory | Mapped to CMX internal memory, limited to 100KB per work group, valid only while the work group is executed |
| Global memory | Mapped to DDR, used to pass execution preserved parameters for inputs, outputs, and blobs |
| Work group | Executed on a single SHAVE core iterating over multiple work items |
Note that by the OpenCL specification, the work group execution order is not specified. This means that it is your
responsibility to ensure that race conditions among work groups are not introduced. Custom layer runtime spits evenly
work grid among available compute resources and executes them in an arbitrary order. This static scheduling approach works best if the load is evenly spread out across work groups, which is a typical case for Deep Learning kernels. The following guidelines are recommended to use for work group partitioning:
1. Split work evenly across work groups.
2. Adjust work group granularity to maintain equal workload for all compute codes.
3. Set the maximum number of cores (using the `max-shaves` attribute for the `CustomLayer` node). This keeps more resources for the rest of topology. It is also useful if the kernel scalability reached its limits, which may happen while optimizing memory bound kernels or kernels with poor parallelization.
4. Try an alternate data layout (`BFXY`/`BYXF`) for the kernel if it improves work group partitioning or data access patterns.
Consider full topology performance (not just specific layer boost) since data conversion layers would be automatically inserted
as appropriate.
Offline OpenCL compiler (`clc`) features automatic vectorization over `get_global_id(0)` usage, if uniform access is detected.
For example, the kernel below could be automatically vectorized:
```cpp
__kernel void cvtf32f16(__global float* restrict inImage, __global half* restrict outImage,
float scale, float bais)
{
int idx = get_global_id(0) + get_global_id(1) * get_global_size(0) + get_global_id(2) * get_global_size(0) * get_global_size(1);
outImage[idx] = convert_half(inImage[idx]*scale+bais);
}
```
However, this work-group based vectorizer (WGV) conflicts with the default LLVM vectorizer based on superword level parallelism
(SLP) for the current compiler version. Manual vectorization is recommended to provide the best performance for non-uniform code
patterns. WGV works if and only if vector types are not used in the code.
Here is a short list of optimization tips:
1. Help auto-vectorizer ensure non-aliasing pointers for kernel parameters by putting `restrict` where possible.
- This may give a performance boost, especially for kernels with unrolling, like `ocl_grn` from the example below.
- Place `restrict` markers for kernels with manually vectorized codes. In the `ocl_grn` kernel below, the unrolled version without `restrict` is up to 20% slower than the most optimal one, which combines unrolling and `restrict`.
2. Put `#&zwj;pragma unroll N` to your loop header. Since the compiler does not trigger unrolling by default, it is your responsibility to
annotate the code with pragmas as appropriate. The `ocl_grn` version with `#&zwj;pragma unroll 4` is up to 50% faster, most of which comes from unrolling the first loop, because LLVM, in general, is better in scheduling 3-stage loops (load-compute-store), while the fist loop
`variance += (float)(src_data[c*H*W + y*W + x] * src_data[c*H*W + y*W + x]);` is only 2-stage (load-compute). Please, pay
attention to unrolling such cases first. Unrolling factor is loop-dependent. Choose the smallest number that
still improves performance as an optimum between the kernel size and execution speed. For this specific kernel, changing the unroll factor from `4`to `6` results in the same performance, so unrolling factor equal to 4 is an optimum. For Intel® Neural Compute Stick 2, unrolling is conjugated with the automatic software pipelining for load, store, and compute stages:
```cpp
__kernel void ocl_grn(__global const half* restrict src_data, __global half* restrict dst_data, int C, float bias)
{
int x = get_global_id(0);
int W = get_global_size(0);
int y = get_global_id(1);
int H = get_global_size(1);
float variance = bias + 1e-9f;
#pragma unroll 4
for (int c = 0; c < C; c++)
variance += (float)(src_data[c*H*W + y*W + x] * src_data[c*H*W + y*W + x]);
variance = 1.f / native_sqrt(variance);
#pragma unroll 4
for (int c = 0; c < C; c++)
dst_data[c*H*W + y*W + x] = (half)((float)src_data[c*H*W + y*W + x] * variance);
}
```
To check the efficiency of WGV, you can compare performance of the kernel above with the kernel below, which is manually vectorized over width:
```cpp
__kernel void ocl_grn_line(__global const half* restrict src_data, __global half* restrict dst_data, int C, int W, float bias)
{
int y = get_global_id(1);
int H = get_global_size(1);
for (int x = 0; x < W/8; x++)
{
float8 variance = (float8)(bias+1e-9f);
#pragma unroll 4
for (int c = 0; c < C; c++)
{
__global const half8* restrict src_line = ((__global const half8 * restrict)(src_data + c*H*W + y*W));
half8 sh = src_line[x];
variance += convert_float8(sh*sh);
}
variance = 1.f/native_sqrt(variance);
#pragma unroll 4
for (int c = 0; c < C; c++)
{
__global const half8* restrict src_line = ((__global const half8 * restrict)(src_data + c*H*W + y*W));
__global half8* restrict dst_line = ((__global half8 * restrict)(dst_data + c*H*W + y*W));
dst_line[x] = convert_half8(convert_float8(src_line[x])*variance);
}
}
for (int x = W/8*8; x < W; x++)
{
float variance = bias+1e-9f;
#pragma unroll 4
for (int c = 0; c < C; c++)
variance += (float)(src_data[c*H*W + y*W + x]*src_data[c*H*W + y*W + x]);
variance = 1.f/native_sqrt(variance);
#pragma unroll 4
for (int c = 0; c < C; c++)
dst_data[c*H*W + y*W + x] = (float)src_data[c*H*W + y*W + x]*variance;
}
}
```
Both versions perform the same, but the second one has more complex code.
3. If it is easy to predict the work group size, you can also use the `reqd_work_group_size` kernel attribute to ask the compiler
to unroll the code up to local size of the work group. Please note that if the kernel is actually executed with the
different work group configuration, the result is undefined.
4. Prefer to use the `half` compute, if it keeps reasonable accuracy. 16-bit float is a native type for Intel® Neural Compute Stick 2, most of the functions `half_*` are mapped to a single hardware instruction.
Use the standard `native_*` function for the rest of types.
5. Prefer to use the `convert_half` function over `vstore_half` if conversion to 32-bit float is required. `convert_half` is mapped to a single hardware instruction. For the `cvtf32f16` kernel above, the line `outImage[idx] = convert_half(inImage[idx]*scale+bais);` is 8 times slower than the code with `vstore_half`.
6. Mind early exits. Early exit may be extremely costly for the current version of the `clc` compiler due to conflicts with the
auto-vectorizer. The generic advice would be to setup local size by `x` dimension equal to inputs or/and outputs width.
If it is impossible to define the work grid that exactly matches inputs or/and outputs to eliminate checks, for example,
`if (get_global_id(0) >= width) return`, use line-wise kernel variant with manual vectorization.
The kernel example below demonstrates the impact of early exits on kernel performance.
```cpp
// Initial version
__kernel void reorg(const __global half* restrict src, __global half* restrict out, int stride)
{
int w = get_global_id(0);
int W = get_global_size(0);
int h = get_global_id(1);
int H = get_global_size(1);
int c = get_global_id(2);
int C = get_global_size(2);
int C2 = C/(stride*stride);
int offset = c / C2;
int c2 = c - C2 * offset;
int H2 = H*stride;
int W2 = W*stride;
int h2 = h*stride + offset / stride;
int w2 = w*stride + offset - stride * (offset / stride);
out[W*H*c + W*h + w] = src[W2*H2*c2 + W2*h2 + w2];
}
```
This `reorg` kernel is auto-vectorizable, but an input for YOLO v2 topology is `NCHW=<1,64,26,26>` and it is not multiple of vector width (which is `8` for `half` data type). As a result, the Inference Engine does not select the auto-vectorized kernel.
To compare performance of auto-vectorized and scalar version of the kernel, change the input size to`NCHW=<1,64,26,32>`. This allows the auto-vectorized version to be selected by the Inference Engine and can give you about 30% uplift.
Since the auto-vectorized version is faster, it makes sense to enable it for the YOLO v2 topology input size by setting the local size multiple of vector (e.g. 32) and adjust global sizes accordingly. As a result, the execution work grid exceeds actual input dimension, so out-of-bound checks should be inserted. See the updated kernel version below:
```cpp
// Version with out-of-bound checks added
__kernel void reorg(const __global half* restrict src, __global half* restrict out, int W, int stride)
{
int w = get_global_id(0);
w = min(w, W-1);
int h = get_global_id(1);
int H = get_global_size(1);
int c = get_global_id(2);
int C = get_global_size(2);
int C2 = C/(stride*stride);
int offset = c / C2;
int c2 = c - C2 * offset;
int H2 = H*stride;
int W2 = W*stride;
int h2 = h*stride + offset / stride;
int w2 = w*stride + offset - stride * (offset / stride);
out[W*H*c + W*h + w] = src[W2*H2*c2 + W2*h2 + w2];
}
```
This code performs the same as the initial kernel above (scalar) due to branching overhead. If you replace min/max expression `w = min(w, W-1);` with `if (w >= W) return;`, runtime increases up to 2x against to code without branching (initial version).<br>
If branching is inevitable for your element-based kernel, it is recommended to change the scheme to line-based. See the kernel variant below:
```cpp
// Line-wise version
__kernel void reorg(const __global half* restrict src, __global half* restrict out, int H, int W, int stride)
{
int h = min((int)get_global_id(0), H-1);
int c = get_global_id(1);
int C = get_global_size(1);
int C2 = C/(stride*stride);
int offset = c / C2;
int c2 = c - C2 * offset;
int H2 = H*stride;
int W2 = W*stride;
for (int w = 0; w < W; ++w)
{
int h2 = h*stride + offset / stride;
int w2 = w*stride + offset - stride * (offset / stride);
out[W*H*c + W*h + w] = src[W2*H2*c2 + W2*h2 + w2];
}
}
```
This decreases the execution time up to 40% against the best performing vectorized kernel without early exits (initial version).
7. Reuse computations among work items by using line-based kernels or sharing values though `__local` memory.
8. Improve data access locality. Most of custom kernels are memory bound while convolution and fully connected layers are hardware-implemented. The code below demonstrates a further optimized version of the `reorg` kernel unrolled by `stride`:
```cpp
// Unrolled line-wise version
__kernel void reorg_unrolled_by_stride(const __global half* restrict src, __global half* restrict dst,
int H, int W, int stride)
{
int h = min((int)get_global_id(0), H-1);
int c2 = get_global_id(1);
int C2 = get_global_size(1);
int C = C2*stride*stride;
int H2 = H*stride;
int W2 = W*stride;
for (int stride_y = 0; stride_y < stride; stride_y++)
for (int stride_x = 0; stride_x < stride; stride_x++)
for (int w2 = 0, w = 0; w < W; w2 += stride, w++)
dst[W*H*C2*(stride_y*stride+stride_x) + W*H*c2 + W*h + w] = src[W2*H2*c2 + W2*h*stride + W2*stride_y + w2 + stride_x];
}
```
`scr` data in this case loaded only once. As the result, the cycle count drops up to 45% against the line-wise version.
9. Copy data from `__dlobal` to `__local` or `__private` memory if the data is accessed more than once. Access to
`__dlobal` memory is orders of magnitude slower than access to `__local`/`__private` due to statically scheduled pipeline, which
stalls completely on memory access without any prefetch. The same recommendation is applicable for scalar load/store
from/to a `__blobal` pointer since work-group copying could be done in a vector fashion.
10. Use a manual DMA extension. Local (on-chip) memory throughput is up to 24x higher than DDR throughput. Starting from OpenVINO™ 2020.1, VPU OpenCL features manual-DMA kernel extension to copy sub-tensor used by work group into local memory and performing compute without DDR evolved. Here is the simple GRN kernel implementation that runs over DDR. Local size is equal to (width of the input tensor, 1, 1) to define a large enough work group to get code automatically vectorized and unrolled, while global size is (width of the input tensor, height of the input tensor, 1):
```cpp
__kernel void grn_NCHW(
__global const half* restrict src_data,
__global half* restrict dst_data,
int C,
float bias)
{
float variance = bias + 1e-9f;
#pragma unroll 4
for (int c = 0; c < C; c++)
{
float val = (float) src_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)];
variance += val*val;
}
half hvariance = (half)(native_rsqrt((half)(variance/16.f))*0.25f);
#pragma unroll 4
for (int c = 0; c < C; c++)
{
dst_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)]
= src_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)] * hvariance;
}
}
```
This kernel can be rewritten to introduce special data binding `__dma_preload` and `__dma_postwrite intrinsics`. This means that instead of one kernel, a group of three kernels should be implemented: `kernelName`, `__dma_preload_kernelName` and `__dma_postwrite_kernelName`. `__dma_preload_kernelName` for a particular work group `n` is guaranteed to be executed before `n`-th work group itself, while `__dma_postwrite_kernelName` is guaranteed to be executed after a corresponding work group. You can define one of those functions that are intended to be used to copy data from-to `__global` and `__local` memory. The syntactics requires exact functional signature match. The example below illustrates how to prepare your kernel for manual-DMA.
```cpp
__kernel void __dma_preload_grn_NCHW(
__global const half* restrict src,
__global half* restrict dst,
__local half* restrict local_src,
__local half* restrict local_dst,
int C,
float bias)
{
// ToDO: copy required piece of src tensor into local_src
}
__kernel void __dma_postwrite_grn_NCHW(
__global const half* restrict src,
__global half* restrict dst,
__local const half* restrict local_src,
__local half* restrict local_dst,
int C,
float bias)
{
// ToDO: copy back computed piece of local_dst into dst
}
__kernel void grn_NCHW(
__global const half* restrict src_data,
__global half* restrict dst_data,
__local half* restrict src,
__local half* restrict dst,
int C,
float bias)
{
// same as the example above
}
```
GRN kernel operates on channel-major tensors to compute average over full channel range and then normalizes input elements to produce the output.
As a part of manual DMA extension, a group of work group copy functions are introduced in addition to `async_work_group_copy`, which is also mapped to DMA call.
Here is the list of supported functions:
```cpp
// 2D sub-tensor copy
event_t WorkGroupDmaCreateStrideTransaction(
const local T *src,
global T *dst,
size_t src_width, // width of the line of source in bytes
size_t dst_width, // width of the line of destination in bytes
size_t src_stride, // stride between corresponding 2 consecutive lines of source in bytes
size_t dst_stride, // stride between corresponding 2 consecutive lines of destination in bytes
size_t size, // total number of bytes loaded for all lines from source to destination
event_t event) __OVERLOAD;
event_t WorkGroupDmaCreateStrideTransaction(
const global T *src,
local T *dst,
size_t src_width, // width of the line of source in bytes
size_t dst_width, // width of the line of destination in bytes
size_t src_stride, // stride between corresponding 2 consecutive lines of source in bytes
size_t dst_stride, // stride between corresponding 2 consecutive lines of destination in bytes
size_t size, // total number of bytes loaded for all lines from source to destination
event_t event) __OVERLOAD;
// 3D sub-tensor copy
event_t WorkGroupDmaCreate3DTransaction(
const local T *src,
global T *dst,
size_t src_width, // width of the line of source in bytes
size_t dst_width, // width of the line of destination in bytes
size_t src_stride, // stride between corresponding 2 consecutive lines of source in bytes
size_t dst_stride, // stride between corresponding 2 consecutive lines of destination in bytes
size_t num_planes, // number of planes to be copied
size_t src_plane_stride, // stride between corresponding 2 consecutive planes of source in bytes
size_t dst_plane_stride, // stride between corresponding 2 consecutive planes of destination in bytes
size_t size, // size of the loaded plane in bytes, analogues to the size in 2D case
event_t event) __OVERLOAD;
event_t WorkGroupDmaCreate3DTransaction(
const global T *src,
local T *dst,
size_t src_width, // width of the line of source in bytes
size_t dst_width, // width of the line of destination in bytes
size_t src_stride, // stride between corresponding 2 consecutive lines of source in bytes
size_t dst_stride, // stride between corresponding 2 consecutive lines of destination in bytes
size_t num_planes, // number of planes to be copied
size_t src_plane_stride, // stride between corresponding 2 consecutive planes of source in bytes
size_t dst_plane_stride, // stride between corresponding 2 consecutive planes of destination in bytes
size_t size, // size of the loaded plane in bytes, analogues to the size in 2D case
event_t event) __OVERLOAD;
```
where `T` can be `uchar`, `char`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `half` or `float`.
Modified version of the GRN kernel could be the following:
```cpp
__kernel void __dma_preload_grn_NCHW(
__global const half* restrict src,
__global half* restrict dst,
__local half* restrict local_src,
__local half* restrict local_dst,
int C,
float bias)
{
WorkGroupDmaCreate3DTransaction(
src + get_group_id(0)*get_local_size(0)
+ get_group_id(1)*get_local_size(1)*get_global_size(0), // src
local_src, // dst
get_local_size(0) * sizeof(half), // src width
get_local_size(0) * sizeof(half), // dst width
get_global_size(0) * sizeof(half), // src stride
get_local_size(0) * sizeof(half), // dst stride
C, // num planes
get_global_size(0) * get_global_size(1) * sizeof(half), // src plane stride
get_local_size(0) * get_local_size(1) * sizeof(half), // dst plane stride
get_local_size(0) * get_local_size(1) * sizeof(half), // plane size
0);
}
__kernel void __dma_postwrite_grn_NCHW(
__global const half* restrict src,
__global half* restrict dst,
__local const half* restrict local_src,
__local half* restrict local_dst,
int C,
float bias)
{
WorkGroupDmaCreate3DTransaction(
local_dst, // src
dst + get_group_id(0)*get_local_size(0)
+ get_group_id(1)*get_local_size(1)*get_global_size(0), // dst
get_local_size(0) * sizeof(half), // src width
get_local_size(0) * sizeof(half), // dst width
get_local_size(0) * sizeof(half), // src stride
get_global_size(0) * sizeof(half), // dst stride
C, // num planes
get_local_size(0) * get_local_size(1) * sizeof(half), // src plane stride
get_global_size(0) * get_global_size(1) * sizeof(half), // dst plane stride
get_local_size(0) * get_local_size(1) * sizeof(half), // plane size
0);
}
__kernel void grn_NCHW(
__global const half* restrict src_data,
__global half* restrict dst_data,
__local half* restrict src,
__local half* restrict dst,
int C,
float bias)
{
float variance = bias + 1e-9f;
#pragma unroll 8
for (int c = 0; c < C; c++)
{
float val = (float) src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)];
variance += val*val;
}
half hvariance = (half)(native_rsqrt((half)(variance/16.f))*0.25f);
#pragma unroll 8
for (int c = 0; c < C; c++)
{
dst[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)]
= src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)] * hvariance;
}
}
```
Please note `get_local_size` and `get_local_id` usage inside the kernel. 21x speedup is expected for a kernel on enet-curbs setup since it was completely limited by memory usage.
An alternative method of using DMA is to use work item copy extension. Those functions are executed inside a kernel and requires work groups equal to single work item.
Here is the list of supported work item functions:
```cpp
item_dma_event_t WorkItemDmaCreateTransaction(
const global T *src,
private T *dst,
size_t size,
item_dma_event_t event) __OVERLOAD;
item_dma_event_t WorkItemDmaCreateTransaction(
const private T *src,
global T *dst,
size_t size,
item_dma_event_t event) __OVERLOAD;
item_dma_event_t WorkItemDmaCreateStrideTransaction(
const global T *src,
private T *dst,
size_t src_width,
size_t dst_width,
size_t src_stride,
size_t dst_stride,
size_t size,
item_dma_event_t event) __OVERLOAD;
item_dma_event_t WorkItemDmaCreateStrideTransaction(
const private T *src,
global T *dst,
size_t src_width,
size_t dst_width,
size_t src_stride,
size_t dst_stride,
size_t size,
item_dma_event_t event) __OVERLOAD;
item_dma_event_t WorkItemDmaCreate3DTransaction(
const global T *src,
private T *dst,
size_t src_width,
size_t dst_width,
size_t src_stride,
size_t dst_stride,
size_t num_planes,
size_t src_plane_stride,
size_t dst_plane_stride,
size_t size,
item_dma_event_t event) __OVERLOAD;
item_dma_event_t WorkItemDmaCreate3DTransaction(
const private T *src,
global T *dst,
size_t src_width,
size_t dst_width,
size_t src_stride,
size_t dst_stride,
size_t num_planes,
size_t src_plane_stride,
size_t dst_plane_stride,
size_t size,
item_dma_event_t event) __OVERLOAD;
```
where `T` can be `uchar`, `char`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `half` or `float`.

View File

@@ -1,40 +0,0 @@
Using GPU Kernels Tuning {#openvino_docs_IE_DG_GPU_Kernels_Tuning}
======================
GPU Kernels Tuning allows you to tune models, so the heavy computational layers are configured to fit better into
hardware, which the tuning was done on. It is required to achieve best performance on GPU.
> **NOTE** Currently only convolution and fully connected layers undergo tuning process. It means that the performance boost depends on the amount of that layers in the model.
OpenVINO™ releases include the `<INSTALL_DIR>/inference_engine/bin/intel64/Release/cache.json` file with pretuned data for current state of the art models. It is highly recommended to do the
tuning for new kind of models, hardwares or drivers.
## Tuned data
GPU tuning data is saved in JSON format.
File's content is composed of 2 types of attributes and 1 type of value:
1. Execution units number - this attribute splits the content into different EU sections.
2. Hash - hashed tuned kernel data.
Key: Array with kernel name and kernel's mode index.
## Usage
---
You can activate Kernels Tuning process by setting `KEY_TUNING_MODE` flag to `TUNING_CREATE` and `KEY_TUNING_FILE` to `<"filename">` in a configuration map that is
passed to the plugin while loading a network.
This configuration modifies the behavior of the `ExecutableNetwork` object. Instead of standard network compilation, it will run the tuning process.
Please keep in mind that the tuning can be very time consuming. The bigger the network, the longer it will take.
File with tuned data is the result of this step.
> **NOTE** If a filename passed to `KEY_TUNING_FILE` points to existing tuned data and you are tuning a new model, then this file will be extended by new data. This allows you to extend existing `cache.json` provided in the OpenVINO™ release package.
The example below shows how to set and use the key files:
@snippet snippets/GPU_Kernels_Tuning.cpp part0
---
You can activate the inference with tuned data by setting `KEY_TUNING_MODE` flag to `TUNING_USE_EXISTING` and
`KEY_TUNING_FILE` flag to `<"filename">`.
GPU backend will process the content of the file during network compilation to configure the OpenCL kernels for the best performance.

View File

@@ -1,87 +0,0 @@
Glossary {#openvino_docs_IE_DG_Glossary}
=======
## Acronyms and Abbreviations
| Abbreviation | Description |
| :--- | :--- |
| API | Application Programming Interface |
| AVX | Advanced Vector Extensions |
| clDNN | Compute Library for Deep Neural Networks |
| CLI | Command Line Interface |
| CNN | Convolutional Neural Network |
| CPU | Central Processing Unit |
| CV | Computer Vision |
| DL | Deep Learning |
| DLDT | Intel(R) Deep Learning Deployment Toolkit |
| DLL | Dynamic Link Library |
| DNN | Deep Neural Networks |
| ELU | Exponential Linear rectification Unit |
| FCN | Fully Convolutional Network |
| FP | Floating Point |
| FPGA | Field-Programmable Gate Array |
| GCC | GNU Compiler Collection |
| GPU | Graphics Processing Unit |
| HD | High Definition |
| IE | Inference Engine |
| IR | Intermediate Representation |
| JIT | Just In Time |
| JTAG | Joint Test Action Group |
| LPR | License-Plate Recognition |
| LRN | Local Response Normalization |
| mAP | Mean Average Precision |
| Intel(R) MKL-DNN | Intel(R) Math Kernel Library Deep Neural Networks |
| MO | Model Optimizer |
| MVN | Mean Variance Normalization |
| NCDHW | Number of images, Channels, Depth, Height, Width |
| NCHW | Number of images, Channels, Height, Width |
| NHWC | Number of images, Height, Width, Channels |
| NMS | Non-Maximum Suppression |
| NN | Neural Network |
| NST | Neural Style Transfer |
| OD | Object Detection |
| OS | Operating System |
| PCI | Peripheral Component Interconnect |
| PReLU | Parametric Rectified Linear Unit |
| PSROI | Position Sensitive Region Of Interest |
| RCNN, R-CNN | Region-based Convolutional Neural Network |
| ReLU | Rectified Linear Unit |
| ROI | Region Of Interest |
| SDK | Software Development Kit |
| SSD | Single Shot multibox Detector |
| SSE | Streaming SIMD Extensions |
| USB | Universal Serial Bus |
| VGG | Visual Geometry Group |
| VOC | Visual Object Classes |
| WINAPI | Windows Application Programming Interface |
## Terms
Glossary of terms used in the Inference Engine
| Term | Description |
| :--- | :--- |
| Batch | Number of images to analyze during one call of infer. Maximum batch size is a property of the network and it is set before loading of the network to the plugin. In NHWC, NCHW and NCDHW image data layout representation, the N refers to the number of images in the batch |
| Blob | Memory container used for storing inputs, outputs of the network, weights and biases of the layers |
| Device (Affinitity) | A preferred Intel(R) hardware device to run the inference (CPU, GPU, etc.) |
| Extensibility mechanism, Custom layers | The mechanism that provides you with capabilities to extend the Inference Engine and Model Optimizer so that they can work with topologies containing layers that are not yet supported |
| <code>ICNNNetwork</code> | An Interface of the Convolutional Neural Network that Inference Engine reads from IR. Consists of topology, weights and biases |
| <code>IExecutableNetwork</code> | An instance of the loaded network which allows the Inference Engine to request (several) infer requests and perform inference synchronously or asynchronously |
| <code>IInferRequest</code> | Interface that represents the end point of inference on the model loaded to the plugin and represented by executable network. Inputs are set here, outputs should be requested from this interface as well |
| <code>InferenceEngineProfileInfo</code> | Represents basic inference profiling information per layer |
| Inference Engine | A C++ library with a set of classes that you can use in your application to infer input data (images) and get the result |
| Inference Engine API | The basic default API for all supported devices, which allows you to load a model from Intermediate Representation, set input and output formats and execute the model on various devices |
| Inference Engine <code>Core</code> | Inference Engine Core is a software component that manages inference on certain Intel(R) hardware devices: CPU, GPU, MYRIAD, GNA, etc. |
| Layer catalog or Operations specification | A list of supported layers or operations and its parameters. Sets of supported layers are different for different plugins, please check the documentation on plugins to verify if the Inference Engine supports certain layer on the dedicated hardware |
| <code>Layout</code> | Image data layout refers to the representation of images batch. Layout shows a sequence of 4D or 5D tensor data in memory. A typical NCHW format represents pixel in horizontal direction, rows by vertical dimension, planes by channel and images into batch |
| <code>OutputsDataMap</code> | Structure which contains information about output precisions and layouts |
| Precision | Represents data precision. For example, FP32 is 32-bit floating point, FP16 is 16-bit floating point. Precision can be changed before loading the network to the plugin |
| <code>PreProcessInfo</code> | Class that represents input data for the network. It contains information about input precision, its layout, and pre-processing |
| <code>ResponseDesc</code> | Represents debug information for an error |
## See Also
* [Deep Learning Model Optimizer IR Operations Catalog](../ops/opset.md)
* [Inference Engine Memory primitives](Memory_primitives.md)
* [Terminology](supported_plugins/Supported_Devices.md)

View File

@@ -1,84 +0,0 @@
Introduction to Inference Engine Device Query API {#openvino_docs_IE_DG_InferenceEngine_QueryAPI}
===============================
This section provides a high-level description of the process of querying of different device properties and configuration values.
Refer to the [Hello Query Device Sample](../../inference-engine/samples/hello_query_device/README.md) sources and [Multi-Device Plugin guide](supported_plugins/MULTI.md) for example of using the Inference Engine Query API in user applications.
## Using the Inference Engine Query API in Your Code
The Inference Engine `Core` class provides the following API to query device information, set or get different device configuration properties:
* <code>InferenceEngine::Core::GetAvailableDevices</code> - Provides a list of available devices. If there are more than one instance of a specific device, the devices are enumerated with `.suffix` where `suffix` is a unique string identifier. The device name can be passed to all methods of the `InferenceEngine::Core` class that work with devices, for example `InferenceEngine::Core::LoadNetwork`.
* <code>InferenceEngine::Core::GetMetric</code> - Provides information about specific device.
<code>InferenceEngine::Core::GetConfig</code> - Gets the current value of a specific configuration key.
* <code>InferenceEngine::Core::SetConfig</code> - Sets a new value for the configuration key.
The `InferenceEngine::ExecutableNetwork` class is also extended to support the Query API:
* <code>InferenceEngine::ExecutableNetwork::GetMetric</code>
* <code>InferenceEngine::ExecutableNetwork::GetConfig</code>
* <code>InferenceEngine::ExecutableNetwork::SetConfig</code>
## Query API in the Core Class
### GetAvailableDevices
@snippet snippets/InferenceEngine_QueryAPI0.cpp part0
The function returns list of available devices, for example:
```
MYRIAD.1.2-ma2480
MYRIAD.1.4-ma2480
FPGA.0
FPGA.1
CPU
GPU.0
GPU.1
...
```
Each device name can then be passed to:
* `InferenceEngine::Core::LoadNetwork` to load the network to a specific device.
* `InferenceEngine::Core::GetMetric` to get common or device specific metrics.
* All other methods of the `Core` class that accept `deviceName`.
### GetConfig()
The code below demonstrates how to understand whether `HETERO` device dumps `.dot` files with split graphs during the split stage:
@snippet snippets/InferenceEngine_QueryAPI1.cpp part1
For documentation about common configuration keys, refer to `ie_plugin_config.hpp`. Device specific configuration keys can be found in corresponding plugin folders.
### GetMetric()
* To extract device properties such as available device, device name, supported configuration keys, and others, use the `InferenceEngine::Core::GetMetric` method:
@snippet snippets/InferenceEngine_QueryAPI2.cpp part2
A returned value looks as follows: `Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz`.
> **NOTE**: All metrics have specific type, which is specified during metric instantiation. The list of common device-agnostic metrics can be found in `ie_plugin_config.hpp`. Device specific metrics (for example, for `HDDL`, `MYRIAD` devices) can be found in corresponding plugin folders.
## Query API in the ExecutableNetwork Class
### GetMetric()
The method is used to get executable network specific metric such as `METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)`:
@snippet snippets/InferenceEngine_QueryAPI3.cpp part3
Or the current temperature of `MYRIAD` device:
@snippet snippets/InferenceEngine_QueryAPI4.cpp part4
### GetConfig()
The method is used to get information about configuration values the executable network has been created with:
@snippet snippets/InferenceEngine_QueryAPI5.cpp part5
### SetConfig()
The only device that supports this method is [Multi-Device](supported_plugins/MULTI.md).

View File

@@ -1,127 +0,0 @@
# Low-Precision 8-bit Integer Inference {#openvino_docs_IE_DG_Int8Inference}
## Disclaimer
Inference Engine with low-precision 8-bit integer inference requires the following prerequisites to be satisfied:
- Inference Engine [CPU Plugin](supported_plugins/CPU.md) must be built with the Intel® Math Kernel Library (Intel® MKL) dependency. In the Intel® Distribution of OpenVINO™ it is
satisfied by default, this is mostly the requirement if you are using OpenVINO™ available in open source, because [open source version of OpenVINO™](https://github.com/openvinotoolkit/openvino) can be built with OpenBLAS* that is unacceptable if you want to use 8-bit integer inference.
- Intel® platforms that support at least one extension to x86 instruction set from the following list:
- Intel® Advanced Vector Extensions 512 (Intel® AVX-512)
- Intel® Advanced Vector Extensions 2.0 (Intel® AVX2)
- Intel® Streaming SIMD Extensions 4.2 (Intel® SSE4.2)
- A model must be quantized. To quantize the model, you can use the [Post-Training Optimization Tool](@ref pot_README) delivered with the Intel® Distribution of OpenVINO™ toolkit release package.
The 8-bit inference feature was validated on the following topologies:
* **Classification models:**
* Caffe\* DenseNet-121, DenseNet-161, DenseNet-169, DenseNet-201
* Caffe Inception v1, Inception v2, Inception v3, Inception v4
* Caffe YOLO v1 tiny, YOLO v3
* Caffe ResNet-50 v1, ResNet-101 v1, ResNet-152 v1, ResNet-269 v1
* Caffe ResNet-18
* Caffe MobileNet, MobileNet v2
* Caffe SE ResNeXt-50
* Caffe SqueezeNet v1.0, SqueezeNet v1.1
* Caffe VGG16, VGG19
* TensorFlow\* DenseNet-121, DenseNet-169
* TensorFlow Inception v1, Inception v2, Inception v3, Inception v4, Inception ResNet v2
* TensorFlow Lite Inception v1, Inception v2, Inception v3, Inception v4, Inception ResNet v2
* TensorFlow Lite MobileNet v1, MobileNet v2
* TensorFlow MobileNet v1, MobileNet v2
* TensorFlow ResNet-50 v1.5, ResNet-50 v1, ResNet-101 v1, ResNet-152 v1, ResNet-50 v2, ResNet-101 v2, ResNet-152 v2
* TensorFlow VGG16, VGG19
* TensorFlow YOLO v3
* MXNet\* CaffeNet
* MXNet DenseNet-121, DenseNet-161, DenseNet-169, DenseNet-201
* MXNet Inception v3, inception_v4
* MXNet Mobilenet, Mobilenet v2
* MXNet ResNet-101 v1, ResNet-152 v1, ResNet-101 v2, ResNet-152 v2
* MXNet ResNeXt-101
* MXNet SqueezeNet v1.1
* MXNet VGG16, VGG19
* **Object detection models:**
* Caffe SSD GoogLeNet
* Caffe SSD MobileNet
* Caffe SSD SqueezeNet
* Caffe SSD VGG16 300, SSD VGG16 512
* TensorFlow SSD MobileNet v1, SSD MobileNet v2
* MXNet SSD Inception v3 512
* MXNet SSD MobileNet 512
* MXNet SSD ResNet-50 512
* MXNet SSD VGG16 300
* ONNX\* SSD ResNet 34
* **Semantic segmentation models:**
* Unet2D
* **Recommendation system models:**
* NCF
## Introduction
A lot of investigation was made in the field of deep learning with the idea of using low precision computations during inference in order to boost deep learning pipelines and gather higher performance. For example, one of the popular approaches is to shrink the precision of activations and weights values from `fp32` precision to smaller ones, for example, to `fp11` or `int8`. For more information about this approach, refer to
**Brief History of Lower Precision in Deep Learning** section in [this whitepaper](https://software.intel.com/en-us/articles/lower-numerical-precision-deep-learning-inference-and-training).
8-bit computations (referred to as `int8`) offer better performance compared to the results of inference in higher precision (for example, `fp32`), because they allow loading more data into a single processor instruction. Usually the cost for significant boost is a reduced accuracy. However, it is proved that an accuracy drop can be negligible and depends on task requirements, so that the application engineer can set up the maximum accuracy drop that is acceptable.
Current Inference Engine solution for low-precision inference uses Intel MKL-DNN and supports inference of the following layers in 8-bit integer computation mode:
* Convolution
* FullyConnected
* ReLU
* ReLU6
* Reshape
* Permute
* Pooling
* Squeeze
* Eltwise
* Concat
* Resample
* MVN
This means that 8-bit inference can only be performed with the CPU plugin on the layers listed above. All other layers are executed in the format supported by the CPU plugin: 32-bit floating point format (`fp32`).
## Low-Precision 8-bit Integer Inference Workflow
For 8-bit integer computations, a model must be quantized. If the model is not quantized then you can use the [Post-Training Optimization Tool](@ref pot_README) to quantize the model. The quantization process adds `FakeQuantize` layers on activations and weights for most layers. Read more about mathematical computations under the hood in the [white paper](https://intel.github.io/mkl-dnn/ex_int8_simplenet.html).
8-bit inference pipeline includes two stages (also refer to the figure below):
1. *Offline stage*, or *model quantization*. During this stage, `FakeQuantize` layers are added before most layers to have quantized tensors before layers in a way that low-precision accuracy drop for 8-bit integer inference satisfies the specified threshold. The output of this stage is a quantized model. Quantized model precision is not changed, quantized tensors are in original precision range (`fp32`). `FakeQuantize` layer has `Quantization Levels` attribute which defines quants count. Quants count defines precision which is used during inference. For `int8` range `Quantization Levels` attribute value has to be 255 or 256.
2. *Run-time stage*. This stage is an internal procedure of the [CPU Plugin](supported_plugins/CPU.md). During this stage, the quantized model is loaded to the plugin. The plugin updates each `FakeQuantize` layer on activations and weights to have `FakeQuantize` output tensor values in low precision range.
![int8_flow]
### Offline Stage: Model Quantization
To infer a layer in low precision and get maximum performance, the input tensor for the layer has to be quantized and each value has to be in the target low precision range. For this purpose, `FakeQuantize` layer is used in the OpenVINO™ intermediate representation file (IR). To quantize the model, you can use the [Post-Training Optimization Tool](@ref pot_README) delivered with the Intel® Distribution of OpenVINO™ toolkit release package.
When you pass the calibrated IR to the [CPU plugin](supported_plugins/CPU.md), the plugin automatically recognizes it as a quantized model and performs 8-bit inference. Note, if you pass a quantized model to another plugin that does not support 8-bit inference, the model is inferred in precision that this plugin supports.
### Run-Time Stage: Quantization
This is the second stage of the 8-bit integer inference. After you load the quantized model IR to a plugin, the pluing uses the `Low Precision Transformation` component to update the model to infer it in low precision:
* Updates `FakeQuantize` layers to have quantized output tensors in low precision range and add dequantization layers to compensate the update. Dequantization layers are pushed through as many layers as possible to have more layers in low precision. After that, most layers have quantized input tensors in low precision range and can be inferred in low precision. Ideally, dequantization layers should be fused in next `FakeQuantize` or `ScaleShift` layers.
* Weights are quantized and stored in `Const` layers.
* Biases are updated to avoid shifts in dequantization layers.
## Performance Counters
Information about layer precision is stored in the performance counters that are
available from the Inference Engine API. The layers have the following marks:
* Suffix `I8` for layers that had 8-bit data type input and were computed in 8-bit precision
* Suffix `FP32` for layers computed in 32-bit precision
For example, the performance counters table for the Inception model can look as follows:
```
inception_5b/5x5_reduce EXECUTED layerType: Convolution realTime: 417 cpu: 417 execType: gemm_blas_I8
inception_5b/output EXECUTED layerType: Concat realTime: 34 cpu: 34 execType: ref_I8
inception_5b/output_U8_nhw... EXECUTED layerType: Reorder realTime: 33092 cpu: 33092 execType: reorder_I8
inception_5b/output_oScale... EXECUTED layerType: ScaleShift realTime: 1390 cpu: 1390 execType: jit_avx2_FP32
inception_5b/output_oScale... EXECUTED layerType: Reorder realTime: 143 cpu: 143 execType: reorder_FP32
inception_5b/pool EXECUTED layerType: Pooling realTime: 59301 cpu: 59301 execType: ref_any_I8
```
The `execType` column of the table includes inference primitives with specific suffixes.
[int8_flow]: img/cpu_int8_flow.png

View File

@@ -1,237 +0,0 @@
Integrate the Inference Engine with Your Application {#openvino_docs_IE_DG_Integrate_with_customer_application_new_API}
===============================
This section provides a high-level description of the process of integrating the Inference Engine into your application.
Refer to the [Hello Classification Sample](../../inference-engine/samples/hello_classification/README.md) sources
for example of using the Inference Engine in applications.
## Use the Inference Engine API in Your Code
The core `libinference_engine.so` library implements loading and parsing a model Intermediate Representation (IR), and triggers inference using a specified device. The core library has the following API:
* `InferenceEngine::Core`
* `InferenceEngine::Blob`, `InferenceEngine::TBlob`,
`InferenceEngine::NV12Blob`
* `InferenceEngine::BlobMap`
* `InferenceEngine::InputsDataMap`, `InferenceEngine::InputInfo`,
* `InferenceEngine::OutputsDataMap`
C++ Inference Engine API wraps the capabilities of core library:
* `InferenceEngine::CNNNetwork`
* `InferenceEngine::ExecutableNetwork`
* `InferenceEngine::InferRequest`
## Integration Steps
Integration process includes the following steps:
![integration_process]
1) **Create Inference Engine Core** to manage available devices and read network objects:
@snippet snippets/Integrate_with_customer_application_new_API.cpp part0
2) **Read a model IR** created by the Model Optimizer (.xml is supported format):
@snippet snippets/Integrate_with_customer_application_new_API.cpp part1
**Or read the model from ONNX format** (.onnx and .prototxt are supported formats). You can find more information about the ONNX format support in the document [ONNX format support in the OpenVINO™](./ONNX_Support.md).
@snippet snippets/Integrate_with_customer_application_new_API.cpp part2
3) **Configure input and output**. Request input and output information using `InferenceEngine::CNNNetwork::getInputsInfo()`, and `InferenceEngine::CNNNetwork::getOutputsInfo()`
methods:
@snippet snippets/Integrate_with_customer_application_new_API.cpp part3
Optionally, set the number format (precision) and memory layout for inputs and outputs. Refer to the
[Supported configurations](supported_plugins/Supported_Devices.md) chapter to choose the relevant configuration.
You can also allow input of any size. To do this, mark each input as resizable by setting a desired resize algorithm (e.g. `BILINEAR`) inside of the appropriate input info.
Basic color format conversions are supported as well. By default, the Inference Engine assumes
that the input color format is `BGR` and color format conversions are disabled. The Inference
Engine supports the following color format conversions:
* `RGB->BGR`
* `RGBX->BGR`
* `BGRX->BGR`
* `NV12->BGR`
where `X` is a channel that will be ignored during inference. To enable the conversions, set a
desired color format (for example, `RGB`) for each input inside of the appropriate input info.
If you want to run inference for multiple images at once, you can use the built-in batch
pre-processing functionality.
> **NOTE**: Batch pre-processing is not supported if input color format is set to `ColorFormat::NV12`.
You can use the following code snippet to configure input and output:
@snippet snippets/Integrate_with_customer_application_new_API.cpp part4
> **NOTE**: NV12 input color format pre-processing differs from other color conversions. In case of NV12,
> Inference Engine expects two separate image planes (Y and UV). You must use a specific
> `InferenceEngine::NV12Blob` object instead of default blob object and set this blob to
> the Inference Engine Infer Request using `InferenceEngine::InferRequest::SetBlob()`.
> Refer to [Hello NV12 Input Classification C++ Sample](../../inference-engine/samples/hello_nv12_input_classification/README.md)
> for more details.
If you skip this step, the default values are set:
* no resize algorithm is set for inputs
* input color format - `ColorFormat::RAW` meaning that input does not need color
conversions
* input and output precision - `Precision::FP32`
* input layout - `Layout::NCHW`
* output layout depends on number of its dimensions:
|Number of dimensions | 5 | 4 | 3 | 2 | 1 |
|:--------------------|-------|------|-----|----|----|
|Layout | NCDHW | NCHW | CHW | NC | C |
4) **Load the model** to the device using `InferenceEngine::Core::LoadNetwork()`:
@snippet snippets/Integrate_with_customer_application_new_API.cpp part5
It creates an executable network from a network object. The executable network is associated with single hardware device.
It is possible to create as many networks as needed and to use them simultaneously (up to the limitation of the hardware resources).
Third parameter is a configuration for plugin. It is map of pairs: (parameter name, parameter value). Choose device from
[Supported devices](supported_plugins/Supported_Devices.md) page for more details about supported configuration parameters.
@snippet snippets/Integrate_with_customer_application_new_API.cpp part6
5) **Create an infer request**:
@snippet snippets/Integrate_with_customer_application_new_API.cpp part7
6) **Prepare input**. You can use one of the following options to prepare input:
* **Optimal way for a single network.** Get blobs allocated by an infer request using `InferenceEngine::InferRequest::GetBlob()`
and feed an image and the input data to the blobs. In this case, input data must be aligned (resized manually) with a
given blob size and have a correct color format.
@snippet snippets/Integrate_with_customer_application_new_API.cpp part8
* **Optimal way for a cascade of networks (output of one network is input for another).** Get output blob from the first
request using `InferenceEngine::InferRequest::GetBlob()` and set it as input for the second request using
`InferenceEngine::InferRequest::SetBlob()`.
@snippet snippets/Integrate_with_customer_application_new_API.cpp part9
* **Optimal way to handle ROI (a ROI object located inside of input of one network is input for another).** It is
possible to re-use shared input by several networks. You do not need to allocate separate input blob for a network if
it processes a ROI object located inside of already allocated input of a previous network. For instance, when first
network detects objects on a video frame (stored as input blob) and second network accepts detected bounding boxes
(ROI inside of the frame) as input.
In this case, it is allowed to re-use pre-allocated input blob (used by first network) by second network and just crop
ROI without allocation of new memory using `InferenceEngine::make_shared_blob()` with passing of
`InferenceEngine::Blob::Ptr` and `InferenceEngine::ROI` as parameters.
@snippet snippets/Integrate_with_customer_application_new_API.cpp part10
Make sure that shared input is kept valid during execution of each network. Otherwise, ROI blob may be corrupted if the
original input blob (that ROI is cropped from) has already been rewritten.
* Allocate input blobs of the appropriate types and sizes, feed an image and the input data to the blobs, and call
`InferenceEngine::InferRequest::SetBlob()` to set these blobs for an infer request:
@snippet snippets/Integrate_with_customer_application_new_API.cpp part11
A blob can be filled before and after `SetBlob()`.
> **NOTE:**
>
> * `SetBlob()` method compares precision and layout of an input blob with ones defined on step 3 and
> throws an exception if they do not match. It also compares a size of the input blob with input
> size of the read network. But if input was configured as resizable, you can set an input blob of
> any size (for example, any ROI blob). Input resize will be invoked automatically using resize
> algorithm configured on step 3. Similarly to the resize, color format conversions allow the color
> format of an input blob to differ from the color format of the read network. Color format
> conversion will be invoked automatically using color format configured on step 3.
>
> * `GetBlob()` logic is the same for pre-processable and not pre-processable input. Even if it is
> called with input configured as resizable or as having specific color format, a blob allocated by
> an infer request is returned. Its size and color format are already consistent with the
> corresponding values of the read network. No pre-processing will happen for this blob. If you
> call `GetBlob()` after `SetBlob()`, you will get the blob you set in `SetBlob()`.
7) **Do inference** by calling the `InferenceEngine::InferRequest::StartAsync` and `InferenceEngine::InferRequest::Wait`
methods for asynchronous request:
@snippet snippets/Integrate_with_customer_application_new_API.cpp part12
or by calling the `InferenceEngine::InferRequest::Infer` method for synchronous request:
@snippet snippets/Integrate_with_customer_application_new_API.cpp part13
`StartAsync` returns immediately and starts inference without blocking main thread, `Infer` blocks
main thread and returns when inference is completed.
Call `Wait` for waiting result to become available for asynchronous request.
There are three ways to use it:
* specify maximum duration in milliseconds to block for. The method is blocked until the specified timeout has elapsed,
or the result becomes available, whichever comes first.
* `InferenceEngine::IInferRequest::WaitMode::RESULT_READY` - waits until inference result becomes available
* `InferenceEngine::IInferRequest::WaitMode::STATUS_ONLY` - immediately returns request status.It does not
block or interrupts current thread.
Both requests are thread-safe: can be called from different threads without fearing corruption and failures.
Multiple requests for single `ExecutableNetwork` are executed sequentially one by one in FIFO order.
While request is ongoing, all its methods except `InferenceEngine::InferRequest::Wait` would throw an
exception.
8) Go over the output blobs and **process the results**.
Note that casting `Blob` to `TBlob` via `std::dynamic_pointer_cast` is not recommended way,
better to access data via `buffer()` and `as()` methods as follows:
@snippet snippets/Integrate_with_customer_application_new_API.cpp part14
## Build Your Application
For details about building your application, refer to the CMake files for the sample applications.
All samples source code is located in the `<INSTALL_DIR>/openvino/inference_engine/samples` directory, where `INSTALL_DIR` is the OpenVINO™ installation directory.
### CMake project creation
1. **Create a structure** for the project:
``` sh
project/
├── CMakeLists.txt - CMake file to build
├── ... - Additional folders like includes/
└── src/ - source folder
└── main.cpp
build/ - build directory
...
```
2. **Include Inference Engine, nGraph and OpenCV libraries** in `project/CMakeLists.txt`
[OpenCV](https://docs.opencv.org/master/db/df5/tutorial_linux_gcc_cmake.html) integration is needed mostly for pre-processing input data and ngraph for more complex applications using [ngraph API](../nGraph_DG/nGraph_dg.md).
``` cmake
cmake_minimum_required(VERSION 3.0.0)
project(project_name)
find_package(ngraph REQUIRED)
find_package(InferenceEngine REQUIRED)
find_package(OpenCV REQUIRED)
add_executable(${PROJECT_NAME} src/main.cpp)
target_link_libraries(${PROJECT_NAME} PRIVATE ${InferenceEngine_LIBRARIES} ${OpenCV_LIBS} ${NGRAPH_LIBRARIES})
```
3. **To build your project** using CMake with the default build tools currently available on your machine, execute the following commands:
> **NOTE**: Make sure **Set the Environment Variables** step in [OpenVINO Installation](../../inference-engine/samples/hello_nv12_input_classification/README.md) document is applied to your terminal, otherwise `InferenceEngine_DIR` and `OpenCV_DIR` variables won't be configured properly to pass `find_package` calls.
```sh
cd build/
cmake ../project
cmake --build .
```
It's allowed to specify additional build options (e.g. to build CMake project on Windows with a specific build tools). Please refer to the [CMake page](https://cmake.org/cmake/help/latest/manual/cmake.1.html#manual:cmake(1)) for details.
### Run Your Application
> **NOTE**: Before running, make sure you completed **Set the Environment Variables** section in [OpenVINO Installation](../../inference-engine/samples/hello_nv12_input_classification/README.md) document so that the application can find the libraries.
To run compiled applications on Microsoft* Windows* OS, make sure that Microsoft* Visual C++ 2017
Redistributable and Intel® C++ Compiler 2017 Redistributable packages are installed and
`<INSTALL_DIR>/bin/intel64/Release/*.dll` files are placed to the
application folder or accessible via `%PATH%` environment variable.
[integration_process]: img/integration_process.png

View File

@@ -1,99 +0,0 @@
# Introduction to the Performance Topics {#openvino_docs_IE_DG_Intro_to_Performance}
This section is a shorter version of the
[Optimization Guide](supported_plugins/MULTI.md) for the Intel Deep Learning Deployment Toolkit.
## Precision
Inference precision directly affects the performance.
Model Optimizer can produce an IR with different precision. For example, float16 IR initially targets VPU and GPU devices, while, for example, the CPU can also execute regular float32.
Also, further device-specific inference precision settings are available, for example, [8-bit integer](Int8Inference.md) or [bfloat16](Bfloat16Inference.md) inference on the CPU.
Note that for [MULTI device](supported_plugins/MULTI.md) that supports automatic inference on multiple devices in parallel, you can use the FP16 IR.
You can find more information, including preferred data types for specific devices, in the
[Supported Devices](supported_plugins/Supported_Devices.md) section.
## Lowering Inference Precision
Default optimization is used for CPU and implies that inference is made with lower precision if it is possible on a given platform to reach better performance with acceptable range of accuracy.
This approach is used for CPU device if platform supports the AVX512_BF16 instruction. In this case, a regular float32 model is converted to [bfloat16](Bfloat16Inference.md) internal representation and inference is provided with bfloat16 layers usage.
Below is the example command line to disable this feature on the CPU device with the AVX512_BF16 instruction and execute regular float32.
```
$ benchmark_app -m <model.xml> -enforcebf16=false
```
## Latency vs. Throughput
One way to increase computational efficiency is batching, which combines many (potentially tens) of
input images to achieve optimal throughput. However, high batch size also comes with a
latency penalty. So, for more real-time oriented usages, lower batch sizes (as low as a single input) are used.
Refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample, which allows latency vs. throughput measuring.
## Using Async API
To gain better performance on accelerators, such as VPU, the Inference Engine uses the asynchronous approach (see
[Integrating Inference Engine in Your Application (current API)](Integrate_with_customer_application_new_API.md)).
The point is amortizing the costs of data transfers, by pipe-lining, see [Async API explained](@ref omz_demos_object_detection_demo_ssd_async_README).
Since the pipe-lining relies on the availability of the parallel slack, running multiple inference requests in parallel is essential.
Refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample, which enables running a number of inference requests in parallel. Specifying different number of request produces different throughput measurements.
## Best Latency on the Multi-Socket CPUs
Note that when latency is of concern, there are additional tips for multi-socket systems.
When input is limited to the single image, the only way to achieve the best latency is to limit execution to the single socket.
The reason is that single image is simply not enough
to saturate more than one socket. Also NUMA overheads might dominate the execution time.
Below is the example command line that limits the execution to the single socket using numactl for the best *latency* value
(assuming the machine with 28 phys cores per socket):
```
limited to the single socket).
$ numactl -m 0 --physcpubind 0-27 benchmark_app -m <model.xml> -api sync -nthreads 28
```
Note that if you have more than one input, running as many inference requests as you have NUMA nodes (or sockets)
usually gives the same best latency as a single request on the single socket, but much higher throughput. Assuming two NUMA nodes machine:
```
$ benchmark_app -m <model.xml> -nstreams 2
```
Number of NUMA nodes on the machine can be queried via 'lscpu'.
Please see more on the NUMA support in the [Optimization Guide](supported_plugins/MULTI.md).
## Throughput Mode for CPU
Unlike most accelerators, CPU is perceived as an inherently latency-oriented device.
Since 2018 R5 release, the Inference Engine introduced the "throughput" mode, which allows the Inference Engine to efficiently run multiple inference requests on the CPU simultaneously, greatly improving the throughput.
Internally, the execution resources are split/pinned into execution "streams".
Using this feature gains much better performance for the networks that originally are not scaled well with a number of threads (for example, lightweight topologies). This is especially pronounced for the many-core server machines.
Run the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) and play with number of infer requests running in parallel, next section.
Try different values of the `-nstreams` argument from `1` to a number of CPU cores and find one that provides the best performance.
In addition to the number of streams, it is also possible to play with the batch size to find the throughput sweet-spot.
The throughput mode relaxes the requirement to saturate the CPU by using a large batch: running multiple independent inference requests in parallel often gives much better performance, than using a batch only.
This allows you to simplify the app-logic, as you don't need to combine multiple inputs into a batch to achieve good CPU performance.
Instead, it is possible to keep a separate infer request per camera or another source of input and process the requests in parallel using Async API.
## Benchmark App
[Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample is the best performance reference.
It has a lot of device-specific knobs, but the primary usage is as simple as:
```bash
$ ./benchmark_app d GPU m <model> -i <input>
```
to measure the performance of the model on the GPU.
Or
```bash
$ ./benchmark_app d CPU m <model> -i <input>
```
to execute on the CPU instead.
For example, for the CPU throughput mode from the previous section, you can play with number of streams (`-nstreams` command-line param).
Try different values of the `-nstreams` argument from `1` to a number of CPU cores and find one that provides the best performance. For example, on a 8-core CPU, compare the `-nstreams 1` (which is a latency-oriented scenario) to the `2`, `4` and `8` streams. Notice that `benchmark_app` automatically queries/creates/runs number of requests required to saturate the given number of streams.
Finally, notice that when you don't specify number of streams with `-nstreams`, "AUTO" value for the streams is used, e.g. for the CPU this is [CPU_THROUGHPUT_AUTO](supported_plugins/CPU.md). You can spot the actual value behind "AUTO" for your machine in the application output.
Notice that the "AUTO" number is not necessarily most optimal, so it is generally recommended to play either with the benchmark_app's "-nstreams" as described above, or via [new Workbench tool](@ref workbench_docs_Workbench_DG_Introduction).This allows you to simplify the app-logic, as you don't need to combine multiple inputs into a batch to achieve good CPU performance.
Instead, it is possible to keep a separate infer request per camera or another source of input and process the requests in parallel using Async API.
## Kernels Tuning for GPU
GPU backend comes with a feature, that allows models tuning, so the workload is configured to fit better into hardware.
Tuning is time consuming process, which internally execute every layer several (or even hundreds) times to find most performant configuration.
This configuration is saved into json-formatted file, whose name can be passed as plugin param to network. GPU backend will process this data to configure kernels for the best performance.
For more details about Kernels Tuning and How-To please refer to [GPU Kernels Tuning](GPU_Kernels_Tuning.md).

Some files were not shown because too many files have changed in this diff Show More