Remove myriad plugin (#15131)
* Removed Intel MYRIAD plugin
* Removed Intel MYRIAD from CI files
* Removed Intel MYRIAD from cmake folder
* Removed MYRIAD, HDDL from samples
* Removed MYRIAD, HDDL from scripts folder
* Removed MYRIAD from bindings folder (C and Python API)
* Removed MYRIAD tests
* Removed MYRIAD from tests folder
* Removed MYRIAD from tools folder
* Removed HDDL (VAD), MYRIAD (NCS2) from documentation
* Fixed build for AUTO unit tests
* Fixed clang code style
* Fixed comments and issues
* Removed MYRIAD from AUTO tests
* Disabled MULTI tests in CI
* Update docs/OV_Runtime_UG/auto_device_selection.md
* Update docs/get_started/get_started_demos.md
* Update docs/OV_Runtime_UG/deployment/local-distribution.md

Co-authored-by: Yuan Xu <yuan1.xu@intel.com>
@@ -412,6 +412,7 @@ jobs:
    - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ieMultiPluginUnitTests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ieMultiPluginUnitTests.xml
      displayName: 'MULTI UT'
+     enabled: 'false'

    - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_template_func_tests --gtest_filter=*smoke* --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-templateFuncTests.xml
      displayName: 'TEMPLATE FuncTests'

@@ -177,7 +177,6 @@ jobs:
      -DENABLE_DATA=OFF
      -DCMAKE_TOOLCHAIN_FILE=$(OPENVINO_REPO_DIR)/cmake/arm64.toolchain.cmake
      -DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
-     -DENABLE_INTEL_MYRIAD=OFF
      -DTHREADING=TBB
      -DTBB_DIR=$(INSTALL_ONETBB)/lib/cmake/TBB
      -DCMAKE_VERBOSE_MAKEFILE=ON

@@ -122,7 +122,6 @@ jobs:
      -DENABLE_AUTO_BATCH=OFF
      -DENABLE_HETERO=OFF
      -DENABLE_TEMPLATE=OFF
-     -DENABLE_INTEL_MYRIAD_COMMON=OFF
      -DENABLE_OV_ONNX_FRONTEND=OFF
      -DENABLE_OV_PADDLE_FRONTEND=OFF
      -DENABLE_OV_TF_FRONTEND=OFF

@@ -133,7 +133,6 @@ jobs:
      -DENABLE_INTEL_CPU=OFF \
      -DENABLE_INTEL_GPU=OFF \
      -DENABLE_INTEL_GNA=OFF \
-     -DENABLE_INTEL_MYRIAD_COMMON=OFF \
      -DENABLE_OV_TF_FRONTEND=OFF \
      -DENABLE_OV_PADDLE_FRONTEND=OFF \
      -DENABLE_OV_ONNX_FRONTEND=OFF \

@@ -328,6 +328,7 @@ jobs:

    - script: $(INSTALL_TEST_DIR)/ieMultiPluginUnitTests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ieMultiPluginUnitTests.xml
      displayName: 'MULTI UT'
+     enabled: 'false'

    - script: |
        $(INSTALL_TEST_DIR)/ov_template_func_tests --gtest_filter=*smoke* --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-templateFuncTests.xml

@@ -107,7 +107,6 @@ jobs:
      -GNinja
      -DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
      -DCMAKE_COMPILE_WARNING_AS_ERROR=ON
-     -DENABLE_INTEL_MYRIAD_COMMON=OFF
      -DENABLE_INTEL_GNA=OFF
      -DENABLE_INTEL_GPU=OFF
      -DENABLE_CPPLINT=OFF

@@ -178,7 +178,6 @@ jobs:
      -DCMAKE_COMPILE_WARNING_AS_ERROR=ON ^
      -DENABLE_STRICT_DEPENDENCIES=OFF ^
      -DENABLE_PYTHON=ON ^
-     -DENABLE_INTEL_MYRIAD_COMMON=OFF ^
      -DBUILD_nvidia_plugin=OFF ^
      -DPYTHON_EXECUTABLE="C:\hostedtoolcache\windows\Python\3.10.7\x64\python.exe" ^
      -DPYTHON_INCLUDE_DIR="C:\hostedtoolcache\windows\Python\3.10.7\x64\include" ^

@@ -296,6 +295,7 @@ jobs:

    - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ieMultiPluginUnitTests --gtest_output=xml:$(INSTALL_TEST_DIR)\TEST-ieMultiPluginUnitTests.xml
      displayName: 'MULTI UT'
+     enabled: 'false'

    - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ov_template_func_tests --gtest_output=xml:$(INSTALL_TEST_DIR)\TEST-templateFuncTests.xml
      displayName: 'TEMPLATE FuncTests'

@@ -127,8 +127,6 @@ jobs:
      -DENABLE_AUTO_BATCH=OFF ^
      -DENABLE_HETERO=OFF ^
      -DENABLE_TEMPLATE=OFF ^
-     -DENABLE_INTEL_MYRIAD=OFF ^
-     -DENABLE_INTEL_MYRIAD_COMMON=OFF ^
      -DENABLE_TESTS=OFF ^
      -DENABLE_OV_ONNX_FRONTEND=OFF ^
      -DENABLE_OV_PADDLE_FRONTEND=OFF ^

@@ -174,8 +172,6 @@ jobs:
      -DENABLE_AUTO_BATCH=OFF ^
      -DENABLE_HETERO=OFF ^
      -DENABLE_TEMPLATE=OFF ^
-     -DENABLE_INTEL_MYRIAD=OFF ^
-     -DENABLE_INTEL_MYRIAD_COMMON=OFF ^
      -DENABLE_TESTS=OFF ^
      -DENABLE_OV_ONNX_FRONTEND=OFF ^
      -DENABLE_OV_PADDLE_FRONTEND=OFF ^

@@ -50,7 +50,6 @@ WORKDIR /openvino/build
  RUN cmake .. \
      -G Ninja \
      -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-     -DENABLE_INTEL_MYRIAD_COMMON=OFF \
      -DENABLE_INTEL_GNA=OFF \
      -DENABLE_INTEL_GPU=OFF \
      -DENABLE_HETERO=OFF \
.github/CODEOWNERS (vendored, 4 changed lines)
@@ -59,10 +59,6 @@
  /src/tests/**/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers
  /thirdparty/ocl/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers

- # OpenVINO VPU:
- /src/plugins/intel_myriad @openvinotoolkit/openvino-ie-vpu-maintainers
- /src/tests/**/myriad/ @openvinotoolkit/openvino-ie-vpu-maintainers
-
  # OpenVINO GNA:
  /src/plugins/intel_gna/ @openvinotoolkit/openvino-ie-gna-maintainers
  /src/common/legacy/ @openvinotoolkit/openvino-ie-gna-maintainers

.github/github_org_control/config.json (vendored, 1 changed line)
@@ -38,7 +38,6 @@
  "openvino-tf-frontend-maintainers": "category: TF FE",
  "openvino-onnx-frontend-maintainers": "category: ONNX FE",
  "openvino-ie-tests-maintainers": "category: IE Tests",
- "openvino-ie-vpu-maintainers": "category: VPU",
  "openvino-mo-maintainers": "category: MO",
  "openvino-ngraph-maintainers": "category: Core",
  "openvino-scripts-maintainers": "category: build",
.github/labeler.yml (vendored, 5 changed lines)
@@ -71,7 +71,6 @@
  - 'src/frontends/tests/frontend/shared/**/*'
  - any: ['src/tests/**/*',
          '!src/tests/**/gpu/**/*',
-         '!src/tests/**/myriad/**/*',
          '!src/tests/**/inference_engine/**/*']

  'category: inference':

@@ -129,7 +128,3 @@
  'category: transformations':
  - 'src/common/transformations/**/*'
  - 'src/common/offline_transformations/**/*'
-
- 'category: VPU':
- - 'src/plugins/intel_myriad/**/*'
- - 'src/tests/**/myriad/**/*'

.github/workflows/build_doc.yml (vendored, 2 changed lines)
@@ -56,7 +56,7 @@ jobs:
        echo "$(pwd)/doxygen-$DOXY_VER/bin/" >> $GITHUB_PATH

      - name: CMake configure
-       run: cmake -DENABLE_INTEL_MYRIAD_COMMON=OFF -DENABLE_DOCS=ON -B build
+       run: cmake -DENABLE_DOCS=ON -B build

      - name: Cache documentation
        id: cache_sphinx_docs

.github/workflows/code_snippets.yml (vendored, 2 changed lines)
@@ -31,7 +31,7 @@ jobs:
          lfs: true

      - name: CMake configure
-       run: cmake -DENABLE_INTEL_MYRIAD_COMMON=OFF -DCMAKE_BUILD_TYPE=Release -B build
+       run: cmake -DCMAKE_BUILD_TYPE=Release -B build

      - name: Get number of CPU cores
        uses: SimenB/github-actions-cpu-cores@v1
.github/workflows/code_style.yml (vendored, 6 changed lines)
@@ -22,8 +22,6 @@ jobs:

      - name: Install dependencies
        run: |
-         sudo apt update
-         sudo apt --assume-yes install libusb-1.0-0-dev
          python3 -m pip install --upgrade pip
          python3 -m pip install -r ./src/bindings/python/requirements.txt
          # Add for -DENABLE_PYTHON=ON, no cython

@@ -59,7 +57,7 @@ jobs:
          sudo apt --assume-yes install shellcheck

      - name: CMake configure
-       run: cmake -DENABLE_INTEL_MYRIAD_COMMON=OFF -B build
+       run: cmake -B build

      - name: Shellcheck cmake target
        run: cmake --build build --target ie_shellcheck -j8

@@ -94,7 +92,7 @@ jobs:
        run: python3 -m pip install -r cmake/developer_package/ncc_naming_style/requirements_dev.txt

      - name: CMake configure
-       run: cmake -DENABLE_INTEL_MYRIAD_COMMON=OFF -B build
+       run: cmake -B build

      - name: Naming convention check
        run: cmake --build build --target ncc_all -j8
@@ -34,7 +34,7 @@ OpenVINO™ is an open-source toolkit for optimizing and deploying AI inference.
  - Reduce resource demands and efficiently deploy on a range of Intel® platforms from edge to cloud

- This open-source version includes several components: namely [Model Optimizer], [OpenVINO™ Runtime], [Post-Training Optimization Tool], as well as CPU, GPU, MYRIAD, multi device and heterogeneous plugins to accelerate deep learning inference on Intel® CPUs and Intel® Processor Graphics.
+ This open-source version includes several components: namely [Model Optimizer], [OpenVINO™ Runtime], [Post-Training Optimization Tool], as well as CPU, GPU, GNA, multi device and heterogeneous plugins to accelerate deep learning inference on Intel® CPUs and Intel® Processor Graphics.
  It supports pre-trained models from [Open Model Zoo], along with 100+ open
  source and public models in popular formats such as TensorFlow, ONNX, PaddlePaddle, MXNet, Caffe, Kaldi.

@@ -90,12 +90,6 @@ The OpenVINO™ Runtime can infer models on different hardware devices. This sec
        <td><b><i><a href="./src/plugins/intel_gna">openvino_intel_gna_plugin</a></i></b></td>
        <td>Intel Speech Enabling Developer Kit, Amazon Alexa* Premium Far-Field Developer Kit, Intel Pentium Silver J5005 Processor, Intel Pentium Silver N5000 Processor, Intel Celeron J4005 Processor, Intel Celeron J4105 Processor, Intel Celeron Processor N4100, Intel Celeron Processor N4000, Intel Core i3-8121U Processor, Intel Core i7-1065G7 Processor, Intel Core i7-1060G7 Processor, Intel Core i5-1035G4 Processor, Intel Core i5-1035G7 Processor, Intel Core i5-1035G1 Processor, Intel Core i5-1030G7 Processor, Intel Core i5-1030G4 Processor, Intel Core i3-1005G1 Processor, Intel Core i3-1000G1 Processor, Intel Core i3-1000G4 Processor</td>
      </tr>
-     <tr>
-       <td>VPU</td>
-       <td><a href="https://docs.openvino.ai/nightly/openvino_docs_IE_DG_supported_plugins_VPU.html#doxid-openvino-docs-i-e-d-g-supported-plugins-v-p-u">Myriad plugin</a></td>
-       <td><b><i><a href="./src/plugins/intel_myriad">openvino_intel_myriad_plugin</a></i></b></td>
-       <td>Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X</td>
-     </tr>
    </tbody>
  </table>
@@ -106,13 +106,6 @@ if (ENABLE_INTEL_GPU)
                          PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
  endif()

- if(ENABLE_INTEL_MYRIAD)
-     ov_coverage_extract(INPUT "openvino" OUTPUT "intel_myriad_plugin"
-                         PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/plugins/intel_myriad/*")
-     ov_coverage_genhtml(INFO_FILE "intel_myriad_plugin"
-                         PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
- endif()
-
  if(ENABLE_TEMPLATE)
      ov_coverage_extract(INPUT "openvino" OUTPUT "template_plugin"
                          PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/plugins/template/*")

@@ -23,10 +23,6 @@ message(STATUS "MODELS_PATH=" ${MODELS_PATH})

  fetch_models_and_validation_set()

- if(ENABLE_INTEL_MYRIAD)
-     include(${OpenVINO_SOURCE_DIR}/src/plugins/intel_myriad/myriad_dependencies.cmake)
- endif()
-
  ## Intel OMP package
  if(THREADING STREQUAL "OMP")
      reset_deps_cache(OMP)

@@ -7,7 +7,7 @@ function links static library without removing any symbol from it.

  ieTargetLinkWholeArchive(<target name> <lib1> [<lib2> ...])
  Example:
- ieTargetLinkWholeArchive("MyriadFunctionalTests" "CommonLib" "AnotherLib")
+ ieTargetLinkWholeArchive("FunctionalTests" "CommonLib" "AnotherLib")

  #]]
@@ -98,24 +98,10 @@ ie_option (ENABLE_HETERO "Enables Hetero Device Plugin" ON)

  ie_option (ENABLE_TEMPLATE "Enable template plugin" ON)

- ie_dependent_option (ENABLE_INTEL_MYRIAD_COMMON "common part of myriad plugin" ON "NOT WINDOWS_PHONE;NOT WINDOWS_STORE" OFF)
-
- if(UNIVERSAL2 OR EMSCRIPTEN)
-     set(ENABLE_INTEL_MYRIAD_DEFAULT OFF)
- else()
-     set(ENABLE_INTEL_MYRIAD_DEFAULT ON)
- endif()
-
- ie_dependent_option (ENABLE_INTEL_MYRIAD "myriad targeted plugin for OpenVINO Runtime" ${ENABLE_INTEL_MYRIAD_DEFAULT} "NOT RISCV64;ENABLE_INTEL_MYRIAD_COMMON" OFF)
-
- ie_dependent_option (ENABLE_MYRIAD_NO_BOOT "myriad plugin will skip device boot" OFF "ENABLE_INTEL_MYRIAD" OFF)
-
  ie_dependent_option (ENABLE_GAPI_TESTS "tests for GAPI kernels" ON "ENABLE_GAPI_PREPROCESSING;ENABLE_TESTS" OFF)

  ie_dependent_option (GAPI_TEST_PERF "if GAPI unit tests should examine performance" OFF "ENABLE_GAPI_TESTS" OFF)

- ie_dependent_option (ENABLE_MYRIAD_MVNC_TESTS "functional and behavior tests for mvnc api" OFF "ENABLE_TESTS;ENABLE_INTEL_MYRIAD" OFF)
-
  ie_dependent_option (ENABLE_DATA "fetch models from testdata repo" ON "ENABLE_FUNCTIONAL_TESTS;NOT ANDROID" OFF)

  ie_dependent_option (ENABLE_BEH_TESTS "tests oriented to check OpenVINO Runtime API correctness" ON "ENABLE_TESTS" OFF)
@@ -52,8 +52,6 @@ macro(ov_cpack_settings)
            NOT item STREQUAL OV_CPACK_COMP_PYTHON_WHEELS AND
            # see ticket # 82605
            NOT item STREQUAL "gna" AND
-           # myriad is EOL in 2023.0
-           NOT item STREQUAL "myriad" AND
            # even for case of system TBB we have installation rules for wheels packages
            # so, need to skip this explicitly
            NOT item MATCHES "^tbb(_dev)?$" AND

@@ -182,16 +180,6 @@ macro(ov_cpack_settings)
        set(gpu_copyright "generic")
    endif()

-   # intel-myriad
-   if(ENABLE_INTEL_MYRIAD AND "myriad" IN_LIST CPACK_COMPONENTS_ALL)
-       set(CPACK_COMPONENT_MYRIAD_DESCRIPTION "Intel® Movidius™ VPU plugin")
-       set(CPACK_COMPONENT_MYRIAD_DEPENDS "${OV_CPACK_COMP_CORE}")
-       set(CPACK_DEBIAN_MYRIAD_PACKAGE_NAME "libopenvino-intel-vpu-plugin-${cpack_name_ver}")
-       set(CPACK_DEBIAN_MYRIAD_PACKAGE_CONTROL_EXTRA "${def_postinst};${def_postrm}")
-       _ov_add_plugin(myriad OFF)
-       set(myriad_copyright "generic")
-   endif()
-
    # intel-gna
    if(ENABLE_INTEL_GNA AND "gna" IN_LIST CPACK_COMPONENTS_ALL)
        set(CPACK_COMPONENT_GNA_DESCRIPTION "Intel® Gaussian Neural Accelerator")

@@ -38,8 +38,6 @@ macro(ov_cpack_settings)
            NOT item STREQUAL OV_CPACK_COMP_PYTHON_WHEELS AND
            # see ticket # 82605
            NOT item STREQUAL "gna" AND
-           # myriad is EOL in 2023.0
-           NOT item STREQUAL "myriad" AND
            # even for case of system TBB we have installation rules for wheels packages
            # so, need to skip this explicitly
            NOT item MATCHES "^tbb(_dev)?$" AND

@@ -179,15 +177,6 @@ macro(ov_cpack_settings)
        set(gpu_copyright "generic")
    endif()

-   # intel-myriad
-   if(ENABLE_INTEL_MYRIAD AND "myriad" IN_LIST CPACK_COMPONENTS_ALL)
-       set(CPACK_COMPONENT_MYRIAD_DESCRIPTION "Intel® Movidius™ VPU")
-       set(CPACK_RPM_MYRIAD_PACKAGE_REQUIRES "${core_package}")
-       set(CPACK_RPM_MYRIAD_PACKAGE_NAME "libopenvino-intel-vpu-plugin-${cpack_name_ver}")
-       _ov_add_package(plugin_packages myriad)
-       set(myriad_copyright "generic")
-   endif()
-
    # intel-gna
    if(ENABLE_INTEL_GNA AND "gna" IN_LIST CPACK_COMPONENTS_ALL)
        set(CPACK_COMPONENT_GNA_DESCRIPTION "Intel® Gaussian Neural Accelerator")
@@ -231,29 +231,6 @@ macro(_ov_find_intel_gna_dependencies)
    unset(_OV_ENABLE_INTEL_GNA)
  endmacro()

- macro(_ov_find_intel_myriad_dependencies)
-     set(_OV_ENABLE_INTEL_MYRIAD "@ENABLE_INTEL_MYRIAD@")
-     if(_OV_ENABLE_INTEL_MYRIAD)
-         find_package(PkgConfig QUIET)
-         if(PkgConfig_FOUND AND NOT ANDROID)
-             if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY)
-                 set(pkg_config_quiet_arg QUIET)
-             endif()
-             if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED)
-                 set(pkg_config_required_arg REQUIRED)
-             endif()
-             pkg_search_module(libusb
-                               ${pkg_config_quiet_arg}
-                               ${pkg_config_required_arg}
-                               IMPORTED_TARGET
-                               libusb-1.0)
-             unset(pkg_config_quiet_arg)
-             unset(pkg_config_required_arg)
-         endif()
-     endif()
-     unset(_OV_ENABLE_INTEL_MYRIAD)
- endmacro()
-
  function(_ov_target_no_deprecation_error)
      if(NOT MSVC)
          if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")

@@ -282,7 +259,6 @@ set(_OV_ENABLE_OPENVINO_BUILD_SHARED "@BUILD_SHARED_LIBS@")
  if(NOT _OV_ENABLE_OPENVINO_BUILD_SHARED)
      _ov_find_tbb()
      _ov_find_intel_gna_dependencies()
-     _ov_find_intel_myriad_dependencies()
  endif()

  _ov_find_dependency(Threads)

@@ -24,6 +24,3 @@ endmacro()

  # for ittapi
  _set_if_not_defined(FORCE_32 ON)
-
- # need libusb 32-bits version
- _set_if_not_defined(ENABLE_INTEL_MYRIAD_COMMON OFF)
@@ -11,8 +11,6 @@ openvino_tensorflow.set_backend('<backend_name>')

  **OpenVINO™ integration with TensorFlow** accelerates inference across many AI models on a variety of Intel® technologies, such as:
  - Intel® CPUs
  - Intel® integrated GPUs
- - Intel® Movidius™ Vision Processing Units - referred to as VPU
- - Intel® Vision Accelerator Design with 8 Intel Movidius™ MyriadX VPUs - referred to as VAD-M or HDDL

  > **NOTE**: For maximum performance, efficiency, tooling customization, and hardware control, we recommend developers to adopt native OpenVINO™ solutions.
  To find out more about the product itself, as well as learn how to use it in your project, check its dedicated [GitHub repository](https://github.com/openvinotoolkit/openvino_tensorflow/tree/master/docs).

@@ -9,7 +9,6 @@
     openvino_docs_Extensibility_UG_add_openvino_ops
     openvino_docs_Extensibility_UG_Frontend_Extensions
     openvino_docs_Extensibility_UG_GPU
-    openvino_docs_Extensibility_UG_VPU_Kernel
     openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Customize_Model_Optimizer

  .. toctree::

@@ -36,7 +35,7 @@ Importing models with such operations requires additional steps. This guide illu

  Defining a new custom operation basically consists of two parts:

- 1. Definition of operation semantics in OpenVINO, the code that describes how this operation should be inferred consuming input tensor(s) and producing output tensor(s). The implementation of execution kernels for [GPU](./GPU_Extensibility.md) and [VPU](./VPU_Extensibility.md) is described in separate guides.
+ 1. Definition of operation semantics in OpenVINO, the code that describes how this operation should be inferred consuming input tensor(s) and producing output tensor(s). The implementation of execution kernels for [GPU](./GPU_Extensibility.md) is described in a separate guide.

  2. Mapping rule that facilitates conversion of framework operation representation to OpenVINO defined operation semantics.
@@ -1,603 +0,0 @@
# How to Implement Custom Layers for VPU (Intel® Neural Compute Stick 2) {#openvino_docs_Extensibility_UG_VPU_Kernel}

To enable operations not supported by OpenVINO™ out of the box, you need a custom extension for Model Optimizer, a custom nGraph operation set, and a custom kernel for the device you will target. This page describes custom kernel support for one such VPU, the Intel® Neural Compute Stick 2 device, which uses the MYRIAD device plugin.

> **NOTE:**
> * OpenCL custom layer support is available in the preview mode.
> * This section assumes you are familiar with developing kernels using OpenCL.

To customize your topology with an OpenCL layer, carry out the tasks described on this page:

1. Write and compile your OpenCL code with the standalone offline OpenCL compiler (`clc`).
2. Write a configuration file to bind the OpenCL kernel to the topology file (`.xml`) of the model IR.
3. Pass the configuration file to the OpenVINO™ Runtime with the model IR.

## Compile OpenCL code for VPU (Intel® Neural Compute Stick 2)

> **NOTE**: The OpenCL compiler, which targets only the SHAVE processors of the Intel® Neural Compute Stick 2, is redistributed with OpenVINO.
OpenCL support is provided by ComputeAorta and is distributed under a license agreement between Intel® and Codeplay Software Ltd.
The OpenCL toolchain for the Intel® Neural Compute Stick 2 supports offline compilation only. Start with compiling OpenCL C code, using the standalone `clc` compiler. You can find the compiler binary at `<INSTALL_DIR>/tools/cl_compiler`.

> **NOTE**: By design, custom OpenCL layers support any OpenCL kernels written assuming OpenCL version 1.2. The toolchain also supports the half-float extension and is optimized for this type, because it is a native type for Intel® Movidius™ VPUs.
1. Prior to running a compilation, make sure that the following variables are set:
   * `SHAVE_MA2X8XLIBS_DIR=<INSTALL_DIR>/tools/cl_compiler/lib/`
   * `SHAVE_LDSCRIPT_DIR=<INSTALL_DIR>/tools/cl_compiler/ldscripts/`
   * `SHAVE_MYRIAD_LD_DIR=<INSTALL_DIR>/tools/cl_compiler/bin/`
   * `SHAVE_MOVIASM_DIR=<INSTALL_DIR>/tools/cl_compiler/bin/`
2. Run the compilation with the command below. Use `--strip-binary-header` to make an OpenCL runtime-agnostic binary runnable with the OpenVINO™ Runtime.
```bash
cd <INSTALL_DIR>/tools/cl_compiler/bin
./clc --strip-binary-header custom_layer.cl -o custom_layer.bin
```

## Write a Configuration File

To bind your custom kernel to the topology IR, prepare a configuration file, so that the OpenVINO™ Runtime can find the parameters for your kernel and the description of the execution work grid.
For example, consider the following OpenCL kernel signature:
```cpp
__kernel void reorg_nhwc(__global const half *src, __global half *out, int w, int h, int c, int stride);
```
A configuration file for this kernel might be the following:
```xml
<CustomLayer name="ReorgYolo" type="MVCL" version="1">
    <Kernel entry="reorg_nhwc">
        <Source filename="reorg.bin"/>
    </Kernel>
    <Parameters>
        <Tensor arg-name="src" type="input" port-index="0" format="BYXF"/>
        <Tensor arg-name="out" type="output" port-index="0" format="BYXF"/>
        <Scalar arg-name="w" type="int" port-index="0" source="I.X" />
        <Scalar arg-name="h" type="int" port-index="0" source="I.Y" />
        <Scalar arg-name="c" type="int" port-index="0" source="I.F" />
        <Scalar arg-name="stride" type="int" source="stride" />
    </Parameters>
    <WorkSizes dim="input,0" global="(Y+7)/8*8,1,1" local="8,1,1"/>
</CustomLayer>
```
Each custom layer is described with the `CustomLayer` node. It has the following nodes and attributes:
- Root node `CustomLayer` contains the following attributes:
  - `name` – (Required) The name of the OpenVINO™ Runtime layer to bind the kernel with.
  - `type` and `version` – (Required) Reserved for future use. Set them to `MVCL` and `1` respectively.
  - `max-shaves` – (Optional) The maximum number of SHAVE cores that should be dedicated to the layer. It is useful for debugging concurrency issues, or for saving resources when a memory-bound kernel does not scale well with the number of cores, so that more resources are left for the rest of the topology.
- Sub-node `Kernel` must contain the following attributes:
  - `entry` – The name of your kernel function as you defined it in a source file. In the example above, it is `reorg_nhwc`.
  - Node `Source` must contain the following attributes:
    - `filename` – The path to a compiled binary relative to the XML configuration file.
- Sub-node `Parameters` – Describes parameter bindings. For more information, see the description below.
- Sub-node `WorkSizes` – Describes local and global work group sizes and the source for dimension deduction as a pair `direction,port`. In the example above, the work group is described relative to the dimension of the input tensor that comes through port 0 in the OpenVINO IR. The work group configurations, namely `global` and `local`, support any simple math expressions with +,-,\*,/, and () over `B`(batch), `Y`(height), `X`(width) and `F`(channels).
- Sub-node `Where` – Allows you to customize bindings with the `key="value"` attribute. For example, to substitute only 3x3 convolutions, write `<Where kernel="3,3"/>` in the binding xml.

The parameter description supports `Tensor` nodes of the tensor types `input`, `output`, `input_buffer`, `output_buffer` or `data`, as well as `Scalar` and `Data` nodes, and has the following format:
- Each `Tensor` node of `input` or `output` type must contain the following attributes:
  - `arg-name` – The name of a kernel parameter in the kernel signature.
  - `type` – Node type: `input` or `output` as specified in the IR.
  - `port-index` – A number of input/output ports as specified in the IR.
  - `format` – The channel order in the tensor. Optional conversion layers are generated if the custom layer format is not compatible with the formats of neighboring layers. `BFXY`, `BYXF`, and `ANY` formats are supported currently.
- Each `Tensor` node of `input_buffer` or `output_buffer` type must contain the following attributes:
  - `arg-name` – The name of a kernel parameter in the kernel signature.
  - `type` – Node type: `input_buffer` or `output_buffer`. Use the appropriate type to bind multiple kernels that correspond to different stages of the same layer.
  - `port-index` – The unique identifier to bind by.
  - `dim` – The dim source with the same `direction,port` format used for `WorkSizes` bindings.
  - `size` – Amount of bytes needed. The current expression syntax supports only expressions over the dimensions of the selected input/output tensor or constants, and might be extended in the future.

Here is an example of multi-stage MVN layer binding:
```xml
<CustomLayer name="MVN" stage="0" type="MVCL" version="1">
    <Kernel entry="reduction_mean">
        <Source filename="mvn.bin"/>
    </Kernel>
    <Parameters>
        <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
        <Tensor arg-name="mean" type="output_buffer" port-index="0" dim="output,0" size="Y*F*4"/>
        <Tensor arg-name="variance" type="output_buffer" port-index="1" dim="output,0" size="Y*F*4"/>
        <!--other parameters -->
    </Parameters>
    <WorkSizes dim="output,0" global="((Y+7)/8)*8,F,1" local="8,1,1"/>
</CustomLayer>
<CustomLayer name="MVN" stage="1" type="MVCL" version="1">
    <Kernel entry="mvn_scale">
        <Source filename="mvn_scale_changed_orded.bin"/>
    </Kernel>
    <Parameters>
        <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
        <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
        <Tensor arg-name="mean_part" type="input_buffer" port-index="0" dim="output,0" size="Y*F*4"/>
        <Tensor arg-name="power_mean" type="input_buffer" port-index="1" dim="output,0" size="Y*F*4"/>
        <!--other parameters -->
    </Parameters>
    <WorkSizes dim="output,0" global="((Y+7)/8)*8,F,1" local="8,1,1"/>
</CustomLayer>
```
- Each `Tensor` node that has the `data` type must contain the following attributes:
  - `source` – A name of the blob as it is in the IR. A typical example is `weights` for convolution.
  - `format` – Specifies the channel order in the tensor. Optional conversion layers are generated if the custom layer format is not compatible with the formats of neighboring layers.
```xml
<CustomLayer name="BinaryConvolution" type="MVCL" version="1">
    <Kernel entry="binary_convolution">
        <Source filename="binary_layers.bin"/>
    </Kernel>
    <Parameters>
        <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
        <Data arg-name="weights_data" type="data" source="weights" format="ANY"/>
        <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
        <!--other parameters -->
    </Parameters>
    <WorkSizes dim="output,0" global="X,Y,F" local="1,1,1"/>
</CustomLayer>
```
- Each `Scalar` node must contain the following attributes:
  - `arg-name` – The name of a kernel parameter in the kernel signature.
  - `type` – `int` or `float` value. It is used for correct argument extraction from IR parameters.
  - `source` – Contains the name of the parameter in the IR file or input/output (`I`/`O`, `In`/`On`, where `n` is a port number) followed by dimension `B`(batch), `Y`(height), `X`(width), or `F`(channels).

- Each `Data` node must contain the following attributes:
  - `arg-name` – The name of a kernel parameter in the kernel signature.
  - `type` – Node type. Currently, `local_data` is the only supported value, which defines a buffer allocated in fast local on-chip memory. It is limited to 100KB for all `__local` and `__private` arrays defined inside the kernel as well as all `__local` parameters passed to the kernel. A manual-DMA extension requires double buffering. If the custom layer is detected to run out of local memory, the inference fails.
  - `dim` – The dim source with the same `direction,port` format used for `WorkSizes` bindings.
  - `size` – Amount of bytes needed. The current expression syntax supports only expressions over the dimensions of the selected input/output tensor or constants, and may be extended in the future.
The example binding below illustrates a kernel with two local buffers passed to the kernel.
```xml
<CustomLayer name="GRN" type="MVCL" version="1">
    <Kernel entry="grn_NCHW">
        <Source filename="grn.bin"/>
    </Kernel>
    <Parameters>
        <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
        <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
        <Data arg-name="src" type="local_data" dim="input,0" size="X*F*2" />
        <Data arg-name="dst" type="local_data" dim="input,0" size="X*F*2" />
        <Scalar arg-name="C" type="int" port-index="0" source="I.F" />
        <Scalar arg-name="bias" type="float" source="bias" />
    </Parameters>
    <WorkSizes dim="input,0" global="X,Y,1" local="X,1,1"/>
</CustomLayer>
```

## Pass Configuration File to OpenVINO™ Runtime

> **NOTE**: If both native and custom layer implementations are present, the custom kernel has priority over the native one.
Before loading the network that features the custom layers, provide a separate configuration file and load it using the `ov::Core::set_property()` method. Use the "CONFIG_KEY" key and the configuration file name as a value before loading the network that uses custom operations to the plugin:

@snippet docs/snippets/vpu/custom_op.cpp part0
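
For illustration, a minimal sketch of the same step with the OpenVINO 2.0 C++ API follows; the property name `MYRIAD_CUSTOM_LAYERS` is an assumption here, since the exact configuration key is defined by the plugin:

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // Assumed property name: register the custom-layer configuration file
    // with the plugin before loading any model that uses custom operations.
    core.set_property("MYRIAD", {{"MYRIAD_CUSTOM_LAYERS", "custom_layer_config.xml"}});
    // Load and compile the model only after the configuration is registered.
    auto model = core.read_model("model.xml");
    auto compiled_model = core.compile_model(model, "MYRIAD");
    return 0;
}
```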

## Optimizing Kernels with OpenCL for VPU (Intel® Neural Compute Stick 2)

This section provides optimization guidelines for writing custom layers with OpenCL for VPU devices. Knowledge of the general OpenCL programming model and the OpenCL kernel language is assumed and is not a subject of this section. The mapping of the OpenCL model to the VPU is described in the table below.

| OpenCL Model | VPU Mapping|
|-----|----|
| Device code | Executed on SHAVE cores |
| Private memory | Mapped to CMX internal memory, limited to 100KB per work group, valid only while the work group is executed |
| Local memory | Mapped to CMX internal memory, limited to 100KB per work group, valid only while the work group is executed |
| Global memory | Mapped to DDR, used to pass execution preserved parameters for inputs, outputs, and blobs |
| Work group | Executed on a single SHAVE core iterating over multiple work items |

The work group execution order is not defined in the OpenCL specifications. This means it is your responsibility to ensure that race conditions among work groups are not introduced. The custom layer runtime distributes the work grid evenly among the available compute resources and executes them in an arbitrary order. This static scheduling approach works best if the load is evenly spread out across work groups, which is a typical case for Deep Learning kernels. The following guidelines are recommended for work group partitioning:

1. Distribute work evenly across work groups.
2. Adjust work group granularity to maintain an equal workload for all compute cores.
3. Set the maximum number of cores using the `max-shaves` attribute for the `CustomLayer` node. This keeps more resources for the rest of the topology. It is also useful if the kernel scalability has reached its limits, which may happen while optimizing memory-bound kernels or kernels with poor parallelization.
4. Try an alternate data layout (`BFXY`/`BYXF`) for the kernel to see if it improves work group partitioning or data access patterns.
   Consider not just the boost for the specific layer, but also full topology performance, because data conversion layers will be automatically inserted as appropriate.

The offline OpenCL compiler (`clc`) features automatic vectorization over `get_global_id(0)` usage, if uniform access is detected.
For example, the kernel below could be automatically vectorized:
```cpp
__kernel void cvtf32f16(__global float* restrict inImage, __global half* restrict outImage,
                        float scale, float bias)
{
    int idx = get_global_id(0) + get_global_id(1) * get_global_size(0) + get_global_id(2) * get_global_size(0) * get_global_size(1);
    outImage[idx] = convert_half(inImage[idx]*scale+bias);
}
```
However, this work-group based vectorizer (WGV) conflicts with the default LLVM vectorizer based on superword level parallelism (SLP) for the current compiler version. Manual vectorization is recommended to provide the best performance for non-uniform code patterns. WGV works if and only if vector types are not used in the code.

Here is a short list of optimization tips:

1. Help the auto-vectorizer by ensuring that kernel parameter pointers do not alias: put the `restrict` marker on them where possible.
   - This can give a performance boost, especially for kernels with unrolling, like the `ocl_grn` from the example below.
   - Place `restrict` markers for kernels with manually vectorized code as well. In the `ocl_grn` kernel below, the unrolled version without `restrict` is up to 20% slower than the most optimal one, which combines both unrolling and `restrict`.
2. Put `#pragma unroll N` in your loop header. The compiler does not trigger unrolling by default, so it is your responsibility to annotate the code with pragmas as appropriate. The `ocl_grn` version with `#pragma unroll 4` is up to 50% faster, most of which comes from unrolling the first loop, because LLVM, in general, is better at scheduling 3-stage loops (load-compute-store), while the first loop `variance += (float)(src_data[c*H*W + y*W + x] * src_data[c*H*W + y*W + x]);` is only 2-stage (load-compute). Pay attention to unrolling such cases first. The unrolling factor is loop-dependent. Choose the smallest number that still improves performance, as an optimum between kernel size and execution speed. For this specific kernel, changing the unroll factor from `4` to `6` results in the same performance, so an unrolling factor of 4 is the optimum. For the Intel Neural Compute Stick 2, unrolling is conjugated with automatic software pipelining of the load, store, and compute stages:
```cpp
__kernel void ocl_grn(__global const half* restrict src_data, __global half* restrict dst_data, int C, float bias)
{
    int x = get_global_id(0);
    int W = get_global_size(0);
    int y = get_global_id(1);
    int H = get_global_size(1);
    float variance = bias + 1e-9f;
    #pragma unroll 4
    for (int c = 0; c < C; c++)
        variance += (float)(src_data[c*H*W + y*W + x] * src_data[c*H*W + y*W + x]);
    variance = 1.f / native_sqrt(variance);
    #pragma unroll 4
    for (int c = 0; c < C; c++)
        dst_data[c*H*W + y*W + x] = (half)((float)src_data[c*H*W + y*W + x] * variance);
}
```
To check the efficiency of WGV, compare the performance of the kernel above with the kernel below, which is manually vectorized over width:
```cpp
__kernel void ocl_grn_line(__global const half* restrict src_data, __global half* restrict dst_data, int C, int W, float bias)
{
    int y = get_global_id(1);
    int H = get_global_size(1);
    for (int x = 0; x < W/8; x++)
    {
        float8 variance = (float8)(bias+1e-9f);
        #pragma unroll 4
        for (int c = 0; c < C; c++)
        {
            __global const half8* restrict src_line = ((__global const half8 * restrict)(src_data + c*H*W + y*W));
            half8 sh = src_line[x];
            variance += convert_float8(sh*sh);
        }
        variance = 1.f/native_sqrt(variance);
        #pragma unroll 4
        for (int c = 0; c < C; c++)
        {
            __global const half8* restrict src_line = ((__global const half8 * restrict)(src_data + c*H*W + y*W));
            __global half8* restrict dst_line = ((__global half8 * restrict)(dst_data + c*H*W + y*W));
            dst_line[x] = convert_half8(convert_float8(src_line[x])*variance);
        }
    }
    for (int x = W/8*8; x < W; x++)
    {
        float variance = bias+1e-9f;
        #pragma unroll 4
        for (int c = 0; c < C; c++)
            variance += (float)(src_data[c*H*W + y*W + x]*src_data[c*H*W + y*W + x]);
        variance = 1.f/native_sqrt(variance);
        #pragma unroll 4
        for (int c = 0; c < C; c++)
            dst_data[c*H*W + y*W + x] = (float)src_data[c*H*W + y*W + x]*variance;
    }
}
```
Both versions perform the same, but the second one has more complex code.

3. If it is easy to predict the work group size, use the `reqd_work_group_size` kernel attribute to ask the compiler to unroll the code up to the local size of the work group. Note that if the kernel is actually executed with a different work group configuration, the result is undefined.
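   A minimal sketch of the attribute, assuming the kernel is always launched with a local size of (8, 1, 1):
```cpp
// The attribute promises the compiler a fixed local size, enabling full unrolling.
__attribute__((reqd_work_group_size(8, 1, 1)))
__kernel void scale_f16(__global const half* restrict src,
                        __global half* restrict dst, float scale)
{
    int idx = get_global_id(1) * get_global_size(0) + get_global_id(0);
    dst[idx] = src[idx] * (half)scale;
}
```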

4. Prefer to use the `half` compute if it keeps reasonable accuracy. A 16-bit float is a native type for the Intel Neural Compute Stick 2, and most of the `half_*` functions are mapped to a single hardware instruction.
   Use the standard `native_*` functions for the rest of the types.

5. Prefer the `convert_half` function over `vstore_half` if conversion from 32-bit float is required. The `convert_half` function is mapped to a single hardware instruction. For the `cvtf32f16` kernel above, the same `outImage[idx] = convert_half(inImage[idx]*scale+bias);` code written with `vstore_half` is eight times slower.
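   A minimal sketch of the two variants, based on the `cvtf32f16` kernel above:
```cpp
// Variant mapped to a single conversion instruction (preferred here).
__kernel void cvt_fast(__global const float* restrict in, __global half* restrict out,
                       float scale, float bias)
{
    int idx = get_global_id(0);
    out[idx] = convert_half(in[idx] * scale + bias);
}

// Variant using vstore_half; reported as noticeably slower on this hardware.
__kernel void cvt_store(__global const float* restrict in, __global half* restrict out,
                        float scale, float bias)
{
    int idx = get_global_id(0);
    vstore_half(in[idx] * scale + bias, idx, out);
}
```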

6. Be aware of early exits, as they can be extremely costly for the current version of the `clc` compiler due to conflicts with the auto-vectorizer. It is recommended to set up the local size in the `x` dimension equal to the input and/or output width. If it is impossible to define a work grid that exactly matches the inputs and/or outputs, in order to eliminate checks such as `if (get_global_id(0) >= width) return`, use a line-wise kernel variant with manual vectorization.
   The kernel example below demonstrates the impact of early exits on kernel performance.
```cpp
// Initial version
__kernel void reorg(const __global half* restrict src, __global half* restrict out, int stride)
{
    int w = get_global_id(0);
    int W = get_global_size(0);
    int h = get_global_id(1);
    int H = get_global_size(1);
    int c = get_global_id(2);
    int C = get_global_size(2);
    int C2 = C/(stride*stride);
    int offset = c / C2;
    int c2 = c - C2 * offset;
    int H2 = H*stride;
    int W2 = W*stride;
    int h2 = h*stride + offset / stride;
    int w2 = w*stride + offset - stride * (offset / stride);
    out[W*H*c + W*h + w] = src[W2*H2*c2 + W2*h2 + w2];
}
```
   This `reorg` kernel is auto-vectorizable, but an input for the YOLO v2 topology is `NCHW=<1,64,26,26>`, which is not a multiple of the vector width (`8` for the `half` data type). As a result, the Inference Engine does not select the auto-vectorized kernel.
   To compare the performance of the auto-vectorized and scalar versions of the kernel, change the input size to `NCHW=<1,64,26,32>`. This enables the auto-vectorized version to be selected by the Inference Engine and can give about a 30% uplift.
   Since the auto-vectorized version is faster, it is recommended to enable it for the YOLO v2 topology input size by setting the local size to a multiple of the vector width, for example, `32`, and adjusting the global sizes accordingly. As a result, the execution work grid exceeds the actual input dimensions, so out-of-bound checks should be inserted. See the updated kernel version below:
```cpp
// Version with out-of-bound checks added
__kernel void reorg(const __global half* restrict src, __global half* restrict out, int W, int stride)
{
    int w = get_global_id(0);
    w = min(w, W-1);
    int h = get_global_id(1);
    int H = get_global_size(1);
    int c = get_global_id(2);
    int C = get_global_size(2);
    int C2 = C/(stride*stride);
    int offset = c / C2;
    int c2 = c - C2 * offset;
    int H2 = H*stride;
    int W2 = W*stride;
    int h2 = h*stride + offset / stride;
    int w2 = w*stride + offset - stride * (offset / stride);
    out[W*H*c + W*h + w] = src[W2*H2*c2 + W2*h2 + w2];
}
```
   This code performs the same as the initial kernel above (scalar) due to branching overhead. If the `w = min(w, W-1);` min/max expression is replaced with `if (w >= W) return;`, runtime increases up to 2x against the code without branching (the initial version).<br>
   If branching is inevitable for your element-based kernel, it is recommended to change the scheme to line-based. See the kernel variant below:
```cpp
// Line-wise version
__kernel void reorg(const __global half* restrict src, __global half* restrict out, int H, int W, int stride)
{
    int h = min((int)get_global_id(0), H-1);
    int c = get_global_id(1);
    int C = get_global_size(1);
    int C2 = C/(stride*stride);
    int offset = c / C2;
    int c2 = c - C2 * offset;
    int H2 = H*stride;
    int W2 = W*stride;
    for (int w = 0; w < W; ++w)
    {
        int h2 = h*stride + offset / stride;
        int w2 = w*stride + offset - stride * (offset / stride);
        out[W*H*c + W*h + w] = src[W2*H2*c2 + W2*h2 + w2];
    }
}
```
   This decreases the execution time by up to 40% against the best performing vectorized kernel without early exits (the initial version).
7. Reuse computations among work items by using line-based kernels or by sharing values through `__local` memory.
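   A minimal sketch of the sharing approach (a generic OpenCL pattern, not taken from the original document): one work item computes a per-row value once, and the whole work group reuses it:
```cpp
__kernel void row_mean_scale(__global const half* restrict src,
                             __global half* restrict dst, int W)
{
    __local float mean;
    int y = get_group_id(1);
    if (get_local_id(0) == 0) {
        // Compute the row mean once per work group instead of once per work item.
        float sum = 0.f;
        for (int x = 0; x < W; x++)
            sum += (float)src[y*W + x];
        mean = sum / W;
    }
    barrier(CLK_LOCAL_MEM_FENCE);  // make the shared value visible to all work items
    for (int x = get_local_id(0); x < W; x += get_local_size(0))
        dst[y*W + x] = (half)((float)src[y*W + x] / mean);
}
```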
8. Improve data access locality. Most custom kernels are memory bound, while convolution and fully connected layers are hardware-implemented. The code below demonstrates a further optimized version of the `reorg` kernel unrolled by the `stride`:
```cpp
// Unrolled line-wise version
__kernel void reorg_unrolled_by_stride(const __global half* restrict src, __global half* restrict dst,
                                       int H, int W, int stride)
{
    int h = min((int)get_global_id(0), H-1);
    int c2 = get_global_id(1);
    int C2 = get_global_size(1);
    int C = C2*stride*stride;
    int H2 = H*stride;
    int W2 = W*stride;
    for (int stride_y = 0; stride_y < stride; stride_y++)
        for (int stride_x = 0; stride_x < stride; stride_x++)
            for (int w2 = 0, w = 0; w < W; w2 += stride, w++)
                dst[W*H*C2*(stride_y*stride+stride_x) + W*H*c2 + W*h + w] = src[W2*H2*c2 + W2*h*stride + W2*stride_y + w2 + stride_x];
}
```
   The `src` data in this case is loaded only once. As a result, the cycle count drops by up to 45% against the line-wise version.

9. Copy data from `__global` to `__local` or `__private` memory if the data is accessed more than once. Access to `__global` memory is orders of magnitude slower than access to `__local`/`__private` memory due to the statically scheduled pipeline, which stalls completely on a memory access without any prefetch. The same recommendation applies to scalar load/store from/to a `__global` pointer, since work-group copying can be done in a vector fashion.
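   A sketch of the pattern, assuming the row fits into a fixed-size `__private` buffer:
```cpp
__kernel void row_stats(__global const half* restrict src,
                        __global float* restrict dst, int W)
{
    int y = get_global_id(1);
    // Cache one row in fast __private memory, then read it repeatedly for free.
    __private float line[256];          // assumes W <= 256
    for (int x = 0; x < W; x++)
        line[x] = (float)src[y*W + x];  // single pass over slow __global memory
    float sum = 0.f, sq = 0.f;
    for (int x = 0; x < W; x++) {       // repeated reads now hit __private memory
        sum += line[x];
        sq  += line[x] * line[x];
    }
    dst[2*y]     = sum;
    dst[2*y + 1] = sq;
}
```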

10. Use the manual DMA extension. Local (on-chip) memory throughput is up to 24x higher than DDR throughput. Since the OpenVINO 2020.1 release, VPU OpenCL features a manual-DMA kernel extension to copy the sub-tensor used by a work group into local memory and to perform compute without DDR involved. Here is a simple GRN kernel implementation that runs over DDR. The local size is (width of the input tensor, 1, 1), to define a work group large enough to get the code automatically vectorized and unrolled, while the global size is (width of the input tensor, height of the input tensor, 1):
```cpp
__kernel void grn_NCHW(
    __global const half* restrict src_data,
    __global half* restrict dst_data,
    int C,
    float bias)
{
    float variance = bias + 1e-9f;
    #pragma unroll 4
    for (int c = 0; c < C; c++)
    {
        float val = (float) src_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)];
        variance += val*val;
    }
    half hvariance = (half)(native_rsqrt((half)(variance/16.f))*0.25f);
    #pragma unroll 4
    for (int c = 0; c < C; c++)
    {
        dst_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)]
            = src_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)] * hvariance;
    }
}
```

This kernel can be rewritten to introduce the special `__dma_preload` and `__dma_postwrite` intrinsic data binding. This means that instead of one kernel, a group of three kernels should be implemented: `kernelName`, `__dma_preload_kernelName`, and `__dma_postwrite_kernelName`. The `__dma_preload_kernelName` kernel for a particular work group `n` is guaranteed to be executed before the `n`-th work group itself, while the `__dma_postwrite_kernelName` is guaranteed to be executed after a corresponding work group. One of those functions may be defined to copy data between `__global` and `__local` memory. The syntax requires an exact function signature match. The example below illustrates how to prepare your kernel for manual DMA.

```cpp
__kernel void __dma_preload_grn_NCHW(
    __global const half* restrict src,
    __global half* restrict dst,
    __local half* restrict local_src,
    __local half* restrict local_dst,
    int C,
    float bias)
{
    // TODO: copy required piece of src tensor into local_src
}

__kernel void __dma_postwrite_grn_NCHW(
    __global const half* restrict src,
    __global half* restrict dst,
    __local const half* restrict local_src,
    __local half* restrict local_dst,
    int C,
    float bias)
{
    // TODO: copy back computed piece of local_dst into dst
}

__kernel void grn_NCHW(
    __global const half* restrict src_data,
    __global half* restrict dst_data,
    __local half* restrict src,
    __local half* restrict dst,
    int C,
    float bias)
{
    // same as the example above
}
```
The GRN kernel operates on channel-major tensors to compute the average over the full channel range and then normalizes input elements to produce the output.
As a part of the manual DMA extension, a group of work group copy functions is introduced in addition to `async_work_group_copy`, which is also mapped to a DMA call.

Here is the list of supported functions:
```cpp
// 2D sub-tensor copy
event_t WorkGroupDmaCreateStrideTransaction(
    const local T *src,
    global T *dst,
    size_t src_width,  // width of the line of source in bytes
    size_t dst_width,  // width of the line of destination in bytes
    size_t src_stride, // stride between corresponding 2 consecutive lines of source in bytes
    size_t dst_stride, // stride between corresponding 2 consecutive lines of destination in bytes
    size_t size,       // total number of bytes loaded for all lines from source to destination
    event_t event) __OVERLOAD;
event_t WorkGroupDmaCreateStrideTransaction(
    const global T *src,
    local T *dst,
    size_t src_width,  // width of the line of source in bytes
    size_t dst_width,  // width of the line of destination in bytes
    size_t src_stride, // stride between corresponding 2 consecutive lines of source in bytes
    size_t dst_stride, // stride between corresponding 2 consecutive lines of destination in bytes
    size_t size,       // total number of bytes loaded for all lines from source to destination
    event_t event) __OVERLOAD;
// 3D sub-tensor copy
event_t WorkGroupDmaCreate3DTransaction(
    const local T *src,
    global T *dst,
    size_t src_width,        // width of the line of source in bytes
    size_t dst_width,        // width of the line of destination in bytes
    size_t src_stride,       // stride between corresponding 2 consecutive lines of source in bytes
    size_t dst_stride,       // stride between corresponding 2 consecutive lines of destination in bytes
    size_t num_planes,       // number of planes to be copied
    size_t src_plane_stride, // stride between corresponding 2 consecutive planes of source in bytes
    size_t dst_plane_stride, // stride between corresponding 2 consecutive planes of destination in bytes
    size_t size,             // size of the loaded plane in bytes, analogous to the size in the 2D case
    event_t event) __OVERLOAD;
event_t WorkGroupDmaCreate3DTransaction(
    const global T *src,
    local T *dst,
    size_t src_width,        // width of the line of source in bytes
    size_t dst_width,        // width of the line of destination in bytes
    size_t src_stride,       // stride between corresponding 2 consecutive lines of source in bytes
    size_t dst_stride,       // stride between corresponding 2 consecutive lines of destination in bytes
    size_t num_planes,       // number of planes to be copied
    size_t src_plane_stride, // stride between corresponding 2 consecutive planes of source in bytes
    size_t dst_plane_stride, // stride between corresponding 2 consecutive planes of destination in bytes
    size_t size,             // size of the loaded plane in bytes, analogous to the size in the 2D case
    event_t event) __OVERLOAD;
```
where `T` can be `uchar`, `char`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `half` or `float`.
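
For comparison, the standard `async_work_group_copy` mentioned above can be used in the same way; a minimal sketch (the buffer size is an assumption for illustration):
```cpp
__kernel void process_row(__global const half* restrict src,
                          __global half* restrict dst, int W)
{
    __local half tile[512];             // assumes W <= 512
    // Copy one row into local memory; on this platform the copy maps to a DMA call.
    event_t e = async_work_group_copy(tile, src + get_group_id(1) * W, W, 0);
    wait_group_events(1, &e);
    for (int x = get_local_id(0); x < W; x += get_local_size(0))
        dst[get_group_id(1) * W + x] = tile[x] + (half)1.0f;
}
```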
|
||||
|
||||
Modified version of the GRN kernel could be the following:
|
||||
```cpp
|
||||
__kernel void __dma_preload_grn_NCHW(
|
||||
__global const half* restrict src,
|
||||
__global half* restrict dst,
|
||||
__local half* restrict local_src,
|
||||
__local half* restrict local_dst,
|
||||
int C,
|
||||
float bias)
|
||||
{
|
||||
WorkGroupDmaCreate3DTransaction(
|
||||
src + get_group_id(0)*get_local_size(0)
|
||||
+ get_group_id(1)*get_local_size(1)*get_global_size(0), // src
|
||||
local_src, // dst
|
||||
get_local_size(0) * sizeof(half), // src width
|
||||
get_local_size(0) * sizeof(half), // dst width
|
||||
get_global_size(0) * sizeof(half), // src stride
|
||||
get_local_size(0) * sizeof(half), // dst stride
|
||||
C, // num planes
|
||||
get_global_size(0) * get_global_size(1) * sizeof(half), // src plane stride
|
||||
get_local_size(0) * get_local_size(1) * sizeof(half), // dst plane stride
|
||||
get_local_size(0) * get_local_size(1) * sizeof(half), // plane size
|
||||
0);
|
||||
}
|
||||
__kernel void __dma_postwrite_grn_NCHW(
|
||||
__global const half* restrict src,
|
||||
__global half* restrict dst,
|
||||
__local const half* restrict local_src,
|
||||
__local half* restrict local_dst,
|
||||
int C,
|
||||
float bias)
|
||||
{
|
||||
WorkGroupDmaCreate3DTransaction(
|
||||
local_dst, // src
|
||||
dst + get_group_id(0)*get_local_size(0)
|
||||
+ get_group_id(1)*get_local_size(1)*get_global_size(0), // dst
|
||||
get_local_size(0) * sizeof(half), // src width
|
||||
get_local_size(0) * sizeof(half), // dst width
|
||||
get_local_size(0) * sizeof(half), // src stride
|
||||
get_global_size(0) * sizeof(half), // dst stride
|
||||
C, // num planes
|
||||
get_local_size(0) * get_local_size(1) * sizeof(half), // src plane stride
|
||||
get_global_size(0) * get_global_size(1) * sizeof(half), // dst plane stride
|
||||
get_local_size(0) * get_local_size(1) * sizeof(half), // plane size
|
||||
0);
|
||||
}
|
||||
__kernel void grn_NCHW(
|
||||
__global const half* restrict src_data,
|
||||
__global half* restrict dst_data,
|
||||
__local half* restrict src,
|
||||
__local half* restrict dst,
|
||||
int C,
|
||||
float bias)
|
||||
{
|
||||
float variance = bias + 1e-9f;
|
||||
#pragma unroll 8
|
||||
for (int c = 0; c < C; c++)
|
||||
{
|
||||
float val = (float) src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)];
|
||||
variance += val*val;
|
||||
}
|
||||
half hvariance = (half)(native_rsqrt((half)(variance/16.f))*0.25f);
|
||||
#pragma unroll 8
|
||||
for (int c = 0; c < C; c++)
|
||||
{
|
||||
dst[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)]
|
||||
= src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)] * hvariance;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
> **NOTE**: The `get_local_size` and `get_local_id` usage inside the kernel. 21x speedup is expected for a kernel on enet-curbs setup since it is completely limited by memory usage.
|
||||
|
||||
An alternative method to using DMA is to use work item copy extension. Those functions are executed inside a kernel and require work groups equal to single work item.
|
||||
|
||||
Here is the list of supported work item functions:
|
||||
```cpp
|
||||
item_dma_event_t WorkItemDmaCreateTransaction(
    const global T *src,
    private T *dst,
    size_t size,
    item_dma_event_t event) __OVERLOAD;

item_dma_event_t WorkItemDmaCreateTransaction(
    const private T *src,
    global T *dst,
    size_t size,
    item_dma_event_t event) __OVERLOAD;

item_dma_event_t WorkItemDmaCreateStrideTransaction(
    const global T *src,
    private T *dst,
    size_t src_width,
    size_t dst_width,
    size_t src_stride,
    size_t dst_stride,
    size_t size,
    item_dma_event_t event) __OVERLOAD;

item_dma_event_t WorkItemDmaCreateStrideTransaction(
    const private T *src,
    global T *dst,
    size_t src_width,
    size_t dst_width,
    size_t src_stride,
    size_t dst_stride,
    size_t size,
    item_dma_event_t event) __OVERLOAD;

item_dma_event_t WorkItemDmaCreate3DTransaction(
    const global T *src,
    private T *dst,
    size_t src_width,
    size_t dst_width,
    size_t src_stride,
    size_t dst_stride,
    size_t num_planes,
    size_t src_plane_stride,
    size_t dst_plane_stride,
    size_t size,
    item_dma_event_t event) __OVERLOAD;

item_dma_event_t WorkItemDmaCreate3DTransaction(
    const private T *src,
    global T *dst,
    size_t src_width,
    size_t dst_width,
    size_t src_stride,
    size_t dst_stride,
    size_t num_planes,
    size_t src_plane_stride,
    size_t dst_plane_stride,
    size_t size,
    item_dma_event_t event) __OVERLOAD;
```

where `T` can be `uchar`, `char`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `half` or `float`.
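
To show how these functions fit together, here is a minimal single-work-item sketch. It is illustrative only: the tile size `TILE` and the `WaitItemDmaEvents` completion call are assumptions, not names confirmed by this page.

```cpp
// A minimal sketch: work groups of exactly one work item are assumed,
// TILE is a hypothetical tile size, and WaitItemDmaEvents is a placeholder
// for whatever completion primitive the extension actually provides.
#define TILE 64

__kernel void scale_tile(__global const half* restrict src_data,
                         __global half* restrict dst_data,
                         half scale)
{
    __private half tile[TILE];

    // Copy one tile from DDR into fast private memory.
    item_dma_event_t e = WorkItemDmaCreateTransaction(
        src_data + get_global_id(0) * TILE, tile, TILE * sizeof(half), 0);
    WaitItemDmaEvents(1, &e);  // assumed wait primitive

    // Process the tile locally.
    for (int i = 0; i < TILE; i++)
        tile[i] *= scale;

    // Copy the processed tile back to DDR.
    e = WorkItemDmaCreateTransaction(
        tile, dst_data + get_global_id(0) * TILE, TILE * sizeof(half), 0);
    WaitItemDmaEvents(1, &e);  // assumed wait primitive
}
```
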
@@ -24,7 +24,7 @@ if __name__ == '__main__':
    parser.add_argument('-l', '--cpu_extension', dest='cpu_extension', help='Path to extensions library with FFT implementation.')
    parser.add_argument('-d', '--device', dest='device', default='CPU',
                        help='Optional. Specify the target device to infer on; CPU, '
                             'GPU, HDDL or MYRIAD is acceptable. For non-CPU targets, '
                             'GPU, GNA is acceptable. For non-CPU targets, '
                             'HETERO plugin is used with CPU fallbacks to FFT implementation. '
                             'Default value is CPU')
    args = parser.parse_args()

@@ -57,22 +57,6 @@ Kaldi-specific parameters:

* You can use the *OpenVINO Speech Recognition* sample application for the sample inference of Kaldi models. This sample supports models with only one output. If your model has several outputs, specify the desired one with the `--output` option.

## Converting a Model for Intel® Movidius™ Myriad™ VPU

If you want to convert a model for inference on Intel® Movidius™ Myriad™ VPU, use the `--remove_memory` option.
It removes the Memory layers from the OpenVINO IR files. Additional inputs and outputs will appear in the IR files instead.
Model Optimizer will output the mapping between inputs and outputs. For example:
```sh
[ WARNING ] Add input/output mapped Parameter_0_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out -> Result_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out
[ WARNING ] Add input/output mapped Parameter_1_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out -> Result_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out
[ WARNING ] Add input/output mapped Parameter_0_for_iteration_Offset_fastlstm3.c_trunc__3390 -> Result_for_iteration_Offset_fastlstm3.c_trunc__3390
```
Based on this mapping, link inputs and outputs in your application manually as follows:

1. Initialize inputs from the mapping as zeros in the first frame of an utterance.
2. Copy output blobs from the mapping to the corresponding inputs. For example, data from `Result_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out`
   must be copied to `Parameter_0_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out`, as shown in the sketch below.
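
As a minimal illustration of steps 1 and 2 (not part of the original guide), the loop below zero-initializes the mapped inputs and feeds each mapped output back into its paired input between frames. The model path, input name, and `frames` container are hypothetical; only the tensor names come from the warnings above.

```cpp
#include <cstring>
#include <string>
#include <utility>
#include <vector>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto compiled = core.compile_model("model.xml", "CPU");  // hypothetical path and device
    auto request = compiled.create_infer_request();

    // Output -> input pairs, taken from the Model Optimizer warnings above.
    const std::vector<std::pair<std::string, std::string>> pairs = {
        {"Result_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out",
         "Parameter_0_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out"}};

    // Step 1: zero-initialize the mapped inputs for the first frame of an utterance.
    for (const auto& p : pairs) {
        ov::Tensor t = request.get_tensor(p.second);
        std::memset(t.data(), 0, t.get_byte_size());
    }

    std::vector<ov::Tensor> frames;  // utterance frames, prepared elsewhere (hypothetical)
    for (const auto& frame : frames) {
        request.set_tensor("input", frame);  // hypothetical input name
        request.infer();
        // Step 2: copy each mapped output into its paired input for the next frame.
        for (const auto& p : pairs) {
            ov::Tensor src = request.get_tensor(p.first);
            ov::Tensor dst = request.get_tensor(p.second);
            std::memcpy(dst.data(), src.data(), src.get_byte_size());
        }
    }
    return 0;
}
```
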

## Supported Kaldi Layers

For the list of supported standard layers, refer to the [Supported Framework Layers](@ref openvino_docs_MO_DG_prepare_model_Supported_Frameworks_Layers) page.

@@ -38,10 +38,7 @@ The logic behind the choice is as follows:
| 2 || iGPU | FP32, FP16, BIN |
| || (e.g. Intel® UHD Graphics 620 (iGPU)) | |
+----------+------------------------------------------------------+-------------------------------------+
| 3 || Intel® Movidius™ Myriad™ X VPU | FP16 |
| || (e.g. Intel® Neural Compute Stick 2 (Intel® NCS2)) | |
+----------+------------------------------------------------------+-------------------------------------+
| 4 || Intel® CPU | FP32, FP16, INT8, BIN |
| 3 || Intel® CPU | FP32, FP16, INT8, BIN |
| || (e.g. Intel® Core™ i7-1165G7) | |
+----------+------------------------------------------------------+-------------------------------------+
@endsphinxdirective
@@ -271,7 +268,7 @@ The `ov::hint::model_priority` property enables you to control the priorities of

## Configuring Individual Devices and Creating the Auto-Device plugin on Top

Although the methods described above are currently the preferred way to execute inference with AUTO, the following steps can also be used as an alternative. It is currently available as a legacy feature and used if the device candidate list includes Myriad devices, incapable of utilizing the Performance Hints option.
Although the methods described above are currently the preferred way to execute inference with AUTO, the following steps can also be used as an alternative. It is currently available as a legacy feature and used if AUTO is incapable of utilizing the Performance Hints option.


@sphinxdirective
@@ -303,7 +300,7 @@ benchmark_app -d AUTO -m <model> -i <input> -niter 1000

For limited device choice:

```sh
benchmark_app -d AUTO:CPU,GPU,MYRIAD -m <model> -i <input> -niter 1000
benchmark_app -d AUTO:CPU,GPU,GNA -m <model> -i <input> -niter 1000
```

For more information, refer to the [C++](../../samples/cpp/benchmark_app/README.md) or [Python](../../tools/benchmark_tool/README.md) version instructions.

@@ -10,8 +10,6 @@ To use the Deployment Manager tool, the following requirements need to be met:
* Intel® Distribution of OpenVINO™ toolkit is installed. See the [Installation Guide](../../install_guides/installing-openvino-overview.md) for instructions on different operating systems.
* To run inference on a target device other than CPU, device drivers must be pre-installed:
  * **For GPU**, see [Configurations for Intel® Processor Graphics (GPU)](../../install_guides/configurations-for-intel-gpu.md).
  * **For NCS2**, see the [Intel® Neural Compute Stick 2 section](../../install_guides/configurations-for-ncs2.md).
  * **For VPU**, see [Configurations for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs](../../install_guides/configurations-for-ivad-vpu.md).
  * **For GNA**, see [Intel® Gaussian & Neural Accelerator (GNA)](../../install_guides/configurations-for-intel-gna.md).

> **IMPORTANT**: The operating system on the target system must be the same as the development system on which you are creating the package. For example, if the target system is Ubuntu 18.04, the deployment package must be created from the OpenVINO™ toolkit installed on Ubuntu 18.04.
@@ -133,7 +131,7 @@ To launch the Deployment Manager tool in the standard mode: open a new terminal

The following options are available:

* `<--targets>` (required): the list of target devices to run inference. To specify more than one target, separate them with spaces, for example, `--targets cpu gpu vpu`.
* `<--targets>` (required): the list of target devices to run inference. To specify more than one target, separate them with spaces, for example, `--targets cpu gpu`.
  To get a list of currently available targets, run the program with the `-h` option.

* `[--output_dir]` (optional): the path to the output directory. By default, it is set to your home directory.
@@ -191,7 +189,7 @@ To deploy the OpenVINO Runtime components from the development machine to the ta
* `install_dependencies` — a snapshot of the `install_dependencies` directory from the OpenVINO installation directory.
* `<user_data>` — the directory with the user data (OpenVINO IR, model, dataset, etc.) specified while configuring the package.

3. On a target Linux system, to run inference on a target Intel® GPU, Intel® Movidius™ VPU, or Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, install additional dependencies by running the `install_openvino_dependencies.sh` script:
3. On a target Linux system, to run inference, install additional dependencies by running the `install_openvino_dependencies.sh` script:
```sh
cd <destination_dir>/openvino/install_dependencies
sudo -E ./install_openvino_dependencies.sh

@@ -55,4 +55,4 @@ Here the term "optional" means that if the application does not use the capabili

Building a local distribution will require more detailed information, and you will find it in the dedicated [Libraries for Local Distribution](local-distribution.md) article.

> **NOTE**: Depending on your target OpenVINO devices, the following configurations might be needed for deployed machines: [Configurations for GPU](../../install_guides/configurations-for-intel-gpu.md), [Configurations for GNA](../../install_guides/configurations-for-intel-gna.md), [Configurations for NCS2](../../install_guides/configurations-for-ncs2.md), [Configurations for VPU](../../install_guides/configurations-for-ivad-vpu.md).
> **NOTE**: Depending on your target OpenVINO devices, the following configurations might be needed for deployed machines: [Configurations for GPU](../../install_guides/configurations-for-intel-gpu.md), [Configurations for GNA](../../install_guides/configurations-for-intel-gna.md).

@@ -28,8 +28,6 @@ For each inference device, OpenVINO Runtime has its own plugin library:
- `openvino_intel_cpu_plugin` for [Intel® CPU devices](../supported_plugins/CPU.md).
- `openvino_intel_gpu_plugin` for [Intel® GPU devices](../supported_plugins/GPU.md).
- `openvino_intel_gna_plugin` for [Intel® GNA devices](../supported_plugins/GNA.md).
- `openvino_intel_myriad_plugin` for [Intel® MYRIAD devices](../supported_plugins/MYRIAD.md).
- `openvino_intel_hddl_plugin` for [Intel® HDDL device](../supported_plugins/HDDL.md).
- `openvino_arm_cpu_plugin` for [ARM CPU devices](../supported_plugins/ARM_CPU.md).

Depending on what devices are used in the app, the appropriate libraries need to be put into the distribution package.
@@ -48,8 +46,6 @@ As it is shown on the picture above, some plugin libraries may have OS-specific
|-------------|------------|
| CPU | `-` |
| GPU | `OpenCL.dll`, `cache.json` |
| MYRIAD | `usb.dll`, `usb-ma2x8x.mvcmd`, `pcie-ma2x8x.elf` |
| HDDL | `bsl.dll`, `hddlapi.dll`, `json-c.dll`, `libcrypto-1_1-x64.dll`, `libssl-1_1-x64.dll`, `mvnc-hddl.dll` |
| GNA | `gna.dll` |
| Arm® CPU | `-` |

@@ -72,8 +68,6 @@ As it is shown on the picture above, some plugin libraries may have OS-specific
|-------------|-------------|
| CPU | `-` |
| GPU | `libOpenCL.so`, `cache.json` |
| MYRIAD | `libusb.so`, `usb-ma2x8x.mvcmd`, `pcie-ma2x8x.mvcmd` |
| HDDL | `libbsl.so`, `libhddlapi.so`, `libmvnc-hddl.so` |
| GNA | `gna.dll` |
| Arm® CPU | `-` |

@@ -95,7 +89,6 @@ As it is shown on the picture above, some plugin libraries may have OS-specific
| Device | Dependency |
|-------------|-------------|
| CPU | `-` |
| MYRIAD | `libusb.dylib`, `usb-ma2x8x.mvcmd`, `pcie-ma2x8x.mvcmd` |
| Arm® CPU | `-` |

@sphinxdirective
@@ -144,19 +137,19 @@ In this example, the application is written in C language, performs inference on
- `openvino_intel_cpu_plugin` is used for inference.
- `openvino_ir_frontend` is used to read source models.

**MULTI execution on GPU and MYRIAD in `tput` mode**
**MULTI execution on GPU and CPU in `tput` mode**

In this example, the application is written in C++, performs inference [simultaneously on GPU and MYRIAD devices](../multi_device.md) with the `ov::hint::PerformanceMode::THROUGHPUT` property set, and reads models stored in the ONNX format. The following libraries are used:
In this example, the application is written in C++, performs inference [simultaneously on GPU and CPU devices](../multi_device.md) with the `ov::hint::PerformanceMode::THROUGHPUT` property set, and reads models stored in the ONNX format. The following libraries are used:
- The `openvino` library is a main dependency of the application. The application links against this library.
- `openvino_intel_gpu_plugin` and `openvino_intel_myriad_plugin` are used for inference.
- `openvino_intel_gpu_plugin` and `openvino_intel_cpu_plugin` are used for inference.
- `openvino_auto_plugin` is used for Multi-Device Execution.
- `openvino_auto_batch_plugin` can also be put into the distribution to improve the saturation of the [Intel® GPU](../supported_plugins/GPU.md) device. If there is no such plugin, [Automatic Batching](../automatic_batching.md) is turned off.
- `openvino_onnx_frontend` is used to read source models.

**Auto-Device Selection between HDDL and CPU**
**Auto-Device Selection between GPU and CPU**

In this example, the application is written in C++, performs inference with the [Automatic Device Selection](../auto_device_selection.md) mode, limiting the device list to HDDL and CPU, and reads models [created using C++ code](../model_representation.md). The following libraries are used:
In this example, the application is written in C++, performs inference with the [Automatic Device Selection](../auto_device_selection.md) mode, limiting the device list to GPU and CPU, and reads models [created using C++ code](../model_representation.md). The following libraries are used:
- The `openvino` library is a main dependency of the application. The application links against this library.
- `openvino_auto_plugin` is used to enable Automatic Device Selection.
- `openvino_intel_hddl_plugin` and `openvino_intel_cpu_plugin` are used for inference. AUTO selects between CPU and HDDL devices according to their physical existence on the deployed machine.
- `openvino_intel_gpu_plugin` and `openvino_intel_cpu_plugin` are used for inference. AUTO selects between CPU and GPU devices according to their physical existence on the deployed machine.
- No frontend library is needed because `ov::Model` is created in code.

@@ -54,7 +54,7 @@ Randomly selecting operations and setting affinities may lead to decrease in mod


#### The Automatic Mode
It decides automatically which operation is assigned to which device according to the support from dedicated devices (`GPU`, `CPU`, `MYRIAD`, etc.), and the query model step is called implicitly by the Hetero device during model compilation.
It decides automatically which operation is assigned to which device according to the support from dedicated devices (`GPU`, `CPU`, `GNA`, etc.), and the query model step is called implicitly by the Hetero device during model compilation.

The automatic mode causes "greedy" behavior and assigns all operations that can be executed on a given device to it, according to the priorities you specify (for example, `ov::device::priorities("GPU,CPU")`).
It does not take into account device peculiarities such as the inability to infer certain operations without other special operations placed before or after that layer. If the device plugin does not support the subgraph topology constructed by the HETERO device, then you should set affinity manually.
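
For instance, here is a minimal sketch of compiling a model in the automatic mode (not part of the original hunk; the model path is hypothetical):

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // hypothetical path
    // HETERO with GPU as the primary device and CPU as the fallback.
    auto compiled = core.compile_model(model, "HETERO",
                                       ov::device::priorities("GPU", "CPU"));
    return 0;
}
```
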
@@ -165,7 +165,7 @@ where:
- `HETERO` stands for the Heterogeneous execution
- `GPU,CPU` points to a fallback policy with the priority on GPU and fallback to CPU

You can also point to more than two devices: `-d HETERO:MYRIAD,GPU,CPU`
You can also point to more than two devices: `-d HETERO:GNA,GPU,CPU`

### Additional Resources


@@ -129,10 +129,10 @@ using the [configure devices](supported_plugins/config_properties.md) property:
## Using the Multi-Device with OpenVINO Samples and Benchmarking Performance

To see how the Multi-Device execution is used in practice and to test its performance, take a look at OpenVINO's Benchmark Application, which presents the optimal performance of the plugin without the need for additional settings, like the number of requests or CPU threads.
Here is an example command to evaluate performance of HDDL+GPU:
Here is an example command to evaluate performance of CPU + GPU:

```sh
./benchmark_app -d MULTI:HDDL,GPU -m <model> -i <input> -niter 1000
./benchmark_app -d MULTI:CPU,GPU -m <model> -i <input> -niter 1000
```

For more information, refer to the [C++](../../samples/cpp/benchmark_app/README.md) or [Python](../../tools/benchmark_tool/README.md) version instructions.

@@ -20,7 +20,7 @@
OpenVINO Runtime is a set of C++ libraries with C and Python bindings providing a common API to deliver inference solutions on the platform of your choice. Use the OpenVINO Runtime API to read an Intermediate Representation (IR),
TensorFlow (check [TensorFlow Frontend Capabilities and Limitations](../resources/tensorflow_frontend.md)), ONNX, or PaddlePaddle model and execute it on preferred devices.

OpenVINO Runtime uses a plugin architecture. Its plugins are software components that contain complete implementation for inference on a particular Intel® hardware device: CPU, GPU, VPU, etc. Each plugin implements the unified API and provides additional hardware-specific APIs for configuring devices or API interoperability between OpenVINO Runtime and the underlying plugin backend.
OpenVINO Runtime uses a plugin architecture. Its plugins are software components that contain complete implementation for inference on a particular Intel® hardware device: CPU, GPU, GNA, etc. Each plugin implements the unified API and provides additional hardware-specific APIs for configuring devices or API interoperability between OpenVINO Runtime and the underlying plugin backend.

The scheme below illustrates the typical workflow for deploying a trained deep learning model:


@@ -30,7 +30,7 @@ Consider the following standard example: deep learning model expects input with

Even though it is relatively easy to implement all these steps in the application code manually before actual inference, it is also possible to use the Preprocessing API. Advantages of using the API are:
- Preprocessing API is easy to use.
- Preprocessing steps will be integrated into the execution graph and will be performed on the selected device (CPU/GPU/VPU/etc.) rather than always being executed on CPU. This will improve utilization of the selected device, which is always good.
- Preprocessing steps will be integrated into the execution graph and will be performed on the selected device (CPU/GPU/etc.) rather than always being executed on CPU. This will improve utilization of the selected device, which is always good (see the sketch below).

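
To give a flavor of the API, here is a minimal sketch (the tensor layout, element type, and model path are illustrative assumptions, not requirements from this page):

```cpp
#include <openvino/openvino.hpp>
#include <openvino/core/preprocess/pre_post_process.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // hypothetical path

    ov::preprocess::PrePostProcessor ppp(model);
    // Declare the application-side tensor: u8 data in NHWC layout.
    ppp.input().tensor()
        .set_element_type(ov::element::u8)
        .set_layout("NHWC");
    // These conversion steps become part of the execution graph.
    ppp.input().preprocess()
        .convert_element_type(ov::element::f32)
        .convert_layout("NCHW");
    model = ppp.build();

    auto compiled = core.compile_model(model, "CPU");
    return 0;
}
```
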
## Preprocessing API


@@ -9,7 +9,6 @@
   openvino_docs_OV_UG_query_api
   openvino_docs_OV_UG_supported_plugins_CPU
   openvino_docs_OV_UG_supported_plugins_GPU
   openvino_docs_OV_UG_supported_plugins_VPU
   openvino_docs_OV_UG_supported_plugins_GNA
   openvino_docs_OV_UG_supported_plugins_ARM_CPU

@@ -19,7 +18,6 @@ OpenVINO™ Runtime can infer deep learning models using the following device ty

* [CPU](CPU.md)
* [GPU](GPU.md)
* [VPUs](VPU.md)
* [GNA](GNA.md)
* [Arm® CPU](ARM_CPU.md)

@@ -59,7 +57,7 @@ The OpenVINO Runtime API features dedicated methods of enumerating devices and t
...
Device: GPU.1
...
Device: HDDL
Device: GNA
```

A simple programmatic way to enumerate the devices and use them with the multi-device mode is as follows:
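
A minimal sketch of that approach (assuming the standard `ov::Core` API; this is not the exact snippet from the page):

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // hypothetical path

    // Build a comma-separated list of every available device.
    std::string all_devices;
    for (const auto& device : core.get_available_devices()) {
        if (!all_devices.empty())
            all_devices += ",";
        all_devices += device;
    }

    // Run on all discovered devices through the MULTI virtual device.
    auto compiled = core.compile_model(model, "MULTI:" + all_devices);
    return 0;
}
```
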
@@ -74,7 +72,7 @@ A simple programmatic way to enumerate the devices and use with the multi-device

@endsphinxdirective

Beyond the typical "CPU", "GPU", "HDDL", and so on, when multiple instances of a device are available, the names are more qualified. For example, this is how two Intel® Movidius™ Myriad™ X sticks are listed with the hello_query_sample:
Beyond the typical "CPU", "GPU", and so on, when multiple instances of a device are available, the names are more qualified. For example, this is how two Intel® Movidius™ Myriad™ X sticks are listed with the hello_query_sample:
```
...
Device: MYRIAD.1.2-ma2480

@@ -37,7 +37,7 @@ Available devices:
...
Device: GPU.1
...
Device: HDDL
Device: GNA
```

Then, the device name can be passed to the `ov::Core::compile_model()` method:

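
A minimal sketch of that call (not the exact snippet from the page; the model path is hypothetical):

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // hypothetical path
    // Pass any device name reported by the enumeration above, e.g. "GPU.1".
    auto compiled = core.compile_model(model, "GPU.1");
    return 0;
}
```
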
@@ -1,39 +0,0 @@
# HDDL Device {#openvino_docs_OV_UG_supported_plugins_HDDL}

## Introducing the HDDL Plugin

The OpenVINO Runtime HDDL plugin was developed for inference with neural networks on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs. It is designed for use cases that require large throughput for deep learning inference, up to dozens of times more than the MYRIAD Plugin.

## Configuring the HDDL Plugin

To configure your Intel® Vision Accelerator Design with Intel® Movidius™ on supported operating systems, refer to the [Additional Configurations for VPU](../../install_guides/configurations-for-ivad-vpu.md) in the Installation Guide.

> **NOTE**: The HDDL and MYRIAD plugins may cause conflicts when used at the same time.
> To ensure proper operation in such a case, the number of booted devices needs to be limited in the `hddl_autoboot.config` file.
> Otherwise, the HDDL plugin will boot all available Intel® Movidius™ Myriad™ X devices.

## Supported Networks

To see the list of supported networks for the HDDL plugin, refer to the list on the [MYRIAD Plugin page](MYRIAD.md).

## Supported Configuration Parameters

For information on VPU common configuration parameters, see the [VPU Plugins](VPU.md) page.
When specifying key values as raw strings (when using the Python API), omit the `KEY_` prefix.

In addition to common parameters for both VPU plugins, the HDDL plugin accepts the following options:

| Parameter Name | Parameter Values | Default | Description |
| :--- | :--- | :--- | :--- |
| `KEY_PERF_COUNT` | `YES`/`NO` | `NO` | Enables the performance counter option. |
| `KEY_VPU_HDDL_GRAPH_TAG` | string | empty string | Allows executing a network on a specified number of devices. |
| `KEY_VPU_HDDL_STREAM_ID` | string | empty string | Allows executing inference on a specified device. |
| `KEY_VPU_HDDL_DEVICE_TAG` | string | empty string | Allows allocating/deallocating networks on specified devices. |
| `KEY_VPU_HDDL_BIND_DEVICE` | `YES`/`NO` | `NO` | Enables the network to be bound to a device. Refer to the `vpu_plugin_config.hpp` file. |
| `KEY_VPU_HDDL_RUNTIME_PRIORITY` | signed int | 0 | Specifies the runtime priority of a device among all devices running the same network. Refer to the `vpu_plugin_config.hpp` file. |

## Additional Resources

* [Supported Devices](Supported_Devices.md)
* [VPU Plugins](VPU.md)
* [MYRIAD Plugin](MYRIAD.md)
@@ -1,40 +0,0 @@
# MYRIAD Device {#openvino_docs_OV_UG_supported_plugins_MYRIAD}

The OpenVINO Runtime MYRIAD plugin has been developed for inference of neural networks on Intel® Neural Compute Stick 2.

## Configuring the MYRIAD Plugin

To configure your Intel® Vision Accelerator Design with Intel® Movidius™ on supported operating systems, refer to the [Additional Configurations for VPU](../../install_guides/configurations-for-ivad-vpu.md) in the Installation Guide.

> **NOTE**: The HDDL and MYRIAD plugins may cause conflicts when used at the same time.
> To ensure proper operation in such a case, the number of booted devices needs to be limited in the `hddl_autoboot.config` file.
> Otherwise, the HDDL plugin will boot all available Intel® Movidius™ Myriad™ X devices.

## Supported Configuration Parameters

For information on the VPU common configuration parameters, see the [VPU Plugins](VPU.md) page.
When specifying key values as raw strings (when using the Python API), omit the `KEY_` prefix.

In addition to common parameters, the MYRIAD plugin accepts the following options:

| Parameter Name | Parameter Values | Default | Description |
| :--- | :--- | :--- | :--- |
| `KEY_VPU_MYRIAD_PROTOCOL` | empty string/`VPU_MYRIAD_USB`/`VPU_MYRIAD_PCIE` | empty string | If set, the plugin will use a device with the specified protocol to allocate a network. |
| `KEY_VPU_MYRIAD_FORCE_RESET` | `YES`/`NO` | `NO` | Enables force reset of all booted devices when a new ExecutableNetwork is created.<br />This is a plugin scope option and must be used with the plugin's SetConfig method only.<br />See the <a href="#MYRIAD_DEVICE_ALLOC">Device allocation</a> section for details. |
| `KEY_VPU_FORCE_RESET` | `YES`/`NO` | `NO` | **Deprecated** Use `KEY_VPU_MYRIAD_FORCE_RESET` instead.<br />Enables force reset of all booted devices when a new ExecutableNetwork is created.<br />This is a plugin scope option and must be used with the plugin's SetConfig method only.<br />See the <a href="#MYRIAD_DEVICE_ALLOC">Device allocation</a> section for details. |

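As an illustration of the plugin-scope note above (not from the original page), a force reset could be enabled like this; the raw key string is assumed from the table's `KEY_`-prefix convention:

```cpp
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core ie;
    // A plugin-scope option: set it via SetConfig before loading any network.
    ie.SetConfig({{"VPU_MYRIAD_FORCE_RESET", "YES"}}, "MYRIAD");
    auto network = ie.ReadNetwork("model.xml");  // hypothetical path
    auto exec = ie.LoadNetwork(network, "MYRIAD");
    return 0;
}
```
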
## Device allocation <a name="MYRIAD_DEVICE_ALLOC"> </a>

Each `IExecutableNetwork` instance tries to allocate a new device on `InferenceEngine::Core::LoadNetwork`, but if all available devices are already allocated, it will use the one with the minimal number of uploaded networks.
The maximum number of networks a single device can handle depends on device memory capacity and the size of the networks.

If the `KEY_VPU_MYRIAD_FORCE_RESET` option is set to `YES`, the plugin will reset all VPU devices in the system.

A single device cannot be shared across multiple processes.

## See Also

* [Supported Devices](Supported_Devices.md)
* [VPU Plugins](VPU.md)
* [Intel® Neural Compute Stick 2 Get Started](https://software.intel.com/en-us/neural-compute-stick/get-started)
@@ -1,7 +1,7 @@
Supported Devices {#openvino_docs_OV_UG_supported_plugins_Supported_Devices}
==================

The OpenVINO Runtime can infer models in different formats with various input and output formats. This section provides supported and optimal configurations per device. In OpenVINO™ documentation, "device" refers to an Intel® processor used for inference, which can be a supported CPU, GPU, VPU (vision processing unit), or GNA (Gaussian neural accelerator coprocessor), or a combination of those devices.
The OpenVINO Runtime can infer models in different formats with various input and output formats. This section provides supported and optimal configurations per device. In OpenVINO™ documentation, "device" refers to an Intel® processor used for inference, which can be a supported CPU, GPU, or GNA (Gaussian neural accelerator coprocessor), or a combination of those devices.

> **NOTE**: With the OpenVINO™ 2020.4 release, Intel® Movidius™ Neural Compute Stick support has been cancelled.

@@ -11,7 +11,6 @@ The OpenVINO Runtime provides unique capabilities to infer deep learning models
|------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|
|[GPU plugin](GPU.md) |Intel® Processor Graphics, including Intel® HD Graphics and Intel® Iris® Graphics |
|[CPU plugin](CPU.md) |Intel® Xeon® with Intel® Advanced Vector Extensions 2 (Intel® AVX2), Intel® Advanced Vector Extensions 512 (Intel® AVX-512), and AVX512_BF16, Intel® Core™ Processors with Intel® AVX2, Intel® Atom® Processors with Intel® Streaming SIMD Extensions (Intel® SSE) |
|[VPU plugins](VPU.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs |
|[GNA plugin](GNA.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Speech Enabling Developer Kit, Amazon Alexa* Premium Far-Field Developer Kit, Intel® Pentium® Silver J5005 Processor, Intel® Pentium® Silver N5000 Processor, Intel® Celeron® J4005 Processor, Intel® Celeron® J4105 Processor, Intel® Celeron® Processor N4100, Intel® Celeron® Processor N4000, Intel® Core™ i3-8121U Processor, Intel® Core™ i7-1065G7 Processor, Intel® Core™ i7-1060G7 Processor, Intel® Core™ i5-1035G4 Processor, Intel® Core™ i5-1035G7 Processor, Intel® Core™ i5-1035G1 Processor, Intel® Core™ i5-1030G7 Processor, Intel® Core™ i5-1030G4 Processor, Intel® Core™ i3-1005G1 Processor, Intel® Core™ i3-1000G1 Processor, Intel® Core™ i3-1000G4 Processor|
|[Arm® CPU plugin](ARM_CPU.md) (unavailable in the Intel® Distribution of OpenVINO™ toolkit) |Raspberry Pi™ 4 Model B, Apple® Mac mini with M1 chip, NVIDIA® Jetson Nano™, Android™ devices |
|[Multi-Device execution](../multi_device.md) |Multi-Device execution enables simultaneous inference of the same model on several devices in parallel |
@@ -59,7 +58,6 @@ For example, the CHW value at index (c,h,w) is physically located at index (c\*H
|:------------------|:----------------------:|:----------------------:|:----------------------:|
|CPU plugin |Supported and preferred |Supported |Supported |
|GPU plugin |Supported |Supported and preferred |Supported |
|VPU plugins |Not supported |Supported |Not supported |
|GNA plugin |Supported |Supported |Not supported |
|Arm® CPU plugin |Supported and preferred |Supported |Supported (partially) |

@@ -72,7 +70,6 @@ the supported models formats depends on the actual underlying devices. _Generall
|:------------------|:--------:|:-------------:|:-------------:|:-------------:|:------------:|:-------------:|
|CPU plugin |Supported |Supported |Supported |Supported |Supported |Supported |
|GPU plugin |Supported |Supported\* |Supported\* |Supported\* |Not supported |Supported\* |
|VPU plugins |Supported |Supported |Supported |Not supported |Not supported |Not supported |
|GNA plugin |Supported |Not supported |Supported |Not supported |Supported |Supported |
|Arm® CPU plugin |Supported |Supported |Supported |Supported |Not supported |Not supported |

@@ -86,7 +83,6 @@ the supported input precision depends on the actual underlying devices. _Genera
|:------------------|:--------:|:------------:|
|CPU plugin |Supported |Supported |
|GPU plugin |Supported |Supported |
|VPU plugins |Supported |Supported |
|GNA plugin |Supported |Not supported |
|Arm® CPU plugin |Supported |Supported |

@@ -99,7 +95,6 @@ the supported output precision depends on the actual underlying devices. _Gener
|:------------------|:------------:|:------------:|:------------:|:------------:|
|CPU plugin |Supported |Supported |Supported |Supported |
|GPU plugin |Supported |Supported |Supported |Supported |
|VPU plugins |Supported |Supported |Supported |Supported |
|GNA plugin |Not supported |Supported |Supported |Supported |
|Arm® CPU plugin |Not supported |Supported |Supported |Supported |


@@ -1,159 +0,0 @@
# VPU Devices {#openvino_docs_OV_UG_supported_plugins_VPU}

@sphinxdirective

.. toctree::
   :maxdepth: 1
   :hidden:

   openvino_docs_OV_UG_supported_plugins_MYRIAD
   openvino_docs_OV_UG_supported_plugins_HDDL

@endsphinxdirective

This chapter provides information on the OpenVINO™ Runtime plugins that enable inference of deep learning models on the supported VPU devices:

* Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X — Supported by the [MYRIAD Plugin](MYRIAD.md)
* Intel® Vision Accelerator Design with Intel® Movidius™ VPUs — Supported by the [HDDL Plugin](HDDL.md)

> **NOTE**: With the OpenVINO™ 2020.4 release, Intel® Movidius™ Neural Compute Stick powered by the Intel® Movidius™ Myriad™ 2 is no longer supported.

## Supported Networks

**Caffe**:
* AlexNet
* CaffeNet
* GoogleNet (Inception) v1, v2, v4
* VGG family (VGG16, VGG19)
* SqueezeNet v1.0, v1.1
* ResNet v1 family (18\*\*\*, 50, 101, 152)
* MobileNet (mobilenet-v1-1.0-224, mobilenet-v2)
* Inception ResNet v2
* DenseNet family (121, 161, 169, 201)
* SSD-300, SSD-512, SSD-MobileNet, SSD-GoogleNet, SSD-SqueezeNet

**TensorFlow**:
* AlexNet
* Inception v1, v2, v3, v4
* Inception ResNet v2
* MobileNet v1, v2
* ResNet v1 family (50, 101, 152)
* ResNet v2 family (50, 101, 152)
* SqueezeNet v1.0, v1.1
* VGG family (VGG16, VGG19)
* Yolo family (yolo-v2, yolo-v3, tiny-yolo-v1, tiny-yolo-v2, tiny-yolo-v3)
* faster_rcnn_inception_v2, faster_rcnn_resnet101
* ssd_mobilenet_v1
* DeepLab-v3+

**Apache MXNet**:
* AlexNet and CaffeNet
* DenseNet family (121, 161, 169, 201)
* SqueezeNet v1.1
* MobileNet v1, v2
* NiN
* ResNet v1 (101, 152)
* ResNet v2 (101)
* VGG family (VGG16, VGG19)
* SSD-Inception-v3, SSD-MobileNet, SSD-ResNet-50, SSD-300

\*\*\* The network is tested on Intel® Neural Compute Stick 2 with the BatchNormalization fusion optimization disabled during Model Optimizer import.

## Optimizations

VPU plugins support layer fusion and decomposition.

### Layer Fusion

#### Fusing Rules

Certain layers can be merged into 'convolution', 'ReLU', and 'Eltwise' layers according to the patterns below:

- Convolution
  - Convolution + ReLU → Convolution
  - Convolution + Clamp → Convolution
  - Convolution + LeakyReLU → Convolution
  - Convolution (3x3, stride=1, padding=1) + Pooling (2x2, stride=2, padding=0) → Convolution

- Pooling + ReLU → Pooling

- FullyConnected + ReLU → FullyConnected

- Eltwise
  - Eltwise + ReLU → Eltwise
  - Eltwise + LeakyReLU → Eltwise
  - Eltwise + Clamp → Eltwise

#### Joining Rules

> **NOTE**: Application of these rules depends on tensor sizes and available resources.

Layers can be joined only when the two conditions below are met:

- Layers are located on topologically independent branches.
- Layers can be executed simultaneously on the same hardware units.

### Decomposition Rules

- Convolution and Pooling layers are tiled, resulting in the following pattern:
  - A `Split` layer that splits tensors into tiles
  - A set of tiles, optionally with service layers like `Copy`
  - Depending on the tiling scheme, a `Concatenation` or `Sum` layer that joins all resulting tensors into one and restores the full blob that contains the result of a tiled operation

Names of tiled layers contain the `@soc=M/N` part, where `M` is the tile number and `N` is the number of tiles:


> **NOTE**: Nominal layers, such as `Shrink` and `Expand`, are not executed.

> **NOTE**: VPU plugins can add extra layers like `Copy`.

## VPU Common Configuration Parameters

VPU plugins support the configuration parameters listed below.
The parameters are passed as `std::map<std::string, std::string>` on `InferenceEngine::Core::LoadNetwork`
or `InferenceEngine::Core::SetConfig`, as shown in the sketch after the table.
When specifying key values as raw strings (when using the Python API), omit the `KEY_` prefix.

| Parameter Name | Parameter Values | Default | Description |
| :--- | :--- | :--- | :--- |
| `KEY_VPU_HW_STAGES_OPTIMIZATION` | `YES`/`NO` | `YES` | Turns on HW stages usage.<br /> Applicable for Intel Movidius Myriad X and Intel Vision Accelerator Design devices only. |
| `KEY_VPU_COMPUTE_LAYOUT` | `VPU_AUTO`, `VPU_NCHW`, `VPU_NHWC` | `VPU_AUTO` | Specifies internal input and output layouts for network layers. |
| `KEY_VPU_PRINT_RECEIVE_TENSOR_TIME` | `YES`/`NO` | `NO` | Adds device-side time spent waiting for input to PerformanceCounts.<br />See the <a href="#VPU_DATA_TRANSFER_PIPELINING">Data Transfer Pipelining</a> section for details. |
| `KEY_VPU_IGNORE_IR_STATISTIC` | `YES`/`NO` | `NO` | The VPU plugin can use statistics present in the IR to try to improve calculation precision.<br /> Enable this option to exclude the statistics. |
| `KEY_VPU_CUSTOM_LAYERS` | path to XML file | empty string | This option allows passing an XML file with custom layer bindings.<br />If a layer is present in such a file, it will be used during inference even if the layer is natively supported. |

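As a minimal sketch of passing these parameters (the raw key strings are assumed from the `KEY_` convention above; the model path is hypothetical):

```cpp
#include <map>
#include <string>
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core ie;
    auto network = ie.ReadNetwork("model.xml");  // hypothetical path

    // Raw-string forms of the KEY_-prefixed parameters from the table.
    std::map<std::string, std::string> config = {
        {"VPU_HW_STAGES_OPTIMIZATION", "YES"},
        {"VPU_PRINT_RECEIVE_TENSOR_TIME", "YES"}};

    auto exec = ie.LoadNetwork(network, "MYRIAD", config);
    return 0;
}
```
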

## Data Transfer Pipelining <a name="VPU_DATA_TRANSFER_PIPELINING"> </a>

The MYRIAD plugin tries to pipeline data transfer to/from a device with computations.
While one infer request is executed, the data for the next infer request can be uploaded to the device in parallel.
The same applies to result downloading.

The `KEY_VPU_PRINT_RECEIVE_TENSOR_TIME` configuration parameter can be used to check the efficiency of the current pipelining.
The new record in performance counters will show the time the device spent waiting for input before starting the inference.
In a perfect pipeline this time should be near zero, which means that the data was already transferred when the new inference started.

## Troubleshooting

**When running inference with the VPU plugin: "[VPU] Cannot convert layer <layer_name> due to unsupported layer type <layer_type>"**

This means that the topology has a layer unsupported by the target VPU plugin. To resolve this issue, a custom layer can be implemented for the target device, using the [OpenVINO™ Extensibility mechanism](../../Extensibility_UG/Intro.md). To quickly get a working prototype, use the heterogeneous scenario with the default fallback policy (see the [Heterogeneous execution](../hetero_execution.md) section). Use the HETERO mode with a fallback device that supports this layer, for example, CPU: `HETERO:MYRIAD,CPU`.
For a list of VPU-supported layers, see the **Supported Layers** section of the [Supported Devices](Supported_Devices.md) page.

## Known Layers Limitations

* The `ScaleShift` layer is supported for a zero value of the `broadcast` attribute only.
* The `CTCGreedyDecoder` layer works with the `ctc_merge_repeated` attribute equal to 1.
* The `DetectionOutput` layer works with zero values of the `interpolate_orientation` and `num_orient_classes` parameters only.
* The `MVN` layer uses a fixed value for the `eps` parameter (1e-9).
* The `Normalize` layer uses a fixed value for the `eps` parameter (1e-9) and is supported for a zero value of `across_spatial` only.
* The `Pad` layer works only with 4D tensors.
* The `Floor` layer works only with the FP16 type.
* The `ConvTranspose` layer is not supported.
* The `GatherElements` layer is not supported for a negative axis.

## See Also

* [Supported Devices](Supported_Devices.md)
* [Intel® Neural Compute Stick 2 Get Started](https://software.intel.com/en-us/neural-compute-stick/get-started)
@@ -39,8 +39,6 @@ Based on the `ov::available_devices` read-only property, OpenVINO Core collects
The function returns a list of available devices, for example:

```
MYRIAD.1.2-ma2480
MYRIAD.1.4-ma2480
CPU
GPU.0
GPU.1

@@ -14,7 +14,7 @@

The [Intel® Distribution of OpenVINO™ toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html) helps accelerate deep learning inference across a variety of Intel® processors and accelerators.

The benchmark results below demonstrate high performance gains on several public neural networks on multiple Intel® CPUs, GPUs and VPUs covering a broad performance range. The results may be helpful when deciding which hardware is best for your applications or to plan AI workload on the Intel computing already included in your solutions.
The benchmark results below demonstrate high performance gains on several public neural networks on multiple Intel® CPUs, GPUs and GNAs covering a broad performance range. The results may be helpful when deciding which hardware is best for your applications or to plan AI workload on the Intel computing already included in your solutions.

Benchmarks are available for:


@@ -78,7 +78,7 @@ Below are four parameters for measurements, which are key elements to consider f
@endsphinxdirective

This benchmark setup includes a single machine on which both the benchmark application and the OpenVINO™ installation reside. The presented performance benchmark numbers are based on the release 2022.3 of the Intel® Distribution of OpenVINO™ toolkit.
The benchmark application loads the OpenVINO™ Runtime and executes inferences on the specified hardware (CPU, GPU or VPU).
The benchmark application loads the OpenVINO™ Runtime and executes inferences on the specified hardware (CPU, GPU or GNA).
It measures the time spent on actual inferencing (excluding any pre or post processing) and then reports on the inferences per second (or Frames Per Second).



@@ -74,10 +74,6 @@ For the OpenVINO build next tools are required:
  ```sh
  % python3 -m pip install -r <openvino source tree>/src/bindings/python/wheel/requirements-dev.txt
  ```
- (native compilation only) libusb library for the MYRIAD device and `pkg-config`, which is used to find `libusb` files:
  ```sh
  % brew install pkg-config libusb
  ```
- (Optional; native compilation only) Latest version of the TBB library. By default, OpenVINO downloads a prebuilt version of the TBB 2020.4 library, but if you want to use the latest one, add `-DENABLE_SYSTEM_TBB=ON` to the cmake configuration step:
  ```sh
  % brew install tbb
@@ -336,8 +332,6 @@ clang: error: linker command failed with exit code 1 (use -v to see invocation)
```
Disable its usage in cmake or remove the library from the system entirely (e.g. `brew uninstall opencv`), because it is pure arm64 and cannot be used to compile x86_64 binaries.

> **Note:** this way the OpenVINO Intel CPU plugin can be cross-compiled, because the MYRIAD plugin cannot be linked against the `arm64` version of `libusb`.

Alternatively, you have to explicitly find or compile x86_64 (or even `universal2`) dependencies yourself and pass them to the OpenVINO cmake scripts. E.g. compile oneTBB using the additional option `-DCMAKE_OSX_ARCHITECTURES="x86_64;arm64"`, install it, and then set `export TBBROOT=<universal oneTBB install root>`, which will be used by OpenVINO.

<p>
@@ -392,8 +386,6 @@ file /opt/homebrew/Cellar/tbb/2021.5.0_2/lib/libtbb.12.5.dylib
/opt/homebrew/Cellar/tbb/2021.5.0_2/lib/libtbb.12.5.dylib: Mach-O 64-bit dynamically linked shared library arm64
```

The same applies to other external dependencies like `libusb`. If you want to enable extra functionality such as the MYRIAD plugin build, you need to provide either an x86_64 or a universal2 `libusb` library. All other steps are the same as for the usual compilation: build, install.

> **Note:** since you are building with `universal2` python libraries, the wheel package is created with the name `openvino-2022.3.0-000-cp39-cp39-macosx_12_0_universal2.whl` and has proper `universal2` tags, so it can *potentially* be used on both Apple Silicon and Intel CPUs.



@@ -47,7 +47,6 @@ OpenVINO components define several common groups which allow to run tests for se
- CPU - CPU plugin tests
- GPU - GPU plugin tests
- GNA - GNA plugin tests
- VPU - VPU plugin tests

After a sufficient number of tests are executed, the coverage numbers can be calculated. In order to do this, run:

@@ -355,7 +355,7 @@ When the sample application is complete, you are given the label and confidence

@endsphinxdirective

The following two examples show how to run the same sample using GPU or MYRIAD as the target device.
The following example shows how to run the same sample using GPU as the target device.

#### Running Inference on GPU

@@ -376,31 +376,6 @@ The following two examples show how to run the same sample using GPU or MYRIAD a

@endsphinxdirective

#### Running Inference on MYRIAD

> **NOTE**: Running inference on VPU devices (Intel® Movidius™ Neural Compute Stick or Intel® Neural Compute Stick 2) with the MYRIAD plugin requires [additional hardware configuration steps](../install_guides/configurations-for-ncs2.md), as described earlier on this page.

@sphinxdirective
.. tab:: Linux

   .. code-block:: sh

      ./classification_sample_async -i ~/Downloads/dog.bmp -m ~/ir/googlenet-v1.xml -d MYRIAD

.. tab:: Windows

   .. code-block:: bat

      .\classification_sample_async.exe -i %USERPROFILE%\Downloads\dog.bmp -m %USERPROFILE%\Documents\ir\googlenet-v1.xml -d MYRIAD

.. tab:: macOS

   .. code-block:: sh

      ./classification_sample_async -i ~/Downloads/dog.bmp -m ~/ir/googlenet-v1.xml -d MYRIAD

@endsphinxdirective


## Other Demos and Samples


@@ -68,7 +68,7 @@ Glossary of terms used in the OpenVINO™
| <code>ov::ProfilingInfo</code> | Represents basic inference profiling information per operation |
| OpenVINO™ Runtime | A C++ library with a set of classes that you can use in your application to infer input tensors and get the results |
| OpenVINO™ API | The basic default API for all supported devices, which allows you to load a model from Intermediate Representation or convert from ONNX or PaddlePaddle file formats, set input and output formats, and execute the model on various devices |
| OpenVINO™ <code>Core</code> | OpenVINO™ Core is a software component that manages inference on certain Intel(R) hardware devices: CPU, GPU, MYRIAD, GNA, etc. |
| OpenVINO™ <code>Core</code> | OpenVINO™ Core is a software component that manages inference on certain Intel(R) hardware devices: CPU, GPU, GNA, etc. |
| <code>ov::Layout</code> | Image data layout refers to the representation of images batch. Layout shows a sequence of 4D or 5D tensor data in memory. A typical NCHW format represents pixel in horizontal direction, rows by vertical dimension, planes by channel and images into batch. See also [Layout API Overview](./OV_Runtime_UG/layout_overview.md) |
| <code>ov::element::Type</code> | Represents data element type. For example, f32 is 32-bit floating point, f16 is 16-bit floating point. |


@@ -1,134 +0,0 @@
# Configurations for IEI Mustang-V100-MX8-R10 Card {#openvino_docs_install_guides_movidius_setup_guide}

> **NOTE**: These steps are only required for the **IEI Mustang-V100-MX8-R10** card. The **IEI Mustang-V100-MX8-R11** card doesn't require any additional steps and is completely configured using the [general guidance](configurations-for-ivad-vpu.md).

The IEI Mustang-V100-MX8 is an OEM version of the Intel® Vision Accelerator Design with Intel® Movidius™ VPUs.
This guide assumes you have installed the [Mustang-V100-MX8](https://download.ieiworld.com/) and OpenVINO™ Runtime.

Instructions in this guide for configuring your accelerator include:
1. Installing the required IEI BSL reset software
2. Configuration settings for the `hddldaemon` service

> **NOTE**: This guide does not apply to Uzel cards.

## Installing IEI Reset Software

Using the IEI Mustang-V100-MX8 requires downloading and installing the most current software for your system.

Visit the [IEI Download Center](https://download.ieiworld.com/) for the most current software and documentation.
Search for **Mustang-V100-MX8**.

Download the appropriate software for your system, extract the downloaded archive file, enter the newly created directory, and run the install script:

On **Linux**:
- Run the `install.sh` script with `sudo`, or as `root`.

On **Windows**, do one of the following:<br>
- **GUI**: Double-click `install.bat`
- **CLI**: Open a console with administrator privileges, cd into the directory, and run `install.bat`.

## Configuring the Mustang-V100-MX8 Service

The `hddldaemon` is a system service, a binary executable that is run to manage the computational workload on the board. It is a required abstraction layer that handles inference, graphics processing, and any type of computation that should be run on the video processing units (VPUs). Depending on the board configuration, there can be 8 or 16 VPUs.

> **NOTE**: Graphics and other specialized processing may require some custom development.

### Conventions Used in This Document

`<OV>` refers to the following default OpenVINO™ Runtime directories:
- **Linux:**
  ```
  /opt/intel/openvino_2022/runtime
  ```
- **Windows:**
  ```
  C:\Program Files (x86)\IntelSWTools\openvino\runtime
  ```

If you have installed OpenVINO™ in a different directory on your system, you will need to enter your unique directory path.

### Configuration File Location

`<OV>\3rdparty\hddl\config\hddl_service.config`

### Service Configuration File Settings

Below are some possible configuration options.

> **NOTE**: After changing a configuration file, the `hddldaemon` must be restarted.

#### Recommended Settings

`device_snapshot_mode`
Changes the output of the `hddldaemon` to display a table with individual VPU statistics.

**Default Setting:**
`"device_snapshot_mode": "none"`

**Suggested Setting:**
`"device_snapshot_mode": "full"`

**Supported Settings:**
- `none` (default)
- `base`
- `full`

`device_snapshot_style`

**Default Setting:**
`"device_snapshot_style": "table"`

**Recommended Setting:**
`"device_snapshot_style": "table"`

The `table` setting presents labels on the left for each column and is recommended as easier to read.
The `tape` setting prints the labels in each column.

**Supported Settings:**
- `tape`
- `table` (default)

`user_group`
Restricts the service to group members.

**The recommended setting depends on your unique system configuration.**

**Default Setting:**
`"user_group": "users"`

The `hddldaemon` may be restricted to a privileged group of users. The appropriate group will vary according to the local system configuration.

**Supported Settings:**
Valid groups on the current system. The `"users"` group is a default group that exists on Windows and most Linux distributions.


**Optional Recommended Settings:**

`"device_utilization" : "off"`
This setting displays the percentage of time each VPU is in use. It appears in the `table` column when turned on, or if `"device_fps"` is turned on.

`"memory_usage" : "off"`
This setting reports the amount of memory being used by each VPU.

`"max_cycle_switchout": 3`
Requires the squeeze scheduler. This setting might speed up performance significantly, depending on the app.

> **NOTE**: This setting works in conjunction with: `max_task_number_switch_out`.

`"client_fps" : "off"`
This setting reports the total FPS for the dispatching hddl_service (which will have one or more clients per app).

`debug_service`
`"debug_service": "false"`
(default: `"true"`)

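Putting several of these options together, a fragment of `hddl_service.config` with the recommended values might look like the sketch below. This is for illustration only — the file format is assumed from the quoted key/value syntax above, and the real file contains many more keys:

```
{
    "device_snapshot_mode": "full",
    "device_snapshot_style": "table",
    "user_group": "users",
    "device_utilization": "off",
    "memory_usage": "off",
    "client_fps": "off",
    "debug_service": "false"
}
```
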

## Additional Resources

- [Intel Distribution of OpenVINO Toolkit home page](https://software.intel.com/en-us/openvino-toolkit)
- [Troubleshooting Guide](troubleshooting.md)
- [Intel® Vision Accelerator Design with Intel® Movidius™ VPUs HAL Configuration Guide](/downloads/595850_Intel_Vision_Accelerator_Design_with_Intel_Movidius_VPUs-HAL Configuration Guide_rev1.3.pdf)
- [Intel® Vision Accelerator Design with Intel® Movidius™ VPUs Workload Distribution User Guide](/downloads/613514_Intel Vision Accelerator Design with Intel Movidius VPUs Workload Distribution_UG_r0.9.pdf)
- [Intel® Vision Accelerator Design with Intel® Movidius™ VPUs Scheduler User Guide](/downloads/613759_Intel Vision Accelerator Design with Intel Movidius VPUs Scheduler_UG_r0.9.pdf)
- [Intel® Vision Accelerator Design with Intel® Movidius™ VPUs Errata](/downloads/Intel Vision Accelerator Design with Intel Movidius VPUs Errata.pdf)
@@ -1,80 +0,0 @@
|
||||
# Configurations for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs {#openvino_docs_install_guides_installing_openvino_ivad_vpu}
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. _vpu guide:
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:hidden:
|
||||
|
||||
IEI Mustang-V100-MX8-R10 Card <openvino_docs_install_guides_movidius_setup_guide>
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
|
||||
The steps in this guide are only required if you want to perform inference on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs with OpenVINO™ on Linux or Windows.
|
||||
|
||||
For troubleshooting issues, please see the [Troubleshooting Guide](troubleshooting.md) for more information.
|
||||
|
||||
## Linux
|
||||
|
||||
For Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, the following additional installation steps are required.
|
||||
|
||||
> **NOTE**: If you installed OpenVINO™ Runtime to the non-default install directory, replace `/opt/intel` with the directory in which you installed the software.
|
||||
|
||||
1. Set the environment variables:
|
||||
```sh
|
||||
source /opt/intel/openvino_2022/setupvars.sh
|
||||
```
|
||||
> **NOTE**: The `HDDL_INSTALL_DIR` variable is set to `<openvino_install_dir>/runtime/3rdparty/hddl`. If you installed the Intel® Distribution of OpenVINO™ to the default install directory, the `HDDL_INSTALL_DIR` was set to `/opt/intel/openvino_2022/runtime/3rdparty/hddl`.
|
||||
|
||||
2. Install dependencies:
|
||||
```sh
|
||||
${HDDL_INSTALL_DIR}/install_IVAD_VPU_dependencies.sh
|
||||
```
|
||||
Note, if the Linux kernel is updated after the installation, it is required to install drivers again:
|
||||
```sh
|
||||
cd ${HDDL_INSTALL_DIR}/drivers
|
||||
```
|
||||
```sh
|
||||
sudo ./setup.sh install
|
||||
```
|
||||
Now the dependencies are installed and you are ready to use the Intel® Vision Accelerator Design with Intel® Movidius™ with OpenVINO™.
|
||||
|
||||
### Optional Steps
|
||||
|
||||
For advanced configuration steps for your **IEI Mustang-V100-MX8-R10** accelerator, see [Configurations for IEI Mustang-V100-MX8-R10 card](configurations-for-iei-card.md). **IEI Mustang-V100-MX8-R11** accelerator doesn't require any additional steps.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. _vpu guide windows:
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
## Windows
|
||||
|
||||
To enable inference on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, the following additional installation steps are required:
|
||||
|
||||
1. Download and install <a href="https://www.microsoft.com/en-us/download/details.aspx?id=48145">Visual C++ Redistributable for Visual Studio 2017</a>
|
||||
2. Check with a support engineer if your Intel® Vision Accelerator Design with Intel® Movidius™ VPUs card requires SMBUS connection to PCIe slot (most unlikely). Install the SMBUS driver only if confirmed (by default, it's not required):
|
||||
1. Go to the `<INSTALL_DIR>\runtime\3rdparty\hddl\drivers\SMBusDriver` directory, where `<INSTALL_DIR>` is the directory in which OpenVINO™ Runtime is installed.
|
||||
2. Right-click the `hddlsmbus.inf` file and choose **Install** from the pop-up menu.
|
||||
|
||||
You are done installing your device driver and are ready to use your Intel® Vision Accelerator Design with Intel® Movidius™ VPUs.
|
||||
|
||||
For advanced configuration steps for your IEI Mustang-V100-MX8 accelerator, see [Configurations for IEI Mustang-V100-MX8-R10 card](configurations-for-iei-card.md).
|
||||
|
||||
## What’s Next?
|
||||
|
||||
After configuration is done, you are ready to try out OpenVINO™.
|
||||
|
||||
Developing in Python:
|
||||
* [Start with TensorFlow models with OpenVINO™](https://docs.openvino.ai/latest/notebooks/101-tensorflow-to-openvino-with-output.html)
|
||||
* [Start with ONNX and PyTorch models with OpenVINO™](https://docs.openvino.ai/latest/notebooks/102-pytorch-onnx-to-openvino-with-output.html)
|
||||
* [Start with PaddlePaddle models with OpenVINO™](https://docs.openvino.ai/latest/notebooks/103-paddle-onnx-to-openvino-classification-with-output.html)
|
||||
|
||||
Developing in C++:
|
||||
* [Image Classification Async C++ Sample](@ref openvino_inference_engine_samples_classification_sample_async_README)
|
||||
* [Hello Classification C++ Sample](@ref openvino_inference_engine_samples_hello_classification_README)
|
||||
* [Hello Reshape SSD C++ Sample](@ref openvino_inference_engine_samples_hello_reshape_ssd_README)
|
||||
@@ -1,108 +0,0 @@
|
||||
# Configurations for Intel® Neural Compute Stick 2 {#openvino_docs_install_guides_configurations_for_ncs2}
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. _ncs guide:
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
|
||||
## Linux
|
||||
|
||||
Once you have OpenVINO™ Runtime installed, follow these steps to set up the Intel® Neural Compute Stick 2 (NCS2):
|
||||
|
||||
1. Go to the `install_dependencies` directory:
|
||||
```sh
|
||||
cd <INSTALL_DIR>/install_dependencies/
|
||||
```
|
||||
2. Run the `install_NCS_udev_rules.sh` script:
|
||||
```sh
|
||||
./install_NCS_udev_rules.sh
|
||||
```
|
||||
3. You may need to reboot your machine for this to take effect.
|
||||
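To verify that the rules were installed (a quick check; the rule file name matches the one referenced in the Troubleshooting Guide):
```sh
# The file is copied to /etc/udev/rules.d/ by install_NCS_udev_rules.sh
ls -l /etc/udev/rules.d/97-myriad-usbboot.rules
```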
|
||||
You've completed all required configuration steps to perform inference on Intel® Neural Compute Stick 2.
|
||||
Proceed to the [Get Started Guide](@ref get_started) section to learn the basic OpenVINO™ workflow and run code samples and demo applications.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. _ncs guide raspbianos:
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
## Raspbian OS
|
||||
|
||||
1. Add the current Linux user to the `users` group:
|
||||
```sh
|
||||
sudo usermod -a -G users "$(whoami)"
|
||||
```
|
||||
Log out and log back in for the change to take effect.
|
||||
2. If you didn't modify `.bashrc` to permanently set the environment variables, run `setupvars.sh` again after logging in:
|
||||
```sh
|
||||
source /opt/intel/openvino_2022/setupvars.sh
|
||||
```
|
||||
3. To perform inference on the Intel® Neural Compute Stick 2, install the USB rules by running the `install_NCS_udev_rules.sh` script:
|
||||
```sh
|
||||
sh /opt/intel/openvino_2022/install_dependencies/install_NCS_udev_rules.sh
|
||||
```
|
||||
4. Plug in your Intel® Neural Compute Stick 2. You can confirm that the device is visible with the check shown after these steps.
|
||||
|
||||
5. (Optional) If you want to compile and run the Image Classification sample to verify the installation of OpenVINO, follow the steps below.
|
||||
|
||||
a. Navigate to a directory that you have write access to and create a samples build directory. This example uses a directory named `build`:
|
||||
```sh
|
||||
mkdir build && cd build
|
||||
```
|
||||
b. Build the Hello Classification Sample:
|
||||
```sh
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-march=armv7-a" /opt/intel/openvino_2022/samples/cpp
|
||||
```
|
||||
```sh
|
||||
make -j2 hello_classification
|
||||
```
|
||||
c. Download the pre-trained squeezenet1.1 image classification model with the Model Downloader or copy it from the host machine:
|
||||
```sh
|
||||
git clone --depth 1 https://github.com/openvinotoolkit/open_model_zoo
|
||||
cd open_model_zoo/tools/model_tools
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install -r requirements.in
|
||||
python3 downloader.py --name squeezenet1.1
|
||||
```
|
||||
d. Run the sample, specifying the model, a path to the input image, and the MYRIAD device, which is required when running on Raspbian OS:
|
||||
```sh
|
||||
./armv7l/Release/hello_classification <path_to_model>/squeezenet1.1.xml <path_to_image> MYRIAD
|
||||
```
|
||||
The application outputs the top-10 classification results to the console window.
|
||||
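To confirm that the stick is visible on the USB bus before running the sample (a sketch; it assumes the device enumerates with the Movidius vendor ID `03e7`):
```sh
# If there is no match, check the cable/port or re-run the udev rules script
lsusb | grep -i "03e7"
```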
|
||||
@sphinxdirective
|
||||
|
||||
.. _ncs guide macos:
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
## macOS
|
||||
|
||||
These steps are required only if you want to perform inference on Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X VPU.
|
||||
|
||||
To perform inference on Intel® Neural Compute Stick 2, the `libusb` library is required. You can build it from the [source code](https://github.com/libusb/libusb) or install it with the macOS package manager you prefer, such as [Homebrew](https://brew.sh/) or [MacPorts](https://www.macports.org/).
|
||||
|
||||
For example, to install the `libusb` library using Homebrew, use the following command:
|
||||
```sh
|
||||
brew install libusb
|
||||
```
|
||||
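To confirm that the library is visible to the build tools (a quick check; it assumes `pkg-config` is installed and Homebrew has linked the package):
```sh
pkg-config --modversion libusb-1.0
```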
|
||||
You've completed all required configuration steps to perform inference on your Intel® Neural Compute Stick 2.
|
||||
|
||||
## What’s Next?
|
||||
|
||||
Now you are ready to try out OpenVINO™. You can use the following tutorials to write your applications using Python and C++.
|
||||
|
||||
Developing in Python:
|
||||
* [Start with TensorFlow models with OpenVINO™](https://docs.openvino.ai/latest/notebooks/101-tensorflow-to-openvino-with-output.html)
|
||||
* [Start with ONNX and PyTorch models with OpenVINO™](https://docs.openvino.ai/latest/notebooks/102-pytorch-onnx-to-openvino-with-output.html)
|
||||
* [Start with PaddlePaddle models with OpenVINO™](https://docs.openvino.ai/latest/notebooks/103-paddle-onnx-to-openvino-classification-with-output.html)
|
||||
|
||||
Developing in C++:
|
||||
* [Image Classification Async C++ Sample](@ref openvino_inference_engine_samples_classification_sample_async_README)
|
||||
* [Hello Classification C++ Sample](@ref openvino_inference_engine_samples_hello_classification_README)
|
||||
* [Hello Reshape SSD C++ Sample](@ref openvino_inference_engine_samples_hello_reshape_ssd_README)
|
||||
@@ -9,8 +9,6 @@
|
||||
:hidden:
|
||||
|
||||
For GPU <openvino_docs_install_guides_configurations_for_intel_gpu>
|
||||
For NCS2 <openvino_docs_install_guides_configurations_for_ncs2>
|
||||
For VPU <openvino_docs_install_guides_installing_openvino_ivad_vpu>
|
||||
For GNA <openvino_docs_install_guides_configurations_for_intel_gna>
|
||||
|
||||
@endsphinxdirective
|
||||
@@ -19,7 +17,5 @@
|
||||
After you have installed OpenVINO™ Runtime, you may also need to do some additional configuration for your device to work with OpenVINO™. See the following pages:
|
||||
|
||||
* [Configurations for GPU](configurations-for-intel-gpu.md)
|
||||
* [Configurations for NCS2](configurations-for-ncs2.md)
|
||||
* [Configurations for VPU](configurations-for-ivad-vpu.md)
|
||||
* [Configurations for GNA](configurations-for-intel-gna.md)
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ This guide provides steps on creating a Docker image with Intel® Distribution o
|
||||
There are two ways to install OpenVINO with Docker. You can choose either of them according to your needs:
|
||||
* Use a prebuilt image. Do the following steps:
|
||||
1. <a href="#get-prebuilt-image">Get a prebuilt image from provided sources</a>.
|
||||
2. <a href="#run-image">Run the image on different devices</a>. To run inferences on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, <a href="#set-up-hddldaemon">configure the Docker image</a> first before you run the image.
|
||||
2. <a href="#run-image">Run the image on different devices</a>.
|
||||
3. <a href="#run-samples">(Optional) Run samples in the Docker image</a>.
|
||||
* If you want to customize your image, you can also build a Docker image manually by using the following steps:
|
||||
1. <a href="#prepare-dockerfile">Prepare a Dockerfile</a>.
|
||||
@@ -112,18 +112,6 @@ RUN yum update -y && yum install -y https://dl.fedoraproject.org/pub/epel/epel-r
|
||||
yum remove -y epel-release
|
||||
```
|
||||
|
||||
### <a name="set-up-hddldaemon"></a>Configuring Docker Image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
|
||||
|
||||
> **NOTE**: When building the Docker image, create a user in the Dockerfile that has the same UID (User Identifier) and GID (Group Identifier) as the user that runs hddldaemon on the host, and then run the application in the Docker image as this user. This step is necessary to run the container as a non-root user.
|
||||
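If you prefer not to modify the Dockerfile, one way to match the host user at run time is to pass the UID and GID explicitly (a sketch; creating the user in the Dockerfile, as the note describes, is the more complete approach):
```sh
# Run the container with the same UID/GID as the current host user
docker run -it --rm -u "$(id -u):$(id -g)" --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp <image_name>
```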
|
||||
To use the Docker container for inference on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, do the following steps:
|
||||
|
||||
1. Set up the environment on the host machine to be used for running Docker. You must run `hddldaemon`, which is responsible for communication between the HDDL plugin and the board. To learn how to set up the environment (the OpenVINO package or HDDL package must be pre-installed), see [Configuration guide for HDDL device](https://github.com/openvinotoolkit/docker_ci/blob/master/install_guide_vpu_hddl.md) or [Configurations for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs on Linux](configurations-for-ivad-vpu.md).
|
||||
2. Run `hddldaemon` on the host in a separate terminal session using the following command:
|
||||
```sh
|
||||
$HDDL_INSTALL_DIR/hddldaemon
|
||||
```
|
||||
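In another terminal, you can confirm that the daemon is running (`pidof` is used the same way in the Troubleshooting section below):
```sh
# Prints the PID of hddldaemon; empty output means it is not running
pidof hddldaemon
```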
|
||||
## <a name="run-image"></a>Running the Docker Image on Different Devices
|
||||
|
||||
### Running the Image on CPU
|
||||
@@ -165,55 +153,6 @@ To make GPU available in the container, attach the GPU to the container using `-
|
||||
> **NOTE**: To launch a Linux image on WSL2, make sure that the additional requirements in <a href="#system-requirements">System Requirements</a> are met.
|
||||
|
||||
|
||||
### Running the Image on Intel® Neural Compute Stick 2
|
||||
|
||||
Run the Docker image with the following command:
|
||||
```sh
|
||||
docker run -it --rm --device-cgroup-rule='c 189:* rmw' -v /dev/bus/usb:/dev/bus/usb <image_name>
|
||||
```
|
||||
|
||||
If the command above does not work, you can also run the container in privileged mode, enable the Docker network configuration as host, and mount all devices to the container. Run the following command:
|
||||
```sh
|
||||
docker run -it --rm --privileged -v /dev:/dev --network=host <image_name>
|
||||
```
|
||||
|
||||
> **NOTE**: This option is not recommended, as conflicts with Kubernetes and other tools that use orchestration and private networks may occur. Please use it with caution and only for troubleshooting purposes.
|
||||
|
||||
#### Known Limitations
|
||||
|
||||
- The Intel® Neural Compute Stick 2 device changes its VendorID and DeviceID during execution, so each time it appears to the host system as a brand-new device. This means it cannot be mounted as usual.
|
||||
- UDEV events are not forwarded to the container by default, so it does not know about the device reconnection. The prebuilt Docker images and provided Dockerfiles include `libusb` rebuilt without UDEV support.
|
||||
- Only one NCS2 device connected to the host can be used when running inference in a container.
|
||||
|
||||
|
||||
### Running the Image on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
|
||||
|
||||
> **NOTE**: To run inferences on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, make sure that you have <a href="#set-up-hddldaemon">configured the Docker image</a> first.
|
||||
|
||||
Use the following command:
|
||||
```sh
|
||||
docker run -it --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp <image_name>
|
||||
```
|
||||
|
||||
If your application runs inference on a network with large (>4 MB) inputs/outputs, the HDDL plugin will use shared memory. In this case, you must mount `/dev/shm` as a volume:
|
||||
```sh
|
||||
docker run -it --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp -v /dev/shm:/dev/shm <image_name>
|
||||
```
|
||||
|
||||
Note the following:
|
||||
* The device `/dev/ion` needs to be shared to be able to use ion buffers among the plugin, `hddldaemon` and the kernel.
|
||||
* Since separate inference tasks share the same HDDL service communication interface (the service creates mutexes and a socket file in `/var/tmp`), `/var/tmp` needs to be mounted and shared among them.
|
||||
|
||||
|
||||
#### If the ion Driver is Not Enabled
|
||||
|
||||
In some cases, the ion driver is not enabled (for example, due to a newer kernel version or an iommu (Input-Output Memory Management Unit) incompatibility), and `lsmod | grep myd_ion` returns empty output. To resolve this issue, use the following command:
|
||||
```sh
|
||||
docker run -it --rm --ipc=host --net=host -v /var/tmp:/var/tmp <image_name>
|
||||
```
|
||||
If that still does not solve the issue, try starting `hddldaemon` as the root user on the host. However, this approach is not recommended. Please use it with caution.
|
||||
|
||||
|
||||
## <a name="run-samples"></a>Running Samples in Docker Image
|
||||
|
||||
To run the `Hello Classification Sample` on a specific inference device, run the following commands:
|
||||
@@ -232,20 +171,6 @@ docker run -itu root:root --rm --device /dev/dri:/dev/dri <image_name>
|
||||
/bin/bash -c "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -O https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python3 samples/python/hello_classification/hello_classification.py public/googlenet-v1/FP16/googlenet-v1.xml car_1.bmp GPU"
|
||||
```
|
||||
|
||||
**MYRIAD**:
|
||||
|
||||
```sh
|
||||
docker run -itu root:root --rm --device-cgroup-rule='c 189:* rmw' -v /dev/bus/usb:/dev/bus/usb <image_name>
|
||||
/bin/bash -c "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -O https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python3 samples/python/hello_classification/hello_classification.py public/googlenet-v1/FP16/googlenet-v1.xml car_1.bmp MYRIAD"
|
||||
```
|
||||
|
||||
**HDDL**:
|
||||
|
||||
```sh
|
||||
docker run -itu root:root --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp -v /dev/shm:/dev/shm <image_name>
|
||||
/bin/bash -c "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -O https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && umask 000 && python3 samples/python/hello_classification/hello_classification.py public/googlenet-v1/FP16/googlenet-v1.xml car_1.bmp HDDL"
|
||||
```
|
||||
|
||||
## Additional Resources
|
||||
|
||||
- [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs.
|
||||
|
||||
@@ -129,26 +129,13 @@ See the [Install OpenVINO Development Tools](installing-model-dev-tools.md) page
|
||||
OpenCV is necessary to run demos from Open Model Zoo (OMZ). Some OpenVINO samples can also extend their capabilities when compiled with OpenCV as a dependency. To install OpenCV for OpenVINO, see the [instructions on GitHub](https://github.com/opencv/opencv/wiki/BuildOpenCV4OpenVINO).
|
||||
|
||||
### <a name="optional-steps"></a>Step 4 (Optional): Configure Inference on Non-CPU Devices
|
||||
OpenVINO Runtime has a plugin architecture that enables you to run inference on multiple devices without rewriting your code. Supported devices include integrated GPUs, discrete GPUs, NCS2, VPUs, and GNAs. See the instructions below to set up OpenVINO on these devices.
|
||||
OpenVINO Runtime has a plugin architecture that enables you to run inference on multiple devices without rewriting your code. Supported devices include integrated GPUs, discrete GPUs and GNAs. See the instructions below to set up OpenVINO on these devices.
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: GPU
|
||||
|
||||
To enable the toolkit components to use processor graphics (GPU) on your system, follow the steps in :ref:`GPU Setup Guide <gpu guide>`.
|
||||
|
||||
.. tab:: NCS 2
|
||||
|
||||
To perform inference on Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X VPU, follow the steps in :ref:`NCS2 Setup Guide <ncs guide>`.
|
||||
<!--For more details, see the `Get Started page for Intel® Neural Compute Stick 2 <https://software.intel.com/en-us/neural-compute-stick/get-started>`.-->
|
||||
|
||||
.. tab:: VPU
|
||||
|
||||
To install and configure your Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, see the :ref:`VPU Configuration Guide <vpu guide>`.
|
||||
After configuration is done, you are ready to run the verification scripts with the HDDL Plugin for your Intel® Vision Accelerator Design with Intel® Movidius™ VPUs.
|
||||
|
||||
.. warning::
|
||||
While working with these devices, choose either HDDL or NCS2, as they cannot run simultaneously on the same machine.
|
||||
|
||||
.. tab:: GNA
|
||||
|
||||
To enable the toolkit components to use Intel® Gaussian & Neural Accelerator (GNA) on your system, follow the steps in :ref:`GNA Setup Guide <gna guide>`.
|
||||
|
||||
@@ -101,14 +101,6 @@ See the [Install OpenVINO Development Tools](installing-model-dev-tools.md) page
|
||||
|
||||
OpenCV is necessary to run demos from Open Model Zoo (OMZ). Some OpenVINO samples can also extend their capabilities when compiled with OpenCV as a dependency. To install OpenCV for OpenVINO, see the [instructions on GitHub](https://github.com/opencv/opencv/wiki/BuildOpenCV4OpenVINO).
|
||||
|
||||
### <a name="configure-ncs2"></a>Step 4 (Optional): Configure the Intel® Neural Compute Stick 2
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
If you want to run inference on Intel® Neural Compute Stick 2, use the following instructions to set up the device: :ref:`NCS2 Setup Guide <ncs guide macos>`.
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
## <a name="get-started"></a>What's Next?
|
||||
Now that you've installed OpenVINO Runtime, you're ready to run your own machine learning applications! Learn more about how to integrate a model in OpenVINO applications by trying out the following tutorials.
|
||||
|
||||
|
||||
@@ -104,21 +104,13 @@ See the [Install OpenVINO Development Tools](installing-model-dev-tools.md) page
|
||||
OpenCV is necessary to run demos from Open Model Zoo (OMZ). Some OpenVINO samples can also extend their capabilities when compiled with OpenCV as a dependency. To install OpenCV for OpenVINO, see the [instructions on GitHub](https://github.com/opencv/opencv/wiki/BuildOpenCV4OpenVINO).
|
||||
|
||||
### <a name="optional-steps"></a>Step 4 (Optional): Configure Inference on non-CPU Devices
|
||||
OpenVINO Runtime has a plugin architecture that enables you to run inference on multiple devices without rewriting your code. Supported devices include integrated GPUs, discrete GPUs, NCS2, VPUs, and GNAs. See the instructions below to set up OpenVINO on these devices.
|
||||
OpenVINO Runtime has a plugin architecture that enables you to run inference on multiple devices without rewriting your code. Supported devices include integrated GPUs, discrete GPUs and GNAs. See the instructions below to set up OpenVINO on these devices.
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: GPU
|
||||
|
||||
To enable the toolkit components to use processor graphics (GPU) on your system, follow the steps in :ref:`GPU Setup Guide <gpu guide windows>`.
|
||||
|
||||
.. tab:: VPU
|
||||
|
||||
To install and configure your Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, see the :ref:`VPU Configuration Guide <vpu guide windows>`.
|
||||
|
||||
.. tab:: NCS 2
|
||||
|
||||
No additional configurations are needed.
|
||||
|
||||
.. tab:: GNA
|
||||
|
||||
To enable the toolkit components to use Intel® Gaussian & Neural Accelerator (GNA) on your system, follow the steps in :ref:`GNA Setup Guide <gna guide windows>`.
|
||||
|
||||
@@ -95,7 +95,7 @@ Visit the [Tutorials](../tutorials.md) page for more Jupyter Notebooks to get yo
|
||||
* [Convert a PyTorch model and use it for image background removal](https://docs.openvino.ai/nightly/notebooks/205-vision-background-removal-with-output.html)
|
||||
|
||||
### Run OpenVINO on accelerated devices
|
||||
OpenVINO Runtime has a plugin architecture that enables you to run inference on multiple devices without rewriting your code. Supported devices include integrated GPUs, discrete GPUs, NCS2, VPUs, and GNAs. Visit the [Additional Configurations](configurations-header.md) page for instructions on how to configure your hardware devices to work with OpenVINO.
|
||||
OpenVINO Runtime has a plugin architecture that enables you to run inference on multiple devices without rewriting your code. Supported devices include integrated GPUs, discrete GPUs and GNAs. Visit the [Additional Configurations](configurations-header.md) page for instructions on how to configure your hardware devices to work with OpenVINO.
|
||||
|
||||
## Additional Resources
|
||||
|
||||
|
||||
@@ -158,19 +158,6 @@ If you want to use your model for inference, the model must be converted to the
|
||||
|
||||
* OpenVINO Development Tools is a set of utilities for working with OpenVINO and OpenVINO models. It provides tools like Model Optimizer, Benchmark Tool, Post-Training Optimization Tool, and Open Model Zoo Downloader. See the :doc:`Install OpenVINO Development Tools <openvino_docs_install_guides_install_dev_tools>` page for step-by-step installation instructions.
|
||||
|
||||
.. _add-usb-rules:
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
|
||||
## Step 5 (Optional): Add USB Rules for an Intel® Neural Compute Stick 2 device
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
To perform inference on Intel® Neural Compute Stick 2, follow the steps in :ref:`NCS2 Setup Guide <ncs guide raspbianos>`.
|
||||
|
||||
.. _get-started:
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
|
||||
|
||||
@@ -58,9 +58,6 @@ Follow the [Yocto Project official documentation](https://docs.yoctoproject.org/
|
||||
# Include OpenVINO Python API package in the target image.
|
||||
CORE_IMAGE_EXTRA_INSTALL:append = " openvino-inference-engine-python3"
|
||||
|
||||
# Enable MYRIAD plugin
|
||||
CORE_IMAGE_EXTRA_INSTALL:append = " openvino-inference-engine-vpu-firmware"
|
||||
|
||||
# Include Model Optimizer in the target image.
|
||||
CORE_IMAGE_EXTRA_INSTALL:append = " openvino-model-optimizer"
|
||||
```
|
||||
@@ -89,7 +86,6 @@ openvino-inference-engine-dev
|
||||
openvino-inference-engine-python3
|
||||
openvino-inference-engine-samples
|
||||
openvino-inference-engine-src
|
||||
openvino-inference-engine-vpu-firmware
|
||||
openvino-model-optimizer
|
||||
openvino-model-optimizer-dbg
|
||||
openvino-model-optimizer-dev
|
||||
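To list the OpenVINO recipes that your configured layers actually provide (a sketch; it assumes an initialized BitBake build environment):
```sh
bitbake -s | grep openvino
```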
|
||||
@@ -31,179 +31,6 @@ Users in China might encounter errors while downloading sources via PIP during O
|
||||
|
||||
If you encounter proxy issues during the installation with Docker, you need to set up proxy settings for Docker. See the [Set Proxy section in DL Workbench Installation](https://docs.openvino.ai/latest/workbench_docs_Workbench_DG_Prerequisites.html#set-proxy) for more details.
|
||||
|
||||
### Permission Errors for /dev/shm
|
||||
|
||||
If you encounter a permission error for files in `/dev/shm` (see `hddldaemon.log`), a possible cause is that the UID and GID of the container user are different from the UID and GID of the user who created the `hddldaemon` service on the host.
|
||||
|
||||
Try one of these solutions:
|
||||
|
||||
* Create the user in the Docker container with the same uid and gid as the HDDL daemon user.
|
||||
* Set the container user umask to 0000: `umask 000`.
|
||||
* (NOT RECOMMENDED) Start HDDL daemon on the host as root and start the container as root with the `-u root:root` option.
|
||||
|
||||
## Issues with Configurations for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
|
||||
|
||||
<!-- this part was taken from original configurations-for-ivad-vpu.md -->
|
||||
|
||||
### Unable to run inference with the MYRIAD Plugin after running with the HDDL Plugin
|
||||
|
||||
Running inference with the MYRIAD Plugin after running with the HDDL Plugin fails with the following error:
|
||||
|
||||
```sh
|
||||
E: [ncAPI] [ 965618] [MainThread] ncDeviceOpen:677 Failed to find a device, rc: X_LINK_ERROR
|
||||
```
|
||||
|
||||
**Possible solutions (use one of the following):**
|
||||
|
||||
* Reboot the host system and run with the MYRIAD Plugin
|
||||
|
||||
* Kill the HDDL Plugin backend service (`hddldaemon`) and reset all Intel® Movidius™ VPUs before running an application that uses the MYRIAD Plugin:
|
||||
```sh
|
||||
kill -9 $(pidof hddldaemon autoboot)
|
||||
pidof hddldaemon autoboot # Make sure none of them is alive
|
||||
source /opt/intel/openvino_2022/setupvars.sh
|
||||
${HDDL_INSTALL_DIR}/bin/bsl_reset
|
||||
```
|
||||
|
||||
---
|
||||
### "No space left on device" error while loading a network
|
||||
When the application runs inference on a network with large (>4 MB) inputs/outputs, or the system is running out of the DMA buffer,
|
||||
the HDDL Plugin falls back to using shared memory.
|
||||
In this case, if the application exits abnormally, the shared memory is not released automatically.
|
||||
To release it manually, remove files with the `hddl_` prefix from the `/dev/shm` directory:
|
||||
```sh
|
||||
sudo rm -f /dev/shm/hddl_*
|
||||
```
|
||||
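To first check whether any leftover segments are present (a quick check before removing anything):
```sh
ls /dev/shm | grep '^hddl_'
```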
|
||||
---
|
||||
### Solutions to the permission issue
|
||||
|
||||
Make sure that the following udev rules exist:
|
||||
- `/etc/udev/rules.d/97-myriad-usbboot.rules`
|
||||
- `/etc/udev/rules.d/98-hddlbsl.rules`
|
||||
- `/etc/udev/rules.d/99-hddl-ion.rules`
|
||||
- `/etc/udev/rules.d/99-myriad-vsc.rules`
|
||||
|
||||
Also make sure that the current user is included in the `users` group. If not, run the command below to add the user:
|
||||
```sh
|
||||
sudo usermod -a -G users "$(whoami)"
|
||||
```
|
||||
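You can verify the membership afterwards (a quick check; remember to log out and back in for the group change to take effect):
```sh
# Lists the groups of the current user; "users" should appear
id -nG "$(whoami)"
```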
|
||||
<!--
|
||||
### setup.sh doesn't install the driver binaries to /lib/modules on CentOS systems
|
||||
|
||||
As a temporary workaround, run the commands below to install the drivers. This issue will be fixed in future releases.
|
||||
|
||||
```sh
|
||||
sudo mkdir -p /lib/modules/$(uname -r)/kernel/drivers/myd/
|
||||
```
|
||||
```sh
|
||||
sudo cp drv_vsc/myd_vsc.ko /lib/modules/$(uname -r)/kernel/drivers/myd/
|
||||
```
|
||||
```sh
|
||||
sudo cp drv_ion/myd_ion.ko /lib/modules/$(uname -r)/kernel/drivers/myd/
|
||||
```
|
||||
```sh
|
||||
sudo touch /etc/modules-load.d/intel_vision_accelerator.conf
|
||||
```
|
||||
```sh
|
||||
sudo echo "myd_vsc" >> /etc/modules-load.d/intel_vision_accelerator.conf
|
||||
```
|
||||
```sh
|
||||
sudo echo "myd_ion" >> /etc/modules-load.d/intel_vision_accelerator.conf
|
||||
```
|
||||
```sh
|
||||
sudo depmod
|
||||
```
|
||||
```sh
|
||||
sudo modprobe myd_vsc
|
||||
```
|
||||
```sh
|
||||
sudo modprobe myd_ion
|
||||
```
|
||||
-->
|
||||
|
||||
---
|
||||
### Host machine reboots after running an inference application with the HDDL plugin
|
||||
|
||||
**Symptom:** After you boot up the host machine and run the inference application with the HDDL plugin, the system reboots at an unpredictable time.
|
||||
|
||||
**Root Cause:** The I2C address of the reset device of the Intel® Vision Accelerator Design with Intel® Movidius™ VPUs conflicts with the I2C address of another device in the 0x20-0x27 range. If the target Intel® Vision Accelerator Design with Intel® Movidius™ VPUs device needs to be reset (for example, in case of device errors), the `libbsl` library, which is responsible for the reset, expects the target reset device I2C address to be in the 0x20-0x27 range on SMBUS. If there is another device on SMBUS in this address range, `libbsl` treats this device as the target reset device and writes an unexpected value to this address. This causes the system reboot.
|
||||
|
||||
**Solution:** Detect whether there is any I2C device on SMBUS with an address in the 0x20-0x27 range. If yes, do the following:
|
||||
|
||||
1. Change the DIP switch on the target PCIE card
|
||||
2. Disable autoscan for the reset device by setting `"autoscan": false` in `${HDDL_INSTALL_DIR}/config/bsl.json`
|
||||
3. Set the correct address of the I2C reset device (for example, `0x21`, which appears as decimal `33` in the JSON below) in `${HDDL_INSTALL_DIR}/config/bsl.json`
|
||||
|
||||
```json
|
||||
{
|
||||
"autoscan": false,
|
||||
"ioexpander": {
|
||||
"enabled": true,
|
||||
"i2c_addr": [ 33 ]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
### Cannot reset the VPU device and cannot find any 0x20-0x27 I2C addresses on SMBUS using i2c-tools (raw data card with HW version Fab-B and earlier)
|
||||
|
||||
Please contact your motherboard vendor to verify that the SMBUS pins are connected to the PCIe slot.
|
||||
|
||||
---
|
||||
### "Error: ipc_connection_linux_UDS : bind() failed" in hddldaemon log
|
||||
|
||||
You may have run `hddldaemon` under another user. Run the command below and try again:
|
||||
```sh
|
||||
sudo rm -rf /var/tmp/hddl_*
|
||||
```
|
||||
|
||||
---
|
||||
### "I2C bus: SMBus I801 adapter at not found!" in hddldaemon log
|
||||
|
||||
Run the following command to check if an SMBUS I801 adapter can be found:
|
||||
```sh
|
||||
i2cdetect -l
|
||||
```
|
||||
Then run:
|
||||
```sh
|
||||
sudo modprobe i2c-i801
|
||||
```
|
||||
---
|
||||
### "open /dev/ion failed!" in hddldaemon log
|
||||
|
||||
Check if the `myd_ion` kernel module is loaded by running the following command:
|
||||
```sh
|
||||
lsmod | grep myd_ion
|
||||
```
|
||||
If you do not see any output from the command, use the following command to load the `myd_ion` module again:
|
||||
```sh
|
||||
sudo modprobe myd_ion
|
||||
```
|
||||
|
||||
---
|
||||
### Constantly getting "\_name\_mapping open failed err=2,No such file or directory" in hddldaemon log
|
||||
|
||||
Check if the `myd_vsc` kernel module is loaded by running the following command:
|
||||
```sh
|
||||
lsmod | grep myd_vsc
|
||||
```
|
||||
If you do not see any output from the command, use the following command to load the `myd_vsc` module again:
|
||||
```sh
|
||||
sudo modprobe myd_vsc
|
||||
```
|
||||
|
||||
---
|
||||
### "Required key not available" appears when trying to install the myd_ion or myd_vsc modules
|
||||
|
||||
Run the following commands:
|
||||
```sh
|
||||
sudo apt install mokutil
|
||||
```
|
||||
```sh
|
||||
sudo mokutil --disable-validation
|
||||
```
|
||||
|
||||
@anchor yocto-install-issues
|
||||
## Issues with Creating a Yocto Image for OpenVINO
|
||||
|
||||
@@ -8,7 +8,7 @@ This configurable method of this device-side parallelism is commonly referred as
|
||||
|
||||
> **NOTE**: Be aware that the streams **really execute the requests in parallel, but not in lock step** (as batching does), which makes the streams fully compatible with [dynamically-shaped inputs](../OV_Runtime_UG/ov_dynamic_shapes.md), as individual requests can have different shapes.
|
||||
|
||||
> **NOTE**: Most OpenVINO devices (including CPU, GPU and VPU) support the streams, yet the *optimal* number of the streams is deduced very differently. More information on this topic can be found in the section [below](@ref stream_considerations).
|
||||
> **NOTE**: Most OpenVINO devices (including CPU and GPU) support the streams, yet the *optimal* number of the streams is deduced very differently. More information on this topic can be found in the section [below](@ref stream_considerations).
|
||||
|
||||
A few general considerations:
|
||||
* Using the streams does increase the latency of an individual request:
|
||||
|
||||
@@ -15,7 +15,7 @@ ov::CompiledModel compiled_model1 = core.compile_model(model, "AUTO",
|
||||
ov::CompiledModel compiled_model2 = core.compile_model(model, "AUTO",
|
||||
ov::hint::model_priority(ov::hint::Priority::LOW));
|
||||
/************
|
||||
Assume that all the devices (CPU, GPU, and MYRIAD) can support all the networks.
|
||||
Assume that all the devices (CPU, GPU, and MYRIAD) can support all the models.
|
||||
Result: compiled_model0 will use GPU, compiled_model1 will use MYRIAD, compiled_model2 will use CPU.
|
||||
************/
|
||||
|
||||
@@ -27,7 +27,7 @@ ov::CompiledModel compiled_model4 = core.compile_model(model, "AUTO",
|
||||
ov::CompiledModel compiled_model5 = core.compile_model(model, "AUTO",
|
||||
ov::hint::model_priority(ov::hint::Priority::LOW));
|
||||
/************
|
||||
Assume that all the devices (CPU, GPU, and MYRIAD) can support all the networks.
|
||||
Assume that all the devices (CPU, GPU, and MYRIAD) can support all the models.
|
||||
Result: compiled_model3 will use GPU, compiled_model4 will use GPU, compiled_model5 will use MYRIAD.
|
||||
************/
|
||||
//! [part4]
|
||||
|
||||
@@ -18,7 +18,6 @@ endif()
|
||||
file(GLOB SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/src/*.c"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/vpu/*.cpp"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/gpu/*.cpp")
|
||||
|
||||
find_package(OpenCL)
|
||||
|
||||
@@ -25,7 +25,7 @@ int main(int argc, char* argv[]) {
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
// Optimize for latency. Most of the devices are configured for latency by default,
|
||||
// but there are exceptions like MYRIAD
|
||||
// but there are exceptions like GNA
|
||||
ov::AnyMap latency{{ov::hint::performance_mode.name(), ov::hint::PerformanceMode::LATENCY}};
|
||||
|
||||
// Create ov::Core and use it to compile a model.
|
||||
|
||||
@@ -55,7 +55,7 @@ For more information on performance hints, see the [High-level Performance Hints
|
||||
|
||||
|
||||
### Device
|
||||
To set which device benchmarking runs on, use the `-d <device>` argument. This will tell benchmark_app to run benchmarking on that specific device. The benchmark app supports "CPU", "GPU", and "MYRIAD" (also known as [VPU](../../../docs/OV_Runtime_UG/supported_plugins/VPU.md)) devices. In order to use the GPU or VPU, the system must have the appropriate drivers installed. If no device is specified, benchmark_app will default to using CPU.
|
||||
To set which device benchmarking runs on, use the `-d <device>` argument. This will tell benchmark_app to run benchmarking on that specific device. The benchmark app supports "CPU", "GPU", and "GNA" devices. In order to use the GPU or GNA, the system must have the appropriate drivers installed. If no device is specified, benchmark_app will default to using CPU.
|
||||
|
||||
For example, to run benchmarking on GPU, use:
|
||||
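A typical invocation looks like the following (a sketch; `model.xml` is a placeholder for your model path):
```sh
benchmark_app -m model.xml -d GPU
```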
|
||||
@@ -140,7 +140,7 @@ Options:
|
||||
-latency_percentile Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median).
|
||||
|
||||
device-specific performance options:
|
||||
  -nstreams <integer>  Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices (for HETERO and MULTI device cases use format <dev1>:<nstreams1>,<dev2>:<nstreams2> or just <nstreams>). Default value is determined automatically for a device. Please note that although the automatic selection usually provides reasonable performance, it still may be non-optimal for some cases, especially for very small models. See sample's README for more details. Also, using nstreams>1 is an inherently throughput-oriented option, while for the best-latency estimations the number of streams should be set to 1.
|
||||
  -nstreams <integer>  Optional. Number of streams to use for inference on the CPU or GPU devices (for HETERO and MULTI device cases use format <dev1>:<nstreams1>,<dev2>:<nstreams2> or just <nstreams>). Default value is determined automatically for a device. Please note that although the automatic selection usually provides reasonable performance, it still may be non-optimal for some cases, especially for very small models. See sample's README for more details. Also, using nstreams>1 is an inherently throughput-oriented option, while for the best-latency estimations the number of streams should be set to 1.
|
||||
-nthreads <integer> Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases).
|
||||
-pin <string> ("YES"|"CORE") / "HYBRID_AWARE" / ("NO"|"NONE") / "NUMA" Optional. Explicit inference threads binding options (leave empty to let the OpenVINO make a choice):
|
||||
enabling threads->cores pinning("YES", which is already default for any conventional CPU),
|
||||
|
||||
@@ -74,7 +74,7 @@ static const char infer_num_threads_message[] = "Optional. Number of threads to
|
||||
|
||||
/// @brief message for #streams for CPU inference
|
||||
static const char infer_num_streams_message[] =
|
||||
"Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices "
|
||||
"Optional. Number of streams to use for inference on the CPU or GPU devices "
|
||||
"(for HETERO and MULTI device cases use format <dev1>:<nstreams1>,<dev2>:<nstreams2> or just "
|
||||
"<nstreams>). "
|
||||
"Default value is determined automatically for a device.Please note that although the "
|
||||
|
||||
@@ -470,26 +470,24 @@ int main(int argc, char* argv[]) {
|
||||
"but it still may be non-optimal for some cases, for more "
|
||||
"information look at README."
|
||||
<< slog::endl;
|
||||
if (device.find("MYRIAD") == std::string::npos) { // MYRIAD sets the default number of
|
||||
// streams implicitly (without _AUTO)
|
||||
if (supported(key)) {
|
||||
device_config[key] = std::string(getDeviceTypeFromName(device) + "_THROUGHPUT_AUTO");
|
||||
} else if (supported(ov::num_streams.name())) {
|
||||
// Use API 2.0 key for streams
|
||||
key = ov::num_streams.name();
|
||||
device_config[key] = ov::streams::AUTO;
|
||||
} else if (device == "MULTI" || device == "AUTO") {
|
||||
// Set nstreams to default value auto if no nstreams specified from cmd line.
|
||||
for (auto& hwdevice : hardware_devices) {
|
||||
std::string key = std::string(getDeviceTypeFromName(hwdevice) + "_THROUGHPUT_STREAMS");
|
||||
auto value = std::string(getDeviceTypeFromName(hwdevice) + "_THROUGHPUT_AUTO");
|
||||
setDeviceProperty(core,
|
||||
hwdevice,
|
||||
device_config,
|
||||
ov::num_streams(ov::streams::AUTO),
|
||||
is_dev_set_property,
|
||||
std::make_pair(key, value));
|
||||
}
|
||||
|
||||
if (supported(key)) {
|
||||
device_config[key] = std::string(getDeviceTypeFromName(device) + "_THROUGHPUT_AUTO");
|
||||
} else if (supported(ov::num_streams.name())) {
|
||||
// Use API 2.0 key for streams
|
||||
key = ov::num_streams.name();
|
||||
device_config[key] = ov::streams::AUTO;
|
||||
} else if (device == "MULTI" || device == "AUTO") {
|
||||
// Set nstreams to default value auto if no nstreams specified from cmd line.
|
||||
for (auto& hwdevice : hardware_devices) {
|
||||
std::string key = std::string(getDeviceTypeFromName(hwdevice) + "_THROUGHPUT_STREAMS");
|
||||
auto value = std::string(getDeviceTypeFromName(hwdevice) + "_THROUGHPUT_AUTO");
|
||||
setDeviceProperty(core,
|
||||
hwdevice,
|
||||
device_config,
|
||||
ov::num_streams(ov::streams::AUTO),
|
||||
is_dev_set_property,
|
||||
std::make_pair(key, value));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -578,9 +576,6 @@ int main(int argc, char* argv[]) {
|
||||
// for CPU and GPU execution, more throughput-oriented execution via streams
|
||||
setThroughputStreams();
|
||||
set_infer_precision();
|
||||
} else if (device.find("MYRIAD") != std::string::npos) {
|
||||
device_config.emplace(ov::log::level(ov::log::Level::WARNING));
|
||||
setThroughputStreams();
|
||||
} else if (device.find("GNA") != std::string::npos) {
|
||||
set_infer_precision();
|
||||
} else if (device.find("AUTO") != std::string::npos) {
|
||||
|
||||
@@ -61,9 +61,6 @@ size_t InputInfo::depth() const {
|
||||
uint32_t device_default_device_duration_in_seconds(const std::string& device) {
|
||||
static const std::map<std::string, uint32_t> deviceDefaultDurationInSeconds{{"CPU", 60},
|
||||
{"GPU", 60},
|
||||
{"VPU", 60},
|
||||
{"MYRIAD", 60},
|
||||
{"HDDL", 60},
|
||||
{"UNKNOWN", 120}};
|
||||
uint32_t duration = 0;
|
||||
for (const auto& deviceDurationInSeconds : deviceDefaultDurationInSeconds) {
|
||||
|
||||
@@ -58,7 +58,6 @@ Several execution modes are supported via the `-d` flag:
|
||||
|
||||
- `CPU` - All calculations are performed on the CPU device using the CPU Plugin.
|
||||
- `GPU` - All calculations are performed on the GPU device using the GPU Plugin.
|
||||
- `MYRIAD` - All calculations are performed on the Intel® Neural Compute Stick 2 device using the VPU MYRIAD Plugin.
|
||||
- `VPUX` - All calculations are performed on the VPUX device using the VPUX Plugin.
|
||||
- `GNA_AUTO` - GNA hardware is used if available and the driver is installed. Otherwise, the GNA device is emulated in fast-but-not-bit-exact mode.
|
||||
- `GNA_HW` - GNA hardware is used if available and the driver is installed. Otherwise, an error will occur.
|
||||
@@ -103,7 +102,7 @@ Options:
|
||||
-i "<path>" Required. Paths to input file or Layers names with corresponding paths to the input files. Example of usage for single file: <file.ark> or <file.npz>. Example of usage for named layers: <layer1>=<file1.ark>,<layer2>=<file2.ark>.
|
||||
-m "<path>" Required. Path to an .xml file with a trained model (required if -rg is missing).
|
||||
-o "<path>" Optional. Output file name to save scores or Layer names with corresponding files names to save scores. Example of usage for single file: <output.ark> or <output.npz>. Example of usage for named layers: Example of usage for named layers: <layer1:port_num>=<output_file1.ark>,<layer2:port_num>=<output_file2.ark>.
|
||||
-d "<device>" Optional. Specify a target device to infer on. CPU, GPU, MYRIAD, VPUX, GNA_AUTO, GNA_HW, GNA_HW_WITH_SW_FBACK, GNA_SW_FP32, GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU as a secondary (e.g. HETERO:GNA,CPU) are supported. The sample will look for a suitable plugin for device specified.
|
||||
-d "<device>" Optional. Specify a target device to infer on. CPU, GPU, VPUX, GNA_AUTO, GNA_HW, GNA_HW_WITH_SW_FBACK, GNA_SW_FP32, GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU as a secondary (e.g. HETERO:GNA,CPU) are supported. The sample will look for a suitable plugin for device specified.
|
||||
-pc Optional. Enables per-layer performance report.
|
||||
-q "<mode>" Optional. Input quantization mode: static (default), dynamic, or user (use with -sf).
|
||||
-qb "<integer>" Optional. Weight bits for quantization: 8 or 16 (default)
|
||||
|
||||
@@ -23,7 +23,7 @@ static const char model_message[] = "Required. Path to an .xml file with a train
|
||||
|
||||
/// @brief message for assigning calculation to device
|
||||
static const char target_device_message[] =
|
||||
"Optional. Specify a target device to infer on. CPU, GPU, MYRIAD, VPUX, GNA_AUTO, GNA_HW, "
|
||||
"Optional. Specify a target device to infer on. CPU, GPU, VPUX, GNA_AUTO, GNA_HW, "
|
||||
"GNA_HW_WITH_SW_FBACK, GNA_SW_FP32, "
|
||||
"GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU"
|
||||
" as a secondary (e.g. HETERO:GNA,CPU) are supported. "
|
||||
@@ -272,7 +272,6 @@ bool parse_and_check_command_line(int argc, char* argv[]) {
|
||||
"HETERO:GNA_HW,CPU",
|
||||
"HETERO:GNA_SW_EXACT,CPU",
|
||||
"HETERO:GNA_SW_FP32,CPU",
|
||||
"MYRIAD",
|
||||
"VPUX"};
|
||||
|
||||
if (std::find(supportedDevices.begin(), supportedDevices.end(), FLAGS_d) == supportedDevices.end()) {
|
||||
|
||||
@@ -33,7 +33,7 @@ def main():
|
||||
log.info(f'Usage: {sys.argv[0]} <path_to_model>')
|
||||
return 1
|
||||
# Optimize for latency. Most of the devices are configured for latency by default,
|
||||
# but there are exceptions like MYRIAD
|
||||
# but there are exceptions like GNA
|
||||
latency = {'PERFORMANCE_HINT': 'LATENCY'}
|
||||
|
||||
# Create Core and use it to compile a model.
|
||||
|
||||
@@ -48,7 +48,7 @@ Options:
|
||||
Required. Path to an image file(s).
|
||||
-d DEVICE, --device DEVICE
|
||||
Optional. Specify the target device to infer on; CPU,
|
||||
GPU, MYRIAD, HDDL or HETERO: is acceptable. The sample
|
||||
GPU or HETERO: is acceptable. The sample
|
||||
will look for a suitable plugin for device specified.
|
||||
Default value is CPU.
|
||||
```
|
||||
|
||||
@@ -25,7 +25,7 @@ def parse_args() -> argparse.Namespace:
|
||||
args.add_argument('-i', '--input', type=str, required=True, nargs='+',
|
||||
help='Required. Path to an image file(s).')
|
||||
args.add_argument('-d', '--device', type=str, default='CPU',
|
||||
help='Optional. Specify the target device to infer on; CPU, GPU, MYRIAD, HDDL or HETERO: '
|
||||
help='Optional. Specify the target device to infer on; CPU, GPU, GNA or HETERO: '
|
||||
'is acceptable. The sample will look for a suitable plugin for device specified. '
|
||||
'Default value is CPU.')
|
||||
# fmt: on
|
||||
|
||||
@@ -57,7 +57,6 @@ Several execution modes are supported via the `-d` flag:
|
||||
|
||||
- `CPU` - All calculations are performed on the CPU device using the CPU Plugin.
|
||||
- `GPU` - All calculations are performed on the GPU device using the GPU Plugin.
|
||||
- `MYRIAD` - All calculations are performed on the Intel® Neural Compute Stick 2 device using the VPU MYRIAD Plugin.
|
||||
- `VPUX` - All calculations are performed on the VPUX device using the VPUX Plugin.
|
||||
- `GNA_AUTO` - GNA hardware is used if available and the driver is installed. Otherwise, the GNA device is emulated in fast-but-not-bit-exact mode.
|
||||
- `GNA_HW` - GNA hardware is used if available and the driver is installed. Otherwise, an error will occur.
|
||||
@@ -109,7 +108,7 @@ Options:
|
||||
-r REFERENCE, --reference REFERENCE
|
||||
Optional. Read reference score file and compare scores.
|
||||
-d DEVICE, --device DEVICE
|
||||
Optional. Specify a target device to infer on. CPU, GPU, MYRIAD, VPUX, GNA_AUTO, GNA_HW, GNA_SW_FP32,
|
||||
Optional. Specify a target device to infer on. CPU, GPU, VPUX, GNA_AUTO, GNA_HW, GNA_SW_FP32,
|
||||
GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU as a secondary (e.g.
|
||||
HETERO:GNA,CPU) are supported. The sample will look for a suitable plugin for device specified.
|
||||
Default value is CPU.
|
||||
|
||||
@@ -25,7 +25,7 @@ def build_arg_parser() -> argparse.ArgumentParser:
|
||||
help='Optional. Read reference score file and compare scores.')
|
||||
args.add_argument('-d', '--device', default='CPU', type=str,
|
||||
help='Optional. Specify a target device to infer on. '
|
||||
'CPU, GPU, MYRIAD, VPUX, GNA_AUTO, GNA_HW, GNA_SW_FP32, GNA_SW_EXACT and HETERO with combination of GNA'
|
||||
'CPU, GPU, VPUX, GNA_AUTO, GNA_HW, GNA_SW_FP32, GNA_SW_EXACT and HETERO with combination of GNA'
|
||||
' as the primary device and CPU as a secondary (e.g. HETERO:GNA,CPU) are supported. '
|
||||
'The sample will look for a suitable plugin for device specified. Default value is CPU.')
|
||||
args.add_argument('-bs', '--batch_size', type=int, choices=range(1, 9), metavar='[1-8]',
|
||||
|
||||
@@ -64,9 +64,6 @@ if(LINUX)
|
||||
if(ENABLE_INTEL_GPU)
|
||||
list(APPEND install_dependencies_files install_NEO_OCL_driver.sh)
|
||||
endif()
|
||||
if(ENABLE_INTEL_MYRIAD)
|
||||
list(APPEND install_dependencies_files install_NCS_udev_rules.sh)
|
||||
endif()
|
||||
|
||||
foreach(install_dependencies_file IN LISTS install_dependencies_files)
|
||||
install(PROGRAMS "${CMAKE_CURRENT_SOURCE_DIR}/install_dependencies/${install_dependencies_file}"
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright (C) 2018-2023 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
SCRIPT_DIR="$( cd "$( dirname "$(realpath "${BASH_SOURCE[0]}")" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
echo "Updating udev rules..."
|
||||
|
||||
if [ -f "$SCRIPT_DIR/97-myriad-usbboot.rules" ]; then
|
||||
sudo usermod -a -G users "$(whoami)"
|
||||
|
||||
sudo cp "$SCRIPT_DIR/97-myriad-usbboot.rules" /etc/udev/rules.d/
|
||||
sudo udevadm control --reload-rules
|
||||
sudo udevadm trigger
|
||||
sudo ldconfig
|
||||
echo "Udev rules have been successfully installed."
|
||||
else
|
||||
echo "File '97-myriad-usbboot.rules' is missing. Please make sure you installed 'Inference Engine Runtime for Intel® Movidius™ VPU'."
|
||||
exit 1
|
||||
fi
|
||||
@@ -36,8 +36,7 @@ if exist "%INTEL_OPENVINO_DIR%\extras\opencv\setupvars.bat" (
|
||||
set "InferenceEngine_DIR=%INTEL_OPENVINO_DIR%\runtime\cmake"
|
||||
set "ngraph_DIR=%INTEL_OPENVINO_DIR%\runtime\cmake"
|
||||
set "OpenVINO_DIR=%INTEL_OPENVINO_DIR%\runtime\cmake"
|
||||
set "HDDL_INSTALL_DIR=%INTEL_OPENVINO_DIR%\runtime\3rdparty\hddl"
|
||||
set "OPENVINO_LIB_PATHS=%INTEL_OPENVINO_DIR%\runtime\bin\intel64\Release;%INTEL_OPENVINO_DIR%\runtime\bin\intel64\Debug;%HDDL_INSTALL_DIR%\bin;%OPENVINO_LIB_PATHS%"
|
||||
set "OPENVINO_LIB_PATHS=%INTEL_OPENVINO_DIR%\runtime\bin\intel64\Release;%INTEL_OPENVINO_DIR%\runtime\bin\intel64\Debug;%OPENVINO_LIB_PATHS%"
|
||||
|
||||
:: TBB
|
||||
if exist %INTEL_OPENVINO_DIR%\runtime\3rdparty\tbb (
|
||||
|
||||
@@ -32,22 +32,15 @@ if [ -e "$INSTALLDIR/runtime" ]; then
|
||||
system_type=$(ls "$INSTALLDIR/runtime/lib/")
|
||||
IE_PLUGINS_PATH=$INSTALLDIR/runtime/lib/$system_type
|
||||
|
||||
export HDDL_INSTALL_DIR=$INSTALLDIR/runtime/3rdparty/hddl
|
||||
if [[ "$OSTYPE" == "darwin"* ]]; then
|
||||
export DYLD_LIBRARY_PATH=${IE_PLUGINS_PATH}/Release:${IE_PLUGINS_PATH}/Debug${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}
|
||||
export LD_LIBRARY_PATH=${IE_PLUGINS_PATH}/Release:${IE_PLUGINS_PATH}/Debug${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
|
||||
export PKG_CONFIG_PATH=${IE_PLUGINS_PATH}/Release/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}
|
||||
else
|
||||
export LD_LIBRARY_PATH=$HDDL_INSTALL_DIR/lib:${IE_PLUGINS_PATH}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
|
||||
export LD_LIBRARY_PATH=${IE_PLUGINS_PATH}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
|
||||
export PKG_CONFIG_PATH=$IE_PLUGINS_PATH/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}
|
||||
fi
|
||||
|
||||
HDDL_UNITE_DIR=$INSTALLDIR/runtime/3rdparty/hddl_unite
|
||||
|
||||
if [ -e "$HDDL_UNITE_DIR" ]; then
|
||||
export LD_LIBRARY_PATH=$HDDL_UNITE_DIR/lib:$HDDL_UNITE_DIR/thirdparty/XLink/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
|
||||
fi
|
||||
|
||||
if [ -e "$INSTALLDIR/runtime/3rdparty/tbb" ]; then
|
||||
tbb_lib_path=$INSTALLDIR/runtime/3rdparty/tbb/lib
|
||||
if [ -d "$tbb_lib_path/$system_type" ]; then
|
||||
|
||||
@@ -581,7 +581,7 @@ ie_core_get_config(const ie_core_t* core, const char* device_name, const char* c
|
||||
* @brief Gets available devices for neural network inference.
|
||||
* @ingroup Core
|
||||
* @param core A pointer to ie_core_t instance.
|
||||
* @param avai_devices The devices are returned as { CPU, GPU.0, GPU.1, MYRIAD }
|
||||
* @param avai_devices The devices are returned as { CPU, GPU.0, GPU.1 }
|
||||
 * If there is more than one device of a specific type, they are enumerated with a .# suffix
|
||||
* @return Status code of the operation: OK(0) for success.
|
||||
*/
|
||||
|
||||
@@ -9,7 +9,7 @@ from enum import Enum
|
||||
supported_precisions = ['FP32', 'FP64', 'FP16', 'I64', 'U64', 'I32', 'U32',
|
||||
'I16', 'I4', 'I8', 'U16', 'U4', 'U8', 'BOOL', 'BIN', 'BF16']
|
||||
|
||||
known_plugins = ['CPU', 'GPU', 'MYRIAD', 'HETERO', 'HDDL', 'MULTI']
|
||||
known_plugins = ['CPU', 'GPU', 'HETERO', 'MULTI']
|
||||
|
||||
layout_int_to_str_map = {0: 'ANY', 1: 'NCHW', 2: 'NHWC', 3: 'NCDHW', 4: 'NDHWC', 64: 'OIHW', 95: 'SCALAR', 96: 'C',
|
||||
128: 'CHW', 192: 'HW', 193: 'NC', 194: 'CN', 200: 'BLOCKED'}
|
||||
|
||||
@@ -478,11 +478,11 @@ cdef class IECore:
|
||||
|
||||
ie = IECore()
|
||||
net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
|
||||
exec_net = ie.load_network(network=net, device_name="MYRIAD", num_requests=2)
|
||||
exec_net = ie.load_network(network=net, device_name="CPU", num_requests=2)
|
||||
# export executable network
|
||||
exec_net.export(path_to_file_to_save)
|
||||
# import previously exported executable network
|
||||
exec_net_imported = ie.import_network(model_file=path_to_file_to_save, device_name="MYRIAD")
|
||||
exec_net_imported = ie.import_network(model_file=path_to_file_to_save, device_name="CPU")
|
||||
"""
|
||||
cdef ExecutableNetwork exec_net = ExecutableNetwork()
|
||||
cdef map[string, string] c_config
|
||||
@@ -646,12 +646,12 @@ cdef class IECore:
|
||||
"""
|
||||
return self.impl.getConfig(device_name.encode(), config_name.encode())
|
||||
|
||||
## A list of devices. The devices are returned as \[CPU, GPU.0, GPU.1, MYRIAD\].
|
||||
## A list of devices. The devices are returned as \[CPU, GPU.0, GPU.1\].
|
||||
# If there is more than one device of a specific type, they are all listed, each followed by a dot and a number.
|
||||
@property
|
||||
def available_devices(self):
|
||||
"""
|
||||
A list of devices. The devices are returned as \[CPU, FPGA.0, FPGA.1, MYRIAD\].
|
||||
A list of devices. The devices are returned as \[CPU, GPU.0, GPU.1\].
|
||||
If there is more than one device of a specific type, they are all listed, each followed by a dot and a number.
|
||||
"""
|
||||
cdef vector[string] c_devices = self.impl.getAvailableDevices()
|
||||
@@ -1239,7 +1239,7 @@ cdef class ExecutableNetwork:
|
||||
# ```python
|
||||
# ie = IECore()
|
||||
# net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
|
||||
# exec_net = ie.load_network(network=net, device_name="MYRIAD", num_requests=2)
|
||||
# exec_net = ie.load_network(network=net, device_name="CPU", num_requests=2)
|
||||
# exec_net.export(path_to_file_to_save)
|
||||
# ```
|
||||
def export(self, model_file: str):
|
||||
@@ -1252,7 +1252,7 @@ cdef class ExecutableNetwork:
|
||||
|
||||
ie = IECore()
|
||||
net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
|
||||
exec_net = ie.load_network(network=net, device_name="MYRIAD", num_requests=2)
|
||||
exec_net = ie.load_network(network=net, device_name="CPU", num_requests=2)
|
||||
exec_net.export(path_to_file_to_save)
|
||||
"""
|
||||
deref(self.impl).exportNetwork(model_file.encode())
|
||||
|
||||
@@ -555,7 +555,7 @@ void regclass_Core(py::module m) {
|
||||
|
||||
GIL is released while running this function.
|
||||
|
||||
:returns: A list of devices. The devices are returned as: CPU, GPU.0, GPU.1, MYRIAD...
|
||||
:returns: A list of devices. The devices are returned as: CPU, GPU.0, GPU.1, GNA...
|
||||
If there is more than one device of a specific type, they are enumerated with a .# suffix.
|
||||
Such enumerated device can later be used as a device name in all Core methods like:
|
||||
compile_model, query_model, set_property and so on.
|
||||
|
||||
@@ -68,9 +68,9 @@ def get_model_with_template_extension():
|
||||
return core, core.read_model(ir)
|
||||
|
||||
|
||||
def model_path(is_myriad=False):
|
||||
def model_path(is_fp16=False):
|
||||
base_path = os.path.dirname(__file__)
|
||||
if is_myriad:
|
||||
if is_fp16:
|
||||
test_xml = os.path.join(base_path, "test_utils", "utils", "test_model_fp16.xml")
|
||||
test_bin = os.path.join(base_path, "test_utils", "utils", "test_model_fp16.bin")
|
||||
else:
|
||||
@@ -93,7 +93,7 @@ def pytest_addoption(parser):
|
||||
parser.addoption(
|
||||
"--backend",
|
||||
default="CPU",
|
||||
choices=["CPU", "GPU", "HDDL", "MYRIAD", "HETERO", "TEMPLATE"],
|
||||
choices=["CPU", "GPU", "GNA", "HETERO", "TEMPLATE"],
|
||||
help="Select target device",
|
||||
)
|
||||
parser.addoption(
|
||||
@@ -118,8 +118,7 @@ def pytest_configure(config):
|
||||
# register additional markers
|
||||
config.addinivalue_line("markers", "skip_on_cpu: Skip test on CPU")
|
||||
config.addinivalue_line("markers", "skip_on_gpu: Skip test on GPU")
|
||||
config.addinivalue_line("markers", "skip_on_hddl: Skip test on HDDL")
|
||||
config.addinivalue_line("markers", "skip_on_myriad: Skip test on MYRIAD")
|
||||
config.addinivalue_line("markers", "skip_on_gna: Skip test on GNA")
|
||||
config.addinivalue_line("markers", "skip_on_hetero: Skip test on HETERO")
|
||||
config.addinivalue_line("markers", "skip_on_template: Skip test on TEMPLATE")
|
||||
config.addinivalue_line("markers", "onnx_coverage: Collect ONNX operator coverage")
|
||||
@@ -135,8 +134,7 @@ def pytest_collection_modifyitems(config, items):
 keywords = {
 "CPU": "skip_on_cpu",
 "GPU": "skip_on_gpu",
-"HDDL": "skip_on_hddl",
-"MYRIAD": "skip_on_myriad",
+"GNA": "skip_on_gna",
 "HETERO": "skip_on_hetero",
 "TEMPLATE": "skip_on_template",
 }
@@ -144,8 +142,7 @@ def pytest_collection_modifyitems(config, items):
 skip_markers = {
 "CPU": pytest.mark.skip(reason="Skipping test on the CPU backend."),
 "GPU": pytest.mark.skip(reason="Skipping test on the GPU backend."),
-"HDDL": pytest.mark.skip(reason="Skipping test on the HDDL backend."),
-"MYRIAD": pytest.mark.skip(reason="Skipping test on the MYRIAD backend."),
+"GNA": pytest.mark.skip(reason="Skipping test on the GNA backend."),
 "HETERO": pytest.mark.skip(reason="Skipping test on the HETERO backend."),
 "TEMPLATE": pytest.mark.skip(reason="Skipping test on the TEMPLATE backend."),
 }
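Note: a sketch of how these markers are consumed, mirroring the collection hook above; a test tagged skip_on_gpu is skipped when the suite runs with --backend=GPU (marker and option names are taken from the hunks above):

```python
import pytest

@pytest.mark.skip_on_gpu  # registered in pytest_configure above
def test_not_supported_on_gpu():
    # Skipped when the suite is invoked as: pytest --backend=GPU
    ...
```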
@@ -10,8 +10,7 @@ from tests.conftest import model_path
 from tests.test_utils.test_utils import get_relu_model, generate_image, generate_model_and_image, generate_relu_compiled_model
 from openvino.runtime import Model, ConstOutput, Shape, Core, Tensor

-is_myriad = os.environ.get("TEST_DEVICE") == "MYRIAD"
-test_net_xml, test_net_bin = model_path(is_myriad)
+test_net_xml, test_net_bin = model_path()


 def test_get_property(device):
@@ -19,8 +19,7 @@ from tests import skip_need_mock_op
 from tests.conftest import model_path
 from tests.test_utils.test_utils import generate_image, get_relu_model

-is_myriad = os.environ.get("TEST_DEVICE") == "MYRIAD"
-test_net_xml, test_net_bin = model_path(is_myriad)
+test_net_xml, test_net_bin = model_path()


 def create_model_with_memory(input_shape, data_type):
@@ -706,8 +705,8 @@ def test_results_async_infer(device):


 @pytest.mark.skipif(
-os.environ.get("TEST_DEVICE") not in ["GPU, FPGA", "MYRIAD"],
-reason="Device independent test",
+os.environ.get("TEST_DEVICE") not in ["GPU"],
+reason="Device dependent test",
 )
 def test_infer_float16(device):
 model = bytes(
@@ -11,8 +11,7 @@ import openvino.runtime.opset8 as ops

 from openvino.runtime import Core, OVAny

-is_myriad = os.environ.get("TEST_DEVICE") == "MYRIAD"
-test_net_xml, test_net_bin = model_path(is_myriad)
+test_net_xml, test_net_bin = model_path()


 def test_input_type(device):
@@ -21,8 +21,7 @@ from openvino.runtime import (
 import pytest


-is_myriad = os.environ.get("TEST_DEVICE") == "MYRIAD"
-test_net_xml, test_net_bin = model_path(is_myriad)
+test_net_xml, test_net_bin = model_path()


 def test_const_output_type(device):
@@ -8,9 +8,6 @@ from ..conftest import model_path
 import openvino.runtime.opset8 as ops
 from openvino.runtime import Type

-is_myriad = os.environ.get("TEST_DEVICE") == "MYRIAD"
-test_net_xml, test_net_bin = model_path(is_myriad)
-

 def test_output_replace(device):
 param = ops.parameter([1, 64], Type.i64)
@@ -31,7 +31,7 @@ def test_compare_models():
 def generate_lib_name(device, full_device_name):
 lib_name = ""
 arch = processor()
-if arch == "x86_64" or "Intel" in full_device_name or device in ["GNA", "HDDL", "MYRIAD", "VPUX"]:
+if arch == "x86_64" or "Intel" in full_device_name or device in ["GNA", "VPUX"]:
 lib_name = "openvino_intel_" + device.lower() + "_plugin"
 elif arch != "x86_64" and device == "CPU":
 lib_name = "openvino_arm_cpu_plugin"
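Note: hypothetical inputs illustrating the updated mapping, in which MYRIAD and HDDL no longer resolve to an Intel plugin library:

```python
# On x86_64, GNA still maps to an Intel plugin library name:
generate_lib_name("GNA", "GNA")  # -> "openvino_intel_gna_plugin"
# On a non-x86_64 host, the CPU device maps to the ARM plugin:
generate_lib_name("CPU", "ARM")  # -> "openvino_arm_cpu_plugin"
```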
@@ -11,9 +11,9 @@ import tests_compatibility
 from pathlib import Path


-def model_path(is_myriad=False):
+def model_path(is_fp16=False):
 base_path = os.path.dirname(__file__)
-if is_myriad:
+if is_fp16:
 test_xml = os.path.join(base_path, "test_utils", "utils", "test_model_fp16.xml")
 test_bin = os.path.join(base_path, "test_utils", "utils", "test_model_fp16.bin")
 else:
@@ -44,7 +44,7 @@ def pytest_addoption(parser):
 parser.addoption(
 "--backend",
 default="CPU",
-choices=["CPU", "GPU", "HDDL", "MYRIAD", "HETERO", "TEMPLATE"],
+choices=["CPU", "GPU", "GNA", "HETERO", "TEMPLATE"],
 help="Select target device",
 )
 parser.addoption(
@@ -69,8 +69,7 @@ def pytest_configure(config):
 # register additional markers
 config.addinivalue_line("markers", "skip_on_cpu: Skip test on CPU")
 config.addinivalue_line("markers", "skip_on_gpu: Skip test on GPU")
-config.addinivalue_line("markers", "skip_on_hddl: Skip test on HDDL")
-config.addinivalue_line("markers", "skip_on_myriad: Skip test on MYRIAD")
+config.addinivalue_line("markers", "skip_on_gna: Skip test on GNA")
 config.addinivalue_line("markers", "skip_on_hetero: Skip test on HETERO")
 config.addinivalue_line("markers", "skip_on_template: Skip test on TEMPLATE")
 config.addinivalue_line("markers", "onnx_coverage: Collect ONNX operator coverage")
@@ -86,8 +85,7 @@ def pytest_collection_modifyitems(config, items):
 keywords = {
 "CPU": "skip_on_cpu",
 "GPU": "skip_on_gpu",
-"HDDL": "skip_on_hddl",
-"MYRIAD": "skip_on_myriad",
+"GNA": "skip_on_gna",
 "HETERO": "skip_on_hetero",
 "TEMPLATE": "skip_on_template",
 }
@@ -95,8 +93,7 @@ def pytest_collection_modifyitems(config, items):
 skip_markers = {
 "CPU": pytest.mark.skip(reason="Skipping test on the CPU backend."),
 "GPU": pytest.mark.skip(reason="Skipping test on the GPU backend."),
-"HDDL": pytest.mark.skip(reason="Skipping test on the HDDL backend."),
-"MYRIAD": pytest.mark.skip(reason="Skipping test on the MYRIAD backend."),
+"GNA": pytest.mark.skip(reason="Skipping test on the GNA backend."),
 "HETERO": pytest.mark.skip(reason="Skipping test on the HETERO backend."),
 "TEMPLATE": pytest.mark.skip(reason="Skipping test on the TEMPLATE backend."),
 }
@@ -98,7 +98,7 @@ def test_incompatible_input_precision():
 @pytest.mark.skip(reason="Test will enable when CPU fix will be merge")
 @pytest.mark.skipif(os.environ.get("TEST_DEVICE", "CPU") != "CPU", reason="Device dependent test")
 def test_buffer_values_after_add_outputs(device):
-test_net_xml_fp16, test_net_bin_fp16 = model_path(is_myriad=True)
+test_net_xml_fp16, test_net_bin_fp16 = model_path(is_fp16=True)
 ie_core = IECore()
 if device == "CPU":
 if ie_core.get_metric(device, "FULL_DEVICE_NAME") == "arm_compute::NEON":
@@ -11,8 +11,7 @@ from tests_compatibility.conftest import model_path
 from tests_compatibility.test_utils.test_utils import generate_image, generate_relu_model


-is_myriad = os.environ.get("TEST_DEVICE") == "MYRIAD"
-test_net_xml, test_net_bin = model_path(is_myriad)
+test_net_xml, test_net_bin = model_path(False)


 def test_infer(device):
@@ -223,20 +222,20 @@ def test_exec_graph(device):
 del ie_core


-@pytest.mark.skipif(os.environ.get("TEST_DEVICE", "CPU") != "MYRIAD",
-                    reason="Device specific test. Only MYRIAD plugin implements network export")
+@pytest.mark.skipif(os.environ.get("TEST_DEVICE", "CPU") != "CPU",
+                    reason="Device specific test. Only CPU plugin implements network export")
 def test_export_import():
 ie_core = ie.IECore()
 net = ie_core.read_network(model=test_net_xml, weights=test_net_bin)
-exec_net = ie_core.load_network(net, "MYRIAD")
+exec_net = ie_core.load_network(net, "CPU")
 exported_net_file = 'exported_model.bin'
 exec_net.export(exported_net_file)
 assert os.path.exists(exported_net_file)
-exec_net = ie_core.import_network(exported_net_file, "MYRIAD")
+exec_net = ie_core.import_network(exported_net_file, "CPU")
 os.remove(exported_net_file)
 img = generate_image()
 res = exec_net.infer({'data': img})
-assert np.argmax(res['fc_out'][0]) == 3
+assert np.argmax(res['fc_out'][0]) == 9
 del exec_net
 del ie_core
@@ -14,8 +14,7 @@ from tests_compatibility.test_utils.test_utils import generate_image, generate_r
 import ngraph as ng
 from ngraph.impl import Function, Type

-is_myriad = os.environ.get("TEST_DEVICE") == "MYRIAD"
-test_net_xml, test_net_bin = model_path(is_myriad)
+test_net_xml, test_net_bin = model_path()


 def create_function_with_memory(input_shape, data_type):
@@ -170,7 +170,7 @@ public:
 /**
 * @brief Returns devices available for neural networks inference
 *
-* @return A vector of devices. The devices are returned as { CPU, GPU.0, GPU.1, MYRIAD }
+* @return A vector of devices. The devices are returned as { CPU, GPU.0, GPU.1, GNA }
 * If there more than one device of specific type, they are enumerated with .# suffix.
 */
 virtual std::vector<std::string> GetAvailableDevices() const = 0;
@@ -285,7 +285,7 @@ public:
 /**
 * @brief Returns devices available for neural networks inference
 *
-* @return A vector of devices. The devices are returned as { CPU, GPU.0, GPU.1, MYRIAD }
+* @return A vector of devices. The devices are returned as { CPU, GPU.0, GPU.1, GNA }
 * If there more than one device of specific type, they are enumerated with .# suffix.
 */
 std::vector<std::string> GetAvailableDevices() const;
@@ -628,7 +628,7 @@ public:
 * @brief Returns devices available for inference.
 * Core objects go over all registered plugins and ask about available devices.
 *
-* @return A vector of devices. The devices are returned as { CPU, GPU.0, GPU.1, MYRIAD }.
+* @return A vector of devices. The devices are returned as { CPU, GPU.0, GPU.1, GNA }.
 * If there is more than one device of a specific type, they are enumerated with the .# suffix.
 * Such enumerated device can later be used as a device name in all Core methods like Core::compile_model,
 * Core::query_model, Core::set_property and so on.
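Note: a short sketch of the enumerated-name usage this comment describes, shown via the equivalent Python Core API and assuming a machine that exposes two GPUs (the model path is a placeholder):

```python
from openvino.runtime import Core

core = Core()
model = core.read_model("model.xml")           # placeholder IR path
compiled = core.compile_model(model, "GPU.1")  # second enumerated GPU device
```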
@@ -278,7 +278,7 @@ public:
 /**
 * @brief Returns devices available for neural networks inference
 *
-* @return A vector of devices. The devices are returned as { CPU, GPU.0, GPU.1, MYRIAD }
+* @return A vector of devices. The devices are returned as { CPU, GPU.0, GPU.1, GNA }
 * If there more than one device of specific type, they are enumerated with .# suffix.
 */
 std::vector<std::string> GetAvailableDevices() const override;
Some files were not shown because too many files have changed in this diff.