Merge branch 'master' into itikhono/ts/fix_performance_issues
Commit f5bff5c087
@ -62,23 +62,8 @@ jobs:
TMP_DIR: /mnt/tmp
SHARE_DIR: /mount/cinfsshare/onnxtestdata
CCACHE_DIR: $(SHARE_DIR)/ccache/master/android_arm64
LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib
OV_PYTHON_VERSION: 3.10.9 # Newest version of Python 3.10. Full version of Python is required for LD_LIBRARY_PATH and for successful Task setupPython

steps:
- task: UsePythonVersion@0
  inputs:
    versionSpec: '3.10'
    addToPath: true
    disableDownloadFromRegistry: false
    architecture: 'x64'
    githubToken: $(auth_token)
  displayName: Setup Python 3.10
  name: setupPython
- bash: |
    #!/bin/bash
    python -V

- script: |
    curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
    whoami
@ -40,9 +40,6 @@ resources:
name: openvinotoolkit/testdata
ref: master

variables:
- group: github

jobs:
- job: Lin
  strategy:
@ -99,23 +96,8 @@ jobs:
CMAKE_VERSION: 3.24.0
BUILD_PYTHON: $(WORK_DIR)/build_python
INSTALL_PYTHON: $(INSTALL_OPENVINO)/extras/python
LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib
OV_PYTHON_VERSION: 3.10.9 # Newest version of Python 3.10. Full version of Python is required for LD_LIBRARY_PATH and for successful Task setupPython

steps:
- task: UsePythonVersion@0
  inputs:
    versionSpec: '3.10'
    addToPath: true
    disableDownloadFromRegistry: false
    architecture: 'x64'
    githubToken: $(auth_token)
  displayName: Setup Python 3.10
  name: setupPython
- bash: |
    #!/bin/bash
    python -V

- script: |
    curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
    whoami
@ -300,31 +282,37 @@ jobs:
- script: ls -alR $(INSTALL_DIR)
  displayName: 'List install test files'


# Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
- script: |
    export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(LD_LIBRARY_PATH)
    python3 -m pytest -s $(INSTALL_TEST_DIR)/pyngraph $(PYTHON_STATIC_ARGS) \
      --junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
      --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_zoo_models.py \
      --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_backend.py
  env:
    # because of static build libgna is needed for python binary
    LD_LIBRARY_PATH: $(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64
  displayName: 'nGraph and IE Python Bindings Tests'

# Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
- script: |
    # For python imports to import pybind_mock_frontend
    export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(LD_LIBRARY_PATH)
    export PYTHONPATH=$(INSTALL_TEST_DIR):$(INSTALL_DIR)/python/python3.8:$PYTHONPATH
    python3 -m pytest -sv $(INSTALL_TEST_DIR)/pyopenvino $(PYTHON_STATIC_ARGS) \
      --junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
      --ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_utils/test_utils.py \
      --ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_onnx/test_zoo_models.py \
      --ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_onnx/test_backend.py
  env:
    # because of static build libgna is needed for python binary and mock_py frontend library
    LD_LIBRARY_PATH: $(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(INSTALL_TEST_DIR)
  displayName: 'Python API 2.0 Tests'

- script: |
    export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(LD_LIBRARY_PATH)
    python3 -m pytest -s $(INSTALL_TEST_DIR)/mo/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-ModelOptimizer.xml
  env:
    # because of static build libgna is needed for python binary
    LD_LIBRARY_PATH: $(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64
  displayName: 'Model Optimizer UT'

- script: |
@ -39,9 +39,6 @@ resources:
name: openvinotoolkit/openvino_contrib
ref: master

variables:
- group: github

jobs:
- job: linux_arm64
  # About 150% of total time
@ -80,23 +77,8 @@ jobs:
OPENVINO_CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux_arm64
OPENCV_CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux_arm64_opencv
ONETBB_CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux_arm64_onetbb
LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib:$LD_LIBRARY_PATH
OV_PYTHON_VERSION: 3.10.9 # Newest version of Python 3.10. Full version of Python is required for LD_LIBRARY_PATH and for successful Task setupPython

steps:
- task: UsePythonVersion@0
  inputs:
    versionSpec: '3.10'
    addToPath: true
    disableDownloadFromRegistry: false
    architecture: 'x64'
    githubToken: $(auth_token)
  displayName: Setup Python 3.10
  name: setupPython
- bash: |
    #!/bin/bash
    python -V

- script: |
    curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
    whoami
@ -36,9 +36,6 @@ resources:
endpoint: openvinotoolkit
name: openvinotoolkit/testdata

variables:
- group: github

jobs:
- job: LinCC
  # About 150% of total time
@ -58,22 +55,8 @@ jobs:
BUILD_DIR: $(WORK_DIR)/build
INSTALL_DIR: $(WORK_DIR)/install_pkg
SETUPVARS: $(INSTALL_DIR)/setupvars.sh
LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib:$LD_LIBRARY_PATH
OV_PYTHON_VERSION: 3.10.9 # Newest version of Python 3.10. Full version of Python is required for LD_LIBRARY_PATH and for successful Task setupPython

steps:
- task: UsePythonVersion@0
  inputs:
    versionSpec: '3.10'
    addToPath: true
    disableDownloadFromRegistry: false
    architecture: 'x64'
    githubToken: $(auth_token)
  displayName: Setup Python 3.10
  name: setupPython
- bash: |
    #!/bin/bash
    python -V
- script: |
    curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
    whoami
@ -65,23 +65,8 @@ jobs:
TMP_DIR: /mnt/tmp
SHARE_DIR: /mount/cinfsshare/onnxtestdata
CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux
LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib
OV_PYTHON_VERSION: 3.10.9 # Newest version of Python 3.10. Full version of Python is required for LD_LIBRARY_PATH and for successful Task setupPython

steps:
- task: UsePythonVersion@0
  inputs:
    versionSpec: '3.10'
    addToPath: true
    disableDownloadFromRegistry: false
    architecture: 'x64'
    githubToken: $(auth_token)
  displayName: Setup Python 3.10
  name: setupPython
- bash: |
    #!/bin/bash
    python -V

- script: |
    curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
    whoami
@ -240,9 +225,6 @@ jobs:
# Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
- script: |
    # TODO (vurusovs): revert skip of test_core.py::test_register_plugin*,
    # test should be fixed
    export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(LD_LIBRARY_PATH)
    python3 -m pytest -s $(INSTALL_TEST_DIR)/pyngraph \
      --junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
      --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_zoo_models.py \
@ -257,9 +239,6 @@ jobs:
    export LD_LIBRARY_PATH=$(PYTHON_WHEEL_INSTALL_DIR)/openvino/libs:$(INSTALL_TEST_DIR):$LD_LIBRARY_PATH
    # For python imports to import pybind_mock_frontend
    export PYTHONPATH=$(INSTALL_TEST_DIR):$PYTHONPATH
    # TODO (vurusovs): revert skip of test_core.py::test_register_plugin*,
    # test should be fixed
    export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(LD_LIBRARY_PATH)
    python3 -m pytest -s $(INSTALL_TEST_DIR)/pyopenvino \
      --junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
      --ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_utils/test_utils.py \
@ -267,9 +246,7 @@ jobs:
      --ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_onnx/test_backend.py -v
  displayName: 'Python API 2.0 Tests'

- script: |
    export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(LD_LIBRARY_PATH)
    python3 -m pytest -s $(INSTALL_TEST_DIR)/mo/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-ModelOptimizer.xml
- script: python3 -m pytest -s $(INSTALL_TEST_DIR)/mo/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-ModelOptimizer.xml
  displayName: 'Model Optimizer UT'

- script: |
@ -316,6 +293,8 @@ jobs:
- script: |
    $(INSTALL_TEST_DIR)/ov_core_unit_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-NGraphUT.xml
  env:
    LD_LIBRARY_PATH: $(INSTALL_TEST_DIR)
  displayName: 'OV Core UT'

- script: |
@ -31,9 +31,6 @@ pr:
- 'tools/*'
- 'tests/layer_tests/*'

variables:
- group: github

jobs:
- job: onnxruntime
  timeoutInMinutes: '90'
@ -55,23 +52,8 @@ jobs:
BUILD_DIR: $(WORK_DIR)/build
ONNXRUNTIME_UTILS: $(REPO_DIR)/.ci/azure/ci_utils/onnxruntime
ONNXRUNTIME_BUILD_DIR: $(ONNXRUNTIME_REPO_DIR)/build
LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib:$LD_LIBRARY_PATH
OV_PYTHON_VERSION: 3.10.9 # Newest version of Python 3.10. Full version of Python is required for LD_LIBRARY_PATH and for successful Task setupPython

steps:
- task: UsePythonVersion@0
  inputs:
    versionSpec: '3.10'
    addToPath: true
    disableDownloadFromRegistry: false
    architecture: 'x64'
    githubToken: $(auth_token)
  displayName: Setup Python 3.10
  name: setupPython
- bash: |
    #!/bin/bash
    python -V

- script: |
    curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
    whoami
@ -9,17 +9,19 @@ ov_coverage_clean(REPOSITORY "openvino"
ov_coverage_capture(INFO_FILE "openvino"
BASE_DIRECTORY "${OV_COVERAGE_BASE_DIRECTORY}"
DIRECTORY "${OV_COVERAGE_GCDA_DATA_DIRECTORY}"
EXCLUDE_PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/*.pb.cc"
"${OV_COVERAGE_BASE_DIRECTORY}/*.pb.h"
"${OV_COVERAGE_BASE_DIRECTORY}/*/tests/*"
"${OV_COVERAGE_BASE_DIRECTORY}/*/tests_deprecated/*"
"${OV_COVERAGE_BASE_DIRECTORY}/thirdparty/*") # Skip some pb files, tests and thirdparty
EXCLUDE_PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/*.pb.cc"
"${OV_COVERAGE_BASE_DIRECTORY}/*.pb.h"
"${OV_COVERAGE_BASE_DIRECTORY}/*/tests/*"
"${OV_COVERAGE_BASE_DIRECTORY}/*/tests_deprecated/*"
"${OV_COVERAGE_BASE_DIRECTORY}/thirdparty/*"
"${OV_COVERAGE_BASE_DIRECTORY}/CMakeCXXCompilerId.cpp"
"${OV_COVERAGE_BASE_DIRECTORY}/CMakeCCompilerId.c") # Skip some service files, tests and thirdparty
# Generate reports

# Common report
ov_coverage_genhtml(INFO_FILE "openvino"
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

# Generate reports

##################### Core Components #####################
ov_coverage_extract(INPUT "openvino" OUTPUT "inference"
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/inference/*")
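The pattern visible in this hunk is one `ov_coverage_extract` call plus one `ov_coverage_genhtml` call per component, both driven off the common "openvino" capture. A minimal sketch of another per-component report using the same signatures follows; the `transformations` component name and its source path are assumptions for illustration, not taken from this commit.

```cmake
# Pull the coverage data for a single component out of the shared "openvino" info file
ov_coverage_extract(INPUT "openvino" OUTPUT "transformations"
                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/common/transformations/*")

# Render an HTML report for just that component, stripping the common prefix from paths
ov_coverage_genhtml(INFO_FILE "transformations"
                    PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
```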
@ -292,7 +292,7 @@ function(ie_mark_target_as_cc TARGET_NAME)
endif()
target_link_libraries(${TARGET_NAME} PRIVATE ${cc_library})

if(NOT (SELECTIVE_BUILD STREQUAL "ON"))
if(NOT SELECTIVE_BUILD STREQUAL "ON")
return()
endif()
@ -6,22 +6,24 @@ include(ProcessorCount)
include(CheckCXXCompilerFlag)

#
# disable_deprecated_warnings()
# ov_disable_deprecated_warnings()
#
# Disables deprecated warnings generation in current scope (directory, function)
# Defines ie_c_cxx_deprecated variable which contains C / C++ compiler flags
#
macro(disable_deprecated_warnings)
macro(ov_disable_deprecated_warnings)
if(WIN32)
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(ie_c_cxx_deprecated "/Qdiag-disable:1478,1786")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(ie_c_cxx_deprecated "/wd4996")
elseif(OV_COMPILER_IS_CLANG)
set(ie_c_cxx_deprecated "-Wno-deprecated-declarations")
endif()
else()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(ie_c_cxx_deprecated "-diag-disable=1478,1786")
else()
elseif(OV_COMPILER_IS_CLANG OR CMAKE_COMPILER_IS_GNUCXX)
set(ie_c_cxx_deprecated "-Wno-deprecated-declarations")
endif()
endif()
@ -36,30 +38,36 @@ macro(disable_deprecated_warnings)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ie_c_cxx_deprecated}")
endmacro()

macro(disable_deprecated_warnings)
ov_disable_deprecated_warnings()
endmacro()

#
# ie_deprecated_no_errors()
# ov_deprecated_no_errors()
#
# Don't treat deprecated warnings as errors in current scope (directory, function)
# Defines ie_c_cxx_deprecated_no_errors variable which contains C / C++ compiler flags
#
macro(ie_deprecated_no_errors)
macro(ov_deprecated_no_errors)
if(WIN32)
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(ie_c_cxx_deprecated_no_errors "/Qdiag-warning:1478,1786")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# show 4996 only for /w4
set(ie_c_cxx_deprecated_no_errors "/wd4996")
elseif(OV_COMPILER_IS_CLANG)
set(ie_c_cxx_deprecated_no_errors "-Wno-error=deprecated-declarations")
endif()
else()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(ie_c_cxx_deprecated_no_errors "-diag-warning=1478,1786")
else()
elseif(OV_COMPILER_IS_CLANG OR CMAKE_COMPILER_IS_GNUCXX)
set(ie_c_cxx_deprecated_no_errors "-Wno-error=deprecated-declarations")
endif()
endif()

if(NOT ie_c_cxx_deprecated_no_errors)
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
endif()
if(NOT ie_c_cxx_deprecated_no_errors)
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
endif()

set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${ie_c_cxx_deprecated_no_errors}")
@ -68,6 +76,25 @@ macro(ie_deprecated_no_errors)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ie_c_cxx_deprecated_no_errors}")
endmacro()

#
# ov_dev_package_no_errors()
#
# Exports flags for 3rdparty modules, but without errors
#
macro(ov_dev_package_no_errors)
if(OV_COMPILER_IS_CLANG OR CMAKE_COMPILER_IS_GNUCXX)
set(ie_c_cxx_dev_no_errors "-Wno-all")
if(SUGGEST_OVERRIDE_SUPPORTED)
set(ie_cxx_dev_no_errors "${ie_c_cxx_dev_no_errors} -Wno-error=suggest-override")
endif()
endif()

set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${ie_c_cxx_dev_no_errors} ${ie_cxx_dev_no_errors}")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${ie_c_cxx_dev_no_errors}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ie_c_cxx_dev_no_errors} ${ie_cxx_dev_no_errors}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ie_c_cxx_dev_no_errors}")
endmacro()

#
# ie_sse42_optimization_flags(<output flags>)
#
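Like the deprecation macros above, `ov_dev_package_no_errors()` only appends flags to the CMake flag variables of the current scope, so it has to be called before the offending sources are added. A minimal sketch of the intended usage, consistent with how the calls appear later in this commit (the `thirdparty/foo` directory is a hypothetical example):

```cmake
# Relax -Wall and deprecation errors only in this directory scope
ov_dev_package_no_errors()
ov_deprecated_no_errors()

# Third-party code added after this point is compiled with the relaxed flags
add_subdirectory(thirdparty/foo)
```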
@ -165,7 +192,10 @@ macro(ie_arm_neon_optimization_flags flags)
endif()
else()
if(AARCH64)
set(${flags} -O2 -ftree-vectorize)
set(${flags} -O2)
if(NOT CMAKE_CL_64)
list(APPEND ${flags} -ftree-vectorize)
endif()
elseif(ARM)
set(${flags} -mfpu=neon -Wno-unused-command-line-argument)
endif()
@ -190,7 +220,9 @@ function(ov_disable_all_warnings)
if(target_type STREQUAL "SHARED_LIBRARY" OR target_type STREQUAL "EXECUTABLE")
set(link_interface LINK_OPTIONS)
endif()
set_target_properties(${target} PROPERTIES ${link_interface} "-Wno-error=maybe-uninitialized;-Wno-maybe-uninitialized")
if(CMAKE_COMPILER_IS_GNUCXX)
set_target_properties(${target} PROPERTIES ${link_interface} "-Wno-error=maybe-uninitialized;-Wno-maybe-uninitialized")
endif()
elseif(UNIX AND CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
# 193: zero used for undefined preprocessing identifier "XXX"
# 1011: missing return statement at end of non-void function "XXX"
@ -238,6 +270,21 @@ function(ov_force_include target scope header_file)
endif()
endfunction()

#
# ie_python_minimal_api(<target>)
#
# Set options to use only Python Limited API
#
function(ie_python_minimal_api target)
# pybind11 uses a lot of API which is not a part of minimal python API subset
# Ref 1: https://docs.python.org/3.11/c-api/stable.html
# Ref 2: https://github.com/pybind/pybind11/issues/1755
# target_compile_definitions(${target} PRIVATE Py_LIMITED_API=0x03090000)
# if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# target_compile_options(${target} PRIVATE "-Wno-unused-variable")
# endif()
endfunction()

#
# Compilation and linker flags
#
@ -262,34 +309,53 @@ if(ENABLE_COVERAGE)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --coverage")
endif()

if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
ie_add_compiler_flags(-fsigned-char)
endif()

# Honor visibility properties for all target types
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
set(CMAKE_C_VISIBILITY_PRESET hidden)
set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)

function(ie_python_minimal_api target)
# pybind11 uses a lot of API which is not a part of minimal python API subset
# Ref 1: https://docs.python.org/3.11/c-api/stable.html
# Ref 2: https://github.com/pybind/pybind11/issues/1755
# target_compile_definitions(${target} PRIVATE Py_LIMITED_API=0x03090000)
# if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# target_compile_options(${target} PRIVATE "-Wno-unused-variable")
# endif()
endfunction()
if(CMAKE_CL_64)
# Default char Type Is unsigned
# ie_add_compiler_flags(/J)
else()
ie_add_compiler_flags(-fsigned-char)
endif()

if(WIN32)
ie_add_compiler_flags(-D_CRT_SECURE_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS)
ie_add_compiler_flags(/EHsc) # no asynchronous structured exception handling
ie_add_compiler_flags(/Gy) # remove unreferenced functions: function level linking
#
# Common options / warnings enabled
#

ie_add_compiler_flags(/D_CRT_SECURE_NO_WARNINGS /D_SCL_SECURE_NO_WARNINGS)
# no asynchronous structured exception handling
ie_add_compiler_flags(/EHsc)
# Allows the compiler to package individual functions in the form of packaged functions (COMDATs).
ie_add_compiler_flags(/Gy)
# This option helps ensure the fewest possible hard-to-find code defects. Similar to -Wall on GNU / Clang
ie_add_compiler_flags(/W3)

if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# Increase Number of Sections in .Obj file
ie_add_compiler_flags(/bigobj)
# Build with multiple processes
ie_add_compiler_flags(/MP)

if(AARCH64 AND NOT MSVC_VERSION LESS 1930)
# otherwise, _ARM64_EXTENDED_INTRINSICS is defined, which defines 'mvn' macro
ie_add_compiler_flags(/D_ARM64_DISTINCT_NEON_TYPES)
endif()
endif()

# Handle Large Addresses
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LARGEADDRESSAWARE")

if (CMAKE_COMPILE_WARNING_AS_ERROR)
if (CMAKE_VERSION VERSION_LESS 3.24)
#
# Warnings as errors
#

if(CMAKE_COMPILE_WARNING_AS_ERROR)
if(CMAKE_VERSION VERSION_LESS 3.24)
ie_add_compiler_flags(/WX)
endif()
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /WX")
@ -300,26 +366,16 @@ if(WIN32)
endif()
endif()

if(AARCH64 AND CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND NOT MSVC_VERSION LESS 1930)
# otherwise, _ARM64_EXTENDED_INTRINSICS is defined, which defines 'mvn' macro
ie_add_compiler_flags(-D_ARM64_DISTINCT_NEON_TYPES)
endif()

# Compiler specific flags

ie_add_compiler_flags(/bigobj)
ie_add_compiler_flags(/MP)

#
# Disable noisy warnings
#

if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# C4251 needs to have dll-interface to be used by clients of class
ie_add_compiler_flags(/wd4251)
# C4275 non dll-interface class used as base for dll-interface class
ie_add_compiler_flags(/wd4275)
endif()

if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
# 161: unrecognized pragma
# 177: variable was declared but never referenced
# 556: not matched type of assigned function pointer
@ -342,42 +398,45 @@ if(WIN32)
string(REPLACE "/Zi" "/Z7" CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO}")
string(REPLACE "/Zi" "/Z7" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
else()
if(CMAKE_COMPILE_WARNING_AS_ERROR AND CMAKE_VERSION VERSION_LESS 3.24)
# TODO: enable for C sources as well
# ie_add_compiler_flags(-Werror)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
endif()
#
# Common enabled warnings
#

# allow linker eliminating the unused code and data from the final executable
ie_add_compiler_flags(-ffunction-sections -fdata-sections)
# emits text showing the command-line option controlling a diagnostic
ie_add_compiler_flags(-fdiagnostics-show-option)
ie_add_compiler_flags(-Wundef)
ie_add_compiler_flags(-Wreturn-type)
ie_add_compiler_flags(-Wunused-variable)

if(OV_COMPILER_IS_APPLECLANG)
ie_add_compiler_flags(-Wswitch)
set(CMAKE_CXX_FLAGS "-Woverloaded-virtual ${CMAKE_CXX_FLAGS}")
else()
ie_add_compiler_flags(-Wuninitialized -Winit-self)
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
ie_add_compiler_flags(-Winconsistent-missing-override
-Wstring-plus-int)
else()
ie_add_compiler_flags(-Wmaybe-uninitialized)
check_cxx_compiler_flag("-Wsuggest-override" SUGGEST_OVERRIDE_SUPPORTED)
if(SUGGEST_OVERRIDE_SUPPORTED)
set(CMAKE_CXX_FLAGS "-Wsuggest-override ${CMAKE_CXX_FLAGS}")
endif()
endif()
# This enables all the warnings about constructions that some users consider questionable, and that are easy to avoid
ie_add_compiler_flags(-Wall)
# Warn if an undefined identifier is evaluated in an #if directive. Such identifiers are replaced with zero.
ie_add_compiler_flags(-Wundef)

check_cxx_compiler_flag("-Wsuggest-override" SUGGEST_OVERRIDE_SUPPORTED)
if(SUGGEST_OVERRIDE_SUPPORTED)
set(CMAKE_CXX_FLAGS "-Wsuggest-override ${CMAKE_CXX_FLAGS}")
endif()

#
# Warnings as errors
#

if(CMAKE_COMPILE_WARNING_AS_ERROR AND CMAKE_VERSION VERSION_LESS 3.24)
ie_add_compiler_flags(-Werror)
endif()

#
# Disable noisy warnings
#

if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
# 177: function "XXX" was declared but never referenced
ie_add_compiler_flags(-diag-disable=remark,177,2196)
endif()

#
# Linker flags
#

if(APPLE)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-dead_strip")
@ -401,6 +460,14 @@ else()
endif()
endif()

# if(OV_COMPILER_IS_CLANG)
# ie_add_compiler_flags(-Wshorten-64-to-32)
# endif()
# TODO
if(OV_COMPILER_IS_CLANG)
ie_add_compiler_flags(-Wno-delete-non-abstract-non-virtual-dtor)
endif()

#
# link_system_libraries(target <PUBLIC | PRIVATE | INTERFACE> <lib1 [lib2 lib3 ...]>)
#
@ -19,16 +19,12 @@ else()
endif()

if(CI_BUILD_NUMBER)
set(TREAT_WARNING_AS_ERROR_DEFAULT ON)
set(CMAKE_COMPILE_WARNING_AS_ERROR_DEFAULT ON)
else()
set(TREAT_WARNING_AS_ERROR_DEFAULT OFF)
set(CMAKE_COMPILE_WARNING_AS_ERROR_DEFAULT OFF)
endif()

ie_dependent_option (TREAT_WARNING_AS_ERROR "WILL BE REMOVED SOON, NEED TO FIX PRIVATE COMPONENTS" ON "X86_64 OR X86" OFF)

if(NOT DEFINED CMAKE_COMPILE_WARNING_AS_ERROR)
set(CMAKE_COMPILE_WARNING_AS_ERROR ${TREAT_WARNING_AS_ERROR_DEFAULT})
endif()
ie_option (CMAKE_COMPILE_WARNING_AS_ERROR "Enable warnings as errors" ${CMAKE_COMPILE_WARNING_AS_ERROR_DEFAULT})

ie_dependent_option (ENABLE_INTEGRITYCHECK "build DLLs with /INTEGRITYCHECK flag" OFF "CMAKE_CXX_COMPILER_ID STREQUAL MSVC" OFF)
@ -16,9 +16,23 @@ if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
endif()
endif()

macro(_ie_process_msvc_generator_platform flag_name)
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(arch_flag X86_64)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")
set(arch_flag X86)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(arm64.*|aarch64.*|AARCH64.*)")
set(arch_flag AARCH64)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
set(arch_flag ARM)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^riscv64$")
set(arch_flag RISCV64)
endif()

set(HOST_${arch_flag} ON)

macro(_ie_process_msvc_generator_platform arch_flag)
# if cmake -A <ARM|ARM64> is passed
if(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64")
if(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64")
set(AARCH64 ON)
elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM")
set(ARM ON)
@ -27,14 +41,14 @@ macro(_ie_process_msvc_generator_platform flag_name)
elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "Win32")
set(X86 ON)
else()
set(${flag_name} ON)
set(${arch_flag} ON)
endif()
endmacro()

if(MSVC64 OR MINGW64)
_ie_process_msvc_generator_platform(X86_64)
_ie_process_msvc_generator_platform(${arch_flag})
elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING))
_ie_process_msvc_generator_platform(X86)
_ie_process_msvc_generator_platform(${arch_flag})
elseif(CMAKE_OSX_ARCHITECTURES AND APPLE)
if(CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
set(AARCH64 ON)
@ -49,7 +63,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(X86_64 ON)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")
set(X86 ON)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm64.*|aarch64.*|AARCH64.*)")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm64.*|aarch64.*|AARCH64.*|ARM64.*)")
set(AARCH64 ON)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
set(ARM ON)
@ -57,18 +71,6 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^riscv64$")
set(RISCV64 ON)
endif()

if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(HOST_X86_64 ON)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")
set(HOST_X86 ON)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(arm64.*|aarch64.*|AARCH64.*)")
set(HOST_AARCH64 ON)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
set(HOST_ARM ON)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^riscv64$")
set(HOST_RISCV64 ON)
endif()

if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set(EMSCRIPTEN ON)
endif()
@ -93,7 +93,6 @@ function(register_extra_modules)
file(REMOVE "${devconfig_file}")

file(WRITE "${devconfig_file}" "\# !! AUTOGENERATED: DON'T EDIT !!\n\n")
file(APPEND "${devconfig_file}" "ie_deprecated_no_errors()\n")

foreach(target IN LISTS ${openvino_export_components})
if(target)
@ -124,6 +123,17 @@ endif()\n")
endif()
list(APPEND extra_modules "${OpenVINO_SOURCE_DIR}/src/core/template_extension")

# add extra flags for compilation of extra modules:
# since not all extra modules use OpenVINODeveloperPackage, we have to add these function calls here
ov_dev_package_no_errors()
ov_deprecated_no_errors()

if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# 'argument': conversion from 'size_t' to 'int', possible loss of data
ie_add_compiler_flags(/wd4267)
ie_add_compiler_flags(/wd4244)
endif()

# add each extra module
foreach(module_path IN LISTS extra_modules)
if(module_path)
@ -336,12 +336,11 @@ macro(ov_cpack_settings)
set(samples_build_deps "cmake, g++, gcc, libc6-dev, make, pkg-config")
set(samples_build_deps_suggest "libopencv-core-dev, libopencv-imgproc-dev, libopencv-imgcodecs-dev")
set(samples_opencl_suggest "ocl-icd-opencl-dev, opencl-headers")
if(OV_GLIBC_VERSION VERSION_LESS_EQUAL 2.27)
# Ubuntu 18.04, Debian 9 cases
set(json_library "nlohmann-json-dev")
else()
set(json_library "nlohmann-json3-dev")
endif()
# Ubuntu 18.04, Debian 9 cases have nlohmann-json-dev
# newer systems have nlohmann-json3-dev
# according to https://www.debian.org/doc/debian-policy/ch-relationships.html#syntax-of-relationship-fields
# we can use | (pipe) to provide alternative package names
set(json_library "nlohmann-json3-dev | nlohmann-json-dev")

# c_samples / cpp_samples
set(CPACK_COMPONENT_SAMPLES_DESCRIPTION "Intel(R) Distribution of OpenVINO(TM) Toolkit C / C++ Samples")
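The `|` alternative only takes effect once `json_library` is folded into the package's dependency fields. A sketch of how the string could be attached to a CPack DEB component is shown below; the exact `CPACK_DEBIAN_SAMPLES_PACKAGE_SUGGESTS` variable and the surrounding dependency list are assumptions for illustration, since the real assembly happens elsewhere in `ov_cpack_settings`.

```cmake
# Either nlohmann-json3-dev (newer distros) or nlohmann-json-dev (Ubuntu 18.04 / Debian 9)
# satisfies the relationship, per Debian policy's "pkg-a | pkg-b" syntax.
set(CPACK_DEBIAN_SAMPLES_PACKAGE_SUGGESTS
    "${samples_build_deps_suggest}, ${samples_opencl_suggest}, ${json_library}")
```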
@ -151,13 +151,8 @@ endif()
# Extra Compile Flags
#

if(CMAKE_COMPILER_IS_GNUCXX)
ie_add_compiler_flags(-Wno-error=unused-variable)
ie_add_compiler_flags(-Wno-error=unused-but-set-variable)
if(SUGGEST_OVERRIDE_SUPPORTED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-suggest-override")
endif()
endif()
# don't fail on strict compilation options in 3rd party modules
ov_dev_package_no_errors()

# Don't treat deprecated API warnings as errors in 3rd party apps
ie_deprecated_no_errors()
ov_deprecated_no_errors()
@ -123,13 +123,8 @@ endif()
# Extra Compile Flags
#

if(CMAKE_COMPILER_IS_GNUCXX)
ie_add_compiler_flags(-Wno-error=unused-variable)
ie_add_compiler_flags(-Wno-error=unused-but-set-variable)
if(SUGGEST_OVERRIDE_SUPPORTED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-suggest-override")
endif()
endif()
# don't fail on strict compilation options in 3rd party modules
ov_dev_package_no_errors()

# Don't treat deprecated API warnings as errors in 3rd party apps
ie_deprecated_no_errors()
ov_deprecated_no_errors()
@ -7,39 +7,24 @@
@endsphinxdirective

If you intend to use the OpenVINO GPU plugin and offload network inference to an Intel® graphics processor, the Intel Graphics Driver must be properly configured on your system.

If it is already installed, and you want to keep it, you can skip the installation steps.

## Linux

If you have installed OpenVINO Runtime from the archive file, APT, or YUM, follow these steps to work with GPU:
To install the latest available **Intel® Graphics Compute Runtime for OpenCL™** for your OS, see the [Install Guides](https://github.com/intel/compute-runtime/releases/latest).

1. Go to the install_dependencies directory:
```sh
cd <INSTALL_DIR>/install_dependencies/
```
> **NOTE**: If you use the Red Hat 8 OS, install the OpenCL library as a prerequisite with the following command:
> `rpm -ivh http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/ocl-icd-2.2.12-1.el8.x86_64.rpm`

2. Install the **Intel® Graphics Compute Runtime for OpenCL™** driver components required to use the GPU plugin and write custom layers for Intel® Integrated Graphics. The drivers are not included in the package. To install it, run this script:
```sh
sudo -E ./install_NEO_OCL_driver.sh
```
> **NOTE**: If you installed OpenVINO Runtime via PyPI, you can get this script from [the OpenVINO repository](https://github.com/openvinotoolkit/openvino/blob/master/scripts/install_dependencies/install_NEO_OCL_driver.sh).
> **NOTE**: For instructions specific to discrete graphics platforms, refer to [the dgpu guide](https://dgpu-docs.intel.com/installation-guides/index.html) (Intel® Arc™ A-Series Graphics, Intel® Data Center GPU Flex Series, Intel® Data Center GPU MAX Series, Intel® processor graphics Gen12, and Intel® Iris Xe MAX codename DG1).

> **NOTE**: To use the **Intel® Iris® Xe MAX Graphics**, see the [Intel® Iris® Xe MAX Graphics with Linux*](https://dgpu-docs.intel.com/devices/iris-xe-max-graphics/index.html) page for driver installation instructions.

The script compares the driver version on the system to the current version. If the driver version on the system is higher than or equal to the current version, the script does not install a new driver. If the version of the driver is lower than the current version, the script uninstalls the lower version and installs the current version with your permission:

You may consider installing one of the earlier versions of the driver, based on your particular setup needs.

Higher hardware versions require a higher driver version, namely 20.35 instead of 19.41. If the script fails to uninstall the driver, uninstall it manually. During the script execution, you may see the following command line output:
```sh
Add OpenCL user to video group
```
Ignore this suggestion and continue.<br>
You can also find the most recent version of the driver, installation procedure and other information on the [Intel® software for general purpose GPU capabilities](https://dgpu-docs.intel.com/index.html) site.
It is recommended that you refer to the [Intel® Graphics Compute Runtime Github page](https://github.com/intel/compute-runtime/) for instructions and recommendations on GPU driver installation specific to particular releases, including the list of supported hardware platforms.

3. **Optional:** Install header files to allow compilation of new code. You can find the header files at [Khronos OpenCL™ API Headers](https://github.com/KhronosGroup/OpenCL-Headers.git).

You've completed all required configuration steps to perform inference on processor graphics.
@sphinxdirective

.. _gpu guide windows:
@ -48,24 +33,31 @@ You've completed all required configuration steps to perform inference on proces
## Windows

This section will help you check whether you need to install the driver. Install the indicated version or higher.

If your applications offload computation to **Intel® Integrated Graphics**, you must have the Intel Graphics Driver for Windows installed on your hardware.
[Download and install the recommended version](https://downloadcenter.intel.com/download/30079/Intel-Graphics-Windows-10-DCH-Drivers).
To install the Intel Graphics Driver for Windows on your hardware, please proceed with the [instruction](https://www.intel.com/content/www/us/en/support/articles/000005629/graphics.html).

To check if you have this driver installed:

1. Type **device manager** in your **Search Windows** box and press Enter. The **Device Manager** opens.

2. Click the drop-down arrow to view the **Display adapters**. You can see the adapter that is installed in your computer:


2. Click the drop-down arrow to view the **Display adapters**. You can see the adapter that is installed in your computer:

3. Right-click the adapter name and select **Properties**.
4. Click the **Driver** tab to see the driver version.


4. Click the **Driver** tab to see the driver version.


You are done updating your device driver and are ready to use your GPU.

## Additional info

In internal OpenVINO validation, the following versions of the Intel Graphics Driver were used:

Operating System | Driver version
--- |-------------------------
Ubuntu 20.04 | [22.35.24055](https://github.com/intel/compute-runtime/releases/tag/22.35.24055)
Ubuntu 18.04 | [21.38.21026](https://github.com/intel/compute-runtime/releases/tag/21.38.21026)
CentOS 7 | [19.41.14441](https://github.com/intel/compute-runtime/releases/tag/19.41.14441)
RHEL 8 | [22.28.23726](https://github.com/intel/compute-runtime/releases/tag/22.28.23726)

## What’s Next?

You can try out the toolkit with:
@ -79,4 +71,3 @@ Developing in C++:
* [Image Classification Async C++ Sample](@ref openvino_inference_engine_samples_classification_sample_async_README)
* [Hello Classification C++ Sample](@ref openvino_inference_engine_samples_hello_classification_README)
* [Hello Reshape SSD C++ Sample](@ref openvino_inference_engine_samples_hello_reshape_ssd_README)
@ -99,5 +99,6 @@ int main() {
//! [ie:load_old_extension]
core.AddExtension(std::make_shared<InferenceEngine::Extension>("path_to_extension_library.so"));
//! [ie:load_old_extension]
(void)status;
return 0;
}
@ -117,6 +117,7 @@ int main() {
core.add_extension(std::make_shared<InferenceEngine::Extension>("path_to_extension_library.so"));
//! [ov_api_2_0:load_old_extension]
OPENVINO_SUPPRESS_DEPRECATED_END
(void)status;

return 0;
}
@ -55,7 +55,7 @@ layout = ov::Layout("NCHW");
std::cout << layout.to_string(); // prints [N,C,H,W]
//! [ov:layout:dump]

std::shared_ptr<ov::Model> model;
std::shared_ptr<ov::Model> model = std::make_shared<ov::Model>(ov::OutputVector{}, ov::ParameterVector{});
//! [ov:layout:get_from_model]
// Get layout for model input
layout = ov::layout::get_layout(model->input("input_tensor_name"));
@ -45,10 +45,6 @@ set (CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${IE_MAIN_SAMPLES_DIR}/${BIN_FOLDER})
set (CMAKE_PDB_OUTPUT_DIRECTORY ${IE_MAIN_SAMPLES_DIR}/${BIN_FOLDER})
set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${IE_MAIN_SAMPLES_DIR}/${BIN_FOLDER})

if(TREAT_WARNING_AS_ERROR AND NOT DEFINED CMAKE_COMPILE_WARNING_AS_ERROR)
set(CMAKE_COMPILE_WARNING_AS_ERROR ON)
endif()

if (WIN32)
set_property (DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _CRT_SECURE_NO_WARNINGS)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS")
@ -46,7 +46,7 @@ if(NOT TARGET nlohmann_json::nlohmann_json)
# for example, on debian 9 there is no cmake / pkgconfig files
find_file(nlohmann_include_file
NAMES "json.hpp"
"Path to json.hpp (nlohmann-json-dev )")
"Path to json.hpp (nlohmann-json-dev)")
if(nlohmann_include_file)
add_library(nlohmann_json::nlohmann_json INTERFACE IMPORTED)
get_filename_component(nlohmann_include_dir "${nlohmann_include_file}" PATH)
@ -169,7 +169,7 @@ ov::Tensor create_tensor_from_binary(const std::vector<std::string>& files,
if (inputInfo.layout != "CN") {
binaryFile.read(&data[b * inputSize], inputSize);
} else {
for (int i = 0; i < inputInfo.channels(); i++) {
for (size_t i = 0; i < inputInfo.channels(); i++) {
binaryFile.read(&data[(i * binaryBatchSize + b) * sizeof(T)], sizeof(T));
}
}
@ -517,7 +517,7 @@ std::map<std::string, ov::TensorVector> get_tensors(std::map<std::string, std::v
}
}

for (int i = 0; i < logOutput.size(); i++) {
for (size_t i = 0; i < logOutput.size(); i++) {
slog::info << "Test Config " << i << slog::endl;
auto maxNameWidth = std::max_element(logOutput[i].begin(),
logOutput[i].end(),
@ -691,7 +691,7 @@ std::map<std::string, ov::TensorVector> get_tensors_static_case(const std::vecto
}
}

for (int i = 0; i < logOutput.size(); i++) {
for (size_t i = 0; i < logOutput.size(); i++) {
slog::info << "Test Config " << i << slog::endl;
auto maxNameWidth = std::max_element(logOutput[i].begin(),
logOutput[i].end(),
@ -176,9 +176,9 @@ void setDeviceProperty(ov::Core& core,
return;

if (device_config.find(device) == device_config.end() || // device properties not existed
config.first.empty() && // not setting default value to property
(!FLAGS_load_config.empty() &&
is_dev_set_property[device])) { // device properties loaded from file and overwrite is not happened
(config.first.empty() && // not setting default value to property
(!FLAGS_load_config.empty() &&
is_dev_set_property[device]))) { // device properties loaded from file and overwrite is not happened
is_dev_set_property[device] = false;
device_config.erase(device);
device_config.insert(ov::device::properties(device, device_property));
@ -206,7 +206,7 @@ void fuse_mean_scale(ov::preprocess::PrePostProcessor& preproc, const benchmark_
bool warned = false;
constexpr char warn_msg[] = "Mean/scale values are fused into the model. This slows down performance compared to "
"--imean and --iscale which existed before";
for (const std::pair<std::string, benchmark_app::InputInfo>& input_info : app_inputs_info) {
for (const std::pair<std::string, benchmark_app::InputInfo> input_info : app_inputs_info) {
if (!input_info.second.mean.empty()) {
if (!warned) {
slog::warn << warn_msg << slog::endl;
@ -742,7 +742,7 @@ int main(int argc, char* argv[]) {
const auto output_precision = FLAGS_op.empty() ? ov::element::undefined : getPrecision2(FLAGS_op);

const auto& inputs = model->inputs();
for (int i = 0; i < inputs.size(); i++) {
for (size_t i = 0; i < inputs.size(); i++) {
const auto& item = inputs[i];
auto iop_precision = ov::element::undefined;
auto type_to_set = ov::element::undefined;
@ -783,7 +783,7 @@ int main(int argc, char* argv[]) {
fuse_mean_scale(preproc, app_inputs_info.at(0));

const auto& outs = model->outputs();
for (int i = 0; i < outs.size(); i++) {
for (size_t i = 0; i < outs.size(); i++) {
const auto& item = outs[i];
auto iop_precision = ov::element::undefined;
try {
@ -1215,7 +1215,7 @@ int main(int argc, char* argv[]) {
std::vector<LatencyMetrics> groupLatencies = {};
if (FLAGS_pcseq && app_inputs_info.size() > 1) {
const auto& lat_groups = inferRequestsQueue.get_latency_groups();
for (int i = 0; i < lat_groups.size(); i++) {
for (size_t i = 0; i < lat_groups.size(); i++) {
const auto& lats = lat_groups[i];

std::string data_shapes_string = "";
@ -83,7 +83,7 @@ std::map<std::string, ov::TensorVector> get_remote_input_tensors(
auto& oclContext = static_cast<ov::intel_gpu::ocl::ClContext&>(context);
auto oclInstance = std::make_shared<gpu::OpenCL>(oclContext.get());

for (int i = 0; i < num_requests; i++) {
for (size_t i = 0; i < num_requests; i++) {
for (auto& inputs_info : app_inputs_info) {
for (auto& input : inputs_info) {
// Fill random
@ -84,7 +84,7 @@ void StatisticsReport::dump_performance_counters_request(CsvDumper& dumper, cons
for (const auto& layer : perfCounts) {
dumper << layer.node_name; // layer name
dumper << ((int)layer.status < (sizeof(status_names) / sizeof(status_names[0]))
dumper << ((size_t)layer.status < (sizeof(status_names) / sizeof(status_names[0]))
? status_names[(int)layer.status]
: "INVALID_STATUS");
dumper << layer.node_type << layer.exec_type;
@ -107,7 +107,6 @@ void StatisticsReport::dump_sort_performance_counters_request(CsvDumper& dumper,
const PerformanceCounters& perfCounts) {
std::chrono::microseconds total = std::chrono::microseconds::zero();
std::chrono::microseconds total_cpu = std::chrono::microseconds::zero();
int layersize = 0;

dumper << "layerName"
<< "execStatus"
@ -131,14 +130,13 @@ void StatisticsReport::dump_sort_performance_counters_request(CsvDumper& dumper,
for (const auto& layer : profiling) {
if (std::string(status_names[(int)layer.status]).compare("EXECUTED") == 0) {
dumper << layer.node_name; // layer name
dumper << ((int)layer.status < (sizeof(status_names) / sizeof(status_names[0]))
dumper << ((size_t)layer.status < (sizeof(status_names) / sizeof(status_names[0]))
? status_names[(int)layer.status]
: "INVALID_STATUS");
dumper << layer.node_type << layer.exec_type;
dumper << layer.real_time.count() / 1000.0 << layer.cpu_time.count() / 1000.0;
dumper << (layer.real_time * 1.0 / total) * 100;
dumper.endLine();
layersize += 1;
}
}
@ -159,7 +157,7 @@ StatisticsReport::PerformanceCounters StatisticsReport::get_average_performance_
// iterate over each layer from sorted vector and add required PM data
// to the per-layer maps
for (const auto& pm : perfCounts[i]) {
int idx = 0;
size_t idx = 0;
for (; idx < performanceCountersAvg.size(); idx++) {
if (performanceCountersAvg[idx].node_name == pm.node_name) {
performanceCountersAvg[idx].real_time += pm.real_time;
@ -284,8 +282,8 @@ const nlohmann::json StatisticsReportJSON::perf_counters_to_json(
item["name"] = layer.node_name; // layer name
item["status"] =
((int)layer.status < (sizeof(status_names) / sizeof(status_names[0])) ? status_names[(int)layer.status]
: "INVALID_STATUS");
((size_t)layer.status < (sizeof(status_names) / sizeof(status_names[0])) ? status_names[(int)layer.status]
: "INVALID_STATUS");
item["node_type"] = layer.node_type;
item["exec_type"] = layer.exec_type;
item["real_time"] = layer.real_time.count() / 1000.0;
@ -320,8 +318,8 @@ const nlohmann::json StatisticsReportJSON::sort_perf_counters_to_json(
nlohmann::json item;
item["name"] = layer.node_name; // layer name
item["status"] =
((int)layer.status < (sizeof(status_names) / sizeof(status_names[0])) ? status_names[(int)layer.status]
: "INVALID_STATUS");
((size_t)layer.status < (sizeof(status_names) / sizeof(status_names[0])) ? status_names[(int)layer.status]
: "INVALID_STATUS");
item["node_type"] = layer.node_type;
item["exec_type"] = layer.exec_type;
item["real_time"] = layer.real_time.count() / 1000.0;
@ -527,7 +527,7 @@ std::vector<benchmark_app::InputsInfo> get_inputs_info(const std::string& shape_
}

info.dataShape = ov::Shape(info.partialShape.size(), 0);
for (int i = 0; i < info.partialShape.size(); i++) {
for (size_t i = 0; i < info.partialShape.size(); i++) {
auto& dim = info.partialShape[i];
if (dim.is_static()) {
info.dataShape[i] = dim.get_length();
@ -662,65 +662,6 @@ std::vector<benchmark_app::InputsInfo> get_inputs_info(const std::string& shape_
reshape_required);
}

#ifdef USE_OPENCV
void dump_config(const std::string& filename, const std::map<std::string, ov::AnyMap>& config) {
slog::warn << "YAML and XML formats for config file won't be supported soon." << slog::endl;
auto plugin_to_opencv_format = [](const std::string& str) -> std::string {
if (str.find("_") != std::string::npos) {
slog::warn
<< "Device name contains \"_\" and will be changed during loading of configuration due to limitations."
"This configuration file could not be loaded correctly."
<< slog::endl;
}
std::string new_str(str);
auto pos = new_str.find(".");
if (pos != std::string::npos) {
new_str.replace(pos, 1, "_");
}
return new_str;
};
cv::FileStorage fs(filename, cv::FileStorage::WRITE);
if (!fs.isOpened())
throw std::runtime_error("Error: Can't open config file : " + filename);
for (auto device_it = config.begin(); device_it != config.end(); ++device_it) {
fs << plugin_to_opencv_format(device_it->first) << "{:";
std::stringstream strm;
for (auto param_it = device_it->second.begin(); param_it != device_it->second.end(); ++param_it) {
strm << param_it->first;
param_it->second.print(strm);
}
fs << strm.str();
fs << "}";
}
fs.release();
}

void load_config(const std::string& filename, std::map<std::string, ov::AnyMap>& config) {
slog::warn << "YAML and XML formats for config file won't be supported soon." << slog::endl;
auto opencv_to_plugin_format = [](const std::string& str) -> std::string {
std::string new_str(str);
auto pos = new_str.find("_");
if (pos != std::string::npos) {
new_str.replace(pos, 1, ".");
}
return new_str;
};
cv::FileStorage fs(filename, cv::FileStorage::READ);
if (!fs.isOpened())
throw std::runtime_error("Error: Can't load config file : " + filename);
cv::FileNode root = fs.root();
for (auto it = root.begin(); it != root.end(); ++it) {
auto device = *it;
if (!device.isMap()) {
throw std::runtime_error("Error: Can't parse config file : " + filename);
}
for (auto iit = device.begin(); iit != device.end(); ++iit) {
auto item = *iit;
config[opencv_to_plugin_format(device.name())][item.name()] = item.string();
}
}
}
#else
void dump_config(const std::string& filename, const std::map<std::string, ov::AnyMap>& config) {
nlohmann::json jsonConfig;
for (const auto& item : config) {
@ -794,7 +735,6 @@ void load_config(const std::string& filename, std::map<std::string, ov::AnyMap>&
}
}
}
#endif

#ifdef USE_OPENCV
const std::vector<std::string> supported_image_extensions =
|
@ -41,7 +41,7 @@ std::shared_ptr<unsigned char> OCVReader::getData(size_t width = 0, size_t heigh
|
||||
|
||||
cv::Mat resized(cv::Size(width, height), img.type(), _data.get());
|
||||
|
||||
if (width != img.cols || height != img.rows) {
|
||||
if (width != static_cast<size_t>(img.cols) || height != static_cast<size_t>(img.rows)) {
|
||||
slog::warn << "Image is resized from (" << img.cols << ", " << img.rows << ") to (" << width << ", " << height
|
||||
<< ")" << slog::endl;
|
||||
}
|
||||
|
@ -171,7 +171,7 @@ void printInputAndOutputsInfo(const ov::Model& network) {
slog::info << "model name: " << network.get_friendly_name() << slog::endl;

const std::vector<ov::Output<const ov::Node>> inputs = network.inputs();
for (const ov::Output<const ov::Node> input : inputs) {
for (const ov::Output<const ov::Node>& input : inputs) {
slog::info << " inputs" << slog::endl;

const std::string name = input.get_names().empty() ? "NONE" : input.get_any_name();
@ -185,7 +185,7 @@ void printInputAndOutputsInfo(const ov::Model& network) {
}

const std::vector<ov::Output<const ov::Node>> outputs = network.outputs();
for (const ov::Output<const ov::Node> output : outputs) {
for (const ov::Output<const ov::Node>& output : outputs) {
slog::info << " outputs" << slog::endl;

const std::string name = output.get_names().empty() ? "NONE" : output.get_any_name();
@ -70,7 +70,7 @@ ov::Tensor read_weights(const std::string& filepath) {
ov::Tensor weights(ov::element::u8, {static_cast<size_t>(fileSize)});
read_file(filepath, weights.data(), weights.get_byte_size());

return std::move(weights);
return weights;
}

/**
@ -126,7 +126,7 @@ int main(int argc, char* argv[]) {
in.model().set_layout(ov::Layout(custom_layouts.at(item_name)));
}
}
for (int i = 0; i < model->outputs().size(); i++) {
for (size_t i = 0; i < model->outputs().size(); i++) {
proc.output(i).tensor().set_element_type(ov::element::f32);
}
model = proc.build();
@ -57,9 +57,6 @@ if(LINUX)
ie_cpack_add_component(${OV_CPACK_COMP_INSTALL_DEPENDENCIES} HIDDEN)

set(install_dependencies_files install_openvino_dependencies.sh)
if(ENABLE_INTEL_GPU)
list(APPEND install_dependencies_files install_NEO_OCL_driver.sh)
endif()

foreach(install_dependencies_file IN LISTS install_dependencies_files)
install(PROGRAMS "${CMAKE_CURRENT_SOURCE_DIR}/install_dependencies/${install_dependencies_file}"
@ -1,470 +0,0 @@
#!/bin/bash

# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

#
# Installs the Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver on Linux.
#
# Usage: sudo -E ./install_NEO_OCL_driver.sh
#
# Supported platforms:
#     6th-11th generation Intel® Core™ processor with Intel(R)
#     Processor Graphics Technology not previously disabled by the BIOS
#     or motherboard settings
#
EXIT_FAILURE=1
EXIT_WRONG_ARG=2
UBUNTU_VERSION=
DISTRO=
SCRIPT_DIR="$( cd "$( dirname "$(realpath "${BASH_SOURCE[0]}")" )" >/dev/null 2>&1 && pwd )"
INSTALL_DRIVER_VERSION='unknown'


print_help()
{
    # Display Help
    usage="Usage: $(basename "$0") [OPTIONS]...
Download and installs the Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver on Linux

Available options:
    -y                      Replace the currently installed driver with the newer version.
    --no_numa               Skip installing NUMA packages. (off)
    -h, --help              Display this help and exit"
    echo "$usage"
}

while [[ $# -gt 0 ]]
do
    key="$1"
    case $key in
        -d|--install_driver)
            echo "WARNING: This option is deprecated. Recommended driver for current platform will be installed."
            shift
            shift
        ;;
        -y)
            agreement=true
            shift
        ;;
        -a|--auto)
            echo "WARNING: This option is deprecated. Recommended driver for current platform will be installed."
            shift
        ;;
        --no_numa)
            no_numa=true
            shift
        ;;
        -h|--help)
            print_help
            exit
        ;;
        *)
            echo "$(basename "$0"): invalid option -- '${key}'"
            echo "Try '$(basename "$0") --help' for more information."
            exit $EXIT_WRONG_ARG
    esac
done

_install_prerequisites_redhat()
{
    # yum doesn't accept timeout in seconds as parameter
    echo
    echo "Note: if yum becomes non-responsive, try aborting the script and run:"
    echo "     sudo -E $0"
    echo
    CMDS=("dnf install -y 'dnf-command(config-manager)'"
          "dnf config-manager --add-repo \
          https://repositories.intel.com/graphics/rhel/8.5/intel-graphics.repo")

    for cmd in "${CMDS[@]}"; do
        echo "$cmd"
        if ! eval "$cmd"; then
            echo "ERROR: failed to run $cmd" >&2
            echo "Problem (or disk space)?" >&2
            echo ". Verify that you have enough disk space, and run the script again." >&2
            exit $EXIT_FAILURE
        fi
    done
}

_install_prerequisites_ubuntu()
{
    apt-get update
    apt-get install -y gpg-agent
    curl https://repositories.intel.com/graphics/intel-graphics.key |gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg
    echo 'deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu focal-legacy main' | tee /etc/apt/sources.list.d/intel.gpu.focal.list
    apt-get update
    if [ "$no_numa" == true ]; then
        CMDS=("apt-get -y install --no-install-recommends ocl-icd-libopencl1")
    else
        CMDS=("apt-get -y install --no-install-recommends libnuma1 ocl-icd-libopencl1")
    fi

    for cmd in "${CMDS[@]}"; do
        echo "$cmd"
        if ! eval "$cmd"; then
            echo "ERROR: failed to run $cmd" >&2
            echo "Problem (or disk space)?" >&2
            echo "     sudo -E $0" >&2
            echo "2. Verify that you have enough disk space, and run the script again." >&2
            exit $EXIT_FAILURE
        fi
    done
}

install_prerequisites()
{
    echo 'Installing prerequisites...'
    if [[ $DISTRO == "redhat" ]]; then
        _install_prerequisites_redhat
    elif [[ $DISTRO == "ubuntu" ]]; then
        _install_prerequisites_ubuntu
    else
        echo 'WARNING::install_prerequisites: Unknown OS'
    fi
}

_deploy_rpm()
{
    cmd="rpm $IGFX_RPM_FLAGS -ivh --nodeps --force $1"
    echo "$cmd"
    eval "$cmd"
}

_deploy_deb()
{
    cmd="dpkg -i $1"
    echo "$cmd"
    eval "$cmd"
}

_install_user_mode_redhat()
{

    CMDS=("rpm -ivh https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/mesa-filesystem-21.1.5-1.el8.x86_64.rpm" \
          "dnf install --refresh -y \
          intel-opencl-22.28.23726.1-i419.el8.x86_64 intel-media intel-mediasdk libmfxgen1 libvpl2 \
          level-zero intel-level-zero-gpu \
          intel-metrics-library intel-igc-core intel-igc-cm \
          libva libva-utils intel-gmmlib" \
          "rpm -ivh http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/ocl-icd-2.2.12-1.el8.x86_64.rpm" )

    for cmd in "${CMDS[@]}"; do
        echo "$cmd"
        if ! eval "$cmd"; then
            echo "ERROR: failed to run $cmd" >&2
|
||||
echo "Problem (or disk space)?" >&2
|
||||
echo " sudo -E $0" >&2
|
||||
echo "Verify that you have enough disk space, and run the script again." >&2
|
||||
exit $EXIT_FAILURE
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
_install_user_mode_ubuntu()
|
||||
{
|
||||
if ! 'find . -name "intel*.deb" -exec dpkg -i {} \;'; then
|
||||
echo "ERROR: failed to install debs $cmd error" >&2
|
||||
echo "Make sure you have enough disk space or fix the problem manually and try again." >&2
|
||||
exit $EXIT_FAILURE
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
install_user_mode()
|
||||
{
|
||||
echo "Installing user mode driver..."
|
||||
|
||||
if [[ $DISTRO == "redhat" ]]; then
|
||||
_install_user_mode_redhat
|
||||
else
|
||||
_install_user_mode_ubuntu
|
||||
fi
|
||||
# exit from $SCRIPT_DIR/neo folder
|
||||
cd - || exit
|
||||
# clean it up
|
||||
rm -rf "$SCRIPT_DIR/neo"
|
||||
}
|
||||
|
||||
_uninstall_user_mode_redhat()
|
||||
{
|
||||
echo Looking for previously installed user-mode driver...
|
||||
PACKAGES=("intel-opencl"
|
||||
"intel-ocloc"
|
||||
"intel-gmmlib"
|
||||
"intel-igc-core"
|
||||
"intel-igc-opencl")
|
||||
for package in "${PACKAGES[@]}"; do
|
||||
echo "rpm -qa | grep $package"
|
||||
if found_package=$(rpm -qa | grep "$package"); then
|
||||
echo "Found installed user-mode driver, performing uninstall..."
|
||||
cmd="rpm -e --nodeps ${found_package}"
|
||||
echo "$cmd"
|
||||
if ! eval "$cmd"; then
|
||||
echo "ERROR: failed to uninstall existing user-mode driver." >&2
|
||||
echo "Please try again manually and run the script again." >&2
|
||||
exit $EXIT_FAILURE
|
||||
fi
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
_uninstall_user_mode_ubuntu()
|
||||
{
|
||||
echo Looking for previously installed user-mode driver...
|
||||
|
||||
PACKAGES=("intel-opencl"
|
||||
"intel-opencl-icd"
|
||||
"intel-ocloc"
|
||||
"intel-gmmlib"
|
||||
"intel-igc-core"
|
||||
"intel-igc-opencl")
|
||||
|
||||
for package in "${PACKAGES[@]}"; do
|
||||
if found_package=$(dpkg-query -W -f='${binary:Package}\n' "${package}"); then
|
||||
echo "Found installed user-mode driver, performing uninstall..."
|
||||
cmd="apt-get autoremove -y $package"
|
||||
echo "$cmd"
|
||||
if ! eval "$cmd"; then
|
||||
echo "ERROR: failed to uninstall existing user-mode driver." >&2
|
||||
echo "Please try again manually and run the script again." >&2
|
||||
exit $EXIT_FAILURE
|
||||
fi
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
uninstall_user_mode()
|
||||
{
|
||||
if [[ $DISTRO == "redhat" ]]; then
|
||||
_uninstall_user_mode_redhat
|
||||
else
|
||||
_uninstall_user_mode_ubuntu
|
||||
fi
|
||||
}
|
||||
|
||||
_get_packages_ubuntu()
|
||||
{
|
||||
case $INSTALL_DRIVER_VERSION in
|
||||
"21.38.21026")
|
||||
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.38.21026/intel-gmmlib_21.2.1_amd64.deb
|
||||
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.8708/intel-igc-core_1.0.8708_amd64.deb
|
||||
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.8708/intel-igc-opencl_1.0.8708_amd64.deb
|
||||
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.38.21026/intel-opencl_21.38.21026_amd64.deb
|
||||
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.38.21026/intel-ocloc_21.38.21026_amd64.deb
|
||||
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.38.21026/intel-level-zero-gpu_1.2.21026_amd64.deb
|
||||
;;
|
||||
"21.48.21782")
|
||||
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.48.21782/intel-gmmlib_21.3.3_amd64.deb
|
||||
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.9441/intel-igc-core_1.0.9441_amd64.deb
|
||||
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.9441/intel-igc-opencl_1.0.9441_amd64.deb
|
||||
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.48.21782/intel-opencl-icd_21.48.21782_amd64.deb
|
||||
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.48.21782/intel-level-zero-gpu_1.2.21782_amd64.deb
|
||||
;;
|
||||
"22.35.24055")
|
||||
apt-get install -y \
|
||||
intel-opencl-icd=22.35.24055+i815~u20.04 \
|
||||
intel-level-zero-gpu=1.3.24055+i815~u20.04 \
|
||||
level-zero=1.8.5+i815~u20.04
|
||||
;;
|
||||
*)
|
||||
echo "ERROR: Unrecognized driver ${INSTALL_DRIVER_VERSION}."
|
||||
exit $EXIT_WRONG_ARG
|
||||
esac
|
||||
}
|
||||
|
||||
_verify_checksum_ubuntu()
|
||||
{
|
||||
case $INSTALL_DRIVER_VERSION in
|
||||
"21.38.21026")
|
||||
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.38.21026/ww38.sum
|
||||
sha256sum -c ww38.sum
|
||||
;;
|
||||
"21.48.21782")
|
||||
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.48.21782/ww48.sum
|
||||
sha256sum -c ww48.sum
|
||||
;;
|
||||
"22.35.24055")
|
||||
echo "Verification by apt"
|
||||
;;
|
||||
*)
|
||||
echo "ERROR: Unrecognized driver ${INSTALL_DRIVER_VERSION}."
|
||||
exit $EXIT_WRONG_ARG
|
||||
esac
|
||||
}
|
||||
|
||||
verify_checksum()
|
||||
{
|
||||
if [[ $DISTRO == "redhat" ]]; then
|
||||
return 0
|
||||
else
|
||||
_verify_checksum_ubuntu
|
||||
fi
|
||||
}
|
||||
|
||||
get_packages()
|
||||
{
|
||||
mkdir -p "$SCRIPT_DIR/neo"
|
||||
cd "$SCRIPT_DIR/neo" || exit
|
||||
|
||||
if [[ $DISTRO == "redhat" ]]; then
|
||||
return 0
|
||||
else
|
||||
_get_packages_ubuntu
|
||||
fi
|
||||
if ! verify_checksum; then
|
||||
echo "ERROR: checksums do not match for the downloaded packages"
|
||||
echo " Please verify your Internet connection and make sure you have enough disk space or fix the problem manually and try again. "
|
||||
exit $EXIT_FAILURE
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
version_gt() {
|
||||
# check if first version is greater than second version
|
||||
test "$(printf '%s\n' "$@" | sort -V | head -n 1)" != "$1";
|
||||
}
|
||||
|
||||
summary()
|
||||
{
|
||||
echo
|
||||
echo "Installation completed successfully."
|
||||
echo
|
||||
echo "Next steps:"
|
||||
echo "Add OpenCL users to the video and render group: 'sudo usermod -a -G video,render USERNAME'"
|
||||
echo " e.g. if the user running OpenCL host applications is foo, run: sudo usermod -a -G video,render foo"
|
||||
echo " Current user has been already added to the video and render group"
|
||||
echo
|
||||
|
||||
echo "If you use 8th Generation Intel® Core™ processor, add:"
|
||||
echo " i915.alpha_support=1"
|
||||
echo " to the 4.14 kernel command line, in order to enable OpenCL functionality for this platform."
|
||||
echo
|
||||
}
|
||||
|
||||
check_root_access()
|
||||
{
|
||||
if [[ $EUID -ne 0 ]]; then
|
||||
echo "ERROR: you must run this script as root." >&2
|
||||
echo "Please try again with \"sudo -E $0\", or as root." >&2
|
||||
exit $EXIT_FAILURE
|
||||
fi
|
||||
}
|
||||
|
||||
add_user_to_video_group()
|
||||
{
|
||||
local real_user
|
||||
real_user=$(logname 2>/dev/null || echo "${SUDO_USER:-${USER}}")
|
||||
echo
|
||||
echo "Adding $real_user to the video group..."
|
||||
if ! usermod -a -G video "$real_user"; then
|
||||
echo "WARNING: unable to add $real_user to the video group" >&2
|
||||
fi
|
||||
echo "Adding $real_user to the render group..."
|
||||
if ! usermod -a -G render "$real_user"; then
|
||||
echo "WARNING: unable to add $real_user to the render group" >&2
|
||||
fi
|
||||
}
|
||||
|
||||
_check_distro_version()
|
||||
{
|
||||
if [[ $DISTRO == redhat ]]; then
|
||||
RHEL_MINOR_VERSION_SUPPORTED="[3-7]"
|
||||
if ! eval grep -m1 'VERSION_ID' /etc/os-release | grep -Eo "8.${RHEL_MINOR_VERSION_SUPPORTED}"; then
|
||||
echo "Warning: This runtime can be installed only on RHEL 8.3 up to RHEL 8.7"
|
||||
echo "More info https://dgpu-docs.intel.com/releases/releases-20211130.html" >&2
|
||||
echo "Installation of Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver interrupted"
|
||||
exit $EXIT_FAILURE
|
||||
else
|
||||
INSTALL_DRIVER_VERSION='22.28.23726'
|
||||
fi
|
||||
elif [[ $DISTRO == ubuntu ]]; then
|
||||
UBUNTU_VERSION=$(grep -m1 'VERSION_ID' /etc/os-release | grep -Eo "[0-9]{2}.[0-9]{2}")
|
||||
if [[ $UBUNTU_VERSION == '18.04' ]]; then
|
||||
INSTALL_DRIVER_VERSION='21.38.21026'
|
||||
elif [[ $UBUNTU_VERSION == '20.04' ]]; then
|
||||
INSTALL_DRIVER_VERSION='22.35.24055'
|
||||
else
|
||||
echo "Warning: This runtime can be installed only on Ubuntu 18.04 or Ubuntu 20.04."
|
||||
echo "More info https://github.com/intel/compute-runtime/releases" >&2
|
||||
echo "Installation of Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver interrupted"
|
||||
exit $EXIT_FAILURE
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
distro_init()
|
||||
{
|
||||
if [[ -f /etc/redhat-release ]]; then
|
||||
DISTRO="redhat"
|
||||
elif [[ -f /etc/lsb-release ]]; then
|
||||
DISTRO="ubuntu"
|
||||
fi
|
||||
|
||||
_check_distro_version
|
||||
}
|
||||
|
||||
check_agreement()
|
||||
{
|
||||
if [ "$agreement" == true ]; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
echo "This script will download and install Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver $INSTALL_DRIVER_VERSION, "
|
||||
echo "that was used to validate this OpenVINO™ package."
|
||||
echo "In case if you already have the driver - script will try to remove it."
|
||||
while true; do
|
||||
read -rp "Want to proceed? (y/n): " yn
|
||||
case $yn in
|
||||
[Yy]*) return 0 ;;
|
||||
[Nn]*) exit $EXIT_FAILURE ;;
|
||||
esac
|
||||
done
|
||||
}
|
||||
|
||||
check_current_driver()
|
||||
{
|
||||
echo "Checking current driver version..."
|
||||
if [[ $DISTRO == redhat ]]; then
|
||||
gfx_version=$(yum info intel-opencl | grep Version)
|
||||
elif [[ $DISTRO == ubuntu ]]; then
|
||||
gfx_version=$(dpkg-query --showformat='${Version}' --show intel-opencl)
|
||||
if [[ -z "$gfx_version" ]]; then
|
||||
gfx_version=$(dpkg-query --showformat='${Version}' --show intel-opencl-icd)
|
||||
fi
|
||||
fi
|
||||
|
||||
gfx_version="$(echo -e "${gfx_version}" | grep -Eo "[0-9]{2,3}\.[0-9]{2,3}\.[0-9]{3,6}")"
|
||||
|
||||
# install NEO OCL driver if the current driver version < INSTALL_DRIVER_VERSION
|
||||
if [[ -n $gfx_version && "$(printf '%s\n' "$INSTALL_DRIVER_VERSION" "$gfx_version" | sort -V | head -n 1)" = "$INSTALL_DRIVER_VERSION" ]]; then
|
||||
echo "Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver installation skipped because current version greater or equal to $INSTALL_DRIVER_VERSION" >&2
|
||||
echo "Installation of Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver interrupted." >&2
|
||||
exit $EXIT_FAILURE
|
||||
else
|
||||
echo "Starting installation..."
|
||||
fi
|
||||
}
|
||||
|
||||
install()
|
||||
{
|
||||
uninstall_user_mode
|
||||
install_prerequisites
|
||||
get_packages
|
||||
install_user_mode
|
||||
add_user_to_video_group
|
||||
}
|
||||
|
||||
main()
|
||||
{
|
||||
echo "Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver installer"
|
||||
distro_init
|
||||
check_root_access
|
||||
check_current_driver
|
||||
check_agreement
|
||||
install
|
||||
summary
|
||||
}
|
||||
|
||||
[[ "$0" == "${BASH_SOURCE[0]}" ]] && main "$@"
|
@ -8,7 +8,7 @@ set -e
#===================================================================================================
# Option parsing

all_comp=(core dev python)
all_comp=(core dev gpu python)
os=${os:-auto}

# public options
@ -117,12 +117,14 @@ if [ "$os" == "raspbian9" ] || [ "$os" == "debian9" ] ; then
    # which are not supported by OpenVINO

    pkgs_core=(libpugixml1v5)
    pkgs_gpu=()
    pkgs_python=()
    pkgs_dev=(pkg-config g++ gcc libc6-dev libgflags-dev zlib1g-dev nlohmann-json-dev make curl sudo)

elif [ "$os" == "ubuntu18.04" ] ; then

    pkgs_core=(libtbb2 libpugixml1v5)
    pkgs_gpu=()
    pkgs_python=(python3.8 libpython3.8 python3.8-venv python3-pip)
    pkgs_dev=(cmake pkg-config g++ gcc libc6-dev libgflags-dev zlib1g-dev nlohmann-json-dev make curl sudo)

@ -131,6 +133,7 @@ elif [ "$os" == "ubuntu20.04" ] || [ "$os" == "debian10" ] || [ "$os" == "raspbi
      [ "$os" == "ubuntu22.10" ] || [ "$os" == "debian12" ] || [ "$os" == "raspbian12" ]; then

    pkgs_core=(libpugixml1v5)
    pkgs_gpu=()
    pkgs_python=(python3 python3-venv python3-pip)
    pkgs_dev=(cmake pkg-config g++ gcc libc6-dev libgflags-dev zlib1g-dev nlohmann-json3-dev make curl sudo)

@ -163,6 +166,7 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] ||
    fi

    pkgs_dev=(gcc gcc-c++ make glibc libstdc++ libgcc cmake3 "json-devel.$arch" "zlib-devel.$arch" sudo)
    pkgs_gpu=()

    if [ "$os" == "centos7" ] || [ "$os" == "amzn2" ] ; then
        pkgs_dev+=(pkgconfig)
@ -193,6 +197,9 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] ||
        "https://download-ib01.fedoraproject.org/pub/epel/8/Everything/$arch/Packages/p/pugixml-1.13-1.el8.$arch.rpm"
        "https://vault.centos.org/centos/8/PowerTools/$arch/os/Packages/gflags-2.1.2-6.el8.$arch.rpm"
    )
    pkgs_gpu+=(
        "http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/ocl-icd-2.2.12-1.el8.x86_64.rpm"
    )
    pkgs_python+=(python38 python38-pip)
    pkgs_dev+=(
        "https://vault.centos.org/centos/8/PowerTools/$arch/os/Packages/gflags-devel-2.1.2-6.el8.$arch.rpm"
@ -1388,7 +1388,7 @@ TEST(ie_blob_make_memory_from_preallocated, makeMemoryfromPreallocated) {
    tensor.dims = dim_t;
    tensor.precision = precision_e::U8;
    tensor.layout = layout_e::NCHW;
    uint8_t array[1][3][4][4]= {0};
    uint8_t array[1][3][4][4]= {{{{0}}}};

    size_t size = 48;
    ie_blob_t *blob = nullptr;
@ -1,10 +1,15 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ov_test.hpp"

namespace {

class ov_compiled_model : public ::testing::TestWithParam<std::string> {};

INSTANTIATE_TEST_SUITE_P(device_name, ov_compiled_model, ::testing::Values("CPU"));

TEST_P(ov_compiled_model, ov_compiled_model_inputs_size) {
    auto device_name = GetParam();
    ov_core_t* core = nullptr;
@ -394,3 +399,5 @@ TEST_P(ov_compiled_model, create_infer_request_error_handling) {
    ov_model_free(model);
    ov_core_free(core);
}

} // namespace
@ -1,9 +1,12 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ov_test.hpp"
#include "test_model_repo.hpp"

namespace {

TEST(ov_version, api_version) {
    ov_version_t version;
    ov_get_openvino_version(&version);
@ -559,3 +562,5 @@ TEST_P(ov_core, ov_core_compile_model_from_file_unicode) {
    ov_core_free(core);
}
#endif

} // namespace
@ -5,6 +5,8 @@

#include "ov_test.hpp"

namespace {

inline void get_tensor_info(ov_model_t* model, bool input, char** name, ov_shape_t* shape, ov_element_type_e* type) {
    ov_output_const_port* port = nullptr;
    if (input) {
@ -398,3 +400,5 @@ TEST_P(ov_infer_request, get_profiling_info) {

    ov_profiling_info_list_free(&profiling_infos);
}

} // namespace
@ -23,7 +23,7 @@ TEST(ov_tensor, ov_tensor_create_from_host_ptr) {
    ov_element_type_e type = ov_element_type_e::U8;
    ov_shape_t shape;
    setup_4d_shape(&shape, 1, 3, 4, 4);
    uint8_t host_ptr[1][3][4][4] = {0};
    uint8_t host_ptr[1][3][4][4] = {{{{0}}}};
    ov_tensor_t* tensor = nullptr;
    OV_EXPECT_OK(ov_tensor_create_from_host_ptr(type, shape, &host_ptr, &tensor));
    EXPECT_NE(nullptr, tensor);
@ -17,6 +17,14 @@ file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ie_api.pyx
file(GLOB PYX_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.pyx)
set_source_files_properties(${PYX_SOURCES} PROPERTIES CYTHON_IS_CXX ON)

if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
    # 'argument': conversion from 'size_t' to 'int', possible loss of data
    ie_add_compiler_flags(/wd4267)
    ie_add_compiler_flags(/wd4244)
elseif(CMAKE_COMPILER_IS_GNUCXX)
    ie_add_compiler_flags(-Wno-unused-but-set-variable)
endif()

# create target

cython_add_module(${TARGET_NAME} ${SOURCES})
@ -45,13 +53,13 @@ if(COMMAND ie_add_vs_version_file)
    endforeach()
endif()

function(python_disable_deprecated_warnings)
    disable_deprecated_warnings()
function(python_ov_disable_deprecated_warnings)
    ov_disable_deprecated_warnings()
    set(pyx_file "${CMAKE_CURRENT_BINARY_DIR}/ie_api.cxx" "${CMAKE_CURRENT_BINARY_DIR}/constants.cxx")
    set_source_files_properties(${pyx_file} PROPERTIES COMPILE_OPTIONS ${ie_c_cxx_deprecated})
endfunction()

python_disable_deprecated_warnings()
python_ov_disable_deprecated_warnings()
ie_python_minimal_api(${TARGET_NAME})

target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
@ -11,109 +11,21 @@ import numpy as np
|
||||
from openvino._pyopenvino import Model
|
||||
from openvino._pyopenvino import Core as CoreBase
|
||||
from openvino._pyopenvino import CompiledModel as CompiledModelBase
|
||||
from openvino._pyopenvino import InferRequest as InferRequestBase
|
||||
from openvino._pyopenvino import AsyncInferQueue as AsyncInferQueueBase
|
||||
from openvino._pyopenvino import ConstOutput
|
||||
from openvino._pyopenvino import Tensor
|
||||
|
||||
|
||||
def tensor_from_file(path: str) -> Tensor:
|
||||
"""Create Tensor from file. Data will be read with dtype of unit8."""
|
||||
return Tensor(np.fromfile(path, dtype=np.uint8)) # type: ignore
|
||||
from openvino.runtime.utils.data_helpers import (
|
||||
_InferRequestWrapper,
|
||||
_data_dispatch,
|
||||
tensor_from_file,
|
||||
)
|
||||
|
||||
|
||||
def set_scalar_tensor(request: InferRequestBase, tensor: Tensor, key: Union[str, int, ConstOutput] = None) -> None:
|
||||
if key is None:
|
||||
request.set_input_tensor(tensor)
|
||||
elif isinstance(key, int):
|
||||
request.set_input_tensor(key, tensor)
|
||||
elif isinstance(key, (str, ConstOutput)):
|
||||
request.set_tensor(key, tensor)
|
||||
else:
|
||||
raise TypeError(f"Unsupported key type: {type(key)} for Tensor under key: {key}")
|
||||
|
||||
|
||||
@singledispatch
|
||||
def update_tensor(
|
||||
inputs: Union[np.ndarray, np.number, int, float],
|
||||
request: InferRequestBase,
|
||||
key: Union[str, int, ConstOutput] = None,
|
||||
) -> None:
|
||||
raise TypeError(f"Incompatible input data of type {type(inputs)} under {key} key!")
|
||||
|
||||
|
||||
@update_tensor.register(np.ndarray)
|
||||
def _(
|
||||
inputs: np.ndarray,
|
||||
request: InferRequestBase,
|
||||
key: Union[str, int, ConstOutput] = None,
|
||||
) -> None:
|
||||
# If shape is "empty", assume this is a scalar value
|
||||
if not inputs.shape:
|
||||
set_scalar_tensor(request, Tensor(inputs), key)
|
||||
else:
|
||||
if key is None:
|
||||
tensor = request.get_input_tensor()
|
||||
elif isinstance(key, int):
|
||||
tensor = request.get_input_tensor(key)
|
||||
elif isinstance(key, (str, ConstOutput)):
|
||||
tensor = request.get_tensor(key)
|
||||
else:
|
||||
raise TypeError(f"Unsupported key type: {type(key)} for Tensor under key: {key}")
|
||||
# Update shape if there is a mismatch
|
||||
if tensor.shape != inputs.shape:
|
||||
tensor.shape = inputs.shape
|
||||
# When copying, type should be up/down-casted automatically.
|
||||
tensor.data[:] = inputs[:]
|
||||
|
||||
|
||||
@update_tensor.register(np.number) # type: ignore
|
||||
@update_tensor.register(float)
|
||||
@update_tensor.register(int)
|
||||
def _(
|
||||
inputs: Union[np.number, float, int],
|
||||
request: InferRequestBase,
|
||||
key: Union[str, int, ConstOutput] = None,
|
||||
) -> None:
|
||||
set_scalar_tensor(
|
||||
request,
|
||||
Tensor(np.ndarray([], type(inputs), np.array(inputs))),
|
||||
key,
|
||||
)
|
||||
|
||||
|
||||
def normalize_inputs(request: InferRequestBase, inputs: dict) -> dict:
|
||||
"""Helper function to prepare inputs for inference.
|
||||
|
||||
It creates copy of Tensors or copy data to already allocated Tensors on device
|
||||
if the item is of type `np.ndarray`, `np.number`, `int`, `float` or has numpy __array__ attribute.
|
||||
"""
|
||||
# Create new temporary dictionary.
|
||||
# new_inputs will be used to transfer data to inference calls,
|
||||
# ensuring that original inputs are not overwritten with Tensors.
|
||||
new_inputs: Dict[Union[str, int, ConstOutput], Tensor] = {}
|
||||
for key, value in inputs.items():
|
||||
if not isinstance(key, (str, int, ConstOutput)):
|
||||
raise TypeError(f"Incompatible key type for input: {key}")
|
||||
# Copy numpy arrays to already allocated Tensors.
|
||||
if isinstance(value, (np.ndarray, np.number, int, float)):
|
||||
update_tensor(value, request, key)
|
||||
# If value is of Tensor type, put it into temporary dictionary.
|
||||
elif isinstance(value, Tensor):
|
||||
new_inputs[key] = value
|
||||
# If value object has __array__ attribute, load it to Tensor using np.array.
|
||||
elif hasattr(value, "__array__"):
|
||||
update_tensor(np.array(value, copy=True), request, key)
|
||||
# Throw error otherwise.
|
||||
else:
|
||||
raise TypeError(f"Incompatible input data of type {type(value)} under {key} key!")
|
||||
return new_inputs
|
||||
|
||||
|
||||
class InferRequest(InferRequestBase):
|
||||
class InferRequest(_InferRequestWrapper):
|
||||
"""InferRequest class represents infer request which can be run in asynchronous or synchronous manners."""
|
||||
|
||||
def infer(self, inputs: Any = None) -> dict:
|
||||
def infer(self, inputs: Any = None, shared_memory: bool = False) -> dict:
|
||||
"""Infers specified input(s) in synchronous mode.
|
||||
|
||||
Blocks all methods of InferRequest while request is running.
|
||||
@ -127,48 +39,49 @@ class InferRequest(InferRequestBase):
|
||||
|
||||
The allowed types of values in the `inputs` are:
|
||||
|
||||
(1) `numpy.array`
|
||||
(1) `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
|
||||
(2) `openvino.runtime.Tensor`
|
||||
(3) array-like object with `__array__` attribute
|
||||
|
||||
Can be called with only one `openvino.runtime.Tensor` or `numpy.array`,
|
||||
Can be called with only one `openvino.runtime.Tensor` or `numpy.ndarray`,
|
||||
it will work only with one-input models. When model has more inputs,
|
||||
function throws error.
|
||||
|
||||
:param inputs: Data to be set on input tensors.
|
||||
:type inputs: Any, optional
|
||||
:param shared_memory: Enables `shared_memory` mode.
|
||||
|
||||
If set to `False`, the data dispatcher will safely copy input data
|
||||
to existing Tensors (including up- or down-casting according to data type,
|
||||
resizing of the input Tensor). Keeps Tensor inputs "as-is".
|
||||
|
||||
If set to `True` the data dispatcher tries to provide "zero-copy"
|
||||
Tensors for every input in form of:
|
||||
* `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
|
||||
Data that is going to be copied:
|
||||
* `numpy.ndarray` which are not C contiguous
|
||||
* inputs which data types are mismatched from Infer Request's inputs
|
||||
* inputs that should be in `BF16` data type
|
||||
* scalar inputs (i.e. `np.float_`/`int`/`float`)
|
||||
Keeps Tensor inputs "as-is".
|
||||
Note: Use with extra care, shared data can be modified during runtime!
|
||||
Note: Using `shared_memory` may result in extra memory overhead.
|
||||
|
||||
Default value: False
|
||||
:type shared_memory: bool, optional
|
||||
:return: Dictionary of results from output tensors with ports as keys.
|
||||
:rtype: Dict[openvino.runtime.ConstOutput, numpy.array]
|
||||
:rtype: Dict[openvino.runtime.ConstOutput, numpy.ndarray]
|
||||
"""
|
||||
# If inputs are empty, pass empty dictionary.
|
||||
if inputs is None:
|
||||
return super().infer({})
|
||||
# If inputs are dict, normalize dictionary and call infer method.
|
||||
elif isinstance(inputs, dict):
|
||||
return super().infer(normalize_inputs(self, inputs))
|
||||
# If inputs are list or tuple, enumarate inputs and save them as dictionary.
|
||||
# It is an extension of above branch with dict inputs.
|
||||
elif isinstance(inputs, (list, tuple)):
|
||||
return super().infer(normalize_inputs(self, {index: input for index, input in enumerate(inputs)}))
|
||||
# If inputs are Tensor, call infer method directly.
|
||||
elif isinstance(inputs, Tensor):
|
||||
return super().infer(inputs)
|
||||
# If inputs are single numpy array or scalars, use helper function to copy them
|
||||
# directly to Tensor or create temporary Tensor to pass into the InferRequest.
|
||||
# Pass empty dictionary to infer method, inputs are already set by helper function.
|
||||
elif isinstance(inputs, (np.ndarray, np.number, int, float)):
|
||||
update_tensor(inputs, self)
|
||||
return super().infer({})
|
||||
elif hasattr(inputs, "__array__"):
|
||||
update_tensor(np.array(inputs, copy=True), self)
|
||||
return super().infer({})
|
||||
else:
|
||||
raise TypeError(f"Incompatible inputs of type: {type(inputs)}")
|
||||
return super().infer(_data_dispatch(
|
||||
self,
|
||||
inputs,
|
||||
is_shared=shared_memory,
|
||||
))
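A minimal sketch of how the reworked infer() is meant to be called once the dispatcher is wired in; the model path and input shape are illustrative assumptions, not part of this change:

    import numpy as np
    from openvino.runtime import Core

    core = Core()
    compiled = core.compile_model("model.xml", "CPU")     # assumed IR model
    request = compiled.create_infer_request()
    data = np.zeros((1, 3, 224, 224), dtype=np.float32)   # shape assumed to match the model input

    results = request.infer({0: data})                        # default: data is copied into request tensors
    results = request.infer({0: data}, shared_memory=True)    # zero-copy for C-contiguous ndarrays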
|
||||
|
||||
def start_async(
|
||||
self,
|
||||
inputs: Any = None,
|
||||
userdata: Any = None,
|
||||
shared_memory: bool = False,
|
||||
) -> None:
|
||||
"""Starts inference of specified input(s) in asynchronous mode.
|
||||
|
||||
@ -184,11 +97,10 @@ class InferRequest(InferRequestBase):
|
||||
|
||||
The allowed types of values in the `inputs` are:
|
||||
|
||||
(1) `numpy.array`
|
||||
(1) `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
|
||||
(2) `openvino.runtime.Tensor`
|
||||
(3) array-like object with `__array__` attribute
|
||||
|
||||
Can be called with only one `openvino.runtime.Tensor` or `numpy.array`,
|
||||
Can be called with only one `openvino.runtime.Tensor` or `numpy.ndarray`,
|
||||
it will work only with one-input models. When model has more inputs,
|
||||
function throws error.
|
||||
|
||||
@ -196,23 +108,35 @@ class InferRequest(InferRequestBase):
|
||||
:type inputs: Any, optional
|
||||
:param userdata: Any data that will be passed inside the callback.
|
||||
:type userdata: Any
|
||||
:param shared_memory: Enables `shared_memory` mode.
|
||||
|
||||
If set to `False`, the data dispatcher will safely copy input data
|
||||
to existing Tensors (including up- or down-casting according to data type,
|
||||
resizing of the input Tensor). Keeps Tensor inputs "as-is".
|
||||
|
||||
If set to `True` the data dispatcher tries to provide "zero-copy"
|
||||
Tensors for every input in form of:
|
||||
* `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
|
||||
Data that is going to be copied:
|
||||
* `numpy.ndarray` which are not C contiguous
|
||||
* inputs which data types are mismatched from Infer Request's inputs
|
||||
* inputs that should be in `BF16` data type
|
||||
* scalar inputs (i.e. `np.float_`/`int`/`float`)
|
||||
Keeps Tensor inputs "as-is".
|
||||
Note: Use with extra care, shared data can be modified during runtime!
|
||||
Note: Using `shared_memory` may result in extra memory overhead.
|
||||
|
||||
Default value: False
|
||||
:type shared_memory: bool, optional
|
||||
"""
|
||||
if inputs is None:
|
||||
super().start_async({}, userdata)
|
||||
elif isinstance(inputs, dict):
|
||||
super().start_async(normalize_inputs(self, inputs), userdata)
|
||||
elif isinstance(inputs, (list, tuple)):
|
||||
super().start_async(normalize_inputs(self, {index: input for index, input in enumerate(inputs)}), userdata)
|
||||
elif isinstance(inputs, Tensor):
|
||||
super().start_async(inputs, userdata)
|
||||
elif isinstance(inputs, (np.ndarray, np.number, int, float)):
|
||||
update_tensor(inputs, self)
|
||||
return super().start_async({}, userdata)
|
||||
elif hasattr(inputs, "__array__"):
|
||||
update_tensor(np.array(inputs, copy=True), self)
|
||||
return super().start_async({}, userdata)
|
||||
else:
|
||||
raise TypeError(f"Incompatible inputs of type: {type(inputs)}")
|
||||
super().start_async(
|
||||
_data_dispatch(
|
||||
self,
|
||||
inputs,
|
||||
is_shared=shared_memory,
|
||||
),
|
||||
userdata,
|
||||
)
|
||||
|
||||
|
||||
class CompiledModel(CompiledModelBase):
|
||||
@ -222,6 +146,11 @@ class CompiledModel(CompiledModelBase):
|
||||
multiple optimization transformations, then mapping to compute kernels.
|
||||
"""
|
||||
|
||||
def __init__(self, other: CompiledModelBase) -> None:
|
||||
# Private member to store already created InferRequest
|
||||
self._infer_request: Optional[InferRequest] = None
|
||||
super().__init__(other)
|
||||
|
||||
def create_infer_request(self) -> InferRequest:
|
||||
"""Creates an inference request object used to infer the compiled model.
|
||||
|
||||
@ -249,15 +178,15 @@ class CompiledModel(CompiledModelBase):
|
||||
|
||||
The allowed types of values in the `inputs` are:
|
||||
|
||||
(1) `numpy.array`
|
||||
(1) `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
|
||||
(2) `openvino.runtime.Tensor`
|
||||
|
||||
Can be called with only one `openvino.runtime.Tensor` or `numpy.array`,
|
||||
Can be called with only one `openvino.runtime.Tensor` or `numpy.ndarray`,
|
||||
it will work only with one-input models. When model has more inputs,
|
||||
function throws error.
|
||||
|
||||
:param inputs: Data to be set on input tensors.
|
||||
:type inputs: Union[Dict[keys, values], List[values], Tuple[values], Tensor, numpy.array], optional
|
||||
:type inputs: Union[Dict[keys, values], List[values], Tuple[values], Tensor, numpy.ndarray], optional
|
||||
:return: Dictionary of results from output tensors with ports as keys.
|
||||
:rtype: Dict[openvino.runtime.ConstOutput, numpy.array]
|
||||
"""
|
||||
@ -265,12 +194,70 @@ class CompiledModel(CompiledModelBase):
|
||||
# overloaded functions of InferRequest class
|
||||
return self.create_infer_request().infer(inputs)
|
||||
|
||||
def __call__(self, inputs: Optional[Union[dict, list]] = None) -> dict:
|
||||
def __call__(self,
|
||||
inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None,
|
||||
shared_memory: bool = True) -> dict:
|
||||
"""Callable infer wrapper for CompiledModel.
|
||||
|
||||
Take a look at `infer_new_request` for reference.
|
||||
Infers specified input(s) in synchronous mode.
|
||||
|
||||
Blocks all methods of CompiledModel while request is running.
|
||||
|
||||
Method creates new temporary InferRequest and runs inference on it.
|
||||
It is advised to use a dedicated InferRequest class for performance,
|
||||
optimizing workflows, and creating advanced pipelines.
|
||||
|
||||
This method stores created `InferRequest` inside `CompiledModel` object,
|
||||
which can be later reused in consecutive calls.
|
||||
|
||||
The allowed types of keys in the `inputs` dictionary are:
|
||||
|
||||
(1) `int`
|
||||
(2) `str`
|
||||
(3) `openvino.runtime.ConstOutput`
|
||||
|
||||
The allowed types of values in the `inputs` are:
|
||||
|
||||
(1) `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
|
||||
(2) `openvino.runtime.Tensor`
|
||||
|
||||
Can be called with only one `openvino.runtime.Tensor` or `numpy.ndarray`,
|
||||
it will work only with one-input models. When model has more inputs,
|
||||
function throws error.
|
||||
|
||||
:param inputs: Data to be set on input tensors.
|
||||
:type inputs: Union[Dict[keys, values], List[values], Tuple[values], Tensor, numpy.ndarray], optional
|
||||
:param shared_memory: Enables `shared_memory` mode.
|
||||
|
||||
If set to `False`, the data dispatcher will safely copy input data
|
||||
to existing Tensors (including up- or down-casting according to data type,
|
||||
resizing of the input Tensor). Keeps Tensor inputs "as-is".
|
||||
|
||||
If set to `True` the data dispatcher tries to provide "zero-copy"
|
||||
Tensors for every input in form of:
|
||||
* `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
|
||||
Data that is going to be copied:
|
||||
* `numpy.ndarray` which are not C contiguous
|
||||
* inputs which data types are mismatched from Infer Request's inputs
|
||||
* inputs that should be in `BF16` data type
|
||||
* scalar inputs (i.e. `np.float_`/`int`/`float`)
|
||||
Keeps Tensor inputs "as-is".
|
||||
Note: Use with extra care, shared data can be modified during runtime!
|
||||
Note: Using `shared_memory` may result in extra memory overhead.
|
||||
|
||||
Default value: True
|
||||
:type shared_memory: bool, optional
|
||||
|
||||
:return: Dictionary of results from output tensors with ports as keys.
|
||||
:rtype: Dict[openvino.runtime.ConstOutput, numpy.ndarray]
|
||||
"""
|
||||
return self.infer_new_request(inputs)
|
||||
if self._infer_request is None:
|
||||
self._infer_request = self.create_infer_request()
|
||||
|
||||
return self._infer_request.infer(
|
||||
inputs,
|
||||
shared_memory=shared_memory,
|
||||
)
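With this change a direct call on CompiledModel reuses a single cached InferRequest instead of creating a new one per call, and defaults to shared_memory=True. Roughly, and only as a sketch:

    out = compiled(data)                        # first call creates and caches the request
    out = compiled(data, shared_memory=False)   # opt back into safe copying
    # approximately equivalent to:
    req = compiled.create_infer_request()
    out = req.infer(data, shared_memory=True)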
|
||||
|
||||
|
||||
class AsyncInferQueue(AsyncInferQueueBase):
|
||||
@ -303,6 +290,7 @@ class AsyncInferQueue(AsyncInferQueueBase):
|
||||
self,
|
||||
inputs: Any = None,
|
||||
userdata: Any = None,
|
||||
shared_memory: bool = False,
|
||||
) -> None:
|
||||
"""Run asynchronous inference using the next available InferRequest from the pool.
|
||||
|
||||
@ -314,11 +302,10 @@ class AsyncInferQueue(AsyncInferQueueBase):
|
||||
|
||||
The allowed types of values in the `inputs` are:
|
||||
|
||||
(1) `numpy.array`
|
||||
(1) `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
|
||||
(2) `openvino.runtime.Tensor`
|
||||
(3) array-like object with `__array__` attribute
|
||||
|
||||
Can be called with only one `openvino.runtime.Tensor` or `numpy.array`,
|
||||
Can be called with only one `openvino.runtime.Tensor` or `numpy.ndarray`,
|
||||
it will work only with one-input models. When model has more inputs,
|
||||
function throws error.
|
||||
|
||||
@ -326,32 +313,34 @@ class AsyncInferQueue(AsyncInferQueueBase):
|
||||
:type inputs: Any, optional
|
||||
:param userdata: Any data that will be passed to a callback.
|
||||
:type userdata: Any, optional
|
||||
:param shared_memory: Enables `shared_memory` mode.
|
||||
|
||||
If set to `False`, the data dispatcher will safely copy input data
|
||||
to existing Tensors (including up- or down-casting according to data type,
|
||||
resizing of the input Tensor). Keeps Tensor inputs "as-is".
|
||||
|
||||
If set to `True` the data dispatcher tries to provide "zero-copy"
|
||||
Tensors for every input in form of:
|
||||
* `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
|
||||
Data that is going to be copied:
|
||||
* `numpy.ndarray` which are not C contiguous
|
||||
* inputs which data types are mismatched from Infer Request's inputs
|
||||
* inputs that should be in `BF16` data type
|
||||
* scalar inputs (i.e. `np.float_`/`int`/`float`)
|
||||
Keeps Tensor inputs "as-is".
|
||||
Note: Use with extra care, shared data can be modified during runtime!
|
||||
Note: Using `shared_memory` may result in extra memory overhead.
|
||||
|
||||
Default value: False
|
||||
"""
|
||||
if inputs is None:
|
||||
super().start_async({}, userdata)
|
||||
elif isinstance(inputs, dict):
|
||||
super().start_async(
|
||||
normalize_inputs(self[self.get_idle_request_id()], inputs),
|
||||
userdata,
|
||||
)
|
||||
elif isinstance(inputs, (list, tuple)):
|
||||
super().start_async(
|
||||
normalize_inputs(
|
||||
self[self.get_idle_request_id()],
|
||||
{index: input for index, input in enumerate(inputs)},
|
||||
),
|
||||
userdata,
|
||||
)
|
||||
elif isinstance(inputs, Tensor):
|
||||
super().start_async(inputs, userdata)
|
||||
elif isinstance(inputs, (np.ndarray, np.number, int, float)):
|
||||
update_tensor(inputs, self[self.get_idle_request_id()])
|
||||
super().start_async({}, userdata)
|
||||
elif hasattr(inputs, "__array__"):
|
||||
update_tensor(np.array(inputs, copy=True), self[self.get_idle_request_id()])
|
||||
super().start_async({}, userdata)
|
||||
else:
|
||||
raise TypeError(f"Incompatible inputs of type: {type(inputs)}")
|
||||
super().start_async(
|
||||
_data_dispatch(
|
||||
self[self.get_idle_request_id()],
|
||||
inputs,
|
||||
is_shared=shared_memory,
|
||||
),
|
||||
userdata,
|
||||
)
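The same flag is threaded through AsyncInferQueue.start_async. A short usage sketch; the queue size, callback body and `frames` iterable are assumptions:

    from openvino.runtime import AsyncInferQueue

    queue = AsyncInferQueue(compiled, jobs=4)

    def on_done(request, userdata):
        # userdata carries the frame index passed to start_async below
        print(userdata, request.get_output_tensor(0).data.shape)

    queue.set_callback(on_done)
    for i, frame in enumerate(frames):
        queue.start_async({0: frame}, userdata=i, shared_memory=True)
    queue.wait_all()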
|
||||
|
||||
|
||||
class Core(CoreBase):
|
||||
|
@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

from openvino.runtime.utils.data_helpers.data_dispatcher import _data_dispatch
from openvino.runtime.utils.data_helpers.wrappers import tensor_from_file
from openvino.runtime.utils.data_helpers.wrappers import _InferRequestWrapper
@ -0,0 +1,336 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2018-2023 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from functools import singledispatch
|
||||
from typing import Any, Dict, Union, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
from openvino._pyopenvino import ConstOutput, Tensor, Type
|
||||
from openvino.runtime.utils.data_helpers.wrappers import _InferRequestWrapper
|
||||
|
||||
ContainerTypes = Union[dict, list, tuple]
|
||||
ScalarTypes = Union[np.number, int, float]
|
||||
ValidKeys = Union[str, int, ConstOutput]
|
||||
|
||||
|
||||
def get_request_tensor(
|
||||
request: _InferRequestWrapper,
|
||||
key: Optional[ValidKeys] = None,
|
||||
) -> Tensor:
|
||||
if key is None:
|
||||
return request.get_input_tensor()
|
||||
elif isinstance(key, int):
|
||||
return request.get_input_tensor(key)
|
||||
elif isinstance(key, (str, ConstOutput)):
|
||||
return request.get_tensor(key)
|
||||
else:
|
||||
raise TypeError(f"Unsupported key type: {type(key)} for Tensor under key: {key}")
|
||||
|
||||
|
||||
@singledispatch
|
||||
def value_to_tensor(
|
||||
value: Union[Tensor, np.ndarray, ScalarTypes],
|
||||
request: Optional[_InferRequestWrapper] = None,
|
||||
is_shared: bool = False,
|
||||
key: Optional[ValidKeys] = None,
|
||||
) -> None:
|
||||
raise TypeError(f"Incompatible inputs of type: {type(value)}")
|
||||
|
||||
|
||||
@value_to_tensor.register(Tensor)
|
||||
def _(
|
||||
value: Tensor,
|
||||
request: Optional[_InferRequestWrapper] = None,
|
||||
is_shared: bool = False,
|
||||
key: Optional[ValidKeys] = None,
|
||||
) -> Tensor:
|
||||
return value
|
||||
|
||||
|
||||
@value_to_tensor.register(np.ndarray)
|
||||
def _(
|
||||
value: np.ndarray,
|
||||
request: _InferRequestWrapper,
|
||||
is_shared: bool = False,
|
||||
key: Optional[ValidKeys] = None,
|
||||
) -> Tensor:
|
||||
# Edge-case for numpy arrays if shape is "empty",
|
||||
# assume this is a scalar value - always copy
|
||||
if not value.shape:
|
||||
return Tensor(np.ndarray([], value.dtype, np.array(value)))
|
||||
tensor_type = get_request_tensor(request, key).get_element_type()
|
||||
tensor_dtype = tensor_type.to_dtype()
|
||||
# WA for FP16-->BF16 edge-case - always copy
|
||||
if tensor_type == Type.bf16:
|
||||
tensor = Tensor(tensor_type, value.shape)
|
||||
tensor.data[:] = value.view(tensor_dtype)
|
||||
return tensor
|
||||
return Tensor(value.astype(tensor_dtype) if tensor_dtype != value.dtype else value, shared_memory=is_shared)
|
||||
|
||||
|
||||
@value_to_tensor.register(np.number)
|
||||
@value_to_tensor.register(int)
|
||||
@value_to_tensor.register(float)
|
||||
def _(
|
||||
value: ScalarTypes,
|
||||
request: Optional[_InferRequestWrapper] = None,
|
||||
is_shared: bool = False,
|
||||
key: Optional[ValidKeys] = None,
|
||||
) -> Tensor:
|
||||
return Tensor(np.ndarray([], type(value), np.array(value)))
|
||||
|
||||
|
||||
def to_c_style(value: Any, is_shared: bool = False) -> Any:
|
||||
if not isinstance(value, np.ndarray):
|
||||
if hasattr(value, "__array__"):
|
||||
return to_c_style(np.array(value, copy=False)) if is_shared else np.array(value, copy=True)
|
||||
return value
|
||||
# Check C-style layout; if the data is not contiguous, convert it (or raise error?)
|
||||
return value if value.flags["C_CONTIGUOUS"] else np.ascontiguousarray(value)
|
||||
|
||||
|
||||
###
|
||||
# Start of array normalization.
|
||||
###
|
||||
@singledispatch
|
||||
def normalize_arrays(
|
||||
inputs: Any,
|
||||
is_shared: bool = False,
|
||||
) -> Any:
|
||||
# Check the special case of the array-interface
|
||||
if hasattr(inputs, "__array__"):
|
||||
return to_c_style(np.array(inputs, copy=False)) if is_shared else np.array(inputs, copy=True)
|
||||
# Error should be raised if type does not match any dispatchers
|
||||
raise TypeError(f"Incompatible inputs of type: {type(inputs)}")
|
||||
|
||||
|
||||
@normalize_arrays.register(dict)
|
||||
def _(
|
||||
inputs: dict,
|
||||
is_shared: bool = False,
|
||||
) -> dict:
|
||||
return {k: to_c_style(v) if is_shared else v for k, v in inputs.items()}
|
||||
|
||||
|
||||
@normalize_arrays.register(list)
|
||||
@normalize_arrays.register(tuple)
|
||||
def _(
|
||||
inputs: Union[list, tuple],
|
||||
is_shared: bool = False,
|
||||
) -> dict:
|
||||
return {i: to_c_style(v) if is_shared else v for i, v in enumerate(inputs)}
|
||||
|
||||
|
||||
@normalize_arrays.register(np.ndarray)
|
||||
def _(
|
||||
inputs: dict,
|
||||
is_shared: bool = False,
|
||||
) -> Any:
|
||||
return to_c_style(inputs) if is_shared else inputs
|
||||
###
|
||||
# End of array normalization.
|
||||
###
|
||||
|
||||
|
||||
###
|
||||
# Start of "shared" dispatcher.
|
||||
# (1) Each method should keep Tensors "as-is", regardless to them being shared or not.
|
||||
# (2) ...
|
||||
###
|
||||
# Step to keep alive input values that are not C-style by default
|
||||
@singledispatch
|
||||
def create_shared(
|
||||
inputs: Any,
|
||||
request: _InferRequestWrapper,
|
||||
) -> None:
|
||||
# Check the special case of the array-interface
|
||||
if hasattr(inputs, "__array__"):
|
||||
request._inputs_data = normalize_arrays(inputs, is_shared=True)
|
||||
return value_to_tensor(request._inputs_data, request=request, is_shared=True)
|
||||
# Error should be raised if type does not match any dispatchers
|
||||
raise TypeError(f"Incompatible inputs of type: {type(inputs)}")
|
||||
|
||||
|
||||
@create_shared.register(dict)
|
||||
@create_shared.register(list)
|
||||
@create_shared.register(tuple)
|
||||
def _(
|
||||
inputs: ContainerTypes,
|
||||
request: _InferRequestWrapper,
|
||||
) -> dict:
|
||||
request._inputs_data = normalize_arrays(inputs, is_shared=True)
|
||||
return {k: value_to_tensor(v, request=request, is_shared=True, key=k) for k, v in request._inputs_data.items()}
|
||||
|
||||
|
||||
@create_shared.register(np.ndarray)
|
||||
def _(
|
||||
inputs: np.ndarray,
|
||||
request: _InferRequestWrapper,
|
||||
) -> Tensor:
|
||||
request._inputs_data = normalize_arrays(inputs, is_shared=True)
|
||||
return value_to_tensor(request._inputs_data, request=request, is_shared=True)
|
||||
|
||||
|
||||
@create_shared.register(Tensor)
|
||||
@create_shared.register(np.number)
|
||||
@create_shared.register(int)
|
||||
@create_shared.register(float)
|
||||
def _(
|
||||
inputs: Union[Tensor, ScalarTypes],
|
||||
request: _InferRequestWrapper,
|
||||
) -> Tensor:
|
||||
return value_to_tensor(inputs, request=request, is_shared=True)
|
||||
###
|
||||
# End of "shared" dispatcher methods.
|
||||
###
|
||||
|
||||
|
||||
###
|
||||
# Start of "copied" dispatcher.
|
||||
###
|
||||
def set_request_tensor(
|
||||
request: _InferRequestWrapper,
|
||||
tensor: Tensor,
|
||||
key: Optional[ValidKeys] = None,
|
||||
) -> None:
|
||||
if key is None:
|
||||
request.set_input_tensor(tensor)
|
||||
elif isinstance(key, int):
|
||||
request.set_input_tensor(key, tensor)
|
||||
elif isinstance(key, (str, ConstOutput)):
|
||||
request.set_tensor(key, tensor)
|
||||
else:
|
||||
raise TypeError(f"Unsupported key type: {type(key)} for Tensor under key: {key}")
|
||||
|
||||
|
||||
@singledispatch
|
||||
def update_tensor(
|
||||
inputs: Any,
|
||||
request: _InferRequestWrapper,
|
||||
key: Optional[ValidKeys] = None,
|
||||
) -> None:
|
||||
if hasattr(inputs, "__array__"):
|
||||
update_tensor(normalize_arrays(inputs, is_shared=False), request, key=None)
|
||||
return None
|
||||
raise TypeError(f"Incompatible inputs of type: {type(inputs)} under {key} key!")
|
||||
|
||||
|
||||
@update_tensor.register(np.ndarray)
|
||||
def _(
|
||||
inputs: np.ndarray,
|
||||
request: _InferRequestWrapper,
|
||||
key: Optional[ValidKeys] = None,
|
||||
) -> None:
|
||||
# If shape is "empty", assume this is a scalar value
|
||||
if not inputs.shape:
|
||||
set_request_tensor(
|
||||
request,
|
||||
value_to_tensor(inputs, request=request, is_shared=False),
|
||||
key,
|
||||
)
|
||||
else:
|
||||
tensor = get_request_tensor(request, key)
|
||||
# Update shape if there is a mismatch
|
||||
if tensor.shape != inputs.shape:
|
||||
tensor.shape = inputs.shape
|
||||
# When copying, type should be up/down-casted automatically.
|
||||
tensor.data[:] = inputs[:]
|
||||
|
||||
|
||||
@update_tensor.register(np.number) # type: ignore
|
||||
@update_tensor.register(float)
|
||||
@update_tensor.register(int)
|
||||
def _(
|
||||
inputs: Union[np.number, float, int],
|
||||
request: _InferRequestWrapper,
|
||||
key: Optional[ValidKeys] = None,
|
||||
) -> None:
|
||||
set_request_tensor(
|
||||
request,
|
||||
value_to_tensor(inputs, is_shared=False),
|
||||
key,
|
||||
)
|
||||
|
||||
|
||||
def update_inputs(inputs: dict, request: _InferRequestWrapper) -> dict:
|
||||
"""Helper function to prepare inputs for inference.
|
||||
|
||||
It creates copy of Tensors or copy data to already allocated Tensors on device
|
||||
if the item is of type `np.ndarray`, `np.number`, `int`, `float` or has numpy __array__ attribute.
|
||||
"""
|
||||
# Create new temporary dictionary.
|
||||
# new_inputs will be used to transfer data to inference calls,
|
||||
# ensuring that original inputs are not overwritten with Tensors.
|
||||
new_inputs: Dict[ValidKeys, Tensor] = {}
|
||||
for key, value in inputs.items():
|
||||
if not isinstance(key, (str, int, ConstOutput)):
|
||||
raise TypeError(f"Incompatible key type for input: {key}")
|
||||
# Copy numpy arrays to already allocated Tensors.
|
||||
# If value object has __array__ attribute, load it to Tensor using np.array
|
||||
if isinstance(value, (np.ndarray, np.number, int, float)) or hasattr(value, "__array__"):
|
||||
update_tensor(value, request, key)
|
||||
# If value is of Tensor type, put it into temporary dictionary.
|
||||
elif isinstance(value, Tensor):
|
||||
new_inputs[key] = value
|
||||
# Throw error otherwise.
|
||||
else:
|
||||
raise TypeError(f"Incompatible inputs of type: {type(value)} under {key} key!")
|
||||
return new_inputs
|
||||
|
||||
|
||||
@singledispatch
|
||||
def create_copied(
|
||||
inputs: Union[ContainerTypes, np.ndarray, ScalarTypes],
|
||||
request: _InferRequestWrapper,
|
||||
) -> Union[dict, None]:
|
||||
# Check the special case of the array-interface
|
||||
if hasattr(inputs, "__array__"):
|
||||
update_tensor(normalize_arrays(inputs, is_shared=False), request, key=None)
|
||||
return {}
|
||||
# Error should be raised if type does not match any dispatchers
|
||||
raise TypeError(f"Incompatible inputs of type: {type(inputs)}")
|
||||
|
||||
|
||||
@create_copied.register(dict)
|
||||
@create_copied.register(list)
|
||||
@create_copied.register(tuple)
|
||||
def _(
|
||||
inputs: ContainerTypes,
|
||||
request: _InferRequestWrapper,
|
||||
) -> dict:
|
||||
return update_inputs(normalize_arrays(inputs, is_shared=False), request)
|
||||
|
||||
|
||||
@create_copied.register(np.ndarray)
|
||||
def _(
|
||||
inputs: np.ndarray,
|
||||
request: _InferRequestWrapper,
|
||||
) -> dict:
|
||||
update_tensor(normalize_arrays(inputs, is_shared=False), request, key=None)
|
||||
return {}
|
||||
|
||||
|
||||
@create_copied.register(Tensor)
|
||||
@create_copied.register(np.number)
|
||||
@create_copied.register(int)
|
||||
@create_copied.register(float)
|
||||
def _(
|
||||
inputs: Union[Tensor, ScalarTypes],
|
||||
request: _InferRequestWrapper,
|
||||
) -> Tensor:
|
||||
return value_to_tensor(inputs, is_shared=False)
|
||||
###
|
||||
# End of "copied" dispatcher methods.
|
||||
###
|
||||
|
||||
|
||||
def _data_dispatch(
|
||||
request: _InferRequestWrapper,
|
||||
inputs: Union[ContainerTypes, Tensor, np.ndarray, ScalarTypes] = None,
|
||||
is_shared: bool = False,
|
||||
) -> Union[dict, Tensor]:
|
||||
if inputs is None:
|
||||
return {}
|
||||
return create_shared(inputs, request) if is_shared else create_copied(inputs, request)
|
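A condensed, illustrative summary of what _data_dispatch returns for typical inputs, assuming `req` is an _InferRequestWrapper and `arr` is a C-contiguous float32 ndarray:

    _data_dispatch(req, None)                        # -> {}          nothing to set
    _data_dispatch(req, {0: arr}, is_shared=True)    # -> {0: Tensor} wrapping arr without a copy
    _data_dispatch(req, {0: arr}, is_shared=False)   # -> {}          arr is copied into the request's tensor
    _data_dispatch(req, arr, is_shared=True)         # -> Tensor      shared view over arr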
@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import numpy as np

from openvino._pyopenvino import Tensor
from openvino._pyopenvino import InferRequest as InferRequestBase


def tensor_from_file(path: str) -> Tensor:
    """Create Tensor from file. Data will be read with dtype of uint8."""
    return Tensor(np.fromfile(path, dtype=np.uint8))  # type: ignore


class _InferRequestWrapper(InferRequestBase):
    """InferRequest class with internal memory."""

    def __init__(self, other: InferRequestBase) -> None:
        # Private member to store newly created shared memory data
        self._inputs_data = None
        super().__init__(other)
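_InferRequestWrapper._inputs_data exists only to keep user-provided arrays alive while a "shared" Tensor points at their memory; tensor_from_file is a small convenience reader. A usage sketch with an assumed file path:

    from openvino.runtime.utils.data_helpers import tensor_from_file

    raw = tensor_from_file("image.bin")   # hypothetical binary blob, read as uint8
    print(raw.shape, raw.element_type)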
@ -326,26 +326,57 @@ void regclass_Core(py::module m) {
    cls.def(
        "read_model",
        [](ov::Core& self, py::object model_path, py::object weights_path) {
            std::string model_path_cpp{py::str(model_path)};
            std::string weights_path_cpp{py::str(weights_path)};
            py::gil_scoped_release release;
            return self.read_model(model_path_cpp, weights_path_cpp);
            if (py::isinstance(model_path, pybind11::module::import("io").attr("BytesIO"))) {
                std::stringstream _stream;
                model_path.attr("seek")(0);  // Always rewind stream!
                _stream << model_path
                               .attr("read")()  // alternative: model_path.attr("get_value")()
                               .cast<std::string>();
                py::buffer_info info;
                if (!py::isinstance<py::none>(weights_path)) {
                    auto p = weights_path.cast<py::bytes>();
                    info = py::buffer(p).request();
                }
                size_t bin_size = static_cast<size_t>(info.size);
                ov::Tensor tensor(ov::element::Type_t::u8, {bin_size});
                // if weights are not empty
                if (bin_size) {
                    const uint8_t* bin = reinterpret_cast<const uint8_t*>(info.ptr);
                    std::memcpy(tensor.data(), bin, bin_size);
                }
                py::gil_scoped_release release;
                return self.read_model(_stream.str(), tensor);
            } else if (py::isinstance(model_path, py::module_::import("pathlib").attr("Path")) ||
                       py::isinstance<py::str>(model_path)) {
                const std::string model_path_cpp{py::str(model_path)};
                std::string weights_path_cpp;
                if (!py::isinstance<py::none>(weights_path)) {
                    weights_path_cpp = py::str(weights_path);
                }
                py::gil_scoped_release release;
                return self.read_model(model_path_cpp, weights_path_cpp);
            }

            std::stringstream str;
            str << "Provided python object type " << model_path.get_type().str()
                << " isn't supported as 'model' argument.";
            throw ov::Exception(str.str());
        },
        py::arg("model"),
        py::arg("weights") = "",
        py::arg("weights") = py::none(),
        R"(
            Reads models from IR / ONNX / PDPD formats.

            GIL is released while running this function.

            :param model: A string with model in IR / ONNX / PDPD format.
            :type model: str
            :param model: A path to a model in IR / ONNX / PDPD format or a model itself wrapped in io.BytesIO format.
            :type model: Union[pathlib.Path, io.BytesIO]
            :param weights: A path to a data file. For IR format (*.bin): if path is empty,
                            it tries to read a bin file with the same name as xml and if the bin
                            file with the same name was not found, loads IR without weights.
                            For ONNX format (*.onnx): weights parameter is not used.
                            For PDPD format (*.pdmodel): weights parameter is not used.
            :type weights: str
            :type weights: pathlib.Path
            :return: A model.
            :rtype: openvino.runtime.Model
        )");
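On the Python side the new branch means Core.read_model accepts an in-memory model as well as a path. A sketch of the intended call pattern; reading an ONNX file into the stream is just an assumed way to obtain the bytes:

    import io
    from openvino.runtime import Core

    core = Core()
    with open("model.onnx", "rb") as f:     # illustrative path
        stream = io.BytesIO(f.read())
    model = core.read_model(stream)         # weights stay None for single-file formats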
@ -19,6 +19,13 @@ namespace py = pybind11;

using namespace ov::frontend;

class MemoryBuffer : public std::streambuf {
public:
    MemoryBuffer(char* data, std::size_t size) {
        setg(data, data, data + size);
    }
};

void regclass_frontend_FrontEnd(py::module m) {
    py::class_<FrontEnd, std::shared_ptr<FrontEnd>> fem(m, "FrontEnd", py::dynamic_attr(), py::module_local());
    fem.doc() = "openvino.frontend.FrontEnd wraps ov::frontend::FrontEnd";
@ -26,10 +33,18 @@ void regclass_frontend_FrontEnd(py::module m) {
    fem.def(
        "load",
        [](FrontEnd& self, const py::object& py_obj) {
            try {
                if (py::isinstance(py_obj, py::module_::import("pathlib").attr("Path")) ||
                    py::isinstance<py::str>(py_obj) || py::isinstance<py::bytes>(py_obj)) {
                    // check if model path is either a string/pathlib.Path/bytes
                    std::string model_path = Common::utils::convert_path_to_string(py_obj);
                    return self.load(model_path);
            } catch (...) {
                } else if (py::isinstance(py_obj, pybind11::module::import("io").attr("BytesIO"))) {
                    // support of BytesIO
                    py::buffer_info info = py::buffer(py_obj.attr("getbuffer")()).request();
                    MemoryBuffer mb(reinterpret_cast<char*>(info.ptr), info.size);
                    std::istream _istream(&mb);
                    return self.load(&_istream);
                } else {
                    // Extended for one argument only for this time
                    return self.load({Common::utils::py_object_to_any(py_obj)});
                }

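The Python-side flow that exercises this path (mirroring test_load_bytesio_model further down; shown only as a sketch, with create_onnx_model() taken from the ONNX frontend tests):

import io
import onnx
from openvino.frontend import FrontEndManager

fem = FrontEndManager()
fe = fem.load_by_framework(framework="onnx")

model_stream = io.BytesIO()
onnx.save_model(create_onnx_model(), model_stream)

input_model = fe.load(model_stream)  # BytesIO is routed through the new MemoryBuffer/istream path
ov_model = fe.convert(input_model)
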
@ -31,7 +31,7 @@ struct is_std_vector<std::vector<T, A>> : std::true_type {};
|
||||
for (auto el : casted) { \
|
||||
py_list.append(py_type(el)); \
|
||||
} \
|
||||
return py_list; \
|
||||
return std::move(py_list); \
|
||||
} \
|
||||
return py::cast(casted); \
|
||||
} \
|
||||
|
@ -17,6 +17,10 @@ template void regclass_graph_Output<const ov::Node>(py::module m, std::string ty
|
||||
template <typename T>
|
||||
void def_type_dependent_functions(py::class_<ov::Output<T>, std::shared_ptr<ov::Output<T>>>& output) {}
|
||||
|
||||
template <>
|
||||
void def_type_dependent_functions<const ov::Node>(
|
||||
py::class_<ov::Output<const ov::Node>, std::shared_ptr<ov::Output<const ov::Node>>>& output) {}
|
||||
|
||||
template <>
|
||||
void def_type_dependent_functions<ov::Node>(
|
||||
py::class_<ov::Output<ov::Node>, std::shared_ptr<ov::Output<ov::Node>>>& output) {
|
||||
|
@ -28,6 +28,10 @@ template<>
|
||||
void def_type_dependent_functions<ov::Node>(py::class_<ov::Output<ov::Node>,
|
||||
std::shared_ptr<ov::Output<ov::Node>>>& output);
|
||||
|
||||
template<>
|
||||
void def_type_dependent_functions<const ov::Node>(py::class_<ov::Output<const ov::Node>,
|
||||
std::shared_ptr<ov::Output<const ov::Node>>>& output);
|
||||
|
||||
template <typename VT>
|
||||
void regclass_graph_Output(py::module m, std::string typestring)
|
||||
{
|
||||
|
@ -39,7 +39,7 @@ public:
|
||||
* ignored by implicit casters.
|
||||
*/
|
||||
static handle cast(ov::Layout src, return_value_policy policy, handle parent) {
|
||||
return cast(src, policy, parent);
|
||||
return pybind11::cast(src, policy, parent);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import io
import os
import onnx
import numpy as np
@ -172,6 +173,7 @@ def run_model(model, *inputs, expected):
# FrontEndManager shall be initialized and destroyed after all tests finished
# This is because destroy of FrontEndManager will unload all plugins, no objects shall exist after this
fem = FrontEndManager()
model_stream = io.BytesIO()
onnx_model_filename = "model.onnx"
onnx_model_2_filename = "model2.onnx"
onnx_model_with_custom_attributes_filename = "model_custom_attributes.onnx"
@ -183,6 +185,7 @@ ONNX_FRONTEND_NAME = "onnx"

def setup_module():
    onnx.save_model(create_onnx_model(), onnx_model_filename)
    onnx.save_model(create_onnx_model(), model_stream)
    onnx.save_model(create_onnx_model_2(), onnx_model_2_filename)
    onnx.save_model(create_onnx_model_with_custom_attributes(),
                    onnx_model_with_custom_attributes_filename)
@ -719,3 +722,17 @@ def test_so_extension_via_frontend_decode_input_model():

    decoded_model = load_decoded_model()  # decoded model has longer lifetime than frontend
    assert decoded_model


def test_load_bytesio_model():
    from openvino.runtime import Core

    fe = fem.load_by_framework(framework=ONNX_FRONTEND_NAME)
    model_from_fe = fe.load(model_stream)
    assert model_from_fe
    converted_model = fe.convert(model_from_fe)
    assert converted_model.friendly_name == "graph"

    core = Core()
    model = core.read_model(model_stream)
    assert converted_model.friendly_name == model.friendly_name

@ -177,7 +177,7 @@ def test_serialize_pass_tuple(request, tmp_path):
|
||||
model = ov.floor(ov.minimum(ov.abs(parameter_a), ov.multiply(parameter_b, parameter_c)))
|
||||
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
|
||||
pass_manager = Manager()
|
||||
pass_manager.register_pass("Serialize", output_files=(str(xml_path), str(bin_path)))
|
||||
pass_manager.register_pass("Serialize", output_files=(xml_path, bin_path))
|
||||
pass_manager.run_passes(func)
|
||||
|
||||
res_model = core.read_model(model=xml_path, weights=bin_path)
|
||||
@ -201,7 +201,7 @@ def test_default_version(request, tmp_path):
|
||||
model = ov.floor(ov.minimum(ov.abs(parameter_a), ov.multiply(parameter_b, parameter_c)))
|
||||
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
|
||||
pass_manager = Manager()
|
||||
pass_manager.register_pass("Serialize", output_files=(str(xml_path), str(bin_path)))
|
||||
pass_manager.register_pass("Serialize", output_files=(xml_path, bin_path))
|
||||
pass_manager.run_passes(func)
|
||||
|
||||
res_model = core.read_model(model=xml_path, weights=bin_path)
|
||||
@ -225,7 +225,7 @@ def test_default_version_IR_V11_tuple(request, tmp_path):
|
||||
model = ov.floor(ov.minimum(ov.abs(parameter_a), ov.multiply(parameter_b, parameter_c)))
|
||||
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
|
||||
pass_manager = Manager()
|
||||
pass_manager.register_pass("Serialize", output_files=(str(xml_path), str(bin_path)), version="IR_V11")
|
||||
pass_manager.register_pass("Serialize", output_files=(xml_path, bin_path), version="IR_V11")
|
||||
pass_manager.run_passes(func)
|
||||
|
||||
res_model = core.read_model(model=xml_path, weights=bin_path)
|
||||
|
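A sketch of what the updated registration allows: output_files may now be plain pathlib.Path objects, with no str() conversion needed (the paths below are illustrative; func is an openvino.runtime.Model built as in the tests above):

from pathlib import Path
from openvino.runtime import Core
from openvino.runtime.passes import Manager

xml_path, bin_path = Path("model.xml"), Path("model.bin")
pass_manager = Manager()
pass_manager.register_pass("Serialize", output_files=(xml_path, bin_path))
pass_manager.run_passes(func)

res_model = Core().read_model(model=xml_path, weights=bin_path)
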
@ -210,11 +210,12 @@ def test_infer_tensor_wrong_input_data(device):
|
||||
assert "Incompatible key type for input: 0.0" in str(e.value)
|
||||
|
||||
|
||||
def test_direct_infer(device):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_direct_infer(device, shared_flag):
|
||||
compiled_model, img = generate_model_and_image(device)
|
||||
|
||||
tensor = Tensor(img)
|
||||
res = compiled_model({"data": tensor})
|
||||
res = compiled_model({"data": tensor}, shared_memory=shared_flag)
|
||||
assert np.argmax(res[compiled_model.outputs[0]]) == 531
|
||||
ref = compiled_model.infer_new_request({"data": tensor})
|
||||
assert np.array_equal(ref[compiled_model.outputs[0]], res[compiled_model.outputs[0]])
|
||||
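The parametrized tests below exercise a new shared_memory keyword on the inference entry points. A rough usage sketch (only the keyword itself is taken from this diff; the input name and shape are illustrative):

import numpy as np

img = np.zeros((1, 3, 32, 32), dtype=np.float32)

# shared_memory=True is expected to let the request reference the numpy buffer directly,
# while shared_memory=False copies the data into the request's own tensors.
res_shared = compiled_model({"data": img}, shared_memory=True)
res_copied = compiled_model({"data": img}, shared_memory=False)

request = compiled_model.create_infer_request()
request.infer({"data": img}, shared_memory=True)
request.start_async({"data": img}, shared_memory=False)
request.wait()
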
@ -231,4 +232,4 @@ def test_compiled_model_after_core_destroyed(device):
|
||||
del core
|
||||
del model
|
||||
# check compiled and infer request can work properly after core object is destroyed
|
||||
compiled([np.random.normal(size=list(input.shape)) for input in compiled.inputs])
|
||||
compiled([np.random.normal(size=list(input.shape)).astype(dtype=input.get_element_type().to_dtype()) for input in compiled.inputs])
|
||||
|
@ -109,6 +109,13 @@ def test_read_model_from_tensor():
|
||||
assert isinstance(model, Model)
|
||||
|
||||
|
||||
def test_read_model_with_wrong_input():
|
||||
core = Core()
|
||||
with pytest.raises(RuntimeError) as e:
|
||||
core.read_model(model=3, weights=3)
|
||||
assert "Provided python object type <class 'int'> isn't supported as 'model' argument." in str(e.value)
|
||||
|
||||
|
||||
def test_read_model_as_path():
|
||||
core = Core()
|
||||
model = core.read_model(model=Path(test_net_xml), weights=Path(test_net_bin))
|
||||
@ -133,7 +140,7 @@ def test_read_model_from_onnx_as_path():
|
||||
assert isinstance(model, Model)
|
||||
|
||||
|
||||
def test_read_net_from_buffer():
|
||||
def test_read_model_from_buffer():
|
||||
core = Core()
|
||||
with open(test_net_bin, "rb") as f:
|
||||
weights = f.read()
|
||||
@ -143,7 +150,7 @@ def test_read_net_from_buffer():
|
||||
assert isinstance(model, Model)
|
||||
|
||||
|
||||
def test_net_from_buffer_valid():
|
||||
def test_model_from_buffer_valid():
|
||||
core = Core()
|
||||
with open(test_net_bin, "rb") as f:
|
||||
weights = f.read()
|
||||
|
@ -304,7 +304,8 @@ def test_cancel(device):
|
||||
assert "[ INFER_CANCELLED ]" in str(e.value)
|
||||
|
||||
|
||||
def test_start_async(device):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_start_async(device, shared_flag):
|
||||
core = Core()
|
||||
model = core.read_model(test_net_xml, test_net_bin)
|
||||
compiled_model = core.compile_model(model, device)
|
||||
@ -322,14 +323,15 @@ def test_start_async(device):
|
||||
callbacks_info["finished"] = 0
|
||||
for request in requests:
|
||||
request.set_callback(callback, callbacks_info)
|
||||
request.start_async({0: img})
|
||||
request.start_async({0: img}, shared_memory=shared_flag)
|
||||
for request in requests:
|
||||
request.wait()
|
||||
assert request.latency > 0
|
||||
assert callbacks_info["finished"] == jobs
|
||||
|
||||
|
||||
def test_infer_list_as_inputs(device):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_infer_list_as_inputs(device, shared_flag):
|
||||
num_inputs = 4
|
||||
input_shape = [2, 1]
|
||||
dtype = np.float32
|
||||
@ -345,17 +347,18 @@ def test_infer_list_as_inputs(device):
|
||||
request = compiled_model.create_infer_request()
|
||||
|
||||
inputs = [np.random.normal(size=input_shape).astype(dtype)]
|
||||
request.infer(inputs)
|
||||
request.infer(inputs, shared_memory=shared_flag)
|
||||
check_fill_inputs(request, inputs)
|
||||
|
||||
inputs = [
|
||||
np.random.normal(size=input_shape).astype(dtype) for _ in range(num_inputs)
|
||||
]
|
||||
request.infer(inputs)
|
||||
request.infer(inputs, shared_memory=shared_flag)
|
||||
check_fill_inputs(request, inputs)
|
||||
|
||||
|
||||
def test_infer_mixed_keys(device):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_infer_mixed_keys(device, shared_flag):
|
||||
core = Core()
|
||||
model = get_relu_model()
|
||||
compiled_model = core.compile_model(model, device)
|
||||
@ -367,7 +370,7 @@ def test_infer_mixed_keys(device):
|
||||
tensor2 = Tensor(data2)
|
||||
|
||||
request = compiled_model.create_infer_request()
|
||||
res = request.infer({0: tensor2, "data": tensor})
|
||||
res = request.infer({0: tensor2, "data": tensor}, shared_memory=shared_flag)
|
||||
assert np.argmax(res[compiled_model.output()]) == 531
|
||||
|
||||
|
||||
@ -386,10 +389,11 @@ def test_infer_mixed_keys(device):
|
||||
(Type.u64, np.uint64),
|
||||
(Type.boolean, bool),
|
||||
])
|
||||
def test_infer_mixed_values(device, ov_type, numpy_dtype):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_infer_mixed_values(device, ov_type, numpy_dtype, shared_flag):
|
||||
request, tensor1, array1 = concat_model_with_data(device, ov_type, numpy_dtype)
|
||||
|
||||
request.infer([tensor1, array1])
|
||||
request.infer([tensor1, array1], shared_memory=shared_flag)
|
||||
|
||||
assert np.array_equal(request.outputs[0].data, np.concatenate((tensor1.data, array1)))
|
||||
|
||||
@ -409,10 +413,11 @@ def test_infer_mixed_values(device, ov_type, numpy_dtype):
|
||||
(Type.u64, np.uint64),
|
||||
(Type.boolean, bool),
|
||||
])
|
||||
def test_async_mixed_values(device, ov_type, numpy_dtype):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_async_mixed_values(device, ov_type, numpy_dtype, shared_flag):
|
||||
request, tensor1, array1 = concat_model_with_data(device, ov_type, numpy_dtype)
|
||||
|
||||
request.start_async([tensor1, array1])
|
||||
request.start_async([tensor1, array1], shared_memory=shared_flag)
|
||||
request.wait()
|
||||
|
||||
assert np.array_equal(request.outputs[0].data, np.concatenate((tensor1.data, array1)))
|
||||
@ -429,13 +434,14 @@ def test_async_mixed_values(device, ov_type, numpy_dtype):
|
||||
(Type.u16, np.uint16),
|
||||
(Type.i64, np.int64),
|
||||
])
|
||||
def test_infer_single_input(device, ov_type, numpy_dtype):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_infer_single_input(device, ov_type, numpy_dtype, shared_flag):
|
||||
request, tensor1, array1 = abs_model_with_data(device, ov_type, numpy_dtype)
|
||||
|
||||
request.infer(array1)
|
||||
request.infer(array1, shared_memory=shared_flag)
|
||||
assert np.array_equal(request.get_output_tensor().data, np.abs(array1))
|
||||
|
||||
request.infer(tensor1)
|
||||
request.infer(tensor1, shared_memory=shared_flag)
|
||||
assert np.array_equal(request.get_output_tensor().data, np.abs(tensor1.data))
|
||||
|
||||
|
||||
@ -450,19 +456,21 @@ def test_infer_single_input(device, ov_type, numpy_dtype):
|
||||
(Type.u16, np.uint16),
|
||||
(Type.i64, np.int64),
|
||||
])
|
||||
def test_async_single_input(device, ov_type, numpy_dtype):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_async_single_input(device, ov_type, numpy_dtype, shared_flag):
|
||||
request, tensor1, array1 = abs_model_with_data(device, ov_type, numpy_dtype)
|
||||
|
||||
request.start_async(array1)
|
||||
request.start_async(array1, shared_memory=shared_flag)
|
||||
request.wait()
|
||||
assert np.array_equal(request.get_output_tensor().data, np.abs(array1))
|
||||
|
||||
request.start_async(tensor1)
|
||||
request.start_async(tensor1, shared_memory=shared_flag)
|
||||
request.wait()
|
||||
assert np.array_equal(request.get_output_tensor().data, np.abs(tensor1.data))
|
||||
|
||||
|
||||
def test_infer_queue(device):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_infer_queue(device, shared_flag):
|
||||
jobs = 8
|
||||
num_request = 4
|
||||
core = Core()
|
||||
@ -475,12 +483,17 @@ def test_infer_queue(device):
|
||||
jobs_done[job_id]["finished"] = True
|
||||
jobs_done[job_id]["latency"] = request.latency
|
||||
|
||||
img = generate_image()
|
||||
img = None
|
||||
|
||||
if not shared_flag:
|
||||
img = generate_image()
|
||||
infer_queue.set_callback(callback)
|
||||
assert infer_queue.is_ready()
|
||||
|
||||
for i in range(jobs):
|
||||
infer_queue.start_async({"data": img}, i)
|
||||
if shared_flag:
|
||||
img = generate_image()
|
||||
infer_queue.start_async({"data": img}, i, shared_memory=shared_flag)
|
||||
infer_queue.wait_all()
|
||||
assert all(job["finished"] for job in jobs_done)
|
||||
assert all(job["latency"] > 0 for job in jobs_done)
|
||||
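The AsyncInferQueue counterpart, as the test above suggests: when sharing is enabled each job should get its own live buffer (a sketch; the model input name and callback are illustrative):

import numpy as np
from openvino.runtime import AsyncInferQueue

infer_queue = AsyncInferQueue(compiled_model, 4)
infer_queue.set_callback(callback)  # callback(request, userdata), as defined in the test above

for i in range(8):
    frame = np.random.rand(1, 3, 32, 32).astype(np.float32)  # fresh buffer per job
    # With shared_memory=True the queue keeps referencing `frame`, so it must stay
    # alive and unmodified until the job finishes; with False it is copied up front.
    infer_queue.start_async({"data": frame}, i, shared_memory=True)
infer_queue.wait_all()
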
@ -670,19 +683,21 @@ def test_query_state_write_buffer(device, input_shape, data_type, mode):
|
||||
assert np.allclose(res[list(res)[0]], expected_res, atol=1e-6), f"Expected values: {expected_res} \n Actual values: {res} \n"
|
||||
|
||||
|
||||
def test_get_results(device):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_get_results(device, shared_flag):
|
||||
core = Core()
|
||||
data = ops.parameter([10], np.float64)
|
||||
model = Model(ops.split(data, 0, 5), [data])
|
||||
compiled_model = core.compile_model(model, device)
|
||||
request = compiled_model.create_infer_request()
|
||||
inputs = [np.random.normal(size=list(compiled_model.input().shape))]
|
||||
results = request.infer(inputs)
|
||||
results = request.infer(inputs, shared_memory=shared_flag)
|
||||
for output in compiled_model.outputs:
|
||||
assert np.array_equal(results[output], request.results[output])
|
||||
|
||||
|
||||
def test_results_async_infer(device):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_results_async_infer(device, shared_flag):
|
||||
jobs = 8
|
||||
num_request = 4
|
||||
core = Core()
|
||||
@ -698,7 +713,7 @@ def test_results_async_infer(device):
|
||||
img = generate_image()
|
||||
infer_queue.set_callback(callback)
|
||||
for i in range(jobs):
|
||||
infer_queue.start_async({"data": img}, i)
|
||||
infer_queue.start_async({"data": img}, i, shared_memory=shared_flag)
|
||||
infer_queue.wait_all()
|
||||
|
||||
request = compiled_model.create_infer_request()
|
||||
@ -712,7 +727,8 @@ def test_results_async_infer(device):
|
||||
os.environ.get("TEST_DEVICE") not in ["GPU"],
|
||||
reason="Device dependent test",
|
||||
)
|
||||
def test_infer_float16(device):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_infer_float16(device, shared_flag):
|
||||
model = bytes(
|
||||
b"""<net name="add_model" version="10">
|
||||
<layers>
|
||||
@ -787,12 +803,13 @@ def test_infer_float16(device):
|
||||
compiled_model = core.compile_model(model, device)
|
||||
input_data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]).astype(np.float16)
|
||||
request = compiled_model.create_infer_request()
|
||||
outputs = request.infer({0: input_data, 1: input_data})
|
||||
outputs = request.infer({0: input_data, 1: input_data}, shared_memory=shared_flag)
|
||||
assert np.allclose(list(outputs.values()), list(request.results.values()))
|
||||
assert np.allclose(list(outputs.values()), input_data + input_data)
|
||||
|
||||
|
||||
def test_ports_as_inputs(device):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_ports_as_inputs(device, shared_flag):
|
||||
input_shape = [2, 2]
|
||||
param_a = ops.parameter(input_shape, np.float32)
|
||||
param_b = ops.parameter(input_shape, np.float32)
|
||||
@ -808,56 +825,66 @@ def test_ports_as_inputs(device):
|
||||
tensor1 = Tensor(arr_1)
|
||||
tensor2 = Tensor(arr_2)
|
||||
|
||||
res = request.infer({compiled_model.inputs[0]: tensor1, compiled_model.inputs[1]: tensor2})
|
||||
res = request.infer(
|
||||
{compiled_model.inputs[0]: tensor1, compiled_model.inputs[1]: tensor2},
|
||||
shared_memory=shared_flag,
|
||||
)
|
||||
assert np.array_equal(res[compiled_model.outputs[0]], tensor1.data + tensor2.data)
|
||||
|
||||
res = request.infer({request.model_inputs[0]: tensor1, request.model_inputs[1]: tensor2})
|
||||
res = request.infer(
|
||||
{request.model_inputs[0]: tensor1, request.model_inputs[1]: tensor2},
|
||||
shared_memory=shared_flag,
|
||||
)
|
||||
assert np.array_equal(res[request.model_outputs[0]], tensor1.data + tensor2.data)
|
||||
|
||||
|
||||
def test_inputs_dict_not_replaced(device):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_inputs_dict_not_replaced(device, shared_flag):
|
||||
request, arr_1, arr_2 = create_simple_request_and_inputs(device)
|
||||
|
||||
inputs = {0: arr_1, 1: arr_2}
|
||||
inputs_copy = deepcopy(inputs)
|
||||
|
||||
res = request.infer(inputs)
|
||||
res = request.infer(inputs, shared_memory=shared_flag)
|
||||
|
||||
np.testing.assert_equal(inputs, inputs_copy)
|
||||
assert np.array_equal(res[request.model_outputs[0]], arr_1 + arr_2)
|
||||
|
||||
|
||||
def test_inputs_list_not_replaced(device):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_inputs_list_not_replaced(device, shared_flag):
|
||||
request, arr_1, arr_2 = create_simple_request_and_inputs(device)
|
||||
|
||||
inputs = [arr_1, arr_2]
|
||||
inputs_copy = deepcopy(inputs)
|
||||
|
||||
res = request.infer(inputs)
|
||||
res = request.infer(inputs, shared_memory=shared_flag)
|
||||
|
||||
assert np.array_equal(inputs, inputs_copy)
|
||||
assert np.array_equal(res[request.model_outputs[0]], arr_1 + arr_2)
|
||||
|
||||
|
||||
def test_inputs_tuple_not_replaced(device):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_inputs_tuple_not_replaced(device, shared_flag):
|
||||
request, arr_1, arr_2 = create_simple_request_and_inputs(device)
|
||||
|
||||
inputs = (arr_1, arr_2)
|
||||
inputs_copy = deepcopy(inputs)
|
||||
|
||||
res = request.infer(inputs)
|
||||
res = request.infer(inputs, shared_memory=shared_flag)
|
||||
|
||||
assert np.array_equal(inputs, inputs_copy)
|
||||
assert np.array_equal(res[request.model_outputs[0]], arr_1 + arr_2)
|
||||
|
||||
|
||||
def test_invalid_inputs(device):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_invalid_inputs(device, shared_flag):
|
||||
request, _, _ = create_simple_request_and_inputs(device)
|
||||
|
||||
inputs = "some_input"
|
||||
|
||||
with pytest.raises(TypeError) as e:
|
||||
request.infer(inputs)
|
||||
request.infer(inputs, shared_memory=shared_flag)
|
||||
assert "Incompatible inputs of type:" in str(e.value)
|
||||
|
||||
|
||||
@ -885,7 +912,8 @@ def test_infer_dynamic_model(device):
|
||||
assert request.get_input_tensor().shape == Shape(shape3)
|
||||
|
||||
|
||||
def test_array_like_input_request(device):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_array_like_input_request(device, shared_flag):
|
||||
class ArrayLikeObject:
|
||||
# Array-like object accepted by np.array to test inputs similar to torch tensor and tf.Tensor
|
||||
def __init__(self, array) -> None:
|
||||
@ -899,7 +927,7 @@ def test_array_like_input_request(device):
|
||||
model_input_list = [ArrayLikeObject(input_data.tolist())]
|
||||
|
||||
# Test single array-like object in InferRequest().Infer()
|
||||
res_object = request.infer(model_input_object)
|
||||
res_object = request.infer(model_input_object, shared_memory=shared_flag)
|
||||
assert np.array_equal(res_object[request.model_outputs[0]], np.abs(input_data))
|
||||
|
||||
# Test list of array-like objects to use normalize_inputs()
|
||||
@ -907,7 +935,8 @@ def test_array_like_input_request(device):
|
||||
assert np.array_equal(res_list[request.model_outputs[0]], np.abs(input_data))
|
||||
|
||||
|
||||
def test_array_like_input_async(device):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_array_like_input_async(device, shared_flag):
|
||||
class ArrayLikeObject:
|
||||
# Array-like object accepted by np.array to test inputs similar to torch tensor and tf.Tensor
|
||||
def __init__(self, array) -> None:
|
||||
@ -920,7 +949,7 @@ def test_array_like_input_async(device):
|
||||
model_input_object = ArrayLikeObject(input_data.tolist())
|
||||
model_input_list = [ArrayLikeObject(input_data.tolist())]
|
||||
# Test single array-like object in InferRequest().start_async()
|
||||
request.start_async(model_input_object)
|
||||
request.start_async(model_input_object, shared_memory=shared_flag)
|
||||
request.wait()
|
||||
assert np.array_equal(request.get_output_tensor().data, np.abs(input_data))
|
||||
|
||||
@ -930,19 +959,20 @@ def test_array_like_input_async(device):
|
||||
assert np.array_equal(request.get_output_tensor().data, np.abs(input_data))
|
||||
|
||||
|
||||
def test_array_like_input_async_infer_queue(device):
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
def test_array_like_input_async_infer_queue(device, shared_flag):
|
||||
class ArrayLikeObject:
|
||||
# Array-like object accepted by np.array to test inputs similar to torch tensor and tf.Tensor
|
||||
def __init__(self, array) -> None:
|
||||
self.data = array
|
||||
|
||||
def __array__(self):
|
||||
return np.array(self.data)
|
||||
return self.data
|
||||
|
||||
jobs = 8
|
||||
ov_type = Type.f32
|
||||
input_shape = [2, 2]
|
||||
input_data = [[-2, -1], [0, 1]]
|
||||
input_data = np.ascontiguousarray([[-2, -1], [0, 1]])
|
||||
param = ops.parameter(input_shape, ov_type)
|
||||
layer = ops.abs(param)
|
||||
model = Model([layer], [param])
|
||||
@ -950,21 +980,23 @@ def test_array_like_input_async_infer_queue(device):
|
||||
compiled_model = core.compile_model(model, "CPU")
|
||||
|
||||
model_input_object = ArrayLikeObject(input_data)
|
||||
model_input_list = [ArrayLikeObject(input_data)]
|
||||
model_input_list = [[ArrayLikeObject(deepcopy(input_data))] for _ in range(jobs)]
|
||||
|
||||
# Test single array-like object in AsyncInferQueue.start_async()
|
||||
infer_queue_object = AsyncInferQueue(compiled_model, jobs)
|
||||
for _i in range(jobs):
|
||||
infer_queue_object.start_async(model_input_object)
|
||||
infer_queue_object.wait_all()
|
||||
|
||||
for i in range(jobs):
|
||||
assert np.array_equal(infer_queue_object[i].get_output_tensor().data, np.abs(input_data))
|
||||
|
||||
# Test list of array-like objects in AsyncInferQueue.start_async()
|
||||
infer_queue_list = AsyncInferQueue(compiled_model, jobs)
|
||||
for _i in range(jobs):
|
||||
infer_queue_list.start_async(model_input_list)
|
||||
for i in range(jobs):
|
||||
infer_queue_list.start_async(model_input_list[i], shared_memory=shared_flag)
|
||||
infer_queue_list.wait_all()
|
||||
|
||||
for i in range(jobs):
|
||||
assert np.array_equal(infer_queue_list[i].get_output_tensor().data, np.abs(input_data))
|
||||
|
||||
|
124
src/bindings/python/tests/test_utils/test_data_dispatch.py
Normal file
@ -0,0 +1,124 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from tests.conftest import model_path
|
||||
from tests.test_utils.test_utils import get_relu_model, generate_image, generate_model_and_image, generate_relu_compiled_model
|
||||
from openvino.runtime import Model, ConstOutput, Type, Shape, Core, Tensor
|
||||
from openvino.runtime.utils.data_helpers import _data_dispatch
|
||||
|
||||
is_myriad = os.environ.get("TEST_DEVICE") == "MYRIAD"
|
||||
test_net_xml, test_net_bin = model_path(is_myriad)
|
||||
|
||||
|
||||
def _get_value(value):
|
||||
return value.data if isinstance(value, Tensor) else value
|
||||
|
||||
|
||||
def _run_dispatcher(device, input_data, input_shape, is_shared):
|
||||
compiled_model = generate_relu_compiled_model(device, input_shape)
|
||||
infer_request = compiled_model.create_infer_request()
|
||||
result = _data_dispatch(infer_request, input_data, is_shared)
|
||||
|
||||
return result, infer_request
|
||||
|
||||
|
||||
@pytest.mark.parametrize("data_type", [np.float_, np.int_, int, float])
|
||||
@pytest.mark.parametrize("input_shape", [[], [1]])
|
||||
@pytest.mark.parametrize("is_shared", [True, False])
|
||||
def test_scalars_dispatcher(device, data_type, input_shape, is_shared):
|
||||
test_data = data_type(2)
|
||||
expected = Tensor(np.ndarray([], data_type, np.array(test_data)))
|
||||
|
||||
result, _ = _run_dispatcher(device, test_data, input_shape, is_shared)
|
||||
|
||||
assert isinstance(result, Tensor)
|
||||
assert result.get_shape() == Shape([])
|
||||
assert result.get_element_type() == Type(data_type)
|
||||
assert result.data == expected.data
|
||||
|
||||
|
||||
@pytest.mark.parametrize("input_shape", [[1], [2, 2]])
|
||||
@pytest.mark.parametrize("is_shared", [True, False])
|
||||
def test_tensor_dispatcher(device, input_shape, is_shared):
|
||||
array = np.ones(input_shape)
|
||||
|
||||
test_data = Tensor(array, is_shared)
|
||||
|
||||
result, _ = _run_dispatcher(device, test_data, input_shape, is_shared)
|
||||
|
||||
assert isinstance(result, Tensor)
|
||||
assert result.get_shape() == Shape(input_shape)
|
||||
assert result.get_element_type() == Type(array.dtype)
|
||||
assert np.array_equal(result.data, array)
|
||||
|
||||
# Change data to check if shared_memory is still applied
|
||||
array[0] = 2.0
|
||||
|
||||
assert np.array_equal(array, result.data) if is_shared else not np.array_equal(array, result.data)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("input_shape", [[1], [2, 2]])
|
||||
def test_ndarray_shared_dispatcher(device, input_shape):
|
||||
test_data = np.ones(input_shape).astype(np.float32)
|
||||
|
||||
result, _ = _run_dispatcher(device, test_data, input_shape, True)
|
||||
|
||||
assert isinstance(result, Tensor)
|
||||
assert result.get_shape() == Shape(test_data.shape)
|
||||
assert result.get_element_type() == Type(test_data.dtype)
|
||||
assert np.array_equal(result.data, test_data)
|
||||
|
||||
test_data[0] = 2.0
|
||||
|
||||
assert np.array_equal(result.data, test_data)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("input_shape", [[1], [2, 2]])
|
||||
def test_ndarray_shared_dispatcher_casting(device, input_shape):
|
||||
test_data = np.ones(input_shape)
|
||||
|
||||
result, infer_request = _run_dispatcher(device, test_data, input_shape, True)
|
||||
|
||||
assert isinstance(result, Tensor)
|
||||
assert result.get_shape() == Shape(test_data.shape)
|
||||
assert result.get_element_type() == infer_request.inputs[0].get_element_type()
|
||||
assert np.array_equal(result.data, test_data)
|
||||
|
||||
test_data[0] = 2.0
|
||||
|
||||
assert not np.array_equal(result.data, test_data)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("input_shape", [[1, 2, 3], [2, 2]])
|
||||
def test_ndarray_shared_dispatcher_misalign(device, input_shape):
|
||||
test_data = np.asfortranarray(np.ones(input_shape).astype(np.float32))
|
||||
|
||||
result, _ = _run_dispatcher(device, test_data, input_shape, True)
|
||||
|
||||
assert isinstance(result, Tensor)
|
||||
assert result.get_shape() == Shape(test_data.shape)
|
||||
assert result.get_element_type() == Type(test_data.dtype)
|
||||
assert np.array_equal(result.data, test_data)
|
||||
|
||||
test_data[0] = 2.0
|
||||
|
||||
assert not np.array_equal(result.data, test_data)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("input_shape", [[1, 2, 3], [2, 2]])
|
||||
def test_ndarray_copied_dispatcher(device, input_shape):
|
||||
test_data = np.ones(input_shape)
|
||||
|
||||
result, infer_request = _run_dispatcher(device, test_data, input_shape, False)
|
||||
|
||||
assert result == {}
|
||||
assert np.array_equal(infer_request.inputs[0].data, test_data)
|
||||
|
||||
test_data[0] = 2.0
|
||||
|
||||
assert not np.array_equal(infer_request.inputs[0].data, test_data)
|
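Condensed, the contract these tests pin down: the dispatcher returns a Tensor view over the user's data when sharing is requested, and an empty dict after copying the data into the request otherwise (a sketch, assuming an f32 [2, 2] ReLU model as in the helpers above):

import numpy as np
from openvino.runtime import Tensor
from openvino.runtime.utils.data_helpers import _data_dispatch

data = np.ones((2, 2), dtype=np.float32)
request = compiled_model.create_infer_request()

shared = _data_dispatch(request, data, True)   # is_shared=True
assert isinstance(shared, Tensor)              # zero-copy view over `data`

copied = _data_dispatch(request, data, False)  # is_shared=False
assert copied == {}                            # data already copied into request.inputs[0]
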
@ -118,6 +118,8 @@ def create_filename_for_test(test_name, tmp_path, is_xml_path=False, is_bin_path
|
||||
python_version = str(sys.version_info.major) + "_" + str(sys.version_info.minor)
|
||||
filename = test_name.replace("test_", "").replace("[", "_").replace("]", "_")
|
||||
filename = filename + "_" + python_version
|
||||
_xml = tmp_path / Path(filename + ".xml") if is_xml_path else tmp_path / Path(filename + ".xml")
|
||||
_bin = tmp_path / Path(filename + ".bin") if is_bin_path else tmp_path / Path(filename + ".bin")
|
||||
path_to_xml = tmp_path / Path(filename + ".xml")
|
||||
path_to_bin = tmp_path / Path(filename + ".bin")
|
||||
_xml = path_to_xml if is_xml_path else str(path_to_xml)
|
||||
_bin = path_to_bin if is_bin_path else str(path_to_bin)
|
||||
return (_xml, _bin)
|
||||
|
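With this fix the helper returns pathlib.Path objects only when explicitly requested and plain strings otherwise, e.g. (hypothetical call, tmp_path being the usual pytest fixture):

# is_xml_path=True -> _xml is a pathlib.Path; is_bin_path=False -> _bin is a str
xml_path, bin_path = create_filename_for_test("test_serialize", tmp_path,
                                               is_xml_path=True, is_bin_path=False)
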
@ -36,7 +36,7 @@ elif machine == "X86" or machine == "i686":
|
||||
ARCH = "ia32"
|
||||
elif machine == "arm" or machine == "armv7l":
|
||||
ARCH = "arm"
|
||||
elif machine == "aarch64" or machine == "arm64":
|
||||
elif machine == "aarch64" or machine == "arm64" or machine == "ARM64":
|
||||
ARCH = "arm64"
|
||||
|
||||
# The following variables can be defined in environment or .env file
|
||||
|
@ -30,8 +30,12 @@ elseif(SELECTIVE_BUILD STREQUAL "ON")
|
||||
# After disabling a block of code, some variables might be unused.
|
||||
target_compile_options(${TARGET_NAME} INTERFACE
|
||||
-Wno-unused-function
|
||||
-Wno-unused-variable
|
||||
-Wno-unused-parameter
|
||||
-Wno-unused-local-typedefs)
|
||||
if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
target_compile_options(${TARGET_NAME} INTERFACE -Wno-unused-but-set-variable)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(GENERATED_HEADER ${CMAKE_CURRENT_BINARY_DIR}/conditional_compilation_gen.h CACHE FILEPATH "")
|
||||
|
@ -58,6 +58,9 @@ struct TestNode : public TestNodeBase {
|
||||
} // namespace
|
||||
|
||||
TEST(ConditionalCompilationTests, SimpleScopeAnalysys) {
|
||||
auto func = SIMPLE_CCTests;
|
||||
(void)func;
|
||||
|
||||
int n = 0;
|
||||
|
||||
OV_SCOPE(CCTests, Scope0) n = 42;
|
||||
|
@ -26,10 +26,6 @@ if(TARGET ittnotify)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
target_compile_options(${TARGET_NAME} PRIVATE -Wall)
|
||||
endif()
|
||||
|
||||
target_include_directories(${TARGET_NAME} PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <openvino/function_name.hpp>
|
||||
#include <openvino/util/pp.hpp>
|
||||
#include <string>
|
||||
@ -34,9 +35,9 @@ namespace openvino
|
||||
*/
|
||||
typedef struct handle_ {} *handle_t;
|
||||
|
||||
/**
|
||||
* @cond
|
||||
*/
|
||||
/**
|
||||
* @cond
|
||||
*/
|
||||
namespace internal
|
||||
{
|
||||
domain_t domain(char const* name);
|
||||
@ -45,9 +46,9 @@ namespace openvino
|
||||
void taskEnd(domain_t d);
|
||||
void threadName(const char* name);
|
||||
}
|
||||
/**
|
||||
* @endcond
|
||||
*/
|
||||
/**
|
||||
* @endcond
|
||||
*/
|
||||
|
||||
/**
|
||||
* @fn void threadName(const char* name)
|
||||
|
@ -259,6 +259,8 @@ public:
|
||||
|
||||
static ov::Output<ov::Node> getSingleConsumerConstant(const ov::Output<ov::Node>& output);
|
||||
|
||||
static bool checkConstantOnInf(const std::shared_ptr<Node> constant_node);
|
||||
|
||||
private:
|
||||
static std::shared_ptr<Node> foldFakeQuantize(
|
||||
const std::shared_ptr<opset1::FakeQuantize>& fq,
|
||||
|
@ -27,7 +27,7 @@ public:
|
||||
class LP_TRANSFORMATIONS_API SharedValueAttribute : public std::enable_shared_from_this<SharedValueAttribute> {
|
||||
public:
|
||||
struct LP_TRANSFORMATIONS_API SharedValue : public std::enable_shared_from_this<SharedValue> {
|
||||
SharedValue() = default;
|
||||
SharedValue() {}
|
||||
SharedValue(const T& value) : value{value} {}
|
||||
T value = {};
|
||||
void addAttribute(std::weak_ptr<SharedValueAttribute> attribute) {
|
||||
|
@ -188,6 +188,12 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter
|
||||
|
||||
auto newMultiplyFullPathValues = fold<opset1::Divide>(multiplyFullPathValues, multiplyEmptyPathValues);
|
||||
|
||||
// Transformation can't be applied if new full path values break accuracy because of Inf values
|
||||
if (!NetworkHelper::checkConstantOnInf(newSubtractFullPathValues) ||
|
||||
!NetworkHelper::checkConstantOnInf(newMultiplyFullPathValues)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (NetworkHelper::isZeroConst(newSubtractFullPathValues)) {
|
||||
newSubtractFullPathValues = nullptr;
|
||||
}
|
||||
|
@ -179,10 +179,8 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
|
||||
|
||||
inputLowConst_f32 = fold<opset1::Divide>(inputLowConst_f32, value);
|
||||
inputHighConst_f32 = fold<opset1::Divide>(inputHighConst_f32, value);
|
||||
const auto resultLow = ov::as_type_ptr<opset1::Constant>(inputLowConst_f32)->cast_vector<float>();
|
||||
const auto resultHigh = ov::as_type_ptr<opset1::Constant>(inputHighConst_f32)->cast_vector<float>();
|
||||
if (std::any_of(resultLow.begin(), resultLow.end(), [](const float value){ return std::isinf(value); }) ||
|
||||
std::any_of(resultHigh.begin(), resultHigh.end(), [](const float value){ return std::isinf(value); })) {
|
||||
if (!NetworkHelper::checkConstantOnInf(inputLowConst_f32) ||
|
||||
!NetworkHelper::checkConstantOnInf(inputHighConst_f32)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -104,7 +104,7 @@ bool FakeQuantizeDequantization::checkShape(const std::shared_ptr<ngraph::Node>&
|
||||
}
|
||||
|
||||
if (!inPShape.rank().is_dynamic()) {
|
||||
for (int i = 0; i < inPShape.size(); ++i) {
|
||||
for (size_t i = 0; i < inPShape.size(); ++i) {
|
||||
if (inPShape[i] != outPShape[i] && !inPShape[i].is_dynamic()) {
|
||||
return false;
|
||||
}
|
||||
|
@ -27,8 +27,8 @@ constexpr char LayerTransformation::originalLayerPostfix[];
|
||||
LayerTransformation::LayerTransformation(const Params& params) :
|
||||
updatePrecisions(params.updatePrecisions),
|
||||
deqPrecision(params.deqPrecision),
|
||||
reshapeIgnorePerTensorQuantizationCheck(params.reshapeIgnorePerTensorQuantizationCheck),
|
||||
defaultPrecisions(params.defaultPrecisions),
|
||||
reshapeIgnorePerTensorQuantizationCheck(params.reshapeIgnorePerTensorQuantizationCheck),
|
||||
context(nullptr) {}
|
||||
|
||||
void LayerTransformation::setContext(TransformationContext* context) noexcept {
|
||||
|
@ -273,7 +273,7 @@ bool ngraph::pass::low_precision::LowPrecision::run_on_model(const std::shared_p
|
||||
bool ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(const std::shared_ptr<const ngraph::Function>& function) {
|
||||
std::set<std::shared_ptr<ngraph::Node>> handledNodes;
|
||||
std::deque<std::shared_ptr<ngraph::Node>> nodes;
|
||||
for (const auto result : function->get_results()) {
|
||||
for (const auto& result : function->get_results()) {
|
||||
nodes.push_front(result);
|
||||
}
|
||||
|
||||
@ -294,7 +294,7 @@ bool ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(const std::s
|
||||
}
|
||||
} else if (const auto multiSubGraph = ov::as_type_ptr<ngraph::op::util::MultiSubGraphOp>(parent)) {
|
||||
// Look inside subgraph operations, such as TensorIterator, Loop, If, etc
|
||||
for (int i = 0; i < multiSubGraph->get_internal_subgraphs_size(); i++) {
|
||||
for (size_t i = 0; i < multiSubGraph->get_internal_subgraphs_size(); i++) {
|
||||
if (isFunctionQuantized(multiSubGraph->get_function(i))) {
|
||||
return true;
|
||||
}
|
||||
|
@ -438,7 +438,7 @@ std::vector<size_t> NetworkHelper::updateReshapeValues(
|
||||
}
|
||||
}
|
||||
}
|
||||
return updatedReshapeValues;
|
||||
return std::move(updatedReshapeValues);
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::opset1::Multiply> NetworkHelper::optimizeMultipliesAfter(std::shared_ptr<Node> node) {
|
||||
@ -2010,6 +2010,14 @@ ov::Output<ov::Node> NetworkHelper::getSingleConsumerConstant(const ov::Output<o
|
||||
? output
|
||||
: node->clone_with_new_inputs(node->input_values())->output(0);
|
||||
}
|
||||
|
||||
bool NetworkHelper::checkConstantOnInf(const std::shared_ptr<Node> constant_node) {
|
||||
const auto constant = ov::as_type_ptr<opset1::Constant>(constant_node);
|
||||
if (constant == nullptr)
|
||||
return false;
|
||||
const auto values = constant->cast_vector<float>();
|
||||
return std::all_of(values.begin(), values.end(), [](const float x) { return !std::isinf(x); });
|
||||
}
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
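Conceptually, the new NetworkHelper::checkConstantOnInf guard only verifies that no folded constant value is +/-Inf before the transformation is applied; a NumPy rendering of the same check (illustrative only, not part of the patch):

import numpy as np

def constant_has_no_inf(values):
    # Mirrors the C++ helper: reject constants whose float32 values contain +/-Inf.
    return not np.any(np.isinf(np.asarray(values, dtype=np.float32)))
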
@ -262,8 +262,8 @@ bool WeightableLayerTransformation::isQuantizedStatic(const std::shared_ptr<cons
|
||||
return true;
|
||||
};
|
||||
|
||||
if (dequantizationOnWeights.subtract && !checkConstShape(dequantizationOnWeights.subtractConstant) ||
|
||||
dequantizationOnWeights.multiply && !checkConstShape(dequantizationOnWeights.multiplyConstant)) {
|
||||
if ((dequantizationOnWeights.subtract && !checkConstShape(dequantizationOnWeights.subtractConstant)) ||
|
||||
(dequantizationOnWeights.multiply && !checkConstShape(dequantizationOnWeights.multiplyConstant))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -390,6 +390,38 @@ const std::vector<AddTransformationTestValues> testValuesWithoutConstantBranches
|
||||
{{}, {}, {5.f}},
|
||||
{}},
|
||||
""},
|
||||
// Multiply with the value that mustn't be transformed (to avoid infinite values in multiply constant)
|
||||
{false,
|
||||
-1,
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
{ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {1.f}, {std::numeric_limits<float>::max()}},
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {}, {0.009f}},
|
||||
{}},
|
||||
{ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {1.f}, {std::numeric_limits<float>::max()}},
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {}, {0.009f}},
|
||||
{{}, {}, {}},
|
||||
{}},
|
||||
""},
|
||||
// Subtract with the value that mustn't be transformed (to avoid infinite values in multiply constant)
|
||||
{false,
|
||||
-1,
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
{ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {}, {0.009f}},
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {std::numeric_limits<float>::max()}, {2.f}},
|
||||
{}},
|
||||
{ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {}, {0.009f}},
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {std::numeric_limits<float>::max()}, {2.f}},
|
||||
{{}, {}, {}},
|
||||
{}},
|
||||
""},
|
||||
|
||||
// convolution before FQ (choose that branch)
|
||||
{false,
|
||||
|
@ -150,13 +150,13 @@ const std::vector<GatherTransformationTestValues> testValues = {
|
||||
{ngraph::element::u8,
|
||||
{{ngraph::element::f32},
|
||||
{{128.f}, element::undefined, {1, 3, 1}, false, 1ul, element::u8, true},
|
||||
{{0.1}, ngraph::element::f32, {1, 3, 1}}}},
|
||||
{{0.1f}, ngraph::element::f32, {1, 3, 1}}}},
|
||||
{ngraph::element::u8,
|
||||
{{}, {}, {}},
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32},
|
||||
{{128.f}, element::undefined, {1, 3, 1}, false, 1ul, element::u8, true},
|
||||
{{0.1}, ngraph::element::f32, {1, 3, 1}}}}},
|
||||
{{0.1f}, ngraph::element::f32, {1, 3, 1}}}}},
|
||||
// U8: per-channel quantization, gather axis match with channel
|
||||
{{1},
|
||||
{0},
|
||||
|
@ -87,9 +87,10 @@ TEST_P(FQDecompositionWithSharedConstants, FQDecompositionWithSharedConstants) {
|
||||
ASSERT_TRUE(res.valid) << res.message;
|
||||
|
||||
// additional check: FQ constants after transformation mustn't be shared
|
||||
for (const auto n : actualFunction->get_ordered_ops()) {
|
||||
if (ov::is_type<opset1::Constant>(n))
|
||||
for (const auto& n : actualFunction->get_ordered_ops()) {
|
||||
if (ov::is_type<opset1::Constant>(n)) {
|
||||
EXPECT_EQ(n->get_output_target_inputs(0).size(), 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
namespace {
|
||||
|
@ -196,7 +196,7 @@ public:
|
||||
if (size() < mask->size())
|
||||
resize(mask->size());
|
||||
for (size_t i = 0; i < size(); i++) {
|
||||
if (i == axis) {
|
||||
if (static_cast<int64_t>(i) == axis) {
|
||||
std::set<uint64_t> dst_set;
|
||||
const auto& src_set = mask->at(i);
|
||||
auto it = src_set.lower_bound(split_start);
|
||||
|
@ -35,7 +35,6 @@ bool ngraph::pass::GenerateMappingFile::run_on_model(const std::shared_ptr<ngrap
|
||||
|
||||
for (auto&& node : f->get_ordered_ops()) {
|
||||
uint64_t ie_port_index{node->inputs().size()};
|
||||
uint64_t ng_port_index{0};
|
||||
if (std::dynamic_pointer_cast<ov::op::v0::Result>(node))
|
||||
continue;
|
||||
for (auto&& output : node->outputs()) {
|
||||
@ -52,7 +51,6 @@ bool ngraph::pass::GenerateMappingFile::run_on_model(const std::shared_ptr<ngrap
|
||||
}
|
||||
}
|
||||
++ie_port_index;
|
||||
++ng_port_index;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -114,7 +114,7 @@ public:
|
||||
// 2. Get constant rank to set mask on last dimension
|
||||
const auto const_op = std::dynamic_pointer_cast<opset6::Constant>(cur_node);
|
||||
const auto shape_rank = const_op->get_shape().size();
|
||||
const auto shift = (matmul->get_transpose_b()) ? 2 : 1;
|
||||
const size_t shift = (matmul->get_transpose_b()) ? 2 : 1;
|
||||
if (shape_rank < shift) {
|
||||
NGRAPH_DEBUG << "Can't init mask for MatMul: " << matmul->get_friendly_name() << std::endl;
|
||||
return false;
|
||||
|
@ -970,7 +970,7 @@ static std::vector<dims_vec> map_reshaped_dimensions(const dims_vec input_shape,
|
||||
static std::vector<ov::Shape> map_reshaped_shapes(const ov::Shape unsquized_shape,
|
||||
const std::vector<dims_vec> dims_map) {
|
||||
auto retval = std::vector<ov::Shape>();
|
||||
for (const auto unsquized_dims : dims_map) {
|
||||
for (const auto& unsquized_dims : dims_map) {
|
||||
auto cur_dim_shape = ov::Shape();
|
||||
for (const auto& dim : unsquized_dims)
|
||||
cur_dim_shape.push_back(unsquized_shape[dim]);
|
||||
@ -1243,7 +1243,7 @@ public:
|
||||
for (auto& ch : weights_mask_row->at(out_dim)) {
|
||||
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||
auto iter = get_channel_iter(dims_shape[in_dim], unsquized_shift, ch);
|
||||
for (const auto coord : iter)
|
||||
for (const auto& coord : iter)
|
||||
cur_mask->at(in_dim).insert(iter.index(coord));
|
||||
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||
}
|
||||
@ -1316,7 +1316,7 @@ public:
|
||||
for (auto& ch : input_mask_row->at(in_dim)) {
|
||||
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||
auto iter = get_channel_iter(dims_shape[out_dim], unsquized_shift, ch);
|
||||
for (const auto coord : iter)
|
||||
for (const auto& coord : iter)
|
||||
cur_mask->at(out_dim).insert(iter.index(coord));
|
||||
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||
}
|
||||
@ -1388,7 +1388,7 @@ public:
|
||||
NGRAPH_DEBUG << "No input mask for: " << m_output.get_node()->get_friendly_name() << std::endl;
|
||||
return false;
|
||||
}
|
||||
if (input_mask->size() != m_output.get_partial_shape().rank().get_length()) {
|
||||
if (static_cast<int64_t>(input_mask->size()) != m_output.get_partial_shape().rank().get_length()) {
|
||||
NGRAPH_DEBUG << "Transpose which change tensor rank is not supported yet.";
|
||||
return false;
|
||||
}
|
||||
@ -1454,7 +1454,7 @@ static ngraph::Mask::Ptr create_connect_split_output_mask(ngraph::Mask::Ptr inpu
|
||||
}
|
||||
for (size_t j = 0; j < output_mask_raw->size(); j++) {
|
||||
const auto& dim_mask = output_mask_raw->at(j);
|
||||
if (j == axis) {
|
||||
if (static_cast<int64_t>(j) == axis) {
|
||||
for (auto d : dim_mask)
|
||||
cur_mask->at(j).insert(d + split_start);
|
||||
} else {
|
||||
@ -1502,9 +1502,9 @@ public:
|
||||
// split_lengths can contain -1 value
|
||||
int minus_one_length_idx = -1;
|
||||
int64_t total_lengths = 0;
|
||||
for (int i = 0; i < split_lengths.size(); i++) {
|
||||
for (size_t i = 0; i < split_lengths.size(); i++) {
|
||||
if (split_lengths[i] == -1) {
|
||||
minus_one_length_idx = i;
|
||||
minus_one_length_idx = static_cast<int>(i);
|
||||
continue;
|
||||
}
|
||||
total_lengths += split_lengths[i];
|
||||
|
@ -43,8 +43,8 @@ static bool is_static_reshape_op(std::shared_ptr<ov::Node> node) {
|
||||
const auto& input_shape = input.get_shape();
|
||||
const auto output_shape = output_shape_const_op->cast_vector<int64_t>();
|
||||
// below casts are needed due to VC warning C4244, literals are not enough in this case
|
||||
const auto input_elems =
|
||||
std::accumulate(input_shape.begin(), input_shape.end(), static_cast<size_t>(1), std::multiplies<size_t>());
|
||||
const int64_t input_elems =
|
||||
std::accumulate(input_shape.begin(), input_shape.end(), static_cast<int64_t>(1), std::multiplies<int64_t>());
|
||||
const auto output_elems =
|
||||
std::accumulate(output_shape.begin(), output_shape.end(), static_cast<int64_t>(1), std::multiplies<int64_t>());
|
||||
if (output_elems <= 0 || input_elems == output_elems)
|
||||
|
@ -71,7 +71,7 @@ auto tail_transformations(NodeVector& tail, const size_t tail_size, const ngraph
|
||||
if (config.m_need_fill_tail_register &&
|
||||
(ov::is_type<ov::op::v1::Maximum>(op) ||
|
||||
ov::is_type<ov::op::v1::Add>(op))) {
|
||||
for (auto i = 0; i < op->inputs().size(); ++i) {
|
||||
for (size_t i = 0; i < op->inputs().size(); ++i) {
|
||||
if (auto fill = insertFill(op->input(i))) {
|
||||
updated_tile.push_back(fill);
|
||||
}
|
||||
@ -116,7 +116,7 @@ ngraph::snippets::code ngraph::snippets::Generator::generate(std::shared_ptr<ov:
|
||||
if (force_ptr_increment || loop->has_outer_loop) {
|
||||
std::vector<int64_t> new_finalization_offsets(loop->get_finalization_offsets());
|
||||
const auto& ptr_increments = loop->get_ptr_increments();
|
||||
for (auto i = 0; i < new_finalization_offsets.size(); i++) {
|
||||
for (size_t i = 0; i < new_finalization_offsets.size(); i++) {
|
||||
new_finalization_offsets[i] += ptr_increments[i];
|
||||
}
|
||||
loop->set_finalization_offsets(new_finalization_offsets);
|
||||
|
@ -54,7 +54,7 @@ void LoopBegin::validate_and_infer_types_except_LoopEnd() {
|
||||
const size_t num_inputs = get_input_size();
|
||||
set_output_size(num_inputs + 1);
|
||||
// All outputs are by-passed from inputs, except for the last one - it connects LoopBegin and LoopEnd
|
||||
for (int i = 0; i < num_inputs; i++)
|
||||
for (size_t i = 0; i < num_inputs; i++)
|
||||
get_output_descriptor(i).set_tensor_ptr(get_input_descriptor(i).get_output().get_tensor_ptr());
|
||||
set_output_type(num_inputs, element::f32, ov::PartialShape{ov::Shape{}});
|
||||
}
|
||||
@ -81,8 +81,10 @@ std::shared_ptr<LoopEnd> LoopBegin::get_loop_end() {
|
||||
|
||||
LoopEnd::LoopEnd(const std::vector<Output<Node>> &args, size_t work_amount, size_t work_amount_increment,
|
||||
std::vector<bool> apply_increments, std::vector<int64_t> finalization_offsets)
|
||||
: LoopBase(args, work_amount, work_amount_increment), finalization_offsets(std::move(finalization_offsets)),
|
||||
has_outer_loop(true), loop_io_size(0) {
|
||||
: LoopBase(args, work_amount, work_amount_increment),
|
||||
has_outer_loop(true),
|
||||
finalization_offsets(std::move(finalization_offsets)),
|
||||
loop_io_size(0) {
|
||||
ptr_increments.resize(apply_increments.size());
|
||||
std::transform(apply_increments.begin(), apply_increments.end(), ptr_increments.begin(),
|
||||
[work_amount_increment](bool apply) {
|
||||
@ -93,8 +95,11 @@ LoopEnd::LoopEnd(const std::vector<Output<Node>> &args, size_t work_amount, size
|
||||
|
||||
LoopEnd::LoopEnd(const std::vector<Output<Node>> &args, size_t work_amount, size_t work_amount_increment,
|
||||
std::vector<int64_t> ptr_increments, std::vector<int64_t> finalization_offsets)
|
||||
: LoopBase(args, work_amount, work_amount_increment), ptr_increments(std::move(ptr_increments)),
|
||||
finalization_offsets(std::move(finalization_offsets)), has_outer_loop(true), loop_io_size(0) {
|
||||
: LoopBase(args, work_amount, work_amount_increment),
|
||||
has_outer_loop(true),
|
||||
ptr_increments(std::move(ptr_increments)),
|
||||
finalization_offsets(std::move(finalization_offsets)),
|
||||
loop_io_size(0) {
|
||||
constructor_validate_and_infer_types();
|
||||
}
|
||||
|
||||
@ -172,7 +177,7 @@ void LoopEnd::validate_and_infer_types() {
|
||||
finalization_offsets.resize(loop_io_size, 0);
|
||||
set_output_size(num_inputs - 1);
|
||||
// All outputs are by-passed from inputs, except for the last one - it connects LoopBegin and LoopEnd
|
||||
for (int i = 0; i < num_inputs - 1; i++)
|
||||
for (size_t i = 0; i < num_inputs - 1; i++)
|
||||
get_output_descriptor(i).set_tensor_ptr(get_input_descriptor(i).get_output().get_tensor_ptr());
|
||||
}
|
||||
|
||||
|
@ -55,7 +55,7 @@ bool ngraph::snippets::pass::AlignElementType::run_on_model(const std::shared_pt
|
||||
}
|
||||
|
||||
if (op_supports_only_exec_type(op)) {
|
||||
for (auto i = 0; i < op->inputs().size(); i++) {
|
||||
for (size_t i = 0; i < op->inputs().size(); i++) {
|
||||
auto shared_input = op->get_input_node_shared_ptr(i);
|
||||
auto existing_convert = ov::as_type_ptr<ov::op::v0::Convert>(shared_input);
|
||||
// We should insert Convert before Ops, which supports only exec element type, only when:
|
||||
@ -75,7 +75,7 @@ bool ngraph::snippets::pass::AlignElementType::run_on_model(const std::shared_pt
|
||||
rewritten |= true;
|
||||
}
|
||||
} else { // branch for Movement ops, MatMul ops in the future and for the Convert, Result
|
||||
for (auto i = 0; i < op->inputs().size(); i++) {
|
||||
for (size_t i = 0; i < op->inputs().size(); i++) {
|
||||
auto shared_input = op->get_input_node_shared_ptr(i);
|
||||
// it's original element type because we don't use validate_and_infer_type() anywhere
|
||||
const auto original_eltype = op->input(i).get_element_type();
|
||||
|
@ -7,8 +7,13 @@
|
||||
#include "snippets/snippets_isa.hpp"
|
||||
#include <iterator>
|
||||
|
||||
#if defined(__clang__)
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wunused-lambda-capture"
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
static constexpr size_t reg_count = 16lu;
|
||||
constexpr size_t reg_count = 16lu;
|
||||
} // namespace
|
||||
|
||||
bool ngraph::snippets::pass::AssignRegisters::run_on_model(const std::shared_ptr<ov::Model>& f) {
|
||||
@ -187,7 +192,7 @@ bool ngraph::snippets::pass::AssignRegisters::run_on_model(const std::shared_ptr
|
||||
auto op = typed_ops[n].second;
|
||||
for (const auto& out : op->outputs()) {
|
||||
for (const auto& port : out.get_target_inputs()) {
|
||||
auto k = std::find(ops.begin(), ops.end(), port.get_node()->shared_from_this()) - ops.begin();
|
||||
size_t k = std::find(ops.begin(), ops.end(), port.get_node()->shared_from_this()) - ops.begin();
|
||||
if (k == ops.size())
|
||||
throw ngraph_error("assign registers can't find target op in the body");
|
||||
switch (typed_ops[k].first) {
|
||||
@ -314,3 +319,6 @@ bool ngraph::snippets::pass::AssignRegisters::run_on_model(const std::shared_ptr
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(__clang__)
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
@ -18,7 +18,7 @@ ngraph::snippets::pass::BroadcastToMoveBroadcast::BroadcastToMoveBroadcast() {
|
||||
|
||||
auto m_broadcast = ngraph::pattern::wrap_type<ngraph::op::v1::Broadcast, ngraph::op::v3::Broadcast>();
|
||||
|
||||
auto callback = [this](ngraph::pattern::Matcher &m) {
|
||||
auto callback = [](ngraph::pattern::Matcher &m) {
|
||||
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::BroadcastToMoveBroadcast")
|
||||
auto root = m.get_match_root();
|
||||
if (auto broadcast_v1 = ov::as_type_ptr<const ov::op::v1::Broadcast>(root)) {
|
||||
|
@ -40,7 +40,7 @@ auto outputs_are_not_broadcastable(const std::shared_ptr<const Node>& node) -> b
|
||||
return false;
|
||||
ov::PartialShape ref_shape = outputs.front().get_partial_shape();
|
||||
bool success = true;
|
||||
for (int i = 1; i < outputs.size() && success; i++) {
|
||||
for (size_t i = 1; i < outputs.size() && success; i++) {
|
||||
success &= ov::PartialShape::broadcast_merge_into(ref_shape, outputs[i].get_partial_shape(), ov::op::AutoBroadcastType::NUMPY);
|
||||
}
|
||||
return !success;
|
||||
|
@ -17,7 +17,7 @@ ngraph::snippets::pass::ConvertConstantsToScalars::ConvertConstantsToScalars() {
|
||||
[](std::shared_ptr<Node> n) {
|
||||
return ngraph::is_type<ov::op::v0::Constant>(n);
|
||||
});
|
||||
ngraph::graph_rewrite_callback callback = [this](ngraph::pattern::Matcher &m) {
|
||||
ngraph::graph_rewrite_callback callback = [](ngraph::pattern::Matcher &m) {
|
||||
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::ConvertConstantsToScalars")
|
||||
auto constant = as_type_ptr<ov::op::v0::Constant>(m.get_match_root());
|
||||
if (ov::shape_size(constant->get_output_shape(0)) != 1)
|
||||
|
@ -15,7 +15,7 @@ ngraph::snippets::pass::ConvertPowerToPowerStatic::ConvertPowerToPowerStatic() {
|
||||
return is_type<ov::op::v1::Power>(n) &&
|
||||
is_type<snippets::op::Scalar>(n->get_input_node_shared_ptr(1));
|
||||
});
|
||||
ngraph::graph_rewrite_callback callback = [this](ngraph::pattern::Matcher &m) {
|
||||
ngraph::graph_rewrite_callback callback = [](ngraph::pattern::Matcher &m) {
|
||||
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::ConvertConstantsToScalars")
|
||||
auto power = ov::as_type_ptr<ov::op::v1::Power>(m.get_match_root());
|
||||
auto scalar = ov::as_type_ptr<snippets::op::Scalar>(power->get_input_node_shared_ptr(1));
|
||||
|
@ -221,7 +221,7 @@ bool ngraph::snippets::pass::FakeQuantizeDecomposition::getScalesAndShifts(
|
||||
osc.resize(output_size, 0);
|
||||
osh.resize(output_size, 0);
|
||||
|
||||
for (int i = 0; i < input_size; i++) {
|
||||
for (size_t i = 0; i < input_size; i++) {
|
||||
float il = input_low[input_low.size() == 1 ? 0 : i];
|
||||
float ih = input_high[input_high.size() == 1 ? 0 : i];
|
||||
|
||||
@ -229,7 +229,7 @@ bool ngraph::snippets::pass::FakeQuantizeDecomposition::getScalesAndShifts(
|
||||
ish[i] = -il * isc[i];
|
||||
}
|
||||
|
||||
for (int i = 0; i < output_size; i++) {
|
||||
for (size_t i = 0; i < output_size; i++) {
|
||||
float ol = output_low[output_low.size() == 1 ? 0 : i];
|
||||
float oh = output_high[output_high.size() == 1 ? 0 : i];
|
||||
|
||||
@ -276,13 +276,13 @@ std::vector<float> ngraph::snippets::pass::FakeQuantizeDecomposition::calculateS
std::all_of(osc.cbegin(), osc.cend(), [](float val) { return val == 1.f; }) &&
std::all_of(osh.cbegin(), osh.cend(), [](float val) { return std::abs(val + 128.f) < thr; })) {
bool is_crop_aligned = true;
-for (int i = 0; i < std::max(cl.size(), isc.size()); i++) {
+for (size_t i = 0; i < std::max(cl.size(), isc.size()); i++) {
if (std::abs(cl[cl.size() == 1 ? 0 : i] * isc[isc.size() == 1 ? 0 : i] + 128.f) > thr) {
is_crop_aligned = false;
}
}

-for (int i = 0; i < std::max(ch.size(), isc.size()); i++) {
+for (size_t i = 0; i < std::max(ch.size(), isc.size()); i++) {
if (std::abs(ch[ch.size() == 1 ? 0 : i] * isc[isc.size() == 1 ? 0 : i] - 127.f) > thr) {
is_crop_aligned = false;
}
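The FakeQuantize hunks lean on the indexing idiom v[v.size() == 1 ? 0 : i], which treats a one-element vector as a single value broadcast across all channels while a longer vector is read per channel. A small illustrative sketch (broadcast_at is a hypothetical helper, not part of the decomposition):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Returns element i of a FakeQuantize-style parameter vector that holds
    // either one value shared by all channels or one value per channel.
    float broadcast_at(const std::vector<float>& v, size_t i) {
        return v[v.size() == 1 ? 0 : i];
    }

    int main() {
        std::vector<float> per_channel{0.5f, 1.0f, 2.0f};
        std::vector<float> scalar{3.0f};
        std::cout << broadcast_at(per_channel, 2) << " " << broadcast_at(scalar, 2) << "\n";  // 2 3
        return 0;
    }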
@ -65,7 +65,7 @@ FuseTransposeBrgemm::FuseTransposeBrgemm() {
in.replace_source_output(brgemm->output(0));
set_layout_from_order(as_type_ptr<opset1::Transpose>(transpose_out.get_node_shared_ptr()), brgemm_out);
}
-for (int i = 0; i < brgemm->get_input_size(); i++) {
+for (size_t i = 0; i < brgemm->get_input_size(); i++) {
const auto& in_value = brgemm->input_value(i);
if (transpose_matcher->match(in_value)) {
const auto& transpose = as_type_ptr<opset1::Transpose>(in_value.get_node_shared_ptr());
@ -20,7 +20,7 @@ ngraph::snippets::pass::InsertBuffer::InsertBuffer(const int32_t allocation_rank
op::Brgemm>();

register_matcher(std::make_shared<ngraph::pattern::Matcher>(pattern, matcher_name),
-[this, allocation_rank](ngraph::pattern::Matcher &m) {
+[allocation_rank](ngraph::pattern::Matcher &m) {
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::InsertBuffer")
auto root = m.get_match_root();
bool rewritten = false;
@ -53,14 +53,15 @@ ngraph::snippets::pass::InsertBuffer::InsertBuffer(const int32_t allocation_rank
// we should remove them to insert one common Buffer on one common port
replace_output_update_name(output_node->output(0), output_node->input_value(0));
} else if (ov::is_type<ngraph::op::v0::Result>(output_node)) {
-// TODO: At this moment operation which is should be wrapped by Buffers doesn't support several childs where one of them is Result
-// because Result and Buffer from one root port should have the same register. It's not supported at the moment
-// For example,
-// Buffer
-//   |
-// Softmax
-//  /    \
-// Buffer Result
+/* TODO: At this moment operation which is should be wrapped by Buffers doesn't support several childs where one of them is Result
+ * because Result and Buffer from one root port should have the same register. It's not supported at the moment
+ * For example,
+ * Buffer
+ *   |
+ * Softmax
+ *  /    \
+ * Buffer Result
+ */
throw ngraph::ngraph_error(
"Operation which is should be wrapped by Buffers has few children from one output port where one of them is Result");
}
@ -16,7 +16,7 @@ ngraph::snippets::pass::InsertLoad::InsertLoad(const size_t count) {
MATCHER_SCOPE(InsertLoad);
register_matcher(std::make_shared<ngraph::pattern::Matcher>(
ngraph::pattern::wrap_type<ngraph::opset1::Parameter, ngraph::snippets::op::Buffer>(), matcher_name),
-[this, count](ngraph::pattern::Matcher &m) {
+[count](ngraph::pattern::Matcher &m) {
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::InsertLoad")
auto root = m.get_match_root();
@ -58,7 +58,7 @@ ngraph::snippets::pass::InsertStore::InsertStore(const size_t count) {
MATCHER_SCOPE(InsertStore);
register_matcher(std::make_shared<ngraph::pattern::Matcher>(
ngraph::pattern::wrap_type<ngraph::opset1::Result, ngraph::snippets::op::Buffer>(), matcher_name),
-[this, count](ngraph::pattern::Matcher &m) {
+[count](ngraph::pattern::Matcher &m) {
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::InsertStore")
auto root = m.get_match_root();
@ -236,10 +236,10 @@ bool InsertLoops::run_on_model(const std::shared_ptr<ov::Model> &model) {
const auto& new_shapes = plugin_shapes->second.as<std::vector<std::vector<size_t>>>();
if (new_shapes.size() != commonResults.size() + commonParams.size())
throw ngraph_error("InsertLoops got invalid number of plugin-overriden shapes");
-for (int i = 0; i < commonParams.size(); i++)
+for (size_t i = 0; i < commonParams.size(); i++)
ioShapes.emplace_back(new_shapes[i]);
// reverse overriden_shapes for results since commonResults are reversed with respect to model->get_parameters()
-for (int i = 0; i < commonResults.size(); i++)
+for (size_t i = 0; i < commonResults.size(); i++)
ioShapes.emplace_back(new_shapes[new_shapes.size() - 1 - i]);
}
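The second loop above reads the plugin-overridden result shapes from the back of new_shapes because, as the in-code comment notes, results arrive reversed relative to the parameters. A toy sketch of that index arithmetic, simplified to plain ints with illustrative names only:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
        // Toy stand-in for the plugin-provided shapes: parameter entries first,
        // then result entries stored in reverse order.
        std::vector<int> new_shapes{10, 20, 31, 32};  // 2 parameters, then 2 results reversed
        size_t num_params = 2, num_results = 2;

        std::vector<int> io_shapes;
        for (size_t i = 0; i < num_params; i++)
            io_shapes.push_back(new_shapes[i]);
        // Walk the tail backwards so result 0 gets the last entry, result 1 the one before it.
        for (size_t i = 0; i < num_results; i++)
            io_shapes.push_back(new_shapes[new_shapes.size() - 1 - i]);

        for (int s : io_shapes) std::cout << s << " ";  // 10 20 32 31
        std::cout << "\n";
        return 0;
    }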
@ -21,7 +21,7 @@ namespace {

std::pair<ov::PartialShape, std::vector<ov::PartialShape>> get_numpy_broadcast_partial_shapes(const std::vector<ov::PartialShape>& input_shapes) {
ov::PartialShape target_shape = input_shapes.front();
-for (auto i = 1; i < input_shapes.size(); i++) {
+for (size_t i = 1; i < input_shapes.size(); i++) {
if (!ov::PartialShape::broadcast_merge_into(target_shape, input_shapes[i], op::AutoBroadcastType::NUMPY))
throw ngraph::ngraph_error("InsertMoveBroadcast: Failed broadcast-merge input shapes");
}
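get_numpy_broadcast_partial_shapes folds every input shape into one target via ov::PartialShape::broadcast_merge_into under NumPy rules. Below is a deliberately simplified stand-in for static shapes, shown only to illustrate the merge semantics; it is not the OpenVINO implementation:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <utility>
    #include <vector>

    // Simplified NumPy-style broadcast merge for static shapes: align shapes at the
    // trailing dimensions; each pair of dims must match or one of them must be 1.
    bool broadcast_merge(std::vector<size_t>& target, const std::vector<size_t>& src) {
        std::vector<size_t> out(std::max(target.size(), src.size()), 1);
        for (size_t i = 0; i < out.size(); i++) {
            size_t a = i < out.size() - target.size() ? 1 : target[i - (out.size() - target.size())];
            size_t b = i < out.size() - src.size() ? 1 : src[i - (out.size() - src.size())];
            if (a != b && a != 1 && b != 1)
                return false;  // not broadcastable
            out[i] = std::max(a, b);
        }
        target = std::move(out);
        return true;
    }

    int main() {
        std::vector<size_t> target{1, 3, 1};
        std::cout << broadcast_merge(target, {5, 1, 4}) << "\n";  // 1: target becomes {5, 3, 4}
        std::cout << broadcast_merge(target, {2, 2}) << "\n";     // 0: 2 vs 3 is not broadcastable
        return 0;
    }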
@ -62,7 +62,7 @@ ngraph::Output<ngraph::Node> ngraph::snippets::pass::InsertMoveBroadcast::Broadc

ngraph::snippets::pass::InsertMoveBroadcast::InsertMoveBroadcast() {
MATCHER_SCOPE(InsertMoveBroadcast);
-ngraph::graph_rewrite_callback callback = [this](ngraph::pattern::Matcher &m) {
+ngraph::graph_rewrite_callback callback = [](ngraph::pattern::Matcher &m) {
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::InsertMoveBroadcast")
auto root = m.get_match_root();
const auto& values = root->input_values();
@ -25,19 +25,20 @@ auto can_be_merged(const std::shared_ptr<ngraph::snippets::op::LoopEnd>& loop_en
loop_end_down->get_increment() != loop_end_up->get_increment())
return false;

-// If between Loops there are common dependencies (for example, reducing operations), we cannot merge these Loops
-// Example, when there is HorizonMax op between Loops:
-// Data
-// VectorBuffer LoopBegin
-// \ Load | \
-// Maximum | /
-// / LoopEnd
-// HorizonMax |
-// \ LoopBegin
-// \ Load \
-// Subtract |
-// Store /
-// LoopEnd
+/* If between Loops there are common dependencies (for example, reducing operations), we cannot merge these Loops
+ * Example, when there is HorizonMax op between Loops:
+ * Data
+ * VectorBuffer LoopBegin
+ * \ Load | \
+ * Maximum | /
+ * / LoopEnd
+ * HorizonMax |
+ * \ LoopBegin
+ * \ Load \
+ * Subtract |
+ * Store /
+ * LoopEnd
+ */
auto up_dependent_ptrs = loop_end_up->get_control_dependents();
ov::NodeVector up_dependents(up_dependent_ptrs.size(), nullptr);
std::transform(up_dependent_ptrs.begin(), up_dependent_ptrs.end(), up_dependents.begin(), [](ngraph::Node* node) { return node->shared_from_this(); });
@ -65,7 +66,7 @@ auto get_buffer_and_loop_end(const std::shared_ptr<ngraph::snippets::op::LoopBeg
continue;

// We can fuse Loops even LoopBegin has several the same inputs (the common Buffer/LoopEnd)
-if (buffer && buffer == parent_shared || !buffer && loop_end_up && loop_end_up == parent_shared)
+if ((buffer && buffer == parent_shared) || (!buffer && loop_end_up && loop_end_up == parent_shared))
continue;

loop_end_up = ngraph::as_type_ptr<ngraph::snippets::op::LoopEnd>(parent_shared);
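The parentheses added here do not change behaviour: && already binds tighter than ||, so the old and new conditions parse identically. The change only documents the grouping and silences -Wparentheses-style warnings. A tiny sketch with placeholder booleans (matches_buffer and matches_loop stand in for the equality checks in the pass):

    #include <iostream>

    int main() {
        bool buffer = false, matches_buffer = true, loop_end_up = true, matches_loop = true;

        // Without parentheses this would parse exactly the same way:
        // (buffer && matches_buffer) || (!buffer && loop_end_up && matches_loop)
        bool skip = (buffer && matches_buffer) || (!buffer && loop_end_up && matches_loop);

        std::cout << std::boolalpha << skip << "\n";  // true
        return 0;
    }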
@ -16,7 +16,7 @@ std::shared_ptr<LoopBegin> insertLoopBeginAfterOutputs(const OutputVector& origi

auto loop_begin = std::make_shared<LoopBegin>(originalOutputs);

-for (int i = 0; i < originalChildInputs.size(); i++) {
+for (size_t i = 0; i < originalChildInputs.size(); i++) {
for (auto& input : originalChildInputs[i]) {
input.replace_source_output(loop_begin->output(i));
}
@ -37,7 +37,7 @@ std::shared_ptr<LoopEnd> insertLoopEndBeforeInputs(const std::vector<Input<Node>
auto loop_end = std::make_shared<LoopEnd>(originalParentOutputs, work_amount, increment,
std::move(apply_increment), std::move(finalization_offsets));

-for (int i = 0; i < originalInputs.size(); i++) {
+for (size_t i = 0; i < originalInputs.size(); i++) {
originalInputs[i].replace_source_output(loop_end->output(i));
}
return loop_end;
@ -57,25 +57,26 @@ ngraph::snippets::pass::SoftmaxDecomposition::SoftmaxDecomposition(const size_t

/* ====== ReduceMax decomposition ====== */

-// We have to have fake edge Data -> Loop[ReduceMax] -> Loop[Sub + Exp + ReduceSum] because ReduceMax is
-// accumulator which finds maximum of elements and save it to vector register. Loop works only with GPR (data) but ReduceMax Loop
-// doesn't save maximum to data. Seems like, LoopEnd shouldn't have outputs:
-// Data
-// VectorBuffer LoopBegin \
-// \ Load \ |
-// Maximum / |
-// / LoopEnd |
-// HorizonMax /
-// \ LoopBegin[Sub + Exp + ReduceSum]
-// But nGraph doesn't allow to have 0 outputs for Node (at least 1 output).
-// Thus, we propagate data through Loop[ReduceMax] using fake edge because of that Loop[ReduceMax] has two inputs "Data"
-// Data
-// VectorBuffer LoopBegin
-// \ Load | \
-// Maximum | /
-// / LoopEnd
-// HorizonMax |
-// \ LoopBegin[Sub + Exp + ReduceSum]
+/* We have to have fake edge Data -> Loop[ReduceMax] -> Loop[Sub + Exp + ReduceSum] because ReduceMax is
+ * accumulator which finds maximum of elements and save it to vector register. Loop works only with GPR (data) but ReduceMax Loop
+ * doesn't save maximum to data. Seems like, LoopEnd shouldn't have outputs:
+ * Data
+ * VectorBuffer LoopBegin \
+ * \ Load \ |
+ * Maximum / |
+ * / LoopEnd |
+ * HorizonMax /
+ * \ LoopBegin[Sub + Exp + ReduceSum]
+ * But nGraph doesn't allow to have 0 outputs for Node (at least 1 output).
+ * Thus, we propagate data through Loop[ReduceMax] using fake edge because of that Loop[ReduceMax] has two inputs "Data"
+ * Data
+ * VectorBuffer LoopBegin
+ * \ Load | \
+ * Maximum | /
+ * / LoopEnd
+ * HorizonMax |
+ * \ LoopBegin[Sub + Exp + ReduceSum]
+ */
const auto vector_buffer_max = std::make_shared<ngraph::snippets::op::VectorBuffer>();
const auto loop_max_begin = ngraph::snippets::op::insertLoopBegin(ngraph::OutputVector{data, data});
@ -22,8 +22,7 @@ ngraph::snippets::pass::TransformConvertToConvertTruncation::TransformConvertToC
});

register_matcher(std::make_shared<ngraph::pattern::Matcher>(
-ngraph::pattern::wrap_type<ngraph::opset1::Convert>(), matcher_name),
-[this](ngraph::pattern::Matcher &m) {
+ngraph::pattern::wrap_type<ngraph::opset1::Convert>(), matcher_name), [](ngraph::pattern::Matcher &m) {
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::TransformConvertToConvertTruncation")
const auto root = m.get_match_root();
const auto convert = ngraph::as_type_ptr<ngraph::opset1::Convert>(root);
@ -92,7 +92,7 @@ ov::PartialShape get_reordered_planar_shape(const ov::PartialShape& shape, const
// Note that it can be smaller though, for example tensor shape can be prepended with 1 for scheduling purposes
if (std::any_of(layout.begin(), layout.end(), [=](size_t x) {return x >= rank;}))
throw ngraph_error("Invalid layout detected: all layout indexes must be smaller than the tensor rank");
-for (int i = 0; i < layout.size(); i++)
+for (size_t i = 0; i < layout.size(); i++)
reordered_shape[i] = shape[layout[i]];
return reordered_shape;
}
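get_reordered_planar_shape permutes a shape by a layout vector after checking that every layout index is smaller than the tensor rank. A self-contained sketch of the same permutation on plain static shapes (reorder is a hypothetical helper, simplified from the real function, which works on ov::PartialShape):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <stdexcept>
    #include <vector>

    // Reorders a static shape according to a layout permutation, e.g. layout {0, 2, 3, 1}
    // turns an NCHW shape into NHWC. Mirrors the rank check shown in the hunk above.
    std::vector<size_t> reorder(const std::vector<size_t>& shape, const std::vector<size_t>& layout) {
        const size_t rank = shape.size();
        if (std::any_of(layout.begin(), layout.end(), [=](size_t x) { return x >= rank; }))
            throw std::runtime_error("Invalid layout: index exceeds tensor rank");
        std::vector<size_t> reordered(layout.size());
        for (size_t i = 0; i < layout.size(); i++)
            reordered[i] = shape[layout[i]];
        return reordered;
    }

    int main() {
        for (size_t d : reorder({1, 3, 224, 224}, {0, 2, 3, 1}))
            std::cout << d << " ";  // 1 224 224 3
        std::cout << "\n";
        return 0;
    }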
@ -12,7 +12,7 @@ namespace test {
namespace snippets {

DummyTargetMachine::DummyTargetMachine() {
-auto dummy_functor = [this](const std::shared_ptr<ngraph::Node>& n) {
+auto dummy_functor = [](const std::shared_ptr<ngraph::Node>& n) {
return std::make_shared<DummyEmitter>();
};
jitters[op::v0::Parameter::get_type_info_static()] = dummy_functor;
@ -30,7 +30,7 @@ void BroadcastToMoveBroadcastTests::SetUp() {
std::tie(inputShapes[0], inputShapes[1], broadcast_shape) = this->GetParam();
snippets_function = std::make_shared<BroadcastAddLoweredFunction>(inputShapes, broadcast_shape);
master_shape = {};
-for (int i = 0; i < inputShapes[0].size(); i++)
+for (size_t i = 0; i < inputShapes[0].size(); i++)
master_shape.push_back(static_cast<int64_t>(std::max(inputShapes[0].get_shape()[i], inputShapes[1].get_shape()[i])));
}
@ -32,7 +32,7 @@ void InsertMoveBroadcastTests::SetUp() {
if (inputShapes[0].size() != inputShapes[1].size())
IE_THROW() << "Expected input shapes of the same size";
master_shape = {};
-for (int i = 0; i < inputShapes[0].size(); i++)
+for (size_t i = 0; i < inputShapes[0].size(); i++)
master_shape.push_back(static_cast<int64_t>(std::max(inputShapes[0][i], inputShapes[1][i])));
}