Merge branch 'master' into itikhono/ts/fix_performance_issues

Ivan Tikhonov 2023-02-12 21:51:00 +04:00 committed by GitHub
commit f5bff5c087
494 changed files with 7032 additions and 3449 deletions

View File

@ -62,23 +62,8 @@ jobs:
TMP_DIR: /mnt/tmp
SHARE_DIR: /mount/cinfsshare/onnxtestdata
CCACHE_DIR: $(SHARE_DIR)/ccache/master/android_arm64
LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib
OV_PYTHON_VERSION: 3.10.9 # Newest version of Python 3.10. The full Python version is required for LD_LIBRARY_PATH and for the setupPython task to succeed
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.10'
addToPath: true
disableDownloadFromRegistry: false
architecture: 'x64'
githubToken: $(auth_token)
displayName: Setup Python 3.10
name: setupPython
- bash: |
#!/bin/bash
python -V
- script: |
curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
whoami

View File

@ -40,9 +40,6 @@ resources:
name: openvinotoolkit/testdata
ref: master
variables:
- group: github
jobs:
- job: Lin
strategy:
@ -99,23 +96,8 @@ jobs:
CMAKE_VERSION: 3.24.0
BUILD_PYTHON: $(WORK_DIR)/build_python
INSTALL_PYTHON: $(INSTALL_OPENVINO)/extras/python
LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib
OV_PYTHON_VERSION: 3.10.9 # Newest version of Python 3.10. The full Python version is required for LD_LIBRARY_PATH and for the setupPython task to succeed
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.10'
addToPath: true
disableDownloadFromRegistry: false
architecture: 'x64'
githubToken: $(auth_token)
displayName: Setup Python 3.10
name: setupPython
- bash: |
#!/bin/bash
python -V
- script: |
curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
whoami
@ -300,31 +282,37 @@ jobs:
- script: ls -alR $(INSTALL_DIR)
displayName: 'List install test files'
# Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
- script: |
export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(LD_LIBRARY_PATH)
python3 -m pytest -s $(INSTALL_TEST_DIR)/pyngraph $(PYTHON_STATIC_ARGS) \
--junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
--ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_zoo_models.py \
--ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_backend.py
env:
# because of the static build, libgna is needed for the python binary
LD_LIBRARY_PATH: $(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64
displayName: 'nGraph and IE Python Bindings Tests'
# Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
- script: |
# For python imports to import pybind_mock_frontend
export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(LD_LIBRARY_PATH)
export PYTHONPATH=$(INSTALL_TEST_DIR):$(INSTALL_DIR)/python/python3.8:$PYTHONPATH
python3 -m pytest -sv $(INSTALL_TEST_DIR)/pyopenvino $(PYTHON_STATIC_ARGS) \
--junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
--ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_utils/test_utils.py \
--ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_onnx/test_zoo_models.py \
--ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_onnx/test_backend.py
env:
# because of the static build, libgna is needed for the python binary and the mock_py frontend library
LD_LIBRARY_PATH: $(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(INSTALL_TEST_DIR)
displayName: 'Python API 2.0 Tests'
- script: |
export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(LD_LIBRARY_PATH)
python3 -m pytest -s $(INSTALL_TEST_DIR)/mo/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-ModelOptimizer.xml
env:
# because of the static build, libgna is needed for the python binary
LD_LIBRARY_PATH: $(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64
displayName: 'Model Optimizer UT'
- script: |

View File

@ -39,9 +39,6 @@ resources:
name: openvinotoolkit/openvino_contrib
ref: master
variables:
- group: github
jobs:
- job: linux_arm64
# About 150% of total time
@ -80,23 +77,8 @@ jobs:
OPENVINO_CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux_arm64
OPENCV_CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux_arm64_opencv
ONETBB_CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux_arm64_onetbb
LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib:$LD_LIBRARY_PATH
OV_PYTHON_VERSION: 3.10.9 # Newest version of Python 3.10. The full Python version is required for LD_LIBRARY_PATH and for the setupPython task to succeed
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.10'
addToPath: true
disableDownloadFromRegistry: false
architecture: 'x64'
githubToken: $(auth_token)
displayName: Setup Python 3.10
name: setupPython
- bash: |
#!/bin/bash
python -V
- script: |
curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
whoami

View File

@ -36,9 +36,6 @@ resources:
endpoint: openvinotoolkit
name: openvinotoolkit/testdata
variables:
- group: github
jobs:
- job: LinCC
# About 150% of total time
@ -58,22 +55,8 @@ jobs:
BUILD_DIR: $(WORK_DIR)/build
INSTALL_DIR: $(WORK_DIR)/install_pkg
SETUPVARS: $(INSTALL_DIR)/setupvars.sh
LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib:$LD_LIBRARY_PATH
OV_PYTHON_VERSION: 3.10.9 # Newest version of Python 3.10. The full Python version is required for LD_LIBRARY_PATH and for the setupPython task to succeed
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.10'
addToPath: true
disableDownloadFromRegistry: false
architecture: 'x64'
githubToken: $(auth_token)
displayName: Setup Python 3.10
name: setupPython
- bash: |
#!/bin/bash
python -V
- script: |
curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
whoami

View File

@ -65,23 +65,8 @@ jobs:
TMP_DIR: /mnt/tmp
SHARE_DIR: /mount/cinfsshare/onnxtestdata
CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux
LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib
OV_PYTHON_VERSION: 3.10.9 # Newest version of Python 3.10. The full Python version is required for LD_LIBRARY_PATH and for the setupPython task to succeed
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.10'
addToPath: true
disableDownloadFromRegistry: false
architecture: 'x64'
githubToken: $(auth_token)
displayName: Setup Python 3.10
name: setupPython
- bash: |
#!/bin/bash
python -V
- script: |
curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
whoami
@ -240,9 +225,6 @@ jobs:
# Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
- script: |
# TODO (vurusovs): revert skip of test_core.py::test_register_plugin*,
# test should be fixed
export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(LD_LIBRARY_PATH)
python3 -m pytest -s $(INSTALL_TEST_DIR)/pyngraph \
--junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
--ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_zoo_models.py \
@ -257,9 +239,6 @@ jobs:
export LD_LIBRARY_PATH=$(PYTHON_WHEEL_INSTALL_DIR)/openvino/libs:$(INSTALL_TEST_DIR):$LD_LIBRARY_PATH
# For python imports to import pybind_mock_frontend
export PYTHONPATH=$(INSTALL_TEST_DIR):$PYTHONPATH
# TODO (vurusovs): revert skip of test_core.py::test_register_plugin*,
# test should be fixed
export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(LD_LIBRARY_PATH)
python3 -m pytest -s $(INSTALL_TEST_DIR)/pyopenvino \
--junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
--ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_utils/test_utils.py \
@ -267,9 +246,7 @@ jobs:
--ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_onnx/test_backend.py -v
displayName: 'Python API 2.0 Tests'
- script: |
export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(LD_LIBRARY_PATH)
python3 -m pytest -s $(INSTALL_TEST_DIR)/mo/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-ModelOptimizer.xml
- script: python3 -m pytest -s $(INSTALL_TEST_DIR)/mo/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-ModelOptimizer.xml
displayName: 'Model Optimizer UT'
- script: |
@ -316,6 +293,8 @@ jobs:
- script: |
$(INSTALL_TEST_DIR)/ov_core_unit_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-NGraphUT.xml
env:
LD_LIBRARY_PATH: $(INSTALL_TEST_DIR)
displayName: 'OV Core UT'
- script: |

View File

@ -31,9 +31,6 @@ pr:
- 'tools/*'
- 'tests/layer_tests/*'
variables:
- group: github
jobs:
- job: onnxruntime
timeoutInMinutes: '90'
@ -55,23 +52,8 @@ jobs:
BUILD_DIR: $(WORK_DIR)/build
ONNXRUNTIME_UTILS: $(REPO_DIR)/.ci/azure/ci_utils/onnxruntime
ONNXRUNTIME_BUILD_DIR: $(ONNXRUNTIME_REPO_DIR)/build
LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib:$LD_LIBRARY_PATH
OV_PYTHON_VERSION: 3.10.9 # Newest version of Python 3.10. The full Python version is required for LD_LIBRARY_PATH and for the setupPython task to succeed
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.10'
addToPath: true
disableDownloadFromRegistry: false
architecture: 'x64'
githubToken: $(auth_token)
displayName: Setup Python 3.10
name: setupPython
- bash: |
#!/bin/bash
python -V
- script: |
curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
whoami

View File

@ -9,17 +9,19 @@ ov_coverage_clean(REPOSITORY "openvino"
ov_coverage_capture(INFO_FILE "openvino"
BASE_DIRECTORY "${OV_COVERAGE_BASE_DIRECTORY}"
DIRECTORY "${OV_COVERAGE_GCDA_DATA_DIRECTORY}"
EXCLUDE_PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/*.pb.cc"
"${OV_COVERAGE_BASE_DIRECTORY}/*.pb.h"
"${OV_COVERAGE_BASE_DIRECTORY}/*/tests/*"
"${OV_COVERAGE_BASE_DIRECTORY}/*/tests_deprecated/*"
"${OV_COVERAGE_BASE_DIRECTORY}/thirdparty/*") # Skip some pb files, tests and thirdparty
EXCLUDE_PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/*.pb.cc"
"${OV_COVERAGE_BASE_DIRECTORY}/*.pb.h"
"${OV_COVERAGE_BASE_DIRECTORY}/*/tests/*"
"${OV_COVERAGE_BASE_DIRECTORY}/*/tests_deprecated/*"
"${OV_COVERAGE_BASE_DIRECTORY}/thirdparty/*"
"${OV_COVERAGE_BASE_DIRECTORY}/CMakeCXXCompilerId.cpp"
"${OV_COVERAGE_BASE_DIRECTORY}/CMakeCCompilerId.c") # Skip some service files, tests and thirdparty
# Generate reports
# Common report
ov_coverage_genhtml(INFO_FILE "openvino"
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
# Generate reports
##################### Core Components #####################
ov_coverage_extract(INPUT "openvino" OUTPUT "inference"
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/inference/*")

View File

@ -292,7 +292,7 @@ function(ie_mark_target_as_cc TARGET_NAME)
endif()
target_link_libraries(${TARGET_NAME} PRIVATE ${cc_library})
if(NOT (SELECTIVE_BUILD STREQUAL "ON"))
if(NOT SELECTIVE_BUILD STREQUAL "ON")
return()
endif()

View File

@ -6,22 +6,24 @@ include(ProcessorCount)
include(CheckCXXCompilerFlag)
#
# disable_deprecated_warnings()
# ov_disable_deprecated_warnings()
#
# Disables generation of deprecated warnings in the current scope (directory, function)
# Defines the ie_c_cxx_deprecated variable which contains C / C++ compiler flags
#
macro(disable_deprecated_warnings)
macro(ov_disable_deprecated_warnings)
if(WIN32)
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(ie_c_cxx_deprecated "/Qdiag-disable:1478,1786")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(ie_c_cxx_deprecated "/wd4996")
elseif(OV_COMPILER_IS_CLANG)
set(ie_c_cxx_deprecated "-Wno-deprecated-declarations")
endif()
else()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(ie_c_cxx_deprecated "-diag-disable=1478,1786")
else()
elseif(OV_COMPILER_IS_CLANG OR CMAKE_COMPILER_IS_GNUCXX)
set(ie_c_cxx_deprecated "-Wno-deprecated-declarations")
endif()
endif()
@ -36,30 +38,36 @@ macro(disable_deprecated_warnings)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ie_c_cxx_deprecated}")
endmacro()
macro(disable_deprecated_warnings)
ov_disable_deprecated_warnings()
endmacro()
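A minimal usage sketch (not part of this patch; the target and file names are hypothetical) of how a component that still consumes deprecated OpenVINO APIs might call the renamed macro, with the old spelling continuing to work through the compatibility alias above:
```cmake
# hypothetical CMakeLists.txt of a legacy component
ov_disable_deprecated_warnings()           # appends e.g. -Wno-deprecated-declarations to this scope
add_library(legacy_component legacy.cpp)   # builds without deprecation noise
# disable_deprecated_warnings()            # old name still works via the alias
```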
#
# ie_deprecated_no_errors()
# ov_deprecated_no_errors()
#
# Don't treat deprecated warnings as errors in the current scope (directory, function)
# Defines the ie_c_cxx_deprecated_no_errors variable which contains C / C++ compiler flags
#
macro(ie_deprecated_no_errors)
macro(ov_deprecated_no_errors)
if(WIN32)
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(ie_c_cxx_deprecated_no_errors "/Qdiag-warning:1478,1786")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# show 4996 only for /w4
set(ie_c_cxx_deprecated_no_errors "/wd4996")
elseif(OV_COMPILER_IS_CLANG)
set(ie_c_cxx_deprecated_no_errors "-Wno-error=deprecated-declarations")
endif()
else()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(ie_c_cxx_deprecated_no_errors "-diag-warning=1478,1786")
else()
elseif(OV_COMPILER_IS_CLANG OR CMAKE_COMPILER_IS_GNUCXX)
set(ie_c_cxx_deprecated_no_errors "-Wno-error=deprecated-declarations")
endif()
endif()
if(NOT ie_c_cxx_deprecated_no_errors)
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
endif()
if(NOT ie_c_cxx_deprecated_no_errors)
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
endif()
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${ie_c_cxx_deprecated_no_errors}")
@ -68,6 +76,25 @@ macro(ie_deprecated_no_errors)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ie_c_cxx_deprecated_no_errors}")
endmacro()
#
# ov_dev_package_no_errors()
#
# Exports flags for 3rdparty modules, but without errors
#
macro(ov_dev_package_no_errors)
if(OV_COMPILER_IS_CLANG OR CMAKE_COMPILER_IS_GNUCXX)
set(ie_c_cxx_dev_no_errors "-Wno-all")
if(SUGGEST_OVERRIDE_SUPPORTED)
set(ie_cxx_dev_no_errors "${ie_c_cxx_dev_no_errors} -Wno-error=suggest-override")
endif()
endif()
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${ie_c_cxx_dev_no_errors} ${ie_cxx_dev_no_errors}")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${ie_c_cxx_dev_no_errors}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ie_c_cxx_dev_no_errors} ${ie_cxx_dev_no_errors}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ie_c_cxx_dev_no_errors}")
endmacro()
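As a rough illustration (the directory and target names below are hypothetical), the new ov_dev_package_no_errors() macro is meant to be called in a scope that builds third-party or extra-module code, so the developer package's strict warning settings do not turn someone else's warnings into build failures:
```cmake
# hypothetical thirdparty/CMakeLists.txt
ov_dev_package_no_errors()    # relaxes -Wall and suggest-override errors for this scope
ov_deprecated_no_errors()     # deprecation warnings stay warnings instead of errors
add_subdirectory(external_library)
```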
#
# ie_sse42_optimization_flags(<output flags>)
#
@ -165,7 +192,10 @@ macro(ie_arm_neon_optimization_flags flags)
endif()
else()
if(AARCH64)
set(${flags} -O2 -ftree-vectorize)
set(${flags} -O2)
if(NOT CMAKE_CL_64)
list(APPEND ${flags} -ftree-vectorize)
endif()
elseif(ARM)
set(${flags} -mfpu=neon -Wno-unused-command-line-argument)
endif()
@ -190,7 +220,9 @@ function(ov_disable_all_warnings)
if(target_type STREQUAL "SHARED_LIBRARY" OR target_type STREQUAL "EXECUTABLE")
set(link_interface LINK_OPTIONS)
endif()
set_target_properties(${target} PROPERTIES ${link_interface} "-Wno-error=maybe-uninitialized;-Wno-maybe-uninitialized")
if(CMAKE_COMPILER_IS_GNUCXX)
set_target_properties(${target} PROPERTIES ${link_interface} "-Wno-error=maybe-uninitialized;-Wno-maybe-uninitialized")
endif()
elseif(UNIX AND CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
# 193: zero used for undefined preprocessing identifier "XXX"
# 1011: missing return statement at end of non-void function "XXX"
@ -238,6 +270,21 @@ function(ov_force_include target scope header_file)
endif()
endfunction()
#
# ie_python_minimal_api(<target>)
#
# Set options to use only Python Limited API
#
function(ie_python_minimal_api target)
# pybind11 uses a lot of APIs which are not part of the minimal Python API subset
# Ref 1: https://docs.python.org/3.11/c-api/stable.html
# Ref 2: https://github.com/pybind/pybind11/issues/1755
# target_compile_definitions(${target} PRIVATE Py_LIMITED_API=0x03090000)
# if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# target_compile_options(${target} PRIVATE "-Wno-unused-variable")
# endif()
endfunction()
#
# Compilation and linker flags
#
@ -262,34 +309,53 @@ if(ENABLE_COVERAGE)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --coverage")
endif()
if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
ie_add_compiler_flags(-fsigned-char)
endif()
# Honor visibility properties for all target types
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
set(CMAKE_C_VISIBILITY_PRESET hidden)
set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
function(ie_python_minimal_api target)
# pybind11 uses a lot of APIs which are not part of the minimal Python API subset
# Ref 1: https://docs.python.org/3.11/c-api/stable.html
# Ref 2: https://github.com/pybind/pybind11/issues/1755
# target_compile_definitions(${target} PRIVATE Py_LIMITED_API=0x03090000)
# if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# target_compile_options(${target} PRIVATE "-Wno-unused-variable")
# endif()
endfunction()
if(CMAKE_CL_64)
# Default char Type Is unsigned
# ie_add_compiler_flags(/J)
else()
ie_add_compiler_flags(-fsigned-char)
endif()
if(WIN32)
ie_add_compiler_flags(-D_CRT_SECURE_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS)
ie_add_compiler_flags(/EHsc) # no asynchronous structured exception handling
ie_add_compiler_flags(/Gy) # remove unreferenced functions: function level linking
#
# Common options / warnings enabled
#
ie_add_compiler_flags(/D_CRT_SECURE_NO_WARNINGS /D_SCL_SECURE_NO_WARNINGS)
# no asynchronous structured exception handling
ie_add_compiler_flags(/EHsc)
# Allows the compiler to package individual functions in the form of packaged functions (COMDATs).
ie_add_compiler_flags(/Gy)
# This option helps ensure the fewest possible hard-to-find code defects. Similar to -Wall on GNU / Clang
ie_add_compiler_flags(/W3)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# Increase Number of Sections in .Obj file
ie_add_compiler_flags(/bigobj)
# Build with multiple processes
ie_add_compiler_flags(/MP)
if(AARCH64 AND NOT MSVC_VERSION LESS 1930)
# otherwise, _ARM64_EXTENDED_INTRINSICS is defined, which defines 'mvn' macro
ie_add_compiler_flags(/D_ARM64_DISTINCT_NEON_TYPES)
endif()
endif()
# Handle Large Addresses
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LARGEADDRESSAWARE")
if (CMAKE_COMPILE_WARNING_AS_ERROR)
if (CMAKE_VERSION VERSION_LESS 3.24)
#
# Warnings as errors
#
if(CMAKE_COMPILE_WARNING_AS_ERROR)
if(CMAKE_VERSION VERSION_LESS 3.24)
ie_add_compiler_flags(/WX)
endif()
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /WX")
@ -300,26 +366,16 @@ if(WIN32)
endif()
endif()
if(AARCH64 AND CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND NOT MSVC_VERSION LESS 1930)
# otherwise, _ARM64_EXTENDED_INTRINSICS is defined, which defines 'mvn' macro
ie_add_compiler_flags(-D_ARM64_DISTINCT_NEON_TYPES)
endif()
# Compiler specific flags
ie_add_compiler_flags(/bigobj)
ie_add_compiler_flags(/MP)
#
# Disable noisy warnings
#
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# C4251 needs to have dll-interface to be used by clients of class
ie_add_compiler_flags(/wd4251)
# C4275 non dll-interface class used as base for dll-interface class
ie_add_compiler_flags(/wd4275)
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
# 161: unrecognized pragma
# 177: variable was declared but never referenced
# 556: not matched type of assigned function pointer
@ -342,42 +398,45 @@ if(WIN32)
string(REPLACE "/Zi" "/Z7" CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO}")
string(REPLACE "/Zi" "/Z7" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
else()
if(CMAKE_COMPILE_WARNING_AS_ERROR AND CMAKE_VERSION VERSION_LESS 3.24)
# TODO: enable for C sources as well
# ie_add_compiler_flags(-Werror)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
endif()
#
# Common enabled warnings
#
# allow the linker to eliminate unused code and data from the final executable
ie_add_compiler_flags(-ffunction-sections -fdata-sections)
# emits text showing the command-line option controlling a diagnostic
ie_add_compiler_flags(-fdiagnostics-show-option)
ie_add_compiler_flags(-Wundef)
ie_add_compiler_flags(-Wreturn-type)
ie_add_compiler_flags(-Wunused-variable)
if(OV_COMPILER_IS_APPLECLANG)
ie_add_compiler_flags(-Wswitch)
set(CMAKE_CXX_FLAGS "-Woverloaded-virtual ${CMAKE_CXX_FLAGS}")
else()
ie_add_compiler_flags(-Wuninitialized -Winit-self)
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
ie_add_compiler_flags(-Winconsistent-missing-override
-Wstring-plus-int)
else()
ie_add_compiler_flags(-Wmaybe-uninitialized)
check_cxx_compiler_flag("-Wsuggest-override" SUGGEST_OVERRIDE_SUPPORTED)
if(SUGGEST_OVERRIDE_SUPPORTED)
set(CMAKE_CXX_FLAGS "-Wsuggest-override ${CMAKE_CXX_FLAGS}")
endif()
endif()
# This enables all the warnings about constructions that some users consider questionable, and that are easy to avoid
ie_add_compiler_flags(-Wall)
# Warn if an undefined identifier is evaluated in an #if directive. Such identifiers are replaced with zero.
ie_add_compiler_flags(-Wundef)
check_cxx_compiler_flag("-Wsuggest-override" SUGGEST_OVERRIDE_SUPPORTED)
if(SUGGEST_OVERRIDE_SUPPORTED)
set(CMAKE_CXX_FLAGS "-Wsuggest-override ${CMAKE_CXX_FLAGS}")
endif()
#
# Warnings as errors
#
if(CMAKE_COMPILE_WARNING_AS_ERROR AND CMAKE_VERSION VERSION_LESS 3.24)
ie_add_compiler_flags(-Werror)
endif()
#
# Disable noisy warnings
#
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
# 177: function "XXX" was declared but never referenced
ie_add_compiler_flags(-diag-disable=remark,177,2196)
endif()
#
# Linker flags
#
if(APPLE)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-dead_strip")
@ -401,6 +460,14 @@ else()
endif()
endif()
# if(OV_COMPILER_IS_CLANG)
# ie_add_compiler_flags(-Wshorten-64-to-32)
# endif()
# TODO
if(OV_COMPILER_IS_CLANG)
ie_add_compiler_flags(-Wno-delete-non-abstract-non-virtual-dtor)
endif()
#
# link_system_libraries(target <PUBLIC | PRIVATE | INTERFACE> <lib1 [lib2 lib3 ...]>)
#

View File

@ -19,16 +19,12 @@ else()
endif()
if(CI_BUILD_NUMBER)
set(TREAT_WARNING_AS_ERROR_DEFAULT ON)
set(CMAKE_COMPILE_WARNING_AS_ERROR_DEFAULT ON)
else()
set(TREAT_WARNING_AS_ERROR_DEFAULT OFF)
set(CMAKE_COMPILE_WARNING_AS_ERROR_DEFAULT OFF)
endif()
ie_dependent_option (TREAT_WARNING_AS_ERROR "WILL BE REMOVED SOON, NEED TO FIX PRIVATE COMPONENTS" ON "X86_64 OR X86" OFF)
if(NOT DEFINED CMAKE_COMPILE_WARNING_AS_ERROR)
set(CMAKE_COMPILE_WARNING_AS_ERROR ${TREAT_WARNING_AS_ERROR_DEFAULT})
endif()
ie_option (CMAKE_COMPILE_WARNING_AS_ERROR "Enable warnings as errors" ${CMAKE_COMPILE_WARNING_AS_ERROR_DEFAULT})
ie_dependent_option (ENABLE_INTEGRITYCHECK "build DLLs with /INTEGRITYCHECK flag" OFF "CMAKE_CXX_COMPILER_ID STREQUAL MSVC" OFF)
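The standard CMAKE_COMPILE_WARNING_AS_ERROR variable is honored natively only by CMake 3.24 and newer, so older CMake still needs the flag added by hand, which is what the compile-flags changes elsewhere in this commit do; a minimal generic sketch of that fallback (flag names assume MSVC and GCC/Clang):
```cmake
if(CMAKE_COMPILE_WARNING_AS_ERROR AND CMAKE_VERSION VERSION_LESS 3.24)
    if(MSVC)
        add_compile_options(/WX)      # warnings as errors, added manually on CMake < 3.24
    else()
        add_compile_options(-Werror)
    endif()
endif()
```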

View File

@ -16,9 +16,23 @@ if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
endif()
endif()
macro(_ie_process_msvc_generator_platform flag_name)
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(arch_flag X86_64)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")
set(arch_flag X86)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(arm64.*|aarch64.*|AARCH64.*)")
set(arch_flag AARCH64)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
set(arch_flag ARM)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^riscv64$")
set(arch_flag RISCV64)
endif()
set(HOST_${arch_flag} ON)
macro(_ie_process_msvc_generator_platform arch_flag)
# if cmake -A <ARM|ARM64> is passed
if(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64")
if(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64")
set(AARCH64 ON)
elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM")
set(ARM ON)
@ -27,14 +41,14 @@ macro(_ie_process_msvc_generator_platform flag_name)
elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "Win32")
set(X86 ON)
else()
set(${flag_name} ON)
set(${arch_flag} ON)
endif()
endmacro()
if(MSVC64 OR MINGW64)
_ie_process_msvc_generator_platform(X86_64)
_ie_process_msvc_generator_platform(${arch_flag})
elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING))
_ie_process_msvc_generator_platform(X86)
_ie_process_msvc_generator_platform(${arch_flag})
elseif(CMAKE_OSX_ARCHITECTURES AND APPLE)
if(CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
set(AARCH64 ON)
@ -49,7 +63,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(X86_64 ON)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")
set(X86 ON)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm64.*|aarch64.*|AARCH64.*)")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm64.*|aarch64.*|AARCH64.*|ARM64.*)")
set(AARCH64 ON)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
set(ARM ON)
@ -57,18 +71,6 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^riscv64$")
set(RISCV64 ON)
endif()
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(HOST_X86_64 ON)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")
set(HOST_X86 ON)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(arm64.*|aarch64.*|AARCH64.*)")
set(HOST_AARCH64 ON)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
set(HOST_ARM ON)
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^riscv64$")
set(HOST_RISCV64 ON)
endif()
if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set(EMSCRIPTEN ON)
endif()

View File

@ -93,7 +93,6 @@ function(register_extra_modules)
file(REMOVE "${devconfig_file}")
file(WRITE "${devconfig_file}" "\# !! AUTOGENERATED: DON'T EDIT !!\n\n")
file(APPEND "${devconfig_file}" "ie_deprecated_no_errors()\n")
foreach(target IN LISTS ${openvino_export_components})
if(target)
@ -124,6 +123,17 @@ endif()\n")
endif()
list(APPEND extra_modules "${OpenVINO_SOURCE_DIR}/src/core/template_extension")
# add extra flags for compilation of extra modules:
# since not all extra modules use OpenVINODeveloperPackage, we have to add these function calls here
ov_dev_package_no_errors()
ov_deprecated_no_errors()
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# 'argument': conversion from 'size_t' to 'int', possible loss of data
ie_add_compiler_flags(/wd4267)
ie_add_compiler_flags(/wd4244)
endif()
# add each extra module
foreach(module_path IN LISTS extra_modules)
if(module_path)

View File

@ -336,12 +336,11 @@ macro(ov_cpack_settings)
set(samples_build_deps "cmake, g++, gcc, libc6-dev, make, pkg-config")
set(samples_build_deps_suggest "libopencv-core-dev, libopencv-imgproc-dev, libopencv-imgcodecs-dev")
set(samples_opencl_suggest "ocl-icd-opencl-dev, opencl-headers")
if(OV_GLIBC_VERSION VERSION_LESS_EQUAL 2.27)
# Ubuntu 18.04, Debian 9 cases
set(json_library "nlohmann-json-dev")
else()
set(json_library "nlohmann-json3-dev")
endif()
# Ubuntu 18.04, Debian 9 cases have nlohmann-json-dev
# newer systems have nlohmann-json3-dev
# according to https://www.debian.org/doc/debian-policy/ch-relationships.html#syntax-of-relationship-fields
# we can use | (pipe) to provide alternative package names
set(json_library "nlohmann-json3-dev | nlohmann-json-dev")
# c_samples / cpp_samples
set(CPACK_COMPONENT_SAMPLES_DESCRIPTION "Intel(R) Distribution of OpenVINO(TM) Toolkit C / C++ Samples")

View File

@ -151,13 +151,8 @@ endif()
# Extra Compile Flags
#
if(CMAKE_COMPILER_IS_GNUCXX)
ie_add_compiler_flags(-Wno-error=unused-variable)
ie_add_compiler_flags(-Wno-error=unused-but-set-variable)
if(SUGGEST_OVERRIDE_SUPPORTED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-suggest-override")
endif()
endif()
# don't fail on strict compilation options in 3rd party modules
ov_dev_package_no_errors()
# Don't treat deprecated API warnings as errors in 3rd party apps
ie_deprecated_no_errors()
ov_deprecated_no_errors()

View File

@ -123,13 +123,8 @@ endif()
# Extra Compile Flags
#
if(CMAKE_COMPILER_IS_GNUCXX)
ie_add_compiler_flags(-Wno-error=unused-variable)
ie_add_compiler_flags(-Wno-error=unused-but-set-variable)
if(SUGGEST_OVERRIDE_SUPPORTED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-suggest-override")
endif()
endif()
# don't fail on strict compilation options in 3rd party modules
ov_dev_package_no_errors()
# Don't treat deprecated API warnings as errors in 3rd party apps
ie_deprecated_no_errors()
ov_deprecated_no_errors()

View File

@ -7,39 +7,24 @@
@endsphinxdirective
If you intend to use the OpenVINO GPU plugin and offload network inference to an Intel® graphics processor, the Intel Graphics Driver must be properly configured on your system.
If it is already installed, and you want to keep it, you can skip the installation steps.
## Linux
If you have installed OpenVINO Runtime from the archive file, APT, or YUM, follow these steps to work with GPU:
To install the latest available **Intel® Graphics Compute Runtime for OpenCL™** for your OS, see the [Install Guides](https://github.com/intel/compute-runtime/releases/latest).
1. Go to the install_dependencies directory:
```sh
cd <INSTALL_DIR>/install_dependencies/
```
> **NOTE**: If you use the Red Hat 8 OS, install the OpenCL library as a prerequisite with the following command:
> ```sh
> rpm -ivh http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/ocl-icd-2.2.12-1.el8.x86_64.rpm
> ```
2. Install the **Intel® Graphics Compute Runtime for OpenCL™** driver components required to use the GPU plugin and write custom layers for Intel® Integrated Graphics. The drivers are not included in the package. To install them, run this script:
```sh
sudo -E ./install_NEO_OCL_driver.sh
```
> **NOTE**: If you installed OpenVINO Runtime via PyPI, you can get this script from [the OpenVINO repository](https://github.com/openvinotoolkit/openvino/blob/master/scripts/install_dependencies/install_NEO_OCL_driver.sh).
> **NOTE**: For instructions specific to discrete graphics platforms, refer to [the dgpu guide](https://dgpu-docs.intel.com/installation-guides/index.html) (Intel® Arc™ A-Series Graphics, Intel® Data Center GPU Flex Series, Intel® Data Center GPU MAX Series, Intel® processor graphics Gen12, and Intel® Iris Xe MAX codename DG1).
> **NOTE**: To use the **Intel® Iris® Xe MAX Graphics**, see the [Intel® Iris® Xe MAX Graphics with Linux*](https://dgpu-docs.intel.com/devices/iris-xe-max-graphics/index.html) page for driver installation instructions.
The script compares the driver version on the system to the current version. If the driver version on the system is higher or equal to the current version, the script does
not install a new driver. If the version of the driver is lower than the current version, the script uninstalls the lower version and installs the current version with your permission:
![](../img/NEO_check_agreement.png)
You may consider installing one of the earlier versions of the driver, based on your particular setup needs.
Higher hardware versions require a higher driver version, namely 20.35 instead of 19.41. If the script fails to uninstall the driver, uninstall it manually. During the script execution, you may see the following command line output:
```sh
Add OpenCL user to video group
```
Ignore this suggestion and continue.<br>
You can also find the most recent version of the driver, installation procedure and other information on the [Intel® software for general purpose GPU capabilities](https://dgpu-docs.intel.com/index.html) site.
It is recommended that you refer to the [Intel® Graphics Compute Runtime Github page](https://github.com/intel/compute-runtime/) for instructions and recommendations on GPU driver installation specific to particular releases, including the list of supported hardware platforms.
3. **Optional:** Install header files to allow compilation of new code. You can find the header files at [Khronos OpenCL™ API Headers](https://github.com/KhronosGroup/OpenCL-Headers.git).
You've completed all required configuration steps to perform inference on processor graphics.
@sphinxdirective
.. _gpu guide windows:
@ -48,24 +33,31 @@ You've completed all required configuration steps to perform inference on proces
## Windows
This section will help you check whether you require driver installation. Install the indicated version or higher.
If your applications offload computation to **Intel® Integrated Graphics**, you must have the Intel Graphics Driver for Windows installed on your hardware.
[Download and install the recommended version](https://downloadcenter.intel.com/download/30079/Intel-Graphics-Windows-10-DCH-Drivers).
To install the Intel Graphics Driver for Windows on your hardware, follow the [instructions](https://www.intel.com/content/www/us/en/support/articles/000005629/graphics.html).
To check if you have this driver installed:
1. Type **device manager** in your **Search Windows** box and press Enter. The **Device Manager** opens.
2. Click the drop-down arrow to view the **Display adapters**. You can see the adapter that is installed in your computer:
![](../img/DeviceManager.PNG)
3. Right-click the adapter name and select **Properties**.
4. Click the **Driver** tab to see the driver version.
![](../img/DeviceDriverVersion.PNG)
You are done updating your device driver and are ready to use your GPU.
## Additional info
In internal OpenVINO validation, the following versions of the Intel Graphics Driver were used:
Operating System | Driver version
--- |-------------------------
Ubuntu 20.04 | [22.35.24055](https://github.com/intel/compute-runtime/releases/tag/22.35.24055)
Ubuntu 18.04 | [21.38.21026](https://github.com/intel/compute-runtime/releases/tag/21.38.21026)
CentOS 7 | [19.41.14441](https://github.com/intel/compute-runtime/releases/tag/19.41.14441)
RHEL 8 | [22.28.23726](https://github.com/intel/compute-runtime/releases/tag/22.28.23726)
## What's Next?
You can try out the toolkit with:
@ -79,4 +71,3 @@ Developing in C++:
* [Image Classification Async C++ Sample](@ref openvino_inference_engine_samples_classification_sample_async_README)
* [Hello Classification C++ Sample](@ref openvino_inference_engine_samples_hello_classification_README)
* [Hello Reshape SSD C++ Sample](@ref openvino_inference_engine_samples_hello_reshape_ssd_README)

View File

@ -99,5 +99,6 @@ int main() {
//! [ie:load_old_extension]
core.AddExtension(std::make_shared<InferenceEngine::Extension>("path_to_extension_library.so"));
//! [ie:load_old_extension]
(void)status;
return 0;
}

View File

@ -117,6 +117,7 @@ int main() {
core.add_extension(std::make_shared<InferenceEngine::Extension>("path_to_extension_library.so"));
//! [ov_api_2_0:load_old_extension]
OPENVINO_SUPPRESS_DEPRECATED_END
(void)status;
return 0;
}

View File

@ -55,7 +55,7 @@ layout = ov::Layout("NCHW");
std::cout << layout.to_string(); // prints [N,C,H,W]
//! [ov:layout:dump]
std::shared_ptr<ov::Model> model;
std::shared_ptr<ov::Model> model = std::make_shared<ov::Model>(ov::OutputVector{}, ov::ParameterVector{});
//! [ov:layout:get_from_model]
// Get layout for model input
layout = ov::layout::get_layout(model->input("input_tensor_name"));

View File

@ -45,10 +45,6 @@ set (CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${IE_MAIN_SAMPLES_DIR}/${BIN_FOLDER})
set (CMAKE_PDB_OUTPUT_DIRECTORY ${IE_MAIN_SAMPLES_DIR}/${BIN_FOLDER})
set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${IE_MAIN_SAMPLES_DIR}/${BIN_FOLDER})
if(TREAT_WARNING_AS_ERROR AND NOT DEFINED CMAKE_COMPILE_WARNING_AS_ERROR)
set(CMAKE_COMPILE_WARNING_AS_ERROR ON)
endif()
if (WIN32)
set_property (DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _CRT_SECURE_NO_WARNINGS)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS")

View File

@ -46,7 +46,7 @@ if(NOT TARGET nlohmann_json::nlohmann_json)
# for example, on debian 9 there is no cmake / pkgconfig files
find_file(nlohmann_include_file
NAMES "json.hpp"
"Path to json.hpp (nlohmann-json-dev )")
"Path to json.hpp (nlohmann-json-dev)")
if(nlohmann_include_file)
add_library(nlohmann_json::nlohmann_json INTERFACE IMPORTED)
get_filename_component(nlohmann_include_dir "${nlohmann_include_file}" PATH)

View File

@ -169,7 +169,7 @@ ov::Tensor create_tensor_from_binary(const std::vector<std::string>& files,
if (inputInfo.layout != "CN") {
binaryFile.read(&data[b * inputSize], inputSize);
} else {
for (int i = 0; i < inputInfo.channels(); i++) {
for (size_t i = 0; i < inputInfo.channels(); i++) {
binaryFile.read(&data[(i * binaryBatchSize + b) * sizeof(T)], sizeof(T));
}
}
@ -517,7 +517,7 @@ std::map<std::string, ov::TensorVector> get_tensors(std::map<std::string, std::v
}
}
for (int i = 0; i < logOutput.size(); i++) {
for (size_t i = 0; i < logOutput.size(); i++) {
slog::info << "Test Config " << i << slog::endl;
auto maxNameWidth = std::max_element(logOutput[i].begin(),
logOutput[i].end(),
@ -691,7 +691,7 @@ std::map<std::string, ov::TensorVector> get_tensors_static_case(const std::vecto
}
}
for (int i = 0; i < logOutput.size(); i++) {
for (size_t i = 0; i < logOutput.size(); i++) {
slog::info << "Test Config " << i << slog::endl;
auto maxNameWidth = std::max_element(logOutput[i].begin(),
logOutput[i].end(),

View File

@ -176,9 +176,9 @@ void setDeviceProperty(ov::Core& core,
return;
if (device_config.find(device) == device_config.end() || // device properties not existed
config.first.empty() && // not setting default value to property
(!FLAGS_load_config.empty() &&
is_dev_set_property[device])) { // device properties loaded from file and overwrite is not happened
(config.first.empty() && // not setting default value to property
(!FLAGS_load_config.empty() &&
is_dev_set_property[device]))) { // device properties loaded from file and overwrite is not happened
is_dev_set_property[device] = false;
device_config.erase(device);
device_config.insert(ov::device::properties(device, device_property));
@ -206,7 +206,7 @@ void fuse_mean_scale(ov::preprocess::PrePostProcessor& preproc, const benchmark_
bool warned = false;
constexpr char warn_msg[] = "Mean/scale values are fused into the model. This slows down performance compared to "
"--imean and --iscale which existed before";
for (const std::pair<std::string, benchmark_app::InputInfo>& input_info : app_inputs_info) {
for (const std::pair<std::string, benchmark_app::InputInfo> input_info : app_inputs_info) {
if (!input_info.second.mean.empty()) {
if (!warned) {
slog::warn << warn_msg << slog::endl;
@ -742,7 +742,7 @@ int main(int argc, char* argv[]) {
const auto output_precision = FLAGS_op.empty() ? ov::element::undefined : getPrecision2(FLAGS_op);
const auto& inputs = model->inputs();
for (int i = 0; i < inputs.size(); i++) {
for (size_t i = 0; i < inputs.size(); i++) {
const auto& item = inputs[i];
auto iop_precision = ov::element::undefined;
auto type_to_set = ov::element::undefined;
@ -783,7 +783,7 @@ int main(int argc, char* argv[]) {
fuse_mean_scale(preproc, app_inputs_info.at(0));
const auto& outs = model->outputs();
for (int i = 0; i < outs.size(); i++) {
for (size_t i = 0; i < outs.size(); i++) {
const auto& item = outs[i];
auto iop_precision = ov::element::undefined;
try {
@ -1215,7 +1215,7 @@ int main(int argc, char* argv[]) {
std::vector<LatencyMetrics> groupLatencies = {};
if (FLAGS_pcseq && app_inputs_info.size() > 1) {
const auto& lat_groups = inferRequestsQueue.get_latency_groups();
for (int i = 0; i < lat_groups.size(); i++) {
for (size_t i = 0; i < lat_groups.size(); i++) {
const auto& lats = lat_groups[i];
std::string data_shapes_string = "";

View File

@ -83,7 +83,7 @@ std::map<std::string, ov::TensorVector> get_remote_input_tensors(
auto& oclContext = static_cast<ov::intel_gpu::ocl::ClContext&>(context);
auto oclInstance = std::make_shared<gpu::OpenCL>(oclContext.get());
for (int i = 0; i < num_requests; i++) {
for (size_t i = 0; i < num_requests; i++) {
for (auto& inputs_info : app_inputs_info) {
for (auto& input : inputs_info) {
// Fill random

View File

@ -84,7 +84,7 @@ void StatisticsReport::dump_performance_counters_request(CsvDumper& dumper, cons
for (const auto& layer : perfCounts) {
dumper << layer.node_name; // layer name
dumper << ((int)layer.status < (sizeof(status_names) / sizeof(status_names[0]))
dumper << ((size_t)layer.status < (sizeof(status_names) / sizeof(status_names[0]))
? status_names[(int)layer.status]
: "INVALID_STATUS");
dumper << layer.node_type << layer.exec_type;
@ -107,7 +107,6 @@ void StatisticsReport::dump_sort_performance_counters_request(CsvDumper& dumper,
const PerformanceCounters& perfCounts) {
std::chrono::microseconds total = std::chrono::microseconds::zero();
std::chrono::microseconds total_cpu = std::chrono::microseconds::zero();
int layersize = 0;
dumper << "layerName"
<< "execStatus"
@ -131,14 +130,13 @@ void StatisticsReport::dump_sort_performance_counters_request(CsvDumper& dumper,
for (const auto& layer : profiling) {
if (std::string(status_names[(int)layer.status]).compare("EXECUTED") == 0) {
dumper << layer.node_name; // layer name
dumper << ((int)layer.status < (sizeof(status_names) / sizeof(status_names[0]))
dumper << ((size_t)layer.status < (sizeof(status_names) / sizeof(status_names[0]))
? status_names[(int)layer.status]
: "INVALID_STATUS");
dumper << layer.node_type << layer.exec_type;
dumper << layer.real_time.count() / 1000.0 << layer.cpu_time.count() / 1000.0;
dumper << (layer.real_time * 1.0 / total) * 100;
dumper.endLine();
layersize += 1;
}
}
@ -159,7 +157,7 @@ StatisticsReport::PerformanceCounters StatisticsReport::get_average_performance_
// iterate over each layer from sorted vector and add required PM data
// to the per-layer maps
for (const auto& pm : perfCounts[i]) {
int idx = 0;
size_t idx = 0;
for (; idx < performanceCountersAvg.size(); idx++) {
if (performanceCountersAvg[idx].node_name == pm.node_name) {
performanceCountersAvg[idx].real_time += pm.real_time;
@ -284,8 +282,8 @@ const nlohmann::json StatisticsReportJSON::perf_counters_to_json(
item["name"] = layer.node_name; // layer name
item["status"] =
((int)layer.status < (sizeof(status_names) / sizeof(status_names[0])) ? status_names[(int)layer.status]
: "INVALID_STATUS");
((size_t)layer.status < (sizeof(status_names) / sizeof(status_names[0])) ? status_names[(int)layer.status]
: "INVALID_STATUS");
item["node_type"] = layer.node_type;
item["exec_type"] = layer.exec_type;
item["real_time"] = layer.real_time.count() / 1000.0;
@ -320,8 +318,8 @@ const nlohmann::json StatisticsReportJSON::sort_perf_counters_to_json(
nlohmann::json item;
item["name"] = layer.node_name; // layer name
item["status"] =
((int)layer.status < (sizeof(status_names) / sizeof(status_names[0])) ? status_names[(int)layer.status]
: "INVALID_STATUS");
((size_t)layer.status < (sizeof(status_names) / sizeof(status_names[0])) ? status_names[(int)layer.status]
: "INVALID_STATUS");
item["node_type"] = layer.node_type;
item["exec_type"] = layer.exec_type;
item["real_time"] = layer.real_time.count() / 1000.0;

View File

@ -527,7 +527,7 @@ std::vector<benchmark_app::InputsInfo> get_inputs_info(const std::string& shape_
}
info.dataShape = ov::Shape(info.partialShape.size(), 0);
for (int i = 0; i < info.partialShape.size(); i++) {
for (size_t i = 0; i < info.partialShape.size(); i++) {
auto& dim = info.partialShape[i];
if (dim.is_static()) {
info.dataShape[i] = dim.get_length();
@ -662,65 +662,6 @@ std::vector<benchmark_app::InputsInfo> get_inputs_info(const std::string& shape_
reshape_required);
}
#ifdef USE_OPENCV
void dump_config(const std::string& filename, const std::map<std::string, ov::AnyMap>& config) {
slog::warn << "YAML and XML formats for config file won't be supported soon." << slog::endl;
auto plugin_to_opencv_format = [](const std::string& str) -> std::string {
if (str.find("_") != std::string::npos) {
slog::warn
<< "Device name contains \"_\" and will be changed during loading of configuration due to limitations."
"This configuration file could not be loaded correctly."
<< slog::endl;
}
std::string new_str(str);
auto pos = new_str.find(".");
if (pos != std::string::npos) {
new_str.replace(pos, 1, "_");
}
return new_str;
};
cv::FileStorage fs(filename, cv::FileStorage::WRITE);
if (!fs.isOpened())
throw std::runtime_error("Error: Can't open config file : " + filename);
for (auto device_it = config.begin(); device_it != config.end(); ++device_it) {
fs << plugin_to_opencv_format(device_it->first) << "{:";
std::stringstream strm;
for (auto param_it = device_it->second.begin(); param_it != device_it->second.end(); ++param_it) {
strm << param_it->first;
param_it->second.print(strm);
}
fs << strm.str();
fs << "}";
}
fs.release();
}
void load_config(const std::string& filename, std::map<std::string, ov::AnyMap>& config) {
slog::warn << "YAML and XML formats for config file won't be supported soon." << slog::endl;
auto opencv_to_plugin_format = [](const std::string& str) -> std::string {
std::string new_str(str);
auto pos = new_str.find("_");
if (pos != std::string::npos) {
new_str.replace(pos, 1, ".");
}
return new_str;
};
cv::FileStorage fs(filename, cv::FileStorage::READ);
if (!fs.isOpened())
throw std::runtime_error("Error: Can't load config file : " + filename);
cv::FileNode root = fs.root();
for (auto it = root.begin(); it != root.end(); ++it) {
auto device = *it;
if (!device.isMap()) {
throw std::runtime_error("Error: Can't parse config file : " + filename);
}
for (auto iit = device.begin(); iit != device.end(); ++iit) {
auto item = *iit;
config[opencv_to_plugin_format(device.name())][item.name()] = item.string();
}
}
}
#else
void dump_config(const std::string& filename, const std::map<std::string, ov::AnyMap>& config) {
nlohmann::json jsonConfig;
for (const auto& item : config) {
@ -794,7 +735,6 @@ void load_config(const std::string& filename, std::map<std::string, ov::AnyMap>&
}
}
}
#endif
#ifdef USE_OPENCV
const std::vector<std::string> supported_image_extensions =

View File

@ -41,7 +41,7 @@ std::shared_ptr<unsigned char> OCVReader::getData(size_t width = 0, size_t heigh
cv::Mat resized(cv::Size(width, height), img.type(), _data.get());
if (width != img.cols || height != img.rows) {
if (width != static_cast<size_t>(img.cols) || height != static_cast<size_t>(img.rows)) {
slog::warn << "Image is resized from (" << img.cols << ", " << img.rows << ") to (" << width << ", " << height
<< ")" << slog::endl;
}

View File

@ -171,7 +171,7 @@ void printInputAndOutputsInfo(const ov::Model& network) {
slog::info << "model name: " << network.get_friendly_name() << slog::endl;
const std::vector<ov::Output<const ov::Node>> inputs = network.inputs();
for (const ov::Output<const ov::Node> input : inputs) {
for (const ov::Output<const ov::Node>& input : inputs) {
slog::info << " inputs" << slog::endl;
const std::string name = input.get_names().empty() ? "NONE" : input.get_any_name();
@ -185,7 +185,7 @@ void printInputAndOutputsInfo(const ov::Model& network) {
}
const std::vector<ov::Output<const ov::Node>> outputs = network.outputs();
for (const ov::Output<const ov::Node> output : outputs) {
for (const ov::Output<const ov::Node>& output : outputs) {
slog::info << " outputs" << slog::endl;
const std::string name = output.get_names().empty() ? "NONE" : output.get_any_name();

View File

@ -70,7 +70,7 @@ ov::Tensor read_weights(const std::string& filepath) {
ov::Tensor weights(ov::element::u8, {static_cast<size_t>(fileSize)});
read_file(filepath, weights.data(), weights.get_byte_size());
return std::move(weights);
return weights;
}
/**

View File

@ -126,7 +126,7 @@ int main(int argc, char* argv[]) {
in.model().set_layout(ov::Layout(custom_layouts.at(item_name)));
}
}
for (int i = 0; i < model->outputs().size(); i++) {
for (size_t i = 0; i < model->outputs().size(); i++) {
proc.output(i).tensor().set_element_type(ov::element::f32);
}
model = proc.build();

View File

@ -57,9 +57,6 @@ if(LINUX)
ie_cpack_add_component(${OV_CPACK_COMP_INSTALL_DEPENDENCIES} HIDDEN)
set(install_dependencies_files install_openvino_dependencies.sh)
if(ENABLE_INTEL_GPU)
list(APPEND install_dependencies_files install_NEO_OCL_driver.sh)
endif()
foreach(install_dependencies_file IN LISTS install_dependencies_files)
install(PROGRAMS "${CMAKE_CURRENT_SOURCE_DIR}/install_dependencies/${install_dependencies_file}"

View File

@ -1,470 +0,0 @@
#!/bin/bash
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
# Installs the Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver on Linux.
#
# Usage: sudo -E ./install_NEO_OCL_driver.sh
#
# Supported platforms:
# 6th-11th generation Intel® Core™ processor with Intel(R)
# Processor Graphics Technology not previously disabled by the BIOS
# or motherboard settings
#
EXIT_FAILURE=1
EXIT_WRONG_ARG=2
UBUNTU_VERSION=
DISTRO=
SCRIPT_DIR="$( cd "$( dirname "$(realpath "${BASH_SOURCE[0]}")" )" >/dev/null 2>&1 && pwd )"
INSTALL_DRIVER_VERSION='unknown'
print_help()
{
# Display Help
usage="Usage: $(basename "$0") [OPTIONS]...
Downloads and installs the Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver on Linux
Available options:
-y Replace the currently installed driver with the newer version.
--no_numa Skip installing NUMA packages. (off)
-h, --help Display this help and exit"
echo "$usage"
}
while [[ $# -gt 0 ]]
do
key="$1"
case $key in
-d|--install_driver)
echo "WARNING: This option is deprecated. Recommended driver for current platform will be installed."
shift
shift
;;
-y)
agreement=true
shift
;;
-a|--auto)
echo "WARNING: This option is deprecated. Recommended driver for current platform will be installed."
shift
;;
--no_numa)
no_numa=true
shift
;;
-h|--help)
print_help
exit
;;
*)
echo "$(basename "$0"): invalid option -- '${key}'"
echo "Try '$(basename "$0") --help' for more information."
exit $EXIT_WRONG_ARG
esac
done
_install_prerequisites_redhat()
{
# yum doesn't accept timeout in seconds as parameter
echo
echo "Note: if yum becomes non-responsive, try aborting the script and run:"
echo " sudo -E $0"
echo
CMDS=("dnf install -y 'dnf-command(config-manager)'"
"dnf config-manager --add-repo \
https://repositories.intel.com/graphics/rhel/8.5/intel-graphics.repo")
for cmd in "${CMDS[@]}"; do
echo "$cmd"
if ! eval "$cmd"; then
echo "ERROR: failed to run $cmd" >&2
echo "Problem (or disk space)?" >&2
echo ". Verify that you have enough disk space, and run the script again." >&2
exit $EXIT_FAILURE
fi
done
}
_install_prerequisites_ubuntu()
{
apt-get update
apt-get install -y gpg-agent
curl https://repositories.intel.com/graphics/intel-graphics.key |gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg
echo 'deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu focal-legacy main' | tee /etc/apt/sources.list.d/intel.gpu.focal.list
apt-get update
if [ "$no_numa" == true ]; then
CMDS=("apt-get -y install --no-install-recommends ocl-icd-libopencl1")
else
CMDS=("apt-get -y install --no-install-recommends libnuma1 ocl-icd-libopencl1")
fi
for cmd in "${CMDS[@]}"; do
echo "$cmd"
if ! eval "$cmd"; then
echo "ERROR: failed to run $cmd" >&2
echo "Problem (or disk space)?" >&2
echo " sudo -E $0" >&2
echo "2. Verify that you have enough disk space, and run the script again." >&2
exit $EXIT_FAILURE
fi
done
}
install_prerequisites()
{
echo 'Installing prerequisites...'
if [[ $DISTRO == "redhat" ]]; then
_install_prerequisites_redhat
elif [[ $DISTRO == "ubuntu" ]]; then
_install_prerequisites_ubuntu
else
echo 'WARNING::install_prerequisites: Unknown OS'
fi
}
_deploy_rpm()
{
cmd="rpm $IGFX_RPM_FLAGS -ivh --nodeps --force $1"
echo "$cmd"
eval "$cmd"
}
_deploy_deb()
{
cmd="dpkg -i $1"
echo "$cmd"
eval "$cmd"
}
_install_user_mode_redhat()
{
CMDS=("rpm -ivh https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/mesa-filesystem-21.1.5-1.el8.x86_64.rpm" \
"dnf install --refresh -y \
intel-opencl-22.28.23726.1-i419.el8.x86_64 intel-media intel-mediasdk libmfxgen1 libvpl2 \
level-zero intel-level-zero-gpu \
intel-metrics-library intel-igc-core intel-igc-cm \
libva libva-utils intel-gmmlib" \
"rpm -ivh http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/ocl-icd-2.2.12-1.el8.x86_64.rpm" )
for cmd in "${CMDS[@]}"; do
echo "$cmd"
if ! eval "$cmd"; then
echo "ERROR: failed to run $cmd" >&2
echo "Problem (or disk space)?" >&2
echo " sudo -E $0" >&2
echo "Verify that you have enough disk space, and run the script again." >&2
exit $EXIT_FAILURE
fi
done
}
_install_user_mode_ubuntu()
{
if ! 'find . -name "intel*.deb" -exec dpkg -i {} \;'; then
echo "ERROR: failed to install debs $cmd error" >&2
echo "Make sure you have enough disk space or fix the problem manually and try again." >&2
exit $EXIT_FAILURE
fi
}
install_user_mode()
{
echo "Installing user mode driver..."
if [[ $DISTRO == "redhat" ]]; then
_install_user_mode_redhat
else
_install_user_mode_ubuntu
fi
# exit from $SCRIPT_DIR/neo folder
cd - || exit
# clean it up
rm -rf "$SCRIPT_DIR/neo"
}
_uninstall_user_mode_redhat()
{
echo Looking for previously installed user-mode driver...
PACKAGES=("intel-opencl"
"intel-ocloc"
"intel-gmmlib"
"intel-igc-core"
"intel-igc-opencl")
for package in "${PACKAGES[@]}"; do
echo "rpm -qa | grep $package"
if found_package=$(rpm -qa | grep "$package"); then
echo "Found installed user-mode driver, performing uninstall..."
cmd="rpm -e --nodeps ${found_package}"
echo "$cmd"
if ! eval "$cmd"; then
echo "ERROR: failed to uninstall existing user-mode driver." >&2
echo "Please try again manually and run the script again." >&2
exit $EXIT_FAILURE
fi
fi
done
}
_uninstall_user_mode_ubuntu()
{
echo Looking for previously installed user-mode driver...
PACKAGES=("intel-opencl"
"intel-opencl-icd"
"intel-ocloc"
"intel-gmmlib"
"intel-igc-core"
"intel-igc-opencl")
for package in "${PACKAGES[@]}"; do
if found_package=$(dpkg-query -W -f='${binary:Package}\n' "${package}"); then
echo "Found installed user-mode driver, performing uninstall..."
cmd="apt-get autoremove -y $package"
echo "$cmd"
if ! eval "$cmd"; then
echo "ERROR: failed to uninstall existing user-mode driver." >&2
echo "Please try again manually and run the script again." >&2
exit $EXIT_FAILURE
fi
fi
done
}
uninstall_user_mode()
{
if [[ $DISTRO == "redhat" ]]; then
_uninstall_user_mode_redhat
else
_uninstall_user_mode_ubuntu
fi
}
_get_packages_ubuntu()
{
case $INSTALL_DRIVER_VERSION in
"21.38.21026")
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.38.21026/intel-gmmlib_21.2.1_amd64.deb
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.8708/intel-igc-core_1.0.8708_amd64.deb
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.8708/intel-igc-opencl_1.0.8708_amd64.deb
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.38.21026/intel-opencl_21.38.21026_amd64.deb
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.38.21026/intel-ocloc_21.38.21026_amd64.deb
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.38.21026/intel-level-zero-gpu_1.2.21026_amd64.deb
;;
"21.48.21782")
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.48.21782/intel-gmmlib_21.3.3_amd64.deb
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.9441/intel-igc-core_1.0.9441_amd64.deb
curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.9441/intel-igc-opencl_1.0.9441_amd64.deb
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.48.21782/intel-opencl-icd_21.48.21782_amd64.deb
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.48.21782/intel-level-zero-gpu_1.2.21782_amd64.deb
;;
"22.35.24055")
apt-get install -y \
intel-opencl-icd=22.35.24055+i815~u20.04 \
intel-level-zero-gpu=1.3.24055+i815~u20.04 \
level-zero=1.8.5+i815~u20.04
;;
*)
echo "ERROR: Unrecognized driver ${INSTALL_DRIVER_VERSION}."
exit $EXIT_WRONG_ARG
esac
}
_verify_checksum_ubuntu()
{
case $INSTALL_DRIVER_VERSION in
"21.38.21026")
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.38.21026/ww38.sum
sha256sum -c ww38.sum
;;
"21.48.21782")
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.48.21782/ww48.sum
sha256sum -c ww48.sum
;;
"22.35.24055")
echo "Verification by apt"
;;
*)
echo "ERROR: Unrecognized driver ${INSTALL_DRIVER_VERSION}."
exit $EXIT_WRONG_ARG
esac
}
verify_checksum()
{
if [[ $DISTRO == "redhat" ]]; then
return 0
else
_verify_checksum_ubuntu
fi
}
get_packages()
{
mkdir -p "$SCRIPT_DIR/neo"
cd "$SCRIPT_DIR/neo" || exit
if [[ $DISTRO == "redhat" ]]; then
return 0
else
_get_packages_ubuntu
fi
if ! verify_checksum; then
echo "ERROR: checksums do not match for the downloaded packages"
echo " Please verify your Internet connection and make sure you have enough disk space or fix the problem manually and try again. "
exit $EXIT_FAILURE
fi
}
version_gt() {
# check if first version is greater than second version
test "$(printf '%s\n' "$@" | sort -V | head -n 1)" != "$1";
}
summary()
{
echo
echo "Installation completed successfully."
echo
echo "Next steps:"
echo "Add OpenCL users to the video and render group: 'sudo usermod -a -G video,render USERNAME'"
echo " e.g. if the user running OpenCL host applications is foo, run: sudo usermod -a -G video,render foo"
echo " Current user has been already added to the video and render group"
echo
echo "If you use 8th Generation Intel® Core™ processor, add:"
echo " i915.alpha_support=1"
echo " to the 4.14 kernel command line, in order to enable OpenCL functionality for this platform."
echo
}
check_root_access()
{
if [[ $EUID -ne 0 ]]; then
echo "ERROR: you must run this script as root." >&2
echo "Please try again with \"sudo -E $0\", or as root." >&2
exit $EXIT_FAILURE
fi
}
add_user_to_video_group()
{
local real_user
real_user=$(logname 2>/dev/null || echo "${SUDO_USER:-${USER}}")
echo
echo "Adding $real_user to the video group..."
if ! usermod -a -G video "$real_user"; then
echo "WARNING: unable to add $real_user to the video group" >&2
fi
echo "Adding $real_user to the render group..."
if ! usermod -a -G render "$real_user"; then
echo "WARNING: unable to add $real_user to the render group" >&2
fi
}
_check_distro_version()
{
if [[ $DISTRO == redhat ]]; then
RHEL_MINOR_VERSION_SUPPORTED="[3-7]"
    if ! grep -m1 'VERSION_ID' /etc/os-release | grep -Eo "8\.${RHEL_MINOR_VERSION_SUPPORTED}"; then
echo "Warning: This runtime can be installed only on RHEL 8.3 up to RHEL 8.7"
echo "More info https://dgpu-docs.intel.com/releases/releases-20211130.html" >&2
echo "Installation of Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver interrupted"
exit $EXIT_FAILURE
else
INSTALL_DRIVER_VERSION='22.28.23726'
fi
elif [[ $DISTRO == ubuntu ]]; then
UBUNTU_VERSION=$(grep -m1 'VERSION_ID' /etc/os-release | grep -Eo "[0-9]{2}\.[0-9]{2}")
if [[ $UBUNTU_VERSION == '18.04' ]]; then
INSTALL_DRIVER_VERSION='21.38.21026'
elif [[ $UBUNTU_VERSION == '20.04' ]]; then
INSTALL_DRIVER_VERSION='22.35.24055'
else
echo "Warning: This runtime can be installed only on Ubuntu 18.04 or Ubuntu 20.04."
echo "More info https://github.com/intel/compute-runtime/releases" >&2
echo "Installation of Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver interrupted"
exit $EXIT_FAILURE
fi
fi
}
distro_init()
{
if [[ -f /etc/redhat-release ]]; then
DISTRO="redhat"
elif [[ -f /etc/lsb-release ]]; then
DISTRO="ubuntu"
fi
_check_distro_version
}
check_agreement()
{
if [ "$agreement" == true ]; then
return 0
fi
echo "This script will download and install Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver $INSTALL_DRIVER_VERSION, "
echo "that was used to validate this OpenVINO™ package."
echo "In case if you already have the driver - script will try to remove it."
while true; do
read -rp "Want to proceed? (y/n): " yn
case $yn in
[Yy]*) return 0 ;;
[Nn]*) exit $EXIT_FAILURE ;;
esac
done
}
check_current_driver()
{
echo "Checking current driver version..."
if [[ $DISTRO == redhat ]]; then
gfx_version=$(yum info intel-opencl | grep Version)
elif [[ $DISTRO == ubuntu ]]; then
gfx_version=$(dpkg-query --showformat='${Version}' --show intel-opencl)
if [[ -z "$gfx_version" ]]; then
gfx_version=$(dpkg-query --showformat='${Version}' --show intel-opencl-icd)
fi
fi
gfx_version="$(echo -e "${gfx_version}" | grep -Eo "[0-9]{2,3}\.[0-9]{2,3}\.[0-9]{3,6}")"
# install NEO OCL driver if the current driver version < INSTALL_DRIVER_VERSION
if [[ -n $gfx_version && "$(printf '%s\n' "$INSTALL_DRIVER_VERSION" "$gfx_version" | sort -V | head -n 1)" = "$INSTALL_DRIVER_VERSION" ]]; then
echo "Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver installation skipped because current version greater or equal to $INSTALL_DRIVER_VERSION" >&2
echo "Installation of Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver interrupted." >&2
exit $EXIT_FAILURE
else
echo "Starting installation..."
fi
}
install()
{
uninstall_user_mode
install_prerequisites
get_packages
install_user_mode
add_user_to_video_group
}
main()
{
echo "Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver installer"
distro_init
check_root_access
check_current_driver
check_agreement
install
summary
}
[[ "$0" == "${BASH_SOURCE[0]}" ]] && main "$@"

View File

@ -8,7 +8,7 @@ set -e
#===================================================================================================
# Option parsing
all_comp=(core dev python)
all_comp=(core dev gpu python)
os=${os:-auto}
# public options
@ -117,12 +117,14 @@ if [ "$os" == "raspbian9" ] || [ "$os" == "debian9" ] ; then
# which are not supported by OpenVINO
pkgs_core=(libpugixml1v5)
pkgs_gpu=()
pkgs_python=()
pkgs_dev=(pkg-config g++ gcc libc6-dev libgflags-dev zlib1g-dev nlohmann-json-dev make curl sudo)
elif [ "$os" == "ubuntu18.04" ] ; then
pkgs_core=(libtbb2 libpugixml1v5)
pkgs_gpu=()
pkgs_python=(python3.8 libpython3.8 python3.8-venv python3-pip)
pkgs_dev=(cmake pkg-config g++ gcc libc6-dev libgflags-dev zlib1g-dev nlohmann-json-dev make curl sudo)
@ -131,6 +133,7 @@ elif [ "$os" == "ubuntu20.04" ] || [ "$os" == "debian10" ] || [ "$os" == "raspbi
[ "$os" == "ubuntu22.10" ] || [ "$os" == "debian12" ] || [ "$os" == "raspbian12" ]; then
pkgs_core=(libpugixml1v5)
pkgs_gpu=()
pkgs_python=(python3 python3-venv python3-pip)
pkgs_dev=(cmake pkg-config g++ gcc libc6-dev libgflags-dev zlib1g-dev nlohmann-json3-dev make curl sudo)
@ -163,6 +166,7 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] ||
fi
pkgs_dev=(gcc gcc-c++ make glibc libstdc++ libgcc cmake3 "json-devel.$arch" "zlib-devel.$arch" sudo)
pkgs_gpu=()
if [ "$os" == "centos7" ] || [ "$os" == "amzn2" ] ; then
pkgs_dev+=(pkgconfig)
@ -193,6 +197,9 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] ||
"https://download-ib01.fedoraproject.org/pub/epel/8/Everything/$arch/Packages/p/pugixml-1.13-1.el8.$arch.rpm"
"https://vault.centos.org/centos/8/PowerTools/$arch/os/Packages/gflags-2.1.2-6.el8.$arch.rpm"
)
pkgs_gpu+=(
"http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/ocl-icd-2.2.12-1.el8.x86_64.rpm"
)
pkgs_python+=(python38 python38-pip)
pkgs_dev+=(
"https://vault.centos.org/centos/8/PowerTools/$arch/os/Packages/gflags-devel-2.1.2-6.el8.$arch.rpm"

View File

@ -1388,7 +1388,7 @@ TEST(ie_blob_make_memory_from_preallocated, makeMemoryfromPreallocated) {
tensor.dims = dim_t ;
tensor.precision = precision_e::U8;
tensor.layout = layout_e::NCHW;
uint8_t array[1][3][4][4]= {0};
uint8_t array[1][3][4][4]= {{{{0}}}};
size_t size = 48;
ie_blob_t *blob = nullptr;

View File

@ -1,10 +1,15 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ov_test.hpp"
namespace {
class ov_compiled_model : public ::testing::TestWithParam<std::string> {};
INSTANTIATE_TEST_SUITE_P(device_name, ov_compiled_model, ::testing::Values("CPU"));
TEST_P(ov_compiled_model, ov_compiled_model_inputs_size) {
auto device_name = GetParam();
ov_core_t* core = nullptr;
@ -394,3 +399,5 @@ TEST_P(ov_compiled_model, create_infer_request_error_handling) {
ov_model_free(model);
ov_core_free(core);
}
} // namespace

View File

@ -1,9 +1,12 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ov_test.hpp"
#include "test_model_repo.hpp"
namespace {
TEST(ov_version, api_version) {
ov_version_t version;
ov_get_openvino_version(&version);
@ -559,3 +562,5 @@ TEST_P(ov_core, ov_core_compile_model_from_file_unicode) {
ov_core_free(core);
}
#endif
} // namespace

View File

@ -5,6 +5,8 @@
#include "ov_test.hpp"
namespace {
inline void get_tensor_info(ov_model_t* model, bool input, char** name, ov_shape_t* shape, ov_element_type_e* type) {
ov_output_const_port* port = nullptr;
if (input) {
@ -398,3 +400,5 @@ TEST_P(ov_infer_request, get_profiling_info) {
ov_profiling_info_list_free(&profiling_infos);
}
} // namespace

View File

@ -23,7 +23,7 @@ TEST(ov_tensor, ov_tensor_create_from_host_ptr) {
ov_element_type_e type = ov_element_type_e::U8;
ov_shape_t shape;
setup_4d_shape(&shape, 1, 3, 4, 4);
uint8_t host_ptr[1][3][4][4] = {0};
uint8_t host_ptr[1][3][4][4] = {{{{0}}}};
ov_tensor_t* tensor = nullptr;
OV_EXPECT_OK(ov_tensor_create_from_host_ptr(type, shape, &host_ptr, &tensor));
EXPECT_NE(nullptr, tensor);

View File

@ -17,6 +17,14 @@ file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ie_api.pyx
file(GLOB PYX_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.pyx)
set_source_files_properties(${PYX_SOURCES} PROPERTIES CYTHON_IS_CXX ON)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# 'argument': conversion from 'size_t' to 'int', possible loss of data
ie_add_compiler_flags(/wd4267)
ie_add_compiler_flags(/wd4244)
elseif(CMAKE_COMPILER_IS_GNUCXX)
ie_add_compiler_flags(-Wno-unused-but-set-variable)
endif()
# create target
cython_add_module(${TARGET_NAME} ${SOURCES})
@ -45,13 +53,13 @@ if(COMMAND ie_add_vs_version_file)
endforeach()
endif()
function(python_disable_deprecated_warnings)
disable_deprecated_warnings()
function(python_ov_disable_deprecated_warnings)
ov_disable_deprecated_warnings()
set(pyx_file "${CMAKE_CURRENT_BINARY_DIR}/ie_api.cxx" "${CMAKE_CURRENT_BINARY_DIR}/constants.cxx")
set_source_files_properties(${pyx_file} PROPERTIES COMPILE_OPTIONS ${ie_c_cxx_deprecated})
endfunction()
python_disable_deprecated_warnings()
python_ov_disable_deprecated_warnings()
ie_python_minimal_api(${TARGET_NAME})
target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")

View File

@ -11,109 +11,21 @@ import numpy as np
from openvino._pyopenvino import Model
from openvino._pyopenvino import Core as CoreBase
from openvino._pyopenvino import CompiledModel as CompiledModelBase
from openvino._pyopenvino import InferRequest as InferRequestBase
from openvino._pyopenvino import AsyncInferQueue as AsyncInferQueueBase
from openvino._pyopenvino import ConstOutput
from openvino._pyopenvino import Tensor
def tensor_from_file(path: str) -> Tensor:
"""Create Tensor from file. Data will be read with dtype of unit8."""
return Tensor(np.fromfile(path, dtype=np.uint8)) # type: ignore
from openvino.runtime.utils.data_helpers import (
_InferRequestWrapper,
_data_dispatch,
tensor_from_file,
)
def set_scalar_tensor(request: InferRequestBase, tensor: Tensor, key: Union[str, int, ConstOutput] = None) -> None:
if key is None:
request.set_input_tensor(tensor)
elif isinstance(key, int):
request.set_input_tensor(key, tensor)
elif isinstance(key, (str, ConstOutput)):
request.set_tensor(key, tensor)
else:
raise TypeError(f"Unsupported key type: {type(key)} for Tensor under key: {key}")
@singledispatch
def update_tensor(
inputs: Union[np.ndarray, np.number, int, float],
request: InferRequestBase,
key: Union[str, int, ConstOutput] = None,
) -> None:
raise TypeError(f"Incompatible input data of type {type(inputs)} under {key} key!")
@update_tensor.register(np.ndarray)
def _(
inputs: np.ndarray,
request: InferRequestBase,
key: Union[str, int, ConstOutput] = None,
) -> None:
# If shape is "empty", assume this is a scalar value
if not inputs.shape:
set_scalar_tensor(request, Tensor(inputs), key)
else:
if key is None:
tensor = request.get_input_tensor()
elif isinstance(key, int):
tensor = request.get_input_tensor(key)
elif isinstance(key, (str, ConstOutput)):
tensor = request.get_tensor(key)
else:
raise TypeError(f"Unsupported key type: {type(key)} for Tensor under key: {key}")
# Update shape if there is a mismatch
if tensor.shape != inputs.shape:
tensor.shape = inputs.shape
# When copying, type should be up/down-casted automatically.
tensor.data[:] = inputs[:]
@update_tensor.register(np.number) # type: ignore
@update_tensor.register(float)
@update_tensor.register(int)
def _(
inputs: Union[np.number, float, int],
request: InferRequestBase,
key: Union[str, int, ConstOutput] = None,
) -> None:
set_scalar_tensor(
request,
Tensor(np.ndarray([], type(inputs), np.array(inputs))),
key,
)
def normalize_inputs(request: InferRequestBase, inputs: dict) -> dict:
"""Helper function to prepare inputs for inference.
It creates copy of Tensors or copy data to already allocated Tensors on device
if the item is of type `np.ndarray`, `np.number`, `int`, `float` or has numpy __array__ attribute.
"""
# Create new temporary dictionary.
# new_inputs will be used to transfer data to inference calls,
# ensuring that original inputs are not overwritten with Tensors.
new_inputs: Dict[Union[str, int, ConstOutput], Tensor] = {}
for key, value in inputs.items():
if not isinstance(key, (str, int, ConstOutput)):
raise TypeError(f"Incompatible key type for input: {key}")
# Copy numpy arrays to already allocated Tensors.
if isinstance(value, (np.ndarray, np.number, int, float)):
update_tensor(value, request, key)
# If value is of Tensor type, put it into temporary dictionary.
elif isinstance(value, Tensor):
new_inputs[key] = value
# If value object has __array__ attribute, load it to Tensor using np.array.
elif hasattr(value, "__array__"):
update_tensor(np.array(value, copy=True), request, key)
# Throw error otherwise.
else:
raise TypeError(f"Incompatible input data of type {type(value)} under {key} key!")
return new_inputs
class InferRequest(InferRequestBase):
class InferRequest(_InferRequestWrapper):
"""InferRequest class represents infer request which can be run in asynchronous or synchronous manners."""
def infer(self, inputs: Any = None) -> dict:
def infer(self, inputs: Any = None, shared_memory: bool = False) -> dict:
"""Infers specified input(s) in synchronous mode.
Blocks all methods of InferRequest while request is running.
@ -127,48 +39,49 @@ class InferRequest(InferRequestBase):
The allowed types of values in the `inputs` are:
(1) `numpy.array`
(1) `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
(2) `openvino.runtime.Tensor`
(3) array-like object with `__array__` attribute
Can be called with only one `openvino.runtime.Tensor` or `numpy.array`,
Can be called with only one `openvino.runtime.Tensor` or `numpy.ndarray`;
it will work only with one-input models. When the model has more inputs,
the function throws an error.
:param inputs: Data to be set on input tensors.
:type inputs: Any, optional
:param shared_memory: Enables `shared_memory` mode.
If set to `False`, the data dispatcher will safely copy input data
to existing Tensors (including up- or down-casting according to data type
and resizing of the input Tensor). Tensor inputs are kept "as-is".
If set to `True`, the data dispatcher tries to provide "zero-copy"
Tensors for every input in form of:
* `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
Data that is going to be copied:
* `numpy.ndarray` which are not C contiguous
* inputs whose data types do not match the Infer Request's inputs
* inputs that should be in `BF16` data type
* scalar inputs (i.e. `np.float_`/`int`/`float`)
Tensor inputs are kept "as-is".
Note: Use with extra care, shared data can be modified during runtime!
Note: Using `shared_memory` may result in extra memory overhead.
Default value: False
:type shared_memory: bool, optional
:return: Dictionary of results from output tensors with ports as keys.
:rtype: Dict[openvino.runtime.ConstOutput, numpy.array]
:rtype: Dict[openvino.runtime.ConstOutput, numpy.ndarray]
"""
# If inputs are empty, pass empty dictionary.
if inputs is None:
return super().infer({})
# If inputs are dict, normalize dictionary and call infer method.
elif isinstance(inputs, dict):
return super().infer(normalize_inputs(self, inputs))
# If inputs are list or tuple, enumerate inputs and save them as dictionary.
# It is an extension of above branch with dict inputs.
elif isinstance(inputs, (list, tuple)):
return super().infer(normalize_inputs(self, {index: input for index, input in enumerate(inputs)}))
# If inputs are Tensor, call infer method directly.
elif isinstance(inputs, Tensor):
return super().infer(inputs)
# If inputs are single numpy array or scalars, use helper function to copy them
# directly to Tensor or create temporary Tensor to pass into the InferRequest.
# Pass empty dictionary to infer method, inputs are already set by helper function.
elif isinstance(inputs, (np.ndarray, np.number, int, float)):
update_tensor(inputs, self)
return super().infer({})
elif hasattr(inputs, "__array__"):
update_tensor(np.array(inputs, copy=True), self)
return super().infer({})
else:
raise TypeError(f"Incompatible inputs of type: {type(inputs)}")
return super().infer(_data_dispatch(
self,
inputs,
is_shared=shared_memory,
))
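A minimal usage sketch of the new `shared_memory` flag on `InferRequest.infer` (not part of the diff; the model path and input shape below are hypothetical):

import numpy as np
from openvino.runtime import Core

core = Core()
compiled = core.compile_model("model.xml", "CPU")  # hypothetical model
request = compiled.create_infer_request()
data = np.ascontiguousarray(np.ones((1, 3, 224, 224), dtype=np.float32))
# Default behaviour: input data is copied into the request's tensors.
results = request.infer({0: data})
# Opt-in zero-copy: a C-contiguous array with a matching dtype is wrapped, not copied.
results = request.infer({0: data}, shared_memory=True)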
def start_async(
self,
inputs: Any = None,
userdata: Any = None,
shared_memory: bool = False,
) -> None:
"""Starts inference of specified input(s) in asynchronous mode.
@ -184,11 +97,10 @@ class InferRequest(InferRequestBase):
The allowed types of values in the `inputs` are:
(1) `numpy.array`
(1) `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
(2) `openvino.runtime.Tensor`
(3) array-like object with `__array__` attribute
Can be called with only one `openvino.runtime.Tensor` or `numpy.array`,
Can be called with only one `openvino.runtime.Tensor` or `numpy.ndarray`;
it will work only with one-input models. When the model has more inputs,
the function throws an error.
@ -196,23 +108,35 @@ class InferRequest(InferRequestBase):
:type inputs: Any, optional
:param userdata: Any data that will be passed inside the callback.
:type userdata: Any
:param shared_memory: Enables `shared_memory` mode.
If set to `False`, the data dispatcher will safely copy input data
to existing Tensors (including up- or down-casting according to data type
and resizing of the input Tensor). Tensor inputs are kept "as-is".
If set to `True`, the data dispatcher tries to provide "zero-copy"
Tensors for every input in form of:
* `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
Data that is going to be copied:
* `numpy.ndarray` which are not C contiguous
* inputs whose data types do not match the Infer Request's inputs
* inputs that should be in `BF16` data type
* scalar inputs (i.e. `np.float_`/`int`/`float`)
Tensor inputs are kept "as-is".
Note: Use with extra care, shared data can be modified during runtime!
Note: Using `shared_memory` may result in extra memory overhead.
Default value: False
:type shared_memory: bool, optional
"""
if inputs is None:
super().start_async({}, userdata)
elif isinstance(inputs, dict):
super().start_async(normalize_inputs(self, inputs), userdata)
elif isinstance(inputs, (list, tuple)):
super().start_async(normalize_inputs(self, {index: input for index, input in enumerate(inputs)}), userdata)
elif isinstance(inputs, Tensor):
super().start_async(inputs, userdata)
elif isinstance(inputs, (np.ndarray, np.number, int, float)):
update_tensor(inputs, self)
return super().start_async({}, userdata)
elif hasattr(inputs, "__array__"):
update_tensor(np.array(inputs, copy=True), self)
return super().start_async({}, userdata)
else:
raise TypeError(f"Incompatible inputs of type: {type(inputs)}")
super().start_async(
_data_dispatch(
self,
inputs,
is_shared=shared_memory,
),
userdata,
)
class CompiledModel(CompiledModelBase):
@ -222,6 +146,11 @@ class CompiledModel(CompiledModelBase):
multiple optimization transformations, then mapping to compute kernels.
"""
def __init__(self, other: CompiledModelBase) -> None:
# Private member to store an already created InferRequest
self._infer_request: Optional[InferRequest] = None
super().__init__(other)
def create_infer_request(self) -> InferRequest:
"""Creates an inference request object used to infer the compiled model.
@ -249,15 +178,15 @@ class CompiledModel(CompiledModelBase):
The allowed types of values in the `inputs` are:
(1) `numpy.array`
(1) `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
(2) `openvino.runtime.Tensor`
Can be called with only one `openvino.runtime.Tensor` or `numpy.array`,
Can be called with only one `openvino.runtime.Tensor` or `numpy.ndarray`;
it will work only with one-input models. When the model has more inputs,
the function throws an error.
:param inputs: Data to be set on input tensors.
:type inputs: Union[Dict[keys, values], List[values], Tuple[values], Tensor, numpy.array], optional
:type inputs: Union[Dict[keys, values], List[values], Tuple[values], Tensor, numpy.ndarray], optional
:return: Dictionary of results from output tensors with ports as keys.
:rtype: Dict[openvino.runtime.ConstOutput, numpy.array]
"""
@ -265,12 +194,70 @@ class CompiledModel(CompiledModelBase):
# overloaded functions of InferRequest class
return self.create_infer_request().infer(inputs)
def __call__(self, inputs: Optional[Union[dict, list]] = None) -> dict:
def __call__(self,
inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None,
shared_memory: bool = True) -> dict:
"""Callable infer wrapper for CompiledModel.
Take a look at `infer_new_request` for reference.
Infers specified input(s) in synchronous mode.
Blocks all methods of CompiledModel while request is running.
Method creates a new temporary InferRequest and runs inference on it.
It is advised to use a dedicated InferRequest class for performance,
optimizing workflows, and creating advanced pipelines.
This method stores the created `InferRequest` inside the `CompiledModel` object,
so it can be reused in consecutive calls.
The allowed types of keys in the `inputs` dictionary are:
(1) `int`
(2) `str`
(3) `openvino.runtime.ConstOutput`
The allowed types of values in the `inputs` are:
(1) `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
(2) `openvino.runtime.Tensor`
Can be called with only one `openvino.runtime.Tensor` or `numpy.ndarray`;
it will work only with one-input models. When the model has more inputs,
the function throws an error.
:param inputs: Data to be set on input tensors.
:type inputs: Union[Dict[keys, values], List[values], Tuple[values], Tensor, numpy.ndarray], optional
:param shared_memory: Enables `shared_memory` mode.
If set to `False`, the data dispatcher will safely copy input data
to existing Tensors (including up- or down-casting according to data type
and resizing of the input Tensor). Tensor inputs are kept "as-is".
If set to `True`, the data dispatcher tries to provide "zero-copy"
Tensors for every input in form of:
* `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
Data that is going to be copied:
* `numpy.ndarray` which are not C contiguous
* inputs whose data types do not match the Infer Request's inputs
* inputs that should be in `BF16` data type
* scalar inputs (i.e. `np.float_`/`int`/`float`)
Tensor inputs are kept "as-is".
Note: Use with extra care, shared data can be modified during runtime!
Note: Using `shared_memory` may result in extra memory overhead.
Default value: True
:type shared_memory: bool, optional
:return: Dictionary of results from output tensors with ports as keys.
:rtype: Dict[openvino.runtime.ConstOutput, numpy.ndarray]
"""
return self.infer_new_request(inputs)
if self._infer_request is None:
self._infer_request = self.create_infer_request()
return self._infer_request.infer(
inputs,
shared_memory=shared_memory,
)
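A short sketch of the request-caching behaviour added to `CompiledModel.__call__` (not part of the diff; the model path and input are hypothetical):

import numpy as np
from openvino.runtime import Core

compiled = Core().compile_model("model.xml", "CPU")  # hypothetical model
data = np.zeros(list(compiled.input(0).shape), dtype=np.float32)
out_shared = compiled(data)                        # first call creates and caches an InferRequest; shared_memory defaults to True
out_copied = compiled(data, shared_memory=False)   # later calls reuse the cached request; here inputs are copied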
class AsyncInferQueue(AsyncInferQueueBase):
@ -303,6 +290,7 @@ class AsyncInferQueue(AsyncInferQueueBase):
self,
inputs: Any = None,
userdata: Any = None,
shared_memory: bool = False,
) -> None:
"""Run asynchronous inference using the next available InferRequest from the pool.
@ -314,11 +302,10 @@ class AsyncInferQueue(AsyncInferQueueBase):
The allowed types of values in the `inputs` are:
(1) `numpy.array`
(1) `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
(2) `openvino.runtime.Tensor`
(3) array-like object with `__array__` attribute
Can be called with only one `openvino.runtime.Tensor` or `numpy.array`,
Can be called with only one `openvino.runtime.Tensor` or `numpy.ndarray`;
it will work only with one-input models. When the model has more inputs,
the function throws an error.
@ -326,32 +313,34 @@ class AsyncInferQueue(AsyncInferQueueBase):
:type inputs: Any, optional
:param userdata: Any data that will be passed to a callback.
:type userdata: Any, optional
:param shared_memory: Enables `shared_memory` mode.
If set to `False`, the data dispatcher will safely copy input data
to existing Tensors (including up- or down-casting according to data type
and resizing of the input Tensor). Tensor inputs are kept "as-is".
If set to `True`, the data dispatcher tries to provide "zero-copy"
Tensors for every input in form of:
* `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
Data that is going to be copied:
* `numpy.ndarray` which are not C contiguous
* inputs whose data types do not match the Infer Request's inputs
* inputs that should be in `BF16` data type
* scalar inputs (i.e. `np.float_`/`int`/`float`)
Tensor inputs are kept "as-is".
Note: Use with extra care, shared data can be modified during runtime!
Note: Using `shared_memory` may result in extra memory overhead.
Default value: False
"""
if inputs is None:
super().start_async({}, userdata)
elif isinstance(inputs, dict):
super().start_async(
normalize_inputs(self[self.get_idle_request_id()], inputs),
userdata,
)
elif isinstance(inputs, (list, tuple)):
super().start_async(
normalize_inputs(
self[self.get_idle_request_id()],
{index: input for index, input in enumerate(inputs)},
),
userdata,
)
elif isinstance(inputs, Tensor):
super().start_async(inputs, userdata)
elif isinstance(inputs, (np.ndarray, np.number, int, float)):
update_tensor(inputs, self[self.get_idle_request_id()])
super().start_async({}, userdata)
elif hasattr(inputs, "__array__"):
update_tensor(np.array(inputs, copy=True), self[self.get_idle_request_id()])
super().start_async({}, userdata)
else:
raise TypeError(f"Incompatible inputs of type: {type(inputs)}")
super().start_async(
_data_dispatch(
self[self.get_idle_request_id()],
inputs,
is_shared=shared_memory,
),
userdata,
)
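A hedged sketch of `AsyncInferQueue.start_async` with the new flag (outside the diff; the model path and shapes are hypothetical). With `shared_memory=True`, each submitted array must stay alive and unmodified until its job completes:

import numpy as np
from openvino.runtime import AsyncInferQueue, Core

compiled = Core().compile_model("model.xml", "CPU")  # hypothetical model
queue = AsyncInferQueue(compiled, 4)                 # pool of 4 infer requests
frames = [np.zeros(list(compiled.input(0).shape), dtype=np.float32) for _ in range(8)]
for i, frame in enumerate(frames):
    queue.start_async({0: frame}, userdata=i, shared_memory=True)
queue.wait_all()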
class Core(CoreBase):

View File

@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from openvino.runtime.utils.data_helpers.data_dispatcher import _data_dispatch
from openvino.runtime.utils.data_helpers.wrappers import tensor_from_file
from openvino.runtime.utils.data_helpers.wrappers import _InferRequestWrapper

View File

@ -0,0 +1,336 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from functools import singledispatch
from typing import Any, Dict, Union, Optional
import numpy as np
from openvino._pyopenvino import ConstOutput, Tensor, Type
from openvino.runtime.utils.data_helpers.wrappers import _InferRequestWrapper
ContainerTypes = Union[dict, list, tuple]
ScalarTypes = Union[np.number, int, float]
ValidKeys = Union[str, int, ConstOutput]
def get_request_tensor(
request: _InferRequestWrapper,
key: Optional[ValidKeys] = None,
) -> Tensor:
if key is None:
return request.get_input_tensor()
elif isinstance(key, int):
return request.get_input_tensor(key)
elif isinstance(key, (str, ConstOutput)):
return request.get_tensor(key)
else:
raise TypeError(f"Unsupported key type: {type(key)} for Tensor under key: {key}")
@singledispatch
def value_to_tensor(
value: Union[Tensor, np.ndarray, ScalarTypes],
request: Optional[_InferRequestWrapper] = None,
is_shared: bool = False,
key: Optional[ValidKeys] = None,
) -> None:
raise TypeError(f"Incompatible inputs of type: {type(value)}")
@value_to_tensor.register(Tensor)
def _(
value: Tensor,
request: Optional[_InferRequestWrapper] = None,
is_shared: bool = False,
key: Optional[ValidKeys] = None,
) -> Tensor:
return value
@value_to_tensor.register(np.ndarray)
def _(
value: np.ndarray,
request: _InferRequestWrapper,
is_shared: bool = False,
key: Optional[ValidKeys] = None,
) -> Tensor:
# Edge-case for numpy arrays if shape is "empty",
# assume this is a scalar value - always copy
if not value.shape:
return Tensor(np.ndarray([], value.dtype, np.array(value)))
tensor_type = get_request_tensor(request, key).get_element_type()
tensor_dtype = tensor_type.to_dtype()
# WA for FP16-->BF16 edge-case - always copy
if tensor_type == Type.bf16:
tensor = Tensor(tensor_type, value.shape)
tensor.data[:] = value.view(tensor_dtype)
return tensor
return Tensor(value.astype(tensor_dtype) if tensor_dtype != value.dtype else value, shared_memory=is_shared)
@value_to_tensor.register(np.number)
@value_to_tensor.register(int)
@value_to_tensor.register(float)
def _(
value: ScalarTypes,
request: Optional[_InferRequestWrapper] = None,
is_shared: bool = False,
key: Optional[ValidKeys] = None,
) -> Tensor:
return Tensor(np.ndarray([], type(value), np.array(value)))
def to_c_style(value: Any, is_shared: bool = False) -> Any:
if not isinstance(value, np.ndarray):
if hasattr(value, "__array__"):
return to_c_style(np.array(value, copy=False)) if is_shared else np.array(value, copy=True)
return value
# Check for C-style layout; if not contiguous, convert the data (or raise an error?)
return value if value.flags["C_CONTIGUOUS"] else np.ascontiguousarray(value)
###
# Start of array normalization.
###
@singledispatch
def normalize_arrays(
inputs: Any,
is_shared: bool = False,
) -> Any:
# Check the special case of the array-interface
if hasattr(inputs, "__array__"):
return to_c_style(np.array(inputs, copy=False)) if is_shared else np.array(inputs, copy=True)
# Error should be raised if type does not match any dispatchers
raise TypeError(f"Incompatible inputs of type: {type(inputs)}")
@normalize_arrays.register(dict)
def _(
inputs: dict,
is_shared: bool = False,
) -> dict:
return {k: to_c_style(v) if is_shared else v for k, v in inputs.items()}
@normalize_arrays.register(list)
@normalize_arrays.register(tuple)
def _(
inputs: Union[list, tuple],
is_shared: bool = False,
) -> dict:
return {i: to_c_style(v) if is_shared else v for i, v in enumerate(inputs)}
@normalize_arrays.register(np.ndarray)
def _(
inputs: np.ndarray,
is_shared: bool = False,
) -> Any:
return to_c_style(inputs) if is_shared else inputs
###
# End of array normalization.
###
###
# Start of "shared" dispatcher.
# (1) Each method should keep Tensors "as-is", regardless to them being shared or not.
# (2) ...
###
# Step to keep alive input values that are not C-style by default
@singledispatch
def create_shared(
inputs: Any,
request: _InferRequestWrapper,
) -> None:
# Check the special case of the array-interface
if hasattr(inputs, "__array__"):
request._inputs_data = normalize_arrays(inputs, is_shared=True)
return value_to_tensor(request._inputs_data, request=request, is_shared=True)
# Error should be raised if type does not match any dispatchers
raise TypeError(f"Incompatible inputs of type: {type(inputs)}")
@create_shared.register(dict)
@create_shared.register(list)
@create_shared.register(tuple)
def _(
inputs: ContainerTypes,
request: _InferRequestWrapper,
) -> dict:
request._inputs_data = normalize_arrays(inputs, is_shared=True)
return {k: value_to_tensor(v, request=request, is_shared=True, key=k) for k, v in request._inputs_data.items()}
@create_shared.register(np.ndarray)
def _(
inputs: np.ndarray,
request: _InferRequestWrapper,
) -> Tensor:
request._inputs_data = normalize_arrays(inputs, is_shared=True)
return value_to_tensor(request._inputs_data, request=request, is_shared=True)
@create_shared.register(Tensor)
@create_shared.register(np.number)
@create_shared.register(int)
@create_shared.register(float)
def _(
inputs: Union[Tensor, ScalarTypes],
request: _InferRequestWrapper,
) -> Tensor:
return value_to_tensor(inputs, request=request, is_shared=True)
###
# End of "shared" dispatcher methods.
###
###
# Start of "copied" dispatcher.
###
def set_request_tensor(
request: _InferRequestWrapper,
tensor: Tensor,
key: Optional[ValidKeys] = None,
) -> None:
if key is None:
request.set_input_tensor(tensor)
elif isinstance(key, int):
request.set_input_tensor(key, tensor)
elif isinstance(key, (str, ConstOutput)):
request.set_tensor(key, tensor)
else:
raise TypeError(f"Unsupported key type: {type(key)} for Tensor under key: {key}")
@singledispatch
def update_tensor(
inputs: Any,
request: _InferRequestWrapper,
key: Optional[ValidKeys] = None,
) -> None:
if hasattr(inputs, "__array__"):
update_tensor(normalize_arrays(inputs, is_shared=False), request, key=None)
return None
raise TypeError(f"Incompatible inputs of type: {type(inputs)} under {key} key!")
@update_tensor.register(np.ndarray)
def _(
inputs: np.ndarray,
request: _InferRequestWrapper,
key: Optional[ValidKeys] = None,
) -> None:
# If shape is "empty", assume this is a scalar value
if not inputs.shape:
set_request_tensor(
request,
value_to_tensor(inputs, request=request, is_shared=False),
key,
)
else:
tensor = get_request_tensor(request, key)
# Update shape if there is a mismatch
if tensor.shape != inputs.shape:
tensor.shape = inputs.shape
# When copying, type should be up/down-casted automatically.
tensor.data[:] = inputs[:]
@update_tensor.register(np.number) # type: ignore
@update_tensor.register(float)
@update_tensor.register(int)
def _(
inputs: Union[np.number, float, int],
request: _InferRequestWrapper,
key: Optional[ValidKeys] = None,
) -> None:
set_request_tensor(
request,
value_to_tensor(inputs, is_shared=False),
key,
)
def update_inputs(inputs: dict, request: _InferRequestWrapper) -> dict:
"""Helper function to prepare inputs for inference.
It creates a copy of Tensors or copies data to already allocated Tensors on the device
if the item is of type `np.ndarray`, `np.number`, `int`, `float` or has a numpy __array__ attribute.
"""
# Create new temporary dictionary.
# new_inputs will be used to transfer data to inference calls,
# ensuring that original inputs are not overwritten with Tensors.
new_inputs: Dict[ValidKeys, Tensor] = {}
for key, value in inputs.items():
if not isinstance(key, (str, int, ConstOutput)):
raise TypeError(f"Incompatible key type for input: {key}")
# Copy numpy arrays to already allocated Tensors.
# If value object has __array__ attribute, load it to Tensor using np.array
if isinstance(value, (np.ndarray, np.number, int, float)) or hasattr(value, "__array__"):
update_tensor(value, request, key)
# If value is of Tensor type, put it into temporary dictionary.
elif isinstance(value, Tensor):
new_inputs[key] = value
# Throw error otherwise.
else:
raise TypeError(f"Incompatible inputs of type: {type(value)} under {key} key!")
return new_inputs
@singledispatch
def create_copied(
inputs: Union[ContainerTypes, np.ndarray, ScalarTypes],
request: _InferRequestWrapper,
) -> Union[dict, None]:
# Check the special case of the array-interface
if hasattr(inputs, "__array__"):
update_tensor(normalize_arrays(inputs, is_shared=False), request, key=None)
return {}
# Error should be raised if type does not match any dispatchers
raise TypeError(f"Incompatible inputs of type: {type(inputs)}")
@create_copied.register(dict)
@create_copied.register(list)
@create_copied.register(tuple)
def _(
inputs: ContainerTypes,
request: _InferRequestWrapper,
) -> dict:
return update_inputs(normalize_arrays(inputs, is_shared=False), request)
@create_copied.register(np.ndarray)
def _(
inputs: np.ndarray,
request: _InferRequestWrapper,
) -> dict:
update_tensor(normalize_arrays(inputs, is_shared=False), request, key=None)
return {}
@create_copied.register(Tensor)
@create_copied.register(np.number)
@create_copied.register(int)
@create_copied.register(float)
def _(
inputs: Union[Tensor, ScalarTypes],
request: _InferRequestWrapper,
) -> Tensor:
return value_to_tensor(inputs, is_shared=False)
###
# End of "copied" dispatcher methods.
###
def _data_dispatch(
request: _InferRequestWrapper,
inputs: Union[ContainerTypes, Tensor, np.ndarray, ScalarTypes] = None,
is_shared: bool = False,
) -> Union[dict, Tensor]:
if inputs is None:
return {}
return create_shared(inputs, request) if is_shared else create_copied(inputs, request)
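An illustrative sketch of the array-normalization helpers defined above (not part of the diff; it assumes the new `data_dispatcher` module is importable):

import numpy as np
from openvino.runtime.utils.data_helpers.data_dispatcher import normalize_arrays, to_c_style

fortran = np.asfortranarray(np.ones((2, 3), dtype=np.float32))
# Non C-contiguous arrays are copied into C order when shared mode is requested.
assert to_c_style(fortran, is_shared=True).flags["C_CONTIGUOUS"]
# Lists and tuples are converted into index-keyed dictionaries.
assert list(normalize_arrays([fortran, fortran], is_shared=True)) == [0, 1]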

View File

@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import numpy as np
from openvino._pyopenvino import Tensor
from openvino._pyopenvino import InferRequest as InferRequestBase
def tensor_from_file(path: str) -> Tensor:
"""Create Tensor from file. Data will be read with dtype of unit8."""
return Tensor(np.fromfile(path, dtype=np.uint8)) # type: ignore
class _InferRequestWrapper(InferRequestBase):
"""InferRequest class with internal memory."""
def __init__(self, other: InferRequestBase) -> None:
# Private member to store newly created shared-memory data
self._inputs_data = None
super().__init__(other)
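A tiny usage sketch for the relocated `tensor_from_file` helper (the file name is hypothetical):

from openvino.runtime.utils.data_helpers import tensor_from_file

raw = tensor_from_file("input.bin")  # whole file read as raw uint8 bytes
print(raw.shape)                     # one dimension, one element per byte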

View File

@ -326,26 +326,57 @@ void regclass_Core(py::module m) {
cls.def(
"read_model",
[](ov::Core& self, py::object model_path, py::object weights_path) {
std::string model_path_cpp{py::str(model_path)};
std::string weights_path_cpp{py::str(weights_path)};
py::gil_scoped_release release;
return self.read_model(model_path_cpp, weights_path_cpp);
if (py::isinstance(model_path, pybind11::module::import("io").attr("BytesIO"))) {
std::stringstream _stream;
model_path.attr("seek")(0); // Always rewind stream!
_stream << model_path
.attr("read")() // alternative: model_path.attr("get_value")()
.cast<std::string>();
py::buffer_info info;
if (!py::isinstance<py::none>(weights_path)) {
auto p = weights_path.cast<py::bytes>();
info = py::buffer(p).request();
}
size_t bin_size = static_cast<size_t>(info.size);
ov::Tensor tensor(ov::element::Type_t::u8, {bin_size});
// if weights are not empty
if (bin_size) {
const uint8_t* bin = reinterpret_cast<const uint8_t*>(info.ptr);
std::memcpy(tensor.data(), bin, bin_size);
}
py::gil_scoped_release release;
return self.read_model(_stream.str(), tensor);
} else if (py::isinstance(model_path, py::module_::import("pathlib").attr("Path")) ||
py::isinstance<py::str>(model_path)) {
const std::string model_path_cpp{py::str(model_path)};
std::string weights_path_cpp;
if (!py::isinstance<py::none>(weights_path)) {
weights_path_cpp = py::str(weights_path);
}
py::gil_scoped_release release;
return self.read_model(model_path_cpp, weights_path_cpp);
}
std::stringstream str;
str << "Provided python object type " << model_path.get_type().str()
<< " isn't supported as 'model' argument.";
throw ov::Exception(str.str());
},
py::arg("model"),
py::arg("weights") = "",
py::arg("weights") = py::none(),
R"(
Reads models from IR / ONNX / PDPD formats.
GIL is released while running this function.
:param model: A string with model in IR / ONNX / PDPD format.
:type model: str
:param model: A path to a model in IR / ONNX / PDPD format or the model itself wrapped in an io.BytesIO object.
:type model: Union[pathlib.Path, io.BytesIO]
:param weights: A path to a data file. For IR format (*.bin): if the path is empty,
it tries to read a .bin file with the same name as the .xml file; if no such
.bin file is found, the IR is loaded without weights.
For ONNX format (*.onnx): the weights parameter is not used.
For PDPD format (*.pdmodel): the weights parameter is not used.
:type weights: str
:type weights: pathlib.Path
:return: A model.
:rtype: openvino.runtime.Model
)");

View File

@ -19,6 +19,13 @@ namespace py = pybind11;
using namespace ov::frontend;
class MemoryBuffer : public std::streambuf {
public:
MemoryBuffer(char* data, std::size_t size) {
setg(data, data, data + size);
}
};
void regclass_frontend_FrontEnd(py::module m) {
py::class_<FrontEnd, std::shared_ptr<FrontEnd>> fem(m, "FrontEnd", py::dynamic_attr(), py::module_local());
fem.doc() = "openvino.frontend.FrontEnd wraps ov::frontend::FrontEnd";
@ -26,10 +33,18 @@ void regclass_frontend_FrontEnd(py::module m) {
fem.def(
"load",
[](FrontEnd& self, const py::object& py_obj) {
try {
if (py::isinstance(py_obj, py::module_::import("pathlib").attr("Path")) ||
py::isinstance<py::str>(py_obj) || py::isinstance<py::bytes>(py_obj)) {
// check if model path is either a string/pathlib.Path/bytes
std::string model_path = Common::utils::convert_path_to_string(py_obj);
return self.load(model_path);
} catch (...) {
} else if (py::isinstance(py_obj, pybind11::module::import("io").attr("BytesIO"))) {
// support of BytesIO
py::buffer_info info = py::buffer(py_obj.attr("getbuffer")()).request();
MemoryBuffer mb(reinterpret_cast<char*>(info.ptr), info.size);
std::istream _istream(&mb);
return self.load(&_istream);
} else {
// Extended for one argument only for this time
return self.load({Common::utils::py_object_to_any(py_obj)});
}
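A brief sketch of loading a model from an in-memory stream through the frontend API (not part of the diff; the ONNX file name is hypothetical):

import io
from openvino.frontend import FrontEndManager

fem = FrontEndManager()
fe = fem.load_by_framework("onnx")
with open("model.onnx", "rb") as f:
    stream = io.BytesIO(f.read())
input_model = fe.load(stream)   # BytesIO is now accepted directly
ov_model = fe.convert(input_model)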

View File

@ -31,7 +31,7 @@ struct is_std_vector<std::vector<T, A>> : std::true_type {};
for (auto el : casted) { \
py_list.append(py_type(el)); \
} \
return py_list; \
return std::move(py_list); \
} \
return py::cast(casted); \
} \

View File

@ -17,6 +17,10 @@ template void regclass_graph_Output<const ov::Node>(py::module m, std::string ty
template <typename T>
void def_type_dependent_functions(py::class_<ov::Output<T>, std::shared_ptr<ov::Output<T>>>& output) {}
template <>
void def_type_dependent_functions<const ov::Node>(
py::class_<ov::Output<const ov::Node>, std::shared_ptr<ov::Output<const ov::Node>>>& output) {}
template <>
void def_type_dependent_functions<ov::Node>(
py::class_<ov::Output<ov::Node>, std::shared_ptr<ov::Output<ov::Node>>>& output) {

View File

@ -28,6 +28,10 @@ template<>
void def_type_dependent_functions<ov::Node>(py::class_<ov::Output<ov::Node>,
std::shared_ptr<ov::Output<ov::Node>>>& output);
template<>
void def_type_dependent_functions<const ov::Node>(py::class_<ov::Output<const ov::Node>,
std::shared_ptr<ov::Output<const ov::Node>>>& output);
template <typename VT>
void regclass_graph_Output(py::module m, std::string typestring)
{

View File

@ -39,7 +39,7 @@ public:
* ignored by implicit casters.
*/
static handle cast(ov::Layout src, return_value_policy policy, handle parent) {
return cast(src, policy, parent);
return pybind11::cast(src, policy, parent);
}
};
}

View File

@ -2,6 +2,7 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import io
import os
import onnx
import numpy as np
@ -172,6 +173,7 @@ def run_model(model, *inputs, expected):
# FrontEndManager shall be initialized and destroyed after all tests finished
# This is because destroy of FrontEndManager will unload all plugins, no objects shall exist after this
fem = FrontEndManager()
model_stream = io.BytesIO()
onnx_model_filename = "model.onnx"
onnx_model_2_filename = "model2.onnx"
onnx_model_with_custom_attributes_filename = "model_custom_attributes.onnx"
@ -183,6 +185,7 @@ ONNX_FRONTEND_NAME = "onnx"
def setup_module():
onnx.save_model(create_onnx_model(), onnx_model_filename)
onnx.save_model(create_onnx_model(), model_stream)
onnx.save_model(create_onnx_model_2(), onnx_model_2_filename)
onnx.save_model(create_onnx_model_with_custom_attributes(),
onnx_model_with_custom_attributes_filename)
@ -719,3 +722,17 @@ def test_so_extension_via_frontend_decode_input_model():
decoded_model = load_decoded_model() # decoded model has longer lifetime than frontend
assert decoded_model
def test_load_bytesio_model():
from openvino.runtime import Core
fe = fem.load_by_framework(framework=ONNX_FRONTEND_NAME)
model_from_fe = fe.load(model_stream)
assert model_from_fe
converted_model = fe.convert(model_from_fe)
assert converted_model.friendly_name == "graph"
core = Core()
model = core.read_model(model_stream)
assert converted_model.friendly_name == model.friendly_name

View File

@ -177,7 +177,7 @@ def test_serialize_pass_tuple(request, tmp_path):
model = ov.floor(ov.minimum(ov.abs(parameter_a), ov.multiply(parameter_b, parameter_c)))
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
pass_manager = Manager()
pass_manager.register_pass("Serialize", output_files=(str(xml_path), str(bin_path)))
pass_manager.register_pass("Serialize", output_files=(xml_path, bin_path))
pass_manager.run_passes(func)
res_model = core.read_model(model=xml_path, weights=bin_path)
@ -201,7 +201,7 @@ def test_default_version(request, tmp_path):
model = ov.floor(ov.minimum(ov.abs(parameter_a), ov.multiply(parameter_b, parameter_c)))
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
pass_manager = Manager()
pass_manager.register_pass("Serialize", output_files=(str(xml_path), str(bin_path)))
pass_manager.register_pass("Serialize", output_files=(xml_path, bin_path))
pass_manager.run_passes(func)
res_model = core.read_model(model=xml_path, weights=bin_path)
@ -225,7 +225,7 @@ def test_default_version_IR_V11_tuple(request, tmp_path):
model = ov.floor(ov.minimum(ov.abs(parameter_a), ov.multiply(parameter_b, parameter_c)))
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
pass_manager = Manager()
pass_manager.register_pass("Serialize", output_files=(str(xml_path), str(bin_path)), version="IR_V11")
pass_manager.register_pass("Serialize", output_files=(xml_path, bin_path), version="IR_V11")
pass_manager.run_passes(func)
res_model = core.read_model(model=xml_path, weights=bin_path)

View File

@ -210,11 +210,12 @@ def test_infer_tensor_wrong_input_data(device):
assert "Incompatible key type for input: 0.0" in str(e.value)
def test_direct_infer(device):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_direct_infer(device, shared_flag):
compiled_model, img = generate_model_and_image(device)
tensor = Tensor(img)
res = compiled_model({"data": tensor})
res = compiled_model({"data": tensor}, shared_memory=shared_flag)
assert np.argmax(res[compiled_model.outputs[0]]) == 531
ref = compiled_model.infer_new_request({"data": tensor})
assert np.array_equal(ref[compiled_model.outputs[0]], res[compiled_model.outputs[0]])
@ -231,4 +232,4 @@ def test_compiled_model_after_core_destroyed(device):
del core
del model
# check compiled and infer request can work properly after core object is destroyed
compiled([np.random.normal(size=list(input.shape)) for input in compiled.inputs])
compiled([np.random.normal(size=list(input.shape)).astype(dtype=input.get_element_type().to_dtype()) for input in compiled.inputs])

View File

@ -109,6 +109,13 @@ def test_read_model_from_tensor():
assert isinstance(model, Model)
def test_read_model_with_wrong_input():
core = Core()
with pytest.raises(RuntimeError) as e:
core.read_model(model=3, weights=3)
assert "Provided python object type <class 'int'> isn't supported as 'model' argument." in str(e.value)
def test_read_model_as_path():
core = Core()
model = core.read_model(model=Path(test_net_xml), weights=Path(test_net_bin))
@ -133,7 +140,7 @@ def test_read_model_from_onnx_as_path():
assert isinstance(model, Model)
def test_read_net_from_buffer():
def test_read_model_from_buffer():
core = Core()
with open(test_net_bin, "rb") as f:
weights = f.read()
@ -143,7 +150,7 @@ def test_read_net_from_buffer():
assert isinstance(model, Model)
def test_net_from_buffer_valid():
def test_model_from_buffer_valid():
core = Core()
with open(test_net_bin, "rb") as f:
weights = f.read()

View File

@ -304,7 +304,8 @@ def test_cancel(device):
assert "[ INFER_CANCELLED ]" in str(e.value)
def test_start_async(device):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_start_async(device, shared_flag):
core = Core()
model = core.read_model(test_net_xml, test_net_bin)
compiled_model = core.compile_model(model, device)
@ -322,14 +323,15 @@ def test_start_async(device):
callbacks_info["finished"] = 0
for request in requests:
request.set_callback(callback, callbacks_info)
request.start_async({0: img})
request.start_async({0: img}, shared_memory=shared_flag)
for request in requests:
request.wait()
assert request.latency > 0
assert callbacks_info["finished"] == jobs
def test_infer_list_as_inputs(device):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_infer_list_as_inputs(device, shared_flag):
num_inputs = 4
input_shape = [2, 1]
dtype = np.float32
@ -345,17 +347,18 @@ def test_infer_list_as_inputs(device):
request = compiled_model.create_infer_request()
inputs = [np.random.normal(size=input_shape).astype(dtype)]
request.infer(inputs)
request.infer(inputs, shared_memory=shared_flag)
check_fill_inputs(request, inputs)
inputs = [
np.random.normal(size=input_shape).astype(dtype) for _ in range(num_inputs)
]
request.infer(inputs)
request.infer(inputs, shared_memory=shared_flag)
check_fill_inputs(request, inputs)
def test_infer_mixed_keys(device):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_infer_mixed_keys(device, shared_flag):
core = Core()
model = get_relu_model()
compiled_model = core.compile_model(model, device)
@ -367,7 +370,7 @@ def test_infer_mixed_keys(device):
tensor2 = Tensor(data2)
request = compiled_model.create_infer_request()
res = request.infer({0: tensor2, "data": tensor})
res = request.infer({0: tensor2, "data": tensor}, shared_memory=shared_flag)
assert np.argmax(res[compiled_model.output()]) == 531
@ -386,10 +389,11 @@ def test_infer_mixed_keys(device):
(Type.u64, np.uint64),
(Type.boolean, bool),
])
def test_infer_mixed_values(device, ov_type, numpy_dtype):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_infer_mixed_values(device, ov_type, numpy_dtype, shared_flag):
request, tensor1, array1 = concat_model_with_data(device, ov_type, numpy_dtype)
request.infer([tensor1, array1])
request.infer([tensor1, array1], shared_memory=shared_flag)
assert np.array_equal(request.outputs[0].data, np.concatenate((tensor1.data, array1)))
@ -409,10 +413,11 @@ def test_infer_mixed_values(device, ov_type, numpy_dtype):
(Type.u64, np.uint64),
(Type.boolean, bool),
])
def test_async_mixed_values(device, ov_type, numpy_dtype):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_async_mixed_values(device, ov_type, numpy_dtype, shared_flag):
request, tensor1, array1 = concat_model_with_data(device, ov_type, numpy_dtype)
request.start_async([tensor1, array1])
request.start_async([tensor1, array1], shared_memory=shared_flag)
request.wait()
assert np.array_equal(request.outputs[0].data, np.concatenate((tensor1.data, array1)))
@ -429,13 +434,14 @@ def test_async_mixed_values(device, ov_type, numpy_dtype):
(Type.u16, np.uint16),
(Type.i64, np.int64),
])
def test_infer_single_input(device, ov_type, numpy_dtype):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_infer_single_input(device, ov_type, numpy_dtype, shared_flag):
request, tensor1, array1 = abs_model_with_data(device, ov_type, numpy_dtype)
request.infer(array1)
request.infer(array1, shared_memory=shared_flag)
assert np.array_equal(request.get_output_tensor().data, np.abs(array1))
request.infer(tensor1)
request.infer(tensor1, shared_memory=shared_flag)
assert np.array_equal(request.get_output_tensor().data, np.abs(tensor1.data))
@ -450,19 +456,21 @@ def test_infer_single_input(device, ov_type, numpy_dtype):
(Type.u16, np.uint16),
(Type.i64, np.int64),
])
def test_async_single_input(device, ov_type, numpy_dtype):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_async_single_input(device, ov_type, numpy_dtype, shared_flag):
request, tensor1, array1 = abs_model_with_data(device, ov_type, numpy_dtype)
request.start_async(array1)
request.start_async(array1, shared_memory=shared_flag)
request.wait()
assert np.array_equal(request.get_output_tensor().data, np.abs(array1))
request.start_async(tensor1)
request.start_async(tensor1, shared_memory=shared_flag)
request.wait()
assert np.array_equal(request.get_output_tensor().data, np.abs(tensor1.data))
def test_infer_queue(device):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_infer_queue(device, shared_flag):
jobs = 8
num_request = 4
core = Core()
@ -475,12 +483,17 @@ def test_infer_queue(device):
jobs_done[job_id]["finished"] = True
jobs_done[job_id]["latency"] = request.latency
img = generate_image()
img = None
if not shared_flag:
img = generate_image()
infer_queue.set_callback(callback)
assert infer_queue.is_ready()
for i in range(jobs):
infer_queue.start_async({"data": img}, i)
if shared_flag:
img = generate_image()
infer_queue.start_async({"data": img}, i, shared_memory=shared_flag)
infer_queue.wait_all()
assert all(job["finished"] for job in jobs_done)
assert all(job["latency"] > 0 for job in jobs_done)
@ -670,19 +683,21 @@ def test_query_state_write_buffer(device, input_shape, data_type, mode):
assert np.allclose(res[list(res)[0]], expected_res, atol=1e-6), f"Expected values: {expected_res} \n Actual values: {res} \n"
def test_get_results(device):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_get_results(device, shared_flag):
core = Core()
data = ops.parameter([10], np.float64)
model = Model(ops.split(data, 0, 5), [data])
compiled_model = core.compile_model(model, device)
request = compiled_model.create_infer_request()
inputs = [np.random.normal(size=list(compiled_model.input().shape))]
results = request.infer(inputs)
results = request.infer(inputs, shared_memory=shared_flag)
for output in compiled_model.outputs:
assert np.array_equal(results[output], request.results[output])
def test_results_async_infer(device):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_results_async_infer(device, shared_flag):
jobs = 8
num_request = 4
core = Core()
@ -698,7 +713,7 @@ def test_results_async_infer(device):
img = generate_image()
infer_queue.set_callback(callback)
for i in range(jobs):
infer_queue.start_async({"data": img}, i)
infer_queue.start_async({"data": img}, i, shared_memory=shared_flag)
infer_queue.wait_all()
request = compiled_model.create_infer_request()
@ -712,7 +727,8 @@ def test_results_async_infer(device):
os.environ.get("TEST_DEVICE") not in ["GPU"],
reason="Device dependent test",
)
def test_infer_float16(device):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_infer_float16(device, shared_flag):
model = bytes(
b"""<net name="add_model" version="10">
<layers>
@ -787,12 +803,13 @@ def test_infer_float16(device):
compiled_model = core.compile_model(model, device)
input_data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]).astype(np.float16)
request = compiled_model.create_infer_request()
outputs = request.infer({0: input_data, 1: input_data})
outputs = request.infer({0: input_data, 1: input_data}, shared_memory=shared_flag)
assert np.allclose(list(outputs.values()), list(request.results.values()))
assert np.allclose(list(outputs.values()), input_data + input_data)
def test_ports_as_inputs(device):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_ports_as_inputs(device, shared_flag):
input_shape = [2, 2]
param_a = ops.parameter(input_shape, np.float32)
param_b = ops.parameter(input_shape, np.float32)
@ -808,56 +825,66 @@ def test_ports_as_inputs(device):
tensor1 = Tensor(arr_1)
tensor2 = Tensor(arr_2)
res = request.infer({compiled_model.inputs[0]: tensor1, compiled_model.inputs[1]: tensor2})
res = request.infer(
{compiled_model.inputs[0]: tensor1, compiled_model.inputs[1]: tensor2},
shared_memory=shared_flag,
)
assert np.array_equal(res[compiled_model.outputs[0]], tensor1.data + tensor2.data)
res = request.infer({request.model_inputs[0]: tensor1, request.model_inputs[1]: tensor2})
res = request.infer(
{request.model_inputs[0]: tensor1, request.model_inputs[1]: tensor2},
shared_memory=shared_flag,
)
assert np.array_equal(res[request.model_outputs[0]], tensor1.data + tensor2.data)
def test_inputs_dict_not_replaced(device):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_inputs_dict_not_replaced(device, shared_flag):
request, arr_1, arr_2 = create_simple_request_and_inputs(device)
inputs = {0: arr_1, 1: arr_2}
inputs_copy = deepcopy(inputs)
res = request.infer(inputs)
res = request.infer(inputs, shared_memory=shared_flag)
np.testing.assert_equal(inputs, inputs_copy)
assert np.array_equal(res[request.model_outputs[0]], arr_1 + arr_2)
def test_inputs_list_not_replaced(device):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_inputs_list_not_replaced(device, shared_flag):
request, arr_1, arr_2 = create_simple_request_and_inputs(device)
inputs = [arr_1, arr_2]
inputs_copy = deepcopy(inputs)
res = request.infer(inputs)
res = request.infer(inputs, shared_memory=shared_flag)
assert np.array_equal(inputs, inputs_copy)
assert np.array_equal(res[request.model_outputs[0]], arr_1 + arr_2)
def test_inputs_tuple_not_replaced(device):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_inputs_tuple_not_replaced(device, shared_flag):
request, arr_1, arr_2 = create_simple_request_and_inputs(device)
inputs = (arr_1, arr_2)
inputs_copy = deepcopy(inputs)
res = request.infer(inputs)
res = request.infer(inputs, shared_memory=shared_flag)
assert np.array_equal(inputs, inputs_copy)
assert np.array_equal(res[request.model_outputs[0]], arr_1 + arr_2)
def test_invalid_inputs(device):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_invalid_inputs(device, shared_flag):
request, _, _ = create_simple_request_and_inputs(device)
inputs = "some_input"
with pytest.raises(TypeError) as e:
request.infer(inputs)
request.infer(inputs, shared_memory=shared_flag)
assert "Incompatible inputs of type:" in str(e.value)
@ -885,7 +912,8 @@ def test_infer_dynamic_model(device):
assert request.get_input_tensor().shape == Shape(shape3)
def test_array_like_input_request(device):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_array_like_input_request(device, shared_flag):
class ArrayLikeObject:
# Array-like object accepted by np.array to test inputs similar to torch tensor and tf.Tensor
def __init__(self, array) -> None:
@ -899,7 +927,7 @@ def test_array_like_input_request(device):
model_input_list = [ArrayLikeObject(input_data.tolist())]
# Test single array-like object in InferRequest().Infer()
res_object = request.infer(model_input_object)
res_object = request.infer(model_input_object, shared_memory=shared_flag)
assert np.array_equal(res_object[request.model_outputs[0]], np.abs(input_data))
# Test list of array-like objects to use normalize_inputs()
@ -907,7 +935,8 @@ def test_array_like_input_request(device):
assert np.array_equal(res_list[request.model_outputs[0]], np.abs(input_data))
def test_array_like_input_async(device):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_array_like_input_async(device, shared_flag):
class ArrayLikeObject:
# Array-like object accepted by np.array to test inputs similar to torch tensor and tf.Tensor
def __init__(self, array) -> None:
@ -920,7 +949,7 @@ def test_array_like_input_async(device):
model_input_object = ArrayLikeObject(input_data.tolist())
model_input_list = [ArrayLikeObject(input_data.tolist())]
# Test single array-like object in InferRequest().start_async()
request.start_async(model_input_object)
request.start_async(model_input_object, shared_memory=shared_flag)
request.wait()
assert np.array_equal(request.get_output_tensor().data, np.abs(input_data))
@ -930,19 +959,20 @@ def test_array_like_input_async(device):
assert np.array_equal(request.get_output_tensor().data, np.abs(input_data))
def test_array_like_input_async_infer_queue(device):
@pytest.mark.parametrize("shared_flag", [True, False])
def test_array_like_input_async_infer_queue(device, shared_flag):
class ArrayLikeObject:
# Array-like object accepted by np.array to test inputs similar to torch tensor and tf.Tensor
def __init__(self, array) -> None:
self.data = array
def __array__(self):
return np.array(self.data)
return self.data
jobs = 8
ov_type = Type.f32
input_shape = [2, 2]
input_data = [[-2, -1], [0, 1]]
input_data = np.ascontiguousarray([[-2, -1], [0, 1]])
param = ops.parameter(input_shape, ov_type)
layer = ops.abs(param)
model = Model([layer], [param])
@ -950,21 +980,23 @@ def test_array_like_input_async_infer_queue(device):
compiled_model = core.compile_model(model, "CPU")
model_input_object = ArrayLikeObject(input_data)
model_input_list = [ArrayLikeObject(input_data)]
model_input_list = [[ArrayLikeObject(deepcopy(input_data))] for _ in range(jobs)]
# Test single array-like object in AsyncInferQueue.start_async()
infer_queue_object = AsyncInferQueue(compiled_model, jobs)
for _i in range(jobs):
infer_queue_object.start_async(model_input_object)
infer_queue_object.wait_all()
for i in range(jobs):
assert np.array_equal(infer_queue_object[i].get_output_tensor().data, np.abs(input_data))
# Test list of array-like objects in AsyncInferQueue.start_async()
infer_queue_list = AsyncInferQueue(compiled_model, jobs)
for _i in range(jobs):
infer_queue_list.start_async(model_input_list)
for i in range(jobs):
infer_queue_list.start_async(model_input_list[i], shared_memory=shared_flag)
infer_queue_list.wait_all()
for i in range(jobs):
assert np.array_equal(infer_queue_list[i].get_output_tensor().data, np.abs(input_data))
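For orientation, here is a minimal, self-contained sketch of how the new `shared_memory` keyword exercised by these tests is used; the opset import, model construction and the "CPU" device string are illustrative assumptions, not part of this diff:

import numpy as np
import openvino.runtime.opset8 as ops
from openvino.runtime import Core, Model

# Tiny abs() model mirroring the [2, 2] shape used in the tests above.
param = ops.parameter([2, 2], np.float32)
model = Model([ops.abs(param)], [param])
compiled = Core().compile_model(model, "CPU")
request = compiled.create_infer_request()

data = np.array([[-2.0, -1.0], [0.0, 1.0]], dtype=np.float32)
# shared_memory=True asks the binding to wrap `data` without copying;
# with False the data is copied into the request's input tensor first.
result = request.infer({0: data}, shared_memory=True)
print(result[compiled.outputs[0]])  # abs(data)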

View File

@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import pytest
import numpy as np
from tests.conftest import model_path
from tests.test_utils.test_utils import get_relu_model, generate_image, generate_model_and_image, generate_relu_compiled_model
from openvino.runtime import Model, ConstOutput, Type, Shape, Core, Tensor
from openvino.runtime.utils.data_helpers import _data_dispatch
is_myriad = os.environ.get("TEST_DEVICE") == "MYRIAD"
test_net_xml, test_net_bin = model_path(is_myriad)
def _get_value(value):
return value.data if isinstance(value, Tensor) else value
def _run_dispatcher(device, input_data, input_shape, is_shared):
compiled_model = generate_relu_compiled_model(device, input_shape)
infer_request = compiled_model.create_infer_request()
result = _data_dispatch(infer_request, input_data, is_shared)
return result, infer_request
@pytest.mark.parametrize("data_type", [np.float_, np.int_, int, float])
@pytest.mark.parametrize("input_shape", [[], [1]])
@pytest.mark.parametrize("is_shared", [True, False])
def test_scalars_dispatcher(device, data_type, input_shape, is_shared):
test_data = data_type(2)
expected = Tensor(np.ndarray([], data_type, np.array(test_data)))
result, _ = _run_dispatcher(device, test_data, input_shape, is_shared)
assert isinstance(result, Tensor)
assert result.get_shape() == Shape([])
assert result.get_element_type() == Type(data_type)
assert result.data == expected.data
@pytest.mark.parametrize("input_shape", [[1], [2, 2]])
@pytest.mark.parametrize("is_shared", [True, False])
def test_tensor_dispatcher(device, input_shape, is_shared):
array = np.ones(input_shape)
test_data = Tensor(array, is_shared)
result, _ = _run_dispatcher(device, test_data, input_shape, is_shared)
assert isinstance(result, Tensor)
assert result.get_shape() == Shape(input_shape)
assert result.get_element_type() == Type(array.dtype)
assert np.array_equal(result.data, array)
# Change data to check if shared_memory is still applied
array[0] = 2.0
assert np.array_equal(array, result.data) if is_shared else not np.array_equal(array, result.data)
@pytest.mark.parametrize("input_shape", [[1], [2, 2]])
def test_ndarray_shared_dispatcher(device, input_shape):
test_data = np.ones(input_shape).astype(np.float32)
result, _ = _run_dispatcher(device, test_data, input_shape, True)
assert isinstance(result, Tensor)
assert result.get_shape() == Shape(test_data.shape)
assert result.get_element_type() == Type(test_data.dtype)
assert np.array_equal(result.data, test_data)
test_data[0] = 2.0
assert np.array_equal(result.data, test_data)
@pytest.mark.parametrize("input_shape", [[1], [2, 2]])
def test_ndarray_shared_dispatcher_casting(device, input_shape):
test_data = np.ones(input_shape)
result, infer_request = _run_dispatcher(device, test_data, input_shape, True)
assert isinstance(result, Tensor)
assert result.get_shape() == Shape(test_data.shape)
assert result.get_element_type() == infer_request.inputs[0].get_element_type()
assert np.array_equal(result.data, test_data)
test_data[0] = 2.0
assert not np.array_equal(result.data, test_data)
@pytest.mark.parametrize("input_shape", [[1, 2, 3], [2, 2]])
def test_ndarray_shared_dispatcher_misalign(device, input_shape):
test_data = np.asfortranarray(np.ones(input_shape).astype(np.float32))
result, _ = _run_dispatcher(device, test_data, input_shape, True)
assert isinstance(result, Tensor)
assert result.get_shape() == Shape(test_data.shape)
assert result.get_element_type() == Type(test_data.dtype)
assert np.array_equal(result.data, test_data)
test_data[0] = 2.0
assert not np.array_equal(result.data, test_data)
@pytest.mark.parametrize("input_shape", [[1, 2, 3], [2, 2]])
def test_ndarray_copied_dispatcher(device, input_shape):
test_data = np.ones(input_shape)
result, infer_request = _run_dispatcher(device, test_data, input_shape, False)
assert result == {}
assert np.array_equal(infer_request.inputs[0].data, test_data)
test_data[0] = 2.0
assert not np.array_equal(infer_request.inputs[0].data, test_data)
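A hedged sketch of the dispatcher contract these tests pin down; the relu model, opset import and "CPU" device are assumptions made for illustration, and `_data_dispatch` is called positionally exactly as in the tests above:

import numpy as np
import openvino.runtime.opset8 as ops
from openvino.runtime import Core, Model, Tensor
from openvino.runtime.utils.data_helpers import _data_dispatch

param = ops.parameter([2, 2], np.float32)
compiled = Core().compile_model(Model([ops.relu(param)], [param]), "CPU")
request = compiled.create_infer_request()

data = np.ones((2, 2), dtype=np.float32)
shared = _data_dispatch(request, data, True)   # aligned f32 ndarray -> Tensor viewing `data`, zero copy
copied = _data_dispatch(request, data, False)  # copy mode -> {}, data is written into request.inputs[0]
assert isinstance(shared, Tensor) and copied == {}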

View File

@ -118,6 +118,8 @@ def create_filename_for_test(test_name, tmp_path, is_xml_path=False, is_bin_path
python_version = str(sys.version_info.major) + "_" + str(sys.version_info.minor)
filename = test_name.replace("test_", "").replace("[", "_").replace("]", "_")
filename = filename + "_" + python_version
_xml = tmp_path / Path(filename + ".xml") if is_xml_path else tmp_path / Path(filename + ".xml")
_bin = tmp_path / Path(filename + ".bin") if is_bin_path else tmp_path / Path(filename + ".bin")
path_to_xml = tmp_path / Path(filename + ".xml")
path_to_bin = tmp_path / Path(filename + ".bin")
_xml = path_to_xml if is_xml_path else str(path_to_xml)
_bin = path_to_bin if is_bin_path else str(path_to_bin)
return (_xml, _bin)
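The fix amounts to actually branching on the flags; a tiny stand-alone illustration of the intended Path-vs-str selection (the `_select` helper below is hypothetical, only the pattern matters):

from pathlib import Path

def _select(path: Path, as_path: bool):
    # the fixed helper returns a pathlib.Path when the flag is set, a plain str otherwise
    return path if as_path else str(path)

as_path = _select(Path("/tmp/model.xml"), as_path=True)   # -> pathlib.Path
as_str = _select(Path("/tmp/model.xml"), as_path=False)   # -> str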

View File

@ -36,7 +36,7 @@ elif machine == "X86" or machine == "i686":
ARCH = "ia32"
elif machine == "arm" or machine == "armv7l":
ARCH = "arm"
elif machine == "aarch64" or machine == "arm64":
elif machine == "aarch64" or machine == "arm64" or machine == "ARM64":
ARCH = "arm64"
# The following variables can be defined in environment or .env file
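For reference, a small probe of why the extra comparison is needed; the per-OS values of `platform.machine()` are stated from general knowledge, not taken from this diff:

import platform

machine = platform.machine()
# Linux reports "aarch64", macOS reports "arm64", Windows-on-ARM reports "ARM64" (upper case),
# which is why the third comparison was added above.
if machine in ("aarch64", "arm64", "ARM64"):
    ARCH = "arm64"
else:
    ARCH = machine
print(ARCH)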

View File

@ -30,8 +30,12 @@ elseif(SELECTIVE_BUILD STREQUAL "ON")
# After disabling a block of code, some variables might be unused.
target_compile_options(${TARGET_NAME} INTERFACE
-Wno-unused-function
-Wno-unused-variable
-Wno-unused-parameter
-Wno-unused-local-typedefs)
if(CMAKE_COMPILER_IS_GNUCXX)
target_compile_options(${TARGET_NAME} INTERFACE -Wno-unused-but-set-variable)
endif()
endif()
set(GENERATED_HEADER ${CMAKE_CURRENT_BINARY_DIR}/conditional_compilation_gen.h CACHE FILEPATH "")

View File

@ -58,6 +58,9 @@ struct TestNode : public TestNodeBase {
} // namespace
TEST(ConditionalCompilationTests, SimpleScopeAnalysys) {
auto func = SIMPLE_CCTests;
(void)func;
int n = 0;
OV_SCOPE(CCTests, Scope0) n = 42;

View File

@ -26,10 +26,6 @@ if(TARGET ittnotify)
endif()
endif()
if(CMAKE_COMPILER_IS_GNUCXX)
target_compile_options(${TARGET_NAME} PRIVATE -Wall)
endif()
target_include_directories(${TARGET_NAME} PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)

View File

@ -8,6 +8,7 @@
*/
#pragma once
#include <openvino/function_name.hpp>
#include <openvino/util/pp.hpp>
#include <string>
@ -34,9 +35,9 @@ namespace openvino
*/
typedef struct handle_ {} *handle_t;
/**
* @cond
*/
/**
* @cond
*/
namespace internal
{
domain_t domain(char const* name);
@ -45,9 +46,9 @@ namespace openvino
void taskEnd(domain_t d);
void threadName(const char* name);
}
/**
* @endcond
*/
/**
* @endcond
*/
/**
* @fn void threadName(const char* name)

View File

@ -259,6 +259,8 @@ public:
static ov::Output<ov::Node> getSingleConsumerConstant(const ov::Output<ov::Node>& output);
static bool checkConstantOnInf(const std::shared_ptr<Node> constant_node);
private:
static std::shared_ptr<Node> foldFakeQuantize(
const std::shared_ptr<opset1::FakeQuantize>& fq,

View File

@ -27,7 +27,7 @@ public:
class LP_TRANSFORMATIONS_API SharedValueAttribute : public std::enable_shared_from_this<SharedValueAttribute> {
public:
struct LP_TRANSFORMATIONS_API SharedValue : public std::enable_shared_from_this<SharedValue> {
SharedValue() = default;
SharedValue() {}
SharedValue(const T& value) : value{value} {}
T value = {};
void addAttribute(std::weak_ptr<SharedValueAttribute> attribute) {

View File

@ -188,6 +188,12 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter
auto newMultiplyFullPathValues = fold<opset1::Divide>(multiplyFullPathValues, multiplyEmptyPathValues);
// Transformation can't be applied if new full path values break accuracy because of Inf values
if (!NetworkHelper::checkConstantOnInf(newSubtractFullPathValues) ||
!NetworkHelper::checkConstantOnInf(newMultiplyFullPathValues)) {
return false;
}
if (NetworkHelper::isZeroConst(newSubtractFullPathValues)) {
newSubtractFullPathValues = nullptr;
}

View File

@ -179,10 +179,8 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
inputLowConst_f32 = fold<opset1::Divide>(inputLowConst_f32, value);
inputHighConst_f32 = fold<opset1::Divide>(inputHighConst_f32, value);
const auto resultLow = ov::as_type_ptr<opset1::Constant>(inputLowConst_f32)->cast_vector<float>();
const auto resultHigh = ov::as_type_ptr<opset1::Constant>(inputHighConst_f32)->cast_vector<float>();
if (std::any_of(resultLow.begin(), resultLow.end(), [](const float value){ return std::isinf(value); }) ||
std::any_of(resultHigh.begin(), resultHigh.end(), [](const float value){ return std::isinf(value); })) {
if (!NetworkHelper::checkConstantOnInf(inputLowConst_f32) ||
!NetworkHelper::checkConstantOnInf(inputHighConst_f32)) {
return nullptr;
}

View File

@ -104,7 +104,7 @@ bool FakeQuantizeDequantization::checkShape(const std::shared_ptr<ngraph::Node>&
}
if (!inPShape.rank().is_dynamic()) {
for (int i = 0; i < inPShape.size(); ++i) {
for (size_t i = 0; i < inPShape.size(); ++i) {
if (inPShape[i] != outPShape[i] && !inPShape[i].is_dynamic()) {
return false;
}

View File

@ -27,8 +27,8 @@ constexpr char LayerTransformation::originalLayerPostfix[];
LayerTransformation::LayerTransformation(const Params& params) :
updatePrecisions(params.updatePrecisions),
deqPrecision(params.deqPrecision),
reshapeIgnorePerTensorQuantizationCheck(params.reshapeIgnorePerTensorQuantizationCheck),
defaultPrecisions(params.defaultPrecisions),
reshapeIgnorePerTensorQuantizationCheck(params.reshapeIgnorePerTensorQuantizationCheck),
context(nullptr) {}
void LayerTransformation::setContext(TransformationContext* context) noexcept {

View File

@ -273,7 +273,7 @@ bool ngraph::pass::low_precision::LowPrecision::run_on_model(const std::shared_p
bool ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(const std::shared_ptr<const ngraph::Function>& function) {
std::set<std::shared_ptr<ngraph::Node>> handledNodes;
std::deque<std::shared_ptr<ngraph::Node>> nodes;
for (const auto result : function->get_results()) {
for (const auto& result : function->get_results()) {
nodes.push_front(result);
}
@ -294,7 +294,7 @@ bool ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(const std::s
}
} else if (const auto multiSubGraph = ov::as_type_ptr<ngraph::op::util::MultiSubGraphOp>(parent)) {
// Look inside subgraph operations, such as TensorIterator, Loop, If, etc
for (int i = 0; i < multiSubGraph->get_internal_subgraphs_size(); i++) {
for (size_t i = 0; i < multiSubGraph->get_internal_subgraphs_size(); i++) {
if (isFunctionQuantized(multiSubGraph->get_function(i))) {
return true;
}

View File

@ -438,7 +438,7 @@ std::vector<size_t> NetworkHelper::updateReshapeValues(
}
}
}
return updatedReshapeValues;
return std::move(updatedReshapeValues);
}
std::shared_ptr<ngraph::opset1::Multiply> NetworkHelper::optimizeMultipliesAfter(std::shared_ptr<Node> node) {
@ -2010,6 +2010,14 @@ ov::Output<ov::Node> NetworkHelper::getSingleConsumerConstant(const ov::Output<o
? output
: node->clone_with_new_inputs(node->input_values())->output(0);
}
bool NetworkHelper::checkConstantOnInf(const std::shared_ptr<Node> constant_node) {
const auto constant = ov::as_type_ptr<opset1::Constant>(constant_node);
if (constant == nullptr)
return false;
const auto values = constant->cast_vector<float>();
return std::all_of(values.begin(), values.end(), [](const float x) { return !std::isinf(x); });
}
} // namespace low_precision
} // namespace pass
} // namespace ngraph

View File

@ -262,8 +262,8 @@ bool WeightableLayerTransformation::isQuantizedStatic(const std::shared_ptr<cons
return true;
};
if (dequantizationOnWeights.subtract && !checkConstShape(dequantizationOnWeights.subtractConstant) ||
dequantizationOnWeights.multiply && !checkConstShape(dequantizationOnWeights.multiplyConstant)) {
if ((dequantizationOnWeights.subtract && !checkConstShape(dequantizationOnWeights.subtractConstant)) ||
(dequantizationOnWeights.multiply && !checkConstShape(dequantizationOnWeights.multiplyConstant))) {
return false;
}

View File

@ -390,6 +390,38 @@ const std::vector<AddTransformationTestValues> testValuesWithoutConstantBranches
{{}, {}, {5.f}},
{}},
""},
// Multiply with the value that mustn't be transformed (to avoid infinite values in multiply constant)
{false,
-1,
LayerTransformation::createParamsU8I8(),
{ngraph::element::u8,
{{ngraph::element::f32}, {1.f}, {std::numeric_limits<float>::max()}},
ngraph::element::u8,
{{ngraph::element::f32}, {}, {0.009f}},
{}},
{ngraph::element::u8,
{{ngraph::element::f32}, {1.f}, {std::numeric_limits<float>::max()}},
ngraph::element::u8,
{{ngraph::element::f32}, {}, {0.009f}},
{{}, {}, {}},
{}},
""},
// Subtract with the value that mustn't be transformed (to avoid infinite values in multiply constant)
{false,
-1,
LayerTransformation::createParamsU8I8(),
{ngraph::element::u8,
{{ngraph::element::f32}, {}, {0.009f}},
ngraph::element::u8,
{{ngraph::element::f32}, {std::numeric_limits<float>::max()}, {2.f}},
{}},
{ngraph::element::u8,
{{ngraph::element::f32}, {}, {0.009f}},
ngraph::element::u8,
{{ngraph::element::f32}, {std::numeric_limits<float>::max()}, {2.f}},
{{}, {}, {}},
{}},
""},
// convolution before FQ (choose that branch)
{false,

View File

@ -150,13 +150,13 @@ const std::vector<GatherTransformationTestValues> testValues = {
{ngraph::element::u8,
{{ngraph::element::f32},
{{128.f}, element::undefined, {1, 3, 1}, false, 1ul, element::u8, true},
{{0.1}, ngraph::element::f32, {1, 3, 1}}}},
{{0.1f}, ngraph::element::f32, {1, 3, 1}}}},
{ngraph::element::u8,
{{}, {}, {}},
ngraph::element::u8,
{{ngraph::element::f32},
{{128.f}, element::undefined, {1, 3, 1}, false, 1ul, element::u8, true},
{{0.1}, ngraph::element::f32, {1, 3, 1}}}}},
{{0.1f}, ngraph::element::f32, {1, 3, 1}}}}},
// U8: per-channel quantization, gather axis match with channel
{{1},
{0},

View File

@ -87,9 +87,10 @@ TEST_P(FQDecompositionWithSharedConstants, FQDecompositionWithSharedConstants) {
ASSERT_TRUE(res.valid) << res.message;
// additional check: FQ constants after transformation mustn't be shared
for (const auto n : actualFunction->get_ordered_ops()) {
if (ov::is_type<opset1::Constant>(n))
for (const auto& n : actualFunction->get_ordered_ops()) {
if (ov::is_type<opset1::Constant>(n)) {
EXPECT_EQ(n->get_output_target_inputs(0).size(), 1);
}
}
}
namespace {

View File

@ -196,7 +196,7 @@ public:
if (size() < mask->size())
resize(mask->size());
for (size_t i = 0; i < size(); i++) {
if (i == axis) {
if (static_cast<int64_t>(i) == axis) {
std::set<uint64_t> dst_set;
const auto& src_set = mask->at(i);
auto it = src_set.lower_bound(split_start);

View File

@ -35,7 +35,6 @@ bool ngraph::pass::GenerateMappingFile::run_on_model(const std::shared_ptr<ngrap
for (auto&& node : f->get_ordered_ops()) {
uint64_t ie_port_index{node->inputs().size()};
uint64_t ng_port_index{0};
if (std::dynamic_pointer_cast<ov::op::v0::Result>(node))
continue;
for (auto&& output : node->outputs()) {
@ -52,7 +51,6 @@ bool ngraph::pass::GenerateMappingFile::run_on_model(const std::shared_ptr<ngrap
}
}
++ie_port_index;
++ng_port_index;
}
}

View File

@ -114,7 +114,7 @@ public:
// 2. Get constant rank to set mask on last dimension
const auto const_op = std::dynamic_pointer_cast<opset6::Constant>(cur_node);
const auto shape_rank = const_op->get_shape().size();
const auto shift = (matmul->get_transpose_b()) ? 2 : 1;
const size_t shift = (matmul->get_transpose_b()) ? 2 : 1;
if (shape_rank < shift) {
NGRAPH_DEBUG << "Can't init mask for MatMul: " << matmul->get_friendly_name() << std::endl;
return false;

View File

@ -970,7 +970,7 @@ static std::vector<dims_vec> map_reshaped_dimensions(const dims_vec input_shape,
static std::vector<ov::Shape> map_reshaped_shapes(const ov::Shape unsquized_shape,
const std::vector<dims_vec> dims_map) {
auto retval = std::vector<ov::Shape>();
for (const auto unsquized_dims : dims_map) {
for (const auto& unsquized_dims : dims_map) {
auto cur_dim_shape = ov::Shape();
for (const auto& dim : unsquized_dims)
cur_dim_shape.push_back(unsquized_shape[dim]);
@ -1243,7 +1243,7 @@ public:
for (auto& ch : weights_mask_row->at(out_dim)) {
NGRAPH_SUPPRESS_DEPRECATED_START
auto iter = get_channel_iter(dims_shape[in_dim], unsquized_shift, ch);
for (const auto coord : iter)
for (const auto& coord : iter)
cur_mask->at(in_dim).insert(iter.index(coord));
NGRAPH_SUPPRESS_DEPRECATED_END
}
@ -1316,7 +1316,7 @@ public:
for (auto& ch : input_mask_row->at(in_dim)) {
NGRAPH_SUPPRESS_DEPRECATED_START
auto iter = get_channel_iter(dims_shape[out_dim], unsquized_shift, ch);
for (const auto coord : iter)
for (const auto& coord : iter)
cur_mask->at(out_dim).insert(iter.index(coord));
NGRAPH_SUPPRESS_DEPRECATED_END
}
@ -1388,7 +1388,7 @@ public:
NGRAPH_DEBUG << "No input mask for: " << m_output.get_node()->get_friendly_name() << std::endl;
return false;
}
if (input_mask->size() != m_output.get_partial_shape().rank().get_length()) {
if (static_cast<int64_t>(input_mask->size()) != m_output.get_partial_shape().rank().get_length()) {
NGRAPH_DEBUG << "Transpose which change tensor rank is not supported yet.";
return false;
}
@ -1454,7 +1454,7 @@ static ngraph::Mask::Ptr create_connect_split_output_mask(ngraph::Mask::Ptr inpu
}
for (size_t j = 0; j < output_mask_raw->size(); j++) {
const auto& dim_mask = output_mask_raw->at(j);
if (j == axis) {
if (static_cast<int64_t>(j) == axis) {
for (auto d : dim_mask)
cur_mask->at(j).insert(d + split_start);
} else {
@ -1502,9 +1502,9 @@ public:
// split_lengths can contain -1 value
int minus_one_length_idx = -1;
int64_t total_lengths = 0;
for (int i = 0; i < split_lengths.size(); i++) {
for (size_t i = 0; i < split_lengths.size(); i++) {
if (split_lengths[i] == -1) {
minus_one_length_idx = i;
minus_one_length_idx = static_cast<int>(i);
continue;
}
total_lengths += split_lengths[i];

View File

@ -43,8 +43,8 @@ static bool is_static_reshape_op(std::shared_ptr<ov::Node> node) {
const auto& input_shape = input.get_shape();
const auto output_shape = output_shape_const_op->cast_vector<int64_t>();
// below casts are needed due to VC warning C4244, literals are not enough in this case
const auto input_elems =
std::accumulate(input_shape.begin(), input_shape.end(), static_cast<size_t>(1), std::multiplies<size_t>());
const int64_t input_elems =
std::accumulate(input_shape.begin(), input_shape.end(), static_cast<int64_t>(1), std::multiplies<int64_t>());
const auto output_elems =
std::accumulate(output_shape.begin(), output_shape.end(), static_cast<int64_t>(1), std::multiplies<int64_t>());
if (output_elems <= 0 || input_elems == output_elems)

View File

@ -71,7 +71,7 @@ auto tail_transformations(NodeVector& tail, const size_t tail_size, const ngraph
if (config.m_need_fill_tail_register &&
(ov::is_type<ov::op::v1::Maximum>(op) ||
ov::is_type<ov::op::v1::Add>(op))) {
for (auto i = 0; i < op->inputs().size(); ++i) {
for (size_t i = 0; i < op->inputs().size(); ++i) {
if (auto fill = insertFill(op->input(i))) {
updated_tile.push_back(fill);
}
@ -116,7 +116,7 @@ ngraph::snippets::code ngraph::snippets::Generator::generate(std::shared_ptr<ov:
if (force_ptr_increment || loop->has_outer_loop) {
std::vector<int64_t> new_finalization_offsets(loop->get_finalization_offsets());
const auto& ptr_increments = loop->get_ptr_increments();
for (auto i = 0; i < new_finalization_offsets.size(); i++) {
for (size_t i = 0; i < new_finalization_offsets.size(); i++) {
new_finalization_offsets[i] += ptr_increments[i];
}
loop->set_finalization_offsets(new_finalization_offsets);

View File

@ -54,7 +54,7 @@ void LoopBegin::validate_and_infer_types_except_LoopEnd() {
const size_t num_inputs = get_input_size();
set_output_size(num_inputs + 1);
// All outputs are by-passed from inputs, except for the last one - it connects LoopBegin and LoopEnd
for (int i = 0; i < num_inputs; i++)
for (size_t i = 0; i < num_inputs; i++)
get_output_descriptor(i).set_tensor_ptr(get_input_descriptor(i).get_output().get_tensor_ptr());
set_output_type(num_inputs, element::f32, ov::PartialShape{ov::Shape{}});
}
@ -81,8 +81,10 @@ std::shared_ptr<LoopEnd> LoopBegin::get_loop_end() {
LoopEnd::LoopEnd(const std::vector<Output<Node>> &args, size_t work_amount, size_t work_amount_increment,
std::vector<bool> apply_increments, std::vector<int64_t> finalization_offsets)
: LoopBase(args, work_amount, work_amount_increment), finalization_offsets(std::move(finalization_offsets)),
has_outer_loop(true), loop_io_size(0) {
: LoopBase(args, work_amount, work_amount_increment),
has_outer_loop(true),
finalization_offsets(std::move(finalization_offsets)),
loop_io_size(0) {
ptr_increments.resize(apply_increments.size());
std::transform(apply_increments.begin(), apply_increments.end(), ptr_increments.begin(),
[work_amount_increment](bool apply) {
@ -93,8 +95,11 @@ LoopEnd::LoopEnd(const std::vector<Output<Node>> &args, size_t work_amount, size
LoopEnd::LoopEnd(const std::vector<Output<Node>> &args, size_t work_amount, size_t work_amount_increment,
std::vector<int64_t> ptr_increments, std::vector<int64_t> finalization_offsets)
: LoopBase(args, work_amount, work_amount_increment), ptr_increments(std::move(ptr_increments)),
finalization_offsets(std::move(finalization_offsets)), has_outer_loop(true), loop_io_size(0) {
: LoopBase(args, work_amount, work_amount_increment),
has_outer_loop(true),
ptr_increments(std::move(ptr_increments)),
finalization_offsets(std::move(finalization_offsets)),
loop_io_size(0) {
constructor_validate_and_infer_types();
}
@ -172,7 +177,7 @@ void LoopEnd::validate_and_infer_types() {
finalization_offsets.resize(loop_io_size, 0);
set_output_size(num_inputs - 1);
// All outputs are by-passed from inputs, except for the last one - it connects LoopBegin and LoopEnd
for (int i = 0; i < num_inputs - 1; i++)
for (size_t i = 0; i < num_inputs - 1; i++)
get_output_descriptor(i).set_tensor_ptr(get_input_descriptor(i).get_output().get_tensor_ptr());
}

View File

@ -55,7 +55,7 @@ bool ngraph::snippets::pass::AlignElementType::run_on_model(const std::shared_pt
}
if (op_supports_only_exec_type(op)) {
for (auto i = 0; i < op->inputs().size(); i++) {
for (size_t i = 0; i < op->inputs().size(); i++) {
auto shared_input = op->get_input_node_shared_ptr(i);
auto existing_convert = ov::as_type_ptr<ov::op::v0::Convert>(shared_input);
// We should insert Convert before Ops, which supports only exec element type, only when:
@ -75,7 +75,7 @@ bool ngraph::snippets::pass::AlignElementType::run_on_model(const std::shared_pt
rewritten |= true;
}
} else { // branch for Movement ops, MatMul ops in the future and for the Convert, Result
for (auto i = 0; i < op->inputs().size(); i++) {
for (size_t i = 0; i < op->inputs().size(); i++) {
auto shared_input = op->get_input_node_shared_ptr(i);
// it's original element type because we don't use validate_and_infer_type() anywhere
const auto original_eltype = op->input(i).get_element_type();

View File

@ -7,8 +7,13 @@
#include "snippets/snippets_isa.hpp"
#include <iterator>
#if defined(__clang__)
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wunused-lambda-capture"
#endif
namespace {
static constexpr size_t reg_count = 16lu;
constexpr size_t reg_count = 16lu;
} // namespace
bool ngraph::snippets::pass::AssignRegisters::run_on_model(const std::shared_ptr<ov::Model>& f) {
@ -187,7 +192,7 @@ bool ngraph::snippets::pass::AssignRegisters::run_on_model(const std::shared_ptr
auto op = typed_ops[n].second;
for (const auto& out : op->outputs()) {
for (const auto& port : out.get_target_inputs()) {
auto k = std::find(ops.begin(), ops.end(), port.get_node()->shared_from_this()) - ops.begin();
size_t k = std::find(ops.begin(), ops.end(), port.get_node()->shared_from_this()) - ops.begin();
if (k == ops.size())
throw ngraph_error("assign registers can't find target op in the body");
switch (typed_ops[k].first) {
@ -314,3 +319,6 @@ bool ngraph::snippets::pass::AssignRegisters::run_on_model(const std::shared_ptr
return false;
}
#if defined(__clang__)
# pragma clang diagnostic pop
#endif

View File

@ -18,7 +18,7 @@ ngraph::snippets::pass::BroadcastToMoveBroadcast::BroadcastToMoveBroadcast() {
auto m_broadcast = ngraph::pattern::wrap_type<ngraph::op::v1::Broadcast, ngraph::op::v3::Broadcast>();
auto callback = [this](ngraph::pattern::Matcher &m) {
auto callback = [](ngraph::pattern::Matcher &m) {
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::BroadcastToMoveBroadcast")
auto root = m.get_match_root();
if (auto broadcast_v1 = ov::as_type_ptr<const ov::op::v1::Broadcast>(root)) {

View File

@ -40,7 +40,7 @@ auto outputs_are_not_broadcastable(const std::shared_ptr<const Node>& node) -> b
return false;
ov::PartialShape ref_shape = outputs.front().get_partial_shape();
bool success = true;
for (int i = 1; i < outputs.size() && success; i++) {
for (size_t i = 1; i < outputs.size() && success; i++) {
success &= ov::PartialShape::broadcast_merge_into(ref_shape, outputs[i].get_partial_shape(), ov::op::AutoBroadcastType::NUMPY);
}
return !success;

View File

@ -17,7 +17,7 @@ ngraph::snippets::pass::ConvertConstantsToScalars::ConvertConstantsToScalars() {
[](std::shared_ptr<Node> n) {
return ngraph::is_type<ov::op::v0::Constant>(n);
});
ngraph::graph_rewrite_callback callback = [this](ngraph::pattern::Matcher &m) {
ngraph::graph_rewrite_callback callback = [](ngraph::pattern::Matcher &m) {
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::ConvertConstantsToScalars")
auto constant = as_type_ptr<ov::op::v0::Constant>(m.get_match_root());
if (ov::shape_size(constant->get_output_shape(0)) != 1)

View File

@ -15,7 +15,7 @@ ngraph::snippets::pass::ConvertPowerToPowerStatic::ConvertPowerToPowerStatic() {
return is_type<ov::op::v1::Power>(n) &&
is_type<snippets::op::Scalar>(n->get_input_node_shared_ptr(1));
});
ngraph::graph_rewrite_callback callback = [this](ngraph::pattern::Matcher &m) {
ngraph::graph_rewrite_callback callback = [](ngraph::pattern::Matcher &m) {
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::ConvertConstantsToScalars")
auto power = ov::as_type_ptr<ov::op::v1::Power>(m.get_match_root());
auto scalar = ov::as_type_ptr<snippets::op::Scalar>(power->get_input_node_shared_ptr(1));

View File

@ -221,7 +221,7 @@ bool ngraph::snippets::pass::FakeQuantizeDecomposition::getScalesAndShifts(
osc.resize(output_size, 0);
osh.resize(output_size, 0);
for (int i = 0; i < input_size; i++) {
for (size_t i = 0; i < input_size; i++) {
float il = input_low[input_low.size() == 1 ? 0 : i];
float ih = input_high[input_high.size() == 1 ? 0 : i];
@ -229,7 +229,7 @@ bool ngraph::snippets::pass::FakeQuantizeDecomposition::getScalesAndShifts(
ish[i] = -il * isc[i];
}
for (int i = 0; i < output_size; i++) {
for (size_t i = 0; i < output_size; i++) {
float ol = output_low[output_low.size() == 1 ? 0 : i];
float oh = output_high[output_high.size() == 1 ? 0 : i];
@ -276,13 +276,13 @@ std::vector<float> ngraph::snippets::pass::FakeQuantizeDecomposition::calculateS
std::all_of(osc.cbegin(), osc.cend(), [](float val) { return val == 1.f; }) &&
std::all_of(osh.cbegin(), osh.cend(), [](float val) { return std::abs(val + 128.f) < thr; })) {
bool is_crop_aligned = true;
for (int i = 0; i < std::max(cl.size(), isc.size()); i++) {
for (size_t i = 0; i < std::max(cl.size(), isc.size()); i++) {
if (std::abs(cl[cl.size() == 1 ? 0 : i] * isc[isc.size() == 1 ? 0 : i] + 128.f) > thr) {
is_crop_aligned = false;
}
}
for (int i = 0; i < std::max(ch.size(), isc.size()); i++) {
for (size_t i = 0; i < std::max(ch.size(), isc.size()); i++) {
if (std::abs(ch[ch.size() == 1 ? 0 : i] * isc[isc.size() == 1 ? 0 : i] - 127.f) > thr) {
is_crop_aligned = false;
}

View File

@ -65,7 +65,7 @@ FuseTransposeBrgemm::FuseTransposeBrgemm() {
in.replace_source_output(brgemm->output(0));
set_layout_from_order(as_type_ptr<opset1::Transpose>(transpose_out.get_node_shared_ptr()), brgemm_out);
}
for (int i = 0; i < brgemm->get_input_size(); i++) {
for (size_t i = 0; i < brgemm->get_input_size(); i++) {
const auto& in_value = brgemm->input_value(i);
if (transpose_matcher->match(in_value)) {
const auto& transpose = as_type_ptr<opset1::Transpose>(in_value.get_node_shared_ptr());

View File

@ -20,7 +20,7 @@ ngraph::snippets::pass::InsertBuffer::InsertBuffer(const int32_t allocation_rank
op::Brgemm>();
register_matcher(std::make_shared<ngraph::pattern::Matcher>(pattern, matcher_name),
[this, allocation_rank](ngraph::pattern::Matcher &m) {
[allocation_rank](ngraph::pattern::Matcher &m) {
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::InsertBuffer")
auto root = m.get_match_root();
bool rewritten = false;
@ -53,14 +53,15 @@ ngraph::snippets::pass::InsertBuffer::InsertBuffer(const int32_t allocation_rank
// we should remove them to insert one common Buffer on one common port
replace_output_update_name(output_node->output(0), output_node->input_value(0));
} else if (ov::is_type<ngraph::op::v0::Result>(output_node)) {
// TODO: At this moment operation which is should be wrapped by Buffers doesn't support several childs where one of them is Result
// because Result and Buffer from one root port should have the same register. It's not supported at the moment
// For example,
// Buffer
// |
// Softmax
// / \
// Buffer Result
/* TODO: At the moment an operation which should be wrapped by Buffers doesn't support several children where one of them is Result
* because Result and Buffer from one root port should have the same register. It's not supported at the moment
* For example,
* Buffer
* |
* Softmax
* / \
* Buffer Result
*/
throw ngraph::ngraph_error(
"Operation which is should be wrapped by Buffers has few children from one output port where one of them is Result");
}

View File

@ -16,7 +16,7 @@ ngraph::snippets::pass::InsertLoad::InsertLoad(const size_t count) {
MATCHER_SCOPE(InsertLoad);
register_matcher(std::make_shared<ngraph::pattern::Matcher>(
ngraph::pattern::wrap_type<ngraph::opset1::Parameter, ngraph::snippets::op::Buffer>(), matcher_name),
[this, count](ngraph::pattern::Matcher &m) {
[count](ngraph::pattern::Matcher &m) {
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::InsertLoad")
auto root = m.get_match_root();
@ -58,7 +58,7 @@ ngraph::snippets::pass::InsertStore::InsertStore(const size_t count) {
MATCHER_SCOPE(InsertStore);
register_matcher(std::make_shared<ngraph::pattern::Matcher>(
ngraph::pattern::wrap_type<ngraph::opset1::Result, ngraph::snippets::op::Buffer>(), matcher_name),
[this, count](ngraph::pattern::Matcher &m) {
[count](ngraph::pattern::Matcher &m) {
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::InsertStore")
auto root = m.get_match_root();

View File

@ -236,10 +236,10 @@ bool InsertLoops::run_on_model(const std::shared_ptr<ov::Model> &model) {
const auto& new_shapes = plugin_shapes->second.as<std::vector<std::vector<size_t>>>();
if (new_shapes.size() != commonResults.size() + commonParams.size())
throw ngraph_error("InsertLoops got invalid number of plugin-overriden shapes");
for (int i = 0; i < commonParams.size(); i++)
for (size_t i = 0; i < commonParams.size(); i++)
ioShapes.emplace_back(new_shapes[i]);
// reverse overriden_shapes for results since commonResults are reversed with respect to model->get_parameters()
for (int i = 0; i < commonResults.size(); i++)
for (size_t i = 0; i < commonResults.size(); i++)
ioShapes.emplace_back(new_shapes[new_shapes.size() - 1 - i]);
}

View File

@ -21,7 +21,7 @@ namespace {
std::pair<ov::PartialShape, std::vector<ov::PartialShape>> get_numpy_broadcast_partial_shapes(const std::vector<ov::PartialShape>& input_shapes) {
ov::PartialShape target_shape = input_shapes.front();
for (auto i = 1; i < input_shapes.size(); i++) {
for (size_t i = 1; i < input_shapes.size(); i++) {
if (!ov::PartialShape::broadcast_merge_into(target_shape, input_shapes[i], op::AutoBroadcastType::NUMPY))
throw ngraph::ngraph_error("InsertMoveBroadcast: Failed broadcast-merge input shapes");
}
@ -62,7 +62,7 @@ ngraph::Output<ngraph::Node> ngraph::snippets::pass::InsertMoveBroadcast::Broadc
ngraph::snippets::pass::InsertMoveBroadcast::InsertMoveBroadcast() {
MATCHER_SCOPE(InsertMoveBroadcast);
ngraph::graph_rewrite_callback callback = [this](ngraph::pattern::Matcher &m) {
ngraph::graph_rewrite_callback callback = [](ngraph::pattern::Matcher &m) {
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::InsertMoveBroadcast")
auto root = m.get_match_root();
const auto& values = root->input_values();

View File

@ -25,19 +25,20 @@ auto can_be_merged(const std::shared_ptr<ngraph::snippets::op::LoopEnd>& loop_en
loop_end_down->get_increment() != loop_end_up->get_increment())
return false;
// If between Loops there are common dependencies (for example, reducing operations), we cannot merge these Loops
// Example, when there is HorizonMax op between Loops:
// Data
// VectorBuffer LoopBegin
// \ Load | \
// Maximum | /
// / LoopEnd
// HorizonMax |
// \ LoopBegin
// \ Load \
// Subtract |
// Store /
// LoopEnd
/* If between Loops there are common dependencies (for example, reducing operations), we cannot merge these Loops
* Example, when there is HorizonMax op between Loops:
* Data
* VectorBuffer LoopBegin
* \ Load | \
* Maximum | /
* / LoopEnd
* HorizonMax |
* \ LoopBegin
* \ Load \
* Subtract |
* Store /
* LoopEnd
*/
auto up_dependent_ptrs = loop_end_up->get_control_dependents();
ov::NodeVector up_dependents(up_dependent_ptrs.size(), nullptr);
std::transform(up_dependent_ptrs.begin(), up_dependent_ptrs.end(), up_dependents.begin(), [](ngraph::Node* node) { return node->shared_from_this(); });
@ -65,7 +66,7 @@ auto get_buffer_and_loop_end(const std::shared_ptr<ngraph::snippets::op::LoopBeg
continue;
// We can fuse Loops even if LoopBegin has several identical inputs (the common Buffer/LoopEnd)
if (buffer && buffer == parent_shared || !buffer && loop_end_up && loop_end_up == parent_shared)
if ((buffer && buffer == parent_shared) || (!buffer && loop_end_up && loop_end_up == parent_shared))
continue;
loop_end_up = ngraph::as_type_ptr<ngraph::snippets::op::LoopEnd>(parent_shared);

View File

@ -16,7 +16,7 @@ std::shared_ptr<LoopBegin> insertLoopBeginAfterOutputs(const OutputVector& origi
auto loop_begin = std::make_shared<LoopBegin>(originalOutputs);
for (int i = 0; i < originalChildInputs.size(); i++) {
for (size_t i = 0; i < originalChildInputs.size(); i++) {
for (auto& input : originalChildInputs[i]) {
input.replace_source_output(loop_begin->output(i));
}
@ -37,7 +37,7 @@ std::shared_ptr<LoopEnd> insertLoopEndBeforeInputs(const std::vector<Input<Node>
auto loop_end = std::make_shared<LoopEnd>(originalParentOutputs, work_amount, increment,
std::move(apply_increment), std::move(finalization_offsets));
for (int i = 0; i < originalInputs.size(); i++) {
for (size_t i = 0; i < originalInputs.size(); i++) {
originalInputs[i].replace_source_output(loop_end->output(i));
}
return loop_end;

View File

@ -57,25 +57,26 @@ ngraph::snippets::pass::SoftmaxDecomposition::SoftmaxDecomposition(const size_t
/* ====== ReduceMax decomposition ====== */
// We have to have fake edge Data -> Loop[ReduceMax] -> Loop[Sub + Exp + ReduceSum] because ReduceMax is
// accumulator which finds maximum of elements and save it to vector register. Loop works only with GPR (data) but ReduceMax Loop
// doesn't save maximum to data. Seems like, LoopEnd shouldn't have outputs:
// Data
// VectorBuffer LoopBegin \
// \ Load \ |
// Maximum / |
// / LoopEnd |
// HorizonMax /
// \ LoopBegin[Sub + Exp + ReduceSum]
// But nGraph doesn't allow to have 0 outputs for Node (at least 1 output).
// Thus, we propagate data through Loop[ReduceMax] using fake edge because of that Loop[ReduceMax] has two inputs "Data"
// Data
// VectorBuffer LoopBegin
// \ Load | \
// Maximum | /
// / LoopEnd
// HorizonMax |
// \ LoopBegin[Sub + Exp + ReduceSum]
/* We have to have fake edge Data -> Loop[ReduceMax] -> Loop[Sub + Exp + ReduceSum] because ReduceMax is
* accumulator which finds the maximum of elements and saves it to a vector register. Loop works only with GPR (data) but ReduceMax Loop
* doesn't save maximum to data. Seems like, LoopEnd shouldn't have outputs:
* Data
* VectorBuffer LoopBegin \
* \ Load \ |
* Maximum / |
* / LoopEnd |
* HorizonMax /
* \ LoopBegin[Sub + Exp + ReduceSum]
* But nGraph doesn't allow to have 0 outputs for Node (at least 1 output).
* Thus, we propagate data through Loop[ReduceMax] using fake edge because of that Loop[ReduceMax] has two inputs "Data"
* Data
* VectorBuffer LoopBegin
* \ Load | \
* Maximum | /
* / LoopEnd
* HorizonMax |
* \ LoopBegin[Sub + Exp + ReduceSum]
*/
const auto vector_buffer_max = std::make_shared<ngraph::snippets::op::VectorBuffer>();
const auto loop_max_begin = ngraph::snippets::op::insertLoopBegin(ngraph::OutputVector{data, data});

View File

@ -22,8 +22,7 @@ ngraph::snippets::pass::TransformConvertToConvertTruncation::TransformConvertToC
});
register_matcher(std::make_shared<ngraph::pattern::Matcher>(
ngraph::pattern::wrap_type<ngraph::opset1::Convert>(), matcher_name),
[this](ngraph::pattern::Matcher &m) {
ngraph::pattern::wrap_type<ngraph::opset1::Convert>(), matcher_name), [](ngraph::pattern::Matcher &m) {
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::TransformConvertToConvertTruncation")
const auto root = m.get_match_root();
const auto convert = ngraph::as_type_ptr<ngraph::opset1::Convert>(root);

View File

@ -92,7 +92,7 @@ ov::PartialShape get_reordered_planar_shape(const ov::PartialShape& shape, const
// Note that it can be smaller though, for example tensor shape can be prepended with 1 for scheduling purposes
if (std::any_of(layout.begin(), layout.end(), [=](size_t x) {return x >= rank;}))
throw ngraph_error("Invalid layout detected: all layout indexes must be smaller than the tensor rank");
for (int i = 0; i < layout.size(); i++)
for (size_t i = 0; i < layout.size(); i++)
reordered_shape[i] = shape[layout[i]];
return reordered_shape;
}

View File

@ -12,7 +12,7 @@ namespace test {
namespace snippets {
DummyTargetMachine::DummyTargetMachine() {
auto dummy_functor = [this](const std::shared_ptr<ngraph::Node>& n) {
auto dummy_functor = [](const std::shared_ptr<ngraph::Node>& n) {
return std::make_shared<DummyEmitter>();
};
jitters[op::v0::Parameter::get_type_info_static()] = dummy_functor;

View File

@ -30,7 +30,7 @@ void BroadcastToMoveBroadcastTests::SetUp() {
std::tie(inputShapes[0], inputShapes[1], broadcast_shape) = this->GetParam();
snippets_function = std::make_shared<BroadcastAddLoweredFunction>(inputShapes, broadcast_shape);
master_shape = {};
for (int i = 0; i < inputShapes[0].size(); i++)
for (size_t i = 0; i < inputShapes[0].size(); i++)
master_shape.push_back(static_cast<int64_t>(std::max(inputShapes[0].get_shape()[i], inputShapes[1].get_shape()[i])));
}

View File

@ -32,7 +32,7 @@ void InsertMoveBroadcastTests::SetUp() {
if (inputShapes[0].size() != inputShapes[1].size())
IE_THROW() << "Expected input shapes of the same size";
master_shape = {};
for (int i = 0; i < inputShapes[0].size(); i++)
for (size_t i = 0; i < inputShapes[0].size(); i++)
master_shape.push_back(static_cast<int64_t>(std::max(inputShapes[0][i], inputShapes[1][i])));
}
