Merge pull request #278 from asuhov/2019-r3

Publishing 2019 R3 content
Alexey Suhov 2019-10-04 19:54:45 +03:00 committed by GitHub
commit 1c794d971c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1734 changed files with 72094 additions and 58972 deletions

View File

@ -1,5 +1,5 @@
# [OpenVINO™ Toolkit](https://01.org/openvinotoolkit) - Deep Learning Deployment Toolkit repository
[![Stable release](https://img.shields.io/badge/version-2019.R2-green.svg)](https://github.com/opencv/dldt/releases/tag/2019_R2)
[![Stable release](https://img.shields.io/badge/version-2019.R3-green.svg)](https://github.com/opencv/dldt/releases/tag/2019_R3)
[![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE)
This toolkit allows developers to deploy pre-trained deep learning models through a high-level C++ Inference Engine API integrated with application logic.

View File

@ -34,6 +34,9 @@ message (STATUS "CMAKE_GENERATOR ....................... " ${CMAKE_GENERATOR})
message (STATUS "CMAKE_C_COMPILER_ID ................... " ${CMAKE_C_COMPILER_ID})
message (STATUS "CMAKE_BUILD_TYPE ...................... " ${CMAKE_BUILD_TYPE})
# remove file with exported developer targets to force its regeneration
file(REMOVE "${CMAKE_BINARY_DIR}/targets_developer.cmake")
add_subdirectory(src)
if(ENABLE_TESTS)

View File

@ -2,35 +2,79 @@
# SPDX-License-Identifier: Apache-2.0
#
#module to locate GNA libraries
# module to locate GNA libraries
if (WIN32)
set(GNA_PLATFORM_DIR win64)
set(GNA_LIB_DIR x64)
set(GNA_LIB gna)
elseif (UNIX)
set(GNA_PLATFORM_DIR linux)
set(GNA_LIB_DIR lib)
set(GNA_LIB gna_api)
set(GNA_KERNEL_LIB gna_kernel)
else ()
message(FATAL_ERROR "GNA is not supported on this platform; only Linux and Windows are supported")
endif ()
find_library(GNA_API_LIBRARY
${GNA_LIB}
set(libGNA_FOUND TRUE)
set(GNA_KERNEL_LIB_NAME gna)
set(GNA_LIBS_LIST
"libGNA::API"
"libGNA::KERNEL")
if (GNA_LIBRARY_VERSION STREQUAL "GNA1")
# use old version of GNA Library from gna_20181120
if (WIN32)
set(GNA_LIB_DIR x64)
else ()
list(APPEND GNA_LIBS_LIST
"libGNA::OLD_API_LIB")
set(GNA_LIB_DIR lib)
set(GNA_KERNEL_LIB_NAME gna_kernel)
endif()
set(libGNA_INCLUDE_DIRS "${GNA}/${GNA_PLATFORM_DIR}/include")
else()
# use current version of GNA library
set(GNA_LIB_DIR x64)
set(libGNA_INCLUDE_DIRS "${GNA}/include")
endif()
set(libGNA_LIBRARIES_BASE_PATH ${GNA}/${GNA_PLATFORM_DIR}/${GNA_LIB_DIR})
add_library(libGNA::KERNEL SHARED IMPORTED)
find_library(GNA_KERNEL_LIBRARY
${GNA_KERNEL_LIB_NAME}
HINTS
${GNA}/${GNA_PLATFORM_DIR}/${GNA_LIB_DIR})
${libGNA_LIBRARIES_BASE_PATH})
set_target_properties(libGNA::KERNEL PROPERTIES IMPORTED_LOCATION ${GNA_KERNEL_LIBRARY})
set(libGNA_INCLUDE_DIRS ${GNA}/${GNA_PLATFORM_DIR}/include)
set(libGNA_LIBRARY ${GNA_API_LIBRARY})
if (UNIX)
#message("Searching for libgna_kernel.so in: ${GNA}/${GNA_PLATFORM_DIR}/${GNA_KERNEL_LIB}")
find_library(GNA_KERNEL_LIBRARY
${GNA_KERNEL_LIB}
if ((GNA_LIBRARY_VERSION STREQUAL "GNA1") AND (NOT WIN32))
add_library(libGNA::OLD_API_LIB SHARED IMPORTED)
find_library(GNA_API_LIBRARY
gna_api
HINTS
${GNA}/${GNA_PLATFORM_DIR}/${GNA_LIB_DIR})
endif ()
${libGNA_LIBRARIES_BASE_PATH})
set_target_properties(libGNA::OLD_API_LIB PROPERTIES IMPORTED_LOCATION ${GNA_API_LIBRARY})
target_link_libraries(libGNA::OLD_API_LIB INTERFACE libGNA::KERNEL)
set_target_properties(libGNA::OLD_API_LIB PROPERTIES IMPORTED_NO_SONAME TRUE)
set_target_properties(libGNA::KERNEL PROPERTIES IMPORTED_NO_SONAME TRUE)
endif()
set(libGNA_LIBRARIES ${libGNA_LIBRARY} ${GNA_KERNEL_LIBRARY})
add_library(libGNA::API INTERFACE IMPORTED)
set_property(TARGET libGNA::API PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${libGNA_INCLUDE_DIRS})
add_library(libGNA INTERFACE IMPORTED)
foreach(_lib_name ${GNA_LIBS_LIST})
set_property(TARGET libGNA APPEND PROPERTY INTERFACE_LINK_LIBRARIES ${_lib_name})
get_target_property(_target_type ${_lib_name} TYPE)
if (${_target_type} STREQUAL "INTERFACE_LIBRARY")
get_target_property(_target_location ${_lib_name} INTERFACE_INCLUDE_DIRECTORIES)
else()
get_target_property(_target_location ${_lib_name} IMPORTED_LOCATION)
endif ()
message(STATUS "${_lib_name} ${_target_type} : ${_target_location}")
endforeach(_lib_name)
if (WIN32)
set_target_properties(libGNA::KERNEL PROPERTIES
IMPORTED_IMPLIB ${GNA_KERNEL_LIBRARY})
elseif(NOT GNA_LIBRARY_VERSION STREQUAL "GNA1")
set_target_properties(libGNA PROPERTIES INTERFACE_LINK_OPTIONS "-Wl,-rpath-link,${libGNA_LIBRARIES_BASE_PATH}")
endif ()

View File

@ -24,8 +24,6 @@ endif()
if (APPLE)
set(ENABLE_GNA OFF)
set(ENABLE_CLDNN OFF)
SET(ENABLE_MYRIAD OFF)
SET(ENABLE_VPU OFF)
endif()
@ -66,18 +64,39 @@ if (ENABLE_MKL_DNN)
add_definitions(-DENABLE_MKL_DNN=1)
endif()
if (ENABLE_UNICODE_PATH_SUPPORT)
add_definitions(-DENABLE_UNICODE_PATH_SUPPORT=1)
endif()
if (ENABLE_GNA)
add_definitions(-DENABLE_GNA)
set (DEFAULT_GNA_LIB GNA1_1401)
# "GNA library version: GNA1|GNA1_1401|GNA2" - default is 1401
if (NOT GNA_LIBRARY_VERSION STREQUAL "GNA1"
AND NOT GNA_LIBRARY_VERSION STREQUAL "GNA1_1401"
AND NOT GNA_LIBRARY_VERSION STREQUAL "GNA2")
set (GNA_LIBRARY_VERSION ${DEFAULT_GNA_LIB})
message(STATUS "GNA_LIBRARY_VERSION not set. Can be GNA1, GNA1_1401 or GNA2. Default is ${GNA_LIBRARY_VERSION}")
endif()
if (GNA_LIBRARY_VERSION STREQUAL "GNA2")
message(WARNING "GNA2 is not currently supported. Fallback to ${DEFAULT_GNA_LIB}")
set(GNA_LIBRARY_VERSION ${DEFAULT_GNA_LIB})
endif()
if (UNIX AND NOT APPLE AND CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.4)
message(WARNING "${GNA_LIBRARY_VERSION} is not supported on GCC version ${CMAKE_CXX_COMPILER_VERSION}. Fallback to GNA1")
set(GNA_LIBRARY_VERSION GNA1)
endif()
set(GNA_LIBRARY_VERSION "${GNA_LIBRARY_VERSION}" CACHE STRING "GNAVersion" FORCE)
list (APPEND IE_OPTIONS GNA_LIBRARY_VERSION)
endif()
if (ENABLE_SAMPLES)
set (ENABLE_SAMPLES_CORE ON)
endif()
# model-dependent tests
if (DEVELOPMENT_PLUGIN_MODE)
message (STATUS "Enabled development plugin mode")
@ -93,8 +112,18 @@ if (DEVELOPMENT_PLUGIN_MODE)
endif()
endif()
if (NOT ENABLE_TESTS)
set(ENABLE_GNA_MODELS OFF)
endif ()
if (VERBOSE_BUILD)
set(CMAKE_VERBOSE_MAKEFILE ON)
endif()
if(ENABLE_DUMP)
add_definitions(-DDEBUG_DUMP)
endif()
print_enabled_features()

View File

@ -7,6 +7,9 @@ if(DEFINED IE_MAIN_SOURCE_DIR AND TARGET inference_engine)
set(InferenceEngine_LIBRARIES inference_engine)
else()
include("${CMAKE_CURRENT_LIST_DIR}/targets.cmake")
if(NOT WIN32)
set_target_properties(IE::inference_engine PROPERTIES INTERFACE_COMPILE_OPTIONS "-Wno-error=deprecated-declarations")
endif()
get_target_property(InferenceEngine_INCLUDE_DIRS IE::inference_engine INTERFACE_INCLUDE_DIRECTORIES)
set(InferenceEngine_LIBRARIES IE::inference_engine)
endif()

View File

@ -11,46 +11,26 @@ set_temp_directory(TEMP "${IE_MAIN_SOURCE_DIR}")
include(ExternalProject)
if (ENABLE_SAME_BRANCH_FOR_MODELS)
branchName(MODELS_BRANCH)
else()
set(MODELS_BRANCH "master")
endif()
include(linux_name)
if(COMMAND get_linux_name)
get_linux_name(LINUX_OS_NAME)
endif()
if (ENABLE_MYRIAD)
RESOLVE_DEPENDENCY(VPU_FIRMWARE_MA2450
ARCHIVE_UNIFIED firmware_ma2450_676.zip
TARGET_PATH "${TEMP}/vpu/firmware/ma2450"
ENVIRONMENT "VPU_FIRMWARE_MA2450"
FOLDER)
debug_message(STATUS "ma2450=" ${VPU_FIRMWARE_MA2450})
endif ()
if (ENABLE_MYRIAD)
RESOLVE_DEPENDENCY(VPU_FIRMWARE_MA2X8X
ARCHIVE_UNIFIED firmware_ma2x8x_mdk_R8_9.zip
TARGET_PATH "${TEMP}/vpu/firmware/ma2x8x"
ENVIRONMENT "VPU_FIRMWARE_MA2X8X"
FOLDER)
debug_message(STATUS "ma2x8x=" ${VPU_FIRMWARE_MA2X8X})
endif ()
include(vpu_dependencies)
endif()
## enable cblas_gemm from OpenBLAS package
if (GEMM STREQUAL "OPENBLAS")
if(NOT BLAS_LIBRARIES OR NOT BLAS_INCLUDE_DIRS)
find_package(BLAS REQUIRED)
if(BLAS_FOUND)
find_path(BLAS_INCLUDE_DIRS cblas.h)
else()
message(ERROR "OpenBLAS not found: install OpenBLAS or set -DBLAS_INCLUDE_DIRS=<path to dir with cblas.h> and -DBLAS_LIBRARIES=<path to libopenblas.so or openblas.lib>")
if(NOT BLAS_LIBRARIES OR NOT BLAS_INCLUDE_DIRS)
find_package(BLAS REQUIRED)
if(BLAS_FOUND)
find_path(BLAS_INCLUDE_DIRS cblas.h)
else()
message(ERROR "OpenBLAS not found: install OpenBLAS or set -DBLAS_INCLUDE_DIRS=<path to dir with cblas.h> and -DBLAS_LIBRARIES=<path to libopenblas.so or openblas.lib>")
endif()
endif()
endif()
debug_message(STATUS "openblas=" ${BLAS_LIBRARIES})
debug_message(STATUS "openblas=" ${BLAS_LIBRARIES})
endif ()
#MKL-ml package
@ -64,111 +44,116 @@ endif ()
## Intel OMP package
if (THREADING STREQUAL "OMP")
if (WIN32)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_WIN "iomp.zip"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
elseif(LINUX)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_LIN "iomp.tgz"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
else(APPLE)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_MAC "iomp_20190130_mac.tgz"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
endif()
log_rpath_from_dir(OMP "${OMP}/lib")
debug_message(STATUS "intel_omp=" ${OMP})
if (WIN32)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_WIN "iomp.zip"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
elseif(LINUX)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_LIN "iomp.tgz"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
else(APPLE)
RESOLVE_DEPENDENCY(OMP
ARCHIVE_MAC "iomp_20190130_mac.tgz"
TARGET_PATH "${TEMP}/omp"
ENVIRONMENT "OMP"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
endif()
log_rpath_from_dir(OMP "${OMP}/lib")
debug_message(STATUS "intel_omp=" ${OMP})
endif ()
## TBB package
if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
if (WIN32)
#TODO: add target_path to be platform specific as well, to avoid following if
RESOLVE_DEPENDENCY(TBB
ARCHIVE_WIN "tbb2019_20181010_win.zip" #TODO: windows zip archive created incorrectly using old name for folder
TARGET_PATH "${TEMP}/tbb"
ENVIRONMENT "TBBROOT"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
elseif(LINUX)
RESOLVE_DEPENDENCY(TBB
ARCHIVE_LIN "tbb2019_20181010_lin.tgz"
TARGET_PATH "${TEMP}/tbb"
ENVIRONMENT "TBBROOT")
else(APPLE)
RESOLVE_DEPENDENCY(TBB
ARCHIVE_MAC "tbb2019_20190414_mac.tgz"
TARGET_PATH "${TEMP}/tbb"
ENVIRONMENT "TBBROOT"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
endif()
log_rpath_from_dir(TBB "${TBB}/lib")
debug_message(STATUS "tbb=" ${TBB})
if (WIN32)
#TODO: add target_path to be platform specific as well, to avoid following if
RESOLVE_DEPENDENCY(TBB
ARCHIVE_WIN "tbb2019_20181010_win.zip" #TODO: windows zip archive created incorrectly using old name for folder
TARGET_PATH "${TEMP}/tbb"
ENVIRONMENT "TBBROOT"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
elseif(LINUX)
RESOLVE_DEPENDENCY(TBB
ARCHIVE_LIN "tbb2019_20181010_lin.tgz"
TARGET_PATH "${TEMP}/tbb"
ENVIRONMENT "TBBROOT")
else(APPLE)
RESOLVE_DEPENDENCY(TBB
ARCHIVE_MAC "tbb2019_20190414_v1_mac.tgz"
TARGET_PATH "${TEMP}/tbb"
ENVIRONMENT "TBBROOT"
VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
endif()
log_rpath_from_dir(TBB "${TBB}/lib")
debug_message(STATUS "tbb=" ${TBB})
endif ()
if (ENABLE_OPENCV)
set(OPENCV_VERSION "4.1.1")
set(OPENCV_BUILD "595")
set(OPENCV_SUFFIX "")
if (WIN32)
RESOLVE_DEPENDENCY(OPENCV
ARCHIVE_WIN "opencv_${OPENCV_VERSION}-${OPENCV_BUILD}.zip"
TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}"
ENVIRONMENT "OpenCV_DIR"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
log_rpath_from_dir(OPENCV "\\opencv_${OPENCV_VERSION}\\bin")
set( ENV{OpenCV_DIR} ${OPENCV}/cmake )
elseif(APPLE)
RESOLVE_DEPENDENCY(OPENCV
ARCHIVE_MAC "opencv_${OPENCV_VERSION}-${OPENCV_BUILD}_osx.tar.xz"
TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}_osx"
ENVIRONMENT "OpenCV_DIR"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
log_rpath_from_dir(OPENCV "opencv_${OPENCV_VERSION}_osx/lib")
set( ENV{OpenCV_DIR} ${OPENCV}/cmake )
elseif(LINUX)
if (${LINUX_OS_NAME} STREQUAL "Ubuntu 16.04")
set(OPENCV_SUFFIX "ubuntu16")
elseif (${LINUX_OS_NAME} STREQUAL "Ubuntu 18.04")
set(OPENCV_SUFFIX "ubuntu18")
elseif (${LINUX_OS_NAME} STREQUAL "CentOS 7")
set(OPENCV_SUFFIX "centos7")
elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "armv7l" AND
(${LINUX_OS_NAME} STREQUAL "Debian 9" OR
${LINUX_OS_NAME} STREQUAL "Raspbian 9" OR
${LINUX_OS_NAME} STREQUAL "Debian 10" OR
${LINUX_OS_NAME} STREQUAL "Raspbian 10"))
set(OPENCV_SUFFIX "debian9arm")
set(OPENCV_VERSION "4.1.2")
set(OPENCV_BUILD "624")
set(OPENCV_SUFFIX "")
if (WIN32)
RESOLVE_DEPENDENCY(OPENCV
ARCHIVE_WIN "opencv_${OPENCV_VERSION}-${OPENCV_BUILD}.zip"
TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}"
ENVIRONMENT "OpenCV_DIR"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
log_rpath_from_dir(OPENCV "\\opencv_${OPENCV_VERSION}\\bin")
elseif(APPLE)
RESOLVE_DEPENDENCY(OPENCV
ARCHIVE_MAC "opencv_${OPENCV_VERSION}-${OPENCV_BUILD}_osx.tar.xz"
TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}_osx"
ENVIRONMENT "OpenCV_DIR"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
log_rpath_from_dir(OPENCV "opencv_${OPENCV_VERSION}_osx/lib")
elseif(LINUX)
if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "armv7l")
set(OPENCV_SUFFIX "debian9arm")
elseif (${LINUX_OS_NAME} STREQUAL "Ubuntu 16.04")
set(OPENCV_SUFFIX "ubuntu16")
elseif (${LINUX_OS_NAME} STREQUAL "Ubuntu 18.04")
set(OPENCV_SUFFIX "ubuntu18")
elseif (${LINUX_OS_NAME} STREQUAL "CentOS 7")
set(OPENCV_SUFFIX "centos7")
endif()
endif()
if (OPENCV_SUFFIX)
RESOLVE_DEPENDENCY(OPENCV
ARCHIVE_LIN "opencv_${OPENCV_VERSION}-${OPENCV_BUILD}_${OPENCV_SUFFIX}.tar.xz"
TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}_${OPENCV_SUFFIX}"
ENVIRONMENT "OpenCV_DIR"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
log_rpath_from_dir(OPENCV "opencv_${OPENCV_VERSION}_${OPENCV_SUFFIX}/lib")
endif()
debug_message(STATUS "opencv=" ${OPENCV})
# OpenCV_DIR should point to cmake folder within the specified OpenCV binary package.
# It's required to successfully find OpenCV libs using the find_package(OpenCV ...) command.
# So, the cached OpenCV_DIR variable should be updated if a custom value wasn't previously set here.
if (NOT DEFINED ENV{OpenCV_DIR})
set(OpenCV_DIR "${OPENCV}/cmake" CACHE PATH "Path to OpenCV in temp directory")
endif()
endif()
if (OPENCV_SUFFIX)
RESOLVE_DEPENDENCY(OPENCV
ARCHIVE_LIN "opencv_${OPENCV_VERSION}-${OPENCV_BUILD}_${OPENCV_SUFFIX}.tar.xz"
TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}_${OPENCV_SUFFIX}"
ENVIRONMENT "OpenCV_DIR"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
log_rpath_from_dir(OPENCV "opencv_${OPENCV_VERSION}_${OPENCV_SUFFIX}/lib")
set( ENV{OpenCV_DIR} ${OPENCV}/cmake )
endif()
debug_message(STATUS "opencv=" ${OPENCV})
set(OpenCV_DIR "${OPENCV}" CACHE PATH "Path to OpenCV in temp directory")
endif()
include(ie_parallel)
if (ENABLE_GNA)
RESOLVE_DEPENDENCY(GNA
ARCHIVE_UNIFIED "gna_20181120.zip"
TARGET_PATH "${TEMP}/gna")
if (GNA_LIBRARY_VERSION STREQUAL "GNA1")
RESOLVE_DEPENDENCY(GNA
ARCHIVE_UNIFIED "gna_20181120.zip"
TARGET_PATH "${TEMP}/gna")
elseif(GNA_LIBRARY_VERSION STREQUAL "GNA1_1401")
set(GNA_VERSION "01.00.00.1401")
RESOLVE_DEPENDENCY(GNA
ARCHIVE_UNIFIED "GNA_${GNA_VERSION}.zip"
TARGET_PATH "${TEMP}/gna_${GNA_VERSION}"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*")
endif()
debug_message(STATUS "gna=" ${GNA})
endif()
configure_file(

View File

@ -6,7 +6,7 @@
include(debug)
if (UNIX AND NOT APPLE)
set(LINUX TRUE)
set(LINUX ON)
endif()
string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH_FOLDER)
@ -68,16 +68,14 @@ set(CMAKE_RELEASE_POSTFIX ${IE_RELEASE_POSTFIX})
if (WIN32)
# Support CMake multiconfiguration for Visual Studio build
set(IE_BUILD_POSTFIX $<$<CONFIG:Debug>:${IE_DEBUG_POSTFIX}>$<$<CONFIG:Release>:${IE_RELEASE_POSTFIX}>)
set(IE_BUILD_CONFIGURATION $<CONFIG>)
else ()
if (${CMAKE_BUILD_TYPE} STREQUAL "Debug" )
set(IE_BUILD_POSTFIX ${IE_DEBUG_POSTFIX})
else()
set(IE_BUILD_POSTFIX ${IE_RELEASE_POSTFIX})
endif()
set(IE_BUILD_CONFIGURATION ${CMAKE_BUILD_TYPE})
endif()
message(STATUS "BUILD_CONFIGURATION: ${IE_BUILD_CONFIGURATION}")
message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
add_definitions(-DIE_BUILD_POSTFIX=\"${IE_BUILD_POSTFIX}\")
@ -95,12 +93,12 @@ if(NOT UNIX)
set(LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER})
set(LIBRARY_OUTPUT_PATH ${LIBRARY_OUTPUT_DIRECTORY}) # compatibility issue: linux uses LIBRARY_OUTPUT_PATH, windows uses LIBRARY_OUTPUT_DIRECTORY
else()
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}/lib)
set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION})
set(CMAKE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION})
set(LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE}/lib)
set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE})
set(CMAKE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE})
set(LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE}/lib)
set(LIBRARY_OUTPUT_PATH ${LIBRARY_OUTPUT_DIRECTORY}/lib)
endif()

View File

@ -145,7 +145,7 @@ function (CheckOrDownloadAndExtract component RELATIVE_URL archive_name unpacked
if(DEFINED ENV{IE_PATH_TO_DEPS})
set(URL "$ENV{IE_PATH_TO_DEPS}/${RELATIVE_URL}")
else()
set(URL "https://download.01.org/opencv/2019/openvinotoolkit/R2/inference_engine/${RELATIVE_URL}")
set(URL "https://download.01.org/opencv/2019/openvinotoolkit/R3/inference_engine/${RELATIVE_URL}")
endif()
#no message on recursive calls

View File

@ -4,15 +4,20 @@
include (options)
#this options are aimed to optimize build time on development system
#these options are aimed to optimize build time on development system
#backed targets
ie_option (ENABLE_GNA "GNA support for inference engine" ON)
ie_option (ENABLE_ROCKHOPER "use Rockhopper decoder for converting / output scores" ON)
ie_option (ENABLE_MKL_DNN "MKL-DNN plugin for inference engine" ON)
ie_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON)
ie_option (ENABLE_CLDNN_TESTS "Enable clDNN unit tests" OFF)
ie_option (ENABLE_CLDNN_BUILD "build clDnn from sources" OFF)
ie_option (ENABLE_PROFILING_ITT "ITT tracing of IE and plugins internals" ON)
ie_option (ENABLE_PROFILING_RAW "Raw counters profiling (just values, no start/stop time or timeline)" OFF)
@ -90,8 +95,18 @@ ie_option (DEVELOPMENT_PLUGIN_MODE "Disabled build of all plugins" OFF)
ie_option (TREAT_WARNING_AS_ERROR "Treat build warnings as errors" ON)
ie_option (ENABLE_CPP_CCT "enables C++ version of Cross Check Tool" OFF)
ie_option (ENABLE_UNICODE_PATH_SUPPORT "Enable loading models from Unicode paths" ON)
ie_option (ENABLE_LTO "Enable Link Time Optimization" OFF)
# FIXME: there are compiler failures with LTO and Cross-Compile toolchains. Disabling for now, but
# this must be addressed in a proper way
if(CMAKE_CROSSCOMPILING OR NOT (UNIX AND NOT APPLE))
set(ENABLE_LTO OFF)
endif()
if (UNIX AND NOT APPLE AND CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.3)
set(ENABLE_UNICODE_PATH_SUPPORT OFF)
endif()

View File

@ -6,57 +6,77 @@ function(set_ie_threading_interface_for TARGET_NAME)
set(IE_THREAD_DEFINE "IE_THREAD_SEQ")
if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
if (NOT (IE_MAIN_SOURCE_DIR))
set(incl_path ${IE_EXTERNAL_DIR}/tbb/include)
if (WIN32)
set(lib_rel_path ${IE_LIB_REL_DIR})
set(lib_dbg_path ${IE_LIB_DBG_DIR})
else ()
set(lib_rel_path ${IE_EXTERNAL_DIR}/tbb/lib)
set(lib_dbg_path ${lib_rel_path})
endif ()
else ()
set(incl_path ${TBB}/include)
set(lib_rel_path ${TBB}/lib)
set(lib_dbg_path ${lib_rel_path})
endif ()
if (NOT TBB_INCLUDE_DIRS OR NOT TBB_LIBRARIES_RELEASE)
find_path(TBB_INCLUDE_DIRS tbb/tbb.h ${incl_path} NO_DEFAULT_PATH)
find_library(TBB_LIBRARIES_RELEASE tbb ${lib_rel_path} NO_DEFAULT_PATH)
find_library(TBB_LIBRARIES_DEBUG tbb_debug ${lib_dbg_path} NO_DEFAULT_PATH)
ext_message(STATUS "TBB include: ${TBB_INCLUDE_DIRS}")
ext_message(STATUS "TBB Release lib: ${TBB_LIBRARIES_RELEASE}")
ext_message(STATUS "TBB Debug lib: ${TBB_LIBRARIES_DEBUG}")
endif ()
if (NOT TBB_INCLUDE_DIRS OR NOT TBB_LIBRARIES_RELEASE)
ext_message(WARNING "TBB not found. TBB support will be disabled. ${IE_THREAD_DEFINE} is defined")
else ()
set(IE_THREAD_DEFINE "IE_THREAD_TBB")
target_include_directories(${TARGET_NAME} PUBLIC ${TBB_INCLUDE_DIRS})
if (WIN32)
target_link_libraries(${TARGET_NAME} PUBLIC "-nodefaultlib:vcomp")
endif ()
# Debug binaries are optional.
if (TBB_LIBRARIES_DEBUG)
if (DEFINED ENV{TBBROOT})
# Check TBB package in case if custom TBBROOT path configured
find_package(TBB QUIET PATHS "$ENV{TBBROOT}/cmake")
if (TBB_FOUND)
set(IE_THREAD_DEFINE "IE_THREAD_TBB")
if (WIN32)
target_link_libraries(${TARGET_NAME} PUBLIC "$<$<CONFIG:DEBUG>:${TBB_LIBRARIES_DEBUG}>;$<$<NOT:$<CONFIG:DEBUG>>:${TBB_LIBRARIES_RELEASE}>")
target_link_libraries(${TARGET_NAME} PUBLIC "-nodefaultlib:vcomp")
endif ()
target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_IMPORTED_TARGETS})
else ()
# TBB was not found by the configured TBBROOT path, SEQ method will be used
ext_message(WARNING "TBB not found by the configured TBBROOT path $ENV{TBBROOT}")
endif ()
else()
if (NOT (IE_MAIN_SOURCE_DIR))
set(incl_path ${IE_EXTERNAL_DIR}/tbb/include)
if (WIN32)
set(lib_rel_path ${IE_LIB_REL_DIR})
set(lib_dbg_path ${IE_LIB_DBG_DIR})
else ()
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_DEBUG})
else()
target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_RELEASE})
endif ()
set(lib_rel_path ${IE_EXTERNAL_DIR}/tbb/lib)
set(lib_dbg_path ${lib_rel_path})
endif ()
else ()
# Link Release library to all configurations.
ext_message(WARNING "TBB Debug binaries are missed.")
target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_RELEASE})
set(incl_path ${TBB}/include)
set(lib_rel_path ${TBB}/lib)
set(lib_dbg_path ${lib_rel_path})
endif ()
endif ()
if (NOT TBB_INCLUDE_DIRS OR NOT TBB_LIBRARIES_RELEASE)
find_path(TBB_INCLUDE_DIRS tbb/tbb.h ${incl_path} NO_DEFAULT_PATH)
find_library(TBB_LIBRARIES_RELEASE tbb ${lib_rel_path} NO_DEFAULT_PATH)
ext_message(STATUS "TBB include: ${TBB_INCLUDE_DIRS}")
ext_message(STATUS "TBB Release lib: ${TBB_LIBRARIES_RELEASE}")
if (NOT LINUX)
find_library(TBB_LIBRARIES_DEBUG tbb_debug ${lib_dbg_path} NO_DEFAULT_PATH)
if (TBB_LIBRARIES_DEBUG)
ext_message(STATUS "TBB Debug lib: ${TBB_LIBRARIES_DEBUG}")
else ()
ext_message(WARNING "TBB Debug binaries are missed.")
endif ()
endif ()
endif ()
if (NOT TBB_INCLUDE_DIRS OR NOT TBB_LIBRARIES_RELEASE)
ext_message(WARNING "TBB not found. TBB support will be disabled. ${IE_THREAD_DEFINE} is defined")
else ()
set(IE_THREAD_DEFINE "IE_THREAD_TBB")
target_include_directories(${TARGET_NAME} PUBLIC ${TBB_INCLUDE_DIRS})
if (WIN32)
target_link_libraries(${TARGET_NAME} PUBLIC "-nodefaultlib:vcomp")
endif ()
# Debug binaries are optional.
if (TBB_LIBRARIES_DEBUG AND NOT LINUX)
if (WIN32)
target_link_libraries(${TARGET_NAME} PUBLIC "$<$<CONFIG:DEBUG>:${TBB_LIBRARIES_DEBUG}>;$<$<NOT:$<CONFIG:DEBUG>>:${TBB_LIBRARIES_RELEASE}>")
else ()
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_DEBUG})
else()
target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_RELEASE})
endif ()
endif ()
else ()
# Link Release library to all configurations.
target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_RELEASE})
endif ()
endif ()
endif()
elseif (THREADING STREQUAL "OMP")
if (WIN32)
set(omp_lib_name libiomp5md)
@ -79,9 +99,15 @@ function(set_ie_threading_interface_for TARGET_NAME)
if (NOT OMP_LIBRARIES_RELEASE)
find_library(OMP_LIBRARIES_RELEASE ${omp_lib_name} ${lib_rel_path} NO_DEFAULT_PATH)
find_library(OMP_LIBRARIES_DEBUG ${omp_lib_name} ${lib_dbg_path} NO_DEFAULT_PATH)
ext_message(STATUS "OMP Release lib: ${OMP_LIBRARIES_RELEASE}")
ext_message(STATUS "OMP Debug lib: ${OMP_LIBRARIES_DEBUG}")
if (NOT LINUX)
find_library(OMP_LIBRARIES_DEBUG ${omp_lib_name} ${lib_dbg_path} NO_DEFAULT_PATH)
if (OMP_LIBRARIES_DEBUG)
ext_message(STATUS "OMP Debug lib: ${OMP_LIBRARIES_DEBUG}")
else ()
ext_message(WARNING "OMP Debug binaries are missed.")
endif ()
endif ()
endif ()
if (NOT OMP_LIBRARIES_RELEASE)
@ -98,7 +124,7 @@ function(set_ie_threading_interface_for TARGET_NAME)
endif ()
# Debug binaries are optional.
if (OMP_LIBRARIES_DEBUG)
if (OMP_LIBRARIES_DEBUG AND NOT LINUX)
if (WIN32)
target_link_libraries(${TARGET_NAME} PUBLIC "$<$<CONFIG:DEBUG>:${OMP_LIBRARIES_DEBUG}>;$<$<NOT:$<CONFIG:DEBUG>>:${OMP_LIBRARIES_RELEASE}>")
else()
@ -110,7 +136,6 @@ function(set_ie_threading_interface_for TARGET_NAME)
endif ()
else ()
# Link Release library to all configurations.
ext_message(WARNING "OMP Debug binaries are missed.")
target_link_libraries(${TARGET_NAME} PUBLIC ${OMP_LIBRARIES_RELEASE})
endif ()
endif ()

View File

@ -4,9 +4,9 @@
macro(disable_deprecated_warnings)
if(WIN32)
if("${CMAKE_CXX_COMPILER_ID}" MATCHES Intel)
if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qdiag-warning:1478")
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL MSVC)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4996") # disable warning on deprecated API
endif()
else()
@ -29,7 +29,6 @@ if (WIN32)
endif()
endif()
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /Z7")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Z7")
@ -38,7 +37,7 @@ if (WIN32)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Z7")
set(DEBUG_SYMBOLS_LINKER_FLAGS "/DEBUG")
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
if (CMAKE_BUILD_TYPE STREQUAL "Release")
# Keep default /OPT values. See /DEBUG reference for details.
set(DEBUG_SYMBOLS_LINKER_FLAGS "${DEBUG_SYMBOLS_LINKER_FLAGS} /OPT:REF /OPT:ICF")
endif()
@ -51,12 +50,28 @@ else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Werror=return-type ")
if (APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=unused-command-line-argument")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-function")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-reorder")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wswitch")
elseif(UNIX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wuninitialized -Winit-self")
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-switch")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wmaybe-uninitialized")
endif()
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-disable=remark")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden")
if(LINUX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffunction-sections -fdata-sections")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gc-sections -Wl,--exclude-libs,ALL")
endif()
endif()

View File

@ -2,12 +2,16 @@
# SPDX-License-Identifier: Apache-2.0
#
if (UNIX OR APPLE AND ${CMAKE_BUILD_TYPE} STREQUAL "Release")
if (UNIX OR APPLE AND CMAKE_BUILD_TYPE STREQUAL "Release")
set(CMAKE_CCXX_FLAGS "${CMAKE_CCXX_FLAGS} -fPIE -fPIC -Wformat -Wformat-security")
# TODO: double check if it's OK
if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
string(REPLACE "-fPIE" "" CMAKE_CCXX_FLAGS "${CMAKE_CCXX_FLAGS}")
endif()
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -D_FORTIFY_SOURCE=2")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -D_FORTIFY_SOURCE=2")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie")
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z noexecstack -z relro -z now")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z noexecstack -z relro -z now")
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9)
@ -17,12 +21,12 @@ if (UNIX OR APPLE AND ${CMAKE_BUILD_TYPE} STREQUAL "Release")
endif()
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -s -fvisibility=hidden")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s -fvisibility=hidden")
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(CMAKE_CCXX_FLAGS "${CMAKE_CCXX_FLAGS} -fstack-protector-all")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fvisibility=hidden")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fvisibility=hidden")
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector-strong")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z noexecstack -z relro -z now")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z noexecstack -z relro -z now")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Wl,--strip-all -fvisibility=hidden")
@ -32,7 +36,7 @@ if (UNIX OR APPLE AND ${CMAKE_BUILD_TYPE} STREQUAL "Release")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_CCXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CCXX_FLAGS}")
elseif (WIN32)
if (${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC)
if (CMAKE_CXX_COMPILER_ID STREQUAL MSVC)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MP /sdl")
endif()
endif()

View File

@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
#
set(InferenceEngine_VERSION 2.0.0)
set(InferenceEngine_VERSION 2.1.0)
set(PACKAGE_VERSION ${InferenceEngine_VERSION})
set(PACKAGE_VERSION_EXACT False)

View File

@ -121,7 +121,8 @@ else()
elseif (APPLE)
set_target_properties(IE::inference_engine PROPERTIES
IMPORTED_LOCATION_RELEASE "${IE_RELEASE_LIBRARY}"
INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}")
INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}"
INTERFACE_COMPILE_OPTIONS "-Wno-error=deprecated-declarations")
# Debug binaries are optional
find_library(IE_DEBUG_LIBRARY inference_engine@IE_DEBUG_POSTFIX_MAC@ "${IE_LIB_DIR}" NO_DEFAULT_PATH)
@ -137,7 +138,8 @@ else()
# Only Release binaries are distributed for Linux systems
set_target_properties(IE::inference_engine PROPERTIES
IMPORTED_LOCATION "${IE_RELEASE_LIBRARY}"
INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}")
INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}"
INTERFACE_COMPILE_OPTIONS "-Wno-error=deprecated-declarations")
target_link_libraries(IE::inference_engine INTERFACE ${CMAKE_DL_LIBS})
endif()

View File

@ -0,0 +1,68 @@
# Copyright (C) 2019 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
set(VPU_SUPPORTED_SOC ma2450 ma2x8x mv0262)
#
# Default firmware packages
#
RESOLVE_DEPENDENCY(VPU_FIRMWARE_MA2450
ARCHIVE_UNIFIED firmware_ma2450_759W.zip
TARGET_PATH "${TEMP}/vpu/firmware/ma2450"
ENVIRONMENT "VPU_FIRMWARE_MA2450"
FOLDER)
debug_message(STATUS "ma2450=" ${VPU_FIRMWARE_MA2450})
RESOLVE_DEPENDENCY(VPU_FIRMWARE_MV0262
ARCHIVE_UNIFIED firmware_mv0262_mdk_R9.8.zip
TARGET_PATH "${TEMP}/vpu/firmware/mv0262"
ENVIRONMENT "VPU_FIRMWARE_MV0262"
FOLDER)
debug_message(STATUS "mv0262=" ${VPU_FIRMWARE_MV0262})
RESOLVE_DEPENDENCY(VPU_FIRMWARE_MA2X8X
ARCHIVE_UNIFIED firmware_ma2x8x_mdk_R9.8.zip
TARGET_PATH "${TEMP}/vpu/firmware/ma2x8x"
ENVIRONMENT "VPU_FIRMWARE_MA2X8X"
FOLDER)
debug_message(STATUS "ma2x8x=" ${VPU_FIRMWARE_MA2X8X})
#
# CMake variables to override default firmware files
#
foreach(soc IN LISTS VPU_SUPPORTED_SOC)
string(TOUPPER "${soc}" soc_upper)
set(var_name VPU_FIRMWARE_${soc_upper}_FILE)
find_file(${var_name} MvNCAPI-${soc}.mvcmd "${VPU_FIRMWARE_${soc_upper}}/mvnc")
if(NOT ${var_name})
message(FATAL_ERROR "[VPU] Missing ${soc} firmware")
endif()
endforeach()
#
# `vpu_copy_firmware` CMake target
#
foreach(soc IN LISTS VPU_SUPPORTED_SOC)
string(TOUPPER "${soc}" soc_upper)
set(var_name VPU_FIRMWARE_${soc_upper}_FILE)
set(firmware_out_file "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/MvNCAPI-${soc}.mvcmd")
list(APPEND all_firmware_files ${firmware_out_file})
add_custom_command(
OUTPUT ${firmware_out_file}
COMMAND
${CMAKE_COMMAND} -E copy ${${var_name}} ${firmware_out_file}
MAIN_DEPENDENCY ${${var_name}}
COMMENT "[VPU] Copy ${${var_name}} to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}"
VERBATIM)
endforeach()
add_custom_target(vpu_copy_firmware
DEPENDS ${all_firmware_files}
COMMENT "[VPU] Copy firmware files")

View File

@ -13,7 +13,7 @@ elseif(ARCH STREQUAL "i386")
endif()
# in case of independent python api build (out of Inference Engine root Cmake)
if (NOT(IE_MAIN_SOURCE_DIR))
if (NOT DEFINED IE_MAIN_SOURCE_DIR)
if("${CMAKE_BUILD_TYPE}" STREQUAL "")
message(STATUS "CMAKE_BUILD_TYPE not defined, 'Release' will be used")
set(CMAKE_BUILD_TYPE "Release")
@ -45,7 +45,11 @@ else()
set (PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/python_api/${PYTHON_VERSION}/openvino)
endif()
find_package (InferenceEngine REQUIRED)
if(DEFINED IE_MAIN_SOURCE_DIR)
find_package(InferenceEngine REQUIRED)
else()
find_package(InferenceEngineDeveloperPackage REQUIRED)
endif()
set (PYTHON_BRIDGE_SRC_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
add_subdirectory (src/openvino/inference_engine)

View File

@ -260,7 +260,7 @@ This class stores main information about the layer and allow to modify some laye
* `weights` - Dictionary with layer weights, biases, or custom blobs if any
* `params` - Layer-specific parameters. Provides getter and setter interfaces to get and modify layer parameters.
Please note that some modifications can be ignored and\or overwriten by target plugin (e.g. modification of
Please note that some modifications can be ignored and/or overwritten by the target plugin (e.g. a modification of
the convolution kernel size will be reflected in the layer parameters, but the plugin will ultimately ignore it and
use the initial kernel size)
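For illustration only, here is a minimal sketch of reading and updating layer parameters through the `layers` dictionary; the IR paths, the layer name `conv1`, and the `dilations` key are placeholders, not taken from a real model:
```py
from openvino.inference_engine import IENetwork

# Placeholder IR paths produced by the Model Optimizer.
net = IENetwork(model="model.xml", weights="model.bin")

layer = net.layers["conv1"]        # hypothetical layer name
print(layer.params)                # getter: layer-specific parameters as a dict

params = layer.params
params["dilations"] = "1,1"        # hypothetical parameter change
layer.params = params              # setter: the target plugin may still override it
```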
@ -280,8 +280,6 @@ layers affinity and output layers.
* `init_from_buffer` - Defines how the `model` and `weights` attributes are interpreted.
If `True`, attributes are interpreted as strings with paths to .xml and .bin files of IR. If `False`, they are
interpreted as a Python `bytes` object with the .xml and .bin file content.
* `ngrpah_compatibility` - Default value: `False`. If `IENetwork` initializes from
[experimental IR V7](./docs/OperationsSpecification-V7.md), set to `True`
* Usage examples:
@ -506,7 +504,7 @@ This class is the main plugin interface and serves to initialize and configure t
* Description: Loads an extensions library to the plugin. Applicable only to a CPU device and a HETERO device with CPU
* Parameters:
* `extension_path` - A full path to CPU extensions library
* Return value: None
* Return value: None
* Usage example:
```py
>>> plugin = IEPlugin(device="CPU")

View File

@ -1,155 +0,0 @@
# Benchmark Python* Application
This topic demonstrates how to run the Benchmark Application demo, which performs inference using convolutional networks.
## How It Works
Upon start-up, the application reads command-line parameters and loads a network and images/binary files to the Inference Engine
plugin, which is chosen depending on a specified device. The number of infer requests and execution approach depend
on the mode defined with the `-api` command-line parameter.
> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
### Synchronous API
For synchronous mode, the primary metric is latency. The application creates one infer request and executes the `Infer` method. The number of executions is defined by one of the following values:
* Number of iterations defined with the `-niter` command-line argument
* Time duration specified with the `-t` command-line argument
* Both of them (execution continues until both conditions are met)
* Predefined duration if `-niter` and `-t` are not specified. The predefined duration value depends on the device.
During the execution, the application collects two types of metrics:
* Latency for each infer request executed with `Infer` method
* Duration of all executions
The reported latency value is calculated as the mean of all collected latencies. The reported throughput value is derived from the reported latency and additionally depends on the batch size.
### Asynchronous API
For asynchronous mode, the primary metric is throughput in frames per second (FPS). The application creates a certain number of infer requests and executes the `StartAsync` method. The number of infer requests is specified with the `-nireq` command-line parameter. The number of executions is defined by one of the following values:
* Number of iterations defined with the `-niter` command-line argument
* Time duration specified with the `-t` command-line argument
* Both of them (execution continues until both conditions are met)
* Predefined duration if `-niter` and `-t` are not specified. The predefined duration value depends on the device.
The infer requests are executed asynchronously. A callback is used to wait for the previous execution to complete. The application measures all infer request executions and reports the throughput metric based on the batch size and the total execution duration.
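As a rough illustration (the values below are made up), the reported numbers can be derived from the collected per-request timings; the formulas mirror the ones used by the benchmark script in this change:
```py
from statistics import median

# Illustrative per-request latencies (ms) collected during a run; not real data.
times_ms = [49.8, 51.2, 52.0, 53.1]
batch_size = 1
iterations = len(times_ms)
total_duration_sec = 0.21  # wall-clock duration of all asynchronous executions

latency_ms = median(times_ms)                # the script reports the median of collected latencies
sync_fps = batch_size * 1000 / latency_ms    # sync mode: derived from latency and batch size
async_fps = batch_size * iterations / total_duration_sec  # async mode: based on total duration

print("Latency: {:.4f} ms".format(latency_ms))
print("Throughput (sync): {:.2f} FPS, (async): {:.2f} FPS".format(sync_fps, async_fps))
```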
## Running
Notice that the benchmark_app usually produces optimal performance for any device out of the box.
**So in most cases you don't need to play with the app options explicitly, and the plain device name is enough**, e.g.:
```
$benchmark_app -m <model> -i <input> -d CPU
```
Running the application with the `-h` or `--help` option yields the following usage message:
```
usage: benchmark_app.py [-h] [-i PATH_TO_INPUT] -m PATH_TO_MODEL
[-pp PLUGIN_DIR] [-d TARGET_DEVICE]
[-l PATH_TO_EXTENSION] [-c PATH_TO_CLDNN_CONFIG]
[-api {sync,async}] [-niter NUMBER_ITERATIONS]
[-nireq NUMBER_INFER_REQUESTS] [-b BATCH_SIZE]
[-stream_output [STREAM_OUTPUT]] [-t TIME]
[-progress [PROGRESS]] [-nstreams NUMBER_STREAMS]
[-nthreads NUMBER_THREADS] [-pin {YES,NO}]
[--exec_graph_path EXEC_GRAPH_PATH]
[-pc [PERF_COUNTS]]
Options:
-h, --help Show this help message and exit.
-i PATH_TO_INPUT, --path_to_input PATH_TO_INPUT
Optional. Path to a folder with images and/or binaries
or to specific image or binary file.
-m PATH_TO_MODEL, --path_to_model PATH_TO_MODEL
Required. Path to an .xml file with a trained model.
-pp PLUGIN_DIR, --plugin_dir PLUGIN_DIR
Optional. Path to a plugin folder.
-d TARGET_DEVICE, --target_device TARGET_DEVICE
Optional. Specify a target device to infer on: CPU,
GPU, FPGA, HDDL or MYRIAD.
Use "-d HETERO:<comma separated devices list>" format to specify HETERO plugin.
-l PATH_TO_EXTENSION, --path_to_extension PATH_TO_EXTENSION
Optional. Required for CPU custom layers. Absolute
path to a shared library with the kernels
implementations.
-c PATH_TO_CLDNN_CONFIG, --path_to_cldnn_config PATH_TO_CLDNN_CONFIG
Optional. Required for GPU custom kernels. Absolute
path to an .xml file with the kernels description.
-api {sync,async}, --api_type {sync,async}
Optional. Enable using sync/async API. Default value
is async.
-niter NUMBER_ITERATIONS, --number_iterations NUMBER_ITERATIONS
Optional. Number of iterations. If not specified, the
number of iterations is calculated depending on a
device.
-nireq NUMBER_INFER_REQUESTS, --number_infer_requests NUMBER_INFER_REQUESTS
Optional. Number of infer requests. Default value is
determined automatically for device.
-b BATCH_SIZE, --batch_size BATCH_SIZE
Optional. Batch size value. If not specified, the
batch size value is determined from IR
-stream_output [STREAM_OUTPUT]
Optional. Print progress as a plain text. When
specified, an interactive progress bar is replaced
with a multiline output.
-t TIME, --time TIME Optional. Time in seconds to execute topology.
-progress [PROGRESS] Optional. Show progress bar (can affect performance
measurement). Default values is "False".
-nstreams NUMBER_STREAMS, --number_streams NUMBER_STREAMS
Optional. Number of streams to use for inference on the CPU/GPU in throughput mode
(for HETERO device case use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>).
-nthreads NUMBER_THREADS, --number_threads NUMBER_THREADS
Number of threads to use for inference on the CPU
(including HETERO case).
-pin {YES,NO}, --infer_threads_pinning {YES,NO}
Optional. Enable ("YES" is default value) or disable
("NO")CPU threads pinning for CPU-involved inference.
--exec_graph_path EXEC_GRAPH_PATH
Optional. Path to a file where to store executable
graph information serialized.
-pc [PERF_COUNTS], --perf_counts [PERF_COUNTS]
Optional. Report performance counters.
```
Running the application with an empty list of options yields the usage message given above and an error message.
The application supports topologies with one or more inputs. If a topology is not data-sensitive, you can skip the input parameter. In this case, inputs are filled with random values.
If a model has only image input(s), please provide a folder with images or a path to an image as input.
If a model has some specific input(s) (not images), please prepare binary file(s) filled with data of the appropriate precision and provide a path to them as input.
If a model has mixed input types, the input folder should contain all required files. Image inputs are filled with image files one by one. Binary inputs are filled with binary files one by one.
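As a rough sketch with illustrative counts (not a real model), the number of files the application expects follows from the number of inputs, the batch size, and the number of infer requests:
```py
# Illustrative sketch of the file-count check performed by the benchmark app.
images_count = 1      # image inputs in the network
binaries_count = 1    # non-image (binary) inputs
batch_size = 4        # -b
nireq = 2             # -nireq

images_needed = images_count * batch_size * nireq
binaries_needed = binaries_count * batch_size * nireq
print("Image files needed: {}, binary files needed: {}".format(images_needed, binaries_needed))
# Missing files are duplicated and extra files are ignored, with a warning in both cases.
```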
To run the demo, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
> **NOTE**: Before running the demo with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
For example, to do inference of an image using a trained network with multiple outputs on CPU, run the following command:
```
python3 benchmark_app.py -i <path_to_image>/inputImage.bmp -m <path_to_model>/multiple-output.xml -d CPU
```
## Demo Output
The application outputs the number of executed iterations, the total duration of execution, latency, and throughput.
Additionally, if you set the `-pc` parameter, the application outputs performance counters.
If you set `--exec_graph_path`, the application stores serialized executable graph information to the specified file.
```
[Step 8/9] Measuring performance (Start inference asyncronously, 60000 ms duration, 4 inference requests in parallel using 4 streams)
Progress: |................................| 100.00%
[Step 9/9] Dumping statistics report
Progress: |................................| 100.00%
Count: 4408 iterations
Duration: 60153.52 ms
Latency: 51.8244 ms
Throughput: 73.28 FPS
```
## See Also
* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)

View File

@ -1,343 +0,0 @@
"""
Copyright (C) 2018-2019 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from statistics import median
from openvino.inference_engine import IENetwork, IECore, get_version
from .utils.parameters import *
from .utils.inputs_filling import *
from .utils.utils import *
from .utils.infer_request_wrap import *
from .utils.progress_bar import *
def getDurationInMilliseconds(duration):
return duration * 1000
def static_vars(**kwargs):
def decorate(func):
for k in kwargs:
setattr(func, k, kwargs[k])
return func
return decorate
@static_vars(step_id = 0)
def next_step(additional_info = ""):
step_names = {
1 : "Parsing and validating input arguments",
2 : "Loading Inference Engine",
3 : "Read the Intermediate Representation of the network",
4 : "Resizing network to match image sizes and given batch",
5 : "Configuring input of the model",
6 : "Setting device configuration",
7 : "Loading the model to the device",
8 : "Setting optimal runtime parameters",
9 : "Creating infer requests and filling input blobs with images",
10 : "Measuring performance",
11 : "Dumping statistics report",
}
next_step.step_id += 1
if (next_step.step_id not in step_names.keys()):
raise Exception("Step ID " + str(next_step.step_id) + " is out of total steps number " + len(step_names))
print("[Step {}/{}] {}".format(next_step.step_id, len(step_names), step_names[next_step.step_id]) + (" (" + additional_info + ")" if len(additional_info) else ""))
def main(args=None):
try:
# ------------------------------ 1. Parsing and validating input arguments -------------------------------------
next_step()
if not args:
args = parse_args()
# ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
next_step()
device_name = args.target_device.upper()
ie = IECore()
if CPU_DEVICE_NAME in device_name:
if args.path_to_extension:
ie.add_extension(extension_path=args.path_to_extension, device_name=CPU_DEVICE_NAME)
if GPU_DEVICE_NAME in device_name:
if args.path_to_cldnn_config:
ie.set_config({'CONFIG_FILE' : args.path_to_cldnn_config}, GPU_DEVICE_NAME)
logger.info("GPU extensions is loaded {}".format(args.path_to_cldnn_config))
logger.info("InferenceEngine:\n{: <9}{}".format("",get_version()))
version_string = "Device is {}\n".format(device_name)
for device, version in ie.get_versions(device_name).items():
version_string += "{: <9}{}\n".format("", device)
version_string += "{: <9}{:.<24}{} {}.{}\n".format("",version.description," version", version.major, version.minor)
version_string += "{: <9}{:.<24} {}\n".format("","Build", version.build_number)
logger.info(version_string)
# --------------------- 3. Read the Intermediate Representation of the network ---------------------------------
next_step()
xml_filename = os.path.abspath(args.path_to_model)
head, tail = os.path.splitext(xml_filename)
bin_filename = os.path.abspath(head + BIN_EXTENSION)
ie_network = IENetwork(xml_filename, bin_filename)
input_info = ie_network.inputs
if len(input_info) == 0:
raise AttributeError('No inputs info is provided')
# --------------------- 4. Resizing network to match image sizes and given batch -------------------------------
next_step()
batch_size = ie_network.batch_size
precision = ie_network.precision
if args.batch_size and args.batch_size != ie_network.batch_size:
new_shapes = {}
for key in input_info.keys():
shape = input_info[key].shape
layout = input_info[key].layout
batchIndex = -1
if ((layout == 'NCHW') or (layout == 'NCDHW') or
(layout == 'NHWC') or (layout == 'NDHWC') or
(layout == 'NC')):
batchIndex = 0
elif (layout == 'CN'):
batchIndex = 1
if ((batchIndex != -1) and (shape[batchIndex] != args.batch_size)):
shape[batchIndex] = args.batch_size
new_shapes[key] = shape
if (len(new_shapes) > 0):
logger.info("Resizing network to batch = {}".format(args.batch_size))
ie_network.reshape(new_shapes)
batch_size = args.batch_size
logger.info("Network batch size: {}, precision {}".format(batch_size, precision))
# --------------------- 5. Configuring input of the model ------------------------------------------------------
next_step()
for key in input_info.keys():
if (isImage(input_info[key])):
# Set the precision of input data provided by the user
# Should be called before load of the network to the plugin
input_info[key].precision = 'U8'
# --------------------- 6. Setting device configuration --------------------------------------------------------
next_step()
devices = parseDevices(device_name)
device_nstreams = parseValuePerDevice(devices, args.number_streams)
for device in devices:
if device == CPU_DEVICE_NAME: ## CPU supports few special performance-oriented keys
## limit threading for CPU portion of inference
if args.number_threads:
ie.set_config({'CPU_THREADS_NUM': str(args.number_threads)}, device)
# pin threads for CPU portion of inference
ie.set_config({'CPU_BIND_THREAD': args.infer_threads_pinning}, device)
## for CPU execution, more throughput-oriented execution via streams
# for pure CPU execution, more throughput-oriented execution via streams
if args.api_type == 'async':
ie.set_config({'CPU_THROUGHPUT_STREAMS': str(device_nstreams.get(device))
if device in device_nstreams.keys()
else 'CPU_THROUGHPUT_AUTO' }, device)
device_nstreams[device] = int(ie.get_config(device, 'CPU_THROUGHPUT_STREAMS'))
elif device == GPU_DEVICE_NAME:
if args.api_type == 'async':
ie.set_config({'GPU_THROUGHPUT_STREAMS' : str(device_nstreams.get(device))
if device in device_nstreams.keys()
else 'GPU_THROUGHPUT_AUTO'}, device)
device_nstreams[device] = int(ie.get_config(device, 'GPU_THROUGHPUT_STREAMS'))
elif device == MYRIAD_DEVICE_NAME:
ie.set_config({'LOG_LEVEL': 'LOG_INFO',
'VPU_LOG_LEVEL': 'LOG_WARNING'}, MYRIAD_DEVICE_NAME)
# --------------------- 7. Loading the model to the device -----------------------------------------------------
next_step()
config = { 'PERF_COUNT' : ('YES' if args.perf_counts else 'NO')}
exe_network = ie.load_network(ie_network,
device_name,
config=config,
num_requests=args.number_infer_requests if args.number_infer_requests else 0)
# --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
next_step()
## Number of requests
infer_requests = exe_network.requests
nireq = len(infer_requests)
## Iteration limit
niter = args.number_iterations
if niter and args.api_type == 'async':
niter = (int)((niter + nireq - 1)/nireq)*nireq
if (args.number_iterations != niter):
logger.warn("Number of iterations was aligned by request number "
"from {} to {} using number of requests {}".format(args.number_iterations, niter, nireq))
## Time limit
duration_seconds = 0
if args.time:
## time limit
duration_seconds = args.time
elif not args.number_iterations:
## default time limit
duration_seconds = get_duration_in_secs(device)
# ------------------------------------ 8. Creating infer requests and filling input blobs ----------------------
next_step()
request_queue = InferRequestsQueue(infer_requests)
path_to_input = os.path.abspath(args.path_to_input) if args.path_to_input else None
requests_input_data = getInputs(path_to_input, batch_size, ie_network.inputs, infer_requests)
# ------------------------------------ 9. Measuring performance ------------------------------------------------
progress_count = 0
progress_bar_total_count = 10000
output_string = "Start inference {}ronously".format(args.api_type)
if (args.api_type == "async"):
if output_string != "":
output_string += ", "
output_string += str(nireq) + " inference requests"
device_ss = ''
for device, nstreams in device_nstreams.items():
if device_ss != '':
device_ss += ', '
device_ss += "{} streams for {}".format(str(nstreams), device)
if device_ss != '':
output_string += " using " + device_ss
output_string += ", limits: "
if niter:
if not duration_seconds:
progress_bar_total_count = niter
output_string += str(niter) + " iterations"
if duration_seconds:
if niter:
output_string += ", "
output_string += str(getDurationInMilliseconds(duration_seconds)) + " ms duration"
next_step(output_string)
## warming up - out of scope
infer_request = request_queue.getIdleRequest()
if not infer_request:
raise Exception("No idle Infer Requests!")
if (args.api_type == 'sync'):
infer_request.infer(requests_input_data[infer_request.id])
else:
infer_request.startAsync(requests_input_data[infer_request.id])
request_queue.waitAll()
request_queue.resetTimes()
start_time = datetime.now()
exec_time = (datetime.now() - start_time).total_seconds()
iteration = 0
progress_bar = ProgressBar(progress_bar_total_count, args.stream_output, args.progress)
## Start inference & calculate performance
## to align number if iterations to guarantee that last infer requests are executed in the same conditions **/
while ((niter and iteration < niter) or
(duration_seconds and exec_time < duration_seconds) or
(args.api_type == "async" and iteration % nireq != 0)):
infer_request = request_queue.getIdleRequest()
if not infer_request:
raise Exception("No idle Infer Requests!")
if (args.api_type == 'sync'):
infer_request.infer(requests_input_data[infer_request.id])
else:
infer_request.startAsync(requests_input_data[infer_request.id])
iteration += 1
exec_time = (datetime.now() - start_time).total_seconds()
if niter:
progress_bar.add_progress(1)
else:
## calculate how many progress intervals are covered by current iteration.
## depends on the current iteration time and time of each progress interval.
## Previously covered progress intervals must be skipped.
progress_interval_time = duration_seconds / progress_bar_total_count
new_progress = (int) (exec_time / progress_interval_time - progress_count)
progress_bar.add_progress(new_progress)
progress_count += new_progress
## wait the latest inference executions
request_queue.waitAll()
total_duration_sec = request_queue.getDurationInSeconds()
times = request_queue.times
times.sort()
latency_ms = median(times)
fps = batch_size * 1000 / latency_ms if args.api_type == 'sync' else batch_size * iteration / total_duration_sec
progress_bar.finish()
# ------------------------------------ 10. Dumping statistics report -------------------------------------------
next_step()
if args.exec_graph_path:
try:
exec_graph_info = exe_network.get_exec_graph_info()
exec_graph_info.serialize(args.exec_graph_path)
logger.info("Executable graph is stored to {}".format(args.exec_graph_path))
del exec_graph_info
except Exception as e:
logging.exception(e)
if args.perf_counts:
for ni in range(int(nireq)):
perf_counts = exe_network.requests[ni].get_perf_counts()
logger.info("Pefrormance counts for {}-th infer request".format(ni))
for layer, stats in perf_counts.items():
max_layer_name = 30
print("{:<30}{:<15}{:<30}{:<20}{:<20}{:<20}".format(layer[:max_layer_name - 4] + '...' if (len(layer) >= max_layer_name) else layer,
stats['status'],
'layerType: ' + str(stats['layer_type']),
'realTime: ' + str(stats['real_time']),
'cpu: ' + str(stats['cpu_time']),
'execType: ' + str(stats['exec_type'])))
print("Count: {} iterations".format(iteration))
print("Duration: {:.2f} ms".format(getDurationInMilliseconds(total_duration_sec)))
print("Latency: {:.4f} ms".format(latency_ms))
print("Throughput: {:.2f} FPS".format(fps))
del exe_network
del ie
next_step.step_id = 0
except Exception as e:
logging.exception(e)

View File

@ -1,81 +0,0 @@
"""
Copyright (C) 2018-2019 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from ctypes import *
from datetime import datetime
import threading
class InferReqWrap:
def __init__(self, request, id, callbackQueue):
self.id = id
self.request = request
self.request.set_completion_callback(self.callback, self.id)
self.callbackQueue = callbackQueue
def callback(self, statusCode, userdata):
if (userdata != self.id):
print("Request ID {} does not correspond to user data {}".format(self.id, userdata))
elif statusCode != 0:
print("Request {} failed with status code {}".format(self.id, statusCode))
self.callbackQueue(self.id, self.request.latency)
def startAsync(self, input_data):
self.request.async_infer(input_data)
def infer(self, input_data):
self.request.infer(input_data)
self.callbackQueue(self.id, self.request.latency);
class InferRequestsQueue:
def __init__(self, requests):
self.idleIds = []
self.requests = []
self.times = []
for id in range(0, len(requests)):
self.requests.append(InferReqWrap(requests[id], id, self.putIdleRequest))
self.idleIds.append(id)
self.startTime = datetime.max
self.endTime = datetime.min
self.cv = threading.Condition()
def resetTimes(self):
self.times.clear()
def getDurationInSeconds(self):
return (self.endTime - self.startTime).total_seconds()
def putIdleRequest(self, id, latency):
self.cv.acquire()
self.times.append(latency)
self.idleIds.append(id)
self.endTime = max(self.endTime, datetime.now())
self.cv.notify()
self.cv.release()
def getIdleRequest(self):
self.cv.acquire()
while len(self.idleIds) == 0:
self.cv.wait()
id = self.idleIds.pop()
self.startTime = min(datetime.now(), self.startTime)
self.cv.release()
return self.requests[id]
def waitAll(self):
self.cv.acquire()
while len(self.idleIds) != len(self.requests):
self.cv.wait()
self.cv.release()
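A minimal driver sketch for the queue above, assuming `exe_network` is an already loaded ExecutableNetwork and `input_data` is a prepared feed dictionary (both are placeholders here):

```python
# Sketch only: exe_network and input_data are assumed to exist.
request_queue = InferRequestsQueue(exe_network.requests)

for _ in range(1000):                                # some number of iterations
    infer_request = request_queue.getIdleRequest()   # blocks until a request becomes idle
    infer_request.startAsync(input_data)

request_queue.waitAll()                              # wait for the remaining in-flight requests
print(request_queue.getDurationInSeconds(), len(request_queue.times))
```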

View File

@ -1,194 +0,0 @@
"""
Copyright (C) 2018-2019 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import logging
import os
import cv2
import numpy as np
import sys
from glob import glob
from random import choice
from .logging import logger
IMAGE_EXTENSIONS = ['JPEG', 'JPG', 'PNG', 'BMP']
BINARY_EXTENSIONS = ['BIN']
def isImage(blob):
if (blob.layout != "NCHW"):
return False
channels = blob.shape[1]
return (channels == 3)
def isImageInfo(blob):
if (blob.layout != "NC"):
return False
channels = blob.shape[1]
return (channels >= 2)
def getInputs(path_to_input, batch_size, input_info, requests):
input_image_sizes = {}
for key in input_info.keys():
if (isImage(input_info[key])):
input_image_sizes[key] = (input_info[key].shape[2], input_info[key].shape[3])
logger.info("Network input '{}' precision {}, dimensions ({}): {}".format(key,
input_info[key].precision,
input_info[key].layout,
" ".join(str(x) for x in input_info[key].shape)))
images_count = len(input_image_sizes.keys())
binaries_count = len(input_info) - images_count
image_files = list()
binary_files = list()
if (path_to_input):
image_files = get_files_by_extensions(path_to_input, IMAGE_EXTENSIONS)
image_files.sort()
binary_files = get_files_by_extensions(path_to_input, BINARY_EXTENSIONS)
binary_files.sort()
if (len(image_files) == 0) and (len(binary_files) == 0):
logger.warn("No input files were given: all inputs will be filled with random values!")
else:
binary_to_be_used = binaries_count*batch_size*len(requests)
if binary_to_be_used > 0 and len(binary_files) == 0:
logger.warn("No supported binary inputs found! Please check your file extensions: {}".format(",".join(BINARY_EXTENSIONS)))
elif binary_to_be_used > len(binary_files):
logger.warn("Some binary input files will be duplicated: {} files are required, but only {} were provided".format(binary_to_be_used, len(binary_files)))
elif binary_to_be_used < len(binary_files):
logger.warn("Some binary input files will be ignored: only {} files are required from {}".format(binary_to_be_used, len(binary_files)))
images_to_be_used = images_count*batch_size*len(requests)
if images_to_be_used > 0 and len(image_files) == 0:
logger.warn("No supported image inputs found! Please check your file extensions: {}".format(",".join(IMAGE_EXTENSIONS)))
elif images_to_be_used > len(image_files):
logger.warn("Some image input files will be duplicated: {} files are required, but only {} were provided".format(images_to_be_used, len(image_files)))
elif images_to_be_used < len(image_files):
logger.warn("Some image input files will be ignored: only {} files are required from {}".format(images_to_be_used, len(image_files)))
requests_input_data = []
for request_id in range(0, len(requests)):
logger.info("Infer Request {} filling".format(request_id))
input_data = {}
keys = list(input_info.keys())
for key in keys:
if isImage(input_info[key]):
# input is image
if (len(image_files) > 0):
input_data[key] = fill_blob_with_image(image_files, request_id, batch_size, keys.index(key), len(keys), input_info[key].shape)
continue
# input is binary
if (len(binary_files) > 0):
input_data[key] = fill_blob_with_binary(binary_files, request_id, batch_size, keys.index(key), len(keys), input_info[key].shape)
continue
# most likely input is image info
if isImageInfo(input_info[key]) and len(input_image_sizes) == 1:
image_size = input_image_sizes[list(input_image_sizes.keys()).pop()]
logger.info("Fill input '" + key + "' with image size " + str(image_size[0]) + "x" +
str(image_size[1]))
input_data[key] = fill_blob_with_image_info(image_size, input_info[key].shape)
continue
# fill with random data
logger.info("Fill input '{}' with random values ({} is expected)".format(key, "image" if isImage(input_info[key]) else "some binary data"))
input_data[key] = fill_blob_with_random(input_info[key].precision, input_info[key].shape)
requests_input_data.append(input_data)
return requests_input_data
def get_files_by_extensions(path_to_input, extensions):
input_files = list()
if os.path.isfile(path_to_input):
input_files.append(path_to_input)
else:
path = os.path.join(path_to_input, '*')
files = glob(path, recursive=True)
for file in files:
file_extension = file.rsplit('.').pop().upper()
if file_extension in extensions:
input_files.append(file)
return input_files
def fill_blob_with_image(image_paths, request_id, batch_size, input_id, input_size, shape):
images = np.ndarray(shape)
image_index = request_id*batch_size*input_size + input_id
for b in range(batch_size):
image_index %= len(image_paths)
image_filename = image_paths[image_index]
image = cv2.imread(image_filename)
new_im_size = tuple(shape[2:])
if image.shape[:-1] != new_im_size:
    logger.warn("Image {} is resized from ({}) to ({})".format(image_filename, image.shape[:-1], new_im_size))
    # cv2.resize expects the target size as (width, height)
    image = cv2.resize(image, (new_im_size[1], new_im_size[0]))
image = image.transpose((2, 0, 1))  # HWC -> CHW to match the NCHW input layout
images[b] = image
image_index += input_size
return images
def fill_blob_with_binary(binary_paths, request_id, batch_size, input_id, input_size, shape):
    binaries = np.ndarray(shape)
    binary_index = request_id * batch_size * input_size + input_id
    # expected number of elements per batch item; the size check below assumes one byte per element
    blob_size = int(np.prod(shape) / batch_size)
    for b in range(batch_size):
        binary_index %= len(binary_paths)
        binary_filename = binary_paths[binary_index]
        binary_file_size = os.path.getsize(binary_filename)
        if blob_size != binary_file_size:
            raise Exception("File " + binary_filename + " contains " + str(binary_file_size) +
                            " bytes but network expects " + str(blob_size))
        with open(binary_filename, 'rb') as f:
            binaries[b] = np.frombuffer(f.read(), dtype=np.uint8).reshape(shape[1:])
        binary_index += input_size
    return binaries
def fill_blob_with_image_info(image_size, shape):
im_info = np.ndarray(shape)
for b in range(shape[0]):
for i in range(shape[1]):
im_info[b][i] = image_size[i] if i in [0, 1] else 1
return im_info
def fill_blob_with_random(precision, shape):
if precision == "FP32":
return np.random.rand(*shape).astype(np.float32)
elif precision == "FP16":
return np.random.rand(*shape).astype(np.float16)
elif precision == "I32":
return np.random.rand(*shape).astype(np.int32)
elif precision == "U8":
return np.random.rand(*shape).astype(np.uint8)
elif precision == "I8":
return np.random.rand(*shape).astype(np.int8)
elif precision == "U16":
return np.random.rand(*shape).astype(np.uint16)
elif precision == "I16":
return np.random.rand(*shape).astype(np.int16)
else:
raise Exception("Input precision is not supported: " + precision)

View File

@ -1,92 +0,0 @@
"""
Copyright (C) 2018-2019 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import argparse
from fnmatch import fnmatch
XML_EXTENSION = ".xml"
BIN_EXTENSION = ".bin"
XML_EXTENSION_PATTERN = '*' + XML_EXTENSION
def validate_args(args):
if args.number_iterations is not None and args.number_iterations < 0:
raise Exception("Number of iterations should be positive (invalid -niter option value)")
if args.number_infer_requests and args.number_infer_requests < 0:
raise Exception("Number of inference requests should be positive (invalid -nireq option value)")
if not fnmatch(args.path_to_model, XML_EXTENSION_PATTERN):
raise Exception('Path {} is not an xml file.'.format(args.path_to_model))
def str2bool(v):
if v.lower() in ('yes', 'true', 't', 'y', '1'):
return True
elif v.lower() in ('no', 'false', 'f', 'n', '0'):
return False
else:
raise argparse.ArgumentTypeError('Boolean value expected.')
def parse_args():
parser = argparse.ArgumentParser(add_help=False)
args = parser.add_argument_group('Options')
args.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS,
help="Show this help message and exit.")
args.add_argument('-i', '--path_to_input', type=str, required=False,
help="Optional. Path to a folder with images and/or binaries or to specific image or binary file.")
args.add_argument('-m', '--path_to_model', type=str, required=True,
help="Required. Path to an .xml file with a trained model.")
args.add_argument('-d', '--target_device', type=str, required=False, default="CPU",
help="Optional. Specify a target device to infer on: CPU, GPU, FPGA, HDDL or MYRIAD. "
"Use \"-d HETERO:<comma separated devices list>\" format to specify HETERO plugin. ")
args.add_argument('-l', '--path_to_extension', type=str, required=False, default=None,
help="Optional. Required for CPU custom layers. "
"Absolute path to a shared library with the kernels implementations.")
args.add_argument('-c', '--path_to_cldnn_config', type=str, required=False,
help="Optional. Required for GPU custom kernels. Absolute path to an .xml file with the "
"kernels description.")
args.add_argument('-api', '--api_type', type=str, required=False, default='async', choices=['sync', 'async'],
help="Optional. Enable using sync/async API. Default value is async.")
args.add_argument('-niter', '--number_iterations', type=int, required=False, default=None,
help="Optional. Number of iterations. "
"If not specified, the number of iterations is calculated depending on a device.")
args.add_argument('-nireq', '--number_infer_requests', type=int, required=False, default=None,
help="Optional. Number of infer requests. Default value is determined automatically for device.")
args.add_argument('-b', '--batch_size', type=int, required=False, default=None,
help="Optional. Batch size value. If not specified, the batch size value is determined from Intermediate Representation")
args.add_argument('-stream_output', type=str2bool, required=False, default=False, nargs='?', const=True,
help="Optional. Print progress as a plain text. When specified, an interactive progress bar is replaced with a "
"multiline output.")
args.add_argument('-t', '--time', type=int, required=False, default=None,
help="Optional. Time in seconds to execute topology.")
args.add_argument('-progress', type=str2bool, required=False, default=False, nargs='?', const=True,
help="Optional. Show progress bar (can affect performance measurement). Default values is \"False\".")
args.add_argument('-nstreams', '--number_streams', type=str, required=False, default=None,
help="Optional. Number of streams to use for inference on the CPU/GPU in throughput mode "
"(for HETERO device case use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>).")
args.add_argument('-nthreads', '--number_threads', type=int, required=False, default=None,
help="Number of threads to use for inference on the CPU "
"(including HETERO case).")
args.add_argument('-pin', '--infer_threads_pinning', type=str, required=False, default='YES', choices=['YES', 'NO'],
help="Optional. Enable (\"YES\" is default value) or disable (\"NO\")"
"CPU threads pinning for CPU-involved inference.")
args.add_argument('--exec_graph_path', type=str, required=False,
help="Optional. Path to a file where to store executable graph information serialized.")
args.add_argument("-pc", "--perf_counts", type=str2bool, required=False, default=False, nargs='?', const=True,
help="Optional. Report performance counters.", )
parsed_args = parser.parse_args()
validate_args(parsed_args)
return parsed_args

View File

@ -1,99 +0,0 @@
"""
Copyright (C) 2018-2019 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import multiprocessing
from .logging import logger
VPU_DEVICE_NAME = "VPU"
MYRIAD_DEVICE_NAME = "MYRIAD"
HDDL_DEVICE_NAME = "HDDL"
FPGA_DEVICE_NAME = "FPGA"
CPU_DEVICE_NAME = "CPU"
GPU_DEVICE_NAME = "GPU"
HETERO_DEVICE_NAME = "HETERO"
UNKNOWN_DEVICE_TYPE = "UNKNOWN"
DEVICE_DURATION_IN_SECS = {
CPU_DEVICE_NAME: 60,
GPU_DEVICE_NAME: 60,
VPU_DEVICE_NAME: 60,
MYRIAD_DEVICE_NAME: 60,
HDDL_DEVICE_NAME: 60,
FPGA_DEVICE_NAME: 120,
UNKNOWN_DEVICE_TYPE: 120
}
DEVICE_NIREQ_ASYNC = {
CPU_DEVICE_NAME: 2,
GPU_DEVICE_NAME: 2,
VPU_DEVICE_NAME: 4,
MYRIAD_DEVICE_NAME: 4,
HDDL_DEVICE_NAME: 100,
FPGA_DEVICE_NAME: 3,
UNKNOWN_DEVICE_TYPE: 1
}
def get_duration_in_secs(target_device):
duration = 0
for device in DEVICE_DURATION_IN_SECS:
if device in target_device:
duration = max(duration, DEVICE_DURATION_IN_SECS[device])
if duration == 0:
duration = DEVICE_DURATION_IN_SECS[UNKNOWN_DEVICE_TYPE]
logger.warn("Default duration {} seconds is used for unknown device {}".format(duration, target_device))
return duration
def get_nireq(target_device):
nireq = 0
for device in DEVICE_NIREQ_ASYNC:
if device in target_device:
nireq = max(nireq, DEVICE_NIREQ_ASYNC[device])
if nireq == 0:
nireq = DEVICE_NIREQ_ASYNC[UNKNOWN_DEVICE_TYPE]
logger.warn("Default number of requests {} is used for unknown device {}".format(duration, target_device))
return nireq
def parseDevices(device_string):
devices = device_string
if ':' in devices:
devices = devices.partition(':')[2]
return [ d[:d.index('(')] if '(' in d else d for d in devices.split(',') ]
def parseValuePerDevice(devices, values_string):
## Format: <device1>:<value1>,<device2>:<value2> or just <value>
result = {}
if not values_string:
return result
device_value_strings = values_string.upper().split(',')
for device_value_string in device_value_strings:
device_value_vec = device_value_string.split(':')
if len(device_value_vec) == 2:
for device in devices:
if device == device_value_vec[0]:
value = int(device_value_vec[1])
result[device_value_vec[0]] = value
break
elif len(device_value_vec) == 1:
value = int(device_value_vec[0])
for device in devices:
result[device] = value
else:
raise Exception("Unknown string format: " + values_string)
return result
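For illustration, assuming the helpers above, a HETERO device string and a per-device value string are parsed as follows:

```python
# Illustrative values only.
devices = parseDevices("HETERO:FPGA,CPU")          # -> ['FPGA', 'CPU']
per_dev = parseValuePerDevice(devices, "CPU:4")    # -> {'CPU': 4}
for_all = parseValuePerDevice(devices, "2")        # -> {'FPGA': 2, 'CPU': 2}
```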

View File

@ -1,4 +0,0 @@
import benchmark
if __name__ == "__main__":
benchmark.main()

View File

@ -20,7 +20,7 @@ python3 classification_sample.py -h
The command yields the following usage message:
```
usage: classification_sample.py [-h] -m MODEL -i INPUT [INPUT ...]
[-l CPU_EXTENSION] [-pp PLUGIN_DIR]
[-l CPU_EXTENSION]
[-d DEVICE] [--labels LABELS] [-nt NUMBER_TOP]
Options:
@ -34,8 +34,6 @@ Options:
Optional. Required for CPU custom layers. MKLDNN (CPU)-targeted custom layers.
Absolute path to a shared library with the kernels
implementations.
-pp PLUGIN_DIR, --plugin_dir PLUGIN_DIR
Optional. Path to a plugin folder
-d DEVICE, --device DEVICE
Optional. Specify the target device to infer on; CPU,
GPU, FPGA, HDDL or MYRIAD is acceptable. The sample

View File

@ -32,7 +32,7 @@ python3 classification_sample_async.py -h
The command yields the following usage message:
```
usage: classification_sample_async.py [-h] -m MODEL -i INPUT [INPUT ...]
[-l CPU_EXTENSION] [-pp PLUGIN_DIR]
[-l CPU_EXTENSION]
[-d DEVICE] [--labels LABELS]
[-nt NUMBER_TOP]
@ -47,8 +47,6 @@ Options:
Optional. Required for CPU custom layers. Absolute
path to a shared library with the kernels
implementations.
-pp PLUGIN_DIR, --plugin_dir PLUGIN_DIR
Optional. Path to a plugin folder
-d DEVICE, --device DEVICE
Optional. Specify the target device to infer on; CPU,
GPU, FPGA, HDDL or MYRIAD is acceptable. The sample
@ -68,7 +66,7 @@ To run the sample, you can use AlexNet and GoogLeNet or other image classificati
You can do inference of an image using a trained AlexNet network on FPGA with fallback to CPU using the following command:
```
python3 classification_sample_async.py -i <path_to_image>/cat.bmp -m <path_to_model>/alexnet_fp32.xml -nt 5 -d HETERO:FPGA,CPU -nireq 2 -ni 200
python3 classification_sample_async.py -i <path_to_image>/cat.bmp -m <path_to_model>/alexnet_fp32.xml -nt 5 -d HETERO:FPGA,CPU
```
## Sample Output

View File

@ -22,14 +22,19 @@ def main():
print("\tDevice: {}".format(device))
print("\tMetrics:")
for metric in ie.get_metric(device, "SUPPORTED_METRICS"):
metric_val = ie.get_metric(device, metric)
print("\t\t{}: {}".format(metric, param_to_string(metric_val)))
try:
metric_val = ie.get_metric(device, metric)
print("\t\t{}: {}".format(metric, param_to_string(metric_val)))
except TypeError:
print("\t\t{}: UNSUPPORTED TYPE".format(metric))
print("\n\tDefault values for device configuration keys:")
for cfg in ie.get_metric(device, "SUPPORTED_CONFIG_KEYS"):
cfg_val = ie.get_config(device, cfg)
print("\t\t{}: {}".format(cfg, param_to_string(cfg_val)))
try:
cfg_val = ie.get_config(device, cfg)
print("\t\t{}: {}".format(cfg, param_to_string(cfg_val)))
except TypeError:
print("\t\t{}: UNSUPPORTED TYPE".format(cfg))
if __name__ == '__main__':
sys.exit(main() or 0)

View File

@ -0,0 +1,73 @@
# Object Detection Python* Sample SSD
This sample demonstrates how to run the Object Detection sample application.
The sample shows how to use the new Infer Request API of the Inference Engine in applications.
Refer to [Integrate the Inference Engine New Request API with Your Application](./docs/IE_DG/Integrate_with_customer_application_new_API.md) for details.
The sample builds and executes an inference request using object detection networks as an example.
Due to properties of SSD networks, this sample works correctly only with a batch size of 1. To process more images in a batch, reshape the network.
## How It Works
Upon start-up, the sample application reads command-line parameters and loads the specified network and input images (or a folder with images) into the Inference Engine plugin.
Then, the sample creates an inference request object and executes inference on it.
When inference is done, the application writes the results to the standard output stream and creates an output image with bounding boxes drawn atop the initial image.
> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
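In Python terms, the flow described above boils down to roughly the following simplified sketch (placeholder paths and device, batch size 1 assumed; the SSD-specific output parsing from the full sample source is omitted):

```python
# Simplified sketch: placeholder paths, batch size 1 assumed.
import cv2
from openvino.inference_engine import IENetwork, IECore

ie = IECore()
net = IENetwork(model="model.xml", weights="model.bin")
input_blob = next(iter(net.inputs))
out_blob = next(iter(net.outputs))
n, c, h, w = net.inputs[input_blob].shape

image = cv2.resize(cv2.imread("cat.bmp"), (w, h)).transpose((2, 0, 1))  # HWC -> CHW
exec_net = ie.load_network(network=net, device_name="CPU")
res = exec_net.infer(inputs={input_blob: image.reshape(n, c, h, w)})
print(res[out_blob].shape)  # [1, 1, N, 7] for SSD-like DetectionOutput models
```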
## Running
Running the application with the <code>-h</code> option yields the following usage message:
```
python3 object_detection_sample_ssd.py -h
```
The command yields the following usage message:
```
usage: object_detection_sample_ssd.py [-h] -m MODEL -i INPUT [INPUT ...]
[-l CPU_EXTENSION]
[-d DEVICE] [--labels LABELS]
[-nt NUMBER_TOP]
Options:
-h, --help Show this help message and exit
-m MODEL, --model MODEL
Required. Path to an .xml file with a trained model
-i INPUT [INPUT ...], --input INPUT [INPUT ...]
Required. Path to a folder with images or path to an
image files
-l CPU_EXTENSION, --cpu_extension CPU_EXTENSION
Optional. Required for CPU custom layers. Absolute
path to a shared library with the kernels
implementations
-d DEVICE, --device DEVICE
Optional. Specify the target device to infer on; CPU,
GPU, FPGA, HDDL or MYRIAD is acceptable. The sample
will look for a suitable plugin for device specified
Default value is CPU
--labels LABELS Optional. Labels mapping file
-nt NUMBER_TOP, --number_top NUMBER_TOP
Optional. Number of top results
```
Running the application with the empty list of options yields the usage message given above and an error message.
To run the sample, you can use RMNet_SSD or other object-detection models. You can download the pre-trained models with the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or from [https://download.01.org/opencv/](https://download.01.org/opencv/).
> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
You can do inference of an image using a trained RMNet_SSD network on FPGA with fallback to CPU using the following command:
```
python3 object_detection_sample_ssd.py -i <path_to_image>/cat.bmp -m <path_to_model>/alexnet_fp32.xml -nt 5 -d HETERO:FPGA,CPU
```
## Sample Output
By default, the application outputs all inference results and draws bounding boxes for results with over 50% confidence.
## See Also
* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)

View File

@ -0,0 +1,189 @@
#!/usr/bin/env python
"""
Copyright (c) 2018 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from __future__ import print_function
import sys
import os
from argparse import ArgumentParser, SUPPRESS
import cv2
import numpy as np
import logging as log
from time import time
from openvino.inference_engine import IENetwork, IECore
def build_argparser():
parser = ArgumentParser(add_help=False)
args = parser.add_argument_group("Options")
args.add_argument('-h', '--help', action='help', default=SUPPRESS, help='Show this help message and exit.')
args.add_argument("-m", "--model", help="Required. Path to an .xml file with a trained model.",
required=True, type=str)
args.add_argument("-i", "--input", help="Required. Path to image file.",
required=True, type=str, nargs="+")
args.add_argument("-l", "--cpu_extension",
help="Optional. Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.",
type=str, default=None)
args.add_argument("-d", "--device",
help="Optional. Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. Sample will look for a suitable plugin for device specified (CPU by default)",
default="CPU", type=str)
args.add_argument("--labels", help="Optional. Labels mapping file", default=None, type=str)
args.add_argument("-nt", "--number_top", help="Optional. Number of top results", default=10, type=int)
return parser
def main():
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
args = build_argparser().parse_args()
# --------------------------- 1. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
model_xml = args.model
model_bin = os.path.splitext(model_xml)[0] + ".bin"
log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
net = IENetwork(model=model_xml, weights=model_bin)
# -----------------------------------------------------------------------------------------------------
# ------------- 2. Load Plugin for inference engine and extensions library if specified --------------
log.info("Loading Inference Engine")
ie = IECore()
log.info("Device info:")
versions = ie.get_versions(args.device)
print("{}{}".format(" "*8, args.device))
print("{}MKLDNNPlugin version ......... {}.{}".format(" "*8, versions[args.device].major, versions[args.device].minor))
print("{}Build ........... {}".format(" "*8, versions[args.device].build_number))
if args.cpu_extension and "CPU" in args.device:
ie.add_extension(args.cpu_extension, "CPU")
log.info("CPU extension loaded: {}".format(args.cpu_extension))
if "CPU" in args.device:
supported_layers = ie.query_network(net, "CPU")
not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
if len(not_supported_layers) != 0:
log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
format(args.device, ', '.join(not_supported_layers)))
log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
"or --cpu_extension command line argument")
sys.exit(1)
# -----------------------------------------------------------------------------------------------------
# --------------------------- 3. Read and preprocess input --------------------------------------------
input_blob = next(iter(net.inputs))
n, c, h, w = net.inputs[input_blob].shape
images = np.ndarray(shape=(n, c, h, w))
images_hw = []
for i in range(n):
image = cv2.imread(args.input[i])
ih, iw = image.shape[:-1]
images_hw.append((ih, iw))
log.info("File was added: ")
log.info(" {}".format(args.input[i]))
if (ih, iw) != (h, w):
image = cv2.resize(image, (w, h))
log.warning("Image {} is resized from {} to {}".format(args.input[i], image.shape[:-1], (h, w)))
image = image.transpose((2, 0, 1)) # Change data layout from HWC to CHW
images[i] = image
# -----------------------------------------------------------------------------------------------------
# --------------------------- 4. Configure input & output ---------------------------------------------
# --------------------------- Prepare input blobs -----------------------------------------------------
log.info("Preparing input blobs")
assert (len(net.inputs.keys()) == 1 or len(net.inputs.keys()) == 2), "Sample supports topologies only with 1 or 2 inputs"
input_blob = next(iter(net.inputs))
out_blob = next(iter(net.outputs))
input_name, input_info_name = "", ""
for input_key in net.inputs:
if len(net.inputs[input_key].layout) == 4:
input_name = input_key
log.info("Batch size is {}".format(net.batch_size))
net.inputs[input_key].precision = 'U8'
elif len(net.inputs[input_key].layout) == 2:
input_info_name = input_key
net.inputs[input_key].precision = 'FP32'
if (net.inputs[input_key].shape[1] != 3 and net.inputs[input_key].shape[1] != 6) or net.inputs[input_key].shape[0] != 1:
    log.error('Invalid input info. It should contain 3 or 6 values.')
# --------------------------- Prepare output blobs ----------------------------------------------------
log.info('Preparing output blobs')
output_name, output_info = "", net.outputs[next(iter(net.outputs.keys()))]
for output_key in net.outputs:
if net.layers[output_key].type == "DetectionOutput":
output_name, output_info = output_key, net.outputs[output_key]
if output_name == "":
log.error("Can't find a DetectionOutput layer in the topology")
output_dims = output_info.shape
if len(output_dims) != 4:
log.error("Incorrect output dimensions for SSD model")
max_proposal_count, object_size = output_dims[2], output_dims[3]
if object_size != 7:
log.error("Output item should have 7 as a last dimension")
output_info.precision = "FP32"
# -----------------------------------------------------------------------------------------------------
# --------------------------- Performing inference ----------------------------------------------------
log.info("Loading model to the device")
exec_net = ie.load_network(network=net, device_name=args.device)
log.info("Creating infer request and starting inference")
res = exec_net.infer(inputs={input_blob: images})
# -----------------------------------------------------------------------------------------------------
# --------------------------- Read and postprocess output ---------------------------------------------
log.info("Processing output blobs")
res = res[out_blob]
boxes, classes = {}, {}
data = res[0][0]
for number, proposal in enumerate(data):
if proposal[2] > 0:
imid = np.int(proposal[0])
ih, iw = images_hw[imid]
label = np.int(proposal[1])
confidence = proposal[2]
xmin = np.int(iw * proposal[3])
ymin = np.int(ih * proposal[4])
xmax = np.int(iw * proposal[5])
ymax = np.int(ih * proposal[6])
print("[{},{}] element, prob = {:.6} ({},{})-({},{}) batch id : {}"\
.format(number, label, confidence, xmin, ymin, xmax, ymax, imid), end="")
if proposal[2] > 0.5:
print(" WILL BE PRINTED!")
if not imid in boxes.keys():
boxes[imid] = []
boxes[imid].append([xmin, ymin, xmax, ymax])
if not imid in classes.keys():
classes[imid] = []
classes[imid].append(label)
else:
print()
for imid in classes:
tmp_image = cv2.imread(args.input[imid])
for box in boxes[imid]:
cv2.rectangle(tmp_image, (box[0], box[1]), (box[2], box[3]), (232, 35, 244), 2)
cv2.imwrite("out.bmp", tmp_image)
log.info("Image out.bmp created!")
# -----------------------------------------------------------------------------------------------------
log.info("Execution successful\n")
log.info("This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool")
if __name__ == '__main__':
sys.exit(main() or 0)

View File

@ -19,7 +19,7 @@ The command yields the following usage message:
```
usage: style_transfer_sample.py [-h] -m MODEL -i INPUT [INPUT ...]
[-l CPU_EXTENSION] [-d DEVICE]
[-nt NUMBER_TOP] [-ni NUMBER_ITER]
[-nt NUMBER_TOP]
[--mean_val_r MEAN_VAL_R]
[--mean_val_g MEAN_VAL_G]
[--mean_val_b MEAN_VAL_B]

View File

@ -44,7 +44,7 @@ cdef c_map_to_dict(map[string, string] c_map):
supported_precisions = ["FP32", "FP16", "Q78", "I32", "I16", "I8", "U32", "U16", "U8"]
supported_layouts = ["NCHW", "NHWC", "OIHW", "C", "CHW", "HW", "NC", "CN", "BLOCKED", "NCDHW"]
known_plugins = ['CPU', 'GPU', 'FPGA', 'MYRIAD', 'HETERO', 'HDDL']
known_plugins = ['CPU', 'GPU', 'FPGA', 'MYRIAD', 'HETERO', 'HDDL', 'MULTI']
ctypedef enum StatusCode:
OK = 0
@ -336,7 +336,7 @@ cdef class InferRequest:
# TODO: add execution index. Check if unsigned int is properly converted to int in python.
profile[l.first.decode()] = {"status": info.status.decode(), "exec_type": info.exec_type.decode(),
"layer_type": info.layer_type.decode(), "real_time": info.real_time,
"cpu_time": info.cpu_time}
"cpu_time": info.cpu_time, "execution_index": info.execution_index}
return profile
@property
@ -493,18 +493,14 @@ cdef class IENetwork:
cdef IENetwork net = IENetwork(model, weights)
return net
# TODO: Use enum with precision type instead of string parameter when Python 2 support is dropped.
def add_outputs(self, outputs, precision="FP32"):
if precision.upper() not in supported_precisions:
raise AttributeError(
"Unsupported precision {}! List of supported precisions: {}".format(precision, supported_precisions))
def add_outputs(self, outputs):
if not isinstance(outputs, list):
outputs = [outputs]
for i, l in enumerate(outputs):
if isinstance(l, str):
self.impl.addOutput(l.encode(), 0, precision.upper().encode())
self.impl.addOutput(l.encode(), 0)
elif isinstance(l, tuple) and len(l) == 2:
self.impl.addOutput(l[0].encode(), l[1], precision.upper().encode())
self.impl.addOutput(l[0].encode(), l[1])
else:
raise TypeError("Incorrect type {type} for layer to add at index {ind}. "
"Expected string with layer name or tuple with two elements: layer name as "

View File

@ -68,6 +68,11 @@ PyObject* parse_parameter(const InferenceEngine::Parameter & param){
else if (param.is<int>()) {
auto val = param.as<int>();
return PyLong_FromLong((long)val);
}
// Check for unsigned int
else if (param.is<unsigned int>()) {
auto val = param.as<unsigned int>();
return PyLong_FromLong((unsigned long)val);
}
// Check for float
else if (param.is<float>()) {
@ -97,6 +102,15 @@ PyObject* parse_parameter(const InferenceEngine::Parameter & param){
PyList_Append(list, PyLong_FromLong(it));
}
return list;
}
// Check for std::vector<unsigned int>
else if (param.is<std::vector<unsigned int>>()){
auto val = param.as<std::vector<unsigned int>>();
PyObject *list = PyList_New(0);
for (const auto & it : val){
PyList_Append(list, PyLong_FromLong(it));
}
return list;
}
// Check for std::vector<float>
else if (param.is<std::vector<float>>()){
@ -243,7 +257,7 @@ const std::map<std::string, InferenceEnginePython::InputInfo> InferenceEnginePyt
const InferenceEngine::InputsDataMap &inputsInfo = actual.getInputsInfo();
for (auto &in : inputsInfo) {
InferenceEnginePython::InputInfo info;
info.actual = *in.second;
info.actual = in.second;
const InferenceEngine::TensorDesc &inputTensorDesc = in.second->getTensorDesc();
info.dims = inputTensorDesc.getDims();
for (auto it : precision_map)
@ -277,16 +291,8 @@ const std::map<std::string, InferenceEnginePython::OutputInfo> InferenceEnginePy
}
void
InferenceEnginePython::IENetwork::addOutput(const std::string &out_layer, size_t port_id, const std::string &precision) {
InferenceEnginePython::IENetwork::addOutput(const std::string &out_layer, size_t port_id) {
actual.addOutput(out_layer, port_id);
InferenceEngine::OutputsDataMap outputsDataMapUpd = actual.getOutputsInfo();
if (outputsDataMapUpd.count(out_layer)) {
outputsDataMapUpd[out_layer]->setPrecision(precision_map[precision]);
} else if (outputsDataMapUpd.count(out_layer + "." + std::to_string(port_id))){
outputsDataMapUpd[out_layer + "." + std::to_string(port_id)]->setPrecision(precision_map[precision]);
} else {
THROW_IE_EXCEPTION << "Failed to set precision for layer " << out_layer;
}
}
void InferenceEnginePython::IENetwork::setBatch(const size_t size) {
@ -329,11 +335,11 @@ void InferenceEnginePython::IENetwork::setStats(const std::map<std::string, std:
}
void InferenceEnginePython::InputInfo::setPrecision(std::string precision) {
actual.setPrecision(precision_map[precision]);
actual->setPrecision(precision_map[precision]);
}
void InferenceEnginePython::InputInfo::setLayout(std::string layout) {
actual.setLayout(layout_map[layout]);
actual->setLayout(layout_map[layout]);
}
void InferenceEnginePython::OutputInfo::setPrecision(std::string precision) {
@ -567,6 +573,7 @@ InferenceEnginePython::InferRequestWrap::getPerformanceCounts() {
profile_info.layer_type = it.second.layer_type;
profile_info.cpu_time = it.second.cpu_uSec;
profile_info.real_time = it.second.realTime_uSec;
profile_info.execution_index = it.second.execution_index;
perf_map[it.first] = profile_info;
}
return perf_map;

View File

@ -49,7 +49,7 @@ struct IENetLayer {
};
struct InputInfo {
InferenceEngine::InputInfo actual;
InferenceEngine::InputInfo::Ptr actual;
std::vector<size_t> dims;
std::string precision;
std::string layout;
@ -85,7 +85,7 @@ struct IENetwork {
void setBatch(const size_t size);
void addOutput(const std::string &out_layer, size_t port_id, const std::string &precision);
void addOutput(const std::string &out_layer, size_t port_id);
const std::vector<std::pair<std::string, InferenceEnginePython::IENetLayer>> getLayers();

View File

@ -91,7 +91,7 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython":
const vector[pair[string, IENetLayer]] getLayers() except +
map[string, InputInfo] getInputs() except +
map[string, OutputInfo] getOutputs() except +
void addOutput(string &, size_t, string &) except +
void addOutput(string &, size_t) except +
void setAffinity(map[string, string] & types_affinity_map, map[string, string] & layers_affinity_map) except +
void setBatch(size_t size) except +
void setLayerParams(map[string, map[string, string]] params_map) except +

View File

@ -23,7 +23,13 @@ endif()
cython_add_module (${TARGET_NAME} ${SOURCE})
set_target_properties (${TARGET_NAME} PROPERTIES CXX_STANDARD 11 LINKER_LANGUAGE CXX)
target_link_libraries (${TARGET_NAME} PRIVATE statistics_collector_s)
target_link_libraries (${TARGET_NAME} PRIVATE ${InferenceEngine_LIBRARIES})
if(TARGET IE::statistics_collector_s)
target_link_libraries(${TARGET_NAME} PRIVATE IE::statistics_collector_s)
else()
target_link_libraries(${TARGET_NAME} PRIVATE statistics_collector_s)
endif()
# perform copy
ADD_CUSTOM_COMMAND (TARGET ${TARGET_NAME}

View File

@ -72,9 +72,6 @@ public:
* @return reference to layer builder
*/
ConcatLayer& setAxis(size_t axis);
private:
size_t axis = 1;
};
} // namespace Builder

View File

@ -89,7 +89,7 @@ public:
virtual OutputsDataMap getOutputsInfo() const {
OutputsDataMap outputs;
actual->getOutputsInfo(outputs);
return std::move(outputs);
return outputs;
}
/**
@ -99,7 +99,7 @@ public:
virtual InputsDataMap getInputsInfo() const {
InputsDataMap inputs;
actual->getInputsInfo(inputs);
return std::move(inputs);
return inputs;
}
/**
@ -223,7 +223,7 @@ public:
}
}
}
return std::move(shapes);
return shapes;
}
/**

View File

@ -30,17 +30,31 @@ class ExecutableNetwork {
InferenceEnginePluginPtr plg;
public:
/**
* @brief Default constructor
*/
ExecutableNetwork() = default;
/**
* @brief Destructor
*/
~ExecutableNetwork() {
actual = nullptr;
}
/**
* @brief Constructs ExecutableNetwork from the initialized shared_pointer
* @param actual Initialized shared pointer
* @param plg Plugin to use
*/
explicit ExecutableNetwork(IExecutableNetwork::Ptr actual, InferenceEnginePluginPtr plg = {})
: actual(actual), plg(plg) {}
/**
* @brief Wraps original method
* IExecutableNetwork::getOutputsInfo
* @copybrief IExecutableNetwork::GetOutputsInfo
*
* Wraps IExecutableNetwork::GetOutputsInfo.
* @return A collection that contains string as key, and const Data smart pointer as value
*/
ConstOutputsDataMap GetOutputsInfo() const {
ConstOutputsDataMap data;
@ -49,8 +63,10 @@ public:
}
/**
* @brief Wraps original method
* IExecutableNetwork::getInputsInfo
* @copybrief IExecutableNetwork::GetInputsInfo
*
* Wraps IExecutableNetwork::GetInputsInfo
* @return A collection that contains string as key, and const InputInfo smart pointer as value
*/
ConstInputsDataMap GetInputsInfo() const {
ConstInputsDataMap info;
@ -59,16 +75,20 @@ public:
}
/**
* @brief reset owned object to new pointer, essential for cases when simultaneously loaded networks not expected
* @param actual actual pointed object
* @brief reset owned object to new pointer.
*
* Essential for cases when simultaneously loaded networks are not expected.
* @param newActual actual pointed object
*/
void reset(IExecutableNetwork::Ptr newActual) {
this->actual.swap(newActual);
}
/**
* @brief Wraps original method
* IExecutableNetwork::CreateInferRequest
* @copybrief IExecutableNetwork::CreateInferRequest
*
* Wraps IExecutableNetwork::CreateInferRequest.
* @return InferRequest object
*/
InferRequest CreateInferRequest() {
IInferRequest::Ptr req;
@ -78,9 +98,10 @@ public:
}
/**
* @brief Wraps original method
* IExecutableNetwork::CreateInferRequestPtr
* @return shared pointer on InferRequest object
* @copybrief IExecutableNetwork::CreateInferRequest
*
* Wraps IExecutableNetwork::CreateInferRequest.
* @return shared pointer on InferenceEngine::InferRequest object
*/
InferRequest::Ptr CreateInferRequestPtr() {
IInferRequest::Ptr req;
@ -89,18 +110,24 @@ public:
}
/**
* @brief Exports the current executable network so it can be used later in the Import() main API
* @copybrief IExecutableNetwork::Export
*
* Wraps IExecutableNetwork::Export.
*
* @see Core::ImportNetwork
* @see InferencePlugin::ImportNetwork
*
* @param modelFileName Full path to the location of the exported file
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
*/
void Export(const std::string &modelFileName) {
CALL_STATUS_FNC(Export, modelFileName);
}
/**
* @brief Gets the mapping of IR layer names to implemented kernels
* @copybrief IExecutableNetwork::GetMappedTopology
*
* Wraps IExecutableNetwork::GetMappedTopology.
* @param deployedTopology Map of PrimitiveInfo objects that represent the deployed topology
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
*/
void GetMappedTopology(std::map<std::string, std::vector<PrimitiveInfo::Ptr>> &deployedTopology) {
CALL_STATUS_FNC(GetMappedTopology, deployedTopology);
@ -115,7 +142,9 @@ public:
}
/**
* @brief Get executable graph information from a plugin represented as CNNNetwork
* @copybrief IExecutableNetwork::GetExecGraphInfo
*
* Wraps IExecutableNetwork::GetExecGraphInfo.
* @return CNNetwork containing Executable Graph Info
*/
CNNNetwork GetExecGraphInfo() {
@ -125,7 +154,10 @@ public:
}
/**
*@brief see original function InferenceEngine::IExecutableNetwork::QueryState
* @copybrief IExecutableNetwork::QueryState
*
* Wraps IExecutableNetwork::QueryState
* @return A vector of Memory State objects
*/
std::vector<MemoryState> QueryState() {
IMemoryState::Ptr pState = nullptr;
@ -146,20 +178,21 @@ public:
}
/**
* @brief Sets configuration for current executable network
* @copybrief IExecutableNetwork::SetConfig
*
* Wraps IExecutableNetwork::SetConfig.
* @param config Map of pairs: (config parameter name, config parameter value)
* @param resp Pointer to the response message that holds a description of an error if any occurred
*/
void SetConfig(const std::map<std::string, Parameter> &config) {
CALL_STATUS_FNC(SetConfig, config);
}
/** @brief Gets configuration dedicated to plugin behaviour
* @param name - config key, can be found in ie_plugin_config.hpp
* @param options - configuration details for coonfig value
* @param result - value of config corresponding to config key
* @param resp Pointer to the response message that holds a description of an error if any occurred
*/
/** @copybrief IExecutableNetwork::GetConfig
*
* Wraps IExecutableNetwork::GetConfig
* @param name - config key, can be found in ie_plugin_config.hpp
* @return Configuration parameter value
*/
Parameter GetConfig(const std::string &name) const {
Parameter configValue;
CALL_STATUS_FNC(GetConfig, name, configValue);
@ -167,13 +200,11 @@ public:
}
/**
* @brief Gets general runtime metric for dedicated hardware
* @copybrief IExecutableNetwork::GetMetric
*
* Wraps IExecutableNetwork::GetMetric
* @param name - metric name to request
* @param options - configuration details for metric
* @param result - metric value corresponding to metric key
* @param resp - Pointer to the response message that holds a description of an error if any
* occurred
* @return code of the operation. OK if succeeded
* @return Metric parameter value
*/
Parameter GetMetric(const std::string &name) const {
Parameter metricValue;
@ -181,6 +212,9 @@ public:
return metricValue;
}
/**
* @brief A smart pointer to the ExecutableNetwork object
*/
using Ptr = std::shared_ptr<ExecutableNetwork>;
};

View File

@ -69,8 +69,14 @@ class InferRequest {
}
public:
/**
* @brief Default constructor
*/
InferRequest() = default;
/**
* @brief Destructor
*/
~InferRequest() {
actual = nullptr;
}
@ -150,8 +156,9 @@ public:
}
/**
* constructs InferRequest from initialised shared_pointer
* @param actual
* constructs InferRequest from the initialized shared_pointer
* @param request Initialized shared pointer
* @param plg Plugin to use
*/
explicit InferRequest(IInferRequest::Ptr request, InferenceEnginePluginPtr plg = {})
: actual(request), plg(plg) {}
@ -192,14 +199,25 @@ public:
return actual;
}
/**
* @brief Checks if current InferRequest object is not initialized
* @return true if current InferRequest object is not initialized, false - otherwise
*/
bool operator!() const noexcept {
return !actual;
}
/**
* @brief Checks if current InferRequest object is initialized
* @return true if current InferRequest object is initialized, false - otherwise
*/
explicit operator bool() const noexcept {
return !!actual;
}
/**
* @brief A smart pointer to the InferRequest object
*/
using Ptr = std::shared_ptr<InferRequest>;
};

View File

@ -14,40 +14,47 @@ class MemoryState {
IMemoryState::Ptr actual = nullptr;
public:
/**
* constructs MemoryState from the initialized shared_pointer
* @param pState Initialized shared pointer
*/
explicit MemoryState(IMemoryState::Ptr pState) : actual(pState) {}
/**
* @brief Wraps original method
* IMemoryState::Reset
*/
void Reset() {
void Reset() {
CALL_STATUS_FNC_NO_ARGS(Reset);
}
}
/**
* @brief Wraps original method
* IMemoryState::GetName
*/
std::string GetName() const {
char name[256];
CALL_STATUS_FNC(GetName, name, sizeof(name));
return name;
}
std::string GetName() const {
char name[256];
CALL_STATUS_FNC(GetName, name, sizeof(name));
return name;
}
/**
* @brief Wraps original method
* IMemoryState::GetLastState
*/
Blob::CPtr GetLastState() const {
Blob::CPtr stateBlob;
CALL_STATUS_FNC(GetLastState, stateBlob);
return stateBlob;
}
Blob::CPtr GetLastState() const {
Blob::CPtr stateBlob;
CALL_STATUS_FNC(GetLastState, stateBlob);
return stateBlob;
}
/**
* @brief Wraps original method
* IMemoryState::SetState
*/
void SetState(Blob::Ptr state) {
CALL_STATUS_FNC(SetState, state);
}
void SetState(Blob::Ptr state) {
CALL_STATUS_FNC(SetState, state);
}
};
} // namespace InferenceEngine

View File

@ -34,6 +34,7 @@ public:
/**
* @brief Constructs a plugin instance from the given pointer.
* @param pointer Initialized Plugin pointer
*/
explicit InferencePlugin(const InferenceEnginePluginPtr &pointer) : actual(pointer) {}
@ -53,6 +54,7 @@ public:
/**
* @deprecated Use InferencePlugin::LoadNetwork(ICNNNetwork &, const std::map<std::string, std::string> &)
* @brief Wraps original method IInferencePlugin::LoadNetwork(ICNNNetwork &, ResponseDesc *)
* @param network A network object to load
*/
INFERENCE_ENGINE_DEPRECATED
void LoadNetwork(ICNNNetwork &network) {
@ -64,6 +66,9 @@ public:
/**
* @brief Wraps original method
* IInferencePlugin::LoadNetwork(IExecutableNetwork::Ptr&, ICNNNetwork&, const std::map<std::string, std::string> &, ResponseDesc*).
* @param network A network object to load
* @param config A map of configuration options
* @return Created Executable Network object
*/
ExecutableNetwork LoadNetwork(ICNNNetwork &network, const std::map<std::string, std::string> &config) {
IExecutableNetwork::Ptr ret;
@ -74,6 +79,9 @@ public:
/**
* @brief Wraps original method
* IInferencePlugin::LoadNetwork(IExecutableNetwork::Ptr&, ICNNNetwork&, const std::map<std::string, std::string> &, ResponseDesc*).
* @param network A network object to load
* @param config A map of configuration options
* @return Created Executable Network object
*/
ExecutableNetwork LoadNetwork(CNNNetwork network, const std::map<std::string, std::string> &config) {
IExecutableNetwork::Ptr ret;
@ -85,6 +93,8 @@ public:
/**
* @deprecated Use IExecutableNetwork to create IInferRequest.
* @brief Wraps original method IInferencePlugin::Infer(const BlobMap&, BlobMap&, ResponseDesc *)
* @param input A map of input blobs accessed by input names
* @param result A map of output blobs accessed by output names
*/
INFERENCE_ENGINE_DEPRECATED
void Infer(const BlobMap &input, BlobMap &result) {
@ -96,6 +106,7 @@ public:
/**
* @deprecated Use IInferRequest to get performance counters
* @brief Wraps original method IInferencePlugin::GetPerformanceCounts
* @return Map of layers names to profiling information for that layers
*/
INFERENCE_ENGINE_DEPRECATED
std::map<std::string, InferenceEngineProfileInfo> GetPerformanceCounts() const {
@ -109,6 +120,7 @@ public:
/**
* @brief Wraps original method
* IInferencePlugin::AddExtension
* @param extension Pointer to loaded Extension
*/
void AddExtension(InferenceEngine::IExtensionPtr extension) {
CALL_STATUS_FNC(AddExtension, extension);
@ -117,6 +129,7 @@ public:
/**
* @brief Wraps original method
* IInferencePlugin::SetConfig
* @param config A configuration map
*/
void SetConfig(const std::map<std::string, std::string> &config) {
CALL_STATUS_FNC(SetConfig, config);
@ -125,7 +138,10 @@ public:
/**
* @brief Wraps original method
* IInferencePlugin::ImportNetwork
*/
* @param modelFileName A path to the imported network
* @param config A configuration map
* @return Created Executable Network object
*/
ExecutableNetwork ImportNetwork(const std::string &modelFileName, const std::map<std::string, std::string> &config) {
IExecutableNetwork::Ptr ret;
CALL_STATUS_FNC(ImportNetwork, ret, modelFileName, config);
@ -136,6 +152,8 @@ public:
* @deprecated Use InferencePlugin::QueryNetwork(const ICNNNetwork &, const std::map<std::string, std::string> &, QueryNetworkResult &) const
* @brief Wraps original method
* IInferencePlugin::QueryNetwork(const ICNNNetwork&, QueryNetworkResult& ) const
* @param network A network object to query
* @param res Query results
*/
INFERENCE_ENGINE_DEPRECATED
void QueryNetwork(const ICNNNetwork &network, QueryNetworkResult &res) const {
@ -145,6 +163,9 @@ public:
/**
* @brief Wraps original method
* IInferencePlugin::QueryNetwork(const ICNNNetwork&, const std::map<std::string, std::string> &, QueryNetworkResult&) const
* @param network A network object to query
* @param config A configuration map
* @param res Query results
*/
void QueryNetwork(const ICNNNetwork &network, const std::map<std::string, std::string> &config, QueryNetworkResult &res) const {
actual->QueryNetwork(network, config, res);
@ -153,7 +174,7 @@ public:
/**
* @brief Converts InferenceEngine to InferenceEnginePluginPtr pointer
* @brief Returns wrapped object
* @return Wrapped object
*/
operator InferenceEngine::InferenceEnginePluginPtr() {
return actual;
@ -162,7 +183,7 @@ public:
/**
* @deprecated Deprecated since HeteroPluginPtr is deprecated
* @brief Converts InferenceEngine to HeteroPluginPtr pointer
* @return wrapped Hetero object if underlined object is HeteroPlugin instance, nullptr otherwise
* @return Wrapped Hetero object if underlined object is HeteroPlugin instance, nullptr otherwise
*/
IE_SUPPRESS_DEPRECATED_START
operator InferenceEngine::HeteroPluginPtr() {

View File

@ -71,8 +71,8 @@ public:
private:
std::vector<std::shared_ptr<LT>> sortedLayers;
std::shared_ptr<LT> currentLayer;
size_t currentIdx;
NT *network = nullptr;
size_t currentIdx;
std::shared_ptr<LT> getNextLayer() {
return (sortedLayers.size() > currentIdx) ? sortedLayers[currentIdx++] : nullptr;

View File

@ -49,7 +49,7 @@ class PreAllocator : public IAllocator {
return _actualData;
}
return this;
return nullptr;
}
/**
* @brief The PreAllocator class cannot release the handle

View File

@ -0,0 +1,81 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief A header that defines advanced DLIA plugin related properties.
* These properties should be used in SetConfig() and LoadNetwork() methods of plugins
*
* @file dlia_config.hpp
*/
#pragma once
#include <string>
#include "ie_plugin_config.hpp"
namespace InferenceEngine {
namespace DliaMetrics {
/**
* @def DLIA_METRIC_VALUE(name)
* @brief Shortcut for defining FPGA metric values
*/
#define DLIA_METRIC_VALUE(name) InferenceEngine::DliaMetrics::name
#define DECLARE_DLIA_METRIC_VALUE(name) static constexpr auto name = #name
/**
* @brief FP11 optimization capability. It is specific to the FPGA device, which can perform computations in the FP11 data type.
*/
DECLARE_DLIA_METRIC_VALUE(FP11);
/**
* @brief Input Streaming capability. It is specific to the FPGA device, which can perform input streaming.
*/
DECLARE_DLIA_METRIC_VALUE(INPUT_STREAMING);
} // namespace DliaMetrics
namespace DLIAConfigParams {
/**
* @def DLIA_CONFIG_KEY(name)
* @brief Shortcut for defining FPGA configuration keys
*/
#define DLIA_CONFIG_KEY(name) InferenceEngine::DLIAConfigParams::_CONFIG_KEY(DLIA_##name)
#define DECLARE_DLIA_CONFIG_KEY(name) DECLARE_CONFIG_KEY(DLIA_##name)
#define DECLARE_DLIA_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(DLIA_##name)
/**
* @brief The key to define the type of transformations for DLIA inputs and outputs.
* DLIA uses a custom data layout for input and output blobs. The IE DLIA plugin provides custom
* optimized versions of the transformation functions that do not use OpenMP and are much faster
* than the native DLIA functions. Values: "NO" - optimized plugin transformations
* are used, "YES" - native DLIA transformations are used.
*/
DECLARE_DLIA_CONFIG_KEY(IO_TRANSFORMATIONS_NATIVE);
/**
* @brief The key to define path to DLA bitstreams architectures folder
*/
DECLARE_DLIA_CONFIG_KEY(ARCH_ROOT_DIR);
/**
* @brief The bool key to define whether theoretical performance estimation should be performed.
* If true, the estimated performance is dumped via performance counters as "FPGA theoretical execute time"
*/
DECLARE_DLIA_CONFIG_KEY(PERF_ESTIMATION);
// TODO: Temporarily adding dlia config to test streaming feature
// Values - "YES" or "NO"
DECLARE_DLIA_CONFIG_KEY(ENABLE_STREAMING);
/**
* @brief The bool key to define whether information messages with a reason are printed in case the layer is unsupported by DLA
*/
DECLARE_DLIA_CONFIG_KEY(DUMP_SUPPORTED_LAYERS_INFORMATION);
} // namespace DLIAConfigParams
} // namespace InferenceEngine
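As a sketch only (assuming the FPGA/DLIA plugin is installed and accepts these keys), the keys declared above can be passed from the Python API via `IECore.set_config`:

```python
from openvino.inference_engine import IECore

# Assumption: an FPGA (DLIA) device and plugin are available.
ie = IECore()
ie.set_config({"DLIA_IO_TRANSFORMATIONS_NATIVE": "NO",   # use the optimized plugin transformations
               "DLIA_PERF_ESTIMATION": "YES"}, "FPGA")    # dump theoretical performance estimates
```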

View File

@ -20,6 +20,10 @@ namespace InferenceEngine {
namespace HeteroConfigParams {
/**
* @def HETERO_CONFIG_KEY(name)
* @brief Shortcut for defining HETERO configuration keys
*/
#define HETERO_CONFIG_KEY(name) InferenceEngine::HeteroConfigParams::_CONFIG_KEY(HETERO_##name)
#define DECLARE_HETERO_CONFIG_KEY(name) DECLARE_CONFIG_KEY(HETERO_##name)
#define DECLARE_HETERO_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(HETERO_##name)

View File

@ -28,6 +28,8 @@ class IAllocator : public details::IRelease {
public:
/**
* @brief Maps handle to heap memory accessible by any memory manipulation routines.
* @param handle Handle to the allocated memory to be locked
* @param LockOp Operation to lock memory for
* @return Generic pointer to memory
*/
virtual void * lock(void * handle, LockOp = LOCK_FOR_WRITE) noexcept = 0;
@ -35,6 +37,7 @@ public:
* @brief Unmaps memory by handle with multiple sequential mappings of the same handle.
* Multiple sequential mappings of the same handle are supposed to return the same
* result, as reference counting is not supported.
* @param handle Handle to the locked memory to unlock
*/
virtual void unlock(void * handle) noexcept = 0;
/**

View File

@ -57,11 +57,16 @@
#define IE_DO_PRAGMA(x)
#endif
#ifdef _MSC_VER
#if defined (_MSC_VER) && !defined (__clang__)
#define IE_SUPPRESS_DEPRECATED_START \
IE_DO_PRAGMA(warning(push)) \
IE_DO_PRAGMA(warning(disable: 4996))
#define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop))
#elif defined(__INTEL_COMPILER)
#define IE_SUPPRESS_DEPRECATED_START \
IE_DO_PRAGMA(warning(push)) \
IE_DO_PRAGMA(warning(disable:1478))
#define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop))
#elif defined(__clang__) || ((__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 405))
#define IE_SUPPRESS_DEPRECATED_START \
IE_DO_PRAGMA(GCC diagnostic push) \
@ -71,3 +76,11 @@
#define IE_SUPPRESS_DEPRECATED_START
#define IE_SUPPRESS_DEPRECATED_END
#endif
#ifndef ENABLE_UNICODE_PATH_SUPPORT
#if defined(_WIN32)
#define ENABLE_UNICODE_PATH_SUPPORT
#elif defined(__GNUC__) && (__GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ > 2))
#define ENABLE_UNICODE_PATH_SUPPORT
#endif
#endif

View File

@ -678,7 +678,7 @@ public:
if (_handle != nullptr) {
getAllocator()->free(_handle);
}
_handle = getAllocator()->alloc(byteSize());
_handle = getAllocator()->alloc(size() * sizeof(T));
}
/**
@ -779,10 +779,7 @@ protected:
* @brief Frees handler and cleans up the stored data.
*/
virtual bool free() {
bool bCanRelease = true;
if (_handle == nullptr) return bCanRelease;
bCanRelease = getAllocator()->free(_handle);
bool bCanRelease = getAllocator()->free(_handle);
_handle = nullptr;
return bCanRelease;
}

View File

@ -109,6 +109,8 @@ inline std::ostream & operator << (std::ostream &out, const Layout & p) {
PRINT_LAYOUT(ANY);
PRINT_LAYOUT(NCHW);
PRINT_LAYOUT(NHWC);
PRINT_LAYOUT(NCDHW);
PRINT_LAYOUT(NDHWC);
PRINT_LAYOUT(OIHW);
PRINT_LAYOUT(C);
PRINT_LAYOUT(CHW);
@ -125,7 +127,7 @@ inline std::ostream & operator << (std::ostream &out, const Layout & p) {
}
/**
* @enum Color format
* @enum ColorFormat
* @brief Extra information about input color format for preprocessing
*/
enum ColorFormat : uint32_t {

View File

@ -35,7 +35,7 @@ public:
/**
* @brief Returns plugins version information
* @param Device name to indentify plugin
* @param deviceName Device name to identify plugin
* @return A vector of versions
*/
std::map<std::string, Version> GetVersions(const std::string & deviceName) const;
@ -134,6 +134,7 @@ public:
/** @brief Registers plugin to Inference Engine Core instance using XML configuration file with
* plugins description. XML file has the following structure:
*
* ```xml
* <ie>
* <plugins>
* <plugin name="" location="">
@ -144,14 +145,16 @@ public:
* <property key="" value=""/>
* </properties>
* </plugin>
* </plugin>
* </plugins>
* </ie>
* ```
*
* - `name` identifies name of device enabled by plugin
* - `location` specifies absolute path to dynamic library with plugin. A path can also be relative to inference engine shared library.
* It allows to have common config for different systems with different configurations.
* - Properties are set to plugin via the `SetConfig` method.
* - Extensions are set to plugin via the `AddExtension` method.
* @param xmlConfigFile A path to .xml file with plugins to register.
*/
void RegisterPlugins(const std::string & xmlConfigFile);
};
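A short usage sketch of the registration API documented above; the XML path and device name are placeholders:
```cpp
#include <map>
#include <string>
#include <ie_core.hpp>

int main() {
    InferenceEngine::Core core;
    // Register extra plugins from a custom descriptor file (hypothetical path).
    core.RegisterPlugins("/opt/intel/custom_plugins.xml");
    // Versions of the plugin that serves the requested device name.
    std::map<std::string, InferenceEngine::Version> versions = core.GetVersions("CPU");
    return versions.empty() ? 1 : 0;
}
```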

View File

@ -74,6 +74,7 @@ public:
* @brief An empty constructor (dimensionless)
* @param name Name of the data node
* @param _precision Precision of the data
* @param layout Data layout
*/
Data(const std::string &name, Precision _precision, Layout layout = NCHW);
@ -82,6 +83,7 @@ public:
* @param name Name of the data node
* @param a_dims Data tensor dimensions
* @param _precision Precision of the data
* @param layout Data layout
*/
Data(const std::string &name, const SizeVector &a_dims, Precision _precision, Layout layout = NCHW);
/**
@ -183,7 +185,7 @@ public:
/**
* @brief Sets a name the Data object
* @param name Name of the data node
* @param newName Name of the data node
*/
void setName(const std::string& newName);

View File

@ -28,8 +28,10 @@ enum class TargetDevice : uint8_t {
eGPU = 3,
eFPGA = 4,
eMYRIAD = 5,
eHDDL = 6,
eGNA = 7,
eHETERO = 8,
eMULTI = 10,
};
/**
@ -53,8 +55,10 @@ class INFERENCE_ENGINE_DEPRECATED TargetDeviceInfo {
DECL_DEVICE(GPU),
DECL_DEVICE(FPGA),
DECL_DEVICE(MYRIAD),
DECL_DEVICE(HDDL),
DECL_DEVICE(GNA),
DECL_DEVICE(HETERO)
DECL_DEVICE(HETERO),
DECL_DEVICE(MULTI)
};
#undef DECLARE
return g_allDeviceInfos;
@ -64,6 +68,8 @@ class INFERENCE_ENGINE_DEPRECATED TargetDeviceInfo {
/**
* @deprecated Deprecated since InferenceEngine::TargetDevice is deprecated
* @brief Converts string representation of device to InferenceEngine::TargetDevice enum value
* @param deviceName A string representation of a device name
* @return An instance of InferenceEngine::TargetDevice
*/
INFERENCE_ENGINE_DEPRECATED
static TargetDevice fromStr(const std::string &deviceName) {
@ -72,9 +78,11 @@ class INFERENCE_ENGINE_DEPRECATED TargetDeviceInfo {
{ "GPU", InferenceEngine::TargetDevice::eGPU },
{ "FPGA", InferenceEngine::TargetDevice::eFPGA },
{ "MYRIAD", InferenceEngine::TargetDevice::eMYRIAD },
{ "HDDL", InferenceEngine::TargetDevice::eHDDL },
{ "GNA", InferenceEngine::TargetDevice::eGNA },
{ "BALANCED", InferenceEngine::TargetDevice::eBalanced },
{ "HETERO", InferenceEngine::TargetDevice::eHETERO }
{ "HETERO", InferenceEngine::TargetDevice::eHETERO },
{ "MULTI", InferenceEngine::TargetDevice::eMULTI}
};
auto val = deviceFromNameMap.find(deviceName);
return val != deviceFromNameMap.end() ? val->second : InferenceEngine::TargetDevice::eDefault;
@ -82,7 +90,9 @@ class INFERENCE_ENGINE_DEPRECATED TargetDeviceInfo {
/**
* @deprecated Deprecated since InferenceEngine::TargetDevice is deprecated
* @brief Converts InferenceEngine::TargetDevice enum value to string representation
* @brief Converts an instance of InferenceEngine::TargetDevice to string representation
* @param device Instance of InferenceEngine::TargetDevice
* @return A c-string with the name
*/
INFERENCE_ENGINE_DEPRECATED
static const char * name(TargetDevice device) {

View File

@ -4,6 +4,7 @@
/**
* @brief This is a header file for the ICNNNetworkStats class
*
* @file ie_icnn_network_stats.hpp
*/
#pragma once
@ -18,9 +19,17 @@
namespace InferenceEngine {
class NetworkNodeStats;
/**
* @brief A shared pointer to the NetworkNodeStats object
*/
using NetworkNodeStatsPtr = std::shared_ptr<NetworkNodeStats>;
/**
* @brief A smart pointer to the NetworkNodeStats object
*/
using NetworkNodeStatsWeakPtr = std::weak_ptr<NetworkNodeStats>;
/**
* @brief A map of pairs: name of a layer and related statistics
*/
using NetworkStatsMap = std::map<std::string, NetworkNodeStatsPtr>;
/**
* @class ICNNNetworkStats
@ -28,16 +37,44 @@ using NetworkStatsMap = std::map<std::string, NetworkNodeStatsPtr>;
*/
class ICNNNetworkStats : public details::IRelease {
public:
/**
* @brief Sets a map which contains layers with statistics
*
* @param stats A map which is set
* Abstract method
*/
virtual void setNodesStats(const NetworkStatsMap& stats) = 0;
/**
* @brief Gets a map which contains layers with statistics
*
* Abstract method
* @return A NetworkStatsMap object
*/
virtual const NetworkStatsMap& getNodesStats() const = 0;
/**
* @brief Checks if a container is empty
*
* Abstract method
* @return A bool value which shows whether a container is empty
*/
virtual bool isEmpty() const = 0;
};
/**
* @class NetworkNodeStats
* @brief This class implements a container which stores statistics for a layer
*/
class NetworkNodeStats {
public:
/**
* @brief The constructor which creates NetworkNodeStats object
*/
NetworkNodeStats() { }
/**
* @brief The constructor which creates NetworkNodeStats object with filled statistics
*
* @param statCount The number of minimum/maximum values in statistics
*/
explicit NetworkNodeStats(int statCount) {
float mn = (std::numeric_limits<float>::max)();
float mx = (std::numeric_limits<float>::min)();
@ -49,7 +86,13 @@ public:
}
public:
/**
* @brief Vector of floats which contains minimum values of layers activations
*/
std::vector<float> _minOutputs;
/**
* @brief Vector of floats which contains maximum values of layers activations
*/
std::vector<float> _maxOutputs;
};
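For illustration, a small sketch that builds a statistics map of the kind consumed by ICNNNetworkStats::setNodesStats; the layer name and values are hypothetical:
```cpp
#include <memory>
#include <ie_icnn_network_stats.hpp>

InferenceEngine::NetworkStatsMap makeStats() {
    // Per-channel min/max activation values for a hypothetical 3-channel layer.
    auto convStats = std::make_shared<InferenceEngine::NetworkNodeStats>(3);
    convStats->_minOutputs = {-1.0f, -0.5f, -2.0f};
    convStats->_maxOutputs = { 1.0f,  0.5f,  2.0f};

    InferenceEngine::NetworkStatsMap stats;
    stats["conv1"] = convStats;  // keyed by layer name
    return stats;
}
```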

View File

@ -38,39 +38,51 @@ public:
using Ptr = std::shared_ptr<IExecutableNetwork>;
/**
* @brief Gets the Executable network output Data node information. The received info is stored in the given ConstOutputsDataMap node.
* @brief Gets the Executable network output Data node information.
*
* The received info is stored in the given ::ConstOutputsDataMap node.
* This method needs to be called to find output names for using them later during filling of a map
* of blobs passed to InferenceEngine::IInferencePlugin::Infer()
* @param out Reference to the ConstOutputsDataMap object
*
* @param out Reference to the ::ConstOutputsDataMap object
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: OK (0) for success
* @return Status code of the operation: InferenceEngine::OK (0) for success
*/
virtual StatusCode GetOutputsInfo(ConstOutputsDataMap &out, ResponseDesc *resp) const noexcept = 0;
/**
* @brief Gets the Executable network input Data node information. The received info is stored in the given ConstInputsDataMap object.
* @brief Gets the executable network input Data node information.
*
* The received info is stored in the given ::ConstInputsDataMap object.
* This method needs to be called to find out input names for using them later during filling of a map
* of blobs passed to InferenceEngine::IInferencePlugin::Infer()
* @param inputs Reference to ConstInputsDataMap object.
*
* @param inputs Reference to ::ConstInputsDataMap object.
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: OK (0) for success
* @return Status code of the operation: InferenceEngine::OK (0) for success
*/
virtual StatusCode GetInputsInfo(ConstInputsDataMap &inputs, ResponseDesc *resp) const noexcept = 0;
/**
* @brief Creates an inference request object used to infer the network.
*
* The created request has allocated input and output blobs (that can be changed later).
*
* @param req Shared pointer to the created request object
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: OK (0) for success
* @return Status code of the operation: InferenceEngine::OK (0) for success
*/
virtual StatusCode CreateInferRequest(IInferRequest::Ptr& req, ResponseDesc *resp) noexcept = 0;
/**
* @brief Exports the current executable network so it can be used later in the Import() main API
* @brief Exports the current executable network.
*
* @see Core::ImportNetwork
* @see IInferencePlugin::ImportNetwork
*
* @param modelFileName Full path to the location of the exported file
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: OK (0) for success
* @return Status code of the operation: InferenceEngine::OK (0) for success
*/
virtual StatusCode Export(const std::string& modelFileName, ResponseDesc *resp) noexcept = 0;
@ -78,53 +90,64 @@ public:
* @brief Get the mapping of IR layer names to implemented kernels
* @param deployedTopology Map of PrimitiveInfo objects that represent the deployed topology
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: OK (0) for success
* @return Status code of the operation: InferenceEngine::OK (0) for success
*/
virtual StatusCode GetMappedTopology(std::map<std::string, std::vector<PrimitiveInfo::Ptr>> &deployedTopology, ResponseDesc *resp) noexcept = 0;
/**
* @brief Get executable graph information from a device
*
* @param graphPtr network ptr to store executable graph information
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: OK (0) for success
* @return Status code of the operation: InferenceEngine::OK (0) for success
*/
virtual StatusCode GetExecGraphInfo(ICNNNetwork::Ptr &graphPtr, ResponseDesc *resp) noexcept = 0;
/**
* @brief Gets state control interface for given executable network, State control essential for recurrent networks
* @brief Gets state control interface for given executable network.
*
* State control is essential for recurrent networks
*
* @param pState reference to a pointer that receives internal states
* @param idx requested index for receiving memory state
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: OK (0) for success, OUT_OF_BOUNDS (-6) no memory state for given index
* @return Status code of the operation: InferenceEngine::OK (0) for success, OUT_OF_BOUNDS (-6) no memory state for given index
*/
virtual StatusCode QueryState(IMemoryState::Ptr & pState, size_t idx, ResponseDesc *resp) noexcept = 0;
/**
* @brief Sets configuration for current executable network
*
* @param config Map of pairs: (config parameter name, config parameter value)
* @param resp Pointer to the response message that holds a description of an error if any occurred
* @return code of the operation. OK if succeeded
* @return code of the operation. InferenceEngine::OK if succeeded
*/
virtual StatusCode SetConfig(const std::map<std::string, Parameter> &config, ResponseDesc *resp) noexcept = 0;
/** @brief Gets configuration for current executable network. The method is responsible to extract information
* which affects executable network execution. The list of supported configuration values can be extracted via
* ExecutableNetwork::GetMetric with the SUPPORTED_CONFIG_KEYS key, but some of these keys cannot be changed dymanically,
* e.g. DEVICE_ID cannot changed if an executable network has already been compiled for particular device.
* @param name - config key, can be found in ie_plugin_config.hpp
* @param result - value of config corresponding to config key
* @param resp - Pointer to the response message that holds a description of an error if any occurred
* @return code of the operation. OK if succeeded
*/
/** @brief Gets configuration for current executable network.
*
* The method is responsible for extracting information
* that affects executable network execution. The list of supported configuration values can be extracted via
* ExecutableNetwork::GetMetric with the SUPPORTED_CONFIG_KEYS key, but some of these keys cannot be changed dynamically,
* e.g. DEVICE_ID cannot be changed if an executable network has already been compiled for a particular device.
*
* @param name config key, can be found in ie_plugin_config.hpp
* @param result value of config corresponding to config key
* @param resp Pointer to the response message that holds a description of an error if any occurred
* @return code of the operation. InferenceEngine::OK if succeeded
*/
virtual StatusCode GetConfig(const std::string &name, Parameter &result, ResponseDesc *resp) const noexcept = 0;
/**
* @brief Gets general runtime metric for an executable network. It can be network name, actual device ID on
* @brief Gets general runtime metric for an executable network.
*
* It can be a network name, the actual device ID on
* which the executable network is running, or other properties which cannot be changed dynamically.
* @param name - metric name to request
* @param result - metric value corresponding to metric key
* @param resp - Pointer to the response message that holds a description of an error if any occurred
* @return code of the operation. OK if succeeded
*
* @param name metric name to request
* @param result metric value corresponding to metric key
* @param resp Pointer to the response message that holds a description of an error if any occurred
* @return code of the operation. InferenceEngine::OK if succeeded
*/
virtual StatusCode GetMetric(const std::string &name, Parameter &result, ResponseDesc *resp) const noexcept = 0;
};
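The same functionality is exposed through the C++ wrapper classes; a hedged sketch of the typical flow, assuming `network` is a CNNNetwork read elsewhere (for example, with CNNNetReader):
```cpp
#include <string>
#include <ie_core.hpp>

void runOnce(const InferenceEngine::CNNNetwork& network) {
    InferenceEngine::Core core;
    // Compile the network for a device; the result wraps IExecutableNetwork.
    auto exeNetwork = core.LoadNetwork(network, "CPU");
    // CreateInferRequest() allocates input and output blobs that can be replaced later.
    auto inferRequest = exeNetwork.CreateInferRequest();
    inferRequest.Infer();  // synchronous inference
    // For plugins that support it, the compiled network can be exported
    // and re-imported later via Core::ImportNetwork.
    exeNetwork.Export("compiled_model.blob");
}
```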

View File

@ -20,11 +20,15 @@
#include "details/ie_no_copy.hpp"
/**
* @def INFERENCE_EXTENSION_API(TYPE)
* @brief Defines Inference Engine Extension API method
*/
#if defined(_WIN32) && defined(IMPLEMENT_INFERENCE_EXTENSION_API)
#define INFERENCE_EXTENSION_API(TYPE) extern "C" __declspec(dllexport) TYPE
# define INFERENCE_EXTENSION_API(TYPE) extern "C" __declspec(dllexport) TYPE
#else
#define INFERENCE_EXTENSION_API(TYPE) INFERENCE_ENGINE_API(TYPE)
# define INFERENCE_EXTENSION_API(TYPE) INFERENCE_ENGINE_API(TYPE)
#endif

View File

@ -23,8 +23,10 @@ namespace InferenceEngine {
* plugin during setting of affinity and loading of split sub-network to the plugins
* The custom loader can define additional settings for the plugins or network loading
* Examples of cases when this interface should be implemented in the application:
*
* 1. add custom layers to existing plugins if it is not pointed to the heterogeneous plugin
* or registration of custom layer is different than supported in available public plugins
*
* 2. set affinity manually for the same plugin being initialized by different parameters,
* e.g different device id
* In this case there will be mapping of
@ -89,18 +91,29 @@ public:
INFERENCE_ENGINE_DEPRECATED
virtual void QueryNetwork(const std::string &device,
const ICNNNetwork &network,
const std::map<std::string, std::string>& /*config*/,
const std::map<std::string, std::string>& config,
QueryNetworkResult &res) noexcept = 0;
INFERENCE_ENGINE_DEPRECATED
/**
* @deprecated Use InferenceEngine::Core with HETERO device in InferenceEngine::Core::QueryNetwork.
* @brief Sets log callback
* @param listener A reference to IErrorListener object
*/
virtual void SetLogCallback(IErrorListener &listener) = 0;
IE_SUPPRESS_DEPRECATED_START
/**
* @brief Shared pointer to IHeteroDeviceLoader instance
*/
using Ptr = std::shared_ptr<IHeteroDeviceLoader>;
IE_SUPPRESS_DEPRECATED_END
};
IE_SUPPRESS_DEPRECATED_START
/**
* @brief Represents map from device name to device-specific loader
*/
using MapDeviceLoaders = std::map<std::string, InferenceEngine::IHeteroDeviceLoader::Ptr>;
IE_SUPPRESS_DEPRECATED_END

View File

@ -33,12 +33,18 @@ public:
/** IInferRequest doesn't block or interrupt current thread and immediately returns inference status */
STATUS_ONLY = 0,
};
/**
* @brief A shared pointer to the IInferRequest object
*/
using Ptr = std::shared_ptr<IInferRequest>;
/**
* @brief A smart pointer to the IInferRequest object
*/
using WeakPtr = std::weak_ptr<IInferRequest>;
/**
* @brief Sets input/output data to infer
*
* @note: Memory allocation does not happen
* @param name Name of input or output blob.
* @param data Reference to input or output blob. The type of a blob must match the network input precision and size.
@ -49,6 +55,7 @@ public:
/**
* @brief Gets input/output data for inference
*
* @note: Memory allocation does not happen
* @param name Name of input or output blob.
* @param data Reference to input or output blob. The type of Blob must match the network input precision and size.
@ -59,6 +66,7 @@ public:
/**
* @brief Infers specified input(s) in synchronous mode
*
* @note blocks all methods of IInferRequest while request is ongoing (running or waiting in queue)
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: OK (0) for success
@ -67,6 +75,7 @@ public:
/**
* @brief Queries performance measures per layer to get feedback of what is the most time consuming layer
*
* @note: not all plugins provide meaningful data
* @param perfMap Map of layer names to profiling information for that layer
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
@ -77,6 +86,7 @@ public:
/**
* @brief Waits for the result to become available. Blocks until specified millis_timeout has elapsed or the result becomes available, whichever comes first.
*
* @param millis_timeout Maximum duration in milliseconds to block for
* @note There are special cases when millis_timeout is equal to some value of the WaitMode enum:
* * STATUS_ONLY - immediately returns inference status (IInferRequest::RequestStatus). It does not block or interrupt current thread
@ -88,6 +98,7 @@ public:
/**
* @brief Starts inference of specified input(s) in asynchronous mode
*
* @note: It returns immediately. Inference starts also immediately
* @param resp Optional: a pointer to an already allocated object to contain extra information of a failure (if occurred)
* @return Enumeration of the resulted action: OK (0) for success
@ -96,6 +107,7 @@ public:
/**
* @brief Completion callback definition as pointer to a function
*
* @param context Pointer to request for providing context inside callback
* @param code Completion result status: OK (0) for success
*/
@ -104,6 +116,7 @@ public:
/**
* @brief Sets a callback function that will be called on success or failure of asynchronous request
*
* @param callback A function to be called
* @return Enumeration of the resulted action: OK (0) for success
*/
@ -111,6 +124,7 @@ public:
/**
* @brief Gets arbitrary data for the request and stores a pointer to a pointer to the obtained data
*
* @param data Pointer to a pointer to the gotten arbitrary data
* @param resp Optional: a pointer to an already allocated object to contain extra information of a failure (if occurred)
* @return Enumeration of the resulted action: OK (0) for success
@ -119,6 +133,7 @@ public:
/**
* @brief Sets arbitrary data for the request
*
* @param data Pointer to a pointer to arbitrary data to set
* @param resp Optional: a pointer to an already allocated object to contain extra information of a failure (if occurred)
* @return Enumeration of the resulted action: OK (0) for success
@ -127,6 +142,7 @@ public:
/**
* @brief Sets new batch size when dynamic batching is enabled in executable network that created this request.
*
* @param batch_size new batch size to be used by all the following inference calls for this request.
* @param resp Optional: a pointer to an already allocated object to contain extra information of a failure (if occurred)
* @return Enumeration of the resulted action: OK (0) for success
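For the asynchronous path described above, a minimal sketch with the C++ InferRequest wrapper, continuing from a request created as in the earlier executable-network snippet:
```cpp
#include <cpp/ie_infer_request.hpp>

void runAsync(InferenceEngine::InferRequest& inferRequest) {
    inferRequest.StartAsync();  // returns immediately, inference starts right away
    // Block until the result becomes available (RESULT_READY is a special WaitMode value).
    inferRequest.Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
    // Per-layer timings; not every plugin reports meaningful data here.
    auto perfCounts = inferRequest.GetPerformanceCounts();
    (void)perfCounts;
}
```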

View File

@ -139,6 +139,16 @@ public:
return res;
}
}
/**
* @brief serialize float with "C" locale formatting
* used for serializing default values
*/
static std::string ie_serialize_float(float value) {
std::stringstream val_stream;
val_stream.imbue(std::locale("C"));
val_stream << value;
return val_stream.str();
}
/**
* @brief Gets float value for the given parameter
@ -147,7 +157,7 @@ public:
* @return float value
*/
float GetParamAsFloat(const char* param, float def) const {
std::string val = GetParamAsString(param, std::to_string(def).c_str());
std::string val = GetParamAsString(param, ie_serialize_float(def).c_str());
try {
return ie_parse_float(val);
} catch (...) {
@ -391,11 +401,11 @@ public:
return result;
}
/**
* @brief Returns an boolean value for the given parameter.
* @brief Returns a boolean value for the given parameter.
* The valid values are (true, false, 1, 0).
* @param param Name of the layer parameter
* @param def Default value of the parameter if not found
* @return An bool value for the specified parameter
* @return A bool value for the specified parameter
*/
bool GetParamAsBool(const char *param, bool def) const {
std::string val = GetParamAsString(param, std::to_string(def).c_str());
@ -414,7 +424,29 @@ public:
return result;
}
/**
* @deprecated Use CNNLayer::GetParamAsBool
* @brief Returns a boolean value for the given parameter
* @param param Name of the layer parameter
* @return A bool value for the specified parameter
*/
bool GetParamAsBool(const char *param) const {
std::string val = GetParamAsString(param);
std::string loweredCaseValue;
std::transform(val.begin(), val.end(), std::back_inserter(loweredCaseValue), [](char value) {
return std::tolower(value);
});
bool result = false;
if (!(std::istringstream(loweredCaseValue) >> std::boolalpha >> result)) {
// attempting parse using non alpha bool
return (GetParamAsInt(param) != 0);
}
return result;
}
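A hedged usage sketch for the parameter getters above; the layer name, type, and parameter values are made up for illustration:
```cpp
#include <ie_layers.h>

void readLayerParams() {
    InferenceEngine::LayerParams lp{"unique1", "Unique", InferenceEngine::Precision::FP32};
    InferenceEngine::CNNLayer layer(lp);
    layer.params["sorted"] = "true";  // alpha booleans are accepted
    layer.params["scale"] = "0.5";    // floats are parsed with the "C" locale

    bool sorted  = layer.GetParamAsBool("sorted");                 // no-default overload added above
    bool inverse = layer.GetParamAsBool("return_inverse", false);  // falls back to the default
    float scale  = layer.GetParamAsFloat("scale", 1.0f);
    (void)sorted; (void)inverse; (void)scale;
}
```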
/**
* @deprecated Use GetParamAsBool function for that functionality
*/
INFERENCE_ENGINE_DEPRECATED
bool GetParamsAsBool(const char *param, bool def) const {
@ -588,10 +620,6 @@ public:
}
return *this;
}
/**
* @brief move assignment operator
*/
ConvolutionLayer& operator = (ConvolutionLayer &&) = default;
/**
* @brief copy constructor
*/
@ -696,11 +724,6 @@ public:
}
return *this;
}
/**
* @brief move assignment operator
*/
PoolingLayer& operator = (PoolingLayer &&) = default;
/**
* @brief copy constructor
*/
@ -799,10 +822,6 @@ public:
}
return *this;
}
/**
* @brief move assignment operator
*/
BinaryConvolutionLayer& operator = (BinaryConvolutionLayer &&) = default;
/**
* @brief copy constructor
*/
@ -1020,7 +1039,7 @@ public:
enum eOperation {
Sum = 0, Prod, Max, Sub, Min, Div, Squared_diff, Floor_mod, Pow,
Equal, Not_equal, Less, Less_equal, Greater, Greater_equal,
Logical_AND, Logical_OR, Logical_XOR, Logical_NOT, Mean, Select
Logical_AND, Logical_OR, Logical_XOR, Logical_NOT, Mean
};
/**
@ -1249,7 +1268,11 @@ public:
* - Ct = ft (.) Ct-1 + it (.) ct
* - Ht = ot (.) _h(Ct)
*/
using LSTMCell = RNNCellBase;
class LSTMCell : public RNNCellBase {
public:
using RNNCellBase::RNNCellBase;
using RNNCellBase::operator=;
};
/**
* @brief GRU Cell layer
@ -1284,7 +1307,11 @@ using LSTMCell = RNNCellBase;
* - ht = _g(Wh*[rt (.) Ht-1, Xt] + Bh)
* - Ht = (1 - zt) (.) ht + zt (.) Ht-1
*/
using GRUCell = RNNCellBase;
class GRUCell : public RNNCellBase {
public:
using RNNCellBase::RNNCellBase;
using RNNCellBase::operator=;
};
/**
* @brief RNN Cell layer
@ -1314,7 +1341,12 @@ using GRUCell = RNNCellBase;
*
* - Ht = _f(Wi*[Ht-1, Xt] + Bi)
*/
using RNNCell = RNNCellBase;
class RNNCell : public RNNCellBase {
public:
using RNNCellBase::RNNCellBase;
using RNNCellBase::operator=;
};
/**
* @brief Sequence of recurrent cells
@ -1603,6 +1635,19 @@ public:
};
/**
* @brief This class represents SparseFillEmptyRows layer
* SparseFillEmptyRows fills empty rows in a sparse tensor
*/
class SparseFillEmptyRowsLayer : public CNNLayer {
public:
/**
* @brief Creates a new SparseFillEmptyRowsLayer instance.
*/
using CNNLayer::CNNLayer;
};
/**
* @brief This class represents a standard Reverse Sequence layer
* Reverse Sequence modifies the input tensor according to parameters
@ -1787,4 +1832,61 @@ public:
};
/**
* @brief This class represents Unique layer.
* The Unique operation searches for unique elements in 1-D input
*/
class UniqueLayer : public CNNLayer {
public:
/**
* @brief A flag indicating whether to sort unique elements
*/
bool sorted;
/**
* @brief A flag indicating whether to return indices of input data elements in the output of uniques
*/
bool return_inverse;
/**
* @brief A flag indicating whether to return a number of occurrences for each unique element
*/
bool return_counts;
/**
* @brief Creates a new UniqueLayer instance.
*/
using CNNLayer::CNNLayer;
};
/**
* @brief This class represents a standard NonMaxSuppression layer
*/
class NonMaxSuppressionLayer : public CNNLayer {
public:
/**
* @brief The 'center_point_box' indicates the format of the box data
*/
bool center_point_box = false;
/**
* @brief Creates a new NonMaxSuppressionLayer instance.
*/
using CNNLayer::CNNLayer;
};
/**
* @brief This class represents a standard Scatter layer
*/
class ScatterLayer : public CNNLayer {
public:
/**
* @brief The axis in Dictionary to scatter Indexes from
*/
int axis = 0;
/**
* @brief Creates a new ScatterLayer instance.
*/
using CNNLayer::CNNLayer;
};
} // namespace InferenceEngine

View File

@ -226,6 +226,7 @@ public:
inconsistentLayout = dims.size() != 1;
break;
case Layout::BLOCKED:
case Layout::ANY:
inconsistentLayout = false;
break;
case Layout::NCDHW:

View File

@ -24,6 +24,7 @@
#include "tbb/parallel_for.h"
#include "tbb/task_arena.h"
#include "tbb/parallel_sort.h"
#include "tbb/parallel_reduce.h"
#include "tbb/blocked_range.h"
#include "tbb/blocked_range2d.h"
@ -40,6 +41,7 @@ inline int parallel_get_env_threads() { return 0; }
#define PARTITIONING
#endif
#elif IE_THREAD == IE_THREAD_OMP
#include <algorithm>
#include <cstdlib>
#include <string>
#include <omp.h>
@ -66,6 +68,7 @@ inline int parallel_get_env_threads() {
}
#elif IE_THREAD == IE_THREAD_SEQ
#include <algorithm> // NOLINT
inline int parallel_get_env_threads() { return 1; }
inline int parallel_get_max_threads() { return 1; }
inline int parallel_get_num_threads() { return 1; }
@ -130,6 +133,18 @@ void parallel_nt_static(int nthr, const F &func) {
#endif
}
template <typename I, typename F>
void parallel_sort(I begin, I end, const F &comparator) {
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
tbb::parallel_sort(begin, end, comparator);
#elif IE_THREAD == IE_THREAD_OMP
// TODO: propose OpenMP version
std::sort(begin, end, comparator);
#elif IE_THREAD == IE_THREAD_SEQ
std::sort(begin, end, comparator);
#endif
}
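A minimal usage sketch for the new `parallel_sort` wrapper, assuming the wrapper templates are visible unqualified in the translation unit, as they are used inside the plugins:
```cpp
#include <numeric>
#include <vector>
#include <ie_parallel.hpp>

// Sort detection indices by descending score; dispatches to TBB, OpenMP (std::sort for now),
// or plain std::sort depending on IE_THREAD.
std::vector<int> sortByScore(const std::vector<float>& scores) {
    std::vector<int> order(scores.size());
    std::iota(order.begin(), order.end(), 0);
    parallel_sort(order.begin(), order.end(),
                  [&scores](int a, int b) { return scores[a] > scores[b]; });
    return order;
}
```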
template <typename T0, typename R, typename F>
R parallel_sum(const T0 &D0, const R &input, const F &func) {
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)

View File

@ -35,7 +35,9 @@ public:
* @brief Move constructor
* @param parameter Parameter object
*/
Parameter(Parameter &&parameter) noexcept: ptr(std::move(parameter.ptr)) {}
Parameter(Parameter &&parameter) noexcept {
std::swap(ptr, parameter.ptr);
}
/**
* @brief Copy constructor
@ -233,11 +235,11 @@ private:
}
T& get() & {
return std::get<0>(*this);
return std::get<0>(*static_cast<std::tuple<T>*>(this));
}
const T& get() const & {
return std::get<0>(*this);
return std::get<0>(*static_cast<const std::tuple<T>*>(this));
}
template <class U>

View File

@ -21,6 +21,10 @@
#include <map>
#include <set>
/**
* @def INFERENCE_PLUGIN_API(type)
* @brief Defines Inference Engine Plugin API method
*/
#if defined(_WIN32)
#ifdef IMPLEMENT_INFERENCE_ENGINE_PLUGIN
@ -82,12 +86,14 @@ struct INFERENCE_ENGINE_API_CLASS(QueryNetworkResult) {
/**
* @brief A copy assignment operator
* @param q A value to copy from
* @return A copied object
*/
const QueryNetworkResult & operator= (const QueryNetworkResult & q);
/**
* @brief A move assignment operator
* @param q A value to move from
* @return A moved object
*/
QueryNetworkResult & operator= (QueryNetworkResult && q);
@ -220,7 +226,8 @@ public:
* @param res Reference to query network result
*/
INFERENCE_ENGINE_DEPRECATED
virtual void QueryNetwork(const ICNNNetwork& /*network*/, QueryNetworkResult& res) const noexcept {
virtual void QueryNetwork(const ICNNNetwork& network, QueryNetworkResult& res) const noexcept {
(void)network;
res.rc = InferenceEngine::NOT_IMPLEMENTED;
}
@ -230,8 +237,10 @@ public:
* @param config Map of pairs: (config parameter name, config parameter value)
* @param res Reference to query network result
*/
virtual void QueryNetwork(const ICNNNetwork& /*network*/,
const std::map<std::string, std::string> &/*config*/, QueryNetworkResult& res) const noexcept {
virtual void QueryNetwork(const ICNNNetwork& network,
const std::map<std::string, std::string> & config, QueryNetworkResult& res) const noexcept {
(void)network;
(void)config;
res.rc = InferenceEngine::NOT_IMPLEMENTED;
}
};

View File

@ -24,10 +24,15 @@ namespace Metrics {
#endif
/**
* @brief shortcut for defining common Inference Engine metrics
*/
* @def METRIC_KEY(name)
* @brief shortcut for defining common Inference Engine metrics
*/
#define METRIC_KEY(name) InferenceEngine::Metrics::METRIC_##name
/**
* @def EXEC_NETWORK_METRIC_KEY(name)
* @brief shortcut for defining common Inference Engine ExecutableNetwork metrics
*/
#define EXEC_NETWORK_METRIC_KEY(name) METRIC_KEY(name)
#define DECLARE_METRIC_KEY(name, ...) \
@ -37,8 +42,9 @@ namespace Metrics {
#define DECLARE_EXEC_NETWORK_METRIC_KEY(name, ...) DECLARE_METRIC_KEY(name, __VA_ARGS__)
/**
* @brief shortcut for defining metric values
*/
* @def METRIC_VALUE(name)
* @brief shortcut for defining metric values
*/
#define METRIC_VALUE(name) InferenceEngine::Metrics::name
#define DECLARE_METRIC_VALUE(name) static constexpr auto name = #name
@ -141,15 +147,17 @@ DECLARE_EXEC_NETWORK_METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS, unsigned int);
namespace PluginConfigParams {
/**
* @brief shortcut for defining configuration keys
*/
* @def CONFIG_KEY(name)
* @brief shortcut for defining configuration keys
*/
#define CONFIG_KEY(name) InferenceEngine::PluginConfigParams::_CONFIG_KEY(name)
#define _CONFIG_KEY(name) KEY_##name
#define DECLARE_CONFIG_KEY(name) static constexpr auto _CONFIG_KEY(name) = #name
/**
* @brief shortcut for defining configuration values
*/
* @def CONFIG_VALUE(name)
* @brief shortcut for defining configuration values
*/
#define CONFIG_VALUE(name) InferenceEngine::PluginConfigParams::name
#define DECLARE_CONFIG_VALUE(name) static constexpr auto name = #name
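To make the key mangling concrete, a short sketch that uses the macros above together with the Core API; the device name and the chosen config key/value are placeholders:
```cpp
#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

void tweakCpuPlugin() {
    InferenceEngine::Core core;
    // METRIC_KEY(SUPPORTED_METRICS) expands to InferenceEngine::Metrics::METRIC_SUPPORTED_METRICS.
    auto supported = core.GetMetric("CPU", METRIC_KEY(SUPPORTED_METRICS));
    // CONFIG_KEY(CPU_THREADS_NUM) expands to InferenceEngine::PluginConfigParams::KEY_CPU_THREADS_NUM.
    core.SetConfig({{CONFIG_KEY(CPU_THREADS_NUM), "4"}}, "CPU");
    (void)supported;
}
```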

View File

@ -12,6 +12,7 @@
#include <string>
#include <vector>
#include <cpp/ie_plugin_cpp.hpp>
#include <multi-device/multi_device_config.hpp>
namespace InferenceEngine {
/**
@ -35,6 +36,7 @@ public:
/**
* @deprecated Use InferenceEngine::Core to work with devices by name
* @brief Loads a plugin from directories that is suitable for the device string
* @param deviceName A string value representing target device
* @return A pointer to the plugin
*/
INFERENCE_ENGINE_DEPRECATED
@ -43,6 +45,7 @@ public:
/**
* @deprecated Use InferenceEngine::Core to work with devices by name
* @brief Loads a plugin from directories that is suitable for the device
* @param device An instance of InferenceEngine::TargetDevice
* @return A pointer to the plugin
*/
INFERENCE_ENGINE_DEPRECATED

View File

@ -59,7 +59,8 @@ public:
/**
* @brief Custom precision constructor
* @param byteSize size of elements
*
* @param bitsSize size of elements
* @param name optional name string, used in serialisation
*/
explicit Precision(size_t bitsSize, const char * name = nullptr) {
@ -179,8 +180,9 @@ public:
}
/**
* @brief Returns size in bytes of single element of that precision
* @deprecated : size of precision will be reported in bits in future releases
* @brief Returns size of single element of that precision in bits
*
* @returns Number of bits per element
*/
size_t size() const {
if (precisionInfo.bitsSize == 0) {
@ -195,9 +197,21 @@ public:
}
protected:
/**
* @brief Returns PrecisionInfo by its name
*
* @param name Name of precision
*/
template<Precision::ePrecision precision>
static PrecisionInfo makePrecisionInfo(const char * name);
/**
* @brief Compare two c-strings
*
* @param l Const pointer to first string
* @param r Const pointer to another string
* @returns True if strings are the same
*/
static bool areSameStrings(const char *l, const char *r) noexcept {
if (l == r)
return true;
@ -211,6 +225,9 @@ public:
return *l == *r;
}
/**
* @brief Return PrecisionInfo
*/
static PrecisionInfo getPrecisionInfo(ePrecision v) {
#define CASE(x) case x: return makePrecisionInfo<x>(#x);
switch (v) {
@ -334,6 +351,13 @@ inline std::ostream & operator << (std::ostream &out, const InferenceEngine::Pre
return out << Precision(p).name();
}
inline constexpr uint32_t getPrecisionMask(InferenceEngine::Precision::ePrecision precision1,
InferenceEngine::Precision::ePrecision precision2,
InferenceEngine::Precision::ePrecision precision3 = InferenceEngine::Precision::MIXED,
InferenceEngine::Precision::ePrecision precision4 = InferenceEngine::Precision::MIXED) {
return (precision1) | (precision2 << 8) | (precision3 << 16) | (precision4 << 24);
}
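A brief sketch of how such a mask is typically consumed; the precision pairs are chosen arbitrarily:
```cpp
#include <ie_precision.hpp>

// Dispatch on an (input, output) precision pair packed into one integer.
bool isSupportedPair(InferenceEngine::Precision::ePrecision in,
                     InferenceEngine::Precision::ePrecision out) {
    using InferenceEngine::Precision;
    switch (InferenceEngine::getPrecisionMask(in, out)) {
        case InferenceEngine::getPrecisionMask(Precision::FP32, Precision::FP32):
        case InferenceEngine::getPrecisionMask(Precision::FP32, Precision::FP16):
            return true;
        default:
            return false;
    }
}
```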
/** @endcond */
} // namespace InferenceEngine

View File

@ -17,17 +17,44 @@
namespace InferenceEngine {
/**
* @brief Structure with information about Primitive
*/
struct PrimitiveInfo {
/**
* @brief A shared pointer to PrimitiveInfo object
*/
using Ptr = std::shared_ptr<PrimitiveInfo>;
std::string sId; // some internal id, could be used as a name
std::string sType; // implementation type of this kernel
int iPreAllocatedMemory; // mainly the allocation of the output tensor
/**
* @brief Some internal id, could be used as a name
*/
std::string sId;
/**
* @brief Implementation type of this kernel
*/
std::string sType;
/**
* @brief Mainly the allocation of the output tensor
*/
int iPreAllocatedMemory;
/**
* @brief Vector of TensorInfo objects that are related to input tensors
*/
std::vector<TensorInfo::Ptr> inputs;
/**
* @brief Vector of TensorInfo object that are related to outputs tensors
*/
std::vector<TensorInfo::Ptr> outputs;
std::map<std::string, std::string> extraInfo; // any other important textual information user might find interesting about this kernel
/**
* @brief Any other important textual information the user might find interesting about this kernel
*/
std::map<std::string, std::string> extraInfo;
};
} // namespace InferenceEngine

View File

@ -15,12 +15,22 @@
namespace InferenceEngine {
/**
* @struct TensorInfo
* @brief This structure describes tensor information
*/
struct TensorInfo {
/**
* @brief A shared pointer to the TensorInfo object
*/
using Ptr = std::shared_ptr<TensorInfo>;
// memory layout BFYX, BXYF (enum)
// size
// precision
/**
* @brief A map of extra info:
* - memory layout BFYX, BXYF (enum)
* - size
* - precision
*/
std::map<std::string, std::string> extraInfo;
};

View File

@ -0,0 +1,36 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief A header that defines advanced properties related to the Multi_Device plugin.
* These properties should be used in SetConfig() and LoadNetwork() methods
*
* @file multi_device_config.hpp
*/
#pragma once
#include <string>
#include "ie_plugin_config.hpp"
namespace InferenceEngine {
namespace MultiDeviceConfigParams {
/**
* @def MULTI_CONFIG_KEY(name)
* @brief A macro which provides a MULTI-mangled name for configuration key with name `name`
*/
#define MULTI_CONFIG_KEY(name) InferenceEngine::MultiDeviceConfigParams::_CONFIG_KEY(MULTI_##name)
#define DECLARE_MULTI_CONFIG_KEY(name) DECLARE_CONFIG_KEY(MULTI_##name)
#define DECLARE_MULTI_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(MULTI_##name)
/**
* @brief Device Priorities config option, with comma-separated devices listed in the desired priority
*/
DECLARE_MULTI_CONFIG_KEY(DEVICE_PRIORITIES);
} // namespace MultiDeviceConfigParams
} // namespace InferenceEngine
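A hedged sketch of how the key above is typically passed when compiling a network for the MULTI device; the priority list and the `network` object are assumptions:
```cpp
#include <map>
#include <string>
#include <ie_core.hpp>
#include <multi-device/multi_device_config.hpp>

void loadOnMulti(const InferenceEngine::CNNNetwork& network) {
    InferenceEngine::Core core;
    // Prefer GPU, fall back to CPU; equivalent in effect to the "MULTI:GPU,CPU" device string.
    std::map<std::string, std::string> config = {
        {MULTI_CONFIG_KEY(DEVICE_PRIORITIES), "GPU,CPU"}
    };
    auto exeNetwork = core.LoadNetwork(network, "MULTI", config);
    (void)exeNetwork;
}
```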

View File

@ -0,0 +1,184 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief A header that defines advanced properties related to VPU plugins.
* These properties should be used in SetConfig() and LoadNetwork() methods of plugins
*
* @file vpu_plugin_config.hpp
*/
#pragma once
#include <string>
#include <vector>
#include "ie_plugin_config.hpp"
#include "ie_api.h"
//
// Options
//
#define VPU_HDDL_CONFIG_KEY(name) InferenceEngine::VPUConfigParams::_CONFIG_KEY(VPU_HDDL_##name)
#define VPU_HDDL_CONFIG_VALUE(name) InferenceEngine::VPUConfigParams::VPU_HDDL_##name
#define DECLARE_VPU_HDDL_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_HDDL_##name)
#define DECLARE_VPU_HDDL_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(VPU_HDDL_##name)
//
// Metrics
//
#define VPU_HDDL_METRIC(name) METRIC_KEY(VPU_HDDL_##name)
#define DECLARE_VPU_HDDL_METRIC(name, ...) DECLARE_METRIC_KEY(VPU_HDDL_##name, __VA_ARGS__)
namespace InferenceEngine {
namespace Metrics {
/**
* @brief Metric to get an int of the device number, String value is METRIC_VPU_HDDL_DEVICE_NUM
*/
DECLARE_VPU_HDDL_METRIC(DEVICE_NUM, int);
/**
* @brief Metric to get a std::vector<std::string> of device names, String value is METRIC_VPU_HDDL_DEVICE_NAME
*/
DECLARE_VPU_HDDL_METRIC(DEVICE_NAME, std::vector<std::string>);
/**
* @brief Metric to get a std::vector<std::string> of device models, String value is METRIC_VPU_HDDL_DEVICE_MODEL
*/
DECLARE_VPU_HDDL_METRIC(DEVICE_MODEL, std::vector<std::string>);
/**
* @brief Metric to get a std::vector<float> of device thermal, String value is METRIC_VPU_HDDL_DEVICE_THERMAL
*/
DECLARE_VPU_HDDL_METRIC(DEVICE_THERMAL, std::vector<float>);
/**
* @brief Metric to get a std::vector<uint32> of device ids, String value is METRIC_VPU_HDDL_DEVICE_ID
*/
DECLARE_VPU_HDDL_METRIC(DEVICE_ID, std::vector<unsigned int>);
/**
* @brief Metric to get a std::vector<int> of device subclasses, String value is METRIC_VPU_HDDL_DEVICE_SUBCLASS
*/
DECLARE_VPU_HDDL_METRIC(DEVICE_SUBCLASS, std::vector<int>);
/**
* @brief Metric to get a std::vector<uint32> of device total memory, String value is METRIC_VPU_HDDL_MEMORY_TOTAL
*/
DECLARE_VPU_HDDL_METRIC(DEVICE_MEMORY_TOTAL, std::vector<unsigned int>);
/**
* @brief Metric to get a std::vector<uint32> of device used memory, String value is METRIC_VPU_HDDL_DEVICE_MEMORY_USED
*/
DECLARE_VPU_HDDL_METRIC(DEVICE_MEMORY_USED, std::vector<unsigned int>);
/**
* @brief Metric to get a std::vector<float> of device utilization, String value is METRIC_VPU_HDDL_DEVICE_UTILIZATION
*/
DECLARE_VPU_HDDL_METRIC(DEVICE_UTILIZATION, std::vector<float>);
/**
* @brief Metric to get a std::vector<std::string> of stream ids, String value is METRIC_VPU_HDDL_DEVICE_STREAM_ID
*/
DECLARE_VPU_HDDL_METRIC(STREAM_ID, std::vector<std::string>);
/**
* @brief Metric to get a std::vector<std::string> of device tags, String value is METRIC_VPU_HDDL_DEVICE_TAG
*/
DECLARE_VPU_HDDL_METRIC(DEVICE_TAG, std::vector<std::string>);
} // namespace Metrics
namespace VPUConfigParams {
/**
* @brief [Only for HDDLPlugin]
* Type: Arbitrary non-empty string. If empty (""), the option is treated as not set, default: "";
* This option allows specifying the number of MYX devices used for inference of a specific executable network.
* Note: Only one network would be allocated to one device.
* The number of devices for the tag is specified in the hddl_service.config file.
* Example:
* "service_settings":
* {
* "graph_tag_map":
* {
* "tagA":3
* }
* }
* It means that an executable network marked with tagA will be executed on 3 devices
*/
DECLARE_VPU_HDDL_CONFIG_KEY(GRAPH_TAG);
/**
* @brief [Only for HDDLPlugin]
* Type: Arbitrary non-empty string. If empty (""), the option is treated as not set, default: "";
* This config makes the executable network be allocated on one certain device (instead of multiple devices),
* and all inference through this executable network will be done on this device.
* Note: Only one network would be allocated to one device.
* The number of devices which will be used for stream-affinity must be specified in hddl_service.config file.
* Example:
* "service_settings":
* {
* "stream_device_number":5
* }
* It means that 5 devices will be used for stream-affinity
*/
DECLARE_VPU_HDDL_CONFIG_KEY(STREAM_ID);
/**
* @brief [Only for HDDLPlugin]
* Type: Arbitrary non-empty string. If empty (""), the option is treated as not set, default: "";
* This config allows the user to control devices flexibly. It gives a "tag" to a certain device while
* allocating a network to it. Afterwards, the user can allocate/deallocate networks to this device using this "tag".
* Devices used for such a use case are controlled by a so-called "Bypass Scheduler" in the HDDL backend, and the number
* of such devices needs to be specified in the hddl_service.config file.
* Example:
* "service_settings":
* {
* "bypass_device_number": 5
* }
* It means that 5 devices will be used for the Bypass scheduler.
*/
DECLARE_VPU_HDDL_CONFIG_KEY(DEVICE_TAG);
/**
* @brief [Only for HDDLPlugin]
* Type: "YES/NO", default is "NO".
* This config is a sub-config of DEVICE_TAG, and only available when "DEVICE_TAG" is set. After a user loads a
* network, the user gets a handle for the network.
* If "YES", the allocated network is bound to the device (with the specified "DEVICE_TAG"), which means all subsequent
* inference through this network handle will be executed on this device only.
* If "NO", the allocated network is not bound to the device (with the specified "DEVICE_TAG"). If the same network
* is allocated on multiple other devices (also with BIND_DEVICE set to "NO"), then inference through any handle of these
* networks may be executed on any of the devices that have the network loaded.
*/
DECLARE_VPU_HDDL_CONFIG_KEY(BIND_DEVICE);
/**
* @brief [Only for HDDLPlugin]
* Type: A signed int wrapped in a string, default is "0".
* This config is a sub-config of DEVICE_TAG, and only available when "DEVICE_TAG" is set and "BIND_DEVICE" is "False".
* When there are multiple devices running a certain network (the same network running on multiple devices in the Bypass Scheduler),
* the device with a larger number has a higher priority and will be fed more inference tasks.
*/
DECLARE_VPU_HDDL_CONFIG_KEY(RUNTIME_PRIORITY);
/**
* @brief [Only for HDDLPlugin]
* Type: "YES/NO", default is "NO".
* SGAD is short for "Single Graph All Device". With this scheduler, once the application allocates one network, all devices
* (managed by the SGAD scheduler) will be loaded with this graph. The number of networks that can be loaded to one device
* can exceed one. Once the application deallocates one network from a device, all devices will unload the network.
*/
DECLARE_VPU_HDDL_CONFIG_KEY(USE_SGAD);
} // namespace VPUConfigParams
} // namespace InferenceEngine
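As an illustration, a sketch combining DEVICE_TAG, BIND_DEVICE, and RUNTIME_PRIORITY when loading on HDDL; the tag name and the `network` object are placeholders, the matching entries must exist in hddl_service.config, and the include path assumes the public vpu/ header layout:
```cpp
#include <map>
#include <string>
#include <ie_core.hpp>
#include <vpu/hddl_plugin_config.hpp>

void loadOnTaggedDevice(const InferenceEngine::CNNNetwork& network) {
    InferenceEngine::Core core;
    std::map<std::string, std::string> config = {
        {VPU_HDDL_CONFIG_KEY(DEVICE_TAG), "tagA"},    // tag declared in hddl_service.config
        {VPU_HDDL_CONFIG_KEY(BIND_DEVICE), "NO"},     // allow scheduling across tagged devices
        {VPU_HDDL_CONFIG_KEY(RUNTIME_PRIORITY), "2"}  // larger number gets more inference tasks
    };
    auto exeNetwork = core.LoadNetwork(network, "HDDL", config);
    (void)exeNetwork;
}
```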

View File

@ -15,6 +15,7 @@
#include "ie_plugin_config.hpp"
#include "myriad_plugin_config.hpp"
#include "hddl_plugin_config.hpp"
#include "ie_api.h"
//
@ -105,6 +106,8 @@ DECLARE_VPU_CONFIG_KEY(COMPUTE_LAYOUT);
DECLARE_VPU_CONFIG_VALUE(AUTO);
DECLARE_VPU_CONFIG_VALUE(NCHW);
DECLARE_VPU_CONFIG_VALUE(NHWC);
DECLARE_VPU_CONFIG_VALUE(NCDHW);
DECLARE_VPU_CONFIG_VALUE(NDHWC);
/**
* @brief This option allows to pass custom layers binding xml.

View File

@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
#
cmake_minimum_required (VERSION 2.8.11)
cmake_minimum_required (VERSION 2.8.12)
project(Samples)
@ -150,8 +150,6 @@ macro(ie_add_sample)
if(NOT OpenCV_FOUND)
message(WARNING "OPENCV is disabled or not found, " ${IE_SAMPLE_NAME} " skipped")
return()
else()
add_definitions(-DUSE_OPENCV)
endif()
endif()
@ -164,6 +162,9 @@ macro(ie_add_sample)
# Create executable file from sources
add_executable(${IE_SAMPLE_NAME} ${IE_SAMPLE_SOURCES} ${IE_SAMPLES_HEADERS})
if(IE_SAMPLE_OPENCV_DEPENDENCIES)
target_compile_definitions(${IE_SAMPLE_NAME} PRIVATE USE_OPENCV)
endif()
if(WIN32)
set_target_properties(${IE_SAMPLE_NAME} PROPERTIES COMPILE_PDB_NAME ${IE_SAMPLE_NAME})
@ -176,7 +177,6 @@ macro(ie_add_sample)
target_link_libraries(${IE_SAMPLE_NAME} PRIVATE ${OpenCV_LIBRARIES} ${InferenceEngine_LIBRARIES}
${IE_SAMPLE_DEPENDENCIES} IE::ie_cpu_extension gflags)
if(UNIX)
target_link_libraries(${IE_SAMPLE_NAME} PRIVATE pthread)
endif()
@ -195,12 +195,12 @@ endmacro()
# use this flag if you need to throw custom message in case if the IE package is not found.
if (IE_NOT_FOUND_MESSAGE)
find_package(InferenceEngine 2.0 QUIET)
find_package(InferenceEngine 2.1 QUIET)
if (NOT(InferenceEngine_FOUND))
message(FATAL_ERROR ${IE_NOT_FOUND_MESSAGE})
endif()
else()
find_package(InferenceEngine 2.0 REQUIRED)
find_package(InferenceEngine 2.1 REQUIRED)
endif()
# collect all samples subdirectories

View File

@ -1,21 +1,18 @@
# Benchmark C++ Application
# Benchmark C++ Tool
This topic demonstrates how to use the Benchmark Application to estimate deep learning inference performance on
supported devices. Performance can be measured for two inference modes: synchronous (latency-oriented) and asynchronous (throughput-oriented).
This topic demonstrates how to use the Benchmark C++ Tool to estimate deep learning inference performance on supported devices. Performance can be measured for two inference modes: synchronous (latency-oriented) and asynchronous (throughput-oriented).
> **NOTE:** This topic describes usage of C++ implementation of the Benchmark Application. For the Python* implementation, refer to [Benchmark Application (Python*)](./inference-engine/ie_bridges/python/sample/benchmark_app/README.md).
> **NOTE:** This topic describes usage of C++ implementation of the Benchmark Tool. For the Python* implementation, refer to [Benchmark Python* Tool](./inference-engine/tools/benchmark_tool/README.md).
## How It Works
Upon start-up, the application reads command-line parameters and loads a network and images/binary files to the Inference Engine
plugin, which is chosen depending on a specified device. The number of infer requests and execution approach depend
on the mode defined with the `-api` command-line parameter.
Upon start-up, the application reads command-line parameters and loads a network and images/binary files to the Inference Engine plugin, which is chosen depending on a specified device. The number of infer requests and execution approach depend on the mode defined with the `-api` command-line parameter.
> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
> **NOTE**: By default, Inference Engine samples, tools and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
If you run the application in the synchronous mode, it creates one infer request and executes the `Infer` method.
If you run the application in the asynchronous mode, it creates as many infer requests as specified in the `-nireq` command-line parameter and executes the `StartAsync` method for each of them. If `-nireq` is not set, the demo will use the default value for specified device.
If you run the application in the asynchronous mode, it creates as many infer requests as specified in the `-nireq` command-line parameter and executes the `StartAsync` method for each of them. If `-nireq` is not set, the application will use the default value for specified device.
The number of execution steps is defined by one of the following parameters:
* Number of iterations specified with the `-niter` command-line argument
@ -45,14 +42,19 @@ The application also saves executable graph information serialized to an XML file
`-exec_graph_path` parameter.
## Running
## Run the Tool
Notice that the benchmark_app usually produces optimal performance for any device out of the box.
**So in most cases you don't need to play the app options explicitly and the plain device name is enough**, e.g.:
**So in most cases you don't need to play with the app options explicitly and the plain device name is enough**, for example, for CPU:
```sh
./benchmark_app -m <model> -i <input> -d CPU
```
$benchmark_app -m <model> -i <input> -d CPU
```
As explained in the [Introduction to Performance Topics](./docs/IE_DG/Intro_to_Performance.md) section, it is preferable to use the FP16 IR for the model.
But it is still may be non-optimal for some cases, especially for very small networks. More details can read in [Introduction to Performance Topics](./docs/IE_DG/Intro_to_Performance.md).
As explained in the [Introduction to Performance Topics](./docs/IE_DG/Intro_to_Performance.md) section, for all devices, including the new [MULTI device](./docs/IE_DG/supported_plugins/MULTI.md), it is preferable to use the FP16 IR for the model.
Also if latency of the CPU inference on the multi-socket machines is of concern, please refer to the same
[Introduction to Performance Topics](./docs/IE_DG/Intro_to_Performance.md) document.
Running the application with the `-h` option yields the following usage message:
```
@ -70,6 +72,7 @@ Options:
-m "<path>" Required. Path to an .xml file with a trained model.
-d "<device>" Optional. Specify a target device to infer on (the list of available devices is shown below). Default value is CPU.
Use "-d HETERO:<comma-separated_devices_list>" format to specify HETERO plugin.
Use "-d MULTI:<comma-separated_devices_list>" format to specify MULTI plugin.
The application looks for a suitable plugin for the specified device.
-l "<absolute_path>" Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.
Or
@ -84,8 +87,11 @@ Options:
CPU-specific performance options:
-nstreams "<integer>" Optional. Number of streams to use for inference on the CPU or/and GPU in throughput mode
(for HETERO device case use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>).
-nthreads "<integer>" Optional. Number of threads to use for inference on the CPU (including HETERO case).
(for HETERO and MULTI device cases use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>).
Default value is determined automatically for a device.
Please note that although the automatic selection usually provides a reasonable performance,
it still may be non-optimal for some cases, especially for very small networks.
-nthreads "<integer>" Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases).
-pin "YES"/"NO" Optional. Enable ("YES" is default value) or disable ("NO") CPU threads pinning for CPU-involved inference.
Statistics dumping options:
@ -102,48 +108,74 @@ If a model has only image input(s), please provide a folder with images or a path to an image as input.
If a model has some specific input(s) (not images), please prepare a binary file(s), which is filled with data of appropriate precision and provide a path to them as input.
If a model has mixed input types, the input folder should contain all required files. Image inputs are filled with image files one by one. Binary inputs are filled with binary files one by one.
To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
To run the tool, you can use public or Intel's pre-trained models. To download the models, use the OpenVINO [Model Downloader](./tools/downloader/README.md) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
> **NOTE**: Before running the demo with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
> **NOTE**: Before running the tool with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
For example, to perform inference on CPU in the synchronous mode and get estimated performance metrics for AlexNet model,
run the following command:
## Examples of Running the Tool
```sh
./benchmark_app -i <path_to_image>/inputImage.bmp -m <path_to_model>/alexnet_fp32.xml -d CPU -api sync
```
This section provides step-by-step instructions on how to run the Benchmark Tool with the `googlenet-v1` public model on CPU or FPGA devices. As an input, the `car.png` file from the `<INSTALL_DIR>/deployment_tools/demo/` directory is used.
For the asynchronous mode:
```sh
./benchmark_app -i <path_to_image>/inputImage.bmp -m <path_to_model>/alexnet_fp32.xml -d CPU -api async
```
> **NOTE:** The Internet access is required to execute the following steps successfully. If you have access to the Internet through the proxy server only, please make sure that it is configured in your OS environment.
## Demo Output
1. Download the model. Go to the Model Downloader directory and run the `downloader.py` script, specifying the model name and the directory to download the model to:
```sh
cd <INSTALL_DIR>/deployment_tools/open_model_zoo/tools/downloader
```
```sh
python3 downloader.py --name googlenet-v1 -o <models_dir>
```
2. Convert the model to the Inference Engine IR format. Go to the Model Optimizer directory and run the `mo.py` script, specifying the path to the model, the data type (which must be FP32 for CPU and FPGA), and the output directory to generate the IR files:
```sh
cd <INSTALL_DIR>/deployment_tools/model_optimizer
```
```sh
python3 mo.py --input_model <models_dir>/public/googlenet-v1/googlenet-v1.caffemodel --data_type FP32 --output_dir <ir_dir>
```
3. Run the tool, specifying the `<INSTALL_DIR>/deployment_tools/demo/car.png` file as an input image, the IR of the `googlenet-v1` model, and a device to perform inference on. The following commands demonstrate running the Benchmark Tool in the asynchronous mode on CPU and FPGA devices:
* On CPU:
```sh
./benchmark_app -m <ir_dir>/googlenet-v1.xml -d CPU -api async -i <INSTALL_DIR>/deployment_tools/demo/car.png --progress true
```
* On FPGA:
```sh
./benchmark_app -m <ir_dir>/googlenet-v1.xml -d HETERO:FPGA,CPU -api async -i <INSTALL_DIR>/deployment_tools/demo/car.png --progress true
```
The application outputs the number of executed iterations, total duration of execution, latency and throughput.
Additionally, if you set the `-report_type` parameter, the application outputs statistics report.
If you set the `-pc` parameter, the application outputs performance counters.
If you set `-exec_graph_path`, the application reports executable graph information serialized.
Additionally, if you set the `-report_type` parameter, the application outputs a statistics report. If you set the `-pc` parameter, the application outputs performance counters. If you set `-exec_graph_path`, the application serializes executable graph information to the specified file. All measurements, including per-layer PM counters, are reported in milliseconds.
```
[Step 8/9] Measuring performance (Start inference asyncronously, 60000 ms duration, 4 inference requests in parallel using 4 streams)
Progress: [....................] 100.00% done
Below are fragments of sample output for CPU and FPGA devices:
[Step 9/9] Dumping statistics report
[ INFO ] Statistics collecting was not requested. No reports are dumped.
Progress: [....................] 100.00% done
* For CPU:
```
[Step 8/9] Measuring performance (Start inference asyncronously, 60000 ms duration, 4 inference requests in parallel using 4 streams)
Progress: [....................] 100.00% done
Count: 4612 iterations
Duration: 60110.04 ms
Latency: 50.99 ms
Throughput: 76.73 FPS
[Step 9/9] Dumping statistics report
[ INFO ] Statistics collecting was not requested. No reports are dumped.
Progress: [....................] 100.00% done
```
Count: 4612 iterations
Duration: 60110.04 ms
Latency: 50.99 ms
Throughput: 76.73 FPS
```
All measurements including per-layer PM counters are reported in milliseconds.
* For FPGA:
```
[Step 10/11] Measuring performance (Start inference asynchronously, 5 inference requests using 4 streams for CPU, limits: 120000 ms duration)
Progress: [....................] 100% done
[Step 11/11] Dumping statistics report
Count: 102515 iterations
Duration: 120007.38 ms
Latency: 5.84 ms
Throughput: 854.24 FPS
```
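For example, a run like the ones above can additionally dump a detailed per-layer counters report, print performance counters to the console, and serialize the executable graph by combining the reporting flags described earlier (paths are placeholders):
```sh
./benchmark_app -m <ir_dir>/googlenet-v1.xml -d CPU -api async \
    -i <INSTALL_DIR>/deployment_tools/demo/car.png --progress true \
    -report_type detailed_counters -report_folder <report_dir> \
    -pc -exec_graph_path <output_dir>/exec_graph.xml
```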
## See Also
* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
* [Model Downloader](./tools/downloader/README.md)

View File

@ -23,7 +23,9 @@ static const char api_message[] = "Optional. Enable Sync/Async API. Default valu
/// @brief message for assigning cnn calculation to device
static const char target_device_message[] = "Optional. Specify a target device to infer on (the list of available devices is shown below). " \
"Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify HETERO plugin. ";
"Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify HETERO plugin. " \
"Use \"-d MULTI:<comma-separated_devices_list>\" format to specify MULTI plugin. " \
"The application looks for a suitable plugin for the specified device.";
/// @brief message for iterations count
static const char iterations_count_message[] = "Optional. Number of iterations. " \
@ -37,11 +39,14 @@ static const char execution_time_message[] = "Optional. Time in seconds to execu
/// @brief message for #threads for CPU inference
static const char infer_num_threads_message[] = "Optional. Number of threads to use for inference on the CPU "
"(including HETERO case).";
"(including HETERO and MULTI cases).";
/// @brief message for #streams for CPU inference
static const char infer_num_streams_message[] = "Optional. Number of streams to use for inference on the CPU or/and GPU in throughput mode "
"(for HETERO device case use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>)";
"(for HETERO and MULTI device cases use format <dev1>:<nstreams1>,<dev2>:<nstreams2> or just <nstreams>). "
"Default value is determined automatically for a device.Please note that although the automatic selection "
"usually provides a reasonable performance, it still may be non - optimal for some cases, especially for "
"very small networks. See sample's README for more details.";
/// @brief message for user library argument
static const char custom_cpu_library_message[] = "Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.";
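As an illustration of the `-d MULTI:<...>` and per-device `-nstreams` formats described in the help messages above, a hypothetical benchmark invocation might look like this (model and image paths are placeholders):
```sh
# Run on CPU and GPU together through the MULTI plugin,
# with an explicit number of throughput streams per device.
./benchmark_app -m <path_to_model>/model.xml -i <path_to_image>/inputImage.bmp \
    -api async -d MULTI:CPU,GPU -nstreams CPU:4,GPU:2
```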

View File

@ -62,6 +62,10 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) {
throw std::logic_error(err);
}
if ((FLAGS_report_type == averageCntReport) && ((FLAGS_d.find("MULTI") != std::string::npos))) {
throw std::logic_error("only " + std::string(detailedCntReport) + " report type is supported for MULTI device");
}
return true;
}
@ -89,10 +93,20 @@ static void next_step(const std::string additional_info = "") {
<< (additional_info.empty() ? "" : " (" + additional_info + ")") << std::endl;
}
template <typename T>
T getMedianValue(const std::vector<T> &vec) {
std::vector<T> sortedVec(vec);
std::sort(sortedVec.begin(), sortedVec.end());
return (sortedVec.size() % 2 != 0) ?
sortedVec[sortedVec.size() / 2ULL] :
(sortedVec[sortedVec.size() / 2ULL] + sortedVec[sortedVec.size() / 2ULL - 1ULL]) / static_cast<T>(2.0);
}
/**
* @brief The entry point of the benchmark application
*/
int main(int argc, char *argv[]) {
std::shared_ptr<StatisticsReport> statistics;
try {
// ----------------- 1. Parsing and validating input arguments -------------------------------------------------
next_step();
@ -101,10 +115,30 @@ int main(int argc, char *argv[]) {
return 0;
}
if (!FLAGS_report_type.empty()) {
std::vector<gflags::CommandLineFlagInfo> flags;
StatisticsReport::Parameters command_line_arguments;
gflags::GetAllFlags(&flags);
for (auto &flag : flags) {
if (!flag.is_default) {
command_line_arguments.push_back({ flag.name, flag.current_value });
}
}
statistics = std::make_shared<StatisticsReport>(StatisticsReport::Config{FLAGS_report_type, FLAGS_report_folder});
statistics->addParameters(StatisticsReport::Category::COMMAND_LINE_PARAMETERS, command_line_arguments);
}
/** This vector stores paths to the processed images **/
std::vector<std::string> inputFiles;
parseInputFilesArguments(inputFiles);
if (FLAGS_nstreams.empty()) {
slog::warn << "-nstreams default value is determined automatically for a device. "
"Although the automatic selection usually provides a reasonable performance,"
"but it still may be non-optimal for some cases, for more information look at README." << slog::endl<< slog::endl;
}
// ----------------- 2. Loading the Inference Engine -----------------------------------------------------------
next_step();
@ -141,9 +175,25 @@ int main(int argc, char *argv[]) {
slog::info << "Loading network files" << slog::endl;
CNNNetReader netBuilder;
auto startTime = Time::now();
netBuilder.ReadNetwork(FLAGS_m);
const std::string binFileName = fileNameNoExt(FLAGS_m) + ".bin";
netBuilder.ReadWeights(binFileName);
auto float_to_string = [] (const float number) {
std::stringstream ss;
ss << std::fixed << std::setprecision(2) << number;
return ss.str();
};
auto get_total_ms_time = [ &startTime ] () {
return std::chrono::duration_cast<ns>(Time::now() - startTime).count() * 0.000001;
};
auto duration_ms = float_to_string(get_total_ms_time());
slog::info << "Read network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{"read network time (ms)", duration_ms}
});
CNNNetwork cnnNetwork = netBuilder.getNetwork();
const InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
@ -180,8 +230,9 @@ int main(int argc, char *argv[]) {
}
const size_t batchSize = cnnNetwork.getBatchSize();
const Precision precision = cnnNetwork.getPrecision();
slog::info << (FLAGS_b != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize <<
", precision: " << cnnNetwork.getPrecision() << slog::endl;
", precision: " << precision << slog::endl;
// ----------------- 5. Configuring input ----------------------------------------------------------------------
next_step();
@ -198,7 +249,8 @@ int main(int argc, char *argv[]) {
bool perf_counts = (FLAGS_report_type == detailedCntReport ||
FLAGS_report_type == averageCntReport ||
FLAGS_pc);
FLAGS_pc ||
!FLAGS_exec_graph_path.empty());
auto devices = parseDevices(device_name);
std::map<std::string, uint32_t> device_nstreams = parseValuePerDevice(devices, FLAGS_nstreams);
@ -208,8 +260,13 @@ int main(int argc, char *argv[]) {
if (FLAGS_nthreads != 0)
ie.SetConfig({{ CONFIG_KEY(CPU_THREADS_NUM), std::to_string(FLAGS_nthreads) }}, device);
// pin threads for CPU portion of inference
ie.SetConfig({{ CONFIG_KEY(CPU_BIND_THREAD), FLAGS_pin }}, device);
if ((device_name.find("MULTI") != std::string::npos) &&
(device_name.find("GPU") != std::string::npos)) {
ie.SetConfig({{ CONFIG_KEY(CPU_BIND_THREAD), CONFIG_VALUE(NO) }}, device);
} else {
// pin threads for CPU portion of inference
ie.SetConfig({{ CONFIG_KEY(CPU_BIND_THREAD), FLAGS_pin }}, device);
}
// for CPU execution, more throughput-oriented execution via streams
if (FLAGS_api == "async")
@ -223,6 +280,13 @@ int main(int argc, char *argv[]) {
(device_nstreams.count(device) > 0 ? std::to_string(device_nstreams.at(device)) :
"GPU_THROUGHPUT_AUTO") }}, device);
device_nstreams[device] = std::stoi(ie.GetConfig(device, CONFIG_KEY(GPU_THROUGHPUT_STREAMS)).as<std::string>());
if ((device_name.find("MULTI") != std::string::npos) &&
(device_name.find("CPU") != std::string::npos)) {
// multi-device execution with the CPU + GPU performs best with GPU trottling hint,
// which releases another CPU thread (that is otherwise used by the GPU driver for active polling)
ie.SetConfig({{ CLDNN_CONFIG_KEY(PLUGIN_THROTTLE), "1" }}, "GPU");
}
} else if (device == "MYRIAD") {
ie.SetConfig({{ CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_NONE) },
{ VPU_CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_WARNING) }}, device);
@ -234,7 +298,15 @@ int main(int argc, char *argv[]) {
std::map<std::string, std::string> config = {{ CONFIG_KEY(PERF_COUNT), perf_counts ? CONFIG_VALUE(YES) :
CONFIG_VALUE(NO) }};
startTime = Time::now();
ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, device_name, config);
duration_ms = float_to_string(get_total_ms_time());
slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{"load network time (ms)", duration_ms}
});
// ----------------- 8. Setting optimal runtime parameters -----------------------------------------------------
next_step();
@ -274,6 +346,28 @@ int main(int argc, char *argv[]) {
}
uint64_t duration_nanoseconds = getDurationInNanoseconds(duration_seconds);
if (statistics) {
statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG,
{
{"topology", cnnNetwork.getName()},
{"target device", device_name},
{"API", FLAGS_api},
{"precision", std::string(precision.name())},
{"batch size", std::to_string(batchSize)},
{"number of iterations", std::to_string(niter)},
{"number of parallel infer requests", std::to_string(nireq)},
{"duration (ms)", std::to_string(getDurationInMilliseconds(duration_seconds))},
});
for (auto& nstreams : device_nstreams) {
std::stringstream ss;
ss << "number of " << nstreams.first << " streams";
statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG,
{
{ss.str(), std::to_string(nstreams.second)},
});
}
}
// ----------------- 9. Creating infer requests and filling input blobs ----------------------------------------
next_step();
@ -333,7 +427,7 @@ int main(int argc, char *argv[]) {
inferRequestsQueue.waitAll();
inferRequestsQueue.resetTimes();
const auto startTime = Time::now();
startTime = Time::now();
auto execTime = std::chrono::duration_cast<ns>(Time::now() - startTime).count();
/** Start inference & calculate performance **/
@ -373,35 +467,34 @@ int main(int argc, char *argv[]) {
// wait the latest inference executions
inferRequestsQueue.waitAll();
StatisticsReport statistics({ FLAGS_d,
FLAGS_api,
batchSize,
nireq,
niter,
getDurationInMilliseconds(duration_seconds),
FLAGS_nthreads,
device_nstreams,
FLAGS_pin,
FLAGS_report_type,
FLAGS_report_folder
});
if (perf_counts) {
for (auto& request : inferRequestsQueue.requests) {
statistics.addPerfCounts(request->getPerformanceCounts());
}
}
statistics.addLatencies(inferRequestsQueue.getLatencies());
double latency = getMedianValue<double>(inferRequestsQueue.getLatencies());
double totalDuration = inferRequestsQueue.getDurationInMilliseconds();
double fps = (FLAGS_api == "sync") ? batchSize * 1000.0 / statistics.getMedianLatency() :
double fps = (FLAGS_api == "sync") ? batchSize * 1000.0 / latency :
batchSize * 1000.0 * iteration / totalDuration;
if (statistics) {
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{"total execution time (ms)", float_to_string(totalDuration)},
{"total number of iterations", std::to_string(iteration)},
});
if (device_name.find("MULTI") == std::string::npos) {
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{"latency (ms)", float_to_string(latency)},
});
}
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{"throughput", float_to_string(fps)}
});
}
progressBar.finish();
// ----------------- 11. Dumping statistics report -------------------------------------------------------------
next_step();
statistics.dump(fps, iteration, totalDuration);
if (!FLAGS_exec_graph_path.empty()) {
try {
CNNNetwork execGraphInfo = exeNetwork.GetExecGraphInfo();
@ -412,19 +505,40 @@ int main(int argc, char *argv[]) {
}
}
if (FLAGS_pc) {
if (perf_counts) {
std::vector<std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>> perfCounts;
for (size_t ireq = 0; ireq < nireq; ireq++) {
slog::info << "Pefrormance counts for " << ireq << "-th infer request:" << slog::endl;
printPerformanceCounts(inferRequestsQueue.requests[ireq]->getPerformanceCounts(), std::cout, getFullDeviceName(ie, FLAGS_d), false);
auto reqPerfCounts = inferRequestsQueue.requests[ireq]->getPerformanceCounts();
if (FLAGS_pc) {
slog::info << "Pefrormance counts for " << ireq << "-th infer request:" << slog::endl;
printPerformanceCounts(reqPerfCounts, std::cout, getFullDeviceName(ie, FLAGS_d), false);
}
perfCounts.push_back(reqPerfCounts);
}
if (statistics) {
statistics->dumpPerformanceCounters(perfCounts);
}
}
if (statistics)
statistics->dump();
std::cout << "Count: " << iteration << " iterations" << std::endl;
std::cout << "Duration: " << totalDuration << " ms" << std::endl;
std::cout << "Latency: " << statistics.getMedianLatency() << " ms" << std::endl;
std::cout << "Throughput: " << fps << " FPS" << std::endl;
std::cout << "Duration: " << float_to_string(totalDuration) << " ms" << std::endl;
if (device_name.find("MULTI") == std::string::npos)
std::cout << "Latency: " << float_to_string(latency) << " ms" << std::endl;
std::cout << "Throughput: " << float_to_string(fps) << " FPS" << std::endl;
} catch (const std::exception& ex) {
slog::err << ex.what() << slog::endl;
if (statistics) {
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{"error", ex.what()},
});
statistics->dump();
}
return 3;
}
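As a quick sanity check of the asynchronous throughput formula above (`batchSize * 1000.0 * iteration / totalDuration`), the following minimal standalone sketch plugs in the CPU sample numbers quoted earlier in this document and, assuming batch size 1, reproduces the reported value:
```cpp
#include <cstdio>

int main() {
    // Values taken from the CPU sample output shown earlier (async mode).
    const double batchSize       = 1.0;       // assumed: no -b override
    const double iterations      = 4612.0;    // "Count" line
    const double totalDurationMs = 60110.04;  // "Duration" line

    // Same formula as the async branch above.
    const double fps = batchSize * 1000.0 * iterations / totalDurationMs;
    std::printf("Throughput: %.2f FPS\n", fps);  // prints 76.73 FPS
    return 0;
}
```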

View File

@ -10,215 +10,127 @@
#include "statistics_report.hpp"
void StatisticsReport::addPerfCounts(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &pmStat) {
if (_config.report_type == averageCntReport || _config.report_type == detailedCntReport) {
// collect per-iteration statistics only in case of enabled median/detailed statistic collecting
_performanceCounters.push_back(pmStat);
}
void StatisticsReport::addParameters(const Category &category, const Parameters& parameters) {
if (_parameters.count(category) == 0)
_parameters[category] = parameters;
else
_parameters[category].insert(_parameters[category].end(), parameters.begin(), parameters.end());
}
void StatisticsReport::addLatencies(const std::vector<double> &latencies) {
_latencies.insert(_latencies.end(), latencies.begin(), latencies.end());
}
void StatisticsReport::dump() {
CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_report.csv");
void StatisticsReport::dump(const double &fps, const size_t &iteration_number, const double &totalExecTime) {
if (_config.report_type.empty()) {
slog::info << "Statistics collecting was not requested. No reports are dumped." << slog::endl;
return;
}
std::string separator =
#if defined _WIN32 || defined __CYGWIN__
# if defined UNICODE
L"\\";
# else
"\\";
# endif
#else
"/";
#endif
if (_config.report_folder.empty())
separator = "";
CsvDumper dumper(true, _config.report_folder + separator + "benchmark_" + _config.report_type + "_report.csv");
// resulting number of columns in csv file depends on the report_type. If it's noCntReport, then
// no PM data is collected and there are only 3 columns in the file (in configuration section). If it's
// averageCntReport then median PM values are collected per each layer and the number of columns is 6.
// Example from GPU:
//
// layer name;exec status;layer type;exec type;real time;cpu time;
// conv1;EXECUTED;Convolution;convolution_gpu_bfyx_gemm_like;615;3;
// Here, all the data are taken from InferenceEngine::InferenceEngineProfileInfo.
//
// In case of detailedCntReport the number of columns is 4 + _config.nireq * 2, because first 4 parameters
// are the same but realTime and cpuTime can be different on each iteration (example from 5 GPU requests):
// conv1;EXECUTED;Convolution;convolution_gpu_bfyx_gemm_like;630,3;617,3;616,3;615,3;617,3;
size_t numOfColumns = 0;
if (_config.report_type == noCntReport) {
numOfColumns = 3;
} else if (_config.report_type == averageCntReport) {
numOfColumns = 6;
} else {
// for detailedCntReport
numOfColumns = 4 + _config.nireq * 2;
}
auto completeCsvRow = [](CsvDumper &dumper, size_t numOfColumns, size_t filled) {
for (size_t i = 0; i < numOfColumns - filled; i++)
dumper << "";
dumper.endLine();
};
// dump execution configuration
dumper << "Configuration setup";
completeCsvRow(dumper, numOfColumns, 1);
dumper << "config option" << "CLI parameter" << "value";
completeCsvRow(dumper, numOfColumns, 3);
dumper << "target device" << " -d" << _config.device;
completeCsvRow(dumper, numOfColumns, 3);
dumper << "execution mode" << " -api" << _config.api;
completeCsvRow(dumper, numOfColumns, 3);
dumper << "batch size" << " -b" << _config.batch;
completeCsvRow(dumper, numOfColumns, 3);
dumper << "number of iterations" << " -niter" << _config.niter;
completeCsvRow(dumper, numOfColumns, 3);
dumper << "number of parallel infer requests" << " -nireq" << _config.nireq;
completeCsvRow(dumper, numOfColumns, 3);
dumper << "duration in ms" << " -t" << _config.duration;
completeCsvRow(dumper, numOfColumns, 3);
dumper << "number of CPU threads" << " -nthreads" << _config.cpu_nthreads;
completeCsvRow(dumper, numOfColumns, 3);
for (auto& item : _config.nstreams)
dumper << "number of " << item.first << " streams" << " -nstreams" << item.second;
completeCsvRow(dumper, numOfColumns, 3);
dumper << "CPU pinning enabled" << " -pin" << _config.cpu_pin;
completeCsvRow(dumper, numOfColumns, 3);
dumper.endLine();
// write PM data from each iteration
if (!_performanceCounters.empty()) {
if (_config.report_type != averageCntReport && _config.report_type != detailedCntReport) {
throw std::logic_error("PM data can only be collected for average or detailed report types");
}
// this vector is sorted according to network layers execution order.
auto performanceMapSorted = preparePmStatistics();
dumper << "Performance counters";
completeCsvRow(dumper, numOfColumns, 1);
dumper << "layer name" << "exec status" << "layer type" << "exec type";
if (_config.report_type == averageCntReport) {
dumper << "average real time" << "average cpu time";
completeCsvRow(dumper, numOfColumns, 6);
} else {
// detailedCntReport case
for (size_t i = 0; i< _performanceCounters.size(); i++) {
dumper << "realTime_req" + std::to_string(i) << "cpuTime_req" + std::to_string(i);
}
completeCsvRow(dumper, numOfColumns, 4 + _performanceCounters.size() * 2);
}
for (const auto &layer : performanceMapSorted) {
dumper << layer.first; // layer name
switch (layer.second.status) {
case InferenceEngine::InferenceEngineProfileInfo::EXECUTED:
dumper << "EXECUTED";
break;
case InferenceEngine::InferenceEngineProfileInfo::NOT_RUN:
dumper << "NOT_RUN";
break;
case InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT:
dumper << "OPTIMIZED_OUT";
break;
}
dumper << layer.second.layer_type << layer.second.exec_type;
if (_config.report_type == averageCntReport) {
// write average realTime and cpuTime from each processed request for current layer
dumper <<
std::to_string(std::accumulate(_perLayerRealTime[layer.first].begin(),
_perLayerRealTime[layer.first].end(), 0.0) / _perLayerRealTime[layer.first].size() / 1000.0) <<
std::to_string(std::accumulate(_perLayerCpuTime[layer.first].begin(),
_perLayerCpuTime[layer.first].end(), 0.0) / _perLayerCpuTime[layer.first].size() / 1000.0);
} else {
// write all realTime and cpuTime from each processed request for current layer
for (size_t i = 0; i < _config.nireq; i++) {
dumper << std::to_string(_perLayerRealTime[layer.first][i] / 1000.0) << std::to_string(_perLayerCpuTime[layer.first][i] / 1000.0);
}
}
auto dump_parameters = [ &dumper ] (const Parameters &parameters) {
for (auto& parameter : parameters) {
dumper << parameter.first << parameter.second;
dumper.endLine();
}
};
if (_parameters.count(Category::COMMAND_LINE_PARAMETERS)) {
dumper << "Command line parameters";
dumper.endLine();
dump_parameters(_parameters.at(Category::COMMAND_LINE_PARAMETERS));
dumper.endLine();
}
if (_parameters.count(Category::RUNTIME_CONFIG)) {
dumper << "Configuration setup";
dumper.endLine();
dump_parameters(_parameters.at(Category::RUNTIME_CONFIG));
dumper.endLine();
}
if (_parameters.count(Category::EXECUTION_RESULTS)) {
dumper << "Execution results";
dumper.endLine();
dump_parameters(_parameters.at(Category::EXECUTION_RESULTS));
dumper.endLine();
}
slog::info << "Statistics report is stored to " << dumper.getFilename() << slog::endl;
}
void StatisticsReport::dumpPerformanceCountersRequest(CsvDumper& dumper,
const PerformaceCounters& perfCounts) {
auto performanceMapSorted = perfCountersSorted(perfCounts);
long long total = 0L;
long long total_cpu = 0L;
dumper << "layerName" << "execStatus" << "layerType" << "execType";
dumper << "realTime (ms)" << "cpuTime (ms)";
dumper.endLine();
for (const auto &layer : performanceMapSorted) {
dumper << layer.first; // layer name
switch (layer.second.status) {
case InferenceEngine::InferenceEngineProfileInfo::EXECUTED:
dumper << "EXECUTED";
break;
case InferenceEngine::InferenceEngineProfileInfo::NOT_RUN:
dumper << "NOT_RUN";
break;
case InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT:
dumper << "OPTIMIZED_OUT";
break;
}
dumper << layer.second.layer_type << layer.second.exec_type;
dumper << std::to_string(layer.second.realTime_uSec / 1000.0) << std::to_string(layer.second.cpu_uSec/ 1000.0);
total += layer.second.realTime_uSec;
total_cpu += layer.second.cpu_uSec;
dumper.endLine();
}
dumper << "Total" << "" << "" << "";
dumper << total / 1000.0 << total_cpu / 1000.0;
dumper.endLine();
dumper.endLine();
}
void StatisticsReport::dumpPerformanceCounters(const std::vector<PerformaceCounters> &perfCounts) {
if ((_config.report_type.empty()) || (_config.report_type == noCntReport)) {
slog::info << "Statistics collecting for performance counters was not requested. No reports are dumped." << slog::endl;
return;
}
if (perfCounts.empty()) {
slog::info << "Peformance counters are empty. No reports are dumped." << slog::endl;
return;
}
CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_" + _config.report_type + "_report.csv");
if (_config.report_type == detailedCntReport) {
dumper << "Statistics";
completeCsvRow(dumper, numOfColumns, 1);
dumper << "metric";
for (size_t i = 0; i < _totalLayersTime.size(); i++) {
// detailedCntReport case
dumper << "req" + std::to_string(i);
for (auto& pc : perfCounts) {
dumpPerformanceCountersRequest(dumper, pc);
}
completeCsvRow(dumper, numOfColumns, 4 + _totalLayersTime.size());
dumper << "latencies";
for (const auto &lat : _totalLayersTime) {
dumper << lat / 1000.0;
}
completeCsvRow(dumper, numOfColumns, _totalLayersTime.size());
dumper.endLine();
} else if (_config.report_type == averageCntReport) {
auto getAveragePerformanceCounters = [ &perfCounts ] () {
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> performanceCountersAvg;
// sort PM data of first processed request according to layers execution order
auto performanceMapSorted = perfCountersSorted(perfCounts[0]);
// iterate over each processed infer request and handle its PM data
for (size_t i = 0; i < perfCounts.size(); i++) {
// iterate over each layer from sorted vector and add required PM data to the per-layer maps
for (const auto& pm : performanceMapSorted) {
if (performanceCountersAvg.count(pm.first) == 0) {
performanceCountersAvg[pm.first] = perfCounts.at(i).at(pm.first);
} else {
performanceCountersAvg[pm.first].realTime_uSec += perfCounts.at(i).at(pm.first).realTime_uSec;
performanceCountersAvg[pm.first].cpu_uSec += perfCounts.at(i).at(pm.first).cpu_uSec;
}
}
}
for (auto& pm : performanceCountersAvg) {
pm.second.realTime_uSec /= perfCounts.size();
pm.second.cpu_uSec /= perfCounts.size();
}
return performanceCountersAvg;
};
dumpPerformanceCountersRequest(dumper, getAveragePerformanceCounters());
} else {
throw std::logic_error("PM data can only be collected for average or detailed report types");
}
dumper << "Execution results";
completeCsvRow(dumper, numOfColumns, 1);
dumper << "number of iterations" << iteration_number;
completeCsvRow(dumper, numOfColumns, 2);
dumper << "latency" << getMedianValue<double>(_latencies);
completeCsvRow(dumper, numOfColumns, 2);
dumper << "throughput" << fps;
completeCsvRow(dumper, numOfColumns, 2);
dumper << "total execution time" << totalExecTime;
completeCsvRow(dumper, numOfColumns, 2);
slog::info << "statistics report is stored to " << dumper.getFilename() << slog::endl;
}
double StatisticsReport::getMedianLatency() {
return getMedianValue<double>(_latencies);
}
std::vector<std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>> StatisticsReport::preparePmStatistics() {
if (_performanceCounters.empty()) {
throw std::logic_error("preparePmStatistics() was called when no PM data was collected");
}
// sort PM data of first processed request according to layers execution order
auto performanceMapSorted = perfCountersSorted(_performanceCounters[0]);
// iterate over each processed infer request and handle its PM data
for (auto &pm : _performanceCounters) {
long long total = 0L;
// iterate over each layer from sorted vector and add required PM data to the per-layer maps
for (const auto & it : performanceMapSorted) {
_perLayerRealTime[it.first].push_back(pm[it.first].realTime_uSec);
_perLayerCpuTime[it.first].push_back(pm[it.first].cpu_uSec);
total += pm[it.first].realTime_uSec;
}
_totalLayersTime.push_back(total);
}
return performanceMapSorted;
}
template <typename T>
T StatisticsReport::getMedianValue(const std::vector<T> &vec) {
std::vector<T> sortedVec(vec);
std::sort(sortedVec.begin(), sortedVec.end());
return (sortedVec.size() % 2 != 0) ?
sortedVec[sortedVec.size() / 2ULL] :
(sortedVec[sortedVec.size() / 2ULL] + sortedVec[sortedVec.size() / 2ULL - 1ULL]) / static_cast<T>(2.0);
slog::info << "Pefromance counters report is stored to " << dumper.getFilename() << slog::endl;
}

View File

@ -22,51 +22,51 @@ static constexpr char detailedCntReport[] = "detailed_counters";
/// @brief Responsible for collecting of statistics and dumping to .csv file
class StatisticsReport {
public:
typedef std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> PerformaceCounters;
typedef std::vector<std::pair<std::string, std::string>> Parameters;
struct Config {
std::string device;
std::string api;
size_t batch;
size_t nireq;
size_t niter;
uint64_t duration;
size_t cpu_nthreads;
std::map<std::string, uint32_t> nstreams;
std::string cpu_pin;
std::string report_type;
std::string report_folder;
};
enum class Category {
COMMAND_LINE_PARAMETERS,
RUNTIME_CONFIG,
EXECUTION_RESULTS,
};
explicit StatisticsReport(Config config) : _config(std::move(config)) {
if (_config.nireq > 0) {
_performanceCounters.reserve(_config.nireq);
}
_separator =
#if defined _WIN32 || defined __CYGWIN__
# if defined UNICODE
L"\\";
# else
"\\";
# endif
#else
"/";
#endif
if (_config.report_folder.empty())
_separator = "";
}
void addPerfCounts(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &pmStat);
void addParameters(const Category &category, const Parameters& parameters);
void addLatencies(const std::vector<double> &latency);
void dump();
void dump(const double &fps, const size_t &numProcessedReq, const double &totalExecTime);
double getMedianLatency();
void dumpPerformanceCounters(const std::vector<PerformaceCounters> &perfCounts);
private:
std::vector<std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>> preparePmStatistics();
template <typename T>
T getMedianValue(const std::vector<T> &vec);
// Contains PM data for each processed infer request
std::vector<std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>> _performanceCounters;
// Contains latency of each processed infer request
std::vector<double> _latencies;
void dumpPerformanceCountersRequest(CsvDumper& dumper,
const PerformaceCounters& perfCounts);
// configuration of current benchmark execution
const Config _config;
// mapping from network layer to a vector of calculated RealTime values from each processed infer request.
std::map<std::string, std::vector<long long>> _perLayerRealTime;
// mapping from network layer to a vector of calculated CPU Time values from each processed infer request.
std::map<std::string, std::vector<long long>> _perLayerCpuTime;
std::vector<long long> _totalLayersTime;
// parameters
std::map<Category, Parameters> _parameters;
// csv separator
std::string _separator;
};
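A minimal sketch of how this reporting class is driven, mirroring the calls made from the benchmark application above; the literal parameter values are illustrative only:
```cpp
#include "statistics_report.hpp"

void reportExample() {
    // Configure a detailed-counters report written to the current directory.
    StatisticsReport statistics(StatisticsReport::Config{"detailed_counters", ""});

    // Each category becomes its own labelled block in the resulting CSV.
    statistics.addParameters(StatisticsReport::Category::RUNTIME_CONFIG,
                             {{"target device", "CPU"}, {"batch size", "1"}});
    statistics.addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
                             {{"throughput", "76.73"}});

    // Writes benchmark_report.csv; per-layer data goes through dumpPerformanceCounters().
    statistics.dump();
}
```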

View File

@ -12,4 +12,3 @@ std::vector<std::string> parseDevices(const std::string& device_string);
uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device);
std::map<std::string, uint32_t> parseValuePerDevice(const std::vector<std::string>& devices,
const std::string& values_string);
uint32_t deviceDefaultRequestsNumber(const std::string& device);

View File

@ -12,24 +12,21 @@ file (GLOB LIBRARY_HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/*.h
)
# Find OpenCV components if exist
find_package(OpenCV COMPONENTS imgcodecs videoio imgproc QUIET)
if(NOT(OpenCV_FOUND))
message(WARNING "OPENCV is disabled or not found, " ${TARGET_NAME} " is built without OPENCV support")
endif()
# Create named folders for the sources within the .vcproj
# Empty name lists them directly under the .vcproj
source_group("src" FILES ${LIBRARY_SRC})
source_group("include" FILES ${LIBRARY_HEADERS})
# Create library file from sources.
add_library(${TARGET_NAME} SHARED ${MAIN_SRC} ${LIBRARY_HEADERS})
if(OpenCV_FOUND)
target_link_libraries(${TARGET_NAME} PRIVATE ${OpenCV_LIBRARIES})
target_compile_definitions(${TARGET_NAME} PRIVATE USE_OPENCV)
# Find OpenCV components if exist
find_package(OpenCV COMPONENTS imgcodecs videoio imgproc QUIET)
if(NOT OpenCV_FOUND)
message(WARNING "OPENCV is disabled or not found, " ${TARGET_NAME} " will be built without OPENCV support")
else()
target_link_libraries(${TARGET_NAME} PRIVATE ${OpenCV_LIBRARIES})
target_compile_definitions(${TARGET_NAME} PRIVATE USE_OPENCV)
endif()
target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_FORMAT_READER)

View File

@ -1,4 +1,4 @@
// Copyright (C) 2018-2019 Intel Corporation
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@ -6,31 +6,33 @@
#if defined(_WIN32)
#ifndef NOMINMAX
# define NOMINMAX
#ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN_UNDEF
#endif
#include <winsock2.h>
#include <windows.h>
#include <stdlib.h>
#ifndef NOMINMAX
# define NOMINMAX
# define NOMINMAX_UNDEF
#endif
#else
#include <unistd.h>
#include <cstdlib>
#include <string.h>
#if defined(_M_IX86) && !defined(_X86_) && !defined(_AMD64_)
# define _X86_
#endif
#if defined(_M_X64) && !defined(_X86_) && !defined(_AMD64_)
# define _AMD64_
#endif
#include <string>
#include <windef.h>
#include <fileapi.h>
#include <Winbase.h>
#include <sys/stat.h>
#if defined(WIN32)
// Copied from linux libc sys/stat.h:
#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
#endif
// Copied from linux libc sys/stat.h:
#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
struct dirent {
char *d_name;
@ -38,10 +40,9 @@ struct dirent {
explicit dirent(const wchar_t *wsFilePath) {
size_t i;
auto slen = wcslen(wsFilePath);
d_name = static_cast<char*>(malloc(slen + 1));
d_name = static_cast<char *>(malloc(slen + 1));
wcstombs_s(&i, d_name, slen + 1, wsFilePath, slen);
}
~dirent() {
free(d_name);
}
@ -60,6 +61,11 @@ class DIR {
}
public:
DIR(const DIR &other) = delete;
DIR(DIR &&other) = delete;
DIR& operator=(const DIR &other) = delete;
DIR& operator=(DIR &&other) = delete;
explicit DIR(const char *dirPath) : next(nullptr) {
std::string ws = dirPath;
if (endsWith(ws, "\\"))
@ -72,6 +78,7 @@ public:
~DIR() {
if (!next) delete next;
next = nullptr;
FindClose(hFind);
}
@ -96,7 +103,7 @@ public:
};
static DIR *opendir(const char* dirPath) {
static DIR* opendir(const char *dirPath) {
auto dp = new DIR(dirPath);
if (!dp->isValid()) {
delete dp;
@ -105,10 +112,27 @@ static DIR *opendir(const char* dirPath) {
return dp;
}
static struct dirent *readdir(DIR *dp) {
static struct dirent* readdir(DIR *dp) {
return dp->nextEnt();
}
static void closedir(DIR *dp) {
delete dp;
}
#ifdef WIN32_LEAN_AND_MEAN_UNDEF
# undef WIN32_LEAN_AND_MEAN
# undef WIN32_LEAN_AND_MEAN_UNDEF
#endif
#ifdef NOMINMAX_UNDEF
# undef NOMINMAX_UNDEF
# undef NOMINMAX
#endif
#else
#include <sys/types.h>
#include <dirent.h>
#endif
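Because this header mirrors the POSIX `opendir`/`readdir`/`closedir` interface on Windows, calling code can stay platform-neutral. A minimal usage sketch under that assumption (the include is left as a comment because the exact header path is not shown here):
```cpp
#include <string>
#include <vector>
// #include "w_dirent.h"  // this shim on Windows; <dirent.h> is used elsewhere

std::vector<std::string> listDirectory(const std::string &path) {
    std::vector<std::string> names;
    DIR *dir = opendir(path.c_str());
    if (dir == nullptr) {
        return names;  // directory could not be opened
    }
    while (struct dirent *entry = readdir(dir)) {
        names.emplace_back(entry->d_name);  // file or subdirectory name
    }
    closedir(dir);
    return names;
}
```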

View File

@ -27,7 +27,7 @@
#include <ie_blob.h>
#ifndef UNUSED
#ifdef WIN32
#if defined (_MSC_VER) && !defined (__clang__)
#define UNUSED
#else
#define UNUSED __attribute__((unused))
@ -1120,5 +1120,4 @@ inline void showAvailableDevices() {
for (const auto& device : devices) {
std::cout << " " << device;
}
std::cout << " HDDL" << std::endl;
}

View File

@ -4,7 +4,8 @@
#pragma once
#include <iostream>
#include <cstdio>
#include <sstream>
#include <iomanip>
/**
@ -12,12 +13,15 @@
* @brief A ConsoleProgress class provides functionality for printing progress dynamics
*/
class ConsoleProgress {
static const int DEFAULT_DETALIZATION = 20;
static const size_t DEFAULT_DETALIZATION = 20;
static const size_t DEFAULT_PERCENT_TO_UPDATE_PROGRESS = 1;
size_t total;
size_t current = 0;
size_t cur_progress = 0;
size_t prev_progress = 0;
bool stream_output;
size_t detalization;
size_t percent_to_update;
public:
/**
@ -25,18 +29,19 @@ public:
* @param _total - maximum value that is correspondent to 100%
* @param _detalization - number of symbols(.) to use to represent progress
*/
explicit ConsoleProgress(size_t _total, bool _stream_output = false, size_t _detalization = DEFAULT_DETALIZATION) :
total(_total), detalization(_detalization) {
explicit ConsoleProgress(size_t _total,
bool _stream_output = false,
size_t _percent_to_update = DEFAULT_PERCENT_TO_UPDATE_PROGRESS,
size_t _detalization = DEFAULT_DETALIZATION) :
total(_total), detalization(_detalization), percent_to_update(_percent_to_update) {
stream_output = _stream_output;
if (total == 0) {
total = 1;
}
std::cout << std::unitbuf;
}
/**
* @brief Shows progress with current data. Progress is shown from the beginning of the current line.
* @return
*/
void showProgress() const {
std::stringstream strm;
@ -45,28 +50,34 @@ public:
}
strm << "Progress: [";
size_t i = 0;
for (; i < detalization * current / total; i++) {
for (; i < detalization * cur_progress / total; i++) {
strm << ".";
}
for (; i < detalization; i++) {
strm << " ";
}
strm << "] " << std::fixed << std::setprecision(2) << 100 * static_cast<float>(current) / total << "% done";
strm << "] " << std::setw(3) << 100 * cur_progress / total << "% done";
if (stream_output) {
std::cout << strm.str() << std::endl;
} else {
std::cout << strm.str() << std::flush;
strm << std::endl;
}
std::fputs(strm.str().c_str(), stdout);
std::fflush(stdout);
}
/**
* @brief Updates current value and progressbar
* @param newProgress - new value to represent
*/
void updateProgress(size_t newProgress) {
current = newProgress;
if (current > total) current = total;
showProgress();
void updateProgress() {
if (cur_progress > total) cur_progress = total;
size_t prev_percent = 100 * prev_progress / total;
size_t cur_percent = 100 * cur_progress / total;
if (prev_progress == 0 ||
cur_progress == total ||
prev_percent + percent_to_update <= cur_percent) {
showProgress();
prev_progress = cur_progress;
}
}
/**
@ -74,10 +85,11 @@ public:
* @param add - value to add
*/
void addProgress(int add) {
if (add < 0 && -add > static_cast<int>(current)) {
add = -static_cast<int>(current);
if (add < 0 && -add > static_cast<int>(cur_progress)) {
add = -static_cast<int>(cur_progress);
}
updateProgress(current + add);
cur_progress += add;
updateProgress();
}
/**
@ -85,6 +97,9 @@ public:
* @return
*/
void finish() {
std::cerr << std::nounitbuf << "\n";
std::stringstream strm;
strm << std::endl;
std::fputs(strm.str().c_str(), stdout);
std::fflush(stdout);
}
};
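A brief usage sketch of the updated progress bar (the include is left as a comment because the exact header path is not shown here; the loop and timing values are arbitrary):
```cpp
#include <chrono>
#include <cstddef>
#include <thread>
// #include <samples/console_progress.hpp>  // header path assumed

void runWithProgress() {
    const std::size_t totalSteps = 500;
    ConsoleProgress progressBar(totalSteps);  // defaults: no stream output, redraw every 1%

    for (std::size_t step = 0; step < totalSteps; ++step) {
        std::this_thread::sleep_for(std::chrono::milliseconds(2));  // simulate work
        progressBar.addProgress(1);  // redraws only when the displayed percentage advances
    }
    progressBar.finish();  // prints the trailing newline
}
```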

View File

@ -10,8 +10,8 @@ It demonstrates how to use the following Inference Engine API in applications:
There is also an API introduced to crop a ROI object and set it as input without additional memory re-allocation.
To properly demonstrate this API, it is required to run several networks in pipeline which is out of scope of this sample.
Please refer to [Security Barrier Camera Demo](./inference-engine/samples/security_barrier_camera_demo/README.md), or
[Crossroad Camera Demo](./inference-engine/samples/crossroad_camera_demo/README.md) with an example of using of new crop ROI API.
Please refer to [Security Barrier Camera Demo](./demos/security_barrier_camera_demo/README.md), or
[Crossroad Camera Demo](./demos/crossroad_camera_demo/README.md) with an example of using of new crop ROI API.
Refer to [Integrate the Inference Engine New Request API with Your Application](./docs/IE_DG/Integrate_with_customer_application_new_API.md) for details.

View File

@ -1,8 +1,8 @@
# Hello Query Device C++ Sample
This topic demonstrates how to run the Hello Query Device sample application, which queries Inference Engine devices and prints their metrics and default configuration values. The sample shows how to use [Query Device API feature](./docs/IE_DG/QueryDeviceAPI.md).
This topic demonstrates how to run the Hello Query Device sample application, which queries Inference Engine devices and prints their metrics and default configuration values. The sample shows how to use [Query Device API feature](./docs/IE_DG/InferenceEngine_QueryAPI.md).
> **NOTE:** This topic describes usage of C++ implementation of the Query Device Sample.
> For the Python* implementation, refer to [Hello Query Device Python* Sample](./inference-engine/ie_brudges/python/sample/hello_query_device/README.md)
> For the Python* implementation, refer to [Hello Query Device Python* Sample](./inference-engine/ie_bridges/python/sample/hello_query_device/README.md)
## Running
To see the queried information, run the following:

View File

@ -3,6 +3,8 @@
This topic demonstrates how to run the Object Detection sample application, which does inference using object detection
networks like SSD-VGG on Intel® Processors and Intel® HD Graphics.
> **NOTE:** This topic describes usage of C++ implementation of the Object Detection Sample SSD. For the Python* implementation, refer to [Object Detection Python* Sample SSD](./inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md).
## How It Works
Upon the start-up the sample application reads command line parameters and loads a network and an image to the Inference

View File

@ -48,17 +48,15 @@ will be removed in GNA hardware version 3 and higher.
#### Execution Modes
Several execution modes are supported via the `-d` flag. If the device
is set to `CPU`, then all calculations are performed on the CPU using the
CPU Plugin. If the device is set to `GNA_AUTO`, then the GNA hardware is
used if available and the driver is installed. Otherwise, the GNA device is
emulated in fast-but-not-bit-exact mode. If the device is set to `GNA_HW`,
then the GNA hardware is used if available and the driver is installed.
Otherwise, an error will occur. If the device is set to `GNA_SW`, the
GNA device is emulated in fast-but-not-bit-exact mode. Finally, if
the device is set to `GNA_SW_EXACT`, the GNA device is emulated in
bit-exact mode.
`GNA_SW_FP32` mode is used for calculation on the CPU using the GNA Plugin.
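For example, the same model can be scored first on GNA hardware (falling back to software emulation if the device or driver is unavailable) and then in bit-exact emulation by changing only the `-d` value. The sample binary name and the `-i` input flag below are taken from typical sample usage rather than from the excerpt above, and the file paths are placeholders:
```sh
./speech_sample -d GNA_AUTO     -m <path_to_model>/model.xml -i <path_to_ark>/input.ark -o scores.ark
./speech_sample -d GNA_SW_EXACT -m <path_to_model>/model.xml -i <path_to_ark>/input.ark -o scores.ark
```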
#### Loading and Saving Models
@ -94,7 +92,7 @@ Options:
-m "<path>" Required. Path to an .xml file with a trained model (required if -rg is missing).
-o "<path>" Optional. Output file name (default name is "scores.ark").
-l "<absolute_path>" Required for CPU custom layers. Absolute path to a shared library with the kernel implementations.
-d "<device>" Optional. Specify a target device to infer on. CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_EXACT, GNA_SW_FP32 and HETERO with combination of GNA
-d "<device>" Optional. Specify a target device to infer on. CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_EXACT and HETERO with combination of GNA
as the primary device and CPU as a secondary (e.g. HETERO:GNA,CPU) are supported. The list of available devices is shown below. The sample will look for a suitable plugin for device specified.
-p Optional. Plugin name. For example, GPU. If this parameter is set, the sample will look for this plugin only
-pc Optional. Enables performance report

View File

@ -706,7 +706,7 @@ int main(int argc, char *argv[]) {
outputInfo = netBuilder.getNetwork().getOutputsInfo();
}
Blob::Ptr ptrOutputBlob = inferRequests[0].inferRequest.GetBlob(cOutputInfo.rbegin()->first);
Blob::Ptr ptrOutputBlob = inferRequests.begin()->inferRequest.GetBlob(cOutputInfo.rbegin()->first);
for (auto &item : outputInfo) {
DataPtr outData = item.second;
@ -839,7 +839,7 @@ int main(int argc, char *argv[]) {
if (!FLAGS_o.empty()) {
outputFrame =
&ptrScores.front() + numScoresPerFrame * sizeof(float) * (inferRequest.frameIndex);
Blob::Ptr outputBlob = inferRequest.inferRequest.GetBlob(cOutputInfo.begin()->first);
Blob::Ptr outputBlob = inferRequest.inferRequest.GetBlob(cOutputInfo.rbegin()->first);
auto byteSize = inferRequest.numFramesThisBatch * numScoresPerFrame * sizeof(float);
std::memcpy(outputFrame,
outputBlob->buffer(),
@ -848,7 +848,7 @@ int main(int argc, char *argv[]) {
if (!FLAGS_r.empty()) {
Blob::Ptr outputBlob = inferRequest.inferRequest.GetBlob(cOutputInfo.begin()->first);
CompareScores(outputBlob->buffer().as<float *>(),
CompareScores(outputBlob->buffer().as<float*>(),
&ptrReferenceScores[inferRequest.frameIndex *
numFrameElementsReference *
numBytesPerElementReference],
@ -876,7 +876,7 @@ int main(int argc, char *argv[]) {
ptrInputBlobs.push_back(inferRequest.inferRequest.GetBlob(input.first));
}
for (size_t i = 0; i < numInputArkFiles; i++) {
for (size_t i = 0; i < numInputArkFiles; ++i) {
std::memcpy(ptrInputBlobs[i]->buffer(),
inputFrame[i],
ptrInputBlobs[i]->byteSize());
@ -890,14 +890,14 @@ int main(int argc, char *argv[]) {
frameIndex += numFramesThisBatch;
for (size_t j = 0; j < inputArkFiles.size(); j++) {
if (FLAGS_cw_l > 0 || FLAGS_cw_r > 0) {
int i = frameIndex - FLAGS_cw_l;
if (i > 0 && i < static_cast<int>(numFramesArkFile)) {
int idx = frameIndex - FLAGS_cw_l;
if (idx > 0 && idx < static_cast<int>(numFramesArkFile)) {
inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
} else if (i >= static_cast<int>(numFramesArkFile)) {
inputFrame[j] = &ptrUtterances[0].front() +
} else if (idx >= static_cast<int>(numFramesArkFile)) {
inputFrame[j] = &ptrUtterances[j].front() +
(numFramesArkFile - 1) * sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
} else if (i < 0) {
inputFrame[j] = &ptrUtterances[0].front();
} else if (idx <= 0) {
inputFrame[j] = &ptrUtterances[j].front();
}
} else {
inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
@ -905,7 +905,6 @@ int main(int argc, char *argv[]) {
}
inferRequestFetched |= true;
}
if (!inferRequestFetched) {
std::this_thread::sleep_for(std::chrono::milliseconds(1));
continue;

View File

@ -23,7 +23,7 @@ static const char plugin_message[] = "Plugin name. For example MKLDNNPlugin. If
"the sample will look for this plugin only";
/// @brief message for assigning cnn calculation to device
static const char target_device_message[] = "Specify a target device to infer on. CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_FP32 "
static const char target_device_message[] = "Specify a target device to infer on. CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, "
"GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU"
" as a secondary (e.g. HETERO:GNA,CPU) are supported. The list of available devices is shown below. "
"The sample will look for a suitable plugin for device specified.";

View File

@ -1,4 +0,0 @@
[submodule "doc"]
path = doc
url = https://github.com/gflags/gflags.git
branch = gh-pages

View File

@ -24,10 +24,10 @@ if (ENABLE_GNA)
add_subdirectory(gna_plugin)
endif()
add_subdirectory(inference_engine)
add_subdirectory(hetero_plugin)
add_subdirectory(inference_engine)
set(InferenceEngine_LIBRARIES inference_engine)
set(InferenceEngine_INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/include)
set(InferenceEngine_SRC_DIRS ${CMAKE_SOURCE_DIR}/src)

View File

@ -14,12 +14,11 @@ ie_add_plugin(NAME ${TARGET_NAME}
SOURCES ${MAIN_SRC} ${LIBRARY_HEADERS}
VERSION_DEFINES_FOR cldnn_engine.cpp)
target_link_libraries(${TARGET_NAME} PRIVATE ${INTEL_ITT_LIBS} inference_engine clDNN_shlib pugixml)
target_link_libraries(${TARGET_NAME} PRIVATE ${INTEL_ITT_LIBS} inference_engine clDNN_lib pugixml)
set (CLDNN_TOP_FOLDER ${IE_MAIN_SOURCE_DIR}/thirdparty/clDNN)
target_include_directories(${TARGET_NAME} PRIVATE
${CLDNN_TOP_FOLDER}/api
${CLDNN_TOP_FOLDER}/include
${CLDNN_TOP_FOLDER}
${IE_MAIN_SOURCE_DIR}/src/inference_engine
${IE_MAIN_SOURCE_DIR}/thirdparty/pugixml/src)

View File

@ -16,7 +16,7 @@
#include "cldnn_custom_layer.h"
#include <CPP/network.hpp>
#include <api/network.hpp>
namespace CLDNNPlugin {

View File

@ -10,7 +10,7 @@
#include <map>
#include <ie_common.h>
#include "pugixml.hpp"
#include "CPP/tensor.hpp"
#include "api/tensor.hpp"
namespace CLDNNPlugin {
@ -54,7 +54,7 @@ public:
const std::vector<std::string>& GlobalSizeRules()const { return m_globalSizeRules; }
const std::vector<std::string>& LocalSizeRules()const { return m_localSizeRules; }
const std::vector<KerenlParam>& KernelParams()const { return m_kernelParams; }
const int InputDimSourceIndex() { return m_wgDimInputIdx; }
int InputDimSourceIndex() { return m_wgDimInputIdx; }
protected:
CLDNNCustomLayer() : m_wgDimInputIdx(0) {}

View File

@ -11,6 +11,7 @@
#include <iostream>
#include <cmath>
#include <tuple>
#include <cctype>
#include "ie_metric_helpers.hpp"
#include <debug.h>
@ -132,7 +133,7 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEn
INFERENCE_PLUGIN_API(StatusCode) CreatePluginEngine(IInferencePlugin *&plugin, ResponseDesc *resp) noexcept {
try {
plugin = make_ie_compatible_plugin(
{2, 0,
{2, 1,
CI_BUILD_NUMBER,
"clDNNPlugin"}, std::make_shared<clDNNEngine>());
return OK;
@ -233,6 +234,23 @@ Parameter clDNNEngine::GetConfig(const std::string& name, const std::map<std::st
return result;
}
auto StringRightTrim = [](std::string string, std::string substring, bool case_sensitive = true) {
auto ret_str = string;
if (!case_sensitive) {
std::transform(string.begin(), string.end(), string.begin(), ::tolower);
std::transform(substring.begin(), substring.end(), substring.begin(), ::tolower);
}
auto erase_position = string.rfind(substring);
if (erase_position != std::string::npos) {
// if space exists before substring remove it also
if (std::isspace(string.at(erase_position - 1))) {
erase_position--;
}
return ret_str.substr(0, erase_position);
}
return ret_str;
};
Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
if (name == METRIC_KEY(SUPPORTED_METRICS)) {
std::vector<std::string> metrics;
@ -250,7 +268,7 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
std::vector<std::string> availableDevices = { "" };
IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
} else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, std::string(engine_info.ocl_device_name));
IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, StringRightTrim(engine_info.dev_name, "NEO", false));
} else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
std::vector<std::string> configKeys;
for (auto opt : _impl->m_config.key_config_map)

View File

@ -8,7 +8,7 @@
#include <map>
#include <string>
#include <memory>
#include <CPP/engine.hpp>
#include <api/engine.hpp>
#include <cpp_interfaces/impl/ie_plugin_internal.hpp>
namespace CLDNNPlugin {

View File

@ -7,8 +7,8 @@
#include <unordered_set>
#include "ie_metric_helpers.hpp"
#include <CPP/cldnn_defs.h>
#include <CPP/data.hpp>
#include <api/cldnn.hpp>
#include <api/data.hpp>
#include <chrono>
#include <cmath>
#include <algorithm>

View File

@ -6,10 +6,10 @@
#include <set>
#include <unordered_set>
#include <sstream>
#include <CPP/cldnn_defs.h>
#include <CPP/network.hpp>
#include <CPP/profiling.hpp>
#include <CPP/custom_gpu_primitive.hpp>
#include <api/cldnn.hpp>
#include <api/network.hpp>
#include <api/profiling.hpp>
#include <api/custom_gpu_primitive.hpp>
#include <chrono>
#include <cmath>
#include <algorithm>
@ -238,7 +238,7 @@ InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
layer->type = to_IE_type_name(prim_info.type_id);
layer->precision = data_type_to_precision(prim_info.output_layout.data_type);
std::vector<std::string> originalNames{find_origin_layers(prim_info.original_id)};
for (auto& fused_id : prim_info.c_fused_ids.cpp_ids)
for (auto& fused_id : prim_info.c_fused_ids)
for (auto& origin_id : find_origin_layers(fused_id))
originalNames.push_back(origin_id);
@ -266,7 +266,7 @@ InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
if (filter_const_primitives) {
// Decrease expected dependencies count if there is a const input without original id in the IR
for (auto& dep : prim_info.c_dependencies.cpp_ids) {
for (auto& dep : prim_info.c_dependencies) {
auto it = std::find_if(primitives_info.begin(), primitives_info.end(), [&](cldnn::primitive_info& entry) {
return entry.original_id == dep;
});
@ -290,16 +290,16 @@ InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
for (auto& pi : primitives_info) {
// extract mutable_data primitives and connect it's dependencies and users directly
if (pi.type_id == "mutable_data") {
if (pi.c_dependencies.cpp_ids.size() == 1 && !pi.c_users.cpp_ids.empty()) {
auto dep = pi.c_dependencies.cpp_ids[0];
auto users = pi.c_users.cpp_ids;
if (pi.c_dependencies.size() == 1 && !pi.c_users.empty()) {
auto dep = pi.c_dependencies[0];
auto users = pi.c_users;
auto it = std::find_if(primitives_info.begin(), primitives_info.end(), [&](cldnn::primitive_info& entry) {
return entry.original_id == dep;
});
if (it == primitives_info.end())
continue;
auto& dep_users = it->c_users.cpp_ids;
auto& dep_users = it->c_users;
// Remove mutable data from users list
dep_users.erase(std::find_if(dep_users.begin(), dep_users.end(), [&](std::string user_id) {
return user_id == pi.original_id;
@ -315,7 +315,7 @@ InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
if (it == primitives_info.end())
continue;
for (auto& d : it->c_dependencies.cpp_ids) {
for (auto& d : it->c_dependencies) {
if (d == pi.original_id)
d = dep;
}
@ -334,8 +334,8 @@ InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
// Skip mutable_data
if (pi.type_id == "mutable_data" &&
pi.c_dependencies.cpp_ids.size() == 1 &&
!pi.c_users.cpp_ids.empty()) {
pi.c_dependencies.size() == 1 &&
!pi.c_users.empty()) {
continue;
}
}
@ -377,7 +377,7 @@ InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
for (auto& pair : node2layer) {
auto pi = pair.first;
auto layer = pair.second;
auto user_ids = pi.c_users.cpp_ids;
auto user_ids = pi.c_users;
for (int i = 0; i < user_ids.size(); i++) {
auto it = std::find_if(node2layer.begin(), node2layer.end(), [&](std::pair<cldnn::primitive_info, CNNLayerPtr>& entry) {
return entry.first.original_id == user_ids[i];
@ -399,7 +399,7 @@ InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
}
int in_port_id = 0;
for (auto& dep : it->first.c_dependencies.cpp_ids) {
for (auto& dep : it->first.c_dependencies) {
if (filter_const_primitives) {
auto it = std::find_if(node2layer.begin(), node2layer.end(), [&](std::pair<cldnn::primitive_info, CNNLayerPtr>& entry) {
return entry.first.original_id == dep;
@ -461,16 +461,8 @@ void CLDNNGraph::UpdatePerfStatistics() {
for (auto &profiledID : profilingIDs) {
auto& perfCount = perfMap[profiledID].second;
// Change status if layer wasn't executed by cldnn engine
if (perfCount.num == 0 &&
executedPrimitives.find(profiledID) == executedPrimitives.end()) {
if (allPrimitives.find(profiledID) != allPrimitives.end() &&
allPrimitives.at(profiledID) == "_optimized_") {
// Layer was marked as optimized by cldnn
perfCount.status = InferenceEngineProfileInfo::OPTIMIZED_OUT;
} else {
// Layer wasn't run for some reason
perfCount.status = InferenceEngineProfileInfo::NOT_RUN;
}
if (perfCount.num == 0 && executedPrimitives.find(profiledID) == executedPrimitives.end()) {
perfCount.status = InferenceEngineProfileInfo::OPTIMIZED_OUT;
continue;
}
@ -546,22 +538,30 @@ void CLDNNGraph::UpdateImplementationsMap() {
}
void CLDNNGraph::GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &result) const {
bool combinePrimByIRLayers = false;
unsigned i = 0;
for (auto& profiledID : profilingIDs) {
const auto& layerName = perfMap.at(profiledID).first;
if (layerName.length() == 0) // no layer directly associated
continue;
auto allIds = GetNetwork()->get_all_primitive_org_ids();
auto executedPrimitives = GetNetwork()->get_executed_primitives();
auto primitivesInfo = GetNetwork()->get_primitives_info();
auto getFromProfiling = [&](std::string primId) -> bool {
const auto& layerName = perfMap.at(primId).first;
if (layerName.length() == 0) // no layer directly associated
return false;
const auto& perfCounter = perfMap.at(primId).second;
if (!perfCounter.parentPrimitive.empty() && combinePrimByIRLayers)
return false;
const auto& perfCounter = perfMap.at(profiledID).second;
auto& extPerfEntry = result[layerName];
// copy layer implementation
memset(extPerfEntry.exec_type, 0, sizeof(extPerfEntry.exec_type));
if (perfCounter.isCPU) {
static const std::string cpuExecType("CPU");
memset(extPerfEntry.exec_type, 0, sizeof(extPerfEntry.exec_type));
cpuExecType.copy(extPerfEntry.exec_type, cpuExecType.length()); // Override execType as CPU
} else {
std::string impl = implementationsMap.at(profiledID);
std::string impl = implementationsMap.at(primId);
impl.copy(extPerfEntry.exec_type, impl.length());
}
@ -570,14 +570,97 @@ void CLDNNGraph::GetPerformanceCounts(std::map<std::string, InferenceEngine::Inf
extPerfEntry.cpu_uSec = perfCounter.cpu_avg();
extPerfEntry.realTime_uSec = perfCounter.realTime_avg();
perfCounter.layerType.copy(extPerfEntry.layer_type, perfCounter.layerType.length());
}
if (combinePrimByIRLayers) {
std::string kernelId = "";
long long kernelTime = 0; // used for finding the most complex computation kernel in sub_graph for perf stat
for (auto &id : profilingIDs) {
const auto &pc = perfMap.at(id).second;
if (id != primId && pc.parentPrimitive == primId) {
extPerfEntry.cpu_uSec += pc.cpu_avg();
extPerfEntry.realTime_uSec += pc.realTime_avg();
if (pc.realTime_avg() > kernelTime) {
kernelTime = pc.realTime_avg();
kernelId = id;
}
allIds.erase(std::find(allIds.begin(), allIds.end(), id));
}
}
if (!kernelId.empty())
implementationsMap.at(kernelId).copy(extPerfEntry.exec_type, implementationsMap.at(kernelId).length());
}
for (auto& prim : GetNetwork()->get_executed_primitive_ids()) {
if (std::find(profilingIDs.begin(), profilingIDs.end(), prim) == profilingIDs.end()) {
// TODO: add primitives that was added inside cldnn to perf stat
perfCounter.layerType.copy(extPerfEntry.layer_type, perfCounter.layerType.length());
return true;
};
for (auto& primId : allIds) {
if (std::find(profilingIDs.begin(), profilingIDs.end(), primId) != profilingIDs.end()) {
getFromProfiling(primId);
} else if (executedPrimitives.find(primId) != executedPrimitives.end()) {
auto event = executedPrimitives.at(primId);
cldnn::instrumentation::profiling_info cldnnInfo{primId, event.get_profiling_info()};
// Collect timings
long long cpuTime = 0;
long long deviceTime = 0;
for (auto &interval : cldnnInfo.intervals) {
using duration_t = std::chrono::duration<long long, std::chrono::microseconds::period>;
auto count = std::chrono::duration_cast<duration_t>(interval.value->value()).count();
if (interval.name == "submission") {
cpuTime += count;
} else if (interval.name == "executing") {
deviceTime += count;
} else if (interval.name == "duration") { // "duration" is used for CPU layers
cpuTime += count;
}
}
std::string layerName = primId;
if (primId.find(":") != std::string::npos) {
layerName = primId.substr(primId.find(":") + 1, primId.length());
}
for (auto& pi : primitivesInfo) {
if (pi.original_id == primId) {
if (pi.type_id == "mutable_data")
continue;
auto& extPerfEntry = result[layerName];
if (pi.is_cpu) {
static const std::string cpuExecType("CPU");
memset(extPerfEntry.exec_type, 0, sizeof(extPerfEntry.exec_type));
cpuExecType.copy(extPerfEntry.exec_type, cpuExecType.length()); // Override execType as CPU
} else {
std::string impl = pi.kernel_id;
impl.copy(extPerfEntry.exec_type, impl.length());
}
pi.type_id.copy(extPerfEntry.layer_type, 256);
extPerfEntry.execution_index = i++;
extPerfEntry.status = InferenceEngineProfileInfo::LayerStatus::EXECUTED;
extPerfEntry.cpu_uSec = cpuTime;
extPerfEntry.realTime_uSec = deviceTime;
if (pi.type_id == "input_layout") {
const std::string input_string = "Input";
const std::string undef_string = "undef";
input_string.copy(extPerfEntry.layer_type, 256);
undef_string.copy(extPerfEntry.exec_type, 256);
}
}
}
}
}
// Checking primitives which have been deleted from the execution order but were added by clDNNPlugin
for (auto& primId : profilingIDs)
if (std::find(allIds.begin(), allIds.end(), primId) == allIds.end()) {
getFromProfiling(primId);
}
}
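// Illustrative usage sketch (not part of this file): reading the counters assembled above
// through the public Inference Engine API. Assumes <iostream> is available, that the network
// was loaded with PluginConfigParams::KEY_PERF_COUNT set to PluginConfigParams::YES, and that
// "request" has already completed an inference.
static void printGpuPerfCounters(InferenceEngine::InferRequest& request) {
    auto perfCounts = request.GetPerformanceCounts();
    for (const auto& entry : perfCounts) {
        const auto& info = entry.second;
        // Skip layers that were optimized out or never executed
        if (info.status != InferenceEngine::InferenceEngineProfileInfo::LayerStatus::EXECUTED)
            continue;
        std::cout << entry.first << ": " << info.realTime_uSec << " us on " << info.exec_type
                  << " (layer type: " << info.layer_type << ")" << std::endl;
    }
}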
std::shared_ptr<cldnn::network> CLDNNGraph::GetNetwork(size_t idx) const {

View File

@@ -15,17 +15,17 @@
#include "cpp/ie_cnn_network.h"
#include "debug_options.h"
#include "inference_engine.hpp"
#include <api/network.hpp>
#include <api/memory.hpp>
#include <api/primitive.hpp>
#include <api/topology.hpp>
#include <api/pooling.hpp>
#include <api/eltwise.hpp>
#include <api/concatenation.hpp>
#include <api/detection_output.hpp>
#include <api/softmax.hpp>
#include <api/upsampling.hpp>
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
#include "cldnn_custom_layer.h"
#include "cldnn_config.h"
#include "cldnn_program.h"

View File

@@ -6,7 +6,7 @@
#include <string>
#include <map>
#include <functional>
#include <api/detection_output.hpp> // todo: find a way to remove this
#include <description_buffer.hpp>
#include "cldnn_infer_request.h"
#include "cldnn_streams_task_executor.h"
@@ -356,7 +356,6 @@ void CLDNNInferRequest::SetBatch(int new_batch) {
size_t offset = 0;
size_t bsz = single_batch;
int b = 0;
// calculate metadata for input buffers
for (unsigned nb = 0; nb < m_graph->GetNetworksCount(); nb++) {

View File

@@ -6,19 +6,19 @@
#include <vector>
#include <sstream>
#include <utility>
#include <api/cldnn.hpp>
#include <api/data.hpp>
#include <api/mutable_data.hpp>
#include <api/reorder.hpp>
#include <api/fully_connected.hpp>
#include <api/concatenation.hpp>
#include <api/reshape.hpp>
#include <api/permute.hpp>
#include <api/split.hpp>
#include <api/crop.hpp>
#include <api/reverse_sequence.hpp>
#include <api/lstm.hpp>
#include <api/lstm_dynamic.hpp>
#include "cldnn_program.h"
using namespace InferenceEngine;
@@ -102,8 +102,8 @@ void Program::CreateLSTMCellPrimitive(cldnn::topology& topology, InferenceEngine
topology.add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
topology.add(cldnn::reorder(permuteID, inReshapeID, inputLayout));
addInnerPrimitiveToProfiler(inReshapeID, layer->name, layer);
addInnerPrimitiveToProfiler(permuteID, layer->name, layer);
std::string hiddenInResh = inHiddenReshapeID + "_1";
std::string hiddenInStr = inHiddenReorderID + "_1";
@@ -115,8 +115,11 @@ void Program::CreateLSTMCellPrimitive(cldnn::topology& topology, InferenceEngine
topology.add(cldnn::reorder(cellInStr, cellInResh, hiddenLayout));
topology.add(cldnn::concatenation(concatID, { permuteID, hiddenInStr }, cldnn::concatenation::concatenation_axis::along_x));
addInnerPrimitiveToProfiler(hiddenInResh, layer->name, layer);
addInnerPrimitiveToProfiler(hiddenInStr, layer->name, layer);
addInnerPrimitiveToProfiler(cellInResh, layer->name, layer);
addInnerPrimitiveToProfiler(cellInStr, layer->name, layer);
addInnerPrimitiveToProfiler(concatID, layer->name, layer);
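// The fully connected output below packs the pre-activations of all four LSTM gates, hence
// the 4 * lstm_hidden_size feature dimension; lstm_elt consumes them in f, i, z, o order
// (see lstm_weights_order::fizo).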
cldnn::tensor gemmSz = cldnn::tensor{ lstm_batch_size, 1, 4 * lstm_hidden_size, 1 };
cldnn::layout gemmLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, gemmSz);
@@ -131,25 +134,26 @@ void Program::CreateLSTMCellPrimitive(cldnn::topology& topology, InferenceEngine
topology.add(cldnn::reshape(gemmReshapeID, lstm_fc_id, gemmSz));
topology.add(cldnn::reorder(gemmReorderID, gemmReshapeID, gemmLayout));
topology.add(cldnn::lstm_elt(lstm_elt_id, gemmReorderID, cellInStr,
0, 0, {}, {}, cldnn::lstm_weights_order::fizo));
addInnerPrimitiveToProfiler(lstm_fc_id, layer->name, layer);
addInnerPrimitiveToProfiler(gemmReshapeID, layer->name, layer);
addInnerPrimitiveToProfiler(gemmReorderID, layer->name, layer);
addInnerPrimitiveToProfiler(lstm_elt_id, layer->name, layer);
cldnn::primitive_id outputHiddenID = layerName;
topology.add(cldnn::crop(outputHiddenID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0}));
addInnerPrimitiveToProfiler(outputHiddenID, layer->name, layer);
cldnn::primitive_id outputCellID = layer_type_lower(layer) + ":" + layer->outData[1]->getName();
topology.add(cldnn::crop(outputCellID, lstm_elt_id, hiddenSz, cellCropSz));
addInnerPrimitiveToProfiler(outputCellID, layer->name, layer);
// output primitive IDs
primitiveIDs[outputHiddenID] = outputHiddenID; // LSTMCell:LSTMCell - "concat hidden"
primitiveIDs[layer_type_lower(layer) + ":" + layer->outData[0]->getName()] = outputHiddenID; // LSTMCell:LSTMCell:0 - hidden state
primitiveIDs[outputCellID] = outputCellID; // LSTMCell:LSTMCell:1 - cell state
addPrimitiveToProfiler(layerName, layer, outputHiddenID);
}
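// For reference, the primitive chain assembled above computes one standard LSTM cell step
// (with zero clip and no input-forget coupling here):
//   [f i z o] = W * concat(x_t, h_{t-1}) + b                  -> fully_connected
//   c_t = sigmoid(f) (*) c_{t-1} + sigmoid(i) (*) tanh(z)      -> lstm_elt
//   h_t = sigmoid(o) (*) tanh(c_t)                             -> lstm_elt, split out by the crops
// where (*) denotes element-wise multiplication.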
void Program::CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -250,10 +254,10 @@ void Program::CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNL
topology.add(cldnn::reshape(inHiddenReshapeID+"_1", inputPrimitives[1], hiddenStateShape));
topology.add(cldnn::reshape(inHiddenReshapeID+"_2", inputPrimitives[2], hiddenStateShape));
addInnerPrimitiveToProfiler(inReshapeID, layerName, layer);
addInnerPrimitiveToProfiler(permuteID, layerName, layer);
addInnerPrimitiveToProfiler(inHiddenReshapeID+"_1", layerName, layer);
addInnerPrimitiveToProfiler(inHiddenReshapeID+"_2", layerName, layer);
for (int i = 0; i < lstm_sequence_len; ++i)
input_ids_offsets.push_back({ get_string_id(i), {0, i, 0, 0} });
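// One offset per time step: below, the sequence input is split along the feature axis into
// lstm_sequence_len slices that are fed one by one through the per-step LSTM chain.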
@@ -262,14 +266,12 @@ void Program::CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNL
if (permute_input) {
topology.add(cldnn::permute(layerName + "_inputSwap", permuteID, { 1, 0, 2, 3 }));
addInnerPrimitiveToProfiler(layerName + "_inputSwap", layerName, layer);
topology.add(cldnn::split(inputSplitID, layerName + "_inputSwap", input_ids_offsets));
} else {
topology.add(cldnn::split(inputSplitID, permuteID, input_ids_offsets));
}
addInnerPrimitiveToProfiler(inputSplitID, layerName, layer);
cldnn::tensor gemmSz = cldnn::tensor{ lstm_batch_size, 1, 4 * lstm_hidden_size, 1 };
cldnn::layout gemmLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, gemmSz);
@@ -290,29 +292,33 @@ void Program::CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNL
if (hiddenStr != "") {
topology.add(cldnn::concatenation(concatID, { inputSplitID + ":" + get_string_id(seqIdx), hiddenStr },
cldnn::concatenation::concatenation_axis::along_x));
addInnerPrimitiveToProfiler(concatID, layerName, layer);
topology.add(cldnn::fully_connected(lstm_fc_id, concatID, weightID, hasBias ? biasID : ""));
addInnerPrimitiveToProfiler(lstm_fc_id, layerName, layer);
addInnerPrimitiveToProfiler(inputSplitID + ":" + get_string_id(seqIdx), layerName, layer);
} else {
topology.add(cldnn::fully_connected(lstm_fc_id, inputSplitID + ":" + get_string_id(seqIdx), weightID, hasBias ? biasID : ""));
addInnerPrimitiveToProfiler(lstm_fc_id, layerName, layer);
}
topology.add(cldnn::reshape(lstm_fc_resh_id, lstm_fc_id, gemmSz));
topology.add(cldnn::reorder(lstm_fc_reor_id, lstm_fc_resh_id, gemmLayout));
topology.add(cldnn::lstm_elt(lstm_elt_id, lstm_fc_reor_id,
cellStr, 0, 0, {}, {},
cldnn::lstm_weights_order::fizo));
addInnerPrimitiveToProfiler(lstm_fc_resh_id, layerName, layer);
addInnerPrimitiveToProfiler(lstm_fc_reor_id, layerName, layer);
addInnerPrimitiveToProfiler(lstm_elt_id, layerName, layer);
hiddenStr = crop_id + ":hidden";
cellStr = crop_id + ":cell";
topology.add(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }));
addInnerPrimitiveToProfiler(hiddenStr, layerName, layer);
output_ids_offsets.push_back(hiddenStr);
if (i < lstm_sequence_len - 1) {
topology.add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
addInnerPrimitiveToProfiler(cellStr, layerName, layer);
} else {
// last hidden state crop (output 2)
if (layer->outData.size() > 1) {
@@ -325,8 +331,7 @@ void Program::CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNL
if (layer->outData.size() > 2) {
topology.add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
cldnn::primitive_id outputCellID = layer_type_lower(layer) + ":" + layer->outData[2]->getName();
addInnerPrimitiveToProfiler(cellStr, layerName, layer);
primitiveIDs[outputCellID] = cellStr;
}
}
@@ -336,16 +341,13 @@ void Program::CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNL
if (permute_input) {
topology.add(cldnn::concatenation(layerName + "_outputConcat", output_ids_offsets, cldnn::concatenation::along_f));
addInnerPrimitiveToProfiler(layerName + "_outputConcat", layerName, layer);
topology.add(cldnn::permute(layerName, layerName + "_outputConcat", { 1, 0, 2, 3 }));
} else {
topology.add(cldnn::concatenation(layerName, output_ids_offsets, cldnn::concatenation::along_f));
}
primitiveIDs[layerName] = layerName;
primitiveIDs[layer_type_lower(layer) + ":" + layer->outData[0]->getName()] = layerName;
addPrimitiveToProfiler(layerName, layer);
}
void Program::CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
@@ -478,9 +480,15 @@ void Program::CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNL
topology.add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
topology.add(cldnn::reorder(permuteID, inReshapeID, inputLayout));
addInnerPrimitiveToProfiler(inReshapeID, layerName, layer);
addInnerPrimitiveToProfiler(permuteID, layerName, layer);
topology.add(cldnn::reshape(inHiddenReshapeID + "_1", inputPrimitives[1], hiddenStateShape));
topology.add(cldnn::reshape(inHiddenReshapeID + "_2", inputPrimitives[2], hiddenStateShape));
addInnerPrimitiveToProfiler(inHiddenReshapeID + "_1", layerName, layer);
addInnerPrimitiveToProfiler(inHiddenReshapeID + "_2", layerName, layer);
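// Input 3 of the IR layer carries the per-batch sequence lengths; reshape/reorder it below so
// that reverse_sequence (and the dynamic LSTM primitive) can consume it.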
cldnn::primitive_id dynID = layerName + "_dynLength";
cldnn::primitive_id dynReshapeID = layerName + "_dynReshape";
cldnn::tensor dynShape = { 1, 1, lstm_batch_size, 1 };
@@ -488,10 +496,8 @@ void Program::CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNL
topology.add(cldnn::reshape(dynReshapeID, inputPrimitives[3], dynShape));
topology.add(cldnn::reorder(dynID, dynReshapeID, dynLayout));
addInnerPrimitiveToProfiler(dynReshapeID, layerName, layer);
addInnerPrimitiveToProfiler(dynID, layerName, layer);
cldnn::primitive_id inputID = permuteID;
cldnn::primitive_id prevInputID = permuteID;
@@ -500,14 +506,15 @@ void Program::CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNL
inputID = layerName + "_inputSwap";
topology.add(cldnn::permute(inputID, prevInputID, { 1, 0, 2, 3 }));
prevInputID = inputID;
addInnerPrimitiveToProfiler(inputID, layerName, layer);
}
cldnn::primitive_id seq_len_id = layer->name + "seq_lengths";
if (reverseSeq) {
inputID = layerName + "_inputReverse";
topology.add(cldnn::reverse_sequence(inputID, prevInputID, dynID, 1, 0));
addInnerPrimitiveToProfiler(inputID, layerName, layer);
prevInputID = inputID;
}
@@ -538,26 +545,25 @@ void Program::CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNL
weightID, recurrentID, outputHiddenID, outputCellID, biasID,
inHiddenReshapeID + "_1", inHiddenReshapeID + "_2"));
prevInputID = inputID = dlstmID;
addInnerPrimitiveToProfiler(dlstmID, layerName, layer);
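// The input sequence was reversed before the dynamic LSTM above, so reverse its output back
// to the original time order here.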
if (reverseSeq) {
inputID = layerName + "_outputReverse";
topology.add(cldnn::reverse_sequence(inputID, prevInputID, dynID, 1, 0));
addInnerPrimitiveToProfiler(inputID, layerName, layer);
prevInputID = inputID;
}
if (permute_input) {
inputID = layerName + "_outputSwap";
topology.add(cldnn::permute(inputID, prevInputID, { 1, 0, 2, 3 }));
addInnerPrimitiveToProfiler(inputID, layerName, layer);
prevInputID = inputID;
}
primitiveIDs[layerName] = inputID;
primitiveIDs[inputID] = inputID;
primitiveIDs[layer_type_lower(layer) + ":" + layer->outData[0]->getName()] = inputID;
addPrimitiveToProfiler(layerName, layer, inputID);
}
void Program::CreateRNNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {

Some files were not shown because too many files have changed in this diff.