Merge remote-tracking branch 'upstream/master'

Steve Yoo 2021-08-13 10:49:35 +09:00
commit 508c616151
544 changed files with 5952 additions and 2038 deletions

@@ -117,7 +117,6 @@ jobs:
 -DPYTHON_EXECUTABLE=/usr/bin/python3.8
 -DENABLE_WHEEL=ON
 -DENABLE_TESTS=ON
--DNGRAPH_ONNX_IMPORT_ENABLE=ON
 -DNGRAPH_ONNX_FRONTEND_ENABLE=ON
 -DENABLE_FASTER_BUILD=ON
 -DENABLE_STRICT_DEPENDENCIES=OFF

@@ -94,7 +94,6 @@ jobs:
 -DENABLE_PROFILING_ITT=OFF
 -DENABLE_SAMPLES=OFF
 -DENABLE_SPEECH_DEMO=OFF
--DNGRAPH_ONNX_IMPORT_ENABLE=ON
 -DNGRAPH_ONNX_FRONTEND_ENABLE=ON
 -DNGRAPH_DEBUG_ENABLE=OFF
 $(REPO_DIR)

@@ -68,7 +68,6 @@ RUN cmake .. \
 -DENABLE_SPEECH_DEMO=OFF \
 -DENABLE_PYTHON=ON \
 -DPYTHON_EXECUTABLE=/usr/bin/python3 \
--DNGRAPH_ONNX_IMPORT_ENABLE=ON \
 -DNGRAPH_ONNX_FRONTEND_ENABLE=ON \
 -DNGRAPH_DEBUG_ENABLE=OFF \
 -DCMAKE_INSTALL_PREFIX=/openvino/dist \

@@ -34,7 +34,7 @@ jobs:
       - name: Create code style diff
         if: failure()
         run: |
-          cmake --build . --target clang_format_fix_all
+          cmake --build . --target clang_format_fix_all -j8
           git diff > code_style_diff.diff
         working-directory: build
@@ -42,7 +42,7 @@ jobs:
         if: failure()
         with:
           name: code_style_diff
-          path: code_style_diff.diff
+          path: build/code_style_diff.diff
   ShellCheck:
     runs-on: ubuntu-18.04

.gitmodules (vendored)
@@ -18,12 +18,12 @@
 path = thirdparty/ade
 url = https://github.com/opencv/ade.git
 ignore = dirty
-[submodule "thirdparty/gflags"]
-path = thirdparty/gflags
+[submodule "thirdparty/gflags/gflags"]
+path = thirdparty/gflags/gflags
 url = https://github.com/gflags/gflags.git
 ignore = dirty
-[submodule "thirdparty/gtest"]
-path = thirdparty/gtest
+[submodule "thirdparty/gtest/gtest"]
+path = thirdparty/gtest/gtest
 url = https://github.com/openvinotoolkit/googletest.git
 ignore = dirty
 [submodule "thirdparty/ocl/icd_loader"]

@@ -90,17 +90,10 @@ ie_coverage_extract(INPUT "openvino" OUTPUT "ngraph"
 ie_coverage_genhtml(INFO_FILE "ngraph"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
-if(NGRAPH_ONNX_IMPORT_ENABLE)
-    ie_coverage_extract(INPUT "openvino" OUTPUT "onnx_importer"
-                        PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/onnx_common*"
-                                 "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/onnx_import*")
-    ie_coverage_genhtml(INFO_FILE "onnx_importer"
-                        PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
-endif()
 if(NGRAPH_ONNX_FRONTEND_ENABLE)
-    ie_coverage_extract(INPUT "openvino" OUTPUT "onnx_ngraph_frontend"
-                        PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/frontend*")
-    ie_coverage_genhtml(INFO_FILE "onnx_ngraph_frontend"
+    ie_coverage_extract(INPUT "openvino" OUTPUT "onnx"
+                        PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/*"
+                                 "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/*")
+    ie_coverage_genhtml(INFO_FILE "onnx"
                         PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
 endif()

@@ -122,13 +122,12 @@ else()
 set(protoc_available ON)
 endif()
-ie_dependent_option(NGRAPH_ONNX_IMPORT_ENABLE "Enable ONNX importer" ON "protoc_available" OFF)
-ie_dependent_option(NGRAPH_ONNX_FRONTEND_ENABLE "Enable ONNX FrontEnd" OFF "NGRAPH_ONNX_IMPORT_ENABLE" OFF)
+ie_dependent_option(NGRAPH_ONNX_FRONTEND_ENABLE "Enable ONNX FrontEnd" ON "protoc_available" OFF)
 ie_dependent_option(NGRAPH_PDPD_FRONTEND_ENABLE "Enable PaddlePaddle FrontEnd" ON "protoc_available" OFF)
 ie_dependent_option(NGRAPH_USE_PROTOBUF_LITE "Compiles and links with protobuf-lite" ON
-                    "NGRAPH_ONNX_IMPORT_ENABLE" OFF)
+                    "NGRAPH_ONNX_FRONTEND_ENABLE" OFF)
 ie_dependent_option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system protobuf" OFF
-                    "NGRAPH_ONNX_IMPORT_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF)
+                    "NGRAPH_ONNX_FRONTEND_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF)
 ie_dependent_option(NGRAPH_UNIT_TEST_ENABLE "Enables ngraph unit tests" ON "ENABLE_TESTS;NOT ANDROID" OFF)
 ie_dependent_option(NGRAPH_UNIT_TEST_BACKENDS_ENABLE "Control the building of unit tests using backends" ON
                     "NGRAPH_UNIT_TEST_ENABLE" OFF)

@@ -28,9 +28,8 @@
 #     ngraph::frontend_manager - nGraph frontend manager
 #
-#     ngraph_onnx_importer_FOUND - True if the system has onnx_importer library
-#     ngraph::onnx_importer - ONNX importer target (optional)
-#     ONNX_IMPORTER_LIBRARIES - ONNX importer libraries
+#     ngraph_onnx_ngraph_frontend_FOUND - True if the system has onnx_ngraph_frontend library
+#     ngraph::onnx_ngraph_frontend - ONNX FrontEnd target (optional)
 #
 #     ngraph_paddlepaddle_frontend_FOUND - True if the system has PDPD frontend
 #     ngraph::paddlepaddle_ngraph_frontend - nGraph PDPD frontend (optional)
@@ -38,18 +37,30 @@
 @PACKAGE_INIT@
+function(set_imported_global target)
+    get_target_property(IS_IMPORTED_GLOBAL ${target} IMPORTED_GLOBAL)
+    if (NOT IS_IMPORTED_GLOBAL)
+        set_target_properties(${target} PROPERTIES IMPORTED_GLOBAL TRUE)
+    endif()
+endfunction()
 if(NOT TARGET ngraph)
     include("${CMAKE_CURRENT_LIST_DIR}/ngraphTargets.cmake")
+    set_imported_global(ngraph::ngraph)
+    set_imported_global(ngraph::frontend_manager)
 endif()
 set(ngraph_ngraph_FOUND ON)
 set(NGRAPH_LIBRARIES ngraph::ngraph)
-set(ngraph_onnx_importer_FOUND @NGRAPH_ONNX_IMPORT_ENABLE@)
-if(ngraph_onnx_importer_FOUND)
-    set(ONNX_IMPORTER_LIBRARIES ngraph::onnx_importer)
+set(ngraph_onnx_ngraph_frontend_FOUND @NGRAPH_ONNX_FRONTEND_ENABLE@)
+if (ngraph_onnx_ngraph_frontend_FOUND AND NOT TARGET onnx_ngraph_frontend AND NOT TARGET ngraph::onnx_importer)
+    set_imported_global(ngraph::onnx_ngraph_frontend)
+    add_library(ngraph::onnx_importer ALIAS ngraph::onnx_ngraph_frontend)
 endif()
 set(ngraph_paddlepaddle_frontend_FOUND @NGRAPH_PDPD_FRONTEND_ENABLE@)
+if(ngraph_paddlepaddle_frontend_FOUND AND NOT TARGET paddlepaddle_ngraph_frontend)
+    set_imported_global(ngraph::paddlepaddle_ngraph_frontend)
+endif()
 check_required_components(ngraph)
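The compatibility alias above is what keeps older consumer projects building: requesting the removed `onnx_importer` name still resolves to the new ONNX FrontEnd target. A minimal consumer sketch (the `my_extension` target and source file are hypothetical, not part of this change):

```cmake
# Hypothetical consumer of the ngraph package after this rename.
cmake_minimum_required(VERSION 3.13)
project(my_extension_example CXX)

find_package(ngraph REQUIRED)

add_library(my_extension SHARED my_extension.cpp)

if(ngraph_onnx_ngraph_frontend_FOUND)
    # New target name; legacy code that links ngraph::onnx_importer keeps working
    # because of the ALIAS created in ngraphConfig.cmake above.
    target_link_libraries(my_extension PRIVATE ngraph::ngraph ngraph::onnx_ngraph_frontend)
endif()
```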

@@ -17,7 +17,7 @@ function(ov_model_convert SRC DST OUT)
         get_filename_component(name_we "${in_file}" NAME_WE)
         set(model_source_dir "${SRC}/${rel_dir}")
-        if(NOT NGRAPH_ONNX_IMPORT_ENABLE AND ext MATCHES "^\\.(onnx|prototxt)$")
+        if(NOT NGRAPH_ONNX_FRONTEND_ENABLE AND ext MATCHES "^\\.(onnx|prototxt)$")
             # don't copy / process ONNX / prototxt files
             continue()
         endif()
@@ -78,7 +78,7 @@ ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
                  ie_onnx_import_out_files)
 if(ENABLE_TESTS)
-    if(NGRAPH_ONNX_IMPORT_ENABLE AND ENABLE_REQUIREMENTS_INSTALL)
+    if(NGRAPH_ONNX_FRONTEND_ENABLE AND ENABLE_REQUIREMENTS_INSTALL)
         find_package(PythonInterp 3 REQUIRED)
         get_filename_component(PYTHON_EXEC_DIR ${PYTHON_EXECUTABLE} DIRECTORY)

@@ -1,6 +1,7 @@
 BasedOnStyle: Google
 IndentWidth: 4
 UseTab: Never
+ColumnLimit: 120
 Language: Cpp
 Standard: Cpp11
@@ -8,18 +9,20 @@ Standard: Cpp11
 AccessModifierOffset: -4
 AlignConsecutiveMacros: true
 AllowAllArgumentsOnNextLine: false
+AllowAllConstructorInitializersOnNextLine: false
 AllowAllParametersOfDeclarationOnNextLine: false
 AllowShortFunctionsOnASingleLine: Empty
 AllowShortIfStatementsOnASingleLine: Never
 AllowShortLambdasOnASingleLine: Empty
 AllowShortLoopsOnASingleLine: false
 AlwaysBreakBeforeMultilineStrings: false
-ColumnLimit: 160
-# Specialize this comment pragma in order to avoid changes in SEA copyrights
+BinPackArguments: false
+BinPackParameters: false
 CommentPragmas: '^#'
 DerivePointerAlignment: false
 FixNamespaceComments: true
 IndentCaseLabels: false
-IndentPPDirectives: BeforeHash
-SpaceBeforeCpp11BracedList: true
-SpaceBeforeCtorInitializerColon: false
+IndentPPDirectives: AfterHash
+ForEachMacros:
+  - foreach
+  - FOREACH_CHILD

@@ -25,7 +25,7 @@ if(NOT ENABLE_DOCKER)
     set(InferenceEngine_DIR ${CMAKE_BINARY_DIR})
 endif()
-if(NGRAPH_ONNX_IMPORT_ENABLE)
+if(NGRAPH_ONNX_FRONTEND_ENABLE)
     add_subdirectory(onnx_custom_op)
 endif()
 add_subdirectory(template_extension)
@@ -223,7 +223,11 @@ function(build_docs)
         "${OpenVINO_SOURCE_DIR}/inference-engine/*.md"
         "${OpenVINO_SOURCE_DIR}/inference-engine/*.png"
         "${OpenVINO_SOURCE_DIR}/inference-engine/*.gif"
-        "${OpenVINO_SOURCE_DIR}/inference-engine/*.jpg")
+        "${OpenVINO_SOURCE_DIR}/inference-engine/*.jpg"
+        "${OpenVINO_SOURCE_DIR}/tools/*.md"
+        "${OpenVINO_SOURCE_DIR}/tools/*.png"
+        "${OpenVINO_SOURCE_DIR}/tools/*.gif"
+        "${OpenVINO_SOURCE_DIR}/tools/*.jpg")
     foreach(source_file ${ovino_doc_files})
         list(APPEND commands COMMAND ${CMAKE_COMMAND} -E copy

@@ -50,10 +50,9 @@ The example below demonstrates how to unregister an operator from the destructor
 ## Requirements for Building with CMake
-A program that uses the `register_operator` functionality requires `ngraph` and `onnx_importer` libraries in addition to the Inference Engine.
-The `onnx_importer` is a component of the `ngraph` package , so `find_package(ngraph REQUIRED COMPONENTS onnx_importer)` can find both.
-The `ngraph` package exposes two variables, `${NGRAPH_LIBRARIES}` and `${ONNX_IMPORTER_LIBRARIES}`, which reference the `ngraph` and `onnx_importer` libraries.
-Those variables need to be passed to the `target_link_libraries` command in the CMakeLists.txt file.
+A program that uses the `register_operator` functionality requires `ngraph::ngraph` and `ngraph::onnx_ngraph_frontend` libraries in addition to the Inference Engine.
+The `onnx_ngraph_frontend` is a component of the `ngraph` package , so `find_package(ngraph REQUIRED COMPONENTS onnx_ngraph_frontend)` can find both.
+Those libraries need to be passed to the `target_link_libraries` command in the CMakeLists.txt file.
 See CMakeLists.txt below for reference:
 @snippet onnx_custom_op/CMakeLists.txt cmake:onnx_custom_op
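The `@snippet` reference is not rendered on this page; a minimal sketch of the build file it points to, assuming the `onnx_custom_op` target and source names used elsewhere in this commit:

```cmake
# Sketch of onnx_custom_op/CMakeLists.txt after the rename (names assumed from this commit).
set(CMAKE_CXX_STANDARD 11)
set(TARGET_NAME "onnx_custom_op")

# onnx_ngraph_frontend replaces the former onnx_importer component.
find_package(ngraph REQUIRED COMPONENTS onnx_ngraph_frontend)

add_library(${TARGET_NAME} STATIC onnx_custom_op.cpp onnx_custom_op.hpp)

# Link the imported targets directly; the old ${NGRAPH_LIBRARIES} and
# ${ONNX_IMPORTER_LIBRARIES} variables are no longer needed.
target_link_libraries(${TARGET_NAME} PUBLIC ngraph::ngraph ngraph::onnx_ngraph_frontend)
```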

@@ -14,7 +14,7 @@ Inference Engine sample applications include the following:
   - [Automatic Speech Recognition Python Sample](../../inference-engine/ie_bridges/python/sample/speech_sample/README.md)
 - **Benchmark Application** Estimates deep learning inference performance on supported devices for synchronous and asynchronous modes.
   - [Benchmark C++ Tool](../../inference-engine/samples/benchmark_app/README.md)
-  - [Benchmark Python Tool](../../inference-engine/tools/benchmark_tool/README.md)
+  - [Benchmark Python Tool](../../tools/benchmark_tool/README.md)
 - **Hello Classification Sample** Inference of image classification networks like AlexNet and GoogLeNet using Synchronous Inference Request API. Input of any size and layout can be set to an infer request which will be pre-processed automatically during inference (the sample supports only images as inputs and supports Unicode paths).
   - [Hello Classification C++ Sample](../../inference-engine/samples/hello_classification/README.md)
   - [Hello Classification C Sample](../../inference-engine/ie_bridges/c/samples/hello_classification/README.md)

@@ -15,7 +15,7 @@ The models used in the performance benchmarks were chosen based on general adopt
 CF means Caffe*, while TF means TensorFlow*.
 #### 5. How can I run the benchmark results on my own?
-All of the performance benchmarks were generated using the open-sourced tool within the Intel® Distribution of OpenVINO™ toolkit called `benchmark_app`, which is available in both [C++](../../inference-engine/samples/benchmark_app/README.md) and [Python](../../inference-engine/tools/benchmark_tool/README.md).
+All of the performance benchmarks were generated using the open-sourced tool within the Intel® Distribution of OpenVINO™ toolkit called `benchmark_app`, which is available in both [C++](../../inference-engine/samples/benchmark_app/README.md) and [Python](../../tools/benchmark_tool/README.md).
 #### 6. What image sizes are used for the classification network models?
 The image size used in the inference depends on the network being benchmarked. The following table shows the list of input sizes for each network model.

@@ -61,7 +61,7 @@ limitations under the License.
 <tab type="user" title="Convert PyTorch* RNN-T Model " url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT"/>
 <tab type="user" title="Convert PyTorch* YOLACT Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_YOLACT"/>
 <tab type="user" title="Convert PyTorch* F3Net Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_F3Net"/>
-<tab type="user" title="Convert PyTorch* RCAN Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RCAN">
+<tab type="user" title="Convert PyTorch* RCAN Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RCAN"/>
 <tab type="user" title="Convert PyTorch* BERT-NER Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_Bert_ner"/>
 </tab>
 </tab>

@@ -7,11 +7,11 @@ set(CMAKE_CXX_STANDARD 11)
 set(TARGET_NAME "onnx_custom_op")
-find_package(ngraph REQUIRED COMPONENTS onnx_importer)
+find_package(ngraph REQUIRED COMPONENTS onnx_ngraph_frontend)
 add_library(${TARGET_NAME} STATIC onnx_custom_op.cpp onnx_custom_op.hpp)
-target_link_libraries(${TARGET_NAME} PUBLIC ${NGRAPH_LIBRARIES} ${ONNX_IMPORTER_LIBRARIES})
+target_link_libraries(${TARGET_NAME} PUBLIC ngraph::ngraph ngraph::onnx_ngraph_frontend)
 # [cmake:onnx_custom_op]
 # Enable code style check

@@ -15,7 +15,7 @@
 Let *min_value* and *max_value* be *min* and *max*, respectively. The mathematical formula of *Clamp* is as follows:
 \f[
-clamp( x_{i} )=\min\big( \max\left( x_{i}, min\_value \right), max\_value \big)
+clamp( x_{i} )=\min\big( \max\left( x_{i},\ min\_value \right),\ max\_value \big)
 \f]
 **Attributes**:

@@ -12,7 +12,7 @@
 It performs element-wise activation function on a given input tensor, based on the following mathematical formula:
 \f[
-Gelu(x) = x\cdot\Phi(x) = x\cdot\frac{1}{2}\cdot\left[1 + erf\left(x/\sqrt{2}\right)\right]
+Gelu(x) = x\cdot\Phi(x) = x\cdot\frac{1}{2}\cdot\left[1 + erf\frac{x}{\sqrt{2}}\right]
 \f]
 where Φ(x) is the Cumulative Distribution Function for Gaussian Distribution.

@@ -22,13 +22,13 @@ The *Gelu* function may be approximated in two different ways based on *approxim
 For `erf` approximation mode, *Gelu* function is represented as:
 \f[
-Gelu(x) = x\cdot\Phi(x) = x\cdot\frac{1}{2}\cdot\left[1 + erf\left(x/\sqrt{2}\right)\right]
+Gelu(x) = x\cdot\Phi(x) = x\cdot\frac{1}{2}\cdot\left[1 + erf\frac{x}{\sqrt{2}}\right]
 \f]
 For `tanh` approximation mode, *Gelu* function is represented as:
 \f[
-Gelu(x) \approx x\cdot\frac{1}{2}\cdot \left(1 + \tanh\left[\sqrt{2/\pi} \cdot (x + 0.044715 \cdot x^3)\right]\right)
+Gelu(x) \approx x\cdot\frac{1}{2}\cdot \left(1 + \tanh\left[\sqrt{\frac{2}{\pi}} \cdot (x + 0.044715 \cdot x^3)\right]\right)
 \f]
 **Attributes**

@@ -10,7 +10,7 @@
 element in the output tensor with the following formula:
 \f[
-HSigmoid(x) = \frac{min(max(x + 3, 0), 6)}{6}
+HSigmoid(x) = \frac{min(max(x + 3,\ 0),\ 6)}{6}
 \f]
 The HSigmoid operation is introduced in the following [article](https://arxiv.org/pdf/1905.02244.pdf).

@@ -10,7 +10,7 @@
 element in the output tensor with the following formula:
 \f[
-HSwish(x) = x \frac{min(max(x + 3, 0), 6)}{6}
+HSwish(x) = x \cdot \frac{min(max(x + 3,\ 0),\ 6)}{6}
 \f]
 The HSwish operation is introduced in the following [article](https://arxiv.org/pdf/1905.02244.pdf).

@@ -12,10 +12,13 @@
 For each element from the input tensor calculates corresponding
 element in the output tensor with the following formula:
 \f[
-y = max(0, min(1, alpha * x + beta))
+y = max(0,\ min(1,\ \alpha x + \beta))
 \f]
+where α corresponds to `alpha` scalar input and β corresponds to `beta` scalar input.
 **Inputs**
 * **1**: An tensor of type *T*. **Required.**

@@ -8,8 +8,8 @@
 **Note**: This is recommended to not compute LogSoftmax directly as Log(Softmax(x, axis)), more numeric stable is to compute LogSoftmax as:
 \f[
-t = (x - ReduceMax(x, axis)) \\
-LogSoftmax(x, axis) = t - Log(ReduceSum(Exp(t), axis))
+t = (x - ReduceMax(x,\ axis)) \\
+LogSoftmax(x, axis) = t - Log(ReduceSum(Exp(t),\ axis))
 \f]
 **Attributes**

@@ -15,7 +15,7 @@
 For each element from the input tensor calculates corresponding
 element in the output tensor with the following formula:
 \f[
-Y_{i}^{( l )} = max(0, Y_{i}^{( l - 1 )})
+Y_{i}^{( l )} = max(0,\ Y_{i}^{( l - 1 )})
 \f]
 **Inputs**:

@@ -25,7 +25,7 @@
 *Abs* does the following with the input tensor *a*:
 \f[
-a_{i} = abs(a_{i})
+a_{i} = \vert a_{i} \vert
 \f]
 **Examples**

@@ -4,11 +4,13 @@
 **Category**: Arithmetic unary operation
-**Short description**: *Atanh* performs element-wise hyperbolic inverse tangent (arctangenth) operation with given tensor.
-**Attributes**:
-    No attributes available.
+**Short description**: *Atanh* performs element-wise hyperbolic inverse tangent (arctangenth) operation with a given tensor.
+**Detailed description**: *Atanh* performs element-wise hyperbolic inverse tangent (arctangenth) operation on a given input tensor, based on the following mathematical formula:
+\f[ a_{i} = atanh(a_{i}) \f]
+**Attributes**: Atanh operation has no attributes.
 **Inputs**
@@ -16,22 +18,14 @@
 **Outputs**
-* **1**: The result of element-wise atanh operation. A tensor of type *T*.
+* **1**: The result of element-wise atanh operation applied to the input tensor. A tensor of type *T* and the same shape as input tensor.
 **Types**
-* *T*: any floating-point type.
-*Atanh* does the following with the input tensor *a*:
-\f[
-a_{i} = atanh(a_{i})
-\f]
+* *T*: any supported numeric type.
 **Examples**
-*Example 1*
 ```xml
 <layer ... type="Atanh">
     <input>

@@ -10,7 +10,7 @@
 element in the output tensor with the following formula:
 \f[
-a_{i} = ceiling(a_{i})
+a_{i} = \lceil a_{i} \rceil
 \f]
 **Attributes**: *Ceiling* operation has no attributes.

@@ -11,7 +11,7 @@ Before performing arithmetic operation, input tensors *a* and *b* are broadcaste
 After broadcasting *Divide* performs division operation for the input tensors *a* and *b* using the formula below:
 \f[
-o_{i} = a_{i} / b_{i}
+o_{i} = \frac{a_{i}}{b_{i}}
 \f]
 The result of division by zero is undefined.

@@ -10,7 +10,7 @@
 As a first step input tensors *a* and *b* are broadcasted if their shapes differ. Broadcasting is performed according to `auto_broadcast` attribute specification. As a second step *FloorMod* operation is computed element-wise on the input tensors *a* and *b* according to the formula below:
 \f[
-o_{i} = a_{i} % b_{i}
+o_{i} = a_{i} \mod b_{i}
 \f]
 *FloorMod* operation computes a reminder of a floored division. It is the same behaviour like in Python programming language: `floor(x / y) * y + floor_mod(x, y) = x`. The sign of the result is equal to a sign of a divisor. The result of division by zero is undefined.

@@ -10,7 +10,7 @@
 element in the output tensor with the following formula:
 \f[
-a_{i} = floor(a_{i})
+a_{i} = \lfloor a_{i} \rfloor
 \f]
 **Attributes**: *Floor* operation has no attributes.

@@ -12,7 +12,7 @@ As a first step input tensors *a* and *b* are broadcasted if their shapes differ
 After broadcasting *Maximum* does the following with the input tensors *a* and *b*:
 \f[
-o_{i} = max(a_{i}, b_{i})
+o_{i} = max(a_{i},\ b_{i})
 \f]
 **Attributes**:

@@ -10,7 +10,7 @@
 As a first step input tensors *a* and *b* are broadcasted if their shapes differ. Broadcasting is performed according to `auto_broadcast` attribute specification. As a second step *Minimum* operation is computed element-wise on the input tensors *a* and *b* according to the formula below:
 \f[
-o_{i} = min(a_{i}, b_{i})
+o_{i} = min(a_{i},\ b_{i})
 \f]
 **Attributes**:

@@ -10,7 +10,7 @@
 As a first step input tensors *a* and *b* are broadcasted if their shapes differ. Broadcasting is performed according to `auto_broadcast` attribute specification. As a second step *Mod* operation is computed element-wise on the input tensors *a* and *b* according to the formula below:
 \f[
-o_{i} = a_{i} % b_{i}
+o_{i} = a_{i} \mod b_{i}
 \f]
 *Mod* operation computes a reminder of a truncated division. It is the same behaviour like in C programming language: `truncated(x / y) * y + truncated_mod(x, y) = x`. The sign of the result is equal to a sign of a dividend. The result of division by zero is undefined.

@@ -11,7 +11,7 @@ Before performing arithmetic operation, input tensors *a* and *b* are broadcaste
 After broadcasting *Multiply* performs multiplication operation for the input tensors *a* and *b* using the formula below:
 \f[
-o_{i} = a_{i} * b_{i}
+o_{i} = a_{i} \cdot b_{i}
 \f]
 **Attributes**:

@@ -4,35 +4,10 @@
 **Category**: Comparison binary operation
-**Short description**: *Equal* performs element-wise comparison operation with two given tensors applying multi-directional broadcast rules.
-**Attributes**:
-* *auto_broadcast*
-  * **Description**: specifies rules used for auto-broadcasting of input tensors.
-  * **Range of values**:
-    * *none* - no auto-broadcasting is allowed, all input shapes should match
-    * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in <a href="https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md">ONNX docs</a>.
-  * **Type**: string
-  * **Default value**: "numpy"
-  * **Required**: *no*
-**Inputs**
-* **1**: A tensor of type *T*. **Required.**
-* **2**: A tensor of type *T*. **Required.**
-**Outputs**
-* **1**: The result of element-wise comparison operation. A tensor of type boolean.
-**Types**
-* *T*: arbitrary supported type.
+**Short description**: *Equal* performs element-wise comparison operation with two given input tensors applying multi-directional broadcast rules specified in the *auto_broadcast* attribute.
 **Detailed description**
-Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value.
+Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and *auto_broadcast* attributes is not *none*. Broadcasting is performed according to *auto_broadcast* value.
 After broadcasting *Equal* does the following with the input tensors *a* and *b*:
@@ -40,12 +15,40 @@ After broadcasting *Equal* does the following with the input tensors *a* and *b*
 o_{i} = a_{i} == b_{i}
 \f]
+**Attributes**:
+* *auto_broadcast*
+  * **Description**: specifies rules used for auto-broadcasting of input tensors.
+  * **Range of values**:
+    * *none* - no auto-broadcasting is allowed, all input shapes should match,
+    * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md),
+    * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md).
+  * **Type**: string
+  * **Default value**: "numpy"
+  * **Required**: *no*
+**Inputs**
+* **1**: A tensor of type *T* and arbitrary shape. **Required.**
+* **2**: A tensor of type *T* and arbitrary shape. **Required.**
+**Outputs**
+* **1**: The result of element-wise **comparison** operation applied to the input tensors. A tensor of type *T_BOOL* and the same shape equal to broadcasted shape of two inputs.
+**Types**
+* *T*: arbitrary supported type.
+* *T_BOOL*: `boolean`.
 **Examples**
-*Example 1*
+*Example 1: no broadcast*
 ```xml
 <layer ... type="Equal">
+    <data auto_broadcast="none"/>
     <input>
         <port id="0">
             <dim>256</dim>
@@ -65,9 +68,10 @@ o_{i} = a_{i} == b_{i}
 </layer>
 ```
-*Example 2: broadcast*
+*Example 2: numpy broadcast*
 ```xml
 <layer ... type="Equal">
+    <data auto_broadcast="numpy"/>
     <input>
         <port id="0">
             <dim>8</dim>

@@ -37,7 +37,7 @@ Before performing arithmetic operation, input tensors *a* and *b* are broadcaste
 After broadcasting *GreaterEqual* does the following with the input tensors *a* and *b*:
 \f[
-o_{i} = a_{i} >= b_{i}
+o_{i} = a_{i} \geq b_{i}
 \f]
 **Examples**

@@ -12,7 +12,7 @@ Before performing arithmetic operation, input tensors *a* and *b* are broadcaste
 After broadcasting *LessEqual* does the following with the input tensors *a* and *b*:
 \f[
-o_{i} = a_{i} <= b_{i}
+o_{i} = a_{i} \leq b_{i}
 \f]
 **Attributes**:

@@ -37,7 +37,7 @@ Before performing arithmetic operation, input tensors *a* and *b* are broadcaste
 After broadcasting *NotEqual* does the following with the input tensors *a* and *b*:
 \f[
-o_{i} = a_{i} != b_{i}
+o_{i} = a_{i} \neq b_{i}
 \f]
 **Examples**

@@ -16,15 +16,15 @@ n_{out} = \left ( \frac{n_{in} + 2p - k}{s} \right ) + 1
 The receptive field in each layer is calculated using the formulas:
 * Jump in the output feature map:
 \f[
-j_{out} = j_{in} * s
+j_{out} = j_{in} \cdot s
 \f]
 * Size of the receptive field of output feature:
 \f[
-r_{out} = r_{in} + ( k - 1 ) * j_{in}
+r_{out} = r_{in} + ( k - 1 ) \cdot j_{in}
 \f]
 * Center position of the receptive field of the first output feature:
 \f[
-start_{out} = start_{in} + ( \frac{k - 1}{2} - p ) * j_{in}
+start_{out} = start_{in} + ( \frac{k - 1}{2} - p ) \cdot j_{in}
 \f]
 * Output is calculated using the following formula:
 \f[

@@ -12,7 +12,7 @@ Output is calculated using the following formula:
 \f[
-y(p) = \sum_{k = 1}^{K}w_{k}x(p + p_{k} + {\Delta}p_{k})
+y(p) = \displaystyle{\sum_{k = 1}^{K}}w_{k}x(p + p_{k} + {\Delta}p_{k})
 \f]

@@ -14,7 +14,7 @@ Output is calculated using the following formula:
 \f[
-y(p) = \sum_{k = 1}^{K}w_{k}x(p + p_{k} + {\Delta}p_{k}) * {\Delta}m_{k}
+y(p) = \displaystyle{\sum_{k = 1}^{K}}w_{k}x(p + p_{k} + {\Delta}p_{k}) \cdot {\Delta}m_{k}
 \f]
 Where

@@ -8,7 +8,7 @@
 **Detailed description**:
-*RandomUniform* operation generates random numbers from a uniform distribution in the range `[*minval*, *maxval*)`.
+*RandomUniform* operation generates random numbers from a uniform distribution in the range `[minval, maxval)`.
 The generation algorithm is based on underlying random integer generator that uses Philox algorithm. Philox algorithm
 is a counter-based pseudo-random generator, which produces uint32 values. Single invocation of Philox algorithm returns
 four result random values, depending on the given *key* and *counter* values. *Key* and *counter* are initialized
@@ -42,7 +42,7 @@ R' = mulhi(R, M) {\oplus} k {\oplus} L \\
 mulhi(a, b) = floor((a {\times} b) / 2^{32}) \\
 mullo(a, b) = (a {\times} b) \mod 2^{32}
 \f]
-where `{\oplus}` - bitwise xor, *k* = `R_{key}` for updating counter, *k* = `L_{key}` for updating *n*,
+where \f${\oplus}\f$ - bitwise xor, *k* = \f$R_{key}\f$ for updating counter, *k* = \f$L_{key}\f$ for updating *n*,
 *M* = `0xD2511F53` for updating *n*, *M* = `0xCD9E8D57` for updating *counter*.
 After each round *key* is raised by summing with another pair of const values:
@@ -50,7 +50,7 @@ After each round *key* is raised by summing with another pair of const values:
 L += 0x9E3779B9 \\
 R += 0xBB67AE85
 \f]
-Values *L'_{n}*, *R'_{n}*, *L'_{counter}*, *R'_{counter}* are resulting four random numbers.
+Values \f$L'_{n}, R'_{n}, L'_{counter}, R'_{counter}\f$ are resulting four random numbers.
 Float values between [0..1) are obtained from 32-bit integers by the following rules.

@@ -25,7 +25,7 @@
 *LogicalNot* does the following with the input tensor *a*:
 \f[
-a_{i} = not(a_{i})
+a_{i} = \lnot a_{i}
 \f]
 **Examples**

@@ -37,7 +37,7 @@ Before performing logical operation, input tensors *a* and *b* are broadcasted i
 After broadcasting *LogicalXor* does the following with the input tensors *a* and *b*:
 \f[
-o_{i} = a_{i} xor b_{i}
+o_{i} = a_{i} \oplus b_{i}
 \f]
 **Examples**

@@ -11,19 +11,19 @@ The kernel dimensions are calculated using the following formulae for the `NCDHW
 \f[
 \begin{array}{lcl}
-d_{start} &=& floor(i*D_{in}/D_{out})\\
-d_{end} &=& ceil((i+1)*D_{in}/D_{out})\\
-h_{start} &=& floor(j*H_{in}/H_{out})\\
-h_{end} &=& ceil((j+1)*H_{in}/H_{out})\\
-w_{start} &=& floor(k*W_{in}/W_{out})\\
-w_{end} &=& ceil((k+1)*W_{in}/W_{out})
+d_{start} &=& \lfloor i \cdot \frac{D_{in}}{D_{out}}\rfloor\\
+d_{end} &=& \lceil(i+1) \cdot \frac{D_{in}}{D_{out}}\rceil\\
+h_{start} &=& \lfloor j \cdot \frac{H_{in}}{H_{out}}\rfloor\\
+h_{end} &=& \lceil(j+1) \cdot \frac{H_{in}}{H_{out}}\rceil\\
+w_{start} &=& \lfloor k \cdot \frac{W_{in}}{W_{out}}\rfloor\\
+w_{end} &=& \lceil(k+1) \cdot \frac{W_{in}}{W_{out}}\rceil
 \end{array}
 \f]
 The output is calculated with the following formula:
 \f[
-Output(i,j,k) = \frac{Input[d_{start}:d_{end}, h_{start}:h_{end}, w_{start}:w_{end}]}{(d_{end}-d_{start})*(h_{end}-h_{start})*(w_{end}-w_{start})}
+Output(i,j,k) = \frac{Input[d_{start}:d_{end}, h_{start}:h_{end}, w_{start}:w_{end}]}{(d_{end}-d_{start}) \cdot (h_{end}-h_{start}) \cdot (w_{end}-w_{start})}
 \f]
 **Inputs**:

@@ -11,12 +11,12 @@ The kernel dimensions are calculated using the following formulae for the `NCDHW
 \f[
 \begin{array}{lcl}
-d_{start} &=& floor(i*D_{in}/D_{out})\\
-d_{end} &=& ceil((i+1)*D_{in}/D_{out})\\
-h_{start} &=& floor(j*H_{in}/H_{out})\\
-h_{end} &=& ceil((j+1)*H_{in}/H_{out})\\
-w_{start} &=& floor(k*W_{in}/W_{out})\\
-w_{end} &=& ceil((k+1)*W_{in}/W_{out})
+d_{start} &=& \lfloor i \cdot \frac{D_{in}}{D_{out}}\rfloor\\
+d_{end} &=& \lceil(i+1) \cdot \frac{D_{in}}{D_{out}}\rceil\\
+h_{start} &=& \lfloor j \cdot \frac{H_{in}}{H_{out}}\rfloor\\
+h_{end} &=& \lceil(j+1) \cdot \frac{H_{in}}{H_{out}}\rceil\\
+w_{start} &=& \lfloor k \cdot \frac{W_{in}}{W_{out}}\rfloor\\
+w_{end} &=& \lceil(k+1) \cdot \frac{W_{in}}{W_{out}}\rceil
 \end{array}
 \f]

@@ -44,8 +44,8 @@ if(OpenCV_FOUND)
     target_link_libraries(${TARGET_NAME} PRIVATE opencv_core)
 endif()
-if(NGRAPH_ONNX_IMPORT_ENABLE)
-    target_link_libraries(${TARGET_NAME} PRIVATE onnx_importer)
+if(NGRAPH_ONNX_FRONTEND_ENABLE)
+    target_link_libraries(${TARGET_NAME} PRIVATE onnx_ngraph_frontend)
 endif()
 if(NOT MSVC)

@@ -7,7 +7,7 @@ set(CMAKE_CXX_STANDARD 11)
 set(TARGET_NAME "template_extension")
-find_package(ngraph REQUIRED OPTIONAL_COMPONENTS onnx_importer)
+find_package(ngraph REQUIRED OPTIONAL_COMPONENTS onnx_ngraph_frontend)
 find_package(InferenceEngine REQUIRED)
 find_package(OpenCV QUIET COMPONENTS core)
@@ -28,9 +28,9 @@ target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_INFERENCE_EXTENSION_
 target_link_libraries(${TARGET_NAME} PRIVATE IE::inference_engine
                                              ${NGRAPH_LIBRARIES})
-if (ngraph_onnx_importer_FOUND)
-    target_link_libraries(${TARGET_NAME} PRIVATE ${ONNX_IMPORTER_LIBRARIES})
-    target_compile_definitions(${TARGET_NAME} PRIVATE NGRAPH_ONNX_IMPORT_ENABLED)
+if (ngraph_onnx_ngraph_frontend_FOUND)
+    target_link_libraries(${TARGET_NAME} PRIVATE ngraph::onnx_ngraph_frontend)
+    target_compile_definitions(${TARGET_NAME} PRIVATE NGRAPH_ONNX_FRONTEND_ENABLED)
 endif()
 # [cmake:extension]

@@ -22,7 +22,8 @@ OpImplementation::OpImplementation(const std::shared_ptr<ngraph::Node>& node) {
         IE_THROW() << "Cannot create implementation for op with dynamic shapes!";
     if (castedNode->get_input_shape(0).size() != 4 || castedNode->get_output_shape(0).size() != 4)
         IE_THROW() << "Operation supports only 4d tensors for input and output.";
-    if (castedNode->get_input_element_type(0) != ngraph::element::f32 || castedNode->get_output_element_type(0) != ngraph::element::f32)
+    if (castedNode->get_input_element_type(0) != ngraph::element::f32 ||
+        castedNode->get_output_element_type(0) != ngraph::element::f32)
         IE_THROW() << "Operation supports only FP32 tensors.";
     add = castedNode->getAddAttr();
     inShape = castedNode->get_input_shape(0);
@@ -34,9 +35,12 @@ OpImplementation::OpImplementation(const std::shared_ptr<ngraph::Node>& node) {
 //! [cpu_implementation:ctor]
 //! [cpu_implementation:getSupportedConfigurations]
-InferenceEngine::StatusCode OpImplementation::getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf,
+InferenceEngine::StatusCode OpImplementation::getSupportedConfigurations(
+    std::vector<InferenceEngine::LayerConfig>& conf,
     InferenceEngine::ResponseDesc* resp) noexcept {
-    auto createConfig = [](const InferenceEngine::SizeVector inShape, const InferenceEngine::SizeVector& outShape, bool planar) {
+    auto createConfig = [](const InferenceEngine::SizeVector inShape,
+                           const InferenceEngine::SizeVector& outShape,
+                           bool planar) {
         InferenceEngine::LayerConfig config;
         config.dynBatchSupport = false;
         InferenceEngine::DataConfig inData;
@@ -45,9 +49,11 @@ InferenceEngine::StatusCode OpImplementation::getSupportedConfigurations(std::ve
         // Allow any offset before data
         size_t offset((std::numeric_limits<size_t>::max)());
         if (planar) {
-            inData.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, inShape, {inShape, order, offset});
+            inData.desc =
+                InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, inShape, {inShape, order, offset});
             config.inConfs.push_back(inData);
-            outData.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outShape, order, offset});
+            outData.desc =
+                InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outShape, order, offset});
             config.outConfs.push_back(outData);
         } else {
             // Add blocked (nChw8c) format
@@ -64,9 +70,11 @@ InferenceEngine::StatusCode OpImplementation::getSupportedConfigurations(std::ve
             InferenceEngine::SizeVector outBlkDims = outShape;
             outBlkDims[1] = div_up(outBlkDims[1], 8);
             outBlkDims.push_back(8);
-            inData.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, inShape, {inBlkDims, order, offset});
+            inData.desc =
+                InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, inShape, {inBlkDims, order, offset});
             config.inConfs.push_back(inData);
-            outData.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outBlkDims, order, offset});
+            outData.desc =
+                InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outBlkDims, order, offset});
             config.outConfs.push_back(outData);
         }
         return config;
@@ -87,7 +95,8 @@ InferenceEngine::StatusCode OpImplementation::getSupportedConfigurations(std::ve
 //! [cpu_implementation:getSupportedConfigurations]
 //! [cpu_implementation:init]
-InferenceEngine::StatusCode OpImplementation::init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc* resp) noexcept {
+InferenceEngine::StatusCode OpImplementation::init(InferenceEngine::LayerConfig& config,
+                                                   InferenceEngine::ResponseDesc* resp) noexcept {
     try {
         if (config.inConfs.size() != 1 || config.outConfs.size() != 1) {
             IE_THROW() << "Operation cannot be initialized with incorrect number of inputs/outputs!";
@@ -115,10 +124,13 @@ InferenceEngine::StatusCode OpImplementation::init(InferenceEngine::LayerConfig&
 //! [cpu_implementation:init]
 //! [cpu_implementation:execute]
-InferenceEngine::StatusCode OpImplementation::execute(std::vector<InferenceEngine::Blob::Ptr>& inputs, std::vector<InferenceEngine::Blob::Ptr>& outputs,
+InferenceEngine::StatusCode OpImplementation::execute(std::vector<InferenceEngine::Blob::Ptr>& inputs,
+                                                      std::vector<InferenceEngine::Blob::Ptr>& outputs,
                                                       InferenceEngine::ResponseDesc* resp) noexcept {
-    const float* src_data = inputs[0]->cbuffer().as<const float*>() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
-    float* dst_data = outputs[0]->buffer().as<float*>() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+    const float* src_data =
+        inputs[0]->cbuffer().as<const float*>() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+    float* dst_data =
+        outputs[0]->buffer().as<float*>() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
     for (size_t i = 0; i < inputs[0]->size(); i++) {
         dst_data[i] = src_data[i] + add;

@@ -16,8 +16,10 @@ public:
     explicit OpImplementation(const std::shared_ptr<ngraph::Node>& node);
     InferenceEngine::StatusCode getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf,
                                                            InferenceEngine::ResponseDesc* resp) noexcept override;
-    InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc* resp) noexcept override;
-    InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr>& inputs, std::vector<InferenceEngine::Blob::Ptr>& outputs,
+    InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config,
+                                     InferenceEngine::ResponseDesc* resp) noexcept override;
+    InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr>& inputs,
+                                        std::vector<InferenceEngine::Blob::Ptr>& outputs,
                                         InferenceEngine::ResponseDesc* resp) noexcept override;
 private:

@@ -7,12 +7,12 @@
 #include "cpu_kernel.hpp"
 #include "op.hpp"
 #ifdef OPENCV_IMPORT_ENABLED
-#include "fft_kernel.hpp"
-#include "fft_op.hpp"
+# include "fft_kernel.hpp"
+# include "fft_op.hpp"
 #endif
 #include <ngraph/ngraph.hpp>
-#ifdef NGRAPH_ONNX_IMPORT_ENABLED
-#include <onnx_import/onnx_utils.hpp>
+#ifdef NGRAPH_ONNX_FRONTEND_ENABLED
+# include <onnx_import/onnx_utils.hpp>
 #endif
 #include <map>
@@ -24,31 +24,37 @@ using namespace TemplateExtension;
 //! [extension:ctor]
 Extension::Extension() {
-#ifdef NGRAPH_ONNX_IMPORT_ENABLED
-    ngraph::onnx_import::register_operator(Operation::type_info.name, 1, "custom_domain", [](const ngraph::onnx_import::Node& node) -> ngraph::OutputVector {
-        ngraph::OutputVector ng_inputs {node.get_ng_inputs()};
+#ifdef NGRAPH_ONNX_FRONTEND_ENABLED
+    ngraph::onnx_import::register_operator(Operation::type_info.name,
+                                           1,
+                                           "custom_domain",
+                                           [](const ngraph::onnx_import::Node& node) -> ngraph::OutputVector {
+                                               ngraph::OutputVector ng_inputs{node.get_ng_inputs()};
         int64_t add = node.get_attribute_value<int64_t>("add");
         return {std::make_shared<Operation>(ng_inputs.at(0), add)};
     });
-#ifdef OPENCV_IMPORT_ENABLED
-    ngraph::onnx_import::register_operator(FFTOp::type_info.name, 1, "custom_domain", [](const ngraph::onnx_import::Node& node) -> ngraph::OutputVector {
-        ngraph::OutputVector ng_inputs {node.get_ng_inputs()};
+# ifdef OPENCV_IMPORT_ENABLED
+    ngraph::onnx_import::register_operator(FFTOp::type_info.name,
+                                           1,
+                                           "custom_domain",
+                                           [](const ngraph::onnx_import::Node& node) -> ngraph::OutputVector {
+                                               ngraph::OutputVector ng_inputs{node.get_ng_inputs()};
         bool inverse = node.get_attribute_value<int64_t>("inverse");
         return {std::make_shared<FFTOp>(ng_inputs.at(0), inverse)};
     });
-#endif
+# endif
 #endif
 }
 //! [extension:ctor]
 //! [extension:dtor]
 Extension::~Extension() {
-#ifdef NGRAPH_ONNX_IMPORT_ENABLED
+#ifdef NGRAPH_ONNX_FRONTEND_ENABLED
     ngraph::onnx_import::unregister_operator(Operation::type_info.name, 1, "custom_domain");
-#ifdef OPENCV_IMPORT_ENABLED
+# ifdef OPENCV_IMPORT_ENABLED
     ngraph::onnx_import::unregister_operator(FFTOp::type_info.name, 1, "custom_domain");
-#endif // OPENCV_IMPORT_ENABLED
-#endif // NGRAPH_ONNX_IMPORT_ENABLED
+# endif // OPENCV_IMPORT_ENABLED
+#endif // NGRAPH_ONNX_FRONTEND_ENABLED
 }
 //! [extension:dtor]
@@ -92,7 +98,8 @@ std::vector<std::string> Extension::getImplTypes(const std::shared_ptr<ngraph::N
 //! [extension:getImplTypes]
 //! [extension:getImplementation]
-InferenceEngine::ILayerImpl::Ptr Extension::getImplementation(const std::shared_ptr<ngraph::Node>& node, const std::string& implType) {
+InferenceEngine::ILayerImpl::Ptr Extension::getImplementation(const std::shared_ptr<ngraph::Node>& node,
+                                                              const std::string& implType) {
     if (implType == "CPU") {
         if (std::dynamic_pointer_cast<Operation>(node)) {
             return std::make_shared<OpImplementation>(node);

@@ -25,7 +25,8 @@ public:
     std::map<std::string, ngraph::OpSet> getOpSets() override;
     std::vector<std::string> getImplTypes(const std::shared_ptr<ngraph::Node>& node) override;
-    InferenceEngine::ILayerImpl::Ptr getImplementation(const std::shared_ptr<ngraph::Node>& node, const std::string& implType) override;
+    InferenceEngine::ILayerImpl::Ptr getImplementation(const std::shared_ptr<ngraph::Node>& node,
+                                                       const std::string& implType) override;
 };
 }  // namespace TemplateExtension

@@ -21,14 +21,16 @@ FFTImpl::FFTImpl(const std::shared_ptr<ngraph::Node>& node) {
         IE_THROW() << "Cannot create implementation for operation with incorrect number of inputs or outputs!";
     if (castedNode->get_input_partial_shape(0).is_dynamic() || castedNode->get_output_partial_shape(0).is_dynamic())
         IE_THROW() << "Cannot create implementation for op with dynamic shapes!";
-    if (castedNode->get_input_element_type(0) != ngraph::element::f32 || castedNode->get_output_element_type(0) != ngraph::element::f32)
+    if (castedNode->get_input_element_type(0) != ngraph::element::f32 ||
+        castedNode->get_output_element_type(0) != ngraph::element::f32)
         IE_THROW() << "Operation supports only FP32 tensors.";
     inpShape = castedNode->get_input_shape(0);
     outShape = castedNode->get_output_shape(0);
     inverse = castedNode->inverse;
 }
-InferenceEngine::StatusCode FFTImpl::getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf, InferenceEngine::ResponseDesc* resp) noexcept {
+InferenceEngine::StatusCode FFTImpl::getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf,
+                                                                InferenceEngine::ResponseDesc* resp) noexcept {
     std::vector<InferenceEngine::DataConfig> inDataConfig;
     std::vector<InferenceEngine::DataConfig> outDataConfig;
     InferenceEngine::SizeVector order(inpShape.size());
@@ -55,7 +57,8 @@ InferenceEngine::StatusCode FFTImpl::getSupportedConfigurations(std::vector<Infe
     return InferenceEngine::StatusCode::OK;
 }
-InferenceEngine::StatusCode FFTImpl::init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc* resp) noexcept {
+InferenceEngine::StatusCode FFTImpl::init(InferenceEngine::LayerConfig& config,
+                                          InferenceEngine::ResponseDesc* resp) noexcept {
     try {
         if (config.inConfs.size() != 1 || config.outConfs.size() != 1) {
             IE_THROW() << "Operation cannot be initialized with incorrect number of inputs/outputs!";
@@ -85,7 +88,8 @@ static cv::Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob) {
     return cv::Mat(size, CV_32F, (void*)blob->buffer());
 }
-InferenceEngine::StatusCode FFTImpl::execute(std::vector<InferenceEngine::Blob::Ptr>& inputs, std::vector<InferenceEngine::Blob::Ptr>& outputs,
+InferenceEngine::StatusCode FFTImpl::execute(std::vector<InferenceEngine::Blob::Ptr>& inputs,
+                                             std::vector<InferenceEngine::Blob::Ptr>& outputs,
                                              InferenceEngine::ResponseDesc* resp) noexcept {
     cv::Mat inp = infEngineBlobToMat(inputs[0]);
     cv::Mat out = infEngineBlobToMat(outputs[0]);
@@ -95,7 +99,8 @@ InferenceEngine::StatusCode FFTImpl::execute(std::vector<InferenceEngine::Blob::
     const int w = inp.size[3];
     cv::Mat complex(h, w, CV_32FC2), interleavedOut(h, w, CV_32FC2);
     for (int i = 0; i < n; ++i) {
-        std::vector<cv::Mat> components = {cv::Mat(h, w, CV_32F, inp.ptr<float>(i, 0)), cv::Mat(h, w, CV_32F, inp.ptr<float>(i, 1))};
+        std::vector<cv::Mat> components = {cv::Mat(h, w, CV_32F, inp.ptr<float>(i, 0)),
+                                           cv::Mat(h, w, CV_32F, inp.ptr<float>(i, 1))};
         cv::merge(components, complex);
         if (!inverse)


@ -16,8 +16,10 @@ public:
explicit FFTImpl(const std::shared_ptr<ngraph::Node>& node); explicit FFTImpl(const std::shared_ptr<ngraph::Node>& node);
InferenceEngine::StatusCode getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf, InferenceEngine::StatusCode getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf,
InferenceEngine::ResponseDesc* resp) noexcept override; InferenceEngine::ResponseDesc* resp) noexcept override;
InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc* resp) noexcept override; InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config,
InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr>& inputs, std::vector<InferenceEngine::Blob::Ptr>& outputs, InferenceEngine::ResponseDesc* resp) noexcept override;
InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr>& inputs,
std::vector<InferenceEngine::Blob::Ptr>& outputs,
InferenceEngine::ResponseDesc* resp) noexcept override; InferenceEngine::ResponseDesc* resp) noexcept override;
private: private:


@ -9,7 +9,7 @@ using namespace TemplateExtension;
constexpr ngraph::NodeTypeInfo FFTOp::type_info; constexpr ngraph::NodeTypeInfo FFTOp::type_info;
FFTOp::FFTOp(const ngraph::Output<ngraph::Node>& inp, bool _inverse): Op({inp}) { FFTOp::FFTOp(const ngraph::Output<ngraph::Node>& inp, bool _inverse) : Op({inp}) {
constructor_validate_and_infer_types(); constructor_validate_and_infer_types();
inverse = _inverse; inverse = _inverse;
} }


@ -11,7 +11,7 @@ namespace TemplateExtension {
class FFTOp : public ngraph::op::Op { class FFTOp : public ngraph::op::Op {
public: public:
static constexpr ngraph::NodeTypeInfo type_info {"FFT", 0}; static constexpr ngraph::NodeTypeInfo type_info{"FFT", 0};
const ngraph::NodeTypeInfo& get_type_info() const override { const ngraph::NodeTypeInfo& get_type_info() const override {
return type_info; return type_info;
} }


@ -9,7 +9,7 @@ using namespace TemplateExtension;
//! [op:ctor] //! [op:ctor]
NGRAPH_RTTI_DEFINITION(TemplateExtension::Operation, "Template", 0); NGRAPH_RTTI_DEFINITION(TemplateExtension::Operation, "Template", 0);
Operation::Operation(const ngraph::Output<ngraph::Node>& arg, int64_t add): Op({arg}), add(add) { Operation::Operation(const ngraph::Output<ngraph::Node>& arg, int64_t add) : Op({arg}), add(add) {
constructor_validate_and_infer_types(); constructor_validate_and_infer_types();
} }
//! [op:ctor] //! [op:ctor]


@ -9,10 +9,13 @@
using namespace TemplatePlugin; using namespace TemplatePlugin;
// ! [async_infer_request:ctor] // ! [async_infer_request:ctor]
TemplateAsyncInferRequest::TemplateAsyncInferRequest(const TemplateInferRequest::Ptr& inferRequest, const InferenceEngine::ITaskExecutor::Ptr& cpuTaskExecutor, TemplateAsyncInferRequest::TemplateAsyncInferRequest(const TemplateInferRequest::Ptr& inferRequest,
const InferenceEngine::ITaskExecutor::Ptr& cpuTaskExecutor,
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor, const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor) const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor)
: AsyncInferRequestThreadSafeDefault(inferRequest, cpuTaskExecutor, callbackExecutor), _inferRequest(inferRequest), _waitExecutor(waitExecutor) { : AsyncInferRequestThreadSafeDefault(inferRequest, cpuTaskExecutor, callbackExecutor),
_inferRequest(inferRequest),
_waitExecutor(waitExecutor) {
// In the current implementation we have CPU-only tasks and no need for two executors // In the current implementation we have CPU-only tasks and no need for two executors
// So, by default single stage pipeline is created. // So, by default single stage pipeline is created.
// This stage executes InferRequest::Infer() using cpuTaskExecutor. // This stage executes InferRequest::Infer() using cpuTaskExecutor.
@ -23,7 +26,8 @@ TemplateAsyncInferRequest::TemplateAsyncInferRequest(const TemplateInferRequest:
if (remoteDevice) { if (remoteDevice) {
_pipeline = {{cpuTaskExecutor, _pipeline = {{cpuTaskExecutor,
[this] { [this] {
OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "TemplateAsyncInferRequest::PreprocessingAndStartPipeline"); OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin,
"TemplateAsyncInferRequest::PreprocessingAndStartPipeline");
_inferRequest->inferPreprocess(); _inferRequest->inferPreprocess();
_inferRequest->startPipeline(); _inferRequest->startPipeline();
}}, }},


@ -13,8 +13,10 @@ namespace TemplatePlugin {
// ! [async_infer_request:header] // ! [async_infer_request:header]
class TemplateAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault { class TemplateAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
public: public:
TemplateAsyncInferRequest(const TemplateInferRequest::Ptr& inferRequest, const InferenceEngine::ITaskExecutor::Ptr& taskExecutor, TemplateAsyncInferRequest(const TemplateInferRequest::Ptr& inferRequest,
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor, const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor); const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
~TemplateAsyncInferRequest(); ~TemplateAsyncInferRequest();


@ -23,7 +23,8 @@ Configuration::Configuration(const ConfigMap& config, const Configuration& defau
if (TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS) == key) { if (TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS) == key) {
_streamsExecutorConfig.SetConfig(CONFIG_KEY(CPU_THROUGHPUT_STREAMS), value); _streamsExecutorConfig.SetConfig(CONFIG_KEY(CPU_THROUGHPUT_STREAMS), value);
} else if (streamExecutorConfigKeys.end() != std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) { } else if (streamExecutorConfigKeys.end() !=
std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) {
_streamsExecutorConfig.SetConfig(key, value); _streamsExecutorConfig.SetConfig(key, value);
} else if (CONFIG_KEY(DEVICE_ID) == key) { } else if (CONFIG_KEY(DEVICE_ID) == key) {
deviceId = std::stoi(value); deviceId = std::stoi(value);


@ -21,7 +21,9 @@ struct Configuration {
Configuration& operator=(const Configuration&) = default; Configuration& operator=(const Configuration&) = default;
Configuration& operator=(Configuration&&) = default; Configuration& operator=(Configuration&&) = default;
explicit Configuration(const ConfigMap& config, const Configuration& defaultCfg = {}, const bool throwOnUnsupported = true); explicit Configuration(const ConfigMap& config,
const Configuration& defaultCfg = {},
const bool throwOnUnsupported = true);
InferenceEngine::Parameter Get(const std::string& name) const; InferenceEngine::Parameter Get(const std::string& name) const;


@ -18,8 +18,10 @@ using namespace TemplatePlugin;
// ! [executable_network:ctor_cnnnetwork] // ! [executable_network:ctor_cnnnetwork]
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(const std::shared_ptr<const ngraph::Function>& function, TemplatePlugin::ExecutableNetwork::ExecutableNetwork(const std::shared_ptr<const ngraph::Function>& function,
const InferenceEngine::InputsDataMap& inputInfoMap, const InferenceEngine::OutputsDataMap& outputsInfoMap, const InferenceEngine::InputsDataMap& inputInfoMap,
const Configuration& cfg, const Plugin::Ptr& plugin) const InferenceEngine::OutputsDataMap& outputsInfoMap,
const Configuration& cfg,
const Plugin::Ptr& plugin)
: InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, nullptr), // Disable default threads creation : InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, nullptr), // Disable default threads creation
_cfg(cfg), _cfg(cfg),
_plugin(plugin) { _plugin(plugin) {
@ -40,7 +42,11 @@ TemplatePlugin::ExecutableNetwork::ExecutableNetwork(const std::shared_ptr<const
// ! [executable_network:ctor_cnnnetwork] // ! [executable_network:ctor_cnnnetwork]
// ! [executable_network:ctor_import_stream] // ! [executable_network:ctor_import_stream]
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream& model, const Configuration& cfg, const Plugin::Ptr& plugin): _cfg(cfg), _plugin(plugin) { TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream& model,
const Configuration& cfg,
const Plugin::Ptr& plugin)
: _cfg(cfg),
_plugin(plugin) {
// read XML content // read XML content
std::string xmlString; std::string xmlString;
std::uint64_t dataSize = 0; std::uint64_t dataSize = 0;
@ -53,7 +59,9 @@ TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream& model, const
model.read(reinterpret_cast<char*>(&dataSize), sizeof(dataSize)); model.read(reinterpret_cast<char*>(&dataSize), sizeof(dataSize));
if (0 != dataSize) { if (0 != dataSize) {
dataBlob = InferenceEngine::make_shared_blob<std::uint8_t>( dataBlob = InferenceEngine::make_shared_blob<std::uint8_t>(
InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, {static_cast<std::size_t>(dataSize)}, InferenceEngine::Layout::C)); InferenceEngine::TensorDesc(InferenceEngine::Precision::U8,
{static_cast<std::size_t>(dataSize)},
InferenceEngine::Layout::C));
dataBlob->allocate(); dataBlob->allocate();
model.read(dataBlob->buffer(), dataSize); model.read(dataBlob->buffer(), dataSize);
} }
@ -84,7 +92,8 @@ TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream& model, const
// ! [executable_network:map_graph] // ! [executable_network:map_graph]
// forward declaration // forward declaration
std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const ngraph::Function>& function, const InferenceEngine::InputsDataMap& inputInfoMap, std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const ngraph::Function>& function,
const InferenceEngine::InputsDataMap& inputInfoMap,
const InferenceEngine::OutputsDataMap& outputsInfoMap); const InferenceEngine::OutputsDataMap& outputsInfoMap);
void TemplatePlugin::ExecutableNetwork::CompileNetwork(const std::shared_ptr<const ngraph::Function>& function, void TemplatePlugin::ExecutableNetwork::CompileNetwork(const std::shared_ptr<const ngraph::Function>& function,
@ -117,29 +126,36 @@ void TemplatePlugin::ExecutableNetwork::CompileNetwork(const std::shared_ptr<con
void TemplatePlugin::ExecutableNetwork::InitExecutor() { void TemplatePlugin::ExecutableNetwork::InitExecutor() {
// Default multi-threaded configuration is balanced for throughput and latency cases and takes into account // Default multi-threaded configuration is balanced for throughput and latency cases and takes into account
// real hardware cores and NUMA nodes. // real hardware cores and NUMA nodes.
auto streamsExecutorConfig = InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg._streamsExecutorConfig); auto streamsExecutorConfig =
InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg._streamsExecutorConfig);
streamsExecutorConfig._name = "TemplateStreamsExecutor"; streamsExecutorConfig._name = "TemplateStreamsExecutor";
// As Inference Engine CPU Streams Executor creates some additional threads // As Inference Engine CPU Streams Executor creates some additional threads
// it is better to avoid thread re-creation as some OS memory allocators cannot manage such usage cases // it is better to avoid thread re-creation as some OS memory allocators cannot manage such usage cases
// and memory consumption can be larger than expected. // and memory consumption can be larger than expected.
// So Inference Engine provides an executor cache. // So Inference Engine provides an executor cache.
_taskExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamsExecutorConfig); _taskExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamsExecutorConfig);
// NOTE: callback Executor is not configured. So callback will be called in the thread of the last stage of inference request pipeline // NOTE: callback Executor is not configured. So callback will be called in the thread of the last stage of
// _callbackExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateCallbackExecutor"}); // inference request pipeline _callbackExecutor =
// InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateCallbackExecutor"});
} }
// ! [executable_network:init_executor] // ! [executable_network:init_executor]
// ! [executable_network:create_infer_request_impl] // ! [executable_network:create_infer_request_impl]
InferenceEngine::IInferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::IInferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequestImpl(
InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) { InferenceEngine::OutputsDataMap networkOutputs) {
return std::make_shared<TemplateInferRequest>(networkInputs, networkOutputs, std::static_pointer_cast<ExecutableNetwork>(shared_from_this())); return std::make_shared<TemplateInferRequest>(networkInputs,
networkOutputs,
std::static_pointer_cast<ExecutableNetwork>(shared_from_this()));
} }
// ! [executable_network:create_infer_request_impl] // ! [executable_network:create_infer_request_impl]
// ! [executable_network:create_infer_request] // ! [executable_network:create_infer_request]
InferenceEngine::IInferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequest() { InferenceEngine::IInferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequest() {
auto internalRequest = CreateInferRequestImpl(_networkInputs, _networkOutputs); auto internalRequest = CreateInferRequestImpl(_networkInputs, _networkOutputs);
return std::make_shared<TemplateAsyncInferRequest>(std::static_pointer_cast<TemplateInferRequest>(internalRequest), _taskExecutor, _plugin->_waitExecutor, return std::make_shared<TemplateAsyncInferRequest>(std::static_pointer_cast<TemplateInferRequest>(internalRequest),
_taskExecutor,
_plugin->_waitExecutor,
_callbackExecutor); _callbackExecutor);
} }
// ! [executable_network:create_infer_request] // ! [executable_network:create_infer_request]
@ -154,11 +170,16 @@ InferenceEngine::Parameter TemplatePlugin::ExecutableNetwork::GetConfig(const st
InferenceEngine::Parameter TemplatePlugin::ExecutableNetwork::GetMetric(const std::string& name) const { InferenceEngine::Parameter TemplatePlugin::ExecutableNetwork::GetMetric(const std::string& name) const {
// TODO: return more supported values for metrics // TODO: return more supported values for metrics
if (EXEC_NETWORK_METRIC_KEY(SUPPORTED_METRICS) == name) { if (EXEC_NETWORK_METRIC_KEY(SUPPORTED_METRICS) == name) {
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, std::vector<std::string> {METRIC_KEY(NETWORK_NAME), METRIC_KEY(SUPPORTED_METRICS), IE_SET_METRIC_RETURN(SUPPORTED_METRICS,
METRIC_KEY(SUPPORTED_CONFIG_KEYS), METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)}); std::vector<std::string>{METRIC_KEY(NETWORK_NAME),
METRIC_KEY(SUPPORTED_METRICS),
METRIC_KEY(SUPPORTED_CONFIG_KEYS),
METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)});
} else if (EXEC_NETWORK_METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) { } else if (EXEC_NETWORK_METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) {
std::vector<std::string> configKeys = {CONFIG_KEY(DEVICE_ID), CONFIG_KEY(PERF_COUNT), TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS)}; std::vector<std::string> configKeys = {CONFIG_KEY(DEVICE_ID),
auto streamExecutorConfigKeys = InferenceEngine::IStreamsExecutor::Config {}.SupportedKeys(); CONFIG_KEY(PERF_COUNT),
TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS)};
auto streamExecutorConfigKeys = InferenceEngine::IStreamsExecutor::Config{}.SupportedKeys();
for (auto&& configKey : streamExecutorConfigKeys) { for (auto&& configKey : streamExecutorConfigKeys) {
configKeys.emplace_back(configKey); configKeys.emplace_back(configKey);
} }


@ -23,15 +23,19 @@ class Plugin;
// ! [executable_network:header] // ! [executable_network:header]
class ExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault { class ExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
public: public:
ExecutableNetwork(const std::shared_ptr<const ngraph::Function>& function, const InferenceEngine::InputsDataMap& inputInfoMap, ExecutableNetwork(const std::shared_ptr<const ngraph::Function>& function,
const InferenceEngine::OutputsDataMap& outputsInfoMap, const Configuration& cfg, const std::shared_ptr<Plugin>& plugin); const InferenceEngine::InputsDataMap& inputInfoMap,
const InferenceEngine::OutputsDataMap& outputsInfoMap,
const Configuration& cfg,
const std::shared_ptr<Plugin>& plugin);
ExecutableNetwork(std::istream& model, const Configuration& cfg, const std::shared_ptr<Plugin>& plugin); ExecutableNetwork(std::istream& model, const Configuration& cfg, const std::shared_ptr<Plugin>& plugin);
// Methods from a base class ExecutableNetworkThreadSafeDefault // Methods from a base class ExecutableNetworkThreadSafeDefault
void Export(std::ostream& model) override; void Export(std::ostream& model) override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(
InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) override; InferenceEngine::OutputsDataMap networkOutputs) override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override; InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override;
InferenceEngine::Parameter GetMetric(const std::string& name) const override; InferenceEngine::Parameter GetMetric(const std::string& name) const override;
@ -40,7 +44,8 @@ public:
private: private:
friend class TemplateInferRequest; friend class TemplateInferRequest;
void CompileNetwork(const std::shared_ptr<const ngraph::Function>& function, const InferenceEngine::InputsDataMap& inputInfoMap, void CompileNetwork(const std::shared_ptr<const ngraph::Function>& function,
const InferenceEngine::InputsDataMap& inputInfoMap,
const InferenceEngine::OutputsDataMap& outputsInfoMap); const InferenceEngine::OutputsDataMap& outputsInfoMap);
void InitExecutor(); void InitExecutor();


@ -23,19 +23,25 @@ using namespace InferenceEngine;
using Time = std::chrono::high_resolution_clock; using Time = std::chrono::high_resolution_clock;
// ! [infer_request:ctor] // ! [infer_request:ctor]
TemplateInferRequest::TemplateInferRequest(const InferenceEngine::InputsDataMap& networkInputs, const InferenceEngine::OutputsDataMap& networkOutputs, TemplateInferRequest::TemplateInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
const InferenceEngine::OutputsDataMap& networkOutputs,
const std::shared_ptr<TemplatePlugin::ExecutableNetwork>& executableNetwork) const std::shared_ptr<TemplatePlugin::ExecutableNetwork>& executableNetwork)
: IInferRequestInternal(networkInputs, networkOutputs), _executableNetwork(executableNetwork) { : IInferRequestInternal(networkInputs, networkOutputs),
_executableNetwork(executableNetwork) {
// TODO: allocate infer request device and host buffers if needed, fill actual list of profiling tasks // TODO: allocate infer request device and host buffers if needed, fill actual list of profiling tasks
auto requestID = std::to_string(_executableNetwork->_requestId.fetch_add(1)); auto requestID = std::to_string(_executableNetwork->_requestId.fetch_add(1));
std::string name = _executableNetwork->_function->get_friendly_name() + "_Req" + requestID; std::string name = _executableNetwork->_function->get_friendly_name() + "_Req" + requestID;
_profilingTask = { _profilingTask = {
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_Preprocess"), openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name +
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_Postprocess"), "_Preprocess"),
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_StartPipline"), openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name +
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_WaitPipline"), "_Postprocess"),
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name +
"_StartPipline"),
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name +
"_WaitPipline"),
}; };
_executable = _executableNetwork->_plugin->_backend->compile(_executableNetwork->_function); _executable = _executableNetwork->_plugin->_backend->compile(_executableNetwork->_function);
@ -60,7 +66,10 @@ void TemplateInferRequest::allocateDeviceBuffers() {
} }
template <typename BlobDataMap, typename GetNetworkPrecisionF> template <typename BlobDataMap, typename GetNetworkPrecisionF>
static void AllocateImpl(const BlobDataMap& userDataMap, BlobMap& userBlobMap, BlobMap& deviceBlobMap, GetNetworkPrecisionF&& GetNetworkPrecision, static void AllocateImpl(const BlobDataMap& userDataMap,
BlobMap& userBlobMap,
BlobMap& deviceBlobMap,
GetNetworkPrecisionF&& GetNetworkPrecision,
bool isInputBlob = true) { bool isInputBlob = true) {
for (auto&& userData : userDataMap) { for (auto&& userData : userDataMap) {
const auto& dims = userData.second->getTensorDesc().getDims(); const auto& dims = userData.second->getTensorDesc().getDims();
@ -95,7 +104,9 @@ void TemplateInferRequest::allocateBlobs() {
}); });
auto&& results = _executableNetwork->_function->get_results(); auto&& results = _executableNetwork->_function->get_results();
AllocateImpl( AllocateImpl(
_networkOutputs, _outputs, _networkOutputBlobs, _networkOutputs,
_outputs,
_networkOutputBlobs,
[&](const std::string& blobName) { [&](const std::string& blobName) {
return results.at(_executableNetwork->_outputIndex.at(blobName))->get_element_type(); return results.at(_executableNetwork->_outputIndex.at(blobName))->get_element_type();
}, },
@ -114,8 +125,10 @@ void TemplateInferRequest::InferImpl() {
template <typename SrcT, typename DstT> template <typename SrcT, typename DstT>
static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) { static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
ngraph::runtime::reference::convert<SrcT, DstT>(InferenceEngine::as<InferenceEngine::MemoryBlob>(src)->rmap().as<const SrcT*>(), ngraph::runtime::reference::convert<SrcT, DstT>(
InferenceEngine::as<InferenceEngine::MemoryBlob>(dst)->wmap().as<DstT*>(), src->size()); InferenceEngine::as<InferenceEngine::MemoryBlob>(src)->rmap().as<const SrcT*>(),
InferenceEngine::as<InferenceEngine::MemoryBlob>(dst)->wmap().as<DstT*>(),
src->size());
} }
static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) { static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
@ -128,8 +141,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
blobCopy<std::uint8_t, float>(src, dst); blobCopy<std::uint8_t, float>(src, dst);
} break; } break;
default: { default: {
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to " IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
<< dst->getTensorDesc().getPrecision(); << " to " << dst->getTensorDesc().getPrecision();
} }
} }
} break; } break;
@ -141,8 +154,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
blobCopy<float, std::uint8_t>(src, dst); blobCopy<float, std::uint8_t>(src, dst);
} break; } break;
default: { default: {
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to " IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
<< dst->getTensorDesc().getPrecision(); << " to " << dst->getTensorDesc().getPrecision();
} }
} }
} break; } break;
@ -154,8 +167,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
blobCopy<int64_t, int32_t>(src, dst); blobCopy<int64_t, int32_t>(src, dst);
} break; } break;
default: { default: {
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to " IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
<< dst->getTensorDesc().getPrecision(); << " to " << dst->getTensorDesc().getPrecision();
} }
} }
} break; } break;
@ -167,8 +180,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
blobCopy<int16_t, float>(src, dst); blobCopy<int16_t, float>(src, dst);
} break; } break;
default: { default: {
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to " IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
<< dst->getTensorDesc().getPrecision(); << " to " << dst->getTensorDesc().getPrecision();
} }
} }
} break; } break;
@ -180,8 +193,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
blobCopy<int8_t, float>(src, dst); blobCopy<int8_t, float>(src, dst);
} break; } break;
default: { default: {
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to " IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
<< dst->getTensorDesc().getPrecision(); << " to " << dst->getTensorDesc().getPrecision();
} }
} }
} break; } break;
@ -193,8 +206,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
blobCopy<bool, float>(src, dst); blobCopy<bool, float>(src, dst);
} break; } break;
default: { default: {
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to " IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
<< dst->getTensorDesc().getPrecision(); << " to " << dst->getTensorDesc().getPrecision();
} }
} }
} break; } break;
@ -206,8 +219,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
blobCopy<uint16_t, float>(src, dst); blobCopy<uint16_t, float>(src, dst);
} break; } break;
default: { default: {
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to " IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
<< dst->getTensorDesc().getPrecision(); << " to " << dst->getTensorDesc().getPrecision();
} }
} }
} break; } break;
@ -230,7 +243,9 @@ void TemplateInferRequest::inferPreprocess() {
const auto& parameterShape = parameter->get_shape(); const auto& parameterShape = parameter->get_shape();
const auto& parameterType = parameter->get_element_type(); const auto& parameterType = parameter->get_element_type();
_inputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor( _inputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor(
parameterType, parameterShape, InferenceEngine::as<InferenceEngine::MemoryBlob>(networkInput.second)->rmap().as<void*>()); parameterType,
parameterShape,
InferenceEngine::as<InferenceEngine::MemoryBlob>(networkInput.second)->rmap().as<void*>());
} }
for (auto&& output : _outputs) { for (auto&& output : _outputs) {
auto outputBlob = output.second; auto outputBlob = output.second;
@ -243,7 +258,9 @@ void TemplateInferRequest::inferPreprocess() {
const auto& resultShape = result->get_shape(); const auto& resultShape = result->get_shape();
const auto& resultType = result->get_element_type(); const auto& resultType = result->get_element_type();
_outputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor( _outputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor(
resultType, resultShape, InferenceEngine::as<InferenceEngine::MemoryBlob>(networkOutput)->wmap().as<void*>()); resultType,
resultShape,
InferenceEngine::as<InferenceEngine::MemoryBlob>(networkOutput)->wmap().as<void*>());
} }
_durations[Preprocess] = Time::now() - start; _durations[Preprocess] = Time::now() - start;
} }


@ -26,7 +26,8 @@ class TemplateInferRequest : public InferenceEngine::IInferRequestInternal {
public: public:
typedef std::shared_ptr<TemplateInferRequest> Ptr; typedef std::shared_ptr<TemplateInferRequest> Ptr;
TemplateInferRequest(const InferenceEngine::InputsDataMap& networkInputs, const InferenceEngine::OutputsDataMap& networkOutputs, TemplateInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
const InferenceEngine::OutputsDataMap& networkOutputs,
const std::shared_ptr<ExecutableNetwork>& executableNetwork); const std::shared_ptr<ExecutableNetwork>& executableNetwork);
~TemplateInferRequest(); ~TemplateInferRequest();


@ -38,7 +38,8 @@ Plugin::Plugin() {
_backend = ngraph::runtime::Backend::create("INTERPRETER"); _backend = ngraph::runtime::Backend::create("INTERPRETER");
// create default stream executor with a given name // create default stream executor with a given name
_waitExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateWaitExecutor"}); _waitExecutor =
InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateWaitExecutor"});
} }
// ! [plugin:ctor] // ! [plugin:ctor]
@ -54,7 +55,8 @@ Plugin::~Plugin() {
// ! [plugin:transform_network] // ! [plugin:transform_network]
std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const ngraph::Function>& function, const InferenceEngine::InputsDataMap& inputInfoMap, std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const ngraph::Function>& function,
const InferenceEngine::InputsDataMap& inputInfoMap,
const InferenceEngine::OutputsDataMap& outputsInfoMap) { const InferenceEngine::OutputsDataMap& outputsInfoMap) {
// 1. Copy ngraph::Function first to apply some transformations which modify original ngraph::Function // 1. Copy ngraph::Function first to apply some transformations which modify original ngraph::Function
auto transformedNetwork = ngraph::clone_function(*function); auto transformedNetwork = ngraph::clone_function(*function);
@ -70,13 +72,15 @@ std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const n
bool needF16toF32 = false; bool needF16toF32 = false;
for (const auto& param : function->get_parameters()) { for (const auto& param : function->get_parameters()) {
if (param->get_element_type() == ngraph::element::f16 && if (param->get_element_type() == ngraph::element::f16 &&
inputInfoMap.at(param->get_friendly_name())->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP16) { inputInfoMap.at(param->get_friendly_name())->getTensorDesc().getPrecision() !=
InferenceEngine::Precision::FP16) {
needF16toF32 = true; needF16toF32 = true;
break; break;
} }
} }
if (needF16toF32) if (needF16toF32)
passManager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ngraph::element::f16, ngraph::element::f32}}); passManager.register_pass<ngraph::pass::ConvertPrecision>(
precisions_array{{ngraph::element::f16, ngraph::element::f32}});
// Example: register plugin specific transformation // Example: register plugin specific transformation
passManager.register_pass<ngraph::pass::DecomposeDivideMatcher>(); passManager.register_pass<ngraph::pass::DecomposeDivideMatcher>();
passManager.register_pass<ngraph::pass::ReluReluFusionMatcher>(); passManager.register_pass<ngraph::pass::ReluReluFusionMatcher>();
@ -92,32 +96,41 @@ std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const n
// ! [plugin:transform_network] // ! [plugin:transform_network]
// ! [plugin:load_exe_network_impl] // ! [plugin:load_exe_network_impl]
InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork& network, const ConfigMap& config) { InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork& network,
const ConfigMap& config) {
OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::LoadExeNetworkImpl"); OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::LoadExeNetworkImpl");
InferenceEngine::InputsDataMap networkInputs = network.getInputsInfo(); InferenceEngine::InputsDataMap networkInputs = network.getInputsInfo();
InferenceEngine::OutputsDataMap networkOutputs = network.getOutputsInfo(); InferenceEngine::OutputsDataMap networkOutputs = network.getOutputsInfo();
auto fullConfig = Configuration {config, _cfg}; auto fullConfig = Configuration{config, _cfg};
return std::make_shared<ExecutableNetwork>(network.getFunction(), networkInputs, networkOutputs, fullConfig, return std::make_shared<ExecutableNetwork>(network.getFunction(),
networkInputs,
networkOutputs,
fullConfig,
std::static_pointer_cast<Plugin>(shared_from_this())); std::static_pointer_cast<Plugin>(shared_from_this()));
} }
// ! [plugin:load_exe_network_impl] // ! [plugin:load_exe_network_impl]
// ! [plugin:import_network] // ! [plugin:import_network]
InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(std::istream& modelStream, const std::map<std::string, std::string>& config) { InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(
std::istream& modelStream,
const std::map<std::string, std::string>& config) {
OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::ImportNetwork"); OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::ImportNetwork");
auto fullConfig = Configuration {config, _cfg}; auto fullConfig = Configuration{config, _cfg};
return std::make_shared<ExecutableNetwork>(modelStream, fullConfig, std::static_pointer_cast<Plugin>(shared_from_this())); return std::make_shared<ExecutableNetwork>(modelStream,
fullConfig,
std::static_pointer_cast<Plugin>(shared_from_this()));
} }
// ! [plugin:import_network] // ! [plugin:import_network]
// ! [plugin:query_network] // ! [plugin:query_network]
InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::CNNNetwork& network, const ConfigMap& config) const { InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::CNNNetwork& network,
const ConfigMap& config) const {
OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::QueryNetwork"); OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::QueryNetwork");
Configuration fullConfig {config, _cfg, false}; Configuration fullConfig{config, _cfg, false};
auto function = network.getFunction(); auto function = network.getFunction();
// 1. First of all we should store initial input operation set // 1. First of all we should store initial input operation set
@ -160,7 +173,8 @@ InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::
// 5. If some housekeeping nodes were not added - add them. // 5. If some housekeeping nodes were not added - add them.
if (InferenceEngine::details::contains(supported, node->get_friendly_name())) { if (InferenceEngine::details::contains(supported, node->get_friendly_name())) {
for (auto&& inputNodeOutput : node->input_values()) { for (auto&& inputNodeOutput : node->input_values()) {
if (ngraph::op::is_constant(inputNodeOutput.get_node()) || ngraph::op::is_parameter(inputNodeOutput.get_node())) { if (ngraph::op::is_constant(inputNodeOutput.get_node()) ||
ngraph::op::is_parameter(inputNodeOutput.get_node())) {
supported.emplace(inputNodeOutput.get_node()->get_friendly_name()); supported.emplace(inputNodeOutput.get_node()->get_friendly_name());
} }
} }
@ -175,11 +189,14 @@ InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::
// 6. Eliminate subgraphs that consist of housekeeping nodes only // 6. Eliminate subgraphs that consist of housekeeping nodes only
if (ngraph::op::is_constant(node) || ngraph::op::is_parameter(node)) { if (ngraph::op::is_constant(node) || ngraph::op::is_parameter(node)) {
if (!InferenceEngine::details::contains(supported, node->output(0).get_target_inputs().begin()->get_node()->get_friendly_name())) { if (!InferenceEngine::details::contains(
supported,
node->output(0).get_target_inputs().begin()->get_node()->get_friendly_name())) {
supported.erase(node->get_friendly_name()); supported.erase(node->get_friendly_name());
} }
} else if (ngraph::op::is_output(node)) { } else if (ngraph::op::is_output(node)) {
if (!InferenceEngine::details::contains(supported, node->input_values().begin()->get_node()->get_friendly_name())) { if (!InferenceEngine::details::contains(supported,
node->input_values().begin()->get_node()->get_friendly_name())) {
supported.erase(node->get_friendly_name()); supported.erase(node->get_friendly_name());
} }
} }
@ -204,27 +221,36 @@ void Plugin::AddExtension(const InferenceEngine::IExtensionPtr& /*extension*/) {
// ! [plugin:set_config] // ! [plugin:set_config]
void Plugin::SetConfig(const ConfigMap& config) { void Plugin::SetConfig(const ConfigMap& config) {
_cfg = Configuration {config, _cfg}; _cfg = Configuration{config, _cfg};
} }
// ! [plugin:set_config] // ! [plugin:set_config]
// ! [plugin:get_config] // ! [plugin:get_config]
InferenceEngine::Parameter Plugin::GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& /*options*/) const { InferenceEngine::Parameter Plugin::GetConfig(
const std::string& name,
const std::map<std::string, InferenceEngine::Parameter>& /*options*/) const {
return _cfg.Get(name); return _cfg.Get(name);
} }
// ! [plugin:get_config] // ! [plugin:get_config]
// ! [plugin:get_metric] // ! [plugin:get_metric]
InferenceEngine::Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const { InferenceEngine::Parameter Plugin::GetMetric(const std::string& name,
const std::map<std::string, InferenceEngine::Parameter>& options) const {
if (METRIC_KEY(SUPPORTED_METRICS) == name) { if (METRIC_KEY(SUPPORTED_METRICS) == name) {
std::vector<std::string> supportedMetrics = {METRIC_KEY(AVAILABLE_DEVICES), METRIC_KEY(SUPPORTED_METRICS), std::vector<std::string> supportedMetrics = {METRIC_KEY(AVAILABLE_DEVICES),
METRIC_KEY(SUPPORTED_CONFIG_KEYS), METRIC_KEY(FULL_DEVICE_NAME), METRIC_KEY(SUPPORTED_METRICS),
METRIC_KEY(IMPORT_EXPORT_SUPPORT), METRIC_KEY(DEVICE_ARCHITECTURE), METRIC_KEY(SUPPORTED_CONFIG_KEYS),
METRIC_KEY(OPTIMIZATION_CAPABILITIES), METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)}; METRIC_KEY(FULL_DEVICE_NAME),
METRIC_KEY(IMPORT_EXPORT_SUPPORT),
METRIC_KEY(DEVICE_ARCHITECTURE),
METRIC_KEY(OPTIMIZATION_CAPABILITIES),
METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)};
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, supportedMetrics); IE_SET_METRIC_RETURN(SUPPORTED_METRICS, supportedMetrics);
} else if (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) { } else if (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) {
std::vector<std::string> configKeys = {CONFIG_KEY(DEVICE_ID), CONFIG_KEY(PERF_COUNT), TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS)}; std::vector<std::string> configKeys = {CONFIG_KEY(DEVICE_ID),
auto streamExecutorConfigKeys = InferenceEngine::IStreamsExecutor::Config {}.SupportedKeys(); CONFIG_KEY(PERF_COUNT),
TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS)};
auto streamExecutorConfigKeys = InferenceEngine::IStreamsExecutor::Config{}.SupportedKeys();
for (auto&& configKey : streamExecutorConfigKeys) { for (auto&& configKey : streamExecutorConfigKeys) {
if (configKey != InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS) { if (configKey != InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS) {
configKeys.emplace_back(configKey); configKeys.emplace_back(configKey);
@ -251,7 +277,7 @@ InferenceEngine::Parameter Plugin::GetMetric(const std::string& name, const std:
} else if (METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS) == name) { } else if (METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS) == name) {
// TODO: fill with actual values // TODO: fill with actual values
using uint = unsigned int; using uint = unsigned int;
IE_SET_METRIC_RETURN(RANGE_FOR_ASYNC_INFER_REQUESTS, std::make_tuple(uint {1}, uint {1}, uint {1})); IE_SET_METRIC_RETURN(RANGE_FOR_ASYNC_INFER_REQUESTS, std::make_tuple(uint{1}, uint{1}, uint{1}));
} else { } else {
IE_THROW(NotFound) << "Unsupported device metric: " << name; IE_THROW(NotFound) << "Unsupported device metric: " << name;
} }


@ -23,12 +23,19 @@ public:
void SetConfig(const std::map<std::string, std::string>& config) override; void SetConfig(const std::map<std::string, std::string>& config) override;
InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network, InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config) const override; const std::map<std::string, std::string>& config) const override;
InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::CNNNetwork& network, InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(
const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config) override; const std::map<std::string, std::string>& config) override;
void AddExtension(const std::shared_ptr<InferenceEngine::IExtension>& extension) override; void AddExtension(const std::shared_ptr<InferenceEngine::IExtension>& extension) override;
InferenceEngine::Parameter GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override; InferenceEngine::Parameter GetConfig(
InferenceEngine::Parameter GetMetric(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override; const std::string& name,
InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetwork(std::istream& model, const std::map<std::string, std::string>& config) override; const std::map<std::string, InferenceEngine::Parameter>& options) const override;
InferenceEngine::Parameter GetMetric(
const std::string& name,
const std::map<std::string, InferenceEngine::Parameter>& options) const override;
InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetwork(
std::istream& model,
const std::map<std::string, std::string>& config) override;
private: private:
friend class ExecutableNetwork; friend class ExecutableNetwork;


@ -28,7 +28,10 @@ ngraph::pass::AddMeanSubtract::AddMeanSubtract(const MeanMap& inputInfoMap) {
} }
auto mean_const = it->second; auto mean_const = it->second;
NGRAPH_CHECK(mean_const->get_element_type() == ngraph::element::f32, "Mean for ", param->get_friendly_name(), " must have f32 type"); NGRAPH_CHECK(mean_const->get_element_type() == ngraph::element::f32,
"Mean for ",
param->get_friendly_name(),
" must have f32 type");
auto copy_param = param->clone_with_new_inputs({}); auto copy_param = param->clone_with_new_inputs({});
auto sub = std::make_shared<ngraph::opset3::Subtract>(copy_param, mean_const); auto sub = std::make_shared<ngraph::opset3::Subtract>(copy_param, mean_const);


@ -12,7 +12,8 @@
NGRAPH_RTTI_DEFINITION(ngraph::pass::AddPreprocessing, "AddPreprocessing", 0); NGRAPH_RTTI_DEFINITION(ngraph::pass::AddPreprocessing, "AddPreprocessing", 0);
ngraph::pass::AddPreprocessing::AddPreprocessing(const InferenceEngine::InputsDataMap& inputInfoMap): m_inputInfoMap(inputInfoMap) {} ngraph::pass::AddPreprocessing::AddPreprocessing(const InferenceEngine::InputsDataMap& inputInfoMap)
: m_inputInfoMap(inputInfoMap) {}
bool ngraph::pass::AddPreprocessing::run_on_function(std::shared_ptr<ngraph::Function> f) { bool ngraph::pass::AddPreprocessing::run_on_function(std::shared_ptr<ngraph::Function> f) {
ngraph::pass::AddMeanSubtract::MeanMap meanMap; ngraph::pass::AddMeanSubtract::MeanMap meanMap;
@ -39,10 +40,12 @@ bool ngraph::pass::AddPreprocessing::run_on_function(std::shared_ptr<ngraph::Fun
has_mean_image = true; has_mean_image = true;
if (c == 0) { if (c == 0) {
meanImage = pInfo[c]->meanData; meanImage = pInfo[c]->meanData;
NGRAPH_CHECK(meanImage->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32, NGRAPH_CHECK(
meanImage->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32,
"Only InferenceEngine::Precision::FP32 precision is supported for PreProcessChannel::meanData"); "Only InferenceEngine::Precision::FP32 precision is supported for PreProcessChannel::meanData");
} else { } else {
NGRAPH_CHECK(meanImage->getTensorDesc() == pInfo[c]->meanData->getTensorDesc(), "TensorDesc for PreProcessChannel::meanData must be equal"); NGRAPH_CHECK(meanImage->getTensorDesc() == pInfo[c]->meanData->getTensorDesc(),
"TensorDesc for PreProcessChannel::meanData must be equal");
} }
} }
} }
@ -52,7 +55,8 @@ bool ngraph::pass::AddPreprocessing::run_on_function(std::shared_ptr<ngraph::Fun
continue; continue;
} }
NGRAPH_CHECK(!(has_mean_image && has_scales), "Only PreProcessChannel::meanData or PreProcessChannel::meanValue can be set."); NGRAPH_CHECK(!(has_mean_image && has_scales),
"Only PreProcessChannel::meanData or PreProcessChannel::meanValue can be set.");
if (has_scales) { if (has_scales) {
ngraph::Shape shape(inputDims.size(), 1); ngraph::Shape shape(inputDims.size(), 1);


@ -28,7 +28,10 @@ ngraph::pass::AddStdScale::AddStdScale(const ScaleMap& inputInfoMap) {
} }
auto scale_const = it->second; auto scale_const = it->second;
NGRAPH_CHECK(scale_const->get_element_type() == ngraph::element::f32, "Scale for ", param->get_friendly_name(), " must have f32 type"); NGRAPH_CHECK(scale_const->get_element_type() == ngraph::element::f32,
"Scale for ",
param->get_friendly_name(),
" must have f32 type");
auto copy_param = param->clone_with_new_inputs({}); auto copy_param = param->clone_with_new_inputs({});
auto div = std::make_shared<ngraph::opset3::Divide>(copy_param, it->second); auto div = std::make_shared<ngraph::opset3::Divide>(copy_param, it->second);


@ -24,7 +24,8 @@ bool pass::MyFunctionTransformation::run_on_function(std::shared_ptr<ngraph::Fun
// Check that input and output shapes are fully defined (not dynamic) and the number of consumers equals 1 // Check that input and output shapes are fully defined (not dynamic) and the number of consumers equals 1
Input<Node> input = node->input(0); Input<Node> input = node->input(0);
Output<Node> output = node->output(0); Output<Node> output = node->output(0);
if (input.get_partial_shape().is_static() && output.get_partial_shape().is_static() && output.get_target_inputs().size() == 1) { if (input.get_partial_shape().is_static() && output.get_partial_shape().is_static() &&
output.get_target_inputs().size() == 1) {
nodes.push_back(node); nodes.push_back(node);
} }
} }
@ -32,7 +33,8 @@ bool pass::MyFunctionTransformation::run_on_function(std::shared_ptr<ngraph::Fun
// Print types and names for collected nodes // Print types and names for collected nodes
for (auto& node : nodes) { for (auto& node : nodes) {
std::cout << "Type: " << node->get_type_info().name << std::endl << "Name: " << node->get_friendly_name() << std::endl; std::cout << "Type: " << node->get_type_info().name << std::endl
<< "Name: " << node->get_friendly_name() << std::endl;
} }
// Return false because we didn't change nGraph Function // Return false because we didn't change nGraph Function


@ -33,7 +33,9 @@ ngraph::pass::DecomposeDivideMatcher::DecomposeDivideMatcher() {
} }
// Decompose Divide into Multiply with Power operations // Decompose Divide into Multiply with Power operations
auto pow = std::make_shared<ngraph::opset3::Power>(div->input_value(1), opset3::Constant::create(div->get_input_element_type(1), Shape {1}, {-1})); auto pow = std::make_shared<ngraph::opset3::Power>(
div->input_value(1),
opset3::Constant::create(div->get_input_element_type(1), Shape{1}, {-1}));
auto mul = std::make_shared<ngraph::opset3::Multiply>(div->input_value(0), pow); auto mul = std::make_shared<ngraph::opset3::Multiply>(div->input_value(0), pow);
@ -70,7 +72,8 @@ ngraph::pass::ReluReluFusionMatcher::ReluReluFusionMatcher() {
auto& node_to_output = m.get_pattern_value_map(); auto& node_to_output = m.get_pattern_value_map();
// Create a new Relu operation and register it for additional execution // Create a new Relu operation and register it for additional execution
auto new_relu = register_new_node<ngraph::opset3::Relu>(node_to_output.at(m_relu1).get_node_shared_ptr()->input_value(0)); auto new_relu =
register_new_node<ngraph::opset3::Relu>(node_to_output.at(m_relu1).get_node_shared_ptr()->input_value(0));
// Copy runtime info attributes to newly created operation // Copy runtime info attributes to newly created operation
ngraph::copy_runtime_info(m.get_matched_nodes(), new_relu); ngraph::copy_runtime_info(m.get_matched_nodes(), new_relu);
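
Note: the Relu-Relu fusion above is safe because ReLU is idempotent, i.e. relu(relu(x)) == relu(x) for every x. The following standalone sketch (illustrative only, not part of the plugin sources) checks that identity directly:

#include <algorithm>
#include <cassert>

// Plain scalar ReLU; stands in for the element-wise semantics of ngraph::opset3::Relu.
static float relu(float x) {
    return std::max(x, 0.0f);
}

int main() {
    const float samples[] = {-2.5f, -0.0f, 0.0f, 1.5f, 42.0f};
    for (float x : samples) {
        // Applying ReLU twice gives the same result as applying it once,
        // so the matcher can replace the Relu->Relu chain with a single Relu node.
        assert(relu(relu(x)) == relu(x));
    }
    return 0;
}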


@ -0,0 +1,94 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ie_core.hpp>
#include <ie_ngraph_utils.hpp>
#include <limits>
#include <algorithm>
#include <ngraph/ngraph.hpp>
#include <shared_test_classes/base/layer_test_utils.hpp>
#include "base_reference_test.hpp"
using namespace reference_tests;
using namespace ngraph;
using namespace InferenceEngine;
struct AtanhParams {
template <class IT>
AtanhParams(const ngraph::PartialShape& shape, const ngraph::element::Type& iType, const std::vector<IT>& iValues)
: pshape(shape), inType(iType), outType(iType), inputData(CreateBlob(iType, iValues)) {
std::vector<IT> oValues;
std::vector<double> output;
for (auto element : iValues)
output.push_back(static_cast<double>(element));
std::transform(output.begin(), output.end(), output.begin(), [](double input) -> double {
return std::atanh(input);
});
if (std::is_integral<IT>()) {
std::transform(output.begin(), output.end(), output.begin(), [](double input) -> double {
return std::round(input);
});
}
for (auto element : output)
oValues.push_back(static_cast<IT>(element));
refData = CreateBlob(outType, oValues);
}
ngraph::PartialShape pshape;
ngraph::element::Type inType;
ngraph::element::Type outType;
InferenceEngine::Blob::Ptr inputData;
InferenceEngine::Blob::Ptr refData;
};
class ReferenceAtanhLayerTest : public testing::TestWithParam<AtanhParams>, public CommonReferenceTest {
public:
void SetUp() override {
auto params = GetParam();
function = CreateFunction(params.pshape, params.inType, params.outType);
inputData = {params.inputData};
refOutData = {params.refData};
}
static std::string getTestCaseName(const testing::TestParamInfo<AtanhParams>& obj) {
auto param = obj.param;
std::ostringstream result;
result << "shape=" << param.pshape << "_";
result << "iType=" << param.inType << "_";
result << "oType=" << param.outType;
return result.str();
}
private:
static std::shared_ptr<Function> CreateFunction(const PartialShape& input_shape, const element::Type& input_type,
const element::Type& expected_output_type) {
const auto in = std::make_shared<op::Parameter>(input_type, input_shape);
const auto atanh = std::make_shared<op::Atanh>(in);
return std::make_shared<Function>(NodeVector {atanh}, ParameterVector {in});
}
};
TEST_P(ReferenceAtanhLayerTest, CompareWithRefs) {
Exec();
}
INSTANTIATE_TEST_SUITE_P(
smoke_Atanh_With_Hardcoded_Refs, ReferenceAtanhLayerTest,
::testing::Values(AtanhParams(ngraph::PartialShape {2, 4}, ngraph::element::f32,
std::vector<float> {-INFINITY, -2.0f, -1.0f, -0.5f, 0.0f, 0.8f, 1.0f, INFINITY}),
AtanhParams(ngraph::PartialShape {2, 4}, ngraph::element::f16,
std::vector<float16> {-INFINITY, -2.0f, -1.0f, -0.5f, -0.0f, 0.8f, 1.0f, INFINITY}),
AtanhParams(ngraph::PartialShape {2, 3}, ngraph::element::i32,
std::vector<int32_t> {std::numeric_limits<int32_t>::min(), -2, -1, 1, 2, std::numeric_limits<int32_t>::max()}),
AtanhParams(ngraph::PartialShape {2, 3}, ngraph::element::u32,
std::vector<uint32_t> {std::numeric_limits<uint32_t>::min(), 0, 1, 2, 3, std::numeric_limits<uint32_t>::max()}),
AtanhParams(ngraph::PartialShape {2, 3}, ngraph::element::i64,
std::vector<int64_t> {std::numeric_limits<int64_t>::min(), -2, -1, 1, 2, std::numeric_limits<int64_t>::max()}),
AtanhParams(ngraph::PartialShape {2, 3}, ngraph::element::u64,
std::vector<uint64_t> {std::numeric_limits<uint64_t>::min(), 0, 1, 2, 3, std::numeric_limits<uint64_t>::max()})),
ReferenceAtanhLayerTest::getTestCaseName);
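
Note: the reference outputs above are produced by applying std::atanh element-wise and, for integral element types, rounding the result before casting back. For context, std::atanh is finite only on (-1, 1): atanh(+/-1) is +/-infinity, and inputs outside [-1, 1] (including the +/-INFINITY samples above) produce NaN. A minimal standalone check of that domain behaviour (illustrative only, not part of the test suite):

#include <cmath>
#include <cstdio>

int main() {
    const double samples[] = {-2.0, -1.0, -0.5, 0.0, 0.8, 1.0};
    for (double x : samples) {
        // Finite only for |x| < 1; atanh(+/-1) overflows to +/-inf and |x| > 1 is a domain error (NaN).
        std::printf("atanh(%+.1f) = %f\n", x, std::atanh(x));
    }
    return 0;
}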


@ -75,6 +75,48 @@ std::vector<RefComparisonParams> generateComparisonCombinedParams() {
INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()), INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()),
ReferenceComparisonLayerTest::getTestCaseName); ReferenceComparisonLayerTest::getTestCaseName);
template <element::Type_t IN_ET>
std::vector<RefComparisonParams> generateNumericParams(const element::Type& type) {
using T = typename element_type_traits<IN_ET>::value_type;
std::vector<RefComparisonParams> compParams {
Builder {}
.compType(ComparisonTypes::EQUAL)
.input1({{4}, type, std::vector<T> {-2.5f, 25.5f, 2.25f, NAN}})
.input2({{4}, type, std::vector<T> {10.0f, 5.0f, 2.25f, 10.0f}})
.expected({{4}, element::boolean, std::vector<char> {0, 0, 1, 0, }}),
Builder {}
.compType(ComparisonTypes::EQUAL)
.input1({{2, 3}, type, std::vector<T> {0.0f, NAN, NAN, 1.0f, 21.0f, -INFINITY}})
.input2({{2, 3}, type, std::vector<T> {1.0f, NAN, 23.0f, 1.0f, 19.0f, 21.0f}})
.expected({{2, 3}, element::boolean, std::vector<char> {0, 0, 0, 1, 0, 0}}),
Builder {}
.compType(ComparisonTypes::EQUAL)
.input1({{1}, type, std::vector<T> {INFINITY}})
.input2({{1}, type, std::vector<T> {INFINITY}})
.expected({{1}, element::boolean, std::vector<char> {1}}),
Builder {}
.compType(ComparisonTypes::EQUAL)
.input1({{5}, type, std::vector<T> {-2.5f, 25.5f, 2.25f, INFINITY, 6.0f}})
.input2({{5}, type, std::vector<T> {10.0f, 5.0f, 2.25f, 10.0f, -INFINITY}})
.expected({{5}, element::boolean, std::vector<char> {0, 0, 1, 0, 0}})};
return compParams;
}
std::vector<RefComparisonParams> generateNumericCombinedParams() {
const std::vector<std::vector<RefComparisonParams>> compTypeParams {
generateNumericParams<element::Type_t::f16>(element::f16),
generateNumericParams<element::Type_t::f32>(element::f32)};
std::vector<RefComparisonParams> combinedParams;
for (const auto& params : compTypeParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
INSTANTIATE_TEST_SUITE_P(smoke_Numeric_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateNumericCombinedParams()),
ReferenceComparisonLayerTest::getTestCaseName);
} // namespace } // namespace
} // namespace ComparisonOpsRefTestDefinitions } // namespace ComparisonOpsRefTestDefinitions
} // namespace reference_tests } // namespace reference_tests
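
Note: the expected boolean vectors in the numeric EQUAL cases above follow IEEE-754 equality semantics: NaN compares unequal to everything, itself included, while an infinity is equal only to an infinity of the same sign. A small standalone illustration (a sketch using plain C++ float comparisons, not the reference kernel itself):

#include <cmath>
#include <cstdio>

int main() {
    const float nan = NAN;
    const float inf = INFINITY;
    // NaN != NaN, so every row that contains NAN expects 0, even when both inputs are NaN.
    std::printf("NaN == NaN  -> %d\n", nan == nan ? 1 : 0);
    // +inf == +inf, matching the {1} expectation for the INFINITY/INFINITY pair.
    std::printf("inf == inf  -> %d\n", inf == inf ? 1 : 0);
    // An infinity never equals a finite value or an infinity of the opposite sign.
    std::printf("inf == -inf -> %d\n", inf == -inf ? 1 : 0);
    std::printf("inf == 10.f -> %d\n", inf == 10.0f ? 1 : 0);
    return 0;
}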


@ -1,6 +1,7 @@
BasedOnStyle: Google BasedOnStyle: Google
IndentWidth: 4 IndentWidth: 4
UseTab: Never UseTab: Never
ColumnLimit: 120
Language: Cpp Language: Cpp
Standard: Cpp11 Standard: Cpp11
@ -8,18 +9,20 @@ Standard: Cpp11
AccessModifierOffset: -4 AccessModifierOffset: -4
AlignConsecutiveMacros: true AlignConsecutiveMacros: true
AllowAllArgumentsOnNextLine: false AllowAllArgumentsOnNextLine: false
AllowAllConstructorInitializersOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: false AllowAllParametersOfDeclarationOnNextLine: false
AllowShortFunctionsOnASingleLine: Empty AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: Never AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Empty AllowShortLambdasOnASingleLine: Empty
AllowShortLoopsOnASingleLine: false AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: false AlwaysBreakBeforeMultilineStrings: false
ColumnLimit: 160 BinPackArguments: false
# Specialize this comment pragma in order to avoid changes in SEA copyrights BinPackParameters: false
CommentPragmas: '^#' CommentPragmas: '^#'
DerivePointerAlignment: false DerivePointerAlignment: false
FixNamespaceComments: true FixNamespaceComments: true
IndentCaseLabels: false IndentCaseLabels: false
IndentPPDirectives: BeforeHash IndentPPDirectives: AfterHash
SpaceBeforeCpp11BracedList: true ForEachMacros:
SpaceBeforeCtorInitializerColon: false - foreach
- FOREACH_CHILD
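
The practical effect of the options added above can be seen in the reformatted hunks that follow. As a rough sketch only (exact output depends on the clang-format version and on IndentWidth), IndentPPDirectives: AfterHash indents nested preprocessor directives after the hash, and BinPackArguments/BinPackParameters: false with ColumnLimit: 120 put each argument of an over-long call on its own line:

    #include <cstdio>

    #if defined(_WIN32)
    #    define PLATFORM_NAME "windows"  // AfterHash: nested directives indent after the '#'
    #else
    #    define PLATFORM_NAME "posix"
    #endif

    static void report(const char* description, int major, int minor, const char* platform) {
        std::printf("%s %d.%d on %s\n", description, major, minor, platform);
    }

    int main() {
        // BinPackArguments: false - once the call no longer fits in ColumnLimit (120),
        // clang-format places every argument on its own line instead of packing them
        report("a sufficiently long description string that pushes this call past the 120-column limit",
               2021,
               4,
               PLATFORM_NAME);
        return 0;
    }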

View File

@ -24,8 +24,8 @@ int image_add_rectangles(c_mat_t* img, rectangle_t rects[], int classes[], int n
#else #else
#include <algorithm> # include <algorithm>
#include <opencv2/opencv.hpp> # include <opencv2/opencv.hpp>
int image_read(const char* img_path, c_mat_t* img) { int image_read(const char* img_path, c_mat_t* img) {
if (img_path == nullptr || img == nullptr) { if (img_path == nullptr || img == nullptr) {
@ -102,9 +102,11 @@ int image_free(c_mat_t* img) {
int image_add_rectangles(c_mat_t* img, rectangle_t rects[], int classes[], int num, int thickness) { int image_add_rectangles(c_mat_t* img, rectangle_t rects[], int classes[], int num, int thickness) {
int colors_num = 21; int colors_num = 21;
color_t colors[21] = {// colors to be used for bounding boxes color_t colors[21] = {// colors to be used for bounding boxes
{128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190}, {153, 153, 153}, {30, 170, 250}, {128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190},
{0, 220, 220}, {35, 142, 107}, {152, 251, 152}, {180, 130, 70}, {60, 20, 220}, {0, 0, 255}, {142, 0, 0}, {153, 153, 153}, {30, 170, 250}, {0, 220, 220}, {35, 142, 107}, {152, 251, 152},
{70, 0, 0}, {100, 60, 0}, {90, 0, 0}, {230, 0, 0}, {32, 11, 119}, {0, 74, 111}, {81, 0, 81}}; {180, 130, 70}, {60, 20, 220}, {0, 0, 255}, {142, 0, 0}, {70, 0, 0},
{100, 60, 0}, {90, 0, 0}, {230, 0, 0}, {32, 11, 119}, {0, 74, 111},
{81, 0, 81}};
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
int x = rects[i].x_min; int x = rects[i].x_min;

View File

@ -6,23 +6,23 @@
#include <stdio.h> #include <stdio.h>
#ifdef __cplusplus #ifdef __cplusplus
#define OPENCV_C_EXTERN extern "C" # define OPENCV_C_EXTERN extern "C"
#else #else
#define OPENCV_C_EXTERN # define OPENCV_C_EXTERN
#endif #endif
#if defined(__GNUC__) && (__GNUC__ < 4) #if defined(__GNUC__) && (__GNUC__ < 4)
#define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __VA_ARGS__ # define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __VA_ARGS__
#else #else
#if defined(_WIN32) # if defined(_WIN32)
#ifdef opencv_c_wrapper_EXPORTS # ifdef opencv_c_wrapper_EXPORTS
#define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __declspec(dllexport) __VA_ARGS__ __cdecl # define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __declspec(dllexport) __VA_ARGS__ __cdecl
#else # else
#define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __declspec(dllimport) __VA_ARGS__ __cdecl # define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __declspec(dllimport) __VA_ARGS__ __cdecl
#endif # endif
#else # else
#define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __attribute__((visibility("default"))) __VA_ARGS__ # define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __attribute__((visibility("default"))) __VA_ARGS__
#endif # endif
#endif #endif
/** /**

View File

@ -6,43 +6,43 @@
#if defined(_WIN32) #if defined(_WIN32)
#ifndef WIN32_LEAN_AND_MEAN # ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN # define WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN_UNDEF # define WIN32_LEAN_AND_MEAN_UNDEF
#endif # endif
#ifndef NOMINMAX # ifndef NOMINMAX
#define NOMINMAX # define NOMINMAX
#define NOMINMAX_UNDEF # define NOMINMAX_UNDEF
#endif # endif
#if defined(_M_IX86) && !defined(_X86_) && !defined(_AMD64_) # if defined(_M_IX86) && !defined(_X86_) && !defined(_AMD64_)
#define _X86_ # define _X86_
#endif # endif
#if defined(_M_X64) && !defined(_X86_) && !defined(_AMD64_) # if defined(_M_X64) && !defined(_X86_) && !defined(_AMD64_)
#define _AMD64_ # define _AMD64_
#endif # endif
#if defined(_M_ARM) && !defined(_ARM_) && !defined(_ARM64_) # if defined(_M_ARM) && !defined(_ARM_) && !defined(_ARM64_)
#define _ARM_ # define _ARM_
#endif # endif
#if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_) # if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_)
#define _ARM64_ # define _ARM64_
#endif # endif
// clang-format off // clang-format off
#include <string.h> #include <string.h>
#include <windef.h> #include <windef.h>
#include <fileapi.h> #include <fileapi.h>
#include <Winbase.h> #include <Winbase.h>
#include <sys/stat.h> #include <sys/stat.h>
// clang-format on // clang-format on
// Copied from linux libc sys/stat.h: // Copied from linux libc sys/stat.h:
#define S_ISREG(m) (((m)&S_IFMT) == S_IFREG) # define S_ISREG(m) (((m)&S_IFMT) == S_IFREG)
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR) # define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
/// @brief structure to store directory names /// @brief structure to store directory names
typedef struct dirent { typedef struct dirent {
@ -171,19 +171,19 @@ static void closedir(DIR* dp) {
free(dp); free(dp);
} }
#ifdef WIN32_LEAN_AND_MEAN_UNDEF # ifdef WIN32_LEAN_AND_MEAN_UNDEF
#undef WIN32_LEAN_AND_MEAN # undef WIN32_LEAN_AND_MEAN
#undef WIN32_LEAN_AND_MEAN_UNDEF # undef WIN32_LEAN_AND_MEAN_UNDEF
#endif # endif
#ifdef NOMINMAX_UNDEF # ifdef NOMINMAX_UNDEF
#undef NOMINMAX_UNDEF # undef NOMINMAX_UNDEF
#undef NOMINMAX # undef NOMINMAX
#endif # endif
#else #else
#include <dirent.h> # include <dirent.h>
#include <sys/types.h> # include <sys/types.h>
#endif #endif

View File

@ -12,9 +12,9 @@
#include "object_detection_sample_ssd.h" #include "object_detection_sample_ssd.h"
#ifdef _WIN32 #ifdef _WIN32
#include "c_w_dirent.h" # include "c_w_dirent.h"
#else #else
#include <dirent.h> # include <dirent.h>
#endif #endif
#define MAX_IMAGES 20 #define MAX_IMAGES 20
@ -346,7 +346,10 @@ int main(int argc, char** argv) {
goto err; goto err;
for (i = 0; i < ver.num_vers; ++i) { for (i = 0; i < ver.num_vers; ++i) {
printf(" %s\n", ver.versions[i].device_name); printf(" %s\n", ver.versions[i].device_name);
printf(" %s version ......... %zu.%zu\n", ver.versions[i].description, ver.versions[i].major, ver.versions[i].minor); printf(" %s version ......... %zu.%zu\n",
ver.versions[i].description,
ver.versions[i].major,
ver.versions[i].minor);
printf(" Build ......... %s\n", ver.versions[i].build_number); printf(" Build ......... %s\n", ver.versions[i].build_number);
} }
ie_core_versions_free(&ver); ie_core_versions_free(&ver);
@ -360,7 +363,8 @@ int main(int argc, char** argv) {
printf("%sCustom extension loaded: %s\n", info, custom_ex_library_msg); printf("%sCustom extension loaded: %s\n", info, custom_ex_library_msg);
} }
if (custom_plugin_cfg_msg && (strcmp(device_name, "GPU") == 0 || strcmp(device_name, "MYRIAD") == 0 || strcmp(device_name, "HDDL") == 0)) { if (custom_plugin_cfg_msg &&
(strcmp(device_name, "GPU") == 0 || strcmp(device_name, "MYRIAD") == 0 || strcmp(device_name, "HDDL") == 0)) {
// Config for device plugin custom extension is loaded from an .xml // Config for device plugin custom extension is loaded from an .xml
// description // description
ie_config_t cfg = {"CONFIG_FILE", custom_plugin_cfg_msg, NULL}; ie_config_t cfg = {"CONFIG_FILE", custom_plugin_cfg_msg, NULL};
@ -480,7 +484,12 @@ int main(int argc, char** argv) {
for (j = 0; j < resized_img.mat_data_size; ++j) for (j = 0; j < resized_img.mat_data_size; ++j)
resized_img.mat_data[j] = img.mat_data[j]; resized_img.mat_data[j] = img.mat_data[j];
} else { } else {
printf("%sImage is resized from (%d, %d) to (%zu, %zu)\n", warn, img.mat_width, img.mat_height, input_width, input_height); printf("%sImage is resized from (%d, %d) to (%zu, %zu)\n",
warn,
img.mat_width,
img.mat_height,
input_width,
input_height);
if (image_resize(&img, &resized_img, (int)input_width, (int)input_height) == -1) { if (image_resize(&img, &resized_img, (int)input_width, (int)input_height) == -1) {
printf("%sImage %s cannot be resized!\n", warn, file_paths[i]); printf("%sImage %s cannot be resized!\n", warn, file_paths[i]);
@ -623,7 +632,8 @@ int main(int argc, char** argv) {
for (ch = 0; ch < num_channels; ++ch) { for (ch = 0; ch < num_channels; ++ch) {
/** [images stride + channels stride + pixel id ] all in bytes /** [images stride + channels stride + pixel id ] all in bytes
* **/ * **/
data[image_id * image_size * num_channels + ch * image_size + pid] = images[image_id].mat_data[pid * num_channels + ch]; data[image_id * image_size * num_channels + ch * image_size + pid] =
images[image_id].mat_data[pid * num_channels + ch];
} }
} }
image_free(&images[image_id]); image_free(&images[image_id]);
@ -704,7 +714,15 @@ int main(int argc, char** argv) {
int xmax = (int)(detection[curProposal * objectSize + 5] * originalImages[image_id].mat_width); int xmax = (int)(detection[curProposal * objectSize + 5] * originalImages[image_id].mat_width);
int ymax = (int)(detection[curProposal * objectSize + 6] * originalImages[image_id].mat_height); int ymax = (int)(detection[curProposal * objectSize + 6] * originalImages[image_id].mat_height);
printf("[%d, %d] element, prob = %f (%d, %d)-(%d, %d) batch id : %d", curProposal, label, confidence, xmin, ymin, xmax, ymax, image_id); printf("[%d, %d] element, prob = %f (%d, %d)-(%d, %d) batch id : %d",
curProposal,
label,
confidence,
xmin,
ymin,
xmax,
ymax,
image_id);
if (confidence > 0.5) { if (confidence > 0.5) {
/** Drawing only objects with >50% probability **/ /** Drawing only objects with >50% probability **/
@ -722,7 +740,11 @@ int main(int argc, char** argv) {
int batch_id; int batch_id;
for (batch_id = 0; batch_id < batchSize; ++batch_id) { for (batch_id = 0; batch_id < batchSize; ++batch_id) {
if (object_num[batch_id] > 0) { if (object_num[batch_id] > 0) {
image_add_rectangles(&originalImages[batch_id], boxes[batch_id], classes[batch_id], object_num[batch_id], 2); image_add_rectangles(&originalImages[batch_id],
boxes[batch_id],
classes[batch_id],
object_num[batch_id],
2);
} }
const char* out = "out_"; const char* out = "out_";
char str_num[16] = {0}; char str_num[16] = {0};

View File

@ -16,13 +16,15 @@ static const char* model_message = "Required. Path to an .xml file with a traine
static const char* image_message = "Required. Path to one or more images or folder with images."; static const char* image_message = "Required. Path to one or more images or folder with images.";
/// @brief message for assigning cnn calculation to device /// @brief message for assigning cnn calculation to device
static const char* target_device_message = "Optional. Specify the target device to infer. " static const char* target_device_message =
"Optional. Specify the target device to infer. "
"Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify " "Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify "
"HETERO plugin. " "HETERO plugin. "
"Sample will look for a suitable plugin for device specified."; "Sample will look for a suitable plugin for device specified.";
/// @brief message for plugin custom kernels desc /// @brief message for plugin custom kernels desc
static const char* custom_plugin_config_message = "Required for GPU, MYRIAD, HDDL custom kernels. " static const char* custom_plugin_config_message =
"Required for GPU, MYRIAD, HDDL custom kernels. "
"Absolute path to the .xml config file with the kernels descriptions."; "Absolute path to the .xml config file with the kernels descriptions.";
/// @brief message for user extension library argument /// @brief message for user extension library argument

View File

@ -1,6 +1,7 @@
BasedOnStyle: Google BasedOnStyle: Google
IndentWidth: 4 IndentWidth: 4
UseTab: Never UseTab: Never
ColumnLimit: 120
Language: Cpp Language: Cpp
Standard: Cpp11 Standard: Cpp11
@ -8,18 +9,20 @@ Standard: Cpp11
AccessModifierOffset: -4 AccessModifierOffset: -4
AlignConsecutiveMacros: true AlignConsecutiveMacros: true
AllowAllArgumentsOnNextLine: false AllowAllArgumentsOnNextLine: false
AllowAllConstructorInitializersOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: false AllowAllParametersOfDeclarationOnNextLine: false
AllowShortFunctionsOnASingleLine: Empty AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: Never AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Empty AllowShortLambdasOnASingleLine: Empty
AllowShortLoopsOnASingleLine: false AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: false AlwaysBreakBeforeMultilineStrings: false
ColumnLimit: 160 BinPackArguments: false
# Specialize this comment pragma in order to avoid changes in SEA copyrights BinPackParameters: false
CommentPragmas: '^#' CommentPragmas: '^#'
DerivePointerAlignment: false DerivePointerAlignment: false
FixNamespaceComments: true FixNamespaceComments: true
IndentCaseLabels: false IndentCaseLabels: false
IndentPPDirectives: BeforeHash IndentPPDirectives: AfterHash
SpaceBeforeCpp11BracedList: true ForEachMacros:
SpaceBeforeCtorInitializerColon: false - foreach
- FOREACH_CHILD

View File

@ -8,11 +8,17 @@
#include "ie_plugin_config.hpp" #include "ie_plugin_config.hpp"
const std::string EXPORTED_NETWORK_NAME = "undefined"; const std::string EXPORTED_NETWORK_NAME = "undefined";
std::map<std::string, InferenceEngine::Precision> precision_map = { std::map<std::string, InferenceEngine::Precision> precision_map = {{"FP32", InferenceEngine::Precision::FP32},
{"FP32", InferenceEngine::Precision::FP32}, {"FP64", InferenceEngine::Precision::FP64}, {"FP16", InferenceEngine::Precision::FP16}, {"FP64", InferenceEngine::Precision::FP64},
{"I8", InferenceEngine::Precision::I8}, {"I16", InferenceEngine::Precision::I16}, {"I32", InferenceEngine::Precision::I32}, {"FP16", InferenceEngine::Precision::FP16},
{"I64", InferenceEngine::Precision::I64}, {"U8", InferenceEngine::Precision::U8}, {"U16", InferenceEngine::Precision::U16}, {"I8", InferenceEngine::Precision::I8},
{"U32", InferenceEngine::Precision::U32}, {"U64", InferenceEngine::Precision::U64}}; {"I16", InferenceEngine::Precision::I16},
{"I32", InferenceEngine::Precision::I32},
{"I64", InferenceEngine::Precision::I64},
{"U8", InferenceEngine::Precision::U8},
{"U16", InferenceEngine::Precision::U16},
{"U32", InferenceEngine::Precision::U32},
{"U64", InferenceEngine::Precision::U64}};
std::map<std::string, InferenceEngine::Layout> layout_map = {{"ANY", InferenceEngine::Layout::ANY}, std::map<std::string, InferenceEngine::Layout> layout_map = {{"ANY", InferenceEngine::Layout::ANY},
{"NCHW", InferenceEngine::Layout::NCHW}, {"NCHW", InferenceEngine::Layout::NCHW},
@ -200,7 +206,8 @@ InferenceEnginePython::IENetwork InferenceEnginePython::read_network(std::string
return InferenceEnginePython::IENetwork(std::make_shared<InferenceEngine::CNNNetwork>(net)); return InferenceEnginePython::IENetwork(std::make_shared<InferenceEngine::CNNNetwork>(net));
} }
InferenceEnginePython::IENetwork::IENetwork(const std::shared_ptr<InferenceEngine::CNNNetwork>& cnn_network): actual(cnn_network) { InferenceEnginePython::IENetwork::IENetwork(const std::shared_ptr<InferenceEngine::CNNNetwork>& cnn_network)
: actual(cnn_network) {
if (actual == nullptr) if (actual == nullptr)
IE_THROW() << "IENetwork was not initialized."; IE_THROW() << "IENetwork was not initialized.";
name = actual->getName(); name = actual->getName();
@ -286,7 +293,9 @@ void InferenceEnginePython::IENetwork::reshape(const std::map<std::string, std::
actual->reshape(input_shapes); actual->reshape(input_shapes);
} }
InferenceEnginePython::IEExecNetwork::IEExecNetwork(const std::string& name, size_t num_requests): infer_requests(num_requests), name(name) { InferenceEnginePython::IEExecNetwork::IEExecNetwork(const std::string& name, size_t num_requests)
: infer_requests(num_requests),
name(name) {
request_queue_ptr = std::make_shared<IdleInferRequestQueue>(); request_queue_ptr = std::make_shared<IdleInferRequestQueue>();
} }
@ -333,16 +342,19 @@ std::shared_ptr<InferenceEngine::ExecutableNetwork> InferenceEnginePython::IEExe
return actual; return actual;
} }
void InferenceEnginePython::InferRequestWrap::setBlob(const std::string& blob_name, const InferenceEngine::Blob::Ptr& blob_ptr) { void InferenceEnginePython::InferRequestWrap::setBlob(const std::string& blob_name,
const InferenceEngine::Blob::Ptr& blob_ptr) {
request_ptr.SetBlob(blob_name.c_str(), blob_ptr); request_ptr.SetBlob(blob_name.c_str(), blob_ptr);
} }
void InferenceEnginePython::InferRequestWrap::setBlob(const std::string& blob_name, const InferenceEngine::Blob::Ptr& blob_ptr, void InferenceEnginePython::InferRequestWrap::setBlob(const std::string& blob_name,
const InferenceEngine::Blob::Ptr& blob_ptr,
const InferenceEngine::PreProcessInfo& info) { const InferenceEngine::PreProcessInfo& info) {
request_ptr.SetBlob(blob_name.c_str(), blob_ptr, info); request_ptr.SetBlob(blob_name.c_str(), blob_ptr, info);
} }
const InferenceEngine::PreProcessInfo& InferenceEnginePython::InferRequestWrap::getPreProcess(const std::string& blob_name) { const InferenceEngine::PreProcessInfo& InferenceEnginePython::InferRequestWrap::getPreProcess(
const std::string& blob_name) {
return request_ptr.GetPreProcess(blob_name.c_str()); return request_ptr.GetPreProcess(blob_name.c_str());
} }
@ -392,7 +404,8 @@ int InferenceEnginePython::InferRequestWrap::wait(int64_t timeout) {
return static_cast<int>(code); return static_cast<int>(code);
} }
std::map<std::string, InferenceEnginePython::ProfileInfo> InferenceEnginePython::InferRequestWrap::getPerformanceCounts() { std::map<std::string, InferenceEnginePython::ProfileInfo>
InferenceEnginePython::InferRequestWrap::getPerformanceCounts() {
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perf_counts = request_ptr.GetPerformanceCounts(); std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perf_counts = request_ptr.GetPerformanceCounts();
std::map<std::string, InferenceEnginePython::ProfileInfo> perf_map; std::map<std::string, InferenceEnginePython::ProfileInfo> perf_map;
@ -430,7 +443,8 @@ InferenceEnginePython::IECore::IECore(const std::string& xmlConfigFile) {
actual = InferenceEngine::Core(xmlConfigFile); actual = InferenceEngine::Core(xmlConfigFile);
} }
std::map<std::string, InferenceEngine::Version> InferenceEnginePython::IECore::getVersions(const std::string& deviceName) { std::map<std::string, InferenceEngine::Version> InferenceEnginePython::IECore::getVersions(
const std::string& deviceName) {
return actual.GetVersions(deviceName); return actual.GetVersions(deviceName);
} }
@ -485,12 +499,16 @@ void InferenceEnginePython::IEExecNetwork::createInferRequests(int num_requests)
infer_request.request_queue_ptr = request_queue_ptr; infer_request.request_queue_ptr = request_queue_ptr;
infer_request.request_ptr = actual->CreateInferRequest(); infer_request.request_ptr = actual->CreateInferRequest();
infer_request.request_ptr.SetCompletionCallback<std::function<void(InferenceEngine::InferRequest r, InferenceEngine::StatusCode)>>( infer_request.request_ptr
.SetCompletionCallback<std::function<void(InferenceEngine::InferRequest r, InferenceEngine::StatusCode)>>(
[&](InferenceEngine::InferRequest request, InferenceEngine::StatusCode code) { [&](InferenceEngine::InferRequest request, InferenceEngine::StatusCode code) {
if (code != InferenceEngine::StatusCode::OK) { if (code != InferenceEngine::StatusCode::OK) {
IE_EXCEPTION_SWITCH(code, ExceptionType, IE_EXCEPTION_SWITCH(code,
InferenceEngine::details::ThrowNow<ExceptionType> {} <<= ExceptionType,
std::stringstream {} << IE_LOCATION << InferenceEngine::details::ExceptionTraits<ExceptionType>::string()); InferenceEngine::details::ThrowNow<ExceptionType>{} <<=
std::stringstream{}
<< IE_LOCATION
<< InferenceEngine::details::ExceptionTraits<ExceptionType>::string());
} }
auto end_time = Time::now(); auto end_time = Time::now();
@ -504,12 +522,15 @@ void InferenceEnginePython::IEExecNetwork::createInferRequests(int num_requests)
} }
} }
InferenceEnginePython::IENetwork InferenceEnginePython::IECore::readNetwork(const std::string& modelPath, const std::string& binPath) { InferenceEnginePython::IENetwork InferenceEnginePython::IECore::readNetwork(const std::string& modelPath,
const std::string& binPath) {
InferenceEngine::CNNNetwork net = actual.ReadNetwork(modelPath, binPath); InferenceEngine::CNNNetwork net = actual.ReadNetwork(modelPath, binPath);
return IENetwork(std::make_shared<InferenceEngine::CNNNetwork>(net)); return IENetwork(std::make_shared<InferenceEngine::CNNNetwork>(net));
} }
InferenceEnginePython::IENetwork InferenceEnginePython::IECore::readNetwork(const std::string& model, const uint8_t* bin, size_t bin_size) { InferenceEnginePython::IENetwork InferenceEnginePython::IECore::readNetwork(const std::string& model,
const uint8_t* bin,
size_t bin_size) {
InferenceEngine::MemoryBlob::Ptr weights_blob; InferenceEngine::MemoryBlob::Ptr weights_blob;
if (bin_size != 0) { if (bin_size != 0) {
InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, {bin_size}, InferenceEngine::Layout::C); InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, {bin_size}, InferenceEngine::Layout::C);
@ -521,44 +542,58 @@ InferenceEnginePython::IENetwork InferenceEnginePython::IECore::readNetwork(cons
return IENetwork(std::make_shared<InferenceEngine::CNNNetwork>(net)); return IENetwork(std::make_shared<InferenceEngine::CNNNetwork>(net));
} }
std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::loadNetwork(IENetwork network, const std::string& deviceName, std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::loadNetwork(
const std::map<std::string, std::string>& config, IENetwork network,
int num_requests) {
auto exec_network = InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(network.name, num_requests);
exec_network->actual = std::make_shared<InferenceEngine::ExecutableNetwork>(actual.LoadNetwork(*network.actual, deviceName, config));
exec_network->createInferRequests(num_requests);
return exec_network;
}
std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::loadNetworkFromFile(const std::string& modelPath,
const std::string& deviceName, const std::string& deviceName,
const std::map<std::string, std::string>& config, const std::map<std::string, std::string>& config,
int num_requests) { int num_requests) {
auto exec_network = InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(modelPath, num_requests); auto exec_network =
exec_network->actual = std::make_shared<InferenceEngine::ExecutableNetwork>(actual.LoadNetwork(modelPath, deviceName, config)); InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(network.name, num_requests);
exec_network->actual =
std::make_shared<InferenceEngine::ExecutableNetwork>(actual.LoadNetwork(*network.actual, deviceName, config));
exec_network->createInferRequests(num_requests); exec_network->createInferRequests(num_requests);
return exec_network; return exec_network;
} }
std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::importNetwork(const std::string& modelFIle, const std::string& deviceName, std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::loadNetworkFromFile(
const std::string& modelPath,
const std::string& deviceName,
const std::map<std::string, std::string>& config, const std::map<std::string, std::string>& config,
int num_requests) { int num_requests) {
auto exec_network = InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(EXPORTED_NETWORK_NAME, num_requests); auto exec_network =
exec_network->actual = std::make_shared<InferenceEngine::ExecutableNetwork>(actual.ImportNetwork(modelFIle, deviceName, config)); InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(modelPath, num_requests);
exec_network->actual =
std::make_shared<InferenceEngine::ExecutableNetwork>(actual.LoadNetwork(modelPath, deviceName, config));
exec_network->createInferRequests(num_requests); exec_network->createInferRequests(num_requests);
return exec_network; return exec_network;
} }
std::map<std::string, std::string> InferenceEnginePython::IECore::queryNetwork(InferenceEnginePython::IENetwork network, const std::string& deviceName, std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::importNetwork(
const std::string& modelFIle,
const std::string& deviceName,
const std::map<std::string, std::string>& config,
int num_requests) {
auto exec_network =
InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(EXPORTED_NETWORK_NAME, num_requests);
exec_network->actual =
std::make_shared<InferenceEngine::ExecutableNetwork>(actual.ImportNetwork(modelFIle, deviceName, config));
exec_network->createInferRequests(num_requests);
return exec_network;
}
std::map<std::string, std::string> InferenceEnginePython::IECore::queryNetwork(
InferenceEnginePython::IENetwork network,
const std::string& deviceName,
const std::map<std::string, std::string>& config) { const std::map<std::string, std::string>& config) {
auto res = actual.QueryNetwork(*network.actual, deviceName, config); auto res = actual.QueryNetwork(*network.actual, deviceName, config);
return res.supportedLayersMap; return res.supportedLayersMap;
} }
void InferenceEnginePython::IECore::setConfig(const std::map<std::string, std::string>& config, const std::string& deviceName) { void InferenceEnginePython::IECore::setConfig(const std::map<std::string, std::string>& config,
const std::string& deviceName) {
actual.SetConfig(config, deviceName); actual.SetConfig(config, deviceName);
} }
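
For reference, the InferenceEngine::Core calls wrapped above (ReadNetwork, LoadNetwork, ImportNetwork, QueryNetwork, SetConfig) are the plain C++ API. A minimal sketch of the equivalent direct usage, assuming a hypothetical model.xml/model.bin pair and the CPU device:

    #include <inference_engine.hpp>

    int main() {
        InferenceEngine::Core core;                                 // same object as IECore::actual
        auto network = core.ReadNetwork("model.xml", "model.bin");  // hypothetical paths; IECore::readNetwork
        auto executable = core.LoadNetwork(network, "CPU");         // IECore::loadNetwork
        auto request = executable.CreateInferRequest();             // IEExecNetwork::createInferRequests
        request.Infer();                                            // synchronous inference
        return 0;
    }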

View File

@ -115,7 +115,9 @@ struct InferRequestWrap {
void setBlob(const std::string& blob_name, const InferenceEngine::Blob::Ptr& blob_ptr); void setBlob(const std::string& blob_name, const InferenceEngine::Blob::Ptr& blob_ptr);
void setBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data, const InferenceEngine::PreProcessInfo& info); void setBlob(const std::string& name,
const InferenceEngine::Blob::Ptr& data,
const InferenceEngine::PreProcessInfo& info);
void setBatch(int size); void setBatch(int size);
@ -160,13 +162,23 @@ struct IECore {
std::map<std::string, InferenceEngine::Version> getVersions(const std::string& deviceName); std::map<std::string, InferenceEngine::Version> getVersions(const std::string& deviceName);
InferenceEnginePython::IENetwork readNetwork(const std::string& modelPath, const std::string& binPath); InferenceEnginePython::IENetwork readNetwork(const std::string& modelPath, const std::string& binPath);
InferenceEnginePython::IENetwork readNetwork(const std::string& model, const uint8_t* bin, size_t bin_size); InferenceEnginePython::IENetwork readNetwork(const std::string& model, const uint8_t* bin, size_t bin_size);
std::unique_ptr<InferenceEnginePython::IEExecNetwork> loadNetwork(IENetwork network, const std::string& deviceName, std::unique_ptr<InferenceEnginePython::IEExecNetwork> loadNetwork(IENetwork network,
const std::map<std::string, std::string>& config, int num_requests); const std::string& deviceName,
std::unique_ptr<InferenceEnginePython::IEExecNetwork> loadNetworkFromFile(const std::string& modelPath, const std::string& deviceName, const std::map<std::string, std::string>& config,
const std::map<std::string, std::string>& config, int num_requests); int num_requests);
std::unique_ptr<InferenceEnginePython::IEExecNetwork> importNetwork(const std::string& modelFIle, const std::string& deviceName, std::unique_ptr<InferenceEnginePython::IEExecNetwork> loadNetworkFromFile(
const std::map<std::string, std::string>& config, int num_requests); const std::string& modelPath,
std::map<std::string, std::string> queryNetwork(IENetwork network, const std::string& deviceName, const std::map<std::string, std::string>& config); const std::string& deviceName,
const std::map<std::string, std::string>& config,
int num_requests);
std::unique_ptr<InferenceEnginePython::IEExecNetwork> importNetwork(
const std::string& modelFIle,
const std::string& deviceName,
const std::map<std::string, std::string>& config,
int num_requests);
std::map<std::string, std::string> queryNetwork(IENetwork network,
const std::string& deviceName,
const std::map<std::string, std::string>& config);
void setConfig(const std::map<std::string, std::string>& config, const std::string& deviceName = std::string()); void setConfig(const std::map<std::string, std::string>& config, const std::string& deviceName = std::string());
void registerPlugin(const std::string& pluginName, const std::string& deviceName); void registerPlugin(const std::string& pluginName, const std::string& deviceName);
void unregisterPlugin(const std::string& deviceName); void unregisterPlugin(const std::string& deviceName);

View File

@ -26,7 +26,8 @@ void InferenceEnginePython::ApplyPOTTransformations(InferenceEnginePython::IENet
manager.run_passes(network.actual->getFunction()); manager.run_passes(network.actual->getFunction());
} }
void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, bool use_const_initializer) { void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network,
bool use_const_initializer) {
ngraph::pass::Manager manager; ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::LowLatency2>(use_const_initializer); manager.register_pass<ngraph::pass::LowLatency2>(use_const_initializer);
manager.run_passes(network.actual->getFunction()); manager.run_passes(network.actual->getFunction());
@ -38,7 +39,9 @@ void InferenceEnginePython::ApplyPruningTransformation(InferenceEnginePython::IE
manager.run_passes(network.actual->getFunction()); manager.run_passes(network.actual->getFunction());
} }
void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork network, std::string path, bool extract_names) { void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork network,
std::string path,
bool extract_names) {
ngraph::pass::Manager manager; ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::GenerateMappingFile>(path, extract_names); manager.register_pass<ngraph::pass::GenerateMappingFile>(path, extract_names);
manager.run_passes(network.actual->getFunction()); manager.run_passes(network.actual->getFunction());
@ -47,9 +50,10 @@ void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork
void InferenceEnginePython::CheckAPI() { void InferenceEnginePython::CheckAPI() {
std::shared_ptr<ngraph::Function> f; std::shared_ptr<ngraph::Function> f;
{ {
auto input = std::make_shared<ngraph::opset6::Parameter>(ngraph::element::f32, ngraph::Shape {1, 1000, 4}); auto input = std::make_shared<ngraph::opset6::Parameter>(ngraph::element::f32, ngraph::Shape{1, 1000, 4});
auto reshape = std::make_shared<ngraph::opset6::Reshape>(input, std::make_shared<ngraph::opset6::ShapeOf>(input), true); auto reshape =
f = std::make_shared<ngraph::Function>(ngraph::NodeVector {reshape}, ngraph::ParameterVector {input}); std::make_shared<ngraph::opset6::Reshape>(input, std::make_shared<ngraph::opset6::ShapeOf>(input), true);
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{reshape}, ngraph::ParameterVector{input});
} }
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ngraph::pass::ConstantFolding>(); m.register_pass<ngraph::pass::ConstantFolding>();

View File

@ -7,6 +7,7 @@
#include <common_test_utils/ngraph_test_utils.hpp> #include <common_test_utils/ngraph_test_utils.hpp>
#include <string> #include <string>
std::pair<bool, std::string> InferenceEnginePython::CompareNetworks(InferenceEnginePython::IENetwork lhs, InferenceEnginePython::IENetwork rhs) { std::pair<bool, std::string> InferenceEnginePython::CompareNetworks(InferenceEnginePython::IENetwork lhs,
InferenceEnginePython::IENetwork rhs) {
return compare_functions(lhs.actual->getFunction(), rhs.actual->getFunction(), true, true, false, true, true); return compare_functions(lhs.actual->getFunction(), rhs.actual->getFunction(), true, true, false, true, true);
} }

View File

@ -1,6 +1,7 @@
BasedOnStyle: Google BasedOnStyle: Google
IndentWidth: 4 IndentWidth: 4
UseTab: Never UseTab: Never
ColumnLimit: 120
Language: Cpp Language: Cpp
Standard: Cpp11 Standard: Cpp11
@ -8,18 +9,20 @@ Standard: Cpp11
AccessModifierOffset: -4 AccessModifierOffset: -4
AlignConsecutiveMacros: true AlignConsecutiveMacros: true
AllowAllArgumentsOnNextLine: false AllowAllArgumentsOnNextLine: false
AllowAllConstructorInitializersOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: false AllowAllParametersOfDeclarationOnNextLine: false
AllowShortFunctionsOnASingleLine: Empty AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: Never AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Empty AllowShortLambdasOnASingleLine: Empty
AllowShortLoopsOnASingleLine: false AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: false AlwaysBreakBeforeMultilineStrings: false
ColumnLimit: 160 BinPackArguments: false
# Specialize this comment pragma in order to avoid changes in SEA copyrights BinPackParameters: false
CommentPragmas: '^#' CommentPragmas: '^#'
DerivePointerAlignment: false DerivePointerAlignment: false
FixNamespaceComments: true FixNamespaceComments: true
IndentCaseLabels: false IndentCaseLabels: false
IndentPPDirectives: BeforeHash IndentPPDirectives: AfterHash
SpaceBeforeCpp11BracedList: true ForEachMacros:
SpaceBeforeCtorInitializerColon: false - foreach
- FOREACH_CHILD

View File

@ -113,19 +113,7 @@ endif()
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/gflags" AND if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/gflags" AND
NOT DEFINED OpenVINO_SOURCE_DIR) NOT DEFINED OpenVINO_SOURCE_DIR)
function(add_gflags)
# common gflags settings
set(GFLAGS_IS_SUBPROJECT TRUE)
set(HAVE_SYS_STAT_H 1)
set(HAVE_INTTYPES_H 1)
set(INTTYPES_FORMAT C99)
set(BUILD_TESTING OFF)
set(BUILD_SHARED_LIBS OFF)
add_subdirectory(thirdparty/gflags EXCLUDE_FROM_ALL) add_subdirectory(thirdparty/gflags EXCLUDE_FROM_ALL)
set_target_properties(gflags_nothreads_static PROPERTIES FOLDER thirdparty)
endfunction()
add_gflags()
endif() endif()
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zlib") if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zlib")

View File

@ -2,7 +2,7 @@
This topic demonstrates how to use the Benchmark C++ Tool to estimate deep learning inference performance on supported devices. Performance can be measured for two inference modes: synchronous (latency-oriented) and asynchronous (throughput-oriented). This topic demonstrates how to use the Benchmark C++ Tool to estimate deep learning inference performance on supported devices. Performance can be measured for two inference modes: synchronous (latency-oriented) and asynchronous (throughput-oriented).
> **NOTE:** This topic describes usage of C++ implementation of the Benchmark Tool. For the Python* implementation, refer to [Benchmark Python* Tool](../../tools/benchmark_tool/README.md). > **NOTE:** This topic describes usage of C++ implementation of the Benchmark Tool. For the Python* implementation, refer to [Benchmark Python* Tool](../../../tools/benchmark_tool/README.md).
> **TIP**: You also can work with the Benchmark Tool inside the OpenVINO™ [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench). > **TIP**: You also can work with the Benchmark Tool inside the OpenVINO™ [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench).
> [DL Workbench](@ref workbench_docs_Workbench_DG_Introduction) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare > [DL Workbench](@ref workbench_docs_Workbench_DG_Introduction) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare

View File

@ -14,28 +14,33 @@
static const char help_message[] = "Print a usage message"; static const char help_message[] = "Print a usage message";
/// @brief message for images argument /// @brief message for images argument
static const char input_message[] = "Optional. Path to a folder with images and/or binaries or to specific image or binary file."; static const char input_message[] =
"Optional. Path to a folder with images and/or binaries or to specific image or binary file.";
/// @brief message for model argument /// @brief message for model argument
static const char model_message[] = "Required. Path to an .xml/.onnx/.prototxt file with a trained model or to a .blob files with " static const char model_message[] =
"Required. Path to an .xml/.onnx/.prototxt file with a trained model or to a .blob files with "
"a trained compiled model."; "a trained compiled model.";
/// @brief message for execution mode /// @brief message for execution mode
static const char api_message[] = "Optional. Enable Sync/Async API. Default value is \"async\"."; static const char api_message[] = "Optional. Enable Sync/Async API. Default value is \"async\".";
/// @brief message for assigning cnn calculation to device /// @brief message for assigning cnn calculation to device
static const char target_device_message[] = "Optional. Specify a target device to infer on (the list of available devices is shown below). " static const char target_device_message[] =
"Optional. Specify a target device to infer on (the list of available devices is shown below). "
"Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify " "Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify "
"HETERO plugin. " "HETERO plugin. "
"Use \"-d MULTI:<comma-separated_devices_list>\" format to specify MULTI plugin. " "Use \"-d MULTI:<comma-separated_devices_list>\" format to specify MULTI plugin. "
"The application looks for a suitable plugin for the specified device."; "The application looks for a suitable plugin for the specified device.";
/// @brief message for iterations count /// @brief message for iterations count
static const char iterations_count_message[] = "Optional. Number of iterations. " static const char iterations_count_message[] =
"Optional. Number of iterations. "
"If not specified, the number of iterations is calculated depending on a device."; "If not specified, the number of iterations is calculated depending on a device.";
/// @brief message for requests count /// @brief message for requests count
static const char infer_requests_count_message[] = "Optional. Number of infer requests. Default value is determined automatically for device."; static const char infer_requests_count_message[] =
"Optional. Number of infer requests. Default value is determined automatically for device.";
/// @brief message for execution time /// @brief message for execution time
static const char execution_time_message[] = "Optional. Time in seconds to execute topology."; static const char execution_time_message[] = "Optional. Time in seconds to execute topology.";
@ -45,7 +50,8 @@ static const char infer_num_threads_message[] = "Optional. Number of threads to
"(including HETERO and MULTI cases)."; "(including HETERO and MULTI cases).";
/// @brief message for #streams for CPU inference /// @brief message for #streams for CPU inference
static const char infer_num_streams_message[] = "Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices " static const char infer_num_streams_message[] =
"Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices "
"(for HETERO and MULTI device cases use format <dev1>:<nstreams1>,<dev2>:<nstreams2> or just " "(for HETERO and MULTI device cases use format <dev1>:<nstreams1>,<dev2>:<nstreams2> or just "
"<nstreams>). " "<nstreams>). "
"Default value is determined automatically for a device.Please note that although the " "Default value is determined automatically for a device.Please note that although the "
@ -58,38 +64,46 @@ static const char infer_num_streams_message[] = "Optional. Number of streams to
/// @brief message for latency percentile settings /// @brief message for latency percentile settings
static const char infer_latency_percentile_message[] = static const char infer_latency_percentile_message[] =
"Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median)."; "Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value "
"is 50 (median).";
/// @brief message for enforcing of BF16 execution where it is possible /// @brief message for enforcing of BF16 execution where it is possible
static const char enforce_bf16_message[] = "Optional. By default floating point operations execution in bfloat16 precision are enforced " static const char enforce_bf16_message[] =
"Optional. By default floating point operations execution in bfloat16 precision are enforced "
"if supported by platform.\n" "if supported by platform.\n"
" 'true' - enable bfloat16 regardless of platform support\n" " 'true' - enable bfloat16 regardless of platform support\n"
" 'false' - disable bfloat16 regardless of platform support"; " 'false' - disable bfloat16 regardless of platform support";
/// @brief message for user library argument /// @brief message for user library argument
static const char custom_cpu_library_message[] = "Required for CPU custom layers. Absolute path to a shared library with the kernels " static const char custom_cpu_library_message[] =
"Required for CPU custom layers. Absolute path to a shared library with the kernels "
"implementations."; "implementations.";
/// @brief message for clDNN custom kernels desc /// @brief message for clDNN custom kernels desc
static const char custom_cldnn_message[] = "Required for GPU custom kernels. Absolute path to an .xml file with the kernels description."; static const char custom_cldnn_message[] =
"Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.";
static const char batch_size_message[] = "Optional. Batch size value. If not specified, the batch size value is determined from " static const char batch_size_message[] =
"Optional. Batch size value. If not specified, the batch size value is determined from "
"Intermediate Representation."; "Intermediate Representation.";
// @brief message for CPU threads pinning option // @brief message for CPU threads pinning option
static const char infer_threads_pinning_message[] = static const char infer_threads_pinning_message[] =
"Optional. Explicit inference threads binding options (leave empty to let the OpenVINO to make a choice):\n" "Optional. Explicit inference threads binding options (leave empty to let the OpenVINO to make a choice):\n"
"\t\t\t\tenabling threads->cores pinning(\"YES\", which is already default for any conventional CPU), \n" "\t\t\t\tenabling threads->cores pinning(\"YES\", which is already default for any conventional CPU), \n"
"\t\t\t\tletting the runtime to decide on the threads->different core types(\"HYBRID_AWARE\", which is default on the hybrid CPUs) \n" "\t\t\t\tletting the runtime to decide on the threads->different core types(\"HYBRID_AWARE\", which is default on "
"the hybrid CPUs) \n"
"\t\t\t\tthreads->(NUMA)nodes(\"NUMA\") or \n" "\t\t\t\tthreads->(NUMA)nodes(\"NUMA\") or \n"
"\t\t\t\tcompletely disable(\"NO\") CPU inference threads pinning"; "\t\t\t\tcompletely disable(\"NO\") CPU inference threads pinning";
// @brief message for stream_output option // @brief message for stream_output option
static const char stream_output_message[] = "Optional. Print progress as a plain text. When specified, an interactive progress bar is " static const char stream_output_message[] =
"Optional. Print progress as a plain text. When specified, an interactive progress bar is "
"replaced with a " "replaced with a "
"multiline output."; "multiline output.";
// @brief message for report_type option // @brief message for report_type option
static const char report_type_message[] = "Optional. Enable collecting statistics report. \"no_counters\" report contains " static const char report_type_message[] =
"Optional. Enable collecting statistics report. \"no_counters\" report contains "
"configuration options specified, resulting FPS and latency. \"average_counters\" " "configuration options specified, resulting FPS and latency. \"average_counters\" "
"report extends \"no_counters\" report and additionally includes average PM " "report extends \"no_counters\" report and additionally includes average PM "
"counters values for each layer from the network. \"detailed_counters\" report " "counters values for each layer from the network. \"detailed_counters\" report "
@ -100,10 +114,12 @@ static const char report_type_message[] = "Optional. Enable collecting statistic
static const char report_folder_message[] = "Optional. Path to a folder where statistics report is stored."; static const char report_folder_message[] = "Optional. Path to a folder where statistics report is stored.";
// @brief message for exec_graph_path option // @brief message for exec_graph_path option
static const char exec_graph_path_message[] = "Optional. Path to a file where to store executable graph information serialized."; static const char exec_graph_path_message[] =
"Optional. Path to a file where to store executable graph information serialized.";
// @brief message for progress bar option // @brief message for progress bar option
static const char progress_message[] = "Optional. Show progress bar (can affect performance measurement). Default values is " static const char progress_message[] =
"Optional. Show progress bar (can affect performance measurement). Default values is "
"\"false\"."; "\"false\".";
// @brief message for performance counters option // @brief message for performance counters option
@ -111,19 +127,23 @@ static const char pc_message[] = "Optional. Report performance counters.";
#ifdef USE_OPENCV #ifdef USE_OPENCV
// @brief message for load config option // @brief message for load config option
static const char load_config_message[] = "Optional. Path to XML/YAML/JSON file to load custom IE parameters." static const char load_config_message[] =
"Optional. Path to XML/YAML/JSON file to load custom IE parameters."
" Please note, command line parameters have higher priority then parameters from configuration " " Please note, command line parameters have higher priority then parameters from configuration "
"file."; "file.";
// @brief message for dump config option // @brief message for dump config option
static const char dump_config_message[] = "Optional. Path to XML/YAML/JSON file to dump IE parameters, which were set by application."; static const char dump_config_message[] =
"Optional. Path to XML/YAML/JSON file to dump IE parameters, which were set by application.";
#endif #endif
static const char shape_message[] = "Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or " static const char shape_message[] =
"Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or "
"\"[1,3,224,224]\"" "\"[1,3,224,224]\""
" in case of one input size."; " in case of one input size.";
static const char layout_message[] = "Optional. Prompts how network layouts should be treated by application. " static const char layout_message[] =
"Optional. Prompts how network layouts should be treated by application. "
"For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size."; "For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size.";
// @brief message for enabling caching // @brief message for enabling caching
@ -139,19 +159,23 @@ static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8
static constexpr char inputs_precision_message[] = "Optional. Specifies precision for all input layers of the network."; static constexpr char inputs_precision_message[] = "Optional. Specifies precision for all input layers of the network.";
static constexpr char outputs_precision_message[] = "Optional. Specifies precision for all output layers of the network."; static constexpr char outputs_precision_message[] =
"Optional. Specifies precision for all output layers of the network.";
static constexpr char iop_message[] = "Optional. Specifies precision for input and output layers by name.\n" static constexpr char iop_message[] =
"Optional. Specifies precision for input and output layers by name.\n"
" Example: -iop \"input:FP16, output:FP16\".\n" " Example: -iop \"input:FP16, output:FP16\".\n"
" Notice that quotes are required.\n" " Notice that quotes are required.\n"
" Overwrites precision from ip and op options for " " Overwrites precision from ip and op options for "
"specified layers."; "specified layers.";
static constexpr char input_image_scale_message[] = "Optional. Scale values to be used for the input image per channel.\n" static constexpr char input_image_scale_message[] =
"Optional. Scale values to be used for the input image per channel.\n"
"Values to be provided in the [R, G, B] format. Can be defined for desired input of the model.\n" "Values to be provided in the [R, G, B] format. Can be defined for desired input of the model.\n"
"Example: -iscale data[255,255,255],info[255,255,255]\n"; "Example: -iscale data[255,255,255],info[255,255,255]\n";
static constexpr char input_image_mean_message[] = "Optional. Mean values to be used for the input image per channel.\n" static constexpr char input_image_mean_message[] =
"Optional. Mean values to be used for the input image per channel.\n"
"Values to be provided in the [R, G, B] format. Can be defined for desired input of the model,\n" "Values to be provided in the [R, G, B] format. Can be defined for desired input of the model,\n"
"Example: -imean data[255,255,255],info[255,255,255]\n"; "Example: -imean data[255,255,255],info[255,255,255]\n";

View File

@ -23,7 +23,8 @@ typedef std::chrono::nanoseconds ns;
typedef std::function<void(size_t id, const double latency)> QueueCallbackFunction; typedef std::function<void(size_t id, const double latency)> QueueCallbackFunction;
/// @brief Wrapper class for InferenceEngine::InferRequest. Handles asynchronous callbacks and calculates execution time. /// @brief Wrapper class for InferenceEngine::InferRequest. Handles asynchronous callbacks and calculates execution
/// time.
class InferReqWrap final { class InferReqWrap final {
public: public:
using Ptr = std::shared_ptr<InferReqWrap>; using Ptr = std::shared_ptr<InferReqWrap>;
@ -31,7 +32,9 @@ public:
~InferReqWrap() = default; ~InferReqWrap() = default;
explicit InferReqWrap(InferenceEngine::ExecutableNetwork& net, size_t id, QueueCallbackFunction callbackQueue) explicit InferReqWrap(InferenceEngine::ExecutableNetwork& net, size_t id, QueueCallbackFunction callbackQueue)
: _request(net.CreateInferRequest()), _id(id), _callbackQueue(callbackQueue) { : _request(net.CreateInferRequest()),
_id(id),
_callbackQueue(callbackQueue) {
_request.SetCompletionCallback([&]() { _request.SetCompletionCallback([&]() {
_endTime = Time::now(); _endTime = Time::now();
_callbackQueue(_id, getExecutionTimeInMilliseconds()); _callbackQueue(_id, getExecutionTimeInMilliseconds());
@ -79,8 +82,10 @@ class InferRequestsQueue final {
public: public:
InferRequestsQueue(InferenceEngine::ExecutableNetwork& net, size_t nireq) { InferRequestsQueue(InferenceEngine::ExecutableNetwork& net, size_t nireq) {
for (size_t id = 0; id < nireq; id++) { for (size_t id = 0; id < nireq; id++) {
requests.push_back( requests.push_back(std::make_shared<InferReqWrap>(
std::make_shared<InferReqWrap>(net, id, std::bind(&InferRequestsQueue::putIdleRequest, this, std::placeholders::_1, std::placeholders::_2))); net,
id,
std::bind(&InferRequestsQueue::putIdleRequest, this, std::placeholders::_1, std::placeholders::_2)));
_idleIds.push(id); _idleIds.push(id);
} }
resetTimes(); resetTimes();
@ -90,7 +95,8 @@ public:
// So it should be released before any context that the request can use inside internal asynchronous tasks // So it should be released before any context that the request can use inside internal asynchronous tasks
// For example all members of InferRequestsQueue would be destroyed before `requests` vector // For example all members of InferRequestsQueue would be destroyed before `requests` vector
// So requests can try to use this members from `putIdleRequest()` that would be called from request callback // So requests can try to use this members from `putIdleRequest()` that would be called from request callback
// To avoid this we should move this vector declaration after all members declaration or just clear it manually in destructor // To avoid this we should move this vector declaration after all members declaration or just clear it manually
// in destructor
requests.clear(); requests.clear();
} }

View File

@ -16,14 +16,15 @@
 using namespace InferenceEngine;
 #ifdef USE_OPENCV
-static const std::vector<std::string> supported_image_extensions = {"bmp", "dib", "jpeg", "jpg", "jpe", "jp2", "png",
-"pbm", "pgm", "ppm", "sr", "ras", "tiff", "tif"};
+static const std::vector<std::string> supported_image_extensions =
+{"bmp", "dib", "jpeg", "jpg", "jpe", "jp2", "png", "pbm", "pgm", "ppm", "sr", "ras", "tiff", "tif"};
 #else
 static const std::vector<std::string> supported_image_extensions = {"bmp"};
 #endif
 static const std::vector<std::string> supported_binary_extensions = {"bin"};
-std::vector<std::string> filterFilesByExtensions(const std::vector<std::string>& filePaths, const std::vector<std::string>& extensions) {
+std::vector<std::string> filterFilesByExtensions(const std::vector<std::string>& filePaths,
+const std::vector<std::string>& extensions) {
 std::vector<std::string> filtered;
 auto getExtension = [](const std::string& name) {
 auto extensionPosition = name.rfind('.', name.size());
@@ -40,8 +41,13 @@ std::vector<std::string> filterFilesByExtensions(const std::vector<std::string>&
 }
 template <typename T>
-void fillBlobImage(Blob::Ptr& inputBlob, const std::vector<std::string>& filePaths, const size_t& batchSize, const benchmark_app::InputInfo& app_info,
-const size_t& requestId, const size_t& inputId, const size_t& inputSize) {
+void fillBlobImage(Blob::Ptr& inputBlob,
+const std::vector<std::string>& filePaths,
+const size_t& batchSize,
+const benchmark_app::InputInfo& app_info,
+const size_t& requestId,
+const size_t& inputId,
+const size_t& inputSize) {
 MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
 if (!minput) {
 IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in "
@@ -57,7 +63,8 @@ void fillBlobImage(Blob::Ptr& inputBlob, const std::vector<std::string>& filePat
 std::vector<std::shared_ptr<uint8_t>> vreader;
 vreader.reserve(batchSize);
-for (size_t i = 0ULL, inputIndex = requestId * batchSize * inputSize + inputId; i < batchSize; i++, inputIndex += inputSize) {
+for (size_t i = 0ULL, inputIndex = requestId * batchSize * inputSize + inputId; i < batchSize;
+i++, inputIndex += inputSize) {
 inputIndex %= filePaths.size();
 slog::info << "Prepare image " << filePaths[inputIndex] << slog::endl;
@@ -88,11 +95,13 @@ void fillBlobImage(Blob::Ptr& inputBlob, const std::vector<std::string>& filePat
 for (size_t ch = 0; ch < numChannels; ++ch) {
 /** [images stride + channels stride + pixel id ] all in
 * bytes **/
-size_t offset = imageId * numChannels * width * height + (((app_info.layout == "NCHW") || (app_info.layout == "CHW"))
+size_t offset = imageId * numChannels * width * height +
+(((app_info.layout == "NCHW") || (app_info.layout == "CHW"))
 ? (ch * width * height + h * width + w)
 : (h * width * numChannels + w * numChannels + ch));
 inputBlobData[offset] =
-(static_cast<T>(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]) - static_cast<T>(app_info.mean[ch])) /
+(static_cast<T>(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]) -
+static_cast<T>(app_info.mean[ch])) /
 static_cast<T>(app_info.scale[ch]);
 }
 }
@@ -101,7 +110,11 @@ void fillBlobImage(Blob::Ptr& inputBlob, const std::vector<std::string>& filePat
 }
 template <typename T>
-void fillBlobBinary(Blob::Ptr& inputBlob, const std::vector<std::string>& filePaths, const size_t& batchSize, const size_t& requestId, const size_t& inputId,
-const size_t& inputSize) {
+void fillBlobBinary(Blob::Ptr& inputBlob,
+const std::vector<std::string>& filePaths,
+const size_t& batchSize,
+const size_t& requestId,
+const size_t& inputId,
+const size_t& inputSize) {
 MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
 if (!minput) {
@@ -114,7 +127,8 @@ void fillBlobBinary(Blob::Ptr& inputBlob, const std::vector<std::string>& filePa
 auto minputHolder = minput->wmap();
 auto inputBlobData = minputHolder.as<char*>();
-for (size_t i = 0ULL, inputIndex = requestId * batchSize * inputSize + inputId; i < batchSize; i++, inputIndex += inputSize) {
+for (size_t i = 0ULL, inputIndex = requestId * batchSize * inputSize + inputId; i < batchSize;
+i++, inputIndex += inputSize) {
 inputIndex %= filePaths.size();
 slog::info << "Prepare binary file " << filePaths[inputIndex] << slog::endl;
@@ -140,12 +154,15 @@ void fillBlobBinary(Blob::Ptr& inputBlob, const std::vector<std::string>& filePa
 }
 template <typename T>
-using uniformDistribution =
-typename std::conditional<std::is_floating_point<T>::value, std::uniform_real_distribution<T>,
-typename std::conditional<std::is_integral<T>::value, std::uniform_int_distribution<T>, void>::type>::type;
+using uniformDistribution = typename std::conditional<
+std::is_floating_point<T>::value,
+std::uniform_real_distribution<T>,
+typename std::conditional<std::is_integral<T>::value, std::uniform_int_distribution<T>, void>::type>::type;
 template <typename T, typename T2>
-void fillBlobRandom(Blob::Ptr& inputBlob, T rand_min = std::numeric_limits<uint8_t>::min(), T rand_max = std::numeric_limits<uint8_t>::max()) {
+void fillBlobRandom(Blob::Ptr& inputBlob,
+T rand_min = std::numeric_limits<uint8_t>::min(),
+T rand_max = std::numeric_limits<uint8_t>::max()) {
 MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
 if (!minput) {
 IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in "
@@ -191,14 +208,17 @@ void fillBlobImInfo(Blob::Ptr& inputBlob, const size_t& batchSize, std::pair<siz
 }
 }
-void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSize, benchmark_app::InputsInfo& app_inputs_info,
-std::vector<InferReqWrap::Ptr> requests) {
+void fillBlobs(const std::vector<std::string>& inputFiles,
+const size_t& batchSize,
+benchmark_app::InputsInfo& app_inputs_info,
+std::vector<InferReqWrap::Ptr> requests) {
 std::vector<std::pair<size_t, size_t>> input_image_sizes;
 for (auto& item : app_inputs_info) {
 if (item.second.isImage()) {
 input_image_sizes.push_back(std::make_pair(item.second.width(), item.second.height()));
 }
-slog::info << "Network input '" << item.first << "' precision " << item.second.precision << ", dimensions (" << item.second.layout << "): ";
+slog::info << "Network input '" << item.first << "' precision " << item.second.precision << ", dimensions ("
+<< item.second.layout << "): ";
 for (const auto& i : item.second.shape) {
 slog::info << i << " ";
 }
@@ -232,10 +252,11 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
 "extensions: "
 << ss.str() << slog::endl;
 } else if (binaryToBeUsed > binaryFiles.size()) {
-slog::warn << "Some binary input files will be duplicated: " << binaryToBeUsed << " files are required but only " << binaryFiles.size()
-<< " are provided" << slog::endl;
+slog::warn << "Some binary input files will be duplicated: " << binaryToBeUsed
+<< " files are required but only " << binaryFiles.size() << " are provided" << slog::endl;
 } else if (binaryToBeUsed < binaryFiles.size()) {
-slog::warn << "Some binary input files will be ignored: only " << binaryToBeUsed << " are required from " << binaryFiles.size() << slog::endl;
+slog::warn << "Some binary input files will be ignored: only " << binaryToBeUsed << " are required from "
+<< binaryFiles.size() << slog::endl;
 }
 imageFiles = filterFilesByExtensions(inputFiles, supported_image_extensions);
@@ -254,10 +275,11 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
 "extensions: "
 << ss.str() << slog::endl;
 } else if (imagesToBeUsed > imageFiles.size()) {
-slog::warn << "Some image input files will be duplicated: " << imagesToBeUsed << " files are required but only " << imageFiles.size()
-<< " are provided" << slog::endl;
+slog::warn << "Some image input files will be duplicated: " << imagesToBeUsed
+<< " files are required but only " << imageFiles.size() << " are provided" << slog::endl;
 } else if (imagesToBeUsed < imageFiles.size()) {
-slog::warn << "Some image input files will be ignored: only " << imagesToBeUsed << " are required from " << imageFiles.size() << slog::endl;
+slog::warn << "Some image input files will be ignored: only " << imagesToBeUsed << " are required from "
+<< imageFiles.size() << slog::endl;
 }
 }
@@ -274,15 +296,45 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
 if (!imageFiles.empty()) {
 // Fill with Images
 if (precision == InferenceEngine::Precision::FP32) {
-fillBlobImage<float>(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount);
+fillBlobImage<float>(inputBlob,
+imageFiles,
+batchSize,
+app_info,
+requestId,
+imageInputId++,
+imageInputCount);
 } else if (precision == InferenceEngine::Precision::FP16) {
-fillBlobImage<short>(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount);
+fillBlobImage<short>(inputBlob,
+imageFiles,
+batchSize,
+app_info,
+requestId,
+imageInputId++,
+imageInputCount);
 } else if (precision == InferenceEngine::Precision::I32) {
-fillBlobImage<int32_t>(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount);
+fillBlobImage<int32_t>(inputBlob,
+imageFiles,
+batchSize,
+app_info,
+requestId,
+imageInputId++,
+imageInputCount);
 } else if (precision == InferenceEngine::Precision::I64) {
-fillBlobImage<int64_t>(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount);
+fillBlobImage<int64_t>(inputBlob,
+imageFiles,
+batchSize,
+app_info,
+requestId,
+imageInputId++,
+imageInputCount);
 } else if (precision == InferenceEngine::Precision::U8) {
-fillBlobImage<uint8_t>(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount);
+fillBlobImage<uint8_t>(inputBlob,
+imageFiles,
+batchSize,
+app_info,
+requestId,
+imageInputId++,
+imageInputCount);
 } else {
 IE_THROW() << "Input precision is not supported for " << item.first;
 }
@@ -292,15 +344,41 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
 if (!binaryFiles.empty()) {
 // Fill with binary files
 if (precision == InferenceEngine::Precision::FP32) {
-fillBlobBinary<float>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
+fillBlobBinary<float>(inputBlob,
+binaryFiles,
+batchSize,
+requestId,
+binaryInputId++,
+binaryInputCount);
 } else if (precision == InferenceEngine::Precision::FP16) {
-fillBlobBinary<short>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
+fillBlobBinary<short>(inputBlob,
+binaryFiles,
+batchSize,
+requestId,
+binaryInputId++,
+binaryInputCount);
 } else if (precision == InferenceEngine::Precision::I32) {
-fillBlobBinary<int32_t>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
+fillBlobBinary<int32_t>(inputBlob,
+binaryFiles,
+batchSize,
+requestId,
+binaryInputId++,
+binaryInputCount);
 } else if (precision == InferenceEngine::Precision::I64) {
-fillBlobBinary<int64_t>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
-} else if ((precision == InferenceEngine::Precision::U8) || (precision == InferenceEngine::Precision::BOOL)) {
-fillBlobBinary<uint8_t>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
+fillBlobBinary<int64_t>(inputBlob,
+binaryFiles,
+batchSize,
+requestId,
+binaryInputId++,
+binaryInputCount);
+} else if ((precision == InferenceEngine::Precision::U8) ||
+(precision == InferenceEngine::Precision::BOOL)) {
+fillBlobBinary<uint8_t>(inputBlob,
+binaryFiles,
+batchSize,
+requestId,
+binaryInputId++,
+binaryInputCount);
 } else {
 IE_THROW() << "Input precision is not supported for " << item.first;
 }
@@ -310,7 +388,8 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
 if (app_info.isImageInfo() && (input_image_sizes.size() == 1)) {
 // Most likely it is image info: fill with image information
 auto image_size = input_image_sizes.at(0);
-slog::info << "Fill input '" << item.first << "' with image size " << image_size.first << "x" << image_size.second << slog::endl;
+slog::info << "Fill input '" << item.first << "' with image size " << image_size.first << "x"
+<< image_size.second << slog::endl;
 if (precision == InferenceEngine::Precision::FP32) {
 fillBlobImInfo<float>(inputBlob, batchSize, image_size);
 } else if (precision == InferenceEngine::Precision::FP16) {
@@ -326,8 +405,9 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
 }
 }
 // Fill random
-slog::info << "Fill input '" << item.first << "' with random values (" << std::string((app_info.isImage() ? "image" : "some binary data"))
-<< " is expected)" << slog::endl;
+slog::info << "Fill input '" << item.first << "' with random values ("
+<< std::string((app_info.isImage() ? "image" : "some binary data")) << " is expected)"
+<< slog::endl;
 if (precision == InferenceEngine::Precision::FP32) {
 fillBlobRandom<float, float>(inputBlob);
 } else if (precision == InferenceEngine::Precision::FP16) {

View File

@@ -11,5 +11,7 @@
 #include "infer_request_wrap.hpp"
 #include "utils.hpp"
-void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSize, benchmark_app::InputsInfo& app_inputs_info,
-std::vector<InferReqWrap::Ptr> requests);
+void fillBlobs(const std::vector<std::string>& inputFiles,
+const size_t& batchSize,
+benchmark_app::InputsInfo& app_inputs_info,
+std::vector<InferReqWrap::Ptr> requests);

View File

@@ -60,8 +60,10 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
 throw std::logic_error("Incorrect API. Please set -api option to `sync` or `async` value.");
 }
-if (!FLAGS_report_type.empty() && FLAGS_report_type != noCntReport && FLAGS_report_type != averageCntReport && FLAGS_report_type != detailedCntReport) {
-std::string err = "only " + std::string(noCntReport) + "/" + std::string(averageCntReport) + "/" + std::string(detailedCntReport) +
+if (!FLAGS_report_type.empty() && FLAGS_report_type != noCntReport && FLAGS_report_type != averageCntReport &&
+FLAGS_report_type != detailedCntReport) {
+std::string err = "only " + std::string(noCntReport) + "/" + std::string(averageCntReport) + "/" +
+std::string(detailedCntReport) +
 " report types are supported (invalid -report_type option value)";
 throw std::logic_error(err);
 }
@@ -73,7 +75,8 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
 bool isNetworkCompiled = fileExt(FLAGS_m) == "blob";
 bool isPrecisionSet = !(FLAGS_ip.empty() && FLAGS_op.empty() && FLAGS_iop.empty());
 if (isNetworkCompiled && isPrecisionSet) {
-std::string err = std::string("Cannot set precision for a compiled network. ") + std::string("Please re-compile your network with required precision "
+std::string err = std::string("Cannot set precision for a compiled network. ") +
+std::string("Please re-compile your network with required precision "
 "using compile_tool");
 throw std::logic_error(err);
@@ -83,7 +86,8 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
 static void next_step(const std::string additional_info = "") {
 static size_t step_id = 0;
-static const std::map<size_t, std::string> step_names = {{1, "Parsing and validating input arguments"},
+static const std::map<size_t, std::string> step_names = {
+{1, "Parsing and validating input arguments"},
 {2, "Loading Inference Engine"},
 {3, "Setting device configuration"},
 {4, "Reading network files"},
@@ -140,11 +144,14 @@ int main(int argc, char* argv[]) {
 }
 }
 if (!FLAGS_report_type.empty()) {
-statistics = std::make_shared<StatisticsReport>(StatisticsReport::Config {FLAGS_report_type, FLAGS_report_folder});
+statistics =
+std::make_shared<StatisticsReport>(StatisticsReport::Config{FLAGS_report_type, FLAGS_report_folder});
 statistics->addParameters(StatisticsReport::Category::COMMAND_LINE_PARAMETERS, command_line_arguments);
 }
 auto isFlagSetInCommandLine = [&command_line_arguments](const std::string& name) {
-return (std::find_if(command_line_arguments.begin(), command_line_arguments.end(), [name](const std::pair<std::string, std::string>& p) {
+return (std::find_if(command_line_arguments.begin(),
+command_line_arguments.end(),
+[name](const std::pair<std::string, std::string>& p) {
 return p.first == name;
 }) != command_line_arguments.end());
 };
@@ -213,13 +220,17 @@ int main(int argc, char* argv[]) {
 if (isFlagSetInCommandLine("pc")) {
 // set to user defined value
 device_config[CONFIG_KEY(PERF_COUNT)] = FLAGS_pc ? CONFIG_VALUE(YES) : CONFIG_VALUE(NO);
-} else if (device_config.count(CONFIG_KEY(PERF_COUNT)) && (device_config.at(CONFIG_KEY(PERF_COUNT)) == "YES")) {
-slog::warn << "Performance counters for " << device << " device is turned on. To print results use -pc option." << slog::endl;
+} else if (device_config.count(CONFIG_KEY(PERF_COUNT)) &&
+(device_config.at(CONFIG_KEY(PERF_COUNT)) == "YES")) {
+slog::warn << "Performance counters for " << device
+<< " device is turned on. To print results use -pc option." << slog::endl;
 } else if (FLAGS_report_type == detailedCntReport || FLAGS_report_type == averageCntReport) {
-slog::warn << "Turn on performance counters for " << device << " device since report type is " << FLAGS_report_type << "." << slog::endl;
+slog::warn << "Turn on performance counters for " << device << " device since report type is "
+<< FLAGS_report_type << "." << slog::endl;
 device_config[CONFIG_KEY(PERF_COUNT)] = CONFIG_VALUE(YES);
 } else if (!FLAGS_exec_graph_path.empty()) {
-slog::warn << "Turn on performance counters for " << device << " device due to execution graph dumping." << slog::endl;
+slog::warn << "Turn on performance counters for " << device << " device due to execution graph dumping."
+<< slog::endl;
 device_config[CONFIG_KEY(PERF_COUNT)] = CONFIG_VALUE(YES);
 } else {
 // set to default value
@@ -231,8 +242,10 @@ int main(int argc, char* argv[]) {
 const std::string key = device + "_THROUGHPUT_STREAMS";
 if (device_nstreams.count(device)) {
 // set to user defined value
-std::vector<std::string> supported_config_keys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
-if (std::find(supported_config_keys.begin(), supported_config_keys.end(), key) == supported_config_keys.end()) {
+std::vector<std::string> supported_config_keys =
+ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
+if (std::find(supported_config_keys.begin(), supported_config_keys.end(), key) ==
+supported_config_keys.end()) {
 throw std::logic_error("Device " + device + " doesn't support config key '" + key + "'! " +
 "Please specify -nstreams for correct devices in format "
 "<dev1>:<nstreams1>,<dev2>:<nstreams2>" +
@@ -267,8 +280,10 @@ int main(int argc, char* argv[]) {
 // set to user defined value
 device_config[CONFIG_KEY(CPU_BIND_THREAD)] = FLAGS_pin;
 } else if (!device_config.count(CONFIG_KEY(CPU_BIND_THREAD))) {
-if ((device_name.find("MULTI") != std::string::npos) && (device_name.find("GPU") != std::string::npos)) {
-slog::warn << "Turn off threads pinning for " << device << " device since multi-scenario with GPU device is used." << slog::endl;
+if ((device_name.find("MULTI") != std::string::npos) &&
+(device_name.find("GPU") != std::string::npos)) {
+slog::warn << "Turn off threads pinning for " << device
+<< " device since multi-scenario with GPU device is used." << slog::endl;
 device_config[CONFIG_KEY(CPU_BIND_THREAD)] = CONFIG_VALUE(NO);
 }
 }
@@ -279,7 +294,8 @@ int main(int argc, char* argv[]) {
 // for GPU execution, more throughput-oriented execution via streams
 setThroughputStreams();
-if ((device_name.find("MULTI") != std::string::npos) && (device_name.find("CPU") != std::string::npos)) {
+if ((device_name.find("MULTI") != std::string::npos) &&
+(device_name.find("CPU") != std::string::npos)) {
 slog::warn << "Turn on GPU throttling. Multi-device execution with "
 "the CPU + GPU performs best with GPU throttling hint, "
 << "which releases another CPU thread (that is otherwise "
@@ -299,9 +315,11 @@ int main(int argc, char* argv[]) {
 if (isFlagSetInCommandLine("nthreads"))
 device_config[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(FLAGS_nthreads);
 } else {
-std::vector<std::string> supported_config_keys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
+std::vector<std::string> supported_config_keys =
+ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
 auto supported = [&](const std::string& key) {
-return std::find(std::begin(supported_config_keys), std::end(supported_config_keys), key) != std::end(supported_config_keys);
+return std::find(std::begin(supported_config_keys), std::end(supported_config_keys), key) !=
+std::end(supported_config_keys);
 };
 if (supported(CONFIG_KEY(CPU_THREADS_NUM)) && isFlagSetInCommandLine("nthreads")) {
 device_config[CONFIG_KEY(CPU_THREADS_NUM)] = std::to_string(FLAGS_nthreads);
@@ -351,7 +369,8 @@ int main(int argc, char* argv[]) {
 auto duration_ms = double_to_string(get_total_ms_time(startTime));
 slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
 if (statistics)
-statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"load network time (ms)", duration_ms}});
+statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+{{"load network time (ms)", duration_ms}});
 if (batchSize == 0) {
 batchSize = 1;
 }
@@ -367,7 +386,8 @@ int main(int argc, char* argv[]) {
 auto duration_ms = double_to_string(get_total_ms_time(startTime));
 slog::info << "Read network took " << duration_ms << " ms" << slog::endl;
 if (statistics)
-statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"read network time (ms)", duration_ms}});
+statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+{{"read network time (ms)", duration_ms}});
 const InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
 if (inputInfo.empty()) {
@@ -380,7 +400,13 @@ int main(int argc, char* argv[]) {
 batchSize = cnnNetwork.getBatchSize();
 // Parse input shapes if specified
 bool reshape = false;
-app_inputs_info = getInputsInfo<InputInfo::Ptr>(FLAGS_shape, FLAGS_layout, FLAGS_b, FLAGS_iscale, FLAGS_imean, inputInfo, reshape);
+app_inputs_info = getInputsInfo<InputInfo::Ptr>(FLAGS_shape,
+FLAGS_layout,
+FLAGS_b,
+FLAGS_iscale,
+FLAGS_imean,
+inputInfo,
+reshape);
 if (reshape) {
 InferenceEngine::ICNNNetwork::InputShapes shapes = {};
 for (auto& item : app_inputs_info)
@@ -391,13 +417,15 @@ int main(int argc, char* argv[]) {
 duration_ms = double_to_string(get_total_ms_time(startTime));
 slog::info << "Reshape network took " << duration_ms << " ms" << slog::endl;
 if (statistics)
-statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"reshape network time (ms)", duration_ms}});
+statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+{{"reshape network time (ms)", duration_ms}});
 }
 // use batch size according to provided layout and shapes
 batchSize = (!FLAGS_layout.empty()) ? getBatchSize(app_inputs_info) : cnnNetwork.getBatchSize();
 topology_name = cnnNetwork.getName();
-slog::info << (FLAGS_b != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize << slog::endl;
+slog::info << (FLAGS_b != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize
+<< slog::endl;
 // ----------------- 6. Configuring inputs and outputs
 // ----------------------------------------------------------------------
@@ -424,7 +452,8 @@ int main(int argc, char* argv[]) {
 duration_ms = double_to_string(get_total_ms_time(startTime));
 slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
 if (statistics)
-statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"load network time (ms)", duration_ms}});
+statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+{{"load network time (ms)", duration_ms}});
 } else {
 next_step();
 slog::info << "Skipping the step for compiled network" << slog::endl;
@@ -440,8 +469,14 @@ int main(int argc, char* argv[]) {
 auto duration_ms = double_to_string(get_total_ms_time(startTime));
 slog::info << "Import network took " << duration_ms << " ms" << slog::endl;
 if (statistics)
-statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"import network time (ms)", duration_ms}});
-app_inputs_info = getInputsInfo<InputInfo::CPtr>(FLAGS_shape, FLAGS_layout, FLAGS_b, FLAGS_iscale, FLAGS_imean, exeNetwork.GetInputsInfo());
+statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+{{"import network time (ms)", duration_ms}});
+app_inputs_info = getInputsInfo<InputInfo::CPtr>(FLAGS_shape,
+FLAGS_layout,
+FLAGS_b,
+FLAGS_iscale,
+FLAGS_imean,
+exeNetwork.GetInputsInfo());
 if (batchSize == 0) {
 batchSize = 1;
 }
@@ -479,8 +514,8 @@ int main(int argc, char* argv[]) {
 if ((niter > 0) && (FLAGS_api == "async")) {
 niter = ((niter + nireq - 1) / nireq) * nireq;
 if (FLAGS_niter != niter) {
-slog::warn << "Number of iterations was aligned by request number from " << FLAGS_niter << " to " << niter << " using number of requests "
-<< nireq << slog::endl;
+slog::warn << "Number of iterations was aligned by request number from " << FLAGS_niter << " to "
+<< niter << " using number of requests " << nireq << slog::endl;
 }
 }
@@ -496,7 +531,8 @@ int main(int argc, char* argv[]) {
 uint64_t duration_nanoseconds = getDurationInNanoseconds(duration_seconds);
 if (statistics) {
-statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG,
+statistics->addParameters(
+StatisticsReport::Category::RUNTIME_CONFIG,
 {
 {"topology", topology_name},
 {"target device", device_name},
@@ -510,7 +546,8 @@ int main(int argc, char* argv[]) {
 for (auto& nstreams : device_nstreams) {
 std::stringstream ss;
 ss << "number of " << nstreams.first << " streams";
-statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG, {
+statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG,
+{
 {ss.str(), nstreams.second},
 });
 }
@@ -576,7 +613,8 @@ int main(int argc, char* argv[]) {
 auto duration_ms = double_to_string(inferRequestsQueue.getLatencies()[0]);
 slog::info << "First inference took " << duration_ms << " ms" << slog::endl;
 if (statistics)
-statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"first inference time (ms)", duration_ms}});
+statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+{{"first inference time (ms)", duration_ms}});
 inferRequestsQueue.resetTimes();
 auto startTime = Time::now();
@@ -587,7 +625,8 @@ int main(int argc, char* argv[]) {
 * executed in the same conditions **/
 ProgressBar progressBar(progressBarTotalCount, FLAGS_stream_output, FLAGS_progress);
-while ((niter != 0LL && iteration < niter) || (duration_nanoseconds != 0LL && (uint64_t)execTime < duration_nanoseconds) ||
+while ((niter != 0LL && iteration < niter) ||
+(duration_nanoseconds != 0LL && (uint64_t)execTime < duration_nanoseconds) ||
 (FLAGS_api == "async" && iteration % nireq != 0)) {
 inferRequest = inferRequestsQueue.getIdleRequest();
 if (!inferRequest) {
@@ -629,10 +668,12 @@ int main(int argc, char* argv[]) {
 double latency = getMedianValue<double>(inferRequestsQueue.getLatencies(), FLAGS_latency_percentile);
 double totalDuration = inferRequestsQueue.getDurationInMilliseconds();
-double fps = (FLAGS_api == "sync") ? batchSize * 1000.0 / latency : batchSize * 1000.0 * iteration / totalDuration;
+double fps =
+(FLAGS_api == "sync") ? batchSize * 1000.0 / latency : batchSize * 1000.0 * iteration / totalDuration;
 if (statistics) {
-statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {
+statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+{
 {"total execution time (ms)", double_to_string(totalDuration)},
 {"total number of iterations", std::to_string(iteration)},
 });
@@ -643,11 +684,13 @@ int main(int argc, char* argv[]) {
 } else {
 latency_label = "latency (" + std::to_string(FLAGS_latency_percentile) + " percentile) (ms)";
 }
-statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {
+statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+{
 {latency_label, double_to_string(latency)},
 });
 }
-statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"throughput", double_to_string(fps)}});
+statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+{{"throughput", double_to_string(fps)}});
 }
 progressBar.finish();
@@ -707,7 +750,8 @@ int main(int argc, char* argv[]) {
 slog::err << ex.what() << slog::endl;
 if (statistics) {
-statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {
+statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+{
 {"error", ex.what()},
 });
 statistics->dump();

View File

@@ -35,14 +35,14 @@ public:
 EXECUTION_RESULTS,
 };
-explicit StatisticsReport(Config config): _config(std::move(config)) {
+explicit StatisticsReport(Config config) : _config(std::move(config)) {
 _separator =
 #if defined _WIN32 || defined __CYGWIN__
-#if defined UNICODE
+# if defined UNICODE
 L"\\";
-#else
+# else
 "\\";
-#endif
+# endif
 #else
 "/";
 #endif

View File

@@ -16,7 +16,7 @@
 // clang-format on
 #ifdef USE_OPENCV
-#include <opencv2/core.hpp>
+# include <opencv2/core.hpp>
 #endif
 namespace benchmark_app {
@@ -54,8 +54,13 @@ size_t InputInfo::depth() const {
 } // namespace benchmark_app
 uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device) {
-static const std::map<std::string, uint32_t> deviceDefaultDurationInSeconds {{"CPU", 60}, {"GPU", 60}, {"VPU", 60}, {"MYRIAD", 60},
-{"HDDL", 60}, {"FPGA", 120}, {"UNKNOWN", 120}};
+static const std::map<std::string, uint32_t> deviceDefaultDurationInSeconds{{"CPU", 60},
+{"GPU", 60},
+{"VPU", 60},
+{"MYRIAD", 60},
+{"HDDL", 60},
+{"FPGA", 120},
+{"UNKNOWN", 120}};
 uint32_t duration = 0;
 for (const auto& deviceDurationInSeconds : deviceDefaultDurationInSeconds) {
 if (device.find(deviceDurationInSeconds.first) != std::string::npos) {
@@ -63,8 +68,9 @@ uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device) {
 }
 }
 if (duration == 0) {
-const auto unknownDeviceIt =
-find_if(deviceDefaultDurationInSeconds.begin(), deviceDefaultDurationInSeconds.end(), [](std::pair<std::string, uint32_t> deviceDuration) {
+const auto unknownDeviceIt = find_if(deviceDefaultDurationInSeconds.begin(),
+deviceDefaultDurationInSeconds.end(),
+[](std::pair<std::string, uint32_t> deviceDuration) {
 return deviceDuration.first == "UNKNOWN";
 });
@@ -72,7 +78,8 @@ uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device) {
 throw std::logic_error("UNKNOWN device was not found in the device duration list");
 }
 duration = unknownDeviceIt->second;
-slog::warn << "Default duration " << duration << " seconds for unknown device '" << device << "' is used" << slog::endl;
+slog::warn << "Default duration " << duration << " seconds for unknown device '" << device << "' is used"
+<< slog::endl;
 }
 return duration;
 }
@@ -112,7 +119,8 @@ std::vector<std::string> parseDevices(const std::string& device_string) {
 return devices;
 }
-std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices, const std::string& values_string) {
+std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices,
+const std::string& values_string) {
 // Format: <device1>:<value1>,<device2>:<value2> or just <value>
 std::map<std::string, std::string> result;
 auto device_value_strings = split(values_string, ',');
@@ -125,7 +133,8 @@ std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector
 if (it != devices.end()) {
 result[device_name] = nstreams;
 } else {
-throw std::logic_error("Can't set nstreams value " + std::string(nstreams) + " for device '" + device_name + "'! Incorrect device name!");
+throw std::logic_error("Can't set nstreams value " + std::string(nstreams) + " for device '" +
+device_name + "'! Incorrect device name!");
 } else if (device_value_vec.size() == 1) {
 auto value = device_value_vec.at(0);
@@ -172,7 +181,8 @@ std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& sha
 return ss.str();
 }

-std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& scale_mean, const benchmark_app::InputsInfo& inputs_info) {
+std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& scale_mean,
+const benchmark_app::InputsInfo& inputs_info) {
 // Format: data:[255,255,255],info[255,255,255]
 std::map<std::string, std::vector<float>> return_value;

View File

@@ -29,14 +29,17 @@ using InputsInfo = std::map<std::string, InputInfo>;
 std::vector<std::string> parseDevices(const std::string& device_string);
 uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device);
-std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices, const std::string& values_string);
+std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices,
+const std::string& values_string);
 std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& shapes);
 size_t getBatchSize(const benchmark_app::InputsInfo& inputs_info);
 std::vector<std::string> split(const std::string& s, char delim);
-std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& scale_mean, const benchmark_app::InputsInfo& inputs_info);
+std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& scale_mean,
+const benchmark_app::InputsInfo& inputs_info);
 template <typename T>
-std::map<std::string, std::string> parseInputParameters(const std::string parameter_string, const std::map<std::string, T>& input_info) {
+std::map<std::string, std::string> parseInputParameters(const std::string parameter_string,
+const std::map<std::string, T>& input_info) {
 // Parse parameter string like "input0[value0],input1[value1]" or "[value]" (applied to all
 // inputs)
 std::map<std::string, std::string> return_value;
@@ -67,8 +70,12 @@ std::map<std::string, std::string> parseInputParameters(const std::string parame
 }
 template <typename T>
-benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const std::string& layout_string, const size_t batch_size,
-const std::string& scale_string, const std::string& mean_string, const std::map<std::string, T>& input_info,
-bool& reshape_required) {
+benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string,
+const std::string& layout_string,
+const size_t batch_size,
+const std::string& scale_string,
+const std::string& mean_string,
+const std::map<std::string, T>& input_info,
+bool& reshape_required) {
 std::map<std::string, std::string> shape_map = parseInputParameters(shape_string, input_info);
 std::map<std::string, std::string> layout_map = parseInputParameters(layout_string, input_info);
@@ -134,10 +141,20 @@ benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const s
 }
 template <typename T>
-benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const std::string& layout_string, const size_t batch_size,
-const std::string& scale_string, const std::string& mean_string, const std::map<std::string, T>& input_info) {
+benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string,
+const std::string& layout_string,
+const size_t batch_size,
+const std::string& scale_string,
+const std::string& mean_string,
+const std::map<std::string, T>& input_info) {
 bool reshape_required = false;
-return getInputsInfo<T>(shape_string, layout_string, batch_size, scale_string, mean_string, input_info, reshape_required);
+return getInputsInfo<T>(shape_string,
+layout_string,
+batch_size,
+scale_string,
+mean_string,
+input_info,
+reshape_required);
 }
 #ifdef USE_OPENCV

View File

@@ -17,11 +17,13 @@ static const char help_message[] = "Print a usage message.";
 static const char model_message[] = "Required. Path to an .xml file with a trained model.";
 /// @brief message for images argument
-static const char image_message[] = "Required. Path to a folder with images or path to an image files: a .ubyte file for LeNet"
+static const char image_message[] =
+"Required. Path to a folder with images or path to an image files: a .ubyte file for LeNet"
 " and a .bmp file for the other networks.";
 /// @brief message for assigning cnn calculation to device
-static const char target_device_message[] = "Optional. Specify the target device to infer on (the list of available devices is shown below). "
+static const char target_device_message[] =
+"Optional. Specify the target device to infer on (the list of available devices is shown below). "
 "Default value is CPU. Use \"-d HETERO:<comma_separated_devices_list>\" format to specify HETERO plugin. "
 "Sample will look for a suitable plugin for device specified.";

View File

@@ -100,7 +100,8 @@ int main(int argc, char* argv[]) {
 // Config for device plugin custom extension is loaded from an .xml
 // description
 ie.SetConfig({{PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c}}, FLAGS_d);
-slog::info << "Config for " << FLAGS_d << " device plugin custom extension loaded: " << FLAGS_c << slog::endl;
+slog::info << "Config for " << FLAGS_d << " device plugin custom extension loaded: " << FLAGS_c
+<< slog::endl;
 }
 // -----------------------------------------------------------------------------------------------------
@@ -142,8 +143,8 @@ int main(int argc, char* argv[]) {
 continue;
 }
 /** Store image data **/
-std::shared_ptr<unsigned char> data(
-reader->getData(inputInfoItem.second->getTensorDesc().getDims()[3], inputInfoItem.second->getTensorDesc().getDims()[2]));
+std::shared_ptr<unsigned char> data(reader->getData(inputInfoItem.second->getTensorDesc().getDims()[3],
+inputInfoItem.second->getTensorDesc().getDims()[2]));
 if (data != nullptr) {
 imagesData.push_back(data);
 validImageNames.push_back(i);
@@ -203,7 +204,8 @@ int main(int argc, char* argv[]) {
 for (size_t ch = 0; ch < num_channels; ++ch) {
 /** [images stride + channels stride + pixel id ] all in
 * bytes **/
-data[image_id * image_size * num_channels + ch * image_size + pid] = imagesData.at(image_id).get()[pid * num_channels + ch];
+data[image_id * image_size * num_channels + ch * image_size + pid] =
+imagesData.at(image_id).get()[pid * num_channels + ch];
 }
 }
 }
@@ -255,8 +257,9 @@ int main(int argc, char* argv[]) {
 /** Validating -nt value **/
 const size_t resultsCnt = outputBlob->size() / batchSize;
 if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) {
-slog::warn << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than " << resultsCnt + 1
-<< " and more than 0)\n Maximal value " << resultsCnt << " will be used." << slog::endl;
+slog::warn << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than "
+<< resultsCnt + 1 << " and more than 0)\n Maximal value " << resultsCnt
+<< " will be used." << slog::endl;
 FLAGS_nt = resultsCnt;
 }

View File

@@ -42,7 +42,8 @@ MnistUbyte::MnistUbyte(const std::string& filename) {
 n_cols = reverseInt(n_cols);
 _width = (size_t)n_cols;
 if (number_of_images > 1) {
-std::cout << "[MNIST] Warning: number_of_images in mnist file equals " << number_of_images << ". Only a first image will be read." << std::endl;
+std::cout << "[MNIST] Warning: number_of_images in mnist file equals " << number_of_images
+<< ". Only a first image will be read." << std::endl;
 }
 size_t size = _width * _height * 1;

View File

@@ -14,19 +14,19 @@
 #include <vector>
 #if defined(_WIN32)
-#ifdef IMPLEMENT_FORMAT_READER
-#define FORMAT_READER_API(type) extern "C" __declspec(dllexport) type
-#else
-#define FORMAT_READER_API(type) extern "C" type
-#endif
+# ifdef IMPLEMENT_FORMAT_READER
+# define FORMAT_READER_API(type) extern "C" __declspec(dllexport) type
+# else
+# define FORMAT_READER_API(type) extern "C" type
+# endif
 #elif (__GNUC__ >= 4)
-#ifdef IMPLEMENT_FORMAT_READER
-#define FORMAT_READER_API(type) extern "C" __attribute__((visibility("default"))) type
-#else
-#define FORMAT_READER_API(type) extern "C" type
-#endif
+# ifdef IMPLEMENT_FORMAT_READER
+# define FORMAT_READER_API(type) extern "C" __attribute__((visibility("default"))) type
+# else
+# define FORMAT_READER_API(type) extern "C" type
+# endif
 #else
-#define FORMAT_READER_API(TYPE) extern "C" TYPE
+# define FORMAT_READER_API(TYPE) extern "C" TYPE
 #endif
 namespace FormatReader {

Some files were not shown because too many files have changed in this diff