Merge remote-tracking branch 'upstream/master'

Author: Steve Yoo
Date: 2021-08-13 10:49:35 +09:00
Commit: 508c616151
544 changed files with 5952 additions and 2038 deletions

View File

@ -117,7 +117,6 @@ jobs:
-DPYTHON_EXECUTABLE=/usr/bin/python3.8
-DENABLE_WHEEL=ON
-DENABLE_TESTS=ON
-DNGRAPH_ONNX_IMPORT_ENABLE=ON
-DNGRAPH_ONNX_FRONTEND_ENABLE=ON
-DENABLE_FASTER_BUILD=ON
-DENABLE_STRICT_DEPENDENCIES=OFF

View File

@ -94,7 +94,6 @@ jobs:
-DENABLE_PROFILING_ITT=OFF
-DENABLE_SAMPLES=OFF
-DENABLE_SPEECH_DEMO=OFF
-DNGRAPH_ONNX_IMPORT_ENABLE=ON
-DNGRAPH_ONNX_FRONTEND_ENABLE=ON
-DNGRAPH_DEBUG_ENABLE=OFF
$(REPO_DIR)

View File

@ -68,7 +68,6 @@ RUN cmake .. \
-DENABLE_SPEECH_DEMO=OFF \
-DENABLE_PYTHON=ON \
-DPYTHON_EXECUTABLE=/usr/bin/python3 \
-DNGRAPH_ONNX_IMPORT_ENABLE=ON \
-DNGRAPH_ONNX_FRONTEND_ENABLE=ON \
-DNGRAPH_DEBUG_ENABLE=OFF \
-DCMAKE_INSTALL_PREFIX=/openvino/dist \

View File

@ -34,7 +34,7 @@ jobs:
- name: Create code style diff
if: failure()
run: |
cmake --build . --target clang_format_fix_all
cmake --build . --target clang_format_fix_all -j8
git diff > code_style_diff.diff
working-directory: build
@ -42,7 +42,7 @@ jobs:
if: failure()
with:
name: code_style_diff
path: code_style_diff.diff
path: build/code_style_diff.diff
ShellCheck:
runs-on: ubuntu-18.04

.gitmodules (vendored): 8 changed lines
View File

@ -18,12 +18,12 @@
path = thirdparty/ade
url = https://github.com/opencv/ade.git
ignore = dirty
[submodule "thirdparty/gflags"]
path = thirdparty/gflags
[submodule "thirdparty/gflags/gflags"]
path = thirdparty/gflags/gflags
url = https://github.com/gflags/gflags.git
ignore = dirty
[submodule "thirdparty/gtest"]
path = thirdparty/gtest
[submodule "thirdparty/gtest/gtest"]
path = thirdparty/gtest/gtest
url = https://github.com/openvinotoolkit/googletest.git
ignore = dirty
[submodule "thirdparty/ocl/icd_loader"]

View File

@ -90,17 +90,10 @@ ie_coverage_extract(INPUT "openvino" OUTPUT "ngraph"
ie_coverage_genhtml(INFO_FILE "ngraph"
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
if(NGRAPH_ONNX_IMPORT_ENABLE)
ie_coverage_extract(INPUT "openvino" OUTPUT "onnx_importer"
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/onnx_common*"
"${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/onnx_import*")
ie_coverage_genhtml(INFO_FILE "onnx_importer"
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
endif()
if(NGRAPH_ONNX_FRONTEND_ENABLE)
ie_coverage_extract(INPUT "openvino" OUTPUT "onnx_ngraph_frontend"
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/frontend*")
ie_coverage_genhtml(INFO_FILE "onnx_ngraph_frontend"
ie_coverage_extract(INPUT "openvino" OUTPUT "onnx"
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/*"
"${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/*")
ie_coverage_genhtml(INFO_FILE "onnx"
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
endif()

View File

@ -122,13 +122,12 @@ else()
set(protoc_available ON)
endif()
ie_dependent_option(NGRAPH_ONNX_IMPORT_ENABLE "Enable ONNX importer" ON "protoc_available" OFF)
ie_dependent_option(NGRAPH_ONNX_FRONTEND_ENABLE "Enable ONNX FrontEnd" OFF "NGRAPH_ONNX_IMPORT_ENABLE" OFF)
ie_dependent_option(NGRAPH_ONNX_FRONTEND_ENABLE "Enable ONNX FrontEnd" ON "protoc_available" OFF)
ie_dependent_option(NGRAPH_PDPD_FRONTEND_ENABLE "Enable PaddlePaddle FrontEnd" ON "protoc_available" OFF)
ie_dependent_option(NGRAPH_USE_PROTOBUF_LITE "Compiles and links with protobuf-lite" ON
"NGRAPH_ONNX_IMPORT_ENABLE" OFF)
"NGRAPH_ONNX_FRONTEND_ENABLE" OFF)
ie_dependent_option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system protobuf" OFF
"NGRAPH_ONNX_IMPORT_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF)
"NGRAPH_ONNX_FRONTEND_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF)
ie_dependent_option(NGRAPH_UNIT_TEST_ENABLE "Enables ngraph unit tests" ON "ENABLE_TESTS;NOT ANDROID" OFF)
ie_dependent_option(NGRAPH_UNIT_TEST_BACKENDS_ENABLE "Control the building of unit tests using backends" ON
"NGRAPH_UNIT_TEST_ENABLE" OFF)

View File

@ -28,9 +28,8 @@
#
# ngraph::frontend_manager - nGraph frontend manager
#
# ngraph_onnx_importer_FOUND - True if the system has onnx_importer library
# ngraph::onnx_importer - ONNX importer target (optional)
# ONNX_IMPORTER_LIBRARIES - ONNX importer libraries
# ngraph_onnx_ngraph_frontend_FOUND - True if the system has onnx_ngraph_frontend library
# ngraph::onnx_ngraph_frontend - ONNX FrontEnd target (optional)
#
# ngraph_paddlepaddle_frontend_FOUND - True if the system has PDPD frontend
# ngraph::paddlepaddle_ngraph_frontend - nGraph PDPD frontend (optional)
@ -38,18 +37,30 @@
@PACKAGE_INIT@
function(set_imported_global target)
get_target_property(IS_IMPORTED_GLOBAL ${target} IMPORTED_GLOBAL)
if (NOT IS_IMPORTED_GLOBAL)
set_target_properties(${target} PROPERTIES IMPORTED_GLOBAL TRUE)
endif()
endfunction()
if(NOT TARGET ngraph)
include("${CMAKE_CURRENT_LIST_DIR}/ngraphTargets.cmake")
set_imported_global(ngraph::ngraph)
set_imported_global(ngraph::frontend_manager)
endif()
set(ngraph_ngraph_FOUND ON)
set(NGRAPH_LIBRARIES ngraph::ngraph)
set(ngraph_onnx_importer_FOUND @NGRAPH_ONNX_IMPORT_ENABLE@)
if(ngraph_onnx_importer_FOUND)
set(ONNX_IMPORTER_LIBRARIES ngraph::onnx_importer)
set(ngraph_onnx_ngraph_frontend_FOUND @NGRAPH_ONNX_FRONTEND_ENABLE@)
if (ngraph_onnx_ngraph_frontend_FOUND AND NOT TARGET onnx_ngraph_frontend AND NOT TARGET ngraph::onnx_importer)
set_imported_global(ngraph::onnx_ngraph_frontend)
add_library(ngraph::onnx_importer ALIAS ngraph::onnx_ngraph_frontend)
endif()
set(ngraph_paddlepaddle_frontend_FOUND @NGRAPH_PDPD_FRONTEND_ENABLE@)
if(ngraph_paddlepaddle_frontend_FOUND AND NOT TARGET paddlepaddle_ngraph_frontend)
set_imported_global(ngraph::paddlepaddle_ngraph_frontend)
endif()
set(ngraph_paddlepaddle_frontend_FOUND @NGRAPH_PDPD_FRONTEND_ENABLE@)
check_required_components(ngraph)
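
A hedged consumer-side sketch of how a downstream project might pick up this rename (the project and target names `my_extension`/`my_extension.cpp` are placeholders, not from the commit); it mirrors the template_extension usage that appears further down in this diff:

```cmake
# Request the ONNX frontend as an optional component of the ngraph package.
find_package(ngraph REQUIRED OPTIONAL_COMPONENTS onnx_ngraph_frontend)
find_package(InferenceEngine REQUIRED)

add_library(my_extension SHARED my_extension.cpp)   # placeholder target and sources
target_link_libraries(my_extension PRIVATE IE::inference_engine ngraph::ngraph)

if(ngraph_onnx_ngraph_frontend_FOUND)
    # The old ngraph::onnx_importer name keeps working through the ALIAS added above,
    # but new code can link ngraph::onnx_ngraph_frontend directly.
    target_link_libraries(my_extension PRIVATE ngraph::onnx_ngraph_frontend)
    target_compile_definitions(my_extension PRIVATE NGRAPH_ONNX_FRONTEND_ENABLED)
endif()
```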

View File

@ -17,7 +17,7 @@ function(ov_model_convert SRC DST OUT)
get_filename_component(name_we "${in_file}" NAME_WE)
set(model_source_dir "${SRC}/${rel_dir}")
if(NOT NGRAPH_ONNX_IMPORT_ENABLE AND ext MATCHES "^\\.(onnx|prototxt)$")
if(NOT NGRAPH_ONNX_FRONTEND_ENABLE AND ext MATCHES "^\\.(onnx|prototxt)$")
# don't copy / process ONNX / prototxt files
continue()
endif()
@ -78,7 +78,7 @@ ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
ie_onnx_import_out_files)
if(ENABLE_TESTS)
if(NGRAPH_ONNX_IMPORT_ENABLE AND ENABLE_REQUIREMENTS_INSTALL)
if(NGRAPH_ONNX_FRONTEND_ENABLE AND ENABLE_REQUIREMENTS_INSTALL)
find_package(PythonInterp 3 REQUIRED)
get_filename_component(PYTHON_EXEC_DIR ${PYTHON_EXECUTABLE} DIRECTORY)

View File

@ -1,6 +1,7 @@
BasedOnStyle: Google
IndentWidth: 4
UseTab: Never
ColumnLimit: 120
Language: Cpp
Standard: Cpp11
@ -8,18 +9,20 @@ Standard: Cpp11
AccessModifierOffset: -4
AlignConsecutiveMacros: true
AllowAllArgumentsOnNextLine: false
AllowAllConstructorInitializersOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Empty
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: false
ColumnLimit: 160
# Specialize this comment pragma in order to avoid changes in SEA copyrights
BinPackArguments: false
BinPackParameters: false
CommentPragmas: '^#'
DerivePointerAlignment: false
FixNamespaceComments: true
IndentCaseLabels: false
IndentPPDirectives: BeforeHash
SpaceBeforeCpp11BracedList: true
SpaceBeforeCtorInitializerColon: false
IndentPPDirectives: AfterHash
ForEachMacros:
- foreach
- FOREACH_CHILD

View File

@ -25,7 +25,7 @@ if(NOT ENABLE_DOCKER)
set(InferenceEngine_DIR ${CMAKE_BINARY_DIR})
endif()
if(NGRAPH_ONNX_IMPORT_ENABLE)
if(NGRAPH_ONNX_FRONTEND_ENABLE)
add_subdirectory(onnx_custom_op)
endif()
add_subdirectory(template_extension)
@ -223,7 +223,11 @@ function(build_docs)
"${OpenVINO_SOURCE_DIR}/inference-engine/*.md"
"${OpenVINO_SOURCE_DIR}/inference-engine/*.png"
"${OpenVINO_SOURCE_DIR}/inference-engine/*.gif"
"${OpenVINO_SOURCE_DIR}/inference-engine/*.jpg")
"${OpenVINO_SOURCE_DIR}/inference-engine/*.jpg"
"${OpenVINO_SOURCE_DIR}/tools/*.md"
"${OpenVINO_SOURCE_DIR}/tools/*.png"
"${OpenVINO_SOURCE_DIR}/tools/*.gif"
"${OpenVINO_SOURCE_DIR}/tools/*.jpg")
foreach(source_file ${ovino_doc_files})
list(APPEND commands COMMAND ${CMAKE_COMMAND} -E copy

View File

@ -50,10 +50,9 @@ The example below demonstrates how to unregister an operator from the destructor
## Requirements for Building with CMake
A program that uses the `register_operator` functionality requires `ngraph` and `onnx_importer` libraries in addition to the Inference Engine.
The `onnx_importer` is a component of the `ngraph` package, so `find_package(ngraph REQUIRED COMPONENTS onnx_importer)` can find both.
The `ngraph` package exposes two variables, `${NGRAPH_LIBRARIES}` and `${ONNX_IMPORTER_LIBRARIES}`, which reference the `ngraph` and `onnx_importer` libraries.
Those variables need to be passed to the `target_link_libraries` command in the CMakeLists.txt file.
A program that uses the `register_operator` functionality requires `ngraph::ngraph` and `ngraph::onnx_ngraph_frontend` libraries in addition to the Inference Engine.
The `onnx_ngraph_frontend` is a component of the `ngraph` package, so `find_package(ngraph REQUIRED COMPONENTS onnx_ngraph_frontend)` can find both.
Those libraries need to be passed to the `target_link_libraries` command in the CMakeLists.txt file.
See CMakeLists.txt below for reference:
@snippet onnx_custom_op/CMakeLists.txt cmake:onnx_custom_op
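
For orientation, a minimal sketch of what the referenced CMakeLists boils down to after this rename (based on the onnx_custom_op CMakeLists change elsewhere in this commit; the `cmake_minimum_required`/`project` lines are illustrative assumptions):

```cmake
cmake_minimum_required(VERSION 3.13)        # illustrative; not taken from the commit
project(onnx_custom_op)
set(CMAKE_CXX_STANDARD 11)

# The onnx_ngraph_frontend component replaces the former onnx_importer component.
find_package(ngraph REQUIRED COMPONENTS onnx_ngraph_frontend)

add_library(onnx_custom_op STATIC onnx_custom_op.cpp onnx_custom_op.hpp)
# Link the imported targets directly instead of the removed ${ONNX_IMPORTER_LIBRARIES} variable.
target_link_libraries(onnx_custom_op PUBLIC ngraph::ngraph ngraph::onnx_ngraph_frontend)
```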

View File

@ -14,7 +14,7 @@ Inference Engine sample applications include the following:
- [Automatic Speech Recognition Python Sample](../../inference-engine/ie_bridges/python/sample/speech_sample/README.md)
- **Benchmark Application** Estimates deep learning inference performance on supported devices for synchronous and asynchronous modes.
- [Benchmark C++ Tool](../../inference-engine/samples/benchmark_app/README.md)
- [Benchmark Python Tool](../../inference-engine/tools/benchmark_tool/README.md)
- [Benchmark Python Tool](../../tools/benchmark_tool/README.md)
- **Hello Classification Sample** Inference of image classification networks like AlexNet and GoogLeNet using Synchronous Inference Request API. Input of any size and layout can be set to an infer request which will be pre-processed automatically during inference (the sample supports only images as inputs and supports Unicode paths).
- [Hello Classification C++ Sample](../../inference-engine/samples/hello_classification/README.md)
- [Hello Classification C Sample](../../inference-engine/ie_bridges/c/samples/hello_classification/README.md)

View File

@ -15,7 +15,7 @@ The models used in the performance benchmarks were chosen based on general adopt
CF means Caffe*, while TF means TensorFlow*.
#### 5. How can I run the benchmark results on my own?
All of the performance benchmarks were generated using the open-sourced tool within the Intel® Distribution of OpenVINO™ toolkit called `benchmark_app`, which is available in both [C++](../../inference-engine/samples/benchmark_app/README.md) and [Python](../../inference-engine/tools/benchmark_tool/README.md).
All of the performance benchmarks were generated using the open-sourced tool within the Intel® Distribution of OpenVINO™ toolkit called `benchmark_app`, which is available in both [C++](../../inference-engine/samples/benchmark_app/README.md) and [Python](../../tools/benchmark_tool/README.md).
#### 6. What image sizes are used for the classification network models?
The image size used in the inference depends on the network being benchmarked. The following table shows the list of input sizes for each network model.

View File

@ -61,7 +61,7 @@ limitations under the License.
<tab type="user" title="Convert PyTorch* RNN-T Model " url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT"/>
<tab type="user" title="Convert PyTorch* YOLACT Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_YOLACT"/>
<tab type="user" title="Convert PyTorch* F3Net Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_F3Net"/>
<tab type="user" title="Convert PyTorch* RCAN Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RCAN">
<tab type="user" title="Convert PyTorch* RCAN Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RCAN"/>
<tab type="user" title="Convert PyTorch* BERT-NER Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_Bert_ner"/>
</tab>
</tab>

View File

@ -7,11 +7,11 @@ set(CMAKE_CXX_STANDARD 11)
set(TARGET_NAME "onnx_custom_op")
find_package(ngraph REQUIRED COMPONENTS onnx_importer)
find_package(ngraph REQUIRED COMPONENTS onnx_ngraph_frontend)
add_library(${TARGET_NAME} STATIC onnx_custom_op.cpp onnx_custom_op.hpp)
target_link_libraries(${TARGET_NAME} PUBLIC ${NGRAPH_LIBRARIES} ${ONNX_IMPORTER_LIBRARIES})
target_link_libraries(${TARGET_NAME} PUBLIC ngraph::ngraph ngraph::onnx_ngraph_frontend)
# [cmake:onnx_custom_op]
# Enable code style check

View File

@ -15,7 +15,7 @@
Let *min_value* and *max_value* be *min* and *max*, respectively. The mathematical formula of *Clamp* is as follows:
\f[
clamp( x_{i} )=\min\big( \max\left( x_{i}, min\_value \right), max\_value \big)
clamp( x_{i} )=\min\big( \max\left( x_{i},\ min\_value \right),\ max\_value \big)
\f]
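
As a quick illustrative check of the formula (values chosen only for this example), with *min_value* = 0 and *max_value* = 6:

\f[
clamp(-1.5) = 0,\quad clamp(2.5) = 2.5,\quad clamp(7) = 6
\f]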
**Attributes**:

View File

@ -12,7 +12,7 @@
It performs element-wise activation function on a given input tensor, based on the following mathematical formula:
\f[
Gelu(x) = x\cdot\Phi(x) = x\cdot\frac{1}{2}\cdot\left[1 + erf\left(x/\sqrt{2}\right)\right]
Gelu(x) = x\cdot\Phi(x) = x\cdot\frac{1}{2}\cdot\left[1 + erf\frac{x}{\sqrt{2}}\right]
\f]
where Φ(x) is the Cumulative Distribution Function for Gaussian Distribution.

View File

@ -22,13 +22,13 @@ The *Gelu* function may be approximated in two different ways based on *approxim
For `erf` approximation mode, *Gelu* function is represented as:
\f[
Gelu(x) = x\cdot\Phi(x) = x\cdot\frac{1}{2}\cdot\left[1 + erf\left(x/\sqrt{2}\right)\right]
Gelu(x) = x\cdot\Phi(x) = x\cdot\frac{1}{2}\cdot\left[1 + erf\frac{x}{\sqrt{2}}\right]
\f]
For `tanh` approximation mode, *Gelu* function is represented as:
\f[
Gelu(x) \approx x\cdot\frac{1}{2}\cdot \left(1 + \tanh\left[\sqrt{2/\pi} \cdot (x + 0.044715 \cdot x^3)\right]\right)
Gelu(x) \approx x\cdot\frac{1}{2}\cdot \left(1 + \tanh\left[\sqrt{\frac{2}{\pi}} \cdot (x + 0.044715 \cdot x^3)\right]\right)
\f]
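
For a rough sense of how close the two modes are, an illustrative evaluation at x = 1 (rounded values, not part of the specification; 0.7979 approximates \f$\sqrt{2/\pi}\f$ and 1.044715 is \f$1 + 0.044715 \cdot 1^3\f$):

\f[
Gelu_{erf}(1) = \frac{1}{2}\cdot\left[1 + erf\frac{1}{\sqrt{2}}\right] \approx 0.8413,\qquad Gelu_{tanh}(1) \approx \frac{1}{2}\cdot\left(1 + \tanh\left[0.7979 \cdot 1.044715\right]\right) \approx 0.8412
\f]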
**Attributes**

View File

@ -10,7 +10,7 @@
element in the output tensor with the following formula:
\f[
HSigmoid(x) = \frac{min(max(x + 3, 0), 6)}{6}
HSigmoid(x) = \frac{min(max(x + 3,\ 0),\ 6)}{6}
\f]
The HSigmoid operation is introduced in the following [article](https://arxiv.org/pdf/1905.02244.pdf).
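
A few illustrative evaluations of the corrected formula (arbitrary sample points, not part of the specification):

\f[
HSigmoid(-4) = \frac{min(max(-1,\ 0),\ 6)}{6} = 0,\quad HSigmoid(1) = \frac{4}{6} \approx 0.667,\quad HSigmoid(5) = \frac{6}{6} = 1
\f]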

View File

@ -10,7 +10,7 @@
element in the output tensor with the following formula:
\f[
HSwish(x) = x \frac{min(max(x + 3, 0), 6)}{6}
HSwish(x) = x \cdot \frac{min(max(x + 3,\ 0),\ 6)}{6}
\f]
The HSwish operation is introduced in the following [article](https://arxiv.org/pdf/1905.02244.pdf).
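
A few illustrative sample points for the corrected formula (arbitrary values, for orientation only):

\f[
HSwish(-4) = -4 \cdot \frac{0}{6} = 0,\quad HSwish(1) = 1 \cdot \frac{4}{6} \approx 0.667,\quad HSwish(6) = 6 \cdot \frac{6}{6} = 6
\f]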

View File

@ -12,10 +12,13 @@
For each element from the input tensor calculates corresponding
element in the output tensor with the following formula:
\f[
y = max(0, min(1, alpha * x + beta))
y = max(0,\ min(1,\ \alpha x + \beta))
\f]
where α corresponds to `alpha` scalar input and β corresponds to `beta` scalar input.
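
As an illustration only, assuming example values α = 0.2 and β = 0.5 (chosen for this sketch, not mandated by the spec):

\f[
y(-4) = max(0,\ min(1,\ 0.2 \cdot (-4) + 0.5)) = 0,\quad y(1) = 0.7,\quad y(4) = max(0,\ min(1,\ 1.3)) = 1
\f]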
**Inputs**
* **1**: A tensor of type *T*. **Required.**

View File

@ -8,8 +8,8 @@
**Note**: It is recommended not to compute LogSoftmax directly as Log(Softmax(x, axis)); it is more numerically stable to compute LogSoftmax as:
\f[
t = (x - ReduceMax(x, axis)) \\
LogSoftmax(x, axis) = t - Log(ReduceSum(Exp(t), axis))
t = (x - ReduceMax(x,\ axis)) \\
LogSoftmax(x, axis) = t - Log(ReduceSum(Exp(t),\ axis))
\f]
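
A small worked example of the stable formulation (illustrative input x = [1, 2, 3], reduced over its only axis): ReduceMax = 3, so t = [-2, -1, 0], ReduceSum(Exp(t)) ≈ 1.5032 and its Log ≈ 0.4076, giving:

\f[
LogSoftmax([1,\ 2,\ 3]) \approx [-2.408,\ -1.408,\ -0.408]
\f]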
**Attributes**

View File

@ -15,7 +15,7 @@
For each element from the input tensor calculates corresponding
element in the output tensor with the following formula:
\f[
Y_{i}^{( l )} = max(0, Y_{i}^{( l - 1 )})
Y_{i}^{( l )} = max(0,\ Y_{i}^{( l - 1 )})
\f]
**Inputs**:

View File

@ -25,7 +25,7 @@
*Abs* does the following with the input tensor *a*:
\f[
a_{i} = abs(a_{i})
a_{i} = \vert a_{i} \vert
\f]
**Examples**

View File

@ -4,11 +4,13 @@
**Category**: Arithmetic unary operation
**Short description**: *Atanh* performs element-wise hyperbolic inverse tangent (arctangenth) operation with given tensor.
**Short description**: *Atanh* performs element-wise hyperbolic inverse tangent (arctangenth) operation with a given tensor.
**Attributes**:
**Detailed description**: *Atanh* performs element-wise hyperbolic inverse tangent (arctangenth) operation on a given input tensor, based on the following mathematical formula:
No attributes available.
\f[ a_{i} = atanh(a_{i}) \f]
**Attributes**: Atanh operation has no attributes.
**Inputs**
@ -16,22 +18,14 @@
**Outputs**
* **1**: The result of element-wise atanh operation. A tensor of type *T*.
* **1**: The result of element-wise atanh operation applied to the input tensor. A tensor of type *T* and the same shape as input tensor.
**Types**
* *T*: any floating-point type.
*Atanh* does the following with the input tensor *a*:
\f[
a_{i} = atanh(a_{i})
\f]
* *T*: any supported numeric type.
**Examples**
*Example 1*
```xml
<layer ... type="Atanh">
<input>

View File

@ -10,7 +10,7 @@
element in the output tensor with the following formula:
\f[
a_{i} = ceiling(a_{i})
a_{i} = \lceil a_{i} \rceil
\f]
**Attributes**: *Ceiling* operation has no attributes.

View File

@ -11,7 +11,7 @@ Before performing arithmetic operation, input tensors *a* and *b* are broadcaste
After broadcasting *Divide* performs division operation for the input tensors *a* and *b* using the formula below:
\f[
o_{i} = a_{i} / b_{i}
o_{i} = \frac{a_{i}}{b_{i}}
\f]
The result of division by zero is undefined.

View File

@ -10,7 +10,7 @@
As a first step input tensors *a* and *b* are broadcasted if their shapes differ. Broadcasting is performed according to `auto_broadcast` attribute specification. As a second step *FloorMod* operation is computed element-wise on the input tensors *a* and *b* according to the formula below:
\f[
o_{i} = a_{i} % b_{i}
o_{i} = a_{i} \mod b_{i}
\f]
*FloorMod* operation computes a remainder of a floored division. It is the same behaviour as in the Python programming language: `floor(x / y) * y + floor_mod(x, y) = x`. The sign of the result is equal to the sign of the divisor. The result of division by zero is undefined.
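
A pair of illustrative values makes the sign rule concrete:

\f[
floor\_mod(-7,\ 3) = 2,\qquad floor\_mod(7,\ -3) = -2
\f]

since \f$\lfloor -7/3 \rfloor = -3\f$ and \f$-7 - (-3) \cdot 3 = 2\f$; the result takes the sign of the divisor.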

View File

@ -10,7 +10,7 @@
element in the output tensor with the following formula:
\f[
a_{i} = floor(a_{i})
a_{i} = \lfloor a_{i} \rfloor
\f]
**Attributes**: *Floor* operation has no attributes.

View File

@ -12,7 +12,7 @@ As a first step input tensors *a* and *b* are broadcasted if their shapes differ
After broadcasting *Maximum* does the following with the input tensors *a* and *b*:
\f[
o_{i} = max(a_{i}, b_{i})
o_{i} = max(a_{i},\ b_{i})
\f]
**Attributes**:

View File

@ -10,7 +10,7 @@
As a first step input tensors *a* and *b* are broadcasted if their shapes differ. Broadcasting is performed according to `auto_broadcast` attribute specification. As a second step *Minimum* operation is computed element-wise on the input tensors *a* and *b* according to the formula below:
\f[
o_{i} = min(a_{i}, b_{i})
o_{i} = min(a_{i},\ b_{i})
\f]
**Attributes**:

View File

@ -10,7 +10,7 @@
As a first step input tensors *a* and *b* are broadcasted if their shapes differ. Broadcasting is performed according to `auto_broadcast` attribute specification. As a second step *Mod* operation is computed element-wise on the input tensors *a* and *b* according to the formula below:
\f[
o_{i} = a_{i} % b_{i}
o_{i} = a_{i} \mod b_{i}
\f]
*Mod* operation computes a remainder of a truncated division. It is the same behaviour as in the C programming language: `truncated(x / y) * y + truncated_mod(x, y) = x`. The sign of the result is equal to the sign of the dividend. The result of division by zero is undefined.
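
For comparison with *FloorMod*, a pair of illustrative values showing that the truncated variant keeps the sign of the dividend:

\f[
mod(-7,\ 3) = -1,\qquad mod(7,\ -3) = 1
\f]

since the truncated quotient of \f$-7/3\f$ is \f$-2\f$ and \f$-7 - (-2) \cdot 3 = -1\f$.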

View File

@ -11,7 +11,7 @@ Before performing arithmetic operation, input tensors *a* and *b* are broadcaste
After broadcasting *Multiply* performs multiplication operation for the input tensors *a* and *b* using the formula below:
\f[
o_{i} = a_{i} * b_{i}
o_{i} = a_{i} \cdot b_{i}
\f]
**Attributes**:

View File

@ -4,35 +4,10 @@
**Category**: Comparison binary operation
**Short description**: *Equal* performs element-wise comparison operation with two given tensors applying multi-directional broadcast rules.
**Attributes**:
* *auto_broadcast*
* **Description**: specifies rules used for auto-broadcasting of input tensors.
* **Range of values**:
* *none* - no auto-broadcasting is allowed, all input shapes should match
* *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in <a href="https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md">ONNX docs</a>.
* **Type**: string
* **Default value**: "numpy"
* **Required**: *no*
**Inputs**
* **1**: A tensor of type *T*. **Required.**
* **2**: A tensor of type *T*. **Required.**
**Outputs**
* **1**: The result of element-wise comparison operation. A tensor of type boolean.
**Types**
* *T*: arbitrary supported type.
**Short description**: *Equal* performs element-wise comparison operation with two given input tensors applying multi-directional broadcast rules specified in the *auto_broadcast* attribute.
**Detailed description**
Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attribute is not `none`. Broadcasting is performed according to `auto_broadcast` value.
Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and *auto_broadcast* attribute is not *none*. Broadcasting is performed according to *auto_broadcast* value.
After broadcasting *Equal* does the following with the input tensors *a* and *b*:
@ -40,12 +15,40 @@ After broadcasting *Equal* does the following with the input tensors *a* and *b*
o_{i} = a_{i} == b_{i}
\f]
**Attributes**:
* *auto_broadcast*
* **Description**: specifies rules used for auto-broadcasting of input tensors.
* **Range of values**:
* *none* - no auto-broadcasting is allowed, all input shapes should match,
* *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md),
* *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md).
* **Type**: string
* **Default value**: "numpy"
* **Required**: *no*
**Inputs**
* **1**: A tensor of type *T* and arbitrary shape. **Required.**
* **2**: A tensor of type *T* and arbitrary shape. **Required.**
**Outputs**
* **1**: The result of element-wise **comparison** operation applied to the input tensors. A tensor of type *T_BOOL* with a shape equal to the broadcasted shape of the two inputs.
**Types**
* *T*: arbitrary supported type.
* *T_BOOL*: `boolean`.
**Examples**
*Example 1*
*Example 1: no broadcast*
```xml
<layer ... type="Equal">
<data auto_broadcast="none"/>
<input>
<port id="0">
<dim>256</dim>
@ -65,9 +68,10 @@ o_{i} = a_{i} == b_{i}
</layer>
```
*Example 2: broadcast*
*Example 2: numpy broadcast*
```xml
<layer ... type="Equal">
<data auto_broadcast="numpy"/>
<input>
<port id="0">
<dim>8</dim>

View File

@ -37,7 +37,7 @@ Before performing arithmetic operation, input tensors *a* and *b* are broadcaste
After broadcasting *GreaterEqual* does the following with the input tensors *a* and *b*:
\f[
o_{i} = a_{i} >= b_{i}
o_{i} = a_{i} \geq b_{i}
\f]
**Examples**

View File

@ -12,7 +12,7 @@ Before performing arithmetic operation, input tensors *a* and *b* are broadcaste
After broadcasting *LessEqual* does the following with the input tensors *a* and *b*:
\f[
o_{i} = a_{i} <= b_{i}
o_{i} = a_{i} \leq b_{i}
\f]
**Attributes**:

View File

@ -37,7 +37,7 @@ Before performing arithmetic operation, input tensors *a* and *b* are broadcaste
After broadcasting *NotEqual* does the following with the input tensors *a* and *b*:
\f[
o_{i} = a_{i} != b_{i}
o_{i} = a_{i} \neq b_{i}
\f]
**Examples**

View File

@ -16,15 +16,15 @@ n_{out} = \left ( \frac{n_{in} + 2p - k}{s} \right ) + 1
The receptive field in each layer is calculated using the formulas:
* Jump in the output feature map:
\f[
j_{out} = j_{in} * s
j_{out} = j_{in} \cdot s
\f]
* Size of the receptive field of output feature:
\f[
r_{out} = r_{in} + ( k - 1 ) * j_{in}
r_{out} = r_{in} + ( k - 1 ) \cdot j_{in}
\f]
* Center position of the receptive field of the first output feature:
\f[
start_{out} = start_{in} + ( \frac{k - 1}{2} - p ) * j_{in}
start_{out} = start_{in} + ( \frac{k - 1}{2} - p ) \cdot j_{in}
\f]
* Output is calculated using the following formula:
\f[

View File

@ -12,7 +12,7 @@ Output is calculated using the following formula:
\f[
y(p) = \sum_{k = 1}^{K}w_{k}x(p + p_{k} + {\Delta}p_{k})
y(p) = \displaystyle{\sum_{k = 1}^{K}}w_{k}x(p + p_{k} + {\Delta}p_{k})
\f]

View File

@ -14,7 +14,7 @@ Output is calculated using the following formula:
\f[
y(p) = \sum_{k = 1}^{K}w_{k}x(p + p_{k} + {\Delta}p_{k}) * {\Delta}m_{k}
y(p) = \displaystyle{\sum_{k = 1}^{K}}w_{k}x(p + p_{k} + {\Delta}p_{k}) \cdot {\Delta}m_{k}
\f]
Where

View File

@ -8,7 +8,7 @@
**Detailed description**:
*RandomUniform* operation generates random numbers from a uniform distribution in the range `[*minval*, *maxval*)`.
*RandomUniform* operation generates random numbers from a uniform distribution in the range `[minval, maxval)`.
The generation algorithm is based on an underlying random integer generator that uses the Philox algorithm. Philox is a counter-based pseudo-random generator, which produces uint32 values. A single invocation of the Philox algorithm returns
four random values, depending on the given *key* and *counter* values. *Key* and *counter* are initialized
@ -42,7 +42,7 @@ R' = mulhi(R, M) {\oplus} k {\oplus} L \\
mulhi(a, b) = floor((a {\times} b) / 2^{32}) \\
mullo(a, b) = (a {\times} b) \mod 2^{32}
\f]
where `{\oplus}` - bitwise xor, *k* = `R_{key}` for updating counter, *k* = `L_{key}` for updating *n*,
where \f${\oplus}\f$ - bitwise xor, *k* = \f$R_{key}\f$ for updating counter, *k* = \f$L_{key}\f$ for updating *n*,
*M* = `0xD2511F53` for updating *n*, *M* = `0xCD9E8D57` for updating *counter*.
After each round *key* is raised by summing with another pair of const values:
@ -50,7 +50,7 @@ After each round *key* is raised by summing with another pair of const values:
L += 0x9E3779B9 \\
R += 0xBB67AE85
\f]
Values *L'_{n}*, *R'_{n}*, *L'_{counter}*, *R'_{counter}* are resulting four random numbers.
Values \f$L'_{n}, R'_{n}, L'_{counter}, R'_{counter}\f$ are resulting four random numbers.
Float values in the range [0..1) are obtained from 32-bit integers by the following rules.

View File

@ -25,7 +25,7 @@
*LogicalNot* does the following with the input tensor *a*:
\f[
a_{i} = not(a_{i})
a_{i} = \lnot a_{i}
\f]
**Examples**

View File

@ -37,7 +37,7 @@ Before performing logical operation, input tensors *a* and *b* are broadcasted i
After broadcasting *LogicalXor* does the following with the input tensors *a* and *b*:
\f[
o_{i} = a_{i} xor b_{i}
o_{i} = a_{i} \oplus b_{i}
\f]
**Examples**

View File

@ -11,19 +11,19 @@ The kernel dimensions are calculated using the following formulae for the `NCDHW
\f[
\begin{array}{lcl}
d_{start} &=& floor(i*D_{in}/D_{out})\\
d_{end} &=& ceil((i+1)*D_{in}/D_{out})\\
h_{start} &=& floor(j*H_{in}/H_{out})\\
h_{end} &=& ceil((j+1)*H_{in}/H_{out})\\
w_{start} &=& floor(k*W_{in}/W_{out})\\
w_{end} &=& ceil((k+1)*W_{in}/W_{out})
d_{start} &=& \lfloor i \cdot \frac{D_{in}}{D_{out}}\rfloor\\
d_{end} &=& \lceil(i+1) \cdot \frac{D_{in}}{D_{out}}\rceil\\
h_{start} &=& \lfloor j \cdot \frac{H_{in}}{H_{out}}\rfloor\\
h_{end} &=& \lceil(j+1) \cdot \frac{H_{in}}{H_{out}}\rceil\\
w_{start} &=& \lfloor k \cdot \frac{W_{in}}{W_{out}}\rfloor\\
w_{end} &=& \lceil(k+1) \cdot \frac{W_{in}}{W_{out}}\rceil
\end{array}
\f]
The output is calculated with the following formula:
\f[
Output(i,j,k) = \frac{Input[d_{start}:d_{end}, h_{start}:h_{end}, w_{start}:w_{end}]}{(d_{end}-d_{start})*(h_{end}-h_{start})*(w_{end}-w_{start})}
Output(i,j,k) = \frac{Input[d_{start}:d_{end}, h_{start}:h_{end}, w_{start}:w_{end}]}{(d_{end}-d_{start}) \cdot (h_{end}-h_{start}) \cdot (w_{end}-w_{start})}
\f]
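
A small illustrative case for one spatial dimension, with \f$D_{in} = 5\f$ and \f$D_{out} = 3\f$ (values chosen only to show how the pooling windows are derived and how they may overlap):

\f[
\begin{array}{lll}
i = 0: & d_{start} = \lfloor 0 \cdot \frac{5}{3} \rfloor = 0, & d_{end} = \lceil 1 \cdot \frac{5}{3} \rceil = 2\\
i = 1: & d_{start} = \lfloor 1 \cdot \frac{5}{3} \rfloor = 1, & d_{end} = \lceil 2 \cdot \frac{5}{3} \rceil = 4\\
i = 2: & d_{start} = \lfloor 2 \cdot \frac{5}{3} \rfloor = 3, & d_{end} = \lceil 3 \cdot \frac{5}{3} \rceil = 5
\end{array}
\f]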
**Inputs**:

View File

@ -11,12 +11,12 @@ The kernel dimensions are calculated using the following formulae for the `NCDHW
\f[
\begin{array}{lcl}
d_{start} &=& floor(i*D_{in}/D_{out})\\
d_{end} &=& ceil((i+1)*D_{in}/D_{out})\\
h_{start} &=& floor(j*H_{in}/H_{out})\\
h_{end} &=& ceil((j+1)*H_{in}/H_{out})\\
w_{start} &=& floor(k*W_{in}/W_{out})\\
w_{end} &=& ceil((k+1)*W_{in}/W_{out})
d_{start} &=& \lfloor i \cdot \frac{D_{in}}{D_{out}}\rfloor\\
d_{end} &=& \lceil(i+1) \cdot \frac{D_{in}}{D_{out}}\rceil\\
h_{start} &=& \lfloor j \cdot \frac{H_{in}}{H_{out}}\rfloor\\
h_{end} &=& \lceil(j+1) \cdot \frac{H_{in}}{H_{out}}\rceil\\
w_{start} &=& \lfloor k \cdot \frac{W_{in}}{W_{out}}\rfloor\\
w_{end} &=& \lceil(k+1) \cdot \frac{W_{in}}{W_{out}}\rceil
\end{array}
\f]

View File

@ -44,8 +44,8 @@ if(OpenCV_FOUND)
target_link_libraries(${TARGET_NAME} PRIVATE opencv_core)
endif()
if(NGRAPH_ONNX_IMPORT_ENABLE)
target_link_libraries(${TARGET_NAME} PRIVATE onnx_importer)
if(NGRAPH_ONNX_FRONTEND_ENABLE)
target_link_libraries(${TARGET_NAME} PRIVATE onnx_ngraph_frontend)
endif()
if(NOT MSVC)

View File

@ -7,7 +7,7 @@ set(CMAKE_CXX_STANDARD 11)
set(TARGET_NAME "template_extension")
find_package(ngraph REQUIRED OPTIONAL_COMPONENTS onnx_importer)
find_package(ngraph REQUIRED OPTIONAL_COMPONENTS onnx_ngraph_frontend)
find_package(InferenceEngine REQUIRED)
find_package(OpenCV QUIET COMPONENTS core)
@ -28,9 +28,9 @@ target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_INFERENCE_EXTENSION_
target_link_libraries(${TARGET_NAME} PRIVATE IE::inference_engine
${NGRAPH_LIBRARIES})
if (ngraph_onnx_importer_FOUND)
target_link_libraries(${TARGET_NAME} PRIVATE ${ONNX_IMPORTER_LIBRARIES})
target_compile_definitions(${TARGET_NAME} PRIVATE NGRAPH_ONNX_IMPORT_ENABLED)
if (ngraph_onnx_ngraph_frontend_FOUND)
target_link_libraries(${TARGET_NAME} PRIVATE ngraph::onnx_ngraph_frontend)
target_compile_definitions(${TARGET_NAME} PRIVATE NGRAPH_ONNX_FRONTEND_ENABLED)
endif()
# [cmake:extension]

View File

@ -22,7 +22,8 @@ OpImplementation::OpImplementation(const std::shared_ptr<ngraph::Node>& node) {
IE_THROW() << "Cannot create implementation for op with dynamic shapes!";
if (castedNode->get_input_shape(0).size() != 4 || castedNode->get_output_shape(0).size() != 4)
IE_THROW() << "Operation supports only 4d tensors for input and output.";
if (castedNode->get_input_element_type(0) != ngraph::element::f32 || castedNode->get_output_element_type(0) != ngraph::element::f32)
if (castedNode->get_input_element_type(0) != ngraph::element::f32 ||
castedNode->get_output_element_type(0) != ngraph::element::f32)
IE_THROW() << "Operation supports only FP32 tensors.";
add = castedNode->getAddAttr();
inShape = castedNode->get_input_shape(0);
@ -34,9 +35,12 @@ OpImplementation::OpImplementation(const std::shared_ptr<ngraph::Node>& node) {
//! [cpu_implementation:ctor]
//! [cpu_implementation:getSupportedConfigurations]
InferenceEngine::StatusCode OpImplementation::getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf,
InferenceEngine::StatusCode OpImplementation::getSupportedConfigurations(
std::vector<InferenceEngine::LayerConfig>& conf,
InferenceEngine::ResponseDesc* resp) noexcept {
auto createConfig = [](const InferenceEngine::SizeVector inShape, const InferenceEngine::SizeVector& outShape, bool planar) {
auto createConfig = [](const InferenceEngine::SizeVector inShape,
const InferenceEngine::SizeVector& outShape,
bool planar) {
InferenceEngine::LayerConfig config;
config.dynBatchSupport = false;
InferenceEngine::DataConfig inData;
@ -45,9 +49,11 @@ InferenceEngine::StatusCode OpImplementation::getSupportedConfigurations(std::ve
// Allow any offset before data
size_t offset((std::numeric_limits<size_t>::max)());
if (planar) {
inData.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, inShape, {inShape, order, offset});
inData.desc =
InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, inShape, {inShape, order, offset});
config.inConfs.push_back(inData);
outData.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outShape, order, offset});
outData.desc =
InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outShape, order, offset});
config.outConfs.push_back(outData);
} else {
// Add blocked (nChw8c) format
@ -64,9 +70,11 @@ InferenceEngine::StatusCode OpImplementation::getSupportedConfigurations(std::ve
InferenceEngine::SizeVector outBlkDims = outShape;
outBlkDims[1] = div_up(outBlkDims[1], 8);
outBlkDims.push_back(8);
inData.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, inShape, {inBlkDims, order, offset});
inData.desc =
InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, inShape, {inBlkDims, order, offset});
config.inConfs.push_back(inData);
outData.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outBlkDims, order, offset});
outData.desc =
InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outBlkDims, order, offset});
config.outConfs.push_back(outData);
}
return config;
@ -87,7 +95,8 @@ InferenceEngine::StatusCode OpImplementation::getSupportedConfigurations(std::ve
//! [cpu_implementation:getSupportedConfigurations]
//! [cpu_implementation:init]
InferenceEngine::StatusCode OpImplementation::init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc* resp) noexcept {
InferenceEngine::StatusCode OpImplementation::init(InferenceEngine::LayerConfig& config,
InferenceEngine::ResponseDesc* resp) noexcept {
try {
if (config.inConfs.size() != 1 || config.outConfs.size() != 1) {
IE_THROW() << "Operation cannot be initialized with incorrect number of inputs/outputs!";
@ -115,10 +124,13 @@ InferenceEngine::StatusCode OpImplementation::init(InferenceEngine::LayerConfig&
//! [cpu_implementation:init]
//! [cpu_implementation:execute]
InferenceEngine::StatusCode OpImplementation::execute(std::vector<InferenceEngine::Blob::Ptr>& inputs, std::vector<InferenceEngine::Blob::Ptr>& outputs,
InferenceEngine::StatusCode OpImplementation::execute(std::vector<InferenceEngine::Blob::Ptr>& inputs,
std::vector<InferenceEngine::Blob::Ptr>& outputs,
InferenceEngine::ResponseDesc* resp) noexcept {
const float* src_data = inputs[0]->cbuffer().as<const float*>() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
float* dst_data = outputs[0]->buffer().as<float*>() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
const float* src_data =
inputs[0]->cbuffer().as<const float*>() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
float* dst_data =
outputs[0]->buffer().as<float*>() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
for (size_t i = 0; i < inputs[0]->size(); i++) {
dst_data[i] = src_data[i] + add;

View File

@ -16,8 +16,10 @@ public:
explicit OpImplementation(const std::shared_ptr<ngraph::Node>& node);
InferenceEngine::StatusCode getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf,
InferenceEngine::ResponseDesc* resp) noexcept override;
InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc* resp) noexcept override;
InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr>& inputs, std::vector<InferenceEngine::Blob::Ptr>& outputs,
InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config,
InferenceEngine::ResponseDesc* resp) noexcept override;
InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr>& inputs,
std::vector<InferenceEngine::Blob::Ptr>& outputs,
InferenceEngine::ResponseDesc* resp) noexcept override;
private:

View File

@ -11,7 +11,7 @@
# include "fft_op.hpp"
#endif
#include <ngraph/ngraph.hpp>
#ifdef NGRAPH_ONNX_IMPORT_ENABLED
#ifdef NGRAPH_ONNX_FRONTEND_ENABLED
# include <onnx_import/onnx_utils.hpp>
#endif
@ -24,14 +24,20 @@ using namespace TemplateExtension;
//! [extension:ctor]
Extension::Extension() {
#ifdef NGRAPH_ONNX_IMPORT_ENABLED
ngraph::onnx_import::register_operator(Operation::type_info.name, 1, "custom_domain", [](const ngraph::onnx_import::Node& node) -> ngraph::OutputVector {
#ifdef NGRAPH_ONNX_FRONTEND_ENABLED
ngraph::onnx_import::register_operator(Operation::type_info.name,
1,
"custom_domain",
[](const ngraph::onnx_import::Node& node) -> ngraph::OutputVector {
ngraph::OutputVector ng_inputs{node.get_ng_inputs()};
int64_t add = node.get_attribute_value<int64_t>("add");
return {std::make_shared<Operation>(ng_inputs.at(0), add)};
});
# ifdef OPENCV_IMPORT_ENABLED
ngraph::onnx_import::register_operator(FFTOp::type_info.name, 1, "custom_domain", [](const ngraph::onnx_import::Node& node) -> ngraph::OutputVector {
ngraph::onnx_import::register_operator(FFTOp::type_info.name,
1,
"custom_domain",
[](const ngraph::onnx_import::Node& node) -> ngraph::OutputVector {
ngraph::OutputVector ng_inputs{node.get_ng_inputs()};
bool inverse = node.get_attribute_value<int64_t>("inverse");
return {std::make_shared<FFTOp>(ng_inputs.at(0), inverse)};
@ -43,12 +49,12 @@ Extension::Extension() {
//! [extension:dtor]
Extension::~Extension() {
#ifdef NGRAPH_ONNX_IMPORT_ENABLED
#ifdef NGRAPH_ONNX_FRONTEND_ENABLED
ngraph::onnx_import::unregister_operator(Operation::type_info.name, 1, "custom_domain");
# ifdef OPENCV_IMPORT_ENABLED
ngraph::onnx_import::unregister_operator(FFTOp::type_info.name, 1, "custom_domain");
# endif // OPENCV_IMPORT_ENABLED
#endif // NGRAPH_ONNX_IMPORT_ENABLED
#endif // NGRAPH_ONNX_FRONTEND_ENABLED
}
//! [extension:dtor]
@ -92,7 +98,8 @@ std::vector<std::string> Extension::getImplTypes(const std::shared_ptr<ngraph::N
//! [extension:getImplTypes]
//! [extension:getImplementation]
InferenceEngine::ILayerImpl::Ptr Extension::getImplementation(const std::shared_ptr<ngraph::Node>& node, const std::string& implType) {
InferenceEngine::ILayerImpl::Ptr Extension::getImplementation(const std::shared_ptr<ngraph::Node>& node,
const std::string& implType) {
if (implType == "CPU") {
if (std::dynamic_pointer_cast<Operation>(node)) {
return std::make_shared<OpImplementation>(node);

View File

@ -25,7 +25,8 @@ public:
std::map<std::string, ngraph::OpSet> getOpSets() override;
std::vector<std::string> getImplTypes(const std::shared_ptr<ngraph::Node>& node) override;
InferenceEngine::ILayerImpl::Ptr getImplementation(const std::shared_ptr<ngraph::Node>& node, const std::string& implType) override;
InferenceEngine::ILayerImpl::Ptr getImplementation(const std::shared_ptr<ngraph::Node>& node,
const std::string& implType) override;
};
} // namespace TemplateExtension

View File

@ -21,14 +21,16 @@ FFTImpl::FFTImpl(const std::shared_ptr<ngraph::Node>& node) {
IE_THROW() << "Cannot create implementation for operation with incorrect number of inputs or outputs!";
if (castedNode->get_input_partial_shape(0).is_dynamic() || castedNode->get_output_partial_shape(0).is_dynamic())
IE_THROW() << "Cannot create implementation for op with dynamic shapes!";
if (castedNode->get_input_element_type(0) != ngraph::element::f32 || castedNode->get_output_element_type(0) != ngraph::element::f32)
if (castedNode->get_input_element_type(0) != ngraph::element::f32 ||
castedNode->get_output_element_type(0) != ngraph::element::f32)
IE_THROW() << "Operation supports only FP32 tensors.";
inpShape = castedNode->get_input_shape(0);
outShape = castedNode->get_output_shape(0);
inverse = castedNode->inverse;
}
InferenceEngine::StatusCode FFTImpl::getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf, InferenceEngine::ResponseDesc* resp) noexcept {
InferenceEngine::StatusCode FFTImpl::getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf,
InferenceEngine::ResponseDesc* resp) noexcept {
std::vector<InferenceEngine::DataConfig> inDataConfig;
std::vector<InferenceEngine::DataConfig> outDataConfig;
InferenceEngine::SizeVector order(inpShape.size());
@ -55,7 +57,8 @@ InferenceEngine::StatusCode FFTImpl::getSupportedConfigurations(std::vector<Infe
return InferenceEngine::StatusCode::OK;
}
InferenceEngine::StatusCode FFTImpl::init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc* resp) noexcept {
InferenceEngine::StatusCode FFTImpl::init(InferenceEngine::LayerConfig& config,
InferenceEngine::ResponseDesc* resp) noexcept {
try {
if (config.inConfs.size() != 1 || config.outConfs.size() != 1) {
IE_THROW() << "Operation cannot be initialized with incorrect number of inputs/outputs!";
@ -85,7 +88,8 @@ static cv::Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob) {
return cv::Mat(size, CV_32F, (void*)blob->buffer());
}
InferenceEngine::StatusCode FFTImpl::execute(std::vector<InferenceEngine::Blob::Ptr>& inputs, std::vector<InferenceEngine::Blob::Ptr>& outputs,
InferenceEngine::StatusCode FFTImpl::execute(std::vector<InferenceEngine::Blob::Ptr>& inputs,
std::vector<InferenceEngine::Blob::Ptr>& outputs,
InferenceEngine::ResponseDesc* resp) noexcept {
cv::Mat inp = infEngineBlobToMat(inputs[0]);
cv::Mat out = infEngineBlobToMat(outputs[0]);
@ -95,7 +99,8 @@ InferenceEngine::StatusCode FFTImpl::execute(std::vector<InferenceEngine::Blob::
const int w = inp.size[3];
cv::Mat complex(h, w, CV_32FC2), interleavedOut(h, w, CV_32FC2);
for (int i = 0; i < n; ++i) {
std::vector<cv::Mat> components = {cv::Mat(h, w, CV_32F, inp.ptr<float>(i, 0)), cv::Mat(h, w, CV_32F, inp.ptr<float>(i, 1))};
std::vector<cv::Mat> components = {cv::Mat(h, w, CV_32F, inp.ptr<float>(i, 0)),
cv::Mat(h, w, CV_32F, inp.ptr<float>(i, 1))};
cv::merge(components, complex);
if (!inverse)

View File

@ -16,8 +16,10 @@ public:
explicit FFTImpl(const std::shared_ptr<ngraph::Node>& node);
InferenceEngine::StatusCode getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf,
InferenceEngine::ResponseDesc* resp) noexcept override;
InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc* resp) noexcept override;
InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr>& inputs, std::vector<InferenceEngine::Blob::Ptr>& outputs,
InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config,
InferenceEngine::ResponseDesc* resp) noexcept override;
InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr>& inputs,
std::vector<InferenceEngine::Blob::Ptr>& outputs,
InferenceEngine::ResponseDesc* resp) noexcept override;
private:

View File

@ -9,10 +9,13 @@
using namespace TemplatePlugin;
// ! [async_infer_request:ctor]
TemplateAsyncInferRequest::TemplateAsyncInferRequest(const TemplateInferRequest::Ptr& inferRequest, const InferenceEngine::ITaskExecutor::Ptr& cpuTaskExecutor,
TemplateAsyncInferRequest::TemplateAsyncInferRequest(const TemplateInferRequest::Ptr& inferRequest,
const InferenceEngine::ITaskExecutor::Ptr& cpuTaskExecutor,
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor)
: AsyncInferRequestThreadSafeDefault(inferRequest, cpuTaskExecutor, callbackExecutor), _inferRequest(inferRequest), _waitExecutor(waitExecutor) {
: AsyncInferRequestThreadSafeDefault(inferRequest, cpuTaskExecutor, callbackExecutor),
_inferRequest(inferRequest),
_waitExecutor(waitExecutor) {
// In the current implementation we have CPU-only tasks and no need for two executors,
// so by default a single-stage pipeline is created.
// This stage executes InferRequest::Infer() using cpuTaskExecutor.
@ -23,7 +26,8 @@ TemplateAsyncInferRequest::TemplateAsyncInferRequest(const TemplateInferRequest:
if (remoteDevice) {
_pipeline = {{cpuTaskExecutor,
[this] {
OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "TemplateAsyncInferRequest::PreprocessingAndStartPipeline");
OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin,
"TemplateAsyncInferRequest::PreprocessingAndStartPipeline");
_inferRequest->inferPreprocess();
_inferRequest->startPipeline();
}},

View File

@ -13,8 +13,10 @@ namespace TemplatePlugin {
// ! [async_infer_request:header]
class TemplateAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
public:
TemplateAsyncInferRequest(const TemplateInferRequest::Ptr& inferRequest, const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor, const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
TemplateAsyncInferRequest(const TemplateInferRequest::Ptr& inferRequest,
const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
~TemplateAsyncInferRequest();

View File

@ -23,7 +23,8 @@ Configuration::Configuration(const ConfigMap& config, const Configuration& defau
if (TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS) == key) {
_streamsExecutorConfig.SetConfig(CONFIG_KEY(CPU_THROUGHPUT_STREAMS), value);
} else if (streamExecutorConfigKeys.end() != std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) {
} else if (streamExecutorConfigKeys.end() !=
std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) {
_streamsExecutorConfig.SetConfig(key, value);
} else if (CONFIG_KEY(DEVICE_ID) == key) {
deviceId = std::stoi(value);

View File

@ -21,7 +21,9 @@ struct Configuration {
Configuration& operator=(const Configuration&) = default;
Configuration& operator=(Configuration&&) = default;
explicit Configuration(const ConfigMap& config, const Configuration& defaultCfg = {}, const bool throwOnUnsupported = true);
explicit Configuration(const ConfigMap& config,
const Configuration& defaultCfg = {},
const bool throwOnUnsupported = true);
InferenceEngine::Parameter Get(const std::string& name) const;

View File

@ -18,8 +18,10 @@ using namespace TemplatePlugin;
// ! [executable_network:ctor_cnnnetwork]
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(const std::shared_ptr<const ngraph::Function>& function,
const InferenceEngine::InputsDataMap& inputInfoMap, const InferenceEngine::OutputsDataMap& outputsInfoMap,
const Configuration& cfg, const Plugin::Ptr& plugin)
const InferenceEngine::InputsDataMap& inputInfoMap,
const InferenceEngine::OutputsDataMap& outputsInfoMap,
const Configuration& cfg,
const Plugin::Ptr& plugin)
: InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, nullptr), // Disable default threads creation
_cfg(cfg),
_plugin(plugin) {
@ -40,7 +42,11 @@ TemplatePlugin::ExecutableNetwork::ExecutableNetwork(const std::shared_ptr<const
// ! [executable_network:ctor_cnnnetwork]
// ! [executable_network:ctor_import_stream]
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream& model, const Configuration& cfg, const Plugin::Ptr& plugin): _cfg(cfg), _plugin(plugin) {
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream& model,
const Configuration& cfg,
const Plugin::Ptr& plugin)
: _cfg(cfg),
_plugin(plugin) {
// read XML content
std::string xmlString;
std::uint64_t dataSize = 0;
@ -53,7 +59,9 @@ TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream& model, const
model.read(reinterpret_cast<char*>(&dataSize), sizeof(dataSize));
if (0 != dataSize) {
dataBlob = InferenceEngine::make_shared_blob<std::uint8_t>(
InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, {static_cast<std::size_t>(dataSize)}, InferenceEngine::Layout::C));
InferenceEngine::TensorDesc(InferenceEngine::Precision::U8,
{static_cast<std::size_t>(dataSize)},
InferenceEngine::Layout::C));
dataBlob->allocate();
model.read(dataBlob->buffer(), dataSize);
}
@ -84,7 +92,8 @@ TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream& model, const
// ! [executable_network:map_graph]
// forward declaration
std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const ngraph::Function>& function, const InferenceEngine::InputsDataMap& inputInfoMap,
std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const ngraph::Function>& function,
const InferenceEngine::InputsDataMap& inputInfoMap,
const InferenceEngine::OutputsDataMap& outputsInfoMap);
void TemplatePlugin::ExecutableNetwork::CompileNetwork(const std::shared_ptr<const ngraph::Function>& function,
@ -117,29 +126,36 @@ void TemplatePlugin::ExecutableNetwork::CompileNetwork(const std::shared_ptr<con
void TemplatePlugin::ExecutableNetwork::InitExecutor() {
// Default multi-threaded configuration is balanced for throughput and latency cases and takes into account
// real hardware cores and NUMA nodes.
auto streamsExecutorConfig = InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg._streamsExecutorConfig);
auto streamsExecutorConfig =
InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg._streamsExecutorConfig);
streamsExecutorConfig._name = "TemplateStreamsExecutor";
// As the Inference Engine CPU Streams Executor creates some additional threads,
// it is better to avoid thread recreation, as some OS memory allocators cannot manage such usage cases
// and memory consumption can be larger than expected.
// So the Inference Engine provides an executor cache.
_taskExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamsExecutorConfig);
// NOTE: callback Executor is not configured. So callback will be called in the thread of the last stage of inference request pipeline
// _callbackExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateCallbackExecutor"});
// NOTE: callback Executor is not configured. So callback will be called in the thread of the last stage of
// inference request pipeline _callbackExecutor =
// InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateCallbackExecutor"});
}
// ! [executable_network:init_executor]
// ! [executable_network:create_infer_request_impl]
InferenceEngine::IInferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::IInferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequestImpl(
InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) {
return std::make_shared<TemplateInferRequest>(networkInputs, networkOutputs, std::static_pointer_cast<ExecutableNetwork>(shared_from_this()));
return std::make_shared<TemplateInferRequest>(networkInputs,
networkOutputs,
std::static_pointer_cast<ExecutableNetwork>(shared_from_this()));
}
// ! [executable_network:create_infer_request_impl]
// ! [executable_network:create_infer_request]
InferenceEngine::IInferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequest() {
auto internalRequest = CreateInferRequestImpl(_networkInputs, _networkOutputs);
return std::make_shared<TemplateAsyncInferRequest>(std::static_pointer_cast<TemplateInferRequest>(internalRequest), _taskExecutor, _plugin->_waitExecutor,
return std::make_shared<TemplateAsyncInferRequest>(std::static_pointer_cast<TemplateInferRequest>(internalRequest),
_taskExecutor,
_plugin->_waitExecutor,
_callbackExecutor);
}
// ! [executable_network:create_infer_request]
@ -154,10 +170,15 @@ InferenceEngine::Parameter TemplatePlugin::ExecutableNetwork::GetConfig(const st
InferenceEngine::Parameter TemplatePlugin::ExecutableNetwork::GetMetric(const std::string& name) const {
// TODO: return more supported values for metrics
if (EXEC_NETWORK_METRIC_KEY(SUPPORTED_METRICS) == name) {
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, std::vector<std::string> {METRIC_KEY(NETWORK_NAME), METRIC_KEY(SUPPORTED_METRICS),
METRIC_KEY(SUPPORTED_CONFIG_KEYS), METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)});
IE_SET_METRIC_RETURN(SUPPORTED_METRICS,
std::vector<std::string>{METRIC_KEY(NETWORK_NAME),
METRIC_KEY(SUPPORTED_METRICS),
METRIC_KEY(SUPPORTED_CONFIG_KEYS),
METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)});
} else if (EXEC_NETWORK_METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) {
std::vector<std::string> configKeys = {CONFIG_KEY(DEVICE_ID), CONFIG_KEY(PERF_COUNT), TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS)};
std::vector<std::string> configKeys = {CONFIG_KEY(DEVICE_ID),
CONFIG_KEY(PERF_COUNT),
TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS)};
auto streamExecutorConfigKeys = InferenceEngine::IStreamsExecutor::Config{}.SupportedKeys();
for (auto&& configKey : streamExecutorConfigKeys) {
configKeys.emplace_back(configKey);

View File

@ -23,15 +23,19 @@ class Plugin;
// ! [executable_network:header]
class ExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
public:
ExecutableNetwork(const std::shared_ptr<const ngraph::Function>& function, const InferenceEngine::InputsDataMap& inputInfoMap,
const InferenceEngine::OutputsDataMap& outputsInfoMap, const Configuration& cfg, const std::shared_ptr<Plugin>& plugin);
ExecutableNetwork(const std::shared_ptr<const ngraph::Function>& function,
const InferenceEngine::InputsDataMap& inputInfoMap,
const InferenceEngine::OutputsDataMap& outputsInfoMap,
const Configuration& cfg,
const std::shared_ptr<Plugin>& plugin);
ExecutableNetwork(std::istream& model, const Configuration& cfg, const std::shared_ptr<Plugin>& plugin);
// Methods from a base class ExecutableNetworkThreadSafeDefault
void Export(std::ostream& model) override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(
InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override;
InferenceEngine::Parameter GetMetric(const std::string& name) const override;
@ -40,7 +44,8 @@ public:
private:
friend class TemplateInferRequest;
void CompileNetwork(const std::shared_ptr<const ngraph::Function>& function, const InferenceEngine::InputsDataMap& inputInfoMap,
void CompileNetwork(const std::shared_ptr<const ngraph::Function>& function,
const InferenceEngine::InputsDataMap& inputInfoMap,
const InferenceEngine::OutputsDataMap& outputsInfoMap);
void InitExecutor();

View File

@ -23,19 +23,25 @@ using namespace InferenceEngine;
using Time = std::chrono::high_resolution_clock;
// ! [infer_request:ctor]
TemplateInferRequest::TemplateInferRequest(const InferenceEngine::InputsDataMap& networkInputs, const InferenceEngine::OutputsDataMap& networkOutputs,
TemplateInferRequest::TemplateInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
const InferenceEngine::OutputsDataMap& networkOutputs,
const std::shared_ptr<TemplatePlugin::ExecutableNetwork>& executableNetwork)
: IInferRequestInternal(networkInputs, networkOutputs), _executableNetwork(executableNetwork) {
: IInferRequestInternal(networkInputs, networkOutputs),
_executableNetwork(executableNetwork) {
// TODO: allocate infer request device and host buffers if needed, fill actual list of profiling tasks
auto requestID = std::to_string(_executableNetwork->_requestId.fetch_add(1));
std::string name = _executableNetwork->_function->get_friendly_name() + "_Req" + requestID;
_profilingTask = {
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_Preprocess"),
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_Postprocess"),
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_StartPipline"),
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_WaitPipline"),
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name +
"_Preprocess"),
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name +
"_Postprocess"),
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name +
"_StartPipline"),
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name +
"_WaitPipline"),
};
_executable = _executableNetwork->_plugin->_backend->compile(_executableNetwork->_function);
@ -60,7 +66,10 @@ void TemplateInferRequest::allocateDeviceBuffers() {
}
template <typename BlobDataMap, typename GetNetworkPrecisionF>
static void AllocateImpl(const BlobDataMap& userDataMap, BlobMap& userBlobMap, BlobMap& deviceBlobMap, GetNetworkPrecisionF&& GetNetworkPrecision,
static void AllocateImpl(const BlobDataMap& userDataMap,
BlobMap& userBlobMap,
BlobMap& deviceBlobMap,
GetNetworkPrecisionF&& GetNetworkPrecision,
bool isInputBlob = true) {
for (auto&& userData : userDataMap) {
const auto& dims = userData.second->getTensorDesc().getDims();
@@ -95,7 +104,9 @@ void TemplateInferRequest::allocateBlobs() {
});
auto&& results = _executableNetwork->_function->get_results();
AllocateImpl(
_networkOutputs, _outputs, _networkOutputBlobs,
_networkOutputs,
_outputs,
_networkOutputBlobs,
[&](const std::string& blobName) {
return results.at(_executableNetwork->_outputIndex.at(blobName))->get_element_type();
},
@@ -114,8 +125,10 @@ void TemplateInferRequest::InferImpl() {
template <typename SrcT, typename DstT>
static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
ngraph::runtime::reference::convert<SrcT, DstT>(InferenceEngine::as<InferenceEngine::MemoryBlob>(src)->rmap().as<const SrcT*>(),
InferenceEngine::as<InferenceEngine::MemoryBlob>(dst)->wmap().as<DstT*>(), src->size());
ngraph::runtime::reference::convert<SrcT, DstT>(
InferenceEngine::as<InferenceEngine::MemoryBlob>(src)->rmap().as<const SrcT*>(),
InferenceEngine::as<InferenceEngine::MemoryBlob>(dst)->wmap().as<DstT*>(),
src->size());
}
static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
@@ -128,8 +141,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
blobCopy<std::uint8_t, float>(src, dst);
} break;
default: {
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to "
<< dst->getTensorDesc().getPrecision();
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
<< " to " << dst->getTensorDesc().getPrecision();
}
}
} break;
@@ -141,8 +154,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
blobCopy<float, std::uint8_t>(src, dst);
} break;
default: {
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to "
<< dst->getTensorDesc().getPrecision();
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
<< " to " << dst->getTensorDesc().getPrecision();
}
}
} break;
@@ -154,8 +167,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
blobCopy<int64_t, int32_t>(src, dst);
} break;
default: {
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to "
<< dst->getTensorDesc().getPrecision();
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
<< " to " << dst->getTensorDesc().getPrecision();
}
}
} break;
@@ -167,8 +180,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
blobCopy<int16_t, float>(src, dst);
} break;
default: {
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to "
<< dst->getTensorDesc().getPrecision();
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
<< " to " << dst->getTensorDesc().getPrecision();
}
}
} break;
@@ -180,8 +193,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
blobCopy<int8_t, float>(src, dst);
} break;
default: {
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to "
<< dst->getTensorDesc().getPrecision();
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
<< " to " << dst->getTensorDesc().getPrecision();
}
}
} break;
@@ -193,8 +206,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
blobCopy<bool, float>(src, dst);
} break;
default: {
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to "
<< dst->getTensorDesc().getPrecision();
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
<< " to " << dst->getTensorDesc().getPrecision();
}
}
} break;
@@ -206,8 +219,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
blobCopy<uint16_t, float>(src, dst);
} break;
default: {
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to "
<< dst->getTensorDesc().getPrecision();
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
<< " to " << dst->getTensorDesc().getPrecision();
}
}
} break;
@@ -230,7 +243,9 @@ void TemplateInferRequest::inferPreprocess() {
const auto& parameterShape = parameter->get_shape();
const auto& parameterType = parameter->get_element_type();
_inputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor(
parameterType, parameterShape, InferenceEngine::as<InferenceEngine::MemoryBlob>(networkInput.second)->rmap().as<void*>());
parameterType,
parameterShape,
InferenceEngine::as<InferenceEngine::MemoryBlob>(networkInput.second)->rmap().as<void*>());
}
for (auto&& output : _outputs) {
auto outputBlob = output.second;
@@ -243,7 +258,9 @@ void TemplateInferRequest::inferPreprocess() {
const auto& resultShape = result->get_shape();
const auto& resultType = result->get_element_type();
_outputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor(
resultType, resultShape, InferenceEngine::as<InferenceEngine::MemoryBlob>(networkOutput)->wmap().as<void*>());
resultType,
resultShape,
InferenceEngine::as<InferenceEngine::MemoryBlob>(networkOutput)->wmap().as<void*>());
}
_durations[Preprocess] = Time::now() - start;
}

View File

@@ -26,7 +26,8 @@ class TemplateInferRequest : public InferenceEngine::IInferRequestInternal {
public:
typedef std::shared_ptr<TemplateInferRequest> Ptr;
TemplateInferRequest(const InferenceEngine::InputsDataMap& networkInputs, const InferenceEngine::OutputsDataMap& networkOutputs,
TemplateInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
const InferenceEngine::OutputsDataMap& networkOutputs,
const std::shared_ptr<ExecutableNetwork>& executableNetwork);
~TemplateInferRequest();

View File

@@ -38,7 +38,8 @@ Plugin::Plugin() {
_backend = ngraph::runtime::Backend::create("INTERPRETER");
// create default stream executor with a given name
_waitExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateWaitExecutor"});
_waitExecutor =
InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateWaitExecutor"});
}
// ! [plugin:ctor]
@@ -54,7 +55,8 @@ Plugin::~Plugin() {
// ! [plugin:transform_network]
std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const ngraph::Function>& function, const InferenceEngine::InputsDataMap& inputInfoMap,
std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const ngraph::Function>& function,
const InferenceEngine::InputsDataMap& inputInfoMap,
const InferenceEngine::OutputsDataMap& outputsInfoMap) {
// 1. Copy ngraph::Function first to apply some transformations which modify original ngraph::Function
auto transformedNetwork = ngraph::clone_function(*function);
@@ -70,13 +72,15 @@ std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const n
bool needF16toF32 = false;
for (const auto& param : function->get_parameters()) {
if (param->get_element_type() == ngraph::element::f16 &&
inputInfoMap.at(param->get_friendly_name())->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP16) {
inputInfoMap.at(param->get_friendly_name())->getTensorDesc().getPrecision() !=
InferenceEngine::Precision::FP16) {
needF16toF32 = true;
break;
}
}
if (needF16toF32)
passManager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ngraph::element::f16, ngraph::element::f32}});
passManager.register_pass<ngraph::pass::ConvertPrecision>(
precisions_array{{ngraph::element::f16, ngraph::element::f32}});
// Example: register plugin specific transformation
passManager.register_pass<ngraph::pass::DecomposeDivideMatcher>();
passManager.register_pass<ngraph::pass::ReluReluFusionMatcher>();
@@ -92,29 +96,38 @@ std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const n
// ! [plugin:transform_network]
// ! [plugin:load_exe_network_impl]
InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork& network, const ConfigMap& config) {
InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork& network,
const ConfigMap& config) {
OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::LoadExeNetworkImpl");
InferenceEngine::InputsDataMap networkInputs = network.getInputsInfo();
InferenceEngine::OutputsDataMap networkOutputs = network.getOutputsInfo();
auto fullConfig = Configuration{config, _cfg};
return std::make_shared<ExecutableNetwork>(network.getFunction(), networkInputs, networkOutputs, fullConfig,
return std::make_shared<ExecutableNetwork>(network.getFunction(),
networkInputs,
networkOutputs,
fullConfig,
std::static_pointer_cast<Plugin>(shared_from_this()));
}
// ! [plugin:load_exe_network_impl]
// ! [plugin:import_network]
InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(std::istream& modelStream, const std::map<std::string, std::string>& config) {
InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(
std::istream& modelStream,
const std::map<std::string, std::string>& config) {
OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::ImportNetwork");
auto fullConfig = Configuration{config, _cfg};
return std::make_shared<ExecutableNetwork>(modelStream, fullConfig, std::static_pointer_cast<Plugin>(shared_from_this()));
return std::make_shared<ExecutableNetwork>(modelStream,
fullConfig,
std::static_pointer_cast<Plugin>(shared_from_this()));
}
// ! [plugin:import_network]
// ! [plugin:query_network]
InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::CNNNetwork& network, const ConfigMap& config) const {
InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::CNNNetwork& network,
const ConfigMap& config) const {
OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::QueryNetwork");
Configuration fullConfig{config, _cfg, false};
@@ -160,7 +173,8 @@ InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::
// 5. If some housekeeping nodes were not added - add them.
if (InferenceEngine::details::contains(supported, node->get_friendly_name())) {
for (auto&& inputNodeOutput : node->input_values()) {
if (ngraph::op::is_constant(inputNodeOutput.get_node()) || ngraph::op::is_parameter(inputNodeOutput.get_node())) {
if (ngraph::op::is_constant(inputNodeOutput.get_node()) ||
ngraph::op::is_parameter(inputNodeOutput.get_node())) {
supported.emplace(inputNodeOutput.get_node()->get_friendly_name());
}
}
@@ -175,11 +189,14 @@ InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::
// 6. Eliminate subgraphs that consist of housekeeping nodes only
if (ngraph::op::is_constant(node) || ngraph::op::is_parameter(node)) {
if (!InferenceEngine::details::contains(supported, node->output(0).get_target_inputs().begin()->get_node()->get_friendly_name())) {
if (!InferenceEngine::details::contains(
supported,
node->output(0).get_target_inputs().begin()->get_node()->get_friendly_name())) {
supported.erase(node->get_friendly_name());
}
} else if (ngraph::op::is_output(node)) {
if (!InferenceEngine::details::contains(supported, node->input_values().begin()->get_node()->get_friendly_name())) {
if (!InferenceEngine::details::contains(supported,
node->input_values().begin()->get_node()->get_friendly_name())) {
supported.erase(node->get_friendly_name());
}
}
@@ -209,21 +226,30 @@ void Plugin::SetConfig(const ConfigMap& config) {
// ! [plugin:set_config]
// ! [plugin:get_config]
InferenceEngine::Parameter Plugin::GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& /*options*/) const {
InferenceEngine::Parameter Plugin::GetConfig(
const std::string& name,
const std::map<std::string, InferenceEngine::Parameter>& /*options*/) const {
return _cfg.Get(name);
}
// ! [plugin:get_config]
// ! [plugin:get_metric]
InferenceEngine::Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const {
InferenceEngine::Parameter Plugin::GetMetric(const std::string& name,
const std::map<std::string, InferenceEngine::Parameter>& options) const {
if (METRIC_KEY(SUPPORTED_METRICS) == name) {
std::vector<std::string> supportedMetrics = {METRIC_KEY(AVAILABLE_DEVICES), METRIC_KEY(SUPPORTED_METRICS),
METRIC_KEY(SUPPORTED_CONFIG_KEYS), METRIC_KEY(FULL_DEVICE_NAME),
METRIC_KEY(IMPORT_EXPORT_SUPPORT), METRIC_KEY(DEVICE_ARCHITECTURE),
METRIC_KEY(OPTIMIZATION_CAPABILITIES), METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)};
std::vector<std::string> supportedMetrics = {METRIC_KEY(AVAILABLE_DEVICES),
METRIC_KEY(SUPPORTED_METRICS),
METRIC_KEY(SUPPORTED_CONFIG_KEYS),
METRIC_KEY(FULL_DEVICE_NAME),
METRIC_KEY(IMPORT_EXPORT_SUPPORT),
METRIC_KEY(DEVICE_ARCHITECTURE),
METRIC_KEY(OPTIMIZATION_CAPABILITIES),
METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)};
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, supportedMetrics);
} else if (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) {
std::vector<std::string> configKeys = {CONFIG_KEY(DEVICE_ID), CONFIG_KEY(PERF_COUNT), TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS)};
std::vector<std::string> configKeys = {CONFIG_KEY(DEVICE_ID),
CONFIG_KEY(PERF_COUNT),
TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS)};
auto streamExecutorConfigKeys = InferenceEngine::IStreamsExecutor::Config{}.SupportedKeys();
for (auto&& configKey : streamExecutorConfigKeys) {
if (configKey != InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS) {

View File

@@ -23,12 +23,19 @@ public:
void SetConfig(const std::map<std::string, std::string>& config) override;
InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config) const override;
InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::CNNNetwork& network,
InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(
const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config) override;
void AddExtension(const std::shared_ptr<InferenceEngine::IExtension>& extension) override;
InferenceEngine::Parameter GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override;
InferenceEngine::Parameter GetMetric(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override;
InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetwork(std::istream& model, const std::map<std::string, std::string>& config) override;
InferenceEngine::Parameter GetConfig(
const std::string& name,
const std::map<std::string, InferenceEngine::Parameter>& options) const override;
InferenceEngine::Parameter GetMetric(
const std::string& name,
const std::map<std::string, InferenceEngine::Parameter>& options) const override;
InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetwork(
std::istream& model,
const std::map<std::string, std::string>& config) override;
private:
friend class ExecutableNetwork;

View File

@@ -28,7 +28,10 @@ ngraph::pass::AddMeanSubtract::AddMeanSubtract(const MeanMap& inputInfoMap) {
}
auto mean_const = it->second;
NGRAPH_CHECK(mean_const->get_element_type() == ngraph::element::f32, "Mean for ", param->get_friendly_name(), " must have f32 type");
NGRAPH_CHECK(mean_const->get_element_type() == ngraph::element::f32,
"Mean for ",
param->get_friendly_name(),
" must have f32 type");
auto copy_param = param->clone_with_new_inputs({});
auto sub = std::make_shared<ngraph::opset3::Subtract>(copy_param, mean_const);

View File

@@ -12,7 +12,8 @@
NGRAPH_RTTI_DEFINITION(ngraph::pass::AddPreprocessing, "AddPreprocessing", 0);
ngraph::pass::AddPreprocessing::AddPreprocessing(const InferenceEngine::InputsDataMap& inputInfoMap): m_inputInfoMap(inputInfoMap) {}
ngraph::pass::AddPreprocessing::AddPreprocessing(const InferenceEngine::InputsDataMap& inputInfoMap)
: m_inputInfoMap(inputInfoMap) {}
bool ngraph::pass::AddPreprocessing::run_on_function(std::shared_ptr<ngraph::Function> f) {
ngraph::pass::AddMeanSubtract::MeanMap meanMap;
@@ -39,10 +40,12 @@ bool ngraph::pass::AddPreprocessing::run_on_function(std::shared_ptr<ngraph::Fun
has_mean_image = true;
if (c == 0) {
meanImage = pInfo[c]->meanData;
NGRAPH_CHECK(meanImage->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32,
NGRAPH_CHECK(
meanImage->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32,
"Only InferenceEngine::Precision::FP32 precision is supported for PreProcessChannel::meanData");
} else {
NGRAPH_CHECK(meanImage->getTensorDesc() == pInfo[c]->meanData->getTensorDesc(), "TensorDesc for PreProcessChannel::meanData must be equal");
NGRAPH_CHECK(meanImage->getTensorDesc() == pInfo[c]->meanData->getTensorDesc(),
"TensorDesc for PreProcessChannel::meanData must be equal");
}
}
}
@@ -52,7 +55,8 @@ bool ngraph::pass::AddPreprocessing::run_on_function(std::shared_ptr<ngraph::Fun
continue;
}
NGRAPH_CHECK(!(has_mean_image && has_scales), "Only PreProcessChannel::meanData or PreProcessChannel::meanValue can be set.");
NGRAPH_CHECK(!(has_mean_image && has_scales),
"Only PreProcessChannel::meanData or PreProcessChannel::meanValue can be set.");
if (has_scales) {
ngraph::Shape shape(inputDims.size(), 1);

View File

@@ -28,7 +28,10 @@ ngraph::pass::AddStdScale::AddStdScale(const ScaleMap& inputInfoMap) {
}
auto scale_const = it->second;
NGRAPH_CHECK(scale_const->get_element_type() == ngraph::element::f32, "Scale for ", param->get_friendly_name(), " must have f32 type");
NGRAPH_CHECK(scale_const->get_element_type() == ngraph::element::f32,
"Scale for ",
param->get_friendly_name(),
" must have f32 type");
auto copy_param = param->clone_with_new_inputs({});
auto div = std::make_shared<ngraph::opset3::Divide>(copy_param, it->second);

View File

@@ -24,7 +24,8 @@ bool pass::MyFunctionTransformation::run_on_function(std::shared_ptr<ngraph::Fun
// Check that input and output shape a fully defined (not dynamic) and number of consumers equal to 1
Input<Node> input = node->input(0);
Output<Node> output = node->output(0);
if (input.get_partial_shape().is_static() && output.get_partial_shape().is_static() && output.get_target_inputs().size() == 1) {
if (input.get_partial_shape().is_static() && output.get_partial_shape().is_static() &&
output.get_target_inputs().size() == 1) {
nodes.push_back(node);
}
}
@@ -32,7 +33,8 @@ bool pass::MyFunctionTransformation::run_on_function(std::shared_ptr<ngraph::Fun
// Print types and names for collected nodes
for (auto& node : nodes) {
std::cout << "Type: " << node->get_type_info().name << std::endl << "Name: " << node->get_friendly_name() << std::endl;
std::cout << "Type: " << node->get_type_info().name << std::endl
<< "Name: " << node->get_friendly_name() << std::endl;
}
// Return false because we didn't change nGraph Function

View File

@@ -33,7 +33,9 @@ ngraph::pass::DecomposeDivideMatcher::DecomposeDivideMatcher() {
}
// Decompose Divide into Multiply with Power operations
auto pow = std::make_shared<ngraph::opset3::Power>(div->input_value(1), opset3::Constant::create(div->get_input_element_type(1), Shape {1}, {-1}));
auto pow = std::make_shared<ngraph::opset3::Power>(
div->input_value(1),
opset3::Constant::create(div->get_input_element_type(1), Shape{1}, {-1}));
auto mul = std::make_shared<ngraph::opset3::Multiply>(div->input_value(0), pow);
@@ -70,7 +72,8 @@ ngraph::pass::ReluReluFusionMatcher::ReluReluFusionMatcher() {
auto& node_to_output = m.get_pattern_value_map();
// Create new Relu operation and add register it for additional execution
auto new_relu = register_new_node<ngraph::opset3::Relu>(node_to_output.at(m_relu1).get_node_shared_ptr()->input_value(0));
auto new_relu =
register_new_node<ngraph::opset3::Relu>(node_to_output.at(m_relu1).get_node_shared_ptr()->input_value(0));
// Copy runtime info attributes to newly created operation
ngraph::copy_runtime_info(m.get_matched_nodes(), new_relu);

View File

@@ -0,0 +1,94 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ie_core.hpp>
#include <ie_ngraph_utils.hpp>
#include <limits>
#include <algorithm>
#include <ngraph/ngraph.hpp>
#include <shared_test_classes/base/layer_test_utils.hpp>
#include "base_reference_test.hpp"
using namespace reference_tests;
using namespace ngraph;
using namespace InferenceEngine;
struct AtanhParams {
template <class IT>
AtanhParams(const ngraph::PartialShape& shape, const ngraph::element::Type& iType, const std::vector<IT>& iValues)
: pshape(shape), inType(iType), outType(iType), inputData(CreateBlob(iType, iValues)) {
std::vector<IT> oValues;
std::vector<double> output;
for (auto element : iValues)
output.push_back(static_cast<double>(element));
std::transform(output.begin(), output.end(), output.begin(), [](double input) -> double {
return std::atanh(input);
});
if (std::is_integral<IT>()) {
std::transform(output.begin(), output.end(), output.begin(), [](double input) -> double {
return std::round(input);
});
}
for (auto element : output)
oValues.push_back(static_cast<IT>(element));
refData = CreateBlob(outType, oValues);
}
ngraph::PartialShape pshape;
ngraph::element::Type inType;
ngraph::element::Type outType;
InferenceEngine::Blob::Ptr inputData;
InferenceEngine::Blob::Ptr refData;
};
class ReferenceAtanhLayerTest : public testing::TestWithParam<AtanhParams>, public CommonReferenceTest {
public:
void SetUp() override {
auto params = GetParam();
function = CreateFunction(params.pshape, params.inType, params.outType);
inputData = {params.inputData};
refOutData = {params.refData};
}
static std::string getTestCaseName(const testing::TestParamInfo<AtanhParams>& obj) {
auto param = obj.param;
std::ostringstream result;
result << "shape=" << param.pshape << "_";
result << "iType=" << param.inType << "_";
result << "oType=" << param.outType;
return result.str();
}
private:
static std::shared_ptr<Function> CreateFunction(const PartialShape& input_shape, const element::Type& input_type,
const element::Type& expected_output_type) {
const auto in = std::make_shared<op::Parameter>(input_type, input_shape);
const auto atanh = std::make_shared<op::Atanh>(in);
return std::make_shared<Function>(NodeVector {atanh}, ParameterVector {in});
}
};
TEST_P(ReferenceAtanhLayerTest, CompareWithRefs) {
Exec();
}
INSTANTIATE_TEST_SUITE_P(
smoke_Atanh_With_Hardcoded_Refs, ReferenceAtanhLayerTest,
::testing::Values(AtanhParams(ngraph::PartialShape {2, 4}, ngraph::element::f32,
std::vector<float> {-INFINITY, -2.0f, -1.0f, -0.5f, 0.0f, 0.8f, 1.0f, INFINITY}),
AtanhParams(ngraph::PartialShape {2, 4}, ngraph::element::f16,
std::vector<float16> {-INFINITY, -2.0f, -1.0f, -0.5f, -0.0f, 0.8f, 1.0f, INFINITY}),
AtanhParams(ngraph::PartialShape {2, 3}, ngraph::element::i32,
std::vector<int32_t> {std::numeric_limits<int32_t>::min(), -2, -1, 1, 2, std::numeric_limits<int32_t>::max()}),
AtanhParams(ngraph::PartialShape {2, 3}, ngraph::element::u32,
std::vector<uint32_t> {std::numeric_limits<uint32_t>::min(), 0, 1, 2, 3, std::numeric_limits<uint32_t>::max()}),
AtanhParams(ngraph::PartialShape {2, 3}, ngraph::element::i64,
std::vector<int64_t> {std::numeric_limits<int64_t>::min(), -2, -1, 1, 2, std::numeric_limits<int64_t>::max()}),
AtanhParams(ngraph::PartialShape {2, 3}, ngraph::element::u64,
std::vector<uint64_t> {std::numeric_limits<uint64_t>::min(), 0, 1, 2, 3, std::numeric_limits<uint64_t>::max()})),
ReferenceAtanhLayerTest::getTestCaseName);

View File

@@ -75,6 +75,48 @@ std::vector<RefComparisonParams> generateComparisonCombinedParams() {
INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()),
ReferenceComparisonLayerTest::getTestCaseName);
template <element::Type_t IN_ET>
std::vector<RefComparisonParams> generateNumericParams(const element::Type& type) {
using T = typename element_type_traits<IN_ET>::value_type;
std::vector<RefComparisonParams> compParams {
Builder {}
.compType(ComparisonTypes::EQUAL)
.input1({{4}, type, std::vector<T> {-2.5f, 25.5f, 2.25f, NAN}})
.input2({{4}, type, std::vector<T> {10.0f, 5.0f, 2.25f, 10.0f}})
.expected({{4}, element::boolean, std::vector<char> {0, 0, 1, 0, }}),
Builder {}
.compType(ComparisonTypes::EQUAL)
.input1({{2, 3}, type, std::vector<T> {0.0f, NAN, NAN, 1.0f, 21.0f, -INFINITY}})
.input2({{2, 3}, type, std::vector<T> {1.0f, NAN, 23.0f, 1.0f, 19.0f, 21.0f}})
.expected({{2, 3}, element::boolean, std::vector<char> {0, 0, 0, 1, 0, 0}}),
Builder {}
.compType(ComparisonTypes::EQUAL)
.input1({{1}, type, std::vector<T> {INFINITY}})
.input2({{1}, type, std::vector<T> {INFINITY}})
.expected({{1}, element::boolean, std::vector<char> {1}}),
Builder {}
.compType(ComparisonTypes::EQUAL)
.input1({{5}, type, std::vector<T> {-2.5f, 25.5f, 2.25f, INFINITY, 6.0f}})
.input2({{5}, type, std::vector<T> {10.0f, 5.0f, 2.25f, 10.0f, -INFINITY}})
.expected({{5}, element::boolean, std::vector<char> {0, 0, 1, 0, 0}})};
return compParams;
}
std::vector<RefComparisonParams> generateNumericCombinedParams() {
const std::vector<std::vector<RefComparisonParams>> compTypeParams {
generateNumericParams<element::Type_t::f16>(element::f16),
generateNumericParams<element::Type_t::f32>(element::f32)};
std::vector<RefComparisonParams> combinedParams;
for (const auto& params : compTypeParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
INSTANTIATE_TEST_SUITE_P(smoke_Numeric_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateNumericCombinedParams()),
ReferenceComparisonLayerTest::getTestCaseName);
} // namespace
} // namespace ComparisonOpsRefTestDefinitions
} // namespace reference_tests

View File

@@ -1,6 +1,7 @@
BasedOnStyle: Google
IndentWidth: 4
UseTab: Never
ColumnLimit: 120
Language: Cpp
Standard: Cpp11
@@ -8,18 +9,20 @@ Standard: Cpp11
AccessModifierOffset: -4
AlignConsecutiveMacros: true
AllowAllArgumentsOnNextLine: false
AllowAllConstructorInitializersOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Empty
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: false
ColumnLimit: 160
# Specialize this comment pragma in order to avoid changes in SEA copyrights
BinPackArguments: false
BinPackParameters: false
CommentPragmas: '^#'
DerivePointerAlignment: false
FixNamespaceComments: true
IndentCaseLabels: false
IndentPPDirectives: BeforeHash
SpaceBeforeCpp11BracedList: true
SpaceBeforeCtorInitializerColon: false
IndentPPDirectives: AfterHash
ForEachMacros:
- foreach
- FOREACH_CHILD
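
The .clang-format change above (ColumnLimit lowered from 160 to 120, BinPackArguments and BinPackParameters set to false) is what drives most of the mechanical re-wrapping in the C++ hunks of this commit. As a rough, hypothetical sketch only (loadNetworkExample is not part of this commit), a declaration that no longer fits within 120 columns is now broken with one parameter per line instead of being packed onto shared lines:

// Hypothetical declaration, shown only to illustrate the updated wrapping rules.
#include <map>
#include <string>

// Old style (ColumnLimit: 160, parameters packed onto shared continuation lines):
// void loadNetworkExample(const std::string& modelPath, const std::string& deviceName, const std::map<std::string, std::string>& config, int numRequests);

// New style (ColumnLimit: 120, BinPackParameters: false): one parameter per line,
// aligned with the opening parenthesis, matching the pattern seen throughout this diff.
void loadNetworkExample(const std::string& modelPath,
                        const std::string& deviceName,
                        const std::map<std::string, std::string>& config,
                        int numRequests);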

View File

@@ -102,9 +102,11 @@ int image_free(c_mat_t* img) {
int image_add_rectangles(c_mat_t* img, rectangle_t rects[], int classes[], int num, int thickness) {
int colors_num = 21;
color_t colors[21] = {// colors to be used for bounding boxes
{128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190}, {153, 153, 153}, {30, 170, 250},
{0, 220, 220}, {35, 142, 107}, {152, 251, 152}, {180, 130, 70}, {60, 20, 220}, {0, 0, 255}, {142, 0, 0},
{70, 0, 0}, {100, 60, 0}, {90, 0, 0}, {230, 0, 0}, {32, 11, 119}, {0, 74, 111}, {81, 0, 81}};
{128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190},
{153, 153, 153}, {30, 170, 250}, {0, 220, 220}, {35, 142, 107}, {152, 251, 152},
{180, 130, 70}, {60, 20, 220}, {0, 0, 255}, {142, 0, 0}, {70, 0, 0},
{100, 60, 0}, {90, 0, 0}, {230, 0, 0}, {32, 11, 119}, {0, 74, 111},
{81, 0, 81}};
for (int i = 0; i < num; i++) {
int x = rects[i].x_min;

View File

@@ -346,7 +346,10 @@ int main(int argc, char** argv) {
goto err;
for (i = 0; i < ver.num_vers; ++i) {
printf(" %s\n", ver.versions[i].device_name);
printf(" %s version ......... %zu.%zu\n", ver.versions[i].description, ver.versions[i].major, ver.versions[i].minor);
printf(" %s version ......... %zu.%zu\n",
ver.versions[i].description,
ver.versions[i].major,
ver.versions[i].minor);
printf(" Build ......... %s\n", ver.versions[i].build_number);
}
ie_core_versions_free(&ver);
@@ -360,7 +363,8 @@ int main(int argc, char** argv) {
printf("%sCustom extension loaded: %s\n", info, custom_ex_library_msg);
}
if (custom_plugin_cfg_msg && (strcmp(device_name, "GPU") == 0 || strcmp(device_name, "MYRIAD") == 0 || strcmp(device_name, "HDDL") == 0)) {
if (custom_plugin_cfg_msg &&
(strcmp(device_name, "GPU") == 0 || strcmp(device_name, "MYRIAD") == 0 || strcmp(device_name, "HDDL") == 0)) {
// Config for device plugin custom extension is loaded from an .xml
// description
ie_config_t cfg = {"CONFIG_FILE", custom_plugin_cfg_msg, NULL};
@@ -480,7 +484,12 @@ int main(int argc, char** argv) {
for (j = 0; j < resized_img.mat_data_size; ++j)
resized_img.mat_data[j] = img.mat_data[j];
} else {
printf("%sImage is resized from (%d, %d) to (%zu, %zu)\n", warn, img.mat_width, img.mat_height, input_width, input_height);
printf("%sImage is resized from (%d, %d) to (%zu, %zu)\n",
warn,
img.mat_width,
img.mat_height,
input_width,
input_height);
if (image_resize(&img, &resized_img, (int)input_width, (int)input_height) == -1) {
printf("%sImage %s cannot be resized!\n", warn, file_paths[i]);
@@ -623,7 +632,8 @@ int main(int argc, char** argv) {
for (ch = 0; ch < num_channels; ++ch) {
/** [images stride + channels stride + pixel id ] all in bytes
* **/
data[image_id * image_size * num_channels + ch * image_size + pid] = images[image_id].mat_data[pid * num_channels + ch];
data[image_id * image_size * num_channels + ch * image_size + pid] =
images[image_id].mat_data[pid * num_channels + ch];
}
}
image_free(&images[image_id]);
@@ -704,7 +714,15 @@ int main(int argc, char** argv) {
int xmax = (int)(detection[curProposal * objectSize + 5] * originalImages[image_id].mat_width);
int ymax = (int)(detection[curProposal * objectSize + 6] * originalImages[image_id].mat_height);
printf("[%d, %d] element, prob = %f (%d, %d)-(%d, %d) batch id : %d", curProposal, label, confidence, xmin, ymin, xmax, ymax, image_id);
printf("[%d, %d] element, prob = %f (%d, %d)-(%d, %d) batch id : %d",
curProposal,
label,
confidence,
xmin,
ymin,
xmax,
ymax,
image_id);
if (confidence > 0.5) {
/** Drawing only objects with >50% probability **/
@@ -722,7 +740,11 @@ int main(int argc, char** argv) {
int batch_id;
for (batch_id = 0; batch_id < batchSize; ++batch_id) {
if (object_num[batch_id] > 0) {
image_add_rectangles(&originalImages[batch_id], boxes[batch_id], classes[batch_id], object_num[batch_id], 2);
image_add_rectangles(&originalImages[batch_id],
boxes[batch_id],
classes[batch_id],
object_num[batch_id],
2);
}
const char* out = "out_";
char str_num[16] = {0};

View File

@@ -16,13 +16,15 @@ static const char* model_message = "Required. Path to an .xml file with a traine
static const char* image_message = "Required. Path to one or more images or folder with images.";
/// @brief message for assigning cnn calculation to device
static const char* target_device_message = "Optional. Specify the target device to infer. "
static const char* target_device_message =
"Optional. Specify the target device to infer. "
"Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify "
"HETERO plugin. "
"Sample will look for a suitable plugin for device specified.";
/// @brief message for plugin custom kernels desc
static const char* custom_plugin_config_message = "Required for GPU, MYRIAD, HDDL custom kernels. "
static const char* custom_plugin_config_message =
"Required for GPU, MYRIAD, HDDL custom kernels. "
"Absolute path to the .xml config file with the kernels descriptions.";
/// @brief message for user extension library argument

View File

@@ -1,6 +1,7 @@
BasedOnStyle: Google
IndentWidth: 4
UseTab: Never
ColumnLimit: 120
Language: Cpp
Standard: Cpp11
@@ -8,18 +9,20 @@ Standard: Cpp11
AccessModifierOffset: -4
AlignConsecutiveMacros: true
AllowAllArgumentsOnNextLine: false
AllowAllConstructorInitializersOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Empty
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: false
ColumnLimit: 160
# Specialize this comment pragma in order to avoid changes in SEA copyrights
BinPackArguments: false
BinPackParameters: false
CommentPragmas: '^#'
DerivePointerAlignment: false
FixNamespaceComments: true
IndentCaseLabels: false
IndentPPDirectives: BeforeHash
SpaceBeforeCpp11BracedList: true
SpaceBeforeCtorInitializerColon: false
IndentPPDirectives: AfterHash
ForEachMacros:
- foreach
- FOREACH_CHILD

View File

@@ -8,11 +8,17 @@
#include "ie_plugin_config.hpp"
const std::string EXPORTED_NETWORK_NAME = "undefined";
std::map<std::string, InferenceEngine::Precision> precision_map = {
{"FP32", InferenceEngine::Precision::FP32}, {"FP64", InferenceEngine::Precision::FP64}, {"FP16", InferenceEngine::Precision::FP16},
{"I8", InferenceEngine::Precision::I8}, {"I16", InferenceEngine::Precision::I16}, {"I32", InferenceEngine::Precision::I32},
{"I64", InferenceEngine::Precision::I64}, {"U8", InferenceEngine::Precision::U8}, {"U16", InferenceEngine::Precision::U16},
{"U32", InferenceEngine::Precision::U32}, {"U64", InferenceEngine::Precision::U64}};
std::map<std::string, InferenceEngine::Precision> precision_map = {{"FP32", InferenceEngine::Precision::FP32},
{"FP64", InferenceEngine::Precision::FP64},
{"FP16", InferenceEngine::Precision::FP16},
{"I8", InferenceEngine::Precision::I8},
{"I16", InferenceEngine::Precision::I16},
{"I32", InferenceEngine::Precision::I32},
{"I64", InferenceEngine::Precision::I64},
{"U8", InferenceEngine::Precision::U8},
{"U16", InferenceEngine::Precision::U16},
{"U32", InferenceEngine::Precision::U32},
{"U64", InferenceEngine::Precision::U64}};
std::map<std::string, InferenceEngine::Layout> layout_map = {{"ANY", InferenceEngine::Layout::ANY},
{"NCHW", InferenceEngine::Layout::NCHW},
@@ -200,7 +206,8 @@ InferenceEnginePython::IENetwork InferenceEnginePython::read_network(std::string
return InferenceEnginePython::IENetwork(std::make_shared<InferenceEngine::CNNNetwork>(net));
}
InferenceEnginePython::IENetwork::IENetwork(const std::shared_ptr<InferenceEngine::CNNNetwork>& cnn_network): actual(cnn_network) {
InferenceEnginePython::IENetwork::IENetwork(const std::shared_ptr<InferenceEngine::CNNNetwork>& cnn_network)
: actual(cnn_network) {
if (actual == nullptr)
IE_THROW() << "IENetwork was not initialized.";
name = actual->getName();
@@ -286,7 +293,9 @@ void InferenceEnginePython::IENetwork::reshape(const std::map<std::string, std::
actual->reshape(input_shapes);
}
InferenceEnginePython::IEExecNetwork::IEExecNetwork(const std::string& name, size_t num_requests): infer_requests(num_requests), name(name) {
InferenceEnginePython::IEExecNetwork::IEExecNetwork(const std::string& name, size_t num_requests)
: infer_requests(num_requests),
name(name) {
request_queue_ptr = std::make_shared<IdleInferRequestQueue>();
}
@@ -333,16 +342,19 @@ std::shared_ptr<InferenceEngine::ExecutableNetwork> InferenceEnginePython::IEExe
return actual;
}
void InferenceEnginePython::InferRequestWrap::setBlob(const std::string& blob_name, const InferenceEngine::Blob::Ptr& blob_ptr) {
void InferenceEnginePython::InferRequestWrap::setBlob(const std::string& blob_name,
const InferenceEngine::Blob::Ptr& blob_ptr) {
request_ptr.SetBlob(blob_name.c_str(), blob_ptr);
}
void InferenceEnginePython::InferRequestWrap::setBlob(const std::string& blob_name, const InferenceEngine::Blob::Ptr& blob_ptr,
void InferenceEnginePython::InferRequestWrap::setBlob(const std::string& blob_name,
const InferenceEngine::Blob::Ptr& blob_ptr,
const InferenceEngine::PreProcessInfo& info) {
request_ptr.SetBlob(blob_name.c_str(), blob_ptr, info);
}
const InferenceEngine::PreProcessInfo& InferenceEnginePython::InferRequestWrap::getPreProcess(const std::string& blob_name) {
const InferenceEngine::PreProcessInfo& InferenceEnginePython::InferRequestWrap::getPreProcess(
const std::string& blob_name) {
return request_ptr.GetPreProcess(blob_name.c_str());
}
@@ -392,7 +404,8 @@ int InferenceEnginePython::InferRequestWrap::wait(int64_t timeout) {
return static_cast<int>(code);
}
std::map<std::string, InferenceEnginePython::ProfileInfo> InferenceEnginePython::InferRequestWrap::getPerformanceCounts() {
std::map<std::string, InferenceEnginePython::ProfileInfo>
InferenceEnginePython::InferRequestWrap::getPerformanceCounts() {
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perf_counts = request_ptr.GetPerformanceCounts();
std::map<std::string, InferenceEnginePython::ProfileInfo> perf_map;
@@ -430,7 +443,8 @@ InferenceEnginePython::IECore::IECore(const std::string& xmlConfigFile) {
actual = InferenceEngine::Core(xmlConfigFile);
}
std::map<std::string, InferenceEngine::Version> InferenceEnginePython::IECore::getVersions(const std::string& deviceName) {
std::map<std::string, InferenceEngine::Version> InferenceEnginePython::IECore::getVersions(
const std::string& deviceName) {
return actual.GetVersions(deviceName);
}
@@ -485,12 +499,16 @@ void InferenceEnginePython::IEExecNetwork::createInferRequests(int num_requests)
infer_request.request_queue_ptr = request_queue_ptr;
infer_request.request_ptr = actual->CreateInferRequest();
infer_request.request_ptr.SetCompletionCallback<std::function<void(InferenceEngine::InferRequest r, InferenceEngine::StatusCode)>>(
infer_request.request_ptr
.SetCompletionCallback<std::function<void(InferenceEngine::InferRequest r, InferenceEngine::StatusCode)>>(
[&](InferenceEngine::InferRequest request, InferenceEngine::StatusCode code) {
if (code != InferenceEngine::StatusCode::OK) {
IE_EXCEPTION_SWITCH(code, ExceptionType,
IE_EXCEPTION_SWITCH(code,
ExceptionType,
InferenceEngine::details::ThrowNow<ExceptionType>{} <<=
std::stringstream {} << IE_LOCATION << InferenceEngine::details::ExceptionTraits<ExceptionType>::string());
std::stringstream{}
<< IE_LOCATION
<< InferenceEngine::details::ExceptionTraits<ExceptionType>::string());
}
auto end_time = Time::now();
@@ -504,12 +522,15 @@ void InferenceEnginePython::IEExecNetwork::createInferRequests(int num_requests)
}
}
InferenceEnginePython::IENetwork InferenceEnginePython::IECore::readNetwork(const std::string& modelPath, const std::string& binPath) {
InferenceEnginePython::IENetwork InferenceEnginePython::IECore::readNetwork(const std::string& modelPath,
const std::string& binPath) {
InferenceEngine::CNNNetwork net = actual.ReadNetwork(modelPath, binPath);
return IENetwork(std::make_shared<InferenceEngine::CNNNetwork>(net));
}
InferenceEnginePython::IENetwork InferenceEnginePython::IECore::readNetwork(const std::string& model, const uint8_t* bin, size_t bin_size) {
InferenceEnginePython::IENetwork InferenceEnginePython::IECore::readNetwork(const std::string& model,
const uint8_t* bin,
size_t bin_size) {
InferenceEngine::MemoryBlob::Ptr weights_blob;
if (bin_size != 0) {
InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, {bin_size}, InferenceEngine::Layout::C);
@@ -521,44 +542,58 @@ InferenceEnginePython::IENetwork InferenceEnginePython::IECore::readNetwork(cons
return IENetwork(std::make_shared<InferenceEngine::CNNNetwork>(net));
}
std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::loadNetwork(IENetwork network, const std::string& deviceName,
const std::map<std::string, std::string>& config,
int num_requests) {
auto exec_network = InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(network.name, num_requests);
exec_network->actual = std::make_shared<InferenceEngine::ExecutableNetwork>(actual.LoadNetwork(*network.actual, deviceName, config));
exec_network->createInferRequests(num_requests);
return exec_network;
}
std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::loadNetworkFromFile(const std::string& modelPath,
std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::loadNetwork(
IENetwork network,
const std::string& deviceName,
const std::map<std::string, std::string>& config,
int num_requests) {
auto exec_network = InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(modelPath, num_requests);
exec_network->actual = std::make_shared<InferenceEngine::ExecutableNetwork>(actual.LoadNetwork(modelPath, deviceName, config));
auto exec_network =
InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(network.name, num_requests);
exec_network->actual =
std::make_shared<InferenceEngine::ExecutableNetwork>(actual.LoadNetwork(*network.actual, deviceName, config));
exec_network->createInferRequests(num_requests);
return exec_network;
}
std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::importNetwork(const std::string& modelFIle, const std::string& deviceName,
std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::loadNetworkFromFile(
const std::string& modelPath,
const std::string& deviceName,
const std::map<std::string, std::string>& config,
int num_requests) {
auto exec_network = InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(EXPORTED_NETWORK_NAME, num_requests);
exec_network->actual = std::make_shared<InferenceEngine::ExecutableNetwork>(actual.ImportNetwork(modelFIle, deviceName, config));
auto exec_network =
InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(modelPath, num_requests);
exec_network->actual =
std::make_shared<InferenceEngine::ExecutableNetwork>(actual.LoadNetwork(modelPath, deviceName, config));
exec_network->createInferRequests(num_requests);
return exec_network;
}
std::map<std::string, std::string> InferenceEnginePython::IECore::queryNetwork(InferenceEnginePython::IENetwork network, const std::string& deviceName,
std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::importNetwork(
const std::string& modelFIle,
const std::string& deviceName,
const std::map<std::string, std::string>& config,
int num_requests) {
auto exec_network =
InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(EXPORTED_NETWORK_NAME, num_requests);
exec_network->actual =
std::make_shared<InferenceEngine::ExecutableNetwork>(actual.ImportNetwork(modelFIle, deviceName, config));
exec_network->createInferRequests(num_requests);
return exec_network;
}
std::map<std::string, std::string> InferenceEnginePython::IECore::queryNetwork(
InferenceEnginePython::IENetwork network,
const std::string& deviceName,
const std::map<std::string, std::string>& config) {
auto res = actual.QueryNetwork(*network.actual, deviceName, config);
return res.supportedLayersMap;
}
void InferenceEnginePython::IECore::setConfig(const std::map<std::string, std::string>& config, const std::string& deviceName) {
void InferenceEnginePython::IECore::setConfig(const std::map<std::string, std::string>& config,
const std::string& deviceName) {
actual.SetConfig(config, deviceName);
}

View File

@@ -115,7 +115,9 @@ struct InferRequestWrap {
void setBlob(const std::string& blob_name, const InferenceEngine::Blob::Ptr& blob_ptr);
void setBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data, const InferenceEngine::PreProcessInfo& info);
void setBlob(const std::string& name,
const InferenceEngine::Blob::Ptr& data,
const InferenceEngine::PreProcessInfo& info);
void setBatch(int size);
@@ -160,13 +162,23 @@ struct IECore {
std::map<std::string, InferenceEngine::Version> getVersions(const std::string& deviceName);
InferenceEnginePython::IENetwork readNetwork(const std::string& modelPath, const std::string& binPath);
InferenceEnginePython::IENetwork readNetwork(const std::string& model, const uint8_t* bin, size_t bin_size);
std::unique_ptr<InferenceEnginePython::IEExecNetwork> loadNetwork(IENetwork network, const std::string& deviceName,
const std::map<std::string, std::string>& config, int num_requests);
std::unique_ptr<InferenceEnginePython::IEExecNetwork> loadNetworkFromFile(const std::string& modelPath, const std::string& deviceName,
const std::map<std::string, std::string>& config, int num_requests);
std::unique_ptr<InferenceEnginePython::IEExecNetwork> importNetwork(const std::string& modelFIle, const std::string& deviceName,
const std::map<std::string, std::string>& config, int num_requests);
std::map<std::string, std::string> queryNetwork(IENetwork network, const std::string& deviceName, const std::map<std::string, std::string>& config);
std::unique_ptr<InferenceEnginePython::IEExecNetwork> loadNetwork(IENetwork network,
const std::string& deviceName,
const std::map<std::string, std::string>& config,
int num_requests);
std::unique_ptr<InferenceEnginePython::IEExecNetwork> loadNetworkFromFile(
const std::string& modelPath,
const std::string& deviceName,
const std::map<std::string, std::string>& config,
int num_requests);
std::unique_ptr<InferenceEnginePython::IEExecNetwork> importNetwork(
const std::string& modelFIle,
const std::string& deviceName,
const std::map<std::string, std::string>& config,
int num_requests);
std::map<std::string, std::string> queryNetwork(IENetwork network,
const std::string& deviceName,
const std::map<std::string, std::string>& config);
void setConfig(const std::map<std::string, std::string>& config, const std::string& deviceName = std::string());
void registerPlugin(const std::string& pluginName, const std::string& deviceName);
void unregisterPlugin(const std::string& deviceName);

View File

@@ -26,7 +26,8 @@ void InferenceEnginePython::ApplyPOTTransformations(InferenceEnginePython::IENet
manager.run_passes(network.actual->getFunction());
}
void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, bool use_const_initializer) {
void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network,
bool use_const_initializer) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::LowLatency2>(use_const_initializer);
manager.run_passes(network.actual->getFunction());
@@ -38,7 +39,9 @@ void InferenceEnginePython::ApplyPruningTransformation(InferenceEnginePython::IE
manager.run_passes(network.actual->getFunction());
}
void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork network, std::string path, bool extract_names) {
void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork network,
std::string path,
bool extract_names) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::GenerateMappingFile>(path, extract_names);
manager.run_passes(network.actual->getFunction());
@@ -48,7 +51,8 @@ void InferenceEnginePython::CheckAPI() {
std::shared_ptr<ngraph::Function> f;
{
auto input = std::make_shared<ngraph::opset6::Parameter>(ngraph::element::f32, ngraph::Shape{1, 1000, 4});
auto reshape = std::make_shared<ngraph::opset6::Reshape>(input, std::make_shared<ngraph::opset6::ShapeOf>(input), true);
auto reshape =
std::make_shared<ngraph::opset6::Reshape>(input, std::make_shared<ngraph::opset6::ShapeOf>(input), true);
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{reshape}, ngraph::ParameterVector{input});
}
ngraph::pass::Manager m;

View File

@@ -7,6 +7,7 @@
#include <common_test_utils/ngraph_test_utils.hpp>
#include <string>
std::pair<bool, std::string> InferenceEnginePython::CompareNetworks(InferenceEnginePython::IENetwork lhs, InferenceEnginePython::IENetwork rhs) {
std::pair<bool, std::string> InferenceEnginePython::CompareNetworks(InferenceEnginePython::IENetwork lhs,
InferenceEnginePython::IENetwork rhs) {
return compare_functions(lhs.actual->getFunction(), rhs.actual->getFunction(), true, true, false, true, true);
}

View File

@@ -1,6 +1,7 @@
BasedOnStyle: Google
IndentWidth: 4
UseTab: Never
ColumnLimit: 120
Language: Cpp
Standard: Cpp11
@@ -8,18 +9,20 @@ Standard: Cpp11
AccessModifierOffset: -4
AlignConsecutiveMacros: true
AllowAllArgumentsOnNextLine: false
AllowAllConstructorInitializersOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Empty
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: false
ColumnLimit: 160
# Specialize this comment pragma in order to avoid changes in SEA copyrights
BinPackArguments: false
BinPackParameters: false
CommentPragmas: '^#'
DerivePointerAlignment: false
FixNamespaceComments: true
IndentCaseLabels: false
IndentPPDirectives: BeforeHash
SpaceBeforeCpp11BracedList: true
SpaceBeforeCtorInitializerColon: false
IndentPPDirectives: AfterHash
ForEachMacros:
- foreach
- FOREACH_CHILD

View File

@@ -113,19 +113,7 @@ endif()
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/gflags" AND
NOT DEFINED OpenVINO_SOURCE_DIR)
function(add_gflags)
# common gflags settings
set(GFLAGS_IS_SUBPROJECT TRUE)
set(HAVE_SYS_STAT_H 1)
set(HAVE_INTTYPES_H 1)
set(INTTYPES_FORMAT C99)
set(BUILD_TESTING OFF)
set(BUILD_SHARED_LIBS OFF)
add_subdirectory(thirdparty/gflags EXCLUDE_FROM_ALL)
set_target_properties(gflags_nothreads_static PROPERTIES FOLDER thirdparty)
endfunction()
add_gflags()
endif()
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zlib")

View File

@@ -2,7 +2,7 @@
This topic demonstrates how to use the Benchmark C++ Tool to estimate deep learning inference performance on supported devices. Performance can be measured for two inference modes: synchronous (latency-oriented) and asynchronous (throughput-oriented).
> **NOTE:** This topic describes usage of C++ implementation of the Benchmark Tool. For the Python* implementation, refer to [Benchmark Python* Tool](../../tools/benchmark_tool/README.md).
> **NOTE:** This topic describes usage of C++ implementation of the Benchmark Tool. For the Python* implementation, refer to [Benchmark Python* Tool](../../../tools/benchmark_tool/README.md).
> **TIP**: You also can work with the Benchmark Tool inside the OpenVINO™ [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench).
> [DL Workbench](@ref workbench_docs_Workbench_DG_Introduction) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare

View File

@@ -14,28 +14,33 @@
static const char help_message[] = "Print a usage message";
/// @brief message for images argument
static const char input_message[] = "Optional. Path to a folder with images and/or binaries or to specific image or binary file.";
static const char input_message[] =
"Optional. Path to a folder with images and/or binaries or to specific image or binary file.";
/// @brief message for model argument
static const char model_message[] = "Required. Path to an .xml/.onnx/.prototxt file with a trained model or to a .blob files with "
static const char model_message[] =
"Required. Path to an .xml/.onnx/.prototxt file with a trained model or to a .blob files with "
"a trained compiled model.";
/// @brief message for execution mode
static const char api_message[] = "Optional. Enable Sync/Async API. Default value is \"async\".";
/// @brief message for assigning cnn calculation to device
static const char target_device_message[] = "Optional. Specify a target device to infer on (the list of available devices is shown below). "
static const char target_device_message[] =
"Optional. Specify a target device to infer on (the list of available devices is shown below). "
"Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify "
"HETERO plugin. "
"Use \"-d MULTI:<comma-separated_devices_list>\" format to specify MULTI plugin. "
"The application looks for a suitable plugin for the specified device.";
/// @brief message for iterations count
static const char iterations_count_message[] = "Optional. Number of iterations. "
static const char iterations_count_message[] =
"Optional. Number of iterations. "
"If not specified, the number of iterations is calculated depending on a device.";
/// @brief message for requests count
static const char infer_requests_count_message[] = "Optional. Number of infer requests. Default value is determined automatically for device.";
static const char infer_requests_count_message[] =
"Optional. Number of infer requests. Default value is determined automatically for device.";
/// @brief message for execution time
static const char execution_time_message[] = "Optional. Time in seconds to execute topology.";
@@ -45,7 +50,8 @@ static const char infer_num_threads_message[] = "Optional. Number of threads to
"(including HETERO and MULTI cases).";
/// @brief message for #streams for CPU inference
static const char infer_num_streams_message[] = "Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices "
static const char infer_num_streams_message[] =
"Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices "
"(for HETERO and MULTI device cases use format <dev1>:<nstreams1>,<dev2>:<nstreams2> or just "
"<nstreams>). "
"Default value is determined automatically for a device.Please note that although the "
@@ -58,38 +64,46 @@ static const char infer_num_streams_message[] = "Optional. Number of streams to
/// @brief message for latency percentile settings
static const char infer_latency_percentile_message[] =
"Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median).";
"Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value "
"is 50 (median).";
/// @brief message for enforcing of BF16 execution where it is possible
static const char enforce_bf16_message[] = "Optional. By default floating point operations execution in bfloat16 precision are enforced "
static const char enforce_bf16_message[] =
"Optional. By default floating point operations execution in bfloat16 precision are enforced "
"if supported by platform.\n"
" 'true' - enable bfloat16 regardless of platform support\n"
" 'false' - disable bfloat16 regardless of platform support";
/// @brief message for user library argument
static const char custom_cpu_library_message[] = "Required for CPU custom layers. Absolute path to a shared library with the kernels "
static const char custom_cpu_library_message[] =
"Required for CPU custom layers. Absolute path to a shared library with the kernels "
"implementations.";
/// @brief message for clDNN custom kernels desc
static const char custom_cldnn_message[] = "Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.";
static const char custom_cldnn_message[] =
"Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.";
static const char batch_size_message[] = "Optional. Batch size value. If not specified, the batch size value is determined from "
static const char batch_size_message[] =
"Optional. Batch size value. If not specified, the batch size value is determined from "
"Intermediate Representation.";
// @brief message for CPU threads pinning option
static const char infer_threads_pinning_message[] =
"Optional. Explicit inference threads binding options (leave empty to let the OpenVINO to make a choice):\n"
"\t\t\t\tenabling threads->cores pinning(\"YES\", which is already default for any conventional CPU), \n"
"\t\t\t\tletting the runtime to decide on the threads->different core types(\"HYBRID_AWARE\", which is default on the hybrid CPUs) \n"
"\t\t\t\tletting the runtime to decide on the threads->different core types(\"HYBRID_AWARE\", which is default on "
"the hybrid CPUs) \n"
"\t\t\t\tthreads->(NUMA)nodes(\"NUMA\") or \n"
"\t\t\t\tcompletely disable(\"NO\") CPU inference threads pinning";
// @brief message for stream_output option
static const char stream_output_message[] = "Optional. Print progress as a plain text. When specified, an interactive progress bar is "
static const char stream_output_message[] =
"Optional. Print progress as a plain text. When specified, an interactive progress bar is "
"replaced with a "
"multiline output.";
// @brief message for report_type option
static const char report_type_message[] = "Optional. Enable collecting statistics report. \"no_counters\" report contains "
static const char report_type_message[] =
"Optional. Enable collecting statistics report. \"no_counters\" report contains "
"configuration options specified, resulting FPS and latency. \"average_counters\" "
"report extends \"no_counters\" report and additionally includes average PM "
"counters values for each layer from the network. \"detailed_counters\" report "
@ -100,10 +114,12 @@ static const char report_type_message[] = "Optional. Enable collecting statistic
static const char report_folder_message[] = "Optional. Path to a folder where statistics report is stored.";
// @brief message for exec_graph_path option
static const char exec_graph_path_message[] = "Optional. Path to a file where to store executable graph information serialized.";
static const char exec_graph_path_message[] =
"Optional. Path to a file where to store executable graph information serialized.";
// @brief message for progress bar option
static const char progress_message[] = "Optional. Show progress bar (can affect performance measurement). Default values is "
static const char progress_message[] =
"Optional. Show progress bar (can affect performance measurement). Default values is "
"\"false\".";
// @brief message for performance counters option
@ -111,19 +127,23 @@ static const char pc_message[] = "Optional. Report performance counters.";
#ifdef USE_OPENCV
// @brief message for load config option
static const char load_config_message[] = "Optional. Path to XML/YAML/JSON file to load custom IE parameters."
static const char load_config_message[] =
"Optional. Path to XML/YAML/JSON file to load custom IE parameters."
" Please note, command line parameters have higher priority then parameters from configuration "
"file.";
// @brief message for dump config option
static const char dump_config_message[] = "Optional. Path to XML/YAML/JSON file to dump IE parameters, which were set by application.";
static const char dump_config_message[] =
"Optional. Path to XML/YAML/JSON file to dump IE parameters, which were set by application.";
#endif
static const char shape_message[] = "Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or "
static const char shape_message[] =
"Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or "
"\"[1,3,224,224]\""
" in case of one input size.";
static const char layout_message[] = "Optional. Prompts how network layouts should be treated by application. "
static const char layout_message[] =
"Optional. Prompts how network layouts should be treated by application. "
"For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size.";
// @brief message for enabling caching
@ -139,19 +159,23 @@ static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8
static constexpr char inputs_precision_message[] = "Optional. Specifies precision for all input layers of the network.";
static constexpr char outputs_precision_message[] = "Optional. Specifies precision for all output layers of the network.";
static constexpr char outputs_precision_message[] =
"Optional. Specifies precision for all output layers of the network.";
static constexpr char iop_message[] = "Optional. Specifies precision for input and output layers by name.\n"
static constexpr char iop_message[] =
"Optional. Specifies precision for input and output layers by name.\n"
" Example: -iop \"input:FP16, output:FP16\".\n"
" Notice that quotes are required.\n"
" Overwrites precision from ip and op options for "
"specified layers.";
static constexpr char input_image_scale_message[] = "Optional. Scale values to be used for the input image per channel.\n"
static constexpr char input_image_scale_message[] =
"Optional. Scale values to be used for the input image per channel.\n"
"Values to be provided in the [R, G, B] format. Can be defined for desired input of the model.\n"
"Example: -iscale data[255,255,255],info[255,255,255]\n";
static constexpr char input_image_mean_message[] = "Optional. Mean values to be used for the input image per channel.\n"
static constexpr char input_image_mean_message[] =
"Optional. Mean values to be used for the input image per channel.\n"
"Values to be provided in the [R, G, B] format. Can be defined for desired input of the model,\n"
"Example: -imean data[255,255,255],info[255,255,255]\n";

View File

@ -23,7 +23,8 @@ typedef std::chrono::nanoseconds ns;
typedef std::function<void(size_t id, const double latency)> QueueCallbackFunction;
/// @brief Wrapper class for InferenceEngine::InferRequest. Handles asynchronous callbacks and calculates execution time.
/// @brief Wrapper class for InferenceEngine::InferRequest. Handles asynchronous callbacks and calculates execution
/// time.
class InferReqWrap final {
public:
using Ptr = std::shared_ptr<InferReqWrap>;
@ -31,7 +32,9 @@ public:
~InferReqWrap() = default;
explicit InferReqWrap(InferenceEngine::ExecutableNetwork& net, size_t id, QueueCallbackFunction callbackQueue)
: _request(net.CreateInferRequest()), _id(id), _callbackQueue(callbackQueue) {
: _request(net.CreateInferRequest()),
_id(id),
_callbackQueue(callbackQueue) {
_request.SetCompletionCallback([&]() {
_endTime = Time::now();
_callbackQueue(_id, getExecutionTimeInMilliseconds());
@ -79,8 +82,10 @@ class InferRequestsQueue final {
public:
InferRequestsQueue(InferenceEngine::ExecutableNetwork& net, size_t nireq) {
for (size_t id = 0; id < nireq; id++) {
requests.push_back(
std::make_shared<InferReqWrap>(net, id, std::bind(&InferRequestsQueue::putIdleRequest, this, std::placeholders::_1, std::placeholders::_2)));
requests.push_back(std::make_shared<InferReqWrap>(
net,
id,
std::bind(&InferRequestsQueue::putIdleRequest, this, std::placeholders::_1, std::placeholders::_2)));
_idleIds.push(id);
}
resetTimes();
@ -90,7 +95,8 @@ public:
// So it should be released before any context that the request can use inside internal asynchronous tasks
// For example all members of InferRequestsQueue would be destroyed before `requests` vector
// So requests can try to use this members from `putIdleRequest()` that would be called from request callback
// To avoid this we should move this vector declaration after all members declaration or just clear it manually in destructor
// To avoid this we should move this vector declaration after all members declaration or just clear it manually
// in destructor
requests.clear();
}

View File

@ -16,14 +16,15 @@
using namespace InferenceEngine;
#ifdef USE_OPENCV
static const std::vector<std::string> supported_image_extensions = {"bmp", "dib", "jpeg", "jpg", "jpe", "jp2", "png",
"pbm", "pgm", "ppm", "sr", "ras", "tiff", "tif"};
static const std::vector<std::string> supported_image_extensions =
{"bmp", "dib", "jpeg", "jpg", "jpe", "jp2", "png", "pbm", "pgm", "ppm", "sr", "ras", "tiff", "tif"};
#else
static const std::vector<std::string> supported_image_extensions = {"bmp"};
#endif
static const std::vector<std::string> supported_binary_extensions = {"bin"};
std::vector<std::string> filterFilesByExtensions(const std::vector<std::string>& filePaths, const std::vector<std::string>& extensions) {
std::vector<std::string> filterFilesByExtensions(const std::vector<std::string>& filePaths,
const std::vector<std::string>& extensions) {
std::vector<std::string> filtered;
auto getExtension = [](const std::string& name) {
auto extensionPosition = name.rfind('.', name.size());
@ -40,8 +41,13 @@ std::vector<std::string> filterFilesByExtensions(const std::vector<std::string>&
}
template <typename T>
void fillBlobImage(Blob::Ptr& inputBlob, const std::vector<std::string>& filePaths, const size_t& batchSize, const benchmark_app::InputInfo& app_info,
const size_t& requestId, const size_t& inputId, const size_t& inputSize) {
void fillBlobImage(Blob::Ptr& inputBlob,
const std::vector<std::string>& filePaths,
const size_t& batchSize,
const benchmark_app::InputInfo& app_info,
const size_t& requestId,
const size_t& inputId,
const size_t& inputSize) {
MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
if (!minput) {
IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in "
@ -57,7 +63,8 @@ void fillBlobImage(Blob::Ptr& inputBlob, const std::vector<std::string>& filePat
std::vector<std::shared_ptr<uint8_t>> vreader;
vreader.reserve(batchSize);
for (size_t i = 0ULL, inputIndex = requestId * batchSize * inputSize + inputId; i < batchSize; i++, inputIndex += inputSize) {
for (size_t i = 0ULL, inputIndex = requestId * batchSize * inputSize + inputId; i < batchSize;
i++, inputIndex += inputSize) {
inputIndex %= filePaths.size();
slog::info << "Prepare image " << filePaths[inputIndex] << slog::endl;
@ -88,11 +95,13 @@ void fillBlobImage(Blob::Ptr& inputBlob, const std::vector<std::string>& filePat
for (size_t ch = 0; ch < numChannels; ++ch) {
/** [images stride + channels stride + pixel id ] all in
* bytes **/
size_t offset = imageId * numChannels * width * height + (((app_info.layout == "NCHW") || (app_info.layout == "CHW"))
size_t offset = imageId * numChannels * width * height +
(((app_info.layout == "NCHW") || (app_info.layout == "CHW"))
? (ch * width * height + h * width + w)
: (h * width * numChannels + w * numChannels + ch));
inputBlobData[offset] =
(static_cast<T>(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]) - static_cast<T>(app_info.mean[ch])) /
(static_cast<T>(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]) -
static_cast<T>(app_info.mean[ch])) /
static_cast<T>(app_info.scale[ch]);
}
}
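A minimal sketch of the two indexing schemes behind the offset expression above, for a single image without the batch term; the helper names offsetNCHW and offsetNHWC are hypothetical. NCHW keeps a full HxW plane per channel, while NHWC interleaves the channels of each pixel.
#include <cstddef>
// Hypothetical helpers mirroring the offset math above (single image, no batch term).
inline std::size_t offsetNCHW(std::size_t ch, std::size_t h, std::size_t w, std::size_t H, std::size_t W) {
    return ch * H * W + h * W + w;  // channel plane, then row, then column
}
inline std::size_t offsetNHWC(std::size_t ch, std::size_t h, std::size_t w, std::size_t W, std::size_t C) {
    return h * W * C + w * C + ch;  // row, then column, then channel
}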
@ -101,7 +110,11 @@ void fillBlobImage(Blob::Ptr& inputBlob, const std::vector<std::string>& filePat
}
template <typename T>
void fillBlobBinary(Blob::Ptr& inputBlob, const std::vector<std::string>& filePaths, const size_t& batchSize, const size_t& requestId, const size_t& inputId,
void fillBlobBinary(Blob::Ptr& inputBlob,
const std::vector<std::string>& filePaths,
const size_t& batchSize,
const size_t& requestId,
const size_t& inputId,
const size_t& inputSize) {
MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
if (!minput) {
@ -114,7 +127,8 @@ void fillBlobBinary(Blob::Ptr& inputBlob, const std::vector<std::string>& filePa
auto minputHolder = minput->wmap();
auto inputBlobData = minputHolder.as<char*>();
for (size_t i = 0ULL, inputIndex = requestId * batchSize * inputSize + inputId; i < batchSize; i++, inputIndex += inputSize) {
for (size_t i = 0ULL, inputIndex = requestId * batchSize * inputSize + inputId; i < batchSize;
i++, inputIndex += inputSize) {
inputIndex %= filePaths.size();
slog::info << "Prepare binary file " << filePaths[inputIndex] << slog::endl;
@ -140,12 +154,15 @@ void fillBlobBinary(Blob::Ptr& inputBlob, const std::vector<std::string>& filePa
}
template <typename T>
using uniformDistribution =
typename std::conditional<std::is_floating_point<T>::value, std::uniform_real_distribution<T>,
using uniformDistribution = typename std::conditional<
std::is_floating_point<T>::value,
std::uniform_real_distribution<T>,
typename std::conditional<std::is_integral<T>::value, std::uniform_int_distribution<T>, void>::type>::type;
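For clarity, a self-contained sketch of how this std::conditional alias resolves: floating-point element types get std::uniform_real_distribution and integral ones get std::uniform_int_distribution. The alias is copied under the hypothetical name uniformDistributionSketch, and the sample ranges and seed below are arbitrary.
#include <iostream>
#include <random>
#include <type_traits>
template <typename T>
using uniformDistributionSketch = typename std::conditional<
    std::is_floating_point<T>::value,
    std::uniform_real_distribution<T>,
    typename std::conditional<std::is_integral<T>::value, std::uniform_int_distribution<T>, void>::type>::type;
int main() {
    std::mt19937 gen(42);
    uniformDistributionSketch<float> freal(0.0f, 1.0f);  // resolves to uniform_real_distribution<float>
    uniformDistributionSketch<int> iint(0, 255);         // resolves to uniform_int_distribution<int>
    std::cout << freal(gen) << " " << iint(gen) << "\n";
    return 0;
}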
template <typename T, typename T2>
void fillBlobRandom(Blob::Ptr& inputBlob, T rand_min = std::numeric_limits<uint8_t>::min(), T rand_max = std::numeric_limits<uint8_t>::max()) {
void fillBlobRandom(Blob::Ptr& inputBlob,
T rand_min = std::numeric_limits<uint8_t>::min(),
T rand_max = std::numeric_limits<uint8_t>::max()) {
MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
if (!minput) {
IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in "
@ -191,14 +208,17 @@ void fillBlobImInfo(Blob::Ptr& inputBlob, const size_t& batchSize, std::pair<siz
}
}
void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSize, benchmark_app::InputsInfo& app_inputs_info,
void fillBlobs(const std::vector<std::string>& inputFiles,
const size_t& batchSize,
benchmark_app::InputsInfo& app_inputs_info,
std::vector<InferReqWrap::Ptr> requests) {
std::vector<std::pair<size_t, size_t>> input_image_sizes;
for (auto& item : app_inputs_info) {
if (item.second.isImage()) {
input_image_sizes.push_back(std::make_pair(item.second.width(), item.second.height()));
}
slog::info << "Network input '" << item.first << "' precision " << item.second.precision << ", dimensions (" << item.second.layout << "): ";
slog::info << "Network input '" << item.first << "' precision " << item.second.precision << ", dimensions ("
<< item.second.layout << "): ";
for (const auto& i : item.second.shape) {
slog::info << i << " ";
}
@ -232,10 +252,11 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
"extensions: "
<< ss.str() << slog::endl;
} else if (binaryToBeUsed > binaryFiles.size()) {
slog::warn << "Some binary input files will be duplicated: " << binaryToBeUsed << " files are required but only " << binaryFiles.size()
<< " are provided" << slog::endl;
slog::warn << "Some binary input files will be duplicated: " << binaryToBeUsed
<< " files are required but only " << binaryFiles.size() << " are provided" << slog::endl;
} else if (binaryToBeUsed < binaryFiles.size()) {
slog::warn << "Some binary input files will be ignored: only " << binaryToBeUsed << " are required from " << binaryFiles.size() << slog::endl;
slog::warn << "Some binary input files will be ignored: only " << binaryToBeUsed << " are required from "
<< binaryFiles.size() << slog::endl;
}
imageFiles = filterFilesByExtensions(inputFiles, supported_image_extensions);
@ -254,10 +275,11 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
"extensions: "
<< ss.str() << slog::endl;
} else if (imagesToBeUsed > imageFiles.size()) {
slog::warn << "Some image input files will be duplicated: " << imagesToBeUsed << " files are required but only " << imageFiles.size()
<< " are provided" << slog::endl;
slog::warn << "Some image input files will be duplicated: " << imagesToBeUsed
<< " files are required but only " << imageFiles.size() << " are provided" << slog::endl;
} else if (imagesToBeUsed < imageFiles.size()) {
slog::warn << "Some image input files will be ignored: only " << imagesToBeUsed << " are required from " << imageFiles.size() << slog::endl;
slog::warn << "Some image input files will be ignored: only " << imagesToBeUsed << " are required from "
<< imageFiles.size() << slog::endl;
}
}
@ -274,15 +296,45 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
if (!imageFiles.empty()) {
// Fill with Images
if (precision == InferenceEngine::Precision::FP32) {
fillBlobImage<float>(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount);
fillBlobImage<float>(inputBlob,
imageFiles,
batchSize,
app_info,
requestId,
imageInputId++,
imageInputCount);
} else if (precision == InferenceEngine::Precision::FP16) {
fillBlobImage<short>(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount);
fillBlobImage<short>(inputBlob,
imageFiles,
batchSize,
app_info,
requestId,
imageInputId++,
imageInputCount);
} else if (precision == InferenceEngine::Precision::I32) {
fillBlobImage<int32_t>(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount);
fillBlobImage<int32_t>(inputBlob,
imageFiles,
batchSize,
app_info,
requestId,
imageInputId++,
imageInputCount);
} else if (precision == InferenceEngine::Precision::I64) {
fillBlobImage<int64_t>(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount);
fillBlobImage<int64_t>(inputBlob,
imageFiles,
batchSize,
app_info,
requestId,
imageInputId++,
imageInputCount);
} else if (precision == InferenceEngine::Precision::U8) {
fillBlobImage<uint8_t>(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount);
fillBlobImage<uint8_t>(inputBlob,
imageFiles,
batchSize,
app_info,
requestId,
imageInputId++,
imageInputCount);
} else {
IE_THROW() << "Input precision is not supported for " << item.first;
}
@ -292,15 +344,41 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
if (!binaryFiles.empty()) {
// Fill with binary files
if (precision == InferenceEngine::Precision::FP32) {
fillBlobBinary<float>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
fillBlobBinary<float>(inputBlob,
binaryFiles,
batchSize,
requestId,
binaryInputId++,
binaryInputCount);
} else if (precision == InferenceEngine::Precision::FP16) {
fillBlobBinary<short>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
fillBlobBinary<short>(inputBlob,
binaryFiles,
batchSize,
requestId,
binaryInputId++,
binaryInputCount);
} else if (precision == InferenceEngine::Precision::I32) {
fillBlobBinary<int32_t>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
fillBlobBinary<int32_t>(inputBlob,
binaryFiles,
batchSize,
requestId,
binaryInputId++,
binaryInputCount);
} else if (precision == InferenceEngine::Precision::I64) {
fillBlobBinary<int64_t>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
} else if ((precision == InferenceEngine::Precision::U8) || (precision == InferenceEngine::Precision::BOOL)) {
fillBlobBinary<uint8_t>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
fillBlobBinary<int64_t>(inputBlob,
binaryFiles,
batchSize,
requestId,
binaryInputId++,
binaryInputCount);
} else if ((precision == InferenceEngine::Precision::U8) ||
(precision == InferenceEngine::Precision::BOOL)) {
fillBlobBinary<uint8_t>(inputBlob,
binaryFiles,
batchSize,
requestId,
binaryInputId++,
binaryInputCount);
} else {
IE_THROW() << "Input precision is not supported for " << item.first;
}
@ -310,7 +388,8 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
if (app_info.isImageInfo() && (input_image_sizes.size() == 1)) {
// Most likely it is image info: fill with image information
auto image_size = input_image_sizes.at(0);
slog::info << "Fill input '" << item.first << "' with image size " << image_size.first << "x" << image_size.second << slog::endl;
slog::info << "Fill input '" << item.first << "' with image size " << image_size.first << "x"
<< image_size.second << slog::endl;
if (precision == InferenceEngine::Precision::FP32) {
fillBlobImInfo<float>(inputBlob, batchSize, image_size);
} else if (precision == InferenceEngine::Precision::FP16) {
@ -326,8 +405,9 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
}
}
// Fill random
slog::info << "Fill input '" << item.first << "' with random values (" << std::string((app_info.isImage() ? "image" : "some binary data"))
<< " is expected)" << slog::endl;
slog::info << "Fill input '" << item.first << "' with random values ("
<< std::string((app_info.isImage() ? "image" : "some binary data")) << " is expected)"
<< slog::endl;
if (precision == InferenceEngine::Precision::FP32) {
fillBlobRandom<float, float>(inputBlob);
} else if (precision == InferenceEngine::Precision::FP16) {

View File

@ -11,5 +11,7 @@
#include "infer_request_wrap.hpp"
#include "utils.hpp"
void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSize, benchmark_app::InputsInfo& app_inputs_info,
void fillBlobs(const std::vector<std::string>& inputFiles,
const size_t& batchSize,
benchmark_app::InputsInfo& app_inputs_info,
std::vector<InferReqWrap::Ptr> requests);

View File

@ -60,8 +60,10 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
throw std::logic_error("Incorrect API. Please set -api option to `sync` or `async` value.");
}
if (!FLAGS_report_type.empty() && FLAGS_report_type != noCntReport && FLAGS_report_type != averageCntReport && FLAGS_report_type != detailedCntReport) {
std::string err = "only " + std::string(noCntReport) + "/" + std::string(averageCntReport) + "/" + std::string(detailedCntReport) +
if (!FLAGS_report_type.empty() && FLAGS_report_type != noCntReport && FLAGS_report_type != averageCntReport &&
FLAGS_report_type != detailedCntReport) {
std::string err = "only " + std::string(noCntReport) + "/" + std::string(averageCntReport) + "/" +
std::string(detailedCntReport) +
" report types are supported (invalid -report_type option value)";
throw std::logic_error(err);
}
@ -73,7 +75,8 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
bool isNetworkCompiled = fileExt(FLAGS_m) == "blob";
bool isPrecisionSet = !(FLAGS_ip.empty() && FLAGS_op.empty() && FLAGS_iop.empty());
if (isNetworkCompiled && isPrecisionSet) {
std::string err = std::string("Cannot set precision for a compiled network. ") + std::string("Please re-compile your network with required precision "
std::string err = std::string("Cannot set precision for a compiled network. ") +
std::string("Please re-compile your network with required precision "
"using compile_tool");
throw std::logic_error(err);
@ -83,7 +86,8 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
static void next_step(const std::string additional_info = "") {
static size_t step_id = 0;
static const std::map<size_t, std::string> step_names = {{1, "Parsing and validating input arguments"},
static const std::map<size_t, std::string> step_names = {
{1, "Parsing and validating input arguments"},
{2, "Loading Inference Engine"},
{3, "Setting device configuration"},
{4, "Reading network files"},
@ -140,11 +144,14 @@ int main(int argc, char* argv[]) {
}
}
if (!FLAGS_report_type.empty()) {
statistics = std::make_shared<StatisticsReport>(StatisticsReport::Config {FLAGS_report_type, FLAGS_report_folder});
statistics =
std::make_shared<StatisticsReport>(StatisticsReport::Config{FLAGS_report_type, FLAGS_report_folder});
statistics->addParameters(StatisticsReport::Category::COMMAND_LINE_PARAMETERS, command_line_arguments);
}
auto isFlagSetInCommandLine = [&command_line_arguments](const std::string& name) {
return (std::find_if(command_line_arguments.begin(), command_line_arguments.end(), [name](const std::pair<std::string, std::string>& p) {
return (std::find_if(command_line_arguments.begin(),
command_line_arguments.end(),
[name](const std::pair<std::string, std::string>& p) {
return p.first == name;
}) != command_line_arguments.end());
};
@ -213,13 +220,17 @@ int main(int argc, char* argv[]) {
if (isFlagSetInCommandLine("pc")) {
// set to user defined value
device_config[CONFIG_KEY(PERF_COUNT)] = FLAGS_pc ? CONFIG_VALUE(YES) : CONFIG_VALUE(NO);
} else if (device_config.count(CONFIG_KEY(PERF_COUNT)) && (device_config.at(CONFIG_KEY(PERF_COUNT)) == "YES")) {
slog::warn << "Performance counters for " << device << " device is turned on. To print results use -pc option." << slog::endl;
} else if (device_config.count(CONFIG_KEY(PERF_COUNT)) &&
(device_config.at(CONFIG_KEY(PERF_COUNT)) == "YES")) {
slog::warn << "Performance counters for " << device
<< " device is turned on. To print results use -pc option." << slog::endl;
} else if (FLAGS_report_type == detailedCntReport || FLAGS_report_type == averageCntReport) {
slog::warn << "Turn on performance counters for " << device << " device since report type is " << FLAGS_report_type << "." << slog::endl;
slog::warn << "Turn on performance counters for " << device << " device since report type is "
<< FLAGS_report_type << "." << slog::endl;
device_config[CONFIG_KEY(PERF_COUNT)] = CONFIG_VALUE(YES);
} else if (!FLAGS_exec_graph_path.empty()) {
slog::warn << "Turn on performance counters for " << device << " device due to execution graph dumping." << slog::endl;
slog::warn << "Turn on performance counters for " << device << " device due to execution graph dumping."
<< slog::endl;
device_config[CONFIG_KEY(PERF_COUNT)] = CONFIG_VALUE(YES);
} else {
// set to default value
@ -231,8 +242,10 @@ int main(int argc, char* argv[]) {
const std::string key = device + "_THROUGHPUT_STREAMS";
if (device_nstreams.count(device)) {
// set to user defined value
std::vector<std::string> supported_config_keys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
if (std::find(supported_config_keys.begin(), supported_config_keys.end(), key) == supported_config_keys.end()) {
std::vector<std::string> supported_config_keys =
ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
if (std::find(supported_config_keys.begin(), supported_config_keys.end(), key) ==
supported_config_keys.end()) {
throw std::logic_error("Device " + device + " doesn't support config key '" + key + "'! " +
"Please specify -nstreams for correct devices in format "
"<dev1>:<nstreams1>,<dev2>:<nstreams2>" +
@ -267,8 +280,10 @@ int main(int argc, char* argv[]) {
// set to user defined value
device_config[CONFIG_KEY(CPU_BIND_THREAD)] = FLAGS_pin;
} else if (!device_config.count(CONFIG_KEY(CPU_BIND_THREAD))) {
if ((device_name.find("MULTI") != std::string::npos) && (device_name.find("GPU") != std::string::npos)) {
slog::warn << "Turn off threads pinning for " << device << " device since multi-scenario with GPU device is used." << slog::endl;
if ((device_name.find("MULTI") != std::string::npos) &&
(device_name.find("GPU") != std::string::npos)) {
slog::warn << "Turn off threads pinning for " << device
<< " device since multi-scenario with GPU device is used." << slog::endl;
device_config[CONFIG_KEY(CPU_BIND_THREAD)] = CONFIG_VALUE(NO);
}
}
@ -279,7 +294,8 @@ int main(int argc, char* argv[]) {
// for GPU execution, more throughput-oriented execution via streams
setThroughputStreams();
if ((device_name.find("MULTI") != std::string::npos) && (device_name.find("CPU") != std::string::npos)) {
if ((device_name.find("MULTI") != std::string::npos) &&
(device_name.find("CPU") != std::string::npos)) {
slog::warn << "Turn on GPU throttling. Multi-device execution with "
"the CPU + GPU performs best with GPU throttling hint, "
<< "which releases another CPU thread (that is otherwise "
@ -299,9 +315,11 @@ int main(int argc, char* argv[]) {
if (isFlagSetInCommandLine("nthreads"))
device_config[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(FLAGS_nthreads);
} else {
std::vector<std::string> supported_config_keys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
std::vector<std::string> supported_config_keys =
ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
auto supported = [&](const std::string& key) {
return std::find(std::begin(supported_config_keys), std::end(supported_config_keys), key) != std::end(supported_config_keys);
return std::find(std::begin(supported_config_keys), std::end(supported_config_keys), key) !=
std::end(supported_config_keys);
};
if (supported(CONFIG_KEY(CPU_THREADS_NUM)) && isFlagSetInCommandLine("nthreads")) {
device_config[CONFIG_KEY(CPU_THREADS_NUM)] = std::to_string(FLAGS_nthreads);
@ -351,7 +369,8 @@ int main(int argc, char* argv[]) {
auto duration_ms = double_to_string(get_total_ms_time(startTime));
slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"load network time (ms)", duration_ms}});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"load network time (ms)", duration_ms}});
if (batchSize == 0) {
batchSize = 1;
}
@ -367,7 +386,8 @@ int main(int argc, char* argv[]) {
auto duration_ms = double_to_string(get_total_ms_time(startTime));
slog::info << "Read network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"read network time (ms)", duration_ms}});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"read network time (ms)", duration_ms}});
const InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
if (inputInfo.empty()) {
@ -380,7 +400,13 @@ int main(int argc, char* argv[]) {
batchSize = cnnNetwork.getBatchSize();
// Parse input shapes if specified
bool reshape = false;
app_inputs_info = getInputsInfo<InputInfo::Ptr>(FLAGS_shape, FLAGS_layout, FLAGS_b, FLAGS_iscale, FLAGS_imean, inputInfo, reshape);
app_inputs_info = getInputsInfo<InputInfo::Ptr>(FLAGS_shape,
FLAGS_layout,
FLAGS_b,
FLAGS_iscale,
FLAGS_imean,
inputInfo,
reshape);
if (reshape) {
InferenceEngine::ICNNNetwork::InputShapes shapes = {};
for (auto& item : app_inputs_info)
@ -391,13 +417,15 @@ int main(int argc, char* argv[]) {
duration_ms = double_to_string(get_total_ms_time(startTime));
slog::info << "Reshape network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"reshape network time (ms)", duration_ms}});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"reshape network time (ms)", duration_ms}});
}
// use batch size according to provided layout and shapes
batchSize = (!FLAGS_layout.empty()) ? getBatchSize(app_inputs_info) : cnnNetwork.getBatchSize();
topology_name = cnnNetwork.getName();
slog::info << (FLAGS_b != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize << slog::endl;
slog::info << (FLAGS_b != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize
<< slog::endl;
// ----------------- 6. Configuring inputs and outputs
// ----------------------------------------------------------------------
@ -424,7 +452,8 @@ int main(int argc, char* argv[]) {
duration_ms = double_to_string(get_total_ms_time(startTime));
slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"load network time (ms)", duration_ms}});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"load network time (ms)", duration_ms}});
} else {
next_step();
slog::info << "Skipping the step for compiled network" << slog::endl;
@ -440,8 +469,14 @@ int main(int argc, char* argv[]) {
auto duration_ms = double_to_string(get_total_ms_time(startTime));
slog::info << "Import network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"import network time (ms)", duration_ms}});
app_inputs_info = getInputsInfo<InputInfo::CPtr>(FLAGS_shape, FLAGS_layout, FLAGS_b, FLAGS_iscale, FLAGS_imean, exeNetwork.GetInputsInfo());
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"import network time (ms)", duration_ms}});
app_inputs_info = getInputsInfo<InputInfo::CPtr>(FLAGS_shape,
FLAGS_layout,
FLAGS_b,
FLAGS_iscale,
FLAGS_imean,
exeNetwork.GetInputsInfo());
if (batchSize == 0) {
batchSize = 1;
}
@ -479,8 +514,8 @@ int main(int argc, char* argv[]) {
if ((niter > 0) && (FLAGS_api == "async")) {
niter = ((niter + nireq - 1) / nireq) * nireq;
if (FLAGS_niter != niter) {
slog::warn << "Number of iterations was aligned by request number from " << FLAGS_niter << " to " << niter << " using number of requests "
<< nireq << slog::endl;
slog::warn << "Number of iterations was aligned by request number from " << FLAGS_niter << " to "
<< niter << " using number of requests " << nireq << slog::endl;
}
}
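The alignment above uses the usual round-up-to-a-multiple idiom; a tiny standalone check follows, where the helper name alignUp and the numbers (100 iterations, 8 requests) are illustrative assumptions, not values taken from the diff.
#include <cassert>
#include <cstdint>
// Round niter up to the nearest multiple of nireq, as in the expression above.
static std::uint64_t alignUp(std::uint64_t niter, std::uint64_t nireq) {
    return ((niter + nireq - 1) / nireq) * nireq;
}
int main() {
    assert(alignUp(100, 8) == 104);  // 13 full groups of 8 requests
    assert(alignUp(96, 8) == 96);    // already aligned, unchanged
    return 0;
}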
@ -496,7 +531,8 @@ int main(int argc, char* argv[]) {
uint64_t duration_nanoseconds = getDurationInNanoseconds(duration_seconds);
if (statistics) {
statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG,
statistics->addParameters(
StatisticsReport::Category::RUNTIME_CONFIG,
{
{"topology", topology_name},
{"target device", device_name},
@ -510,7 +546,8 @@ int main(int argc, char* argv[]) {
for (auto& nstreams : device_nstreams) {
std::stringstream ss;
ss << "number of " << nstreams.first << " streams";
statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG, {
statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG,
{
{ss.str(), nstreams.second},
});
}
@ -576,7 +613,8 @@ int main(int argc, char* argv[]) {
auto duration_ms = double_to_string(inferRequestsQueue.getLatencies()[0]);
slog::info << "First inference took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"first inference time (ms)", duration_ms}});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"first inference time (ms)", duration_ms}});
inferRequestsQueue.resetTimes();
auto startTime = Time::now();
@ -587,7 +625,8 @@ int main(int argc, char* argv[]) {
* executed in the same conditions **/
ProgressBar progressBar(progressBarTotalCount, FLAGS_stream_output, FLAGS_progress);
while ((niter != 0LL && iteration < niter) || (duration_nanoseconds != 0LL && (uint64_t)execTime < duration_nanoseconds) ||
while ((niter != 0LL && iteration < niter) ||
(duration_nanoseconds != 0LL && (uint64_t)execTime < duration_nanoseconds) ||
(FLAGS_api == "async" && iteration % nireq != 0)) {
inferRequest = inferRequestsQueue.getIdleRequest();
if (!inferRequest) {
@ -629,10 +668,12 @@ int main(int argc, char* argv[]) {
double latency = getMedianValue<double>(inferRequestsQueue.getLatencies(), FLAGS_latency_percentile);
double totalDuration = inferRequestsQueue.getDurationInMilliseconds();
double fps = (FLAGS_api == "sync") ? batchSize * 1000.0 / latency : batchSize * 1000.0 * iteration / totalDuration;
double fps =
(FLAGS_api == "sync") ? batchSize * 1000.0 / latency : batchSize * 1000.0 * iteration / totalDuration;
if (statistics) {
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{"total execution time (ms)", double_to_string(totalDuration)},
{"total number of iterations", std::to_string(iteration)},
});
@ -643,11 +684,13 @@ int main(int argc, char* argv[]) {
} else {
latency_label = "latency (" + std::to_string(FLAGS_latency_percentile) + " percentile) (ms)";
}
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{latency_label, double_to_string(latency)},
});
}
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"throughput", double_to_string(fps)}});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"throughput", double_to_string(fps)}});
}
progressBar.finish();
@ -707,7 +750,8 @@ int main(int argc, char* argv[]) {
slog::err << ex.what() << slog::endl;
if (statistics) {
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{"error", ex.what()},
});
statistics->dump();

View File

@ -54,8 +54,13 @@ size_t InputInfo::depth() const {
} // namespace benchmark_app
uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device) {
static const std::map<std::string, uint32_t> deviceDefaultDurationInSeconds {{"CPU", 60}, {"GPU", 60}, {"VPU", 60}, {"MYRIAD", 60},
{"HDDL", 60}, {"FPGA", 120}, {"UNKNOWN", 120}};
static const std::map<std::string, uint32_t> deviceDefaultDurationInSeconds{{"CPU", 60},
{"GPU", 60},
{"VPU", 60},
{"MYRIAD", 60},
{"HDDL", 60},
{"FPGA", 120},
{"UNKNOWN", 120}};
uint32_t duration = 0;
for (const auto& deviceDurationInSeconds : deviceDefaultDurationInSeconds) {
if (device.find(deviceDurationInSeconds.first) != std::string::npos) {
@ -63,8 +68,9 @@ uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device) {
}
}
if (duration == 0) {
const auto unknownDeviceIt =
find_if(deviceDefaultDurationInSeconds.begin(), deviceDefaultDurationInSeconds.end(), [](std::pair<std::string, uint32_t> deviceDuration) {
const auto unknownDeviceIt = find_if(deviceDefaultDurationInSeconds.begin(),
deviceDefaultDurationInSeconds.end(),
[](std::pair<std::string, uint32_t> deviceDuration) {
return deviceDuration.first == "UNKNOWN";
});
@ -72,7 +78,8 @@ uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device) {
throw std::logic_error("UNKNOWN device was not found in the device duration list");
}
duration = unknownDeviceIt->second;
slog::warn << "Default duration " << duration << " seconds for unknown device '" << device << "' is used" << slog::endl;
slog::warn << "Default duration " << duration << " seconds for unknown device '" << device << "' is used"
<< slog::endl;
}
return duration;
}
@ -112,7 +119,8 @@ std::vector<std::string> parseDevices(const std::string& device_string) {
return devices;
}
std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices, const std::string& values_string) {
std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices,
const std::string& values_string) {
// Format: <device1>:<value1>,<device2>:<value2> or just <value>
std::map<std::string, std::string> result;
auto device_value_strings = split(values_string, ',');
@ -125,7 +133,8 @@ std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector
if (it != devices.end()) {
result[device_name] = nstreams;
} else {
throw std::logic_error("Can't set nstreams value " + std::string(nstreams) + " for device '" + device_name + "'! Incorrect device name!");
throw std::logic_error("Can't set nstreams value " + std::string(nstreams) + " for device '" +
device_name + "'! Incorrect device name!");
}
} else if (device_value_vec.size() == 1) {
auto value = device_value_vec.at(0);
@ -172,7 +181,8 @@ std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& sha
return ss.str();
}
std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& scale_mean, const benchmark_app::InputsInfo& inputs_info) {
std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& scale_mean,
const benchmark_app::InputsInfo& inputs_info) {
// Format: data:[255,255,255],info[255,255,255]
std::map<std::string, std::vector<float>> return_value;

View File

@ -29,14 +29,17 @@ using InputsInfo = std::map<std::string, InputInfo>;
std::vector<std::string> parseDevices(const std::string& device_string);
uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device);
std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices, const std::string& values_string);
std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices,
const std::string& values_string);
std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& shapes);
size_t getBatchSize(const benchmark_app::InputsInfo& inputs_info);
std::vector<std::string> split(const std::string& s, char delim);
std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& scale_mean, const benchmark_app::InputsInfo& inputs_info);
std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& scale_mean,
const benchmark_app::InputsInfo& inputs_info);
template <typename T>
std::map<std::string, std::string> parseInputParameters(const std::string parameter_string, const std::map<std::string, T>& input_info) {
std::map<std::string, std::string> parseInputParameters(const std::string parameter_string,
const std::map<std::string, T>& input_info) {
// Parse parameter string like "input0[value0],input1[value1]" or "[value]" (applied to all
// inputs)
std::map<std::string, std::string> return_value;
@ -67,8 +70,12 @@ std::map<std::string, std::string> parseInputParameters(const std::string parame
}
template <typename T>
benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const std::string& layout_string, const size_t batch_size,
const std::string& scale_string, const std::string& mean_string, const std::map<std::string, T>& input_info,
benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string,
const std::string& layout_string,
const size_t batch_size,
const std::string& scale_string,
const std::string& mean_string,
const std::map<std::string, T>& input_info,
bool& reshape_required) {
std::map<std::string, std::string> shape_map = parseInputParameters(shape_string, input_info);
std::map<std::string, std::string> layout_map = parseInputParameters(layout_string, input_info);
@ -134,10 +141,20 @@ benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const s
}
template <typename T>
benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const std::string& layout_string, const size_t batch_size,
const std::string& scale_string, const std::string& mean_string, const std::map<std::string, T>& input_info) {
benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string,
const std::string& layout_string,
const size_t batch_size,
const std::string& scale_string,
const std::string& mean_string,
const std::map<std::string, T>& input_info) {
bool reshape_required = false;
return getInputsInfo<T>(shape_string, layout_string, batch_size, scale_string, mean_string, input_info, reshape_required);
return getInputsInfo<T>(shape_string,
layout_string,
batch_size,
scale_string,
mean_string,
input_info,
reshape_required);
}
#ifdef USE_OPENCV

View File

@ -17,11 +17,13 @@ static const char help_message[] = "Print a usage message.";
static const char model_message[] = "Required. Path to an .xml file with a trained model.";
/// @brief message for images argument
static const char image_message[] = "Required. Path to a folder with images or path to an image files: a .ubyte file for LeNet"
static const char image_message[] =
"Required. Path to a folder with images or path to an image files: a .ubyte file for LeNet"
" and a .bmp file for the other networks.";
/// @brief message for assigning cnn calculation to device
static const char target_device_message[] = "Optional. Specify the target device to infer on (the list of available devices is shown below). "
static const char target_device_message[] =
"Optional. Specify the target device to infer on (the list of available devices is shown below). "
"Default value is CPU. Use \"-d HETERO:<comma_separated_devices_list>\" format to specify HETERO plugin. "
"Sample will look for a suitable plugin for device specified.";

View File

@ -100,7 +100,8 @@ int main(int argc, char* argv[]) {
// Config for device plugin custom extension is loaded from an .xml
// description
ie.SetConfig({{PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c}}, FLAGS_d);
slog::info << "Config for " << FLAGS_d << " device plugin custom extension loaded: " << FLAGS_c << slog::endl;
slog::info << "Config for " << FLAGS_d << " device plugin custom extension loaded: " << FLAGS_c
<< slog::endl;
}
// -----------------------------------------------------------------------------------------------------
@ -142,8 +143,8 @@ int main(int argc, char* argv[]) {
continue;
}
/** Store image data **/
std::shared_ptr<unsigned char> data(
reader->getData(inputInfoItem.second->getTensorDesc().getDims()[3], inputInfoItem.second->getTensorDesc().getDims()[2]));
std::shared_ptr<unsigned char> data(reader->getData(inputInfoItem.second->getTensorDesc().getDims()[3],
inputInfoItem.second->getTensorDesc().getDims()[2]));
if (data != nullptr) {
imagesData.push_back(data);
validImageNames.push_back(i);
@ -203,7 +204,8 @@ int main(int argc, char* argv[]) {
for (size_t ch = 0; ch < num_channels; ++ch) {
/** [images stride + channels stride + pixel id ] all in
* bytes **/
data[image_id * image_size * num_channels + ch * image_size + pid] = imagesData.at(image_id).get()[pid * num_channels + ch];
data[image_id * image_size * num_channels + ch * image_size + pid] =
imagesData.at(image_id).get()[pid * num_channels + ch];
}
}
}
@ -255,8 +257,9 @@ int main(int argc, char* argv[]) {
/** Validating -nt value **/
const size_t resultsCnt = outputBlob->size() / batchSize;
if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) {
slog::warn << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than " << resultsCnt + 1
<< " and more than 0)\n Maximal value " << resultsCnt << " will be used." << slog::endl;
slog::warn << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than "
<< resultsCnt + 1 << " and more than 0)\n Maximal value " << resultsCnt
<< " will be used." << slog::endl;
FLAGS_nt = resultsCnt;
}

View File

@ -42,7 +42,8 @@ MnistUbyte::MnistUbyte(const std::string& filename) {
n_cols = reverseInt(n_cols);
_width = (size_t)n_cols;
if (number_of_images > 1) {
std::cout << "[MNIST] Warning: number_of_images in mnist file equals " << number_of_images << ". Only a first image will be read." << std::endl;
std::cout << "[MNIST] Warning: number_of_images in mnist file equals " << number_of_images
<< ". Only a first image will be read." << std::endl;
}
size_t size = _width * _height * 1;

View File

@ -32,7 +32,8 @@ std::shared_ptr<unsigned char> OCVReader::getData(size_t width = 0, size_t heigh
size_t iw = img.size().width;
size_t ih = img.size().height;
if (width != iw || height != ih) {
slog::warn << "Image is resized from (" << iw << ", " << ih << ") to (" << width << ", " << height << ")" << slog::endl;
slog::warn << "Image is resized from (" << iw << ", " << ih << ") to (" << width << ", " << height << ")"
<< slog::endl;
}
cv::resize(img, resized, cv::Size(width, height));
}

View File

@ -27,8 +27,14 @@ void readInputFilesArguments(std::vector<std::string>& files, const std::string&
*/
void parseInputFilesArguments(std::vector<std::string>& files);
void processPrecision(InferenceEngine::CNNNetwork& network, const std::string& ip, const std::string& op, const std::string& iop);
void processPrecision(InferenceEngine::CNNNetwork& network,
const std::string& ip,
const std::string& op,
const std::string& iop);
void processLayout(InferenceEngine::CNNNetwork& network, const std::string& il, const std::string& ol, const std::string& iol);
void processLayout(InferenceEngine::CNNNetwork& network,
const std::string& il,
const std::string& ol,
const std::string& iol);
void printInputAndOutputsInfo(const InferenceEngine::CNNNetwork& network);

View File

@ -78,7 +78,10 @@ private:
batchData += offset;
std::iota(std::begin(indexes), std::end(indexes), 0);
std::partial_sort(std::begin(indexes), std::begin(indexes) + n, std::end(indexes), [&batchData](unsigned l, unsigned r) {
std::partial_sort(std::begin(indexes),
std::begin(indexes) + n,
std::end(indexes),
[&batchData](unsigned l, unsigned r) {
return batchData[l] > batchData[r];
});
for (unsigned j = 0; j < n; j++) {
@ -123,7 +126,10 @@ private:
}
public:
explicit ClassificationResultT(InferenceEngine::Blob::Ptr output_blob, std::vector<strType> image_names = {}, size_t batch_size = 1, size_t num_of_top = 10,
explicit ClassificationResultT(InferenceEngine::Blob::Ptr output_blob,
std::vector<strType> image_names = {},
size_t batch_size = 1,
size_t num_of_top = 10,
std::vector<std::string> labels = {})
: _nTop(num_of_top),
_outBlob(std::move(output_blob)),
@ -164,8 +170,8 @@ public:
// locked memory holder should be alive all time while access to its buffer happens
const auto result =
moutputHolder
.as<const InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type*>()[_results.at(id) +
image_id * (_outBlob->size() / _batchSize)];
.as<const InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type*>()
[_results.at(id) + image_id * (_outBlob->size() / _batchSize)];
std::cout << std::setw(static_cast<int>(_classidStr.length())) << std::left << _results.at(id) << " ";
std::cout << std::left << std::setw(static_cast<int>(_probabilityStr.length())) << std::fixed << result;

View File

@ -46,7 +46,8 @@ inline void ltrim(std::string& s) {
* @param s - string to trim
*/
inline void rtrim(std::string& s) {
s.erase(std::find_if(s.rbegin(), s.rend(),
s.erase(std::find_if(s.rbegin(),
s.rend(),
[](int c) {
return !std::isspace(c);
})
@ -157,9 +158,11 @@ public:
static UNUSED void writeOutputBmp(std::vector<std::vector<size_t>> data, size_t classesNum, std::ostream& outFile) {
unsigned int seed = (unsigned int)time(NULL);
// Known colors for training classes from Cityscape dataset
static std::vector<Color> colors = {{128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190}, {153, 153, 153}, {30, 170, 250},
{0, 220, 220}, {35, 142, 107}, {152, 251, 152}, {180, 130, 70}, {60, 20, 220}, {0, 0, 255}, {142, 0, 0},
{70, 0, 0}, {100, 60, 0}, {90, 0, 0}, {230, 0, 0}, {32, 11, 119}, {0, 74, 111}, {81, 0, 81}};
static std::vector<Color> colors = {
{128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190}, {153, 153, 153},
{30, 170, 250}, {0, 220, 220}, {35, 142, 107}, {152, 251, 152}, {180, 130, 70}, {60, 20, 220},
{0, 0, 255}, {142, 0, 0}, {70, 0, 0}, {100, 60, 0}, {90, 0, 0}, {230, 0, 0},
{32, 11, 119}, {0, 74, 111}, {81, 0, 81}};
while (classesNum > colors.size()) {
static std::mt19937 rng(seed);
@ -171,13 +174,17 @@ static UNUSED void writeOutputBmp(std::vector<std::vector<size_t>> data, size_t
unsigned char file[14] = {
'B',
'M', // magic
0, 0, 0,
0,
0,
0,
0, // size in bytes
0,
0, // app data
0,
0, // app data
40 + 14, 0, 0,
40 + 14,
0,
0,
0 // start of data offset
};
unsigned char info[40] = {
@ -262,13 +269,17 @@ static UNUSED bool writeOutputBmp(std::string name, unsigned char* data, size_t
unsigned char file[14] = {
'B',
'M', // magic
0, 0, 0,
0,
0,
0,
0, // size in bytes
0,
0, // app data
0,
0, // app data
40 + 14, 0, 0,
40 + 14,
0,
0,
0 // start of data offset
};
unsigned char info[40] = {
@ -342,11 +353,18 @@ static UNUSED bool writeOutputBmp(std::string name, unsigned char* data, size_t
* @param classes - vector of classes
* @param thickness - thickness of a line (in pixels) to be used for bounding boxes
*/
static UNUSED void addRectangles(unsigned char* data, size_t height, size_t width, std::vector<int> rectangles, std::vector<int> classes, int thickness = 1) {
static UNUSED void addRectangles(unsigned char* data,
size_t height,
size_t width,
std::vector<int> rectangles,
std::vector<int> classes,
int thickness = 1) {
std::vector<Color> colors = {// colors to be used for bounding boxes
{128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190}, {153, 153, 153}, {30, 170, 250},
{0, 220, 220}, {35, 142, 107}, {152, 251, 152}, {180, 130, 70}, {60, 20, 220}, {0, 0, 255}, {142, 0, 0},
{70, 0, 0}, {100, 60, 0}, {90, 0, 0}, {230, 0, 0}, {32, 11, 119}, {0, 74, 111}, {81, 0, 81}};
{128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190},
{153, 153, 153}, {30, 170, 250}, {0, 220, 220}, {35, 142, 107}, {152, 251, 152},
{180, 130, 70}, {60, 20, 220}, {0, 0, 255}, {142, 0, 0}, {70, 0, 0},
{100, 60, 0}, {90, 0, 0}, {230, 0, 0}, {32, 11, 119}, {0, 74, 111},
{81, 0, 81}};
if (rectangles.size() % 4 != 0 || rectangles.size() / 4 != classes.size()) {
return;
}
@ -430,13 +448,17 @@ static UNUSED bool writeOutputBmp(unsigned char* data, size_t height, size_t wid
unsigned char file[14] = {
'B',
'M', // magic
0, 0, 0,
0,
0,
0,
0, // size in bytes
0,
0, // app data
0,
0, // app data
40 + 14, 0, 0,
40 + 14,
0,
0,
0 // start of data offset
};
unsigned char info[40] = {
@ -515,8 +537,11 @@ static std::vector<std::pair<std::string, InferenceEngine::InferenceEngineProfil
return sorted;
}
static UNUSED void printPerformanceCounts(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& performanceMap, std::ostream& stream,
std::string deviceName, bool bshowHeader = true) {
static UNUSED void printPerformanceCounts(
const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& performanceMap,
std::ostream& stream,
std::string deviceName,
bool bshowHeader = true) {
long long totalTime = 0;
// Print performance counts
if (bshowHeader) {
@ -560,12 +585,16 @@ static UNUSED void printPerformanceCounts(const std::map<std::string, InferenceE
std::cout << std::endl;
}
static UNUSED void printPerformanceCounts(InferenceEngine::InferRequest request, std::ostream& stream, std::string deviceName, bool bshowHeader = true) {
static UNUSED void printPerformanceCounts(InferenceEngine::InferRequest request,
std::ostream& stream,
std::string deviceName,
bool bshowHeader = true) {
auto performanceMap = request.GetPerformanceCounts();
printPerformanceCounts(performanceMap, stream, deviceName, bshowHeader);
}
inline std::map<std::string, std::string> getMapFullDevicesNames(InferenceEngine::Core& ie, std::vector<std::string> devices) {
inline std::map<std::string, std::string> getMapFullDevicesNames(InferenceEngine::Core& ie,
std::vector<std::string> devices) {
std::map<std::string, std::string> devicesMap;
InferenceEngine::Parameter p;
for (std::string& deviceName : devices) {
@ -608,8 +637,20 @@ public:
float xmin, xmax, ymin, ymax, prob;
bool difficult;
DetectedObject(int _objectType, float _xmin, float _ymin, float _xmax, float _ymax, float _prob, bool _difficult = false)
: objectType(_objectType), xmin(_xmin), xmax(_xmax), ymin(_ymin), ymax(_ymax), prob(_prob), difficult(_difficult) {}
DetectedObject(int _objectType,
float _xmin,
float _ymin,
float _xmax,
float _ymax,
float _prob,
bool _difficult = false)
: objectType(_objectType),
xmin(_xmin),
xmax(_xmax),
ymin(_ymin),
ymax(_ymax),
prob(_prob),
difficult(_difficult) {}
DetectedObject(const DetectedObject& other) = default;
@ -617,10 +658,18 @@ public:
// Add small space to eliminate empty squares
float epsilon = 0; // 1e-5f;
DetectedObject detectedObject1(detectedObject1_.objectType, (detectedObject1_.xmin - epsilon), (detectedObject1_.ymin - epsilon),
(detectedObject1_.xmax - epsilon), (detectedObject1_.ymax - epsilon), detectedObject1_.prob);
DetectedObject detectedObject2(detectedObject2_.objectType, (detectedObject2_.xmin + epsilon), (detectedObject2_.ymin + epsilon),
(detectedObject2_.xmax), (detectedObject2_.ymax), detectedObject2_.prob);
DetectedObject detectedObject1(detectedObject1_.objectType,
(detectedObject1_.xmin - epsilon),
(detectedObject1_.ymin - epsilon),
(detectedObject1_.xmax - epsilon),
(detectedObject1_.ymax - epsilon),
detectedObject1_.prob);
DetectedObject detectedObject2(detectedObject2_.objectType,
(detectedObject2_.xmin + epsilon),
(detectedObject2_.ymin + epsilon),
(detectedObject2_.xmax),
(detectedObject2_.ymax),
detectedObject2_.prob);
if (detectedObject1.objectType != detectedObject2.objectType) {
// objects are different, so the result is 0
@ -657,8 +706,10 @@ public:
}
// union
float square1 = (addendum + detectedObject1.xmax - detectedObject1.xmin) * (addendum + detectedObject1.ymax - detectedObject1.ymin);
float square2 = (addendum + detectedObject2.xmax - detectedObject2.xmin) * (addendum + detectedObject2.ymax - detectedObject2.ymin);
float square1 = (addendum + detectedObject1.xmax - detectedObject1.xmin) *
(addendum + detectedObject1.ymax - detectedObject1.ymin);
float square2 = (addendum + detectedObject2.xmax - detectedObject2.xmin) *
(addendum + detectedObject2.ymax - detectedObject2.ymin);
float unn = square1 + square2 - intr;
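The union term above follows the standard intersection-over-union identity, union = area1 + area2 - intersection. A hedged standalone sketch for plain axis-aligned boxes is shown below; the Box struct and iou helper are hypothetical and ignore the class check and the "addendum" adjustment used by the sample.
#include <algorithm>
struct Box {
    float xmin, ymin, xmax, ymax;
};
// IoU = intersection / (area1 + area2 - intersection); returns 0 for disjoint boxes.
inline float iou(const Box& a, const Box& b) {
    float iw = std::max(0.0f, std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin));
    float ih = std::max(0.0f, std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin));
    float intr = iw * ih;
    float unn = (a.xmax - a.xmin) * (a.ymax - a.ymin) + (b.xmax - b.xmin) * (b.ymax - b.ymin) - intr;
    return unn > 0.0f ? intr / unn : 0.0f;
}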
@ -666,7 +717,13 @@ public:
}
DetectedObject scale(float scale_x, float scale_y) const {
return DetectedObject(objectType, xmin * scale_x, ymin * scale_y, xmax * scale_x, ymax * scale_y, prob, difficult);
return DetectedObject(objectType,
xmin * scale_x,
ymin * scale_y,
xmax * scale_x,
ymax * scale_y,
prob,
difficult);
}
};
@ -675,7 +732,9 @@ public:
const std::list<DetectedObject> alist;
const bool check_probs;
explicit ImageDescription(const std::list<DetectedObject>& _alist, bool _check_probs = false): alist(_alist), check_probs(_check_probs) {}
explicit ImageDescription(const std::list<DetectedObject>& _alist, bool _check_probs = false)
: alist(_alist),
check_probs(_check_probs) {}
static float ioUMultiple(const ImageDescription& detectedObjects, const ImageDescription& desiredObjects) {
const ImageDescription *detectedObjectsSmall, *detectedObjectsBig;
@ -882,10 +941,15 @@ public:
* @param width - width of the rectangle
* @param detectedObjects - vector of detected objects
*/
static UNUSED void addRectangles(unsigned char* data, size_t height, size_t width, std::vector<DetectedObject> detectedObjects) {
std::vector<Color> colors = {{128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190}, {153, 153, 153}, {30, 170, 250},
{0, 220, 220}, {35, 142, 107}, {152, 251, 152}, {180, 130, 70}, {60, 20, 220}, {0, 0, 255}, {142, 0, 0},
{70, 0, 0}, {100, 60, 0}, {90, 0, 0}, {230, 0, 0}, {32, 11, 119}, {0, 74, 111}, {81, 0, 81}};
static UNUSED void addRectangles(unsigned char* data,
size_t height,
size_t width,
std::vector<DetectedObject> detectedObjects) {
std::vector<Color> colors = {{128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190},
{153, 153, 153}, {30, 170, 250}, {0, 220, 220}, {35, 142, 107}, {152, 251, 152},
{180, 130, 70}, {60, 20, 220}, {0, 0, 255}, {142, 0, 0}, {70, 0, 0},
{100, 60, 0}, {90, 0, 0}, {230, 0, 0}, {32, 11, 119}, {0, 74, 111},
{81, 0, 81}};
for (size_t i = 0; i < detectedObjects.size(); i++) {
int cls = detectedObjects[i].objectType % colors.size();
@ -923,10 +987,11 @@ inline std::size_t getTensorWidth(const InferenceEngine::TensorDesc& desc) {
const auto& layout = desc.getLayout();
const auto& dims = desc.getDims();
const auto& size = dims.size();
if ((size >= 2) && (layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC || layout == InferenceEngine::Layout::NCDHW ||
layout == InferenceEngine::Layout::NDHWC || layout == InferenceEngine::Layout::OIHW || layout == InferenceEngine::Layout::GOIHW ||
layout == InferenceEngine::Layout::OIDHW || layout == InferenceEngine::Layout::GOIDHW || layout == InferenceEngine::Layout::CHW ||
layout == InferenceEngine::Layout::HW)) {
if ((size >= 2) && (layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
layout == InferenceEngine::Layout::NCDHW || layout == InferenceEngine::Layout::NDHWC ||
layout == InferenceEngine::Layout::OIHW || layout == InferenceEngine::Layout::GOIHW ||
layout == InferenceEngine::Layout::OIDHW || layout == InferenceEngine::Layout::GOIDHW ||
layout == InferenceEngine::Layout::CHW || layout == InferenceEngine::Layout::HW)) {
// Regardless of layout, dimensions are stored in fixed order
return dims.back();
} else {
@ -939,10 +1004,11 @@ inline std::size_t getTensorHeight(const InferenceEngine::TensorDesc& desc) {
const auto& layout = desc.getLayout();
const auto& dims = desc.getDims();
const auto& size = dims.size();
if ((size >= 2) && (layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC || layout == InferenceEngine::Layout::NCDHW ||
layout == InferenceEngine::Layout::NDHWC || layout == InferenceEngine::Layout::OIHW || layout == InferenceEngine::Layout::GOIHW ||
layout == InferenceEngine::Layout::OIDHW || layout == InferenceEngine::Layout::GOIDHW || layout == InferenceEngine::Layout::CHW ||
layout == InferenceEngine::Layout::HW)) {
if ((size >= 2) && (layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
layout == InferenceEngine::Layout::NCDHW || layout == InferenceEngine::Layout::NDHWC ||
layout == InferenceEngine::Layout::OIHW || layout == InferenceEngine::Layout::GOIHW ||
layout == InferenceEngine::Layout::OIDHW || layout == InferenceEngine::Layout::GOIDHW ||
layout == InferenceEngine::Layout::CHW || layout == InferenceEngine::Layout::HW)) {
// Regardless of layout, dimensions are stored in fixed order
return dims.at(size - 2);
} else {
@ -953,8 +1019,9 @@ inline std::size_t getTensorHeight(const InferenceEngine::TensorDesc& desc) {
inline std::size_t getTensorChannels(const InferenceEngine::TensorDesc& desc) {
const auto& layout = desc.getLayout();
if (layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC || layout == InferenceEngine::Layout::NCDHW ||
layout == InferenceEngine::Layout::NDHWC || layout == InferenceEngine::Layout::C || layout == InferenceEngine::Layout::CHW ||
if (layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
layout == InferenceEngine::Layout::NCDHW || layout == InferenceEngine::Layout::NDHWC ||
layout == InferenceEngine::Layout::C || layout == InferenceEngine::Layout::CHW ||
layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::CN) {
// Regardless of layout, dimensions are stored in fixed order
const auto& dims = desc.getDims();
@ -982,8 +1049,9 @@ inline std::size_t getTensorChannels(const InferenceEngine::TensorDesc& desc) {
inline std::size_t getTensorBatch(const InferenceEngine::TensorDesc& desc) {
const auto& layout = desc.getLayout();
if (layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC || layout == InferenceEngine::Layout::NCDHW ||
layout == InferenceEngine::Layout::NDHWC || layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::CN) {
if (layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
layout == InferenceEngine::Layout::NCDHW || layout == InferenceEngine::Layout::NDHWC ||
layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::CN) {
// Regardless of layout, dimensions are stored in fixed order
const auto& dims = desc.getDims();
switch (desc.getLayoutByDims(dims)) {
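These getters all rely on the invariant stated in the repeated comments: whatever the layout, TensorDesc::getDims() keeps the dimensions in a fixed canonical order, so width is the last element, height the one before it, and channels/batch are resolved through the layout switch. A small usage sketch, assuming the common samples header that declares getTensorWidth/getTensorHeight/getTensorChannels/getTensorBatch is included; the 1x3x224x224 descriptor is an invented example.

#include <inference_engine.hpp>
#include <iostream>

int main() {
    // Invented example descriptor: batch 1, 3 channels, 224x224, NCHW.
    InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32,
                                     {1, 3, 224, 224},
                                     InferenceEngine::Layout::NCHW);
    std::cout << "W=" << getTensorWidth(desc) << " H=" << getTensorHeight(desc)
              << " C=" << getTensorChannels(desc) << " N=" << getTensorBatch(desc) << std::endl;
    // Expected for this descriptor: W=224 H=224 C=3 N=1
    return 0;
}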

View File

@ -29,9 +29,13 @@ public:
* @param _total - maximum value that corresponds to 100%
* @param _detalization - number of symbols (.) used to represent progress
*/
explicit ConsoleProgress(size_t _total, bool _stream_output = false, size_t _percent_to_update = DEFAULT_PERCENT_TO_UPDATE_PROGRESS,
explicit ConsoleProgress(size_t _total,
bool _stream_output = false,
size_t _percent_to_update = DEFAULT_PERCENT_TO_UPDATE_PROGRESS,
size_t _detalization = DEFAULT_DETALIZATION)
: total(_total), detalization(_detalization), percent_to_update(_percent_to_update) {
: total(_total),
detalization(_detalization),
percent_to_update(_percent_to_update) {
stream_output = _stream_output;
if (total == 0) {
total = 1;
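The hunk above only re-wraps the constructor; note the total == 0 guard, which keeps the later percentage math away from a division by zero. For orientation, a typical driver loop is sketched below; the addProgress/finish calls are assumed from how the samples usually exercise this class and are not part of this diff.

#include <cstddef>
// Assumes console_progress.hpp (diffed above) is included for ConsoleProgress.

void run_with_progress(std::size_t total_steps) {
    ConsoleProgress progress(total_steps, /*_stream_output=*/false);
    for (std::size_t i = 0; i < total_steps; ++i) {
        // ... one unit of work ...
        progress.addProgress(1);  // assumed API: advance the bar by one step
    }
    progress.finish();  // assumed API: print the final state
}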

View File

@ -70,7 +70,9 @@ static UNUSED InferenceEngine::Blob::Ptr wrapMat2Blob(const cv::Mat& mat) {
if (!is_dense)
IE_THROW() << "Doesn't support conversion from not dense cv::Mat";
InferenceEngine::TensorDesc tDesc(InferenceEngine::Precision::U8, {1, channels, height, width}, InferenceEngine::Layout::NHWC);
InferenceEngine::TensorDesc tDesc(InferenceEngine::Precision::U8,
{1, channels, height, width},
InferenceEngine::Layout::NHWC);
return InferenceEngine::make_shared_blob<uint8_t>(tDesc, mat.data);
}
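wrapMat2Blob builds a U8/NHWC TensorDesc directly over the cv::Mat's buffer, so the resulting blob is a zero-copy view that is only valid while the Mat stays alive. A hedged usage sketch; the input name is a placeholder and the helper is assumed to come from the ocv_common header diffed above.

#include <inference_engine.hpp>
#include <opencv2/core.hpp>
#include <string>

// Feed one decoded frame to an already-created infer request without copying pixel data.
void infer_frame(InferenceEngine::InferRequest& request, const std::string& input_name, const cv::Mat& frame) {
    InferenceEngine::Blob::Ptr blob = wrapMat2Blob(frame);  // U8 NHWC view over frame.data
    request.SetBlob(input_name, blob);                      // stays valid only while frame is alive
    request.Infer();
}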

View File

@ -142,10 +142,18 @@ InferenceEngine::Precision getPrecision(std::string value, const supported_preci
InferenceEngine::Precision getPrecision(const std::string& value) {
static const supported_precisions_t supported_precisions = {
{"FP32", InferenceEngine::Precision::FP32}, {"FP16", InferenceEngine::Precision::FP16}, {"BF16", InferenceEngine::Precision::BF16},
{"U64", InferenceEngine::Precision::U64}, {"I64", InferenceEngine::Precision::I64}, {"U32", InferenceEngine::Precision::U32},
{"I32", InferenceEngine::Precision::I32}, {"U16", InferenceEngine::Precision::U16}, {"I16", InferenceEngine::Precision::I16},
{"U8", InferenceEngine::Precision::U8}, {"I8", InferenceEngine::Precision::I8}, {"BOOL", InferenceEngine::Precision::BOOL},
{"FP32", InferenceEngine::Precision::FP32},
{"FP16", InferenceEngine::Precision::FP16},
{"BF16", InferenceEngine::Precision::BF16},
{"U64", InferenceEngine::Precision::U64},
{"I64", InferenceEngine::Precision::I64},
{"U32", InferenceEngine::Precision::U32},
{"I32", InferenceEngine::Precision::I32},
{"U16", InferenceEngine::Precision::U16},
{"I16", InferenceEngine::Precision::I16},
{"U8", InferenceEngine::Precision::U8},
{"I8", InferenceEngine::Precision::I8},
{"BOOL", InferenceEngine::Precision::BOOL},
};
return getPrecision(value, supported_precisions);
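With the map now listing one precision per line, lookups still go through the string overload above. Two assumed call sites for reference (uppercase names match the keys exactly as written; error handling for unknown names stays inside the helper):

#include <inference_engine.hpp>
// Assumes the header declaring getPrecision(const std::string&) is included as well.
static const InferenceEngine::Precision kInputPrecision = getPrecision("U8");     // InferenceEngine::Precision::U8
static const InferenceEngine::Precision kOutputPrecision = getPrecision("FP32");  // InferenceEngine::Precision::FP32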
@ -176,7 +184,10 @@ void setPrecisions(const InferenceEngine::CNNNetwork& network, const std::string
} // namespace
void processPrecision(InferenceEngine::CNNNetwork& network, const std::string& ip, const std::string& op, const std::string& iop) {
void processPrecision(InferenceEngine::CNNNetwork& network,
const std::string& ip,
const std::string& op,
const std::string& iop) {
if (!ip.empty()) {
const auto user_precision = getPrecision(ip);
for (auto&& layer : network.getInputsInfo()) {
@ -213,19 +224,26 @@ InferenceEngine::Layout getLayout(std::string value, const supported_layouts_t&
InferenceEngine::Layout getLayout(const std::string& value) {
static const supported_layouts_t supported_layouts = {
{"NCDHW", InferenceEngine::Layout::NCDHW}, {"NDHWC", InferenceEngine::Layout::NDHWC}, {"NCHW", InferenceEngine::Layout::NCHW},
{"NHWC", InferenceEngine::Layout::NHWC}, {"CHW", InferenceEngine::Layout::CHW}, {"HWC", InferenceEngine::Layout::HWC},
{"NC", InferenceEngine::Layout::NC}, {"C", InferenceEngine::Layout::C},
{"NCDHW", InferenceEngine::Layout::NCDHW},
{"NDHWC", InferenceEngine::Layout::NDHWC},
{"NCHW", InferenceEngine::Layout::NCHW},
{"NHWC", InferenceEngine::Layout::NHWC},
{"CHW", InferenceEngine::Layout::CHW},
{"HWC", InferenceEngine::Layout::HWC},
{"NC", InferenceEngine::Layout::NC},
{"C", InferenceEngine::Layout::C},
};
return getLayout(value, supported_layouts);
}
bool isMatchLayoutToDims(InferenceEngine::Layout layout, size_t dimension) {
static const matchLayoutToDims_t matchLayoutToDims = {
{static_cast<size_t>(InferenceEngine::Layout::NCDHW), 5}, {static_cast<size_t>(InferenceEngine::Layout::NDHWC), 5},
{static_cast<size_t>(InferenceEngine::Layout::NCHW), 4}, {static_cast<size_t>(InferenceEngine::Layout::NHWC), 4},
{static_cast<size_t>(InferenceEngine::Layout::CHW), 3}, {static_cast<size_t>(InferenceEngine::Layout::NC), 2},
static const matchLayoutToDims_t matchLayoutToDims = {{static_cast<size_t>(InferenceEngine::Layout::NCDHW), 5},
{static_cast<size_t>(InferenceEngine::Layout::NDHWC), 5},
{static_cast<size_t>(InferenceEngine::Layout::NCHW), 4},
{static_cast<size_t>(InferenceEngine::Layout::NHWC), 4},
{static_cast<size_t>(InferenceEngine::Layout::CHW), 3},
{static_cast<size_t>(InferenceEngine::Layout::NC), 2},
{static_cast<size_t>(InferenceEngine::Layout::C), 1}};
const auto dims = matchLayoutToDims.find(static_cast<size_t>(layout));
@ -269,7 +287,10 @@ void setLayouts(const InferenceEngine::CNNNetwork& network, const std::string io
} // namespace
void processLayout(InferenceEngine::CNNNetwork& network, const std::string& il, const std::string& ol, const std::string& iol) {
void processLayout(InferenceEngine::CNNNetwork& network,
const std::string& il,
const std::string& ol,
const std::string& iol) {
if (!il.empty()) {
const auto layout = getLayout(il);
for (auto&& layer : network.getInputsInfo()) {
@ -296,10 +317,12 @@ void processLayout(InferenceEngine::CNNNetwork& network, const std::string& il,
void printInputAndOutputsInfo(const InferenceEngine::CNNNetwork& network) {
std::cout << "Network inputs:" << std::endl;
for (auto&& layer : network.getInputsInfo()) {
std::cout << " " << layer.first << " : " << layer.second->getPrecision() << " / " << layer.second->getLayout() << std::endl;
std::cout << " " << layer.first << " : " << layer.second->getPrecision() << " / "
<< layer.second->getLayout() << std::endl;
}
std::cout << "Network outputs:" << std::endl;
for (auto&& layer : network.getOutputsInfo()) {
std::cout << " " << layer.first << " : " << layer.second->getPrecision() << " / " << layer.second->getLayout() << std::endl;
std::cout << " " << layer.first << " : " << layer.second->getPrecision() << " / "
<< layer.second->getLayout() << std::endl;
}
}
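Taken together, the helpers reformatted in this file are the samples' option plumbing: precision strings go through getPrecision/processPrecision, layout strings through getLayout/processLayout (with isMatchLayoutToDims sanity-checking tensor ranks), and printInputAndOutputsInfo reports the outcome. A sketch of that flow is below; the model path, the flag values, and the name:PRECISION override syntax are assumptions for illustration, not taken from this diff.

#include <inference_engine.hpp>
#include <string>

// Assumes the args_helper header declaring processPrecision, processLayout and
// printInputAndOutputsInfo is included.
void configure_and_report(const std::string& model_xml) {
    InferenceEngine::Core core;
    InferenceEngine::CNNNetwork network = core.ReadNetwork(model_xml);

    // Hypothetical user choices: all inputs U8, all outputs FP32,
    // plus a per-tensor override using the assumed "name:PRECISION" syntax.
    processPrecision(network, "U8", "FP32", "data:FP16");

    // All inputs NHWC; outputs and per-tensor layout overrides left empty.
    processLayout(network, "NHWC", "", "");

    printInputAndOutputsInfo(network);
}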

Some files were not shown because too many files have changed in this diff