Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
508c616151
@ -117,7 +117,6 @@ jobs:
|
||||
-DPYTHON_EXECUTABLE=/usr/bin/python3.8
|
||||
-DENABLE_WHEEL=ON
|
||||
-DENABLE_TESTS=ON
|
||||
-DNGRAPH_ONNX_IMPORT_ENABLE=ON
|
||||
-DNGRAPH_ONNX_FRONTEND_ENABLE=ON
|
||||
-DENABLE_FASTER_BUILD=ON
|
||||
-DENABLE_STRICT_DEPENDENCIES=OFF
|
||||
|
@ -94,7 +94,6 @@ jobs:
|
||||
-DENABLE_PROFILING_ITT=OFF
|
||||
-DENABLE_SAMPLES=OFF
|
||||
-DENABLE_SPEECH_DEMO=OFF
|
||||
-DNGRAPH_ONNX_IMPORT_ENABLE=ON
|
||||
-DNGRAPH_ONNX_FRONTEND_ENABLE=ON
|
||||
-DNGRAPH_DEBUG_ENABLE=OFF
|
||||
$(REPO_DIR)
|
||||
|
@ -68,7 +68,6 @@ RUN cmake .. \
|
||||
-DENABLE_SPEECH_DEMO=OFF \
|
||||
-DENABLE_PYTHON=ON \
|
||||
-DPYTHON_EXECUTABLE=/usr/bin/python3 \
|
||||
-DNGRAPH_ONNX_IMPORT_ENABLE=ON \
|
||||
-DNGRAPH_ONNX_FRONTEND_ENABLE=ON \
|
||||
-DNGRAPH_DEBUG_ENABLE=OFF \
|
||||
-DCMAKE_INSTALL_PREFIX=/openvino/dist \
|
||||
|
4
.github/workflows/code_style.yml
vendored
4
.github/workflows/code_style.yml
vendored
@ -34,7 +34,7 @@ jobs:
|
||||
- name: Create code style diff
|
||||
if: failure()
|
||||
run: |
|
||||
cmake --build . --target clang_format_fix_all
|
||||
cmake --build . --target clang_format_fix_all -j8
|
||||
git diff > code_style_diff.diff
|
||||
working-directory: build
|
||||
|
||||
@ -42,7 +42,7 @@ jobs:
|
||||
if: failure()
|
||||
with:
|
||||
name: code_style_diff
|
||||
path: code_style_diff.diff
|
||||
path: build/code_style_diff.diff
|
||||
|
||||
ShellCheck:
|
||||
runs-on: ubuntu-18.04
|
||||
|
8
.gitmodules
vendored
8
.gitmodules
vendored
@ -18,12 +18,12 @@
|
||||
path = thirdparty/ade
|
||||
url = https://github.com/opencv/ade.git
|
||||
ignore = dirty
|
||||
[submodule "thirdparty/gflags"]
|
||||
path = thirdparty/gflags
|
||||
[submodule "thirdparty/gflags/gflags"]
|
||||
path = thirdparty/gflags/gflags
|
||||
url = https://github.com/gflags/gflags.git
|
||||
ignore = dirty
|
||||
[submodule "thirdparty/gtest"]
|
||||
path = thirdparty/gtest
|
||||
[submodule "thirdparty/gtest/gtest"]
|
||||
path = thirdparty/gtest/gtest
|
||||
url = https://github.com/openvinotoolkit/googletest.git
|
||||
ignore = dirty
|
||||
[submodule "thirdparty/ocl/icd_loader"]
|
||||
|
@ -90,17 +90,10 @@ ie_coverage_extract(INPUT "openvino" OUTPUT "ngraph"
|
||||
ie_coverage_genhtml(INFO_FILE "ngraph"
|
||||
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
|
||||
|
||||
if(NGRAPH_ONNX_IMPORT_ENABLE)
|
||||
ie_coverage_extract(INPUT "openvino" OUTPUT "onnx_importer"
|
||||
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/onnx_common*"
|
||||
"${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/onnx_import*")
|
||||
ie_coverage_genhtml(INFO_FILE "onnx_importer"
|
||||
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
|
||||
endif()
|
||||
|
||||
if(NGRAPH_ONNX_FRONTEND_ENABLE)
|
||||
ie_coverage_extract(INPUT "openvino" OUTPUT "onnx_ngraph_frontend"
|
||||
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/frontend*")
|
||||
ie_coverage_genhtml(INFO_FILE "onnx_ngraph_frontend"
|
||||
ie_coverage_extract(INPUT "openvino" OUTPUT "onnx"
|
||||
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/*"
|
||||
"${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/*")
|
||||
ie_coverage_genhtml(INFO_FILE "onnx"
|
||||
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
|
||||
endif()
|
||||
|
@ -122,13 +122,12 @@ else()
|
||||
set(protoc_available ON)
|
||||
endif()
|
||||
|
||||
ie_dependent_option(NGRAPH_ONNX_IMPORT_ENABLE "Enable ONNX importer" ON "protoc_available" OFF)
|
||||
ie_dependent_option(NGRAPH_ONNX_FRONTEND_ENABLE "Enable ONNX FrontEnd" OFF "NGRAPH_ONNX_IMPORT_ENABLE" OFF)
|
||||
ie_dependent_option(NGRAPH_ONNX_FRONTEND_ENABLE "Enable ONNX FrontEnd" ON "protoc_available" OFF)
|
||||
ie_dependent_option(NGRAPH_PDPD_FRONTEND_ENABLE "Enable PaddlePaddle FrontEnd" ON "protoc_available" OFF)
|
||||
ie_dependent_option(NGRAPH_USE_PROTOBUF_LITE "Compiles and links with protobuf-lite" ON
|
||||
"NGRAPH_ONNX_IMPORT_ENABLE" OFF)
|
||||
"NGRAPH_ONNX_FRONTEND_ENABLE" OFF)
|
||||
ie_dependent_option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system protobuf" OFF
|
||||
"NGRAPH_ONNX_IMPORT_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF)
|
||||
"NGRAPH_ONNX_FRONTEND_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF)
|
||||
ie_dependent_option(NGRAPH_UNIT_TEST_ENABLE "Enables ngraph unit tests" ON "ENABLE_TESTS;NOT ANDROID" OFF)
|
||||
ie_dependent_option(NGRAPH_UNIT_TEST_BACKENDS_ENABLE "Control the building of unit tests using backends" ON
|
||||
"NGRAPH_UNIT_TEST_ENABLE" OFF)
|
||||
|
@ -28,9 +28,8 @@
|
||||
#
|
||||
# ngraph::frontend_manager - nGraph frontend manager
|
||||
#
|
||||
# ngraph_onnx_importer_FOUND - True if the system has onnx_importer library
|
||||
# ngraph::onnx_importer - ONNX importer target (optional)
|
||||
# ONNX_IMPORTER_LIBRARIES - ONNX importer libraries
|
||||
# ngraph_onnx_ngraph_frontend_FOUND - True if the system has onnx_ngraph_frontend library
|
||||
# ngraph::onnx_ngraph_frontend - ONNX FrontEnd target (optional)
|
||||
#
|
||||
# ngraph_paddlepaddle_frontend_FOUND - True if the system has PDPD frontend
|
||||
# ngraph::paddlepaddle_ngraph_frontend - nGraph PDPD frontend (optional)
|
||||
@ -38,18 +37,30 @@
|
||||
|
||||
@PACKAGE_INIT@
|
||||
|
||||
function(set_imported_global target)
|
||||
get_target_property(IS_IMPORTED_GLOBAL ${target} IMPORTED_GLOBAL)
|
||||
if (NOT IS_IMPORTED_GLOBAL)
|
||||
set_target_properties(${target} PROPERTIES IMPORTED_GLOBAL TRUE)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
if(NOT TARGET ngraph)
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/ngraphTargets.cmake")
|
||||
set_imported_global(ngraph::ngraph)
|
||||
set_imported_global(ngraph::frontend_manager)
|
||||
endif()
|
||||
|
||||
set(ngraph_ngraph_FOUND ON)
|
||||
set(NGRAPH_LIBRARIES ngraph::ngraph)
|
||||
|
||||
set(ngraph_onnx_importer_FOUND @NGRAPH_ONNX_IMPORT_ENABLE@)
|
||||
if(ngraph_onnx_importer_FOUND)
|
||||
set(ONNX_IMPORTER_LIBRARIES ngraph::onnx_importer)
|
||||
set(ngraph_onnx_ngraph_frontend_FOUND @NGRAPH_ONNX_FRONTEND_ENABLE@)
|
||||
if (ngraph_onnx_ngraph_frontend_FOUND AND NOT TARGET onnx_ngraph_frontend AND NOT TARGET ngraph::onnx_importer)
|
||||
set_imported_global(ngraph::onnx_ngraph_frontend)
|
||||
add_library(ngraph::onnx_importer ALIAS ngraph::onnx_ngraph_frontend)
|
||||
endif()
|
||||
set(ngraph_paddlepaddle_frontend_FOUND @NGRAPH_PDPD_FRONTEND_ENABLE@)
|
||||
if(ngraph_paddlepaddle_frontend_FOUND AND NOT TARGET paddlepaddle_ngraph_frontend)
|
||||
set_imported_global(ngraph::paddlepaddle_ngraph_frontend)
|
||||
endif()
|
||||
|
||||
set(ngraph_paddlepaddle_frontend_FOUND @NGRAPH_PDPD_FRONTEND_ENABLE@)
|
||||
|
||||
check_required_components(ngraph)
|
||||
|
@ -17,7 +17,7 @@ function(ov_model_convert SRC DST OUT)
|
||||
get_filename_component(name_we "${in_file}" NAME_WE)
|
||||
set(model_source_dir "${SRC}/${rel_dir}")
|
||||
|
||||
if(NOT NGRAPH_ONNX_IMPORT_ENABLE AND ext MATCHES "^\\.(onnx|prototxt)$")
|
||||
if(NOT NGRAPH_ONNX_FRONTEND_ENABLE AND ext MATCHES "^\\.(onnx|prototxt)$")
|
||||
# don't copy / process ONNX / prototxt files
|
||||
continue()
|
||||
endif()
|
||||
@ -78,7 +78,7 @@ ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
|
||||
ie_onnx_import_out_files)
|
||||
|
||||
if(ENABLE_TESTS)
|
||||
if(NGRAPH_ONNX_IMPORT_ENABLE AND ENABLE_REQUIREMENTS_INSTALL)
|
||||
if(NGRAPH_ONNX_FRONTEND_ENABLE AND ENABLE_REQUIREMENTS_INSTALL)
|
||||
find_package(PythonInterp 3 REQUIRED)
|
||||
|
||||
get_filename_component(PYTHON_EXEC_DIR ${PYTHON_EXECUTABLE} DIRECTORY)
|
||||
|
@ -1,6 +1,7 @@
|
||||
BasedOnStyle: Google
|
||||
IndentWidth: 4
|
||||
UseTab: Never
|
||||
ColumnLimit: 120
|
||||
|
||||
Language: Cpp
|
||||
Standard: Cpp11
|
||||
@ -8,18 +9,20 @@ Standard: Cpp11
|
||||
AccessModifierOffset: -4
|
||||
AlignConsecutiveMacros: true
|
||||
AllowAllArgumentsOnNextLine: false
|
||||
AllowAllConstructorInitializersOnNextLine: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: false
|
||||
AllowShortFunctionsOnASingleLine: Empty
|
||||
AllowShortIfStatementsOnASingleLine: Never
|
||||
AllowShortLambdasOnASingleLine: Empty
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AlwaysBreakBeforeMultilineStrings: false
|
||||
ColumnLimit: 160
|
||||
# Specialize this comment pragma in order to avoid changes in SEA copyrights
|
||||
BinPackArguments: false
|
||||
BinPackParameters: false
|
||||
CommentPragmas: '^#'
|
||||
DerivePointerAlignment: false
|
||||
FixNamespaceComments: true
|
||||
IndentCaseLabels: false
|
||||
IndentPPDirectives: BeforeHash
|
||||
SpaceBeforeCpp11BracedList: true
|
||||
SpaceBeforeCtorInitializerColon: false
|
||||
IndentPPDirectives: AfterHash
|
||||
ForEachMacros:
|
||||
- foreach
|
||||
- FOREACH_CHILD
|
||||
|
@ -25,7 +25,7 @@ if(NOT ENABLE_DOCKER)
|
||||
set(InferenceEngine_DIR ${CMAKE_BINARY_DIR})
|
||||
endif()
|
||||
|
||||
if(NGRAPH_ONNX_IMPORT_ENABLE)
|
||||
if(NGRAPH_ONNX_FRONTEND_ENABLE)
|
||||
add_subdirectory(onnx_custom_op)
|
||||
endif()
|
||||
add_subdirectory(template_extension)
|
||||
@ -223,7 +223,11 @@ function(build_docs)
|
||||
"${OpenVINO_SOURCE_DIR}/inference-engine/*.md"
|
||||
"${OpenVINO_SOURCE_DIR}/inference-engine/*.png"
|
||||
"${OpenVINO_SOURCE_DIR}/inference-engine/*.gif"
|
||||
"${OpenVINO_SOURCE_DIR}/inference-engine/*.jpg")
|
||||
"${OpenVINO_SOURCE_DIR}/inference-engine/*.jpg"
|
||||
"${OpenVINO_SOURCE_DIR}/tools/*.md"
|
||||
"${OpenVINO_SOURCE_DIR}/tools/*.png"
|
||||
"${OpenVINO_SOURCE_DIR}/tools/*.gif"
|
||||
"${OpenVINO_SOURCE_DIR}/tools/*.jpg")
|
||||
|
||||
foreach(source_file ${ovino_doc_files})
|
||||
list(APPEND commands COMMAND ${CMAKE_COMMAND} -E copy
|
||||
|
@ -50,10 +50,9 @@ The example below demonstrates how to unregister an operator from the destructor
|
||||
|
||||
## Requirements for Building with CMake
|
||||
|
||||
A program that uses the `register_operator` functionality requires `ngraph` and `onnx_importer` libraries in addition to the Inference Engine.
|
||||
The `onnx_importer` is a component of the `ngraph` package , so `find_package(ngraph REQUIRED COMPONENTS onnx_importer)` can find both.
|
||||
The `ngraph` package exposes two variables, `${NGRAPH_LIBRARIES}` and `${ONNX_IMPORTER_LIBRARIES}`, which reference the `ngraph` and `onnx_importer` libraries.
|
||||
Those variables need to be passed to the `target_link_libraries` command in the CMakeLists.txt file.
|
||||
A program that uses the `register_operator` functionality requires `ngraph::ngraph` and `ngraph::onnx_ngraph_frontend` libraries in addition to the Inference Engine.
|
||||
The `onnx_ngraph_frontend` is a component of the `ngraph` package , so `find_package(ngraph REQUIRED COMPONENTS onnx_ngraph_frontend)` can find both.
|
||||
Those libraries need to be passed to the `target_link_libraries` command in the CMakeLists.txt file.
|
||||
|
||||
See CMakeLists.txt below for reference:
|
||||
@snippet onnx_custom_op/CMakeLists.txt cmake:onnx_custom_op
|
||||
|
@ -14,7 +14,7 @@ Inference Engine sample applications include the following:
|
||||
- [Automatic Speech Recognition Python Sample](../../inference-engine/ie_bridges/python/sample/speech_sample/README.md)
|
||||
- **Benchmark Application** – Estimates deep learning inference performance on supported devices for synchronous and asynchronous modes.
|
||||
- [Benchmark C++ Tool](../../inference-engine/samples/benchmark_app/README.md)
|
||||
- [Benchmark Python Tool](../../inference-engine/tools/benchmark_tool/README.md)
|
||||
- [Benchmark Python Tool](../../tools/benchmark_tool/README.md)
|
||||
- **Hello Classification Sample** – Inference of image classification networks like AlexNet and GoogLeNet using Synchronous Inference Request API. Input of any size and layout can be set to an infer request which will be pre-processed automatically during inference (the sample supports only images as inputs and supports Unicode paths).
|
||||
- [Hello Classification C++ Sample](../../inference-engine/samples/hello_classification/README.md)
|
||||
- [Hello Classification C Sample](../../inference-engine/ie_bridges/c/samples/hello_classification/README.md)
|
||||
|
@ -15,7 +15,7 @@ The models used in the performance benchmarks were chosen based on general adopt
|
||||
CF means Caffe*, while TF means TensorFlow*.
|
||||
|
||||
#### 5. How can I run the benchmark results on my own?
|
||||
All of the performance benchmarks were generated using the open-sourced tool within the Intel® Distribution of OpenVINO™ toolkit called `benchmark_app`, which is available in both [C++](../../inference-engine/samples/benchmark_app/README.md) and [Python](../../inference-engine/tools/benchmark_tool/README.md).
|
||||
All of the performance benchmarks were generated using the open-sourced tool within the Intel® Distribution of OpenVINO™ toolkit called `benchmark_app`, which is available in both [C++](../../inference-engine/samples/benchmark_app/README.md) and [Python](../../tools/benchmark_tool/README.md).
|
||||
|
||||
#### 6. What image sizes are used for the classification network models?
|
||||
The image size used in the inference depends on the network being benchmarked. The following table shows the list of input sizes for each network model.
|
||||
|
@ -61,7 +61,7 @@ limitations under the License.
|
||||
<tab type="user" title="Convert PyTorch* RNN-T Model " url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT"/>
|
||||
<tab type="user" title="Convert PyTorch* YOLACT Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_YOLACT"/>
|
||||
<tab type="user" title="Convert PyTorch* F3Net Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_F3Net"/>
|
||||
<tab type="user" title="Convert PyTorch* RCAN Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RCAN">
|
||||
<tab type="user" title="Convert PyTorch* RCAN Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RCAN"/>
|
||||
<tab type="user" title="Convert PyTorch* BERT-NER Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_Bert_ner"/>
|
||||
</tab>
|
||||
</tab>
|
||||
|
@ -7,11 +7,11 @@ set(CMAKE_CXX_STANDARD 11)
|
||||
|
||||
set(TARGET_NAME "onnx_custom_op")
|
||||
|
||||
find_package(ngraph REQUIRED COMPONENTS onnx_importer)
|
||||
find_package(ngraph REQUIRED COMPONENTS onnx_ngraph_frontend)
|
||||
|
||||
add_library(${TARGET_NAME} STATIC onnx_custom_op.cpp onnx_custom_op.hpp)
|
||||
|
||||
target_link_libraries(${TARGET_NAME} PUBLIC ${NGRAPH_LIBRARIES} ${ONNX_IMPORTER_LIBRARIES})
|
||||
target_link_libraries(${TARGET_NAME} PUBLIC ngraph::ngraph ngraph::onnx_ngraph_frontend)
|
||||
# [cmake:onnx_custom_op]
|
||||
|
||||
# Enable code style check
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
Let *min_value* and *max_value* be *min* and *max*, respectively. The mathematical formula of *Clamp* is as follows:
|
||||
\f[
|
||||
clamp( x_{i} )=\min\big( \max\left( x_{i}, min\_value \right), max\_value \big)
|
||||
clamp( x_{i} )=\min\big( \max\left( x_{i},\ min\_value \right),\ max\_value \big)
|
||||
\f]
|
||||
|
||||
**Attributes**:
|
||||
|
@ -12,7 +12,7 @@
|
||||
It performs element-wise activation function on a given input tensor, based on the following mathematical formula:
|
||||
|
||||
\f[
|
||||
Gelu(x) = x\cdot\Phi(x) = x\cdot\frac{1}{2}\cdot\left[1 + erf\left(x/\sqrt{2}\right)\right]
|
||||
Gelu(x) = x\cdot\Phi(x) = x\cdot\frac{1}{2}\cdot\left[1 + erf\frac{x}{\sqrt{2}}\right]
|
||||
\f]
|
||||
|
||||
where Φ(x) is the Cumulative Distribution Function for Gaussian Distribution.
|
||||
|
@ -22,13 +22,13 @@ The *Gelu* function may be approximated in two different ways based on *approxim
|
||||
For `erf` approximation mode, *Gelu* function is represented as:
|
||||
|
||||
\f[
|
||||
Gelu(x) = x\cdot\Phi(x) = x\cdot\frac{1}{2}\cdot\left[1 + erf\left(x/\sqrt{2}\right)\right]
|
||||
Gelu(x) = x\cdot\Phi(x) = x\cdot\frac{1}{2}\cdot\left[1 + erf\frac{x}{\sqrt{2}}\right]
|
||||
\f]
|
||||
|
||||
For `tanh` approximation mode, *Gelu* function is represented as:
|
||||
|
||||
\f[
|
||||
Gelu(x) \approx x\cdot\frac{1}{2}\cdot \left(1 + \tanh\left[\sqrt{2/\pi} \cdot (x + 0.044715 \cdot x^3)\right]\right)
|
||||
Gelu(x) \approx x\cdot\frac{1}{2}\cdot \left(1 + \tanh\left[\sqrt{\frac{2}{\pi}} \cdot (x + 0.044715 \cdot x^3)\right]\right)
|
||||
\f]
|
||||
|
||||
**Attributes**
|
||||
|
@ -10,7 +10,7 @@
|
||||
element in the output tensor with the following formula:
|
||||
|
||||
\f[
|
||||
HSigmoid(x) = \frac{min(max(x + 3, 0), 6)}{6}
|
||||
HSigmoid(x) = \frac{min(max(x + 3,\ 0),\ 6)}{6}
|
||||
\f]
|
||||
|
||||
The HSigmoid operation is introduced in the following [article](https://arxiv.org/pdf/1905.02244.pdf).
|
||||
|
@ -10,7 +10,7 @@
|
||||
element in the output tensor with the following formula:
|
||||
|
||||
\f[
|
||||
HSwish(x) = x \frac{min(max(x + 3, 0), 6)}{6}
|
||||
HSwish(x) = x \cdot \frac{min(max(x + 3,\ 0),\ 6)}{6}
|
||||
\f]
|
||||
|
||||
The HSwish operation is introduced in the following [article](https://arxiv.org/pdf/1905.02244.pdf).
|
||||
|
@ -12,10 +12,13 @@
|
||||
|
||||
For each element from the input tensor calculates corresponding
|
||||
element in the output tensor with the following formula:
|
||||
|
||||
\f[
|
||||
y = max(0, min(1, alpha * x + beta))
|
||||
y = max(0,\ min(1,\ \alpha x + \beta))
|
||||
\f]
|
||||
|
||||
where α corresponds to `alpha` scalar input and β corresponds to `beta` scalar input.
|
||||
|
||||
**Inputs**
|
||||
|
||||
* **1**: An tensor of type *T*. **Required.**
|
||||
|
@ -8,8 +8,8 @@
|
||||
|
||||
**Note**: This is recommended to not compute LogSoftmax directly as Log(Softmax(x, axis)), more numeric stable is to compute LogSoftmax as:
|
||||
\f[
|
||||
t = (x - ReduceMax(x, axis)) \\
|
||||
LogSoftmax(x, axis) = t - Log(ReduceSum(Exp(t), axis))
|
||||
t = (x - ReduceMax(x,\ axis)) \\
|
||||
LogSoftmax(x, axis) = t - Log(ReduceSum(Exp(t),\ axis))
|
||||
\f]
|
||||
|
||||
**Attributes**
|
||||
|
@ -15,7 +15,7 @@
|
||||
For each element from the input tensor calculates corresponding
|
||||
element in the output tensor with the following formula:
|
||||
\f[
|
||||
Y_{i}^{( l )} = max(0, Y_{i}^{( l - 1 )})
|
||||
Y_{i}^{( l )} = max(0,\ Y_{i}^{( l - 1 )})
|
||||
\f]
|
||||
|
||||
**Inputs**:
|
||||
|
@ -25,7 +25,7 @@
|
||||
*Abs* does the following with the input tensor *a*:
|
||||
|
||||
\f[
|
||||
a_{i} = abs(a_{i})
|
||||
a_{i} = \vert a_{i} \vert
|
||||
\f]
|
||||
|
||||
**Examples**
|
||||
|
@ -4,11 +4,13 @@
|
||||
|
||||
**Category**: Arithmetic unary operation
|
||||
|
||||
**Short description**: *Atanh* performs element-wise hyperbolic inverse tangent (arctangenth) operation with given tensor.
|
||||
**Short description**: *Atanh* performs element-wise hyperbolic inverse tangent (arctangenth) operation with a given tensor.
|
||||
|
||||
**Attributes**:
|
||||
**Detailed description**: *Atanh* performs element-wise hyperbolic inverse tangent (arctangenth) operation on a given input tensor, based on the following mathematical formula:
|
||||
|
||||
No attributes available.
|
||||
\f[ a_{i} = atanh(a_{i}) \f]
|
||||
|
||||
**Attributes**: Atanh operation has no attributes.
|
||||
|
||||
**Inputs**
|
||||
|
||||
@ -16,22 +18,14 @@
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: The result of element-wise atanh operation. A tensor of type *T*.
|
||||
* **1**: The result of element-wise atanh operation applied to the input tensor. A tensor of type *T* and the same shape as input tensor.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: any floating-point type.
|
||||
|
||||
*Atanh* does the following with the input tensor *a*:
|
||||
|
||||
\f[
|
||||
a_{i} = atanh(a_{i})
|
||||
\f]
|
||||
* *T*: any supported numeric type.
|
||||
|
||||
**Examples**
|
||||
|
||||
*Example 1*
|
||||
|
||||
```xml
|
||||
<layer ... type="Atanh">
|
||||
<input>
|
||||
|
@ -10,7 +10,7 @@
|
||||
element in the output tensor with the following formula:
|
||||
|
||||
\f[
|
||||
a_{i} = ceiling(a_{i})
|
||||
a_{i} = \lceil a_{i} \rceil
|
||||
\f]
|
||||
|
||||
**Attributes**: *Ceiling* operation has no attributes.
|
||||
|
@ -11,7 +11,7 @@ Before performing arithmetic operation, input tensors *a* and *b* are broadcaste
|
||||
After broadcasting *Divide* performs division operation for the input tensors *a* and *b* using the formula below:
|
||||
|
||||
\f[
|
||||
o_{i} = a_{i} / b_{i}
|
||||
o_{i} = \frac{a_{i}}{b_{i}}
|
||||
\f]
|
||||
|
||||
The result of division by zero is undefined.
|
||||
|
@ -10,7 +10,7 @@
|
||||
As a first step input tensors *a* and *b* are broadcasted if their shapes differ. Broadcasting is performed according to `auto_broadcast` attribute specification. As a second step *FloorMod* operation is computed element-wise on the input tensors *a* and *b* according to the formula below:
|
||||
|
||||
\f[
|
||||
o_{i} = a_{i} % b_{i}
|
||||
o_{i} = a_{i} \mod b_{i}
|
||||
\f]
|
||||
|
||||
*FloorMod* operation computes a reminder of a floored division. It is the same behaviour like in Python programming language: `floor(x / y) * y + floor_mod(x, y) = x`. The sign of the result is equal to a sign of a divisor. The result of division by zero is undefined.
|
||||
|
@ -10,7 +10,7 @@
|
||||
element in the output tensor with the following formula:
|
||||
|
||||
\f[
|
||||
a_{i} = floor(a_{i})
|
||||
a_{i} = \lfloor a_{i} \rfloor
|
||||
\f]
|
||||
|
||||
**Attributes**: *Floor* operation has no attributes.
|
||||
|
@ -12,7 +12,7 @@ As a first step input tensors *a* and *b* are broadcasted if their shapes differ
|
||||
After broadcasting *Maximum* does the following with the input tensors *a* and *b*:
|
||||
|
||||
\f[
|
||||
o_{i} = max(a_{i}, b_{i})
|
||||
o_{i} = max(a_{i},\ b_{i})
|
||||
\f]
|
||||
|
||||
**Attributes**:
|
||||
|
@ -10,7 +10,7 @@
|
||||
As a first step input tensors *a* and *b* are broadcasted if their shapes differ. Broadcasting is performed according to `auto_broadcast` attribute specification. As a second step *Minimum* operation is computed element-wise on the input tensors *a* and *b* according to the formula below:
|
||||
|
||||
\f[
|
||||
o_{i} = min(a_{i}, b_{i})
|
||||
o_{i} = min(a_{i},\ b_{i})
|
||||
\f]
|
||||
|
||||
**Attributes**:
|
||||
|
@ -10,7 +10,7 @@
|
||||
As a first step input tensors *a* and *b* are broadcasted if their shapes differ. Broadcasting is performed according to `auto_broadcast` attribute specification. As a second step *Mod* operation is computed element-wise on the input tensors *a* and *b* according to the formula below:
|
||||
|
||||
\f[
|
||||
o_{i} = a_{i} % b_{i}
|
||||
o_{i} = a_{i} \mod b_{i}
|
||||
\f]
|
||||
|
||||
*Mod* operation computes a reminder of a truncated division. It is the same behaviour like in C programming language: `truncated(x / y) * y + truncated_mod(x, y) = x`. The sign of the result is equal to a sign of a dividend. The result of division by zero is undefined.
|
||||
|
@ -11,7 +11,7 @@ Before performing arithmetic operation, input tensors *a* and *b* are broadcaste
|
||||
After broadcasting *Multiply* performs multiplication operation for the input tensors *a* and *b* using the formula below:
|
||||
|
||||
\f[
|
||||
o_{i} = a_{i} * b_{i}
|
||||
o_{i} = a_{i} \cdot b_{i}
|
||||
\f]
|
||||
|
||||
**Attributes**:
|
||||
|
@ -4,35 +4,10 @@
|
||||
|
||||
**Category**: Comparison binary operation
|
||||
|
||||
**Short description**: *Equal* performs element-wise comparison operation with two given tensors applying multi-directional broadcast rules.
|
||||
|
||||
**Attributes**:
|
||||
|
||||
* *auto_broadcast*
|
||||
|
||||
* **Description**: specifies rules used for auto-broadcasting of input tensors.
|
||||
* **Range of values**:
|
||||
* *none* - no auto-broadcasting is allowed, all input shapes should match
|
||||
* *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in <a href="https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md">ONNX docs</a>.
|
||||
* **Type**: string
|
||||
* **Default value**: "numpy"
|
||||
* **Required**: *no*
|
||||
|
||||
**Inputs**
|
||||
|
||||
* **1**: A tensor of type *T*. **Required.**
|
||||
* **2**: A tensor of type *T*. **Required.**
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: The result of element-wise comparison operation. A tensor of type boolean.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: arbitrary supported type.
|
||||
**Short description**: *Equal* performs element-wise comparison operation with two given input tensors applying multi-directional broadcast rules specified in the *auto_broadcast* attribute.
|
||||
|
||||
**Detailed description**
|
||||
Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value.
|
||||
Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and *auto_broadcast* attributes is not *none*. Broadcasting is performed according to *auto_broadcast* value.
|
||||
|
||||
After broadcasting *Equal* does the following with the input tensors *a* and *b*:
|
||||
|
||||
@ -40,12 +15,40 @@ After broadcasting *Equal* does the following with the input tensors *a* and *b*
|
||||
o_{i} = a_{i} == b_{i}
|
||||
\f]
|
||||
|
||||
**Attributes**:
|
||||
|
||||
* *auto_broadcast*
|
||||
|
||||
* **Description**: specifies rules used for auto-broadcasting of input tensors.
|
||||
* **Range of values**:
|
||||
* *none* - no auto-broadcasting is allowed, all input shapes should match,
|
||||
* *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md),
|
||||
* *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md).
|
||||
* **Type**: string
|
||||
* **Default value**: "numpy"
|
||||
* **Required**: *no*
|
||||
|
||||
**Inputs**
|
||||
|
||||
* **1**: A tensor of type *T* and arbitrary shape. **Required.**
|
||||
* **2**: A tensor of type *T* and arbitrary shape. **Required.**
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: The result of element-wise **comparison** operation applied to the input tensors. A tensor of type *T_BOOL* and the same shape equal to broadcasted shape of two inputs.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: arbitrary supported type.
|
||||
* *T_BOOL*: `boolean`.
|
||||
|
||||
**Examples**
|
||||
|
||||
*Example 1*
|
||||
*Example 1: no broadcast*
|
||||
|
||||
```xml
|
||||
<layer ... type="Equal">
|
||||
<data auto_broadcast="none"/>
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>256</dim>
|
||||
@ -65,9 +68,10 @@ o_{i} = a_{i} == b_{i}
|
||||
</layer>
|
||||
```
|
||||
|
||||
*Example 2: broadcast*
|
||||
*Example 2: numpy broadcast*
|
||||
```xml
|
||||
<layer ... type="Equal">
|
||||
<data auto_broadcast="numpy"/>
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>8</dim>
|
||||
|
@ -37,7 +37,7 @@ Before performing arithmetic operation, input tensors *a* and *b* are broadcaste
|
||||
After broadcasting *GreaterEqual* does the following with the input tensors *a* and *b*:
|
||||
|
||||
\f[
|
||||
o_{i} = a_{i} >= b_{i}
|
||||
o_{i} = a_{i} \geq b_{i}
|
||||
\f]
|
||||
|
||||
**Examples**
|
||||
|
@ -12,7 +12,7 @@ Before performing arithmetic operation, input tensors *a* and *b* are broadcaste
|
||||
After broadcasting *LessEqual* does the following with the input tensors *a* and *b*:
|
||||
|
||||
\f[
|
||||
o_{i} = a_{i} <= b_{i}
|
||||
o_{i} = a_{i} \leq b_{i}
|
||||
\f]
|
||||
|
||||
**Attributes**:
|
||||
|
@ -37,7 +37,7 @@ Before performing arithmetic operation, input tensors *a* and *b* are broadcaste
|
||||
After broadcasting *NotEqual* does the following with the input tensors *a* and *b*:
|
||||
|
||||
\f[
|
||||
o_{i} = a_{i} != b_{i}
|
||||
o_{i} = a_{i} \neq b_{i}
|
||||
\f]
|
||||
|
||||
**Examples**
|
||||
|
@ -16,15 +16,15 @@ n_{out} = \left ( \frac{n_{in} + 2p - k}{s} \right ) + 1
|
||||
The receptive field in each layer is calculated using the formulas:
|
||||
* Jump in the output feature map:
|
||||
\f[
|
||||
j_{out} = j_{in} * s
|
||||
j_{out} = j_{in} \cdot s
|
||||
\f]
|
||||
* Size of the receptive field of output feature:
|
||||
\f[
|
||||
r_{out} = r_{in} + ( k - 1 ) * j_{in}
|
||||
r_{out} = r_{in} + ( k - 1 ) \cdot j_{in}
|
||||
\f]
|
||||
* Center position of the receptive field of the first output feature:
|
||||
\f[
|
||||
start_{out} = start_{in} + ( \frac{k - 1}{2} - p ) * j_{in}
|
||||
start_{out} = start_{in} + ( \frac{k - 1}{2} - p ) \cdot j_{in}
|
||||
\f]
|
||||
* Output is calculated using the following formula:
|
||||
\f[
|
||||
|
@ -12,7 +12,7 @@ Output is calculated using the following formula:
|
||||
|
||||
\f[
|
||||
|
||||
y(p) = \sum_{k = 1}^{K}w_{k}x(p + p_{k} + {\Delta}p_{k})
|
||||
y(p) = \displaystyle{\sum_{k = 1}^{K}}w_{k}x(p + p_{k} + {\Delta}p_{k})
|
||||
|
||||
\f]
|
||||
|
||||
|
@ -14,7 +14,7 @@ Output is calculated using the following formula:
|
||||
|
||||
\f[
|
||||
|
||||
y(p) = \sum_{k = 1}^{K}w_{k}x(p + p_{k} + {\Delta}p_{k}) * {\Delta}m_{k}
|
||||
y(p) = \displaystyle{\sum_{k = 1}^{K}}w_{k}x(p + p_{k} + {\Delta}p_{k}) \cdot {\Delta}m_{k}
|
||||
|
||||
\f]
|
||||
Where
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
**Detailed description**:
|
||||
|
||||
*RandomUniform* operation generates random numbers from a uniform distribution in the range `[*minval*, *maxval*)`.
|
||||
*RandomUniform* operation generates random numbers from a uniform distribution in the range `[minval, maxval)`.
|
||||
The generation algorithm is based on underlying random integer generator that uses Philox algorithm. Philox algorithm
|
||||
is a counter-based pseudo-random generator, which produces uint32 values. Single invocation of Philox algorithm returns
|
||||
four result random values, depending on the given *key* and *counter* values. *Key* and *counter* are initialized
|
||||
@ -42,7 +42,7 @@ R' = mulhi(R, M) {\oplus} k {\oplus} L \\
|
||||
mulhi(a, b) = floor((a {\times} b) / 2^{32}) \\
|
||||
mullo(a, b) = (a {\times} b) \mod 2^{32}
|
||||
\f]
|
||||
where `{\oplus}` - bitwise xor, *k* = `R_{key}` for updating counter, *k* = `L_{key}` for updating *n*,
|
||||
where \f${\oplus}\f$ - bitwise xor, *k* = \f$R_{key}\f$ for updating counter, *k* = \f$L_{key}\f$ for updating *n*,
|
||||
*M* = `0xD2511F53` for updating *n*, *M* = `0xCD9E8D57` for updating *counter*.
|
||||
|
||||
After each round *key* is raised by summing with another pair of const values:
|
||||
@ -50,7 +50,7 @@ After each round *key* is raised by summing with another pair of const values:
|
||||
L += 0x9E3779B9 \\
|
||||
R += 0xBB67AE85
|
||||
\f]
|
||||
Values *L'_{n}*, *R'_{n}*, *L'_{counter}*, *R'_{counter}* are resulting four random numbers.
|
||||
Values \f$L'_{n}, R'_{n}, L'_{counter}, R'_{counter}\f$ are resulting four random numbers.
|
||||
|
||||
Float values between [0..1) are obtained from 32-bit integers by the following rules.
|
||||
|
||||
|
@ -25,7 +25,7 @@
|
||||
*LogicalNot* does the following with the input tensor *a*:
|
||||
|
||||
\f[
|
||||
a_{i} = not(a_{i})
|
||||
a_{i} = \lnot a_{i}
|
||||
\f]
|
||||
|
||||
**Examples**
|
||||
|
@ -37,7 +37,7 @@ Before performing logical operation, input tensors *a* and *b* are broadcasted i
|
||||
After broadcasting *LogicalXor* does the following with the input tensors *a* and *b*:
|
||||
|
||||
\f[
|
||||
o_{i} = a_{i} xor b_{i}
|
||||
o_{i} = a_{i} \oplus b_{i}
|
||||
\f]
|
||||
|
||||
**Examples**
|
||||
|
@ -11,19 +11,19 @@ The kernel dimensions are calculated using the following formulae for the `NCDHW
|
||||
|
||||
\f[
|
||||
\begin{array}{lcl}
|
||||
d_{start} &=& floor(i*D_{in}/D_{out})\\
|
||||
d_{end} &=& ceil((i+1)*D_{in}/D_{out})\\
|
||||
h_{start} &=& floor(j*H_{in}/H_{out})\\
|
||||
h_{end} &=& ceil((j+1)*H_{in}/H_{out})\\
|
||||
w_{start} &=& floor(k*W_{in}/W_{out})\\
|
||||
w_{end} &=& ceil((k+1)*W_{in}/W_{out})
|
||||
d_{start} &=& \lfloor i \cdot \frac{D_{in}}{D_{out}}\rfloor\\
|
||||
d_{end} &=& \lceil(i+1) \cdot \frac{D_{in}}{D_{out}}\rceil\\
|
||||
h_{start} &=& \lfloor j \cdot \frac{H_{in}}{H_{out}}\rfloor\\
|
||||
h_{end} &=& \lceil(j+1) \cdot \frac{H_{in}}{H_{out}}\rceil\\
|
||||
w_{start} &=& \lfloor k \cdot \frac{W_{in}}{W_{out}}\rfloor\\
|
||||
w_{end} &=& \lceil(k+1) \cdot \frac{W_{in}}{W_{out}}\rceil
|
||||
\end{array}
|
||||
\f]
|
||||
|
||||
The output is calculated with the following formula:
|
||||
|
||||
\f[
|
||||
Output(i,j,k) = \frac{Input[d_{start}:d_{end}, h_{start}:h_{end}, w_{start}:w_{end}]}{(d_{end}-d_{start})*(h_{end}-h_{start})*(w_{end}-w_{start})}
|
||||
Output(i,j,k) = \frac{Input[d_{start}:d_{end}, h_{start}:h_{end}, w_{start}:w_{end}]}{(d_{end}-d_{start}) \cdot (h_{end}-h_{start}) \cdot (w_{end}-w_{start})}
|
||||
\f]
|
||||
|
||||
**Inputs**:
|
||||
|
@ -11,12 +11,12 @@ The kernel dimensions are calculated using the following formulae for the `NCDHW
|
||||
|
||||
\f[
|
||||
\begin{array}{lcl}
|
||||
d_{start} &=& floor(i*D_{in}/D_{out})\\
|
||||
d_{end} &=& ceil((i+1)*D_{in}/D_{out})\\
|
||||
h_{start} &=& floor(j*H_{in}/H_{out})\\
|
||||
h_{end} &=& ceil((j+1)*H_{in}/H_{out})\\
|
||||
w_{start} &=& floor(k*W_{in}/W_{out})\\
|
||||
w_{end} &=& ceil((k+1)*W_{in}/W_{out})
|
||||
d_{start} &=& \lfloor i \cdot \frac{D_{in}}{D_{out}}\rfloor\\
|
||||
d_{end} &=& \lceil(i+1) \cdot \frac{D_{in}}{D_{out}}\rceil\\
|
||||
h_{start} &=& \lfloor j \cdot \frac{H_{in}}{H_{out}}\rfloor\\
|
||||
h_{end} &=& \lceil(j+1) \cdot \frac{H_{in}}{H_{out}}\rceil\\
|
||||
w_{start} &=& \lfloor k \cdot \frac{W_{in}}{W_{out}}\rfloor\\
|
||||
w_{end} &=& \lceil(k+1) \cdot \frac{W_{in}}{W_{out}}\rceil
|
||||
\end{array}
|
||||
\f]
|
||||
|
||||
|
@ -44,8 +44,8 @@ if(OpenCV_FOUND)
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE opencv_core)
|
||||
endif()
|
||||
|
||||
if(NGRAPH_ONNX_IMPORT_ENABLE)
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE onnx_importer)
|
||||
if(NGRAPH_ONNX_FRONTEND_ENABLE)
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE onnx_ngraph_frontend)
|
||||
endif()
|
||||
|
||||
if(NOT MSVC)
|
||||
|
@ -7,7 +7,7 @@ set(CMAKE_CXX_STANDARD 11)
|
||||
|
||||
set(TARGET_NAME "template_extension")
|
||||
|
||||
find_package(ngraph REQUIRED OPTIONAL_COMPONENTS onnx_importer)
|
||||
find_package(ngraph REQUIRED OPTIONAL_COMPONENTS onnx_ngraph_frontend)
|
||||
find_package(InferenceEngine REQUIRED)
|
||||
find_package(OpenCV QUIET COMPONENTS core)
|
||||
|
||||
@ -28,9 +28,9 @@ target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_INFERENCE_EXTENSION_
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE IE::inference_engine
|
||||
${NGRAPH_LIBRARIES})
|
||||
|
||||
if (ngraph_onnx_importer_FOUND)
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE ${ONNX_IMPORTER_LIBRARIES})
|
||||
target_compile_definitions(${TARGET_NAME} PRIVATE NGRAPH_ONNX_IMPORT_ENABLED)
|
||||
if (ngraph_onnx_ngraph_frontend_FOUND)
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE ngraph::onnx_ngraph_frontend)
|
||||
target_compile_definitions(${TARGET_NAME} PRIVATE NGRAPH_ONNX_FRONTEND_ENABLED)
|
||||
endif()
|
||||
# [cmake:extension]
|
||||
|
||||
|
@ -22,7 +22,8 @@ OpImplementation::OpImplementation(const std::shared_ptr<ngraph::Node>& node) {
|
||||
IE_THROW() << "Cannot create implementation for op with dynamic shapes!";
|
||||
if (castedNode->get_input_shape(0).size() != 4 || castedNode->get_output_shape(0).size() != 4)
|
||||
IE_THROW() << "Operation supports only 4d tensors for input and output.";
|
||||
if (castedNode->get_input_element_type(0) != ngraph::element::f32 || castedNode->get_output_element_type(0) != ngraph::element::f32)
|
||||
if (castedNode->get_input_element_type(0) != ngraph::element::f32 ||
|
||||
castedNode->get_output_element_type(0) != ngraph::element::f32)
|
||||
IE_THROW() << "Operation supports only FP32 tensors.";
|
||||
add = castedNode->getAddAttr();
|
||||
inShape = castedNode->get_input_shape(0);
|
||||
@ -34,9 +35,12 @@ OpImplementation::OpImplementation(const std::shared_ptr<ngraph::Node>& node) {
|
||||
//! [cpu_implementation:ctor]
|
||||
|
||||
//! [cpu_implementation:getSupportedConfigurations]
|
||||
InferenceEngine::StatusCode OpImplementation::getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf,
|
||||
InferenceEngine::ResponseDesc* resp) noexcept {
|
||||
auto createConfig = [](const InferenceEngine::SizeVector inShape, const InferenceEngine::SizeVector& outShape, bool planar) {
|
||||
InferenceEngine::StatusCode OpImplementation::getSupportedConfigurations(
|
||||
std::vector<InferenceEngine::LayerConfig>& conf,
|
||||
InferenceEngine::ResponseDesc* resp) noexcept {
|
||||
auto createConfig = [](const InferenceEngine::SizeVector inShape,
|
||||
const InferenceEngine::SizeVector& outShape,
|
||||
bool planar) {
|
||||
InferenceEngine::LayerConfig config;
|
||||
config.dynBatchSupport = false;
|
||||
InferenceEngine::DataConfig inData;
|
||||
@ -45,9 +49,11 @@ InferenceEngine::StatusCode OpImplementation::getSupportedConfigurations(std::ve
|
||||
// Allow any offset before data
|
||||
size_t offset((std::numeric_limits<size_t>::max)());
|
||||
if (planar) {
|
||||
inData.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, inShape, {inShape, order, offset});
|
||||
inData.desc =
|
||||
InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, inShape, {inShape, order, offset});
|
||||
config.inConfs.push_back(inData);
|
||||
outData.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outShape, order, offset});
|
||||
outData.desc =
|
||||
InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outShape, order, offset});
|
||||
config.outConfs.push_back(outData);
|
||||
} else {
|
||||
// Add blocked (nChw8c) format
|
||||
@ -64,9 +70,11 @@ InferenceEngine::StatusCode OpImplementation::getSupportedConfigurations(std::ve
|
||||
InferenceEngine::SizeVector outBlkDims = outShape;
|
||||
outBlkDims[1] = div_up(outBlkDims[1], 8);
|
||||
outBlkDims.push_back(8);
|
||||
inData.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, inShape, {inBlkDims, order, offset});
|
||||
inData.desc =
|
||||
InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, inShape, {inBlkDims, order, offset});
|
||||
config.inConfs.push_back(inData);
|
||||
outData.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outBlkDims, order, offset});
|
||||
outData.desc =
|
||||
InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape, {outBlkDims, order, offset});
|
||||
config.outConfs.push_back(outData);
|
||||
}
|
||||
return config;
|
||||
@ -87,7 +95,8 @@ InferenceEngine::StatusCode OpImplementation::getSupportedConfigurations(std::ve
|
||||
//! [cpu_implementation:getSupportedConfigurations]
|
||||
|
||||
//! [cpu_implementation:init]
|
||||
InferenceEngine::StatusCode OpImplementation::init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc* resp) noexcept {
|
||||
InferenceEngine::StatusCode OpImplementation::init(InferenceEngine::LayerConfig& config,
|
||||
InferenceEngine::ResponseDesc* resp) noexcept {
|
||||
try {
|
||||
if (config.inConfs.size() != 1 || config.outConfs.size() != 1) {
|
||||
IE_THROW() << "Operation cannot be initialized with incorrect number of inputs/outputs!";
|
||||
@ -115,10 +124,13 @@ InferenceEngine::StatusCode OpImplementation::init(InferenceEngine::LayerConfig&
|
||||
//! [cpu_implementation:init]
|
||||
|
||||
//! [cpu_implementation:execute]
|
||||
InferenceEngine::StatusCode OpImplementation::execute(std::vector<InferenceEngine::Blob::Ptr>& inputs, std::vector<InferenceEngine::Blob::Ptr>& outputs,
|
||||
InferenceEngine::StatusCode OpImplementation::execute(std::vector<InferenceEngine::Blob::Ptr>& inputs,
|
||||
std::vector<InferenceEngine::Blob::Ptr>& outputs,
|
||||
InferenceEngine::ResponseDesc* resp) noexcept {
|
||||
const float* src_data = inputs[0]->cbuffer().as<const float*>() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
|
||||
float* dst_data = outputs[0]->buffer().as<float*>() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
|
||||
const float* src_data =
|
||||
inputs[0]->cbuffer().as<const float*>() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
|
||||
float* dst_data =
|
||||
outputs[0]->buffer().as<float*>() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
|
||||
|
||||
for (size_t i = 0; i < inputs[0]->size(); i++) {
|
||||
dst_data[i] = src_data[i] + add;
|
||||
|
@ -16,8 +16,10 @@ public:
|
||||
explicit OpImplementation(const std::shared_ptr<ngraph::Node>& node);
|
||||
InferenceEngine::StatusCode getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf,
|
||||
InferenceEngine::ResponseDesc* resp) noexcept override;
|
||||
InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc* resp) noexcept override;
|
||||
InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr>& inputs, std::vector<InferenceEngine::Blob::Ptr>& outputs,
|
||||
InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config,
|
||||
InferenceEngine::ResponseDesc* resp) noexcept override;
|
||||
InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr>& inputs,
|
||||
std::vector<InferenceEngine::Blob::Ptr>& outputs,
|
||||
InferenceEngine::ResponseDesc* resp) noexcept override;
|
||||
|
||||
private:
|
||||
|
@ -7,12 +7,12 @@
|
||||
#include "cpu_kernel.hpp"
|
||||
#include "op.hpp"
|
||||
#ifdef OPENCV_IMPORT_ENABLED
|
||||
#include "fft_kernel.hpp"
|
||||
#include "fft_op.hpp"
|
||||
# include "fft_kernel.hpp"
|
||||
# include "fft_op.hpp"
|
||||
#endif
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#ifdef NGRAPH_ONNX_IMPORT_ENABLED
|
||||
#include <onnx_import/onnx_utils.hpp>
|
||||
#ifdef NGRAPH_ONNX_FRONTEND_ENABLED
|
||||
# include <onnx_import/onnx_utils.hpp>
|
||||
#endif
|
||||
|
||||
#include <map>
|
||||
@ -24,31 +24,37 @@ using namespace TemplateExtension;
|
||||
|
||||
//! [extension:ctor]
|
||||
Extension::Extension() {
|
||||
#ifdef NGRAPH_ONNX_IMPORT_ENABLED
|
||||
ngraph::onnx_import::register_operator(Operation::type_info.name, 1, "custom_domain", [](const ngraph::onnx_import::Node& node) -> ngraph::OutputVector {
|
||||
ngraph::OutputVector ng_inputs {node.get_ng_inputs()};
|
||||
int64_t add = node.get_attribute_value<int64_t>("add");
|
||||
return {std::make_shared<Operation>(ng_inputs.at(0), add)};
|
||||
});
|
||||
#ifdef OPENCV_IMPORT_ENABLED
|
||||
ngraph::onnx_import::register_operator(FFTOp::type_info.name, 1, "custom_domain", [](const ngraph::onnx_import::Node& node) -> ngraph::OutputVector {
|
||||
ngraph::OutputVector ng_inputs {node.get_ng_inputs()};
|
||||
bool inverse = node.get_attribute_value<int64_t>("inverse");
|
||||
return {std::make_shared<FFTOp>(ng_inputs.at(0), inverse)};
|
||||
});
|
||||
#endif
|
||||
#ifdef NGRAPH_ONNX_FRONTEND_ENABLED
|
||||
ngraph::onnx_import::register_operator(Operation::type_info.name,
|
||||
1,
|
||||
"custom_domain",
|
||||
[](const ngraph::onnx_import::Node& node) -> ngraph::OutputVector {
|
||||
ngraph::OutputVector ng_inputs{node.get_ng_inputs()};
|
||||
int64_t add = node.get_attribute_value<int64_t>("add");
|
||||
return {std::make_shared<Operation>(ng_inputs.at(0), add)};
|
||||
});
|
||||
# ifdef OPENCV_IMPORT_ENABLED
|
||||
ngraph::onnx_import::register_operator(FFTOp::type_info.name,
|
||||
1,
|
||||
"custom_domain",
|
||||
[](const ngraph::onnx_import::Node& node) -> ngraph::OutputVector {
|
||||
ngraph::OutputVector ng_inputs{node.get_ng_inputs()};
|
||||
bool inverse = node.get_attribute_value<int64_t>("inverse");
|
||||
return {std::make_shared<FFTOp>(ng_inputs.at(0), inverse)};
|
||||
});
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
//! [extension:ctor]
|
||||
|
||||
//! [extension:dtor]
|
||||
Extension::~Extension() {
|
||||
#ifdef NGRAPH_ONNX_IMPORT_ENABLED
|
||||
#ifdef NGRAPH_ONNX_FRONTEND_ENABLED
|
||||
ngraph::onnx_import::unregister_operator(Operation::type_info.name, 1, "custom_domain");
|
||||
#ifdef OPENCV_IMPORT_ENABLED
|
||||
# ifdef OPENCV_IMPORT_ENABLED
|
||||
ngraph::onnx_import::unregister_operator(FFTOp::type_info.name, 1, "custom_domain");
|
||||
#endif // OPENCV_IMPORT_ENABLED
|
||||
#endif // NGRAPH_ONNX_IMPORT_ENABLED
|
||||
# endif // OPENCV_IMPORT_ENABLED
|
||||
#endif // NGRAPH_ONNX_FRONTEND_ENABLED
|
||||
}
|
||||
//! [extension:dtor]
|
||||
|
||||
@ -92,7 +98,8 @@ std::vector<std::string> Extension::getImplTypes(const std::shared_ptr<ngraph::N
|
||||
//! [extension:getImplTypes]
|
||||
|
||||
//! [extension:getImplementation]
|
||||
InferenceEngine::ILayerImpl::Ptr Extension::getImplementation(const std::shared_ptr<ngraph::Node>& node, const std::string& implType) {
|
||||
InferenceEngine::ILayerImpl::Ptr Extension::getImplementation(const std::shared_ptr<ngraph::Node>& node,
|
||||
const std::string& implType) {
|
||||
if (implType == "CPU") {
|
||||
if (std::dynamic_pointer_cast<Operation>(node)) {
|
||||
return std::make_shared<OpImplementation>(node);
|
||||
|
@ -25,7 +25,8 @@ public:
|
||||
|
||||
std::map<std::string, ngraph::OpSet> getOpSets() override;
|
||||
std::vector<std::string> getImplTypes(const std::shared_ptr<ngraph::Node>& node) override;
|
||||
InferenceEngine::ILayerImpl::Ptr getImplementation(const std::shared_ptr<ngraph::Node>& node, const std::string& implType) override;
|
||||
InferenceEngine::ILayerImpl::Ptr getImplementation(const std::shared_ptr<ngraph::Node>& node,
|
||||
const std::string& implType) override;
|
||||
};
|
||||
|
||||
} // namespace TemplateExtension
|
||||
|
@ -21,14 +21,16 @@ FFTImpl::FFTImpl(const std::shared_ptr<ngraph::Node>& node) {
|
||||
IE_THROW() << "Cannot create implementation for operation with incorrect number of inputs or outputs!";
|
||||
if (castedNode->get_input_partial_shape(0).is_dynamic() || castedNode->get_output_partial_shape(0).is_dynamic())
|
||||
IE_THROW() << "Cannot create implementation for op with dynamic shapes!";
|
||||
if (castedNode->get_input_element_type(0) != ngraph::element::f32 || castedNode->get_output_element_type(0) != ngraph::element::f32)
|
||||
if (castedNode->get_input_element_type(0) != ngraph::element::f32 ||
|
||||
castedNode->get_output_element_type(0) != ngraph::element::f32)
|
||||
IE_THROW() << "Operation supports only FP32 tensors.";
|
||||
inpShape = castedNode->get_input_shape(0);
|
||||
outShape = castedNode->get_output_shape(0);
|
||||
inverse = castedNode->inverse;
|
||||
}
|
||||
|
||||
InferenceEngine::StatusCode FFTImpl::getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf, InferenceEngine::ResponseDesc* resp) noexcept {
|
||||
InferenceEngine::StatusCode FFTImpl::getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf,
|
||||
InferenceEngine::ResponseDesc* resp) noexcept {
|
||||
std::vector<InferenceEngine::DataConfig> inDataConfig;
|
||||
std::vector<InferenceEngine::DataConfig> outDataConfig;
|
||||
InferenceEngine::SizeVector order(inpShape.size());
|
||||
@ -55,7 +57,8 @@ InferenceEngine::StatusCode FFTImpl::getSupportedConfigurations(std::vector<Infe
|
||||
return InferenceEngine::StatusCode::OK;
|
||||
}
|
||||
|
||||
InferenceEngine::StatusCode FFTImpl::init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc* resp) noexcept {
|
||||
InferenceEngine::StatusCode FFTImpl::init(InferenceEngine::LayerConfig& config,
|
||||
InferenceEngine::ResponseDesc* resp) noexcept {
|
||||
try {
|
||||
if (config.inConfs.size() != 1 || config.outConfs.size() != 1) {
|
||||
IE_THROW() << "Operation cannot be initialized with incorrect number of inputs/outputs!";
|
||||
@ -85,7 +88,8 @@ static cv::Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob) {
|
||||
return cv::Mat(size, CV_32F, (void*)blob->buffer());
|
||||
}
|
||||
|
||||
InferenceEngine::StatusCode FFTImpl::execute(std::vector<InferenceEngine::Blob::Ptr>& inputs, std::vector<InferenceEngine::Blob::Ptr>& outputs,
|
||||
InferenceEngine::StatusCode FFTImpl::execute(std::vector<InferenceEngine::Blob::Ptr>& inputs,
|
||||
std::vector<InferenceEngine::Blob::Ptr>& outputs,
|
||||
InferenceEngine::ResponseDesc* resp) noexcept {
|
||||
cv::Mat inp = infEngineBlobToMat(inputs[0]);
|
||||
cv::Mat out = infEngineBlobToMat(outputs[0]);
|
||||
@ -95,7 +99,8 @@ InferenceEngine::StatusCode FFTImpl::execute(std::vector<InferenceEngine::Blob::
|
||||
const int w = inp.size[3];
|
||||
cv::Mat complex(h, w, CV_32FC2), interleavedOut(h, w, CV_32FC2);
|
||||
for (int i = 0; i < n; ++i) {
|
||||
std::vector<cv::Mat> components = {cv::Mat(h, w, CV_32F, inp.ptr<float>(i, 0)), cv::Mat(h, w, CV_32F, inp.ptr<float>(i, 1))};
|
||||
std::vector<cv::Mat> components = {cv::Mat(h, w, CV_32F, inp.ptr<float>(i, 0)),
|
||||
cv::Mat(h, w, CV_32F, inp.ptr<float>(i, 1))};
|
||||
cv::merge(components, complex);
|
||||
|
||||
if (!inverse)
|
||||
|
@ -16,8 +16,10 @@ public:
|
||||
explicit FFTImpl(const std::shared_ptr<ngraph::Node>& node);
|
||||
InferenceEngine::StatusCode getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig>& conf,
|
||||
InferenceEngine::ResponseDesc* resp) noexcept override;
|
||||
InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc* resp) noexcept override;
|
||||
InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr>& inputs, std::vector<InferenceEngine::Blob::Ptr>& outputs,
|
||||
InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config,
|
||||
InferenceEngine::ResponseDesc* resp) noexcept override;
|
||||
InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr>& inputs,
|
||||
std::vector<InferenceEngine::Blob::Ptr>& outputs,
|
||||
InferenceEngine::ResponseDesc* resp) noexcept override;
|
||||
|
||||
private:
|
||||
|
@ -9,7 +9,7 @@ using namespace TemplateExtension;
|
||||
|
||||
constexpr ngraph::NodeTypeInfo FFTOp::type_info;
|
||||
|
||||
FFTOp::FFTOp(const ngraph::Output<ngraph::Node>& inp, bool _inverse): Op({inp}) {
|
||||
FFTOp::FFTOp(const ngraph::Output<ngraph::Node>& inp, bool _inverse) : Op({inp}) {
|
||||
constructor_validate_and_infer_types();
|
||||
inverse = _inverse;
|
||||
}
|
||||
|
@ -11,7 +11,7 @@ namespace TemplateExtension {
|
||||
|
||||
class FFTOp : public ngraph::op::Op {
|
||||
public:
|
||||
static constexpr ngraph::NodeTypeInfo type_info {"FFT", 0};
|
||||
static constexpr ngraph::NodeTypeInfo type_info{"FFT", 0};
|
||||
const ngraph::NodeTypeInfo& get_type_info() const override {
|
||||
return type_info;
|
||||
}
|
||||
|
@ -9,7 +9,7 @@ using namespace TemplateExtension;
|
||||
//! [op:ctor]
|
||||
NGRAPH_RTTI_DEFINITION(TemplateExtension::Operation, "Template", 0);
|
||||
|
||||
Operation::Operation(const ngraph::Output<ngraph::Node>& arg, int64_t add): Op({arg}), add(add) {
|
||||
Operation::Operation(const ngraph::Output<ngraph::Node>& arg, int64_t add) : Op({arg}), add(add) {
|
||||
constructor_validate_and_infer_types();
|
||||
}
|
||||
//! [op:ctor]
|
||||
|
@ -9,10 +9,13 @@
|
||||
using namespace TemplatePlugin;
|
||||
|
||||
// ! [async_infer_request:ctor]
|
||||
TemplateAsyncInferRequest::TemplateAsyncInferRequest(const TemplateInferRequest::Ptr& inferRequest, const InferenceEngine::ITaskExecutor::Ptr& cpuTaskExecutor,
|
||||
TemplateAsyncInferRequest::TemplateAsyncInferRequest(const TemplateInferRequest::Ptr& inferRequest,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& cpuTaskExecutor,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor)
|
||||
: AsyncInferRequestThreadSafeDefault(inferRequest, cpuTaskExecutor, callbackExecutor), _inferRequest(inferRequest), _waitExecutor(waitExecutor) {
|
||||
: AsyncInferRequestThreadSafeDefault(inferRequest, cpuTaskExecutor, callbackExecutor),
|
||||
_inferRequest(inferRequest),
|
||||
_waitExecutor(waitExecutor) {
|
||||
// In current implementation we have CPU only tasks and no needs in 2 executors
|
||||
// So, by default single stage pipeline is created.
|
||||
// This stage executes InferRequest::Infer() using cpuTaskExecutor.
|
||||
@ -23,7 +26,8 @@ TemplateAsyncInferRequest::TemplateAsyncInferRequest(const TemplateInferRequest:
|
||||
if (remoteDevice) {
|
||||
_pipeline = {{cpuTaskExecutor,
|
||||
[this] {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "TemplateAsyncInferRequest::PreprocessingAndStartPipeline");
|
||||
OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin,
|
||||
"TemplateAsyncInferRequest::PreprocessingAndStartPipeline");
|
||||
_inferRequest->inferPreprocess();
|
||||
_inferRequest->startPipeline();
|
||||
}},
|
||||
|
@ -13,8 +13,10 @@ namespace TemplatePlugin {
|
||||
// ! [async_infer_request:header]
|
||||
class TemplateAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
|
||||
public:
|
||||
TemplateAsyncInferRequest(const TemplateInferRequest::Ptr& inferRequest, const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor, const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
|
||||
TemplateAsyncInferRequest(const TemplateInferRequest::Ptr& inferRequest,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
|
||||
|
||||
~TemplateAsyncInferRequest();
|
||||
|
||||
|
@ -23,7 +23,8 @@ Configuration::Configuration(const ConfigMap& config, const Configuration& defau
|
||||
|
||||
if (TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS) == key) {
|
||||
_streamsExecutorConfig.SetConfig(CONFIG_KEY(CPU_THROUGHPUT_STREAMS), value);
|
||||
} else if (streamExecutorConfigKeys.end() != std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) {
|
||||
} else if (streamExecutorConfigKeys.end() !=
|
||||
std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) {
|
||||
_streamsExecutorConfig.SetConfig(key, value);
|
||||
} else if (CONFIG_KEY(DEVICE_ID) == key) {
|
||||
deviceId = std::stoi(value);
|
||||
|
@ -21,7 +21,9 @@ struct Configuration {
|
||||
Configuration& operator=(const Configuration&) = default;
|
||||
Configuration& operator=(Configuration&&) = default;
|
||||
|
||||
explicit Configuration(const ConfigMap& config, const Configuration& defaultCfg = {}, const bool throwOnUnsupported = true);
|
||||
explicit Configuration(const ConfigMap& config,
|
||||
const Configuration& defaultCfg = {},
|
||||
const bool throwOnUnsupported = true);
|
||||
|
||||
InferenceEngine::Parameter Get(const std::string& name) const;
|
||||
|
||||
|
@ -18,8 +18,10 @@ using namespace TemplatePlugin;
|
||||
|
||||
// ! [executable_network:ctor_cnnnetwork]
|
||||
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(const std::shared_ptr<const ngraph::Function>& function,
|
||||
const InferenceEngine::InputsDataMap& inputInfoMap, const InferenceEngine::OutputsDataMap& outputsInfoMap,
|
||||
const Configuration& cfg, const Plugin::Ptr& plugin)
|
||||
const InferenceEngine::InputsDataMap& inputInfoMap,
|
||||
const InferenceEngine::OutputsDataMap& outputsInfoMap,
|
||||
const Configuration& cfg,
|
||||
const Plugin::Ptr& plugin)
|
||||
: InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, nullptr), // Disable default threads creation
|
||||
_cfg(cfg),
|
||||
_plugin(plugin) {
|
||||
@ -40,7 +42,11 @@ TemplatePlugin::ExecutableNetwork::ExecutableNetwork(const std::shared_ptr<const
|
||||
// ! [executable_network:ctor_cnnnetwork]
|
||||
|
||||
// ! [executable_network:ctor_import_stream]
|
||||
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream& model, const Configuration& cfg, const Plugin::Ptr& plugin): _cfg(cfg), _plugin(plugin) {
|
||||
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream& model,
|
||||
const Configuration& cfg,
|
||||
const Plugin::Ptr& plugin)
|
||||
: _cfg(cfg),
|
||||
_plugin(plugin) {
|
||||
// read XML content
|
||||
std::string xmlString;
|
||||
std::uint64_t dataSize = 0;
|
||||
@ -53,7 +59,9 @@ TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream& model, const
|
||||
model.read(reinterpret_cast<char*>(&dataSize), sizeof(dataSize));
|
||||
if (0 != dataSize) {
|
||||
dataBlob = InferenceEngine::make_shared_blob<std::uint8_t>(
|
||||
InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, {static_cast<std::size_t>(dataSize)}, InferenceEngine::Layout::C));
|
||||
InferenceEngine::TensorDesc(InferenceEngine::Precision::U8,
|
||||
{static_cast<std::size_t>(dataSize)},
|
||||
InferenceEngine::Layout::C));
|
||||
dataBlob->allocate();
|
||||
model.read(dataBlob->buffer(), dataSize);
|
||||
}
|
||||
@ -84,7 +92,8 @@ TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream& model, const
|
||||
|
||||
// ! [executable_network:map_graph]
|
||||
// forward declaration
|
||||
std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const ngraph::Function>& function, const InferenceEngine::InputsDataMap& inputInfoMap,
|
||||
std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const ngraph::Function>& function,
|
||||
const InferenceEngine::InputsDataMap& inputInfoMap,
|
||||
const InferenceEngine::OutputsDataMap& outputsInfoMap);
|
||||
|
||||
void TemplatePlugin::ExecutableNetwork::CompileNetwork(const std::shared_ptr<const ngraph::Function>& function,
|
||||
@ -117,29 +126,36 @@ void TemplatePlugin::ExecutableNetwork::CompileNetwork(const std::shared_ptr<con
|
||||
void TemplatePlugin::ExecutableNetwork::InitExecutor() {
|
||||
// Default multi-threaded configuration is balanced for throughput and latency cases and takes into account
// real hardware cores and NUMA nodes.
auto streamsExecutorConfig = InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg._streamsExecutorConfig);
auto streamsExecutorConfig =
InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg._streamsExecutorConfig);
streamsExecutorConfig._name = "TemplateStreamsExecutor";
// As Inference Engine CPU Streams Executor creates some additional threads,
// it is better to avoid thread recreation, as some OS memory allocators cannot manage such usage cases
// and memory consumption can be larger than expected.
// So Inference Engine provides an executors cache.
_taskExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamsExecutorConfig);
|
||||
// NOTE: callback Executor is not configured. So callback will be called in the thread of the last stage of inference request pipeline
|
||||
// _callbackExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateCallbackExecutor"});
|
||||
// NOTE: callback Executor is not configured. So callback will be called in the thread of the last stage of
|
||||
// inference request pipeline _callbackExecutor =
|
||||
// InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateCallbackExecutor"});
|
||||
}
|
||||
// ! [executable_network:init_executor]
|
||||
|
||||
// ! [executable_network:create_infer_request_impl]
|
||||
InferenceEngine::IInferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
|
||||
InferenceEngine::OutputsDataMap networkOutputs) {
|
||||
return std::make_shared<TemplateInferRequest>(networkInputs, networkOutputs, std::static_pointer_cast<ExecutableNetwork>(shared_from_this()));
|
||||
InferenceEngine::IInferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequestImpl(
|
||||
InferenceEngine::InputsDataMap networkInputs,
|
||||
InferenceEngine::OutputsDataMap networkOutputs) {
|
||||
return std::make_shared<TemplateInferRequest>(networkInputs,
|
||||
networkOutputs,
|
||||
std::static_pointer_cast<ExecutableNetwork>(shared_from_this()));
|
||||
}
|
||||
// ! [executable_network:create_infer_request_impl]
|
||||
|
||||
// ! [executable_network:create_infer_request]
|
||||
InferenceEngine::IInferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequest() {
|
||||
auto internalRequest = CreateInferRequestImpl(_networkInputs, _networkOutputs);
|
||||
return std::make_shared<TemplateAsyncInferRequest>(std::static_pointer_cast<TemplateInferRequest>(internalRequest), _taskExecutor, _plugin->_waitExecutor,
|
||||
return std::make_shared<TemplateAsyncInferRequest>(std::static_pointer_cast<TemplateInferRequest>(internalRequest),
|
||||
_taskExecutor,
|
||||
_plugin->_waitExecutor,
|
||||
_callbackExecutor);
|
||||
}
|
||||
// ! [executable_network:create_infer_request]
|
||||
@ -154,11 +170,16 @@ InferenceEngine::Parameter TemplatePlugin::ExecutableNetwork::GetConfig(const st
|
||||
InferenceEngine::Parameter TemplatePlugin::ExecutableNetwork::GetMetric(const std::string& name) const {
|
||||
// TODO: return more supported values for metrics
|
||||
if (EXEC_NETWORK_METRIC_KEY(SUPPORTED_METRICS) == name) {
|
||||
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, std::vector<std::string> {METRIC_KEY(NETWORK_NAME), METRIC_KEY(SUPPORTED_METRICS),
|
||||
METRIC_KEY(SUPPORTED_CONFIG_KEYS), METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)});
|
||||
IE_SET_METRIC_RETURN(SUPPORTED_METRICS,
|
||||
std::vector<std::string>{METRIC_KEY(NETWORK_NAME),
|
||||
METRIC_KEY(SUPPORTED_METRICS),
|
||||
METRIC_KEY(SUPPORTED_CONFIG_KEYS),
|
||||
METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)});
|
||||
} else if (EXEC_NETWORK_METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) {
|
||||
std::vector<std::string> configKeys = {CONFIG_KEY(DEVICE_ID), CONFIG_KEY(PERF_COUNT), TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS)};
|
||||
auto streamExecutorConfigKeys = InferenceEngine::IStreamsExecutor::Config {}.SupportedKeys();
|
||||
std::vector<std::string> configKeys = {CONFIG_KEY(DEVICE_ID),
|
||||
CONFIG_KEY(PERF_COUNT),
|
||||
TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS)};
|
||||
auto streamExecutorConfigKeys = InferenceEngine::IStreamsExecutor::Config{}.SupportedKeys();
|
||||
for (auto&& configKey : streamExecutorConfigKeys) {
|
||||
configKeys.emplace_back(configKey);
|
||||
}
|
||||
|
@ -23,16 +23,20 @@ class Plugin;
|
||||
// ! [executable_network:header]
|
||||
class ExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
|
||||
public:
|
||||
ExecutableNetwork(const std::shared_ptr<const ngraph::Function>& function, const InferenceEngine::InputsDataMap& inputInfoMap,
|
||||
const InferenceEngine::OutputsDataMap& outputsInfoMap, const Configuration& cfg, const std::shared_ptr<Plugin>& plugin);
|
||||
ExecutableNetwork(const std::shared_ptr<const ngraph::Function>& function,
|
||||
const InferenceEngine::InputsDataMap& inputInfoMap,
|
||||
const InferenceEngine::OutputsDataMap& outputsInfoMap,
|
||||
const Configuration& cfg,
|
||||
const std::shared_ptr<Plugin>& plugin);
|
||||
|
||||
ExecutableNetwork(std::istream& model, const Configuration& cfg, const std::shared_ptr<Plugin>& plugin);
|
||||
|
||||
// Methods from a base class ExecutableNetworkThreadSafeDefault
|
||||
|
||||
void Export(std::ostream& model) override;
|
||||
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
|
||||
InferenceEngine::OutputsDataMap networkOutputs) override;
|
||||
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(
|
||||
InferenceEngine::InputsDataMap networkInputs,
|
||||
InferenceEngine::OutputsDataMap networkOutputs) override;
|
||||
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override;
|
||||
InferenceEngine::Parameter GetMetric(const std::string& name) const override;
|
||||
InferenceEngine::Parameter GetConfig(const std::string& name) const override;
|
||||
@ -40,7 +44,8 @@ public:
|
||||
private:
|
||||
friend class TemplateInferRequest;
|
||||
|
||||
void CompileNetwork(const std::shared_ptr<const ngraph::Function>& function, const InferenceEngine::InputsDataMap& inputInfoMap,
|
||||
void CompileNetwork(const std::shared_ptr<const ngraph::Function>& function,
|
||||
const InferenceEngine::InputsDataMap& inputInfoMap,
|
||||
const InferenceEngine::OutputsDataMap& outputsInfoMap);
|
||||
void InitExecutor();
|
||||
|
||||
|
@ -23,19 +23,25 @@ using namespace InferenceEngine;
|
||||
using Time = std::chrono::high_resolution_clock;
|
||||
|
||||
// ! [infer_request:ctor]
|
||||
TemplateInferRequest::TemplateInferRequest(const InferenceEngine::InputsDataMap& networkInputs, const InferenceEngine::OutputsDataMap& networkOutputs,
|
||||
TemplateInferRequest::TemplateInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
|
||||
const InferenceEngine::OutputsDataMap& networkOutputs,
|
||||
const std::shared_ptr<TemplatePlugin::ExecutableNetwork>& executableNetwork)
|
||||
: IInferRequestInternal(networkInputs, networkOutputs), _executableNetwork(executableNetwork) {
|
||||
: IInferRequestInternal(networkInputs, networkOutputs),
|
||||
_executableNetwork(executableNetwork) {
|
||||
// TODO: allocate infer request device and host buffers if needed, fill actual list of profiling tasks
|
||||
|
||||
auto requestID = std::to_string(_executableNetwork->_requestId.fetch_add(1));
|
||||
|
||||
std::string name = _executableNetwork->_function->get_friendly_name() + "_Req" + requestID;
|
||||
_profilingTask = {
|
||||
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_Preprocess"),
|
||||
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_Postprocess"),
|
||||
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_StartPipline"),
|
||||
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_WaitPipline"),
|
||||
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name +
|
||||
"_Preprocess"),
|
||||
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name +
|
||||
"_Postprocess"),
|
||||
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name +
|
||||
"_StartPipline"),
|
||||
openvino::itt::handle("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name +
|
||||
"_WaitPipline"),
|
||||
};
|
||||
|
||||
_executable = _executableNetwork->_plugin->_backend->compile(_executableNetwork->_function);
|
||||
@ -60,7 +66,10 @@ void TemplateInferRequest::allocateDeviceBuffers() {
|
||||
}
|
||||
|
||||
template <typename BlobDataMap, typename GetNetworkPrecisionF>
|
||||
static void AllocateImpl(const BlobDataMap& userDataMap, BlobMap& userBlobMap, BlobMap& deviceBlobMap, GetNetworkPrecisionF&& GetNetworkPrecision,
|
||||
static void AllocateImpl(const BlobDataMap& userDataMap,
|
||||
BlobMap& userBlobMap,
|
||||
BlobMap& deviceBlobMap,
|
||||
GetNetworkPrecisionF&& GetNetworkPrecision,
|
||||
bool isInputBlob = true) {
|
||||
for (auto&& userData : userDataMap) {
|
||||
const auto& dims = userData.second->getTensorDesc().getDims();
|
||||
@ -95,7 +104,9 @@ void TemplateInferRequest::allocateBlobs() {
|
||||
});
|
||||
auto&& results = _executableNetwork->_function->get_results();
|
||||
AllocateImpl(
|
||||
_networkOutputs, _outputs, _networkOutputBlobs,
|
||||
_networkOutputs,
|
||||
_outputs,
|
||||
_networkOutputBlobs,
|
||||
[&](const std::string& blobName) {
|
||||
return results.at(_executableNetwork->_outputIndex.at(blobName))->get_element_type();
|
||||
},
|
||||
@ -114,8 +125,10 @@ void TemplateInferRequest::InferImpl() {
|
||||
|
||||
template <typename SrcT, typename DstT>
|
||||
static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
|
||||
ngraph::runtime::reference::convert<SrcT, DstT>(InferenceEngine::as<InferenceEngine::MemoryBlob>(src)->rmap().as<const SrcT*>(),
|
||||
InferenceEngine::as<InferenceEngine::MemoryBlob>(dst)->wmap().as<DstT*>(), src->size());
|
||||
ngraph::runtime::reference::convert<SrcT, DstT>(
|
||||
InferenceEngine::as<InferenceEngine::MemoryBlob>(src)->rmap().as<const SrcT*>(),
|
||||
InferenceEngine::as<InferenceEngine::MemoryBlob>(dst)->wmap().as<DstT*>(),
|
||||
src->size());
|
||||
}
|
||||
|
||||
static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
|
||||
@ -128,8 +141,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
|
||||
blobCopy<std::uint8_t, float>(src, dst);
|
||||
} break;
|
||||
default: {
|
||||
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to "
|
||||
<< dst->getTensorDesc().getPrecision();
|
||||
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
|
||||
<< " to " << dst->getTensorDesc().getPrecision();
|
||||
}
|
||||
}
|
||||
} break;
|
||||
@ -141,8 +154,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
|
||||
blobCopy<float, std::uint8_t>(src, dst);
|
||||
} break;
|
||||
default: {
|
||||
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to "
|
||||
<< dst->getTensorDesc().getPrecision();
|
||||
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
|
||||
<< " to " << dst->getTensorDesc().getPrecision();
|
||||
}
|
||||
}
|
||||
} break;
|
||||
@ -154,8 +167,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
|
||||
blobCopy<int64_t, int32_t>(src, dst);
|
||||
} break;
|
||||
default: {
|
||||
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to "
|
||||
<< dst->getTensorDesc().getPrecision();
|
||||
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
|
||||
<< " to " << dst->getTensorDesc().getPrecision();
|
||||
}
|
||||
}
|
||||
} break;
|
||||
@ -167,8 +180,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
|
||||
blobCopy<int16_t, float>(src, dst);
|
||||
} break;
|
||||
default: {
|
||||
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to "
|
||||
<< dst->getTensorDesc().getPrecision();
|
||||
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
|
||||
<< " to " << dst->getTensorDesc().getPrecision();
|
||||
}
|
||||
}
|
||||
} break;
|
||||
@ -180,8 +193,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
|
||||
blobCopy<int8_t, float>(src, dst);
|
||||
} break;
|
||||
default: {
|
||||
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to "
|
||||
<< dst->getTensorDesc().getPrecision();
|
||||
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
|
||||
<< " to " << dst->getTensorDesc().getPrecision();
|
||||
}
|
||||
}
|
||||
} break;
|
||||
@ -193,8 +206,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
|
||||
blobCopy<bool, float>(src, dst);
|
||||
} break;
|
||||
default: {
|
||||
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to "
|
||||
<< dst->getTensorDesc().getPrecision();
|
||||
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
|
||||
<< " to " << dst->getTensorDesc().getPrecision();
|
||||
}
|
||||
}
|
||||
} break;
|
||||
@ -206,8 +219,8 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
|
||||
blobCopy<uint16_t, float>(src, dst);
|
||||
} break;
|
||||
default: {
|
||||
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() << " to "
|
||||
<< dst->getTensorDesc().getPrecision();
|
||||
IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision()
|
||||
<< " to " << dst->getTensorDesc().getPrecision();
|
||||
}
|
||||
}
|
||||
} break;
|
||||
@ -230,7 +243,9 @@ void TemplateInferRequest::inferPreprocess() {
|
||||
const auto& parameterShape = parameter->get_shape();
|
||||
const auto& parameterType = parameter->get_element_type();
|
||||
_inputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor(
|
||||
parameterType, parameterShape, InferenceEngine::as<InferenceEngine::MemoryBlob>(networkInput.second)->rmap().as<void*>());
|
||||
parameterType,
|
||||
parameterShape,
|
||||
InferenceEngine::as<InferenceEngine::MemoryBlob>(networkInput.second)->rmap().as<void*>());
|
||||
}
|
||||
for (auto&& output : _outputs) {
|
||||
auto outputBlob = output.second;
|
||||
@ -243,7 +258,9 @@ void TemplateInferRequest::inferPreprocess() {
|
||||
const auto& resultShape = result->get_shape();
|
||||
const auto& resultType = result->get_element_type();
|
||||
_outputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor(
|
||||
resultType, resultShape, InferenceEngine::as<InferenceEngine::MemoryBlob>(networkOutput)->wmap().as<void*>());
|
||||
resultType,
|
||||
resultShape,
|
||||
InferenceEngine::as<InferenceEngine::MemoryBlob>(networkOutput)->wmap().as<void*>());
|
||||
}
|
||||
_durations[Preprocess] = Time::now() - start;
|
||||
}
|
||||
|
@ -26,7 +26,8 @@ class TemplateInferRequest : public InferenceEngine::IInferRequestInternal {
|
||||
public:
|
||||
typedef std::shared_ptr<TemplateInferRequest> Ptr;
|
||||
|
||||
TemplateInferRequest(const InferenceEngine::InputsDataMap& networkInputs, const InferenceEngine::OutputsDataMap& networkOutputs,
|
||||
TemplateInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
|
||||
const InferenceEngine::OutputsDataMap& networkOutputs,
|
||||
const std::shared_ptr<ExecutableNetwork>& executableNetwork);
|
||||
~TemplateInferRequest();
|
||||
|
||||
|
@ -38,7 +38,8 @@ Plugin::Plugin() {
|
||||
_backend = ngraph::runtime::Backend::create("INTERPRETER");
|
||||
|
||||
// create default stream executor with a given name
|
||||
_waitExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateWaitExecutor"});
|
||||
_waitExecutor =
|
||||
InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateWaitExecutor"});
|
||||
}
|
||||
// ! [plugin:ctor]
|
||||
|
||||
@ -54,7 +55,8 @@ Plugin::~Plugin() {
|
||||
|
||||
// ! [plugin:transform_network]
|
||||
|
||||
std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const ngraph::Function>& function, const InferenceEngine::InputsDataMap& inputInfoMap,
|
||||
std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const ngraph::Function>& function,
|
||||
const InferenceEngine::InputsDataMap& inputInfoMap,
|
||||
const InferenceEngine::OutputsDataMap& outputsInfoMap) {
|
||||
// 1. Copy ngraph::Function first to apply some transformations which modify original ngraph::Function
|
||||
auto transformedNetwork = ngraph::clone_function(*function);
|
||||
@ -70,13 +72,15 @@ std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const n
|
||||
bool needF16toF32 = false;
|
||||
for (const auto& param : function->get_parameters()) {
|
||||
if (param->get_element_type() == ngraph::element::f16 &&
|
||||
inputInfoMap.at(param->get_friendly_name())->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP16) {
|
||||
inputInfoMap.at(param->get_friendly_name())->getTensorDesc().getPrecision() !=
|
||||
InferenceEngine::Precision::FP16) {
|
||||
needF16toF32 = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (needF16toF32)
|
||||
passManager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ngraph::element::f16, ngraph::element::f32}});
|
||||
passManager.register_pass<ngraph::pass::ConvertPrecision>(
|
||||
precisions_array{{ngraph::element::f16, ngraph::element::f32}});
|
||||
// Example: register plugin specific transformation
|
||||
passManager.register_pass<ngraph::pass::DecomposeDivideMatcher>();
|
||||
passManager.register_pass<ngraph::pass::ReluReluFusionMatcher>();
|
||||
@ -92,32 +96,41 @@ std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const n
|
||||
// ! [plugin:transform_network]
|
||||
|
||||
// ! [plugin:load_exe_network_impl]
|
||||
InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork& network, const ConfigMap& config) {
|
||||
InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork& network,
|
||||
const ConfigMap& config) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::LoadExeNetworkImpl");
|
||||
|
||||
InferenceEngine::InputsDataMap networkInputs = network.getInputsInfo();
|
||||
InferenceEngine::OutputsDataMap networkOutputs = network.getOutputsInfo();
|
||||
|
||||
auto fullConfig = Configuration {config, _cfg};
|
||||
return std::make_shared<ExecutableNetwork>(network.getFunction(), networkInputs, networkOutputs, fullConfig,
|
||||
auto fullConfig = Configuration{config, _cfg};
|
||||
return std::make_shared<ExecutableNetwork>(network.getFunction(),
|
||||
networkInputs,
|
||||
networkOutputs,
|
||||
fullConfig,
|
||||
std::static_pointer_cast<Plugin>(shared_from_this()));
|
||||
}
|
||||
// ! [plugin:load_exe_network_impl]
|
||||
|
||||
// ! [plugin:import_network]
|
||||
InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(std::istream& modelStream, const std::map<std::string, std::string>& config) {
|
||||
InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(
|
||||
std::istream& modelStream,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::ImportNetwork");
|
||||
|
||||
auto fullConfig = Configuration {config, _cfg};
|
||||
return std::make_shared<ExecutableNetwork>(modelStream, fullConfig, std::static_pointer_cast<Plugin>(shared_from_this()));
|
||||
auto fullConfig = Configuration{config, _cfg};
|
||||
return std::make_shared<ExecutableNetwork>(modelStream,
|
||||
fullConfig,
|
||||
std::static_pointer_cast<Plugin>(shared_from_this()));
|
||||
}
|
||||
// ! [plugin:import_network]
|
||||
|
||||
// ! [plugin:query_network]
|
||||
InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::CNNNetwork& network, const ConfigMap& config) const {
|
||||
InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::CNNNetwork& network,
|
||||
const ConfigMap& config) const {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::QueryNetwork");
|
||||
|
||||
Configuration fullConfig {config, _cfg, false};
|
||||
Configuration fullConfig{config, _cfg, false};
|
||||
auto function = network.getFunction();
|
||||
|
||||
// 1. First of all we should store initial input operation set
|
||||
@ -160,7 +173,8 @@ InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::
|
||||
// 5. If some housekeeping nodes were not added, add them.
|
||||
if (InferenceEngine::details::contains(supported, node->get_friendly_name())) {
|
||||
for (auto&& inputNodeOutput : node->input_values()) {
|
||||
if (ngraph::op::is_constant(inputNodeOutput.get_node()) || ngraph::op::is_parameter(inputNodeOutput.get_node())) {
|
||||
if (ngraph::op::is_constant(inputNodeOutput.get_node()) ||
|
||||
ngraph::op::is_parameter(inputNodeOutput.get_node())) {
|
||||
supported.emplace(inputNodeOutput.get_node()->get_friendly_name());
|
||||
}
|
||||
}
|
||||
@ -175,11 +189,14 @@ InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::
|
||||
|
||||
// 6. Eliminate subgraphs that consist of housekeeping nodes only
|
||||
if (ngraph::op::is_constant(node) || ngraph::op::is_parameter(node)) {
|
||||
if (!InferenceEngine::details::contains(supported, node->output(0).get_target_inputs().begin()->get_node()->get_friendly_name())) {
|
||||
if (!InferenceEngine::details::contains(
|
||||
supported,
|
||||
node->output(0).get_target_inputs().begin()->get_node()->get_friendly_name())) {
|
||||
supported.erase(node->get_friendly_name());
|
||||
}
|
||||
} else if (ngraph::op::is_output(node)) {
|
||||
if (!InferenceEngine::details::contains(supported, node->input_values().begin()->get_node()->get_friendly_name())) {
|
||||
if (!InferenceEngine::details::contains(supported,
|
||||
node->input_values().begin()->get_node()->get_friendly_name())) {
|
||||
supported.erase(node->get_friendly_name());
|
||||
}
|
||||
}
|
||||
@ -204,27 +221,36 @@ void Plugin::AddExtension(const InferenceEngine::IExtensionPtr& /*extension*/) {
|
||||
|
||||
// ! [plugin:set_config]
|
||||
void Plugin::SetConfig(const ConfigMap& config) {
|
||||
_cfg = Configuration {config, _cfg};
|
||||
_cfg = Configuration{config, _cfg};
|
||||
}
|
||||
// ! [plugin:set_config]
|
||||
|
||||
// ! [plugin:get_config]
|
||||
InferenceEngine::Parameter Plugin::GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& /*options*/) const {
|
||||
InferenceEngine::Parameter Plugin::GetConfig(
|
||||
const std::string& name,
|
||||
const std::map<std::string, InferenceEngine::Parameter>& /*options*/) const {
|
||||
return _cfg.Get(name);
|
||||
}
|
||||
// ! [plugin:get_config]
|
||||
|
||||
// ! [plugin:get_metric]
|
||||
InferenceEngine::Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const {
|
||||
InferenceEngine::Parameter Plugin::GetMetric(const std::string& name,
|
||||
const std::map<std::string, InferenceEngine::Parameter>& options) const {
|
||||
if (METRIC_KEY(SUPPORTED_METRICS) == name) {
|
||||
std::vector<std::string> supportedMetrics = {METRIC_KEY(AVAILABLE_DEVICES), METRIC_KEY(SUPPORTED_METRICS),
|
||||
METRIC_KEY(SUPPORTED_CONFIG_KEYS), METRIC_KEY(FULL_DEVICE_NAME),
|
||||
METRIC_KEY(IMPORT_EXPORT_SUPPORT), METRIC_KEY(DEVICE_ARCHITECTURE),
|
||||
METRIC_KEY(OPTIMIZATION_CAPABILITIES), METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)};
|
||||
std::vector<std::string> supportedMetrics = {METRIC_KEY(AVAILABLE_DEVICES),
|
||||
METRIC_KEY(SUPPORTED_METRICS),
|
||||
METRIC_KEY(SUPPORTED_CONFIG_KEYS),
|
||||
METRIC_KEY(FULL_DEVICE_NAME),
|
||||
METRIC_KEY(IMPORT_EXPORT_SUPPORT),
|
||||
METRIC_KEY(DEVICE_ARCHITECTURE),
|
||||
METRIC_KEY(OPTIMIZATION_CAPABILITIES),
|
||||
METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)};
|
||||
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, supportedMetrics);
|
||||
} else if (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) {
|
||||
std::vector<std::string> configKeys = {CONFIG_KEY(DEVICE_ID), CONFIG_KEY(PERF_COUNT), TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS)};
|
||||
auto streamExecutorConfigKeys = InferenceEngine::IStreamsExecutor::Config {}.SupportedKeys();
|
||||
std::vector<std::string> configKeys = {CONFIG_KEY(DEVICE_ID),
|
||||
CONFIG_KEY(PERF_COUNT),
|
||||
TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS)};
|
||||
auto streamExecutorConfigKeys = InferenceEngine::IStreamsExecutor::Config{}.SupportedKeys();
|
||||
for (auto&& configKey : streamExecutorConfigKeys) {
|
||||
if (configKey != InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS) {
|
||||
configKeys.emplace_back(configKey);
|
||||
@ -251,7 +277,7 @@ InferenceEngine::Parameter Plugin::GetMetric(const std::string& name, const std:
|
||||
} else if (METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS) == name) {
|
||||
// TODO: fill with actual values
|
||||
using uint = unsigned int;
|
||||
IE_SET_METRIC_RETURN(RANGE_FOR_ASYNC_INFER_REQUESTS, std::make_tuple(uint {1}, uint {1}, uint {1}));
|
||||
IE_SET_METRIC_RETURN(RANGE_FOR_ASYNC_INFER_REQUESTS, std::make_tuple(uint{1}, uint{1}, uint{1}));
|
||||
} else {
|
||||
IE_THROW(NotFound) << "Unsupported device metric: " << name;
|
||||
}
|
||||
|
@ -23,12 +23,19 @@ public:
|
||||
void SetConfig(const std::map<std::string, std::string>& config) override;
|
||||
InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network,
|
||||
const std::map<std::string, std::string>& config) const override;
|
||||
InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::CNNNetwork& network,
|
||||
const std::map<std::string, std::string>& config) override;
|
||||
InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(
|
||||
const InferenceEngine::CNNNetwork& network,
|
||||
const std::map<std::string, std::string>& config) override;
|
||||
void AddExtension(const std::shared_ptr<InferenceEngine::IExtension>& extension) override;
|
||||
InferenceEngine::Parameter GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override;
|
||||
InferenceEngine::Parameter GetMetric(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override;
|
||||
InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetwork(std::istream& model, const std::map<std::string, std::string>& config) override;
|
||||
InferenceEngine::Parameter GetConfig(
|
||||
const std::string& name,
|
||||
const std::map<std::string, InferenceEngine::Parameter>& options) const override;
|
||||
InferenceEngine::Parameter GetMetric(
|
||||
const std::string& name,
|
||||
const std::map<std::string, InferenceEngine::Parameter>& options) const override;
|
||||
InferenceEngine::IExecutableNetworkInternal::Ptr ImportNetwork(
|
||||
std::istream& model,
|
||||
const std::map<std::string, std::string>& config) override;
|
||||
|
||||
private:
|
||||
friend class ExecutableNetwork;
|
||||
|
@ -28,7 +28,10 @@ ngraph::pass::AddMeanSubtract::AddMeanSubtract(const MeanMap& inputInfoMap) {
|
||||
}
|
||||
|
||||
auto mean_const = it->second;
|
||||
NGRAPH_CHECK(mean_const->get_element_type() == ngraph::element::f32, "Mean for ", param->get_friendly_name(), " must have f32 type");
|
||||
NGRAPH_CHECK(mean_const->get_element_type() == ngraph::element::f32,
|
||||
"Mean for ",
|
||||
param->get_friendly_name(),
|
||||
" must have f32 type");
|
||||
|
||||
auto copy_param = param->clone_with_new_inputs({});
|
||||
auto sub = std::make_shared<ngraph::opset3::Subtract>(copy_param, mean_const);
|
||||
|
@ -12,7 +12,8 @@
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::AddPreprocessing, "AddPreprocessing", 0);
|
||||
|
||||
ngraph::pass::AddPreprocessing::AddPreprocessing(const InferenceEngine::InputsDataMap& inputInfoMap): m_inputInfoMap(inputInfoMap) {}
|
||||
ngraph::pass::AddPreprocessing::AddPreprocessing(const InferenceEngine::InputsDataMap& inputInfoMap)
|
||||
: m_inputInfoMap(inputInfoMap) {}
|
||||
|
||||
bool ngraph::pass::AddPreprocessing::run_on_function(std::shared_ptr<ngraph::Function> f) {
|
||||
ngraph::pass::AddMeanSubtract::MeanMap meanMap;
|
||||
@ -39,10 +40,12 @@ bool ngraph::pass::AddPreprocessing::run_on_function(std::shared_ptr<ngraph::Fun
|
||||
has_mean_image = true;
|
||||
if (c == 0) {
|
||||
meanImage = pInfo[c]->meanData;
|
||||
NGRAPH_CHECK(meanImage->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32,
|
||||
"Only InferenceEngine::Precision::FP32 precision is supported for PreProcessChannel::meanData");
|
||||
NGRAPH_CHECK(
|
||||
meanImage->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32,
|
||||
"Only InferenceEngine::Precision::FP32 precision is supported for PreProcessChannel::meanData");
|
||||
} else {
|
||||
NGRAPH_CHECK(meanImage->getTensorDesc() == pInfo[c]->meanData->getTensorDesc(), "TensorDesc for PreProcessChannel::meanData must be equal");
|
||||
NGRAPH_CHECK(meanImage->getTensorDesc() == pInfo[c]->meanData->getTensorDesc(),
|
||||
"TensorDesc for PreProcessChannel::meanData must be equal");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -52,7 +55,8 @@ bool ngraph::pass::AddPreprocessing::run_on_function(std::shared_ptr<ngraph::Fun
|
||||
continue;
|
||||
}
|
||||
|
||||
NGRAPH_CHECK(!(has_mean_image && has_scales), "Only PreProcessChannel::meanData or PreProcessChannel::meanValue can be set.");
|
||||
NGRAPH_CHECK(!(has_mean_image && has_scales),
|
||||
"Only PreProcessChannel::meanData or PreProcessChannel::meanValue can be set.");
|
||||
|
||||
if (has_scales) {
|
||||
ngraph::Shape shape(inputDims.size(), 1);
|
||||
|
@ -28,7 +28,10 @@ ngraph::pass::AddStdScale::AddStdScale(const ScaleMap& inputInfoMap) {
|
||||
}
|
||||
|
||||
auto scale_const = it->second;
|
||||
NGRAPH_CHECK(scale_const->get_element_type() == ngraph::element::f32, "Scale for ", param->get_friendly_name(), " must have f32 type");
|
||||
NGRAPH_CHECK(scale_const->get_element_type() == ngraph::element::f32,
|
||||
"Scale for ",
|
||||
param->get_friendly_name(),
|
||||
" must have f32 type");
|
||||
|
||||
auto copy_param = param->clone_with_new_inputs({});
|
||||
auto div = std::make_shared<ngraph::opset3::Divide>(copy_param, it->second);
|
||||
|
@ -24,7 +24,8 @@ bool pass::MyFunctionTransformation::run_on_function(std::shared_ptr<ngraph::Fun
|
||||
// Check that input and output shapes are fully defined (not dynamic) and the number of consumers equals 1
|
||||
Input<Node> input = node->input(0);
|
||||
Output<Node> output = node->output(0);
|
||||
if (input.get_partial_shape().is_static() && output.get_partial_shape().is_static() && output.get_target_inputs().size() == 1) {
|
||||
if (input.get_partial_shape().is_static() && output.get_partial_shape().is_static() &&
|
||||
output.get_target_inputs().size() == 1) {
|
||||
nodes.push_back(node);
|
||||
}
|
||||
}
|
||||
@ -32,7 +33,8 @@ bool pass::MyFunctionTransformation::run_on_function(std::shared_ptr<ngraph::Fun
|
||||
|
||||
// Print types and names for collected nodes
|
||||
for (auto& node : nodes) {
|
||||
std::cout << "Type: " << node->get_type_info().name << std::endl << "Name: " << node->get_friendly_name() << std::endl;
|
||||
std::cout << "Type: " << node->get_type_info().name << std::endl
|
||||
<< "Name: " << node->get_friendly_name() << std::endl;
|
||||
}
|
||||
|
||||
// Return false because we didn't change nGraph Function
|
||||
|
@ -33,7 +33,9 @@ ngraph::pass::DecomposeDivideMatcher::DecomposeDivideMatcher() {
|
||||
}
|
||||
|
||||
// Decompose Divide into Multiply with Power operations
|
||||
auto pow = std::make_shared<ngraph::opset3::Power>(div->input_value(1), opset3::Constant::create(div->get_input_element_type(1), Shape {1}, {-1}));
|
||||
auto pow = std::make_shared<ngraph::opset3::Power>(
|
||||
div->input_value(1),
|
||||
opset3::Constant::create(div->get_input_element_type(1), Shape{1}, {-1}));
|
||||
|
||||
auto mul = std::make_shared<ngraph::opset3::Multiply>(div->input_value(0), pow);
|
||||
|
||||
@ -70,7 +72,8 @@ ngraph::pass::ReluReluFusionMatcher::ReluReluFusionMatcher() {
|
||||
auto& node_to_output = m.get_pattern_value_map();
|
||||
|
||||
// Create a new Relu operation and register it for additional execution
|
||||
auto new_relu = register_new_node<ngraph::opset3::Relu>(node_to_output.at(m_relu1).get_node_shared_ptr()->input_value(0));
|
||||
auto new_relu =
|
||||
register_new_node<ngraph::opset3::Relu>(node_to_output.at(m_relu1).get_node_shared_ptr()->input_value(0));
|
||||
|
||||
// Copy runtime info attributes to newly created operation
|
||||
ngraph::copy_runtime_info(m.get_matched_nodes(), new_relu);
|
||||
|
94
docs/template_plugin/tests/functional/op_reference/atanh.cpp
Normal file
94
docs/template_plugin/tests/functional/op_reference/atanh.cpp
Normal file
@ -0,0 +1,94 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include <ie_core.hpp>
#include <ie_ngraph_utils.hpp>
#include <limits>
#include <algorithm>
#include <ngraph/ngraph.hpp>
#include <shared_test_classes/base/layer_test_utils.hpp>

#include "base_reference_test.hpp"

using namespace reference_tests;
using namespace ngraph;
using namespace InferenceEngine;
|
||||
struct AtanhParams {
|
||||
template <class IT>
|
||||
AtanhParams(const ngraph::PartialShape& shape, const ngraph::element::Type& iType, const std::vector<IT>& iValues)
|
||||
: pshape(shape), inType(iType), outType(iType), inputData(CreateBlob(iType, iValues)) {
|
||||
std::vector<IT> oValues;
|
||||
std::vector<double> output;
|
||||
for (auto element : iValues)
|
||||
output.push_back(static_cast<double>(element));
|
||||
|
||||
std::transform(output.begin(), output.end(), output.begin(), [](double input) -> double {
|
||||
return std::atanh(input);
|
||||
});
|
||||
|
||||
if (std::is_integral<IT>()) {
|
||||
std::transform(output.begin(), output.end(), output.begin(), [](double input) -> double {
|
||||
return std::round(input);
|
||||
});
|
||||
}
|
||||
|
||||
for (auto element : output)
|
||||
oValues.push_back(static_cast<IT>(element));
|
||||
refData = CreateBlob(outType, oValues);
|
||||
}
|
||||
ngraph::PartialShape pshape;
|
||||
ngraph::element::Type inType;
|
||||
ngraph::element::Type outType;
|
||||
InferenceEngine::Blob::Ptr inputData;
|
||||
InferenceEngine::Blob::Ptr refData;
|
||||
};
|
||||
|
||||
class ReferenceAtanhLayerTest : public testing::TestWithParam<AtanhParams>, public CommonReferenceTest {
|
||||
public:
|
||||
void SetUp() override {
|
||||
auto params = GetParam();
|
||||
function = CreateFunction(params.pshape, params.inType, params.outType);
|
||||
inputData = {params.inputData};
|
||||
refOutData = {params.refData};
|
||||
}
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<AtanhParams>& obj) {
|
||||
auto param = obj.param;
|
||||
std::ostringstream result;
|
||||
result << "shape=" << param.pshape << "_";
|
||||
result << "iType=" << param.inType << "_";
|
||||
result << "oType=" << param.outType;
|
||||
return result.str();
|
||||
}
|
||||
|
||||
private:
|
||||
static std::shared_ptr<Function> CreateFunction(const PartialShape& input_shape, const element::Type& input_type,
|
||||
const element::Type& expected_output_type) {
|
||||
const auto in = std::make_shared<op::Parameter>(input_type, input_shape);
|
||||
const auto atanh = std::make_shared<op::Atanh>(in);
|
||||
return std::make_shared<Function>(NodeVector {atanh}, ParameterVector {in});
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(ReferenceAtanhLayerTest, CompareWithRefs) {
|
||||
Exec();
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_Atanh_With_Hardcoded_Refs, ReferenceAtanhLayerTest,
|
||||
::testing::Values(AtanhParams(ngraph::PartialShape {2, 4}, ngraph::element::f32,
|
||||
std::vector<float> {-INFINITY, -2.0f, -1.0f, -0.5f, 0.0f, 0.8f, 1.0f, INFINITY}),
|
||||
AtanhParams(ngraph::PartialShape {2, 4}, ngraph::element::f16,
|
||||
std::vector<float16> {-INFINITY, -2.0f, -1.0f, -0.5f, -0.0f, 0.8f, 1.0f, INFINITY}),
|
||||
AtanhParams(ngraph::PartialShape {2, 3}, ngraph::element::i32,
|
||||
std::vector<int32_t> {std::numeric_limits<int32_t>::min(), -2, -1, 1, 2, std::numeric_limits<int32_t>::max()}),
|
||||
AtanhParams(ngraph::PartialShape {2, 3}, ngraph::element::u32,
|
||||
std::vector<uint32_t> {std::numeric_limits<uint32_t>::min(), 0, 1, 2, 3, std::numeric_limits<uint32_t>::max()}),
|
||||
AtanhParams(ngraph::PartialShape {2, 3}, ngraph::element::i64,
|
||||
std::vector<int64_t> {std::numeric_limits<int64_t>::min(), -2, -1, 1, 2, std::numeric_limits<int64_t>::max()}),
|
||||
AtanhParams(ngraph::PartialShape {2, 3}, ngraph::element::u64,
|
||||
std::vector<uint64_t> {std::numeric_limits<uint64_t>::min(), 0, 1, 2, 3, std::numeric_limits<uint64_t>::max()})),
|
||||
ReferenceAtanhLayerTest::getTestCaseName);
|
@ -75,6 +75,48 @@ std::vector<RefComparisonParams> generateComparisonCombinedParams() {
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()),
|
||||
ReferenceComparisonLayerTest::getTestCaseName);
|
||||
|
||||
template <element::Type_t IN_ET>
|
||||
std::vector<RefComparisonParams> generateNumericParams(const element::Type& type) {
|
||||
using T = typename element_type_traits<IN_ET>::value_type;
|
||||
std::vector<RefComparisonParams> compParams {
|
||||
Builder {}
|
||||
.compType(ComparisonTypes::EQUAL)
|
||||
.input1({{4}, type, std::vector<T> {-2.5f, 25.5f, 2.25f, NAN}})
|
||||
.input2({{4}, type, std::vector<T> {10.0f, 5.0f, 2.25f, 10.0f}})
|
||||
.expected({{4}, element::boolean, std::vector<char> {0, 0, 1, 0, }}),
|
||||
Builder {}
|
||||
.compType(ComparisonTypes::EQUAL)
|
||||
.input1({{2, 3}, type, std::vector<T> {0.0f, NAN, NAN, 1.0f, 21.0f, -INFINITY}})
|
||||
.input2({{2, 3}, type, std::vector<T> {1.0f, NAN, 23.0f, 1.0f, 19.0f, 21.0f}})
|
||||
.expected({{2, 3}, element::boolean, std::vector<char> {0, 0, 0, 1, 0, 0}}),
|
||||
Builder {}
|
||||
.compType(ComparisonTypes::EQUAL)
|
||||
.input1({{1}, type, std::vector<T> {INFINITY}})
|
||||
.input2({{1}, type, std::vector<T> {INFINITY}})
|
||||
.expected({{1}, element::boolean, std::vector<char> {1}}),
|
||||
Builder {}
|
||||
.compType(ComparisonTypes::EQUAL)
|
||||
.input1({{5}, type, std::vector<T> {-2.5f, 25.5f, 2.25f, INFINITY, 6.0f}})
|
||||
.input2({{5}, type, std::vector<T> {10.0f, 5.0f, 2.25f, 10.0f, -INFINITY}})
|
||||
.expected({{5}, element::boolean, std::vector<char> {0, 0, 1, 0, 0}})};
|
||||
return compParams;
|
||||
}
|
||||
|
||||
std::vector<RefComparisonParams> generateNumericCombinedParams() {
|
||||
const std::vector<std::vector<RefComparisonParams>> compTypeParams {
|
||||
generateNumericParams<element::Type_t::f16>(element::f16),
|
||||
generateNumericParams<element::Type_t::f32>(element::f32)};
|
||||
std::vector<RefComparisonParams> combinedParams;
|
||||
|
||||
for (const auto& params : compTypeParams) {
|
||||
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
|
||||
}
|
||||
return combinedParams;
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Numeric_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateNumericCombinedParams()),
|
||||
ReferenceComparisonLayerTest::getTestCaseName);
|
||||
} // namespace
|
||||
} // namespace ComparisonOpsRefTestDefinitions
|
||||
} // namespace reference_tests
|
||||
|
@ -1,6 +1,7 @@
BasedOnStyle: Google
IndentWidth: 4
UseTab: Never
ColumnLimit: 120

Language: Cpp
Standard: Cpp11
@ -8,18 +9,20 @@ Standard: Cpp11
|
||||
AccessModifierOffset: -4
|
||||
AlignConsecutiveMacros: true
|
||||
AllowAllArgumentsOnNextLine: false
|
||||
AllowAllConstructorInitializersOnNextLine: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: false
|
||||
AllowShortFunctionsOnASingleLine: Empty
|
||||
AllowShortIfStatementsOnASingleLine: Never
|
||||
AllowShortLambdasOnASingleLine: Empty
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AlwaysBreakBeforeMultilineStrings: false
|
||||
ColumnLimit: 160
|
||||
# Specialize this comment pragma in order to avoid changes in SEA copyrights
|
||||
BinPackArguments: false
|
||||
BinPackParameters: false
|
||||
CommentPragmas: '^#'
|
||||
DerivePointerAlignment: false
|
||||
FixNamespaceComments: true
|
||||
IndentCaseLabels: false
|
||||
IndentPPDirectives: BeforeHash
|
||||
SpaceBeforeCpp11BracedList: true
|
||||
SpaceBeforeCtorInitializerColon: false
|
||||
IndentPPDirectives: AfterHash
|
||||
ForEachMacros:
|
||||
- foreach
|
||||
- FOREACH_CHILD
|
||||
|
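The .clang-format changes above (ColumnLimit lowered from 160 to 120, BinPackArguments and BinPackParameters set to false, AllowAllParametersOfDeclarationOnNextLine set to false, IndentPPDirectives switched from BeforeHash to AfterHash) drive the reflow seen in the C++ hunks of this commit. As an illustration only, reusing the CompileNetwork declaration changed elsewhere in this commit rather than anything from this file, the same declaration under the old and new settings:

// before: packed parameters under the 160-column limit
void CompileNetwork(const std::shared_ptr<const ngraph::Function>& function, const InferenceEngine::InputsDataMap& inputInfoMap,
                    const InferenceEngine::OutputsDataMap& outputsInfoMap);

// after: ColumnLimit 120 with BinPackParameters: false, one parameter per line
void CompileNetwork(const std::shared_ptr<const ngraph::Function>& function,
                    const InferenceEngine::InputsDataMap& inputInfoMap,
                    const InferenceEngine::OutputsDataMap& outputsInfoMap);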
@ -24,8 +24,8 @@ int image_add_rectangles(c_mat_t* img, rectangle_t rects[], int classes[], int n
|
||||
|
||||
#else
|
||||
|
||||
#include <algorithm>
|
||||
#include <opencv2/opencv.hpp>
|
||||
# include <algorithm>
|
||||
# include <opencv2/opencv.hpp>
|
||||
|
||||
int image_read(const char* img_path, c_mat_t* img) {
|
||||
if (img_path == nullptr || img == nullptr) {
|
||||
@ -102,9 +102,11 @@ int image_free(c_mat_t* img) {
|
||||
int image_add_rectangles(c_mat_t* img, rectangle_t rects[], int classes[], int num, int thickness) {
|
||||
int colors_num = 21;
|
||||
color_t colors[21] = {// colors to be used for bounding boxes
|
||||
{128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190}, {153, 153, 153}, {30, 170, 250},
|
||||
{0, 220, 220}, {35, 142, 107}, {152, 251, 152}, {180, 130, 70}, {60, 20, 220}, {0, 0, 255}, {142, 0, 0},
|
||||
{70, 0, 0}, {100, 60, 0}, {90, 0, 0}, {230, 0, 0}, {32, 11, 119}, {0, 74, 111}, {81, 0, 81}};
|
||||
{128, 64, 128}, {232, 35, 244}, {70, 70, 70}, {156, 102, 102}, {153, 153, 190},
|
||||
{153, 153, 153}, {30, 170, 250}, {0, 220, 220}, {35, 142, 107}, {152, 251, 152},
|
||||
{180, 130, 70}, {60, 20, 220}, {0, 0, 255}, {142, 0, 0}, {70, 0, 0},
|
||||
{100, 60, 0}, {90, 0, 0}, {230, 0, 0}, {32, 11, 119}, {0, 74, 111},
|
||||
{81, 0, 81}};
|
||||
|
||||
for (int i = 0; i < num; i++) {
|
||||
int x = rects[i].x_min;
|
||||
|
@ -6,23 +6,23 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
#define OPENCV_C_EXTERN extern "C"
|
||||
# define OPENCV_C_EXTERN extern "C"
|
||||
#else
|
||||
#define OPENCV_C_EXTERN
|
||||
# define OPENCV_C_EXTERN
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) && (__GNUC__ < 4)
|
||||
#define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __VA_ARGS__
|
||||
# define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __VA_ARGS__
|
||||
#else
|
||||
#if defined(_WIN32)
|
||||
#ifdef opencv_c_wrapper_EXPORTS
|
||||
#define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __declspec(dllexport) __VA_ARGS__ __cdecl
|
||||
#else
|
||||
#define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __declspec(dllimport) __VA_ARGS__ __cdecl
|
||||
#endif
|
||||
#else
|
||||
#define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __attribute__((visibility("default"))) __VA_ARGS__
|
||||
#endif
|
||||
# if defined(_WIN32)
|
||||
# ifdef opencv_c_wrapper_EXPORTS
|
||||
# define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __declspec(dllexport) __VA_ARGS__ __cdecl
|
||||
# else
|
||||
# define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __declspec(dllimport) __VA_ARGS__ __cdecl
|
||||
# endif
|
||||
# else
|
||||
# define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __attribute__((visibility("default"))) __VA_ARGS__
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
|
@ -6,43 +6,43 @@
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN_UNDEF
|
||||
#endif
|
||||
# ifndef WIN32_LEAN_AND_MEAN
|
||||
# define WIN32_LEAN_AND_MEAN
|
||||
# define WIN32_LEAN_AND_MEAN_UNDEF
|
||||
# endif
|
||||
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#define NOMINMAX_UNDEF
|
||||
#endif
|
||||
# ifndef NOMINMAX
|
||||
# define NOMINMAX
|
||||
# define NOMINMAX_UNDEF
|
||||
# endif
|
||||
|
||||
#if defined(_M_IX86) && !defined(_X86_) && !defined(_AMD64_)
|
||||
#define _X86_
|
||||
#endif
|
||||
# if defined(_M_IX86) && !defined(_X86_) && !defined(_AMD64_)
|
||||
# define _X86_
|
||||
# endif
|
||||
|
||||
#if defined(_M_X64) && !defined(_X86_) && !defined(_AMD64_)
|
||||
#define _AMD64_
|
||||
#endif
|
||||
# if defined(_M_X64) && !defined(_X86_) && !defined(_AMD64_)
|
||||
# define _AMD64_
|
||||
# endif
|
||||
|
||||
#if defined(_M_ARM) && !defined(_ARM_) && !defined(_ARM64_)
|
||||
#define _ARM_
|
||||
#endif
|
||||
# if defined(_M_ARM) && !defined(_ARM_) && !defined(_ARM64_)
|
||||
# define _ARM_
|
||||
# endif
|
||||
|
||||
#if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_)
|
||||
#define _ARM64_
|
||||
#endif
|
||||
# if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_)
|
||||
# define _ARM64_
|
||||
# endif
|
||||
|
||||
// clang-format off
|
||||
// clang-format off
|
||||
#include <string.h>
|
||||
#include <windef.h>
|
||||
#include <fileapi.h>
|
||||
#include <Winbase.h>
|
||||
#include <sys/stat.h>
|
||||
// clang-format on
|
||||
// clang-format on
|
||||
|
||||
// Copied from linux libc sys/stat.h:
|
||||
#define S_ISREG(m) (((m)&S_IFMT) == S_IFREG)
|
||||
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
|
||||
// Copied from linux libc sys/stat.h:
|
||||
# define S_ISREG(m) (((m)&S_IFMT) == S_IFREG)
|
||||
# define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
|
||||
|
||||
/// @brief structure to store directory names
|
||||
typedef struct dirent {
|
||||
@ -171,19 +171,19 @@ static void closedir(DIR* dp) {
|
||||
free(dp);
|
||||
}
|
||||
|
||||
#ifdef WIN32_LEAN_AND_MEAN_UNDEF
|
||||
#undef WIN32_LEAN_AND_MEAN
|
||||
#undef WIN32_LEAN_AND_MEAN_UNDEF
|
||||
#endif
|
||||
# ifdef WIN32_LEAN_AND_MEAN_UNDEF
|
||||
# undef WIN32_LEAN_AND_MEAN
|
||||
# undef WIN32_LEAN_AND_MEAN_UNDEF
|
||||
# endif
|
||||
|
||||
#ifdef NOMINMAX_UNDEF
|
||||
#undef NOMINMAX_UNDEF
|
||||
#undef NOMINMAX
|
||||
#endif
|
||||
# ifdef NOMINMAX_UNDEF
|
||||
# undef NOMINMAX_UNDEF
|
||||
# undef NOMINMAX
|
||||
# endif
|
||||
|
||||
#else
|
||||
|
||||
#include <dirent.h>
|
||||
#include <sys/types.h>
|
||||
# include <dirent.h>
|
||||
# include <sys/types.h>
|
||||
|
||||
#endif
|
@ -12,9 +12,9 @@
|
||||
#include "object_detection_sample_ssd.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "c_w_dirent.h"
|
||||
# include "c_w_dirent.h"
|
||||
#else
|
||||
#include <dirent.h>
|
||||
# include <dirent.h>
|
||||
#endif
|
||||
|
||||
#define MAX_IMAGES 20
|
||||
@ -346,7 +346,10 @@ int main(int argc, char** argv) {
|
||||
goto err;
|
||||
for (i = 0; i < ver.num_vers; ++i) {
|
||||
printf(" %s\n", ver.versions[i].device_name);
|
||||
printf(" %s version ......... %zu.%zu\n", ver.versions[i].description, ver.versions[i].major, ver.versions[i].minor);
|
||||
printf(" %s version ......... %zu.%zu\n",
|
||||
ver.versions[i].description,
|
||||
ver.versions[i].major,
|
||||
ver.versions[i].minor);
|
||||
printf(" Build ......... %s\n", ver.versions[i].build_number);
|
||||
}
|
||||
ie_core_versions_free(&ver);
|
||||
@ -360,7 +363,8 @@ int main(int argc, char** argv) {
|
||||
printf("%sCustom extension loaded: %s\n", info, custom_ex_library_msg);
|
||||
}
|
||||
|
||||
if (custom_plugin_cfg_msg && (strcmp(device_name, "GPU") == 0 || strcmp(device_name, "MYRIAD") == 0 || strcmp(device_name, "HDDL") == 0)) {
|
||||
if (custom_plugin_cfg_msg &&
|
||||
(strcmp(device_name, "GPU") == 0 || strcmp(device_name, "MYRIAD") == 0 || strcmp(device_name, "HDDL") == 0)) {
|
||||
// Config for device plugin custom extension is loaded from an .xml
|
||||
// description
|
||||
ie_config_t cfg = {"CONFIG_FILE", custom_plugin_cfg_msg, NULL};
|
||||
@ -480,7 +484,12 @@ int main(int argc, char** argv) {
|
||||
for (j = 0; j < resized_img.mat_data_size; ++j)
|
||||
resized_img.mat_data[j] = img.mat_data[j];
|
||||
} else {
|
||||
printf("%sImage is resized from (%d, %d) to (%zu, %zu)\n", warn, img.mat_width, img.mat_height, input_width, input_height);
|
||||
printf("%sImage is resized from (%d, %d) to (%zu, %zu)\n",
|
||||
warn,
|
||||
img.mat_width,
|
||||
img.mat_height,
|
||||
input_width,
|
||||
input_height);
|
||||
|
||||
if (image_resize(&img, &resized_img, (int)input_width, (int)input_height) == -1) {
|
||||
printf("%sImage %s cannot be resized!\n", warn, file_paths[i]);
|
||||
@ -623,7 +632,8 @@ int main(int argc, char** argv) {
|
||||
for (ch = 0; ch < num_channels; ++ch) {
|
||||
/** [images stride + channels stride + pixel id ] all in bytes
|
||||
* **/
|
||||
data[image_id * image_size * num_channels + ch * image_size + pid] = images[image_id].mat_data[pid * num_channels + ch];
|
||||
data[image_id * image_size * num_channels + ch * image_size + pid] =
|
||||
images[image_id].mat_data[pid * num_channels + ch];
|
||||
}
|
||||
}
|
||||
image_free(&images[image_id]);
|
||||
@ -704,7 +714,15 @@ int main(int argc, char** argv) {
|
||||
int xmax = (int)(detection[curProposal * objectSize + 5] * originalImages[image_id].mat_width);
|
||||
int ymax = (int)(detection[curProposal * objectSize + 6] * originalImages[image_id].mat_height);
|
||||
|
||||
printf("[%d, %d] element, prob = %f (%d, %d)-(%d, %d) batch id : %d", curProposal, label, confidence, xmin, ymin, xmax, ymax, image_id);
|
||||
printf("[%d, %d] element, prob = %f (%d, %d)-(%d, %d) batch id : %d",
|
||||
curProposal,
|
||||
label,
|
||||
confidence,
|
||||
xmin,
|
||||
ymin,
|
||||
xmax,
|
||||
ymax,
|
||||
image_id);
|
||||
|
||||
if (confidence > 0.5) {
|
||||
/** Drawing only objects with >50% probability **/
|
||||
@ -722,7 +740,11 @@ int main(int argc, char** argv) {
|
||||
int batch_id;
|
||||
for (batch_id = 0; batch_id < batchSize; ++batch_id) {
|
||||
if (object_num[batch_id] > 0) {
|
||||
image_add_rectangles(&originalImages[batch_id], boxes[batch_id], classes[batch_id], object_num[batch_id], 2);
|
||||
image_add_rectangles(&originalImages[batch_id],
|
||||
boxes[batch_id],
|
||||
classes[batch_id],
|
||||
object_num[batch_id],
|
||||
2);
|
||||
}
|
||||
const char* out = "out_";
|
||||
char str_num[16] = {0};
|
||||
|
@ -16,14 +16,16 @@ static const char* model_message = "Required. Path to an .xml file with a traine
|
||||
static const char* image_message = "Required. Path to one or more images or folder with images.";
|
||||
|
||||
/// @brief message for assigning cnn calculation to device
|
||||
static const char* target_device_message = "Optional. Specify the target device to infer. "
|
||||
"Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify "
|
||||
"HETERO plugin. "
|
||||
"Sample will look for a suitable plugin for device specified.";
|
||||
static const char* target_device_message =
|
||||
"Optional. Specify the target device to infer. "
|
||||
"Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify "
|
||||
"HETERO plugin. "
|
||||
"Sample will look for a suitable plugin for device specified.";
|
||||
|
||||
/// @brief message for plugin custom kernels desc
|
||||
static const char* custom_plugin_config_message = "Required for GPU, MYRIAD, HDDL custom kernels. "
|
||||
"Absolute path to the .xml config file with the kernels descriptions.";
|
||||
static const char* custom_plugin_config_message =
|
||||
"Required for GPU, MYRIAD, HDDL custom kernels. "
|
||||
"Absolute path to the .xml config file with the kernels descriptions.";
|
||||
|
||||
/// @brief message for user extension library argument
|
||||
static const char* custom_ex_library_message = "Required for CPU plugin custom layers. "
|
||||
|
@ -1,6 +1,7 @@
|
||||
BasedOnStyle: Google
|
||||
IndentWidth: 4
|
||||
UseTab: Never
|
||||
ColumnLimit: 120
|
||||
|
||||
Language: Cpp
|
||||
Standard: Cpp11
|
||||
@ -8,18 +9,20 @@ Standard: Cpp11
|
||||
AccessModifierOffset: -4
|
||||
AlignConsecutiveMacros: true
|
||||
AllowAllArgumentsOnNextLine: false
|
||||
AllowAllConstructorInitializersOnNextLine: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: false
|
||||
AllowShortFunctionsOnASingleLine: Empty
|
||||
AllowShortIfStatementsOnASingleLine: Never
|
||||
AllowShortLambdasOnASingleLine: Empty
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AlwaysBreakBeforeMultilineStrings: false
|
||||
ColumnLimit: 160
|
||||
# Specialize this comment pragma in order to avoid changes in SEA copyrights
|
||||
BinPackArguments: false
|
||||
BinPackParameters: false
|
||||
CommentPragmas: '^#'
|
||||
DerivePointerAlignment: false
|
||||
FixNamespaceComments: true
|
||||
IndentCaseLabels: false
|
||||
IndentPPDirectives: BeforeHash
|
||||
SpaceBeforeCpp11BracedList: true
|
||||
SpaceBeforeCtorInitializerColon: false
|
||||
IndentPPDirectives: AfterHash
|
||||
ForEachMacros:
|
||||
- foreach
|
||||
- FOREACH_CHILD
|
||||
|
@ -8,11 +8,17 @@
|
||||
#include "ie_plugin_config.hpp"
|
||||
|
||||
const std::string EXPORTED_NETWORK_NAME = "undefined";
|
||||
std::map<std::string, InferenceEngine::Precision> precision_map = {
|
||||
{"FP32", InferenceEngine::Precision::FP32}, {"FP64", InferenceEngine::Precision::FP64}, {"FP16", InferenceEngine::Precision::FP16},
|
||||
{"I8", InferenceEngine::Precision::I8}, {"I16", InferenceEngine::Precision::I16}, {"I32", InferenceEngine::Precision::I32},
|
||||
{"I64", InferenceEngine::Precision::I64}, {"U8", InferenceEngine::Precision::U8}, {"U16", InferenceEngine::Precision::U16},
|
||||
{"U32", InferenceEngine::Precision::U32}, {"U64", InferenceEngine::Precision::U64}};
|
||||
std::map<std::string, InferenceEngine::Precision> precision_map = {{"FP32", InferenceEngine::Precision::FP32},
|
||||
{"FP64", InferenceEngine::Precision::FP64},
|
||||
{"FP16", InferenceEngine::Precision::FP16},
|
||||
{"I8", InferenceEngine::Precision::I8},
|
||||
{"I16", InferenceEngine::Precision::I16},
|
||||
{"I32", InferenceEngine::Precision::I32},
|
||||
{"I64", InferenceEngine::Precision::I64},
|
||||
{"U8", InferenceEngine::Precision::U8},
|
||||
{"U16", InferenceEngine::Precision::U16},
|
||||
{"U32", InferenceEngine::Precision::U32},
|
||||
{"U64", InferenceEngine::Precision::U64}};
|
||||
|
||||
std::map<std::string, InferenceEngine::Layout> layout_map = {{"ANY", InferenceEngine::Layout::ANY},
|
||||
{"NCHW", InferenceEngine::Layout::NCHW},
|
||||
@ -200,7 +206,8 @@ InferenceEnginePython::IENetwork InferenceEnginePython::read_network(std::string
|
||||
return InferenceEnginePython::IENetwork(std::make_shared<InferenceEngine::CNNNetwork>(net));
|
||||
}
|
||||
|
||||
InferenceEnginePython::IENetwork::IENetwork(const std::shared_ptr<InferenceEngine::CNNNetwork>& cnn_network): actual(cnn_network) {
|
||||
InferenceEnginePython::IENetwork::IENetwork(const std::shared_ptr<InferenceEngine::CNNNetwork>& cnn_network)
|
||||
: actual(cnn_network) {
|
||||
if (actual == nullptr)
|
||||
IE_THROW() << "IENetwork was not initialized.";
|
||||
name = actual->getName();
|
||||
@ -286,7 +293,9 @@ void InferenceEnginePython::IENetwork::reshape(const std::map<std::string, std::
|
||||
actual->reshape(input_shapes);
|
||||
}
|
||||
|
||||
InferenceEnginePython::IEExecNetwork::IEExecNetwork(const std::string& name, size_t num_requests): infer_requests(num_requests), name(name) {
|
||||
InferenceEnginePython::IEExecNetwork::IEExecNetwork(const std::string& name, size_t num_requests)
|
||||
: infer_requests(num_requests),
|
||||
name(name) {
|
||||
request_queue_ptr = std::make_shared<IdleInferRequestQueue>();
|
||||
}
|
||||
|
||||
@ -333,16 +342,19 @@ std::shared_ptr<InferenceEngine::ExecutableNetwork> InferenceEnginePython::IEExe
|
||||
return actual;
|
||||
}
|
||||
|
||||
void InferenceEnginePython::InferRequestWrap::setBlob(const std::string& blob_name, const InferenceEngine::Blob::Ptr& blob_ptr) {
|
||||
void InferenceEnginePython::InferRequestWrap::setBlob(const std::string& blob_name,
|
||||
const InferenceEngine::Blob::Ptr& blob_ptr) {
|
||||
request_ptr.SetBlob(blob_name.c_str(), blob_ptr);
|
||||
}
|
||||
|
||||
void InferenceEnginePython::InferRequestWrap::setBlob(const std::string& blob_name, const InferenceEngine::Blob::Ptr& blob_ptr,
|
||||
void InferenceEnginePython::InferRequestWrap::setBlob(const std::string& blob_name,
|
||||
const InferenceEngine::Blob::Ptr& blob_ptr,
|
||||
const InferenceEngine::PreProcessInfo& info) {
|
||||
request_ptr.SetBlob(blob_name.c_str(), blob_ptr, info);
|
||||
}
|
||||
|
||||
const InferenceEngine::PreProcessInfo& InferenceEnginePython::InferRequestWrap::getPreProcess(const std::string& blob_name) {
|
||||
const InferenceEngine::PreProcessInfo& InferenceEnginePython::InferRequestWrap::getPreProcess(
|
||||
const std::string& blob_name) {
|
||||
return request_ptr.GetPreProcess(blob_name.c_str());
|
||||
}
|
||||
|
||||
@ -392,7 +404,8 @@ int InferenceEnginePython::InferRequestWrap::wait(int64_t timeout) {
|
||||
return static_cast<int>(code);
|
||||
}
|
||||
|
||||
std::map<std::string, InferenceEnginePython::ProfileInfo> InferenceEnginePython::InferRequestWrap::getPerformanceCounts() {
|
||||
std::map<std::string, InferenceEnginePython::ProfileInfo>
|
||||
InferenceEnginePython::InferRequestWrap::getPerformanceCounts() {
|
||||
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perf_counts = request_ptr.GetPerformanceCounts();
|
||||
std::map<std::string, InferenceEnginePython::ProfileInfo> perf_map;
|
||||
|
||||
@ -430,7 +443,8 @@ InferenceEnginePython::IECore::IECore(const std::string& xmlConfigFile) {
|
||||
actual = InferenceEngine::Core(xmlConfigFile);
|
||||
}
|
||||
|
||||
std::map<std::string, InferenceEngine::Version> InferenceEnginePython::IECore::getVersions(const std::string& deviceName) {
|
||||
std::map<std::string, InferenceEngine::Version> InferenceEnginePython::IECore::getVersions(
|
||||
const std::string& deviceName) {
|
||||
return actual.GetVersions(deviceName);
|
||||
}
|
||||
|
||||
@ -485,31 +499,38 @@ void InferenceEnginePython::IEExecNetwork::createInferRequests(int num_requests)
|
||||
infer_request.request_queue_ptr = request_queue_ptr;
|
||||
infer_request.request_ptr = actual->CreateInferRequest();
|
||||
|
||||
infer_request.request_ptr.SetCompletionCallback<std::function<void(InferenceEngine::InferRequest r, InferenceEngine::StatusCode)>>(
|
||||
[&](InferenceEngine::InferRequest request, InferenceEngine::StatusCode code) {
|
||||
if (code != InferenceEngine::StatusCode::OK) {
|
||||
IE_EXCEPTION_SWITCH(code, ExceptionType,
|
||||
InferenceEngine::details::ThrowNow<ExceptionType> {} <<=
|
||||
std::stringstream {} << IE_LOCATION << InferenceEngine::details::ExceptionTraits<ExceptionType>::string());
|
||||
}
|
||||
infer_request.request_ptr
|
||||
.SetCompletionCallback<std::function<void(InferenceEngine::InferRequest r, InferenceEngine::StatusCode)>>(
|
||||
[&](InferenceEngine::InferRequest request, InferenceEngine::StatusCode code) {
|
||||
if (code != InferenceEngine::StatusCode::OK) {
|
||||
IE_EXCEPTION_SWITCH(code,
|
||||
ExceptionType,
|
||||
InferenceEngine::details::ThrowNow<ExceptionType>{} <<=
|
||||
std::stringstream{}
|
||||
<< IE_LOCATION
|
||||
<< InferenceEngine::details::ExceptionTraits<ExceptionType>::string());
|
||||
}
|
||||
|
||||
auto end_time = Time::now();
|
||||
auto execTime = std::chrono::duration_cast<ns>(end_time - infer_request.start_time);
|
||||
infer_request.exec_time = static_cast<double>(execTime.count()) * 0.000001;
|
||||
infer_request.request_queue_ptr->setRequestIdle(infer_request.index);
|
||||
if (infer_request.user_callback) {
|
||||
infer_request.user_callback(infer_request.user_data, code);
|
||||
}
|
||||
});
|
||||
auto end_time = Time::now();
|
||||
auto execTime = std::chrono::duration_cast<ns>(end_time - infer_request.start_time);
|
||||
infer_request.exec_time = static_cast<double>(execTime.count()) * 0.000001;
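// Note: execTime is in nanoseconds, so multiplying by 0.000001 converts it to milliseconds.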
|
||||
infer_request.request_queue_ptr->setRequestIdle(infer_request.index);
|
||||
if (infer_request.user_callback) {
|
||||
infer_request.user_callback(infer_request.user_data, code);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
InferenceEnginePython::IENetwork InferenceEnginePython::IECore::readNetwork(const std::string& modelPath, const std::string& binPath) {
|
||||
InferenceEnginePython::IENetwork InferenceEnginePython::IECore::readNetwork(const std::string& modelPath,
|
||||
const std::string& binPath) {
|
||||
InferenceEngine::CNNNetwork net = actual.ReadNetwork(modelPath, binPath);
|
||||
return IENetwork(std::make_shared<InferenceEngine::CNNNetwork>(net));
|
||||
}
|
||||
|
||||
InferenceEnginePython::IENetwork InferenceEnginePython::IECore::readNetwork(const std::string& model, const uint8_t* bin, size_t bin_size) {
|
||||
InferenceEnginePython::IENetwork InferenceEnginePython::IECore::readNetwork(const std::string& model,
|
||||
const uint8_t* bin,
|
||||
size_t bin_size) {
|
||||
InferenceEngine::MemoryBlob::Ptr weights_blob;
|
||||
if (bin_size != 0) {
|
||||
InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, {bin_size}, InferenceEngine::Layout::C);
|
||||
@ -521,44 +542,58 @@ InferenceEnginePython::IENetwork InferenceEnginePython::IECore::readNetwork(cons
|
||||
return IENetwork(std::make_shared<InferenceEngine::CNNNetwork>(net));
|
||||
}
|
||||
|
||||
std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::loadNetwork(IENetwork network, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config,
|
||||
int num_requests) {
|
||||
auto exec_network = InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(network.name, num_requests);
|
||||
exec_network->actual = std::make_shared<InferenceEngine::ExecutableNetwork>(actual.LoadNetwork(*network.actual, deviceName, config));
|
||||
std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::loadNetwork(
|
||||
IENetwork network,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config,
|
||||
int num_requests) {
|
||||
auto exec_network =
|
||||
InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(network.name, num_requests);
|
||||
exec_network->actual =
|
||||
std::make_shared<InferenceEngine::ExecutableNetwork>(actual.LoadNetwork(*network.actual, deviceName, config));
|
||||
exec_network->createInferRequests(num_requests);
|
||||
|
||||
return exec_network;
|
||||
}
|
||||
|
||||
std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::loadNetworkFromFile(const std::string& modelPath,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config,
|
||||
int num_requests) {
|
||||
auto exec_network = InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(modelPath, num_requests);
|
||||
exec_network->actual = std::make_shared<InferenceEngine::ExecutableNetwork>(actual.LoadNetwork(modelPath, deviceName, config));
|
||||
std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::loadNetworkFromFile(
|
||||
const std::string& modelPath,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config,
|
||||
int num_requests) {
|
||||
auto exec_network =
|
||||
InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(modelPath, num_requests);
|
||||
exec_network->actual =
|
||||
std::make_shared<InferenceEngine::ExecutableNetwork>(actual.LoadNetwork(modelPath, deviceName, config));
|
||||
exec_network->createInferRequests(num_requests);
|
||||
|
||||
return exec_network;
|
||||
}
|
||||
|
||||
std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::importNetwork(const std::string& modelFIle, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config,
|
||||
int num_requests) {
|
||||
auto exec_network = InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(EXPORTED_NETWORK_NAME, num_requests);
|
||||
exec_network->actual = std::make_shared<InferenceEngine::ExecutableNetwork>(actual.ImportNetwork(modelFIle, deviceName, config));
|
||||
std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::importNetwork(
|
||||
const std::string& modelFIle,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config,
|
||||
int num_requests) {
|
||||
auto exec_network =
|
||||
InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(EXPORTED_NETWORK_NAME, num_requests);
|
||||
exec_network->actual =
|
||||
std::make_shared<InferenceEngine::ExecutableNetwork>(actual.ImportNetwork(modelFIle, deviceName, config));
|
||||
exec_network->createInferRequests(num_requests);
|
||||
|
||||
return exec_network;
|
||||
}
|
||||
|
||||
std::map<std::string, std::string> InferenceEnginePython::IECore::queryNetwork(InferenceEnginePython::IENetwork network, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
std::map<std::string, std::string> InferenceEnginePython::IECore::queryNetwork(
|
||||
InferenceEnginePython::IENetwork network,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
auto res = actual.QueryNetwork(*network.actual, deviceName, config);
|
||||
return res.supportedLayersMap;
|
||||
}
|
||||
|
||||
void InferenceEnginePython::IECore::setConfig(const std::map<std::string, std::string>& config, const std::string& deviceName) {
|
||||
void InferenceEnginePython::IECore::setConfig(const std::map<std::string, std::string>& config,
|
||||
const std::string& deviceName) {
|
||||
actual.SetConfig(config, deviceName);
|
||||
}
|
||||
|
||||
|
@ -115,7 +115,9 @@ struct InferRequestWrap {
|
||||
|
||||
void setBlob(const std::string& blob_name, const InferenceEngine::Blob::Ptr& blob_ptr);
|
||||
|
||||
void setBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data, const InferenceEngine::PreProcessInfo& info);
|
||||
void setBlob(const std::string& name,
|
||||
const InferenceEngine::Blob::Ptr& data,
|
||||
const InferenceEngine::PreProcessInfo& info);
|
||||
|
||||
void setBatch(int size);
|
||||
|
||||
@ -160,13 +162,23 @@ struct IECore {
|
||||
std::map<std::string, InferenceEngine::Version> getVersions(const std::string& deviceName);
|
||||
InferenceEnginePython::IENetwork readNetwork(const std::string& modelPath, const std::string& binPath);
|
||||
InferenceEnginePython::IENetwork readNetwork(const std::string& model, const uint8_t* bin, size_t bin_size);
|
||||
std::unique_ptr<InferenceEnginePython::IEExecNetwork> loadNetwork(IENetwork network, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config, int num_requests);
|
||||
std::unique_ptr<InferenceEnginePython::IEExecNetwork> loadNetworkFromFile(const std::string& modelPath, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config, int num_requests);
|
||||
std::unique_ptr<InferenceEnginePython::IEExecNetwork> importNetwork(const std::string& modelFIle, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config, int num_requests);
|
||||
std::map<std::string, std::string> queryNetwork(IENetwork network, const std::string& deviceName, const std::map<std::string, std::string>& config);
|
||||
std::unique_ptr<InferenceEnginePython::IEExecNetwork> loadNetwork(IENetwork network,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config,
|
||||
int num_requests);
|
||||
std::unique_ptr<InferenceEnginePython::IEExecNetwork> loadNetworkFromFile(
|
||||
const std::string& modelPath,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config,
|
||||
int num_requests);
|
||||
std::unique_ptr<InferenceEnginePython::IEExecNetwork> importNetwork(
|
||||
const std::string& modelFIle,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config,
|
||||
int num_requests);
|
||||
std::map<std::string, std::string> queryNetwork(IENetwork network,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config);
|
||||
void setConfig(const std::map<std::string, std::string>& config, const std::string& deviceName = std::string());
|
||||
void registerPlugin(const std::string& pluginName, const std::string& deviceName);
|
||||
void unregisterPlugin(const std::string& deviceName);
|
||||
|
@ -26,7 +26,8 @@ void InferenceEnginePython::ApplyPOTTransformations(InferenceEnginePython::IENet
|
||||
manager.run_passes(network.actual->getFunction());
|
||||
}
|
||||
|
||||
void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, bool use_const_initializer) {
|
||||
void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network,
|
||||
bool use_const_initializer) {
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::LowLatency2>(use_const_initializer);
|
||||
manager.run_passes(network.actual->getFunction());
|
||||
@ -38,7 +39,9 @@ void InferenceEnginePython::ApplyPruningTransformation(InferenceEnginePython::IE
|
||||
manager.run_passes(network.actual->getFunction());
|
||||
}
|
||||
|
||||
void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork network, std::string path, bool extract_names) {
|
||||
void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork network,
|
||||
std::string path,
|
||||
bool extract_names) {
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::GenerateMappingFile>(path, extract_names);
|
||||
manager.run_passes(network.actual->getFunction());
|
||||
@ -47,9 +50,10 @@ void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork
|
||||
void InferenceEnginePython::CheckAPI() {
|
||||
std::shared_ptr<ngraph::Function> f;
|
||||
{
|
||||
auto input = std::make_shared<ngraph::opset6::Parameter>(ngraph::element::f32, ngraph::Shape {1, 1000, 4});
|
||||
auto reshape = std::make_shared<ngraph::opset6::Reshape>(input, std::make_shared<ngraph::opset6::ShapeOf>(input), true);
|
||||
f = std::make_shared<ngraph::Function>(ngraph::NodeVector {reshape}, ngraph::ParameterVector {input});
|
||||
auto input = std::make_shared<ngraph::opset6::Parameter>(ngraph::element::f32, ngraph::Shape{1, 1000, 4});
|
||||
auto reshape =
|
||||
std::make_shared<ngraph::opset6::Reshape>(input, std::make_shared<ngraph::opset6::ShapeOf>(input), true);
|
||||
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{reshape}, ngraph::ParameterVector{input});
|
||||
}
|
||||
ngraph::pass::Manager m;
|
||||
m.register_pass<ngraph::pass::ConstantFolding>();
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <common_test_utils/ngraph_test_utils.hpp>
|
||||
#include <string>
|
||||
|
||||
std::pair<bool, std::string> InferenceEnginePython::CompareNetworks(InferenceEnginePython::IENetwork lhs, InferenceEnginePython::IENetwork rhs) {
|
||||
std::pair<bool, std::string> InferenceEnginePython::CompareNetworks(InferenceEnginePython::IENetwork lhs,
|
||||
InferenceEnginePython::IENetwork rhs) {
|
||||
return compare_functions(lhs.actual->getFunction(), rhs.actual->getFunction(), true, true, false, true, true);
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
BasedOnStyle: Google
|
||||
IndentWidth: 4
|
||||
UseTab: Never
|
||||
ColumnLimit: 120
|
||||
|
||||
Language: Cpp
|
||||
Standard: Cpp11
|
||||
@ -8,18 +9,20 @@ Standard: Cpp11
|
||||
AccessModifierOffset: -4
|
||||
AlignConsecutiveMacros: true
|
||||
AllowAllArgumentsOnNextLine: false
|
||||
AllowAllConstructorInitializersOnNextLine: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: false
|
||||
AllowShortFunctionsOnASingleLine: Empty
|
||||
AllowShortIfStatementsOnASingleLine: Never
|
||||
AllowShortLambdasOnASingleLine: Empty
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AlwaysBreakBeforeMultilineStrings: false
|
||||
ColumnLimit: 160
|
||||
# Specialize this comment pragma in order to avoid changes in SEA copyrights
|
||||
BinPackArguments: false
|
||||
BinPackParameters: false
|
||||
CommentPragmas: '^#'
|
||||
DerivePointerAlignment: false
|
||||
FixNamespaceComments: true
|
||||
IndentCaseLabels: false
|
||||
IndentPPDirectives: BeforeHash
|
||||
SpaceBeforeCpp11BracedList: true
|
||||
SpaceBeforeCtorInitializerColon: false
|
||||
IndentPPDirectives: AfterHash
|
||||
ForEachMacros:
|
||||
- foreach
|
||||
- FOREACH_CHILD
|
||||
|
@ -113,19 +113,7 @@ endif()

if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/gflags" AND
NOT DEFINED OpenVINO_SOURCE_DIR)
function(add_gflags)
# common gflags settings
set(GFLAGS_IS_SUBPROJECT TRUE)
set(HAVE_SYS_STAT_H 1)
set(HAVE_INTTYPES_H 1)
set(INTTYPES_FORMAT C99)
set(BUILD_TESTING OFF)
set(BUILD_SHARED_LIBS OFF)

add_subdirectory(thirdparty/gflags EXCLUDE_FROM_ALL)
set_target_properties(gflags_nothreads_static PROPERTIES FOLDER thirdparty)
endfunction()
add_gflags()
add_subdirectory(thirdparty/gflags EXCLUDE_FROM_ALL)
endif()

if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/zlib")
@ -2,7 +2,7 @@

This topic demonstrates how to use the Benchmark C++ Tool to estimate deep learning inference performance on supported devices. Performance can be measured for two inference modes: synchronous (latency-oriented) and asynchronous (throughput-oriented).

> **NOTE:** This topic describes usage of C++ implementation of the Benchmark Tool. For the Python* implementation, refer to [Benchmark Python* Tool](../../tools/benchmark_tool/README.md).
> **NOTE:** This topic describes usage of C++ implementation of the Benchmark Tool. For the Python* implementation, refer to [Benchmark Python* Tool](../../../tools/benchmark_tool/README.md).

> **TIP**: You also can work with the Benchmark Tool inside the OpenVINO™ [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench).
> [DL Workbench](@ref workbench_docs_Workbench_DG_Introduction) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare
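The two modes mentioned above map directly onto the Inference Engine API that the rest of this patch touches. A minimal hand-written sketch of the difference (not taken from the tool itself; it assumes an IR at `model.xml` and an available CPU plugin):

```cpp
#include <inference_engine.hpp>

#include <chrono>
#include <iostream>

int main() {
    InferenceEngine::Core ie;
    auto exec = ie.LoadNetwork(ie.ReadNetwork("model.xml"), "CPU");

    // Synchronous (latency-oriented): one request, each Infer() call blocks.
    auto sync_request = exec.CreateInferRequest();
    auto start = std::chrono::steady_clock::now();
    sync_request.Infer();
    auto stop = std::chrono::steady_clock::now();
    std::cout << "latency, ms: "
              << std::chrono::duration_cast<std::chrono::milliseconds>(stop - start).count()
              << std::endl;

    // Asynchronous (throughput-oriented): several requests kept in flight at once.
    auto r0 = exec.CreateInferRequest();
    auto r1 = exec.CreateInferRequest();
    r0.StartAsync();
    r1.StartAsync();
    r0.Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY);
    r1.Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY);
    return 0;
}
```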
|
@ -14,28 +14,33 @@
|
||||
static const char help_message[] = "Print a usage message";
|
||||
|
||||
/// @brief message for images argument
|
||||
static const char input_message[] = "Optional. Path to a folder with images and/or binaries or to specific image or binary file.";
|
||||
static const char input_message[] =
|
||||
"Optional. Path to a folder with images and/or binaries or to specific image or binary file.";
|
||||
|
||||
/// @brief message for model argument
|
||||
static const char model_message[] = "Required. Path to an .xml/.onnx/.prototxt file with a trained model or to a .blob files with "
|
||||
"a trained compiled model.";
|
||||
static const char model_message[] =
|
||||
"Required. Path to an .xml/.onnx/.prototxt file with a trained model or to a .blob files with "
|
||||
"a trained compiled model.";
|
||||
|
||||
/// @brief message for execution mode
|
||||
static const char api_message[] = "Optional. Enable Sync/Async API. Default value is \"async\".";
|
||||
|
||||
/// @brief message for assigning cnn calculation to device
|
||||
static const char target_device_message[] = "Optional. Specify a target device to infer on (the list of available devices is shown below). "
|
||||
"Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify "
|
||||
"HETERO plugin. "
|
||||
"Use \"-d MULTI:<comma-separated_devices_list>\" format to specify MULTI plugin. "
|
||||
"The application looks for a suitable plugin for the specified device.";
|
||||
static const char target_device_message[] =
|
||||
"Optional. Specify a target device to infer on (the list of available devices is shown below). "
|
||||
"Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify "
|
||||
"HETERO plugin. "
|
||||
"Use \"-d MULTI:<comma-separated_devices_list>\" format to specify MULTI plugin. "
|
||||
"The application looks for a suitable plugin for the specified device.";
|
||||
|
||||
/// @brief message for iterations count
|
||||
static const char iterations_count_message[] = "Optional. Number of iterations. "
|
||||
"If not specified, the number of iterations is calculated depending on a device.";
|
||||
static const char iterations_count_message[] =
|
||||
"Optional. Number of iterations. "
|
||||
"If not specified, the number of iterations is calculated depending on a device.";
|
||||
|
||||
/// @brief message for requests count
|
||||
static const char infer_requests_count_message[] = "Optional. Number of infer requests. Default value is determined automatically for device.";
|
||||
static const char infer_requests_count_message[] =
|
||||
"Optional. Number of infer requests. Default value is determined automatically for device.";
|
||||
|
||||
/// @brief message for execution time
|
||||
static const char execution_time_message[] = "Optional. Time in seconds to execute topology.";
|
||||
@ -45,86 +50,101 @@ static const char infer_num_threads_message[] = "Optional. Number of threads to
|
||||
"(including HETERO and MULTI cases).";
|
||||
|
||||
/// @brief message for #streams for CPU inference
|
||||
static const char infer_num_streams_message[] = "Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices "
|
||||
"(for HETERO and MULTI device cases use format <dev1>:<nstreams1>,<dev2>:<nstreams2> or just "
|
||||
"<nstreams>). "
|
||||
"Default value is determined automatically for a device.Please note that although the "
|
||||
"automatic selection "
|
||||
"usually provides a reasonable performance, it still may be non - optimal for some cases, "
|
||||
"especially for "
|
||||
"very small networks. See sample's README for more details. "
|
||||
"Also, using nstreams>1 is inherently throughput-oriented option, "
|
||||
"while for the best-latency estimations the number of streams should be set to 1.";
|
||||
static const char infer_num_streams_message[] =
|
||||
"Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices "
|
||||
"(for HETERO and MULTI device cases use format <dev1>:<nstreams1>,<dev2>:<nstreams2> or just "
|
||||
"<nstreams>). "
|
||||
"Default value is determined automatically for a device.Please note that although the "
|
||||
"automatic selection "
|
||||
"usually provides a reasonable performance, it still may be non - optimal for some cases, "
|
||||
"especially for "
|
||||
"very small networks. See sample's README for more details. "
|
||||
"Also, using nstreams>1 is inherently throughput-oriented option, "
|
||||
"while for the best-latency estimations the number of streams should be set to 1.";
|
||||
|
||||
/// @brief message for latency percentile settings
static const char infer_latency_percentile_message[] =
"Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median).";
"Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value "
"is 50 (median).";
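// The percentile is taken over the collected per-request latencies; a minimal hand-written
// sketch of that kind of calculation (a vector<double> `latencies` in ms and an int `percentile`
// from the option are assumed, not the tool's actual variable names):
//
//     std::sort(latencies.begin(), latencies.end());
//     size_t idx = static_cast<size_t>(latencies.size() * percentile / 100.0);
//     double reported_latency = latencies[std::min(idx, latencies.size() - 1)];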
|
||||
/// @brief message for enforcing of BF16 execution where it is possible
|
||||
static const char enforce_bf16_message[] = "Optional. By default floating point operations execution in bfloat16 precision are enforced "
|
||||
"if supported by platform.\n"
|
||||
" 'true' - enable bfloat16 regardless of platform support\n"
|
||||
" 'false' - disable bfloat16 regardless of platform support";
|
||||
static const char enforce_bf16_message[] =
|
||||
"Optional. By default floating point operations execution in bfloat16 precision are enforced "
|
||||
"if supported by platform.\n"
|
||||
" 'true' - enable bfloat16 regardless of platform support\n"
|
||||
" 'false' - disable bfloat16 regardless of platform support";
|
||||
|
||||
/// @brief message for user library argument
|
||||
static const char custom_cpu_library_message[] = "Required for CPU custom layers. Absolute path to a shared library with the kernels "
|
||||
"implementations.";
|
||||
static const char custom_cpu_library_message[] =
|
||||
"Required for CPU custom layers. Absolute path to a shared library with the kernels "
|
||||
"implementations.";
|
||||
|
||||
/// @brief message for clDNN custom kernels desc
|
||||
static const char custom_cldnn_message[] = "Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.";
|
||||
static const char custom_cldnn_message[] =
|
||||
"Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.";
|
||||
|
||||
static const char batch_size_message[] = "Optional. Batch size value. If not specified, the batch size value is determined from "
|
||||
"Intermediate Representation.";
|
||||
static const char batch_size_message[] =
|
||||
"Optional. Batch size value. If not specified, the batch size value is determined from "
|
||||
"Intermediate Representation.";
|
||||
|
||||
// @brief message for CPU threads pinning option
static const char infer_threads_pinning_message[] =
"Optional. Explicit inference threads binding options (leave empty to let the OpenVINO to make a choice):\n"
"\t\t\t\tenabling threads->cores pinning(\"YES\", which is already default for any conventional CPU), \n"
"\t\t\t\tletting the runtime to decide on the threads->different core types(\"HYBRID_AWARE\", which is default on the hybrid CPUs) \n"
"\t\t\t\tletting the runtime to decide on the threads->different core types(\"HYBRID_AWARE\", which is default on "
"the hybrid CPUs) \n"
"\t\t\t\tthreads->(NUMA)nodes(\"NUMA\") or \n"
"\t\t\t\tcompletely disable(\"NO\") CPU inference threads pinning";
|
||||
// @brief message for stream_output option
|
||||
static const char stream_output_message[] = "Optional. Print progress as a plain text. When specified, an interactive progress bar is "
|
||||
"replaced with a "
|
||||
"multiline output.";
|
||||
static const char stream_output_message[] =
|
||||
"Optional. Print progress as a plain text. When specified, an interactive progress bar is "
|
||||
"replaced with a "
|
||||
"multiline output.";
|
||||
|
||||
// @brief message for report_type option
|
||||
static const char report_type_message[] = "Optional. Enable collecting statistics report. \"no_counters\" report contains "
|
||||
"configuration options specified, resulting FPS and latency. \"average_counters\" "
|
||||
"report extends \"no_counters\" report and additionally includes average PM "
|
||||
"counters values for each layer from the network. \"detailed_counters\" report "
|
||||
"extends \"average_counters\" report and additionally includes per-layer PM "
|
||||
"counters and latency for each executed infer request.";
|
||||
static const char report_type_message[] =
|
||||
"Optional. Enable collecting statistics report. \"no_counters\" report contains "
|
||||
"configuration options specified, resulting FPS and latency. \"average_counters\" "
|
||||
"report extends \"no_counters\" report and additionally includes average PM "
|
||||
"counters values for each layer from the network. \"detailed_counters\" report "
|
||||
"extends \"average_counters\" report and additionally includes per-layer PM "
|
||||
"counters and latency for each executed infer request.";
|
||||
|
||||
// @brief message for report_folder option
|
||||
static const char report_folder_message[] = "Optional. Path to a folder where statistics report is stored.";
|
||||
|
||||
// @brief message for exec_graph_path option
|
||||
static const char exec_graph_path_message[] = "Optional. Path to a file where to store executable graph information serialized.";
|
||||
static const char exec_graph_path_message[] =
|
||||
"Optional. Path to a file where to store executable graph information serialized.";
|
||||
|
||||
// @brief message for progress bar option
|
||||
static const char progress_message[] = "Optional. Show progress bar (can affect performance measurement). Default values is "
|
||||
"\"false\".";
|
||||
static const char progress_message[] =
|
||||
"Optional. Show progress bar (can affect performance measurement). Default values is "
|
||||
"\"false\".";
|
||||
|
||||
// @brief message for performance counters option
|
||||
static const char pc_message[] = "Optional. Report performance counters.";
|
||||
|
||||
#ifdef USE_OPENCV
|
||||
// @brief message for load config option
|
||||
static const char load_config_message[] = "Optional. Path to XML/YAML/JSON file to load custom IE parameters."
|
||||
" Please note, command line parameters have higher priority then parameters from configuration "
|
||||
"file.";
|
||||
static const char load_config_message[] =
|
||||
"Optional. Path to XML/YAML/JSON file to load custom IE parameters."
|
||||
" Please note, command line parameters have higher priority then parameters from configuration "
|
||||
"file.";
|
||||
|
||||
// @brief message for dump config option
|
||||
static const char dump_config_message[] = "Optional. Path to XML/YAML/JSON file to dump IE parameters, which were set by application.";
|
||||
static const char dump_config_message[] =
|
||||
"Optional. Path to XML/YAML/JSON file to dump IE parameters, which were set by application.";
|
||||
#endif
|
||||
|
||||
static const char shape_message[] = "Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or "
|
||||
"\"[1,3,224,224]\""
|
||||
" in case of one input size.";
|
||||
static const char shape_message[] =
|
||||
"Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or "
|
||||
"\"[1,3,224,224]\""
|
||||
" in case of one input size.";
|
||||
|
||||
static const char layout_message[] = "Optional. Prompts how network layouts should be treated by application. "
|
||||
"For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size.";
|
||||
static const char layout_message[] =
|
||||
"Optional. Prompts how network layouts should be treated by application. "
|
||||
"For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size.";
|
||||
|
||||
// @brief message for enabling caching
|
||||
static const char cache_dir_message[] = "Optional. Enables caching of loaded models to specified directory. "
|
||||
@ -139,21 +159,25 @@ static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8
|
||||
|
||||
static constexpr char inputs_precision_message[] = "Optional. Specifies precision for all input layers of the network.";
|
||||
|
||||
static constexpr char outputs_precision_message[] = "Optional. Specifies precision for all output layers of the network.";
|
||||
static constexpr char outputs_precision_message[] =
|
||||
"Optional. Specifies precision for all output layers of the network.";
|
||||
|
||||
static constexpr char iop_message[] = "Optional. Specifies precision for input and output layers by name.\n"
|
||||
" Example: -iop \"input:FP16, output:FP16\".\n"
|
||||
" Notice that quotes are required.\n"
|
||||
" Overwrites precision from ip and op options for "
|
||||
"specified layers.";
|
||||
static constexpr char iop_message[] =
|
||||
"Optional. Specifies precision for input and output layers by name.\n"
|
||||
" Example: -iop \"input:FP16, output:FP16\".\n"
|
||||
" Notice that quotes are required.\n"
|
||||
" Overwrites precision from ip and op options for "
|
||||
"specified layers.";
|
||||
|
||||
static constexpr char input_image_scale_message[] = "Optional. Scale values to be used for the input image per channel.\n"
|
||||
"Values to be provided in the [R, G, B] format. Can be defined for desired input of the model.\n"
|
||||
"Example: -iscale data[255,255,255],info[255,255,255]\n";
|
||||
static constexpr char input_image_scale_message[] =
|
||||
"Optional. Scale values to be used for the input image per channel.\n"
|
||||
"Values to be provided in the [R, G, B] format. Can be defined for desired input of the model.\n"
|
||||
"Example: -iscale data[255,255,255],info[255,255,255]\n";
|
||||
|
||||
static constexpr char input_image_mean_message[] = "Optional. Mean values to be used for the input image per channel.\n"
|
||||
"Values to be provided in the [R, G, B] format. Can be defined for desired input of the model,\n"
|
||||
"Example: -imean data[255,255,255],info[255,255,255]\n";
|
||||
static constexpr char input_image_mean_message[] =
|
||||
"Optional. Mean values to be used for the input image per channel.\n"
|
||||
"Values to be provided in the [R, G, B] format. Can be defined for desired input of the model,\n"
|
||||
"Example: -imean data[255,255,255],info[255,255,255]\n";
|
||||
|
||||
/// @brief Define flag for showing help message <br>
|
||||
DEFINE_bool(h, false, help_message);
|
||||
|
@ -23,7 +23,8 @@ typedef std::chrono::nanoseconds ns;
|
||||
|
||||
typedef std::function<void(size_t id, const double latency)> QueueCallbackFunction;
|
||||
|
||||
/// @brief Wrapper class for InferenceEngine::InferRequest. Handles asynchronous callbacks and calculates execution time.
|
||||
/// @brief Wrapper class for InferenceEngine::InferRequest. Handles asynchronous callbacks and calculates execution
|
||||
/// time.
|
||||
class InferReqWrap final {
|
||||
public:
|
||||
using Ptr = std::shared_ptr<InferReqWrap>;
|
||||
@ -31,7 +32,9 @@ public:
|
||||
~InferReqWrap() = default;
|
||||
|
||||
explicit InferReqWrap(InferenceEngine::ExecutableNetwork& net, size_t id, QueueCallbackFunction callbackQueue)
|
||||
: _request(net.CreateInferRequest()), _id(id), _callbackQueue(callbackQueue) {
|
||||
: _request(net.CreateInferRequest()),
|
||||
_id(id),
|
||||
_callbackQueue(callbackQueue) {
|
||||
_request.SetCompletionCallback([&]() {
|
||||
_endTime = Time::now();
|
||||
_callbackQueue(_id, getExecutionTimeInMilliseconds());
|
||||
@ -79,8 +82,10 @@ class InferRequestsQueue final {
|
||||
public:
|
||||
InferRequestsQueue(InferenceEngine::ExecutableNetwork& net, size_t nireq) {
|
||||
for (size_t id = 0; id < nireq; id++) {
|
||||
requests.push_back(
|
||||
std::make_shared<InferReqWrap>(net, id, std::bind(&InferRequestsQueue::putIdleRequest, this, std::placeholders::_1, std::placeholders::_2)));
|
||||
requests.push_back(std::make_shared<InferReqWrap>(
|
||||
net,
|
||||
id,
|
||||
std::bind(&InferRequestsQueue::putIdleRequest, this, std::placeholders::_1, std::placeholders::_2)));
|
||||
_idleIds.push(id);
|
||||
}
|
||||
resetTimes();
|
||||
@ -90,7 +95,8 @@ public:
|
||||
// So it should be released before any context that the request can use inside internal asynchronous tasks
|
||||
// For example all members of InferRequestsQueue would be destroyed before `requests` vector
|
||||
// So requests can try to use this members from `putIdleRequest()` that would be called from request callback
|
||||
// To avoid this we should move this vector declaration after all members declaration or just clear it manually in destructor
|
||||
// To avoid this we should move this vector declaration after all members declaration or just clear it manually
|
||||
// in destructor
|
||||
requests.clear();
|
||||
}
|
||||
|
||||
|
@ -16,14 +16,15 @@
|
||||
using namespace InferenceEngine;
|
||||
|
||||
#ifdef USE_OPENCV
|
||||
static const std::vector<std::string> supported_image_extensions = {"bmp", "dib", "jpeg", "jpg", "jpe", "jp2", "png",
|
||||
"pbm", "pgm", "ppm", "sr", "ras", "tiff", "tif"};
|
||||
static const std::vector<std::string> supported_image_extensions =
|
||||
{"bmp", "dib", "jpeg", "jpg", "jpe", "jp2", "png", "pbm", "pgm", "ppm", "sr", "ras", "tiff", "tif"};
|
||||
#else
|
||||
static const std::vector<std::string> supported_image_extensions = {"bmp"};
|
||||
#endif
|
||||
static const std::vector<std::string> supported_binary_extensions = {"bin"};
|
||||
|
||||
std::vector<std::string> filterFilesByExtensions(const std::vector<std::string>& filePaths, const std::vector<std::string>& extensions) {
|
||||
std::vector<std::string> filterFilesByExtensions(const std::vector<std::string>& filePaths,
|
||||
const std::vector<std::string>& extensions) {
|
||||
std::vector<std::string> filtered;
|
||||
auto getExtension = [](const std::string& name) {
|
||||
auto extensionPosition = name.rfind('.', name.size());
|
||||
@ -40,8 +41,13 @@ std::vector<std::string> filterFilesByExtensions(const std::vector<std::string>&
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void fillBlobImage(Blob::Ptr& inputBlob, const std::vector<std::string>& filePaths, const size_t& batchSize, const benchmark_app::InputInfo& app_info,
|
||||
const size_t& requestId, const size_t& inputId, const size_t& inputSize) {
|
||||
void fillBlobImage(Blob::Ptr& inputBlob,
|
||||
const std::vector<std::string>& filePaths,
|
||||
const size_t& batchSize,
|
||||
const benchmark_app::InputInfo& app_info,
|
||||
const size_t& requestId,
|
||||
const size_t& inputId,
|
||||
const size_t& inputSize) {
|
||||
MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
|
||||
if (!minput) {
|
||||
IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in "
|
||||
@ -57,7 +63,8 @@ void fillBlobImage(Blob::Ptr& inputBlob, const std::vector<std::string>& filePat
|
||||
std::vector<std::shared_ptr<uint8_t>> vreader;
|
||||
vreader.reserve(batchSize);
|
||||
|
||||
for (size_t i = 0ULL, inputIndex = requestId * batchSize * inputSize + inputId; i < batchSize; i++, inputIndex += inputSize) {
|
||||
for (size_t i = 0ULL, inputIndex = requestId * batchSize * inputSize + inputId; i < batchSize;
|
||||
i++, inputIndex += inputSize) {
|
||||
inputIndex %= filePaths.size();
|
||||
|
||||
slog::info << "Prepare image " << filePaths[inputIndex] << slog::endl;
|
||||
@ -88,11 +95,13 @@ void fillBlobImage(Blob::Ptr& inputBlob, const std::vector<std::string>& filePat
|
||||
for (size_t ch = 0; ch < numChannels; ++ch) {
|
||||
/** [images stride + channels stride + pixel id ] all in
|
||||
* bytes **/
|
||||
size_t offset = imageId * numChannels * width * height + (((app_info.layout == "NCHW") || (app_info.layout == "CHW"))
|
||||
? (ch * width * height + h * width + w)
|
||||
: (h * width * numChannels + w * numChannels + ch));
|
||||
size_t offset = imageId * numChannels * width * height +
|
||||
(((app_info.layout == "NCHW") || (app_info.layout == "CHW"))
|
||||
? (ch * width * height + h * width + w)
|
||||
: (h * width * numChannels + w * numChannels + ch));
|
||||
inputBlobData[offset] =
|
||||
(static_cast<T>(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]) - static_cast<T>(app_info.mean[ch])) /
|
||||
(static_cast<T>(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]) -
|
||||
static_cast<T>(app_info.mean[ch])) /
|
||||
static_cast<T>(app_info.scale[ch]);
|
||||
}
|
||||
}
|
||||
@ -101,7 +110,11 @@ void fillBlobImage(Blob::Ptr& inputBlob, const std::vector<std::string>& filePat
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void fillBlobBinary(Blob::Ptr& inputBlob, const std::vector<std::string>& filePaths, const size_t& batchSize, const size_t& requestId, const size_t& inputId,
|
||||
void fillBlobBinary(Blob::Ptr& inputBlob,
|
||||
const std::vector<std::string>& filePaths,
|
||||
const size_t& batchSize,
|
||||
const size_t& requestId,
|
||||
const size_t& inputId,
|
||||
const size_t& inputSize) {
|
||||
MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
|
||||
if (!minput) {
|
||||
@ -114,7 +127,8 @@ void fillBlobBinary(Blob::Ptr& inputBlob, const std::vector<std::string>& filePa
|
||||
auto minputHolder = minput->wmap();
|
||||
|
||||
auto inputBlobData = minputHolder.as<char*>();
|
||||
for (size_t i = 0ULL, inputIndex = requestId * batchSize * inputSize + inputId; i < batchSize; i++, inputIndex += inputSize) {
|
||||
for (size_t i = 0ULL, inputIndex = requestId * batchSize * inputSize + inputId; i < batchSize;
|
||||
i++, inputIndex += inputSize) {
|
||||
inputIndex %= filePaths.size();
|
||||
|
||||
slog::info << "Prepare binary file " << filePaths[inputIndex] << slog::endl;
|
||||
@ -140,12 +154,15 @@ void fillBlobBinary(Blob::Ptr& inputBlob, const std::vector<std::string>& filePa
|
||||
}
|
||||
|
||||
template <typename T>
using uniformDistribution =
typename std::conditional<std::is_floating_point<T>::value, std::uniform_real_distribution<T>,
typename std::conditional<std::is_integral<T>::value, std::uniform_int_distribution<T>, void>::type>::type;
using uniformDistribution = typename std::conditional<
std::is_floating_point<T>::value,
std::uniform_real_distribution<T>,
typename std::conditional<std::is_integral<T>::value, std::uniform_int_distribution<T>, void>::type>::type;
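// How the alias above resolves, for reference (hand-written illustration, not part of the commit):
//
//     static_assert(std::is_same<uniformDistribution<float>,
//                                std::uniform_real_distribution<float>>::value, "");
//     static_assert(std::is_same<uniformDistribution<uint32_t>,
//                                std::uniform_int_distribution<uint32_t>>::value, "");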
|
||||
|
||||
template <typename T, typename T2>
|
||||
void fillBlobRandom(Blob::Ptr& inputBlob, T rand_min = std::numeric_limits<uint8_t>::min(), T rand_max = std::numeric_limits<uint8_t>::max()) {
|
||||
void fillBlobRandom(Blob::Ptr& inputBlob,
|
||||
T rand_min = std::numeric_limits<uint8_t>::min(),
|
||||
T rand_max = std::numeric_limits<uint8_t>::max()) {
|
||||
MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
|
||||
if (!minput) {
|
||||
IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in "
|
||||
@ -191,14 +208,17 @@ void fillBlobImInfo(Blob::Ptr& inputBlob, const size_t& batchSize, std::pair<siz
|
||||
}
|
||||
}
|
||||
|
||||
void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSize, benchmark_app::InputsInfo& app_inputs_info,
|
||||
void fillBlobs(const std::vector<std::string>& inputFiles,
|
||||
const size_t& batchSize,
|
||||
benchmark_app::InputsInfo& app_inputs_info,
|
||||
std::vector<InferReqWrap::Ptr> requests) {
|
||||
std::vector<std::pair<size_t, size_t>> input_image_sizes;
|
||||
for (auto& item : app_inputs_info) {
|
||||
if (item.second.isImage()) {
|
||||
input_image_sizes.push_back(std::make_pair(item.second.width(), item.second.height()));
|
||||
}
|
||||
slog::info << "Network input '" << item.first << "' precision " << item.second.precision << ", dimensions (" << item.second.layout << "): ";
|
||||
slog::info << "Network input '" << item.first << "' precision " << item.second.precision << ", dimensions ("
|
||||
<< item.second.layout << "): ";
|
||||
for (const auto& i : item.second.shape) {
|
||||
slog::info << i << " ";
|
||||
}
|
||||
@ -232,10 +252,11 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
|
||||
"extensions: "
|
||||
<< ss.str() << slog::endl;
|
||||
} else if (binaryToBeUsed > binaryFiles.size()) {
|
||||
slog::warn << "Some binary input files will be duplicated: " << binaryToBeUsed << " files are required but only " << binaryFiles.size()
|
||||
<< " are provided" << slog::endl;
|
||||
slog::warn << "Some binary input files will be duplicated: " << binaryToBeUsed
|
||||
<< " files are required but only " << binaryFiles.size() << " are provided" << slog::endl;
|
||||
} else if (binaryToBeUsed < binaryFiles.size()) {
|
||||
slog::warn << "Some binary input files will be ignored: only " << binaryToBeUsed << " are required from " << binaryFiles.size() << slog::endl;
|
||||
slog::warn << "Some binary input files will be ignored: only " << binaryToBeUsed << " are required from "
|
||||
<< binaryFiles.size() << slog::endl;
|
||||
}
|
||||
|
||||
imageFiles = filterFilesByExtensions(inputFiles, supported_image_extensions);
|
||||
@ -254,10 +275,11 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
|
||||
"extensions: "
|
||||
<< ss.str() << slog::endl;
|
||||
} else if (imagesToBeUsed > imageFiles.size()) {
|
||||
slog::warn << "Some image input files will be duplicated: " << imagesToBeUsed << " files are required but only " << imageFiles.size()
|
||||
<< " are provided" << slog::endl;
|
||||
slog::warn << "Some image input files will be duplicated: " << imagesToBeUsed
|
||||
<< " files are required but only " << imageFiles.size() << " are provided" << slog::endl;
|
||||
} else if (imagesToBeUsed < imageFiles.size()) {
|
||||
slog::warn << "Some image input files will be ignored: only " << imagesToBeUsed << " are required from " << imageFiles.size() << slog::endl;
|
||||
slog::warn << "Some image input files will be ignored: only " << imagesToBeUsed << " are required from "
|
||||
<< imageFiles.size() << slog::endl;
|
||||
}
|
||||
}
|
||||
|
||||
@ -274,15 +296,45 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
|
||||
if (!imageFiles.empty()) {
|
||||
// Fill with Images
|
||||
if (precision == InferenceEngine::Precision::FP32) {
|
||||
fillBlobImage<float>(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount);
|
||||
fillBlobImage<float>(inputBlob,
|
||||
imageFiles,
|
||||
batchSize,
|
||||
app_info,
|
||||
requestId,
|
||||
imageInputId++,
|
||||
imageInputCount);
|
||||
} else if (precision == InferenceEngine::Precision::FP16) {
|
||||
fillBlobImage<short>(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount);
|
||||
fillBlobImage<short>(inputBlob,
|
||||
imageFiles,
|
||||
batchSize,
|
||||
app_info,
|
||||
requestId,
|
||||
imageInputId++,
|
||||
imageInputCount);
|
||||
} else if (precision == InferenceEngine::Precision::I32) {
|
||||
fillBlobImage<int32_t>(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount);
|
||||
fillBlobImage<int32_t>(inputBlob,
|
||||
imageFiles,
|
||||
batchSize,
|
||||
app_info,
|
||||
requestId,
|
||||
imageInputId++,
|
||||
imageInputCount);
|
||||
} else if (precision == InferenceEngine::Precision::I64) {
|
||||
fillBlobImage<int64_t>(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount);
|
||||
fillBlobImage<int64_t>(inputBlob,
|
||||
imageFiles,
|
||||
batchSize,
|
||||
app_info,
|
||||
requestId,
|
||||
imageInputId++,
|
||||
imageInputCount);
|
||||
} else if (precision == InferenceEngine::Precision::U8) {
|
||||
fillBlobImage<uint8_t>(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount);
|
||||
fillBlobImage<uint8_t>(inputBlob,
|
||||
imageFiles,
|
||||
batchSize,
|
||||
app_info,
|
||||
requestId,
|
||||
imageInputId++,
|
||||
imageInputCount);
|
||||
} else {
|
||||
IE_THROW() << "Input precision is not supported for " << item.first;
|
||||
}
|
||||
@ -292,15 +344,41 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
|
||||
if (!binaryFiles.empty()) {
|
||||
// Fill with binary files
|
||||
if (precision == InferenceEngine::Precision::FP32) {
|
||||
fillBlobBinary<float>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
|
||||
fillBlobBinary<float>(inputBlob,
|
||||
binaryFiles,
|
||||
batchSize,
|
||||
requestId,
|
||||
binaryInputId++,
|
||||
binaryInputCount);
|
||||
} else if (precision == InferenceEngine::Precision::FP16) {
|
||||
fillBlobBinary<short>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
|
||||
fillBlobBinary<short>(inputBlob,
|
||||
binaryFiles,
|
||||
batchSize,
|
||||
requestId,
|
||||
binaryInputId++,
|
||||
binaryInputCount);
|
||||
} else if (precision == InferenceEngine::Precision::I32) {
|
||||
fillBlobBinary<int32_t>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
|
||||
fillBlobBinary<int32_t>(inputBlob,
|
||||
binaryFiles,
|
||||
batchSize,
|
||||
requestId,
|
||||
binaryInputId++,
|
||||
binaryInputCount);
|
||||
} else if (precision == InferenceEngine::Precision::I64) {
|
||||
fillBlobBinary<int64_t>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
|
||||
} else if ((precision == InferenceEngine::Precision::U8) || (precision == InferenceEngine::Precision::BOOL)) {
|
||||
fillBlobBinary<uint8_t>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
|
||||
fillBlobBinary<int64_t>(inputBlob,
|
||||
binaryFiles,
|
||||
batchSize,
|
||||
requestId,
|
||||
binaryInputId++,
|
||||
binaryInputCount);
|
||||
} else if ((precision == InferenceEngine::Precision::U8) ||
|
||||
(precision == InferenceEngine::Precision::BOOL)) {
|
||||
fillBlobBinary<uint8_t>(inputBlob,
|
||||
binaryFiles,
|
||||
batchSize,
|
||||
requestId,
|
||||
binaryInputId++,
|
||||
binaryInputCount);
|
||||
} else {
|
||||
IE_THROW() << "Input precision is not supported for " << item.first;
|
||||
}
|
||||
@ -310,7 +388,8 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
|
||||
if (app_info.isImageInfo() && (input_image_sizes.size() == 1)) {
|
||||
// Most likely it is image info: fill with image information
|
||||
auto image_size = input_image_sizes.at(0);
|
||||
slog::info << "Fill input '" << item.first << "' with image size " << image_size.first << "x" << image_size.second << slog::endl;
|
||||
slog::info << "Fill input '" << item.first << "' with image size " << image_size.first << "x"
|
||||
<< image_size.second << slog::endl;
|
||||
if (precision == InferenceEngine::Precision::FP32) {
|
||||
fillBlobImInfo<float>(inputBlob, batchSize, image_size);
|
||||
} else if (precision == InferenceEngine::Precision::FP16) {
|
||||
@ -326,8 +405,9 @@ void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSi
|
||||
}
|
||||
}
|
||||
// Fill random
|
||||
slog::info << "Fill input '" << item.first << "' with random values (" << std::string((app_info.isImage() ? "image" : "some binary data"))
|
||||
<< " is expected)" << slog::endl;
|
||||
slog::info << "Fill input '" << item.first << "' with random values ("
|
||||
<< std::string((app_info.isImage() ? "image" : "some binary data")) << " is expected)"
|
||||
<< slog::endl;
|
||||
if (precision == InferenceEngine::Precision::FP32) {
|
||||
fillBlobRandom<float, float>(inputBlob);
|
||||
} else if (precision == InferenceEngine::Precision::FP16) {
|
||||
|
@ -11,5 +11,7 @@
|
||||
#include "infer_request_wrap.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
void fillBlobs(const std::vector<std::string>& inputFiles, const size_t& batchSize, benchmark_app::InputsInfo& app_inputs_info,
|
||||
void fillBlobs(const std::vector<std::string>& inputFiles,
|
||||
const size_t& batchSize,
|
||||
benchmark_app::InputsInfo& app_inputs_info,
|
||||
std::vector<InferReqWrap::Ptr> requests);
|
@ -60,8 +60,10 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
|
||||
throw std::logic_error("Incorrect API. Please set -api option to `sync` or `async` value.");
|
||||
}
|
||||
|
||||
if (!FLAGS_report_type.empty() && FLAGS_report_type != noCntReport && FLAGS_report_type != averageCntReport && FLAGS_report_type != detailedCntReport) {
|
||||
std::string err = "only " + std::string(noCntReport) + "/" + std::string(averageCntReport) + "/" + std::string(detailedCntReport) +
|
||||
if (!FLAGS_report_type.empty() && FLAGS_report_type != noCntReport && FLAGS_report_type != averageCntReport &&
|
||||
FLAGS_report_type != detailedCntReport) {
|
||||
std::string err = "only " + std::string(noCntReport) + "/" + std::string(averageCntReport) + "/" +
|
||||
std::string(detailedCntReport) +
|
||||
" report types are supported (invalid -report_type option value)";
|
||||
throw std::logic_error(err);
|
||||
}
|
||||
@ -73,8 +75,9 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
|
||||
bool isNetworkCompiled = fileExt(FLAGS_m) == "blob";
|
||||
bool isPrecisionSet = !(FLAGS_ip.empty() && FLAGS_op.empty() && FLAGS_iop.empty());
|
||||
if (isNetworkCompiled && isPrecisionSet) {
|
||||
std::string err = std::string("Cannot set precision for a compiled network. ") + std::string("Please re-compile your network with required precision "
|
||||
"using compile_tool");
|
||||
std::string err = std::string("Cannot set precision for a compiled network. ") +
|
||||
std::string("Please re-compile your network with required precision "
|
||||
"using compile_tool");
|
||||
|
||||
throw std::logic_error(err);
|
||||
}
|
||||
@ -83,17 +86,18 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
|
||||
|
||||
static void next_step(const std::string additional_info = "") {
|
||||
static size_t step_id = 0;
|
||||
static const std::map<size_t, std::string> step_names = {{1, "Parsing and validating input arguments"},
|
||||
{2, "Loading Inference Engine"},
|
||||
{3, "Setting device configuration"},
|
||||
{4, "Reading network files"},
|
||||
{5, "Resizing network to match image sizes and given batch"},
|
||||
{6, "Configuring input of the model"},
|
||||
{7, "Loading the model to the device"},
|
||||
{8, "Setting optimal runtime parameters"},
|
||||
{9, "Creating infer requests and filling input blobs with images"},
|
||||
{10, "Measuring performance"},
|
||||
{11, "Dumping statistics report"}};
|
||||
static const std::map<size_t, std::string> step_names = {
|
||||
{1, "Parsing and validating input arguments"},
|
||||
{2, "Loading Inference Engine"},
|
||||
{3, "Setting device configuration"},
|
||||
{4, "Reading network files"},
|
||||
{5, "Resizing network to match image sizes and given batch"},
|
||||
{6, "Configuring input of the model"},
|
||||
{7, "Loading the model to the device"},
|
||||
{8, "Setting optimal runtime parameters"},
|
||||
{9, "Creating infer requests and filling input blobs with images"},
|
||||
{10, "Measuring performance"},
|
||||
{11, "Dumping statistics report"}};
|
||||
|
||||
step_id++;
|
||||
if (step_names.count(step_id) == 0)
|
||||
@ -140,13 +144,16 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
}
|
||||
if (!FLAGS_report_type.empty()) {
|
||||
statistics = std::make_shared<StatisticsReport>(StatisticsReport::Config {FLAGS_report_type, FLAGS_report_folder});
|
||||
statistics =
|
||||
std::make_shared<StatisticsReport>(StatisticsReport::Config{FLAGS_report_type, FLAGS_report_folder});
|
||||
statistics->addParameters(StatisticsReport::Category::COMMAND_LINE_PARAMETERS, command_line_arguments);
|
||||
}
|
||||
auto isFlagSetInCommandLine = [&command_line_arguments](const std::string& name) {
|
||||
return (std::find_if(command_line_arguments.begin(), command_line_arguments.end(), [name](const std::pair<std::string, std::string>& p) {
|
||||
return p.first == name;
|
||||
}) != command_line_arguments.end());
|
||||
return (std::find_if(command_line_arguments.begin(),
|
||||
command_line_arguments.end(),
|
||||
[name](const std::pair<std::string, std::string>& p) {
|
||||
return p.first == name;
|
||||
}) != command_line_arguments.end());
|
||||
};
|
||||
|
||||
std::string device_name = FLAGS_d;
|
||||
@ -213,13 +220,17 @@ int main(int argc, char* argv[]) {
if (isFlagSetInCommandLine("pc")) {
// set to user defined value
device_config[CONFIG_KEY(PERF_COUNT)] = FLAGS_pc ? CONFIG_VALUE(YES) : CONFIG_VALUE(NO);
} else if (device_config.count(CONFIG_KEY(PERF_COUNT)) && (device_config.at(CONFIG_KEY(PERF_COUNT)) == "YES")) {
slog::warn << "Performance counters for " << device << " device is turned on. To print results use -pc option." << slog::endl;
} else if (device_config.count(CONFIG_KEY(PERF_COUNT)) &&
(device_config.at(CONFIG_KEY(PERF_COUNT)) == "YES")) {
slog::warn << "Performance counters for " << device
<< " device is turned on. To print results use -pc option." << slog::endl;
} else if (FLAGS_report_type == detailedCntReport || FLAGS_report_type == averageCntReport) {
slog::warn << "Turn on performance counters for " << device << " device since report type is " << FLAGS_report_type << "." << slog::endl;
slog::warn << "Turn on performance counters for " << device << " device since report type is "
<< FLAGS_report_type << "." << slog::endl;
device_config[CONFIG_KEY(PERF_COUNT)] = CONFIG_VALUE(YES);
} else if (!FLAGS_exec_graph_path.empty()) {
slog::warn << "Turn on performance counters for " << device << " device due to execution graph dumping." << slog::endl;
slog::warn << "Turn on performance counters for " << device << " device due to execution graph dumping."
<< slog::endl;
device_config[CONFIG_KEY(PERF_COUNT)] = CONFIG_VALUE(YES);
} else {
// set to default value
@ -231,8 +242,10 @@ int main(int argc, char* argv[]) {
const std::string key = device + "_THROUGHPUT_STREAMS";
if (device_nstreams.count(device)) {
// set to user defined value
std::vector<std::string> supported_config_keys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
if (std::find(supported_config_keys.begin(), supported_config_keys.end(), key) == supported_config_keys.end()) {
std::vector<std::string> supported_config_keys =
ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
if (std::find(supported_config_keys.begin(), supported_config_keys.end(), key) ==
supported_config_keys.end()) {
throw std::logic_error("Device " + device + " doesn't support config key '" + key + "'! " +
"Please specify -nstreams for correct devices in format "
"<dev1>:<nstreams1>,<dev2>:<nstreams2>" +
@ -267,8 +280,10 @@ int main(int argc, char* argv[]) {
// set to user defined value
device_config[CONFIG_KEY(CPU_BIND_THREAD)] = FLAGS_pin;
} else if (!device_config.count(CONFIG_KEY(CPU_BIND_THREAD))) {
if ((device_name.find("MULTI") != std::string::npos) && (device_name.find("GPU") != std::string::npos)) {
slog::warn << "Turn off threads pinning for " << device << " device since multi-scenario with GPU device is used." << slog::endl;
if ((device_name.find("MULTI") != std::string::npos) &&
(device_name.find("GPU") != std::string::npos)) {
slog::warn << "Turn off threads pinning for " << device
<< " device since multi-scenario with GPU device is used." << slog::endl;
device_config[CONFIG_KEY(CPU_BIND_THREAD)] = CONFIG_VALUE(NO);
}
}
@ -279,7 +294,8 @@ int main(int argc, char* argv[]) {
// for GPU execution, more throughput-oriented execution via streams
setThroughputStreams();

if ((device_name.find("MULTI") != std::string::npos) && (device_name.find("CPU") != std::string::npos)) {
if ((device_name.find("MULTI") != std::string::npos) &&
(device_name.find("CPU") != std::string::npos)) {
slog::warn << "Turn on GPU throttling. Multi-device execution with "
"the CPU + GPU performs best with GPU throttling hint, "
<< "which releases another CPU thread (that is otherwise "
@ -299,9 +315,11 @@ int main(int argc, char* argv[]) {
if (isFlagSetInCommandLine("nthreads"))
device_config[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(FLAGS_nthreads);
} else {
std::vector<std::string> supported_config_keys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
std::vector<std::string> supported_config_keys =
ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
auto supported = [&](const std::string& key) {
return std::find(std::begin(supported_config_keys), std::end(supported_config_keys), key) != std::end(supported_config_keys);
return std::find(std::begin(supported_config_keys), std::end(supported_config_keys), key) !=
std::end(supported_config_keys);
};
if (supported(CONFIG_KEY(CPU_THREADS_NUM)) && isFlagSetInCommandLine("nthreads")) {
device_config[CONFIG_KEY(CPU_THREADS_NUM)] = std::to_string(FLAGS_nthreads);
@ -351,7 +369,8 @@ int main(int argc, char* argv[]) {
auto duration_ms = double_to_string(get_total_ms_time(startTime));
slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"load network time (ms)", duration_ms}});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"load network time (ms)", duration_ms}});
if (batchSize == 0) {
batchSize = 1;
}
@ -367,7 +386,8 @@ int main(int argc, char* argv[]) {
auto duration_ms = double_to_string(get_total_ms_time(startTime));
slog::info << "Read network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"read network time (ms)", duration_ms}});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"read network time (ms)", duration_ms}});

const InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
if (inputInfo.empty()) {
@ -380,7 +400,13 @@ int main(int argc, char* argv[]) {
batchSize = cnnNetwork.getBatchSize();
// Parse input shapes if specified
bool reshape = false;
app_inputs_info = getInputsInfo<InputInfo::Ptr>(FLAGS_shape, FLAGS_layout, FLAGS_b, FLAGS_iscale, FLAGS_imean, inputInfo, reshape);
app_inputs_info = getInputsInfo<InputInfo::Ptr>(FLAGS_shape,
FLAGS_layout,
FLAGS_b,
FLAGS_iscale,
FLAGS_imean,
inputInfo,
reshape);
if (reshape) {
InferenceEngine::ICNNNetwork::InputShapes shapes = {};
for (auto& item : app_inputs_info)
@ -391,13 +417,15 @@ int main(int argc, char* argv[]) {
duration_ms = double_to_string(get_total_ms_time(startTime));
slog::info << "Reshape network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"reshape network time (ms)", duration_ms}});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"reshape network time (ms)", duration_ms}});
}
// use batch size according to provided layout and shapes
batchSize = (!FLAGS_layout.empty()) ? getBatchSize(app_inputs_info) : cnnNetwork.getBatchSize();

topology_name = cnnNetwork.getName();
slog::info << (FLAGS_b != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize << slog::endl;
slog::info << (FLAGS_b != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize
<< slog::endl;

// ----------------- 6. Configuring inputs and outputs
// ----------------------------------------------------------------------
@ -424,7 +452,8 @@ int main(int argc, char* argv[]) {
duration_ms = double_to_string(get_total_ms_time(startTime));
slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"load network time (ms)", duration_ms}});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"load network time (ms)", duration_ms}});
} else {
next_step();
slog::info << "Skipping the step for compiled network" << slog::endl;
@ -440,8 +469,14 @@ int main(int argc, char* argv[]) {
auto duration_ms = double_to_string(get_total_ms_time(startTime));
slog::info << "Import network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"import network time (ms)", duration_ms}});
app_inputs_info = getInputsInfo<InputInfo::CPtr>(FLAGS_shape, FLAGS_layout, FLAGS_b, FLAGS_iscale, FLAGS_imean, exeNetwork.GetInputsInfo());
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"import network time (ms)", duration_ms}});
app_inputs_info = getInputsInfo<InputInfo::CPtr>(FLAGS_shape,
FLAGS_layout,
FLAGS_b,
FLAGS_iscale,
FLAGS_imean,
exeNetwork.GetInputsInfo());
if (batchSize == 0) {
batchSize = 1;
}
@ -479,8 +514,8 @@ int main(int argc, char* argv[]) {
if ((niter > 0) && (FLAGS_api == "async")) {
niter = ((niter + nireq - 1) / nireq) * nireq;
if (FLAGS_niter != niter) {
slog::warn << "Number of iterations was aligned by request number from " << FLAGS_niter << " to " << niter << " using number of requests "
<< nireq << slog::endl;
slog::warn << "Number of iterations was aligned by request number from " << FLAGS_niter << " to "
<< niter << " using number of requests " << nireq << slog::endl;
}
}

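In async mode the hunk above rounds the requested iteration count up to the nearest multiple of the number of infer requests, so every request runs the same number of iterations; only the warning's line wrapping changes. A small standalone sketch of that round-up arithmetic (the values below are illustrative only):

#include <cstdint>
#include <iostream>

int main() {
    uint32_t niter = 100;  // hypothetical -niter value
    uint32_t nireq = 8;    // hypothetical number of infer requests
    // Round niter up to a multiple of nireq, same expression as in the diff.
    uint32_t aligned = ((niter + nireq - 1) / nireq) * nireq;
    std::cout << aligned << "\n";  // prints 104 (= 13 * 8)
}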
@ -496,23 +531,25 @@ int main(int argc, char* argv[]) {
uint64_t duration_nanoseconds = getDurationInNanoseconds(duration_seconds);

if (statistics) {
statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG,
{
{"topology", topology_name},
{"target device", device_name},
{"API", FLAGS_api},
{"precision", std::string(precision.name())},
{"batch size", std::to_string(batchSize)},
{"number of iterations", std::to_string(niter)},
{"number of parallel infer requests", std::to_string(nireq)},
{"duration (ms)", std::to_string(getDurationInMilliseconds(duration_seconds))},
});
statistics->addParameters(
StatisticsReport::Category::RUNTIME_CONFIG,
{
{"topology", topology_name},
{"target device", device_name},
{"API", FLAGS_api},
{"precision", std::string(precision.name())},
{"batch size", std::to_string(batchSize)},
{"number of iterations", std::to_string(niter)},
{"number of parallel infer requests", std::to_string(nireq)},
{"duration (ms)", std::to_string(getDurationInMilliseconds(duration_seconds))},
});
for (auto& nstreams : device_nstreams) {
std::stringstream ss;
ss << "number of " << nstreams.first << " streams";
statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG, {
{ss.str(), nstreams.second},
});
statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG,
{
{ss.str(), nstreams.second},
});
}
}

@ -576,7 +613,8 @@ int main(int argc, char* argv[]) {
auto duration_ms = double_to_string(inferRequestsQueue.getLatencies()[0]);
slog::info << "First inference took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"first inference time (ms)", duration_ms}});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"first inference time (ms)", duration_ms}});
inferRequestsQueue.resetTimes();

auto startTime = Time::now();
@ -587,7 +625,8 @@ int main(int argc, char* argv[]) {
* executed in the same conditions **/
ProgressBar progressBar(progressBarTotalCount, FLAGS_stream_output, FLAGS_progress);

while ((niter != 0LL && iteration < niter) || (duration_nanoseconds != 0LL && (uint64_t)execTime < duration_nanoseconds) ||
while ((niter != 0LL && iteration < niter) ||
(duration_nanoseconds != 0LL && (uint64_t)execTime < duration_nanoseconds) ||
(FLAGS_api == "async" && iteration % nireq != 0)) {
inferRequest = inferRequestsQueue.getIdleRequest();
if (!inferRequest) {
@ -629,13 +668,15 @@ int main(int argc, char* argv[]) {

double latency = getMedianValue<double>(inferRequestsQueue.getLatencies(), FLAGS_latency_percentile);
double totalDuration = inferRequestsQueue.getDurationInMilliseconds();
double fps = (FLAGS_api == "sync") ? batchSize * 1000.0 / latency : batchSize * 1000.0 * iteration / totalDuration;
double fps =
(FLAGS_api == "sync") ? batchSize * 1000.0 / latency : batchSize * 1000.0 * iteration / totalDuration;
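Only the wrapping of the throughput expression changes here; the formula stays the same: in sync mode FPS is derived from the chosen latency percentile (median by default), while in async mode it is total processed frames divided by the whole run duration. A hedged, standalone illustration of the two cases with made-up numbers:

#include <iostream>
#include <string>

int main() {
    // Illustrative values only; in benchmark_app they come from the infer requests queue.
    double batchSize = 1;
    double latency = 5.0;           // latency per request, ms (sync case)
    double iteration = 1000;        // completed iterations (async case)
    double totalDuration = 2000.0;  // wall-clock duration of the async run, ms
    std::string api = "async";

    double fps =
        (api == "sync") ? batchSize * 1000.0 / latency : batchSize * 1000.0 * iteration / totalDuration;
    std::cout << "throughput: " << fps << " FPS\n";  // 500 FPS for the async numbers above
}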

if (statistics) {
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {
{"total execution time (ms)", double_to_string(totalDuration)},
{"total number of iterations", std::to_string(iteration)},
});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{"total execution time (ms)", double_to_string(totalDuration)},
{"total number of iterations", std::to_string(iteration)},
});
if (device_name.find("MULTI") == std::string::npos) {
std::string latency_label;
if (FLAGS_latency_percentile == 50) {
@ -643,11 +684,13 @@ int main(int argc, char* argv[]) {
} else {
latency_label = "latency (" + std::to_string(FLAGS_latency_percentile) + " percentile) (ms)";
}
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {
{latency_label, double_to_string(latency)},
});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{latency_label, double_to_string(latency)},
});
}
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"throughput", double_to_string(fps)}});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"throughput", double_to_string(fps)}});
}

progressBar.finish();
@ -707,9 +750,10 @@ int main(int argc, char* argv[]) {
slog::err << ex.what() << slog::endl;

if (statistics) {
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {
{"error", ex.what()},
});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{"error", ex.what()},
});
statistics->dump();
}

@ -35,14 +35,14 @@ public:
EXECUTION_RESULTS,
};

explicit StatisticsReport(Config config): _config(std::move(config)) {
explicit StatisticsReport(Config config) : _config(std::move(config)) {
_separator =
#if defined _WIN32 || defined __CYGWIN__
#if defined UNICODE
# if defined UNICODE
L"\\";
#else
# else
"\\";
#endif
# endif
#else
"/";
#endif
@ -16,7 +16,7 @@
// clang-format on

#ifdef USE_OPENCV
#include <opencv2/core.hpp>
# include <opencv2/core.hpp>
#endif

namespace benchmark_app {
@ -54,8 +54,13 @@ size_t InputInfo::depth() const {
} // namespace benchmark_app

uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device) {
static const std::map<std::string, uint32_t> deviceDefaultDurationInSeconds {{"CPU", 60}, {"GPU", 60}, {"VPU", 60}, {"MYRIAD", 60},
{"HDDL", 60}, {"FPGA", 120}, {"UNKNOWN", 120}};
static const std::map<std::string, uint32_t> deviceDefaultDurationInSeconds{{"CPU", 60},
{"GPU", 60},
{"VPU", 60},
{"MYRIAD", 60},
{"HDDL", 60},
{"FPGA", 120},
{"UNKNOWN", 120}};
uint32_t duration = 0;
for (const auto& deviceDurationInSeconds : deviceDefaultDurationInSeconds) {
if (device.find(deviceDurationInSeconds.first) != std::string::npos) {
@ -63,16 +68,18 @@ uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device) {
}
}
if (duration == 0) {
const auto unknownDeviceIt =
find_if(deviceDefaultDurationInSeconds.begin(), deviceDefaultDurationInSeconds.end(), [](std::pair<std::string, uint32_t> deviceDuration) {
return deviceDuration.first == "UNKNOWN";
});
const auto unknownDeviceIt = find_if(deviceDefaultDurationInSeconds.begin(),
deviceDefaultDurationInSeconds.end(),
[](std::pair<std::string, uint32_t> deviceDuration) {
return deviceDuration.first == "UNKNOWN";
});

if (unknownDeviceIt == deviceDefaultDurationInSeconds.end()) {
throw std::logic_error("UNKNOWN device was not found in the device duration list");
}
duration = unknownDeviceIt->second;
slog::warn << "Default duration " << duration << " seconds for unknown device '" << device << "' is used" << slog::endl;
slog::warn << "Default duration " << duration << " seconds for unknown device '" << device << "' is used"
<< slog::endl;
}
return duration;
}
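The map reformatting above does not change the lookup logic: the device string is matched by substring against the known device names, and when nothing matches, the "UNKNOWN" entry supplies the default duration. A simplified sketch of that fallback, using a trimmed table for illustration rather than the full list from the diff:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

uint32_t defaultDurationSketch(const std::string& device) {
    // Trimmed illustrative table; the real one also lists VPU, MYRIAD, HDDL and FPGA.
    static const std::map<std::string, uint32_t> durations{{"CPU", 60}, {"GPU", 60}, {"UNKNOWN", 120}};
    for (const auto& entry : durations) {
        if (device.find(entry.first) != std::string::npos) {
            return entry.second;  // first substring match wins
        }
    }
    return durations.at("UNKNOWN");  // fallback when no known device name matches
}

int main() {
    std::cout << defaultDurationSketch("MULTI:CPU,GPU") << "\n";  // 60
    std::cout << defaultDurationSketch("GNA") << "\n";            // 120 (unknown device)
}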
@ -112,7 +119,8 @@ std::vector<std::string> parseDevices(const std::string& device_string) {
return devices;
}

std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices, const std::string& values_string) {
std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices,
const std::string& values_string) {
// Format: <device1>:<value1>,<device2>:<value2> or just <value>
std::map<std::string, std::string> result;
auto device_value_strings = split(values_string, ',');
@ -125,7 +133,8 @@ std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector
if (it != devices.end()) {
result[device_name] = nstreams;
} else {
throw std::logic_error("Can't set nstreams value " + std::string(nstreams) + " for device '" + device_name + "'! Incorrect device name!");
throw std::logic_error("Can't set nstreams value " + std::string(nstreams) + " for device '" +
device_name + "'! Incorrect device name!");
}
} else if (device_value_vec.size() == 1) {
auto value = device_value_vec.at(0);
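As the comment in the hunk notes, -nstreams accepts either a single value or a per-device list of the form <device1>:<value1>,<device2>:<value2>. A hedged usage sketch of splitting that string — this is not the project's split()/parse implementation, only an illustration of the documented format and it handles the per-device form only:

#include <iostream>
#include <map>
#include <sstream>
#include <string>

int main() {
    // Hypothetical input matching the documented format.
    std::string values_string = "CPU:4,GPU:2";
    std::map<std::string, std::string> result;

    std::stringstream ss(values_string);
    std::string token;
    while (std::getline(ss, token, ',')) {
        auto pos = token.find(':');
        if (pos != std::string::npos) {
            result[token.substr(0, pos)] = token.substr(pos + 1);  // device -> nstreams
        }
    }

    for (const auto& kv : result) {
        std::cout << kv.first << " -> " << kv.second << "\n";  // CPU -> 4, GPU -> 2
    }
}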
@ -172,7 +181,8 @@ std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& sha
return ss.str();
}

std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& scale_mean, const benchmark_app::InputsInfo& inputs_info) {
std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& scale_mean,
const benchmark_app::InputsInfo& inputs_info) {
// Format: data:[255,255,255],info[255,255,255]
std::map<std::string, std::vector<float>> return_value;

@ -29,14 +29,17 @@ using InputsInfo = std::map<std::string, InputInfo>;

std::vector<std::string> parseDevices(const std::string& device_string);
uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device);
std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices, const std::string& values_string);
std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices,
const std::string& values_string);
std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& shapes);
size_t getBatchSize(const benchmark_app::InputsInfo& inputs_info);
std::vector<std::string> split(const std::string& s, char delim);
std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& scale_mean, const benchmark_app::InputsInfo& inputs_info);
std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& scale_mean,
const benchmark_app::InputsInfo& inputs_info);

template <typename T>
std::map<std::string, std::string> parseInputParameters(const std::string parameter_string, const std::map<std::string, T>& input_info) {
std::map<std::string, std::string> parseInputParameters(const std::string parameter_string,
const std::map<std::string, T>& input_info) {
// Parse parameter string like "input0[value0],input1[value1]" or "[value]" (applied to all
// inputs)
std::map<std::string, std::string> return_value;
@ -67,8 +70,12 @@ std::map<std::string, std::string> parseInputParameters(const std::string parame
}

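parseInputParameters (above) maps the textual form "input0[value0],input1[value1]" — or a bare "[value]" applied to every input — onto the network inputs. A rough standalone sketch of that bracket parsing with a hypothetical parameter string; the real template additionally validates names against input_info and handles the "apply to all inputs" case:

#include <iostream>
#include <map>
#include <string>

int main() {
    // Hypothetical parameter string in the documented format.
    std::string parameter_string = "data[NCHW],info[NC]";
    std::map<std::string, std::string> return_value;

    size_t pos = 0;
    while (pos < parameter_string.size()) {
        size_t open = parameter_string.find('[', pos);
        size_t close = parameter_string.find(']', open);
        if (open == std::string::npos || close == std::string::npos)
            break;
        std::string name = parameter_string.substr(pos, open - pos);
        std::string value = parameter_string.substr(open + 1, close - open - 1);
        return_value[name] = value;  // an empty name would mean "apply to all inputs"
        pos = close + 1;
        if (pos < parameter_string.size() && parameter_string[pos] == ',')
            ++pos;
    }

    for (const auto& kv : return_value)
        std::cout << "'" << kv.first << "' -> " << kv.second << "\n";  // 'data' -> NCHW, 'info' -> NC
}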
template <typename T>
benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const std::string& layout_string, const size_t batch_size,
const std::string& scale_string, const std::string& mean_string, const std::map<std::string, T>& input_info,
benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string,
const std::string& layout_string,
const size_t batch_size,
const std::string& scale_string,
const std::string& mean_string,
const std::map<std::string, T>& input_info,
bool& reshape_required) {
std::map<std::string, std::string> shape_map = parseInputParameters(shape_string, input_info);
std::map<std::string, std::string> layout_map = parseInputParameters(layout_string, input_info);
@ -134,10 +141,20 @@ benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const s
}

template <typename T>
benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const std::string& layout_string, const size_t batch_size,
const std::string& scale_string, const std::string& mean_string, const std::map<std::string, T>& input_info) {
benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string,
const std::string& layout_string,
const size_t batch_size,
const std::string& scale_string,
const std::string& mean_string,
const std::map<std::string, T>& input_info) {
bool reshape_required = false;
return getInputsInfo<T>(shape_string, layout_string, batch_size, scale_string, mean_string, input_info, reshape_required);
return getInputsInfo<T>(shape_string,
layout_string,
batch_size,
scale_string,
mean_string,
input_info,
reshape_required);
}

#ifdef USE_OPENCV
@ -17,13 +17,15 @@ static const char help_message[] = "Print a usage message.";
static const char model_message[] = "Required. Path to an .xml file with a trained model.";

/// @brief message for images argument
static const char image_message[] = "Required. Path to a folder with images or path to an image files: a .ubyte file for LeNet"
" and a .bmp file for the other networks.";
static const char image_message[] =
"Required. Path to a folder with images or path to an image files: a .ubyte file for LeNet"
" and a .bmp file for the other networks.";

/// @brief message for assigning cnn calculation to device
static const char target_device_message[] = "Optional. Specify the target device to infer on (the list of available devices is shown below). "
"Default value is CPU. Use \"-d HETERO:<comma_separated_devices_list>\" format to specify HETERO plugin. "
"Sample will look for a suitable plugin for device specified.";
static const char target_device_message[] =
"Optional. Specify the target device to infer on (the list of available devices is shown below). "
"Default value is CPU. Use \"-d HETERO:<comma_separated_devices_list>\" format to specify HETERO plugin. "
"Sample will look for a suitable plugin for device specified.";

/// @brief message for top results number
static const char ntop_message[] = "Optional. Number of top results. Default value is 10.";
@ -100,7 +100,8 @@ int main(int argc, char* argv[]) {
// Config for device plugin custom extension is loaded from an .xml
// description
ie.SetConfig({{PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c}}, FLAGS_d);
slog::info << "Config for " << FLAGS_d << " device plugin custom extension loaded: " << FLAGS_c << slog::endl;
slog::info << "Config for " << FLAGS_d << " device plugin custom extension loaded: " << FLAGS_c
<< slog::endl;
}
// -----------------------------------------------------------------------------------------------------

@ -142,8 +143,8 @@ int main(int argc, char* argv[]) {
continue;
}
/** Store image data **/
std::shared_ptr<unsigned char> data(
reader->getData(inputInfoItem.second->getTensorDesc().getDims()[3], inputInfoItem.second->getTensorDesc().getDims()[2]));
std::shared_ptr<unsigned char> data(reader->getData(inputInfoItem.second->getTensorDesc().getDims()[3],
inputInfoItem.second->getTensorDesc().getDims()[2]));
if (data != nullptr) {
imagesData.push_back(data);
validImageNames.push_back(i);
@ -203,7 +204,8 @@ int main(int argc, char* argv[]) {
for (size_t ch = 0; ch < num_channels; ++ch) {
/** [images stride + channels stride + pixel id ] all in
* bytes **/
data[image_id * image_size * num_channels + ch * image_size + pid] = imagesData.at(image_id).get()[pid * num_channels + ch];
data[image_id * image_size * num_channels + ch * image_size + pid] =
imagesData.at(image_id).get()[pid * num_channels + ch];
}
}
}
@ -255,8 +257,9 @@ int main(int argc, char* argv[]) {
/** Validating -nt value **/
const size_t resultsCnt = outputBlob->size() / batchSize;
if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) {
slog::warn << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than " << resultsCnt + 1
<< " and more than 0)\n Maximal value " << resultsCnt << " will be used." << slog::endl;
slog::warn << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than "
<< resultsCnt + 1 << " and more than 0)\n Maximal value " << resultsCnt
<< " will be used." << slog::endl;
FLAGS_nt = resultsCnt;
}

@ -42,7 +42,8 @@ MnistUbyte::MnistUbyte(const std::string& filename) {
n_cols = reverseInt(n_cols);
_width = (size_t)n_cols;
if (number_of_images > 1) {
std::cout << "[MNIST] Warning: number_of_images in mnist file equals " << number_of_images << ". Only a first image will be read." << std::endl;
std::cout << "[MNIST] Warning: number_of_images in mnist file equals " << number_of_images
<< ". Only a first image will be read." << std::endl;
}

size_t size = _width * _height * 1;
@ -14,19 +14,19 @@
#include <vector>

#if defined(_WIN32)
#ifdef IMPLEMENT_FORMAT_READER
#define FORMAT_READER_API(type) extern "C" __declspec(dllexport) type
#else
#define FORMAT_READER_API(type) extern "C" type
#endif
# ifdef IMPLEMENT_FORMAT_READER
# define FORMAT_READER_API(type) extern "C" __declspec(dllexport) type
# else
# define FORMAT_READER_API(type) extern "C" type
# endif
#elif (__GNUC__ >= 4)
#ifdef IMPLEMENT_FORMAT_READER
#define FORMAT_READER_API(type) extern "C" __attribute__((visibility("default"))) type
#else
#define FORMAT_READER_API(type) extern "C" type
#endif
# ifdef IMPLEMENT_FORMAT_READER
# define FORMAT_READER_API(type) extern "C" __attribute__((visibility("default"))) type
# else
# define FORMAT_READER_API(type) extern "C" type
# endif
#else
#define FORMAT_READER_API(TYPE) extern "C" TYPE
# define FORMAT_READER_API(TYPE) extern "C" TYPE
#endif

namespace FormatReader {
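The clang-format change above only re-indents the nested preprocessor conditionals (adding a space after '#'); the FORMAT_READER_API macro still wraps a declaration in extern "C" plus the platform-specific export/visibility attribute. A hedged example of how such a macro is typically applied — the function name below is made up for illustration and is not part of the format_reader API:

// Inside the library (built with IMPLEMENT_FORMAT_READER defined) the macro adds
// __declspec(dllexport) on Windows or default visibility with GCC; consumers get plain extern "C".
FORMAT_READER_API(int) hypotheticalReaderVersion();  // hypothetical exported function

// With IMPLEMENT_FORMAT_READER on a GCC build, this would expand roughly to:
// extern "C" __attribute__((visibility("default"))) int hypotheticalReaderVersion();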