From 3826a0d08df35c8c936a1828420b8843ffd14cb0 Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Mon, 9 Aug 2021 06:26:37 +0200 Subject: [PATCH 01/24] Remove leaky relu alpha check (#6910) * remove leaky relu check * make alpha_node scalar --- ngraph/frontend/onnx/onnx_import/src/op/leaky_relu.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ngraph/frontend/onnx/onnx_import/src/op/leaky_relu.cpp b/ngraph/frontend/onnx/onnx_import/src/op/leaky_relu.cpp index e40f85c4b6d..8b12626f0fa 100644 --- a/ngraph/frontend/onnx/onnx_import/src/op/leaky_relu.cpp +++ b/ngraph/frontend/onnx/onnx_import/src/op/leaky_relu.cpp @@ -21,11 +21,8 @@ namespace ngraph auto data = node.get_ng_inputs().at(0); double alpha = node.get_attribute_value("alpha", 0.01); - CHECK_VALID_NODE( - node, alpha >= 0 && alpha <= 1, " alpha value should be in range (0,1)"); - std::shared_ptr alpha_node = - default_opset::Constant::create(data.get_element_type(), Shape{}, {alpha}); + default_opset::Constant::create(data.get_element_type(), Shape{1}, {alpha}); return {std::make_shared(data, alpha_node)}; } From aa3645cb53564d2a58856922a80cc34182f91d93 Mon Sep 17 00:00:00 2001 From: Gabriele Galiero Casay Date: Mon, 9 Aug 2021 10:42:05 +0200 Subject: [PATCH 02/24] Avoid template test from executing multiple times (#6948) --- .../tests/functional/op_reference/comparison.cpp | 15 +++++++++++++++ .../tests/functional/op_reference/comparison.hpp | 2 +- .../tests/functional/op_reference/equal.cpp | 6 +----- .../tests/functional/op_reference/less.cpp | 5 +---- .../tests/functional/op_reference/less_eq.cpp | 5 +---- 5 files changed, 19 insertions(+), 14 deletions(-) create mode 100644 docs/template_plugin/tests/functional/op_reference/comparison.cpp diff --git a/docs/template_plugin/tests/functional/op_reference/comparison.cpp b/docs/template_plugin/tests/functional/op_reference/comparison.cpp new file mode 100644 index 00000000000..4332f78e252 --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/comparison.cpp @@ -0,0 +1,15 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "comparison.hpp" + +namespace reference_tests { +namespace ComparisonOpsRefTestDefinitions { +namespace { +TEST_P(ReferenceComparisonLayerTest, CompareWithHardcodedRefs) { + Exec(); +} +} // namespace +} // namespace ComparisonOpsRefTestDefinitions +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/comparison.hpp b/docs/template_plugin/tests/functional/op_reference/comparison.hpp index 0d520b73ba2..ec8509de4c8 100644 --- a/docs/template_plugin/tests/functional/op_reference/comparison.hpp +++ b/docs/template_plugin/tests/functional/op_reference/comparison.hpp @@ -60,4 +60,4 @@ private: } }; } // namespace ComparisonOpsRefTestDefinitions -} // namespace reference_tests \ No newline at end of file +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/equal.cpp b/docs/template_plugin/tests/functional/op_reference/equal.cpp index d80ec3271fb..01cd430be72 100644 --- a/docs/template_plugin/tests/functional/op_reference/equal.cpp +++ b/docs/template_plugin/tests/functional/op_reference/equal.cpp @@ -20,10 +20,6 @@ namespace reference_tests { namespace ComparisonOpsRefTestDefinitions { namespace { -TEST_P(ReferenceComparisonLayerTest, EqualCompareWithHardcodedRefs) { - Exec(); -} - template std::vector generateComparisonParams(const element::Type& type) { using T = typename 
element_type_traits::value_type; @@ -81,4 +77,4 @@ INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparis ReferenceComparisonLayerTest::getTestCaseName); } // namespace } // namespace ComparisonOpsRefTestDefinitions -} // namespace reference_tests \ No newline at end of file +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/less.cpp b/docs/template_plugin/tests/functional/op_reference/less.cpp index 5d01cdfab64..87feca926a3 100644 --- a/docs/template_plugin/tests/functional/op_reference/less.cpp +++ b/docs/template_plugin/tests/functional/op_reference/less.cpp @@ -18,9 +18,6 @@ using ComparisonTypes = ngraph::helpers::ComparisonTypes; namespace reference_tests { namespace ComparisonOpsRefTestDefinitions { namespace { -TEST_P(ReferenceComparisonLayerTest, LessCompareWithHardcodedRefs) { - Exec(); -} template std::vector generateComparisonParams(const element::Type& type) { @@ -79,4 +76,4 @@ std::vector generateComparisonCombinedParams() { INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()), ReferenceComparisonLayerTest::getTestCaseName); } // namespace ComparisonOpsRefTestDefinitions -} // namespace reference_tests \ No newline at end of file +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/less_eq.cpp b/docs/template_plugin/tests/functional/op_reference/less_eq.cpp index f530867f847..028ef909f67 100644 --- a/docs/template_plugin/tests/functional/op_reference/less_eq.cpp +++ b/docs/template_plugin/tests/functional/op_reference/less_eq.cpp @@ -18,9 +18,6 @@ using ComparisonTypes = ngraph::helpers::ComparisonTypes; namespace reference_tests { namespace ComparisonOpsRefTestDefinitions { namespace { -TEST_P(ReferenceComparisonLayerTest, LessEqualCompareWithHardcodedRefs) { - Exec(); -} template std::vector generateComparisonParams(const element::Type& type) { @@ -79,4 +76,4 @@ std::vector generateComparisonCombinedParams() { INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()), ReferenceComparisonLayerTest::getTestCaseName); } // namespace ComparisonOpsRefTestDefinitions -} // namespace reference_tests \ No newline at end of file +} // namespace reference_tests From ad9078d9eb28860837917c3dcce2f3ae02e71a0a Mon Sep 17 00:00:00 2001 From: Anastasiya Koryachikhina Date: Mon, 9 Aug 2021 12:40:56 +0300 Subject: [PATCH 03/24] add itt_collector build (#6859) * add itt_collector build * changed format for itt_collector build * replace build_dir with sea_itt_lib_path * change type of build_target * replace os.join with Path * change built_target arg type --- tests/conditional_compilation/conftest.py | 3 +++ tests/conditional_compilation/test_utils.py | 9 ++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/conditional_compilation/conftest.py b/tests/conditional_compilation/conftest.py index 85e2c333d66..a4f9a07acd4 100644 --- a/tests/conditional_compilation/conftest.py +++ b/tests/conditional_compilation/conftest.py @@ -139,10 +139,13 @@ def openvino_ref(request, artifacts): log.info("--openvino_ref is not specified. 
Preparing instrumented build at %s", build_dir) + build_target = {"sea_itt_lib": Path(build_dir / "thirdparty" / "itt_collector" / "sea_itt_lib")} + return_code, output = make_build( openvino_root_dir, build_dir, openvino_ref_path, + build_target=build_target, cmake_additional_args=["-DSELECTIVE_BUILD=COLLECT"], log=log ) diff --git a/tests/conditional_compilation/test_utils.py b/tests/conditional_compilation/test_utils.py index 9fa503efa83..fd91029a509 100644 --- a/tests/conditional_compilation/test_utils.py +++ b/tests/conditional_compilation/test_utils.py @@ -15,7 +15,6 @@ from install_pkg import get_openvino_environment # pylint: disable=import-error from path_utils import get_lib_path # pylint: disable=import-error from proc_utils import cmd_exec # pylint: disable=import-error - SESSION_INFO_FILE = "cc_tests.json" infer_tool = str((Path(getsourcefile(lambda: 0)) / ".." / "tools" / "infer_tool.py").resolve()) @@ -75,15 +74,19 @@ def run_infer(models, out_dir, install_dir): return return_code, output -def make_build(openvino_root_dir, build_dir, install_dir, cmake_additional_args=None, log=None): +def make_build(openvino_root_dir, build_dir, install_dir, build_target: dict = None, cmake_additional_args=None, + log=None): """Parametrized build and install OpenVINO package.""" additional_args_line = " ".join(cmake_additional_args) + " " if cmake_additional_args else "" + build_target_arg_line = [f"cmake --build {build_target[target]} --target {target} && " for target in + build_target.keys()] if build_target else "" nproc = multiprocessing.cpu_count() cmd = ( f"cmake -DENABLE_PROFILING_ITT=ON -DCMAKE_BUILD_TYPE=Release " f"-DPYTHON_EXECUTABLE={sys.executable} {additional_args_line}" - f"-S {openvino_root_dir} -B {build_dir} &&" + f"-S {openvino_root_dir} -B {build_dir} && " f"cmake --build {build_dir} -j{nproc} && " + f"{' '.join(build_target_arg_line)}" f"cmake --install {build_dir} --prefix {install_dir}" ) return cmd_exec([cmd], shell=True, log=log) From 9899264d61f50c86293377565fa08b810b4ea4e5 Mon Sep 17 00:00:00 2001 From: "Min, Byungil" Date: Mon, 9 Aug 2021 18:53:29 +0900 Subject: [PATCH 04/24] Resolve python build issue on Windows (#6517) Signed-off-by: Min, Byungil --- inference-engine/ie_bridges/python/cmake/CythonConfig.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inference-engine/ie_bridges/python/cmake/CythonConfig.cmake b/inference-engine/ie_bridges/python/cmake/CythonConfig.cmake index aa525557a8f..f00ddb4d829 100644 --- a/inference-engine/ie_bridges/python/cmake/CythonConfig.cmake +++ b/inference-engine/ie_bridges/python/cmake/CythonConfig.cmake @@ -33,9 +33,10 @@ find_package(PythonInterp 3 QUIET) if( PYTHONINTERP_FOUND ) get_filename_component( _python_path ${PYTHON_EXECUTABLE} PATH ) + file(TO_CMAKE_PATH "$ENV{HOME}" ENV_HOME) find_host_program( CYTHON_EXECUTABLE NAMES cython cython.bat cython3 - HINTS ${_python_path} $ENV{HOME}/.local/bin + HINTS ${_python_path} ${ENV_HOME}/.local/bin ) else() find_host_program( CYTHON_EXECUTABLE From 461584ffb1d794c25f66f45f15a89ff35c23703b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dawid=20Ko=C5=BCykowski?= Date: Mon, 9 Aug 2021 12:50:48 +0200 Subject: [PATCH 05/24] Update spec for LogicalOr operation (#6852) * update markdown file for logical or operator * update information on broadcasting in logical operators spec * change *T* to *T_BOOL* * add information about input shape * remove divergence between logical_or and logical_and specs --- docs/ops/logical/LogicalAnd_1.md | 13 ++++++------ 
docs/ops/logical/LogicalOr_1.md | 34 ++++++++++++++++---------------- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/docs/ops/logical/LogicalAnd_1.md b/docs/ops/logical/LogicalAnd_1.md index a653d1abbc2..385dd89a3a3 100644 --- a/docs/ops/logical/LogicalAnd_1.md +++ b/docs/ops/logical/LogicalAnd_1.md @@ -29,21 +29,20 @@ o_{i} = a_{i} \wedge b_{i} **Inputs** -* **1**: A tensor of type *T* and arbitrary shape. **Required.** -* **2**: A tensor of type *T* and arbitrary shape. **Required.** +* **1**: A tensor of type *T_BOOL* and arbitrary shape. **Required.** +* **2**: A tensor of type *T_BOOL* and arbitrary shape. **Required.** **Outputs** -* **1**: The result of element-wise *LogicalAnd* operation. A tensor of type boolean. +* **1**: The result of element-wise *LogicalAnd* operation. A tensor of type *T_BOOL* and the same shape equal to broadcasted shape of two inputs. **Types** -* *T*: boolean type. - +* *T_BOOL*: `boolean`. **Examples** -*Example 1* +*Example 1: no broadcast* ```xml @@ -66,7 +65,7 @@ o_{i} = a_{i} \wedge b_{i} ``` -*Example 2: broadcast* +*Example 2: numpy broadcast* ```xml diff --git a/docs/ops/logical/LogicalOr_1.md b/docs/ops/logical/LogicalOr_1.md index ac4810ec74e..0e88a6c82fa 100644 --- a/docs/ops/logical/LogicalOr_1.md +++ b/docs/ops/logical/LogicalOr_1.md @@ -6,43 +6,43 @@ **Short description**: *LogicalOr* performs element-wise logical OR operation with two given tensors applying multi-directional broadcast rules. +**Detailed description**: Before performing logical operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. + +After broadcasting *LogicalOr* does the following with the input tensors *a* and *b*: + +\f[ +o_{i} = a_{i} \lor b_{i} +\f] + **Attributes**: * *auto_broadcast* * **Description**: specifies rules used for auto-broadcasting of input tensors. * **Range of values**: - * *none* - no auto-broadcasting is allowed, all input shapes should match - * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in ONNX docs. + * *none* - no auto-broadcasting is allowed, all input shapes must match + * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md), + * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md). * **Type**: string * **Default value**: "numpy" * **Required**: *no* **Inputs** -* **1**: A tensor of type *T*. **Required.** -* **2**: A tensor of type *T*. **Required.** +* **1**: A tensor of type *T_BOOL* and arbitrary shape. **Required.** +* **2**: A tensor of type *T_BOOL* and arbitrary shape. **Required.** **Outputs** -* **1**: The result of element-wise logical OR operation. A tensor of type boolean. +* **1**: The result of element-wise *LogicalOr* operation. A tensor of type *T_BOOL* and the same shape equal to broadcasted shape of two inputs. **Types** -* *T*: boolean type. - -**Detailed description** -Before performing logical operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. - -After broadcasting *LogicalOr* does the following with the input tensors *a* and *b*: - -\f[ -o_{i} = a_{i} or b_{i} -\f] +* *T_BOOL*: `boolean`. 
**Examples** -*Example 1* +*Example 1: no broadcast* ```xml @@ -65,7 +65,7 @@ o_{i} = a_{i} or b_{i} ``` -*Example 2: broadcast* +*Example 2: numpy broadcast* ```xml From 6a63cb9122276fc9bf1be31227c322fc6b334468 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 9 Aug 2021 15:38:08 +0300 Subject: [PATCH 06/24] Enabled naming style check for FEM, ONNX FE (#6970) * Enabled naming style check for FEM, ONNX FE * Used custom version of ncc * Added self-check * Removed submdoule * Moved to public ncc --- .github/workflows/code_style.yml | 19 ++++--- cmake/developer_package/ncc_naming_style/ncc | 2 +- .../ncc_naming_style/ncc_naming_style.cmake | 35 ++++++++----- .../ncc_naming_style/ncc_run.cmake | 18 ++++++- .../ncc_naming_style/ncc_wrapper.py.in | 52 ------------------- .../ncc_naming_style/openvino.style | 3 +- .../self_check/class_name.hpp | 8 +++ .../self_check/function_name.hpp | 5 ++ .../self_check/method_name.hpp | 10 ++++ .../frontend/frontend_manager/CMakeLists.txt | 6 +++ ngraph/frontend/onnx/frontend/CMakeLists.txt | 5 ++ .../frontend/onnx/onnx_import/CMakeLists.txt | 8 +++ ngraph/frontend/paddlepaddle/CMakeLists.txt | 6 +++ 13 files changed, 99 insertions(+), 78 deletions(-) delete mode 100644 cmake/developer_package/ncc_naming_style/ncc_wrapper.py.in create mode 100644 cmake/developer_package/ncc_naming_style/self_check/class_name.hpp create mode 100644 cmake/developer_package/ncc_naming_style/self_check/function_name.hpp create mode 100644 cmake/developer_package/ncc_naming_style/self_check/method_name.hpp diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index f452feb67d7..29ec8446c48 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -28,13 +28,15 @@ jobs: cmake -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT .. - name: Check code style - run: cmake --build build --target clang_format_check_all -j8 + run: cmake --build . --target clang_format_check_all -j8 + working-directory: build - name: Create code style diff if: failure() run: | - cmake --build build --target clang_format_fix_all + cmake --build . --target clang_format_fix_all git diff > code_style_diff.diff + working-directory: build - uses: actions/upload-artifact@v2 if: failure() @@ -53,15 +55,13 @@ jobs: run: sudo apt --assume-yes install shellcheck - name: Install dependencies - run: | - sudo apt --assume-yes install libusb-1.0-0-dev - python3 -m pip install -r ./inference-engine/ie_bridges/python/requirements.txt + run: python3 -m pip install -r ./inference-engine/ie_bridges/python/requirements.txt - name: CMake run: | mkdir build cd build - cmake .. + cmake -DENABLE_VPU=OFF .. - name: ShellCheck run: cmake --build . --target ie_shellcheck -j8 @@ -75,17 +75,16 @@ jobs: submodules: recursive - name: Install Clang dependency - run: sudo apt --assume-yes install libusb-1.0-0-dev libclang-9-dev + run: sudo apt --assume-yes install libclang-9-dev - name: Install Python-based dependencies - run: | - python3 -m pip install pyyaml clang==9.0 + run: python3 -m pip install -r cmake/developer_package/ncc_naming_style/requirements_dev.txt - name: CMake run: | mkdir build cd build - cmake .. + cmake -DENABLE_VPU=OFF .. - name: Naming convention check run: cmake --build . 
--target ncc_all -j8 diff --git a/cmake/developer_package/ncc_naming_style/ncc b/cmake/developer_package/ncc_naming_style/ncc index d7d83049708..63e59ed312b 160000 --- a/cmake/developer_package/ncc_naming_style/ncc +++ b/cmake/developer_package/ncc_naming_style/ncc @@ -1 +1 @@ -Subproject commit d7d83049708eaa18ea6796adf0eeef85b28ebc1f +Subproject commit 63e59ed312ba7a946779596e86124c1633f67607 diff --git a/cmake/developer_package/ncc_naming_style/ncc_naming_style.cmake b/cmake/developer_package/ncc_naming_style/ncc_naming_style.cmake index 60b03e2f726..c5b03abc778 100644 --- a/cmake/developer_package/ncc_naming_style/ncc_naming_style.cmake +++ b/cmake/developer_package/ncc_naming_style/ncc_naming_style.cmake @@ -33,8 +33,6 @@ if(ENABLE_NCC_STYLE) find_host_package(Clang QUIET) if(Clang_FOUND AND TARGET libclang) get_target_property(libclang_location libclang LOCATION) - set(ncc_wrapper_py "${ncc_style_bin_dir}/ncc_wrapper.py") - configure_file("${ncc_style_dir}/ncc_wrapper.py.in" ${ncc_wrapper_py} @ONLY) message(STATUS "Found libclang: ${libclang_location}") else() message(WARNING "libclang is not found (required for ncc naming style check)") @@ -59,7 +57,6 @@ foreach(req IN LISTS req_lines) ie_check_pip_package(${req} STATUS) endforeach() -set(ncc_script_dir "${ncc_style_dir}/ncc/") set(ncc_script_py "${ncc_style_dir}/ncc/ncc.py") if(NOT EXISTS ${ncc_script_py}) @@ -77,26 +74,33 @@ endif() # # ov_ncc_naming_style(FOR_TARGET target_name # INCLUDE_DIRECTORY dir -# [ADDITIONAL_INCLUDE_DIRECTORIES dir1 dir2 ..]) +# [ADDITIONAL_INCLUDE_DIRECTORIES dir1 dir2 ..] +# [DEFINITIONS def1 def2 ..]) # # FOR_TARGET - name of the target # INCLUDE_DIRECTORY - directory to check headers from # ADDITIONAL_INCLUDE_DIRECTORIES - additional include directories used in checked headers +# DEFINITIONS - additional definitions passed to preprocessor stage # function(ov_ncc_naming_style) if(NOT ENABLE_NCC_STYLE) return() endif() - cmake_parse_arguments(NCC_STYLE "" - "FOR_TARGET;INCLUDE_DIRECTORY" "ADDITIONAL_INCLUDE_DIRECTORIES" ${ARGN}) + cmake_parse_arguments(NCC_STYLE "FAIL" + "FOR_TARGET;INCLUDE_DIRECTORY" "ADDITIONAL_INCLUDE_DIRECTORIES;DEFINITIONS" ${ARGN}) + + foreach(var FOR_TARGET INCLUDE_DIRECTORY) + if(NOT DEFINED NCC_STYLE_${var}) + message(FATAL_ERROR "${var} is not defined in ov_ncc_naming_style function") + endif() + endforeach() file(GLOB_RECURSE headers RELATIVE "${NCC_STYLE_INCLUDE_DIRECTORY}" "${NCC_STYLE_INCLUDE_DIRECTORY}/*.hpp") - set(new_pythonpath "${ncc_script_dir}:$ENV{PYTHOPATH}") - list(APPEND ADDITIONAL_INCLUDE_DIRECTORIES "${NCC_STYLE_INCLUDE_DIRECTORY}") + list(APPEND NCC_STYLE_ADDITIONAL_INCLUDE_DIRECTORIES "${NCC_STYLE_INCLUDE_DIRECTORY}") foreach(header IN LISTS headers) set(output_file "${ncc_style_bin_dir}/${header}.ncc_style") @@ -106,20 +110,21 @@ function(ov_ncc_naming_style) OUTPUT ${output_file} COMMAND - "${CMAKE_COMMAND}" -E env PYTHONPATH=${new_pythonpath} "${CMAKE_COMMAND}" -D "PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}" - -D "NCC_PY_SCRIPT=${ncc_wrapper_py}" + -D "NCC_PY_SCRIPT=${ncc_script_py}" -D "INPUT_FILE=${full_header_path}" -D "OUTPUT_FILE=${output_file}" + -D "DEFINITIONS=${NCC_STYLE_DEFINITIONS}" + -D "CLANG_LIB_PATH=${libclang_location}" -D "STYLE_FILE=${ncc_style_dir}/openvino.style" - -D "ADDITIONAL_INCLUDE_DIRECTORIES=${ADDITIONAL_INCLUDE_DIRECTORIES}" + -D "ADDITIONAL_INCLUDE_DIRECTORIES=${NCC_STYLE_ADDITIONAL_INCLUDE_DIRECTORIES}" + -D "EXPECTED_FAIL=${NCC_STYLE_FAIL}" -P "${ncc_style_dir}/ncc_run.cmake" DEPENDS "${full_header_path}" 
"${ncc_style_dir}/openvino.style" "${ncc_script_py}" - "${ncc_wrapper_py}" "${ncc_style_dir}/ncc_run.cmake" COMMENT "[ncc naming style] ${header}" @@ -135,3 +140,9 @@ function(ov_ncc_naming_style) add_dependencies(${NCC_STYLE_FOR_TARGET} ${ncc_target}) add_dependencies(ncc_all ${ncc_target}) endfunction() + +if(TARGET ncc_all) + ov_ncc_naming_style(FOR_TARGET ncc_all + INCLUDE_DIRECTORY "${ncc_style_dir}/self_check" + FAIL) +endif() diff --git a/cmake/developer_package/ncc_naming_style/ncc_run.cmake b/cmake/developer_package/ncc_naming_style/ncc_run.cmake index 9d161b9c373..4b60cd9c031 100644 --- a/cmake/developer_package/ncc_naming_style/ncc_run.cmake +++ b/cmake/developer_package/ncc_naming_style/ncc_run.cmake @@ -2,8 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 # -foreach(var NCC_PY_SCRIPT PYTHON_EXECUTABLE OUTPUT_FILE - INPUT_FILE ADDITIONAL_INCLUDE_DIRECTORIES STYLE_FILE) +foreach(var NCC_PY_SCRIPT PYTHON_EXECUTABLE OUTPUT_FILE DEFINITIONS EXPECTED_FAIL + INPUT_FILE ADDITIONAL_INCLUDE_DIRECTORIES STYLE_FILE CLANG_LIB_PATH) if(NOT DEFINED ${var}) message(FATAL_ERROR "${var} is not defined for ncc_run.cmake") endif() @@ -11,12 +11,18 @@ endforeach() file(REMOVE "${OUTPUT_FILE}") +if(DEFINITIONS) + set(defs --definition ${DEFINITIONS}) +endif() + execute_process( COMMAND "${PYTHON_EXECUTABLE}" "${NCC_PY_SCRIPT}" --path ${INPUT_FILE} --style ${STYLE_FILE} + --clang-lib ${CLANG_LIB_PATH} + ${defs} --include ${ADDITIONAL_INCLUDE_DIRECTORIES} RESULT_VARIABLE result OUTPUT_VARIABLE output @@ -25,6 +31,14 @@ execute_process( file(WRITE "${OUTPUT_FILE}" "${output}") if(NOT result EQUAL "0") + set(failed ON) +endif() + +if(EXPECTED_FAIL AND NOT failed) + message(FATAL_ERROR "[ncc self check] Self check is not failed for ${INPUT_FILE}") +endif() + +if(failed AND NOT EXPECTED_FAIL) # Display the output to console (to parse it form IDE) message("${output}") message(FATAL_ERROR "[ncc naming style] Naming style check failed for ${INPUT_FILE}") diff --git a/cmake/developer_package/ncc_naming_style/ncc_wrapper.py.in b/cmake/developer_package/ncc_naming_style/ncc_wrapper.py.in deleted file mode 100644 index ed70e960b50..00000000000 --- a/cmake/developer_package/ncc_naming_style/ncc_wrapper.py.in +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/python3 - -# Copyright (C) 2018-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import logging -import os -import sys - -from clang.cindex import Config -from ncc import Options, RulesDb, do_validate, Validator - -if __name__ == "__main__": - # set path to speicific clang library location - Config.set_library_file('@libclang_location@') - - logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)s %(message)s', - filename='log.txt', filemode='w') - - """ Parse all command line arguments and validate """ - op = Options() - op.parse_cmd_line() - - if op.args.path is None: - sys.exit(1) - - """ Creating the rules database """ - rules_db = RulesDb(op._style_file) - - """ Check the source code against the configured rules """ - errors = 0 - for path in op.args.path: - if os.path.isfile(path): - if do_validate(op, path): - v = Validator(rules_db, path, op) - errors += v.validate() - elif os.path.isdir(path): - for (root, subdirs, files) in os.walk(path): - for filename in files: - path = root + '/' + filename - if do_validate(op, path): - v = Validator(rules_db, path, op) - errors += v.validate() - - if not op.args.recurse: - break - else: - sys.stderr.write("File '{}' not found!\n".format(path)) - - if errors: - print("Total number of 
errors = {}".format(errors)) - sys.exit(1) diff --git a/cmake/developer_package/ncc_naming_style/openvino.style b/cmake/developer_package/ncc_naming_style/openvino.style index c44fc5c5e4a..2279862cda3 100644 --- a/cmake/developer_package/ncc_naming_style/openvino.style +++ b/cmake/developer_package/ncc_naming_style/openvino.style @@ -104,7 +104,7 @@ NullStatement: 'XXXX' DeclarationStatement: '^.*$' TranslationUnit: 'XXXX' UnexposedAttribute: '^.*$' -CxxFinalAttribute: 'XXXX' +CxxFinalAttribute: '^.*$' CxxOverrideAttribute: '^.*$' AnnotateAttribute: 'XXXX' AsmLabelAttribute: 'XXXX' @@ -116,6 +116,7 @@ PreprocessingDirective: 'XXXX' MacroDefinition: 'XXXX' MacroInstantiation: 'XXXX' InclusionDirective: 'XXXX' +TypeAliasTeplateDeclaration: '^.*$' VariableName: ScopePrefix: Global: '' diff --git a/cmake/developer_package/ncc_naming_style/self_check/class_name.hpp b/cmake/developer_package/ncc_naming_style/self_check/class_name.hpp new file mode 100644 index 00000000000..0d950b5d45c --- /dev/null +++ b/cmake/developer_package/ncc_naming_style/self_check/class_name.hpp @@ -0,0 +1,8 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +class name { +public: + name() = default; +}; diff --git a/cmake/developer_package/ncc_naming_style/self_check/function_name.hpp b/cmake/developer_package/ncc_naming_style/self_check/function_name.hpp new file mode 100644 index 00000000000..0436884928e --- /dev/null +++ b/cmake/developer_package/ncc_naming_style/self_check/function_name.hpp @@ -0,0 +1,5 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +void Function(); diff --git a/cmake/developer_package/ncc_naming_style/self_check/method_name.hpp b/cmake/developer_package/ncc_naming_style/self_check/method_name.hpp new file mode 100644 index 00000000000..9b579696107 --- /dev/null +++ b/cmake/developer_package/ncc_naming_style/self_check/method_name.hpp @@ -0,0 +1,10 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +class name { +public: + name() = default; + + void Method(); +}; diff --git a/ngraph/frontend/frontend_manager/CMakeLists.txt b/ngraph/frontend/frontend_manager/CMakeLists.txt index 83069aa1675..cdb93d5fc0f 100644 --- a/ngraph/frontend/frontend_manager/CMakeLists.txt +++ b/ngraph/frontend/frontend_manager/CMakeLists.txt @@ -28,6 +28,7 @@ target_compile_definitions(${TARGET_NAME}_static PUBLIC USE_STATIC_FRONTEND_MANA add_library(${TARGET_NAME} SHARED ${LIBRARY_SRC} ${LIBRARY_HEADERS} ${LIBRARY_PUBLIC_HEADERS}) add_library(ngraph::${TARGET_NAME} ALIAS ${TARGET_NAME}) + target_include_directories(${TARGET_NAME} PUBLIC $ $) target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) @@ -35,6 +36,11 @@ target_link_libraries(${TARGET_NAME} PRIVATE ${CMAKE_DL_LIBS} PUBLIC ngraph) add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) +ov_ncc_naming_style(FOR_TARGET ${TARGET_NAME} + INCLUDE_DIRECTORY "${FRONTEND_INCLUDE_DIR}" + ADDITIONAL_INCLUDE_DIRECTORIES + $) + if(COMMAND ie_add_vs_version_file) ie_add_vs_version_file(NAME ${TARGET_NAME} FILEDESCRIPTION "Manager of OpenVINO nGraph Frontends") diff --git a/ngraph/frontend/onnx/frontend/CMakeLists.txt b/ngraph/frontend/onnx/frontend/CMakeLists.txt index aab7a150db8..2daed8156b3 100644 --- a/ngraph/frontend/onnx/frontend/CMakeLists.txt +++ b/ngraph/frontend/onnx/frontend/CMakeLists.txt @@ -20,6 +20,11 @@ add_library(ngraph::onnx_ngraph_frontend ALIAS onnx_ngraph_frontend) 
add_clang_format_target(onnx_ngraph_frontend_clang FOR_TARGETS onnx_ngraph_frontend)
+ov_ncc_naming_style(FOR_TARGET onnx_ngraph_frontend
+ INCLUDE_DIRECTORY "${ONNX_FRONTEND_INCLUDE_DIR}"
+ ADDITIONAL_INCLUDE_DIRECTORIES
+ $)
+
if(COMMAND ie_add_vs_version_file)
ie_add_vs_version_file(NAME onnx_ngraph_frontend
FILEDESCRIPTION "nGraph ONNX frontend library")
diff --git a/ngraph/frontend/onnx/onnx_import/CMakeLists.txt b/ngraph/frontend/onnx/onnx_import/CMakeLists.txt
index 61df1f6443d..d7ee771e855 100644
--- a/ngraph/frontend/onnx/onnx_import/CMakeLists.txt
+++ b/ngraph/frontend/onnx/onnx_import/CMakeLists.txt
@@ -31,6 +31,14 @@ source_group("public include" FILES ${LIBRARY_PUBLIC_HEADERS})
add_library(onnx_importer SHARED ${LIBRARY_SRC} ${LIBRARY_HEADERS} ${LIBRARY_PUBLIC_HEADERS})
add_library(ngraph::onnx_importer ALIAS onnx_importer)
+# TODO: fix empty class name
+# ov_ncc_naming_style(FOR_TARGET onnx_importer
+# INCLUDE_DIRECTORY "${ONNX_IMPORT_INCLUDE_DIR}"
+# DEFINITIONS
+# $
+# ADDITIONAL_INCLUDE_DIRECTORIES
+# $)
+
add_clang_format_target(onnx_importer_clang FOR_TARGETS onnx_importer)
if(COMMAND ie_add_vs_version_file)
diff --git a/ngraph/frontend/paddlepaddle/CMakeLists.txt b/ngraph/frontend/paddlepaddle/CMakeLists.txt
index ab9c5bcef84..5c8e9f6b39a 100644
--- a/ngraph/frontend/paddlepaddle/CMakeLists.txt
+++ b/ngraph/frontend/paddlepaddle/CMakeLists.txt
@@ -52,6 +52,12 @@ add_library(${TARGET_NAME} SHARED ${LIBRARY_SRC} ${LIBRARY_HEADERS} ${LIBRARY_P
add_dependencies(${TARGET_NAME} paddlepaddle_ngraph_frontend_proto)
+# TODO enable: PDPD_ASSERT is in capital letters while it's a function
+# ov_ncc_naming_style(FOR_TARGET ${TARGET_NAME}
+# INCLUDE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include"
+# ADDITIONAL_INCLUDE_DIRECTORIES
+# $)
+
target_include_directories(${TARGET_NAME}
PUBLIC
$

From 838e701e5e3762fdec04dca339121007f09e51fb Mon Sep 17 00:00:00 2001
From: Vladimir Gavrilov
Date: Mon, 9 Aug 2021 16:49:07 +0300
Subject: [PATCH 07/24] ExpandRangeConstant adjustment for bidirectional Broadcast (#6739)

* Fixes in the transformation ExpandRangeConstant.
* Fixed test.
* Now we use ShapeOf for both inputs of Broadcast.
* Now the transformation ExpandRangeConstant uses two Gather layers.
* Deleted commented code.
* Fixed tests for the transformation ExpandRangeConstant.
* Rewritten the transformation ExpandRangeConstant using Select.
--- .../extensions/front/broadcast_with_range.py | 22 +++++- .../front/broadcast_with_range_test.py | 67 +++++++++++++------ 2 files changed, 65 insertions(+), 24 deletions(-) diff --git a/model-optimizer/extensions/front/broadcast_with_range.py b/model-optimizer/extensions/front/broadcast_with_range.py index 55fac8f84ac..25aabc97a8f 100644 --- a/model-optimizer/extensions/front/broadcast_with_range.py +++ b/model-optimizer/extensions/front/broadcast_with_range.py @@ -3,12 +3,15 @@ import numpy as np +from extensions.ops.elementwise import Equal from extensions.ops.gather import Gather from extensions.ops.range import Range +from extensions.ops.select import Select from mo.front.common.partial_infer.utils import int64_array from mo.front.common.replacement import FrontReplacementSubgraph from mo.front.tf.graph_utils import create_op_with_const_inputs, create_op_node_with_second_input from mo.graph.graph import Graph, rename_nodes, Node +from mo.ops.shape import Shape from mo.ops.unsqueeze import Unsqueeze @@ -51,16 +54,31 @@ class ExpandRangeConstant(FrontReplacementSubgraph): positive_idx = non_one_dims.item(0) negative_idx = positive_idx - len(shape) + + node_name = node.soft_get('name', node.id) gather = create_op_with_const_inputs(graph, Gather, {1: int64_array(negative_idx), 2: int64_array(0)}, - {'name': node.soft_get('name', node.id) + '/BroadcastingDim'}) + {'name': node_name + '/BroadcastingDim'}) + gather_for_const = create_op_with_const_inputs(graph, Gather, {1: int64_array(negative_idx), 2: int64_array(0)}, + {'name': const_name + '/BroadcastingDim'}) + shapeof_node = Shape(graph, {'name': const_name + '/ShapeOf'}).create_node() + shapeof_node.out_port(0).connect(gather_for_const.in_port(0)) + + equal_node = create_op_with_const_inputs(graph, Equal, {1: int64_array(1)}, {'name': node_name + '/ConstOne'}) + gather.out_port(0).connect(equal_node.in_port(0)) + + select_node = Select(graph, {'name': node_name + '/Select', + 'auto_broadcast': 'numpy'}).create_node([equal_node, gather_for_const, gather]) + + const.out_port(0).connect(shapeof_node.in_port(0)) range_node = create_op_with_const_inputs(graph, Range, {0: np.array(0, dtype=value.dtype), 2: np.array(1, dtype=value.dtype)}, {'name': const_name + '/Range', 'dtype': value.dtype}) + select_node.out_port(0).connect(range_node.in_port(1)) node.in_port(1).get_connection().add_destination(gather.in_port(0)) - gather.out_port(0).connect(range_node.in_port(1)) + node.in_port(0).get_connection().set_source(range_node.out_port(0)) if one_dims.size: diff --git a/model-optimizer/unit_tests/extensions/front/broadcast_with_range_test.py b/model-optimizer/unit_tests/extensions/front/broadcast_with_range_test.py index ca5cd19f3e2..ee4ed63beea 100644 --- a/model-optimizer/unit_tests/extensions/front/broadcast_with_range_test.py +++ b/model-optimizer/unit_tests/extensions/front/broadcast_with_range_test.py @@ -8,7 +8,7 @@ import numpy as np from extensions.front.broadcast_with_range import ExpandRangeConstant from mo.utils.ir_engine.compare_graphs import compare_graphs from unit_tests.utils.graph import build_graph, result, regular_op_with_shaped_data, valued_const_with_data, connect, \ - regular_op_with_empty_data, connect_data + regular_op_with_empty_data class TestRangeBroadcast(unittest.TestCase): @@ -25,38 +25,61 @@ class TestRangeBroadcast(unittest.TestCase): ], nodes_with_edges_only=True) ExpandRangeConstant().find_and_replace_pattern(graph) - graph_ref = build_graph({ + graph_ref = build_graph(nodes_attrs={ 
**regular_op_with_shaped_data('shape', [2], {'type': 'Parameter'}), + **valued_const_with_data('value', np.arange(0, 384).reshape((1, 384))), + **regular_op_with_empty_data('bc', {'type': 'Broadcast'}), + **regular_op_with_empty_data('shapeof', {'type': 'ShapeOf'}), + **regular_op_with_empty_data('select', {'type': 'Select'}), + **regular_op_with_empty_data('gather', {'type': 'Gather'}), + 'gather_const': {'type': 'Gather', 'kind': 'op', 'op': 'Gather'}, + 'equal': {'type': 'Equal', 'kind': 'op', 'op': 'Equal'}, # start **valued_const_with_data('start', np.array(0)), # limit - **valued_const_with_data('minus_one', np.array(-1)), - **valued_const_with_data('zero', np.array(0)), - **regular_op_with_empty_data('range_dim', {'type': 'Gather'}), + **valued_const_with_data('minus_one_0', np.array(-1)), + **valued_const_with_data('zero_0', np.array(0)), + **valued_const_with_data('minus_one_1', np.array(-1)), + **valued_const_with_data('zero_1', np.array(0)), # delta **valued_const_with_data('delta', np.array(1)), - **regular_op_with_empty_data('range', {'type': 'Range'}), + **regular_op_with_shaped_data('range', [1, 384], {'type': 'Range'}), # keep dims **valued_const_with_data('axes', np.array([0])), - **regular_op_with_empty_data('keep_shape', {'type': 'Unsqueeze'}), + **regular_op_with_shaped_data('keep_shape', [1, 384], {'type': 'Unsqueeze'}), + + **valued_const_with_data('one', np.array(1)), - **regular_op_with_empty_data('bc', {'type': 'Broadcast'}), **result(), - }, [ - *connect('start', '0:range'), - *connect('shape', '0:range_dim'), - *connect('minus_one', '1:range_dim'), - *connect('zero', '2:range_dim'), - *connect('range_dim', '1:range'), - *connect('delta', '2:range'), - *connect('range', '0:keep_shape'), - *connect('axes', '1:keep_shape'), - *connect('keep_shape', '0:bc'), - *connect_data('shape', '1:bc'), - *connect('bc', 'output'), - ], nodes_with_edges_only=True) + }, + edges=[ + *connect('value', 'shapeof'), + *connect('gather', '0:equal'), + ('gather', 'select', {'in': 2, 'out': 0}), + ('gather_const', 'select', {'in': 1}), + ('equal', 'select', {'in': 0}), + *connect('minus_one_0', '1:gather'), + *connect('zero_0', '2:gather'), + *connect('shapeof', '0:gather_const'), + *connect('minus_one_1', '1:gather_const'), + *connect('zero_1', '2:gather_const'), + *connect('start', '0:range'), + *connect('select', '1:range'), + *connect('delta', '2:range'), + *connect('range', '0:keep_shape'), + *connect('axes', '1:keep_shape'), + *connect('keep_shape', '0:bc'), + *connect('one', '1:equal'), + *connect('shape', '1:bc'), + ('shape_d', 'gather', {'out': 0, 'in': 0}), + *connect('bc', 'output'), + ], + update_attributes={ + 'range_d': {'value': np.arange(0, 384).reshape((1, 384))}, + 'keep_shape_d': {'value': np.arange(0, 384).reshape((1, 384))}, + }) (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True) - self.assertTrue(flag, resp) + self.assertTrue(flag, resp) \ No newline at end of file From eadeae6c474907caab841f2645ac488d46445026 Mon Sep 17 00:00:00 2001 From: iliya mironov Date: Mon, 9 Aug 2021 19:22:21 +0300 Subject: [PATCH 08/24] Fix ChangeOutputTypeAttributes BackReplacementPattern (#6949) * Hot fix * Add unit test --- .../extensions/back/ChangeOutputTypeAttributes.py | 2 +- .../back/ChangeOutputTypeAttributes_test.py | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/model-optimizer/extensions/back/ChangeOutputTypeAttributes.py b/model-optimizer/extensions/back/ChangeOutputTypeAttributes.py index b75c7a86c76..5e525d48791 
100644 --- a/model-optimizer/extensions/back/ChangeOutputTypeAttributes.py +++ b/model-optimizer/extensions/back/ChangeOutputTypeAttributes.py @@ -52,7 +52,7 @@ class ChangeOutputTypeAttributes(BackReplacementPattern): if node[dst_type] in [np.float32, np.float64] and ir_data_type == np.float16 and \ not node.has_and_set('returns_shape_value'): final_type = np.float16 - elif node.has_and_set('returns_shape_value') and node.dst_type == np.float16: + elif node.has_and_set('returns_shape_value') and node[dst_type] == np.float16: # return back FP32 for all nodes with shape values final_type = np.float32 diff --git a/model-optimizer/unit_tests/extensions/back/ChangeOutputTypeAttributes_test.py b/model-optimizer/unit_tests/extensions/back/ChangeOutputTypeAttributes_test.py index b40797397d3..2f607113dc8 100644 --- a/model-optimizer/unit_tests/extensions/back/ChangeOutputTypeAttributes_test.py +++ b/model-optimizer/unit_tests/extensions/back/ChangeOutputTypeAttributes_test.py @@ -26,6 +26,13 @@ class ChangeOutputTypeAttributesTests(unittest.TestCase): (flag, resp) = compare_graphs(graph, graph_ref, 'res', check_op_attrs=True) self.assertTrue(flag, resp) + def test_range_correct_case_returns_shape_value(self): + graph, graph_ref = build_range_test_graphs(start=0, limit=10, delta=1, dst_type_str='FP32', + src_type_str='FP16', returns_shape_value=True) + ChangeOutputTypeAttributes().find_and_replace_pattern(graph) + (flag, resp) = compare_graphs(graph, graph_ref, 'res', check_op_attrs=True) + self.assertTrue(flag, resp) + # starting from ~1000 FP16 absolute difference between neighbor values is more than 1 # fails because of shape inconsistency def test_range_different_values(self): @@ -58,13 +65,15 @@ class ChangeOutputTypeAttributesTests(unittest.TestCase): self.assertRaises(Error, ChangeOutputTypeAttributes().find_and_replace_pattern, graph) -def build_range_test_graphs(start=0, limit=10, delta=1, dst_type_str='FP16'): +def build_range_test_graphs(start=0, limit=10, delta=1, dst_type_str='FP16', + src_type_str='FP32', returns_shape_value=None): nodes = { **valued_const_with_data('start', float32_array(start)), **valued_const_with_data('limit', float32_array(limit)), **valued_const_with_data('delta', float32_array(delta)), **regular_op_with_empty_data('range', {'type': 'Range', 'op': 'Range', - 'output_type': np.float32, + 'returns_shape_value': returns_shape_value, + 'output_type': data_type_str_to_np(src_type_str), 'infer': Range.infer}), **result('res'), } @@ -72,6 +81,7 @@ def build_range_test_graphs(start=0, limit=10, delta=1, dst_type_str='FP16'): nodes_ref = deepcopy(nodes) nodes_ref.update({ **regular_op_with_empty_data('range', {'type': 'Range', 'op': 'Range', + 'returns_shape_value': returns_shape_value, 'output_type': data_type_str_to_np(dst_type_str), 'infer': Range.infer}), }) From 0c8a5d527985ecf903576eec5355dfce7a003708 Mon Sep 17 00:00:00 2001 From: Polina Brzezinskaya Date: Mon, 9 Aug 2021 20:35:03 +0300 Subject: [PATCH 09/24] [VPU] Removes MYRIAD specific tools (#6908) --- inference-engine/tools/CMakeLists.txt | 1 - inference-engine/tools/vpu/CMakeLists.txt | 8 - .../tools/vpu/vpu_compile/CMakeLists.txt | 39 - .../tools/vpu/vpu_compile/README.md | 82 -- .../tools/vpu/vpu_compile/main.cpp | 271 ------- .../vpu/vpu_compile/vpu_tools_common.cpp | 299 ------- .../vpu/vpu_compile/vpu_tools_common.hpp | 71 -- .../tools/vpu/vpu_perfcheck/CMakeLists.txt | 47 -- .../tools/vpu/vpu_perfcheck/main.cpp | 749 ------------------ .../deployment_manager/configs/darwin.json | 2 - 
scripts/deployment_manager/configs/linux.json | 2 - .../deployment_manager/configs/windows.json | 4 +- 12 files changed, 1 insertion(+), 1574 deletions(-) delete mode 100644 inference-engine/tools/vpu/CMakeLists.txt delete mode 100644 inference-engine/tools/vpu/vpu_compile/CMakeLists.txt delete mode 100644 inference-engine/tools/vpu/vpu_compile/README.md delete mode 100644 inference-engine/tools/vpu/vpu_compile/main.cpp delete mode 100644 inference-engine/tools/vpu/vpu_compile/vpu_tools_common.cpp delete mode 100644 inference-engine/tools/vpu/vpu_compile/vpu_tools_common.hpp delete mode 100644 inference-engine/tools/vpu/vpu_perfcheck/CMakeLists.txt delete mode 100644 inference-engine/tools/vpu/vpu_perfcheck/main.cpp diff --git a/inference-engine/tools/CMakeLists.txt b/inference-engine/tools/CMakeLists.txt index 22fa6e3e6c2..9112658ce5e 100644 --- a/inference-engine/tools/CMakeLists.txt +++ b/inference-engine/tools/CMakeLists.txt @@ -2,5 +2,4 @@ # SPDX-License-Identifier: Apache-2.0 # -add_subdirectory(vpu) add_subdirectory(compile_tool) diff --git a/inference-engine/tools/vpu/CMakeLists.txt b/inference-engine/tools/vpu/CMakeLists.txt deleted file mode 100644 index fa0357111b2..00000000000 --- a/inference-engine/tools/vpu/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (C) 2018-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -if(ENABLE_MYRIAD) - add_subdirectory(vpu_compile) - add_subdirectory(vpu_perfcheck) -endif() diff --git a/inference-engine/tools/vpu/vpu_compile/CMakeLists.txt b/inference-engine/tools/vpu/vpu_compile/CMakeLists.txt deleted file mode 100644 index c590bc0e5b0..00000000000 --- a/inference-engine/tools/vpu/vpu_compile/CMakeLists.txt +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2018-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -set(TARGET_NAME myriad_compile) - -file(GLOB SRCS - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp -) - -add_executable(${TARGET_NAME} ${SRCS}) - -if (CMAKE_COMPILER_IS_GNUCXX) - target_compile_options(${TARGET_NAME} PRIVATE -Wall) -endif() - -target_link_libraries(${TARGET_NAME} PRIVATE - inference_engine - vpu_graph_transformer - gflags - ie_samples_utils -) - -add_dependencies(${TARGET_NAME} myriadPlugin) - -set_target_properties(${TARGET_NAME} PROPERTIES - COMPILE_PDB_NAME ${TARGET_NAME} - FOLDER tools -) - -add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}) - -# install - -ie_cpack_add_component(myriad_dev DEPENDS myriad) - -install(TARGETS ${TARGET_NAME} - RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} - COMPONENT myriad_dev) diff --git a/inference-engine/tools/vpu/vpu_compile/README.md b/inference-engine/tools/vpu/vpu_compile/README.md deleted file mode 100644 index 465ac888724..00000000000 --- a/inference-engine/tools/vpu/vpu_compile/README.md +++ /dev/null @@ -1,82 +0,0 @@ -# myriad_compile tool {#openvino_inference_engine_tools_vpu_vpu_compile_README} - -This topic demonstrates how to run the `myriad_compile` tool application, which intended to dump blob for `vpu` plugins of Inference Engine by configuration options. - -## How It Works - -Upon the start-up, the tool application reads command line parameters and loads a network to the Inference Engine plugin. -Then application exports blob and writes it to the output file. - -## Running - -Running the application with the -h option yields the following usage message: - -```sh -./myriad_compile -h -Inference Engine: - API version ............ - Build .................. - -myriad_compile [OPTIONS] -[OPTIONS]: - -h Optional. 
Print a usage message. - -m Required. Path to xml model. - -pp Optional. Path to a plugin folder. - -o Optional. Path to the output file. Default value: ".blob". - -c Optional. Path to the configuration file. Default value: "config". - -ip Optional. Specifies precision for all input layers of network. Supported values: FP32, FP16, U8. Default value: FP16. - -op Optional. Specifies precision for all output layers of network. Supported values: FP32, FP16, U8. Default value: FP16. - -iop "" Optional. Specifies precision for input/output layers by name. - By default all inputs and outputs have FP16 precision. - Available precisions: FP32, FP16, U8. - Example: -iop "input:FP16, output:FP16". - Notice that quotes are required. - Overwrites precision from ip and op options for specified layers. - -VPU_NUMBER_OF_SHAVES Optional. Specifies number of shaves. Should be set with "VPU_NUMBER_OF_CMX_SLICES". Overwrites value from config. - -VPU_NUMBER_OF_CMX_SLICES Optional. Specifies number of CMX slices. Should be set with "VPU_NUMBER_OF_SHAVES". Overwrites value from config. - -VPU_TILING_CMX_LIMIT_KB Optional. Specifies CMX limit for data tiling in kB. Value should be equal or greater than -1, where -1 means default value of limit. Overwrites value from config. -``` - -Running the application with the empty list of options yields an error message. - -You can use the following command to dump blob using a trained Faster R-CNN network: - -```sh -./myriad_compile -m /model_name.xml -``` - -## Import and Export functionality - -#### Export - -You can save a blob file from your application. -To do this, you should call the `Export()` method on the `ExecutableNetwork` object. -`Export()` has the following argument: -* Name of output blob [IN] - -Example: - -```sh -InferenceEngine::Core core; -InferenceEngine::ExecutableNetwork executableNetwork = core.LoadNetwork(network); -executableNetwork.Export("model_name.blob"); -``` - -#### Import - -You can upload blob with network into your application. -To do this, you should call the `ImportNetwork()` method on the `Core` object. -`ImportNetwork()` has the following arguments: -* Path to blob [IN] -* Config options [IN] -And returns `ExecutableNetwork` object - -Example: - -```sh -std::string modelFilename ("model_name.blob"); -InferenceEngine::Core core; -InferenceEngine::ExecutableNetwork importedNetwork = core.ImportNetwork(modelFilename); -``` - -> **NOTE**: Models should be first converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](https://software.intel.com/en-us/articles/OpenVINO-ModelOptimizer). diff --git a/inference-engine/tools/vpu/vpu_compile/main.cpp b/inference-engine/tools/vpu/vpu_compile/main.cpp deleted file mode 100644 index 01a7177e069..00000000000 --- a/inference-engine/tools/vpu/vpu_compile/main.cpp +++ /dev/null @@ -1,271 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "inference_engine.hpp" -#include -#include "samples/common.hpp" -#include - -#include "vpu_tools_common.hpp" - -static constexpr char help_message[] = "Optional. Print a usage message."; -static constexpr char model_message[] = "Required. Path to xml model."; -static constexpr char plugin_path_message[] = "Optional. Path to a plugin folder."; -static constexpr char output_message[] = "Optional. Path to the output file. 
Default value: \".blob\"."; -static constexpr char config_message[] = "Optional. Path to the configuration file. Default value: \"config\"."; -static constexpr char number_of_shaves_message[] = "Optional. Specifies number of shaves." - " Should be set with \"VPU_NUMBER_OF_CMX_SLICES\"." - " Overwrites value from config."; -static constexpr char number_of_cmx_slices_message[] = "Optional. Specifies number of CMX slices." - " Should be set with \"VPU_NUMBER_OF_SHAVES\"." - " Overwrites value from config."; -static constexpr char tiling_cmx_limit_message[] = "Optional. Specifies CMX limit for data tiling." - " Value should be equal or greater than -1." - " Overwrites value from config."; -static constexpr char inputs_precision_message[] = "Optional. Specifies precision for all input layers of network." - " Supported values: FP32, FP16, U8. Default value: FP16."; -static constexpr char outputs_precision_message[] = "Optional. Specifies precision for all output layers of network." - " Supported values: FP32, FP16, U8. Default value: FP16."; -static constexpr char iop_message[] = "Optional. Specifies precision for input/output layers by name.\n" -" By default all inputs and outputs have FP16 precision.\n" -" Available precisions: FP32, FP16, U8.\n" -" Example: -iop \"input:FP16, output:FP16\".\n" -" Notice that quotes are required.\n" -" Overwrites precision from ip and op options for specified layers."; - -DEFINE_bool(h, false, help_message); -DEFINE_string(m, "", model_message); -DEFINE_string(pp, "", plugin_path_message); -DEFINE_string(o, "", output_message); -DEFINE_string(c, "config", config_message); -DEFINE_string(ip, "", inputs_precision_message); -DEFINE_string(op, "", outputs_precision_message); -DEFINE_string(iop, "", iop_message); -DEFINE_string(VPU_NUMBER_OF_SHAVES, "", number_of_shaves_message); -DEFINE_string(VPU_NUMBER_OF_CMX_SLICES, "", number_of_cmx_slices_message); -DEFINE_string(VPU_TILING_CMX_LIMIT_KB, "", tiling_cmx_limit_message); - -static void showUsage() { - std::cout << std::endl; - std::cout << "myriad_compile [OPTIONS]" << std::endl; - std::cout << "[OPTIONS]:" << std::endl; - std::cout << " -h " << help_message << std::endl; - std::cout << " -m " << model_message << std::endl; - std::cout << " -pp " << plugin_path_message << std::endl; - std::cout << " -o " << output_message << std::endl; - std::cout << " -c " << config_message << std::endl; - std::cout << " -ip " << inputs_precision_message << std::endl; - std::cout << " -op " << outputs_precision_message << std::endl; - std::cout << " -iop \"\" " << iop_message << std::endl; - std::cout << " -VPU_NUMBER_OF_SHAVES " << number_of_shaves_message << std::endl; - std::cout << " -VPU_NUMBER_OF_CMX_SLICES " << number_of_cmx_slices_message << std::endl; - std::cout << " -VPU_TILING_CMX_LIMIT_KB " << tiling_cmx_limit_message << std::endl; - std::cout << std::endl; -} - -static bool parseCommandLine(int *argc, char ***argv) { - gflags::ParseCommandLineNonHelpFlags(argc, argv, true); - - if (FLAGS_h) { - showUsage(); - return false; - } - - if (FLAGS_m.empty()) { - throw std::invalid_argument("Path to model xml file is required"); - } - - if (1 < *argc) { - std::stringstream message; - message << "Unknown arguments: "; - for (auto arg = 1; arg < *argc; arg++) { - message << argv[arg]; - if (arg < *argc) { - message << " "; - } - } - throw std::invalid_argument(message.str()); - } - - return true; -} - -static std::map configure(const std::string &configFile, const std::string &xmlFileName) { - auto config = 
parseConfig(configFile); - - IE_SUPPRESS_DEPRECATED_START - config[VPU_MYRIAD_CONFIG_KEY(PLATFORM)] = "VPU_MYRIAD_2480"; - IE_SUPPRESS_DEPRECATED_END - - if (!FLAGS_VPU_NUMBER_OF_SHAVES.empty()) { - config[InferenceEngine::MYRIAD_NUMBER_OF_SHAVES] = FLAGS_VPU_NUMBER_OF_SHAVES; - } - - if (!FLAGS_VPU_NUMBER_OF_CMX_SLICES.empty()) { - config[InferenceEngine::MYRIAD_NUMBER_OF_CMX_SLICES] = FLAGS_VPU_NUMBER_OF_CMX_SLICES; - } - - if (!FLAGS_VPU_TILING_CMX_LIMIT_KB.empty()) { - config[InferenceEngine::MYRIAD_TILING_CMX_LIMIT_KB] = FLAGS_VPU_TILING_CMX_LIMIT_KB; - } - - return config; -} - -static std::map parsePrecisions(const std::string &iop) { - std::string user_input = iop; - user_input.erase(std::remove_if(user_input.begin(), user_input.end(), ::isspace), user_input.end()); - - std::vector inputs; - vpu::splitStringList(user_input, inputs, ','); - - std::map precisions; - for (auto &&input : inputs) { - std::vector precision; - vpu::splitStringList(input, precision, ':'); - if (precision.size() != 2) { - throw std::invalid_argument("Invalid precision " + input + ". Expected layer_name : precision_value"); - } - - precisions[precision[0]] = precision[1]; - } - - return precisions; -} - -using supported_precisions_t = std::unordered_map; - -static InferenceEngine::Precision getPrecision(const std::string &value, - const supported_precisions_t &supported_precisions, - const std::string& error_report = std::string()) { - std::string upper_value = value; - std::transform(value.begin(), value.end(), upper_value.begin(), ::toupper); - auto precision = supported_precisions.find(upper_value); - if (precision == supported_precisions.end()) { - std::string report = error_report.empty() ? ("") : (" " + error_report); - throw std::logic_error("\"" + value + "\"" + " is not a valid precision" + report); - } - - return precision->second; -} - -static InferenceEngine::Precision getInputPrecision(const std::string &value) { - static const supported_precisions_t supported_precisions = { - { "FP32", InferenceEngine::Precision::FP32 }, - { "FP16", InferenceEngine::Precision::FP16 }, - { "U8", InferenceEngine::Precision::U8 } - }; - return getPrecision(value, supported_precisions, "for input layer"); -} - -static InferenceEngine::Precision getOutputPrecision(const std::string &value) { - static const supported_precisions_t supported_precisions = { - { "FP32", InferenceEngine::Precision::FP32 }, - { "FP16", InferenceEngine::Precision::FP16 } - }; - return getPrecision(value, supported_precisions, "for output layer"); -} - -void setPrecisions(const InferenceEngine::CNNNetwork &network, const std::string &iop) { - auto user_precisions_map = parsePrecisions(iop); - auto inputs = network.getInputsInfo(); - auto outputs = network.getOutputsInfo(); - - for (auto &&item : user_precisions_map) { - std::string layer_name = item.first; - std::string user_precision = item.second; - - auto input = inputs.find(layer_name); - auto output = outputs.find(layer_name); - - if (input != inputs.end()) { - const auto input_precision = input->second->getPrecision(); - if ((isFloat(input_precision) && isFloat(getInputPrecision(user_precision))) || - (isFP16(input_precision) && isU8(getInputPrecision(user_precision)))) { - input->second->setPrecision(getInputPrecision(user_precision)); - } - } else if (output != outputs.end()) { - auto output_precision = output->second->getPrecision(); - if (isFloat(output_precision) && isFloat(getOutputPrecision(user_precision))) { - output->second->setPrecision(getOutputPrecision(user_precision)); - 
} - } else { - throw std::logic_error(layer_name + " is not an input neither output"); - } - } -} - -static void processPrecisions(InferenceEngine::CNNNetwork &network, - const std::string &inputs_precision, const std::string &outputs_precision, - const std::string &iop) { - setPrecisions(network); - - if (!inputs_precision.empty()) { - auto precision = getInputPrecision(inputs_precision); - for (auto &&layer : network.getInputsInfo()) { - const auto layerPrecision = layer.second->getPrecision(); - if ((isFloat(layerPrecision) && isFloat(precision)) || - (isFP16(layerPrecision) && isU8(precision))) { - layer.second->setPrecision(precision); - } - } - } - - if (!outputs_precision.empty()) { - auto precision = getOutputPrecision(outputs_precision); - for (auto &&layer : network.getOutputsInfo()) { - const auto layerPrecision = layer.second->getPrecision(); - if (isFloat(layerPrecision) && isFloat(precision)) { - layer.second->setPrecision(precision); - } - } - } - - if (!iop.empty()) { - setPrecisions(network, iop); - } -} - -int main(int argc, char *argv[]) { - try { - std::cout << "Inference Engine: " << InferenceEngine::GetInferenceEngineVersion() << std::endl; - - if (!parseCommandLine(&argc, &argv)) { - return EXIT_SUCCESS; - } - - auto network = readNetwork(FLAGS_m); - - processPrecisions(network, FLAGS_ip, FLAGS_op, FLAGS_iop); - - InferenceEngine::Core ie; - auto executableNetwork = ie.LoadNetwork(network, "MYRIAD", configure(FLAGS_c, FLAGS_m)); - - std::string outputName = FLAGS_o; - if (outputName.empty()) { - outputName = fileNameNoExt(FLAGS_m) + ".blob"; - } - executableNetwork.Export(outputName); - } catch (const std::exception &error) { - std::cerr << error.what() << std::endl; - return EXIT_FAILURE; - } catch (...) { - std::cerr << "Unknown/internal exception happened." 
<< std::endl; - return EXIT_FAILURE; - } - - std::cout << "Done" << std::endl; - return EXIT_SUCCESS; -} diff --git a/inference-engine/tools/vpu/vpu_compile/vpu_tools_common.cpp b/inference-engine/tools/vpu/vpu_compile/vpu_tools_common.cpp deleted file mode 100644 index e3071a557be..00000000000 --- a/inference-engine/tools/vpu/vpu_compile/vpu_tools_common.cpp +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/* on windows min and max already defined that makes using numeric_limits impossible */ -#ifdef _WIN32 -# define NOMINMAX -#endif - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "vpu_tools_common.hpp" -#include - -#include -#include - -#include "precision_utils.h" - -InferenceEngine::CNNNetwork readNetwork(const std::string &xmlFileName) { - return InferenceEngine::Core().ReadNetwork(xmlFileName); -} - -bool isFP16(InferenceEngine::Precision precision) { - return precision == InferenceEngine::Precision::FP16; -} - -bool isFP32(InferenceEngine::Precision precision) { - return precision == InferenceEngine::Precision::FP32; -} - -bool isU8(InferenceEngine::Precision precision) { - return precision == InferenceEngine::Precision::U8; -} - -bool isFloat(InferenceEngine::Precision precision) { - return isFP16(precision) || isFP32(precision); -} - -void setPrecisions(const InferenceEngine::CNNNetwork &network) { - for (auto &&layer : network.getInputsInfo()) { - if (isFP32(layer.second->getPrecision())) { - layer.second->setPrecision(InferenceEngine::Precision::FP16); - } - } - - for (auto &&layer : network.getOutputsInfo()) { - if (isFP32(layer.second->getPrecision())) { - layer.second->setPrecision(InferenceEngine::Precision::FP16); - } - } -} - -BitMap::BitMap(const std::string &filename) { - BmpHeader header; - BmpInfoHeader infoHeader; - - std::ifstream input(filename, std::ios::binary); - if (!input) { - return; - } - - input.read(reinterpret_cast(&header.type), 2); - - if (header.type != 'M'*256+'B') { - std::cerr << "[BMP] file is not bmp type\n"; - return; - } - - input.read(reinterpret_cast(&header.size), 4); - input.read(reinterpret_cast(&header.reserved), 4); - input.read(reinterpret_cast(&header.offset), 4); - - input.read(reinterpret_cast(&infoHeader), sizeof(BmpInfoHeader)); - - bool rowsReversed = infoHeader.height < 0; - _width = static_cast(infoHeader.width); - _height = static_cast(std::abs(infoHeader.height)); - - if (infoHeader.bits != 24) { - std::cerr << "[BMP] 24bpp only supported. But input has:" << infoHeader.bits << "\n"; - return; - } - - if (infoHeader.compression != 0) { - std::cerr << "[BMP] compression not supported\n"; - } - - auto padSize = _width & 3; - char pad[3]; - size_t size = _width * _height * 3; - - _data.reset(new unsigned char[size], std::default_delete()); - - input.seekg(header.offset, std::ios::beg); - - // reading by rows in invert vertically - for (uint32_t i = 0; i < _height; i++) { - uint32_t storeAt = rowsReversed ? 
i : (uint32_t)_height - 1 - i; - input.read(reinterpret_cast(_data.get()) + _width * 3 * storeAt, _width * 3); - input.read(pad, padSize); - } -} - -void loadImage(const std::string &imageFilename, InferenceEngine::Blob::Ptr &blob) { - InferenceEngine::TensorDesc tensDesc = blob->getTensorDesc(); - if (tensDesc.getPrecision() != InferenceEngine::Precision::FP16) { - throw std::invalid_argument("Input must have FP16 precision"); - } - - BitMap reader(imageFilename); - - const auto dims = tensDesc.getDims(); - auto numBlobChannels = dims[1]; - size_t batch = dims[0]; - size_t w = dims[3]; - size_t h = dims[2]; - size_t img_w = reader.width(); - size_t img_h = reader.height(); - - size_t numImageChannels = reader.size() / (reader.width() * reader.height()); - if (numBlobChannels != numImageChannels && numBlobChannels != 1) { - throw std::invalid_argument("Input channels mismatch: image channels " + std::to_string(numImageChannels) + - ", network channels " + std::to_string(numBlobChannels) + - ", expecting count of image channels are equal to count if network channels" - "or count of network channels are equal to 1"); - } - - int16_t *blobDataPtr = std::dynamic_pointer_cast>(blob)->data(); - auto nPixels = w * h; - unsigned char *RGB8 = reader.getData().get(); - float xscale = 1.0f * img_w / w; - float yscale = 1.0f * img_h / h; - - for (std::size_t n = 0; n != batch; n++) { - for (std::size_t i = 0; i < h; ++i) { - int y = static_cast(std::floor((i + 0.5f) * yscale)); - for (std::size_t j = 0; j < w; ++j) { - int x = static_cast(std::floor((j + 0.5f) * xscale)); - for (std::size_t k = 0; k < numBlobChannels; k++) { - float src = 1.0f * RGB8[(y * img_w + x) * numImageChannels + k]; - if (tensDesc.getLayout() == InferenceEngine::NHWC) { - blobDataPtr[n * h * w * numBlobChannels + (i * w + j) * numBlobChannels + k] = - InferenceEngine::PrecisionUtils::f32tof16(src); - } else { - blobDataPtr[n * h * w * numBlobChannels + (i * w + j) + k * nPixels] = - InferenceEngine::PrecisionUtils::f32tof16(src); - } - } - } - } - } -} - -void printPerformanceCounts(const std::map& perfMap, const std::string report) { - std::vector> perfVec(perfMap.begin(), - perfMap.end()); - std::sort(perfVec.begin(), perfVec.end(), - [=](const std::pair &pair1, - const std::pair &pair2) -> bool { - return pair1.second.execution_index < pair2.second.execution_index; - }); - - size_t maxLayerName = 0u, maxExecType = 0u; - for (auto &&entry : perfVec) { - maxLayerName = std::max(maxLayerName, entry.first.length()); - maxExecType = std::max(maxExecType, std::strlen(entry.second.exec_type)); - } - - size_t indexWidth = 7, nameWidth = maxLayerName + 5, typeWidth = maxExecType + 5, timeWidth = 10; - size_t totalWidth = indexWidth + nameWidth + typeWidth + timeWidth; - - std::cout << std::endl << "Detailed " << report << " Profile" << std::endl; - for (size_t i = 0; i < totalWidth; i++) - std::cout << "="; - std::cout << std::endl; - std::cout << std::setw(static_cast(indexWidth)) << std::left << "Index" - << std::setw(static_cast(nameWidth)) << std::left << "Name" - << std::setw(static_cast(typeWidth)) << std::left << "Type" - << std::setw(static_cast(timeWidth)) << std::right << "Time (ms)" - << std::endl; - - for (size_t i = 0; i < totalWidth; i++) - std::cout << "-"; - std::cout << std::endl; - - long long totalTime = 0; - for (const auto& p : perfVec) { - const auto& stageName = p.first; - const auto& info = p.second; - if (info.status == InferenceEngine::InferenceEngineProfileInfo::EXECUTED) { - std::cout << 
std::setw(static_cast(indexWidth)) << std::left << info.execution_index - << std::setw(static_cast(nameWidth)) << std::left << stageName - << std::setw(static_cast(typeWidth)) << std::left << info.exec_type - << std::setw(static_cast(timeWidth)) << std::right << info.realTime_uSec / 1000.0 - << std::endl; - - totalTime += info.realTime_uSec; - } - } - - for (std::size_t i = 0; i < totalWidth; i++) - std::cout << "-"; - std::cout << std::endl; - std::cout << std::setw(static_cast(totalWidth / 2)) << std::right << "Total inference time:" - << std::setw(static_cast(totalWidth / 2 + 1)) << std::right << totalTime / 1000.0 - << std::endl; - for (std::size_t i = 0; i < totalWidth; i++) - std::cout << "-"; - std::cout << std::endl; -} - -std::vector extractFilesByExtension(const std::string& directory, const std::string& extension) { - return extractFilesByExtension(directory, extension, std::numeric_limits::max()); -} - -std::vector extractFilesByExtension(const std::string& directory, const std::string& extension, - std::size_t max_size) { - if (max_size == 0) { - return {}; - } - - std::vector files; - - DIR* dir = opendir(directory.c_str()); - if (!dir) { - throw std::invalid_argument("Can not open " + directory); - } - - auto getExtension = [](const std::string& name) { - auto extensionPosition = name.rfind('.', name.size()); - return extensionPosition == std::string::npos ? "" : name.substr(extensionPosition + 1, name.size() - 1); - }; - - dirent* ent = nullptr; - while ((ent = readdir(dir)) && files.size() < max_size) { - std::string file_name = ent->d_name; - if (getExtension(file_name) != extension) { - continue; - } - - std::string full_file_name = directory + "/" + file_name; - - struct stat st = {}; - if (stat(full_file_name.c_str(), &st) != 0) { - continue; - } - - bool is_directory = (st.st_mode & S_IFDIR) != 0; - if (is_directory) { - continue; - } - - files.emplace_back(full_file_name); - } - - closedir(dir); - - return files; -} - -void loadBinaryTensor(const std::string &binaryFileName, InferenceEngine::Blob::Ptr& blob) { - InferenceEngine::TensorDesc tensDesc = blob->getTensorDesc(); - if (tensDesc.getPrecision() != InferenceEngine::Precision::FP16) { - throw std::invalid_argument("Input must have FP16 precision"); - } - - std::ifstream binaryFile(binaryFileName, std::ios_base::binary | std::ios_base::ate); - if (!binaryFile) { - throw std::invalid_argument("Can not open \"" + binaryFileName + "\""); - } - - auto fileSize = static_cast(binaryFile.tellg()); - binaryFile.seekg(0, std::ios_base::beg); - if (!binaryFile.good()) { - throw std::invalid_argument("Can not read \"" + binaryFileName + "\""); - } - - auto expected_size = blob->byteSize(); - if (fileSize != expected_size) { - throw std::invalid_argument("File \"" + binaryFileName + "\" contains " + std::to_string(fileSize) + " bytes " - "but network expects " + std::to_string(expected_size)); - } - /* try to read 32 bits data */ - std::int16_t *blobDataPtr = std::dynamic_pointer_cast>(blob)->data(); - for (std::size_t i = 0; i < blob->size(); i++) { - float tmp = 0.f; - binaryFile.read(reinterpret_cast(&tmp), sizeof(float)); - blobDataPtr[i] = InferenceEngine::PrecisionUtils::f32tof16(tmp); - } -} diff --git a/inference-engine/tools/vpu/vpu_compile/vpu_tools_common.hpp b/inference-engine/tools/vpu/vpu_compile/vpu_tools_common.hpp deleted file mode 100644 index 3ce092ef12a..00000000000 --- a/inference-engine/tools/vpu/vpu_compile/vpu_tools_common.hpp +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (C) 2018-2021 Intel 
Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include - -#include "inference_engine.hpp" - -InferenceEngine::CNNNetwork readNetwork(const std::string &xmlFileName); - -bool isFP16(InferenceEngine::Precision precision); -bool isFP32(InferenceEngine::Precision precision); -bool isU8(InferenceEngine::Precision precision); -bool isFloat(InferenceEngine::Precision precision); -/* Set FP32 to FP16, all others without changes */ -void setPrecisions(const InferenceEngine::CNNNetwork &network); - -class BitMap { -private: - typedef struct { - unsigned short type; /* Magic identifier */ - unsigned int size; /* File size in bytes */ - unsigned int reserved; - unsigned int offset; /* Offset to image data, bytes */ - } BmpHeader; - - typedef struct { - unsigned int size; /* Header size in bytes */ - int width, height; /* Width and height of image */ - unsigned short planes; /* Number of colour planes */ - unsigned short bits; /* Bits per pixel */ - unsigned int compression; /* Compression type */ - unsigned int imagesize; /* Image size in bytes */ - int xresolution, yresolution; /* Pixels per meter */ - unsigned int ncolours; /* Number of colours */ - unsigned int importantcolours; /* Important colours */ - } BmpInfoHeader; - -public: - explicit BitMap(const std::string &filename); - - ~BitMap() = default; - - size_t _height = 0; - size_t _width = 0; - std::shared_ptr _data; - -public: - size_t size() const { return _width * _height * 3; } - size_t width() const { return _width; } - size_t height() const { return _height; } - - std::shared_ptr getData() { - return _data; - } -}; - -void loadImage(const std::string &imageFilename, InferenceEngine::Blob::Ptr &blob); - -void printPerformanceCounts(const std::map& perfMap, const std::string report = "per_layer"); - -std::vector extractFilesByExtension(const std::string& directory, const std::string& extension); -std::vector extractFilesByExtension(const std::string& directory, const std::string& extension, - std::size_t max_size); - -void loadBinaryTensor(const std::string &binaryFileName, InferenceEngine::Blob::Ptr& blob); diff --git a/inference-engine/tools/vpu/vpu_perfcheck/CMakeLists.txt b/inference-engine/tools/vpu/vpu_perfcheck/CMakeLists.txt deleted file mode 100644 index 89b7cabd253..00000000000 --- a/inference-engine/tools/vpu/vpu_perfcheck/CMakeLists.txt +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (C) 2018-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -function(add_perfcheck_target TARGET_NAME PLUGIN_NAME) - file(GLOB SOURCES *.cpp) - - add_executable(${TARGET_NAME} ${SOURCES}) - - # TODO: enable some day and fix all warnings -# if(CMAKE_COMPILER_IS_GNUCXX) -# target_compile_options(${TARGET_NAME} -# PRIVATE -# "-Wall") -# endif() - - target_include_directories(${TARGET_NAME} - SYSTEM PRIVATE - $) - - target_link_libraries(${TARGET_NAME} - PRIVATE - inference_engine - inference_engine_plugin_api - format_reader - Threads::Threads - ie_samples_utils) - - add_dependencies(${TARGET_NAME} - ${PLUGIN_NAME} ${ARGN}) - - set_target_properties(${TARGET_NAME} PROPERTIES - COMPILE_PDB_NAME ${TARGET_NAME} - FOLDER tools) - - add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}) -endfunction() - -if(ENABLE_MYRIAD) - add_perfcheck_target(myriad_perfcheck myriadPlugin) - - ie_cpack_add_component(myriad_tools DEPENDS myriad) - - install(TARGETS myriad_perfcheck - RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} - COMPONENT myriad_tools) -endif() diff --git 
a/inference-engine/tools/vpu/vpu_perfcheck/main.cpp b/inference-engine/tools/vpu/vpu_perfcheck/main.cpp deleted file mode 100644 index 1a59be99943..00000000000 --- a/inference-engine/tools/vpu/vpu_perfcheck/main.cpp +++ /dev/null @@ -1,749 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#if defined(_WIN32) -#define NOMINMAX -#endif -#if (defined(_WIN32) || defined(_WIN64)) -#define WIN32_LEAN_AND_MEAN -#else -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include - -static char* m_exename = nullptr; - -#if defined(_WIN32) || defined(__APPLE__) || defined(ANDROID) -typedef std::chrono::time_point time_point; -#else -typedef std::chrono::time_point time_point; -#endif -typedef std::chrono::high_resolution_clock Time; -typedef std::chrono::duration> ms; -typedef std::chrono::duration fsec; - -#define TIMEDIFF(start, end) ((std::chrono::duration_cast((end) - (start))).count()) - -class BitMap { -private: - struct BmpHeader { - unsigned short type = 0u; /* Magic identifier */ - unsigned int size = 0u; /* File size in bytes */ - unsigned int reserved = 0u; - unsigned int offset = 0u; /* Offset to image data, bytes */ - }; - - struct BmpInfoHeader { - unsigned int size = 0u; /* Header size in bytes */ - int width = 0, height = 0; /* Width and height of image */ - unsigned short planes = 0u; /* Number of colour planes */ - unsigned short bits = 0u; /* Bits per pixel */ - unsigned int compression = 0u; /* Compression type */ - unsigned int imagesize = 0u; /* Image size in bytes */ - int xresolution = 0, yresolution = 0; /* Pixels per meter */ - unsigned int ncolours = 0u; /* Number of colours */ - unsigned int importantcolours = 0u; /* Important colours */ - }; - -public: - explicit BitMap(const std::string &filename) { - BmpHeader header; - BmpInfoHeader infoHeader; - - std::ifstream input(filename, std::ios::binary); - if (!input) { - return; - } - - input.read(reinterpret_cast(&header.type), 2); - - if (header.type != 'M'*256+'B') { - std::cerr << "[BMP] file is not bmp type\n"; - return; - } - - input.read(reinterpret_cast(&header.size), 4); - input.read(reinterpret_cast(&header.reserved), 4); - input.read(reinterpret_cast(&header.offset), 4); - - input.read(reinterpret_cast(&infoHeader), sizeof(BmpInfoHeader)); - - bool rowsReversed = infoHeader.height < 0; - _width = infoHeader.width; - _height = abs(infoHeader.height); - - if (infoHeader.bits != 24) { - std::cerr << "[BMP] 24bpp only supported. But input has:" << infoHeader.bits << "\n"; - return; - } - - if (infoHeader.compression != 0) { - std::cerr << "[BMP] compression not supported\n"; - } - - int padSize = _width & 3; - char pad[3]; - size_t size = _width * _height * 3; - - _data.reset(new unsigned char[size], std::default_delete()); - - input.seekg(header.offset, std::ios::beg); - - // reading by rows in invert vertically - for (uint32_t i = 0; i < _height; i++) { - uint32_t storeAt = rowsReversed ? 
i : (uint32_t)_height - 1 - i; - input.read(reinterpret_cast(_data.get()) + _width * 3 * storeAt, _width * 3); - input.read(pad, padSize); - } - } - - ~BitMap() = default; - - size_t _height = 0; - size_t _width = 0; - std::shared_ptr _data; - -public: - size_t size() const { return _width * _height * 3; } - size_t width() const { return _width; } - size_t height() const { return _height; } - - std::shared_ptr getData() { - return _data; - } -}; - - -static bool loadImage(const std::string &imageFilename, InferenceEngine::Blob::Ptr &blob); -static bool loadVideo(const std::vector &imagesFolder, InferenceEngine::Blob::Ptr &blob); -static bool loadBinaryTensor(const std::string &binaryFilename, InferenceEngine::Blob::Ptr &blob); - - -static void setConfig(std::map& config, - const std::string& file_config_cl) { - config[CONFIG_KEY(LOG_LEVEL)] = CONFIG_VALUE(LOG_WARNING); - config[InferenceEngine::MYRIAD_ENABLE_RECEIVING_TENSOR_TIME] = CONFIG_VALUE(YES); - config[InferenceEngine::MYRIAD_CUSTOM_LAYERS] = file_config_cl; -} - -static void printPerformanceCounts(const std::map& perfMap) { - std::vector> perfVec(perfMap.begin(), - perfMap.end()); - std::sort(perfVec.begin(), perfVec.end(), - [=](const std::pair &pair1, - const std::pair &pair2) -> bool { - return pair1.second.execution_index < pair2.second.execution_index; - }); - - size_t maxLayerName = 0u, maxExecType = 0u; - for (auto & it : perfVec) { - maxLayerName = std::max(maxLayerName, it.first.length()); - maxExecType = std::max(maxExecType, std::strlen(it.second.exec_type)); - } - - size_t indexWidth = 7, nameWidth = maxLayerName + 5, typeWidth = maxExecType + 5, timeWidth = 10; - size_t totalWidth = indexWidth + nameWidth + typeWidth + timeWidth; - - std::cout << std::endl << "Detailed Per Stage Profile" << std::endl; - for (size_t i = 0; i < totalWidth; i++) - std::cout << "="; - std::cout << std::endl; - std::cout << std::setw(indexWidth) << std::left << "Index" - << std::setw(nameWidth) << std::left << "Name" - << std::setw(typeWidth) << std::left << "Type" - << std::setw(timeWidth) << std::right << "Time (ms)" - << std::endl; - for (size_t i = 0; i < totalWidth; i++) - std::cout << "-"; - std::cout << std::endl; - - long long totalTime = 0; - for (const auto& p : perfVec) { - const auto& stageName = p.first; - const auto& info = p.second; - if (info.status == InferenceEngine::InferenceEngineProfileInfo::EXECUTED) { - std::cout << std::setw(indexWidth) << std::left << info.execution_index - << std::setw(nameWidth) << std::left << stageName - << std::setw(typeWidth) << std::left << info.exec_type - << std::setw(timeWidth) << std::right << info.realTime_uSec / 1000.0 - << std::endl; - - totalTime += info.realTime_uSec; - } - } - - for (int i = 0; i < totalWidth; i++) - std::cout << "-"; - std::cout << std::endl; - std::cout << std::setw(totalWidth / 2) << std::right << "Total inference time:" - << std::setw(totalWidth / 2 + 1) << std::right << totalTime / 1000.0 - << std::endl; - for (int i = 0; i < totalWidth; i++) - std::cout << "-"; - std::cout << std::endl; -} - -static std::string getAppRealName(const char* name) { - std::string filename(name); - size_t splitpos = filename.find_last_of('\\'); - if (std::string::npos == splitpos) { - splitpos = filename.find_last_of('/'); - if (std::string::npos == splitpos) { - return filename; - } - } - return filename.substr(splitpos + 1); -} - -static void print_usage() { - std::cout << "Usage:" << std::endl << getAppRealName(m_exename) << " [number of iterations >= 1000]" - << " [batch >= 1, 
default=1] [num_networks, default=1] [config_file_custom_layer, default='']" << std::endl; -} - -static void getBMPFiles(std::vector &out, const std::string &directory) { - const std::string ext = ".bmp"; - DIR *dir; - dirent *ent; - dir = opendir(directory.c_str()); - if (!dir) - return; - while ((ent = readdir(dir)) != nullptr) { - const std::string file_name = ent->d_name; - const std::string full_file_name = directory + "/" + file_name; - if ((file_name.length() >= ext.length()) - && (0 == file_name.compare(file_name.length() - ext.length(), ext.length(), ext))) { - // proceed - } else { - continue; - } - struct stat st; - if (stat(full_file_name.c_str(), &st) == -1) - continue; - const bool is_directory = (st.st_mode & S_IFDIR) != 0; - if (is_directory) - continue; - out.push_back(full_file_name); - } - closedir(dir); -} - -static void getBINFiles(std::vector &out, const std::string &directory) { - const std::string ext = ".bin"; - DIR *dir; - dirent *ent; - dir = opendir(directory.c_str()); - if (!dir) - return; - while ((ent = readdir(dir)) != nullptr) { - const std::string file_name = ent->d_name; - const std::string full_file_name = directory + "/" + file_name; - if ((file_name.length() >= ext.length()) - && (0 == file_name.compare(file_name.length() - ext.length(), ext.length(), ext))) { - // proceed - } else { - continue; - } - struct stat st; - if (stat(full_file_name.c_str(), &st) == -1) - continue; - const bool is_directory = (st.st_mode & S_IFDIR) != 0; - if (is_directory) - continue; - out.push_back(full_file_name); - } - closedir(dir); -} - -int num_requests = 4; - -#define MIN_ITER 1000 - -#define USE_CALLBACK - -int niter; -std::atomic iterations_to_run; -std::mutex done_mutex; -std::condition_variable alldone; -int reallydone = 0; - -std::vector iter_start; -std::vector iter_end; -std::vector iter_time; - -const int profile = 0; -std::map perfMap; - -int process(const std::string& modelFileName, const std::string& inputsDir, - std::string& file_config_cl, int nBatch, int num_networks) { - InferenceEngine::Core ie; - niter /= nBatch; - num_requests = num_requests * num_networks; - - // add some more requests. 
they'll be excluded on performance measurement - niter += 2 * 2 * num_requests; - -#if !(defined(_WIN32) || defined(_WIN64)) - if (pthread_setname_np( -#ifndef __APPLE__ - pthread_self(), -#endif - "MainThread") != 0) { - perror("Setting name for main thread failed"); - } -#endif - -#ifdef USE_KMB_PLUGIN - std::string deivceName = "KMB"; -#else - std::string deviceName = "MYRIAD"; -#endif - const auto pluginVersion = ie.GetVersions(deviceName); - std::cout << "InferenceEngine: " << std::endl; - std::cout << pluginVersion << std::endl << std::endl; - - std::ifstream file(file_config_cl); - if (!file.is_open()) { - file_config_cl.clear(); - } - - std::vector pictures; - getBMPFiles(pictures, inputsDir); - int numPictures = pictures.size(); - - std::vector binaries; - getBINFiles(binaries, inputsDir); - int numBinaries = binaries.size(); - - if (pictures.empty() && binaries.empty()) { - std::cout << inputsDir << " directory doesn't contain input files" << std::endl; - return 1; - } - - InferenceEngine::CNNNetwork cnnNetwork = ie.ReadNetwork(modelFileName); - - if (nBatch != 1) { - std::cout << "Setting batch to : "<< nBatch << "\n"; - cnnNetwork.setBatchSize(nBatch); - } - - InferenceEngine::InputsDataMap networkInputs; - networkInputs = cnnNetwork.getInputsInfo(); - InferenceEngine::OutputsDataMap networkOutputs; - networkOutputs = cnnNetwork.getOutputsInfo(); - - for (auto &input : networkInputs) { - const auto inputPrecision = input.second->getPrecision(); - if (inputPrecision == InferenceEngine::Precision::FP32 || - inputPrecision == InferenceEngine::Precision::U8) { - input.second->setPrecision(InferenceEngine::Precision::FP16); - } - } - - for (auto &output : networkOutputs) { - const auto outputPrecision = output.second->getPrecision(); - if (outputPrecision == InferenceEngine::Precision::FP32) { - output.second->setPrecision(InferenceEngine::Precision::FP16); - } - } - - std::vector exeNetwork(num_networks); - std::map networkConfig; - setConfig(networkConfig, file_config_cl); - - for (int n = 0; n < num_networks; ++n) { - if (num_networks > 1) - printf("Load network %d...\n", n); - else - printf("Load network... 
\n"); - fflush(stdout); - exeNetwork[n] = ie.LoadNetwork(cnnNetwork, deviceName, networkConfig); - } - - std::vector request(num_requests); - iter_start.resize(niter); - iter_end.resize(niter); - iter_time.resize(niter); - - iterations_to_run = niter - num_requests; - - for (int r = 0, idxPic = 0; r < num_requests; ++r) { - int n = r % num_networks; - request[r] = exeNetwork[n].CreateInferRequest(); - - for (auto &input : networkInputs) { - auto inputBlob = request[r].GetBlob(input.first); - const auto& dims = inputBlob->getTensorDesc().getDims(); - auto layout = inputBlob->getTensorDesc().getLayout(); - - // number of channels is 3 for Image, dims order is always NCHW - const bool isImage = ((layout == InferenceEngine::NHWC || layout == InferenceEngine::NCHW) && dims[1] == 3); - const bool isVideo = (inputBlob->getTensorDesc().getDims().size() == 5); - if (isImage && (numPictures > 0)) { - if (!loadImage(pictures[(idxPic++) % numPictures], inputBlob)) - return 1; - } else if (isVideo && (numPictures > 0)) { - if (!loadVideo(pictures, inputBlob)) - return 1; - } else if (numBinaries > 0) { - if (!loadBinaryTensor(binaries[(idxPic++) % numBinaries], inputBlob)) - return 1; - } else { - std::cout << inputsDir << " directory doesn't contain correct input files" << std::endl; - return 1; - } - } - - request[r].SetCompletionCallback>( - [](InferenceEngine::InferRequest request, InferenceEngine::StatusCode code) -> void { - if (code != InferenceEngine::OK) { - std::cout << "Infer failed: " << code << std::endl; - exit(1); - } - - int iter = --iterations_to_run; - int reqIdx = (niter - iter - 1) - num_requests; - - iter_end[reqIdx] = Time::now(); - - if (profile && (reqIdx == niter / 2)) { - perfMap = request.GetPerformanceCounts(); - } - - if (iter >= 0) { - iter_start[reqIdx + (num_requests)] = Time::now(); - request.StartAsync(); - } - - iter_time[reqIdx] = TIMEDIFF(iter_start[reqIdx], iter_end[reqIdx]); - // printf("request#%d %fms\n", reqIdx, iter_time[reqIdx]); - - if (iter == -num_requests) { - reallydone = 1; - alldone.notify_all(); - } - }); - } - - printf("Inference started. Running %d iterations...\n", niter - 2 * 2 * num_requests); - fflush(stdout); - for (int r = 0; r < num_requests; ++r) { - iter_start[r] = Time::now(); - request[r].StartAsync(); - } - - { - std::unique_lock lock(done_mutex); - alldone.wait(lock, [&](){return reallydone;}); - } - - // check 10 time intervals to get min/max fps values - const int fps_checks = 10; - // exclude (2 * num_requests) first and last iterations - int num_exclude = 2 * num_requests; - time_point cstart = iter_end[num_exclude - 1]; - time_point cend = iter_end[niter - num_exclude - 1]; - - double totalTime = (std::chrono::duration_cast(cend - cstart)).count(); - std::cout << std::endl << "Total time: " << (totalTime) << " ms" << std::endl; - - std::cout << "Average fps on " << (niter - 2 * num_exclude) << " iterations" - << (nBatch == 1 ? ": " : (" of " + std::to_string(nBatch) + " frames: ")) - << static_cast(niter - 2 * num_exclude) * 1000.0 * nBatch / (totalTime) << " fps" << std::endl; - - double check_time = totalTime / fps_checks; - - double min_fps = 100000; - double max_fps = -100000; - int citer = num_exclude; - for (int f = 0; f < fps_checks; ++f) { - int fiter = 0; - auto fend = (f < fps_checks - 1) ? 
cstart + std::chrono::microseconds((unsigned int)(check_time * 1000)) : cend; - while ((citer + fiter < niter - num_exclude) && iter_end[citer + fiter] <= fend) { - fiter++; - } - - double ffps = 1000 * fiter * nBatch / (check_time); - min_fps = std::min(min_fps, ffps); - max_fps = std::max(max_fps, ffps); - citer += fiter; - cstart = fend; - } - - std::cout << "Min fps: " << min_fps << std::endl; - std::cout << "Max fps: " << max_fps << std::endl; - - if (profile) { - printPerformanceCounts(perfMap); - } - - return 0; -} - -int main(int argc, char *argv[]) { - niter = MIN_ITER; - int num_networks = 1; - int nBatch = 1; - std::string file_config_cl; - - m_exename = argv[0]; - - if (argc < 3) { - print_usage(); - return 0; - } - - auto parse = [](const std::string& src) { - try { - return std::stol(src, nullptr, 0); - } catch (const std::invalid_argument& exception) { - std::cout << "Cannot perform conversion for " << src << ": " << exception.what() << std::endl; - print_usage(); - std::abort(); - } catch (const std::out_of_range& exception) { - std::cout << src << " is out of range: " << exception.what() << std::endl; - print_usage(); - std::abort(); - } catch (...) { - std::cout << "Unexpected exception" << std::endl; - print_usage(); - std::abort(); - } - }; - - if (argc > 3) { - niter = static_cast(parse(argv[3])); - } - - if (argc > 4) { - nBatch = static_cast(parse(argv[4])); - } - - if (argc > 5) { - num_networks = static_cast(parse(argv[5])); - } - - if (argc > 6) { - file_config_cl = std::string(argv[6]); - } - - if (niter < MIN_ITER) { - print_usage(); - return 0; - } - - if (num_networks < 1 || num_networks > 16) { - print_usage(); - return 0; - } - - if (nBatch < 1) { - print_usage(); - return 0; - } - - try { - std::string modelFileName(argv[1]); - std::string inputsDir(argv[2]); - return process(modelFileName, inputsDir, file_config_cl, nBatch, num_networks); - } - catch (const std::exception& ex) { - std::cout << ex.what(); - } - - return -1; -} - -static bool loadImage(const std::string &imageFilename, InferenceEngine::Blob::Ptr &blob) { - InferenceEngine::TensorDesc tensDesc = blob->getTensorDesc(); - const InferenceEngine::Layout layout = tensDesc.getLayout(); - if (tensDesc.getPrecision() != InferenceEngine::Precision::FP16) { - std::cout << "loadImage error: Input must have FP16 precision" << std::endl; - return false; - } - - if (layout != InferenceEngine::NHWC && layout != InferenceEngine::NCHW) { - std::cout << "loadImage error: Input must have NCHW or NHWC layout" << std::endl; - return false; - } - - BitMap reader(imageFilename); - - const auto dims = tensDesc.getDims(); - - const size_t N = dims[0]; - const size_t C = dims[1]; - const size_t H = dims[2]; - const size_t W = dims[3]; - - const size_t img_w = reader.width(); - const size_t img_h = reader.height(); - - const auto strides = tensDesc.getBlockingDesc().getStrides(); - const auto strideN = strides[0]; - const auto strideC = layout == InferenceEngine::NHWC ? strides[3] : strides[1]; - const auto strideH = layout == InferenceEngine::NHWC ? strides[1] : strides[2]; - const auto strideW = layout == InferenceEngine::NHWC ? 
strides[2] : strides[3]; - - const size_t numImageChannels = reader.size() / (reader.width() * reader.height()); - if (C != numImageChannels && C != 1) { - std::cout << "loadImage error: Input channels mismatch: image channels " << numImageChannels << ", " - << "network channels " << C << ", expecting count of image channels are equal " - << "to count if network channels or count of network channels are equal to 1" << std::endl; - return false; - } - - int16_t* blobDataPtr = std::dynamic_pointer_cast>(blob)->data(); - const unsigned char* RGB8 = reader.getData().get(); - const float xScale = 1.0f * img_w / W; - const float yScale = 1.0f * img_h / H; - - for (int n = 0; n != N; n++) { - for (int h = 0; h < H; ++h) { - int y = static_cast(std::floor((h + 0.5f) * yScale)); - for (int w = 0; w < W; ++w) { - int x = static_cast(std::floor((w + 0.5f) * xScale)); - for (int c = 0; c < C; c++) { - blobDataPtr[n * strideN + c * strideC + h * strideH + w * strideW] = - InferenceEngine::PrecisionUtils::f32tof16(1.0 * RGB8[(y * img_w + x) * numImageChannels + c]); - } - } - } - } - - return true; -} - -static bool loadVideo(const std::vector &imagesFolder, InferenceEngine::Blob::Ptr &blob) { - InferenceEngine::TensorDesc tensDesc = blob->getTensorDesc(); - const InferenceEngine::Layout layout = tensDesc.getLayout(); - - if (tensDesc.getPrecision() != InferenceEngine::Precision::FP16) { - std::cout << "loadVideo error: Input must have FP16 precision" << std::endl; - return false; - } - if (layout != InferenceEngine::NDHWC && layout != InferenceEngine::NCDHW) { - std::cout << "loadVideo error: Input must have NCDHW or NDHWC layout" << std::endl; - return false; - } - - const auto dims = tensDesc.getDims(); - const size_t N = dims[0]; - const size_t C = dims[1]; - const size_t D = dims[2]; - const size_t H = dims[3]; - const size_t W = dims[4]; - - const auto numUsedImages = std::min(D, imagesFolder.size()); - const auto strides = tensDesc.getBlockingDesc().getStrides(); - const auto strideN = strides[0]; - const auto strideC = layout == InferenceEngine::NDHWC ? strides[4] : strides[1]; - const auto strideD = layout == InferenceEngine::NDHWC ? strides[1] : strides[2]; - const auto strideH = layout == InferenceEngine::NDHWC ? strides[2] : strides[3]; - const auto strideW = layout == InferenceEngine::NDHWC ? 
strides[3] : strides[4]; - - auto d = 0; - int16_t* blobDataPtr = std::dynamic_pointer_cast>(blob)->data(); - for ( ; d < numUsedImages; d++) { - BitMap reader(imagesFolder[d]); - const size_t img_w = reader.width(); - const size_t img_h = reader.height(); - const size_t numImageChannels = reader.size() / (reader.width() * reader.height()); - - if (C != numImageChannels && C != 1) { - std::cout << "loadVideo error: Input channels mismatch: image channels " << numImageChannels << ", " - << "network channels " << C << ", expecting count of image channels are equal " - << "to count if network channels or count of network channels are equal to 1" << std::endl; - return false; - } - - const unsigned char* RGB8 = reader.getData().get(); - const float xScale = 1.0f * img_w / W; - const float yScale = 1.0f * img_h / H; - - for (int n = 0; n != N; n++) { - for (int h = 0; h < H; ++h) { - int y = static_cast(std::floor((h + 0.5f) * yScale)); - for (int w = 0; w < W; ++w) { - int x = static_cast(std::floor((w + 0.5f) * xScale)); - for (int c = 0; c < C; c++) { - blobDataPtr[n * strideN + c * strideC + d * strideD + h * strideH + w * strideW] = - InferenceEngine::PrecisionUtils::f32tof16(1.0 * RGB8[(y * img_w + x) * numImageChannels + c]); - } - } - } - } - } - - for (; d < D; d++) - for (auto n = 0; n != N; n++) - for (auto c = 0; c < C; c++) - for (auto k = 0; k < strideD; k++) { - blobDataPtr[n * strideN + c * strideC + (d) * strideD + k] = - blobDataPtr[n * strideN + c * strideC + (d - 1) * strideD + k]; - } - - return true; -} - -bool loadBinaryTensor(const std::string &binaryFilename, InferenceEngine::Blob::Ptr &blob) { - InferenceEngine::TensorDesc tensDesc = blob->getTensorDesc(); - if (tensDesc.getPrecision() != InferenceEngine::Precision::FP16) { - std::cout << "loadBinaryTensor error: Input must have FP16 precision" << std::endl; - return false; - } - - std::ifstream binaryFile(binaryFilename, std::ios_base::binary | std::ios_base::ate); - - if (!binaryFile) { - std::cout << "loadBinaryTensor error: While opening a file an error is encountered" << std::endl; - return false; - } - - int fileSize = binaryFile.tellg(); - binaryFile.seekg(0, std::ios_base::beg); - size_t count = blob->size(); - if (fileSize != count * sizeof(float)) { - std::cout << "loadBinaryTensor error: File contains insufficient items" << std::endl; - return false; - } - - if (binaryFile.good()) { - int16_t *blobDataPtr = std::dynamic_pointer_cast>(blob)->data(); - for (size_t i = 0; i < count; i++) { - float tmp = 0.f; - binaryFile.read(reinterpret_cast(&tmp), sizeof(float)); - blobDataPtr[i] = InferenceEngine::PrecisionUtils::f32tof16(tmp); - } - } else { - std::cout << "loadBinaryTensor error: While reading a file an error is encountered" << std::endl; - return false; - } - return true; -} diff --git a/scripts/deployment_manager/configs/darwin.json b/scripts/deployment_manager/configs/darwin.json index 215f2716a1d..f3581e31cba 100644 --- a/scripts/deployment_manager/configs/darwin.json +++ b/scripts/deployment_manager/configs/darwin.json @@ -46,8 +46,6 @@ "dependencies" : ["ie_core"], "files": [ "deployment_tools/inference_engine/lib/intel64/libmyriadPlugin.so", - "deployment_tools/inference_engine/lib/intel64/myriad_compile", - "deployment_tools/inference_engine/lib/intel64/myriad_perfcheck", "deployment_tools/inference_engine/lib/intel64/libinference_engine_legacy.dylib", "deployment_tools/inference_engine/lib/intel64/usb-ma2x8x.mvcmd", "deployment_tools/inference_engine/lib/intel64/pcie-ma2x8x.mvcmd" diff --git 
a/scripts/deployment_manager/configs/linux.json b/scripts/deployment_manager/configs/linux.json index 7a3d745a78c..fd4600abb8e 100644 --- a/scripts/deployment_manager/configs/linux.json +++ b/scripts/deployment_manager/configs/linux.json @@ -66,8 +66,6 @@ "deployment_tools/inference_engine/lib/intel64/usb-ma2x8x.mvcmd", "deployment_tools/inference_engine/lib/intel64/pcie-ma2x8x.mvcmd", "deployment_tools/inference_engine/lib/intel64/libmyriadPlugin.so", - "deployment_tools/inference_engine/lib/intel64/myriad_compile", - "deployment_tools/inference_engine/lib/intel64/myriad_perfcheck", "deployment_tools/inference_engine/lib/intel64/vpu_custom_kernels", "deployment_tools/inference_engine/lib/intel64/libinference_engine_legacy.so", "install_dependencies/install_NCS_udev_rules.sh" diff --git a/scripts/deployment_manager/configs/windows.json b/scripts/deployment_manager/configs/windows.json index d47bcff3b6a..ba95c29e244 100644 --- a/scripts/deployment_manager/configs/windows.json +++ b/scripts/deployment_manager/configs/windows.json @@ -61,9 +61,7 @@ "deployment_tools/inference_engine/bin/intel64/Release/usb-ma2x8x.mvcmd", "deployment_tools/inference_engine/bin/intel64/Release/pcie-ma2x8x.elf", "deployment_tools/inference_engine/bin/intel64/Release/myriadPlugin.dll", - "deployment_tools/inference_engine/bin/intel64/Release/inference_engine_legacy.dll", - "deployment_tools/inference_engine/bin/intel64/Release/myriad_compile.exe", - "deployment_tools/inference_engine/bin/intel64/Release/myriad_perfcheck.exe" + "deployment_tools/inference_engine/bin/intel64/Release/inference_engine_legacy.dll" ] }, "gna": { From 565627a4162eb77212d157e2591513e4050e412d Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Tue, 10 Aug 2021 07:49:37 +0300 Subject: [PATCH 10/24] Add ov::runtime::Core (#6946) --- .../ncc_naming_style/openvino.style | 2 +- .../src/inference_engine/CMakeLists.txt | 14 +- .../include/ie/cpp/ie_executable_network.hpp | 7 + .../inference_engine/include/ie/ie_core.hpp | 2 +- .../include/openvino/runtime/core.hpp | 303 ++++ .../src/cnn_network_ngraph_impl.cpp | 6 +- .../src/cpp/ie_cnn_network.cpp | 2 +- .../src/inference_engine/src/ie_core.cpp | 604 ++++--- .../src/inference_engine/src/ie_itt.hpp | 15 +- .../src/ie_network_reader.cpp | 6 +- .../inference_engine/src/ie_ngraph_utils.cpp | 2 +- .../behavior/ov_core_integration.cpp | 164 ++ .../include/behavior/ov_core_integration.hpp | 1485 +++++++++++++++++ 13 files changed, 2400 insertions(+), 212 deletions(-) create mode 100644 inference-engine/src/inference_engine/include/openvino/runtime/core.hpp create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_core_integration.cpp create mode 100644 inference-engine/tests/functional/plugin/shared/include/behavior/ov_core_integration.hpp diff --git a/cmake/developer_package/ncc_naming_style/openvino.style b/cmake/developer_package/ncc_naming_style/openvino.style index 2279862cda3..1dc53167129 100644 --- a/cmake/developer_package/ncc_naming_style/openvino.style +++ b/cmake/developer_package/ncc_naming_style/openvino.style @@ -5,7 +5,7 @@ ClassName: '^([A-Z][\w]+|b?float16|numeric_limits|ngraph_error|stopwatch|unsuppo # TODO: remove oi_pair StructName: '^([A-Z][\w]+|element_type_traits|hash|oi_pair)$' FunctionName: '^(operator\W+|[a-z_\d]+)$' -Namespace: '^[a-z\d_]+$' +Namespace: '^([a-z\d_]+|InferenceEngine)$' NamespaceAlias: '^[a-z\d_]+$' UnionName: '[A-Z][\w]+$' TemplateTemplateParameter: '[A-Z][\w]+' diff --git 
a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt index 1152c12392e..8325ecd5d17 100644 --- a/inference-engine/src/inference_engine/CMakeLists.txt +++ b/inference-engine/src/inference_engine/CMakeLists.txt @@ -77,7 +77,7 @@ endif() addVersionDefines(src/ie_version.cpp CI_BUILD_NUMBER) -set (PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include/ie") +set (PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") file (GLOB_RECURSE PUBLIC_HEADERS ${PUBLIC_HEADERS_DIR}/*.hpp @@ -97,7 +97,7 @@ add_library(${TARGET_NAME}_plugin_api INTERFACE) target_include_directories(${TARGET_NAME}_plugin_api INTERFACE "${IE_MAIN_SOURCE_DIR}/src/plugin_api" $ - ${PUBLIC_HEADERS_DIR}) + ${PUBLIC_HEADERS_DIR} ${PUBLIC_HEADERS_DIR}/ie) target_link_libraries(${TARGET_NAME}_plugin_api INTERFACE pugixml::static openvino::itt) @@ -152,6 +152,10 @@ add_library(${TARGET_NAME} SHARED ${vs_version_file} $) +ov_ncc_naming_style(FOR_TARGET ${TARGET_NAME} + INCLUDE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/openvino" + ADDITIONAL_INCLUDE_DIRECTORIES $) + ie_add_vs_version_file(NAME ${TARGET_NAME} FILEDESCRIPTION "Inference Engine Core Runtime library") @@ -167,6 +171,8 @@ target_link_libraries(${TARGET_NAME} PRIVATE pugixml::static openvino::itt ${CMA target_include_directories(${TARGET_NAME} INTERFACE $ + $ + $ $ PRIVATE $ $) @@ -269,7 +275,7 @@ endif() ie_cpack_add_component(core REQUIRED DEPENDS ${core_components}) ie_cpack_add_component(core_dev REQUIRED core ngraph_dev) -install(DIRECTORY "${PUBLIC_HEADERS_DIR}" DESTINATION ${IE_CPACK_IE_DIR}/include +install(DIRECTORY "${PUBLIC_HEADERS_DIR}" DESTINATION ${IE_CPACK_IE_DIR} COMPONENT core_dev) install(TARGETS ${TARGET_NAME} EXPORT InferenceEngineTargets @@ -299,7 +305,7 @@ install(EXPORT InferenceEngineTargets COMPONENT core_dev) set(IE_NGRAPH_DIR "${CMAKE_BINARY_DIR}/ngraph") -set(IE_INCLUDE_DIR "${PUBLIC_HEADERS_DIR}") +set(IE_INCLUDE_DIR "${PUBLIC_HEADERS_DIR}/ie") set(IE_PARALLEL_CMAKE "${InferenceEngine_SOURCE_DIR}/cmake/ie_parallel.cmake") configure_package_config_file("${OpenVINO_SOURCE_DIR}/cmake/templates/InferenceEngineConfig.cmake.in" diff --git a/inference-engine/src/inference_engine/include/ie/cpp/ie_executable_network.hpp b/inference-engine/src/inference_engine/include/ie/cpp/ie_executable_network.hpp index 870f6c2a6fc..e5a9f1a36cb 100644 --- a/inference-engine/src/inference_engine/include/ie/cpp/ie_executable_network.hpp +++ b/inference-engine/src/inference_engine/include/ie/cpp/ie_executable_network.hpp @@ -23,6 +23,12 @@ #include "details/ie_so_loader.h" #include "ie_iexecutable_network.hpp" +namespace ov { +namespace runtime { +class Core; +} // namespace runtime +} // namespace ov + namespace InferenceEngine { class IExecutableNetworkInternal; @@ -41,6 +47,7 @@ class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) { ExecutableNetwork(const details::SharedObjectLoader& so, const std::shared_ptr& impl); friend class Core; + friend class ov::runtime::Core; public: /** diff --git a/inference-engine/src/inference_engine/include/ie/ie_core.hpp b/inference-engine/src/inference_engine/include/ie/ie_core.hpp index 96f8d6b58af..573880153e8 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_core.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_core.hpp @@ -89,7 +89,7 @@ public: * For ONNX case the second parameter should contain empty blob. * @note Created InferenceEngine::CNNNetwork object shares the weights with `weights` object. 
* So, do not create `weights` on temporary data which can be later freed, since the network - * constant datas become to point to invalid memory. + * constant data becomes to point to invalid memory. * @return CNNNetwork */ CNNNetwork ReadNetwork(const std::string& model, const Blob::CPtr& weights) const; diff --git a/inference-engine/src/inference_engine/include/openvino/runtime/core.hpp b/inference-engine/src/inference_engine/include/openvino/runtime/core.hpp new file mode 100644 index 00000000000..b1f5823d30f --- /dev/null +++ b/inference-engine/src/inference_engine/include/openvino/runtime/core.hpp @@ -0,0 +1,303 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief This is a header file for the OpenVINO Runtime Core class C++ API + * + * @file openvino/runtime/core.hpp + */ +#pragma once + +#include +#include +#include +#include +#include + +#include "ie_version.hpp" +#include "ie_plugin_config.hpp" +#include "cpp/ie_executable_network.hpp" + +namespace ngraph { +class Function; +} // namespace ngraph + +namespace InferenceEngine { +class IExtension; +class Blob; +class RemoteContext; +} // namespace InferenceEngine + +namespace ov { +namespace runtime { + +/** + * @brief This class represents OpenVINO runtime Core entity. + * + * It can throw exceptions safely for the application, where it is properly handled. + */ +class INFERENCE_ENGINE_API_CLASS(Core) { + class Impl; + std::shared_ptr _impl; + +public: + /** @brief Constructs OpenVINO Core instance using XML configuration file with + * plugins description. + * + * See register_plugins for more details. + * + * @param xmlConfigFile A path to .xml file with plugins to load from. If XML configuration file is not specified, + * then default Inference Engine plugins are loaded from the default plugin.xml file. + */ + explicit Core(const std::string& xmlConfigFile = {}); + + /** + * @brief Returns plugins version information + * + * @param deviceName Device name to identify plugin + * @return A vector of versions + */ + std::map get_versions(const std::string& deviceName) const; + +#ifdef ENABLE_UNICODE_PATH_SUPPORT + /** + * @brief Reads models from IR and ONNX formats + * @param modelPath path to model + * @param binPath path to data file + * For IR format (*.bin): + * * if path is empty, will try to read bin file with the same name as xml and + * * if bin file with the same name was not found, will load IR without weights. + * For ONNX format (*.onnx or *.prototxt): + * * binPath parameter is not used. + * @return Function + */ + std::shared_ptr read_model(const std::wstring& modelPath, const std::wstring& binPath = {}) const; +#endif + + /** + * @brief Reads models from IR and ONNX formats + * @param modelPath path to model + * @param binPath path to data file + * For IR format (*.bin): + * * if path is empty, will try to read bin file with the same name as xml and + * * if bin file with the same name was not found, will load IR without weights. + * For ONNX format (*.onnx or *.prototxt): + * * binPath parameter is not used. + * @return Function + */ + std::shared_ptr read_model(const std::string& modelPath, const std::string& binPath = {}) const; + /** + * @brief Reads models from IR and ONNX formats + * @param model string with model in IR or ONNX format + * @param weights shared pointer to constant blob with weights + * Reading ONNX models doesn't support loading weights from data blobs. 
+ * If you are using an ONNX model with external data files, please use the + * `ov::runtime::Core::read_model(const std::string& model, const Blob::CPtr& weights) const` + * function overload which takes a filesystem path to the model. + * For ONNX case the second parameter should contain empty blob. + * @note Created Function object shares the weights with `weights` object. + * So, do not create `weights` on temporary data which can be later freed, since the network + * constant data becomes to point to invalid memory. + * @return Function + */ + std::shared_ptr read_model(const std::string& model, const std::shared_ptr& weights) const; + + /** + * @brief Creates an executable network from a network object. + * + * Users can create as many networks as they need and use + * them simultaneously (up to the limitation of the hardware resources) + * + * @param network Function object acquired from Core::read_model + * @param deviceName Name of device to load network to + * @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load + * operation + * @return An executable network reference + */ + InferenceEngine::ExecutableNetwork compile_model( + const std::shared_ptr& network, const std::string& deviceName, + const std::map& config = {}); + + /** + * @brief Reads model and creates an executable network from IR or ONNX file + * + * This can be more efficient than using read_model + compile_model(Function) flow + * especially for cases when caching is enabled and cached model is available + * + * @param modelPath path to model + * @param deviceName Name of device to load network to + * @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load + * operation/ + * + * @return An executable network reference + */ + InferenceEngine::ExecutableNetwork compile_model( + const std::string& modelPath, const std::string& deviceName, + const std::map& config = {}); + + /** + * @brief Creates an executable network from a network object within a specified remote context. + * @param network Function object acquired from Core::read_model + * @param context Pointer to RemoteContext object + * @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load + * operation + * @return An executable network object + */ + InferenceEngine::ExecutableNetwork compile_model( + const std::shared_ptr& network, const std::shared_ptr& context, + const std::map& config = {}); + + /** + * @brief Registers extension + * @param extension Pointer to already loaded extension + */ + void add_extension(const std::shared_ptr& extension); + + /** + * @brief Creates an executable network from a previously exported network + * @param networkModel network model stream + * @param deviceName Name of device load executable network on + * @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load + * operation* + * @return An executable network reference + */ + InferenceEngine::ExecutableNetwork import_model(std::istream& networkModel, const std::string& deviceName, + const std::map& config = {}); + + /** + * @brief Creates an executable network from a previously exported network within a specified + * remote context. 
+ * + * @param networkModel Network model stream + * @param context Pointer to RemoteContext object + * @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load + * operation + * @return An executable network reference + */ + InferenceEngine::ExecutableNetwork import_model(std::istream& networkModel, + const std::shared_ptr& context, + const std::map& config = {}); + + /** + * @brief Query device if it supports specified network with specified configuration + * + * @param deviceName A name of a device to query + * @param network Network object to query + * @param config Optional map of pairs: (config parameter name, config parameter value) + * @return An object containing a map of pairs a layer name -> a device name supporting this layer. + */ + InferenceEngine::QueryNetworkResult query_model( + const std::shared_ptr& network, const std::string& deviceName, + const std::map& config = {}) const; + + /** + * @brief Sets configuration for device, acceptable keys can be found in ie_plugin_config.hpp + * + * @param deviceName An optional name of a device. If device name is not specified, the config is set for all the + * registered devices. + * + * @param config Map of pairs: (config parameter name, config parameter value) + */ + void set_config(const std::map& config, const std::string& deviceName = {}); + + /** + * @brief Gets configuration dedicated to device behaviour. + * + * The method is targeted to extract information which can be set via set_config method. + * + * @param deviceName - A name of a device to get a configuration value. + * @param name - config key. + * @return Value of config corresponding to config key. + */ + InferenceEngine::Parameter get_config(const std::string& deviceName, const std::string& name) const; + + /** + * @brief Gets general runtime metric for dedicated hardware. + * + * The method is needed to request common device properties + * which are executable network agnostic. It can be device name, temperature, other devices-specific values. + * + * @param deviceName - A name of a device to get a metric value. + * @param name - metric name to request. + * @return Metric value corresponding to metric key. + */ + InferenceEngine::Parameter get_metric(const std::string& deviceName, const std::string& name) const; + + /** + * @brief Returns devices available for neural networks inference + * + * @return A vector of devices. The devices are returned as { CPU, FPGA.0, FPGA.1, MYRIAD } + * If there more than one device of specific type, they are enumerated with .# suffix. + */ + std::vector get_available_devices() const; + + /** + * @brief Register new device and plugin which implement this device inside Inference Engine. + * + * @param pluginName A name of plugin. Depending on platform pluginName is wrapped with shared library suffix and + * prefix to identify library full name + * + * @param deviceName A device name to register plugin for. If device name is not specified, then it's taken from + * plugin itself. + */ + void register_plugin(const std::string& pluginName, const std::string& deviceName); + + /** + * @brief Unloads previously loaded plugin with a specified name from Inference Engine + * The method is needed to remove plugin instance and free its resources. If plugin for a + * specified device has not been created before, the method throws an exception. 
+ * + * @param deviceName Device name identifying plugin to remove from Inference Engine + */ + void unload_plugin(const std::string& deviceName); + + /** @brief Registers plugin to Inference Engine Core instance using XML configuration file with + * plugins description. + * + * XML file has the following structure: + * + * ```xml + * + * + * + * + * + * + * + * + * + * + * + * + * ``` + * + * - `name` identifies name of device enabled by plugin + * - `location` specifies absolute path to dynamic library with plugin. A path can also be relative to inference + * engine shared library. It allows to have common config for different systems with different configurations. + * - Properties are set to plugin via the `set_config` method. + * - Extensions are set to plugin via the `add_extension` method. + * + * @param xmlConfigFile A path to .xml file with plugins to register. + */ + void register_plugins(const std::string& xmlConfigFile); + + /** + * @brief Create a new shared context object on specified accelerator device + * using specified plugin-specific low level device API parameters (device handle, pointer, etc.) + * @param deviceName Name of a device to create new shared context on. + * @param params Map of device-specific shared context parameters. + * @return A shared pointer to a created remote context. + */ + std::shared_ptr create_context(const std::string& deviceName, + const InferenceEngine::ParamMap& params); + + /** + * @brief Get a pointer to default(plugin-supplied) shared context object for specified accelerator device. + * @param deviceName - A name of a device to get create shared context from. + * @return A shared pointer to a default remote context. + */ + std::shared_ptr get_default_context(const std::string& deviceName); +}; +} // namespace runtime +} // namespace ov diff --git a/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp b/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp index f53894e7d2d..85019779800 100644 --- a/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp +++ b/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp @@ -227,7 +227,7 @@ void CNNNetworkNGraphImpl::validate(int version) { StatusCode CNNNetworkNGraphImpl::addOutput(const std::string& layerName, size_t outputIndex, ResponseDesc* resp) noexcept { - OV_ITT_SCOPED_TASK(itt::domains::IE, "CNNNetworkNGraphImpl::addOutput"); + OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "CNNNetworkNGraphImpl::addOutput"); try { for (const auto & layer : _ngraph_function->get_ops()) { @@ -361,7 +361,7 @@ CNNNetworkNGraphImpl::reshape(const std::map>& void CNNNetworkNGraphImpl::reshape(const std::map& inputShapes) { - OV_ITT_SCOPED_TASK(itt::domains::IE, "CNNNetworkNGraphImpl::reshape"); + OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "CNNNetworkNGraphImpl::reshape"); auto params = _ngraph_function->get_parameters(); @@ -388,7 +388,7 @@ CNNNetworkNGraphImpl::reshape(const std::map& } else { specialized_ngraph_function = ngraph::clone_function(*_ngraph_function); { - OV_ITT_SCOPED_TASK(itt::domains::IE, "CNNNetworkNGraphImpl::ConvertToLegacy"); + OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "CNNNetworkNGraphImpl::ConvertToLegacy"); ::ngraph::pass::Manager manager; // resolves dynamism by replacing dynamic operation with static version manager.register_pass<::ngraph::pass::ConvertNMS5ToLegacyMatcher>(false); diff --git a/inference-engine/src/inference_engine/src/cpp/ie_cnn_network.cpp b/inference-engine/src/inference_engine/src/cpp/ie_cnn_network.cpp index 
e2506d6cdca..e2ff71d596c 100644 --- a/inference-engine/src/inference_engine/src/cpp/ie_cnn_network.cpp +++ b/inference-engine/src/inference_engine/src/cpp/ie_cnn_network.cpp @@ -24,7 +24,7 @@ CNNNetwork::CNNNetwork(std::shared_ptr network) CNNNetwork::CNNNetwork(const std::shared_ptr& graph, const std::vector& exts) { - OV_ITT_SCOPED_TASK(itt::domains::IE, "CNNNetwork::CNNNetwork"); + OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "CNNNetwork::CNNNetwork"); if (graph == nullptr) { IE_THROW() << "CNNNetwork was not initialized: 'graph' object is empty"; diff --git a/inference-engine/src/inference_engine/src/ie_core.cpp b/inference-engine/src/inference_engine/src/ie_core.cpp index e62674d1a46..c7896e69593 100644 --- a/inference-engine/src/inference_engine/src/ie_core.cpp +++ b/inference-engine/src/inference_engine/src/ie_core.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -31,9 +32,7 @@ using namespace InferenceEngine::PluginConfigParams; using namespace std::placeholders; -namespace InferenceEngine { - -namespace { +namespace core_detail { template struct Parsed { @@ -41,7 +40,18 @@ struct Parsed { std::map _config; }; -template +std::string parseXmlConfig(const std::string& xmlFile) { + std::string xmlConfigFile_ = xmlFile; + if (xmlConfigFile_.empty()) { + // register plugins from default plugins.xml config + FileUtils::FilePath xmlConfigFileDefault = FileUtils::makePath(InferenceEngine::getInferenceEngineLibraryPath(), + FileUtils::toFilePath("plugins.xml")); + xmlConfigFile_ = FileUtils::fromFilePath(xmlConfigFileDefault); + } + return xmlConfigFile_; +} + +template Parsed parseDeviceNameIntoConfig(const std::string& deviceName, const std::map& config = {}) { auto config_ = config; auto deviceName_ = deviceName; @@ -64,7 +74,7 @@ Parsed parseDeviceNameIntoConfig(const std::string& deviceName, const std::ma if (deviceName_.empty()) { deviceName_ = "AUTO"; } - DeviceIDParser parser(deviceName_); + InferenceEngine::DeviceIDParser parser(deviceName_); deviceName_ = parser.getDeviceName(); std::string deviceIDLocal = parser.getDeviceID(); @@ -75,7 +85,7 @@ Parsed parseDeviceNameIntoConfig(const std::string& deviceName, const std::ma return {deviceName_, config_}; } -Parameter copyParameterValue(const Parameter & value) { +InferenceEngine::Parameter copyParameterValue(const InferenceEngine::Parameter & value) { if (value.is()) { return { value.as() }; } else if (value.is()) { @@ -107,79 +117,17 @@ template void allowNotImplemented(F && f) { try { f(); - } catch (const NotImplemented&) { } + } catch (const InferenceEngine::NotImplemented&) { } } -} // namespace - -DeviceIDParser::DeviceIDParser(const std::string& deviceNameWithID) { - deviceName = deviceNameWithID; - - auto pos = deviceName.find('.'); - if (pos != std::string::npos) { - deviceName = deviceNameWithID.substr(0, pos); - deviceID = deviceNameWithID.substr(pos + 1, deviceNameWithID.size()); - } -} - -std::string DeviceIDParser::getDeviceID() const { - return deviceID; -} - -std::string DeviceIDParser::getDeviceName() const { - return deviceName; -} - -std::vector DeviceIDParser::getHeteroDevices(std::string fallbackDevice) { - std::vector deviceNames; - - std::string cdevice; - char delimiter = ','; - size_t pos = 0; - - while ((pos = fallbackDevice.find(delimiter)) != std::string::npos) { - deviceNames.push_back(fallbackDevice.substr(0, pos)); - fallbackDevice.erase(0, pos + 1); - } - - if (!fallbackDevice.empty()) deviceNames.push_back(fallbackDevice); - - return deviceNames; -} - -std::vector 
DeviceIDParser::getMultiDevices(std::string devicesList) { - std::vector deviceNames; - auto trim_request_info = [](std::string device_with_requests) { - auto opening_bracket = device_with_requests.find_first_of('('); - return device_with_requests.substr(0, opening_bracket); - }; - std::string device; - char delimiter = ','; - size_t pos = 0; - // in addition to the list of devices, every device can have a #requests in the brackets e.g. "CPU(100)" - // we skip the #requests info here - while ((pos = devicesList.find(delimiter)) != std::string::npos) { - auto d = devicesList.substr(0, pos); - deviceNames.push_back(trim_request_info(d)); - devicesList.erase(0, pos + 1); - } - - if (!devicesList.empty()) deviceNames.push_back(trim_request_info(devicesList)); - - return deviceNames; -} - -class Core::Impl : public ICore, public std::enable_shared_from_this { - // Fields are ordered by deletion order - ITaskExecutor::Ptr _taskExecutor = nullptr; - - mutable std::map plugins; +class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_this { + mutable std::map plugins; class CoreConfig final { public: struct CacheConfig { std::string _cacheDir; - std::shared_ptr _cacheManager; + std::shared_ptr _cacheManager; }; void setAndUpdate(std::map& config) { @@ -189,7 +137,7 @@ class Core::Impl : public ICore, public std::enable_shared_from_this { _cacheConfig._cacheDir = it->second; if (!it->second.empty()) { FileUtils::createDirectoryRecursive(it->second); - _cacheConfig._cacheManager = std::make_shared(std::move(it->second)); + _cacheConfig._cacheManager = std::make_shared(std::move(it->second)); } else { _cacheConfig._cacheManager = nullptr; } @@ -212,7 +160,7 @@ class Core::Impl : public ICore, public std::enable_shared_from_this { // Core settings (cache config, etc) CoreConfig coreConfig; - CacheGuard cacheGuard; + InferenceEngine::CacheGuard cacheGuard; struct PluginDescriptor { FileUtils::FilePath libraryLocation; @@ -221,7 +169,7 @@ class Core::Impl : public ICore, public std::enable_shared_from_this { }; std::unordered_set opsetNames; - std::vector extensions; + std::vector extensions; std::map pluginRegistry; mutable std::mutex pluginsMutex; // to lock parallel access to pluginRegistry and plugins @@ -232,7 +180,7 @@ class Core::Impl : public ICore, public std::enable_shared_from_this { return DeviceSupportsImportExport(plugin); } - bool DeviceSupportsImportExport(const InferencePlugin& plugin) const { + bool DeviceSupportsImportExport(const InferenceEngine::InferencePlugin& plugin) const { std::vector supportedMetricKeys = plugin.GetMetric(METRIC_KEY(SUPPORTED_METRICS), {}); auto it = std::find(supportedMetricKeys.begin(), supportedMetricKeys.end(), METRIC_KEY(IMPORT_EXPORT_SUPPORT)); @@ -241,11 +189,11 @@ class Core::Impl : public ICore, public std::enable_shared_from_this { return supported; } - bool DeviceSupportsCacheDir(const InferencePlugin& plugin) const { + bool DeviceSupportsCacheDir(const InferenceEngine::InferencePlugin& plugin) const { return DeviceSupportsConfigKey(plugin, CONFIG_KEY(CACHE_DIR)); } - bool DeviceSupportsConfigKey(const InferencePlugin& plugin, const std::string& key) const { + bool DeviceSupportsConfigKey(const InferenceEngine::InferencePlugin& plugin, const std::string& key) const { bool supported = false; std::vector supportedMetricKeys; try { @@ -262,25 +210,25 @@ class Core::Impl : public ICore, public std::enable_shared_from_this { return supported; } - SoExecutableNetworkInternal LoadNetworkImpl(const CNNNetwork& network, - 
InferencePlugin& plugin, + InferenceEngine::SoExecutableNetworkInternal LoadNetworkImpl(const InferenceEngine::CNNNetwork& network, + InferenceEngine::InferencePlugin& plugin, const std::map& parsedConfig, - const RemoteContext::Ptr& context, + const InferenceEngine::RemoteContext::Ptr& context, const std::string& blobID, const std::string& modelPath = std::string(), bool forceDisableCache = false) { - OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::Impl::LoadNetworkImpl"); - SoExecutableNetworkInternal execNetwork; + OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::Impl::LoadNetworkImpl"); + InferenceEngine::SoExecutableNetworkInternal execNetwork; execNetwork = context ? plugin.LoadNetwork(network, context, parsedConfig) : plugin.LoadNetwork(network, parsedConfig); auto cacheManager = coreConfig.getCacheConfig()._cacheManager; if (!forceDisableCache && cacheManager && DeviceSupportsImportExport(plugin)) { try { // need to export network for further import from "cache" - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::Export"); + OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetwork::Export"); cacheManager->writeCacheEntry(blobID, [&](std::ostream& networkStream) { - networkStream << CompiledBlobHeader(GetInferenceEngineVersion()->buildNumber, - NetworkCompilationContext::calculateFileInfo(modelPath)); + networkStream << InferenceEngine::CompiledBlobHeader(InferenceEngine::GetInferenceEngineVersion()->buildNumber, + InferenceEngine::NetworkCompilationContext::calculateFileInfo(modelPath)); execNetwork->Export(networkStream); }); } catch (...) { @@ -291,30 +239,30 @@ class Core::Impl : public ICore, public std::enable_shared_from_this { return execNetwork; } - SoExecutableNetworkInternal LoadNetworkFromCache(const std::shared_ptr& cacheManager, + InferenceEngine::SoExecutableNetworkInternal LoadNetworkFromCache(const std::shared_ptr& cacheManager, const std::string& blobId, - InferencePlugin& plugin, + InferenceEngine::InferencePlugin& plugin, const std::map& config, - const RemoteContext::Ptr& context, + const InferenceEngine::RemoteContext::Ptr& context, bool& networkIsImported, const std::string& modelPath = std::string()) { - SoExecutableNetworkInternal execNetwork; + InferenceEngine::SoExecutableNetworkInternal execNetwork; struct HeaderException {}; IE_ASSERT(cacheManager != nullptr); try { cacheManager->readCacheEntry(blobId, [&](std::istream &networkStream) { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetworkFromCache::ReadStreamAndImport"); + OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetworkFromCache::ReadStreamAndImport"); try { - CompiledBlobHeader header; + InferenceEngine::CompiledBlobHeader header; networkStream >> header; - if (header.getIeVersion() != GetInferenceEngineVersion()->buildNumber) { + if (header.getIeVersion() != InferenceEngine::GetInferenceEngineVersion()->buildNumber) { // Build number mismatch, don't use this cache - throw NetworkNotRead("Version does not match"); + throw InferenceEngine::NetworkNotRead("Version does not match"); } - if (header.getFileInfo() != NetworkCompilationContext::calculateFileInfo(modelPath)) { + if (header.getFileInfo() != InferenceEngine::NetworkCompilationContext::calculateFileInfo(modelPath)) { // Original file is changed, don't use cache - throw NetworkNotRead("Original model file is changed"); + throw InferenceEngine::NetworkNotRead("Original model file is changed"); } } catch (...) 
{ throw HeaderException(); @@ -338,10 +286,10 @@ class Core::Impl : public ICore, public std::enable_shared_from_this { return execNetwork; } - std::map CreateCompileConfig(const InferencePlugin& plugin, + std::map CreateCompileConfig(const InferenceEngine::InferencePlugin& plugin, const std::string& deviceFamily, const std::map& origConfig) const { - std::map getMetricConfig; + std::map getMetricConfig; auto compileConfig = origConfig; // 0. Remove TARGET_FALLBACK key, move it to getMetricConfig @@ -373,22 +321,22 @@ class Core::Impl : public ICore, public std::enable_shared_from_this { return compileConfig; } - std::string CalculateNetworkHash(const CNNNetwork& network, const std::string& deviceFamily, - const InferencePlugin& plugin, + std::string CalculateNetworkHash(const InferenceEngine::CNNNetwork& network, const std::string& deviceFamily, + const InferenceEngine::InferencePlugin& plugin, const std::map& config) const { auto compileConfig = CreateCompileConfig(plugin, deviceFamily, config); - return NetworkCompilationContext::computeHash(network, compileConfig); + return InferenceEngine::NetworkCompilationContext::computeHash(network, compileConfig); } std::string CalculateFileHash(const std::string& modelName, const std::string& deviceFamily, - const InferencePlugin& plugin, + const InferenceEngine::InferencePlugin& plugin, const std::map& config) const { auto compileConfig = CreateCompileConfig(plugin, deviceFamily, config); - return NetworkCompilationContext::computeHash(modelName, compileConfig); + return InferenceEngine::NetworkCompilationContext::computeHash(modelName, compileConfig); } public: - Impl() { + CoreImpl() { opsetNames.insert("opset1"); opsetNames.insert("opset2"); opsetNames.insert("opset3"); @@ -398,7 +346,7 @@ public: opsetNames.insert("opset7"); } - ~Impl() override = default; + ~CoreImpl() override = default; /** * @brief Register plugins for devices which are located in .xml configuration file. 
The function supports UNICODE path @@ -428,7 +376,7 @@ public: // append IR library path for default IE plugins { - FileUtils::FilePath absFilePath = FileUtils::makePath(getInferenceEngineLibraryPath(), pluginPath); + FileUtils::FilePath absFilePath = FileUtils::makePath(InferenceEngine::getInferenceEngineLibraryPath(), pluginPath); if (FileUtils::fileExist(absFilePath)) pluginPath = absFilePath; } @@ -471,30 +419,30 @@ public: * @brief Returns global task executor * @return Reference to task executor */ - ITaskExecutor::Ptr GetTaskExecutor() const override { - return _taskExecutor; + InferenceEngine::ITaskExecutor::Ptr GetTaskExecutor() const override { + return nullptr; } - CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath) const override { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_RT, "Core::Impl::ReadNetwork from file"); - return details::ReadNetwork(modelPath, binPath, extensions); + InferenceEngine::CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath) const override { + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "Core::Impl::ReadNetwork from file"); + return InferenceEngine::details::ReadNetwork(modelPath, binPath, extensions); } - CNNNetwork ReadNetwork(const std::string& model, const Blob::CPtr& weights) const override { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_RT, "Core::Impl::ReadNetwork from memory"); - return details::ReadNetwork(model, weights, extensions); + InferenceEngine::CNNNetwork ReadNetwork(const std::string& model, const InferenceEngine::Blob::CPtr& weights) const override { + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "Core::Impl::ReadNetwork from memory"); + return InferenceEngine::details::ReadNetwork(model, weights, extensions); } // TODO: In future this method can be added to ICore interface - SoExecutableNetworkInternal LoadNetwork(const CNNNetwork& network, const RemoteContext::Ptr& context, + InferenceEngine::SoExecutableNetworkInternal LoadNetwork(const InferenceEngine::CNNNetwork& network, const InferenceEngine::RemoteContext::Ptr& context, const std::map& config) { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::RemoteContext"); + OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetwork::RemoteContext"); if (context == nullptr) { IE_THROW() << "Remote context is null"; } auto parsed = parseDeviceNameIntoConfig(context->getDeviceName(), config); auto plugin = GetCPPPluginByName(parsed._deviceName); - SoExecutableNetworkInternal res; + InferenceEngine::SoExecutableNetworkInternal res; auto cacheManager = coreConfig.getCacheConfig()._cacheManager; if (cacheManager && DeviceSupportsImportExport(plugin)) { auto hash = CalculateNetworkHash(network, parsed._deviceName, plugin, parsed._config); @@ -510,10 +458,10 @@ public: return res; } - SoExecutableNetworkInternal LoadNetwork(const CNNNetwork& network, + InferenceEngine::SoExecutableNetworkInternal LoadNetwork(const InferenceEngine::CNNNetwork& network, const std::string& deviceName, const std::map& config) override { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::CNN"); + OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetwork::CNN"); bool forceDisableCache = config.count(CONFIG_KEY_INTERNAL(FORCE_DISABLE_CACHE)) > 0; auto parsed = parseDeviceNameIntoConfig(deviceName, config); if (forceDisableCache) { @@ -521,7 +469,7 @@ public: parsed._config.erase(CONFIG_KEY_INTERNAL(FORCE_DISABLE_CACHE)); } auto plugin 
= GetCPPPluginByName(parsed._deviceName); - SoExecutableNetworkInternal res; + InferenceEngine::SoExecutableNetworkInternal res; auto cacheManager = coreConfig.getCacheConfig()._cacheManager; if (!forceDisableCache && cacheManager && DeviceSupportsImportExport(plugin)) { auto hash = CalculateNetworkHash(network, parsed._deviceName, plugin, parsed._config); @@ -537,13 +485,13 @@ public: return res; } - SoExecutableNetworkInternal LoadNetwork(const std::string& modelPath, + InferenceEngine::SoExecutableNetworkInternal LoadNetwork(const std::string& modelPath, const std::string& deviceName, const std::map& config) override { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::Path"); + OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetwork::Path"); auto parsed = parseDeviceNameIntoConfig(deviceName, config); auto plugin = GetCPPPluginByName(parsed._deviceName); - SoExecutableNetworkInternal res; + InferenceEngine::SoExecutableNetworkInternal res; auto cacheManager = coreConfig.getCacheConfig()._cacheManager; if (cacheManager && DeviceSupportsImportExport(plugin)) { bool loadedFromCache = false; @@ -564,15 +512,15 @@ public: return res; } - SoExecutableNetworkInternal ImportNetwork(std::istream& networkModel, const std::string& deviceName, + InferenceEngine::SoExecutableNetworkInternal ImportNetwork(std::istream& networkModel, const std::string& deviceName, const std::map& config) override { auto parsed = parseDeviceNameIntoConfig(deviceName, config); return GetCPPPluginByName(parsed._deviceName).ImportNetwork(networkModel, parsed._config); } - QueryNetworkResult QueryNetwork(const CNNNetwork& network, const std::string& deviceName, + InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network, const std::string& deviceName, const std::map& config) const override { - OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::QueryNetwork"); + OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::QueryNetwork"); auto parsed = parseDeviceNameIntoConfig(deviceName, config); auto res = GetCPPPluginByName(parsed._deviceName).QueryNetwork(network, parsed._config); if (!network.getFunction() || res.supportedLayersMap.empty()) @@ -597,7 +545,7 @@ public: return res; } - Parameter GetMetric(const std::string& deviceName, const std::string& name) const override { + InferenceEngine::Parameter GetMetric(const std::string& deviceName, const std::string& name) const override { // HETERO case { if (deviceName.find("HETERO:") == 0) { @@ -637,9 +585,9 @@ public: for (auto&& deviceName : GetListOfDevicesInRegistry()) { std::vector devicesIDs; try { - const Parameter p = GetMetric(deviceName, propertyName); + const InferenceEngine::Parameter p = GetMetric(deviceName, propertyName); devicesIDs = p.as>(); - } catch (Exception&) { + } catch (InferenceEngine::Exception&) { // plugin is not created by e.g. 
invalid env } catch (const std::exception& ex) { IE_THROW() << "An exception is thrown while trying to create the " << deviceName @@ -666,8 +614,8 @@ public: * @param deviceName A name of device * @return Reference to a CPP plugin wrapper */ - InferencePlugin GetCPPPluginByName(const std::string& deviceName) const { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::Impl::GetCPPPluginByName"); + InferenceEngine::InferencePlugin GetCPPPluginByName(const std::string& deviceName) const { + OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::Impl::GetCPPPluginByName"); std::lock_guard lock(pluginsMutex); @@ -682,7 +630,7 @@ public: PluginDescriptor desc = it->second; try { - InferencePlugin plugin{desc.libraryLocation}; + InferenceEngine::InferencePlugin plugin{desc.libraryLocation}; { plugin.SetName(deviceName); @@ -714,13 +662,13 @@ public: allowNotImplemented([&]() { for (auto&& extensionLocation : desc.listOfExtentions) { - plugin.AddExtension(std::make_shared(extensionLocation)); + plugin.AddExtension(std::make_shared(extensionLocation)); } }); } plugins[deviceName] = plugin; - } catch (const Exception& ex) { + } catch (const InferenceEngine::Exception& ex) { IE_THROW() << "Failed to create plugin " << FileUtils::fromFilePath(desc.libraryLocation) << " for device " << deviceName << "\n" << "Please, check your environment\n" @@ -766,7 +714,7 @@ public: { pluginPath = FileUtils::makePluginLibraryName({}, FileUtils::toFilePath(pluginName.c_str())); - FileUtils::FilePath absFilePath = FileUtils::makePath(getInferenceEngineLibraryPath(), pluginPath); + FileUtils::FilePath absFilePath = FileUtils::makePath(InferenceEngine::getInferenceEngineLibraryPath(), pluginPath); if (FileUtils::fileExist(absFilePath)) pluginPath = absFilePath; } @@ -841,7 +789,7 @@ public: * @brief Registers the extension in a Core object * Such extensions can be used for both CNNNetwork readers and device plugins */ - void AddExtension(const IExtensionPtr& extension) { + void AddExtension(const InferenceEngine::IExtensionPtr& extension) { std::lock_guard lock(pluginsMutex); std::map opsets = extension->getOpSets(); @@ -864,63 +812,124 @@ public: * @brief Provides a list of extensions * @return A list of registered extensions */ - const std::vector& GetExtensions() const { + const std::vector& GetExtensions() const { return extensions; } + + std::map GetVersions(const std::string& deviceName) const { + std::map versions; + std::vector deviceNames; + + { + // for compatibility with samples / demo + if (deviceName.find("HETERO") == 0) { + auto pos = deviceName.find_first_of(":"); + if (pos != std::string::npos) { + deviceNames = InferenceEngine::DeviceIDParser::getHeteroDevices(deviceName.substr(pos + 1)); + } + deviceNames.push_back("HETERO"); + } else if (deviceName.find("MULTI") == 0) { + auto pos = deviceName.find_first_of(":"); + if (pos != std::string::npos) { + deviceNames = InferenceEngine::DeviceIDParser::getMultiDevices(deviceName.substr(pos + 1)); + } + deviceNames.push_back("MULTI"); + } else if (deviceName.find("AUTO") == 0) { + auto pos = deviceName.find_first_of(":"); + if (pos != std::string::npos) { + deviceNames = InferenceEngine::DeviceIDParser::getHeteroDevices(deviceName.substr(pos + 1)); + } + deviceNames.emplace_back("AUTO"); + } else { + deviceNames.push_back(deviceName); + } + } + + for (auto&& deviceName_ : deviceNames) { + InferenceEngine::DeviceIDParser parser(deviceName_); + std::string deviceNameLocal = parser.getDeviceName(); + + InferenceEngine::InferencePlugin 
cppPlugin = GetCPPPluginByName(deviceNameLocal); + const InferenceEngine::Version version = cppPlugin.GetVersion(); + versions[deviceNameLocal] = version; + } + + return versions; + } }; +} // namespace core_detail + + +namespace InferenceEngine { + +DeviceIDParser::DeviceIDParser(const std::string& deviceNameWithID) { + deviceName = deviceNameWithID; + + auto pos = deviceName.find('.'); + if (pos != std::string::npos) { + deviceName = deviceNameWithID.substr(0, pos); + deviceID = deviceNameWithID.substr(pos + 1, deviceNameWithID.size()); + } +} + +std::string DeviceIDParser::getDeviceID() const { + return deviceID; +} + +std::string DeviceIDParser::getDeviceName() const { + return deviceName; +} + +std::vector DeviceIDParser::getHeteroDevices(std::string fallbackDevice) { + std::vector deviceNames; + + std::string cdevice; + char delimiter = ','; + size_t pos = 0; + + while ((pos = fallbackDevice.find(delimiter)) != std::string::npos) { + deviceNames.push_back(fallbackDevice.substr(0, pos)); + fallbackDevice.erase(0, pos + 1); + } + + if (!fallbackDevice.empty()) deviceNames.push_back(fallbackDevice); + + return deviceNames; +} + +std::vector DeviceIDParser::getMultiDevices(std::string devicesList) { + std::vector deviceNames; + auto trim_request_info = [](std::string device_with_requests) { + auto opening_bracket = device_with_requests.find_first_of('('); + return device_with_requests.substr(0, opening_bracket); + }; + std::string device; + char delimiter = ','; + size_t pos = 0; + // in addition to the list of devices, every device can have a #requests in the brackets e.g. "CPU(100)" + // we skip the #requests info here + while ((pos = devicesList.find(delimiter)) != std::string::npos) { + auto d = devicesList.substr(0, pos); + deviceNames.push_back(trim_request_info(d)); + devicesList.erase(0, pos + 1); + } + + if (!devicesList.empty()) deviceNames.push_back(trim_request_info(devicesList)); + + return deviceNames; +} + +class Core::Impl : public core_detail::CoreImpl {}; + Core::Core(const std::string& xmlConfigFile) { _impl = std::make_shared(); - std::string xmlConfigFile_ = xmlConfigFile; - if (xmlConfigFile_.empty()) { - // register plugins from default plugins.xml config - FileUtils::FilePath xmlConfigFileDefault = FileUtils::makePath(getInferenceEngineLibraryPath(), FileUtils::toFilePath("plugins.xml")); - xmlConfigFile_ = FileUtils::fromFilePath(xmlConfigFileDefault); - } - - RegisterPlugins(xmlConfigFile_); + RegisterPlugins(core_detail::parseXmlConfig(xmlConfigFile)); } std::map Core::GetVersions(const std::string& deviceName) const { - std::map versions; - std::vector deviceNames; - - { - // for compatibility with samples / demo - if (deviceName.find("HETERO") == 0) { - auto pos = deviceName.find_first_of(":"); - if (pos != std::string::npos) { - deviceNames = DeviceIDParser::getHeteroDevices(deviceName.substr(pos + 1)); - } - deviceNames.push_back("HETERO"); - } else if (deviceName.find("MULTI") == 0) { - auto pos = deviceName.find_first_of(":"); - if (pos != std::string::npos) { - deviceNames = DeviceIDParser::getMultiDevices(deviceName.substr(pos + 1)); - } - deviceNames.push_back("MULTI"); - } else if (deviceName.find("AUTO") == 0) { - auto pos = deviceName.find_first_of(":"); - if (pos != std::string::npos) { - deviceNames = DeviceIDParser::getHeteroDevices(deviceName.substr(pos + 1)); - } - deviceNames.emplace_back("AUTO"); - } else { - deviceNames.push_back(deviceName); - } - } - - for (auto&& deviceName_ : deviceNames) { - DeviceIDParser parser(deviceName_); - 
std::string deviceNameLocal = parser.getDeviceName(); - - InferenceEngine::InferencePlugin cppPlugin = _impl->GetCPPPluginByName(deviceNameLocal); - const Version version = cppPlugin.GetVersion(); - versions[deviceNameLocal] = version; - } - - return versions; + return _impl->GetVersions(deviceName); } #ifdef ENABLE_UNICODE_PATH_SUPPORT @@ -969,7 +978,7 @@ RemoteContext::Ptr Core::CreateContext(const std::string& deviceName, const Para IE_THROW() << "AUTO device does not support remote context"; } - auto parsed = parseDeviceNameIntoConfig(deviceName, params); + auto parsed = core_detail::parseDeviceNameIntoConfig(deviceName, params); return _impl->GetCPPPluginByName(parsed._deviceName).CreateContext(parsed._config); } @@ -984,7 +993,7 @@ RemoteContext::Ptr Core::GetDefaultContext(const std::string& deviceName) { IE_THROW() << "AUTO device does not support remote context"; } - auto parsed = parseDeviceNameIntoConfig(deviceName, ParamMap()); + auto parsed = core_detail::parseDeviceNameIntoConfig(deviceName, ParamMap()); return _impl->GetCPPPluginByName(parsed._deviceName).GetDefaultContext(parsed._config); } @@ -1011,21 +1020,21 @@ void Core::AddExtension(const IExtensionPtr& extension) { ExecutableNetwork Core::ImportNetwork(const std::string& modelFileName, const std::string& deviceName, const std::map& config) { - OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::ImportNetwork"); - auto parsed = parseDeviceNameIntoConfig(deviceName, config); + OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::ImportNetwork"); + auto parsed = core_detail::parseDeviceNameIntoConfig(deviceName, config); auto exec = _impl->GetCPPPluginByName(parsed._deviceName).ImportNetwork(modelFileName, parsed._config); return { exec, exec }; } ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, const std::string& deviceName, const std::map& config) { - OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::ImportNetwork"); + OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::ImportNetwork"); auto exec = _impl->ImportNetwork(networkModel, deviceName, config); return { exec, exec }; } ExecutableNetwork Core::ImportNetwork(std::istream& networkModel) { - OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::ImportNetwork"); + OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::ImportNetwork"); using ExportMagic = std::array; constexpr static const ExportMagic exportMagic = {{0x1, 0xE, 0xE, 0x1}}; @@ -1049,7 +1058,7 @@ ExecutableNetwork Core::ImportNetwork(std::istream& networkModel) { ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, const RemoteContext::Ptr& context, const std::map& config) { - OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::ImportNetwork"); + OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::ImportNetwork"); if (context == nullptr) { IE_THROW() << "Remote context is null"; @@ -1059,7 +1068,7 @@ ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, DeviceIDParser device(deviceName_); std::string deviceName = device.getDeviceName(); - auto parsed = parseDeviceNameIntoConfig(deviceName, config); + auto parsed = core_detail::parseDeviceNameIntoConfig(deviceName, config); auto exec = _impl->GetCPPPluginByName(deviceName).ImportNetwork(networkModel, context, parsed._config); return { exec, exec }; } @@ -1097,7 +1106,7 @@ void Core::SetConfig(const std::map& config, const std if (deviceName.empty()) { _impl->SetConfigForPlugins(config, std::string()); } else { - auto parsed = parseDeviceNameIntoConfig(deviceName, config); + auto parsed = core_detail::parseDeviceNameIntoConfig(deviceName, config); 
_impl->SetConfigForPlugins(parsed._config, parsed._deviceName); } } @@ -1128,12 +1137,12 @@ Parameter Core::GetConfig(const std::string& deviceName, const std::string& name } } - auto parsed = parseDeviceNameIntoConfig(deviceName); + auto parsed = core_detail::parseDeviceNameIntoConfig(deviceName); // we need to return a copy of Parameter object which is created on Core side, // not in InferenceEngine plugin side, which can be unloaded from Core in a parallel thread // TODO: remove this WA after *-31417 is resolved - return copyParameterValue(_impl->GetCPPPluginByName(parsed._deviceName).GetConfig(name, parsed._config)); + return core_detail::copyParameterValue(_impl->GetCPPPluginByName(parsed._deviceName).GetConfig(name, parsed._config)); } Parameter Core::GetMetric(const std::string& deviceName, const std::string& name) const { @@ -1160,3 +1169,210 @@ void Core::UnregisterPlugin(const std::string& deviceName_) { } } // namespace InferenceEngine + +namespace ov { +namespace runtime { + +class Core::Impl: public core_detail::CoreImpl {}; + +Core::Core(const std::string& xmlConfigFile) { + _impl = std::make_shared(); + + register_plugins(core_detail::parseXmlConfig(xmlConfigFile)); +} + +std::map Core::get_versions(const std::string& deviceName) const { + return _impl->GetVersions(deviceName); +} + +#ifdef ENABLE_UNICODE_PATH_SUPPORT +std::shared_ptr Core::read_model(const std::wstring& modelPath, const std::wstring& binPath) const { + return _impl->ReadNetwork(FileUtils::wStringtoMBCSstringChar(modelPath), + FileUtils::wStringtoMBCSstringChar(binPath)).getFunction(); +} +#endif +std::shared_ptr Core::read_model(const std::string& modelPath, const std::string& binPath) const { + return _impl->ReadNetwork(modelPath, binPath).getFunction(); +} +std::shared_ptr Core::read_model(const std::string& model, const InferenceEngine::Blob::CPtr& weights) const { + return _impl->ReadNetwork(model, weights).getFunction(); +} +InferenceEngine::ExecutableNetwork Core::compile_model(const std::shared_ptr& network, + const std::string& deviceName, const std::map& config) { + auto exec = _impl->LoadNetwork(InferenceEngine::CNNNetwork(std::const_pointer_cast(network)), deviceName, config); + return { exec, exec }; +} +InferenceEngine::ExecutableNetwork Core::compile_model(const std::string& modelPath, + const std::string& deviceName, const std::map& config) { + auto exec = _impl->LoadNetwork(modelPath, deviceName, config); + return { exec, exec }; +} + +InferenceEngine::ExecutableNetwork Core::compile_model(const std::shared_ptr& network, + const InferenceEngine::RemoteContext::Ptr& context, const std::map& config) { + auto exec = _impl->LoadNetwork(InferenceEngine::CNNNetwork(std::const_pointer_cast(network)), context, config); + return { exec, exec }; +} + +void Core::add_extension(const InferenceEngine::IExtensionPtr& extension) { + _impl->AddExtension(extension); +} + +InferenceEngine::ExecutableNetwork Core::import_model(std::istream& networkModel, + const std::string& deviceName, const std::map& config) { + OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::import_model"); + auto exec = _impl->ImportNetwork(networkModel, deviceName, config); + return { exec, exec }; +} + +InferenceEngine::ExecutableNetwork Core::import_model(std::istream& networkModel, const InferenceEngine::RemoteContext::Ptr& context, + const std::map& config) { + OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::import_model"); + + using ExportMagic = std::array; + constexpr static const ExportMagic exportMagic = {{0x1, 0xE, 0xE, 0x1}}; + 
+ std::string deviceName; + ExportMagic magic = {}; + auto currentPos = networkModel.tellg(); + networkModel.read(magic.data(), magic.size()); + if (exportMagic == magic) { + std::getline(networkModel, deviceName); + } else { + IE_THROW() << "Passed compiled stream does not contain device name. " + "Please, provide device name manually"; + } + networkModel.seekg(currentPos, networkModel.beg); + + auto exec = _impl->GetCPPPluginByName(deviceName).ImportNetwork(networkModel, {}); + return { exec, exec }; +} + +InferenceEngine::QueryNetworkResult Core::query_model(const std::shared_ptr& network, + const std::string& deviceName, + const std::map& config) const { + return _impl->QueryNetwork(InferenceEngine::CNNNetwork(std::const_pointer_cast(network)), deviceName, config); +} +void Core::set_config(const std::map& config, const std::string& deviceName) { + // HETERO case + if (deviceName.find("HETERO:") == 0) { + IE_THROW() << "SetConfig is supported only for HETERO itself (without devices). " + "You can configure the devices with SetConfig before creating the HETERO on top."; + } + + // MULTI case + if (deviceName.find("MULTI:") == 0) { + IE_THROW() << "SetConfig is supported only for MULTI itself (without devices). " + "You can configure the devices with SetConfig before creating the MULTI on top."; + } + + // AUTO case + if (deviceName.find("AUTO:") == 0) { + IE_THROW() << "SetConfig is supported only for AUTO itself (without devices). " + "You can configure the devices with SetConfig before creating the AUTO on top."; + } + + // GPU.0, FPGA.1 cases + if (deviceName.find(".") != std::string::npos) { + IE_THROW() << "SetConfig is supported only for device family itself (without particular device .#). " + "You can pass .# as a particular device instance to QueryNetwork, LoadNetwork, ImportNetwork only"; + } + + if (deviceName.empty()) { + _impl->SetConfigForPlugins(config, std::string()); + } else { + auto parsed = core_detail::parseDeviceNameIntoConfig(deviceName, config); + _impl->SetConfigForPlugins(parsed._config, parsed._deviceName); + } +} + +InferenceEngine::Parameter Core::get_config(const std::string& deviceName, const std::string& name) const { + // HETERO case + { + if (deviceName.find("HETERO:") == 0) { + IE_THROW() + << "You can only GetConfig of the HETERO itself (without devices). " + "GetConfig is also possible for the individual devices before creating the HETERO on top."; + } + } + // MULTI case + { + if (deviceName.find("MULTI:") == 0) { + IE_THROW() + << "You can only GetConfig of the MULTI itself (without devices). " + "GetConfig is also possible for the individual devices before creating the MULTI on top."; + } + } + // AUTO case + { + if (deviceName.find("AUTO:") == 0) { + IE_THROW() + << "You can only GetConfig of the AUTO itself (without devices). 
" + "GetConfig is also possible for the individual devices before creating the AUTO on top."; + } + } + + auto parsed = core_detail::parseDeviceNameIntoConfig(deviceName); + + // we need to return a copy of Parameter object which is created on Core side, + // not in InferenceEngine plugin side, which can be unloaded from Core in a parallel thread + // TODO: remove this WA after *-31417 is resolved + return core_detail::copyParameterValue(_impl->GetCPPPluginByName(parsed._deviceName).GetConfig(name, parsed._config)); +} + +InferenceEngine::Parameter Core::get_metric(const std::string& deviceName, const std::string& name) const { + return _impl->GetMetric(deviceName, name); +} + +std::vector Core::get_available_devices() const { + return _impl->GetAvailableDevices(); +} + +void Core::register_plugin(const std::string& pluginName, const std::string& deviceName) { + _impl->RegisterPluginByName(pluginName, deviceName); +} + +void Core::unload_plugin(const std::string& deviceName) { + InferenceEngine::DeviceIDParser parser(deviceName); + std::string devName = parser.getDeviceName(); + + _impl->UnloadPluginByName(devName); +} + +void Core::register_plugins(const std::string& xmlConfigFile) { + _impl->RegisterPluginsInRegistry(xmlConfigFile); +} + +InferenceEngine::RemoteContext::Ptr Core::create_context(const std::string& deviceName, const InferenceEngine::ParamMap& params) { + if (deviceName.find("HETERO") == 0) { + IE_THROW() << "HETERO device does not support remote context"; + } + if (deviceName.find("MULTI") == 0) { + IE_THROW() << "MULTI device does not support remote context"; + } + if (deviceName.find("AUTO") == 0) { + IE_THROW() << "AUTO device does not support remote context"; + } + + auto parsed = core_detail::parseDeviceNameIntoConfig(deviceName, params); + return _impl->GetCPPPluginByName(parsed._deviceName).CreateContext(parsed._config); +} + +InferenceEngine::RemoteContext::Ptr Core::get_default_context(const std::string& deviceName) { + if (deviceName.find("HETERO") == 0) { + IE_THROW() << "HETERO device does not support remote context"; + } + if (deviceName.find("MULTI") == 0) { + IE_THROW() << "MULTI device does not support remote context"; + } + if (deviceName.find("AUTO") == 0) { + IE_THROW() << "AUTO device does not support remote context"; + } + + auto parsed = core_detail::parseDeviceNameIntoConfig(deviceName, InferenceEngine::ParamMap()); + + return _impl->GetCPPPluginByName(parsed._deviceName).GetDefaultContext(parsed._config); +} + +} // namespace runtime +} // namespace ov diff --git a/inference-engine/src/inference_engine/src/ie_itt.hpp b/inference-engine/src/inference_engine/src/ie_itt.hpp index 343fdc110c1..c36b550f492 100644 --- a/inference-engine/src/inference_engine/src/ie_itt.hpp +++ b/inference-engine/src/inference_engine/src/ie_itt.hpp @@ -14,9 +14,16 @@ namespace InferenceEngine { namespace itt { namespace domains { - OV_ITT_DOMAIN(IE); OV_ITT_DOMAIN(IE_LT); +} // namespace domains +} // namespace itt +} // namespace InferenceEngine + +namespace ov { +namespace itt { +namespace domains { + OV_ITT_DOMAIN(IE); OV_ITT_DOMAIN(IE_RT); -} -} -} +} // namespace domains +} // namespace itt +} // namespace ov diff --git a/inference-engine/src/inference_engine/src/ie_network_reader.cpp b/inference-engine/src/inference_engine/src/ie_network_reader.cpp index 7189a0a098a..792f95eb74d 100644 --- a/inference-engine/src/inference_engine/src/ie_network_reader.cpp +++ b/inference-engine/src/inference_engine/src/ie_network_reader.cpp @@ -67,7 +67,7 @@ public: using Ptr = 
std::shared_ptr; Reader(const std::string& name, const std::string location): name(name), location(location) {} bool supportModel(std::istream& model) const override { - OV_ITT_SCOPED_TASK(itt::domains::IE, "Reader::supportModel"); + OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Reader::supportModel"); auto reader = getReaderPtr(); return reader->supportModel(model); } @@ -94,7 +94,7 @@ namespace { std::multimap readers; void registerReaders() { - OV_ITT_SCOPED_TASK(itt::domains::IE, "registerReaders"); + OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "registerReaders"); static bool initialized = false; static std::mutex readerMutex; std::lock_guard lock(readerMutex); @@ -212,7 +212,7 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& Blob::Ptr weights = make_shared_blob({Precision::U8, { fileSize }, C }); { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_RT, "ReadNetworkWeights"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "ReadNetworkWeights"); weights->allocate(); binStream.read(weights->buffer(), fileSize); binStream.close(); diff --git a/inference-engine/src/inference_engine/src/ie_ngraph_utils.cpp b/inference-engine/src/inference_engine/src/ie_ngraph_utils.cpp index b578408cfe9..fb32d7526eb 100644 --- a/inference-engine/src/inference_engine/src/ie_ngraph_utils.cpp +++ b/inference-engine/src/inference_engine/src/ie_ngraph_utils.cpp @@ -10,7 +10,7 @@ namespace InferenceEngine { namespace details { CNNNetwork cloneNetwork(const CNNNetwork& network) { - OV_ITT_SCOPED_TASK(itt::domains::IE, "cloneNetwork"); + OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "cloneNetwork"); if (network.getFunction()) { IE_SUPPRESS_DEPRECATED_START diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_core_integration.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_core_integration.cpp new file mode 100644 index 00000000000..21b83b0330c --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_core_integration.cpp @@ -0,0 +1,164 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "behavior/ov_core_integration.hpp" +#include "openvino/runtime/core.hpp" + +using namespace BehaviorTestsDefinitions; + +using namespace InferenceEngine::PluginConfigParams; + +namespace { +// +// IE Class Common tests with +// + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassCommon, OVClassBasicTestP, + ::testing::Values(std::make_pair("MKLDNNPlugin", "CPU"))); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassNetworkTestP, OVClassNetworkTestP, + ::testing::Values("CPU")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassImportExportTestP, OVClassImportExportTestP, + ::testing::Values("HETERO:CPU")); + +// +// IE Class GetMetric +// + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassGetMetricTest, OVClassGetMetricTest_SUPPORTED_CONFIG_KEYS, + ::testing::Values("CPU", "MULTI", "HETERO", "AUTO")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassGetMetricTest, OVClassGetMetricTest_SUPPORTED_METRICS, + ::testing::Values("CPU", "MULTI", "HETERO", "AUTO")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassGetMetricTest, OVClassGetMetricTest_AVAILABLE_DEVICES, + ::testing::Values("CPU")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassGetMetricTest, OVClassGetMetricTest_FULL_DEVICE_NAME, + ::testing::Values("CPU", "MULTI", "HETERO", "AUTO")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassGetMetricTest, OVClassGetMetricTest_OPTIMIZATION_CAPABILITIES, + 
::testing::Values("CPU", "AUTO")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassGetMetricTest, OVClassGetMetricTest_RANGE_FOR_ASYNC_INFER_REQUESTS, + ::testing::Values("CPU")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassGetMetricTest, OVClassGetMetricTest_RANGE_FOR_STREAMS, + ::testing::Values("CPU")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassGetMetricTest, OVClassGetMetricTest_ThrowUnsupported, + ::testing::Values("CPU", "MULTI", "HETERO", "AUTO")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassGetConfigTest, OVClassGetConfigTest_ThrowUnsupported, + ::testing::Values("CPU", "MULTI", "HETERO", "AUTO")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassGetAvailableDevices, OVClassGetAvailableDevices, + ::testing::Values("CPU")); + +// +// IE Class GetConfig +// + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassGetConfigTest, OVClassGetConfigTest, + ::testing::Values("CPU")); + +// +// Executable Network GetMetric +// + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassExecutableNetworkGetMetricTest, OVClassExecutableNetworkGetMetricTest_SUPPORTED_CONFIG_KEYS, + ::testing::Values("CPU", "MULTI:CPU", "HETERO:CPU", "AUTO:CPU")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassExecutableNetworkGetMetricTest, OVClassExecutableNetworkGetMetricTest_SUPPORTED_METRICS, + ::testing::Values("CPU", "MULTI:CPU", "HETERO:CPU", "AUTO:CPU")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassExecutableNetworkGetMetricTest, OVClassExecutableNetworkGetMetricTest_NETWORK_NAME, + ::testing::Values("CPU", "MULTI:CPU", "HETERO:CPU", "AUTO:CPU")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassExecutableNetworkGetMetricTest, OVClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS, + ::testing::Values("CPU", "MULTI:CPU", "HETERO:CPU", "AUTO:CPU")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassExecutableNetworkGetMetricTest, OVClassExecutableNetworkGetMetricTest_ThrowsUnsupported, + ::testing::Values("CPU", "MULTI:CPU", "HETERO:CPU", "AUTO:CPU")); + +// +// Executable Network GetConfig / SetConfig +// + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassExecutableNetworkGetConfigTest, OVClassExecutableNetworkGetConfigTest, + ::testing::Values("CPU")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassExecutableNetworkSetConfigTest, OVClassExecutableNetworkSetConfigTest, + ::testing::Values("CPU")); + +// +// Hetero Executable Network GetMetric +// + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassHeteroExecutableNetworkGetMetricTest, OVClassHeteroExecutableNetworkGetMetricTest_SUPPORTED_CONFIG_KEYS, + ::testing::Values("CPU")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassHeteroExecutableNetworkGetMetricTest, OVClassHeteroExecutableNetworkGetMetricTest_SUPPORTED_METRICS, + ::testing::Values("CPU")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassHeteroExecutableNetworkGetMetricTest, OVClassHeteroExecutableNetworkGetMetricTest_NETWORK_NAME, + ::testing::Values("CPU")); + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassHeteroExecutableNetworkGetMetricTest, OVClassHeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK, + ::testing::Values("CPU")); + +////////////////////////////////////////////////////////////////////////////////////////// + +TEST(OVClassBasicTest, smoke_SetConfigAfterCreatedThrow) { + ov::runtime::Core ie; + std::string value = {}; + + ASSERT_NO_THROW(ie.set_config({{KEY_CPU_THREADS_NUM, "1"}}, "CPU")); + ASSERT_NO_THROW(value = ie.get_config("CPU", KEY_CPU_THREADS_NUM).as()); + ASSERT_EQ("1", value); + + ASSERT_NO_THROW(ie.set_config({{KEY_CPU_THREADS_NUM, "4"}}, "CPU")); + ASSERT_NO_THROW(value = ie.get_config("CPU", KEY_CPU_THREADS_NUM).as()); + 
ASSERT_EQ("4", value); +} + +// IE Class Query network + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassQueryNetworkTest, OVClassQueryNetworkTest, + ::testing::Values("CPU")); + +// IE Class Load network + +INSTANTIATE_TEST_SUITE_P( + smoke_OVClassLoadNetworkTest, OVClassLoadNetworkTest, + ::testing::Values("CPU")); +} // namespace + diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/ov_core_integration.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/ov_core_integration.hpp new file mode 100644 index 00000000000..a9b90bd2954 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/ov_core_integration.hpp @@ -0,0 +1,1485 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/file_utils.hpp" +#include "common_test_utils/unicode_utils.hpp" +#include "ngraph_functions/subgraph_builders.hpp" + +#ifdef ENABLE_UNICODE_PATH_SUPPORT +# include +# define GTEST_COUT std::cerr << "[ ] [ INFO ] " +# include +# include + +#endif + +using namespace testing; +using namespace InferenceEngine; +using namespace InferenceEngine::details; +using namespace InferenceEngine::PluginConfigParams; + +namespace BehaviorTestsDefinitions { + +#define ASSERT_EXEC_METRIC_SUPPORTED(metricName) \ + { \ + std::vector metrics = exeNetwork.GetMetric(METRIC_KEY(SUPPORTED_METRICS)); \ + auto it = std::find(metrics.begin(), metrics.end(), metricName); \ + ASSERT_NE(metrics.end(), it); \ + } + +#define ASSERT_METRIC_SUPPORTED(metricName) \ + { \ + std::vector metrics = ie.get_metric(deviceName, METRIC_KEY(SUPPORTED_METRICS)); \ + auto it = std::find(metrics.begin(), metrics.end(), metricName); \ + ASSERT_NE(metrics.end(), it); \ + } + +#define SKIP_IF_NOT_IMPLEMENTED(...) 
\ + { \ + try { \ + __VA_ARGS__; \ + } catch (const InferenceEngine::NotImplemented&) { \ + GTEST_SKIP(); \ + } \ + } + +inline ov::runtime::Core createCoreWithTemplate() { + ov::runtime::Core ie; + std::string pluginName = "templatePlugin"; + pluginName += IE_BUILD_POSTFIX; + ie.register_plugin(pluginName, "TEMPLATE"); + return ie; +} + +class OVClassBasicTestP : public ::testing::Test, public WithParamInterface> { +protected: + std::string deviceName; + std::string pluginName; + +public: + void SetUp() override { + std::tie(pluginName, deviceName) = GetParam(); + pluginName += IE_BUILD_POSTFIX; + } +}; + +class OVClassNetworkTest : public ::testing::Test { +public: + std::shared_ptr actualNetwork, simpleNetwork, multinputNetwork, ksoNetwork; + + void SetUp() override { + // Generic network + { + actualNetwork = ngraph::builder::subgraph::makeSplitConvConcat(); + } + // Quite simple network + { + simpleNetwork = ngraph::builder::subgraph::makeSingleConv(); + } + // Multinput to substruct network + { + multinputNetwork = ngraph::builder::subgraph::make2InputSubtract(); + } + // Network with KSO + { + ksoNetwork = ngraph::builder::subgraph::makeKSOFunction(); + } + } + void setHeteroNetworkAffinity(const std::string& targetDevice) { + const std::map deviceMapping = {{"Split_2", targetDevice}, + {"Convolution_4", targetDevice}, + {"Convolution_7", CommonTestUtils::DEVICE_CPU}, + {"Relu_5", CommonTestUtils::DEVICE_CPU}, + {"Relu_8", targetDevice}, + {"Concat_9", CommonTestUtils::DEVICE_CPU}}; + + for (const auto& op : actualNetwork->get_ops()) { + auto it = deviceMapping.find(op->get_friendly_name()); + if (it != deviceMapping.end()) { + std::string affinity = it->second; + op->get_rt_info()["affinity"] = std::make_shared>(affinity); + } + } + } +}; + +class OVClassBaseTestP : public OVClassNetworkTest, public WithParamInterface { +public: + std::string deviceName; + void SetUp() override { + OVClassNetworkTest::SetUp(); + deviceName = GetParam(); + } +}; + +using OVClassNetworkTestP = OVClassBaseTestP; +using OVClassGetMetricTest = OVClassBaseTestP; +using OVClassQueryNetworkTest = OVClassBaseTestP; +using OVClassImportExportTestP = OVClassBaseTestP; +using OVClassGetMetricTest_SUPPORTED_METRICS = OVClassBaseTestP; +using OVClassGetMetricTest_SUPPORTED_CONFIG_KEYS = OVClassBaseTestP; +using OVClassGetMetricTest_AVAILABLE_DEVICES = OVClassBaseTestP; +using OVClassGetMetricTest_FULL_DEVICE_NAME = OVClassBaseTestP; +using OVClassGetMetricTest_OPTIMIZATION_CAPABILITIES = OVClassBaseTestP; +using OVClassGetMetricTest_DEVICE_GOPS = OVClassBaseTestP; +using OVClassGetMetricTest_DEVICE_TYPE = OVClassBaseTestP; +using OVClassGetMetricTest_NUMBER_OF_WAITING_INFER_REQUESTS = OVClassBaseTestP; +using OVClassGetMetricTest_NUMBER_OF_EXEC_INFER_REQUESTS = OVClassBaseTestP; +using OVClassGetMetricTest_RANGE_FOR_ASYNC_INFER_REQUESTS = OVClassBaseTestP; +using OVClassGetMetricTest_ThrowUnsupported = OVClassBaseTestP; +using OVClassGetConfigTest = OVClassBaseTestP; +using OVClassGetConfigTest_ThrowUnsupported = OVClassBaseTestP; +using OVClassGetConfigTest_ThrowUnsupported = OVClassBaseTestP; +using OVClassGetConfigTest_ThrowUnsupported = OVClassBaseTestP; +using OVClassGetAvailableDevices = OVClassBaseTestP; +using OVClassExecutableNetworkGetMetricTest = OVClassBaseTestP; +using OVClassGetMetricTest_RANGE_FOR_STREAMS = OVClassBaseTestP; +using OVClassExecutableNetworkGetMetricTest_SUPPORTED_CONFIG_KEYS = OVClassBaseTestP; +using OVClassExecutableNetworkGetMetricTest_SUPPORTED_METRICS = OVClassBaseTestP; 
+using OVClassExecutableNetworkGetMetricTest_NETWORK_NAME = OVClassBaseTestP; +using OVClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS = OVClassBaseTestP; +using OVClassExecutableNetworkGetMetricTest_ThrowsUnsupported = OVClassBaseTestP; +using OVClassExecutableNetworkGetConfigTest = OVClassBaseTestP; +using OVClassExecutableNetworkSetConfigTest = OVClassBaseTestP; +using OVClassExecutableNetworkGetConfigTest = OVClassBaseTestP; +using OVClassLoadNetworkAfterCoreRecreateTest = OVClassBaseTestP; + +class OVClassExecutableNetworkGetMetricTestForSpecificConfig + : public OVClassNetworkTest, + public WithParamInterface>> { +protected: + std::string deviceName; + std::string configKey; + std::string configValue; + +public: + void SetUp() override { + OVClassNetworkTest::SetUp(); + deviceName = get<0>(GetParam()); + std::tie(configKey, configValue) = get<1>(GetParam()); + } +}; + +using OVClassExecutableNetworkSupportedConfigTest = OVClassExecutableNetworkGetMetricTestForSpecificConfig; +using OVClassExecutableNetworkUnsupportedConfigTest = OVClassExecutableNetworkGetMetricTestForSpecificConfig; + +// +// Hetero Executable network case +// +class OVClassHeteroExecutableNetworkGetMetricTest : public OVClassNetworkTest, public WithParamInterface { +protected: + std::string deviceName; + std::string heteroDeviceName; + +public: + void SetUp() override { + OVClassNetworkTest::SetUp(); + deviceName = GetParam(); + heteroDeviceName = CommonTestUtils::DEVICE_HETERO + std::string(":") + deviceName + std::string(",") + + CommonTestUtils::DEVICE_CPU; + } +}; +using OVClassHeteroExecutableNetworkGetMetricTest_SUPPORTED_CONFIG_KEYS = OVClassHeteroExecutableNetworkGetMetricTest; +using OVClassHeteroExecutableNetworkGetMetricTest_SUPPORTED_METRICS = OVClassHeteroExecutableNetworkGetMetricTest; +using OVClassHeteroExecutableNetworkGetMetricTest_NETWORK_NAME = OVClassHeteroExecutableNetworkGetMetricTest; +using OVClassHeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK = OVClassHeteroExecutableNetworkGetMetricTest; +using OVClassLoadNetworkTest = OVClassQueryNetworkTest; + +bool supportsAvaliableDevices(ov::runtime::Core& ie, const std::string& deviceName) { + auto supportedMetricKeys = ie.get_metric(deviceName, METRIC_KEY(SUPPORTED_METRICS)).as>(); + return supportedMetricKeys.end() != + std::find(std::begin(supportedMetricKeys), std::end(supportedMetricKeys), METRIC_KEY(AVAILABLE_DEVICES)); +} + +TEST(OVClassBasicTest, smoke_createDefault) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ASSERT_NO_THROW(ov::runtime::Core ie); +} + +TEST_P(OVClassBasicTestP, registerExistingPluginThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_THROW(ie.register_plugin(pluginName, deviceName), Exception); +} + +TEST_P(OVClassBasicTestP, registerNewPluginNoThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_NO_THROW(ie.register_plugin(pluginName, "NEW_DEVICE_NAME")); + ASSERT_NO_THROW(ie.get_metric("NEW_DEVICE_NAME", METRIC_KEY(SUPPORTED_CONFIG_KEYS))); +} + +TEST(OVClassBasicTest, smoke_registerExistingPluginFileThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_THROW(ie.register_plugins("nonExistPlugins.xml"), Exception); +} + +TEST(OVClassBasicTest, smoke_createNonExistingConfigThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ASSERT_THROW(ov::runtime::Core ie("nonExistPlugins.xml"), Exception); +} + +#ifdef __linux__ + +TEST(OVClassBasicTest, 
smoke_createMockEngineConfigNoThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + std::string filename{"mock_engine_valid.xml"}; + std::string content{""}; + CommonTestUtils::createFile(filename, content); + ASSERT_NO_THROW(ov::runtime::Core ie(filename)); + CommonTestUtils::removeFile(filename.c_str()); +} + +TEST(OVClassBasicTest, smoke_createMockEngineConfigThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + std::string filename{"mock_engine.xml"}; + std::string content{""}; + CommonTestUtils::createFile(filename, content); + ASSERT_THROW(ov::runtime::Core ie(filename), Exception); + CommonTestUtils::removeFile(filename.c_str()); +} + +#endif + +#ifdef ENABLE_UNICODE_PATH_SUPPORT + +TEST_P(OVClassBasicTestP, smoke_registerPluginsXMLUnicodePath) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + std::string pluginXML{"mock_engine_valid.xml"}; + std::string content{""}; + CommonTestUtils::createFile(pluginXML, content); + + for (std::size_t testIndex = 0; testIndex < CommonTestUtils::test_unicode_postfix_vector.size(); testIndex++) { + GTEST_COUT << testIndex; + std::wstring postfix = L"_" + CommonTestUtils::test_unicode_postfix_vector[testIndex]; + std::wstring pluginsXmlW = CommonTestUtils::addUnicodePostfixToPath(pluginXML, postfix); + + try { + bool is_copy_successfully; + is_copy_successfully = CommonTestUtils::copyFile(pluginXML, pluginsXmlW); + if (!is_copy_successfully) { + FAIL() << "Unable to copy from '" << pluginXML << "' to '" + << ::FileUtils::wStringtoMBCSstringChar(pluginsXmlW) << "'"; + } + + GTEST_COUT << "Test " << testIndex << std::endl; + + ov::runtime::Core ie = createCoreWithTemplate(); + GTEST_COUT << "Core created " << testIndex << std::endl; + ASSERT_NO_THROW(ie.register_plugins(::FileUtils::wStringtoMBCSstringChar(pluginsXmlW))); + CommonTestUtils::removeFile(pluginsXmlW); +# if defined __linux__ && !defined(__APPLE__) + ASSERT_NO_THROW(ie.get_versions("mock")); // from pluginXML +# endif + ASSERT_NO_THROW(ie.get_versions(deviceName)); + GTEST_COUT << "Plugin created " << testIndex << std::endl; + + ASSERT_NO_THROW(ie.register_plugin(pluginName, "TEST_DEVICE")); + ASSERT_NO_THROW(ie.get_versions("TEST_DEVICE")); + GTEST_COUT << "Plugin registered and created " << testIndex << std::endl; + + GTEST_COUT << "OK" << std::endl; + } catch (const InferenceEngine::Exception& e_next) { + CommonTestUtils::removeFile(pluginsXmlW); + std::remove(pluginXML.c_str()); + FAIL() << e_next.what(); + } + } + CommonTestUtils::removeFile(pluginXML); +} + +#endif // ENABLE_UNICODE_PATH_SUPPORT + +// +// GetVersions() +// + +TEST_P(OVClassBasicTestP, getVersionsByExactDeviceNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_NO_THROW(ie.get_versions(deviceName + ".0")); +} + +TEST_P(OVClassBasicTestP, getVersionsByDeviceClassNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_NO_THROW(ie.get_versions(deviceName)); +} + +TEST_P(OVClassBasicTestP, getVersionsNonEmpty) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_EQ(2, ie.get_versions(CommonTestUtils::DEVICE_HETERO + std::string(":") + deviceName).size()); +} + +// +// UnregisterPlugin +// + +TEST_P(OVClassBasicTestP, unregisterExistingPluginNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + // device instance is not created yet + ASSERT_THROW(ie.unload_plugin(deviceName), Exception); + + // make the first call to IE which created device 
instance + ie.get_versions(deviceName); + // now, we can unregister device + ASSERT_NO_THROW(ie.unload_plugin(deviceName)); +} + +TEST_P(OVClassBasicTestP, accessToUnregisteredPluginThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_THROW(ie.unload_plugin(deviceName), Exception); + ASSERT_NO_THROW(ie.get_versions(deviceName)); + ASSERT_NO_THROW(ie.unload_plugin(deviceName)); + ASSERT_NO_THROW(ie.set_config({}, deviceName)); + ASSERT_NO_THROW(ie.get_versions(deviceName)); + ASSERT_NO_THROW(ie.unload_plugin(deviceName)); +} + +TEST(OVClassBasicTest, smoke_unregisterNonExistingPluginThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_THROW(ie.unload_plugin("unkown_device"), Exception); +} + +// +// SetConfig +// + +TEST_P(OVClassBasicTestP, SetConfigAllThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_NO_THROW(ie.set_config({{"unsupported_key", "4"}})); + ASSERT_ANY_THROW(ie.get_versions(deviceName)); +} + +TEST_P(OVClassBasicTestP, SetConfigForUnRegisteredDeviceThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_THROW(ie.set_config({{"unsupported_key", "4"}}, "unregistered_device"), Exception); +} + +TEST_P(OVClassBasicTestP, SetConfigNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_NO_THROW(ie.set_config({{KEY_PERF_COUNT, YES}}, deviceName)); +} + +TEST_P(OVClassBasicTestP, SetConfigAllNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_NO_THROW(ie.set_config({{KEY_PERF_COUNT, YES}})); + ASSERT_NO_THROW(ie.get_versions(deviceName)); +} + +TEST(OVClassBasicTest, smoke_SetConfigHeteroThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_NO_THROW(ie.set_config({{KEY_PERF_COUNT, YES}}, CommonTestUtils::DEVICE_HETERO)); +} + +TEST_P(OVClassBasicTestP, SetConfigHeteroTargetFallbackThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_NO_THROW(ie.set_config({{"TARGET_FALLBACK", deviceName}}, CommonTestUtils::DEVICE_HETERO)); +} + +TEST(OVClassBasicTest, smoke_SetConfigHeteroNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + bool value = false; + + ASSERT_NO_THROW(ie.set_config({{HETERO_CONFIG_KEY(DUMP_GRAPH_DOT), YES}}, CommonTestUtils::DEVICE_HETERO)); + ASSERT_NO_THROW(value = ie.get_config("HETERO", HETERO_CONFIG_KEY(DUMP_GRAPH_DOT)).as()); + ASSERT_TRUE(value); + + ASSERT_NO_THROW(ie.set_config({{HETERO_CONFIG_KEY(DUMP_GRAPH_DOT), NO}}, CommonTestUtils::DEVICE_HETERO)); + ASSERT_NO_THROW(value = ie.get_config("HETERO", HETERO_CONFIG_KEY(DUMP_GRAPH_DOT)).as()); + ASSERT_FALSE(value); +} + +// +// ImportNetwork +// + + +TEST_P(OVClassBasicTestP, ImportNetworkWithNullContextThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + RemoteContext::Ptr context = nullptr; + std::istringstream stream("None"); + ASSERT_THROW(ie.import_model(stream, context, {}), Exception); +} + +// +// LoadNetwork +// + +TEST_P(OVClassNetworkTestP, LoadNetworkActualNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_NO_THROW(ie.compile_model(actualNetwork, deviceName)); +} + +TEST_P(OVClassNetworkTestP, 
LoadNetworkActualHeteroDeviceNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_NO_THROW(ie.compile_model(actualNetwork, CommonTestUtils::DEVICE_HETERO + std::string(":") + deviceName)); +} + +TEST_P(OVClassNetworkTestP, LoadNetworkActualHeteroDevice2NoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_NO_THROW(ie.compile_model(actualNetwork, CommonTestUtils::DEVICE_HETERO, {{"TARGET_FALLBACK", deviceName}})); +} + +// +// ImportExportNetwork +// + +TEST_P(OVClassImportExportTestP, smoke_ImportNetworkNoThrowWithDeviceName) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + std::stringstream strm; + ExecutableNetwork executableNetwork; + ASSERT_NO_THROW(executableNetwork = ie.compile_model(actualNetwork, deviceName)); + ASSERT_NO_THROW(executableNetwork.Export(strm)); + ASSERT_NO_THROW(executableNetwork = ie.import_model(strm, deviceName)); + ASSERT_NO_THROW(executableNetwork.CreateInferRequest()); +} + +TEST_P(OVClassImportExportTestP, smoke_ExportUsingFileNameImportFromStreamNoThrowWithDeviceName) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ExecutableNetwork executableNetwork; + std::string fileName{"ExportedNetwork"}; + { + ASSERT_NO_THROW(executableNetwork = ie.compile_model(simpleNetwork, deviceName)); + ASSERT_NO_THROW(executableNetwork.Export(fileName)); + } + { + { + std::ifstream strm(fileName); + ASSERT_NO_THROW(executableNetwork = ie.import_model(strm, deviceName)); + } + ASSERT_EQ(0, remove(fileName.c_str())); + } + ASSERT_NO_THROW(executableNetwork.CreateInferRequest()); +} + +// +// QueryNetwork +// + +TEST_P(OVClassNetworkTestP, QueryNetworkActualThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_NO_THROW(ie.query_model(actualNetwork, CommonTestUtils::DEVICE_HETERO + std::string(":") + deviceName)); +} + +TEST_P(OVClassNetworkTestP, QueryNetworkActualNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + + try { + ie.query_model(actualNetwork, deviceName); + } catch (const InferenceEngine::Exception& ex) { + std::string message = ex.what(); + ASSERT_STR_CONTAINS(message, "[NOT_IMPLEMENTED] ngraph::Function is not supported natively"); + } +} + +TEST_P(OVClassNetworkTestP, QueryNetworkWithKSO) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + + try { + auto rres = ie.query_model(ksoNetwork, deviceName); + auto rl_map = rres.supportedLayersMap; + auto func = ksoNetwork; + for (const auto& op : func->get_ops()) { + if (!rl_map.count(op->get_friendly_name())) { + FAIL() << "Op " << op->get_friendly_name() << " is not supported by " << deviceName; + } + } + } catch (const InferenceEngine::Exception& ex) { + std::string message = ex.what(); + ASSERT_STR_CONTAINS(message, "[NOT_IMPLEMENTED] ngraph::Function is not supported natively"); + } +} + +TEST_P(OVClassNetworkTestP, SetAffinityWithConstantBranches) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + + try { + std::shared_ptr func; + { + ngraph::PartialShape shape({1, 84}); + ngraph::element::Type type(ngraph::element::Type_t::f32); + auto param = std::make_shared(type, shape); + auto matMulWeights = ngraph::opset6::Constant::create(ngraph::element::Type_t::f32, {10, 84}, {1}); + auto shapeOf = std::make_shared(matMulWeights); + auto gConst1 = 
ngraph::opset6::Constant::create(ngraph::element::Type_t::i32, {1}, {1}); + auto gConst2 = ngraph::opset6::Constant::create(ngraph::element::Type_t::i64, {}, {0}); + auto gather = std::make_shared(shapeOf, gConst1, gConst2); + auto concatConst = ngraph::opset6::Constant::create(ngraph::element::Type_t::i64, {1}, {1}); + auto concat = std::make_shared(ngraph::NodeVector{concatConst, gather}, 0); + auto relu = std::make_shared(param); + auto reshape = std::make_shared(relu, concat, false); + auto matMul = std::make_shared(reshape, matMulWeights, false, true); + auto matMulBias = ngraph::opset6::Constant::create(ngraph::element::Type_t::f32, {1, 10}, {1}); + auto addBias = std::make_shared(matMul, matMulBias); + auto result = std::make_shared(addBias); + + ngraph::ParameterVector params = {param}; + ngraph::ResultVector results = {result}; + + func = std::make_shared(results, params); + } + + auto rres = ie.query_model(func, deviceName); + auto rl_map = rres.supportedLayersMap; + for (const auto& op : func->get_ops()) { + if (!rl_map.count(op->get_friendly_name())) { + FAIL() << "Op " << op->get_friendly_name() << " is not supported by " << deviceName; + } + } + for (const auto& op : func->get_ops()) { + std::string affinity = rl_map[op->get_friendly_name()]; + op->get_rt_info()["affinity"] = std::make_shared>(affinity); + } + ExecutableNetwork exeNetwork = ie.compile_model(ksoNetwork, deviceName); + } catch (const NotImplemented& ex) { + std::string message = ex.what(); + ASSERT_STR_CONTAINS(message, "[NOT_IMPLEMENTED] ngraph::Function is not supported natively"); + } +} + +TEST_P(OVClassNetworkTestP, SetAffinityWithKSO) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + + try { + auto rres = ie.query_model(ksoNetwork, deviceName); + auto rl_map = rres.supportedLayersMap; + auto func = ksoNetwork; + for (const auto& op : func->get_ops()) { + if (!rl_map.count(op->get_friendly_name())) { + FAIL() << "Op " << op->get_friendly_name() << " is not supported by " << deviceName; + } + } + for (const auto& op : func->get_ops()) { + std::string affinity = rl_map[op->get_friendly_name()]; + op->get_rt_info()["affinity"] = std::make_shared>(affinity); + } + ExecutableNetwork exeNetwork = ie.compile_model(ksoNetwork, deviceName); + } catch (const InferenceEngine::Exception& ex) { + std::string message = ex.what(); + ASSERT_STR_CONTAINS(message, "[NOT_IMPLEMENTED] ngraph::Function is not supported natively"); + } +} + +TEST_P(OVClassNetworkTestP, QueryNetworkHeteroActualNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + QueryNetworkResult res; + ASSERT_NO_THROW( + res = ie.query_model(actualNetwork, CommonTestUtils::DEVICE_HETERO, {{"TARGET_FALLBACK", deviceName}})); + ASSERT_LT(0, res.supportedLayersMap.size()); +} + +TEST_P(OVClassNetworkTestP, QueryNetworkMultiThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + ASSERT_THROW(ie.query_model(actualNetwork, CommonTestUtils::DEVICE_MULTI), Exception); +} + +TEST(OVClassBasicTest, smoke_GetMetricSupportedMetricsHeteroNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + std::string deviceName = CommonTestUtils::DEVICE_HETERO; + + ASSERT_NO_THROW(p = ie.get_metric(deviceName, METRIC_KEY(SUPPORTED_METRICS))); + std::vector t = p; + + std::cout << "Supported HETERO metrics: " << std::endl; + for (auto&& str : t) { + std::cout << str << std::endl; + } + + 
ASSERT_METRIC_SUPPORTED(METRIC_KEY(SUPPORTED_METRICS)); +} + +TEST(OVClassBasicTest, smoke_GetMetricSupportedConfigKeysHeteroNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + std::string deviceName = CommonTestUtils::DEVICE_HETERO; + + ASSERT_NO_THROW(p = ie.get_metric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS))); + std::vector t = p; + + std::cout << "Supported HETERO config keys: " << std::endl; + for (auto&& str : t) { + std::cout << str << std::endl; + } + + ASSERT_METRIC_SUPPORTED(METRIC_KEY(SUPPORTED_CONFIG_KEYS)); +} + +TEST(OVClassBasicTest, smoke_GetMetricSupportedConfigKeysHeteroThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + // TODO: check + std::string targetDevice = CommonTestUtils::DEVICE_HETERO + std::string(":") + CommonTestUtils::DEVICE_CPU; + ASSERT_THROW(ie.get_metric(targetDevice, METRIC_KEY(SUPPORTED_CONFIG_KEYS)), Exception); +} + +TEST_P(OVClassGetMetricTest_SUPPORTED_METRICS, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_NO_THROW(p = ie.get_metric(deviceName, METRIC_KEY(SUPPORTED_METRICS))); + std::vector t = p; + + std::cout << "Supported metrics: " << std::endl; + for (auto&& str : t) { + std::cout << str << std::endl; + } + + ASSERT_METRIC_SUPPORTED(METRIC_KEY(SUPPORTED_METRICS)); +} + +TEST_P(OVClassGetMetricTest_SUPPORTED_CONFIG_KEYS, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_NO_THROW(p = ie.get_metric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS))); + std::vector t = p; + + std::cout << "Supported config values: " << std::endl; + for (auto&& str : t) { + std::cout << str << std::endl; + } + + ASSERT_METRIC_SUPPORTED(METRIC_KEY(SUPPORTED_CONFIG_KEYS)); +} + +TEST_P(OVClassGetMetricTest_AVAILABLE_DEVICES, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_NO_THROW(p = ie.get_metric(deviceName, METRIC_KEY(AVAILABLE_DEVICES))); + std::vector t = p; + + std::cout << "Available devices: " << std::endl; + for (auto&& str : t) { + std::cout << str << std::endl; + } + + ASSERT_METRIC_SUPPORTED(METRIC_KEY(AVAILABLE_DEVICES)); +} + +TEST_P(OVClassGetMetricTest_FULL_DEVICE_NAME, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_NO_THROW(p = ie.get_metric(deviceName, METRIC_KEY(FULL_DEVICE_NAME))); + std::string t = p; + std::cout << "Full device name: " << std::endl << t << std::endl; + + ASSERT_METRIC_SUPPORTED(METRIC_KEY(FULL_DEVICE_NAME)); +} + +TEST_P(OVClassGetMetricTest_OPTIMIZATION_CAPABILITIES, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_NO_THROW(p = ie.get_metric(deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES))); + std::vector t = p; + + std::cout << "Optimization capabilities: " << std::endl; + for (auto&& str : t) { + std::cout << str << std::endl; + } + + ASSERT_METRIC_SUPPORTED(METRIC_KEY(OPTIMIZATION_CAPABILITIES)); +} + +TEST_P(OVClassGetMetricTest_DEVICE_GOPS, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_NO_THROW(p = ie.get_metric(deviceName, METRIC_KEY(DEVICE_GOPS))); + 
std::map t = p; + + std::cout << "Device GOPS: " << std::endl; + for (auto&& kv : t) { + std::cout << kv.first << ": " << kv.second << std::endl; + } + + ASSERT_METRIC_SUPPORTED(METRIC_KEY(DEVICE_GOPS)); +} + +TEST_P(OVClassGetMetricTest_DEVICE_TYPE, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_NO_THROW(p = ie.get_metric(deviceName, METRIC_KEY(DEVICE_TYPE))); + InferenceEngine::Metrics::DeviceType t = p; + + std::cout << "Device Type: " << t << std::endl; + + ASSERT_METRIC_SUPPORTED(METRIC_KEY(DEVICE_TYPE)); +} + +TEST_P(OVClassGetMetricTest_NUMBER_OF_WAITING_INFER_REQUESTS, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_NO_THROW(p = ie.get_metric(deviceName, METRIC_KEY(NUMBER_OF_WAITING_INFER_REQUESTS))); + unsigned int t = p; + + std::cout << "Number of waiting infer requests: " << std::endl << t << std::endl; + + ASSERT_METRIC_SUPPORTED(METRIC_KEY(NUMBER_OF_WAITING_INFER_REQUESTS)); +} + +TEST_P(OVClassGetMetricTest_NUMBER_OF_EXEC_INFER_REQUESTS, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_NO_THROW(p = ie.get_metric(deviceName, METRIC_KEY(NUMBER_OF_EXEC_INFER_REQUESTS))); + unsigned int t = p; + + std::cout << "Number of executing infer requests: " << std::endl << t << std::endl; + + ASSERT_METRIC_SUPPORTED(METRIC_KEY(NUMBER_OF_EXEC_INFER_REQUESTS)); +} + +TEST_P(OVClassGetMetricTest_RANGE_FOR_ASYNC_INFER_REQUESTS, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_NO_THROW(p = ie.get_metric(deviceName, METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS))); + std::tuple t = p; + + unsigned int start = std::get<0>(t); + unsigned int end = std::get<1>(t); + unsigned int step = std::get<2>(t); + + std::cout << "Range for async infer requests: " << std::endl; + std::cout << start << std::endl; + std::cout << end << std::endl; + std::cout << step << std::endl; + std::cout << std::endl; + + ASSERT_LE(start, end); + ASSERT_GE(step, 1); + ASSERT_METRIC_SUPPORTED(METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)); +} + +TEST_P(OVClassGetMetricTest_RANGE_FOR_STREAMS, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_NO_THROW(p = ie.get_metric(deviceName, METRIC_KEY(RANGE_FOR_STREAMS))); + std::tuple t = p; + + unsigned int start = std::get<0>(t); + unsigned int end = std::get<1>(t); + + std::cout << "Range for streams: " << std::endl; + std::cout << start << std::endl; + std::cout << end << std::endl; + std::cout << std::endl; + + ASSERT_LE(start, end); + ASSERT_METRIC_SUPPORTED(METRIC_KEY(RANGE_FOR_STREAMS)); +} + +TEST_P(OVClassGetMetricTest_ThrowUnsupported, GetMetricThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_THROW(p = ie.get_metric(deviceName, "unsupported_metric"), Exception); +} + +TEST_P(OVClassGetConfigTest, GetConfigNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_NO_THROW(p = ie.get_metric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS))); + std::vector configValues = p; + + for (auto&& confKey : configValues) { + Parameter defaultValue; + ASSERT_NO_THROW(defaultValue = 
ie.get_config(deviceName, confKey)); + ASSERT_FALSE(defaultValue.empty()); + } +} + +TEST_P(OVClassGetConfigTest, GetConfigHeteroNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_NO_THROW(p = ie.get_metric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS))); + std::vector configValues = p; + + for (auto&& confKey : configValues) { + ASSERT_NO_THROW(ie.get_config(deviceName, confKey)); + } +} + +TEST_P(OVClassGetConfigTest_ThrowUnsupported, GetConfigHeteroThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_THROW(p = ie.get_config(CommonTestUtils::DEVICE_HETERO, "unsupported_config"), Exception); +} + +TEST_P(OVClassGetConfigTest_ThrowUnsupported, GetConfigHeteroWithDeviceThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_THROW(p = ie.get_config(CommonTestUtils::DEVICE_HETERO + std::string(":") + deviceName, + HETERO_CONFIG_KEY(DUMP_GRAPH_DOT)), + Exception); +} + +TEST_P(OVClassGetConfigTest_ThrowUnsupported, GetConfigThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_THROW(p = ie.get_config(deviceName, "unsupported_config"), Exception); +} + +TEST_P(OVClassGetAvailableDevices, GetAvailableDevicesNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + std::vector devices; + + ASSERT_NO_THROW(devices = ie.get_available_devices()); + + bool deviceFound = false; + std::cout << "Available devices: " << std::endl; + for (auto&& device : devices) { + if (device.find(deviceName) != std::string::npos) { + deviceFound = true; + } + + std::cout << device << " "; + } + std::cout << std::endl; + + ASSERT_TRUE(deviceFound); +} + +// +// ExecutableNetwork GetMetric / GetConfig +// +TEST_P(OVClassExecutableNetworkGetMetricTest_SUPPORTED_CONFIG_KEYS, GetMetricNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ExecutableNetwork exeNetwork = ie.compile_model(simpleNetwork, deviceName); + + ASSERT_NO_THROW(p = exeNetwork.GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS))); + std::vector configValues = p; + + std::cout << "Supported config keys: " << std::endl; + for (auto&& conf : configValues) { + std::cout << conf << std::endl; + ASSERT_LT(0, conf.size()); + } + ASSERT_LE(0, configValues.size()); + ASSERT_EXEC_METRIC_SUPPORTED(METRIC_KEY(SUPPORTED_CONFIG_KEYS)); +} + +TEST_P(OVClassExecutableNetworkGetMetricTest_SUPPORTED_METRICS, GetMetricNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ExecutableNetwork exeNetwork = ie.compile_model(simpleNetwork, deviceName); + + ASSERT_NO_THROW(p = exeNetwork.GetMetric(METRIC_KEY(SUPPORTED_METRICS))); + std::vector metricValues = p; + + std::cout << "Supported metric keys: " << std::endl; + for (auto&& conf : metricValues) { + std::cout << conf << std::endl; + ASSERT_LT(0, conf.size()); + } + ASSERT_LT(0, metricValues.size()); + ASSERT_EXEC_METRIC_SUPPORTED(METRIC_KEY(SUPPORTED_METRICS)); +} + +TEST_P(OVClassExecutableNetworkGetMetricTest_NETWORK_NAME, GetMetricNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ExecutableNetwork exeNetwork = ie.compile_model(simpleNetwork, deviceName); + + ASSERT_NO_THROW(p = 
exeNetwork.GetMetric(EXEC_NETWORK_METRIC_KEY(NETWORK_NAME))); + std::string networkname = p; + + std::cout << "Exe network name: " << std::endl << networkname << std::endl; + ASSERT_EQ(simpleNetwork->get_friendly_name(), networkname); + ASSERT_EXEC_METRIC_SUPPORTED(EXEC_NETWORK_METRIC_KEY(NETWORK_NAME)); +} + +TEST_P(OVClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS, GetMetricNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ExecutableNetwork exeNetwork = ie.compile_model(simpleNetwork, deviceName); + + ASSERT_NO_THROW(p = exeNetwork.GetMetric(EXEC_NETWORK_METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))); + unsigned int value = p; + + std::cout << "Optimal number of Inference Requests: " << value << std::endl; + ASSERT_GE(value, 1u); + ASSERT_EXEC_METRIC_SUPPORTED(EXEC_NETWORK_METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)); +} + +TEST_P(OVClassExecutableNetworkGetMetricTest_ThrowsUnsupported, GetMetricThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ExecutableNetwork exeNetwork = ie.compile_model(simpleNetwork, deviceName); + + ASSERT_THROW(p = exeNetwork.GetMetric("unsupported_metric"), Exception); +} + +TEST_P(OVClassExecutableNetworkGetConfigTest, GetConfigNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ExecutableNetwork exeNetwork = ie.compile_model(simpleNetwork, deviceName); + + ASSERT_NO_THROW(p = exeNetwork.GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS))); + std::vector configValues = p; + + for (auto&& confKey : configValues) { + Parameter defaultValue; + ASSERT_NO_THROW(defaultValue = ie.get_config(deviceName, confKey)); + ASSERT_FALSE(defaultValue.empty()); + } +} + +TEST_P(OVClassExecutableNetworkGetConfigTest, GetConfigThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ExecutableNetwork exeNetwork = ie.compile_model(simpleNetwork, deviceName); + + ASSERT_THROW(p = exeNetwork.GetConfig("unsupported_config"), Exception); +} + +TEST_P(OVClassExecutableNetworkSetConfigTest, SetConfigThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ExecutableNetwork exeNetwork = ie.compile_model(simpleNetwork, deviceName); + + ASSERT_THROW(exeNetwork.SetConfig({{"unsupported_config", "some_value"}}), Exception); +} + +TEST_P(OVClassExecutableNetworkSupportedConfigTest, SupportedConfigWorks) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ExecutableNetwork exeNetwork = ie.compile_model(simpleNetwork, deviceName); + + ASSERT_NO_THROW(exeNetwork.SetConfig({{configKey, configValue}})); + ASSERT_NO_THROW(p = exeNetwork.GetConfig(configKey)); + ASSERT_EQ(p, configValue); +} + +TEST_P(OVClassExecutableNetworkUnsupportedConfigTest, UnsupportedConfigThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + + ExecutableNetwork exeNetwork = ie.compile_model(simpleNetwork, deviceName); + + ASSERT_THROW(exeNetwork.SetConfig({{configKey, configValue}}), Exception); +} + +TEST_P(OVClassExecutableNetworkGetConfigTest, GetConfigNoEmptyNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter p; + + ASSERT_NO_THROW(p = ie.get_metric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS))); + std::vector 
devConfigValues = p; + + ExecutableNetwork exeNetwork = ie.compile_model(simpleNetwork, deviceName); + + ASSERT_NO_THROW(p = exeNetwork.GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS))); + std::vector execConfigValues = p; + + /* + for (auto && configKey : devConfigValues) { + ASSERT_NE(execConfigValues.end(), std::find(execConfigValues.begin(), execConfigValues.end(), configKey)); + + Parameter configValue; + ASSERT_NO_THROW(Parameter configValue = exeNetwork.get_config(configKey)); + } + */ +} + +TEST_P(OVClassHeteroExecutableNetworkGetMetricTest_SUPPORTED_CONFIG_KEYS, GetMetricNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter pHetero, pDevice; + + ExecutableNetwork heteroExeNetwork = ie.compile_model(actualNetwork, heteroDeviceName); + ExecutableNetwork deviceExeNetwork = ie.compile_model(actualNetwork, deviceName); + + ASSERT_NO_THROW(pHetero = heteroExeNetwork.GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS))); + ASSERT_NO_THROW(pDevice = deviceExeNetwork.GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS))); + std::vector heteroConfigValues = pHetero, deviceConfigValues = pDevice; + + std::cout << "Supported config keys: " << std::endl; + for (auto&& conf : heteroConfigValues) { + std::cout << conf << std::endl; + ASSERT_LT(0, conf.size()); + } + ASSERT_LE(0, heteroConfigValues.size()); + + // check that all device config values are present in hetero case + for (auto&& deviceConf : deviceConfigValues) { + auto it = std::find(heteroConfigValues.begin(), heteroConfigValues.end(), deviceConf); + ASSERT_TRUE(it != heteroConfigValues.end()); + + Parameter heteroConfigValue = heteroExeNetwork.GetConfig(deviceConf); + Parameter deviceConfigValue = deviceExeNetwork.GetConfig(deviceConf); + + // HETERO returns EXCLUSIVE_ASYNC_REQUESTS as a boolean value + if (CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS) != deviceConf) { + ASSERT_EQ(deviceConfigValue, heteroConfigValue); + } + } +} + +TEST_P(OVClassHeteroExecutableNetworkGetMetricTest_SUPPORTED_METRICS, GetMetricNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + Parameter pHetero, pDevice; + + ExecutableNetwork heteroExeNetwork = ie.compile_model(actualNetwork, heteroDeviceName); + ExecutableNetwork deviceExeNetwork = ie.compile_model(actualNetwork, deviceName); + + ASSERT_NO_THROW(pHetero = heteroExeNetwork.GetMetric(METRIC_KEY(SUPPORTED_METRICS))); + ASSERT_NO_THROW(pDevice = deviceExeNetwork.GetMetric(METRIC_KEY(SUPPORTED_METRICS))); + std::vector heteroMetricValues = pHetero, deviceMetricValues = pDevice; + + std::cout << "Supported metric keys: " << std::endl; + for (auto&& conf : heteroMetricValues) { + std::cout << conf << std::endl; + ASSERT_LT(0, conf.size()); + } + ASSERT_LT(0, heteroMetricValues.size()); + + const std::vector heteroSpecificMetrics = {METRIC_KEY(SUPPORTED_METRICS), + METRIC_KEY(SUPPORTED_CONFIG_KEYS)}; + + // check that all device metric values are present in hetero case + for (auto&& deviceMetricName : deviceMetricValues) { + auto it = std::find(heteroMetricValues.begin(), heteroMetricValues.end(), deviceMetricName); + ASSERT_TRUE(it != heteroMetricValues.end()); + + Parameter heteroMetricValue = heteroExeNetwork.GetMetric(deviceMetricName); + Parameter deviceMetricValue = deviceExeNetwork.GetMetric(deviceMetricName); + + if (std::find(heteroSpecificMetrics.begin(), heteroSpecificMetrics.end(), deviceMetricName) == + heteroSpecificMetrics.end()) { + ASSERT_TRUE(heteroMetricValue == deviceMetricValue); + } + } +} + 
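// A minimal usage sketch of the metric / config API that the surrounding tests exercise
// (illustrative only; the device name "CPU", the header paths and the standalone main() are
// assumptions of this sketch, not taken from the patch, which obtains its Core through
// createCoreWithTemplate() inside the GTest fixtures):

#include <iostream>
#include <string>
#include <vector>

#include <ie_plugin_config.hpp>       // METRIC_KEY / CONFIG_KEY macros (assumed header)
#include <openvino/runtime/core.hpp>  // ov::runtime::Core (assumed header)

int main() {
    ov::runtime::Core core;

    // Every registered device is expected to publish its supported metrics and config keys;
    // the HETERO tests above additionally check that the wrapped device's keys reappear there.
    const auto metrics = core.get_metric("CPU", METRIC_KEY(SUPPORTED_METRICS))
                             .as<std::vector<std::string>>();
    const auto configKeys = core.get_metric("CPU", METRIC_KEY(SUPPORTED_CONFIG_KEYS))
                                .as<std::vector<std::string>>();

    for (const auto& name : metrics)
        std::cout << "metric: " << name << std::endl;
    for (const auto& key : configKeys)
        std::cout << "config key: " << key << std::endl;
    return 0;
}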
+TEST_P(OVClassHeteroExecutableNetworkGetMetricTest_NETWORK_NAME, GetMetricNoThrow) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED();
+    ov::runtime::Core ie = createCoreWithTemplate();
+    Parameter p;
+
+    ExecutableNetwork exeNetwork = ie.compile_model(actualNetwork, heteroDeviceName);
+
+    ASSERT_NO_THROW(p = exeNetwork.GetMetric(EXEC_NETWORK_METRIC_KEY(NETWORK_NAME)));
+    std::string networkname = p;
+
+    std::cout << "Exe network name: " << std::endl << networkname << std::endl;
+}
+
+TEST_P(OVClassHeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK, GetMetricNoThrow) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED();
+    ov::runtime::Core ie = createCoreWithTemplate();
+    Parameter p;
+
+    setHeteroNetworkAffinity(deviceName);
+
+    ExecutableNetwork exeNetwork = ie.compile_model(actualNetwork, heteroDeviceName);
+
+    ASSERT_NO_THROW(p = exeNetwork.GetConfig("TARGET_FALLBACK"));
+    std::string targets = p;
+    auto expectedTargets = deviceName + "," + CommonTestUtils::DEVICE_CPU;
+
+    std::cout << "Exe network fallback targets: " << targets << std::endl;
+    ASSERT_EQ(expectedTargets, targets);
+}
+
+//
+// QueryNetwork with HETERO on particular device
+//
+bool supportsDeviceID(ov::runtime::Core& ie, const std::string& deviceName) {
+    auto supportedConfigKeys =
+        ie.get_metric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS)).as>();
+    return supportedConfigKeys.end() !=
+           std::find(std::begin(supportedConfigKeys), std::end(supportedConfigKeys), CONFIG_KEY(DEVICE_ID));
+}
+
+TEST_P(OVClassQueryNetworkTest, QueryNetworkHETEROWithDeviceIDNoThrow) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED();
+    ov::runtime::Core ie = createCoreWithTemplate();
+
+    if (supportsDeviceID(ie, deviceName)) {
+        auto deviceIDs = ie.get_metric(deviceName, METRIC_KEY(AVAILABLE_DEVICES)).as>();
+        if (deviceIDs.empty())
+            GTEST_SKIP();
+        ASSERT_NO_THROW(ie.query_model(actualNetwork,
+                                       CommonTestUtils::DEVICE_HETERO,
+                                       {{"TARGET_FALLBACK", deviceName + "."
+ deviceIDs[0] + "," + deviceName}})); + } else { + GTEST_SKIP(); + } +} + +TEST_P(OVClassQueryNetworkTest, QueryNetworkWithDeviceID) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + + if (supportsDeviceID(ie, deviceName)) { + try { + ie.query_model(simpleNetwork, deviceName + ".0"); + } catch (const InferenceEngine::Exception& ex) { + std::string message = ex.what(); + ASSERT_STR_CONTAINS(message, "[NOT_IMPLEMENTED] ngraph::Function is not supported natively"); + } + } else { + GTEST_SKIP(); + } +} + +TEST_P(OVClassQueryNetworkTest, QueryNetworkWithBigDeviceIDThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + + if (supportsDeviceID(ie, deviceName)) { + ASSERT_THROW(ie.query_model(actualNetwork, deviceName + ".110"), Exception); + } else { + GTEST_SKIP(); + } +} + +TEST_P(OVClassQueryNetworkTest, QueryNetworkWithInvalidDeviceIDThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + + if (supportsDeviceID(ie, deviceName)) { + ASSERT_THROW(ie.query_model(actualNetwork, deviceName + ".l0"), Exception); + } else { + GTEST_SKIP(); + } +} + +TEST_P(OVClassQueryNetworkTest, QueryNetworkHETEROWithBigDeviceIDThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + + if (supportsDeviceID(ie, deviceName)) { + ASSERT_THROW(ie.query_model(actualNetwork, + CommonTestUtils::DEVICE_HETERO, + {{"TARGET_FALLBACK", deviceName + ".100," + deviceName}}), + Exception); + } else { + GTEST_SKIP(); + } +} + +// +// LoadNetwork with HETERO on particular device +// +TEST_P(OVClassLoadNetworkTest, LoadNetworkHETEROWithDeviceIDNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + + if (supportsDeviceID(ie, deviceName)) { + auto deviceIDs = ie.get_metric(deviceName, METRIC_KEY(AVAILABLE_DEVICES)).as>(); + if (deviceIDs.empty()) + GTEST_SKIP(); + std::string heteroDevice = + CommonTestUtils::DEVICE_HETERO + std::string(":") + deviceName + "." + deviceIDs[0] + "," + deviceName; + ASSERT_NO_THROW(ie.compile_model(actualNetwork, heteroDevice)); + } else { + GTEST_SKIP(); + } +} + +TEST_P(OVClassLoadNetworkTest, LoadNetworkWithDeviceIDNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + + if (supportsDeviceID(ie, deviceName)) { + auto deviceIDs = ie.get_metric(deviceName, METRIC_KEY(AVAILABLE_DEVICES)).as>(); + if (deviceIDs.empty()) + GTEST_SKIP(); + ASSERT_NO_THROW(ie.compile_model(simpleNetwork, deviceName + "." 
+ deviceIDs[0])); + } else { + GTEST_SKIP(); + } +} + +TEST_P(OVClassLoadNetworkTest, LoadNetworkWithBigDeviceIDThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + + if (supportsDeviceID(ie, deviceName)) { + ASSERT_THROW(ie.compile_model(actualNetwork, deviceName + ".10"), Exception); + } else { + GTEST_SKIP(); + } +} + +TEST_P(OVClassLoadNetworkTest, LoadNetworkWithInvalidDeviceIDThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + + if (supportsDeviceID(ie, deviceName)) { + ASSERT_THROW(ie.compile_model(actualNetwork, deviceName + ".l0"), Exception); + } else { + GTEST_SKIP(); + } +} + +TEST_P(OVClassLoadNetworkTest, LoadNetworkHETEROWithBigDeviceIDThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + + if (supportsDeviceID(ie, deviceName)) { + ASSERT_THROW(ie.compile_model(actualNetwork, + "HETERO", + {{"TARGET_FALLBACK", deviceName + ".100," + CommonTestUtils::DEVICE_CPU}}), + Exception); + } else { + GTEST_SKIP(); + } +} + +TEST_P(OVClassLoadNetworkTest, LoadNetworkHETEROAndDeviceIDThrows) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + + if (supportsDeviceID(ie, deviceName)) { + ASSERT_THROW(ie.compile_model(actualNetwork, + CommonTestUtils::DEVICE_HETERO, + {{"TARGET_FALLBACK", deviceName + "," + CommonTestUtils::DEVICE_CPU}, + {CONFIG_KEY(DEVICE_ID), "110"}}), + Exception); + } else { + GTEST_SKIP(); + } +} + +// +// LoadNetwork with HETERO on MULTI combinations particular device +// + +TEST_P(OVClassLoadNetworkTest, LoadNetworkHETEROwithMULTINoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + if (supportsDeviceID(ie, deviceName) && supportsAvaliableDevices(ie, deviceName)) { + std::string devices; + auto availableDevices = ie.get_metric(deviceName, METRIC_KEY(AVAILABLE_DEVICES)).as>(); + for (auto&& device : availableDevices) { + devices += deviceName + '.' 
+ device; + if (&device != &(availableDevices.back())) { + devices += ','; + } + } + std::string targetFallback(CommonTestUtils::DEVICE_MULTI + std::string(",") + deviceName); + ASSERT_NO_THROW( + ie.compile_model(actualNetwork, + CommonTestUtils::DEVICE_HETERO, + {{MULTI_CONFIG_KEY(DEVICE_PRIORITIES), devices}, {"TARGET_FALLBACK", targetFallback}})); + } else { + GTEST_SKIP(); + } +} + +TEST_P(OVClassLoadNetworkTest, LoadNetworkMULTIwithHETERONoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + + if (supportsDeviceID(ie, deviceName) && supportsAvaliableDevices(ie, deviceName)) { + std::string devices; + auto availableDevices = ie.get_metric(deviceName, METRIC_KEY(AVAILABLE_DEVICES)).as>(); + for (auto&& device : availableDevices) { + devices += CommonTestUtils::DEVICE_HETERO + std::string(".") + device; + if (&device != &(availableDevices.back())) { + devices += ','; + } + } + ASSERT_NO_THROW(ie.compile_model( + actualNetwork, + CommonTestUtils::DEVICE_MULTI, + {{MULTI_CONFIG_KEY(DEVICE_PRIORITIES), devices}, {"TARGET_FALLBACK", deviceName + "," + deviceName}})); + } else { + GTEST_SKIP(); + } +} + +// +// QueryNetwork with HETERO on MULTI combinations particular device +// + +TEST_P(OVClassLoadNetworkTest, QueryNetworkHETEROWithMULTINoThrow_V10) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + + if (supportsDeviceID(ie, deviceName) && supportsAvaliableDevices(ie, deviceName)) { + std::string devices; + auto availableDevices = ie.get_metric(deviceName, METRIC_KEY(AVAILABLE_DEVICES)).as>(); + for (auto&& device : availableDevices) { + devices += deviceName + '.' + device; + if (&device != &(availableDevices.back())) { + devices += ','; + } + } + auto function = multinputNetwork; + ASSERT_NE(nullptr, function); + std::unordered_set expectedLayers; + for (auto&& node : function->get_ops()) { + expectedLayers.emplace(node->get_friendly_name()); + } + QueryNetworkResult result; + std::string targetFallback(CommonTestUtils::DEVICE_MULTI + std::string(",") + deviceName); + ASSERT_NO_THROW(result = ie.query_model( + multinputNetwork, + CommonTestUtils::DEVICE_HETERO, + {{MULTI_CONFIG_KEY(DEVICE_PRIORITIES), devices}, {"TARGET_FALLBACK", targetFallback}})); + + std::unordered_set actualLayers; + for (auto&& layer : result.supportedLayersMap) { + actualLayers.emplace(layer.first); + } + ASSERT_EQ(expectedLayers, actualLayers); + } else { + GTEST_SKIP(); + } +} + +TEST_P(OVClassLoadNetworkTest, QueryNetworkMULTIWithHETERONoThrow_V10) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + + if (supportsDeviceID(ie, deviceName) && supportsAvaliableDevices(ie, deviceName)) { + std::string devices; + auto availableDevices = ie.get_metric(deviceName, METRIC_KEY(AVAILABLE_DEVICES)).as>(); + for (auto&& device : availableDevices) { + devices += "HETERO." 
+ device; + if (&device != &(availableDevices.back())) { + devices += ','; + } + } + auto function = multinputNetwork; + ASSERT_NE(nullptr, function); + std::unordered_set expectedLayers; + for (auto&& node : function->get_ops()) { + expectedLayers.emplace(node->get_friendly_name()); + } + QueryNetworkResult result; + ASSERT_NO_THROW(result = ie.query_model(multinputNetwork, + CommonTestUtils::DEVICE_MULTI, + {{MULTI_CONFIG_KEY(DEVICE_PRIORITIES), devices}, + {"TARGET_FALLBACK", deviceName + "," + deviceName}})); + + std::unordered_set actualLayers; + for (auto&& layer : result.supportedLayersMap) { + actualLayers.emplace(layer.first); + } + ASSERT_EQ(expectedLayers, actualLayers); + } else { + GTEST_SKIP(); + } +} + +// TODO: Enable this test with pre-processing +TEST_P(OVClassLoadNetworkAfterCoreRecreateTest, DISABLED_LoadAfterRecreateCoresAndPlugins) { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + ov::runtime::Core ie = createCoreWithTemplate(); + { + auto versions = ie.get_versions(std::string(CommonTestUtils::DEVICE_MULTI) + ":" + deviceName + "," + + CommonTestUtils::DEVICE_CPU); + ASSERT_EQ(3, versions.size()); + } + std::map config; + if (deviceName == CommonTestUtils::DEVICE_CPU) { + config.insert({"CPU_THREADS_NUM", "3"}); + } + // ASSERT_NO_THROW({ + // ov::runtime::Core ie = createCoreWithTemplate(); + // std::string name = actualNetwork.getInputsInfo().begin()->first; + // actualNetwork.getInputsInfo().at(name)->setPrecision(Precision::U8); + // auto executableNetwork = ie.compile_model(actualNetwork, deviceName, config); + // }); +}; +} // namespace BehaviorTestsDefinitions From 2a5584791c9f1e89654e69b2355f667c1d0c4b3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dawid=20Ko=C5=BCykowski?= Date: Tue, 10 Aug 2021 07:38:11 +0200 Subject: [PATCH 11/24] Revise LogicalOr operation reference implementation (#6867) * add tests for logical or op * remove redundant overrides * add inputs/outputs tensor check * create type_prop typed test for logical ops * add new line in logical_or.cpp file * refactor logical_and type_prop test * update test labels * beautify or.hpp file * fix formatting to match clang-format * beautifyfunctional test files * move validate_and_infer_elementwise_logical() implementation to validate_and_infer_types() * refactor logial or functional test to * refactor logial aA functional test t * update constants file * add file to instantiate TEST_P and avoid test execution duplication * add missing empty lies at the end of files * remove unused variable --- .../tests/functional/op_reference/logical.cpp | 17 +++ .../tests/functional/op_reference/logical.hpp | 62 ++++++++ .../functional/op_reference/logical_and.cpp | 93 ++++-------- .../functional/op_reference/logical_or.cpp | 48 ++++++ .../layer_tests_summary/utils/constants.py | 1 + .../op/util/binary_elementwise_logical.hpp | 1 - ngraph/core/src/op/or.cpp | 17 +-- .../op/util/binary_elementwise_logical.cpp | 13 +- ngraph/test/CMakeLists.txt | 2 +- ngraph/test/backend/logical_or.in.cpp | 33 ----- ngraph/test/runtime/ie/unit_test.manifest | 1 - ngraph/test/type_prop/logical_and.cpp | 72 +-------- ngraph/test/type_prop/logical_ops.hpp | 139 ++++++++++++++++++ ngraph/test/type_prop/logical_or.cpp | 14 ++ 14 files changed, 326 insertions(+), 187 deletions(-) create mode 100644 docs/template_plugin/tests/functional/op_reference/logical.cpp create mode 100644 docs/template_plugin/tests/functional/op_reference/logical.hpp create mode 100644 docs/template_plugin/tests/functional/op_reference/logical_or.cpp delete mode 100644 
ngraph/test/backend/logical_or.in.cpp create mode 100644 ngraph/test/type_prop/logical_ops.hpp create mode 100644 ngraph/test/type_prop/logical_or.cpp diff --git a/docs/template_plugin/tests/functional/op_reference/logical.cpp b/docs/template_plugin/tests/functional/op_reference/logical.cpp new file mode 100644 index 00000000000..0cddb32d55c --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/logical.cpp @@ -0,0 +1,17 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "logical.hpp" + +namespace reference_tests { +namespace LogicalOpsRefTestDefinitions { +namespace { + +TEST_P(ReferenceLogicalLayerTest, LogicalWithHardcodedRefs) { + Exec(); +} + +} // namespace +} // namespace LogicalOpsRefTestDefinitions +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/logical.hpp b/docs/template_plugin/tests/functional/op_reference/logical.hpp new file mode 100644 index 00000000000..adf8e22f957 --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/logical.hpp @@ -0,0 +1,62 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" +#include "ngraph_functions/builders.hpp" + +namespace reference_tests { +namespace LogicalOpsRefTestDefinitions { + +struct RefLogicalParams { + ngraph::helpers::LogicalTypes opType; + Tensor input1; + Tensor input2; + Tensor expected; +}; + +struct Builder : ParamsBuilder { + REFERENCE_TESTS_ADD_SET_PARAM(Builder, opType); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, input1); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, input2); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, expected); +}; + +class ReferenceLogicalLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + const auto& params = GetParam(); + function = CreateFunction(params.opType, params.input1.shape, params.input2.shape, params.input1.type); + inputData = {params.input1.data, params.input2.data}; + refOutData = {params.expected.data}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + const auto& param = obj.param; + std::ostringstream result; + result << "LogicalType=" << param.opType << "_"; + result << "inpt_shape1=" << param.input1.shape << "_"; + result << "inpt_shape2=" << param.input2.shape << "_"; + result << "iType=" << param.input1.type << "_"; + result << "oType=" << param.expected.type; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(ngraph::helpers::LogicalTypes op_type, const ngraph::PartialShape& input_shape1, + const ngraph::PartialShape& input_shape2, const ngraph::element::Type& elem_type) { + const auto in1 = std::make_shared(elem_type, input_shape1); + const auto in2 = std::make_shared(elem_type, input_shape2); + const auto logical_op = ngraph::builder::makeLogical(in1, in2, op_type); + return std::make_shared(ngraph::NodeVector {logical_op}, ngraph::ParameterVector {in1, in2}); + } +}; +} // namespace LogicalOpsRefTestDefinitions +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/logical_and.cpp b/docs/template_plugin/tests/functional/op_reference/logical_and.cpp index 0313874533e..0f19bbe50b4 100644 --- a/docs/template_plugin/tests/functional/op_reference/logical_and.cpp +++ b/docs/template_plugin/tests/functional/op_reference/logical_and.cpp @@ -10,74 +10,39 @@ #include #include 
-#include "base_reference_test.hpp" +#include "logical.hpp" -using namespace reference_tests; using namespace ngraph; using namespace InferenceEngine; +using LogicalTypes = ngraph::helpers::LogicalTypes; +namespace reference_tests { +namespace LogicalOpsRefTestDefinitions { +namespace { -struct LogicalAndParams { - template - LogicalAndParams(const ngraph::PartialShape& input_shape1, const ngraph::PartialShape& input_shape2 , - const std::vector& iValues1, const std::vector& iValues2, const std::vector& oValues) - : pshape1(input_shape1), pshape2(input_shape2), inType(ngraph::element::boolean), outType(ngraph::element::boolean), - inputData1(CreateBlob(ngraph::element::boolean, iValues1)), inputData2(CreateBlob(ngraph::element::boolean, iValues2)), - refData(CreateBlob(ngraph::element::boolean, oValues)) {} - ngraph::PartialShape pshape1; - ngraph::PartialShape pshape2; - ngraph::element::Type inType; - ngraph::element::Type outType; - InferenceEngine::Blob::Ptr inputData1; - InferenceEngine::Blob::Ptr inputData2; - InferenceEngine::Blob::Ptr refData; -}; - -class ReferenceLogicalAndLayerTest : public testing::TestWithParam, public CommonReferenceTest { -public: - void SetUp() override { - auto params = GetParam(); - function = CreateFunction(params.pshape1, params.pshape2, params.inType); - inputData = {params.inputData1, params.inputData2}; - refOutData = {params.refData}; - } - static std::string getTestCaseName(const testing::TestParamInfo& obj) { - auto param = obj.param; - std::ostringstream result; - result << "input_shape1=" << param.pshape1 << "_"; - result << "input_shape2=" << param.pshape2 << "_"; - result << "iType=" << param.inType << "_"; - result << "oType=" << param.outType; - return result.str(); - } - -private: - static std::shared_ptr CreateFunction(const PartialShape& input_shape1, - const PartialShape& input_shape2, const element::Type& input_type) { - const auto in = std::make_shared(input_type, input_shape1); - const auto in2 = std::make_shared(input_type, input_shape2); - const auto logical_and = std::make_shared(in, in2); - return std::make_shared(NodeVector {logical_and}, ParameterVector {in, in2}); - } -}; - -TEST_P(ReferenceLogicalAndLayerTest, CompareWithHardcodedRefs) { - Exec(); +std::vector generateLogicalParams() { + std::vector logicalParams { + Builder {} + .opType(LogicalTypes::LOGICAL_AND) + .input1({{2, 2}, element::boolean, std::vector {true, false, true, false}}) + .input2({{2, 2}, element::boolean, std::vector {false, true, true, false}}) + .expected({{2, 2}, element::boolean, std::vector {false, false, true, false}}), + Builder {} + .opType(LogicalTypes::LOGICAL_AND) + .input1({{2, 1, 2, 1}, element::boolean, std::vector {true, false, true, false}}) + .input2({{1, 1, 2, 1}, element::boolean, std::vector {true, false}}) + .expected({{2, 1, 2, 1}, element::boolean, std::vector {true, false, true, false}}), + Builder {} + .opType(LogicalTypes::LOGICAL_AND) + .input1({{3, 4}, element::boolean, std::vector {true, true, true, true, true, false, true, false, false, true, true, true}}) + .input2({{3, 4}, element::boolean, std::vector {true, true, true, true, true, false, true, false, false, true, true, false}}) + .expected({{3, 4}, element::boolean, std::vector {true, true, true, true, true, false, true, false, false, true, true, false}})}; + return logicalParams; } -INSTANTIATE_TEST_SUITE_P( - smoke_LogicalAnd_With_Hardcoded_Refs, ReferenceLogicalAndLayerTest, - ::testing::Values( - LogicalAndParams(ngraph::PartialShape {2, 2}, ngraph::PartialShape {2, 
2}, - std::vector {true, false, true, false}, - std::vector {false, true, true, false}, - std::vector {false, false, true, false}), - LogicalAndParams(ngraph::PartialShape {2, 1, 2, 1}, ngraph::PartialShape {1, 1, 2, 1}, - std::vector {true, false, true, false}, - std::vector {true, false}, - std::vector {true, false, true, false}), - LogicalAndParams(ngraph::PartialShape {3, 4}, ngraph::PartialShape {3, 4}, - std::vector {true, true, true, true, true, false, true, false, false, true, true, true}, - std::vector {true, true, true, true, true, false, true, false, false, true, true, false}, - std::vector {true, true, true, true, true, false, true, false, false, true, true, false})), - ReferenceLogicalAndLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_LogicalAnd_With_Hardcoded_Refs, ReferenceLogicalLayerTest, ::testing::ValuesIn(generateLogicalParams()), + ReferenceLogicalLayerTest::getTestCaseName); + +} // namespace +} // namespace LogicalOpsRefTestDefinitions +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/logical_or.cpp b/docs/template_plugin/tests/functional/op_reference/logical_or.cpp new file mode 100644 index 00000000000..9bd4c61539f --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/logical_or.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "logical.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using LogicalTypes = ngraph::helpers::LogicalTypes; + +namespace reference_tests { +namespace LogicalOpsRefTestDefinitions { +namespace { + +std::vector generateLogicalParams() { + std::vector logicalParams { + Builder {} + .opType(LogicalTypes::LOGICAL_OR) + .input1({{2, 2}, element::boolean, std::vector {true, false, true, false}}) + .input2({{2, 2}, element::boolean, std::vector {false, true, true, false}}) + .expected({{2, 2}, element::boolean, std::vector {true, true, true, false}}), + Builder {} + .opType(LogicalTypes::LOGICAL_OR) + .input1({{2, 1, 2, 1}, element::boolean, std::vector {true, false, true, false}}) + .input2({{1, 1, 2, 1}, element::boolean, std::vector {true, false}}) + .expected({{2, 1, 2, 1}, element::boolean, std::vector {true, false, true, false}}), + Builder {} + .opType(LogicalTypes::LOGICAL_OR) + .input1({{3, 4}, element::boolean, std::vector {true, true, true, true, true, false, true, false, false, true, true, true}}) + .input2({{3, 4}, element::boolean, std::vector {true, true, true, true, true, true, true, false, false, true, true, false}}) + .expected({{3, 4}, element::boolean, std::vector {true, true, true, true, true, true, true, false, false, true, true, true}})}; + return logicalParams; +} + +INSTANTIATE_TEST_SUITE_P(smoke_LogicalOr_With_Hardcoded_Refs, ReferenceLogicalLayerTest, ::testing::ValuesIn(generateLogicalParams()), + ReferenceLogicalLayerTest::getTestCaseName); + +} // namespace +} // namespace LogicalOpsRefTestDefinitions +} // namespace reference_tests diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/utils/constants.py b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/utils/constants.py index fee2d9ba6d2..f847e870647 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/utils/constants.py +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/utils/constants.py @@ -59,6 
+59,7 @@ VERIFIED_OP_REFERENCES = [ 'LSTMCell-4', 'LSTMSequence-5', 'LogicalAnd-1' + 'LogicalOr-1' 'LogSoftmax-5', 'Loop-5', 'MVN-1', diff --git a/ngraph/core/include/ngraph/op/util/binary_elementwise_logical.hpp b/ngraph/core/include/ngraph/op/util/binary_elementwise_logical.hpp index babe24813ec..1076d663968 100644 --- a/ngraph/core/include/ngraph/op/util/binary_elementwise_logical.hpp +++ b/ngraph/core/include/ngraph/op/util/binary_elementwise_logical.hpp @@ -61,7 +61,6 @@ namespace ngraph bool visit_attributes(AttributeVisitor& visitor) override; private: - void validate_and_infer_elementwise_logical(const op::AutoBroadcastSpec& autob); AutoBroadcastSpec m_autob = AutoBroadcastSpec::NUMPY; }; } // namespace util diff --git a/ngraph/core/src/op/or.cpp b/ngraph/core/src/op/or.cpp index f5b1deafdd6..0990e85b4e3 100644 --- a/ngraph/core/src/op/or.cpp +++ b/ngraph/core/src/op/or.cpp @@ -7,6 +7,8 @@ #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/reference/or.hpp" +#include "ngraph/validation_util.hpp" + using namespace std; using namespace ngraph; @@ -54,12 +56,6 @@ namespace logor switch (arg0->get_element_type()) { NGRAPH_TYPE_CASE(evaluate_logor, boolean, arg0, arg1, out, broadcast_spec); - NGRAPH_TYPE_CASE(evaluate_logor, i32, arg0, arg1, out, broadcast_spec); - NGRAPH_TYPE_CASE(evaluate_logor, i64, arg0, arg1, out, broadcast_spec); - NGRAPH_TYPE_CASE(evaluate_logor, u32, arg0, arg1, out, broadcast_spec); - NGRAPH_TYPE_CASE(evaluate_logor, u64, arg0, arg1, out, broadcast_spec); - NGRAPH_TYPE_CASE(evaluate_logor, f16, arg0, arg1, out, broadcast_spec); - NGRAPH_TYPE_CASE(evaluate_logor, f32, arg0, arg1, out, broadcast_spec); default: rc = false; break; } return rc; @@ -70,6 +66,7 @@ bool op::v1::LogicalOr::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { NGRAPH_OP_SCOPE(v1_LogicalOr_evaluate); + NGRAPH_CHECK(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 2)); return logor::evaluate_logor(inputs[0], inputs[1], outputs[0], get_autob()); } @@ -78,13 +75,7 @@ bool op::v1::LogicalOr::has_evaluate() const NGRAPH_OP_SCOPE(v1_LogicalOr_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::boolean: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: return true; + case ngraph::element::boolean: return true; default: break; } return false; diff --git a/ngraph/core/src/op/util/binary_elementwise_logical.cpp b/ngraph/core/src/op/util/binary_elementwise_logical.cpp index b754df101f9..1e43ac15b60 100644 --- a/ngraph/core/src/op/util/binary_elementwise_logical.cpp +++ b/ngraph/core/src/op/util/binary_elementwise_logical.cpp @@ -22,10 +22,11 @@ op::util::BinaryElementwiseLogical::BinaryElementwiseLogical(const Output& { } -void op::util::BinaryElementwiseLogical::validate_and_infer_elementwise_logical( - const op::AutoBroadcastSpec& autob) +void op::util::BinaryElementwiseLogical::validate_and_infer_types() { - auto args_et_pshape = op::util::validate_and_infer_elementwise_args(this, autob); + NGRAPH_OP_SCOPE(v0_util_BinaryElementwiseLogical_validate_and_infer_types); + + auto args_et_pshape = op::util::validate_and_infer_elementwise_args(this, m_autob); element::Type& args_et = std::get<0>(args_et_pshape); PartialShape& args_pshape = std::get<1>(args_et_pshape); @@ -39,12 +40,6 @@ void op::util::BinaryElementwiseLogical::validate_and_infer_elementwise_logical( set_output_type(0, 
element::boolean, args_pshape); } -void op::util::BinaryElementwiseLogical::validate_and_infer_types() -{ - NGRAPH_OP_SCOPE(v0_util_BinaryElementwiseLogical_validate_and_infer_types); - validate_and_infer_elementwise_logical(m_autob); -} - bool op::util::BinaryElementwiseLogical::visit_attributes(AttributeVisitor& visitor) { NGRAPH_OP_SCOPE(v0_util_BinaryElementwiseLogical_visit_attributes); diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt index 411fb661b92..1b0ee82ce08 100644 --- a/ngraph/test/CMakeLists.txt +++ b/ngraph/test/CMakeLists.txt @@ -153,6 +153,7 @@ set(SRC type_prop/idft.cpp type_prop/interpolate.cpp type_prop/logical_and.cpp + type_prop/logical_or.cpp type_prop/lrn.cpp type_prop/lstm_cell.cpp type_prop/lstm_sequence.cpp @@ -456,7 +457,6 @@ set(MULTI_TEST_SRC backend/log.in.cpp backend/log_softmax.in.cpp backend/logical_not.in.cpp - backend/logical_or.in.cpp backend/logical_xor.in.cpp backend/lrn.in.cpp backend/matmul.in.cpp diff --git a/ngraph/test/backend/logical_or.in.cpp b/ngraph/test/backend/logical_or.in.cpp deleted file mode 100644 index a91e2fd06fd..00000000000 --- a/ngraph/test/backend/logical_or.in.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "gtest/gtest.h" -#include "ngraph/ngraph.hpp" -#include "util/engine/test_engines.hpp" -#include "util/test_case.hpp" -#include "util/test_control.hpp" - -NGRAPH_SUPPRESS_DEPRECATED_START - -using namespace std; -using namespace ngraph; - -static string s_manifest = "${MANIFEST}"; -using TestEngine = test::ENGINE_CLASS_NAME(${BACKEND_NAME}); - -NGRAPH_TEST(${BACKEND_NAME}, logical_or) -{ - Shape shape{2, 2, 2}; - auto A = make_shared(element::boolean, shape); - auto B = make_shared(element::boolean, shape); - auto f = make_shared(make_shared(A, B), ParameterVector{A, B}); - - std::vector a{1, 0, 1, 1, 1, 0, 1, 0}; - std::vector b{0, 0, 1, 0, 0, 1, 1, 0}; - - auto test_case = test::TestCase(f); - test_case.add_multiple_inputs({a, b}); - test_case.add_expected_output(shape, {1, 0, 1, 1, 1, 1, 1, 0}); - test_case.run(); -} diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest index 6c420ed96b6..2f7fd7c7c4b 100644 --- a/ngraph/test/runtime/ie/unit_test.manifest +++ b/ngraph/test/runtime/ie/unit_test.manifest @@ -485,7 +485,6 @@ IE_CPU.onnx_roi_align_f32 # [NOT_IMPLEMENTED] Input image format BOOL is not supported yet... 
not logical_xor -logical_or logical_and gather_axis_0_bool auto_bcast_binary_elementwise diff --git a/ngraph/test/type_prop/logical_and.cpp b/ngraph/test/type_prop/logical_and.cpp index 2a8699cfbe8..304a920a593 100644 --- a/ngraph/test/type_prop/logical_and.cpp +++ b/ngraph/test/type_prop/logical_and.cpp @@ -2,71 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "gtest/gtest.h" -#include "ngraph/ngraph.hpp" +#include "logical_ops.hpp" #include "util/type_prop.hpp" -using namespace std; -using namespace ngraph; +using Type = + ::testing::Types>; -namespace { - void incorrect_init(const ngraph::element::Type& type, const std::string& err, const Shape& shape1 = {1, 3, 6}, const Shape& shape2 = {1, 3, 6}) { - auto input1 = make_shared(type, shape1); - auto input2 = make_shared(type, shape2); - try - { - auto logical_and = make_shared(input1, input2); - } - catch (const NodeValidationFailure& error) - { - EXPECT_HAS_SUBSTRING(error.what(), err); - } - } -} - -TEST(type_prop, logical_and_incorrect_type_f32) -{ - incorrect_init(element::f32, "Operands for logical operators must have boolean element type but have element type f32"); -} - -TEST(type_prop, logical_and_incorrect_type_f64) -{ - incorrect_init(element::f64, "Operands for logical operators must have boolean element type but have element type f64"); -} - -TEST(type_prop, logical_and_incorrect_type_i32) -{ - incorrect_init(element::i32, "Operands for logical operators must have boolean element type but have element type i32"); -} - -TEST(type_prop, logical_and_incorrect_type_i64) -{ - incorrect_init(element::i64, "Operands for logical operators must have boolean element type but have element type i64"); -} - -TEST(type_prop, logical_and_incorrect_type_u32) -{ - incorrect_init(element::u32, "Operands for logical operators must have boolean element type but have element type u32"); -} - -TEST(type_prop, logical_and_incorrect_type_u64) -{ - incorrect_init(element::u64, "Operands for logical operators must have boolean element type but have element type u64"); - -} - -TEST(type_prop, logical_and_incorrect_shape) -{ - incorrect_init(element::boolean, "Argument shapes are inconsistent", Shape {1, 3, 6}, Shape {1, 2, 3}); -} - -TEST(type_prop, logical_and_broadcast) -{ - auto input1 = make_shared(element::boolean, Shape{1, 1, 6}); - auto input2 = make_shared(element::boolean, Shape{1, 3, 1}); - - auto logical_and = make_shared(input1, input2); - - ASSERT_EQ(logical_and->get_element_type(), element::boolean); - ASSERT_EQ(logical_and->get_shape(), (Shape{1, 3, 6})); -} +INSTANTIATE_TYPED_TEST_SUITE_P(Type_prop_test, + LogicalOperatorTypeProp, + Type, + LogicalOperatorTypeName); diff --git a/ngraph/test/type_prop/logical_ops.hpp b/ngraph/test/type_prop/logical_ops.hpp new file mode 100644 index 00000000000..b9eda456c0c --- /dev/null +++ b/ngraph/test/type_prop/logical_ops.hpp @@ -0,0 +1,139 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "gtest/gtest.h" +#include "ngraph/ngraph.hpp" +#include "util/type_prop.hpp" + +template +class LogicalOperatorType +{ +public: + using op_type = T; + static constexpr ngraph::element::Type_t element_type = ELEMENT_TYPE; +}; + +template +class LogicalOperatorTypeProp : public testing::Test +{ +}; + +class LogicalOperatorTypeName +{ +public: + template + static std::string GetName(int) + { + using OP_Type = typename T::op_type; + const ngraph::Node::type_info_t typeinfo = OP_Type::get_type_info_static(); + return typeinfo.name; + } +}; + 
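// The TYPED_TEST_SUITE_P / TYPED_TEST_P / REGISTER_TYPED_TEST_SUITE_P macros used below follow
// GoogleTest's type-parameterized test pattern. A minimal, self-contained sketch of that pattern
// (the suite name, test name and value types here are illustrative, not taken from ngraph;
// link against gtest_main to run it):

#include <gtest/gtest.h>

template <typename T>
class ArithmeticTypeProp : public ::testing::Test {};

TYPED_TEST_SUITE_P(ArithmeticTypeProp);  // declare the type-parameterized suite

TYPED_TEST_P(ArithmeticTypeProp, zero_is_neutral) {
    TypeParam zero{};  // TypeParam is whichever type the suite is instantiated with
    EXPECT_EQ(zero + zero, zero);
}

REGISTER_TYPED_TEST_SUITE_P(ArithmeticTypeProp, zero_is_neutral);

using ArithmeticTypes = ::testing::Types<int, float, double>;
INSTANTIATE_TYPED_TEST_SUITE_P(Numeric, ArithmeticTypeProp, ArithmeticTypes);

// In logical_ops.hpp only the suite and its tests are declared and registered; logical_and.cpp
// and logical_or.cpp each supply the INSTANTIATE_TYPED_TEST_SUITE_P call, so the same test
// bodies run once per logical operation instead of being duplicated per file.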
+TYPED_TEST_SUITE_P(LogicalOperatorTypeProp); + +namespace +{ + template + void incorrect_init(const ngraph::element::Type& type, + const std::string& err, + const ngraph::Shape& shape1 = {1, 3, 6}, + const ngraph::Shape& shape2 = {1, 3, 6}) + { + auto input1 = std::make_shared(type, shape1); + auto input2 = std::make_shared(type, shape2); + try + { + auto op = std::make_shared(input1, input2); + } + catch (const ngraph::NodeValidationFailure& error) + { + EXPECT_HAS_SUBSTRING(error.what(), err); + } + } +} // namespace + +TYPED_TEST_P(LogicalOperatorTypeProp, incorrect_type_f32) +{ + using OP_Type = typename TypeParam::op_type; + incorrect_init( + ngraph::element::f32, + "Operands for logical operators must have boolean element type but have element type f32"); +} + +TYPED_TEST_P(LogicalOperatorTypeProp, incorrect_type_f64) +{ + using OP_Type = typename TypeParam::op_type; + incorrect_init( + ngraph::element::f64, + "Operands for logical operators must have boolean element type but have element type f64"); +} + +TYPED_TEST_P(LogicalOperatorTypeProp, incorrect_type_i32) +{ + using OP_Type = typename TypeParam::op_type; + incorrect_init( + ngraph::element::i32, + "Operands for logical operators must have boolean element type but have element type i32"); +} + +TYPED_TEST_P(LogicalOperatorTypeProp, incorrect_type_i64) +{ + using OP_Type = typename TypeParam::op_type; + incorrect_init( + ngraph::element::i64, + "Operands for logical operators must have boolean element type but have element type i64"); +} + +TYPED_TEST_P(LogicalOperatorTypeProp, incorrect_type_u32) +{ + using OP_Type = typename TypeParam::op_type; + incorrect_init( + ngraph::element::u32, + "Operands for logical operators must have boolean element type but have element type u32"); +} + +TYPED_TEST_P(LogicalOperatorTypeProp, incorrect_type_u64) +{ + using OP_Type = typename TypeParam::op_type; + incorrect_init( + ngraph::element::u64, + "Operands for logical operators must have boolean element type but have element type u64"); +} + +TYPED_TEST_P(LogicalOperatorTypeProp, incorrect_shape) +{ + using OP_Type = typename TypeParam::op_type; + incorrect_init(ngraph::element::boolean, + "Argument shapes are inconsistent", + ngraph::Shape{1, 3, 6}, + ngraph::Shape{1, 2, 3}); +} + +TYPED_TEST_P(LogicalOperatorTypeProp, broadcast) +{ + using OP_Type = typename TypeParam::op_type; + + auto input1 = + std::make_shared(ngraph::element::boolean, ngraph::Shape{1, 1, 6}); + auto input2 = + std::make_shared(ngraph::element::boolean, ngraph::Shape{1, 3, 1}); + + auto logical_and = std::make_shared(input1, input2); + + ASSERT_EQ(logical_and->get_element_type(), ngraph::element::boolean); + ASSERT_EQ(logical_and->get_shape(), (ngraph::Shape{1, 3, 6})); +} + +REGISTER_TYPED_TEST_SUITE_P(LogicalOperatorTypeProp, + broadcast, + incorrect_type_f32, + incorrect_type_f64, + incorrect_type_i32, + incorrect_type_i64, + incorrect_type_u32, + incorrect_type_u64, + incorrect_shape); diff --git a/ngraph/test/type_prop/logical_or.cpp b/ngraph/test/type_prop/logical_or.cpp new file mode 100644 index 00000000000..a0bda12aded --- /dev/null +++ b/ngraph/test/type_prop/logical_or.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "logical_ops.hpp" +#include "util/type_prop.hpp" + +using Type = + ::testing::Types>; + +INSTANTIATE_TYPED_TEST_SUITE_P(Type_prop_test, + LogicalOperatorTypeProp, + Type, + LogicalOperatorTypeName); From c4bd0a45d39f45851565257026fafc03ddfd2fb9 Mon Sep 17 00:00:00 2001 
From: Mikhail Nosov Date: Tue, 10 Aug 2021 09:23:30 +0300 Subject: [PATCH 12/24] MO: Support of discovering of suitable MOC frontend (#6888) * MO: Support of discovering of suitable MOC frontend if --framework is not specified * Ready for review * Fix: don't use FrontEndManager if framework is not in list of available frontends * Apply review comments --- model-optimizer/mo/main.py | 39 +++++++++------ model-optimizer/mo/moc_frontend/pipeline.py | 17 +++---- .../mo/frontend_ngraph_test_actual.py | 49 +++++++++++++++++-- .../mock_mo_frontend.hpp | 24 +++++++++ .../mock_mo_python_api/mock_mo_python_api.cpp | 2 + .../include/frontend_manager/frontend.hpp | 6 +++ .../frontend_manager/frontend_manager.hpp | 9 ++-- .../frontend_manager/src/frontend_manager.cpp | 5 ++ .../paddlepaddle_frontend/frontend.hpp | 6 +++ ngraph/frontend/paddlepaddle/src/frontend.cpp | 2 + .../python/src/pyngraph/frontend/frontend.cpp | 12 +++++ .../pyngraph/frontend/frontend_manager.cpp | 20 ++++++++ .../mock_py_frontend.hpp | 24 +++++++++ .../pyngraph_mock_frontend_api.cpp | 2 + .../test_frontend/test_frontendmanager.py | 20 ++++++++ ngraph/test/frontend/frontend_manager.cpp | 4 ++ ngraph/test/frontend/mock_frontend.cpp | 2 + ngraph/test/frontend/shared/src/basic_api.cpp | 1 + 18 files changed, 213 insertions(+), 31 deletions(-) diff --git a/model-optimizer/mo/main.py b/model-optimizer/mo/main.py index 591457c36f0..0747107c8c5 100644 --- a/model-optimizer/mo/main.py +++ b/model-optimizer/mo/main.py @@ -97,16 +97,29 @@ def print_argv(argv: argparse.Namespace, is_caffe: bool, is_tf: bool, is_mxnet: def prepare_ir(argv: argparse.Namespace): - is_tf, is_caffe, is_mxnet, is_kaldi, is_onnx = deduce_framework_by_namespace(argv) - fem = argv.feManager - new_front_ends = [] - if fem is not None: # in future, check of 'use_legacy_frontend' in argv can be added here - new_front_ends = fem.get_available_front_ends() + available_moc_front_ends = [] + moc_front_end = None + + # TODO: in future, check of 'use_legacy_frontend' in argv can be added here (issue 61973) + force_use_legacy_frontend = False + + if fem and not force_use_legacy_frontend: + available_moc_front_ends = fem.get_available_front_ends() + if argv.input_model: + if not argv.framework: + moc_front_end = fem.load_by_model(argv.input_model) + if moc_front_end: + argv.framework = moc_front_end.get_name() + elif argv.framework in available_moc_front_ends: + moc_front_end = fem.load_by_framework(argv.framework) + + is_tf, is_caffe, is_mxnet, is_kaldi, is_onnx =\ + deduce_framework_by_namespace(argv) if not moc_front_end else [False, False, False, False, False] if not any([is_tf, is_caffe, is_mxnet, is_kaldi, is_onnx]): frameworks = ['tf', 'caffe', 'mxnet', 'kaldi', 'onnx'] - frameworks = list(set(frameworks + new_front_ends)) + frameworks = list(set(frameworks + available_moc_front_ends)) if argv.framework not in frameworks: raise Error('Framework {} is not a valid target. Please use --framework with one from the list: {}. 
' + refer_to_faq_msg(15), argv.framework, frameworks) @@ -173,7 +186,7 @@ def prepare_ir(argv: argparse.Namespace): if argv.legacy_ir_generation and len(argv.transform) != 0: raise Error("--legacy_ir_generation and --transform keys can not be used at the same time.") - use_legacy_fe = argv.framework not in new_front_ends + use_legacy_fe = argv.framework not in available_moc_front_ends # For C++ frontends there is no specific python installation requirements, thus check only generic ones ret_code = check_requirements(framework=argv.framework if use_legacy_fe else None) if ret_code: @@ -258,7 +271,7 @@ def prepare_ir(argv: argparse.Namespace): send_framework_info('kaldi') from mo.front.kaldi.register_custom_ops import get_front_classes import_extensions.load_dirs(argv.framework, extensions, get_front_classes) - elif is_onnx: # in future check of 'use_legacy_frontend' can be added here + elif is_onnx: send_framework_info('onnx') from mo.front.onnx.register_custom_ops import get_front_classes import_extensions.load_dirs(argv.framework, extensions, get_front_classes) @@ -266,11 +279,10 @@ def prepare_ir(argv: argparse.Namespace): graph = None ngraph_function = None - # In future check of use_legacy_frontend option can be added here - if argv.feManager is None or argv.framework not in new_front_ends: + if argv.framework not in available_moc_front_ends: graph = unified_pipeline(argv) else: - ngraph_function = moc_pipeline(argv) + ngraph_function = moc_pipeline(argv, moc_front_end) return graph, ngraph_function @@ -389,7 +401,6 @@ def main(cli_parser: argparse.ArgumentParser, fem: FrontEndManager, framework: s argv = cli_parser.parse_args() send_params_info(argv, cli_parser) - if framework: argv.framework = framework argv.feManager = fem @@ -435,5 +446,5 @@ def main(cli_parser: argparse.ArgumentParser, fem: FrontEndManager, framework: s if __name__ == "__main__": from mo.utils.cli_parser import get_all_cli_parser - fem = FrontEndManager() - sys.exit(main(get_all_cli_parser(fem), fem, None)) + fe_manager = FrontEndManager() + sys.exit(main(get_all_cli_parser(fe_manager), fe_manager, None)) diff --git a/model-optimizer/mo/moc_frontend/pipeline.py b/model-optimizer/mo/moc_frontend/pipeline.py index 5ddccc15f41..ca6924824af 100644 --- a/model-optimizer/mo/moc_frontend/pipeline.py +++ b/model-optimizer/mo/moc_frontend/pipeline.py @@ -9,22 +9,18 @@ from mo.moc_frontend.extractor import fe_user_data_repack from mo.middle.passes.infer import validate_batch_in_shape from ngraph import Dimension, PartialShape # pylint: disable=no-name-in-module,import-error -from ngraph.frontend import Place # pylint: disable=no-name-in-module,import-error +from ngraph.frontend import FrontEnd, Place # pylint: disable=no-name-in-module,import-error from ngraph.utils.types import get_element_type # pylint: disable=no-name-in-module,import-error -def moc_pipeline(argv: argparse.Namespace): +def moc_pipeline(argv: argparse.Namespace, moc_front_end: FrontEnd): """ Load input model and convert it to nGraph function - :param: parsed command line arguments + :param: argv: parsed command line arguments + :param: moc_front_end: Loaded Frontend for converting input model :return: converted nGraph function ready for serialization """ - fem = argv.feManager - log.debug('Available front ends: {}'.format( - str(fem.get_available_front_ends()))) - log.debug('Initializing new FE for framework {}'.format(argv.framework)) - fe = fem.load_by_framework(argv.framework) - input_model = fe.load(argv.input_model) + input_model = 
moc_front_end.load(argv.input_model) user_shapes, outputs, freeze_placeholder = fe_user_data_repack( input_model, argv.placeholder_shapes, argv.placeholder_data_types, @@ -78,7 +74,6 @@ def moc_pipeline(argv: argparse.Namespace): def shape_to_array(shape: PartialShape): return [shape.get_dimension(i) for i in range(shape.rank.get_length())] - return # Set batch size if argv.batch is not None and argv.batch > 0: @@ -100,5 +95,5 @@ def moc_pipeline(argv: argparse.Namespace): joined_name, old_shape_array, new_shape)) input_model.set_partial_shape(place, new_partial_shape) - ngraph_function = fe.convert(input_model) + ngraph_function = moc_front_end.convert(input_model) return ngraph_function diff --git a/model-optimizer/unit_tests/mo/frontend_ngraph_test_actual.py b/model-optimizer/unit_tests/mo/frontend_ngraph_test_actual.py index e9cbfd5dfa8..cb87a052e61 100644 --- a/model-optimizer/unit_tests/mo/frontend_ngraph_test_actual.py +++ b/model-optimizer/unit_tests/mo/frontend_ngraph_test_actual.py @@ -46,7 +46,7 @@ mock_needed = pytest.mark.skipif(not mock_available, def replaceArgsHelper(log_level='DEBUG', silent=False, model_name='abc', - input_model='abc.abc', + input_model='abc.test_mo_mock_mdl', transform=[], legacy_ir_generation=False, scale=None, @@ -73,7 +73,8 @@ def replaceArgsHelper(log_level='DEBUG', mean_values=mean_values, scale_values=scale_values, output_dir=output_dir, - freeze_placeholder_with_value=freeze_placeholder_with_value) + freeze_placeholder_with_value=freeze_placeholder_with_value, + framework=None) class TestMainFrontend(unittest.TestCase): @@ -97,9 +98,35 @@ class TestMainFrontend(unittest.TestCase): group(1).replace("\r", "") assert xml_file and bin_file - # verify that 'convert' was called + # verify that 'convert' was called, and 'supported' was not stat = get_frontend_statistic() assert stat.convert_model == 1 + assert stat.supported == 0 + # verify that meta info is added to XML file + with open(xml_file) as file: + assert 'mock_mo_ngraph_frontend' in file.read() + + @mock_needed + @patch('argparse.ArgumentParser.parse_args', + return_value=replaceArgsHelper()) + def test_convert_framework_discover(self, mock_argparse): + f = io.StringIO() + with redirect_stdout(f): + main(argparse.ArgumentParser(), fem, None) + out = f.getvalue() + + xml_file = re.search(r'\[ SUCCESS \] XML file: (.*)', out). \ + group(1).replace("\r", "") + bin_file = re.search(r'\[ SUCCESS \] BIN file: (.*)', out). 
\ + group(1).replace("\r", "") + assert xml_file and bin_file + + # verify that 'convert', 'supported' and 'get_name' were called + stat = get_frontend_statistic() + assert stat.convert_model == 1 + assert stat.supported == 1 + assert stat.get_name > 0 + # verify that meta info is added to XML file with open(xml_file) as file: assert 'mock_mo_ngraph_frontend' in file.read() @@ -227,3 +254,19 @@ class TestMainFrontend(unittest.TestCase): assert stat.get_partial_shape == 1 # verify that 'set_element_type' was not called assert stat.set_partial_shape == 0 + + @mock_needed + @patch('argparse.ArgumentParser.parse_args', + return_value=replaceArgsHelper(input_model='abc.qwerty')) + def test_error_input_model_no_framework(self, mock_argparse): + # Framework is not specified and 'abc.qwerty' is not supported + # so MO shall not convert anything and produce specified error + with self.assertLogs() as logger: + main(argparse.ArgumentParser(), fem, None) + + stat = get_frontend_statistic() + + assert [s for s in logger.output if 'can not be deduced' in s] + + # verify that 'supported' was called + assert stat.supported == 1 diff --git a/model-optimizer/unit_tests/mock_mo_frontend/mock_mo_ngraph_frontend/mock_mo_frontend.hpp b/model-optimizer/unit_tests/mock_mo_frontend/mock_mo_ngraph_frontend/mock_mo_frontend.hpp index eb8182132f9..5293a72776e 100644 --- a/model-optimizer/unit_tests/mock_mo_frontend/mock_mo_ngraph_frontend/mock_mo_frontend.hpp +++ b/model-optimizer/unit_tests/mock_mo_frontend/mock_mo_ngraph_frontend/mock_mo_frontend.hpp @@ -393,9 +393,13 @@ struct MOCK_API FeStat { std::vector m_load_paths; int m_convert_model = 0; + int m_supported = 0; + int m_get_name = 0; // Getters std::vector load_paths() const { return m_load_paths; } int convert_model() const { return m_convert_model; } + int supported() const { return m_supported; } + int get_name() const { return m_get_name; } }; /// \brief Mock implementation of FrontEnd @@ -428,4 +432,24 @@ private: } return std::make_shared(); } + + bool supported_impl(const std::vector>& params) const override + { + m_stat.m_supported++; + if (params.size() > 0 && is_type>(params[0])) + { + auto path = as_type_ptr>(params[0])->get(); + if (path.find(".test_mo_mock_mdl") != std::string::npos) + { + return true; + } + } + return false; + } + + std::string get_name() const override + { + m_stat.m_get_name++; + return "mock_mo_ngraph_frontend"; + } }; diff --git a/model-optimizer/unit_tests/mock_mo_frontend/mock_mo_python_api/mock_mo_python_api.cpp b/model-optimizer/unit_tests/mock_mo_frontend/mock_mo_python_api/mock_mo_python_api.cpp index d2d17042cfc..b0ebc36d517 100644 --- a/model-optimizer/unit_tests/mock_mo_frontend/mock_mo_python_api/mock_mo_python_api.cpp +++ b/model-optimizer/unit_tests/mock_mo_frontend/mock_mo_python_api/mock_mo_python_api.cpp @@ -19,6 +19,8 @@ static void register_mock_frontend_stat(py::module m) py::class_ feStat(m, "FeStat", py::dynamic_attr()); feStat.def_property_readonly("load_paths", &FeStat::load_paths); feStat.def_property_readonly("convert_model", &FeStat::convert_model); + feStat.def_property_readonly("supported", &FeStat::supported); + feStat.def_property_readonly("get_name", &FeStat::get_name); } static void register_mock_setup(py::module m) diff --git a/ngraph/frontend/frontend_manager/include/frontend_manager/frontend.hpp b/ngraph/frontend/frontend_manager/include/frontend_manager/frontend.hpp index 8af4b93464e..5f845ed4bb4 100644 --- a/ngraph/frontend/frontend_manager/include/frontend_manager/frontend.hpp +++ 
b/ngraph/frontend/frontend_manager/include/frontend_manager/frontend.hpp @@ -83,6 +83,12 @@ namespace ngraph /// \param function partially converted nGraph function virtual void normalize(std::shared_ptr function) const; + /// \brief Gets name of this FrontEnd. Can be used by clients + /// if frontend is selected automatically by FrontEndManager::load_by_model + /// + /// \return Current frontend name. Empty string if not implemented + virtual std::string get_name() const; + protected: virtual bool supported_impl(const std::vector>& variants) const; diff --git a/ngraph/frontend/frontend_manager/include/frontend_manager/frontend_manager.hpp b/ngraph/frontend/frontend_manager/include/frontend_manager/frontend_manager.hpp index e917c89c83a..997c769e9ed 100644 --- a/ngraph/frontend/frontend_manager/include/frontend_manager/frontend_manager.hpp +++ b/ngraph/frontend/frontend_manager/include/frontend_manager/frontend_manager.hpp @@ -47,10 +47,13 @@ namespace ngraph /// Selects and loads appropriate frontend depending on model file extension and other /// file info (header) /// - /// \param framework - /// Framework name. Throws exception if name is not in list of available frontends + /// \param vars Any number of parameters of any type. What kind of parameters + /// are accepted is determined by each FrontEnd individually, typically it is + /// std::string containing path to the model file. For more information please + /// refer to specific FrontEnd documentation. /// - /// \return Frontend interface for further loading of model + /// \return Frontend interface for further loading of model. Returns 'nullptr' + /// if no suitable frontend is found template FrontEnd::Ptr load_by_model(const Types&... vars) { diff --git a/ngraph/frontend/frontend_manager/src/frontend_manager.cpp b/ngraph/frontend/frontend_manager/src/frontend_manager.cpp index 05151934192..419d52c263d 100644 --- a/ngraph/frontend/frontend_manager/src/frontend_manager.cpp +++ b/ngraph/frontend/frontend_manager/src/frontend_manager.cpp @@ -167,6 +167,11 @@ void FrontEnd::normalize(std::shared_ptr function) const FRONT_END_NOT_IMPLEMENTED(normalize); } +std::string FrontEnd::get_name() const +{ + return std::string(); +} + //----------- InputModel --------------------------- std::vector InputModel::get_inputs() const { diff --git a/ngraph/frontend/paddlepaddle/include/paddlepaddle_frontend/frontend.hpp b/ngraph/frontend/paddlepaddle/include/paddlepaddle_frontend/frontend.hpp index d872e5fedf0..0922b0e425f 100644 --- a/ngraph/frontend/paddlepaddle/include/paddlepaddle_frontend/frontend.hpp +++ b/ngraph/frontend/paddlepaddle/include/paddlepaddle_frontend/frontend.hpp @@ -43,6 +43,12 @@ namespace ngraph /// \return nGraph function after decoding std::shared_ptr decode(InputModel::Ptr model) const override; + /// \brief Gets name of this FrontEnd. Can be used by clients + /// if frontend is selected automatically by FrontEndManager::load_by_model + /// + /// \return Paddle frontend name. 
+ std::string get_name() const override; + protected: /// \brief Check if FrontEndPDPD can recognize model from given parts /// \param params Can be path to folder which contains __model__ file or path to diff --git a/ngraph/frontend/paddlepaddle/src/frontend.cpp b/ngraph/frontend/paddlepaddle/src/frontend.cpp index 69904ea7206..7c9e90ec56f 100644 --- a/ngraph/frontend/paddlepaddle/src/frontend.cpp +++ b/ngraph/frontend/paddlepaddle/src/frontend.cpp @@ -399,6 +399,8 @@ namespace ngraph auto f = convert_each_node(pdpd_model, pdpd::make_framework_node); return f; } + + std::string FrontEndPDPD::get_name() const { return "paddle"; } } // namespace frontend } // namespace ngraph diff --git a/ngraph/python/src/pyngraph/frontend/frontend.cpp b/ngraph/python/src/pyngraph/frontend/frontend.cpp index dd98869488c..81a61e3bee6 100644 --- a/ngraph/python/src/pyngraph/frontend/frontend.cpp +++ b/ngraph/python/src/pyngraph/frontend/frontend.cpp @@ -123,4 +123,16 @@ void regclass_pyngraph_FrontEnd(py::module m) function : Function Partially converted nGraph function. )"); + + fem.def("get_name", + &ngraph::frontend::FrontEnd::get_name, + R"( + Gets name of this FrontEnd. Can be used by clients + if frontend is selected automatically by FrontEndManager::load_by_model. + + Parameters + ---------- + get_name : str + Current frontend name. Empty string if not implemented. + )"); } diff --git a/ngraph/python/src/pyngraph/frontend/frontend_manager.cpp b/ngraph/python/src/pyngraph/frontend/frontend_manager.cpp index dc0475ee973..2827764bfa6 100644 --- a/ngraph/python/src/pyngraph/frontend/frontend_manager.cpp +++ b/ngraph/python/src/pyngraph/frontend/frontend_manager.cpp @@ -35,6 +35,7 @@ void regclass_pyngraph_FrontEndManager(py::module m) get_available_front_ends : List[str] List of available frontend names. )"); + fem.def("load_by_framework", &ngraph::frontend::FrontEndManager::load_by_framework, py::arg("framework"), @@ -51,6 +52,25 @@ void regclass_pyngraph_FrontEndManager(py::module m) load_by_framework : FrontEnd Frontend interface for further loading of models. )"); + + fem.def( + "load_by_model", + [](const std::shared_ptr& fem, + const std::string& model_path) { return fem->load_by_model(model_path); }, + py::arg("model_path"), + R"( + Selects and loads appropriate frontend depending on model file extension and other file info (header). + + Parameters + ---------- + model_path : str + Path to model file/directory. + + Returns + ---------- + load_by_model : FrontEnd + Frontend interface for further loading of models. 
'None' if no suitable frontend is found + )"); } void regclass_pyngraph_GeneralFailureFrontEnd(py::module m) diff --git a/ngraph/python/tests/mock/mock_py_ngraph_frontend/mock_py_frontend.hpp b/ngraph/python/tests/mock/mock_py_ngraph_frontend/mock_py_frontend.hpp index 624a8ee48da..adcffd142b7 100644 --- a/ngraph/python/tests/mock/mock_py_ngraph_frontend/mock_py_frontend.hpp +++ b/ngraph/python/tests/mock/mock_py_ngraph_frontend/mock_py_frontend.hpp @@ -485,6 +485,8 @@ struct MOCK_API FeStat int m_convert_partially = 0; int m_decode = 0; int m_normalize = 0; + int m_get_name = 0; + int m_supported = 0; // Getters std::vector load_paths() const { return m_load_paths; } int convert_model() const { return m_convert_model; } @@ -492,6 +494,8 @@ struct MOCK_API FeStat int convert_partially() const { return m_convert_partially; } int decode() const { return m_decode; } int normalize() const { return m_normalize; } + int get_name() const { return m_get_name; } + int supported() const { return m_supported; } }; class MOCK_API FrontEndMockPy : public FrontEnd @@ -509,6 +513,20 @@ public: return std::make_shared(); } + bool supported_impl(const std::vector>& params) const override + { + m_stat.m_supported++; + if (params.size() > 0 && is_type>(params[0])) + { + auto path = as_type_ptr>(params[0])->get(); + if (path.find(".test_mock_py_mdl") != std::string::npos) + { + return true; + } + } + return false; + } + std::shared_ptr convert(InputModel::Ptr model) const override { m_stat.m_convert_model++; @@ -534,5 +552,11 @@ public: m_stat.m_normalize++; } + std::string get_name() const override + { + m_stat.m_get_name++; + return "mock_py"; + } + FeStat get_stat() const { return m_stat; } }; diff --git a/ngraph/python/tests/mock/pyngraph_fe_mock_api/pyngraph_mock_frontend_api.cpp b/ngraph/python/tests/mock/pyngraph_fe_mock_api/pyngraph_mock_frontend_api.cpp index 1927e04b7a3..a1f6861b72c 100644 --- a/ngraph/python/tests/mock/pyngraph_fe_mock_api/pyngraph_mock_frontend_api.cpp +++ b/ngraph/python/tests/mock/pyngraph_fe_mock_api/pyngraph_mock_frontend_api.cpp @@ -33,6 +33,8 @@ static void register_mock_frontend_stat(py::module m) feStat.def_property_readonly("convert_partially", &FeStat::convert_partially); feStat.def_property_readonly("decode", &FeStat::decode); feStat.def_property_readonly("normalize", &FeStat::normalize); + feStat.def_property_readonly("get_name", &FeStat::get_name); + feStat.def_property_readonly("supported", &FeStat::supported); } static void register_mock_model_stat(py::module m) diff --git a/ngraph/python/tests/test_frontend/test_frontendmanager.py b/ngraph/python/tests/test_frontend/test_frontendmanager.py index 51882091fda..920746e200a 100644 --- a/ngraph/python/tests/test_frontend/test_frontendmanager.py +++ b/ngraph/python/tests/test_frontend/test_frontendmanager.py @@ -52,6 +52,16 @@ def test_load(): assert "abc.bin" in stat.load_paths +@mock_needed +def test_load_by_model(): + fe = fem.load_by_model(model_path="abc.test_mock_py_mdl") + assert fe is not None + assert fe.get_name() == "mock_py" + stat = get_fe_stat(fe) + assert stat.get_name == 1 + assert stat.supported == 1 + + @mock_needed def test_convert_model(): fe = fem.load_by_framework(framework="mock_py") @@ -90,6 +100,16 @@ def test_decode_and_normalize(): assert stat.decode == 1 +@mock_needed +def test_get_name(): + fe = fem.load_by_framework(framework="mock_py") + assert fe is not None + name = fe.get_name() + assert name == "mock_py" + stat = get_fe_stat(fe) + assert stat.get_name == 1 + + # --------InputModel 
tests----------------- @mock_needed def init_model(): diff --git a/ngraph/test/frontend/frontend_manager.cpp b/ngraph/test/frontend/frontend_manager.cpp index af2aeee221d..09c2a6ed36c 100644 --- a/ngraph/test/frontend/frontend_manager.cpp +++ b/ngraph/test/frontend/frontend_manager.cpp @@ -59,6 +59,9 @@ TEST(FrontEndManagerTest, testMockPluginFrontEnd) FrontEndManager fem; auto frontends = fem.get_available_front_ends(); ASSERT_NE(std::find(frontends.begin(), frontends.end(), "mock1"), frontends.end()); + FrontEnd::Ptr fe; + ASSERT_NO_THROW(fe = fem.load_by_framework("mock1")); + ASSERT_EQ(fe->get_name(), "mock1"); set_test_env("OV_FRONTEND_PATH", ""); } @@ -77,6 +80,7 @@ TEST(FrontEndManagerTest, testDefaultFrontEnd) ASSERT_ANY_THROW(fe->convert_partially(nullptr)); ASSERT_ANY_THROW(fe->decode(nullptr)); ASSERT_ANY_THROW(fe->normalize(nullptr)); + ASSERT_EQ(fe->get_name(), std::string()); } TEST(FrontEndManagerTest, testDefaultInputModel) diff --git a/ngraph/test/frontend/mock_frontend.cpp b/ngraph/test/frontend/mock_frontend.cpp index bb5fdf105ee..29c203dacec 100644 --- a/ngraph/test/frontend/mock_frontend.cpp +++ b/ngraph/test/frontend/mock_frontend.cpp @@ -18,6 +18,8 @@ using namespace ngraph::frontend; class FrontEndMock : public FrontEnd { +public: + std::string get_name() const override { return "mock1"; } }; extern "C" MOCK_API FrontEndVersion GetAPIVersion() diff --git a/ngraph/test/frontend/shared/src/basic_api.cpp b/ngraph/test/frontend/shared/src/basic_api.cpp index 92a7254c76e..aa09b84404e 100644 --- a/ngraph/test/frontend/shared/src/basic_api.cpp +++ b/ngraph/test/frontend/shared/src/basic_api.cpp @@ -37,6 +37,7 @@ void FrontEndBasicTest::doLoadFromFile() TEST_P(FrontEndBasicTest, testLoadFromFile) { ASSERT_NO_THROW(doLoadFromFile()); + ASSERT_EQ(m_frontEnd->get_name(), m_feName); std::shared_ptr function; ASSERT_NO_THROW(function = m_frontEnd->convert(m_inputModel)); ASSERT_NE(function, nullptr); From bf6c34d7b81f1984ceed1055cc9dcb79a79539fa Mon Sep 17 00:00:00 2001 From: Yury Gaydaychuk Date: Tue, 10 Aug 2021 09:46:36 +0300 Subject: [PATCH 13/24] [CPU] Interpolate handles inplace child layout (#6250) --- .../subgraph_tests/concat_resize_concat.cpp | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/concat_resize_concat.cpp diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/concat_resize_concat.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/concat_resize_concat.cpp new file mode 100644 index 00000000000..2dee7ff9937 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/concat_resize_concat.cpp @@ -0,0 +1,117 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include +#include + +namespace SubgraphTestsDefinitions { + +typedef std::tuple< + ngraph::NodeTypeInfo, // Node type + int, // channels count + int // batch count +> ConcResizeConcParams; + +class ConcatResizeConcatTest : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj) { + ngraph::NodeTypeInfo resize_type; + int channels_count; + int batch_count; + std::tie(resize_type, channels_count, batch_count) = 
obj.param; + std::ostringstream result; + result << resize_type.name << "_"; + result << "Batches=" << batch_count << "_"; + result << "Channels=" << channels_count << "_"; + result << obj.index; + return result.str(); +} +protected: + void SetUp() override { + targetDevice = CommonTestUtils::DEVICE_CPU; + ngraph::NodeTypeInfo resize_type; + int channels_count; + int batch_count; + std::tie(resize_type, channels_count, batch_count) = this->GetParam(); + + std::vector dims1({batch_count, channels_count, 2, 2}); + std::vector dims2({batch_count, channels_count, 3, 3}); + + std::vector shape1({size_t(dims1[0]), size_t(dims1[1]), size_t(dims1[2]), size_t(dims1[3])}); + std::vector shape2({size_t(dims2[0]), size_t(dims2[1]), size_t(dims2[2]), size_t(dims2[3])}); + auto inputNode1 = std::make_shared(ngraph::element::f32, ngraph::Shape(shape1)); + auto inputNode2 = std::make_shared(ngraph::element::f32, ngraph::Shape(shape1)); + auto inputNode3 = std::make_shared(ngraph::element::f32, ngraph::Shape(shape2)); + // concat layer + ngraph::OutputVector concatNodes1; + concatNodes1.push_back(inputNode1); + concatNodes1.push_back(inputNode2); + std::shared_ptr inputNode = std::make_shared(concatNodes1, 1); + + // preresize layer + ngraph::opset4::Interpolate::InterpolateAttrs attrs; + attrs.mode = ngraph::opset4::Interpolate::InterpolateMode::linear_onnx; + attrs.shape_calculation_mode = ngraph::opset4::Interpolate::ShapeCalcMode::sizes; + attrs.coordinate_transformation_mode = ngraph::opset4::Interpolate::CoordinateTransformMode::asymmetric; + attrs.nearest_mode = ngraph::opset4::Interpolate::NearestMode::ceil; + std::vector shape = {3, 3 }; + + std::vector scales = {1.5, 1.5 }; + auto outputShape = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, shape.data()); + auto scalesShape = std::make_shared(ngraph::element::f32, ngraph::Shape{2}, scales.data()); + auto axes = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, std::vector{2, 3}); + std::shared_ptr preresizeNode = std::make_shared(inputNode, outputShape, scalesShape, axes, attrs); + + // concat layer + ngraph::OutputVector concatNodes2; + concatNodes2.push_back(preresizeNode); + concatNodes2.push_back(inputNode3); + std::shared_ptr outputNode = std::make_shared(concatNodes2, 1); + + // Run shape inference on the nodes + ngraph::NodeVector nodes; + nodes.push_back(inputNode1); + nodes.push_back(inputNode2); + nodes.push_back(inputNode3); + nodes.push_back(inputNode); + nodes.push_back(preresizeNode); + nodes.push_back(outputNode); + + // Create graph + ngraph::ParameterVector inputs; + inputs.push_back(inputNode1); + inputs.push_back(inputNode2); + inputs.push_back(inputNode3); + ngraph::ResultVector outputs; + outputs.push_back(std::make_shared(outputNode)); + function = std::make_shared(outputs, inputs); + } +}; + +TEST_P(ConcatResizeConcatTest, CompareWithRefs) { + Run(); +} + +namespace { + + const std::vector batch_count = { 1, 2 }; + + const std::vector channel_count = { 1, 2 }; + + +INSTANTIATE_TEST_CASE_P(smoke_ConcResizeConc, + ConcatResizeConcatTest, ::testing::Combine( + ::testing::Values(ngraph::opset4::Interpolate::type_info), + ::testing::ValuesIn(channel_count), + ::testing::ValuesIn(batch_count)), + ConcatResizeConcatTest::getTestCaseName); + +} // namespace +} // namespace SubgraphTestsDefinitions From cc5dba95d45173dd608f5e3eb207cb35370fb8c8 Mon Sep 17 00:00:00 2001 From: Vladislav Volkov Date: Tue, 10 Aug 2021 11:03:57 +0300 Subject: [PATCH 14/24] [CPU] Memory leaks in gemm module (#6989) --- 
inference-engine/thirdparty/mkl-dnn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference-engine/thirdparty/mkl-dnn b/inference-engine/thirdparty/mkl-dnn index 8840c3faf6c..fdc6f62b118 160000 --- a/inference-engine/thirdparty/mkl-dnn +++ b/inference-engine/thirdparty/mkl-dnn @@ -1 +1 @@ -Subproject commit 8840c3faf6c4e1131c5408e8d6795093d4f4a815 +Subproject commit fdc6f62b1184dab86dbadd55fc4bc49dcde9dba5 From e3fa6544f2afb004ef97ae61eb115b14ddeea864 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Tue, 10 Aug 2021 11:15:08 +0300 Subject: [PATCH 15/24] Enable IE clang format (#6938) * Add clang-format config files for IE and include files * Fixed IE core clang-format * Added clang-format for plugin API * Fixed style for plugin API * Fixed code style * Fixed build * Added proposed values * Fixed code style * Updated config * Fixed # for define * Fixed comment and build * Removed clang-config for include, fixed include for library headers * Set column limit to 120 and space before CtorInitializedColon=true * Fixed headers * Added support of FOREACH_CHILD * Fixed parameter print style * Fixed code style * Fixed target name * Restore comments style for public API * Fixed plugin API * Applied code style * Fixed CI --- .../src/inference_engine/.clang-format | 28 ++ .../src/inference_engine/CMakeLists.txt | 4 +- .../include/ie/cldnn/cldnn_config.hpp | 15 +- .../include/ie/cpp/ie_cnn_network.h | 7 +- .../include/ie/cpp/ie_executable_network.hpp | 16 +- .../include/ie/cpp/ie_infer_request.hpp | 30 +- .../include/ie/cpp/ie_memory_state.hpp | 16 +- .../include/ie/details/ie_blob_iterator.hpp | 4 +- .../include/ie/details/ie_pre_allocator.hpp | 4 +- .../include/ie/details/ie_so_loader.h | 2 +- .../include/ie/details/ie_so_pointer.hpp | 48 +- .../include/ie/gna/gna_config.hpp | 99 ++-- .../ie/gpu/details/gpu_context_helpers.hpp | 37 +- .../include/ie/gpu/gpu_config.hpp | 18 +- .../include/ie/gpu/gpu_context_api_dx.hpp | 72 +-- .../include/ie/gpu/gpu_context_api_ocl.hpp | 70 +-- .../include/ie/gpu/gpu_context_api_va.hpp | 53 ++- .../include/ie/gpu/gpu_ocl_wrapper.hpp | 24 +- .../include/ie/gpu/gpu_params.hpp | 3 +- .../ie/hetero/hetero_plugin_config.hpp | 2 +- .../include/ie/ie_allocator.hpp | 7 +- .../src/inference_engine/include/ie/ie_api.h | 142 +++--- .../src/inference_engine/include/ie/ie_blob.h | 68 +-- .../inference_engine/include/ie/ie_common.h | 167 +++---- .../include/ie/ie_compound_blob.h | 8 +- .../inference_engine/include/ie/ie_core.hpp | 37 +- .../src/inference_engine/include/ie/ie_data.h | 3 +- .../include/ie/ie_extension.h | 19 +- .../include/ie/ie_icnn_network.hpp | 20 +- .../include/ie/ie_iexecutable_network.hpp | 2 +- .../include/ie/ie_iextension.h | 26 +- .../include/ie/ie_iinfer_request.hpp | 13 +- .../inference_engine/include/ie/ie_layouts.h | 31 +- .../include/ie/ie_locked_memory.hpp | 21 +- .../include/ie/ie_parallel.hpp | 274 ++++++----- .../include/ie/ie_parameter.hpp | 31 +- .../include/ie/ie_plugin_config.hpp | 57 ++- .../include/ie/ie_precision.hpp | 36 +- .../include/ie/ie_preprocess.hpp | 7 +- .../include/ie/ie_remote_context.hpp | 25 +- .../include/ie/ie_transformations.hpp | 10 +- .../include/ie/ie_version.hpp | 6 +- .../include/ie/inference_engine.hpp | 2 +- .../ie/multi-device/multi_device_config.hpp | 2 +- .../include/ie/vpu/hddl_config.hpp | 59 +-- .../include/ie/vpu/hddl_plugin_config.hpp | 90 ++-- .../include/ie/vpu/myriad_plugin_config.hpp | 2 +- .../include/ie/vpu/vpu_config.hpp | 6 +- .../include/ie/vpu/vpu_plugin_config.hpp | 16 +- 
.../include/openvino/runtime/core.hpp | 40 +- .../inference_engine/src/blob_transform.cpp | 122 +++-- .../src/cnn_network_ngraph_impl.cpp | 188 ++++---- .../src/cnn_network_ngraph_impl.hpp | 25 +- .../src/compilation_context.cpp | 56 +-- .../src/compilation_context.hpp | 11 +- .../src/cpp/ie_cnn_network.cpp | 45 +- .../src/cpp/ie_executable_network.cpp | 39 +- .../src/cpp/ie_executable_network_base.hpp | 11 +- .../src/cpp/ie_infer_request.cpp | 137 +++--- .../src/cpp/ie_variable_state.cpp | 24 +- .../ie_iexecutable_network_internal.cpp | 19 +- .../interface/ie_iinfer_request_internal.cpp | 155 ++++--- .../interface/ie_iplugin_internal.cpp | 79 ++-- .../interface/ie_ivariable_state_internal.cpp | 2 +- .../cpu_x86_sse42/blob_transform_sse42.cpp | 112 ++++- .../cpu_x86_sse42/blob_transform_sse42.hpp | 112 ++++- .../inference_engine/src/ie_blob_common.cpp | 4 +- .../inference_engine/src/ie_cache_guard.cpp | 22 +- .../inference_engine/src/ie_cache_guard.hpp | 24 +- .../inference_engine/src/ie_cache_manager.hpp | 7 +- .../src/inference_engine/src/ie_common.cpp | 75 +-- .../inference_engine/src/ie_compound_blob.cpp | 64 ++- .../src/inference_engine/src/ie_core.cpp | 428 ++++++++++-------- .../src/inference_engine/src/ie_data.cpp | 17 +- .../src/inference_engine/src/ie_itt.hpp | 6 +- .../src/inference_engine/src/ie_layouts.cpp | 121 ++--- .../src/inference_engine/src/ie_memcpy.cpp | 3 +- .../src/ie_network_reader.cpp | 83 ++-- .../src/ie_network_reader.hpp | 11 +- .../inference_engine/src/ie_ngraph_utils.cpp | 3 +- .../inference_engine/src/ie_system_conf.cpp | 87 ++-- .../src/ie_transformations.cpp | 10 +- .../src/os/lin/lin_shared_object_loader.cpp | 10 +- .../src/os/lin/lin_system_conf.cpp | 28 +- .../src/inference_engine/src/precomp.hpp | 15 +- .../src/threading/ie_cpu_streams_executor.cpp | 203 +++++---- .../src/threading/ie_executor_manager.cpp | 17 +- .../src/threading/ie_istreams_executor.cpp | 242 +++++----- .../src/threading/ie_itask_executor.cpp | 8 +- .../threading/ie_parallel_custom_arena.cpp | 198 ++++---- .../src/threading/ie_thread_affinity.cpp | 28 +- .../src/threading/ie_thread_affinity.hpp | 8 +- .../inference_engine/src/xml_parse_utils.cpp | 74 +-- inference-engine/src/plugin_api/.clang-format | 28 ++ .../src/plugin_api/blob_factory.hpp | 16 +- inference-engine/src/plugin_api/caseless.hpp | 9 +- ...executable_network_thread_safe_default.hpp | 19 +- ...nfer_async_request_thread_safe_default.hpp | 220 ++++----- .../ie_iexecutable_network_internal.hpp | 15 +- .../interface/ie_iinfer_request_internal.hpp | 33 +- .../interface/ie_internal_plugin_config.hpp | 2 +- .../interface/ie_iplugin_internal.hpp | 52 ++- .../interface/ie_ivariable_state_internal.hpp | 6 +- .../plugin_api/cpp_interfaces/plugin_itt.hpp | 10 +- inference-engine/src/plugin_api/debug.h | 38 +- .../src/plugin_api/description_buffer.hpp | 15 +- .../src/plugin_api/exec_graph_info.hpp | 11 +- .../src/plugin_api/ie_algorithm.hpp | 14 +- inference-engine/src/plugin_api/ie_icore.hpp | 9 +- inference-engine/src/plugin_api/ie_memcpy.h | 8 +- .../src/plugin_api/ie_ngraph_utils.hpp | 12 +- .../src/plugin_api/ie_system_conf.h | 20 +- .../src/plugin_api/precision_utils.h | 90 ++-- .../threading/ie_cpu_streams_executor.hpp | 6 +- .../threading/ie_executor_manager.hpp | 10 +- .../threading/ie_immediate_executor.hpp | 8 +- .../threading/ie_istreams_executor.hpp | 130 +++--- .../plugin_api/threading/ie_thread_local.hpp | 91 ++-- .../src/plugin_api/xml_parse_utils.h | 29 +- 119 files changed, 3128 insertions(+), 2485 deletions(-) 
create mode 100644 inference-engine/src/inference_engine/.clang-format mode change 100755 => 100644 inference-engine/src/inference_engine/src/threading/ie_parallel_custom_arena.cpp create mode 100644 inference-engine/src/plugin_api/.clang-format diff --git a/inference-engine/src/inference_engine/.clang-format b/inference-engine/src/inference_engine/.clang-format new file mode 100644 index 00000000000..ebe747b7838 --- /dev/null +++ b/inference-engine/src/inference_engine/.clang-format @@ -0,0 +1,28 @@ +BasedOnStyle: Google +IndentWidth: 4 +UseTab: Never +ColumnLimit: 120 + +Language: Cpp +Standard: Cpp11 + +AccessModifierOffset: -4 +AlignConsecutiveMacros: true +AllowAllArgumentsOnNextLine: false +AllowAllConstructorInitializersOnNextLine: false +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: Empty +AllowShortLoopsOnASingleLine: false +AlwaysBreakBeforeMultilineStrings: false +BinPackArguments: false +BinPackParameters: false +CommentPragmas: '^#' +DerivePointerAlignment: false +FixNamespaceComments: true +IndentCaseLabels: false +IndentPPDirectives: AfterHash +ForEachMacros: + - foreach + - FOREACH_CHILD diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt index 8325ecd5d17..609b8a781a3 100644 --- a/inference-engine/src/inference_engine/CMakeLists.txt +++ b/inference-engine/src/inference_engine/CMakeLists.txt @@ -106,7 +106,7 @@ set_ie_threading_interface_for(${TARGET_NAME}_plugin_api) file(GLOB_RECURSE plugin_api_src "${IE_MAIN_SOURCE_DIR}/src/plugin_api/*.hpp" "${IE_MAIN_SOURCE_DIR}/src/plugin_api/*.h") -add_cpplint_target(${TARGET_NAME}_plugin_api_cpplint FOR_SOURCES ${plugin_api_src}) +add_clang_format_target(${TARGET_NAME}_plugin_api_clang FOR_SOURCES ${plugin_api_src}) # Create object library @@ -142,7 +142,7 @@ if (TBBBIND_2_4_FOUND) target_link_libraries(${TARGET_NAME}_obj PRIVATE ${TBBBIND_2_4_IMPORTED_TARGETS}) endif() -add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}_obj) +add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}_obj) # Create shared library file from object library diff --git a/inference-engine/src/inference_engine/include/ie/cldnn/cldnn_config.hpp b/inference-engine/src/inference_engine/include/ie/cldnn/cldnn_config.hpp index 3e5dc4cfb12..2d454545baf 100644 --- a/inference-engine/src/inference_engine/include/ie/cldnn/cldnn_config.hpp +++ b/inference-engine/src/inference_engine/include/ie/cldnn/cldnn_config.hpp @@ -10,9 +10,9 @@ */ #pragma once -#include "ie_plugin_config.hpp" -#include "ie_api.h" #include "gpu/gpu_config.hpp" +#include "ie_api.h" +#include "ie_plugin_config.hpp" namespace InferenceEngine { @@ -24,8 +24,8 @@ namespace CLDNNConfigParams { /** * @brief shortcut for defining configuration keys */ -#define CLDNN_CONFIG_KEY(name) InferenceEngine::CLDNNConfigParams::_CONFIG_KEY(CLDNN_##name) -#define DECLARE_CLDNN_CONFIG_KEY(name) DECLARE_CONFIG_KEY(CLDNN_##name) +#define CLDNN_CONFIG_KEY(name) InferenceEngine::CLDNNConfigParams::_CONFIG_KEY(CLDNN_##name) +#define DECLARE_CLDNN_CONFIG_KEY(name) DECLARE_CONFIG_KEY(CLDNN_##name) #define DECLARE_CLDNN_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(CLDNN_##name) /** @@ -67,9 +67,10 @@ DECLARE_CLDNN_CONFIG_KEY(SOURCES_DUMPS_DIR); /** * @brief This key enables FP16 precision for quantized models. - * By default the model is converted to FP32 precision before running LPT. 
If this key is enabled (default), then non-quantized layers - * will be converted back to FP16 after LPT, which might improve the performance if a model has a lot of compute operations in - * non-quantized path. This key has no effect if current device doesn't have INT8 optimization capabilities. + * By default the model is converted to FP32 precision before running LPT. If this key is enabled (default), then + * non-quantized layers will be converted back to FP16 after LPT, which might improve the performance if a model has a + * lot of compute operations in non-quantized path. This key has no effect if current device doesn't have INT8 + * optimization capabilities. */ DECLARE_CLDNN_CONFIG_KEY(ENABLE_FP16_FOR_QUANTIZED_MODELS); diff --git a/inference-engine/src/inference_engine/include/ie/cpp/ie_cnn_network.h b/inference-engine/src/inference_engine/include/ie/cpp/ie_cnn_network.h index ef73b67e504..42288ba84ac 100644 --- a/inference-engine/src/inference_engine/include/ie/cpp/ie_cnn_network.h +++ b/inference-engine/src/inference_engine/include/ie/cpp/ie_cnn_network.h @@ -15,12 +15,12 @@ #include #include -#include "ie_icnn_network.hpp" #include "ie_blob.h" #include "ie_common.h" #include "ie_data.h" #include "ie_extension.h" -#include +#include "ie_icnn_network.hpp" +#include "ngraph/function.hpp" namespace InferenceEngine { @@ -52,8 +52,7 @@ public: * @param network Pointer to the ngraph::Function object * @param exts Vector of pointers to IE extension objects */ - explicit CNNNetwork(const std::shared_ptr& network, - const std::vector& exts = {}); + explicit CNNNetwork(const std::shared_ptr& network, const std::vector& exts = {}); /** * @brief Gets the network output Data node information. The received info is stored in the given Data node. diff --git a/inference-engine/src/inference_engine/include/ie/cpp/ie_executable_network.hpp b/inference-engine/src/inference_engine/include/ie/cpp/ie_executable_network.hpp index e5a9f1a36cb..6c8dab11542 100644 --- a/inference-engine/src/inference_engine/include/ie/cpp/ie_executable_network.hpp +++ b/inference-engine/src/inference_engine/include/ie/cpp/ie_executable_network.hpp @@ -10,18 +10,18 @@ #pragma once -#include #include #include +#include #include #include -#include "ie_parameter.hpp" -#include "ie_remote_context.hpp" #include "cpp/ie_cnn_network.h" #include "cpp/ie_infer_request.hpp" #include "details/ie_so_loader.h" #include "ie_iexecutable_network.hpp" +#include "ie_parameter.hpp" +#include "ie_remote_context.hpp" namespace ov { namespace runtime { @@ -36,16 +36,16 @@ class IExecutableNetworkInternal; * @brief This is an interface of an executable network */ class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) { - details::SharedObjectLoader _so; - std::shared_ptr _impl; + details::SharedObjectLoader _so; + std::shared_ptr _impl; /** * @brief Constructs ExecutableNetwork from the initialized std::shared_ptr - * @param so Plugin to use. This is required to ensure that ExecutableNetwork can work properly even if plugin object is destroyed. + * @param so Plugin to use. This is required to ensure that ExecutableNetwork can work properly even if plugin + * object is destroyed. 
* @param impl Initialized shared pointer */ - ExecutableNetwork(const details::SharedObjectLoader& so, - const std::shared_ptr& impl); + ExecutableNetwork(const details::SharedObjectLoader& so, const std::shared_ptr& impl); friend class Core; friend class ov::runtime::Core; diff --git a/inference-engine/src/inference_engine/include/ie/cpp/ie_infer_request.hpp b/inference-engine/src/inference_engine/include/ie/cpp/ie_infer_request.hpp index 94393cea062..7e57123f78a 100644 --- a/inference-engine/src/inference_engine/include/ie/cpp/ie_infer_request.hpp +++ b/inference-engine/src/inference_engine/include/ie/cpp/ie_infer_request.hpp @@ -13,10 +13,10 @@ #include #include -#include "ie_blob.h" #include "cpp/ie_memory_state.hpp" -#include "ie_iinfer_request.hpp" #include "details/ie_so_loader.h" +#include "ie_blob.h" +#include "ie_iinfer_request.hpp" namespace InferenceEngine { @@ -33,16 +33,16 @@ class ICompletionCallbackWrapper; * It can throw exceptions safely for the application, where it is properly handled. */ class INFERENCE_ENGINE_API_CLASS(InferRequest) { - details::SharedObjectLoader _so; - std::shared_ptr _impl; + details::SharedObjectLoader _so; + std::shared_ptr _impl; /** * @brief Constructs InferRequest from the initialized std::shared_ptr - * @param so Plugin to use. This is required to ensure that InferRequest can work properly even if plugin object is destroyed. + * @param so Plugin to use. This is required to ensure that InferRequest can work properly even if plugin object is + * destroyed. * @param impl Initialized shared pointer */ - InferRequest(const details::SharedObjectLoader& so, - const std::shared_ptr& impl); + InferRequest(const details::SharedObjectLoader& so, const std::shared_ptr& impl); friend class ExecutableNetwork; public: @@ -93,7 +93,7 @@ public: * @param data A reference to input. The type of Blob must correspond to the network input precision and size. * @param info Preprocess info for blob. */ - void SetBlob(const std::string &name, const Blob::Ptr &data, const PreProcessInfo& info); + void SetBlob(const std::string& name, const Blob::Ptr& data, const PreProcessInfo& info); /** * @brief Gets pre-process for input data @@ -176,9 +176,11 @@ private: void SetCompletionCallbackImpl(IInferRequest::CompletionCallback); IE_SUPPRESS_DEPRECATED_END - template + template struct SetCallback { - void operator()(std::function f) {_this.SetCompletionCallbackImpl(std::move(f));} + void operator()(std::function f) { + _this.SetCompletionCallbackImpl(std::move(f)); + } InferRequest& _this; }; @@ -188,7 +190,7 @@ public: * * @param callbackToSet callback object which will be called on when inference finish. 
*/ - template + template void SetCompletionCallback(F callbackToSet) { SetCallback{*this}(std::move(callbackToSet)); } @@ -207,7 +209,7 @@ public: * @return A shared pointer to IInferRequest interface */ INFERENCE_ENGINE_DEPRECATED("Will be removed") - operator std::shared_ptr (); + operator std::shared_ptr(); IE_SUPPRESS_DEPRECATED_END /** @@ -238,7 +240,7 @@ public: /** * @private */ -template<> +template <> struct InferRequest::SetCallback> { void operator()(std::function f) { _this.SetCompletionCallbackImpl(std::move(f)); @@ -251,7 +253,7 @@ IE_SUPPRESS_DEPRECATED_START /** * @private */ -template<> +template <> struct InferRequest::SetCallback { void operator()(IInferRequest::CompletionCallback f) { _this.SetCompletionCallbackImpl(std::move(f)); diff --git a/inference-engine/src/inference_engine/include/ie/cpp/ie_memory_state.hpp b/inference-engine/src/inference_engine/include/ie/cpp/ie_memory_state.hpp index 8d54f79f06c..ab1807dac4b 100644 --- a/inference-engine/src/inference_engine/include/ie/cpp/ie_memory_state.hpp +++ b/inference-engine/src/inference_engine/include/ie/cpp/ie_memory_state.hpp @@ -10,12 +10,12 @@ #pragma once -#include #include +#include +#include "details/ie_so_loader.h" #include "ie_api.h" #include "ie_blob.h" -#include "details/ie_so_loader.h" namespace InferenceEngine { @@ -25,16 +25,16 @@ class IVariableStateInternal; * @brief VariableState class */ class INFERENCE_ENGINE_API_CLASS(VariableState) { - details::SharedObjectLoader _so; - std::shared_ptr _impl; + details::SharedObjectLoader _so; + std::shared_ptr _impl; /** * @brief Constructs VariableState from the initialized std::shared_ptr * @param impl Initialized shared pointer - * @param so Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin object is destroyed. + * @param so Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin + * object is destroyed. */ - VariableState(const details::SharedObjectLoader& so, - const std::shared_ptr& impl); + VariableState(const details::SharedObjectLoader& so, const std::shared_ptr& impl); friend class InferRequest; friend class ExecutableNetwork; @@ -52,7 +52,7 @@ public: /** * @brief Gets name of current variable state, if length of array is not enough name is truncated by len, null - * terminator is inserted as well. As variable state name `variable_id` from according `ReadValue` used. + * terminator is inserted as well. As variable state name `variable_id` from according `ReadValue` used. 
* @return A string representing a state name */ std::string GetName() const; diff --git a/inference-engine/src/inference_engine/include/ie/details/ie_blob_iterator.hpp b/inference-engine/src/inference_engine/include/ie/details/ie_blob_iterator.hpp index 8025fbca8a0..084066a6f6b 100644 --- a/inference-engine/src/inference_engine/include/ie/details/ie_blob_iterator.hpp +++ b/inference-engine/src/inference_engine/include/ie/details/ie_blob_iterator.hpp @@ -4,7 +4,7 @@ /** * @brief A header file for the BlobIterator class - * + * * @file ie_blob_iterator.hpp */ @@ -31,7 +31,7 @@ public: * @param lk Rvalue of the memory instance to move from * @param offset Size of offset in memory */ - explicit BlobIterator(LockedMemory&& lk, size_t offset = 0): _mem(std::move(lk)), _offset(offset) {} + explicit BlobIterator(LockedMemory&& lk, size_t offset = 0) : _mem(std::move(lk)), _offset(offset) {} /** * @brief Increments an offset of the current BlobIterator instance diff --git a/inference-engine/src/inference_engine/include/ie/details/ie_pre_allocator.hpp b/inference-engine/src/inference_engine/include/ie/details/ie_pre_allocator.hpp index 0e598cbb12f..949a31b646d 100644 --- a/inference-engine/src/inference_engine/include/ie/details/ie_pre_allocator.hpp +++ b/inference-engine/src/inference_engine/include/ie/details/ie_pre_allocator.hpp @@ -4,7 +4,7 @@ /** * @brief The header file defines utility PreAllocator class - * + * * @file ie_pre_allocator.hpp */ #pragma once @@ -23,7 +23,7 @@ class PreAllocator final : public IAllocator { size_t _sizeInBytes; public: - PreAllocator(void* ptr, size_t bytes_size): _actualData(ptr), _sizeInBytes(bytes_size) {} + PreAllocator(void* ptr, size_t bytes_size) : _actualData(ptr), _sizeInBytes(bytes_size) {} /** * @brief Locks a handle to heap memory accessible by any memory manipulation routines * @return The generic pointer to a memory buffer diff --git a/inference-engine/src/inference_engine/include/ie/details/ie_so_loader.h b/inference-engine/src/inference_engine/include/ie/details/ie_so_loader.h index dc03b31655a..6497284d98a 100644 --- a/inference-engine/src/inference_engine/include/ie/details/ie_so_loader.h +++ b/inference-engine/src/inference_engine/include/ie/details/ie_so_loader.h @@ -41,7 +41,7 @@ public: * @brief Loads a library with the name specified. 
* @param pluginName Full or relative path to the plugin library */ - explicit SharedObjectLoader(const char * pluginName); + explicit SharedObjectLoader(const char* pluginName); /** * @brief A destructor diff --git a/inference-engine/src/inference_engine/include/ie/details/ie_so_pointer.hpp b/inference-engine/src/inference_engine/include/ie/details/ie_so_pointer.hpp index 6c0e4bdd333..8bfd4367bd4 100644 --- a/inference-engine/src/inference_engine/include/ie/details/ie_so_pointer.hpp +++ b/inference-engine/src/inference_engine/include/ie/details/ie_so_pointer.hpp @@ -9,10 +9,10 @@ #pragma once #include +#include #include #include #include -#include #include "ie_common.h" #include "ie_so_loader.h" @@ -31,7 +31,8 @@ class SOCreatorTrait {}; * @tparam C A char type */ template -using enableIfSupportedChar = typename std::enable_if<(std::is_same::value || std::is_same::value)>::type; +using enableIfSupportedChar = + typename std::enable_if<(std::is_same::value || std::is_same::value)>::type; /** * @brief This class instantiate object using shared library @@ -44,8 +45,10 @@ class SOPointer { IE_SUPPRESS_DEPRECATED_START struct HasRelease { - template static char test(decltype(&C::Release)); - template static long test(...); + template + static char test(decltype(&C::Release)); + template + static long test(...); constexpr static const bool value = sizeof(test(nullptr)) == sizeof(char); }; IE_SUPPRESS_DEPRECATED_END @@ -60,10 +63,8 @@ public: * @brief The main constructor * @param name Name of a shared library file */ - template > - SOPointer(const std::basic_string & name) - : _so(name.c_str()) { + template > + SOPointer(const std::basic_string& name) : _so(name.c_str()) { Load(std::integral_constant{}); } @@ -78,8 +79,7 @@ public: * @brief Constructs an object with existing loader * @param so Existing pointer to a library loader */ - explicit SOPointer(const SharedObjectLoader& so) - : _so(so) { + explicit SOPointer(const SharedObjectLoader& so) : _so(so) { Load(std::integral_constant{}); } @@ -88,9 +88,8 @@ public: * @param that copied SOPointer object */ template - SOPointer(const SOPointer& that) - : _so(that._so), - _ptr(std::dynamic_pointer_cast(that._ptr)) { + SOPointer(const SOPointer& that) : _so(that._so), + _ptr(std::dynamic_pointer_cast(that._ptr)) { IE_ASSERT(_ptr != nullptr); } @@ -123,7 +122,7 @@ public: return _so; } - operator std::shared_ptr& () noexcept { + operator std::shared_ptr&() noexcept { return _ptr; } @@ -136,7 +135,8 @@ protected: void* create = nullptr; try { create = _so.get_symbol((SOCreatorTrait::name + std::string("Shared")).c_str()); - } catch (const NotFound&) {} + } catch (const NotFound&) { + } if (create == nullptr) { create = _so.get_symbol(SOCreatorTrait::name); using CreateF = StatusCode(T*&, ResponseDesc*); @@ -144,17 +144,23 @@ protected: ResponseDesc desc; StatusCode sts = reinterpret_cast(create)(object, &desc); if (sts != OK) { - IE_EXCEPTION_SWITCH(sts, ExceptionType, - InferenceEngine::details::ThrowNow{} <<= std::stringstream{} << IE_LOCATION << desc.msg) + IE_EXCEPTION_SWITCH(sts, + ExceptionType, + InferenceEngine::details::ThrowNow{} <<= + std::stringstream{} << IE_LOCATION << desc.msg) } IE_SUPPRESS_DEPRECATED_START - _ptr = std::shared_ptr(object, [] (T* ptr){ptr->Release();}); + _ptr = std::shared_ptr(object, [](T* ptr) { + ptr->Release(); + }); IE_SUPPRESS_DEPRECATED_END } else { using CreateF = void(std::shared_ptr&); reinterpret_cast(create)(_ptr); } - } catch(...) {details::Rethrow();} + } catch (...) 
{ + details::Rethrow(); + } } /** @@ -164,7 +170,9 @@ protected: try { using CreateF = void(std::shared_ptr&); reinterpret_cast(_so.get_symbol(SOCreatorTrait::name))(_ptr); - } catch(...) {details::Rethrow();} + } catch (...) { + details::Rethrow(); + } } /** diff --git a/inference-engine/src/inference_engine/include/ie/gna/gna_config.hpp b/inference-engine/src/inference_engine/include/ie/gna/gna_config.hpp index 3433ab58887..a2bcb4016aa 100644 --- a/inference-engine/src/inference_engine/include/ie/gna/gna_config.hpp +++ b/inference-engine/src/inference_engine/include/ie/gna/gna_config.hpp @@ -31,36 +31,36 @@ namespace GNAConfigParams { */ #define GNA_CONFIG_VALUE(name) InferenceEngine::GNAConfigParams::GNA_##name -#define DECLARE_GNA_CONFIG_KEY(name) DECLARE_CONFIG_KEY(GNA_##name) +#define DECLARE_GNA_CONFIG_KEY(name) DECLARE_CONFIG_KEY(GNA_##name) #define DECLARE_GNA_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(GNA_##name) /** -* @brief Scale factor that is calculated by user, in order to use static quantisation feature -* This option should be used with floating point value serialized to string with decimal separator equals to . (dot) -* @details For multiple input case, individual scale factors can be passed, using KEY_GNA_SCALE_FACTOR[_input_layer_name] -* where input_layer can be obtained from from CNNNetwork::GetInputsInfo -*/ + * @brief Scale factor that is calculated by user, in order to use static quantisation feature + * This option should be used with floating point value serialized to string with decimal separator equals to . (dot) + * @details For multiple input case, individual scale factors can be passed, using + * KEY_GNA_SCALE_FACTOR[_input_layer_name] where input_layer can be obtained from from CNNNetwork::GetInputsInfo + */ DECLARE_GNA_CONFIG_KEY(SCALE_FACTOR); /** -* @brief By default gna api works with Int16 weights precision, however this can be adjusted if necessary, -* currently supported values are I16, I8 -*/ + * @brief By default gna api works with Int16 weights precision, however this can be adjusted if necessary, + * currently supported values are I16, I8 + */ DECLARE_GNA_CONFIG_KEY(PRECISION); /** -* @brief if turned on, dump GNA firmware model into specified file -*/ + * @brief if turned on, dump GNA firmware model into specified file + */ DECLARE_GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE); /** -* @brief information on GNA generation chosen for firmware model dump, can be overridden by GNA3 -*/ + * @brief information on GNA generation chosen for firmware model dump, can be overridden by GNA3 + */ DECLARE_GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE_GENERATION); /** -* @brief GNA proc_type setting that should be one of GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_EXACT -*/ + * @brief GNA proc_type setting that should be one of GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_EXACT + */ DECLARE_GNA_CONFIG_KEY(DEVICE_MODE); DECLARE_GNA_CONFIG_VALUE(AUTO); @@ -79,62 +79,63 @@ DECLARE_GNA_CONFIG_VALUE(AVX2); DECLARE_GNA_CONFIG_VALUE(AVX2_EXACT); /** -* @brief The option to override the GNA HW execution target. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0. -* By default (in case of no value set) the behavior depends on GNA HW availability: -* If GNA HW is present, use the option corresponding to this HW. -* If HW is not present, use the option corresponding to the latest fully supported GNA HW generation. -* A fully supported GNA HW generation means it must be supported by booth the OV GNA Plugin and the core GNA Library. 
-* For the GNA Library 2.0.X.Y, the latest supported GNA HW generation corresponds to GNA_TARGET_2_0. -* For the GNA Library 2.1.X.Y, the latest supported GNA HW generation corresponds to GNA_TARGET_3_0. -* For the OV GNA Plugin 2021.4, the latest supported GNA HW generation corresponds to GNA_TARGET_3_0. -*/ + * @brief The option to override the GNA HW execution target. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0. + * By default (in case of no value set) the behavior depends on GNA HW availability: + * If GNA HW is present, use the option corresponding to this HW. + * If HW is not present, use the option corresponding to the latest fully supported GNA HW generation. + * A fully supported GNA HW generation means it must be supported by booth the OV GNA Plugin and the core GNA Library. + * For the GNA Library 2.0.X.Y, the latest supported GNA HW generation corresponds to GNA_TARGET_2_0. + * For the GNA Library 2.1.X.Y, the latest supported GNA HW generation corresponds to GNA_TARGET_3_0. + * For the OV GNA Plugin 2021.4, the latest supported GNA HW generation corresponds to GNA_TARGET_3_0. + */ DECLARE_GNA_CONFIG_KEY(EXEC_TARGET); DECLARE_GNA_CONFIG_VALUE(TARGET_2_0); DECLARE_GNA_CONFIG_VALUE(TARGET_3_0); /** -* @brief The option to override the GNA HW compile target. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0. -* By default the same as GNA_EXEC_TARGET. -*/ + * @brief The option to override the GNA HW compile target. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0. + * By default the same as GNA_EXEC_TARGET. + */ DECLARE_GNA_CONFIG_KEY(COMPILE_TARGET); /** -* @brief if enabled produced minimum memory footprint for loaded network in GNA memory, default value is YES -*/ + * @brief if enabled produced minimum memory footprint for loaded network in GNA memory, default value is YES + */ DECLARE_GNA_CONFIG_KEY(COMPACT_MODE); /** -* @brief The option to enable/disable uniformly distributed PWL algorithm. -* By default (in case of NO value set) the optimized algorithm called "Recursive Descent Algorithm for Finding -* the Optimal Minimax Piecewise Linear Approximation of Convex Functions is used. -* If value is YES then simple uniform distribution used to create PWL approximation of activation functions -* Uniform distribution usually gives poor approximation with same number of segments -*/ + * @brief The option to enable/disable uniformly distributed PWL algorithm. + * By default (in case of NO value set) the optimized algorithm called "Recursive Descent Algorithm for Finding + * the Optimal Minimax Piecewise Linear Approximation of Convex Functions is used. + * If value is YES then simple uniform distribution used to create PWL approximation of activation functions + * Uniform distribution usually gives poor approximation with same number of segments + */ DECLARE_GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN); /** -* @brief The option to allow to specify the maximum error percent that the optimized algorithm finding -* will use to find PWL functions. -* By default (in case of NO value set), 1.0 value is used. -*/ + * @brief The option to allow to specify the maximum error percent that the optimized algorithm finding + * will use to find PWL functions. + * By default (in case of NO value set), 1.0 value is used. + */ DECLARE_GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT); /** -* @brief By default, the GNA plugin uses one worker thread for inference computations. -* This parameter allows you to create up to 127 threads for software modes. 
-* -* Note that multithreading mode does not guarantee the same computation order as order -* of issuing. Additionally, in this case, software modes do not implement any serializations. -*/ + * @brief By default, the GNA plugin uses one worker thread for inference computations. + * This parameter allows you to create up to 127 threads for software modes. + * + * Note that multithreading mode does not guarantee the same computation order as order + * of issuing. Additionally, in this case, software modes do not implement any serializations. + */ DECLARE_GNA_CONFIG_KEY(LIB_N_THREADS); } // namespace GNAConfigParams namespace Metrics { - /** - * @brief Metric to get a std::string of GNA Library version, usually in the form ... - */ - DECLARE_METRIC_KEY(GNA_LIBRARY_FULL_VERSION, std::string); +/** + * @brief Metric to get a std::string of GNA Library version, usually in the form + * ... + */ +DECLARE_METRIC_KEY(GNA_LIBRARY_FULL_VERSION, std::string); } // namespace Metrics namespace PluginConfigParams { diff --git a/inference-engine/src/inference_engine/include/ie/gpu/details/gpu_context_helpers.hpp b/inference-engine/src/inference_engine/include/ie/gpu/details/gpu_context_helpers.hpp index 4a7cdca6497..3f2e7b77212 100644 --- a/inference-engine/src/inference_engine/include/ie/gpu/details/gpu_context_helpers.hpp +++ b/inference-engine/src/inference_engine/include/ie/gpu/details/gpu_context_helpers.hpp @@ -11,27 +11,29 @@ #include +#include "ie_parameter.hpp" + namespace InferenceEngine { namespace gpu { namespace details { /** -* @brief This wrapper class is used to obtain low-level handles -* from remote blob or context object parameters. -*/ + * @brief This wrapper class is used to obtain low-level handles + * from remote blob or context object parameters. 
+ */ class param_map_obj_getter { protected: /** - * @brief Template function that returns specified - * object parameter typecasted to desired user type - */ + * @brief Template function that returns specified + * object parameter typecasted to desired user type + */ template Result _ObjFromParams(const ParamMap& params, - const std::string& handle_Key, - const std::string& type_Key, - const std::string& obj_T1, - const std::string& obj_T2 = "__") const { + const std::string& handle_Key, + const std::string& type_Key, + const std::string& obj_T1, + const std::string& obj_T2 = "__") const { auto itrType = params.find(type_Key); if (itrType == params.end()) IE_THROW() << "Parameter of type " << type_Key << " not found"; @@ -50,9 +52,9 @@ protected: } /** - * @brief Same as _ObjFromParams(), but should be used if check - * for object type is not required - */ + * @brief Same as _ObjFromParams(), but should be used if check + * for object type is not required + */ template Result _ObjFromParamSimple(const ParamMap& params, const std::string& handle_Key) const { auto itrHandle = params.find(handle_Key); @@ -65,11 +67,10 @@ protected: } /** - * @brief Template function that extracts string value - * from map entry under specified key - */ - std::string _StrFromParams(const ParamMap& params, - std::string Key) const { + * @brief Template function that extracts string value + * from map entry under specified key + */ + std::string _StrFromParams(const ParamMap& params, std::string Key) const { auto itrType = params.find(Key); if (itrType == params.end()) IE_THROW() << "Parameter key " << Key << " not found"; diff --git a/inference-engine/src/inference_engine/include/ie/gpu/gpu_config.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_config.hpp index 96f8754ac86..003af2bb689 100644 --- a/inference-engine/src/inference_engine/include/ie/gpu/gpu_config.hpp +++ b/inference-engine/src/inference_engine/include/ie/gpu/gpu_config.hpp @@ -20,7 +20,7 @@ namespace Metrics { * @def GPU_METRIC_KEY(name) * @brief shortcut for defining GPU plugin metrics */ -#define GPU_METRIC_KEY(name) METRIC_KEY(GPU_##name) +#define GPU_METRIC_KEY(name) METRIC_KEY(GPU_##name) #define DECLARE_GPU_METRIC_KEY(name, ...) DECLARE_METRIC_KEY(GPU_##name, __VA_ARGS__) /** @@ -30,7 +30,8 @@ namespace Metrics { #define DECLARE_GPU_METRIC_VALUE(name) DECLARE_METRIC_VALUE(GPU_##name) /** - * @brief Metric which defines size of memory in bytes available for the device. For iGPU it returns host memory size, for dGPU - dedicated gpu memory size + * @brief Metric which defines size of memory in bytes available for the device. For iGPU it returns host memory size, + * for dGPU - dedicated gpu memory size */ DECLARE_GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE, uint64_t); @@ -60,8 +61,8 @@ namespace GPUConfigParams { /** * @brief shortcut for defining configuration keys */ -#define GPU_CONFIG_KEY(name) InferenceEngine::GPUConfigParams::_CONFIG_KEY(GPU_##name) -#define DECLARE_GPU_CONFIG_KEY(name) DECLARE_CONFIG_KEY(GPU_##name) +#define GPU_CONFIG_KEY(name) InferenceEngine::GPUConfigParams::_CONFIG_KEY(GPU_##name) +#define DECLARE_GPU_CONFIG_KEY(name) DECLARE_CONFIG_KEY(GPU_##name) #define DECLARE_GPU_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(GPU_##name) /** @@ -93,10 +94,11 @@ DECLARE_GPU_CONFIG_KEY(NV12_TWO_INPUTS); DECLARE_GPU_CONFIG_KEY(MAX_NUM_THREADS); /** - * @brief Turning on this key enables to unroll recurrent layers such as TensorIterator or Loop with fixed iteration count. - * This key is turned on by default. 
Turning this key on will achieve better inference performance for loops with not too many iteration counts (less than 16, as a rule of thumb). - * Turning this key off will achieve better performance for both graph loading time and inference time with many iteration counts (greater than 16). - * Note that turning this key on will increase the graph loading time in proportion to the iteration counts. + * @brief Turning on this key enables to unroll recurrent layers such as TensorIterator or Loop with fixed iteration + * count. This key is turned on by default. Turning this key on will achieve better inference performance for loops with + * not too many iteration counts (less than 16, as a rule of thumb). Turning this key off will achieve better + * performance for both graph loading time and inference time with many iteration counts (greater than 16). Note that + * turning this key on will increase the graph loading time in proportion to the iteration counts. * Thus, this key should be turned off if graph loading time is considered to be most important target to optimize.*/ DECLARE_GPU_CONFIG_KEY(ENABLE_LOOP_UNROLLING); diff --git a/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_dx.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_dx.hpp index 1a529e56c78..dc7eece7bce 100644 --- a/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_dx.hpp +++ b/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_dx.hpp @@ -11,13 +11,13 @@ */ #pragma once +#include + #include #include #include "gpu/gpu_context_api_ocl.hpp" -#include - namespace InferenceEngine { namespace gpu { @@ -37,12 +37,13 @@ public: /** * @brief ID3D11Device conversion operator for the D3DContext object. - * @return Pointer to underlying ID3D11Device interface + * @return Pointer to underlying ID3D11Device interface */ operator ID3D11Device*() { return _ObjFromParams(getParams(), - GPU_PARAM_KEY(VA_DEVICE), - GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED)); + GPU_PARAM_KEY(VA_DEVICE), + GPU_PARAM_KEY(CONTEXT_TYPE), + GPU_PARAM_VALUE(VA_SHARED)); } }; @@ -67,12 +68,13 @@ public: /** * @brief ID3D11Buffer conversion operator for the D3DContext object. - * @return Pointer to underlying ID3D11Buffer interface + * @return Pointer to underlying ID3D11Buffer interface */ operator ID3D11Buffer*() { return _ObjFromParams(getParams(), - GPU_PARAM_KEY(DEV_OBJECT_HANDLE), - GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(DX_BUFFER)); + GPU_PARAM_KEY(DEV_OBJECT_HANDLE), + GPU_PARAM_KEY(SHARED_MEM_TYPE), + GPU_PARAM_VALUE(DX_BUFFER)); } }; @@ -97,12 +99,13 @@ public: /** * @brief ID3D11Texture2D conversion operator for the D3DContext object. 
- * @return Pointer to underlying ID3D11Texture2D interface + * @return Pointer to underlying ID3D11Texture2D interface */ operator ID3D11Texture2D*() { return _ObjFromParams(getParams(), - GPU_PARAM_KEY(DEV_OBJECT_HANDLE), - GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE)); + GPU_PARAM_KEY(DEV_OBJECT_HANDLE), + GPU_PARAM_KEY(SHARED_MEM_TYPE), + GPU_PARAM_VALUE(VA_SURFACE)); } /** @@ -111,8 +114,9 @@ public: */ uint32_t plane() { return _ObjFromParams(getParams(), - GPU_PARAM_KEY(VA_PLANE), - GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE)); + GPU_PARAM_KEY(VA_PLANE), + GPU_PARAM_KEY(SHARED_MEM_TYPE), + GPU_PARAM_VALUE(VA_SURFACE)); } }; @@ -125,18 +129,19 @@ public: * @param nv12_surf A ID3D11Texture2D instance to create NV12 blob from * @return NV12 remote blob */ -static inline Blob::Ptr make_shared_blob_nv12(size_t height, size_t width, RemoteContext::Ptr ctx, ID3D11Texture2D* nv12_surf) { +static inline Blob::Ptr make_shared_blob_nv12(size_t height, + size_t width, + RemoteContext::Ptr ctx, + ID3D11Texture2D* nv12_surf) { // despite of layout, blob dimensions always follow in N,C,H,W order - TensorDesc desc(Precision::U8, { 1, 1, height, width }, Layout::NHWC); + TensorDesc desc(Precision::U8, {1, 1, height, width}, Layout::NHWC); - ParamMap blobParams = { - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) }, - { GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast(nv12_surf) }, - { GPU_PARAM_KEY(VA_PLANE), uint32_t(0) } - }; + ParamMap blobParams = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE)}, + {GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast(nv12_surf)}, + {GPU_PARAM_KEY(VA_PLANE), uint32_t(0)}}; Blob::Ptr y_blob = std::dynamic_pointer_cast(ctx->CreateBlob(desc, blobParams)); - TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC); + TensorDesc uvdesc(Precision::U8, {1, 2, height / 2, width / 2}, Layout::NHWC); blobParams[GPU_PARAM_KEY(MEM_HANDLE)] = static_cast(nv12_surf); blobParams[GPU_PARAM_KEY(VA_PLANE)] = uint32_t(1); Blob::Ptr uv_blob = std::dynamic_pointer_cast(ctx->CreateBlob(uvdesc, blobParams)); @@ -152,10 +157,12 @@ static inline Blob::Ptr make_shared_blob_nv12(size_t height, size_t width, Remot * @return A shared remote context instance */ static inline D3DContext::Ptr make_shared_context(Core& core, std::string deviceName, ID3D11Device* device) { + // clang-format off ParamMap contextParams = { - { GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED) }, - { GPU_PARAM_KEY(VA_DEVICE), static_cast(device) } + {GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED)}, + {GPU_PARAM_KEY(VA_DEVICE), static_cast(device)} }; + // clang-format on return std::dynamic_pointer_cast(core.CreateContext(deviceName, contextParams)); } @@ -172,10 +179,8 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext:: IE_THROW() << "Invalid remote context passed"; } - ParamMap params = { - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(DX_BUFFER) }, - { GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast(buffer) } - }; + ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(DX_BUFFER)}, + {GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast(buffer)}}; return std::dynamic_pointer_cast(casted->CreateBlob(desc, params)); } @@ -188,16 +193,17 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext:: * @return Smart pointer to created RemoteBlob object cast to base class * @note The underlying ID3D11Texture2D can also be a plane of output surface of DXGI video decoder 
*/ -static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::Ptr ctx, ID3D11Texture2D* surface, uint32_t plane = 0) { +static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, + RemoteContext::Ptr ctx, + ID3D11Texture2D* surface, + uint32_t plane = 0) { auto casted = std::dynamic_pointer_cast(ctx); if (nullptr == casted) { IE_THROW() << "Invalid remote context passed"; } - ParamMap params = { - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) }, - { GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast(surface) }, - { GPU_PARAM_KEY(VA_PLANE), plane } - }; + ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE)}, + {GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast(surface)}, + {GPU_PARAM_KEY(VA_PLANE), plane}}; return std::dynamic_pointer_cast(casted->CreateBlob(desc, params)); } diff --git a/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_ocl.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_ocl.hpp index 357b58d163b..8ab1393291d 100644 --- a/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_ocl.hpp +++ b/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_ocl.hpp @@ -13,13 +13,12 @@ #include #include -#include "ie_compound_blob.h" -#include "ie_remote_context.hpp" -#include "ie_core.hpp" - -#include "gpu/gpu_params.hpp" -#include "gpu/gpu_ocl_wrapper.hpp" #include "gpu/details/gpu_context_helpers.hpp" +#include "gpu/gpu_ocl_wrapper.hpp" +#include "gpu/gpu_params.hpp" +#include "ie_compound_blob.h" +#include "ie_core.hpp" +#include "ie_remote_context.hpp" namespace InferenceEngine { @@ -42,8 +41,11 @@ public: * @return `cl_context` */ cl_context get() { - return _ObjFromParams(getParams(), GPU_PARAM_KEY(OCL_CONTEXT), - GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(OCL), GPU_PARAM_VALUE(VA_SHARED)); + return _ObjFromParams(getParams(), + GPU_PARAM_KEY(OCL_CONTEXT), + GPU_PARAM_KEY(CONTEXT_TYPE), + GPU_PARAM_VALUE(OCL), + GPU_PARAM_VALUE(VA_SHARED)); } /** @@ -105,8 +107,11 @@ public: * @return underlying OpenCL memory object handle */ cl_mem get() { - return _ObjFromParams(getParams(), GPU_PARAM_KEY(MEM_HANDLE), - GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER), GPU_PARAM_VALUE(DX_BUFFER)); + return _ObjFromParams(getParams(), + GPU_PARAM_KEY(MEM_HANDLE), + GPU_PARAM_KEY(SHARED_MEM_TYPE), + GPU_PARAM_VALUE(OCL_BUFFER), + GPU_PARAM_VALUE(DX_BUFFER)); } /** @@ -150,8 +155,11 @@ public: * @return `cl_mem` */ cl_mem get() { - return _ObjFromParams(getParams(), GPU_PARAM_KEY(MEM_HANDLE), - GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D), GPU_PARAM_VALUE(VA_SURFACE)); + return _ObjFromParams(getParams(), + GPU_PARAM_KEY(MEM_HANDLE), + GPU_PARAM_KEY(SHARED_MEM_TYPE), + GPU_PARAM_VALUE(OCL_IMAGE2D), + GPU_PARAM_VALUE(VA_SURFACE)); } /** @@ -179,7 +187,9 @@ public: * @param nv12_image_plane_uv cl::Image2D object containing UV plane data. 
* @return A shared remote blob instance */ -static inline Blob::Ptr make_shared_blob_nv12(RemoteContext::Ptr ctx, cl::Image2D& nv12_image_plane_y, cl::Image2D& nv12_image_plane_uv) { +static inline Blob::Ptr make_shared_blob_nv12(RemoteContext::Ptr ctx, + cl::Image2D& nv12_image_plane_y, + cl::Image2D& nv12_image_plane_uv) { auto casted = std::dynamic_pointer_cast(ctx); if (nullptr == casted) { IE_THROW() << "Invalid remote context passed"; @@ -189,15 +199,13 @@ static inline Blob::Ptr make_shared_blob_nv12(RemoteContext::Ptr ctx, cl::Image2 size_t height = nv12_image_plane_y.getImageInfo(); // despite of layout, blob dimensions always follow in N,C,H,W order - TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC); + TensorDesc ydesc(Precision::U8, {1, 1, height, width}, Layout::NHWC); - ParamMap blobParams = { - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D) }, - { GPU_PARAM_KEY(MEM_HANDLE), static_cast(nv12_image_plane_y.get()) } - }; + ParamMap blobParams = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D)}, + {GPU_PARAM_KEY(MEM_HANDLE), static_cast(nv12_image_plane_y.get())}}; Blob::Ptr y_blob = std::dynamic_pointer_cast(casted->CreateBlob(ydesc, blobParams)); - TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC); + TensorDesc uvdesc(Precision::U8, {1, 2, height / 2, width / 2}, Layout::NHWC); blobParams[GPU_PARAM_KEY(MEM_HANDLE)] = static_cast(nv12_image_plane_uv.get()); Blob::Ptr uv_blob = std::dynamic_pointer_cast(casted->CreateBlob(uvdesc, blobParams)); @@ -213,10 +221,8 @@ static inline Blob::Ptr make_shared_blob_nv12(RemoteContext::Ptr ctx, cl::Image2 * @return A shared remote context instance */ static inline RemoteContext::Ptr make_shared_context(Core& core, std::string deviceName, cl_context ctx) { - ParamMap contextParams = { - { GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(OCL) }, - { GPU_PARAM_KEY(OCL_CONTEXT), static_cast(ctx) } - }; + ParamMap contextParams = {{GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(OCL)}, + {GPU_PARAM_KEY(OCL_CONTEXT), static_cast(ctx)}}; return core.CreateContext(deviceName, contextParams); } @@ -243,10 +249,8 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext:: IE_THROW() << "Invalid remote context passed"; } - ParamMap params = { - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER) }, - { GPU_PARAM_KEY(MEM_HANDLE), static_cast(buffer.get()) } - }; + ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER)}, + {GPU_PARAM_KEY(MEM_HANDLE), static_cast(buffer.get())}}; return std::dynamic_pointer_cast(casted->CreateBlob(desc, params)); } @@ -263,10 +267,8 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext:: IE_THROW() << "Invalid remote context passed"; } - ParamMap params = { - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER) }, - { GPU_PARAM_KEY(MEM_HANDLE), static_cast(buffer) } - }; + ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER)}, + {GPU_PARAM_KEY(MEM_HANDLE), static_cast(buffer)}}; return std::dynamic_pointer_cast(casted->CreateBlob(desc, params)); } @@ -283,10 +285,8 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext:: IE_THROW() << "Invalid remote context passed"; } - ParamMap params = { - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D) }, - { GPU_PARAM_KEY(MEM_HANDLE), static_cast(image.get()) } - }; + ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), 
GPU_PARAM_VALUE(OCL_IMAGE2D)}, + {GPU_PARAM_KEY(MEM_HANDLE), static_cast(image.get())}}; return std::dynamic_pointer_cast(casted->CreateBlob(desc, params)); } diff --git a/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_va.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_va.hpp index 0784c729db1..d4f3d0d9a5b 100644 --- a/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_va.hpp +++ b/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_va.hpp @@ -16,7 +16,9 @@ #include "gpu/gpu_context_api_ocl.hpp" +// clang-format off #include +// clang-format on namespace InferenceEngine { @@ -41,8 +43,9 @@ public: */ operator VADisplay() { return _ObjFromParams(getParams(), - GPU_PARAM_KEY(VA_DEVICE), - GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED)); + GPU_PARAM_KEY(VA_DEVICE), + GPU_PARAM_KEY(CONTEXT_TYPE), + GPU_PARAM_VALUE(VA_SHARED)); } }; @@ -71,8 +74,9 @@ public: */ operator VASurfaceID() { return _ObjFromParams(getParams(), - GPU_PARAM_KEY(DEV_OBJECT_HANDLE), - GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE)); + GPU_PARAM_KEY(DEV_OBJECT_HANDLE), + GPU_PARAM_KEY(SHARED_MEM_TYPE), + GPU_PARAM_VALUE(VA_SURFACE)); } /** @@ -81,8 +85,9 @@ public: */ uint32_t plane() { return _ObjFromParams(getParams(), - GPU_PARAM_KEY(VA_PLANE), - GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE)); + GPU_PARAM_KEY(VA_PLANE), + GPU_PARAM_KEY(SHARED_MEM_TYPE), + GPU_PARAM_VALUE(VA_SURFACE)); } }; @@ -95,17 +100,18 @@ public: * @param nv12_surf NV12 `VASurfaceID` to create NV12 from * @return A remote NV12 blob wrapping `VASurfaceID` */ -static inline Blob::Ptr make_shared_blob_nv12(size_t height, size_t width, RemoteContext::Ptr ctx, VASurfaceID nv12_surf) { +static inline Blob::Ptr make_shared_blob_nv12(size_t height, + size_t width, + RemoteContext::Ptr ctx, + VASurfaceID nv12_surf) { // despite of layout, blob dimensions always follow in N, C, H, W order - TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC); - ParamMap blobParams = { - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) }, - { GPU_PARAM_KEY(DEV_OBJECT_HANDLE), nv12_surf }, - { GPU_PARAM_KEY(VA_PLANE), uint32_t(0) } - }; + TensorDesc ydesc(Precision::U8, {1, 1, height, width}, Layout::NHWC); + ParamMap blobParams = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE)}, + {GPU_PARAM_KEY(DEV_OBJECT_HANDLE), nv12_surf}, + {GPU_PARAM_KEY(VA_PLANE), uint32_t(0)}}; Blob::Ptr y_blob = std::dynamic_pointer_cast(ctx->CreateBlob(ydesc, blobParams)); - TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC); + TensorDesc uvdesc(Precision::U8, {1, 2, height / 2, width / 2}, Layout::NHWC); blobParams[GPU_PARAM_KEY(VA_PLANE)] = uint32_t(1); Blob::Ptr uv_blob = std::dynamic_pointer_cast(ctx->CreateBlob(uvdesc, blobParams)); @@ -120,10 +126,8 @@ static inline Blob::Ptr make_shared_blob_nv12(size_t height, size_t width, Remot * @return A remote context wrapping `VADisplay` */ static inline VAContext::Ptr make_shared_context(Core& core, std::string deviceName, VADisplay device) { - ParamMap contextParams = { - { GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED) }, - { GPU_PARAM_KEY(VA_DEVICE), static_cast(device) } - }; + ParamMap contextParams = {{GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED)}, + {GPU_PARAM_KEY(VA_DEVICE), static_cast(device)}}; return std::dynamic_pointer_cast(core.CreateContext(deviceName, contextParams)); } @@ -135,16 +139,17 @@ static inline VAContext::Ptr 
make_shared_context(Core& core, std::string deviceN * @param plane An index of a plane inside `VASurfaceID` to create blob from * @return A remote blob wrapping `VASurfaceID` */ -static inline VASurfaceBlob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::Ptr ctx, VASurfaceID surface, uint32_t plane = 0) { +static inline VASurfaceBlob::Ptr make_shared_blob(const TensorDesc& desc, + RemoteContext::Ptr ctx, + VASurfaceID surface, + uint32_t plane = 0) { auto casted = std::dynamic_pointer_cast(ctx); if (nullptr == casted) { IE_THROW() << "Invalid remote context passed"; } - ParamMap params = { - { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) }, - { GPU_PARAM_KEY(DEV_OBJECT_HANDLE), surface }, - { GPU_PARAM_KEY(VA_PLANE), plane } - }; + ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE)}, + {GPU_PARAM_KEY(DEV_OBJECT_HANDLE), surface}, + {GPU_PARAM_KEY(VA_PLANE), plane}}; return std::dynamic_pointer_cast(casted->CreateBlob(desc, params)); } diff --git a/inference-engine/src/inference_engine/include/ie/gpu/gpu_ocl_wrapper.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_ocl_wrapper.hpp index 496f0974ad5..6cbcda9aa80 100644 --- a/inference-engine/src/inference_engine/include/ie/gpu/gpu_ocl_wrapper.hpp +++ b/inference-engine/src/inference_engine/include/ie/gpu/gpu_ocl_wrapper.hpp @@ -15,32 +15,32 @@ */ #ifndef CL_HPP_ENABLE_EXCEPTIONS -# define CL_HPP_ENABLE_EXCEPTIONS +# define CL_HPP_ENABLE_EXCEPTIONS #endif #ifdef CL_HPP_MINIMUM_OPENCL_VERSION -# if CL_HPP_MINIMUM_OPENCL_VERSION < 120 -# error "CL_HPP_MINIMUM_OPENCL_VERSION must be >= 120" -# endif +# if CL_HPP_MINIMUM_OPENCL_VERSION < 120 +# error "CL_HPP_MINIMUM_OPENCL_VERSION must be >= 120" +# endif #else -# define CL_HPP_MINIMUM_OPENCL_VERSION 120 +# define CL_HPP_MINIMUM_OPENCL_VERSION 120 #endif #ifdef CL_HPP_TARGET_OPENCL_VERSION -# if CL_HPP_TARGET_OPENCL_VERSION < 120 -# error "CL_HPP_TARGET_OPENCL_VERSION must be >= 120" -# endif +# if CL_HPP_TARGET_OPENCL_VERSION < 120 +# error "CL_HPP_TARGET_OPENCL_VERSION must be >= 120" +# endif #else -# define CL_HPP_TARGET_OPENCL_VERSION 120 +# define CL_HPP_TARGET_OPENCL_VERSION 120 #endif #ifdef __GNUC__ -# pragma GCC diagnostic push -# pragma GCC system_header +# pragma GCC diagnostic push +# pragma GCC system_header #endif #include #ifdef __GNUC__ -# pragma GCC diagnostic pop +# pragma GCC diagnostic pop #endif diff --git a/inference-engine/src/inference_engine/include/ie/gpu/gpu_params.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_params.hpp index eac4cd3f409..466e514c8d4 100644 --- a/inference-engine/src/inference_engine/include/ie/gpu/gpu_params.hpp +++ b/inference-engine/src/inference_engine/include/ie/gpu/gpu_params.hpp @@ -41,8 +41,7 @@ namespace GPUContextParams { * @def DECLARE_GPU_PARAM_KEY(name, ...) * @brief Shortcut for defining object parameter keys */ -#define DECLARE_GPU_PARAM_KEY(name, ...) \ - static constexpr auto PARAM_##name = #name +#define DECLARE_GPU_PARAM_KEY(name, ...) 
static constexpr auto PARAM_##name = #name /** * @brief Shared device context type: can be either pure OpenCL (OCL) * or shared video decoder (VA_SHARED) context diff --git a/inference-engine/src/inference_engine/include/ie/hetero/hetero_plugin_config.hpp b/inference-engine/src/inference_engine/include/ie/hetero/hetero_plugin_config.hpp index 6aa99fc51c3..788f86a0e1e 100644 --- a/inference-engine/src/inference_engine/include/ie/hetero/hetero_plugin_config.hpp +++ b/inference-engine/src/inference_engine/include/ie/hetero/hetero_plugin_config.hpp @@ -24,7 +24,7 @@ namespace HeteroConfigParams { * @def HETERO_CONFIG_KEY(name) * @brief Shortcut for defining HETERO configuration keys */ -#define HETERO_CONFIG_KEY(name) InferenceEngine::HeteroConfigParams::_CONFIG_KEY(HETERO_##name) +#define HETERO_CONFIG_KEY(name) InferenceEngine::HeteroConfigParams::_CONFIG_KEY(HETERO_##name) #define DECLARE_HETERO_CONFIG_KEY(name) DECLARE_CONFIG_KEY(HETERO_##name) /** diff --git a/inference-engine/src/inference_engine/include/ie/ie_allocator.hpp b/inference-engine/src/inference_engine/include/ie/ie_allocator.hpp index c51c90776f9..f82904a4330 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_allocator.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_allocator.hpp @@ -9,9 +9,10 @@ */ #pragma once -#include "ie_api.h" #include +#include "ie_api.h" + namespace InferenceEngine { /** @@ -19,7 +20,7 @@ namespace InferenceEngine { */ enum LockOp { LOCK_FOR_READ = 0, //!< A flag to lock data for read - LOCK_FOR_WRITE //!< A flag to lock data for write + LOCK_FOR_WRITE //!< A flag to lock data for write }; /** @@ -60,7 +61,7 @@ public: virtual bool free(void* handle) noexcept = 0; protected: - ~IAllocator() = default; + ~IAllocator() = default; }; /** diff --git a/inference-engine/src/inference_engine/include/ie/ie_api.h b/inference-engine/src/inference_engine/include/ie/ie_api.h index 327cf522f52..538795b32be 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_api.h +++ b/inference-engine/src/inference_engine/include/ie/ie_api.h @@ -10,101 +10,101 @@ #pragma once #if defined(USE_STATIC_IE) || (defined(__GNUC__) && (__GNUC__ < 4)) -# define INFERENCE_ENGINE_API(...) extern "C" __VA_ARGS__ -# define INFERENCE_ENGINE_API_CPP(...) __VA_ARGS__ -# define INFERENCE_ENGINE_API_CLASS(...) __VA_ARGS__ -# define INFERENCE_ENGINE_CDECL __attribute__((cdecl)) +# define INFERENCE_ENGINE_API(...) extern "C" __VA_ARGS__ +# define INFERENCE_ENGINE_API_CPP(...) __VA_ARGS__ +# define INFERENCE_ENGINE_API_CLASS(...) __VA_ARGS__ +# define INFERENCE_ENGINE_CDECL __attribute__((cdecl)) #else -# if defined(_WIN32) -# define INFERENCE_ENGINE_CDECL -# ifdef IMPLEMENT_INFERENCE_ENGINE_API -# define INFERENCE_ENGINE_API(...) extern "C" __declspec(dllexport) __VA_ARGS__ __cdecl -# define INFERENCE_ENGINE_API_CPP(...) __declspec(dllexport) __VA_ARGS__ __cdecl -# define INFERENCE_ENGINE_API_CLASS(...) __declspec(dllexport) __VA_ARGS__ -# else -# define INFERENCE_ENGINE_API(...) extern "C" __declspec(dllimport) __VA_ARGS__ __cdecl -# define INFERENCE_ENGINE_API_CPP(...) __declspec(dllimport) __VA_ARGS__ __cdecl -# define INFERENCE_ENGINE_API_CLASS(...) __declspec(dllimport) __VA_ARGS__ -# endif -# else -# define INFERENCE_ENGINE_CDECL __attribute__((cdecl)) -# define INFERENCE_ENGINE_API(...) extern "C" __attribute__((visibility("default"))) __VA_ARGS__ -# define INFERENCE_ENGINE_API_CPP(...) __attribute__((visibility("default"))) __VA_ARGS__ -# define INFERENCE_ENGINE_API_CLASS(...) 
__attribute__((visibility("default"))) __VA_ARGS__ -# endif +# if defined(_WIN32) +# define INFERENCE_ENGINE_CDECL +# ifdef IMPLEMENT_INFERENCE_ENGINE_API +# define INFERENCE_ENGINE_API(...) extern "C" __declspec(dllexport) __VA_ARGS__ __cdecl +# define INFERENCE_ENGINE_API_CPP(...) __declspec(dllexport) __VA_ARGS__ __cdecl +# define INFERENCE_ENGINE_API_CLASS(...) __declspec(dllexport) __VA_ARGS__ +# else +# define INFERENCE_ENGINE_API(...) extern "C" __declspec(dllimport) __VA_ARGS__ __cdecl +# define INFERENCE_ENGINE_API_CPP(...) __declspec(dllimport) __VA_ARGS__ __cdecl +# define INFERENCE_ENGINE_API_CLASS(...) __declspec(dllimport) __VA_ARGS__ +# endif +# else +# define INFERENCE_ENGINE_CDECL __attribute__((cdecl)) +# define INFERENCE_ENGINE_API(...) extern "C" __attribute__((visibility("default"))) __VA_ARGS__ +# define INFERENCE_ENGINE_API_CPP(...) __attribute__((visibility("default"))) __VA_ARGS__ +# define INFERENCE_ENGINE_API_CLASS(...) __attribute__((visibility("default"))) __VA_ARGS__ +# endif #endif #if defined(_WIN32) -# define INFERENCE_ENGINE_DEPRECATED(msg) __declspec(deprecated(msg)) +# define INFERENCE_ENGINE_DEPRECATED(msg) __declspec(deprecated(msg)) #elif defined __INTEL_COMPILER -# define INFERENCE_ENGINE_DEPRECATED(msg) __attribute__((deprecated(msg))) +# define INFERENCE_ENGINE_DEPRECATED(msg) __attribute__((deprecated(msg))) #elif defined(__GNUC__) -# define INFERENCE_ENGINE_DEPRECATED(msg) __attribute__((deprecated((msg)))) +# define INFERENCE_ENGINE_DEPRECATED(msg) __attribute__((deprecated((msg)))) #else -# define INFERENCE_ENGINE_DEPRECATED(msg) +# define INFERENCE_ENGINE_DEPRECATED(msg) #endif #if defined IMPLEMENT_INFERENCE_ENGINE_API || defined IMPLEMENT_INFERENCE_ENGINE_PLUGIN -# define INFERENCE_ENGINE_INTERNAL(msg) +# define INFERENCE_ENGINE_INTERNAL(msg) #else -# define INFERENCE_ENGINE_INTERNAL(msg) INFERENCE_ENGINE_DEPRECATED(msg) +# define INFERENCE_ENGINE_INTERNAL(msg) INFERENCE_ENGINE_DEPRECATED(msg) #endif // Suppress warning "-Wdeprecated-declarations" / C4996 #if defined(_MSC_VER) -# define IE_DO_PRAGMA(x) __pragma(x) +# define IE_DO_PRAGMA(x) __pragma(x) #elif defined(__GNUC__) -# define IE_DO_PRAGMA(x) _Pragma(#x) +# define IE_DO_PRAGMA(x) _Pragma(# x) #else -# define IE_DO_PRAGMA(x) +# define IE_DO_PRAGMA(x) #endif #if defined(_MSC_VER) && !defined(__clang__) -# define IE_SUPPRESS_DEPRECATED_START \ - IE_DO_PRAGMA(warning(push)) \ - IE_DO_PRAGMA(warning(disable : 4996)) -# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop)) +# define IE_SUPPRESS_DEPRECATED_START \ + IE_DO_PRAGMA(warning(push)) \ + IE_DO_PRAGMA(warning(disable : 4996)) +# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop)) #elif defined(__INTEL_COMPILER) -# define IE_SUPPRESS_DEPRECATED_START \ - IE_DO_PRAGMA(warning(push)) \ - IE_DO_PRAGMA(warning(disable : 1478)) - IE_DO_PRAGMA(warning(disable : 1786)) -# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop)) +# define IE_SUPPRESS_DEPRECATED_START \ + IE_DO_PRAGMA(warning(push)) \ + IE_DO_PRAGMA(warning(disable : 1478)) +IE_DO_PRAGMA(warning(disable : 1786)) +# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop)) #elif defined(__clang__) || ((__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ > 405)) -# define IE_SUPPRESS_DEPRECATED_START \ - IE_DO_PRAGMA(GCC diagnostic push) \ - IE_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations") -# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(GCC diagnostic pop) +# define IE_SUPPRESS_DEPRECATED_START \ + IE_DO_PRAGMA(GCC diagnostic push) \ + 
IE_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations") +# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(GCC diagnostic pop) #else -# define IE_SUPPRESS_DEPRECATED_START -# define IE_SUPPRESS_DEPRECATED_END +# define IE_SUPPRESS_DEPRECATED_START +# define IE_SUPPRESS_DEPRECATED_END #endif #ifdef _WIN32 -# define _IE_SUPPRESS_DEPRECATED_START_MSVC IE_SUPPRESS_DEPRECATED_START -# define _IE_SUPPRESS_DEPRECATED_END_MSVC IE_SUPPRESS_DEPRECATED_END +# define _IE_SUPPRESS_DEPRECATED_START_MSVC IE_SUPPRESS_DEPRECATED_START +# define _IE_SUPPRESS_DEPRECATED_END_MSVC IE_SUPPRESS_DEPRECATED_END #else -# define _IE_SUPPRESS_DEPRECATED_START_MSVC -# define _IE_SUPPRESS_DEPRECATED_END_MSVC +# define _IE_SUPPRESS_DEPRECATED_START_MSVC +# define _IE_SUPPRESS_DEPRECATED_END_MSVC #endif #if defined __GNUC__ && (__GNUC__ <= 4 || (__GNUC__ == 5 && __GNUC_MINOR__ <= 5) || \ (defined __i386__ || defined __arm__ || defined __aarch64__)) -# define _IE_SUPPRESS_DEPRECATED_START_GCC IE_SUPPRESS_DEPRECATED_START -# define _IE_SUPPRESS_DEPRECATED_END_GCC IE_SUPPRESS_DEPRECATED_END +# define _IE_SUPPRESS_DEPRECATED_START_GCC IE_SUPPRESS_DEPRECATED_START +# define _IE_SUPPRESS_DEPRECATED_END_GCC IE_SUPPRESS_DEPRECATED_END #else -# define _IE_SUPPRESS_DEPRECATED_START_GCC -# define _IE_SUPPRESS_DEPRECATED_END_GCC +# define _IE_SUPPRESS_DEPRECATED_START_GCC +# define _IE_SUPPRESS_DEPRECATED_END_GCC #endif #ifndef ENABLE_UNICODE_PATH_SUPPORT -# ifdef _WIN32 -# if defined __INTEL_COMPILER || defined _MSC_VER -# define ENABLE_UNICODE_PATH_SUPPORT -# endif -# elif defined(__GNUC__) && (__GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ > 2)) || defined(__clang__) -# define ENABLE_UNICODE_PATH_SUPPORT -# endif +# ifdef _WIN32 +# if defined __INTEL_COMPILER || defined _MSC_VER +# define ENABLE_UNICODE_PATH_SUPPORT +# endif +# elif defined(__GNUC__) && (__GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ > 2)) || defined(__clang__) +# define ENABLE_UNICODE_PATH_SUPPORT +# endif #endif /** @@ -114,17 +114,17 @@ */ #if defined(_WIN32) -# ifdef IMPLEMENT_INFERENCE_ENGINE_PLUGIN -# define INFERENCE_PLUGIN_API(type) extern "C" __declspec(dllexport) type -# else -# define INFERENCE_PLUGIN_API(type) extern "C" type -# endif +# ifdef IMPLEMENT_INFERENCE_ENGINE_PLUGIN +# define INFERENCE_PLUGIN_API(type) extern "C" __declspec(dllexport) type +# else +# define INFERENCE_PLUGIN_API(type) extern "C" type +# endif #elif (__GNUC__ >= 4) // NOLINT -# ifdef IMPLEMENT_INFERENCE_ENGINE_PLUGIN -# define INFERENCE_PLUGIN_API(type) extern "C" __attribute__((visibility("default"))) type -# else -# define INFERENCE_PLUGIN_API(type) extern "C" type -# endif +# ifdef IMPLEMENT_INFERENCE_ENGINE_PLUGIN +# define INFERENCE_PLUGIN_API(type) extern "C" __attribute__((visibility("default"))) type +# else +# define INFERENCE_PLUGIN_API(type) extern "C" type +# endif #else -# define INFERENCE_PLUGIN_API(TYPE) extern "C" TYPE +# define INFERENCE_PLUGIN_API(TYPE) extern "C" TYPE #endif diff --git a/inference-engine/src/inference_engine/include/ie/ie_blob.h b/inference-engine/src/inference_engine/include/ie/ie_blob.h index 2342d4cbcf5..b25d180747f 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_blob.h +++ b/inference-engine/src/inference_engine/include/ie/ie_blob.h @@ -19,13 +19,13 @@ #include #include +#include "details/ie_blob_iterator.hpp" +#include "details/ie_pre_allocator.hpp" #include "ie_allocator.hpp" #include "ie_common.h" #include "ie_layouts.h" #include "ie_locked_memory.hpp" #include "ie_precision.hpp" -#include 
"details/ie_blob_iterator.hpp" -#include "details/ie_pre_allocator.hpp" namespace InferenceEngine { @@ -120,7 +120,7 @@ public: * * @param tensorDesc Defines the layout and dims of the blob */ - explicit Blob(const TensorDesc& tensorDesc): tensorDesc(tensorDesc) {} + explicit Blob(const TensorDesc& tensorDesc) : tensorDesc(tensorDesc) {} /** * @brief Returns the tensor description @@ -146,7 +146,8 @@ public: * @return The total number of elements */ virtual size_t size() const noexcept { - if (tensorDesc.getLayout() == Layout::SCALAR) return 1; + if (tensorDesc.getLayout() == Layout::SCALAR) + return 1; return product(tensorDesc.getDims()); } @@ -233,7 +234,8 @@ protected: * @return Result of multiplication */ static size_t product(const SizeVector& dims) noexcept { - if (dims.empty()) return 0; + if (dims.empty()) + return 0; return std::accumulate(std::begin(dims), std::end(dims), (size_t)1, std::multiplies()); } @@ -278,7 +280,7 @@ std::shared_ptr as(const Blob::CPtr& blob) noexcept { * @note Any Blob implementation that represents a concept of a tensor in memory (for example, * TBlob) must be a subclass of MemoryBlob instead of Blob */ -class INFERENCE_ENGINE_API_CLASS(MemoryBlob): public Blob { +class INFERENCE_ENGINE_API_CLASS(MemoryBlob) : public Blob { public: /** * @brief A smart pointer to the MemoryBlob object @@ -300,7 +302,7 @@ public: * * @param tensorDesc Defines the layout and dims of the blob */ - explicit MemoryBlob(const TensorDesc& tensorDesc): Blob(tensorDesc) {} + explicit MemoryBlob(const TensorDesc& tensorDesc) : Blob(tensorDesc) {} /** * @brief Returns the tensor description @@ -323,7 +325,8 @@ public: * @return The total number of elements */ size_t size() const noexcept override { - if (tensorDesc.getLayout() == Layout::SCALAR) return 1; + if (tensorDesc.getLayout() == Layout::SCALAR) + return 1; return product(tensorDesc.getDims()); } @@ -493,7 +496,7 @@ public: * * @param tensorDesc Tensor description */ - explicit TBlob(const TensorDesc& tensorDesc): MemoryBlob(tensorDesc) {} + explicit TBlob(const TensorDesc& tensorDesc) : MemoryBlob(tensorDesc) {} /** * @brief The constructor creates a TBlob object with the specified dimensions and layout @@ -506,7 +509,7 @@ public: * @param data_size Length of the pre-allocated array. If not set, size is assumed equal * to the dot product of dims. 
*/ - TBlob(const TensorDesc& tensorDesc, T* ptr, size_t data_size = 0): MemoryBlob(tensorDesc) { + TBlob(const TensorDesc& tensorDesc, T* ptr, size_t data_size = 0) : MemoryBlob(tensorDesc) { if (data_size == 0) { data_size = size(); } @@ -528,8 +531,10 @@ public: * @param alloc An allocator */ TBlob(const TensorDesc& tensorDesc, const std::shared_ptr& alloc) - : MemoryBlob(tensorDesc), _allocator(alloc) { - if (_allocator == nullptr) IE_THROW() << "TBlob allocator was not initialized."; + : MemoryBlob(tensorDesc), + _allocator(alloc) { + if (_allocator == nullptr) + IE_THROW() << "TBlob allocator was not initialized."; } /** @@ -537,7 +542,7 @@ public: * * @param blob Source blob */ - TBlob(const TBlob& blob): MemoryBlob(blob.getTensorDesc()) { + TBlob(const TBlob& blob) : MemoryBlob(blob.getTensorDesc()) { copyFrom(blob); } @@ -546,7 +551,7 @@ public: * * @param blob rvalue to make a move from */ - TBlob(TBlob&& blob): MemoryBlob(blob.getTensorDesc()) { + TBlob(TBlob&& blob) : MemoryBlob(blob.getTensorDesc()) { moveFrom(blob); } @@ -592,11 +597,9 @@ public: return; } - _handle.reset( - rawHandle, - [allocator](void* rawHandle) { - allocator->free(rawHandle); - }); + _handle.reset(rawHandle, [allocator](void* rawHandle) { + allocator->free(rawHandle); + }); } bool deallocate() noexcept override { @@ -611,14 +614,14 @@ public: return std::move(lockme()); } - LockedMemory rwmap()noexcept override { + LockedMemory rwmap() noexcept override { return std::move(lockme()); } LockedMemory rmap() const noexcept override { return std::move(lockme()); } - LockedMemory wmap()noexcept override { + LockedMemory wmap() noexcept override { return std::move(lockme()); } @@ -725,7 +728,7 @@ protected: template LockedMemory lockme() const { return LockedMemory(_allocator.get(), getHandle(), 0); - // getTensorDesc().getBlockingDesc().getOffsetPadding()); + // getTensorDesc().getBlockingDesc().getOffsetPadding()); } const std::shared_ptr& getAllocator() const noexcept override { @@ -746,11 +749,10 @@ protected: * @param origBlob An original blob * @param roi A ROI object */ - TBlob(const TBlob& origBlob, const ROI& roi) : - MemoryBlob(make_roi_desc(origBlob.getTensorDesc(), roi, true)), - _allocator(origBlob._allocator) { - IE_ASSERT(origBlob._handle != nullptr) - << "Original Blob must be allocated before ROI creation"; + TBlob(const TBlob& origBlob, const ROI& roi) + : MemoryBlob(make_roi_desc(origBlob.getTensorDesc(), roi, true)), + _allocator(origBlob._allocator) { + IE_ASSERT(origBlob._handle != nullptr) << "Original Blob must be allocated before ROI creation"; _handle = origBlob._handle; } @@ -784,7 +786,7 @@ template inline typename InferenceEngine::TBlob::Ptr make_shared_blob(const TensorDesc& tensorDesc) { if (!tensorDesc.getPrecision().hasStorageType()) IE_THROW() << "Cannot make shared blob! " - << "The blob type cannot be used to store objects of current precision"; + << "The blob type cannot be used to store objects of current precision"; return std::make_shared>(tensorDesc); } @@ -798,11 +800,12 @@ inline typename InferenceEngine::TBlob::Ptr make_shared_blob(const TensorD * @return A shared pointer to the newly created blob of the given type */ template -inline typename InferenceEngine::TBlob::Ptr make_shared_blob(const TensorDesc& tensorDesc, Type* ptr, +inline typename InferenceEngine::TBlob::Ptr make_shared_blob(const TensorDesc& tensorDesc, + Type* ptr, size_t size = 0) { if (!tensorDesc.getPrecision().hasStorageType()) IE_THROW() << "Cannot make shared blob! 
" - << "The blob type cannot be used to store objects of current precision"; + << "The blob type cannot be used to store objects of current precision"; return std::make_shared>(tensorDesc, ptr, size); } @@ -816,10 +819,11 @@ inline typename InferenceEngine::TBlob::Ptr make_shared_blob(const TensorD */ template inline typename InferenceEngine::TBlob::Ptr make_shared_blob( - const TensorDesc& tensorDesc, const std::shared_ptr& alloc) { + const TensorDesc& tensorDesc, + const std::shared_ptr& alloc) { if (!tensorDesc.getPrecision().hasStorageType()) IE_THROW() << "Cannot make shared blob! " - << "The blob type cannot be used to store objects of current precision"; + << "The blob type cannot be used to store objects of current precision"; return std::make_shared>(tensorDesc, alloc); } diff --git a/inference-engine/src/inference_engine/include/ie/ie_common.h b/inference-engine/src/inference_engine/include/ie/ie_common.h index 99e4ab440c7..0a4a7857437 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_common.h +++ b/inference-engine/src/inference_engine/include/ie/ie_common.h @@ -11,18 +11,19 @@ #include #include +#include +#include #include #include -#include -#include -#include #include #include -#include +#include +#include + +#include "ie_api.h" -#include #ifndef NDEBUG -#include +# include #endif namespace InferenceEngine { /** @@ -58,9 +59,9 @@ using DataWeakPtr = std::weak_ptr; * @brief The method holds the user values to enable binding of data per graph node. */ union UserValue { - int v_int; //!< An integer value + int v_int; //!< An integer value float v_float; //!< A floating point value - void* v_ptr; //!< A pointer to a void + void* v_ptr; //!< A pointer to a void }; /** @@ -71,15 +72,15 @@ enum Layout : uint8_t { ANY = 0, //!< "any" layout // I/O data layouts - NCHW = 1, //!< NCHW layout for input / output blobs - NHWC = 2, //!< NHWC layout for input / output blobs + NCHW = 1, //!< NCHW layout for input / output blobs + NHWC = 2, //!< NHWC layout for input / output blobs NCDHW = 3, //!< NCDHW layout for input / output blobs NDHWC = 4, //!< NDHWC layout for input / output blobs // weight layouts - OIHW = 64, //!< NDHWC layout for operation weights - GOIHW = 65, //!< NDHWC layout for operation weights - OIDHW = 66, //!< NDHWC layout for operation weights + OIHW = 64, //!< NDHWC layout for operation weights + GOIHW = 65, //!< NDHWC layout for operation weights + OIDHW = 66, //!< NDHWC layout for operation weights GOIDHW = 67, //!< NDHWC layout for operation weights // Scalar @@ -189,9 +190,9 @@ struct InferenceEngineProfileInfo { * @brief Defines the general status of the layer */ enum LayerStatus { - NOT_RUN, //!< A layer is not executed + NOT_RUN, //!< A layer is not executed OPTIMIZED_OUT, //!< A layer is optimized out during graph optimization phase - EXECUTED //!< A layer is executed + EXECUTED //!< A layer is executed }; /** @@ -292,10 +293,12 @@ using ConstOutputsDataMap = std::map; using OutputsDataMap = std::map; namespace details { -struct INFERENCE_ENGINE_DEPRECATED("Use InferRequest::Exception") -INFERENCE_ENGINE_API_CLASS(InferenceEngineException) : public std::runtime_error { +struct INFERENCE_ENGINE_DEPRECATED("Use InferRequest::Exception") INFERENCE_ENGINE_API_CLASS(InferenceEngineException) + : public std::runtime_error { using std::runtime_error::runtime_error; - bool hasStatus() const {return true;} + bool hasStatus() const { + return true; + } StatusCode getStatus() const; }; } // namespace details @@ -311,18 +314,22 @@ IE_SUPPRESS_DEPRECATED_END /// 
@cond namespace details { - template struct ExceptionTraits; +template +struct ExceptionTraits; } -#define INFERENCE_ENGINE_DECLARE_EXCEPTION(ExceptionType, statusCode) \ -struct INFERENCE_ENGINE_API_CLASS(ExceptionType) final : public InferenceEngine::Exception { \ - using Exception::Exception; \ -}; \ -namespace details { \ -template<> struct ExceptionTraits { \ - static const char* string() {return "[ " #statusCode " ]";} \ -}; \ -} +#define INFERENCE_ENGINE_DECLARE_EXCEPTION(ExceptionType, statusCode) \ + struct INFERENCE_ENGINE_API_CLASS(ExceptionType) final : public InferenceEngine::Exception { \ + using Exception::Exception; \ + }; \ + namespace details { \ + template <> \ + struct ExceptionTraits { \ + static const char* string() { \ + return "[ " #statusCode " ]"; \ + } \ + }; \ + } /// @endcond /** @brief This class represents StatusCode::GENERAL_ERROR exception */ @@ -380,7 +387,7 @@ namespace details { /** * @brief Tag struct used to throw exception */ -template +template struct ThrowNow final { [[noreturn]] void operator<<=(const std::ostream& ostream) { std::ostringstream stream; @@ -391,31 +398,32 @@ struct ThrowNow final { /// @cond #ifndef NDEBUG -#define IE_LOCATION '\n' << __FILE__ << ':' << __LINE__<< ' ' +# define IE_LOCATION '\n' << __FILE__ << ':' << __LINE__ << ' ' #else -#define IE_LOCATION "" +# define IE_LOCATION "" #endif // NDEBUG - // WARNING: DO NOT USE THIS MACRO! Use openvino/pp.hpp macro library -#define IE_PP_EXPAND(X) X -#define IE_PP_NARG(...) IE_PP_EXPAND(IE_PP_NARG_(__VA_ARGS__, IE_PP_RSEQ_N())) -#define IE_PP_NARG_(...) IE_PP_EXPAND(IE_PP_ARG_N(__VA_ARGS__)) +#define IE_PP_EXPAND(X) X +#define IE_PP_NARG(...) IE_PP_EXPAND(IE_PP_NARG_(__VA_ARGS__, IE_PP_RSEQ_N())) +#define IE_PP_NARG_(...) IE_PP_EXPAND(IE_PP_ARG_N(__VA_ARGS__)) #define IE_PP_ARG_N(_0, _1, N, ...) N -#define IE_PP_RSEQ_N() 0, 1, 0 -#define IE_PP_NO_ARGS(NAME) , -#define IE_PP_CAT3_(x, y, z) x ## y ## z -#define IE_PP_CAT3(x, y, z) IE_PP_CAT3_(x, y, z) -#define IE_PP_OVERLOAD(NAME, ...) IE_PP_EXPAND(IE_PP_CAT3(NAME, _, IE_PP_EXPAND(IE_PP_NARG(IE_PP_NO_ARGS __VA_ARGS__ (NAME))))(__VA_ARGS__)) +#define IE_PP_RSEQ_N() 0, 1, 0 +#define IE_PP_NO_ARGS(NAME) , +#define IE_PP_CAT3_(x, y, z) x##y##z +#define IE_PP_CAT3(x, y, z) IE_PP_CAT3_(x, y, z) +#define IE_PP_OVERLOAD(NAME, ...) 
\ + IE_PP_EXPAND(IE_PP_CAT3(NAME, _, IE_PP_EXPAND(IE_PP_NARG(IE_PP_NO_ARGS __VA_ARGS__(NAME))))(__VA_ARGS__)) // ENDWARNING -#define IE_THROW_0() \ - InferenceEngine::details::ThrowNow {} <<= std::stringstream {} \ - << IE_LOCATION +#define IE_THROW_0() \ + InferenceEngine::details::ThrowNow{} <<= std::stringstream{} << IE_LOCATION -#define IE_THROW_1(ExceptionType) \ - InferenceEngine::details::ThrowNow {} <<= std::stringstream {} \ - << IE_LOCATION << InferenceEngine::details::ExceptionTraits::string() << ' ' +#define IE_THROW_1(ExceptionType) \ + InferenceEngine::details::ThrowNow{} <<= \ + std::stringstream{} << IE_LOCATION \ + << InferenceEngine::details::ExceptionTraits::string() \ + << ' ' /// @endcond /** @@ -429,31 +437,35 @@ struct ThrowNow final { * @brief Uses assert() function if NDEBUG is not defined, InferenceEngine exception otherwise */ #ifdef NDEBUG -#define IE_ASSERT(EXPRESSION) \ - if (!(EXPRESSION)) \ - IE_THROW(GeneralError) << " AssertionFailed: " << #EXPRESSION // NOLINT +# define IE_ASSERT(EXPRESSION) \ + if (!(EXPRESSION)) \ + IE_THROW(GeneralError) << " AssertionFailed: " << #EXPRESSION // NOLINT #else /** * @private */ struct NullStream { template - NullStream& operator<<(const T&) noexcept {return *this;} + NullStream& operator<<(const T&) noexcept { + return *this; + } }; -#define IE_ASSERT(EXPRESSION) \ - assert((EXPRESSION)); \ - InferenceEngine::details::NullStream() +# define IE_ASSERT(EXPRESSION) \ + assert((EXPRESSION)); \ + InferenceEngine::details::NullStream() #endif // NDEBUG /// @cond -#define THROW_IE_EXCEPTION \ - InferenceEngine::details::ThrowNow {} <<= std::stringstream {} \ - << IE_LOCATION +#define THROW_IE_EXCEPTION \ + InferenceEngine::details::ThrowNow{} <<= std::stringstream{} \ + << IE_LOCATION -#define IE_EXCEPTION_CASE(TYPE_ALIAS, STATUS_CODE, EXCEPTION_TYPE, ...) \ - case InferenceEngine::STATUS_CODE : { \ - using InferenceEngine::EXCEPTION_TYPE; using TYPE_ALIAS = EXCEPTION_TYPE; __VA_ARGS__; \ +#define IE_EXCEPTION_CASE(TYPE_ALIAS, STATUS_CODE, EXCEPTION_TYPE, ...) \ + case InferenceEngine::STATUS_CODE: { \ + using InferenceEngine::EXCEPTION_TYPE; \ + using TYPE_ALIAS = EXCEPTION_TYPE; \ + __VA_ARGS__; \ } break; /// @endcond @@ -461,28 +473,29 @@ struct NullStream { * @def IE_EXCEPTION_SWITCH * @brief Generate Switch statement over error codes adn maps them to coresponding exceptions type */ -#define IE_EXCEPTION_SWITCH(STATUS, TYPE_ALIAS, ...) 
\ - switch (STATUS) { \ - IE_EXCEPTION_CASE(TYPE_ALIAS, GENERAL_ERROR , GeneralError , __VA_ARGS__) \ - IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_IMPLEMENTED , NotImplemented , __VA_ARGS__) \ - IE_EXCEPTION_CASE(TYPE_ALIAS, NETWORK_NOT_LOADED , NetworkNotLoaded , __VA_ARGS__) \ - IE_EXCEPTION_CASE(TYPE_ALIAS, PARAMETER_MISMATCH , ParameterMismatch , __VA_ARGS__) \ - IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_FOUND , NotFound , __VA_ARGS__) \ - IE_EXCEPTION_CASE(TYPE_ALIAS, OUT_OF_BOUNDS , OutOfBounds , __VA_ARGS__) \ - IE_EXCEPTION_CASE(TYPE_ALIAS, UNEXPECTED , Unexpected , __VA_ARGS__) \ - IE_EXCEPTION_CASE(TYPE_ALIAS, REQUEST_BUSY , RequestBusy , __VA_ARGS__) \ - IE_EXCEPTION_CASE(TYPE_ALIAS, RESULT_NOT_READY , ResultNotReady , __VA_ARGS__) \ - IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_ALLOCATED , NotAllocated , __VA_ARGS__) \ - IE_EXCEPTION_CASE(TYPE_ALIAS, INFER_NOT_STARTED , InferNotStarted , __VA_ARGS__) \ - IE_EXCEPTION_CASE(TYPE_ALIAS, NETWORK_NOT_READ , NetworkNotRead , __VA_ARGS__) \ - IE_EXCEPTION_CASE(TYPE_ALIAS, INFER_CANCELLED , InferCancelled , __VA_ARGS__) \ - default: IE_ASSERT(!"Unreachable"); \ +#define IE_EXCEPTION_SWITCH(STATUS, TYPE_ALIAS, ...) \ + switch (STATUS) { \ + IE_EXCEPTION_CASE(TYPE_ALIAS, GENERAL_ERROR, GeneralError, __VA_ARGS__) \ + IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_IMPLEMENTED, NotImplemented, __VA_ARGS__) \ + IE_EXCEPTION_CASE(TYPE_ALIAS, NETWORK_NOT_LOADED, NetworkNotLoaded, __VA_ARGS__) \ + IE_EXCEPTION_CASE(TYPE_ALIAS, PARAMETER_MISMATCH, ParameterMismatch, __VA_ARGS__) \ + IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_FOUND, NotFound, __VA_ARGS__) \ + IE_EXCEPTION_CASE(TYPE_ALIAS, OUT_OF_BOUNDS, OutOfBounds, __VA_ARGS__) \ + IE_EXCEPTION_CASE(TYPE_ALIAS, UNEXPECTED, Unexpected, __VA_ARGS__) \ + IE_EXCEPTION_CASE(TYPE_ALIAS, REQUEST_BUSY, RequestBusy, __VA_ARGS__) \ + IE_EXCEPTION_CASE(TYPE_ALIAS, RESULT_NOT_READY, ResultNotReady, __VA_ARGS__) \ + IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_ALLOCATED, NotAllocated, __VA_ARGS__) \ + IE_EXCEPTION_CASE(TYPE_ALIAS, INFER_NOT_STARTED, InferNotStarted, __VA_ARGS__) \ + IE_EXCEPTION_CASE(TYPE_ALIAS, NETWORK_NOT_READ, NetworkNotRead, __VA_ARGS__) \ + IE_EXCEPTION_CASE(TYPE_ALIAS, INFER_CANCELLED, InferCancelled, __VA_ARGS__) \ + default: \ + IE_ASSERT(!"Unreachable"); \ } } // namespace details } // namespace InferenceEngine #if defined(_WIN32) -#define __PRETTY_FUNCTION__ __FUNCSIG__ +# define __PRETTY_FUNCTION__ __FUNCSIG__ #else -#define __PRETTY_FUNCTION__ __PRETTY_FUNCTION__ +# define __PRETTY_FUNCTION__ __PRETTY_FUNCTION__ #endif diff --git a/inference-engine/src/inference_engine/include/ie/ie_compound_blob.h b/inference-engine/src/inference_engine/include/ie/ie_compound_blob.h index 8a0aae67c23..66dad9c6c47 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_compound_blob.h +++ b/inference-engine/src/inference_engine/include/ie/ie_compound_blob.h @@ -22,7 +22,7 @@ namespace InferenceEngine { * Compound blob is a wrapper blob over references to underlying blobs. These blobs should share * some properties and can be grouped into a single entity. 
*/ -class INFERENCE_ENGINE_API_CLASS(CompoundBlob): public Blob { +class INFERENCE_ENGINE_API_CLASS(CompoundBlob) : public Blob { public: /** * @brief A smart pointer to the CompoundBlob object @@ -118,7 +118,7 @@ protected: /** * @brief Represents a blob that contains two planes (Y and UV) in NV12 color format */ -class INFERENCE_ENGINE_API_CLASS(NV12Blob): public CompoundBlob { +class INFERENCE_ENGINE_API_CLASS(NV12Blob) : public CompoundBlob { public: /** * @brief A smart pointer to the NV12Blob object @@ -220,7 +220,7 @@ public: * Please note that reference to Blob::Ptr is returned. I.e. the reference will be valid until * the I420Blob object is destroyed. * - * @return constant reference to shared pointer object of Y plane* + * @return constant reference to shared pointer object of Y plane* */ const Blob::Ptr& y() const noexcept; @@ -273,7 +273,7 @@ public: * in the OPTIMIZATION_CAPABILITIES metric. */ class INFERENCE_ENGINE_API_CLASS(BatchedBlob) : public CompoundBlob { - public: +public: /** * @brief A smart pointer to the BatchedBlob object */ diff --git a/inference-engine/src/inference_engine/include/ie/ie_core.hpp b/inference-engine/src/inference_engine/include/ie/ie_core.hpp index 573880153e8..a02232bc394 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_core.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_core.hpp @@ -15,11 +15,11 @@ #include #include -#include "ie_version.hpp" +#include "cpp/ie_executable_network.hpp" #include "ie_extension.h" #include "ie_plugin_config.hpp" #include "ie_remote_context.hpp" -#include "cpp/ie_executable_network.hpp" +#include "ie_version.hpp" namespace InferenceEngine { @@ -106,9 +106,9 @@ public: * operation * @return An executable network reference */ - ExecutableNetwork LoadNetwork( - const CNNNetwork& network, const std::string& deviceName, - const std::map& config = {}); + ExecutableNetwork LoadNetwork(const CNNNetwork& network, + const std::string& deviceName, + const std::map& config = {}); /** * @brief Reads model and creates an executable network from IR or ONNX file @@ -123,9 +123,9 @@ public: * * @return An executable network reference */ - ExecutableNetwork LoadNetwork( - const std::string& modelPath, const std::string& deviceName, - const std::map& config = {}); + ExecutableNetwork LoadNetwork(const std::string& modelPath, + const std::string& deviceName, + const std::map& config = {}); /** * @brief Registers extension @@ -141,9 +141,9 @@ public: * operation * @return An executable network object */ - ExecutableNetwork LoadNetwork( - const CNNNetwork& network, RemoteContext::Ptr context, - const std::map& config = {}); + ExecutableNetwork LoadNetwork(const CNNNetwork& network, + RemoteContext::Ptr context, + const std::map& config = {}); /** * @brief Registers extension for the specified plugin @@ -162,9 +162,9 @@ public: * operation* * @return An executable network reference */ - ExecutableNetwork ImportNetwork( - const std::string& modelFileName, const std::string& deviceName, - const std::map& config = {}); + ExecutableNetwork ImportNetwork(const std::string& modelFileName, + const std::string& deviceName, + const std::map& config = {}); /** * @brief Creates an executable network from a previously exported network @@ -174,7 +174,8 @@ public: * operation* * @return An executable network reference */ - ExecutableNetwork ImportNetwork(std::istream& networkModel, const std::string& deviceName, + ExecutableNetwork ImportNetwork(std::istream& networkModel, + const std::string& deviceName, const std::map& 
config = {}); /** @@ -208,9 +209,9 @@ public: * @param config Optional map of pairs: (config parameter name, config parameter value) * @return An object containing a map of pairs a layer name -> a device name supporting this layer. */ - QueryNetworkResult QueryNetwork( - const CNNNetwork& network, const std::string& deviceName, - const std::map& config = {}) const; + QueryNetworkResult QueryNetwork(const CNNNetwork& network, + const std::string& deviceName, + const std::map& config = {}) const; /** * @brief Sets configuration for device, acceptable keys can be found in ie_plugin_config.hpp diff --git a/inference-engine/src/inference_engine/include/ie/ie_data.h b/inference-engine/src/inference_engine/include/ie/ie_data.h index f3c83720aea..86a3e937adf 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_data.h +++ b/inference-engine/src/inference_engine/include/ie/ie_data.h @@ -27,6 +27,7 @@ namespace InferenceEngine { */ class INFERENCE_ENGINE_API_CLASS(Data) { class Impl; + public: /** * @brief An empty constructor (dimensionless) @@ -58,7 +59,7 @@ public: * @param data A data object to copy from * @return An assigned object */ - Data & operator = (const Data& data); + Data& operator=(const Data& data); /** * @brief Checks if the current node is resolved diff --git a/inference-engine/src/inference_engine/include/ie/ie_extension.h b/inference-engine/src/inference_engine/include/ie/ie_extension.h index 97184fd5ba4..79491761043 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_extension.h +++ b/inference-engine/src/inference_engine/include/ie/ie_extension.h @@ -14,9 +14,9 @@ #include #include -#include -#include "ie_iextension.h" #include "details/ie_so_pointer.hpp" +#include "ie_iextension.h" +#include "ngraph/opsets/opset.hpp" namespace InferenceEngine { namespace details { @@ -46,9 +46,8 @@ public: * * @param name Full or relative path to extension library */ - template > - explicit Extension(const std::basic_string& name): actual(name) {} + template > + explicit Extension(const std::basic_string& name) : actual(name) {} /** * @brief Gets the extension version information @@ -79,7 +78,8 @@ public: * @return vector of strings */ std::vector getImplTypes(const std::shared_ptr& node) override { - if (node == nullptr) IE_THROW() << "Provided ngraph::Node pointer is nullptr."; + if (node == nullptr) + IE_THROW() << "Provided ngraph::Node pointer is nullptr."; return actual->getImplTypes(node); } @@ -90,7 +90,8 @@ public: * @return shared pointer to implementation */ ILayerImpl::Ptr getImplementation(const std::shared_ptr& node, const std::string& implType) override { - if (node == nullptr) IE_THROW() << "Provided ngraph::Node pointer is nullptr."; + if (node == nullptr) + IE_THROW() << "Provided ngraph::Node pointer is nullptr."; return actual->getImplementation(node, implType); } @@ -107,7 +108,7 @@ protected: * @param name extension library name * @return shared pointer to extension */ -template +template INFERENCE_ENGINE_DEPRECATED("Use std::make_shared") inline std::shared_ptr make_so_pointer(const std::string& name) { return std::make_shared(name); @@ -120,7 +121,7 @@ inline std::shared_ptr make_so_pointer(const std::string& name) { * @param name extension library name * @return shared pointer to extension */ -template +template INFERENCE_ENGINE_DEPRECATED("Use std::make_shared") inline std::shared_ptr make_so_pointer(const std::wstring& name) { return std::make_shared(name); diff --git a/inference-engine/src/inference_engine/include/ie/ie_icnn_network.hpp 
b/inference-engine/src/inference_engine/include/ie/ie_icnn_network.hpp index 62ef93824ee..acedb48bb2c 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_icnn_network.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_icnn_network.hpp @@ -17,8 +17,7 @@ #include "ie_common.h" #include "ie_data.h" #include "ie_input_info.hpp" - -#include +#include "ngraph/function.hpp" namespace InferenceEngine { @@ -29,7 +28,7 @@ _IE_SUPPRESS_DEPRECATED_START_GCC * @interface ICNNNetwork * @brief This is the main interface to describe the NN topology */ -class INFERENCE_ENGINE_API_CLASS(ICNNNetwork): public std::enable_shared_from_this { +class INFERENCE_ENGINE_API_CLASS(ICNNNetwork) : public std::enable_shared_from_this { public: IE_SUPPRESS_DEPRECATED_START /** @@ -127,7 +126,8 @@ public: * @return Status code of the operation */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::CNNNetwork wrapper instead") - virtual StatusCode addOutput(const std::string& layerName, size_t outputIndex = 0, + virtual StatusCode addOutput(const std::string& layerName, + size_t outputIndex = 0, ResponseDesc* resp = nullptr) noexcept = 0; /** @@ -219,8 +219,7 @@ public: * @return Status code of the operation */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::CNNNetwork wrapper instead") - virtual StatusCode serialize(std::ostream& xmlStream, Blob::Ptr& binData, ResponseDesc* resp) const - noexcept = 0; + virtual StatusCode serialize(std::ostream& xmlStream, Blob::Ptr& binData, ResponseDesc* resp) const noexcept = 0; /** * @deprecated Use InferenceEngine::CNNNetwork wrapper instead @@ -233,10 +232,11 @@ public: * @return Status code of the operation */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::CNNNetwork wrapper instead") - virtual StatusCode getOVNameForTensor(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const noexcept { - (void) ov_name; - (void) orig_name; - (void) resp; + virtual StatusCode getOVNameForTensor(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const + noexcept { + (void)ov_name; + (void)orig_name; + (void)resp; return NOT_IMPLEMENTED; } diff --git a/inference-engine/src/inference_engine/include/ie/ie_iexecutable_network.hpp b/inference-engine/src/inference_engine/include/ie/ie_iexecutable_network.hpp index bb0a6f71c4a..8e311c8c698 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_iexecutable_network.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_iexecutable_network.hpp @@ -9,9 +9,9 @@ */ #pragma once -#include #include #include +#include #include #include diff --git a/inference-engine/src/inference_engine/include/ie/ie_iextension.h b/inference-engine/src/inference_engine/include/ie/ie_iextension.h index be327c15376..c7032ef273c 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_iextension.h +++ b/inference-engine/src/inference_engine/include/ie/ie_iextension.h @@ -15,20 +15,20 @@ #include #include "ie_api.h" +#include "ie_blob.h" #include "ie_common.h" #include "ie_layouts.h" -#include "ie_blob.h" #include "ie_version.hpp" -#include +#include "ngraph/opsets/opset.hpp" /** * @def INFERENCE_EXTENSION_API(TYPE) * @brief Defines Inference Engine Extension API method */ #if defined(_WIN32) && defined(IMPLEMENT_INFERENCE_EXTENSION_API) -#define INFERENCE_EXTENSION_API(TYPE) extern "C" __declspec(dllexport) TYPE +# define INFERENCE_EXTENSION_API(TYPE) extern "C" __declspec(dllexport) TYPE #else -#define INFERENCE_EXTENSION_API(TYPE) INFERENCE_ENGINE_API(TYPE) +# define 
INFERENCE_EXTENSION_API(TYPE) INFERENCE_ENGINE_API(TYPE) #endif namespace InferenceEngine { @@ -131,7 +131,8 @@ public: * @param resp Response descriptor * @return Status code */ - virtual StatusCode execute(std::vector& inputs, std::vector& outputs, + virtual StatusCode execute(std::vector& inputs, + std::vector& outputs, ResponseDesc* resp) noexcept = 0; }; @@ -183,7 +184,8 @@ public: /** * @brief Implements deprecated API */ - INFERENCE_ENGINE_DEPRECATED("Do not override or use this method. Use IE_DEFINE_EXTENSION_CREATE_FUNCTION to export extension") + INFERENCE_ENGINE_DEPRECATED( + "Do not override or use this method. Use IE_DEFINE_EXTENSION_CREATE_FUNCTION to export extension") virtual void Release() noexcept { delete this; } @@ -217,15 +219,17 @@ INFERENCE_EXTENSION_API(StatusCode) CreateExtension(IExtension*& ext, ResponseDesc* resp) noexcept; #else INFERENCE_EXTENSION_API(StatusCode) -CreateExtension(IExtension*& ext, ResponseDesc* resp) noexcept INFERENCE_ENGINE_DEPRECATED("Use IE_DEFINE_EXTENSION_CREATE_FUNCTION macro"); +CreateExtension(IExtension*& ext, ResponseDesc* resp) noexcept INFERENCE_ENGINE_DEPRECATED( + "Use IE_DEFINE_EXTENSION_CREATE_FUNCTION macro"); #endif /** * @def IE_DEFINE_EXTENSION_CREATE_FUNCTION * @brief Generates extension creation function */ -#define IE_DEFINE_EXTENSION_CREATE_FUNCTION(ExtensionType) \ -INFERENCE_EXTENSION_API(void) InferenceEngine::CreateExtensionShared(std::shared_ptr& ext) { \ - ext = std::make_shared(); \ -} +#define IE_DEFINE_EXTENSION_CREATE_FUNCTION(ExtensionType) \ + INFERENCE_EXTENSION_API(void) \ + InferenceEngine::CreateExtensionShared(std::shared_ptr& ext) { \ + ext = std::make_shared(); \ + } } // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/include/ie/ie_iinfer_request.hpp b/inference-engine/src/inference_engine/include/ie/ie_iinfer_request.hpp index 4fd200c0252..6885e5b7ed4 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_iinfer_request.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_iinfer_request.hpp @@ -26,7 +26,8 @@ _IE_SUPPRESS_DEPRECATED_START_GCC * @deprecated Use InferenceEngine::InferRequest C++ wrapper * @brief This is an interface of asynchronous infer request */ -class INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::InferRequest C++ wrapper") IInferRequest : public std::enable_shared_from_this { +class INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::InferRequest C++ wrapper") IInferRequest + : public std::enable_shared_from_this { public: /** * @enum WaitMode @@ -83,7 +84,10 @@ public: * @param resp Optional: pointer to an already allocated object to contain information in case of failure * @return Status code of the operation: OK (0) for success */ - virtual StatusCode SetBlob(const char *name, const Blob::Ptr &data, const PreProcessInfo& info, ResponseDesc *resp) noexcept = 0; + virtual StatusCode SetBlob(const char* name, + const Blob::Ptr& data, + const PreProcessInfo& info, + ResponseDesc* resp) noexcept = 0; /** * @brief Gets pre-process for input data @@ -92,7 +96,8 @@ public: * @param resp Optional: pointer to an already allocated object to contain information in case of failure * @return Status code of the operation: OK (0) for success */ - virtual StatusCode GetPreProcess(const char* name, const PreProcessInfo** info, ResponseDesc *resp) const noexcept = 0; + virtual StatusCode GetPreProcess(const char* name, const PreProcessInfo** info, ResponseDesc* resp) const + noexcept = 0; /** * @brief Infers specified input(s) in synchronous mode 
* @@ -200,4 +205,4 @@ protected: _IE_SUPPRESS_DEPRECATED_END_GCC -} // namespace InferenceEngine \ No newline at end of file +} // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/include/ie/ie_layouts.h b/inference-engine/src/inference_engine/include/ie/ie_layouts.h index 42fe8fbca2c..f1f8ef382ce 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_layouts.h +++ b/inference-engine/src/inference_engine/include/ie/ie_layouts.h @@ -66,8 +66,11 @@ public: * @param dimOffsets per-dimension offset from the padding to actual data, * @param strides strides for each dimension */ - BlockingDesc(const SizeVector& blocked_dims, const SizeVector& order, size_t offset, - const SizeVector& dimOffsets, const SizeVector& strides); + BlockingDesc(const SizeVector& blocked_dims, + const SizeVector& order, + size_t offset, + const SizeVector& dimOffsets, + const SizeVector& strides); /** * @brief Returns the blocked dimensions vector @@ -335,11 +338,11 @@ private: * @brief This structure describes ROI data for image-like tensors. */ struct ROI { - size_t id = 0; //!< ID of a ROI (offset over batch dimension) - size_t posX = 0; //!< W upper left coordinate of ROI - size_t posY = 0; //!< H upper left coordinate of ROI - size_t sizeX = 0; //!< W size of ROI - size_t sizeY = 0; //!< H size of ROI + size_t id = 0; //!< ID of a ROI (offset over batch dimension) + size_t posX = 0; //!< W upper left coordinate of ROI + size_t posY = 0; //!< H upper left coordinate of ROI + size_t sizeX = 0; //!< W size of ROI + size_t sizeY = 0; //!< H size of ROI ROI() = default; @@ -351,9 +354,12 @@ struct ROI { * @param sizeX W size of ROI * @param sizeY H size of ROI */ - ROI(size_t id, size_t posX, size_t posY, size_t sizeX, size_t sizeY) : - id(id), posX(posX), posY(posY), sizeX(sizeX), sizeY(sizeY) { - } + ROI(size_t id, size_t posX, size_t posY, size_t sizeX, size_t sizeY) + : id(id), + posX(posX), + posY(posY), + sizeX(sizeX), + sizeY(sizeY) {} }; /** @@ -366,9 +372,6 @@ struct ROI { * * @return A newly created TensorDesc object representing ROI. 
*/ -INFERENCE_ENGINE_API_CPP(TensorDesc) make_roi_desc( - const TensorDesc& origDesc, - const ROI& roi, - bool useOrigMemDesc); +INFERENCE_ENGINE_API_CPP(TensorDesc) make_roi_desc(const TensorDesc& origDesc, const ROI& roi, bool useOrigMemDesc); } // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/include/ie/ie_locked_memory.hpp b/inference-engine/src/inference_engine/include/ie/ie_locked_memory.hpp index 5242b171198..69caebc8f9f 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_locked_memory.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_locked_memory.hpp @@ -43,7 +43,10 @@ public: * @param offsetInBytes Offset in originally locked region */ LockedMemoryBase(IAllocator* ptr, void* handle, LockOp lockFlag, size_t offsetInBytes) - : _allocator(ptr), _handle(handle), _lockFlag(lockFlag), _offset(offsetInBytes) {} + : _allocator(ptr), + _handle(handle), + _lockFlag(lockFlag), + _offset(offsetInBytes) {} /** * @brief A copy constructor @@ -51,7 +54,10 @@ public: * @param that An rvalue reference for the other LockedMemoryBase instance */ LockedMemoryBase(LockedMemoryBase&& that) noexcept - : _allocator(that._allocator), _handle(that._handle), _lockFlag(that._lockFlag), _offset(that._offset) { + : _allocator(that._allocator), + _handle(that._handle), + _lockFlag(that._lockFlag), + _offset(that._offset) { that._locked = nullptr; } @@ -86,7 +92,8 @@ protected: * @return The pointer to the locked object, nullptr otherwise */ virtual T* dereference() const { - if (_locked != nullptr) return _locked; + if (_locked != nullptr) + return _locked; if (_allocator == nullptr) { return nullptr; @@ -134,7 +141,7 @@ public: * @param that Rvalue reference for the other LockedMemoryBase instance * @param offset Offset value */ - LockedMemory(LockedMemory&& that, size_t offset): base(std::move(that)) { + LockedMemory(LockedMemory&& that, size_t offset) : base(std::move(that)) { base::_offset = offset; } @@ -242,7 +249,7 @@ public: * @param that Rvalue reference for the other LockedMemoryBase instance * @param offset Offset value */ - LockedMemory(LockedMemory&& that, size_t offset): base(std::move(that)) { + LockedMemory(LockedMemory&& that, size_t offset) : base(std::move(that)) { base::_offset = offset; } @@ -326,7 +333,7 @@ public: * @param handle Handle provided by allocator * @param offset Offset in bytes in originally locked region */ - LockedMemory(IAllocator* ptr, void* handle, size_t offset): base(ptr, handle, LOCK_FOR_READ, offset) {} + LockedMemory(IAllocator* ptr, void* handle, size_t offset) : base(ptr, handle, LOCK_FOR_READ, offset) {} /** * @brief A default copy constructor that accepts rvalue @@ -341,7 +348,7 @@ public: * @param that Rvalue reference for the other LockedMemoryBase instance * @param offset Offset value */ - LockedMemory(LockedMemory&& that, size_t offset): base(std::move(that)) { + LockedMemory(LockedMemory&& that, size_t offset) : base(std::move(that)) { base::_offset = offset; } diff --git a/inference-engine/src/inference_engine/include/ie/ie_parallel.hpp b/inference-engine/src/inference_engine/include/ie/ie_parallel.hpp index b7e2946a023..2411f077663 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_parallel.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_parallel.hpp @@ -17,33 +17,33 @@ #include #include -#define IE_THREAD_TBB 0 -#define IE_THREAD_OMP 1 -#define IE_THREAD_SEQ 2 +#define IE_THREAD_TBB 0 +#define IE_THREAD_OMP 1 +#define IE_THREAD_SEQ 2 #define IE_THREAD_TBB_AUTO 3 #if 
(IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) -#ifndef NOMINMAX -# define NOMINMAX -#endif -#ifndef TBB_PREVIEW_LOCAL_OBSERVER -# define TBB_PREVIEW_LOCAL_OBSERVER 1 -#endif -#ifndef TBB_PREVIEW_NUMA_SUPPORT -# define TBB_PREVIEW_NUMA_SUPPORT 1 -#endif -#ifndef TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION -# define TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION 1 -#endif +# ifndef NOMINMAX +# define NOMINMAX +# endif +# ifndef TBB_PREVIEW_LOCAL_OBSERVER +# define TBB_PREVIEW_LOCAL_OBSERVER 1 +# endif +# ifndef TBB_PREVIEW_NUMA_SUPPORT +# define TBB_PREVIEW_NUMA_SUPPORT 1 +# endif +# ifndef TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION +# define TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION 1 +# endif -#include "tbb/blocked_range.h" -#include "tbb/blocked_range2d.h" -#include "tbb/blocked_range3d.h" -#include "tbb/parallel_for.h" -#include "tbb/parallel_reduce.h" -#include "tbb/parallel_sort.h" -#include "tbb/task_arena.h" -#include "tbb/task_scheduler_observer.h" +# include "tbb/blocked_range.h" +# include "tbb/blocked_range2d.h" +# include "tbb/blocked_range3d.h" +# include "tbb/parallel_for.h" +# include "tbb/parallel_reduce.h" +# include "tbb/parallel_sort.h" +# include "tbb/task_arena.h" +# include "tbb/task_scheduler_observer.h" inline int parallel_get_max_threads() { return tbb::this_task_arena::max_concurrency(); @@ -60,31 +60,31 @@ inline void parallel_set_num_threads(int) { inline int parallel_get_env_threads() { return 0; } -#if IE_THREAD == IE_THREAD_TBB -#define PARTITIONING , tbb::static_partitioner() +# if IE_THREAD == IE_THREAD_TBB +# define PARTITIONING , tbb::static_partitioner() // The TBB version less than 2018u1 has no static_partitioner argument for // tbb::parallel_deterministic_reduce. So will fallback to non deterministic version. 
-#if (TBB_INTERFACE_VERSION >= 10001) -#define _TBB_REDUCE_FUNC tbb::parallel_deterministic_reduce -#else -#define _TBB_REDUCE_FUNC tbb::parallel_reduce -#endif +# if (TBB_INTERFACE_VERSION >= 10001) +# define _TBB_REDUCE_FUNC tbb::parallel_deterministic_reduce +# else +# define _TBB_REDUCE_FUNC tbb::parallel_reduce +# endif -#else -#define PARTITIONING -#endif +# else +# define PARTITIONING +# endif #elif IE_THREAD == IE_THREAD_OMP -#include +# include -#include -#include -#include +# include +# include +# include /* MSVC still supports omp 2.0 only */ -#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -#define collapse(x) -#endif // defined(_MSC_VER) && !defined(__INTEL_COMPILER) +# if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +# define collapse(x) +# endif // defined(_MSC_VER) && !defined(__INTEL_COMPILER) inline int parallel_get_max_threads() { return omp_get_max_threads(); } @@ -110,7 +110,7 @@ inline int parallel_get_env_threads() { } #elif IE_THREAD == IE_THREAD_SEQ -#include // NOLINT +# include // NOLINT inline int parallel_get_env_threads() { return 1; } @@ -133,7 +133,8 @@ namespace InferenceEngine { template void parallel_nt(int nthr, const F& func) { #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) - if (nthr == 0) nthr = parallel_get_max_threads(); + if (nthr == 0) + nthr = parallel_get_max_threads(); if (nthr == 1) { func(0, 1); return; @@ -148,7 +149,7 @@ void parallel_nt(int nthr, const F& func) { return; } -#pragma omp parallel num_threads(nthr) +# pragma omp parallel num_threads(nthr) func(parallel_get_thread_num(), parallel_get_num_threads()); #elif IE_THREAD == IE_THREAD_SEQ func(0, 1); @@ -168,18 +169,20 @@ void parallel_nt_static(int nthr, const F& func) { return; } - if (nthr == 0) nthr = parallel_get_max_threads(); + if (nthr == 0) + nthr = parallel_get_max_threads(); #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) tbb::parallel_for( - 0, nthr, + 0, + nthr, [&](int ithr) { func(ithr, nthr); }, - tbb::static_partitioner {}); + tbb::static_partitioner{}); #elif IE_THREAD == IE_THREAD_OMP -#pragma omp parallel num_threads(nthr) +# pragma omp parallel num_threads(nthr) { func(parallel_get_thread_num(), parallel_get_num_threads()); } #endif } @@ -200,10 +203,12 @@ template R parallel_sum(const T0& D0, const R& input, const F& func) { #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) return _TBB_REDUCE_FUNC( - tbb::blocked_range(0, D0), input, + tbb::blocked_range(0, D0), + input, [&](const tbb::blocked_range& r, R init) -> R { R sum = init; - for (T0 dim1 = r.begin(); dim1 < r.end(); ++dim1) sum += func(dim1); + for (T0 dim1 = r.begin(); dim1 < r.end(); ++dim1) + sum += func(dim1); return sum; }, [](R x, R y) -> R { @@ -212,15 +217,15 @@ R parallel_sum(const T0& D0, const R& input, const F& func) { #else R sum = input; -#ifdef _MSC_VER +# ifdef _MSC_VER using T0_IT = typename std::make_signed::type; -#else +# else using T0_IT = T0; -#endif +# endif -#if IE_THREAD == IE_THREAD_OMP -#pragma omp parallel for reduction(+ : sum) schedule(static) -#endif +# if IE_THREAD == IE_THREAD_OMP +# pragma omp parallel for reduction(+ : sum) schedule(static) +# endif for (T0_IT dim1 = 0; dim1 < static_cast(D0); dim1++) { sum += static_cast(func(dim1)); } @@ -232,7 +237,8 @@ template R parallel_sum2d(const T0& D0, const T1& D1, const R& input, const F& func) { #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) return _TBB_REDUCE_FUNC( - tbb::blocked_range2d(0, D0, 0, D1), input, + tbb::blocked_range2d(0, 
D0, 0, D1), + input, [&](const tbb::blocked_range2d& r, R init) -> R { R sum = init; for (T0 dim2 = r.rows().begin(); dim2 < r.rows().end(); dim2++) { @@ -248,17 +254,17 @@ R parallel_sum2d(const T0& D0, const T1& D1, const R& input, const F& func) { #else R sum = input; -#ifdef _MSC_VER +# ifdef _MSC_VER using T0_IT = typename std::make_signed::type; using T1_IT = typename std::make_signed::type; -#else +# else using T0_IT = T0; using T1_IT = T1; -#endif +# endif -#if IE_THREAD == IE_THREAD_OMP -#pragma omp parallel for collapse(2) reduction(+ : sum) schedule(static) -#endif +# if IE_THREAD == IE_THREAD_OMP +# pragma omp parallel for collapse(2) reduction(+ : sum) schedule(static) +# endif for (T0_IT dim2 = 0; dim2 < D0; dim2++) { for (T1_IT dim1 = 0; dim1 < D1; dim1++) { sum += func(dim2, dim1); @@ -271,7 +277,8 @@ template R parallel_sum3d(const T0& D0, const T1& D1, const T2& D2, const R& input, const F& func) { #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) return _TBB_REDUCE_FUNC( - tbb::blocked_range3d(0, D0, 0, D1, 0, D2), input, + tbb::blocked_range3d(0, D0, 0, D1, 0, D2), + input, [&](const tbb::blocked_range3d& r, R init) -> R { R sum = init; for (T0 dim1 = r.pages().begin(); dim1 < r.pages().end(); dim1++) { @@ -289,19 +296,19 @@ R parallel_sum3d(const T0& D0, const T1& D1, const T2& D2, const R& input, const #else R sum = input; -#ifdef _MSC_VER +# ifdef _MSC_VER using T0_IT = typename std::make_signed::type; using T1_IT = typename std::make_signed::type; using T2_IT = typename std::make_signed::type; -#else +# else using T0_IT = T0; using T1_IT = T1; using T2_IT = T2; -#endif +# endif -#if IE_THREAD == IE_THREAD_OMP -#pragma omp parallel for collapse(3) reduction(+ : sum) schedule(static) -#endif +# if IE_THREAD == IE_THREAD_OMP +# pragma omp parallel for collapse(3) reduction(+ : sum) schedule(static) +# endif for (T0_IT dim1 = 0; dim1 < static_cast(D0); dim1++) { for (T1_IT dim2 = 0; dim2 < static_cast(D1); dim2++) { for (T2_IT dim3 = 0; dim3 < static_cast(D2); dim3++) { @@ -353,31 +360,28 @@ inline void splitter(const T& n, const Q& team, const Q& tid, T& n_start, T& n_e } namespace details { - template - struct num_of_lambda_args : public num_of_lambda_args { - }; +template +struct num_of_lambda_args : public num_of_lambda_args {}; - template - struct num_of_lambda_args { - constexpr static int value = sizeof...(Args); - }; +template +struct num_of_lambda_args { + constexpr static int value = sizeof...(Args); +}; - template::value> - typename std::enable_if::type - call_with_args(ACT body, size_t g_id, T ...arg) { - body(g_id, arg...); - } +template ::value> +typename std::enable_if::type call_with_args(ACT body, size_t g_id, T... arg) { + body(g_id, arg...); +} - template::value> - typename std::enable_if::type - call_with_args(ACT body, size_t g_id, T ...arg) { - body(arg...); - } +template ::value> +typename std::enable_if::type call_with_args(ACT body, size_t g_id, T... 
arg) { + body(arg...); +} } // namespace details template void for_1d(const int& ithr, const int& nthr, const T0& D0, const F& func) { - T0 d0 {0}, end {0}; + T0 d0{0}, end{0}; splitter(D0, nthr, ithr, d0, end); for (; d0 < end; ++d0) details::call_with_args(func, ithr, d0); @@ -388,12 +392,14 @@ void parallel_for(const T0& D0, const F& func) { #if IE_THREAD == IE_THREAD_TBB auto work_amount = static_cast(D0); int nthr = parallel_get_max_threads(); - if (static_cast(nthr) > work_amount) nthr = static_cast(work_amount); + if (static_cast(nthr) > work_amount) + nthr = static_cast(work_amount); if (nthr == 1) { for_1d(0, 1, D0, func); } else { tbb::parallel_for( - 0, nthr, + 0, + nthr, [&](int ithr) { for_1d(ithr, nthr, D0, func); }, @@ -405,7 +411,7 @@ void parallel_for(const T0& D0, const F& func) { for_1d(ithr, nthr, D0, func); }); #elif IE_THREAD == IE_THREAD_OMP -#pragma omp parallel +# pragma omp parallel for_1d(parallel_get_thread_num(), parallel_get_num_threads(), D0, func); #elif IE_THREAD == IE_THREAD_SEQ for_1d(0, 1, D0, func); @@ -415,12 +421,13 @@ void parallel_for(const T0& D0, const F& func) { template void for_2d(const int& ithr, const int& nthr, const T0& D0, const T1& D1, const F& func) { const size_t work_amount = (size_t)D0 * D1; - if (work_amount == 0) return; - size_t start {0}, end {0}; + if (work_amount == 0) + return; + size_t start{0}, end{0}; splitter(work_amount, nthr, ithr, start, end); - T0 d0 {0}; - T1 d1 {0}; + T0 d0{0}; + T1 d1{0}; parallel_it_init(start, d0, D0, d1, D1); for (size_t iwork = start; iwork < end; ++iwork) { details::call_with_args(func, ithr, d0, d1); @@ -433,12 +440,14 @@ void parallel_for2d(const T0& D0, const T1& D1, const F& func) { #if IE_THREAD == IE_THREAD_TBB auto work_amount = static_cast(D0 * D1); int nthr = parallel_get_max_threads(); - if (static_cast(nthr) > work_amount) nthr = static_cast(work_amount); + if (static_cast(nthr) > work_amount) + nthr = static_cast(work_amount); if (nthr == 1) { for_2d(0, 1, D0, D1, func); } else { tbb::parallel_for( - 0, nthr, + 0, + nthr, [&](int ithr) { for_2d(ithr, nthr, D0, D1, func); }, @@ -450,7 +459,7 @@ void parallel_for2d(const T0& D0, const T1& D1, const F& func) { for_2d(ithr, nthr, D0, D1, func); }); #elif IE_THREAD == IE_THREAD_OMP -#pragma omp parallel +# pragma omp parallel for_2d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, func); #elif IE_THREAD == IE_THREAD_SEQ for_2d(0, 1, D0, D1, func); @@ -460,13 +469,14 @@ void parallel_for2d(const T0& D0, const T1& D1, const F& func) { template void for_3d(const int& ithr, const int& nthr, const T0& D0, const T1& D1, const T2& D2, const F& func) { const size_t work_amount = (size_t)D0 * D1 * D2; - if (work_amount == 0) return; - size_t start {0}, end {0}; + if (work_amount == 0) + return; + size_t start{0}, end{0}; splitter(work_amount, nthr, ithr, start, end); - T0 d0 {0}; - T1 d1 {0}; - T2 d2 {0}; + T0 d0{0}; + T1 d1{0}; + T2 d2{0}; parallel_it_init(start, d0, D0, d1, D1, d2, D2); for (size_t iwork = start; iwork < end; ++iwork) { details::call_with_args(func, ithr, d0, d1, d2); @@ -479,12 +489,14 @@ void parallel_for3d(const T0& D0, const T1& D1, const T2& D2, const F& func) { #if IE_THREAD == IE_THREAD_TBB auto work_amount = static_cast(D0 * D1 * D2); int nthr = parallel_get_max_threads(); - if (static_cast(nthr) > work_amount) nthr = static_cast(work_amount); + if (static_cast(nthr) > work_amount) + nthr = static_cast(work_amount); if (nthr == 1) { for_3d(0, 1, D0, D1, D2, func); } else { tbb::parallel_for( - 0, nthr, + 0, 
+ nthr, [&](int ithr) { for_3d(ithr, nthr, D0, D1, D2, func); }, @@ -496,7 +508,7 @@ void parallel_for3d(const T0& D0, const T1& D1, const T2& D2, const F& func) { for_3d(ithr, nthr, D0, D1, D2, func); }); #elif IE_THREAD == IE_THREAD_OMP -#pragma omp parallel +# pragma omp parallel for_3d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, func); #elif IE_THREAD == IE_THREAD_SEQ for_3d(0, 1, D0, D1, D2, func); @@ -506,14 +518,15 @@ void parallel_for3d(const T0& D0, const T1& D1, const T2& D2, const F& func) { template void for_4d(const int& ithr, const int& nthr, const T0& D0, const T1& D1, const T2& D2, const T3& D3, const F& func) { const size_t work_amount = (size_t)D0 * D1 * D2 * D3; - if (work_amount == 0) return; - size_t start {0}, end {0}; + if (work_amount == 0) + return; + size_t start{0}, end{0}; splitter(work_amount, nthr, ithr, start, end); - T0 d0 {0}; - T1 d1 {0}; - T2 d2 {0}; - T3 d3 {0}; + T0 d0{0}; + T1 d1{0}; + T2 d2{0}; + T3 d3{0}; parallel_it_init(start, d0, D0, d1, D1, d2, D2, d3, D3); for (size_t iwork = start; iwork < end; ++iwork) { details::call_with_args(func, ithr, d0, d1, d2, d3); @@ -526,12 +539,14 @@ void parallel_for4d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons #if IE_THREAD == IE_THREAD_TBB auto work_amount = static_cast(D0 * D1 * D2 * D3); int nthr = parallel_get_max_threads(); - if (static_cast(nthr) > work_amount) nthr = static_cast(work_amount); + if (static_cast(nthr) > work_amount) + nthr = static_cast(work_amount); if (nthr == 1) { for_4d(0, 1, D0, D1, D2, D3, func); } else { tbb::parallel_for( - 0, nthr, + 0, + nthr, [&](int ithr) { for_4d(ithr, nthr, D0, D1, D2, D3, func); }, @@ -543,7 +558,7 @@ void parallel_for4d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons for_4d(ithr, nthr, D0, D1, D2, D3, func); }); #elif IE_THREAD == IE_THREAD_OMP -#pragma omp parallel +# pragma omp parallel for_4d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, func); #elif IE_THREAD == IE_THREAD_SEQ for_4d(0, 1, D0, D1, D2, D3, func); @@ -551,18 +566,25 @@ void parallel_for4d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons } template -void for_5d(const int& ithr, const int& nthr, const T0& D0, const T1& D1, const T2& D2, const T3& D3, const T4& D4, +void for_5d(const int& ithr, + const int& nthr, + const T0& D0, + const T1& D1, + const T2& D2, + const T3& D3, + const T4& D4, const F& func) { const size_t work_amount = (size_t)D0 * D1 * D2 * D3 * D4; - if (work_amount == 0) return; - size_t start {0}, end {0}; + if (work_amount == 0) + return; + size_t start{0}, end{0}; splitter(work_amount, nthr, ithr, start, end); - T0 d0 {0}; - T1 d1 {0}; - T2 d2 {0}; - T3 d3 {0}; - T4 d4 {0}; + T0 d0{0}; + T1 d1{0}; + T2 d2{0}; + T3 d3{0}; + T4 d4{0}; parallel_it_init(start, d0, D0, d1, D1, d2, D2, d3, D3, d4, D4); for (size_t iwork = start; iwork < end; ++iwork) { details::call_with_args(func, ithr, d0, d1, d2, d3, d4); @@ -575,12 +597,14 @@ void parallel_for5d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons #if IE_THREAD == IE_THREAD_TBB auto work_amount = static_cast(D0 * D1 * D2 * D3 * D4); int nthr = parallel_get_max_threads(); - if (static_cast(nthr) > work_amount) nthr = static_cast(work_amount); + if (static_cast(nthr) > work_amount) + nthr = static_cast(work_amount); if (nthr == 1) { for_5d(0, 1, D0, D1, D2, D3, D4, func); } else { tbb::parallel_for( - 0, nthr, + 0, + nthr, [&](int ithr) { for_5d(ithr, nthr, D0, D1, D2, D3, D4, func); }, @@ -592,7 +616,7 @@ void 
parallel_for5d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons for_5d(ithr, nthr, D0, D1, D2, D3, D4, func); }); #elif IE_THREAD == IE_THREAD_OMP -#pragma omp parallel +# pragma omp parallel for_5d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, D4, func); #elif IE_THREAD == IE_THREAD_SEQ for_5d(0, 1, D0, D1, D2, D3, D4, func); diff --git a/inference-engine/src/inference_engine/include/ie/ie_parameter.hpp b/inference-engine/src/inference_engine/include/ie/ie_parameter.hpp index 425673f45b0..9aedae54cef 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_parameter.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_parameter.hpp @@ -72,7 +72,7 @@ public: * * @param str char array */ - Parameter(const char* str): Parameter(std::string(str)) {} // NOLINT + Parameter(const char* str) : Parameter(std::string(str)) {} // NOLINT /** * @brief Destructor @@ -89,7 +89,8 @@ public: return *this; } clear(); - if (!parameter.empty()) ptr = parameter.ptr->copy(); + if (!parameter.empty()) + ptr = parameter.ptr->copy(); return *this; } @@ -279,7 +280,7 @@ private: return id == typeid(T); } Any* copy() const override { - return new RealData {get()}; + return new RealData{get()}; } T& get() & { @@ -291,14 +292,12 @@ private: } template - typename std::enable_if::value, bool>::type - equal(const Any& left, const Any& rhs) const { + typename std::enable_if::value, bool>::type equal(const Any& left, const Any& rhs) const { IE_THROW() << "Parameter doesn't contain equal operator"; } template - typename std::enable_if::value, bool>::type - equal(const Any& left, const Any& rhs) const { + typename std::enable_if::value, bool>::type equal(const Any& left, const Any& rhs) const { return dyn_cast(&left) == dyn_cast(&rhs); } @@ -306,13 +305,11 @@ private: return rhs.is(typeid(T)) && equal(*this, rhs); } - template - typename std::enable_if::value, void>::type - print(std::ostream& stream, const U& object) const {} + template ::value, bool>::type = true> + void print(std::ostream& stream, const U& object) const {} - template - typename std::enable_if::value, void>::type - print(std::ostream& stream, const U& object) const { + template ::value, bool>::type = true> + void print(std::ostream& stream, const U& object) const { stream << object; } @@ -323,13 +320,15 @@ private: template static T& dyn_cast(Any* obj) { - if (obj == nullptr) IE_THROW() << "Parameter is empty!"; + if (obj == nullptr) + IE_THROW() << "Parameter is empty!"; return dynamic_cast&>(*obj).get(); } template static const T& dyn_cast(const Any* obj) { - if (obj == nullptr) IE_THROW() << "Parameter is empty!"; + if (obj == nullptr) + IE_THROW() << "Parameter is empty!"; return dynamic_cast&>(*obj).get(); } @@ -338,7 +337,7 @@ private: /** * @brief An std::map object containing parameters - */ + */ using ParamMap = std::map; #ifdef __ANDROID__ diff --git a/inference-engine/src/inference_engine/include/ie/ie_plugin_config.hpp b/inference-engine/src/inference_engine/include/ie/ie_plugin_config.hpp index 9c2dd68e46f..f1b75fab57c 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_plugin_config.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_plugin_config.hpp @@ -10,10 +10,10 @@ */ #pragma once +#include #include #include #include -#include #include "ie_precision.hpp" @@ -37,12 +37,11 @@ namespace Metrics { #define EXEC_NETWORK_METRIC_KEY(name) METRIC_KEY(name) #ifndef DECLARE_METRIC_KEY_IMPL -#define DECLARE_METRIC_KEY(name, ...) 
\ - static constexpr auto METRIC_##name = #name +# define DECLARE_METRIC_KEY(name, ...) static constexpr auto METRIC_##name = # name #else -#define DECLARE_METRIC_KEY(name, ...) \ - static constexpr auto METRIC_##name = #name; \ - DECLARE_METRIC_KEY_IMPL(name, __VA_ARGS__) +# define DECLARE_METRIC_KEY(name, ...) \ + static constexpr auto METRIC_##name = #name; \ + DECLARE_METRIC_KEY_IMPL(name, __VA_ARGS__) #endif #define DECLARE_EXEC_NETWORK_METRIC_KEY(name, ...) DECLARE_METRIC_KEY(name, __VA_ARGS__) @@ -51,7 +50,7 @@ namespace Metrics { * @def METRIC_VALUE(name) * @brief shortcut for defining metric values */ -#define METRIC_VALUE(name) InferenceEngine::Metrics::name +#define METRIC_VALUE(name) InferenceEngine::Metrics::name #define DECLARE_METRIC_VALUE(name) static constexpr auto name = #name /** @@ -162,9 +161,15 @@ enum class DeviceType { /** @cond INTERNAL */ inline std::ostream& operator<<(std::ostream& os, const InferenceEngine::Metrics::DeviceType& deviceType) { switch (deviceType) { - case InferenceEngine::Metrics::DeviceType::discrete: os << "discrete"; break; - case InferenceEngine::Metrics::DeviceType::integrated: os << "integrated"; break; - default: os << "unknown"; break; + case InferenceEngine::Metrics::DeviceType::discrete: + os << "discrete"; + break; + case InferenceEngine::Metrics::DeviceType::integrated: + os << "integrated"; + break; + default: + os << "unknown"; + break; } return os; @@ -177,7 +182,8 @@ inline std::ostream& operator<<(std::ostream& os, const InferenceEngine::Metrics DECLARE_METRIC_KEY(DEVICE_TYPE, DeviceType); /** - * @brief Metric which defines Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions supported by specified device + * @brief Metric which defines Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions supported by + * specified device */ DECLARE_METRIC_KEY(DEVICE_GOPS, std::map); @@ -212,15 +218,15 @@ namespace PluginConfigParams { * @def CONFIG_KEY(name) * @brief shortcut for defining configuration keys */ -#define CONFIG_KEY(name) InferenceEngine::PluginConfigParams::_CONFIG_KEY(name) -#define _CONFIG_KEY(name) KEY_##name +#define CONFIG_KEY(name) InferenceEngine::PluginConfigParams::_CONFIG_KEY(name) +#define _CONFIG_KEY(name) KEY_##name #define DECLARE_CONFIG_KEY(name) static constexpr auto _CONFIG_KEY(name) = #name /** * @def CONFIG_VALUE(name) * @brief shortcut for defining configuration values */ -#define CONFIG_VALUE(name) InferenceEngine::PluginConfigParams::name +#define CONFIG_VALUE(name) InferenceEngine::PluginConfigParams::name #define DECLARE_CONFIG_VALUE(name) static constexpr auto name = #name /** @@ -239,13 +245,14 @@ DECLARE_CONFIG_KEY(CPU_THREADS_NUM); * * It is passed to Core::SetConfig(), this option should be used with values: * PluginConfigParams::NO (no pinning for CPU inference threads) - * PluginConfigParams::YES, which is default on the conventional CPUs (pinning threads to cores, best for static benchmarks), + * PluginConfigParams::YES, which is default on the conventional CPUs (pinning threads to cores, best for static + * benchmarks), * * the following options are implemented only for the TBB as a threading option * PluginConfigParams::NUMA (pinning threads to NUMA nodes, best for real-life, contented cases) * on the Windows and MacOS* this option behaves as YES - * PluginConfigParams::HYBRID_AWARE (let the runtime to do pinning to the cores types, e.g. 
prefer the "big" cores for latency tasks) - * on the hybrid CPUs this option is default + * PluginConfigParams::HYBRID_AWARE (let the runtime to do pinning to the cores types, e.g. prefer the "big" cores for + * latency tasks) on the hybrid CPUs this option is default * * Also, the settings are ignored, if the OpenVINO compiled with OpenMP and any affinity-related OpenMP's * environment variable is set (as affinity is configured explicitly) @@ -313,12 +320,15 @@ DECLARE_CONFIG_KEY(CONFIG_FILE); DECLARE_CONFIG_KEY(LOG_LEVEL); DECLARE_CONFIG_VALUE(LOG_NONE); // turn off logging -DECLARE_CONFIG_VALUE(LOG_ERROR); // error events that might still allow the application to continue running -DECLARE_CONFIG_VALUE(LOG_WARNING); // potentially harmful situations which may further lead to ERROR -DECLARE_CONFIG_VALUE( - LOG_INFO); // informational messages that display the progress of the application at coarse-grained level -DECLARE_CONFIG_VALUE(LOG_DEBUG); // fine-grained events that are most useful to debug an application. -DECLARE_CONFIG_VALUE(LOG_TRACE); // finer-grained informational events than the DEBUG +DECLARE_CONFIG_VALUE(LOG_ERROR); // error events that might still allow the + // application to continue running +DECLARE_CONFIG_VALUE(LOG_WARNING); // potentially harmful situations which may + // further lead to ERROR +DECLARE_CONFIG_VALUE(LOG_INFO); // informational messages that display the progress of the + // application at coarse-grained level +DECLARE_CONFIG_VALUE(LOG_DEBUG); // fine-grained events that are most useful to + // debug an application. +DECLARE_CONFIG_VALUE(LOG_TRACE); // finer-grained informational events than the DEBUG /** * @brief the key for setting of required device to execute on @@ -349,7 +359,6 @@ DECLARE_CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS); INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::ExecutableNetwork::GetExecGraphInfo::serialize method") DECLARE_CONFIG_KEY(DUMP_EXEC_GRAPH_AS_DOT); - /** * @brief The name for setting to execute in bfloat16 precision whenever it is possible * diff --git a/inference-engine/src/inference_engine/include/ie/ie_precision.hpp b/inference-engine/src/inference_engine/include/ie/ie_precision.hpp index 28a7cf36abc..29c84b88eef 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_precision.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_precision.hpp @@ -15,7 +15,6 @@ #include "ie_common.h" - namespace InferenceEngine { /** @@ -230,10 +229,23 @@ public: static Precision FromStr(const std::string& str) { static const std::unordered_map names = { #define PRECISION_NAME(s) {#s, s} - PRECISION_NAME(Q78), PRECISION_NAME(BOOL), PRECISION_NAME(BF16), - PRECISION_NAME(I4), PRECISION_NAME(I8), PRECISION_NAME(I16), PRECISION_NAME(I32), PRECISION_NAME(I64), - PRECISION_NAME(U4), PRECISION_NAME(U8), PRECISION_NAME(U16), PRECISION_NAME(U32), PRECISION_NAME(U64), - PRECISION_NAME(FP32), PRECISION_NAME(FP64), PRECISION_NAME(FP16), PRECISION_NAME(MIXED), + PRECISION_NAME(Q78), + PRECISION_NAME(BOOL), + PRECISION_NAME(BF16), + PRECISION_NAME(I4), + PRECISION_NAME(I8), + PRECISION_NAME(I16), + PRECISION_NAME(I32), + PRECISION_NAME(I64), + PRECISION_NAME(U4), + PRECISION_NAME(U8), + PRECISION_NAME(U16), + PRECISION_NAME(U32), + PRECISION_NAME(U64), + PRECISION_NAME(FP32), + PRECISION_NAME(FP64), + PRECISION_NAME(FP16), + PRECISION_NAME(MIXED), PRECISION_NAME(BIN), #undef PRECISION_NAME }; @@ -292,12 +304,15 @@ protected: * @returns True if strings are the same */ static bool areSameStrings(const char* l, const char* r) noexcept { 
- if (l == r) return true; + if (l == r) + return true; - if (l == nullptr || r == nullptr) return false; + if (l == nullptr || r == nullptr) + return false; for (; *l && *r; l++, r++) { - if (*l != *r) return false; + if (*l != *r) + return false; } return *l == *r; } @@ -366,7 +381,7 @@ struct PrecisionTrait { using value_type = int16_t; enum { is_float = true }; }; -template<> +template <> struct PrecisionTrait { using value_type = uint16_t; enum { is_float = false }; @@ -484,7 +499,8 @@ inline std::ostream& operator<<(std::ostream& os, const std::vector& } inline constexpr uint32_t getPrecisionMask( - InferenceEngine::Precision::ePrecision precision1, InferenceEngine::Precision::ePrecision precision2, + InferenceEngine::Precision::ePrecision precision1, + InferenceEngine::Precision::ePrecision precision2, InferenceEngine::Precision::ePrecision precision3 = InferenceEngine::Precision::MIXED, InferenceEngine::Precision::ePrecision precision4 = InferenceEngine::Precision::MIXED) { return (precision1) | (precision2 << 8) | (precision3 << 16) | (precision4 << 24); diff --git a/inference-engine/src/inference_engine/include/ie/ie_preprocess.hpp b/inference-engine/src/inference_engine/include/ie/ie_preprocess.hpp index 6c85cd8db16..273b7f99a27 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_preprocess.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_preprocess.hpp @@ -3,8 +3,8 @@ // /** - * @brief This header file provides structures to store info about pre-processing of network inputs (scale, mean image, - * ...) + * @brief This header file provides structures to store info about pre-processing of + * network inputs (scale, mean image, ...) * * @file ie_preprocess.hpp */ @@ -155,8 +155,7 @@ public: } else if (meanImage.get()->getTensorDesc().getDims().size() != 2) { IE_THROW() << "Failed to set invalid mean image for channel: number of dimensions != 2"; } else if (channel >= _channelsInfo.size()) { - IE_THROW() << "Channel " << channel - << " exceed number of PreProcess channels: " << _channelsInfo.size(); + IE_THROW() << "Channel " << channel << " exceed number of PreProcess channels: " << _channelsInfo.size(); } _variant = MEAN_IMAGE; _channelsInfo[channel]->meanData = meanImage; diff --git a/inference-engine/src/inference_engine/include/ie/ie_remote_context.hpp b/inference-engine/src/inference_engine/include/ie/ie_remote_context.hpp index a6116fe6c0d..ca1c5de14e1 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_remote_context.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_remote_context.hpp @@ -4,7 +4,7 @@ /** * @brief This is a header file for the IE RemoteContext and RemoteBlob classes - * + * * @file ie_remote_context.hpp */ #pragma once @@ -44,7 +44,7 @@ public: * @brief Constructor. Creates an empty RemoteBlob object with the specified precision. * @param tensorDesc Defines the layout and dims of the blob */ - explicit RemoteBlob(const TensorDesc& tensorDesc): MemoryBlob(tensorDesc) {} + explicit RemoteBlob(const TensorDesc& tensorDesc) : MemoryBlob(tensorDesc) {} /** * @brief Returns a map of device-specific parameters required for low-level @@ -103,8 +103,8 @@ public: * @return true if this object can be dynamically cast to the type T*. 
Otherwise, false */ template ::value && !std::is_reference::value, int>::type = 0, - typename std::enable_if::value, int>::type = 0> + typename std::enable_if::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> bool is() noexcept { return dynamic_cast(this) != nullptr; } @@ -116,8 +116,8 @@ public: * @return true if this object can be dynamically cast to the type const T*. Otherwise, false */ template ::value && !std::is_reference::value, int>::type = 0, - typename std::enable_if::value, int>::type = 0> + typename std::enable_if::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> bool is() const noexcept { return dynamic_cast(this) != nullptr; } @@ -129,9 +129,9 @@ public: * @return Raw pointer to the object of the type T or nullptr on error */ template ::value && !std::is_reference::value, int>::type = 0, - typename std::enable_if::value, int>::type = 0> - T * as() noexcept { + typename std::enable_if::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + T* as() noexcept { return dynamic_cast(this); } @@ -142,9 +142,9 @@ public: * @return Raw pointer to the object of the type const T or nullptr on error */ template ::value && !std::is_reference::value, int>::type = 0, - typename std::enable_if::value, int>::type = 0> - const T * as() const noexcept { + typename std::enable_if::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + const T* as() const noexcept { return dynamic_cast(this); } @@ -190,4 +190,3 @@ inline RemoteBlob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::P } } // namespace InferenceEngine - diff --git a/inference-engine/src/inference_engine/include/ie/ie_transformations.hpp b/inference-engine/src/inference_engine/include/ie/ie_transformations.hpp index 64f61bfa63d..1ce6a59eedb 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_transformations.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_transformations.hpp @@ -10,8 +10,8 @@ #pragma once -#include -#include +#include "cpp/ie_cnn_network.h" +#include "ie_api.h" namespace InferenceEngine { @@ -56,7 +56,6 @@ INFERENCE_ENGINE_DEPRECATED("This transformation will be removed in 2023.1. " "Use InferenceEngine::lowLatency2 instead.") INFERENCE_ENGINE_API_CPP(void) LowLatency(InferenceEngine::CNNNetwork& network); - /** * @brief The transformation finds all TensorIterator/Loop layers in the network, * processes all back edges that describe a connection between Result and Parameter @@ -84,7 +83,6 @@ INFERENCE_ENGINE_API_CPP(void) LowLatency(InferenceEngine::CNNNetwork& network); If "false, then the transformation leaves existed initializing subgraph for ReadValue operation. * Loop operation by a given number. Does not affect TensorIterators. 
*/ -INFERENCE_ENGINE_API_CPP(void) lowLatency2(InferenceEngine::CNNNetwork& network, - bool use_const_initializer = true); +INFERENCE_ENGINE_API_CPP(void) lowLatency2(InferenceEngine::CNNNetwork& network, bool use_const_initializer = true); -} // namespace InferenceEngine +} // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/include/ie/ie_version.hpp b/inference-engine/src/inference_engine/include/ie/ie_version.hpp index 10e649a09d3..55806697c1e 100644 --- a/inference-engine/src/inference_engine/include/ie/ie_version.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_version.hpp @@ -44,9 +44,9 @@ struct Version { */ struct ApiVersion { INFERENCE_ENGINE_DEPRECATED("Use IE_VERSION_[MAJOR|MINOR|PATCH] definitions, buildNumber property") - int major; //!< A major version + int major; //!< A major version INFERENCE_ENGINE_DEPRECATED("Use IE_VERSION_[MAJOR|MINOR|PATCH] definitions, buildNumber property") - int minor; //!< A minor version + int minor; //!< A minor version /** * @brief A default construtor @@ -60,7 +60,7 @@ struct Version { * @brief A default construtor * @param v A version to copy */ - ApiVersion(const ApiVersion & v) { + ApiVersion(const ApiVersion& v) { major = v.major; minor = v.minor; } diff --git a/inference-engine/src/inference_engine/include/ie/inference_engine.hpp b/inference-engine/src/inference_engine/include/ie/inference_engine.hpp index 2685e381de3..7fcbea9fc5a 100644 --- a/inference-engine/src/inference_engine/include/ie/inference_engine.hpp +++ b/inference-engine/src/inference_engine/include/ie/inference_engine.hpp @@ -8,9 +8,9 @@ */ #pragma once -#include "ie_transformations.hpp" #include "ie_compound_blob.h" #include "ie_core.hpp" +#include "ie_transformations.hpp" // remove in 2022.1 major release #include diff --git a/inference-engine/src/inference_engine/include/ie/multi-device/multi_device_config.hpp b/inference-engine/src/inference_engine/include/ie/multi-device/multi_device_config.hpp index 5062297a141..0f4f2dd829b 100644 --- a/inference-engine/src/inference_engine/include/ie/multi-device/multi_device_config.hpp +++ b/inference-engine/src/inference_engine/include/ie/multi-device/multi_device_config.hpp @@ -26,7 +26,7 @@ namespace MultiDeviceConfigParams { */ #define MULTI_CONFIG_KEY(name) InferenceEngine::MultiDeviceConfigParams::_CONFIG_KEY(MULTI_##name) -#define DECLARE_MULTI_CONFIG_KEY(name) DECLARE_CONFIG_KEY(MULTI_##name) +#define DECLARE_MULTI_CONFIG_KEY(name) DECLARE_CONFIG_KEY(MULTI_##name) #define DECLARE_MULTI_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(MULTI_##name) /** diff --git a/inference-engine/src/inference_engine/include/ie/vpu/hddl_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/hddl_config.hpp index be1649e6916..f201f8dcc18 100644 --- a/inference-engine/src/inference_engine/include/ie/vpu/hddl_config.hpp +++ b/inference-engine/src/inference_engine/include/ie/vpu/hddl_config.hpp @@ -18,68 +18,68 @@ namespace InferenceEngine { namespace Metrics { /** -* @brief Metric to get a int of the device number, String value is METRIC_HDDL_DEVICE_NUM -*/ + * @brief Metric to get a int of the device number, String value is METRIC_HDDL_DEVICE_NUM + */ DECLARE_METRIC_KEY(HDDL_DEVICE_NUM, int); /** -* @brief Metric to get a std::vector of device names, String value is METRIC_HDDL_DEVICE_NAME -*/ + * @brief Metric to get a std::vector of device names, String value is METRIC_HDDL_DEVICE_NAME + */ DECLARE_METRIC_KEY(HDDL_DEVICE_NAME, std::vector); /** -* @brief Metric to get a std::vector of device 
thermal, String value is METRIC_HDDL_DEVICE_THERMAL -*/ + * @brief Metric to get a std::vector of device thermal, String value is METRIC_HDDL_DEVICE_THERMAL + */ DECLARE_METRIC_KEY(HDDL_DEVICE_THERMAL, std::vector); /** -* @brief Metric to get a std::vector of device ids, String value is METRIC_HDDL_DEVICE_ID -*/ + * @brief Metric to get a std::vector of device ids, String value is METRIC_HDDL_DEVICE_ID + */ DECLARE_METRIC_KEY(HDDL_DEVICE_ID, std::vector); /** -* @brief Metric to get a std::vector of device subclasses, String value is METRIC_HDDL_DEVICE_SUBCLASS -*/ + * @brief Metric to get a std::vector of device subclasses, String value is METRIC_HDDL_DEVICE_SUBCLASS + */ DECLARE_METRIC_KEY(HDDL_DEVICE_SUBCLASS, std::vector); /** -* @brief Metric to get a std::vector of device total memory, String value is METRIC_HDDL_MEMORY_TOTAL -*/ + * @brief Metric to get a std::vector of device total memory, String value is METRIC_HDDL_MEMORY_TOTAL + */ DECLARE_METRIC_KEY(HDDL_DEVICE_MEMORY_TOTAL, std::vector); /** -* @brief Metric to get a std::vector of device used memory, String value is METRIC_HDDL_DEVICE_MEMORY_USED -*/ + * @brief Metric to get a std::vector of device used memory, String value is METRIC_HDDL_DEVICE_MEMORY_USED + */ DECLARE_METRIC_KEY(HDDL_DEVICE_MEMORY_USED, std::vector); /** -* @brief Metric to get a std::vector of device utilization, String value is METRIC_HDDL_DEVICE_UTILIZATION -*/ + * @brief Metric to get a std::vector of device utilization, String value is METRIC_HDDL_DEVICE_UTILIZATION + */ DECLARE_METRIC_KEY(HDDL_DEVICE_UTILIZATION, std::vector); /** -* @brief Metric to get a std::vector of stream ids, String value is METRIC_HDDL_DEVICE_STREAM_ID -*/ + * @brief Metric to get a std::vector of stream ids, String value is METRIC_HDDL_DEVICE_STREAM_ID + */ DECLARE_METRIC_KEY(HDDL_STREAM_ID, std::vector); /** -* @brief Metric to get a std::vector of device tags, String value is METRIC_HDDL_DEVICE_TAG -*/ + * @brief Metric to get a std::vector of device tags, String value is METRIC_HDDL_DEVICE_TAG + */ DECLARE_METRIC_KEY(HDDL_DEVICE_TAG, std::vector); /** -* @brief Metric to get a std::vector of group ids, String value is METRIC_HDDL_GROUP_ID -*/ + * @brief Metric to get a std::vector of group ids, String value is METRIC_HDDL_GROUP_ID + */ DECLARE_METRIC_KEY(HDDL_GROUP_ID, std::vector); /** -* @brief Metric to get a int number of device be using for group, String value is METRIC_HDDL_DEVICE_GROUP_USING_NUM -*/ + * @brief Metric to get a int number of device be using for group, String value is METRIC_HDDL_DEVICE_GROUP_USING_NUM + */ DECLARE_METRIC_KEY(HDDL_DEVICE_GROUP_USING_NUM, int); /** -* @brief Metric to get a int number of total device, String value is METRIC_HDDL_DEVICE_TOTAL_NUM -*/ + * @brief Metric to get a int number of total device, String value is METRIC_HDDL_DEVICE_TOTAL_NUM + */ DECLARE_METRIC_KEY(HDDL_DEVICE_TOTAL_NUM, int); } // namespace Metrics @@ -151,8 +151,9 @@ DECLARE_VPU_CONFIG(HDDL_BIND_DEVICE); * @brief [Only for HDDLPlugin] * Type: A signed int wrapped in a string, default is "0". * This config is a sub-config of DEVICE_TAG, and only available when "DEVICE_TAG" is set and "BIND_DEVICE" is "False". - * When there are multiple devices running a certain network (a same network running on multiple devices in Bypass Scheduler), - * the device with a larger number has a higher priority, and more inference tasks will be fed to it with priority. 
+ * When there are multiple devices running a certain network (a same network running on multiple devices in Bypass + * Scheduler), the device with a larger number has a higher priority, and more inference tasks will be fed to it with + * priority. */ DECLARE_VPU_CONFIG(HDDL_RUNTIME_PRIORITY); @@ -171,7 +172,7 @@ DECLARE_VPU_CONFIG(HDDL_USE_SGAD); * This config gives a "group id" for a certain device when this device has been reserved for certain client, client * can use this device grouped by calling this group id while other client can't use this device * Each device has their own group id. Device in one group shares same group id. - */ + */ DECLARE_VPU_CONFIG(HDDL_GROUP_DEVICE); } // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/include/ie/vpu/hddl_plugin_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/hddl_plugin_config.hpp index 5eb1436973e..1a5740531ba 100644 --- a/inference-engine/src/inference_engine/include/ie/vpu/hddl_plugin_config.hpp +++ b/inference-engine/src/inference_engine/include/ie/vpu/hddl_plugin_config.hpp @@ -30,7 +30,7 @@ */ #define VPU_HDDL_CONFIG_VALUE(name) InferenceEngine::VPUConfigParams::VPU_HDDL_##name -#define DECLARE_VPU_HDDL_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_HDDL_##name) +#define DECLARE_VPU_HDDL_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_HDDL_##name) #define DECLARE_VPU_HDDL_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(VPU_HDDL_##name) // @@ -41,101 +41,102 @@ * @def VPU_HDDL_METRIC(name) * @brief Shortcut for defining VPU HDDL metric */ -#define VPU_HDDL_METRIC(name) METRIC_KEY(VPU_HDDL_##name) -#define DECLARE_VPU_HDDL_METRIC(name, ...) DECLARE_METRIC_KEY(VPU_HDDL_##name, __VA_ARGS__) +#define VPU_HDDL_METRIC(name) METRIC_KEY(VPU_HDDL_##name) +#define DECLARE_VPU_HDDL_METRIC(name, ...) 
DECLARE_METRIC_KEY(VPU_HDDL_##name, __VA_ARGS__) namespace InferenceEngine { namespace Metrics { /** -* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_NUM instead -* @brief Metric to get a int of the device number, String value is METRIC_VPU_HDDL_DEVICE_NUM -*/ + * @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_NUM instead + * @brief Metric to get a int of the device number, String value is METRIC_VPU_HDDL_DEVICE_NUM + */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_NUM instead") DECLARE_VPU_HDDL_METRIC(DEVICE_NUM, int); /** -* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_NAME instead -* @brief Metric to get a std::vector of device names, String value is METRIC_VPU_HDDL_DEVICE_NAME -*/ + * @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_NAME instead + * @brief Metric to get a std::vector of device names, String value is METRIC_VPU_HDDL_DEVICE_NAME + */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_NAME instead") DECLARE_VPU_HDDL_METRIC(DEVICE_NAME, std::vector); /** -* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_THERMAL instead -* @brief Metric to get a std::vector of device thermal, String value is METRIC_VPU_HDDL_DEVICE_THERMAL -*/ + * @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_THERMAL instead + * @brief Metric to get a std::vector of device thermal, String value is METRIC_VPU_HDDL_DEVICE_THERMAL + */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_THERMAL instead") DECLARE_VPU_HDDL_METRIC(DEVICE_THERMAL, std::vector); /** -* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_ID instead -* @brief Metric to get a std::vector of device ids, String value is METRIC_VPU_HDDL_DEVICE_ID -*/ + * @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_ID instead + * @brief Metric to get a std::vector of device ids, String value is METRIC_VPU_HDDL_DEVICE_ID + */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_ID instead") DECLARE_VPU_HDDL_METRIC(DEVICE_ID, std::vector); /** -* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_SUBCLASS instead -* @brief Metric to get a std::vector of device subclasses, String value is METRIC_VPU_HDDL_DEVICE_SUBCLASS -*/ + * @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_SUBCLASS instead + * @brief Metric to get a std::vector of device subclasses, String value is METRIC_VPU_HDDL_DEVICE_SUBCLASS + */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_SUBCLASS instead") DECLARE_VPU_HDDL_METRIC(DEVICE_SUBCLASS, std::vector); /** -* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_MEMORY_TOTAL instead -* @brief Metric to get a std::vector of device total memory, String value is METRIC_VPU_HDDL_MEMORY_TOTAL -*/ + * @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_MEMORY_TOTAL instead + * @brief Metric to get a std::vector of device total memory, String value is METRIC_VPU_HDDL_MEMORY_TOTAL + */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_MEMORY_TOTAL instead") DECLARE_VPU_HDDL_METRIC(DEVICE_MEMORY_TOTAL, std::vector); /** -* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_MEMORY_USED instead -* @brief Metric to get a std::vector of device used memory, String value is METRIC_VPU_HDDL_DEVICE_MEMORY_USED -*/ + * @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_MEMORY_USED instead + * @brief Metric to get a std::vector of device used memory, String value is METRIC_VPU_HDDL_DEVICE_MEMORY_USED + */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_MEMORY_USED instead") 
DECLARE_VPU_HDDL_METRIC(DEVICE_MEMORY_USED, std::vector); /** -* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_UTILIZATION instead -* @brief Metric to get a std::vector of device utilization, String value is METRIC_VPU_HDDL_DEVICE_UTILIZATION -*/ + * @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_UTILIZATION instead + * @brief Metric to get a std::vector of device utilization, String value is METRIC_VPU_HDDL_DEVICE_UTILIZATION + */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_UTILIZATION instead") DECLARE_VPU_HDDL_METRIC(DEVICE_UTILIZATION, std::vector); /** -* @deprecated Use InferenceEngine::METRIC_HDDL_STREAM_ID instead -* @brief Metric to get a std::vector of stream ids, String value is METRIC_VPU_HDDL_DEVICE_STREAM_ID -*/ + * @deprecated Use InferenceEngine::METRIC_HDDL_STREAM_ID instead + * @brief Metric to get a std::vector of stream ids, String value is METRIC_VPU_HDDL_DEVICE_STREAM_ID + */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_STREAM_ID instead") DECLARE_VPU_HDDL_METRIC(STREAM_ID, std::vector); /** -* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_TAG instead -* @brief Metric to get a std::vector of device tags, String value is METRIC_VPU_HDDL_DEVICE_TAG -*/ + * @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_TAG instead + * @brief Metric to get a std::vector of device tags, String value is METRIC_VPU_HDDL_DEVICE_TAG + */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_TAG instead") DECLARE_VPU_HDDL_METRIC(DEVICE_TAG, std::vector); /** -* @deprecated Use InferenceEngine::METRIC_HDDL_GROUP_ID instead -* @brief Metric to get a std::vector of group ids, String value is METRIC_VPU_HDDL_GROUP_ID -*/ + * @deprecated Use InferenceEngine::METRIC_HDDL_GROUP_ID instead + * @brief Metric to get a std::vector of group ids, String value is METRIC_VPU_HDDL_GROUP_ID + */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_GROUP_ID instead") DECLARE_VPU_HDDL_METRIC(GROUP_ID, std::vector); /** -* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_GROUP_USING_NUM instead -* @brief Metric to get a int number of device be using for group, String value is METRIC_VPU_HDDL_DEVICE_GROUP_USING_NUM -*/ + * @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_GROUP_USING_NUM instead + * @brief Metric to get a int number of device be using for group, String value is + * METRIC_VPU_HDDL_DEVICE_GROUP_USING_NUM + */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_GROUP_USING_NUM instead") DECLARE_VPU_HDDL_METRIC(DEVICE_GROUP_USING_NUM, int); /** -* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_TOTAL_NUM instead -* @brief Metric to get a int number of total device, String value is METRIC_VPU_HDDL_DEVICE_TOTAL_NUM -*/ + * @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_TOTAL_NUM instead + * @brief Metric to get a int number of total device, String value is METRIC_VPU_HDDL_DEVICE_TOTAL_NUM + */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_TOTAL_NUM instead") DECLARE_VPU_HDDL_METRIC(DEVICE_TOTAL_NUM, int); @@ -219,8 +220,9 @@ DECLARE_VPU_HDDL_CONFIG_KEY(BIND_DEVICE); * @brief [Only for HDDLPlugin] * Type: A signed int wrapped in a string, default is "0". * This config is a sub-config of DEVICE_TAG, and only available when "DEVICE_TAG" is set and "BIND_DEVICE" is "False". 
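The deprecation messages above point at the HDDL_* replacements from hddl_config.hpp; either spelling is read back through the regular InferenceEngine::Core::GetMetric call. A minimal sketch using the non-deprecated keys, assuming the plugin is registered under the device name "HDDL" and that the thermal metric holds float values (the template arguments are elided in the excerpt above):

#include <iostream>
#include <vector>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>
#include <vpu/hddl_config.hpp>

int main() {
    InferenceEngine::Core ie;

    // Total number of HDDL devices visible to the plugin (an int metric).
    const auto deviceNum = ie.GetMetric("HDDL", METRIC_KEY(HDDL_DEVICE_NUM)).as<int>();
    std::cout << "HDDL devices: " << deviceNum << std::endl;

    // Per-device thermal readings; one value per device, element type assumed float.
    const auto thermal =
        ie.GetMetric("HDDL", METRIC_KEY(HDDL_DEVICE_THERMAL)).as<std::vector<float>>();
    for (const auto& t : thermal)
        std::cout << "  thermal: " << t << std::endl;
    return 0;
}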
- * When there are multiple devices running a certain network (a same network running on multiple devices in Bypass Scheduler), - * the device with a larger number has a higher priority, and more inference tasks will be fed to it with priority. + * When there are multiple devices running a certain network (a same network running on multiple devices in Bypass + * Scheduler), the device with a larger number has a higher priority, and more inference tasks will be fed to it with + * priority. */ INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::HDDL_RUNTIME_PRIORITY instead") DECLARE_VPU_HDDL_CONFIG_KEY(RUNTIME_PRIORITY); diff --git a/inference-engine/src/inference_engine/include/ie/vpu/myriad_plugin_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/myriad_plugin_config.hpp index d16227f37cd..e777daabf0a 100644 --- a/inference-engine/src/inference_engine/include/ie/vpu/myriad_plugin_config.hpp +++ b/inference-engine/src/inference_engine/include/ie/vpu/myriad_plugin_config.hpp @@ -26,7 +26,7 @@ */ #define VPU_MYRIAD_CONFIG_VALUE(name) InferenceEngine::VPUConfigParams::VPU_MYRIAD_##name -#define DECLARE_VPU_MYRIAD_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_MYRIAD_##name) +#define DECLARE_VPU_MYRIAD_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_MYRIAD_##name) #define DECLARE_VPU_MYRIAD_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(VPU_MYRIAD_##name) namespace InferenceEngine { diff --git a/inference-engine/src/inference_engine/include/ie/vpu/vpu_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/vpu_config.hpp index f9a21055156..1c05cfca6a3 100644 --- a/inference-engine/src/inference_engine/include/ie/vpu/vpu_config.hpp +++ b/inference-engine/src/inference_engine/include/ie/vpu/vpu_config.hpp @@ -12,11 +12,11 @@ #pragma once -#include "ie_plugin_config.hpp" -#include "ie_api.h" - #include +#include "ie_api.h" +#include "ie_plugin_config.hpp" + #define DECLARE_VPU_CONFIG(name) static constexpr auto name = #name namespace InferenceEngine { diff --git a/inference-engine/src/inference_engine/include/ie/vpu/vpu_plugin_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/vpu_plugin_config.hpp index 2ab022c231e..c3e0c5d7b12 100644 --- a/inference-engine/src/inference_engine/include/ie/vpu/vpu_plugin_config.hpp +++ b/inference-engine/src/inference_engine/include/ie/vpu/vpu_plugin_config.hpp @@ -12,26 +12,26 @@ #pragma once -#include "vpu/myriad_plugin_config.hpp" -#include "vpu/hddl_plugin_config.hpp" #include "ie_api.h" +#include "vpu/hddl_plugin_config.hpp" +#include "vpu/myriad_plugin_config.hpp" // // Common options // -#define VPU_CONFIG_KEY(name) InferenceEngine::VPUConfigParams::_CONFIG_KEY(VPU_##name) +#define VPU_CONFIG_KEY(name) InferenceEngine::VPUConfigParams::_CONFIG_KEY(VPU_##name) #define VPU_CONFIG_VALUE(name) InferenceEngine::VPUConfigParams::VPU_##name -#define DECLARE_VPU_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_##name) +#define DECLARE_VPU_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_##name) #define DECLARE_VPU_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(VPU_##name) // // Common metrics // -#define VPU_METRIC(name) METRIC_KEY(VPU_##name) -#define DECLARE_VPU_METRIC(name, ...) DECLARE_METRIC_KEY(VPU_##name, __VA_ARGS__) +#define VPU_METRIC(name) METRIC_KEY(VPU_##name) +#define DECLARE_VPU_METRIC(name, ...) 
DECLARE_METRIC_KEY(VPU_##name, __VA_ARGS__) namespace InferenceEngine { @@ -77,8 +77,8 @@ DECLARE_VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME); * VPU_CONFIG_VALUE(NCHW) executable network forced to use NCHW input/output layouts * VPU_CONFIG_VALUE(NHWC) executable network forced to use NHWC input/output layouts */ -INFERENCE_ENGINE_DEPRECATED("Use InputInfo::setLayout on input data from CNNNetwork::getInputsInfo() or" \ - "Data::setLayout on output data from CNNNetwork::getOutputsInfo()") +INFERENCE_ENGINE_DEPRECATED("Use InputInfo::setLayout on input data from CNNNetwork::getInputsInfo() or" + "Data::setLayout on output data from CNNNetwork::getOutputsInfo()") DECLARE_VPU_CONFIG_KEY(COMPUTE_LAYOUT); /** diff --git a/inference-engine/src/inference_engine/include/openvino/runtime/core.hpp b/inference-engine/src/inference_engine/include/openvino/runtime/core.hpp index b1f5823d30f..9bc3504b69a 100644 --- a/inference-engine/src/inference_engine/include/openvino/runtime/core.hpp +++ b/inference-engine/src/inference_engine/include/openvino/runtime/core.hpp @@ -15,9 +15,9 @@ #include #include -#include "ie_version.hpp" -#include "ie_plugin_config.hpp" #include "cpp/ie_executable_network.hpp" +#include "ie_plugin_config.hpp" +#include "ie_version.hpp" namespace ngraph { class Function; @@ -101,7 +101,8 @@ public: * constant data becomes to point to invalid memory. * @return Function */ - std::shared_ptr read_model(const std::string& model, const std::shared_ptr& weights) const; + std::shared_ptr read_model(const std::string& model, + const std::shared_ptr& weights) const; /** * @brief Creates an executable network from a network object. @@ -115,9 +116,9 @@ public: * operation * @return An executable network reference */ - InferenceEngine::ExecutableNetwork compile_model( - const std::shared_ptr& network, const std::string& deviceName, - const std::map& config = {}); + InferenceEngine::ExecutableNetwork compile_model(const std::shared_ptr& network, + const std::string& deviceName, + const std::map& config = {}); /** * @brief Reads model and creates an executable network from IR or ONNX file @@ -132,9 +133,9 @@ public: * * @return An executable network reference */ - InferenceEngine::ExecutableNetwork compile_model( - const std::string& modelPath, const std::string& deviceName, - const std::map& config = {}); + InferenceEngine::ExecutableNetwork compile_model(const std::string& modelPath, + const std::string& deviceName, + const std::map& config = {}); /** * @brief Creates an executable network from a network object within a specified remote context. 
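The read_model/compile_model signatures above mirror the long-standing InferenceEngine::Core flow. A minimal end-to-end sketch of that classic counterpart, with placeholder file names:

#include <ie_core.hpp>

int main() {
    InferenceEngine::Core ie;

    // Parse an IR model into a CNNNetwork; the paths are placeholders.
    InferenceEngine::CNNNetwork network = ie.ReadNetwork("model.xml", "model.bin");

    // Compile the parsed network for a device, the analogue of compile_model above.
    InferenceEngine::ExecutableNetwork executable = ie.LoadNetwork(network, "CPU");

    // Create a request and run one synchronous inference.
    InferenceEngine::InferRequest request = executable.CreateInferRequest();
    request.Infer();
    return 0;
}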
@@ -144,9 +145,9 @@ public: * operation * @return An executable network object */ - InferenceEngine::ExecutableNetwork compile_model( - const std::shared_ptr& network, const std::shared_ptr& context, - const std::map& config = {}); + InferenceEngine::ExecutableNetwork compile_model(const std::shared_ptr& network, + const std::shared_ptr& context, + const std::map& config = {}); /** * @brief Registers extension @@ -162,8 +163,9 @@ public: * operation* * @return An executable network reference */ - InferenceEngine::ExecutableNetwork import_model(std::istream& networkModel, const std::string& deviceName, - const std::map& config = {}); + InferenceEngine::ExecutableNetwork import_model(std::istream& networkModel, + const std::string& deviceName, + const std::map& config = {}); /** * @brief Creates an executable network from a previously exported network within a specified @@ -176,8 +178,8 @@ public: * @return An executable network reference */ InferenceEngine::ExecutableNetwork import_model(std::istream& networkModel, - const std::shared_ptr& context, - const std::map& config = {}); + const std::shared_ptr& context, + const std::map& config = {}); /** * @brief Query device if it supports specified network with specified configuration @@ -187,9 +189,9 @@ public: * @param config Optional map of pairs: (config parameter name, config parameter value) * @return An object containing a map of pairs a layer name -> a device name supporting this layer. */ - InferenceEngine::QueryNetworkResult query_model( - const std::shared_ptr& network, const std::string& deviceName, - const std::map& config = {}) const; + InferenceEngine::QueryNetworkResult query_model(const std::shared_ptr& network, + const std::string& deviceName, + const std::map& config = {}) const; /** * @brief Sets configuration for device, acceptable keys can be found in ie_plugin_config.hpp diff --git a/inference-engine/src/inference_engine/src/blob_transform.cpp b/inference-engine/src/inference_engine/src/blob_transform.cpp index 26f7d064c64..52e19d86807 100644 --- a/inference-engine/src/inference_engine/src/blob_transform.cpp +++ b/inference-engine/src/inference_engine/src/blob_transform.cpp @@ -6,7 +6,7 @@ #include "ie_system_conf.h" #ifdef HAVE_SSE -#include "cpu_x86_sse42/blob_transform_sse42.hpp" +# include "cpu_x86_sse42/blob_transform_sse42.hpp" #endif #include @@ -53,16 +53,30 @@ static void blob_copy_4d_t(Blob::Ptr src, Blob::Ptr dst) { if (src->getTensorDesc().getLayout() == NHWC && dst->getTensorDesc().getLayout() == NCHW && C == 3 && C_src_stride == 1 && W_src_stride == 3 && W_dst_stride == 1 && with_cpu_x86_sse42()) { if (PRC == Precision::U8) { - blob_copy_4d_split_u8c3(reinterpret_cast(src_ptr), reinterpret_cast(dst_ptr), - N_src_stride, H_src_stride, N_dst_stride, H_dst_stride, C_dst_stride, - static_cast(N), static_cast(H), static_cast(W)); + blob_copy_4d_split_u8c3(reinterpret_cast(src_ptr), + reinterpret_cast(dst_ptr), + N_src_stride, + H_src_stride, + N_dst_stride, + H_dst_stride, + C_dst_stride, + static_cast(N), + static_cast(H), + static_cast(W)); return; } if (PRC == Precision::FP32) { - blob_copy_4d_split_f32c3(reinterpret_cast(src_ptr), reinterpret_cast(dst_ptr), - N_src_stride, H_src_stride, N_dst_stride, H_dst_stride, C_dst_stride, - static_cast(N), static_cast(H), static_cast(W)); + blob_copy_4d_split_f32c3(reinterpret_cast(src_ptr), + reinterpret_cast(dst_ptr), + N_src_stride, + H_src_stride, + N_dst_stride, + H_dst_stride, + C_dst_stride, + static_cast(N), + static_cast(H), + static_cast(W)); return; } } @@ 
-70,16 +84,30 @@ static void blob_copy_4d_t(Blob::Ptr src, Blob::Ptr dst) { if (src->getTensorDesc().getLayout() == NCHW && dst->getTensorDesc().getLayout() == NHWC && C == 3 && C_dst_stride == 1 && W_dst_stride == 3 && W_src_stride == 1 && with_cpu_x86_sse42()) { if (PRC == Precision::U8) { - blob_copy_4d_merge_u8c3(reinterpret_cast(src_ptr), reinterpret_cast(dst_ptr), - N_src_stride, H_src_stride, C_src_stride, N_dst_stride, H_dst_stride, - static_cast(N), static_cast(H), static_cast(W)); + blob_copy_4d_merge_u8c3(reinterpret_cast(src_ptr), + reinterpret_cast(dst_ptr), + N_src_stride, + H_src_stride, + C_src_stride, + N_dst_stride, + H_dst_stride, + static_cast(N), + static_cast(H), + static_cast(W)); return; } if (PRC == Precision::FP32) { - blob_copy_4d_merge_f32c3(reinterpret_cast(src_ptr), reinterpret_cast(dst_ptr), - N_src_stride, H_src_stride, C_src_stride, N_dst_stride, H_dst_stride, - static_cast(N), static_cast(H), static_cast(W)); + blob_copy_4d_merge_f32c3(reinterpret_cast(src_ptr), + reinterpret_cast(dst_ptr), + N_src_stride, + H_src_stride, + C_src_stride, + N_dst_stride, + H_dst_stride, + static_cast(N), + static_cast(H), + static_cast(W)); return; } } @@ -186,17 +214,35 @@ static void blob_copy_5d_t(Blob::Ptr src, Blob::Ptr dst) { if (src->getTensorDesc().getLayout() == NDHWC && dst->getTensorDesc().getLayout() == NCDHW && C == 3 && C_src_stride == 1 && W_src_stride == 3 && W_dst_stride == 1 && with_cpu_x86_sse42()) { if (PRC == Precision::U8) { - blob_copy_5d_split_u8c3(reinterpret_cast(src_ptr), reinterpret_cast(dst_ptr), - N_src_stride, D_src_stride, H_src_stride, N_dst_stride, D_dst_stride, H_dst_stride, - C_dst_stride, static_cast(N), static_cast(D), static_cast(H), + blob_copy_5d_split_u8c3(reinterpret_cast(src_ptr), + reinterpret_cast(dst_ptr), + N_src_stride, + D_src_stride, + H_src_stride, + N_dst_stride, + D_dst_stride, + H_dst_stride, + C_dst_stride, + static_cast(N), + static_cast(D), + static_cast(H), static_cast(W)); return; } if (PRC == Precision::FP32) { - blob_copy_5d_split_f32c3(reinterpret_cast(src_ptr), reinterpret_cast(dst_ptr), - N_src_stride, D_src_stride, H_src_stride, N_dst_stride, D_dst_stride, H_dst_stride, - C_dst_stride, static_cast(N), static_cast(D), static_cast(H), + blob_copy_5d_split_f32c3(reinterpret_cast(src_ptr), + reinterpret_cast(dst_ptr), + N_src_stride, + D_src_stride, + H_src_stride, + N_dst_stride, + D_dst_stride, + H_dst_stride, + C_dst_stride, + static_cast(N), + static_cast(D), + static_cast(H), static_cast(W)); return; } @@ -205,17 +251,35 @@ static void blob_copy_5d_t(Blob::Ptr src, Blob::Ptr dst) { if (src->getTensorDesc().getLayout() == NCDHW && dst->getTensorDesc().getLayout() == NDHWC && C == 3 && C_dst_stride == 1 && W_dst_stride == 3 && W_src_stride == 1 && with_cpu_x86_sse42()) { if (PRC == Precision::U8) { - blob_copy_5d_merge_u8c3(reinterpret_cast(src_ptr), reinterpret_cast(dst_ptr), - N_src_stride, D_src_stride, H_src_stride, C_src_stride, N_dst_stride, D_dst_stride, - H_dst_stride, static_cast(N), static_cast(D), static_cast(H), + blob_copy_5d_merge_u8c3(reinterpret_cast(src_ptr), + reinterpret_cast(dst_ptr), + N_src_stride, + D_src_stride, + H_src_stride, + C_src_stride, + N_dst_stride, + D_dst_stride, + H_dst_stride, + static_cast(N), + static_cast(D), + static_cast(H), static_cast(W)); return; } if (PRC == Precision::FP32) { - blob_copy_5d_merge_f32c3(reinterpret_cast(src_ptr), reinterpret_cast(dst_ptr), - N_src_stride, D_src_stride, H_src_stride, C_src_stride, N_dst_stride, D_dst_stride, - H_dst_stride, 
static_cast(N), static_cast(D), static_cast(H), + blob_copy_5d_merge_f32c3(reinterpret_cast(src_ptr), + reinterpret_cast(dst_ptr), + N_src_stride, + D_src_stride, + H_src_stride, + C_src_stride, + N_dst_stride, + D_dst_stride, + H_dst_stride, + static_cast(N), + static_cast(D), + static_cast(H), static_cast(W)); return; } @@ -289,13 +353,15 @@ static inline void blob_copy_5d(Blob::Ptr src, Blob::Ptr dst) { } void blob_copy(Blob::Ptr src, Blob::Ptr dst) { - if (src->buffer() == nullptr) IE_THROW() << "Cannot copy blob data. Source is not allocated."; + if (src->buffer() == nullptr) + IE_THROW() << "Cannot copy blob data. Source is not allocated."; - if (dst->buffer() == nullptr) IE_THROW() << "Cannot copy blob data. Destination is not allocated."; + if (dst->buffer() == nullptr) + IE_THROW() << "Cannot copy blob data. Destination is not allocated."; if (src->getTensorDesc().getPrecision() != dst->getTensorDesc().getPrecision()) IE_THROW() << "Unimplemented blob transformation from precision " << src->getTensorDesc().getPrecision() - << " to " << src->getTensorDesc().getPrecision(); + << " to " << src->getTensorDesc().getPrecision(); if (src->getTensorDesc().getDims() != dst->getTensorDesc().getDims()) IE_THROW() << "Unimplemented blob transformation from different shapes "; diff --git a/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp b/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp index 85019779800..9e3171d3a10 100644 --- a/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp +++ b/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp @@ -4,43 +4,39 @@ #include "cnn_network_ngraph_impl.hpp" -#include -#include #include -#include -#include - - #include #include #include -#include -#include -#include -#include -#include -#include #include #include +#include +#include -#include -#include -#include +#include "blob_factory.hpp" +#include "cpp/ie_cnn_network.h" +#include "ie_common.h" +#include "ie_memcpy.h" +#include "ngraph/graph_util.hpp" +#include "ngraph/ngraph.hpp" +#include "ngraph/pass/constant_folding.hpp" +#include "ngraph/pass/manager.hpp" #include "transformations/serialize.hpp" +#include "transformations/smart_reshape/set_batch_size.hpp" +#include "transformations/smart_reshape/smart_reshape.hpp" +#include "transformations/utils/utils.hpp" // TODO: remove this pass usage -#include #include - +#include #include - -#include #include +#include -#include "ie_ngraph_utils.hpp" #include "exec_graph_info.hpp" #include "ie_itt.hpp" +#include "ie_ngraph_utils.hpp" using namespace std; using namespace InferenceEngine; @@ -48,7 +44,8 @@ using details::CNNNetworkNGraphImpl; using InferenceEngine::details::CNNNetworkNGraphImpl; using ngraph::Function; -void CNNNetworkNGraphImpl::createDataForResult(const ::ngraph::Output<::ngraph::Node>& output, const std::string& outName, +void CNNNetworkNGraphImpl::createDataForResult(const ::ngraph::Output<::ngraph::Node>& output, + const std::string& outName, DataPtr& ptr) { const auto isCompatible = [](size_t size, const Layout& l) -> bool { switch (size) { @@ -111,10 +108,10 @@ void CNNNetworkNGraphImpl::validateFunctionNames() const { } } -CNNNetworkNGraphImpl::CNNNetworkNGraphImpl( - const std::shared_ptr& nGraph, - const std::vector& exts) - : _ngraph_function(nGraph), _ie_extensions(exts) { +CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(const std::shared_ptr& nGraph, + const std::vector& exts) + : _ngraph_function(nGraph), + _ie_extensions(exts) { // Restore usual attributes for 
CNNNetwork auto keep_input_info = [](CNNNetworkNGraphImpl& network, const DataPtr& inData) { InputInfo::Ptr info(new InputInfo()); @@ -152,7 +149,7 @@ CNNNetworkNGraphImpl::CNNNetworkNGraphImpl( if (output.second->getPrecision() == Precision::I64) { output.second->setPrecision(Precision::I32); } else if (output.second->getPrecision() != Precision::FP32 && - output.second->getPrecision() != Precision::I32) { + output.second->getPrecision() != Precision::I32) { output.second->setPrecision(Precision::FP32); } } @@ -225,18 +222,20 @@ void CNNNetworkNGraphImpl::validate(int version) { _ngraph_function->validate_nodes_and_infer_types(); } -StatusCode CNNNetworkNGraphImpl::addOutput(const std::string& layerName, size_t outputIndex, +StatusCode CNNNetworkNGraphImpl::addOutput(const std::string& layerName, + size_t outputIndex, ResponseDesc* resp) noexcept { OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "CNNNetworkNGraphImpl::addOutput"); try { - for (const auto & layer : _ngraph_function->get_ops()) { + for (const auto& layer : _ngraph_function->get_ops()) { // Result can have the same name as previous operation if (layer->get_friendly_name() == layerName && !std::dynamic_pointer_cast(layer)) { // Check that output port exists if (layer->outputs().size() <= outputIndex) { return DescriptionBuffer(OUT_OF_BOUNDS, resp) - << "port index " << outputIndex << " exceeds the number of layer outputs " << layer->outputs().size(); + << "port index " << outputIndex << " exceeds the number of layer outputs " + << layer->outputs().size(); } std::string outputName = layerName; if (layer->outputs().size() != 1) { @@ -271,7 +270,7 @@ StatusCode CNNNetworkNGraphImpl::addOutput(const std::string& layerName, size_t return DescriptionBuffer(NOT_FOUND, resp) << "Cannot add output! Layer " << layerName << " wasn't found!"; } -void CNNNetworkNGraphImpl::addOutput(const ::ngraph::Output<::ngraph::Node> & output) { +void CNNNetworkNGraphImpl::addOutput(const ::ngraph::Output<::ngraph::Node>& output) { auto dataName = ngraph::op::util::create_ie_output_name(output); DataPtr data; if (_data.count(dataName)) @@ -313,16 +312,16 @@ void CNNNetworkNGraphImpl::reshape() { reshape({}); } -StatusCode -CNNNetworkNGraphImpl::reshape(const std::map>& inputShapes, - ResponseDesc* responseDesc) noexcept { - if (inputShapes.empty()) return OK; +StatusCode CNNNetworkNGraphImpl::reshape(const std::map>& inputShapes, + ResponseDesc* responseDesc) noexcept { + if (inputShapes.empty()) + return OK; - const auto & params = _ngraph_function->get_parameters(); + const auto& params = _ngraph_function->get_parameters(); // Check that we need to do reshape only if input shapes will be changed bool needReshape = false; - for (const auto & param : params) { + for (const auto& param : params) { const auto it = inputShapes.find(param->get_friendly_name()); if (it == inputShapes.end()) { continue; @@ -333,11 +332,12 @@ CNNNetworkNGraphImpl::reshape(const std::map>& } } - if (!needReshape) return OK; + if (!needReshape) + return OK; // save original parameters shape std::map originalInputShapes; - for (const auto & param : params) { + for (const auto& param : params) { originalInputShapes[param->get_friendly_name()] = param->get_partial_shape(); } @@ -347,7 +347,7 @@ CNNNetworkNGraphImpl::reshape(const std::map>& ssr_manager.run_passes(_ngraph_function); std::map reshapeShapes; - for (const auto & item : inputShapes) { + for (const auto& item : inputShapes) { reshapeShapes[item.first] = ngraph::PartialShape(item.second); } reshape(reshapeShapes); @@ -359,8 
+359,7 @@ CNNNetworkNGraphImpl::reshape(const std::map>& return OK; } -void -CNNNetworkNGraphImpl::reshape(const std::map& inputShapes) { +void CNNNetworkNGraphImpl::reshape(const std::map& inputShapes) { OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "CNNNetworkNGraphImpl::reshape"); auto params = _ngraph_function->get_parameters(); @@ -377,9 +376,9 @@ CNNNetworkNGraphImpl::reshape(const std::map& _ngraph_function->validate_nodes_and_infer_types(); const auto& results = _ngraph_function->get_results(); - bool outputs_are_static = all_of( - begin(results), end(results), - [](const std::shared_ptr& n){ return n->get_output_partial_shape(0).is_static(); }); + bool outputs_are_static = all_of(begin(results), end(results), [](const std::shared_ptr& n) { + return n->get_output_partial_shape(0).is_static(); + }); { shared_ptr specialized_ngraph_function = nullptr; @@ -398,7 +397,7 @@ CNNNetworkNGraphImpl::reshape(const std::map& manager.register_pass<::ngraph::pass::ConstantFolding>(); // OneHotToLegacy changes output precision manager.register_pass<::ngraph::pass::ConvertOneHotToOneHotIEMatcher>()->detect_output_type( - specialized_ngraph_function); + specialized_ngraph_function); manager.run_passes(specialized_ngraph_function); } specialized_ngraph_function->validate_nodes_and_infer_types(); @@ -443,12 +442,12 @@ CNNNetworkNGraphImpl::reshape(const std::map& } #endif std::unordered_set opName; - for (const auto &result : specialized_ngraph_function->get_results()) { + for (const auto& result : specialized_ngraph_function->get_results()) { addOutput(result->input_value(0)); } - for (const auto ¶meter : specialized_ngraph_function->get_parameters()) { - const auto &outName = parameter->get_friendly_name(); + for (const auto& parameter : specialized_ngraph_function->get_parameters()) { + const auto& outName = parameter->get_friendly_name(); if (opName.find(outName) != opName.end()) { IE_THROW() << "All operations in nGraph function should have unique friendly names!"; } @@ -468,9 +467,10 @@ StatusCode CNNNetworkNGraphImpl::serialize(const std::string& xmlPath, custom_opsets.insert(begin(opset), end(opset)); } ngraph::pass::Manager manager; - manager.register_pass( - xmlPath, binPath, ngraph::pass::Serialize::Version::IR_V10, - custom_opsets); + manager.register_pass(xmlPath, + binPath, + ngraph::pass::Serialize::Version::IR_V10, + custom_opsets); manager.run_passes(_ngraph_function); } catch (const Exception& e) { return DescriptionBuffer(GENERAL_ERROR, resp) << e.what(); @@ -482,9 +482,8 @@ StatusCode CNNNetworkNGraphImpl::serialize(const std::string& xmlPath, return OK; } -StatusCode CNNNetworkNGraphImpl::serialize(std::ostream& xmlBuf, - std::ostream& binBuf, - ResponseDesc* resp) const noexcept { +StatusCode CNNNetworkNGraphImpl::serialize(std::ostream& xmlBuf, std::ostream& binBuf, ResponseDesc* resp) const + noexcept { try { std::map custom_opsets; for (const auto& extension : _ie_extensions) { @@ -492,9 +491,10 @@ StatusCode CNNNetworkNGraphImpl::serialize(std::ostream& xmlBuf, custom_opsets.insert(begin(opset), end(opset)); } ngraph::pass::Manager manager; - manager.register_pass( - xmlBuf, binBuf, ngraph::pass::Serialize::Version::IR_V10, - custom_opsets); + manager.register_pass(xmlBuf, + binBuf, + ngraph::pass::Serialize::Version::IR_V10, + custom_opsets); manager.run_passes(_ngraph_function); } catch (const Exception& e) { return DescriptionBuffer(GENERAL_ERROR, resp) << e.what(); @@ -506,9 +506,8 @@ StatusCode CNNNetworkNGraphImpl::serialize(std::ostream& xmlBuf, return OK; } -StatusCode 
CNNNetworkNGraphImpl::serialize(std::ostream& xmlBuf, - Blob::Ptr& binBlob, - ResponseDesc* resp) const noexcept { +StatusCode CNNNetworkNGraphImpl::serialize(std::ostream& xmlBuf, Blob::Ptr& binBlob, ResponseDesc* resp) const + noexcept { try { std::map custom_opsets; for (const auto& extension : _ie_extensions) { @@ -518,15 +517,16 @@ StatusCode CNNNetworkNGraphImpl::serialize(std::ostream& xmlBuf, std::stringstream binBuf; ngraph::pass::Manager manager; - manager.register_pass( - xmlBuf, binBuf, ngraph::pass::Serialize::Version::IR_V10, - custom_opsets); + manager.register_pass(xmlBuf, + binBuf, + ngraph::pass::Serialize::Version::IR_V10, + custom_opsets); manager.run_passes(_ngraph_function); std::streambuf* pbuf = binBuf.rdbuf(); unsigned long bufSize = binBuf.tellp(); - TensorDesc tensorDesc(Precision::U8, { bufSize }, Layout::C); + TensorDesc tensorDesc(Precision::U8, {bufSize}, Layout::C); binBlob = make_shared_blob(tensorDesc); binBlob->allocate(); pbuf->sgetn(binBlob->buffer(), bufSize); @@ -540,46 +540,66 @@ StatusCode CNNNetworkNGraphImpl::serialize(std::ostream& xmlBuf, return OK; } -StatusCode CNNNetworkNGraphImpl::getOVNameForTensor(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const noexcept { +StatusCode CNNNetworkNGraphImpl::getOVNameForTensor(std::string& ov_name, + const std::string& orig_name, + ResponseDesc* resp) const noexcept { if (_tensorNames.find(orig_name) == _tensorNames.end()) - return DescriptionBuffer(NOT_FOUND, resp) << "Framework tensor with name \"" << orig_name << "\" was not mapped to OpenVINO data!"; + return DescriptionBuffer(NOT_FOUND, resp) + << "Framework tensor with name \"" << orig_name << "\" was not mapped to OpenVINO data!"; ov_name = _tensorNames.at(orig_name); return OK; } StatusCode CNNNetworkNGraphImpl::setBatchSize(size_t size, ResponseDesc* responseDesc) noexcept { try { - if (getBatchSize() == size) return OK; + if (getBatchSize() == size) + return OK; auto original_parameters = _ngraph_function->get_parameters(); - if (original_parameters.empty()) return DescriptionBuffer(GENERAL_ERROR, responseDesc) << "Cannot set batch! Function doesn't contain parameters!"; + if (original_parameters.empty()) + return DescriptionBuffer(GENERAL_ERROR, responseDesc) + << "Cannot set batch! Function doesn't contain parameters!"; stringstream ss; ss << " Please use reshape method instead. Original parameter shapes are: "; for (size_t i = 0; i < original_parameters.size(); ++i) { - if (i) ss << ", "; - ss << "\"" << original_parameters[i]->get_friendly_name() << "\": " << original_parameters[i]->get_partial_shape(); + if (i) + ss << ", "; + ss << "\"" << original_parameters[i]->get_friendly_name() + << "\": " << original_parameters[i]->get_partial_shape(); } // ill-formed logic from the past setBatchSize (we keep it for backward-compatibility) - const auto first_parameter = *std::min_element(original_parameters.begin(), original_parameters.end(), - [](std::shared_ptr lhs, std::shared_ptr rhs){return lhs->get_friendly_name() < rhs->get_friendly_name();}); + const auto first_parameter = + *std::min_element(original_parameters.begin(), + original_parameters.end(), + [](std::shared_ptr lhs, std::shared_ptr rhs) { + return lhs->get_friendly_name() < rhs->get_friendly_name(); + }); const auto first_parameter_pshape = first_parameter->get_partial_shape(); - if (first_parameter_pshape.is_dynamic()) return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc) << - "Cannot set batch! 
Function contains parameter with partially defined shape!" << ss.str(); + if (first_parameter_pshape.is_dynamic()) + return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc) + << "Cannot set batch! Function contains parameter with partially defined shape!" << ss.str(); const auto first_parameter_rank = first_parameter_pshape.rank().get_length(); - if (first_parameter_rank == 0 || first_parameter_rank == 1 || first_parameter_rank == 3) return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc) << - "Cannot set batch! Function contains 0D/1D/3D parameter with unknown batch dimension placement." << ss.str(); + if (first_parameter_rank == 0 || first_parameter_rank == 1 || first_parameter_rank == 3) + return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc) + << "Cannot set batch! Function contains 0D/1D/3D parameter with unknown batch dimension placement." + << ss.str(); std::map> inShapes; - for (const auto ¶meter : original_parameters) { - const auto & pshape = parameter->get_partial_shape(); - if (pshape.is_dynamic()) return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc) << - "Cannot set batch! Function contains parameter with partially defined shape!" << ss.str(); - const auto & rank = pshape.rank().get_length(); - if (rank == 0) return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc) << - "Cannot set batch! Function contains 0D/1D/3D parameter with unknown batch dimension placement." << ss.str(); + for (const auto& parameter : original_parameters) { + const auto& pshape = parameter->get_partial_shape(); + if (pshape.is_dynamic()) + return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc) + << "Cannot set batch! Function contains parameter with partially defined shape!" << ss.str(); + const auto& rank = pshape.rank().get_length(); + if (rank == 0) + return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc) + << "Cannot set batch! Function contains 0D/1D/3D parameter with unknown batch dimension " + "placement." 
+ << ss.str(); auto shape = parameter->get_shape(); - shape[0] = {static_cast(std::ceil(size * static_cast(shape[0]) / static_cast(getBatchSize())))}; + shape[0] = {static_cast( + std::ceil(size * static_cast(shape[0]) / static_cast(getBatchSize())))}; inShapes[parameter->get_friendly_name()] = shape; } ngraph::pass::Manager ssr_manager; diff --git a/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.hpp b/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.hpp index 6fe00b8ad81..6a7027327f3 100644 --- a/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.hpp +++ b/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.hpp @@ -11,24 +11,23 @@ #include #include -#include #include #include #include +#include #include -#include -#include -#include - -#include +#include "cpp/ie_cnn_network.h" #include "description_buffer.hpp" #include "ie_api.h" #include "ie_blob.h" #include "ie_common.h" #include "ie_data.h" -#include "ie_input_info.hpp" #include "ie_extension.h" +#include "ie_input_info.hpp" +#include "ngraph/attribute_visitor.hpp" +#include "ngraph/function.hpp" +#include "ngraph/node.hpp" namespace InferenceEngine { namespace details { @@ -62,7 +61,7 @@ public: StatusCode addOutput(const std::string& layerName, size_t outputIndex, ResponseDesc* resp) noexcept override; - void addOutput(const ::ngraph::Output<::ngraph::Node> & dataName); + void addOutput(const ::ngraph::Output<::ngraph::Node>& dataName); std::shared_ptr getFunction() const noexcept override { return _ngraph_function; @@ -79,16 +78,16 @@ public: StatusCode serialize(const std::string& xmlPath, const std::string& binPath, ResponseDesc* resp) const noexcept override; - StatusCode serialize(std::ostream& xmlBuf, std::ostream& binBuf, ResponseDesc* resp) const - noexcept override; + StatusCode serialize(std::ostream& xmlBuf, std::ostream& binBuf, ResponseDesc* resp) const noexcept override; - StatusCode serialize(std::ostream& xmlBuf, Blob::Ptr& binBlob, ResponseDesc* resp) const - noexcept override; + StatusCode serialize(std::ostream& xmlBuf, Blob::Ptr& binBlob, ResponseDesc* resp) const noexcept override; - StatusCode getOVNameForTensor(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const noexcept override; + StatusCode getOVNameForTensor(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const + noexcept override; // used by convertFunctionToICNNNetwork from legacy library std::map _data; + protected: std::shared_ptr<::ngraph::Function> _ngraph_function; diff --git a/inference-engine/src/inference_engine/src/compilation_context.cpp b/inference-engine/src/inference_engine/src/compilation_context.cpp index 1b656200996..ed787235e1b 100644 --- a/inference-engine/src/inference_engine/src/compilation_context.cpp +++ b/inference-engine/src/inference_engine/src/compilation_context.cpp @@ -4,28 +4,27 @@ #include "compilation_context.hpp" -#include #include +#include #ifndef WIN32 -#include +# include #endif #include -#include "ie_itt.hpp" -#include "transformations/serialize.hpp" #include "cpp/ie_cnn_network.h" #include "details/ie_exception.hpp" - -#include "ngraph/variant.hpp" +#include "file_utils.h" +#include "ie_itt.hpp" #include "ngraph/opsets/opset6.hpp" +#include "ngraph/variant.hpp" #include "transformations/rt_info/dequantization_attribute.hpp" #include "transformations/rt_info/fused_names_attribute.hpp" #include "transformations/rt_info/primitives_priority_attribute.hpp" -#include "file_utils.h" +#include 
"transformations/serialize.hpp" #ifdef WIN32 -#define stat _stat +# define stat _stat #endif namespace InferenceEngine { @@ -41,12 +40,15 @@ static int32_t as_int32_t(T v) { return static_cast(v); } -class OstreamHashWrapper final: public std::streambuf { +class OstreamHashWrapper final : public std::streambuf { std::size_t m_res = 0; + public: - std::size_t getResult() const { return m_res; } + std::size_t getResult() const { + return m_res; + } std::streamsize xsputn(const char* s, std::streamsize n) override { - const std::int64_t* intS = (const std::int64_t *)s; + const std::int64_t* intS = (const std::int64_t*)s; std::streamsize n64 = n / sizeof(std::int64_t); std::streamsize i = 0; // Using 64-bit values executes much faster than char @@ -85,7 +87,7 @@ std::string NetworkCompilationContext::calculateFileInfo(const std::string& file } std::string NetworkCompilationContext::computeHash(const CNNNetwork& network, - const std::map& compileOptions) { + const std::map& compileOptions) { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "NetworkCompilationContext::computeHash - CNN"); OstreamHashWrapper xmlHash; OstreamHashWrapper binHash; @@ -96,8 +98,7 @@ std::string NetworkCompilationContext::computeHash(const CNNNetwork& network, // 1. Serialize CNNNetwork net(network); - ngraph::pass::Serialize serializer(xml, bin, - ngraph::pass::Serialize::Version::IR_V10); + ngraph::pass::Serialize serializer(xml, bin, ngraph::pass::Serialize::Version::IR_V10); serializer.run_on_function(net.getFunction()); // 2. Compute hash on serialized data and options @@ -117,13 +118,17 @@ std::string NetworkCompilationContext::computeHash(const CNNNetwork& network, if (auto stringData = std::dynamic_pointer_cast>(rtMapData.second)) { seed = hash_combine(seed, stringData->get()); - } else if (auto intData = std::dynamic_pointer_cast>(rtMapData.second)) { + } else if (auto intData = + std::dynamic_pointer_cast>(rtMapData.second)) { seed = hash_combine(seed, intData->get()); - } else if (auto deq = std::dynamic_pointer_cast>(rtMapData.second)) { + } else if (auto deq = std::dynamic_pointer_cast>( + rtMapData.second)) { seed = hash_combine(seed, deq->get().getDequantizationAttr()); - } else if (auto fNames = std::dynamic_pointer_cast>(rtMapData.second)) { + } else if (auto fNames = + std::dynamic_pointer_cast>(rtMapData.second)) { seed = hash_combine(seed, fNames->get().getNames()); - } else if (auto prim = std::dynamic_pointer_cast>(rtMapData.second)) { + } else if (auto prim = std::dynamic_pointer_cast>( + rtMapData.second)) { seed = hash_combine(seed, prim->get().getPrimitivesPriority()); } } @@ -141,7 +146,7 @@ std::string NetworkCompilationContext::computeHash(const CNNNetwork& network, if (preproc.getMeanVariant() == MeanVariant::MEAN_VALUE) { seed = hash_combine(seed, preproc.getNumberOfChannels()); for (size_t c = 0; c < preproc.getNumberOfChannels(); ++c) { - const PreProcessChannel::Ptr & channelInfo = preproc[c]; + const PreProcessChannel::Ptr& channelInfo = preproc[c]; seed = hash_combine(seed, channelInfo->stdScale); seed = hash_combine(seed, channelInfo->meanValue); } @@ -161,7 +166,7 @@ std::string NetworkCompilationContext::computeHash(const CNNNetwork& network, } std::string NetworkCompilationContext::computeHash(const std::string& modelName, - const std::map& compileOptions) { + const std::map& compileOptions) { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "NetworkCompilationContext::computeHash - ModelName"); size_t seed = 0; try { @@ -180,12 +185,11 @@ std::string 
NetworkCompilationContext::computeHash(const std::string& modelName, CompiledBlobHeader::CompiledBlobHeader() {} -CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo) : - m_ieVersion(ieVersion), - m_fileInfo(fileInfo) { -} +CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo) + : m_ieVersion(ieVersion), + m_fileInfo(fileInfo) {} -std::istream& operator >> (std::istream& stream, CompiledBlobHeader& header) { +std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) { std::string xmlStr; std::getline(stream, xmlStr); @@ -203,7 +207,7 @@ std::istream& operator >> (std::istream& stream, CompiledBlobHeader& header) { return stream; } -std::ostream& operator << (std::ostream& stream, const CompiledBlobHeader& header) { +std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header) { pugi::xml_document document; auto compiledBlobNode = document.append_child("compiled_blob"); compiledBlobNode.append_attribute("ie_version").set_value(header.m_ieVersion.c_str()); diff --git a/inference-engine/src/inference_engine/src/compilation_context.hpp b/inference-engine/src/inference_engine/src/compilation_context.hpp index 53a78dae4e1..2145b4fb812 100644 --- a/inference-engine/src/inference_engine/src/compilation_context.hpp +++ b/inference-engine/src/inference_engine/src/compilation_context.hpp @@ -4,10 +4,10 @@ #pragma once -#include -#include #include +#include #include +#include namespace InferenceEngine { @@ -16,8 +16,7 @@ class CNNNetwork; struct NetworkCompilationContext final { static std::string calculateFileInfo(const std::string& filePath); - static std::string computeHash(const CNNNetwork& network, - const std::map& compileOptions); + static std::string computeHash(const CNNNetwork& network, const std::map& compileOptions); static std::string computeHash(const std::string& modelName, const std::map& compileOptions); @@ -39,9 +38,9 @@ public: return m_fileInfo; } - friend std::istream & operator >> (std::istream& stream, CompiledBlobHeader& header); + friend std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header); - friend std::ostream & operator << (std::ostream& stream, const CompiledBlobHeader& header); + friend std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header); }; } // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/src/cpp/ie_cnn_network.cpp b/inference-engine/src/inference_engine/src/cpp/ie_cnn_network.cpp index e2ff71d596c..0bc58df4880 100644 --- a/inference-engine/src/inference_engine/src/cpp/ie_cnn_network.cpp +++ b/inference-engine/src/inference_engine/src/cpp/ie_cnn_network.cpp @@ -3,27 +3,24 @@ // #include "cpp/ie_cnn_network.h" -#include "exception2status.hpp" #include "cnn_network_ngraph_impl.hpp" +#include "exception2status.hpp" #include "ie_itt.hpp" namespace InferenceEngine { -CNNNetwork::CNNNetwork() : - network(), actual() { -} +CNNNetwork::CNNNetwork() : network(), actual() {} IE_SUPPRESS_DEPRECATED_START -CNNNetwork::CNNNetwork(std::shared_ptr network) - : network(network) { +CNNNetwork::CNNNetwork(std::shared_ptr network) : network(network) { actual = network.get(); - if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized."; + if (actual == nullptr) + IE_THROW() << "CNNNetwork was not initialized."; } -CNNNetwork::CNNNetwork(const std::shared_ptr& graph, - const std::vector& exts) { +CNNNetwork::CNNNetwork(const std::shared_ptr& graph, const std::vector& exts) 
{ OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "CNNNetwork::CNNNetwork"); if (graph == nullptr) { @@ -39,26 +36,30 @@ CNNNetwork::CNNNetwork(const std::shared_ptr& graph, } OutputsDataMap CNNNetwork::getOutputsInfo() const { - if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized."; + if (actual == nullptr) + IE_THROW() << "CNNNetwork was not initialized."; OutputsDataMap outputs; actual->getOutputsInfo(outputs); return outputs; } InputsDataMap CNNNetwork::getInputsInfo() const { - if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized."; + if (actual == nullptr) + IE_THROW() << "CNNNetwork was not initialized."; InputsDataMap inputs; actual->getInputsInfo(inputs); return inputs; } size_t CNNNetwork::layerCount() const { - if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized."; + if (actual == nullptr) + IE_THROW() << "CNNNetwork was not initialized."; return actual->layerCount(); } const std::string& CNNNetwork::getName() const { - if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized."; + if (actual == nullptr) + IE_THROW() << "CNNNetwork was not initialized."; return actual->getName(); } @@ -67,7 +68,8 @@ void CNNNetwork::setBatchSize(const size_t size) { } size_t CNNNetwork::getBatchSize() const { - if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized."; + if (actual == nullptr) + IE_THROW() << "CNNNetwork was not initialized."; return actual->getBatchSize(); } @@ -76,22 +78,26 @@ CNNNetwork::operator ICNNNetwork::Ptr() { } CNNNetwork::operator ICNNNetwork&() { - if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized."; + if (actual == nullptr) + IE_THROW() << "CNNNetwork was not initialized."; return *actual; } CNNNetwork::operator const ICNNNetwork&() const { - if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized."; + if (actual == nullptr) + IE_THROW() << "CNNNetwork was not initialized."; return *actual; } std::shared_ptr CNNNetwork::getFunction() { - if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized."; + if (actual == nullptr) + IE_THROW() << "CNNNetwork was not initialized."; return actual->getFunction(); } std::shared_ptr CNNNetwork::getFunction() const { - if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized."; + if (actual == nullptr) + IE_THROW() << "CNNNetwork was not initialized."; return actual->getFunction(); } @@ -100,7 +106,8 @@ void CNNNetwork::addOutput(const std::string& layerName, size_t outputIndex) { } ICNNNetwork::InputShapes CNNNetwork::getInputShapes() const { - if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized."; + if (actual == nullptr) + IE_THROW() << "CNNNetwork was not initialized."; ICNNNetwork::InputShapes shapes; InputsDataMap inputs; actual->getInputsInfo(inputs); diff --git a/inference-engine/src/inference_engine/src/cpp/ie_executable_network.cpp b/inference-engine/src/inference_engine/src/cpp/ie_executable_network.cpp index a4afee5a28b..e0b48fb223d 100644 --- a/inference-engine/src/inference_engine/src/cpp/ie_executable_network.cpp +++ b/inference-engine/src/inference_engine/src/cpp/ie_executable_network.cpp @@ -2,24 +2,27 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ie_common.h" - #include "cpp/ie_executable_network.hpp" + #include "cpp/exception2status.hpp" -#include "ie_executable_network_base.hpp" #include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp" +#include "ie_common.h" +#include "ie_executable_network_base.hpp" namespace InferenceEngine { -#define 
EXEC_NET_CALL_STATEMENT(...) \ - if (_impl == nullptr) IE_THROW(NotAllocated) << "ExecutableNetwork was not initialized."; \ - try { \ - __VA_ARGS__; \ - } catch(...) {details::Rethrow();} +#define EXEC_NET_CALL_STATEMENT(...) \ + if (_impl == nullptr) \ + IE_THROW(NotAllocated) << "ExecutableNetwork was not initialized."; \ + try { \ + __VA_ARGS__; \ + } catch (...) { \ + details::Rethrow(); \ + } -ExecutableNetwork::ExecutableNetwork(const details::SharedObjectLoader& so, - const IExecutableNetworkInternal::Ptr& impl) - : _so(so), _impl(impl) { +ExecutableNetwork::ExecutableNetwork(const details::SharedObjectLoader& so, const IExecutableNetworkInternal::Ptr& impl) + : _so(so), + _impl(impl) { IE_ASSERT(_impl != nullptr); } @@ -34,8 +37,10 @@ ConstInputsDataMap ExecutableNetwork::GetInputsInfo() const { } void ExecutableNetwork::reset(IExecutableNetwork::Ptr newActual) { - if (_impl == nullptr) IE_THROW() << "ExecutableNetwork was not initialized."; - if (newActual == nullptr) IE_THROW() << "ExecutableNetwork wrapper used for reset was not initialized."; + if (_impl == nullptr) + IE_THROW() << "ExecutableNetwork was not initialized."; + if (newActual == nullptr) + IE_THROW() << "ExecutableNetwork wrapper used for reset was not initialized."; auto newBase = std::dynamic_pointer_cast(newActual); IE_ASSERT(newBase != nullptr); auto newImpl = newBase->GetImpl(); @@ -49,10 +54,10 @@ ExecutableNetwork::operator IExecutableNetwork::Ptr() { std::vector ExecutableNetwork::QueryState() { std::vector controller; - EXEC_NET_CALL_STATEMENT( - for (auto&& state : _impl->QueryState()) { - controller.emplace_back(VariableState{ _so, state }); - }); + EXEC_NET_CALL_STATEMENT(for (auto&& state + : _impl->QueryState()) { + controller.emplace_back(VariableState{_so, state}); + }); return controller; } diff --git a/inference-engine/src/inference_engine/src/cpp/ie_executable_network_base.hpp b/inference-engine/src/inference_engine/src/cpp/ie_executable_network_base.hpp index c87b1fc7098..52cb5e0a374 100644 --- a/inference-engine/src/inference_engine/src/cpp/ie_executable_network_base.hpp +++ b/inference-engine/src/inference_engine/src/cpp/ie_executable_network_base.hpp @@ -14,19 +14,20 @@ #include #include -#include -#include -#include #include "cpp/exception2status.hpp" +#include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp" +#include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp" +#include "ie_iexecutable_network.hpp" #include "ie_infer_async_request_base.hpp" namespace InferenceEngine { IE_SUPPRESS_DEPRECATED_START /** - * @brief Executable network `noexcept` wrapper which accepts IExecutableNetworkInternal derived instance which can throw exceptions + * @brief Executable network `noexcept` wrapper which accepts IExecutableNetworkInternal derived instance which can + * throw exceptions * @ingroup ie_dev_api_exec_network_api - */ + */ class ExecutableNetworkBase : public IExecutableNetwork { protected: std::shared_ptr _impl; diff --git a/inference-engine/src/inference_engine/src/cpp/ie_infer_request.cpp b/inference-engine/src/inference_engine/src/cpp/ie_infer_request.cpp index f94a3b6ba1c..135cf2d3391 100644 --- a/inference-engine/src/inference_engine/src/cpp/ie_infer_request.cpp +++ b/inference-engine/src/inference_engine/src/cpp/ie_infer_request.cpp @@ -2,28 +2,31 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "cpp/ie_infer_request.hpp" + #include #include #include -#include "ie_remote_context.hpp" - -#include "cpp/ie_infer_request.hpp" #include 
"cpp/exception2status.hpp" -#include "ie_infer_async_request_base.hpp" #include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp" +#include "ie_infer_async_request_base.hpp" +#include "ie_remote_context.hpp" namespace InferenceEngine { -#define INFER_REQ_CALL_STATEMENT(...) \ - if (_impl == nullptr) IE_THROW(NotAllocated) << "Inference Request is not initialized"; \ - try { \ - __VA_ARGS__ \ - } catch(...) {details::Rethrow();} +#define INFER_REQ_CALL_STATEMENT(...) \ + if (_impl == nullptr) \ + IE_THROW(NotAllocated) << "Inference Request is not initialized"; \ + try { \ + __VA_ARGS__ \ + } catch (...) { \ + details::Rethrow(); \ + } -InferRequest::InferRequest(const details::SharedObjectLoader& so, - const IInferRequestInternal::Ptr& impl) - : _so(so), _impl(impl) { +InferRequest::InferRequest(const details::SharedObjectLoader& so, const IInferRequestInternal::Ptr& impl) + : _so(so), + _impl(impl) { IE_ASSERT(_impl != nullptr); } @@ -38,12 +41,14 @@ Blob::Ptr InferRequest::GetBlob(const std::string& name) { INFER_REQ_CALL_STATEMENT(blobPtr = _impl->GetBlob(name);) std::string error = "Internal error: blob with name `" + name + "` is not allocated!"; const bool remoteBlobPassed = blobPtr->is(); - if (blobPtr == nullptr) IE_THROW() << error; - if (!remoteBlobPassed && blobPtr->buffer() == nullptr) IE_THROW() << error; + if (blobPtr == nullptr) + IE_THROW() << error; + if (!remoteBlobPassed && blobPtr->buffer() == nullptr) + IE_THROW() << error; return blobPtr; } -void InferRequest::SetBlob(const std::string &name, const Blob::Ptr &data, const PreProcessInfo& info) { +void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data, const PreProcessInfo& info) { INFER_REQ_CALL_STATEMENT(_impl->SetBlob(name, data, info);) } @@ -64,19 +69,11 @@ std::map InferRequest::GetPerformanceCo } void InferRequest::SetInput(const BlobMap& inputs) { - INFER_REQ_CALL_STATEMENT( - for (auto&& input : inputs) { - _impl->SetBlob(input.first, input.second); - } - ) + INFER_REQ_CALL_STATEMENT(for (auto&& input : inputs) { _impl->SetBlob(input.first, input.second); }) } void InferRequest::SetOutput(const BlobMap& results) { - INFER_REQ_CALL_STATEMENT( - for (auto&& result : results) { - _impl->SetBlob(result.first, result.second); - } - ) + INFER_REQ_CALL_STATEMENT(for (auto&& result : results) { _impl->SetBlob(result.first, result.second); }) } void InferRequest::SetBatch(const int batch) { @@ -87,92 +84,92 @@ void InferRequest::StartAsync() { INFER_REQ_CALL_STATEMENT(_impl->StartAsync();) } - StatusCode InferRequest::Wait(int64_t millis_timeout) { INFER_REQ_CALL_STATEMENT(return _impl->Wait(millis_timeout);) } void InferRequest::SetCompletionCallbackImpl(std::function callbackToSet) { - INFER_REQ_CALL_STATEMENT( - _impl->SetCallback([callbackToSet] (std::exception_ptr) { - callbackToSet(); - }); - ) + INFER_REQ_CALL_STATEMENT(_impl->SetCallback([callbackToSet](std::exception_ptr) { + callbackToSet(); + });) } -#define CATCH_IE_EXCEPTION_RETURN(StatusCode, ExceptionType) catch (const ExceptionType&) {return StatusCode;} - -#define CATCH_IE_EXCEPTIONS_RETURN \ - CATCH_IE_EXCEPTION_RETURN(GENERAL_ERROR, GeneralError) \ - CATCH_IE_EXCEPTION_RETURN(NOT_IMPLEMENTED, NotImplemented) \ - CATCH_IE_EXCEPTION_RETURN(NETWORK_NOT_LOADED, NetworkNotLoaded) \ - CATCH_IE_EXCEPTION_RETURN(PARAMETER_MISMATCH, ParameterMismatch) \ - CATCH_IE_EXCEPTION_RETURN(NOT_FOUND, NotFound) \ - CATCH_IE_EXCEPTION_RETURN(OUT_OF_BOUNDS, OutOfBounds) \ - CATCH_IE_EXCEPTION_RETURN(UNEXPECTED, Unexpected) \ - 
CATCH_IE_EXCEPTION_RETURN(REQUEST_BUSY, RequestBusy) \ - CATCH_IE_EXCEPTION_RETURN(RESULT_NOT_READY, ResultNotReady) \ - CATCH_IE_EXCEPTION_RETURN(NOT_ALLOCATED, NotAllocated) \ - CATCH_IE_EXCEPTION_RETURN(INFER_NOT_STARTED, InferNotStarted) \ - CATCH_IE_EXCEPTION_RETURN(NETWORK_NOT_READ, NetworkNotRead) \ - CATCH_IE_EXCEPTION_RETURN(INFER_CANCELLED, InferCancelled) +#define CATCH_IE_EXCEPTION_RETURN(StatusCode, ExceptionType) \ + catch (const ExceptionType&) { \ + return StatusCode; \ + } +#define CATCH_IE_EXCEPTIONS_RETURN \ + CATCH_IE_EXCEPTION_RETURN(GENERAL_ERROR, GeneralError) \ + CATCH_IE_EXCEPTION_RETURN(NOT_IMPLEMENTED, NotImplemented) \ + CATCH_IE_EXCEPTION_RETURN(NETWORK_NOT_LOADED, NetworkNotLoaded) \ + CATCH_IE_EXCEPTION_RETURN(PARAMETER_MISMATCH, ParameterMismatch) \ + CATCH_IE_EXCEPTION_RETURN(NOT_FOUND, NotFound) \ + CATCH_IE_EXCEPTION_RETURN(OUT_OF_BOUNDS, OutOfBounds) \ + CATCH_IE_EXCEPTION_RETURN(UNEXPECTED, Unexpected) \ + CATCH_IE_EXCEPTION_RETURN(REQUEST_BUSY, RequestBusy) \ + CATCH_IE_EXCEPTION_RETURN(RESULT_NOT_READY, ResultNotReady) \ + CATCH_IE_EXCEPTION_RETURN(NOT_ALLOCATED, NotAllocated) \ + CATCH_IE_EXCEPTION_RETURN(INFER_NOT_STARTED, InferNotStarted) \ + CATCH_IE_EXCEPTION_RETURN(NETWORK_NOT_READ, NetworkNotRead) \ + CATCH_IE_EXCEPTION_RETURN(INFER_CANCELLED, InferCancelled) void InferRequest::SetCompletionCallbackImpl(std::function callbackToSet) { INFER_REQ_CALL_STATEMENT( - auto weakThis = InferRequest{_so, std::shared_ptr{_impl.get(), [](IInferRequestInternal*){}}}; - _impl->SetCallback([callbackToSet, weakThis] (std::exception_ptr exceptionPtr) { + auto weakThis = + InferRequest{_so, std::shared_ptr{_impl.get(), [](IInferRequestInternal*) {}}}; + _impl->SetCallback([callbackToSet, weakThis](std::exception_ptr exceptionPtr) { StatusCode statusCode = StatusCode::OK; if (exceptionPtr != nullptr) { statusCode = [&] { try { std::rethrow_exception(exceptionPtr); - } CATCH_IE_EXCEPTIONS_RETURN catch (const std::exception&) { + } + CATCH_IE_EXCEPTIONS_RETURN catch (const std::exception&) { return GENERAL_ERROR; - } catch (...) { + } + catch (...) { return UNEXPECTED; } - } (); + }(); } callbackToSet(weakThis, statusCode); - }); - ) + });) } void InferRequest::SetCompletionCallbackImpl(IInferRequest::CompletionCallback callbackToSet) { INFER_REQ_CALL_STATEMENT( - IInferRequest::Ptr weakThis = InferRequest{_so, std::shared_ptr{_impl.get(), [](IInferRequestInternal*){}}}; - _impl->SetCallback([callbackToSet, weakThis] (std::exception_ptr exceptionPtr) { + IInferRequest::Ptr weakThis = + InferRequest{_so, std::shared_ptr{_impl.get(), [](IInferRequestInternal*) {}}}; + _impl->SetCallback([callbackToSet, weakThis](std::exception_ptr exceptionPtr) { StatusCode statusCode = StatusCode::OK; if (exceptionPtr != nullptr) { statusCode = [&] { try { std::rethrow_exception(exceptionPtr); - } CATCH_IE_EXCEPTIONS_RETURN catch (const std::exception&) { + } + CATCH_IE_EXCEPTIONS_RETURN catch (const std::exception&) { return GENERAL_ERROR; - } catch (...) { + } + catch (...) 
{ return UNEXPECTED; } - } (); + }(); } callbackToSet(weakThis, statusCode); - }); - ) + });) } -InferRequest::operator IInferRequest::Ptr () { - INFER_REQ_CALL_STATEMENT( - return std::make_shared(_impl); - ) +InferRequest::operator IInferRequest::Ptr() { + INFER_REQ_CALL_STATEMENT(return std::make_shared(_impl);) } std::vector InferRequest::QueryState() { std::vector controller; - INFER_REQ_CALL_STATEMENT( - for (auto&& state : _impl->QueryState()) { - controller.emplace_back(VariableState{_so, state}); - } - ) + INFER_REQ_CALL_STATEMENT(for (auto&& state + : _impl->QueryState()) { + controller.emplace_back(VariableState{_so, state}); + }) return controller; } diff --git a/inference-engine/src/inference_engine/src/cpp/ie_variable_state.cpp b/inference-engine/src/inference_engine/src/cpp/ie_variable_state.cpp index 63f7305e8b2..ff8547f13b3 100644 --- a/inference-engine/src/inference_engine/src/cpp/ie_variable_state.cpp +++ b/inference-engine/src/inference_engine/src/cpp/ie_variable_state.cpp @@ -2,23 +2,27 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "details/ie_so_loader.h" #include "cpp/ie_memory_state.hpp" #include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp" +#include "details/ie_so_loader.h" #include "exception2status.hpp" -#define VARIABLE_CALL_STATEMENT(...) \ - if (_impl == nullptr) IE_THROW(NotAllocated) << "VariableState was not initialized."; \ - try { \ - __VA_ARGS__; \ - } catch(...) {details::Rethrow();} +#define VARIABLE_CALL_STATEMENT(...) \ + if (_impl == nullptr) \ + IE_THROW(NotAllocated) << "VariableState was not initialized."; \ + try { \ + __VA_ARGS__; \ + } catch (...) { \ + details::Rethrow(); \ + } namespace InferenceEngine { -VariableState::VariableState(const details::SharedObjectLoader& so, - const IVariableStateInternal::Ptr& impl) - : _so(so), _impl(impl) { - if (_impl == nullptr) IE_THROW() << "VariableState was not initialized."; +VariableState::VariableState(const details::SharedObjectLoader& so, const IVariableStateInternal::Ptr& impl) + : _so(so), + _impl(impl) { + if (_impl == nullptr) + IE_THROW() << "VariableState was not initialized."; } IE_SUPPRESS_DEPRECATED_START diff --git a/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_iexecutable_network_internal.cpp b/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_iexecutable_network_internal.cpp index 6b5bb34c970..be42b176595 100644 --- a/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_iexecutable_network_internal.cpp +++ b/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_iexecutable_network_internal.cpp @@ -2,19 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include +#include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp" +#include #include #include #include #include -#include -#include -#include -#include +#include "cpp/ie_cnn_network.h" +#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp" +#include "cpp_interfaces/interface/ie_iplugin_internal.hpp" +#include "ie_icore.hpp" +#include "ie_parameter.hpp" namespace InferenceEngine { @@ -90,8 +90,9 @@ std::shared_ptr IExecutableNetworkInternal::GetContext() const { IE_THROW(NotImplemented); } -std::shared_ptr IExecutableNetworkInternal::CreateInferRequestImpl(InputsDataMap networkInputs, - OutputsDataMap networkOutputs) { +std::shared_ptr IExecutableNetworkInternal::CreateInferRequestImpl( + InputsDataMap networkInputs, + OutputsDataMap networkOutputs) { IE_THROW(NotImplemented); } diff --git 
a/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_iinfer_request_internal.cpp b/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_iinfer_request_internal.cpp index 800b5f3cc01..9dcde916f1c 100644 --- a/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_iinfer_request_internal.cpp +++ b/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_iinfer_request_internal.cpp @@ -2,31 +2,30 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp" + #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - +#include "cpp_interfaces/interface/ie_iplugin_internal.hpp" +#include "cpp_interfaces/plugin_itt.hpp" +#include "debug.h" +#include "ie_algorithm.hpp" +#include "ie_blob.h" +#include "ie_common.h" +#include "ie_compound_blob.h" +#include "ie_preprocess.hpp" +#include "ie_remote_context.hpp" namespace InferenceEngine { IInferRequestInternal::~IInferRequestInternal() {} -IInferRequestInternal::IInferRequestInternal(const InputsDataMap& networkInputs, const OutputsDataMap& networkOutputs) : - // We should copy maps since they can be overriden in SetBlob with preprocess - _networkInputs{copyInfo(networkInputs)}, - _networkOutputs{copyInfo(networkOutputs)} { -} +IInferRequestInternal::IInferRequestInternal(const InputsDataMap& networkInputs, const OutputsDataMap& networkOutputs) + : // We should copy maps since they can be overriden in SetBlob with preprocess + _networkInputs{copyInfo(networkInputs)}, + _networkOutputs{copyInfo(networkOutputs)} {} void IInferRequestInternal::Infer() { checkBlobs(); @@ -50,9 +49,10 @@ void IInferRequestInternal::SetBlob(const std::string& name, const Blob::Ptr& us if (name.empty()) { IE_THROW(NotFound) << "Failed to set blob with empty name"; } - if (!userBlob) IE_THROW(NotAllocated) << "Failed to set empty blob with name: \'" << name << "\'"; + if (!userBlob) + IE_THROW(NotAllocated) << "Failed to set empty blob with name: \'" << name << "\'"; const bool compoundBlobPassed = userBlob->is(); - const bool remoteBlobPassed = userBlob->is(); + const bool remoteBlobPassed = userBlob->is(); if (!compoundBlobPassed && !remoteBlobPassed && userBlob->buffer() == nullptr) IE_THROW(NotAllocated) << "Input data was not allocated. Input name: \'" << name << "\'"; if (userBlob->size() == 0) { @@ -66,7 +66,8 @@ void IInferRequestInternal::SetBlob(const std::string& name, const Blob::Ptr& us // ilavreno: the condition below is obsolete, but we need an exact list of precisions // which are supports by G-API preprocessing if (foundInput->getPrecision() != userBlob->getTensorDesc().getPrecision()) { - IE_THROW(ParameterMismatch) << "Failed to set Blob with precision not corresponding to user input precision"; + IE_THROW(ParameterMismatch) + << "Failed to set Blob with precision not corresponding to user input precision"; } auto& devBlob = _deviceInputs[name]; @@ -79,10 +80,11 @@ void IInferRequestInternal::SetBlob(const std::string& name, const Blob::Ptr& us addInputPreProcessingFor(name, userBlob, devBlob ? devBlob : _inputs[name]); } else { size_t inputSize = foundInput->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR - ? InferenceEngine::details::product(foundInput->getTensorDesc().getDims()) - : 1; + ? 
InferenceEngine::details::product(foundInput->getTensorDesc().getDims()) + : 1; if (dataSize != inputSize) { - IE_THROW() << "Input blob size is not equal network input size (" << dataSize << "!=" << inputSize << ")."; + IE_THROW() << "Input blob size is not equal network input size (" << dataSize << "!=" << inputSize + << ")."; } _inputs[name] = userBlob; devBlob = userBlob; @@ -92,13 +94,15 @@ void IInferRequestInternal::SetBlob(const std::string& name, const Blob::Ptr& us IE_THROW(NotImplemented) << "cannot set compound blob: supported only for input pre-processing"; } size_t outputSize = foundOutput->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR - ? details::product(foundOutput->getTensorDesc().getDims()) : - 1; + ? details::product(foundOutput->getTensorDesc().getDims()) + : 1; if (dataSize != outputSize) { - IE_THROW() << "Output blob size is not equal network output size (" << dataSize << "!=" << outputSize << ")."; + IE_THROW() << "Output blob size is not equal network output size (" << dataSize << "!=" << outputSize + << ")."; } if (foundOutput->getPrecision() != userBlob->getTensorDesc().getPrecision()) { - IE_THROW(ParameterMismatch) << "Failed to set Blob with precision not corresponding to user output precision"; + IE_THROW(ParameterMismatch) + << "Failed to set Blob with precision not corresponding to user output precision"; } // ilavreno: this condition is valid for most plugins except MYRIAD // it is able to perform layout conversion for output blob dynamically @@ -114,7 +118,7 @@ Blob::Ptr IInferRequestInternal::GetBlob(const std::string& name) { Blob::Ptr data; InputInfo::Ptr foundInput; DataPtr foundOutput; - const SizeVector oneVector = { 1 }; + const SizeVector oneVector = {1}; if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) { // ROI blob is returned only if it was set previously. Otherwise default blob is returned. auto it = _preProcData.find(name); @@ -122,10 +126,11 @@ Blob::Ptr IInferRequestInternal::GetBlob(const std::string& name) { data = it->second->getRoiBlob(); } else { data = _inputs[name]; - checkBlob(data, name, true, - foundInput->getTensorDesc().getLayout() != SCALAR - ? foundInput->getTensorDesc().getDims() - : oneVector); + checkBlob( + data, + name, + true, + foundInput->getTensorDesc().getLayout() != SCALAR ? foundInput->getTensorDesc().getDims() : oneVector); auto& devBlob = _deviceInputs[name]; if (preProcessingRequired(foundInput, data, devBlob)) { @@ -135,10 +140,11 @@ Blob::Ptr IInferRequestInternal::GetBlob(const std::string& name) { } } else { data = _outputs[name]; - checkBlob(data, name, false, - foundOutput->getTensorDesc().getLayout() != SCALAR - ? foundOutput->getTensorDesc().getDims() - : oneVector); + checkBlob( + data, + name, + false, + foundOutput->getTensorDesc().getLayout() != SCALAR ? 
foundOutput->getTensorDesc().getDims() : oneVector); } return data; } @@ -147,7 +153,7 @@ void IInferRequestInternal::SetBlob(const std::string& name, const Blob::Ptr& da InputInfo::Ptr foundInput; DataPtr foundOutput; if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) { - foundInput->getPreProcess() = copyPreProcess(info); + foundInput->getPreProcess() = copyPreProcess(info); } else { IE_THROW() << "Pre-process can't be set to output blob"; } @@ -201,17 +207,21 @@ void IInferRequestInternal::execDataPreprocessing(InferenceEngine::BlobMap& prep } } -bool IInferRequestInternal::findInputAndOutputBlobByName(const std::string& name, InputInfo::Ptr& foundInput, DataPtr& foundOutput) const { +bool IInferRequestInternal::findInputAndOutputBlobByName(const std::string& name, + InputInfo::Ptr& foundInput, + DataPtr& foundOutput) const { foundInput = nullptr; foundOutput = nullptr; if (_networkOutputs.empty()) { IE_THROW() << "Internal error: network outputs is not set"; } - auto foundInputPair = std::find_if(std::begin(_networkInputs), std::end(_networkInputs), - [&](const std::pair& pair) { - return pair.first == name; - }); - auto foundOutputPair = std::find_if(std::begin(_networkOutputs), std::end(_networkOutputs), + auto foundInputPair = std::find_if(std::begin(_networkInputs), + std::end(_networkInputs), + [&](const std::pair& pair) { + return pair.first == name; + }); + auto foundOutputPair = std::find_if(std::begin(_networkOutputs), + std::end(_networkOutputs), [&](const std::pair& pair) { return pair.first == name; }); @@ -229,7 +239,10 @@ bool IInferRequestInternal::findInputAndOutputBlobByName(const std::string& name return retVal; } -void IInferRequestInternal::checkBlob(const Blob::Ptr& blob, const std::string& name, bool isInput, const SizeVector& refDims) const { +void IInferRequestInternal::checkBlob(const Blob::Ptr& blob, + const std::string& name, + bool isInput, + const SizeVector& refDims) const { std::string bType = isInput ? "Input" : "Output"; std::string sType = isInput ? "input" : "output"; std::string strNotAllocated(bType + " data was not allocated."); @@ -242,19 +255,19 @@ void IInferRequestInternal::checkBlob(const Blob::Ptr& blob, const std::string& if (refDims.empty()) { SizeVector dims; if (isInput) { - auto foundInputPair = std::find_if(std::begin(_networkInputs), std::end(_networkInputs), - [&](const std::pair& pair) { - return pair.first == name; - }); + auto foundInputPair = std::find_if(std::begin(_networkInputs), + std::end(_networkInputs), + [&](const std::pair& pair) { + return pair.first == name; + }); if (foundInputPair == std::end(_networkInputs)) { IE_THROW(NotFound) << "Failed to find input with name: \'" << name << "\'"; } dims = foundInputPair->second->getTensorDesc().getDims(); - refSize = foundInputPair->second->getTensorDesc().getLayout() != SCALAR - ? details::product(dims) - : 1; + refSize = foundInputPair->second->getTensorDesc().getLayout() != SCALAR ? 
details::product(dims) : 1; } else { - auto foundOutputPair = std::find_if(std::begin(_networkOutputs), std::end(_networkOutputs), + auto foundOutputPair = std::find_if(std::begin(_networkOutputs), + std::end(_networkOutputs), [&](const std::pair& pair) { return pair.first == name; }); @@ -262,9 +275,7 @@ void IInferRequestInternal::checkBlob(const Blob::Ptr& blob, const std::string& IE_THROW(NotFound) << "Failed to find output with name: \'" << name << "\'"; } dims = foundOutputPair->second->getTensorDesc().getDims(); - refSize = foundOutputPair->second->getTensorDesc().getLayout() != SCALAR - ? details::product(dims) - : 1; + refSize = foundOutputPair->second->getTensorDesc().getLayout() != SCALAR ? details::product(dims) : 1; } } else { refSize = details::product(refDims); @@ -274,7 +285,8 @@ void IInferRequestInternal::checkBlob(const Blob::Ptr& blob, const std::string& IE_THROW() << strNotMatched + ": got " << blob->size() << " expecting " << refSize; } const bool remoteBlobPassed = blob->is(); - if (!remoteBlobPassed && blob->buffer() == nullptr) IE_THROW() << strNotAllocated; + if (!remoteBlobPassed && blob->buffer() == nullptr) + IE_THROW() << strNotAllocated; } void IInferRequestInternal::checkBlobs() { @@ -286,11 +298,14 @@ void IInferRequestInternal::checkBlobs() { } } -void IInferRequestInternal::setPointerToExecutableNetworkInternal(const std::shared_ptr& exeNetwork) { +void IInferRequestInternal::setPointerToExecutableNetworkInternal( + const std::shared_ptr& exeNetwork) { _exeNetwork = exeNetwork; } -bool IInferRequestInternal::preProcessingRequired(const InputInfo::Ptr& info, const Blob::Ptr& userBlob, const Blob::Ptr& deviceBlob) { +bool IInferRequestInternal::preProcessingRequired(const InputInfo::Ptr& info, + const Blob::Ptr& userBlob, + const Blob::Ptr& deviceBlob) { // pre-processing is required if: // 1. resize algorithm is specified (resize required) // 2. color format specified: @@ -305,30 +320,34 @@ bool IInferRequestInternal::preProcessingRequired(const InputInfo::Ptr& info, co const auto networkColorFormat = ColorFormat::BGR; const bool colorFormatSpecified = inputColorFormat != ColorFormat::RAW; - auto blob_layout = [](const Blob::Ptr& b) { return b->getTensorDesc().getLayout(); }; - auto blob_prec = [](const Blob::Ptr& b) { return b->getTensorDesc().getPrecision();}; + auto blob_layout = [](const Blob::Ptr& b) { + return b->getTensorDesc().getLayout(); + }; + auto blob_prec = [](const Blob::Ptr& b) { + return b->getTensorDesc().getPrecision(); + }; auto dst_layout = deviceBlob ? blob_layout(deviceBlob) : info->getLayout(); - auto dst_prec = deviceBlob ? blob_prec(deviceBlob) : info->getPrecision(); + auto dst_prec = deviceBlob ? blob_prec(deviceBlob) : info->getPrecision(); - //FIXME: remove the first part to allow any needed conversion? - const bool need_layout_conv = (colorFormatSpecified || deviceBlob) && - (blob_layout(userBlob) != dst_layout); + // FIXME: remove the first part to allow any needed conversion? 
+ const bool need_layout_conv = (colorFormatSpecified || deviceBlob) && (blob_layout(userBlob) != dst_layout); return preProcessInfo.getResizeAlgorithm() != ResizeAlgorithm::NO_RESIZE || - (colorFormatSpecified && inputColorFormat != networkColorFormat) || - need_layout_conv || - (blob_prec(userBlob) != dst_prec); + (colorFormatSpecified && inputColorFormat != networkColorFormat) || need_layout_conv || + (blob_prec(userBlob) != dst_prec); } -void IInferRequestInternal::addInputPreProcessingFor(const std::string& name, Blob::Ptr const& from, const Blob::Ptr& to) { +void IInferRequestInternal::addInputPreProcessingFor(const std::string& name, + Blob::Ptr const& from, + const Blob::Ptr& to) { auto ppDataIt = _preProcData.find(name); if (ppDataIt == _preProcData.end()) { ppDataIt = (_preProcData.emplace(name, CreatePreprocDataHelper())).first; } auto& preproc_ptr = ppDataIt->second; - preproc_ptr->isApplicable(from, to); + preproc_ptr->isApplicable(from, to); // Stores the given blob as ROI blob. It will be used to fill in network input // during pre-processing preproc_ptr->setRoiBlob(from); diff --git a/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_iplugin_internal.cpp b/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_iplugin_internal.cpp index c4cf14ba469..e576aed8fb5 100644 --- a/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_iplugin_internal.cpp +++ b/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_iplugin_internal.cpp @@ -7,20 +7,20 @@ * @file ie_iplugin_internal.hpp */ -#include -#include -#include -#include -#include +#include "cpp_interfaces/interface/ie_iplugin_internal.hpp" -#include - -#include #include +#include #include #include #include +#include "blob_factory.hpp" +#include "ie_icore.hpp" +#include "ie_iextension.h" +#include "ie_input_info.hpp" +#include "ie_parameter.hpp" + namespace InferenceEngine { PreProcessInfo copyPreProcess(const PreProcessInfo& from) { @@ -99,21 +99,24 @@ void IInferencePlugin::SetName(const std::string& pluginName) noexcept { _pluginName = pluginName; } -std::shared_ptr IInferencePlugin::LoadNetwork(const CNNNetwork& network, - const std::map& config) { +std::shared_ptr IInferencePlugin::LoadNetwork( + const CNNNetwork& network, + const std::map& config) { return LoadNetwork(network, config, nullptr); } -template +template std::map> const_map_cast(const std::map>& map) { std::map> res; - for (auto&& v : map) res.emplace(v.first, std::const_pointer_cast(v.second)); + for (auto&& v : map) + res.emplace(v.first, std::const_pointer_cast(v.second)); return res; } -std::shared_ptr IInferencePlugin::LoadNetwork(const CNNNetwork& network, - const std::map& config, - const std::shared_ptr& context) { +std::shared_ptr IInferencePlugin::LoadNetwork( + const CNNNetwork& network, + const std::map& config, + const std::shared_ptr& context) { std::shared_ptr impl; if (nullptr == context) { impl = LoadExeNetworkImpl(network, config); @@ -126,8 +129,9 @@ std::shared_ptr IInferencePlugin::LoadNetwork(const return impl; } -std::shared_ptr IInferencePlugin::LoadNetwork(const std::string& modelPath, - const std::map& config) { +std::shared_ptr IInferencePlugin::LoadNetwork( + const std::string& modelPath, + const std::map& config) { auto cnnNet = GetCore()->ReadNetwork(modelPath, std::string()); return GetCore()->LoadNetwork(cnnNet, GetName(), config); } @@ -140,13 +144,11 @@ void IInferencePlugin::SetConfig(const std::map&) { IE_THROW(NotImplemented); } -Parameter 
IInferencePlugin::GetConfig(const std::string&, - const std::map&) const { +Parameter IInferencePlugin::GetConfig(const std::string&, const std::map&) const { IE_THROW(NotImplemented); } -Parameter IInferencePlugin::GetMetric(const std::string&, - const std::map&) const { +Parameter IInferencePlugin::GetMetric(const std::string&, const std::map&) const { IE_THROW(NotImplemented); } @@ -158,8 +160,9 @@ RemoteContext::Ptr IInferencePlugin::GetDefaultContext(const ParamMap&) { IE_THROW(NotImplemented); } -std::shared_ptr IInferencePlugin::ImportNetwork(const std::string& modelFileName, - const std::map& config) { +std::shared_ptr IInferencePlugin::ImportNetwork( + const std::string& modelFileName, + const std::map& config) { std::ifstream blobFile(modelFileName, std::ios::binary); if (!blobFile.is_open()) { @@ -169,15 +172,17 @@ std::shared_ptr IInferencePlugin::ImportNetwork(cons return ImportNetwork(blobFile, config); } -std::shared_ptr IInferencePlugin::ImportNetwork(std::istream& networkModel, - const std::map& config) { +std::shared_ptr IInferencePlugin::ImportNetwork( + std::istream& networkModel, + const std::map& config) { IE_THROW(NotImplemented); } -std::shared_ptr IInferencePlugin::ImportNetwork(std::istream& networkModel, - const std::shared_ptr& context, - const std::map& config) { - IE_THROW(NotImplemented); +std::shared_ptr IInferencePlugin::ImportNetwork( + std::istream& networkModel, + const std::shared_ptr& context, + const std::map& config) { + IE_THROW(NotImplemented); } void IInferencePlugin::SetCore(std::weak_ptr core) { @@ -194,14 +199,16 @@ QueryNetworkResult IInferencePlugin::QueryNetwork(const CNNNetwork& network, IE_THROW(NotImplemented); } -std::shared_ptr IInferencePlugin::LoadExeNetworkImpl(const CNNNetwork&, - const std::map&) { - IE_THROW(NotImplemented); +std::shared_ptr IInferencePlugin::LoadExeNetworkImpl( + const CNNNetwork&, + const std::map&) { + IE_THROW(NotImplemented); } -std::shared_ptr IInferencePlugin::LoadExeNetworkImpl(const CNNNetwork&, - const std::shared_ptr&, - const std::map&) { +std::shared_ptr IInferencePlugin::LoadExeNetworkImpl( + const CNNNetwork&, + const std::shared_ptr&, + const std::map&) { IE_THROW(NotImplemented); } @@ -215,4 +222,4 @@ void IInferencePlugin::SetExeNetworkInfo(const std::shared_ptrSetPointerToPlugin(shared_from_this()); } -} // namespace InferenceEngine \ No newline at end of file +} // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_ivariable_state_internal.cpp b/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_ivariable_state_internal.cpp index a499e816ee0..94917ceee50 100644 --- a/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_ivariable_state_internal.cpp +++ b/inference-engine/src/inference_engine/src/cpp_interfaces/interface/ie_ivariable_state_internal.cpp @@ -15,7 +15,7 @@ void IVariableStateInternal::Reset() { IE_THROW(NotImplemented); } -void IVariableStateInternal::SetState(const Blob::Ptr& newState) { +void IVariableStateInternal::SetState(const Blob::Ptr& newState) { state = newState; } diff --git a/inference-engine/src/inference_engine/src/cpu_x86_sse42/blob_transform_sse42.cpp b/inference-engine/src/inference_engine/src/cpu_x86_sse42/blob_transform_sse42.cpp index 350fe6839b5..49c9d1f0cca 100644 --- a/inference-engine/src/inference_engine/src/cpu_x86_sse42/blob_transform_sse42.cpp +++ b/inference-engine/src/inference_engine/src/cpu_x86_sse42/blob_transform_sse42.cpp @@ -91,8 +91,16 @@ static inline 
void mm_store_interleave(float* ptr, __m128 a, __m128 b, __m128 c) // //------------------------------------------------------------------------ -void blob_copy_4d_split_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_src_stride, size_t H_src_stride, - size_t N_dst_stride, size_t H_dst_stride, size_t C_dst_stride, int N, int H, int W) { +void blob_copy_4d_split_u8c3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + size_t N_src_stride, + size_t H_src_stride, + size_t N_dst_stride, + size_t H_dst_stride, + size_t C_dst_stride, + int N, + int H, + int W) { for (int n = 0; n < N; n++) for (int h = 0; h < H; h++) { const uint8_t* src = src_ptr + n * N_src_stride + h * H_src_stride; @@ -119,8 +127,16 @@ void blob_copy_4d_split_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_ } } -void blob_copy_4d_split_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src_stride, size_t H_src_stride, - size_t N_dst_stride, size_t H_dst_stride, size_t C_dst_stride, int N, int H, int W) { +void blob_copy_4d_split_f32c3(const float* src_ptr, + float* dst_ptr, + size_t N_src_stride, + size_t H_src_stride, + size_t N_dst_stride, + size_t H_dst_stride, + size_t C_dst_stride, + int N, + int H, + int W) { for (int n = 0; n < N; n++) for (int h = 0; h < H; h++) { const float* src = src_ptr + n * N_src_stride + h * H_src_stride; @@ -147,8 +163,16 @@ void blob_copy_4d_split_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src } } -void blob_copy_4d_merge_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_src_stride, size_t H_src_stride, - size_t C_src_stride, size_t N_dst_stride, size_t H_dst_stride, int N, int H, int W) { +void blob_copy_4d_merge_u8c3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + size_t N_src_stride, + size_t H_src_stride, + size_t C_src_stride, + size_t N_dst_stride, + size_t H_dst_stride, + int N, + int H, + int W) { for (int n = 0; n < N; n++) for (int h = 0; h < H; h++) { const uint8_t* src0 = src_ptr + n * N_src_stride + 0 * C_src_stride + h * H_src_stride; @@ -176,8 +200,16 @@ void blob_copy_4d_merge_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_ } } -void blob_copy_4d_merge_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src_stride, size_t H_src_stride, - size_t C_src_stride, size_t N_dst_stride, size_t H_dst_stride, int N, int H, int W) { +void blob_copy_4d_merge_f32c3(const float* src_ptr, + float* dst_ptr, + size_t N_src_stride, + size_t H_src_stride, + size_t C_src_stride, + size_t N_dst_stride, + size_t H_dst_stride, + int N, + int H, + int W) { for (int n = 0; n < N; n++) for (int h = 0; h < H; h++) { const float* src0 = src_ptr + n * N_src_stride + 0 * C_src_stride + h * H_src_stride; @@ -205,9 +237,19 @@ void blob_copy_4d_merge_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src } } -void blob_copy_5d_split_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_src_stride, size_t D_src_stride, - size_t H_src_stride, size_t N_dst_stride, size_t D_dst_stride, size_t H_dst_stride, - size_t C_dst_stride, int N, int D, int H, int W) { +void blob_copy_5d_split_u8c3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + size_t N_src_stride, + size_t D_src_stride, + size_t H_src_stride, + size_t N_dst_stride, + size_t D_dst_stride, + size_t H_dst_stride, + size_t C_dst_stride, + int N, + int D, + int H, + int W) { for (int n = 0; n < N; n++) for (int d = 0; d < D; d++) { for (int h = 0; h < H; h++) { @@ -236,9 +278,19 @@ void blob_copy_5d_split_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_ } } -void blob_copy_5d_split_f32c3(const float* src_ptr, 
float* dst_ptr, size_t N_src_stride, size_t D_src_stride, - size_t H_src_stride, size_t N_dst_stride, size_t D_dst_stride, size_t H_dst_stride, - size_t C_dst_stride, int N, int D, int H, int W) { +void blob_copy_5d_split_f32c3(const float* src_ptr, + float* dst_ptr, + size_t N_src_stride, + size_t D_src_stride, + size_t H_src_stride, + size_t N_dst_stride, + size_t D_dst_stride, + size_t H_dst_stride, + size_t C_dst_stride, + int N, + int D, + int H, + int W) { for (int n = 0; n < N; n++) for (int d = 0; d < D; d++) { for (int h = 0; h < H; h++) { @@ -267,9 +319,19 @@ void blob_copy_5d_split_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src } } -void blob_copy_5d_merge_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_src_stride, size_t D_src_stride, - size_t H_src_stride, size_t C_src_stride, size_t N_dst_stride, size_t D_dst_stride, - size_t H_dst_stride, int N, int D, int H, int W) { +void blob_copy_5d_merge_u8c3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + size_t N_src_stride, + size_t D_src_stride, + size_t H_src_stride, + size_t C_src_stride, + size_t N_dst_stride, + size_t D_dst_stride, + size_t H_dst_stride, + int N, + int D, + int H, + int W) { for (int n = 0; n < N; n++) for (int d = 0; d < D; d++) { for (int h = 0; h < H; h++) { @@ -302,9 +364,19 @@ void blob_copy_5d_merge_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_ } } -void blob_copy_5d_merge_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src_stride, size_t D_src_stride, - size_t H_src_stride, size_t C_src_stride, size_t N_dst_stride, size_t D_dst_stride, - size_t H_dst_stride, int N, int D, int H, int W) { +void blob_copy_5d_merge_f32c3(const float* src_ptr, + float* dst_ptr, + size_t N_src_stride, + size_t D_src_stride, + size_t H_src_stride, + size_t C_src_stride, + size_t N_dst_stride, + size_t D_dst_stride, + size_t H_dst_stride, + int N, + int D, + int H, + int W) { for (int n = 0; n < N; n++) for (int d = 0; d < D; d++) { for (int h = 0; h < H; h++) { diff --git a/inference-engine/src/inference_engine/src/cpu_x86_sse42/blob_transform_sse42.hpp b/inference-engine/src/inference_engine/src/cpu_x86_sse42/blob_transform_sse42.hpp index cf5a9807a67..e89603dcb57 100644 --- a/inference-engine/src/inference_engine/src/cpu_x86_sse42/blob_transform_sse42.hpp +++ b/inference-engine/src/inference_engine/src/cpu_x86_sse42/blob_transform_sse42.hpp @@ -15,32 +15,104 @@ namespace InferenceEngine { // //------------------------------------------------------------------------ -void blob_copy_4d_split_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_src_stride, size_t H_src_stride, - size_t N_dst_stride, size_t H_dst_stride, size_t C_dst_stride, int N, int H, int W); +void blob_copy_4d_split_u8c3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + size_t N_src_stride, + size_t H_src_stride, + size_t N_dst_stride, + size_t H_dst_stride, + size_t C_dst_stride, + int N, + int H, + int W); -void blob_copy_4d_split_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src_stride, size_t H_src_stride, - size_t N_dst_stride, size_t H_dst_stride, size_t C_dst_stride, int N, int H, int W); +void blob_copy_4d_split_f32c3(const float* src_ptr, + float* dst_ptr, + size_t N_src_stride, + size_t H_src_stride, + size_t N_dst_stride, + size_t H_dst_stride, + size_t C_dst_stride, + int N, + int H, + int W); -void blob_copy_4d_merge_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_src_stride, size_t H_src_stride, - size_t C_src_stride, size_t N_dst_stride, size_t H_dst_stride, int N, int H, int W); +void 
blob_copy_4d_merge_u8c3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + size_t N_src_stride, + size_t H_src_stride, + size_t C_src_stride, + size_t N_dst_stride, + size_t H_dst_stride, + int N, + int H, + int W); -void blob_copy_4d_merge_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src_stride, size_t H_src_stride, - size_t C_src_stride, size_t N_dst_stride, size_t H_dst_stride, int N, int H, int W); +void blob_copy_4d_merge_f32c3(const float* src_ptr, + float* dst_ptr, + size_t N_src_stride, + size_t H_src_stride, + size_t C_src_stride, + size_t N_dst_stride, + size_t H_dst_stride, + int N, + int H, + int W); -void blob_copy_5d_split_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_src_stride, size_t D_src_stride, - size_t H_src_stride, size_t N_dst_stride, size_t D_dst_stride, size_t H_dst_stride, - size_t C_dst_stride, int N, int D, int H, int W); +void blob_copy_5d_split_u8c3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + size_t N_src_stride, + size_t D_src_stride, + size_t H_src_stride, + size_t N_dst_stride, + size_t D_dst_stride, + size_t H_dst_stride, + size_t C_dst_stride, + int N, + int D, + int H, + int W); -void blob_copy_5d_split_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src_stride, size_t D_src_stride, - size_t H_src_stride, size_t N_dst_stride, size_t D_dst_stride, size_t H_dst_stride, - size_t C_dst_stride, int N, int D, int H, int W); +void blob_copy_5d_split_f32c3(const float* src_ptr, + float* dst_ptr, + size_t N_src_stride, + size_t D_src_stride, + size_t H_src_stride, + size_t N_dst_stride, + size_t D_dst_stride, + size_t H_dst_stride, + size_t C_dst_stride, + int N, + int D, + int H, + int W); -void blob_copy_5d_merge_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_src_stride, size_t D_src_stride, - size_t H_src_stride, size_t C_src_stride, size_t N_dst_stride, size_t D_dst_stride, - size_t H_dst_stride, int N, int D, int H, int W); +void blob_copy_5d_merge_u8c3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + size_t N_src_stride, + size_t D_src_stride, + size_t H_src_stride, + size_t C_src_stride, + size_t N_dst_stride, + size_t D_dst_stride, + size_t H_dst_stride, + int N, + int D, + int H, + int W); -void blob_copy_5d_merge_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src_stride, size_t D_src_stride, - size_t H_src_stride, size_t C_src_stride, size_t N_dst_stride, size_t D_dst_stride, - size_t H_dst_stride, int N, int D, int H, int W); +void blob_copy_5d_merge_f32c3(const float* src_ptr, + float* dst_ptr, + size_t N_src_stride, + size_t D_src_stride, + size_t H_src_stride, + size_t C_src_stride, + size_t N_dst_stride, + size_t D_dst_stride, + size_t H_dst_stride, + int N, + int D, + int H, + int W); } // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/src/ie_blob_common.cpp b/inference-engine/src/inference_engine/src/ie_blob_common.cpp index 70554bbd12c..444d8e3d232 100644 --- a/inference-engine/src/inference_engine/src/ie_blob_common.cpp +++ b/inference-engine/src/inference_engine/src/ie_blob_common.cpp @@ -2,12 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ie_blob.h" - #include #include #include +#include "ie_blob.h" + namespace InferenceEngine { Blob::Ptr Blob::createROI(const ROI&) const { diff --git a/inference-engine/src/inference_engine/src/ie_cache_guard.cpp b/inference-engine/src/inference_engine/src/ie_cache_guard.cpp index fa776d13038..207c6d2aecd 100644 --- a/inference-engine/src/inference_engine/src/ie_cache_guard.cpp +++ 
b/inference-engine/src/inference_engine/src/ie_cache_guard.cpp @@ -3,13 +3,19 @@ // #include "ie_cache_guard.hpp" + #include "ie_common.h" namespace InferenceEngine { -CacheGuardEntry::CacheGuardEntry(CacheGuard& cacheGuard, const std::string& hash, - std::shared_ptr m, std::atomic_int& refCount): - m_cacheGuard(cacheGuard), m_hash(hash), m_mutex(m), m_refCount(refCount) { +CacheGuardEntry::CacheGuardEntry(CacheGuard& cacheGuard, + const std::string& hash, + std::shared_ptr m, + std::atomic_int& refCount) + : m_cacheGuard(cacheGuard), + m_hash(hash), + m_mutex(m), + m_refCount(refCount) { // Don't lock mutex right here for exception-safe considerations m_refCount++; } @@ -36,8 +42,8 @@ std::unique_ptr CacheGuard::getHashLock(const std::string& hash std::unique_ptr res; try { // TODO: use std::make_unique when migrated to C++14 - res = std::unique_ptr( - new CacheGuardEntry(*this, hash, data.m_mutexPtr, data.m_itemRefCounter)); + res = + std::unique_ptr(new CacheGuardEntry(*this, hash, data.m_mutexPtr, data.m_itemRefCounter)); } catch (...) { // In case of exception, we shall remove hash entry if it is not used if (data.m_itemRefCounter == 0) { @@ -45,15 +51,15 @@ std::unique_ptr CacheGuard::getHashLock(const std::string& hash } throw; } - lock.unlock(); // can unlock table lock here, as refCounter is positive and nobody can remove entry - res->performLock(); // in case of exception, 'res' will be destroyed and item will be cleaned up from table + lock.unlock(); // can unlock table lock here, as refCounter is positive and nobody can remove entry + res->performLock(); // in case of exception, 'res' will be destroyed and item will be cleaned up from table return res; } void CacheGuard::checkForRemove(const std::string& hash) { std::lock_guard lock(m_tableMutex); if (m_table.count(hash)) { - auto &data = m_table[hash]; + auto& data = m_table[hash]; if (data.m_itemRefCounter == 0) { // Nobody is using this and nobody is waiting for it - can be removed m_table.erase(hash); diff --git a/inference-engine/src/inference_engine/src/ie_cache_guard.hpp b/inference-engine/src/inference_engine/src/ie_cache_guard.hpp index b0966de87a8..6adc4dd337b 100644 --- a/inference-engine/src/inference_engine/src/ie_cache_guard.hpp +++ b/inference-engine/src/inference_engine/src/ie_cache_guard.hpp @@ -10,11 +10,11 @@ * @file ie_cache_guard.hpp */ -#include -#include -#include -#include #include +#include +#include +#include +#include #include namespace InferenceEngine { @@ -36,8 +36,10 @@ public: * @param m Shared pointer to mutex for internal locking * @param refCount Reference counter. 
Will be decremented on CacheGuardEntry destruction */ - CacheGuardEntry(CacheGuard& cacheGuard, const std::string& hash, - std::shared_ptr m, std::atomic_int& refCount); + CacheGuardEntry(CacheGuard& cacheGuard, + const std::string& hash, + std::shared_ptr m, + std::atomic_int& refCount); CacheGuardEntry(const CacheGuardEntry&) = delete; CacheGuardEntry& operator=(const CacheGuardEntry&) = delete; @@ -106,16 +108,14 @@ public: private: struct Item { - std::shared_ptr m_mutexPtr { std::make_shared() }; + std::shared_ptr m_mutexPtr{std::make_shared()}; // Reference counter for item usage - std::atomic_int m_itemRefCounter {0}; + std::atomic_int m_itemRefCounter{0}; Item() = default; - Item(const Item& other): m_mutexPtr(other.m_mutexPtr), - m_itemRefCounter(other.m_itemRefCounter.load()) {} + Item(const Item& other) : m_mutexPtr(other.m_mutexPtr), m_itemRefCounter(other.m_itemRefCounter.load()) {} Item& operator=(const Item& other) = delete; - Item(Item&& other): m_mutexPtr(std::move(other.m_mutexPtr)), - m_itemRefCounter(other.m_itemRefCounter.load()) {} + Item(Item&& other) : m_mutexPtr(std::move(other.m_mutexPtr)), m_itemRefCounter(other.m_itemRefCounter.load()) {} Item& operator=(Item&& other) = delete; }; std::mutex m_tableMutex; diff --git a/inference-engine/src/inference_engine/src/ie_cache_manager.hpp b/inference-engine/src/inference_engine/src/ie_cache_manager.hpp index 2b5718d73e5..859abf7d48f 100644 --- a/inference-engine/src/inference_engine/src/ie_cache_manager.hpp +++ b/inference-engine/src/inference_engine/src/ie_cache_manager.hpp @@ -9,12 +9,13 @@ */ #pragma once -#include #include -#include #include -#include "ie_api.h" +#include +#include + #include "file_utils.h" +#include "ie_api.h" namespace InferenceEngine { diff --git a/inference-engine/src/inference_engine/src/ie_common.cpp b/inference-engine/src/inference_engine/src/ie_common.cpp index 34ffcc2840c..37b791631a3 100644 --- a/inference-engine/src/inference_engine/src/ie_common.cpp +++ b/inference-engine/src/inference_engine/src/ie_common.cpp @@ -2,21 +2,21 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include -#include -#include +#include "ie_common.h" + #include +#include +#include +#include +#include +#include -#include -#include -#include -#include -#include -#include - -#include +#include "exec_graph_info.hpp" +#include "ie_blob.h" +#include "ie_extension.h" +#include "ie_iextension.h" +#include "ie_parameter.hpp" +#include "ngraph/opsets/opset.hpp" namespace ExecGraphInfoSerialization { // @@ -57,21 +57,37 @@ namespace details { void Rethrow() { try { throw; - } catch (const GeneralError& e) {throw e;} - catch (const NotImplemented& e) {throw e;} - catch (const NetworkNotLoaded& e) {throw e;} - catch (const ParameterMismatch& e) {throw e;} - catch (const NotFound& e) {throw e;} - catch (const OutOfBounds& e) {throw e;} - catch (const Unexpected& e) {throw e;} - catch (const RequestBusy& e) {throw e;} - catch (const ResultNotReady& e) {throw e;} - catch (const NotAllocated& e) {throw e;} - catch (const InferNotStarted& e) {throw e;} - catch (const NetworkNotRead& e) {throw e;} - catch (const InferCancelled& e) {throw e;} - catch (const std::exception& e) {IE_THROW() << e.what();} - catch(...) 
{IE_THROW(Unexpected);} + } catch (const GeneralError& e) { + throw e; + } catch (const NotImplemented& e) { + throw e; + } catch (const NetworkNotLoaded& e) { + throw e; + } catch (const ParameterMismatch& e) { + throw e; + } catch (const NotFound& e) { + throw e; + } catch (const OutOfBounds& e) { + throw e; + } catch (const Unexpected& e) { + throw e; + } catch (const RequestBusy& e) { + throw e; + } catch (const ResultNotReady& e) { + throw e; + } catch (const NotAllocated& e) { + throw e; + } catch (const InferNotStarted& e) { + throw e; + } catch (const NetworkNotRead& e) { + throw e; + } catch (const InferCancelled& e) { + throw e; + } catch (const std::exception& e) { + IE_THROW() << e.what(); + } catch (...) { + IE_THROW(Unexpected); + } } IE_SUPPRESS_DEPRECATED_START @@ -104,7 +120,8 @@ StatusCode InferenceEngineException::getStatus() const { } else if (dynamic_cast(this) != nullptr) { return INFER_CANCELLED; } else { - assert(!"Unreachable"); return OK; + assert(!"Unreachable"); + return OK; } } } // namespace details diff --git a/inference-engine/src/inference_engine/src/ie_compound_blob.cpp b/inference-engine/src/inference_engine/src/ie_compound_blob.cpp index 983311ec80a..0e2604047df 100644 --- a/inference-engine/src/inference_engine/src/ie_compound_blob.cpp +++ b/inference-engine/src/inference_engine/src/ie_compound_blob.cpp @@ -36,7 +36,7 @@ TensorDesc verifyNV12BlobInput(const Blob::Ptr& y, const Blob::Ptr& uv) { // check Blob element size if (yMemoryBlob->element_size() != uvMemoryBlob->element_size()) { IE_THROW() << "Y and UV planes have different element sizes: " << yMemoryBlob->element_size() - << " != " << uvMemoryBlob->element_size(); + << " != " << uvMemoryBlob->element_size(); } // check tensor descriptor parameters @@ -64,7 +64,7 @@ TensorDesc verifyNV12BlobInput(const Blob::Ptr& y, const Blob::Ptr& uv) { const auto& uvDims = uvDesc.getDims(); if (yDims.size() != 4 || uvDims.size() != 4) { IE_THROW() << "Y and UV planes dimension sizes must be 4, actual: " << yDims.size() << "(Y plane) and " - << uvDims.size() << "(UV plane)"; + << uvDims.size() << "(UV plane)"; } // check batch size @@ -83,13 +83,13 @@ TensorDesc verifyNV12BlobInput(const Blob::Ptr& y, const Blob::Ptr& uv) { // check height if (yDims[2] != 2 * uvDims[2]) { IE_THROW() << "The height of the Y plane must be equal to (2 * the height of the UV plane), actual: " - << yDims[2] << "(Y plane) and " << uvDims[2] << "(UV plane)"; + << yDims[2] << "(Y plane) and " << uvDims[2] << "(UV plane)"; } // check width if (yDims[3] != 2 * uvDims[3]) { - IE_THROW() << "The width of the Y plane must be equal to (2 * the width of the UV plane), actual: " - << yDims[3] << "(Y plane) and " << uvDims[3] << "(UV plane)"; + IE_THROW() << "The width of the Y plane must be equal to (2 * the width of the UV plane), actual: " << yDims[3] + << "(Y plane) and " << uvDims[3] << "(UV plane)"; } return {Precision::U8, {}, Layout::NCHW}; @@ -112,10 +112,10 @@ TensorDesc verifyI420BlobInput(const Blob::Ptr& y, const Blob::Ptr& u, const Blo auto uMemoryBlob = u->as(); auto vMemoryBlob = v->as(); // check Blob element size - if (yMemoryBlob->element_size() != uMemoryBlob->element_size() || yMemoryBlob->element_size() != vMemoryBlob->element_size()) { + if (yMemoryBlob->element_size() != uMemoryBlob->element_size() || + yMemoryBlob->element_size() != vMemoryBlob->element_size()) { IE_THROW() << "Y and UV planes have different element sizes: " << yMemoryBlob->element_size() - << " != " << uMemoryBlob->element_size() - << " != " << 
vMemoryBlob->element_size(); + << " != " << uMemoryBlob->element_size() << " != " << vMemoryBlob->element_size(); } // check tensor descriptor parameters @@ -152,8 +152,7 @@ TensorDesc verifyI420BlobInput(const Blob::Ptr& y, const Blob::Ptr& u, const Blo if (yDims.size() != 4 || uDims.size() != 4 || vDims.size() != 4) { IE_THROW() << "Y,U and V planes dimension sizes must be 4, actual: " << yDims.size() << "(Y plane) and " - << uDims.size() << "(U plane) " - << vDims.size() << "(V plane)"; + << uDims.size() << "(U plane) " << vDims.size() << "(V plane)"; } // check batch size @@ -174,23 +173,23 @@ TensorDesc verifyI420BlobInput(const Blob::Ptr& y, const Blob::Ptr& u, const Blo // check height if (yDims[2] != 2 * uDims[2]) { - IE_THROW() << "The height of the Y plane must be equal to (2 * the height of the U plane), actual: " - << yDims[2] << "(Y plane) and " << uDims[2] << "(U plane)"; + IE_THROW() << "The height of the Y plane must be equal to (2 * the height of the U plane), actual: " << yDims[2] + << "(Y plane) and " << uDims[2] << "(U plane)"; } if (yDims[2] != 2 * vDims[2]) { IE_THROW() << "The height of the Y plane must be equal to (2 * the height of the UV plane), actual: " - << yDims[2] << "(Y plane) and " << vDims[2] << "(V plane)"; + << yDims[2] << "(Y plane) and " << vDims[2] << "(V plane)"; } // check width if (yDims[3] != 2 * uDims[3]) { - IE_THROW() << "The width of the Y plane must be equal to (2 * the width of the UV plane), actual: " - << yDims[3] << "(Y plane) and " << uDims[3] << "(U plane)"; + IE_THROW() << "The width of the Y plane must be equal to (2 * the width of the UV plane), actual: " << yDims[3] + << "(Y plane) and " << uDims[3] << "(U plane)"; } if (yDims[3] != 2 * vDims[3]) { - IE_THROW() << "The width of the Y plane must be equal to (2 * the width of the UV plane), actual: " - << yDims[3] << "(Y plane) and " << vDims[3] << "(V plane)"; + IE_THROW() << "The width of the Y plane must be equal to (2 * the width of the UV plane), actual: " << yDims[3] + << "(Y plane) and " << vDims[3] << "(V plane)"; } return {Precision::U8, {}, Layout::NCHW}; @@ -215,7 +214,8 @@ TensorDesc getBlobTensorDesc(const Blob::Ptr& blob) { TensorDesc verifyBatchedBlobInput(const std::vector& blobs) { // verify invariants if (blobs.empty()) { - IE_THROW() << "BatchedBlob cannot be created from empty vector of Blob, Please, make sure vector contains at least one Blob"; + IE_THROW() << "BatchedBlob cannot be created from empty vector of Blob, Please, make sure vector contains at " + "least one Blob"; } // Cannot create a compound blob from nullptr Blob objects @@ -227,10 +227,9 @@ TensorDesc verifyBatchedBlobInput(const std::vector& blobs) { const auto subBlobDesc = getBlobTensorDesc(blobs[0]); - if (std::any_of(blobs.begin(), blobs.end(), - [&subBlobDesc](const Blob::Ptr& blob) { - return getBlobTensorDesc(blob) != subBlobDesc; - })) { + if (std::any_of(blobs.begin(), blobs.end(), [&subBlobDesc](const Blob::Ptr& blob) { + return getBlobTensorDesc(blob) != subBlobDesc; + })) { IE_THROW() << "All blobs tensors should be equal"; } @@ -272,9 +271,9 @@ TensorDesc verifyBatchedBlobInput(const std::vector& blobs) { } // anonymous namespace -CompoundBlob::CompoundBlob(const TensorDesc& tensorDesc): Blob(tensorDesc) {} +CompoundBlob::CompoundBlob(const TensorDesc& tensorDesc) : Blob(tensorDesc) {} -CompoundBlob::CompoundBlob(const std::vector& blobs): CompoundBlob(TensorDesc{}) { +CompoundBlob::CompoundBlob(const std::vector& blobs) : CompoundBlob(TensorDesc{}) { // Cannot create a compound blob 
from nullptr Blob objects if (std::any_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) { return blob == nullptr; @@ -293,7 +292,7 @@ CompoundBlob::CompoundBlob(const std::vector& blobs): CompoundBlob(Te this->_blobs = blobs; } -CompoundBlob::CompoundBlob(std::vector&& blobs): CompoundBlob(TensorDesc{}) { +CompoundBlob::CompoundBlob(std::vector&& blobs) : CompoundBlob(TensorDesc{}) { // Cannot create a compound blob from nullptr Blob objects if (std::any_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) { return blob == nullptr; @@ -361,13 +360,11 @@ const std::shared_ptr& CompoundBlob::getAllocator() const noexcept { return _allocator; }; -NV12Blob::NV12Blob(const Blob::Ptr& y, const Blob::Ptr& uv) - : CompoundBlob(verifyNV12BlobInput(y, uv)) { +NV12Blob::NV12Blob(const Blob::Ptr& y, const Blob::Ptr& uv) : CompoundBlob(verifyNV12BlobInput(y, uv)) { this->_blobs = {y, uv}; } -NV12Blob::NV12Blob(Blob::Ptr&& y, Blob::Ptr&& uv) - : CompoundBlob(verifyNV12BlobInput(y, uv)) { +NV12Blob::NV12Blob(Blob::Ptr&& y, Blob::Ptr&& uv) : CompoundBlob(verifyNV12BlobInput(y, uv)) { this->_blobs = {std::move(y), std::move(uv)}; } @@ -409,8 +406,7 @@ I420Blob::I420Blob(const Blob::Ptr& y, const Blob::Ptr& u, const Blob::Ptr& v) this->_blobs = {y, u, v}; } -I420Blob::I420Blob(Blob::Ptr&& y, Blob::Ptr&& u, Blob::Ptr&& v) - : CompoundBlob(verifyI420BlobInput(y, u, v)) { +I420Blob::I420Blob(Blob::Ptr&& y, Blob::Ptr&& u, Blob::Ptr&& v) : CompoundBlob(verifyI420BlobInput(y, u, v)) { this->_blobs = {std::move(y), std::move(u), std::move(v)}; } @@ -458,13 +454,11 @@ Blob::Ptr I420Blob::createROI(const ROI& roi) const { return std::make_shared(yRoiBlob, uRoiBlob, vRoiBlob); } -BatchedBlob::BatchedBlob(const std::vector& blobs) - : CompoundBlob(verifyBatchedBlobInput(blobs)) { +BatchedBlob::BatchedBlob(const std::vector& blobs) : CompoundBlob(verifyBatchedBlobInput(blobs)) { this->_blobs = blobs; } -BatchedBlob::BatchedBlob(std::vector&& blobs) - : CompoundBlob(verifyBatchedBlobInput(blobs)) { +BatchedBlob::BatchedBlob(std::vector&& blobs) : CompoundBlob(verifyBatchedBlobInput(blobs)) { this->_blobs = std::move(blobs); } diff --git a/inference-engine/src/inference_engine/src/ie_core.cpp b/inference-engine/src/inference_engine/src/ie_core.cpp index c7896e69593..b636d519368 100644 --- a/inference-engine/src/inference_engine/src/ie_core.cpp +++ b/inference-engine/src/inference_engine/src/ie_core.cpp @@ -2,32 +2,33 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include -#include -#include +#include "ie_core.hpp" + #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include "compilation_context.hpp" #include "cpp/ie_plugin.hpp" -#include "ie_plugin_config.hpp" -#include "ie_cache_manager.hpp" -#include "ie_cache_guard.hpp" -#include "ie_itt.hpp" -#include "file_utils.h" -#include "ie_network_reader.hpp" -#include "xml_parse_utils.h" -#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" #include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp" +#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" +#include "file_utils.h" +#include "ie_cache_guard.hpp" +#include "ie_cache_manager.hpp" +#include "ie_icore.hpp" +#include "ie_itt.hpp" +#include "ie_network_reader.hpp" +#include "ie_plugin_config.hpp" +#include "ngraph/graph_util.hpp" +#include "ngraph/ngraph.hpp" +#include "ngraph/opsets/opset.hpp" +#include "ngraph/pass/constant_folding.hpp" +#include 
"openvino/runtime/core.hpp" +#include "xml_parse_utils.h" using namespace InferenceEngine::PluginConfigParams; using namespace std::placeholders; @@ -44,8 +45,8 @@ std::string parseXmlConfig(const std::string& xmlFile) { std::string xmlConfigFile_ = xmlFile; if (xmlConfigFile_.empty()) { // register plugins from default plugins.xml config - FileUtils::FilePath xmlConfigFileDefault = FileUtils::makePath(InferenceEngine::getInferenceEngineLibraryPath(), - FileUtils::toFilePath("plugins.xml")); + FileUtils::FilePath xmlConfigFileDefault = + FileUtils::makePath(InferenceEngine::getInferenceEngineLibraryPath(), FileUtils::toFilePath("plugins.xml")); xmlConfigFile_ = FileUtils::fromFilePath(xmlConfigFileDefault); } return xmlConfigFile_; @@ -85,39 +86,40 @@ Parsed parseDeviceNameIntoConfig(const std::string& deviceName, const std::ma return {deviceName_, config_}; } -InferenceEngine::Parameter copyParameterValue(const InferenceEngine::Parameter & value) { +InferenceEngine::Parameter copyParameterValue(const InferenceEngine::Parameter& value) { if (value.is()) { - return { value.as() }; + return {value.as()}; } else if (value.is()) { - return { value.as() }; + return {value.as()}; } else if (value.is()) { - return { value.as() }; + return {value.as()}; } else if (value.is()) { - return { value.as() }; + return {value.as()}; } else if (value.is()) { - return { value.as() }; - } else if (value.is >()) { - return { value.as >() }; - } else if (value.is >()) { - return { value.as >() }; - } else if (value.is >()) { - return { value.as >() }; - } else if (value.is >()) { - return { value.as >() }; - } else if (value.is >()) { - return { value.as >() }; - } else if (value.is >()) { - return { value.as >() }; + return {value.as()}; + } else if (value.is>()) { + return {value.as>()}; + } else if (value.is>()) { + return {value.as>()}; + } else if (value.is>()) { + return {value.as>()}; + } else if (value.is>()) { + return {value.as>()}; + } else if (value.is>()) { + return {value.as>()}; + } else if (value.is>()) { + return {value.as>()}; } return std::move(value); } template -void allowNotImplemented(F && f) { +void allowNotImplemented(F&& f) { try { f(); - } catch (const InferenceEngine::NotImplemented&) { } + } catch (const InferenceEngine::NotImplemented&) { + } } class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_this { @@ -126,7 +128,7 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t class CoreConfig final { public: struct CacheConfig { - std::string _cacheDir; + std::string _cacheDir; std::shared_ptr _cacheManager; }; @@ -137,7 +139,8 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t _cacheConfig._cacheDir = it->second; if (!it->second.empty()) { FileUtils::createDirectoryRecursive(it->second); - _cacheConfig._cacheManager = std::make_shared(std::move(it->second)); + _cacheConfig._cacheManager = + std::make_shared(std::move(it->second)); } else { _cacheConfig._cacheManager = nullptr; } @@ -182,10 +185,8 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t bool DeviceSupportsImportExport(const InferenceEngine::InferencePlugin& plugin) const { std::vector supportedMetricKeys = plugin.GetMetric(METRIC_KEY(SUPPORTED_METRICS), {}); - auto it = std::find(supportedMetricKeys.begin(), supportedMetricKeys.end(), - METRIC_KEY(IMPORT_EXPORT_SUPPORT)); - bool supported = (it != supportedMetricKeys.end()) && - plugin.GetMetric(METRIC_KEY(IMPORT_EXPORT_SUPPORT), {}); + auto it = 
std::find(supportedMetricKeys.begin(), supportedMetricKeys.end(), METRIC_KEY(IMPORT_EXPORT_SUPPORT)); + bool supported = (it != supportedMetricKeys.end()) && plugin.GetMetric(METRIC_KEY(IMPORT_EXPORT_SUPPORT), {}); return supported; } @@ -198,11 +199,10 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t std::vector supportedMetricKeys; try { // If plugin doesn't support 'SUPPORTED_METRICS' - treat it as config is not supported as well - supportedMetricKeys = - plugin.GetMetric(METRIC_KEY(SUPPORTED_METRICS), {}).as>(); - } catch(...) {} - auto it = std::find(supportedMetricKeys.begin(), supportedMetricKeys.end(), - METRIC_KEY(SUPPORTED_CONFIG_KEYS)); + supportedMetricKeys = plugin.GetMetric(METRIC_KEY(SUPPORTED_METRICS), {}).as>(); + } catch (...) { + } + auto it = std::find(supportedMetricKeys.begin(), supportedMetricKeys.end(), METRIC_KEY(SUPPORTED_CONFIG_KEYS)); if (it != supportedMetricKeys.end()) { std::vector configKeys = plugin.GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS), {}); supported = std::find(configKeys.begin(), configKeys.end(), key) != configKeys.end(); @@ -211,24 +211,25 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t } InferenceEngine::SoExecutableNetworkInternal LoadNetworkImpl(const InferenceEngine::CNNNetwork& network, - InferenceEngine::InferencePlugin& plugin, - const std::map& parsedConfig, - const InferenceEngine::RemoteContext::Ptr& context, - const std::string& blobID, - const std::string& modelPath = std::string(), - bool forceDisableCache = false) { - OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::Impl::LoadNetworkImpl"); + InferenceEngine::InferencePlugin& plugin, + const std::map& parsedConfig, + const InferenceEngine::RemoteContext::Ptr& context, + const std::string& blobID, + const std::string& modelPath = std::string(), + bool forceDisableCache = false) { + OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "CoreImpl::LoadNetworkImpl"); InferenceEngine::SoExecutableNetworkInternal execNetwork; - execNetwork = context ? plugin.LoadNetwork(network, context, parsedConfig) : - plugin.LoadNetwork(network, parsedConfig); + execNetwork = + context ? plugin.LoadNetwork(network, context, parsedConfig) : plugin.LoadNetwork(network, parsedConfig); auto cacheManager = coreConfig.getCacheConfig()._cacheManager; if (!forceDisableCache && cacheManager && DeviceSupportsImportExport(plugin)) { try { // need to export network for further import from "cache" OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetwork::Export"); cacheManager->writeCacheEntry(blobID, [&](std::ostream& networkStream) { - networkStream << InferenceEngine::CompiledBlobHeader(InferenceEngine::GetInferenceEngineVersion()->buildNumber, - InferenceEngine::NetworkCompilationContext::calculateFileInfo(modelPath)); + networkStream << InferenceEngine::CompiledBlobHeader( + InferenceEngine::GetInferenceEngineVersion()->buildNumber, + InferenceEngine::NetworkCompilationContext::calculateFileInfo(modelPath)); execNetwork->Export(networkStream); }); } catch (...) 
{ @@ -239,20 +240,23 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t return execNetwork; } - InferenceEngine::SoExecutableNetworkInternal LoadNetworkFromCache(const std::shared_ptr& cacheManager, - const std::string& blobId, - InferenceEngine::InferencePlugin& plugin, - const std::map& config, - const InferenceEngine::RemoteContext::Ptr& context, - bool& networkIsImported, - const std::string& modelPath = std::string()) { + InferenceEngine::SoExecutableNetworkInternal LoadNetworkFromCache( + const std::shared_ptr& cacheManager, + const std::string& blobId, + InferenceEngine::InferencePlugin& plugin, + const std::map& config, + const InferenceEngine::RemoteContext::Ptr& context, + bool& networkIsImported, + const std::string& modelPath = std::string()) { InferenceEngine::SoExecutableNetworkInternal execNetwork; struct HeaderException {}; IE_ASSERT(cacheManager != nullptr); try { - cacheManager->readCacheEntry(blobId, [&](std::istream &networkStream) { - OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetworkFromCache::ReadStreamAndImport"); + cacheManager->readCacheEntry(blobId, [&](std::istream& networkStream) { + OV_ITT_SCOPE(FIRST_INFERENCE, + InferenceEngine::itt::domains::IE_LT, + "Core::LoadNetworkFromCache::ReadStreamAndImport"); try { InferenceEngine::CompiledBlobHeader header; networkStream >> header; @@ -260,7 +264,8 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t // Build number mismatch, don't use this cache throw InferenceEngine::NetworkNotRead("Version does not match"); } - if (header.getFileInfo() != InferenceEngine::NetworkCompilationContext::calculateFileInfo(modelPath)) { + if (header.getFileInfo() != + InferenceEngine::NetworkCompilationContext::calculateFileInfo(modelPath)) { // Original file is changed, don't use cache throw InferenceEngine::NetworkNotRead("Original model file is changed"); } @@ -268,9 +273,8 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t throw HeaderException(); } - execNetwork = context ? - plugin.ImportNetwork(networkStream, context, config) : - plugin.ImportNetwork(networkStream, config); + execNetwork = context ? plugin.ImportNetwork(networkStream, context, config) + : plugin.ImportNetwork(networkStream, config); networkIsImported = true; }); } catch (const HeaderException&) { @@ -307,10 +311,9 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t } // 2. 
replace it with DEVICE_ARCHITECTURE value - std::vector supportedMetricKeys = - plugin.GetMetric(METRIC_KEY(SUPPORTED_METRICS), getMetricConfig); - auto archIt = std::find(supportedMetricKeys.begin(), supportedMetricKeys.end(), - METRIC_KEY(DEVICE_ARCHITECTURE)); + std::vector supportedMetricKeys = plugin.GetMetric(METRIC_KEY(SUPPORTED_METRICS), getMetricConfig); + auto archIt = + std::find(supportedMetricKeys.begin(), supportedMetricKeys.end(), METRIC_KEY(DEVICE_ARCHITECTURE)); if (archIt != supportedMetricKeys.end()) { auto value = plugin.GetMetric(METRIC_KEY(DEVICE_ARCHITECTURE), getMetricConfig); compileConfig[METRIC_KEY(DEVICE_ARCHITECTURE)] = value.as(); @@ -321,14 +324,16 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t return compileConfig; } - std::string CalculateNetworkHash(const InferenceEngine::CNNNetwork& network, const std::string& deviceFamily, + std::string CalculateNetworkHash(const InferenceEngine::CNNNetwork& network, + const std::string& deviceFamily, const InferenceEngine::InferencePlugin& plugin, const std::map& config) const { auto compileConfig = CreateCompileConfig(plugin, deviceFamily, config); return InferenceEngine::NetworkCompilationContext::computeHash(network, compileConfig); } - std::string CalculateFileHash(const std::string& modelName, const std::string& deviceFamily, + std::string CalculateFileHash(const std::string& modelName, + const std::string& deviceFamily, const InferenceEngine::InferencePlugin& plugin, const std::map& config) const { auto compileConfig = CreateCompileConfig(plugin, deviceFamily, config); @@ -349,7 +354,8 @@ public: ~CoreImpl() override = default; /** - * @brief Register plugins for devices which are located in .xml configuration file. The function supports UNICODE path + * @brief Register plugins for devices which are located in .xml configuration file. 
The function supports UNICODE + * path * @param xmlConfigFile An .xml configuraion with device / plugin information */ void RegisterPluginsInRegistry(const std::string& xmlConfigFile) { @@ -366,7 +372,7 @@ public: pugi::xml_node ieNode = xmlDoc.document_element(); pugi::xml_node devicesNode = ieNode.child("plugins"); - FOREACH_CHILD(pluginNode, devicesNode, "plugin") { + FOREACH_CHILD (pluginNode, devicesNode, "plugin") { std::string deviceName = GetStrAttr(pluginNode, "name"); FileUtils::FilePath pluginPath = FileUtils::toFilePath(GetStrAttr(pluginNode, "location").c_str()); @@ -376,8 +382,10 @@ public: // append IR library path for default IE plugins { - FileUtils::FilePath absFilePath = FileUtils::makePath(InferenceEngine::getInferenceEngineLibraryPath(), pluginPath); - if (FileUtils::fileExist(absFilePath)) pluginPath = absFilePath; + FileUtils::FilePath absFilePath = + FileUtils::makePath(InferenceEngine::getInferenceEngineLibraryPath(), pluginPath); + if (FileUtils::fileExist(absFilePath)) + pluginPath = absFilePath; } // check properties @@ -385,7 +393,7 @@ public: std::map config; if (propertiesNode) { - FOREACH_CHILD(propertyNode, propertiesNode, "property") { + FOREACH_CHILD (propertyNode, propertiesNode, "property") { std::string key = GetStrAttr(propertyNode, "key"); std::string value = GetStrAttr(propertyNode, "value"); config[key] = value; @@ -397,8 +405,9 @@ public: std::vector listOfExtentions; if (extensionsNode) { - FOREACH_CHILD(extensionNode, extensionsNode, "extension") { - FileUtils::FilePath extensionLocation = FileUtils::toFilePath(GetStrAttr(extensionNode, "location").c_str()); + FOREACH_CHILD (extensionNode, extensionsNode, "extension") { + FileUtils::FilePath extensionLocation = + FileUtils::toFilePath(GetStrAttr(extensionNode, "location").c_str()); listOfExtentions.push_back(extensionLocation); } } @@ -424,18 +433,20 @@ public: } InferenceEngine::CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath) const override { - OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "Core::Impl::ReadNetwork from file"); + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "CoreImpl::ReadNetwork from file"); return InferenceEngine::details::ReadNetwork(modelPath, binPath, extensions); } - InferenceEngine::CNNNetwork ReadNetwork(const std::string& model, const InferenceEngine::Blob::CPtr& weights) const override { - OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "Core::Impl::ReadNetwork from memory"); + InferenceEngine::CNNNetwork ReadNetwork(const std::string& model, + const InferenceEngine::Blob::CPtr& weights) const override { + OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "CoreImpl::ReadNetwork from memory"); return InferenceEngine::details::ReadNetwork(model, weights, extensions); } // TODO: In future this method can be added to ICore interface - InferenceEngine::SoExecutableNetworkInternal LoadNetwork(const InferenceEngine::CNNNetwork& network, const InferenceEngine::RemoteContext::Ptr& context, - const std::map& config) { + InferenceEngine::SoExecutableNetworkInternal LoadNetwork(const InferenceEngine::CNNNetwork& network, + const InferenceEngine::RemoteContext::Ptr& context, + const std::map& config) { OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetwork::RemoteContext"); if (context == nullptr) { IE_THROW() << "Remote context is null"; @@ -458,9 +469,10 @@ public: return res; } - InferenceEngine::SoExecutableNetworkInternal LoadNetwork(const InferenceEngine::CNNNetwork& network, - 
const std::string& deviceName, - const std::map& config) override { + InferenceEngine::SoExecutableNetworkInternal LoadNetwork( + const InferenceEngine::CNNNetwork& network, + const std::string& deviceName, + const std::map& config) override { OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetwork::CNN"); bool forceDisableCache = config.count(CONFIG_KEY_INTERNAL(FORCE_DISABLE_CACHE)) > 0; auto parsed = parseDeviceNameIntoConfig(deviceName, config); @@ -485,9 +497,10 @@ public: return res; } - InferenceEngine::SoExecutableNetworkInternal LoadNetwork(const std::string& modelPath, - const std::string& deviceName, - const std::map& config) override { + InferenceEngine::SoExecutableNetworkInternal LoadNetwork( + const std::string& modelPath, + const std::string& deviceName, + const std::map& config) override { OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetwork::Path"); auto parsed = parseDeviceNameIntoConfig(deviceName, config); auto plugin = GetCPPPluginByName(parsed._deviceName); @@ -497,8 +510,7 @@ public: bool loadedFromCache = false; auto hash = CalculateFileHash(modelPath, parsed._deviceName, plugin, parsed._config); auto lock = cacheGuard.getHashLock(hash); - res = LoadNetworkFromCache(cacheManager, hash, plugin, parsed._config, - nullptr, loadedFromCache, modelPath); + res = LoadNetworkFromCache(cacheManager, hash, plugin, parsed._config, nullptr, loadedFromCache, modelPath); if (!loadedFromCache) { auto cnnNetwork = ReadNetwork(modelPath, std::string()); res = LoadNetworkImpl(cnnNetwork, plugin, parsed._config, nullptr, hash, modelPath); @@ -512,14 +524,17 @@ public: return res; } - InferenceEngine::SoExecutableNetworkInternal ImportNetwork(std::istream& networkModel, const std::string& deviceName, - const std::map& config) override { + InferenceEngine::SoExecutableNetworkInternal ImportNetwork( + std::istream& networkModel, + const std::string& deviceName, + const std::map& config) override { auto parsed = parseDeviceNameIntoConfig(deviceName, config); return GetCPPPluginByName(parsed._deviceName).ImportNetwork(networkModel, parsed._config); } - InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network, const std::string& deviceName, - const std::map& config) const override { + InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network, + const std::string& deviceName, + const std::map& config) const override { OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::QueryNetwork"); auto parsed = parseDeviceNameIntoConfig(deviceName, config); auto res = GetCPPPluginByName(parsed._deviceName).QueryNetwork(network, parsed._config); @@ -591,10 +606,10 @@ public: // plugin is not created by e.g. invalid env } catch (const std::exception& ex) { IE_THROW() << "An exception is thrown while trying to create the " << deviceName - << " device and call GetMetric: " << ex.what(); + << " device and call GetMetric: " << ex.what(); } catch (...) 
{ IE_THROW() << "Unknown exception is thrown while trying to create the " << deviceName - << " device and call GetMetric"; + << " device and call GetMetric"; } if (devicesIDs.size() > 1) { @@ -615,7 +630,7 @@ public: * @return Reference to a CPP plugin wrapper */ InferenceEngine::InferencePlugin GetCPPPluginByName(const std::string& deviceName) const { - OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::Impl::GetCPPPluginByName"); + OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "CoreImpl::GetCPPPluginByName"); std::lock_guard lock(pluginsMutex); @@ -636,13 +651,13 @@ public: plugin.SetName(deviceName); // Set Inference Engine class reference to plugins - std::weak_ptr mutableCore = std::const_pointer_cast( - shared_from_this()); + std::weak_ptr mutableCore = + std::const_pointer_cast(shared_from_this()); plugin.SetCore(mutableCore); } // Add registered extensions to new plugin - allowNotImplemented([&](){ + allowNotImplemented([&]() { for (const auto& ext : extensions) { plugin.AddExtension(ext); } @@ -669,10 +684,10 @@ public: plugins[deviceName] = plugin; } catch (const InferenceEngine::Exception& ex) { - IE_THROW() << "Failed to create plugin " << FileUtils::fromFilePath(desc.libraryLocation) << " for device " << deviceName - << "\n" - << "Please, check your environment\n" - << ex.what() << "\n"; + IE_THROW() << "Failed to create plugin " << FileUtils::fromFilePath(desc.libraryLocation) + << " for device " << deviceName << "\n" + << "Please, check your environment\n" + << ex.what() << "\n"; } } @@ -714,8 +729,10 @@ public: { pluginPath = FileUtils::makePluginLibraryName({}, FileUtils::toFilePath(pluginName.c_str())); - FileUtils::FilePath absFilePath = FileUtils::makePath(InferenceEngine::getInferenceEngineLibraryPath(), pluginPath); - if (FileUtils::fileExist(absFilePath)) pluginPath = absFilePath; + FileUtils::FilePath absFilePath = + FileUtils::makePath(InferenceEngine::getInferenceEngineLibraryPath(), pluginPath); + if (FileUtils::fileExist(absFilePath)) + pluginPath = absFilePath; } PluginDescriptor desc = {pluginPath, {}, {}}; @@ -795,7 +812,8 @@ public: std::map opsets = extension->getOpSets(); for (const auto& it : opsets) { if (opsetNames.find(it.first) != opsetNames.end()) - IE_THROW() << "Cannot add opset with name: " << it.first << ". Opset with the same name already exists."; + IE_THROW() << "Cannot add opset with name: " << it.first + << ". Opset with the same name already exists."; opsetNames.insert(it.first); } @@ -803,7 +821,8 @@ public: for (auto& plugin : plugins) { try { plugin.second.AddExtension(extension); - } catch (...) {} + } catch (...) 
{ + } } extensions.emplace_back(extension); } @@ -860,7 +879,6 @@ public: } // namespace core_detail - namespace InferenceEngine { DeviceIDParser::DeviceIDParser(const std::string& deviceNameWithID) { @@ -893,7 +911,8 @@ std::vector DeviceIDParser::getHeteroDevices(std::string fallbackDe fallbackDevice.erase(0, pos + 1); } - if (!fallbackDevice.empty()) deviceNames.push_back(fallbackDevice); + if (!fallbackDevice.empty()) + deviceNames.push_back(fallbackDevice); return deviceNames; } @@ -915,7 +934,8 @@ std::vector DeviceIDParser::getMultiDevices(std::string devicesList devicesList.erase(0, pos + 1); } - if (!devicesList.empty()) deviceNames.push_back(trim_request_info(devicesList)); + if (!devicesList.empty()) + deviceNames.push_back(trim_request_info(devicesList)); return deviceNames; } @@ -935,8 +955,7 @@ std::map Core::GetVersions(const std::string& deviceName) #ifdef ENABLE_UNICODE_PATH_SUPPORT CNNNetwork Core::ReadNetwork(const std::wstring& modelPath, const std::wstring& binPath) const { - return ReadNetwork(FileUtils::wStringtoMBCSstringChar(modelPath), - FileUtils::wStringtoMBCSstringChar(binPath)); + return ReadNetwork(FileUtils::wStringtoMBCSstringChar(modelPath), FileUtils::wStringtoMBCSstringChar(binPath)); } #endif @@ -949,22 +968,25 @@ CNNNetwork Core::ReadNetwork(const std::string& model, const Blob::CPtr& weights return _impl->ReadNetwork(model, weights); } -ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network, const std::string& deviceName, +ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network, + const std::string& deviceName, const std::map& config) { auto exec = _impl->LoadNetwork(network, deviceName, config); - return { exec, exec }; + return {exec, exec}; } -ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network, RemoteContext::Ptr context, +ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network, + RemoteContext::Ptr context, const std::map& config) { auto exec = _impl->LoadNetwork(network, context, config); - return { exec, exec }; + return {exec, exec}; } -ExecutableNetwork Core::LoadNetwork(const std::string& modelPath, const std::string& deviceName, +ExecutableNetwork Core::LoadNetwork(const std::string& modelPath, + const std::string& deviceName, const std::map& config) { auto exec = _impl->LoadNetwork(modelPath, deviceName, config); - return { exec, exec }; + return {exec, exec}; } RemoteContext::Ptr Core::CreateContext(const std::string& deviceName, const ParamMap& params) { @@ -999,16 +1021,13 @@ RemoteContext::Ptr Core::GetDefaultContext(const std::string& deviceName) { void Core::AddExtension(IExtensionPtr extension, const std::string& deviceName_) { if (deviceName_.find("HETERO") == 0) { - IE_THROW() - << "HETERO device does not support extensions. Please, set extensions directly to fallback devices"; + IE_THROW() << "HETERO device does not support extensions. Please, set extensions directly to fallback devices"; } if (deviceName_.find("MULTI") == 0) { - IE_THROW() - << "MULTI device does not support extensions. Please, set extensions directly to fallback devices"; + IE_THROW() << "MULTI device does not support extensions. Please, set extensions directly to fallback devices"; } if (deviceName_.find("AUTO") == 0) { - IE_THROW() - << "AUTO device does not support extensions. Please, set extensions directly to fallback devices"; + IE_THROW() << "AUTO device does not support extensions. 
Please, set extensions directly to fallback devices"; } _impl->AddExtension(extension); @@ -1018,19 +1037,21 @@ void Core::AddExtension(const IExtensionPtr& extension) { _impl->AddExtension(extension); } -ExecutableNetwork Core::ImportNetwork(const std::string& modelFileName, const std::string& deviceName, +ExecutableNetwork Core::ImportNetwork(const std::string& modelFileName, + const std::string& deviceName, const std::map& config) { OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::ImportNetwork"); auto parsed = core_detail::parseDeviceNameIntoConfig(deviceName, config); auto exec = _impl->GetCPPPluginByName(parsed._deviceName).ImportNetwork(modelFileName, parsed._config); - return { exec, exec }; + return {exec, exec}; } -ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, const std::string& deviceName, +ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, + const std::string& deviceName, const std::map& config) { OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::ImportNetwork"); auto exec = _impl->ImportNetwork(networkModel, deviceName, config); - return { exec, exec }; + return {exec, exec}; } ExecutableNetwork Core::ImportNetwork(std::istream& networkModel) { @@ -1047,12 +1068,12 @@ ExecutableNetwork Core::ImportNetwork(std::istream& networkModel) { std::getline(networkModel, deviceName); } else { IE_THROW() << "Passed compiled stream does not contain device name. " - "Please, provide device name manually"; + "Please, provide device name manually"; } networkModel.seekg(currentPos, networkModel.beg); auto exec = _impl->GetCPPPluginByName(deviceName).ImportNetwork(networkModel, {}); - return { exec, exec }; + return {exec, exec}; } ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, @@ -1070,10 +1091,11 @@ ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, auto parsed = core_detail::parseDeviceNameIntoConfig(deviceName, config); auto exec = _impl->GetCPPPluginByName(deviceName).ImportNetwork(networkModel, context, parsed._config); - return { exec, exec }; + return {exec, exec}; } -QueryNetworkResult Core::QueryNetwork(const CNNNetwork& network, const std::string& deviceName, +QueryNetworkResult Core::QueryNetwork(const CNNNetwork& network, + const std::string& deviceName, const std::map& config) const { return _impl->QueryNetwork(network, deviceName, config); } @@ -1082,25 +1104,26 @@ void Core::SetConfig(const std::map& config, const std // HETERO case if (deviceName.find("HETERO:") == 0) { IE_THROW() << "SetConfig is supported only for HETERO itself (without devices). " - "You can configure the devices with SetConfig before creating the HETERO on top."; + "You can configure the devices with SetConfig before creating the HETERO on top."; } // MULTI case if (deviceName.find("MULTI:") == 0) { IE_THROW() << "SetConfig is supported only for MULTI itself (without devices). " - "You can configure the devices with SetConfig before creating the MULTI on top."; + "You can configure the devices with SetConfig before creating the MULTI on top."; } // AUTO case if (deviceName.find("AUTO:") == 0) { IE_THROW() << "SetConfig is supported only for AUTO itself (without devices). " - "You can configure the devices with SetConfig before creating the AUTO on top."; + "You can configure the devices with SetConfig before creating the AUTO on top."; } // GPU.0, FPGA.1 cases if (deviceName.find(".") != std::string::npos) { - IE_THROW() << "SetConfig is supported only for device family itself (without particular device .#). 
" - "You can pass .# as a particular device instance to QueryNetwork, LoadNetwork, ImportNetwork only"; + IE_THROW() + << "SetConfig is supported only for device family itself (without particular device .#). " + "You can pass .# as a particular device instance to QueryNetwork, LoadNetwork, ImportNetwork only"; } if (deviceName.empty()) { @@ -1115,25 +1138,22 @@ Parameter Core::GetConfig(const std::string& deviceName, const std::string& name // HETERO case { if (deviceName.find("HETERO:") == 0) { - IE_THROW() - << "You can only GetConfig of the HETERO itself (without devices). " - "GetConfig is also possible for the individual devices before creating the HETERO on top."; + IE_THROW() << "You can only GetConfig of the HETERO itself (without devices). " + "GetConfig is also possible for the individual devices before creating the HETERO on top."; } } // MULTI case { if (deviceName.find("MULTI:") == 0) { - IE_THROW() - << "You can only GetConfig of the MULTI itself (without devices). " - "GetConfig is also possible for the individual devices before creating the MULTI on top."; + IE_THROW() << "You can only GetConfig of the MULTI itself (without devices). " + "GetConfig is also possible for the individual devices before creating the MULTI on top."; } } // AUTO case { if (deviceName.find("AUTO:") == 0) { - IE_THROW() - << "You can only GetConfig of the AUTO itself (without devices). " - "GetConfig is also possible for the individual devices before creating the AUTO on top."; + IE_THROW() << "You can only GetConfig of the AUTO itself (without devices). " + "GetConfig is also possible for the individual devices before creating the AUTO on top."; } } @@ -1142,7 +1162,8 @@ Parameter Core::GetConfig(const std::string& deviceName, const std::string& name // we need to return a copy of Parameter object which is created on Core side, // not in InferenceEngine plugin side, which can be unloaded from Core in a parallel thread // TODO: remove this WA after *-31417 is resolved - return core_detail::copyParameterValue(_impl->GetCPPPluginByName(parsed._deviceName).GetConfig(name, parsed._config)); + return core_detail::copyParameterValue( + _impl->GetCPPPluginByName(parsed._deviceName).GetConfig(name, parsed._config)); } Parameter Core::GetMetric(const std::string& deviceName, const std::string& name) const { @@ -1173,7 +1194,7 @@ void Core::UnregisterPlugin(const std::string& deviceName_) { namespace ov { namespace runtime { -class Core::Impl: public core_detail::CoreImpl {}; +class Core::Impl : public core_detail::CoreImpl {}; Core::Core(const std::string& xmlConfigFile) { _impl = std::make_shared(); @@ -1187,31 +1208,40 @@ std::map Core::get_versions(const std::st #ifdef ENABLE_UNICODE_PATH_SUPPORT std::shared_ptr Core::read_model(const std::wstring& modelPath, const std::wstring& binPath) const { - return _impl->ReadNetwork(FileUtils::wStringtoMBCSstringChar(modelPath), - FileUtils::wStringtoMBCSstringChar(binPath)).getFunction(); + return _impl + ->ReadNetwork(FileUtils::wStringtoMBCSstringChar(modelPath), FileUtils::wStringtoMBCSstringChar(binPath)) + .getFunction(); } #endif std::shared_ptr Core::read_model(const std::string& modelPath, const std::string& binPath) const { return _impl->ReadNetwork(modelPath, binPath).getFunction(); } -std::shared_ptr Core::read_model(const std::string& model, const InferenceEngine::Blob::CPtr& weights) const { +std::shared_ptr Core::read_model(const std::string& model, + const InferenceEngine::Blob::CPtr& weights) const { return _impl->ReadNetwork(model, 
weights).getFunction(); } InferenceEngine::ExecutableNetwork Core::compile_model(const std::shared_ptr& network, - const std::string& deviceName, const std::map& config) { - auto exec = _impl->LoadNetwork(InferenceEngine::CNNNetwork(std::const_pointer_cast(network)), deviceName, config); - return { exec, exec }; + const std::string& deviceName, + const std::map& config) { + auto exec = _impl->LoadNetwork(InferenceEngine::CNNNetwork(std::const_pointer_cast(network)), + deviceName, + config); + return {exec, exec}; } InferenceEngine::ExecutableNetwork Core::compile_model(const std::string& modelPath, - const std::string& deviceName, const std::map& config) { + const std::string& deviceName, + const std::map& config) { auto exec = _impl->LoadNetwork(modelPath, deviceName, config); - return { exec, exec }; + return {exec, exec}; } InferenceEngine::ExecutableNetwork Core::compile_model(const std::shared_ptr& network, - const InferenceEngine::RemoteContext::Ptr& context, const std::map& config) { - auto exec = _impl->LoadNetwork(InferenceEngine::CNNNetwork(std::const_pointer_cast(network)), context, config); - return { exec, exec }; + const InferenceEngine::RemoteContext::Ptr& context, + const std::map& config) { + auto exec = _impl->LoadNetwork(InferenceEngine::CNNNetwork(std::const_pointer_cast(network)), + context, + config); + return {exec, exec}; } void Core::add_extension(const InferenceEngine::IExtensionPtr& extension) { @@ -1219,13 +1249,15 @@ void Core::add_extension(const InferenceEngine::IExtensionPtr& extension) { } InferenceEngine::ExecutableNetwork Core::import_model(std::istream& networkModel, - const std::string& deviceName, const std::map& config) { + const std::string& deviceName, + const std::map& config) { OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::import_model"); auto exec = _impl->ImportNetwork(networkModel, deviceName, config); - return { exec, exec }; + return {exec, exec}; } -InferenceEngine::ExecutableNetwork Core::import_model(std::istream& networkModel, const InferenceEngine::RemoteContext::Ptr& context, +InferenceEngine::ExecutableNetwork Core::import_model(std::istream& networkModel, + const InferenceEngine::RemoteContext::Ptr& context, const std::map& config) { OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::import_model"); @@ -1240,42 +1272,45 @@ InferenceEngine::ExecutableNetwork Core::import_model(std::istream& networkModel std::getline(networkModel, deviceName); } else { IE_THROW() << "Passed compiled stream does not contain device name. " - "Please, provide device name manually"; + "Please, provide device name manually"; } networkModel.seekg(currentPos, networkModel.beg); auto exec = _impl->GetCPPPluginByName(deviceName).ImportNetwork(networkModel, {}); - return { exec, exec }; + return {exec, exec}; } InferenceEngine::QueryNetworkResult Core::query_model(const std::shared_ptr& network, const std::string& deviceName, const std::map& config) const { - return _impl->QueryNetwork(InferenceEngine::CNNNetwork(std::const_pointer_cast(network)), deviceName, config); + return _impl->QueryNetwork(InferenceEngine::CNNNetwork(std::const_pointer_cast(network)), + deviceName, + config); } void Core::set_config(const std::map& config, const std::string& deviceName) { // HETERO case if (deviceName.find("HETERO:") == 0) { IE_THROW() << "SetConfig is supported only for HETERO itself (without devices). 
" - "You can configure the devices with SetConfig before creating the HETERO on top."; + "You can configure the devices with SetConfig before creating the HETERO on top."; } // MULTI case if (deviceName.find("MULTI:") == 0) { IE_THROW() << "SetConfig is supported only for MULTI itself (without devices). " - "You can configure the devices with SetConfig before creating the MULTI on top."; + "You can configure the devices with SetConfig before creating the MULTI on top."; } // AUTO case if (deviceName.find("AUTO:") == 0) { IE_THROW() << "SetConfig is supported only for AUTO itself (without devices). " - "You can configure the devices with SetConfig before creating the AUTO on top."; + "You can configure the devices with SetConfig before creating the AUTO on top."; } // GPU.0, FPGA.1 cases if (deviceName.find(".") != std::string::npos) { - IE_THROW() << "SetConfig is supported only for device family itself (without particular device .#). " - "You can pass .# as a particular device instance to QueryNetwork, LoadNetwork, ImportNetwork only"; + IE_THROW() + << "SetConfig is supported only for device family itself (without particular device .#). " + "You can pass .# as a particular device instance to QueryNetwork, LoadNetwork, ImportNetwork only"; } if (deviceName.empty()) { @@ -1290,25 +1325,22 @@ InferenceEngine::Parameter Core::get_config(const std::string& deviceName, const // HETERO case { if (deviceName.find("HETERO:") == 0) { - IE_THROW() - << "You can only GetConfig of the HETERO itself (without devices). " - "GetConfig is also possible for the individual devices before creating the HETERO on top."; + IE_THROW() << "You can only GetConfig of the HETERO itself (without devices). " + "GetConfig is also possible for the individual devices before creating the HETERO on top."; } } // MULTI case { if (deviceName.find("MULTI:") == 0) { - IE_THROW() - << "You can only GetConfig of the MULTI itself (without devices). " - "GetConfig is also possible for the individual devices before creating the MULTI on top."; + IE_THROW() << "You can only GetConfig of the MULTI itself (without devices). " + "GetConfig is also possible for the individual devices before creating the MULTI on top."; } } // AUTO case { if (deviceName.find("AUTO:") == 0) { - IE_THROW() - << "You can only GetConfig of the AUTO itself (without devices). " - "GetConfig is also possible for the individual devices before creating the AUTO on top."; + IE_THROW() << "You can only GetConfig of the AUTO itself (without devices). 
" + "GetConfig is also possible for the individual devices before creating the AUTO on top."; } } @@ -1317,7 +1349,8 @@ InferenceEngine::Parameter Core::get_config(const std::string& deviceName, const // we need to return a copy of Parameter object which is created on Core side, // not in InferenceEngine plugin side, which can be unloaded from Core in a parallel thread // TODO: remove this WA after *-31417 is resolved - return core_detail::copyParameterValue(_impl->GetCPPPluginByName(parsed._deviceName).GetConfig(name, parsed._config)); + return core_detail::copyParameterValue( + _impl->GetCPPPluginByName(parsed._deviceName).GetConfig(name, parsed._config)); } InferenceEngine::Parameter Core::get_metric(const std::string& deviceName, const std::string& name) const { @@ -1343,7 +1376,8 @@ void Core::register_plugins(const std::string& xmlConfigFile) { _impl->RegisterPluginsInRegistry(xmlConfigFile); } -InferenceEngine::RemoteContext::Ptr Core::create_context(const std::string& deviceName, const InferenceEngine::ParamMap& params) { +InferenceEngine::RemoteContext::Ptr Core::create_context(const std::string& deviceName, + const InferenceEngine::ParamMap& params) { if (deviceName.find("HETERO") == 0) { IE_THROW() << "HETERO device does not support remote context"; } @@ -1374,5 +1408,5 @@ InferenceEngine::RemoteContext::Ptr Core::get_default_context(const std::string& return _impl->GetCPPPluginByName(parsed._deviceName).GetDefaultContext(parsed._config); } -} // namespace runtime -} // namespace ov +} // namespace runtime +} // namespace ov diff --git a/inference-engine/src/inference_engine/src/ie_data.cpp b/inference-engine/src/inference_engine/src/ie_data.cpp index 552cbe09df8..24a7b8b020d 100644 --- a/inference-engine/src/inference_engine/src/ie_data.cpp +++ b/inference-engine/src/inference_engine/src/ie_data.cpp @@ -71,11 +71,13 @@ public: }; Data::Data(const std::string& name, Precision _precision, Layout layout) - : name(name), userObject({0}), tensorDesc(_precision, layout) { + : name(name), + userObject({0}), + tensorDesc(_precision, layout) { _impl = std::make_shared(); } -Data::Data(const std::string& name, const TensorDesc& desc): name(name), userObject({0}), tensorDesc(desc) { +Data::Data(const std::string& name, const TensorDesc& desc) : name(name), userObject({0}), tensorDesc(desc) { _impl = std::make_shared(); } @@ -103,14 +105,13 @@ void Data::reshape(const SizeVector& a_dims, Layout a_layout) { tensorDesc.reshape(a_dims, a_layout); } -Data::Data(const Data& data) : - name(data.name), userObject(data.userObject), tensorDesc(data.tensorDesc) { +Data::Data(const Data& data) : name(data.name), userObject(data.userObject), tensorDesc(data.tensorDesc) { _impl = std::make_shared(); _impl->creatorLayer = data._impl->creatorLayer; _impl->inputTo = data._impl->inputTo; } -Data & Data::operator = (const Data& data) { +Data& Data::operator=(const Data& data) { if (this != &data) { name = data.name; userObject = data.userObject; @@ -151,15 +152,15 @@ const SizeVector& Data::getDims() const { namespace InferenceEngine { -INFERENCE_ENGINE_API_CPP(CNNLayerWeakPtr&) getCreatorLayer(const DataPtr & data) { +INFERENCE_ENGINE_API_CPP(CNNLayerWeakPtr&) getCreatorLayer(const DataPtr& data) { return data->_impl->creatorLayer; } -INFERENCE_ENGINE_API_CPP(std::map&) getInputTo(const DataPtr & data) { +INFERENCE_ENGINE_API_CPP(std::map&) getInputTo(const DataPtr& data) { return data->_impl->inputTo; } -INFERENCE_ENGINE_API_CPP(std::map&) getInputTo(Data * data) { +INFERENCE_ENGINE_API_CPP(std::map&) 
getInputTo(Data* data) { return data->_impl->inputTo; } diff --git a/inference-engine/src/inference_engine/src/ie_itt.hpp b/inference-engine/src/inference_engine/src/ie_itt.hpp index c36b550f492..40dae86a1b1 100644 --- a/inference-engine/src/inference_engine/src/ie_itt.hpp +++ b/inference-engine/src/inference_engine/src/ie_itt.hpp @@ -14,7 +14,7 @@ namespace InferenceEngine { namespace itt { namespace domains { - OV_ITT_DOMAIN(IE_LT); +OV_ITT_DOMAIN(IE_LT); } // namespace domains } // namespace itt } // namespace InferenceEngine @@ -22,8 +22,8 @@ namespace domains { namespace ov { namespace itt { namespace domains { - OV_ITT_DOMAIN(IE); - OV_ITT_DOMAIN(IE_RT); +OV_ITT_DOMAIN(IE); +OV_ITT_DOMAIN(IE_RT); } // namespace domains } // namespace itt } // namespace ov diff --git a/inference-engine/src/inference_engine/src/ie_layouts.cpp b/inference-engine/src/inference_engine/src/ie_layouts.cpp index a9308877e7d..9cb98c67152 100644 --- a/inference-engine/src/inference_engine/src/ie_layouts.cpp +++ b/inference-engine/src/inference_engine/src/ie_layouts.cpp @@ -10,17 +10,20 @@ using namespace InferenceEngine; TensorDesc::TensorDesc(const Precision& precision, const SizeVector& dims, Layout layout) - : precision(precision), blockingDesc(dims, layout) { + : precision(precision), + blockingDesc(dims, layout) { this->dims = dims; this->layout = layout; } -TensorDesc::TensorDesc(const Precision& precision, Layout layout): precision(precision), blockingDesc() { +TensorDesc::TensorDesc(const Precision& precision, Layout layout) : precision(precision), blockingDesc() { this->layout = layout; } TensorDesc::TensorDesc(const Precision& precision, const SizeVector& dims, const BlockingDesc& blockDesc) - : dims(dims), precision(precision), blockingDesc(blockDesc) { + : dims(dims), + precision(precision), + blockingDesc(blockDesc) { if (dims.size() == 0 || blockingDesc.getBlockDims().size() == 0) { layout = Layout::SCALAR; return; @@ -43,7 +46,8 @@ TensorDesc::TensorDesc(const Precision& precision, const SizeVector& dims, const case 3: if (blockingDesc.getOrder()[0] == 0 && blockingDesc.getOrder()[1] == 1 && blockingDesc.getOrder()[2] == 2) { layout = Layout::CHW; - } else if (blockingDesc.getOrder()[0] == 1 && blockingDesc.getOrder()[1] == 2 && blockingDesc.getOrder()[2] == 0) { + } else if (blockingDesc.getOrder()[0] == 1 && blockingDesc.getOrder()[1] == 2 && + blockingDesc.getOrder()[2] == 0) { layout = Layout::HWC; } break; @@ -81,7 +85,8 @@ void TensorDesc::setDims(const SizeVector& dims) { if (layout == Layout::BLOCKED) { auto newDims = blockingDesc.getBlockDims(); auto newOrder = blockingDesc.getOrder(); - if (newDims.empty()) newDims = dims; + if (newDims.empty()) + newDims = dims; if (newOrder.empty()) { for (size_t i = 0; i < newDims.size(); i++) { newOrder.push_back(i); @@ -93,7 +98,8 @@ void TensorDesc::setDims(const SizeVector& dims) { IE_THROW() << "Cannot set dimensions for SCALAR layout!"; blockingDesc = BlockingDesc(dims, layout); } - if (layout != Layout::SCALAR) this->dims = dims; + if (layout != Layout::SCALAR) + this->dims = dims; } void TensorDesc::setLayout(Layout l) { @@ -138,13 +144,12 @@ void TensorDesc::setLayout(Layout l) { } if (inconsistentLayout) { - IE_THROW() << "Size of dims(" << std::to_string(dims.size()) << ") and format(" << l - << ") are inconsistent."; + IE_THROW() << "Size of dims(" << std::to_string(dims.size()) << ") and format(" << l << ") are inconsistent."; } - // HACK: we need to update BlockingDesc after layout change, but if it was set manually not sure how to di 
this properly - const bool hasDefaultBlockingDesc = - blockingDesc == BlockingDesc(dims, layout); + // HACK: we need to update BlockingDesc after layout change, but if it was set manually not sure how to di this + // properly + const bool hasDefaultBlockingDesc = blockingDesc == BlockingDesc(dims, layout); layout = l; @@ -185,9 +190,11 @@ Layout TensorDesc::getLayoutByDims(const SizeVector& dims) { } size_t TensorDesc::offset(const SizeVector& v) const { - if (layout == Layout::ANY) IE_THROW() << "Cannot calculate offset for any format!"; + if (layout == Layout::ANY) + IE_THROW() << "Cannot calculate offset for any format!"; - if (layout == Layout::SCALAR) return blockingDesc.getOffsetPadding(); + if (layout == Layout::SCALAR) + return blockingDesc.getOffsetPadding(); SizeVector off_v = v; const SizeVector& blockedDims = blockingDesc.getBlockDims(); @@ -225,7 +232,8 @@ size_t TensorDesc::offset(size_t l) const { void TensorDesc::reshape(const SizeVector& dims, Layout layout) { for (auto& padd : blockingDesc.getOffsetPaddingToData()) { - if (padd) IE_THROW() << "Cannot reshape a non-packaged blob!"; + if (padd) + IE_THROW() << "Cannot reshape a non-packaged blob!"; } if (layout != Layout::ANY) { blockingDesc = BlockingDesc(dims, layout); @@ -242,20 +250,23 @@ void TensorDesc::reshape(const SizeVector& dims, const BlockingDesc& blockDesc) this->layout = Layout::BLOCKED; } -BlockingDesc::BlockingDesc(const SizeVector& block_dims, const SizeVector& order): offsetPadding(0) { +BlockingDesc::BlockingDesc(const SizeVector& block_dims, const SizeVector& order) : offsetPadding(0) { this->order = order; - if (block_dims.empty() || order.empty()) return; + if (block_dims.empty() || order.empty()) + return; fillDesc(block_dims, order); } -BlockingDesc::BlockingDesc(): BlockingDesc({}, Layout::ANY) {} +BlockingDesc::BlockingDesc() : BlockingDesc({}, Layout::ANY) {} BlockingDesc::BlockingDesc(const SizeVector& blocked_dims, const SizeVector& order, size_t offset) : BlockingDesc(blocked_dims, order) { this->offsetPadding = offset; } -BlockingDesc::BlockingDesc(const SizeVector& blocked_dims, const SizeVector& order, size_t offset, +BlockingDesc::BlockingDesc(const SizeVector& blocked_dims, + const SizeVector& order, + size_t offset, const SizeVector& dimOffsets) : BlockingDesc(blocked_dims, order) { this->offsetPadding = offset; @@ -264,23 +275,29 @@ BlockingDesc::BlockingDesc(const SizeVector& blocked_dims, const SizeVector& ord this->offsetPaddingToData = dimOffsets; } -BlockingDesc::BlockingDesc(const SizeVector& blocked_dims, const SizeVector& order, size_t offset, - const SizeVector& dimOffsets, const SizeVector& strides) +BlockingDesc::BlockingDesc(const SizeVector& blocked_dims, + const SizeVector& order, + size_t offset, + const SizeVector& dimOffsets, + const SizeVector& strides) : BlockingDesc(blocked_dims, order) { this->offsetPadding = offset; - if (blocked_dims.size() != strides.size()) IE_THROW() << "Strides are not initialized for all dimensions."; + if (blocked_dims.size() != strides.size()) + IE_THROW() << "Strides are not initialized for all dimensions."; this->strides = strides; if (blocked_dims.size() != dimOffsets.size()) IE_THROW() << "Offsets are not initialized for all dimensions."; this->offsetPaddingToData = dimOffsets; } -BlockingDesc::BlockingDesc(const SizeVector& dims, Layout layout): offsetPadding(0) { - if (dims.empty()) return; +BlockingDesc::BlockingDesc(const SizeVector& dims, Layout layout) : offsetPadding(0) { + if (dims.empty()) + return; offsetPadding = 0; auto 
checkDims = [](size_t r_size, size_t e_size) { - if (r_size != e_size) IE_THROW() << "Dims and format are inconsistent."; + if (r_size != e_size) + IE_THROW() << "Dims and format are inconsistent."; }; SizeVector l_order; SizeVector l_dims; @@ -344,7 +361,8 @@ BlockingDesc::BlockingDesc(const SizeVector& dims, Layout layout): offsetPadding break; case Layout::BLOCKED: l_order.clear(); - for (size_t i = 0; i < dims.size(); i++) l_order.push_back(i); + for (size_t i = 0; i < dims.size(); i++) + l_order.push_back(i); l_dims = dims; break; } @@ -389,22 +407,16 @@ struct DimSlice { DimSlice() = default; - DimSlice(size_t startInd, size_t size) : - startInd(startInd), size(size) { - } + DimSlice(size_t startInd, size_t size) : startInd(startInd), size(size) {} }; using TensorSlice = std::vector; -void checkROI( - const TensorDesc& origDesc, - const TensorSlice& roi) { +void checkROI(const TensorDesc& origDesc, const TensorSlice& roi) { const auto numDims = origDesc.getDims().size(); if (roi.size() != numDims) { - IE_THROW() - << "ROI num dims " << roi.size() << - " differs from original num dims " << numDims; + IE_THROW() << "ROI num dims " << roi.size() << " differs from original num dims " << numDims; } // TensorDesc stores dimensions in standard layout, as well as roi vector @@ -415,18 +427,13 @@ void checkROI( const auto endInd = roiSlice.startInd + roiSlice.size; if (endInd > fullSize) { - IE_THROW() - << "ROI [" << roiSlice.startInd << ", " << endInd << ")" - << " is out of range " << fullSize - << " for dimension " << dimInd; + IE_THROW() << "ROI [" << roiSlice.startInd << ", " << endInd << ")" + << " is out of range " << fullSize << " for dimension " << dimInd; } } } -TensorDesc make_roi_desc( - const TensorDesc& origDesc, - const TensorSlice& roi, - bool useOrigMemDesc) { +TensorDesc make_roi_desc(const TensorDesc& origDesc, const TensorSlice& roi, bool useOrigMemDesc) { const auto numDims = origDesc.getDims().size(); checkROI(origDesc, roi); @@ -447,7 +454,8 @@ TensorDesc make_roi_desc( IE_ASSERT(roiBlkDimOffsets.size() == numDims); // BlockingDesc stores dimensions in memory order, so we need to use origOrder array. - // Offsets in `roi` relates to `origDesc` dimensions, while offsets in `BlockingDesc` relates to top parent tensor dimensions. + // Offsets in `roi` relates to `origDesc` dimensions, while offsets in `BlockingDesc` relates to top parent tensor + // dimensions. for (size_t memInd = 0; memInd < numDims; ++memInd) { const auto dimInd = origBlkOrder[memInd]; const auto& roiSlice = roi[dimInd]; @@ -458,39 +466,32 @@ TensorDesc make_roi_desc( roiBlkOffset += roiSlice.startInd * origBlkStrides[memInd]; } - const auto roiBlkDesc = - useOrigMemDesc ? - BlockingDesc(roiBlkDims, origBlkOrder, roiBlkOffset, roiBlkDimOffsets, origBlkStrides) : - BlockingDesc(roiBlkDims, origBlkOrder); + const auto roiBlkDesc = useOrigMemDesc + ? 
BlockingDesc(roiBlkDims, origBlkOrder, roiBlkOffset, roiBlkDimOffsets, origBlkStrides) + : BlockingDesc(roiBlkDims, origBlkOrder); const auto roiDesc = TensorDesc(origPrecision, roiDims, roiBlkDesc); return roiDesc; } -TensorSlice make_roi_slice( - const TensorDesc& origDesc, - const ROI& roi) { +TensorSlice make_roi_slice(const TensorDesc& origDesc, const ROI& roi) { const auto layout = origDesc.getLayout(); if (layout != Layout::NCHW && layout != Layout::NHWC) { - IE_THROW() - << "Unsupported layout " << layout; + IE_THROW() << "Unsupported layout " << layout; } TensorSlice roiSlice(4); - roiSlice[0] = DimSlice {roi.id, 1}; // N - roiSlice[1] = DimSlice {0, origDesc.getDims()[1]}; // C - roiSlice[2] = DimSlice {roi.posY, roi.sizeY}; // H - roiSlice[3] = DimSlice {roi.posX, roi.sizeX}; // W + roiSlice[0] = DimSlice{roi.id, 1}; // N + roiSlice[1] = DimSlice{0, origDesc.getDims()[1]}; // C + roiSlice[2] = DimSlice{roi.posY, roi.sizeY}; // H + roiSlice[3] = DimSlice{roi.posX, roi.sizeX}; // W return roiSlice; } } // namespace -TensorDesc InferenceEngine::make_roi_desc( - const TensorDesc& origDesc, - const ROI& roi, - bool useOrigMemDesc) { +TensorDesc InferenceEngine::make_roi_desc(const TensorDesc& origDesc, const ROI& roi, bool useOrigMemDesc) { return make_roi_desc(origDesc, make_roi_slice(origDesc, roi), useOrigMemDesc); } diff --git a/inference-engine/src/inference_engine/src/ie_memcpy.cpp b/inference-engine/src/inference_engine/src/ie_memcpy.cpp index 32d9191577b..20e75286aaa 100644 --- a/inference-engine/src/inference_engine/src/ie_memcpy.cpp +++ b/inference-engine/src/inference_engine/src/ie_memcpy.cpp @@ -16,6 +16,7 @@ int ie_memcpy(void* dest, size_t destsz, void const* src, size_t count) { return -1; } - for (i = 0; i < count; ++i) (reinterpret_cast(dest))[i] = (reinterpret_cast(src))[i]; + for (i = 0; i < count; ++i) + (reinterpret_cast(dest))[i] = (reinterpret_cast(src))[i]; return 0; } diff --git a/inference-engine/src/inference_engine/src/ie_network_reader.cpp b/inference-engine/src/inference_engine/src/ie_network_reader.cpp index 792f95eb74d..f3c95ac4ed2 100644 --- a/inference-engine/src/inference_engine/src/ie_network_reader.cpp +++ b/inference-engine/src/inference_engine/src/ie_network_reader.cpp @@ -3,18 +3,18 @@ // #include "ie_network_reader.hpp" -#include "ie_itt.hpp" - -#include
-#include -#include -#include -#include #include #include -#include #include +#include + +#include "details/ie_so_pointer.hpp" +#include "file_utils.h" +#include "frontend_manager/frontend_manager.hpp" +#include "ie_ir_version.hpp" +#include "ie_itt.hpp" +#include "ie_reader.hpp" namespace InferenceEngine { @@ -37,16 +37,17 @@ public: /** * @brief This class is a wrapper for reader interfaces */ -class Reader: public IReader { +class Reader : public IReader { InferenceEngine::details::SOPointer ptr; std::once_flag readFlag; std::string name; std::string location; InferenceEngine::details::SOPointer getReaderPtr() { - std::call_once(readFlag, [&] () { + std::call_once(readFlag, [&]() { FileUtils::FilePath libraryName = FileUtils::toFilePath(location); - FileUtils::FilePath readersLibraryPath = FileUtils::makePluginLibraryName(getInferenceEngineLibraryPath(), libraryName); + FileUtils::FilePath readersLibraryPath = + FileUtils::makePluginLibraryName(getInferenceEngineLibraryPath(), libraryName); if (!FileUtils::fileExist(readersLibraryPath)) { IE_THROW() << "Please, make sure that Inference Engine ONNX reader library " @@ -65,7 +66,7 @@ class Reader: public IReader { public: using Ptr = std::shared_ptr; - Reader(const std::string& name, const std::string location): name(name), location(location) {} + Reader(const std::string& name, const std::string location) : name(name), location(location) {} bool supportModel(std::istream& model) const override { OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Reader::supportModel"); auto reader = getReaderPtr(); @@ -75,7 +76,9 @@ public: auto reader = getReaderPtr(); return reader->read(model, exts); } - CNNNetwork read(std::istream& model, const Blob::CPtr& weights, const std::vector& exts) const override { + CNNNetwork read(std::istream& model, + const Blob::CPtr& weights, + const std::vector& exts) const override { auto reader = getReaderPtr(); return reader->read(model, weights, exts); } @@ -98,12 +101,14 @@ void registerReaders() { static bool initialized = false; static std::mutex readerMutex; std::lock_guard lock(readerMutex); - if (initialized) return; + if (initialized) + return; // TODO: Read readers info from XML - auto create_if_exists = [] (const std::string name, const std::string library_name) { + auto create_if_exists = [](const std::string name, const std::string library_name) { FileUtils::FilePath libraryName = FileUtils::toFilePath(library_name); - FileUtils::FilePath readersLibraryPath = FileUtils::makePluginLibraryName(getInferenceEngineLibraryPath(), libraryName); + FileUtils::FilePath readersLibraryPath = + FileUtils::makePluginLibraryName(getInferenceEngineLibraryPath(), libraryName); if (!FileUtils::fileExist(readersLibraryPath)) return std::shared_ptr(); @@ -111,47 +116,53 @@ void registerReaders() { }; // try to load ONNX reader if library exists - auto onnxReader = create_if_exists("ONNX", std::string("inference_engine_onnx_reader") + std::string(IE_BUILD_POSTFIX)); + auto onnxReader = + create_if_exists("ONNX", std::string("inference_engine_onnx_reader") + std::string(IE_BUILD_POSTFIX)); if (onnxReader) { readers.emplace("onnx", onnxReader); readers.emplace("prototxt", onnxReader); } // try to load IR reader v10 if library exists - auto irReaderv10 = create_if_exists("IRv10", std::string("inference_engine_ir_reader") + std::string(IE_BUILD_POSTFIX)); + auto irReaderv10 = + create_if_exists("IRv10", std::string("inference_engine_ir_reader") + std::string(IE_BUILD_POSTFIX)); if (irReaderv10) readers.emplace("xml", irReaderv10); // 
try to load IR reader v7 if library exists - auto irReaderv7 = create_if_exists("IRv7", std::string("inference_engine_ir_v7_reader") + std::string(IE_BUILD_POSTFIX)); + auto irReaderv7 = + create_if_exists("IRv7", std::string("inference_engine_ir_v7_reader") + std::string(IE_BUILD_POSTFIX)); if (irReaderv7) readers.emplace("xml", irReaderv7); initialized = true; } -void assertIfIRv7LikeModel(std::istream & modelStream) { +void assertIfIRv7LikeModel(std::istream& modelStream) { auto irVersion = details::GetIRVersion(modelStream); bool isIRv7 = irVersion > 1 && irVersion <= 7; if (!isIRv7) return; - for (auto && kvp : readers) { + for (auto&& kvp : readers) { Reader::Ptr reader = kvp.second; if (reader->getName() == "IRv7") { return; } } - IE_THROW() << "The support of IR v" << irVersion << " has been removed from the product. " - "Please, convert the original model using the Model Optimizer which comes with this " - "version of the OpenVINO to generate supported IR version."; + IE_THROW() << "The support of IR v" << irVersion + << " has been removed from the product. " + "Please, convert the original model using the Model Optimizer which comes with this " + "version of the OpenVINO to generate supported IR version."; } } // namespace -CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& binPath, const std::vector& exts) { +CNNNetwork details::ReadNetwork(const std::string& modelPath, + const std::string& binPath, + const std::vector& exts) { // Register readers if it is needed registerReaders(); @@ -183,7 +194,8 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& if (bPath.empty()) { auto pathWoExt = modelPath; auto pos = modelPath.rfind('.'); - if (pos != std::string::npos) pathWoExt = modelPath.substr(0, pos); + if (pos != std::string::npos) + pathWoExt = modelPath.substr(0, pos); for (const auto& ext : reader->getDataFileExtensions()) { bPath = pathWoExt + "." + ext; if (!FileUtils::fileExist(bPath)) { @@ -209,7 +221,7 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& size_t fileSize = binStream.tellg(); binStream.seekg(0, std::ios::beg); - Blob::Ptr weights = make_shared_blob({Precision::U8, { fileSize }, C }); + Blob::Ptr weights = make_shared_blob({Precision::U8, {fileSize}, C}); { OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "ReadNetworkWeights"); @@ -238,20 +250,24 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& std::string weights_path = binPath; #endif FE = manager.load_by_model(model_path, weights_path); - if (FE) inputModel = FE->load(model_path, weights_path); + if (FE) + inputModel = FE->load(model_path, weights_path); } else { FE = manager.load_by_model(model_path); - if (FE) inputModel = FE->load(model_path); + if (FE) + inputModel = FE->load(model_path); } if (inputModel) { auto ngFunc = FE->convert(inputModel); return CNNNetwork(ngFunc); } - IE_THROW() << "Unknown model format! Cannot find reader for model format: " << fileExt << " and read the model: " << modelPath << - ". Please check that reader library exists in your PATH."; + IE_THROW() << "Unknown model format! Cannot find reader for model format: " << fileExt + << " and read the model: " << modelPath << ". 
Please check that reader library exists in your PATH."; } -CNNNetwork details::ReadNetwork(const std::string& model, const Blob::CPtr& weights, const std::vector& exts) { +CNNNetwork details::ReadNetwork(const std::string& model, + const Blob::CPtr& weights, + const std::vector& exts) { // Register readers if it is needed registerReaders(); std::istringstream modelStream(model); @@ -266,7 +282,8 @@ CNNNetwork details::ReadNetwork(const std::string& model, const Blob::CPtr& weig return reader->read(modelStream, exts); } } - IE_THROW() << "Unknown model format! Cannot find reader for the model and read it. Please check that reader library exists in your PATH."; + IE_THROW() << "Unknown model format! Cannot find reader for the model and read it. Please check that reader " + "library exists in your PATH."; } } // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/src/ie_network_reader.hpp b/inference-engine/src/inference_engine/src/ie_network_reader.hpp index 2e8a16041f1..6939e8c3fec 100644 --- a/inference-engine/src/inference_engine/src/ie_network_reader.hpp +++ b/inference-engine/src/inference_engine/src/ie_network_reader.hpp @@ -4,11 +4,12 @@ #pragma once -#include -#include -#include #include +#include "cpp/ie_cnn_network.h" +#include "ie_blob.h" +#include "ie_iextension.h" + namespace InferenceEngine { namespace details { @@ -20,7 +21,9 @@ namespace details { * @param exts vector with extensions * @return CNNNetwork */ -CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath, const std::vector& exts); +CNNNetwork ReadNetwork(const std::string& modelPath, + const std::string& binPath, + const std::vector& exts); /** * @brief Reads IR xml and bin (with the same name) files * @param model string with IR diff --git a/inference-engine/src/inference_engine/src/ie_ngraph_utils.cpp b/inference-engine/src/inference_engine/src/ie_ngraph_utils.cpp index fb32d7526eb..b81b61fa800 100644 --- a/inference-engine/src/inference_engine/src/ie_ngraph_utils.cpp +++ b/inference-engine/src/inference_engine/src/ie_ngraph_utils.cpp @@ -2,7 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include "ie_ngraph_utils.hpp" + #include "cnn_network_ngraph_impl.hpp" #include "ie_itt.hpp" diff --git a/inference-engine/src/inference_engine/src/ie_system_conf.cpp b/inference-engine/src/inference_engine/src/ie_system_conf.cpp index 2626ad7e424..067378a8d7e 100644 --- a/inference-engine/src/inference_engine/src/ie_system_conf.cpp +++ b/inference-engine/src/inference_engine/src/ie_system_conf.cpp @@ -2,16 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "ie_system_conf.h" + #include #include #include #include "threading/ie_parallel_custom_arena.hpp" -#include "ie_system_conf.h" -# define XBYAK_NO_OP_NAMES -# define XBYAK_UNDEF_JNL -# include +#define XBYAK_NO_OP_NAMES +#define XBYAK_UNDEF_JNL +#include namespace InferenceEngine { @@ -37,9 +38,7 @@ bool with_cpu_x86_avx512f() { } bool with_cpu_x86_avx512_core() { - return get_cpu_info().has(Xbyak::util::Cpu::tAVX512F | - Xbyak::util::Cpu::tAVX512DQ | - Xbyak::util::Cpu::tAVX512BW); + return get_cpu_info().has(Xbyak::util::Cpu::tAVX512F | Xbyak::util::Cpu::tAVX512DQ | Xbyak::util::Cpu::tAVX512BW); } bool with_cpu_x86_bfloat16() { @@ -47,38 +46,36 @@ bool with_cpu_x86_bfloat16() { } bool checkOpenMpEnvVars(bool includeOMPNumThreads) { - for (auto&& var : { - "GOMP_CPU_AFFINITY", - "GOMP_DEBUG" - "GOMP_RTEMS_THREAD_POOLS", - "GOMP_SPINCOUNT" - "GOMP_STACKSIZE" - "KMP_AFFINITY" - "KMP_NUM_THREADS" - 
"MIC_KMP_AFFINITY", - "MIC_OMP_NUM_THREADS" - "MIC_OMP_PROC_BIND" - "MKL_DOMAIN_NUM_THREADS" - "MKL_DYNAMIC" - "MKL_NUM_THREADS", - "OMP_CANCELLATION" - "OMP_DEFAULT_DEVICE" - "OMP_DISPLAY_ENV" - "OMP_DYNAMIC", - "OMP_MAX_ACTIVE_LEVELS" - "OMP_MAX_TASK_PRIORITY" - "OMP_NESTED", - "OMP_NUM_THREADS" - "OMP_PLACES" - "OMP_PROC_BIND" - "OMP_SCHEDULE" - "OMP_STACKSIZE", - "OMP_THREAD_LIMIT" - "OMP_WAIT_POLICY" - "PHI_KMP_AFFINITY", - "PHI_KMP_PLACE_THREADS" - "PHI_OMP_NUM_THREADS" - }) { + for (auto&& var : {"GOMP_CPU_AFFINITY", + "GOMP_DEBUG" + "GOMP_RTEMS_THREAD_POOLS", + "GOMP_SPINCOUNT" + "GOMP_STACKSIZE" + "KMP_AFFINITY" + "KMP_NUM_THREADS" + "MIC_KMP_AFFINITY", + "MIC_OMP_NUM_THREADS" + "MIC_OMP_PROC_BIND" + "MKL_DOMAIN_NUM_THREADS" + "MKL_DYNAMIC" + "MKL_NUM_THREADS", + "OMP_CANCELLATION" + "OMP_DEFAULT_DEVICE" + "OMP_DISPLAY_ENV" + "OMP_DYNAMIC", + "OMP_MAX_ACTIVE_LEVELS" + "OMP_MAX_TASK_PRIORITY" + "OMP_NESTED", + "OMP_NUM_THREADS" + "OMP_PLACES" + "OMP_PROC_BIND" + "OMP_SCHEDULE" + "OMP_STACKSIZE", + "OMP_THREAD_LIMIT" + "OMP_WAIT_POLICY" + "PHI_KMP_AFFINITY", + "PHI_KMP_PLACE_THREADS" + "PHI_OMP_NUM_THREADS"}) { if (getenv(var)) { if (0 != strcmp(var, "OMP_NUM_THREADS") || includeOMPNumThreads) return true; @@ -90,10 +87,14 @@ bool checkOpenMpEnvVars(bool includeOMPNumThreads) { #if defined(__APPLE__) // for Linux and Windows the getNumberOfCPUCores (that accounts only for physical cores) implementation is OS-specific // (see cpp files in corresponding folders), for __APPLE__ it is default : -int getNumberOfCPUCores(bool) { return parallel_get_max_threads();} -#if !((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO)) -std::vector getAvailableNUMANodes() { return {-1}; } -#endif +int getNumberOfCPUCores(bool) { + return parallel_get_max_threads(); +} +# if !((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO)) +std::vector getAvailableNUMANodes() { + return {-1}; +} +# endif #endif #if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO)) diff --git a/inference-engine/src/inference_engine/src/ie_transformations.cpp b/inference-engine/src/inference_engine/src/ie_transformations.cpp index 2a87671ce25..6c0f5a1c69f 100644 --- a/inference-engine/src/inference_engine/src/ie_transformations.cpp +++ b/inference-engine/src/inference_engine/src/ie_transformations.cpp @@ -3,12 +3,13 @@ // #include "ie_transformations.hpp" -#include -#include + +#include "ngraph/pass/low_latency.hpp" +#include "ngraph/pass/manager.hpp" using namespace InferenceEngine; -void InferenceEngine::LowLatency(InferenceEngine::CNNNetwork &network) { +void InferenceEngine::LowLatency(InferenceEngine::CNNNetwork& network) { auto function = network.getFunction(); ngraph::pass::Manager manager; NGRAPH_SUPPRESS_DEPRECATED_START @@ -17,8 +18,7 @@ void InferenceEngine::LowLatency(InferenceEngine::CNNNetwork &network) { manager.run_passes(function); } -void InferenceEngine::lowLatency2(InferenceEngine::CNNNetwork &network, - bool use_const_initializer) { +void InferenceEngine::lowLatency2(InferenceEngine::CNNNetwork& network, bool use_const_initializer) { auto function = network.getFunction(); ngraph::pass::Manager manager; manager.register_pass(use_const_initializer); diff --git a/inference-engine/src/inference_engine/src/os/lin/lin_shared_object_loader.cpp b/inference-engine/src/inference_engine/src/os/lin/lin_shared_object_loader.cpp index eda4e5230a1..6d3cfa87e31 100644 --- a/inference-engine/src/inference_engine/src/os/lin/lin_shared_object_loader.cpp +++ 
b/inference-engine/src/inference_engine/src/os/lin/lin_shared_object_loader.cpp @@ -3,11 +3,11 @@ // #include + #include #include "details/ie_so_loader.h" #include "file_utils.h" -#include namespace InferenceEngine { namespace details { @@ -25,8 +25,7 @@ public: } #ifdef ENABLE_UNICODE_PATH_SUPPORT - explicit Impl(const wchar_t* pluginName) : Impl(FileUtils::wStringtoMBCSstringChar(pluginName).c_str()) { - } + explicit Impl(const wchar_t* pluginName) : Impl(FileUtils::wStringtoMBCSstringChar(pluginName).c_str()) {} #endif // ENABLE_UNICODE_PATH_SUPPORT ~Impl() { @@ -46,8 +45,7 @@ public: procAddr = dlsym(shared_object, symbolName); if (procAddr == nullptr) - IE_THROW(NotFound) - << "dlSym cannot locate method '" << symbolName << "': " << dlerror(); + IE_THROW(NotFound) << "dlSym cannot locate method '" << symbolName << "': " << dlerror(); return procAddr; } }; @@ -58,7 +56,7 @@ SharedObjectLoader::SharedObjectLoader(const wchar_t* pluginName) { } #endif -SharedObjectLoader::SharedObjectLoader(const char * pluginName) { +SharedObjectLoader::SharedObjectLoader(const char* pluginName) { _impl.reset(new Impl(pluginName)); } diff --git a/inference-engine/src/inference_engine/src/os/lin/lin_system_conf.cpp b/inference-engine/src/inference_engine/src/os/lin/lin_system_conf.cpp index fd33bcf2862..93743fb998e 100644 --- a/inference-engine/src/inference_engine/src/os/lin/lin_system_conf.cpp +++ b/inference-engine/src/inference_engine/src/os/lin/lin_system_conf.cpp @@ -2,35 +2,36 @@ // SPDX-License-Identifier: Apache-2.0 // +#include + #include +#include #include +#include #include #include -#include -#include -#include #include "ie_common.h" #include "ie_system_conf.h" #include "threading/ie_parallel_custom_arena.hpp" - namespace InferenceEngine { struct CPU { int _processors = 0; - int _sockets = 0; - int _cores = 0; + int _sockets = 0; + int _cores = 0; CPU() { std::ifstream cpuinfo("/proc/cpuinfo"); - std::vector processors; - std::map sockets; + std::vector processors; + std::map sockets; int socketId = 0; while (!cpuinfo.eof()) { std::string line; std::getline(cpuinfo, line); - if (line.empty()) continue; + if (line.empty()) + continue; auto delimeter = line.find(':'); auto key = line.substr(0, delimeter); auto value = line.substr(delimeter + 1); @@ -83,14 +84,13 @@ int getNumberOfCPUCores(bool bigCoresOnly) { } } int phys_cores = CPU_COUNT(¤tCoreSet); - #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) +#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) auto core_types = custom::info::core_types(); if (bigCoresOnly && core_types.size() > 1) /*Hybrid CPU*/ { - phys_cores = custom::info::default_concurrency(custom::task_arena::constraints{} - .set_core_type(core_types.back()) - .set_max_threads_per_core(1)); + phys_cores = custom::info::default_concurrency( + custom::task_arena::constraints{}.set_core_type(core_types.back()).set_max_threads_per_core(1)); } - #endif +#endif return phys_cores; } diff --git a/inference-engine/src/inference_engine/src/precomp.hpp b/inference-engine/src/inference_engine/src/precomp.hpp index b604a638d7d..7880b97bea6 100644 --- a/inference-engine/src/inference_engine/src/precomp.hpp +++ b/inference-engine/src/inference_engine/src/precomp.hpp @@ -4,10 +4,12 @@ #pragma once -#include -#include - #include +#include +#include +#include +#include +#include #include #include #include @@ -25,8 +27,5 @@ #include #include -#include -#include -#include -#include -#include +#include "ngraph/ngraph.hpp" +#include "ngraph/ops.hpp" diff --git 
a/inference-engine/src/inference_engine/src/threading/ie_cpu_streams_executor.cpp b/inference-engine/src/inference_engine/src/threading/ie_cpu_streams_executor.cpp index 1480f2baf1d..c31e0132bfe 100644 --- a/inference-engine/src/inference_engine/src/threading/ie_cpu_streams_executor.cpp +++ b/inference-engine/src/inference_engine/src/threading/ie_cpu_streams_executor.cpp @@ -2,24 +2,25 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include +#include "threading/ie_cpu_streams_executor.hpp" + +#include +#include +#include +#include #include #include -#include -#include +#include #include -#include -#include -#include +#include +#include #include +#include -#include "threading/ie_thread_local.hpp" #include "ie_parallel_custom_arena.hpp" #include "ie_system_conf.h" #include "threading/ie_thread_affinity.hpp" -#include "threading/ie_cpu_streams_executor.hpp" -#include +#include "threading/ie_thread_local.hpp" using namespace openvino; @@ -27,26 +28,28 @@ namespace InferenceEngine { struct CPUStreamsExecutor::Impl { struct Stream { #if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO - struct Observer: public custom::task_scheduler_observer { - CpuSet _mask; - int _ncpus = 0; - int _threadBindingStep = 0; - int _offset = 0; - Observer(custom::task_arena& arena, - CpuSet mask, - int ncpus, - const int streamId, - const int threadsPerStream, - const int threadBindingStep, - const int threadBindingOffset) : - custom::task_scheduler_observer(arena), - _mask{std::move(mask)}, - _ncpus(ncpus), - _threadBindingStep(threadBindingStep), - _offset{streamId * threadsPerStream + threadBindingOffset} { - } + struct Observer : public custom::task_scheduler_observer { + CpuSet _mask; + int _ncpus = 0; + int _threadBindingStep = 0; + int _offset = 0; + Observer(custom::task_arena& arena, + CpuSet mask, + int ncpus, + const int streamId, + const int threadsPerStream, + const int threadBindingStep, + const int threadBindingOffset) + : custom::task_scheduler_observer(arena), + _mask{std::move(mask)}, + _ncpus(ncpus), + _threadBindingStep(threadBindingStep), + _offset{streamId * threadsPerStream + threadBindingOffset} {} void on_scheduler_entry(bool) override { - PinThreadToVacantCore(_offset + tbb::this_task_arena::current_thread_index(), _threadBindingStep, _ncpus, _mask); + PinThreadToVacantCore(_offset + tbb::this_task_arena::current_thread_index(), + _threadBindingStep, + _ncpus, + _mask); } void on_scheduler_exit(bool) override { PinCurrentThreadByMask(_ncpus, _mask); @@ -54,8 +57,7 @@ struct CPUStreamsExecutor::Impl { ~Observer() override = default; }; #endif - explicit Stream(Impl* impl) : - _impl(impl) { + explicit Stream(Impl* impl) : _impl(impl) { { std::lock_guard lock{_impl->_streamIdMutex}; if (_impl->_streamIdQueue.empty()) { @@ -66,40 +68,52 @@ struct CPUStreamsExecutor::Impl { } } _numaNodeId = _impl->_config._streams - ? _impl->_usedNumaNodes.at( - (_streamId % _impl->_config._streams)/ - ((_impl->_config._streams + _impl->_usedNumaNodes.size() - 1)/_impl->_usedNumaNodes.size())) - : _impl->_usedNumaNodes.at(_streamId % _impl->_usedNumaNodes.size()); + ? _impl->_usedNumaNodes.at((_streamId % _impl->_config._streams) / + ((_impl->_config._streams + _impl->_usedNumaNodes.size() - 1) / + _impl->_usedNumaNodes.size())) + : _impl->_usedNumaNodes.at(_streamId % _impl->_usedNumaNodes.size()); #if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO - const auto concurrency = (0 == _impl->_config._threadsPerStream) ? 
custom::task_arena::automatic : _impl->_config._threadsPerStream; + const auto concurrency = (0 == _impl->_config._threadsPerStream) ? custom::task_arena::automatic + : _impl->_config._threadsPerStream; if (ThreadBindingType::HYBRID_AWARE == _impl->_config._threadBindingType) { if (Config::PreferredCoreType::ROUND_ROBIN != _impl->_config._threadPreferredCoreType) { - if (Config::PreferredCoreType::ANY == _impl->_config._threadPreferredCoreType) { - _taskArena.reset(new custom::task_arena{concurrency}); - } else { - const auto selected_core_type = Config::PreferredCoreType::BIG == _impl->_config._threadPreferredCoreType - ? custom::info::core_types().back() // running on Big cores only - : custom::info::core_types().front(); // running on Little cores only - _taskArena.reset(new custom::task_arena{ - custom::task_arena::constraints{}.set_core_type(selected_core_type).set_max_concurrency(concurrency)}); - } + if (Config::PreferredCoreType::ANY == _impl->_config._threadPreferredCoreType) { + _taskArena.reset(new custom::task_arena{concurrency}); + } else { + const auto selected_core_type = + Config::PreferredCoreType::BIG == _impl->_config._threadPreferredCoreType + ? custom::info::core_types().back() // running on Big cores only + : custom::info::core_types().front(); // running on Little cores only + _taskArena.reset(new custom::task_arena{custom::task_arena::constraints{} + .set_core_type(selected_core_type) + .set_max_concurrency(concurrency)}); + } } else { // assigning the stream to the core type in the round-robin fashion - // wrapping around total_streams (i.e. how many streams all different core types can handle together) + // wrapping around total_streams (i.e. how many streams all different core types can handle + // together) const auto total_streams = _impl->total_streams_on_core_types.back().second; const auto streamId_wrapped = _streamId % total_streams; - const auto& selected_core_type = std::find_if(_impl->total_streams_on_core_types.cbegin(), _impl->total_streams_on_core_types.cend(), - [streamId_wrapped](const decltype(_impl->total_streams_on_core_types)::value_type & p) { return p.second > streamId_wrapped; })->first; - _taskArena.reset(new custom::task_arena{ - custom::task_arena::constraints{}.set_core_type(selected_core_type).set_max_concurrency(concurrency)}); + const auto& selected_core_type = + std::find_if( + _impl->total_streams_on_core_types.cbegin(), + _impl->total_streams_on_core_types.cend(), + [streamId_wrapped](const decltype(_impl->total_streams_on_core_types)::value_type& p) { + return p.second > streamId_wrapped; + }) + ->first; + _taskArena.reset(new custom::task_arena{custom::task_arena::constraints{} + .set_core_type(selected_core_type) + .set_max_concurrency(concurrency)}); } } else if (ThreadBindingType::NUMA == _impl->_config._threadBindingType) { _taskArena.reset(new custom::task_arena{custom::task_arena::constraints{_numaNodeId, concurrency}}); - } else if ((0 != _impl->_config._threadsPerStream) || (ThreadBindingType::CORES == _impl->_config._threadBindingType)) { + } else if ((0 != _impl->_config._threadsPerStream) || + (ThreadBindingType::CORES == _impl->_config._threadBindingType)) { _taskArena.reset(new custom::task_arena{concurrency}); if (ThreadBindingType::CORES == _impl->_config._threadBindingType) { CpuSet processMask; - int ncpus = 0; + int ncpus = 0; std::tie(processMask, ncpus) = GetProcessMask(); if (nullptr != processMask) { _observer.reset(new Observer{*_taskArena, @@ -117,11 +131,12 @@ struct CPUStreamsExecutor::Impl { 
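// Editorial sketch (not part of the patch): the per-thread slot arithmetic used by the
// OpenMP pinning branch just below -- each stream owns a contiguous block of
// threadsPerStream slots, shifted by threadBindingOffset. Function and parameter names
// here are illustrative; only the arithmetic is taken from the surrounding code.
#include <cassert>

static int threadSlot(int streamId, int threadsPerStream, int threadIndex, int threadBindingOffset) {
    return streamId * threadsPerStream + threadIndex + threadBindingOffset;
}

int main() {
    // stream 2 with 4 threads per stream and no extra offset occupies slots 8..11
    assert(threadSlot(2, 4, 0, 0) == 8);
    assert(threadSlot(2, 4, 3, 0) == 11);
    return 0;
}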
omp_set_num_threads(_impl->_config._threadsPerStream); if (!checkOpenMpEnvVars(false) && (ThreadBindingType::NONE != _impl->_config._threadBindingType)) { CpuSet processMask; - int ncpus = 0; + int ncpus = 0; std::tie(processMask, ncpus) = GetProcessMask(); if (nullptr != processMask) { - parallel_nt(_impl->_config._threadsPerStream, [&] (int threadIndex, int threadsPerStream) { - int thrIdx = _streamId * _impl->_config._threadsPerStream + threadIndex + _impl->_config._threadBindingOffset; + parallel_nt(_impl->_config._threadsPerStream, [&](int threadIndex, int threadsPerStream) { + int thrIdx = _streamId * _impl->_config._threadsPerStream + threadIndex + + _impl->_config._threadBindingOffset; PinThreadToVacantCore(thrIdx, _impl->_config._threadBindingStep, ncpus, processMask); }); } @@ -131,10 +146,13 @@ struct CPUStreamsExecutor::Impl { PinCurrentThreadToSocket(_numaNodeId); } else if (ThreadBindingType::CORES == _impl->_config._threadBindingType) { CpuSet processMask; - int ncpus = 0; + int ncpus = 0; std::tie(processMask, ncpus) = GetProcessMask(); if (nullptr != processMask) { - PinThreadToVacantCore(_streamId + _impl->_config._threadBindingOffset, _impl->_config._threadBindingStep, ncpus, processMask); + PinThreadToVacantCore(_streamId + _impl->_config._threadBindingOffset, + _impl->_config._threadBindingStep, + ncpus, + processMask); } } #endif @@ -151,22 +169,22 @@ struct CPUStreamsExecutor::Impl { #endif } - Impl* _impl = nullptr; - int _streamId = 0; + Impl* _impl = nullptr; + int _streamId = 0; int _numaNodeId = 0; bool _execute = false; std::queue _taskQueue; #if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO std::unique_ptr _taskArena; - std::unique_ptr _observer; + std::unique_ptr _observer; #endif }; - explicit Impl(const Config& config) : - _config{config}, - _streams([this] { - return std::make_shared(this); - }) { + explicit Impl(const Config& config) + : _config{config}, + _streams([this] { + return std::make_shared(this); + }) { auto numaNodes = getAvailableNUMANodes(); if (_config._streams != 0) { std::copy_n(std::begin(numaNodes), @@ -175,25 +193,28 @@ struct CPUStreamsExecutor::Impl { } else { _usedNumaNodes = numaNodes; } - #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) +#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) if (ThreadBindingType::HYBRID_AWARE == config._threadBindingType) { const auto core_types = custom::info::core_types(); - const int threadsPerStream = (0 == config._threadsPerStream) ? std::thread::hardware_concurrency() : config._threadsPerStream; + const int threadsPerStream = + (0 == config._threadsPerStream) ? 
std::thread::hardware_concurrency() : config._threadsPerStream; int sum = 0; // reversed order, so BIG cores are first for (auto iter = core_types.rbegin(); iter < core_types.rend(); iter++) { const auto& type = *iter; // calculating the #streams per core type - const int num_streams_for_core_type = std::max(1, - custom::info::default_concurrency( - custom::task_arena::constraints{}.set_core_type(type)) / threadsPerStream); + const int num_streams_for_core_type = + std::max(1, + custom::info::default_concurrency(custom::task_arena::constraints{}.set_core_type(type)) / + threadsPerStream); sum += num_streams_for_core_type; // prefix sum, so the core type for a given stream id will be deduced just as a upper_bound - // (notice that the map keeps the elements in the descending order, so the big cores are populated first) + // (notice that the map keeps the elements in the descending order, so the big cores are populated + // first) total_streams_on_core_types.push_back({type, sum}); } } - #endif +#endif for (auto streamId = 0; streamId < _config._streams; ++streamId) { _threads.emplace_back([this, streamId] { openvino::itt::threadName(_config._name + "_" + std::to_string(streamId)); @@ -201,7 +222,9 @@ struct CPUStreamsExecutor::Impl { Task task; { std::unique_lock lock(_mutex); - _queueCondVar.wait(lock, [&] { return !_taskQueue.empty() || (stopped = _isStopped); }); + _queueCondVar.wait(lock, [&] { + return !_taskQueue.empty() || (stopped = _isStopped); + }); if (!_taskQueue.empty()) { task = std::move(_taskQueue.front()); _taskQueue.pop(); @@ -246,33 +269,33 @@ struct CPUStreamsExecutor::Impl { Execute(stream._taskQueue.front(), stream); stream._taskQueue.pop(); } - } catch(...) {} + } catch (...) { + } stream._execute = false; } } - Config _config; - std::mutex _streamIdMutex; - int _streamId = 0; - std::queue _streamIdQueue; - std::vector _threads; - std::mutex _mutex; - std::condition_variable _queueCondVar; - std::queue _taskQueue; - bool _isStopped = false; - std::vector _usedNumaNodes; - ThreadLocal> _streams; - #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) + Config _config; + std::mutex _streamIdMutex; + int _streamId = 0; + std::queue _streamIdQueue; + std::vector _threads; + std::mutex _mutex; + std::condition_variable _queueCondVar; + std::queue _taskQueue; + bool _isStopped = false; + std::vector _usedNumaNodes; + ThreadLocal> _streams; +#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) // stream id mapping to the core type // stored in the reversed order (so the big cores, with the highest core_type_id value, are populated first) // every entry is the core type and #streams that this AND ALL EARLIER entries can handle (prefix sum) // (so mapping is actually just an upper_bound: core type is deduced from the entry for which the id < #streams) using StreamIdToCoreTypes = std::vector>; StreamIdToCoreTypes total_streams_on_core_types; - #endif +#endif }; - int CPUStreamsExecutor::GetStreamId() { auto stream = _impl->_streams.local(); return stream->_streamId; @@ -283,9 +306,7 @@ int CPUStreamsExecutor::GetNumaNodeId() { return stream->_numaNodeId; } -CPUStreamsExecutor::CPUStreamsExecutor(const IStreamsExecutor::Config& config) : - _impl{new Impl{config}} { -} +CPUStreamsExecutor::CPUStreamsExecutor(const IStreamsExecutor::Config& config) : _impl{new Impl{config}} {} CPUStreamsExecutor::~CPUStreamsExecutor() { { diff --git a/inference-engine/src/inference_engine/src/threading/ie_executor_manager.cpp 
b/inference-engine/src/inference_engine/src/threading/ie_executor_manager.cpp index 0393b7732f5..bb40cf2aea6 100644 --- a/inference-engine/src/inference_engine/src/threading/ie_executor_manager.cpp +++ b/inference-engine/src/inference_engine/src/threading/ie_executor_manager.cpp @@ -2,11 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "threading/ie_executor_manager.hpp" + #include #include #include -#include "threading/ie_executor_manager.hpp" #include "threading/ie_cpu_streams_executor.hpp" namespace InferenceEngine { @@ -30,15 +31,14 @@ IStreamsExecutor::Ptr ExecutorManagerImpl::getIdleCPUStreamsExecutor(const IStre continue; const auto& executorConfig = it.first; - if (executorConfig._name == config._name && - executorConfig._streams == config._streams && + if (executorConfig._name == config._name && executorConfig._streams == config._streams && executorConfig._threadsPerStream == config._threadsPerStream && executorConfig._threadBindingType == config._threadBindingType && executorConfig._threadBindingStep == config._threadBindingStep && executorConfig._threadBindingOffset == config._threadBindingOffset) - if (executorConfig._threadBindingType != IStreamsExecutor::ThreadBindingType::HYBRID_AWARE - || executorConfig._threadPreferredCoreType == config._threadPreferredCoreType) - return executor; + if (executorConfig._threadBindingType != IStreamsExecutor::ThreadBindingType::HYBRID_AWARE || + executorConfig._threadPreferredCoreType == config._threadPreferredCoreType) + return executor; } auto newExec = std::make_shared(config); cpuStreamsExecutors.emplace_back(std::make_pair(config, newExec)); @@ -64,9 +64,10 @@ void ExecutorManagerImpl::clear(const std::string& id) { } else { executors.erase(id); cpuStreamsExecutors.erase( - std::remove_if(cpuStreamsExecutors.begin(), cpuStreamsExecutors.end(), + std::remove_if(cpuStreamsExecutors.begin(), + cpuStreamsExecutors.end(), [&](const std::pair& it) { - return it.first._name == id; + return it.first._name == id; }), cpuStreamsExecutors.end()); } diff --git a/inference-engine/src/inference_engine/src/threading/ie_istreams_executor.cpp b/inference-engine/src/inference_engine/src/threading/ie_istreams_executor.cpp index 702a0beecee..a87b18b362e 100644 --- a/inference-engine/src/inference_engine/src/threading/ie_istreams_executor.cpp +++ b/inference-engine/src/inference_engine/src/threading/ie_istreams_executor.cpp @@ -3,17 +3,18 @@ // #include "threading/ie_istreams_executor.hpp" -#include "ie_plugin_config.hpp" + +#include +#include +#include +#include + #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" #include "ie_parallel.hpp" #include "ie_parallel_custom_arena.hpp" -#include "ie_system_conf.h" #include "ie_parameter.hpp" -#include -#include -#include -#include - +#include "ie_plugin_config.hpp" +#include "ie_system_conf.h" namespace InferenceEngine { IStreamsExecutor::~IStreamsExecutor() {} @@ -28,98 +29,98 @@ std::vector IStreamsExecutor::Config::SupportedKeys() { } void IStreamsExecutor::Config::SetConfig(const std::string& key, const std::string& value) { - if (key == CONFIG_KEY(CPU_BIND_THREAD)) { - if (value == CONFIG_VALUE(YES) || value == CONFIG_VALUE(NUMA)) { - #if (defined(__APPLE__) || defined(_WIN32)) - _threadBindingType = IStreamsExecutor::ThreadBindingType::NUMA; - #else - _threadBindingType = (value == CONFIG_VALUE(YES)) - ? 
IStreamsExecutor::ThreadBindingType::CORES : IStreamsExecutor::ThreadBindingType::NUMA; - #endif - } else if (value == CONFIG_VALUE(HYBRID_AWARE)) { - _threadBindingType = IStreamsExecutor::ThreadBindingType::HYBRID_AWARE; - } else if (value == CONFIG_VALUE(NO)) { - _threadBindingType = IStreamsExecutor::ThreadBindingType::NONE; - } else { - IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_BIND_THREAD) - << ". Expected only YES(binds to cores) / NO(no binding) / NUMA(binds to NUMA nodes) / " - "HYBRID_AWARE (let the runtime recognize and use the hybrid cores)"; - } - } else if (key == CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) { - if (value == CONFIG_VALUE(CPU_THROUGHPUT_NUMA)) { - _streams = static_cast(getAvailableNUMANodes().size()); - } else if (value == CONFIG_VALUE(CPU_THROUGHPUT_AUTO)) { - const int sockets = static_cast(getAvailableNUMANodes().size()); - // bare minimum of streams (that evenly divides available number of cores) - const int num_cores = sockets == 1 ? std::thread::hardware_concurrency() : getNumberOfCPUCores(); - if (0 == num_cores % 4) - _streams = std::max(4, num_cores / 4); - else if (0 == num_cores % 5) - _streams = std::max(5, num_cores / 5); - else if (0 == num_cores % 3) - _streams = std::max(3, num_cores / 3); - else // if user disables some cores say in BIOS, so we got weird #cores which is not easy to divide - _streams = 1; - } else { - int val_i; - try { - val_i = std::stoi(value); - } catch (const std::exception&) { - IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_THROUGHPUT_STREAMS) - << ". Expected only positive numbers (#streams) or " - << "PluginConfigParams::CPU_THROUGHPUT_NUMA/CPU_THROUGHPUT_AUTO"; - } - if (val_i < 0) { - IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_THROUGHPUT_STREAMS) - << ". Expected only positive numbers (#streams)"; - } - _streams = val_i; - } - } else if (key == CONFIG_KEY(CPU_THREADS_NUM)) { - int val_i; - try { - val_i = std::stoi(value); - } catch (const std::exception&) { - IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_THREADS_NUM) - << ". Expected only positive numbers (#threads)"; - } - if (val_i < 0) { - IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_THREADS_NUM) - << ". Expected only positive numbers (#threads)"; - } - _threads = val_i; - } else if (key == CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM)) { - int val_i; - try { - val_i = std::stoi(value); - } catch (const std::exception&) { - IE_THROW() << "Wrong value for property key " << CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM) - << ". Expected only non negative numbers (#threads)"; - } - if (val_i < 0) { - IE_THROW() << "Wrong value for property key " << CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM) - << ". Expected only non negative numbers (#threads)"; - } - _threadsPerStream = val_i; + if (key == CONFIG_KEY(CPU_BIND_THREAD)) { + if (value == CONFIG_VALUE(YES) || value == CONFIG_VALUE(NUMA)) { +#if (defined(__APPLE__) || defined(_WIN32)) + _threadBindingType = IStreamsExecutor::ThreadBindingType::NUMA; +#else + _threadBindingType = (value == CONFIG_VALUE(YES)) ? 
IStreamsExecutor::ThreadBindingType::CORES + : IStreamsExecutor::ThreadBindingType::NUMA; +#endif + } else if (value == CONFIG_VALUE(HYBRID_AWARE)) { + _threadBindingType = IStreamsExecutor::ThreadBindingType::HYBRID_AWARE; + } else if (value == CONFIG_VALUE(NO)) { + _threadBindingType = IStreamsExecutor::ThreadBindingType::NONE; } else { - IE_THROW() << "Wrong value for property key " << key; + IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_BIND_THREAD) + << ". Expected only YES(binds to cores) / NO(no binding) / NUMA(binds to NUMA nodes) / " + "HYBRID_AWARE (let the runtime recognize and use the hybrid cores)"; } + } else if (key == CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) { + if (value == CONFIG_VALUE(CPU_THROUGHPUT_NUMA)) { + _streams = static_cast(getAvailableNUMANodes().size()); + } else if (value == CONFIG_VALUE(CPU_THROUGHPUT_AUTO)) { + const int sockets = static_cast(getAvailableNUMANodes().size()); + // bare minimum of streams (that evenly divides available number of cores) + const int num_cores = sockets == 1 ? std::thread::hardware_concurrency() : getNumberOfCPUCores(); + if (0 == num_cores % 4) + _streams = std::max(4, num_cores / 4); + else if (0 == num_cores % 5) + _streams = std::max(5, num_cores / 5); + else if (0 == num_cores % 3) + _streams = std::max(3, num_cores / 3); + else // if user disables some cores say in BIOS, so we got weird #cores which is not easy to divide + _streams = 1; + } else { + int val_i; + try { + val_i = std::stoi(value); + } catch (const std::exception&) { + IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_THROUGHPUT_STREAMS) + << ". Expected only positive numbers (#streams) or " + << "PluginConfigParams::CPU_THROUGHPUT_NUMA/CPU_THROUGHPUT_AUTO"; + } + if (val_i < 0) { + IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_THROUGHPUT_STREAMS) + << ". Expected only positive numbers (#streams)"; + } + _streams = val_i; + } + } else if (key == CONFIG_KEY(CPU_THREADS_NUM)) { + int val_i; + try { + val_i = std::stoi(value); + } catch (const std::exception&) { + IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_THREADS_NUM) + << ". Expected only positive numbers (#threads)"; + } + if (val_i < 0) { + IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_THREADS_NUM) + << ". Expected only positive numbers (#threads)"; + } + _threads = val_i; + } else if (key == CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM)) { + int val_i; + try { + val_i = std::stoi(value); + } catch (const std::exception&) { + IE_THROW() << "Wrong value for property key " << CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM) + << ". Expected only non negative numbers (#threads)"; + } + if (val_i < 0) { + IE_THROW() << "Wrong value for property key " << CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM) + << ". 
Expected only non negative numbers (#threads)"; + } + _threadsPerStream = val_i; + } else { + IE_THROW() << "Wrong value for property key " << key; + } } Parameter IStreamsExecutor::Config::GetConfig(const std::string& key) { if (key == CONFIG_KEY(CPU_BIND_THREAD)) { switch (_threadBindingType) { - case IStreamsExecutor::ThreadBindingType::NONE: - return {CONFIG_VALUE(NO)}; + case IStreamsExecutor::ThreadBindingType::NONE: + return {CONFIG_VALUE(NO)}; break; - case IStreamsExecutor::ThreadBindingType::CORES: - return {CONFIG_VALUE(YES)}; + case IStreamsExecutor::ThreadBindingType::CORES: + return {CONFIG_VALUE(YES)}; break; - case IStreamsExecutor::ThreadBindingType::NUMA: - return {CONFIG_VALUE(NUMA)}; + case IStreamsExecutor::ThreadBindingType::NUMA: + return {CONFIG_VALUE(NUMA)}; break; - case IStreamsExecutor::ThreadBindingType::HYBRID_AWARE: - return {CONFIG_VALUE(HYBRID_AWARE)}; + case IStreamsExecutor::ThreadBindingType::HYBRID_AWARE: + return {CONFIG_VALUE(HYBRID_AWARE)}; break; } } else if (key == CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) { @@ -134,7 +135,8 @@ Parameter IStreamsExecutor::Config::GetConfig(const std::string& key) { return {}; } -IStreamsExecutor::Config IStreamsExecutor::Config::MakeDefaultMultiThreaded(const IStreamsExecutor::Config& initial, const bool fp_intesive) { +IStreamsExecutor::Config IStreamsExecutor::Config::MakeDefaultMultiThreaded(const IStreamsExecutor::Config& initial, + const bool fp_intesive) { const auto envThreads = parallel_get_env_threads(); const auto& numaNodes = getAvailableNUMANodes(); const int numaNodesNum = numaNodes.size(); @@ -143,47 +145,49 @@ IStreamsExecutor::Config IStreamsExecutor::Config::MakeDefaultMultiThreaded(cons // by default, do not use the hyper-threading (to minimize threads synch overheads) int num_cores_default = getNumberOfCPUCores(); - #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) - //additional latency-case logic for hybrid processors: +#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) + // additional latency-case logic for hybrid processors: if (ThreadBindingType::HYBRID_AWARE == streamExecutorConfig._threadBindingType) { const auto core_types = custom::info::core_types(); - const auto num_little_cores = custom::info::default_concurrency(custom::task_arena::constraints{}.set_core_type(core_types.front())); + const auto num_little_cores = + custom::info::default_concurrency(custom::task_arena::constraints{}.set_core_type(core_types.front())); const auto num_big_cores_phys = getNumberOfCPUCores(true); - const int int8_threshold = 4; // ~relative efficiency of the VNNI-intensive code for Big vs Little cores; - const int fp32_threshold = 2; // ~relative efficiency of the AVX2 fp32 code for Big vs Little cores; + const int int8_threshold = 4; // ~relative efficiency of the VNNI-intensive code for Big vs Little cores; + const int fp32_threshold = 2; // ~relative efficiency of the AVX2 fp32 code for Big vs Little cores; // by default the latency case uses (faster) Big cores only, depending on the compute ratio - const bool bLatencyCaseBigOnly = num_big_cores_phys > (num_little_cores / (fp_intesive ? fp32_threshold : int8_threshold)); + const bool bLatencyCaseBigOnly = + num_big_cores_phys > (num_little_cores / (fp_intesive ? fp32_threshold : int8_threshold)); // selecting the preferred core type streamExecutorConfig._threadPreferredCoreType = - bLatencyCase - ? (bLatencyCaseBigOnly - ? 
IStreamsExecutor::Config::PreferredCoreType::BIG - : IStreamsExecutor::Config::PreferredCoreType::ANY) - : IStreamsExecutor::Config::PreferredCoreType::ROUND_ROBIN; + bLatencyCase ? (bLatencyCaseBigOnly ? IStreamsExecutor::Config::PreferredCoreType::BIG + : IStreamsExecutor::Config::PreferredCoreType::ANY) + : IStreamsExecutor::Config::PreferredCoreType::ROUND_ROBIN; // additionally selecting the #cores to use in the "Big-only" case if (bLatencyCaseBigOnly) { - const int hyper_threading_threshold = 2; // min #cores, for which the hyper-threading becomes useful for the latency case - const auto num_big_cores = custom::info::default_concurrency(custom::task_arena::constraints{}.set_core_type(core_types.back())); + const int hyper_threading_threshold = + 2; // min #cores, for which the hyper-threading becomes useful for the latency case + const auto num_big_cores = + custom::info::default_concurrency(custom::task_arena::constraints{}.set_core_type(core_types.back())); num_cores_default = (num_big_cores_phys <= hyper_threading_threshold) ? num_big_cores : num_big_cores_phys; } } - #endif +#endif const auto hwCores = !bLatencyCase && numaNodesNum == 1 - // throughput case on a single-NUMA node machine uses all available cores - ? parallel_get_max_threads() - // in the rest of cases: - // multi-node machine - // or - // latency case, single-node yet hybrid case that uses - // all core types - // or - // big-cores only, but the #cores is "enough" (pls see the logic above) - // it is usually beneficial not to use the hyper-threading (which is default) - : num_cores_default; - const auto threads = streamExecutorConfig._threads ? streamExecutorConfig._threads : (envThreads ? envThreads : hwCores); - streamExecutorConfig._threadsPerStream = streamExecutorConfig._streams - ? std::max(1, threads/streamExecutorConfig._streams) - : threads; + // throughput case on a single-NUMA node machine uses all available cores + ? parallel_get_max_threads() + // in the rest of cases: + // multi-node machine + // or + // latency case, single-node yet hybrid case that uses + // all core types + // or + // big-cores only, but the #cores is "enough" (pls see the logic above) + // it is usually beneficial not to use the hyper-threading (which is default) + : num_cores_default; + const auto threads = + streamExecutorConfig._threads ? streamExecutorConfig._threads : (envThreads ? envThreads : hwCores); + streamExecutorConfig._threadsPerStream = + streamExecutorConfig._streams ? std::max(1, threads / streamExecutorConfig._streams) : threads; return streamExecutorConfig; } diff --git a/inference-engine/src/inference_engine/src/threading/ie_itask_executor.cpp b/inference-engine/src/inference_engine/src/threading/ie_itask_executor.cpp index 0addba46761..7c3885fc132 100644 --- a/inference-engine/src/inference_engine/src/threading/ie_itask_executor.cpp +++ b/inference-engine/src/inference_engine/src/threading/ie_itask_executor.cpp @@ -15,11 +15,15 @@ void ITaskExecutor::runAndWait(const std::vector& tasks) { std::vector> packagedTasks; std::vector> futures; for (std::size_t i = 0; i < tasks.size(); ++i) { - packagedTasks.emplace_back([&tasks, i] {tasks[i]();}); + packagedTasks.emplace_back([&tasks, i] { + tasks[i](); + }); futures.emplace_back(packagedTasks.back().get_future()); } for (std::size_t i = 0; i < tasks.size(); ++i) { - run([&packagedTasks, i]{packagedTasks[i]();}); + run([&packagedTasks, i] { + packagedTasks[i](); + }); } // std::future::get will rethrow exception from task. 
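// Editorial sketch (not part of the patch): a standalone illustration of the runAndWait()
// pattern in the hunk above -- every task is wrapped in std::packaged_task, all tasks are
// run and waited for first, and only then std::future::get() rethrows the first stored
// failure. SerialExecutor is an assumed stand-in that runs tasks inline; it is not an
// Inference Engine class.
#include <cstddef>
#include <functional>
#include <future>
#include <stdexcept>
#include <vector>

using Task = std::function<void()>;

struct SerialExecutor {
    void run(Task task) {
        task();  // inline execution keeps the sketch self-contained
    }
    void runAndWait(const std::vector<Task>& tasks) {
        std::vector<std::packaged_task<void()>> packagedTasks;
        std::vector<std::future<void>> futures;
        for (std::size_t i = 0; i < tasks.size(); ++i) {
            packagedTasks.emplace_back([&tasks, i] {
                tasks[i]();
            });
            futures.emplace_back(packagedTasks.back().get_future());
        }
        for (std::size_t i = 0; i < tasks.size(); ++i) {
            run([&packagedTasks, i] {
                packagedTasks[i]();
            });
        }
        for (auto& future : futures)
            future.wait();  // wait for all tasks before any exception escapes
        for (auto& future : futures)
            future.get();   // rethrows the exception stored by a failed task
    }
};

int main() {
    SerialExecutor executor;
    std::vector<Task> tasks{[] {}, [] {
                                throw std::runtime_error("task failed");
                            }};
    try {
        executor.runAndWait(tasks);
    } catch (const std::exception&) {
        return 0;  // the failure of the second task surfaces only after all tasks ran
    }
    return 1;
}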
// We should wait all tasks before any exception is thrown. diff --git a/inference-engine/src/inference_engine/src/threading/ie_parallel_custom_arena.cpp b/inference-engine/src/inference_engine/src/threading/ie_parallel_custom_arena.cpp old mode 100755 new mode 100644 index 68d6c220b46..4d02c83a100 --- a/inference-engine/src/inference_engine/src/threading/ie_parallel_custom_arena.cpp +++ b/inference-engine/src/inference_engine/src/threading/ie_parallel_custom_arena.cpp @@ -6,29 +6,32 @@ #if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO -#ifndef TBBBIND_2_4_AVAILABLE -# define TBBBIND_2_4_AVAILABLE 0 -#endif +# ifndef TBBBIND_2_4_AVAILABLE +# define TBBBIND_2_4_AVAILABLE 0 +# endif -#define USE_TBBBIND_2_4 (TBBBIND_2_4_AVAILABLE && TBB_INTERFACE_VERSION < 12020) -#define TBB_NUMA_SUPPORT_PRESENT (TBB_INTERFACE_VERSION >= 11100) -#define TBB_HYBRID_CPUS_SUPPORT_PRESENT (TBB_INTERFACE_VERSION >= 12020) +# define USE_TBBBIND_2_4 (TBBBIND_2_4_AVAILABLE && TBB_INTERFACE_VERSION < 12020) +# define TBB_NUMA_SUPPORT_PRESENT (TBB_INTERFACE_VERSION >= 11100) +# define TBB_HYBRID_CPUS_SUPPORT_PRESENT (TBB_INTERFACE_VERSION >= 12020) -#if defined(_WIN32) || defined(_WIN64) -#include -#endif +# if defined(_WIN32) || defined(_WIN64) +# include +# endif namespace custom { namespace detail { -#if USE_TBBBIND_2_4 +# if USE_TBBBIND_2_4 extern "C" { -void __TBB_internal_initialize_system_topology( - std::size_t groups_num, - int& numa_nodes_count, int*& numa_indexes_list, - int& core_types_count, int*& core_types_indexes_list -); -binding_handler* __TBB_internal_allocate_binding_handler(int number_of_slots, int numa_id, int core_type_id, int max_threads_per_core); +void __TBB_internal_initialize_system_topology(std::size_t groups_num, + int& numa_nodes_count, + int*& numa_indexes_list, + int& core_types_count, + int*& core_types_indexes_list); +binding_handler* __TBB_internal_allocate_binding_handler(int number_of_slots, + int numa_id, + int core_type_id, + int max_threads_per_core); void __TBB_internal_deallocate_binding_handler(binding_handler* handler_ptr); void __TBB_internal_apply_affinity(binding_handler* handler_ptr, int slot_num); void __TBB_internal_restore_affinity(binding_handler* handler_ptr, int slot_num); @@ -36,7 +39,7 @@ int __TBB_internal_get_default_concurrency(int numa_id, int core_type_id, int ma } int get_processors_group_num() { -#if defined(_WIN32) || defined(_WIN64) +# if defined(_WIN32) || defined(_WIN64) SYSTEM_INFO si; GetNativeSystemInfo(&si); @@ -44,46 +47,48 @@ int get_processors_group_num() { GetProcessAffinityMask(GetCurrentProcess(), &pam, &sam); int nproc = 0; for (std::size_t i = 0; i < sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1) { - if ( pam & m ) + if (pam & m) ++nproc; } if (nproc == static_cast(si.dwNumberOfProcessors)) { return GetActiveProcessorGroupCount(); } -#endif +# endif return 1; } bool is_binding_environment_valid() { -#if defined(_WIN32) && !defined(_WIN64) +# if defined(_WIN32) && !defined(_WIN64) static bool result = [] { // For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs. 
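// Editorial sketch (not part of the patch): the "compute once" idiom used by
// is_binding_environment_valid() above -- a function-local static initialized by an
// immediately-invoked lambda, so the check runs exactly once and is thread-safe under
// C++11 static-initialization rules. The function name and threshold are assumptions.
#include <thread>

static bool hasEnoughHardwareThreads() {
    static const bool result = [] {
        return std::thread::hardware_concurrency() >= 4;  // evaluated on the first call only
    }();
    return result;
}

int main() {
    return hasEnoughHardwareThreads() ? 0 : 1;
}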
SYSTEM_INFO si; GetNativeSystemInfo(&si); - if (si.dwNumberOfProcessors > 32) return false; + if (si.dwNumberOfProcessors > 32) + return false; return true; }(); return result; -#else +# else return true; -#endif /* _WIN32 && !_WIN64 */ +# endif /* _WIN32 && !_WIN64 */ } -static int numa_nodes_count = 0; +static int numa_nodes_count = 0; static int* numa_nodes_indexes = nullptr; -static int core_types_count = 0; +static int core_types_count = 0; static int* core_types_indexes = nullptr; void initialize_system_topology() { static std::once_flag is_topology_initialized; - std::call_once(is_topology_initialized, [&]{ + std::call_once(is_topology_initialized, [&] { if (is_binding_environment_valid()) { - __TBB_internal_initialize_system_topology( - get_processors_group_num(), - numa_nodes_count, numa_nodes_indexes, - core_types_count, core_types_indexes); + __TBB_internal_initialize_system_topology(get_processors_group_num(), + numa_nodes_count, + numa_nodes_indexes, + core_types_count, + core_types_indexes); } else { static int dummy_index = task_arena::automatic; @@ -99,7 +104,8 @@ void initialize_system_topology() { binding_observer::binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c) : task_scheduler_observer(ta) { detail::initialize_system_topology(); - my_binding_handler = detail::__TBB_internal_allocate_binding_handler(num_slots, c.numa_id, c.core_type, c.max_threads_per_core); + my_binding_handler = + detail::__TBB_internal_allocate_binding_handler(num_slots, c.numa_id, c.core_type, c.max_threads_per_core); } binding_observer::~binding_observer() { @@ -117,89 +123,91 @@ void binding_observer::on_scheduler_exit(bool) { binding_oberver_ptr construct_binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c) { binding_oberver_ptr observer{}; if (detail::is_binding_environment_valid() && - ((c.core_type >= 0 && info::core_types().size() > 1) || (c.numa_id >= 0 && info::numa_nodes().size() > 1) || c.max_threads_per_core > 0)) { + ((c.core_type >= 0 && info::core_types().size() > 1) || (c.numa_id >= 0 && info::numa_nodes().size() > 1) || + c.max_threads_per_core > 0)) { observer.reset(new binding_observer{ta, num_slots, c}); observer->observe(true); } return observer; } -#endif /*USE_TBBBIND_2_4*/ +# endif /*USE_TBBBIND_2_4*/ -#if TBB_NUMA_SUPPORT_PRESENT +# if TBB_NUMA_SUPPORT_PRESENT tbb::task_arena::constraints convert_constraints(const custom::task_arena::constraints& c) { tbb::task_arena::constraints result{}; -#if TBB_HYBRID_CPUS_SUPPORT_PRESENT +# if TBB_HYBRID_CPUS_SUPPORT_PRESENT result.core_type = c.core_type; result.max_threads_per_core = c.max_threads_per_core; -#endif +# endif result.numa_id = c.numa_id; result.max_concurrency = c.max_concurrency; return result; } -#endif -} // namespace detail +# endif +} // namespace detail task_arena::task_arena(int max_concurrency_, unsigned reserved_for_masters) - : my_task_arena{max_concurrency_, reserved_for_masters} - , my_initialization_state{} - , my_constraints{} - , my_binding_observer{} -{} + : my_task_arena{max_concurrency_, reserved_for_masters}, + my_initialization_state{}, + my_constraints{}, + my_binding_observer{} {} task_arena::task_arena(const constraints& constraints_, unsigned reserved_for_masters) -#if USE_TBBBIND_2_4 - : my_task_arena {info::default_concurrency(constraints_), reserved_for_masters} -#elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT - : my_task_arena {convert_constraints(constraints_), reserved_for_masters} -#else - : my_task_arena 
{constraints_.max_concurrency, reserved_for_masters} -#endif - , my_initialization_state{} - , my_constraints{constraints_} - , my_binding_observer{} -{} +# if USE_TBBBIND_2_4 + : my_task_arena { + info::default_concurrency(constraints_), reserved_for_masters +} +# elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT + : my_task_arena { + convert_constraints(constraints_), reserved_for_masters +} +# else + : my_task_arena { + constraints_.max_concurrency, reserved_for_masters +} +# endif +, my_initialization_state{}, my_constraints{constraints_}, my_binding_observer{} {} -task_arena::task_arena(const task_arena &s) - : my_task_arena{s.my_task_arena} - , my_initialization_state{} - , my_constraints{s.my_constraints} - , my_binding_observer{} -{} +task_arena::task_arena(const task_arena& s) + : my_task_arena{s.my_task_arena}, + my_initialization_state{}, + my_constraints{s.my_constraints}, + my_binding_observer{} {} void task_arena::initialize() { my_task_arena.initialize(); -#if USE_TBBBIND_2_4 +# if USE_TBBBIND_2_4 std::call_once(my_initialization_state, [this] { - my_binding_observer = detail::construct_binding_observer( - my_task_arena, my_task_arena.max_concurrency(), my_constraints); + my_binding_observer = + detail::construct_binding_observer(my_task_arena, my_task_arena.max_concurrency(), my_constraints); }); -#endif +# endif } void task_arena::initialize(int max_concurrency_, unsigned reserved_for_masters) { my_task_arena.initialize(max_concurrency_, reserved_for_masters); -#if USE_TBBBIND_2_4 +# if USE_TBBBIND_2_4 std::call_once(my_initialization_state, [this] { - my_binding_observer = detail::construct_binding_observer( - my_task_arena, my_task_arena.max_concurrency(), my_constraints); + my_binding_observer = + detail::construct_binding_observer(my_task_arena, my_task_arena.max_concurrency(), my_constraints); }); -#endif +# endif } void task_arena::initialize(constraints constraints_, unsigned reserved_for_masters) { - my_constraints = constraints_; -#if USE_TBBBIND_2_4 - my_task_arena.initialize(info::default_concurrency(constraints_), reserved_for_masters); - std::call_once(my_initialization_state, [this] { - my_binding_observer = detail::construct_binding_observer( - my_task_arena, my_task_arena.max_concurrency(), my_constraints); - }); -#elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT - my_task_arena.initialize(convert_constraints(my_constraints), reserved_for_masters); -#else - my_task_arena.initialize(my_constraints.max_concurrency, reserved_for_masters); -#endif + my_constraints = constraints_; +# if USE_TBBBIND_2_4 + my_task_arena.initialize(info::default_concurrency(constraints_), reserved_for_masters); + std::call_once(my_initialization_state, [this] { + my_binding_observer = + detail::construct_binding_observer(my_task_arena, my_task_arena.max_concurrency(), my_constraints); + }); +# elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT + my_task_arena.initialize(convert_constraints(my_constraints), reserved_for_masters); +# else + my_task_arena.initialize(my_constraints.max_concurrency, reserved_for_masters); +# endif } task_arena::operator tbb::task_arena&() { @@ -213,54 +221,54 @@ int task_arena::max_concurrency() { namespace info { std::vector numa_nodes() { -#if USE_TBBBIND_2_4 +# if USE_TBBBIND_2_4 detail::initialize_system_topology(); std::vector node_indexes(detail::numa_nodes_count); std::memcpy(node_indexes.data(), detail::numa_nodes_indexes, detail::numa_nodes_count * sizeof(int)); return node_indexes; -#elif 
TBB_NUMA_SUPPORT_PRESENT +# elif TBB_NUMA_SUPPORT_PRESENT return tbb::info::numa_nodes(); -#else +# else return {tbb::task_arena::automatic}; -#endif +# endif } std::vector core_types() { -#if USE_TBBBIND_2_4 +# if USE_TBBBIND_2_4 detail::initialize_system_topology(); std::vector core_type_indexes(detail::core_types_count); std::memcpy(core_type_indexes.data(), detail::core_types_indexes, detail::core_types_count * sizeof(int)); return core_type_indexes; -#elif TBB_HYBRID_CPUS_SUPPORT_PRESENT +# elif TBB_HYBRID_CPUS_SUPPORT_PRESENT return tbb::info::core_types(); -#else +# else return {tbb::task_arena::automatic}; -#endif +# endif } int default_concurrency(task_arena::constraints c) { if (c.max_concurrency > 0) { return c.max_concurrency; } -#if USE_TBBBIND_2_4 +# if USE_TBBBIND_2_4 if (detail::is_binding_environment_valid()) { detail::initialize_system_topology(); return detail::__TBB_internal_get_default_concurrency(c.numa_id, c.core_type, c.max_threads_per_core); } return tbb::this_task_arena::max_concurrency(); -#elif TBB_HYBRID_CPUS_SUPPORT_PRESENT +# elif TBB_HYBRID_CPUS_SUPPORT_PRESENT return tbb::info::default_concurrency(convert_constraints(c)); -#elif TBB_NUMA_SUPPORT_PRESENT +# elif TBB_NUMA_SUPPORT_PRESENT return tbb::info::default_concurrency(c.numa_id); -#else +# else return tbb::this_task_arena::max_concurrency(); -#endif +# endif } int default_concurrency(numa_node_id id) { return default_concurrency(task_arena::constraints{}.set_numa_id(id)); } -} // namespace info -} // namespace custom +} // namespace info +} // namespace custom #endif /*IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO*/ diff --git a/inference-engine/src/inference_engine/src/threading/ie_thread_affinity.cpp b/inference-engine/src/inference_engine/src/threading/ie_thread_affinity.cpp index a270adc9965..b006490dd02 100644 --- a/inference-engine/src/inference_engine/src/threading/ie_thread_affinity.cpp +++ b/inference-engine/src/inference_engine/src/threading/ie_thread_affinity.cpp @@ -3,16 +3,17 @@ // #include "threading/ie_thread_affinity.hpp" -#include "ie_system_conf.h" -#include -#include -#include -#include +#include +#include +#include +#include + +#include "ie_system_conf.h" #if !(defined(__APPLE__) || defined(_WIN32)) -#include -#include +# include +# include #endif namespace InferenceEngine { @@ -20,7 +21,8 @@ namespace InferenceEngine { std::tuple GetProcessMask() { for (int ncpus = sizeof(cpu_set_t) / CHAR_BIT; ncpus < 32768 /* reasonable limit of #cores*/; ncpus <<= 1) { CpuSet mask{CPU_ALLOC(ncpus)}; - if (nullptr == mask) break; + if (nullptr == mask) + break; const size_t size = CPU_ALLOC_SIZE(ncpus); CPU_ZERO_S(size, mask.get()); // the result fits the mask @@ -28,14 +30,16 @@ std::tuple GetProcessMask() { return std::make_tuple(std::move(mask), ncpus); } // other error - if (errno != EINVAL) break; + if (errno != EINVAL) + break; } return std::make_tuple(nullptr, 0); } /* Release the cores affinity mask for the current process */ void ReleaseProcessMask(cpu_set_t* mask) { - if (nullptr != mask) CPU_FREE(mask); + if (nullptr != mask) + CPU_FREE(mask); } bool PinCurrentThreadByMask(int ncores, const CpuSet& procMask) { @@ -74,7 +78,7 @@ bool PinThreadToVacantCore(int thrIdx, int hyperthreads, int ncores, const CpuSe bool PinCurrentThreadToSocket(int socket) { const int sockets = InferenceEngine::getAvailableNUMANodes().size(); const int cores = InferenceEngine::getNumberOfCPUCores(); - const int cores_per_socket = cores/sockets; + const int cores_per_socket = cores / sockets; 
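// Editorial sketch (not part of the patch): a Linux-only sketch of the socket-pinning logic
// in PinCurrentThreadToSocket() above, with the socket/core counts passed in instead of
// queried from the runtime. Uses only the glibc CPU_* macros and sched_setaffinity();
// on glibc they require _GNU_SOURCE, hence the guarded define.
#ifndef _GNU_SOURCE
#    define _GNU_SOURCE
#endif
#include <sched.h>

#include <cstddef>

static bool pinCurrentThreadToSocket(int socket, int sockets, int cores) {
    const int cores_per_socket = cores / sockets;
    cpu_set_t* mask = CPU_ALLOC(cores);  // dynamic mask sized by the total core count
    if (mask == nullptr)
        return false;
    const std::size_t size = CPU_ALLOC_SIZE(cores);
    CPU_ZERO_S(size, mask);
    for (int core = socket * cores_per_socket; core < (socket + 1) * cores_per_socket; ++core)
        CPU_SET_S(core, size, mask);
    // pid 0 means the calling thread; restrict it to the cores of the chosen socket
    const bool pinned = (0 == sched_setaffinity(0, size, mask));
    CPU_FREE(mask);
    return pinned;
}

int main() {
    return pinCurrentThreadToSocket(/*socket=*/0, /*sockets=*/1, /*cores=*/4) ? 0 : 1;
}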
int ncpus = 0; CpuSet mask; @@ -83,7 +87,7 @@ bool PinCurrentThreadToSocket(int socket) { const size_t size = CPU_ALLOC_SIZE(ncpus); CPU_ZERO_S(size, targetMask.get()); - for (int core = socket*cores_per_socket; core < (socket+1)*cores_per_socket; core++) { + for (int core = socket * cores_per_socket; core < (socket + 1) * cores_per_socket; core++) { CPU_SET_S(core, size, targetMask.get()); } // respect the user-defined mask for the entire process diff --git a/inference-engine/src/inference_engine/src/threading/ie_thread_affinity.hpp b/inference-engine/src/inference_engine/src/threading/ie_thread_affinity.hpp index e4c80e2bd3c..012e099b68e 100644 --- a/inference-engine/src/inference_engine/src/threading/ie_thread_affinity.hpp +++ b/inference-engine/src/inference_engine/src/threading/ie_thread_affinity.hpp @@ -4,13 +4,13 @@ #pragma once -#include - -#include #include +#include + +#include "ie_api.h" #if !(defined(__APPLE__) || defined(_WIN32)) -#include +# include #endif namespace InferenceEngine { diff --git a/inference-engine/src/inference_engine/src/xml_parse_utils.cpp b/inference-engine/src/inference_engine/src/xml_parse_utils.cpp index a71adf1b54e..9f2e5ee8230 100644 --- a/inference-engine/src/inference_engine/src/xml_parse_utils.cpp +++ b/inference-engine/src/inference_engine/src/xml_parse_utils.cpp @@ -16,14 +16,14 @@ int XMLParseUtils::GetIntAttr(const pugi::xml_node& node, const char* str) { auto attr = node.attribute(str); if (attr.empty()) IE_THROW() << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset " - << node.offset_debug(); + << node.offset_debug(); std::string str_value = std::string(attr.value()); std::size_t idx = 0; int int_value = std::stoi(str_value, &idx, 10); if (idx != str_value.length()) IE_THROW() << "node <" << node.name() << "> has attribute \"" << str << "\" = \"" << str_value - << "\" which is not an integer" - << " at offset " << node.offset_debug(); + << "\" which is not an integer" + << " at offset " << node.offset_debug(); return int_value; } @@ -31,14 +31,14 @@ int64_t XMLParseUtils::GetInt64Attr(const pugi::xml_node& node, const char* str) auto attr = node.attribute(str); if (attr.empty()) IE_THROW() << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset " - << node.offset_debug(); + << node.offset_debug(); std::string str_value = std::string(attr.value()); std::size_t idx = 0; long long int_value = std::stoll(str_value, &idx, 10); if (idx != str_value.length()) IE_THROW() << "node <" << node.name() << "> has attribute \"" << str << "\" = \"" << str_value - << "\" which is not a signed 64 bit integer" - << " at offset " << node.offset_debug(); + << "\" which is not a signed 64 bit integer" + << " at offset " << node.offset_debug(); return static_cast(int_value); } @@ -46,14 +46,14 @@ uint64_t XMLParseUtils::GetUInt64Attr(const pugi::xml_node& node, const char* st auto attr = node.attribute(str); if (attr.empty()) IE_THROW() << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset " - << node.offset_debug(); + << node.offset_debug(); std::string str_value = std::string(attr.value()); std::size_t idx = 0; long long int_value = std::stoll(str_value, &idx, 10); if (idx != str_value.length() || int_value < 0) IE_THROW() << "node <" << node.name() << "> has attribute \"" << str << "\" = \"" << str_value - << "\" which is not an unsigned 64 bit integer" - << " at offset " << node.offset_debug(); + << "\" which is not an unsigned 64 bit integer" + << " at 
offset " << node.offset_debug(); return static_cast(int_value); } @@ -61,14 +61,14 @@ unsigned int XMLParseUtils::GetUIntAttr(const pugi::xml_node& node, const char* auto attr = node.attribute(str); if (attr.empty()) IE_THROW() << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset " - << node.offset_debug(); + << node.offset_debug(); std::string str_value = std::string(attr.value()); std::size_t idx = 0; long long int_value = std::stoll(str_value, &idx, 10); if (idx != str_value.length() || int_value < 0 || int_value > (std::numeric_limits::max)()) IE_THROW() << "node <" << node.name() << "> has attribute \"" << str << "\" = \"" << str_value - << "\" which is not an unsigned integer" - << " at offset " << node.offset_debug(); + << "\" which is not an unsigned integer" + << " at offset " << node.offset_debug(); return static_cast(int_value); } @@ -76,25 +76,27 @@ std::string XMLParseUtils::GetStrAttr(const pugi::xml_node& node, const char* st auto attr = node.attribute(str); if (attr.empty()) IE_THROW() << "node <" << node.name() << "> is missing mandatory attribute: '" << str << "' at offset " - << node.offset_debug(); + << node.offset_debug(); return attr.value(); } std::string XMLParseUtils::GetStrAttr(const pugi::xml_node& node, const char* str, const char* def) { auto attr = node.attribute(str); - if (attr.empty()) return def; + if (attr.empty()) + return def; return attr.value(); } bool XMLParseUtils::GetBoolAttr(const pugi::xml_node& node, const char* str, const bool def) { auto attr = node.attribute(str); - if (attr.empty()) return def; + if (attr.empty()) + return def; std::string string_attr = attr.value(); std::transform(string_attr.begin(), string_attr.end(), string_attr.begin(), [](char ch) { return std::tolower(static_cast(ch)); }); - std::set true_names {"true", "1"}; - std::set false_names {"false", "0"}; + std::set true_names{"true", "1"}; + std::set false_names{"false", "0"}; bool is_true = true_names.find(string_attr) != true_names.end(); bool is_false = false_names.find(string_attr) != false_names.end(); @@ -110,13 +112,13 @@ bool XMLParseUtils::GetBoolAttr(const pugi::xml_node& node, const char* str) { auto attr = node.attribute(str); if (attr.empty()) IE_THROW() << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset " - << node.offset_debug(); + << node.offset_debug(); std::string string_attr = attr.value(); std::transform(string_attr.begin(), string_attr.end(), string_attr.begin(), [](char ch) { return std::tolower(static_cast(ch)); }); - std::set true_names {"true", "1"}; - std::set false_names {"false", "0"}; + std::set true_names{"true", "1"}; + std::set false_names{"false", "0"}; bool is_true = true_names.find(string_attr) != true_names.end(); bool is_false = false_names.find(string_attr) != false_names.end(); @@ -132,7 +134,7 @@ float XMLParseUtils::GetFloatAttr(const pugi::xml_node& node, const char* str) { auto attr = node.attribute(str); if (attr.empty()) IE_THROW() << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset " - << node.offset_debug(); + << node.offset_debug(); std::string str_value = std::string(attr.value()); std::stringstream str_stream(str_value); str_stream.imbue(std::locale("C")); @@ -140,8 +142,8 @@ float XMLParseUtils::GetFloatAttr(const pugi::xml_node& node, const char* str) { str_stream >> float_value; if (!str_stream.eof()) IE_THROW() << "node <" << node.name() << "> has attribute \"" << str << "\" = \"" << str_value - << "\" which is 
not a floating point" - << " at offset " << node.offset_debug(); + << "\" which is not a floating point" + << " at offset " << node.offset_debug(); return float_value; } @@ -149,49 +151,57 @@ InferenceEngine::Precision XMLParseUtils::GetPrecisionAttr(const pugi::xml_node& auto attr = node.attribute(str); if (attr.empty()) IE_THROW() << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset " - << node.offset_debug(); + << node.offset_debug(); return InferenceEngine::Precision::FromStr(attr.value()); } -InferenceEngine::Precision XMLParseUtils::GetPrecisionAttr(const pugi::xml_node& node, const char* str, +InferenceEngine::Precision XMLParseUtils::GetPrecisionAttr(const pugi::xml_node& node, + const char* str, InferenceEngine::Precision def) { auto attr = node.attribute(str); - if (attr.empty()) return InferenceEngine::Precision(def); + if (attr.empty()) + return InferenceEngine::Precision(def); return InferenceEngine::Precision::FromStr(attr.value()); } int XMLParseUtils::GetIntAttr(const pugi::xml_node& node, const char* str, int defVal) { auto attr = node.attribute(str); - if (attr.empty()) return defVal; + if (attr.empty()) + return defVal; return GetIntAttr(node, str); } int64_t XMLParseUtils::GetInt64Attr(const pugi::xml_node& node, const char* str, int64_t defVal) { auto attr = node.attribute(str); - if (attr.empty()) return defVal; + if (attr.empty()) + return defVal; return GetInt64Attr(node, str); } uint64_t XMLParseUtils::GetUInt64Attr(const pugi::xml_node& node, const char* str, uint64_t defVal) { auto attr = node.attribute(str); - if (attr.empty()) return defVal; + if (attr.empty()) + return defVal; return GetUInt64Attr(node, str); } unsigned int XMLParseUtils::GetUIntAttr(const pugi::xml_node& node, const char* str, unsigned int defVal) { auto attr = node.attribute(str); - if (attr.empty()) return defVal; + if (attr.empty()) + return defVal; return GetUIntAttr(node, str); } float XMLParseUtils::GetFloatAttr(const pugi::xml_node& node, const char* str, float defVal) { auto attr = node.attribute(str); - if (attr.empty()) return defVal; + if (attr.empty()) + return defVal; return GetFloatAttr(node, str); } int XMLParseUtils::GetIntChild(const pugi::xml_node& node, const char* str, int defVal) { auto child = node.child(str); - if (child.empty()) return defVal; + if (child.empty()) + return defVal; return atoi(child.child_value()); } diff --git a/inference-engine/src/plugin_api/.clang-format b/inference-engine/src/plugin_api/.clang-format new file mode 100644 index 00000000000..ebe747b7838 --- /dev/null +++ b/inference-engine/src/plugin_api/.clang-format @@ -0,0 +1,28 @@ +BasedOnStyle: Google +IndentWidth: 4 +UseTab: Never +ColumnLimit: 120 + +Language: Cpp +Standard: Cpp11 + +AccessModifierOffset: -4 +AlignConsecutiveMacros: true +AllowAllArgumentsOnNextLine: false +AllowAllConstructorInitializersOnNextLine: false +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: Empty +AllowShortLoopsOnASingleLine: false +AlwaysBreakBeforeMultilineStrings: false +BinPackArguments: false +BinPackParameters: false +CommentPragmas: '^#' +DerivePointerAlignment: false +FixNamespaceComments: true +IndentCaseLabels: false +IndentPPDirectives: AfterHash +ForEachMacros: + - foreach + - FOREACH_CHILD diff --git a/inference-engine/src/plugin_api/blob_factory.hpp b/inference-engine/src/plugin_api/blob_factory.hpp index 1f613fd7acb..0c821471ee6 100644 --- 
a/inference-engine/src/plugin_api/blob_factory.hpp +++ b/inference-engine/src/plugin_api/blob_factory.hpp @@ -13,9 +13,9 @@ #include #include -#include "ie_memcpy.h" #include "ie_blob.h" #include "ie_data.h" +#include "ie_memcpy.h" #include "ie_preprocess.hpp" /** @@ -101,9 +101,9 @@ make_plain_blob(InferenceEngine::Precision prec, const InferenceEngine::SizeVect */ template InferenceEngine::Blob::Ptr make_blob_with_precision(InferenceEngine::Precision precision, Args&&... args) { - #define USE_FACTORY(precision) \ - case InferenceEngine::Precision::precision: \ - return make_shared_blob2(std::forward(args)...); +#define USE_FACTORY(precision) \ + case InferenceEngine::Precision::precision: \ + return make_shared_blob2(std::forward(args)...); switch (precision) { USE_FACTORY(FP32); @@ -126,7 +126,7 @@ InferenceEngine::Blob::Ptr make_blob_with_precision(InferenceEngine::Precision p default: IE_THROW() << "cannot locate blob for precision: " << precision; } - #undef USE_FACTORY +#undef USE_FACTORY } /** @@ -138,7 +138,9 @@ InferenceEngine::Blob::Ptr make_blob_with_precision(InferenceEngine::Precision p */ template void CopyVectorToBlob(const InferenceEngine::Blob::Ptr outputBlob, const std::vector& inputVector) { - if (outputBlob->size() != inputVector.size()) IE_THROW() << "Size mismatch between dims and vector"; - if (outputBlob->element_size() != sizeof(T)) IE_THROW() << "Element size mismatch between blob and vector"; + if (outputBlob->size() != inputVector.size()) + IE_THROW() << "Size mismatch between dims and vector"; + if (outputBlob->element_size() != sizeof(T)) + IE_THROW() << "Element size mismatch between blob and vector"; ie_memcpy(outputBlob->buffer().as(), outputBlob->byteSize(), &inputVector[0], inputVector.size() * sizeof(T)); } diff --git a/inference-engine/src/plugin_api/caseless.hpp b/inference-engine/src/plugin_api/caseless.hpp index d8ce739eaa9..9597ad966ca 100644 --- a/inference-engine/src/plugin_api/caseless.hpp +++ b/inference-engine/src/plugin_api/caseless.hpp @@ -21,14 +21,17 @@ namespace details { /** * @brief Provides caseless comparison for STL algorithms - * + * * @tparam Key type, usually std::string */ template -class CaselessLess { +class CaselessLess { public: bool operator()(const Key& a, const Key& b) const noexcept { - return std::lexicographical_compare(std::begin(a), std::end(a), std::begin(b), std::end(b), + return std::lexicographical_compare(std::begin(a), + std::end(a), + std::begin(b), + std::end(b), [](const char& cha, const char& chb) { return std::tolower(cha) < std::tolower(chb); }); diff --git a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp index 594d596a072..534599f752d 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp @@ -9,8 +9,8 @@ #include #include -#include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp" #include "cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp" +#include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp" #include "threading/ie_cpu_streams_executor.hpp" namespace InferenceEngine { @@ -33,14 +33,13 @@ public: * @param[in] taskExecutor The task executor used * @param[in] callbackExecutor The callback executor */ - explicit - ExecutableNetworkThreadSafeDefault(const 
ITaskExecutor::Ptr& taskExecutor - = std::make_shared(IStreamsExecutor::Config{"Default"}), - const ITaskExecutor::Ptr& callbackExecutor - = std::make_shared(IStreamsExecutor::Config{"Callback"})) : - _taskExecutor{taskExecutor}, - _callbackExecutor{callbackExecutor} { - } + explicit ExecutableNetworkThreadSafeDefault( + const ITaskExecutor::Ptr& taskExecutor = std::make_shared(IStreamsExecutor::Config{ + "Default"}), + const ITaskExecutor::Ptr& callbackExecutor = std::make_shared(IStreamsExecutor::Config{ + "Callback"})) + : _taskExecutor{taskExecutor}, + _callbackExecutor{callbackExecutor} {} /** * @brief Given optional implementation of creating asynchronous inference request to avoid @@ -64,7 +63,7 @@ protected: return std::make_shared(syncRequestImpl, _taskExecutor, _callbackExecutor); } - ITaskExecutor::Ptr _taskExecutor = nullptr; //!< Holds a task executor + ITaskExecutor::Ptr _taskExecutor = nullptr; //!< Holds a task executor ITaskExecutor::Ptr _callbackExecutor = nullptr; //!< Holds a callback executor }; diff --git a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp index 3f5c8b26de2..b88a03531a4 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp @@ -4,12 +4,6 @@ #pragma once -#include -#include -#include - -#include - #include #include #include @@ -20,6 +14,11 @@ #include #include +#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp" +#include "threading/ie_immediate_executor.hpp" +#include "threading/ie_istreams_executor.hpp" +#include "threading/ie_itask_executor.hpp" + namespace InferenceEngine { /** @@ -28,7 +27,8 @@ namespace InferenceEngine { * To customize pipeline stages derived class should change the content * of AsyncInferRequestThreadSafeDefault::_pipeline member container. * It consists of pairs of tasks and executors which will run the task. - * The class is recommended to be used by plugins as a base class for asynchronous inference request implementation. + * The class is recommended to be used by plugins as a base class for asynchronous inference request + * implementation. * @note To synchronize derived context with stages * derived class should call AsyncInferRequestThreadSafeDefault::StopAndWait() function in destructor. 
* @par Example @@ -38,7 +38,7 @@ namespace InferenceEngine { * @snippet example_async_infer_request.cpp async_infer_request:define_pipeline */ class AsyncInferRequestThreadSafeDefault : public IInferRequestInternal { - enum InferState {Idle, Busy, Canceled, Stop}; + enum InferState { Idle, Busy, Canceled, Stop }; using Futures = std::vector>; using Promise = std::shared_ptr>; enum Stage_e : std::uint8_t { executor, task }; @@ -46,11 +46,10 @@ class AsyncInferRequestThreadSafeDefault : public IInferRequestInternal { friend struct DisableCallbackGuard; struct DisableCallbackGuard { - explicit DisableCallbackGuard(AsyncInferRequestThreadSafeDefault* this_) - : _this{this_} { - std::lock_guard lock{_this->_mutex}; - std::swap(_callback, _this->_callback); - } + explicit DisableCallbackGuard(AsyncInferRequestThreadSafeDefault* this_) : _this{this_} { + std::lock_guard lock{_this->_mutex}; + std::swap(_callback, _this->_callback); + } ~DisableCallbackGuard() { std::lock_guard lock{_this->_mutex}; _this->_callback = _callback; @@ -60,12 +59,15 @@ class AsyncInferRequestThreadSafeDefault : public IInferRequestInternal { }; struct ImmediateStreamsExecutor : public InferenceEngine::ITaskExecutor { - explicit ImmediateStreamsExecutor(const IStreamsExecutor::Ptr& streamsExecutor) : _streamsExecutor{streamsExecutor} {} - void run(InferenceEngine::Task task) override {_streamsExecutor->Execute(std::move(task));} + explicit ImmediateStreamsExecutor(const IStreamsExecutor::Ptr& streamsExecutor) + : _streamsExecutor{streamsExecutor} {} + void run(InferenceEngine::Task task) override { + _streamsExecutor->Execute(std::move(task)); + } IStreamsExecutor::Ptr _streamsExecutor; }; - template + template void InferImpl(const F& f) { _syncRequest->checkBlobs(); InferState state = InferState::Idle; @@ -73,25 +75,27 @@ class AsyncInferRequestThreadSafeDefault : public IInferRequestInternal { std::lock_guard lock{_mutex}; state = _state; switch (_state) { - case InferState::Busy : + case InferState::Busy: IE_THROW(RequestBusy); - case InferState::Canceled : + case InferState::Canceled: IE_THROW(InferCancelled); - case InferState::Idle : { - _futures.erase(std::remove_if(std::begin(_futures), std::end(_futures), - [](const std::shared_future& future) { - if (future.valid()) { - return (std::future_status::ready == - future.wait_for(std::chrono::milliseconds {0})); - } else { - return true; - } - }), - _futures.end()); + case InferState::Idle: { + _futures.erase(std::remove_if(std::begin(_futures), + std::end(_futures), + [](const std::shared_future& future) { + if (future.valid()) { + return (std::future_status::ready == + future.wait_for(std::chrono::milliseconds{0})); + } else { + return true; + } + }), + _futures.end()); _promise = {}; _futures.emplace_back(_promise.get_future().share()); } break; - case InferState::Stop : break; + case InferState::Stop: + break; } _state = InferState::Busy; } @@ -112,13 +116,14 @@ protected: * @brief Throws exception if inference request is busy or canceled */ void CheckState() const { - std::lock_guard lock {_mutex}; + std::lock_guard lock{_mutex}; switch (_state) { - case InferState::Busy : + case InferState::Busy: IE_THROW(RequestBusy); - case InferState::Canceled : + case InferState::Canceled: IE_THROW(InferCancelled); - default: break; + default: + break; } } @@ -139,15 +144,22 @@ public: */ AsyncInferRequestThreadSafeDefault(const IInferRequestInternal::Ptr& request, const ITaskExecutor::Ptr& taskExecutor, - const ITaskExecutor::Ptr& callbackExecutor) : - _syncRequest 
{request}, - _requestExecutor {taskExecutor}, - _callbackExecutor {callbackExecutor}, - _pipeline {{taskExecutor, [this] {_syncRequest->InferImpl();}}}, - _syncPipeline {{std::make_shared(), [this] {_syncRequest->InferImpl();}}} { + const ITaskExecutor::Ptr& callbackExecutor) + : _syncRequest{request}, + _requestExecutor{taskExecutor}, + _callbackExecutor{callbackExecutor}, + _pipeline{{taskExecutor, + [this] { + _syncRequest->InferImpl(); + }}}, + _syncPipeline{{std::make_shared(), [this] { + _syncRequest->InferImpl(); + }}} { auto streamsExecutor = std::dynamic_pointer_cast(taskExecutor); if (streamsExecutor != nullptr) { - _syncPipeline = {{std::make_shared(std::move(streamsExecutor)), [this] {_syncRequest->InferImpl();}}}; + _syncPipeline = {{std::make_shared(std::move(streamsExecutor)), [this] { + _syncRequest->InferImpl(); + }}}; } } @@ -166,16 +178,15 @@ public: */ StatusCode Wait(int64_t millis_timeout) override { if (millis_timeout < InferRequest::WaitMode::RESULT_READY) { - IE_THROW(ParameterMismatch) - << " Timeout can't be less " - << InferRequest::WaitMode::RESULT_READY << " for InferRequest::Wait\n"; + IE_THROW(ParameterMismatch) << " Timeout can't be less " << InferRequest::WaitMode::RESULT_READY + << " for InferRequest::Wait\n"; } auto status = std::future_status::deferred; // Just use the last '_futures' member to wait pipeline completion auto future = [&] { - std::lock_guard lock {_mutex}; - return _futures.empty() ? std::shared_future {} : _futures.back(); + std::lock_guard lock{_mutex}; + return _futures.empty() ? std::shared_future{} : _futures.back(); }(); if (!future.valid()) { @@ -188,10 +199,10 @@ public: status = std::future_status::ready; } break; case InferRequest::WaitMode::STATUS_ONLY: { - status = future.wait_for(std::chrono::milliseconds {0}); + status = future.wait_for(std::chrono::milliseconds{0}); } break; default: { - status = future.wait_for(std::chrono::milliseconds {millis_timeout}); + status = future.wait_for(std::chrono::milliseconds{millis_timeout}); } break; } @@ -204,12 +215,16 @@ public: } void StartAsync() override { - InferImpl([&] {StartAsync_ThreadUnsafe();}); + InferImpl([&] { + StartAsync_ThreadUnsafe(); + }); } void Infer() override { DisableCallbackGuard disableCallbackGuard{this}; - InferImpl([&] {Infer_ThreadUnsafe();}); + InferImpl([&] { + Infer_ThreadUnsafe(); + }); Wait(InferRequest::WaitMode::RESULT_READY); } @@ -284,7 +299,8 @@ protected: * @param[in] itEndStage End pipeline iterator * @param[in] callbackExecutor Final or error stage executor */ - void RunFirstStage(const Pipeline::iterator itBeginStage, const Pipeline::iterator itEndStage, + void RunFirstStage(const Pipeline::iterator itBeginStage, + const Pipeline::iterator itEndStage, const ITaskExecutor::Ptr callbackExecutor = {}) { auto& firstStageExecutor = std::get(*itBeginStage); IE_ASSERT(nullptr != firstStageExecutor); @@ -317,11 +333,10 @@ protected: } } - - ITaskExecutor::Ptr _requestExecutor; //!< Used to run inference CPU tasks. - ITaskExecutor::Ptr _callbackExecutor; //!< Used to run post inference callback in asynchronous pipline + ITaskExecutor::Ptr _requestExecutor; //!< Used to run inference CPU tasks. + ITaskExecutor::Ptr _callbackExecutor; //!< Used to run post inference callback in asynchronous pipline ITaskExecutor::Ptr _syncCallbackExecutor; //!< Used to run post inference callback in synchronous pipline - Pipeline _pipeline; //!< Pipeline variable that should be filled by inherited class. 
+ Pipeline _pipeline; //!< Pipeline variable that should be filled by inherited class. Pipeline _syncPipeline; //!< Synchronous pipeline variable that should be filled by inherited class. /** @@ -360,57 +375,60 @@ private: * @param[in] callbackExecutor Executor that will run final stage with callback call * @return A next stage task */ - Task MakeNextStageTask(const Pipeline::iterator itStage, const Pipeline::iterator itEndStage, + Task MakeNextStageTask(const Pipeline::iterator itStage, + const Pipeline::iterator itEndStage, const ITaskExecutor::Ptr callbackExecutor) { - return std::bind([this, itStage, itEndStage](ITaskExecutor::Ptr& callbackExecutor) mutable { - std::exception_ptr currentException = nullptr; - auto& thisStage = *itStage; - auto itNextStage = itStage + 1; - try { - auto& stageTask = std::get(thisStage); - IE_ASSERT(nullptr != stageTask); - stageTask(); - if (itEndStage != itNextStage) { - auto& nextStage = *itNextStage; - auto& nextStageExecutor = std::get(nextStage); - IE_ASSERT(nullptr != nextStageExecutor); - nextStageExecutor->run(MakeNextStageTask(itNextStage, itEndStage, std::move(callbackExecutor))); - } - } catch (...) { - currentException = std::current_exception(); - } - - if ((itEndStage == itNextStage) || (nullptr != currentException)) { - auto lastStageTask = [this, currentException]() mutable { - auto promise = std::move(_promise); - Callback callback; - { - std::lock_guard lock{_mutex}; - _state = InferState::Idle; - callback = _callback; + return std::bind( + [this, itStage, itEndStage](ITaskExecutor::Ptr& callbackExecutor) mutable { + std::exception_ptr currentException = nullptr; + auto& thisStage = *itStage; + auto itNextStage = itStage + 1; + try { + auto& stageTask = std::get(thisStage); + IE_ASSERT(nullptr != stageTask); + stageTask(); + if (itEndStage != itNextStage) { + auto& nextStage = *itNextStage; + auto& nextStageExecutor = std::get(nextStage); + IE_ASSERT(nullptr != nextStageExecutor); + nextStageExecutor->run(MakeNextStageTask(itNextStage, itEndStage, std::move(callbackExecutor))); } - if (callback) { - try { - auto local_callback = std::move(callback); - local_callback(currentException); - } catch (...) { - currentException = std::current_exception(); + } catch (...) { + currentException = std::current_exception(); + } + + if ((itEndStage == itNextStage) || (nullptr != currentException)) { + auto lastStageTask = [this, currentException]() mutable { + auto promise = std::move(_promise); + Callback callback; + { + std::lock_guard lock{_mutex}; + _state = InferState::Idle; + callback = _callback; } - } - if (nullptr == currentException) { - promise.set_value(); - } else { - promise.set_exception(currentException); - } - }; + if (callback) { + try { + auto local_callback = std::move(callback); + local_callback(currentException); + } catch (...) 
{ + currentException = std::current_exception(); + } + } + if (nullptr == currentException) { + promise.set_value(); + } else { + promise.set_exception(currentException); + } + }; - if (nullptr == callbackExecutor) { - lastStageTask(); - } else { - callbackExecutor->run(std::move(lastStageTask)); + if (nullptr == callbackExecutor) { + lastStageTask(); + } else { + callbackExecutor->run(std::move(lastStageTask)); + } } - } - }, std::move(callbackExecutor)); + }, + std::move(callbackExecutor)); } std::promise _promise; diff --git a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iexecutable_network_internal.hpp b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iexecutable_network_internal.hpp index 1f3eb681e4b..27e0cf3b544 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iexecutable_network_internal.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iexecutable_network_internal.hpp @@ -9,11 +9,11 @@ #include #include -#include -#include -#include -#include -#include
+#include "cpp/ie_cnn_network.h" +#include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp" +#include "details/ie_so_pointer.hpp" +#include "ie_parameter.hpp" +#include "ie_remote_context.hpp" namespace InferenceEngine { @@ -27,7 +27,8 @@ class IVariableStateInternal; * @brief An internal API of executable network to be implemented by plugin, * @ingroup ie_dev_api_exec_network_api */ -class INFERENCE_ENGINE_API_CLASS(IExecutableNetworkInternal) : public std::enable_shared_from_this { +class INFERENCE_ENGINE_API_CLASS(IExecutableNetworkInternal) + : public std::enable_shared_from_this { public: /** * @brief A shared pointer to IExecutableNetworkInternal interface @@ -140,7 +141,7 @@ protected: virtual std::shared_ptr CreateInferRequestImpl(InputsDataMap networkInputs, OutputsDataMap networkOutputs); - InferenceEngine::InputsDataMap _networkInputs; //!< Holds information about network inputs info + InferenceEngine::InputsDataMap _networkInputs; //!< Holds information about network inputs info InferenceEngine::OutputsDataMap _networkOutputs; //!< Holds information about network outputs data /** diff --git a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iinfer_request_internal.hpp b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iinfer_request_internal.hpp index 0c409cc2639..3f48a040c0a 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iinfer_request_internal.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iinfer_request_internal.hpp @@ -4,16 +4,16 @@ #pragma once -#include -#include -#include -#include -#include - #include #include #include +#include "cpp/ie_infer_request.hpp" +#include "ie_blob.h" +#include "ie_common.h" +#include "ie_input_info.hpp" +#include "ie_preprocess_data.hpp" + namespace InferenceEngine { class IExecutableNetworkInternal; @@ -89,7 +89,8 @@ public: /** * @brief Sets pre-process for input data * @param name Name of input blob. - * @param data - a reference to input or output blob. The type of Blob must correspond to the network input precision and size. + * @param data - a reference to input or output blob. The type of Blob must correspond to the network input + * precision and size. * @param info Preprocess info for blob. 
*/ virtual void SetBlob(const std::string& name, const Blob::Ptr& data, const PreProcessInfo& info); @@ -219,17 +220,19 @@ protected: * @param deviceBlob Blob object in plugin's desired format * @return `True` if pre-processing is required, `false` otherwise */ - bool preProcessingRequired(const InputInfo::Ptr& info, const Blob::Ptr& userBlob, const Blob::Ptr& deviceBlob = nullptr); + bool preProcessingRequired(const InputInfo::Ptr& info, + const Blob::Ptr& userBlob, + const Blob::Ptr& deviceBlob = nullptr); void addInputPreProcessingFor(const std::string& name, Blob::Ptr const& from, const Blob::Ptr& to); - InferenceEngine::InputsDataMap _networkInputs; //!< Holds information about network inputs info + InferenceEngine::InputsDataMap _networkInputs; //!< Holds information about network inputs info InferenceEngine::OutputsDataMap _networkOutputs; //!< Holds information about network outputs data - InferenceEngine::BlobMap _inputs; //!< A map of user passed blobs for network inputs - InferenceEngine::BlobMap _deviceInputs; //!< A map of actual network inputs, in plugin specific format - InferenceEngine::BlobMap _outputs; //!< A map of user passed blobs for network outputs - std::map _preProcData; //!< A map of pre-process data per input - int m_curBatch = -1; //!< Current batch value used in dynamic batching + InferenceEngine::BlobMap _inputs; //!< A map of user passed blobs for network inputs + InferenceEngine::BlobMap _deviceInputs; //!< A map of actual network inputs, in plugin specific format + InferenceEngine::BlobMap _outputs; //!< A map of user passed blobs for network outputs + std::map _preProcData; //!< A map of pre-process data per input + int m_curBatch = -1; //!< Current batch value used in dynamic batching /** * @brief A shared pointer to IInferRequestInternal @@ -239,7 +242,7 @@ protected: Callback _callback; //!< A callback private: - void* _userData = nullptr; + void* _userData = nullptr; }; /** diff --git a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_internal_plugin_config.hpp b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_internal_plugin_config.hpp index 10c968b55f2..10963a5c78c 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_internal_plugin_config.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_internal_plugin_config.hpp @@ -24,7 +24,7 @@ namespace PluginConfigInternalParams { * @ingroup ie_dev_api_plugin_api * @brief Shortcut for defining internal configuration keys */ -#define CONFIG_KEY_INTERNAL(name) ::InferenceEngine::PluginConfigInternalParams::_CONFIG_KEY(name) +#define CONFIG_KEY_INTERNAL(name) ::InferenceEngine::PluginConfigInternalParams::_CONFIG_KEY(name) /** * @brief Defines a low precision mode key diff --git a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iplugin_internal.hpp b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iplugin_internal.hpp index 038c95faf94..1eb16975f65 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iplugin_internal.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iplugin_internal.hpp @@ -9,18 +9,17 @@ #pragma once -#include -#include -#include -#include - -#include - #include #include #include #include +#include "blob_factory.hpp" +#include "cpp/ie_cnn_network.h" +#include "ie_iextension.h" +#include "ie_input_info.hpp" +#include "ie_parameter.hpp" + namespace InferenceEngine { class ICore; @@ -42,10 +41,11 @@ INFERENCE_ENGINE_API_CPP(PreProcessInfo) copyPreProcess(const 
PreProcessInfo& fr * @param[in] map map to copy * @return map that contains pointers to constant values */ -template +template std::map> constMapCast(const std::map>& map) { std::map> res; - for (auto&& v : map) res.emplace(v.first, std::const_pointer_cast(v.second)); + for (auto&& v : map) + res.emplace(v.first, std::const_pointer_cast(v.second)); return res; } @@ -55,10 +55,11 @@ std::map> constMapCast(const std::map +template std::map> constMapCast(const std::map>& map) { std::map> res; - for (auto&& v : map) res.emplace(v.first, std::const_pointer_cast(v.second)); + for (auto&& v : map) + res.emplace(v.first, std::const_pointer_cast(v.second)); return res; } @@ -108,7 +109,7 @@ public: * @brief Sets a plugin version * @param version A version to set */ - void SetVersion(const Version & version); + void SetVersion(const Version& version); /** * @brief Gets a plugin version @@ -252,7 +253,8 @@ public: * @param[in] config The map of configuration parameters * @return The result of query operator containing supported layers map */ - virtual QueryNetworkResult QueryNetwork(const CNNNetwork& network, const std::map& config) const; + virtual QueryNetworkResult QueryNetwork(const CNNNetwork& network, + const std::map& config) const; protected: ~IInferencePlugin() = default; @@ -267,23 +269,27 @@ protected: * @param config string-string map of config parameters relevant only for this load operation * @return Shared pointer to the ExecutableNetwork object */ - virtual std::shared_ptr LoadExeNetworkImpl(const CNNNetwork& network, - const std::map& config); + virtual std::shared_ptr LoadExeNetworkImpl( + const CNNNetwork& network, + const std::map& config); /** * @brief Creates an executable network using remote context from a parsed network object, - * users can create as many networks as they need and use them simultaneously (up to the limitation of the HW resources) + * users can create as many networks as they need and use them simultaneously (up to the limitation of the HW + * resources) * @note The function is used in - * InferencePluginInternal::LoadNetwork(const CNNNetwork&, const std::map&, RemoteContext::Ptr) - * which performs common steps first and calls this plugin-dependent method implementation after. + * InferencePluginInternal::LoadNetwork(const CNNNetwork&, const std::map&, + * RemoteContext::Ptr) which performs common steps first and calls this plugin-dependent method implementation + * after. * @param network A network object * @param context A remote context * @param config string-string map of config parameters relevant only for this load operation * @return Shared pointer to the ExecutableNetwork object */ - virtual std::shared_ptr LoadExeNetworkImpl(const CNNNetwork& network, - const std::shared_ptr& context, - const std::map& config); + virtual std::shared_ptr LoadExeNetworkImpl( + const CNNNetwork& network, + const std::shared_ptr& context, + const std::map& config); /** * @brief Set input and output information to executable network. 
This method is used to @@ -296,9 +302,9 @@ protected: const ConstInputsDataMap& inputs, const ConstOutputsDataMap& outputs); - std::string _pluginName; //!< A device name that plugins enables + std::string _pluginName; //!< A device name that plugins enables std::map _config; //!< A map config keys -> values - std::weak_ptr _core; //!< A pointer to ICore interface + std::weak_ptr _core; //!< A pointer to ICore interface }; namespace details { diff --git a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_ivariable_state_internal.hpp b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_ivariable_state_internal.hpp index d34af53631a..435d8b66e7a 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_ivariable_state_internal.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_ivariable_state_internal.hpp @@ -4,12 +4,12 @@ #pragma once -#include -#include
- #include #include +#include "details/ie_so_pointer.hpp" +#include "ie_blob.h" + namespace InferenceEngine { /** diff --git a/inference-engine/src/plugin_api/cpp_interfaces/plugin_itt.hpp b/inference-engine/src/plugin_api/cpp_interfaces/plugin_itt.hpp index db54f244a7e..d53a8a27a38 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/plugin_itt.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/plugin_itt.hpp @@ -14,8 +14,8 @@ namespace InferenceEngine { namespace itt { namespace domains { - OV_ITT_DOMAIN(Plugin) - OV_ITT_DOMAIN(Plugin_LT) -} -} -} +OV_ITT_DOMAIN(Plugin) +OV_ITT_DOMAIN(Plugin_LT) +} // namespace domains +} // namespace itt +} // namespace InferenceEngine diff --git a/inference-engine/src/plugin_api/debug.h b/inference-engine/src/plugin_api/debug.h index d52c7b0b942..acdcef55b28 100644 --- a/inference-engine/src/plugin_api/debug.h +++ b/inference-engine/src/plugin_api/debug.h @@ -14,14 +14,14 @@ #include #include #include +#include #include -#include #include #include +#include #include #include #include -#include #include "ie_algorithm.hpp" @@ -37,7 +37,8 @@ namespace details { */ template inline std::ostream& operator<<(std::ostream& out, const std::vector& vec) { - if (vec.empty()) return std::operator<<(out, "[]"); + if (vec.empty()) + return std::operator<<(out, "[]"); out << "[" << vec[0]; for (unsigned i = 1; i < vec.size(); i++) { out << ", " << vec[i]; @@ -62,7 +63,8 @@ inline void ltrim(std::string& s) { * @param s - string to trim */ inline void rtrim(std::string& s) { - s.erase(std::find_if(s.rbegin(), s.rend(), + s.erase(std::find_if(s.rbegin(), + s.rend(), [](int c) { return !std::isspace(c); }) @@ -116,10 +118,12 @@ inline std::vector split(const std::string& src, const std::string& */ template std::string joinVec(std::vector const& vec, std::string const& glue = std::string(",")) { - if (vec.empty()) return ""; + if (vec.empty()) + return ""; std::stringstream oss; oss << vec[0]; - for (size_t i = 1; i < vec.size(); i++) oss << glue << vec[i]; + for (size_t i = 1; i < vec.size(); i++) + oss << glue << vec[i]; return oss.str(); } @@ -142,9 +146,11 @@ std::string dumpVec(std::vector const& vec) { */ template T product(std::vector const& vec) { - if (vec.empty()) return 0; + if (vec.empty()) + return 0; T ret = vec[0]; - for (size_t i = 1; i < vec.size(); ++i) ret *= vec[i]; + for (size_t i = 1; i < vec.size(); ++i) + ret *= vec[i]; return ret; } @@ -157,15 +163,17 @@ T product(std::vector const& vec) { */ template bool equal(const std::vector& v1, const std::vector& v2) { - if (v1.size() != v2.size()) return false; + if (v1.size() != v2.size()) + return false; for (auto i1 = v1.cbegin(), i2 = v2.cbegin(); i1 != v1.cend(); ++i1, ++i2) { - if (*i1 != *i2) return false; + if (*i1 != *i2) + return false; } return true; } #ifdef _WIN32 -# define strncasecmp _strnicmp +# define strncasecmp _strnicmp #endif /** @@ -191,7 +199,8 @@ inline bool equal(const std::string& lhs, const std::string& rhs, bool ignoreCas inline bool endsWith(const std::string& src, const char* with) { int wl = static_cast(strlen(with)); int so = static_cast(src.length()) - wl; - if (so < 0) return false; + if (so < 0) + return false; return 0 == strncmp(with, &src[so], wl); } @@ -204,8 +213,9 @@ inline bool endsWith(const std::string& src, const char* with) { inline std::string tolower(const std::string& s) { std::string ret; ret.resize(s.length()); - std::transform(s.begin(), s.end(), ret.begin(), - [](char c) { return static_cast(::tolower(static_cast(c))); }); + 
std::transform(s.begin(), s.end(), ret.begin(), [](char c) { + return static_cast(::tolower(static_cast(c))); + }); return ret; } } // namespace details diff --git a/inference-engine/src/plugin_api/description_buffer.hpp b/inference-engine/src/plugin_api/description_buffer.hpp index ff430842d59..533b41ac8f7 100644 --- a/inference-engine/src/plugin_api/description_buffer.hpp +++ b/inference-engine/src/plugin_api/description_buffer.hpp @@ -28,7 +28,7 @@ struct DescriptionBuffer : public std::basic_streambuf DescriptionBuffer& operator<<(const T& obj) { - if (!stream) return *this; + if (!stream) + return *this; (*stream.get()) << obj; return *this; @@ -90,7 +91,8 @@ struct DescriptionBuffer : public std::basic_streambufflush(); + if (stream) + stream->flush(); return err; } @@ -99,7 +101,8 @@ private: StatusCode err = GENERAL_ERROR; void init(ResponseDesc* desc) { - if (desc == nullptr) return; + if (desc == nullptr) + return; init(desc->msg, sizeof(desc->msg) / sizeof(desc->msg[0])); } diff --git a/inference-engine/src/plugin_api/exec_graph_info.hpp b/inference-engine/src/plugin_api/exec_graph_info.hpp index 26a97aeb7eb..5352e50bdf9 100644 --- a/inference-engine/src/plugin_api/exec_graph_info.hpp +++ b/inference-engine/src/plugin_api/exec_graph_info.hpp @@ -10,11 +10,11 @@ #pragma once -#include -#include #include -#include +#include "ie_api.h" +#include "ie_parameter.hpp" +#include "ngraph/node.hpp" /** * @brief A namespace with const values for Execution Graph parameters names. @@ -91,7 +91,7 @@ static const char RUNTIME_PRECISION[] = "runtimePrecision"; */ class INFERENCE_ENGINE_API_CLASS(ExecutionNode) : public ngraph::Node { public: - static constexpr ngraph::NodeTypeInfo type_info { "ExecutionNode", 0 }; + static constexpr ngraph::NodeTypeInfo type_info{"ExecutionNode", 0}; const ngraph::NodeTypeInfo& get_type_info() const override; /** @@ -105,8 +105,7 @@ public: * @param[in] arguments Inputs nodes * @param[in] output_size A number of output ports */ - ExecutionNode(const ngraph::OutputVector& arguments, size_t output_size = 1) : - Node(arguments, output_size) { } + ExecutionNode(const ngraph::OutputVector& arguments, size_t output_size = 1) : Node(arguments, output_size) {} /** * @brief Creates a new execution node with the same state, but different input nodes diff --git a/inference-engine/src/plugin_api/ie_algorithm.hpp b/inference-engine/src/plugin_api/ie_algorithm.hpp index 70b3ba796f7..3e602b577cd 100644 --- a/inference-engine/src/plugin_api/ie_algorithm.hpp +++ b/inference-engine/src/plugin_api/ie_algorithm.hpp @@ -38,7 +38,7 @@ bool contains(const C& container, const T& element) { * @brief Associative containers doesnt work with remove_if algorithm * @tparam ContainerT * @tparam PredicateT - * @param data An associative container + * @param data An associative container * @param predicate A predicate to remove values conditionally */ template @@ -64,7 +64,9 @@ inline void erase_if(Container& data, const PredicateT& predicate) { */ template auto product(TIterator beg, TIterator en) -> typename std::remove_reference::type { - return std::accumulate(beg, en, static_cast::type>(1), + return std::accumulate(beg, + en, + static_cast::type>(1), std::multiplies::type>()); } @@ -87,10 +89,10 @@ inline void clipping(int* idx, const int min, const int max) { * @param rhs Second set container * @return Set intersection */ -template +template static Set Intersection(const Set& lhs, const Set& rhs) { Set result; - const auto& minSizeSet = (lhs.size() < rhs.size()) ? 
lhs : rhs; + const auto& minSizeSet = (lhs.size() < rhs.size()) ? lhs : rhs; const auto& maxSizeSet = (lhs.size() >= rhs.size()) ? lhs : rhs; for (auto&& val : minSizeSet) { if (InferenceEngine::details::contains(maxSizeSet, val)) { @@ -107,9 +109,9 @@ static Set Intersection(const Set& lhs, const Set& rhs) { * @param rhs Second set container * @return true if two sets interesect false otherwise */ -template +template static bool Intersects(const Set& lhs, const Set& rhs) { - const auto& minSizeSet = (lhs.size() < rhs.size()) ? lhs : rhs; + const auto& minSizeSet = (lhs.size() < rhs.size()) ? lhs : rhs; const auto& maxSizeSet = (lhs.size() >= rhs.size()) ? lhs : rhs; for (auto&& val : minSizeSet) { if (InferenceEngine::details::contains(maxSizeSet, val)) { diff --git a/inference-engine/src/plugin_api/ie_icore.hpp b/inference-engine/src/plugin_api/ie_icore.hpp index fb4ac0b3423..9df7d436a88 100644 --- a/inference-engine/src/plugin_api/ie_icore.hpp +++ b/inference-engine/src/plugin_api/ie_icore.hpp @@ -13,10 +13,9 @@ #include #include -#include -#include +#include "cpp/ie_cnn_network.h" #include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp" - +#include "ie_parameter.hpp" #include "threading/ie_itask_executor.hpp" namespace InferenceEngine { @@ -103,7 +102,8 @@ public: * @param config Optional map of pairs: (config parameter name, config parameter value) * @return An object containing a map of pairs a layer name -> a device name supporting this layer. */ - virtual QueryNetworkResult QueryNetwork(const CNNNetwork& network, const std::string& deviceName, + virtual QueryNetworkResult QueryNetwork(const CNNNetwork& network, + const std::string& deviceName, const std::map& config) const = 0; /** @@ -147,6 +147,7 @@ public: class INFERENCE_ENGINE_API_CLASS(DeviceIDParser) { std::string deviceName; std::string deviceID; + public: explicit DeviceIDParser(const std::string& deviceNameWithID); diff --git a/inference-engine/src/plugin_api/ie_memcpy.h b/inference-engine/src/plugin_api/ie_memcpy.h index 26d07994399..10e45d935d2 100644 --- a/inference-engine/src/plugin_api/ie_memcpy.h +++ b/inference-engine/src/plugin_api/ie_memcpy.h @@ -3,7 +3,7 @@ // /** - * @brief Defines a ie_memcpy to safely (SDL-friendly) copy arrays + * @brief Defines a ie_memcpy to safely (SDL-friendly) copy arrays * @file ie_memcpy.h */ @@ -17,14 +17,14 @@ * @brief Copies bytes between buffers with security enhancements * Copies count bytes from src to dest. If the source and destination * overlap, the behavior is undefined. - * @ingroup ie_dev_api_memory - * + * @ingroup ie_dev_api_memory + * * @param dest A Pointer to the object to copy to * @param destsz A max number of bytes to modify in the destination (typically the size * of the destination object) * @param src A pointer to the object to copy from * @param count A number of bytes to copy - * + * * @return zero on success and non-zero value on error. 
*/ diff --git a/inference-engine/src/plugin_api/ie_ngraph_utils.hpp b/inference-engine/src/plugin_api/ie_ngraph_utils.hpp index 48a9a026dac..b3ff055978b 100644 --- a/inference-engine/src/plugin_api/ie_ngraph_utils.hpp +++ b/inference-engine/src/plugin_api/ie_ngraph_utils.hpp @@ -4,11 +4,12 @@ #pragma once -#include -#include -#include #include -#include +#include + +#include "cpp/ie_cnn_network.h" +#include "ie_precision.hpp" +#include "ngraph/type/element_type.hpp" namespace InferenceEngine { namespace details { @@ -137,7 +138,8 @@ inline Precision convertPrecision(const ::ngraph::element::Type& precision) { case ::ngraph::element::Type_t::dynamic: return Precision(Precision::UNSPECIFIED); default: - IE_THROW() << "Incorrect precision " << precision.get_type_name() << "!"; return{}; + IE_THROW() << "Incorrect precision " << precision.get_type_name() << "!"; + return {}; } } diff --git a/inference-engine/src/plugin_api/ie_system_conf.h b/inference-engine/src/plugin_api/ie_system_conf.h index 2e4481bf09e..254f03a5488 100644 --- a/inference-engine/src/plugin_api/ie_system_conf.h +++ b/inference-engine/src/plugin_api/ie_system_conf.h @@ -9,9 +9,10 @@ #pragma once -#include "ie_api.h" -#include #include +#include + +#include "ie_api.h" namespace InferenceEngine { @@ -25,25 +26,26 @@ namespace InferenceEngine { INFERENCE_ENGINE_API_CPP(bool) checkOpenMpEnvVars(bool includeOMPNumThreads = true); /** - * @brief Returns available CPU NUMA nodes (on Linux, and Windows [only with TBB], single node is assumed on all other OSes) + * @brief Returns available CPU NUMA nodes (on Linux, and Windows [only with TBB], single node is assumed on all + * other OSes) * @ingroup ie_dev_api_system_conf * @return NUMA nodes */ INFERENCE_ENGINE_API_CPP(std::vector) getAvailableNUMANodes(); /** - * @brief Returns available CPU cores types (on Linux, and Windows) and ONLY with TBB, single core type is assumed otherwise + * @brief Returns available CPU cores types (on Linux, and Windows) and ONLY with TBB, single core type is assumed + * otherwise * @ingroup ie_dev_api_system_conf * @return Vector of core types */ INFERENCE_ENGINE_API_CPP(std::vector) getAvailableCoresTypes(); /** - * @brief Returns number of CPU physical cores on Linux/Windows (which is considered to be more performance friendly for servers) - * (on other OSes it simply relies on the original parallel API of choice, which usually uses the logical cores). - * call function with 'false' to get #phys cores of all types - * call function with 'true' to get #phys 'Big' cores - * number of 'Little' = 'all' - 'Big' + * @brief Returns number of CPU physical cores on Linux/Windows (which is considered to be more performance + * friendly for servers) (on other OSes it simply relies on the original parallel API of choice, which usually uses the + * logical cores). call function with 'false' to get #phys cores of all types call function with 'true' to get #phys + * 'Big' cores number of 'Little' = 'all' - 'Big' * @ingroup ie_dev_api_system_conf * @param[in] bigCoresOnly Additionally limits the number of reported cores to the 'Big' cores only. * @return Number of physical CPU cores. 
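The hunk above only rewraps the documentation of the CPU topology helpers in ie_system_conf.h. As a quick illustration of what those helpers do, the sketch below calls them directly; it is not part of this patch set and assumes only the declarations documented above (getAvailableNUMANodes, getNumberOfCPUCores).

#include <iostream>

#include "ie_system_conf.h"

// Illustrative sketch, not part of the patch: query the host topology helpers
// whose comments are reflowed in the hunk above.
int main() {
    const auto numaNodes = InferenceEngine::getAvailableNUMANodes();     // NUMA node ids
    const int allPhysCores = InferenceEngine::getNumberOfCPUCores();     // physical cores of all types
    const int bigPhysCores = InferenceEngine::getNumberOfCPUCores(true); // 'Big' cores only
    std::cout << "NUMA nodes: " << numaNodes.size() << ", physical cores: " << allPhysCores
              << ", big cores: " << bigPhysCores << std::endl;
    return 0;
}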
diff --git a/inference-engine/src/plugin_api/precision_utils.h b/inference-engine/src/plugin_api/precision_utils.h index 8ec4f546338..1340b384aa1 100644 --- a/inference-engine/src/plugin_api/precision_utils.h +++ b/inference-engine/src/plugin_api/precision_utils.h @@ -9,12 +9,12 @@ #pragma once -#include - -#include -#include -#include #include +#include +#include +#include + +#include "ie_api.h" /** * @brief Inference Engine Plugin API namespace @@ -24,53 +24,53 @@ namespace InferenceEngine { /** * @defgroup ie_dev_api Inference Engine Plugin API * @brief Defines Inference Engine Plugin API which can be used in plugin development - * + * * @{ * @defgroup ie_dev_api_plugin_api Plugin base classes * @brief A set of base and helper classes to implement a plugin class * * @defgroup ie_dev_api_preproc_api Preprocessing API * @brief A set transformations to convert InferenceEngine::PreProcessInfo to ngraph operations - * + * * @defgroup ie_dev_api_exec_network_api Executable Network base classes * @brief A set of base and helper classes to implement an executable network class - * + * * @defgroup ie_dev_api_infer_request_api Inference Request base classes * @brief A set of base and helper classes to implement a syncrhonous inference request class. - * + * * @defgroup ie_dev_api_async_infer_request_api Asynchronous Inference Request base classes * @brief A set of base and helper classes to implement asynchronous inference request class - * + * * @defgroup ie_dev_api_variable_state_api Variable state base classes * @brief A set of base and helper classes to implement variable state - * + * * @defgroup ie_dev_api_threading Threading utilities * @brief Threading API providing task executors for asynchronous operations - * + * * @defgroup ie_dev_api_memory Blob creation and memory utilities * @brief An extension for public Blob API allowing to create blobs in uniform manner - * + * * @defgroup ie_dev_api_precision FP16 to FP32 precision utilities * @brief Set of functions to convert from FP32 to FP16 and vice versa. - * + * * @defgroup ie_dev_api_system_conf System configuration utilities * @brief API to get information about the system, core processor capabilities - * + * * @defgroup ie_dev_exec_graph Execution graph utilities * @brief Contains `ExecutionNode` and its properties - * + * * @defgroup ie_dev_api_error_debug Error handling and debug helpers * @brief Utility methods to works with errors or exceptional situations - * + * * @defgroup ie_dev_api_file_utils File utilities * @brief Utility functions to work with files, UNICODE support - * + * * @defgroup ie_dev_api_xml XML helper utilities * @brief A PUGIXML wrappers to safely extract values of requested type. 
* * @defgroup ie_dev_profiling ITT profiling utilities * @brief Configurable macro wrappers for ITT profiling - * + * * @} */ @@ -91,7 +91,7 @@ namespace PrecisionUtils { * @ingroup ie_dev_api_precision * * @param[in] x A single-precision floating point value - * @return A half-precision floating point value + * @return A half-precision floating point value */ INFERENCE_ENGINE_API_CPP(ie_fp16) f32tof16(float x); @@ -120,7 +120,7 @@ f16tof32Arrays(float* dst, const ie_fp16* src, size_t nelem, float scale = 1.f, /** * @brief Converts a single-precision floating point array to a half-precision floating point array - * and applies `scale` and `bias` if needed + * and applies `scale` and `bias` if needed * @ingroup ie_dev_api_precision * * @param dst A destination array of half-precision floating point values @@ -133,8 +133,8 @@ INFERENCE_ENGINE_API_CPP(void) f32tof16Arrays(ie_fp16* dst, const float* src, size_t nelem, float scale = 1.f, float bias = 0.f); #if defined(_MSC_VER) -#pragma warning(push) -#pragma warning(disable : 4018) +# pragma warning(push) +# pragma warning(disable : 4018) #endif namespace details { @@ -154,22 +154,17 @@ constexpr inline bool Greater(size_t v1, size_t v2) { * @param value Value to be converted * @return A saturated value */ -template ::value && std::is_integral::value && - std::is_signed::value && - !std::is_same::value - >::type* = nullptr> +template ::value && std::is_integral::value && + std::is_signed::value && !std::is_same::value>::type* = nullptr> inline OutT saturate_cast(const InT& value) { - using MaxT = typename std::conditional< - details::Greater(sizeof(OutT), sizeof(InT)), - typename std::make_unsigned::type, - typename std::make_unsigned::type - >::type; - using MinT = typename std::conditional< - details::Greater(sizeof(OutT), sizeof(InT)), - typename std::make_signed::type, - typename std::make_signed::type - >::type; + using MaxT = typename std::conditional::type, + typename std::make_unsigned::type>::type; + using MinT = typename std::conditional::type, + typename std::make_signed::type>::type; static const MaxT OUT_MAX = static_cast(std::numeric_limits::max()); static const MaxT IN_MAX = static_cast(std::numeric_limits::max()); @@ -195,17 +190,14 @@ inline OutT saturate_cast(const InT& value) { * @param value Value to be converted * @return A saturated value */ -template ::value && std::is_integral::value && - std::is_unsigned::value && - !std::is_same::value - >::type* = nullptr> +template ::value && std::is_integral::value && + std::is_unsigned::value && !std::is_same::value>::type* = nullptr> inline OutT saturate_cast(const InT& value) { - using MaxT = typename std::conditional< - details::Greater(sizeof(OutT), sizeof(InT)), - typename std::make_unsigned::type, - typename std::make_unsigned::type - >::type; + using MaxT = typename std::conditional::type, + typename std::make_unsigned::type>::type; static const MaxT OUT_MAX = static_cast(std::numeric_limits::max()); static const MaxT IN_MAX = static_cast(std::numeric_limits::max()); @@ -220,7 +212,7 @@ inline OutT saturate_cast(const InT& value) { } #if defined(_MSC_VER) -#pragma warning(pop) +# pragma warning(pop) #endif /** diff --git a/inference-engine/src/plugin_api/threading/ie_cpu_streams_executor.hpp b/inference-engine/src/plugin_api/threading/ie_cpu_streams_executor.hpp index 573dcb5fba1..523bb05d83a 100644 --- a/inference-engine/src/plugin_api/threading/ie_cpu_streams_executor.hpp +++ b/inference-engine/src/plugin_api/threading/ie_cpu_streams_executor.hpp @@ -30,9 +30,9 @@ public: 
using Ptr = std::shared_ptr; /** - * @brief Constructor - * @param config Stream executor parameters - */ + * @brief Constructor + * @param config Stream executor parameters + */ explicit CPUStreamsExecutor(const Config& config = {}); /** diff --git a/inference-engine/src/plugin_api/threading/ie_executor_manager.hpp b/inference-engine/src/plugin_api/threading/ie_executor_manager.hpp index e1cd819baaa..0ce1d96f222 100644 --- a/inference-engine/src/plugin_api/threading/ie_executor_manager.hpp +++ b/inference-engine/src/plugin_api/threading/ie_executor_manager.hpp @@ -9,14 +9,14 @@ #pragma once +#include #include #include -#include #include -#include +#include -#include "threading/ie_itask_executor.hpp" #include "threading/ie_istreams_executor.hpp" +#include "threading/ie_itask_executor.hpp" namespace InferenceEngine { @@ -39,7 +39,7 @@ public: private: std::unordered_map executors; - std::vector > cpuStreamsExecutors; + std::vector> cpuStreamsExecutors; std::mutex streamExecutorMutex; std::mutex taskExecutorMutex; }; @@ -101,7 +101,7 @@ private: ExecutorManagerImpl _impl; static std::mutex _mutex; - static ExecutorManager *_instance; + static ExecutorManager* _instance; }; } // namespace InferenceEngine diff --git a/inference-engine/src/plugin_api/threading/ie_immediate_executor.hpp b/inference-engine/src/plugin_api/threading/ie_immediate_executor.hpp index d13a11889fb..36e9fcbc89b 100644 --- a/inference-engine/src/plugin_api/threading/ie_immediate_executor.hpp +++ b/inference-engine/src/plugin_api/threading/ie_immediate_executor.hpp @@ -20,11 +20,11 @@ namespace InferenceEngine { * @brief Task executor implementation that just run tasks in current thread during calling of run() method * @ingroup ie_dev_api_threading */ -class ImmediateExecutor: public ITaskExecutor { +class ImmediateExecutor : public ITaskExecutor { public: - /** - * @brief A shared pointer to a ImmediateExecutor object - */ + /** + * @brief A shared pointer to a ImmediateExecutor object + */ using Ptr = std::shared_ptr; /** diff --git a/inference-engine/src/plugin_api/threading/ie_istreams_executor.hpp b/inference-engine/src/plugin_api/threading/ie_istreams_executor.hpp index 4dd80f411bc..8bb82aa974f 100644 --- a/inference-engine/src/plugin_api/threading/ie_istreams_executor.hpp +++ b/inference-engine/src/plugin_api/threading/ie_istreams_executor.hpp @@ -10,8 +10,8 @@ #pragma once #include -#include #include +#include #include "ie_parameter.hpp" #include "threading/ie_itask_executor.hpp" @@ -39,11 +39,13 @@ public: * @brief Defines inference thread binding type */ enum ThreadBindingType : std::uint8_t { - NONE, //!< Don't bind the inference threads - CORES, //!< Bind inference threads to the CPU cores (round-robin) + NONE, //!< Don't bind the inference threads + CORES, //!< Bind inference threads to the CPU cores (round-robin) // the following modes are implemented only for the TBB code-path: - NUMA, //!< Bind to the NUMA nodes (default mode for the non-hybrid CPUs on the Win/MacOS, where the 'CORES' is not implemeneted) - HYBRID_AWARE //!< Let the runtime bind the inference threads depending on the cores type (default mode for the hybrid CPUs) + NUMA, //!< Bind to the NUMA nodes (default mode for the non-hybrid CPUs on the Win/MacOS, where the 'CORES' is + //!< not implemeneted) + HYBRID_AWARE //!< Let the runtime bind the inference threads depending on the cores type (default mode for the + //!< hybrid CPUs) }; /** @@ -51,48 +53,55 @@ public: */ struct INFERENCE_ENGINE_API_CLASS(Config) { /** - * @brief Supported 
Configuration keys - * @return vector of supported configuration keys - */ + * @brief Supported Configuration keys + * @return vector of supported configuration keys + */ std::vector SupportedKeys(); /** - * @brief Parses configuration key/value pair - * @param key configuration key - * @param value configuration values - */ + * @brief Parses configuration key/value pair + * @param key configuration key + * @param value configuration values + */ void SetConfig(const std::string& key, const std::string& value); /** - * @brief Return configuration value - * @param key configuration key - * @return configuration value wrapped into Parameter - */ + * @brief Return configuration value + * @param key configuration key + * @return configuration value wrapped into Parameter + */ Parameter GetConfig(const std::string& key); /** - * @brief Create appropriate multithreaded configuration - * filing unconfigured values from initial configuration using hardware properties - * @param initial Inital configuration - * @param fp_intesive additional hint for the the (Hybrid) core-types selection logic - * whether the executor should be configured for floating point intensive work (as opposite to int8 intensive) - * @return configured values - */ + * @brief Create appropriate multithreaded configuration + * filing unconfigured values from initial configuration using hardware properties + * @param initial Inital configuration + * @param fp_intesive additional hint for the the (Hybrid) core-types selection logic + * whether the executor should be configured for floating point intensive work (as opposite to int8 + * intensive) + * @return configured values + */ static Config MakeDefaultMultiThreaded(const Config& initial, const bool fp_intesive = true); - std::string _name; //!< Used by `ITT` to name executor threads - int _streams = 1; //!< Number of streams. - int _threadsPerStream = 0; //!< Number of threads per stream that executes `ie_parallel` calls - ThreadBindingType _threadBindingType = ThreadBindingType::NONE; //!< Thread binding to hardware resource type. No binding by default - int _threadBindingStep = 1; //!< In case of @ref CORES binding offset type thread binded to cores with defined step - int _threadBindingOffset = 0; //!< In case of @ref CORES binding offset type thread binded to cores starting from offset - int _threads = 0; //!< Number of threads distributed between streams. Reserved. Should not be used. + std::string _name; //!< Used by `ITT` to name executor threads + int _streams = 1; //!< Number of streams. + int _threadsPerStream = 0; //!< Number of threads per stream that executes `ie_parallel` calls + ThreadBindingType _threadBindingType = ThreadBindingType::NONE; //!< Thread binding to hardware resource type. + //!< No binding by default + int _threadBindingStep = 1; //!< In case of @ref CORES binding offset type + //!< thread binded to cores with defined step + int _threadBindingOffset = 0; //!< In case of @ref CORES binding offset type thread binded to cores + //!< starting from offset + int _threads = 0; //!< Number of threads distributed between streams. + //!< Reserved. Should not be used. 
enum PreferredCoreType { ANY, LITTLE, BIG, - ROUND_ROBIN // used w/multiple streams to populate the Big cores first, then the Little, then wrap around (for large #streams) - } _threadPreferredCoreType = PreferredCoreType::ANY; //!< In case of @ref HYBRID_AWARE hints the TBB to affinitize + ROUND_ROBIN // used w/multiple streams to populate the Big cores first, then the Little, then wrap around + // (for large #streams) + } _threadPreferredCoreType = + PreferredCoreType::ANY; //!< In case of @ref HYBRID_AWARE hints the TBB to affinitize /** * @brief A constructor with arguments @@ -106,23 +115,22 @@ public: * @param[in] threads @copybrief Config::_threads * @param[in] threadPreferBigCores @copybrief Config::_threadPreferBigCores */ - Config( - std::string name = "StreamsExecutor", - int streams = 1, - int threadsPerStream = 0, - ThreadBindingType threadBindingType = ThreadBindingType::NONE, - int threadBindingStep = 1, - int threadBindingOffset = 0, - int threads = 0, - PreferredCoreType threadPreferredCoreType = PreferredCoreType::ANY) : - _name{name}, - _streams{streams}, - _threadsPerStream{threadsPerStream}, - _threadBindingType{threadBindingType}, - _threadBindingStep{threadBindingStep}, - _threadBindingOffset{threadBindingOffset}, - _threads{threads}, _threadPreferredCoreType(threadPreferredCoreType){ - } + Config(std::string name = "StreamsExecutor", + int streams = 1, + int threadsPerStream = 0, + ThreadBindingType threadBindingType = ThreadBindingType::NONE, + int threadBindingStep = 1, + int threadBindingOffset = 0, + int threads = 0, + PreferredCoreType threadPreferredCoreType = PreferredCoreType::ANY) + : _name{name}, + _streams{streams}, + _threadsPerStream{threadsPerStream}, + _threadBindingType{threadBindingType}, + _threadBindingStep{threadBindingStep}, + _threadBindingOffset{threadBindingOffset}, + _threads{threads}, + _threadPreferredCoreType(threadPreferredCoreType) {} }; /** @@ -131,24 +139,22 @@ public: ~IStreamsExecutor() override; /** - * @brief Return the index of current stream - * @return An index of current stream. Or throw exceptions if called not from stream thread - */ - virtual int GetStreamId() = 0; + * @brief Return the index of current stream + * @return An index of current stream. 
Or throw exceptions if called not from stream thread + */ + virtual int GetStreamId() = 0; /** - * @brief Return the id of current NUMA Node - * @return `ID` of current NUMA Node, or throws exceptions if called not from stream thread - */ - virtual int GetNumaNodeId() = 0; + * @brief Return the id of current NUMA Node + * @return `ID` of current NUMA Node, or throws exceptions if called not from stream thread + */ + virtual int GetNumaNodeId() = 0; /** - * @brief Execute the task in the current thread using streams executor configuration and constraints - * @param task A task to start - */ + * @brief Execute the task in the current thread using streams executor configuration and constraints + * @param task A task to start + */ virtual void Execute(Task task) = 0; }; - - } // namespace InferenceEngine diff --git a/inference-engine/src/plugin_api/threading/ie_thread_local.hpp b/inference-engine/src/plugin_api/threading/ie_thread_local.hpp index 952fdb27871..2cff0edd0cd 100644 --- a/inference-engine/src/plugin_api/threading/ie_thread_local.hpp +++ b/inference-engine/src/plugin_api/threading/ie_thread_local.hpp @@ -12,14 +12,14 @@ #include "ie_parallel.hpp" #if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO -# include +# include #else -# include -# include -# include -# include -# include -# include +# include +# include +# include +# include +# include +# include #endif namespace InferenceEngine { @@ -31,34 +31,36 @@ namespace InferenceEngine { * @ingroup ie_dev_api_threading * @tparam T A type of object to keep thread local. */ -template +template using ThreadLocal = tbb::enumerable_thread_specific; #else -template +template struct ThreadLocal { - using Map = std::unordered_map; - using Create = std::function; - Map _map; + using Map = std::unordered_map; + using Create = std::function; + Map _map; mutable std::mutex _mutex; - Create _create; + Create _create; - ThreadLocal() : _create{[]{return T{};}} {} - explicit ThreadLocal(const T& init) : _create{[init]{return init;}} {} - ThreadLocal(ThreadLocal&& other) : - _map{std::move(other._map)}, - _create{std::move(other._create)} { - } + ThreadLocal() + : _create{[] { + return T{}; + }} {} + explicit ThreadLocal(const T& init) + : _create{[init] { + return init; + }} {} + ThreadLocal(ThreadLocal&& other) : _map{std::move(other._map)}, _create{std::move(other._create)} {} ThreadLocal& operator=(ThreadLocal&& other) { - _map = std::move(other._map); + _map = std::move(other._map); _create = std::move(other._create); return *this; } - ThreadLocal(const ThreadLocal&) = delete; + ThreadLocal(const ThreadLocal&) = delete; ThreadLocal& operator=(const ThreadLocal&&) = delete; - explicit ThreadLocal(const Create& create_) : _create{create_} - {} + explicit ThreadLocal(const Create& create_) : _create{create_} {} T& local() { auto threadId = std::this_thread::get_id(); @@ -71,7 +73,7 @@ struct ThreadLocal { } } - auto size() const -> decltype(_map.size()) { + auto size() const -> decltype(_map.size()) { std::lock_guard lock{_mutex}; return _map.size(); } @@ -80,18 +82,39 @@ struct ThreadLocal { template struct Iterator { It it; - bool operator!=(const Iterator& other) {return it != other.it;} - Iterator& operator++() {++it; return *this;} - auto operator*() -> decltype(it->second) {return it->second;} - auto operator->() -> decltype(&(it->second)) {return &(it->second);} - auto operator*() const -> decltype(it->second) {return it->second;} - auto operator->() const -> decltype(&(it->second)) {return &(it->second);} + bool 
operator!=(const Iterator& other) { + return it != other.it; + } + Iterator& operator++() { + ++it; + return *this; + } + auto operator*() -> decltype(it->second) { + return it->second; + } + auto operator-> () -> decltype(&(it->second)) { + return &(it->second); + } + auto operator*() const -> decltype(it->second) { + return it->second; + } + auto operator-> () const -> decltype(&(it->second)) { + return &(it->second); + } }; - auto begin() -> Iterator {return {_map.begin()};} - auto end() -> Iterator {return {_map.end()};} - auto begin() const -> Iterator const {return {_map.begin()};} - auto end() const -> Iterator const {return {_map.end()};} + auto begin() -> Iterator { + return {_map.begin()}; + } + auto end() -> Iterator { + return {_map.end()}; + } + auto begin() const -> Iterator const { + return {_map.begin()}; + } + auto end() const -> Iterator const { + return {_map.end()}; + } }; #endif diff --git a/inference-engine/src/plugin_api/xml_parse_utils.h b/inference-engine/src/plugin_api/xml_parse_utils.h index 911745cdf43..7ce0ee46927 100644 --- a/inference-engine/src/plugin_api/xml_parse_utils.h +++ b/inference-engine/src/plugin_api/xml_parse_utils.h @@ -12,16 +12,15 @@ #include #include #include +#include #include #include #include -#include - -#include "ie_api.h" -#include "ie_precision.hpp" -#include "ie_common.h" #include "file_utils.h" +#include "ie_api.h" +#include "ie_common.h" +#include "ie_precision.hpp" /** * @ingroup ie_dev_api_xml @@ -232,17 +231,18 @@ struct parse_result { * @param[in] error_msg The error message */ parse_result(std::unique_ptr&& xml, std::string error_msg) - : xml(std::move(xml)), error_msg(std::move(error_msg)) {} + : xml(std::move(xml)), + error_msg(std::move(error_msg)) {} /** - * @brief A XML document. + * @brief A XML document. 
*/ std::unique_ptr xml; /** * @brief An error message */ - std::string error_msg {}; + std::string error_msg{}; }; /** @@ -262,14 +262,16 @@ inline parse_result ParseXml(const char* file_path) { #endif try { - auto xml = std::unique_ptr {new pugi::xml_document {}}; + auto xml = std::unique_ptr{new pugi::xml_document{}}; const auto load_result = xml->load_file(resolvedFilepath); const auto error_msg = [&]() -> std::string { - if (load_result.status == pugi::status_ok) return {}; + if (load_result.status == pugi::status_ok) + return {}; std::ifstream file_stream(file_path); - const auto file = std::string(std::istreambuf_iterator {file_stream}, std::istreambuf_iterator {}); + const auto file = + std::string(std::istreambuf_iterator{file_stream}, std::istreambuf_iterator{}); const auto error_offset = std::next(file.rbegin(), file.size() - load_result.offset); const auto line_begin = std::find(error_offset, file.rend(), '\n'); @@ -277,12 +279,13 @@ inline parse_result ParseXml(const char* file_path) { const auto pos = std::distance(error_offset, line_begin); std::stringstream ss; - ss << "Error loading XML file: " << file_path << ":" << line << ":" << pos << ": " << load_result.description(); + ss << "Error loading XML file: " << file_path << ":" << line << ":" << pos << ": " + << load_result.description(); return ss.str(); }(); return {std::move(xml), error_msg}; - } catch(std::exception& e) { + } catch (std::exception& e) { return {std::move(nullptr), std::string("Error loading XML file: ") + e.what()}; } } From 7c82ad78eea8b5453639e9c6395f8d0671513d0a Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 10 Aug 2021 11:58:57 +0300 Subject: [PATCH 16/24] [CPU] Add reorder if the constant memory is not aligned, and isa is SSE (#6912) --- .../src/mkldnn_plugin/mkldnn_edge.cpp | 34 ++++++++++++++++--- .../src/mkldnn_plugin/mkldnn_memory.h | 2 +- .../nodes/mkldnn_reorder_node.cpp | 9 ++--- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp index 34261b1ac87..acce99cfbd3 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp @@ -6,6 +6,7 @@ #include "mkldnn_node.h" #include "mkldnn_extension_utils.h" #include +#include using namespace mkldnn; namespace MKLDNNPlugin { @@ -66,8 +67,13 @@ void MKLDNNEdge::drop() { bool MKLDNNEdge::needReorder() { + if (!getInputDesc().isCompatible(getOutputDesc())) { + return true; + } + bool canBeInPlaceConflicts = false; - auto parentSPD = getParent()->getSelectedPrimitiveDescriptor(); + auto parentNode = getParent(); + auto parentSPD = parentNode->getSelectedPrimitiveDescriptor(); auto childSPD = getChild()->getSelectedPrimitiveDescriptor(); if (!parentSPD || !childSPD) IE_THROW() << "Cannot make a decision about reorder. 
Primitive descriptors weren't selected."; @@ -96,10 +102,10 @@ bool MKLDNNEdge::needReorder() { return count; }; - const auto portChildEdges = getParent()->getChildEdgesAtPort(inNumber); + const auto portChildEdges = parentNode->getChildEdgesAtPort(inNumber); if (in_place && childCanChangeMem && portChildEdges.size() > 1 && detectInPlaceChildrenNum(portChildEdges) > 1) canBeInPlaceConflicts = true; - if (!canBeInPlaceConflicts && in_place && !getParent()->getChildEdges().empty()) { + if (!canBeInPlaceConflicts && in_place && !parentNode->getChildEdges().empty()) { for (auto &p_edge_peer : portChildEdges) { if (p_edge_peer.get() == this) continue; @@ -113,7 +119,27 @@ bool MKLDNNEdge::needReorder() { outNumber >= 0 && outNumber < childSPD->getConfig().inConfs.size() && childSPD->getConfig().inConfs[outNumber].inPlace >= 0) canBeInPlaceConflicts = true; } - return canBeInPlaceConflicts || !getInputDesc().isCompatible(getOutputDesc()); + + if (canBeInPlaceConflicts) { + return true; + } + + // In case the parent node is an input constant, the memory is unaligned and the child primitive isa is SSE, + // we have to insert reorder since the vast majority of arithmetic and data processing instructions in legacy SSE isa requires + // the memory address in the operands must be aligned on 16-byte boundary. + if ((childSPD->getImplementationType() & impl_desc_type::sse42) && + Type::Input == parentNode->getType() && + parentNode->isConstant()) { + if (auto pInputNode = std::dynamic_pointer_cast(parentNode)) { + auto rawMemPtr = pInputNode->getMemoryPtr()->GetData(); + bool isAligned = (reinterpret_cast(rawMemPtr) & 15) == 0; + if (!isAligned) { + return true; + } + } + } + + return false; } void MKLDNNEdge::reuse(MKLDNNMemoryPtr ptr) { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h index d4cf4fc634b..42284edca83 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h @@ -148,7 +148,7 @@ public: */ void* GetData() const { void* data = prim->get_data_handle(); - if (data == nullptr) + if (data == nullptr && pMemDesc->getShape().getElementsCount() != 0) IE_THROW() << "Cannot get memory!"; return data; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp index 99bd606a9a0..c1701deacde 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp @@ -133,11 +133,12 @@ void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc &srcDe // split group dimension in separate shape dimension. IE use OIHW, but mkldnn expect GOIHW. // So we will perform implicit reshape to dst shape. // - // MKLDNN doesn't support direct reorders from planar data formats to grouped weights formats. - // Code block below tries to detect such cases and reinterpret data planar formats (e.g. nchw) - // as grouped weights planar formats (e.g. goihw) since they have same physical memory layout. + // MKLDNN doesn't support direct reorders for tensors of different rank. The code below tries to + // perform such conversion if the source tensor can be reshaped to the destination rank. This is + // useful in situations when rank in IR does not much rank that is required by the oneDNN primitive, + // but the input tensor can be reshaped (e.g. weights for grouped convolutions, biases etc.) 
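    // (Illustrative note, hypothetical dims not taken from this patch): plain OIHW weights of
    // {16, 4, 3, 3} feeding a convolution with 2 groups can be viewed as GOIHW {2, 8, 4, 3, 3};
    // the element count and the dense physical layout are identical, so only the descriptor rank
    // has to change before the reorder primitive is created.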
if (src_blocked->GetDesc().hasLayoutType(LayoutType::ncsp) && - src_blocked->GetDims().size() + 1 == dst_blocked->GetDims().size()) { + src_blocked->GetDims().size() != dst_blocked->GetDims().size()) { const auto newDims = dst_blocked->GetDims(); const auto newFormat = MKLDNNMemory::GetPlainFormatByRank(newDims.size()); From cc76d3892076f2a7cb8152bda06180bbf9cd2191 Mon Sep 17 00:00:00 2001 From: Evgeny Kotov Date: Tue, 10 Aug 2021 13:39:16 +0300 Subject: [PATCH 17/24] remove InsertIdentityToLSTMCellPass and fix InsertIdentity (#6962) * check is node final non-functional for grouping; remove InsertIdentityToLSTMCellPass * code style fix --- .../src/gna_plugin/gna_plugin.cpp | 1 - .../gna_plugin/optimizer/gna_pass_manager.cpp | 47 ++++--------------- 2 files changed, 8 insertions(+), 40 deletions(-) diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index bb3451c0aa7..6cef822e120 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -757,7 +757,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { passes->registerPass(); passes->registerPass(); passes->registerPass(); - passes->registerPass(); passes->registerPass(); passes->registerPass(); diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index fb7a673ca1b..75db6c9a474 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -131,6 +131,12 @@ static CNNLayerPtr InsertCopyLayer(CNNLayerPtr prevLayer, CNNLayerPtr nextLayer, return copyWithQuant; } +static bool hasNextFuncLayer(const CNNLayerPtr layer) { + return CNNNetHasNextLayerSkipCertain(layer, 0, 0, [](CNNLayerPtr layer) { + return LayerInfo(layer).isNonFunctional(); + }); +} + static std::vector getCandidatesForIdentityInsertion(const CNNLayerPtr l, std::shared_ptr passmanager) { std::vector prevLayers; @@ -796,7 +802,8 @@ void InsertIdentityLayerPass::run() { for (auto && nextLayer : getInputTo(nextData)) { if (nextLayer.second.get() == l.get()) continue; - if (getCandidatesForIdentityInsertion(nextLayer.second, getPassManager()).empty()) { + if (getCandidatesForIdentityInsertion(nextLayer.second, getPassManager()).empty() && + hasNextFuncLayer(nextLayer.second)) { notAll = true; } } @@ -1608,44 +1615,6 @@ void BroadcastConstPass::run() { } } -void InsertIdentityToLSTMCellPass::run() { - OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertIdentityToLSTMCellPass"); - for (auto layer : *pLayers) { - if (layer->type == "LSTMCell") { - // This fixed the cases when both functional and non-functional outputs are mixed (or not outputs are used) - // which results in scratch buffer being used so outputs cannot be used in form of blob or by non-functional layers - // downside is scaling down from i32 to i16 which may - for (int output_idx = 0; output_idx < layer->outData.size(); output_idx++) { - int numOfIdentityLayers = ((this->getPassManager())->getIntVar(identityLayersCounterName))++; - auto activationName = std::string("lstm_identity_") + std::to_string(numOfIdentityLayers); - auto& output = layer->outData[output_idx]; - auto& input_to = getInputTo(output); - - CNNLayerPtr activationLayer = - std::make_shared(LayerParams({activationName, "identity", InferenceEngine::Precision::FP32})); - - auto dataPtr = std::make_shared("lstm_identity_data_" + std::to_string(numOfIdentityLayers), 
output->getTensorDesc()); - - auto quantized = InferenceEngine::getInjectedData(layer); - auto activationLayerWithQuant = quantized ? InferenceEngine::injectData(activationLayer) : activationLayer; - getCreatorLayer(dataPtr) = activationLayerWithQuant; - activationLayerWithQuant->outData.push_back(dataPtr); - activationLayerWithQuant->insData.push_back(output); - auto& activationInputTo = getInputTo(dataPtr); - - for (auto& input : input_to) { - auto& next_layer = input.second; - activationInputTo[input.first] = next_layer; - std::replace_if(std::begin(next_layer->insData), std::end(next_layer->insData), - [output](DataWeakPtr data) { return data.lock() == output; }, dataPtr); - } - input_to.clear(); - input_to[activationName] = activationLayerWithQuant; - } - } - } -} - void BreakFusingOfOutputLayersPass::run() { OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "BreakFusingOfOutputLayersPass"); #if GNA_LIB_VER == 1 From 48358965827231721003342af2fcc7f5552e1956 Mon Sep 17 00:00:00 2001 From: Dmitrii Khurtin Date: Tue, 10 Aug 2021 14:08:17 +0300 Subject: [PATCH 18/24] Convolution to matmul (#6803) * [GNA] Remove transposes around MatMul * Added tests for transformation HandleTransposesAroundMatMul * Move IsTransposeSupported function to GNA limitations file * Correct transpose insertion tests * added TransposeAfterMatmul tests and moved InsertTransposeBeforeMatmul tests to handle_transposes_around_matmul.cpp * added inifitiry loop checker and memory concat test * fixed build errors * changed the conditions for selecting an input of Concat for ScaleFactor calculation when entering an infinite loop * fixed after review Co-authored-by: Elizaveta Lobanova --- .../gna_plugin/backend/gna_limitations.hpp | 11 +- .../src/gna_plugin/backend/make_pwl.cpp | 7 +- .../gna_plugin/frontend/model_quantizer.hpp | 60 +++++- .../gna_plugin/frontend/scale_factor_calc.hpp | 44 ++-- .../src/gna_plugin/gna_graph_compiler.cpp | 7 +- .../src/gna_plugin/gna_plugin.cpp | 4 +- .../gna_plugin/optimizer/gna_pass_manager.cpp | 7 +- .../handle_transposes_around_matmul.cpp | 132 ++++++++++++ .../handle_transposes_around_matmul.hpp | 63 ++++++ .../insert_transpose_before_matmul.cpp | 69 ------ .../insert_transpose_before_matmul.hpp | 30 --- .../subgraph_tests/memory_fq_concat_prelu.cpp | 69 ++++++ .../subgraph_tests/relu_split_reshape.cpp | 45 ++++ .../subgraph_tests/memory_fq_concat_prelu.hpp | 18 ++ .../subgraph_tests/relu_split_reshape.hpp | 15 ++ .../subgraph/memory_fq_concat_prelu.hpp | 50 +++++ .../subgraph/relu_split_reshape.hpp | 34 +++ .../src/subgraph/memory_fq_concat_prelu.cpp | 124 +++++++++++ .../src/subgraph/relu_split_reshape.cpp | 50 +++++ .../gna_insert_transpose_before_matmul.cpp | 138 ------------ .../handle_transposes_around_matmul.cpp | 201 ++++++++++++++++++ 21 files changed, 907 insertions(+), 271 deletions(-) create mode 100644 inference-engine/src/gna_plugin/transformations/handle_transposes_around_matmul.cpp create mode 100644 inference-engine/src/gna_plugin/transformations/handle_transposes_around_matmul.hpp delete mode 100644 inference-engine/src/gna_plugin/transformations/insert_transpose_before_matmul.cpp delete mode 100644 inference-engine/src/gna_plugin/transformations/insert_transpose_before_matmul.hpp create mode 100644 inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/memory_fq_concat_prelu.cpp create mode 100644 inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/relu_split_reshape.cpp create mode 100644 
inference-engine/tests/functional/plugin/shared/include/subgraph_tests/memory_fq_concat_prelu.hpp create mode 100644 inference-engine/tests/functional/plugin/shared/include/subgraph_tests/relu_split_reshape.hpp create mode 100644 inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/memory_fq_concat_prelu.hpp create mode 100644 inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/relu_split_reshape.hpp create mode 100644 inference-engine/tests/functional/shared_test_classes/src/subgraph/memory_fq_concat_prelu.cpp create mode 100644 inference-engine/tests/functional/shared_test_classes/src/subgraph/relu_split_reshape.cpp delete mode 100644 inference-engine/tests/unit/gna/ngraph/transformations/gna_insert_transpose_before_matmul.cpp create mode 100644 inference-engine/tests/unit/gna/ngraph/transformations/handle_transposes_around_matmul.cpp diff --git a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp index 6a3af8e428b..731155df31d 100644 --- a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp +++ b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp @@ -25,8 +25,17 @@ constexpr uint32_t noOfInputsLowPrecDivisor = 16; constexpr uint32_t affineMaxBatchSize = 8; constexpr uint32_t maxPoolMaxWindowSize = 6; - constexpr uint32_t copyMaxGrouping = 8; +constexpr uint32_t transposeMaxSize = 65528; + +inline bool IsTransposeSupported(const std::vector& shape) { + auto shape_no_1 = shape; + shape_no_1.erase(std::remove(shape_no_1.begin(), shape_no_1.end(), 1), shape_no_1.end()); + if (shape_no_1.size() != 2) return false; + size_t min, max; + std::tie(min, max) = std::minmax(shape_no_1[0], shape_no_1[1]); + return min <= 8 && max % 8 == 0 && max >= 8 && max <= transposeMaxSize; +} namespace Cnn2D { struct RangeLimit { diff --git a/inference-engine/src/gna_plugin/backend/make_pwl.cpp b/inference-engine/src/gna_plugin/backend/make_pwl.cpp index cd9ff0852e3..e0f71bc7fc7 100644 --- a/inference-engine/src/gna_plugin/backend/make_pwl.cpp +++ b/inference-engine/src/gna_plugin/backend/make_pwl.cpp @@ -279,19 +279,20 @@ void make_gna_pwl(const DnnActivation fun, gnalog() << "=========================== LeakyReLU Segments ======================\n"; int32_t x_lower = INT32_MIN; int32_t x_upper = INT32_MAX; - int16_t y_lower = y_min; + int32_t y_lower = y_min; int16_t y_upper = y_max; if (fun.fqParams.set) { x_lower = std::max(FLOAT_TO_INT64(*fun.fqParams.input_low * 1.25 * in_scale), static_cast(x_lower)); x_upper = std::min(FLOAT_TO_INT64(*fun.fqParams.input_high * 1.25 * in_scale), static_cast(x_upper)); - y_lower = std::max(FLOAT_TO_INT32(*fun.fqParams.input_low * 1.25 * out_scale), static_cast(y_lower)); + // y_lower can be reduced with negative slope + y_lower = *fun.fqParams.input_low * 1.25 * out_scale; y_upper = std::min(FLOAT_TO_INT32(*fun.fqParams.input_high * 1.25 * out_scale), static_cast(y_upper)); } else { if (x_lower < y_lower * in_scale / out_scale) x_lower = FLOAT_TO_INT32(y_lower * in_scale / out_scale); if (y_lower < x_lower * out_scale / in_scale) y_lower = FLOAT_TO_INT16(x_lower * out_scale / in_scale); } - gna_pwl[0].yBase = y_lower * fun.args.lrelu.negative_slope; + gna_pwl[0].yBase = std::max(FLOAT_TO_INT32(y_lower * fun.args.lrelu.negative_slope), static_cast(y_min)); s = gna_slope(fun.args.lrelu.negative_slope, in_scale, out_scale); gna_pwl[0].xBase = (x_lower & XBASEMASK) | s.slope_scale_index; // zero out the 2 lsb 
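        // Clarifying note (editorial, not part of the commit): the yBase above is y_lower multiplied
        // by the negative slope and clamped so it does not fall below y_min, since y_lower may now
        // lie outside the int16 range once the fake-quantize input_low is applied; the slope on the
        // next line is stored as int16 scaled by s.slope_scale, with s.slope_scale_index packed into
        // the two least significant bits that were just masked out of xBase.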
gna_pwl[0].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); diff --git a/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp b/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp index b57813858ac..cd15b1c1b13 100644 --- a/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp +++ b/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp @@ -100,14 +100,70 @@ class ModelQuantizer { int optWeightsBytesSize, int inputsBytesSize, bool fakeQuantize) const { ScaleFactorCalculator sf(net, mandWeightsBytesSize, optWeightsBytesSize, inputsBytesSize, fakeQuantize); - while (!sf.allLayersProcessed()) { - for (auto &&layer : sf.getStartLayers()) { + int infiniteLoopCount = 0; + std::vector infiniteLoopPattern; + std::vector infiniteLoopHistory; + while (!sf.allLayersProcessed() && infiniteLoopCount <= 2) { + auto layers = sf.getStartLayers(); + infiniteLoopHistory.emplace_back(layers.front()->name); + for (auto &&layer : layers) { transformLayer(layer, sf); // transforming until we reached cases where output scale updated due to situation in downstream layer if (sf.needToRestart()) { + infiniteLoopHistory.back() += "#" + layer->name; break; } } + + // looking for infinite loop by using algorithm of compute prefix function, complexity O(N) + std::map prefixFunction; + int k = infiniteLoopHistory.size(); + for (int i = infiniteLoopHistory.size() - 2; i >= 0; i--) { + while (k < infiniteLoopHistory.size() && infiniteLoopHistory[k - 1] != infiniteLoopHistory[i]) { + auto iter = prefixFunction.find(k); + k = iter == prefixFunction.end() ? infiniteLoopHistory.size() : iter->second; + } + + if (infiniteLoopHistory[k - 1] == infiniteLoopHistory[i]) { + k--; + } + + if ((infiniteLoopHistory.size() - i) % 2 == 0 && (infiniteLoopHistory.size() - i) / 2 == infiniteLoopHistory.size() - k) { + infiniteLoopPattern.clear(); + int patternLength = (infiniteLoopHistory.size() - i)/2; + for (int j = 0; j < patternLength; j++) { + infiniteLoopPattern.emplace_back(infiniteLoopHistory[infiniteLoopHistory.size() - patternLength + j]); + } + infiniteLoopHistory.clear(); + gnalog() << "infinite loop detected\n"; + break; + } + + prefixFunction.emplace(i, k); + } + + if (infiniteLoopHistory.empty()) { + infiniteLoopCount++; + } else { + if (infiniteLoopCount > 0 && + (infiniteLoopHistory.size()%infiniteLoopPattern.size() == 0 || sf.allLayersProcessed()) && + !std::equal(infiniteLoopHistory.begin() + (infiniteLoopHistory.size() - infiniteLoopPattern.size()), + infiniteLoopHistory.end(), infiniteLoopPattern.begin())) { + infiniteLoopCount = 0; + infiniteLoopPattern.clear(); + gnalog() << "infinite loop fixed\n"; + } + } + + sf.SetInfiniteLoopCount(infiniteLoopCount); + } + + if (infiniteLoopCount > 0) { + std::string additionalInformation; + for (const auto& p : infiniteLoopPattern) { + additionalInformation += '\n' + p; + } + THROW_GNA_EXCEPTION << "infinite loop: " + additionalInformation; } } }; diff --git a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp index 7fe08a571f8..cec813aecfa 100644 --- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp +++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp @@ -205,7 +205,7 @@ class ScaleFactorPerLayer { * @param result * @return */ - bool operator()(T cnnLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) { + bool operator()(T cnnLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, 
bool fakeQuantize, int infiniteLoopCount) { return false; } }; @@ -438,7 +438,8 @@ class ScaleFactorPerLayer { } public : - bool operator()(InferenceEngine::CNNLayer *cnnLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) { + bool operator()(InferenceEngine::CNNLayer *cnnLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, bool fakeQuantize, + int infiniteLoopCount) { if ( !cnnLayer ) { IE_THROW() << "Incorrect Convolutional Layer pointer \n"; } @@ -477,7 +478,7 @@ class ScaleFactorPerLayer { if ((!fakeQuantize && quantSibling->_dst_quant.IsScaleSet()) || (fakeQuantize && quantSibling->_dst_quant.IsScaleSet() && !fp32eq(quantSibling->_dst_quant.GetScale(), 1.0) && - quantSibling->_dst_quant.GetScale() < inputQuant->_dst_quant.GetScale())) { + quantSibling->_dst_quant.GetScale() < inputQuant->_dst_quant.GetScale()) || infiniteLoopCount > 0) { // means we already restarted propagation input memory layer // need to search for requantiseable layer prior memory output layer InferenceEngine::CNNLayerPtr restartedLayer; @@ -645,7 +646,8 @@ class ScaleFactorPerLayer { template<> class ScaleFactorPerLayer { public: - bool operator()(InferenceEngine::EltwiseLayer* eltwiseLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) { + bool operator()(InferenceEngine::EltwiseLayer* eltwiseLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, + bool fakeQuantize, int infiniteLoopCount) { if ( !eltwiseLayer ) { THROW_GNA_EXCEPTION << "Incorrect Eltwise Layer pointer \n"; } @@ -814,7 +816,8 @@ class ScaleFactorPerLayer { template<> class ScaleFactorPerLayer { public: - bool operator()(InferenceEngine::ConcatLayer* concatLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) { + bool operator()(InferenceEngine::ConcatLayer* concatLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, + bool fakeQuantize, int infiniteLoopCount) { if ( !concatLayer ) { THROW_GNA_EXCEPTION << "Incorrect Concat Layer pointer \n"; } @@ -872,15 +875,8 @@ class ScaleFactorPerLayer { // find a source quant value // - 1st candidate - input layer // - 2nd candidate - non-activation layer with non-1 scale factor - static std::map restarted_counter; - auto restartedCountIt = restarted_counter.find(concatLayer->name); - if (restartedCountIt == restarted_counter.end()) { - auto pos = restarted_counter.insert({ concatLayer->name, 0 }); - restartedCountIt = pos.first; - } - if (sourceLayerIt == inputLayers.end()) { - if (((restartedCountIt->second) / 2) % 2 == 1) { + if (infiniteLoopCount % 2 == 1) { std::reverse(inputLayers.begin(), inputLayers.end()); } @@ -898,7 +894,7 @@ class ScaleFactorPerLayer { } } } else { - if (((restartedCountIt->second) / 4) % 2 == 0) { + if (infiniteLoopCount % 4 == 2 || infiniteLoopCount % 4 == 3) { auto sourceLayerCheck = [](InferenceEngine::CNNLayerPtr& inputLayer) { auto quantParams = InferenceEngine::getInjectedData(inputLayer); LayerInfo info(inputLayer); @@ -916,8 +912,6 @@ class ScaleFactorPerLayer { sourceLayerIt = std::find_if(inputLayers.begin(), inputLayers.end(), nonDefaultScaleFactor); } } - - ++restartedCountIt->second; } std::set concatIdxToUpdate; @@ -978,7 +972,7 @@ class ScaleFactorPerLayer { gnalog() << "[UFS] from : " << concatLayer->name << " reached: " << layer->name; // found that direct input to concat is a indirect parent of align filter - so no link required auto info = LayerInfo(layer); - if 
(!info.isWeightable() && !info.isActivation() && !info.isConst()) { + if (!info.isWeightable() && !info.isActivation() && !info.isConst() && !info.isMemory()) { gnalog() << "... skipped\n"; return; } @@ -1030,8 +1024,8 @@ class ScaleFactorPerLayer { } quantDataForConCatInput->_dst_quant.SetScale(newScaleFactor); - } else if (restarLayerInfo.isConst()) { - gnalog() << "... warning const layer will be requantized\n"; + } else if (restarLayerInfo.isConst() || restarLayerInfo.isMemory()) { + gnalog() << "... warning " << restartedLayer->type << " layer will be requantized\n"; quantDataForConCatInput->_src_quant.SetScale(sourceQuantParams->_dst_quant.GetScale()); quantDataForConCatInput->_dst_quant.SetScale(sourceQuantParams->_dst_quant.GetScale()); } else { @@ -1057,7 +1051,8 @@ class ScaleFactorPerLayer { }; public: - bool operator()(InferenceEngine::WeightableLayer *wl, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) { + bool operator()(InferenceEngine::WeightableLayer *wl, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, + bool fakeQuantize, int infiniteLoopCount) { if ( !wl ) { THROW_GNA_EXCEPTION << "Incorrect Weightable Layer pointer \n"; } else if (!wl->_weights) { @@ -1211,7 +1206,8 @@ class ScaleFactorPerLayer : public ScaleFact template<> class ScaleFactorPerLayer { public: - bool operator() (InferenceEngine::GemmLayer* gemmLayer, int weightsSize, int inputSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) { + bool operator() (InferenceEngine::GemmLayer* gemmLayer, int weightsSize, int inputSize, ScaleFactorUpdateResult &result, + bool fakeQuantize, int infiniteLoopCount) { if ( !gemmLayer ) { THROW_GNA_EXCEPTION << "Incorrect Gemm Layer pointer \n"; } @@ -1270,6 +1266,7 @@ class ScaleFactorCalculator { int optWeightsBytesSize; bool isFakeQuantize; int inputsBytesSize; + int infiniteLoopCount = 0; public: ScaleFactorCalculator(Cnt &net, int mandWeightsBytesSize, int optWeightsBytesSize, int inputsBytesSize, bool fakeQuantize) @@ -1286,6 +1283,9 @@ class ScaleFactorCalculator { std::vector getStartLayers() const { return std::vector(idx, std::end(net)); } + void SetInfiniteLoopCount(int infiniteLoopCount) { + this->infiniteLoopCount = infiniteLoopCount; + } template bool operator()(T ptr) const { needRestart = false; @@ -1296,7 +1296,7 @@ class ScaleFactorCalculator { weightsBytesSize = optWeightsBytesSize; } - if (!frontend::ScaleFactorPerLayer()(ptr, weightsBytesSize, inputsBytesSize, result, isFakeQuantize)) { + if (!frontend::ScaleFactorPerLayer()(ptr, weightsBytesSize, inputsBytesSize, result, isFakeQuantize, infiniteLoopCount)) { return false; } if (result) { diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp index 51a429d9a33..88110701499 100644 --- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp @@ -145,12 +145,15 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) size_t output_layer_size = 0; for (int j = 0; j != getInputTo(layer->outData[i]).size(); j++) { - auto outFunctionalLayer = CNNNetGetNextLayerSkipCertain(layer, i, j, [](CNNLayerPtr l) { + auto outFunctionalLayer = CNNNetCheckNextLayerSkipCertain(layer, i, j, true, [](CNNLayerPtr l) { return LayerInfo(l).isNonFunctional(); }); if (!outFunctionalLayer.first) { - THROW_GNA_LAYER_EXCEPTION(layer) << " outData["<< i << "]" << " connected by " << j <<" connection doesnt connect 
to functional layer"; + output_layer_size = + InferenceEngine::details::product(begin(layer->outData[i]->getDims()), + end(layer->outData[i]->getDims())) * layer->outData[i]->getPrecision().size(); + continue; } for (int idx : outFunctionalLayer.second) { diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index 6cef822e120..5e069154763 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -58,11 +58,11 @@ #include "transformations/remove_extra_reshapes.hpp" #include "transformations/insert_transpose_after_convolution_or_pooling.hpp" -#include "transformations/insert_transpose_before_matmul.hpp" #include "transformations/reorder_activation_and_pooling.hpp" #include "transformations/swap_input_matmul_gna.hpp" #include "transformations/convert_matmul_to_pointwise_convolution.hpp" #include "transformations/split_convolution_with_large_buffer_size.hpp" +#include "transformations/handle_transposes_around_matmul.hpp" #include "transformations/decompose_2d_conv.hpp" #include "transformations/convert_padded2valid_conv.hpp" @@ -702,7 +702,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { manager.register_pass(); manager.register_pass(); manager.register_pass(); - manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index 75db6c9a474..b92bd153370 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -2102,8 +2102,11 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { THROW_GNA_LAYER_EXCEPTION(fqLayer) << "Zero levels"; } - // Before FQ layer is removed, the previous layer has to be updated with its quantization data - auto quantParamsPrevLayer = InferenceEngine::getInjectedData(prevLayer); + // Before FQ layer is removed, the previous functional layer has to be updated with its quantization data + auto prevFuncLayer = CNNNetPrevLayerSkipCertain(*fqLayer, 0, [](CNNLayerPtr layer) { + return LayerInfo(layer).isNonFunctional(); + }); + auto quantParamsPrevLayer = InferenceEngine::getInjectedData(prevFuncLayer); quantParamsPrevLayer->_dst_quant.SetLevels(fqLevels); quantParamsPrevLayer->_dst_quant.SetMinValues({ inputRange.first[0] }, true); quantParamsPrevLayer->_dst_quant.SetMaxValues({ inputRange.second[0] }, true); diff --git a/inference-engine/src/gna_plugin/transformations/handle_transposes_around_matmul.cpp b/inference-engine/src/gna_plugin/transformations/handle_transposes_around_matmul.cpp new file mode 100644 index 00000000000..9591bd0fc6c --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/handle_transposes_around_matmul.cpp @@ -0,0 +1,132 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/handle_transposes_around_matmul.hpp" + +#include + +#include +#include +#include +#include + +#include "gna_plugin_log.hpp" +#include "backend/gna_limitations.hpp" + +using namespace GNAPluginNS; + +NGRAPH_RTTI_DEFINITION(HandleTransposesAroundMatMul, "HandleTransposesAroundMatMul", 0); +NGRAPH_RTTI_DEFINITION(HandleTransposeBeforeMatMul, "HandleTransposeBeforeMatMul", 0); +NGRAPH_RTTI_DEFINITION(HandleTransposeAfterMatMul, "HandleTransposeAfterMatMul", 0); + +static void 
ReplaceTransposeWithReshape(std::shared_ptr transpose_node) { + auto shape = transpose_node->get_output_shape(0); + auto reshape_const = std::make_shared(ngraph::element::Type_t::i64, + ngraph::Shape{shape.size()}, shape); + auto reshape_node = std::make_shared(transpose_node->input_value(0), reshape_const, false); + reshape_node->set_friendly_name(transpose_node->get_friendly_name() + "/reshape"); + ngraph::copy_runtime_info(transpose_node, reshape_node); + transpose_node->output(0).replace(reshape_node->output(0)); +} + +static void InsertTranspose(std::shared_ptr prev_node, const std::string& base_name) { + auto consumers = prev_node->output(0).get_target_inputs(); + const auto orig_shape = prev_node->get_output_shape(0); + std::vector transpose_ids; + for (size_t i = 0; i < orig_shape.size(); ++i) { + if (orig_shape[i] > 1) { + transpose_ids.push_back(i); + } + } + IE_ASSERT(transpose_ids.size() == 2); + std::vector permute_order(orig_shape.size()); + std::iota(std::begin(permute_order), std::end(permute_order), 0); + std::swap(permute_order[transpose_ids[0]], permute_order[transpose_ids[1]]); + + auto transpose_order = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{permute_order.size()}, permute_order); + auto transpose = std::make_shared(prev_node, transpose_order); + transpose->set_friendly_name(base_name + "/in_transpose"); + + auto reshapeConstAfter = std::make_shared(ngraph::element::Type_t::i64, + ngraph::Shape{orig_shape.size()}, orig_shape); + auto reshapeAfter = std::make_shared(transpose, reshapeConstAfter, false); + reshapeAfter->set_friendly_name(base_name + "/reshape_after_transpose"); + ngraph::copy_runtime_info(prev_node, ngraph::NodeVector{transpose, reshapeAfter}); + + for (auto input : consumers) { + input.replace_source_output(reshapeAfter); + } +} + +HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() { + auto reshape = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), + ngraph::pattern::any_input()}, VerifyReshape()); + auto transpose = ngraph::pattern::wrap_type({reshape, + ngraph::pattern::any_input()}); + auto matmul_input = std::make_shared(ngraph::OutputVector{reshape, transpose}); + auto matmul1 = ngraph::pattern::wrap_type({matmul_input, ngraph::pattern::any_input()}); + auto matmul2 = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), matmul_input}); + auto matmul = std::make_shared(ngraph::OutputVector{matmul1, matmul2}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + auto transpose_it = pattern_map.find(transpose); + if (transpose_it != std::end(pattern_map)) { + ReplaceTransposeWithReshape(transpose_it->second.get_node_shared_ptr()); + } else { + auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr(); + if (!GNALimitations::IsTransposeSupported(reshape_node->get_output_shape(0))) return false; + auto matmul_it = pattern_map.find(matmul1); + auto matmul_out = matmul_it != std::end(pattern_map) ? 
matmul_it->second : pattern_map.at(matmul2); + InsertTranspose(reshape_node, matmul_out.get_node_shared_ptr()->get_friendly_name()); + } + return true; + }; + + auto m = std::make_shared(matmul, "HandleTransposeBeforeMatMul"); + this->register_matcher(m, callback); +} + +HandleTransposeAfterMatMul::HandleTransposeAfterMatMul() { + auto matmul = ngraph::pattern::wrap_type(); + auto fq = ngraph::pattern::wrap_type({matmul, ngraph::pattern::any_input(), + ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()}); + auto transpose_input = std::make_shared(ngraph::OutputVector{matmul, fq}); + auto transpose = ngraph::pattern::wrap_type({transpose_input, ngraph::pattern::any_input()}); + auto reshape_input = std::make_shared(ngraph::OutputVector{transpose_input, transpose}); + auto reshape = ngraph::pattern::wrap_type({reshape_input, + ngraph::pattern::any_input()}, VerifyReshape()); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + auto transpose_it = pattern_map.find(transpose); + if (transpose_it != std::end(pattern_map)) { + ReplaceTransposeWithReshape(transpose_it->second.get_node_shared_ptr()); + } else { + auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr(); + if (!GNALimitations::IsTransposeSupported(reshape_node->get_input_shape(0))) return false; + auto matmul_node = pattern_map.at(matmul).get_node_shared_ptr(); + InsertTranspose(matmul_node, matmul_node->get_friendly_name()); + } + return true; + }; + + auto m = std::make_shared(reshape, "HandleTransposeAfterMatMul"); + this->register_matcher(m, callback); +} + +bool VerifyReshape::operator()(const ngraph::Output& reshape_out) const { + auto in_shape = reshape_out.get_node_shared_ptr()->get_input_shape(0); + auto out_shape = reshape_out.get_node_shared_ptr()->get_output_shape(0); + + // Check if Reshape changes the final 2d shape of Affine primitive + in_shape.erase(std::remove(in_shape.begin(), in_shape.end(), 1), in_shape.end()); + out_shape.erase(std::remove(out_shape.begin(), out_shape.end(), 1), out_shape.end()); + return in_shape != out_shape; +} + +HandleTransposesAroundMatMul::HandleTransposesAroundMatMul() { + add_matcher(); + add_matcher(); +} diff --git a/inference-engine/src/gna_plugin/transformations/handle_transposes_around_matmul.hpp b/inference-engine/src/gna_plugin/transformations/handle_transposes_around_matmul.hpp new file mode 100644 index 00000000000..2601655f77f --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/handle_transposes_around_matmul.hpp @@ -0,0 +1,63 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace GNAPluginNS { + +struct VerifyReshape { + bool operator()(const ngraph::Output& reshape_out) const; +}; + +/** + * @brief Inserts Transpose before MatMul or removes it (if it exists) if there is Reshape + * before MatMul which changes the batch size: + * [1, A*B] [1, A*B] + * | | + * Reshape Reshape + * | | + * [1, A, 1, B] [1, A, 1, B] + * | | + * | Transpose + * | -> | + * | <- [1, B, 1, A] + * | | + * MatMul MatMul + */ +class HandleTransposeBeforeMatMul : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + HandleTransposeBeforeMatMul(); +}; + +/** + * @brief Inserts Transpose after MatMul or removes it (if it exists) if there is Reshape + * after MatMul which changes the batch size: + * MatMul MatMul + * | | + * [1, A, 1, B] [1, A, 1, B] + * | | + * | 
Transpose + * | -> | + * | <- [1, B, 1, A] + * | | + * Reshape Reshape + * | | + * [1, A*B] [1, A*B] + */ +class HandleTransposeAfterMatMul : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + HandleTransposeAfterMatMul(); +}; + +class HandleTransposesAroundMatMul: public ngraph::pass::GraphRewrite { +public: + NGRAPH_RTTI_DECLARATION; + HandleTransposesAroundMatMul(); +}; + +} // namespace GNAPluginNS diff --git a/inference-engine/src/gna_plugin/transformations/insert_transpose_before_matmul.cpp b/inference-engine/src/gna_plugin/transformations/insert_transpose_before_matmul.cpp deleted file mode 100644 index d311fd19656..00000000000 --- a/inference-engine/src/gna_plugin/transformations/insert_transpose_before_matmul.cpp +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (C) 2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "transformations/insert_transpose_before_matmul.hpp" - -#include -#include -#include -#include - -using namespace GNAPluginNS; - -NGRAPH_RTTI_DEFINITION(InsertTransposeBeforeMatmul, "InsertTransposeBeforeMatmul", 0); - -InsertTransposeBeforeMatmul::InsertTransposeBeforeMatmul() { - MATCHER_SCOPE(InsertTransposeBeforeMatmul); - auto reshape = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), - ngraph::pattern::any_input()}, - ngraph::pattern::rank_equals(2)); - auto matmul1 = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), reshape}); - auto matmul2 = ngraph::pattern::wrap_type({reshape, ngraph::pattern::any_input()}); - auto root = std::make_shared(ngraph::OutputVector{matmul1, matmul2}); - - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto& pattern_map = m.get_pattern_value_map(); - auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr(); - auto reshape_in_shape = reshape_node->get_input_shape(0); - auto reshape_out_shape = reshape_node->get_output_shape(0); - if (reshape_in_shape.front() == reshape_out_shape.front()) { - return false; - } - - if (reshape_out_shape[0] == 1 || reshape_out_shape[1] == 1) { - return false; - } - - size_t min, max; - std::tie(min, max) = std::minmax(reshape_out_shape[0], reshape_out_shape[1]); - if (min > 8 || max % 8 != 0) return false; - - auto consumers = reshape_node->output(0).get_target_inputs(); - auto matmul_node = consumers.begin()->get_node()->shared_from_this(); - - auto transpose_order = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, std::vector{1, 0}); - auto transpose = register_new_node(reshape_node, transpose_order); - transpose->set_friendly_name(matmul_node->get_friendly_name() + "/in_transpose"); - - auto transpose_out_shape = transpose->output(0).get_shape(); - std::swap(transpose_out_shape[0], transpose_out_shape[1]); - auto reshapeConstAfter = std::make_shared(ngraph::element::Type_t::i64, - ngraph::Shape{2}, - transpose_out_shape); - auto reshapeAfter = std::make_shared(transpose, reshapeConstAfter, false); - reshapeAfter->set_friendly_name(matmul_node->get_friendly_name() + "/reshape_after_transpose"); - - for (auto input : consumers) { - input.replace_source_output(reshapeAfter); - } - - ngraph::copy_runtime_info(matmul_node, {transpose, reshapeAfter}); - return true; - }; - - auto m = std::make_shared(root, matcher_name); - this->register_matcher(m, callback); -} diff --git a/inference-engine/src/gna_plugin/transformations/insert_transpose_before_matmul.hpp b/inference-engine/src/gna_plugin/transformations/insert_transpose_before_matmul.hpp deleted file mode 100644 index 
943bb905f04..00000000000 --- a/inference-engine/src/gna_plugin/transformations/insert_transpose_before_matmul.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (C) 2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -namespace GNAPluginNS { - -/** - * @brief Inserts Transpose before MatMul in the following topology: - * [1, A] - * | - * Reshape - * | - * [B, C], - * 1 < B <= 8, C % 8 == 0 or - * B % 8 == 0, 1 < C <= 8 - * | Const - * \ / - * Matmul - */ -class InsertTransposeBeforeMatmul : public ngraph::pass::MatcherPass { -public: - NGRAPH_RTTI_DECLARATION; - InsertTransposeBeforeMatmul(); -}; - -} // namespace GNAPluginNS \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/memory_fq_concat_prelu.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/memory_fq_concat_prelu.cpp new file mode 100644 index 00000000000..be49337f8ef --- /dev/null +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/memory_fq_concat_prelu.cpp @@ -0,0 +1,69 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "subgraph_tests/memory_fq_concat_prelu.hpp" +#include "common_test_utils/test_constants.hpp" +#include "gna/gna_config.hpp" + +namespace SubgraphTestsDefinitions { +namespace { + +std::vector>> inputs{ + {{1, 64}} +}; + +std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16, +}; + +std::map additional_config = { + {{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}} +}; + +std::vector< + std::tuple< + std::vector, + std::vector, + std::vector, + std::vector, + std::vector>> strided_slice_params = { + std::make_tuple( + std::vector{0, 0}, + std::vector{1, 64}, + std::vector{1, 1}, + std::vector{1, 0}, + std::vector{1, 0}) +}; + +std::vector< + std::tuple< + std::size_t, + std::vector, + std::vector, + std::vector, + std::vector, + std::vector>> fake_quantize_params = { + std::make_tuple( + 65535, + std::vector{1}, + std::vector{-1}, + std::vector{1}, + std::vector{-1}, + std::vector{1}) +}; + +} // namespace + +INSTANTIATE_TEST_SUITE_P(smoke_memory_fq_concat_prelu, MemoryFqConcatPrelu, + ::testing::Combine( + ::testing::ValuesIn(inputs), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::Values(additional_config), + ::testing::ValuesIn(strided_slice_params), + ::testing::ValuesIn(fake_quantize_params)), + MemoryFqConcatPrelu::getTestCaseName); +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/relu_split_reshape.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/relu_split_reshape.cpp new file mode 100644 index 00000000000..73d36467ccd --- /dev/null +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/relu_split_reshape.cpp @@ -0,0 +1,45 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "subgraph_tests/relu_split_reshape.hpp" +#include "common_test_utils/test_constants.hpp" +#include "gna/gna_config.hpp" + +using namespace SubgraphTestsDefinitions; + +namespace { +std::vector> inputShape = { + {1, 1, 64}, + {1, 1, 128} +}; + +std::vector splitAxis = {2}; + +std::vector splitNum = {2}; + +std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + 
InferenceEngine::Precision::FP16, +}; + +std::vector> additional_config = { + { + {"GNA_DEVICE_MODE", "GNA_SW_FP32"} + }, + { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"} + } +}; + +INSTANTIATE_TEST_CASE_P(smoke_relu_split_reshape, ReluSplitReshape, + ::testing::Combine( + ::testing::ValuesIn(inputShape), + ::testing::ValuesIn(splitAxis), + ::testing::ValuesIn(splitNum), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(additional_config)), + ReluSplitReshape::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/memory_fq_concat_prelu.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/memory_fq_concat_prelu.hpp new file mode 100644 index 00000000000..514c4588e6e --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/memory_fq_concat_prelu.hpp @@ -0,0 +1,18 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifndef PLUGIN_SHARED_MEMORY_FQ_CONCAT_PRELU_HPP +#define PLUGIN_SHARED_MEMORY_FQ_CONCAT_PRELU_HPP + +#include "shared_test_classes/subgraph/memory_fq_concat_prelu.hpp" + +namespace SubgraphTestsDefinitions { + +TEST_P(MemoryFqConcatPrelu, CompareWithRefs){ + Run(); +}; + +} // namespace SubgraphTestsDefinitions + +#endif // PLUGIN_SHARED_MEMORY_FQ_CONCAT_PRELU_HPP diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/relu_split_reshape.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/relu_split_reshape.hpp new file mode 100644 index 00000000000..73e50306c22 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/relu_split_reshape.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/subgraph/relu_split_reshape.hpp" + +namespace SubgraphTestsDefinitions { + +TEST_P(ReluSplitReshape, CompareWithRefs) { + Run(); +}; + +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/memory_fq_concat_prelu.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/memory_fq_concat_prelu.hpp new file mode 100644 index 00000000000..ae33b815962 --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/memory_fq_concat_prelu.hpp @@ -0,0 +1,50 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifndef SHARED_TEST_CLASSES_MEMORY_FQ_CONCAT_PRELU_H +#define SHARED_TEST_CLASSES_MEMORY_FQ_CONCAT_PRELU_H + +#include +#include +#include + +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" + +namespace SubgraphTestsDefinitions { + +typedef std::tuple< + std::vector>, //input shapes + InferenceEngine::Precision, //Network precision + std::string, //Device name + std::map, //Configuration + std::tuple< + std::vector, + std::vector, + std::vector, + std::vector, + std::vector>, // StridedSlice + std::tuple< + std::size_t, + std::vector, + std::vector, + std::vector, + std::vector, + std::vector> // FakeQuantize +> MemoryFqConcatPreluTuple; + +class MemoryFqConcatPrelu : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string 
getTestCaseName(const testing::TestParamInfo &obj); + void Run() override; + +protected: + void SetUp() override; +}; // class MemoryFqConcatPrelu + +} // namespace SubgraphTestsDefinitions + +#endif // SHARED_TEST_CLASSES_MEMORY_FQ_CONCAT_PRELU_H diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/relu_split_reshape.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/relu_split_reshape.hpp new file mode 100644 index 00000000000..b6ca2e38b56 --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/relu_split_reshape.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include "common_test_utils/test_constants.hpp" + +namespace SubgraphTestsDefinitions { + +typedef std::tuple< + std::vector, // Input shape + size_t, // Split axis + size_t, // Split number + InferenceEngine::Precision, // Network precision + std::string, // Device name + std::map // Configuration +> ReluSplitReshapeTuple; + +class ReluSplitReshape: + public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); +protected: + void SetUp() override; +}; +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/subgraph/memory_fq_concat_prelu.cpp b/inference-engine/tests/functional/shared_test_classes/src/subgraph/memory_fq_concat_prelu.cpp new file mode 100644 index 00000000000..230b0cb4bf1 --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/src/subgraph/memory_fq_concat_prelu.cpp @@ -0,0 +1,124 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/subgraph/memory_fq_concat_prelu.hpp" +#include + +namespace SubgraphTestsDefinitions { + +template +inline typename std::enable_if::value, void>::type + printTupleElement(std::ostringstream& out, const T& value) { + out << "_" << value; +} + +template +inline typename std::enable_if>::value, void>::type + printTupleElement(std::ostringstream& out, const T& vector) { + for (const auto& value : vector) { + out << "_" << value; + } +} + +template +inline typename std::enable_if::type printTuple(std::ostringstream& out, std::tuple& t) { +} + +template +inline typename std::enable_if::type printTuple(std::ostringstream& out, std::tuple& t) { + printTupleElement(out, std::get(t)); + printTuple(out, t); +} + +std::string MemoryFqConcatPrelu::getTestCaseName(const testing::TestParamInfo &obj) { + std::vector> input; + InferenceEngine::Precision netPrecision; + std::string targetName; + std::map additional_config; + std::tuple< + std::vector, + std::vector, + std::vector, + std::vector, + std::vector> strided_slice_params; + std::tuple< + std::size_t, + std::vector, + std::vector, + std::vector, + std::vector, + std::vector> fake_quantize_params; + std::tie(input, netPrecision, targetName, additional_config, strided_slice_params, fake_quantize_params) = obj.param; + std::ostringstream results; + + results << "IS=" << CommonTestUtils::vec2str(input[0]) << "_"; + results << "netPRC=" << netPrecision.name() << "_"; + results << "targetDevice=" << targetName << "_"; + for (auto const &item : 
additional_config) { + results << "_additional_config=" << item.first << "_" << item.second; + } + results << "_strided_slice_params="; + printTuple(results, strided_slice_params); + results << "_fake_quantize_params="; + printTuple(results, fake_quantize_params); + return results.str(); +} + +void MemoryFqConcatPrelu::Run() { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + LoadNetwork(); + GenerateInputs(); + Infer(); +} + +void MemoryFqConcatPrelu::SetUp() { + std::vector> inputs; + InferenceEngine::Precision netPrecision; + std::map additional_config; + std::tuple< + std::vector, + std::vector, + std::vector, + std::vector, + std::vector> strided_slice_params; + std::tuple< + std::size_t, + std::vector, + std::vector, + std::vector, + std::vector, + std::vector> fake_quantize_params; + std::tie(inputs, netPrecision, targetDevice, additional_config, strided_slice_params, fake_quantize_params) = this->GetParam(); + configuration.insert(additional_config.begin(), additional_config.end()); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + auto input = ngraph::builder::makeParams(ngPrc, {inputs}); + auto memory_read = ngraph::builder::makeConstant(ngPrc, {inputs[0]}, {0}); + auto read = std::make_shared(memory_read, "variable1"); + auto fake_constatnt = ngraph::builder::makeConstant(ngPrc, {inputs[0]}, {0}); + auto fake = ngraph::builder::makeFakeQuantize(fake_constatnt, ngPrc, + std::get<0>(fake_quantize_params), + std::get<1>(fake_quantize_params), + std::get<2>(fake_quantize_params), + std::get<3>(fake_quantize_params), + std::get<4>(fake_quantize_params), + std::get<5>(fake_quantize_params)); + auto concat = ngraph::builder::makeConcat({read, fake, input[0]}, 1); + auto prelu_constant = ngraph::op::Constant::create(ngPrc, {1}, {-2}); + auto prelu = std::make_shared(concat, prelu_constant); + auto slice = ngraph::builder::makeStridedSlice(prelu, + std::get<0>(strided_slice_params), + std::get<1>(strided_slice_params), + std::get<2>(strided_slice_params), + ngPrc, + std::get<3>(strided_slice_params), + std::get<4>(strided_slice_params)); + auto assign = std::make_shared(slice, "variable1"); + auto result = std::make_shared(prelu); + assign->add_control_dependency(read); + result->add_control_dependency(assign); + function = std::make_shared(ngraph::ResultVector{result}, input, "memory_fq_concat_prelu"); +} + +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/subgraph/relu_split_reshape.cpp b/inference-engine/tests/functional/shared_test_classes/src/subgraph/relu_split_reshape.cpp new file mode 100644 index 00000000000..38261ac6c90 --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/src/subgraph/relu_split_reshape.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/subgraph/relu_split_reshape.hpp" + +namespace SubgraphTestsDefinitions { +std::string ReluSplitReshape::getTestCaseName(const testing::TestParamInfo &obj) { + std::vector inputShape; + size_t splitAxis, splitNum; + InferenceEngine::Precision netPrecision; + std::string targetName; + std::map config; + std::tie(inputShape, splitAxis, splitNum, netPrecision, targetName, config) = obj.param; + std::ostringstream results; + + results << "IS=" << CommonTestUtils::vec2str(inputShape) << "_"; + results << "axis=" << splitAxis << "_"; + results << "num=" << splitNum << "_"; + results << "netPRC=" << netPrecision.name() << "_"; + results << 
"targetDevice=" << targetName << "_"; + for (auto const& configItem : config) { + results << "_configItem=" << configItem.first << "_" << configItem.second; + } + return results.str(); +} + +void ReluSplitReshape::SetUp() { + std::vector inputShape; + size_t splitAxis, splitNum; + InferenceEngine::Precision netPrecision; + std::map additional_config; + std::tie(inputShape, splitAxis, splitNum, netPrecision, targetDevice, additional_config) = this->GetParam(); + configuration.insert(additional_config.begin(), additional_config.end()); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + auto relu = std::make_shared(params[0]); + auto split = ngraph::builder::makeSplit(relu, ngPrc, splitNum, splitAxis); + + auto shape = split->get_output_shape(0); + shape[shape.size() - 2] *= 2; + shape[shape.size() - 1] /= 2; + auto reshape_const = std::make_shared(ngraph::element::Type_t::i64, + ngraph::Shape{shape.size()}, shape); + auto reshape = std::make_shared(split->output(0), reshape_const, false); + + function = std::make_shared(reshape, params, "ReluSplitReshape"); +} +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/unit/gna/ngraph/transformations/gna_insert_transpose_before_matmul.cpp b/inference-engine/tests/unit/gna/ngraph/transformations/gna_insert_transpose_before_matmul.cpp deleted file mode 100644 index 17ff10e804f..00000000000 --- a/inference-engine/tests/unit/gna/ngraph/transformations/gna_insert_transpose_before_matmul.cpp +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright (C) 2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include - -#include "transformations/insert_transpose_before_matmul.hpp" - -#include "common_test_utils/ngraph_test_utils.hpp" -#include -#include -#include -#include - -namespace testing { - -namespace { - -std::shared_ptr createFunction(const ngraph::PartialShape& input_values, - const ngraph::Shape& reshape_values, - const ngraph::Shape& matmul_values) { - auto input_params = std::make_shared(ngraph::element::i64, input_values); - - auto new_shape = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{reshape_values.size()}, reshape_values); - auto reshape_operation = std::make_shared(input_params, new_shape, true); - - auto constant = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{matmul_values.size()}, matmul_values); - auto matmul_operation = std::make_shared(reshape_operation, constant); - - auto result = std::make_shared(matmul_operation); - return std::make_shared(ngraph::ResultVector{result}, - ngraph::ParameterVector{input_params}); -} - -// --------------------------------------------------------------------------------------------------------------------- - -class InsertTransposeBeforeMatmulTestInvalidFixture: public CommonTestUtils::TestsCommon, - public ::testing::WithParamInterface> { -public: - void SetUp() override; -public: - std::shared_ptr function, reference_function; -}; - -void InsertTransposeBeforeMatmulTestInvalidFixture::SetUp() { - ngraph::PartialShape input_shape; - ngraph::Shape reshape_shape, matmul_shape; - std::tie(input_shape, reshape_shape, matmul_shape) = this->GetParam(); - - function = createFunction(input_shape, reshape_shape, matmul_shape); - reference_function = createFunction(input_shape, reshape_shape, matmul_shape); -} - -// 
--------------------------------------------------------------------------------------------------------------------- - -class InsertTransposeBeforeMatmulTestFixture: public CommonTestUtils::TestsCommon, - public ::testing::WithParamInterface> { -public: - void SetUp() override; - std::shared_ptr get_initial_function(const ngraph::PartialShape & input_shape, - const ngraph::Shape & reshape_shape, - const ngraph::Shape & matmul_shape); - std::shared_ptr get_reference(const ngraph::PartialShape & input_shape); -public: - std::shared_ptr function, reference_function; -}; - -void InsertTransposeBeforeMatmulTestFixture::SetUp() { - ngraph::PartialShape input_shape; - ngraph::Shape reshape_shape, matmul_shape; - std::tie(input_shape, reshape_shape, matmul_shape) = this->GetParam(); - - function = get_initial_function(input_shape, reshape_shape, matmul_shape); - reference_function = get_reference(input_shape); -} - -std::shared_ptr InsertTransposeBeforeMatmulTestFixture::get_initial_function(const ngraph::PartialShape & input_shape, - const ngraph::Shape & reshape_shape, - const ngraph::Shape & matmul_shape) { - return createFunction(input_shape, reshape_shape, matmul_shape); -} - -std::shared_ptr InsertTransposeBeforeMatmulTestFixture::get_reference(const ngraph::PartialShape & input_shape) { - auto input_params = std::make_shared(ngraph::element::i64, input_shape); - - auto new_shape = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {8, 2}); - auto reshape_operation = std::make_shared(input_params, new_shape, true); - - auto transpose_order = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, - std::vector{1, 0}); - auto transpose_operation = std::make_shared(reshape_operation, transpose_order); - - auto new_shape_after_transpose = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {8, 2}); - auto reshape_after_transpose = std::make_shared(transpose_operation, - new_shape_after_transpose, - false); - - auto constant = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {2, 1}); - auto matmul_operation = std::make_shared(reshape_after_transpose, constant); - - auto result = std::make_shared(matmul_operation); - return std::make_shared(ngraph::ResultVector{result}, - ngraph::ParameterVector{input_params}); -} - -// --------------------------------------------------------------------------------------------------------------------- - -void execute_test(std::shared_ptr function, std::shared_ptr reference_function) { - ngraph::pass::Manager manager; - manager.register_pass(); - manager.register_pass(); - manager.run_passes(function); - const FunctionsComparator func_comparator = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); - const FunctionsComparator::Result result = func_comparator(function, reference_function); - ASSERT_TRUE(result.valid); -} - -TEST_P(InsertTransposeBeforeMatmulTestFixture, CompareFunctions) { - execute_test(function, reference_function); -} - -INSTANTIATE_TEST_SUITE_P(InsertTransposeBeforeMatmulTestSuite, InsertTransposeBeforeMatmulTestFixture, - ::testing::Values(std::make_tuple(ngraph::PartialShape{2, 8}, ngraph::Shape{8, 2}, ngraph::Shape{2, 1}), - std::make_tuple(ngraph::PartialShape{1, 16}, ngraph::Shape{8, 2}, ngraph::Shape{2, 1}))); - -TEST_P(InsertTransposeBeforeMatmulTestInvalidFixture, CompareFunctions) { - execute_test(function, reference_function); -} - -INSTANTIATE_TEST_SUITE_P(InsertTransposeBeforeMatmulTestInvalidSuite, 
InsertTransposeBeforeMatmulTestInvalidFixture, - ::testing::Values(std::make_tuple(ngraph::PartialShape{2, 9}, ngraph::Shape{9, 2}, ngraph::Shape{2, 1}), - std::make_tuple(ngraph::PartialShape{9, 2}, ngraph::Shape{9, 2}, ngraph::Shape{2, 1}))); - -} // namespace - -} // namespace testing diff --git a/inference-engine/tests/unit/gna/ngraph/transformations/handle_transposes_around_matmul.cpp b/inference-engine/tests/unit/gna/ngraph/transformations/handle_transposes_around_matmul.cpp new file mode 100644 index 00000000000..df8ac77ed6e --- /dev/null +++ b/inference-engine/tests/unit/gna/ngraph/transformations/handle_transposes_around_matmul.cpp @@ -0,0 +1,201 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "transformations/handle_transposes_around_matmul.hpp" + +#include "common_test_utils/ngraph_test_utils.hpp" +#include +#include +#include +#include +#include + +namespace handle_transpose_before_matmul { + +std::shared_ptr CreateTransposeMatmulFunction(const ngraph::Shape& input_shape, + const ngraph::Shape& reshape_shape, const ngraph::Shape& matmul_shape, bool create_reshape_after_transpose) { + auto input_params = std::make_shared(ngraph::element::i64, input_shape); + + auto new_shape_const = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{reshape_shape.size()}, reshape_shape); + auto reshape = std::make_shared(input_params, new_shape_const, false); + + auto transpose_order = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {1, 0}); + auto transpose = std::make_shared(reshape, transpose_order); + + std::vector data(ngraph::shape_size(matmul_shape)); + std::iota(std::begin(data), std::end(data), 1); + auto constant = ngraph::opset7::Constant::create(ngraph::element::i64, matmul_shape, data); + std::shared_ptr matmul; + if (create_reshape_after_transpose) { + auto reshape_after_transpose_const = ngraph::opset7::Constant::create(ngraph::element::i64, + ngraph::Shape{reshape_shape.size()}, reshape_shape); + auto reshape_after_transpose = std::make_shared(transpose, reshape_after_transpose_const, false); + matmul = std::make_shared(reshape_after_transpose, constant); + } else { + matmul = std::make_shared(transpose, constant); + } + + auto result = std::make_shared(matmul); + return std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); +} + +std::shared_ptr CreateMatmulFunction(const ngraph::Shape& input_shape, + const ngraph::Shape& reshape_shape, const ngraph::Shape& matmul_shape, bool create_reshape_instead_of_transpose) { + auto input_params = std::make_shared(ngraph::element::i64, input_shape); + + std::shared_ptr reshape; + auto const_shape = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{reshape_shape.size()}, reshape_shape); + if (create_reshape_instead_of_transpose) { + auto new_reshape = std::make_shared(input_params, const_shape, false); + auto new_shape_after_transpose = ngraph::opset7::Constant::create(ngraph::element::i64, + ngraph::Shape{reshape_shape.size()}, {reshape_shape[1], reshape_shape[0]}); + reshape = std::make_shared(new_reshape, new_shape_after_transpose, false); + } else { + reshape = std::make_shared(input_params, const_shape, false); + } + + std::vector data(ngraph::shape_size(matmul_shape)); + std::iota(std::begin(data), std::end(data), 1); + auto constant = ngraph::opset7::Constant::create(ngraph::element::i64, matmul_shape, data); + auto matmul = std::make_shared(reshape, constant); + + auto result = 
std::make_shared(matmul); + return std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); +} + +} // namespace handle_transpose_before_matmul + +namespace handle_transpose_after_matmul { + +std::shared_ptr CreateMatmulTransposeFunction(const ngraph::Shape& input_shape, + const ngraph::Shape& matmul_shape, const ngraph::Shape& reshape_shape, bool create_reshape_after_transpose) { + auto input_params = std::make_shared(ngraph::element::i64, input_shape); + + std::vector data(ngraph::shape_size(matmul_shape)); + std::iota(std::begin(data), std::end(data), 1); + auto matmul_constant = ngraph::opset7::Constant::create(ngraph::element::i64, matmul_shape, data); + auto matmul = std::make_shared(input_params, matmul_constant); + const auto matmul_output_shape = matmul->get_output_shape(0); + + auto transpose_order = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {1, 0}); + auto transpose = std::make_shared(matmul, transpose_order); + const auto transpose_output_shape = transpose->get_output_shape(0); + + std::shared_ptr reshape; + auto shape_const = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{reshape_shape.size()}, reshape_shape); + if (create_reshape_after_transpose) { + const auto matmul_output_shape = matmul->get_output_shape(0); + auto reshape_after_transpose_const = ngraph::opset7::Constant::create(ngraph::element::i64, + ngraph::Shape{matmul_output_shape.size()}, matmul_output_shape); + auto reshape_after_transpose = std::make_shared(transpose, reshape_after_transpose_const, false); + reshape = std::make_shared(reshape_after_transpose, shape_const, false); + } else { + reshape = std::make_shared(transpose, shape_const, false); + const auto reshape_output_shape = reshape->get_output_shape(0); + } + + auto result = std::make_shared(reshape); + return std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); +} + +std::shared_ptr CreateMatmulFunction(const ngraph::Shape& input_shape, + const ngraph::Shape& matmul_shape, const ngraph::Shape& reshape_shape, bool create_reshape_instead_of_transpose) { + auto input_params = std::make_shared(ngraph::element::i64, input_shape); + + std::vector data(ngraph::shape_size(matmul_shape)); + std::iota(std::begin(data), std::end(data), 1); + auto matmul_constant = ngraph::opset7::Constant::create(ngraph::element::i64, matmul_shape, data); + auto matmul = std::make_shared(input_params, matmul_constant); + + std::shared_ptr reshape; + auto shape_const = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{reshape_shape.size()}, reshape_shape); + if (create_reshape_instead_of_transpose) { + const auto matmul_output_shape = matmul->get_output_shape(0); + auto reshape_instead_of_transpose_const = ngraph::opset7::Constant::create(ngraph::element::i64, + ngraph::Shape{matmul_output_shape.size()}, {matmul_output_shape[1], matmul_output_shape[0]}); + auto reshape_instead_of_transpose = std::make_shared(matmul, reshape_instead_of_transpose_const, false); + reshape = std::make_shared(reshape_instead_of_transpose, shape_const, false); + } else { + reshape = std::make_shared(matmul, shape_const, false); + } + + auto result = std::make_shared(reshape); + return std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); +} + +} // namespace handle_transpose_after_matmul + +namespace { + +void RunTest(const std::shared_ptr& func, const std::shared_ptr& reference_func) { + { + ngraph::pass::Manager m; + m.register_pass(); 
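+        // register the transformation under test and run it; check_rt_info() below then
+        // verifies that runtime info is preserved by the pass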
+ m.register_pass(); + m.run_passes(func); + ASSERT_NO_THROW(check_rt_info(func)); + } + + const FunctionsComparator func_comparator = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); + const FunctionsComparator::Result result = func_comparator(func, reference_func); + ASSERT_TRUE(result.valid); +} + +} // namespace + +TEST(TransformationTests, InsertTransposeBeforeMatmulTest) { + RunTest( + handle_transpose_before_matmul::CreateMatmulFunction({2, 8}, {8, 2}, {2, 1}, false), + handle_transpose_before_matmul::CreateTransposeMatmulFunction({2, 8}, {8, 2}, {2, 1}, true)); + RunTest( + handle_transpose_before_matmul::CreateMatmulFunction({1, 16}, {8, 2}, {2, 1}, false), + handle_transpose_before_matmul::CreateTransposeMatmulFunction({1, 16}, {8, 2}, {2, 1}, true)); +} + +TEST(TransformationTests, InsertTransposeBeforeMatmulTestReshapeInOutEq) { + RunTest( + handle_transpose_before_matmul::CreateMatmulFunction({2, 9}, {9, 2}, {2, 1}, false), + handle_transpose_before_matmul::CreateMatmulFunction({2, 9}, {9, 2}, {2, 1}, false)); + RunTest( + handle_transpose_before_matmul::CreateMatmulFunction({9, 2}, {9, 2}, {2, 1}, false), + handle_transpose_before_matmul::CreateMatmulFunction({9, 2}, {9, 2}, {2, 1}, false)); +} + +TEST(TransformationTests, RemoveTransposeBeforeMatmulTest) { + RunTest( + handle_transpose_before_matmul::CreateTransposeMatmulFunction({1, 8}, {2, 4}, {2, 1}, false), + handle_transpose_before_matmul::CreateMatmulFunction({1, 8}, {2, 4}, {2, 1}, true)); +} + +TEST(TransformationTests, RemoveTransposeBeforeMatmulTestReshapeInOutEq) { + RunTest( + handle_transpose_before_matmul::CreateTransposeMatmulFunction({2, 8}, {2, 8}, {2, 5}, false), + handle_transpose_before_matmul::CreateTransposeMatmulFunction({2, 8}, {2, 8}, {2, 5}, false)); +} + +TEST(TransformationTests, InsertTransposeAfterMatmulTest) { + RunTest( + handle_transpose_after_matmul::CreateMatmulFunction({4, 1}, {1, 8}, {2, 16}, false), + handle_transpose_after_matmul::CreateMatmulTransposeFunction({4, 1}, {1, 8}, {2, 16}, true)); +} + +TEST(TransformationTests, RemoveTransposeAfterMatmulTest) { + RunTest( + handle_transpose_after_matmul::CreateMatmulTransposeFunction({4, 1}, {1, 8}, {2, 16}, false), + handle_transpose_after_matmul::CreateMatmulFunction({4, 1}, {1, 8}, {2, 16}, true)); +} + +TEST(TransformationTests, RemoveTransposeAfterMatmulTestReshapeInOutEq) { + RunTest( + handle_transpose_after_matmul::CreateMatmulTransposeFunction({4, 1}, {1, 8}, {8, 4}, false), + handle_transpose_after_matmul::CreateMatmulTransposeFunction({4, 1}, {1, 8}, {8, 4}, false)); +} + +TEST(TransformationTests, InsertTransposeAfterMatmulTestReshapeInOutEq) { + RunTest( + handle_transpose_after_matmul::CreateMatmulFunction({4, 1}, {1, 8}, {4, 8}, false), + handle_transpose_after_matmul::CreateMatmulFunction({4, 1}, {1, 8}, {4, 8}, false)); +} From 28f708c991dfcc4a6e1b962fae818237bdf3b676 Mon Sep 17 00:00:00 2001 From: Andrew Kwangwoong Park Date: Tue, 10 Aug 2021 20:53:09 +0900 Subject: [PATCH 19/24] [GPU] Add optimized cldnn detection output GPU implementation (#6634) Co-authored-by: Taylor Lee --- .../kernel_selector/common/common_types.h | 3 +- .../detection_output_kernel_ref.cpp | 274 ++++++ .../detection_output_kernel_ref.h | 75 ++ .../detection_output_kernel_selector.cpp | 14 + .../detection_output_kernel_selector.h | 21 + .../cl_kernels/detection_output_gpu_ref.cl | 840 ++++++++++++++++++ .../include/detection_output_common.cl | 155 ++-- .../clDNN/src/impls/cpu/detection_output.cpp | 7 +- 
.../clDNN/src/impls/ocl/detection_output.cpp | 88 ++ .../clDNN/src/impls/ocl/register.cpp | 1 + .../clDNN/src/impls/ocl/register.hpp | 2 + .../thirdparty/clDNN/src/layout_optimizer.cpp | 9 + .../test_cases/detection_output_test.cpp | 42 +- 13 files changed, 1436 insertions(+), 95 deletions(-) create mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp create mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.h create mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.cpp create mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.h create mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output_gpu_ref.cl create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/detection_output.cpp diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h b/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h index dbe6bd7004c..5552f9681e8 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h @@ -70,7 +70,8 @@ enum class KernelType { CUM_SUM, EMBEDDING_BAG, EXTRACT_IMAGE_PATCHES, - LOOP + LOOP, + DETECTION_OUTPUT }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp new file mode 100644 index 00000000000..8917ecffd7e --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp @@ -0,0 +1,274 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "detection_output_kernel_ref.h" +#include "kernel_selector_utils.h" + +#include + +namespace kernel_selector { + +ParamsKey DetectionOutputKernelRef::GetSupportedKey() const { + ParamsKey k; + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::F32); + k.EnableInputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::bfyx); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBatching(); + return k; +} + +JitConstants DetectionOutputKernelRef::GetJitConstants(const detection_output_params& params) const { + JitConstants jit = MakeBaseParamsJitConstants(params); + + const auto& detectOutParams = params.detectOutParams; + auto num_prior_boxes = params.inputs[1].Feature().v / detectOutParams.num_classes; + + jit.AddConstants({ + MakeJitConstant("NUM_IMAGES", detectOutParams.num_images), + MakeJitConstant("NUM_CLASSES", detectOutParams.num_classes), + MakeJitConstant("NUM_CLASSES_PER_ITEM", 4), + MakeJitConstant("KEEP_TOP_K", detectOutParams.keep_top_k), + MakeJitConstant("TOP_K", std::min(detectOutParams.top_k, (int32_t)num_prior_boxes)), + MakeJitConstant("BACKGROUND_LABEL_ID", detectOutParams.background_label_id), + MakeJitConstant("CODE_TYPE", detectOutParams.code_type), + MakeJitConstant("CONF_SIZE_X", 
detectOutParams.conf_size_x), + MakeJitConstant("CONF_SIZE_Y", detectOutParams.conf_size_y), + MakeJitConstant("CONF_PADDING_X", detectOutParams.conf_padding_x), + MakeJitConstant("CONF_PADDING_Y", detectOutParams.conf_padding_y), + MakeJitConstant("SHARE_LOCATION", detectOutParams.share_location), + MakeJitConstant("VARIANCE_ENCODED_IN_TARGET", detectOutParams.variance_encoded_in_target), + MakeJitConstant("NMS_THRESHOLD", detectOutParams.nms_threshold), + MakeJitConstant("ETA", detectOutParams.eta), + MakeJitConstant("CONFIDENCE_THRESHOLD", detectOutParams.confidence_threshold), + MakeJitConstant("IMAGE_WIDTH", detectOutParams.input_width), + MakeJitConstant("IMAGE_HEIGH", detectOutParams.input_heigh), + MakeJitConstant("DECREASE_LABEL_ID", detectOutParams.decrease_label_id), + MakeJitConstant("CLIP_BEFORE_NMS", detectOutParams.clip_before_nms), + MakeJitConstant("CLIP_AFTER_NMS", detectOutParams.clip_after_nms), + MakeJitConstant("ELEMENTS_PER_THREAD", detectOutParams.elements_per_thread), + MakeJitConstant("PRIOR_COORD_OFFSET", detectOutParams.prior_coordinates_offset), + MakeJitConstant("PRIOR_INFO_SIZE", detectOutParams.prior_info_size), + MakeJitConstant("PRIOR_IS_NORMALIZED", detectOutParams.prior_is_normalized), + }); + + return jit; +} + +int GetPartitionStep(int localWorkItemNum) { + int step_size = 0; + for (int temp = localWorkItemNum; temp > 1; temp /= 2) { + step_size++; + } + return step_size; +} + +size_t GetOptimalLocalClassSize(std::vector gws, const EngineInfo& info) { + const size_t optimal_values[] = {16, 8, 7, 6, 5, 4, 2, 1}; + const size_t splitNum = gws[2]; + const size_t globalClassNum = gws[1]; + const auto rest_lws = info.maxWorkGroupSize / splitNum; + size_t lws_idx = 0; + while (rest_lws < optimal_values[lws_idx]) lws_idx++; + while (globalClassNum % optimal_values[lws_idx]) lws_idx++; + + return optimal_values[lws_idx]; +} + +DetectionOutputKernelRef::DispatchData SetDefault(const detection_output_params& params, int idx) { + DetectionOutputKernelRef::DispatchData dispatchData; + const auto& input = params.inputs[0]; + const auto& detectOutParams = params.detectOutParams; + auto num_classes = detectOutParams.num_classes; + auto num_prior_boxes = params.inputs[1].Feature().v / num_classes; + + if (idx == 0) { + if (detectOutParams.decrease_label_id) { + dispatchData.gws = {input.Batch().v, num_prior_boxes, 1}; + dispatchData.lws = {input.Batch().v, 1, 1}; + } else { + if (detectOutParams.conf_padding_x || detectOutParams.conf_padding_y) { + dispatchData.gws = {num_classes, params.engineInfo.maxWorkGroupSize, input.Batch().v}; + } else { + dispatchData.gws = {CeilDiv(num_classes, 4), params.engineInfo.maxWorkGroupSize, input.Batch().v}; + } + dispatchData.lws = {1, dispatchData.gws[1], 1}; + } + } else if (idx == 1) { + const size_t kSplitNum = 16; + if (detectOutParams.decrease_label_id) { + dispatchData.gws = {input.Batch().v, 1, kSplitNum}; + dispatchData.lws = {1, 1, kSplitNum}; + } else { + dispatchData.gws = {input.Batch().v, num_classes, kSplitNum}; + const size_t kClassSize = GetOptimalLocalClassSize(dispatchData.gws, params.engineInfo); + dispatchData.lws = {1, kClassSize, kSplitNum}; + } + } else if (idx == 2) { + if (detectOutParams.decrease_label_id) { + dispatchData.gws = {input.Batch().v, 1, 1}; + dispatchData.lws = {1, 1, 1}; + } else { + dispatchData.gws = {input.Batch().v, num_classes, 1}; + dispatchData.lws = {1, 1, 1}; + } + } else if (idx == 3) { + if (detectOutParams.decrease_label_id) { + dispatchData.gws = {1, 1, 1}; + dispatchData.lws = 
{1, 1, 1}; + } else { + dispatchData.gws = {input.Batch().v, 1, 1}; + dispatchData.lws = {input.Batch().v, 1, 1}; + } + } else { + dispatchData.gws = {1, 1, 1}; + dispatchData.lws = {1, 1, 1}; + } + + return dispatchData; +} + +void DetectionOutputKernelRef::SetKernelArguments(const detection_output_params& params, clKernelData& kernel, size_t idx) const { + if (params.detectOutParams.decrease_label_id) { + if (idx == 0) { + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 2}); + } else if (idx == 1) { + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 2}); + } else if (idx == 2) { + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 2}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 2}); + } else if (idx == 3) { + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 2}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 2}); + } + } else { + if (idx == 0) { + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); + } else if (idx == 1) { + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); + } else if (idx == 2) { + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 2}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); + } else if (idx == 3) { + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 2}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); + } + } +} + +KernelsData DetectionOutputKernelRef::GetKernelsData(const Params& params, const optional_params& options) const { + assert(params.GetType() == KernelType::DETECTION_OUTPUT && options.GetType() == KernelType::DETECTION_OUTPUT); + + constexpr size_t kKernelsNum = 4; + KernelData kd = KernelData::Default(params, kKernelsNum); + const detection_output_params& detectOutParams = static_cast(params); + + constexpr size_t prior_box_size = 4; + auto num_of_images = 
detectOutParams.inputs[0].Batch().v; + auto loc_feature_num = detectOutParams.inputs[0].Feature().v; + auto num_classes = detectOutParams.detectOutParams.num_classes; + auto num_loc_classes = (detectOutParams.detectOutParams.share_location) ? 1 : num_classes; + auto num_prior_boxes = (loc_feature_num / (num_loc_classes * prior_box_size)); + + constexpr size_t buffer_bytes = 10; // The size of struct Scores in detection_output_gpu_ref.cl + size_t buffer_stride = num_prior_boxes * buffer_bytes; + size_t buffer_size = num_of_images * num_classes * buffer_stride; + size_t num_scores_size = num_of_images * (num_classes + 1) * sizeof(int); + + kd.internalBufferSizes.push_back(buffer_size); + if (detectOutParams.detectOutParams.decrease_label_id) { + kd.internalBufferSizes.push_back(buffer_size); + } + kd.internalBufferSizes.push_back(num_scores_size); + kd.internalBufferDataType = GetUnitType(detectOutParams); + + for (size_t i = 0; i < kKernelsNum; i++) { + DispatchData dispatchData = SetDefault(detectOutParams, i); + auto cldnnJit = GetJitConstants(detectOutParams); + auto entryPoint = GetEntryPoint(kernelName, detectOutParams.layerID, params, options, i); + cldnnJit.AddConstant(MakeJitConstant("BUFFER_STRIDE", buffer_stride)); + if (i == 0) { + if (detectOutParams.detectOutParams.decrease_label_id) { + cldnnJit.AddConstant(MakeJitConstant("DO_STAGE_" + std::to_string(i) + "_MXNET", "true")); + } else { + if (detectOutParams.detectOutParams.conf_padding_x || detectOutParams.detectOutParams.conf_padding_y) { + cldnnJit.AddConstants({MakeJitConstant("DO_STAGE_" + std::to_string(i) + "_CAFFE", "true")}); + } else { + cldnnJit.AddConstants({MakeJitConstant("DO_STAGE_" + std::to_string(i) + "_CAFFE_OPT", "true")}); + } + size_t num_bit_mask = CeilDiv(num_prior_boxes, 8); + size_t num_score_per_item = RoundUp(CeilDiv(num_prior_boxes, 256), 8); + size_t num_score_block = CeilDiv(num_prior_boxes, num_score_per_item); + cldnnJit.AddConstants({MakeJitConstant("NUM_BIT_MASK", num_bit_mask), + MakeJitConstant("NUM_PRIORS_PER_ITEM", num_score_per_item), + MakeJitConstant("NUM_PRIOR_BLOCKS", num_score_block)}); + } + } else if (i == 1) { + if (detectOutParams.detectOutParams.decrease_label_id) { + cldnnJit.AddConstants({MakeJitConstant("DO_STAGE_" + std::to_string(i) + "_MXNET", "true"), + MakeJitConstant("LOCAL_WORK_NUM", dispatchData.lws[2]), + MakeJitConstant("PARTITION_STEP", GetPartitionStep(dispatchData.lws[2]))}); + } else { + cldnnJit.AddConstants({MakeJitConstant("DO_STAGE_" + std::to_string(i) + "_CAFFE", "true"), + MakeJitConstant("LOCAL_CLASS_NUM", dispatchData.lws[1]), + MakeJitConstant("LOCAL_WORK_NUM", dispatchData.lws[2]), + MakeJitConstant("PARTITION_STEP", GetPartitionStep(dispatchData.lws[2]))}); + } + } else if (i == 2) { + if (detectOutParams.detectOutParams.decrease_label_id) { + cldnnJit.AddConstant(MakeJitConstant("DO_STAGE_" + std::to_string(i) + "_MXNET", "true")); + } else { + if (detectOutParams.detectOutParams.top_k > 0) { + cldnnJit.AddConstant(MakeJitConstant("DO_STAGE_" + std::to_string(i) + "_CAFFE_OPT", "true")); + } else { + cldnnJit.AddConstant(MakeJitConstant("DO_STAGE_" + std::to_string(i) + "_CAFFE", "true")); + } + } + } else { + if (detectOutParams.detectOutParams.decrease_label_id) { + cldnnJit.AddConstant(MakeJitConstant("DO_STAGE_" + std::to_string(i) + "_MXNET", "true")); + } else { + cldnnJit.AddConstants({MakeJitConstant("DO_STAGE_" + std::to_string(i) + "_CAFFE", "true"), + MakeJitConstant("LOCAL_BATCHES_NUM", dispatchData.lws[0])}); + } + } + + auto jit = 
CreateJit(kernelName, cldnnJit, entryPoint); + auto& kernel = kd.kernels[i]; + KernelBase::CheckDispatchData(kernelName, dispatchData); + kernel.params.workGroups.global = dispatchData.gws; + kernel.params.workGroups.local = dispatchData.lws; + kernel.code.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo); + SetKernelArguments(detectOutParams, kernel, i); + } + + return {kd}; +} + +KernelsPriority DetectionOutputKernelRef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const { + return FORCE_PRIORITY_9; +} +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.h new file mode 100644 index 00000000000..9f0cf5466c6 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.h @@ -0,0 +1,75 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "kernel_base_opencl.h" +#include "kernel_selector_params.h" + +namespace kernel_selector { +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// detection_output_params +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct detection_output_params : public base_params { + detection_output_params() : base_params(KernelType::DETECTION_OUTPUT), detectOutParams() {} + + struct DedicatedParams { + uint32_t num_images; + uint32_t num_classes; + int32_t keep_top_k; + int32_t top_k; + int32_t background_label_id; + int32_t code_type; + int32_t conf_size_x; + int32_t conf_size_y; + int32_t conf_padding_x; + int32_t conf_padding_y; + int32_t elements_per_thread; + int32_t input_width; + int32_t input_heigh; + int32_t prior_coordinates_offset; + int32_t prior_info_size; + bool prior_is_normalized; + bool share_location; + bool variance_encoded_in_target; + bool decrease_label_id; + bool clip_before_nms; + bool clip_after_nms; + float nms_threshold; + float eta; + float confidence_threshold; + }; + + DedicatedParams detectOutParams; + + ParamsKey GetParamsKey() const override { + auto k = base_params::GetParamsKey(); + return k; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// detection_output_optional_params +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct detection_output_optional_params : optional_params { + detection_output_optional_params() : optional_params(KernelType::DETECTION_OUTPUT) {} +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// DetectionOutputKernelRef +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class DetectionOutputKernelRef: public KernelBaseOpenCL { +public: + DetectionOutputKernelRef() : KernelBaseOpenCL("detection_output_gpu_ref") {} + + using DispatchData = CommonDispatchData; + KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; + KernelsPriority GetKernelsPriority(const Params& params, const optional_params& 
options) const override; + ParamsKey GetSupportedKey() const override; + +protected: + virtual JitConstants GetJitConstants(const detection_output_params& params) const; + void SetKernelArguments(const detection_output_params& params, clKernelData& kernel, size_t idx) const; +}; +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.cpp new file mode 100644 index 00000000000..bd6d358fea9 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "detection_output_kernel_selector.h" +#include "detection_output_kernel_ref.h" + +namespace kernel_selector { +detection_output_kernel_selector::detection_output_kernel_selector() { Attach(); } + +KernelsData detection_output_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const { + return GetNaiveBestKernel(params, options, KernelType::DETECTION_OUTPUT); +} +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.h new file mode 100644 index 00000000000..c57ebcdd617 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.h @@ -0,0 +1,21 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "kernel_selector.h" + +namespace kernel_selector { +class detection_output_kernel_selector : public kernel_selector_base { +public: + static detection_output_kernel_selector& Instance() { + static detection_output_kernel_selector instance_; + return instance_; + } + + detection_output_kernel_selector(); + + KernelsData GetBestKernels(const Params& params, const optional_params& options) const override; +}; +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output_gpu_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output_gpu_ref.cl new file mode 100644 index 00000000000..6f19536ed68 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output_gpu_ref.cl @@ -0,0 +1,840 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "include/data_types.cl" +#include "include/common.cl" +#include "include/detection_output_common.cl" + +// DetectionOuput - performs non-maximuim suppression to generate the detection output +// using information on location and confidence predictions. +// +// Below is a set of 4 kernels: +// - detection_output_stage_0_scores_(caffe/mxnet) +// - detection_output_stage_1_sort_(caffe/mxnet) +// - detection_output_stage_2_nms_(caffe/mxnet) +// - detection_output_stage_final_(caffe/mxnet) +// that can perform detection output operation in two modes determined by decrease_label_id. +// +// Caffe-style NMS mode: +// In this mode _caffe kernels are used. 
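+// The stages exchange data through intermediate global buffers (see the Scores struct and the
+// buffer indexing in the kernels below). The score buffer holds packed Scores records
+// {classId, boxId, score}, one block of BUFFER_STRIDE bytes per image/class pair. An int buffer
+// keeps NUM_CLASSES + 1 counters per image, i.e. one detection count per class plus the
+// accumulated per-image total in the last slot.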
+//
+// detection_output_stage_0_scores_caffe should be enqueued first, provided with extra global memory
+// on its second and third inputs.
+// This kernel will calculate the detections whose confidences are larger than a threshold and
+// the number of detections per class for each work-group, and store them into global memory.
+//
+// detection_output_stage_1_sort_caffe should be enqueued next in order to sort the detections.
+// This kernel expects on its first and second inputs the global memory produced by the previous kernel.
+// In this kernel, detections for each class are sorted using iterative quick sort by each
+// work-group and stored into global memory. If the total number of detections per batch is
+// greater than TOP_K, only TOP_K of them are stored into global memory.
+//
+// detection_output_stage_2_nms_caffe should be enqueued next with the buffers holding the outputs
+// of the previous kernel and the inputs (location and prior_box).
+// This kernel will select detections per class using non-maximum suppression in each work-group
+// and store them into global memory. During NMS, box coordinates of detections are calculated
+// from the inputs (location and prior_box) using bounding box decoding.
+//
+// Finally, detection_output_stage_final_caffe should be enqueued with the buffers holding the
+// outputs of the previous kernel and the output buffer, using a single work-group.
+// This kernel will produce the final detections from the result of the previous kernel.
+// If the total number of detections per batch is greater than KEEP_TOP_K, the detections are
+// sorted using iterative quick sort and only KEEP_TOP_K of them are kept. Each final detection
+// is described with 7 elements [batch_id, class_id, confidence, x_1, y_1, x_2, y_2].
+//
+// =================================================================================================================
+// Required jit constants:
+// -----------------------------------------------------------------------------------------------------------------
+// BUFFER_STRIDE          - buffer size per class
+// NUM_BIT_MASK           - bit mask size that can be processed per work-group
+// NUM_PRIORS_PER_ITEM    - number of prior boxes that can be processed per work-item
+// NUM_PRIOR_BLOCKS       - local memory size that can handle the number of detections accumulated per work-group
+// LOCAL_CLASS_NUM        - number of classes that can be processed per work-item
+// LOCAL_WORK_NUM         - number of work-items that can be processed simultaneously
+// PARTITION_STEP         - loop size that performs partitioning to calculate the pivot and store it into local memory
+// LOCAL_BATCHES_NUM      - number of batches that can be processed per work-group
+// =================================================================================================================
+
+#define unroll_for __attribute__((opencl_unroll_hint)) for
+#define NUM_CLASSES_ACC (NUM_CLASSES + 1)
+
+typedef struct __attribute__((__packed__)) {
+    short classId;
+    int boxId;
+    INPUT1_TYPE score;
+} FUNC(Scores);
+
+#define SCORES_INFO FUNC(Scores)
+
+inline void FUNC(swap_scores_info)(__global SCORES_INFO* a, __global SCORES_INFO* b) {
+    SCORES_INFO temp = *a;
+    *a = *b;
+    *b = temp;
+}
+
+inline int FUNC(partition)(__global SCORES_INFO* arr, int l, int h, bool use_custom_comp) {
+    INPUT1_TYPE pivotScore = arr[h].score;
+    int pivotBoxId = arr[h].boxId;
+    int i = (l - 1);
+    for (int j = l; j <= h - 1; j++) {
+        if (use_custom_comp) {
+            if ((arr[j].score > pivotScore) || (arr[j].score == pivotScore && arr[j].boxId < pivotBoxId)) {
+                i++;
+                
FUNC_CALL(swap_scores_info)(&arr[i], &arr[j]); + } + } else { + if (arr[j].score > pivotScore) { + i++; + FUNC_CALL(swap_scores_info)(&arr[i], &arr[j]); + } + } + } + FUNC_CALL(swap_scores_info)(&arr[i + 1], &arr[h]); + return (i + 1); +} + +inline void FUNC(bubbleSortIterative)(__global SCORES_INFO* arr, int l, int h) { + for (int i = 0; i < h-l; i++) { + bool swapped = false; + for (int j = l; j < h-i; j++) { + if ((arr[j].score > arr[j+1].score) || (arr[j].score == arr[j+1].score && arr[j].boxId < arr[j+1].boxId)) { + FUNC_CALL(swap_scores_info)(&arr[j], &arr[j+1]); + swapped = true; + } + } + + if (!swapped) + break; + } +} + +inline void FUNC(quickSortIterative)(__global SCORES_INFO* arr, int l, int h, bool use_custom_comp) { + // Create an auxiliary stack + const int kStackSize = 100; + int stack[kStackSize]; + + // initialize top of stack + int top = -1; + + // push initial values of l and h to stack + stack[++top] = l; + stack[++top] = h; + + // Keep popping from stack while is not empty + while (top >= 0) { + // Pop h and l + h = stack[top--]; + l = stack[top--]; + + // Set pivot element at its correct position + // in sorted array + int p = FUNC_CALL(partition)(arr, l, h, use_custom_comp); + + // If there are elements on left side of pivot, + // then push left side to stack + if (p - 1 > l) { + if (top >= (kStackSize - 1)) { + FUNC_CALL(bubbleSortIterative)(arr, l, p - 1); + } else { + stack[++top] = l; + stack[++top] = p - 1; + } + } + + // If there are elements on right side of pivot, + // then push right side to stack + if (p + 1 < h) { + if (top >= (kStackSize - 1)) { + FUNC_CALL(bubbleSortIterative)(arr, p + 1, h); + } else { + stack[++top] = p + 1; + stack[++top] = h; + } + } + } +} + +inline int FUNC(get_accumulated_detections)(__global int* size_buf, int batch_id) { + int acc_num = 0; + for (uint idx_class = 0; idx_class < NUM_CLASSES; idx_class++) + { + acc_num += size_buf[batch_id * NUM_CLASSES_ACC + idx_class]; + } + return acc_num; +} + +inline int FUNC(get_start_idx)(__global int* size_buf, int batch_id) { + int start_idx = 0; + for (uint idx_batch = 0; idx_batch < batch_id; idx_batch++) + { + const int num_det = size_buf[idx_batch * NUM_CLASSES_ACC + NUM_CLASSES]; + start_idx += (num_det > KEEP_TOP_K ? KEEP_TOP_K: num_det); + } + return start_idx; +} + +inline int FUNC(get_final_detections)(__global int* size_buf) { + int final_detections = 0; + for (uint idx_image = 0; idx_image < NUM_OF_IMAGES; idx_image++) + { + const int num_det = size_buf[idx_image * NUM_CLASSES_ACC + NUM_CLASSES]; + final_detections += (num_det > KEEP_TOP_K ? 
KEEP_TOP_K: num_det); + } + return final_detections; +} + +inline INPUT0_TYPE FUNC(jaccardOverlap)(INPUT0_TYPE* bbox1, INPUT0_TYPE* bbox2) { + INPUT0_TYPE overlap = 0.0; + bool intersecting = (bbox1[0] < bbox2[2]) & (bbox2[0] < bbox1[2]) & (bbox1[1] < bbox2[3]) & (bbox2[1] < bbox1[3]); + + if (intersecting) + { + const INPUT0_TYPE intersect_width = min(bbox1[2], bbox2[2]) - max(bbox1[0], bbox2[0]); + const INPUT0_TYPE intersect_height = min(bbox1[3], bbox2[3]) - max(bbox1[1], bbox2[1]); + if (intersect_width > 0 && intersect_height > 0) { + const INPUT0_TYPE intersect_size = intersect_width * intersect_height; + const INPUT0_TYPE bbox1_size = (bbox1[2] - bbox1[0]) * (bbox1[3]- bbox1[1]); + const INPUT0_TYPE bbox2_size = (bbox2[2] - bbox2[0]) * (bbox2[3]- bbox2[1]); + overlap = intersect_size / (bbox1_size + bbox2_size - intersect_size); + } + } + return overlap; +} + +inline uint FUNC(get_confidence_offset)(const uint idx_prior, const uint idx_class, const uint idx_image) { + return (idx_prior * NUM_CLASSES + idx_image * NUM_OF_PRIORS * NUM_CLASSES + idx_class) * CONF_XY_SIZE_PRODUCT + CONF_PADDING; +} + +inline uint FUNC(get_largest_score)(__global INPUT1_TYPE* input_confidence, const uint idx_prior, const uint idx_image) { + const uint idx_start = (BACKGROUND_LABEL_ID == 0 ? 1 : 0); + uint offset = FUNC_CALL(get_confidence_offset)(idx_prior, idx_start, idx_image); + INPUT1_TYPE max_score = input_confidence[offset]; + uint idx = idx_start; + + for (uint j = idx_start; j < NUM_CLASSES; j++) + { + offset = FUNC_CALL(get_confidence_offset)(idx_prior, j, idx_image); + INPUT1_TYPE score = input_confidence[offset]; + if (score > max_score) { + max_score = score; + idx = j; + } + } + return idx; +} + +#ifdef DO_STAGE_0_CAFFE_OPT +KERNEL (detection_output_stage_0_scores_caffe)(__global INPUT1_TYPE* input_confidence, + __global uchar *buffer0, + __global int *buffer1) { + const int classId = (int)get_global_id(0) * NUM_CLASSES_PER_ITEM; + const int box_gid = get_global_id(1); + const int batchId = get_global_id(2); + + int classes_leftover = ((NUM_CLASSES - (classId) >= NUM_CLASSES_PER_ITEM)) ? 0 : 1; + int n_classes_this_item = classes_leftover ? 
(NUM_CLASSES - classId) : NUM_CLASSES_PER_ITEM; + + const int start_bid = box_gid * NUM_PRIORS_PER_ITEM; + const int end_bid = min(start_bid + NUM_PRIORS_PER_ITEM, NUM_OF_PRIORS); + + __local char4 bit_mask[NUM_BIT_MASK]; + __local int4 block_num[NUM_PRIOR_BLOCKS]; + + block_num[box_gid] = (int4)(0, 0, 0, 0); + + { + int mask_id = start_bid / 8; + for (int i = start_bid; i < end_bid; i += 8) { + bit_mask[mask_id] = (char4)(0, 0, 0, 0); + unroll_for (int bi = 0; bi < 8; bi++) { + if ((i + bi) >= NUM_OF_PRIORS) + break; + CMP_TYPE4 valid_scores = FUNC_CALL(filter_score4)(input_confidence, (i + bi), classId, batchId); + bit_mask[mask_id] |= ((convert_char4(valid_scores)) << bi); + block_num[box_gid] += convert_int4(valid_scores); + } + if (classes_leftover) { + for (int c = n_classes_this_item; c < NUM_CLASSES_PER_ITEM; c++) { + bit_mask[mask_id][c] = 0; + } + } + mask_id++; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + { + if (box_gid == 0 && get_local_id(1) == 0) { + int4 acc_num = (int4)(0, 0, 0, 0); + for (int i = 0; i < NUM_PRIOR_BLOCKS; i++) { + int4 n = block_num[i]; + block_num[i] = acc_num; + acc_num += n; + } + for (int c = 0; c < n_classes_this_item ; ++c) { + buffer1[batchId * NUM_CLASSES_ACC + (classId + c)] = acc_num[c]; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + { + int4 write_offsets = block_num[box_gid]; + int mask_id = start_bid >> 3; + for (int i = start_bid; i < end_bid; i += 8) { + for (int bi = 0; bi < 8; bi++) { + char bitset = 1 << bi; + if (all((bit_mask[mask_id] & bitset) == (char4)(0, 0, 0, 0))) + continue; + INPUT_TYPE4 score4 = FUNC_CALL(get_score4)(input_confidence, (i + bi), classId, batchId); + for (int c = 0; c < n_classes_this_item; c++) { + if ((bit_mask[mask_id][c] & bitset) == 0) continue; + __global SCORES_INFO *scoresList = (__global SCORES_INFO*)&buffer0[(batchId * NUM_CLASSES + classId + c) * BUFFER_STRIDE]; + SCORES_INFO score_info; + score_info.classId = (short)(classId + c); + score_info.boxId = i + bi; + score_info.score = score4[c]; + scoresList[write_offsets[c]] = score_info; + write_offsets[c]++; + } + } + mask_id++; + } + } +} +#endif /* DO_STAGE_0_CAFFE_OPT */ + +#ifdef DO_STAGE_0_CAFFE +KERNEL (detection_output_stage_0_scores_caffe)(__global INPUT1_TYPE* input_confidence, + __global uchar *buffer0, + __global int *buffer1) { + const int classId = get_global_id(0); + const int box_gid = get_global_id(1); + const int batchId = get_global_id(2); + + const int start_bid = box_gid * NUM_PRIORS_PER_ITEM; + const int end_bid = min(start_bid + NUM_PRIORS_PER_ITEM, NUM_OF_PRIORS); + + __local char bit_mask[NUM_BIT_MASK]; + __local int block_num[NUM_PRIOR_BLOCKS]; + + block_num[box_gid] = 0; + + { + int mask_id = start_bid / 8; + for (int i = start_bid; i < end_bid; i += 8) { + bit_mask[mask_id] = 0; + unroll_for (int bi = 0; bi < 8; bi++) { + if ((i + bi) >= NUM_OF_PRIORS) + break; + INPUT1_TYPE score = FUNC_CALL(get_score)(input_confidence, (i + bi), classId, batchId); + int valid = (score < 0) ? 
0 : 1; + bit_mask[mask_id] |= (valid << bi); + block_num[box_gid] += valid; + } + mask_id++; + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + + { + if (box_gid == 0 && get_local_id(1) == 0) { + int acc_num = 0; + for (int i = 0; i < NUM_PRIOR_BLOCKS; i++) { + int n = block_num[i]; + block_num[i] = acc_num; + acc_num += n; + } + buffer1[batchId * NUM_CLASSES_ACC + classId] = acc_num; + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + + { + int write_offset = block_num[box_gid]; + int mask_id = start_bid >> 3; + for (int i = start_bid; i < end_bid; i += 8) { + for (int bi = 0; bi < 8; bi++) { + char bitset = 1 << bi; + if ((bit_mask[mask_id] & bitset) && ((i + bi) < NUM_OF_PRIORS)) { + INPUT1_TYPE score = FUNC_CALL(get_score)(input_confidence, (i + bi), classId, batchId); + __global SCORES_INFO *scoresList = (__global SCORES_INFO*)&buffer0[(batchId * NUM_CLASSES + classId) * BUFFER_STRIDE]; + SCORES_INFO score_info; + score_info.classId = (short)classId; + score_info.boxId = i + bi; + score_info.score = score; + scoresList[write_offset] = score_info; + write_offset++; + } + } + mask_id++; + } + } +} +#endif /* DO_STAGE_0_CAFFE*/ + +#ifdef DO_STAGE_0_MXNET +KERNEL (detection_output_stage_0_scores_mxnet)(__global INPUT1_TYPE* input_confidence, + __global uchar *buffer0, + volatile __global int *buffer2) { + const int batchId = get_global_id(0); + const int priorId = get_global_id(1); + + const int scores_size_offset = batchId * NUM_OF_PRIORS + priorId; + __global SCORES_INFO *scoresList = (__global SCORES_INFO*)&buffer0[batchId * BUFFER_STRIDE]; + + if (priorId == 0) { + buffer2[batchId * NUM_CLASSES_ACC + NUM_CLASSES] = 0; + } + barrier(CLK_GLOBAL_MEM_FENCE); + + int idx_max_score = FUNC_CALL(get_largest_score)(input_confidence, priorId, batchId); + INPUT1_TYPE score = FUNC_CALL(get_score)(input_confidence, priorId, idx_max_score, batchId); + SCORES_INFO score_info; + score_info.classId = (short)idx_max_score; + score_info.boxId = priorId; + score_info.score = score; + scoresList[priorId] = score_info; + atomic_inc(&buffer2[batchId * NUM_CLASSES_ACC + NUM_CLASSES]); +} +#endif /* DO_STAGE_0_MXNET */ + +#ifdef DO_STAGE_1_CAFFE +KERNEL (detection_output_stage_1_sort_caffe)(__global uchar *buffer0, + __global int *buffer1) { + const int batchId = get_global_id(0); + const int classId = get_global_id(1); + const int workItemId = get_global_id(2); + const int localClassId = get_local_id(1); + __local int __range[LOCAL_CLASS_NUM][LOCAL_WORK_NUM * 2]; + + const int scoresInfoNum = buffer1[batchId * NUM_CLASSES_ACC + classId]; + + __global SCORES_INFO *scoresList = (__global SCORES_INFO*)&buffer0[(batchId * NUM_CLASSES + classId) * BUFFER_STRIDE]; + + if (workItemId == 0) { + __range[localClassId][0] = 0; + __range[localClassId][1] = (classId == BACKGROUND_LABEL_ID ? 
0 : scoresInfoNum - 1); + } else { + __range[localClassId][workItemId * 2] = 0; + __range[localClassId][workItemId * 2 + 1] = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int range_step = 2; + const int first_id = workItemId * 2; + for (int i = 0, maxWorkingNum = 1; i < PARTITION_STEP; ++i, maxWorkingNum *= 2, range_step *= 2) { + if (workItemId < maxWorkingNum) { + const int begin_id = __range[localClassId][first_id]; + const int end_id = __range[localClassId][first_id + 1]; + const int second_id = first_id + range_step; + if (begin_id < end_id) { + const int pivot = FUNC_CALL(partition)(scoresList, begin_id, end_id, true); + __range[localClassId][first_id ] = begin_id; + __range[localClassId][first_id + 1 ] = max(pivot - 1, begin_id); + __range[localClassId][second_id ] = min(pivot + 1, end_id); + __range[localClassId][second_id + 1] = end_id; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + const int begin_id = __range[localClassId][first_id]; + const int end_id = __range[localClassId][first_id + 1]; + if (begin_id < end_id) { + FUNC_CALL(quickSortIterative)(scoresList, begin_id, end_id, true); + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + if (workItemId == 0) { + if (TOP_K != -1 && TOP_K < scoresInfoNum) { + buffer1[batchId * NUM_CLASSES_ACC + classId] = TOP_K; + } + if (classId == BACKGROUND_LABEL_ID) { + buffer1[batchId * NUM_CLASSES_ACC + classId] = 0; + } + } +} +#endif /* DO_STAGE_1_CAFFE */ + +#ifdef DO_STAGE_1_MXNET +KERNEL (detection_output_stage_1_sort_mxnet)(__global uchar *buffer0, + __global int *buffer2) { + const int batchId = get_global_id(0); + const int workItemId = get_global_id(2); + __local int __range[LOCAL_WORK_NUM * 2]; + + const int scoresInfoNum = buffer2[batchId * NUM_CLASSES_ACC + NUM_CLASSES]; + if (scoresInfoNum < 2) + return; + + __global SCORES_INFO *scoresList = (__global SCORES_INFO*)&buffer0[batchId * BUFFER_STRIDE]; + + if (workItemId == 0) { + __range[0] = 0; + __range[1] = scoresInfoNum - 1; + } else { + __range[workItemId * 2] = 0; + __range[workItemId * 2 + 1] = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int range_step = 2; + const int first_id = workItemId * 2; + for (int i = 0; i < PARTITION_STEP; ++i, range_step *= 2) { + if (workItemId <= i) { + const int begin_id = __range[first_id]; + const int end_id = __range[first_id + 1]; + const int second_id = first_id + range_step; + + if (begin_id < end_id) { + const int pivot = FUNC_CALL(partition)(scoresList, begin_id, end_id, true); + __range[first_id ] = begin_id; + __range[first_id + 1 ] = max(pivot - 1, begin_id); + __range[second_id ] = min(pivot + 1, end_id); + __range[second_id + 1] = end_id; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + const int begin_id = __range[first_id]; + const int end_id = __range[first_id + 1]; + if (begin_id < end_id) { + FUNC_CALL(quickSortIterative)(scoresList, begin_id, end_id, true); + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + if (workItemId == 0 && (TOP_K != -1 && TOP_K < scoresInfoNum)) { + buffer2[batchId * NUM_CLASSES_ACC + NUM_CLASSES] = TOP_K; + } +} +#endif /* DO_STAGE_1_MXNET */ + +#ifdef DO_STAGE_2_CAFFE +KERNEL (detection_output_stage_2_nms_caffe)(__global INPUT0_TYPE* input_location, + __global INPUT2_TYPE* input_prior_box, + __global uchar *buffer0, + __global int *buffer1) { + const int batchId = get_global_id(0); + const int classId = get_global_id(1); + const int loc_label = ((SHARE_LOCATION)? 
0 : classId); + const int scoresInfoIdx = batchId * NUM_CLASSES_ACC + classId; + + __global SCORES_INFO *scoresList = (__global SCORES_INFO*)&buffer0[(batchId * NUM_CLASSES + classId) * BUFFER_STRIDE]; + + const int scoresInfoNum = buffer1[scoresInfoIdx]; + + int selectedBoxNum = 0; + for (uint idx_score = 0; idx_score < scoresInfoNum; idx_score++) { + bool keep = true; + int idx = scoresList[idx_score].boxId; + for (uint idx_indice = 0; idx_indice < selectedBoxNum; idx_indice++) { + int kept_idx = scoresList[idx_indice].boxId; + INPUT0_TYPE decoded_bbox1[4]; + FUNC_CALL(get_decoded_bbox)(decoded_bbox1, input_location, input_prior_box, idx, loc_label, batchId); + INPUT0_TYPE decoded_bbox2[4]; + FUNC_CALL(get_decoded_bbox)(decoded_bbox2, input_location, input_prior_box, kept_idx, loc_label, batchId); + INPUT0_TYPE overlap = FUNC_CALL(jaccardOverlap)(decoded_bbox1, decoded_bbox2); + if (overlap > NMS_THRESHOLD) { + keep = false; + break; + } + } + if (keep) { + scoresList[selectedBoxNum] = scoresList[idx_score]; + ++selectedBoxNum; + } + } + buffer1[scoresInfoIdx] = selectedBoxNum; +} +#endif /* DO_STAGE_2_CAFFE */ + +#ifdef DO_STAGE_2_CAFFE_OPT +KERNEL (detection_output_stage_2_nms_caffe)(__global INPUT0_TYPE* input_location, + __global INPUT2_TYPE* input_prior_box, + __global uchar *buffer0, + __global int *buffer1) { + const int batchId = get_global_id(0); + const int classId = get_global_id(1); + const int loc_label = ((SHARE_LOCATION)? 0 : classId); + const int scoresInfoIdx = batchId * NUM_CLASSES_ACC + classId; + INPUT0_TYPE decoded_bboxes[TOP_K * 4]; + + __global SCORES_INFO *scoresList = (__global SCORES_INFO*)&buffer0[(batchId * NUM_CLASSES + classId) * BUFFER_STRIDE]; + + const int scoresInfoNum = buffer1[scoresInfoIdx]; + + int selectedBoxNum = 0; + for (uint idx_score = 0; idx_score < scoresInfoNum; idx_score++) { + bool keep = true; + int idx = scoresList[idx_score].boxId; + INPUT0_TYPE decoded_bbox_cur[4]; + FUNC_CALL(get_decoded_bbox)(decoded_bbox_cur, input_location, input_prior_box, idx, loc_label, batchId); + + for (uint idx_indice = 0; idx_indice < selectedBoxNum; idx_indice++) { + INPUT0_TYPE decoded_bbox_kept[4] = { decoded_bboxes[4 * idx_indice], + decoded_bboxes[4 * idx_indice + 1], + decoded_bboxes[4 * idx_indice + 2], + decoded_bboxes[4 * idx_indice + 3] }; + + INPUT0_TYPE overlap = FUNC_CALL(jaccardOverlap)(decoded_bbox_cur, decoded_bbox_kept); + if (overlap > NMS_THRESHOLD) { + keep = false; + break; + } + } + if (keep) { + scoresList[selectedBoxNum] = scoresList[idx_score]; + decoded_bboxes[4 * selectedBoxNum] = decoded_bbox_cur[0]; + decoded_bboxes[4 * selectedBoxNum + 1] = decoded_bbox_cur[1]; + decoded_bboxes[4 * selectedBoxNum + 2] = decoded_bbox_cur[2]; + decoded_bboxes[4 * selectedBoxNum + 3] = decoded_bbox_cur[3]; + ++selectedBoxNum; + } + } + buffer1[scoresInfoIdx] = selectedBoxNum; +} +#endif /* DO_STAGE_2_CAFFE_OPT */ + +#ifdef DO_STAGE_2_MXNET +KERNEL (detection_output_stage_2_nms_mxnet)(__global INPUT0_TYPE* input_location, + __global INPUT2_TYPE* input_prior_box, + __global uchar *buffer0, + __global uchar *buffer1, + __global int *buffer2) { + const int batchId = get_global_id(0); + const int scoresInfoNum = buffer2[batchId * NUM_CLASSES_ACC + NUM_CLASSES]; + + __global SCORES_INFO *scoresList = (__global SCORES_INFO*)&buffer0[batchId * BUFFER_STRIDE]; + __global SCORES_INFO *selectedScoresList = (__global SCORES_INFO*)&buffer1[batchId * NUM_CLASSES * BUFFER_STRIDE]; + + for (uint idx_class = 0; idx_class < NUM_CLASSES; idx_class++) { + 
buffer2[batchId * NUM_CLASSES_ACC + idx_class] = 0; + } + + int selectedBoxNum = 0; + for (uint idx_score = 0; idx_score < scoresInfoNum; idx_score++) { + bool keep = true; + int idx = scoresList[idx_score].boxId; + int cls = (int)scoresList[idx_score].classId; + int loc_label = ((SHARE_LOCATION)? 0 : cls); + int indice_offset = cls * NUM_OF_PRIORS; + int scores_size_offset = batchId * NUM_CLASSES_ACC + cls; + int cur_num_indice = buffer2[scores_size_offset]; + for (uint idx_indice = 0; idx_indice < cur_num_indice; idx_indice++) { + int kept_idx = selectedScoresList[indice_offset + idx_indice].boxId; + INPUT0_TYPE decoded_bbox1[4]; + FUNC_CALL(get_decoded_bbox)(decoded_bbox1, input_location, input_prior_box, idx, loc_label, batchId); + INPUT0_TYPE decoded_bbox2[4]; + FUNC_CALL(get_decoded_bbox)(decoded_bbox2, input_location, input_prior_box, kept_idx, loc_label, batchId); + INPUT0_TYPE overlap = FUNC_CALL(jaccardOverlap)(decoded_bbox1, decoded_bbox2); + if (overlap > NMS_THRESHOLD) { + keep = false; + break; + } + } + if (keep) { + SCORES_INFO score_info; + score_info.classId = scoresList[idx_score].classId; + score_info.boxId = scoresList[idx_score].boxId; + score_info.score = scoresList[idx_score].score; + selectedScoresList[indice_offset + cur_num_indice] = score_info; + buffer2[scores_size_offset] = cur_num_indice + 1; + ++selectedBoxNum; + } + } + buffer2[batchId * NUM_CLASSES_ACC + NUM_CLASSES] = selectedBoxNum; +} +#endif /* DO_STAGE_2_MXNET */ + +#ifdef DO_STAGE_3_CAFFE +KERNEL (detection_output_stage_final_caffe)(__global INPUT0_TYPE* input_location, + __global INPUT2_TYPE* input_prior_box, + __global OUTPUT_TYPE* output, + __global uchar *buffer0, + __global int *buffer1) { + const int batchId = get_global_id(0); + + __local int class_offset[LOCAL_BATCHES_NUM * NUM_CLASSES_ACC]; + + const int total_det = FUNC_CALL(get_accumulated_detections)(buffer1, batchId); + buffer1[batchId * NUM_CLASSES_ACC + NUM_CLASSES] = total_det; + + if (KEEP_TOP_K > -1 && total_det > KEEP_TOP_K) { + __global SCORES_INFO *scoresList = (__global SCORES_INFO*)&buffer0[0]; + int num_det = 0; + int scores_offset = (batchId * NUM_CLASSES * NUM_OF_PRIORS); + int scores_size_offset = batchId * NUM_CLASSES_ACC; + for (uint idx_class = 0; idx_class < NUM_CLASSES; idx_class++) { + const int acc_num = buffer1[scores_size_offset + idx_class]; + + for (uint idx_score = 0; idx_score < acc_num; idx_score++) { + SCORES_INFO score_info; + score_info = *((__global SCORES_INFO*)&buffer0[(batchId * NUM_CLASSES + idx_class) * BUFFER_STRIDE] + idx_score); + scoresList[scores_offset + num_det + idx_score] = score_info; + } + num_det += acc_num; + buffer1[scores_size_offset + idx_class] = 0; + } + + FUNC_CALL(quickSortIterative)(scoresList + scores_offset, 0, num_det - 1, true); + + // recalculate valid items for each class + for (uint idx_num_det = 0; idx_num_det < KEEP_TOP_K; idx_num_det++) { + SCORES_INFO score_info = scoresList[scores_offset + idx_num_det]; + buffer1[scores_size_offset + score_info.classId]++; + } + + // calculate starting point of each class + class_offset[scores_size_offset] = 0; + for (int i = 1; i < NUM_CLASSES_ACC; ++i) { + class_offset[scores_size_offset + i] = class_offset[scores_size_offset + i - 1] + buffer1[scores_size_offset + i - 1]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + const int startIdx = FUNC_CALL(get_start_idx)(buffer1, batchId); + for (uint idx_num_det = 0; idx_num_det < KEEP_TOP_K; idx_num_det++) { + SCORES_INFO score_info; + score_info = scoresList[scores_offset + idx_num_det]; + 
const int idx = startIdx + class_offset[batchId * NUM_CLASSES_ACC + score_info.classId]; + output[idx * OUTPUT_ROW_SIZE] = TO_OUTPUT_TYPE(batchId); + output[idx * OUTPUT_ROW_SIZE + 1] = TO_OUTPUT_TYPE((DECREASE_LABEL_ID) ? score_info.classId - 1 : score_info.classId); + output[idx * OUTPUT_ROW_SIZE + 2] = TO_OUTPUT_TYPE(score_info.score); + + INPUT0_TYPE decoded_bbox[4]; + const uint loc_label = ((SHARE_LOCATION)? 0 : score_info.classId); + FUNC_CALL(get_decoded_bbox)(decoded_bbox, input_location, input_prior_box, score_info.boxId, loc_label, batchId); + INPUT0_TYPE xmin = decoded_bbox[0]; + INPUT0_TYPE ymin = decoded_bbox[1]; + INPUT0_TYPE xmax = decoded_bbox[2]; + INPUT0_TYPE ymax = decoded_bbox[3]; + if (CLIP_AFTER_NMS) { + xmin = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), xmin)); + ymin = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), ymin)); + xmax = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), xmax)); + ymax = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), ymax)); + } + vstore4((OUTPUT_TYPE4)(xmin, ymin, xmax, ymax), 0, output + idx * OUTPUT_ROW_SIZE + 3); + class_offset[batchId * NUM_CLASSES_ACC + score_info.classId]++; + } + } else { + const int startIdx = FUNC_CALL(get_start_idx)(buffer1, batchId); + int outputIdx = 0; + for (uint idx_class = 0; idx_class < NUM_CLASSES; idx_class++) { + int scores_size_offset = batchId * NUM_CLASSES_ACC + idx_class; + const int acc_num = buffer1[scores_size_offset]; + __global SCORES_INFO *scoresList = (__global SCORES_INFO*)&buffer0[(batchId * NUM_CLASSES + idx_class) * BUFFER_STRIDE]; + for (uint idx_score = 0; idx_score < acc_num; idx_score++) { + SCORES_INFO score_info = scoresList[idx_score]; + const int idx = startIdx + outputIdx; + output[idx * OUTPUT_ROW_SIZE] = TO_OUTPUT_TYPE(batchId); + output[idx * OUTPUT_ROW_SIZE + 1] = TO_OUTPUT_TYPE((DECREASE_LABEL_ID) ? (int)score_info.classId - 1 : (int)score_info.classId); + output[idx * OUTPUT_ROW_SIZE + 2] = TO_OUTPUT_TYPE(score_info.score); + INPUT0_TYPE decoded_bbox[4]; + const uint loc_label = ((SHARE_LOCATION)? 0 : (int)score_info.classId); + FUNC_CALL(get_decoded_bbox)(decoded_bbox, input_location, input_prior_box, score_info.boxId, loc_label, batchId); + INPUT0_TYPE xmin = decoded_bbox[0]; + INPUT0_TYPE ymin = decoded_bbox[1]; + INPUT0_TYPE xmax = decoded_bbox[2]; + INPUT0_TYPE ymax = decoded_bbox[3]; + if (CLIP_AFTER_NMS) { + xmin = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), xmin)); + ymin = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), ymin)); + xmax = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), xmax)); + ymax = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), ymax)); + } + vstore4((OUTPUT_TYPE4)(xmin, ymin, xmax, ymax), 0, output + idx * OUTPUT_ROW_SIZE + 3); + outputIdx++; + } + } + } + + barrier(CLK_GLOBAL_MEM_FENCE); + if(batchId == 0) { + const int final_detections = FUNC_CALL(get_final_detections)(buffer1); + unroll_for (uint i = final_detections; i < NUM_OF_IMAGES * KEEP_TOP_K; i++) { + output[i * OUTPUT_ROW_SIZE] = (i == final_detections ? 
-1.0 : 0.0); + vstore4((OUTPUT_TYPE4)(0.0, 0.0, 0.0, 0.0), 0, output + i * OUTPUT_ROW_SIZE + 1); + vstore2((OUTPUT_TYPE2)(0.0, 0.0), 0, output + i * OUTPUT_ROW_SIZE + 5); + } + } +} +#endif /* DO_STAGE_3_CAFFE */ + +#ifdef DO_STAGE_3_MXNET +KERNEL (detection_output_stage_final_mxnet)(__global INPUT0_TYPE* input_location, + __global INPUT2_TYPE* input_prior_box, + __global OUTPUT_TYPE* output, + __global uchar *buffer0, + __global uchar *buffer1, + __global int *buffer2) { + for (uint idx_image = 0; idx_image < NUM_OF_IMAGES; idx_image++) { + __global SCORES_INFO *scoresList = (__global SCORES_INFO*)&buffer0[idx_image * BUFFER_STRIDE]; + const int total_det = buffer2[idx_image * NUM_CLASSES_ACC + NUM_CLASSES]; + + if (KEEP_TOP_K > -1 && total_det > KEEP_TOP_K) { + int num_det = 0; + for (uint idx_class = 0; idx_class < NUM_CLASSES; idx_class++) { + int scores_size_offset = idx_image * NUM_CLASSES_ACC + idx_class; + const int acc_num = buffer2[scores_size_offset]; + __global SCORES_INFO *selectedScoresList = (__global SCORES_INFO*)&buffer1[(idx_image * NUM_CLASSES + idx_class) * BUFFER_STRIDE]; + + for (uint idx_score = 0; idx_score < acc_num; idx_score++) { + scoresList[num_det + idx_score] = selectedScoresList[idx_score]; + } + num_det += acc_num; + buffer2[scores_size_offset] = 0; + } + FUNC_CALL(quickSortIterative)(scoresList, 0, num_det - 1, true); + + for (uint idx_num_det = 0; idx_num_det < KEEP_TOP_K; idx_num_det++) { + int scores_size_offset = idx_image * NUM_CLASSES_ACC + (int)scoresList[idx_num_det].classId; + int acc_num = buffer2[scores_size_offset]; + __global SCORES_INFO *selectedScoresList = (__global SCORES_INFO*)&buffer1[(idx_image * NUM_CLASSES + (int)scoresList[idx_num_det].classId) * BUFFER_STRIDE]; + selectedScoresList[acc_num] = scoresList[idx_num_det]; + buffer2[scores_size_offset] = (acc_num + 1); + } + } + } + + int count = 0; + for (uint idx_image = 0; idx_image < NUM_OF_IMAGES; idx_image++) { + for (uint idx_class = 0; idx_class < NUM_CLASSES; idx_class++) { + int scores_size_offset = idx_image * NUM_CLASSES_ACC + idx_class; + int acc_num = buffer2[scores_size_offset]; + __global SCORES_INFO *selectedScoresList = (__global SCORES_INFO*)&buffer1[(idx_image * NUM_CLASSES + idx_class) * BUFFER_STRIDE]; + int loc_label = ((SHARE_LOCATION)? 0 : idx_class); + for (uint idx_score = 0; idx_score < acc_num; idx_score++) { + SCORES_INFO score_info; + score_info = selectedScoresList[idx_score]; + output[count * OUTPUT_ROW_SIZE] = TO_OUTPUT_TYPE(idx_image); + output[count * OUTPUT_ROW_SIZE + 1] = TO_OUTPUT_TYPE((DECREASE_LABEL_ID) ? 
(int)score_info.classId - 1 : (int)score_info.classId); + output[count * OUTPUT_ROW_SIZE + 2] = TO_OUTPUT_TYPE(score_info.score); + INPUT0_TYPE decoded_bbox[4]; + FUNC_CALL(get_decoded_bbox)(decoded_bbox, input_location, input_prior_box, score_info.boxId, loc_label, idx_image); + INPUT0_TYPE xmin = decoded_bbox[0]; + INPUT0_TYPE ymin = decoded_bbox[1]; + INPUT0_TYPE xmax = decoded_bbox[2]; + INPUT0_TYPE ymax = decoded_bbox[3]; + + if (CLIP_AFTER_NMS) { + xmin = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), xmin)); + ymin = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), ymin)); + xmax = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), xmax)); + ymax = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), ymax)); + } + vstore4((OUTPUT_TYPE4)(xmin, ymin, xmax, ymax), 0, output + count * OUTPUT_ROW_SIZE + 3); + ++count; + } + } + } + + if (count < NUM_OF_IMAGES * KEEP_TOP_K) { + output[count * OUTPUT_ROW_SIZE] = -1.0; + } +} +#endif /* DO_STAGE_3_MXNET */ diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/detection_output_common.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/detection_output_common.cl index ffc6e8d75d5..703f30eb6f3 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/detection_output_common.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/detection_output_common.cl @@ -11,6 +11,7 @@ #define HIDDEN_CLASS ((BACKGROUND_LABEL_ID == 0 && SHARE_LOCATION)? 1 : 0) #define NUM_OF_IMAGES INPUT0_BATCH_NUM +#define PRIOR_BATCH_SIZE INPUT2_BATCH_NUM #define NUM_LOC_CLASSES ((SHARE_LOCATION)? 1 : NUM_CLASSES) #define NUM_CLASSES_OUT ((HIDDEN_CLASS == 1)? NUM_CLASSES - 1 : NUM_CLASSES) #define NUM_OF_PRIORS (INPUT0_LENGTH / (NUM_OF_IMAGES * NUM_LOC_CLASSES * PRIOR_BOX_SIZE)) @@ -24,12 +25,10 @@ #define CONF_PADDING (CONF_PADDING_Y * CONF_SIZE_X + CONF_PADDING_X) #define CONF_XY_SIZE_PRODUCT (CONF_SIZE_X * CONF_SIZE_Y) -#define NUM_OF_PRIOR_COMPONENTS (NUM_OF_PRIORS * PRIOR_BOX_SIZE) +#define NUM_OF_PRIOR_COMPONENTS (NUM_OF_PRIORS * PRIOR_INFO_SIZE) #define NUM_OF_IMAGE_CONF (INPUT0_LENGTH/NUM_OF_IMAGES/PRIOR_BOX_SIZE) #define SCORES_COUNT (((TOP_K != -1) && (TOP_K < NUM_OF_PRIORS))? TOP_K : NUM_OF_PRIORS) - -#define OUTPUT_OFFSET (((NUM_OF_IMAGES + 15) / 16) * 16) #define SCORE_OFFSET 2 #define INPUT_OFFSET (((NUM_IMAGES + 15) / 16) * 16) @@ -41,130 +40,148 @@ #define NUM_OF_IMAGE_BBOXES (INPUT_BBOXES_LENGTH / NUM_IMAGES) #define NUM_OF_ITEMS_SORT ((NUM_CLASSES_IN / 256) + 1) +#define INPUT_TYPE4 MAKE_VECTOR_TYPE(INPUT1_TYPE, 4) +#define OUTPUT_TYPE2 MAKE_VECTOR_TYPE(OUTPUT_TYPE, 2) +#define OUTPUT_TYPE4 MAKE_VECTOR_TYPE(OUTPUT_TYPE, 4) +#if INPUT1_TYPE_SIZE == 2 +#define CMP_TYPE4 short4 +#elif INPUT1_TYPE_SIZE == 4 +#define CMP_TYPE4 int4 +#endif // Number of bboxes to keep in output #define KEEP_BBOXES_NUM ((KEEP_TOP_K < NUM_OF_IMAGE_BBOXES)? KEEP_TOP_K : NUM_OF_IMAGE_BBOXES) -void FUNC(get_decoded_bbox)(UNIT_TYPE* decoded_bbox, __global UNIT_TYPE* input_location, __global UNIT_TYPE* input_prior_box, const uint idx_prior, const uint idx_class, const uint idx_image) -{ - const uint prior_offset = idx_prior * PRIOR_INFO_SIZE + PRIOR_COORD_OFFSET; +inline void FUNC(get_decoded_bbox)(INPUT0_TYPE* decoded_bbox, __global INPUT0_TYPE* input_location, + __global INPUT2_TYPE* input_prior_box, const uint idx_prior, const uint idx_class, const uint idx_image) { + const uint prior_box_offset = ((PRIOR_BATCH_SIZE == 1)? 
0 : idx_image) * NUM_OF_PRIOR_COMPONENTS * (VARIANCE_ENCODED_IN_TARGET ? 1 : 2); + const uint prior_offset = prior_box_offset + idx_prior * PRIOR_INFO_SIZE + PRIOR_COORD_OFFSET; + const uint variance_offset = prior_box_offset + NUM_OF_PRIOR_COMPONENTS + (idx_prior * PRIOR_BOX_SIZE); uint location_offset = (NUM_LOC_CLASSES * (idx_prior * PRIOR_BOX_SIZE) + idx_image * INPUT0_FEATURE_NUM + idx_class * PRIOR_BOX_SIZE) * LOC_XY_SIZE_PRODUCT + LOCATION_PADDING; - UNIT_TYPE prior_bboxes[4] = { + INPUT2_TYPE prior_bboxes[4] = { input_prior_box[prior_offset], input_prior_box[prior_offset + 1], input_prior_box[prior_offset + 2], - input_prior_box[prior_offset + 3]}; + input_prior_box[prior_offset + 3] + }; - if (!PRIOR_IS_NORMALIZED) - { + if (!PRIOR_IS_NORMALIZED) { prior_bboxes[0] /= IMAGE_WIDTH; prior_bboxes[1] /= IMAGE_HEIGH; prior_bboxes[2] /= IMAGE_WIDTH; prior_bboxes[3] /= IMAGE_HEIGH; } - if (CODE_TYPE == CODE_TYPE_CORNER) - { - if (VARIANCE_ENCODED_IN_TARGET) - { + if (CODE_TYPE == CODE_TYPE_CORNER) { + if (VARIANCE_ENCODED_IN_TARGET) { // variance is encoded in target, we simply need to add the offset predictions. - for(uint i = 0; i < PRIOR_BOX_SIZE; i++) - { + for(uint i = 0; i < PRIOR_BOX_SIZE; i++) { decoded_bbox[i] = prior_bboxes[i] + input_location[location_offset]; location_offset += LOC_XY_SIZE_PRODUCT; } - } - else - { + } else { // variance is encoded in bbox, we need to scale the offset accordingly. - for(uint i = 0; i < PRIOR_BOX_SIZE; i++) - { - decoded_bbox[i] = - mad(input_prior_box[NUM_OF_PRIOR_COMPONENTS + i], // prior variances are places after prior bboxes - input_location[location_offset], - prior_bboxes[i]); + for(uint i = 0; i < PRIOR_BOX_SIZE; i++) { + decoded_bbox[i] = + prior_bboxes[i] + + input_prior_box[variance_offset + i] * + input_location[location_offset]; location_offset += LOC_XY_SIZE_PRODUCT; } } - } - else if (CODE_TYPE == CODE_TYPE_CENTER_SIZE) - { - const UNIT_TYPE prior_width = prior_bboxes[2] - prior_bboxes[0]; - const UNIT_TYPE prior_height = prior_bboxes[3] - prior_bboxes[1]; - const UNIT_TYPE prior_center_x = (prior_bboxes[0] + prior_bboxes[2]) / 2; - const UNIT_TYPE prior_center_y = (prior_bboxes[1] + prior_bboxes[3]) / 2; - const UNIT_TYPE bbox_xmin = input_location[location_offset]; - const UNIT_TYPE bbox_ymin = input_location[location_offset + LOC_XY_SIZE_PRODUCT]; - const UNIT_TYPE bbox_xmax = input_location[location_offset + 2 * LOC_XY_SIZE_PRODUCT]; - const UNIT_TYPE bbox_ymax = input_location[location_offset + 3 * LOC_XY_SIZE_PRODUCT]; - UNIT_TYPE decode_bbox_center_x, decode_bbox_center_y; - UNIT_TYPE decode_bbox_width, decode_bbox_height; + } else if (CODE_TYPE == CODE_TYPE_CENTER_SIZE) { + const INPUT2_TYPE prior_width = prior_bboxes[2] - prior_bboxes[0]; + const INPUT2_TYPE prior_height = prior_bboxes[3] - prior_bboxes[1]; + const INPUT2_TYPE prior_center_x = (prior_bboxes[0] + prior_bboxes[2]) / 2; + const INPUT2_TYPE prior_center_y = (prior_bboxes[1] + prior_bboxes[3]) / 2; + const INPUT0_TYPE bbox_xmin = input_location[location_offset]; + const INPUT0_TYPE bbox_ymin = input_location[location_offset + LOC_XY_SIZE_PRODUCT]; + const INPUT0_TYPE bbox_xmax = input_location[location_offset + 2 * LOC_XY_SIZE_PRODUCT]; + const INPUT0_TYPE bbox_ymax = input_location[location_offset + 3 * LOC_XY_SIZE_PRODUCT]; + INPUT0_TYPE decode_bbox_center_x, decode_bbox_center_y; + INPUT0_TYPE decode_bbox_width, decode_bbox_height; - if (VARIANCE_ENCODED_IN_TARGET) - { + if (VARIANCE_ENCODED_IN_TARGET) { // variance is encoded in target, we simply need to 
restore the offset predictions. decode_bbox_center_x = bbox_xmin * prior_width + prior_center_x; decode_bbox_center_y = bbox_ymin * prior_height + prior_center_y; decode_bbox_width = (exp(bbox_xmax) * prior_width) / 2; decode_bbox_height = (exp(bbox_ymax) * prior_height) / 2; - } - else - { + } else { // variance is encoded in bbox, we need to scale the offset accordingly. - decode_bbox_center_x = input_prior_box[NUM_OF_PRIOR_COMPONENTS] * bbox_xmin * prior_width + prior_center_x; - decode_bbox_center_y = input_prior_box[NUM_OF_PRIOR_COMPONENTS + 1] * bbox_ymin * prior_height + prior_center_y; - decode_bbox_width = (exp(input_prior_box[NUM_OF_PRIOR_COMPONENTS + 2] * bbox_xmax) * prior_width) / 2; - decode_bbox_height = (exp(input_prior_box[NUM_OF_PRIOR_COMPONENTS + 3] * bbox_ymax) * prior_height) / 2; + decode_bbox_center_x = input_prior_box[variance_offset] * bbox_xmin * prior_width + prior_center_x; + decode_bbox_center_y = input_prior_box[variance_offset + 1] * bbox_ymin * prior_height + prior_center_y; + decode_bbox_width = (exp(input_prior_box[variance_offset + 2] * bbox_xmax) * prior_width) / 2; + decode_bbox_height = (exp(input_prior_box[variance_offset + 3] * bbox_ymax) * prior_height) / 2; } decoded_bbox[0] = decode_bbox_center_x - decode_bbox_width; decoded_bbox[1] = decode_bbox_center_y - decode_bbox_height; decoded_bbox[2] = decode_bbox_center_x + decode_bbox_width; decoded_bbox[3] = decode_bbox_center_y + decode_bbox_height; - } - else - { - const UNIT_TYPE prior_width = prior_bboxes[2] - prior_bboxes[0]; - const UNIT_TYPE prior_height = prior_bboxes[3] - prior_bboxes[1]; - const UNIT_TYPE bbox_xmin = input_location[location_offset]; - const UNIT_TYPE bbox_ymin = input_location[location_offset + LOC_XY_SIZE_PRODUCT]; - const UNIT_TYPE bbox_xmax = input_location[location_offset + 2 * LOC_XY_SIZE_PRODUCT]; - const UNIT_TYPE bbox_ymax = input_location[location_offset + 3 * LOC_XY_SIZE_PRODUCT]; + } else { + const INPUT2_TYPE prior_width = prior_bboxes[2] - prior_bboxes[0]; + const INPUT2_TYPE prior_height = prior_bboxes[3] - prior_bboxes[1]; + const INPUT0_TYPE bbox_xmin = input_location[location_offset]; + const INPUT0_TYPE bbox_ymin = input_location[location_offset + LOC_XY_SIZE_PRODUCT]; + const INPUT0_TYPE bbox_xmax = input_location[location_offset + 2 * LOC_XY_SIZE_PRODUCT]; + const INPUT0_TYPE bbox_ymax = input_location[location_offset + 3 * LOC_XY_SIZE_PRODUCT]; - if (VARIANCE_ENCODED_IN_TARGET) - { + if (VARIANCE_ENCODED_IN_TARGET) { // variance is encoded in target, we simply need to add the offset predictions. decoded_bbox[0] = prior_bboxes[0] + bbox_xmin * prior_width; decoded_bbox[1] = prior_bboxes[1] + bbox_ymin * prior_height; decoded_bbox[2] = prior_bboxes[2] + bbox_xmax * prior_width; decoded_bbox[3] = prior_bboxes[3] + bbox_ymax * prior_height; - } - else - { + } else { // variance is encoded in bbox, we need to scale the offset accordingly. 
- decoded_bbox[0] = prior_bboxes[0] + input_prior_box[NUM_OF_PRIOR_COMPONENTS] * bbox_xmin * prior_width; - decoded_bbox[1] = prior_bboxes[1] + input_prior_box[NUM_OF_PRIOR_COMPONENTS + 1] * bbox_ymin * prior_height; - decoded_bbox[2] = prior_bboxes[2] + input_prior_box[NUM_OF_PRIOR_COMPONENTS + 2] * bbox_xmax * prior_width; - decoded_bbox[3] = prior_bboxes[3] + input_prior_box[NUM_OF_PRIOR_COMPONENTS + 3] * bbox_ymax * prior_height; + decoded_bbox[0] = prior_bboxes[0] + input_prior_box[variance_offset] * bbox_xmin * prior_width; + decoded_bbox[1] = prior_bboxes[1] + input_prior_box[variance_offset + 1] * bbox_ymin * prior_height; + decoded_bbox[2] = prior_bboxes[2] + input_prior_box[variance_offset + 2] * bbox_xmax * prior_width; + decoded_bbox[3] = prior_bboxes[3] + input_prior_box[variance_offset + 3] * bbox_ymax * prior_height; } - } + } + if (CLIP_BEFORE_NMS) { + decoded_bbox[0] = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), decoded_bbox[0])); + decoded_bbox[1] = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), decoded_bbox[1])); + decoded_bbox[2] = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), decoded_bbox[2])); + decoded_bbox[3] = max(TO_INPUT0_TYPE(0.0), min(TO_INPUT0_TYPE(1.0), decoded_bbox[3])); + } } -UNIT_TYPE FUNC(get_score)(__global UNIT_TYPE* input_confidence, const uint idx_prior, const uint idx_class, const uint idx_image) -{ +inline INPUT1_TYPE FUNC(get_score)(__global INPUT1_TYPE* input_confidence, const uint idx_prior, const uint idx_class, const uint idx_image) { const uint confidence_offset = // offset in kernel input 'input_confidence' (idx_prior * NUM_CLASSES + idx_image * NUM_OF_PRIORS * NUM_CLASSES + idx_class) * CONF_XY_SIZE_PRODUCT + CONF_PADDING; - return (input_confidence[confidence_offset] > CONFIDENCE_THRESHOLD)? input_confidence[confidence_offset] : 0; + return (input_confidence[confidence_offset] > CONFIDENCE_THRESHOLD)? 
input_confidence[confidence_offset] : -1; } +inline INPUT_TYPE4 FUNC(get_score4)(__global INPUT1_TYPE* input_confidence, const uint idx_prior, const uint idx_class, const uint idx_image) { + const uint confidence_offset = // offset in kernel input 'input_confidence' + (idx_prior * NUM_CLASSES + idx_image * NUM_OF_PRIORS * NUM_CLASSES + idx_class) * + CONF_XY_SIZE_PRODUCT + + CONF_PADDING; + INPUT_TYPE4 scores = vload4(0, input_confidence + confidence_offset); + CMP_TYPE4 compare = isgreater(scores, (INPUT_TYPE4)(CONFIDENCE_THRESHOLD, CONFIDENCE_THRESHOLD, CONFIDENCE_THRESHOLD, CONFIDENCE_THRESHOLD)); + return select((INPUT_TYPE4)(-1, -1, -1, -1), scores, compare); +} + +inline CMP_TYPE4 FUNC(filter_score4)(__global INPUT1_TYPE* input_confidence, const uint idx_prior, const uint idx_class, const uint idx_image) { + const uint confidence_offset = // offset in kernel input 'input_confidence' + (idx_prior * NUM_CLASSES + idx_image * NUM_OF_PRIORS * NUM_CLASSES + idx_class) * + CONF_XY_SIZE_PRODUCT + + CONF_PADDING; + INPUT_TYPE4 scores = vload4(0, input_confidence + confidence_offset); + CMP_TYPE4 compare = isgreater(scores, (INPUT_TYPE4)(CONFIDENCE_THRESHOLD, CONFIDENCE_THRESHOLD, CONFIDENCE_THRESHOLD, CONFIDENCE_THRESHOLD)); + return select((CMP_TYPE4)(0, 0, 0, 0), (CMP_TYPE4)(1, 1, 1, 1), compare); +} diff --git a/inference-engine/thirdparty/clDNN/src/impls/cpu/detection_output.cpp b/inference-engine/thirdparty/clDNN/src/impls/cpu/detection_output.cpp index ea879410b69..fce958757d6 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/cpu/detection_output.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/cpu/detection_output.cpp @@ -383,10 +383,9 @@ struct detection_output_impl : typed_primitive_impl { } } } - // In case number of detections is smaller than keep_top_k fill the rest of the buffer with invalid image id - // (-1). - while (count < num_of_images * args.keep_top_k) { - out_ptr[count * DETECTION_OUTPUT_ROW_SIZE] = (dtype)-1.f; + const int final_cnt = count; + for (int i = count; i < num_of_images * args.keep_top_k; i++) { + out_ptr[count * DETECTION_OUTPUT_ROW_SIZE] = (i == final_cnt ? 
(dtype)-1.f : (dtype)0.f);
             out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 1] = (dtype)0.f;
             out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 2] = (dtype)0.f;
             out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 3] = (dtype)0.f;
diff --git a/inference-engine/thirdparty/clDNN/src/impls/ocl/detection_output.cpp b/inference-engine/thirdparty/clDNN/src/impls/ocl/detection_output.cpp
new file mode 100644
index 00000000000..e4ffde3ff76
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/detection_output.cpp
@@ -0,0 +1,88 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "detection_output_inst.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "cldnn/runtime/error_handler.hpp"
+#include "kernel_selector_helper.h"
+#include "detection_output/detection_output_kernel_selector.h"
+#include "detection_output/detection_output_kernel_ref.h"
+#include
+
+namespace cldnn {
+namespace ocl {
+
+struct detection_output_impl : typed_primitive_impl_ocl<detection_output> {
+    using parent = typed_primitive_impl_ocl<detection_output>;
+    using parent::parent;
+
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<detection_output_impl>(*this);
+    }
+
+private:
+    static void set_detection_output_specific_params(kernel_selector::detection_output_params::DedicatedParams& detectOutParams,
+                                                     const detection_output_node& arg) {
+        auto primitive = arg.get_primitive();
+        detectOutParams.keep_top_k = primitive->keep_top_k;
+        detectOutParams.num_classes = primitive->num_classes;
+        detectOutParams.top_k = primitive->top_k;
+        detectOutParams.background_label_id = primitive->background_label_id;
+        detectOutParams.code_type = (int32_t)primitive->code_type;
+        detectOutParams.share_location = primitive->share_location;
+        detectOutParams.variance_encoded_in_target = primitive->variance_encoded_in_target;
+        detectOutParams.nms_threshold = primitive->nms_threshold;
+        detectOutParams.eta = primitive->eta;
+        detectOutParams.confidence_threshold = primitive->confidence_threshold;
+        detectOutParams.prior_coordinates_offset = primitive->prior_coordinates_offset;
+        detectOutParams.prior_info_size = primitive->prior_info_size;
+        detectOutParams.prior_is_normalized = primitive->prior_is_normalized;
+        detectOutParams.input_width = primitive->input_width;
+        detectOutParams.input_heigh = primitive->input_height;
+        detectOutParams.decrease_label_id = primitive->decrease_label_id;
+        detectOutParams.clip_before_nms = primitive->clip_before_nms;
+        detectOutParams.clip_after_nms = primitive->clip_after_nms;
+        detectOutParams.conf_size_x = arg.confidence().get_output_layout().get_buffer_size().spatial[0];
+        detectOutParams.conf_size_y = arg.confidence().get_output_layout().get_buffer_size().spatial[1];
+        detectOutParams.conf_padding_x = arg.confidence().get_output_layout().data_padding.lower_size().spatial[0];
+        detectOutParams.conf_padding_y = arg.confidence().get_output_layout().data_padding.lower_size().spatial[1];
+    }
+
+public:
+    static primitive_impl* create(const detection_output_node& arg) {
+        auto detect_out_params = get_default_params<kernel_selector::detection_output_params>(arg);
+        auto detect_out_optional_params =
+            get_default_optional_params<kernel_selector::detection_output_optional_params>(arg.get_program());
+
+        detect_out_params.inputs.push_back(convert_data_tensor(arg.confidence().get_output_layout()));
+        detect_out_params.inputs.push_back(convert_data_tensor(arg.prior_box().get_output_layout()));
+        set_detection_output_specific_params(detect_out_params.detectOutParams, arg);
+
+        auto& kernel_selector = kernel_selector::detection_output_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(detect_out_params, detect_out_optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        auto detection_output = new detection_output_impl(arg, best_kernels[0]);
+
+        return detection_output;
+    }
+};
+
+namespace detail {
+
+attach_detection_output_impl::attach_detection_output_impl() {
+    implementation_map<detection_output>::add(impl_types::ocl, detection_output_impl::create, {
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx)
+    });
+}
+
+} // namespace detail
+} // namespace ocl
+} // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/src/impls/ocl/register.cpp b/inference-engine/thirdparty/clDNN/src/impls/ocl/register.cpp
index 86a423a8471..28ce961755e 100644
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/register.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/register.cpp
@@ -26,6 +26,7 @@ void register_implementations() {
     REGISTER_OCL(deformable_conv);
     REGISTER_OCL(deformable_interp);
     REGISTER_OCL(depth_to_space);
+    REGISTER_OCL(detection_output);
     REGISTER_OCL(batch_to_space);
     REGISTER_OCL(eltwise);
     REGISTER_OCL(fully_connected);
diff --git a/inference-engine/thirdparty/clDNN/src/impls/ocl/register.hpp b/inference-engine/thirdparty/clDNN/src/impls/ocl/register.hpp
index dcd58574e52..f462c37c3a9 100644
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/register.hpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/register.hpp
@@ -18,6 +18,7 @@
 #include "cldnn/primitives/custom_gpu_primitive.hpp"
 #include "cldnn/primitives/deconvolution.hpp"
 #include "cldnn/primitives/depth_to_space.hpp"
+#include "cldnn/primitives/detection_output.hpp"
 #include "cldnn/primitives/eltwise.hpp"
 #include "cldnn/primitives/fully_connected.hpp"
 #include "cldnn/primitives/gather.hpp"
@@ -90,6 +91,7 @@ REGISTER_OCL(deconvolution);
 REGISTER_OCL(deformable_conv);
 REGISTER_OCL(deformable_interp);
 REGISTER_OCL(depth_to_space);
+REGISTER_OCL(detection_output);
 REGISTER_OCL(eltwise);
 REGISTER_OCL(embed);
 REGISTER_OCL(fully_connected);
diff --git a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
index 998d7bf21ca..4e64b6b349b 100644
--- a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
+++ b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
@@ -825,6 +825,15 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node) {
     impl_types preferred_impl = impl_types::any;
     if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) {
         preferred_impl = _forcing_map.at(node.id()).second;
+    } else if (node.is_type<detection_output>()) {
+        auto& detection_output_node = node.as<detection_output>();
+        auto confidence_layout = detection_output_node.confidence().get_output_layout();
+        auto prim = detection_output_node.get_primitive();
+        if (confidence_layout.size.batch[0] >= 4 && prim->confidence_threshold >= 0.1 && prim->top_k <= 400 &&
+            prim->num_classes >= 16 && confidence_layout.size.feature[0] > 10000)
+            preferred_impl = impl_types::ocl;
+        else
+            preferred_impl = impl_types::cpu;
     }
 
     return preferred_impl;
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp
index b406730a288..0155673ca0a 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp
+++
b/inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp @@ -255,7 +255,7 @@ public: check_results(output_prim, 4, "1 1 0.6 0.45 0.45 0.75 0.75"); check_results(output_prim, 5, "1 1 0.0 0.25 0.25 0.55 0.55"); check_results(output_prim, 6, "-1 0 0 0 0 0 0"); - check_results(output_prim, 7, "-1 0 0 0 0 0 0"); + check_results(output_prim, 7, "0 0 0 0 0 0 0"); } void forward_num_detections_greater_than_keep_top_k() { @@ -345,11 +345,11 @@ public: check_results(output_prim, 4, "1 1 0.6 0.45 0.45 0.75 0.75"); check_results(output_prim, 5, "1 1 0.0 0.25 0.25 0.55 0.55"); check_results(output_prim, 6, "-1 0 0 0 0 0 0"); - check_results(output_prim, 7, "-1 0 0 0 0 0 0"); - check_results(output_prim, 8, "-1 0 0 0 0 0 0"); - check_results(output_prim, 9, "-1 0 0 0 0 0 0"); - check_results(output_prim, 10, "-1 0 0 0 0 0 0"); - check_results(output_prim, 11, "-1 0 0 0 0 0 0"); + check_results(output_prim, 7, "0 0 0 0 0 0 0"); + check_results(output_prim, 8, "0 0 0 0 0 0 0"); + check_results(output_prim, 9, "0 0 0 0 0 0 0"); + check_results(output_prim, 10, "0 0 0 0 0 0 0"); + check_results(output_prim, 11, "0 0 0 0 0 0 0"); } void test_forward_share_location_top_k() { @@ -447,14 +447,14 @@ public: check_results(output_prim, 9, "1 0 0.4 0.45 0.45 0.75 0.75"); check_results(output_prim, 10, "1 1 0.6 0.40 0.40 0.70 0.70"); check_results(output_prim, 11, "-1 0 0 0 0 0 0"); - check_results(output_prim, 12, "-1 0 0 0 0 0 0"); - check_results(output_prim, 13, "-1 0 0 0 0 0 0"); - check_results(output_prim, 14, "-1 0 0 0 0 0 0"); - check_results(output_prim, 15, "-1 0 0 0 0 0 0"); - check_results(output_prim, 16, "-1 0 0 0 0 0 0"); - check_results(output_prim, 17, "-1 0 0 0 0 0 0"); - check_results(output_prim, 18, "-1 0 0 0 0 0 0"); - check_results(output_prim, 19, "-1 0 0 0 0 0 0"); + check_results(output_prim, 12, "0 0 0 0 0 0 0"); + check_results(output_prim, 13, "0 0 0 0 0 0 0"); + check_results(output_prim, 14, "0 0 0 0 0 0 0"); + check_results(output_prim, 15, "0 0 0 0 0 0 0"); + check_results(output_prim, 16, "0 0 0 0 0 0 0"); + check_results(output_prim, 17, "0 0 0 0 0 0 0"); + check_results(output_prim, 18, "0 0 0 0 0 0 0"); + check_results(output_prim, 19, "0 0 0 0 0 0 0"); } void forward_no_share_location_top_k() { @@ -503,7 +503,7 @@ public: check_results(output_prim, 4, "1 0 1.0 0.25 0.25 0.55 0.55"); check_results(output_prim, 5, "1 1 0.6 0.40 0.40 0.70 0.70"); check_results(output_prim, 6, "-1 0 0 0 0 0 0"); - check_results(output_prim, 7, "-1 0 0 0 0 0 0"); + check_results(output_prim, 7, "0 0 0 0 0 0 0"); } void forward_no_share_location_neg_0() { @@ -550,10 +550,10 @@ public: check_results(output_prim, 3, "0 1 0.4 0.50 0.50 0.80 0.80"); check_results(output_prim, 4, "1 1 0.6 0.40 0.40 0.70 0.70"); check_results(output_prim, 5, "-1 0 0 0 0 0 0"); - check_results(output_prim, 6, "-1 0 0 0 0 0 0"); - check_results(output_prim, 7, "-1 0 0 0 0 0 0"); - check_results(output_prim, 8, "-1 0 0 0 0 0 0"); - check_results(output_prim, 9, "-1 0 0 0 0 0 0"); + check_results(output_prim, 6, "0 0 0 0 0 0 0"); + check_results(output_prim, 7, "0 0 0 0 0 0 0"); + check_results(output_prim, 8, "0 0 0 0 0 0 0"); + check_results(output_prim, 9, "0 0 0 0 0 0 0"); } void forward_no_share_location_neg_0_top_k() { @@ -648,7 +648,7 @@ public: check_results(output_prim, 4, "1 0 1.0 0.25 0.25 0.55 0.55"); check_results(output_prim, 5, "1 1 0.6 0.40 0.40 0.70 0.70"); check_results(output_prim, 6, "-1 0 0 0 0 0 0"); - check_results(output_prim, 7, "-1 0 0 0 0 0 0"); + check_results(output_prim, 7, 
"0 0 0 0 0 0 0"); } void test_forward_no_share_location_top_k_faster_rcnn_case() { @@ -711,7 +711,7 @@ public: check_results(output_prim, 4, "1 0 1.0 0.25 0.25 0.55 0.55"); check_results(output_prim, 5, "1 1 0.6 0.40 0.40 0.70 0.70"); check_results(output_prim, 6, "-1 0 0 0 0 0 0"); - check_results(output_prim, 7, "-1 0 0 0 0 0 0"); + check_results(output_prim, 7, "0 0 0 0 0 0 0"); } static const int num_of_images = 2; From e327c34af040848adbe82c67b37b8fbe89a1fe1d Mon Sep 17 00:00:00 2001 From: Alexander Shchepetov Date: Tue, 10 Aug 2021 17:04:18 +0300 Subject: [PATCH 20/24] Add StressMemLeak test with 2 streams (#6964) * StressMemLeakTests add tests with streams * little fixes * Code consistency --- .../common/ie_pipelines/pipelines.cpp | 35 +++++++++++++++++++ .../common/ie_pipelines/pipelines.h | 1 + tests/stress_tests/memleaks_tests/tests.cpp | 10 ++++++ .../tests_pipelines/tests_pipelines.cpp | 8 +++++ .../tests_pipelines/tests_pipelines.h | 1 + 5 files changed, 55 insertions(+) diff --git a/tests/stress_tests/common/ie_pipelines/pipelines.cpp b/tests/stress_tests/common/ie_pipelines/pipelines.cpp index eccee2bb615..8a1fb3dec65 100644 --- a/tests/stress_tests/common/ie_pipelines/pipelines.cpp +++ b/tests/stress_tests/common/ie_pipelines/pipelines.cpp @@ -134,3 +134,38 @@ std::function reinfer_request_inference(InferenceEngine::InferRequest& i Blob::Ptr outputBlob = infer_request.GetBlob(output.first); }; } + +std::function inference_with_streams(const std::string &model, const std::string &target_device, const int& nstreams) { + return [&] { + std::map config; + config[target_device + "_THROUGHPUT_STREAMS"] = std::to_string(nstreams); + + Core ie; + ie.GetVersions(target_device); + ie.SetConfig(config, target_device); + + InferRequest inferRequest; + + CNNNetwork cnnNetwork = ie.ReadNetwork(model); + ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, target_device); + auto batchSize = cnnNetwork.getBatchSize(); + batchSize = batchSize != 0 ? 
batchSize : 1;
+        const InferenceEngine::ConstInputsDataMap inputsInfo(exeNetwork.GetInputsInfo());
+
+        unsigned int nireq = nstreams;
+        try {
+            nireq = exeNetwork.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
+        } catch (const std::exception &ex) {
+            log_err("Failed to query OPTIMAL_NUMBER_OF_INFER_REQUESTS");
+        }
+        for (int counter = 0; counter < nireq; counter++) {
+            inferRequest = exeNetwork.CreateInferRequest();
+            fillBlobs(inferRequest, inputsInfo, batchSize);
+
+            inferRequest.Infer();
+            OutputsDataMap output_info(cnnNetwork.getOutputsInfo());
+            for (auto &output : output_info)
+                Blob::Ptr outputBlob = inferRequest.GetBlob(output.first);
+        }
+    };
+}
diff --git a/tests/stress_tests/common/ie_pipelines/pipelines.h b/tests/stress_tests/common/ie_pipelines/pipelines.h
index 4bb93fd861d..bd86a4d8806 100644
--- a/tests/stress_tests/common/ie_pipelines/pipelines.h
+++ b/tests/stress_tests/common/ie_pipelines/pipelines.h
@@ -16,3 +16,4 @@ std::function create_infer_request(const std::string &model, const std::
 std::function<void()> recreate_infer_request(InferenceEngine::ExecutableNetwork& exeNetwork);
 std::function<void()> infer_request_inference(const std::string &model, const std::string &target_device);
 std::function<void()> reinfer_request_inference(InferenceEngine::InferRequest& infer_request, InferenceEngine::OutputsDataMap& output_info);
+std::function<void()> inference_with_streams(const std::string &model, const std::string &target_device, const int& nstreams);
diff --git a/tests/stress_tests/memleaks_tests/tests.cpp b/tests/stress_tests/memleaks_tests/tests.cpp
index 92bb7982fca..7eedd5d541c 100644
--- a/tests/stress_tests/memleaks_tests/tests.cpp
+++ b/tests/stress_tests/memleaks_tests/tests.cpp
@@ -118,6 +118,16 @@ TEST_P(MemLeaksTestSuite, infer_request_inference) {
     };
     test_runner(test_params.numthreads, test);
 }
+
+TEST_P(MemLeaksTestSuite, inference_with_streams) {
+    const auto nstreams = 2;
+    auto test_params = GetParam();
+    auto test = [&] {
+        return test_inference_with_streams(test_params.model, test_params.device, nstreams, test_params.numiters);
+    };
+    test_runner(test_params.numthreads, test);
+}
+
 // tests_pipelines/tests_pipelines.cpp
 
 INSTANTIATE_TEST_SUITE_P(MemLeaksTests, MemLeaksTestSuiteNoModel,
diff --git a/tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.cpp b/tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.cpp
index 10a91006885..a4e4aeed190 100644
--- a/tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.cpp
+++ b/tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.cpp
@@ -207,3 +207,11 @@ TestResult test_reinfer_request_inference(InferenceEngine::InferRequest& infer_r
                                               << n << " times");
     return common_test_pipeline(reinfer_request_inference(infer_request, output_info), n);
 }
+
+TestResult test_inference_with_streams(const std::string& model, const std::string& target_device,
+                                       const int& nstreams, const int& n) {
+    log_info("Inference of InferRequest from network: \"" << model
+                                                          << "\" for device: \"" << target_device
+                                                          << "\" with streams: " << nstreams << " for " << n << " times");
+    return common_test_pipeline(inference_with_streams(model, target_device, nstreams), n);
+}
diff --git a/tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.h b/tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.h
index dec1f4ac968..64aee916400 100644
--- a/tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.h
+++ b/tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.h
@@ -22,4
+22,5 @@ TestResult test_create_infer_request(const std::string &model, const std::string TestResult test_recreate_infer_request(InferenceEngine::ExecutableNetwork& network, const std::string &model, const std::string &target_device, const int &n); TestResult test_infer_request_inference(const std::string &model, const std::string &target_device, const int &n); TestResult test_reinfer_request_inference(InferenceEngine::InferRequest& infer_request, InferenceEngine::OutputsDataMap& output_info, const std::string &model, const std::string &target_device, const int &n); +TestResult test_inference_with_streams(const std::string &model, const std::string &target_device, const int &nstreams, const int &n); // tests_pipelines/tests_pipelines.cpp From 16056de08e546bd2c2b00795e2998c9f5ec9ea9a Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Tue, 10 Aug 2021 19:51:20 +0300 Subject: [PATCH 21/24] Update itt collector style (#7001) * Updated ITT collector style * Applied code style --- thirdparty/itt_collector/.clang-format | 16 +- .../sea_itt_lib/IttNotifyStdSrc.cpp | 364 ++++++++++++------ .../sea_itt_lib/IttNotifyStdSrc.h | 65 +++- .../itt_collector/sea_itt_lib/Recorder.cpp | 101 +++-- .../itt_collector/sea_itt_lib/Recorder.h | 19 +- .../sea_itt_lib/TraceEventFormat.h | 11 +- .../itt_collector/sea_itt_lib/Utils.cpp | 81 ++-- thirdparty/itt_collector/sea_itt_lib/Utils.h | 16 +- .../itt_collector/sea_itt_lib/sea_itt_lib.cpp | 74 ++-- 9 files changed, 494 insertions(+), 253 deletions(-) diff --git a/thirdparty/itt_collector/.clang-format b/thirdparty/itt_collector/.clang-format index 93b6f4f50f5..ebe747b7838 100644 --- a/thirdparty/itt_collector/.clang-format +++ b/thirdparty/itt_collector/.clang-format @@ -1,26 +1,28 @@ BasedOnStyle: Google IndentWidth: 4 UseTab: Never ---- +ColumnLimit: 120 + Language: Cpp Standard: Cpp11 AccessModifierOffset: -4 AlignConsecutiveMacros: true AllowAllArgumentsOnNextLine: false +AllowAllConstructorInitializersOnNextLine: false AllowAllParametersOfDeclarationOnNextLine: false AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: Never AllowShortLambdasOnASingleLine: Empty AllowShortLoopsOnASingleLine: false AlwaysBreakBeforeMultilineStrings: false -ColumnLimit: 160 -# Specialize this comment pragma in order to avoid changes in SEA copyrights +BinPackArguments: false +BinPackParameters: false CommentPragmas: '^#' DerivePointerAlignment: false FixNamespaceComments: true IndentCaseLabels: false -IndentPPDirectives: BeforeHash -SpaceBeforeCpp11BracedList: true -SpaceBeforeCtorInitializerColon: false ---- +IndentPPDirectives: AfterHash +ForEachMacros: + - foreach + - FOREACH_CHILD diff --git a/thirdparty/itt_collector/sea_itt_lib/IttNotifyStdSrc.cpp b/thirdparty/itt_collector/sea_itt_lib/IttNotifyStdSrc.cpp index 22bd5108804..ae3ac72ea9b 100644 --- a/thirdparty/itt_collector/sea_itt_lib/IttNotifyStdSrc.cpp +++ b/thirdparty/itt_collector/sea_itt_lib/IttNotifyStdSrc.cpp @@ -31,17 +31,17 @@ #include #ifdef _WIN32 - #include - #include +# include +# include #else - #include - #include +# include +# include #endif #ifdef __APPLE__ - //#define __APPLE_API_UNSTABLE - #include - #include +//#define __APPLE_API_UNSTABLE +# include +# include #endif namespace sea { @@ -79,9 +79,9 @@ public: }; #ifdef _DEBUG - #define ITT_FUNCTION_STAT() CIttFnStat oIttFnStat(__FUNCTION__) +# define ITT_FUNCTION_STAT() CIttFnStat oIttFnStat(__FUNCTION__) #else - #define ITT_FUNCTION_STAT() +# define ITT_FUNCTION_STAT() #endif struct __itt_frame_t { @@ -229,7 +229,8 @@ 
std::shared_ptr g_spCutName; std::string Escape4Path(std::string str) { std::replace_if( - str.begin(), str.end(), + str.begin(), + str.end(), [](char sym) { return strchr("/\\:*?\"<>|", sym); }, @@ -239,7 +240,7 @@ std::string Escape4Path(std::string str) { void InitDomain(__itt_domain* pDomain) { CIttLocker locker; - pDomain->extra2 = new DomainExtra {}; + pDomain->extra2 = new DomainExtra{}; if (g_savepath.size()) { DomainExtra* pDomainExtra = reinterpret_cast(pDomain->extra2); pDomainExtra->strDomainPath = GetDir(g_savepath, Escape4Path(pDomain->nameA)); @@ -258,7 +259,7 @@ SThreadRecord* GetThreadRecord() { CIttLocker lock; - pThreadRecord = new SThreadRecord {}; + pThreadRecord = new SThreadRecord{}; static __itt_global* pGlobal = GetITTGlobal(); __itt_domain* pDomain = pGlobal->domain_list; @@ -308,9 +309,11 @@ CTraceEventFormat::SRegularFields GetRegularFields(__itt_clock_domain* clock_dom if (pTrack) { CTraceEventFormat::SRegularFields& trackRF = *(CTraceEventFormat::SRegularFields*)pTrack->extra2; - rf.changed |= (rf.pid != trackRF.pid) ? CTraceEventFormat::SRegularFields::ecPid : CTraceEventFormat::SRegularFields::ecNothing; + rf.changed |= (rf.pid != trackRF.pid) ? CTraceEventFormat::SRegularFields::ecPid + : CTraceEventFormat::SRegularFields::ecNothing; rf.pid = trackRF.pid; - rf.changed |= (rf.tid != trackRF.tid) ? CTraceEventFormat::SRegularFields::ecTid : CTraceEventFormat::SRegularFields::ecNothing; + rf.changed |= (rf.tid != trackRF.tid) ? CTraceEventFormat::SRegularFields::ecTid + : CTraceEventFormat::SRegularFields::ecNothing; rf.tid = trackRF.tid; } if (clock_domain || timestamp) { @@ -396,7 +399,11 @@ __itt_string_handle* string_handle_createW(const wchar_t* name) { } #endif -void marker_ex(const __itt_domain* pDomain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle* pName, +void marker_ex(const __itt_domain* pDomain, + __itt_clock_domain* clock_domain, + unsigned long long timestamp, + __itt_id id, + __itt_string_handle* pName, __itt_scope scope) { ITT_FUNCTION_STAT(); CTraceEventFormat::SRegularFields rf = GetRegularFields(clock_domain, timestamp); @@ -428,12 +435,12 @@ void task_begin(const __itt_domain* pDomain, __itt_id taskid, __itt_id parentid, SThreadRecord* pThreadRecord = GetThreadRecord(); CTraceEventFormat::SRegularFields rf = GetRegularFields(); - pThreadRecord->pTask = placement_new(STaskDescriptor) {pThreadRecord->pTask, // chaining the previous task inside - rf, - pDomain, - pName, - taskid, - parentid}; + pThreadRecord->pTask = placement_new(STaskDescriptor){pThreadRecord->pTask, // chaining the previous task inside + rf, + pDomain, + pName, + taskid, + parentid}; for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { g_handlers[i]->TaskBegin(*pThreadRecord->pTask, false); @@ -446,13 +453,13 @@ void task_begin_fn(const __itt_domain* pDomain, __itt_id taskid, __itt_id parent CTraceEventFormat::SRegularFields rf = GetRegularFields(); SThreadRecord* pThreadRecord = GetThreadRecord(); - pThreadRecord->pTask = placement_new(STaskDescriptor) {pThreadRecord->pTask, // chaining the previous task inside - rf, - pDomain, - nullptr, - taskid, - parentid, - fn}; + pThreadRecord->pTask = placement_new(STaskDescriptor){pThreadRecord->pTask, // chaining the previous task inside + rf, + pDomain, + nullptr, + taskid, + parentid, + fn}; for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { g_handlers[i]->TaskBegin(*pThreadRecord->pTask, false); @@ -480,7 +487,11 @@ void task_end(const __itt_domain* 
pDomain) { pThreadRecord->pTask = prev; } -void Counter(const __itt_domain* pDomain, __itt_string_handle* pName, double value, __itt_clock_domain* clock_domain, unsigned long long timestamp) { +void Counter(const __itt_domain* pDomain, + __itt_string_handle* pName, + double value, + __itt_clock_domain* clock_domain, + unsigned long long timestamp) { CTraceEventFormat::SRegularFields rf = GetRegularFields(clock_domain, timestamp); for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { @@ -494,13 +505,16 @@ void counter_inc_delta_v3(const __itt_domain* pDomain, __itt_string_handle* pNam } void FixCounter(__itt_counter_info_t* pCounter) { - pCounter->extra2 = new SDomainName {UNICODE_AGNOSTIC(domain_create)(pCounter->domainA), UNICODE_AGNOSTIC(string_handle_create)(pCounter->nameA)}; + pCounter->extra2 = new SDomainName{UNICODE_AGNOSTIC(domain_create)(pCounter->domainA), + UNICODE_AGNOSTIC(string_handle_create)(pCounter->nameA)}; for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { g_handlers[i]->CreateCounter(reinterpret_cast<__itt_counter>(pCounter)); } } -__itt_counter ITTAPI UNICODE_AGNOSTIC(counter_create_typed)(const char* name, const char* domain, __itt_metadata_type type) { +__itt_counter ITTAPI UNICODE_AGNOSTIC(counter_create_typed)(const char* name, + const char* domain, + __itt_metadata_type type) { ITT_FUNCTION_STAT(); if (!name || !domain) @@ -514,7 +528,8 @@ __itt_counter ITTAPI UNICODE_AGNOSTIC(counter_create_typed)(const char* name, co __itt_global* pGlobal = GetITTGlobal(); for (h_tail = NULL, h = pGlobal->counter_list; h != NULL; h_tail = h, h = h->next) { if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && - ((h->domainA == NULL && domain == NULL) || (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) + ((h->domainA == NULL && domain == NULL) || + (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; } if (!h) { @@ -549,10 +564,21 @@ double Convert(void* ptr) { typedef double (*FConvert)(void* ptr); FConvert g_MetatypeFormatConverter[] = { - nullptr, Convert, Convert, Convert, Convert, Convert, Convert, Convert, Convert, + nullptr, + Convert, + Convert, + Convert, + Convert, + Convert, + Convert, + Convert, + Convert, }; -void counter_set_value_ex(__itt_counter id, __itt_clock_domain* clock_domain, unsigned long long timestamp, void* value_ptr) { +void counter_set_value_ex(__itt_counter id, + __itt_clock_domain* clock_domain, + unsigned long long timestamp, + void* value_ptr) { ITT_FUNCTION_STAT(); if (id->type < __itt_metadata_u64 || id->type > __itt_metadata_double) { VerbosePrint("%s: weird type: %d stack: %s\n", __FUNCTION__, (int)id->type, GetStackString().c_str()); @@ -579,7 +605,7 @@ void UNICODE_AGNOSTIC(sync_create)(void* addr, const char* objtype, const char* __itt_id id = __itt_id_make(addr, 0); CTraceEventFormat::SRegularFields rf = GetRegularFields(); - WriteRecord(ERecordType::ObjectNew, SRecord {rf, *g_pIntelSEAPIDomain, id, __itt_null, pName}); + WriteRecord(ERecordType::ObjectNew, SRecord{rf, *g_pIntelSEAPIDomain, id, __itt_null, pName}); } #ifdef _WIN32 @@ -593,7 +619,7 @@ void sync_destroy(void* addr) { __itt_id id = __itt_id_make(addr, 0); CTraceEventFormat::SRegularFields rf = GetRegularFields(); - WriteRecord(ERecordType::ObjectDelete, SRecord {rf, *g_pIntelSEAPIDomain, id, __itt_null}); + WriteRecord(ERecordType::ObjectDelete, SRecord{rf, *g_pIntelSEAPIDomain, id, __itt_null}); } inline void SyncState(void* addr, const char* state) { @@ -602,7 +628,8 
@@ inline void SyncState(void* addr, const char* state) { __itt_id id = __itt_id_make(addr, 0); CTraceEventFormat::SRegularFields rf = GetRegularFields(); - WriteRecord(ERecordType::ObjectSnapshot, SRecord {rf, *g_pIntelSEAPIDomain, id, __itt_null, nullptr, nullptr, state, strlen(state)}); + WriteRecord(ERecordType::ObjectSnapshot, + SRecord{rf, *g_pIntelSEAPIDomain, id, __itt_null, nullptr, nullptr, state, strlen(state)}); } void UNICODE_AGNOSTIC(sync_rename)(void* addr, const char* name) { @@ -643,14 +670,14 @@ void region_begin(const __itt_domain* pDomain, __itt_id id, __itt_id parentid, c ITT_FUNCTION_STAT(); CTraceEventFormat::SRegularFields rf = GetRegularFields(); - WriteRecord(ERecordType::BeginFrame, SRecord {rf, *pDomain, id, parentid, pName}); + WriteRecord(ERecordType::BeginFrame, SRecord{rf, *pDomain, id, parentid, pName}); } void region_end(const __itt_domain* pDomain, __itt_id id) { ITT_FUNCTION_STAT(); CTraceEventFormat::SRegularFields rf = GetRegularFields(); - WriteRecord(ERecordType::EndFrame, SRecord {rf, *pDomain, id, __itt_null}); + WriteRecord(ERecordType::EndFrame, SRecord{rf, *pDomain, id, __itt_null}); } __itt_clock_domain* clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data) { @@ -668,8 +695,11 @@ __itt_clock_domain* clock_domain_create(__itt_get_clock_info_fn fn, void* fn_dat fn(&ci, fn_data); uint64_t now2 = CTraceEventFormat::GetRegularFields().nanoseconds; - *ppClockDomain = new __itt_clock_domain { - ci, fn, fn_data, 0, + *ppClockDomain = new __itt_clock_domain{ + ci, + fn, + fn_data, + 0, new uint64_t((now1 + now2) / 2) // let's keep current time point in extra2 }; @@ -694,7 +724,11 @@ void clock_domain_reset() { }); } -void task_begin_ex(const __itt_domain* pDomain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, +void task_begin_ex(const __itt_domain* pDomain, + __itt_clock_domain* clock_domain, + unsigned long long timestamp, + __itt_id taskid, + __itt_id parentid, __itt_string_handle* pName) { ITT_FUNCTION_STAT(); @@ -702,12 +736,12 @@ void task_begin_ex(const __itt_domain* pDomain, __itt_clock_domain* clock_domain CTraceEventFormat::SRegularFields rf = GetRegularFields(clock_domain, timestamp); - pThreadRecord->pTask = placement_new(STaskDescriptor) {pThreadRecord->pTask, // chaining the previous task inside - rf, - pDomain, - pName, - taskid, - parentid}; + pThreadRecord->pTask = placement_new(STaskDescriptor){pThreadRecord->pTask, // chaining the previous task inside + rf, + pDomain, + pName, + taskid, + parentid}; for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { g_handlers[i]->TaskBegin(*pThreadRecord->pTask, false); @@ -765,7 +799,8 @@ __itt_track_group* track_group_create(__itt_string_handle* pName, __itt_track_gr WriteGroupName(g_lastPseudoProcess, pName->strA); } // zero name means current process - return *ppTrackGroup = new __itt_track_group {pName, nullptr, track_group_type, int(pName ? g_lastPseudoProcess-- : g_PID)}; + return *ppTrackGroup = + new __itt_track_group{pName, nullptr, track_group_type, int(pName ? 
g_lastPseudoProcess-- : g_PID)}; } __itt_track* track_create(__itt_track_group* track_group, __itt_string_handle* name, __itt_track_type track_type) { @@ -783,13 +818,14 @@ __itt_track* track_create(__itt_track_group* track_group, __itt_string_handle* n ppTrack = &(*ppTrack)->next; } - CTraceEventFormat::SRegularFields* pRF = new CTraceEventFormat::SRegularFields {int64_t(track_group->extra1), g_lastPseudoThread--}; + CTraceEventFormat::SRegularFields* pRF = + new CTraceEventFormat::SRegularFields{int64_t(track_group->extra1), g_lastPseudoThread--}; for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { g_handlers[i]->SetThreadName(*pRF, name->strA); } - return *ppTrack = new __itt_track {name, track_group, track_type, 0, pRF}; + return *ppTrack = new __itt_track{name, track_group, track_type, 0, pRF}; } class COverlapped { @@ -801,9 +837,17 @@ public: return *(pThreadRecord->pOverlapped = new COverlapped); } - void Begin(__itt_id taskid, const CTraceEventFormat::SRegularFields& rf, const __itt_domain* domain, __itt_string_handle* name, __itt_id parentid) { - m_map[taskid].reset(placement_new(STaskDescriptor) {nullptr, // chaining the previous task inside - rf, domain, name, taskid, parentid}, + void Begin(__itt_id taskid, + const CTraceEventFormat::SRegularFields& rf, + const __itt_domain* domain, + __itt_string_handle* name, + __itt_id parentid) { + m_map[taskid].reset(placement_new(STaskDescriptor){nullptr, // chaining the previous task inside + rf, + domain, + name, + taskid, + parentid}, placement_free); for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { @@ -863,20 +907,30 @@ protected: TTaskMap m_map; }; -void task_begin_overlapped_ex(const __itt_domain* pDomain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, +void task_begin_overlapped_ex(const __itt_domain* pDomain, + __itt_clock_domain* clock_domain, + unsigned long long timestamp, + __itt_id taskid, + __itt_id parentid, __itt_string_handle* pName) { ITT_FUNCTION_STAT(); COverlapped::Get().Begin(taskid, GetRegularFields(clock_domain, timestamp), pDomain, pName, parentid); } -void task_begin_overlapped(const __itt_domain* pDomain, __itt_id taskid, __itt_id parentid, __itt_string_handle* pName) { +void task_begin_overlapped(const __itt_domain* pDomain, + __itt_id taskid, + __itt_id parentid, + __itt_string_handle* pName) { ITT_FUNCTION_STAT(); task_begin_overlapped_ex(pDomain, nullptr, 0, taskid, parentid, pName); } -void task_end_overlapped_ex(const __itt_domain* pDomain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid) { +void task_end_overlapped_ex(const __itt_domain* pDomain, + __itt_clock_domain* clock_domain, + unsigned long long timestamp, + __itt_id taskid) { ITT_FUNCTION_STAT(); COverlapped::Get().End(taskid, GetRegularFields(clock_domain, timestamp), pDomain); @@ -899,7 +953,8 @@ template void MetadataAdd(const __itt_domain* pDomain, __itt_id id, __itt_string_handle* pKey, Args... args) { if (id.d1 || id.d2) { SThreadRecord* pThreadRecord = GetThreadRecord(); - if (!COverlapped::Get().AddArg(pDomain, id, pKey, args...) && pThreadRecord->pTask && pThreadRecord->pTask->id == id) { + if (!COverlapped::Get().AddArg(pDomain, id, pKey, args...) 
&& pThreadRecord->pTask && + pThreadRecord->pTask->id == id) { for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { g_handlers[i]->AddArg(*pThreadRecord->pTask, pKey, args...); } @@ -907,7 +962,11 @@ void MetadataAdd(const __itt_domain* pDomain, __itt_id id, __itt_string_handle* } } -void UNICODE_AGNOSTIC(metadata_str_add)(const __itt_domain* pDomain, __itt_id id, __itt_string_handle* pKey, const char* data, size_t length) { +void UNICODE_AGNOSTIC(metadata_str_add)(const __itt_domain* pDomain, + __itt_id id, + __itt_string_handle* pKey, + const char* data, + size_t length) { ITT_FUNCTION_STAT(); if (id == __itt_null) { @@ -940,12 +999,21 @@ void UNICODE_AGNOSTIC(metadata_str_add)(const __itt_domain* pDomain, __itt_id id } #ifdef _WIN32 -void metadata_str_addW(const __itt_domain* pDomain, __itt_id id, __itt_string_handle* pKey, const wchar_t* data, size_t length) { +void metadata_str_addW(const __itt_domain* pDomain, + __itt_id id, + __itt_string_handle* pKey, + const wchar_t* data, + size_t length) { UNICODE_AGNOSTIC(metadata_str_add)(pDomain, id, pKey, W2L(data).c_str(), length); } #endif -void metadata_add(const __itt_domain* pDomain, __itt_id id, __itt_string_handle* pKey, __itt_metadata_type type, size_t count, void* data) { +void metadata_add(const __itt_domain* pDomain, + __itt_id id, + __itt_string_handle* pKey, + __itt_metadata_type type, + size_t count, + void* data) { ITT_FUNCTION_STAT(); if (id.d1 || id.d2) { @@ -976,19 +1044,19 @@ void frame_begin_v3(const __itt_domain* pDomain, __itt_id* id) { ITT_FUNCTION_STAT(); CTraceEventFormat::SRegularFields rf = GetRegularFields(); - WriteRecord(ERecordType::BeginFrame, SRecord {rf, *pDomain, id ? *id : __itt_null, __itt_null}); + WriteRecord(ERecordType::BeginFrame, SRecord{rf, *pDomain, id ? *id : __itt_null, __itt_null}); } void frame_end_v3(const __itt_domain* pDomain, __itt_id* id) { ITT_FUNCTION_STAT(); CTraceEventFormat::SRegularFields rf = GetRegularFields(); - WriteRecord(ERecordType::EndFrame, SRecord {rf, *pDomain, id ? *id : __itt_null, __itt_null}); + WriteRecord(ERecordType::EndFrame, SRecord{rf, *pDomain, id ? *id : __itt_null, __itt_null}); } __itt_frame_t* UNICODE_AGNOSTIC(frame_create)(const char* domain) { ITT_FUNCTION_STAT(); - return new __itt_frame_t {UNICODE_AGNOSTIC(domain_create)(domain), __itt_id_make(const_cast(domain), 0)}; + return new __itt_frame_t{UNICODE_AGNOSTIC(domain_create)(domain), __itt_id_make(const_cast(domain), 0)}; } #ifdef _WIN32 @@ -1027,9 +1095,9 @@ void frame_submit_v3(const __itt_domain* pDomain, __itt_id* pId, __itt_timestamp } } rf.nanoseconds = begin; - WriteRecord(ERecordType::BeginFrame, SRecord {rf, *pDomain, pId ? *pId : __itt_null, __itt_null, pName}); + WriteRecord(ERecordType::BeginFrame, SRecord{rf, *pDomain, pId ? *pId : __itt_null, __itt_null, pName}); rf.nanoseconds = end; - WriteRecord(ERecordType::EndFrame, SRecord {rf, *pDomain, pId ? *pId : __itt_null, __itt_null}); + WriteRecord(ERecordType::EndFrame, SRecord{rf, *pDomain, pId ? 
*pId : __itt_null, __itt_null}); } __itt_timestamp get_timestamp() { @@ -1043,7 +1111,8 @@ void Pause() { pGlobal->state = __itt_collection_paused; ___itt_domain* pDomain = pGlobal->domain_list; while (pDomain) { - pDomain->flags = 0; // this flag is analyzed by static part of ITT to decide where to call dynamic part or not + pDomain->flags = + 0; // this flag is analyzed by static part of ITT to decide where to call dynamic part or not pDomain = pDomain->next; } pGlobal = pGlobal->next; @@ -1065,7 +1134,8 @@ void Resume() { while (pGlobal) { ___itt_domain* pDomain = pGlobal->domain_list; while (pDomain) { - pDomain->flags = 1; // this flag is analyzed by static part of ITT to decide where to call dynamic part or not + pDomain->flags = + 1; // this flag is analyzed by static part of ITT to decide where to call dynamic part or not pDomain = pDomain->next; } pGlobal->state = __itt_collection_normal; @@ -1085,7 +1155,11 @@ using TRelations = __itt_string_handle * [__itt_relation_is_predecessor_to + 1]; // it's not static member of function to avoid racing TRelations g_relations = {}; // will be filled in InitSEA -void relation_add_ex(const __itt_domain* pDomain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, +void relation_add_ex(const __itt_domain* pDomain, + __itt_clock_domain* clock_domain, + unsigned long long timestamp, + __itt_id head, + __itt_relation relation, __itt_id tail) { ITT_FUNCTION_STAT(); CTraceEventFormat::SRegularFields rf = GetRegularFields(clock_domain, timestamp); @@ -1105,7 +1179,10 @@ void relation_add(const __itt_domain* pDomain, __itt_id head, __itt_relation rel relation_add_ex(pDomain, nullptr, 0, head, relation, tail); } -void relation_add_to_current_ex(const __itt_domain* pDomain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, +void relation_add_to_current_ex(const __itt_domain* pDomain, + __itt_clock_domain* clock_domain, + unsigned long long timestamp, + __itt_relation relation, __itt_id tail) { ITT_FUNCTION_STAT(); relation_add_ex(pDomain, clock_domain, timestamp, __itt_null, relation, tail); @@ -1120,7 +1197,9 @@ struct SHeapFunction { __itt_heap_function ITTAPI UNICODE_AGNOSTIC(heap_function_create)(const char* name, const char* domain) { ITT_FUNCTION_STAT(); std::string counter_name = std::string(name) + ":ALL(bytes)"; - return new SHeapFunction {UNICODE_AGNOSTIC(domain_create)(domain), name, UNICODE_AGNOSTIC(string_handle_create)(counter_name.c_str())}; + return new SHeapFunction{UNICODE_AGNOSTIC(domain_create)(domain), + name, + UNICODE_AGNOSTIC(string_handle_create)(counter_name.c_str())}; } #ifdef _WIN32 @@ -1151,7 +1230,7 @@ protected: bool m_bInitialized = false; public: - CMemoryTracker(): m_bInitialized(true) {} + CMemoryTracker() : m_bInitialized(true) {} void Alloc(SHeapFunction* pHeapFunction, const void* addr, size_t size) { static bool bMemCount = !!(GetFeatureSet() & sfMemCounters); @@ -1159,7 +1238,11 @@ public: return; for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { - g_handlers[i]->Alloc(GetRegularFields(), addr, size, pHeapFunction->pDomain->nameA, pHeapFunction->name.c_str()); + g_handlers[i]->Alloc(GetRegularFields(), + addr, + size, + pHeapFunction->pDomain->nameA, + pHeapFunction->name.c_str()); } SNode* pNode = UpdateAllocation(size, +1, nullptr); @@ -1172,7 +1255,8 @@ public: if (m_counter_map.end() == it) { std::string name = pHeapFunction->name + std::string(":size<") + std::to_string(size) + ">(count)"; 
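// Illustrative sketch, not part of the patch: the task/metadata entry points being
// reformatted in this file are what an instrumented application reaches through the
// public ittnotify API once this collector is attached (typically by pointing the
// INTEL_LIBITTNOTIFY64 environment variable at the library). The domain, task and
// key names below are placeholders.
#include <ittnotify.h>

static __itt_domain* g_domain = __itt_domain_create("MyApp");
static __itt_string_handle* g_task = __itt_string_handle_create("decode_frame");
static __itt_string_handle* g_key = __itt_string_handle_create("frame_bytes");

void decode_frame(double frame_bytes) {
    __itt_task_begin(g_domain, __itt_null, __itt_null, g_task);  // dispatched into the collector's task handlers
    __itt_metadata_add(g_domain, __itt_null, g_key, __itt_metadata_double, 1, &frame_bytes);
    // ... real work ...
    __itt_task_end(g_domain);
}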
__itt_string_handle* pName = UNICODE_AGNOSTIC(string_handle_create)(name.c_str()); - it = m_counter_map.insert(m_counter_map.end(), std::make_pair(size, std::make_pair(pName, size_t(1)))); + it = m_counter_map.insert(m_counter_map.end(), + std::make_pair(size, std::make_pair(pName, size_t(1)))); } else { ++it->second.second; } @@ -1233,7 +1317,11 @@ public: UpdateAllocation(size, -1, pNode); } for (size_t i = 0; (i < MAX_HANDLERS) && g_handlers[i]; ++i) { - g_handlers[i]->Free(GetRegularFields(), addr, size, pHeapFunction->pDomain->nameA, pHeapFunction->name.c_str()); + g_handlers[i]->Free(GetRegularFields(), + addr, + size, + pHeapFunction->pDomain->nameA, + pHeapFunction->name.c_str()); } } @@ -1324,9 +1412,9 @@ __itt_event ITTAPI event_createW(const wchar_t* name, int namelen) { #endif #ifdef _WIN32 - #define WIN(something) something +# define WIN(something) something #else - #define WIN(nothing) +# define WIN(nothing) #endif #define _AW(macro, name) macro(UNICODE_AGNOSTIC(name)) WIN(macro(ITT_JOIN(name, W))) @@ -1487,8 +1575,9 @@ void FillApiList(__itt_api_info* api_list_ptr) { continue; \ } #define ITT_STUB_IMPL_ORIG(name) ITT_STUB_IMPL(name) -#ifdef _DEBUG // dangerous stub that doesn't return anything (even when expected) but records the function call for statistics sake - #define ITT_STUB_NO_IMPL(fn) \ +#ifdef _DEBUG // dangerous stub that doesn't return anything (even when expected) but records the function call for + // statistics sake +# define ITT_STUB_NO_IMPL(fn) \ if (0 == strcmp("__itt_" ITT_TO_STR(fn), api_list_ptr[i].name)) { \ struct local { \ static void stub(...) { \ @@ -1499,7 +1588,7 @@ void FillApiList(__itt_api_info* api_list_ptr) { continue; \ } #else - #define ITT_STUB_NO_IMPL(fn) +# define ITT_STUB_NO_IMPL(fn) #endif for (int i = 0; (api_list_ptr[i].name != NULL) && (*api_list_ptr[i].name != 0); ++i) { @@ -1516,10 +1605,13 @@ uint64_t GetFeatureSet() { static std::string save = GetSavePath(); static uint64_t features = (std::string::npos != env.find("mfp") ? sfMetricsFrameworkPublisher : 0) | - (std::string::npos != env.find("mfc") ? sfMetricsFrameworkConsumer : 0) | (save.size() ? sfSEA : 0) | - (std::string::npos != env.find("stack") ? sfStack : 0) | (std::string::npos != env.find("vscv") ? sfConcurrencyVisualizer : 0) | - (std::string::npos != env.find("rmtr") ? sfRemotery : 0) | (std::string::npos != env.find("brflr") ? sfBrofiler : 0) | - (std::string::npos != env.find("memstat") ? sfMemStat : 0) | (std::string::npos != env.find("memcount") ? sfMemCounters : 0) | + (std::string::npos != env.find("mfc") ? sfMetricsFrameworkConsumer : 0) | + (save.size() ? sfSEA : 0) | (std::string::npos != env.find("stack") ? sfStack : 0) | + (std::string::npos != env.find("vscv") ? sfConcurrencyVisualizer : 0) | + (std::string::npos != env.find("rmtr") ? sfRemotery : 0) | + (std::string::npos != env.find("brflr") ? sfBrofiler : 0) | + (std::string::npos != env.find("memstat") ? sfMemStat : 0) | + (std::string::npos != env.find("memcount") ? sfMemCounters : 0) | (std::string::npos != env.find("rad") ? 
sfRadTelemetry : 0); return features; } @@ -1534,7 +1626,8 @@ void TraverseDomains(const std::function& callback) { void TraverseThreadRecords(const std::function& callback) { TraverseDomains([&](___itt_domain& domain) { if (DomainExtra* pDomainExtra = reinterpret_cast(domain.extra2)) { - for (SThreadRecord* pThreadRecord = pDomainExtra->pThreadRecords; pThreadRecord; pThreadRecord = pThreadRecord->pNext) + for (SThreadRecord* pThreadRecord = pDomainExtra->pThreadRecords; pThreadRecord; + pThreadRecord = pThreadRecord->pNext) callback(*pThreadRecord); } }); @@ -1544,7 +1637,8 @@ void SetCutName(const std::string& name) { CIttLocker lock; g_spCutName = std::make_shared(Escape4Path(name)); TraverseThreadRecords([](SThreadRecord& record) { - record.nSpeedupCounter = (std::numeric_limits::max)(); // changing number is safer than changing pointer to last recorder + record.nSpeedupCounter = + (std::numeric_limits::max)(); // changing number is safer than changing pointer to last recorder }); } @@ -1555,7 +1649,8 @@ CTraceEventFormat::SRegularFields g_rfMainThread = CTraceEventFormat::GetRegular void SetFolder(const std::string& path) { CIttLocker lock; - std::string new_path = path.size() ? (path + "-" + std::to_string(CTraceEventFormat::GetRegularFields().pid) + "/") : ""; + std::string new_path = + path.size() ? (path + "-" + std::to_string(CTraceEventFormat::GetRegularFields().pid) + "/") : ""; if (g_savepath == new_path) return; @@ -1575,11 +1670,14 @@ void SetFolder(const std::string& path) { for (___itt_domain* pDomain = pGlobal->domain_list; pDomain; pDomain = pDomain->next) { DomainExtra* pDomainExtra = reinterpret_cast(pDomain->extra2); if (pDomainExtra) { - pDomainExtra->strDomainPath = g_savepath.size() ? GetDir(g_savepath, Escape4Path(pDomain->nameA)) : ""; // 3. + pDomainExtra->strDomainPath = + g_savepath.size() ? GetDir(g_savepath, Escape4Path(pDomain->nameA)) : ""; // 3. pDomainExtra->bHasDomainPath = !pDomainExtra->strDomainPath.empty(); - for (SThreadRecord* pThreadRecord = pDomainExtra->pThreadRecords; pThreadRecord; pThreadRecord = pThreadRecord->pNext) { + for (SThreadRecord* pThreadRecord = pDomainExtra->pThreadRecords; pThreadRecord; + pThreadRecord = pThreadRecord->pNext) { if (g_savepath.size()) { - pThreadRecord->bRemoveFiles = true; // 1. on next attempt to get a file it will recreate all files with new paths + pThreadRecord->bRemoveFiles = + true; // 1. on next attempt to get a file it will recreate all files with new paths } else { pThreadRecord->files.clear(); } @@ -1589,7 +1687,8 @@ void SetFolder(const std::string& path) { if (g_savepath.size()) { for (___itt_string_handle* pString = pGlobal->string_list; pString; pString = pString->next) - sea::ReportString(const_cast<__itt_string_handle*>(pString)); // 2. making string to be reported again - into the new folder + sea::ReportString(const_cast<__itt_string_handle*>( + pString)); // 2. 
making string to be reported again - into the new folder } } @@ -1620,7 +1719,10 @@ bool WriteFTraceTimeSyncMarkers() { } for (size_t i = 0; i < 5; ++i) { char buff[100] = {}; - int size = snprintf(buff, sizeof(buff), "IntelSEAPI_Time_Sync: %llu\n", (long long unsigned int)CTraceEventFormat::GetTimeNS()); + int size = snprintf(buff, + sizeof(buff), + "IntelSEAPI_Time_Sync: %llu\n", + (long long unsigned int)CTraceEventFormat::GetTimeNS()); int res = write(fd, buff, (unsigned int)size); if (-1 == res) return false; @@ -1633,8 +1735,17 @@ bool WriteFTraceTimeSyncMarkers() { #ifdef __APPLE__ bool WriteKTraceTimeSyncMarkers() { for (size_t i = 0; i < 5; ++i) { - kdebug_signpost(APPSDBG_CODE(DBG_MACH_CHUD, 0x15EA), CTraceEventFormat::GetTimeNS(), 0x15EA15EA, 0x15EA15EA, 0x15EA15EA); - syscall(SYS_kdebug_trace, APPSDBG_CODE(DBG_MACH_CHUD, 0x15EA) | DBG_FUNC_NONE, CTraceEventFormat::GetTimeNS(), 0x15EA15EA, 0x15EA15EA, 0x15EA15EA); + kdebug_signpost(APPSDBG_CODE(DBG_MACH_CHUD, 0x15EA), + CTraceEventFormat::GetTimeNS(), + 0x15EA15EA, + 0x15EA15EA, + 0x15EA15EA); + syscall(SYS_kdebug_trace, + APPSDBG_CODE(DBG_MACH_CHUD, 0x15EA) | DBG_FUNC_NONE, + CTraceEventFormat::GetTimeNS(), + 0x15EA15EA, + 0x15EA15EA, + 0x15EA15EA); } return true; } @@ -1655,8 +1766,10 @@ void InitSEA() { ("parent_of"), /**< "A is parent of B" means that A created B */ ("continuation_of"), /**< "A is continuation of B" means that A assumes the dependencies of B */ ("child_of"), /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */ - ("continued_by"), /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */ - ("predecessor_to") /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */ + ("continued_by"), /**< "A is continued by B" means that B assumes the dependencies of A (inverse of + is_continuation_of) */ + ("predecessor_to") /**< "A is predecessor to B" means that B cannot start until A completes (inverse of + is_dependent_on) */ }; size_t i = 0; @@ -1716,32 +1829,54 @@ SEA_EXPORT void* itt_create_string(const char* str) { SEA_EXPORT void itt_marker(void* domain, uint64_t id, void* name, int scope, uint64_t timestamp) { __itt_marker_ex(reinterpret_cast<__itt_domain*>(domain), nullptr, // zero clock domain means that given time is already a correct timestamp - timestamp, id ? __itt_id_make(domain, id) : __itt_null, reinterpret_cast<__itt_string_handle*>(name), (__itt_scope)scope); + timestamp, + id ? __itt_id_make(domain, id) : __itt_null, + reinterpret_cast<__itt_string_handle*>(name), + (__itt_scope)scope); } SEA_EXPORT void itt_task_begin(void* domain, uint64_t id, uint64_t parent, void* name, uint64_t timestamp) { - __itt_task_begin_ex(reinterpret_cast<__itt_domain*>(domain), nullptr, timestamp, id ? __itt_id_make(domain, id) : __itt_null, - parent ? __itt_id_make(domain, parent) : __itt_null, reinterpret_cast<__itt_string_handle*>(name)); + __itt_task_begin_ex(reinterpret_cast<__itt_domain*>(domain), + nullptr, + timestamp, + id ? __itt_id_make(domain, id) : __itt_null, + parent ? __itt_id_make(domain, parent) : __itt_null, + reinterpret_cast<__itt_string_handle*>(name)); } SEA_EXPORT void itt_task_begin_overlapped(void* domain, uint64_t id, uint64_t parent, void* name, uint64_t timestamp) { - __itt_task_begin_overlapped_ex(reinterpret_cast<__itt_domain*>(domain), nullptr, timestamp, __itt_id_make(domain, id), - parent ? 
__itt_id_make(domain, parent) : __itt_null, reinterpret_cast<__itt_string_handle*>(name)); + __itt_task_begin_overlapped_ex(reinterpret_cast<__itt_domain*>(domain), + nullptr, + timestamp, + __itt_id_make(domain, id), + parent ? __itt_id_make(domain, parent) : __itt_null, + reinterpret_cast<__itt_string_handle*>(name)); } SEA_EXPORT void itt_metadata_add(void* domain, uint64_t id, void* name, double value) { - __itt_metadata_add(reinterpret_cast<__itt_domain*>(domain), id ? __itt_id_make(domain, id) : __itt_null, reinterpret_cast<__itt_string_handle*>(name), - __itt_metadata_double, 1, &value); + __itt_metadata_add(reinterpret_cast<__itt_domain*>(domain), + id ? __itt_id_make(domain, id) : __itt_null, + reinterpret_cast<__itt_string_handle*>(name), + __itt_metadata_double, + 1, + &value); } SEA_EXPORT void itt_metadata_add_str(void* domain, uint64_t id, void* name, const char* value) { - __itt_metadata_str_add(reinterpret_cast<__itt_domain*>(domain), id ? __itt_id_make(domain, id) : __itt_null, reinterpret_cast<__itt_string_handle*>(name), - value, 0); + __itt_metadata_str_add(reinterpret_cast<__itt_domain*>(domain), + id ? __itt_id_make(domain, id) : __itt_null, + reinterpret_cast<__itt_string_handle*>(name), + value, + 0); } SEA_EXPORT void itt_metadata_add_blob(void* domain, uint64_t id, void* name, const void* value, uint32_t size) { - __itt_metadata_add(reinterpret_cast<__itt_domain*>(domain), id ? __itt_id_make(domain, id) : __itt_null, reinterpret_cast<__itt_string_handle*>(name), - __itt_metadata_unknown, size, const_cast(value)); + __itt_metadata_add(reinterpret_cast<__itt_domain*>(domain), + id ? __itt_id_make(domain, id) : __itt_null, + reinterpret_cast<__itt_string_handle*>(name), + __itt_metadata_unknown, + size, + const_cast(value)); } SEA_EXPORT void itt_task_end(void* domain, uint64_t timestamp) { @@ -1749,11 +1884,16 @@ SEA_EXPORT void itt_task_end(void* domain, uint64_t timestamp) { } SEA_EXPORT void itt_task_end_overlapped(void* domain, uint64_t timestamp, uint64_t taskid) { - __itt_task_end_overlapped_ex(reinterpret_cast<__itt_domain*>(domain), nullptr, timestamp, __itt_id_make(domain, taskid)); + __itt_task_end_overlapped_ex(reinterpret_cast<__itt_domain*>(domain), + nullptr, + timestamp, + __itt_id_make(domain, taskid)); } SEA_EXPORT void* itt_counter_create(void* domain, void* name) { - return __itt_counter_create_typed(reinterpret_cast<__itt_string_handle*>(name)->strA, reinterpret_cast<__itt_domain*>(domain)->nameA, __itt_metadata_u64); + return __itt_counter_create_typed(reinterpret_cast<__itt_string_handle*>(name)->strA, + reinterpret_cast<__itt_domain*>(domain)->nameA, + __itt_metadata_u64); } SEA_EXPORT void itt_set_counter(void* id, double value, uint64_t timestamp) { @@ -1761,8 +1901,10 @@ SEA_EXPORT void itt_set_counter(void* id, double value, uint64_t timestamp) { } SEA_EXPORT void* itt_create_track(const char* group, const char* track) { - return __itt_track_create(__itt_track_group_create(((group) ? __itt_string_handle_create(group) : nullptr), __itt_track_group_type_normal), - __itt_string_handle_create(track), __itt_track_type_normal); + return __itt_track_create(__itt_track_group_create(((group) ? 
__itt_string_handle_create(group) : nullptr), + __itt_track_group_type_normal), + __itt_string_handle_create(track), + __itt_track_type_normal); } SEA_EXPORT void itt_set_track(void* track) { diff --git a/thirdparty/itt_collector/sea_itt_lib/IttNotifyStdSrc.h b/thirdparty/itt_collector/sea_itt_lib/IttNotifyStdSrc.h index f9d763a639d..4f869083228 100644 --- a/thirdparty/itt_collector/sea_itt_lib/IttNotifyStdSrc.h +++ b/thirdparty/itt_collector/sea_itt_lib/IttNotifyStdSrc.h @@ -24,11 +24,11 @@ #include "ittnotify_config.h" #ifdef _WIN32 - #define SEA_EXPORT __declspec(dllexport) - #define _sprintf sprintf_s +# define SEA_EXPORT __declspec(dllexport) +# define _sprintf sprintf_s #else - #define SEA_EXPORT __attribute__((visibility("default"))) - #define _sprintf sprintf +# define SEA_EXPORT __attribute__((visibility("default"))) +# define _sprintf sprintf #endif namespace sea { @@ -36,7 +36,7 @@ bool IsVerboseMode(); } #if defined(_WIN32) - #define VerbosePrint(...) \ +# define VerbosePrint(...) \ { \ if (sea::IsVerboseMode()) { \ std::vector buff(1024); \ @@ -46,7 +46,7 @@ bool IsVerboseMode(); } \ } #else - #define VerbosePrint(...) \ +# define VerbosePrint(...) \ { \ if (sea::IsVerboseMode()) \ printf(__VA_ARGS__); \ @@ -71,7 +71,10 @@ bool WriteFTraceTimeSyncMarkers(); // For Driver instrumentation see: http://lw void InitSEA(); void FillApiList(__itt_api_info* pApiInfo); void FinitaLaComedia(); -void Counter(const __itt_domain* pDomain, __itt_string_handle* pName, double value, __itt_clock_domain* clock_domain = nullptr, +void Counter(const __itt_domain* pDomain, + __itt_string_handle* pName, + double value, + __itt_clock_domain* clock_domain = nullptr, unsigned long long timestamp = 0); __itt_clock_domain* clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data); void SetCutName(const std::string& path); @@ -99,15 +102,15 @@ struct ___itt_counter : public __itt_counter_info_t {}; #define USE_PROBES #ifdef _WIN32 - #include "windows.h" +# include "windows.h" #elif defined(__linux__) - #ifndef USE_PROBES +# ifndef USE_PROBES __thread FILE* stdsrc_trace_info_t::pFile = nullptr; - #endif +# endif #endif #ifdef _WIN32 - #define UNICODE_AGNOSTIC(name) name##A +# define UNICODE_AGNOSTIC(name) name##A inline std::string W2L(const wchar_t* wstr) { size_t len = lstrlenW(wstr); char* dest = (char*)alloca(len + 2); @@ -123,9 +126,9 @@ union IdCaster { GUID to; }; #else - #include - #define _strdup strdup - #define UNICODE_AGNOSTIC(name) name +# include +# define _strdup strdup +# define UNICODE_AGNOSTIC(name) name #endif namespace sea { @@ -148,7 +151,8 @@ enum SEAFeature { }; uint64_t GetFeatureSet(); -CTraceEventFormat::SRegularFields GetRegularFields(__itt_clock_domain* clock_domain = nullptr, unsigned long long timestamp = 0); +CTraceEventFormat::SRegularFields GetRegularFields(__itt_clock_domain* clock_domain = nullptr, + unsigned long long timestamp = 0); struct SThreadRecord; @@ -199,7 +203,8 @@ protected: placement_free(reinterpret_cast(ptr)); } }; - oTask.cookies[m_cookie] = STaskDescriptor::SCookie {placement_new(T)(args...), SDeleter::Deleter}; // consider placement new here! + oTask.cookies[m_cookie] = + STaskDescriptor::SCookie{placement_new(T)(args...), SDeleter::Deleter}; // consider placement new here! 
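// The cookie slot assigned just above is how each handler attaches typed per-task state
// to a task descriptor without the descriptor knowing the concrete type. Stripped of the
// pool allocator and with simplified, hypothetical names, the idea is roughly:
#include <cstddef>
#include <utility>

struct SCookieSketch {
    void* ptr = nullptr;
    void (*deleter)(void*) = nullptr;
};

struct STaskSketch {
    static const size_t kMaxHandlers = 16;
    SCookieSketch cookies[kMaxHandlers];
    ~STaskSketch() {
        for (SCookieSketch& c : cookies)
            if (c.ptr)
                c.deleter(c.ptr);  // each slot knows how to destroy what it stores
    }
};

// Each handler owns one fixed slot and lazily creates its state there on first use.
template <class T, class... Args>
T& CookieOnTask(STaskSketch& task, size_t handler_slot, Args&&... args) {
    SCookieSketch& c = task.cookies[handler_slot];
    if (!c.ptr) {
        c.ptr = new T(std::forward<Args>(args)...);  // the real code uses placement new into a pool
        c.deleter = [](void* p) {
            delete static_cast<T*>(p);
        };
    }
    return *static_cast<T*>(c.ptr);
}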
} return *reinterpret_cast(oTask.cookies[m_cookie].pCookie); } @@ -241,15 +246,33 @@ public: virtual void TaskBegin(STaskDescriptor& oTask, bool bOverlapped) {} virtual void AddArg(STaskDescriptor& oTask, const __itt_string_handle* pKey, const char* data, size_t length) {} virtual void AddArg(STaskDescriptor& oTask, const __itt_string_handle* pKey, double value) {} - virtual void AddRelation(const CTraceEventFormat::SRegularFields& rf, const __itt_domain* pDomain, __itt_id head, __itt_string_handle* relation, + virtual void AddRelation(const CTraceEventFormat::SRegularFields& rf, + const __itt_domain* pDomain, + __itt_id head, + __itt_string_handle* relation, __itt_id tail) {} virtual void TaskEnd(STaskDescriptor& oTask, const CTraceEventFormat::SRegularFields& rf, bool bOverlapped) {} - virtual void Marker(const CTraceEventFormat::SRegularFields& rf, const __itt_domain* pDomain, __itt_id id, __itt_string_handle* pName, __itt_scope scope) {} + virtual void Marker(const CTraceEventFormat::SRegularFields& rf, + const __itt_domain* pDomain, + __itt_id id, + __itt_string_handle* pName, + __itt_scope scope) {} virtual void CreateCounter(const __itt_counter& id) {} - virtual void Counter(const CTraceEventFormat::SRegularFields& rf, const __itt_domain* pDomain, const __itt_string_handle* pName, double value) {} + virtual void Counter(const CTraceEventFormat::SRegularFields& rf, + const __itt_domain* pDomain, + const __itt_string_handle* pName, + double value) {} virtual void SetThreadName(const CTraceEventFormat::SRegularFields& rf, const char* name) {} - virtual void Alloc(const CTraceEventFormat::SRegularFields& rf, const void* addr, size_t size, const char* domain, const char* name) {} - virtual void Free(const CTraceEventFormat::SRegularFields& rf, const void* addr, size_t size, const char* domain, const char* name) {} + virtual void Alloc(const CTraceEventFormat::SRegularFields& rf, + const void* addr, + size_t size, + const char* domain, + const char* name) {} + virtual void Free(const CTraceEventFormat::SRegularFields& rf, + const void* addr, + size_t size, + const char* domain, + const char* name) {} virtual ~IHandler() {} }; diff --git a/thirdparty/itt_collector/sea_itt_lib/Recorder.cpp b/thirdparty/itt_collector/sea_itt_lib/Recorder.cpp index d977a11e893..dbb41f43c40 100644 --- a/thirdparty/itt_collector/sea_itt_lib/Recorder.cpp +++ b/thirdparty/itt_collector/sea_itt_lib/Recorder.cpp @@ -25,13 +25,13 @@ #include "IttNotifyStdSrc.h" #ifdef _WIN32 - #include - #include - #include +# include +# include +# include - #define open crossopen - #define write _write - #define close _close +# define open crossopen +# define write _write +# define close _close int crossopen(_In_z_ const char* _Filename, _In_ int _Openflag, int perm) { int fd = 0; _sopen_s(&fd, _Filename, _Openflag | _O_BINARY, _SH_DENYWR, perm); @@ -40,7 +40,7 @@ int crossopen(_In_z_ const char* _Filename, _In_ int _Openflag, int perm) { // FIXME: support wide char mode #endif -CRecorder::CRecorder(): m_pCurPos(nullptr) {} +CRecorder::CRecorder() : m_pCurPos(nullptr) {} size_t ChunkSize = 1 * 1020 * 1024; @@ -80,9 +80,9 @@ size_t CRecorder::CheckCapacity(size_t size) { size_t nWroteBytes = (char*)m_pCurPos - (char*)m_memmap->GetPtr(); if (nWroteBytes + size > m_memmap->GetSize()) { m_pCurPos = m_memmap->Remap((std::max)(ChunkSize, size), m_nWroteTotal); - #ifdef TURBO_MODE +# ifdef TURBO_MODE sea::GetThreadRecord()->nMemMoveCounter += 1; - #endif +# endif if (!m_pCurPos) return 0; } @@ -184,7 +184,8 @@ inline CRecorder* 
GetFile(const SRecord& record) { pThreadRecord->files.clear(); } // with very high probability the same thread will write into the same domain - if (pThreadRecord->pLastRecorder && (pThreadRecord->pLastDomain == record.domain.nameA) && (100 > pThreadRecord->nSpeedupCounter++)) + if (pThreadRecord->pLastRecorder && (pThreadRecord->pLastDomain == record.domain.nameA) && + (100 > pThreadRecord->nSpeedupCounter++)) return reinterpret_cast(pThreadRecord->pLastRecorder); pThreadRecord->nSpeedupCounter = 0; // we can't avoid checking ring size pThreadRecord->pLastDomain = record.domain.nameA; @@ -218,7 +219,11 @@ inline CRecorder* GetFile(const SRecord& record) { CTraceEventFormat::SRegularFields rf = CTraceEventFormat::GetRegularFields(); char path[1024] = {}; - _sprintf(path, "%s%llu%s%s.sea", pDomainExtra->strDomainPath.c_str(), (unsigned long long)rf.tid, spCutName ? (std::string("!") + *spCutName).c_str() : "", + _sprintf(path, + "%s%llu%s%s.sea", + pDomainExtra->strDomainPath.c_str(), + (unsigned long long)rf.tid, + spCutName ? (std::string("!") + *spCutName).c_str() : "", (g_nRingBuffer ? ((pRecorder->GetCount() % 2) ? "-1" : "-0") : "")); try { VerbosePrint("Opening: %s\n", path); @@ -244,12 +249,13 @@ double* WriteRecord(ERecordType type, const SRecord& record) { CRecorder& stream = *pFile; - const size_t MaxSize = sizeof(STinyRecord) + 2 * sizeof(__itt_id) + 3 * sizeof(uint64_t) + sizeof(double) + sizeof(void*); + const size_t MaxSize = + sizeof(STinyRecord) + 2 * sizeof(__itt_id) + 3 * sizeof(uint64_t) + sizeof(double) + sizeof(void*); size_t size = stream.CheckCapacity(MaxSize + record.length); if (!size) return nullptr; - STinyRecord* pRecord = WriteToBuff(stream, STinyRecord {record.rf.nanoseconds, type}); + STinyRecord* pRecord = WriteToBuff(stream, STinyRecord{record.rf.nanoseconds, type}); if (!pRecord) return nullptr; @@ -311,7 +317,12 @@ double* WriteRecord(ERecordType type, const SRecord& record) { CMemMap::CMemMap(const std::string& path, size_t size, size_t offset) { #ifdef _WIN32 - m_hFile = CreateFile(path.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_SEQUENTIAL_SCAN, + m_hFile = CreateFile(path.c_str(), + GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ, + NULL, + CREATE_ALWAYS, + FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_SEQUENTIAL_SCAN, NULL); if (INVALID_HANDLE_VALUE == m_hFile) { m_hFile = NULL; @@ -395,13 +406,17 @@ CMemMap::~CMemMap() { using namespace sea; const bool g_bWithStacks = !!(GetFeatureSet() & sfStack); -void WriteMeta(const CTraceEventFormat::SRegularFields& main, __itt_string_handle* pKey, const char* name, double* pDelta) { - WriteRecord(ERecordType::Metadata, SRecord {main, *g_pIntelSEAPIDomain, __itt_null, __itt_null, pKey, pDelta, name, strlen(name)}); +void WriteMeta(const CTraceEventFormat::SRegularFields& main, + __itt_string_handle* pKey, + const char* name, + double* pDelta) { + WriteRecord(ERecordType::Metadata, + SRecord{main, *g_pIntelSEAPIDomain, __itt_null, __itt_null, pKey, pDelta, name, strlen(name)}); } -class CSEARecorder : public IHandler {void Init(const CTraceEventFormat::SRegularFields& main) - override {// write process name into trace - __itt_string_handle* pKey = UNICODE_AGNOSTIC(string_handle_create)("__process__"); +class CSEARecorder : public IHandler{void Init(const CTraceEventFormat::SRegularFields& main) override{ + // write process name into trace + __itt_string_handle* pKey = UNICODE_AGNOSTIC(string_handle_create)("__process__"); const char* name = 
GetProcessName(true); double delta = -1; // sort order - highest for processes written thru SEA @@ -432,32 +447,40 @@ void TaskBegin(STaskDescriptor& oTask, bool bOverlapped) override { } #ifdef TURBO_MODE double duration = 0; - oTask.pDur = WriteRecord(bOverlapped ? ERecordType::BeginOverlappedTask : ERecordType::BeginTask, - SRecord {oTask.rf, *oTask.pDomain, oTask.id, oTask.parent, oTask.pName, &duration, pData, length, oTask.fn}); + oTask.pDur = WriteRecord( + bOverlapped ? ERecordType::BeginOverlappedTask : ERecordType::BeginTask, + SRecord{oTask.rf, *oTask.pDomain, oTask.id, oTask.parent, oTask.pName, &duration, pData, length, oTask.fn}); oTask.nMemCounter = GetThreadRecord()->nMemMoveCounter; #else - WriteRecord(bOverlapped ? ERecordType::BeginOverlappedTask : ERecordType::BeginTask, - SRecord {oTask.rf, *oTask.pDomain, oTask.id, oTask.parent, oTask.pName, nullptr, pData, length, oTask.fn}); + WriteRecord( + bOverlapped ? ERecordType::BeginOverlappedTask : ERecordType::BeginTask, + SRecord{oTask.rf, *oTask.pDomain, oTask.id, oTask.parent, oTask.pName, nullptr, pData, length, oTask.fn}); #endif } void AddArg(STaskDescriptor& oTask, const __itt_string_handle* pKey, const char* data, size_t length) override { - WriteRecord(ERecordType::Metadata, SRecord {oTask.rf, *oTask.pDomain, oTask.id, __itt_null, pKey, nullptr, data, length}); + WriteRecord(ERecordType::Metadata, + SRecord{oTask.rf, *oTask.pDomain, oTask.id, __itt_null, pKey, nullptr, data, length}); #ifdef TURBO_MODE - oTask.pDur = nullptr; // for now we don't support turbo tasks with arguments. But if count of arguments was saved it could work. + oTask.pDur = nullptr; // for now we don't support turbo tasks with arguments. But if count of arguments was saved + // it could work. #endif } void AddArg(STaskDescriptor& oTask, const __itt_string_handle* pKey, double value) override { - WriteRecord(ERecordType::Metadata, SRecord {oTask.rf, *oTask.pDomain, oTask.id, __itt_null, pKey, &value}); + WriteRecord(ERecordType::Metadata, SRecord{oTask.rf, *oTask.pDomain, oTask.id, __itt_null, pKey, &value}); #ifdef TURBO_MODE - oTask.pDur = nullptr; // for now we don't support turbo tasks with arguments. But if count of arguments was saved it could work. + oTask.pDur = nullptr; // for now we don't support turbo tasks with arguments. But if count of arguments was saved + // it could work. #endif } -void AddRelation(const CTraceEventFormat::SRegularFields& rf, const __itt_domain* pDomain, __itt_id head, __itt_string_handle* relation, +void AddRelation(const CTraceEventFormat::SRegularFields& rf, + const __itt_domain* pDomain, + __itt_id head, + __itt_string_handle* relation, __itt_id tail) override { - WriteRecord(ERecordType::Relation, SRecord {rf, *pDomain, head, tail, relation}); + WriteRecord(ERecordType::Relation, SRecord{rf, *pDomain, head, tail, relation}); } void TaskEnd(STaskDescriptor& oTask, const CTraceEventFormat::SRegularFields& rf, bool bOverlapped) override { @@ -466,18 +489,26 @@ void TaskEnd(STaskDescriptor& oTask, const CTraceEventFormat::SRegularFields& rf *oTask.pDur = double(rf.nanoseconds - oTask.rf.nanoseconds); else WriteRecord(bOverlapped ? ERecordType::EndOverlappedTask : ERecordType::EndTask, - SRecord {rf, *oTask.pDomain, oTask.id, oTask.parent, oTask.pName, nullptr, nullptr, 0, oTask.fn}); + SRecord{rf, *oTask.pDomain, oTask.id, oTask.parent, oTask.pName, nullptr, nullptr, 0, oTask.fn}); #else - WriteRecord(bOverlapped ? 
ERecordType::EndOverlappedTask : ERecordType::EndTask, SRecord {rf, *oTask.pDomain, oTask.id, __itt_null}); + WriteRecord(bOverlapped ? ERecordType::EndOverlappedTask : ERecordType::EndTask, + SRecord{rf, *oTask.pDomain, oTask.id, __itt_null}); #endif } -void Marker(const CTraceEventFormat::SRegularFields& rf, const __itt_domain* pDomain, __itt_id id, __itt_string_handle* pName, __itt_scope theScope) override { +void Marker(const CTraceEventFormat::SRegularFields& rf, + const __itt_domain* pDomain, + __itt_id id, + __itt_string_handle* pName, + __itt_scope theScope) override { const char* scope = GetScope(theScope); - WriteRecord(ERecordType::Marker, SRecord {rf, *pDomain, id, __itt_null, pName, nullptr, scope, strlen(scope)}); + WriteRecord(ERecordType::Marker, SRecord{rf, *pDomain, id, __itt_null, pName, nullptr, scope, strlen(scope)}); } -void Counter(const CTraceEventFormat::SRegularFields& rf, const __itt_domain* pDomain, const __itt_string_handle* pName, double value) override { +void Counter(const CTraceEventFormat::SRegularFields& rf, + const __itt_domain* pDomain, + const __itt_string_handle* pName, + double value) override { const char* pData = nullptr; size_t length = 0; if (g_bWithStacks) { @@ -487,7 +518,7 @@ void Counter(const CTraceEventFormat::SRegularFields& rf, const __itt_domain* pD length = (GetStack(*pStack) - 3) * sizeof(void*); pData = reinterpret_cast(&(*pStack)[3]); } - WriteRecord(ERecordType::Counter, SRecord {rf, *pDomain, __itt_null, __itt_null, pName, &value, pData, length}); + WriteRecord(ERecordType::Counter, SRecord{rf, *pDomain, __itt_null, __itt_null, pName, &value, pData, length}); } void SetThreadName(const CTraceEventFormat::SRegularFields& rf, const char* name) override { diff --git a/thirdparty/itt_collector/sea_itt_lib/Recorder.h b/thirdparty/itt_collector/sea_itt_lib/Recorder.h index 3684a351709..714c45ada30 100644 --- a/thirdparty/itt_collector/sea_itt_lib/Recorder.h +++ b/thirdparty/itt_collector/sea_itt_lib/Recorder.h @@ -21,14 +21,14 @@ //#define TURBO_MODE #ifdef _WIN32 - #include +# include #else - #include - #include - #include - #include - #include - #include +# include +# include +# include +# include +# include +# include #endif #include @@ -145,7 +145,10 @@ struct SRecord { void* function; }; double* WriteRecord(ERecordType type, const SRecord& record); -void WriteMeta(const CTraceEventFormat::SRegularFields& main, __itt_string_handle* pKey, const char* name, double* pDelta = nullptr); +void WriteMeta(const CTraceEventFormat::SRegularFields& main, + __itt_string_handle* pKey, + const char* name, + double* pDelta = nullptr); namespace sea { struct IHandler; diff --git a/thirdparty/itt_collector/sea_itt_lib/TraceEventFormat.h b/thirdparty/itt_collector/sea_itt_lib/TraceEventFormat.h index 3434c11860a..6fd5f24caa8 100644 --- a/thirdparty/itt_collector/sea_itt_lib/TraceEventFormat.h +++ b/thirdparty/itt_collector/sea_itt_lib/TraceEventFormat.h @@ -21,28 +21,29 @@ #include "Utils.h" #ifndef _WIN32 - #include - #include +# include +# include #endif #ifdef _WIN32 static const int64_t g_PID = (int64_t)GetCurrentProcessId(); #else static const int64_t g_PID = (int64_t)getpid(); - #if defined(__APPLE__) +# if defined(__APPLE__) inline int64_t GetTidFromPThread() { uint64_t tid64 = 0; pthread_threadid_np(NULL, &tid64); return (int64_t)tid64; } - #endif +# endif #endif // https://github.com/google/trace-viewer // For ETW see here: // http://git.chromium.org/gitweb/?p=chromium/src.git;a=commitdiff;h=41fabf8e2dd3a847cbdad05da9b43fd9a99d741a // 
(content/browser/tracing/etw_system_event_consumer_win.cc) -// parser source: https://github.com/google/trace-viewer/blob/49d0dd94c3925c3721d059ad3ee2db51d176248c/trace_viewer/extras/importer/trace_event_importer.html +// parser source: +// https://github.com/google/trace-viewer/blob/49d0dd94c3925c3721d059ad3ee2db51d176248c/trace_viewer/extras/importer/trace_event_importer.html class CTraceEventFormat { public: struct SRegularFields { diff --git a/thirdparty/itt_collector/sea_itt_lib/Utils.cpp b/thirdparty/itt_collector/sea_itt_lib/Utils.cpp index 46b7c2bbc6e..e29c3e460e8 100644 --- a/thirdparty/itt_collector/sea_itt_lib/Utils.cpp +++ b/thirdparty/itt_collector/sea_itt_lib/Utils.cpp @@ -23,34 +23,35 @@ #include "IttNotifyStdSrc.h" #ifdef _WIN32 - #include - #undef API_VERSION - #include +# include +# undef API_VERSION +# include #else - #include - #include - #include +# include +# include +# include #endif #ifdef __APPLE__ - #include +# include #endif #if defined(ARM32) - #define NO_DL_ITERATE_PHDR +# define NO_DL_ITERATE_PHDR #endif #if !defined(NO_DL_ITERATE_PHDR) && defined(__linux__) - #ifndef _GNU_SOURCE - #define _GNU_SOURCE - #endif - #include +# ifndef _GNU_SOURCE +# define _GNU_SOURCE +# endif +# include #endif size_t GetStack(TStack& stack) { #ifdef _WIN32 typedef USHORT(WINAPI * FCaptureStackBackTrace)(__in ULONG, __in ULONG, __out PVOID*, __out_opt PULONG); - static FCaptureStackBackTrace CaptureStackBackTrace = (FCaptureStackBackTrace)(GetProcAddress(LoadLibraryA("kernel32.dll"), "RtlCaptureStackBackTrace")); + static FCaptureStackBackTrace CaptureStackBackTrace = + (FCaptureStackBackTrace)(GetProcAddress(LoadLibraryA("kernel32.dll"), "RtlCaptureStackBackTrace")); return CaptureStackBackTrace ? CaptureStackBackTrace(0, StackSize, stack, NULL) : 0; #else return backtrace(stack, StackSize); @@ -97,19 +98,25 @@ SModuleInfo Fn2Mdl(void* fn) { GetModuleFileNameA(hModule, filename, sizeof(filename) - 1); MODULEINFO mi = {}; GetModuleInformation(GetCurrentProcess(), hModule, &mi, sizeof(MODULEINFO)); - return SModuleInfo {hModule, mi.SizeOfImage, filename}; + return SModuleInfo{hModule, mi.SizeOfImage, filename}; } LONG WINAPI CreateMiniDump(EXCEPTION_POINTERS* pep) { - typedef BOOL(WINAPI * PDUMPFN)(HANDLE hProcess, DWORD ProcessId, HANDLE hFile, MINIDUMP_TYPE DumpType, PMINIDUMP_EXCEPTION_INFORMATION ExceptionParam, - PMINIDUMP_USER_STREAM_INFORMATION UserStreamParam, PMINIDUMP_CALLBACK_INFORMATION CallbackParam); + typedef BOOL(WINAPI * PDUMPFN)(HANDLE hProcess, + DWORD ProcessId, + HANDLE hFile, + MINIDUMP_TYPE DumpType, + PMINIDUMP_EXCEPTION_INFORMATION ExceptionParam, + PMINIDUMP_USER_STREAM_INFORMATION UserStreamParam, + PMINIDUMP_CALLBACK_INFORMATION CallbackParam); PDUMPFN fnMiniDumpWriteDump = (PDUMPFN)GetProcAddress(::LoadLibraryA("DbgHelp.dll"), "MiniDumpWriteDump"); if (!fnMiniDumpWriteDump) return EXCEPTION_EXECUTE_HANDLER; std::string path = g_savepath.empty() ? 
"c:/temp" : g_savepath; path += "/isea_minidump.dmp"; - HANDLE hFile = CreateFileA(path.c_str(), GENERIC_READ | GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); + HANDLE hFile = + CreateFileA(path.c_str(), GENERIC_READ | GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); if (!hFile || INVALID_HANDLE_VALUE == hFile) return EXCEPTION_EXECUTE_HANDLER; @@ -118,7 +125,13 @@ LONG WINAPI CreateMiniDump(EXCEPTION_POINTERS* pep) { mdei.ExceptionPointers = pep; mdei.ClientPointers = TRUE; - fnMiniDumpWriteDump(GetCurrentProcess(), GetCurrentProcessId(), hFile, MiniDumpNormal, (pep != 0) ? &mdei : 0, 0, 0); + fnMiniDumpWriteDump(GetCurrentProcess(), + GetCurrentProcessId(), + hFile, + MiniDumpNormal, + (pep != 0) ? &mdei : 0, + 0, + 0); CloseHandle(hFile); return EXCEPTION_EXECUTE_HANDLER; @@ -134,7 +147,7 @@ void SetGlobalCrashHandler() { // FIXME: implement } - #include +# include size_t GetFileSize(const char* path) { struct stat st = {}; @@ -145,9 +158,9 @@ size_t GetFileSize(const char* path) { return -1; } - #ifndef __APPLE__ +# ifndef __APPLE__ - #if !defined(NO_DL_ITERATE_PHDR) +# if !defined(NO_DL_ITERATE_PHDR) int iterate_callback(struct dl_phdr_info* info, size_t size, void* data) { Dl_info* pInfo = reinterpret_cast(data); VerbosePrint("iterate_callback: %lx, %s\n", (long int)info->dlpi_addr, info->dlpi_name); @@ -155,7 +168,7 @@ int iterate_callback(struct dl_phdr_info* info, size_t size, void* data) { pInfo->dli_fname = strdup(info->dlpi_name); return 0; } - #endif +# endif bool proc_self_map(Dl_info& info) { char base[100] = {}; @@ -173,46 +186,46 @@ bool proc_self_map(Dl_info& info) { } return false; } - #endif +# endif sea::SModuleInfo Fn2Mdl(void* fn) { Dl_info dl_info = {}; dladdr(fn, &dl_info); VerbosePrint("Fn2Mdl: %p, %s\n", dl_info.dli_fbase, dl_info.dli_fname); if (!dl_info.dli_fname || !strstr(dl_info.dli_fname, ".so")) { - #ifndef __APPLE__ - #if !defined(NO_DL_ITERATE_PHDR) +# ifndef __APPLE__ +# if !defined(NO_DL_ITERATE_PHDR) dl_iterate_phdr(iterate_callback, &dl_info); - #endif +# endif if (!dl_info.dli_fname || !strstr(dl_info.dli_fname, ".so")) proc_self_map(dl_info); - #endif - return SModuleInfo {dl_info.dli_fbase, 0, dl_info.dli_fname}; +# endif + return SModuleInfo{dl_info.dli_fbase, 0, dl_info.dli_fname}; } if (dl_info.dli_fname[0] == '/') { // path is absolute - return SModuleInfo {dl_info.dli_fbase, GetFileSize(dl_info.dli_fname), dl_info.dli_fname}; + return SModuleInfo{dl_info.dli_fbase, GetFileSize(dl_info.dli_fname), dl_info.dli_fname}; } else { if (const char* absolute = realpath(dl_info.dli_fname, nullptr)) { - SModuleInfo mdlInfo {dl_info.dli_fbase, GetFileSize(absolute), absolute}; + SModuleInfo mdlInfo{dl_info.dli_fbase, GetFileSize(absolute), absolute}; free((void*)absolute); return mdlInfo; } else { - return SModuleInfo {dl_info.dli_fbase, GetFileSize(dl_info.dli_fname), dl_info.dli_fname}; + return SModuleInfo{dl_info.dli_fbase, GetFileSize(dl_info.dli_fname), dl_info.dli_fname}; } } } const char* GetProcessName(bool bFullPath) { static char process_name[1024] = {}; - #ifdef __APPLE__ +# ifdef __APPLE__ uint32_t size = 1023; _NSGetExecutablePath(process_name, &size); - #else +# else if (!process_name[0]) process_name[readlink("/proc/self/exe", process_name, sizeof(process_name) / sizeof(process_name[0]) - 1)] = 0; - #endif //__APPLE__ +# endif //__APPLE__ if (bFullPath) return process_name; return strrchr(process_name, '/') + 1; diff --git a/thirdparty/itt_collector/sea_itt_lib/Utils.h 
b/thirdparty/itt_collector/sea_itt_lib/Utils.h index 235e37846c8..acda3c59bac 100644 --- a/thirdparty/itt_collector/sea_itt_lib/Utils.h +++ b/thirdparty/itt_collector/sea_itt_lib/Utils.h @@ -31,15 +31,15 @@ #include #if defined(__arm__) && !defined(__aarch64__) - #define ARM32 +# define ARM32 #endif #ifdef _WIN32 - #include +# include #else - #include - #include - #include +# include +# include +# include #endif static std::string get_environ_value(const std::string& name) { @@ -142,9 +142,9 @@ typedef std::recursive_mutex TCritSec; #endif #ifdef _MSC_VER - #define thread_local __declspec(thread) +# define thread_local __declspec(thread) #else - #define thread_local __thread +# define thread_local __thread #endif template @@ -201,7 +201,7 @@ protected: std::function m_fn; public: - CScope(const std::function& fn): m_fn(fn) {} + CScope(const std::function& fn) : m_fn(fn) {} ~CScope() { m_fn(); } diff --git a/thirdparty/itt_collector/sea_itt_lib/sea_itt_lib.cpp b/thirdparty/itt_collector/sea_itt_lib/sea_itt_lib.cpp index a33d41a62b0..18196eda17e 100644 --- a/thirdparty/itt_collector/sea_itt_lib/sea_itt_lib.cpp +++ b/thirdparty/itt_collector/sea_itt_lib/sea_itt_lib.cpp @@ -29,22 +29,22 @@ #define INTEL_JIT_PROFILER "INTEL_JIT_PROFILER" #ifdef _WIN32 - #define setenv _putenv - #include - #undef API_VERSION - #include - #pragma comment(lib, "dbghelp") +# define setenv _putenv +# include +# undef API_VERSION +# include +# pragma comment(lib, "dbghelp") #else - #define setenv putenv - #define _strdup strdup +# define setenv putenv +# define _strdup strdup #endif #if (INTPTR_MAX == INT32_MAX) - #define BIT_SUFFIX "32" +# define BIT_SUFFIX "32" #elif INTPTR_MAX == INT64_MAX - #define BIT_SUFFIX "64" +# define BIT_SUFFIX "64" #else - #error "Environment not 32 or 64-bit!" +# error "Environment not 32 or 64-bit!" 
#endif int GlobalInit() { @@ -52,7 +52,10 @@ int GlobalInit() { static const char jit_var_name[] = INTEL_JIT_PROFILER BIT_SUFFIX; sea::SModuleInfo mdlinfo = sea::Fn2Mdl((void*)GlobalInit); - VerbosePrint("IntelSEAPI: %s=%s | Loaded from: %s\n", var_name, get_environ_value(var_name).c_str(), mdlinfo.path.c_str()); + VerbosePrint("IntelSEAPI: %s=%s | Loaded from: %s\n", + var_name, + get_environ_value(var_name).c_str(), + mdlinfo.path.c_str()); std::string value = var_name; value += "="; @@ -103,9 +106,9 @@ void UnchainGlobal(__itt_global* pOld) { } #ifdef _WIN32 - #include +# include - #define FIX_STR(type, ptr, name) \ +# define FIX_STR(type, ptr, name) \ if (!ptr->name##A) { \ if (ptr->name##W) { \ size_t len = lstrlenW((const wchar_t*)ptr->name##W); \ @@ -118,7 +121,7 @@ void UnchainGlobal(__itt_global* pOld) { } #else - #define FIX_STR(type, ptr, name) \ +# define FIX_STR(type, ptr, name) \ if (!ptr->name##A) { \ if (ptr->name##W) { \ size_t len = wcslen((const wchar_t*)ptr->name##W); \ @@ -208,8 +211,15 @@ void AtExit() { extern "C" { #ifdef STANDARD_SOURCES -typedef bool (*receive_t)(uint64_t receiver, uint64_t time, uint16_t count, const stdsrc::uchar_t** names, const stdsrc::uchar_t** values, double progress); -typedef uint64_t (*get_receiver_t)(const stdsrc::uchar_t* provider, const stdsrc::uchar_t* opcode, const stdsrc::uchar_t* taskName); +typedef bool (*receive_t)(uint64_t receiver, + uint64_t time, + uint16_t count, + const stdsrc::uchar_t** names, + const stdsrc::uchar_t** values, + double progress); +typedef uint64_t (*get_receiver_t)(const stdsrc::uchar_t* provider, + const stdsrc::uchar_t* opcode, + const stdsrc::uchar_t* taskName); SEA_EXPORT bool parse_standard_source(const char* file, get_receiver_t get_receiver, receive_t receive) { STDSRC_CHECK_RET(file, false); @@ -220,7 +230,10 @@ SEA_EXPORT bool parse_standard_source(const char* file, get_receiver_t get_recei stdsrc::Reader& m_reader; public: - Receiver(stdsrc::Reader& reader, uint64_t receiver, receive_t receive): m_receiver(receiver), m_reader(reader), m_receive(receive) {} + Receiver(stdsrc::Reader& reader, uint64_t receiver, receive_t receive) + : m_receiver(receiver), + m_reader(reader), + m_receive(receive) {} virtual bool onEvent(uint64_t time, const stdsrc::CVariantTree& props) { size_t size = props.get_bags().size(); @@ -237,7 +250,12 @@ SEA_EXPORT bool parse_standard_source(const char* file, get_receiver_t get_recei values[i] = value.is_empty() ? nullptr : value.as_str(values_temp[i]).c_str(); ++i; } - return m_receive(m_receiver, time, (uint16_t)size, size ? &names[0] : nullptr, size ? &values[0] : nullptr, m_reader.getProgress()); + return m_receive(m_receiver, + time, + (uint16_t)size, + size ? &names[0] : nullptr, + size ? 
&values[0] : nullptr, + m_reader.getProgress()); } }; @@ -246,8 +264,10 @@ SEA_EXPORT bool parse_standard_source(const char* file, get_receiver_t get_recei receive_t m_receive = nullptr; public: - Reader(get_receiver_t get_receiver, receive_t receive): m_get_receiver(get_receiver), m_receive(receive) {} - virtual stdsrc::Receiver::Ptr getReceiver(const stdsrc::ustring& provider, const stdsrc::ustring& opcode, const stdsrc::ustring& taskName, + Reader(get_receiver_t get_receiver, receive_t receive) : m_get_receiver(get_receiver), m_receive(receive) {} + virtual stdsrc::Receiver::Ptr getReceiver(const stdsrc::ustring& provider, + const stdsrc::ustring& opcode, + const stdsrc::ustring& taskName, stdsrc::CVariantTree& props) { uint64_t receiver = m_get_receiver(provider.c_str(), opcode.c_str(), taskName.c_str()); if (!receiver) @@ -257,10 +277,10 @@ SEA_EXPORT bool parse_standard_source(const char* file, get_receiver_t get_recei }; Reader reader(get_receiver, receive); std::string path(file); - #ifdef _WIN32 +# ifdef _WIN32 if (path.substr(path.size() - 4) == ".etl") return stdsrc::readETLFile(reader, file, stdsrc::etuRaw); - #endif +# endif return false; } #endif @@ -270,7 +290,8 @@ SEA_EXPORT const char* resolve_pointer(const char* szModulePath, uint64_t addr) static std::string res; res.clear(); static HANDLE hCurProc = GetCurrentProcess(); - DWORD dwOptions = SymSetOptions((SymGetOptions() | SYMOPT_LOAD_LINES | SYMOPT_UNDNAME | SYMOPT_INCLUDE_32BIT_MODULES | SYMOPT_ALLOW_ABSOLUTE_SYMBOLS) & + DWORD dwOptions = SymSetOptions((SymGetOptions() | SYMOPT_LOAD_LINES | SYMOPT_UNDNAME | + SYMOPT_INCLUDE_32BIT_MODULES | SYMOPT_ALLOW_ABSOLUTE_SYMBOLS) & ~SYMOPT_DEFERRED_LOADS); static BOOL bInitialize = SymInitialize(hCurProc, NULL, TRUE); if (!bInitialize) @@ -343,7 +364,12 @@ SEA_EXPORT int Initialize() { #if defined(STANDARD_SOURCES) && defined(_DEBUG) && 0 -bool receive(uint64_t, uint64_t time, uint16_t count, const stdsrc::uchar_t** names, const stdsrc::uchar_t** values, double progress) { +bool receive(uint64_t, + uint64_t time, + uint16_t count, + const stdsrc::uchar_t** names, + const stdsrc::uchar_t** values, + double progress) { return true; } From 38ed0c88b009a0fc50eff8a97674761ab0da405b Mon Sep 17 00:00:00 2001 From: Szymon Durawa Date: Tue, 10 Aug 2021 21:37:06 +0200 Subject: [PATCH 22/24] Remove deprecated api. 
(#6925) --- .../reference/convolution_backprop_data.hpp | 36 ------------------- 1 file changed, 36 deletions(-) diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/convolution_backprop_data.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/convolution_backprop_data.hpp index 93edbd6acc3..4e692cad4dc 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/convolution_backprop_data.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/convolution_backprop_data.hpp @@ -392,42 +392,6 @@ namespace ngraph forward_in_pad_above, output_padding); } - - // DEPRECATED, can't be removed currently due to arm-plugin dependency - template ::type> - NGRAPH_DEPRECATED( - "convolution_backprop_in function with 4 template types is deprecated, use " - "function with 1 template and output_padding parameter.") - void convolution_backprop_in(const INPUT* delta_in, - const FILTER* filter, - OUTPUT* delta_out, - const Shape& in_shape, - const Shape& filter_shape, - const Shape& out_shape, - const Strides& in_dilation, - const Strides& filter_dilation, - const CoordinateDiff& forward_in_pad_bellow, - const CoordinateDiff& forward_in_pad_above, - const Strides& stride) - { - const ngraph::CoordinateDiff output_padding(in_shape.size() - 2, 0); - - convolution_backprop_in(delta_in, - filter, - delta_out, - in_shape, - filter_shape, - out_shape, - in_dilation, - filter_dilation, - forward_in_pad_bellow, - forward_in_pad_above, - stride, - output_padding); - } } // namespace reference } // namespace runtime } // namespace ngraph From e6a4803b74db407b5d556138b169be933a9a8f1d Mon Sep 17 00:00:00 2001 From: Kate Generalova Date: Tue, 10 Aug 2021 23:03:26 +0300 Subject: [PATCH 23/24] doc: fix Docker install guide (#7004) * doc: fix 58710 issue (#6911) * doc: refactor docker install guide (#6988) * doc: refactor docker install guide * doc: refactor docker install guide windows * Update installing-openvino-docker-linux.md * Update installing-openvino-docker-windows.md Co-authored-by: Andrey Zaytsev * doc fixes (#6438) * doc fixes * doc fix * doc fix Co-authored-by: Andrey Zaytsev Co-authored-by: Nikolay Tyukaev --- docs/IE_DG/Model_caching_overview.md | 2 +- docs/IE_DG/inference_engine_intro.md | 2 +- docs/doxygen/ie_docs.xml | 3 + docs/doxygen/openvino_docs.xml | 5 + docs/get_started/get_started_linux.md | 2 +- docs/get_started/get_started_macos.md | 2 +- docs/get_started/get_started_windows.md | 2 +- docs/index.md | 64 +++------ .../installing-openvino-docker-linux.md | 113 +++++++++++----- .../installing-openvino-docker-windows.md | 125 +++++++++--------- docs/security_guide/workbench.md | 2 +- .../c/samples/hello_classification/README.md | 4 +- .../hello_nv12_input_classification/README.md | 4 +- .../object_detection_sample_ssd/README.md | 4 +- .../classification_sample_async/README.md | 4 +- .../sample/hello_classification/README.md | 4 +- .../python/sample/hello_reshape_ssd/README.md | 4 +- .../ngraph_function_creation_sample/README.md | 2 +- .../object_detection_sample_ssd/README.md | 4 +- .../python/sample/speech_sample/README.md | 2 +- .../sample/style_transfer_sample/README.md | 4 +- .../classification_sample_async/README.md | 4 +- .../samples/hello_classification/README.md | 4 +- .../hello_nv12_input_classification/README.md | 4 +- .../samples/hello_reshape_ssd/README.md | 4 +- .../object_detection_sample_ssd/README.md | 4 +- .../samples/speech_sample/README.md | 2 +- .../samples/style_transfer_sample/README.md | 4 +- 28 files changed, 212 insertions(+), 172 
deletions(-) diff --git a/docs/IE_DG/Model_caching_overview.md b/docs/IE_DG/Model_caching_overview.md index 25ae7387c24..10d3d6cf99e 100644 --- a/docs/IE_DG/Model_caching_overview.md +++ b/docs/IE_DG/Model_caching_overview.md @@ -2,7 +2,7 @@ ## Introduction -As described in [Inference Engine Introduction](inference_engine_intro.md), common application flow consists of the following steps: +As described in [Inference Engine Developer Guide](Deep_Learning_Inference_Engine_DevGuide.md), common application flow consists of the following steps: 1. **Create Inference Engine Core object** diff --git a/docs/IE_DG/inference_engine_intro.md b/docs/IE_DG/inference_engine_intro.md index 847c0a59e35..a4c33fd6f85 100644 --- a/docs/IE_DG/inference_engine_intro.md +++ b/docs/IE_DG/inference_engine_intro.md @@ -5,7 +5,7 @@ This Guide provides an overview of the Inference Engine describing the typical workflow for performing inference of a pre-trained and optimized deep learning model and a set of sample applications. -> **NOTE:** Before you perform inference with the Inference Engine, your models should be converted to the Inference Engine format using the Model Optimizer or built directly in run-time using nGraph API. To learn about how to use Model Optimizer, refer to the [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). To learn about the pre-trained and optimized models delivered with the OpenVINO™ toolkit, refer to [Pre-Trained Models](@ref omz_models_intel_index). +> **NOTE:** Before you perform inference with the Inference Engine, your models should be converted to the Inference Engine format using the Model Optimizer or built directly in run-time using nGraph API. To learn about how to use Model Optimizer, refer to the [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). To learn about the pre-trained and optimized models delivered with the OpenVINO™ toolkit, refer to [Pre-Trained Models](@ref omz_models_group_intel). After you have used the Model Optimizer to create an Intermediate Representation (IR), use the Inference Engine to infer the result for a given input data. diff --git a/docs/doxygen/ie_docs.xml b/docs/doxygen/ie_docs.xml index 99e91e53ed5..d1fbbe89b2d 100644 --- a/docs/doxygen/ie_docs.xml +++ b/docs/doxygen/ie_docs.xml @@ -101,6 +101,9 @@ limitations under the License. + + + diff --git a/docs/doxygen/openvino_docs.xml b/docs/doxygen/openvino_docs.xml index a47a08e6777..fb898216fe7 100644 --- a/docs/doxygen/openvino_docs.xml +++ b/docs/doxygen/openvino_docs.xml @@ -158,6 +158,9 @@ limitations under the License. + + + @@ -205,6 +208,8 @@ limitations under the License. + + diff --git a/docs/get_started/get_started_linux.md b/docs/get_started/get_started_linux.md index b7b8bd47069..d93201f4665 100644 --- a/docs/get_started/get_started_linux.md +++ b/docs/get_started/get_started_linux.md @@ -522,7 +522,7 @@ source /opt/intel/openvino_2021/bin/setupvars.sh ## Typical Code Sample and Demo Application Syntax Examples -This section explains how to build and use the sample and demo applications provided with the toolkit. You will need CMake 3.10 or later installed. Build details are on the [Inference Engine Samples](../IE_DG/Samples_Overview.md) and [Demo Applications](@ref omz_demos_README) pages. +This section explains how to build and use the sample and demo applications provided with the toolkit. You will need CMake 3.10 or later installed. 
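The samples referenced here all follow the same basic application flow: create an Inference Engine Core object, read the IR produced by the Model Optimizer, load it on a target device, and run inference. A minimal sketch of that flow with the 2021.x C++ API is shown below; the model path and device name are placeholders, and input/output blob handling is elided:

```cpp
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core core;                          // 1. create the Core object
    auto network = core.ReadNetwork("model.xml");        // 2. read the IR (model.xml + model.bin)
    auto executable = core.LoadNetwork(network, "CPU");  // 3. compile the network for a device
    auto request = executable.CreateInferRequest();      // 4. create an inference request
    // ... fill the inputs via request.GetBlob(...), then:
    request.Infer();                                     // 5. run inference and read the outputs
    return 0;
}
```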
Build details are on the [Inference Engine Samples](../IE_DG/Samples_Overview.md) and [Demo Applications](@ref omz_demos) pages. To build all the demos and samples: diff --git a/docs/get_started/get_started_macos.md b/docs/get_started/get_started_macos.md index c58cd418bcf..0cdff6a05a3 100644 --- a/docs/get_started/get_started_macos.md +++ b/docs/get_started/get_started_macos.md @@ -476,7 +476,7 @@ source /opt/intel/openvino_2021/bin/setupvars.sh ## Typical Code Sample and Demo Application Syntax Examples -This section explains how to build and use the sample and demo applications provided with the toolkit. You will need CMake 3.13 or later installed. Build details are on the [Inference Engine Samples](../IE_DG/Samples_Overview.md) and [Demo Applications](@ref omz_demos_README) pages. +This section explains how to build and use the sample and demo applications provided with the toolkit. You will need CMake 3.13 or later installed. Build details are on the [Inference Engine Samples](../IE_DG/Samples_Overview.md) and [Demo Applications](@ref omz_demos) pages. To build all the demos and samples: diff --git a/docs/get_started/get_started_windows.md b/docs/get_started/get_started_windows.md index fa6680d30b9..7f0bc1ff2bc 100644 --- a/docs/get_started/get_started_windows.md +++ b/docs/get_started/get_started_windows.md @@ -484,7 +484,7 @@ Below you can find basic guidelines for executing the OpenVINO™ workflow using ## Typical Code Sample and Demo Application Syntax Examples -This section explains how to build and use the sample and demo applications provided with the toolkit. You will need CMake 3.10 or later and Microsoft Visual Studio 2017 or 2019 installed. Build details are on the [Inference Engine Samples](../IE_DG/Samples_Overview.md) and [Demo Applications](@ref omz_demos_README) pages. +This section explains how to build and use the sample and demo applications provided with the toolkit. You will need CMake 3.10 or later and Microsoft Visual Studio 2017 or 2019 installed. Build details are on the [Inference Engine Samples](../IE_DG/Samples_Overview.md) and [Demo Applications](@ref omz_demos) pages. To build all the demos and samples: diff --git a/docs/index.md b/docs/index.md index 83abef332b8..4f1012b5353 100644 --- a/docs/index.md +++ b/docs/index.md @@ -16,71 +16,50 @@ OpenVINO™ toolkit: The following diagram illustrates the typical OpenVINO™ workflow (click to see the full-size image): ![](img/OpenVINO-diagram.png) -### Model Preparation -#### Components: [Open Model Zoo](https://github.com/opencv/open_model_zoo), [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction), [OpenVINO Training Extentions](https://github.com/openvinotoolkit/training_extensions) +### Model Preparation, Conversion and Optimization You can use your framework of choice to prepare and train a deep learning model or just download a pre-trained model from the Open Model Zoo. The Open Model Zoo includes deep learning solutions to a variety of vision problems, including object recognition, face recognition, pose estimation, text detection, and action recognition, at a range of measured complexities. -Several of these pre-trained models are also used in the [code samples](IE_DG/Samples_Overview.md) and [application demos](@ref omz_demos_README). To download models from the Open Model Zoo, the [Model Downloader](@ref omz_tools_downloader_README) tool is used. +Several of these pre-trained models are used also in the [code samples](IE_DG/Samples_Overview.md) and [application demos](@ref omz_demos). 
To download models from the Open Model Zoo, the [Model Downloader](@ref omz_tools_downloader) tool is used. -[Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) provides a web-based interface to download a pre-trained model and enables you to visualize, fine-tune, and compare performance of deep learning models. - -If you cannot find the needed model in Open Model Zoo or want to train your own model, use [OpenVINO Training Extentions](https://github.com/openvinotoolkit/training_extensions) which provide a convenient environment to train deep learning models. - -![](img/OV-diagram-step1.png) - -Useful documents for model preparation: -* [Model Downloader](@ref omz_tools_downloader) utility -* [Intel's Pretrained Models (Open Model Zoo)](@ref omz_models_group_intel) -* [Public Pretrained Models (Open Model Zoo)](@ref omz_models_group_public) -* [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) -* [OpenVINO Training Extentions](https://github.com/openvinotoolkit/training_extensions) - -### Model Conversion -#### Components: [Model Optimizer](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) - -The [Model Optimizer](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) is a cross-platform command-line +One of the core component of the OpenVINO™ toolkit is the [Model Optimizer](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) a cross-platform command-line tool that converts a trained neural network from its source framework to an open-source, nGraph-compatible [Intermediate Representation (IR)](MO_DG/IR_and_opsets.md) for use in inference operations. The Model Optimizer imports models trained in popular frameworks such as Caffe*, TensorFlow*, MXNet*, Kaldi*, and ONNX* and performs a few optimizations to remove excess layers and group operations when possible into simpler, faster graphs. - -If your neural network contains layers that are not in the list of known layers for supported frameworks, you can adjust the conversion and optimization process using [Custom Layers](HOWTO/Custom_Layers_Guide.md). - -Run the [Accuracy Checker utility](@ref omz_tools_accuracy_checker) either against source topologies or against the output representation to evaluate the accuracy of inference. You can also use the Accuracy Checker as a part of the [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction), an official OpenVINO™ graphical interface. - ![](img/OV-diagram-step2.png) -Useful documents for model conversion and optimization: +If your neural network model contains layers that are not in the list of known layers for supported frameworks, you can adjust the conversion and optimization process through use of [Custom Layers](HOWTO/Custom_Layers_Guide.md). + +Run the [Accuracy Checker utility](@ref omz_tools_accuracy_checker) either against source topologies or against the output representation to evaluate the accuracy of inference. The Accuracy Checker is also part of the [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction), an integrated web-based performance analysis studio. + +Use the [Post-training Optimization Tool](@ref pot_README) to accelerate the inference of a deep learning model by quantizing it to INT8. 
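As a hedged illustration of the conversion step described above (all paths and the model file name are placeholders, and the script location assumes the default `deployment_tools/model_optimizer` layout of the install):

```sh
# Illustrative only: convert a trained ONNX model to OpenVINO IR
cd <INSTALL_DIR>/deployment_tools/model_optimizer
python3 mo.py --input_model ~/models/my_model.onnx --output_dir ~/models/ir --data_type FP16
```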
+ +Useful documents for model optimization: * [Model Optimizer Developer Guide](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) * [Intermediate Representation and Opsets](MO_DG/IR_and_opsets.md) * [Custom Layers Guide](HOWTO/Custom_Layers_Guide.md) * [Accuracy Checker utility](@ref omz_tools_accuracy_checker) -* [Deep Learning Workbench Guide](@ref workbench_docs_Workbench_DG_Introduction) +* [Post-training Optimization Tool](@ref pot_README) +* [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) +* [Model Downloader](@ref omz_tools_downloader) utility +* [Intel's Pretrained Models (Open Model Zoo)](@ref omz_models_group_intel) +* [Public Pretrained Models (Open Model Zoo)](@ref omz_models_group_public) ### Running and Tuning Inference -#### Components: [Inference Engine](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md), [Post-training Optimization Tool](@ref pot_README), [Neural Network Compression Framework](https://github.com/openvinotoolkit/nncf) - The other core component of OpenVINO™ is the [Inference Engine](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md), which manages the loading and compiling of the optimized neural network model, runs inference operations on input data, and outputs the results. Inference Engine can execute synchronously or asynchronously, and its plugin architecture manages the appropriate compilations for execution on multiple Intel® devices, including both workhorse CPUs and specialized graphics and video processing platforms (see below, Packaging and Deployment). -You can use OpenVINO™ Tuning Utilities with the Inference Engine for trial and test inference on your model. The [Benchmark utility](../inference-engine/tools/benchmark_tool/README.md) uses an input model to run iterative tests for throughput or latency measures, and the [Cross Check Utility](../inference-engine/tools/cross_check_tool/README.md) compares performance of differently configured inferences. +You can use OpenVINO™ Tuning Utilities with the Inference Engine to trial and test inference on your model. The Benchmark utility uses an input model to run iterative tests for throughput or latency measures, and the [Cross Check Utility](../inference-engine/tools/cross_check_tool/README.md) compares performance of differently configured inferences. + +For a full browser-based studio integrating these other key tuning utilities, try the [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction). +![](img/OV-diagram-step3.png) OpenVINO™ toolkit includes a set of [inference code samples](IE_DG/Samples_Overview.md) and [application demos](@ref omz_demos) showing how inference is run and output processed for use in retail environments, classrooms, smart camera applications, and other solutions. -Use the [Post-training Optimization Tool](@ref pot_README) to accelerate the inference of a deep learning model by quantizing it to INT8. Models from Open Model Zoo can be quantized using the [Model Quantizer utility](https://github.com/openvinotoolkit/open_model_zoo/tree/master/tools/downloader#model-quantizer-usage). - -Besides the [Post-training Optimization Tool](@ref pot_README), the [Neural Network Compression Framework (NNCF)](https://github.com/openvinotoolkit/nncf) can be used for model fine-tuning INT8 quantization or even for applying more aggressive compression methods, such as filter pruning, sparsity, and binarization to further speed up model inference and reduce the footprint. 
In that case the compression algorithms are integrated into your model training pipeline. - OpenVINO also makes use of open-source and Intel™ tools for traditional graphics processing and performance management. Intel® Media SDK supports accelerated rich-media processing, including transcoding. OpenVINO™ optimizes calls to the rich OpenCV and OpenVX libraries for processing computer vision workloads. And the new DL Streamer integration further accelerates video pipelining and performance. -Try these key tuning tools in your browser with the [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) intuitive graphical interface. - -![](img/OV-diagram-step3.png) - Useful documents for inference tuning: * [Inference Engine Developer Guide](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) * [Inference Engine API References](./api_references.html) * [Inference Code Samples](IE_DG/Samples_Overview.md) * [Application Demos](@ref omz_demos) -* [Post-training Optimization Tool](@ref pot_README) * [Low Precision Optimization Guide] (@ref pot_docs_LowPrecisionOptimizationGuide) -* [Neural Network Compression Framework (NNCF)](https://github.com/openvinotoolkit/nncf) * [Deep Learning Workbench Guide](@ref workbench_docs_Workbench_DG_Introduction) * [Intel Media SDK](https://github.com/Intel-Media-SDK/MediaSDK) * [DL Streamer Samples](@ref gst_samples_README) @@ -88,8 +67,6 @@ Useful documents for inference tuning: * [OpenVX](https://software.intel.com/en-us/openvino-ovx-guide) ### Packaging and Deployment -#### Components: [Deployment Manager](./install_guides/deployment-manager-tool.md) - The Intel Distribution of OpenVINO™ toolkit outputs optimized inference runtimes for the following devices: * Intel® CPUs * Intel® Processor Graphics @@ -98,13 +75,12 @@ The Intel Distribution of OpenVINO™ toolkit outputs optimized inference runtim The Inference Engine's plug-in architecture can be extended to meet other specialized needs. [Deployment Manager](./install_guides/deployment-manager-tool.md) is a Python* command-line tool that assembles the tuned model, IR files, your application, and required dependencies into a runtime package for your target device. It outputs packages for CPU, GPU, and VPU on Linux* and Windows*, and Neural Compute Stick-optimized packages with Linux. -![](img/OV-diagram-step4.png) - * [Inference Engine Integration Workflow](IE_DG/Integrate_with_customer_application_new_API.md) * [Inference Engine API References](./api_references.html) * [Inference Engine Plug-in Developer Guide](./ie_plugin_api/index.html) * [Deployment Manager Guide](./install_guides/deployment-manager-tool.md) + ## OpenVINO™ Toolkit Components Intel® Distribution of OpenVINO™ toolkit includes the following components: diff --git a/docs/install_guides/installing-openvino-docker-linux.md b/docs/install_guides/installing-openvino-docker-linux.md index df87cd3d442..46cabb6f934 100644 --- a/docs/install_guides/installing-openvino-docker-linux.md +++ b/docs/install_guides/installing-openvino-docker-linux.md @@ -27,8 +27,9 @@ Prebuilt images are available on: ## Build a Docker* Image -You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your setting via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci). The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. 
-You can also try our [Tutorials](https://github.com/openvinotoolkit/docker_ci/tree/master/docs/tutorials) which demonstrate the usage of OpenVINO™ Docker containers. +You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your setting via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci). +The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. +You can also try our [Tutorials](https://github.com/openvinotoolkit/docker_ci/tree/master/docs/tutorials) which demonstrate the usage of Docker containers with Intel® Distribution of OpenVINO™ toolkit. You can find device specific steps to configure an Intel® Distribution of OpenVINO™ toolkit Dockerfile below. ## Use Docker* Image for CPU @@ -36,34 +37,40 @@ You can also try our [Tutorials](https://github.com/openvinotoolkit/docker_ci/tr - All instructions that are available to host process available for process in container, including, for example, AVX2, AVX512. No restrictions. - Docker\* does not use virtualization or emulation. The process in Docker* is just a regular Linux process, but it is isolated from external world on kernel level. Performance penalty is small. -### Build a Docker* Image for CPU +### Configure a Docker* Image for CPU -You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your setting via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. -The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. +You don't need to do specific steps to configure an Intel® Distribution of OpenVINO™ toolkit Dockerfile for CPU. You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your setting via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci). ### Run the Docker* Image for CPU Run the image with the following command: + ```sh docker run -it --rm ``` + ## Use a Docker* Image for GPU -### Build a Docker* Image for GPU + +### Configure a Docker* Image for GPU + +> **NOTE**: Only Intel® integrated graphics are supported. **Prerequisites:** + - GPU is not available in container by default, you must attach it to the container. - Kernel driver must be installed on the host. - Intel® OpenCL™ runtime package must be included into the container. -- In the container, non-root user must be in the `video` and `render` groups. To add a user to the render group, follow the [Configuration Guide for the Intel® Graphics Compute Runtime for OpenCL™ on Ubuntu* 20.04](https://github.com/openvinotoolkit/docker_ci/blob/master/configure_gpu_ubuntu20.md). +- In the container, non-root user must be in the `video` and `render` groups. To add a user to the render group, follow the [Configuration Guide for the Intel® Graphics Compute Runtime for OpenCL™ on Ubuntu* 20.04](https://github.com/openvinotoolkit/docker_ci/blob/master/configure_gpu_ubuntu20.md). 
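Before adding the driver layers, it can also help to confirm on the host that the GPU device nodes are present and to note which groups own them; a minimal check is sketched below (the render node name may differ on your system):

```sh
# Run on the host, not inside the container
ls -l /dev/dri
stat -c "group=%G" /dev/dri/renderD128   # typically 'render' on Ubuntu 20.04
```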
- -Before building a Docker* image on GPU, add the following commands to a Dockerfile: +To configure a OpenVINO Docker* image with access to GPU, add the following commands to a Dockerfile: **Ubuntu 18.04/20.04**: + ```sh WORKDIR /tmp/opencl RUN useradd -ms /bin/bash -G video,users openvino && \ chown openvino -R /home/openvino + RUN apt-get update && \ apt-get install -y --no-install-recommends ocl-icd-libopencl1 && \ rm -rf /var/lib/apt/lists/* && \ @@ -76,7 +83,24 @@ RUN apt-get update && \ ldconfig && \ rm /tmp/opencl ``` + +or you can use the installation script `install_NEO_OCL_driver.sh` if you previously installed OpenVINO in the Dockerfile, where `INTEL_OPENCL` is the variable to store the default version of Intel® Graphics Compute Runtime for OpenCL™ Driver: + +```sh +WORKDIR /tmp/opencl +RUN useradd -ms /bin/bash -G video,users openvino && \ + chown openvino -R /home/openvino + +# Please use `20.35.17767` for 10th generation Intel® Core™ processor (formerly Ice Lake) or 11th generation Intel® Core™ processor (formerly Tiger Lake) +ARG INTEL_OPENCL=19.41.14441 + +WORKDIR ${INTEL_OPENVINO_DIR}/install_dependencies +RUN ./install_NEO_OCL_driver.sh --no_numa -y --install_driver ${INTEL_OPENCL} && \ + rm -rf /var/lib/apt/lists/* +``` + **CentOS 7/RHEL 8**: + ```sh WORKDIR /tmp/opencl RUN useradd -ms /bin/bash -G video,users openvino && \ @@ -98,9 +122,27 @@ RUN yum update -y && yum install -y https://dl.fedoraproject.org/pub/epel/epel-r yum remove -y epel-release ``` +or you can use the installation script `install_NEO_OCL_driver.sh` if you previously installed OpenVINO in the Dockerfile, where `INTEL_OPENCL` is the variable to store the default version of Intel® Graphics Compute Runtime for OpenCL™ Driver: + +```sh +WORKDIR /tmp/opencl +RUN useradd -ms /bin/bash -G video,users openvino && \ + chown openvino -R /home/openvino +RUN groupmod -g 44 video + +# Please use `20.35.17767` for 10th generation Intel® Core™ processor (formerly Ice Lake) or 11th generation Intel® Core™ processor (formerly Tiger Lake) +ARG INTEL_OPENCL=19.41.14441 + +WORKDIR ${INTEL_OPENVINO_DIR}/install_dependencies +RUN ./install_NEO_OCL_driver.sh --no_numa -y --install_driver ${INTEL_OPENCL} && \ + yum clean all && rm -rf /var/cache/yum && \ + yum remove -y epel-release +``` + ### Run the Docker* Image for GPU To make GPU available in the container, attach the GPU to the container using `--device /dev/dri` option and run the container: + ```sh docker run -it --rm --device /dev/dri ``` @@ -108,7 +150,7 @@ docker run -it --rm --device /dev/dri ## Use a Docker* Image for Intel® Neural Compute Stick 2 -### Build and Run the Docker* Image for Intel® Neural Compute Stick 2 +### Configure and Run the Docker* Image for Intel® Neural Compute Stick 2 **Known limitations:** @@ -118,7 +160,8 @@ docker run -it --rm --device /dev/dri Use one of the following options as **Possible solutions for Intel® Neural Compute Stick 2:** -#### Option #1 +#### Option 1 + 1. 
Get rid of UDEV by rebuilding `libusb` without UDEV support in the Docker* image (add the following commands to a `Dockerfile`): - **Ubuntu 18.04/20.04**: ```sh @@ -192,22 +235,23 @@ RUN /usr/bin/install -c -m 644 libusb-1.0.pc '/usr/local/lib/pkgconfig' && \ docker run -it --rm --device-cgroup-rule='c 189:* rmw' -v /dev/bus/usb:/dev/bus/usb ``` -#### Option #2 +#### Option 2 Run container in the privileged mode, enable the Docker network configuration as host, and mount all devices to the container: ```sh docker run -it --rm --privileged -v /dev:/dev --network=host ``` > **NOTES**: +> > - It is not secure. > - Conflicts with Kubernetes* and other tools that use orchestration and private networks may occur. ## Use a Docker* Image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs -### Build Docker* Image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs +### Configure Docker* Image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs To use the Docker container for inference on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs: -1. Set up the environment on the host machine, that is going to be used for running Docker*. -It is required to execute `hddldaemon`, which is responsible for communication between the HDDL plugin and the board. +1. Set up the environment on the host machine, that is going to be used for running Docker*. +It is required to execute `hddldaemon`, which is responsible for communication between the HDDL plugin and the board. To learn how to set up the environment (the OpenVINO package or HDDL package must be pre-installed), see [Configuration guide for HDDL device](https://github.com/openvinotoolkit/docker_ci/blob/master/install_guide_vpu_hddl.md) or [Configuration Guide for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs](installing-openvino-linux-ivad-vpu.md). 2. Prepare the Docker* image (add the following commands to a Dockerfile). - **Ubuntu 18.04**: @@ -255,50 +299,57 @@ $HDDL_INSTALL_DIR/hddldaemon ### Run the Docker* Image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs To run the built Docker* image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, use the following command: + ```sh docker run -it --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp ``` > **NOTES**: -> -> - The device `/dev/ion` need to be shared to be able to use ion buffers among the plugin, `hddldaemon` and the kernel. +> +> - The device `/dev/ion` needs to be shared to be able to use ion buffers among the plugin, `hddldaemon` and the kernel. > - Since separate inference tasks share the same HDDL service communication interface (the service creates mutexes and a socket file in `/var/tmp`), `/var/tmp` needs to be mounted and shared among them. -In some cases, the ion driver is not enabled (for example, due to a newer kernel version or iommu incompatibility). `lsmod | grep myd_ion` returns empty output. To resolve, use the following command: +In some cases, the ion driver is not enabled (for example, due to a newer kernel version or iommu (Input-Output Memory Management Unit) incompatibility). `lsmod | grep myd_ion` returns empty output. To resolve, use the following command: + ```sh -docker run -it --rm --net=host -v /var/tmp:/var/tmp –ipc=host +docker run -it --rm --net=host -v /var/tmp:/var/tmp –-ipc=host ``` + > **NOTES**: -> -> - When building docker images, create a user in the docker file that has the same UID and GID as the user which runs hddldaemon on the host. 
-> - Run the application in the docker with this user. +> +> - When building Docker images, create a user in the Dockerfile that has the same UID(User Identifier) and GID(Group Identifier) as the user which runs hddldaemon on the host. +> - Run the application in the Docker image with this user. > - Alternatively, you can start hddldaemon with the root user on host, but this approach is not recommended. -### Run Demos in the Docker* Image +### Run Demos in the Docker* Image To run the Security Barrier Camera Demo on a specific inference device, run the following commands with the root privileges (additional third-party dependencies will be installed): **CPU**: + ```sh -docker run -itu root:root --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp --device /dev/dri:/dev/dri --device-cgroup-rule='c 189:* rmw' -v /dev/bus/usb:/dev/bus/usb +docker run -itu root:root --rm /bin/bash -c "apt update && apt install sudo && deployment_tools/demo/demo_security_barrier_camera.sh -d CPU -sample-options -no_show" ``` **GPU**: + ```sh -docker run -itu root:root --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp --device /dev/dri:/dev/dri --device-cgroup-rule='c 189:* rmw' -v /dev/bus/usb:/dev/bus/usb +docker run -itu root:root --rm --device /dev/dri:/dev/dri /bin/bash -c "apt update && apt install sudo && deployment_tools/demo/demo_security_barrier_camera.sh -d GPU -sample-options -no_show" ``` **MYRIAD**: + ```sh -docker run -itu root:root --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp --device /dev/dri:/dev/dri --device-cgroup-rule='c 189:* rmw' -v /dev/bus/usb:/dev/bus/usb +docker run -itu root:root --rm --device-cgroup-rule='c 189:* rmw' -v /dev/bus/usb:/dev/bus/usb /bin/bash -c "apt update && apt install sudo && deployment_tools/demo/demo_security_barrier_camera.sh -d MYRIAD -sample-options -no_show" ``` **HDDL**: + ```sh -docker run -itu root:root --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp --device /dev/dri:/dev/dri --device-cgroup-rule='c 189:* rmw' -v /dev/bus/usb:/dev/bus/usb +docker run -itu root:root --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp /bin/bash -c "apt update && apt install sudo && deployment_tools/demo/demo_security_barrier_camera.sh -d HDDL -sample-options -no_show" ``` @@ -312,12 +363,12 @@ For instructions for previous releases with FPGA Support, see documentation for ## Troubleshooting -If you got proxy issues, please setup proxy settings for Docker. See the Proxy section in the [Install the DL Workbench from Docker Hub* ](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) topic. +If you got proxy issues, please setup proxy settings for Docker. See the Proxy section in the [Install the DL Workbench from Docker Hub* ](@ref workbench_docs_Workbench_DG_Run_Locally) topic. ## Additional Resources -* [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs. +- [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs. 
-* Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit) +- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit) -* Intel® Neural Compute Stick 2 Get Started: [https://software.intel.com/en-us/neural-compute-stick/get-started](https://software.intel.com/en-us/neural-compute-stick/get-started) +- Intel® Neural Compute Stick 2 Get Started: [https://software.intel.com/en-us/neural-compute-stick/get-started](https://software.intel.com/en-us/neural-compute-stick/get-started) diff --git a/docs/install_guides/installing-openvino-docker-windows.md b/docs/install_guides/installing-openvino-docker-windows.md index 09435ec7a2a..6696da70586 100644 --- a/docs/install_guides/installing-openvino-docker-windows.md +++ b/docs/install_guides/installing-openvino-docker-windows.md @@ -2,7 +2,7 @@ The Intel® Distribution of OpenVINO™ toolkit quickly deploys applications and solutions that emulate human vision. Based on Convolutional Neural Networks (CNN), the toolkit extends computer vision (CV) workloads across Intel® hardware, maximizing performance. The Intel® Distribution of OpenVINO™ toolkit includes the Intel® Deep Learning Deployment Toolkit. -This guide provides device specifics for a Docker* image creation with Intel® Distribution of OpenVINO™ toolkit for Linux* and its further usage. +This guide provides device specifics for a Docker* image creation with Intel® Distribution of OpenVINO™ toolkit for Windows* and its further usage. ## System Requirements @@ -21,43 +21,46 @@ Prebuilt images are available on [Docker Hub](https://hub.docker.com/u/openvino) ## Build a Docker* Image -You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your setting via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci). The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. +You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your setting via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci). +The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can find device specific steps to configure an Intel® Distribution of OpenVINO™ toolkit Dockerfile below. -## Build and Run the Docker* Image for CPU +## Configure and Run the Docker* Image for CPU ## Install Additional Dependencies ### Install CMake To add CMake to the image, add the following commands to the Dockerfile: -~~~ + +```bat RUN powershell.exe -Command ` Invoke-WebRequest -URI https://cmake.org/files/v3.14/cmake-3.14.7-win64-x64.msi -OutFile %TMP%\\cmake-3.14.7-win64-x64.msi ; ` Start-Process %TMP%\\cmake-3.14.7-win64-x64.msi -ArgumentList '/quiet /norestart' -Wait ; ` Remove-Item %TMP%\\cmake-3.14.7-win64-x64.msi -Force RUN SETX /M PATH "C:\Program Files\CMake\Bin;%PATH%" -~~~ -In case of proxy issues, please add the `ARG HTTPS_PROXY` and `-Proxy %%HTTPS_PROXY%` settings to the `powershell.exe` command to the Dockerfile. Then build a docker image: -~~~ +``` + +In case of proxy issues, please add the `ARG HTTPS_PROXY` and `-Proxy %%HTTPS_PROXY%` settings to the `powershell.exe` command to the Dockerfile. 
Then build a Docker image: + +```bat docker build . -t ` --build-arg HTTPS_PROXY= -~~~ +``` ### Install Microsoft Visual Studio* Build Tools -You can add Microsoft Visual Studio Build Tools* to a Windows* OS Docker image. Available options are to use offline installer for Build Tools -(follow the [Instruction for the offline installer](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019)) or -to use the online installer for Build Tools (follow [Instruction for the online installer](https://docs.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2019)). -Microsoft Visual Studio Build Tools* are licensed as a supplement your existing Microsoft Visual Studio* license. +You can add Microsoft Visual Studio Build Tools* to a Windows* OS Docker image using the [offline](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019) or [online](https://docs.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2019) installers for Build Tools. +Microsoft Visual Studio Build Tools* are licensed as a supplement your existing Microsoft Visual Studio* license. Any images built with these tools should be for your personal use or for use in your organization in accordance with your existing Visual Studio* and Windows* licenses. To add MSBuild 2019 to the image, add the following commands to the Dockerfile: -~~~ + +```bat RUN powershell.exe -Command Invoke-WebRequest -URI https://aka.ms/vs/16/release/vs_buildtools.exe -OutFile %TMP%\\vs_buildtools.exe RUN %TMP%\\vs_buildtools.exe --quiet --norestart --wait --nocache ` - --installPath "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools" ` + --installPath "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools" ` --add Microsoft.VisualStudio.Workload.MSBuildTools ` --add Microsoft.VisualStudio.Workload.UniversalBuildTools ` --add Microsoft.VisualStudio.Workload.VCTools --includeRecommended ` @@ -65,35 +68,38 @@ RUN %TMP%\\vs_buildtools.exe --quiet --norestart --wait --nocache ` --remove Microsoft.VisualStudio.Component.Windows10SDK.10586 ` --remove Microsoft.VisualStudio.Component.Windows10SDK.14393 ` --remove Microsoft.VisualStudio.Component.Windows81SDK || IF "%ERRORLEVEL%"=="3010" EXIT 0 && powershell set-executionpolicy remotesigned -~~~ -In case of proxy issues, please use an offline installer for Build Tools (follow [Instruction for the offline installer](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019). +``` + +In case of proxy issues, please use the [offline installer for Build Tools](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019). 
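For the CMake proxy note above, a hedged sketch of how the build argument and the `-Proxy` parameter could be wired into the download step is shown below; the exact `%` escaping depends on the Dockerfile shell and escape settings, so treat it as an assumption to adapt rather than a verified recipe:

```bat
# Sketch only: pass the proxy at build time and use it for the download
ARG HTTPS_PROXY
RUN powershell.exe -Command `
    Invoke-WebRequest -URI https://cmake.org/files/v3.14/cmake-3.14.7-win64-x64.msi -Proxy %HTTPS_PROXY% -OutFile %TMP%\\cmake-3.14.7-win64-x64.msi
```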
## Run the Docker* Image for CPU -To install the OpenVINO toolkit from the prepared Docker image, run the image with the following command: -~~~ +To start the interactive session, run the following command allows inference on the CPU: + +```bat docker run -it --rm -~~~ +``` If you want to try some demos then run image with the root privileges (some additional 3-rd party dependencies will be installed): -~~~ -docker run -itu ContainerAdministrator --rm cmd /S /C "cd deployment_tools\demo && demo_security_barrier_camera.bat -d CPU -sample-options -no_show" -~~~ -## Build and Run the Docker* Image for GPU +```bat +docker run -itu ContainerAdministrator --rm cmd /S /C "cd deployment_tools\demo && demo_security_barrier_camera.bat -d CPU -sample-options -no_show" +``` + +## Configure and Run the Docker* Image for GPU GPU Acceleration in Windows containers feature requires to meet Windows host, OpenVINO toolkit and Docker* requirements: -* [Windows requirements](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/gpu-acceleration): - * The container host must be running Windows Server 2019 or Windows 10 of version 1809 or higher. - * The container base image must be `mcr.microsoft.com/windows:1809` or higher. Windows Server Core and Nano Server container images are not currently supported. - * The container host must be running Docker Engine 19.03 or higher. - * The container host must have GPU running display drivers of version WDDM 2.5 or higher. -* [OpenVINO™ GPU requirement](https://docs.openvinotoolkit.org/latest/openvino_docs_install_guides_installing_openvino_windows.html#Install-GPU): - * Intel Graphics Driver for Windows of version 15.65 or higher. -* [Docker isolation mode requirement](https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/hyperv-container): - * Windows host and container version tags must match. - * [Windows host and container isolation process support](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility) +- [Windows requirements](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/gpu-acceleration): + - The container host must be running Windows Server 2019 or Windows 10 of version 1809 or higher. + - The container base image must be `mcr.microsoft.com/windows:1809` or higher. Windows Server Core and Nano Server container images are not currently supported. + - The container host must be running Docker Engine 19.03 or higher. + - The container host must have GPU running display drivers of version WDDM 2.5 or higher. +- [OpenVINO™ GPU requirement](https://docs.openvinotoolkit.org/latest/openvino_docs_install_guides_installing_openvino_windows.html#Install-GPU): + - Intel Graphics Driver for Windows of version 15.65 or higher. +- [Docker isolation mode requirement](https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/hyperv-container): + - Windows host and container version tags must match. + - [Windows host and container isolation process support](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility) ## Build a Docker* Image for Your Host System @@ -101,50 +107,49 @@ GPU Acceleration in Windows containers feature requires to meet Windows host, Op 2. Check your [Windows host and container isolation process compatibility](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility). 3. 
Find the appropriate Windows container base image on [DockerHub*](https://hub.docker.com/_/microsoft-windows) and set up your host/container version in the `FROM` Dockerfile instruction. For example, in [openvino_c_dev_2021.dockerfile](https://github.com/openvinotoolkit/docker_ci/blob/master/dockerfiles/winserver2019/openvino_c_dev_2021.dockerfile), change: - ~~~ + ```bat FROM mcr.microsoft.com/windows/servercore:ltsc2019 AS ov_base - ~~~ - to - ~~~ + ``` + to: + ```bat FROM mcr.microsoft.com/windows:20H2 - ~~~ -4. Build the Docker image - ~~~ + ``` +4. Build the Docker image + ```bat docker build --build-arg package_url= -f -t . - ~~~ + ``` 5. Copy `OpenCL.dll` from your `C:\Windows\System32` host folder to any `temp` directory: - ~~~ + ```bat mkdir C:\tmp copy C:\Windows\System32\OpenCL.dll C:\tmp - ~~~ - + ``` + ## Run the Docker* Image for GPU -1. To try inference on a GPU, run the image with the following command: - ~~~ +1. To try inference on a GPU, run the image with the following command: + ```bat docker run -it --rm -u ContainerAdministrator --isolation process --device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599 -v C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409:C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409 -v C:\tmp:C:\tmp - ~~~ - where - * `--device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599` is a reserved interface class GUID for a GPU device. - * `C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409` is the path to OpenCL driver home directory. To find it on your PC, run the `C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_*` regular expression. - * `C:\tmp` is the folder with the copy of `OpenCL.dll` from your `C:\Windows\System32` host folder. - + ``` + where + - `--device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599` is a reserved interface class GUID for a GPU device. + - `C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409` is the path to OpenCL driver home directory. To find it on your PC, run the `C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_*` regular expression. + - `C:\tmp` is the folder with the copy of `OpenCL.dll` from your `C:\Windows\System32` host folder. 2. Copy `OpenCL.dll` to the `C:\Windows\System32` folder inside the container and set appropriate registry entry. Now you can run inference on a GPU device: - ~~~ + ```bat copy C:\tmp\OpenCL.dll C:\Windows\System32\ && reg add "HKLM\SOFTWARE\Khronos\OpenCL\Vendors" /v "C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409\ocl\bin\x64\intelocl64.dll" /t REG_DWORD /d 0 - ~~~ -3. For example, run the `demo_security_barrier_camera` demo with the command below: - ~~~ + ``` +3. For example, run the `demo_security_barrier_camera` demo with the command below: + ```bat cd bin && setupvars.bat && cd ../ && cd deployment_tools\demo && demo_security_barrier_camera.bat -d GPU -sample-options -no_show - ~~~ + ``` > **NOTE**: Addittional third-party dependencies will be installed. ## Troubleshooting -If you got proxy issues, please setup proxy settings for Docker. See the Proxy section in the [Install the DL Workbench from Docker Hub* ](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) topic. +If you got proxy issues, please setup proxy settings for Docker. See the Proxy section in the [Install the DL Workbench from Docker Hub* ](@ref workbench_docs_Workbench_DG_Run_Locally) topic. 
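One common approach, shown here only as a hedged example that mirrors the CMake section above (the proxy address is a placeholder), is to pass the proxy values as build arguments:

```bat
REM Replace the placeholder address with your proxy
docker build . -t <image_name> --build-arg HTTPS_PROXY=http://proxy.example.com:911 --build-arg HTTP_PROXY=http://proxy.example.com:911
```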
## Additional Resources -* [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs. +- [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs. -* Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit) +- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit) diff --git a/docs/security_guide/workbench.md b/docs/security_guide/workbench.md index 7d8b128cb1f..cfcbdc56b99 100644 --- a/docs/security_guide/workbench.md +++ b/docs/security_guide/workbench.md @@ -12,7 +12,7 @@ is only accessible from the machine the Docker container is built on: application are accessible only from the `localhost` by default. * When using `docker run` to [start the DL Workbench from Docker - Hub](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub), limit connections for the host IP 127.0.0.1. + Hub](@ref workbench_docs_Workbench_DG_Run_Locally), limit connections for the host IP 127.0.0.1. For example, limit the connections for the host IP to the port `5665` with the `-p 127.0.0.1:5665:5665` command . Refer to [Container networking](https://docs.docker.com/config/containers/container-networking/#published-ports) for diff --git a/inference-engine/ie_bridges/c/samples/hello_classification/README.md b/inference-engine/ie_bridges/c/samples/hello_classification/README.md index 2b0ca163ac0..b51d6efe539 100644 --- a/inference-engine/ie_bridges/c/samples/hello_classification/README.md +++ b/inference-engine/ie_bridges/c/samples/hello_classification/README.md @@ -36,7 +36,7 @@ To build the sample, please use instructions available at [Build the Sample Appl To run the sample, you need specify a model and image: -- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). +- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). - you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data. 
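As a hedged illustration of the first option (the model name and output directory are placeholders), a public model can be fetched with the Model Downloader script from the Open Model Zoo tools:

```sh
# Illustrative only: downloader.py lives in the Open Model Zoo tools/downloader directory
python3 downloader.py --name alexnet --output_dir ~/models
```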
> **NOTES**: @@ -94,7 +94,7 @@ This sample is an API example, for any performance measurements please use the d - [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) [ie_core_create]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Core.html#gaab73c7ee3704c742eaac457636259541 diff --git a/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/README.md b/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/README.md index 0479ae90278..89692d73dc2 100644 --- a/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/README.md +++ b/inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/README.md @@ -35,7 +35,7 @@ To build the sample, please use instructions available at [Build the Sample Appl To run the sample, you need specify a model and image: -- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). +- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). - you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data. The sample accepts an uncompressed image in the NV12 color format. To run the sample, you need to @@ -109,7 +109,7 @@ This sample is an API example, for any performance measurements please use the d - [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) [ie_network_set_color_format]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#ga85f3251f1f7b08507c297e73baa58969 diff --git a/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md b/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md index f253da02ec2..0eacb2164c9 100644 --- a/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md +++ b/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md @@ -42,7 +42,7 @@ To build the sample, please use instructions available at [Build the Sample Appl To run the sample, you need specify a model and image: -- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). +- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). 
- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data. Running the application with the `-h` option yields the following usage message: @@ -153,7 +153,7 @@ This sample is an API example, for any performance measurements please use the d - [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) [ie_infer_request_infer_async]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__InferRequest.html#gad2351010e292b6faec959a3d5a8fb60e diff --git a/inference-engine/ie_bridges/python/sample/classification_sample_async/README.md b/inference-engine/ie_bridges/python/sample/classification_sample_async/README.md index 67a8282beaa..48b02493726 100644 --- a/inference-engine/ie_bridges/python/sample/classification_sample_async/README.md +++ b/inference-engine/ie_bridges/python/sample/classification_sample_async/README.md @@ -68,7 +68,7 @@ Options: To run the sample, you need specify a model and image: -- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). +- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). - you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data. > **NOTES**: @@ -148,7 +148,7 @@ The sample application logs each step in a standard output stream and outputs to - [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) [IECore]:https://docs.openvinotoolkit.org/latest/ie_python_api/classie__api_1_1IECore.html diff --git a/inference-engine/ie_bridges/python/sample/hello_classification/README.md b/inference-engine/ie_bridges/python/sample/hello_classification/README.md index 19bfcacddb0..b1953e777d0 100644 --- a/inference-engine/ie_bridges/python/sample/hello_classification/README.md +++ b/inference-engine/ie_bridges/python/sample/hello_classification/README.md @@ -57,7 +57,7 @@ Options: ``` To run the sample, you need specify a model and image: -- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). +- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). - you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data. 
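If the chosen public model is not distributed in IR form, a hedged follow-up step (model name and directories are placeholders) is to convert it with the Open Model Zoo converter script before running the sample:

```sh
# Illustrative only: produce IR files for a previously downloaded public model
python3 converter.py --name alexnet --download_dir ~/models --output_dir ~/models/ir
```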
> **NOTES**: @@ -119,7 +119,7 @@ The sample application logs each step in a standard output stream and outputs to - [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) [IECore]:https://docs.openvinotoolkit.org/latest/ie_python_api/classie__api_1_1IECore.html diff --git a/inference-engine/ie_bridges/python/sample/hello_reshape_ssd/README.md b/inference-engine/ie_bridges/python/sample/hello_reshape_ssd/README.md index ec8903c442c..0430f07dfcd 100644 --- a/inference-engine/ie_bridges/python/sample/hello_reshape_ssd/README.md +++ b/inference-engine/ie_bridges/python/sample/hello_reshape_ssd/README.md @@ -65,7 +65,7 @@ Options: ``` To run the sample, you need specify a model and image: -- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). +- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). - you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data. > **NOTES**: @@ -116,7 +116,7 @@ The sample application logs each step in a standard output stream and creates an - [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) [IECore]:https://docs.openvinotoolkit.org/latest/ie_python_api/classie__api_1_1IECore.html diff --git a/inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md b/inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md index 097da5ac35f..43a60b3a331 100644 --- a/inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md +++ b/inference-engine/ie_bridges/python/sample/ngraph_function_creation_sample/README.md @@ -130,7 +130,7 @@ The sample application logs each step in a standard output stream and outputs to - [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) [IECore]:https://docs.openvinotoolkit.org/latest/ie_python_api/classie__api_1_1IECore.html diff --git a/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md b/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md index 020fe3869f9..1ce4d2f81c7 100644 --- a/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md +++ 
b/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md @@ -67,7 +67,7 @@ Options: To run the sample, you need specify a model and image: -- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). +- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). - you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data. > **NOTES**: @@ -117,7 +117,7 @@ The sample application logs each step in a standard output stream and creates an - [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) [IECore]:https://docs.openvinotoolkit.org/latest/ie_python_api/classie__api_1_1IECore.html diff --git a/inference-engine/ie_bridges/python/sample/speech_sample/README.md b/inference-engine/ie_bridges/python/sample/speech_sample/README.md index 0d7289145f1..2f7fd4323aa 100644 --- a/inference-engine/ie_bridges/python/sample/speech_sample/README.md +++ b/inference-engine/ie_bridges/python/sample/speech_sample/README.md @@ -193,7 +193,7 @@ The sample application logs each step in a standard output stream. - [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) [IENetwork.batch_size]:https://docs.openvinotoolkit.org/latest/ie_python_api/classie__api_1_1IENetwork.html#a79a647cb1b49645616eaeb2ca255ef2e diff --git a/inference-engine/ie_bridges/python/sample/style_transfer_sample/README.md b/inference-engine/ie_bridges/python/sample/style_transfer_sample/README.md index 9689acacba7..42b7cc29f17 100644 --- a/inference-engine/ie_bridges/python/sample/style_transfer_sample/README.md +++ b/inference-engine/ie_bridges/python/sample/style_transfer_sample/README.md @@ -79,7 +79,7 @@ Options: ``` To run the sample, you need specify a model and image: -- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). +- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). - you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data. 
> **NOTES**: @@ -129,7 +129,7 @@ The sample application logs each step in a standard output stream and creates an - [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) [IECore]:https://docs.openvinotoolkit.org/latest/ie_python_api/classie__api_1_1IECore.html diff --git a/inference-engine/samples/classification_sample_async/README.md b/inference-engine/samples/classification_sample_async/README.md index 4f93e6dd51f..fd63346078b 100644 --- a/inference-engine/samples/classification_sample_async/README.md +++ b/inference-engine/samples/classification_sample_async/README.md @@ -47,7 +47,7 @@ To build the sample, please use instructions available at [Build the Sample Appl To run the sample, you need specify a model and image: -- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). +- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). - you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data. > **NOTES**: @@ -166,5 +166,5 @@ classid probability - [Integrate the Inference Engine with Your Application](../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/inference-engine/samples/hello_classification/README.md b/inference-engine/samples/hello_classification/README.md index 2ee81703812..f70f74343d2 100644 --- a/inference-engine/samples/hello_classification/README.md +++ b/inference-engine/samples/hello_classification/README.md @@ -36,7 +36,7 @@ To build the sample, please use instructions available at [Build the Sample Appl To run the sample, you need specify a model and image: -- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). +- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). - you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data. 
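With an IR model and an image in place, a hedged invocation sketch looks like the following; the file paths are placeholders, and the flag names follow the sample's usage message:

```sh
# Illustrative paths: run the async classification sample on CPU
./classification_sample_async -m ~/models/ir/alexnet.xml -i ~/images/car.bmp -d CPU
```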
> **NOTES**: @@ -94,5 +94,5 @@ This sample is an API example, for any performance measurements please use the d - [Integrate the Inference Engine with Your Application](../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/inference-engine/samples/hello_nv12_input_classification/README.md b/inference-engine/samples/hello_nv12_input_classification/README.md index a643fcf0d5d..ff088bea6eb 100644 --- a/inference-engine/samples/hello_nv12_input_classification/README.md +++ b/inference-engine/samples/hello_nv12_input_classification/README.md @@ -37,7 +37,7 @@ To build the sample, please use instructions available at [Build the Sample Appl To run the sample, you need specify a model and image: -- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). +- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). - you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data. The sample accepts an uncompressed image in the NV12 color format. To run the sample, you need to @@ -115,5 +115,5 @@ This sample is an API example, for any performance measurements please use the d - [Integrate the Inference Engine with Your Application](../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/inference-engine/samples/hello_reshape_ssd/README.md b/inference-engine/samples/hello_reshape_ssd/README.md index 4d382f2e590..995fb537144 100644 --- a/inference-engine/samples/hello_reshape_ssd/README.md +++ b/inference-engine/samples/hello_reshape_ssd/README.md @@ -38,7 +38,7 @@ To build the sample, please use instructions available at [Build the Sample Appl To run the sample, you need specify a model and image: -- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). +- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). - you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data. 
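As a hedged invocation sketch (the file paths are placeholders), the sample takes the model, the image, and the device name as positional arguments:

```sh
# Illustrative paths: classify an image on CPU
./hello_classification ~/models/ir/alexnet.xml ~/images/car.bmp CPU
```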
> **NOTES**: @@ -93,5 +93,5 @@ This sample is an API example, for any performance measurements please use the d - [Integrate the Inference Engine with Your Application](../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/inference-engine/samples/object_detection_sample_ssd/README.md b/inference-engine/samples/object_detection_sample_ssd/README.md index 4c466d42a34..4c21ab2da87 100644 --- a/inference-engine/samples/object_detection_sample_ssd/README.md +++ b/inference-engine/samples/object_detection_sample_ssd/README.md @@ -38,7 +38,7 @@ To build the sample, please use instructions available at [Build the Sample Appl To run the sample, you need specify a model and image: -- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). +- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). - you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data. Running the application with the `-h` option yields the following usage message: @@ -146,5 +146,5 @@ Parsing input parameters - [Integrate the Inference Engine with Your Application](../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/inference-engine/samples/speech_sample/README.md b/inference-engine/samples/speech_sample/README.md index 392b4e1403e..06b230b9871 100644 --- a/inference-engine/samples/speech_sample/README.md +++ b/inference-engine/samples/speech_sample/README.md @@ -246,5 +246,5 @@ All of mentioned files can be downloaded from [https://storage.openvinotoolkit.o - [Integrate the Inference Engine with Your Application](../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/inference-engine/samples/style_transfer_sample/README.md b/inference-engine/samples/style_transfer_sample/README.md index dcb178bedda..9e791f168a7 100644 --- a/inference-engine/samples/style_transfer_sample/README.md +++ b/inference-engine/samples/style_transfer_sample/README.md @@ -37,7 +37,7 @@ To build the sample, please use instructions available at [Build the Sample Appl To run the sample, you need specify a model and image: -- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README). 
+- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). - you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data. Running the application with the `-h` option yields the following usage message: @@ -134,5 +134,5 @@ The sample application logs each step in a standard output stream and creates an - [Integrate the Inference Engine with Your Application](../../../docs/IE_DG/Integrate_with_customer_application_new_API.md) - [Using Inference Engine Samples](../../../docs/IE_DG/Samples_Overview.md) -- [Model Downloader](@ref omz_tools_downloader_README) +- [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) From 04ff7a6eded18a011aeb6f545885b4989f7c7652 Mon Sep 17 00:00:00 2001 From: Szymon Durawa Date: Wed, 11 Aug 2021 05:25:02 +0200 Subject: [PATCH 24/24] Move back to ConvolutionBackpropDataLayerTest name. (#6944) --- .../single_layer/convolution_backprop.cpp | 8 ++-- .../convolution_backprop_data.cpp | 42 +++++++++---------- .../convolution_backprop_data.cpp | 16 +++---- .../convolution_backprop_data.cpp | 34 +++++++-------- .../convolution_backprop_data.hpp | 4 +- .../convolution_backprop_data.cpp | 22 +++++++--- 6 files changed, 69 insertions(+), 57 deletions(-) diff --git a/inference-engine/tests/functional/inference_engine/serialization/single_layer/convolution_backprop.cpp b/inference-engine/tests/functional/inference_engine/serialization/single_layer/convolution_backprop.cpp index 0e29deb207e..20ef9cc1a96 100644 --- a/inference-engine/tests/functional/inference_engine/serialization/single_layer/convolution_backprop.cpp +++ b/inference-engine/tests/functional/inference_engine/serialization/single_layer/convolution_backprop.cpp @@ -4,13 +4,13 @@ #include -#include "shared_test_classes/single_layer/convolution_backprop.hpp" +#include "shared_test_classes/single_layer/convolution_backprop_data.hpp" using namespace LayerTestsDefinitions; namespace { -TEST_P(ConvolutionBackpropLayerTest, Serialize) { +TEST_P(ConvolutionBackpropDataLayerTest, Serialize) { Serialize(); } @@ -42,7 +42,7 @@ const auto convolutionBackpropData2DParams = ::testing::Combine( ::testing::ValuesIn(pad_types), ::testing::ValuesIn(outPadding)); INSTANTIATE_TEST_SUITE_P( - smoke_convolutionBackpropData2D_Serialization, ConvolutionBackpropLayerTest, + smoke_convolutionBackpropData2D_Serialization, ConvolutionBackpropDataLayerTest, ::testing::Combine( convolutionBackpropData2DParams, ::testing::ValuesIn(precisions), @@ -53,6 +53,6 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(inputShapes), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp index 7fe868eccba..fb8e870c4f2 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp @@ -4,7 
+4,7 @@ #include -#include "single_layer_tests/convolution_backprop.hpp" +#include "single_layer_tests/convolution_backprop_data.hpp" #include "common_test_utils/test_constants.hpp" using namespace LayerTestsDefinitions; @@ -51,7 +51,7 @@ const auto conv2DParams_AutoPadValid = ::testing::Combine( ::testing::ValuesIn(emptyOutputPadding) ); -INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv2DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), @@ -62,9 +62,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding, Convol ::testing::ValuesIn(inputShapes2D), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadValid, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadValid, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv2DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), @@ -75,12 +75,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadValid, Convoluti ::testing::ValuesIn(inputShapes2D), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); const std::vector> inputShape2D = {{1, 3, 9, 12}}; const std::vector> outputShapes2D = {{6, 6}, {4, 9}}; -INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_OutputShapeDefined, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_OutputShapeDefined, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv2DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), @@ -91,7 +91,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_OutputShapeDefined, Con ::testing::ValuesIn(inputShape2D), ::testing::ValuesIn(outputShapes2D), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); const std::vector> outputPadding2D = {{1, 1}, {2, 2}}; const std::vector> testStrides2D = {{3, 3}}; @@ -117,7 +117,7 @@ const auto conv2DParams_AutoPadValid_output_padding = ::testing::Combine( ::testing::ValuesIn(outputPadding2D) ); -INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding_OutputPaddingDefined, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding_OutputPaddingDefined, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv2DParams_AutoPadValid_output_padding, ::testing::ValuesIn(netPrecisions), @@ -128,9 +128,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding_OutputP ::testing::ValuesIn(inputShapes2D), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadding_OutputPaddingDefined, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadding_OutputPaddingDefined, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv2DParams_ExplicitPadding_output_padding, 
::testing::ValuesIn(netPrecisions), @@ -141,7 +141,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadding_OutputPaddi ::testing::ValuesIn(inputShapes2D), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); /* ============= 3D ConvolutionBackpropData ============= */ const std::vector> inputShapes3D = {{1, 3, 10, 10, 10}, @@ -174,7 +174,7 @@ const auto conv3DParams_AutoPadValid = ::testing::Combine( ::testing::ValuesIn(emptyOutputPadding) ); -INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv3DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), @@ -185,9 +185,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding, Convol ::testing::ValuesIn(inputShapes3D), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadValid, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadValid, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv3DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), @@ -198,12 +198,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadValid, Convoluti ::testing::ValuesIn(inputShapes3D), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); const std::vector> inputShape3D = {{1, 3, 10, 10, 10}}; const std::vector> outputShapes3D = {{8, 8, 8}, {10, 10, 10}}; -INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_OutputShapeDefined, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_OutputShapeDefined, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv3DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), @@ -214,7 +214,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_OutputShapeDefined, Con ::testing::ValuesIn(inputShape3D), ::testing::ValuesIn(outputShapes3D), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); const std::vector> outputPadding3D = {{1, 1, 1}, {2, 2, 2}}; const std::vector> testStrides3D = {{3, 3, 3}}; @@ -240,7 +240,7 @@ const auto conv3DParams_AutoPadValid_output_padding = ::testing::Combine( ::testing::ValuesIn(outputPadding3D) ); -INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding_OutputPaddingDefined, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding_OutputPaddingDefined, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv3DParams_AutoPadValid_output_padding, ::testing::ValuesIn(netPrecisions), @@ -251,9 +251,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding_OutputP ::testing::ValuesIn(inputShapes3D), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); 
-INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadding_OutputPaddingDefined, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadding_OutputPaddingDefined, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv3DParams_ExplicitPadding_output_padding, ::testing::ValuesIn(netPrecisions), @@ -264,6 +264,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadding_OutputPaddi ::testing::ValuesIn(inputShapes3D), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp index 69544f12e1f..df0fdc1e2ab 100755 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp @@ -8,18 +8,18 @@ #include "shared_test_classes/base/layer_test_utils.hpp" #include "ngraph_functions/utils/ngraph_helpers.hpp" #include "ngraph_functions/builders.hpp" -#include +#include using namespace InferenceEngine; using namespace CPUTestUtils; namespace CPULayerTestsDefinitions { -using LayerTestsDefinitions::convBackpropSpecificParams; -using LayerTestsDefinitions::convBackpropLayerTestParamsSet; +using LayerTestsDefinitions::convBackpropDataSpecificParams; +using LayerTestsDefinitions::convBackpropDataLayerTestParamsSet; typedef std::tuple< - convBackpropLayerTestParamsSet, + convBackpropDataLayerTestParamsSet, CPUSpecificParams, fusingSpecificParams, std::map > deconvLayerCPUTestParamsSet; @@ -28,14 +28,14 @@ class DeconvolutionLayerCPUTest : public testing::WithParamInterface obj) { - convBackpropLayerTestParamsSet basicParamsSet; + convBackpropDataLayerTestParamsSet basicParamsSet; CPUSpecificParams cpuParams; fusingSpecificParams fusingParams; std::map additionalConfig; std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = obj.param; std::ostringstream result; - result << LayerTestsDefinitions::ConvolutionBackpropLayerTest::getTestCaseName(testing::TestParamInfo( + result << LayerTestsDefinitions::ConvolutionBackpropDataLayerTest::getTestCaseName(testing::TestParamInfo( basicParamsSet, 0)); result << CPUTestsBase::getTestCaseName(cpuParams); @@ -52,7 +52,7 @@ public: } protected: void SetUp() override { - convBackpropLayerTestParamsSet basicParamsSet; + convBackpropDataLayerTestParamsSet basicParamsSet; CPUSpecificParams cpuParams; fusingSpecificParams fusingParams; std::map additionalConfig; @@ -63,7 +63,7 @@ protected: std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; std::tie(postOpMgrPtr, fusedOps) = fusingParams; - convBackpropSpecificParams convParams; + convBackpropDataSpecificParams convParams; std::vector inputShape; std::vector outputShape; auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp index a34a5b75e1b..ad7515c6f5c 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp +++ 
b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp @@ -4,7 +4,7 @@ #include -#include "single_layer_tests/convolution_backprop.hpp" +#include "single_layer_tests/convolution_backprop_data.hpp" #include "common_test_utils/test_constants.hpp" using namespace LayerTestsDefinitions; @@ -56,7 +56,7 @@ const auto conv2DParams_AutoPadValid = ::testing::Combine( ::testing::ValuesIn(emptyOutputPadding) ); -INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv2DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions2D), @@ -67,9 +67,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding, Convol ::testing::ValuesIn(inputShapes2D), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_GPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadValid, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadValid, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv2DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions2D), @@ -80,7 +80,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadValid, Convoluti ::testing::ValuesIn(inputShapes2D), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_GPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); const std::vector> outputPadding2D = {{1, 1}, {2, 2}}; const std::vector> testStrides2D = {{3, 3}}; @@ -106,7 +106,7 @@ const auto conv2DParams_AutoPadValid_output_padding = ::testing::Combine( ::testing::ValuesIn(outputPadding2D) ); -INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding_OutputPaddingDefined, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding_OutputPaddingDefined, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv2DParams_AutoPadValid_output_padding, ::testing::ValuesIn(netPrecisions), @@ -117,9 +117,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding_OutputP ::testing::ValuesIn(inputShapes2D), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadding_OutputPaddingDefined, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadding_OutputPaddingDefined, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv2DParams_ExplicitPadding_output_padding, ::testing::ValuesIn(netPrecisions), @@ -130,7 +130,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData2D_AutoPadding_OutputPaddi ::testing::ValuesIn(inputShapes2D), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); /* ============= 3D ConvolutionBackpropData ============= */ const std::vector netPrecisions3D = { @@ -166,7 +166,7 @@ const auto conv3DParams_AutoPadValid = ::testing::Combine( ::testing::ValuesIn(emptyOutputPadding) ); 
-INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv3DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions3D), @@ -177,9 +177,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding, Convol ::testing::ValuesIn(inputShapes3D), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_GPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadValid, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadValid, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv3DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions3D), @@ -190,7 +190,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadValid, Convoluti ::testing::ValuesIn(inputShapes3D), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_GPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); const std::vector> outputPadding3D = {{1, 1, 1}, {2, 2, 2}}; const std::vector> testStrides3D = {{3, 3, 3}}; @@ -216,7 +216,7 @@ const auto conv3DParams_AutoPadValid_output_padding = ::testing::Combine( ::testing::ValuesIn(outputPadding3D) ); -INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding_OutputPaddingDefined, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding_OutputPaddingDefined, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv3DParams_AutoPadValid_output_padding, ::testing::ValuesIn(netPrecisions), @@ -227,9 +227,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding_OutputP ::testing::ValuesIn(inputShapes3D), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadding_OutputPaddingDefined, ConvolutionBackpropLayerTest, +INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadding_OutputPaddingDefined, ConvolutionBackpropDataLayerTest, ::testing::Combine( conv3DParams_ExplicitPadding_output_padding, ::testing::ValuesIn(netPrecisions), @@ -240,6 +240,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvolutionBackpropData3D_AutoPadding_OutputPaddi ::testing::ValuesIn(inputShapes3D), ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropLayerTest::getTestCaseName); + ConvolutionBackpropDataLayerTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/convolution_backprop_data.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/convolution_backprop_data.hpp index 9aeb9a1a2be..933f3d6d89b 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/convolution_backprop_data.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/convolution_backprop_data.hpp @@ -24,7 +24,8 @@ typedef std::tuple< std::vector, // Pad end InferenceEngine::SizeVector, // Dilation size_t, // Num 
out channels - ngraph::op::PadType // Padding type + ngraph::op::PadType, // Padding type + std::vector // Output padding > convBackpropDataSpecificParams; typedef std::tuple< convBackpropDataSpecificParams, @@ -34,6 +35,7 @@ typedef std::tuple< InferenceEngine::Layout, // Input layout InferenceEngine::Layout, // Output layout InferenceEngine::SizeVector, // Input shapes + InferenceEngine::SizeVector, // Output shapes LayerTestsUtils::TargetDevice // Device name > convBackpropDataLayerTestParamsSet; diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/convolution_backprop_data.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/convolution_backprop_data.cpp index f2656a3c2ab..c1e642d6764 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/single_layer/convolution_backprop_data.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/convolution_backprop_data.cpp @@ -14,21 +14,24 @@ std::string ConvolutionBackpropDataLayerTest::getTestCaseName(testing::TestParam InferenceEngine::Precision inPrc, outPrc; InferenceEngine::Layout inLayout, outLayout; InferenceEngine::SizeVector inputShapes; + InferenceEngine::SizeVector outputShapes; std::string targetDevice; - std::tie(convBackpropDataParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, targetDevice) = obj.param; + std::tie(convBackpropDataParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, outputShapes, targetDevice) = obj.param; ngraph::op::PadType padType; InferenceEngine::SizeVector kernel, stride, dilation; - std::vector padBegin, padEnd; + std::vector padBegin, padEnd, outPadding; size_t convOutChannels; - std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType) = convBackpropDataParams; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, outPadding) = convBackpropDataParams; std::ostringstream result; result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + result << "OS=" << CommonTestUtils::vec2str(outputShapes) << "_"; result << "K" << CommonTestUtils::vec2str(kernel) << "_"; result << "S" << CommonTestUtils::vec2str(stride) << "_"; result << "PB" << CommonTestUtils::vec2str(padBegin) << "_"; result << "PE" << CommonTestUtils::vec2str(padEnd) << "_"; result << "D=" << CommonTestUtils::vec2str(dilation) << "_"; + result << "OP=" << CommonTestUtils::vec2str(outPadding) << "_"; result << "O=" << convOutChannels << "_"; result << "AP=" << padType << "_"; result << "netPRC=" << netPrecision.name() << "_"; @@ -43,20 +46,27 @@ std::string ConvolutionBackpropDataLayerTest::getTestCaseName(testing::TestParam void ConvolutionBackpropDataLayerTest::SetUp() { convBackpropDataSpecificParams convBackpropDataParams; std::vector inputShape; + std::vector outputShape; auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; - std::tie(convBackpropDataParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, targetDevice) = this->GetParam(); + std::tie(convBackpropDataParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, outputShape, targetDevice) = this->GetParam(); ngraph::op::PadType padType; InferenceEngine::SizeVector kernel, stride, dilation; - std::vector padBegin, padEnd; + std::vector padBegin, padEnd, outPadding; size_t convOutChannels; - std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType) = convBackpropDataParams; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, 
padType, outPadding) = convBackpropDataParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); auto paramOuts = ngraph::helpers::convert2OutputVector( ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params)); auto convBackpropData = std::dynamic_pointer_cast<ngraph::opset1::ConvolutionBackpropData>( ngraph::builder::makeConvolutionBackpropData(paramOuts[0], ngPrc, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels, false, outPadding)); if (!outputShape.empty()) { + auto outShape = ngraph::opset3::Constant::create(ngraph::element::i64, {outputShape.size()}, outputShape); + convBackpropData = std::dynamic_pointer_cast<ngraph::opset1::ConvolutionBackpropData>( + ngraph::builder::makeConvolutionBackpropData(paramOuts[0], outShape, ngPrc, kernel, stride, padBegin, padEnd, dilation, padType, convOutChannels)); + } ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(convBackpropData)}; function = std::make_shared<ngraph::Function>(results, params, "convolutionBackpropData"); }
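
For reference, below is a minimal standalone sketch of how the two knobs added to the test tuple in this patch (an explicit output shape and an output padding vector) map onto the underlying nGraph ConvolutionBackpropData operation. It assumes the ngraph::opset1::ConvolutionBackpropData constructor overloads with and without an output-shape input; the shapes, values, and the helper name buildDeconvExamples are illustrative only and are not the exact graph the shared test classes build.

// Sketch under the assumptions above: two ConvolutionBackpropData nodes, one driven by
// an explicit output-shape input, one by the output_padding attribute.
#include <memory>

#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>

std::shared_ptr<ngraph::Function> buildDeconvExamples() {
    using namespace ngraph;

    // NCHW input, matching one of the 2D shapes used by the tests: {1, 3, 9, 12}.
    auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 9, 12});

    // ConvolutionBackpropData filters are laid out as [C_IN, C_OUT, kH, kW].
    auto filters_a = std::make_shared<opset1::Parameter>(element::f32, Shape{3, 5, 3, 3});
    auto filters_b = std::make_shared<opset1::Parameter>(element::f32, Shape{3, 16, 3, 3});

    // Variant A: the requested spatial output size is passed as an i64 constant input;
    // SAME_UPPER lets the op derive the pads, and output_padding is left at its default.
    auto out_shape = opset1::Constant::create(element::i64, Shape{2}, {6, 6});
    auto deconv_with_output_shape = std::make_shared<opset1::ConvolutionBackpropData>(
        data, filters_a, out_shape,
        Strides{2, 2}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1},
        op::PadType::SAME_UPPER);

    // Variant B: no output-shape input; output_padding instead appends extra rows and
    // columns on the bottom/right of the result (it must stay smaller than the stride).
    auto deconv_with_output_padding = std::make_shared<opset1::ConvolutionBackpropData>(
        data, filters_b,
        Strides{3, 3}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1},
        op::PadType::EXPLICIT, CoordinateDiff{1, 1});

    ResultVector results{std::make_shared<opset1::Result>(deconv_with_output_shape),
                         std::make_shared<opset1::Result>(deconv_with_output_padding)};
    return std::make_shared<Function>(results, ParameterVector{data, filters_a, filters_b});
}

In the reworked suite these two cases correspond to the *_OutputShapeDefined and *_OutputPaddingDefined instantiations shown in the diffs above, which feed the new tuple members through ::testing::ValuesIn while all other instantiations pass the empty output shape and empty output padding.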