Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Steve Yoo 2021-08-11 13:39:51 +09:00
commit 0647e68277
281 changed files with 9545 additions and 5485 deletions

View File

@ -28,13 +28,15 @@ jobs:
cmake -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT ..
- name: Check code style
run: cmake --build build --target clang_format_check_all -j8
run: cmake --build . --target clang_format_check_all -j8
working-directory: build
- name: Create code style diff
if: failure()
run: |
cmake --build build --target clang_format_fix_all
cmake --build . --target clang_format_fix_all
git diff > code_style_diff.diff
working-directory: build
- uses: actions/upload-artifact@v2
if: failure()
@ -53,15 +55,13 @@ jobs:
run: sudo apt --assume-yes install shellcheck
- name: Install dependencies
run: |
sudo apt --assume-yes install libusb-1.0-0-dev
python3 -m pip install -r ./inference-engine/ie_bridges/python/requirements.txt
run: python3 -m pip install -r ./inference-engine/ie_bridges/python/requirements.txt
- name: CMake
run: |
mkdir build
cd build
cmake ..
cmake -DENABLE_VPU=OFF ..
- name: ShellCheck
run: cmake --build . --target ie_shellcheck -j8
@ -75,17 +75,16 @@ jobs:
submodules: recursive
- name: Install Clang dependency
run: sudo apt --assume-yes install libusb-1.0-0-dev libclang-9-dev
run: sudo apt --assume-yes install libclang-9-dev
- name: Install Python-based dependencies
run: |
python3 -m pip install pyyaml clang==9.0
run: python3 -m pip install -r cmake/developer_package/ncc_naming_style/requirements_dev.txt
- name: CMake
run: |
mkdir build
cd build
cmake ..
cmake -DENABLE_VPU=OFF ..
- name: Naming convention check
run: cmake --build . --target ncc_all -j8

@ -1 +1 @@
Subproject commit d7d83049708eaa18ea6796adf0eeef85b28ebc1f
Subproject commit 63e59ed312ba7a946779596e86124c1633f67607

View File

@ -33,8 +33,6 @@ if(ENABLE_NCC_STYLE)
find_host_package(Clang QUIET)
if(Clang_FOUND AND TARGET libclang)
get_target_property(libclang_location libclang LOCATION)
set(ncc_wrapper_py "${ncc_style_bin_dir}/ncc_wrapper.py")
configure_file("${ncc_style_dir}/ncc_wrapper.py.in" ${ncc_wrapper_py} @ONLY)
message(STATUS "Found libclang: ${libclang_location}")
else()
message(WARNING "libclang is not found (required for ncc naming style check)")
@ -59,7 +57,6 @@ foreach(req IN LISTS req_lines)
ie_check_pip_package(${req} STATUS)
endforeach()
set(ncc_script_dir "${ncc_style_dir}/ncc/")
set(ncc_script_py "${ncc_style_dir}/ncc/ncc.py")
if(NOT EXISTS ${ncc_script_py})
@ -77,26 +74,33 @@ endif()
#
# ov_ncc_naming_style(FOR_TARGET target_name
# INCLUDE_DIRECTORY dir
# [ADDITIONAL_INCLUDE_DIRECTORIES dir1 dir2 ..])
# [ADDITIONAL_INCLUDE_DIRECTORIES dir1 dir2 ..]
# [DEFINITIONS def1 def2 ..])
#
# FOR_TARGET - name of the target
# INCLUDE_DIRECTORY - directory to check headers from
# ADDITIONAL_INCLUDE_DIRECTORIES - additional include directories used in checked headers
# DEFINITIONS - additional definitions passed to preprocessor stage
#
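# Example (the target name and paths below are illustrative only):
#
#    ov_ncc_naming_style(FOR_TARGET my_target
#        INCLUDE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include"
#        ADDITIONAL_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/src"
#        DEFINITIONS MY_OPTIONAL_DEFINE=1)
#
# The function also accepts a FAIL flag, used by the self-check at the end of this
# file to require that the style check reports violations for the given headers.
#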
function(ov_ncc_naming_style)
if(NOT ENABLE_NCC_STYLE)
return()
endif()
cmake_parse_arguments(NCC_STYLE ""
"FOR_TARGET;INCLUDE_DIRECTORY" "ADDITIONAL_INCLUDE_DIRECTORIES" ${ARGN})
cmake_parse_arguments(NCC_STYLE "FAIL"
"FOR_TARGET;INCLUDE_DIRECTORY" "ADDITIONAL_INCLUDE_DIRECTORIES;DEFINITIONS" ${ARGN})
foreach(var FOR_TARGET INCLUDE_DIRECTORY)
if(NOT DEFINED NCC_STYLE_${var})
message(FATAL_ERROR "${var} is not defined in ov_ncc_naming_style function")
endif()
endforeach()
file(GLOB_RECURSE headers
RELATIVE "${NCC_STYLE_INCLUDE_DIRECTORY}"
"${NCC_STYLE_INCLUDE_DIRECTORY}/*.hpp")
set(new_pythonpath "${ncc_script_dir}:$ENV{PYTHOPATH}")
list(APPEND ADDITIONAL_INCLUDE_DIRECTORIES "${NCC_STYLE_INCLUDE_DIRECTORY}")
list(APPEND NCC_STYLE_ADDITIONAL_INCLUDE_DIRECTORIES "${NCC_STYLE_INCLUDE_DIRECTORY}")
foreach(header IN LISTS headers)
set(output_file "${ncc_style_bin_dir}/${header}.ncc_style")
@ -106,20 +110,21 @@ function(ov_ncc_naming_style)
OUTPUT
${output_file}
COMMAND
"${CMAKE_COMMAND}" -E env PYTHONPATH=${new_pythonpath}
"${CMAKE_COMMAND}"
-D "PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}"
-D "NCC_PY_SCRIPT=${ncc_wrapper_py}"
-D "NCC_PY_SCRIPT=${ncc_script_py}"
-D "INPUT_FILE=${full_header_path}"
-D "OUTPUT_FILE=${output_file}"
-D "DEFINITIONS=${NCC_STYLE_DEFINITIONS}"
-D "CLANG_LIB_PATH=${libclang_location}"
-D "STYLE_FILE=${ncc_style_dir}/openvino.style"
-D "ADDITIONAL_INCLUDE_DIRECTORIES=${ADDITIONAL_INCLUDE_DIRECTORIES}"
-D "ADDITIONAL_INCLUDE_DIRECTORIES=${NCC_STYLE_ADDITIONAL_INCLUDE_DIRECTORIES}"
-D "EXPECTED_FAIL=${NCC_STYLE_FAIL}"
-P "${ncc_style_dir}/ncc_run.cmake"
DEPENDS
"${full_header_path}"
"${ncc_style_dir}/openvino.style"
"${ncc_script_py}"
"${ncc_wrapper_py}"
"${ncc_style_dir}/ncc_run.cmake"
COMMENT
"[ncc naming style] ${header}"
@ -135,3 +140,9 @@ function(ov_ncc_naming_style)
add_dependencies(${NCC_STYLE_FOR_TARGET} ${ncc_target})
add_dependencies(ncc_all ${ncc_target})
endfunction()
if(TARGET ncc_all)
ov_ncc_naming_style(FOR_TARGET ncc_all
INCLUDE_DIRECTORY "${ncc_style_dir}/self_check"
FAIL)
endif()

View File

@ -2,8 +2,8 @@
# SPDX-License-Identifier: Apache-2.0
#
foreach(var NCC_PY_SCRIPT PYTHON_EXECUTABLE OUTPUT_FILE
INPUT_FILE ADDITIONAL_INCLUDE_DIRECTORIES STYLE_FILE)
foreach(var NCC_PY_SCRIPT PYTHON_EXECUTABLE OUTPUT_FILE DEFINITIONS EXPECTED_FAIL
INPUT_FILE ADDITIONAL_INCLUDE_DIRECTORIES STYLE_FILE CLANG_LIB_PATH)
if(NOT DEFINED ${var})
message(FATAL_ERROR "${var} is not defined for ncc_run.cmake")
endif()
@ -11,12 +11,18 @@ endforeach()
file(REMOVE "${OUTPUT_FILE}")
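# If preprocessor definitions were provided, forward them to the ncc script via its --definition option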
if(DEFINITIONS)
set(defs --definition ${DEFINITIONS})
endif()
execute_process(
COMMAND
"${PYTHON_EXECUTABLE}"
"${NCC_PY_SCRIPT}"
--path ${INPUT_FILE}
--style ${STYLE_FILE}
--clang-lib ${CLANG_LIB_PATH}
${defs}
--include ${ADDITIONAL_INCLUDE_DIRECTORIES}
RESULT_VARIABLE result
OUTPUT_VARIABLE output
@ -25,6 +31,14 @@ execute_process(
file(WRITE "${OUTPUT_FILE}" "${output}")
if(NOT result EQUAL "0")
set(failed ON)
endif()
if(EXPECTED_FAIL AND NOT failed)
message(FATAL_ERROR "[ncc self check] Self check is not failed for ${INPUT_FILE}")
endif()
if(failed AND NOT EXPECTED_FAIL)
# Display the output to the console (so it can be parsed from an IDE)
message("${output}")
message(FATAL_ERROR "[ncc naming style] Naming style check failed for ${INPUT_FILE}")

View File

@ -1,52 +0,0 @@
#!/usr/bin/python3
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import logging
import os
import sys
from clang.cindex import Config
from ncc import Options, RulesDb, do_validate, Validator
if __name__ == "__main__":
# set path to speicific clang library location
Config.set_library_file('@libclang_location@')
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)s %(message)s',
filename='log.txt', filemode='w')
""" Parse all command line arguments and validate """
op = Options()
op.parse_cmd_line()
if op.args.path is None:
sys.exit(1)
""" Creating the rules database """
rules_db = RulesDb(op._style_file)
""" Check the source code against the configured rules """
errors = 0
for path in op.args.path:
if os.path.isfile(path):
if do_validate(op, path):
v = Validator(rules_db, path, op)
errors += v.validate()
elif os.path.isdir(path):
for (root, subdirs, files) in os.walk(path):
for filename in files:
path = root + '/' + filename
if do_validate(op, path):
v = Validator(rules_db, path, op)
errors += v.validate()
if not op.args.recurse:
break
else:
sys.stderr.write("File '{}' not found!\n".format(path))
if errors:
print("Total number of errors = {}".format(errors))
sys.exit(1)

View File

@ -5,7 +5,7 @@ ClassName: '^([A-Z][\w]+|b?float16|numeric_limits|ngraph_error|stopwatch|unsuppo
# TODO: remove oi_pair
StructName: '^([A-Z][\w]+|element_type_traits|hash|oi_pair)$'
FunctionName: '^(operator\W+|[a-z_\d]+)$'
Namespace: '^[a-z\d_]+$'
Namespace: '^([a-z\d_]+|InferenceEngine)$'
NamespaceAlias: '^[a-z\d_]+$'
UnionName: '[A-Z][\w]+$'
TemplateTemplateParameter: '[A-Z][\w]+'
@ -104,7 +104,7 @@ NullStatement: 'XXXX'
DeclarationStatement: '^.*$'
TranslationUnit: 'XXXX'
UnexposedAttribute: '^.*$'
CxxFinalAttribute: 'XXXX'
CxxFinalAttribute: '^.*$'
CxxOverrideAttribute: '^.*$'
AnnotateAttribute: 'XXXX'
AsmLabelAttribute: 'XXXX'
@ -116,6 +116,7 @@ PreprocessingDirective: 'XXXX'
MacroDefinition: 'XXXX'
MacroInstantiation: 'XXXX'
InclusionDirective: 'XXXX'
TypeAliasTeplateDeclaration: '^.*$'
VariableName:
ScopePrefix:
Global: ''

View File

@ -0,0 +1,8 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
class name {
public:
name() = default;
};

View File

@ -0,0 +1,5 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
void Function();

View File

@ -0,0 +1,10 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
class name {
public:
name() = default;
void Method();
};

View File

@ -2,7 +2,7 @@
## Introduction
As described in [Inference Engine Introduction](inference_engine_intro.md), common application flow consists of the following steps:
As described in [Inference Engine Developer Guide](Deep_Learning_Inference_Engine_DevGuide.md), common application flow consists of the following steps:
1. **Create Inference Engine Core object**

View File

@ -5,7 +5,7 @@
This Guide provides an overview of the Inference Engine describing the typical workflow for performing
inference of a pre-trained and optimized deep learning model and a set of sample applications.
> **NOTE:** Before you perform inference with the Inference Engine, your models should be converted to the Inference Engine format using the Model Optimizer or built directly in run-time using nGraph API. To learn about how to use Model Optimizer, refer to the [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). To learn about the pre-trained and optimized models delivered with the OpenVINO™ toolkit, refer to [Pre-Trained Models](@ref omz_models_intel_index).
> **NOTE:** Before you perform inference with the Inference Engine, your models should be converted to the Inference Engine format using the Model Optimizer or built directly in run-time using nGraph API. To learn about how to use Model Optimizer, refer to the [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). To learn about the pre-trained and optimized models delivered with the OpenVINO™ toolkit, refer to [Pre-Trained Models](@ref omz_models_group_intel).
After you have used the Model Optimizer to create an Intermediate Representation (IR), use the Inference Engine to infer the result for a given input data.

View File

@ -101,6 +101,9 @@ limitations under the License.
<tab type="user" title="opset2 Specification" url="@ref openvino_docs_ops_opset2"/>
<tab type="user" title="opset1 Specification" url="@ref openvino_docs_ops_opset1"/>
</tab>
<tab type="usergroup" title="Broadcast Rules For Elementwise Operations" url="@ref openvino_docs_ops_broadcast_rules">
<tab type="usergroup" title="Broadcast Rules For Elementwise Operations" url="@ref openvino_docs_ops_broadcast_rules"/>
</tab>
<tab type="usergroup" title="Operations Specifications" url="">
<tab type="user" title="Abs-1" url="@ref openvino_docs_ops_arithmetic_Abs_1"/>
<tab type="user" title="Acos-1" url="@ref openvino_docs_ops_arithmetic_Acos_1"/>

View File

@ -158,6 +158,9 @@ limitations under the License.
<xi:include href="omz_docs.xml" xpointer="omz_models">
<xi:fallback/>
</xi:include>
<tab type="user" title="Dataset Preparation Guide" url="@ref omz_data_datasets"/>
<tab type="user" title="Intel's Pre-Trained Models Device Support" url="@ref omz_models_intel_device_support"/>
<tab type="user" title="Public Pre-Trained Models Device Support" url="@ref omz_models_public_device_support"/>
<xi:include href="omz_docs.xml" xpointer="omz_demos">
<xi:fallback/>
</xi:include>
@ -205,6 +208,8 @@ limitations under the License.
<tab type="user" title="MetaPublish Listeners" url="@ref gst_samples_gst_launch_metapublish_listener"/>
</tab>
<tab type="user" title="gvapython Sample" url="@ref gst_samples_gst_launch_gvapython_face_detection_and_classification_README"/>
<tab type="user" title="Action Recognition Sample" url="@ref gst_samples_gst_launch_action_recognition_README"/>
<tab type="user" title="Human Pose Estimation Sample" url="@ref gst_samples_gst_launch_human_pose_estimation_README"/>
</tab>
<tab type="user" title="Draw Face Attributes C++ Sample" url="@ref gst_samples_cpp_draw_face_attributes_README"/>
<tab type="user" title="Draw Face Attributes Python Sample" url="@ref gst_samples_python_draw_face_attributes_README"/>

View File

@ -522,7 +522,7 @@ source /opt/intel/openvino_2021/bin/setupvars.sh
## <a name="syntax-examples"></a> Typical Code Sample and Demo Application Syntax Examples
This section explains how to build and use the sample and demo applications provided with the toolkit. You will need CMake 3.10 or later installed. Build details are on the [Inference Engine Samples](../IE_DG/Samples_Overview.md) and [Demo Applications](@ref omz_demos_README) pages.
This section explains how to build and use the sample and demo applications provided with the toolkit. You will need CMake 3.10 or later installed. Build details are on the [Inference Engine Samples](../IE_DG/Samples_Overview.md) and [Demo Applications](@ref omz_demos) pages.
To build all the demos and samples:

View File

@ -476,7 +476,7 @@ source /opt/intel/openvino_2021/bin/setupvars.sh
## <a name="syntax-examples"></a> Typical Code Sample and Demo Application Syntax Examples
This section explains how to build and use the sample and demo applications provided with the toolkit. You will need CMake 3.13 or later installed. Build details are on the [Inference Engine Samples](../IE_DG/Samples_Overview.md) and [Demo Applications](@ref omz_demos_README) pages.
This section explains how to build and use the sample and demo applications provided with the toolkit. You will need CMake 3.13 or later installed. Build details are on the [Inference Engine Samples](../IE_DG/Samples_Overview.md) and [Demo Applications](@ref omz_demos) pages.
To build all the demos and samples:

View File

@ -484,7 +484,7 @@ Below you can find basic guidelines for executing the OpenVINO™ workflow using
## <a name="syntax-examples"></a> Typical Code Sample and Demo Application Syntax Examples
This section explains how to build and use the sample and demo applications provided with the toolkit. You will need CMake 3.10 or later and Microsoft Visual Studio 2017 or 2019 installed. Build details are on the [Inference Engine Samples](../IE_DG/Samples_Overview.md) and [Demo Applications](@ref omz_demos_README) pages.
This section explains how to build and use the sample and demo applications provided with the toolkit. You will need CMake 3.10 or later and Microsoft Visual Studio 2017 or 2019 installed. Build details are on the [Inference Engine Samples](../IE_DG/Samples_Overview.md) and [Demo Applications](@ref omz_demos) pages.
To build all the demos and samples:

View File

@ -16,71 +16,50 @@ OpenVINO™ toolkit:
The following diagram illustrates the typical OpenVINO™ workflow (click to see the full-size image):
![](img/OpenVINO-diagram.png)
### Model Preparation
#### Components: [Open Model Zoo](https://github.com/opencv/open_model_zoo), [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction), [OpenVINO Training Extentions](https://github.com/openvinotoolkit/training_extensions)
### Model Preparation, Conversion and Optimization
You can use your framework of choice to prepare and train a deep learning model or just download a pre-trained model from the Open Model Zoo. The Open Model Zoo includes deep learning solutions to a variety of vision problems, including object recognition, face recognition, pose estimation, text detection, and action recognition, at a range of measured complexities.
Several of these pre-trained models are also used in the [code samples](IE_DG/Samples_Overview.md) and [application demos](@ref omz_demos_README). To download models from the Open Model Zoo, the [Model Downloader](@ref omz_tools_downloader_README) tool is used.
Several of these pre-trained models are also used in the [code samples](IE_DG/Samples_Overview.md) and [application demos](@ref omz_demos). To download models from the Open Model Zoo, the [Model Downloader](@ref omz_tools_downloader) tool is used.
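As a minimal sketch (the install path reflects a default 2021 package layout and `<model_name>` is a placeholder; adapt both to your setup):
```sh
# Download a model from the Open Model Zoo into ~/models (paths are placeholders)
cd <INSTALL_DIR>/deployment_tools/open_model_zoo/tools/downloader
python3 downloader.py --name <model_name> --output_dir ~/models
```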
[Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) provides a web-based interface to download a pre-trained model and enables you to visualize, fine-tune, and compare performance of deep learning models.
If you cannot find the needed model in the Open Model Zoo or want to train your own model, use [OpenVINO Training Extensions](https://github.com/openvinotoolkit/training_extensions), which provide a convenient environment to train deep learning models.
![](img/OV-diagram-step1.png)
Useful documents for model preparation:
* [Model Downloader](@ref omz_tools_downloader) utility
* [Intel's Pretrained Models (Open Model Zoo)](@ref omz_models_group_intel)
* [Public Pretrained Models (Open Model Zoo)](@ref omz_models_group_public)
* [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction)
* [OpenVINO Training Extensions](https://github.com/openvinotoolkit/training_extensions)
### Model Conversion
#### Components: [Model Optimizer](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
The [Model Optimizer](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) is a cross-platform command-line
One of the core components of the OpenVINO™ toolkit is the [Model Optimizer](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md), a cross-platform command-line
tool that converts a trained neural network from its source framework to an open-source, nGraph-compatible [Intermediate Representation (IR)](MO_DG/IR_and_opsets.md) for use in inference operations. The Model Optimizer imports models trained in popular frameworks such as Caffe*, TensorFlow*, MXNet*, Kaldi*, and ONNX* and performs a few optimizations to remove excess layers and group operations when possible into simpler, faster graphs.
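For example, a minimal sketch of an IR conversion (the install path and `model.onnx` are placeholders for your environment and model):
```sh
# Convert a trained model to IR (.xml/.bin) in ~/ir (paths are placeholders)
cd <INSTALL_DIR>/deployment_tools/model_optimizer
python3 mo.py --input_model model.onnx --output_dir ~/ir
```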
If your neural network contains layers that are not in the list of known layers for supported frameworks, you can adjust the conversion and optimization process using [Custom Layers](HOWTO/Custom_Layers_Guide.md).
Run the [Accuracy Checker utility](@ref omz_tools_accuracy_checker) either against source topologies or against the output representation to evaluate the accuracy of inference. You can also use the Accuracy Checker as a part of the [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction), an official OpenVINO™ graphical interface.
![](img/OV-diagram-step2.png)
Useful documents for model conversion and optimization:
If your neural network model contains layers that are not in the list of known layers for supported frameworks, you can adjust the conversion and optimization process by using [Custom Layers](HOWTO/Custom_Layers_Guide.md).
Run the [Accuracy Checker utility](@ref omz_tools_accuracy_checker) either against source topologies or against the output representation to evaluate the accuracy of inference. The Accuracy Checker is also part of the [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction), an integrated web-based performance analysis studio.
Use the [Post-training Optimization Tool](@ref pot_README) to accelerate the inference of a deep learning model by quantizing it to INT8.
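A minimal sketch of a config-driven run, assuming the tool is installed and `<config>.json` already describes your model, dataset, and quantization algorithm:
```sh
# Quantize the model described in <config>.json to INT8 (config path is a placeholder)
pot -c <config>.json
```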
Useful documents for model optimization:
* [Model Optimizer Developer Guide](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
* [Intermediate Representation and Opsets](MO_DG/IR_and_opsets.md)
* [Custom Layers Guide](HOWTO/Custom_Layers_Guide.md)
* [Accuracy Checker utility](@ref omz_tools_accuracy_checker)
* [Deep Learning Workbench Guide](@ref workbench_docs_Workbench_DG_Introduction)
* [Post-training Optimization Tool](@ref pot_README)
* [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction)
* [Model Downloader](@ref omz_tools_downloader) utility
* [Intel's Pretrained Models (Open Model Zoo)](@ref omz_models_group_intel)
* [Public Pretrained Models (Open Model Zoo)](@ref omz_models_group_public)
### Running and Tuning Inference
#### Components: [Inference Engine](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md), [Post-training Optimization Tool](@ref pot_README), [Neural Network Compression Framework](https://github.com/openvinotoolkit/nncf)
The other core component of OpenVINO™ is the [Inference Engine](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md), which manages the loading and compiling of the optimized neural network model, runs inference operations on input data, and outputs the results. Inference Engine can execute synchronously or asynchronously, and its plugin architecture manages the appropriate compilations for execution on multiple Intel® devices, including both workhorse CPUs and specialized graphics and video processing platforms (see below, Packaging and Deployment).
You can use OpenVINO™ Tuning Utilities with the Inference Engine for trial and test inference on your model. The [Benchmark utility](../inference-engine/tools/benchmark_tool/README.md) uses an input model to run iterative tests for throughput or latency measures, and the [Cross Check Utility](../inference-engine/tools/cross_check_tool/README.md) compares performance of differently configured inferences.
You can use the OpenVINO™ tuning utilities with the Inference Engine to run trial and test inference on your model. The Benchmark utility uses an input model to run iterative tests for throughput or latency measures, and the [Cross Check Utility](../inference-engine/tools/cross_check_tool/README.md) compares the performance of differently configured inferences.
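For instance, a minimal sketch of a Benchmark utility run (the model path is a placeholder; adjust the device and iteration count to your needs):
```sh
# Measure asynchronous throughput on CPU for 100 iterations (model path is a placeholder)
./benchmark_app -m <path_to_model>/model.xml -d CPU -api async -niter 100
```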
For a full browser-based studio integrating these other key tuning utilities, try the [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction).
![](img/OV-diagram-step3.png)
OpenVINO™ toolkit includes a set of [inference code samples](IE_DG/Samples_Overview.md) and [application demos](@ref omz_demos) showing how inference is run and output processed for use in retail environments, classrooms, smart camera applications, and other solutions.
Use the [Post-training Optimization Tool](@ref pot_README) to accelerate the inference of a deep learning model by quantizing it to INT8. Models from Open Model Zoo can be quantized using the [Model Quantizer utility](https://github.com/openvinotoolkit/open_model_zoo/tree/master/tools/downloader#model-quantizer-usage).
Besides the [Post-training Optimization Tool](@ref pot_README), the [Neural Network Compression Framework (NNCF)](https://github.com/openvinotoolkit/nncf) can be used for fine-tuning-based INT8 quantization or for applying more aggressive compression methods, such as filter pruning, sparsity, and binarization, to further speed up model inference and reduce the footprint. In that case, the compression algorithms are integrated into your model training pipeline.
OpenVINO also makes use of open-source and Intel™ tools for traditional graphics processing and performance management. Intel® Media SDK supports accelerated rich-media processing, including transcoding. OpenVINO™ optimizes calls to the rich OpenCV and OpenVX libraries for processing computer vision workloads. And the new DL Streamer integration further accelerates video pipelining and performance.
Try these key tuning tools in your browser with the intuitive graphical interface of the [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction).
![](img/OV-diagram-step3.png)
Useful documents for inference tuning:
* [Inference Engine Developer Guide](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md)
* [Inference Engine API References](./api_references.html)
* [Inference Code Samples](IE_DG/Samples_Overview.md)
* [Application Demos](@ref omz_demos)
* [Post-training Optimization Tool](@ref pot_README)
* [Low Precision Optimization Guide](@ref pot_docs_LowPrecisionOptimizationGuide)
* [Neural Network Compression Framework (NNCF)](https://github.com/openvinotoolkit/nncf)
* [Deep Learning Workbench Guide](@ref workbench_docs_Workbench_DG_Introduction)
* [Intel Media SDK](https://github.com/Intel-Media-SDK/MediaSDK)
* [DL Streamer Samples](@ref gst_samples_README)
@ -88,8 +67,6 @@ Useful documents for inference tuning:
* [OpenVX](https://software.intel.com/en-us/openvino-ovx-guide)
### Packaging and Deployment
#### Components: [Deployment Manager](./install_guides/deployment-manager-tool.md)
The Intel Distribution of OpenVINO™ toolkit outputs optimized inference runtimes for the following devices:
* Intel® CPUs
* Intel® Processor Graphics
@ -98,13 +75,12 @@ The Intel Distribution of OpenVINO™ toolkit outputs optimized inference runtim
The Inference Engine's plug-in architecture can be extended to meet other specialized needs. [Deployment Manager](./install_guides/deployment-manager-tool.md) is a Python* command-line tool that assembles the tuned model, IR files, your application, and required dependencies into a runtime package for your target device. It outputs packages for CPU, GPU, and VPU on Linux* and Windows*, and Neural Compute Stick-optimized packages with Linux.
![](img/OV-diagram-step4.png)
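As an illustration only (the script location and options shown here are assumptions based on the default package layout; see the Deployment Manager Guide below for the exact usage):
```sh
# Assemble a minimal runtime package for CPU and GPU targets (paths and options are assumptions)
cd <INSTALL_DIR>/deployment_tools/tools/deployment_manager
python3 deployment_manager.py --targets cpu gpu --output_dir ~/deployment_package
```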
* [Inference Engine Integration Workflow](IE_DG/Integrate_with_customer_application_new_API.md)
* [Inference Engine API References](./api_references.html)
* [Inference Engine Plug-in Developer Guide](./ie_plugin_api/index.html)
* [Deployment Manager Guide](./install_guides/deployment-manager-tool.md)
## OpenVINO™ Toolkit Components
Intel® Distribution of OpenVINO™ toolkit includes the following components:

View File

@ -27,8 +27,9 @@ Prebuilt images are available on:
## Build a Docker* Image
You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your setting via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci). The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit.
You can also try our [Tutorials](https://github.com/openvinotoolkit/docker_ci/tree/master/docs/tutorials) which demonstrate the usage of OpenVINO™ Docker containers.
You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your settings via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci).
The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit.
You can also try our [Tutorials](https://github.com/openvinotoolkit/docker_ci/tree/master/docs/tutorials) which demonstrate the usage of Docker containers with the Intel® Distribution of OpenVINO™ toolkit. You can find device-specific steps to configure an Intel® Distribution of OpenVINO™ toolkit Dockerfile below.
## Use Docker* Image for CPU
@ -36,34 +37,40 @@ You can also try our [Tutorials](https://github.com/openvinotoolkit/docker_ci/tr
- All instructions that are available to the host process are available to the process in the container, including, for example, AVX2 and AVX512. There are no restrictions.
- Docker\* does not use virtualization or emulation. The process in Docker* is just a regular Linux process, but it is isolated from the external world at the kernel level. The performance penalty is small.
### <a name="building-for-cpu"></a>Build a Docker* Image for CPU
### <a name="configuring-for-cpu"></a>Configure a Docker* Image for CPU
You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your setting via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit.
The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit.
You don't need to perform any specific steps to configure an Intel® Distribution of OpenVINO™ toolkit Dockerfile for CPU. You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your settings via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci).
### Run the Docker* Image for CPU
Run the image with the following command:
```sh
docker run -it --rm <image_name>
```
## Use a Docker* Image for GPU
### Build a Docker* Image for GPU
### Configure a Docker* Image for GPU
> **NOTE**: Only Intel® integrated graphics are supported.
**Prerequisites:**
- The GPU is not available in the container by default; you must attach it to the container.
- Kernel driver must be installed on the host.
- Intel® OpenCL™ runtime package must be included into the container.
- In the container, non-root user must be in the `video` and `render` groups. To add a user to the render group, follow the [Configuration Guide for the Intel® Graphics Compute Runtime for OpenCL™ on Ubuntu* 20.04](https://github.com/openvinotoolkit/docker_ci/blob/master/configure_gpu_ubuntu20.md).
- In the container, non-root user must be in the `video` and `render` groups. To add a user to the render group, follow the [Configuration Guide for the Intel® Graphics Compute Runtime for OpenCL™ on Ubuntu* 20.04](https://github.com/openvinotoolkit/docker_ci/blob/master/configure_gpu_ubuntu20.md).
Before building a Docker* image on GPU, add the following commands to a Dockerfile:
To configure an OpenVINO Docker* image with access to the GPU, add the following commands to a Dockerfile:
**Ubuntu 18.04/20.04**:
```sh
WORKDIR /tmp/opencl
RUN useradd -ms /bin/bash -G video,users openvino && \
chown openvino -R /home/openvino
RUN apt-get update && \
apt-get install -y --no-install-recommends ocl-icd-libopencl1 && \
rm -rf /var/lib/apt/lists/* && \
@ -76,7 +83,24 @@ RUN apt-get update && \
ldconfig && \
rm /tmp/opencl
```
Alternatively, you can use the installation script `install_NEO_OCL_driver.sh` if you previously installed OpenVINO in the Dockerfile, where `INTEL_OPENCL` is the variable that stores the default version of the Intel® Graphics Compute Runtime for OpenCL™ Driver:
```sh
WORKDIR /tmp/opencl
RUN useradd -ms /bin/bash -G video,users openvino && \
chown openvino -R /home/openvino
# Please use `20.35.17767` for 10th generation Intel® Core™ processor (formerly Ice Lake) or 11th generation Intel® Core™ processor (formerly Tiger Lake)
ARG INTEL_OPENCL=19.41.14441
WORKDIR ${INTEL_OPENVINO_DIR}/install_dependencies
RUN ./install_NEO_OCL_driver.sh --no_numa -y --install_driver ${INTEL_OPENCL} && \
rm -rf /var/lib/apt/lists/*
```
**CentOS 7/RHEL 8**:
```sh
WORKDIR /tmp/opencl
RUN useradd -ms /bin/bash -G video,users openvino && \
@ -98,9 +122,27 @@ RUN yum update -y && yum install -y https://dl.fedoraproject.org/pub/epel/epel-r
yum remove -y epel-release
```
Alternatively, you can use the installation script `install_NEO_OCL_driver.sh` if you previously installed OpenVINO in the Dockerfile, where `INTEL_OPENCL` is the variable that stores the default version of the Intel® Graphics Compute Runtime for OpenCL™ Driver:
```sh
WORKDIR /tmp/opencl
RUN useradd -ms /bin/bash -G video,users openvino && \
chown openvino -R /home/openvino
RUN groupmod -g 44 video
# Please use `20.35.17767` for 10th generation Intel® Core™ processor (formerly Ice Lake) or 11th generation Intel® Core™ processor (formerly Tiger Lake)
ARG INTEL_OPENCL=19.41.14441
WORKDIR ${INTEL_OPENVINO_DIR}/install_dependencies
RUN ./install_NEO_OCL_driver.sh --no_numa -y --install_driver ${INTEL_OPENCL} && \
yum clean all && rm -rf /var/cache/yum && \
yum remove -y epel-release
```
### Run the Docker* Image for GPU
To make the GPU available in the container, attach the GPU to the container using the `--device /dev/dri` option and run the container:
```sh
docker run -it --rm --device /dev/dri <image_name>
```
@ -108,7 +150,7 @@ docker run -it --rm --device /dev/dri <image_name>
## Use a Docker* Image for Intel® Neural Compute Stick 2
### Build and Run the Docker* Image for Intel® Neural Compute Stick 2
### Configure and Run the Docker* Image for Intel® Neural Compute Stick 2
**Known limitations:**
@ -118,7 +160,8 @@ docker run -it --rm --device /dev/dri <image_name>
Use one of the following options as **Possible solutions for Intel® Neural Compute Stick 2:**
#### Option #1
#### Option 1
1. Get rid of UDEV by rebuilding `libusb` without UDEV support in the Docker* image (add the following commands to a `Dockerfile`):
- **Ubuntu 18.04/20.04**:
```sh
@ -192,22 +235,23 @@ RUN /usr/bin/install -c -m 644 libusb-1.0.pc '/usr/local/lib/pkgconfig' && \
docker run -it --rm --device-cgroup-rule='c 189:* rmw' -v /dev/bus/usb:/dev/bus/usb <image_name>
```
#### Option #2
#### Option 2
Run the container in privileged mode, set the Docker network configuration to host, and mount all devices into the container:
```sh
docker run -it --rm --privileged -v /dev:/dev --network=host <image_name>
```
> **NOTES**:
>
> - This option is not secure.
> - Conflicts may occur with Kubernetes* and other tools that use orchestration and private networks.
## Use a Docker* Image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
### Build Docker* Image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
### Configure Docker* Image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
To use the Docker container for inference on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs:
1. Set up the environment on the host machine, that is going to be used for running Docker*.
It is required to execute `hddldaemon`, which is responsible for communication between the HDDL plugin and the board.
1. Set up the environment on the host machine that is going to be used for running Docker*.
It is required to execute `hddldaemon`, which is responsible for communication between the HDDL plugin and the board.
To learn how to set up the environment (the OpenVINO package or HDDL package must be pre-installed), see [Configuration guide for HDDL device](https://github.com/openvinotoolkit/docker_ci/blob/master/install_guide_vpu_hddl.md) or [Configuration Guide for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs](installing-openvino-linux-ivad-vpu.md).
2. Prepare the Docker* image (add the following commands to a Dockerfile).
- **Ubuntu 18.04**:
@ -255,50 +299,57 @@ $HDDL_INSTALL_DIR/hddldaemon
### Run the Docker* Image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
To run the built Docker* image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, use the following command:
```sh
docker run -it --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp <image_name>
```
> **NOTES**:
>
> - The device `/dev/ion` need to be shared to be able to use ion buffers among the plugin, `hddldaemon` and the kernel.
>
> - The device `/dev/ion` needs to be shared to be able to use ion buffers among the plugin, `hddldaemon` and the kernel.
> - Since separate inference tasks share the same HDDL service communication interface (the service creates mutexes and a socket file in `/var/tmp`), `/var/tmp` needs to be mounted and shared among them.
In some cases, the ion driver is not enabled (for example, due to a newer kernel version or iommu incompatibility). `lsmod | grep myd_ion` returns empty output. To resolve, use the following command:
In some cases, the ion driver is not enabled, for example, due to a newer kernel version or an IOMMU (Input-Output Memory Management Unit) incompatibility; in that case, `lsmod | grep myd_ion` returns empty output. To resolve the issue, use the following command:
```sh
docker run -it --rm --net=host -v /var/tmp:/var/tmp ipc=host <image_name>
docker run -it --rm --net=host -v /var/tmp:/var/tmp --ipc=host <image_name>
```
> **NOTES**:
>
> - When building docker images, create a user in the docker file that has the same UID and GID as the user which runs hddldaemon on the host.
> - Run the application in the docker with this user.
>
> - When building Docker images, create a user in the Dockerfile that has the same UID (User Identifier) and GID (Group Identifier) as the user that runs `hddldaemon` on the host.
> - Run the application in the Docker image as this user.
> - Alternatively, you can start `hddldaemon` as the root user on the host, but this approach is not recommended.
### Run Demos in the Docker* Image
### Run Demos in the Docker* Image
To run the Security Barrier Camera Demo on a specific inference device, run the following commands with root privileges (additional third-party dependencies will be installed):
**CPU**:
```sh
docker run -itu root:root --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp --device /dev/dri:/dev/dri --device-cgroup-rule='c 189:* rmw' -v /dev/bus/usb:/dev/bus/usb <image_name>
docker run -itu root:root --rm <image_name>
/bin/bash -c "apt update && apt install sudo && deployment_tools/demo/demo_security_barrier_camera.sh -d CPU -sample-options -no_show"
```
**GPU**:
```sh
docker run -itu root:root --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp --device /dev/dri:/dev/dri --device-cgroup-rule='c 189:* rmw' -v /dev/bus/usb:/dev/bus/usb <image_name>
docker run -itu root:root --rm --device /dev/dri:/dev/dri <image_name>
/bin/bash -c "apt update && apt install sudo && deployment_tools/demo/demo_security_barrier_camera.sh -d GPU -sample-options -no_show"
```
**MYRIAD**:
```sh
docker run -itu root:root --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp --device /dev/dri:/dev/dri --device-cgroup-rule='c 189:* rmw' -v /dev/bus/usb:/dev/bus/usb <image_name>
docker run -itu root:root --rm --device-cgroup-rule='c 189:* rmw' -v /dev/bus/usb:/dev/bus/usb <image_name>
/bin/bash -c "apt update && apt install sudo && deployment_tools/demo/demo_security_barrier_camera.sh -d MYRIAD -sample-options -no_show"
```
**HDDL**:
```sh
docker run -itu root:root --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp --device /dev/dri:/dev/dri --device-cgroup-rule='c 189:* rmw' -v /dev/bus/usb:/dev/bus/usb <image_name>
docker run -itu root:root --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp <image_name>
/bin/bash -c "apt update && apt install sudo && deployment_tools/demo/demo_security_barrier_camera.sh -d HDDL -sample-options -no_show"
```
@ -312,12 +363,12 @@ For instructions for previous releases with FPGA Support, see documentation for
## Troubleshooting
If you got proxy issues, please setup proxy settings for Docker. See the Proxy section in the [Install the DL Workbench from Docker Hub* ](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) topic.
If you have proxy issues, please set up the proxy settings for Docker. See the Proxy section in the [Install the DL Workbench from Docker Hub*](@ref workbench_docs_Workbench_DG_Run_Locally) topic.
## Additional Resources
* [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs.
- [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs.
* Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit)
- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit)
* Intel® Neural Compute Stick 2 Get Started: [https://software.intel.com/en-us/neural-compute-stick/get-started](https://software.intel.com/en-us/neural-compute-stick/get-started)
- Intel® Neural Compute Stick 2 Get Started: [https://software.intel.com/en-us/neural-compute-stick/get-started](https://software.intel.com/en-us/neural-compute-stick/get-started)

View File

@ -2,7 +2,7 @@
The Intel® Distribution of OpenVINO™ toolkit quickly deploys applications and solutions that emulate human vision. Based on Convolutional Neural Networks (CNN), the toolkit extends computer vision (CV) workloads across Intel® hardware, maximizing performance. The Intel® Distribution of OpenVINO™ toolkit includes the Intel® Deep Learning Deployment Toolkit.
This guide provides device specifics for a Docker* image creation with Intel® Distribution of OpenVINO™ toolkit for Linux* and its further usage.
This guide provides device-specific instructions for creating a Docker* image with the Intel® Distribution of OpenVINO™ toolkit for Windows* and for its further usage.
## System Requirements
@ -21,43 +21,46 @@ Prebuilt images are available on [Docker Hub](https://hub.docker.com/u/openvino)
## Build a Docker* Image
You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your setting via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci). The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit.
You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your settings via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci).
The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can find device-specific steps to configure an Intel® Distribution of OpenVINO™ toolkit Dockerfile below.
## Build and Run the Docker* Image for CPU
## Configure and Run the Docker* Image for CPU
## Install Additional Dependencies
### Install CMake
To add CMake to the image, add the following commands to the Dockerfile:
~~~
```bat
RUN powershell.exe -Command `
Invoke-WebRequest -URI https://cmake.org/files/v3.14/cmake-3.14.7-win64-x64.msi -OutFile %TMP%\\cmake-3.14.7-win64-x64.msi ; `
Start-Process %TMP%\\cmake-3.14.7-win64-x64.msi -ArgumentList '/quiet /norestart' -Wait ; `
Remove-Item %TMP%\\cmake-3.14.7-win64-x64.msi -Force
RUN SETX /M PATH "C:\Program Files\CMake\Bin;%PATH%"
~~~
In case of proxy issues, please add the `ARG HTTPS_PROXY` and `-Proxy %%HTTPS_PROXY%` settings to the `powershell.exe` command to the Dockerfile. Then build a docker image:
~~~
```
In case of proxy issues, please add the `ARG HTTPS_PROXY` and `-Proxy %%HTTPS_PROXY%` settings to the `powershell.exe` command in the Dockerfile. Then build the Docker image:
```bat
docker build . -t <image_name> `
--build-arg HTTPS_PROXY=<https://your_proxy_server:port>
~~~
```
### Install Microsoft Visual Studio* Build Tools
You can add Microsoft Visual Studio Build Tools* to a Windows* OS Docker image. Available options are to use offline installer for Build Tools
(follow the [Instruction for the offline installer](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019)) or
to use the online installer for Build Tools (follow [Instruction for the online installer](https://docs.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2019)).
Microsoft Visual Studio Build Tools* are licensed as a supplement your existing Microsoft Visual Studio* license.
You can add Microsoft Visual Studio Build Tools* to a Windows* OS Docker image using the [offline](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019) or [online](https://docs.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2019) installers for Build Tools.
Microsoft Visual Studio Build Tools* are licensed as a supplement to your existing Microsoft Visual Studio* license.
Any images built with these tools should be for your personal use or for use in your organization in accordance with your existing Visual Studio* and Windows* licenses.
To add MSBuild 2019 to the image, add the following commands to the Dockerfile:
~~~
```bat
RUN powershell.exe -Command Invoke-WebRequest -URI https://aka.ms/vs/16/release/vs_buildtools.exe -OutFile %TMP%\\vs_buildtools.exe
RUN %TMP%\\vs_buildtools.exe --quiet --norestart --wait --nocache `
--installPath "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools" `
--installPath "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools" `
--add Microsoft.VisualStudio.Workload.MSBuildTools `
--add Microsoft.VisualStudio.Workload.UniversalBuildTools `
--add Microsoft.VisualStudio.Workload.VCTools --includeRecommended `
@ -65,35 +68,38 @@ RUN %TMP%\\vs_buildtools.exe --quiet --norestart --wait --nocache `
--remove Microsoft.VisualStudio.Component.Windows10SDK.10586 `
--remove Microsoft.VisualStudio.Component.Windows10SDK.14393 `
--remove Microsoft.VisualStudio.Component.Windows81SDK || IF "%ERRORLEVEL%"=="3010" EXIT 0 && powershell set-executionpolicy remotesigned
~~~
In case of proxy issues, please use an offline installer for Build Tools (follow [Instruction for the offline installer](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019).
```
In case of proxy issues, please use the [offline installer for Build Tools](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019).
## Run the Docker* Image for CPU
To install the OpenVINO toolkit from the prepared Docker image, run the image with the following command:
~~~
To start an interactive session that allows inference on the CPU, run the following command:
```bat
docker run -it --rm <image_name>
~~~
```
If you want to try some demos, run the image with root privileges (some additional third-party dependencies will be installed):
~~~
docker run -itu ContainerAdministrator --rm <image_name> cmd /S /C "cd deployment_tools\demo && demo_security_barrier_camera.bat -d CPU -sample-options -no_show"
~~~
## Build and Run the Docker* Image for GPU
```bat
docker run -itu ContainerAdministrator --rm <image_name> cmd /S /C "cd deployment_tools\demo && demo_security_barrier_camera.bat -d CPU -sample-options -no_show"
```
## Configure and Run the Docker* Image for GPU
The GPU acceleration feature in Windows containers requires that the Windows host, OpenVINO toolkit, and Docker* meet the following requirements:
* [Windows requirements](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/gpu-acceleration):
* The container host must be running Windows Server 2019 or Windows 10 of version 1809 or higher.
* The container base image must be `mcr.microsoft.com/windows:1809` or higher. Windows Server Core and Nano Server container images are not currently supported.
* The container host must be running Docker Engine 19.03 or higher.
* The container host must have GPU running display drivers of version WDDM 2.5 or higher.
* [OpenVINO™ GPU requirement](https://docs.openvinotoolkit.org/latest/openvino_docs_install_guides_installing_openvino_windows.html#Install-GPU):
* Intel Graphics Driver for Windows of version 15.65 or higher.
* [Docker isolation mode requirement](https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/hyperv-container):
* Windows host and container version tags must match.
* [Windows host and container isolation process support](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility)
- [Windows requirements](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/gpu-acceleration):
- The container host must be running Windows Server 2019 or Windows 10 of version 1809 or higher.
- The container base image must be `mcr.microsoft.com/windows:1809` or higher. Windows Server Core and Nano Server container images are not currently supported.
- The container host must be running Docker Engine 19.03 or higher.
- The container host must have GPU running display drivers of version WDDM 2.5 or higher.
- [OpenVINO™ GPU requirement](https://docs.openvinotoolkit.org/latest/openvino_docs_install_guides_installing_openvino_windows.html#Install-GPU):
- Intel Graphics Driver for Windows of version 15.65 or higher.
- [Docker isolation mode requirement](https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/hyperv-container):
- Windows host and container version tags must match.
- [Windows host and container isolation process support](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility)
## Build a Docker* Image for Your Host System
@ -101,50 +107,49 @@ GPU Acceleration in Windows containers feature requires to meet Windows host, Op
2. Check your [Windows host and container isolation process compatibility](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility).
3. Find the appropriate Windows container base image on [DockerHub*](https://hub.docker.com/_/microsoft-windows) and set up your host/container version in the `FROM` Dockerfile instruction.
For example, in [openvino_c_dev_2021.dockerfile](https://github.com/openvinotoolkit/docker_ci/blob/master/dockerfiles/winserver2019/openvino_c_dev_2021.dockerfile), change:
~~~
```bat
FROM mcr.microsoft.com/windows/servercore:ltsc2019 AS ov_base
~~~
to
~~~
```
to:
```bat
FROM mcr.microsoft.com/windows:20H2
~~~
4. Build the Docker image
~~~
```
4. Build the Docker image
```bat
docker build --build-arg package_url=<OpenVINO pkg> -f <Dockerfile> -t <image_name> .
~~~
```
5. Copy `OpenCL.dll` from your `C:\Windows\System32` host folder to any `temp` directory:
~~~
```bat
mkdir C:\tmp
copy C:\Windows\System32\OpenCL.dll C:\tmp
~~~
```
## Run the Docker* Image for GPU
1. To try inference on a GPU, run the image with the following command:
~~~
1. To try inference on a GPU, run the image with the following command:
```bat
docker run -it --rm -u ContainerAdministrator --isolation process --device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599 -v C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409:C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409 -v C:\tmp:C:\tmp <image_name>
~~~
where
* `--device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599` is a reserved interface class GUID for a GPU device.
* `C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409` is the path to OpenCL driver home directory. To find it on your PC, run the `C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_*` regular expression.
* `C:\tmp` is the folder with the copy of `OpenCL.dll` from your `C:\Windows\System32` host folder.
```
where
- `--device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599` is a reserved interface class GUID for a GPU device.
- `C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409` is the path to the OpenCL driver home directory. To find it on your PC, look for a directory matching `C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_*`.
- `C:\tmp` is the folder with the copy of `OpenCL.dll` from your `C:\Windows\System32` host folder.
2. Copy `OpenCL.dll` to the `C:\Windows\System32` folder inside the container and set the appropriate registry entry. Now you can run inference on a GPU device:
~~~
```bat
copy C:\tmp\OpenCL.dll C:\Windows\System32\ && reg add "HKLM\SOFTWARE\Khronos\OpenCL\Vendors" /v "C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409\ocl\bin\x64\intelocl64.dll" /t REG_DWORD /d 0
~~~
3. For example, run the `demo_security_barrier_camera` demo with the command below:
~~~
```
3. For example, run the `demo_security_barrier_camera` demo with the command below:
```bat
cd bin && setupvars.bat && cd ../ && cd deployment_tools\demo && demo_security_barrier_camera.bat -d GPU -sample-options -no_show
~~~
```
> **NOTE**: Additional third-party dependencies will be installed.
## Troubleshooting
If you got proxy issues, please setup proxy settings for Docker. See the Proxy section in the [Install the DL Workbench from Docker Hub* ](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) topic.
If you have proxy issues, please set up the proxy settings for Docker. See the Proxy section in the [Install the DL Workbench from Docker Hub*](@ref workbench_docs_Workbench_DG_Run_Locally) topic.
## Additional Resources
* [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs.
- [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs.
* Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit)
- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit)

View File

@ -29,21 +29,20 @@ o_{i} = a_{i} \wedge b_{i}
**Inputs**
* **1**: A tensor of type *T* and arbitrary shape. **Required.**
* **2**: A tensor of type *T* and arbitrary shape. **Required.**
* **1**: A tensor of type *T_BOOL* and arbitrary shape. **Required.**
* **2**: A tensor of type *T_BOOL* and arbitrary shape. **Required.**
**Outputs**
* **1**: The result of element-wise *LogicalAnd* operation. A tensor of type boolean.
* **1**: The result of the element-wise *LogicalAnd* operation. A tensor of type *T_BOOL* with shape equal to the broadcasted shape of the two inputs.
**Types**
* *T*: boolean type.
* *T_BOOL*: `boolean`.
**Examples**
*Example 1*
*Example 1: no broadcast*
```xml
<layer ... type="LogicalAnd">
@ -66,7 +65,7 @@ o_{i} = a_{i} \wedge b_{i}
</layer>
```
*Example 2: broadcast*
*Example 2: numpy broadcast*
```xml
<layer ... type="LogicalAnd">
<input>

View File

@ -6,43 +6,43 @@
**Short description**: *LogicalOr* performs element-wise logical OR operation with two given tensors applying multi-directional broadcast rules.
**Detailed description**: Before performing the logical operation, input tensors *a* and *b* are broadcast if their shapes differ and the `auto_broadcast` attribute is not `none`. Broadcasting is performed according to the `auto_broadcast` value.
After broadcasting *LogicalOr* does the following with the input tensors *a* and *b*:
\f[
o_{i} = a_{i} \lor b_{i}
\f]
**Attributes**:
* *auto_broadcast*
* **Description**: specifies rules used for auto-broadcasting of input tensors.
* **Range of values**:
* *none* - no auto-broadcasting is allowed, all input shapes should match
* *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in <a href="https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md">ONNX docs</a>.
* *none* - no auto-broadcasting is allowed, all input shapes must match
* *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md),
* *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md).
* **Type**: string
* **Default value**: "numpy"
* **Required**: *no*
**Inputs**
* **1**: A tensor of type *T*. **Required.**
* **2**: A tensor of type *T*. **Required.**
* **1**: A tensor of type *T_BOOL* and arbitrary shape. **Required.**
* **2**: A tensor of type *T_BOOL* and arbitrary shape. **Required.**
**Outputs**
* **1**: The result of element-wise logical OR operation. A tensor of type boolean.
* **1**: The result of the element-wise *LogicalOr* operation. A tensor of type *T_BOOL* with shape equal to the broadcasted shape of the two inputs.
**Types**
* *T*: boolean type.
**Detailed description**
Before performing logical operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value.
After broadcasting *LogicalOr* does the following with the input tensors *a* and *b*:
\f[
o_{i} = a_{i} or b_{i}
\f]
* *T_BOOL*: `boolean`.
**Examples**
*Example 1*
*Example 1: no broadcast*
```xml
<layer ... type="LogicalOr">
@ -65,7 +65,7 @@ o_{i} = a_{i} or b_{i}
</layer>
```
*Example 2: broadcast*
*Example 2: numpy broadcast*
```xml
<layer ... type="LogicalOr">
<input>

View File

@ -12,7 +12,7 @@ is only accessible from the machine the Docker container is built on:
application are accessible only from the `localhost` by default.
* When using `docker run` to [start the DL Workbench from Docker
Hub](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub), limit connections for the host IP 127.0.0.1.
Hub](@ref workbench_docs_Workbench_DG_Run_Locally), limit connections for the host IP 127.0.0.1.
For example, limit the connections for the host IP to the port `5665` with the `-p
127.0.0.1:5665:5665` command. Refer to [Container
networking](https://docs.docker.com/config/containers/container-networking/#published-ports) for

View File

@ -0,0 +1,15 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "comparison.hpp"
namespace reference_tests {
namespace ComparisonOpsRefTestDefinitions {
namespace {
TEST_P(ReferenceComparisonLayerTest, CompareWithHardcodedRefs) {
Exec();
}
} // namespace
} // namespace ComparisonOpsRefTestDefinitions
} // namespace reference_tests

View File

@ -60,4 +60,4 @@ private:
}
};
} // namespace ComparisonOpsRefTestDefinitions
} // namespace reference_tests
} // namespace reference_tests

View File

@ -20,10 +20,6 @@ namespace reference_tests {
namespace ComparisonOpsRefTestDefinitions {
namespace {
TEST_P(ReferenceComparisonLayerTest, EqualCompareWithHardcodedRefs) {
Exec();
}
template <element::Type_t IN_ET>
std::vector<RefComparisonParams> generateComparisonParams(const element::Type& type) {
using T = typename element_type_traits<IN_ET>::value_type;
@ -81,4 +77,4 @@ INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparis
ReferenceComparisonLayerTest::getTestCaseName);
} // namespace
} // namespace ComparisonOpsRefTestDefinitions
} // namespace reference_tests
} // namespace reference_tests

View File

@ -18,9 +18,6 @@ using ComparisonTypes = ngraph::helpers::ComparisonTypes;
namespace reference_tests {
namespace ComparisonOpsRefTestDefinitions {
namespace {
TEST_P(ReferenceComparisonLayerTest, LessCompareWithHardcodedRefs) {
Exec();
}
template <element::Type_t IN_ET>
std::vector<RefComparisonParams> generateComparisonParams(const element::Type& type) {
@ -79,4 +76,4 @@ std::vector<RefComparisonParams> generateComparisonCombinedParams() {
INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()),
ReferenceComparisonLayerTest::getTestCaseName);
} // namespace ComparisonOpsRefTestDefinitions
} // namespace reference_tests
} // namespace reference_tests

View File

@ -18,9 +18,6 @@ using ComparisonTypes = ngraph::helpers::ComparisonTypes;
namespace reference_tests {
namespace ComparisonOpsRefTestDefinitions {
namespace {
TEST_P(ReferenceComparisonLayerTest, LessEqualCompareWithHardcodedRefs) {
Exec();
}
template <element::Type_t IN_ET>
std::vector<RefComparisonParams> generateComparisonParams(const element::Type& type) {
@ -79,4 +76,4 @@ std::vector<RefComparisonParams> generateComparisonCombinedParams() {
INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()),
ReferenceComparisonLayerTest::getTestCaseName);
} // namespace ComparisonOpsRefTestDefinitions
} // namespace reference_tests
} // namespace reference_tests

View File

@ -0,0 +1,17 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "logical.hpp"
namespace reference_tests {
namespace LogicalOpsRefTestDefinitions {
namespace {
TEST_P(ReferenceLogicalLayerTest, LogicalWithHardcodedRefs) {
Exec();
}
} // namespace
} // namespace LogicalOpsRefTestDefinitions
} // namespace reference_tests

View File

@ -0,0 +1,62 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ie_core.hpp>
#include <ie_ngraph_utils.hpp>
#include <ngraph/ngraph.hpp>
#include <shared_test_classes/base/layer_test_utils.hpp>
#include <vector>
#include "base_reference_test.hpp"
#include "ngraph_functions/builders.hpp"
namespace reference_tests {
namespace LogicalOpsRefTestDefinitions {
struct RefLogicalParams {
ngraph::helpers::LogicalTypes opType;
Tensor input1;
Tensor input2;
Tensor expected;
};
struct Builder : ParamsBuilder<RefLogicalParams> {
REFERENCE_TESTS_ADD_SET_PARAM(Builder, opType);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, input1);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, input2);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, expected);
};
class ReferenceLogicalLayerTest : public testing::TestWithParam<RefLogicalParams>, public CommonReferenceTest {
public:
void SetUp() override {
const auto& params = GetParam();
function = CreateFunction(params.opType, params.input1.shape, params.input2.shape, params.input1.type);
inputData = {params.input1.data, params.input2.data};
refOutData = {params.expected.data};
}
static std::string getTestCaseName(const testing::TestParamInfo<RefLogicalParams>& obj) {
const auto& param = obj.param;
std::ostringstream result;
result << "LogicalType=" << param.opType << "_";
result << "inpt_shape1=" << param.input1.shape << "_";
result << "inpt_shape2=" << param.input2.shape << "_";
result << "iType=" << param.input1.type << "_";
result << "oType=" << param.expected.type;
return result.str();
}
private:
static std::shared_ptr<ngraph::Function> CreateFunction(ngraph::helpers::LogicalTypes op_type, const ngraph::PartialShape& input_shape1,
const ngraph::PartialShape& input_shape2, const ngraph::element::Type& elem_type) {
const auto in1 = std::make_shared<ngraph::op::Parameter>(elem_type, input_shape1);
const auto in2 = std::make_shared<ngraph::op::Parameter>(elem_type, input_shape2);
const auto logical_op = ngraph::builder::makeLogical(in1, in2, op_type);
return std::make_shared<ngraph::Function>(ngraph::NodeVector {logical_op}, ngraph::ParameterVector {in1, in2});
}
};
} // namespace LogicalOpsRefTestDefinitions
} // namespace reference_tests

View File

@ -10,74 +10,39 @@
#include <shared_test_classes/base/layer_test_utils.hpp>
#include <tuple>
#include "base_reference_test.hpp"
#include "logical.hpp"
using namespace reference_tests;
using namespace ngraph;
using namespace InferenceEngine;
using LogicalTypes = ngraph::helpers::LogicalTypes;
namespace reference_tests {
namespace LogicalOpsRefTestDefinitions {
namespace {
struct LogicalAndParams {
template <class IT, class OT>
LogicalAndParams(const ngraph::PartialShape& input_shape1, const ngraph::PartialShape& input_shape2 ,
const std::vector<IT>& iValues1, const std::vector<IT>& iValues2, const std::vector<OT>& oValues)
: pshape1(input_shape1), pshape2(input_shape2), inType(ngraph::element::boolean), outType(ngraph::element::boolean),
inputData1(CreateBlob(ngraph::element::boolean, iValues1)), inputData2(CreateBlob(ngraph::element::boolean, iValues2)),
refData(CreateBlob(ngraph::element::boolean, oValues)) {}
ngraph::PartialShape pshape1;
ngraph::PartialShape pshape2;
ngraph::element::Type inType;
ngraph::element::Type outType;
InferenceEngine::Blob::Ptr inputData1;
InferenceEngine::Blob::Ptr inputData2;
InferenceEngine::Blob::Ptr refData;
};
class ReferenceLogicalAndLayerTest : public testing::TestWithParam<LogicalAndParams>, public CommonReferenceTest {
public:
void SetUp() override {
auto params = GetParam();
function = CreateFunction(params.pshape1, params.pshape2, params.inType);
inputData = {params.inputData1, params.inputData2};
refOutData = {params.refData};
}
static std::string getTestCaseName(const testing::TestParamInfo<LogicalAndParams>& obj) {
auto param = obj.param;
std::ostringstream result;
result << "input_shape1=" << param.pshape1 << "_";
result << "input_shape2=" << param.pshape2 << "_";
result << "iType=" << param.inType << "_";
result << "oType=" << param.outType;
return result.str();
}
private:
static std::shared_ptr<Function> CreateFunction(const PartialShape& input_shape1,
const PartialShape& input_shape2, const element::Type& input_type) {
const auto in = std::make_shared<op::Parameter>(input_type, input_shape1);
const auto in2 = std::make_shared<op::Parameter>(input_type, input_shape2);
const auto logical_and = std::make_shared<op::v1::LogicalAnd>(in, in2);
return std::make_shared<Function>(NodeVector {logical_and}, ParameterVector {in, in2});
}
};
TEST_P(ReferenceLogicalAndLayerTest, CompareWithHardcodedRefs) {
Exec();
std::vector<RefLogicalParams> generateLogicalParams() {
std::vector<RefLogicalParams> logicalParams {
Builder {}
.opType(LogicalTypes::LOGICAL_AND)
.input1({{2, 2}, element::boolean, std::vector<char> {true, false, true, false}})
.input2({{2, 2}, element::boolean, std::vector<char> {false, true, true, false}})
.expected({{2, 2}, element::boolean, std::vector<char> {false, false, true, false}}),
Builder {}
.opType(LogicalTypes::LOGICAL_AND)
.input1({{2, 1, 2, 1}, element::boolean, std::vector<char> {true, false, true, false}})
.input2({{1, 1, 2, 1}, element::boolean, std::vector<char> {true, false}})
.expected({{2, 1, 2, 1}, element::boolean, std::vector<char> {true, false, true, false}}),
Builder {}
.opType(LogicalTypes::LOGICAL_AND)
.input1({{3, 4}, element::boolean, std::vector<char> {true, true, true, true, true, false, true, false, false, true, true, true}})
.input2({{3, 4}, element::boolean, std::vector<char> {true, true, true, true, true, false, true, false, false, true, true, false}})
.expected({{3, 4}, element::boolean, std::vector<char> {true, true, true, true, true, false, true, false, false, true, true, false}})};
return logicalParams;
}
INSTANTIATE_TEST_SUITE_P(
smoke_LogicalAnd_With_Hardcoded_Refs, ReferenceLogicalAndLayerTest,
::testing::Values(
LogicalAndParams(ngraph::PartialShape {2, 2}, ngraph::PartialShape {2, 2},
std::vector<char> {true, false, true, false},
std::vector<char> {false, true, true, false},
std::vector<char> {false, false, true, false}),
LogicalAndParams(ngraph::PartialShape {2, 1, 2, 1}, ngraph::PartialShape {1, 1, 2, 1},
std::vector<char> {true, false, true, false},
std::vector<char> {true, false},
std::vector<char> {true, false, true, false}),
LogicalAndParams(ngraph::PartialShape {3, 4}, ngraph::PartialShape {3, 4},
std::vector<char> {true, true, true, true, true, false, true, false, false, true, true, true},
std::vector<char> {true, true, true, true, true, false, true, false, false, true, true, false},
std::vector<char> {true, true, true, true, true, false, true, false, false, true, true, false})),
ReferenceLogicalAndLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_LogicalAnd_With_Hardcoded_Refs, ReferenceLogicalLayerTest, ::testing::ValuesIn(generateLogicalParams()),
ReferenceLogicalLayerTest::getTestCaseName);
} // namespace
} // namespace LogicalOpsRefTestDefinitions
} // namespace reference_tests

View File

@ -0,0 +1,48 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ie_core.hpp>
#include <ie_ngraph_utils.hpp>
#include <ngraph/ngraph.hpp>
#include <shared_test_classes/base/layer_test_utils.hpp>
#include <tuple>
#include "logical.hpp"
using namespace ngraph;
using namespace InferenceEngine;
using LogicalTypes = ngraph::helpers::LogicalTypes;
namespace reference_tests {
namespace LogicalOpsRefTestDefinitions {
namespace {
std::vector<RefLogicalParams> generateLogicalParams() {
std::vector<RefLogicalParams> logicalParams {
Builder {}
.opType(LogicalTypes::LOGICAL_OR)
.input1({{2, 2}, element::boolean, std::vector<char> {true, false, true, false}})
.input2({{2, 2}, element::boolean, std::vector<char> {false, true, true, false}})
.expected({{2, 2}, element::boolean, std::vector<char> {true, true, true, false}}),
Builder {}
.opType(LogicalTypes::LOGICAL_OR)
.input1({{2, 1, 2, 1}, element::boolean, std::vector<char> {true, false, true, false}})
.input2({{1, 1, 2, 1}, element::boolean, std::vector<char> {true, false}})
.expected({{2, 1, 2, 1}, element::boolean, std::vector<char> {true, false, true, false}}),
Builder {}
.opType(LogicalTypes::LOGICAL_OR)
.input1({{3, 4}, element::boolean, std::vector<char> {true, true, true, true, true, false, true, false, false, true, true, true}})
.input2({{3, 4}, element::boolean, std::vector<char> {true, true, true, true, true, true, true, false, false, true, true, false}})
.expected({{3, 4}, element::boolean, std::vector<char> {true, true, true, true, true, true, true, false, false, true, true, true}})};
return logicalParams;
}
INSTANTIATE_TEST_SUITE_P(smoke_LogicalOr_With_Hardcoded_Refs, ReferenceLogicalLayerTest, ::testing::ValuesIn(generateLogicalParams()),
ReferenceLogicalLayerTest::getTestCaseName);
} // namespace
} // namespace LogicalOpsRefTestDefinitions
} // namespace reference_tests

View File

@ -36,7 +36,7 @@ To build the sample, please use instructions available at [Build the Sample Appl
To run the sample, you need to specify a model and image:
- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader).
- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
> **NOTES**:
@ -94,7 +94,7 @@ This sample is an API example, for any performance measurements please use the d
- [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
[ie_core_create]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Core.html#gaab73c7ee3704c742eaac457636259541

View File

@ -35,7 +35,7 @@ To build the sample, please use instructions available at [Build the Sample Appl
To run the sample, you need to specify a model and image:
- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader).
- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
The sample accepts an uncompressed image in the NV12 color format. To run the sample, you need to
@ -109,7 +109,7 @@ This sample is an API example, for any performance measurements please use the d
- [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
[ie_network_set_color_format]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__Network.html#ga85f3251f1f7b08507c297e73baa58969

View File

@ -42,7 +42,7 @@ To build the sample, please use instructions available at [Build the Sample Appl
To run the sample, you need to specify a model and image:
- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader).
- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
Running the application with the `-h` option yields the following usage message:
@ -153,7 +153,7 @@ This sample is an API example, for any performance measurements please use the d
- [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
[ie_infer_request_infer_async]:https://docs.openvinotoolkit.org/latest/ie_c_api/group__InferRequest.html#gad2351010e292b6faec959a3d5a8fb60e

View File

@ -33,9 +33,10 @@
find_package(PythonInterp 3 QUIET)
if( PYTHONINTERP_FOUND )
get_filename_component( _python_path ${PYTHON_EXECUTABLE} PATH )
file(TO_CMAKE_PATH "$ENV{HOME}" ENV_HOME)
find_host_program( CYTHON_EXECUTABLE
NAMES cython cython.bat cython3
HINTS ${_python_path} $ENV{HOME}/.local/bin
HINTS ${_python_path} ${ENV_HOME}/.local/bin
)
else()
find_host_program( CYTHON_EXECUTABLE

View File

@ -68,7 +68,7 @@ Options:
To run the sample, you need to specify a model and image:
- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader).
- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
> **NOTES**:
@ -148,7 +148,7 @@ The sample application logs each step in a standard output stream and outputs to
- [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
[IECore]:https://docs.openvinotoolkit.org/latest/ie_python_api/classie__api_1_1IECore.html

View File

@ -57,7 +57,7 @@ Options:
```
To run the sample, you need to specify a model and image:
- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader).
- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
> **NOTES**:
@ -119,7 +119,7 @@ The sample application logs each step in a standard output stream and outputs to
- [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
[IECore]:https://docs.openvinotoolkit.org/latest/ie_python_api/classie__api_1_1IECore.html

View File

@ -65,7 +65,7 @@ Options:
```
To run the sample, you need to specify a model and image:
- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader).
- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
> **NOTES**:
@ -116,7 +116,7 @@ The sample application logs each step in a standard output stream and creates an
- [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
[IECore]:https://docs.openvinotoolkit.org/latest/ie_python_api/classie__api_1_1IECore.html

View File

@ -130,7 +130,7 @@ The sample application logs each step in a standard output stream and outputs to
- [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
[IECore]:https://docs.openvinotoolkit.org/latest/ie_python_api/classie__api_1_1IECore.html

View File

@ -67,7 +67,7 @@ Options:
To run the sample, you need to specify a model and image:
- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader).
- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
> **NOTES**:
@ -117,7 +117,7 @@ The sample application logs each step in a standard output stream and creates an
- [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
[IECore]:https://docs.openvinotoolkit.org/latest/ie_python_api/classie__api_1_1IECore.html

View File

@ -193,7 +193,7 @@ The sample application logs each step in a standard output stream.
- [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
[IENetwork.batch_size]:https://docs.openvinotoolkit.org/latest/ie_python_api/classie__api_1_1IENetwork.html#a79a647cb1b49645616eaeb2ca255ef2e

View File

@ -79,7 +79,7 @@ Options:
```
To run the sample, you need to specify a model and image:
- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader).
- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
> **NOTES**:
@ -129,7 +129,7 @@ The sample application logs each step in a standard output stream and creates an
- [Integrate the Inference Engine with Your Application](../../../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
[IECore]:https://docs.openvinotoolkit.org/latest/ie_python_api/classie__api_1_1IECore.html

View File

@ -47,7 +47,7 @@ To build the sample, please use instructions available at [Build the Sample Appl
To run the sample, you need to specify a model and image:
- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader).
- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
> **NOTES**:
@ -166,5 +166,5 @@ classid probability
- [Integrate the Inference Engine with Your Application](../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)

View File

@ -36,7 +36,7 @@ To build the sample, please use instructions available at [Build the Sample Appl
To run the sample, you need to specify a model and image:
- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader).
- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
> **NOTES**:
@ -94,5 +94,5 @@ This sample is an API example, for any performance measurements please use the d
- [Integrate the Inference Engine with Your Application](../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)

View File

@ -37,7 +37,7 @@ To build the sample, please use instructions available at [Build the Sample Appl
To run the sample, you need to specify a model and image:
- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader).
- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
The sample accepts an uncompressed image in the NV12 color format. To run the sample, you need to
@ -115,5 +115,5 @@ This sample is an API example, for any performance measurements please use the d
- [Integrate the Inference Engine with Your Application](../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)

View File

@ -38,7 +38,7 @@ To build the sample, please use instructions available at [Build the Sample Appl
To run the sample, you need to specify a model and image:
- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader).
- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
> **NOTES**:
@ -93,5 +93,5 @@ This sample is an API example, for any performance measurements please use the d
- [Integrate the Inference Engine with Your Application](../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)

View File

@ -38,7 +38,7 @@ To build the sample, please use instructions available at [Build the Sample Appl
To run the sample, you need to specify a model and image:
- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader).
- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
Running the application with the `-h` option yields the following usage message:
@ -146,5 +146,5 @@ Parsing input parameters
- [Integrate the Inference Engine with Your Application](../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)

View File

@ -246,5 +246,5 @@ All of mentioned files can be downloaded from [https://storage.openvinotoolkit.o
- [Integrate the Inference Engine with Your Application](../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)

View File

@ -37,7 +37,7 @@ To build the sample, please use instructions available at [Build the Sample Appl
To run the sample, you need to specify a model and image:
- you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
- you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader).
- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
Running the application with the `-h` option yields the following usage message:
@ -134,5 +134,5 @@ The sample application logs each step in a standard output stream and creates an
- [Integrate the Inference Engine with Your Application](../../../docs/IE_DG/Integrate_with_customer_application_new_API.md)
- [Using Inference Engine Samples](../../../docs/IE_DG/Samples_Overview.md)
- [Model Downloader](@ref omz_tools_downloader_README)
- [Model Downloader](@ref omz_tools_downloader)
- [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)

View File

@ -25,8 +25,17 @@ constexpr uint32_t noOfInputsLowPrecDivisor = 16;
constexpr uint32_t affineMaxBatchSize = 8;
constexpr uint32_t maxPoolMaxWindowSize = 6;
constexpr uint32_t copyMaxGrouping = 8;
constexpr uint32_t transposeMaxSize = 65528;
inline bool IsTransposeSupported(const std::vector<size_t>& shape) {
auto shape_no_1 = shape;
shape_no_1.erase(std::remove(shape_no_1.begin(), shape_no_1.end(), 1), shape_no_1.end());
if (shape_no_1.size() != 2) return false;
size_t min, max;
std::tie(min, max) = std::minmax(shape_no_1[0], shape_no_1[1]);
return min <= 8 && max % 8 == 0 && max >= 8 && max <= transposeMaxSize;
}
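For illustration, the sketch below restates the constraint encoded by `IsTransposeSupported` as a standalone function, assuming the same limits (after dropping singleton dimensions the smaller dimension must be at most 8 and the larger one a multiple of 8 no bigger than 65528); the `main` function and its sample shapes are hypothetical.

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

bool is_transpose_supported(std::vector<size_t> shape) {
    const size_t transpose_max_size = 65528;
    // Drop all singleton dimensions; exactly two non-trivial dimensions must remain.
    shape.erase(std::remove(shape.begin(), shape.end(), 1), shape.end());
    if (shape.size() != 2) return false;
    const size_t mn = std::min(shape[0], shape[1]);
    const size_t mx = std::max(shape[0], shape[1]);
    return mn <= 8 && mx % 8 == 0 && mx >= 8 && mx <= transpose_max_size;
}

int main() {
    std::cout << is_transpose_supported({1, 8, 1, 64}) << '\n';  // 1: 8x64 fits the limits
    std::cout << is_transpose_supported({16, 64})      << '\n';  // 0: both dimensions exceed 8
    std::cout << is_transpose_supported({8, 100})      << '\n';  // 0: 100 is not a multiple of 8
    return 0;
}
```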
namespace Cnn2D {
struct RangeLimit {

View File

@ -279,19 +279,20 @@ void make_gna_pwl(const DnnActivation fun,
gnalog() << "=========================== LeakyReLU Segments ======================\n";
int32_t x_lower = INT32_MIN;
int32_t x_upper = INT32_MAX;
int16_t y_lower = y_min;
int32_t y_lower = y_min;
int16_t y_upper = y_max;
if (fun.fqParams.set) {
x_lower = std::max(FLOAT_TO_INT64(*fun.fqParams.input_low * 1.25 * in_scale), static_cast<int64_t>(x_lower));
x_upper = std::min(FLOAT_TO_INT64(*fun.fqParams.input_high * 1.25 * in_scale), static_cast<int64_t>(x_upper));
y_lower = std::max(FLOAT_TO_INT32(*fun.fqParams.input_low * 1.25 * out_scale), static_cast<int32_t>(y_lower));
// y_lower can be reduced with negative slope
y_lower = *fun.fqParams.input_low * 1.25 * out_scale;
y_upper = std::min(FLOAT_TO_INT32(*fun.fqParams.input_high * 1.25 * out_scale), static_cast<int32_t>(y_upper));
} else {
if (x_lower < y_lower * in_scale / out_scale) x_lower = FLOAT_TO_INT32(y_lower * in_scale / out_scale);
if (y_lower < x_lower * out_scale / in_scale) y_lower = FLOAT_TO_INT16(x_lower * out_scale / in_scale);
}
gna_pwl[0].yBase = y_lower * fun.args.lrelu.negative_slope;
gna_pwl[0].yBase = std::max(FLOAT_TO_INT32(y_lower * fun.args.lrelu.negative_slope), static_cast<int32_t>(y_min));
s = gna_slope(fun.args.lrelu.negative_slope, in_scale, out_scale);
gna_pwl[0].xBase = (x_lower & XBASEMASK) | s.slope_scale_index; // zero out the 2 lsb
gna_pwl[0].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
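A small numeric sketch of the clamp introduced above, with made-up values: when fake-quantization is set, `y_lower` may lie far below the representable output range, and the segment base only stays in range after the product with the negative slope is clamped to `y_min`.

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>

int main() {
    const int32_t y_min = INT16_MIN;      // assumed lower bound of the output range
    const double negative_slope = 0.01;   // assumed LeakyReLU slope
    const int32_t y_lower = -5000000;     // e.g. input_low * 1.25 * out_scale
    // Without the clamp the base would be -50000, below the allowed y_min.
    const int32_t y_base = std::max(static_cast<int32_t>(y_lower * negative_slope), y_min);
    std::cout << y_base << '\n';          // prints -32768
    return 0;
}
```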

View File

@ -100,14 +100,70 @@ class ModelQuantizer {
int optWeightsBytesSize, int inputsBytesSize, bool fakeQuantize) const {
ScaleFactorCalculator sf(net, mandWeightsBytesSize, optWeightsBytesSize, inputsBytesSize, fakeQuantize);
while (!sf.allLayersProcessed()) {
for (auto &&layer : sf.getStartLayers()) {
int infiniteLoopCount = 0;
std::vector<std::string> infiniteLoopPattern;
std::vector<std::string> infiniteLoopHistory;
while (!sf.allLayersProcessed() && infiniteLoopCount <= 2) {
auto layers = sf.getStartLayers();
infiniteLoopHistory.emplace_back(layers.front()->name);
for (auto &&layer : layers) {
transformLayer(layer, sf);
// keep transforming until we reach a case where the output scale was updated due to a downstream layer
if (sf.needToRestart()) {
infiniteLoopHistory.back() += "#" + layer->name;
break;
}
}
// looking for infinite loop by using algorithm of compute prefix function, complexity O(N)
std::map<int, int> prefixFunction;
int k = infiniteLoopHistory.size();
for (int i = infiniteLoopHistory.size() - 2; i >= 0; i--) {
while (k < infiniteLoopHistory.size() && infiniteLoopHistory[k - 1] != infiniteLoopHistory[i]) {
auto iter = prefixFunction.find(k);
k = iter == prefixFunction.end() ? infiniteLoopHistory.size() : iter->second;
}
if (infiniteLoopHistory[k - 1] == infiniteLoopHistory[i]) {
k--;
}
if ((infiniteLoopHistory.size() - i) % 2 == 0 && (infiniteLoopHistory.size() - i) / 2 == infiniteLoopHistory.size() - k) {
infiniteLoopPattern.clear();
int patternLength = (infiniteLoopHistory.size() - i)/2;
for (int j = 0; j < patternLength; j++) {
infiniteLoopPattern.emplace_back(infiniteLoopHistory[infiniteLoopHistory.size() - patternLength + j]);
}
infiniteLoopHistory.clear();
gnalog() << "infinite loop detected\n";
break;
}
prefixFunction.emplace(i, k);
}
if (infiniteLoopHistory.empty()) {
infiniteLoopCount++;
} else {
if (infiniteLoopCount > 0 &&
(infiniteLoopHistory.size()%infiniteLoopPattern.size() == 0 || sf.allLayersProcessed()) &&
!std::equal(infiniteLoopHistory.begin() + (infiniteLoopHistory.size() - infiniteLoopPattern.size()),
infiniteLoopHistory.end(), infiniteLoopPattern.begin())) {
infiniteLoopCount = 0;
infiniteLoopPattern.clear();
gnalog() << "infinite loop fixed\n";
}
}
sf.SetInfiniteLoopCount(infiniteLoopCount);
}
if (infiniteLoopCount > 0) {
std::string additionalInformation;
for (const auto& p : infiniteLoopPattern) {
additionalInformation += '\n' + p;
}
THROW_GNA_EXCEPTION << "infinite loop: " + additionalInformation;
}
}
};
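The loop detection above builds on the classic prefix (failure) function. The quantizer scans the history from its end, but the underlying idea can be shown with a small standalone sketch over a hypothetical vector of layer names: the smallest period of a sequence of length n is n - pi[n - 1].

```cpp
#include <iostream>
#include <string>
#include <vector>

// Standard KMP prefix function over a sequence of strings.
std::vector<int> prefix_function(const std::vector<std::string>& s) {
    std::vector<int> pi(s.size(), 0);
    for (size_t i = 1; i < s.size(); ++i) {
        int k = pi[i - 1];
        while (k > 0 && s[i] != s[k]) k = pi[k - 1];
        if (s[i] == s[k]) ++k;
        pi[i] = k;
    }
    return pi;
}

int main() {
    // A history whose last four entries repeat with period 2 -> a loop.
    const std::vector<std::string> history = {"input", "concat", "memory", "concat", "memory"};
    const std::vector<std::string> tail(history.end() - 4, history.end());
    const auto pi = prefix_function(tail);
    const size_t n = tail.size();
    const size_t period = n - pi[n - 1];
    if (period < n && n % period == 0) {
        std::cout << "repeating pattern of length " << period << " detected\n";
    } else {
        std::cout << "no repetition in the tail\n";
    }
    return 0;
}
```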

View File

@ -205,7 +205,7 @@ class ScaleFactorPerLayer {
* @param result
* @return
*/
bool operator()(T cnnLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) {
bool operator()(T cnnLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, bool fakeQuantize, int infiniteLoopCount) {
return false;
}
};
@ -438,7 +438,8 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
}
public :
bool operator()(InferenceEngine::CNNLayer *cnnLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) {
bool operator()(InferenceEngine::CNNLayer *cnnLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, bool fakeQuantize,
int infiniteLoopCount) {
if ( !cnnLayer ) {
IE_THROW() << "Incorrect Convolutional Layer pointer \n";
}
@ -477,7 +478,7 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
if ((!fakeQuantize && quantSibling->_dst_quant.IsScaleSet()) ||
(fakeQuantize && quantSibling->_dst_quant.IsScaleSet() && !fp32eq(quantSibling->_dst_quant.GetScale(), 1.0) &&
quantSibling->_dst_quant.GetScale() < inputQuant->_dst_quant.GetScale())) {
quantSibling->_dst_quant.GetScale() < inputQuant->_dst_quant.GetScale()) || infiniteLoopCount > 0) {
// means we already restarted propagation input memory layer
// need to search for requantiseable layer prior memory output layer
InferenceEngine::CNNLayerPtr restartedLayer;
@ -645,7 +646,8 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
template<>
class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
public:
bool operator()(InferenceEngine::EltwiseLayer* eltwiseLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) {
bool operator()(InferenceEngine::EltwiseLayer* eltwiseLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result,
bool fakeQuantize, int infiniteLoopCount) {
if ( !eltwiseLayer ) {
THROW_GNA_EXCEPTION << "Incorrect Eltwise Layer pointer \n";
}
@ -814,7 +816,8 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
template<>
class ScaleFactorPerLayer<InferenceEngine::ConcatLayer*> {
public:
bool operator()(InferenceEngine::ConcatLayer* concatLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) {
bool operator()(InferenceEngine::ConcatLayer* concatLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result,
bool fakeQuantize, int infiniteLoopCount) {
if ( !concatLayer ) {
THROW_GNA_EXCEPTION << "Incorrect Concat Layer pointer \n";
}
@ -872,15 +875,8 @@ class ScaleFactorPerLayer<InferenceEngine::ConcatLayer*> {
// find a source quant value
// - 1st candidate - input layer
// - 2nd candidate - non-activation layer with non-1 scale factor
static std::map<std::string, size_t> restarted_counter;
auto restartedCountIt = restarted_counter.find(concatLayer->name);
if (restartedCountIt == restarted_counter.end()) {
auto pos = restarted_counter.insert({ concatLayer->name, 0 });
restartedCountIt = pos.first;
}
if (sourceLayerIt == inputLayers.end()) {
if (((restartedCountIt->second) / 2) % 2 == 1) {
if (infiniteLoopCount % 2 == 1) {
std::reverse(inputLayers.begin(), inputLayers.end());
}
@ -898,7 +894,7 @@ class ScaleFactorPerLayer<InferenceEngine::ConcatLayer*> {
}
}
} else {
if (((restartedCountIt->second) / 4) % 2 == 0) {
if (infiniteLoopCount % 4 == 2 || infiniteLoopCount % 4 == 3) {
auto sourceLayerCheck = [](InferenceEngine::CNNLayerPtr& inputLayer) {
auto quantParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(inputLayer);
LayerInfo info(inputLayer);
@ -916,8 +912,6 @@ class ScaleFactorPerLayer<InferenceEngine::ConcatLayer*> {
sourceLayerIt = std::find_if(inputLayers.begin(), inputLayers.end(), nonDefaultScaleFactor);
}
}
++restartedCountIt->second;
}
std::set<size_t> concatIdxToUpdate;
@ -978,7 +972,7 @@ class ScaleFactorPerLayer<InferenceEngine::ConcatLayer*> {
gnalog() << "[UFS] from : " << concatLayer->name << " reached: " << layer->name;
// found that direct input to concat is a indirect parent of align filter - so no link required
auto info = LayerInfo(layer);
if (!info.isWeightable() && !info.isActivation() && !info.isConst()) {
if (!info.isWeightable() && !info.isActivation() && !info.isConst() && !info.isMemory()) {
gnalog() << "... skipped\n";
return;
}
@ -1030,8 +1024,8 @@ class ScaleFactorPerLayer<InferenceEngine::ConcatLayer*> {
}
quantDataForConCatInput->_dst_quant.SetScale(newScaleFactor);
} else if (restarLayerInfo.isConst()) {
gnalog() << "... warning const layer will be requantized\n";
} else if (restarLayerInfo.isConst() || restarLayerInfo.isMemory()) {
gnalog() << "... warning " << restartedLayer->type << " layer will be requantized\n";
quantDataForConCatInput->_src_quant.SetScale(sourceQuantParams->_dst_quant.GetScale());
quantDataForConCatInput->_dst_quant.SetScale(sourceQuantParams->_dst_quant.GetScale());
} else {
@ -1057,7 +1051,8 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
};
public:
bool operator()(InferenceEngine::WeightableLayer *wl, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) {
bool operator()(InferenceEngine::WeightableLayer *wl, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result,
bool fakeQuantize, int infiniteLoopCount) {
if ( !wl ) {
THROW_GNA_EXCEPTION << "Incorrect Weightable Layer pointer \n";
} else if (!wl->_weights) {
@ -1211,7 +1206,8 @@ class ScaleFactorPerLayer<InferenceEngine::ConvolutionLayer*> : public ScaleFact
template<>
class ScaleFactorPerLayer<InferenceEngine::GemmLayer*> {
public:
bool operator() (InferenceEngine::GemmLayer* gemmLayer, int weightsSize, int inputSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) {
bool operator() (InferenceEngine::GemmLayer* gemmLayer, int weightsSize, int inputSize, ScaleFactorUpdateResult &result,
bool fakeQuantize, int infiniteLoopCount) {
if ( !gemmLayer ) {
THROW_GNA_EXCEPTION << "Incorrect Gemm Layer pointer \n";
}
@ -1270,6 +1266,7 @@ class ScaleFactorCalculator {
int optWeightsBytesSize;
bool isFakeQuantize;
int inputsBytesSize;
int infiniteLoopCount = 0;
public:
ScaleFactorCalculator(Cnt &net, int mandWeightsBytesSize, int optWeightsBytesSize, int inputsBytesSize, bool fakeQuantize)
@ -1286,6 +1283,9 @@ class ScaleFactorCalculator {
std::vector<InferenceEngine::CNNLayerPtr> getStartLayers() const {
return std::vector<InferenceEngine::CNNLayerPtr>(idx, std::end(net));
}
void SetInfiniteLoopCount(int infiniteLoopCount) {
this->infiniteLoopCount = infiniteLoopCount;
}
template<class T>
bool operator()(T ptr) const {
needRestart = false;
@ -1296,7 +1296,7 @@ class ScaleFactorCalculator {
weightsBytesSize = optWeightsBytesSize;
}
if (!frontend::ScaleFactorPerLayer<T>()(ptr, weightsBytesSize, inputsBytesSize, result, isFakeQuantize)) {
if (!frontend::ScaleFactorPerLayer<T>()(ptr, weightsBytesSize, inputsBytesSize, result, isFakeQuantize, infiniteLoopCount)) {
return false;
}
if (result) {

View File

@ -145,12 +145,15 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer)
size_t output_layer_size = 0;
for (int j = 0; j != getInputTo(layer->outData[i]).size(); j++) {
auto outFunctionalLayer = CNNNetGetNextLayerSkipCertain(layer, i, j, [](CNNLayerPtr l) {
auto outFunctionalLayer = CNNNetCheckNextLayerSkipCertain(layer, i, j, true, [](CNNLayerPtr l) {
return LayerInfo(l).isNonFunctional();
});
if (!outFunctionalLayer.first) {
THROW_GNA_LAYER_EXCEPTION(layer) << " outData["<< i << "]" << " connected by " << j <<" connection doesnt connect to functional layer";
output_layer_size =
InferenceEngine::details::product(begin(layer->outData[i]->getDims()),
end(layer->outData[i]->getDims())) * layer->outData[i]->getPrecision().size();
continue;
}
for (int idx : outFunctionalLayer.second) {

View File

@ -58,11 +58,11 @@
#include "transformations/remove_extra_reshapes.hpp"
#include "transformations/insert_transpose_after_convolution_or_pooling.hpp"
#include "transformations/insert_transpose_before_matmul.hpp"
#include "transformations/reorder_activation_and_pooling.hpp"
#include "transformations/swap_input_matmul_gna.hpp"
#include "transformations/convert_matmul_to_pointwise_convolution.hpp"
#include "transformations/split_convolution_with_large_buffer_size.hpp"
#include "transformations/handle_transposes_around_matmul.hpp"
#include "transformations/decompose_2d_conv.hpp"
#include "transformations/convert_padded2valid_conv.hpp"
@ -702,7 +702,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
manager.register_pass<SplitConvolutionWithFq>();
manager.register_pass<SplitConvolutionWithBias>();
manager.register_pass<SplitConvolution>();
manager.register_pass<InsertTransposeBeforeMatmul>();
manager.register_pass<HandleTransposesAroundMatMul>();
manager.register_pass<SwapInputMatMul>();
manager.register_pass<InsertTransposeAfterConvOrPool>();
manager.register_pass<ReorderActivationAndPooling>();
@ -757,7 +757,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
passes->registerPass<RemoveConstPass>();
passes->registerPass<UnrollTIPass>();
passes->registerPass<RemoveConstPass>();
passes->registerPass<InsertIdentityToLSTMCellPass>();
passes->registerPass<UnrollLSTMCellPass>();
passes->registerPass<RemoveSingleInputConcatPass>();

View File

@ -131,6 +131,12 @@ static CNNLayerPtr InsertCopyLayer(CNNLayerPtr prevLayer, CNNLayerPtr nextLayer,
return copyWithQuant;
}
static bool hasNextFuncLayer(const CNNLayerPtr layer) {
return CNNNetHasNextLayerSkipCertain(layer, 0, 0, [](CNNLayerPtr layer) {
return LayerInfo(layer).isNonFunctional();
});
}
static std::vector<CNNLayerPtr> getCandidatesForIdentityInsertion(const CNNLayerPtr l, std::shared_ptr<IPassManager> passmanager) {
std::vector<CNNLayerPtr> prevLayers;
@ -796,7 +802,8 @@ void InsertIdentityLayerPass::run() {
for (auto && nextLayer : getInputTo(nextData)) {
if (nextLayer.second.get() == l.get())
continue;
if (getCandidatesForIdentityInsertion(nextLayer.second, getPassManager()).empty()) {
if (getCandidatesForIdentityInsertion(nextLayer.second, getPassManager()).empty() &&
hasNextFuncLayer(nextLayer.second)) {
notAll = true;
}
}
@ -1608,44 +1615,6 @@ void BroadcastConstPass::run() {
}
}
void InsertIdentityToLSTMCellPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertIdentityToLSTMCellPass");
for (auto layer : *pLayers) {
if (layer->type == "LSTMCell") {
// This fixed the cases when both functional and non-functional outputs are mixed (or not outputs are used)
// which results in scratch buffer being used so outputs cannot be used in form of blob or by non-functional layers
// downside is scaling down from i32 to i16 which may
for (int output_idx = 0; output_idx < layer->outData.size(); output_idx++) {
int numOfIdentityLayers = ((this->getPassManager())->getIntVar(identityLayersCounterName))++;
auto activationName = std::string("lstm_identity_") + std::to_string(numOfIdentityLayers);
auto& output = layer->outData[output_idx];
auto& input_to = getInputTo(output);
CNNLayerPtr activationLayer =
std::make_shared<GenericLayer>(LayerParams({activationName, "identity", InferenceEngine::Precision::FP32}));
auto dataPtr = std::make_shared<Data>("lstm_identity_data_" + std::to_string(numOfIdentityLayers), output->getTensorDesc());
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
auto activationLayerWithQuant = quantized ? InferenceEngine::injectData<QuantizedLayerParams>(activationLayer) : activationLayer;
getCreatorLayer(dataPtr) = activationLayerWithQuant;
activationLayerWithQuant->outData.push_back(dataPtr);
activationLayerWithQuant->insData.push_back(output);
auto& activationInputTo = getInputTo(dataPtr);
for (auto& input : input_to) {
auto& next_layer = input.second;
activationInputTo[input.first] = next_layer;
std::replace_if(std::begin(next_layer->insData), std::end(next_layer->insData),
[output](DataWeakPtr data) { return data.lock() == output; }, dataPtr);
}
input_to.clear();
input_to[activationName] = activationLayerWithQuant;
}
}
}
}
void BreakFusingOfOutputLayersPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "BreakFusingOfOutputLayersPass");
#if GNA_LIB_VER == 1
@ -2133,8 +2102,11 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() {
THROW_GNA_LAYER_EXCEPTION(fqLayer) << "Zero levels";
}
// Before FQ layer is removed, the previous layer has to be updated with its quantization data
auto quantParamsPrevLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(prevLayer);
// Before FQ layer is removed, the previous functional layer has to be updated with its quantization data
auto prevFuncLayer = CNNNetPrevLayerSkipCertain(*fqLayer, 0, [](CNNLayerPtr layer) {
return LayerInfo(layer).isNonFunctional();
});
auto quantParamsPrevLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(prevFuncLayer);
quantParamsPrevLayer->_dst_quant.SetLevels(fqLevels);
quantParamsPrevLayer->_dst_quant.SetMinValues({ inputRange.first[0] }, true);
quantParamsPrevLayer->_dst_quant.SetMaxValues({ inputRange.second[0] }, true);

View File

@ -0,0 +1,132 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "transformations/handle_transposes_around_matmul.hpp"
#include <numeric>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/pattern/op/or.hpp>
#include <ngraph/rt_info.hpp>
#include "gna_plugin_log.hpp"
#include "backend/gna_limitations.hpp"
using namespace GNAPluginNS;
NGRAPH_RTTI_DEFINITION(HandleTransposesAroundMatMul, "HandleTransposesAroundMatMul", 0);
NGRAPH_RTTI_DEFINITION(HandleTransposeBeforeMatMul, "HandleTransposeBeforeMatMul", 0);
NGRAPH_RTTI_DEFINITION(HandleTransposeAfterMatMul, "HandleTransposeAfterMatMul", 0);
static void ReplaceTransposeWithReshape(std::shared_ptr<ngraph::Node> transpose_node) {
auto shape = transpose_node->get_output_shape(0);
auto reshape_const = std::make_shared<ngraph::opset7::Constant>(ngraph::element::Type_t::i64,
ngraph::Shape{shape.size()}, shape);
auto reshape_node = std::make_shared<ngraph::opset7::Reshape>(transpose_node->input_value(0), reshape_const, false);
reshape_node->set_friendly_name(transpose_node->get_friendly_name() + "/reshape");
ngraph::copy_runtime_info(transpose_node, reshape_node);
transpose_node->output(0).replace(reshape_node->output(0));
}
static void InsertTranspose(std::shared_ptr<ngraph::Node> prev_node, const std::string& base_name) {
auto consumers = prev_node->output(0).get_target_inputs();
const auto orig_shape = prev_node->get_output_shape(0);
std::vector<size_t> transpose_ids;
for (size_t i = 0; i < orig_shape.size(); ++i) {
if (orig_shape[i] > 1) {
transpose_ids.push_back(i);
}
}
IE_ASSERT(transpose_ids.size() == 2);
std::vector<size_t> permute_order(orig_shape.size());
std::iota(std::begin(permute_order), std::end(permute_order), 0);
std::swap(permute_order[transpose_ids[0]], permute_order[transpose_ids[1]]);
auto transpose_order = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{permute_order.size()}, permute_order);
auto transpose = std::make_shared<ngraph::opset7::Transpose>(prev_node, transpose_order);
transpose->set_friendly_name(base_name + "/in_transpose");
auto reshapeConstAfter = std::make_shared<ngraph::opset7::Constant>(ngraph::element::Type_t::i64,
ngraph::Shape{orig_shape.size()}, orig_shape);
auto reshapeAfter = std::make_shared<ngraph::opset7::Reshape>(transpose, reshapeConstAfter, false);
reshapeAfter->set_friendly_name(base_name + "/reshape_after_transpose");
ngraph::copy_runtime_info(prev_node, ngraph::NodeVector{transpose, reshapeAfter});
for (auto input : consumers) {
input.replace_source_output(reshapeAfter);
}
}
HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() {
auto reshape = ngraph::pattern::wrap_type<ngraph::opset7::Reshape>({ngraph::pattern::any_input(),
ngraph::pattern::any_input()}, VerifyReshape());
auto transpose = ngraph::pattern::wrap_type<ngraph::opset7::Transpose>({reshape,
ngraph::pattern::any_input()});
auto matmul_input = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{reshape, transpose});
auto matmul1 = ngraph::pattern::wrap_type<ngraph::opset7::MatMul>({matmul_input, ngraph::pattern::any_input()});
auto matmul2 = ngraph::pattern::wrap_type<ngraph::opset7::MatMul>({ngraph::pattern::any_input(), matmul_input});
auto matmul = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{matmul1, matmul2});
ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
const auto& pattern_map = m.get_pattern_value_map();
auto transpose_it = pattern_map.find(transpose);
if (transpose_it != std::end(pattern_map)) {
ReplaceTransposeWithReshape(transpose_it->second.get_node_shared_ptr());
} else {
auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr();
if (!GNALimitations::IsTransposeSupported(reshape_node->get_output_shape(0))) return false;
auto matmul_it = pattern_map.find(matmul1);
auto matmul_out = matmul_it != std::end(pattern_map) ? matmul_it->second : pattern_map.at(matmul2);
InsertTranspose(reshape_node, matmul_out.get_node_shared_ptr()->get_friendly_name());
}
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(matmul, "HandleTransposeBeforeMatMul");
this->register_matcher(m, callback);
}
HandleTransposeAfterMatMul::HandleTransposeAfterMatMul() {
auto matmul = ngraph::pattern::wrap_type<ngraph::opset7::MatMul>();
auto fq = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({matmul, ngraph::pattern::any_input(),
ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()});
auto transpose_input = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{matmul, fq});
auto transpose = ngraph::pattern::wrap_type<ngraph::opset7::Transpose>({transpose_input, ngraph::pattern::any_input()});
auto reshape_input = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{transpose_input, transpose});
auto reshape = ngraph::pattern::wrap_type<ngraph::opset7::Reshape>({reshape_input,
ngraph::pattern::any_input()}, VerifyReshape());
ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
const auto& pattern_map = m.get_pattern_value_map();
auto transpose_it = pattern_map.find(transpose);
if (transpose_it != std::end(pattern_map)) {
ReplaceTransposeWithReshape(transpose_it->second.get_node_shared_ptr());
} else {
auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr();
if (!GNALimitations::IsTransposeSupported(reshape_node->get_input_shape(0))) return false;
auto matmul_node = pattern_map.at(matmul).get_node_shared_ptr();
InsertTranspose(matmul_node, matmul_node->get_friendly_name());
}
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(reshape, "HandleTransposeAfterMatMul");
this->register_matcher(m, callback);
}
bool VerifyReshape::operator()(const ngraph::Output<ngraph::Node>& reshape_out) const {
auto in_shape = reshape_out.get_node_shared_ptr()->get_input_shape(0);
auto out_shape = reshape_out.get_node_shared_ptr()->get_output_shape(0);
// Check if Reshape changes the final 2d shape of Affine primitive
in_shape.erase(std::remove(in_shape.begin(), in_shape.end(), 1), in_shape.end());
out_shape.erase(std::remove(out_shape.begin(), out_shape.end(), 1), out_shape.end());
return in_shape != out_shape;
}
HandleTransposesAroundMatMul::HandleTransposesAroundMatMul() {
add_matcher<HandleTransposeBeforeMatMul>();
add_matcher<HandleTransposeAfterMatMul>();
}

View File

@ -0,0 +1,63 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace GNAPluginNS {
struct VerifyReshape {
bool operator()(const ngraph::Output<ngraph::Node>& reshape_out) const;
};
/**
* @brief Inserts Transpose before MatMul or removes it (if it exists) if there is Reshape
* before MatMul which changes the batch size:
* [1, A*B] [1, A*B]
* | |
* Reshape Reshape
* | |
* [1, A, 1, B] [1, A, 1, B]
* | |
* | Transpose
* | -> |
* | <- [1, B, 1, A]
* | |
* MatMul MatMul
*/
class HandleTransposeBeforeMatMul : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
HandleTransposeBeforeMatMul();
};
/**
* @brief Inserts Transpose after MatMul or removes it (if it exists) if there is Reshape
* after MatMul which changes the batch size:
* MatMul MatMul
* | |
* [1, A, 1, B] [1, A, 1, B]
* | |
* | Transpose
* | -> |
* | <- [1, B, 1, A]
* | |
* Reshape Reshape
* | |
* [1, A*B] [1, A*B]
*/
class HandleTransposeAfterMatMul : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
HandleTransposeAfterMatMul();
};
class HandleTransposesAroundMatMul: public ngraph::pass::GraphRewrite {
public:
NGRAPH_RTTI_DECLARATION;
HandleTransposesAroundMatMul();
};
} // namespace GNAPluginNS
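A minimal usage sketch of my own (not part of the commit) showing how the GraphRewrite declared above is typically applied to a graph; the include path and the helper name apply_matmul_transpose_handling are assumptions.

#include <memory>
#include <ngraph/function.hpp>
#include <ngraph/pass/manager.hpp>
#include "transformations/handle_transposes_around_matmul.hpp"  // assumed path for the declarations above

void apply_matmul_transpose_handling(const std::shared_ptr<ngraph::Function>& model) {
    ngraph::pass::Manager manager;
    // HandleTransposesAroundMatMul registers the Before/After matcher passes declared above.
    manager.register_pass<GNAPluginNS::HandleTransposesAroundMatMul>();
    manager.run_passes(model);  // mutates the function in place
}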

View File

@ -1,69 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <openvino/cc/ngraph/itt.hpp>
#include "transformations/insert_transpose_before_matmul.hpp"
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/pattern/op/or.hpp>
#include <ngraph/rt_info.hpp>
using namespace GNAPluginNS;
NGRAPH_RTTI_DEFINITION(InsertTransposeBeforeMatmul, "InsertTransposeBeforeMatmul", 0);
InsertTransposeBeforeMatmul::InsertTransposeBeforeMatmul() {
MATCHER_SCOPE(InsertTransposeBeforeMatmul);
auto reshape = ngraph::pattern::wrap_type<ngraph::opset7::Reshape>({ngraph::pattern::any_input(),
ngraph::pattern::any_input()},
ngraph::pattern::rank_equals(2));
auto matmul1 = ngraph::pattern::wrap_type<ngraph::opset7::MatMul>({ngraph::pattern::any_input(), reshape});
auto matmul2 = ngraph::pattern::wrap_type<ngraph::opset7::MatMul>({reshape, ngraph::pattern::any_input()});
auto root = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{matmul1, matmul2});
ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
auto& pattern_map = m.get_pattern_value_map();
auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr();
auto reshape_in_shape = reshape_node->get_input_shape(0);
auto reshape_out_shape = reshape_node->get_output_shape(0);
if (reshape_in_shape.front() == reshape_out_shape.front()) {
return false;
}
if (reshape_out_shape[0] == 1 || reshape_out_shape[1] == 1) {
return false;
}
size_t min, max;
std::tie(min, max) = std::minmax(reshape_out_shape[0], reshape_out_shape[1]);
if (min > 8 || max % 8 != 0) return false;
auto consumers = reshape_node->output(0).get_target_inputs();
auto matmul_node = consumers.begin()->get_node()->shared_from_this();
auto transpose_order = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, std::vector<size_t>{1, 0});
auto transpose = register_new_node<ngraph::opset7::Transpose>(reshape_node, transpose_order);
transpose->set_friendly_name(matmul_node->get_friendly_name() + "/in_transpose");
auto transpose_out_shape = transpose->output(0).get_shape();
std::swap(transpose_out_shape[0], transpose_out_shape[1]);
auto reshapeConstAfter = std::make_shared<ngraph::opset7::Constant>(ngraph::element::Type_t::i64,
ngraph::Shape{2},
transpose_out_shape);
auto reshapeAfter = std::make_shared<ngraph::opset7::Reshape>(transpose, reshapeConstAfter, false);
reshapeAfter->set_friendly_name(matmul_node->get_friendly_name() + "/reshape_after_transpose");
for (auto input : consumers) {
input.replace_source_output(reshapeAfter);
}
ngraph::copy_runtime_info(matmul_node, {transpose, reshapeAfter});
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(root, matcher_name);
this->register_matcher(m, callback);
}

View File

@ -1,30 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace GNAPluginNS {
/**
* @brief Inserts Transpose before MatMul in the following topology:
* [1, A]
* |
* Reshape
* |
* [B, C],
* 1 < B <= 8, C % 8 == 0 or
* B % 8 == 0, 1 < C <= 8
* | Const
* \ /
* Matmul
*/
class InsertTransposeBeforeMatmul : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
InsertTransposeBeforeMatmul();
};
} // namespace GNAPluginNS

View File

@ -0,0 +1,28 @@
BasedOnStyle: Google
IndentWidth: 4
UseTab: Never
ColumnLimit: 120
Language: Cpp
Standard: Cpp11
AccessModifierOffset: -4
AlignConsecutiveMacros: true
AllowAllArgumentsOnNextLine: false
AllowAllConstructorInitializersOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Empty
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: false
BinPackArguments: false
BinPackParameters: false
CommentPragmas: '^#'
DerivePointerAlignment: false
FixNamespaceComments: true
IndentCaseLabels: false
IndentPPDirectives: AfterHash
ForEachMacros:
- foreach
- FOREACH_CHILD
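A hedged illustration of the layout these settings aim for; the class, namespace and parameter names below are made up and the snippet is not taken from the repository.

#include <cstddef>
#include <map>
#include <string>

namespace example {  // made-up namespace, purely illustrative

class SampleRunner {
public:                // AccessModifierOffset: -4 pulls the specifier back to the class margin
    SampleRunner() {}  // AllowShortFunctionsOnASingleLine: Empty keeps empty bodies on one line

    // BinPackParameters: false + ColumnLimit: 120 puts one parameter per line once a declaration no longer fits
    void run_inference_with_profiling(const std::string& compiled_model_path,
                                      const std::map<std::string, std::string>& plugin_configuration,
                                      std::size_t number_of_requests);
};

}  // namespace example  (FixNamespaceComments: true appends this closing comment)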

View File

@ -77,7 +77,7 @@ endif()
addVersionDefines(src/ie_version.cpp CI_BUILD_NUMBER)
set (PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include/ie")
set (PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
file (GLOB_RECURSE PUBLIC_HEADERS
${PUBLIC_HEADERS_DIR}/*.hpp
@ -97,7 +97,7 @@ add_library(${TARGET_NAME}_plugin_api INTERFACE)
target_include_directories(${TARGET_NAME}_plugin_api INTERFACE
"${IE_MAIN_SOURCE_DIR}/src/plugin_api"
$<TARGET_PROPERTY:${TARGET_NAME}_preproc,INTERFACE_INCLUDE_DIRECTORIES>
${PUBLIC_HEADERS_DIR})
${PUBLIC_HEADERS_DIR} ${PUBLIC_HEADERS_DIR}/ie)
target_link_libraries(${TARGET_NAME}_plugin_api INTERFACE pugixml::static openvino::itt)
@ -106,7 +106,7 @@ set_ie_threading_interface_for(${TARGET_NAME}_plugin_api)
file(GLOB_RECURSE plugin_api_src "${IE_MAIN_SOURCE_DIR}/src/plugin_api/*.hpp"
"${IE_MAIN_SOURCE_DIR}/src/plugin_api/*.h")
add_cpplint_target(${TARGET_NAME}_plugin_api_cpplint FOR_SOURCES ${plugin_api_src})
add_clang_format_target(${TARGET_NAME}_plugin_api_clang FOR_SOURCES ${plugin_api_src})
# Create object library
@ -142,7 +142,7 @@ if (TBBBIND_2_4_FOUND)
target_link_libraries(${TARGET_NAME}_obj PRIVATE ${TBBBIND_2_4_IMPORTED_TARGETS})
endif()
add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}_obj)
add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}_obj)
# Create shared library file from object library
@ -152,6 +152,10 @@ add_library(${TARGET_NAME} SHARED
${vs_version_file}
$<TARGET_OBJECTS:${TARGET_NAME}_obj>)
ov_ncc_naming_style(FOR_TARGET ${TARGET_NAME}
INCLUDE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/openvino"
ADDITIONAL_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:ngraph,INTERFACE_INCLUDE_DIRECTORIES>)
ie_add_vs_version_file(NAME ${TARGET_NAME}
FILEDESCRIPTION "Inference Engine Core Runtime library")
@ -167,6 +171,8 @@ target_link_libraries(${TARGET_NAME} PRIVATE pugixml::static openvino::itt ${CMA
target_include_directories(${TARGET_NAME} INTERFACE
$<BUILD_INTERFACE:${PUBLIC_HEADERS_DIR}>
$<BUILD_INTERFACE:${PUBLIC_HEADERS_DIR}/ie>
$<INSTALL_INTERFACE:${IE_CPACK_IE_DIR}/include>
$<INSTALL_INTERFACE:${IE_CPACK_IE_DIR}/include/ie>
PRIVATE $<TARGET_PROPERTY:${TARGET_NAME}_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:${TARGET_NAME}_legacy,INTERFACE_INCLUDE_DIRECTORIES>)
@ -269,7 +275,7 @@ endif()
ie_cpack_add_component(core REQUIRED DEPENDS ${core_components})
ie_cpack_add_component(core_dev REQUIRED core ngraph_dev)
install(DIRECTORY "${PUBLIC_HEADERS_DIR}" DESTINATION ${IE_CPACK_IE_DIR}/include
install(DIRECTORY "${PUBLIC_HEADERS_DIR}" DESTINATION ${IE_CPACK_IE_DIR}
COMPONENT core_dev)
install(TARGETS ${TARGET_NAME} EXPORT InferenceEngineTargets
@ -299,7 +305,7 @@ install(EXPORT InferenceEngineTargets
COMPONENT core_dev)
set(IE_NGRAPH_DIR "${CMAKE_BINARY_DIR}/ngraph")
set(IE_INCLUDE_DIR "${PUBLIC_HEADERS_DIR}")
set(IE_INCLUDE_DIR "${PUBLIC_HEADERS_DIR}/ie")
set(IE_PARALLEL_CMAKE "${InferenceEngine_SOURCE_DIR}/cmake/ie_parallel.cmake")
configure_package_config_file("${OpenVINO_SOURCE_DIR}/cmake/templates/InferenceEngineConfig.cmake.in"

View File

@ -10,9 +10,9 @@
*/
#pragma once
#include "ie_plugin_config.hpp"
#include "ie_api.h"
#include "gpu/gpu_config.hpp"
#include "ie_api.h"
#include "ie_plugin_config.hpp"
namespace InferenceEngine {
@ -24,8 +24,8 @@ namespace CLDNNConfigParams {
/**
* @brief shortcut for defining configuration keys
*/
#define CLDNN_CONFIG_KEY(name) InferenceEngine::CLDNNConfigParams::_CONFIG_KEY(CLDNN_##name)
#define DECLARE_CLDNN_CONFIG_KEY(name) DECLARE_CONFIG_KEY(CLDNN_##name)
#define CLDNN_CONFIG_KEY(name) InferenceEngine::CLDNNConfigParams::_CONFIG_KEY(CLDNN_##name)
#define DECLARE_CLDNN_CONFIG_KEY(name) DECLARE_CONFIG_KEY(CLDNN_##name)
#define DECLARE_CLDNN_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(CLDNN_##name)
/**
@ -67,9 +67,10 @@ DECLARE_CLDNN_CONFIG_KEY(SOURCES_DUMPS_DIR);
/**
* @brief This key enables FP16 precision for quantized models.
* By default the model is converted to FP32 precision before running LPT. If this key is enabled (default), then non-quantized layers
* will be converted back to FP16 after LPT, which might imrpove the performance if a model has a lot of compute operations in
* non-quantized path. This key has no effect if current device doesn't have INT8 optimization capabilities.
* By default the model is converted to FP32 precision before running LPT. If this key is enabled (default), then
* non-quantized layers will be converted back to FP16 after LPT, which might improve the performance if a model has a
* lot of compute operations in the non-quantized path. This key has no effect if the current device doesn't have INT8
* optimization capabilities.
*/
DECLARE_CLDNN_CONFIG_KEY(ENABLE_FP16_FOR_QUANTIZED_MODELS);
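A hedged usage sketch (mine, not from the commit) of how this key is typically passed when loading a quantized model on the GPU plugin; the include path is an assumption.

#include <map>
#include <string>
#include <cldnn/cldnn_config.hpp>  // assumed include path for the key declared above
#include <inference_engine.hpp>

InferenceEngine::ExecutableNetwork load_quantized_on_gpu(InferenceEngine::Core& ie,
                                                         const InferenceEngine::CNNNetwork& net) {
    // Keep non-quantized layers in FP16 after LPT, as described in the comment above.
    std::map<std::string, std::string> config = {
        {CLDNN_CONFIG_KEY(ENABLE_FP16_FOR_QUANTIZED_MODELS), InferenceEngine::PluginConfigParams::YES}};
    return ie.LoadNetwork(net, "GPU", config);
}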

View File

@ -15,12 +15,12 @@
#include <utility>
#include <vector>
#include "ie_icnn_network.hpp"
#include "ie_blob.h"
#include "ie_common.h"
#include "ie_data.h"
#include "ie_extension.h"
#include <ngraph/function.hpp>
#include "ie_icnn_network.hpp"
#include "ngraph/function.hpp"
namespace InferenceEngine {
@ -52,8 +52,7 @@ public:
* @param network Pointer to the ngraph::Function object
* @param exts Vector of pointers to IE extension objects
*/
explicit CNNNetwork(const std::shared_ptr<ngraph::Function>& network,
const std::vector<IExtensionPtr>& exts = {});
explicit CNNNetwork(const std::shared_ptr<ngraph::Function>& network, const std::vector<IExtensionPtr>& exts = {});
/**
* @brief Gets the network output Data node information. The received info is stored in the given Data node.
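A short hedged sketch (my own, not from the commit) of the constructor declared above: wrapping an existing ngraph::Function into a CNNNetwork. The function pointer is assumed to be produced elsewhere.

#include <memory>
#include <cpp/ie_cnn_network.h>
#include <ngraph/function.hpp>

InferenceEngine::CNNNetwork to_cnn_network(const std::shared_ptr<ngraph::Function>& fn) {
    return InferenceEngine::CNNNetwork(fn);  // the extensions argument defaults to an empty vector
}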

View File

@ -10,18 +10,24 @@
#pragma once
#include <ostream>
#include <map>
#include <memory>
#include <ostream>
#include <string>
#include <vector>
#include "ie_parameter.hpp"
#include "ie_remote_context.hpp"
#include "cpp/ie_cnn_network.h"
#include "cpp/ie_infer_request.hpp"
#include "details/ie_so_loader.h"
#include "ie_iexecutable_network.hpp"
#include "ie_parameter.hpp"
#include "ie_remote_context.hpp"
namespace ov {
namespace runtime {
class Core;
} // namespace runtime
} // namespace ov
namespace InferenceEngine {
class IExecutableNetworkInternal;
@ -30,17 +36,18 @@ class IExecutableNetworkInternal;
* @brief This is an interface of an executable network
*/
class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
details::SharedObjectLoader _so;
std::shared_ptr<IExecutableNetworkInternal> _impl;
details::SharedObjectLoader _so;
std::shared_ptr<IExecutableNetworkInternal> _impl;
/**
* @brief Constructs ExecutableNetwork from the initialized std::shared_ptr
* @param so Plugin to use. This is required to ensure that ExecutableNetwork can work properly even if plugin object is destroyed.
* @param so Plugin to use. This is required to ensure that ExecutableNetwork can work properly even if plugin
* object is destroyed.
* @param impl Initialized shared pointer
*/
ExecutableNetwork(const details::SharedObjectLoader& so,
const std::shared_ptr<IExecutableNetworkInternal>& impl);
ExecutableNetwork(const details::SharedObjectLoader& so, const std::shared_ptr<IExecutableNetworkInternal>& impl);
friend class Core;
friend class ov::runtime::Core;
public:
/**

View File

@ -13,10 +13,10 @@
#include <memory>
#include <string>
#include "ie_blob.h"
#include "cpp/ie_memory_state.hpp"
#include "ie_iinfer_request.hpp"
#include "details/ie_so_loader.h"
#include "ie_blob.h"
#include "ie_iinfer_request.hpp"
namespace InferenceEngine {
@ -33,16 +33,16 @@ class ICompletionCallbackWrapper;
* It can throw exceptions safely for the application, where it is properly handled.
*/
class INFERENCE_ENGINE_API_CLASS(InferRequest) {
details::SharedObjectLoader _so;
std::shared_ptr<IInferRequestInternal> _impl;
details::SharedObjectLoader _so;
std::shared_ptr<IInferRequestInternal> _impl;
/**
* @brief Constructs InferRequest from the initialized std::shared_ptr
* @param so Plugin to use. This is required to ensure that InferRequest can work properly even if plugin object is destroyed.
* @param so Plugin to use. This is required to ensure that InferRequest can work properly even if plugin object is
* destroyed.
* @param impl Initialized shared pointer
*/
InferRequest(const details::SharedObjectLoader& so,
const std::shared_ptr<IInferRequestInternal>& impl);
InferRequest(const details::SharedObjectLoader& so, const std::shared_ptr<IInferRequestInternal>& impl);
friend class ExecutableNetwork;
public:
@ -93,7 +93,7 @@ public:
* @param data A reference to input. The type of Blob must correspond to the network input precision and size.
* @param info Preprocess info for blob.
*/
void SetBlob(const std::string &name, const Blob::Ptr &data, const PreProcessInfo& info);
void SetBlob(const std::string& name, const Blob::Ptr& data, const PreProcessInfo& info);
/**
* @brief Gets pre-process for input data
@ -176,9 +176,11 @@ private:
void SetCompletionCallbackImpl(IInferRequest::CompletionCallback);
IE_SUPPRESS_DEPRECATED_END
template<typename T>
template <typename T>
struct SetCallback {
void operator()(std::function<void()> f) {_this.SetCompletionCallbackImpl(std::move(f));}
void operator()(std::function<void()> f) {
_this.SetCompletionCallbackImpl(std::move(f));
}
InferRequest& _this;
};
@ -188,7 +190,7 @@ public:
*
* @param callbackToSet callback object which will be called on when inference finish.
*/
template<typename F>
template <typename F>
void SetCompletionCallback(F callbackToSet) {
SetCallback<F>{*this}(std::move(callbackToSet));
}
@ -207,7 +209,7 @@ public:
* @return A shared pointer to IInferRequest interface
*/
INFERENCE_ENGINE_DEPRECATED("Will be removed")
operator std::shared_ptr<IInferRequest> ();
operator std::shared_ptr<IInferRequest>();
IE_SUPPRESS_DEPRECATED_END
/**
@ -238,7 +240,7 @@ public:
/**
* @private
*/
template<>
template <>
struct InferRequest::SetCallback<std::function<void(InferRequest, StatusCode)>> {
void operator()(std::function<void(InferRequest, StatusCode)> f) {
_this.SetCompletionCallbackImpl(std::move(f));
@ -251,7 +253,7 @@ IE_SUPPRESS_DEPRECATED_START
/**
* @private
*/
template<>
template <>
struct InferRequest::SetCallback<IInferRequest::CompletionCallback> {
void operator()(IInferRequest::CompletionCallback f) {
_this.SetCompletionCallbackImpl(std::move(f));
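A hedged usage sketch (mine, not from the commit) of the callback machinery above: passing an explicit std::function selects the two-argument SetCallback specialization, then the request is started asynchronously. The request is assumed to come from ExecutableNetwork::CreateInferRequest().

#include <functional>
#include <inference_engine.hpp>

void run_async_with_callback(InferenceEngine::InferRequest& request) {
    using CallbackT = std::function<void(InferenceEngine::InferRequest, InferenceEngine::StatusCode)>;
    request.SetCompletionCallback(CallbackT([](InferenceEngine::InferRequest, InferenceEngine::StatusCode status) {
        (void)status;  // placeholder: check the status and collect outputs here
    }));
    request.StartAsync();
    request.Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
}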

View File

@ -10,12 +10,12 @@
#pragma once
#include <string>
#include <memory>
#include <string>
#include "details/ie_so_loader.h"
#include "ie_api.h"
#include "ie_blob.h"
#include "details/ie_so_loader.h"
namespace InferenceEngine {
@ -25,16 +25,16 @@ class IVariableStateInternal;
* @brief VariableState class
*/
class INFERENCE_ENGINE_API_CLASS(VariableState) {
details::SharedObjectLoader _so;
std::shared_ptr<IVariableStateInternal> _impl;
details::SharedObjectLoader _so;
std::shared_ptr<IVariableStateInternal> _impl;
/**
* @brief Constructs VariableState from the initialized std::shared_ptr
* @param impl Initialized shared pointer
* @param so Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin object is destroyed.
* @param so Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin
* object is destroyed.
*/
VariableState(const details::SharedObjectLoader& so,
const std::shared_ptr<IVariableStateInternal>& impl);
VariableState(const details::SharedObjectLoader& so, const std::shared_ptr<IVariableStateInternal>& impl);
friend class InferRequest;
friend class ExecutableNetwork;
@ -52,7 +52,7 @@ public:
/**
* @brief Gets name of current variable state, if length of array is not enough name is truncated by len, null
* terminator is inserted as well. As variable state name `variable_id` from according `ReadValue` used.
* terminator is inserted as well. As the variable state name, the `variable_id` from the corresponding `ReadValue` is used.
* @return A string representing a state name
*/
std::string GetName() const;
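A hedged sketch (my own, not from the commit) of how VariableState objects are typically obtained and reset; it assumes the request was created from a network containing ReadValue/Assign pairs, and "hidden_state" is a made-up variable id.

#include <inference_engine.hpp>

void reset_hidden_state(InferenceEngine::InferRequest& request) {
    for (auto&& state : request.QueryState()) {
        // GetName() returns the variable_id of the corresponding ReadValue, as documented above.
        if (state.GetName() == "hidden_state")
            state.Reset();  // drop the accumulated state back to its initial value
    }
}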

View File

@ -4,7 +4,7 @@
/**
* @brief A header file for the BlobIterator class
*
*
* @file ie_blob_iterator.hpp
*/
@ -31,7 +31,7 @@ public:
* @param lk Rvalue of the memory instance to move from
* @param offset Size of offset in memory
*/
explicit BlobIterator(LockedMemory<T>&& lk, size_t offset = 0): _mem(std::move(lk)), _offset(offset) {}
explicit BlobIterator(LockedMemory<T>&& lk, size_t offset = 0) : _mem(std::move(lk)), _offset(offset) {}
/**
* @brief Increments an offset of the current BlobIterator instance

View File

@ -4,7 +4,7 @@
/**
* @brief The header file defines utility PreAllocator class
*
*
* @file ie_pre_allocator.hpp
*/
#pragma once
@ -23,7 +23,7 @@ class PreAllocator final : public IAllocator {
size_t _sizeInBytes;
public:
PreAllocator(void* ptr, size_t bytes_size): _actualData(ptr), _sizeInBytes(bytes_size) {}
PreAllocator(void* ptr, size_t bytes_size) : _actualData(ptr), _sizeInBytes(bytes_size) {}
/**
* @brief Locks a handle to heap memory accessible by any memory manipulation routines
* @return The generic pointer to a memory buffer
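A hedged sketch (mine, not from the commit) of the typical way an existing buffer ends up behind a PreAllocator: make_shared_blob with a raw pointer keeps using the caller's memory instead of copying it. The shape is an arbitrary example.

#include <vector>
#include <inference_engine.hpp>

InferenceEngine::Blob::Ptr wrap_existing_buffer(std::vector<float>& buffer) {
    // 1x3x2x2 = 12 floats; the buffer is assumed to hold at least that many elements
    InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 3, 2, 2}, InferenceEngine::Layout::NCHW);
    // The pre-allocated path wraps the external pointer, so the blob does not own or copy the data.
    return InferenceEngine::make_shared_blob<float>(desc, buffer.data(), buffer.size());
}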

View File

@ -41,7 +41,7 @@ public:
* @brief Loads a library with the name specified.
* @param pluginName Full or relative path to the plugin library
*/
explicit SharedObjectLoader(const char * pluginName);
explicit SharedObjectLoader(const char* pluginName);
/**
* @brief A destructor

View File

@ -9,10 +9,10 @@
#pragma once
#include <cassert>
#include <functional>
#include <memory>
#include <string>
#include <type_traits>
#include <functional>
#include "ie_common.h"
#include "ie_so_loader.h"
@ -31,7 +31,8 @@ class SOCreatorTrait {};
* @tparam C A char type
*/
template <typename C>
using enableIfSupportedChar = typename std::enable_if<(std::is_same<C, char>::value || std::is_same<C, wchar_t>::value)>::type;
using enableIfSupportedChar =
typename std::enable_if<(std::is_same<C, char>::value || std::is_same<C, wchar_t>::value)>::type;
/**
* @brief This class instantiate object using shared library
@ -44,8 +45,10 @@ class SOPointer {
IE_SUPPRESS_DEPRECATED_START
struct HasRelease {
template <typename C> static char test(decltype(&C::Release));
template <typename C> static long test(...);
template <typename C>
static char test(decltype(&C::Release));
template <typename C>
static long test(...);
constexpr static const bool value = sizeof(test<T>(nullptr)) == sizeof(char);
};
IE_SUPPRESS_DEPRECATED_END
@ -60,10 +63,8 @@ public:
* @brief The main constructor
* @param name Name of a shared library file
*/
template <typename C,
typename = enableIfSupportedChar<C>>
SOPointer(const std::basic_string<C> & name)
: _so(name.c_str()) {
template <typename C, typename = enableIfSupportedChar<C>>
SOPointer(const std::basic_string<C>& name) : _so(name.c_str()) {
Load(std::integral_constant<bool, HasRelease::value>{});
}
@ -78,8 +79,7 @@ public:
* @brief Constructs an object with existing loader
* @param so Existing pointer to a library loader
*/
explicit SOPointer(const SharedObjectLoader& so)
: _so(so) {
explicit SOPointer(const SharedObjectLoader& so) : _so(so) {
Load(std::integral_constant<bool, HasRelease::value>{});
}
@ -88,9 +88,8 @@ public:
* @param that copied SOPointer object
*/
template <typename U>
SOPointer(const SOPointer<U>& that)
: _so(that._so),
_ptr(std::dynamic_pointer_cast<T>(that._ptr)) {
SOPointer(const SOPointer<U>& that) : _so(that._so),
_ptr(std::dynamic_pointer_cast<T>(that._ptr)) {
IE_ASSERT(_ptr != nullptr);
}
@ -123,7 +122,7 @@ public:
return _so;
}
operator std::shared_ptr<T>& () noexcept {
operator std::shared_ptr<T>&() noexcept {
return _ptr;
}
@ -136,7 +135,8 @@ protected:
void* create = nullptr;
try {
create = _so.get_symbol((SOCreatorTrait<T>::name + std::string("Shared")).c_str());
} catch (const NotFound&) {}
} catch (const NotFound&) {
}
if (create == nullptr) {
create = _so.get_symbol(SOCreatorTrait<T>::name);
using CreateF = StatusCode(T*&, ResponseDesc*);
@ -144,17 +144,23 @@ protected:
ResponseDesc desc;
StatusCode sts = reinterpret_cast<CreateF*>(create)(object, &desc);
if (sts != OK) {
IE_EXCEPTION_SWITCH(sts, ExceptionType,
InferenceEngine::details::ThrowNow<ExceptionType>{} <<= std::stringstream{} << IE_LOCATION << desc.msg)
IE_EXCEPTION_SWITCH(sts,
ExceptionType,
InferenceEngine::details::ThrowNow<ExceptionType>{} <<=
std::stringstream{} << IE_LOCATION << desc.msg)
}
IE_SUPPRESS_DEPRECATED_START
_ptr = std::shared_ptr<T>(object, [] (T* ptr){ptr->Release();});
_ptr = std::shared_ptr<T>(object, [](T* ptr) {
ptr->Release();
});
IE_SUPPRESS_DEPRECATED_END
} else {
using CreateF = void(std::shared_ptr<T>&);
reinterpret_cast<CreateF*>(create)(_ptr);
}
} catch(...) {details::Rethrow();}
} catch (...) {
details::Rethrow();
}
}
/**
@ -164,7 +170,9 @@ protected:
try {
using CreateF = void(std::shared_ptr<T>&);
reinterpret_cast<CreateF*>(_so.get_symbol(SOCreatorTrait<T>::name))(_ptr);
} catch(...) {details::Rethrow();}
} catch (...) {
details::Rethrow();
}
}
/**

View File

@ -31,36 +31,36 @@ namespace GNAConfigParams {
*/
#define GNA_CONFIG_VALUE(name) InferenceEngine::GNAConfigParams::GNA_##name
#define DECLARE_GNA_CONFIG_KEY(name) DECLARE_CONFIG_KEY(GNA_##name)
#define DECLARE_GNA_CONFIG_KEY(name) DECLARE_CONFIG_KEY(GNA_##name)
#define DECLARE_GNA_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(GNA_##name)
/**
* @brief Scale factor that is calculated by user, in order to use static quantisation feature
* This option should be used with floating point value serialized to string with decimal separator equals to . (dot)
* @details For multiple input case, individual scale factors can be passed, using KEY_GNA_SCALE_FACTOR[_input_layer_name]
* where input_layer can be obtained from from CNNNetwork::GetInputsInfo
*/
* @brief Scale factor that is calculated by the user, in order to use the static quantisation feature
* This option should be used with a floating point value serialized to string with the decimal separator equal to . (dot)
* @details For the multiple input case, individual scale factors can be passed, using
* KEY_GNA_SCALE_FACTOR[_input_layer_name] where input_layer can be obtained from CNNNetwork::GetInputsInfo
*/
DECLARE_GNA_CONFIG_KEY(SCALE_FACTOR);
/**
* @brief By default gna api works with Int16 weights precision, however this can be adjusted if necessary,
* currently supported values are I16, I8
*/
* @brief By default the GNA API works with Int16 weights precision; however, this can be adjusted if necessary.
* Currently supported values are I16 and I8.
*/
DECLARE_GNA_CONFIG_KEY(PRECISION);
/**
* @brief if turned on, dump GNA firmware model into specified file
*/
* @brief if turned on, dump GNA firmware model into specified file
*/
DECLARE_GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE);
/**
* @brief information on GNA generation chosen for firmware model dump, can be overridden by GNA3
*/
* @brief information on GNA generation chosen for firmware model dump, can be overridden by GNA3
*/
DECLARE_GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE_GENERATION);
/**
* @brief GNA proc_type setting that should be one of GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_EXACT
*/
* @brief GNA proc_type setting that should be one of GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_EXACT
*/
DECLARE_GNA_CONFIG_KEY(DEVICE_MODE);
DECLARE_GNA_CONFIG_VALUE(AUTO);
@ -79,62 +79,63 @@ DECLARE_GNA_CONFIG_VALUE(AVX2);
DECLARE_GNA_CONFIG_VALUE(AVX2_EXACT);
/**
* @brief The option to override the GNA HW execution target. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0.
* By default (in case of no value set) the behavior depends on GNA HW availability:
* If GNA HW is present, use the option corresponding to this HW.
* If HW is not present, use the option corresponding to the latest fully supported GNA HW generation.
* A fully supported GNA HW generation means it must be supported by booth the OV GNA Plugin and the core GNA Library.
* For the GNA Library 2.0.X.Y, the latest supported GNA HW generation corresponds to GNA_TARGET_2_0.
* For the GNA Library 2.1.X.Y, the latest supported GNA HW generation corresponds to GNA_TARGET_3_0.
* For the OV GNA Plugin 2021.4, the latest supported GNA HW generation corresponds to GNA_TARGET_3_0.
*/
* @brief The option to override the GNA HW execution target. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0.
* By default (in case of no value set) the behavior depends on GNA HW availability:
* If GNA HW is present, use the option corresponding to this HW.
* If HW is not present, use the option corresponding to the latest fully supported GNA HW generation.
* A fully supported GNA HW generation means it must be supported by both the OV GNA Plugin and the core GNA Library.
* For the GNA Library 2.0.X.Y, the latest supported GNA HW generation corresponds to GNA_TARGET_2_0.
* For the GNA Library 2.1.X.Y, the latest supported GNA HW generation corresponds to GNA_TARGET_3_0.
* For the OV GNA Plugin 2021.4, the latest supported GNA HW generation corresponds to GNA_TARGET_3_0.
*/
DECLARE_GNA_CONFIG_KEY(EXEC_TARGET);
DECLARE_GNA_CONFIG_VALUE(TARGET_2_0);
DECLARE_GNA_CONFIG_VALUE(TARGET_3_0);
/**
* @brief The option to override the GNA HW compile target. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0.
* By default the same as GNA_EXEC_TARGET.
*/
* @brief The option to override the GNA HW compile target. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0.
* By default the same as GNA_EXEC_TARGET.
*/
DECLARE_GNA_CONFIG_KEY(COMPILE_TARGET);
/**
* @brief if enabled produced minimum memory footprint for loaded network in GNA memory, default value is YES
*/
* @brief If enabled, produces a minimum memory footprint for the loaded network in GNA memory; the default value is YES
*/
DECLARE_GNA_CONFIG_KEY(COMPACT_MODE);
/**
* @brief The option to enable/disable uniformly distributed PWL algorithm.
* By default (in case of NO value set) the optimized algorithm called "Recursive Descent Algorithm for Finding
* the Optimal Minimax Piecewise Linear Approximation of Convex Functions is used.
* If value is YES then simple uniform distribution used to create PWL approximation of activation functions
* Uniform distribution usually gives poor approximation with same number of segments
*/
* @brief The option to enable/disable the uniformly distributed PWL algorithm.
* By default (in case of NO value set) the optimized algorithm called "Recursive Descent Algorithm for Finding
* the Optimal Minimax Piecewise Linear Approximation of Convex Functions" is used.
* If the value is YES, then a simple uniform distribution is used to create the PWL approximation of activation functions.
* A uniform distribution usually gives a poorer approximation with the same number of segments.
*/
DECLARE_GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN);
/**
* @brief The option to allow to specify the maximum error percent that the optimized algorithm finding
* will use to find PWL functions.
* By default (in case of NO value set), 1.0 value is used.
*/
* @brief The option that allows specifying the maximum error percent that the optimized algorithm
* will use when finding PWL functions.
* By default (in case of no value set), a value of 1.0 is used.
*/
DECLARE_GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT);
/**
* @brief By default, the GNA plugin uses one worker thread for inference computations.
* This parameter allows you to create up to 127 threads for software modes.
*
* Note that multithreading mode does not guarantee the same computation order as order
* of issuing. Additionally, in this case, software modes do not implement any serializations.
*/
* @brief By default, the GNA plugin uses one worker thread for inference computations.
* This parameter allows you to create up to 127 threads for software modes.
*
* Note that multithreading mode does not guarantee the same computation order as the order
* of issuing. Additionally, in this case, software modes do not implement any serializations.
*/
DECLARE_GNA_CONFIG_KEY(LIB_N_THREADS);
} // namespace GNAConfigParams
namespace Metrics {
/**
* @brief Metric to get a std::string of GNA Library version, usually in the form <API_REVISION>.<RELEASE_LINE>.<RELEASE>.<BUILD>
*/
DECLARE_METRIC_KEY(GNA_LIBRARY_FULL_VERSION, std::string);
/**
* @brief Metric to get a std::string of GNA Library version, usually in the form
* <API_REVISION>.<RELEASE_LINE>.<RELEASE>.<BUILD>
*/
DECLARE_METRIC_KEY(GNA_LIBRARY_FULL_VERSION, std::string);
} // namespace Metrics
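A hedged usage sketch (mine, not from the commit) of how the keys above are typically passed to Core::LoadNetwork; the include path and the scale factor value "1024" are assumptions for illustration.

#include <map>
#include <string>
#include <gna/gna_config.hpp>  // assumed include path for the keys above
#include <inference_engine.hpp>

InferenceEngine::ExecutableNetwork load_on_gna(InferenceEngine::Core& ie, const InferenceEngine::CNNNetwork& net) {
    std::map<std::string, std::string> config = {
        {GNA_CONFIG_KEY(DEVICE_MODE), GNA_CONFIG_VALUE(SW_EXACT)},  // bit-exact software emulation
        {GNA_CONFIG_KEY(SCALE_FACTOR), "1024"}};                    // static quantisation scale for the single input
    return ie.LoadNetwork(net, "GNA", config);
}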
namespace PluginConfigParams {

View File

@ -11,27 +11,29 @@
#include <string>
#include "ie_parameter.hpp"
namespace InferenceEngine {
namespace gpu {
namespace details {
/**
* @brief This wrapper class is used to obtain low-level handles
* from remote blob or context object parameters.
*/
* @brief This wrapper class is used to obtain low-level handles
* from remote blob or context object parameters.
*/
class param_map_obj_getter {
protected:
/**
* @brief Template function that returns specified
* object parameter typecasted to desired user type
*/
* @brief Template function that returns specified
* object parameter typecasted to desired user type
*/
template <typename Result, typename Tmp>
Result _ObjFromParams(const ParamMap& params,
const std::string& handle_Key,
const std::string& type_Key,
const std::string& obj_T1,
const std::string& obj_T2 = "__") const {
const std::string& handle_Key,
const std::string& type_Key,
const std::string& obj_T1,
const std::string& obj_T2 = "__") const {
auto itrType = params.find(type_Key);
if (itrType == params.end())
IE_THROW() << "Parameter of type " << type_Key << " not found";
@ -50,9 +52,9 @@ protected:
}
/**
* @brief Same as _ObjFromParams(), but should be used if check
* for object type is not required
*/
* @brief Same as _ObjFromParams(), but should be used if check
* for object type is not required
*/
template <typename Result>
Result _ObjFromParamSimple(const ParamMap& params, const std::string& handle_Key) const {
auto itrHandle = params.find(handle_Key);
@ -65,11 +67,10 @@ protected:
}
/**
* @brief Template function that extracts string value
* from map entry under specified key
*/
std::string _StrFromParams(const ParamMap& params,
std::string Key) const {
* @brief Template function that extracts string value
* from map entry under specified key
*/
std::string _StrFromParams(const ParamMap& params, std::string Key) const {
auto itrType = params.find(Key);
if (itrType == params.end())
IE_THROW() << "Parameter key " << Key << " not found";

View File

@ -20,7 +20,7 @@ namespace Metrics {
* @def GPU_METRIC_KEY(name)
* @brief shortcut for defining GPU plugin metrics
*/
#define GPU_METRIC_KEY(name) METRIC_KEY(GPU_##name)
#define GPU_METRIC_KEY(name) METRIC_KEY(GPU_##name)
#define DECLARE_GPU_METRIC_KEY(name, ...) DECLARE_METRIC_KEY(GPU_##name, __VA_ARGS__)
/**
@ -30,7 +30,8 @@ namespace Metrics {
#define DECLARE_GPU_METRIC_VALUE(name) DECLARE_METRIC_VALUE(GPU_##name)
/**
* @brief Metric which defines size of memory in bytes available for the device. For iGPU it returns host memory size, for dGPU - dedicated gpu memory size
* @brief Metric which defines the size of memory in bytes available for the device. For iGPU it returns the host
* memory size, for dGPU the dedicated GPU memory size
*/
DECLARE_GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE, uint64_t);
@ -60,8 +61,8 @@ namespace GPUConfigParams {
/**
* @brief shortcut for defining configuration keys
*/
#define GPU_CONFIG_KEY(name) InferenceEngine::GPUConfigParams::_CONFIG_KEY(GPU_##name)
#define DECLARE_GPU_CONFIG_KEY(name) DECLARE_CONFIG_KEY(GPU_##name)
#define GPU_CONFIG_KEY(name) InferenceEngine::GPUConfigParams::_CONFIG_KEY(GPU_##name)
#define DECLARE_GPU_CONFIG_KEY(name) DECLARE_CONFIG_KEY(GPU_##name)
#define DECLARE_GPU_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(GPU_##name)
/**
@ -93,10 +94,11 @@ DECLARE_GPU_CONFIG_KEY(NV12_TWO_INPUTS);
DECLARE_GPU_CONFIG_KEY(MAX_NUM_THREADS);
/**
* @brief Turning on this key enables to unroll recurrent layers such as TensorIterator or Loop with fixed iteration count.
* This key is turned on by default. Turning this key on will achieve better inference performance for loops with not too many iteration counts (less than 16, as a rule of thumb).
* Turning this key off will achieve better performance for both graph loading time and inference time with many iteration counts (greater than 16).
* Note that turning this key on will increase the graph loading time in proportion to the iteration counts.
* @brief Turning on this key enables unrolling of recurrent layers such as TensorIterator or Loop with a fixed iteration
* count. This key is turned on by default. Turning this key on will achieve better inference performance for loops with
* not too many iteration counts (less than 16, as a rule of thumb). Turning this key off will achieve better
* performance for both graph loading time and inference time with many iteration counts (greater than 16). Note that
* turning this key on will increase the graph loading time in proportion to the iteration counts.
* Thus, this key should be turned off if graph loading time is considered the most important target to optimize.*/
DECLARE_GPU_CONFIG_KEY(ENABLE_LOOP_UNROLLING);
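A hedged usage sketch (mine, not from the commit): turning the key off to favour graph loading time for long loops; the include path is an assumption.

#include <map>
#include <string>
#include <gpu/gpu_config.hpp>  // assumed include path for the key above
#include <inference_engine.hpp>

InferenceEngine::ExecutableNetwork load_without_loop_unrolling(InferenceEngine::Core& ie,
                                                               const InferenceEngine::CNNNetwork& net) {
    // Prefer faster network compilation over per-iteration speed, per the comment above.
    std::map<std::string, std::string> config = {
        {GPU_CONFIG_KEY(ENABLE_LOOP_UNROLLING), InferenceEngine::PluginConfigParams::NO}};
    return ie.LoadNetwork(net, "GPU", config);
}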

View File

@ -11,13 +11,13 @@
*/
#pragma once
#include <d3d11.h>
#include <memory>
#include <string>
#include "gpu/gpu_context_api_ocl.hpp"
#include <d3d11.h>
namespace InferenceEngine {
namespace gpu {
@ -37,12 +37,13 @@ public:
/**
* @brief ID3D11Device conversion operator for the D3DContext object.
* @return Pointer to underlying ID3D11Device interface
* @return Pointer to underlying ID3D11Device interface
*/
operator ID3D11Device*() {
return _ObjFromParams<ID3D11Device*, gpu_handle_param>(getParams(),
GPU_PARAM_KEY(VA_DEVICE),
GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED));
GPU_PARAM_KEY(VA_DEVICE),
GPU_PARAM_KEY(CONTEXT_TYPE),
GPU_PARAM_VALUE(VA_SHARED));
}
};
@ -67,12 +68,13 @@ public:
/**
* @brief ID3D11Buffer conversion operator for the D3DContext object.
* @return Pointer to underlying ID3D11Buffer interface
* @return Pointer to underlying ID3D11Buffer interface
*/
operator ID3D11Buffer*() {
return _ObjFromParams<ID3D11Buffer*, gpu_handle_param>(getParams(),
GPU_PARAM_KEY(DEV_OBJECT_HANDLE),
GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(DX_BUFFER));
GPU_PARAM_KEY(DEV_OBJECT_HANDLE),
GPU_PARAM_KEY(SHARED_MEM_TYPE),
GPU_PARAM_VALUE(DX_BUFFER));
}
};
@ -97,12 +99,13 @@ public:
/**
* @brief ID3D11Texture2D conversion operator for the D3DContext object.
* @return Pointer to underlying ID3D11Texture2D interface
* @return Pointer to underlying ID3D11Texture2D interface
*/
operator ID3D11Texture2D*() {
return _ObjFromParams<ID3D11Texture2D*, gpu_handle_param>(getParams(),
GPU_PARAM_KEY(DEV_OBJECT_HANDLE),
GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE));
GPU_PARAM_KEY(DEV_OBJECT_HANDLE),
GPU_PARAM_KEY(SHARED_MEM_TYPE),
GPU_PARAM_VALUE(VA_SURFACE));
}
/**
@ -111,8 +114,9 @@ public:
*/
uint32_t plane() {
return _ObjFromParams<uint32_t, uint32_t>(getParams(),
GPU_PARAM_KEY(VA_PLANE),
GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE));
GPU_PARAM_KEY(VA_PLANE),
GPU_PARAM_KEY(SHARED_MEM_TYPE),
GPU_PARAM_VALUE(VA_SURFACE));
}
};
@ -125,18 +129,19 @@ public:
* @param nv12_surf A ID3D11Texture2D instance to create NV12 blob from
* @return NV12 remote blob
*/
static inline Blob::Ptr make_shared_blob_nv12(size_t height, size_t width, RemoteContext::Ptr ctx, ID3D11Texture2D* nv12_surf) {
static inline Blob::Ptr make_shared_blob_nv12(size_t height,
size_t width,
RemoteContext::Ptr ctx,
ID3D11Texture2D* nv12_surf) {
// regardless of layout, blob dimensions always follow in N,C,H,W order
TensorDesc desc(Precision::U8, { 1, 1, height, width }, Layout::NHWC);
TensorDesc desc(Precision::U8, {1, 1, height, width}, Layout::NHWC);
ParamMap blobParams = {
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) },
{ GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast<gpu_handle_param>(nv12_surf) },
{ GPU_PARAM_KEY(VA_PLANE), uint32_t(0) }
};
ParamMap blobParams = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE)},
{GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast<gpu_handle_param>(nv12_surf)},
{GPU_PARAM_KEY(VA_PLANE), uint32_t(0)}};
Blob::Ptr y_blob = std::dynamic_pointer_cast<Blob>(ctx->CreateBlob(desc, blobParams));
TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC);
TensorDesc uvdesc(Precision::U8, {1, 2, height / 2, width / 2}, Layout::NHWC);
blobParams[GPU_PARAM_KEY(MEM_HANDLE)] = static_cast<gpu_handle_param>(nv12_surf);
blobParams[GPU_PARAM_KEY(VA_PLANE)] = uint32_t(1);
Blob::Ptr uv_blob = std::dynamic_pointer_cast<Blob>(ctx->CreateBlob(uvdesc, blobParams));
@ -152,10 +157,12 @@ static inline Blob::Ptr make_shared_blob_nv12(size_t height, size_t width, Remot
* @return A shared remote context instance
*/
static inline D3DContext::Ptr make_shared_context(Core& core, std::string deviceName, ID3D11Device* device) {
// clang-format off
ParamMap contextParams = {
{ GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED) },
{ GPU_PARAM_KEY(VA_DEVICE), static_cast<gpu_handle_param>(device) }
{GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED)},
{GPU_PARAM_KEY(VA_DEVICE), static_cast<gpu_handle_param>(device)}
};
// clang-format on
return std::dynamic_pointer_cast<D3DContext>(core.CreateContext(deviceName, contextParams));
}
@ -172,10 +179,8 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::
IE_THROW() << "Invalid remote context passed";
}
ParamMap params = {
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(DX_BUFFER) },
{ GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast<gpu_handle_param>(buffer) }
};
ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(DX_BUFFER)},
{GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast<gpu_handle_param>(buffer)}};
return std::dynamic_pointer_cast<D3DBufferBlob>(casted->CreateBlob(desc, params));
}
@ -188,16 +193,17 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::
* @return Smart pointer to created RemoteBlob object cast to base class
* @note The underlying ID3D11Texture2D can also be a plane of output surface of DXGI video decoder
*/
static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::Ptr ctx, ID3D11Texture2D* surface, uint32_t plane = 0) {
static inline Blob::Ptr make_shared_blob(const TensorDesc& desc,
RemoteContext::Ptr ctx,
ID3D11Texture2D* surface,
uint32_t plane = 0) {
auto casted = std::dynamic_pointer_cast<D3DContext>(ctx);
if (nullptr == casted) {
IE_THROW() << "Invalid remote context passed";
}
ParamMap params = {
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) },
{ GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast<gpu_handle_param>(surface) },
{ GPU_PARAM_KEY(VA_PLANE), plane }
};
ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE)},
{GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast<gpu_handle_param>(surface)},
{GPU_PARAM_KEY(VA_PLANE), plane}};
return std::dynamic_pointer_cast<D3DSurface2DBlob>(casted->CreateBlob(desc, params));
}
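A hedged call-sequence sketch (mine, not from the commit) for the helpers whose signatures appear above; acquiring the ID3D11Device and the decoder output surface is outside the scope of this snippet, so both are placeholders.

#include <gpu/gpu_context_api_dx.hpp>  // assumed include path
#include <inference_engine.hpp>

InferenceEngine::Blob::Ptr wrap_decoder_surface(InferenceEngine::Core& core,
                                                ID3D11Device* device,        // obtained elsewhere (placeholder)
                                                ID3D11Texture2D* nv12_surf,  // decoder output surface (placeholder)
                                                size_t height,
                                                size_t width) {
    // Share the existing D3D11 device with the GPU plugin ...
    auto context = InferenceEngine::gpu::make_shared_context(core, "GPU", device);
    // ... then wrap the NV12 surface as a pair of remote blobs (Y + UV), as the helper above does.
    return InferenceEngine::gpu::make_shared_blob_nv12(height, width, context, nv12_surf);
}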

View File

@ -13,13 +13,12 @@
#include <memory>
#include <string>
#include "ie_compound_blob.h"
#include "ie_remote_context.hpp"
#include "ie_core.hpp"
#include "gpu/gpu_params.hpp"
#include "gpu/gpu_ocl_wrapper.hpp"
#include "gpu/details/gpu_context_helpers.hpp"
#include "gpu/gpu_ocl_wrapper.hpp"
#include "gpu/gpu_params.hpp"
#include "ie_compound_blob.h"
#include "ie_core.hpp"
#include "ie_remote_context.hpp"
namespace InferenceEngine {
@ -42,8 +41,11 @@ public:
* @return `cl_context`
*/
cl_context get() {
return _ObjFromParams<cl_context, gpu_handle_param>(getParams(), GPU_PARAM_KEY(OCL_CONTEXT),
GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(OCL), GPU_PARAM_VALUE(VA_SHARED));
return _ObjFromParams<cl_context, gpu_handle_param>(getParams(),
GPU_PARAM_KEY(OCL_CONTEXT),
GPU_PARAM_KEY(CONTEXT_TYPE),
GPU_PARAM_VALUE(OCL),
GPU_PARAM_VALUE(VA_SHARED));
}
/**
@ -105,8 +107,11 @@ public:
* @return underlying OpenCL memory object handle
*/
cl_mem get() {
return _ObjFromParams<cl_mem, gpu_handle_param>(getParams(), GPU_PARAM_KEY(MEM_HANDLE),
GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER), GPU_PARAM_VALUE(DX_BUFFER));
return _ObjFromParams<cl_mem, gpu_handle_param>(getParams(),
GPU_PARAM_KEY(MEM_HANDLE),
GPU_PARAM_KEY(SHARED_MEM_TYPE),
GPU_PARAM_VALUE(OCL_BUFFER),
GPU_PARAM_VALUE(DX_BUFFER));
}
/**
@ -150,8 +155,11 @@ public:
* @return `cl_mem`
*/
cl_mem get() {
return _ObjFromParams<cl_mem, gpu_handle_param>(getParams(), GPU_PARAM_KEY(MEM_HANDLE),
GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D), GPU_PARAM_VALUE(VA_SURFACE));
return _ObjFromParams<cl_mem, gpu_handle_param>(getParams(),
GPU_PARAM_KEY(MEM_HANDLE),
GPU_PARAM_KEY(SHARED_MEM_TYPE),
GPU_PARAM_VALUE(OCL_IMAGE2D),
GPU_PARAM_VALUE(VA_SURFACE));
}
/**
@ -179,7 +187,9 @@ public:
* @param nv12_image_plane_uv cl::Image2D object containing UV plane data.
* @return A shared remote blob instance
*/
static inline Blob::Ptr make_shared_blob_nv12(RemoteContext::Ptr ctx, cl::Image2D& nv12_image_plane_y, cl::Image2D& nv12_image_plane_uv) {
static inline Blob::Ptr make_shared_blob_nv12(RemoteContext::Ptr ctx,
cl::Image2D& nv12_image_plane_y,
cl::Image2D& nv12_image_plane_uv) {
auto casted = std::dynamic_pointer_cast<ClContext>(ctx);
if (nullptr == casted) {
IE_THROW() << "Invalid remote context passed";
@ -189,15 +199,13 @@ static inline Blob::Ptr make_shared_blob_nv12(RemoteContext::Ptr ctx, cl::Image2
size_t height = nv12_image_plane_y.getImageInfo<CL_IMAGE_HEIGHT>();
// regardless of layout, blob dimensions always follow in N,C,H,W order
TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC);
TensorDesc ydesc(Precision::U8, {1, 1, height, width}, Layout::NHWC);
ParamMap blobParams = {
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D) },
{ GPU_PARAM_KEY(MEM_HANDLE), static_cast<gpu_handle_param>(nv12_image_plane_y.get()) }
};
ParamMap blobParams = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D)},
{GPU_PARAM_KEY(MEM_HANDLE), static_cast<gpu_handle_param>(nv12_image_plane_y.get())}};
Blob::Ptr y_blob = std::dynamic_pointer_cast<Blob>(casted->CreateBlob(ydesc, blobParams));
TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC);
TensorDesc uvdesc(Precision::U8, {1, 2, height / 2, width / 2}, Layout::NHWC);
blobParams[GPU_PARAM_KEY(MEM_HANDLE)] = static_cast<gpu_handle_param>(nv12_image_plane_uv.get());
Blob::Ptr uv_blob = std::dynamic_pointer_cast<Blob>(casted->CreateBlob(uvdesc, blobParams));
@ -213,10 +221,8 @@ static inline Blob::Ptr make_shared_blob_nv12(RemoteContext::Ptr ctx, cl::Image2
* @return A shared remote context instance
*/
static inline RemoteContext::Ptr make_shared_context(Core& core, std::string deviceName, cl_context ctx) {
ParamMap contextParams = {
{ GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(OCL) },
{ GPU_PARAM_KEY(OCL_CONTEXT), static_cast<gpu_handle_param>(ctx) }
};
ParamMap contextParams = {{GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(OCL)},
{GPU_PARAM_KEY(OCL_CONTEXT), static_cast<gpu_handle_param>(ctx)}};
return core.CreateContext(deviceName, contextParams);
}
@ -243,10 +249,8 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::
IE_THROW() << "Invalid remote context passed";
}
ParamMap params = {
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER) },
{ GPU_PARAM_KEY(MEM_HANDLE), static_cast<gpu_handle_param>(buffer.get()) }
};
ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER)},
{GPU_PARAM_KEY(MEM_HANDLE), static_cast<gpu_handle_param>(buffer.get())}};
return std::dynamic_pointer_cast<Blob>(casted->CreateBlob(desc, params));
}
@ -263,10 +267,8 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::
IE_THROW() << "Invalid remote context passed";
}
ParamMap params = {
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER) },
{ GPU_PARAM_KEY(MEM_HANDLE), static_cast<gpu_handle_param>(buffer) }
};
ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER)},
{GPU_PARAM_KEY(MEM_HANDLE), static_cast<gpu_handle_param>(buffer)}};
return std::dynamic_pointer_cast<Blob>(casted->CreateBlob(desc, params));
}
@ -283,10 +285,8 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::
IE_THROW() << "Invalid remote context passed";
}
ParamMap params = {
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D) },
{ GPU_PARAM_KEY(MEM_HANDLE), static_cast<gpu_handle_param>(image.get()) }
};
ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D)},
{GPU_PARAM_KEY(MEM_HANDLE), static_cast<gpu_handle_param>(image.get())}};
return std::dynamic_pointer_cast<Blob>(casted->CreateBlob(desc, params));
}
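A hedged sketch (mine, not from the commit) of the OpenCL sharing path shown above: an existing cl_context is shared with the GPU plugin and a cl_mem handle is exposed as a remote blob. The cl_context and cl_mem are assumed to come from the user's own OpenCL code.

#include <gpu/gpu_context_api_ocl.hpp>  // assumed include path
#include <inference_engine.hpp>

InferenceEngine::Blob::Ptr wrap_cl_buffer(InferenceEngine::Core& core,
                                          cl_context ocl_ctx,  // existing OpenCL context (placeholder)
                                          cl_mem buffer,       // existing OpenCL buffer (placeholder)
                                          const InferenceEngine::TensorDesc& desc) {
    // Mirror the make_shared_context / make_shared_blob helpers defined in this header.
    auto context = InferenceEngine::gpu::make_shared_context(core, "GPU", ocl_ctx);
    return InferenceEngine::gpu::make_shared_blob(desc, context, buffer);
}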

View File

@ -16,7 +16,9 @@
#include "gpu/gpu_context_api_ocl.hpp"
// clang-format off
#include <va/va.h>
// clang-format on
namespace InferenceEngine {
@ -41,8 +43,9 @@ public:
*/
operator VADisplay() {
return _ObjFromParams<VADisplay, gpu_handle_param>(getParams(),
GPU_PARAM_KEY(VA_DEVICE),
GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED));
GPU_PARAM_KEY(VA_DEVICE),
GPU_PARAM_KEY(CONTEXT_TYPE),
GPU_PARAM_VALUE(VA_SHARED));
}
};
@ -71,8 +74,9 @@ public:
*/
operator VASurfaceID() {
return _ObjFromParams<VASurfaceID, uint32_t>(getParams(),
GPU_PARAM_KEY(DEV_OBJECT_HANDLE),
GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE));
GPU_PARAM_KEY(DEV_OBJECT_HANDLE),
GPU_PARAM_KEY(SHARED_MEM_TYPE),
GPU_PARAM_VALUE(VA_SURFACE));
}
/**
@ -81,8 +85,9 @@ public:
*/
uint32_t plane() {
return _ObjFromParams<uint32_t, uint32_t>(getParams(),
GPU_PARAM_KEY(VA_PLANE),
GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE));
GPU_PARAM_KEY(VA_PLANE),
GPU_PARAM_KEY(SHARED_MEM_TYPE),
GPU_PARAM_VALUE(VA_SURFACE));
}
};
@ -95,17 +100,18 @@ public:
* @param nv12_surf NV12 `VASurfaceID` to create NV12 from
* @return A remote NV12 blob wrapping `VASurfaceID`
*/
static inline Blob::Ptr make_shared_blob_nv12(size_t height, size_t width, RemoteContext::Ptr ctx, VASurfaceID nv12_surf) {
static inline Blob::Ptr make_shared_blob_nv12(size_t height,
size_t width,
RemoteContext::Ptr ctx,
VASurfaceID nv12_surf) {
// regardless of layout, blob dimensions always follow in N, C, H, W order
TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC);
ParamMap blobParams = {
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) },
{ GPU_PARAM_KEY(DEV_OBJECT_HANDLE), nv12_surf },
{ GPU_PARAM_KEY(VA_PLANE), uint32_t(0) }
};
TensorDesc ydesc(Precision::U8, {1, 1, height, width}, Layout::NHWC);
ParamMap blobParams = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE)},
{GPU_PARAM_KEY(DEV_OBJECT_HANDLE), nv12_surf},
{GPU_PARAM_KEY(VA_PLANE), uint32_t(0)}};
Blob::Ptr y_blob = std::dynamic_pointer_cast<Blob>(ctx->CreateBlob(ydesc, blobParams));
TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC);
TensorDesc uvdesc(Precision::U8, {1, 2, height / 2, width / 2}, Layout::NHWC);
blobParams[GPU_PARAM_KEY(VA_PLANE)] = uint32_t(1);
Blob::Ptr uv_blob = std::dynamic_pointer_cast<Blob>(ctx->CreateBlob(uvdesc, blobParams));
@ -120,10 +126,8 @@ static inline Blob::Ptr make_shared_blob_nv12(size_t height, size_t width, Remot
* @return A remote context wrapping `VADisplay`
*/
static inline VAContext::Ptr make_shared_context(Core& core, std::string deviceName, VADisplay device) {
ParamMap contextParams = {
{ GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED) },
{ GPU_PARAM_KEY(VA_DEVICE), static_cast<gpu_handle_param>(device) }
};
ParamMap contextParams = {{GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED)},
{GPU_PARAM_KEY(VA_DEVICE), static_cast<gpu_handle_param>(device)}};
return std::dynamic_pointer_cast<VAContext>(core.CreateContext(deviceName, contextParams));
}
@ -135,16 +139,17 @@ static inline VAContext::Ptr make_shared_context(Core& core, std::string deviceN
* @param plane An index of a plane inside `VASurfaceID` to create blob from
* @return A remote blob wrapping `VASurfaceID`
*/
static inline VASurfaceBlob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::Ptr ctx, VASurfaceID surface, uint32_t plane = 0) {
static inline VASurfaceBlob::Ptr make_shared_blob(const TensorDesc& desc,
RemoteContext::Ptr ctx,
VASurfaceID surface,
uint32_t plane = 0) {
auto casted = std::dynamic_pointer_cast<VAContext>(ctx);
if (nullptr == casted) {
IE_THROW() << "Invalid remote context passed";
}
ParamMap params = {
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) },
{ GPU_PARAM_KEY(DEV_OBJECT_HANDLE), surface },
{ GPU_PARAM_KEY(VA_PLANE), plane }
};
ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE)},
{GPU_PARAM_KEY(DEV_OBJECT_HANDLE), surface},
{GPU_PARAM_KEY(VA_PLANE), plane}};
return std::dynamic_pointer_cast<VASurfaceBlob>(casted->CreateBlob(desc, params));
}

View File

@ -15,32 +15,32 @@
*/
#ifndef CL_HPP_ENABLE_EXCEPTIONS
# define CL_HPP_ENABLE_EXCEPTIONS
# define CL_HPP_ENABLE_EXCEPTIONS
#endif
#ifdef CL_HPP_MINIMUM_OPENCL_VERSION
# if CL_HPP_MINIMUM_OPENCL_VERSION < 120
# error "CL_HPP_MINIMUM_OPENCL_VERSION must be >= 120"
# endif
# if CL_HPP_MINIMUM_OPENCL_VERSION < 120
# error "CL_HPP_MINIMUM_OPENCL_VERSION must be >= 120"
# endif
#else
# define CL_HPP_MINIMUM_OPENCL_VERSION 120
# define CL_HPP_MINIMUM_OPENCL_VERSION 120
#endif
#ifdef CL_HPP_TARGET_OPENCL_VERSION
# if CL_HPP_TARGET_OPENCL_VERSION < 120
# error "CL_HPP_TARGET_OPENCL_VERSION must be >= 120"
# endif
# if CL_HPP_TARGET_OPENCL_VERSION < 120
# error "CL_HPP_TARGET_OPENCL_VERSION must be >= 120"
# endif
#else
# define CL_HPP_TARGET_OPENCL_VERSION 120
# define CL_HPP_TARGET_OPENCL_VERSION 120
#endif
#ifdef __GNUC__
# pragma GCC diagnostic push
# pragma GCC system_header
# pragma GCC diagnostic push
# pragma GCC system_header
#endif
#include <CL/cl2.hpp>
#ifdef __GNUC__
# pragma GCC diagnostic pop
# pragma GCC diagnostic pop
#endif

View File

@ -41,8 +41,7 @@ namespace GPUContextParams {
* @def DECLARE_GPU_PARAM_KEY(name, ...)
* @brief Shortcut for defining object parameter keys
*/
#define DECLARE_GPU_PARAM_KEY(name, ...) \
static constexpr auto PARAM_##name = #name
#define DECLARE_GPU_PARAM_KEY(name, ...) static constexpr auto PARAM_##name = #name
/**
* @brief Shared device context type: can be either pure OpenCL (OCL)
* or shared video decoder (VA_SHARED) context

View File

@ -24,7 +24,7 @@ namespace HeteroConfigParams {
* @def HETERO_CONFIG_KEY(name)
* @brief Shortcut for defining HETERO configuration keys
*/
#define HETERO_CONFIG_KEY(name) InferenceEngine::HeteroConfigParams::_CONFIG_KEY(HETERO_##name)
#define HETERO_CONFIG_KEY(name) InferenceEngine::HeteroConfigParams::_CONFIG_KEY(HETERO_##name)
#define DECLARE_HETERO_CONFIG_KEY(name) DECLARE_CONFIG_KEY(HETERO_##name)
/**

View File

@ -9,9 +9,10 @@
*/
#pragma once
#include "ie_api.h"
#include <memory>
#include "ie_api.h"
namespace InferenceEngine {
/**
@ -19,7 +20,7 @@ namespace InferenceEngine {
*/
enum LockOp {
LOCK_FOR_READ = 0, //!< A flag to lock data for read
LOCK_FOR_WRITE //!< A flag to lock data for write
LOCK_FOR_WRITE //!< A flag to lock data for write
};
/**
@ -60,7 +61,7 @@ public:
virtual bool free(void* handle) noexcept = 0;
protected:
~IAllocator() = default;
~IAllocator() = default;
};
/**

View File

@ -10,101 +10,101 @@
#pragma once
#if defined(USE_STATIC_IE) || (defined(__GNUC__) && (__GNUC__ < 4))
# define INFERENCE_ENGINE_API(...) extern "C" __VA_ARGS__
# define INFERENCE_ENGINE_API_CPP(...) __VA_ARGS__
# define INFERENCE_ENGINE_API_CLASS(...) __VA_ARGS__
# define INFERENCE_ENGINE_CDECL __attribute__((cdecl))
# define INFERENCE_ENGINE_API(...) extern "C" __VA_ARGS__
# define INFERENCE_ENGINE_API_CPP(...) __VA_ARGS__
# define INFERENCE_ENGINE_API_CLASS(...) __VA_ARGS__
# define INFERENCE_ENGINE_CDECL __attribute__((cdecl))
#else
# if defined(_WIN32)
# define INFERENCE_ENGINE_CDECL
# ifdef IMPLEMENT_INFERENCE_ENGINE_API
# define INFERENCE_ENGINE_API(...) extern "C" __declspec(dllexport) __VA_ARGS__ __cdecl
# define INFERENCE_ENGINE_API_CPP(...) __declspec(dllexport) __VA_ARGS__ __cdecl
# define INFERENCE_ENGINE_API_CLASS(...) __declspec(dllexport) __VA_ARGS__
# else
# define INFERENCE_ENGINE_API(...) extern "C" __declspec(dllimport) __VA_ARGS__ __cdecl
# define INFERENCE_ENGINE_API_CPP(...) __declspec(dllimport) __VA_ARGS__ __cdecl
# define INFERENCE_ENGINE_API_CLASS(...) __declspec(dllimport) __VA_ARGS__
# endif
# else
# define INFERENCE_ENGINE_CDECL __attribute__((cdecl))
# define INFERENCE_ENGINE_API(...) extern "C" __attribute__((visibility("default"))) __VA_ARGS__
# define INFERENCE_ENGINE_API_CPP(...) __attribute__((visibility("default"))) __VA_ARGS__
# define INFERENCE_ENGINE_API_CLASS(...) __attribute__((visibility("default"))) __VA_ARGS__
# endif
# if defined(_WIN32)
# define INFERENCE_ENGINE_CDECL
# ifdef IMPLEMENT_INFERENCE_ENGINE_API
# define INFERENCE_ENGINE_API(...) extern "C" __declspec(dllexport) __VA_ARGS__ __cdecl
# define INFERENCE_ENGINE_API_CPP(...) __declspec(dllexport) __VA_ARGS__ __cdecl
# define INFERENCE_ENGINE_API_CLASS(...) __declspec(dllexport) __VA_ARGS__
# else
# define INFERENCE_ENGINE_API(...) extern "C" __declspec(dllimport) __VA_ARGS__ __cdecl
# define INFERENCE_ENGINE_API_CPP(...) __declspec(dllimport) __VA_ARGS__ __cdecl
# define INFERENCE_ENGINE_API_CLASS(...) __declspec(dllimport) __VA_ARGS__
# endif
# else
# define INFERENCE_ENGINE_CDECL __attribute__((cdecl))
# define INFERENCE_ENGINE_API(...) extern "C" __attribute__((visibility("default"))) __VA_ARGS__
# define INFERENCE_ENGINE_API_CPP(...) __attribute__((visibility("default"))) __VA_ARGS__
# define INFERENCE_ENGINE_API_CLASS(...) __attribute__((visibility("default"))) __VA_ARGS__
# endif
#endif
#if defined(_WIN32)
# define INFERENCE_ENGINE_DEPRECATED(msg) __declspec(deprecated(msg))
# define INFERENCE_ENGINE_DEPRECATED(msg) __declspec(deprecated(msg))
#elif defined __INTEL_COMPILER
# define INFERENCE_ENGINE_DEPRECATED(msg) __attribute__((deprecated(msg)))
# define INFERENCE_ENGINE_DEPRECATED(msg) __attribute__((deprecated(msg)))
#elif defined(__GNUC__)
# define INFERENCE_ENGINE_DEPRECATED(msg) __attribute__((deprecated((msg))))
# define INFERENCE_ENGINE_DEPRECATED(msg) __attribute__((deprecated((msg))))
#else
# define INFERENCE_ENGINE_DEPRECATED(msg)
# define INFERENCE_ENGINE_DEPRECATED(msg)
#endif
#if defined IMPLEMENT_INFERENCE_ENGINE_API || defined IMPLEMENT_INFERENCE_ENGINE_PLUGIN
# define INFERENCE_ENGINE_INTERNAL(msg)
# define INFERENCE_ENGINE_INTERNAL(msg)
#else
# define INFERENCE_ENGINE_INTERNAL(msg) INFERENCE_ENGINE_DEPRECATED(msg)
# define INFERENCE_ENGINE_INTERNAL(msg) INFERENCE_ENGINE_DEPRECATED(msg)
#endif
// Suppress warning "-Wdeprecated-declarations" / C4996
#if defined(_MSC_VER)
# define IE_DO_PRAGMA(x) __pragma(x)
# define IE_DO_PRAGMA(x) __pragma(x)
#elif defined(__GNUC__)
# define IE_DO_PRAGMA(x) _Pragma(#x)
# define IE_DO_PRAGMA(x) _Pragma(# x)
#else
# define IE_DO_PRAGMA(x)
# define IE_DO_PRAGMA(x)
#endif
#if defined(_MSC_VER) && !defined(__clang__)
# define IE_SUPPRESS_DEPRECATED_START \
IE_DO_PRAGMA(warning(push)) \
IE_DO_PRAGMA(warning(disable : 4996))
# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop))
# define IE_SUPPRESS_DEPRECATED_START \
IE_DO_PRAGMA(warning(push)) \
IE_DO_PRAGMA(warning(disable : 4996))
# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop))
#elif defined(__INTEL_COMPILER)
# define IE_SUPPRESS_DEPRECATED_START \
IE_DO_PRAGMA(warning(push)) \
IE_DO_PRAGMA(warning(disable : 1478))
IE_DO_PRAGMA(warning(disable : 1786))
# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop))
# define IE_SUPPRESS_DEPRECATED_START \
IE_DO_PRAGMA(warning(push)) \
IE_DO_PRAGMA(warning(disable : 1478))
IE_DO_PRAGMA(warning(disable : 1786))
# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop))
#elif defined(__clang__) || ((__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ > 405))
# define IE_SUPPRESS_DEPRECATED_START \
IE_DO_PRAGMA(GCC diagnostic push) \
IE_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(GCC diagnostic pop)
# define IE_SUPPRESS_DEPRECATED_START \
IE_DO_PRAGMA(GCC diagnostic push) \
IE_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(GCC diagnostic pop)
#else
# define IE_SUPPRESS_DEPRECATED_START
# define IE_SUPPRESS_DEPRECATED_END
# define IE_SUPPRESS_DEPRECATED_START
# define IE_SUPPRESS_DEPRECATED_END
#endif
#ifdef _WIN32
# define _IE_SUPPRESS_DEPRECATED_START_MSVC IE_SUPPRESS_DEPRECATED_START
# define _IE_SUPPRESS_DEPRECATED_END_MSVC IE_SUPPRESS_DEPRECATED_END
# define _IE_SUPPRESS_DEPRECATED_START_MSVC IE_SUPPRESS_DEPRECATED_START
# define _IE_SUPPRESS_DEPRECATED_END_MSVC IE_SUPPRESS_DEPRECATED_END
#else
# define _IE_SUPPRESS_DEPRECATED_START_MSVC
# define _IE_SUPPRESS_DEPRECATED_END_MSVC
# define _IE_SUPPRESS_DEPRECATED_START_MSVC
# define _IE_SUPPRESS_DEPRECATED_END_MSVC
#endif
#if defined __GNUC__ && (__GNUC__ <= 4 || (__GNUC__ == 5 && __GNUC_MINOR__ <= 5) || \
(defined __i386__ || defined __arm__ || defined __aarch64__))
# define _IE_SUPPRESS_DEPRECATED_START_GCC IE_SUPPRESS_DEPRECATED_START
# define _IE_SUPPRESS_DEPRECATED_END_GCC IE_SUPPRESS_DEPRECATED_END
# define _IE_SUPPRESS_DEPRECATED_START_GCC IE_SUPPRESS_DEPRECATED_START
# define _IE_SUPPRESS_DEPRECATED_END_GCC IE_SUPPRESS_DEPRECATED_END
#else
# define _IE_SUPPRESS_DEPRECATED_START_GCC
# define _IE_SUPPRESS_DEPRECATED_END_GCC
# define _IE_SUPPRESS_DEPRECATED_START_GCC
# define _IE_SUPPRESS_DEPRECATED_END_GCC
#endif
#ifndef ENABLE_UNICODE_PATH_SUPPORT
# ifdef _WIN32
# if defined __INTEL_COMPILER || defined _MSC_VER
# define ENABLE_UNICODE_PATH_SUPPORT
# endif
# elif defined(__GNUC__) && (__GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ > 2)) || defined(__clang__)
# define ENABLE_UNICODE_PATH_SUPPORT
# endif
# ifdef _WIN32
# if defined __INTEL_COMPILER || defined _MSC_VER
# define ENABLE_UNICODE_PATH_SUPPORT
# endif
# elif defined(__GNUC__) && (__GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ > 2)) || defined(__clang__)
# define ENABLE_UNICODE_PATH_SUPPORT
# endif
#endif
/**
@ -114,17 +114,17 @@
*/
#if defined(_WIN32)
# ifdef IMPLEMENT_INFERENCE_ENGINE_PLUGIN
# define INFERENCE_PLUGIN_API(type) extern "C" __declspec(dllexport) type
# else
# define INFERENCE_PLUGIN_API(type) extern "C" type
# endif
# ifdef IMPLEMENT_INFERENCE_ENGINE_PLUGIN
# define INFERENCE_PLUGIN_API(type) extern "C" __declspec(dllexport) type
# else
# define INFERENCE_PLUGIN_API(type) extern "C" type
# endif
#elif (__GNUC__ >= 4) // NOLINT
# ifdef IMPLEMENT_INFERENCE_ENGINE_PLUGIN
# define INFERENCE_PLUGIN_API(type) extern "C" __attribute__((visibility("default"))) type
# else
# define INFERENCE_PLUGIN_API(type) extern "C" type
# endif
# ifdef IMPLEMENT_INFERENCE_ENGINE_PLUGIN
# define INFERENCE_PLUGIN_API(type) extern "C" __attribute__((visibility("default"))) type
# else
# define INFERENCE_PLUGIN_API(type) extern "C" type
# endif
#else
# define INFERENCE_PLUGIN_API(TYPE) extern "C" TYPE
# define INFERENCE_PLUGIN_API(TYPE) extern "C" TYPE
#endif
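// Hedged usage sketch (not part of this change): how the deprecation helpers above are
// typically combined. The function names are invented for illustration.
#include <ie_api.h>

INFERENCE_ENGINE_DEPRECATED("Use new_api() instead")
void old_api() {}

void caller() {
    IE_SUPPRESS_DEPRECATED_START  // silence C4996 / -Wdeprecated-declarations for this call only
    old_api();
    IE_SUPPRESS_DEPRECATED_END
}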


@ -19,13 +19,13 @@
#include <utility>
#include <vector>
#include "details/ie_blob_iterator.hpp"
#include "details/ie_pre_allocator.hpp"
#include "ie_allocator.hpp"
#include "ie_common.h"
#include "ie_layouts.h"
#include "ie_locked_memory.hpp"
#include "ie_precision.hpp"
#include "details/ie_blob_iterator.hpp"
#include "details/ie_pre_allocator.hpp"
namespace InferenceEngine {
@ -120,7 +120,7 @@ public:
*
* @param tensorDesc Defines the layout and dims of the blob
*/
explicit Blob(const TensorDesc& tensorDesc): tensorDesc(tensorDesc) {}
explicit Blob(const TensorDesc& tensorDesc) : tensorDesc(tensorDesc) {}
/**
* @brief Returns the tensor description
@ -146,7 +146,8 @@ public:
* @return The total number of elements
*/
virtual size_t size() const noexcept {
if (tensorDesc.getLayout() == Layout::SCALAR) return 1;
if (tensorDesc.getLayout() == Layout::SCALAR)
return 1;
return product(tensorDesc.getDims());
}
@ -233,7 +234,8 @@ protected:
* @return Result of multiplication
*/
static size_t product(const SizeVector& dims) noexcept {
if (dims.empty()) return 0;
if (dims.empty())
return 0;
return std::accumulate(std::begin(dims), std::end(dims), (size_t)1, std::multiplies<size_t>());
}
@ -278,7 +280,7 @@ std::shared_ptr<const T> as(const Blob::CPtr& blob) noexcept {
* @note Any Blob implementation that represents a concept of a tensor in memory (for example,
* TBlob) must be a subclass of MemoryBlob instead of Blob
*/
class INFERENCE_ENGINE_API_CLASS(MemoryBlob): public Blob {
class INFERENCE_ENGINE_API_CLASS(MemoryBlob) : public Blob {
public:
/**
* @brief A smart pointer to the MemoryBlob object
@ -300,7 +302,7 @@ public:
*
* @param tensorDesc Defines the layout and dims of the blob
*/
explicit MemoryBlob(const TensorDesc& tensorDesc): Blob(tensorDesc) {}
explicit MemoryBlob(const TensorDesc& tensorDesc) : Blob(tensorDesc) {}
/**
* @brief Returns the tensor description
@ -323,7 +325,8 @@ public:
* @return The total number of elements
*/
size_t size() const noexcept override {
if (tensorDesc.getLayout() == Layout::SCALAR) return 1;
if (tensorDesc.getLayout() == Layout::SCALAR)
return 1;
return product(tensorDesc.getDims());
}
@ -493,7 +496,7 @@ public:
*
* @param tensorDesc Tensor description
*/
explicit TBlob(const TensorDesc& tensorDesc): MemoryBlob(tensorDesc) {}
explicit TBlob(const TensorDesc& tensorDesc) : MemoryBlob(tensorDesc) {}
/**
* @brief The constructor creates a TBlob object with the specified dimensions and layout
@ -506,7 +509,7 @@ public:
* @param data_size Length of the pre-allocated array. If not set, size is assumed equal
* to the dot product of dims.
*/
TBlob(const TensorDesc& tensorDesc, T* ptr, size_t data_size = 0): MemoryBlob(tensorDesc) {
TBlob(const TensorDesc& tensorDesc, T* ptr, size_t data_size = 0) : MemoryBlob(tensorDesc) {
if (data_size == 0) {
data_size = size();
}
@ -528,8 +531,10 @@ public:
* @param alloc An allocator
*/
TBlob(const TensorDesc& tensorDesc, const std::shared_ptr<IAllocator>& alloc)
: MemoryBlob(tensorDesc), _allocator(alloc) {
if (_allocator == nullptr) IE_THROW() << "TBlob allocator was not initialized.";
: MemoryBlob(tensorDesc),
_allocator(alloc) {
if (_allocator == nullptr)
IE_THROW() << "TBlob allocator was not initialized.";
}
/**
@ -537,7 +542,7 @@ public:
*
* @param blob Source blob
*/
TBlob(const TBlob<T>& blob): MemoryBlob(blob.getTensorDesc()) {
TBlob(const TBlob<T>& blob) : MemoryBlob(blob.getTensorDesc()) {
copyFrom(blob);
}
@ -546,7 +551,7 @@ public:
*
* @param blob rvalue to make a move from
*/
TBlob(TBlob<T>&& blob): MemoryBlob(blob.getTensorDesc()) {
TBlob(TBlob<T>&& blob) : MemoryBlob(blob.getTensorDesc()) {
moveFrom(blob);
}
@ -592,11 +597,9 @@ public:
return;
}
_handle.reset(
rawHandle,
[allocator](void* rawHandle) {
allocator->free(rawHandle);
});
_handle.reset(rawHandle, [allocator](void* rawHandle) {
allocator->free(rawHandle);
});
}
bool deallocate() noexcept override {
@ -611,14 +614,14 @@ public:
return std::move(lockme<const void>());
}
LockedMemory<void> rwmap()noexcept override {
LockedMemory<void> rwmap() noexcept override {
return std::move(lockme<void>());
}
LockedMemory<const void> rmap() const noexcept override {
return std::move(lockme<const void>());
}
LockedMemory<void> wmap()noexcept override {
LockedMemory<void> wmap() noexcept override {
return std::move(lockme<void>());
}
@ -725,7 +728,7 @@ protected:
template <class S>
LockedMemory<S> lockme() const {
return LockedMemory<S>(_allocator.get(), getHandle(), 0);
// getTensorDesc().getBlockingDesc().getOffsetPadding());
// getTensorDesc().getBlockingDesc().getOffsetPadding());
}
const std::shared_ptr<IAllocator>& getAllocator() const noexcept override {
@ -746,11 +749,10 @@ protected:
* @param origBlob An original blob
* @param roi A ROI object
*/
TBlob(const TBlob& origBlob, const ROI& roi) :
MemoryBlob(make_roi_desc(origBlob.getTensorDesc(), roi, true)),
_allocator(origBlob._allocator) {
IE_ASSERT(origBlob._handle != nullptr)
<< "Original Blob must be allocated before ROI creation";
TBlob(const TBlob& origBlob, const ROI& roi)
: MemoryBlob(make_roi_desc(origBlob.getTensorDesc(), roi, true)),
_allocator(origBlob._allocator) {
IE_ASSERT(origBlob._handle != nullptr) << "Original Blob must be allocated before ROI creation";
_handle = origBlob._handle;
}
@ -784,7 +786,7 @@ template <typename Type>
inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(const TensorDesc& tensorDesc) {
if (!tensorDesc.getPrecision().hasStorageType<Type>())
IE_THROW() << "Cannot make shared blob! "
<< "The blob type cannot be used to store objects of current precision";
<< "The blob type cannot be used to store objects of current precision";
return std::make_shared<InferenceEngine::TBlob<Type>>(tensorDesc);
}
@ -798,11 +800,12 @@ inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(const TensorD
* @return A shared pointer to the newly created blob of the given type
*/
template <typename Type>
inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(const TensorDesc& tensorDesc, Type* ptr,
inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(const TensorDesc& tensorDesc,
Type* ptr,
size_t size = 0) {
if (!tensorDesc.getPrecision().hasStorageType<Type>())
IE_THROW() << "Cannot make shared blob! "
<< "The blob type cannot be used to store objects of current precision";
<< "The blob type cannot be used to store objects of current precision";
return std::make_shared<InferenceEngine::TBlob<Type>>(tensorDesc, ptr, size);
}
@ -816,10 +819,11 @@ inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(const TensorD
*/
template <typename Type>
inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(
const TensorDesc& tensorDesc, const std::shared_ptr<InferenceEngine::IAllocator>& alloc) {
const TensorDesc& tensorDesc,
const std::shared_ptr<InferenceEngine::IAllocator>& alloc) {
if (!tensorDesc.getPrecision().hasStorageType<Type>())
IE_THROW() << "Cannot make shared blob! "
<< "The blob type cannot be used to store objects of current precision";
<< "The blob type cannot be used to store objects of current precision";
return std::make_shared<InferenceEngine::TBlob<Type>>(tensorDesc, alloc);
}
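// A minimal usage sketch for the blob factories above (assumes <ie_blob.h> is included;
// the shape below is an arbitrary example value).
#include <ie_blob.h>

void fill_example_blob() {
    InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32,
                                     {1, 3, 224, 224},
                                     InferenceEngine::Layout::NCHW);
    auto blob = InferenceEngine::make_shared_blob<float>(desc);  // TBlob<float>::Ptr
    blob->allocate();                                            // reserve memory via the default allocator
    float* data = blob->buffer().as<float*>();                   // LockedMemory<void> -> raw pointer
    for (size_t i = 0; i < blob->size(); ++i)
        data[i] = 0.f;
}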


@ -11,18 +11,19 @@
#include <algorithm>
#include <cstdlib>
#include <iterator>
#include <map>
#include <memory>
#include <ostream>
#include <string>
#include <vector>
#include <map>
#include <sstream>
#include <stdexcept>
#include <iterator>
#include <string>
#include <vector>
#include "ie_api.h"
#include <ie_api.h>
#ifndef NDEBUG
#include <cassert>
# include <cassert>
#endif
namespace InferenceEngine {
/**
@ -58,9 +59,9 @@ using DataWeakPtr = std::weak_ptr<Data>;
* @brief The method holds the user values to enable binding of data per graph node.
*/
union UserValue {
int v_int; //!< An integer value
int v_int; //!< An integer value
float v_float; //!< A floating point value
void* v_ptr; //!< A pointer to a void
void* v_ptr; //!< A pointer to a void
};
/**
@ -71,15 +72,15 @@ enum Layout : uint8_t {
ANY = 0, //!< "any" layout
// I/O data layouts
NCHW = 1, //!< NCHW layout for input / output blobs
NHWC = 2, //!< NHWC layout for input / output blobs
NCHW = 1, //!< NCHW layout for input / output blobs
NHWC = 2, //!< NHWC layout for input / output blobs
NCDHW = 3, //!< NCDHW layout for input / output blobs
NDHWC = 4, //!< NDHWC layout for input / output blobs
// weight layouts
OIHW = 64, //!< NDHWC layout for operation weights
GOIHW = 65, //!< NDHWC layout for operation weights
OIDHW = 66, //!< NDHWC layout for operation weights
OIHW = 64, //!< NDHWC layout for operation weights
GOIHW = 65, //!< NDHWC layout for operation weights
OIDHW = 66, //!< NDHWC layout for operation weights
GOIDHW = 67, //!< NDHWC layout for operation weights
// Scalar
@ -189,9 +190,9 @@ struct InferenceEngineProfileInfo {
* @brief Defines the general status of the layer
*/
enum LayerStatus {
NOT_RUN, //!< A layer is not executed
NOT_RUN, //!< A layer is not executed
OPTIMIZED_OUT, //!< A layer is optimized out during graph optimization phase
EXECUTED //!< A layer is executed
EXECUTED //!< A layer is executed
};
/**
@ -292,10 +293,12 @@ using ConstOutputsDataMap = std::map<std::string, CDataPtr>;
using OutputsDataMap = std::map<std::string, DataPtr>;
namespace details {
struct INFERENCE_ENGINE_DEPRECATED("Use InferRequest::Exception")
INFERENCE_ENGINE_API_CLASS(InferenceEngineException) : public std::runtime_error {
struct INFERENCE_ENGINE_DEPRECATED("Use InferRequest::Exception") INFERENCE_ENGINE_API_CLASS(InferenceEngineException)
: public std::runtime_error {
using std::runtime_error::runtime_error;
bool hasStatus() const {return true;}
bool hasStatus() const {
return true;
}
StatusCode getStatus() const;
};
} // namespace details
@ -311,18 +314,22 @@ IE_SUPPRESS_DEPRECATED_END
/// @cond
namespace details {
template<typename ExceptionType> struct ExceptionTraits;
template <typename ExceptionType>
struct ExceptionTraits;
}
#define INFERENCE_ENGINE_DECLARE_EXCEPTION(ExceptionType, statusCode) \
struct INFERENCE_ENGINE_API_CLASS(ExceptionType) final : public InferenceEngine::Exception { \
using Exception::Exception; \
}; \
namespace details { \
template<> struct ExceptionTraits<ExceptionType> { \
static const char* string() {return "[ " #statusCode " ]";} \
}; \
}
#define INFERENCE_ENGINE_DECLARE_EXCEPTION(ExceptionType, statusCode) \
struct INFERENCE_ENGINE_API_CLASS(ExceptionType) final : public InferenceEngine::Exception { \
using Exception::Exception; \
}; \
namespace details { \
template <> \
struct ExceptionTraits<ExceptionType> { \
static const char* string() { \
return "[ " #statusCode " ]"; \
} \
}; \
}
/// @endcond
/** @brief This class represents StatusCode::GENERAL_ERROR exception */
@ -380,7 +387,7 @@ namespace details {
/**
* @brief Tag struct used to throw exception
*/
template<typename ExceptionType>
template <typename ExceptionType>
struct ThrowNow final {
[[noreturn]] void operator<<=(const std::ostream& ostream) {
std::ostringstream stream;
@ -391,31 +398,32 @@ struct ThrowNow final {
/// @cond
#ifndef NDEBUG
#define IE_LOCATION '\n' << __FILE__ << ':' << __LINE__<< ' '
# define IE_LOCATION '\n' << __FILE__ << ':' << __LINE__ << ' '
#else
#define IE_LOCATION ""
# define IE_LOCATION ""
#endif // NDEBUG
// WARNING: DO NOT USE THIS MACRO! Use openvino/pp.hpp macro library
#define IE_PP_EXPAND(X) X
#define IE_PP_NARG(...) IE_PP_EXPAND(IE_PP_NARG_(__VA_ARGS__, IE_PP_RSEQ_N()))
#define IE_PP_NARG_(...) IE_PP_EXPAND(IE_PP_ARG_N(__VA_ARGS__))
#define IE_PP_EXPAND(X) X
#define IE_PP_NARG(...) IE_PP_EXPAND(IE_PP_NARG_(__VA_ARGS__, IE_PP_RSEQ_N()))
#define IE_PP_NARG_(...) IE_PP_EXPAND(IE_PP_ARG_N(__VA_ARGS__))
#define IE_PP_ARG_N(_0, _1, N, ...) N
#define IE_PP_RSEQ_N() 0, 1, 0
#define IE_PP_NO_ARGS(NAME) ,
#define IE_PP_CAT3_(x, y, z) x ## y ## z
#define IE_PP_CAT3(x, y, z) IE_PP_CAT3_(x, y, z)
#define IE_PP_OVERLOAD(NAME, ...) IE_PP_EXPAND(IE_PP_CAT3(NAME, _, IE_PP_EXPAND(IE_PP_NARG(IE_PP_NO_ARGS __VA_ARGS__ (NAME))))(__VA_ARGS__))
#define IE_PP_RSEQ_N() 0, 1, 0
#define IE_PP_NO_ARGS(NAME) ,
#define IE_PP_CAT3_(x, y, z) x##y##z
#define IE_PP_CAT3(x, y, z) IE_PP_CAT3_(x, y, z)
#define IE_PP_OVERLOAD(NAME, ...) \
IE_PP_EXPAND(IE_PP_CAT3(NAME, _, IE_PP_EXPAND(IE_PP_NARG(IE_PP_NO_ARGS __VA_ARGS__(NAME))))(__VA_ARGS__))
// ENDWARNING
#define IE_THROW_0() \
InferenceEngine::details::ThrowNow<InferenceEngine::GeneralError> {} <<= std::stringstream {} \
<< IE_LOCATION
#define IE_THROW_0() \
InferenceEngine::details::ThrowNow<InferenceEngine::GeneralError>{} <<= std::stringstream{} << IE_LOCATION
#define IE_THROW_1(ExceptionType) \
InferenceEngine::details::ThrowNow<InferenceEngine::ExceptionType> {} <<= std::stringstream {} \
<< IE_LOCATION << InferenceEngine::details::ExceptionTraits<InferenceEngine::ExceptionType>::string() << ' '
#define IE_THROW_1(ExceptionType) \
InferenceEngine::details::ThrowNow<InferenceEngine::ExceptionType>{} <<= \
std::stringstream{} << IE_LOCATION \
<< InferenceEngine::details::ExceptionTraits<InferenceEngine::ExceptionType>::string() \
<< ' '
/// @endcond
/**
@ -429,31 +437,35 @@ struct ThrowNow final {
* @brief Uses assert() function if NDEBUG is not defined, InferenceEngine exception otherwise
*/
#ifdef NDEBUG
#define IE_ASSERT(EXPRESSION) \
if (!(EXPRESSION)) \
IE_THROW(GeneralError) << " AssertionFailed: " << #EXPRESSION // NOLINT
# define IE_ASSERT(EXPRESSION) \
if (!(EXPRESSION)) \
IE_THROW(GeneralError) << " AssertionFailed: " << #EXPRESSION // NOLINT
#else
/**
* @private
*/
struct NullStream {
template <typename T>
NullStream& operator<<(const T&) noexcept {return *this;}
NullStream& operator<<(const T&) noexcept {
return *this;
}
};
#define IE_ASSERT(EXPRESSION) \
assert((EXPRESSION)); \
InferenceEngine::details::NullStream()
# define IE_ASSERT(EXPRESSION) \
assert((EXPRESSION)); \
InferenceEngine::details::NullStream()
#endif // NDEBUG
/// @cond
#define THROW_IE_EXCEPTION \
InferenceEngine::details::ThrowNow<InferenceEngine::details::InferenceEngineException> {} <<= std::stringstream {} \
<< IE_LOCATION
#define THROW_IE_EXCEPTION \
InferenceEngine::details::ThrowNow<InferenceEngine::details::InferenceEngineException>{} <<= std::stringstream{} \
<< IE_LOCATION
#define IE_EXCEPTION_CASE(TYPE_ALIAS, STATUS_CODE, EXCEPTION_TYPE, ...) \
case InferenceEngine::STATUS_CODE : { \
using InferenceEngine::EXCEPTION_TYPE; using TYPE_ALIAS = EXCEPTION_TYPE; __VA_ARGS__; \
#define IE_EXCEPTION_CASE(TYPE_ALIAS, STATUS_CODE, EXCEPTION_TYPE, ...) \
case InferenceEngine::STATUS_CODE: { \
using InferenceEngine::EXCEPTION_TYPE; \
using TYPE_ALIAS = EXCEPTION_TYPE; \
__VA_ARGS__; \
} break;
/// @endcond
@ -461,28 +473,29 @@ struct NullStream {
* @def IE_EXCEPTION_SWITCH
* @brief Generates a switch statement over error codes and maps them to the corresponding exception types
*/
#define IE_EXCEPTION_SWITCH(STATUS, TYPE_ALIAS, ...) \
switch (STATUS) { \
IE_EXCEPTION_CASE(TYPE_ALIAS, GENERAL_ERROR , GeneralError , __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_IMPLEMENTED , NotImplemented , __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, NETWORK_NOT_LOADED , NetworkNotLoaded , __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, PARAMETER_MISMATCH , ParameterMismatch , __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_FOUND , NotFound , __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, OUT_OF_BOUNDS , OutOfBounds , __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, UNEXPECTED , Unexpected , __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, REQUEST_BUSY , RequestBusy , __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, RESULT_NOT_READY , ResultNotReady , __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_ALLOCATED , NotAllocated , __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, INFER_NOT_STARTED , InferNotStarted , __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, NETWORK_NOT_READ , NetworkNotRead , __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, INFER_CANCELLED , InferCancelled , __VA_ARGS__) \
default: IE_ASSERT(!"Unreachable"); \
#define IE_EXCEPTION_SWITCH(STATUS, TYPE_ALIAS, ...) \
switch (STATUS) { \
IE_EXCEPTION_CASE(TYPE_ALIAS, GENERAL_ERROR, GeneralError, __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_IMPLEMENTED, NotImplemented, __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, NETWORK_NOT_LOADED, NetworkNotLoaded, __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, PARAMETER_MISMATCH, ParameterMismatch, __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_FOUND, NotFound, __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, OUT_OF_BOUNDS, OutOfBounds, __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, UNEXPECTED, Unexpected, __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, REQUEST_BUSY, RequestBusy, __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, RESULT_NOT_READY, ResultNotReady, __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_ALLOCATED, NotAllocated, __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, INFER_NOT_STARTED, InferNotStarted, __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, NETWORK_NOT_READ, NetworkNotRead, __VA_ARGS__) \
IE_EXCEPTION_CASE(TYPE_ALIAS, INFER_CANCELLED, InferCancelled, __VA_ARGS__) \
default: \
IE_ASSERT(!"Unreachable"); \
}
} // namespace details
} // namespace InferenceEngine
#if defined(_WIN32)
#define __PRETTY_FUNCTION__ __FUNCSIG__
# define __PRETTY_FUNCTION__ __FUNCSIG__
#else
#define __PRETTY_FUNCTION__ __PRETTY_FUNCTION__
# define __PRETTY_FUNCTION__ __PRETTY_FUNCTION__
#endif
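// Hedged usage sketch for the exception helpers above; the message strings and the checked
// arguments are made up for illustration.
#include <ie_common.h>

void check_input(const void* ptr, bool configured) {
    IE_ASSERT(ptr != nullptr) << "input pointer must not be null";   // assert() in debug builds, exception otherwise
    if (!configured)
        IE_THROW(NotFound) << "requested input was not configured";  // throws InferenceEngine::NotFound
}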


@ -22,7 +22,7 @@ namespace InferenceEngine {
* Compound blob is a wrapper blob over references to underlying blobs. These blobs should share
* some properties and can be grouped into a single entity.
*/
class INFERENCE_ENGINE_API_CLASS(CompoundBlob): public Blob {
class INFERENCE_ENGINE_API_CLASS(CompoundBlob) : public Blob {
public:
/**
* @brief A smart pointer to the CompoundBlob object
@ -118,7 +118,7 @@ protected:
/**
* @brief Represents a blob that contains two planes (Y and UV) in NV12 color format
*/
class INFERENCE_ENGINE_API_CLASS(NV12Blob): public CompoundBlob {
class INFERENCE_ENGINE_API_CLASS(NV12Blob) : public CompoundBlob {
public:
/**
* @brief A smart pointer to the NV12Blob object
@ -220,7 +220,7 @@ public:
* Please note that reference to Blob::Ptr is returned. I.e. the reference will be valid until
* the I420Blob object is destroyed.
*
* @return constant reference to shared pointer object of Y plane*
* @return A constant reference to the shared pointer of the Y plane
*/
const Blob::Ptr& y() const noexcept;
@ -273,7 +273,7 @@ public:
* in the OPTIMIZATION_CAPABILITIES metric.
*/
class INFERENCE_ENGINE_API_CLASS(BatchedBlob) : public CompoundBlob {
public:
public:
/**
* @brief A smart pointer to the BatchedBlob object
*/


@ -15,11 +15,11 @@
#include <string>
#include <vector>
#include "ie_version.hpp"
#include "cpp/ie_executable_network.hpp"
#include "ie_extension.h"
#include "ie_plugin_config.hpp"
#include "ie_remote_context.hpp"
#include "cpp/ie_executable_network.hpp"
#include "ie_version.hpp"
namespace InferenceEngine {
@ -89,7 +89,7 @@ public:
* For ONNX case the second parameter should contain empty blob.
* @note Created InferenceEngine::CNNNetwork object shares the weights with `weights` object.
* So, do not create `weights` on temporary data which can be later freed, since the network
* constant datas become to point to invalid memory.
* constant data will point to invalid memory.
* @return CNNNetwork
*/
CNNNetwork ReadNetwork(const std::string& model, const Blob::CPtr& weights) const;
@ -106,9 +106,9 @@ public:
* operation
* @return An executable network reference
*/
ExecutableNetwork LoadNetwork(
const CNNNetwork& network, const std::string& deviceName,
const std::map<std::string, std::string>& config = {});
ExecutableNetwork LoadNetwork(const CNNNetwork& network,
const std::string& deviceName,
const std::map<std::string, std::string>& config = {});
/**
* @brief Reads model and creates an executable network from IR or ONNX file
@ -123,9 +123,9 @@ public:
*
* @return An executable network reference
*/
ExecutableNetwork LoadNetwork(
const std::string& modelPath, const std::string& deviceName,
const std::map<std::string, std::string>& config = {});
ExecutableNetwork LoadNetwork(const std::string& modelPath,
const std::string& deviceName,
const std::map<std::string, std::string>& config = {});
/**
* @brief Registers extension
@ -141,9 +141,9 @@ public:
* operation
* @return An executable network object
*/
ExecutableNetwork LoadNetwork(
const CNNNetwork& network, RemoteContext::Ptr context,
const std::map<std::string, std::string>& config = {});
ExecutableNetwork LoadNetwork(const CNNNetwork& network,
RemoteContext::Ptr context,
const std::map<std::string, std::string>& config = {});
/**
* @brief Registers extension for the specified plugin
@ -162,9 +162,9 @@ public:
* operation*
* @return An executable network reference
*/
ExecutableNetwork ImportNetwork(
const std::string& modelFileName, const std::string& deviceName,
const std::map<std::string, std::string>& config = {});
ExecutableNetwork ImportNetwork(const std::string& modelFileName,
const std::string& deviceName,
const std::map<std::string, std::string>& config = {});
/**
* @brief Creates an executable network from a previously exported network
@ -174,7 +174,8 @@ public:
* operation*
* @return An executable network reference
*/
ExecutableNetwork ImportNetwork(std::istream& networkModel, const std::string& deviceName,
ExecutableNetwork ImportNetwork(std::istream& networkModel,
const std::string& deviceName,
const std::map<std::string, std::string>& config = {});
/**
@ -208,9 +209,9 @@ public:
* @param config Optional map of pairs: (config parameter name, config parameter value)
* @return An object containing a map of pairs a layer name -> a device name supporting this layer.
*/
QueryNetworkResult QueryNetwork(
const CNNNetwork& network, const std::string& deviceName,
const std::map<std::string, std::string>& config = {}) const;
QueryNetworkResult QueryNetwork(const CNNNetwork& network,
const std::string& deviceName,
const std::map<std::string, std::string>& config = {}) const;
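// A short end-to-end sketch of the Core calls declared above; the model path and the
// device name are placeholders, not values taken from this change.
#include <ie_core.hpp>

void run_example() {
    InferenceEngine::Core core;
    InferenceEngine::CNNNetwork network = core.ReadNetwork("model.xml");        // IR path is hypothetical
    InferenceEngine::ExecutableNetwork exec = core.LoadNetwork(network, "CPU");  // compile for a device
    InferenceEngine::InferRequest request = exec.CreateInferRequest();
    request.Infer();                                                             // synchronous inference
}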
/**
* @brief Sets configuration for device, acceptable keys can be found in ie_plugin_config.hpp


@ -27,6 +27,7 @@ namespace InferenceEngine {
*/
class INFERENCE_ENGINE_API_CLASS(Data) {
class Impl;
public:
/**
* @brief An empty constructor (dimensionless)
@ -58,7 +59,7 @@ public:
* @param data A data object to copy from
* @return An assigned object
*/
Data & operator = (const Data& data);
Data& operator=(const Data& data);
/**
* @brief Checks if the current node is resolved


@ -14,9 +14,9 @@
#include <string>
#include <vector>
#include <ngraph/opsets/opset.hpp>
#include "ie_iextension.h"
#include "details/ie_so_pointer.hpp"
#include "ie_iextension.h"
#include "ngraph/opsets/opset.hpp"
namespace InferenceEngine {
namespace details {
@ -46,9 +46,8 @@ public:
*
* @param name Full or relative path to extension library
*/
template <typename C,
typename = details::enableIfSupportedChar<C>>
explicit Extension(const std::basic_string<C>& name): actual(name) {}
template <typename C, typename = details::enableIfSupportedChar<C>>
explicit Extension(const std::basic_string<C>& name) : actual(name) {}
/**
* @brief Gets the extension version information
@ -79,7 +78,8 @@ public:
* @return vector of strings
*/
std::vector<std::string> getImplTypes(const std::shared_ptr<ngraph::Node>& node) override {
if (node == nullptr) IE_THROW() << "Provided ngraph::Node pointer is nullptr.";
if (node == nullptr)
IE_THROW() << "Provided ngraph::Node pointer is nullptr.";
return actual->getImplTypes(node);
}
@ -90,7 +90,8 @@ public:
* @return shared pointer to implementation
*/
ILayerImpl::Ptr getImplementation(const std::shared_ptr<ngraph::Node>& node, const std::string& implType) override {
if (node == nullptr) IE_THROW() << "Provided ngraph::Node pointer is nullptr.";
if (node == nullptr)
IE_THROW() << "Provided ngraph::Node pointer is nullptr.";
return actual->getImplementation(node, implType);
}
@ -107,7 +108,7 @@ protected:
* @param name extension library name
* @return shared pointer to extension
*/
template<typename T = IExtension>
template <typename T = IExtension>
INFERENCE_ENGINE_DEPRECATED("Use std::make_shared<Extension>")
inline std::shared_ptr<T> make_so_pointer(const std::string& name) {
return std::make_shared<Extension>(name);
@ -120,7 +121,7 @@ inline std::shared_ptr<T> make_so_pointer(const std::string& name) {
* @param name extension library name
* @return shared pointer to extension
*/
template<typename T = IExtension>
template <typename T = IExtension>
INFERENCE_ENGINE_DEPRECATED("Use std::make_shared<Extension>")
inline std::shared_ptr<IExtension> make_so_pointer(const std::wstring& name) {
return std::make_shared<Extension>(name);


@ -17,8 +17,7 @@
#include "ie_common.h"
#include "ie_data.h"
#include "ie_input_info.hpp"
#include <ngraph/function.hpp>
#include "ngraph/function.hpp"
namespace InferenceEngine {
@ -29,7 +28,7 @@ _IE_SUPPRESS_DEPRECATED_START_GCC
* @interface ICNNNetwork
* @brief This is the main interface to describe the NN topology
*/
class INFERENCE_ENGINE_API_CLASS(ICNNNetwork): public std::enable_shared_from_this<ICNNNetwork> {
class INFERENCE_ENGINE_API_CLASS(ICNNNetwork) : public std::enable_shared_from_this<ICNNNetwork> {
public:
IE_SUPPRESS_DEPRECATED_START
/**
@ -127,7 +126,8 @@ public:
* @return Status code of the operation
*/
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::CNNNetwork wrapper instead")
virtual StatusCode addOutput(const std::string& layerName, size_t outputIndex = 0,
virtual StatusCode addOutput(const std::string& layerName,
size_t outputIndex = 0,
ResponseDesc* resp = nullptr) noexcept = 0;
/**
@ -219,8 +219,7 @@ public:
* @return Status code of the operation
*/
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::CNNNetwork wrapper instead")
virtual StatusCode serialize(std::ostream& xmlStream, Blob::Ptr& binData, ResponseDesc* resp) const
noexcept = 0;
virtual StatusCode serialize(std::ostream& xmlStream, Blob::Ptr& binData, ResponseDesc* resp) const noexcept = 0;
/**
* @deprecated Use InferenceEngine::CNNNetwork wrapper instead
@ -233,10 +232,11 @@ public:
* @return Status code of the operation
*/
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::CNNNetwork wrapper instead")
virtual StatusCode getOVNameForTensor(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const noexcept {
(void) ov_name;
(void) orig_name;
(void) resp;
virtual StatusCode getOVNameForTensor(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const
noexcept {
(void)ov_name;
(void)orig_name;
(void)resp;
return NOT_IMPLEMENTED;
}


@ -9,9 +9,9 @@
*/
#pragma once
#include <ostream>
#include <map>
#include <memory>
#include <ostream>
#include <string>
#include <vector>


@ -15,20 +15,20 @@
#include <vector>
#include "ie_api.h"
#include "ie_blob.h"
#include "ie_common.h"
#include "ie_layouts.h"
#include "ie_blob.h"
#include "ie_version.hpp"
#include <ngraph/opsets/opset.hpp>
#include "ngraph/opsets/opset.hpp"
/**
* @def INFERENCE_EXTENSION_API(TYPE)
* @brief Defines Inference Engine Extension API method
*/
#if defined(_WIN32) && defined(IMPLEMENT_INFERENCE_EXTENSION_API)
#define INFERENCE_EXTENSION_API(TYPE) extern "C" __declspec(dllexport) TYPE
# define INFERENCE_EXTENSION_API(TYPE) extern "C" __declspec(dllexport) TYPE
#else
#define INFERENCE_EXTENSION_API(TYPE) INFERENCE_ENGINE_API(TYPE)
# define INFERENCE_EXTENSION_API(TYPE) INFERENCE_ENGINE_API(TYPE)
#endif
namespace InferenceEngine {
@ -131,7 +131,8 @@ public:
* @param resp Response descriptor
* @return Status code
*/
virtual StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
virtual StatusCode execute(std::vector<Blob::Ptr>& inputs,
std::vector<Blob::Ptr>& outputs,
ResponseDesc* resp) noexcept = 0;
};
@ -183,7 +184,8 @@ public:
/**
* @brief Implements deprecated API
*/
INFERENCE_ENGINE_DEPRECATED("Do not override or use this method. Use IE_DEFINE_EXTENSION_CREATE_FUNCTION to export extension")
INFERENCE_ENGINE_DEPRECATED(
"Do not override or use this method. Use IE_DEFINE_EXTENSION_CREATE_FUNCTION to export extension")
virtual void Release() noexcept {
delete this;
}
@ -217,15 +219,17 @@ INFERENCE_EXTENSION_API(StatusCode)
CreateExtension(IExtension*& ext, ResponseDesc* resp) noexcept;
#else
INFERENCE_EXTENSION_API(StatusCode)
CreateExtension(IExtension*& ext, ResponseDesc* resp) noexcept INFERENCE_ENGINE_DEPRECATED("Use IE_DEFINE_EXTENSION_CREATE_FUNCTION macro");
CreateExtension(IExtension*& ext, ResponseDesc* resp) noexcept INFERENCE_ENGINE_DEPRECATED(
"Use IE_DEFINE_EXTENSION_CREATE_FUNCTION macro");
#endif
/**
* @def IE_DEFINE_EXTENSION_CREATE_FUNCTION
* @brief Generates extension creation function
*/
#define IE_DEFINE_EXTENSION_CREATE_FUNCTION(ExtensionType) \
INFERENCE_EXTENSION_API(void) InferenceEngine::CreateExtensionShared(std::shared_ptr<InferenceEngine::IExtension>& ext) { \
ext = std::make_shared<ExtensionType>(); \
}
#define IE_DEFINE_EXTENSION_CREATE_FUNCTION(ExtensionType) \
INFERENCE_EXTENSION_API(void) \
InferenceEngine::CreateExtensionShared(std::shared_ptr<InferenceEngine::IExtension>& ext) { \
ext = std::make_shared<ExtensionType>(); \
}
} // namespace InferenceEngine
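// A hedged sketch of how IE_DEFINE_EXTENSION_CREATE_FUNCTION is typically used; the
// extension class and the version strings are invented for illustration and only the
// pure virtual methods are overridden.
#include <ie_iextension.h>

class SampleExtension : public InferenceEngine::IExtension {
public:
    void GetVersion(const InferenceEngine::Version*& versionInfo) const noexcept override {
        static const InferenceEngine::Version version{{2, 1}, "custom_build", "sample_extension"};
        versionInfo = &version;
    }
    void Unload() noexcept override {}
};

// Exports InferenceEngine::CreateExtensionShared() from the extension library.
IE_DEFINE_EXTENSION_CREATE_FUNCTION(SampleExtension)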


@ -26,7 +26,8 @@ _IE_SUPPRESS_DEPRECATED_START_GCC
* @deprecated Use InferenceEngine::InferRequest C++ wrapper
* @brief This is an interface of asynchronous infer request
*/
class INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::InferRequest C++ wrapper") IInferRequest : public std::enable_shared_from_this<IInferRequest> {
class INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::InferRequest C++ wrapper") IInferRequest
: public std::enable_shared_from_this<IInferRequest> {
public:
/**
* @enum WaitMode
@ -83,7 +84,10 @@ public:
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: OK (0) for success
*/
virtual StatusCode SetBlob(const char *name, const Blob::Ptr &data, const PreProcessInfo& info, ResponseDesc *resp) noexcept = 0;
virtual StatusCode SetBlob(const char* name,
const Blob::Ptr& data,
const PreProcessInfo& info,
ResponseDesc* resp) noexcept = 0;
/**
* @brief Gets pre-process for input data
@ -92,7 +96,8 @@ public:
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: OK (0) for success
*/
virtual StatusCode GetPreProcess(const char* name, const PreProcessInfo** info, ResponseDesc *resp) const noexcept = 0;
virtual StatusCode GetPreProcess(const char* name, const PreProcessInfo** info, ResponseDesc* resp) const
noexcept = 0;
/**
* @brief Infers specified input(s) in synchronous mode
*
@ -200,4 +205,4 @@ protected:
_IE_SUPPRESS_DEPRECATED_END_GCC
} // namespace InferenceEngine
} // namespace InferenceEngine


@ -66,8 +66,11 @@ public:
* @param dimOffsets per-dimension offset from the padding to actual data,
* @param strides strides for each dimension
*/
BlockingDesc(const SizeVector& blocked_dims, const SizeVector& order, size_t offset,
const SizeVector& dimOffsets, const SizeVector& strides);
BlockingDesc(const SizeVector& blocked_dims,
const SizeVector& order,
size_t offset,
const SizeVector& dimOffsets,
const SizeVector& strides);
/**
* @brief Returns the blocked dimensions vector
@ -335,11 +338,11 @@ private:
* @brief This structure describes ROI data for image-like tensors.
*/
struct ROI {
size_t id = 0; //!< ID of a ROI (offset over batch dimension)
size_t posX = 0; //!< W upper left coordinate of ROI
size_t posY = 0; //!< H upper left coordinate of ROI
size_t sizeX = 0; //!< W size of ROI
size_t sizeY = 0; //!< H size of ROI
size_t id = 0; //!< ID of a ROI (offset over batch dimension)
size_t posX = 0; //!< W upper left coordinate of ROI
size_t posY = 0; //!< H upper left coordinate of ROI
size_t sizeX = 0; //!< W size of ROI
size_t sizeY = 0; //!< H size of ROI
ROI() = default;
@ -351,9 +354,12 @@ struct ROI {
* @param sizeX W size of ROI
* @param sizeY H size of ROI
*/
ROI(size_t id, size_t posX, size_t posY, size_t sizeX, size_t sizeY) :
id(id), posX(posX), posY(posY), sizeX(sizeX), sizeY(sizeY) {
}
ROI(size_t id, size_t posX, size_t posY, size_t sizeX, size_t sizeY)
: id(id),
posX(posX),
posY(posY),
sizeX(sizeX),
sizeY(sizeY) {}
};
/**
@ -366,9 +372,6 @@ struct ROI {
*
* @return A newly created TensorDesc object representing ROI.
*/
INFERENCE_ENGINE_API_CPP(TensorDesc) make_roi_desc(
const TensorDesc& origDesc,
const ROI& roi,
bool useOrigMemDesc);
INFERENCE_ENGINE_API_CPP(TensorDesc) make_roi_desc(const TensorDesc& origDesc, const ROI& roi, bool useOrigMemDesc);
} // namespace InferenceEngine
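// Minimal sketch for the ROI helpers above; the coordinates are arbitrary example values.
#include <ie_layouts.h>

InferenceEngine::TensorDesc crop_desc(const InferenceEngine::TensorDesc& original) {
    InferenceEngine::ROI roi(0 /*id*/, 16 /*posX*/, 16 /*posY*/, 64 /*sizeX*/, 64 /*sizeY*/);
    // Describe a 64x64 window of the original tensor, keeping the original memory layout.
    return InferenceEngine::make_roi_desc(original, roi, true /*useOrigMemDesc*/);
}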


@ -43,7 +43,10 @@ public:
* @param offsetInBytes Offset in originally locked region
*/
LockedMemoryBase(IAllocator* ptr, void* handle, LockOp lockFlag, size_t offsetInBytes)
: _allocator(ptr), _handle(handle), _lockFlag(lockFlag), _offset(offsetInBytes) {}
: _allocator(ptr),
_handle(handle),
_lockFlag(lockFlag),
_offset(offsetInBytes) {}
/**
* @brief A copy constructor
@ -51,7 +54,10 @@ public:
* @param that An rvalue reference for the other LockedMemoryBase instance
*/
LockedMemoryBase(LockedMemoryBase&& that) noexcept
: _allocator(that._allocator), _handle(that._handle), _lockFlag(that._lockFlag), _offset(that._offset) {
: _allocator(that._allocator),
_handle(that._handle),
_lockFlag(that._lockFlag),
_offset(that._offset) {
that._locked = nullptr;
}
@ -86,7 +92,8 @@ protected:
* @return The pointer to the locked object, nullptr otherwise
*/
virtual T* dereference() const {
if (_locked != nullptr) return _locked;
if (_locked != nullptr)
return _locked;
if (_allocator == nullptr) {
return nullptr;
@ -134,7 +141,7 @@ public:
* @param that Rvalue reference for the other LockedMemoryBase instance
* @param offset Offset value
*/
LockedMemory(LockedMemory<T>&& that, size_t offset): base(std::move(that)) {
LockedMemory(LockedMemory<T>&& that, size_t offset) : base(std::move(that)) {
base::_offset = offset;
}
@ -242,7 +249,7 @@ public:
* @param that Rvalue reference for the other LockedMemoryBase instance
* @param offset Offset value
*/
LockedMemory(LockedMemory<void>&& that, size_t offset): base(std::move(that)) {
LockedMemory(LockedMemory<void>&& that, size_t offset) : base(std::move(that)) {
base::_offset = offset;
}
@ -326,7 +333,7 @@ public:
* @param handle Handle provided by allocator
* @param offset Offset in bytes in originally locked region
*/
LockedMemory(IAllocator* ptr, void* handle, size_t offset): base(ptr, handle, LOCK_FOR_READ, offset) {}
LockedMemory(IAllocator* ptr, void* handle, size_t offset) : base(ptr, handle, LOCK_FOR_READ, offset) {}
/**
* @brief A default copy constructor that accepts rvalue
@ -341,7 +348,7 @@ public:
* @param that Rvalue reference for the other LockedMemoryBase instance
* @param offset Offset value
*/
LockedMemory(LockedMemory<const T>&& that, size_t offset): base(std::move(that)) {
LockedMemory(LockedMemory<const T>&& that, size_t offset) : base(std::move(that)) {
base::_offset = offset;
}
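// Illustrative sketch of reading a blob through LockedMemory (the helper name is a placeholder).
#include <ie_blob.h>

float first_element(const InferenceEngine::MemoryBlob::CPtr& blob) {
    // rmap() returns LockedMemory<const void>; the region is unlocked when it goes out of scope.
    auto holder = blob->rmap();
    const float* data = holder.as<const float*>();
    return blob->size() ? data[0] : 0.f;
}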


@ -17,33 +17,33 @@
#include <cstddef>
#include <type_traits>
#define IE_THREAD_TBB 0
#define IE_THREAD_OMP 1
#define IE_THREAD_SEQ 2
#define IE_THREAD_TBB 0
#define IE_THREAD_OMP 1
#define IE_THREAD_SEQ 2
#define IE_THREAD_TBB_AUTO 3
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
#ifndef NOMINMAX
# define NOMINMAX
#endif
#ifndef TBB_PREVIEW_LOCAL_OBSERVER
# define TBB_PREVIEW_LOCAL_OBSERVER 1
#endif
#ifndef TBB_PREVIEW_NUMA_SUPPORT
# define TBB_PREVIEW_NUMA_SUPPORT 1
#endif
#ifndef TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION
# define TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION 1
#endif
# ifndef NOMINMAX
# define NOMINMAX
# endif
# ifndef TBB_PREVIEW_LOCAL_OBSERVER
# define TBB_PREVIEW_LOCAL_OBSERVER 1
# endif
# ifndef TBB_PREVIEW_NUMA_SUPPORT
# define TBB_PREVIEW_NUMA_SUPPORT 1
# endif
# ifndef TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION
# define TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION 1
# endif
#include "tbb/blocked_range.h"
#include "tbb/blocked_range2d.h"
#include "tbb/blocked_range3d.h"
#include "tbb/parallel_for.h"
#include "tbb/parallel_reduce.h"
#include "tbb/parallel_sort.h"
#include "tbb/task_arena.h"
#include "tbb/task_scheduler_observer.h"
# include "tbb/blocked_range.h"
# include "tbb/blocked_range2d.h"
# include "tbb/blocked_range3d.h"
# include "tbb/parallel_for.h"
# include "tbb/parallel_reduce.h"
# include "tbb/parallel_sort.h"
# include "tbb/task_arena.h"
# include "tbb/task_scheduler_observer.h"
inline int parallel_get_max_threads() {
return tbb::this_task_arena::max_concurrency();
@ -60,31 +60,31 @@ inline void parallel_set_num_threads(int) {
inline int parallel_get_env_threads() {
return 0;
}
#if IE_THREAD == IE_THREAD_TBB
#define PARTITIONING , tbb::static_partitioner()
# if IE_THREAD == IE_THREAD_TBB
# define PARTITIONING , tbb::static_partitioner()
// The TBB version less than 2018u1 has no static_partitioner argument for
// tbb::parallel_deterministic_reduce. So will fallback to non deterministic version.
#if (TBB_INTERFACE_VERSION >= 10001)
#define _TBB_REDUCE_FUNC tbb::parallel_deterministic_reduce
#else
#define _TBB_REDUCE_FUNC tbb::parallel_reduce
#endif
# if (TBB_INTERFACE_VERSION >= 10001)
# define _TBB_REDUCE_FUNC tbb::parallel_deterministic_reduce
# else
# define _TBB_REDUCE_FUNC tbb::parallel_reduce
# endif
#else
#define PARTITIONING
#endif
# else
# define PARTITIONING
# endif
#elif IE_THREAD == IE_THREAD_OMP
#include <omp.h>
# include <omp.h>
#include <algorithm>
#include <cstdlib>
#include <string>
# include <algorithm>
# include <cstdlib>
# include <string>
/* MSVC still supports omp 2.0 only */
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#define collapse(x)
#endif // defined(_MSC_VER) && !defined(__INTEL_COMPILER)
# if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
# define collapse(x)
# endif // defined(_MSC_VER) && !defined(__INTEL_COMPILER)
inline int parallel_get_max_threads() {
return omp_get_max_threads();
}
@ -110,7 +110,7 @@ inline int parallel_get_env_threads() {
}
#elif IE_THREAD == IE_THREAD_SEQ
#include <algorithm> // NOLINT
# include <algorithm> // NOLINT
inline int parallel_get_env_threads() {
return 1;
}
@ -133,7 +133,8 @@ namespace InferenceEngine {
template <typename F>
void parallel_nt(int nthr, const F& func) {
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
if (nthr == 0) nthr = parallel_get_max_threads();
if (nthr == 0)
nthr = parallel_get_max_threads();
if (nthr == 1) {
func(0, 1);
return;
@ -148,7 +149,7 @@ void parallel_nt(int nthr, const F& func) {
return;
}
#pragma omp parallel num_threads(nthr)
# pragma omp parallel num_threads(nthr)
func(parallel_get_thread_num(), parallel_get_num_threads());
#elif IE_THREAD == IE_THREAD_SEQ
func(0, 1);
@ -168,18 +169,20 @@ void parallel_nt_static(int nthr, const F& func) {
return;
}
if (nthr == 0) nthr = parallel_get_max_threads();
if (nthr == 0)
nthr = parallel_get_max_threads();
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
tbb::parallel_for(
0, nthr,
0,
nthr,
[&](int ithr) {
func(ithr, nthr);
},
tbb::static_partitioner {});
tbb::static_partitioner{});
#elif IE_THREAD == IE_THREAD_OMP
#pragma omp parallel num_threads(nthr)
# pragma omp parallel num_threads(nthr)
{ func(parallel_get_thread_num(), parallel_get_num_threads()); }
#endif
}
@ -200,10 +203,12 @@ template <typename T0, typename R, typename F>
R parallel_sum(const T0& D0, const R& input, const F& func) {
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
return _TBB_REDUCE_FUNC(
tbb::blocked_range<T0>(0, D0), input,
tbb::blocked_range<T0>(0, D0),
input,
[&](const tbb::blocked_range<T0>& r, R init) -> R {
R sum = init;
for (T0 dim1 = r.begin(); dim1 < r.end(); ++dim1) sum += func(dim1);
for (T0 dim1 = r.begin(); dim1 < r.end(); ++dim1)
sum += func(dim1);
return sum;
},
[](R x, R y) -> R {
@ -212,15 +217,15 @@ R parallel_sum(const T0& D0, const R& input, const F& func) {
#else
R sum = input;
#ifdef _MSC_VER
# ifdef _MSC_VER
using T0_IT = typename std::make_signed<T0>::type;
#else
# else
using T0_IT = T0;
#endif
# endif
#if IE_THREAD == IE_THREAD_OMP
#pragma omp parallel for reduction(+ : sum) schedule(static)
#endif
# if IE_THREAD == IE_THREAD_OMP
# pragma omp parallel for reduction(+ : sum) schedule(static)
# endif
for (T0_IT dim1 = 0; dim1 < static_cast<T0_IT>(D0); dim1++) {
sum += static_cast<R>(func(dim1));
}
@ -232,7 +237,8 @@ template <typename T0, typename T1, typename R, typename F>
R parallel_sum2d(const T0& D0, const T1& D1, const R& input, const F& func) {
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
return _TBB_REDUCE_FUNC(
tbb::blocked_range2d<T0, T1>(0, D0, 0, D1), input,
tbb::blocked_range2d<T0, T1>(0, D0, 0, D1),
input,
[&](const tbb::blocked_range2d<T0, T1>& r, R init) -> R {
R sum = init;
for (T0 dim2 = r.rows().begin(); dim2 < r.rows().end(); dim2++) {
@ -248,17 +254,17 @@ R parallel_sum2d(const T0& D0, const T1& D1, const R& input, const F& func) {
#else
R sum = input;
#ifdef _MSC_VER
# ifdef _MSC_VER
using T0_IT = typename std::make_signed<T0>::type;
using T1_IT = typename std::make_signed<T1>::type;
#else
# else
using T0_IT = T0;
using T1_IT = T1;
#endif
# endif
#if IE_THREAD == IE_THREAD_OMP
#pragma omp parallel for collapse(2) reduction(+ : sum) schedule(static)
#endif
# if IE_THREAD == IE_THREAD_OMP
# pragma omp parallel for collapse(2) reduction(+ : sum) schedule(static)
# endif
for (T0_IT dim2 = 0; dim2 < D0; dim2++) {
for (T1_IT dim1 = 0; dim1 < D1; dim1++) {
sum += func(dim2, dim1);
@ -271,7 +277,8 @@ template <typename T0, typename T1, typename T2, typename R, typename F>
R parallel_sum3d(const T0& D0, const T1& D1, const T2& D2, const R& input, const F& func) {
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
return _TBB_REDUCE_FUNC(
tbb::blocked_range3d<T0, T1, T2>(0, D0, 0, D1, 0, D2), input,
tbb::blocked_range3d<T0, T1, T2>(0, D0, 0, D1, 0, D2),
input,
[&](const tbb::blocked_range3d<T0, T1, T2>& r, R init) -> R {
R sum = init;
for (T0 dim1 = r.pages().begin(); dim1 < r.pages().end(); dim1++) {
@ -289,19 +296,19 @@ R parallel_sum3d(const T0& D0, const T1& D1, const T2& D2, const R& input, const
#else
R sum = input;
#ifdef _MSC_VER
# ifdef _MSC_VER
using T0_IT = typename std::make_signed<T0>::type;
using T1_IT = typename std::make_signed<T1>::type;
using T2_IT = typename std::make_signed<T2>::type;
#else
# else
using T0_IT = T0;
using T1_IT = T1;
using T2_IT = T2;
#endif
# endif
#if IE_THREAD == IE_THREAD_OMP
#pragma omp parallel for collapse(3) reduction(+ : sum) schedule(static)
#endif
# if IE_THREAD == IE_THREAD_OMP
# pragma omp parallel for collapse(3) reduction(+ : sum) schedule(static)
# endif
for (T0_IT dim1 = 0; dim1 < static_cast<T0_IT>(D0); dim1++) {
for (T1_IT dim2 = 0; dim2 < static_cast<T1_IT>(D1); dim2++) {
for (T2_IT dim3 = 0; dim3 < static_cast<T2_IT>(D2); dim3++) {
@ -353,31 +360,28 @@ inline void splitter(const T& n, const Q& team, const Q& tid, T& n_start, T& n_e
}
namespace details {
template<typename T>
struct num_of_lambda_args : public num_of_lambda_args<decltype(&T::operator())> {
};
template <typename T>
struct num_of_lambda_args : public num_of_lambda_args<decltype(&T::operator())> {};
template<typename C, typename R, typename... Args>
struct num_of_lambda_args<R(C::*)(Args...) const> {
constexpr static int value = sizeof...(Args);
};
template <typename C, typename R, typename... Args>
struct num_of_lambda_args<R (C::*)(Args...) const> {
constexpr static int value = sizeof...(Args);
};
template<typename ACT, typename ...T, size_t N_ARGS = num_of_lambda_args<ACT>::value>
typename std::enable_if<N_ARGS == sizeof...(T) + 1, void>::type
call_with_args(ACT body, size_t g_id, T ...arg) {
body(g_id, arg...);
}
template <typename ACT, typename... T, size_t N_ARGS = num_of_lambda_args<ACT>::value>
typename std::enable_if<N_ARGS == sizeof...(T) + 1, void>::type call_with_args(ACT body, size_t g_id, T... arg) {
body(g_id, arg...);
}
template<typename ACT, typename ...T, size_t N_ARGS = num_of_lambda_args<ACT>::value>
typename std::enable_if<N_ARGS == sizeof...(T), void>::type
call_with_args(ACT body, size_t g_id, T ...arg) {
body(arg...);
}
template <typename ACT, typename... T, size_t N_ARGS = num_of_lambda_args<ACT>::value>
typename std::enable_if<N_ARGS == sizeof...(T), void>::type call_with_args(ACT body, size_t g_id, T... arg) {
body(arg...);
}
} // namespace details
template <typename T0, typename F>
void for_1d(const int& ithr, const int& nthr, const T0& D0, const F& func) {
T0 d0 {0}, end {0};
T0 d0{0}, end{0};
splitter(D0, nthr, ithr, d0, end);
for (; d0 < end; ++d0)
details::call_with_args(func, ithr, d0);
@ -388,12 +392,14 @@ void parallel_for(const T0& D0, const F& func) {
#if IE_THREAD == IE_THREAD_TBB
auto work_amount = static_cast<size_t>(D0);
int nthr = parallel_get_max_threads();
if (static_cast<size_t>(nthr) > work_amount) nthr = static_cast<int>(work_amount);
if (static_cast<size_t>(nthr) > work_amount)
nthr = static_cast<int>(work_amount);
if (nthr == 1) {
for_1d(0, 1, D0, func);
} else {
tbb::parallel_for(
0, nthr,
0,
nthr,
[&](int ithr) {
for_1d(ithr, nthr, D0, func);
},
@ -405,7 +411,7 @@ void parallel_for(const T0& D0, const F& func) {
for_1d(ithr, nthr, D0, func);
});
#elif IE_THREAD == IE_THREAD_OMP
#pragma omp parallel
# pragma omp parallel
for_1d(parallel_get_thread_num(), parallel_get_num_threads(), D0, func);
#elif IE_THREAD == IE_THREAD_SEQ
for_1d(0, 1, D0, func);
@ -415,12 +421,13 @@ void parallel_for(const T0& D0, const F& func) {
template <typename T0, typename T1, typename F>
void for_2d(const int& ithr, const int& nthr, const T0& D0, const T1& D1, const F& func) {
const size_t work_amount = (size_t)D0 * D1;
if (work_amount == 0) return;
size_t start {0}, end {0};
if (work_amount == 0)
return;
size_t start{0}, end{0};
splitter(work_amount, nthr, ithr, start, end);
T0 d0 {0};
T1 d1 {0};
T0 d0{0};
T1 d1{0};
parallel_it_init(start, d0, D0, d1, D1);
for (size_t iwork = start; iwork < end; ++iwork) {
details::call_with_args(func, ithr, d0, d1);
@ -433,12 +440,14 @@ void parallel_for2d(const T0& D0, const T1& D1, const F& func) {
#if IE_THREAD == IE_THREAD_TBB
auto work_amount = static_cast<size_t>(D0 * D1);
int nthr = parallel_get_max_threads();
if (static_cast<size_t>(nthr) > work_amount) nthr = static_cast<int>(work_amount);
if (static_cast<size_t>(nthr) > work_amount)
nthr = static_cast<int>(work_amount);
if (nthr == 1) {
for_2d(0, 1, D0, D1, func);
} else {
tbb::parallel_for(
0, nthr,
0,
nthr,
[&](int ithr) {
for_2d(ithr, nthr, D0, D1, func);
},
@ -450,7 +459,7 @@ void parallel_for2d(const T0& D0, const T1& D1, const F& func) {
for_2d(ithr, nthr, D0, D1, func);
});
#elif IE_THREAD == IE_THREAD_OMP
#pragma omp parallel
# pragma omp parallel
for_2d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, func);
#elif IE_THREAD == IE_THREAD_SEQ
for_2d(0, 1, D0, D1, func);
@ -460,13 +469,14 @@ void parallel_for2d(const T0& D0, const T1& D1, const F& func) {
template <typename T0, typename T1, typename T2, typename F>
void for_3d(const int& ithr, const int& nthr, const T0& D0, const T1& D1, const T2& D2, const F& func) {
const size_t work_amount = (size_t)D0 * D1 * D2;
if (work_amount == 0) return;
size_t start {0}, end {0};
if (work_amount == 0)
return;
size_t start{0}, end{0};
splitter(work_amount, nthr, ithr, start, end);
T0 d0 {0};
T1 d1 {0};
T2 d2 {0};
T0 d0{0};
T1 d1{0};
T2 d2{0};
parallel_it_init(start, d0, D0, d1, D1, d2, D2);
for (size_t iwork = start; iwork < end; ++iwork) {
details::call_with_args(func, ithr, d0, d1, d2);
@ -479,12 +489,14 @@ void parallel_for3d(const T0& D0, const T1& D1, const T2& D2, const F& func) {
#if IE_THREAD == IE_THREAD_TBB
auto work_amount = static_cast<size_t>(D0 * D1 * D2);
int nthr = parallel_get_max_threads();
if (static_cast<size_t>(nthr) > work_amount) nthr = static_cast<int>(work_amount);
if (static_cast<size_t>(nthr) > work_amount)
nthr = static_cast<int>(work_amount);
if (nthr == 1) {
for_3d(0, 1, D0, D1, D2, func);
} else {
tbb::parallel_for(
0, nthr,
0,
nthr,
[&](int ithr) {
for_3d(ithr, nthr, D0, D1, D2, func);
},
@ -496,7 +508,7 @@ void parallel_for3d(const T0& D0, const T1& D1, const T2& D2, const F& func) {
for_3d(ithr, nthr, D0, D1, D2, func);
});
#elif IE_THREAD == IE_THREAD_OMP
#pragma omp parallel
# pragma omp parallel
for_3d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, func);
#elif IE_THREAD == IE_THREAD_SEQ
for_3d(0, 1, D0, D1, D2, func);
@ -506,14 +518,15 @@ void parallel_for3d(const T0& D0, const T1& D1, const T2& D2, const F& func) {
template <typename T0, typename T1, typename T2, typename T3, typename F>
void for_4d(const int& ithr, const int& nthr, const T0& D0, const T1& D1, const T2& D2, const T3& D3, const F& func) {
const size_t work_amount = (size_t)D0 * D1 * D2 * D3;
if (work_amount == 0) return;
size_t start {0}, end {0};
if (work_amount == 0)
return;
size_t start{0}, end{0};
splitter(work_amount, nthr, ithr, start, end);
T0 d0 {0};
T1 d1 {0};
T2 d2 {0};
T3 d3 {0};
T0 d0{0};
T1 d1{0};
T2 d2{0};
T3 d3{0};
parallel_it_init(start, d0, D0, d1, D1, d2, D2, d3, D3);
for (size_t iwork = start; iwork < end; ++iwork) {
details::call_with_args(func, ithr, d0, d1, d2, d3);
@ -526,12 +539,14 @@ void parallel_for4d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons
#if IE_THREAD == IE_THREAD_TBB
auto work_amount = static_cast<size_t>(D0 * D1 * D2 * D3);
int nthr = parallel_get_max_threads();
if (static_cast<size_t>(nthr) > work_amount) nthr = static_cast<int>(work_amount);
if (static_cast<size_t>(nthr) > work_amount)
nthr = static_cast<int>(work_amount);
if (nthr == 1) {
for_4d(0, 1, D0, D1, D2, D3, func);
} else {
tbb::parallel_for(
0, nthr,
0,
nthr,
[&](int ithr) {
for_4d(ithr, nthr, D0, D1, D2, D3, func);
},
@ -543,7 +558,7 @@ void parallel_for4d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons
for_4d(ithr, nthr, D0, D1, D2, D3, func);
});
#elif IE_THREAD == IE_THREAD_OMP
#pragma omp parallel
# pragma omp parallel
for_4d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, func);
#elif IE_THREAD == IE_THREAD_SEQ
for_4d(0, 1, D0, D1, D2, D3, func);
@ -551,18 +566,25 @@ void parallel_for4d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons
}
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename F>
void for_5d(const int& ithr, const int& nthr, const T0& D0, const T1& D1, const T2& D2, const T3& D3, const T4& D4,
void for_5d(const int& ithr,
const int& nthr,
const T0& D0,
const T1& D1,
const T2& D2,
const T3& D3,
const T4& D4,
const F& func) {
const size_t work_amount = (size_t)D0 * D1 * D2 * D3 * D4;
if (work_amount == 0) return;
size_t start {0}, end {0};
if (work_amount == 0)
return;
size_t start{0}, end{0};
splitter(work_amount, nthr, ithr, start, end);
T0 d0 {0};
T1 d1 {0};
T2 d2 {0};
T3 d3 {0};
T4 d4 {0};
T0 d0{0};
T1 d1{0};
T2 d2{0};
T3 d3{0};
T4 d4{0};
parallel_it_init(start, d0, D0, d1, D1, d2, D2, d3, D3, d4, D4);
for (size_t iwork = start; iwork < end; ++iwork) {
details::call_with_args(func, ithr, d0, d1, d2, d3, d4);
@ -575,12 +597,14 @@ void parallel_for5d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons
#if IE_THREAD == IE_THREAD_TBB
auto work_amount = static_cast<size_t>(D0 * D1 * D2 * D3 * D4);
int nthr = parallel_get_max_threads();
if (static_cast<size_t>(nthr) > work_amount) nthr = static_cast<int>(work_amount);
if (static_cast<size_t>(nthr) > work_amount)
nthr = static_cast<int>(work_amount);
if (nthr == 1) {
for_5d(0, 1, D0, D1, D2, D3, D4, func);
} else {
tbb::parallel_for(
0, nthr,
0,
nthr,
[&](int ithr) {
for_5d(ithr, nthr, D0, D1, D2, D3, D4, func);
},
@ -592,7 +616,7 @@ void parallel_for5d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons
for_5d(ithr, nthr, D0, D1, D2, D3, D4, func);
});
#elif IE_THREAD == IE_THREAD_OMP
#pragma omp parallel
# pragma omp parallel
for_5d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, D4, func);
#elif IE_THREAD == IE_THREAD_SEQ
for_5d(0, 1, D0, D1, D2, D3, D4, func);
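// A small usage sketch for the helpers in this header; the sizes are arbitrary, and the
// snippet assumes the build defines IE_THREAD to select one of the backends above.
#include <ie_parallel.hpp>

#include <vector>

float example_parallel_fill_and_sum() {
    const size_t rows = 64, cols = 128;
    std::vector<float> data(rows * cols);
    InferenceEngine::parallel_for2d(rows, cols, [&](size_t r, size_t c) {
        data[r * cols + c] = static_cast<float>(r + c);  // each (r, c) pair is visited exactly once
    });
    return InferenceEngine::parallel_sum(data.size(), 0.0f, [&](size_t i) {
        return data[i];  // partial sums are reduced with operator+ across threads
    });
}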


@ -72,7 +72,7 @@ public:
*
* @param str char array
*/
Parameter(const char* str): Parameter(std::string(str)) {} // NOLINT
Parameter(const char* str) : Parameter(std::string(str)) {} // NOLINT
/**
* @brief Destructor
@ -89,7 +89,8 @@ public:
return *this;
}
clear();
if (!parameter.empty()) ptr = parameter.ptr->copy();
if (!parameter.empty())
ptr = parameter.ptr->copy();
return *this;
}
@ -279,7 +280,7 @@ private:
return id == typeid(T);
}
Any* copy() const override {
return new RealData {get()};
return new RealData{get()};
}
T& get() & {
@ -291,14 +292,12 @@ private:
}
template <class U>
typename std::enable_if<!HasOperatorEqual<U>::value, bool>::type
equal(const Any& left, const Any& rhs) const {
typename std::enable_if<!HasOperatorEqual<U>::value, bool>::type equal(const Any& left, const Any& rhs) const {
IE_THROW() << "Parameter doesn't contain equal operator";
}
template <class U>
typename std::enable_if<HasOperatorEqual<U>::value, bool>::type
equal(const Any& left, const Any& rhs) const {
typename std::enable_if<HasOperatorEqual<U>::value, bool>::type equal(const Any& left, const Any& rhs) const {
return dyn_cast<U>(&left) == dyn_cast<U>(&rhs);
}
@ -306,13 +305,11 @@ private:
return rhs.is(typeid(T)) && equal<T>(*this, rhs);
}
template <class U>
typename std::enable_if<!HasOutputStreamOperator<U>::value, void>::type
print(std::ostream& stream, const U& object) const {}
template <class U, typename std::enable_if<!HasOutputStreamOperator<U>::value, bool>::type = true>
void print(std::ostream& stream, const U& object) const {}
template <class U>
typename std::enable_if<HasOutputStreamOperator<U>::value, void>::type
print(std::ostream& stream, const U& object) const {
template <class U, typename std::enable_if<HasOutputStreamOperator<U>::value, bool>::type = true>
void print(std::ostream& stream, const U& object) const {
stream << object;
}
@ -323,13 +320,15 @@ private:
template <typename T>
static T& dyn_cast(Any* obj) {
if (obj == nullptr) IE_THROW() << "Parameter is empty!";
if (obj == nullptr)
IE_THROW() << "Parameter is empty!";
return dynamic_cast<RealData<T>&>(*obj).get();
}
template <typename T>
static const T& dyn_cast(const Any* obj) {
if (obj == nullptr) IE_THROW() << "Parameter is empty!";
if (obj == nullptr)
IE_THROW() << "Parameter is empty!";
return dynamic_cast<const RealData<T>&>(*obj).get();
}
@ -338,7 +337,7 @@ private:
/**
* @brief An std::map object containing parameters
*/
*/
using ParamMap = std::map<std::string, Parameter>;
#ifdef __ANDROID__

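For context, Parameter type-erases its value in a RealData<T> box and recovers it through dyn_cast, and ParamMap is simply a string-keyed map of such holders. A short usage sketch, assuming only the public ie_parameter.hpp API (the key names are illustrative):

    #include <string>
    #include "ie_parameter.hpp"

    int main() {
        InferenceEngine::ParamMap params = {
            {"LEVEL", InferenceEngine::Parameter(std::string("HIGH"))},
            {"THREADS", InferenceEngine::Parameter(4)},
        };

        // as<T>() dynamic-casts the stored RealData<T>; a mismatched T throws.
        const std::string level = params.at("LEVEL").as<std::string>();
        const int threads = params.at("THREADS").as<int>();
        return (level == "HIGH" && threads == 4) ? 0 : 1;
    }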
View File

@ -10,10 +10,10 @@
*/
#pragma once
#include <map>
#include <string>
#include <tuple>
#include <vector>
#include <map>
#include "ie_precision.hpp"
@ -37,12 +37,11 @@ namespace Metrics {
#define EXEC_NETWORK_METRIC_KEY(name) METRIC_KEY(name)
#ifndef DECLARE_METRIC_KEY_IMPL
#define DECLARE_METRIC_KEY(name, ...) \
static constexpr auto METRIC_##name = #name
#    define DECLARE_METRIC_KEY(name, ...) static constexpr auto METRIC_##name = #name
#else
#define DECLARE_METRIC_KEY(name, ...) \
static constexpr auto METRIC_##name = #name; \
DECLARE_METRIC_KEY_IMPL(name, __VA_ARGS__)
# define DECLARE_METRIC_KEY(name, ...) \
static constexpr auto METRIC_##name = #name; \
DECLARE_METRIC_KEY_IMPL(name, __VA_ARGS__)
#endif
#define DECLARE_EXEC_NETWORK_METRIC_KEY(name, ...) DECLARE_METRIC_KEY(name, __VA_ARGS__)
@ -51,7 +50,7 @@ namespace Metrics {
* @def METRIC_VALUE(name)
* @brief shortcut for defining metric values
*/
#define METRIC_VALUE(name) InferenceEngine::Metrics::name
#define METRIC_VALUE(name) InferenceEngine::Metrics::name
#define DECLARE_METRIC_VALUE(name) static constexpr auto name = #name
/**
@ -162,9 +161,15 @@ enum class DeviceType {
/** @cond INTERNAL */
inline std::ostream& operator<<(std::ostream& os, const InferenceEngine::Metrics::DeviceType& deviceType) {
switch (deviceType) {
case InferenceEngine::Metrics::DeviceType::discrete: os << "discrete"; break;
case InferenceEngine::Metrics::DeviceType::integrated: os << "integrated"; break;
default: os << "unknown"; break;
case InferenceEngine::Metrics::DeviceType::discrete:
os << "discrete";
break;
case InferenceEngine::Metrics::DeviceType::integrated:
os << "integrated";
break;
default:
os << "unknown";
break;
}
return os;
@ -177,7 +182,8 @@ inline std::ostream& operator<<(std::ostream& os, const InferenceEngine::Metrics
DECLARE_METRIC_KEY(DEVICE_TYPE, DeviceType);
/**
* @brief Metric which defines Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions supported by specified device
* @brief Metric which defines Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions supported by
* specified device
*/
DECLARE_METRIC_KEY(DEVICE_GOPS, std::map<InferenceEngine::Precision, float>);
@ -212,15 +218,15 @@ namespace PluginConfigParams {
* @def CONFIG_KEY(name)
* @brief shortcut for defining configuration keys
*/
#define CONFIG_KEY(name) InferenceEngine::PluginConfigParams::_CONFIG_KEY(name)
#define _CONFIG_KEY(name) KEY_##name
#define CONFIG_KEY(name) InferenceEngine::PluginConfigParams::_CONFIG_KEY(name)
#define _CONFIG_KEY(name) KEY_##name
#define DECLARE_CONFIG_KEY(name) static constexpr auto _CONFIG_KEY(name) = #name
/**
* @def CONFIG_VALUE(name)
* @brief shortcut for defining configuration values
*/
#define CONFIG_VALUE(name) InferenceEngine::PluginConfigParams::name
#define CONFIG_VALUE(name) InferenceEngine::PluginConfigParams::name
#define DECLARE_CONFIG_VALUE(name) static constexpr auto name = #name
/**
@ -239,13 +245,14 @@ DECLARE_CONFIG_KEY(CPU_THREADS_NUM);
*
* It is passed to Core::SetConfig(), this option should be used with values:
* PluginConfigParams::NO (no pinning for CPU inference threads)
* PluginConfigParams::YES, which is default on the conventional CPUs (pinning threads to cores, best for static benchmarks),
* PluginConfigParams::YES, which is default on the conventional CPUs (pinning threads to cores, best for static
* benchmarks),
*
* the following options are implemented only for the TBB as a threading option
* PluginConfigParams::NUMA (pinning threads to NUMA nodes, best for real-life, contented cases)
* on the Windows and MacOS* this option behaves as YES
* PluginConfigParams::HYBRID_AWARE (let the runtime to do pinning to the cores types, e.g. prefer the "big" cores for latency tasks)
* on the hybrid CPUs this option is default
* PluginConfigParams::HYBRID_AWARE (let the runtime to do pinning to the cores types, e.g. prefer the "big" cores for
* latency tasks) on the hybrid CPUs this option is default
*
* Also, the settings are ignored, if the OpenVINO compiled with OpenMP and any affinity-related OpenMP's
* environment variable is set (as affinity is configured explicitly)
@ -313,12 +320,15 @@ DECLARE_CONFIG_KEY(CONFIG_FILE);
DECLARE_CONFIG_KEY(LOG_LEVEL);
DECLARE_CONFIG_VALUE(LOG_NONE); // turn off logging
DECLARE_CONFIG_VALUE(LOG_ERROR); // error events that might still allow the application to continue running
DECLARE_CONFIG_VALUE(LOG_WARNING); // potentially harmful situations which may further lead to ERROR
DECLARE_CONFIG_VALUE(
LOG_INFO); // informational messages that display the progress of the application at coarse-grained level
DECLARE_CONFIG_VALUE(LOG_DEBUG); // fine-grained events that are most useful to debug an application.
DECLARE_CONFIG_VALUE(LOG_TRACE); // finer-grained informational events than the DEBUG
DECLARE_CONFIG_VALUE(LOG_ERROR); // error events that might still allow the
// application to continue running
DECLARE_CONFIG_VALUE(LOG_WARNING); // potentially harmful situations which may
// further lead to ERROR
DECLARE_CONFIG_VALUE(LOG_INFO); // informational messages that display the progress of the
// application at coarse-grained level
DECLARE_CONFIG_VALUE(LOG_DEBUG); // fine-grained events that are most useful to
// debug an application.
DECLARE_CONFIG_VALUE(LOG_TRACE); // finer-grained informational events than the DEBUG
/**
* @brief the key for setting of required device to execute on
@ -349,7 +359,6 @@ DECLARE_CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS);
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::ExecutableNetwork::GetExecGraphInfo::serialize method")
DECLARE_CONFIG_KEY(DUMP_EXEC_GRAPH_AS_DOT);
/**
* @brief The name for setting to execute in bfloat16 precision whenever it is possible
*

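Taken together, CONFIG_KEY/CONFIG_VALUE and METRIC_KEY expand to the string constants consumed by Core::SetConfig and looked up by Core::GetMetric. A hedged sketch of how the keys declared in this header are typically used (device names and values are illustrative, and which keys a given plugin accepts is device-specific):

    #include <iostream>
    #include <map>
    #include <string>
    #include "ie_core.hpp"
    #include "ie_plugin_config.hpp"

    int main() {
        InferenceEngine::Core core;

        // Configuration is passed as plain string key/value pairs.
        core.SetConfig({{CONFIG_KEY(CPU_THREADS_NUM), "4"},
                        {CONFIG_KEY(CPU_BIND_THREAD), CONFIG_VALUE(NUMA)}},
                       "CPU");

        // Metrics come back as Parameter objects; DEVICE_TYPE uses the enum whose
        // operator<< is defined above (GetMetric throws if the device is absent).
        auto type = core.GetMetric("GPU", METRIC_KEY(DEVICE_TYPE))
                        .as<InferenceEngine::Metrics::DeviceType>();
        std::cout << type << std::endl;
        return 0;
    }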
View File

@ -15,7 +15,6 @@
#include "ie_common.h"
namespace InferenceEngine {
/**
@ -230,10 +229,23 @@ public:
static Precision FromStr(const std::string& str) {
static const std::unordered_map<std::string, ePrecision> names = {
#define PRECISION_NAME(s) {#s, s}
PRECISION_NAME(Q78), PRECISION_NAME(BOOL), PRECISION_NAME(BF16),
PRECISION_NAME(I4), PRECISION_NAME(I8), PRECISION_NAME(I16), PRECISION_NAME(I32), PRECISION_NAME(I64),
PRECISION_NAME(U4), PRECISION_NAME(U8), PRECISION_NAME(U16), PRECISION_NAME(U32), PRECISION_NAME(U64),
PRECISION_NAME(FP32), PRECISION_NAME(FP64), PRECISION_NAME(FP16), PRECISION_NAME(MIXED),
PRECISION_NAME(Q78),
PRECISION_NAME(BOOL),
PRECISION_NAME(BF16),
PRECISION_NAME(I4),
PRECISION_NAME(I8),
PRECISION_NAME(I16),
PRECISION_NAME(I32),
PRECISION_NAME(I64),
PRECISION_NAME(U4),
PRECISION_NAME(U8),
PRECISION_NAME(U16),
PRECISION_NAME(U32),
PRECISION_NAME(U64),
PRECISION_NAME(FP32),
PRECISION_NAME(FP64),
PRECISION_NAME(FP16),
PRECISION_NAME(MIXED),
PRECISION_NAME(BIN),
#undef PRECISION_NAME
};
@ -292,12 +304,15 @@ protected:
* @returns True if strings are the same
*/
static bool areSameStrings(const char* l, const char* r) noexcept {
if (l == r) return true;
if (l == r)
return true;
if (l == nullptr || r == nullptr) return false;
if (l == nullptr || r == nullptr)
return false;
for (; *l && *r; l++, r++) {
if (*l != *r) return false;
if (*l != *r)
return false;
}
return *l == *r;
}
@ -366,7 +381,7 @@ struct PrecisionTrait<Precision::BF16> {
using value_type = int16_t;
enum { is_float = true };
};
template<>
template <>
struct PrecisionTrait<Precision::Q78> {
using value_type = uint16_t;
enum { is_float = false };
@ -484,7 +499,8 @@ inline std::ostream& operator<<(std::ostream& os, const std::vector<Precision>&
}
inline constexpr uint32_t getPrecisionMask(
InferenceEngine::Precision::ePrecision precision1, InferenceEngine::Precision::ePrecision precision2,
InferenceEngine::Precision::ePrecision precision1,
InferenceEngine::Precision::ePrecision precision2,
InferenceEngine::Precision::ePrecision precision3 = InferenceEngine::Precision::MIXED,
InferenceEngine::Precision::ePrecision precision4 = InferenceEngine::Precision::MIXED) {
return (precision1) | (precision2 << 8) | (precision3 << 16) | (precision4 << 24);
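Since the hunks above touch both the FromStr lookup table and getPrecisionMask, a small sketch of how they are typically exercised may help (assuming only ie_precision.hpp; the chosen precisions are illustrative):

    #include <iostream>
    #include "ie_precision.hpp"

    int main() {
        using InferenceEngine::Precision;

        // FromStr is a straight lookup over the PRECISION_NAME table above.
        const Precision p = Precision::FromStr("FP16");
        std::cout << p.name() << ": size=" << p.size()
                  << " is_float=" << p.is_float() << std::endl;

        // getPrecisionMask packs up to four precisions into one integer, which is
        // handy as a switch selector in precision-conversion code paths.
        constexpr auto mask = InferenceEngine::getPrecisionMask(Precision::FP32, Precision::U8);
        std::cout << std::hex << mask << std::endl;
        return 0;
    }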

View File

@ -3,8 +3,8 @@
//
/**
* @brief This header file provides structures to store info about pre-processing of network inputs (scale, mean image,
* ...)
* @brief This header file provides structures to store info about pre-processing of
* network inputs (scale, mean image, ...)
*
* @file ie_preprocess.hpp
*/
@ -155,8 +155,7 @@ public:
} else if (meanImage.get()->getTensorDesc().getDims().size() != 2) {
IE_THROW() << "Failed to set invalid mean image for channel: number of dimensions != 2";
} else if (channel >= _channelsInfo.size()) {
IE_THROW() << "Channel " << channel
<< " exceed number of PreProcess channels: " << _channelsInfo.size();
IE_THROW() << "Channel " << channel << " exceed number of PreProcess channels: " << _channelsInfo.size();
}
_variant = MEAN_IMAGE;
_channelsInfo[channel]->meanData = meanImage;
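The checks above require a 2-D mean blob and a channel index within the count established by init(). A hedged sketch of wiring per-channel mean images into PreProcessInfo (the channel count and blob contents are illustrative; init() is assumed to take the channel count, as in this header):

    #include <cstddef>
    #include <vector>
    #include "ie_blob.h"
    #include "ie_preprocess.hpp"

    // Attach one 2-D (H x W) mean image per channel of the input.
    void set_channel_means(InferenceEngine::PreProcessInfo& info,
                           const std::vector<InferenceEngine::Blob::Ptr>& means) {
        info.init(means.size());  // sizes the per-channel info; must precede the calls below
        for (std::size_t c = 0; c < means.size(); ++c) {
            // Each blob must have a 2-D TensorDesc, or the validation above throws.
            info.setMeanImageForChannel(means[c], c);
        }
    }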

View File

@ -4,7 +4,7 @@
/**
* @brief This is a header file for the IE RemoteContext and RemoteBlob classes
*
*
* @file ie_remote_context.hpp
*/
#pragma once
@ -44,7 +44,7 @@ public:
* @brief Constructor. Creates an empty RemoteBlob object with the specified precision.
* @param tensorDesc Defines the layout and dims of the blob
*/
explicit RemoteBlob(const TensorDesc& tensorDesc): MemoryBlob(tensorDesc) {}
explicit RemoteBlob(const TensorDesc& tensorDesc) : MemoryBlob(tensorDesc) {}
/**
* @brief Returns a map of device-specific parameters required for low-level
@ -103,8 +103,8 @@ public:
* @return true if this object can be dynamically cast to the type T*. Otherwise, false
*/
template <typename T,
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
typename std::enable_if<std::is_base_of<RemoteContext, T>::value, int>::type = 0>
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
typename std::enable_if<std::is_base_of<RemoteContext, T>::value, int>::type = 0>
bool is() noexcept {
return dynamic_cast<T*>(this) != nullptr;
}
@ -116,8 +116,8 @@ public:
* @return true if this object can be dynamically cast to the type const T*. Otherwise, false
*/
template <typename T,
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
typename std::enable_if<std::is_base_of<RemoteContext, T>::value, int>::type = 0>
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
typename std::enable_if<std::is_base_of<RemoteContext, T>::value, int>::type = 0>
bool is() const noexcept {
return dynamic_cast<const T*>(this) != nullptr;
}
@ -129,9 +129,9 @@ public:
* @return Raw pointer to the object of the type T or nullptr on error
*/
template <typename T,
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
typename std::enable_if<std::is_base_of<RemoteContext, T>::value, int>::type = 0>
T * as() noexcept {
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
typename std::enable_if<std::is_base_of<RemoteContext, T>::value, int>::type = 0>
T* as() noexcept {
return dynamic_cast<T*>(this);
}
@ -142,9 +142,9 @@ public:
* @return Raw pointer to the object of the type const T or nullptr on error
*/
template <typename T,
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
typename std::enable_if<std::is_base_of<RemoteContext, T>::value, int>::type = 0>
const T * as() const noexcept {
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
typename std::enable_if<std::is_base_of<RemoteContext, T>::value, int>::type = 0>
const T* as() const noexcept {
return dynamic_cast<const T*>(this);
}
@ -190,4 +190,3 @@ inline RemoteBlob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::P
}
} // namespace InferenceEngine
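The is<T>()/as<T>() templates above are dynamic_cast wrappers constrained to non-pointer, non-reference types derived from RemoteContext. A sketch of the intended down-cast pattern, where MyDeviceContext is a hypothetical stand-in for a plugin-provided context subclass (real plugins, e.g. the GPU plugin, ship their own):

    #include "ie_remote_context.hpp"

    // Hypothetical plugin-specific context; kept abstract so no base-class pure
    // virtuals need to be re-declared in this sketch.
    class MyDeviceContext : public InferenceEngine::RemoteContext {
    public:
        virtual void* nativeHandle() const = 0;  // illustrative device-specific API
    };

    void use_context(const InferenceEngine::RemoteContext::Ptr& ctx) {
        if (ctx && ctx->is<MyDeviceContext>()) {                  // dynamic_cast-based check
            MyDeviceContext* mine = ctx->as<MyDeviceContext>();   // nullptr on a failed cast
            (void)mine->nativeHandle();
        }
    }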

Some files were not shown because too many files have changed in this diff.