Merge remote-tracking branch 'upstream/master' into add_mxnet_operations
commit fe4e714c76
@@ -241,7 +241,7 @@ jobs:
. $(SETUPVARS) -pyver 3.8 && python3 -m pytest -s $(INSTALL_DIR)/tests/mo/unit_tests --junitxml=TEST-ModelOptimizer.xml
displayName: 'Model Optimizer UT'
continueOnError: false
enabled: false
enabled: true

- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ov_core_unit_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml
workingDirectory: $(INSTALL_TEST_DIR)
@@ -334,7 +334,7 @@ jobs:
displayName: 'Samples Smoke Tests'
continueOnError: false
condition: eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'ON')
enabled: false
enabled: true

- script: |
export DATA_PATH=$(MODELS_PATH)
@@ -353,7 +353,7 @@ jobs:
workingDirectory: $(LAYER_TESTS_DIR)
displayName: 'Layer Tests'
continueOnError: false
enabled: false
enabled: true

- task: PublishTestResults@2
condition: always()

@@ -35,7 +35,7 @@ jobs:

- checkout: none

- script: git -C ~/work/openvino checkout -m --recurse-submodules $(Build.SourceVersion)
- script: git -C ~/work/openvino checkout -m $(Build.SourceVersion) && git -C ~/work/openvino submodule update --init --recursive
displayName: checkout

# Should be after 'Install dependencies' because Git lfs is not installed
@@ -71,7 +71,7 @@ jobs:
./buildreleasenolto.sh
libinference_engine_preproc.so
MKLDNNPlugin
clDNNPlugin
ov_intel_gpu_plugin
clDNN_unit_tests64
gpuFuncTests
displayName: Build Lin
.gitmodules (2 lines changed, vendored)
@@ -57,7 +57,7 @@
path = thirdparty/onednn_gpu
url = https://github.com/oneapi-src/oneDNN.git
[submodule "tools/pot/thirdparty/open_model_zoo"]
path = tools/pot/thirdparty/open_model_zoo
path = thirdparty/open_model_zoo
url = https://github.com/openvinotoolkit/open_model_zoo.git
[submodule "thirdparty/json/nlohmann_json"]
path = thirdparty/json/nlohmann_json
@@ -66,7 +66,7 @@ Jenkinsfile @openvinotoolkit/openvino-admins
/src/inference/include/ie/gna/ @openvinotoolkit/openvino-ie-gna-maintainers

# IE MULTI:
/inference-engine/src/multi_device/ @openvinotoolkit/openvino-ie-multi-maintainers
/src/plugins/auto/ @openvinotoolkit/openvino-ie-multi-maintainers
/src/inference/include/ie/multi-device/ @openvinotoolkit/openvino-ie-multi-maintainers

# IE Tests:
@@ -79,8 +79,20 @@ function(_ie_add_api_validator_post_build_step)
_ie_add_api_validator_post_build_step_recursive(TARGET ${API_VALIDATOR_TARGET})

# remove targets which were tested before

foreach(item IN LISTS VALIDATED_LIBRARIES)
foreach(target IN LISTS API_VALIDATOR_TARGETS)
list(FIND VALIDATED_LIBRARIES ${target} index)
if (NOT index EQUAL -1)
list(APPEND VALIDATED_TARGETS ${target})
endif()
if(TARGET "${target}")
get_target_property(orig_target ${target} ALIASED_TARGET)
list(FIND VALIDATED_LIBRARIES ${orig_target} index)
if (NOT index EQUAL -1)
list(APPEND VALIDATED_TARGETS ${target})
endif()
endif()
endforeach()
foreach(item IN LISTS VALIDATED_TARGETS)
list(REMOVE_ITEM API_VALIDATOR_TARGETS ${item})
endforeach()
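The block added above filters out targets that were already API-validated, resolving CMake aliases through the `ALIASED_TARGET` property so an alias and its original target count as the same library. A small Python sketch of that filtering logic (names and the alias map are illustrative, not part of the build scripts):

```py
def remove_validated(api_validator_targets, validated_libraries, aliased_target_of):
    """Drop targets already validated under their own name or under the
    name of the target they alias (mirrors ALIASED_TARGET in the CMake above)."""
    validated = set(validated_libraries)
    kept = []
    for target in api_validator_targets:
        original = aliased_target_of.get(target)
        if target in validated or (original is not None and original in validated):
            continue  # tested before, skip it
        kept.append(target)
    return kept

# Hypothetical example: ov_onnx_frontend is an alias of frontend_onnx,
# which was validated already, so only ov_ir_frontend remains.
print(remove_validated(
    ["ov_onnx_frontend", "ov_ir_frontend"],
    ["frontend_onnx"],
    {"ov_onnx_frontend": "frontend_onnx", "ov_ir_frontend": None}))
# -> ['ov_ir_frontend']
```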
@@ -3,7 +3,8 @@
#

set(FRONTEND_INSTALL_INCLUDE "runtime/include/")
set(FRONTEND_NAME_SUFFIX "_ov_frontend")
set(FRONTEND_NAME_PREFIX "ov_")
set(FRONTEND_NAME_SUFFIX "_frontend")

set(FRONTEND_NAMES "" CACHE INTERNAL "")

@@ -20,7 +21,7 @@ function(ov_target_link_frontends TARGET_NAME)
endif()

foreach(name IN LISTS FRONTEND_NAMES)
set(frontend_target_name "${name}${FRONTEND_NAME_SUFFIX}")
set(frontend_target_name "${FRONTEND_NAME_PREFIX}${name}${FRONTEND_NAME_SUFFIX}")
target_link_libraries(${TARGET_NAME} PRIVATE ${frontend_target_name})
endforeach()
endfunction()
@@ -99,7 +100,7 @@ macro(ov_add_frontend)
endif()
endforeach()

set(TARGET_NAME "${OV_FRONTEND_NAME}${FRONTEND_NAME_SUFFIX}")
set(TARGET_NAME "${FRONTEND_NAME_PREFIX}${OV_FRONTEND_NAME}${FRONTEND_NAME_SUFFIX}")

list(APPEND FRONTEND_NAMES ${OV_FRONTEND_NAME})
set(FRONTEND_NAMES "${FRONTEND_NAMES}" CACHE INTERNAL "" FORCE)
@@ -117,20 +117,20 @@ function(ie_add_plugin)
# fake dependencies to build in the following order:
# IE -> IE readers -> IE inference plugins -> IE-based apps
if(BUILD_SHARED_LIBS)
if(TARGET ir_ov_frontend)
add_dependencies(${IE_PLUGIN_NAME} ir_ov_frontend)
if(TARGET ov_ir_frontend)
add_dependencies(${IE_PLUGIN_NAME} ov_ir_frontend)
endif()
if(TARGET inference_engine_ir_v7_reader)
add_dependencies(${IE_PLUGIN_NAME} inference_engine_ir_v7_reader)
endif()
if(TARGET onnx_ov_frontend)
add_dependencies(${IE_PLUGIN_NAME} onnx_ov_frontend)
if(TARGET ov_onnx_frontend)
add_dependencies(${IE_PLUGIN_NAME} ov_onnx_frontend)
endif()
if(TARGET paddlepaddle_ov_frontend)
add_dependencies(${IE_PLUGIN_NAME} paddlepaddle_ov_frontend)
if(TARGET ov_paddlepaddle_frontend)
add_dependencies(${IE_PLUGIN_NAME} ov_paddlepaddle_frontend)
endif()
if(TARGET tensorflow_ov_frontend)
add_dependencies(${IE_PLUGIN_NAME} tensorflow_ov_frontend)
if(TARGET ov_tensorflow_frontend)
add_dependencies(${IE_PLUGIN_NAME} ov_tensorflow_frontend)
endif()
endif()
@@ -28,11 +28,11 @@
#
# ngraph::common - nGraph frontend common
#
# ngraph_onnx_ov_frontend_FOUND - True if the system has onnx_ov_frontend library
# ngraph::onnx_ov_frontend - ONNX FrontEnd target (optional)
# ngraph_ov_onnx_frontend_FOUND - True if the system has ov_onnx_frontend library
# ngraph::ov_onnx_frontend - ONNX FrontEnd target (optional)
#
# ngraph_paddlepaddle_frontend_FOUND - True if the system has PDPD frontend
# ngraph::paddlepaddle_ov_frontend - nGraph PDPD frontend (optional)
# ngraph::ov_paddlepaddle_frontend - nGraph PDPD frontend (optional)
#

@PACKAGE_INIT@
@@ -58,38 +58,38 @@ if(TARGET openvino::frontend::common AND NOT TARGET ngraph::frontend_common)
INTERFACE_LINK_LIBRARIES openvino::frontend::common)
endif()

if(TARGET openvino::frontend::onnx AND NOT TARGET ngraph::onnx_ov_frontend)
add_library(ngraph::onnx_ov_frontend INTERFACE IMPORTED)
set_target_properties(ngraph::onnx_ov_frontend PROPERTIES
if(TARGET openvino::frontend::onnx AND NOT TARGET ngraph::ov_onnx_frontend)
add_library(ngraph::ov_onnx_frontend INTERFACE IMPORTED)
set_target_properties(ngraph::ov_onnx_frontend PROPERTIES
INTERFACE_LINK_LIBRARIES openvino::frontend::onnx)
endif()

if(TARGET openvino::frontend::paddlepaddle AND NOT TARGET ngraph::paddlepaddle_ov_frontend)
add_library(ngraph::paddlepaddle_ov_frontend INTERFACE IMPORTED)
set_target_properties(ngraph::paddlepaddle_ov_frontend PROPERTIES
if(TARGET openvino::frontend::paddlepaddle AND NOT TARGET ngraph::ov_paddlepaddle_frontend)
add_library(ngraph::ov_paddlepaddle_frontend INTERFACE IMPORTED)
set_target_properties(ngraph::ov_paddlepaddle_frontend PROPERTIES
INTERFACE_LINK_LIBRARIES openvino::frontend::paddlepaddle)
endif()

if(TARGET openvino::frontend::tensorflow AND NOT TARGET ngraph::tensorflow_ov_frontend)
add_library(ngraph::tensorflow_ov_frontend INTERFACE IMPORTED)
set_target_properties(ngraph::tensorflow_ov_frontend PROPERTIES
if(TARGET openvino::frontend::tensorflow AND NOT TARGET ngraph::ov_tensorflow_frontend)
add_library(ngraph::ov_tensorflow_frontend INTERFACE IMPORTED)
set_target_properties(ngraph::ov_tensorflow_frontend PROPERTIES
INTERFACE_LINK_LIBRARIES openvino::frontend::tensorflow)
endif()

set(ngraph_ngraph_FOUND ON)
set(NGRAPH_LIBRARIES ngraph::ngraph)

set(ngraph_onnx_ov_frontend_FOUND ${OpenVINO_Frontend_ONNX_FOUND})
set(ngraph_ov_onnx_frontend_FOUND ${OpenVINO_Frontend_ONNX_FOUND})
set(ngraph_onnx_importer_FOUND ${OpenVINO_Frontend_ONNX_FOUND})

if(ngraph_onnx_importer_FOUND)
set(ONNX_IMPORTER_LIBRARIES ngraph::onnx_ov_frontend)
set(ONNX_IMPORTER_LIBRARIES ngraph::ov_onnx_frontend)
# ngraph::onnx_importer target and variables are deprecated
# but need to create a dummy target for BW compatibility
if(NOT TARGET ngraph::onnx_importer)
add_library(ngraph::onnx_importer INTERFACE IMPORTED)
set_target_properties(ngraph::onnx_importer PROPERTIES
INTERFACE_LINK_LIBRARIES ngraph::onnx_ov_frontend)
INTERFACE_LINK_LIBRARIES ngraph::ov_onnx_frontend)
endif()
endif()
@@ -2,12 +2,9 @@
# SPDX-License-Identifier: Apache-2.0

#! [complex:transformation]
import logging as log

import numpy as np

from mo.front.common.replacement import FrontReplacementSubgraph
from mo.graph.graph import Graph
from openvino.tools.mo.front.common.replacement import FrontReplacementSubgraph
from openvino.tools.mo.graph.graph import Graph


class Complex(FrontReplacementSubgraph):
@@ -41,4 +38,3 @@ class Complex(FrontReplacementSubgraph):
# change the connection so now all consumers of "complex_node" get data from input node of strided slice nodes
complex_node.out_port(0).get_connection().set_source(input_node_output_port)
#! [complex:transformation]
@@ -4,11 +4,11 @@
#! [complex_abs:transformation]
import numpy as np

from extensions.ops.elementwise import Pow
from extensions.ops.ReduceOps import ReduceSum
from mo.front.common.replacement import FrontReplacementOp
from mo.graph.graph import Graph, Node
from mo.ops.const import Const
from openvino.tools.mo.ops.elementwise import Pow
from openvino.tools.mo.ops.ReduceOps import ReduceSum
from openvino.tools.mo.front.common.replacement import FrontReplacementOp
from openvino.tools.mo.graph.graph import Graph, Node
from openvino.tools.mo.ops.const import Const


class ComplexAbs(FrontReplacementOp):
@@ -3,8 +3,7 @@

# ! [fft_ext:extractor]
from ...ops.FFT import FFT
from mo.front.extractor import FrontExtractorOp
from mo.utils.error import Error
from openvino.tools.mo.front.extractor import FrontExtractorOp


class FFT2DFrontExtractor(FrontExtractorOp):
@@ -2,9 +2,9 @@
# SPDX-License-Identifier: Apache-2.0

#! [fft:operation]
from mo.front.common.partial_infer.elemental import copy_shape_infer
from mo.graph.graph import Node, Graph
from mo.ops.op import Op
from openvino.tools.mo.front.common.partial_infer.elemental import copy_shape_infer
from openvino.tools.mo.graph.graph import Graph
from openvino.tools.mo.ops.op import Op


class FFT(Op):
@@ -40,13 +40,13 @@ This library contains the classes to:

Starting from the 2022.1 release, OpenVINO Runtime introduced a concept of frontend plugins. Such plugins can be loaded automatically by OpenVINO Runtime depending on the model file format:
* Linux* OS:
- `libir_ov_frontend.so` to read a network from IR
- `libpaddlepaddle_ov_frontend.so` to read a network from PaddlePaddle model format
- `libonnx_ov_frontend.so` to read a network from ONNX model format
- `libov_ir_frontend.so` to read a network from IR
- `libov_paddlepaddle_frontend.so` to read a network from PaddlePaddle model format
- `libov_onnx_frontend.so` to read a network from ONNX model format
* Windows* OS:
- `ir_ov_frontend.dll` to read a network from IR
- `paddlepaddle_ov_frontend.dll` to read a network from PaddlePaddle model format
- `onnx_ov_frontend.dll` to read a network from ONNX model format
- `ov_ir_frontend.dll` to read a network from IR
- `ov_paddlepaddle_frontend.dll` to read a network from PaddlePaddle model format
- `ov_onnx_frontend.dll` to read a network from ONNX model format

### Device-Specific Plugin Libraries
@@ -62,7 +62,7 @@ The example below demonstrates how to unregister an operator from the destructor
## Requirements for Building with CMake

A program that uses the `register_operator` functionality requires `openvino::core` and `openvino::frontend::onnx` libraries in addition to the OpenVINO Inference Runtime.
The `onnx_ov_frontend` is a component of the `OpenVINO` package, so `find_package(OpenVINO REQUIRED COMPONENTS ONNX)` can find both.
The `ov_onnx_frontend` is a component of the `OpenVINO` package, so `find_package(OpenVINO REQUIRED COMPONENTS ONNX)` can find both.
Those libraries need to be passed to the `target_link_libraries` command in the CMakeLists.txt file.

See CMakeLists.txt below for reference:
@@ -45,13 +45,13 @@ This library contains the classes to:

Starting from the 2022.1 release, OpenVINO Runtime introduced a concept of frontend plugins. Such plugins can be loaded automatically by OpenVINO Runtime depending on the model file format:
* Unix* OS:
- `libir_ov_frontend.so` to read a network from IR
- `libpaddlepaddle_ov_frontend.so` to read a network from PaddlePaddle model format
- `libonnx_ov_frontend.so` to read a network from ONNX model format
- `libov_ir_frontend.so` to read a network from IR
- `libov_paddlepaddle_frontend.so` to read a network from PaddlePaddle model format
- `libov_onnx_frontend.so` to read a network from ONNX model format
* Windows* OS:
- `ir_ov_frontend.dll` to read a network from IR
- `paddlepaddle_ov_frontend.dll` to read a network from PaddlePaddle model format
- `onnx_ov_frontend.dll` to read a network from ONNX model format
- `ov_ir_frontend.dll` to read a network from IR
- `ov_paddlepaddle_frontend.dll` to read a network from PaddlePaddle model format
- `ov_onnx_frontend.dll` to read a network from ONNX model format

### Device-specific Plugin Libraries ###
@@ -639,9 +639,9 @@ graph. Consider the extractor for the TensorFlow\* operation `Const` (refer to t
`extensions/front/tf/const_ext.py`):

```py
from mo.front.extractor import FrontExtractorOp
from mo.front.tf.extractors.utils import tf_dtype_extractor, tf_tensor_shape, tf_tensor_content
from mo.ops.const import Const
from openvino.tools.mo.front.extractor import FrontExtractorOp
from openvino.tools.mo.front.tf.extractors.utils import tf_dtype_extractor, tf_tensor_shape, tf_tensor_content
from openvino.tools.mo.ops.const import Const


class ConstExtractor(FrontExtractorOp):
@@ -679,9 +679,9 @@ Consider another example with an extractor of ONNX\* operation `Constant` (refer
from onnx import numpy_helper
from onnx.numpy_helper import to_array

from mo.front.extractor import FrontExtractorOp
from mo.front.onnx.extractors.utils import onnx_attr
from mo.ops.const import Const
from openvino.tools.mo.front.extractor import FrontExtractorOp
from openvino.tools.mo.front.onnx.extractors.utils import onnx_attr
from openvino.tools.mo.ops.const import Const


class ConstantExtractor(FrontExtractorOp):
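Both hunks above change only the import statements; the extractor bodies stay the same. For readers unfamiliar with the extractor pattern the documentation is describing, a minimal, hypothetical extractor using the new `openvino.tools.mo` package layout could look as follows (the attribute names are placeholders, not the real `const_ext.py` contents):

```py
from openvino.tools.mo.front.extractor import FrontExtractorOp
from openvino.tools.mo.ops.const import Const


class MyConstExtractor(FrontExtractorOp):
    # 'op' must match the framework operation type this extractor handles
    op = 'Const'
    enabled = True

    @classmethod
    def extract(cls, node):
        # Pull attributes from the framework node and update the MO operation
        # definition; 'value' and 'shape' are illustrative attribute names.
        attrs = {
            'value': node.value,
            'shape': node.value.shape,
        }
        Const.update_node_stat(node, attrs)
        return cls.enabled
```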
@@ -814,11 +814,11 @@ fusing of the sub-graph defining the [Mish](../../../ops/activation/Mish_4.md) a
operation:

```py
from extensions.front.Softplus_fusion import SoftplusFusion
from extensions.ops.activation_ops import Mish
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.front.subgraph_matcher import SubgraphMatch
from mo.graph.graph import Graph, rename_nodes
from openvino.tools.mo.front.Softplus_fusion import SoftplusFusion
from openvino.tools.mo.ops.activation_ops import Mish
from openvino.tools.mo.front.common.replacement import FrontReplacementSubgraph
from openvino.tools.mo.front.subgraph_matcher import SubgraphMatch
from openvino.tools.mo.graph.graph import Graph, rename_nodes


class MishFusion(FrontReplacementSubgraph):
@@ -886,12 +886,12 @@ transformation.
Consider an example transformation from the file `extensions/front/Pack.py`, which replaces the operation `Pack` from
the TensorFlow\*:
```py
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementOp
from mo.front.tf.graph_utils import create_op_with_const_inputs
from mo.graph.graph import Node, Graph, rename_nodes
from mo.ops.concat import Concat
from mo.ops.unsqueeze import Unsqueeze
from openvino.tools.mo.front.common.partial_infer.utils import int64_array
from openvino.tools.mo.front.common.replacement import FrontReplacementOp
from openvino.tools.mo.front.tf.graph_utils import create_op_with_const_inputs
from openvino.tools.mo.graph.graph import Node, Graph, rename_nodes
from openvino.tools.mo.ops.concat import Concat
from openvino.tools.mo.ops.unsqueeze import Unsqueeze


class Pack(FrontReplacementOp):
@@ -932,11 +932,11 @@ specification.
```py
import logging as log

from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementPattern
from mo.graph.graph import Graph
from mo.ops.const import Const
from mo.utils.error import Error
from openvino.tools.mo.front.common.partial_infer.utils import int64_array
from openvino.tools.mo.front.common.replacement import FrontReplacementPattern
from openvino.tools.mo.graph.graph import Graph
from openvino.tools.mo.ops.const import Const
from openvino.tools.mo.utils.error import Error


class SqueezeNormalize(FrontReplacementPattern):
@@ -1200,13 +1200,13 @@ The example of the configuration file for this type of transformation is `extens
and the corresponding transformation file is `./extensions/front/YOLO.py`:

```py
from extensions.front.no_op_eraser import NoOpEraser
from extensions.front.standalone_const_eraser import StandaloneConstEraser
from extensions.ops.regionyolo import RegionYoloOp
from mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral
from mo.graph.graph import Node, Graph
from mo.ops.result import Result
from mo.utils.error import Error
from openvino.tools.mo.front.no_op_eraser import NoOpEraser
from openvino.tools.mo.front.standalone_const_eraser import StandaloneConstEraser
from openvino.tools.mo.ops.regionyolo import RegionYoloOp
from openvino.tools.mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral
from openvino.tools.mo.graph.graph import Node, Graph
from openvino.tools.mo.ops.result import Result
from openvino.tools.mo.utils.error import Error


class YoloRegionAddon(FrontReplacementFromConfigFileGeneral):
@@ -20,9 +20,9 @@ assume that we have already created the `CustomOp` class (inherited from `Op` cl
for this MXNet custom operation as described in the [Customize_Model_Optimizer](Customize_Model_Optimizer.md).

```py
from extension.ops.custom_op import CustomOp  # implementation of the MO operation class
from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
from mo.front.extractor import MXNetCustomFrontExtractorOp
from openvino.tools.mo.ops.custom_op import CustomOp  # implementation of the MO operation class
from openvino.tools.mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
from openvino.tools.mo.front.extractor import MXNetCustomFrontExtractorOp

class CustomProposalFrontExtractor(MXNetCustomFrontExtractorOp):  # inherit from specific base class
    op = 'MyCustomOp'  # the value corresponding to the `op_type` value of the MXNet operation
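The snippet in the hunk above is cut off after the `op` attribute. Following the pattern the linked Customize_Model_Optimizer document describes, the remainder of such an extractor usually reads the MXNet layer attributes and forwards them to the operation class; a hedged sketch under those assumptions (the attribute name, its default, and the return convention are guesses for illustration, not the actual documentation code):

```py
from openvino.tools.mo.ops.custom_op import CustomOp
from openvino.tools.mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
from openvino.tools.mo.front.extractor import MXNetCustomFrontExtractorOp


class CustomProposalFrontExtractor(MXNetCustomFrontExtractorOp):
    op = 'MyCustomOp'   # matches the `op_type` of the MXNet custom operation
    enabled = True

    @classmethod
    def extract(cls, node):
        # get_mxnet_layer_attrs exposes the symbol attributes of the layer;
        # 'my_attr' is a made-up attribute used only for illustration.
        attrs = get_mxnet_layer_attrs(node.symbol_dict)
        node_attrs = {'my_attr': attrs.int('my_attr', 1)}
        # Update the MO operation definition, as in the other extractors above.
        CustomOp.update_node_stat(node, node_attrs)
        return cls.enabled  # return convention assumed to match FrontExtractorOp
```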
@@ -40,8 +40,8 @@ operation `ProposalOp` which corresponds to `Proposal` operation described in th
document. Refer to the source code below for a detailed explanation of the extractor.

```py
from extensions.ops.proposal import ProposalOp
from mo.front.extractor import CaffePythonFrontExtractorOp
from openvino.tools.mo.ops.proposal import ProposalOp
from openvino.tools.mo.front.extractor import CaffePythonFrontExtractorOp


class ProposalPythonFrontExtractor(CaffePythonFrontExtractorOp):
@@ -46,7 +46,7 @@ if(OpenCV_FOUND)
endif()

if(ENABLE_OV_ONNX_FRONTEND)
target_link_libraries(${TARGET_NAME} PRIVATE onnx_ov_frontend)
target_link_libraries(${TARGET_NAME} PRIVATE ov_onnx_frontend)
endif()

if(NOT MSVC)
@@ -1134,8 +1134,38 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
if (!isSuitableParent1 && !isSuitableParent2)
continue;

auto mergedConv = isSuitableParent1 ? parent1 : parent2;
auto peerNode = isSuitableParent1 ? parent2 : parent1;
std::shared_ptr<MKLDNNNode> mergedConv;
std::shared_ptr<MKLDNNNode> peerNode;

if (isSuitableParent1 && isSuitableParent2) {
// not merged operation (peerNode) has to be in low precision
const auto isBranchQuantized = [](const MKLDNNNodePtr& branchParent) {
const auto& fused = branchParent->getFusedWith();
const auto branchPrecision = fused.empty() ?
branchParent->getOriginalOutputPrecisionAtPort(0) :
fused[fused.size() - 1]->getOriginalOutputPrecisionAtPort(0);
return (branchPrecision == Precision::I8) || (branchPrecision == Precision::U8);
};

const auto isBranch1Quantized = isBranchQuantized(graphNode->getParentEdgesAtPort(0)[0]->getParent());
const auto isBranch2Quantized = isBranchQuantized(graphNode->getParentEdgesAtPort(1)[0]->getParent());
if (isBranch1Quantized || isBranch2Quantized) {
// INT8
const auto parent1CanBeMerged = parent1->getChildEdges().size() == 1ul;

// if both branches are quantized, then parent1 is selected (result is not changed)
mergedConv = isBranch2Quantized && parent1CanBeMerged ? parent1 : parent2;
peerNode = isBranch2Quantized && parent1CanBeMerged ? parent2 : parent1;
} else {
// original FP32
mergedConv = isSuitableParent1 ? parent1 : parent2;
peerNode = isSuitableParent1 ? parent2 : parent1;
}
} else {
mergedConv = isSuitableParent1 ? parent1 : parent2;
peerNode = isSuitableParent1 ? parent2 : parent1;
}

if (isSuitableParent1 && isSuitableParent2) {
if ((peerNode->getType() == Convolution || peerNode->getType() == BinaryConvolution) &&
mergedConv->getChildEdges().size() != 1) {
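The added logic above decides which convolution branch absorbs the Sum: when at least one branch is quantized, the quantized branch is kept as the peer feeding the Sum (so the merged convolution stays on the INT8 path, provided parent1 has a single consumer), while the original FP32 rule is kept otherwise. A compact restatement of that decision in Python, reduced to the booleans that drive it:

```py
def choose_merged_and_peer(suitable1, suitable2,
                           branch1_quantized, branch2_quantized,
                           parent1_has_single_child):
    """Mirror of the branch selection in FuseConvolutionSumAndConvolutionSumActivation,
    simplified to booleans. Returns (merged convolution, peer node)."""
    if suitable1 and suitable2:
        if branch1_quantized or branch2_quantized:
            # INT8 path: if both branches are quantized, parent1 is selected
            # (the result does not change); otherwise prefer merging parent1
            # when its peer (branch 2) is quantized and parent1 has one consumer.
            pick_parent1 = branch2_quantized and parent1_has_single_child
            return ('parent1', 'parent2') if pick_parent1 else ('parent2', 'parent1')
        # original FP32 rule: both are suitable, so parent1 is merged
        return ('parent1', 'parent2')
    # only one parent is suitable
    return ('parent1', 'parent2') if suitable1 else ('parent2', 'parent1')

# Example: only branch 2 is quantized and parent1 has a single consumer,
# so parent1 is merged and the quantized branch stays as the peer.
print(choose_merged_and_peer(True, True, False, True, True))  # ('parent1', 'parent2')
```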
@@ -1102,7 +1102,7 @@ Layout MKLDNNNode::getWeightsLayoutByDims(SizeVector dims, bool isGrouped) {
}
}

void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) {
void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims) {
IE_THROW() << "Fusing of " << this->getType() << " operation is not implemented";
}
@@ -602,7 +602,7 @@ protected:
* Seed node should call this routine and pass its post operations list as parameter.
* @param ops List of fused post operations
*/
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, int align = -1);
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims);
virtual void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem);

virtual std::shared_ptr<mkldnn::primitive_attr> initPrimitiveAttr() { return nullptr; }
@@ -1132,8 +1132,7 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
ops.append_sum(1.0);
} else {
// TODO [DS]: change to shape from memory
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims());
}
continue;
}
@@ -352,8 +352,7 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vecto
ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision));
} else {
if (useLegacyPostOps || eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, dims, align);
eltwiseNode->appendPostOps(ops, dims);
} else {
eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
}
@@ -365,9 +365,8 @@ void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vec
for (auto &node : fusedWith) {
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
// TODO [DS]: change to shape from memory
constexpr int align = 16;
// use legacy depthwise since backprop convolution does not support binary post ops
eltwiseNode->appendPostOps(ops, dims, align);
eltwiseNode->appendPostOps(ops, dims);
continue;
}
if (auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get())) {
@@ -1744,7 +1744,7 @@ void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) {
MKLDNNNode::fuseInto(parentNode);
}

void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) {
void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims) {
const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' ";

if (getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
@@ -1775,11 +1775,11 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &p
}
} else {
const size_t chIdx = postOpDims.size() > 1 ? getFusingAxis() : 0;
constexpr int align = 16;  // always align for legacy scale/shift post ops
scalesBuffer = makeAlignedBuffer(postOpDims[chIdx], scales, align);
if (getAlgorithm() != EltwisePrelu) {
shiftsBuffer = makeAlignedBuffer(postOpDims[chIdx], shifts, align);
}

/* @todo legacy depthwise post ops are kept for now
 * for performance reasons
 */
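Across these hunks the `align` argument is removed from `appendPostOps`; legacy scale/shift post ops now always pad their per-channel buffers to a multiple of 16 internally (`makeAlignedBuffer` with `align = 16`). A rough illustration of what that padding means, under the assumption that the buffer is extended to the next multiple of the alignment (the real helper may also broadcast a scalar value per channel and may use a different padding value):

```py
def aligned_buffer(channels, values, align=16):
    """Pad a per-channel scale/shift buffer so its length is a multiple of
    'align'. Broadcasting and the padding value are assumptions for
    illustration only, not the exact makeAlignedBuffer behaviour."""
    # broadcast a single value across all channels if needed
    buf = list(values) if len(values) == channels else [values[0]] * channels
    padded_len = ((channels + align - 1) // align) * align
    return buf + [0.0] * (padded_len - channels)

print(len(aligned_buffer(20, [0.5] * 20)))  # 32, the next multiple of 16
```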
@@ -75,7 +75,7 @@ public:
bool created() const override;
bool canBeInPlace() const override;
bool canFuse(const MKLDNNNodePtr& node) const override;
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1) override;
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims) override;
void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) override;
void fuseInto(MKLDNNNodePtr& parentNode) override;
InferenceEngine::Precision getRuntimePrecision() const override;
@@ -1706,8 +1706,13 @@ void MKLDNNFakeQuantizeNode::initializePostOpData(const VectorDims &dims, const
isPostOpDataInitialized = true;
}

void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) {
initializePostOpData(postOpDims, align);
void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims) {
// MKLDNN quantization_injectors assumes that quantization data memory is always aligned on 16
// by length of AVX512 vector register which is also enough for AVX2 and SSE42 implementations.
// Otherwise it can lead to buffer over-read and performance penalties due to denormals.
const size_t bufferAlignment = 16;

initializePostOpData(postOpDims, bufferAlignment);

if (getAlgorithm() == FQBinarization) {
ops.append_binarization(mkldnn::algorithm::binarization_depthwise, (const float*)&binarizationThresholds[0], (const float*)&binarizationOutputMask[0]);
|
||||
InferenceEngine::Precision getInputPrecision() const { return inputPrecision; }
|
||||
InferenceEngine::Precision getOutputPrecision() const { return outputPrecision; }
|
||||
|
||||
// MKLDNN quantization_injectors assumes that quantization data memory is always aligned on 16
|
||||
// by length of AVX512 vector register which is also enough for AVX2 and SSE42 implementations.
|
||||
// Otherwise it can lead to buffer over-read and performance penalties due to denormals.
|
||||
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims = {}, int align = 16) override;
|
||||
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims = {}) override;
|
||||
void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) override;
|
||||
|
||||
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
|
||||
|
@ -198,9 +198,8 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
|
||||
|
||||
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
|
||||
// TODO [DS]: change to shape from memory
|
||||
constexpr int align = -1;
|
||||
if (eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
|
||||
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
|
||||
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims());
|
||||
} else {
|
||||
eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
|
||||
}
|
||||
|
@@ -109,7 +109,7 @@ protected:
uni_vmovdqu(b, a);        // b = a
uni_vmovdqu(c, a);        // c = a
uni_vpcmpeqd(b, b, zero); // if (a == 0) b = 1 else b = 0
uni_vpand(c, mask);       // c = a & 01111111100000000000000000000000
uni_vpand(c, c, mask);    // c = a & 01111111100000000000000000000000
uni_vpcmpeqd(c, c, zero); // if (c == 0) c = 1 else c = 0
uni_vtestps(b, c);        // if ((!b & c) == 0) CF = 1 else CF = 0
}
@@ -2102,8 +2102,7 @@ void MKLDNNInterpolateNode::setPostOps(mkldnn::primitive_attr &attr, const Vecto

auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, dims, align);
eltwiseNode->appendPostOps(ops, dims);
continue;
}
@@ -891,8 +891,7 @@ void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) {

auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, postOpDims, align);
eltwiseNode->appendPostOps(ops, postOpDims);
continue;
}
IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented";
@@ -813,8 +813,7 @@ void MKLDNNNormalizeL2Node::setPostOps(mkldnn::primitive_attr& kernel_attrs, con

auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, dims, align);
eltwiseNode->appendPostOps(ops, dims);
continue;
}
@@ -2779,8 +2779,7 @@ void MKLDNNReduceNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims

auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, postOpDims, align);
eltwiseNode->appendPostOps(ops, postOpDims);
continue;
}
IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented";
@@ -54,6 +54,11 @@ private:
const mv_blob_header& blobHdr,
std::vector<char>& blob);

void serializeParamsAndResults(
const Model& model,
const mv_blob_header& blobHdr,
std::vector<char>& blob);

ElfN_Ehdr createElfHeader();

void getMetaData(
@@ -62,4 +62,16 @@ VPU_PACKED(mv_stage_header {
uint32_t numShaves;
};)

VPU_PACKED(network_info_header {
uint32_t parameters_size;
uint32_t results_size;
};)

VPU_PACKED(network_params_header {
uint32_t name_lenght;
uint32_t shape_size;
uint32_t element_type_bytesize;
uint32_t output_tensor_names_size;
};)

} // namespace vpu
@@ -26,6 +26,9 @@ public:
const ie::InputsDataMap& getNetworkInputs() const { return _networkInputs; }
const ie::OutputsDataMap& getNetworkOutputs() const { return _networkOutputs; }

const std::vector<std::shared_ptr<const ov::Node>>& getNetworkParemeters() const { return _parameters; }
const std::vector<std::shared_ptr<const ov::Node>>& getNetworkResults() const { return _results; }

uint32_t getStageCount() const { return _blobHeader.stages_count; }

uint32_t getMagicNumber() const { return _blobHeader.magic_number; }
@@ -36,6 +39,8 @@ public:
uint32_t getNumberOfShaves() const { return _blobHeader.number_of_shaves; }
uint32_t getNumberOfSlices() const { return _blobHeader.number_of_cmx_slices; }

uint32_t getFileSize() const { return _blobHeader.file_size; }

const DataInfo& getInputInfo() const { return _inputInfo; }
const DataInfo& getOutputInfo() const { return _outputInfo; }

@@ -49,6 +54,9 @@ private:
ie::InputsDataMap _networkInputs;
ie::OutputsDataMap _networkOutputs;

std::vector<std::shared_ptr<const ov::Node>> _parameters = {};
std::vector<std::shared_ptr<const ov::Node>> _results = {};

DataInfo _inputInfo;
DataInfo _outputInfo;
};
@@ -89,6 +89,7 @@ std::set<std::string> getSupportedLayers(const ie::CNNNetwork& network, const Pl

const uint32_t BLOB_MAGIC_NUMBER = 9709;
const uint32_t BLOB_VERSION_MAJOR = 6;
// Must be changed when possible
const uint32_t BLOB_VERSION_MINOR = 0;

} // namespace vpu
@@ -15,6 +15,9 @@
#include <description_buffer.hpp>
#include <xml_parse_utils.h>

#include <ngraph/ops.hpp>
#include <transformations/utils/utils.hpp>

#include <climits>
#include <cstring>
#include <string>
@@ -161,6 +164,118 @@ void BackEnd::serializeConstShapes(const Model& model, const mv_blob_header& blo
}
}

void BackEnd::serializeParamsAndResults(const Model& model, const mv_blob_header& blobHdr,
std::vector<char>& blob) {
const auto networkParams = model->attrs().getOrDefault<ov::ParameterVector>("networkParameters");
const auto networkResults = model->attrs().getOrDefault<ov::ResultVector>("networkResults");

auto getNetworkParameterHeader = [](const std::shared_ptr<ov::Node>& node) {
network_params_header nph;
nph.element_type_bytesize = sizeof(node->get_element_type().operator ov::element::Type_t());
nph.name_lenght = node->get_friendly_name().size();
nph.shape_size = node->get_shape().size();
nph.output_tensor_names_size = node->get_output_tensor(0).get_names().size();
return nph;
};

uint32_t networkInfoOffset = blob.size();
auto serializeParameters = [&blob, &networkInfoOffset,
&getNetworkParameterHeader](
const std::shared_ptr<ov::Node>& node) {
BlobSerializer headerSerializer;
BlobSerializer shapeSerializer;
BlobSerializer elementTypeSerializer;
BlobSerializer tensorNamesSerializer;
BlobSerializer inputNameForResultSerializer;

const auto nph = getNetworkParameterHeader(node);
const bool isResult = ov::is_type<ov::op::v0::Result>(node);
int totalNetworkInfoOffset =
networkInfoOffset + sizeof(nph) + nph.name_lenght +
nph.element_type_bytesize +
sizeof(size_t) * (nph.output_tensor_names_size + nph.shape_size);

for (const auto& name : node->get_output_tensor(0).get_names()) {
totalNetworkInfoOffset += sizeof(size_t) + name.size();
}
if (isResult) {
totalNetworkInfoOffset +=
sizeof(size_t) +
ngraph::op::util::create_ie_output_name(node->input_value(0)).size();
}

blob.resize(totalNetworkInfoOffset);

headerSerializer.append(nph);
std::copy_n(headerSerializer.data(), sizeof(nph),
blob.data() + networkInfoOffset);

networkInfoOffset += sizeof(nph);
const auto nodeName = node->get_friendly_name();
VPU_THROW_UNLESS(
node->get_output_partial_shape(0).rank().is_static(),
"Serialization of shapes with dynamic rank is not supported");
const auto nodeShape = node->get_output_partial_shape(0).get_shape();
const auto nodeElType =
node->get_element_type().operator ov::element::Type_t();

std::copy_n(nodeName.data(), nodeName.size(),
blob.data() + networkInfoOffset);
networkInfoOffset += nph.name_lenght;

for (const auto shapeIdx : nodeShape) {
shapeSerializer.append(shapeIdx);
}
std::copy_n(shapeSerializer.data(),
shapeSerializer.size(), blob.data() + networkInfoOffset);
networkInfoOffset += shapeSerializer.size();
elementTypeSerializer.append(nodeElType);
std::copy_n(elementTypeSerializer.data(), nph.element_type_bytesize,
blob.data() + networkInfoOffset);
networkInfoOffset += nph.element_type_bytesize;

for (const auto& name : node->get_output_tensor(0).get_names()) {
tensorNamesSerializer.append(name.size());
for (const auto ch : name) {
tensorNamesSerializer.append(ch);
}
}
std::copy_n(tensorNamesSerializer.data(), tensorNamesSerializer.size(),
blob.data() + networkInfoOffset);
networkInfoOffset += tensorNamesSerializer.size();

if (isResult) {
const auto inputNameForResult =
ngraph::op::util::create_ie_output_name(node->input_value(0));
inputNameForResultSerializer.append(inputNameForResult.size());
for (const auto ch : inputNameForResult) {
inputNameForResultSerializer.append(ch);
}
std::copy_n(inputNameForResultSerializer.data(),
inputNameForResultSerializer.size(),
blob.data() + networkInfoOffset);
networkInfoOffset += inputNameForResultSerializer.size();
}
};

BlobSerializer networkInfoSerializer;
network_info_header nih;
nih.parameters_size = networkParams.size();
nih.results_size = networkResults.size();
blob.resize(networkInfoOffset + sizeof(nih));
networkInfoSerializer.append(nih);
std::copy_n(networkInfoSerializer.data(), sizeof(nih), blob.data() + networkInfoOffset);
networkInfoOffset += sizeof(nih);

for (const auto& param : networkParams) {
serializeParameters(param);
}

for (const auto& result : networkResults) {
serializeParameters(result);
}
}

void BackEnd::serialize(
const Model& model,
std::vector<char>& blob,
@@ -271,6 +386,12 @@ void BackEnd::serialize(

serializeConstData(model, blobHdr, blob);
serializeConstShapes(model, blobHdr, blob);
const auto networkParams = model->attrs().getOrDefault<ov::ParameterVector>("networkParameters");
const auto networkResults = model->attrs().getOrDefault<ov::ResultVector>("networkResults");
// To avoid constant network case
if (!networkParams.empty() && !networkResults.empty()) {
serializeParamsAndResults(model, blobHdr, blob);
}

blobHeader.first = blob.data();
blobHeader.second = sizeof(ElfN_Ehdr) + sizeof(mv_blob_header);
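The serializer above appends a network-info section past the original `file_size`: a `network_info_header` (parameter and result counts), then per node a `network_params_header`, the friendly name, the shape, the element type, the length-prefixed output tensor names, and, for Results only, the length-prefixed IE output name. A Python sketch of that layout derived from the code above; the exact field widths (uint32 headers, 8-byte `size_t`, 4-byte element-type enum) are assumptions and may differ from what `BlobSerializer` actually emits:

```py
import struct

def pack_network_info(nodes):
    """Illustrative packing of the blob's network-info section (assumed widths)."""
    results = [n for n in nodes if n.get("is_result")]
    params = [n for n in nodes if not n.get("is_result")]
    blob = struct.pack("<II", len(params), len(results))          # network_info_header
    for node in params + results:
        name = node["name"].encode()
        tensor_names = [t.encode() for t in node["tensor_names"]]
        blob += struct.pack("<IIII", len(name), len(node["shape"]),
                            4, len(tensor_names))                 # network_params_header
        blob += name                                               # friendly name
        blob += b"".join(struct.pack("<Q", d) for d in node["shape"])
        blob += struct.pack("<I", node["element_type"])            # ov::element::Type_t
        for tn in tensor_names:                                    # length-prefixed names
            blob += struct.pack("<Q", len(tn)) + tn
        if node.get("is_result"):                                  # extra IE output name
            src = node["input_name"].encode()
            blob += struct.pack("<Q", len(src)) + src
    return blob

demo = pack_network_info([{"name": "data", "shape": [1, 3, 224, 224],
                           "element_type": 1, "tensor_names": ["data"]}])
print(len(demo))  # size of the appended section for one parameter
```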
@@ -10,7 +10,7 @@
#include <string>

#include <ie_input_info.hpp>

#include <ie_ngraph_utils.hpp>
#include <vpu/graph_transformer.hpp>
#include <vpu/backend/blob_format.hpp>
#include <vpu/model/data.hpp>
@@ -116,6 +116,64 @@ void BlobReader::parse(const std::vector<char>& blob) {
_networkOutputs[processedOutput.getName()] = std::make_shared<ie::Data>(processedOutput);
}
}
if (blob.size() != _blobHeader.file_size) {
auto networkInfoOffset = _blobHeader.file_size;
const auto nih = readFromBlob<network_info_header>(blob, networkInfoOffset);
auto extractParameter = [&blob, &networkInfoOffset](bool isResult) {
const auto nph = readFromBlob<network_params_header>(blob, networkInfoOffset);
std::string parameterFriendlyName(nph.name_lenght, '0');

for (auto idx = 0; idx < nph.name_lenght; ++idx) {
parameterFriendlyName[idx] = readFromBlob<char>(blob, networkInfoOffset);
}

ov::Shape parameterShape(nph.shape_size);
for (auto idx = 0; idx < nph.shape_size; ++idx) {
parameterShape[idx] = readFromBlob<size_t>(blob, networkInfoOffset);
}

ov::element::Type_t parameterType = readFromBlob<ov::element::Type_t>(blob, networkInfoOffset);
std::shared_ptr<ov::Node> parameter =
std::make_shared<ov::op::v0::Parameter>(parameterType,
parameterShape);

std::unordered_set<std::string> tensorNames;
for (auto idx = 0; idx < nph.output_tensor_names_size; ++idx) {
const auto nameLenght = readFromBlob<size_t>(blob, networkInfoOffset);
std::string tensorName;
for (auto nameSymbolIdx = 0; nameSymbolIdx < nameLenght; ++nameSymbolIdx) {
tensorName += readFromBlob<char>(blob, networkInfoOffset);
}
tensorNames.insert(tensorName);
}
if (isResult) {
auto fakeParameter = parameter;
parameter = std::make_shared<ov::op::v0::Result>(parameter);

const auto inputNameLenght = readFromBlob<size_t>(blob, networkInfoOffset);

std::string inputName;
for (auto nameSymbolIdx = 0; nameSymbolIdx < inputNameLenght; ++nameSymbolIdx) {
inputName += readFromBlob<char>(blob, networkInfoOffset);
}
fakeParameter->set_friendly_name(inputName);

parameter = parameter->copy_with_new_inputs({fakeParameter});
}
parameter->set_friendly_name(parameterFriendlyName);
parameter->output(0).get_tensor().set_names(tensorNames);

return parameter;
};

for (auto paramIdx = 0; paramIdx < nih.parameters_size; ++paramIdx) {
_parameters.emplace_back(extractParameter(false));
}

for (auto paramIdx = 0; paramIdx < nih.results_size; ++paramIdx) {
_results.emplace_back(extractParameter(true));
}
}
}

} // namespace vpu
|
||||
|
||||
model->attrs().set<int>("index", g_counter.fetch_add(1));
|
||||
model->attrs().set<Resources>("resources", env.resources);
|
||||
|
||||
// Transmitting Information about the parameters/results of the network for
|
||||
// the possibility of importing it
|
||||
if (network.getFunction() != nullptr) {
|
||||
model->attrs().set<ov::ParameterVector>(
|
||||
"networkParameters", network.getFunction()->get_parameters());
|
||||
model->attrs().set<ov::ResultVector>(
|
||||
"networkResults", network.getFunction()->get_results());
|
||||
}
|
||||
//
|
||||
// Update IE Network
|
||||
//
|
||||
|
@ -164,11 +164,20 @@ void ExecutableNetwork::Import(std::istream& strm, std::vector<DevicePtr> &devic
|
||||
|
||||
this->_networkInputs = blobReader.getNetworkInputs();
|
||||
this->_networkOutputs = blobReader.getNetworkOutputs();
|
||||
std::size_t numStages = blobReader.getStageCount();
|
||||
auto blobHeader = blobReader.getHeader();
|
||||
if (blobSize == blobReader.getFileSize()) {
|
||||
_log->warning(
|
||||
"Older version of blob. Unable to get information about network "
|
||||
"parameters/results. Please recompile blob");
|
||||
}
|
||||
this->setInputs(blobReader.getNetworkParemeters());
|
||||
this->setOutputs(blobReader.getNetworkResults());
|
||||
|
||||
_inputInfo = blobReader.getInputInfo();
|
||||
_outputInfo = blobReader.getOutputInfo();
|
||||
|
||||
std::size_t numStages = blobReader.getStageCount();
|
||||
auto blobHeader = blobReader.getHeader();
|
||||
|
||||
openDevice(devicePool);
|
||||
_executor->allocateGraph(_device, _graphDesc, _graphBlob, blobHeader, numStages, networkName, _actualNumExecutors);
|
||||
_graphMetaData.stagesMeta.resize(numStages);
|
||||
|
inference-engine/thirdparty/clDNN/api/intel_gpu/primitives/slice.hpp (new file, 37 lines, vendored)
@@ -0,0 +1,37 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "primitive.hpp"

namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{

/// @brief
/// @details
struct slice : public primitive_base<slice> {
CLDNN_DECLARE_PRIMITIVE(slice)

/// @brief Constructs slice primitive.
/// @param id This primitive id.
/// @param inputs List of primitive ids.
slice(const primitive_id& id,
const std::vector<primitive_id>& inputs,
const tensor output_shape,
const primitive_id& ext_prim_id = "",
const padding& output_padding = padding())
: primitive_base{id, inputs, ext_prim_id, output_padding},
output_shape {output_shape}
{}

tensor output_shape;
};
/// @}
/// @}
/// @}
} // namespace cldnn
@@ -24,6 +24,7 @@ private:
debug_configuration();
public:
static const char *prefix;
int help; // Print help messages
int verbose; // Verbose execution
int print_multi_kernel_perf; // Print execution time of each kernel in multi-kernel primitimive
int disable_usm; // Disable usm usage
@@ -34,6 +35,7 @@ public:
std::string dump_layers; // Dump intermediate buffers of specified layers only, separated by space
std::string dry_run_path; // Dry run and serialize execution graph into the specified path
int dump_layers_dst_only; // Dump only output of layers
int dump_layers_limit_batch; // Limit the size of batch to dump
int base_batch_for_memory_estimation; // Base batch size to be used in memory estimation
static const debug_configuration *get_instance();
};
@@ -55,6 +55,7 @@ enum class KernelType {
DEPTH_TO_SPACE,
BATCH_TO_SPACE,
SHUFFLE_CHANNELS,
SLICE,
STRIDED_SLICE,
REVERSE_SEQUENCE,
BINARY_CONVOLUTION,
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/slice/slice_kernel_ref.cpp (new file, 111 lines, vendored)
@@ -0,0 +1,111 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include"slice_kernel_ref.h"
#include <kernel_selector_utils.h>
#include <vector>

namespace {

void addJitConstantsForAttribute(kernel_selector::JitConstants &jit,
const std::string &name, const std::vector<std::int32_t> &attribute) {
using namespace kernel_selector;
jit.AddConstant(MakeJitConstant(name + "_BATCH", attribute[0]));
jit.AddConstant(MakeJitConstant(name + "_FEATURE", attribute[1]));
if (attribute.size() == 5) { // BFZYX
jit.AddConstant(MakeJitConstant(name + "_Z", attribute[2]));
jit.AddConstant(MakeJitConstant(name + "_Y", attribute[3]));
jit.AddConstant(MakeJitConstant(name + "_X", attribute[4]));
} else { // BFYX
jit.AddConstant(MakeJitConstant(name + "_Y", attribute[2]));
jit.AddConstant(MakeJitConstant(name + "_X", attribute[3]));
}
}

} // anonymous namespace

namespace kernel_selector {

KernelsData SliceKernelRef::GetKernelsData(const Params &params,
const optional_params &options) const {
if (!Validate(params, options)) {
return {};
}
KernelData kernel_data = KernelData::Default<slice_params>(params);
slice_params &new_params =
dynamic_cast<slice_params&>(*kernel_data.params.get());
auto dispatch_data = SetDefault(new_params, options);
auto entry_point = GetEntryPoint(kernelName, new_params.layerID, params, options);
auto slice_specific_jit = GetJitConstants(new_params);
auto jit = CreateJit(kernelName, slice_specific_jit, entry_point);

FillCLKernelData(kernel_data.kernels[0], dispatch_data, params.engineInfo,
kernelName, jit, entry_point);

return {kernel_data};
}

KernelsPriority SliceKernelRef::GetKernelsPriority(const Params&/*params*/,
const optional_params&/*options*/) const {
return DONT_USE_IF_HAVE_SOMETHING_ELSE;
}

ParamsKey SliceKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT32);
k.EnableInputDataType(Datatype::INT64);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::INT32);
k.EnableOutputDataType(Datatype::INT64);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::bfzyx);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::bfzyx);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
return k;
}

bool SliceKernelRef::Validate(const Params &p, const optional_params &o) const {
if (p.GetType() != KernelType::SLICE || o.GetType() != KernelType::SLICE) {
return false;
}

const slice_params &params = dynamic_cast<const slice_params&>(p);
if (params.inputs.empty())
return false;

if (params.output.Dimentions() > 5 || params.inputs[0].Dimentions() > 5)
return false;

return true;
}

JitConstants SliceKernelRef::GetJitConstants(const slice_params &params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
addJitConstantsForAttribute(jit, "SLICE_BEGIN", params.start);
addJitConstantsForAttribute(jit, "SLICE_END", params.end);
addJitConstantsForAttribute(jit, "SLICE_STEP", params.step);
return jit;
}

CommonDispatchData SliceKernelRef::SetDefault(const slice_params &params,
const optional_params&) const {
CommonDispatchData dispatchData;
dispatchData.gws = { params.output.Batch().v, params.output.Feature().v,
params.output.Z().v * params.output.Y().v * params.output.X().v };

dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws,
params.engineInfo);

return dispatchData;
}

} // namespace kernel_selector
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/slice/slice_kernel_ref.h (new file, 42 lines, vendored)
@@ -0,0 +1,42 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "kernel_base_opencl.h"
#include <vector>

namespace kernel_selector {

struct slice_params: public base_params {
slice_params() : base_params(KernelType::SLICE) {}

std::vector<std::int32_t> start;
std::vector<std::int32_t> end;
std::vector<std::int32_t> step;
};

struct slice_optional_params : optional_params {
slice_optional_params() : optional_params(KernelType::SLICE) {}
};

class SliceKernelRef: public KernelBaseOpenCL {
public:
SliceKernelRef() :
KernelBaseOpenCL { "slice_ref" } {
}
KernelsData GetKernelsData(const Params &params,
const optional_params &options) const override;
KernelsPriority GetKernelsPriority(const Params &params,
const optional_params &options) const override;
ParamsKey GetSupportedKey() const override;
bool Validate(const Params &p, const optional_params &o) const override;

private:
JitConstants GetJitConstants(const slice_params &params) const;
CommonDispatchData SetDefault(const slice_params &params,
const optional_params&) const;
};

} // namespace kernel_selector
@@ -0,0 +1,18 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "slice_kernel_selector.h"
#include "slice_kernel_ref.h"

namespace kernel_selector {

slice_kernel_selector::slice_kernel_selector() {
Attach<SliceKernelRef>();
}

KernelsData slice_kernel_selector::GetBestKernels(const Params &params,
const optional_params &options) const {
return GetNaiveBestKernel(params, options, KernelType::SLICE);
}

} // namespace kernel_selector
@@ -0,0 +1,23 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <kernel_selector.h>

namespace kernel_selector {

class slice_kernel_selector : public kernel_selector_base {
public:
static slice_kernel_selector& Instance() {
static slice_kernel_selector instance_;
return instance_;
}

slice_kernel_selector();

KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};

} // namespace kernel_selector
36
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/slice_ref.cl
vendored
Normal file
36
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/slice_ref.cl
vendored
Normal file
@ -0,0 +1,36 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "include/batch_headers/fetch_data.cl"

KERNEL(slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output)
{
    const uint batch = get_global_id(0);
    const uint feature = get_global_id(1);
#if INPUT0_DIMS <= 4
    const uint xy = get_global_id(2);
    const uint y = xy / OUTPUT_SIZE_X;
    const uint x = xy % OUTPUT_SIZE_X;
    const uint output_index = OUTPUT_GET_INDEX(batch, feature, y, x);
    const uint input_index = INPUT0_GET_INDEX(
        SLICE_BEGIN_BATCH + batch * SLICE_STEP_BATCH,
        SLICE_BEGIN_FEATURE + feature * SLICE_STEP_FEATURE,
        SLICE_BEGIN_Y + y * SLICE_STEP_Y,
        SLICE_BEGIN_X + x * SLICE_STEP_X);
#elif INPUT0_DIMS == 5
    const uint xyz = get_global_id(2);
    const uint yx = xyz % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
    const uint z = xyz / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
    const uint y = yx / OUTPUT_SIZE_X;
    const uint x = yx % OUTPUT_SIZE_X;
    const uint output_index = OUTPUT_GET_INDEX(batch, feature, z, y, x);
    const uint input_index = INPUT0_GET_INDEX(
        SLICE_BEGIN_BATCH + batch * SLICE_STEP_BATCH,
        SLICE_BEGIN_FEATURE + feature * SLICE_STEP_FEATURE,
        SLICE_BEGIN_Z + z * SLICE_STEP_Z,
        SLICE_BEGIN_Y + y * SLICE_STEP_Y,
        SLICE_BEGIN_X + x * SLICE_STEP_X);
#endif
    output[output_index] = ACTIVATION(input[input_index], ACTIVATION_PARAMS);
}
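For readers who prefer plain C++ to the OpenCL macros above, the following is a minimal, self-contained sketch of the same addressing rule for the 4-D (bfyx) case. The function name, the dense flattened layout, and the restriction to batch 1 are our simplifications for illustration; they are not part of the patch.

```
// Scalar reference of what slice_ref.cl computes per work item (bfyx case):
// output(f, y, x) = input(begin_f + f*step_f, begin_y + y*step_y, begin_x + x*step_x).
#include <vector>

std::vector<float> slice_reference_bfyx(const std::vector<float>& input,
                                        int in_f, int in_y, int in_x,      // input sizes
                                        int out_f, int out_y, int out_x,   // output sizes
                                        int begin_f, int begin_y, int begin_x,
                                        int step_f, int step_y, int step_x) {
    std::vector<float> output(static_cast<size_t>(out_f) * out_y * out_x);
    for (int f = 0; f < out_f; ++f)
        for (int y = 0; y < out_y; ++y)
            for (int x = 0; x < out_x; ++x) {
                const int in_idx = ((begin_f + f * step_f) * in_y + (begin_y + y * step_y)) * in_x
                                   + (begin_x + x * step_x);
                const int out_idx = (f * out_y + y) * out_x + x;
                output[out_idx] = input[in_idx];  // the CL kernel additionally applies ACTIVATION here
            }
    return output;
}
```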
@ -3,7 +3,9 @@
//

#include "intel_gpu/runtime/debug_configuration.hpp"
#include <algorithm>
#include <iostream>
#include <iomanip>
#include <memory>
#include <vector>
#include <sstream>
@ -97,10 +99,39 @@ void get_common_debug_env_var(const std::string &var, T &val) {
    return get_debug_env_var(var, val, allowed_option_prefixes);
}

static void print_help_messages() {
    std::vector<std::pair<std::string, std::string>> message_list;
    message_list.emplace_back("OV_GPU_Help", "Print help messages");
    message_list.emplace_back("OV_GPU_Verbose", "Verbose execution");
    message_list.emplace_back("OV_GPU_PrintMultiKernelPerf", "Print execution time of each kernel in multi-kernel primitimive");
    message_list.emplace_back("OV_GPU_DisableUsm", "Disable usm usage");
    message_list.emplace_back("OV_GPU_DisableOnednn", "Disable onednn for discrete GPU (no effect for integrated GPU)");
    message_list.emplace_back("OV_GPU_DumpGraphs", "Dump optimized graph");
    message_list.emplace_back("OV_GPU_DumpSources", "Dump opencl sources");
    message_list.emplace_back("OV_GPU_DumpLayersPath", "Enable dumping intermediate buffers and set the dest path");
    message_list.emplace_back("OV_GPU_DumpLayers", "Dump intermediate buffers of specified layers only, separated by space");
    message_list.emplace_back("OV_GPU_DumpLayersDstOnly", "Dump only output of layers");
    message_list.emplace_back("OV_GPU_DumpLayersLimitBatch", "Limit the size of batch to dump");
    message_list.emplace_back("OV_GPU_DryRunPath", "Dry run and serialize execution graph into the specified path");
    message_list.emplace_back("OV_GPU_BaseBatchForMemEstimation", "Base batch size to be used in memory estimation");

    auto max_name_length_item = std::max_element(message_list.begin(), message_list.end(),
        [](std::pair<std::string, std::string>& a, std::pair<std::string, std::string>& b){
            return a.first.size() < b.first.size();
        });
    int name_width = static_cast<int>(max_name_length_item->first.size()) + 2;

    GPU_DEBUG_COUT << "Supported environment variables for debugging" << std::endl;
    for (auto& p : message_list) {
        GPU_DEBUG_COUT << " - " << std::left << std::setw(name_width) << p.first + ": " << p.second << std::endl;
    }
}

#endif

debug_configuration::debug_configuration()
    : verbose(0)
    : help(0)
    , verbose(0)
    , print_multi_kernel_perf(0)
    , disable_usm(0)
    , dump_graphs(std::string())
@ -110,8 +141,10 @@ debug_configuration::debug_configuration()
    , dump_layers_dst_only(0)
    , dry_run_path(std::string())
    , disable_onednn(0)
    , dump_layers_limit_batch(std::numeric_limits<int>::max())
    , base_batch_for_memory_estimation(-1) {
#ifdef GPU_DEBUG_CONFIG
    get_gpu_debug_env_var("Help", help);
    get_common_debug_env_var("Verbose", verbose);
    get_gpu_debug_env_var("PrintMultiKernelPerf", print_multi_kernel_perf);
    get_gpu_debug_env_var("DisableUsm", disable_usm);
@ -120,10 +153,16 @@ debug_configuration::debug_configuration()
    get_gpu_debug_env_var("DumpLayersPath", dump_layers_path);
    get_gpu_debug_env_var("DumpLayers", dump_layers);
    get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only);
    get_gpu_debug_env_var("DumpLayersLimitBatch", dump_layers_limit_batch);
    get_gpu_debug_env_var("DisableOnednn", disable_onednn);
    get_gpu_debug_env_var("DryRunPath", dry_run_path);
    get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation);

    if (help > 0) {
        print_help_messages();
        exit(0);
    }

    if (dump_layers.length() > 0)
        dump_layers = " " + dump_layers + " "; // Insert delimiter for easier parsing when used
#endif
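As a rough illustration (not part of the patch), this is how such configuration fields are typically consumed elsewhere in the plugin. It reuses only the macros and the `debug_config->member` access pattern that already appear in this diff; the surrounding function is hypothetical.

```
// Hypothetical call site: print a note when OV_GPU_Verbose is set.
void log_if_verbose() {
    GPU_DEBUG_GET_INSTANCE(debug_config);
    if (debug_config->verbose >= 1) {
        GPU_DEBUG_COUT << "verbose logging enabled via OV_GPU_Verbose" << std::endl;
    }
}
```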
@ -7,12 +7,34 @@
#include "pass_manager.h"
#include "program_node.h"

#ifdef ENABLE_ONEDNN_FOR_GPU
#include "fully_connected_inst.h"
#include <impls/onednn/utils.hpp>
#endif

using namespace cldnn;

void add_onednn_optimization_attributes::run(program& p) {
#ifdef ENABLE_ONEDNN_FOR_GPU
    for (auto& node : p.get_processing_order()) {
        if (node->get_preferred_impl_type() == impl_types::onednn) {
            if (node->is_type<fully_connected>()) {
                auto fc_prim = node->as<fully_connected>().get_primitive();

                // Reshape fused ops tensors for OneDNN FC if needed
                if (fc_prim->input_size == 3) {
                    for (auto& fused_prim : node->get_fused_primitives()) {
                        auto fused_node = fused_prim.node;
                        if (fused_node->is_type<eltwise>()) {
                            auto& dependency = node->get_dependency(fused_prim.dep_start_idx);
                            auto original_layout = dependency.get_output_layout();
                            onednn::combine_bf_with_first_spatial_dim(original_layout);
                            dependency.set_output_layout(original_layout, false);
                        }
                    }
                }
            }

            node->init_onednn_primitive_attributes();
        }
    }
@ -436,10 +436,5 @@ void graph_initializations::run(program& p) {
    }
    set_outputs(p);
    p.get_processing_order().calc_processing_order(p);

    for (auto& node : p.get_processing_order()) {
        if (!node->is_type<data>())
            node->get_output_layout();
    }
}
} // namespace cldnn
@ -65,6 +65,7 @@ void register_implementations() {
    REGISTER_OCL(softmax);
    REGISTER_OCL(space_to_batch);
    REGISTER_OCL(space_to_depth);
    REGISTER_OCL(slice);
    REGISTER_OCL(strided_slice);
    REGISTER_OCL(tile);
    REGISTER_OCL(lstm_dynamic_input);
@ -53,6 +53,7 @@
#include "intel_gpu/primitives/scatter_nd_update.hpp"
#include "intel_gpu/primitives/select.hpp"
#include "intel_gpu/primitives/shuffle_channels.hpp"
#include "intel_gpu/primitives/slice.hpp"
#include "intel_gpu/primitives/softmax.hpp"
#include "intel_gpu/primitives/space_to_batch.hpp"
#include "intel_gpu/primitives/strided_slice.hpp"
@ -73,7 +74,7 @@ void register_implementations();

namespace detail {

#define REGISTER_OCL(prim) \
#define REGISTER_OCL(prim) \
    struct attach_##prim##_impl { \
        attach_##prim##_impl(); \
    }
@ -130,6 +131,7 @@ REGISTER_OCL(scatter_elements_update);
REGISTER_OCL(scatter_nd_update);
REGISTER_OCL(select);
REGISTER_OCL(shuffle_channels);
REGISTER_OCL(slice);
REGISTER_OCL(softmax);
REGISTER_OCL(space_to_batch);
REGISTER_OCL(space_to_depth);
138
inference-engine/thirdparty/clDNN/src/impls/ocl/slice.cpp
vendored
Normal file
@ -0,0 +1,138 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <slice_inst.h>
#include <slice/slice_kernel_ref.h>
#include <data_inst.h>
#include <intel_gpu/runtime/error_handler.hpp>
#include <impls/implementation_map.hpp>
#include <slice/slice_kernel_selector.h>
#include "primitive_base.hpp"
#include <vector>
#include <algorithm>
#include <cstddef>

namespace cldnn {
namespace ocl {

namespace {
template<typename T, class = typename std::enable_if<std::is_integral<T>::value>::type>
std::vector<std::int32_t> extractIntegerData(const data_node& node, const stream& stream) {
    mem_lock<T> lock{node.get_attached_memory_ptr(), stream};
    T* data = lock.data();
    std::vector<std::int32_t> integer_data;
    integer_data.reserve(node.get_output_layout().count());
    std::copy(data, data + node.get_output_layout().count(), std::back_inserter(integer_data));
    return integer_data;
}

std::vector<std::int32_t> extractIntegerData(const data_node& node, const stream& stream) {
    switch (node.get_output_layout().data_type) {
    case data_types::u8:
        return extractIntegerData<std::uint8_t>(node, stream);
    case data_types::i8:
        return extractIntegerData<std::int8_t>(node, stream);
    case data_types::i32:
        return extractIntegerData<std::int32_t>(node, stream);
    case data_types::i64:
        return extractIntegerData<std::int64_t>(node, stream);
    default:
        CLDNN_ERROR_DATA_TYPES_MISMATCH(node.id(), "Slice parameter",
            node.get_output_layout().data_type, "Any integral type",
            data_types::i32, "Slice parameters should be of integral type.");
    }
    return {};
}

std::vector<std::int32_t> extractShape(kernel_selector::Tensor::DataTensor& tensor) {
    auto logical_dims = tensor.LogicalDims();
    // LogicalDims method returns dims in reversed order
    return {logical_dims.rbegin(), logical_dims.rend()};
}

} // namespace

struct slice_impl : typed_primitive_impl_ocl<slice> {
    using parent = typed_primitive_impl_ocl<slice>;
    using parent::parent;

    enum InputIndices {
        kData,
        kStart,
        kEnd,
        kStep,
        kAxes,
        kInputsNum
    };

    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<slice_impl>(*this);
    }

    static primitive_impl* create(const slice_node& arg) {
        auto params = get_default_params<kernel_selector::slice_params>(
                arg);
        auto op_params = get_default_optional_params<
                kernel_selector::slice_optional_params>(
                arg.get_program());
        const auto& inputs = arg.get_dependencies();
        const stream& stream = arg.get_program().get_stream();
        auto start_elts = extractIntegerData(inputs[InputIndices::kStart]->as<data>(), stream);
        auto end_elts = extractIntegerData(inputs[InputIndices::kEnd]->as<data>(), stream);
        auto step_elts = extractIntegerData(inputs[InputIndices::kStep]->as<data>(), stream);
        auto data_shape = extractShape(params.inputs[0]);
        std::vector<std::int32_t> axes(data_shape.size());
        if (inputs.size() == InputIndices::kInputsNum)
            axes = std::move(extractIntegerData(inputs[InputIndices::kAxes]->as<data>(), stream));
        else
            std::iota(axes.begin(), axes.end(), 0);
        std::vector<std::int32_t> selected_start(data_shape.size(), 0);
        std::vector<std::int32_t> selected_step(data_shape.size(), 1);
        std::vector<std::int32_t> selected_end(data_shape);
        for (int axe = 0; axe < axes.size(); axe++) {
            auto transformed_axe = axes[axe] < 0 ? data_shape.size() + axes[axe] : axes[axe];
            auto start = start_elts[axe];
            auto end = end_elts[axe];
            auto dim_size = data_shape[transformed_axe];
            selected_start[transformed_axe] = std::max(std::min(start < 0 ? dim_size + start : start, dim_size - 1), 0);
            selected_end[transformed_axe] = std::max(std::min(end < 0 ? dim_size + end : end, dim_size - 1), 0);
            selected_step[transformed_axe] = step_elts[axe];
        }
        params.start = std::move(selected_start);
        params.end = std::move(selected_end);
        params.step = std::move(selected_step);
        auto &kernel_selector =
                kernel_selector::slice_kernel_selector::Instance();
        auto best_kernels = kernel_selector.GetBestKernels(params, op_params);

        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(),
            "Cannot find a proper kernel with this arguments");

        return new slice_impl(arg, best_kernels[0]);
    }
};

namespace detail {

attach_slice_impl::attach_slice_impl() {
    implementation_map<slice>::add(impl_types::ocl, slice_impl::create, {
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::u8, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        std::make_tuple(data_types::i32, format::bfyx),
        std::make_tuple(data_types::i64, format::bfyx),
        std::make_tuple(data_types::f16, format::bfzyx),
        std::make_tuple(data_types::f32, format::bfzyx),
        std::make_tuple(data_types::u8, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        std::make_tuple(data_types::i32, format::bfzyx),
        std::make_tuple(data_types::i64, format::bfzyx),
    });
}

} // namespace detail

} // namespace ocl
} // namespace cldnn
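The start/end clamping in `slice_impl::create()` above is compact; a standalone restatement with a couple of invented example values may help. The helper name below is ours, not the patch's.

```
#include <algorithm>

// Same expression as in slice_impl::create(): negative coordinates count from
// the end of the axis, and the result is clamped to [0, dim_size - 1].
int normalize_slice_coord(int value, int dim_size) {
    return std::max(std::min(value < 0 ? dim_size + value : value, dim_size - 1), 0);
}

// Examples (invented): normalize_slice_coord(-1, 10) == 9,
// normalize_slice_coord(100, 12) == 11, normalize_slice_coord(-20, 10) == 0.
```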
@ -128,20 +128,6 @@ public:
    static primitive_impl* create(const fully_connected_node& arg) {
        auto& engine = arg.get_program().get_engine();
        auto desc = get_fully_connected_descriptor(arg);
        auto prim = arg.get_primitive();

        if (prim->input_size == 3) {
            for (auto& fused_node : arg.get_fused_primitives()) {
                auto node = fused_node.node;
                if (node->is_type<eltwise>()) {
                    auto& dependency = arg.get_dependency(fused_node.dep_start_idx);
                    auto original_layout = dependency.get_output_layout();
                    onednn::combine_bf_with_first_spatial_dim(original_layout);
                    dependency.set_output_layout(original_layout, false);
                }
            }
        }

        auto attr = arg.get_onednn_primitive_attributes();
        dnnl::primitive_desc prim_desc{&desc->data, attr.get(), engine.get_onednn_engine(), nullptr};
38
inference-engine/thirdparty/clDNN/src/include/slice_inst.h
vendored
Normal file
@ -0,0 +1,38 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once
#include <intel_gpu/primitives/slice.hpp>
#include "primitive_inst.h"
#include <intel_gpu/runtime/error_handler.hpp>

namespace cldnn {

template <>
struct typed_program_node<slice> : public typed_program_node_base<slice> {
    using parent = typed_program_node_base<slice>;

public:
    using parent::parent;

    program_node& input(std::size_t index = 0) const { return get_dependency(index); }
};

using slice_node = typed_program_node<slice>;

template <>
class typed_primitive_inst<slice> : public typed_primitive_inst_base<slice> {
    using parent = typed_primitive_inst_base<slice>;

public:
    static layout calc_output_layout(slice_node const& node);
    static std::string to_string(slice_node const& node);

public:
    typed_primitive_inst(network& network, slice_node const& desc);
};

using slice_inst = typed_primitive_inst<slice>;

} // namespace cldnn
@ -110,8 +110,18 @@ template <class T>
static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
    auto&& size = mem->get_layout().size;

    file_stream << "shape: " << size.to_string() << " ";
    file_stream << "(count: " << size.count() << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format) << ")" << std::endl;
    GPU_DEBUG_GET_INSTANCE(debug_config);
    auto batch_size = std::max(std::min(debug_config->dump_layers_limit_batch, size.batch[0]), 1);
    tensor tmp_size(size);
    tmp_size.batch[0] = batch_size;
    if (tmp_size == size) {
        file_stream << "shape: " << size.to_string() << " ";
        file_stream << "(count: " << size.count() << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format) << ")" << std::endl;
    } else {
        file_stream << "shape: " << tmp_size.to_string() << " ";
        file_stream << "(count: " << tmp_size.count() << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format)
                    << ", original shape: " << size.to_string() << ")" << std::endl;
    }

    mem_lock<T, mem_lock_type::read> lock(mem, stream);
    auto mem_ptr = lock.data();
@ -119,7 +129,7 @@ static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
    std::stringstream buffer;

    for (cldnn::tensor::value_type g = 0; g < size.group[0]; ++g) {
        for (cldnn::tensor::value_type b = 0; b < size.batch[0]; ++b) {
        for (cldnn::tensor::value_type b = 0; b < batch_size; ++b) {
            for (cldnn::tensor::value_type f = 0; f < size.feature[0]; ++f) {
                for (cldnn::tensor::value_type w = 0; w < size.spatial[3]; ++w) {
                    for (cldnn::tensor::value_type z = 0; z < size.spatial[2]; ++z) {
@ -1225,8 +1225,13 @@ program::primitives_info program::get_current_stage_info() const {

void program::save_pass_info(std::string pass_name) {
    // TODO: Directory path here can be probably changed to some bool flag
    if (!options.get<build_option_type::graph_dumps_dir>()->directory_path.empty())
    if (!options.get<build_option_type::graph_dumps_dir>()->directory_path.empty()) {
        for (auto& node : this->get_processing_order()) {
            if (!node->is_type<data>())
                node->get_output_layout();
        }
        optimizer_passes_info.emplace_back(pass_name, get_current_stage_info());
    }
}

void program::add_optimized_primitive_info(primitive_id optimized_primitive_id,
40
inference-engine/thirdparty/clDNN/src/slice.cpp
vendored
Normal file
@ -0,0 +1,40 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <slice_inst.h>
#include "primitive_type_base.h"
#include <sstream>
#include <json_object.h>

namespace cldnn {

primitive_type_id slice::type_id() {
    static primitive_type_base<slice> instance;
    return &instance;
}

slice_inst::typed_primitive_inst(network& network, slice_node const& node)
    : parent(network, node) {}

layout slice_inst::calc_output_layout(slice_node const& node) {
    auto primitive = node.get_primitive();
    auto input_layout = node.input(0).get_output_layout();
    return {input_layout.data_type, input_layout.format, primitive->output_shape};
}

std::string slice_inst::to_string(slice_node const& node) {
    auto node_info = node.desc_to_json();
    json_composite slice_info;
    slice_info.add("input id", node.input().id());
    slice_info.add("begin_param id", node.get_dependency(1).id());
    slice_info.add("end_param id", node.get_dependency(2).id());
    slice_info.add("step_param id", node.get_dependency(3).id());
    slice_info.add("axis_param id", node.get_dependency(4).id());
    node_info->add("slice info", slice_info);
    std::stringstream primitive_description;
    node_info->dump(primitive_description);
    return primitive_description.str();
}

} // namespace cldnn
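The unit tests added later in this patch show how the primitive is wired into a topology; a condensed sketch of that usage follows. Memory allocation and value setup are omitted, and the variable names (`start_mem`, `output_shape`, etc.) are illustrative.

```
// Condensed from tests/test_cases/slice.cpp: the slice primitive takes the
// data input plus start/stop/step (and optionally axes) as data nodes.
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(data("start", start_mem));
topology.add(data("stop", stop_mem));
topology.add(data("step", step_mem));
topology.add(slice("slice", {"input", "start", "stop", "step"}, tensor{output_shape}));

network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
auto output_mem = outputs.at("slice").get_memory();
```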
@ -608,6 +608,7 @@ public:
|
||||
#define CASE_FC_U8S8_3D_1 {2, 32, 1, 3}, {2, 32, 1, 16}, {16, 3, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
|
||||
#define CASE_FC_U8S8_3D_2 {1, 1, 1, 3}, {1, 1, 1, 32}, {32, 3, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
|
||||
#define CASE_FC_U8S8_3D_3 {2, 3, 1, 1}, {2, 3, 1, 15}, {15, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
|
||||
#define CASE_FC_U8S8_3D_4 {1, 512, 1, 1024}, {1, 384, 1, 1024}, {1024, 1024, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
|
||||
|
||||
#define CASE_NORMALIZE_I8_1 {1, 2, 3, 3}, data_types::u8, format::bfyx, data_types::f32, format::bfyx
|
||||
|
||||
@ -9258,7 +9259,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, gather_elements_activation_scale_eltwise,
|
||||
}));
|
||||
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
class ConvFusingTestOneDNN : public WeightsPrimitiveFusingTest<bc_test_params> {
|
||||
class WeightsPrimitiveFusingTestOneDNN : public WeightsPrimitiveFusingTest<bc_test_params> {
|
||||
public:
|
||||
void execute(bc_test_params& p) {
|
||||
// Onednn post operation has issue in a machine that does not support imad.
|
||||
@ -9299,7 +9300,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class conv_int8_eltwise_onednn : public ConvFusingTestOneDNN {};
|
||||
class conv_int8_eltwise_onednn : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(conv_int8_eltwise_onednn, u8_eltwise_sum_out) {
|
||||
auto p = GetParam();
|
||||
|
||||
@ -9364,7 +9365,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_int8_eltwise_onednn,
|
||||
bc_test_params{CASE_CONV3D_S8S8_5, 3, 4},
|
||||
}));
|
||||
|
||||
class conv_fp32_activation_abs_onednn : public ConvFusingTestOneDNN {};
|
||||
class conv_fp32_activation_abs_onednn : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(conv_fp32_activation_abs_onednn, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
@ -9387,7 +9388,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_activation_abs_onednn,
|
||||
bc_test_params{CASE_CONV_FP16_4, 2, 3},
|
||||
}));
|
||||
|
||||
class conv_fp32_activation_mish_onednn : public ConvFusingTestOneDNN {};
|
||||
class conv_fp32_activation_mish_onednn : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(conv_fp32_activation_mish_onednn, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
@ -9410,7 +9411,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_activation_mish_onednn,
|
||||
bc_test_params{CASE_CONV_FP16_4, 2, 3},
|
||||
}));
|
||||
|
||||
class conv_fp32_activation_swish_onednn : public ConvFusingTestOneDNN {};
|
||||
class conv_fp32_activation_swish_onednn : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(conv_fp32_activation_swish_onednn, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
@ -9433,7 +9434,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_activation_swish_onednn,
|
||||
bc_test_params{CASE_CONV_FP16_4, 2, 3},
|
||||
}));
|
||||
|
||||
class conv_fp32_activation_hswish_onednn : public ConvFusingTestOneDNN {};
|
||||
class conv_fp32_activation_hswish_onednn : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(conv_fp32_activation_hswish_onednn, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
@ -9456,7 +9457,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_activation_hswish_onednn,
|
||||
bc_test_params{CASE_CONV_FP16_4, 2, 3},
|
||||
}));
|
||||
|
||||
class conv_fp32_activation_exp_onednn : public ConvFusingTestOneDNN {};
|
||||
class conv_fp32_activation_exp_onednn : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(conv_fp32_activation_exp_onednn, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
@ -9479,7 +9480,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_activation_exp_onednn,
|
||||
bc_test_params{CASE_CONV_FP16_4, 2, 3},
|
||||
}));
|
||||
|
||||
class conv_int8_quantize_u8_onednn : public ConvFusingTestOneDNN {};
|
||||
class conv_int8_quantize_u8_onednn : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(conv_int8_quantize_u8_onednn, per_channel) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
@ -9526,7 +9527,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_int8_quantize_u8_onednn,
|
||||
bc_test_params{CASE_CONV_S8S8_3, 2, 3},
|
||||
}));
|
||||
|
||||
class conv_int8_activation_eltwise_quantize_onednn : public ConvFusingTestOneDNN {};
|
||||
class conv_int8_activation_eltwise_quantize_onednn : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(conv_int8_activation_eltwise_quantize_onednn, bsv32_fsv32) {
|
||||
auto p = GetParam();
|
||||
layout eltwise_layout = get_output_layout(p);
|
||||
@ -9578,7 +9579,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_int8_activation_eltwise_quantize_oned
|
||||
bc_test_params{CASE_CONV_S8S8_15, 2, 5},
|
||||
}));
|
||||
|
||||
class conv_int8_scale_shift_swish_onednn : public ConvFusingTestOneDNN {};
|
||||
class conv_int8_scale_shift_swish_onednn : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(conv_int8_scale_shift_swish_onednn, bsv32_fsv32) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
@ -9617,7 +9618,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_int8_scale_shift_swish_onednn,
|
||||
bc_test_params{CASE_CONV_S8S8_15, 2, 7},
|
||||
}));
|
||||
|
||||
class conv_int8_eltwise_scale_onednn : public ConvFusingTestOneDNN {};
|
||||
class conv_int8_eltwise_scale_onednn : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(conv_int8_eltwise_scale_onednn, u8_eltwise_prod_out_reuse) {
|
||||
auto p = GetParam();
|
||||
|
||||
@ -9667,7 +9668,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_int8_eltwise_scale_onednn,
|
||||
// Limitations: no
|
||||
// DNNL_VERBOSE log without optimization: attr-post-ops:eltwise_linear:12.75:127.5+eltwise_linear:1:-128
|
||||
// DNNL_VERBOSE log with optimization: attr-post-ops:eltwise_linear:12.75:-0.5
|
||||
class post_ops_optimizations_onednn_eltw_linear_eltw_linear : public ConvFusingTestOneDNN {};
|
||||
class post_ops_optimizations_onednn_eltw_linear_eltw_linear : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(post_ops_optimizations_onednn_eltw_linear_eltw_linear, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
@ -9718,7 +9719,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, post_ops_optimizations_onednn_eltw_linear_
|
||||
// Limitations: beta = 0 in eltw_linear
|
||||
// DNNL_VERBOSE log without optimization: attr-post-ops:eltwise_linear:12.75:127.5+eltwise_round+eltwise_linear:2.00784+eltwise_clip:0:512
|
||||
// DNNL_VERBOSE log with optimization: attr-post-ops:eltwise_linear:12.75:127.5+eltwise_round:0:0:2.00784+eltwise_clip:0:512
|
||||
class post_ops_optimizations_onednn_eltw_non_linear_eltw_linear : public ConvFusingTestOneDNN {};
|
||||
class post_ops_optimizations_onednn_eltw_non_linear_eltw_linear : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(post_ops_optimizations_onednn_eltw_non_linear_eltw_linear, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
@ -9769,7 +9770,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, post_ops_optimizations_onednn_eltw_non_lin
|
||||
// Limitations: alpha = 1 and scale = 1 in eltw_linear; binary_add is a constant compile-time buffer
|
||||
// DNNL_VERBOSE log without optimization: attr-oscale:2 attr-post-ops:binary_add:f32:2+eltwise_linear:1:-127+eltwise_clip:-127:127
|
||||
// DNNL_VERBOSE log with optimization: attr-oscale:2 attr-post-ops:binary_add:f32:2+eltwise_clip:-127:127
|
||||
class post_ops_optimizations_onednn_binary_add_eltw_linear : public ConvFusingTestOneDNN {};
|
||||
class post_ops_optimizations_onednn_binary_add_eltw_linear : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(post_ops_optimizations_onednn_binary_add_eltw_linear, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
@ -9820,7 +9821,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, post_ops_optimizations_onednn_binary_add_e
|
||||
// Limitations: beta = 0 in eltw_linear; binary_mul is a constant compile-time buffer
|
||||
// DNNL_VERBOSE log without optimization: attr-oscale:2 attr-post-ops:binary_mul:f32:2+eltwise_linear:2.01575+eltwise_clip:0:512
|
||||
// DNNL_VERBOSE log with optimization: attr-oscale:2 attr-post-ops:binary_mul:f32:2+eltwise_clip:0:512
|
||||
class post_ops_optimizations_onednn_binary_mul_eltw_linear : public ConvFusingTestOneDNN {};
|
||||
class post_ops_optimizations_onednn_binary_mul_eltw_linear : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(post_ops_optimizations_onednn_binary_mul_eltw_linear, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
@ -9871,7 +9872,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, post_ops_optimizations_onednn_binary_mul_e
|
||||
// Limitations: beta = 0 in eltw_linear
|
||||
// DNNL_VERBOSE log without optimization: attr-oscale:2 attr-post-ops:eltwise_linear:2.01575+eltwise_clip:0:512
|
||||
// DNNL_VERBOSE log with optimization: attr-oscale:2 attr-post-ops:eltwise_clip:0:512
|
||||
class post_ops_optimizations_onednn_oscale_eltw_linear : public ConvFusingTestOneDNN {};
|
||||
class post_ops_optimizations_onednn_oscale_eltw_linear : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(post_ops_optimizations_onednn_oscale_eltw_linear, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
@ -9920,7 +9921,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, post_ops_optimizations_onednn_oscale_eltw_
|
||||
// Limitations: beta = 0 in eltw_linear
|
||||
// DNNL_VERBOSE log without optimization: attr-post-ops:eltwise_relu+sum:1:0:u8+eltwise_linear:12.7+eltwise_clip:0:127
|
||||
// DNNL_VERBOSE log with optimization: attr-post-ops:eltwise_relu:0:0:12.7+sum:12.7:0:u8+eltwise_clip:0:127
|
||||
class post_ops_optimizations_onednn_eltw_any_sum_eltw_linear : public ConvFusingTestOneDNN {};
|
||||
class post_ops_optimizations_onednn_eltw_any_sum_eltw_linear : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(post_ops_optimizations_onednn_eltw_any_sum_eltw_linear, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
@ -9970,7 +9971,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, post_ops_optimizations_onednn_eltw_any_sum
|
||||
// Input range uses in 2 cases: not per-tensor output range or out_lo > out_hi
|
||||
// Here's out_lo > out_hi and no optimizations
|
||||
// DNNL_VERBOSE log: attr-post-ops:eltwise_linear:12.75:127.5+eltwise_round+eltwise_linear:-1:127
|
||||
class post_ops_optimizations_input_range : public ConvFusingTestOneDNN {};
|
||||
class post_ops_optimizations_input_range : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(post_ops_optimizations_input_range, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
@ -10015,6 +10016,33 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, post_ops_optimizations_input_range,
|
||||
bc_test_params{CASE_CONV_S8S8_14, 2, 3},
|
||||
bc_test_params{CASE_CONV_S8S8_15, 2, 3},
|
||||
}));
|
||||
|
||||
class fc_int8_inputs_fused_fp32_sum : public WeightsPrimitiveFusingTestOneDNN {};
|
||||
TEST_P(fc_int8_inputs_fused_fp32_sum, basic) {
|
||||
auto p = GetParam();
|
||||
auto shift_layout = layout{ p.default_type, p.default_format, tensor{1, 1, 1, p.kernel.batch[0]} };
|
||||
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
data("weights", get_mem(get_fc_weights_layout(p))),
|
||||
data("bias", get_mem(get_fc_bias_layout(p))),
|
||||
data("shift_data", get_mem(shift_layout, 1)),
|
||||
fully_connected("fc_prim", "input", "weights", "bias", cldnn::data_types::f32, "", padding(), get_fc_output_dim_size(p)),
|
||||
eltwise("shift", {"fc_prim", "shift_data"}, eltwise_mode::sum, cldnn::data_types::f32),
|
||||
crop("crop", "shift", get_output_layout(p).size, {0, 0, 0, 0}),
|
||||
reorder("reorder_bfyx", "crop", p.default_format, data_types::f32)
|
||||
);
|
||||
|
||||
tolerance = 1e-5f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_int8_inputs_fused_fp32_sum, ::testing::ValuesIn(std::vector<bc_test_params>{
|
||||
// OneDNN has issue with small shapes - ticket 7064
|
||||
// bc_test_params{ CASE_FC_U8S8_3D_1, 2, 4 },
|
||||
// bc_test_params{ CASE_FC_U8S8_3D_2, 2, 4 },
|
||||
bc_test_params{ CASE_FC_U8S8_3D_4, 2, 4 },
|
||||
|
||||
}));
|
||||
#endif
|
||||
|
||||
|
||||
|
144
inference-engine/thirdparty/clDNN/tests/test_cases/slice.cpp
vendored
Normal file
@ -0,0 +1,144 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"

#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/slice.hpp>
#include <intel_gpu/primitives/data.hpp>

#include <random>
#include <algorithm>
#include <vector>

using namespace cldnn;
using namespace ::tests;

namespace {

template<typename T>
class SliceTest : public ::testing::Test {
public:
    static std::vector<T> GenInput(int size) {
        std::vector<T> result;
        for (int i = 0; i < size; i++)
            result.push_back(i);
        return result;
    }

    void TearDown() override {
        assert(input_shape_.size() == 4 || input_shape_.size() == 5);
        format input_format = input_shape_.size() == 4 ? format::bfyx : format::bfzyx;
        layout data_layout ( input_type_, input_format, tensor{input_shape_} );
        std::vector<T> input_vals = GenInput(data_layout.get_linear_size());
        memory::ptr input = engine_.allocate_memory(data_layout);
        set_values(input, input_vals);
        topology topology;
        topology.add(input_layout("input", input->get_layout()));
        topology.add(data("start", start_));
        topology.add(data("stop", stop_));
        topology.add(data("step", step_));
        std::vector<primitive_id> inputs {"input", "start", "stop", "step"};
        if (axes_) {
            topology.add(data("axes", axes_));
            inputs.push_back("axes");
        }
        topology.add(slice("slice", inputs, tensor{output_shape_}));

        network network(engine_, topology);

        network.set_input_data("input", input);

        auto outputs = network.execute();

        EXPECT_EQ(outputs.size(), size_t(1));
        EXPECT_EQ(outputs.begin()->first, "slice");

        auto output = outputs.at("slice").get_memory();

        cldnn::mem_lock<T> output_ptr(output, get_test_stream());

        ASSERT_EQ(output_ptr.size(), expected_output_.size());
        for (size_t i = 0; i < output_ptr.size(); ++i)
            EXPECT_TRUE(are_equal(expected_output_[i], output_ptr[i], 2e-3));
    }

    data_types DataType() const;

protected:
    engine& engine_ = get_test_engine();
    std::vector<std::int32_t> input_shape_;
    data_types input_type_ {DataType()};
    memory::ptr start_;
    memory::ptr stop_;
    memory::ptr step_;
    memory::ptr axes_;
    std::vector<std::int32_t> output_shape_;
    std::vector<T> expected_output_;
};

template<>
data_types SliceTest<float>::DataType() const {return data_types::f32;}

template<>
data_types SliceTest<int>::DataType() const { return data_types::i32; }

template<>
data_types SliceTest<long long>::DataType() const { return data_types::i64; }

using testing::Types;
typedef Types<float, int, long long> DataTypes;
TYPED_TEST_SUITE(SliceTest, DataTypes);

TYPED_TEST(SliceTest, bfyx_positive_step) {
    this->input_shape_ = { 1, 2, 100, 12 };
    this->start_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
    set_values<int64_t>(this->start_, {0, 1, 0, 1});
    this->stop_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
    set_values<int64_t>(this->stop_, { 1, 2, 5, 100 });
    this->step_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
    set_values<int64_t>(this->step_, { 1, 1, 1, 10 });
    this->output_shape_ = { 1, 1, 5, 10 };
    this->expected_output_ = {
        1201, 1211, 1221, 1231, 1241, 1301, 1311, 1321, 1331, 1341,
        1401, 1411, 1421, 1431, 1441, 1501, 1511, 1521, 1531, 1541,
        1601, 1611, 1621, 1631, 1641, 1701, 1711, 1721, 1731, 1741,
        1801, 1811, 1821, 1831, 1841, 1901, 1911, 1921, 1931, 1941,
        2001, 2011, 2021, 2031, 2041, 2101, 2111, 2121, 2131, 2141
    };
}

TYPED_TEST(SliceTest, bfyx_negative_step) {
    this->input_shape_ = { 1, 2, 100, 12 };
    this->start_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
    set_values<int64_t>(this->start_, { 1, 2, 5, 100 });
    this->stop_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
    set_values<int64_t>(this->stop_, {0, 1, 0, 1});
    this->step_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
    set_values<int64_t>(this->step_, { -1, -1, -1, -10 });
    this->output_shape_ = { 1, 1, 5, 10 };
    this->expected_output_ = {
        1799, 1789, 1779, 1769, 1759, 1699, 1689, 1679, 1669, 1659,
        1599, 1589, 1579, 1569, 1559, 1499, 1489, 1479, 1469, 1459,
        1399, 1389, 1379, 1369, 1359, 1299, 1289, 1279, 1269, 1259,
        1199, 1189, 1179, 1169, 1159, 1099, 1089, 1079, 1069, 1059,
        999, 989, 979, 969, 959, 899, 889, 879, 869, 859
    };
}

TYPED_TEST(SliceTest, bfzyx) {
    this->input_shape_ = { 2, 3, 10, 12, 5 };
    this->start_ = this->engine_.allocate_memory({ data_types::i64, format::bfzyx, { 5, 1, 1, 1 } });
    set_values<int64_t>(this->start_, { 0, 0, 0, 0, 0 });
    this->stop_ = this->engine_.allocate_memory({ data_types::i64, format::bfzyx, { 5, 1, 1, 1 } });
    set_values<int64_t>(this->stop_, {1, 2, 2, 2, 2});
    this->step_ = this->engine_.allocate_memory({ data_types::i64, format::bfzyx, { 5, 1, 1, 1 } });
    set_values<int64_t>(this->step_, { 1, 1, 1, 1, 1 });
    this->output_shape_ = { 1, 2, 2, 2, 2 };
    this->expected_output_ = {
        0, 1, 10, 11, 120, 121, 130, 131,
        600, 601, 610, 611, 720, 721, 730, 731
    };
}

} // anonymous namespace
2
inference-engine/thirdparty/mkl-dnn
vendored
@ -1 +1 @@
Subproject commit 5adbcb757c77f1bf0cd11ad58dd92e93ea2e3561
Subproject commit acee807d84944008df6741677ab52e01d790d58a
@ -79,11 +79,14 @@ Options:
|
||||
-h, --help Print a usage message
|
||||
-m "<path>" Required. Path to an .xml/.onnx/.prototxt file with a trained model or to a .blob files with a trained compiled model.
|
||||
-i "<path>" Optional. Path to a folder with images and/or binaries or to specific image or binary file.
|
||||
In case of dynamic shapes networks with several inputs provide the same number of files for each input (except cases with single file for any input):
|
||||
"input1:1.jpg input2:1.bin", "input1:1.bin,2.bin input2:3.bin input3:4.bin,5.bin ".
|
||||
Also you can pass specific keys for inputs: "random" - for filling input with random data, "image_info" - for filling input with image size.
|
||||
-d "<device>" Optional. Specify a target device to infer on (the list of available devices is shown below). Default value is CPU.
|
||||
Use "-d HETERO:<comma-separated_devices_list>" format to specify HETERO plugin.
|
||||
Use "-d MULTI:<comma-separated_devices_list>" format to specify MULTI plugin.
|
||||
Use "-d GPU.X" format to specify device id for GPU devices.
|
||||
The application looks for a suitable plugin for the specified device.
|
||||
The application looks for a suitable plugin for the specified device.
|
||||
-l "<absolute_path>" Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.
|
||||
Or
|
||||
-c "<absolute_path>" Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.
|
||||
@ -99,11 +102,23 @@ Options:
|
||||
-stream_output Optional. Print progress as a plain text. When specified, an interactive progress bar is replaced with a multiline output.
|
||||
-t Optional. Time, in seconds, to execute topology.
|
||||
-progress Optional. Show progress bar (can affect performance measurement). Default value is "false".
|
||||
-shape Optional. Set shape for input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size.
|
||||
-shape Optional. Set shape for network input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size.
|
||||
This parameter affects the model input shape and can be dynamic. For dynamic dimensions use the symbol `?` or '-1'. Ex. [?,3,?,?].
|
||||
For bounded dimensions specify range 'min..max'. Ex. [1..10,3,?,?].
|
||||
-data_shape Required for networks with dynamic shapes. Set shape for input blobs.
|
||||
In case of one input size: "[1,3,224,224]" or "input1[1,3,224,224],input2[1,4]".
|
||||
In case of several input sizes provide the same number for
|
||||
each input (except cases with single shape for any input): "[1,3,128,128][3,3,128,128][1,3,320,320]",
|
||||
"input1[1,1,128,128][1,1,256,256],input2[80,1]" or "input1[1,192][1,384],input2[1,192][1,384],input3[1,192][1,384],input4[1,192][1,384]".
|
||||
If network shapes are all static, specifying the option will cause an exception.
|
||||
-layout Optional. Prompts how network layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size.
|
||||
-cache_dir "<path>" Optional. Enables caching of loaded models to specified directory.
|
||||
-load_from_file Optional. Loads model from file directly without ReadNetwork.
|
||||
-latency_percentile Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median).
|
||||
-inference_only Optional. Measure only inference stage. Default option for static models.
|
||||
Dynamic models are measured in full mode which includes inputs setup stage,
|
||||
inference only mode available for them with single input data shape only.
|
||||
To enable full mode for static models pass \"false\" value to this argument: ex. -inference_only=false".
|
||||
|
||||
CPU-specific performance options:
|
||||
-nstreams "<integer>" Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices
|
||||
@ -117,16 +132,19 @@ Options:
|
||||
-enforcebf16="<true/false>" Optional. By default floating point operations execution in bfloat16 precision are enforced if supported by platform.
|
||||
-pin "YES"/"HYBRID_AWARE"/"NUMA"/"NO"
|
||||
Optional. Explicit inference threads binding options (leave empty to let the OpenVINO to make a choice):
|
||||
enabling threads->cores pinning ("YES", which is already default for a conventional CPU),
|
||||
letting the runtime to decide on the threads->different core types ("HYBRID_AWARE", which is default on the hybrid CPUs)
|
||||
threads->(NUMA)nodes ("NUMA") or
|
||||
completely disable ("NO") CPU inference threads pinning.
|
||||
enabling threads->cores pinning ("YES", which is already default for a conventional CPU),
|
||||
letting the runtime to decide on the threads->different core types ("HYBRID_AWARE", which is default on the hybrid CPUs)
|
||||
threads->(NUMA)nodes ("NUMA") or completely disable ("NO") CPU inference threads pinning.
|
||||
-ip "U8"/"FP16"/"FP32" Optional. Specifies precision for all input layers of the network.
|
||||
-op "U8"/"FP16"/"FP32" Optional. Specifies precision for all output layers of the network.
|
||||
-iop Optional. Specifies precision for input and output layers by name. Example: -iop "input:FP16, output:FP16". Notice that quotes are required. Overwrites precision from ip and op options for specified layers.
|
||||
-iop Optional. Specifies precision for input and output layers by name. Example: -iop "input:FP16, output:FP16". Notice that quotes are required.
|
||||
Overwrites precision from ip and op options for specified layers.
|
||||
|
||||
Statistics dumping options:
|
||||
-report_type "<type>" Optional. Enable collecting statistics report. "no_counters" report contains configuration options specified, resulting FPS and latency. "average_counters" report extends "no_counters" report and additionally includes average PM counters values for each layer from the network. "detailed_counters" report extends "average_counters" report and additionally includes per-layer PM counters and latency for each executed infer request.
|
||||
-report_type "<type>" Optional. Enable collecting statistics report. "no_counters" report contains configuration options specified, resulting FPS and latency.
|
||||
"average_counters" report extends "no_counters" report and additionally includes average PM counters values for each layer from the network.
|
||||
"detailed_counters" report extends "average_counters" report and additionally includes per-layer PM counters
|
||||
and latency for each executed infer request.
|
||||
-report_folder Optional. Path to a folder where statistics report is stored.
|
||||
-exec_graph_path Optional. Path to a file where to store executable graph information serialized.
|
||||
-pc Optional. Report performance counters.
|
||||
@ -181,33 +199,55 @@ This section provides step-by-step instructions on how to run the Benchmark Tool
|
||||
The application outputs the number of executed iterations, total duration of execution, latency, and throughput.
|
||||
Additionally, if you set the `-report_type` parameter, the application outputs statistics report. If you set the `-pc` parameter, the application outputs performance counters. If you set `-exec_graph_path`, the application reports executable graph information serialized. All measurements including per-layer PM counters are reported in milliseconds.
|
||||
|
||||
Below are fragments of sample output for CPU and GPU devices:
|
||||
Below are fragments of sample output static and dynamic networks:
|
||||
|
||||
* For CPU:
|
||||
* For static network:
|
||||
```
|
||||
[Step 8/9] Measuring performance (Start inference asynchronously, 60000 ms duration, 4 inference requests in parallel using 4 streams)
|
||||
Progress: [....................] 100.00% done
|
||||
|
||||
[Step 9/9] Dumping statistics report
|
||||
[ INFO ] Statistics collecting was not requested. No reports are dumped.
|
||||
Progress: [....................] 100.00% done
|
||||
|
||||
Count: 4612 iterations
|
||||
Duration: 60110.04 ms
|
||||
Latency: 50.99 ms
|
||||
Throughput: 76.73 FPS
|
||||
```
|
||||
|
||||
* For GPU:
|
||||
```
|
||||
[Step 10/11] Measuring performance (Start inference asynchronously, 5 inference requests using 4 streams for CPU, limits: 120000 ms duration)
|
||||
Progress: [....................] 100% done
|
||||
[Step 10/11] Measuring performance (Start inference asynchronously, 4 inference requests using 4 streams for CPU, limits: 60000 ms duration)
|
||||
[ INFO ] BENCHMARK IS IN INFERENCE ONLY MODE.
|
||||
[ INFO ] Input blobs will be filled once before performance measurements.
|
||||
[ INFO ] First inference took 26.26 ms
|
||||
Progress: [................... ] 99% done
|
||||
|
||||
[Step 11/11] Dumping statistics report
|
||||
Count: 102515 iterations
|
||||
Duration: 120007.38 ms
|
||||
Latency: 5.84 ms
|
||||
Throughput: 854.24 FPS
|
||||
[ INFO ] Count: 6640 iterations
|
||||
[ INFO ] Duration: 60039.70 ms
|
||||
[ INFO ] Latency:
|
||||
[ INFO ] Median: 35.36 ms
|
||||
[ INFO ] Avg: 36.12 ms
|
||||
[ INFO ] Min: 18.55 ms
|
||||
[ INFO ] Max: 88.96 ms
|
||||
[ INFO ] Throughput: 110.59 FPS
|
||||
```
|
||||
|
||||
* For dynamic network:
|
||||
```
|
||||
[Step 10/11] Measuring performance (Start inference asynchronously, 4 inference requests using 4 streams for CPU, limits: 60000 ms duration)
|
||||
[ INFO ] BENCHMARK IS IN FULL MODE.
|
||||
[ INFO ] Inputs setup stage will be included in performance measurements.
|
||||
[ INFO ] First inference took 26.80 ms
|
||||
Progress: [................... ] 99% done
|
||||
|
||||
[Step 11/11] Dumping statistics report
|
||||
[ INFO ] Count: 5199 iterations
|
||||
[ INFO ] Duration: 60043.34 ms
|
||||
[ INFO ] Latency:
|
||||
[ INFO ] Median: 41.58 ms
|
||||
[ INFO ] Avg: 46.07 ms
|
||||
[ INFO ] Min: 8.44 ms
|
||||
[ INFO ] Max: 115.65 ms
|
||||
[ INFO ] Latency for each data shape group:
|
||||
[ INFO ] 1. data : [1, 3, 224, 224]
|
||||
[ INFO ] Median: 38.37 ms
|
||||
[ INFO ] Avg: 30.29 ms
|
||||
[ INFO ] Min: 8.44 ms
|
||||
[ INFO ] Max: 61.30 ms
|
||||
[ INFO ] 2. data : [1, 3, 448, 448]
|
||||
[ INFO ] Median: 68.21 ms
|
||||
[ INFO ] Avg: 61.85 ms
|
||||
[ INFO ] Min: 29.58 ms
|
||||
[ INFO ] Max: 115.65 ms
|
||||
[ INFO ] Throughput: 86.59 FPS
|
||||
```
|
||||
|
||||
## See Also
|
||||
|
@ -19,7 +19,12 @@ static const char help_message[] = "Print a usage message";
|
||||
|
||||
/// @brief message for images argument
|
||||
static const char input_message[] =
|
||||
"Optional. Path to a folder with images and/or binaries or to specific image or binary file.";
|
||||
"Optional. Path to a folder with images and/or binaries or to specific image or binary file.\n"
|
||||
" In case of dynamic shapes networks with several inputs provide the same number"
|
||||
" of files for each input (except cases with single file for any input):"
|
||||
"\"input1:1.jpg input2:1.bin\", \"input1:1.bin,2.bin input2:3.bin input3:4.bin,5.bin \"."
|
||||
" Also you can pass specific keys for inputs: \"random\" - for fillling input with random data,"
|
||||
" \"image_info\" - for filling input with image size.";
|
||||
|
||||
/// @brief message for model argument
|
||||
static const char model_message[] =
|
||||
@ -136,6 +141,9 @@ static const char progress_message[] =
|
||||
// @brief message for performance counters option
|
||||
static const char pc_message[] = "Optional. Report performance counters.";
|
||||
|
||||
// @brief message for performance counters for sequence option
|
||||
static const char pcseq_message[] = "Optional. Report latencies for each shape in -data_shape sequence.";
|
||||
|
||||
#ifdef HAVE_DEVICE_MEM_SUPPORT
|
||||
// @brief message for switching memory allocation type option
|
||||
static const char use_device_mem_message[] =
|
||||
@ -155,9 +163,19 @@ static const char dump_config_message[] =
|
||||
#endif
|
||||
|
||||
static const char shape_message[] =
|
||||
"Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or "
|
||||
"\"[1,3,224,224]\""
|
||||
" in case of one input size.";
|
||||
"Optional. Set shape for network input. For example, \"input1[1,3,224,224],input2[1,4]\" or \"[1,3,224,224]\""
|
||||
" in case of one input size. This parameter affect model input shape and can be dynamic."
|
||||
" For dynamic dimensions use symbol `?` or '-1'. Ex. [?,3,?,?]."
|
||||
" For bounded dimensions specify range 'min..max'. Ex. [1..10,3,?,?].";
|
||||
|
||||
static const char data_shape_message[] =
|
||||
" Required for networks with dynamic shapes. Set shape for input blobs."
|
||||
" In case of one input size: \"[1,3,224,224]\" or \"input1[1,3,224,224],input2[1,4]\"."
|
||||
" In case of several input sizes provide the same number for each input (except cases with single shape for any "
|
||||
"input):"
|
||||
" \"[1,3,128,128][3,3,128,128][1,3,320,320]\", \"input1[1,1,128,128][1,1,256,256],input2[80,1]\""
|
||||
" or \"input1[1,192][1,384],input2[1,192][1,384],input3[1,192][1,384],input4[1,192][1,384]\"."
|
||||
" If network shapes are all static specifying the option will cause an exception.";
|
||||
|
||||
static const char layout_message[] =
|
||||
"Optional. Prompts how network layouts should be treated by application. "
|
||||
@ -196,6 +214,13 @@ static constexpr char input_image_mean_message[] =
|
||||
"Values to be provided in the [R, G, B] format. Can be defined for desired input of the model,\n"
|
||||
"Example: -imean data[255,255,255],info[255,255,255]\n";
|
||||
|
||||
static constexpr char inference_only_message[] =
|
||||
"Optional. Measure only inference stage. Default option for static models. Dynamic models"
|
||||
" are measured in full mode which includes inputs setup stage,"
|
||||
" inference only mode available for them with single input data shape only."
|
||||
" To enable full mode for static models pass \"false\" value to this argument:"
|
||||
" ex. \"-inference_only=false\".\n";
|
||||
|
||||
/// @brief Define flag for showing help message <br>
|
||||
DEFINE_bool(h, false, help_message);
|
||||
|
||||
@ -276,6 +301,9 @@ DEFINE_bool(progress, false, progress_message);
|
||||
/// @brief Define flag for showing performance counters <br>
|
||||
DEFINE_bool(pc, false, pc_message);
|
||||
|
||||
/// @brief Define flag for showing performance sequence counters <br>
|
||||
DEFINE_bool(pcseq, false, pcseq_message);
|
||||
|
||||
#ifdef HAVE_DEVICE_MEM_SUPPORT
|
||||
/// @brief Define flag for switching between host and device memory allocation for input and output buffers
|
||||
DEFINE_bool(use_device_mem, false, use_device_mem_message);
|
||||
@ -292,6 +320,9 @@ DEFINE_string(dump_config, "", dump_config_message);
|
||||
/// @brief Define flag for input shape <br>
|
||||
DEFINE_string(shape, "", shape_message);
|
||||
|
||||
/// @brief Define flag for input blob shape <br>
|
||||
DEFINE_string(data_shape, "", data_shape_message);
|
||||
|
||||
/// @brief Define flag for layout shape <br>
|
||||
DEFINE_string(layout, "", layout_message);
|
||||
|
||||
@ -322,6 +353,9 @@ DEFINE_string(iscale, "", input_image_scale_message);
|
||||
/// @brief Define flag for using input image mean <br>
|
||||
DEFINE_string(imean, "", input_image_mean_message);
|
||||
|
||||
/// @brief Define flag for inference only mode <br>
|
||||
DEFINE_bool(inference_only, true, inference_only_message);
|
||||
|
||||
/**
* @brief This function shows a help message
*/
@ -346,8 +380,9 @@ static void showUsage() {
std::cout << "    -t " << execution_time_message << std::endl;
std::cout << "    -progress " << progress_message << std::endl;
std::cout << "    -shape " << shape_message << std::endl;
std::cout << "    -data_shape " << data_shape_message << std::endl;
std::cout << "    -layout " << layout_message << std::endl;
std::cout << "    -cache_dir \"<path>\" " << cache_dir_message << std::endl;
std::cout << "    -cache_dir \"<path>\" " << cache_dir_message << std::endl;
std::cout << "    -load_from_file " << load_from_file_message << std::endl;
std::cout << "    -latency_percentile " << infer_latency_percentile_message << std::endl;
std::cout << std::endl << " device-specific performance options:" << std::endl;
@ -363,6 +398,7 @@ static void showUsage() {
std::cout << "    -report_folder " << report_folder_message << std::endl;
std::cout << "    -exec_graph_path " << exec_graph_path_message << std::endl;
std::cout << "    -pc " << pc_message << std::endl;
std::cout << "    -pcseq " << pcseq_message << std::endl;
#ifdef USE_OPENCV
std::cout << "    -dump_config " << dump_config_message << std::endl;
std::cout << "    -load_config " << load_config_message << std::endl;
@ -373,4 +409,5 @@ static void showUsage() {
std::cout << "    -iop \"<value>\" " << iop_message << std::endl;
std::cout << "    -iscale " << input_image_scale_message << std::endl;
std::cout << "    -imean " << input_image_mean_message << std::endl;
std::cout << "    -inference_only " << inference_only_message << std::endl;
}

@ -18,13 +18,12 @@
|
||||
// clang-format off
|
||||
#include "inference_engine.hpp"
|
||||
|
||||
#include "remote_blobs_filling.hpp"
|
||||
#include "statistics_report.hpp"
|
||||
#include "utils.hpp"
|
||||
// clang-format on
|
||||
|
||||
typedef std::chrono::high_resolution_clock Time;
|
||||
typedef std::chrono::nanoseconds ns;
|
||||
|
||||
typedef std::function<void(size_t id, const double latency)> QueueCallbackFunction;
|
||||
typedef std::function<void(size_t id, size_t group_id, const double latency)> QueueCallbackFunction;
|
||||
|
||||
/// @brief Wrapper class for InferenceEngine::InferRequest. Handles asynchronous callbacks and calculates execution
|
||||
/// time.
|
||||
@ -37,10 +36,12 @@ public:
|
||||
explicit InferReqWrap(InferenceEngine::ExecutableNetwork& net, size_t id, QueueCallbackFunction callbackQueue)
|
||||
: _request(net.CreateInferRequest()),
|
||||
_id(id),
|
||||
_callbackQueue(callbackQueue) {
|
||||
_lat_group_id(0),
|
||||
_callbackQueue(callbackQueue),
|
||||
outputClBuffer() {
|
||||
_request.SetCompletionCallback([&]() {
|
||||
_endTime = Time::now();
|
||||
_callbackQueue(_id, getExecutionTimeInMilliseconds());
|
||||
_callbackQueue(_id, _lat_group_id, getExecutionTimeInMilliseconds());
|
||||
});
|
||||
}
|
||||
|
||||
@ -57,7 +58,7 @@ public:
|
||||
_startTime = Time::now();
|
||||
_request.Infer();
|
||||
_endTime = Time::now();
|
||||
_callbackQueue(_id, getExecutionTimeInMilliseconds());
|
||||
_callbackQueue(_id, _lat_group_id, getExecutionTimeInMilliseconds());
|
||||
}
|
||||
|
||||
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> getPerformanceCounts() {
|
||||
@ -77,26 +78,48 @@ public:
|
||||
return static_cast<double>(execTime.count()) * 0.000001;
|
||||
}
|
||||
|
||||
void setLatencyGroupId(size_t id) {
|
||||
_lat_group_id = id;
|
||||
}
|
||||
|
||||
// in case of using GPU memory we need to allocate CL buffer for
|
||||
// output blobs. By encapsulating cl buffer inside InferReqWrap
|
||||
// we will control the number of output buffers and access to it.
|
||||
std::map<std::string, ::gpu::BufferType>& getOutputClBuffer() {
|
||||
return outputClBuffer;
|
||||
}
|
||||
|
||||
private:
|
||||
InferenceEngine::InferRequest _request;
|
||||
Time::time_point _startTime;
|
||||
Time::time_point _endTime;
|
||||
size_t _id;
|
||||
size_t _lat_group_id;
|
||||
QueueCallbackFunction _callbackQueue;
|
||||
std::map<std::string, ::gpu::BufferType> outputClBuffer;
|
||||
};
|
||||
|
||||
class InferRequestsQueue final {
|
||||
public:
|
||||
InferRequestsQueue(InferenceEngine::ExecutableNetwork& net, size_t nireq) {
|
||||
InferRequestsQueue(InferenceEngine::ExecutableNetwork& net,
|
||||
size_t nireq,
|
||||
size_t lat_group_n,
|
||||
bool enable_lat_groups)
|
||||
: enable_lat_groups(enable_lat_groups) {
|
||||
for (size_t id = 0; id < nireq; id++) {
|
||||
requests.push_back(std::make_shared<InferReqWrap>(
|
||||
net,
|
||||
id,
|
||||
std::bind(&InferRequestsQueue::putIdleRequest, this, std::placeholders::_1, std::placeholders::_2)));
|
||||
requests.push_back(std::make_shared<InferReqWrap>(net,
|
||||
id,
|
||||
std::bind(&InferRequestsQueue::putIdleRequest,
|
||||
this,
|
||||
std::placeholders::_1,
|
||||
std::placeholders::_2,
|
||||
std::placeholders::_3)));
|
||||
_idleIds.push(id);
|
||||
}
|
||||
_latency_groups.resize(lat_group_n);
|
||||
resetTimes();
|
||||
}
|
||||
|
||||
~InferRequestsQueue() {
|
||||
// Inference Request guarantee that it will wait for all asynchronous internal tasks in destructor
|
||||
// So it should be released before any context that the request can use inside internal asynchronous tasks
|
||||
@ -111,15 +134,21 @@ public:
|
||||
_startTime = Time::time_point::max();
|
||||
_endTime = Time::time_point::min();
|
||||
_latencies.clear();
|
||||
for (auto& group : _latency_groups) {
|
||||
group.clear();
|
||||
}
|
||||
}
|
||||
|
||||
double getDurationInMilliseconds() {
|
||||
return std::chrono::duration_cast<ns>(_endTime - _startTime).count() * 0.000001;
|
||||
}
|
||||
|
||||
void putIdleRequest(size_t id, const double latency) {
|
||||
void putIdleRequest(size_t id, size_t lat_group_id, const double latency) {
|
||||
std::unique_lock<std::mutex> lock(_mutex);
|
||||
_latencies.push_back(latency);
|
||||
if (enable_lat_groups) {
|
||||
_latency_groups[lat_group_id].push_back(latency);
|
||||
}
|
||||
_idleIds.push(id);
|
||||
_endTime = std::max(Time::now(), _endTime);
|
||||
_cv.notify_one();
|
||||
@ -147,6 +176,10 @@ public:
|
||||
return _latencies;
|
||||
}
|
||||
|
||||
std::vector<std::vector<double>> getLatencyGroups() {
|
||||
return _latency_groups;
|
||||
}
|
||||
|
||||
std::vector<InferReqWrap::Ptr> requests;
|
||||
|
||||
private:
|
||||
@ -156,4 +189,6 @@ private:
|
||||
Time::time_point _startTime;
|
||||
Time::time_point _endTime;
|
||||
std::vector<double> _latencies;
|
||||
std::vector<std::vector<double>> _latency_groups;
|
||||
bool enable_lat_groups;
|
||||
};
|
||||
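Note on the latency groups introduced above: each infer request carries a _lat_group_id so that latencies can be bucketed per input data shape when several shapes are benchmarked. A minimal sketch, not part of the patch (the helper name and the choice of median as the summary are ours), of how the vectors returned by getLatencyGroups() could be summarized:

#include <algorithm>
#include <vector>

// Hypothetical post-processing: median latency for each data-shape group.
static std::vector<double> medianPerGroup(std::vector<std::vector<double>> groups) {
    std::vector<double> medians;
    for (auto& g : groups) {
        if (g.empty()) {
            medians.push_back(0.0);  // group never exercised
            continue;
        }
        std::sort(g.begin(), g.end());
        medians.push_back(g[g.size() / 2]);
    }
    return medians;
}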
|
@ -3,7 +3,10 @@
|
||||
//
|
||||
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
@ -13,6 +16,8 @@
|
||||
#include "format_reader_ptr.h"
|
||||
|
||||
#include "inputs_filling.hpp"
|
||||
#include "shared_blob_allocator.hpp"
|
||||
#include "utils.hpp"
|
||||
// clang-format on
|
||||
|
||||
using namespace InferenceEngine;
|
||||
@ -42,207 +47,464 @@ std::vector<std::string> filterFilesByExtensions(const std::vector<std::string>&
|
||||
return filtered;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void fillBlobImage(Blob::Ptr& inputBlob,
|
||||
const std::vector<std::string>& filePaths,
|
||||
const size_t& batchSize,
|
||||
const benchmark_app::InputInfo& app_info,
|
||||
const size_t& requestId,
|
||||
const size_t& inputId,
|
||||
const size_t& inputSize) {
|
||||
MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
|
||||
if (!minput) {
|
||||
IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in "
|
||||
"fillBlobImage, "
|
||||
<< "but by fact we were not able to cast inputBlob to MemoryBlob";
|
||||
}
|
||||
// locked memory holder should be alive all time while access to its buffer
|
||||
// happens
|
||||
auto minputHolder = minput->wmap();
|
||||
auto inputBlobData = minputHolder.as<T*>();
|
||||
|
||||
/** Collect images data ptrs **/
|
||||
std::vector<std::shared_ptr<uint8_t>> vreader;
|
||||
vreader.reserve(batchSize);
|
||||
|
||||
for (size_t i = 0ULL, inputIndex = requestId * batchSize * inputSize + inputId; i < batchSize;
|
||||
i++, inputIndex += inputSize) {
|
||||
inputIndex %= filePaths.size();
|
||||
|
||||
slog::info << "Prepare image " << filePaths[inputIndex] << slog::endl;
|
||||
FormatReader::ReaderPtr reader(filePaths[inputIndex].c_str());
|
||||
if (reader.get() == nullptr) {
|
||||
slog::warn << "Image " << filePaths[inputIndex] << " cannot be read!" << slog::endl << slog::endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
/** Getting image data **/
|
||||
std::shared_ptr<uint8_t> imageData(reader->getData(app_info.width(), app_info.height()));
|
||||
if (imageData) {
|
||||
vreader.push_back(imageData);
|
||||
}
|
||||
}
|
||||
|
||||
/** Fill input tensor with images. First b channel, then g and r channels **/
|
||||
const size_t numChannels = app_info.channels();
|
||||
const size_t width = app_info.width();
|
||||
const size_t height = app_info.height();
|
||||
/** Iterate over all input images **/
|
||||
for (size_t imageId = 0; imageId < vreader.size(); ++imageId) {
|
||||
/** Iterate over all width **/
|
||||
for (size_t w = 0; w < app_info.width(); ++w) {
|
||||
/** Iterate over all height **/
|
||||
for (size_t h = 0; h < app_info.height(); ++h) {
|
||||
/** Iterate over all channels **/
|
||||
for (size_t ch = 0; ch < numChannels; ++ch) {
|
||||
/** [images stride + channels stride + pixel id ] all in
|
||||
* bytes **/
|
||||
size_t offset = imageId * numChannels * width * height +
|
||||
(((app_info.layout == "NCHW") || (app_info.layout == "CHW"))
|
||||
? (ch * width * height + h * width + w)
|
||||
: (h * width * numChannels + w * numChannels + ch));
|
||||
inputBlobData[offset] =
|
||||
(static_cast<T>(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]) -
|
||||
static_cast<T>(app_info.mean[ch])) /
|
||||
static_cast<T>(app_info.scale[ch]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void fillBlobBinary(Blob::Ptr& inputBlob,
|
||||
const std::vector<std::string>& filePaths,
|
||||
const size_t& batchSize,
|
||||
const size_t& requestId,
|
||||
const size_t& inputId,
|
||||
const size_t& inputSize) {
|
||||
MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
|
||||
auto adjBatchSize = batchSize;
|
||||
|
||||
// Check layout
|
||||
std::stringstream ss;
|
||||
auto tensorDesc = inputBlob->getTensorDesc();
|
||||
ss << tensorDesc.getLayout();
|
||||
auto layout = ss.str();
|
||||
std::size_t batchIndex = layout.find("N");
|
||||
if (batchIndex == std::string::npos) {
|
||||
adjBatchSize = 1;
|
||||
} else if (tensorDesc.getDims().at(batchIndex) != batchSize) {
|
||||
adjBatchSize = tensorDesc.getDims().at(batchIndex);
|
||||
}
|
||||
|
||||
if (!minput) {
|
||||
IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in "
|
||||
"fillBlobBinary, "
|
||||
<< "but by fact we were not able to cast inputBlob to MemoryBlob";
|
||||
}
|
||||
// locked memory holder should be alive all time while access to its buffer
|
||||
// happens
|
||||
auto minputHolder = minput->wmap();
|
||||
|
||||
auto inputBlobData = minputHolder.as<char*>();
|
||||
for (size_t i = 0ULL, inputIndex = requestId * adjBatchSize * inputSize + inputId; i < adjBatchSize;
|
||||
i++, inputIndex += inputSize) {
|
||||
inputIndex %= filePaths.size();
|
||||
|
||||
slog::info << "Prepare binary file " << filePaths[inputIndex] << slog::endl;
|
||||
std::ifstream binaryFile(filePaths[inputIndex], std::ios_base::binary | std::ios_base::ate);
|
||||
if (!binaryFile) {
|
||||
IE_THROW() << "Cannot open " << filePaths[inputIndex];
|
||||
}
|
||||
|
||||
auto fileSize = static_cast<std::size_t>(binaryFile.tellg());
|
||||
binaryFile.seekg(0, std::ios_base::beg);
|
||||
if (!binaryFile.good()) {
|
||||
IE_THROW() << "Can not read " << filePaths[inputIndex];
|
||||
}
|
||||
auto inputSize = inputBlob->size() * sizeof(T) / adjBatchSize;
|
||||
if (fileSize != inputSize) {
|
||||
IE_THROW() << "File " << filePaths[inputIndex] << " contains " << std::to_string(fileSize)
|
||||
<< " bytes "
|
||||
"but the network expects "
|
||||
<< std::to_string(inputSize);
|
||||
}
|
||||
binaryFile.read(&inputBlobData[i * inputSize], inputSize);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
using uniformDistribution = typename std::conditional<
|
||||
std::is_floating_point<T>::value,
|
||||
std::uniform_real_distribution<T>,
|
||||
typename std::conditional<std::is_integral<T>::value, std::uniform_int_distribution<T>, void>::type>::type;
|
||||
|
||||
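The uniformDistribution alias above simply dispatches between the two standard distribution templates at compile time: real distributions for floating-point element types, integer distributions for integral ones. A small self-contained illustration, independent of benchmark_app (the alias name here is chosen only for the example):

#include <cstdint>
#include <random>
#include <type_traits>

template <typename T>
using uniform_dist_t = typename std::conditional<
    std::is_floating_point<T>::value,
    std::uniform_real_distribution<T>,
    typename std::conditional<std::is_integral<T>::value, std::uniform_int_distribution<T>, void>::type>::type;

// Resolution happens entirely at compile time:
static_assert(std::is_same<uniform_dist_t<float>, std::uniform_real_distribution<float>>::value, "float -> real distribution");
static_assert(std::is_same<uniform_dist_t<int32_t>, std::uniform_int_distribution<int32_t>>::value, "int32_t -> int distribution");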
template <typename T, typename T2>
|
||||
void fillBlobRandom(Blob::Ptr& inputBlob,
|
||||
T rand_min = std::numeric_limits<uint8_t>::min(),
|
||||
T rand_max = std::numeric_limits<uint8_t>::max()) {
|
||||
MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
|
||||
if (!minput) {
|
||||
IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in "
|
||||
"fillBlobRandom, "
|
||||
<< "but by fact we were not able to cast inputBlob to MemoryBlob";
|
||||
}
|
||||
// locked memory holder should be alive all time while access to its buffer
|
||||
// happens
|
||||
auto minputHolder = minput->wmap();
|
||||
template <typename T>
|
||||
InferenceEngine::Blob::Ptr createBlobFromImage(const std::vector<std::string>& files,
|
||||
size_t inputId,
|
||||
size_t batchSize,
|
||||
const benchmark_app::InputInfo& inputInfo,
|
||||
std::string* filenames_used = nullptr) {
|
||||
size_t blob_size =
|
||||
std::accumulate(inputInfo.dataShape.begin(), inputInfo.dataShape.end(), 1, std::multiplies<int>());
|
||||
T* data = new T[blob_size];
|
||||
|
||||
auto inputBlobData = minputHolder.as<T*>();
|
||||
std::mt19937 gen(0);
|
||||
uniformDistribution<T2> distribution(rand_min, rand_max);
|
||||
for (size_t i = 0; i < inputBlob->size(); i++) {
|
||||
inputBlobData[i] = static_cast<T>(distribution(gen));
|
||||
/** Collect images data ptrs **/
|
||||
std::vector<std::shared_ptr<uint8_t>> vreader;
|
||||
vreader.reserve(batchSize);
|
||||
|
||||
for (size_t b = 0; b < batchSize; ++b) {
|
||||
auto inputIndex = (inputId + b) % files.size();
|
||||
if (filenames_used) {
|
||||
*filenames_used += (filenames_used->empty() ? "" : ", ") + files[inputIndex];
|
||||
}
|
||||
FormatReader::ReaderPtr reader(files[inputIndex].c_str());
|
||||
if (reader.get() == nullptr) {
|
||||
slog::warn << "Image " << files[inputIndex] << " cannot be read!" << slog::endl << slog::endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
/** Getting image data **/
|
||||
std::shared_ptr<uint8_t> imageData(reader->getData(inputInfo.width(), inputInfo.height()));
|
||||
if (imageData) {
|
||||
vreader.push_back(imageData);
|
||||
}
|
||||
}
|
||||
|
||||
/** Fill input tensor with image. First b channel, then g and r channels **/
|
||||
const size_t numChannels = inputInfo.channels();
|
||||
const size_t width = inputInfo.width();
|
||||
const size_t height = inputInfo.height();
|
||||
/** Iterate over all input images **/
|
||||
for (size_t b = 0; b < batchSize; ++b) {
|
||||
/** Iterate over all width **/
|
||||
for (size_t w = 0; w < width; ++w) {
|
||||
/** Iterate over all height **/
|
||||
for (size_t h = 0; h < height; ++h) {
|
||||
/** Iterate over all channels **/
|
||||
for (size_t ch = 0; ch < numChannels; ++ch) {
|
||||
/** [images stride + channels stride + pixel id ] all in
|
||||
* bytes **/
|
||||
size_t offset = b * numChannels * width * height +
|
||||
(((inputInfo.layout == "NCHW") || (inputInfo.layout == "CHW"))
|
||||
? (ch * width * height + h * width + w)
|
||||
: (h * width * numChannels + w * numChannels + ch));
|
||||
data[offset] =
|
||||
(static_cast<T>(vreader.at(b).get()[h * width * numChannels + w * numChannels + ch]) -
|
||||
static_cast<T>(inputInfo.mean[ch])) /
|
||||
static_cast<T>(inputInfo.scale[ch]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InferenceEngine::TensorDesc tDesc(inputInfo.precision, inputInfo.dataShape, inputInfo.originalLayout);
|
||||
auto blob =
|
||||
InferenceEngine::make_shared_blob<T>(tDesc,
|
||||
std::make_shared<SharedBlobAllocator<T>>(data, blob_size * sizeof(T)));
|
||||
blob->allocate();
|
||||
return blob;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void fillBlobImInfo(Blob::Ptr& inputBlob, const size_t& batchSize, std::pair<size_t, size_t> image_size) {
|
||||
MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
|
||||
if (!minput) {
|
||||
IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in "
|
||||
"fillBlobImInfo, "
|
||||
<< "but by fact we were not able to cast inputBlob to MemoryBlob";
|
||||
}
|
||||
// locked memory holder should be alive all time while access to its buffer
|
||||
// happens
|
||||
auto minputHolder = minput->wmap();
|
||||
InferenceEngine::Blob::Ptr createBlobImInfo(const std::pair<size_t, size_t>& image_size,
|
||||
size_t batchSize,
|
||||
const benchmark_app::InputInfo& inputInfo) {
|
||||
size_t blob_size =
|
||||
std::accumulate(inputInfo.dataShape.begin(), inputInfo.dataShape.end(), 1, std::multiplies<int>());
|
||||
T* data = new T[blob_size];
|
||||
|
||||
auto inputBlobData = minputHolder.as<T*>();
|
||||
for (size_t b = 0; b < batchSize; b++) {
|
||||
size_t iminfoSize = inputBlob->size() / batchSize;
|
||||
size_t iminfoSize = blob_size / batchSize;
|
||||
for (size_t i = 0; i < iminfoSize; i++) {
|
||||
size_t index = b * iminfoSize + i;
|
||||
if (0 == i)
|
||||
inputBlobData[index] = static_cast<T>(image_size.first);
|
||||
data[index] = static_cast<T>(image_size.first);
|
||||
else if (1 == i)
|
||||
inputBlobData[index] = static_cast<T>(image_size.second);
|
||||
data[index] = static_cast<T>(image_size.second);
|
||||
else
|
||||
inputBlobData[index] = 1;
|
||||
data[index] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
InferenceEngine::TensorDesc tDesc(inputInfo.precision, inputInfo.dataShape, inputInfo.originalLayout);
|
||||
InferenceEngine::Blob::Ptr blob =
|
||||
InferenceEngine::make_shared_blob<T>(tDesc,
|
||||
std::make_shared<SharedBlobAllocator<T>>(data, blob_size * sizeof(T)));
|
||||
blob->allocate();
|
||||
return blob;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
InferenceEngine::Blob::Ptr createBlobFromBinary(const std::vector<std::string>& files,
|
||||
size_t inputId,
|
||||
size_t batchSize,
|
||||
const benchmark_app::InputInfo& inputInfo,
|
||||
std::string* filenames_used = nullptr) {
|
||||
size_t blob_size =
|
||||
std::accumulate(inputInfo.dataShape.begin(), inputInfo.dataShape.end(), 1, std::multiplies<int>());
|
||||
char* data = new char[blob_size * sizeof(T)];
|
||||
|
||||
// adjust batch size
|
||||
std::stringstream ss;
|
||||
ss << inputInfo.originalLayout;
|
||||
std::string layout = ss.str();
|
||||
if (layout.find("N") == std::string::npos) {
|
||||
batchSize = 1;
|
||||
} else if (inputInfo.batch() != batchSize) {
|
||||
batchSize = inputInfo.batch();
|
||||
}
|
||||
|
||||
for (size_t b = 0; b < batchSize; ++b) {
|
||||
size_t inputIndex = (inputId + b) % files.size();
|
||||
std::ifstream binaryFile(files[inputIndex], std::ios_base::binary | std::ios_base::ate);
|
||||
if (!binaryFile) {
|
||||
IE_THROW() << "Cannot open " << files[inputIndex];
|
||||
}
|
||||
|
||||
auto fileSize = static_cast<std::size_t>(binaryFile.tellg());
|
||||
binaryFile.seekg(0, std::ios_base::beg);
|
||||
if (!binaryFile.good()) {
|
||||
IE_THROW() << "Can not read " << files[inputIndex];
|
||||
}
|
||||
auto inputSize = blob_size * sizeof(T) / batchSize;
|
||||
if (fileSize != inputSize) {
|
||||
IE_THROW() << "File " << files[inputIndex] << " contains " << std::to_string(fileSize)
|
||||
<< " bytes "
|
||||
"but the network expects "
|
||||
<< std::to_string(inputSize);
|
||||
}
|
||||
|
||||
if (inputInfo.layout != "CN") {
|
||||
binaryFile.read(&data[b * inputSize], inputSize);
|
||||
} else {
|
||||
for (int i = 0; i < inputInfo.channels(); i++) {
|
||||
binaryFile.read(&data[(i * batchSize + b) * sizeof(T)], sizeof(T));
|
||||
}
|
||||
}
|
||||
|
||||
if (filenames_used) {
|
||||
*filenames_used += (filenames_used->empty() ? "" : ", ") + files[inputIndex];
|
||||
}
|
||||
}
|
||||
|
||||
InferenceEngine::TensorDesc tDesc(inputInfo.precision, inputInfo.dataShape, inputInfo.originalLayout);
|
||||
InferenceEngine::Blob::Ptr blob =
|
||||
InferenceEngine::make_shared_blob<T>(tDesc,
|
||||
std::make_shared<SharedBlobAllocator<T>>((T*)data, blob_size * sizeof(T)));
|
||||
blob->allocate();
|
||||
return blob;
|
||||
}
|
||||
|
||||
template <typename T, typename T2>
|
||||
InferenceEngine::Blob::Ptr createBlobRandom(const benchmark_app::InputInfo& inputInfo,
|
||||
T rand_min = std::numeric_limits<uint8_t>::min(),
|
||||
T rand_max = std::numeric_limits<uint8_t>::max()) {
|
||||
size_t blob_size =
|
||||
std::accumulate(inputInfo.dataShape.begin(), inputInfo.dataShape.end(), 1, std::multiplies<int>());
|
||||
T* data = new T[blob_size];
|
||||
|
||||
std::mt19937 gen(0);
|
||||
uniformDistribution<T2> distribution(rand_min, rand_max);
|
||||
for (size_t i = 0; i < blob_size; i++) {
|
||||
data[i] = static_cast<T>(distribution(gen));
|
||||
}
|
||||
|
||||
InferenceEngine::TensorDesc tDesc(inputInfo.precision, inputInfo.dataShape, inputInfo.originalLayout);
|
||||
InferenceEngine::Blob::Ptr blob =
|
||||
InferenceEngine::make_shared_blob<T>(tDesc,
|
||||
std::make_shared<SharedBlobAllocator<T>>(data, blob_size * sizeof(T)));
|
||||
blob->allocate();
|
||||
return blob;
|
||||
}
|
||||
|
||||
InferenceEngine::Blob::Ptr getImageBlob(const std::vector<std::string>& files,
|
||||
size_t inputId,
|
||||
size_t batchSize,
|
||||
const std::pair<std::string, benchmark_app::InputInfo>& inputInfo,
|
||||
std::string* filenames_used = nullptr) {
|
||||
auto precision = inputInfo.second.precision;
|
||||
if (precision == InferenceEngine::Precision::FP32) {
|
||||
return createBlobFromImage<float>(files, inputId, batchSize, inputInfo.second, filenames_used);
|
||||
} else if (precision == InferenceEngine::Precision::FP16) {
|
||||
return createBlobFromImage<short>(files, inputId, batchSize, inputInfo.second, filenames_used);
|
||||
} else if (precision == InferenceEngine::Precision::I32) {
|
||||
return createBlobFromImage<int32_t>(files, inputId, batchSize, inputInfo.second, filenames_used);
|
||||
} else if (precision == InferenceEngine::Precision::I64) {
|
||||
return createBlobFromImage<int64_t>(files, inputId, batchSize, inputInfo.second, filenames_used);
|
||||
} else if (precision == InferenceEngine::Precision::U8) {
|
||||
return createBlobFromImage<uint8_t>(files, inputId, batchSize, inputInfo.second, filenames_used);
|
||||
} else {
|
||||
IE_THROW() << "Input precision is not supported for " << inputInfo.first;
|
||||
}
|
||||
}
|
||||
|
||||
void fillBlobs(const std::vector<std::string>& inputFiles,
|
||||
const size_t& batchSize,
|
||||
benchmark_app::InputsInfo& app_inputs_info,
|
||||
std::vector<InferReqWrap::Ptr> requests) {
|
||||
std::vector<std::pair<size_t, size_t>> input_image_sizes;
|
||||
for (auto& item : app_inputs_info) {
|
||||
if (item.second.isImage()) {
|
||||
input_image_sizes.push_back(std::make_pair(item.second.width(), item.second.height()));
|
||||
}
|
||||
slog::info << "Network input '" << item.first << "' precision " << item.second.precision << ", dimensions ("
|
||||
<< item.second.layout << "): ";
|
||||
for (const auto& i : item.second.shape) {
|
||||
slog::info << i << " ";
|
||||
}
|
||||
slog::info << slog::endl;
|
||||
InferenceEngine::Blob::Ptr getImInfoBlob(const std::pair<size_t, size_t>& image_size,
|
||||
size_t batchSize,
|
||||
const std::pair<std::string, benchmark_app::InputInfo>& inputInfo) {
|
||||
auto precision = inputInfo.second.precision;
|
||||
if (precision == InferenceEngine::Precision::FP32) {
|
||||
return createBlobImInfo<float>(image_size, batchSize, inputInfo.second);
|
||||
} else if (precision == InferenceEngine::Precision::FP16) {
|
||||
return createBlobImInfo<short>(image_size, batchSize, inputInfo.second);
|
||||
} else if (precision == InferenceEngine::Precision::I32) {
|
||||
return createBlobImInfo<int32_t>(image_size, batchSize, inputInfo.second);
|
||||
} else if (precision == InferenceEngine::Precision::I64) {
|
||||
return createBlobImInfo<int64_t>(image_size, batchSize, inputInfo.second);
|
||||
} else {
|
||||
IE_THROW() << "Input precision is not supported for " << inputInfo.first;
|
||||
}
|
||||
}
|
||||
|
||||
InferenceEngine::Blob::Ptr getBinaryBlob(const std::vector<std::string>& files,
|
||||
size_t inputId,
|
||||
size_t batchSize,
|
||||
const std::pair<std::string, benchmark_app::InputInfo>& inputInfo,
|
||||
std::string* filenames_used = nullptr) {
|
||||
auto precision = inputInfo.second.precision;
|
||||
if (precision == InferenceEngine::Precision::FP32) {
|
||||
return createBlobFromBinary<float>(files, inputId, batchSize, inputInfo.second, filenames_used);
|
||||
} else if (precision == InferenceEngine::Precision::FP16) {
|
||||
return createBlobFromBinary<short>(files, inputId, batchSize, inputInfo.second, filenames_used);
|
||||
} else if (precision == InferenceEngine::Precision::I32) {
|
||||
return createBlobFromBinary<int32_t>(files, inputId, batchSize, inputInfo.second, filenames_used);
|
||||
} else if (precision == InferenceEngine::Precision::I64) {
|
||||
return createBlobFromBinary<int64_t>(files, inputId, batchSize, inputInfo.second, filenames_used);
|
||||
} else if ((precision == InferenceEngine::Precision::U8) || (precision == InferenceEngine::Precision::BOOL)) {
|
||||
return createBlobFromBinary<uint8_t>(files, inputId, batchSize, inputInfo.second, filenames_used);
|
||||
} else {
|
||||
IE_THROW() << "Input precision is not supported for " << inputInfo.first;
|
||||
}
|
||||
}
|
||||
|
||||
InferenceEngine::Blob::Ptr getRandomBlob(const std::pair<std::string, benchmark_app::InputInfo>& inputInfo) {
|
||||
auto precision = inputInfo.second.precision;
|
||||
if (precision == InferenceEngine::Precision::FP32) {
|
||||
return createBlobRandom<float, float>(inputInfo.second);
|
||||
} else if (precision == InferenceEngine::Precision::FP16) {
|
||||
return createBlobRandom<short, short>(inputInfo.second);
|
||||
} else if (precision == InferenceEngine::Precision::I32) {
|
||||
return createBlobRandom<int32_t, int32_t>(inputInfo.second);
|
||||
} else if (precision == InferenceEngine::Precision::I64) {
|
||||
return createBlobRandom<int64_t, int64_t>(inputInfo.second);
|
||||
} else if (precision == InferenceEngine::Precision::U8) {
|
||||
// uniform_int_distribution<uint8_t> is not allowed in the C++17
|
||||
// standard and vs2017/19
|
||||
return createBlobRandom<uint8_t, uint32_t>(inputInfo.second);
|
||||
} else if (precision == InferenceEngine::Precision::I8) {
|
||||
// uniform_int_distribution<int8_t> is not allowed in the C++17 standard
|
||||
// and vs2017/19
|
||||
return createBlobRandom<int8_t, int32_t>(inputInfo.second);
|
||||
} else if (precision == InferenceEngine::Precision::U16) {
|
||||
return createBlobRandom<uint16_t, uint16_t>(inputInfo.second);
|
||||
} else if (precision == InferenceEngine::Precision::I16) {
|
||||
return createBlobRandom<int16_t, int16_t>(inputInfo.second);
|
||||
} else if (precision == InferenceEngine::Precision::BOOL) {
|
||||
return createBlobRandom<uint8_t, uint32_t>(inputInfo.second, 0, 1);
|
||||
} else {
|
||||
IE_THROW() << "Input precision is not supported for " << inputInfo.first;
|
||||
}
|
||||
}
|
||||
|
||||
std::string getTestInfoStreamHeader(benchmark_app::InputInfo& inputInfo) {
|
||||
std::stringstream strOut;
|
||||
strOut << "(" << inputInfo.layout << ", " << inputInfo.precision << ", " << getShapeString(inputInfo.dataShape)
|
||||
<< ", ";
|
||||
if (inputInfo.partialShape.is_dynamic()) {
|
||||
strOut << std::string("dyn:") << inputInfo.partialShape << "):\t";
|
||||
} else {
|
||||
strOut << "static):\t";
|
||||
}
|
||||
return strOut.str();
|
||||
}
|
||||
|
||||
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> getBlobs(
|
||||
std::map<std::string, std::vector<std::string>>& inputFiles,
|
||||
std::vector<benchmark_app::InputsInfo>& app_inputs_info) {
|
||||
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> blobs;
|
||||
if (app_inputs_info.empty()) {
|
||||
throw std::logic_error("Inputs Info for network is empty!");
|
||||
}
|
||||
|
||||
size_t imageInputCount = input_image_sizes.size();
|
||||
size_t binaryInputCount = app_inputs_info.size() - imageInputCount;
|
||||
if (!inputFiles.empty() && inputFiles.size() != app_inputs_info[0].size()) {
|
||||
throw std::logic_error("Number of inputs specified in -i must be equal number of network inputs!");
|
||||
}
|
||||
|
||||
// count image type inputs of network
|
||||
std::vector<std::pair<size_t, size_t>> net_input_im_sizes;
|
||||
for (auto& inputs_info : app_inputs_info) {
|
||||
for (auto& input : inputs_info) {
|
||||
if (input.second.isImage()) {
|
||||
net_input_im_sizes.push_back(std::make_pair(input.second.width(), input.second.height()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& files : inputFiles) {
|
||||
if (!files.first.empty() && app_inputs_info[0].find(files.first) == app_inputs_info[0].end()) {
|
||||
throw std::logic_error("Input name \"" + files.first +
|
||||
"\" used in -i parameter doesn't match any network's input");
|
||||
}
|
||||
|
||||
std::string input_name = files.first.empty() ? app_inputs_info[0].begin()->first : files.first;
|
||||
auto input = app_inputs_info[0].at(input_name);
|
||||
if (!files.second.empty() && files.second[0] != "random" && files.second[0] != "image_info") {
|
||||
if (input.isImage()) {
|
||||
files.second = filterFilesByExtensions(files.second, supported_image_extensions);
|
||||
} else if (input.isImageInfo() && net_input_im_sizes.size() == app_inputs_info.size()) {
|
||||
slog::info << "Input '" << input_name
|
||||
<< "' probably is image info. All files for this input will"
|
||||
" be ignored."
|
||||
<< slog::endl;
|
||||
files.second = {"image_info"};
|
||||
continue;
|
||||
} else {
|
||||
files.second = filterFilesByExtensions(files.second, supported_binary_extensions);
|
||||
}
|
||||
}
|
||||
|
||||
if (files.second.empty()) {
|
||||
slog::warn << "No suitable files for input found! Random data will be used for input " << input_name
|
||||
<< slog::endl;
|
||||
files.second = {"random"};
|
||||
}
|
||||
|
||||
size_t filesToBeUsed = 0;
|
||||
size_t shapesToBeUsed = 0;
|
||||
if (files.second.size() > app_inputs_info.size()) {
|
||||
shapesToBeUsed = app_inputs_info.size();
|
||||
filesToBeUsed = files.second.size() - files.second.size() % app_inputs_info.size();
|
||||
if (filesToBeUsed != files.second.size()) {
|
||||
slog::warn << "Number of files must be a multiple of the number of shapes for certain input. Only " +
|
||||
std::to_string(filesToBeUsed) + " files will be added."
|
||||
<< slog::endl;
|
||||
}
|
||||
while (files.second.size() != filesToBeUsed) {
|
||||
files.second.pop_back();
|
||||
}
|
||||
} else {
|
||||
shapesToBeUsed = app_inputs_info.size() - app_inputs_info.size() % files.second.size();
|
||||
filesToBeUsed = files.second.size();
|
||||
if (shapesToBeUsed != app_inputs_info.size()) {
|
||||
slog::warn << "Number of data shapes must be a multiple of the number of files. For input "
|
||||
<< files.first << " only " + std::to_string(shapesToBeUsed) + " files will be added."
|
||||
<< slog::endl;
|
||||
}
|
||||
while (app_inputs_info.size() != shapesToBeUsed) {
|
||||
app_inputs_info.pop_back();
|
||||
net_input_im_sizes.pop_back();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::map<std::string, std::string>> logOutput;
|
||||
// All inputs should process equal number of files, so for the case of N, 1, N number of files,
|
||||
// second input also should have N blobs cloned from 1 file
|
||||
size_t filesNum = 0;
|
||||
if (!inputFiles.empty()) {
|
||||
filesNum = std::max_element(inputFiles.begin(),
|
||||
inputFiles.end(),
|
||||
[](const std::pair<std::string, std::vector<std::string>>& a,
|
||||
const std::pair<std::string, std::vector<std::string>>& b) {
|
||||
return a.second.size() < b.second.size();
|
||||
})
|
||||
->second.size();
|
||||
} else {
|
||||
std::vector<std::pair<size_t, size_t>> net_input_im_sizes;
|
||||
for (auto& input_info : app_inputs_info[0]) {
|
||||
inputFiles[input_info.first] = {"random"};
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& files : inputFiles) {
|
||||
std::string input_name = files.first.empty() ? app_inputs_info[0].begin()->first : files.first;
|
||||
size_t n_shape = 0, m_file = 0;
|
||||
while (n_shape < app_inputs_info.size() || m_file < filesNum) {
|
||||
size_t batchSize = getBatchSize(app_inputs_info[n_shape % app_inputs_info.size()]);
|
||||
size_t inputId = m_file % files.second.size();
|
||||
auto input_info = app_inputs_info[n_shape % app_inputs_info.size()].at(input_name);
|
||||
|
||||
std::string blob_src_info;
|
||||
if (files.second[0] == "random") {
|
||||
// Fill random
|
||||
blob_src_info =
|
||||
"random (" + std::string((input_info.isImage() ? "image" : "binary data")) + " is expected)";
|
||||
blobs[input_name].push_back(getRandomBlob({input_name, input_info}));
|
||||
} else if (files.second[0] == "image_info") {
|
||||
// Most likely it is image info: fill with image information
|
||||
auto image_size = net_input_im_sizes.at(n_shape % app_inputs_info.size());
|
||||
blob_src_info =
|
||||
"Image size blob " + std::to_string(image_size.first) + " x " + std::to_string(image_size.second);
|
||||
blobs[input_name].push_back(getImInfoBlob(image_size, batchSize, {input_name, input_info}));
|
||||
} else if (input_info.isImage()) {
|
||||
// Fill with Images
|
||||
blobs[input_name].push_back(
|
||||
getImageBlob(files.second, inputId, batchSize, {input_name, input_info}, &blob_src_info));
|
||||
} else {
|
||||
// Fill with binary files
|
||||
blobs[input_name].push_back(
|
||||
getBinaryBlob(files.second, inputId, batchSize, {input_name, input_info}, &blob_src_info));
|
||||
}
|
||||
|
||||
// Preparing info
|
||||
std::string strOut = getTestInfoStreamHeader(input_info) + blob_src_info;
|
||||
if (n_shape >= logOutput.size()) {
|
||||
logOutput.resize(n_shape + 1);
|
||||
}
|
||||
logOutput[n_shape][input_name] += strOut;
|
||||
|
||||
++n_shape;
|
||||
m_file += batchSize;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < logOutput.size(); i++) {
|
||||
slog::info << "Test Config " << i << slog::endl;
|
||||
auto maxNameWidth = std::max_element(logOutput[i].begin(),
|
||||
logOutput[i].end(),
|
||||
[](const std::pair<std::string, std::string>& a,
|
||||
const std::pair<std::string, std::string>& b) {
|
||||
return a.first.size() < b.first.size();
|
||||
})
|
||||
->first.size();
|
||||
for (auto inputLog : logOutput[i]) {
|
||||
slog::info << std::left << std::setw(maxNameWidth + 2) << inputLog.first << inputLog.second << slog::endl;
|
||||
}
|
||||
}
|
||||
|
||||
return blobs;
|
||||
}
|
||||
|
||||
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> getBlobsStaticCase(
|
||||
const std::vector<std::string>& inputFiles,
|
||||
const size_t& batchSize,
|
||||
benchmark_app::InputsInfo& app_inputs_info,
|
||||
size_t requestsNum) {
|
||||
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> blobs;
|
||||
|
||||
std::vector<std::pair<size_t, size_t>> net_input_im_sizes;
|
||||
for (auto& item : app_inputs_info) {
|
||||
if (item.second.isImage()) {
|
||||
net_input_im_sizes.push_back(std::make_pair(item.second.width(), item.second.height()));
|
||||
}
|
||||
}
|
||||
|
||||
size_t imageInputsNum = net_input_im_sizes.size();
|
||||
size_t binaryInputsNum = app_inputs_info.size() - imageInputsNum;
|
||||
|
||||
std::vector<std::string> binaryFiles;
|
||||
std::vector<std::string> imageFiles;
|
||||
@ -255,7 +517,7 @@ void fillBlobs(const std::vector<std::string>& inputFiles,
|
||||
binaryFiles = filterFilesByExtensions(inputFiles, supported_binary_extensions);
|
||||
std::sort(std::begin(binaryFiles), std::end(binaryFiles));
|
||||
|
||||
auto binaryToBeUsed = binaryInputCount * batchSize * requests.size();
|
||||
auto binaryToBeUsed = binaryInputsNum * batchSize * requestsNum;
|
||||
if (binaryToBeUsed > 0 && binaryFiles.empty()) {
|
||||
std::stringstream ss;
|
||||
for (auto& ext : supported_binary_extensions) {
|
||||
@ -278,7 +540,7 @@ void fillBlobs(const std::vector<std::string>& inputFiles,
|
||||
imageFiles = filterFilesByExtensions(inputFiles, supported_image_extensions);
|
||||
std::sort(std::begin(imageFiles), std::end(imageFiles));
|
||||
|
||||
auto imagesToBeUsed = imageInputCount * batchSize * requests.size();
|
||||
auto imagesToBeUsed = imageInputsNum * batchSize * requestsNum;
|
||||
if (imagesToBeUsed > 0 && imageFiles.empty()) {
|
||||
std::stringstream ss;
|
||||
for (auto& ext : supported_image_extensions) {
|
||||
@ -299,156 +561,129 @@ void fillBlobs(const std::vector<std::string>& inputFiles,
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t requestId = 0; requestId < requests.size(); requestId++) {
|
||||
slog::info << "Infer Request " << requestId << " filling" << slog::endl;
|
||||
std::map<std::string, std::vector<std::string>> mappedFiles;
|
||||
size_t imageInputsCount = 0;
|
||||
size_t binaryInputsCount = 0;
|
||||
for (auto& input : app_inputs_info) {
|
||||
if (input.second.isImage()) {
|
||||
mappedFiles[input.first] = {};
|
||||
for (size_t i = 0; i < imageFiles.size(); i += imageInputsNum) {
|
||||
mappedFiles[input.first].push_back(
|
||||
imageFiles[(imageInputsCount + i) * imageInputsNum % imageFiles.size()]);
|
||||
}
|
||||
++imageInputsCount;
|
||||
} else {
|
||||
mappedFiles[input.first] = {};
|
||||
if (!binaryFiles.empty()) {
|
||||
for (size_t i = 0; i < binaryFiles.size(); i += binaryInputsNum) {
|
||||
mappedFiles[input.first].push_back(binaryFiles[(binaryInputsCount + i) % binaryFiles.size()]);
|
||||
}
|
||||
}
|
||||
++binaryInputsCount;
|
||||
}
|
||||
}
|
||||
|
||||
size_t filesNum = 0;
|
||||
if (!inputFiles.empty()) {
|
||||
filesNum = std::max_element(mappedFiles.begin(),
|
||||
mappedFiles.end(),
|
||||
[](const std::pair<std::string, std::vector<std::string>>& a,
|
||||
const std::pair<std::string, std::vector<std::string>>& b) {
|
||||
return a.second.size() < b.second.size();
|
||||
})
|
||||
->second.size();
|
||||
}
|
||||
size_t test_configs_num = filesNum / batchSize == 0 ? 1 : filesNum / batchSize;
|
||||
std::vector<std::map<std::string, std::string>> logOutput(test_configs_num);
|
||||
for (const auto& files : mappedFiles) {
|
||||
size_t imageInputId = 0;
|
||||
size_t binaryInputId = 0;
|
||||
for (auto& item : app_inputs_info) {
|
||||
Blob::Ptr inputBlob = requests.at(requestId)->getBlob(item.first);
|
||||
auto app_info = app_inputs_info.at(item.first);
|
||||
auto precision = app_info.precision;
|
||||
if (app_info.isImage()) {
|
||||
auto input_name = files.first;
|
||||
auto input_info = app_inputs_info.at(files.first);
|
||||
|
||||
for (size_t i = 0; i < test_configs_num; ++i) {
|
||||
std::string blob_src_info;
|
||||
if (input_info.isImage()) {
|
||||
if (!imageFiles.empty()) {
|
||||
// Fill with Images
|
||||
if (precision == InferenceEngine::Precision::FP32) {
|
||||
fillBlobImage<float>(inputBlob,
|
||||
imageFiles,
|
||||
batchSize,
|
||||
app_info,
|
||||
requestId,
|
||||
imageInputId++,
|
||||
imageInputCount);
|
||||
} else if (precision == InferenceEngine::Precision::FP16) {
|
||||
fillBlobImage<short>(inputBlob,
|
||||
imageFiles,
|
||||
batchSize,
|
||||
app_info,
|
||||
requestId,
|
||||
imageInputId++,
|
||||
imageInputCount);
|
||||
} else if (precision == InferenceEngine::Precision::I32) {
|
||||
fillBlobImage<int32_t>(inputBlob,
|
||||
imageFiles,
|
||||
batchSize,
|
||||
app_info,
|
||||
requestId,
|
||||
imageInputId++,
|
||||
imageInputCount);
|
||||
} else if (precision == InferenceEngine::Precision::I64) {
|
||||
fillBlobImage<int64_t>(inputBlob,
|
||||
imageFiles,
|
||||
batchSize,
|
||||
app_info,
|
||||
requestId,
|
||||
imageInputId++,
|
||||
imageInputCount);
|
||||
} else if (precision == InferenceEngine::Precision::U8) {
|
||||
fillBlobImage<uint8_t>(inputBlob,
|
||||
imageFiles,
|
||||
batchSize,
|
||||
app_info,
|
||||
requestId,
|
||||
imageInputId++,
|
||||
imageInputCount);
|
||||
} else {
|
||||
IE_THROW() << "Input precision is not supported for " << item.first;
|
||||
}
|
||||
blobs[input_name].push_back(
|
||||
getImageBlob(files.second, imageInputId, batchSize, {input_name, input_info}, &blob_src_info));
|
||||
imageInputId = (imageInputId + batchSize) % files.second.size();
|
||||
logOutput[i][input_name] += getTestInfoStreamHeader(input_info) + blob_src_info;
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
if (!binaryFiles.empty()) {
|
||||
// Fill with binary files
|
||||
if (precision == InferenceEngine::Precision::FP32) {
|
||||
fillBlobBinary<float>(inputBlob,
|
||||
binaryFiles,
|
||||
batchSize,
|
||||
requestId,
|
||||
binaryInputId++,
|
||||
binaryInputCount);
|
||||
} else if (precision == InferenceEngine::Precision::FP16) {
|
||||
fillBlobBinary<short>(inputBlob,
|
||||
binaryFiles,
|
||||
batchSize,
|
||||
requestId,
|
||||
binaryInputId++,
|
||||
binaryInputCount);
|
||||
} else if (precision == InferenceEngine::Precision::I32) {
|
||||
fillBlobBinary<int32_t>(inputBlob,
|
||||
binaryFiles,
|
||||
batchSize,
|
||||
requestId,
|
||||
binaryInputId++,
|
||||
binaryInputCount);
|
||||
} else if (precision == InferenceEngine::Precision::I64) {
|
||||
fillBlobBinary<int64_t>(inputBlob,
|
||||
binaryFiles,
|
||||
batchSize,
|
||||
requestId,
|
||||
binaryInputId++,
|
||||
binaryInputCount);
|
||||
} else if ((precision == InferenceEngine::Precision::U8) ||
|
||||
(precision == InferenceEngine::Precision::BOOL)) {
|
||||
fillBlobBinary<uint8_t>(inputBlob,
|
||||
binaryFiles,
|
||||
batchSize,
|
||||
requestId,
|
||||
binaryInputId++,
|
||||
binaryInputCount);
|
||||
} else {
|
||||
IE_THROW() << "Input precision is not supported for " << item.first;
|
||||
}
|
||||
blobs[input_name].push_back(getBinaryBlob(files.second,
|
||||
binaryInputId,
|
||||
batchSize,
|
||||
{input_name, input_info},
|
||||
&blob_src_info));
|
||||
binaryInputId = (binaryInputId + batchSize) % files.second.size();
|
||||
logOutput[i][input_name] += getTestInfoStreamHeader(input_info) + blob_src_info;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (app_info.isImageInfo() && (input_image_sizes.size() == 1)) {
|
||||
if (input_info.isImageInfo() && (net_input_im_sizes.size() == 1)) {
|
||||
// Most likely it is image info: fill with image information
|
||||
auto image_size = input_image_sizes.at(0);
|
||||
slog::info << "Fill input '" << item.first << "' with image size " << image_size.first << "x"
|
||||
<< image_size.second << slog::endl;
|
||||
if (precision == InferenceEngine::Precision::FP32) {
|
||||
fillBlobImInfo<float>(inputBlob, batchSize, image_size);
|
||||
} else if (precision == InferenceEngine::Precision::FP16) {
|
||||
fillBlobImInfo<short>(inputBlob, batchSize, image_size);
|
||||
} else if (precision == InferenceEngine::Precision::I32) {
|
||||
fillBlobImInfo<int32_t>(inputBlob, batchSize, image_size);
|
||||
} else if (precision == InferenceEngine::Precision::I64) {
|
||||
fillBlobImInfo<int64_t>(inputBlob, batchSize, image_size);
|
||||
} else {
|
||||
IE_THROW() << "Input precision is not supported for image info!";
|
||||
}
|
||||
auto image_size = net_input_im_sizes.at(0);
|
||||
blob_src_info = "Image size blob " + std::to_string(image_size.first) + " x " +
|
||||
std::to_string(image_size.second);
|
||||
blobs[input_name].push_back(getImInfoBlob(image_size, batchSize, {input_name, input_info}));
|
||||
logOutput[i][input_name] += getTestInfoStreamHeader(input_info) + blob_src_info;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Fill random
|
||||
slog::info << "Fill input '" << item.first << "' with random values ("
|
||||
<< std::string((app_info.isImage() ? "image" : "some binary data")) << " is expected)"
|
||||
<< slog::endl;
|
||||
if (precision == InferenceEngine::Precision::FP32) {
|
||||
fillBlobRandom<float, float>(inputBlob);
|
||||
} else if (precision == InferenceEngine::Precision::FP16) {
|
||||
fillBlobRandom<short, short>(inputBlob);
|
||||
} else if (precision == InferenceEngine::Precision::I32) {
|
||||
fillBlobRandom<int32_t, int32_t>(inputBlob);
|
||||
} else if (precision == InferenceEngine::Precision::I64) {
|
||||
fillBlobRandom<int64_t, int64_t>(inputBlob);
|
||||
} else if (precision == InferenceEngine::Precision::U8) {
|
||||
// uniform_int_distribution<uint8_t> is not allowed in the C++17
|
||||
// standard and vs2017/19
|
||||
fillBlobRandom<uint8_t, uint32_t>(inputBlob);
|
||||
} else if (precision == InferenceEngine::Precision::I8) {
|
||||
// uniform_int_distribution<int8_t> is not allowed in the C++17 standard
|
||||
// and vs2017/19
|
||||
fillBlobRandom<int8_t, int32_t>(inputBlob);
|
||||
} else if (precision == InferenceEngine::Precision::U16) {
|
||||
fillBlobRandom<uint16_t, uint16_t>(inputBlob);
|
||||
} else if (precision == InferenceEngine::Precision::I16) {
|
||||
fillBlobRandom<int16_t, int16_t>(inputBlob);
|
||||
} else if (precision == InferenceEngine::Precision::BOOL) {
|
||||
fillBlobRandom<uint8_t, uint32_t>(inputBlob, 0, 1);
|
||||
} else {
|
||||
IE_THROW() << "Input precision is not supported for " << item.first;
|
||||
}
|
||||
blob_src_info =
|
||||
"random (" + std::string((input_info.isImage() ? "image" : "binary data")) + " is expected)";
|
||||
blobs[input_name].push_back(getRandomBlob({input_name, input_info}));
|
||||
logOutput[i][input_name] += getTestInfoStreamHeader(input_info) + blob_src_info;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < logOutput.size(); i++) {
|
||||
slog::info << "Test Config " << i << slog::endl;
|
||||
auto maxNameWidth = std::max_element(logOutput[i].begin(),
|
||||
logOutput[i].end(),
|
||||
[](const std::pair<std::string, std::string>& a,
|
||||
const std::pair<std::string, std::string>& b) {
|
||||
return a.first.size() < b.first.size();
|
||||
})
|
||||
->first.size();
|
||||
for (auto inputLog : logOutput[i]) {
|
||||
slog::info << std::left << std::setw(maxNameWidth + 2) << inputLog.first << inputLog.second << slog::endl;
|
||||
}
|
||||
}
|
||||
|
||||
return blobs;
|
||||
}
|
||||
|
||||
void copyBlobData(InferenceEngine::Blob::Ptr& dst, const InferenceEngine::Blob::Ptr& src) {
|
||||
if (src->getTensorDesc() != dst->getTensorDesc()) {
|
||||
throw std::runtime_error(
|
||||
"Source and destination blobs tensor descriptions are expected to be equal for data copying.");
|
||||
}
|
||||
|
||||
InferenceEngine::MemoryBlob::Ptr srcMinput = as<InferenceEngine::MemoryBlob>(src);
|
||||
if (!srcMinput) {
|
||||
IE_THROW() << "We expect source blob to be inherited from MemoryBlob in "
|
||||
"fillBlobImage, "
|
||||
<< "but by fact we were not able to cast source blob to MemoryBlob";
|
||||
}
|
||||
// locked memory holder should be alive all time while access to its buffer
|
||||
// happens
|
||||
auto srcMinputHolder = srcMinput->wmap();
|
||||
auto srcBlobData = srcMinputHolder.as<void*>();
|
||||
|
||||
InferenceEngine::MemoryBlob::Ptr dstMinput = as<InferenceEngine::MemoryBlob>(dst);
|
||||
if (!dstMinput) {
|
||||
IE_THROW() << "We expect destination blob to be inherited from MemoryBlob in "
|
||||
"fillBlobImage, "
|
||||
<< "but by fact we were not able to cast destination blob to MemoryBlob";
|
||||
}
|
||||
auto dstMinputHolder = dstMinput->wmap();
|
||||
auto dstBlobData = dstMinputHolder.as<void*>();
|
||||
|
||||
std::memcpy(dstBlobData, srcBlobData, src->byteSize());
|
||||
}
|
||||
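As a usage sketch only (the variable and input names below are illustrative, not taken from the patch), copyBlobData() is presumably what refreshes a request's input between iterations in full mode: data prepared once by getBlobs() is copied into the blob owned by the infer request before the next inference.

// Assumed context: 'request' is an InferReqWrap::Ptr, 'prepared' is a Blob::Ptr built beforehand.
InferenceEngine::Blob::Ptr dst = request->getBlob("data");  // "data" is an example input name
copyBlobData(dst, prepared);  // tensor descriptors must match, otherwise copyBlobData throws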
|
@ -14,7 +14,14 @@
|
||||
#include "utils.hpp"
|
||||
// clang-format on
|
||||
|
||||
void fillBlobs(const std::vector<std::string>& inputFiles,
|
||||
const size_t& batchSize,
|
||||
benchmark_app::InputsInfo& app_inputs_info,
|
||||
std::vector<InferReqWrap::Ptr> requests);
|
||||
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> getBlobs(
|
||||
std::map<std::string, std::vector<std::string>>& inputFiles,
|
||||
std::vector<benchmark_app::InputsInfo>& app_inputs_info);
|
||||
|
||||
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> getBlobsStaticCase(
|
||||
const std::vector<std::string>& inputFiles,
|
||||
const size_t& batchSize,
|
||||
benchmark_app::InputsInfo& app_inputs_info,
|
||||
size_t requestsNum);
|
||||
|
||||
void copyBlobData(InferenceEngine::Blob::Ptr& dst, const InferenceEngine::Blob::Ptr& src);
|
||||
|
@ -34,14 +34,6 @@ using namespace InferenceEngine;
|
||||
|
||||
static const size_t progressBarDefaultTotalCount = 1000;
|
||||
|
||||
uint64_t getDurationInMilliseconds(uint32_t duration) {
|
||||
return duration * 1000LL;
|
||||
}
|
||||
|
||||
uint64_t getDurationInNanoseconds(uint32_t duration) {
|
||||
return duration * 1000000000LL;
|
||||
}
|
||||
|
||||
bool ParseAndCheckCommandLine(int argc, char* argv[]) {
|
||||
// ---------------------------Parsing and validating input
|
||||
// arguments--------------------------------------
|
||||
@ -104,7 +96,7 @@ static void next_step(const std::string additional_info = "") {
|
||||
{6, "Configuring input of the model"},
|
||||
{7, "Loading the model to the device"},
|
||||
{8, "Setting optimal runtime parameters"},
|
||||
{9, "Creating infer requests and filling input blobs with images"},
|
||||
{9, "Creating infer requests and preparing input blobs with data"},
|
||||
{10, "Measuring performance"},
|
||||
{11, "Dumping statistics report"}};
|
||||
|
||||
@ -116,13 +108,6 @@ static void next_step(const std::string additional_info = "") {
|
||||
<< (additional_info.empty() ? "" : " (" + additional_info + ")") << std::endl;
|
||||
}
|
||||
|
||||
template <typename T>
T getMedianValue(const std::vector<T>& vec, std::size_t percentile) {
    std::vector<T> sortedVec(vec);
    std::sort(sortedVec.begin(), sortedVec.end());
    return sortedVec[(sortedVec.size() / 100) * percentile];
}

/**
|
||||
* @brief The entry point of the benchmark application
|
||||
*/
|
||||
@ -180,15 +165,15 @@ int main(int argc, char* argv[]) {
|
||||
load_config(FLAGS_load_config, config);
|
||||
}
|
||||
#endif
|
||||
/** This vector stores paths to the processed images **/
|
||||
std::vector<std::string> inputFiles;
|
||||
parseInputFilesArguments(inputFiles);
|
||||
/** This vector stores paths to the processed images with input names**/
|
||||
auto inputFiles = parseInputArguments(gflags::GetArgvs());
|
||||
|
||||
// ----------------- 2. Loading the Inference Engine
|
||||
// -----------------------------------------------------------
|
||||
next_step();
|
||||
|
||||
Core ie;
|
||||
|
||||
if (FLAGS_d.find("CPU") != std::string::npos && !FLAGS_l.empty()) {
|
||||
// CPU (MKLDNN) extensions is loaded as a shared library and passed as a
|
||||
// pointer to base extension
|
||||
@ -378,19 +363,10 @@ int main(int argc, char* argv[]) {
|
||||
ie.SetConfig(item.second, item.first);
|
||||
}
|
||||
|
||||
auto double_to_string = [](const double number) {
|
||||
std::stringstream ss;
|
||||
ss << std::fixed << std::setprecision(2) << number;
|
||||
return ss.str();
|
||||
};
|
||||
auto get_total_ms_time = [](Time::time_point& startTime) {
|
||||
return std::chrono::duration_cast<ns>(Time::now() - startTime).count() * 0.000001;
|
||||
};
|
||||
|
||||
size_t batchSize = FLAGS_b;
|
||||
Precision precision = Precision::UNSPECIFIED;
|
||||
std::string topology_name = "";
|
||||
benchmark_app::InputsInfo app_inputs_info;
|
||||
std::vector<benchmark_app::InputsInfo> app_inputs_info;
|
||||
std::string output_name;
|
||||
|
||||
// Takes priority over config from file
|
||||
@ -398,6 +374,7 @@ int main(int argc, char* argv[]) {
|
||||
ie.SetConfig({{CONFIG_KEY(CACHE_DIR), FLAGS_cache_dir}});
|
||||
}
|
||||
|
||||
bool isDynamicNetwork = false;
|
||||
if (FLAGS_load_from_file && !isNetworkCompiled) {
|
||||
next_step();
|
||||
slog::info << "Skipping the step for loading network from file" << slog::endl;
|
||||
@ -407,14 +384,15 @@ int main(int argc, char* argv[]) {
|
||||
slog::info << "Skipping the step for loading network from file" << slog::endl;
|
||||
auto startTime = Time::now();
|
||||
exeNetwork = ie.LoadNetwork(FLAGS_m, device_name);
|
||||
auto duration_ms = double_to_string(get_total_ms_time(startTime));
|
||||
auto duration_ms = double_to_string(get_duration_ms_till_now(startTime));
|
||||
slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
|
||||
if (statistics)
|
||||
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
{{"load network time (ms)", duration_ms}});
|
||||
app_inputs_info = getInputsInfo<InputInfo::CPtr>(FLAGS_shape,
|
||||
FLAGS_layout,
|
||||
FLAGS_b,
|
||||
batchSize,
|
||||
FLAGS_data_shape,
|
||||
FLAGS_iscale,
|
||||
FLAGS_imean,
|
||||
exeNetwork.GetInputsInfo());
|
||||
@ -430,7 +408,7 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
auto startTime = Time::now();
|
||||
CNNNetwork cnnNetwork = ie.ReadNetwork(FLAGS_m);
|
||||
auto duration_ms = double_to_string(get_total_ms_time(startTime));
|
||||
auto duration_ms = double_to_string(get_duration_ms_till_now(startTime));
|
||||
slog::info << "Read network took " << duration_ms << " ms" << slog::endl;
|
||||
if (statistics)
|
||||
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
@ -444,34 +422,45 @@ int main(int argc, char* argv[]) {
|
||||
// ----------------- 5. Resizing network to match image sizes and given
|
||||
// batch ----------------------------------
|
||||
next_step();
|
||||
batchSize = cnnNetwork.getBatchSize();
|
||||
// Parse input shapes if specified
|
||||
bool reshape = false;
|
||||
app_inputs_info = getInputsInfo<InputInfo::Ptr>(FLAGS_shape,
|
||||
FLAGS_layout,
|
||||
FLAGS_b,
|
||||
FLAGS_data_shape,
|
||||
FLAGS_iscale,
|
||||
FLAGS_imean,
|
||||
inputInfo,
|
||||
reshape);
|
||||
if (reshape) {
|
||||
InferenceEngine::ICNNNetwork::InputShapes shapes = {};
|
||||
for (auto& item : app_inputs_info)
|
||||
shapes[item.first] = item.second.shape;
|
||||
benchmark_app::PartialShapes shapes = {};
|
||||
for (auto& item : app_inputs_info[0])
|
||||
shapes[item.first] = item.second.partialShape;
|
||||
slog::info << "Reshaping network: " << getShapesString(shapes) << slog::endl;
|
||||
startTime = Time::now();
|
||||
cnnNetwork.reshape(shapes);
|
||||
duration_ms = double_to_string(get_total_ms_time(startTime));
|
||||
duration_ms = double_to_string(get_duration_ms_till_now(startTime));
|
||||
slog::info << "Reshape network took " << duration_ms << " ms" << slog::endl;
|
||||
if (statistics)
|
||||
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
{{"reshape network time (ms)", duration_ms}});
|
||||
}
|
||||
// use batch size according to provided layout and shapes
|
||||
batchSize = (!FLAGS_layout.empty()) ? getBatchSize(app_inputs_info) : cnnNetwork.getBatchSize();
|
||||
|
||||
topology_name = cnnNetwork.getName();
|
||||
slog::info << (FLAGS_b != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize
|
||||
|
||||
// Check if network has dynamic shapes
|
||||
auto input_info = app_inputs_info[0];
|
||||
isDynamicNetwork = std::any_of(input_info.begin(),
|
||||
input_info.end(),
|
||||
[](const std::pair<std::string, benchmark_app::InputInfo>& i) {
|
||||
return i.second.partialShape.is_dynamic();
|
||||
});
|
||||
|
||||
// use batch size according to provided layout and shapes (static case)
|
||||
if (batchSize == 0 || !isDynamicNetwork) {
|
||||
batchSize = (!FLAGS_layout.empty()) ? getBatchSize(app_inputs_info[0]) : cnnNetwork.getBatchSize();
|
||||
}
|
||||
|
||||
slog::info << (batchSize != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize
|
||||
<< slog::endl;
|
||||
|
||||
// ----------------- 6. Configuring inputs and outputs
|
||||
@ -482,11 +471,12 @@ int main(int argc, char* argv[]) {
|
||||
for (auto& item : cnnNetwork.getInputsInfo()) {
|
||||
// if precision for input set by user, then set it to app_inputs
|
||||
// if it an image, set U8
|
||||
if (!FLAGS_ip.empty() || FLAGS_iop.find(item.first) != std::string::npos) {
|
||||
app_inputs_info.at(item.first).precision = item.second->getPrecision();
|
||||
} else if (app_inputs_info.at(item.first).isImage()) {
|
||||
app_inputs_info.at(item.first).precision = Precision::U8;
|
||||
item.second->setPrecision(app_inputs_info.at(item.first).precision);
|
||||
if (!FLAGS_ip.empty() || FLAGS_iop.find(item.first) != std::string::npos ||
|
||||
item.second->getPartialShape().is_dynamic()) {
|
||||
app_inputs_info[0].at(item.first).precision = item.second->getPrecision();
|
||||
} else if (app_inputs_info[0].at(item.first).isImage()) {
|
||||
app_inputs_info[0].at(item.first).precision = Precision::U8;
|
||||
item.second->setPrecision(app_inputs_info[0].at(item.first).precision);
|
||||
}
|
||||
}
|
||||
|
||||
@ -496,7 +486,7 @@ int main(int argc, char* argv[]) {
|
||||
next_step();
|
||||
startTime = Time::now();
|
||||
exeNetwork = ie.LoadNetwork(cnnNetwork, device_name);
|
||||
duration_ms = double_to_string(get_total_ms_time(startTime));
|
||||
duration_ms = double_to_string(get_duration_ms_till_now(startTime));
|
||||
slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
|
||||
if (statistics)
|
||||
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
@ -513,7 +503,7 @@ int main(int argc, char* argv[]) {
|
||||
next_step();
|
||||
auto startTime = Time::now();
|
||||
exeNetwork = ie.ImportNetwork(FLAGS_m, device_name, {});
|
||||
auto duration_ms = double_to_string(get_total_ms_time(startTime));
|
||||
auto duration_ms = double_to_string(get_duration_ms_till_now(startTime));
|
||||
slog::info << "Import network took " << duration_ms << " ms" << slog::endl;
|
||||
if (statistics)
|
||||
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
@ -521,6 +511,7 @@ int main(int argc, char* argv[]) {
|
||||
app_inputs_info = getInputsInfo<InputInfo::CPtr>(FLAGS_shape,
|
||||
FLAGS_layout,
|
||||
FLAGS_b,
|
||||
FLAGS_data_shape,
|
||||
FLAGS_iscale,
|
||||
FLAGS_imean,
|
||||
exeNetwork.GetInputsInfo());
|
||||
@ -528,6 +519,23 @@ int main(int argc, char* argv[]) {
|
||||
batchSize = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (isDynamicNetwork && FLAGS_api == "sync") {
|
||||
throw std::logic_error("Benchmarking of the model with dynamic shapes is available for async API only."
|
||||
"Please use -api async -nstreams 1 -nireq 1 to emulate sync behavior");
|
||||
}
|
||||
|
||||
// Defining of benchmark mode
|
||||
// for static models inference only mode is used as default one
|
||||
bool inferenceOnly = FLAGS_inference_only;
|
||||
if (isDynamicNetwork) {
|
||||
if (isFlagSetInCommandLine("inference_only") && inferenceOnly && app_inputs_info.size() != 1) {
|
||||
throw std::logic_error(
|
||||
"Dynamic models with different input data shapes must be benchmarked only in full mode.");
|
||||
}
|
||||
inferenceOnly = isFlagSetInCommandLine("inference_only") && inferenceOnly && app_inputs_info.size() == 1;
|
||||
}
|
||||
|
||||
// ----------------- 8. Querying optimal runtime parameters
|
||||
// -----------------------------------------------------
|
||||
next_step();
|
||||
@ -573,11 +581,21 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
// Iteration limit
|
||||
uint32_t niter = FLAGS_niter;
|
||||
size_t shape_groups_num = app_inputs_info.size();
|
||||
if ((niter > 0) && (FLAGS_api == "async")) {
|
||||
niter = ((niter + nireq - 1) / nireq) * nireq;
|
||||
if (FLAGS_niter != niter) {
|
||||
slog::warn << "Number of iterations was aligned by request number from " << FLAGS_niter << " to "
|
||||
<< niter << " using number of requests " << nireq << slog::endl;
|
||||
if (shape_groups_num > nireq) {
|
||||
niter = ((niter + shape_groups_num - 1) / shape_groups_num) * shape_groups_num;
|
||||
if (FLAGS_niter != niter) {
|
||||
slog::warn << "Number of iterations was aligned by data shape groups number from " << FLAGS_niter
|
||||
<< " to " << niter << " using number of possible input shapes " << shape_groups_num
|
||||
<< slog::endl;
|
||||
}
|
||||
} else {
|
||||
niter = ((niter + nireq - 1) / nireq) * nireq;
|
||||
if (FLAGS_niter != niter) {
|
||||
slog::warn << "Number of iterations was aligned by request number from " << FLAGS_niter << " to "
|
||||
<< niter << " using number of requests " << nireq << slog::endl;
|
||||
}
|
||||
}
|
||||
}
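// The alignment above rounds niter up to the nearest multiple of the chosen divisor (nireq, or
// the number of data shape groups). A minimal sketch of the rule with illustrative numbers:
// aligned_niter = ((niter + divisor - 1) / divisor) * divisor;  // niter = 10, divisor = 4 -> 12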
|
||||
|
||||
@ -596,6 +614,7 @@ int main(int argc, char* argv[]) {
|
||||
statistics->addParameters(
|
||||
StatisticsReport::Category::RUNTIME_CONFIG,
|
||||
{
|
||||
{"benchmark mode", inferenceOnly ? "inference only" : "full"},
|
||||
{"topology", topology_name},
|
||||
{"target device", device_name},
|
||||
{"API", FLAGS_api},
|
||||
@ -619,18 +638,46 @@ int main(int argc, char* argv[]) {
|
||||
// ----------------------------------------
|
||||
next_step();
|
||||
|
||||
InferRequestsQueue inferRequestsQueue(exeNetwork, nireq);
|
||||
if (isFlagSetInCommandLine("use_device_mem")) {
|
||||
if (device_name.find("GPU") == 0)
|
||||
::gpu::fillRemoteBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests, exeNetwork);
|
||||
else if (device_name.find("CPU") == 0)
|
||||
fillBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests);
|
||||
else
|
||||
IE_THROW() << "Requested device doesn't support `use_device_mem` option.";
|
||||
} else {
|
||||
fillBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests);
|
||||
}
|
||||
InferRequestsQueue inferRequestsQueue(exeNetwork, nireq, app_inputs_info.size(), FLAGS_pcseq);
|
||||
|
||||
bool inputHasName = false;
|
||||
if (inputFiles.size() > 0) {
|
||||
inputHasName = inputFiles.begin()->first != "";
|
||||
}
|
||||
bool newInputType = isDynamicNetwork || inputHasName;
|
||||
// create vector to store remote input blobs buffer
|
||||
std::vector<::gpu::BufferType> clInputsBuffer;
|
||||
bool useGpuMem = false;
|
||||
|
||||
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> inputsData;
|
||||
if (isFlagSetInCommandLine("use_device_mem")) {
|
||||
if (device_name.find("GPU") == 0) {
|
||||
inputsData = ::gpu::getRemoteInputBlobs(inputFiles, app_inputs_info, exeNetwork, clInputsBuffer);
|
||||
useGpuMem = true;
|
||||
} else if (device_name.find("CPU") == 0) {
|
||||
if (newInputType) {
|
||||
inputsData = getBlobs(inputFiles, app_inputs_info);
|
||||
} else {
|
||||
inputsData =
|
||||
getBlobsStaticCase(inputFiles.empty() ? std::vector<std::string>{} : inputFiles.begin()->second,
|
||||
batchSize,
|
||||
app_inputs_info[0],
|
||||
nireq);
|
||||
}
|
||||
} else {
|
||||
IE_THROW() << "Requested device doesn't support `use_device_mem` option.";
|
||||
}
|
||||
} else {
|
||||
if (newInputType) {
|
||||
inputsData = getBlobs(inputFiles, app_inputs_info);
|
||||
} else {
|
||||
inputsData =
|
||||
getBlobsStaticCase(inputFiles.empty() ? std::vector<std::string>{} : inputFiles.begin()->second,
|
||||
batchSize,
|
||||
app_inputs_info[0],
|
||||
nireq);
|
||||
}
|
||||
}
|
||||
// ----------------- 10. Measuring performance
|
||||
// ------------------------------------------------------------------
|
||||
size_t progressCnt = 0;
|
||||
@ -668,26 +715,91 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
ss << niter << " iterations";
|
||||
}
|
||||
|
||||
next_step(ss.str());
|
||||
|
||||
if (inferenceOnly) {
|
||||
slog::info << "BENCHMARK IS IN INFERENCE ONLY MODE." << slog::endl;
|
||||
slog::info << "Input blobs will be filled once before performance measurements." << slog::endl;
|
||||
} else {
|
||||
slog::info << "BENCHMARK IS IN FULL MODE." << slog::endl;
|
||||
slog::info << "Inputs setup stage will be included in performance measurements." << slog::endl;
|
||||
}
|
||||
|
||||
// copy prepared data straight into inferRequest->getBlob()
|
||||
// for inference only mode
|
||||
if (inferenceOnly) {
|
||||
if (nireq < inputsData.begin()->second.size())
|
||||
slog::warn << "Only " << nireq << " test configs will be used." << slog::endl;
|
||||
size_t i = 0;
|
||||
for (auto& inferRequest : inferRequestsQueue.requests) {
|
||||
auto inputs = app_inputs_info[i % app_inputs_info.size()];
|
||||
for (auto& item : inputs) {
|
||||
auto inputName = item.first;
|
||||
const auto& inputBlob = inputsData.at(inputName)[i % inputsData.at(inputName).size()];
|
||||
// for remote blobs setBlob is used, they are already allocated on the device
|
||||
if (useGpuMem) {
|
||||
inferRequest->setBlob(inputName, inputBlob);
|
||||
} else {
|
||||
InferenceEngine::Blob::Ptr requestBlob = inferRequest->getBlob(inputName);
|
||||
if (isDynamicNetwork) {
|
||||
requestBlob->setShape(inputBlob->getTensorDesc().getDims());
|
||||
}
|
||||
copyBlobData(requestBlob, inputBlob);
|
||||
}
|
||||
}
|
||||
|
||||
if (useGpuMem) {
|
||||
auto outputBlobs = ::gpu::getRemoteOutputBlobs(exeNetwork, inferRequest->getOutputClBuffer());
|
||||
for (auto& output : exeNetwork.GetOutputsInfo()) {
|
||||
inferRequest->setBlob(output.first, outputBlobs[output.first]);
|
||||
}
|
||||
}
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
// warming up - out of scope
|
||||
auto inferRequest = inferRequestsQueue.getIdleRequest();
|
||||
if (!inferRequest) {
|
||||
IE_THROW() << "No idle Infer Requests!";
|
||||
}
|
||||
|
||||
if (!inferenceOnly) {
|
||||
auto inputs = app_inputs_info[0];
|
||||
|
||||
for (auto& item : inputs) {
|
||||
auto inputName = item.first;
|
||||
const auto& data = inputsData.at(inputName)[0];
|
||||
inferRequest->setBlob(inputName, data);
|
||||
}
|
||||
|
||||
if (useGpuMem) {
|
||||
auto outputBlobs = ::gpu::getRemoteOutputBlobs(exeNetwork, inferRequest->getOutputClBuffer());
|
||||
for (auto& output : exeNetwork.GetOutputsInfo()) {
|
||||
inferRequest->setBlob(output.first, outputBlobs[output.first]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (FLAGS_api == "sync") {
|
||||
inferRequest->infer();
|
||||
} else {
|
||||
inferRequest->startAsync();
|
||||
}
|
||||
|
||||
inferRequestsQueue.waitAll();
|
||||
|
||||
auto duration_ms = double_to_string(inferRequestsQueue.getLatencies()[0]);
|
||||
slog::info << "First inference took " << duration_ms << " ms" << slog::endl;
|
||||
if (statistics)
|
||||
|
||||
if (statistics) {
|
||||
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
{{"first inference time (ms)", duration_ms}});
|
||||
}
|
||||
inferRequestsQueue.resetTimes();
|
||||
|
||||
size_t processedFramesN = 0;
|
||||
auto startTime = Time::now();
|
||||
auto execTime = std::chrono::duration_cast<ns>(Time::now() - startTime).count();
|
||||
|
||||
@ -695,7 +807,6 @@ int main(int argc, char* argv[]) {
|
||||
/** to align number of iterations to guarantee that last infer requests are
|
||||
* executed in the same conditions **/
|
||||
ProgressBar progressBar(progressBarTotalCount, FLAGS_stream_output, FLAGS_progress);
|
||||
|
||||
while ((niter != 0LL && iteration < niter) ||
|
||||
(duration_nanoseconds != 0LL && (uint64_t)execTime < duration_nanoseconds) ||
|
||||
(FLAGS_api == "async" && iteration % nireq != 0)) {
|
||||
@ -704,6 +815,31 @@ int main(int argc, char* argv[]) {
|
||||
IE_THROW() << "No idle Infer Requests!";
|
||||
}
|
||||
|
||||
if (!inferenceOnly) {
|
||||
auto inputs = app_inputs_info[iteration % app_inputs_info.size()];
|
||||
|
||||
if (FLAGS_pcseq) {
|
||||
inferRequest->setLatencyGroupId(iteration % app_inputs_info.size());
|
||||
}
|
||||
|
||||
if (isDynamicNetwork) {
|
||||
batchSize = getBatchSize(inputs);
|
||||
}
|
||||
|
||||
for (auto& item : inputs) {
|
||||
auto inputName = item.first;
|
||||
const auto& data = inputsData.at(inputName)[iteration % inputsData.at(inputName).size()];
|
||||
inferRequest->setBlob(inputName, data);
|
||||
}
|
||||
|
||||
if (useGpuMem) {
|
||||
auto outputBlobs = ::gpu::getRemoteOutputBlobs(exeNetwork, inferRequest->getOutputClBuffer());
|
||||
for (auto& output : exeNetwork.GetOutputsInfo()) {
|
||||
inferRequest->setBlob(output.first, outputBlobs[output.first]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (FLAGS_api == "sync") {
|
||||
inferRequest->infer();
|
||||
} else {
|
||||
@ -716,9 +852,10 @@ int main(int argc, char* argv[]) {
|
||||
inferRequest->wait();
|
||||
inferRequest->startAsync();
|
||||
}
|
||||
iteration++;
|
||||
++iteration;
|
||||
|
||||
execTime = std::chrono::duration_cast<ns>(Time::now() - startTime).count();
|
||||
processedFramesN += batchSize;
|
||||
|
||||
if (niter > 0) {
|
||||
progressBar.addProgress(1);
|
||||
@ -737,10 +874,17 @@ int main(int argc, char* argv[]) {
|
||||
// wait the latest inference executions
|
||||
inferRequestsQueue.waitAll();
|
||||
|
||||
double latency = getMedianValue<double>(inferRequestsQueue.getLatencies(), FLAGS_latency_percentile);
|
||||
LatencyMetrics generalLatency(inferRequestsQueue.getLatencies());
|
||||
std::vector<LatencyMetrics> groupLatencies = {};
|
||||
if (FLAGS_pcseq && app_inputs_info.size() > 1) {
|
||||
for (auto lats : inferRequestsQueue.getLatencyGroups()) {
|
||||
groupLatencies.push_back(LatencyMetrics(lats));
|
||||
}
|
||||
}
|
||||
|
||||
double totalDuration = inferRequestsQueue.getDurationInMilliseconds();
|
||||
double fps =
|
||||
(FLAGS_api == "sync") ? batchSize * 1000.0 / latency : batchSize * 1000.0 * iteration / totalDuration;
|
||||
double fps = (FLAGS_api == "sync") ? batchSize * 1000.0 / generalLatency.percentile(FLAGS_latency_percentile)
|
||||
: 1000.0 * processedFramesN / totalDuration;
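// Worked example with illustrative numbers: in async mode, 1000 processed frames over a
// 2000 ms run give 1000.0 * 1000 / 2000.0 = 500 FPS; in sync mode throughput is derived from
// the batch size and the configured latency percentile instead.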
|
||||
|
||||
if (statistics) {
|
||||
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
@ -751,19 +895,67 @@ int main(int argc, char* argv[]) {
|
||||
if (device_name.find("MULTI") == std::string::npos) {
|
||||
std::string latency_label;
|
||||
if (FLAGS_latency_percentile == 50) {
|
||||
latency_label = "latency (ms)";
|
||||
latency_label = "Median latency (ms)";
|
||||
} else {
|
||||
latency_label = "latency (" + std::to_string(FLAGS_latency_percentile) + " percentile) (ms)";
|
||||
}
|
||||
statistics->addParameters(
|
||||
StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
{
|
||||
{latency_label, double_to_string(generalLatency.percentile(FLAGS_latency_percentile))},
|
||||
});
|
||||
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
{
|
||||
{latency_label, double_to_string(latency)},
|
||||
{"Average latency (ms)", double_to_string(generalLatency.average())},
|
||||
});
|
||||
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
{
|
||||
{"Min latency (ms)", double_to_string(generalLatency.min())},
|
||||
});
|
||||
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
{
|
||||
{"Max latency (ms)", double_to_string(generalLatency.max())},
|
||||
});
|
||||
|
||||
if (FLAGS_pcseq && app_inputs_info.size() > 1) {
|
||||
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
{
|
||||
{"Latency for each data shape group:", ""},
|
||||
});
|
||||
for (size_t i = 0; i < app_inputs_info.size(); ++i) {
|
||||
std::string data_shapes_string = "";
|
||||
data_shapes_string += std::to_string(i + 1) + ". ";
|
||||
for (auto& item : app_inputs_info[i]) {
|
||||
data_shapes_string += item.first + " : " + getShapeString(item.second.dataShape) + " ";
|
||||
}
|
||||
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
{
|
||||
{data_shapes_string, ""},
|
||||
});
|
||||
statistics->addParameters(
|
||||
StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
{
|
||||
{latency_label,
|
||||
double_to_string(groupLatencies[i].percentile(FLAGS_latency_percentile))},
|
||||
});
|
||||
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
{
|
||||
{"Average (ms)", double_to_string(groupLatencies[i].average())},
|
||||
});
|
||||
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
{
|
||||
{"Min (ms)", double_to_string(groupLatencies[i].min())},
|
||||
});
|
||||
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
{
|
||||
{"Max (ms)", double_to_string(groupLatencies[i].max())},
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
|
||||
{{"throughput", double_to_string(fps)}});
|
||||
}
|
||||
|
||||
progressBar.finish();
|
||||
|
||||
// ----------------- 11. Dumping statistics report
|
||||
@ -805,18 +997,32 @@ int main(int argc, char* argv[]) {
|
||||
if (statistics)
|
||||
statistics->dump();
|
||||
|
||||
std::cout << "Count: " << iteration << " iterations" << std::endl;
|
||||
std::cout << "Duration: " << double_to_string(totalDuration) << " ms" << std::endl;
|
||||
// Performance metrics report
|
||||
slog::info << "Count: " << iteration << " iterations" << slog::endl;
|
||||
slog::info << "Duration: " << double_to_string(totalDuration) << " ms" << slog::endl;
|
||||
if (device_name.find("MULTI") == std::string::npos) {
|
||||
std::cout << "Latency";
|
||||
if (FLAGS_latency_percentile == 50) {
|
||||
std::cout << ": ";
|
||||
} else {
|
||||
std::cout << " (" << FLAGS_latency_percentile << " percentile): ";
|
||||
slog::info << "Latency: " << slog::endl;
|
||||
generalLatency.logTotal(FLAGS_latency_percentile);
|
||||
|
||||
if (FLAGS_pcseq && app_inputs_info.size() > 1) {
|
||||
slog::info << "Latency for each data shape group:" << slog::endl;
|
||||
for (size_t i = 0; i < app_inputs_info.size(); ++i) {
|
||||
slog::info << (i + 1) << ".";
|
||||
for (auto& item : app_inputs_info[i]) {
|
||||
std::stringstream input_shape;
|
||||
auto shape = item.second.dataShape;
|
||||
std::copy(shape.begin(), shape.end() - 1, std::ostream_iterator<int>(input_shape, ","));
|
||||
input_shape << shape.back();
|
||||
slog::info << " " << item.first << " : " << getShapeString(item.second.dataShape);
|
||||
}
|
||||
slog::info << slog::endl;
|
||||
|
||||
groupLatencies[i].logTotal(FLAGS_latency_percentile);
|
||||
}
|
||||
}
|
||||
std::cout << double_to_string(latency) << " ms" << std::endl;
|
||||
}
|
||||
std::cout << "Throughput: " << double_to_string(fps) << " FPS" << std::endl;
|
||||
slog::info << "Throughput: " << double_to_string(fps) << " FPS" << slog::endl;
|
||||
|
||||
} catch (const std::exception& ex) {
|
||||
slog::err << ex.what() << slog::endl;
|
||||
|
||||
|
@ -2,12 +2,15 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
// clang-format off
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
// clang-format off
|
||||
#include <samples/slog.hpp>
|
||||
|
||||
#include "remote_blobs_filling.hpp"
|
||||
// clang-format on
|
||||
|
||||
@ -85,58 +88,98 @@ size_t getBytesPerElement(InferenceEngine::Precision precision) {
|
||||
}
|
||||
}
|
||||
|
||||
void fillRemoteBlobs(const std::vector<std::string>& inputFiles,
|
||||
const size_t& batchSize,
|
||||
benchmark_app::InputsInfo& app_inputs_info,
|
||||
std::vector<InferReqWrap::Ptr> requests,
|
||||
const InferenceEngine::ExecutableNetwork& exeNetwork) {
|
||||
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> getRemoteInputBlobs(
|
||||
const std::map<std::string, std::vector<std::string>>& inputFiles,
|
||||
const std::vector<benchmark_app::InputsInfo>& app_inputs_info,
|
||||
const InferenceEngine::ExecutableNetwork& exeNetwork,
|
||||
std::vector<BufferType>& clBuffer) {
|
||||
#ifdef HAVE_DEVICE_MEM_SUPPORT
|
||||
slog::info << "Device memory will be used for input and output blobs" << slog::endl;
|
||||
if (inputFiles.size()) {
|
||||
slog::warn << "Device memory supports only random data at this moment, input images will be ignored"
|
||||
<< slog::endl;
|
||||
}
|
||||
|
||||
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> remoteBlobs;
|
||||
auto context = exeNetwork.GetContext();
|
||||
auto oclContext = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(context)->get();
|
||||
auto oclInstance = std::make_shared<OpenCL>(oclContext);
|
||||
|
||||
auto setShared = [&](size_t requestId,
|
||||
const std::string name,
|
||||
const InferenceEngine::TensorDesc& desc,
|
||||
bool fillRandom = false) {
|
||||
auto setShared = [&](const std::string name, const InferenceEngine::TensorDesc& desc, bool fillRandom = false) {
|
||||
cl_int err;
|
||||
auto inputDims = desc.getDims();
|
||||
auto elementsNum = std::accumulate(begin(inputDims), end(inputDims), 1, std::multiplies<size_t>());
|
||||
auto inputSize = elementsNum * getBytesPerElement(desc.getPrecision());
|
||||
|
||||
cl::Buffer sharedBuffer =
|
||||
cl::Buffer(oclInstance->_context, CL_MEM_READ_WRITE, (cl::size_type)inputSize, NULL, &err);
|
||||
clBuffer.push_back(cl::Buffer(oclInstance->_context, CL_MEM_READ_WRITE, (cl::size_type)inputSize, NULL, &err));
|
||||
|
||||
if (fillRandom) {
|
||||
void* mappedPtr = oclInstance->_queue.enqueueMapBuffer(sharedBuffer,
|
||||
void* mappedPtr = oclInstance->_queue.enqueueMapBuffer(clBuffer.back(),
|
||||
CL_TRUE,
|
||||
CL_MEM_READ_WRITE,
|
||||
0,
|
||||
(cl::size_type)inputSize);
|
||||
fillBuffer(mappedPtr, elementsNum, desc.getPrecision());
|
||||
oclInstance->_queue.enqueueUnmapMemObject(sharedBuffer, mappedPtr);
|
||||
oclInstance->_queue.enqueueUnmapMemObject(clBuffer.back(), mappedPtr);
|
||||
}
|
||||
|
||||
InferenceEngine::Blob::Ptr sharedBlob = InferenceEngine::gpu::make_shared_blob(desc, context, sharedBuffer);
|
||||
|
||||
requests.at(requestId)->setBlob(name, sharedBlob);
|
||||
auto blob = InferenceEngine::gpu::make_shared_blob(desc, context, clBuffer.back());
|
||||
remoteBlobs[name].push_back(blob);
|
||||
};
|
||||
|
||||
for (size_t requestId = 0; requestId < requests.size(); requestId++) {
|
||||
for (auto& item : exeNetwork.GetInputsInfo())
|
||||
setShared(requestId, item.first, item.second->getTensorDesc(), true);
|
||||
|
||||
for (auto& item : exeNetwork.GetOutputsInfo())
|
||||
setShared(requestId, item.first, item.second->getTensorDesc());
|
||||
for (auto& inputs_info : app_inputs_info) {
|
||||
for (auto& input : inputs_info) {
|
||||
// Fill random
|
||||
slog::info << "Prepare remote blob for input '" << input.first << "' with random values ("
|
||||
<< std::string((input.second.isImage() ? "image" : "some binary data")) << " is expected)"
|
||||
<< slog::endl;
|
||||
setShared(input.first,
|
||||
InferenceEngine::TensorDesc(input.second.precision,
|
||||
input.second.dataShape,
|
||||
getLayoutFromString(input.second.layout)),
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
return remoteBlobs;
|
||||
#else
|
||||
IE_THROW() << "Device memory requested for GPU device, but OpenCL was not linked";
|
||||
#endif
|
||||
}
|
||||
|
||||
std::map<std::string, InferenceEngine::Blob::Ptr> getRemoteOutputBlobs(
|
||||
const InferenceEngine::ExecutableNetwork& exeNetwork,
|
||||
std::map<std::string, ::gpu::BufferType>& clBuffer) {
|
||||
#ifdef HAVE_DEVICE_MEM_SUPPORT
|
||||
std::map<std::string, InferenceEngine::Blob::Ptr> outputBlobs;
|
||||
for (auto& output : exeNetwork.GetOutputsInfo()) {
|
||||
cl_int err;
|
||||
auto context = exeNetwork.GetContext();
|
||||
auto oclContext = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(context)->get();
|
||||
auto oclInstance = std::make_shared<OpenCL>(oclContext);
|
||||
|
||||
auto desc = output.second->getTensorDesc();
|
||||
auto inputDims = desc.getDims();
|
||||
auto elementsNum = std::accumulate(begin(inputDims), end(inputDims), 1, std::multiplies<size_t>());
|
||||
auto inputSize = elementsNum * getBytesPerElement(desc.getPrecision());
|
||||
|
||||
cl::size_type bufferSize = 0;
|
||||
if (clBuffer.find(output.first) == clBuffer.end()) {
|
||||
clBuffer[output.first] =
|
||||
cl::Buffer(oclInstance->_context, CL_MEM_READ_WRITE, (cl::size_type)inputSize, NULL, &err);
|
||||
} else {
|
||||
auto& buff = clBuffer[output.first];
|
||||
buff.getInfo(CL_MEM_SIZE, &bufferSize);
|
||||
if (inputSize != bufferSize) {
|
||||
buff = cl::Buffer(oclInstance->_context, CL_MEM_READ_WRITE, (cl::size_type)inputSize, NULL, &err);
|
||||
}
|
||||
}
|
||||
outputBlobs[output.first] = InferenceEngine::gpu::make_shared_blob(desc, context, clBuffer[output.first]);
|
||||
}
|
||||
|
||||
return outputBlobs;
|
||||
#else
|
||||
IE_THROW() << "Device memory requested for GPU device, but OpenCL was not linked";
|
||||
#endif
|
||||
}
|
||||
} // namespace gpu
|
||||
|
@ -2,6 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(HAVE_GPU_DEVICE_MEM_SUPPORT)
|
||||
# define HAVE_DEVICE_MEM_SUPPORT
|
||||
# include "gpu/gpu_context_api_ocl.hpp"
|
||||
@ -10,13 +12,14 @@
|
||||
// clang-format off
|
||||
#include "inference_engine.hpp"
|
||||
|
||||
#include "infer_request_wrap.hpp"
|
||||
#include "utils.hpp"
|
||||
// clang-format on
|
||||
|
||||
namespace gpu {
|
||||
|
||||
#ifdef HAVE_DEVICE_MEM_SUPPORT
|
||||
using BufferType = cl::Buffer;
|
||||
|
||||
struct OpenCL {
|
||||
cl::Context _context;
|
||||
cl::Device _device;
|
||||
@ -55,12 +58,18 @@ struct OpenCL {
|
||||
_queue = cl::CommandQueue(_context, _device, props);
|
||||
}
|
||||
};
|
||||
#else
|
||||
using BufferType = void*;
|
||||
#endif
|
||||
|
||||
void fillRemoteBlobs(const std::vector<std::string>& inputFiles,
|
||||
const size_t& batchSize,
|
||||
benchmark_app::InputsInfo& app_inputs_info,
|
||||
std::vector<InferReqWrap::Ptr> requests,
|
||||
const InferenceEngine::ExecutableNetwork& exeNetwork);
|
||||
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> getRemoteInputBlobs(
|
||||
const std::map<std::string, std::vector<std::string>>& inputFiles,
|
||||
const std::vector<benchmark_app::InputsInfo>& app_inputs_info,
|
||||
const InferenceEngine::ExecutableNetwork& exeNetwork,
|
||||
std::vector<BufferType>& clBuffer);
|
||||
|
||||
std::map<std::string, InferenceEngine::Blob::Ptr> getRemoteOutputBlobs(
|
||||
const InferenceEngine::ExecutableNetwork& exeNetwork,
|
||||
std::map<std::string, ::gpu::BufferType>& clBuffer);
|
||||
|
||||
} // namespace gpu
|
||||
|
43
samples/cpp/benchmark_app/shared_blob_allocator.hpp
Normal file
@ -0,0 +1,43 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ie_allocator.hpp"
|
||||
|
||||
template <class T>
|
||||
class SharedBlobAllocator : public InferenceEngine::IAllocator {
|
||||
public:
|
||||
SharedBlobAllocator(const T* data, size_t size) : data(data), size(size){};
|
||||
|
||||
~SharedBlobAllocator() {
|
||||
free((void*)data);
|
||||
};
|
||||
|
||||
void* lock(void* handle, InferenceEngine::LockOp op = InferenceEngine::LOCK_FOR_WRITE) noexcept override {
|
||||
if (handle == data) {
|
||||
return (void*)data;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void unlock(void* handle) noexcept override{};
|
||||
|
||||
void* alloc(size_t size) noexcept override {
|
||||
return size <= this->size ? (void*)data : nullptr;
|
||||
};
|
||||
|
||||
bool free(void* handle) noexcept override {
|
||||
if (handle == data) {
|
||||
delete[] data;
|
||||
data = nullptr;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
private:
|
||||
const T* data;
|
||||
size_t size;
|
||||
};
|
@ -11,9 +11,12 @@
|
||||
|
||||
// clang-format off
|
||||
#include "inference_engine.hpp"
|
||||
|
||||
#include "samples/common.hpp"
|
||||
#include "samples/csv_dumper.hpp"
|
||||
#include "samples/slog.hpp"
|
||||
|
||||
#include "utils.hpp"
|
||||
// clang-format on
|
||||
|
||||
// @brief statistics reports types
|
||||
@ -21,6 +24,53 @@ static constexpr char noCntReport[] = "no_counters";
|
||||
static constexpr char averageCntReport[] = "average_counters";
|
||||
static constexpr char detailedCntReport[] = "detailed_counters";
|
||||
|
||||
/// @brief Responsible for calculating different latency metrics
|
||||
class LatencyMetrics {
|
||||
public:
|
||||
LatencyMetrics() = delete;
|
||||
|
||||
LatencyMetrics(const std::vector<double>& latencies) : latencies(latencies) {
|
||||
if (latencies.empty()) {
|
||||
throw std::logic_error("Latency metrics class expects non-empty vector of latencies at consturction.");
|
||||
}
|
||||
std::sort(this->latencies.begin(), this->latencies.end());
|
||||
}
|
||||
|
||||
LatencyMetrics(std::vector<double>&& latencies) : latencies(latencies) {
|
||||
if (latencies.empty()) {
|
||||
throw std::logic_error("Latency metrics class expects non-empty vector of latencies at consturction.");
|
||||
}
|
||||
std::sort(this->latencies.begin(), this->latencies.end());
|
||||
}
|
||||
|
||||
double min() {
|
||||
return latencies[0];
|
||||
}
|
||||
|
||||
double average() {
|
||||
return std::accumulate(latencies.begin(), latencies.end(), 0.0) / latencies.size();
|
||||
}
|
||||
|
||||
double percentile(std::size_t p) {
|
||||
return latencies[size_t(latencies.size() / 100.0 * p)];
|
||||
}
|
||||
|
||||
double max() {
|
||||
return latencies.back();
|
||||
}
|
||||
|
||||
void logTotal(size_t p) {
|
||||
std::string percentileStr = (p == 50) ? "\tMedian: " : "\t" + std::to_string(p) + " percentile: ";
|
||||
slog::info << percentileStr << double_to_string(percentile(p)) << " ms" << slog::endl;
|
||||
slog::info << "\tAvg: " << double_to_string(average()) << " ms" << slog::endl;
|
||||
slog::info << "\tMin: " << double_to_string(min()) << " ms" << slog::endl;
|
||||
slog::info << "\tMax: " << double_to_string(max()) << " ms" << slog::endl;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<double> latencies;
|
||||
};
|
||||
|
||||
/// @brief Responsible for collecting of statistics and dumping to .csv file
|
||||
class StatisticsReport {
|
||||
public:
|
||||
|
@ -2,7 +2,6 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
// clang-format off
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <regex>
|
||||
@ -10,8 +9,10 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "samples/common.hpp"
|
||||
#include "samples/slog.hpp"
|
||||
// clang-format off
|
||||
#include <samples/args_helper.hpp>
|
||||
#include <samples/common.hpp>
|
||||
#include <samples/slog.hpp>
|
||||
|
||||
#include "utils.hpp"
|
||||
// clang-format on
|
||||
@ -35,7 +36,7 @@ size_t InputInfo::getDimentionByLayout(char character) const {
|
||||
size_t pos = layout.find(character);
|
||||
if (pos == std::string::npos)
|
||||
throw std::runtime_error("Error: Can't get " + std::string(character, 1) + " from layout " + layout);
|
||||
return shape.at(pos);
|
||||
return dataShape.at(pos);
|
||||
}
|
||||
size_t InputInfo::width() const {
|
||||
return getDimentionByLayout('W');
|
||||
@ -152,8 +153,8 @@ size_t getBatchSize(const benchmark_app::InputsInfo& inputs_info) {
|
||||
std::size_t batch_index = info.second.layout.find("N");
|
||||
if (batch_index != std::string::npos) {
|
||||
if (batch_size == 0)
|
||||
batch_size = info.second.shape[batch_index];
|
||||
else if (batch_size != info.second.shape[batch_index])
|
||||
batch_size = info.second.dataShape[batch_index];
|
||||
else if (batch_size != info.second.dataShape[batch_index])
|
||||
throw std::logic_error("Can't deterimine batch size: batch is "
|
||||
"different for different inputs!");
|
||||
}
|
||||
@ -163,6 +164,47 @@ size_t getBatchSize(const benchmark_app::InputsInfo& inputs_info) {
|
||||
return batch_size;
|
||||
}
|
||||
|
||||
InferenceEngine::Layout getLayoutFromString(const std::string& string_layout) {
|
||||
static const std::unordered_map<std::string, InferenceEngine::Layout> layouts = {
|
||||
{"NCHW", InferenceEngine::Layout::NCHW},
|
||||
{"NHWC", InferenceEngine::Layout::NHWC},
|
||||
{"NCDHW", InferenceEngine::Layout::NCDHW},
|
||||
{"NDHWC", InferenceEngine::Layout::NDHWC},
|
||||
{"C", InferenceEngine::Layout::C},
|
||||
{"CHW", InferenceEngine::Layout::CHW},
|
||||
{"HWC", InferenceEngine::Layout::HWC},
|
||||
{"HW", InferenceEngine::Layout::HW},
|
||||
{"NC", InferenceEngine::Layout::NC},
|
||||
{"CN", InferenceEngine::Layout::CN}};
|
||||
auto it = layouts.find(string_layout);
|
||||
if (it != layouts.end()) {
|
||||
return it->second;
|
||||
}
|
||||
IE_THROW() << "Unknown layout with name '" << string_layout << "'.";
|
||||
}
|
||||
|
||||
std::string getShapeString(const InferenceEngine::SizeVector& shape) {
|
||||
std::stringstream ss;
|
||||
ss << "[";
|
||||
for (size_t i = 0; i < shape.size(); ++i) {
|
||||
if (i > 0)
|
||||
ss << ", ";
|
||||
ss << shape.at(i);
|
||||
}
|
||||
ss << "]";
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string getShapesString(const benchmark_app::PartialShapes& shapes) {
|
||||
std::stringstream ss;
|
||||
for (auto& shape : shapes) {
|
||||
if (!ss.str().empty())
|
||||
ss << ", ";
|
||||
ss << "\'" << shape.first << "': " << shape.second;
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& shapes) {
|
||||
std::stringstream ss;
|
||||
for (auto& shape : shapes) {
|
||||
@ -218,6 +260,120 @@ std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& sc
|
||||
return return_value;
|
||||
}
|
||||
|
||||
std::vector<ngraph::Dimension> parsePartialShape(const std::string& partial_shape) {
|
||||
std::vector<ngraph::Dimension> shape;
|
||||
for (auto& dim : split(partial_shape, ',')) {
|
||||
if (dim == "?" || dim == "-1") {
|
||||
shape.push_back(ngraph::Dimension::dynamic());
|
||||
} else {
|
||||
const std::string range_divider = "..";
|
||||
size_t range_index = dim.find(range_divider);
|
||||
if (range_index != std::string::npos) {
|
||||
std::string min = dim.substr(0, range_index);
|
||||
std::string max = dim.substr(range_index + range_divider.length());
|
||||
shape.push_back(ngraph::Dimension(min.empty() ? 0 : std::stoi(min),
|
||||
max.empty() ? ngraph::Interval::s_max : std::stoi(max)));
|
||||
} else {
|
||||
shape.push_back(std::stoi(dim));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return shape;
|
||||
}
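// Examples of the accepted dimension syntax (the sample strings are illustrative):
// parsePartialShape("1,3,224,224") -> {1, 3, 224, 224}
// parsePartialShape("1,3,128..640,?") -> {1, 3, Dimension(128, 640), Dimension::dynamic()}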
|
||||
|
||||
InferenceEngine::SizeVector parseTensorShape(const std::string& dataShape) {
|
||||
std::vector<size_t> shape;
|
||||
for (auto& dim : split(dataShape, ',')) {
|
||||
shape.push_back(std::stoi(dim));
|
||||
}
|
||||
return shape;
|
||||
}
|
||||
|
||||
std::pair<std::string, std::vector<std::string>> parseInputFiles(const std::string& file_paths_string) {
|
||||
auto search_string = file_paths_string;
|
||||
std::string input_name = "";
|
||||
std::vector<std::string> file_paths;
|
||||
|
||||
// parse strings like <input1>:file1,file2,file3 and get name from them
|
||||
size_t semicolon_pos = search_string.find_first_of(":");
|
||||
size_t quote_pos = search_string.find_first_of("\"");
|
||||
if (semicolon_pos != std::string::npos && quote_pos != std::string::npos && semicolon_pos > quote_pos) {
|
||||
// if : is found after opening " symbol - this means that " belongs to pathname
|
||||
semicolon_pos = std::string::npos;
|
||||
}
|
||||
if (search_string.length() > 2 && semicolon_pos == 1 && search_string[2] == '\\') {
|
||||
// Special case like C:\ denotes drive name, not an input name
|
||||
semicolon_pos = std::string::npos;
|
||||
}
|
||||
|
||||
if (semicolon_pos != std::string::npos) {
|
||||
input_name = search_string.substr(0, semicolon_pos);
|
||||
search_string = search_string.substr(semicolon_pos + 1);
|
||||
}
|
||||
|
||||
// parse file1,file2,file3 and get vector of paths
|
||||
size_t coma_pos = 0;
|
||||
do {
|
||||
coma_pos = search_string.find_first_of(',');
|
||||
file_paths.push_back(search_string.substr(0, coma_pos));
|
||||
if (coma_pos == std::string::npos) {
|
||||
search_string = "";
|
||||
break;
|
||||
}
|
||||
search_string = search_string.substr(coma_pos + 1);
|
||||
} while (coma_pos != std::string::npos);
|
||||
|
||||
if (!search_string.empty())
|
||||
throw std::logic_error("Can't parse file paths for input " + input_name +
|
||||
" in input parameter string: " + file_paths_string);
|
||||
|
||||
return {input_name, file_paths};
|
||||
}
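// A short usage sketch (file names are illustrative):
// parseInputFiles("data:img1.bmp,img2.bmp") -> {"data", {"img1.bmp", "img2.bmp"}}
// parseInputFiles("img1.bmp,img2.bmp")      -> {"",     {"img1.bmp", "img2.bmp"}}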
|
||||
|
||||
std::map<std::string, std::vector<std::string>> parseInputArguments(const std::vector<std::string>& args) {
|
||||
std::map<std::string, std::vector<std::string>> mapped_files = {};
|
||||
auto args_it = begin(args);
|
||||
const auto is_image_arg = [](const std::string& s) {
|
||||
return s == "-i";
|
||||
};
|
||||
const auto is_arg = [](const std::string& s) {
|
||||
return s.front() == '-';
|
||||
};
|
||||
while (args_it != args.end()) {
|
||||
const auto files_start = std::find_if(args_it, end(args), is_image_arg);
|
||||
if (files_start == end(args)) {
|
||||
break;
|
||||
}
|
||||
const auto files_begin = std::next(files_start);
|
||||
const auto files_end = std::find_if(files_begin, end(args), is_arg);
|
||||
for (auto f = files_begin; f != files_end; ++f) {
|
||||
auto files = parseInputFiles(*f);
|
||||
if (mapped_files.find(files.first) == mapped_files.end()) {
|
||||
mapped_files[files.first] = {};
|
||||
}
|
||||
|
||||
for (auto& file : files.second) {
|
||||
readInputFilesArguments(mapped_files[files.first], file);
|
||||
}
|
||||
}
|
||||
args_it = files_end;
|
||||
}
|
||||
size_t max_files = 20;
|
||||
for (auto& files : mapped_files) {
|
||||
if (files.second.size() <= max_files) {
|
||||
slog::info << "For input " << files.first << " " << files.second.size() << " files were added. "
|
||||
<< slog::endl;
|
||||
} else {
|
||||
slog::info << "For input " << files.first << " " << files.second.size() << " files were added. "
|
||||
<< " The number of files will be limited to " << max_files << "." << slog::endl;
|
||||
files.second.resize(max_files);
|
||||
}
|
||||
}
|
||||
|
||||
return mapped_files;
|
||||
}
|
||||
|
||||
#ifdef USE_OPENCV
|
||||
void dump_config(const std::string& filename, const std::map<std::string, std::map<std::string, std::string>>& config) {
|
||||
auto plugin_to_opencv_format = [](const std::string& str) -> std::string {
|
||||
@ -270,4 +426,4 @@ void load_config(const std::string& filename, std::map<std::string, std::map<std
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
@ -4,15 +4,43 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
#include <map>
|
||||
#include <samples/slog.hpp>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ngraph/partial_shape.hpp"
|
||||
|
||||
typedef std::chrono::high_resolution_clock Time;
|
||||
typedef std::chrono::nanoseconds ns;
|
||||
|
||||
inline uint64_t getDurationInMilliseconds(uint32_t duration) {
|
||||
return duration * 1000LL;
|
||||
}
|
||||
|
||||
inline uint64_t getDurationInNanoseconds(uint32_t duration) {
|
||||
return duration * 1000000000LL;
|
||||
}
|
||||
|
||||
inline double get_duration_ms_till_now(Time::time_point& startTime) {
|
||||
return std::chrono::duration_cast<ns>(Time::now() - startTime).count() * 0.000001;
|
||||
};
|
||||
|
||||
inline std::string double_to_string(const double number) {
|
||||
std::stringstream ss;
|
||||
ss << std::fixed << std::setprecision(2) << number;
|
||||
return ss.str();
|
||||
};
|
||||
|
||||
namespace benchmark_app {
|
||||
struct InputInfo {
|
||||
InferenceEngine::Precision precision;
|
||||
InferenceEngine::SizeVector shape;
|
||||
ngraph::PartialShape partialShape;
|
||||
InferenceEngine::SizeVector dataShape;
|
||||
std::string layout;
|
||||
InferenceEngine::Layout originalLayout;
|
||||
std::vector<float> scale;
|
||||
std::vector<float> mean;
|
||||
bool isImage() const;
|
||||
@ -25,43 +53,56 @@ struct InputInfo {
|
||||
size_t depth() const;
|
||||
};
|
||||
using InputsInfo = std::map<std::string, InputInfo>;
|
||||
using PartialShapes = std::map<std::string, ngraph::PartialShape>;
|
||||
} // namespace benchmark_app
|
||||
|
||||
std::vector<std::string> parseDevices(const std::string& device_string);
|
||||
uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device);
|
||||
std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices,
|
||||
const std::string& values_string);
|
||||
|
||||
InferenceEngine::Layout getLayoutFromString(const std::string& string_layout);
|
||||
std::string getShapeString(const InferenceEngine::SizeVector& shape);
|
||||
std::string getShapesString(const benchmark_app::PartialShapes& shapes);
|
||||
std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& shapes);
|
||||
size_t getBatchSize(const benchmark_app::InputsInfo& inputs_info);
|
||||
std::vector<std::string> split(const std::string& s, char delim);
|
||||
|
||||
std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& scale_mean,
|
||||
const benchmark_app::InputsInfo& inputs_info);
|
||||
std::vector<ngraph::Dimension> parsePartialShape(const std::string& partial_shape);
|
||||
InferenceEngine::SizeVector parseTensorShape(const std::string& data_shape);
|
||||
std::pair<std::string, std::vector<std::string>> parseInputFiles(const std::string& file_paths_string);
|
||||
std::map<std::string, std::vector<std::string>> parseInputArguments(const std::vector<std::string>& args);
|
||||
|
||||
template <typename T>
|
||||
std::map<std::string, std::string> parseInputParameters(const std::string parameter_string,
|
||||
const std::map<std::string, T>& input_info) {
|
||||
// Parse parameter string like "input0[value0],input1[value1]" or "[value]" (applied to all
|
||||
// inputs)
|
||||
std::map<std::string, std::string> return_value;
|
||||
std::map<std::string, std::vector<std::string>> parseInputParameters(const std::string parameter_string,
|
||||
const std::map<std::string, T>& input_info) {
|
||||
// Parse parameter string like "[value0]", "[value0][value1]" or "input0[value0][value1],input1[value2][value3]"
|
||||
// (applied to all inputs)
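// For example (illustrative values): "data[1,3,224,224][1,3,448,448]" maps the input named
// "data" to both bracketed values, while a bare "[1,3,224,224]" applies that single value to
// every input.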
|
||||
std::map<std::string, std::vector<std::string>> return_value;
|
||||
std::string search_string = parameter_string;
|
||||
auto start_pos = search_string.find_first_of('[');
|
||||
auto input_name = search_string.substr(0, start_pos);
|
||||
while (start_pos != std::string::npos) {
|
||||
auto end_pos = search_string.find_first_of(']');
|
||||
if (end_pos == std::string::npos)
|
||||
break;
|
||||
auto input_name = search_string.substr(0, start_pos);
|
||||
if (start_pos)
|
||||
input_name = search_string.substr(0, start_pos);
|
||||
auto input_value = search_string.substr(start_pos + 1, end_pos - start_pos - 1);
|
||||
if (!input_name.empty()) {
|
||||
return_value[input_name] = input_value;
|
||||
return_value[input_name].push_back(input_value);
|
||||
} else {
|
||||
for (auto& item : input_info) {
|
||||
return_value[item.first] = input_value;
|
||||
return_value[item.first].push_back(input_value);
|
||||
}
|
||||
}
|
||||
search_string = search_string.substr(end_pos + 1);
|
||||
if (search_string.empty() || search_string.front() != ',')
|
||||
if (search_string.empty() || (search_string.front() != ',' && search_string.front() != '['))
|
||||
break;
|
||||
search_string = search_string.substr(1);
|
||||
if (search_string.front() == ',')
|
||||
search_string = search_string.substr(1);
|
||||
start_pos = search_string.find_first_of('[');
|
||||
}
|
||||
if (!search_string.empty())
|
||||
@ -70,87 +111,156 @@ std::map<std::string, std::string> parseInputParameters(const std::string parame
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string,
|
||||
const std::string& layout_string,
|
||||
const size_t batch_size,
|
||||
const std::string& scale_string,
|
||||
const std::string& mean_string,
|
||||
const std::map<std::string, T>& input_info,
|
||||
bool& reshape_required) {
|
||||
std::map<std::string, std::string> shape_map = parseInputParameters(shape_string, input_info);
|
||||
std::map<std::string, std::string> layout_map = parseInputParameters(layout_string, input_info);
|
||||
std::vector<benchmark_app::InputsInfo> getInputsInfo(const std::string& shape_string,
|
||||
const std::string& layout_string,
|
||||
const size_t batch_size,
|
||||
const std::string& data_shapes_string,
|
||||
const std::string& scale_string,
|
||||
const std::string& mean_string,
|
||||
const std::map<std::string, T>& input_info,
|
||||
bool& reshape_required) {
|
||||
std::map<std::string, std::vector<std::string>> shape_map = parseInputParameters(shape_string, input_info);
|
||||
std::map<std::string, std::vector<std::string>> data_shapes_map =
|
||||
parseInputParameters(data_shapes_string, input_info);
|
||||
std::map<std::string, std::vector<std::string>> layout_map = parseInputParameters(layout_string, input_info);
|
||||
|
||||
size_t min_size = 1, max_size = 1;
|
||||
if (!data_shapes_map.empty()) {
|
||||
min_size = std::min_element(data_shapes_map.begin(),
|
||||
data_shapes_map.end(),
|
||||
[](std::pair<std::string, std::vector<std::string>> a,
|
||||
std::pair<std::string, std::vector<std::string>> b) {
|
||||
return a.second.size() < b.second.size() && a.second.size() != 1;
|
||||
})
|
||||
->second.size();
|
||||
|
||||
max_size = std::max_element(data_shapes_map.begin(),
|
||||
data_shapes_map.end(),
|
||||
[](std::pair<std::string, std::vector<std::string>> a,
|
||||
std::pair<std::string, std::vector<std::string>> b) {
|
||||
return a.second.size() < b.second.size();
|
||||
})
|
||||
->second.size();
|
||||
if (min_size != max_size) {
|
||||
throw std::logic_error(
|
||||
"Shapes number for every input should be either 1 or should be equal to shapes number of other inputs");
|
||||
}
|
||||
}
|
||||
|
||||
reshape_required = false;
|
||||
benchmark_app::InputsInfo info_map;
|
||||
for (auto& item : input_info) {
|
||||
benchmark_app::InputInfo info;
|
||||
auto name = item.first;
|
||||
auto descriptor = item.second->getTensorDesc();
|
||||
// Precision
|
||||
info.precision = descriptor.getPrecision();
|
||||
// Shape
|
||||
if (shape_map.count(name)) {
|
||||
std::vector<size_t> parsed_shape;
|
||||
for (auto& dim : split(shape_map.at(name), ',')) {
|
||||
parsed_shape.push_back(std::stoi(dim));
|
||||
}
|
||||
info.shape = parsed_shape;
|
||||
reshape_required = true;
|
||||
} else {
|
||||
info.shape = descriptor.getDims();
|
||||
}
|
||||
// Layout
|
||||
if (layout_map.count(name)) {
|
||||
info.layout = layout_map.at(name);
|
||||
std::transform(info.layout.begin(), info.layout.end(), info.layout.begin(), ::toupper);
|
||||
} else {
|
||||
std::stringstream ss;
|
||||
ss << descriptor.getLayout();
|
||||
info.layout = ss.str();
|
||||
}
|
||||
// Update shape with batch if needed
|
||||
if (batch_size != 0) {
|
||||
std::size_t batch_index = info.layout.find("N");
|
||||
if ((batch_index != std::string::npos) && (info.shape.at(batch_index) != batch_size)) {
|
||||
info.shape[batch_index] = batch_size;
|
||||
|
||||
std::vector<benchmark_app::InputsInfo> info_maps;
|
||||
|
||||
for (size_t i = 0; i < min_size; ++i) {
|
||||
benchmark_app::InputsInfo info_map;
|
||||
for (auto& item : input_info) {
|
||||
benchmark_app::InputInfo info;
|
||||
auto name = item.first;
|
||||
auto descriptor = item.second->getTensorDesc();
|
||||
// Precision
|
||||
info.precision = descriptor.getPrecision();
|
||||
// Partial Shape
|
||||
if (shape_map.count(name)) {
|
||||
std::vector<ngraph::Dimension> parsed_shape;
|
||||
if (shape_map.at(name).size() > 1) {
|
||||
throw std::logic_error(
|
||||
"shape command line parameter doesn't support multiple shapes for one input.");
|
||||
}
|
||||
info.partialShape = parsePartialShape(shape_map.at(name)[0]);
|
||||
reshape_required = true;
|
||||
} else {
|
||||
info.partialShape = item.second->getPartialShape();
|
||||
}
|
||||
|
||||
if (info.partialShape.is_dynamic() && info.isImage()) {
|
||||
throw std::logic_error(
|
||||
"benchmark_app supports only binary and random data as input for dynamic models at this moment.");
|
||||
}
|
||||
|
||||
// Tensor Shape
|
||||
if (info.partialShape.is_dynamic() && data_shapes_map.count(name)) {
|
||||
info.dataShape = parseTensorShape(data_shapes_map.at(name)[i % data_shapes_map.at(name).size()]);
|
||||
} else if (info.partialShape.is_static()) {
|
||||
info.dataShape = info.partialShape.get_shape();
|
||||
if (data_shapes_map.find(name) != data_shapes_map.end()) {
|
||||
throw std::logic_error(
|
||||
"Network's input \"" + name +
|
||||
"\" is static. Use -shape argument for static inputs instead of -data_shape.");
|
||||
}
|
||||
} else if (!data_shapes_map.empty()) {
|
||||
throw std::logic_error("Can't find network input name \"" + name + "\" in \"-data_shape " +
|
||||
data_shapes_string + "\" command line parameter");
|
||||
} else {
|
||||
throw std::logic_error(
|
||||
"data_shape command line parameter should be set in case of network with dynamic shapes.");
|
||||
}
|
||||
|
||||
// Layout
|
||||
info.originalLayout = descriptor.getLayout();
|
||||
if (layout_map.count(name)) {
|
||||
if (layout_map.at(name).size() > 1) {
|
||||
throw std::logic_error(
|
||||
"layout command line parameter doesn't support multiple layouts for one input.");
|
||||
}
|
||||
info.layout = layout_map.at(name)[0];
|
||||
std::transform(info.layout.begin(), info.layout.end(), info.layout.begin(), ::toupper);
|
||||
} else {
|
||||
std::stringstream ss;
|
||||
ss << descriptor.getLayout();
|
||||
info.layout = ss.str();
|
||||
}
|
||||
// Update shape with batch if needed (only in static shape case)
|
||||
// Update blob shape only not affecting network shape to trigger dynamic batch size case
|
||||
if (batch_size != 0) {
|
||||
std::size_t batch_index = info.layout.find("N");
|
||||
if ((batch_index != std::string::npos) && (info.dataShape.at(batch_index) != batch_size)) {
|
||||
if (info.partialShape.is_static()) {
|
||||
info.partialShape[batch_index] = batch_size;
|
||||
}
|
||||
info.dataShape[batch_index] = batch_size;
|
||||
reshape_required = true;
|
||||
}
|
||||
}
|
||||
info_map[name] = info;
|
||||
}
|
||||
|
||||
// Update scale and mean
|
||||
std::map<std::string, std::vector<float>> scale_map = parseScaleOrMean(scale_string, info_map);
|
||||
std::map<std::string, std::vector<float>> mean_map = parseScaleOrMean(mean_string, info_map);
|
||||
|
||||
for (auto& item : info_map) {
|
||||
if (item.second.isImage()) {
|
||||
item.second.scale.assign({1, 1, 1});
|
||||
item.second.mean.assign({0, 0, 0});
|
||||
|
||||
if (scale_map.count(item.first)) {
|
||||
item.second.scale = scale_map.at(item.first);
|
||||
}
|
||||
if (mean_map.count(item.first)) {
|
||||
item.second.mean = mean_map.at(item.first);
|
||||
}
|
||||
}
|
||||
}
|
||||
info_map[name] = info;
|
||||
|
||||
info_maps.push_back(info_map);
|
||||
}
|
||||
|
||||
// Update scale and mean
|
||||
std::map<std::string, std::vector<float>> scale_map = parseScaleOrMean(scale_string, info_map);
|
||||
std::map<std::string, std::vector<float>> mean_map = parseScaleOrMean(mean_string, info_map);
|
||||
|
||||
for (auto& item : info_map) {
|
||||
if (item.second.isImage()) {
|
||||
item.second.scale.assign({1, 1, 1});
|
||||
item.second.mean.assign({0, 0, 0});
|
||||
|
||||
if (scale_map.count(item.first)) {
|
||||
item.second.scale = scale_map.at(item.first);
|
||||
}
|
||||
if (mean_map.count(item.first)) {
|
||||
item.second.mean = mean_map.at(item.first);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return info_map;
|
||||
return info_maps;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string,
|
||||
const std::string& layout_string,
|
||||
const size_t batch_size,
|
||||
const std::string& scale_string,
|
||||
const std::string& mean_string,
|
||||
const std::map<std::string, T>& input_info) {
|
||||
std::vector<benchmark_app::InputsInfo> getInputsInfo(const std::string& shape_string,
|
||||
const std::string& layout_string,
|
||||
const size_t batch_size,
|
||||
const std::string& data_shapes_string,
|
||||
const std::string& scale_string,
|
||||
const std::string& mean_string,
|
||||
const std::map<std::string, T>& input_info) {
|
||||
bool reshape_required = false;
|
||||
return getInputsInfo<T>(shape_string,
|
||||
layout_string,
|
||||
batch_size,
|
||||
data_shapes_string,
|
||||
scale_string,
|
||||
mean_string,
|
||||
input_info,
|
||||
@ -160,4 +270,4 @@ benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string,
|
||||
#ifdef USE_OPENCV
|
||||
void dump_config(const std::string& filename, const std::map<std::string, std::map<std::string, std::string>>& config);
|
||||
void load_config(const std::string& filename, std::map<std::string, std::map<std::string, std::string>>& config);
|
||||
#endif
|
||||
#endif
|
||||
|
@ -679,6 +679,15 @@ inline std::string getFullDeviceName(InferenceEngine::Core& ie, std::string devi
|
||||
}
|
||||
}
|
||||
|
||||
inline std::string getFullDeviceName(ov::runtime::Core& ie, std::string device) {
|
||||
InferenceEngine::Parameter p;
|
||||
try {
|
||||
p = ie.get_metric(device, METRIC_KEY(FULL_DEVICE_NAME));
|
||||
return p.as<std::string>();
|
||||
} catch (InferenceEngine::Exception&) {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
/**
|
||||
* @brief This class represents an object that is found by an object detection net
|
||||
*/
|
||||
|
File diff suppressed because it is too large
@ -1,7 +1,3 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <gflags/gflags.h>
|
||||
@ -228,3 +224,95 @@ static void showUsage() {
|
||||
std::cout << " -iname \"<string>\" " << input_layer_names_message << std::endl;
|
||||
std::cout << " -pwl_me \"<double>\" " << pwl_max_error_percent_message << std::endl;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Checks input arguments
|
||||
* @param argc number of args
|
||||
* @param argv list of input arguments
|
||||
* @return bool status true(Success) or false(Fail)
|
||||
*/
|
||||
bool ParseAndCheckCommandLine(int argc, char* argv[]) {
|
||||
slog::info << "Parsing input parameters" << slog::endl;
|
||||
|
||||
gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
|
||||
if (FLAGS_h) {
|
||||
showUsage();
|
||||
showAvailableDevices();
|
||||
return false;
|
||||
}
|
||||
bool isDumpMode = !FLAGS_wg.empty() || !FLAGS_we.empty();
|
||||
|
||||
// input not required only in dump mode and if external scale factor provided
|
||||
if (FLAGS_i.empty() && (!isDumpMode || FLAGS_q.compare("user") != 0)) {
|
||||
showUsage();
|
||||
if (isDumpMode) {
|
||||
throw std::logic_error("In model dump mode either static quantization is used (-i) or user scale"
|
||||
" factor need to be provided. See -q user option");
|
||||
}
|
||||
throw std::logic_error("Input file not set. Please use -i.");
|
||||
}
|
||||
|
||||
if (FLAGS_m.empty() && FLAGS_rg.empty()) {
|
||||
showUsage();
|
||||
throw std::logic_error("Either IR file (-m) or GNAModel file (-rg) need to be set.");
|
||||
}
|
||||
|
||||
if ((!FLAGS_m.empty() && !FLAGS_rg.empty())) {
|
||||
throw std::logic_error("Only one of -m and -rg is allowed.");
|
||||
}
|
||||
|
||||
std::vector<std::string> supportedDevices = {"CPU",
|
||||
"GPU",
|
||||
"GNA_AUTO",
|
||||
"GNA_HW",
|
||||
"GNA_HW_WITH_SW_FBACK",
|
||||
"GNA_SW_EXACT",
|
||||
"GNA_SW",
|
||||
"GNA_SW_FP32",
|
||||
"HETERO:GNA,CPU",
|
||||
"HETERO:GNA_HW,CPU",
|
||||
"HETERO:GNA_SW_EXACT,CPU",
|
||||
"HETERO:GNA_SW,CPU",
|
||||
"HETERO:GNA_SW_FP32,CPU",
|
||||
"MYRIAD"};
|
||||
|
||||
if (std::find(supportedDevices.begin(), supportedDevices.end(), FLAGS_d) == supportedDevices.end()) {
|
||||
throw std::logic_error("Specified device is not supported.");
|
||||
}
|
||||
|
||||
uint32_t batchSize = (uint32_t)FLAGS_bs;
|
||||
if ((batchSize < 1) || (batchSize > 8)) {
|
||||
throw std::logic_error("Batch size out of range (1..8).");
|
||||
}
|
||||
|
||||
/** default is a static quantization **/
|
||||
if ((FLAGS_q.compare("static") != 0) && (FLAGS_q.compare("dynamic") != 0) && (FLAGS_q.compare("user") != 0)) {
|
||||
throw std::logic_error("Quantization mode not supported (static, dynamic, user).");
|
||||
}
|
||||
|
||||
if (FLAGS_q.compare("dynamic") == 0) {
|
||||
throw std::logic_error("Dynamic quantization not yet supported.");
|
||||
}
|
||||
|
||||
if (FLAGS_qb != 16 && FLAGS_qb != 8) {
|
||||
throw std::logic_error("Only 8 or 16 bits supported.");
|
||||
}
|
||||
|
||||
if (FLAGS_nthreads <= 0) {
|
||||
throw std::logic_error("Invalid value for 'nthreads' argument. It must be greater that or equal to 0");
|
||||
}
|
||||
|
||||
if (FLAGS_cw_r < 0) {
|
||||
throw std::logic_error("Invalid value for 'cw_r' argument. It must be greater than or equal to 0");
|
||||
}
|
||||
|
||||
if (FLAGS_cw_l < 0) {
|
||||
throw std::logic_error("Invalid value for 'cw_l' argument. It must be greater than or equal to 0");
|
||||
}
|
||||
|
||||
if (FLAGS_pwl_me < 0.0 || FLAGS_pwl_me > 100.0) {
|
||||
throw std::logic_error("Invalid value for 'pwl_me' argument. It must be greater than 0.0 and less than 100.0");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
406
samples/cpp/speech_sample/utils.hpp
Normal file
@ -0,0 +1,406 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once
#include <cnpy.h>

#include <samples/common.hpp>

#define MAX_SCORE_DIFFERENCE 0.0001f  // max score difference for frame error threshold
#define MAX_VAL_2B_FEAT      16384    // max to find scale factor

typedef std::chrono::high_resolution_clock Time;
typedef std::chrono::duration<double, std::ratio<1, 1000>> ms;
typedef std::chrono::duration<float> fsec;

/**
 * @brief struct to store score error
 */
typedef struct {
    uint32_t numScores;
    uint32_t numErrors;
    float threshold;
    float maxError;
    float rmsError;
    float sumError;
    float sumRmsError;
    float sumSquaredError;
    float maxRelError;
    float sumRelError;
    float sumSquaredRelError;
} score_error_t;

/**
 * @brief struct to store infer request data per frame
 */
struct InferRequestStruct {
    ov::runtime::InferRequest inferRequest;
    int frameIndex;
    uint32_t numFramesThisBatch;
};

/**
 * @brief Check number of input files and model network inputs
 * @param numInputs number of model inputs
 * @param numInputFiles number of input files
 * @return none.
 */
void CheckNumberOfInputs(size_t numInputs, size_t numInputFiles) {
    if (numInputs != numInputFiles) {
        throw std::logic_error("Number of network inputs (" + std::to_string(numInputs) +
                               ") is not equal to number of input files (" + std::to_string(numInputFiles) + ")");
    }
}

/**
 * @brief Get scale factor for quantization
 * @param ptrFloatMemory pointer to float memory with speech feature vector
 * @param targetMax max scale factor
 * @param numElements number of elements in speech feature vector
 * @return scale factor
 */
float ScaleFactorForQuantization(void* ptrFloatMemory, float targetMax, uint32_t numElements) {
    float* ptrFloatFeat = reinterpret_cast<float*>(ptrFloatMemory);
    float max = 0.0;
    float scaleFactor;

    for (uint32_t i = 0; i < numElements; i++) {
        if (fabs(ptrFloatFeat[i]) > max) {
            max = fabs(ptrFloatFeat[i]);
        }
    }

    if (max == 0) {
        scaleFactor = 1.0;
    } else {
        scaleFactor = targetMax / max;
    }

    return (scaleFactor);
}
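
A quick illustration of how this helper behaves (the feature values below are made up for illustration; MAX_VAL_2B_FEAT is the target maximum defined above): the largest magnitude in the buffer determines the factor.

// Sketch only: hypothetical input data, not taken from the sample.
float feat[4] = {0.5f, -2.0f, 1.0f, 0.25f};
float sf = ScaleFactorForQuantization(feat, MAX_VAL_2B_FEAT, 4);
// max |value| is 2.0, so sf == 16384 / 2.0 == 8192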

/**
 * @brief Clean score error
 * @param error pointer to score error struct
 * @return none.
 */
void ClearScoreError(score_error_t* error) {
    error->numScores = 0;
    error->numErrors = 0;
    error->maxError = 0.0;
    error->rmsError = 0.0;
    error->sumError = 0.0;
    error->sumRmsError = 0.0;
    error->sumSquaredError = 0.0;
    error->maxRelError = 0.0;
    error->sumRelError = 0.0;
    error->sumSquaredRelError = 0.0;
}

/**
 * @brief Update total score error
 * @param error pointer to score error struct
 * @param totalError pointer to total score error struct
 * @return none.
 */
void UpdateScoreError(score_error_t* error, score_error_t* totalError) {
    totalError->numErrors += error->numErrors;
    totalError->numScores += error->numScores;
    totalError->sumRmsError += error->rmsError;
    totalError->sumError += error->sumError;
    totalError->sumSquaredError += error->sumSquaredError;
    if (error->maxError > totalError->maxError) {
        totalError->maxError = error->maxError;
    }
    totalError->sumRelError += error->sumRelError;
    totalError->sumSquaredRelError += error->sumSquaredRelError;
    if (error->maxRelError > totalError->maxRelError) {
        totalError->maxRelError = error->maxRelError;
    }
}

/**
 * @brief Compare score errors, arrays should be the same length
 * @param ptrScoreArray - pointer to score error struct array
 * @param ptrRefScoreArray - pointer to score error struct array to compare
 * @param scoreError - pointer to score error struct to save a new error
 * @param numRows - number of rows in score error arrays
 * @param numColumns - number of columns in score error arrays
 * @return none.
 */
void CompareScores(float* ptrScoreArray,
                   void* ptrRefScoreArray,
                   score_error_t* scoreError,
                   uint32_t numRows,
                   uint32_t numColumns) {
    uint32_t numErrors = 0;

    ClearScoreError(scoreError);

    float* A = ptrScoreArray;
    float* B = reinterpret_cast<float*>(ptrRefScoreArray);
    for (uint32_t i = 0; i < numRows; i++) {
        for (uint32_t j = 0; j < numColumns; j++) {
            float score = A[i * numColumns + j];
            // std::cout << "score" << score << std::endl;
            float refscore = B[i * numColumns + j];
            float error = fabs(refscore - score);
            float rel_error = error / (static_cast<float>(fabs(refscore)) + 1e-20f);
            float squared_error = error * error;
            float squared_rel_error = rel_error * rel_error;
            scoreError->numScores++;
            scoreError->sumError += error;
            scoreError->sumSquaredError += squared_error;
            if (error > scoreError->maxError) {
                scoreError->maxError = error;
            }
            scoreError->sumRelError += rel_error;
            scoreError->sumSquaredRelError += squared_rel_error;
            if (rel_error > scoreError->maxRelError) {
                scoreError->maxRelError = rel_error;
            }
            if (error > scoreError->threshold) {
                numErrors++;
            }
        }
    }
    scoreError->rmsError = sqrt(scoreError->sumSquaredError / (numRows * numColumns));
    scoreError->sumRmsError += scoreError->rmsError;
    scoreError->numErrors = numErrors;
    // std::cout << "rmsError=" << scoreError->rmsError << " sumRmsError=" << scoreError->sumRmsError;
}

/**
 * @brief Get total stdev error
 * @param error pointer to score error struct
 * @return error
 */
float StdDevError(score_error_t error) {
    return (sqrt(error.sumSquaredError / error.numScores -
                 (error.sumError / error.numScores) * (error.sumError / error.numScores)));
}
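
In other words, this is the population standard deviation of the per-score error, sqrt(mean(e^2) - mean(e)^2). A small worked check with made-up numbers:

// Sketch only: two scores with hypothetical absolute errors 1 and 3.
score_error_t e = {};
e.numScores = 2;
e.sumError = 4.0f;          // 1 + 3
e.sumSquaredError = 10.0f;  // 1*1 + 3*3
// StdDevError(e) == sqrt(10/2 - (4/2)*(4/2)) == sqrt(5 - 4) == 1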

#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
#    ifdef _WIN32
#        include <intrin.h>
#        include <windows.h>
#    else
#        include <cpuid.h>
#    endif

inline void native_cpuid(unsigned int* eax, unsigned int* ebx, unsigned int* ecx, unsigned int* edx) {
    size_t level = *eax;
#    ifdef _WIN32
    int regs[4] = {static_cast<int>(*eax), static_cast<int>(*ebx), static_cast<int>(*ecx), static_cast<int>(*edx)};
    __cpuid(regs, level);
    *eax = static_cast<uint32_t>(regs[0]);
    *ebx = static_cast<uint32_t>(regs[1]);
    *ecx = static_cast<uint32_t>(regs[2]);
    *edx = static_cast<uint32_t>(regs[3]);
#    else
    __get_cpuid(level, eax, ebx, ecx, edx);
#    endif
}

/**
 * @brief Get GNA module frequency
 * @return GNA module frequency in MHz
 */
float getGnaFrequencyMHz() {
    uint32_t eax = 1;
    uint32_t ebx = 0;
    uint32_t ecx = 0;
    uint32_t edx = 0;
    uint32_t family = 0;
    uint32_t model = 0;
    const uint8_t sixth_family = 6;
    const uint8_t cannon_lake_model = 102;
    const uint8_t gemini_lake_model = 122;
    const uint8_t ice_lake_model = 126;
    const uint8_t tgl_model = 140;
    const uint8_t next_model = 151;

    native_cpuid(&eax, &ebx, &ecx, &edx);
    family = (eax >> 8) & 0xF;

    // model is the concatenation of two fields
    // | extended model | model |
    // copy extended model data
    model = (eax >> 16) & 0xF;
    // shift
    model <<= 4;
    // copy model data
    model += (eax >> 4) & 0xF;

    if (family == sixth_family) {
        switch (model) {
        case cannon_lake_model:
        case ice_lake_model:
        case tgl_model:
        case next_model:
            return 400;
        case gemini_lake_model:
            return 200;
        default:
            return 1;
        }
    } else {
        // counters are not supported and we return just the default value
        return 1;
    }
}
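
To make the family/model decoding concrete, here is an illustrative decode for a leaf-1 EAX value of 0x000806C1 (a Tiger Lake part, chosen as an example and not read from the commit itself); the numbers follow directly from the shifts above.

// Sketch only: eax is a hard-coded example, not a real CPUID read.
uint32_t eax = 0x000806C1;
uint32_t family = (eax >> 8) & 0xF;                                // 6
uint32_t model = (((eax >> 16) & 0xF) << 4) + ((eax >> 4) & 0xF);  // (8 << 4) + 12 = 140 (tgl_model)
// getGnaFrequencyMHz() would report 400 MHz for this combination.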

#endif  // if not ARM

/**
 * @brief Print a report on the statistical score error
 * @param totalError reference to a total score error struct
 * @param framesNum number of frames in utterance
 * @param stream output stream
 * @return none.
 */
void printReferenceCompareResults(score_error_t const& totalError, size_t framesNum, std::ostream& stream) {
    stream << " max error: " << totalError.maxError << std::endl;
    stream << " avg error: " << totalError.sumError / totalError.numScores << std::endl;
    stream << " avg rms error: " << totalError.sumRmsError / framesNum << std::endl;
    stream << " stdev error: " << StdDevError(totalError) << std::endl << std::endl;
    stream << std::endl;
}

/**
 * @brief Print a report on the performance counts
 * @param utterancePerfMap reference to a map to store performance counters
 * @param numberOfFrames number of frames
 * @param stream output stream
 * @param fullDeviceName full device name string
 * @param numberOfFramesOnHw number of frames delivered to GNA HW
 * @param FLAGS_d flag of device
 * @return none.
 */
void printPerformanceCounters(std::map<std::string, ov::runtime::ProfilingInfo> const& utterancePerfMap,
                              size_t numberOfFrames,
                              std::ostream& stream,
                              std::string fullDeviceName,
                              const uint64_t numberOfFramesOnHw,
                              std::string FLAGS_d) {
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
    stream << std::endl << "Performance counts:" << std::endl;
    stream << std::setw(10) << std::right << ""
           << "Counter descriptions";
    stream << std::setw(22) << "Utt scoring time";
    stream << std::setw(18) << "Avg infer time";
    stream << std::endl;

    stream << std::setw(46) << "(ms)";
    stream << std::setw(24) << "(us per call)";
    stream << std::endl;
    // if GNA HW counters
    // get frequency of GNA module
    float freq = getGnaFrequencyMHz();
    for (const auto& it : utterancePerfMap) {
        std::string const& counter_name = it.first;
        float current_units_us = static_cast<float>(it.second.real_time.count()) / freq;
        float call_units_us = current_units_us / numberOfFrames;
        if (FLAGS_d.find("GNA") != std::string::npos) {
            stream << std::setw(30) << std::left << counter_name.substr(4, counter_name.size() - 1);
        } else {
            stream << std::setw(30) << std::left << counter_name;
        }
        stream << std::setw(16) << std::right << current_units_us / 1000;
        stream << std::setw(21) << std::right << call_units_us;
        stream << std::endl;
    }
    stream << std::endl;
    std::cout << std::endl;
    std::cout << "Full device name: " << fullDeviceName << std::endl;
    std::cout << std::endl;
    stream << "Number of frames delivered to GNA HW: " << numberOfFramesOnHw;
    stream << "/" << numberOfFrames;
    stream << std::endl;
#endif
}

/**
 * @brief Get performance counts
 * @param request reference to infer request
 * @param perfCounters reference to a map to save performance counters
 * @return none.
 */
void getPerformanceCounters(ov::runtime::InferRequest& request,
                            std::map<std::string, ov::runtime::ProfilingInfo>& perfCounters) {
    auto retPerfCounters = request.get_profiling_info();

    for (const auto& element : retPerfCounters) {
        perfCounters[element.node_name] = element;
    }
}

/**
 * @brief Summarize performance counts and total number of frames executed on the GNA HW device
 * @param perfCounters reference to a map to get performance counters
 * @param totalPerfCounters reference to a map to save total performance counters
 * @param totalRunsOnHw reference to a total number of frames computed on GNA HW
 * @return none.
 */
void sumPerformanceCounters(std::map<std::string, ov::runtime::ProfilingInfo> const& perfCounters,
                            std::map<std::string, ov::runtime::ProfilingInfo>& totalPerfCounters,
                            uint64_t& totalRunsOnHw) {
    auto runOnHw = false;
    for (const auto& pair : perfCounters) {
        totalPerfCounters[pair.first].real_time += pair.second.real_time;
        runOnHw |= pair.second.real_time > std::chrono::microseconds(0);  // if realTime is above zero, that means that
                                                                          // a primitive was executed on the device
    }
    totalRunsOnHw += runOnHw;
}

/**
 * @brief Parse scale factors
 * @param str reference to user-specified input scale factor for quantization, can be separated by comma
 * @return vector of scale factors
 */
std::vector<std::string> ParseScaleFactors(const std::string& str) {
    std::vector<std::string> scaleFactorInput;

    if (!str.empty()) {
        std::string outStr;
        std::istringstream stream(str);
        int i = 0;
        while (getline(stream, outStr, ',')) {
            auto floatScaleFactor = std::stof(outStr);
            if (floatScaleFactor <= 0.0f) {
                throw std::logic_error("Scale factor for input #" + std::to_string(i) +
                                       " (counting from zero) is out of range (must be positive).");
            }
            scaleFactorInput.push_back(outStr);
            i++;
        }
    } else {
        throw std::logic_error("Scale factor need to be specified via -sf option if you are using -q user");
    }
    return scaleFactorInput;
}
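
For illustration, with hypothetical command-line values the helper behaves like this:

// Sketch only: the argument strings are made-up examples.
std::vector<std::string> factors = ParseScaleFactors("2048.0,1024.0");  // {"2048.0", "1024.0"}, one factor per input
// ParseScaleFactors("") and ParseScaleFactors("0") both throw std::logic_error.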

/**
 * @brief Parse string of file names separated by comma to save it to vector of file names
 * @param str file names separated by comma
 * @return vector of file names
 */
std::vector<std::string> ConvertStrToVector(std::string str) {
    std::vector<std::string> blobName;
    if (!str.empty()) {
        size_t pos_last = 0;
        size_t pos_next = 0;
        while ((pos_next = str.find(",", pos_last)) != std::string::npos) {
            blobName.push_back(str.substr(pos_last, pos_next - pos_last));
            pos_last = pos_next + 1;
        }
        blobName.push_back(str.substr(pos_last));
    }
    return blobName;
}
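
A usage sketch with hypothetical file names:

// Sketch only: the strings below are illustrative.
std::vector<std::string> names = ConvertStrToVector("in1.ark,in2.ark");  // {"in1.ark", "in2.ark"}
std::vector<std::string> none = ConvertStrToVector("");                  // {}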

@ -31,19 +31,19 @@ endif()

if(ENABLE_OV_IR_FRONTEND)
    if(BUILD_SHARED_LIBS)
        add_dependencies(ov_runtime_libraries ir_ov_frontend)
        add_dependencies(ov_runtime_libraries ov_ir_frontend)
    endif()
    # use this one once CVS-69781 is fixed
    # add_dependencies(inference_engine ir_ov_frontend)
    # add_dependencies(inference_engine ov_ir_frontend)
endif()
if(ENABLE_OV_ONNX_FRONTEND)
    add_dependencies(inference_engine onnx_ov_frontend)
    add_dependencies(inference_engine ov_onnx_frontend)
endif()

if(ENABLE_OV_PDPD_FRONTEND)
    add_dependencies(inference_engine paddlepaddle_ov_frontend)
    add_dependencies(inference_engine ov_paddlepaddle_frontend)
endif()

if(ENABLE_OV_TF_FRONTEND)
    add_dependencies(inference_engine tensorflow_ov_frontend)
    add_dependencies(inference_engine ov_tensorflow_frontend)
endif()

@ -17,9 +17,9 @@ set(LIBRARY_OUTPUT_DIRECTORY_BIN ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
add_subdirectory(src)

if(ENABLE_OV_CORE_UNIT_TESTS)
    add_subdirectory(tests/mock/mock_py_ov_frontend)
    add_dependencies(pyopenvino mock_py_ov_frontend)
    set_target_properties(mock_py_ov_frontend PROPERTIES
    add_subdirectory(tests/mock/ov_mock_py_frontend)
    add_dependencies(pyopenvino ov_mock_py_frontend)
    set_target_properties(ov_mock_py_frontend PROPERTIES
        LIBRARY_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_DIRECTORY_BIN}
        ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_DIRECTORY_BIN}
        COMPILE_PDB_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_DIRECTORY_BIN}

@ -41,6 +41,8 @@ from openvino.pyopenvino import FrontEnd
from openvino.pyopenvino import InputModel
from openvino.pyopenvino import Place
from openvino.pyopenvino import TelemetryExtension
from openvino.pyopenvino import DecoderTransformationExtension
from openvino.pyopenvino import JsonConfigExtension

# exceptions
from openvino.pyopenvino import NotImplementedFailure

@ -321,68 +321,64 @@ py::dict outputs_to_dict(const std::vector<ov::Output<const ov::Node>>& outputs,
    ov::runtime::Tensor t{request.get_tensor(out)};
    switch (t.get_element_type()) {
    case ov::element::Type_t::i8: {
        py::array arr(t.get_shape(), t.data<int8_t>());
        res[py::cast(out)] = arr;
        res[py::cast(out)] = py::array_t<int8_t>(t.get_shape(), t.data<int8_t>());
        break;
    }
    case ov::element::Type_t::i16: {
        py::array arr(t.get_shape(), t.data<int16_t>());
        res[py::cast(out)] = arr;
        res[py::cast(out)] = py::array_t<int16_t>(t.get_shape(), t.data<int16_t>());
        break;
    }
    case ov::element::Type_t::i32: {
        py::array arr(t.get_shape(), t.data<int32_t>());
        res[py::cast(out)] = arr;
        res[py::cast(out)] = py::array_t<int32_t>(t.get_shape(), t.data<int32_t>());
        break;
    }
    case ov::element::Type_t::i64: {
        py::array arr(t.get_shape(), t.data<int64_t>());
        res[py::cast(out)] = arr;
        res[py::cast(out)] = py::array_t<int64_t>(t.get_shape(), t.data<int64_t>());
        break;
    }
    case ov::element::Type_t::u8: {
        py::array arr(t.get_shape(), t.data<uint8_t>());
        res[py::cast(out)] = arr;
        res[py::cast(out)] = py::array_t<uint8_t>(t.get_shape(), t.data<uint8_t>());
        break;
    }
    case ov::element::Type_t::u16: {
        py::array arr(t.get_shape(), t.data<uint16_t>());
        res[py::cast(out)] = arr;
        res[py::cast(out)] = py::array_t<uint16_t>(t.get_shape(), t.data<uint16_t>());
        break;
    }
    case ov::element::Type_t::u32: {
        py::array arr(t.get_shape(), t.data<uint32_t>());
        res[py::cast(out)] = arr;
        res[py::cast(out)] = py::array_t<uint32_t>(t.get_shape(), t.data<uint32_t>());
        break;
    }
    case ov::element::Type_t::u64: {
        py::array arr(t.get_shape(), t.data<uint64_t>());
        res[py::cast(out)] = arr;
        res[py::cast(out)] = py::array_t<uint64_t>(t.get_shape(), t.data<uint64_t>());
        break;
    }
    case ov::element::Type_t::bf16: {
        py::array arr(t.get_shape(), t.data<ov::bfloat16>());
        res[py::cast(out)] = arr.view("int16");
        res[py::cast(out)] = py::array(py::dtype("float16"), t.get_shape(), t.data<ov::bfloat16>());
        break;
    }
    case ov::element::Type_t::f16: {
        py::array arr(t.get_shape(), t.data<ov::float16>());
        res[py::cast(out)] = arr.view("int16");
        res[py::cast(out)] = py::array(py::dtype("float16"), t.get_shape(), t.data<ov::float16>());
        break;
    }
    case ov::element::Type_t::f32: {
        py::array arr(t.get_shape(), t.data<float>());
        res[py::cast(out)] = arr;
        res[py::cast(out)] = py::array_t<float>(t.get_shape(), t.data<float>());
        break;
    }
    case ov::element::Type_t::f64: {
        py::array arr(t.get_shape(), t.data<double>());
        res[py::cast(out)] = arr;
        res[py::cast(out)] = py::array_t<double>(t.get_shape(), t.data<double>());
        break;
    }
    case ov::element::Type_t::boolean: {
        py::array arr(t.get_shape(), t.data<bool*>());
        res[py::cast(out)] = arr;
        res[py::cast(out)] = py::array_t<bool>(t.get_shape(), t.data<bool>());
        break;
    }
    default: {

16 src/bindings/python/src/pyopenvino/core/extension.cpp Normal file
@ -0,0 +1,16 @@

// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <pybind11/functional.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/stl_bind.h>

#include "openvino/frontend/manager.hpp"

namespace py = pybind11;

void regclass_Extension(py::module m) {
    py::class_<ov::Extension, std::shared_ptr<ov::Extension>> ext(m, "Extension", py::dynamic_attr());
}

11 src/bindings/python/src/pyopenvino/core/extension.hpp Normal file
@ -0,0 +1,11 @@

// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <pybind11/pybind11.h>

namespace py = pybind11;

void regclass_Extension(py::module m);

@ -65,7 +65,6 @@ void regclass_InferRequest(py::module m) {
            self._start_time = Time::now();
            self._request.infer();
            self._end_time = Time::now();

            return Common::outputs_to_dict(self._outputs, self._request);
        },
        py::arg("inputs"));

56 src/bindings/python/src/pyopenvino/frontend/extensions.cpp Normal file
@ -0,0 +1,56 @@

// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <pybind11/functional.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/stl_bind.h>

#include "extension/json_config.hpp"
#include "manager.hpp"
#include "openvino/frontend/exception.hpp"
#include "openvino/frontend/extension/decoder_transformation.hpp"
#include "openvino/frontend/extension/telemetry.hpp"
#include "pyopenvino/graph/function.hpp"

namespace py = pybind11;

using namespace ov::frontend;

void regclass_frontend_TelemetryExtension(py::module m) {
    py::class_<TelemetryExtension, std::shared_ptr<TelemetryExtension>, ov::Extension> ext(m,
                                                                                            "TelemetryExtension",
                                                                                            py::dynamic_attr());

    ext.def(py::init([](const std::string& event_category,
                        const TelemetryExtension::event_callback& send_event,
                        const TelemetryExtension::error_callback& send_error,
                        const TelemetryExtension::error_callback& send_stack_trace) {
        return std::make_shared<TelemetryExtension>(event_category, send_event, send_error, send_stack_trace);
    }));

    ext.def("send_event", &TelemetryExtension::send_event);
    ext.def("send_error", &TelemetryExtension::send_error);
    ext.def("send_stack_trace", &TelemetryExtension::send_stack_trace);
}

void regclass_frontend_DecoderTransformationExtension(py::module m) {
    py::class_<ov::frontend::DecoderTransformationExtension,
               std::shared_ptr<ov::frontend::DecoderTransformationExtension>,
               ov::Extension>
        ext(m, "DecoderTransformationExtension", py::dynamic_attr());
}

void regclass_frontend_JsonConfigExtension(py::module m) {
    py::class_<ov::frontend::JsonConfigExtension,
               std::shared_ptr<ov::frontend::JsonConfigExtension>,
               ov::frontend::DecoderTransformationExtension>
        ext(m, "JsonConfigExtension", py::dynamic_attr());

    ext.doc() = "Extension class to load and process ModelOptimizer JSON config file";

    ext.def(py::init([](const std::string& path) {
        return std::make_shared<ov::frontend::JsonConfigExtension>(path);
    }));
}
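
For reference, a minimal C++ sketch of what the py::init lambda above forwards to (the "mo" category string and the empty callbacks are placeholders for illustration, not values taken from this commit):

// Sketch only: default-constructed (empty) callbacks; a real caller would install telemetry handlers.
ov::frontend::TelemetryExtension::event_callback send_event;        // empty std::function
ov::frontend::TelemetryExtension::error_callback send_error;        // empty std::function
ov::frontend::TelemetryExtension::error_callback send_stack_trace;  // empty std::function
auto telemetry = std::make_shared<ov::frontend::TelemetryExtension>("mo", send_event, send_error, send_stack_trace);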

13 src/bindings/python/src/pyopenvino/frontend/extensions.hpp Normal file
@ -0,0 +1,13 @@

// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <pybind11/pybind11.h>

namespace py = pybind11;

void regclass_frontend_TelemetryExtension(py::module m);
void regclass_frontend_DecoderTransformationExtension(py::module m);
void regclass_frontend_JsonConfigExtension(py::module m);

@ -8,8 +8,8 @@
#include <pybind11/stl_bind.h>

#include "openvino/frontend/exception.hpp"
#include "openvino/frontend/extension/telemetry.hpp"
#include "openvino/frontend/manager.hpp"
#include "openvino/frontend/telemetry_extension.hpp"
#include "pyopenvino/graph/function.hpp"

namespace py = pybind11;

@ -41,7 +41,7 @@ void regclass_frontend_FrontEnd(py::module m) {
    )");

    fem.def("convert",
            static_cast<std::shared_ptr<ov::Model> (FrontEnd::*)(InputModel::Ptr) const>(&FrontEnd::convert),
            static_cast<std::shared_ptr<ov::Model> (FrontEnd::*)(const InputModel::Ptr&) const>(&FrontEnd::convert),
            py::arg("model"),
            R"(
                Completely convert and normalize entire function, throws if it is not possible.

@ -58,7 +58,7 @@ void regclass_frontend_FrontEnd(py::module m) {
    )");

    fem.def("convert",
            static_cast<void (FrontEnd::*)(std::shared_ptr<ov::Model>) const>(&FrontEnd::convert),
            static_cast<void (FrontEnd::*)(const std::shared_ptr<ov::Model>&) const>(&FrontEnd::convert),
            py::arg("function"),
            R"(
                Completely convert the remaining, not converted part of a function.

@ -143,26 +143,3 @@ void regclass_frontend_FrontEnd(py::module m) {
        return "<FrontEnd '" + self.get_name() + "'>";
    });
}

void regclass_frontend_Extension(py::module m) {
    py::class_<ov::Extension, std::shared_ptr<ov::Extension>> ext(m, "Extension", py::dynamic_attr());
}

void regclass_frontend_TelemetryExtension(py::module m) {
    {
        py::class_<TelemetryExtension, std::shared_ptr<TelemetryExtension>, ov::Extension> ext(m,
                                                                                                "TelemetryExtension",
                                                                                                py::dynamic_attr());

        ext.def(py::init([](const std::string& event_category,
                            const TelemetryExtension::event_callback& send_event,
                            const TelemetryExtension::error_callback& send_error,
                            const TelemetryExtension::error_callback& send_stack_trace) {
            return std::make_shared<TelemetryExtension>(event_category, send_event, send_error, send_stack_trace);
        }));

        ext.def("send_event", &TelemetryExtension::send_event);
        ext.def("send_error", &TelemetryExtension::send_error);
        ext.def("send_stack_trace", &TelemetryExtension::send_stack_trace);
    }
}

@ -9,5 +9,3 @@
namespace py = pybind11;

void regclass_frontend_FrontEnd(py::module m);
void regclass_frontend_Extension(py::module m);
void regclass_frontend_TelemetryExtension(py::module m);

@ -14,4 +14,3 @@ void regclass_frontend_InitializationFailureFrontEnd(py::module m);
void regclass_frontend_OpConversionFailureFrontEnd(py::module m);
void regclass_frontend_OpValidationFailureFrontEnd(py::module m);
void regclass_frontend_GeneralFailureFrontEnd(py::module m);

@ -24,6 +24,7 @@
#include "pyopenvino/core/compiled_model.hpp"
#include "pyopenvino/core/containers.hpp"
#include "pyopenvino/core/core.hpp"
#include "pyopenvino/core/extension.hpp"
#include "pyopenvino/core/ie_parameter.hpp"
#include "pyopenvino/core/infer_request.hpp"
#include "pyopenvino/core/offline_transformations.hpp"

@ -31,6 +32,7 @@
#include "pyopenvino/core/tensor.hpp"
#include "pyopenvino/core/variable_state.hpp"
#include "pyopenvino/core/version.hpp"
#include "pyopenvino/frontend/extensions.hpp"
#include "pyopenvino/frontend/frontend.hpp"
#include "pyopenvino/frontend/inputmodel.hpp"
#include "pyopenvino/frontend/manager.hpp"

@ -124,6 +126,7 @@ PYBIND11_MODULE(pyopenvino, m) {
    regclass_Parameter(m);
    regclass_AsyncInferQueue(m);
    regclass_ProfilingInfo(m);
    regclass_Extension(m);

    regclass_frontend_Place(m);
    regclass_frontend_InitializationFailureFrontEnd(m);

@ -131,11 +134,12 @@ PYBIND11_MODULE(pyopenvino, m) {
    regclass_frontend_OpConversionFailureFrontEnd(m);
    regclass_frontend_OpValidationFailureFrontEnd(m);
    regclass_frontend_NotImplementedFailureFrontEnd(m);
    regclass_frontend_Extension(m);
    regclass_frontend_FrontEndManager(m);
    regclass_frontend_FrontEnd(m);
    regclass_frontend_InputModel(m);
    regclass_frontend_TelemetryExtension(m);
    regclass_frontend_DecoderTransformationExtension(m);
    regclass_frontend_JsonConfigExtension(m);

    regmodule_offline_transformations(m);
}

@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
#

set(TARGET_FE_NAME "mock_py_ov_frontend")
set(TARGET_FE_NAME "ov_mock_py_frontend")

file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
file(GLOB_RECURSE LIBRARY_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)

@ -9,11 +9,11 @@
#include "openvino/frontend/visibility.hpp"

// Defined if we are building the plugin DLL (instead of using it)
#ifdef mock_py_ov_frontend_EXPORTS
#ifdef ov_mock_py_frontend_EXPORTS
#    define MOCK_API OPENVINO_CORE_EXPORTS
#else
#    define MOCK_API OPENVINO_CORE_IMPORTS
#endif  // mock_py_ov_frontend_EXPORTS
#endif  // ov_mock_py_frontend_EXPORTS

// OK to have 'using' in mock header

@ -257,13 +257,13 @@ public:
        return false;
    }

    bool is_equal(Ptr another) const override {
    bool is_equal(const Ptr& another) const override {
        m_stat.m_is_equal++;
        m_stat.m_lastArgPlace = another;
        return false;
    }

    bool is_equal_data(Ptr another) const override {
    bool is_equal_data(const Ptr& another) const override {
        m_stat.m_is_equal_data++;
        m_stat.m_lastArgPlace = another;
        return false;

@ -471,19 +471,19 @@ public:
        return std::make_shared<PlaceMockPy>();
    }

    void set_name_for_tensor(Place::Ptr tensor, const std::string& newName) override {
    void set_name_for_tensor(const Place::Ptr& tensor, const std::string& newName) override {
        m_stat.m_set_name_for_tensor++;
        m_stat.m_lastArgPlace = tensor;
        m_stat.m_lastArgString = newName;
    }

    void add_name_for_tensor(Place::Ptr tensor, const std::string& newName) override {
    void add_name_for_tensor(const Place::Ptr& tensor, const std::string& newName) override {
        m_stat.m_add_name_for_tensor++;
        m_stat.m_lastArgPlace = tensor;
        m_stat.m_lastArgString = newName;
    }

    void set_name_for_operation(Place::Ptr operation, const std::string& newName) override {
    void set_name_for_operation(const Place::Ptr& operation, const std::string& newName) override {
        m_stat.m_set_name_for_operation++;
        m_stat.m_lastArgPlace = operation;
        m_stat.m_lastArgString = newName;

@ -499,32 +499,32 @@ public:
        m_stat.m_lastArgString = name;
    }

    void set_name_for_dimension(Place::Ptr place, size_t shapeDimIndex, const std::string& dimName) override {
    void set_name_for_dimension(const Place::Ptr& place, size_t shapeDimIndex, const std::string& dimName) override {
        m_stat.m_set_name_for_dimension++;
        m_stat.m_lastArgPlace = place;
        m_stat.m_lastArgInt = static_cast<int>(shapeDimIndex);
        m_stat.m_lastArgString = dimName;
    }

    void cut_and_add_new_input(Place::Ptr place, const std::string& newNameOptional) override {
    void cut_and_add_new_input(const Place::Ptr& place, const std::string& newNameOptional) override {
        m_stat.m_cut_and_add_new_input++;
        m_stat.m_lastArgPlace = place;
        m_stat.m_lastArgString = newNameOptional;
    }

    void cut_and_add_new_output(Place::Ptr place, const std::string& newNameOptional) override {
    void cut_and_add_new_output(const Place::Ptr& place, const std::string& newNameOptional) override {
        m_stat.m_cut_and_add_new_output++;
        m_stat.m_lastArgPlace = place;
        m_stat.m_lastArgString = newNameOptional;
    }

    Place::Ptr add_output(Place::Ptr place) override {
    Place::Ptr add_output(const Place::Ptr& place) override {
        m_stat.m_add_output++;
        m_stat.m_lastArgPlace = place;
        return std::make_shared<PlaceMockPy>();
    }

    void remove_output(Place::Ptr place) override {
    void remove_output(const Place::Ptr& place) override {
        m_stat.m_remove_output++;
        m_stat.m_lastArgPlace = place;
    }

@ -546,19 +546,19 @@ public:
    }

    // Setting tensor properties
    void set_partial_shape(Place::Ptr place, const ngraph::PartialShape& shape) override {
    void set_partial_shape(const Place::Ptr& place, const ngraph::PartialShape& shape) override {
        m_stat.m_set_partial_shape++;
        m_stat.m_lastArgPlace = place;
        m_stat.m_lastArgPartialShape = shape;
    }

    ngraph::PartialShape get_partial_shape(Place::Ptr place) const override {
    ngraph::PartialShape get_partial_shape(const Place::Ptr& place) const override {
        m_stat.m_get_partial_shape++;
        m_stat.m_lastArgPlace = place;
        return {};
    }

    void set_element_type(Place::Ptr place, const ngraph::element::Type& type) override {
    void set_element_type(const Place::Ptr& place, const ngraph::element::Type& type) override {
        m_stat.m_set_element_type++;
        m_stat.m_lastArgPlace = place;
        m_stat.m_lastArgElementType = type;

@ -631,26 +631,26 @@ public:
        return false;
    }

    std::shared_ptr<ov::Model> convert(InputModel::Ptr model) const override {
    std::shared_ptr<ov::Model> convert(const InputModel::Ptr& model) const override {
        m_stat.m_convert_model++;
        return std::make_shared<ov::Model>(ov::NodeVector{}, ov::ParameterVector{});
    }

    void convert(std::shared_ptr<ov::Model> func) const override {
    void convert(const std::shared_ptr<ov::Model>& func) const override {
        m_stat.m_convert++;
    }

    std::shared_ptr<ov::Model> convert_partially(InputModel::Ptr model) const override {
    std::shared_ptr<ov::Model> convert_partially(const InputModel::Ptr& model) const override {
        m_stat.m_convert_partially++;
        return std::make_shared<ov::Model>(ov::NodeVector{}, ov::ParameterVector{});
    }

    std::shared_ptr<ov::Model> decode(InputModel::Ptr model) const override {
    std::shared_ptr<ov::Model> decode(const InputModel::Ptr& model) const override {
        m_stat.m_decode++;
        return std::make_shared<ov::Model>(ov::NodeVector{}, ov::ParameterVector{});
    }

    void normalize(std::shared_ptr<ov::Model> function) const override {
    void normalize(const std::shared_ptr<ov::Model>& function) const override {
        m_stat.m_normalize++;
    }

@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
#

set(TARGET_FE_NAME "mock_py_ov_frontend")
set(TARGET_FE_NAME "ov_mock_py_frontend")
set(PYBIND_FE_NAME "pybind_mock_frontend")

set(PYBIND_FE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/pyngraph_mock_frontend_api.cpp)

@ -5,7 +5,7 @@
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#include "../mock_py_ov_frontend/mock_py_frontend.hpp"
#include "../ov_mock_py_frontend/mock_py_frontend.hpp"

namespace py = pybind11;
using namespace ngraph;

@ -51,7 +51,7 @@ from openvino.frontend import FrontEndManager
def create_test_onnx_models():
    models = {}
    # Input model 1
    add = onnx.helper.make_node("Add", inputs=["in1", "in2"], outputs=["add_out"])
    add = onnx.helper.make_node("Add", inputs=["in1", "in2"], outputs=["add_out"], name="onnx_add_op")
    split = onnx.helper.make_node("Split", inputs=["add_out"],
                                  outputs=["out1", "out2"], name="split1", axis=0)
    relu = onnx.helper.make_node("Relu", inputs=["in3"], outputs=["out3"])

@ -1205,3 +1205,48 @@ def test_set_name_for_dimension():
    with pytest.raises(Exception) as e:
        model.set_name_for_dimension(one_const, 0, dim_name)
    assert "ONNX initializer shape dimension cannot be dynamic." in str(e)


def test_set_input_partial_shape_using_input_edge():
    skip_if_onnx_frontend_is_disabled()
    fe = fem.load_by_framework(framework=ONNX_FRONTEND_NAME)
    model = fe.load("input_model.onnx")

    add_operator = model.get_place_by_operation_name("onnx_add_op")
    add_input_edge = add_operator.get_input_port(inputPortIndex=0)
    model.set_partial_shape(add_input_edge, PartialShape([10, 10]))
    add_input_edge = add_operator.get_input_port(inputPortIndex=1)
    model.set_partial_shape(add_input_edge, PartialShape([1]))

    ov_model = fe.convert(model)
    assert ov_model.input("in1").get_partial_shape() == PartialShape([10, 10])
    assert ov_model.input("in2").get_partial_shape() == PartialShape([1])

    assert ov_model.output("out4").get_partial_shape() == PartialShape([10, 10])


def test_get_partial_shape_using_input_edge():
    skip_if_onnx_frontend_is_disabled()
    fe = fem.load_by_framework(framework=ONNX_FRONTEND_NAME)
    model = fe.load("input_model.onnx")

    add_operator = model.get_place_by_operation_name("onnx_add_op")
    add_input_edge = add_operator.get_input_port(inputPortIndex=0)

    pshape = model.get_partial_shape(add_input_edge)
    assert pshape == PartialShape([2, 2])


def test_get_partial_shape_using_output_edge():
    skip_if_onnx_frontend_is_disabled()
    fe = fem.load_by_framework(framework=ONNX_FRONTEND_NAME)
    model = fe.load("input_model.onnx")

    add_operator = model.get_place_by_operation_name("onnx_add_op")
    add_output_edge = add_operator.get_output_port(outputPortIndex=0)

    assert model.get_partial_shape(add_output_edge) == PartialShape([2, 2])

    split_operator = model.get_place_by_tensor_name("out1").get_producing_operation()
    out2_edge = split_operator.get_output_port(outputPortIndex=1)
    assert model.get_partial_shape(out2_edge) == PartialShape([1, 2])