Merge remote-tracking branch 'upstream/master' into add_mxnet_operations

yekruglov 2021-12-20 12:11:14 +03:00
commit fe4e714c76
545 changed files with 6722 additions and 2553 deletions

View File

@ -241,7 +241,7 @@ jobs:
. $(SETUPVARS) -pyver 3.8 && python3 -m pytest -s $(INSTALL_DIR)/tests/mo/unit_tests --junitxml=TEST-ModelOptimizer.xml
displayName: 'Model Optimizer UT'
continueOnError: false
enabled: false
enabled: true
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ov_core_unit_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml
workingDirectory: $(INSTALL_TEST_DIR)
@ -334,7 +334,7 @@ jobs:
displayName: 'Samples Smoke Tests'
continueOnError: false
condition: eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'ON')
enabled: false
enabled: true
- script: |
export DATA_PATH=$(MODELS_PATH)
@ -353,7 +353,7 @@ jobs:
workingDirectory: $(LAYER_TESTS_DIR)
displayName: 'Layer Tests'
continueOnError: false
enabled: false
enabled: true
- task: PublishTestResults@2
condition: always()

View File

@ -35,7 +35,7 @@ jobs:
- checkout: none
- script: git -C ~/work/openvino checkout -m --recurse-submodules $(Build.SourceVersion)
- script: git -C ~/work/openvino checkout -m $(Build.SourceVersion) && git -C ~/work/openvino submodule update --init --recursive
displayName: checkout
# Should be after 'Install dependencies' because Git lfs is not installed
@ -71,7 +71,7 @@ jobs:
./buildreleasenolto.sh
libinference_engine_preproc.so
MKLDNNPlugin
clDNNPlugin
ov_intel_gpu_plugin
clDNN_unit_tests64
gpuFuncTests
displayName: Build Lin

.gitmodules (vendored, 2 lines changed)
View File

@ -57,7 +57,7 @@
path = thirdparty/onednn_gpu
url = https://github.com/oneapi-src/oneDNN.git
[submodule "tools/pot/thirdparty/open_model_zoo"]
path = tools/pot/thirdparty/open_model_zoo
path = thirdparty/open_model_zoo
url = https://github.com/openvinotoolkit/open_model_zoo.git
[submodule "thirdparty/json/nlohmann_json"]
path = thirdparty/json/nlohmann_json

View File

@ -66,7 +66,7 @@ Jenkinsfile @openvinotoolkit/openvino-admins
/src/inference/include/ie/gna/ @openvinotoolkit/openvino-ie-gna-maintainers
# IE MULTI:
/inference-engine/src/multi_device/ @openvinotoolkit/openvino-ie-multi-maintainers
/src/plugins/auto/ @openvinotoolkit/openvino-ie-multi-maintainers
/src/inference/include/ie/multi-device/ @openvinotoolkit/openvino-ie-multi-maintainers
# IE Tests:

View File

@ -79,8 +79,20 @@ function(_ie_add_api_validator_post_build_step)
_ie_add_api_validator_post_build_step_recursive(TARGET ${API_VALIDATOR_TARGET})
# remove targets which were tested before
foreach(item IN LISTS VALIDATED_LIBRARIES)
foreach(target IN LISTS API_VALIDATOR_TARGETS)
list(FIND VALIDATED_LIBRARIES ${target} index)
if (NOT index EQUAL -1)
list(APPEND VALIDATED_TARGETS ${target})
endif()
if(TARGET "${target}")
get_target_property(orig_target ${target} ALIASED_TARGET)
list(FIND VALIDATED_LIBRARIES ${orig_target} index)
if (NOT index EQUAL -1)
list(APPEND VALIDATED_TARGETS ${target})
endif()
endif()
endforeach()
foreach(item IN LISTS VALIDATED_TARGETS)
list(REMOVE_ITEM API_VALIDATOR_TARGETS ${item})
endforeach()

View File

@ -3,7 +3,8 @@
#
set(FRONTEND_INSTALL_INCLUDE "runtime/include/")
set(FRONTEND_NAME_SUFFIX "_ov_frontend")
set(FRONTEND_NAME_PREFIX "ov_")
set(FRONTEND_NAME_SUFFIX "_frontend")
set(FRONTEND_NAMES "" CACHE INTERNAL "")
@ -20,7 +21,7 @@ function(ov_target_link_frontends TARGET_NAME)
endif()
foreach(name IN LISTS FRONTEND_NAMES)
set(frontend_target_name "${name}${FRONTEND_NAME_SUFFIX}")
set(frontend_target_name "${FRONTEND_NAME_PREFIX}${name}${FRONTEND_NAME_SUFFIX}")
target_link_libraries(${TARGET_NAME} PRIVATE ${frontend_target_name})
endforeach()
endfunction()
@ -99,7 +100,7 @@ macro(ov_add_frontend)
endif()
endforeach()
set(TARGET_NAME "${OV_FRONTEND_NAME}${FRONTEND_NAME_SUFFIX}")
set(TARGET_NAME "${FRONTEND_NAME_PREFIX}${OV_FRONTEND_NAME}${FRONTEND_NAME_SUFFIX}")
list(APPEND FRONTEND_NAMES ${OV_FRONTEND_NAME})
set(FRONTEND_NAMES "${FRONTEND_NAMES}" CACHE INTERNAL "" FORCE)

View File

@ -117,20 +117,20 @@ function(ie_add_plugin)
# fake dependencies to build in the following order:
# IE -> IE readers -> IE inference plugins -> IE-based apps
if(BUILD_SHARED_LIBS)
if(TARGET ir_ov_frontend)
add_dependencies(${IE_PLUGIN_NAME} ir_ov_frontend)
if(TARGET ov_ir_frontend)
add_dependencies(${IE_PLUGIN_NAME} ov_ir_frontend)
endif()
if(TARGET inference_engine_ir_v7_reader)
add_dependencies(${IE_PLUGIN_NAME} inference_engine_ir_v7_reader)
endif()
if(TARGET onnx_ov_frontend)
add_dependencies(${IE_PLUGIN_NAME} onnx_ov_frontend)
if(TARGET ov_onnx_frontend)
add_dependencies(${IE_PLUGIN_NAME} ov_onnx_frontend)
endif()
if(TARGET paddlepaddle_ov_frontend)
add_dependencies(${IE_PLUGIN_NAME} paddlepaddle_ov_frontend)
if(TARGET ov_paddlepaddle_frontend)
add_dependencies(${IE_PLUGIN_NAME} ov_paddlepaddle_frontend)
endif()
if(TARGET tensorflow_ov_frontend)
add_dependencies(${IE_PLUGIN_NAME} tensorflow_ov_frontend)
if(TARGET ov_tensorflow_frontend)
add_dependencies(${IE_PLUGIN_NAME} ov_tensorflow_frontend)
endif()
endif()

View File

@ -28,11 +28,11 @@
#
# ngraph::common - nGraph frontend common
#
# ngraph_onnx_ov_frontend_FOUND - True if the system has onnx_ov_frontend library
# ngraph::onnx_ov_frontend - ONNX FrontEnd target (optional)
# ngraph_ov_onnx_frontend_FOUND - True if the system has ov_onnx_frontend library
# ngraph::ov_onnx_frontend - ONNX FrontEnd target (optional)
#
# ngraph_paddlepaddle_frontend_FOUND - True if the system has PDPD frontend
# ngraph::paddlepaddle_ov_frontend - nGraph PDPD frontend (optional)
# ngraph::ov_paddlepaddle_frontend - nGraph PDPD frontend (optional)
#
@PACKAGE_INIT@
@ -58,38 +58,38 @@ if(TARGET openvino::frontend::common AND NOT TARGET ngraph::frontend_common)
INTERFACE_LINK_LIBRARIES openvino::frontend::common)
endif()
if(TARGET openvino::frontend::onnx AND NOT TARGET ngraph::onnx_ov_frontend)
add_library(ngraph::onnx_ov_frontend INTERFACE IMPORTED)
set_target_properties(ngraph::onnx_ov_frontend PROPERTIES
if(TARGET openvino::frontend::onnx AND NOT TARGET ngraph::ov_onnx_frontend)
add_library(ngraph::ov_onnx_frontend INTERFACE IMPORTED)
set_target_properties(ngraph::ov_onnx_frontend PROPERTIES
INTERFACE_LINK_LIBRARIES openvino::frontend::onnx)
endif()
if(TARGET openvino::frontend::paddlepaddle AND NOT TARGET ngraph::paddlepaddle_ov_frontend)
add_library(ngraph::paddlepaddle_ov_frontend INTERFACE IMPORTED)
set_target_properties(ngraph::paddlepaddle_ov_frontend PROPERTIES
if(TARGET openvino::frontend::paddlepaddle AND NOT TARGET ngraph::ov_paddlepaddle_frontend)
add_library(ngraph::ov_paddlepaddle_frontend INTERFACE IMPORTED)
set_target_properties(ngraph::ov_paddlepaddle_frontend PROPERTIES
INTERFACE_LINK_LIBRARIES openvino::frontend::paddlepaddle)
endif()
if(TARGET openvino::frontend::tensorflow AND NOT TARGET ngraph::tensorflow_ov_frontend)
add_library(ngraph::tensorflow_ov_frontend INTERFACE IMPORTED)
set_target_properties(ngraph::tensorflow_ov_frontend PROPERTIES
if(TARGET openvino::frontend::tensorflow AND NOT TARGET ngraph::ov_tensorflow_frontend)
add_library(ngraph::ov_tensorflow_frontend INTERFACE IMPORTED)
set_target_properties(ngraph::ov_tensorflow_frontend PROPERTIES
INTERFACE_LINK_LIBRARIES openvino::frontend::tensorflow)
endif()
set(ngraph_ngraph_FOUND ON)
set(NGRAPH_LIBRARIES ngraph::ngraph)
set(ngraph_onnx_ov_frontend_FOUND ${OpenVINO_Frontend_ONNX_FOUND})
set(ngraph_ov_onnx_frontend_FOUND ${OpenVINO_Frontend_ONNX_FOUND})
set(ngraph_onnx_importer_FOUND ${OpenVINO_Frontend_ONNX_FOUND})
if(ngraph_onnx_importer_FOUND)
set(ONNX_IMPORTER_LIBRARIES ngraph::onnx_ov_frontend)
set(ONNX_IMPORTER_LIBRARIES ngraph::ov_onnx_frontend)
# ngraph::onnx_importer target and variables are deprecated
# but need to create a dummy target for BW compatibility
if(NOT TARGET ngraph::onnx_importer)
add_library(ngraph::onnx_importer INTERFACE IMPORTED)
set_target_properties(ngraph::onnx_importer PROPERTIES
INTERFACE_LINK_LIBRARIES ngraph::onnx_ov_frontend)
INTERFACE_LINK_LIBRARIES ngraph::ov_onnx_frontend)
endif()
endif()

View File

@ -2,12 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
#! [complex:transformation]
import logging as log
import numpy as np
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.graph.graph import Graph
from openvino.tools.mo.front.common.replacement import FrontReplacementSubgraph
from openvino.tools.mo.graph.graph import Graph
class Complex(FrontReplacementSubgraph):
@ -41,4 +38,3 @@ class Complex(FrontReplacementSubgraph):
# change the connection so now all consumers of "complex_node" get data from input node of strided slice nodes
complex_node.out_port(0).get_connection().set_source(input_node_output_port)
#! [complex:transformation]

View File

@ -4,11 +4,11 @@
#! [complex_abs:transformation]
import numpy as np
from extensions.ops.elementwise import Pow
from extensions.ops.ReduceOps import ReduceSum
from mo.front.common.replacement import FrontReplacementOp
from mo.graph.graph import Graph, Node
from mo.ops.const import Const
from openvino.tools.mo.ops.elementwise import Pow
from openvino.tools.mo.ops.ReduceOps import ReduceSum
from openvino.tools.mo.front.common.replacement import FrontReplacementOp
from openvino.tools.mo.graph.graph import Graph, Node
from openvino.tools.mo.ops.const import Const
class ComplexAbs(FrontReplacementOp):

View File

@ -3,8 +3,7 @@
# ! [fft_ext:extractor]
from ...ops.FFT import FFT
from mo.front.extractor import FrontExtractorOp
from mo.utils.error import Error
from openvino.tools.mo.front.extractor import FrontExtractorOp
class FFT2DFrontExtractor(FrontExtractorOp):

View File

@ -2,9 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
#! [fft:operation]
from mo.front.common.partial_infer.elemental import copy_shape_infer
from mo.graph.graph import Node, Graph
from mo.ops.op import Op
from openvino.tools.mo.front.common.partial_infer.elemental import copy_shape_infer
from openvino.tools.mo.graph.graph import Graph
from openvino.tools.mo.ops.op import Op
class FFT(Op):

View File

@ -40,13 +40,13 @@ This library contains the classes to:
Starting from the 2022.1 release, OpenVINO Runtime introduced the concept of frontend plugins. Such plugins are loaded by OpenVINO Runtime automatically at run time, depending on the model file format (a usage sketch follows the list below):
* Linux* OS:
- `libir_ov_frontend.so` to read a network from IR
- `libpaddlepaddle_ov_frontend.so` to read a network from PaddlePaddle model format
- `libonnx_ov_frontend.so` to read a network from ONNX model format
- `libov_ir_frontend.so` to read a network from IR
- `libov_paddlepaddle_frontend.so` to read a network from PaddlePaddle model format
- `libov_onnx_frontend.so` to read a network from ONNX model format
* Windows* OS:
- `ir_ov_frontend.dll` to read a network from IR
- `paddlepaddle_ov_frontend.dll` to read a network from PaddlePaddle model format
- `onnx_ov_frontend.dll` to read a network from ONNX model format
- `ov_ir_frontend.dll` to read a network from IR
- `ov_paddlepaddle_frontend.dll` to read a network from PaddlePaddle model format
- `ov_onnx_frontend.dll` to read a network from ONNX model format
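A minimal usage sketch (not part of this commit), assuming the OpenVINO 2022.1 C++ API (`ov::Core::read_model`); the file name `model.onnx` is only an illustrative placeholder:

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // Reading an .onnx file makes the runtime locate and load the ONNX frontend
    // plugin (libov_onnx_frontend.so on Linux, ov_onnx_frontend.dll on Windows);
    // the application does not select a frontend explicitly.
    auto model = core.read_model("model.onnx");
    return model ? 0 : 1;
}
```

Passing an IR `.xml` file or a PaddlePaddle model to the same call dispatches to `ov_ir_frontend` or `ov_paddlepaddle_frontend`, respectively.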
### Device-Specific Plugin Libraries

View File

@ -62,7 +62,7 @@ The example below demonstrates how to unregister an operator from the destructor
## Requirements for Building with CMake
A program that uses the `register_operator` functionality requires the `openvino::core` and `openvino::frontend::onnx` libraries in addition to the OpenVINO Inference Runtime.
The `onnx_ov_frontend` is a component of the `OpenVINO` package, so `find_package(OpenVINO REQUIRED COMPONENTS ONNX)` can find both.
The `ov_onnx_frontend` is a component of the `OpenVINO` package, so `find_package(OpenVINO REQUIRED COMPONENTS ONNX)` can find both.
Those libraries need to be passed to the `target_link_libraries` command in the CMakeLists.txt file.
See CMakeLists.txt below for reference:

View File

@ -45,13 +45,13 @@ This library contains the classes to:
Starting from the 2022.1 release, OpenVINO Runtime introduced the concept of frontend plugins. Such plugins are loaded by OpenVINO Runtime automatically at run time, depending on the model file format:
* Unix* OS:
- `libir_ov_frontend.so` to read a network from IR
- `libpaddlepaddle_ov_frontend.so` to read a network from PaddlePaddle model format
- `libonnx_ov_frontend.so` to read a network from ONNX model format
- `libov_ir_frontend.so` to read a network from IR
- `libov_paddlepaddle_frontend.so` to read a network from PaddlePaddle model format
- `libov_onnx_frontend.so` to read a network from ONNX model format
* Windows* OS:
- `ir_ov_frontend.dll` to read a network from IR
- `paddlepaddle_ov_frontend.dll` to read a network from PaddlePaddle model format
- `onnx_ov_frontend.dll` to read a network from ONNX model format
- `ov_ir_frontend.dll` to read a network from IR
- `ov_paddlepaddle_frontend.dll` to read a network from PaddlePaddle model format
- `ov_onnx_frontend.dll` to read a network from ONNX model format
### Device-specific Plugin Libraries ###

View File

@ -639,9 +639,9 @@ graph. Consider the extractor for the TensorFlow\* operation `Const` (refer to t
`extensions/front/tf/const_ext.py`):
```py
from mo.front.extractor import FrontExtractorOp
from mo.front.tf.extractors.utils import tf_dtype_extractor, tf_tensor_shape, tf_tensor_content
from mo.ops.const import Const
from openvino.tools.mo.front.extractor import FrontExtractorOp
from openvino.tools.mo.front.tf.extractors.utils import tf_dtype_extractor, tf_tensor_shape, tf_tensor_content
from openvino.tools.mo.ops.const import Const
class ConstExtractor(FrontExtractorOp):
@ -679,9 +679,9 @@ Consider another example with an extractor of ONNX\* operation `Constant` (refer
from onnx import numpy_helper
from onnx.numpy_helper import to_array
from mo.front.extractor import FrontExtractorOp
from mo.front.onnx.extractors.utils import onnx_attr
from mo.ops.const import Const
from openvino.tools.mo.front.extractor import FrontExtractorOp
from openvino.tools.mo.front.onnx.extractors.utils import onnx_attr
from openvino.tools.mo.ops.const import Const
class ConstantExtractor(FrontExtractorOp):
@ -814,11 +814,11 @@ fusing of the sub-graph defining the [Mish](../../../ops/activation/Mish_4.md) a
operation:
```py
from extensions.front.Softplus_fusion import SoftplusFusion
from extensions.ops.activation_ops import Mish
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.front.subgraph_matcher import SubgraphMatch
from mo.graph.graph import Graph, rename_nodes
from openvino.tools.mo.front.Softplus_fusion import SoftplusFusion
from openvino.tools.mo.ops.activation_ops import Mish
from openvino.tools.mo.front.common.replacement import FrontReplacementSubgraph
from openvino.tools.mo.front.subgraph_matcher import SubgraphMatch
from openvino.tools.mo.graph.graph import Graph, rename_nodes
class MishFusion(FrontReplacementSubgraph):
@ -886,12 +886,12 @@ transformation.
Consider an example transformation from the file `extensions/front/Pack.py`, which replaces the operation `Pack` from
the TensorFlow\* framework:
```py
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementOp
from mo.front.tf.graph_utils import create_op_with_const_inputs
from mo.graph.graph import Node, Graph, rename_nodes
from mo.ops.concat import Concat
from mo.ops.unsqueeze import Unsqueeze
from openvino.tools.mo.front.common.partial_infer.utils import int64_array
from openvino.tools.mo.front.common.replacement import FrontReplacementOp
from openvino.tools.mo.front.tf.graph_utils import create_op_with_const_inputs
from openvino.tools.mo.graph.graph import Node, Graph, rename_nodes
from openvino.tools.mo.ops.concat import Concat
from openvino.tools.mo.ops.unsqueeze import Unsqueeze
class Pack(FrontReplacementOp):
@ -932,11 +932,11 @@ specification.
```py
import logging as log
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementPattern
from mo.graph.graph import Graph
from mo.ops.const import Const
from mo.utils.error import Error
from openvino.tools.mo.front.common.partial_infer.utils import int64_array
from openvino.tools.mo.front.common.replacement import FrontReplacementPattern
from openvino.tools.mo.graph.graph import Graph
from openvino.tools.mo.ops.const import Const
from openvino.tools.mo.utils.error import Error
class SqueezeNormalize(FrontReplacementPattern):
@ -1200,13 +1200,13 @@ The example of the configuration file for this type of transformation is `extens
and the corresponding transformation file is `./extensions/front/YOLO.py`:
```py
from extensions.front.no_op_eraser import NoOpEraser
from extensions.front.standalone_const_eraser import StandaloneConstEraser
from extensions.ops.regionyolo import RegionYoloOp
from mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral
from mo.graph.graph import Node, Graph
from mo.ops.result import Result
from mo.utils.error import Error
from openvino.tools.mo.front.no_op_eraser import NoOpEraser
from openvino.tools.mo.front.standalone_const_eraser import StandaloneConstEraser
from openvino.tools.mo.ops.regionyolo import RegionYoloOp
from openvino.tools.mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral
from openvino.tools.mo.graph.graph import Node, Graph
from openvino.tools.mo.ops.result import Result
from openvino.tools.mo.utils.error import Error
class YoloRegionAddon(FrontReplacementFromConfigFileGeneral):

View File

@ -20,9 +20,9 @@ assume that we have already created the `CustomOp` class (inherited from `Op` cl
for this MXNet custom operation as described in the [Customize_Model_Optimizer](Customize_Model_Optimizer.md).
```py
from extension.ops.custom_op import CustomOp # implementation of the MO operation class
from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
from mo.front.extractor import MXNetCustomFrontExtractorOp
from openvino.tools.mo.ops.custom_op import CustomOp # implementation of the MO operation class
from openvino.tools.mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
from openvino.tools.mo.front.extractor import MXNetCustomFrontExtractorOp
class CustomProposalFrontExtractor(MXNetCustomFrontExtractorOp): # inherit from specific base class
op = 'MyCustomOp' # the value corresponding to the `op_type` value of the MXNet operation

View File

@ -40,8 +40,8 @@ operation `ProposalOp` which corresponds to `Proposal` operation described in th
document. Refer to the source code below for a detailed explanation of the extractor.
```py
from extensions.ops.proposal import ProposalOp
from mo.front.extractor import CaffePythonFrontExtractorOp
from openvino.tools.mo.ops.proposal import ProposalOp
from openvino.tools.mo.front.extractor import CaffePythonFrontExtractorOp
class ProposalPythonFrontExtractor(CaffePythonFrontExtractorOp):

View File

@ -46,7 +46,7 @@ if(OpenCV_FOUND)
endif()
if(ENABLE_OV_ONNX_FRONTEND)
target_link_libraries(${TARGET_NAME} PRIVATE onnx_ov_frontend)
target_link_libraries(${TARGET_NAME} PRIVATE ov_onnx_frontend)
endif()
if(NOT MSVC)

View File

@ -1134,8 +1134,38 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
if (!isSuitableParent1 && !isSuitableParent2)
continue;
auto mergedConv = isSuitableParent1 ? parent1 : parent2;
auto peerNode = isSuitableParent1 ? parent2 : parent1;
std::shared_ptr<MKLDNNNode> mergedConv;
std::shared_ptr<MKLDNNNode> peerNode;
if (isSuitableParent1 && isSuitableParent2) {
// the operation that is not merged (peerNode) has to be in low precision
const auto isBranchQuantized = [](const MKLDNNNodePtr& branchParent) {
const auto& fused = branchParent->getFusedWith();
const auto branchPrecision = fused.empty() ?
branchParent->getOriginalOutputPrecisionAtPort(0) :
fused[fused.size() - 1]->getOriginalOutputPrecisionAtPort(0);
return (branchPrecision == Precision::I8) || (branchPrecision == Precision::U8);
};
const auto isBranch1Quantized = isBranchQuantized(graphNode->getParentEdgesAtPort(0)[0]->getParent());
const auto isBranch2Quantized = isBranchQuantized(graphNode->getParentEdgesAtPort(1)[0]->getParent());
if (isBranch1Quantized || isBranch2Quantized) {
// INT8
const auto parent1CanBeMerged = parent1->getChildEdges().size() == 1ul;
// if both branches are quantized, then parent1 is selected (result is not changed)
mergedConv = isBranch2Quantized && parent1CanBeMerged ? parent1 : parent2;
peerNode = isBranch2Quantized && parent1CanBeMerged ? parent2 : parent1;
} else {
// original FP32
mergedConv = isSuitableParent1 ? parent1 : parent2;
peerNode = isSuitableParent1 ? parent2 : parent1;
}
} else {
mergedConv = isSuitableParent1 ? parent1 : parent2;
peerNode = isSuitableParent1 ? parent2 : parent1;
}
if (isSuitableParent1 && isSuitableParent2) {
if ((peerNode->getType() == Convolution || peerNode->getType() == BinaryConvolution) &&
mergedConv->getChildEdges().size() != 1) {

View File

@ -1102,7 +1102,7 @@ Layout MKLDNNNode::getWeightsLayoutByDims(SizeVector dims, bool isGrouped) {
}
}
void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) {
void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims) {
IE_THROW() << "Fusing of " << this->getType() << " operation is not implemented";
}

View File

@ -602,7 +602,7 @@ protected:
* Seed node should call this routine and pass its post operations list as a parameter.
* @param ops List of fused post operations
*/
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, int align = -1);
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims);
virtual void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem);
virtual std::shared_ptr<mkldnn::primitive_attr> initPrimitiveAttr() { return nullptr; }

View File

@ -1132,8 +1132,7 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
ops.append_sum(1.0);
} else {
// TODO [DS]: change to shape from memory
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims());
}
continue;
}

View File

@ -352,8 +352,7 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vecto
ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision));
} else {
if (useLegacyPostOps || eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, dims, align);
eltwiseNode->appendPostOps(ops, dims);
} else {
eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
}

View File

@ -365,9 +365,8 @@ void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vec
for (auto &node : fusedWith) {
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
// TODO [DS]: change to shape from memory
constexpr int align = 16;
// use legacy depthwise since backprop convolution does not support binary post ops
eltwiseNode->appendPostOps(ops, dims, align);
eltwiseNode->appendPostOps(ops, dims);
continue;
}
if (auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get())) {

View File

@ -1744,7 +1744,7 @@ void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) {
MKLDNNNode::fuseInto(parentNode);
}
void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) {
void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims) {
const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' ";
if (getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
@ -1775,11 +1775,11 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &p
}
} else {
const size_t chIdx = postOpDims.size() > 1 ? getFusingAxis() : 0;
constexpr int align = 16; // always align for legacy scale/shift post ops
scalesBuffer = makeAlignedBuffer(postOpDims[chIdx], scales, align);
if (getAlgorithm() != EltwisePrelu) {
shiftsBuffer = makeAlignedBuffer(postOpDims[chIdx], shifts, align);
}
/* @todo legacy depthwise post ops are kept for now
* for performance reasons
*/

View File

@ -75,7 +75,7 @@ public:
bool created() const override;
bool canBeInPlace() const override;
bool canFuse(const MKLDNNNodePtr& node) const override;
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1) override;
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims) override;
void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) override;
void fuseInto(MKLDNNNodePtr& parentNode) override;
InferenceEngine::Precision getRuntimePrecision() const override;

View File

@ -1706,8 +1706,13 @@ void MKLDNNFakeQuantizeNode::initializePostOpData(const VectorDims &dims, const
isPostOpDataInitialized = true;
}
void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) {
initializePostOpData(postOpDims, align);
void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims) {
// MKLDNN quantization_injectors assume that quantization data memory is always aligned to 16
// (the length of an AVX512 vector register), which is also enough for the AVX2 and SSE42 implementations.
// Otherwise it can lead to buffer over-reads and performance penalties due to denormals.
const size_t bufferAlignment = 16;
initializePostOpData(postOpDims, bufferAlignment);
if (getAlgorithm() == FQBinarization) {
ops.append_binarization(mkldnn::algorithm::binarization_depthwise, (const float*)&binarizationThresholds[0], (const float*)&binarizationOutputMask[0]);

View File

@ -120,10 +120,7 @@ public:
InferenceEngine::Precision getInputPrecision() const { return inputPrecision; }
InferenceEngine::Precision getOutputPrecision() const { return outputPrecision; }
// MKLDNN quantization_injectors assumes that quantization data memory is always aligned on 16
// by length of AVX512 vector register which is also enough for AVX2 and SSE42 implementations.
// Otherwise it can lead to buffer over-read and performance penalties due to denormals.
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims = {}, int align = 16) override;
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims = {}) override;
void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

View File

@ -198,9 +198,8 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
// TODO [DS]: change to shape from memory
constexpr int align = -1;
if (eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims());
} else {
eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
}

View File

@ -109,7 +109,7 @@ protected:
uni_vmovdqu(b, a); // b = a
uni_vmovdqu(c, a); // c = a
uni_vpcmpeqd(b, b, zero); // if (a == 0) b = 1 else b = 0
uni_vpand(c, mask); // c = a & 01111111100000000000000000000000
uni_vpand(c, c, mask); // c = a & 01111111100000000000000000000000
uni_vpcmpeqd(c, c, zero); // if (c == 0) c = 1 else c = 0
uni_vtestps(b, c); // if ((!b & c) == 0) CF = 1 else CF = 0
}

View File

@ -2102,8 +2102,7 @@ void MKLDNNInterpolateNode::setPostOps(mkldnn::primitive_attr &attr, const Vecto
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, dims, align);
eltwiseNode->appendPostOps(ops, dims);
continue;
}

View File

@ -891,8 +891,7 @@ void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, postOpDims, align);
eltwiseNode->appendPostOps(ops, postOpDims);
continue;
}
IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented";

View File

@ -813,8 +813,7 @@ void MKLDNNNormalizeL2Node::setPostOps(mkldnn::primitive_attr& kernel_attrs, con
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, dims, align);
eltwiseNode->appendPostOps(ops, dims);
continue;
}

View File

@ -2779,8 +2779,7 @@ void MKLDNNReduceNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, postOpDims, align);
eltwiseNode->appendPostOps(ops, postOpDims);
continue;
}
IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented";

View File

@ -54,6 +54,11 @@ private:
const mv_blob_header& blobHdr,
std::vector<char>& blob);
void serializeParamsAndResults(
const Model& model,
const mv_blob_header& blobHdr,
std::vector<char>& blob);
ElfN_Ehdr createElfHeader();
void getMetaData(

View File

@ -62,4 +62,16 @@ VPU_PACKED(mv_stage_header {
uint32_t numShaves;
};)
VPU_PACKED(network_info_header {
uint32_t parameters_size;
uint32_t results_size;
};)
VPU_PACKED(network_params_header {
uint32_t name_lenght;
uint32_t shape_size;
uint32_t element_type_bytesize;
uint32_t output_tensor_names_size;
};)
} // namespace vpu

View File

@ -26,6 +26,9 @@ public:
const ie::InputsDataMap& getNetworkInputs() const { return _networkInputs; }
const ie::OutputsDataMap& getNetworkOutputs() const { return _networkOutputs; }
const std::vector<std::shared_ptr<const ov::Node>>& getNetworkParemeters() const { return _parameters; }
const std::vector<std::shared_ptr<const ov::Node>>& getNetworkResults() const { return _results; }
uint32_t getStageCount() const { return _blobHeader.stages_count; }
uint32_t getMagicNumber() const { return _blobHeader.magic_number; }
@ -36,6 +39,8 @@ public:
uint32_t getNumberOfShaves() const { return _blobHeader.number_of_shaves; }
uint32_t getNumberOfSlices() const { return _blobHeader.number_of_cmx_slices; }
uint32_t getFileSize() const { return _blobHeader.file_size; }
const DataInfo& getInputInfo() const { return _inputInfo; }
const DataInfo& getOutputInfo() const { return _outputInfo; }
@ -49,6 +54,9 @@ private:
ie::InputsDataMap _networkInputs;
ie::OutputsDataMap _networkOutputs;
std::vector<std::shared_ptr<const ov::Node>> _parameters = {};
std::vector<std::shared_ptr<const ov::Node>> _results = {};
DataInfo _inputInfo;
DataInfo _outputInfo;
};

View File

@ -89,6 +89,7 @@ std::set<std::string> getSupportedLayers(const ie::CNNNetwork& network, const Pl
const uint32_t BLOB_MAGIC_NUMBER = 9709;
const uint32_t BLOB_VERSION_MAJOR = 6;
// Must be changed when possible
const uint32_t BLOB_VERSION_MINOR = 0;
} // namespace vpu

View File

@ -15,6 +15,9 @@
#include <description_buffer.hpp>
#include <xml_parse_utils.h>
#include <ngraph/ops.hpp>
#include <transformations/utils/utils.hpp>
#include <climits>
#include <cstring>
#include <string>
@ -161,6 +164,118 @@ void BackEnd::serializeConstShapes(const Model& model, const mv_blob_header& blo
}
}
void BackEnd::serializeParamsAndResults(const Model& model, const mv_blob_header& blobHdr,
std::vector<char>& blob) {
const auto networkParams = model->attrs().getOrDefault<ov::ParameterVector>("networkParameters");
const auto networkResults = model->attrs().getOrDefault<ov::ResultVector>("networkResults");
auto getNetworkParameterHeader = [](const std::shared_ptr<ov::Node>& node) {
network_params_header nph;
nph.element_type_bytesize = sizeof(node->get_element_type().operator ov::element::Type_t());
nph.name_lenght = node->get_friendly_name().size();
nph.shape_size = node->get_shape().size();
nph.output_tensor_names_size = node->get_output_tensor(0).get_names().size();
return nph;
};
uint32_t networkInfoOffset = blob.size();
auto serializeParameters = [&blob, &networkInfoOffset,
&getNetworkParameterHeader](
const std::shared_ptr<ov::Node>& node) {
BlobSerializer headerSerializer;
BlobSerializer shapeSerializer;
BlobSerializer elementTypeSerializer;
BlobSerializer tensorNamesSerializer;
BlobSerializer inputNameForResultSerializer;
const auto nph = getNetworkParameterHeader(node);
const bool isResult = ov::is_type<ov::op::v0::Result>(node);
int totalNetworkInfoOffset =
networkInfoOffset + sizeof(nph) + nph.name_lenght +
nph.element_type_bytesize +
sizeof(size_t) * (nph.output_tensor_names_size + nph.shape_size);
for (const auto& name : node->get_output_tensor(0).get_names()) {
totalNetworkInfoOffset += sizeof(size_t) + name.size();
}
if (isResult) {
totalNetworkInfoOffset +=
sizeof(size_t) +
ngraph::op::util::create_ie_output_name(node->input_value(0)).size();
}
blob.resize(totalNetworkInfoOffset);
headerSerializer.append(nph);
std::copy_n(headerSerializer.data(), sizeof(nph),
blob.data() + networkInfoOffset);
networkInfoOffset += sizeof(nph);
const auto nodeName = node->get_friendly_name();
VPU_THROW_UNLESS(
node->get_output_partial_shape(0).rank().is_static(),
"Serialization of shapes with dynamic rank is not supported");
const auto nodeShape = node->get_output_partial_shape(0).get_shape();
const auto nodeElType =
node->get_element_type().operator ov::element::Type_t();
std::copy_n(nodeName.data(), nodeName.size(),
blob.data() + networkInfoOffset);
networkInfoOffset += nph.name_lenght;
for (const auto shapeIdx : nodeShape) {
shapeSerializer.append(shapeIdx);
}
std::copy_n(shapeSerializer.data(),
shapeSerializer.size(), blob.data() + networkInfoOffset);
networkInfoOffset += shapeSerializer.size();
elementTypeSerializer.append(nodeElType);
std::copy_n(elementTypeSerializer.data(), nph.element_type_bytesize,
blob.data() + networkInfoOffset);
networkInfoOffset += nph.element_type_bytesize;
for (const auto& name : node->get_output_tensor(0).get_names()) {
tensorNamesSerializer.append(name.size());
for (const auto ch : name) {
tensorNamesSerializer.append(ch);
}
}
std::copy_n(tensorNamesSerializer.data(), tensorNamesSerializer.size(),
blob.data() + networkInfoOffset);
networkInfoOffset += tensorNamesSerializer.size();
if (isResult) {
const auto inputNameForResult =
ngraph::op::util::create_ie_output_name(node->input_value(0));
inputNameForResultSerializer.append(inputNameForResult.size());
for (const auto ch : inputNameForResult) {
inputNameForResultSerializer.append(ch);
}
std::copy_n(inputNameForResultSerializer.data(),
inputNameForResultSerializer.size(),
blob.data() + networkInfoOffset);
networkInfoOffset += inputNameForResultSerializer.size();
}
};
BlobSerializer networkInfoSerializer;
network_info_header nih;
nih.parameters_size = networkParams.size();
nih.results_size = networkResults.size();
blob.resize(networkInfoOffset + sizeof(nih));
networkInfoSerializer.append(nih);
std::copy_n(networkInfoSerializer.data(), sizeof(nih), blob.data() + networkInfoOffset);
networkInfoOffset += sizeof(nih);
for (const auto& param : networkParams) {
serializeParameters(param);
}
for (const auto& result : networkResults) {
serializeParameters(result);
}
}
void BackEnd::serialize(
const Model& model,
std::vector<char>& blob,
@ -271,6 +386,12 @@ void BackEnd::serialize(
serializeConstData(model, blobHdr, blob);
serializeConstShapes(model, blobHdr, blob);
const auto networkParams = model->attrs().getOrDefault<ov::ParameterVector>("networkParameters");
const auto networkResults = model->attrs().getOrDefault<ov::ResultVector>("networkResults");
// Skip the constant-network case (nothing to serialize without parameters/results)
if (!networkParams.empty() && !networkResults.empty()) {
serializeParamsAndResults(model, blobHdr, blob);
}
blobHeader.first = blob.data();
blobHeader.second = sizeof(ElfN_Ehdr) + sizeof(mv_blob_header);

View File

@ -10,7 +10,7 @@
#include <string>
#include <ie_input_info.hpp>
#include <ie_ngraph_utils.hpp>
#include <vpu/graph_transformer.hpp>
#include <vpu/backend/blob_format.hpp>
#include <vpu/model/data.hpp>
@ -116,6 +116,64 @@ void BlobReader::parse(const std::vector<char>& blob) {
_networkOutputs[processedOutput.getName()] = std::make_shared<ie::Data>(processedOutput);
}
}
if (blob.size() != _blobHeader.file_size) {
auto networkInfoOffset = _blobHeader.file_size;
const auto nih = readFromBlob<network_info_header>(blob, networkInfoOffset);
auto extractParameter = [&blob, &networkInfoOffset](bool isResult) {
const auto nph = readFromBlob<network_params_header>(blob, networkInfoOffset);
std::string parameterFriendlyName(nph.name_lenght, '0');
for (auto idx = 0; idx < nph.name_lenght; ++idx) {
parameterFriendlyName[idx] = readFromBlob<char>(blob, networkInfoOffset);
}
ov::Shape parameterShape(nph.shape_size);
for (auto idx = 0; idx < nph.shape_size; ++idx) {
parameterShape[idx] = readFromBlob<size_t>(blob, networkInfoOffset);
}
ov::element::Type_t parameterType = readFromBlob<ov::element::Type_t>(blob, networkInfoOffset);
std::shared_ptr<ov::Node> parameter =
std::make_shared<ov::op::v0::Parameter>(parameterType,
parameterShape);
std::unordered_set<std::string> tensorNames;
for (auto idx = 0; idx < nph.output_tensor_names_size; ++idx) {
const auto nameLenght = readFromBlob<size_t>(blob, networkInfoOffset);
std::string tensorName;
for (auto nameSymbolIdx = 0; nameSymbolIdx < nameLenght; ++nameSymbolIdx) {
tensorName += readFromBlob<char>(blob, networkInfoOffset);
}
tensorNames.insert(tensorName);
}
if (isResult) {
auto fakeParameter = parameter;
parameter = std::make_shared<ov::op::v0::Result>(parameter);
const auto inputNameLenght = readFromBlob<size_t>(blob, networkInfoOffset);
std::string inputName;
for (auto nameSymbolIdx = 0; nameSymbolIdx < inputNameLenght; ++nameSymbolIdx) {
inputName += readFromBlob<char>(blob, networkInfoOffset);
}
fakeParameter->set_friendly_name(inputName);
parameter = parameter->copy_with_new_inputs({fakeParameter});
}
parameter->set_friendly_name(parameterFriendlyName);
parameter->output(0).get_tensor().set_names(tensorNames);
return parameter;
};
for (auto paramIdx = 0; paramIdx < nih.parameters_size; ++paramIdx) {
_parameters.emplace_back(extractParameter(false));
}
for (auto paramIdx = 0; paramIdx < nih.results_size; ++paramIdx) {
_results.emplace_back(extractParameter(true));
}
}
}
} // namespace vpu

View File

@ -492,7 +492,14 @@ ModelPtr FrontEnd::runCommonPasses(ie::CNNNetwork network,
model->attrs().set<int>("index", g_counter.fetch_add(1));
model->attrs().set<Resources>("resources", env.resources);
// Pass information about the network parameters/results so that it can be
// restored when the compiled blob is imported
if (network.getFunction() != nullptr) {
model->attrs().set<ov::ParameterVector>(
"networkParameters", network.getFunction()->get_parameters());
model->attrs().set<ov::ResultVector>(
"networkResults", network.getFunction()->get_results());
}
//
// Update IE Network
//

View File

@ -164,11 +164,20 @@ void ExecutableNetwork::Import(std::istream& strm, std::vector<DevicePtr> &devic
this->_networkInputs = blobReader.getNetworkInputs();
this->_networkOutputs = blobReader.getNetworkOutputs();
std::size_t numStages = blobReader.getStageCount();
auto blobHeader = blobReader.getHeader();
if (blobSize == blobReader.getFileSize()) {
_log->warning(
"Older version of blob. Unable to get information about network "
"parameters/results. Please recompile blob");
}
this->setInputs(blobReader.getNetworkParemeters());
this->setOutputs(blobReader.getNetworkResults());
_inputInfo = blobReader.getInputInfo();
_outputInfo = blobReader.getOutputInfo();
std::size_t numStages = blobReader.getStageCount();
auto blobHeader = blobReader.getHeader();
openDevice(devicePool);
_executor->allocateGraph(_device, _graphDesc, _graphBlob, blobHeader, numStages, networkName, _actualNumExecutors);
_graphMetaData.stagesMeta.resize(numStages);

View File

@ -0,0 +1,37 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "primitive.hpp"
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Slice primitive.
/// @details Extracts a sub-tensor from the input as defined by the start/end/step (and optional axes) inputs.
struct slice : public primitive_base<slice> {
CLDNN_DECLARE_PRIMITIVE(slice)
/// @brief Constructs slice primitive.
/// @param id This primitive id.
/// @param inputs List of primitive ids.
slice(const primitive_id& id,
const std::vector<primitive_id>& inputs,
const tensor output_shape,
const primitive_id& ext_prim_id = "",
const padding& output_padding = padding())
: primitive_base{id, inputs, ext_prim_id, output_padding},
output_shape {output_shape}
{}
tensor output_shape;
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@ -24,6 +24,7 @@ private:
debug_configuration();
public:
static const char *prefix;
int help; // Print help messages
int verbose; // Verbose execution
int print_multi_kernel_perf; // Print execution time of each kernel in multi-kernel primitive
int disable_usm; // Disable usm usage
@ -34,6 +35,7 @@ public:
std::string dump_layers; // Dump intermediate buffers of specified layers only, separated by space
std::string dry_run_path; // Dry run and serialize execution graph into the specified path
int dump_layers_dst_only; // Dump only output of layers
int dump_layers_limit_batch; // Limit the size of batch to dump
int base_batch_for_memory_estimation; // Base batch size to be used in memory estimation
static const debug_configuration *get_instance();
};

View File

@ -55,6 +55,7 @@ enum class KernelType {
DEPTH_TO_SPACE,
BATCH_TO_SPACE,
SHUFFLE_CHANNELS,
SLICE,
STRIDED_SLICE,
REVERSE_SEQUENCE,
BINARY_CONVOLUTION,

View File

@ -0,0 +1,111 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include"slice_kernel_ref.h"
#include <kernel_selector_utils.h>
#include <vector>
namespace {
void addJitConstantsForAttribute(kernel_selector::JitConstants &jit,
const std::string &name, const std::vector<std::int32_t> &attribute) {
using namespace kernel_selector;
jit.AddConstant(MakeJitConstant(name + "_BATCH", attribute[0]));
jit.AddConstant(MakeJitConstant(name + "_FEATURE", attribute[1]));
if (attribute.size() == 5) { // BFZYX
jit.AddConstant(MakeJitConstant(name + "_Z", attribute[2]));
jit.AddConstant(MakeJitConstant(name + "_Y", attribute[3]));
jit.AddConstant(MakeJitConstant(name + "_X", attribute[4]));
} else { // BFYX
jit.AddConstant(MakeJitConstant(name + "_Y", attribute[2]));
jit.AddConstant(MakeJitConstant(name + "_X", attribute[3]));
}
}
} // anonymous namespace
namespace kernel_selector {
KernelsData SliceKernelRef::GetKernelsData(const Params &params,
const optional_params &options) const {
if (!Validate(params, options)) {
return {};
}
KernelData kernel_data = KernelData::Default<slice_params>(params);
slice_params &new_params =
dynamic_cast<slice_params&>(*kernel_data.params.get());
auto dispatch_data = SetDefault(new_params, options);
auto entry_point = GetEntryPoint(kernelName, new_params.layerID, params, options);
auto slice_specific_jit = GetJitConstants(new_params);
auto jit = CreateJit(kernelName, slice_specific_jit, entry_point);
FillCLKernelData(kernel_data.kernels[0], dispatch_data, params.engineInfo,
kernelName, jit, entry_point);
return {kernel_data};
}
KernelsPriority SliceKernelRef::GetKernelsPriority(const Params&/*params*/,
const optional_params&/*options*/) const {
return DONT_USE_IF_HAVE_SOMETHING_ELSE;
}
ParamsKey SliceKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::UINT8);
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableInputDataType(Datatype::INT32);
k.EnableInputDataType(Datatype::INT64);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::INT32);
k.EnableOutputDataType(Datatype::INT64);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::bfzyx);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::bfzyx);
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
return k;
}
bool SliceKernelRef::Validate(const Params &p, const optional_params &o) const {
if (p.GetType() != KernelType::SLICE || o.GetType() != KernelType::SLICE) {
return false;
}
const slice_params &params = dynamic_cast<const slice_params&>(p);
if (params.inputs.empty())
return false;
if (params.output.Dimentions() > 5 || params.inputs[0].Dimentions() > 5)
return false;
return true;
}
JitConstants SliceKernelRef::GetJitConstants(const slice_params &params) const {
JitConstants jit = MakeBaseParamsJitConstants(params);
addJitConstantsForAttribute(jit, "SLICE_BEGIN", params.start);
addJitConstantsForAttribute(jit, "SLICE_END", params.end);
addJitConstantsForAttribute(jit, "SLICE_STEP", params.step);
return jit;
}
CommonDispatchData SliceKernelRef::SetDefault(const slice_params &params,
const optional_params&) const {
CommonDispatchData dispatchData;
dispatchData.gws = { params.output.Batch().v, params.output.Feature().v,
params.output.Z().v * params.output.Y().v * params.output.X().v };
dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws,
params.engineInfo);
return dispatchData;
}
} // namespace kernel_selector

View File

@ -0,0 +1,42 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "kernel_base_opencl.h"
#include <vector>
namespace kernel_selector {
struct slice_params: public base_params {
slice_params() : base_params(KernelType::SLICE) {}
std::vector<std::int32_t> start;
std::vector<std::int32_t> end;
std::vector<std::int32_t> step;
};
struct slice_optional_params : optional_params {
slice_optional_params() : optional_params(KernelType::SLICE) {}
};
class SliceKernelRef: public KernelBaseOpenCL {
public:
SliceKernelRef() :
KernelBaseOpenCL { "slice_ref" } {
}
KernelsData GetKernelsData(const Params &params,
const optional_params &options) const override;
KernelsPriority GetKernelsPriority(const Params &params,
const optional_params &options) const override;
ParamsKey GetSupportedKey() const override;
bool Validate(const Params &p, const optional_params &o) const override;
private:
JitConstants GetJitConstants(const slice_params &params) const;
CommonDispatchData SetDefault(const slice_params &params,
const optional_params&) const;
};
} // namespace kernel_selector

View File

@ -0,0 +1,18 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "slice_kernel_selector.h"
#include "slice_kernel_ref.h"
namespace kernel_selector {
slice_kernel_selector::slice_kernel_selector() {
Attach<SliceKernelRef>();
}
KernelsData slice_kernel_selector::GetBestKernels(const Params &params,
const optional_params &options) const {
return GetNaiveBestKernel(params, options, KernelType::SLICE);
}
} // namespace kernel_selector

View File

@ -0,0 +1,23 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <kernel_selector.h>
namespace kernel_selector {
class slice_kernel_selector : public kernel_selector_base {
public:
static slice_kernel_selector& Instance() {
static slice_kernel_selector instance_;
return instance_;
}
slice_kernel_selector();
KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
};
} // namespace kernel_selector

View File

@ -0,0 +1,36 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "include/batch_headers/fetch_data.cl"
KERNEL(slice_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output)
{
const uint batch = get_global_id(0);
const uint feature = get_global_id(1);
#if INPUT0_DIMS <= 4
const uint xy = get_global_id(2);
const uint y = xy / OUTPUT_SIZE_X;
const uint x = xy % OUTPUT_SIZE_X;
const uint output_index = OUTPUT_GET_INDEX(batch, feature, y, x);
const uint input_index = INPUT0_GET_INDEX(
SLICE_BEGIN_BATCH + batch * SLICE_STEP_BATCH,
SLICE_BEGIN_FEATURE + feature * SLICE_STEP_FEATURE,
SLICE_BEGIN_Y + y * SLICE_STEP_Y,
SLICE_BEGIN_X + x * SLICE_STEP_X);
#elif INPUT0_DIMS == 5
const uint xyz = get_global_id(2);
const uint yx = xyz % (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
const uint z = xyz / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
const uint y = yx / OUTPUT_SIZE_X;
const uint x = yx % OUTPUT_SIZE_X;
const uint output_index = OUTPUT_GET_INDEX(batch, feature, z, y, x);
const uint input_index = INPUT0_GET_INDEX(
SLICE_BEGIN_BATCH + batch * SLICE_STEP_BATCH,
SLICE_BEGIN_FEATURE + feature * SLICE_STEP_FEATURE,
SLICE_BEGIN_Z + z * SLICE_STEP_Z,
SLICE_BEGIN_Y + y * SLICE_STEP_Y,
SLICE_BEGIN_X + x * SLICE_STEP_X);
#endif
output[output_index] = ACTIVATION(input[input_index], ACTIVATION_PARAMS);
}

View File

@ -3,7 +3,9 @@
//
#include "intel_gpu/runtime/debug_configuration.hpp"
#include <algorithm>
#include <iostream>
#include <iomanip>
#include <memory>
#include <vector>
#include <sstream>
@ -97,10 +99,39 @@ void get_common_debug_env_var(const std::string &var, T &val) {
return get_debug_env_var(var, val, allowed_option_prefixes);
}
static void print_help_messages() {
std::vector<std::pair<std::string, std::string>> message_list;
message_list.emplace_back("OV_GPU_Help", "Print help messages");
message_list.emplace_back("OV_GPU_Verbose", "Verbose execution");
message_list.emplace_back("OV_GPU_PrintMultiKernelPerf", "Print execution time of each kernel in multi-kernel primitimive");
message_list.emplace_back("OV_GPU_DisableUsm", "Disable usm usage");
message_list.emplace_back("OV_GPU_DisableOnednn", "Disable onednn for discrete GPU (no effect for integrated GPU)");
message_list.emplace_back("OV_GPU_DumpGraphs", "Dump optimized graph");
message_list.emplace_back("OV_GPU_DumpSources", "Dump opencl sources");
message_list.emplace_back("OV_GPU_DumpLayersPath", "Enable dumping intermediate buffers and set the dest path");
message_list.emplace_back("OV_GPU_DumpLayers", "Dump intermediate buffers of specified layers only, separated by space");
message_list.emplace_back("OV_GPU_DumpLayersDstOnly", "Dump only output of layers");
message_list.emplace_back("OV_GPU_DumpLayersLimitBatch", "Limit the size of batch to dump");
message_list.emplace_back("OV_GPU_DryRunPath", "Dry run and serialize execution graph into the specified path");
message_list.emplace_back("OV_GPU_BaseBatchForMemEstimation", "Base batch size to be used in memory estimation");
auto max_name_length_item = std::max_element(message_list.begin(), message_list.end(),
[](std::pair<std::string, std::string>& a, std::pair<std::string, std::string>& b){
return a.first.size() < b.first.size();
});
int name_width = static_cast<int>(max_name_length_item->first.size()) + 2;
GPU_DEBUG_COUT << "Supported environment variables for debugging" << std::endl;
for (auto& p : message_list) {
GPU_DEBUG_COUT << " - " << std::left << std::setw(name_width) << p.first + ": " << p.second << std::endl;
}
}
#endif
debug_configuration::debug_configuration()
: verbose(0)
: help(0)
, verbose(0)
, print_multi_kernel_perf(0)
, disable_usm(0)
, dump_graphs(std::string())
@ -110,8 +141,10 @@ debug_configuration::debug_configuration()
, dump_layers_dst_only(0)
, dry_run_path(std::string())
, disable_onednn(0)
, dump_layers_limit_batch(std::numeric_limits<int>::max())
, base_batch_for_memory_estimation(-1) {
#ifdef GPU_DEBUG_CONFIG
get_gpu_debug_env_var("Help", help);
get_common_debug_env_var("Verbose", verbose);
get_gpu_debug_env_var("PrintMultiKernelPerf", print_multi_kernel_perf);
get_gpu_debug_env_var("DisableUsm", disable_usm);
@ -120,10 +153,16 @@ debug_configuration::debug_configuration()
get_gpu_debug_env_var("DumpLayersPath", dump_layers_path);
get_gpu_debug_env_var("DumpLayers", dump_layers);
get_gpu_debug_env_var("DumpLayersDstOnly", dump_layers_dst_only);
get_gpu_debug_env_var("DumpLayersLimitBatch", dump_layers_limit_batch);
get_gpu_debug_env_var("DisableOnednn", disable_onednn);
get_gpu_debug_env_var("DryRunPath", dry_run_path);
get_gpu_debug_env_var("BaseBatchForMemEstimation", base_batch_for_memory_estimation);
if (help > 0) {
print_help_messages();
exit(0);
}
if (dump_layers.length() > 0)
dump_layers = " " + dump_layers + " "; // Insert delimiter for easier parsing when used
#endif

View File

@ -7,12 +7,34 @@
#include "pass_manager.h"
#include "program_node.h"
#ifdef ENABLE_ONEDNN_FOR_GPU
#include "fully_connected_inst.h"
#include <impls/onednn/utils.hpp>
#endif
using namespace cldnn;
void add_onednn_optimization_attributes::run(program& p) {
#ifdef ENABLE_ONEDNN_FOR_GPU
for (auto& node : p.get_processing_order()) {
if (node->get_preferred_impl_type() == impl_types::onednn) {
if (node->is_type<fully_connected>()) {
auto fc_prim = node->as<fully_connected>().get_primitive();
// Reshape fused ops tensors for OneDNN FC if needed
if (fc_prim->input_size == 3) {
for (auto& fused_prim : node->get_fused_primitives()) {
auto fused_node = fused_prim.node;
if (fused_node->is_type<eltwise>()) {
auto& dependency = node->get_dependency(fused_prim.dep_start_idx);
auto original_layout = dependency.get_output_layout();
onednn::combine_bf_with_first_spatial_dim(original_layout);
dependency.set_output_layout(original_layout, false);
}
}
}
}
node->init_onednn_primitive_attributes();
}
}

View File

@ -436,10 +436,5 @@ void graph_initializations::run(program& p) {
}
set_outputs(p);
p.get_processing_order().calc_processing_order(p);
for (auto& node : p.get_processing_order()) {
if (!node->is_type<data>())
node->get_output_layout();
}
}
} // namespace cldnn

View File

@ -65,6 +65,7 @@ void register_implementations() {
REGISTER_OCL(softmax);
REGISTER_OCL(space_to_batch);
REGISTER_OCL(space_to_depth);
REGISTER_OCL(slice);
REGISTER_OCL(strided_slice);
REGISTER_OCL(tile);
REGISTER_OCL(lstm_dynamic_input);

View File

@ -53,6 +53,7 @@
#include "intel_gpu/primitives/scatter_nd_update.hpp"
#include "intel_gpu/primitives/select.hpp"
#include "intel_gpu/primitives/shuffle_channels.hpp"
#include "intel_gpu/primitives/slice.hpp"
#include "intel_gpu/primitives/softmax.hpp"
#include "intel_gpu/primitives/space_to_batch.hpp"
#include "intel_gpu/primitives/strided_slice.hpp"
@ -73,7 +74,7 @@ void register_implementations();
namespace detail {
#define REGISTER_OCL(prim) \
#define REGISTER_OCL(prim) \
struct attach_##prim##_impl { \
attach_##prim##_impl(); \
}
@ -130,6 +131,7 @@ REGISTER_OCL(scatter_elements_update);
REGISTER_OCL(scatter_nd_update);
REGISTER_OCL(select);
REGISTER_OCL(shuffle_channels);
REGISTER_OCL(slice);
REGISTER_OCL(softmax);
REGISTER_OCL(space_to_batch);
REGISTER_OCL(space_to_depth);

View File

@ -0,0 +1,138 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <slice_inst.h>
#include <slice/slice_kernel_ref.h>
#include <data_inst.h>
#include <intel_gpu/runtime/error_handler.hpp>
#include <impls/implementation_map.hpp>
#include <slice/slice_kernel_selector.h>
#include "primitive_base.hpp"
#include <vector>
#include <algorithm>
#include <cstddef>
namespace cldnn {
namespace ocl {
namespace {
template<typename T, class = typename std::enable_if<std::is_integral<T>::value>::type>
std::vector<std::int32_t> extractIntegerData(const data_node& node, const stream& stream) {
mem_lock<T> lock{node.get_attached_memory_ptr(), stream};
T* data = lock.data();
std::vector<std::int32_t> integer_data;
integer_data.reserve(node.get_output_layout().count());
std::copy(data, data + node.get_output_layout().count(), std::back_inserter(integer_data));
return integer_data;
}
std::vector<std::int32_t> extractIntegerData(const data_node& node, const stream& stream) {
switch (node.get_output_layout().data_type) {
case data_types::u8:
return extractIntegerData<std::uint8_t>(node, stream);
case data_types::i8:
return extractIntegerData<std::int8_t>(node, stream);
case data_types::i32:
return extractIntegerData<std::int32_t>(node, stream);
case data_types::i64:
return extractIntegerData<std::int64_t>(node, stream);
default:
CLDNN_ERROR_DATA_TYPES_MISMATCH(node.id(), "Slice parameter",
node.get_output_layout().data_type, "Any integral type",
data_types::i32, "Slice parameters should be of integral type.");
}
return {};
}
std::vector<std::int32_t> extractShape(kernel_selector::Tensor::DataTensor& tensor) {
auto logical_dims = tensor.LogicalDims();
// LogicalDims method returns dims in reversed order
return {logical_dims.rbegin(), logical_dims.rend()};
}
} // namespace
struct slice_impl : typed_primitive_impl_ocl<slice> {
using parent = typed_primitive_impl_ocl<slice>;
using parent::parent;
enum InputIndices {
kData,
kStart,
kEnd,
kStep,
kAxes,
kInputsNum
};
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<slice_impl>(*this);
}
static primitive_impl* create(const slice_node& arg) {
auto params = get_default_params<kernel_selector::slice_params>(
arg);
auto op_params = get_default_optional_params<
kernel_selector::slice_optional_params>(
arg.get_program());
const auto& inputs = arg.get_dependencies();
const stream& stream = arg.get_program().get_stream();
auto start_elts = extractIntegerData(inputs[InputIndices::kStart]->as<data>(), stream);
auto end_elts = extractIntegerData(inputs[InputIndices::kEnd]->as<data>(), stream);
auto step_elts = extractIntegerData(inputs[InputIndices::kStep]->as<data>(), stream);
auto data_shape = extractShape(params.inputs[0]);
std::vector<std::int32_t> axes(data_shape.size());
if (inputs.size() == InputIndices::kInputsNum)
axes = std::move(extractIntegerData(inputs[InputIndices::kAxes]->as<data>(), stream));
else
std::iota(axes.begin(), axes.end(), 0);
std::vector<std::int32_t> selected_start(data_shape.size(), 0);
std::vector<std::int32_t> selected_step(data_shape.size(), 1);
std::vector<std::int32_t> selected_end(data_shape);
for (int axe = 0; axe < axes.size(); axe++) {
auto transformed_axe = axes[axe] < 0 ? data_shape.size() + axes[axe] : axes[axe];
auto start = start_elts[axe];
auto end = end_elts[axe];
auto dim_size = data_shape[transformed_axe];
selected_start[transformed_axe] = std::max(std::min(start < 0 ? dim_size + start : start, dim_size - 1), 0);
selected_end[transformed_axe] = std::max(std::min(end < 0 ? dim_size + end : end, dim_size - 1), 0);
selected_step[transformed_axe] = step_elts[axe];
}
params.start = std::move(selected_start);
params.end = std::move(selected_end);
params.step = std::move(selected_step);
auto &kernel_selector =
kernel_selector::slice_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(params, op_params);
CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(),
"Cannot find a proper kernel with this arguments");
return new slice_impl(arg, best_kernels[0]);
}
};
namespace detail {
attach_slice_impl::attach_slice_impl() {
implementation_map<slice>::add(impl_types::ocl, slice_impl::create, {
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::u8, format::bfyx),
std::make_tuple(data_types::i8, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
std::make_tuple(data_types::i64, format::bfyx),
std::make_tuple(data_types::f16, format::bfzyx),
std::make_tuple(data_types::f32, format::bfzyx),
std::make_tuple(data_types::u8, format::bfzyx),
std::make_tuple(data_types::i8, format::bfzyx),
std::make_tuple(data_types::i32, format::bfzyx),
std::make_tuple(data_types::i64, format::bfzyx),
});
}
} // namespace detail
} // namespace ocl
} // namespace cldnn
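As a reading aid, here is a standalone sketch (an assumption for illustration only, not part of the plugin sources) of the start/end/step normalization that slice_impl::create above applies before handing the parameters to the kernel selector: negative axes and coordinates are wrapped by the dimension size, coordinates are clamped to [0, dim - 1], and untouched axes keep the defaults start = 0, end = dim, step = 1.

```
#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative sketch of the coordinate normalization in slice_impl::create above.
struct normalized_slice {
    std::vector<std::int32_t> start, end, step;
};

normalized_slice normalize(const std::vector<std::int32_t>& shape,
                           const std::vector<std::int32_t>& axes,
                           const std::vector<std::int32_t>& starts,
                           const std::vector<std::int32_t>& ends,
                           const std::vector<std::int32_t>& steps) {
    // Defaults: start = 0, end = full extent, step = 1 on every axis.
    normalized_slice out{std::vector<std::int32_t>(shape.size(), 0),
                         shape,
                         std::vector<std::int32_t>(shape.size(), 1)};
    const auto rank = static_cast<std::int32_t>(shape.size());
    for (std::size_t i = 0; i < axes.size(); ++i) {
        const std::int32_t axis = axes[i] < 0 ? rank + axes[i] : axes[i];
        const std::int32_t dim = shape[axis];
        // Wrap negative coordinates by the dimension size, then clamp to [0, dim - 1].
        auto norm = [dim](std::int32_t v) {
            v = v < 0 ? dim + v : v;
            if (v < 0) v = 0;
            if (v > dim - 1) v = dim - 1;
            return v;
        };
        out.start[axis] = norm(starts[i]);
        out.end[axis] = norm(ends[i]);
        out.step[axis] = steps[i];
    }
    return out;
}

// Example: dim = 12, start = -1, end = -100  ->  start = 11, end = 0,
// which the kernel then walks with the (negative) step supplied by the user.
```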

View File

@ -128,20 +128,6 @@ public:
static primitive_impl* create(const fully_connected_node& arg) {
auto& engine = arg.get_program().get_engine();
auto desc = get_fully_connected_descriptor(arg);
auto prim = arg.get_primitive();
if (prim->input_size == 3) {
for (auto& fused_node : arg.get_fused_primitives()) {
auto node = fused_node.node;
if (node->is_type<eltwise>()) {
auto& dependency = arg.get_dependency(fused_node.dep_start_idx);
auto original_layout = dependency.get_output_layout();
onednn::combine_bf_with_first_spatial_dim(original_layout);
dependency.set_output_layout(original_layout, false);
}
}
}
auto attr = arg.get_onednn_primitive_attributes();
dnnl::primitive_desc prim_desc{&desc->data, attr.get(), engine.get_onednn_engine(), nullptr};

View File

@ -0,0 +1,38 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <intel_gpu/primitives/slice.hpp>
#include "primitive_inst.h"
#include <intel_gpu/runtime/error_handler.hpp>
namespace cldnn {
template <>
struct typed_program_node<slice> : public typed_program_node_base<slice> {
using parent = typed_program_node_base<slice>;
public:
using parent::parent;
program_node& input(std::size_t index = 0) const { return get_dependency(index); }
};
using slice_node = typed_program_node<slice>;
template <>
class typed_primitive_inst<slice> : public typed_primitive_inst_base<slice> {
using parent = typed_primitive_inst_base<slice>;
public:
static layout calc_output_layout(slice_node const& node);
static std::string to_string(slice_node const& node);
public:
typed_primitive_inst(network& network, slice_node const& desc);
};
using slice_inst = typed_primitive_inst<slice>;
} // namespace cldnn

View File

@ -110,8 +110,18 @@ template <class T>
static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
auto&& size = mem->get_layout().size;
file_stream << "shape: " << size.to_string() << " ";
file_stream << "(count: " << size.count() << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format) << ")" << std::endl;
GPU_DEBUG_GET_INSTANCE(debug_config);
auto batch_size = std::max(std::min(debug_config->dump_layers_limit_batch, size.batch[0]), 1);
tensor tmp_size(size);
tmp_size.batch[0] = batch_size;
if (tmp_size == size) {
file_stream << "shape: " << size.to_string() << " ";
file_stream << "(count: " << size.count() << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format) << ")" << std::endl;
} else {
file_stream << "shape: " << tmp_size.to_string() << " ";
file_stream << "(count: " << tmp_size.count() << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format)
<< ", original shape: " << size.to_string() << ")" << std::endl;
}
mem_lock<T, mem_lock_type::read> lock(mem, stream);
auto mem_ptr = lock.data();
@ -119,7 +129,7 @@ static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) {
std::stringstream buffer;
for (cldnn::tensor::value_type g = 0; g < size.group[0]; ++g) {
for (cldnn::tensor::value_type b = 0; b < size.batch[0]; ++b) {
for (cldnn::tensor::value_type b = 0; b < batch_size; ++b) {
for (cldnn::tensor::value_type f = 0; f < size.feature[0]; ++f) {
for (cldnn::tensor::value_type w = 0; w < size.spatial[3]; ++w) {
for (cldnn::tensor::value_type z = 0; z < size.spatial[2]; ++z) {

View File

@ -1225,8 +1225,13 @@ program::primitives_info program::get_current_stage_info() const {
void program::save_pass_info(std::string pass_name) {
// TODO: Directory path here can be probably changed to some bool flag
if (!options.get<build_option_type::graph_dumps_dir>()->directory_path.empty())
if (!options.get<build_option_type::graph_dumps_dir>()->directory_path.empty()) {
for (auto& node : this->get_processing_order()) {
if (!node->is_type<data>())
node->get_output_layout();
}
optimizer_passes_info.emplace_back(pass_name, get_current_stage_info());
}
}
void program::add_optimized_primitive_info(primitive_id optimized_primitive_id,

View File

@ -0,0 +1,40 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <slice_inst.h>
#include "primitive_type_base.h"
#include <sstream>
#include <json_object.h>
namespace cldnn {
primitive_type_id slice::type_id() {
static primitive_type_base<slice> instance;
return &instance;
}
slice_inst::typed_primitive_inst(network& network, slice_node const& node)
: parent(network, node) {}
layout slice_inst::calc_output_layout(slice_node const& node) {
auto primitive = node.get_primitive();
auto input_layout = node.input(0).get_output_layout();
return {input_layout.data_type, input_layout.format, primitive->output_shape};
}
std::string slice_inst::to_string(slice_node const& node) {
auto node_info = node.desc_to_json();
json_composite slice_info;
slice_info.add("input id", node.input().id());
slice_info.add("begin_param id", node.get_dependency(1).id());
slice_info.add("end_param id", node.get_dependency(2).id());
slice_info.add("step_param id", node.get_dependency(3).id());
slice_info.add("axis_param id", node.get_dependency(4).id());
node_info->add("slice info", slice_info);
std::stringstream primitive_description;
node_info->dump(primitive_description);
return primitive_description.str();
}
} // namespace cldnn

View File

@ -608,6 +608,7 @@ public:
#define CASE_FC_U8S8_3D_1 {2, 32, 1, 3}, {2, 32, 1, 16}, {16, 3, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_U8S8_3D_2 {1, 1, 1, 3}, {1, 1, 1, 32}, {32, 3, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_U8S8_3D_3 {2, 3, 1, 1}, {2, 3, 1, 15}, {15, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_U8S8_3D_4 {1, 512, 1, 1024}, {1, 384, 1, 1024}, {1024, 1024, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_NORMALIZE_I8_1 {1, 2, 3, 3}, data_types::u8, format::bfyx, data_types::f32, format::bfyx
@ -9258,7 +9259,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, gather_elements_activation_scale_eltwise,
}));
#ifdef ENABLE_ONEDNN_FOR_GPU
class ConvFusingTestOneDNN : public WeightsPrimitiveFusingTest<bc_test_params> {
class WeightsPrimitiveFusingTestOneDNN : public WeightsPrimitiveFusingTest<bc_test_params> {
public:
void execute(bc_test_params& p) {
// Onednn post operation has issue in a machine that does not support imad.
@ -9299,7 +9300,7 @@ public:
}
};
class conv_int8_eltwise_onednn : public ConvFusingTestOneDNN {};
class conv_int8_eltwise_onednn : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(conv_int8_eltwise_onednn, u8_eltwise_sum_out) {
auto p = GetParam();
@ -9364,7 +9365,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_int8_eltwise_onednn,
bc_test_params{CASE_CONV3D_S8S8_5, 3, 4},
}));
class conv_fp32_activation_abs_onednn : public ConvFusingTestOneDNN {};
class conv_fp32_activation_abs_onednn : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(conv_fp32_activation_abs_onednn, basic) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
@ -9387,7 +9388,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_activation_abs_onednn,
bc_test_params{CASE_CONV_FP16_4, 2, 3},
}));
class conv_fp32_activation_mish_onednn : public ConvFusingTestOneDNN {};
class conv_fp32_activation_mish_onednn : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(conv_fp32_activation_mish_onednn, basic) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
@ -9410,7 +9411,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_activation_mish_onednn,
bc_test_params{CASE_CONV_FP16_4, 2, 3},
}));
class conv_fp32_activation_swish_onednn : public ConvFusingTestOneDNN {};
class conv_fp32_activation_swish_onednn : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(conv_fp32_activation_swish_onednn, basic) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
@ -9433,7 +9434,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_activation_swish_onednn,
bc_test_params{CASE_CONV_FP16_4, 2, 3},
}));
class conv_fp32_activation_hswish_onednn : public ConvFusingTestOneDNN {};
class conv_fp32_activation_hswish_onednn : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(conv_fp32_activation_hswish_onednn, basic) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
@ -9456,7 +9457,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_activation_hswish_onednn,
bc_test_params{CASE_CONV_FP16_4, 2, 3},
}));
class conv_fp32_activation_exp_onednn : public ConvFusingTestOneDNN {};
class conv_fp32_activation_exp_onednn : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(conv_fp32_activation_exp_onednn, basic) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
@ -9479,7 +9480,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_activation_exp_onednn,
bc_test_params{CASE_CONV_FP16_4, 2, 3},
}));
class conv_int8_quantize_u8_onednn : public ConvFusingTestOneDNN {};
class conv_int8_quantize_u8_onednn : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(conv_int8_quantize_u8_onednn, per_channel) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
@ -9526,7 +9527,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_int8_quantize_u8_onednn,
bc_test_params{CASE_CONV_S8S8_3, 2, 3},
}));
class conv_int8_activation_eltwise_quantize_onednn : public ConvFusingTestOneDNN {};
class conv_int8_activation_eltwise_quantize_onednn : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(conv_int8_activation_eltwise_quantize_onednn, bsv32_fsv32) {
auto p = GetParam();
layout eltwise_layout = get_output_layout(p);
@ -9578,7 +9579,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_int8_activation_eltwise_quantize_oned
bc_test_params{CASE_CONV_S8S8_15, 2, 5},
}));
class conv_int8_scale_shift_swish_onednn : public ConvFusingTestOneDNN {};
class conv_int8_scale_shift_swish_onednn : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(conv_int8_scale_shift_swish_onednn, bsv32_fsv32) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
@ -9617,7 +9618,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_int8_scale_shift_swish_onednn,
bc_test_params{CASE_CONV_S8S8_15, 2, 7},
}));
class conv_int8_eltwise_scale_onednn : public ConvFusingTestOneDNN {};
class conv_int8_eltwise_scale_onednn : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(conv_int8_eltwise_scale_onednn, u8_eltwise_prod_out_reuse) {
auto p = GetParam();
@ -9667,7 +9668,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_int8_eltwise_scale_onednn,
// Limitations: no
// DNNL_VERBOSE log without optimization: attr-post-ops:eltwise_linear:12.75:127.5+eltwise_linear:1:-128
// DNNL_VERBOSE log with optimization: attr-post-ops:eltwise_linear:12.75:-0.5
class post_ops_optimizations_onednn_eltw_linear_eltw_linear : public ConvFusingTestOneDNN {};
class post_ops_optimizations_onednn_eltw_linear_eltw_linear : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(post_ops_optimizations_onednn_eltw_linear_eltw_linear, basic) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
@ -9718,7 +9719,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, post_ops_optimizations_onednn_eltw_linear_
// Limitations: beta = 0 in eltw_linear
// DNNL_VERBOSE log without optimization: attr-post-ops:eltwise_linear:12.75:127.5+eltwise_round+eltwise_linear:2.00784+eltwise_clip:0:512
// DNNL_VERBOSE log with optimization: attr-post-ops:eltwise_linear:12.75:127.5+eltwise_round:0:0:2.00784+eltwise_clip:0:512
class post_ops_optimizations_onednn_eltw_non_linear_eltw_linear : public ConvFusingTestOneDNN {};
class post_ops_optimizations_onednn_eltw_non_linear_eltw_linear : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(post_ops_optimizations_onednn_eltw_non_linear_eltw_linear, basic) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
@ -9769,7 +9770,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, post_ops_optimizations_onednn_eltw_non_lin
// Limitations: alpha = 1 and scale = 1 in eltw_linear; binary_add is a constant compile-time buffer
// DNNL_VERBOSE log without optimization: attr-oscale:2 attr-post-ops:binary_add:f32:2+eltwise_linear:1:-127+eltwise_clip:-127:127
// DNNL_VERBOSE log with optimization: attr-oscale:2 attr-post-ops:binary_add:f32:2+eltwise_clip:-127:127
class post_ops_optimizations_onednn_binary_add_eltw_linear : public ConvFusingTestOneDNN {};
class post_ops_optimizations_onednn_binary_add_eltw_linear : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(post_ops_optimizations_onednn_binary_add_eltw_linear, basic) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
@ -9820,7 +9821,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, post_ops_optimizations_onednn_binary_add_e
// Limitations: beta = 0 in eltw_linear; binary_mul is a constant compile-time buffer
// DNNL_VERBOSE log without optimization: attr-oscale:2 attr-post-ops:binary_mul:f32:2+eltwise_linear:2.01575+eltwise_clip:0:512
// DNNL_VERBOSE log with optimization: attr-oscale:2 attr-post-ops:binary_mul:f32:2+eltwise_clip:0:512
class post_ops_optimizations_onednn_binary_mul_eltw_linear : public ConvFusingTestOneDNN {};
class post_ops_optimizations_onednn_binary_mul_eltw_linear : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(post_ops_optimizations_onednn_binary_mul_eltw_linear, basic) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
@ -9871,7 +9872,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, post_ops_optimizations_onednn_binary_mul_e
// Limitations: beta = 0 in eltw_linear
// DNNL_VERBOSE log without optimization: attr-oscale:2 attr-post-ops:eltwise_linear:2.01575+eltwise_clip:0:512
// DNNL_VERBOSE log with optimization: attr-oscale:2 attr-post-ops:eltwise_clip:0:512
class post_ops_optimizations_onednn_oscale_eltw_linear : public ConvFusingTestOneDNN {};
class post_ops_optimizations_onednn_oscale_eltw_linear : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(post_ops_optimizations_onednn_oscale_eltw_linear, basic) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
@ -9920,7 +9921,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, post_ops_optimizations_onednn_oscale_eltw_
// Limitations: beta = 0 in eltw_linear
// DNNL_VERBOSE log without optimization: attr-post-ops:eltwise_relu+sum:1:0:u8+eltwise_linear:12.7+eltwise_clip:0:127
// DNNL_VERBOSE log with optimization: attr-post-ops:eltwise_relu:0:0:12.7+sum:12.7:0:u8+eltwise_clip:0:127
class post_ops_optimizations_onednn_eltw_any_sum_eltw_linear : public ConvFusingTestOneDNN {};
class post_ops_optimizations_onednn_eltw_any_sum_eltw_linear : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(post_ops_optimizations_onednn_eltw_any_sum_eltw_linear, basic) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
@ -9970,7 +9971,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, post_ops_optimizations_onednn_eltw_any_sum
// Input range uses in 2 cases: not per-tensor output range or out_lo > out_hi
// Here's out_lo > out_hi and no optimizations
// DNNL_VERBOSE log: attr-post-ops:eltwise_linear:12.75:127.5+eltwise_round+eltwise_linear:-1:127
class post_ops_optimizations_input_range : public ConvFusingTestOneDNN {};
class post_ops_optimizations_input_range : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(post_ops_optimizations_input_range, basic) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
@ -10015,6 +10016,33 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, post_ops_optimizations_input_range,
bc_test_params{CASE_CONV_S8S8_14, 2, 3},
bc_test_params{CASE_CONV_S8S8_15, 2, 3},
}));
class fc_int8_inputs_fused_fp32_sum : public WeightsPrimitiveFusingTestOneDNN {};
TEST_P(fc_int8_inputs_fused_fp32_sum, basic) {
auto p = GetParam();
auto shift_layout = layout{ p.default_type, p.default_format, tensor{1, 1, 1, p.kernel.batch[0]} };
create_topologies(input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_fc_weights_layout(p))),
data("bias", get_mem(get_fc_bias_layout(p))),
data("shift_data", get_mem(shift_layout, 1)),
fully_connected("fc_prim", "input", "weights", "bias", cldnn::data_types::f32, "", padding(), get_fc_output_dim_size(p)),
eltwise("shift", {"fc_prim", "shift_data"}, eltwise_mode::sum, cldnn::data_types::f32),
crop("crop", "shift", get_output_layout(p).size, {0, 0, 0, 0}),
reorder("reorder_bfyx", "crop", p.default_format, data_types::f32)
);
tolerance = 1e-5f;
execute(p);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_int8_inputs_fused_fp32_sum, ::testing::ValuesIn(std::vector<bc_test_params>{
// OneDNN has an issue with small shapes - ticket 7064
// bc_test_params{ CASE_FC_U8S8_3D_1, 2, 4 },
// bc_test_params{ CASE_FC_U8S8_3D_2, 2, 4 },
bc_test_params{ CASE_FC_U8S8_3D_4, 2, 4 },
}));
#endif

View File

@ -0,0 +1,144 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/slice.hpp>
#include <intel_gpu/primitives/data.hpp>
#include <random>
#include <algorithm>
#include <vector>
using namespace cldnn;
using namespace ::tests;
namespace {
template<typename T>
class SliceTest : public ::testing::Test {
public:
static std::vector<T> GenInput(int size) {
std::vector<T> result;
for (int i = 0; i < size; i++)
result.push_back(i);
return result;
}
void TearDown() override {
assert(input_shape_.size() == 4 || input_shape_.size() == 5);
format input_format = input_shape_.size() == 4 ? format::bfyx : format::bfzyx;
layout data_layout ( input_type_, input_format, tensor{input_shape_} );
std::vector<T> input_vals = GenInput(data_layout.get_linear_size());
memory::ptr input = engine_.allocate_memory(data_layout);
set_values(input, input_vals);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(data("start", start_));
topology.add(data("stop", stop_));
topology.add(data("step", step_));
std::vector<primitive_id> inputs {"input", "start", "stop", "step"};
if (axes_) {
topology.add(data("axes", axes_));
inputs.push_back("axes");
}
topology.add(slice("slice", inputs, tensor{output_shape_}));
network network(engine_, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
EXPECT_EQ(outputs.size(), size_t(1));
EXPECT_EQ(outputs.begin()->first, "slice");
auto output = outputs.at("slice").get_memory();
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
ASSERT_EQ(output_ptr.size(), expected_output_.size());
for (size_t i = 0; i < output_ptr.size(); ++i)
EXPECT_TRUE(are_equal(expected_output_[i], output_ptr[i], 2e-3));
}
data_types DataType() const;
protected:
engine& engine_ = get_test_engine();
std::vector<std::int32_t> input_shape_;
data_types input_type_ {DataType()};
memory::ptr start_;
memory::ptr stop_;
memory::ptr step_;
memory::ptr axes_;
std::vector<std::int32_t> output_shape_;
std::vector<T> expected_output_;
};
template<>
data_types SliceTest<float>::DataType() const {return data_types::f32;}
template<>
data_types SliceTest<int>::DataType() const { return data_types::i32; }
template<>
data_types SliceTest<long long>::DataType() const { return data_types::i64; }
using testing::Types;
typedef Types<float, int, long long> DataTypes;
TYPED_TEST_SUITE(SliceTest, DataTypes);
TYPED_TEST(SliceTest, bfyx_positive_step) {
this->input_shape_ = { 1, 2, 100, 12 };
this->start_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
set_values<int64_t>(this->start_, {0, 1, 0, 1});
this->stop_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
set_values<int64_t>(this->stop_, { 1, 2, 5, 100 });
this->step_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
set_values<int64_t>(this->step_, { 1, 1, 1, 10 });
this->output_shape_ = { 1, 1, 5, 10 };
this->expected_output_ = {
1201, 1211, 1221, 1231, 1241, 1301, 1311, 1321, 1331, 1341,
1401, 1411, 1421, 1431, 1441, 1501, 1511, 1521, 1531, 1541,
1601, 1611, 1621, 1631, 1641, 1701, 1711, 1721, 1731, 1741,
1801, 1811, 1821, 1831, 1841, 1901, 1911, 1921, 1931, 1941,
2001, 2011, 2021, 2031, 2041, 2101, 2111, 2121, 2131, 2141
};
}
TYPED_TEST(SliceTest, bfyx_negative_step) {
this->input_shape_ = { 1, 2, 100, 12 };
this->start_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
set_values<int64_t>(this->start_, { 1, 2, 5, 100 });
this->stop_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
set_values<int64_t>(this->stop_, {0, 1, 0, 1});
this->step_ = this->engine_.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } });
set_values<int64_t>(this->step_, { -1, -1, -1, -10 });
this->output_shape_ = { 1, 1, 5, 10 };
this->expected_output_ = {
1799, 1789, 1779, 1769, 1759, 1699, 1689, 1679, 1669, 1659,
1599, 1589, 1579, 1569, 1559, 1499, 1489, 1479, 1469, 1459,
1399, 1389, 1379, 1369, 1359, 1299, 1289, 1279, 1269, 1259,
1199, 1189, 1179, 1169, 1159, 1099, 1089, 1079, 1069, 1059,
999, 989, 979, 969, 959, 899, 889, 879, 869, 859
};
}
TYPED_TEST(SliceTest, bfzyx) {
this->input_shape_ = { 2, 3, 10, 12, 5 };
this->start_ = this->engine_.allocate_memory({ data_types::i64, format::bfzyx, { 5, 1, 1, 1 } });
set_values<int64_t>(this->start_, { 0, 0, 0, 0, 0 });
this->stop_ = this->engine_.allocate_memory({ data_types::i64, format::bfzyx, { 5, 1, 1, 1 } });
set_values<int64_t>(this->stop_, {1, 2, 2, 2, 2});
this->step_ = this->engine_.allocate_memory({ data_types::i64, format::bfzyx, { 5, 1, 1, 1 } });
set_values<int64_t>(this->step_, { 1, 1, 1, 1, 1 });
this->output_shape_ = { 1, 2, 2, 2, 2 };
this->expected_output_ = {
0, 1, 10, 11, 120, 121, 130, 131,
600, 601, 610, 611, 720, 721, 730, 731
};
}
} // anonymous namespace

@ -1 +1 @@
Subproject commit 5adbcb757c77f1bf0cd11ad58dd92e93ea2e3561
Subproject commit acee807d84944008df6741677ab52e01d790d58a

View File

@ -79,11 +79,14 @@ Options:
-h, --help Print a usage message
-m "<path>" Required. Path to an .xml/.onnx/.prototxt file with a trained model or to a .blob files with a trained compiled model.
-i "<path>" Optional. Path to a folder with images and/or binaries or to specific image or binary file.
In case of networks with dynamic shapes and several inputs, provide the same number of files for each input (except when a single file is used for every input):
"input1:1.jpg input2:1.bin", "input1:1.bin,2.bin input2:3.bin input3:4.bin,5.bin".
You can also pass specific keys for inputs: "random" - for filling the input with random data, "image_info" - for filling the input with the image size.
-d "<device>" Optional. Specify a target device to infer on (the list of available devices is shown below). Default value is CPU.
Use "-d HETERO:<comma-separated_devices_list>" format to specify HETERO plugin.
Use "-d MULTI:<comma-separated_devices_list>" format to specify MULTI plugin.
Use "-d GPU.X" format to specify device id for GPU devices.
The application looks for a suitable plugin for the specified device.
The application looks for a suitable plugin for the specified device.
-l "<absolute_path>" Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.
Or
-c "<absolute_path>" Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.
@ -99,11 +102,23 @@ Options:
-stream_output Optional. Print progress as a plain text. When specified, an interactive progress bar is replaced with a multiline output.
-t Optional. Time, in seconds, to execute topology.
-progress Optional. Show progress bar (can affect performance measurement). Default values is "false".
-shape Optional. Set shape for input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size.
-shape Optional. Set shape for network input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size.
This parameter affects the model input shape and can be dynamic. For dynamic dimensions use symbol `?` or '-1'. Ex. [?,3,?,?].
For bounded dimensions specify range 'min..max'. Ex. [1..10,3,?,?].
-data_shape Required for networks with dynamic shapes. Set shape for input blobs.
In case of one input size: "[1,3,224,224]" or "input1[1,3,224,224],input2[1,4]".
In case of several input sizes provide the same number of shapes for
each input (except when a single shape is used for every input): "[1,3,128,128][3,3,128,128][1,3,320,320]",
"input1[1,1,128,128][1,1,256,256],input2[80,1]" or "input1[1,192][1,384],input2[1,192][1,384],input3[1,192][1,384],input4[1,192][1,384]".
If network shapes are all static, specifying the option will cause an exception.
-layout Optional. Prompts how network layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size.
-cache_dir "<path>" Optional. Enables caching of loaded models to specified directory.
-load_from_file Optional. Loads model from file directly without ReadNetwork.
-latency_percentile Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median).
-inference_only Optional. Measure only the inference stage. Default option for static models.
Dynamic models are measured in full mode, which includes the inputs setup stage;
inference-only mode is available for them only with a single input data shape.
To enable full mode for static models, pass "false" to this argument, e.g. -inference_only=false.
CPU-specific performance options:
-nstreams "<integer>" Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices
@ -117,16 +132,19 @@ Options:
-enforcebf16="<true/false>" Optional. By default floating point operations execution in bfloat16 precision are enforced if supported by platform.
-pin "YES"/"HYBRID_AWARE"/"NUMA"/"NO"
Optional. Explicit inference threads binding options (leave empty to let the OpenVINO to make a choice):
enabling threads->cores pinning ("YES", which is already default for a conventional CPU),
letting the runtime to decide on the threads->different core types ("HYBRID_AWARE", which is default on the hybrid CPUs)
threads->(NUMA)nodes ("NUMA") or
completely disable ("NO") CPU inference threads pinning.
enabling threads->cores pinning ("YES", which is already default for a conventional CPU),
letting the runtime to decide on the threads->different core types ("HYBRID_AWARE", which is default on the hybrid CPUs)
threads->(NUMA)nodes ("NUMA") or completely disable ("NO") CPU inference threads pinning.
-ip "U8"/"FP16"/"FP32" Optional. Specifies precision for all input layers of the network.
-op "U8"/"FP16"/"FP32" Optional. Specifies precision for all output layers of the network.
-iop Optional. Specifies precision for input and output layers by name. Example: -iop "input:FP16, output:FP16". Notice that quotes are required. Overwrites precision from ip and op options for specified layers.
-iop Optional. Specifies precision for input and output layers by name. Example: -iop "input:FP16, output:FP16". Notice that quotes are required.
Overwrites precision from ip and op options for specified layers.
Statistics dumping options:
-report_type "<type>" Optional. Enable collecting statistics report. "no_counters" report contains configuration options specified, resulting FPS and latency. "average_counters" report extends "no_counters" report and additionally includes average PM counters values for each layer from the network. "detailed_counters" report extends "average_counters" report and additionally includes per-layer PM counters and latency for each executed infer request.
-report_type "<type>" Optional. Enable collecting statistics report. "no_counters" report contains configuration options specified, resulting FPS and latency.
"average_counters" report extends "no_counters" report and additionally includes average PM counters values for each layer from the network.
"detailed_counters" report extends "average_counters" report and additionally includes per-layer PM counters
and latency for each executed infer request.
-report_folder Optional. Path to a folder where statistics report is stored.
-exec_graph_path Optional. Path to a file where to store executable graph information serialized.
-pc Optional. Report performance counters.
@ -181,33 +199,55 @@ This section provides step-by-step instructions on how to run the Benchmark Tool
The application outputs the number of executed iterations, total duration of execution, latency, and throughput.
Additionally, if you set the `-report_type` parameter, the application outputs statistics report. If you set the `-pc` parameter, the application outputs performance counters. If you set `-exec_graph_path`, the application reports executable graph information serialized. All measurements including per-layer PM counters are reported in milliseconds.
Below are fragments of sample output for CPU and GPU devices:
Below are fragments of sample output for static and dynamic networks:
* For CPU:
* For static network:
```
[Step 8/9] Measuring performance (Start inference asynchronously, 60000 ms duration, 4 inference requests in parallel using 4 streams)
Progress: [....................] 100.00% done
[Step 9/9] Dumping statistics report
[ INFO ] Statistics collecting was not requested. No reports are dumped.
Progress: [....................] 100.00% done
Count: 4612 iterations
Duration: 60110.04 ms
Latency: 50.99 ms
Throughput: 76.73 FPS
```
* For GPU:
```
[Step 10/11] Measuring performance (Start inference asynchronously, 5 inference requests using 4 streams for CPU, limits: 120000 ms duration)
Progress: [....................] 100% done
[Step 10/11] Measuring performance (Start inference asynchronously, 4 inference requests using 4 streams for CPU, limits: 60000 ms duration)
[ INFO ] BENCHMARK IS IN INFERENCE ONLY MODE.
[ INFO ] Input blobs will be filled once before performance measurements.
[ INFO ] First inference took 26.26 ms
Progress: [................... ] 99% done
[Step 11/11] Dumping statistics report
Count: 102515 iterations
Duration: 120007.38 ms
Latency: 5.84 ms
Throughput: 854.24 FP
[ INFO ] Count: 6640 iterations
[ INFO ] Duration: 60039.70 ms
[ INFO ] Latency:
[ INFO ] Median: 35.36 ms
[ INFO ] Avg: 36.12 ms
[ INFO ] Min: 18.55 ms
[ INFO ] Max: 88.96 ms
[ INFO ] Throughput: 110.59 FPS
```
* For dynamic network:
```
[Step 10/11] Measuring performance (Start inference asynchronously, 4 inference requests using 4 streams for CPU, limits: 60000 ms duration)
[ INFO ] BENCHMARK IS IN FULL MODE.
[ INFO ] Inputs setup stage will be included in performance measurements.
[ INFO ] First inference took 26.80 ms
Progress: [................... ] 99% done
[Step 11/11] Dumping statistics report
[ INFO ] Count: 5199 iterations
[ INFO ] Duration: 60043.34 ms
[ INFO ] Latency:
[ INFO ] Median: 41.58 ms
[ INFO ] Avg: 46.07 ms
[ INFO ] Min: 8.44 ms
[ INFO ] Max: 115.65 ms
[ INFO ] Latency for each data shape group:
[ INFO ] 1. data : [1, 3, 224, 224]
[ INFO ] Median: 38.37 ms
[ INFO ] Avg: 30.29 ms
[ INFO ] Min: 8.44 ms
[ INFO ] Max: 61.30 ms
[ INFO ] 2. data : [1, 3, 448, 448]
[ INFO ] Median: 68.21 ms
[ INFO ] Avg: 61.85 ms
[ INFO ] Min: 29.58 ms
[ INFO ] Max: 115.65 ms
[ INFO ] Throughput: 86.59 FPS
```
## See Also

View File

@ -19,7 +19,12 @@ static const char help_message[] = "Print a usage message";
/// @brief message for images argument
static const char input_message[] =
"Optional. Path to a folder with images and/or binaries or to specific image or binary file.";
"Optional. Path to a folder with images and/or binaries or to specific image or binary file.\n"
" In case of dynamic shapes networks with several inputs provide the same number"
" of files for each input (except cases with single file for any input):"
"\"input1:1.jpg input2:1.bin\", \"input1:1.bin,2.bin input2:3.bin input3:4.bin,5.bin \"."
" Also you can pass specific keys for inputs: \"random\" - for fillling input with random data,"
" \"image_info\" - for filling input with image size.";
/// @brief message for model argument
static const char model_message[] =
@ -136,6 +141,9 @@ static const char progress_message[] =
// @brief message for performance counters option
static const char pc_message[] = "Optional. Report performance counters.";
// @brief message for performance counters for sequence option
static const char pcseq_message[] = "Optional. Report latencies for each shape in -data_shape sequence.";
#ifdef HAVE_DEVICE_MEM_SUPPORT
// @brief message for switching memory allocation type option
static const char use_device_mem_message[] =
@ -155,9 +163,19 @@ static const char dump_config_message[] =
#endif
static const char shape_message[] =
"Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or "
"\"[1,3,224,224]\""
" in case of one input size.";
"Optional. Set shape for network input. For example, \"input1[1,3,224,224],input2[1,4]\" or \"[1,3,224,224]\""
" in case of one input size. This parameter affect model input shape and can be dynamic."
" For dynamic dimensions use symbol `?` or '-1'. Ex. [?,3,?,?]."
" For bounded dimensions specify range 'min..max'. Ex. [1..10,3,?,?].";
static const char data_shape_message[] =
" Required for networks with dynamic shapes. Set shape for input blobs."
" In case of one input size: \"[1,3,224,224]\" or \"input1[1,3,224,224],input2[1,4]\"."
" In case of several input sizes provide the same number for each input (except cases with single shape for any "
"input):"
" \"[1,3,128,128][3,3,128,128][1,3,320,320]\", \"input1[1,1,128,128][1,1,256,256],input2[80,1]\""
" or \"input1[1,192][1,384],input2[1,192][1,384],input3[1,192][1,384],input4[1,192][1,384]\"."
" If network shapes are all static specifying the option will cause an exception.";
static const char layout_message[] =
"Optional. Prompts how network layouts should be treated by application. "
@ -196,6 +214,13 @@ static constexpr char input_image_mean_message[] =
"Values to be provided in the [R, G, B] format. Can be defined for desired input of the model,\n"
"Example: -imean data[255,255,255],info[255,255,255]\n";
static constexpr char inference_only_message[] =
"Optional. Measure only inference stage. Default option for static models. Dynamic models"
" are measured in full mode which includes inputs setup stage,"
" inference only mode available for them with single input data shape only."
" To enable full mode for static models pass \"false\" value to this argument:"
" ex. \"-inference_only=false\".\n";
/// @brief Define flag for showing help message <br>
DEFINE_bool(h, false, help_message);
@ -276,6 +301,9 @@ DEFINE_bool(progress, false, progress_message);
/// @brief Define flag for showing performance counters <br>
DEFINE_bool(pc, false, pc_message);
/// @brief Define flag for showing performance sequence counters <br>
DEFINE_bool(pcseq, false, pcseq_message);
#ifdef HAVE_DEVICE_MEM_SUPPORT
/// @brief Define flag for switching between host and device memory allocation for input and output buffers
DEFINE_bool(use_device_mem, false, use_device_mem_message);
@ -292,6 +320,9 @@ DEFINE_string(dump_config, "", dump_config_message);
/// @brief Define flag for input shape <br>
DEFINE_string(shape, "", shape_message);
/// @brief Define flag for input blob shape <br>
DEFINE_string(data_shape, "", data_shape_message);
/// @brief Define flag for layout shape <br>
DEFINE_string(layout, "", layout_message);
@ -322,6 +353,9 @@ DEFINE_string(iscale, "", input_image_scale_message);
/// @brief Define flag for using input image mean <br>
DEFINE_string(imean, "", input_image_mean_message);
/// @brief Define flag for inference only mode <br>
DEFINE_bool(inference_only, true, inference_only_message);
/**
* @brief This function show a help message
*/
@ -346,8 +380,9 @@ static void showUsage() {
std::cout << " -t " << execution_time_message << std::endl;
std::cout << " -progress " << progress_message << std::endl;
std::cout << " -shape " << shape_message << std::endl;
std::cout << " -data_shape " << data_shape_message << std::endl;
std::cout << " -layout " << layout_message << std::endl;
std::cout << " -cache_dir \"<path>\" " << cache_dir_message << std::endl;
std::cout << " -cache_dir \"<path>\" " << cache_dir_message << std::endl;
std::cout << " -load_from_file " << load_from_file_message << std::endl;
std::cout << " -latency_percentile " << infer_latency_percentile_message << std::endl;
std::cout << std::endl << " device-specific performance options:" << std::endl;
@ -363,6 +398,7 @@ static void showUsage() {
std::cout << " -report_folder " << report_folder_message << std::endl;
std::cout << " -exec_graph_path " << exec_graph_path_message << std::endl;
std::cout << " -pc " << pc_message << std::endl;
std::cout << " -pcseq " << pcseq_message << std::endl;
#ifdef USE_OPENCV
std::cout << " -dump_config " << dump_config_message << std::endl;
std::cout << " -load_config " << load_config_message << std::endl;
@ -373,4 +409,5 @@ static void showUsage() {
std::cout << " -iop \"<value>\" " << iop_message << std::endl;
std::cout << " -iscale " << input_image_scale_message << std::endl;
std::cout << " -imean " << input_image_mean_message << std::endl;
std::cout << " -inference_only " << inference_only_message << std::endl;
}

View File

@ -18,13 +18,12 @@
// clang-format off
#include "inference_engine.hpp"
#include "remote_blobs_filling.hpp"
#include "statistics_report.hpp"
#include "utils.hpp"
// clang-format on
typedef std::chrono::high_resolution_clock Time;
typedef std::chrono::nanoseconds ns;
typedef std::function<void(size_t id, const double latency)> QueueCallbackFunction;
typedef std::function<void(size_t id, size_t group_id, const double latency)> QueueCallbackFunction;
/// @brief Wrapper class for InferenceEngine::InferRequest. Handles asynchronous callbacks and calculates execution
/// time.
@ -37,10 +36,12 @@ public:
explicit InferReqWrap(InferenceEngine::ExecutableNetwork& net, size_t id, QueueCallbackFunction callbackQueue)
: _request(net.CreateInferRequest()),
_id(id),
_callbackQueue(callbackQueue) {
_lat_group_id(0),
_callbackQueue(callbackQueue),
outputClBuffer() {
_request.SetCompletionCallback([&]() {
_endTime = Time::now();
_callbackQueue(_id, getExecutionTimeInMilliseconds());
_callbackQueue(_id, _lat_group_id, getExecutionTimeInMilliseconds());
});
}
@ -57,7 +58,7 @@ public:
_startTime = Time::now();
_request.Infer();
_endTime = Time::now();
_callbackQueue(_id, getExecutionTimeInMilliseconds());
_callbackQueue(_id, _lat_group_id, getExecutionTimeInMilliseconds());
}
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> getPerformanceCounts() {
@ -77,26 +78,48 @@ public:
return static_cast<double>(execTime.count()) * 0.000001;
}
void setLatencyGroupId(size_t id) {
_lat_group_id = id;
}
// In case of using GPU memory we need to allocate a CL buffer for
// output blobs. By encapsulating the CL buffer inside InferReqWrap
// we control the number of output buffers and access to them.
std::map<std::string, ::gpu::BufferType>& getOutputClBuffer() {
return outputClBuffer;
}
private:
InferenceEngine::InferRequest _request;
Time::time_point _startTime;
Time::time_point _endTime;
size_t _id;
size_t _lat_group_id;
QueueCallbackFunction _callbackQueue;
std::map<std::string, ::gpu::BufferType> outputClBuffer;
};
class InferRequestsQueue final {
public:
InferRequestsQueue(InferenceEngine::ExecutableNetwork& net, size_t nireq) {
InferRequestsQueue(InferenceEngine::ExecutableNetwork& net,
size_t nireq,
size_t lat_group_n,
bool enable_lat_groups)
: enable_lat_groups(enable_lat_groups) {
for (size_t id = 0; id < nireq; id++) {
requests.push_back(std::make_shared<InferReqWrap>(
net,
id,
std::bind(&InferRequestsQueue::putIdleRequest, this, std::placeholders::_1, std::placeholders::_2)));
requests.push_back(std::make_shared<InferReqWrap>(net,
id,
std::bind(&InferRequestsQueue::putIdleRequest,
this,
std::placeholders::_1,
std::placeholders::_2,
std::placeholders::_3)));
_idleIds.push(id);
}
_latency_groups.resize(lat_group_n);
resetTimes();
}
~InferRequestsQueue() {
// Inference Request guarantee that it will wait for all asynchronous internal tasks in destructor
// So it should be released before any context that the request can use inside internal asynchronous tasks
@ -111,15 +134,21 @@ public:
_startTime = Time::time_point::max();
_endTime = Time::time_point::min();
_latencies.clear();
for (auto& group : _latency_groups) {
group.clear();
}
}
double getDurationInMilliseconds() {
return std::chrono::duration_cast<ns>(_endTime - _startTime).count() * 0.000001;
}
void putIdleRequest(size_t id, const double latency) {
void putIdleRequest(size_t id, size_t lat_group_id, const double latency) {
std::unique_lock<std::mutex> lock(_mutex);
_latencies.push_back(latency);
if (enable_lat_groups) {
_latency_groups[lat_group_id].push_back(latency);
}
_idleIds.push(id);
_endTime = std::max(Time::now(), _endTime);
_cv.notify_one();
@ -147,6 +176,10 @@ public:
return _latencies;
}
std::vector<std::vector<double>> getLatencyGroups() {
return _latency_groups;
}
std::vector<InferReqWrap::Ptr> requests;
private:
@ -156,4 +189,6 @@ private:
Time::time_point _startTime;
Time::time_point _endTime;
std::vector<double> _latencies;
std::vector<std::vector<double>> _latency_groups;
bool enable_lat_groups;
};
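Below is a toy sketch (illustration only, not benchmark_app code) of the per-group latency accounting introduced above: each request is tagged with a latency group id corresponding to one -data_shape entry, putIdleRequest appends the measured latency to that group, and the report (enabled by -pcseq) can then print per-group statistics such as the median for each data shape group.

```
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

// Toy median: middle element of the sorted sample (illustration only).
static double median(std::vector<double> v) {
    if (v.empty()) return 0.0;
    std::sort(v.begin(), v.end());
    return v[v.size() / 2];
}

int main() {
    // One bucket per -data_shape group; the shapes below are hypothetical.
    std::vector<std::vector<double>> latency_groups(2);
    latency_groups[0] = {38.4, 30.1, 8.4};    // e.g. data : [1, 3, 224, 224]
    latency_groups[1] = {68.2, 61.9, 115.7};  // e.g. data : [1, 3, 448, 448]
    for (std::size_t g = 0; g < latency_groups.size(); ++g)
        std::printf("group %zu: median %.2f ms over %zu runs\n",
                    g, median(latency_groups[g]), latency_groups[g].size());
    return 0;
}
```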

View File

@ -3,7 +3,10 @@
//
#include <algorithm>
#include <fstream>
#include <iomanip>
#include <memory>
#include <random>
#include <string>
#include <utility>
#include <vector>
@ -13,6 +16,8 @@
#include "format_reader_ptr.h"
#include "inputs_filling.hpp"
#include "shared_blob_allocator.hpp"
#include "utils.hpp"
// clang-format on
using namespace InferenceEngine;
@ -42,207 +47,464 @@ std::vector<std::string> filterFilesByExtensions(const std::vector<std::string>&
return filtered;
}
template <typename T>
void fillBlobImage(Blob::Ptr& inputBlob,
const std::vector<std::string>& filePaths,
const size_t& batchSize,
const benchmark_app::InputInfo& app_info,
const size_t& requestId,
const size_t& inputId,
const size_t& inputSize) {
MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
if (!minput) {
IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in "
"fillBlobImage, "
<< "but by fact we were not able to cast inputBlob to MemoryBlob";
}
// locked memory holder should be alive all time while access to its buffer
// happens
auto minputHolder = minput->wmap();
auto inputBlobData = minputHolder.as<T*>();
/** Collect images data ptrs **/
std::vector<std::shared_ptr<uint8_t>> vreader;
vreader.reserve(batchSize);
for (size_t i = 0ULL, inputIndex = requestId * batchSize * inputSize + inputId; i < batchSize;
i++, inputIndex += inputSize) {
inputIndex %= filePaths.size();
slog::info << "Prepare image " << filePaths[inputIndex] << slog::endl;
FormatReader::ReaderPtr reader(filePaths[inputIndex].c_str());
if (reader.get() == nullptr) {
slog::warn << "Image " << filePaths[inputIndex] << " cannot be read!" << slog::endl << slog::endl;
continue;
}
/** Getting image data **/
std::shared_ptr<uint8_t> imageData(reader->getData(app_info.width(), app_info.height()));
if (imageData) {
vreader.push_back(imageData);
}
}
/** Fill input tensor with images. First b channel, then g and r channels **/
const size_t numChannels = app_info.channels();
const size_t width = app_info.width();
const size_t height = app_info.height();
/** Iterate over all input images **/
for (size_t imageId = 0; imageId < vreader.size(); ++imageId) {
/** Iterate over all width **/
for (size_t w = 0; w < app_info.width(); ++w) {
/** Iterate over all height **/
for (size_t h = 0; h < app_info.height(); ++h) {
/** Iterate over all channels **/
for (size_t ch = 0; ch < numChannels; ++ch) {
/** [images stride + channels stride + pixel id ] all in
* bytes **/
size_t offset = imageId * numChannels * width * height +
(((app_info.layout == "NCHW") || (app_info.layout == "CHW"))
? (ch * width * height + h * width + w)
: (h * width * numChannels + w * numChannels + ch));
inputBlobData[offset] =
(static_cast<T>(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]) -
static_cast<T>(app_info.mean[ch])) /
static_cast<T>(app_info.scale[ch]);
}
}
}
}
}
template <typename T>
void fillBlobBinary(Blob::Ptr& inputBlob,
const std::vector<std::string>& filePaths,
const size_t& batchSize,
const size_t& requestId,
const size_t& inputId,
const size_t& inputSize) {
MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
auto adjBatchSize = batchSize;
// Check layout
std::stringstream ss;
auto tensorDesc = inputBlob->getTensorDesc();
ss << tensorDesc.getLayout();
auto layout = ss.str();
std::size_t batchIndex = layout.find("N");
if (batchIndex == std::string::npos) {
adjBatchSize = 1;
} else if (tensorDesc.getDims().at(batchIndex) != batchSize) {
adjBatchSize = tensorDesc.getDims().at(batchIndex);
}
if (!minput) {
IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in "
"fillBlobBinary, "
<< "but by fact we were not able to cast inputBlob to MemoryBlob";
}
// locked memory holder should be alive all time while access to its buffer
// happens
auto minputHolder = minput->wmap();
auto inputBlobData = minputHolder.as<char*>();
for (size_t i = 0ULL, inputIndex = requestId * adjBatchSize * inputSize + inputId; i < adjBatchSize;
i++, inputIndex += inputSize) {
inputIndex %= filePaths.size();
slog::info << "Prepare binary file " << filePaths[inputIndex] << slog::endl;
std::ifstream binaryFile(filePaths[inputIndex], std::ios_base::binary | std::ios_base::ate);
if (!binaryFile) {
IE_THROW() << "Cannot open " << filePaths[inputIndex];
}
auto fileSize = static_cast<std::size_t>(binaryFile.tellg());
binaryFile.seekg(0, std::ios_base::beg);
if (!binaryFile.good()) {
IE_THROW() << "Can not read " << filePaths[inputIndex];
}
auto inputSize = inputBlob->size() * sizeof(T) / adjBatchSize;
if (fileSize != inputSize) {
IE_THROW() << "File " << filePaths[inputIndex] << " contains " << std::to_string(fileSize)
<< " bytes "
"but the network expects "
<< std::to_string(inputSize);
}
binaryFile.read(&inputBlobData[i * inputSize], inputSize);
}
}
template <typename T>
using uniformDistribution = typename std::conditional<
std::is_floating_point<T>::value,
std::uniform_real_distribution<T>,
typename std::conditional<std::is_integral<T>::value, std::uniform_int_distribution<T>, void>::type>::type;
template <typename T, typename T2>
void fillBlobRandom(Blob::Ptr& inputBlob,
T rand_min = std::numeric_limits<uint8_t>::min(),
T rand_max = std::numeric_limits<uint8_t>::max()) {
MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
if (!minput) {
IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in "
"fillBlobRandom, "
<< "but by fact we were not able to cast inputBlob to MemoryBlob";
}
// locked memory holder should be alive all time while access to its buffer
// happens
auto minputHolder = minput->wmap();
template <typename T>
InferenceEngine::Blob::Ptr createBlobFromImage(const std::vector<std::string>& files,
size_t inputId,
size_t batchSize,
const benchmark_app::InputInfo& inputInfo,
std::string* filenames_used = nullptr) {
size_t blob_size =
std::accumulate(inputInfo.dataShape.begin(), inputInfo.dataShape.end(), 1, std::multiplies<int>());
T* data = new T[blob_size];
auto inputBlobData = minputHolder.as<T*>();
std::mt19937 gen(0);
uniformDistribution<T2> distribution(rand_min, rand_max);
for (size_t i = 0; i < inputBlob->size(); i++) {
inputBlobData[i] = static_cast<T>(distribution(gen));
/** Collect images data ptrs **/
std::vector<std::shared_ptr<uint8_t>> vreader;
vreader.reserve(batchSize);
for (size_t b = 0; b < batchSize; ++b) {
auto inputIndex = (inputId + b) % files.size();
if (filenames_used) {
*filenames_used += (filenames_used->empty() ? "" : ", ") + files[inputIndex];
}
FormatReader::ReaderPtr reader(files[inputIndex].c_str());
if (reader.get() == nullptr) {
slog::warn << "Image " << files[inputIndex] << " cannot be read!" << slog::endl << slog::endl;
continue;
}
/** Getting image data **/
std::shared_ptr<uint8_t> imageData(reader->getData(inputInfo.width(), inputInfo.height()));
if (imageData) {
vreader.push_back(imageData);
}
}
/** Fill input tensor with image. First b channel, then g and r channels **/
const size_t numChannels = inputInfo.channels();
const size_t width = inputInfo.width();
const size_t height = inputInfo.height();
/** Iterate over all input images **/
for (size_t b = 0; b < batchSize; ++b) {
/** Iterate over all width **/
for (size_t w = 0; w < width; ++w) {
/** Iterate over all height **/
for (size_t h = 0; h < height; ++h) {
/** Iterate over all channels **/
for (size_t ch = 0; ch < numChannels; ++ch) {
/** [images stride + channels stride + pixel id ] all in
* bytes **/
size_t offset = b * numChannels * width * height +
(((inputInfo.layout == "NCHW") || (inputInfo.layout == "CHW"))
? (ch * width * height + h * width + w)
: (h * width * numChannels + w * numChannels + ch));
data[offset] =
(static_cast<T>(vreader.at(b).get()[h * width * numChannels + w * numChannels + ch]) -
static_cast<T>(inputInfo.mean[ch])) /
static_cast<T>(inputInfo.scale[ch]);
}
}
}
}
InferenceEngine::TensorDesc tDesc(inputInfo.precision, inputInfo.dataShape, inputInfo.originalLayout);
auto blob =
InferenceEngine::make_shared_blob<T>(tDesc,
std::make_shared<SharedBlobAllocator<T>>(data, blob_size * sizeof(T)));
blob->allocate();
return blob;
}
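A standalone sketch (illustration only, not part of this file) of the per-pixel offset rule used in the nested loops of createBlobFromImage above: planar layouts (NCHW/CHW) store whole channel planes back to back, while the interleaved path stores all channels of a pixel together.

```
#include <cstddef>
#include <string>

// Offset of pixel (h, w), channel ch, image b for a C x H x W image.
static std::size_t pixel_offset(const std::string& layout,
                                std::size_t b, std::size_t ch,
                                std::size_t h, std::size_t w,
                                std::size_t C, std::size_t H, std::size_t W) {
    const std::size_t image_stride = b * C * H * W;
    if (layout == "NCHW" || layout == "CHW")
        return image_stride + ch * H * W + h * W + w;  // planar: channel-major
    return image_stride + (h * W + w) * C + ch;        // interleaved: channel-minor
}

// Example: C = 3, H = W = 224, b = 0, ch = 1, h = 1, w = 2:
//   planar      -> 1*224*224 + 1*224 + 2 = 50402
//   interleaved -> (1*224 + 2)*3 + 1     = 679
```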
template <typename T>
void fillBlobImInfo(Blob::Ptr& inputBlob, const size_t& batchSize, std::pair<size_t, size_t> image_size) {
MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
if (!minput) {
IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in "
"fillBlobImInfo, "
<< "but by fact we were not able to cast inputBlob to MemoryBlob";
}
// locked memory holder should be alive all time while access to its buffer
// happens
auto minputHolder = minput->wmap();
InferenceEngine::Blob::Ptr createBlobImInfo(const std::pair<size_t, size_t>& image_size,
size_t batchSize,
const benchmark_app::InputInfo& inputInfo) {
size_t blob_size =
std::accumulate(inputInfo.dataShape.begin(), inputInfo.dataShape.end(), 1, std::multiplies<int>());
T* data = new T[blob_size];
auto inputBlobData = minputHolder.as<T*>();
for (size_t b = 0; b < batchSize; b++) {
size_t iminfoSize = inputBlob->size() / batchSize;
size_t iminfoSize = blob_size / batchSize;
for (size_t i = 0; i < iminfoSize; i++) {
size_t index = b * iminfoSize + i;
if (0 == i)
inputBlobData[index] = static_cast<T>(image_size.first);
data[index] = static_cast<T>(image_size.first);
else if (1 == i)
inputBlobData[index] = static_cast<T>(image_size.second);
data[index] = static_cast<T>(image_size.second);
else
inputBlobData[index] = 1;
data[index] = 1;
}
}
InferenceEngine::TensorDesc tDesc(inputInfo.precision, inputInfo.dataShape, inputInfo.originalLayout);
InferenceEngine::Blob::Ptr blob =
InferenceEngine::make_shared_blob<T>(tDesc,
std::make_shared<SharedBlobAllocator<T>>(data, blob_size * sizeof(T)));
blob->allocate();
return blob;
}
template <typename T>
InferenceEngine::Blob::Ptr createBlobFromBinary(const std::vector<std::string>& files,
size_t inputId,
size_t batchSize,
const benchmark_app::InputInfo& inputInfo,
std::string* filenames_used = nullptr) {
size_t blob_size =
std::accumulate(inputInfo.dataShape.begin(), inputInfo.dataShape.end(), 1, std::multiplies<int>());
char* data = new char[blob_size * sizeof(T)];
// adjust batch size
std::stringstream ss;
ss << inputInfo.originalLayout;
std::string layout = ss.str();
if (layout.find("N") == std::string::npos) {
batchSize = 1;
} else if (inputInfo.batch() != batchSize) {
batchSize = inputInfo.batch();
}
for (size_t b = 0; b < batchSize; ++b) {
size_t inputIndex = (inputId + b) % files.size();
std::ifstream binaryFile(files[inputIndex], std::ios_base::binary | std::ios_base::ate);
if (!binaryFile) {
IE_THROW() << "Cannot open " << files[inputIndex];
}
auto fileSize = static_cast<std::size_t>(binaryFile.tellg());
binaryFile.seekg(0, std::ios_base::beg);
if (!binaryFile.good()) {
IE_THROW() << "Can not read " << files[inputIndex];
}
auto inputSize = blob_size * sizeof(T) / batchSize;
if (fileSize != inputSize) {
IE_THROW() << "File " << files[inputIndex] << " contains " << std::to_string(fileSize)
<< " bytes "
"but the network expects "
<< std::to_string(inputSize);
}
if (inputInfo.layout != "CN") {
binaryFile.read(&data[b * inputSize], inputSize);
} else {
for (int i = 0; i < inputInfo.channels(); i++) {
binaryFile.read(&data[(i * batchSize + b) * sizeof(T)], sizeof(T));
}
}
if (filenames_used) {
*filenames_used += (filenames_used->empty() ? "" : ", ") + files[inputIndex];
}
}
InferenceEngine::TensorDesc tDesc(inputInfo.precision, inputInfo.dataShape, inputInfo.originalLayout);
InferenceEngine::Blob::Ptr blob =
InferenceEngine::make_shared_blob<T>(tDesc,
std::make_shared<SharedBlobAllocator<T>>((T*)data, blob_size * sizeof(T)));
blob->allocate();
return blob;
}
template <typename T, typename T2>
InferenceEngine::Blob::Ptr createBlobRandom(const benchmark_app::InputInfo& inputInfo,
T rand_min = std::numeric_limits<uint8_t>::min(),
T rand_max = std::numeric_limits<uint8_t>::max()) {
size_t blob_size =
std::accumulate(inputInfo.dataShape.begin(), inputInfo.dataShape.end(), 1, std::multiplies<int>());
T* data = new T[blob_size];
std::mt19937 gen(0);
uniformDistribution<T2> distribution(rand_min, rand_max);
for (size_t i = 0; i < blob_size; i++) {
data[i] = static_cast<T>(distribution(gen));
}
InferenceEngine::TensorDesc tDesc(inputInfo.precision, inputInfo.dataShape, inputInfo.originalLayout);
InferenceEngine::Blob::Ptr blob =
InferenceEngine::make_shared_blob<T>(tDesc,
std::make_shared<SharedBlobAllocator<T>>(data, blob_size * sizeof(T)));
blob->allocate();
return blob;
}
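// Note: the generator above is seeded with a constant (std::mt19937 gen(0)), so the "random"
// blobs are reproducible across runs; for 8-bit precisions the callers below pass a wider
// distribution type because uniform_int_distribution over char types is not permitted by the
// standard.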
InferenceEngine::Blob::Ptr getImageBlob(const std::vector<std::string>& files,
size_t inputId,
size_t batchSize,
const std::pair<std::string, benchmark_app::InputInfo>& inputInfo,
std::string* filenames_used = nullptr) {
auto precision = inputInfo.second.precision;
if (precision == InferenceEngine::Precision::FP32) {
return createBlobFromImage<float>(files, inputId, batchSize, inputInfo.second, filenames_used);
} else if (precision == InferenceEngine::Precision::FP16) {
return createBlobFromImage<short>(files, inputId, batchSize, inputInfo.second, filenames_used);
} else if (precision == InferenceEngine::Precision::I32) {
return createBlobFromImage<int32_t>(files, inputId, batchSize, inputInfo.second, filenames_used);
} else if (precision == InferenceEngine::Precision::I64) {
return createBlobFromImage<int64_t>(files, inputId, batchSize, inputInfo.second, filenames_used);
} else if (precision == InferenceEngine::Precision::U8) {
return createBlobFromImage<uint8_t>(files, inputId, batchSize, inputInfo.second, filenames_used);
} else {
IE_THROW() << "Input precision is not supported for " << inputInfo.first;
}
}
void fillBlobs(const std::vector<std::string>& inputFiles,
const size_t& batchSize,
benchmark_app::InputsInfo& app_inputs_info,
std::vector<InferReqWrap::Ptr> requests) {
std::vector<std::pair<size_t, size_t>> input_image_sizes;
for (auto& item : app_inputs_info) {
if (item.second.isImage()) {
input_image_sizes.push_back(std::make_pair(item.second.width(), item.second.height()));
}
slog::info << "Network input '" << item.first << "' precision " << item.second.precision << ", dimensions ("
<< item.second.layout << "): ";
for (const auto& i : item.second.shape) {
slog::info << i << " ";
}
slog::info << slog::endl;
InferenceEngine::Blob::Ptr getImInfoBlob(const std::pair<size_t, size_t>& image_size,
size_t batchSize,
const std::pair<std::string, benchmark_app::InputInfo>& inputInfo) {
auto precision = inputInfo.second.precision;
if (precision == InferenceEngine::Precision::FP32) {
return createBlobImInfo<float>(image_size, batchSize, inputInfo.second);
} else if (precision == InferenceEngine::Precision::FP16) {
return createBlobImInfo<short>(image_size, batchSize, inputInfo.second);
} else if (precision == InferenceEngine::Precision::I32) {
return createBlobImInfo<int32_t>(image_size, batchSize, inputInfo.second);
} else if (precision == InferenceEngine::Precision::I64) {
return createBlobImInfo<int64_t>(image_size, batchSize, inputInfo.second);
} else {
IE_THROW() << "Input precision is not supported for " << inputInfo.first;
}
}
InferenceEngine::Blob::Ptr getBinaryBlob(const std::vector<std::string>& files,
size_t inputId,
size_t batchSize,
const std::pair<std::string, benchmark_app::InputInfo>& inputInfo,
std::string* filenames_used = nullptr) {
auto precision = inputInfo.second.precision;
if (precision == InferenceEngine::Precision::FP32) {
return createBlobFromBinary<float>(files, inputId, batchSize, inputInfo.second, filenames_used);
} else if (precision == InferenceEngine::Precision::FP16) {
return createBlobFromBinary<short>(files, inputId, batchSize, inputInfo.second, filenames_used);
} else if (precision == InferenceEngine::Precision::I32) {
return createBlobFromBinary<int32_t>(files, inputId, batchSize, inputInfo.second, filenames_used);
} else if (precision == InferenceEngine::Precision::I64) {
return createBlobFromBinary<int64_t>(files, inputId, batchSize, inputInfo.second, filenames_used);
} else if ((precision == InferenceEngine::Precision::U8) || (precision == InferenceEngine::Precision::BOOL)) {
return createBlobFromBinary<uint8_t>(files, inputId, batchSize, inputInfo.second, filenames_used);
} else {
IE_THROW() << "Input precision is not supported for " << inputInfo.first;
}
}
InferenceEngine::Blob::Ptr getRandomBlob(const std::pair<std::string, benchmark_app::InputInfo>& inputInfo) {
auto precision = inputInfo.second.precision;
if (precision == InferenceEngine::Precision::FP32) {
return createBlobRandom<float, float>(inputInfo.second);
} else if (precision == InferenceEngine::Precision::FP16) {
return createBlobRandom<short, short>(inputInfo.second);
} else if (precision == InferenceEngine::Precision::I32) {
return createBlobRandom<int32_t, int32_t>(inputInfo.second);
} else if (precision == InferenceEngine::Precision::I64) {
return createBlobRandom<int64_t, int64_t>(inputInfo.second);
} else if (precision == InferenceEngine::Precision::U8) {
        // uniform_int_distribution<uint8_t> is not allowed by the C++17
        // standard and VS2017/19
return createBlobRandom<uint8_t, uint32_t>(inputInfo.second);
} else if (precision == InferenceEngine::Precision::I8) {
        // uniform_int_distribution<int8_t> is not allowed by the C++17 standard
        // and VS2017/19
return createBlobRandom<int8_t, int32_t>(inputInfo.second);
} else if (precision == InferenceEngine::Precision::U16) {
return createBlobRandom<uint16_t, uint16_t>(inputInfo.second);
} else if (precision == InferenceEngine::Precision::I16) {
return createBlobRandom<int16_t, int16_t>(inputInfo.second);
} else if (precision == InferenceEngine::Precision::BOOL) {
return createBlobRandom<uint8_t, uint32_t>(inputInfo.second, 0, 1);
} else {
IE_THROW() << "Input precision is not supported for " << inputInfo.first;
}
}
std::string getTestInfoStreamHeader(benchmark_app::InputInfo& inputInfo) {
std::stringstream strOut;
strOut << "(" << inputInfo.layout << ", " << inputInfo.precision << ", " << getShapeString(inputInfo.dataShape)
<< ", ";
if (inputInfo.partialShape.is_dynamic()) {
strOut << std::string("dyn:") << inputInfo.partialShape << "):\t";
} else {
strOut << "static):\t";
}
return strOut.str();
}
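// Example header produced by the helper above (a sketch for a static NCHW/FP32 input):
//   "(NCHW, FP32, [1, 3, 224, 224], static):\t"
// For a dynamic input the partial shape is printed after "dyn:" instead of "static".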
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> getBlobs(
std::map<std::string, std::vector<std::string>>& inputFiles,
std::vector<benchmark_app::InputsInfo>& app_inputs_info) {
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> blobs;
if (app_inputs_info.empty()) {
throw std::logic_error("Inputs Info for network is empty!");
}
size_t imageInputCount = input_image_sizes.size();
size_t binaryInputCount = app_inputs_info.size() - imageInputCount;
if (!inputFiles.empty() && inputFiles.size() != app_inputs_info[0].size()) {
throw std::logic_error("Number of inputs specified in -i must be equal number of network inputs!");
}
// count image type inputs of network
std::vector<std::pair<size_t, size_t>> net_input_im_sizes;
for (auto& inputs_info : app_inputs_info) {
for (auto& input : inputs_info) {
if (input.second.isImage()) {
net_input_im_sizes.push_back(std::make_pair(input.second.width(), input.second.height()));
}
}
}
for (auto& files : inputFiles) {
if (!files.first.empty() && app_inputs_info[0].find(files.first) == app_inputs_info[0].end()) {
throw std::logic_error("Input name \"" + files.first +
"\" used in -i parameter doesn't match any network's input");
}
std::string input_name = files.first.empty() ? app_inputs_info[0].begin()->first : files.first;
auto input = app_inputs_info[0].at(input_name);
if (!files.second.empty() && files.second[0] != "random" && files.second[0] != "image_info") {
if (input.isImage()) {
files.second = filterFilesByExtensions(files.second, supported_image_extensions);
} else if (input.isImageInfo() && net_input_im_sizes.size() == app_inputs_info.size()) {
slog::info << "Input '" << input_name
<< "' probably is image info. All files for this input will"
" be ignored."
<< slog::endl;
files.second = {"image_info"};
continue;
} else {
files.second = filterFilesByExtensions(files.second, supported_binary_extensions);
}
}
if (files.second.empty()) {
slog::warn << "No suitable files for input found! Random data will be used for input " << input_name
<< slog::endl;
files.second = {"random"};
}
size_t filesToBeUsed = 0;
size_t shapesToBeUsed = 0;
if (files.second.size() > app_inputs_info.size()) {
shapesToBeUsed = app_inputs_info.size();
filesToBeUsed = files.second.size() - files.second.size() % app_inputs_info.size();
if (filesToBeUsed != files.second.size()) {
slog::warn << "Number of files must be a multiple of the number of shapes for certain input. Only " +
std::to_string(filesToBeUsed) + " files will be added."
<< slog::endl;
}
while (files.second.size() != filesToBeUsed) {
files.second.pop_back();
}
} else {
shapesToBeUsed = app_inputs_info.size() - app_inputs_info.size() % files.second.size();
filesToBeUsed = files.second.size();
if (shapesToBeUsed != app_inputs_info.size()) {
slog::warn << "Number of data shapes must be a multiple of the number of files. For input "
<< files.first << " only " + std::to_string(shapesToBeUsed) + " files will be added."
<< slog::endl;
}
while (app_inputs_info.size() != shapesToBeUsed) {
app_inputs_info.pop_back();
net_input_im_sizes.pop_back();
}
}
}
std::vector<std::map<std::string, std::string>> logOutput;
    // All inputs should process an equal number of files, so for the case of N, 1, N files,
    // the second input should also get N blobs cloned from its single file
size_t filesNum = 0;
if (!inputFiles.empty()) {
filesNum = std::max_element(inputFiles.begin(),
inputFiles.end(),
[](const std::pair<std::string, std::vector<std::string>>& a,
const std::pair<std::string, std::vector<std::string>>& b) {
return a.second.size() < b.second.size();
})
->second.size();
} else {
std::vector<std::pair<size_t, size_t>> net_input_im_sizes;
for (auto& input_info : app_inputs_info[0]) {
inputFiles[input_info.first] = {"random"};
}
}
for (const auto& files : inputFiles) {
std::string input_name = files.first.empty() ? app_inputs_info[0].begin()->first : files.first;
size_t n_shape = 0, m_file = 0;
while (n_shape < app_inputs_info.size() || m_file < filesNum) {
size_t batchSize = getBatchSize(app_inputs_info[n_shape % app_inputs_info.size()]);
size_t inputId = m_file % files.second.size();
auto input_info = app_inputs_info[n_shape % app_inputs_info.size()].at(input_name);
std::string blob_src_info;
if (files.second[0] == "random") {
// Fill random
blob_src_info =
"random (" + std::string((input_info.isImage() ? "image" : "binary data")) + " is expected)";
blobs[input_name].push_back(getRandomBlob({input_name, input_info}));
} else if (files.second[0] == "image_info") {
// Most likely it is image info: fill with image information
auto image_size = net_input_im_sizes.at(n_shape % app_inputs_info.size());
blob_src_info =
"Image size blob " + std::to_string(image_size.first) + " x " + std::to_string(image_size.second);
blobs[input_name].push_back(getImInfoBlob(image_size, batchSize, {input_name, input_info}));
} else if (input_info.isImage()) {
// Fill with Images
blobs[input_name].push_back(
getImageBlob(files.second, inputId, batchSize, {input_name, input_info}, &blob_src_info));
} else {
// Fill with binary files
blobs[input_name].push_back(
getBinaryBlob(files.second, inputId, batchSize, {input_name, input_info}, &blob_src_info));
}
// Preparing info
std::string strOut = getTestInfoStreamHeader(input_info) + blob_src_info;
if (n_shape >= logOutput.size()) {
logOutput.resize(n_shape + 1);
}
logOutput[n_shape][input_name] += strOut;
++n_shape;
m_file += batchSize;
}
}
for (int i = 0; i < logOutput.size(); i++) {
slog::info << "Test Config " << i << slog::endl;
auto maxNameWidth = std::max_element(logOutput[i].begin(),
logOutput[i].end(),
[](const std::pair<std::string, std::string>& a,
const std::pair<std::string, std::string>& b) {
return a.first.size() < b.first.size();
})
->first.size();
for (auto inputLog : logOutput[i]) {
slog::info << std::left << std::setw(maxNameWidth + 2) << inputLog.first << inputLog.second << slog::endl;
}
}
return blobs;
}
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> getBlobsStaticCase(
const std::vector<std::string>& inputFiles,
const size_t& batchSize,
benchmark_app::InputsInfo& app_inputs_info,
size_t requestsNum) {
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> blobs;
std::vector<std::pair<size_t, size_t>> net_input_im_sizes;
for (auto& item : app_inputs_info) {
if (item.second.isImage()) {
net_input_im_sizes.push_back(std::make_pair(item.second.width(), item.second.height()));
}
}
size_t imageInputsNum = net_input_im_sizes.size();
size_t binaryInputsNum = app_inputs_info.size() - imageInputsNum;
std::vector<std::string> binaryFiles;
std::vector<std::string> imageFiles;
@ -255,7 +517,7 @@ void fillBlobs(const std::vector<std::string>& inputFiles,
binaryFiles = filterFilesByExtensions(inputFiles, supported_binary_extensions);
std::sort(std::begin(binaryFiles), std::end(binaryFiles));
auto binaryToBeUsed = binaryInputCount * batchSize * requests.size();
auto binaryToBeUsed = binaryInputsNum * batchSize * requestsNum;
if (binaryToBeUsed > 0 && binaryFiles.empty()) {
std::stringstream ss;
for (auto& ext : supported_binary_extensions) {
@ -278,7 +540,7 @@ void fillBlobs(const std::vector<std::string>& inputFiles,
imageFiles = filterFilesByExtensions(inputFiles, supported_image_extensions);
std::sort(std::begin(imageFiles), std::end(imageFiles));
auto imagesToBeUsed = imageInputCount * batchSize * requests.size();
auto imagesToBeUsed = imageInputsNum * batchSize * requestsNum;
if (imagesToBeUsed > 0 && imageFiles.empty()) {
std::stringstream ss;
for (auto& ext : supported_image_extensions) {
@ -299,156 +561,129 @@ void fillBlobs(const std::vector<std::string>& inputFiles,
}
}
for (size_t requestId = 0; requestId < requests.size(); requestId++) {
slog::info << "Infer Request " << requestId << " filling" << slog::endl;
std::map<std::string, std::vector<std::string>> mappedFiles;
size_t imageInputsCount = 0;
size_t binaryInputsCount = 0;
for (auto& input : app_inputs_info) {
if (input.second.isImage()) {
mappedFiles[input.first] = {};
for (size_t i = 0; i < imageFiles.size(); i += imageInputsNum) {
mappedFiles[input.first].push_back(
imageFiles[(imageInputsCount + i) * imageInputsNum % imageFiles.size()]);
}
++imageInputsCount;
} else {
mappedFiles[input.first] = {};
if (!binaryFiles.empty()) {
for (size_t i = 0; i < binaryFiles.size(); i += binaryInputsNum) {
mappedFiles[input.first].push_back(binaryFiles[(binaryInputsCount + i) % binaryFiles.size()]);
}
}
++binaryInputsCount;
}
}
size_t filesNum = 0;
if (!inputFiles.empty()) {
filesNum = std::max_element(mappedFiles.begin(),
mappedFiles.end(),
[](const std::pair<std::string, std::vector<std::string>>& a,
const std::pair<std::string, std::vector<std::string>>& b) {
return a.second.size() < b.second.size();
})
->second.size();
}
size_t test_configs_num = filesNum / batchSize == 0 ? 1 : filesNum / batchSize;
std::vector<std::map<std::string, std::string>> logOutput(test_configs_num);
for (const auto& files : mappedFiles) {
size_t imageInputId = 0;
size_t binaryInputId = 0;
for (auto& item : app_inputs_info) {
Blob::Ptr inputBlob = requests.at(requestId)->getBlob(item.first);
auto app_info = app_inputs_info.at(item.first);
auto precision = app_info.precision;
if (app_info.isImage()) {
auto input_name = files.first;
auto input_info = app_inputs_info.at(files.first);
for (size_t i = 0; i < test_configs_num; ++i) {
std::string blob_src_info;
if (input_info.isImage()) {
if (!imageFiles.empty()) {
// Fill with Images
if (precision == InferenceEngine::Precision::FP32) {
fillBlobImage<float>(inputBlob,
imageFiles,
batchSize,
app_info,
requestId,
imageInputId++,
imageInputCount);
} else if (precision == InferenceEngine::Precision::FP16) {
fillBlobImage<short>(inputBlob,
imageFiles,
batchSize,
app_info,
requestId,
imageInputId++,
imageInputCount);
} else if (precision == InferenceEngine::Precision::I32) {
fillBlobImage<int32_t>(inputBlob,
imageFiles,
batchSize,
app_info,
requestId,
imageInputId++,
imageInputCount);
} else if (precision == InferenceEngine::Precision::I64) {
fillBlobImage<int64_t>(inputBlob,
imageFiles,
batchSize,
app_info,
requestId,
imageInputId++,
imageInputCount);
} else if (precision == InferenceEngine::Precision::U8) {
fillBlobImage<uint8_t>(inputBlob,
imageFiles,
batchSize,
app_info,
requestId,
imageInputId++,
imageInputCount);
} else {
IE_THROW() << "Input precision is not supported for " << item.first;
}
blobs[input_name].push_back(
getImageBlob(files.second, imageInputId, batchSize, {input_name, input_info}, &blob_src_info));
imageInputId = (imageInputId + batchSize) % files.second.size();
logOutput[i][input_name] += getTestInfoStreamHeader(input_info) + blob_src_info;
continue;
}
} else {
if (!binaryFiles.empty()) {
// Fill with binary files
if (precision == InferenceEngine::Precision::FP32) {
fillBlobBinary<float>(inputBlob,
binaryFiles,
batchSize,
requestId,
binaryInputId++,
binaryInputCount);
} else if (precision == InferenceEngine::Precision::FP16) {
fillBlobBinary<short>(inputBlob,
binaryFiles,
batchSize,
requestId,
binaryInputId++,
binaryInputCount);
} else if (precision == InferenceEngine::Precision::I32) {
fillBlobBinary<int32_t>(inputBlob,
binaryFiles,
batchSize,
requestId,
binaryInputId++,
binaryInputCount);
} else if (precision == InferenceEngine::Precision::I64) {
fillBlobBinary<int64_t>(inputBlob,
binaryFiles,
batchSize,
requestId,
binaryInputId++,
binaryInputCount);
} else if ((precision == InferenceEngine::Precision::U8) ||
(precision == InferenceEngine::Precision::BOOL)) {
fillBlobBinary<uint8_t>(inputBlob,
binaryFiles,
batchSize,
requestId,
binaryInputId++,
binaryInputCount);
} else {
IE_THROW() << "Input precision is not supported for " << item.first;
}
blobs[input_name].push_back(getBinaryBlob(files.second,
binaryInputId,
batchSize,
{input_name, input_info},
&blob_src_info));
binaryInputId = (binaryInputId + batchSize) % files.second.size();
logOutput[i][input_name] += getTestInfoStreamHeader(input_info) + blob_src_info;
continue;
}
if (app_info.isImageInfo() && (input_image_sizes.size() == 1)) {
if (input_info.isImageInfo() && (net_input_im_sizes.size() == 1)) {
// Most likely it is image info: fill with image information
auto image_size = input_image_sizes.at(0);
slog::info << "Fill input '" << item.first << "' with image size " << image_size.first << "x"
<< image_size.second << slog::endl;
if (precision == InferenceEngine::Precision::FP32) {
fillBlobImInfo<float>(inputBlob, batchSize, image_size);
} else if (precision == InferenceEngine::Precision::FP16) {
fillBlobImInfo<short>(inputBlob, batchSize, image_size);
} else if (precision == InferenceEngine::Precision::I32) {
fillBlobImInfo<int32_t>(inputBlob, batchSize, image_size);
} else if (precision == InferenceEngine::Precision::I64) {
fillBlobImInfo<int64_t>(inputBlob, batchSize, image_size);
} else {
IE_THROW() << "Input precision is not supported for image info!";
}
auto image_size = net_input_im_sizes.at(0);
blob_src_info = "Image size blob " + std::to_string(image_size.first) + " x " +
std::to_string(image_size.second);
blobs[input_name].push_back(getImInfoBlob(image_size, batchSize, {input_name, input_info}));
logOutput[i][input_name] += getTestInfoStreamHeader(input_info) + blob_src_info;
continue;
}
}
// Fill random
slog::info << "Fill input '" << item.first << "' with random values ("
<< std::string((app_info.isImage() ? "image" : "some binary data")) << " is expected)"
<< slog::endl;
if (precision == InferenceEngine::Precision::FP32) {
fillBlobRandom<float, float>(inputBlob);
} else if (precision == InferenceEngine::Precision::FP16) {
fillBlobRandom<short, short>(inputBlob);
} else if (precision == InferenceEngine::Precision::I32) {
fillBlobRandom<int32_t, int32_t>(inputBlob);
} else if (precision == InferenceEngine::Precision::I64) {
fillBlobRandom<int64_t, int64_t>(inputBlob);
} else if (precision == InferenceEngine::Precision::U8) {
// uniform_int_distribution<uint8_t> is not allowed in the C++17
// standard and vs2017/19
fillBlobRandom<uint8_t, uint32_t>(inputBlob);
} else if (precision == InferenceEngine::Precision::I8) {
// uniform_int_distribution<int8_t> is not allowed in the C++17 standard
// and vs2017/19
fillBlobRandom<int8_t, int32_t>(inputBlob);
} else if (precision == InferenceEngine::Precision::U16) {
fillBlobRandom<uint16_t, uint16_t>(inputBlob);
} else if (precision == InferenceEngine::Precision::I16) {
fillBlobRandom<int16_t, int16_t>(inputBlob);
} else if (precision == InferenceEngine::Precision::BOOL) {
fillBlobRandom<uint8_t, uint32_t>(inputBlob, 0, 1);
} else {
IE_THROW() << "Input precision is not supported for " << item.first;
}
blob_src_info =
"random (" + std::string((input_info.isImage() ? "image" : "binary data")) + " is expected)";
blobs[input_name].push_back(getRandomBlob({input_name, input_info}));
logOutput[i][input_name] += getTestInfoStreamHeader(input_info) + blob_src_info;
}
}
for (int i = 0; i < logOutput.size(); i++) {
slog::info << "Test Config " << i << slog::endl;
auto maxNameWidth = std::max_element(logOutput[i].begin(),
logOutput[i].end(),
[](const std::pair<std::string, std::string>& a,
const std::pair<std::string, std::string>& b) {
return a.first.size() < b.first.size();
})
->first.size();
for (auto inputLog : logOutput[i]) {
slog::info << std::left << std::setw(maxNameWidth + 2) << inputLog.first << inputLog.second << slog::endl;
}
}
return blobs;
}
void copyBlobData(InferenceEngine::Blob::Ptr& dst, const InferenceEngine::Blob::Ptr& src) {
if (src->getTensorDesc() != dst->getTensorDesc()) {
throw std::runtime_error(
"Source and destination blobs tensor descriptions are expected to be equal for data copying.");
}
InferenceEngine::MemoryBlob::Ptr srcMinput = as<InferenceEngine::MemoryBlob>(src);
if (!srcMinput) {
IE_THROW() << "We expect source blob to be inherited from MemoryBlob in "
"fillBlobImage, "
<< "but by fact we were not able to cast source blob to MemoryBlob";
}
// locked memory holder should be alive all time while access to its buffer
// happens
auto srcMinputHolder = srcMinput->wmap();
auto srcBlobData = srcMinputHolder.as<void*>();
InferenceEngine::MemoryBlob::Ptr dstMinput = as<InferenceEngine::MemoryBlob>(dst);
if (!dstMinput) {
IE_THROW() << "We expect destination blob to be inherited from MemoryBlob in "
"fillBlobImage, "
<< "but by fact we were not able to cast destination blob to MemoryBlob";
}
auto dstMinputHolder = dstMinput->wmap();
auto dstBlobData = dstMinputHolder.as<void*>();
std::memcpy(dstBlobData, srcBlobData, src->byteSize());
}

View File

@ -14,7 +14,14 @@
#include "utils.hpp"
// clang-format on
void fillBlobs(const std::vector<std::string>& inputFiles,
const size_t& batchSize,
benchmark_app::InputsInfo& app_inputs_info,
std::vector<InferReqWrap::Ptr> requests);
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> getBlobs(
std::map<std::string, std::vector<std::string>>& inputFiles,
std::vector<benchmark_app::InputsInfo>& app_inputs_info);
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> getBlobsStaticCase(
const std::vector<std::string>& inputFiles,
const size_t& batchSize,
benchmark_app::InputsInfo& app_inputs_info,
size_t requestsNum);
void copyBlobData(InferenceEngine::Blob::Ptr& dst, const InferenceEngine::Blob::Ptr& src);

View File

@ -34,14 +34,6 @@ using namespace InferenceEngine;
static const size_t progressBarDefaultTotalCount = 1000;
uint64_t getDurationInMilliseconds(uint32_t duration) {
return duration * 1000LL;
}
uint64_t getDurationInNanoseconds(uint32_t duration) {
return duration * 1000000000LL;
}
bool ParseAndCheckCommandLine(int argc, char* argv[]) {
// ---------------------------Parsing and validating input
// arguments--------------------------------------
@ -104,7 +96,7 @@ static void next_step(const std::string additional_info = "") {
{6, "Configuring input of the model"},
{7, "Loading the model to the device"},
{8, "Setting optimal runtime parameters"},
{9, "Creating infer requests and filling input blobs with images"},
{9, "Creating infer requests and preparing input blobs with data"},
{10, "Measuring performance"},
{11, "Dumping statistics report"}};
@ -116,13 +108,6 @@ static void next_step(const std::string additional_info = "") {
<< (additional_info.empty() ? "" : " (" + additional_info + ")") << std::endl;
}
template <typename T>
T getMedianValue(const std::vector<T>& vec, std::size_t percentile) {
std::vector<T> sortedVec(vec);
std::sort(sortedVec.begin(), sortedVec.end());
return sortedVec[(sortedVec.size() / 100) * percentile];
}
/**
* @brief The entry point of the benchmark application
*/
@ -180,15 +165,15 @@ int main(int argc, char* argv[]) {
load_config(FLAGS_load_config, config);
}
#endif
/** This vector stores paths to the processed images **/
std::vector<std::string> inputFiles;
parseInputFilesArguments(inputFiles);
/** This vector stores paths to the processed images with input names**/
auto inputFiles = parseInputArguments(gflags::GetArgvs());
// ----------------- 2. Loading the Inference Engine
// -----------------------------------------------------------
next_step();
Core ie;
if (FLAGS_d.find("CPU") != std::string::npos && !FLAGS_l.empty()) {
// CPU (MKLDNN) extensions is loaded as a shared library and passed as a
// pointer to base extension
@ -378,19 +363,10 @@ int main(int argc, char* argv[]) {
ie.SetConfig(item.second, item.first);
}
auto double_to_string = [](const double number) {
std::stringstream ss;
ss << std::fixed << std::setprecision(2) << number;
return ss.str();
};
auto get_total_ms_time = [](Time::time_point& startTime) {
return std::chrono::duration_cast<ns>(Time::now() - startTime).count() * 0.000001;
};
size_t batchSize = FLAGS_b;
Precision precision = Precision::UNSPECIFIED;
std::string topology_name = "";
benchmark_app::InputsInfo app_inputs_info;
std::vector<benchmark_app::InputsInfo> app_inputs_info;
std::string output_name;
// Takes priority over config from file
@ -398,6 +374,7 @@ int main(int argc, char* argv[]) {
ie.SetConfig({{CONFIG_KEY(CACHE_DIR), FLAGS_cache_dir}});
}
bool isDynamicNetwork = false;
if (FLAGS_load_from_file && !isNetworkCompiled) {
next_step();
slog::info << "Skipping the step for loading network from file" << slog::endl;
@ -407,14 +384,15 @@ int main(int argc, char* argv[]) {
slog::info << "Skipping the step for loading network from file" << slog::endl;
auto startTime = Time::now();
exeNetwork = ie.LoadNetwork(FLAGS_m, device_name);
auto duration_ms = double_to_string(get_total_ms_time(startTime));
auto duration_ms = double_to_string(get_duration_ms_till_now(startTime));
slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"load network time (ms)", duration_ms}});
app_inputs_info = getInputsInfo<InputInfo::CPtr>(FLAGS_shape,
FLAGS_layout,
FLAGS_b,
batchSize,
FLAGS_data_shape,
FLAGS_iscale,
FLAGS_imean,
exeNetwork.GetInputsInfo());
@ -430,7 +408,7 @@ int main(int argc, char* argv[]) {
auto startTime = Time::now();
CNNNetwork cnnNetwork = ie.ReadNetwork(FLAGS_m);
auto duration_ms = double_to_string(get_total_ms_time(startTime));
auto duration_ms = double_to_string(get_duration_ms_till_now(startTime));
slog::info << "Read network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
@ -444,34 +422,45 @@ int main(int argc, char* argv[]) {
// ----------------- 5. Resizing network to match image sizes and given
// batch ----------------------------------
next_step();
batchSize = cnnNetwork.getBatchSize();
// Parse input shapes if specified
bool reshape = false;
app_inputs_info = getInputsInfo<InputInfo::Ptr>(FLAGS_shape,
FLAGS_layout,
FLAGS_b,
FLAGS_data_shape,
FLAGS_iscale,
FLAGS_imean,
inputInfo,
reshape);
if (reshape) {
InferenceEngine::ICNNNetwork::InputShapes shapes = {};
for (auto& item : app_inputs_info)
shapes[item.first] = item.second.shape;
benchmark_app::PartialShapes shapes = {};
for (auto& item : app_inputs_info[0])
shapes[item.first] = item.second.partialShape;
slog::info << "Reshaping network: " << getShapesString(shapes) << slog::endl;
startTime = Time::now();
cnnNetwork.reshape(shapes);
duration_ms = double_to_string(get_total_ms_time(startTime));
duration_ms = double_to_string(get_duration_ms_till_now(startTime));
slog::info << "Reshape network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"reshape network time (ms)", duration_ms}});
}
// use batch size according to provided layout and shapes
batchSize = (!FLAGS_layout.empty()) ? getBatchSize(app_inputs_info) : cnnNetwork.getBatchSize();
topology_name = cnnNetwork.getName();
slog::info << (FLAGS_b != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize
// Check if network has dynamic shapes
auto input_info = app_inputs_info[0];
isDynamicNetwork = std::any_of(input_info.begin(),
input_info.end(),
[](const std::pair<std::string, benchmark_app::InputInfo>& i) {
return i.second.partialShape.is_dynamic();
});
// use batch size according to provided layout and shapes (static case)
if (batchSize == 0 || !isDynamicNetwork) {
batchSize = (!FLAGS_layout.empty()) ? getBatchSize(app_inputs_info[0]) : cnnNetwork.getBatchSize();
}
slog::info << (batchSize != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize
<< slog::endl;
// ----------------- 6. Configuring inputs and outputs
@ -482,11 +471,12 @@ int main(int argc, char* argv[]) {
for (auto& item : cnnNetwork.getInputsInfo()) {
// if precision for input set by user, then set it to app_inputs
// if it an image, set U8
if (!FLAGS_ip.empty() || FLAGS_iop.find(item.first) != std::string::npos) {
app_inputs_info.at(item.first).precision = item.second->getPrecision();
} else if (app_inputs_info.at(item.first).isImage()) {
app_inputs_info.at(item.first).precision = Precision::U8;
item.second->setPrecision(app_inputs_info.at(item.first).precision);
if (!FLAGS_ip.empty() || FLAGS_iop.find(item.first) != std::string::npos ||
item.second->getPartialShape().is_dynamic()) {
app_inputs_info[0].at(item.first).precision = item.second->getPrecision();
} else if (app_inputs_info[0].at(item.first).isImage()) {
app_inputs_info[0].at(item.first).precision = Precision::U8;
item.second->setPrecision(app_inputs_info[0].at(item.first).precision);
}
}
@ -496,7 +486,7 @@ int main(int argc, char* argv[]) {
next_step();
startTime = Time::now();
exeNetwork = ie.LoadNetwork(cnnNetwork, device_name);
duration_ms = double_to_string(get_total_ms_time(startTime));
duration_ms = double_to_string(get_duration_ms_till_now(startTime));
slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
@ -513,7 +503,7 @@ int main(int argc, char* argv[]) {
next_step();
auto startTime = Time::now();
exeNetwork = ie.ImportNetwork(FLAGS_m, device_name, {});
auto duration_ms = double_to_string(get_total_ms_time(startTime));
auto duration_ms = double_to_string(get_duration_ms_till_now(startTime));
slog::info << "Import network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
@ -521,6 +511,7 @@ int main(int argc, char* argv[]) {
app_inputs_info = getInputsInfo<InputInfo::CPtr>(FLAGS_shape,
FLAGS_layout,
FLAGS_b,
FLAGS_data_shape,
FLAGS_iscale,
FLAGS_imean,
exeNetwork.GetInputsInfo());
@ -528,6 +519,23 @@ int main(int argc, char* argv[]) {
batchSize = 1;
}
}
if (isDynamicNetwork && FLAGS_api == "sync") {
throw std::logic_error("Benchmarking of the model with dynamic shapes is available for async API only."
"Please use -api async -nstreams 1 -nireq 1 to emulate sync behavior");
}
// Defining of benchmark mode
// for static models inference only mode is used as default one
bool inferenceOnly = FLAGS_inference_only;
if (isDynamicNetwork) {
if (isFlagSetInCommandLine("inference_only") && inferenceOnly && app_inputs_info.size() != 1) {
throw std::logic_error(
"Dynamic models with different input data shapes must be benchmarked only in full mode.");
}
inferenceOnly = isFlagSetInCommandLine("inference_only") && inferenceOnly && app_inputs_info.size() == 1;
}
// ----------------- 8. Querying optimal runtime parameters
// -----------------------------------------------------
next_step();
@ -573,11 +581,21 @@ int main(int argc, char* argv[]) {
// Iteration limit
uint32_t niter = FLAGS_niter;
size_t shape_groups_num = app_inputs_info.size();
if ((niter > 0) && (FLAGS_api == "async")) {
niter = ((niter + nireq - 1) / nireq) * nireq;
if (FLAGS_niter != niter) {
slog::warn << "Number of iterations was aligned by request number from " << FLAGS_niter << " to "
<< niter << " using number of requests " << nireq << slog::endl;
if (shape_groups_num > nireq) {
niter = ((niter + shape_groups_num - 1) / shape_groups_num) * shape_groups_num;
if (FLAGS_niter != niter) {
slog::warn << "Number of iterations was aligned by data shape groups number from " << FLAGS_niter
<< " to " << niter << " using number of possible input shapes " << shape_groups_num
<< slog::endl;
}
} else {
niter = ((niter + nireq - 1) / nireq) * nireq;
if (FLAGS_niter != niter) {
slog::warn << "Number of iterations was aligned by request number from " << FLAGS_niter << " to "
<< niter << " using number of requests " << nireq << slog::endl;
}
}
}
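    // Alignment example (sketch): with FLAGS_niter = 10, nireq = 4 and a single shape group,
    // niter is rounded up to ((10 + 4 - 1) / 4) * 4 = 12 so every request runs the same number
    // of iterations; with 3 shape groups and nireq = 2 it is rounded to a multiple of 3 instead.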
@ -596,6 +614,7 @@ int main(int argc, char* argv[]) {
statistics->addParameters(
StatisticsReport::Category::RUNTIME_CONFIG,
{
{"benchmark mode", inferenceOnly ? "inference only" : "full"},
{"topology", topology_name},
{"target device", device_name},
{"API", FLAGS_api},
@ -619,18 +638,46 @@ int main(int argc, char* argv[]) {
// ----------------------------------------
next_step();
InferRequestsQueue inferRequestsQueue(exeNetwork, nireq);
if (isFlagSetInCommandLine("use_device_mem")) {
if (device_name.find("GPU") == 0)
::gpu::fillRemoteBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests, exeNetwork);
else if (device_name.find("CPU") == 0)
fillBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests);
else
IE_THROW() << "Requested device doesn't support `use_device_mem` option.";
} else {
fillBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests);
}
InferRequestsQueue inferRequestsQueue(exeNetwork, nireq, app_inputs_info.size(), FLAGS_pcseq);
bool inputHasName = false;
if (inputFiles.size() > 0) {
inputHasName = inputFiles.begin()->first != "";
}
bool newInputType = isDynamicNetwork || inputHasName;
// create vector to store remote input blobs buffer
std::vector<::gpu::BufferType> clInputsBuffer;
bool useGpuMem = false;
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> inputsData;
if (isFlagSetInCommandLine("use_device_mem")) {
if (device_name.find("GPU") == 0) {
inputsData = ::gpu::getRemoteInputBlobs(inputFiles, app_inputs_info, exeNetwork, clInputsBuffer);
useGpuMem = true;
} else if (device_name.find("CPU") == 0) {
if (newInputType) {
inputsData = getBlobs(inputFiles, app_inputs_info);
} else {
inputsData =
getBlobsStaticCase(inputFiles.empty() ? std::vector<std::string>{} : inputFiles.begin()->second,
batchSize,
app_inputs_info[0],
nireq);
}
} else {
IE_THROW() << "Requested device doesn't support `use_device_mem` option.";
}
} else {
if (newInputType) {
inputsData = getBlobs(inputFiles, app_inputs_info);
} else {
inputsData =
getBlobsStaticCase(inputFiles.empty() ? std::vector<std::string>{} : inputFiles.begin()->second,
batchSize,
app_inputs_info[0],
nireq);
}
}
// ----------------- 10. Measuring performance
// ------------------------------------------------------------------
size_t progressCnt = 0;
@ -668,26 +715,91 @@ int main(int argc, char* argv[]) {
}
ss << niter << " iterations";
}
next_step(ss.str());
if (inferenceOnly) {
slog::info << "BENCHMARK IS IN INFERENCE ONLY MODE." << slog::endl;
slog::info << "Input blobs will be filled once before performance measurements." << slog::endl;
} else {
slog::info << "BENCHMARK IS IN FULL MODE." << slog::endl;
slog::info << "Inputs setup stage will be included in performance measurements." << slog::endl;
}
// copy prepared data straight into inferRequest->getBlob()
// for inference only mode
if (inferenceOnly) {
if (nireq < inputsData.begin()->second.size())
slog::warn << "Only " << nireq << " test configs will be used." << slog::endl;
size_t i = 0;
for (auto& inferRequest : inferRequestsQueue.requests) {
auto inputs = app_inputs_info[i % app_inputs_info.size()];
for (auto& item : inputs) {
auto inputName = item.first;
const auto& inputBlob = inputsData.at(inputName)[i % inputsData.at(inputName).size()];
// for remote blobs setBlob is used, they are already allocated on the device
if (useGpuMem) {
inferRequest->setBlob(inputName, inputBlob);
} else {
InferenceEngine::Blob::Ptr requestBlob = inferRequest->getBlob(inputName);
if (isDynamicNetwork) {
requestBlob->setShape(inputBlob->getTensorDesc().getDims());
}
copyBlobData(requestBlob, inputBlob);
}
}
if (useGpuMem) {
auto outputBlobs = ::gpu::getRemoteOutputBlobs(exeNetwork, inferRequest->getOutputClBuffer());
for (auto& output : exeNetwork.GetOutputsInfo()) {
inferRequest->setBlob(output.first, outputBlobs[output.first]);
}
}
++i;
}
}
// warming up - out of scope
auto inferRequest = inferRequestsQueue.getIdleRequest();
if (!inferRequest) {
IE_THROW() << "No idle Infer Requests!";
}
if (!inferenceOnly) {
auto inputs = app_inputs_info[0];
for (auto& item : inputs) {
auto inputName = item.first;
const auto& data = inputsData.at(inputName)[0];
inferRequest->setBlob(inputName, data);
}
if (useGpuMem) {
auto outputBlobs = ::gpu::getRemoteOutputBlobs(exeNetwork, inferRequest->getOutputClBuffer());
for (auto& output : exeNetwork.GetOutputsInfo()) {
inferRequest->setBlob(output.first, outputBlobs[output.first]);
}
}
}
if (FLAGS_api == "sync") {
inferRequest->infer();
} else {
inferRequest->startAsync();
}
inferRequestsQueue.waitAll();
auto duration_ms = double_to_string(inferRequestsQueue.getLatencies()[0]);
slog::info << "First inference took " << duration_ms << " ms" << slog::endl;
if (statistics)
if (statistics) {
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"first inference time (ms)", duration_ms}});
}
inferRequestsQueue.resetTimes();
size_t processedFramesN = 0;
auto startTime = Time::now();
auto execTime = std::chrono::duration_cast<ns>(Time::now() - startTime).count();
@ -695,7 +807,6 @@ int main(int argc, char* argv[]) {
/** to align number if iterations to guarantee that last infer requests are
* executed in the same conditions **/
ProgressBar progressBar(progressBarTotalCount, FLAGS_stream_output, FLAGS_progress);
while ((niter != 0LL && iteration < niter) ||
(duration_nanoseconds != 0LL && (uint64_t)execTime < duration_nanoseconds) ||
(FLAGS_api == "async" && iteration % nireq != 0)) {
@ -704,6 +815,31 @@ int main(int argc, char* argv[]) {
IE_THROW() << "No idle Infer Requests!";
}
if (!inferenceOnly) {
auto inputs = app_inputs_info[iteration % app_inputs_info.size()];
if (FLAGS_pcseq) {
inferRequest->setLatencyGroupId(iteration % app_inputs_info.size());
}
if (isDynamicNetwork) {
batchSize = getBatchSize(inputs);
}
for (auto& item : inputs) {
auto inputName = item.first;
const auto& data = inputsData.at(inputName)[iteration % inputsData.at(inputName).size()];
inferRequest->setBlob(inputName, data);
}
if (useGpuMem) {
auto outputBlobs = ::gpu::getRemoteOutputBlobs(exeNetwork, inferRequest->getOutputClBuffer());
for (auto& output : exeNetwork.GetOutputsInfo()) {
inferRequest->setBlob(output.first, outputBlobs[output.first]);
}
}
}
if (FLAGS_api == "sync") {
inferRequest->infer();
} else {
@ -716,9 +852,10 @@ int main(int argc, char* argv[]) {
inferRequest->wait();
inferRequest->startAsync();
}
iteration++;
++iteration;
execTime = std::chrono::duration_cast<ns>(Time::now() - startTime).count();
processedFramesN += batchSize;
if (niter > 0) {
progressBar.addProgress(1);
@ -737,10 +874,17 @@ int main(int argc, char* argv[]) {
// wait the latest inference executions
inferRequestsQueue.waitAll();
double latency = getMedianValue<double>(inferRequestsQueue.getLatencies(), FLAGS_latency_percentile);
LatencyMetrics generalLatency(inferRequestsQueue.getLatencies());
std::vector<LatencyMetrics> groupLatencies = {};
if (FLAGS_pcseq && app_inputs_info.size() > 1) {
for (auto lats : inferRequestsQueue.getLatencyGroups()) {
groupLatencies.push_back(LatencyMetrics(lats));
}
}
double totalDuration = inferRequestsQueue.getDurationInMilliseconds();
double fps =
(FLAGS_api == "sync") ? batchSize * 1000.0 / latency : batchSize * 1000.0 * iteration / totalDuration;
double fps = (FLAGS_api == "sync") ? batchSize * 1000.0 / generalLatency.percentile(FLAGS_latency_percentile)
: 1000.0 * processedFramesN / totalDuration;
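            // Throughput example (sketch): in async mode 100 iterations with batch size 2 give
            // processedFramesN = 200, so with totalDuration = 1000 ms the reported fps is
            // 1000.0 * 200 / 1000 = 200 FPS.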
if (statistics) {
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
@ -751,19 +895,67 @@ int main(int argc, char* argv[]) {
if (device_name.find("MULTI") == std::string::npos) {
std::string latency_label;
if (FLAGS_latency_percentile == 50) {
latency_label = "latency (ms)";
latency_label = "Median latency (ms)";
} else {
latency_label = "latency (" + std::to_string(FLAGS_latency_percentile) + " percentile) (ms)";
}
statistics->addParameters(
StatisticsReport::Category::EXECUTION_RESULTS,
{
{latency_label, double_to_string(generalLatency.percentile(FLAGS_latency_percentile))},
});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{latency_label, double_to_string(latency)},
{"Average latency (ms)", double_to_string(generalLatency.average())},
});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{"Min latency (ms)", double_to_string(generalLatency.min())},
});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{"Max latency (ms)", double_to_string(generalLatency.max())},
});
if (FLAGS_pcseq && app_inputs_info.size() > 1) {
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{"Latency for each data shape group:", ""},
});
for (size_t i = 0; i < app_inputs_info.size(); ++i) {
std::string data_shapes_string = "";
data_shapes_string += std::to_string(i + 1) + ". ";
for (auto& item : app_inputs_info[i]) {
data_shapes_string += item.first + " : " + getShapeString(item.second.dataShape) + " ";
}
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{data_shapes_string, ""},
});
statistics->addParameters(
StatisticsReport::Category::EXECUTION_RESULTS,
{
{latency_label,
double_to_string(groupLatencies[i].percentile(FLAGS_latency_percentile))},
});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{"Average (ms)", double_to_string(groupLatencies[i].average())},
});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{"Min (ms)", double_to_string(groupLatencies[i].min())},
});
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{
{"Max (ms)", double_to_string(groupLatencies[i].max())},
});
}
}
}
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
{{"throughput", double_to_string(fps)}});
}
progressBar.finish();
// ----------------- 11. Dumping statistics report
@ -805,18 +997,32 @@ int main(int argc, char* argv[]) {
if (statistics)
statistics->dump();
std::cout << "Count: " << iteration << " iterations" << std::endl;
std::cout << "Duration: " << double_to_string(totalDuration) << " ms" << std::endl;
// Performance metrics report
slog::info << "Count: " << iteration << " iterations" << slog::endl;
slog::info << "Duration: " << double_to_string(totalDuration) << " ms" << slog::endl;
if (device_name.find("MULTI") == std::string::npos) {
std::cout << "Latency";
if (FLAGS_latency_percentile == 50) {
std::cout << ": ";
} else {
std::cout << " (" << FLAGS_latency_percentile << " percentile): ";
slog::info << "Latency: " << slog::endl;
generalLatency.logTotal(FLAGS_latency_percentile);
if (FLAGS_pcseq && app_inputs_info.size() > 1) {
slog::info << "Latency for each data shape group:" << slog::endl;
for (size_t i = 0; i < app_inputs_info.size(); ++i) {
slog::info << (i + 1) << ".";
for (auto& item : app_inputs_info[i]) {
std::stringstream input_shape;
auto shape = item.second.dataShape;
std::copy(shape.begin(), shape.end() - 1, std::ostream_iterator<int>(input_shape, ","));
input_shape << shape.back();
slog::info << " " << item.first << " : " << getShapeString(item.second.dataShape);
}
slog::info << slog::endl;
groupLatencies[i].logTotal(FLAGS_latency_percentile);
}
}
std::cout << double_to_string(latency) << " ms" << std::endl;
}
std::cout << "Throughput: " << double_to_string(fps) << " FPS" << std::endl;
slog::info << "Throughput: " << double_to_string(fps) << " FPS" << slog::endl;
} catch (const std::exception& ex) {
slog::err << ex.what() << slog::endl;

View File

@ -2,12 +2,15 @@
// SPDX-License-Identifier: Apache-2.0
//
// clang-format off
#include <memory>
#include <random>
#include <string>
#include <utility>
#include <vector>
// clang-format off
#include <samples/slog.hpp>
#include "remote_blobs_filling.hpp"
// clang-format on
@ -85,58 +88,98 @@ size_t getBytesPerElement(InferenceEngine::Precision precision) {
}
}
void fillRemoteBlobs(const std::vector<std::string>& inputFiles,
const size_t& batchSize,
benchmark_app::InputsInfo& app_inputs_info,
std::vector<InferReqWrap::Ptr> requests,
const InferenceEngine::ExecutableNetwork& exeNetwork) {
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> getRemoteInputBlobs(
const std::map<std::string, std::vector<std::string>>& inputFiles,
const std::vector<benchmark_app::InputsInfo>& app_inputs_info,
const InferenceEngine::ExecutableNetwork& exeNetwork,
std::vector<BufferType>& clBuffer) {
#ifdef HAVE_DEVICE_MEM_SUPPORT
slog::info << "Device memory will be used for input and output blobs" << slog::endl;
if (inputFiles.size()) {
slog::warn << "Device memory supports only random data at this moment, input images will be ignored"
<< slog::endl;
}
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> remoteBlobs;
auto context = exeNetwork.GetContext();
auto oclContext = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(context)->get();
auto oclInstance = std::make_shared<OpenCL>(oclContext);
auto setShared = [&](size_t requestId,
const std::string name,
const InferenceEngine::TensorDesc& desc,
bool fillRandom = false) {
auto setShared = [&](const std::string name, const InferenceEngine::TensorDesc& desc, bool fillRandom = false) {
cl_int err;
auto inputDims = desc.getDims();
auto elementsNum = std::accumulate(begin(inputDims), end(inputDims), 1, std::multiplies<size_t>());
auto inputSize = elementsNum * getBytesPerElement(desc.getPrecision());
cl::Buffer sharedBuffer =
cl::Buffer(oclInstance->_context, CL_MEM_READ_WRITE, (cl::size_type)inputSize, NULL, &err);
clBuffer.push_back(cl::Buffer(oclInstance->_context, CL_MEM_READ_WRITE, (cl::size_type)inputSize, NULL, &err));
if (fillRandom) {
void* mappedPtr = oclInstance->_queue.enqueueMapBuffer(sharedBuffer,
void* mappedPtr = oclInstance->_queue.enqueueMapBuffer(clBuffer.back(),
CL_TRUE,
CL_MEM_READ_WRITE,
0,
(cl::size_type)inputSize);
fillBuffer(mappedPtr, elementsNum, desc.getPrecision());
oclInstance->_queue.enqueueUnmapMemObject(sharedBuffer, mappedPtr);
oclInstance->_queue.enqueueUnmapMemObject(clBuffer.back(), mappedPtr);
}
InferenceEngine::Blob::Ptr sharedBlob = InferenceEngine::gpu::make_shared_blob(desc, context, sharedBuffer);
requests.at(requestId)->setBlob(name, sharedBlob);
auto blob = InferenceEngine::gpu::make_shared_blob(desc, context, clBuffer.back());
remoteBlobs[name].push_back(blob);
};
for (size_t requestId = 0; requestId < requests.size(); requestId++) {
for (auto& item : exeNetwork.GetInputsInfo())
setShared(requestId, item.first, item.second->getTensorDesc(), true);
for (auto& item : exeNetwork.GetOutputsInfo())
setShared(requestId, item.first, item.second->getTensorDesc());
for (auto& inputs_info : app_inputs_info) {
for (auto& input : inputs_info) {
// Fill random
slog::info << "Prepare remote blob for input '" << input.first << "' with random values ("
<< std::string((input.second.isImage() ? "image" : "some binary data")) << " is expected)"
<< slog::endl;
setShared(input.first,
InferenceEngine::TensorDesc(input.second.precision,
input.second.dataShape,
getLayoutFromString(input.second.layout)),
true);
}
}
return remoteBlobs;
#else
IE_THROW() << "Device memory requested for GPU device, but OpenCL was not linked";
#endif
}
std::map<std::string, InferenceEngine::Blob::Ptr> getRemoteOutputBlobs(
const InferenceEngine::ExecutableNetwork& exeNetwork,
std::map<std::string, ::gpu::BufferType>& clBuffer) {
#ifdef HAVE_DEVICE_MEM_SUPPORT
std::map<std::string, InferenceEngine::Blob::Ptr> outputBlobs;
for (auto& output : exeNetwork.GetOutputsInfo()) {
cl_int err;
auto context = exeNetwork.GetContext();
auto oclContext = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(context)->get();
auto oclInstance = std::make_shared<OpenCL>(oclContext);
auto desc = output.second->getTensorDesc();
auto inputDims = desc.getDims();
auto elementsNum = std::accumulate(begin(inputDims), end(inputDims), 1, std::multiplies<size_t>());
auto inputSize = elementsNum * getBytesPerElement(desc.getPrecision());
cl::size_type bufferSize = 0;
if (clBuffer.find(output.first) == clBuffer.end()) {
clBuffer[output.first] =
cl::Buffer(oclInstance->_context, CL_MEM_READ_WRITE, (cl::size_type)inputSize, NULL, &err);
} else {
auto& buff = clBuffer[output.first];
buff.getInfo(CL_MEM_SIZE, &bufferSize);
if (inputSize != bufferSize) {
buff = cl::Buffer(oclInstance->_context, CL_MEM_READ_WRITE, (cl::size_type)inputSize, NULL, &err);
}
}
outputBlobs[output.first] = InferenceEngine::gpu::make_shared_blob(desc, context, clBuffer[output.first]);
}
return outputBlobs;
#else
IE_THROW() << "Device memory requested for GPU device, but OpenCL was not linked";
#endif
}
} // namespace gpu

View File

@ -2,6 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#if defined(HAVE_GPU_DEVICE_MEM_SUPPORT)
# define HAVE_DEVICE_MEM_SUPPORT
# include "gpu/gpu_context_api_ocl.hpp"
@ -10,13 +12,14 @@
// clang-format off
#include "inference_engine.hpp"
#include "infer_request_wrap.hpp"
#include "utils.hpp"
// clang-format on
namespace gpu {
#ifdef HAVE_DEVICE_MEM_SUPPORT
using BufferType = cl::Buffer;
struct OpenCL {
cl::Context _context;
cl::Device _device;
@ -55,12 +58,18 @@ struct OpenCL {
_queue = cl::CommandQueue(_context, _device, props);
}
};
#else
using BufferType = void*;
#endif
void fillRemoteBlobs(const std::vector<std::string>& inputFiles,
const size_t& batchSize,
benchmark_app::InputsInfo& app_inputs_info,
std::vector<InferReqWrap::Ptr> requests,
const InferenceEngine::ExecutableNetwork& exeNetwork);
std::map<std::string, std::vector<InferenceEngine::Blob::Ptr>> getRemoteInputBlobs(
const std::map<std::string, std::vector<std::string>>& inputFiles,
const std::vector<benchmark_app::InputsInfo>& app_inputs_info,
const InferenceEngine::ExecutableNetwork& exeNetwork,
std::vector<BufferType>& clBuffer);
std::map<std::string, InferenceEngine::Blob::Ptr> getRemoteOutputBlobs(
const InferenceEngine::ExecutableNetwork& exeNetwork,
std::map<std::string, ::gpu::BufferType>& clBuffer);
} // namespace gpu

View File

@ -0,0 +1,43 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "ie_allocator.hpp"
template <class T>
class SharedBlobAllocator : public InferenceEngine::IAllocator {
public:
SharedBlobAllocator(const T* data, size_t size) : data(data), size(size){};
~SharedBlobAllocator() {
free((void*)data);
};
void* lock(void* handle, InferenceEngine::LockOp op = InferenceEngine::LOCK_FOR_WRITE) noexcept override {
if (handle == data) {
return (void*)data;
}
return nullptr;
}
void unlock(void* handle) noexcept override{};
void* alloc(size_t size) noexcept override {
return size <= this->size ? (void*)data : nullptr;
};
bool free(void* handle) noexcept override {
if (handle == data) {
delete[] data;
data = nullptr;
return true;
}
return false;
};
private:
const T* data;
size_t size;
};
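// Usage sketch (mirrors how benchmark_app builds input blobs; sizes and precision here are
// illustrative only): wrap a raw buffer that the allocator takes ownership of, then hand it
// to make_shared_blob.
//
//   float* data = new float[150528];
//   // ... fill data ...
//   InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32,
//                                    {1, 3, 224, 224},
//                                    InferenceEngine::Layout::NCHW);
//   auto blob = InferenceEngine::make_shared_blob<float>(
//       desc, std::make_shared<SharedBlobAllocator<float>>(data, 150528 * sizeof(float)));
//   blob->allocate();  // alloc() hands back the pre-filled buffer instead of allocating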

View File

@ -11,9 +11,12 @@
// clang-format off
#include "inference_engine.hpp"
#include "samples/common.hpp"
#include "samples/csv_dumper.hpp"
#include "samples/slog.hpp"
#include "utils.hpp"
// clang-format on
// @brief statistics reports types
@ -21,6 +24,53 @@ static constexpr char noCntReport[] = "no_counters";
static constexpr char averageCntReport[] = "average_counters";
static constexpr char detailedCntReport[] = "detailed_counters";
/// @brief Responsible for calculating different latency metrics
class LatencyMetrics {
public:
LatencyMetrics() = delete;
LatencyMetrics(const std::vector<double>& latencies) : latencies(latencies) {
if (latencies.empty()) {
throw std::logic_error("Latency metrics class expects non-empty vector of latencies at consturction.");
}
std::sort(this->latencies.begin(), this->latencies.end());
}
LatencyMetrics(std::vector<double>&& latencies) : latencies(latencies) {
if (latencies.empty()) {
throw std::logic_error("Latency metrics class expects non-empty vector of latencies at consturction.");
}
std::sort(this->latencies.begin(), this->latencies.end());
}
double min() {
return latencies[0];
}
double average() {
return std::accumulate(latencies.begin(), latencies.end(), 0.0) / latencies.size();
}
double percentile(std::size_t p) {
return latencies[size_t(latencies.size() / 100.0 * p)];
}
double max() {
return latencies.back();
}
void logTotal(size_t p) {
std::string percentileStr = (p == 50) ? "\tMedian: " : "\t" + std::to_string(p) + " percentile: ";
slog::info << percentileStr << double_to_string(percentile(p)) << " ms" << slog::endl;
slog::info << "\tAvg: " << double_to_string(average()) << " ms" << slog::endl;
slog::info << "\tMin: " << double_to_string(min()) << " ms" << slog::endl;
slog::info << "\tMax: " << double_to_string(max()) << " ms" << slog::endl;
}
private:
std::vector<double> latencies;
};
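// Usage sketch (latency values are illustrative): given per-iteration latencies such as
// {12.1, 10.4, 11.7, 25.3} ms collected by the requests queue,
//   LatencyMetrics m(latencies);
//   m.min();           // 10.4
//   m.average();       // (10.4 + 11.7 + 12.1 + 25.3) / 4 = 14.875
//   m.percentile(50);  // sorted index size_t(4 / 100.0 * 50) = 2 -> 12.1
//   m.max();           // 25.3
// Note that percentile() indexes the sorted vector at floor(size * p / 100), so for small
// sample counts it behaves like an upper median rather than an interpolated percentile.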
/// @brief Responsible for collecting of statistics and dumping to .csv file
class StatisticsReport {
public:

View File

@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
// clang-format off
#include <algorithm>
#include <map>
#include <regex>
@ -10,8 +9,10 @@
#include <utility>
#include <vector>
#include "samples/common.hpp"
#include "samples/slog.hpp"
// clang-format off
#include <samples/args_helper.hpp>
#include <samples/common.hpp>
#include <samples/slog.hpp>
#include "utils.hpp"
// clang-format on
@ -35,7 +36,7 @@ size_t InputInfo::getDimentionByLayout(char character) const {
size_t pos = layout.find(character);
if (pos == std::string::npos)
throw std::runtime_error("Error: Can't get " + std::string(character, 1) + " from layout " + layout);
return shape.at(pos);
return dataShape.at(pos);
}
size_t InputInfo::width() const {
return getDimentionByLayout('W');
@ -152,8 +153,8 @@ size_t getBatchSize(const benchmark_app::InputsInfo& inputs_info) {
std::size_t batch_index = info.second.layout.find("N");
if (batch_index != std::string::npos) {
if (batch_size == 0)
batch_size = info.second.shape[batch_index];
else if (batch_size != info.second.shape[batch_index])
batch_size = info.second.dataShape[batch_index];
else if (batch_size != info.second.dataShape[batch_index])
throw std::logic_error("Can't deterimine batch size: batch is "
"different for different inputs!");
}
@ -163,6 +164,47 @@ size_t getBatchSize(const benchmark_app::InputsInfo& inputs_info) {
return batch_size;
}
InferenceEngine::Layout getLayoutFromString(const std::string& string_layout) {
static const std::unordered_map<std::string, InferenceEngine::Layout> layouts = {
{"NCHW", InferenceEngine::Layout::NCHW},
{"NHWC", InferenceEngine::Layout::NHWC},
{"NCDHW", InferenceEngine::Layout::NCDHW},
{"NDHWC", InferenceEngine::Layout::NDHWC},
{"C", InferenceEngine::Layout::C},
{"CHW", InferenceEngine::Layout::CHW},
{"HWC", InferenceEngine::Layout::HWC},
{"HW", InferenceEngine::Layout::HW},
{"NC", InferenceEngine::Layout::NC},
{"CN", InferenceEngine::Layout::CN}};
auto it = layouts.find(string_layout);
if (it != layouts.end()) {
return it->second;
}
IE_THROW() << "Unknown layout with name '" << string_layout << "'.";
}
std::string getShapeString(const InferenceEngine::SizeVector& shape) {
std::stringstream ss;
ss << "[";
for (size_t i = 0; i < shape.size(); ++i) {
if (i > 0)
ss << ", ";
ss << shape.at(i);
}
ss << "]";
return ss.str();
}
std::string getShapesString(const benchmark_app::PartialShapes& shapes) {
std::stringstream ss;
for (auto& shape : shapes) {
if (!ss.str().empty())
ss << ", ";
ss << "\'" << shape.first << "': " << shape.second;
}
return ss.str();
}
std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& shapes) {
std::stringstream ss;
for (auto& shape : shapes) {
@ -218,6 +260,120 @@ std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& sc
return return_value;
}
std::vector<ngraph::Dimension> parsePartialShape(const std::string& partial_shape) {
std::vector<ngraph::Dimension> shape;
for (auto& dim : split(partial_shape, ',')) {
if (dim == "?" || dim == "-1") {
shape.push_back(ngraph::Dimension::dynamic());
} else {
const std::string range_divider = "..";
size_t range_index = dim.find(range_divider);
if (range_index != std::string::npos) {
std::string min = dim.substr(0, range_index);
std::string max = dim.substr(range_index + range_divider.length());
shape.push_back(ngraph::Dimension(min.empty() ? 0 : std::stoi(min),
max.empty() ? ngraph::Interval::s_max : std::stoi(max)));
} else {
shape.push_back(std::stoi(dim));
}
}
}
return shape;
}
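To illustrate the dimension syntax accepted above (a sketch only, assuming the declarations from the sample's utils.hpp are visible): fixed values, "?"/"-1" for fully dynamic dimensions, and "min..max" ranges can be mixed per dimension.
// Illustrative usage of parsePartialShape(); comments show the resulting dimensions.
void demo_parse_partial_shape() {
    auto fully_static = parsePartialShape("1,3,224,224");      // {1, 3, 224, 224}
    auto dynamic_hw = parsePartialShape("1,3,?,?");             // H and W are fully dynamic
    auto bounded_batch = parsePartialShape("1..8,3,224,224");   // batch constrained to the range [1, 8]
    (void)fully_static; (void)dynamic_hw; (void)bounded_batch;
}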
InferenceEngine::SizeVector parseTensorShape(const std::string& dataShape) {
std::vector<size_t> shape;
for (auto& dim : split(dataShape, ',')) {
shape.push_back(std::stoi(dim));
}
return shape;
}
std::pair<std::string, std::vector<std::string>> parseInputFiles(const std::string& file_paths_string) {
auto search_string = file_paths_string;
std::string input_name = "";
std::vector<std::string> file_paths;
// parse strings like <input1>:file1,file2,file3 and extract the input name from them
size_t semicolon_pos = search_string.find_first_of(":");
size_t quote_pos = search_string.find_first_of("\"");
if (semicolon_pos != std::string::npos && quote_pos != std::string::npos && semicolon_pos > quote_pos) {
// if : is found after opening " symbol - this means that " belongs to pathname
semicolon_pos = std::string::npos;
}
if (search_string.length() > 2 && semicolon_pos == 1 && search_string[2] == '\\') {
// Special case like C:\ denotes drive name, not an input name
semicolon_pos = std::string::npos;
}
if (semicolon_pos != std::string::npos) {
input_name = search_string.substr(0, semicolon_pos);
search_string = search_string.substr(semicolon_pos + 1);
}
// parse file1,file2,file3 and get vector of paths
size_t coma_pos = 0;
do {
coma_pos = search_string.find_first_of(',');
file_paths.push_back(search_string.substr(0, coma_pos));
if (coma_pos == std::string::npos) {
search_string = "";
break;
}
search_string = search_string.substr(coma_pos + 1);
} while (coma_pos != std::string::npos);
if (!search_string.empty())
throw std::logic_error("Can't parse file paths for input " + input_name +
" in input parameter string: " + file_paths_string);
return {input_name, file_paths};
}
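As an illustration of the -i value format handled above (a sketch, assuming the helper is visible through the sample's headers): the optional "<input_name>:" prefix is split off unless it looks like a Windows drive letter or sits inside quotes.
// Illustrative usage of parseInputFiles(); comments show the expected results.
void demo_parse_input_files() {
    auto named = parseInputFiles("data:img1.bmp,img2.bmp,img3.bmp");
    // named.first == "data", named.second == {"img1.bmp", "img2.bmp", "img3.bmp"}
    auto drive = parseInputFiles("C:\\images\\img1.bmp");
    // drive.first is empty: "C:" is treated as a drive letter, not an input name
    (void)named; (void)drive;
}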
std::map<std::string, std::vector<std::string>> parseInputArguments(const std::vector<std::string>& args) {
std::map<std::string, std::vector<std::string>> mapped_files = {};
auto args_it = begin(args);
const auto is_image_arg = [](const std::string& s) {
return s == "-i";
};
const auto is_arg = [](const std::string& s) {
return s.front() == '-';
};
while (args_it != args.end()) {
const auto files_start = std::find_if(args_it, end(args), is_image_arg);
if (files_start == end(args)) {
break;
}
const auto files_begin = std::next(files_start);
const auto files_end = std::find_if(files_begin, end(args), is_arg);
for (auto f = files_begin; f != files_end; ++f) {
auto files = parseInputFiles(*f);
if (mapped_files.find(files.first) == mapped_files.end()) {
mapped_files[files.first] = {};
}
for (auto& file : files.second) {
readInputFilesArguments(mapped_files[files.first], file);
}
}
args_it = files_end;
}
size_t max_files = 20;
for (auto& files : mapped_files) {
if (files.second.size() <= max_files) {
slog::info << "For input " << files.first << " " << files.second.size() << " files were added. "
<< slog::endl;
} else {
slog::info << "For input " << files.first << " " << files.second.size() << " files were added. "
<< " The number of files will be limited to " << max_files << "." << slog::endl;
files.second.resize(max_files);
}
}
return mapped_files;
}
#ifdef USE_OPENCV
void dump_config(const std::string& filename, const std::map<std::string, std::map<std::string, std::string>>& config) {
auto plugin_to_opencv_format = [](const std::string& str) -> std::string {

View File

@ -4,15 +4,43 @@
#pragma once
#include <chrono>
#include <iomanip>
#include <map>
#include <samples/slog.hpp>
#include <string>
#include <vector>
#include "ngraph/partial_shape.hpp"
typedef std::chrono::high_resolution_clock Time;
typedef std::chrono::nanoseconds ns;
inline uint64_t getDurationInMilliseconds(uint32_t duration) {
return duration * 1000LL;
}
inline uint64_t getDurationInNanoseconds(uint32_t duration) {
return duration * 1000000000LL;
}
inline double get_duration_ms_till_now(Time::time_point& startTime) {
return std::chrono::duration_cast<ns>(Time::now() - startTime).count() * 0.000001;
};
inline std::string double_to_string(const double number) {
std::stringstream ss;
ss << std::fixed << std::setprecision(2) << number;
return ss.str();
};
namespace benchmark_app {
struct InputInfo {
InferenceEngine::Precision precision;
InferenceEngine::SizeVector shape;
ngraph::PartialShape partialShape;
InferenceEngine::SizeVector dataShape;
std::string layout;
InferenceEngine::Layout originalLayout;
std::vector<float> scale;
std::vector<float> mean;
bool isImage() const;
@ -25,43 +53,56 @@ struct InputInfo {
size_t depth() const;
};
using InputsInfo = std::map<std::string, InputInfo>;
using PartialShapes = std::map<std::string, ngraph::PartialShape>;
} // namespace benchmark_app
std::vector<std::string> parseDevices(const std::string& device_string);
uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device);
std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices,
const std::string& values_string);
InferenceEngine::Layout getLayoutFromString(const std::string& string_layout);
std::string getShapeString(const InferenceEngine::SizeVector& shape);
std::string getShapesString(const benchmark_app::PartialShapes& shapes);
std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& shapes);
size_t getBatchSize(const benchmark_app::InputsInfo& inputs_info);
std::vector<std::string> split(const std::string& s, char delim);
std::map<std::string, std::vector<float>> parseScaleOrMean(const std::string& scale_mean,
const benchmark_app::InputsInfo& inputs_info);
std::vector<ngraph::Dimension> parsePartialShape(const std::string& partial_shape);
InferenceEngine::SizeVector parseTensorShape(const std::string& data_shape);
std::pair<std::string, std::vector<std::string>> parseInputFiles(const std::string& file_paths_string);
std::map<std::string, std::vector<std::string>> parseInputArguments(const std::vector<std::string>& args);
template <typename T>
std::map<std::string, std::string> parseInputParameters(const std::string parameter_string,
const std::map<std::string, T>& input_info) {
// Parse parameter string like "input0[value0],input1[value1]" or "[value]" (applied to all
// inputs)
std::map<std::string, std::string> return_value;
std::map<std::string, std::vector<std::string>> parseInputParameters(const std::string parameter_string,
const std::map<std::string, T>& input_info) {
// Parse parameter string like "[value0]", "[value0][value1]" or "input0[value0][value1],input1[value2][value3]"
// (applied to all inputs)
std::map<std::string, std::vector<std::string>> return_value;
std::string search_string = parameter_string;
auto start_pos = search_string.find_first_of('[');
auto input_name = search_string.substr(0, start_pos);
while (start_pos != std::string::npos) {
auto end_pos = search_string.find_first_of(']');
if (end_pos == std::string::npos)
break;
auto input_name = search_string.substr(0, start_pos);
if (start_pos)
input_name = search_string.substr(0, start_pos);
auto input_value = search_string.substr(start_pos + 1, end_pos - start_pos - 1);
if (!input_name.empty()) {
return_value[input_name] = input_value;
return_value[input_name].push_back(input_value);
} else {
for (auto& item : input_info) {
return_value[item.first] = input_value;
return_value[item.first].push_back(input_value);
}
}
search_string = search_string.substr(end_pos + 1);
if (search_string.empty() || search_string.front() != ',')
if (search_string.empty() || (search_string.front() != ',' && search_string.front() != '['))
break;
search_string = search_string.substr(1);
if (search_string.front() == ',')
search_string = search_string.substr(1);
start_pos = search_string.find_first_of('[');
}
if (!search_string.empty())
@ -70,87 +111,156 @@ std::map<std::string, std::string> parseInputParameters(const std::string parame
}
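For clarity (a sketch, not part of the benchmark_app sources; assumes the sample's utils.hpp is included), the bracket syntax parsed above maps each input name to a list of values, and a name-less group is broadcast to every input:
// Illustrative usage of parseInputParameters(); any map value type works for input_info.
void demo_parse_input_parameters() {
    std::map<std::string, int> inputs = {{"input0", 0}, {"input1", 0}};
    auto per_input = parseInputParameters("input0[1,3,224,224],input1[1,10]", inputs);
    // per_input["input0"] == {"1,3,224,224"}, per_input["input1"] == {"1,10"}
    auto multi_value = parseInputParameters("input0[1,3,224,224][1,3,448,448]", inputs);
    // multi_value["input0"] == {"1,3,224,224", "1,3,448,448"}
    auto broadcast = parseInputParameters("[1,3,224,224]", inputs);
    // broadcast assigns the same value to both "input0" and "input1"
    (void)per_input; (void)multi_value; (void)broadcast;
}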
template <typename T>
benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string,
const std::string& layout_string,
const size_t batch_size,
const std::string& scale_string,
const std::string& mean_string,
const std::map<std::string, T>& input_info,
bool& reshape_required) {
std::map<std::string, std::string> shape_map = parseInputParameters(shape_string, input_info);
std::map<std::string, std::string> layout_map = parseInputParameters(layout_string, input_info);
std::vector<benchmark_app::InputsInfo> getInputsInfo(const std::string& shape_string,
const std::string& layout_string,
const size_t batch_size,
const std::string& data_shapes_string,
const std::string& scale_string,
const std::string& mean_string,
const std::map<std::string, T>& input_info,
bool& reshape_required) {
std::map<std::string, std::vector<std::string>> shape_map = parseInputParameters(shape_string, input_info);
std::map<std::string, std::vector<std::string>> data_shapes_map =
parseInputParameters(data_shapes_string, input_info);
std::map<std::string, std::vector<std::string>> layout_map = parseInputParameters(layout_string, input_info);
size_t min_size = 1, max_size = 1;
if (!data_shapes_map.empty()) {
min_size = std::min_element(data_shapes_map.begin(),
data_shapes_map.end(),
[](std::pair<std::string, std::vector<std::string>> a,
std::pair<std::string, std::vector<std::string>> b) {
return a.second.size() < b.second.size() && a.second.size() != 1;
})
->second.size();
max_size = std::max_element(data_shapes_map.begin(),
data_shapes_map.end(),
[](std::pair<std::string, std::vector<std::string>> a,
std::pair<std::string, std::vector<std::string>> b) {
return a.second.size() < b.second.size();
})
->second.size();
if (min_size != max_size) {
throw std::logic_error(
"Shapes number for every input should be either 1 or should be equal to shapes number of other inputs");
}
}
reshape_required = false;
benchmark_app::InputsInfo info_map;
for (auto& item : input_info) {
benchmark_app::InputInfo info;
auto name = item.first;
auto descriptor = item.second->getTensorDesc();
// Precision
info.precision = descriptor.getPrecision();
// Shape
if (shape_map.count(name)) {
std::vector<size_t> parsed_shape;
for (auto& dim : split(shape_map.at(name), ',')) {
parsed_shape.push_back(std::stoi(dim));
}
info.shape = parsed_shape;
reshape_required = true;
} else {
info.shape = descriptor.getDims();
}
// Layout
if (layout_map.count(name)) {
info.layout = layout_map.at(name);
std::transform(info.layout.begin(), info.layout.end(), info.layout.begin(), ::toupper);
} else {
std::stringstream ss;
ss << descriptor.getLayout();
info.layout = ss.str();
}
// Update shape with batch if needed
if (batch_size != 0) {
std::size_t batch_index = info.layout.find("N");
if ((batch_index != std::string::npos) && (info.shape.at(batch_index) != batch_size)) {
info.shape[batch_index] = batch_size;
std::vector<benchmark_app::InputsInfo> info_maps;
for (size_t i = 0; i < min_size; ++i) {
benchmark_app::InputsInfo info_map;
for (auto& item : input_info) {
benchmark_app::InputInfo info;
auto name = item.first;
auto descriptor = item.second->getTensorDesc();
// Precision
info.precision = descriptor.getPrecision();
// Partial Shape
if (shape_map.count(name)) {
std::vector<ngraph::Dimension> parsed_shape;
if (shape_map.at(name).size() > 1) {
throw std::logic_error(
"shape command line parameter doesn't support multiple shapes for one input.");
}
info.partialShape = parsePartialShape(shape_map.at(name)[0]);
reshape_required = true;
} else {
info.partialShape = item.second->getPartialShape();
}
if (info.partialShape.is_dynamic() && info.isImage()) {
throw std::logic_error(
"benchmark_app supports only binary and random data as input for dynamic models at this moment.");
}
// Tensor Shape
if (info.partialShape.is_dynamic() && data_shapes_map.count(name)) {
info.dataShape = parseTensorShape(data_shapes_map.at(name)[i % data_shapes_map.at(name).size()]);
} else if (info.partialShape.is_static()) {
info.dataShape = info.partialShape.get_shape();
if (data_shapes_map.find(name) != data_shapes_map.end()) {
throw std::logic_error(
"Network's input \"" + name +
"\" is static. Use -shape argument for static inputs instead of -data_shape.");
}
} else if (!data_shapes_map.empty()) {
throw std::logic_error("Can't find network input name \"" + name + "\" in \"-data_shape " +
data_shapes_string + "\" command line parameter");
} else {
throw std::logic_error(
"data_shape command line parameter should be set in case of network with dynamic shapes.");
}
// Layout
info.originalLayout = descriptor.getLayout();
if (layout_map.count(name)) {
if (layout_map.at(name).size() > 1) {
throw std::logic_error(
"layout command line parameter doesn't support multiple layouts for one input.");
}
info.layout = layout_map.at(name)[0];
std::transform(info.layout.begin(), info.layout.end(), info.layout.begin(), ::toupper);
} else {
std::stringstream ss;
ss << descriptor.getLayout();
info.layout = ss.str();
}
// Update shape with batch if needed (only in static shape case)
// Update only the blob shape, leaving the network shape untouched, to trigger the dynamic batch size case
if (batch_size != 0) {
std::size_t batch_index = info.layout.find("N");
if ((batch_index != std::string::npos) && (info.dataShape.at(batch_index) != batch_size)) {
if (info.partialShape.is_static()) {
info.partialShape[batch_index] = batch_size;
}
info.dataShape[batch_index] = batch_size;
reshape_required = true;
}
}
info_map[name] = info;
}
// Update scale and mean
std::map<std::string, std::vector<float>> scale_map = parseScaleOrMean(scale_string, info_map);
std::map<std::string, std::vector<float>> mean_map = parseScaleOrMean(mean_string, info_map);
for (auto& item : info_map) {
if (item.second.isImage()) {
item.second.scale.assign({1, 1, 1});
item.second.mean.assign({0, 0, 0});
if (scale_map.count(item.first)) {
item.second.scale = scale_map.at(item.first);
}
if (mean_map.count(item.first)) {
item.second.mean = mean_map.at(item.first);
}
}
}
info_map[name] = info;
info_maps.push_back(info_map);
}
// Update scale and mean
std::map<std::string, std::vector<float>> scale_map = parseScaleOrMean(scale_string, info_map);
std::map<std::string, std::vector<float>> mean_map = parseScaleOrMean(mean_string, info_map);
for (auto& item : info_map) {
if (item.second.isImage()) {
item.second.scale.assign({1, 1, 1});
item.second.mean.assign({0, 0, 0});
if (scale_map.count(item.first)) {
item.second.scale = scale_map.at(item.first);
}
if (mean_map.count(item.first)) {
item.second.mean = mean_map.at(item.first);
}
}
}
return info_map;
return info_maps;
}
template <typename T>
benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string,
const std::string& layout_string,
const size_t batch_size,
const std::string& scale_string,
const std::string& mean_string,
const std::map<std::string, T>& input_info) {
std::vector<benchmark_app::InputsInfo> getInputsInfo(const std::string& shape_string,
const std::string& layout_string,
const size_t batch_size,
const std::string& data_shapes_string,
const std::string& scale_string,
const std::string& mean_string,
const std::map<std::string, T>& input_info) {
bool reshape_required = false;
return getInputsInfo<T>(shape_string,
layout_string,
batch_size,
data_shapes_string,
scale_string,
mean_string,
input_info,

View File

@ -679,6 +679,15 @@ inline std::string getFullDeviceName(InferenceEngine::Core& ie, std::string devi
}
}
inline std::string getFullDeviceName(ov::runtime::Core& ie, std::string device) {
InferenceEngine::Parameter p;
try {
p = ie.get_metric(device, METRIC_KEY(FULL_DEVICE_NAME));
return p.as<std::string>();
} catch (InferenceEngine::Exception&) {
return "";
}
}
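A quick usage sketch for the overload above (illustrative only; "CPU" is just an example device name): it returns the FULL_DEVICE_NAME metric, or an empty string when the device does not report it.
// Illustrative usage of getFullDeviceName() with an ov::runtime::Core instance.
void demo_full_device_name() {
    ov::runtime::Core core;
    const std::string name = getFullDeviceName(core, "CPU");
    // e.g. a human-readable processor name; empty if the metric is unsupported
    (void)name;
}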
/**
* @brief This class represents an object that is found by an object detection net
*/

File diff suppressed because it is too large

View File

@ -1,7 +1,3 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <gflags/gflags.h>
@ -228,3 +224,95 @@ static void showUsage() {
std::cout << " -iname \"<string>\" " << input_layer_names_message << std::endl;
std::cout << " -pwl_me \"<double>\" " << pwl_max_error_percent_message << std::endl;
}
/**
* @brief Checks input arguments
* @param argc number of args
* @param argv list of input arguments
* @return bool status true(Success) or false(Fail)
*/
bool ParseAndCheckCommandLine(int argc, char* argv[]) {
slog::info << "Parsing input parameters" << slog::endl;
gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
if (FLAGS_h) {
showUsage();
showAvailableDevices();
return false;
}
bool isDumpMode = !FLAGS_wg.empty() || !FLAGS_we.empty();
// input is not required only in dump mode and only when an external scale factor is provided
if (FLAGS_i.empty() && (!isDumpMode || FLAGS_q.compare("user") != 0)) {
showUsage();
if (isDumpMode) {
throw std::logic_error("In model dump mode either static quantization is used (-i) or user scale"
" factor need to be provided. See -q user option");
}
throw std::logic_error("Input file not set. Please use -i.");
}
if (FLAGS_m.empty() && FLAGS_rg.empty()) {
showUsage();
throw std::logic_error("Either IR file (-m) or GNAModel file (-rg) need to be set.");
}
if ((!FLAGS_m.empty() && !FLAGS_rg.empty())) {
throw std::logic_error("Only one of -m and -rg is allowed.");
}
std::vector<std::string> supportedDevices = {"CPU",
"GPU",
"GNA_AUTO",
"GNA_HW",
"GNA_HW_WITH_SW_FBACK",
"GNA_SW_EXACT",
"GNA_SW",
"GNA_SW_FP32",
"HETERO:GNA,CPU",
"HETERO:GNA_HW,CPU",
"HETERO:GNA_SW_EXACT,CPU",
"HETERO:GNA_SW,CPU",
"HETERO:GNA_SW_FP32,CPU",
"MYRIAD"};
if (std::find(supportedDevices.begin(), supportedDevices.end(), FLAGS_d) == supportedDevices.end()) {
throw std::logic_error("Specified device is not supported.");
}
uint32_t batchSize = (uint32_t)FLAGS_bs;
if ((batchSize < 1) || (batchSize > 8)) {
throw std::logic_error("Batch size out of range (1..8).");
}
/** default is static quantization **/
if ((FLAGS_q.compare("static") != 0) && (FLAGS_q.compare("dynamic") != 0) && (FLAGS_q.compare("user") != 0)) {
throw std::logic_error("Quantization mode not supported (static, dynamic, user).");
}
if (FLAGS_q.compare("dynamic") == 0) {
throw std::logic_error("Dynamic quantization not yet supported.");
}
if (FLAGS_qb != 16 && FLAGS_qb != 8) {
throw std::logic_error("Only 8 or 16 bits supported.");
}
if (FLAGS_nthreads <= 0) {
throw std::logic_error("Invalid value for 'nthreads' argument. It must be greater that or equal to 0");
}
if (FLAGS_cw_r < 0) {
throw std::logic_error("Invalid value for 'cw_r' argument. It must be greater than or equal to 0");
}
if (FLAGS_cw_l < 0) {
throw std::logic_error("Invalid value for 'cw_l' argument. It must be greater than or equal to 0");
}
if (FLAGS_pwl_me < 0.0 || FLAGS_pwl_me > 100.0) {
throw std::logic_error("Invalid value for 'pwl_me' argument. It must be greater than 0.0 and less than 100.0");
}
return true;
}

View File

@ -0,0 +1,406 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <cnpy.h>
#include <samples/common.hpp>
#define MAX_SCORE_DIFFERENCE 0.0001f // max score difference for frame error threshold
#define MAX_VAL_2B_FEAT 16384 // max to find scale factor
typedef std::chrono::high_resolution_clock Time;
typedef std::chrono::duration<double, std::ratio<1, 1000>> ms;
typedef std::chrono::duration<float> fsec;
/**
* @brief struct to store score error
*/
typedef struct {
uint32_t numScores;
uint32_t numErrors;
float threshold;
float maxError;
float rmsError;
float sumError;
float sumRmsError;
float sumSquaredError;
float maxRelError;
float sumRelError;
float sumSquaredRelError;
} score_error_t;
/**
* @brief struct to store infer request data per frame
*/
struct InferRequestStruct {
ov::runtime::InferRequest inferRequest;
int frameIndex;
uint32_t numFramesThisBatch;
};
/**
* @brief Check number of input files and model network inputs
* @param numInputs number of model inputs
* @param numInputFiles number of input files
* @return none.
*/
void CheckNumberOfInputs(size_t numInputs, size_t numInputFiles) {
if (numInputs != numInputFiles) {
throw std::logic_error("Number of network inputs (" + std::to_string(numInputs) +
")"
" is not equal to number of input files (" +
std::to_string(numInputFiles) + ")");
}
}
/**
* @brief Get scale factor for quantization
* @param ptrFloatMemory pointer to float memory with speech feature vector
* @param targetMax max scale factor
* @param numElements number of elements in speech feature vector
* @return scale factor
*/
float ScaleFactorForQuantization(void* ptrFloatMemory, float targetMax, uint32_t numElements) {
float* ptrFloatFeat = reinterpret_cast<float*>(ptrFloatMemory);
float max = 0.0;
float scaleFactor;
for (uint32_t i = 0; i < numElements; i++) {
if (fabs(ptrFloatFeat[i]) > max) {
max = fabs(ptrFloatFeat[i]);
}
}
if (max == 0) {
scaleFactor = 1.0;
} else {
scaleFactor = targetMax / max;
}
return (scaleFactor);
}
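A small worked example of the formula above (illustrative only): the factor is targetMax divided by the largest absolute feature value, so features peaking at 2.0 with the 16384 target give 8192.
// Illustrative usage of ScaleFactorForQuantization()
void demo_scale_factor() {
    float features[] = {0.5f, -2.0f, 1.25f};  // max |x| == 2.0
    float sf = ScaleFactorForQuantization(features, MAX_VAL_2B_FEAT, 3);
    // sf == 16384 / 2.0 == 8192
    (void)sf;
}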
/**
* @brief Clean score error
* @param error pointer to score error struct
* @return none.
*/
void ClearScoreError(score_error_t* error) {
error->numScores = 0;
error->numErrors = 0;
error->maxError = 0.0;
error->rmsError = 0.0;
error->sumError = 0.0;
error->sumRmsError = 0.0;
error->sumSquaredError = 0.0;
error->maxRelError = 0.0;
error->sumRelError = 0.0;
error->sumSquaredRelError = 0.0;
}
/**
* @brief Update total score error
* @param error pointer to score error struct
* @param totalError pointer to total score error struct
* @return none.
*/
void UpdateScoreError(score_error_t* error, score_error_t* totalError) {
totalError->numErrors += error->numErrors;
totalError->numScores += error->numScores;
totalError->sumRmsError += error->rmsError;
totalError->sumError += error->sumError;
totalError->sumSquaredError += error->sumSquaredError;
if (error->maxError > totalError->maxError) {
totalError->maxError = error->maxError;
}
totalError->sumRelError += error->sumRelError;
totalError->sumSquaredRelError += error->sumSquaredRelError;
if (error->maxRelError > totalError->maxRelError) {
totalError->maxRelError = error->maxRelError;
}
}
/**
* @brief Compare score errors; the arrays should be of the same length
* @param ptrScoreArray - pointer to score error struct array
* @param ptrRefScoreArray - pointer to score error struct array to compare
* @param scoreError - pointer to score error struct to save a new error
* @param numRows - number of rows in score error arrays
* @param numColumns - number of columns in score error arrays
* @return none.
*/
void CompareScores(float* ptrScoreArray,
void* ptrRefScoreArray,
score_error_t* scoreError,
uint32_t numRows,
uint32_t numColumns) {
uint32_t numErrors = 0;
ClearScoreError(scoreError);
float* A = ptrScoreArray;
float* B = reinterpret_cast<float*>(ptrRefScoreArray);
for (uint32_t i = 0; i < numRows; i++) {
for (uint32_t j = 0; j < numColumns; j++) {
float score = A[i * numColumns + j];
// std::cout << "score" << score << std::endl;
float refscore = B[i * numColumns + j];
float error = fabs(refscore - score);
float rel_error = error / (static_cast<float>(fabs(refscore)) + 1e-20f);
float squared_error = error * error;
float squared_rel_error = rel_error * rel_error;
scoreError->numScores++;
scoreError->sumError += error;
scoreError->sumSquaredError += squared_error;
if (error > scoreError->maxError) {
scoreError->maxError = error;
}
scoreError->sumRelError += rel_error;
scoreError->sumSquaredRelError += squared_rel_error;
if (rel_error > scoreError->maxRelError) {
scoreError->maxRelError = rel_error;
}
if (error > scoreError->threshold) {
numErrors++;
}
}
}
scoreError->rmsError = sqrt(scoreError->sumSquaredError / (numRows * numColumns));
scoreError->sumRmsError += scoreError->rmsError;
scoreError->numErrors = numErrors;
// std::cout << "rmsError=" << scoreError->rmsError << "sumRmsError="<<scoreError->sumRmsError;
}
/**
* @brief Get total stdev error
* @param error pointer to score error struct
* @return error
*/
float StdDevError(score_error_t error) {
return (sqrt(error.sumSquaredError / error.numScores -
(error.sumError / error.numScores) * (error.sumError / error.numScores)));
}
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
# ifdef _WIN32
# include <intrin.h>
# include <windows.h>
# else
# include <cpuid.h>
# endif
inline void native_cpuid(unsigned int* eax, unsigned int* ebx, unsigned int* ecx, unsigned int* edx) {
size_t level = *eax;
# ifdef _WIN32
int regs[4] = {static_cast<int>(*eax), static_cast<int>(*ebx), static_cast<int>(*ecx), static_cast<int>(*edx)};
__cpuid(regs, level);
*eax = static_cast<uint32_t>(regs[0]);
*ebx = static_cast<uint32_t>(regs[1]);
*ecx = static_cast<uint32_t>(regs[2]);
*edx = static_cast<uint32_t>(regs[3]);
# else
__get_cpuid(level, eax, ebx, ecx, edx);
# endif
}
/**
* @brief Get GNA module frequency
* @return GNA module frequency in MHz
*/
float getGnaFrequencyMHz() {
uint32_t eax = 1;
uint32_t ebx = 0;
uint32_t ecx = 0;
uint32_t edx = 0;
uint32_t family = 0;
uint32_t model = 0;
const uint8_t sixth_family = 6;
const uint8_t cannon_lake_model = 102;
const uint8_t gemini_lake_model = 122;
const uint8_t ice_lake_model = 126;
const uint8_t tgl_model = 140;
const uint8_t next_model = 151;
native_cpuid(&eax, &ebx, &ecx, &edx);
family = (eax >> 8) & 0xF;
// model is the concatenation of two fields
// | extended model | model |
// copy extended model data
model = (eax >> 16) & 0xF;
// shift
model <<= 4;
// copy model data
model += (eax >> 4) & 0xF;
if (family == sixth_family) {
switch (model) {
case cannon_lake_model:
case ice_lake_model:
case tgl_model:
case next_model:
return 400;
case gemini_lake_model:
return 200;
default:
return 1;
}
} else {
// counters are not supported, so we just return the default value
return 1;
}
}
#endif // if not ARM
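As a worked example of the family/model decoding above (illustrative; the EAX value is an assumed Tiger Lake CPUID signature, and <cstdint> is assumed from the surrounding headers): family comes from bits 11:8, and the model byte is the extended-model nibble concatenated with the model nibble.
// Illustrative only: decoding eax = 0x000806C1 with the same arithmetic as getGnaFrequencyMHz().
void demo_decode_family_model() {
    uint32_t eax = 0x000806C1;
    uint32_t family = (eax >> 8) & 0xF;          // 6 (sixth_family)
    uint32_t model = ((eax >> 16) & 0xF) << 4;   // extended model nibble -> 0x80
    model += (eax >> 4) & 0xF;                   // + model nibble -> 0x8C == 140 (tgl_model)
    (void)family; (void)model;
}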
/**
* @brief Print a report on the statistical score error
* @param totalError reference to a total score error struct
* @param framesNum number of frames in utterance
* @param stream output stream
* @return none.
*/
void printReferenceCompareResults(score_error_t const& totalError, size_t framesNum, std::ostream& stream) {
stream << " max error: " << totalError.maxError << std::endl;
stream << " avg error: " << totalError.sumError / totalError.numScores << std::endl;
stream << " avg rms error: " << totalError.sumRmsError / framesNum << std::endl;
stream << " stdev error: " << StdDevError(totalError) << std::endl << std::endl;
stream << std::endl;
}
/**
* @brief Print a report on the performance counts
* @param utterancePerfMap reference to a map to store performance counters
* @param numberOfFrames number of frames
* @param stream output stream
* @param fullDeviceName full device name string
* @param numberOfFramesOnHw number of frames delivered to GNA HW
* @param FLAGS_d flag of device
* @return none.
*/
void printPerformanceCounters(std::map<std::string, ov::runtime::ProfilingInfo> const& utterancePerfMap,
size_t numberOfFrames,
std::ostream& stream,
std::string fullDeviceName,
const uint64_t numberOfFramesOnHw,
std::string FLAGS_d) {
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
stream << std::endl << "Performance counts:" << std::endl;
stream << std::setw(10) << std::right << ""
<< "Counter descriptions";
stream << std::setw(22) << "Utt scoring time";
stream << std::setw(18) << "Avg infer time";
stream << std::endl;
stream << std::setw(46) << "(ms)";
stream << std::setw(24) << "(us per call)";
stream << std::endl;
// if GNA HW counters
// get frequency of GNA module
float freq = getGnaFrequencyMHz();
for (const auto& it : utterancePerfMap) {
std::string const& counter_name = it.first;
float current_units_us = static_cast<float>(it.second.real_time.count()) / freq;
float call_units_us = current_units_us / numberOfFrames;
if (FLAGS_d.find("GNA") != std::string::npos) {
stream << std::setw(30) << std::left << counter_name.substr(4, counter_name.size() - 1);
} else {
stream << std::setw(30) << std::left << counter_name;
}
stream << std::setw(16) << std::right << current_units_us / 1000;
stream << std::setw(21) << std::right << call_units_us;
stream << std::endl;
}
stream << std::endl;
std::cout << std::endl;
std::cout << "Full device name: " << fullDeviceName << std::endl;
std::cout << std::endl;
stream << "Number of frames delivered to GNA HW: " << numberOfFramesOnHw;
stream << "/" << numberOfFrames;
stream << std::endl;
#endif
}
/**
* @brief Get performance counts
* @param request reference to infer request
* @param perfCounters reference to a map to save performance counters
* @return none.
*/
void getPerformanceCounters(ov::runtime::InferRequest& request,
std::map<std::string, ov::runtime::ProfilingInfo>& perfCounters) {
auto retPerfCounters = request.get_profiling_info();
for (const auto& element : retPerfCounters) {
perfCounters[element.node_name] = element;
}
}
/**
* @brief Summarize performance counts and total number of frames executed on the GNA HW device
* @param perfCounters reference to a map to get performance counters
* @param totalPerfCounters reference to a map to save total performance counters
* @param totalRunsOnHw reference to a total number of frames computed on GNA HW
* @return none.
*/
void sumPerformanceCounters(std::map<std::string, ov::runtime::ProfilingInfo> const& perfCounters,
std::map<std::string, ov::runtime::ProfilingInfo>& totalPerfCounters,
uint64_t& totalRunsOnHw) {
auto runOnHw = false;
for (const auto& pair : perfCounters) {
totalPerfCounters[pair.first].real_time += pair.second.real_time;
runOnHw |= pair.second.real_time > std::chrono::microseconds(0); // if realTime is above zero, that means that
// a primitive was executed on the device
}
totalRunsOnHw += runOnHw;
}
/**
* @brief Parse scale factors
* @param str reference to a user-specified string of input scale factors for quantization; values can be separated by commas
* @return vector of scale factors
*/
std::vector<std::string> ParseScaleFactors(const std::string& str) {
std::vector<std::string> scaleFactorInput;
if (!str.empty()) {
std::string outStr;
std::istringstream stream(str);
int i = 0;
while (getline(stream, outStr, ',')) {
auto floatScaleFactor = std::stof(outStr);
if (floatScaleFactor <= 0.0f) {
throw std::logic_error("Scale factor for input #" + std::to_string(i) +
" (counting from zero) is out of range (must be positive).");
}
scaleFactorInput.push_back(outStr);
i++;
}
} else {
throw std::logic_error("Scale factor need to be specified via -sf option if you are using -q user");
}
return scaleFactorInput;
}
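For illustration (a sketch, assuming this header is included), the -sf value is a comma-separated list with one strictly positive factor per input:
// Illustrative usage of ParseScaleFactors(); comments show the expected behaviour.
void demo_parse_scale_factors() {
    auto factors = ParseScaleFactors("2048.5,1024");
    // factors == {"2048.5", "1024"}
    // ParseScaleFactors("0,1024")  -> throws: factors must be positive
    // ParseScaleFactors("")        -> throws: -sf is mandatory with -q user
    (void)factors;
}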
/**
* @brief Parse a comma-separated string of file names into a vector of file names
* @param str file names separated by comma
* @return vector of file names
*/
std::vector<std::string> ConvertStrToVector(std::string str) {
std::vector<std::string> blobName;
if (!str.empty()) {
size_t pos_last = 0;
size_t pos_next = 0;
while ((pos_next = str.find(",", pos_last)) != std::string::npos) {
blobName.push_back(str.substr(pos_last, pos_next - pos_last));
pos_last = pos_next + 1;
}
blobName.push_back(str.substr(pos_last));
}
return blobName;
}

View File

@ -31,19 +31,19 @@ endif()
if(ENABLE_OV_IR_FRONTEND)
if(BUILD_SHARED_LIBS)
add_dependencies(ov_runtime_libraries ir_ov_frontend)
add_dependencies(ov_runtime_libraries ov_ir_frontend)
endif()
# use this one once CVS-69781 is fixed
# add_dependencies(inference_engine ir_ov_frontend)
# add_dependencies(inference_engine ov_ir_frontend)
endif()
if(ENABLE_OV_ONNX_FRONTEND)
add_dependencies(inference_engine onnx_ov_frontend)
add_dependencies(inference_engine ov_onnx_frontend)
endif()
if(ENABLE_OV_PDPD_FRONTEND)
add_dependencies(inference_engine paddlepaddle_ov_frontend)
add_dependencies(inference_engine ov_paddlepaddle_frontend)
endif()
if(ENABLE_OV_TF_FRONTEND)
add_dependencies(inference_engine tensorflow_ov_frontend)
add_dependencies(inference_engine ov_tensorflow_frontend)
endif()

View File

@ -17,9 +17,9 @@ set(LIBRARY_OUTPUT_DIRECTORY_BIN ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
add_subdirectory(src)
if(ENABLE_OV_CORE_UNIT_TESTS)
add_subdirectory(tests/mock/mock_py_ov_frontend)
add_dependencies(pyopenvino mock_py_ov_frontend)
set_target_properties(mock_py_ov_frontend PROPERTIES
add_subdirectory(tests/mock/ov_mock_py_frontend)
add_dependencies(pyopenvino ov_mock_py_frontend)
set_target_properties(ov_mock_py_frontend PROPERTIES
LIBRARY_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_DIRECTORY_BIN}
ARCHIVE_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_DIRECTORY_BIN}
COMPILE_PDB_OUTPUT_DIRECTORY ${LIBRARY_OUTPUT_DIRECTORY_BIN}

View File

@ -41,6 +41,8 @@ from openvino.pyopenvino import FrontEnd
from openvino.pyopenvino import InputModel
from openvino.pyopenvino import Place
from openvino.pyopenvino import TelemetryExtension
from openvino.pyopenvino import DecoderTransformationExtension
from openvino.pyopenvino import JsonConfigExtension
# exceptions
from openvino.pyopenvino import NotImplementedFailure

View File

@ -321,68 +321,64 @@ py::dict outputs_to_dict(const std::vector<ov::Output<const ov::Node>>& outputs,
ov::runtime::Tensor t{request.get_tensor(out)};
switch (t.get_element_type()) {
case ov::element::Type_t::i8: {
py::array arr(t.get_shape(), t.data<int8_t>());
res[py::cast(out)] = arr;
res[py::cast(out)] = py::array_t<int8_t>(t.get_shape(), t.data<int8_t>());
;
break;
}
case ov::element::Type_t::i16: {
py::array arr(t.get_shape(), t.data<int16_t>());
res[py::cast(out)] = arr;
res[py::cast(out)] = py::array_t<int16_t>(t.get_shape(), t.data<int16_t>());
;
break;
}
case ov::element::Type_t::i32: {
py::array arr(t.get_shape(), t.data<int32_t>());
res[py::cast(out)] = arr;
res[py::cast(out)] = py::array_t<int32_t>(t.get_shape(), t.data<int32_t>());
;
break;
}
case ov::element::Type_t::i64: {
py::array arr(t.get_shape(), t.data<int64_t>());
res[py::cast(out)] = arr;
res[py::cast(out)] = py::array_t<int64_t>(t.get_shape(), t.data<int64_t>());
;
break;
}
case ov::element::Type_t::u8: {
py::array arr(t.get_shape(), t.data<uint8_t>());
res[py::cast(out)] = arr;
res[py::cast(out)] = py::array_t<uint8_t>(t.get_shape(), t.data<uint8_t>());
;
break;
}
case ov::element::Type_t::u16: {
py::array arr(t.get_shape(), t.data<uint16_t>());
res[py::cast(out)] = arr;
res[py::cast(out)] = py::array_t<uint16_t>(t.get_shape(), t.data<uint16_t>());
break;
}
case ov::element::Type_t::u32: {
py::array arr(t.get_shape(), t.data<uint32_t>());
res[py::cast(out)] = arr;
res[py::cast(out)] = py::array_t<uint32_t>(t.get_shape(), t.data<uint32_t>());
;
break;
}
case ov::element::Type_t::u64: {
py::array arr(t.get_shape(), t.data<uint64_t>());
res[py::cast(out)] = arr;
res[py::cast(out)] = py::array_t<uint64_t>(t.get_shape(), t.data<uint64_t>());
break;
}
case ov::element::Type_t::bf16: {
py::array arr(t.get_shape(), t.data<ov::bfloat16>());
res[py::cast(out)] = arr.view("int16");
res[py::cast(out)] = py::array(py::dtype("float16"), t.get_shape(), t.data<ov::bfloat16>());
break;
}
case ov::element::Type_t::f16: {
py::array arr(t.get_shape(), t.data<ov::float16>());
res[py::cast(out)] = arr.view("int16");
res[py::cast(out)] = py::array(py::dtype("float16"), t.get_shape(), t.data<ov::float16>());
break;
}
case ov::element::Type_t::f32: {
py::array arr(t.get_shape(), t.data<float>());
res[py::cast(out)] = arr;
res[py::cast(out)] = py::array_t<float>(t.get_shape(), t.data<float>());
;
break;
}
case ov::element::Type_t::f64: {
py::array arr(t.get_shape(), t.data<double>());
res[py::cast(out)] = arr;
res[py::cast(out)] = py::array_t<double>(t.get_shape(), t.data<double>());
;
break;
}
case ov::element::Type_t::boolean: {
py::array arr(t.get_shape(), t.data<bool*>());
res[py::cast(out)] = arr;
res[py::cast(out)] = py::array_t<bool>(t.get_shape(), t.data<bool>());
;
break;
}
default: {

View File

@ -0,0 +1,16 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <pybind11/functional.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/stl_bind.h>
#include "openvino/frontend/manager.hpp"
namespace py = pybind11;
void regclass_Extension(py::module m) {
py::class_<ov::Extension, std::shared_ptr<ov::Extension>> ext(m, "Extension", py::dynamic_attr());
}

View File

@ -0,0 +1,11 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <pybind11/pybind11.h>
namespace py = pybind11;
void regclass_Extension(py::module m);

View File

@ -65,7 +65,6 @@ void regclass_InferRequest(py::module m) {
self._start_time = Time::now();
self._request.infer();
self._end_time = Time::now();
return Common::outputs_to_dict(self._outputs, self._request);
},
py::arg("inputs"));

View File

@ -0,0 +1,56 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <pybind11/functional.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/stl_bind.h>
#include "extension/json_config.hpp"
#include "manager.hpp"
#include "openvino/frontend/exception.hpp"
#include "openvino/frontend/extension/decoder_transformation.hpp"
#include "openvino/frontend/extension/telemetry.hpp"
#include "pyopenvino/graph/function.hpp"
namespace py = pybind11;
using namespace ov::frontend;
void regclass_frontend_TelemetryExtension(py::module m) {
py::class_<TelemetryExtension, std::shared_ptr<TelemetryExtension>, ov::Extension> ext(m,
"TelemetryExtension",
py::dynamic_attr());
ext.def(py::init([](const std::string& event_category,
const TelemetryExtension::event_callback& send_event,
const TelemetryExtension::error_callback& send_error,
const TelemetryExtension::error_callback& send_stack_trace) {
return std::make_shared<TelemetryExtension>(event_category, send_event, send_error, send_stack_trace);
}));
ext.def("send_event", &TelemetryExtension::send_event);
ext.def("send_error", &TelemetryExtension::send_error);
ext.def("send_stack_trace", &TelemetryExtension::send_stack_trace);
}
void regclass_frontend_DecoderTransformationExtension(py::module m) {
py::class_<ov::frontend::DecoderTransformationExtension,
std::shared_ptr<ov::frontend::DecoderTransformationExtension>,
ov::Extension>
ext(m, "DecoderTransformationExtension", py::dynamic_attr());
}
void regclass_frontend_JsonConfigExtension(py::module m) {
py::class_<ov::frontend::JsonConfigExtension,
std::shared_ptr<ov::frontend::JsonConfigExtension>,
ov::frontend::DecoderTransformationExtension>
ext(m, "JsonConfigExtension", py::dynamic_attr());
ext.doc() = "Extension class to load and process ModelOptimizer JSON config file";
ext.def(py::init([](const std::string& path) {
return std::make_shared<ov::frontend::JsonConfigExtension>(path);
}));
}

View File

@ -0,0 +1,13 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <pybind11/pybind11.h>
namespace py = pybind11;
void regclass_frontend_TelemetryExtension(py::module m);
void regclass_frontend_DecoderTransformationExtension(py::module m);
void regclass_frontend_JsonConfigExtension(py::module m);

View File

@ -8,8 +8,8 @@
#include <pybind11/stl_bind.h>
#include "openvino/frontend/exception.hpp"
#include "openvino/frontend/extension/telemetry.hpp"
#include "openvino/frontend/manager.hpp"
#include "openvino/frontend/telemetry_extension.hpp"
#include "pyopenvino/graph/function.hpp"
namespace py = pybind11;
@ -41,7 +41,7 @@ void regclass_frontend_FrontEnd(py::module m) {
)");
fem.def("convert",
static_cast<std::shared_ptr<ov::Model> (FrontEnd::*)(InputModel::Ptr) const>(&FrontEnd::convert),
static_cast<std::shared_ptr<ov::Model> (FrontEnd::*)(const InputModel::Ptr&) const>(&FrontEnd::convert),
py::arg("model"),
R"(
Completely convert and normalize entire function, throws if it is not possible.
@ -58,7 +58,7 @@ void regclass_frontend_FrontEnd(py::module m) {
)");
fem.def("convert",
static_cast<void (FrontEnd::*)(std::shared_ptr<ov::Model>) const>(&FrontEnd::convert),
static_cast<void (FrontEnd::*)(const std::shared_ptr<ov::Model>&) const>(&FrontEnd::convert),
py::arg("function"),
R"(
Completely convert the remaining, not converted part of a function.
@ -143,26 +143,3 @@ void regclass_frontend_FrontEnd(py::module m) {
return "<FrontEnd '" + self.get_name() + "'>";
});
}
void regclass_frontend_Extension(py::module m) {
py::class_<ov::Extension, std::shared_ptr<ov::Extension>> ext(m, "Extension", py::dynamic_attr());
}
void regclass_frontend_TelemetryExtension(py::module m) {
{
py::class_<TelemetryExtension, std::shared_ptr<TelemetryExtension>, ov::Extension> ext(m,
"TelemetryExtension",
py::dynamic_attr());
ext.def(py::init([](const std::string& event_category,
const TelemetryExtension::event_callback& send_event,
const TelemetryExtension::error_callback& send_error,
const TelemetryExtension::error_callback& send_stack_trace) {
return std::make_shared<TelemetryExtension>(event_category, send_event, send_error, send_stack_trace);
}));
ext.def("send_event", &TelemetryExtension::send_event);
ext.def("send_error", &TelemetryExtension::send_error);
ext.def("send_stack_trace", &TelemetryExtension::send_stack_trace);
}
}

View File

@ -9,5 +9,3 @@
namespace py = pybind11;
void regclass_frontend_FrontEnd(py::module m);
void regclass_frontend_Extension(py::module m);
void regclass_frontend_TelemetryExtension(py::module m);

View File

@ -14,4 +14,3 @@ void regclass_frontend_InitializationFailureFrontEnd(py::module m);
void regclass_frontend_OpConversionFailureFrontEnd(py::module m);
void regclass_frontend_OpValidationFailureFrontEnd(py::module m);
void regclass_frontend_GeneralFailureFrontEnd(py::module m);

View File

@ -24,6 +24,7 @@
#include "pyopenvino/core/compiled_model.hpp"
#include "pyopenvino/core/containers.hpp"
#include "pyopenvino/core/core.hpp"
#include "pyopenvino/core/extension.hpp"
#include "pyopenvino/core/ie_parameter.hpp"
#include "pyopenvino/core/infer_request.hpp"
#include "pyopenvino/core/offline_transformations.hpp"
@ -31,6 +32,7 @@
#include "pyopenvino/core/tensor.hpp"
#include "pyopenvino/core/variable_state.hpp"
#include "pyopenvino/core/version.hpp"
#include "pyopenvino/frontend/extensions.hpp"
#include "pyopenvino/frontend/frontend.hpp"
#include "pyopenvino/frontend/inputmodel.hpp"
#include "pyopenvino/frontend/manager.hpp"
@ -124,6 +126,7 @@ PYBIND11_MODULE(pyopenvino, m) {
regclass_Parameter(m);
regclass_AsyncInferQueue(m);
regclass_ProfilingInfo(m);
regclass_Extension(m);
regclass_frontend_Place(m);
regclass_frontend_InitializationFailureFrontEnd(m);
@ -131,11 +134,12 @@ PYBIND11_MODULE(pyopenvino, m) {
regclass_frontend_OpConversionFailureFrontEnd(m);
regclass_frontend_OpValidationFailureFrontEnd(m);
regclass_frontend_NotImplementedFailureFrontEnd(m);
regclass_frontend_Extension(m);
regclass_frontend_FrontEndManager(m);
regclass_frontend_FrontEnd(m);
regclass_frontend_InputModel(m);
regclass_frontend_TelemetryExtension(m);
regclass_frontend_DecoderTransformationExtension(m);
regclass_frontend_JsonConfigExtension(m);
regmodule_offline_transformations(m);
}

View File

@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
#
set(TARGET_FE_NAME "mock_py_ov_frontend")
set(TARGET_FE_NAME "ov_mock_py_frontend")
file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
file(GLOB_RECURSE LIBRARY_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)

View File

@ -9,11 +9,11 @@
#include "openvino/frontend/visibility.hpp"
// Defined if we are building the plugin DLL (instead of using it)
#ifdef mock_py_ov_frontend_EXPORTS
#ifdef ov_mock_py_frontend_EXPORTS
# define MOCK_API OPENVINO_CORE_EXPORTS
#else
# define MOCK_API OPENVINO_CORE_IMPORTS
#endif // mock_py_ov_frontend_EXPORTS
#endif // ov_mock_py_frontend_EXPORTS
// OK to have 'using' in mock header
@ -257,13 +257,13 @@ public:
return false;
}
bool is_equal(Ptr another) const override {
bool is_equal(const Ptr& another) const override {
m_stat.m_is_equal++;
m_stat.m_lastArgPlace = another;
return false;
}
bool is_equal_data(Ptr another) const override {
bool is_equal_data(const Ptr& another) const override {
m_stat.m_is_equal_data++;
m_stat.m_lastArgPlace = another;
return false;
@ -471,19 +471,19 @@ public:
return std::make_shared<PlaceMockPy>();
}
void set_name_for_tensor(Place::Ptr tensor, const std::string& newName) override {
void set_name_for_tensor(const Place::Ptr& tensor, const std::string& newName) override {
m_stat.m_set_name_for_tensor++;
m_stat.m_lastArgPlace = tensor;
m_stat.m_lastArgString = newName;
}
void add_name_for_tensor(Place::Ptr tensor, const std::string& newName) override {
void add_name_for_tensor(const Place::Ptr& tensor, const std::string& newName) override {
m_stat.m_add_name_for_tensor++;
m_stat.m_lastArgPlace = tensor;
m_stat.m_lastArgString = newName;
}
void set_name_for_operation(Place::Ptr operation, const std::string& newName) override {
void set_name_for_operation(const Place::Ptr& operation, const std::string& newName) override {
m_stat.m_set_name_for_operation++;
m_stat.m_lastArgPlace = operation;
m_stat.m_lastArgString = newName;
@ -499,32 +499,32 @@ public:
m_stat.m_lastArgString = name;
}
void set_name_for_dimension(Place::Ptr place, size_t shapeDimIndex, const std::string& dimName) override {
void set_name_for_dimension(const Place::Ptr& place, size_t shapeDimIndex, const std::string& dimName) override {
m_stat.m_set_name_for_dimension++;
m_stat.m_lastArgPlace = place;
m_stat.m_lastArgInt = static_cast<int>(shapeDimIndex);
m_stat.m_lastArgString = dimName;
}
void cut_and_add_new_input(Place::Ptr place, const std::string& newNameOptional) override {
void cut_and_add_new_input(const Place::Ptr& place, const std::string& newNameOptional) override {
m_stat.m_cut_and_add_new_input++;
m_stat.m_lastArgPlace = place;
m_stat.m_lastArgString = newNameOptional;
}
void cut_and_add_new_output(Place::Ptr place, const std::string& newNameOptional) override {
void cut_and_add_new_output(const Place::Ptr& place, const std::string& newNameOptional) override {
m_stat.m_cut_and_add_new_output++;
m_stat.m_lastArgPlace = place;
m_stat.m_lastArgString = newNameOptional;
}
Place::Ptr add_output(Place::Ptr place) override {
Place::Ptr add_output(const Place::Ptr& place) override {
m_stat.m_add_output++;
m_stat.m_lastArgPlace = place;
return std::make_shared<PlaceMockPy>();
}
void remove_output(Place::Ptr place) override {
void remove_output(const Place::Ptr& place) override {
m_stat.m_remove_output++;
m_stat.m_lastArgPlace = place;
}
@ -546,19 +546,19 @@ public:
}
// Setting tensor properties
void set_partial_shape(Place::Ptr place, const ngraph::PartialShape& shape) override {
void set_partial_shape(const Place::Ptr& place, const ngraph::PartialShape& shape) override {
m_stat.m_set_partial_shape++;
m_stat.m_lastArgPlace = place;
m_stat.m_lastArgPartialShape = shape;
}
ngraph::PartialShape get_partial_shape(Place::Ptr place) const override {
ngraph::PartialShape get_partial_shape(const Place::Ptr& place) const override {
m_stat.m_get_partial_shape++;
m_stat.m_lastArgPlace = place;
return {};
}
void set_element_type(Place::Ptr place, const ngraph::element::Type& type) override {
void set_element_type(const Place::Ptr& place, const ngraph::element::Type& type) override {
m_stat.m_set_element_type++;
m_stat.m_lastArgPlace = place;
m_stat.m_lastArgElementType = type;
@ -631,26 +631,26 @@ public:
return false;
}
std::shared_ptr<ov::Model> convert(InputModel::Ptr model) const override {
std::shared_ptr<ov::Model> convert(const InputModel::Ptr& model) const override {
m_stat.m_convert_model++;
return std::make_shared<ov::Model>(ov::NodeVector{}, ov::ParameterVector{});
}
void convert(std::shared_ptr<ov::Model> func) const override {
void convert(const std::shared_ptr<ov::Model>& func) const override {
m_stat.m_convert++;
}
std::shared_ptr<ov::Model> convert_partially(InputModel::Ptr model) const override {
std::shared_ptr<ov::Model> convert_partially(const InputModel::Ptr& model) const override {
m_stat.m_convert_partially++;
return std::make_shared<ov::Model>(ov::NodeVector{}, ov::ParameterVector{});
}
std::shared_ptr<ov::Model> decode(InputModel::Ptr model) const override {
std::shared_ptr<ov::Model> decode(const InputModel::Ptr& model) const override {
m_stat.m_decode++;
return std::make_shared<ov::Model>(ov::NodeVector{}, ov::ParameterVector{});
}
void normalize(std::shared_ptr<ov::Model> function) const override {
void normalize(const std::shared_ptr<ov::Model>& function) const override {
m_stat.m_normalize++;
}

View File

@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
#
set(TARGET_FE_NAME "mock_py_ov_frontend")
set(TARGET_FE_NAME "ov_mock_py_frontend")
set(PYBIND_FE_NAME "pybind_mock_frontend")
set(PYBIND_FE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/pyngraph_mock_frontend_api.cpp)

View File

@ -5,7 +5,7 @@
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "../mock_py_ov_frontend/mock_py_frontend.hpp"
#include "../ov_mock_py_frontend/mock_py_frontend.hpp"
namespace py = pybind11;
using namespace ngraph;

View File

@ -51,7 +51,7 @@ from openvino.frontend import FrontEndManager
def create_test_onnx_models():
models = {}
# Input model 1
add = onnx.helper.make_node("Add", inputs=["in1", "in2"], outputs=["add_out"])
add = onnx.helper.make_node("Add", inputs=["in1", "in2"], outputs=["add_out"], name="onnx_add_op")
split = onnx.helper.make_node("Split", inputs=["add_out"],
outputs=["out1", "out2"], name="split1", axis=0)
relu = onnx.helper.make_node("Relu", inputs=["in3"], outputs=["out3"])
@ -1205,3 +1205,48 @@ def test_set_name_for_dimension():
with pytest.raises(Exception) as e:
model.set_name_for_dimension(one_const, 0, dim_name)
assert "ONNX initializer shape dimension cannot be dynamic." in str(e)
def test_set_input_partial_shape_using_input_edge():
skip_if_onnx_frontend_is_disabled()
fe = fem.load_by_framework(framework=ONNX_FRONTEND_NAME)
model = fe.load("input_model.onnx")
add_operator = model.get_place_by_operation_name("onnx_add_op")
add_input_edge = add_operator.get_input_port(inputPortIndex=0)
model.set_partial_shape(add_input_edge, PartialShape([10, 10]))
add_input_edge = add_operator.get_input_port(inputPortIndex=1)
model.set_partial_shape(add_input_edge, PartialShape([1]))
ov_model = fe.convert(model)
assert ov_model.input("in1").get_partial_shape() == PartialShape([10, 10])
assert ov_model.input("in2").get_partial_shape() == PartialShape([1])
assert ov_model.output("out4").get_partial_shape() == PartialShape([10, 10])
def test_get_partial_shape_using_input_edge():
skip_if_onnx_frontend_is_disabled()
fe = fem.load_by_framework(framework=ONNX_FRONTEND_NAME)
model = fe.load("input_model.onnx")
add_operator = model.get_place_by_operation_name("onnx_add_op")
add_input_edge = add_operator.get_input_port(inputPortIndex=0)
pshape = model.get_partial_shape(add_input_edge)
assert pshape == PartialShape([2, 2])
def test_get_partial_shape_using_output_edge():
skip_if_onnx_frontend_is_disabled()
fe = fem.load_by_framework(framework=ONNX_FRONTEND_NAME)
model = fe.load("input_model.onnx")
add_operator = model.get_place_by_operation_name("onnx_add_op")
add_output_edge = add_operator.get_output_port(outputPortIndex=0)
assert model.get_partial_shape(add_output_edge) == PartialShape([2, 2])
split_operator = model.get_place_by_tensor_name("out1").get_producing_operation()
out2_edge = split_operator.get_output_port(outputPortIndex=1)
assert model.get_partial_shape(out2_edge) == PartialShape([1, 2])

Some files were not shown because too many files have changed in this diff