Merge remote-tracking branch 'github/master' into auto-batch-master

# Conflicts:
#	samples/cpp/benchmark_app/inputs_filling.cpp
#	samples/cpp/benchmark_app/remote_blobs_filling.cpp
#	src/plugins/auto/executable_network.cpp
#	src/tests/unit/auto/exec_network_get_metrics.cpp
myshevts 2021-12-17 16:39:50 +03:00
commit 9426db9b00
3635 changed files with 13486 additions and 6064 deletions


@@ -241,7 +241,7 @@ jobs:
. $(SETUPVARS) -pyver 3.8 && python3 -m pytest -s $(INSTALL_DIR)/tests/mo/unit_tests --junitxml=TEST-ModelOptimizer.xml
displayName: 'Model Optimizer UT'
continueOnError: false
enabled: false
enabled: true
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ov_core_unit_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml
workingDirectory: $(INSTALL_TEST_DIR)
@@ -334,7 +334,7 @@ jobs:
displayName: 'Samples Smoke Tests'
continueOnError: false
condition: eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'ON')
enabled: false
enabled: true
- script: |
export DATA_PATH=$(MODELS_PATH)
@@ -353,7 +353,7 @@ jobs:
workingDirectory: $(LAYER_TESTS_DIR)
displayName: 'Layer Tests'
continueOnError: false
enabled: false
enabled: true
- task: PublishTestResults@2
condition: always()


@@ -35,7 +35,7 @@ jobs:
- checkout: none
- script: git -C ~/work/openvino checkout -m --recurse-submodules $(Build.SourceVersion)
- script: git -C ~/work/openvino checkout -m $(Build.SourceVersion) && git -C ~/work/openvino submodule update --init --recursive
displayName: checkout
# Should be after 'Install dependencies' because Git lfs is not installed
@@ -71,7 +71,7 @@ jobs:
./buildreleasenolto.sh
libinference_engine_preproc.so
MKLDNNPlugin
clDNNPlugin
ov_intel_gpu_plugin
clDNN_unit_tests64
gpuFuncTests
displayName: Build Lin


@@ -83,7 +83,7 @@ jobs:
displayName: 'Make dir'
- script: |
certutil -urlcache -split -f https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
curl -O https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat
call install_ib_console.bat
workingDirectory: $(WORK_DIR)
displayName: 'Install IncrediBuild'
@@ -117,9 +117,9 @@ jobs:
python -m pip install -r $(REPO_DIR)\tools\mo\requirements.txt
python -m pip install -r $(REPO_DIR)\tools\mo\requirements_dev.txt
rem Speed up build
certutil -urlcache -split -f https://github.com/Kitware/CMake/releases/download/v$(CMAKE_VERSION)/cmake-$(CMAKE_VERSION)-windows-x86_64.zip cmake-$(CMAKE_VERSION)-windows-x86_64.zip
powershell -command "Invoke-WebRequest https://github.com/Kitware/CMake/releases/download/v$(CMAKE_VERSION)/cmake-$(CMAKE_VERSION)-windows-x86_64.zip -OutFile cmake-$(CMAKE_VERSION)-windows-x86_64.zip"
powershell -command "Expand-Archive -Force cmake-$(CMAKE_VERSION)-windows-x86_64.zip"
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip ninja-win.zip
powershell -command "Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip -OutFile ninja-win.zip"
powershell -command "Expand-Archive -Force ninja-win.zip"
git clone https://github.com/google/gtest-parallel.git
workingDirectory: $(WORK_DIR)


@@ -59,7 +59,7 @@ jobs:
- script: |
rem Speed up build
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip ninja-win.zip
powershell -command "Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip -OutFile ninja-win.zip"
powershell -command "Expand-Archive -Force ninja-win.zip"
workingDirectory: $(WORK_DIR)
displayName: 'Install dependencies'


@@ -35,7 +35,7 @@ Jenkinsfile @openvinotoolkit/openvino-admins
/src/common/ @openvinotoolkit/openvino-ie-maintainers
/src/core/ @openvinotoolkit/openvino-ngraph-maintainers
/src/frontends/ @openvinotoolkit/openvino-ngraph-maintainers
/inference-engine/tests_deprecated/readers/ @openvinotoolkit/openvino-ngraph-maintainers
/src/tests_deprecated/readers/ @openvinotoolkit/openvino-ngraph-maintainers
# IE CPU:
/inference-engine/src/mkldnn_plugin/ @openvinotoolkit/openvino-ie-cpu-maintainers @openvinotoolkit/openvino-ie-cpu-developers
@@ -53,12 +53,12 @@ Jenkinsfile @openvinotoolkit/openvino-admins
/inference-engine/src/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers
/src/inference/include/ie/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers
/inference-engine/thirdparty/movidius/ @openvinotoolkit/openvino-ie-vpu-maintainers
/inference-engine/tests_deprecated/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tests_deprecated/functional/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tests_deprecated/behavior/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tests/functional/plugin/myriad/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tests/unit/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tests/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/src/tests_deprecated/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/src/tests_deprecated/functional/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/src/tests_deprecated/behavior/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/src/tests/functional/plugin/myriad/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/src/tests/unit/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/src/tests/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/scripts/run_tests_myriad_multistick.sh @openvinotoolkit/openvino-ie-vpu-maintainers
# IE GNA:
@@ -70,10 +70,10 @@ Jenkinsfile @openvinotoolkit/openvino-admins
/src/inference/include/ie/multi-device/ @openvinotoolkit/openvino-ie-multi-maintainers
# IE Tests:
/inference-engine/tests/ @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tests_deprecated/ @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tests/functional/inference_engine/ngraph_reader/ @openvinotoolkit/openvino-ie-tests-maintainers @openvinotoolkit/openvino-ngraph-maintainers
/inference-engine/tests/functional/inference_engine/transformations/ @openvinotoolkit/openvino-ie-tests-maintainers @openvinotoolkit/openvino-ngraph-maintainers
/src/tests/ @openvinotoolkit/openvino-ie-tests-maintainers
/src/tests_deprecated/ @openvinotoolkit/openvino-ie-tests-maintainers
/src/tests/functional/inference_engine/ngraph_reader/ @openvinotoolkit/openvino-ie-tests-maintainers @openvinotoolkit/openvino-ngraph-maintainers
/src/tests/functional/inference_engine/transformations/ @openvinotoolkit/openvino-ie-tests-maintainers @openvinotoolkit/openvino-ngraph-maintainers
# Documentation:
/docs/ @openvinotoolkit/openvino-docs-maintainers


@@ -79,8 +79,20 @@ function(_ie_add_api_validator_post_build_step)
_ie_add_api_validator_post_build_step_recursive(TARGET ${API_VALIDATOR_TARGET})
# remove targets which were tested before
foreach(item IN LISTS VALIDATED_LIBRARIES)
foreach(target IN LISTS API_VALIDATOR_TARGETS)
list(FIND VALIDATED_LIBRARIES ${target} index)
if (NOT index EQUAL -1)
list(APPEND VALIDATED_TARGETS ${target})
endif()
if(TARGET "${target}")
get_target_property(orig_target ${target} ALIASED_TARGET)
list(FIND VALIDATED_LIBRARIES ${orig_target} index)
if (NOT index EQUAL -1)
list(APPEND VALIDATED_TARGETS ${target})
endif()
endif()
endforeach()
foreach(item IN LISTS VALIDATED_TARGETS)
list(REMOVE_ITEM API_VALIDATOR_TARGETS ${item})
endforeach()


@@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
#
set(FRONTEND_INSTALL_INCLUDE "runtime/include/ngraph/frontend")
set(FRONTEND_INSTALL_INCLUDE "runtime/include/")
set(FRONTEND_NAME_SUFFIX "_ov_frontend")
set(FRONTEND_NAMES "" CACHE INTERNAL "")
@@ -225,7 +225,7 @@ macro(ov_add_frontend)
if(OV_FRONTEND_LINKABLE_FRONTEND)
# install -dev part
install(DIRECTORY ${${TARGET_NAME}_INCLUDE_DIR}/${OV_FRONTEND_NAME}_frontend
install(DIRECTORY ${${TARGET_NAME}_INCLUDE_DIR}/
DESTINATION ${FRONTEND_INSTALL_INCLUDE}
COMPONENT core_dev
FILES_MATCHING PATTERN "*.hpp")


@@ -4,7 +4,7 @@
#pragma once
#include "common/frontend.hpp"
#include "openvino/frontend/frontend.hpp"
@OV_FRONTEND_DECLARATIONS@


@@ -66,22 +66,22 @@ ov_model_convert("${CMAKE_CURRENT_SOURCE_DIR}/src/core/tests"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/core"
onnx_out_files)
set(rel_path "inference-engine/tests/functional/plugin/shared/models")
set(rel_path "src/tests/functional/plugin/shared/models")
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/func_tests/models"
ft_out_files)
set(rel_path "inference-engine/tests/functional/inference_engine/onnx_reader")
set(rel_path "src/tests/functional/inference_engine/onnx_reader")
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/onnx_reader"
ie_onnx_out_files)
set(rel_path "inference-engine/tests/functional/inference_engine/ir_serialization")
set(rel_path "src/tests/functional/inference_engine/ir_serialization")
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/ir_serialization"
ie_serialize_out_files)
set(rel_path "inference-engine/tests/unit/frontends/onnx_import/models")
set(rel_path "src/tests/unit/frontends/onnx_import/models")
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/onnx_import"
ie_onnx_import_out_files)


@@ -2,12 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
#! [complex:transformation]
import logging as log
import numpy as np
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.graph.graph import Graph
from openvino.tools.mo.front.common.replacement import FrontReplacementSubgraph
from openvino.tools.mo.graph.graph import Graph
class Complex(FrontReplacementSubgraph):
@@ -41,4 +38,3 @@ class Complex(FrontReplacementSubgraph):
# change the connection so now all consumers of "complex_node" get data from input node of strided slice nodes
complex_node.out_port(0).get_connection().set_source(input_node_output_port)
#! [complex:transformation]


@@ -4,11 +4,11 @@
#! [complex_abs:transformation]
import numpy as np
from extensions.ops.elementwise import Pow
from extensions.ops.ReduceOps import ReduceSum
from mo.front.common.replacement import FrontReplacementOp
from mo.graph.graph import Graph, Node
from mo.ops.const import Const
from openvino.tools.mo.ops.elementwise import Pow
from openvino.tools.mo.ops.ReduceOps import ReduceSum
from openvino.tools.mo.front.common.replacement import FrontReplacementOp
from openvino.tools.mo.graph.graph import Graph, Node
from openvino.tools.mo.ops.const import Const
class ComplexAbs(FrontReplacementOp):


@@ -3,8 +3,7 @@
# ! [fft_ext:extractor]
from ...ops.FFT import FFT
from mo.front.extractor import FrontExtractorOp
from mo.utils.error import Error
from openvino.tools.mo.front.extractor import FrontExtractorOp
class FFT2DFrontExtractor(FrontExtractorOp):


@@ -2,9 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
#! [fft:operation]
from mo.front.common.partial_infer.elemental import copy_shape_infer
from mo.graph.graph import Node, Graph
from mo.ops.op import Op
from openvino.tools.mo.front.common.partial_infer.elemental import copy_shape_infer
from openvino.tools.mo.graph.graph import Graph
from openvino.tools.mo.ops.op import Op
class FFT(Op):


@@ -868,7 +868,7 @@ EXAMPLE_PATH = ../template_plugin/src \
../template_plugin/tests/functional/CMakeLists.txt \
../template_plugin/tests/functional/transformations \
../template_plugin/tests/functional/shared_tests_instances/ \
../../inference-engine/tests/functional/plugin/shared/include \
../../src/tests/functional/plugin/shared/include \
../snippets
# If the value of the EXAMPLE_PATH tag contains directories, you can use the


@@ -59,11 +59,14 @@ Framework-agnostic parameters:
--reverse_input_channels
Switch the input channels order from RGB to BGR (or
vice versa). Applied to original inputs of the model
if and only if a number of channels equals 3. Applied
after application of --mean_values and --scale_values
options, so numbers in --mean_values and
--scale_values go in the order of channels used in the
original model.
if and only if the number of channels equals 3.
When --mean_values/--scale_values are also specified,
the channel reversal is applied to the user's input
data first, so that the numbers in --mean_values and
--scale_values follow the channel order used in the
original model. In other words, if both options are
specified, the data flow in the model looks as follows:
Parameter -> ReverseInputChannels -> Mean/Scale -> the original body of the model
(see the numeric sketch after this options excerpt).
--log_level {CRITICAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}
Logger level
--input INPUT Quoted list of comma-separated input nodes names with
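Below is a minimal NumPy sketch of the order described for --reverse_input_channels above. The input shape and the mean/scale numbers are invented for illustration, and the snippet only mimics what the inserted ReverseInputChannels and Mean/Scale operations compute; it is not the Model Optimizer implementation.

```py
import numpy as np

# Hypothetical user input in RGB order; shape is (channels, height, width).
image_rgb = np.random.rand(3, 224, 224).astype(np.float32)

# Illustrative values, written in the channel order of the original (BGR) model,
# which is how --mean_values/--scale_values are expected to be given.
mean_values = np.array([103.94, 116.78, 123.68], dtype=np.float32)
scale_values = np.array([57.21, 57.45, 57.73], dtype=np.float32)

# 1) ReverseInputChannels: RGB -> BGR, so the data matches the original model's channel order.
image_bgr = image_rgb[::-1, :, :]

# 2) Mean/Scale: applied after the reversal, so the indices of mean/scale line up with BGR.
preprocessed = (image_bgr - mean_values[:, None, None]) / scale_values[:, None, None]

print(preprocessed.shape)  # (3, 224, 224)
```

Because the reversal happens first, the mean/scale constants stay in the original model's channel order, exactly as the help text states.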


@@ -639,9 +639,9 @@ graph. Consider the extractor for the TensorFlow\* operation `Const` (refer to the file
`extensions/front/tf/const_ext.py`):
```py
from mo.front.extractor import FrontExtractorOp
from mo.front.tf.extractors.utils import tf_dtype_extractor, tf_tensor_shape, tf_tensor_content
from mo.ops.const import Const
from openvino.tools.mo.front.extractor import FrontExtractorOp
from openvino.tools.mo.front.tf.extractors.utils import tf_dtype_extractor, tf_tensor_shape, tf_tensor_content
from openvino.tools.mo.ops.const import Const
class ConstExtractor(FrontExtractorOp):
@@ -679,9 +679,9 @@ Consider another example with an extractor of ONNX\* operation `Constant` (refer
from onnx import numpy_helper
from onnx.numpy_helper import to_array
from mo.front.extractor import FrontExtractorOp
from mo.front.onnx.extractors.utils import onnx_attr
from mo.ops.const import Const
from openvino.tools.mo.front.extractor import FrontExtractorOp
from openvino.tools.mo.front.onnx.extractors.utils import onnx_attr
from openvino.tools.mo.ops.const import Const
class ConstantExtractor(FrontExtractorOp):
@@ -814,11 +814,11 @@ fusing of the sub-graph defining the [Mish](../../../ops/activation/Mish_4.md) a
operation:
```py
from extensions.front.Softplus_fusion import SoftplusFusion
from extensions.ops.activation_ops import Mish
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.front.subgraph_matcher import SubgraphMatch
from mo.graph.graph import Graph, rename_nodes
from openvino.tools.mo.front.Softplus_fusion import SoftplusFusion
from openvino.tools.mo.ops.activation_ops import Mish
from openvino.tools.mo.front.common.replacement import FrontReplacementSubgraph
from openvino.tools.mo.front.subgraph_matcher import SubgraphMatch
from openvino.tools.mo.graph.graph import Graph, rename_nodes
class MishFusion(FrontReplacementSubgraph):
@@ -886,12 +886,12 @@ transformation.
Consider an example transformation from the file `extensions/front/Pack.py`, which replaces the operation `Pack` from
TensorFlow\*:
```py
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementOp
from mo.front.tf.graph_utils import create_op_with_const_inputs
from mo.graph.graph import Node, Graph, rename_nodes
from mo.ops.concat import Concat
from mo.ops.unsqueeze import Unsqueeze
from openvino.tools.mo.front.common.partial_infer.utils import int64_array
from openvino.tools.mo.front.common.replacement import FrontReplacementOp
from openvino.tools.mo.front.tf.graph_utils import create_op_with_const_inputs
from openvino.tools.mo.graph.graph import Node, Graph, rename_nodes
from openvino.tools.mo.ops.concat import Concat
from openvino.tools.mo.ops.unsqueeze import Unsqueeze
class Pack(FrontReplacementOp):
@@ -932,11 +932,11 @@ specification.
```py
import logging as log
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementPattern
from mo.graph.graph import Graph
from mo.ops.const import Const
from mo.utils.error import Error
from openvino.tools.mo.front.common.partial_infer.utils import int64_array
from openvino.tools.mo.front.common.replacement import FrontReplacementPattern
from openvino.tools.mo.graph.graph import Graph
from openvino.tools.mo.ops.const import Const
from openvino.tools.mo.utils.error import Error
class SqueezeNormalize(FrontReplacementPattern):
@@ -1200,13 +1200,13 @@ The example of the configuration file for this type of transformation is `extens
and the corresponding transformation file is `./extensions/front/YOLO.py`:
```py
from extensions.front.no_op_eraser import NoOpEraser
from extensions.front.standalone_const_eraser import StandaloneConstEraser
from extensions.ops.regionyolo import RegionYoloOp
from mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral
from mo.graph.graph import Node, Graph
from mo.ops.result import Result
from mo.utils.error import Error
from openvino.tools.mo.front.no_op_eraser import NoOpEraser
from openvino.tools.mo.front.standalone_const_eraser import StandaloneConstEraser
from openvino.tools.mo.ops.regionyolo import RegionYoloOp
from openvino.tools.mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral
from openvino.tools.mo.graph.graph import Node, Graph
from openvino.tools.mo.ops.result import Result
from openvino.tools.mo.utils.error import Error
class YoloRegionAddon(FrontReplacementFromConfigFileGeneral):


@@ -20,9 +20,9 @@ assume that we have already created the `CustomOp` class (inherited from `Op` class)
for this MXNet custom operation as described in the [Customize_Model_Optimizer](Customize_Model_Optimizer.md).
```py
from extension.ops.custom_op import CustomOp # implementation of the MO operation class
from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
from mo.front.extractor import MXNetCustomFrontExtractorOp
from openvino.tools.mo.ops.custom_op import CustomOp # implementation of the MO operation class
from openvino.tools.mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
from openvino.tools.mo.front.extractor import MXNetCustomFrontExtractorOp
class CustomProposalFrontExtractor(MXNetCustomFrontExtractorOp): # inherit from specific base class
op = 'MyCustomOp' # the value corresponding to the `op_type` value of the MXNet operation


@@ -40,8 +40,8 @@ operation `ProposalOp` which corresponds to `Proposal` operation described in th
document. Refer to the source code below for a detailed explanation of the extractor.
```py
from extensions.ops.proposal import ProposalOp
from mo.front.extractor import CaffePythonFrontExtractorOp
from openvino.tools.mo.ops.proposal import ProposalOp
from openvino.tools.mo.front.extractor import CaffePythonFrontExtractorOp
class ProposalPythonFrontExtractor(CaffePythonFrontExtractorOp):


@@ -430,7 +430,7 @@ PassConfig instance taken from pass::Manager is shared across all registered tra
## Transformations testing <a name="transformations_testing"></a>
If you are developing a new transformation inside a plugin, you need to add a test to the `template_plugin/tests/functional/transformations` folder.
We have two types of tests: nGraph reader tests located in `inference-engine/tests/functional/inference_engine/ngraph_reader` and transformation tests located in `inference-engine/tests/functional/inference_engine/transformations`
We have two types of tests: nGraph reader tests located in `src/tests/functional/inference_engine/ngraph_reader` and transformation tests located in `src/tests/functional/inference_engine/transformations`
Reader tests are IR based and test end-to-end conversion from IR to CNNNetwork. Transformation tests test single ngraph transformations or low-level functions that are used inside transformations.
The basic transformation test looks like this:


@@ -38,17 +38,10 @@ target_include_directories(interpreter_backend PUBLIC $<BUILD_INTERFACE:${CMAKE_
file(GLOB_RECURSE all_backends_src "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/*.hpp")
add_clang_format_target(interpreter_backend_clang FOR_SOURCES ${all_backends_src})
# developer package
openvino_developer_export_targets(COMPONENT core TARGETS interpreter_backend)
install(TARGETS interpreter_backend
RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL
ARCHIVE DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL)
if(NOT BUILD_SHARED_LIBS)
install(TARGETS interpreter_backend
RUNTIME DESTINATION tests COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL
ARCHIVE DESTINATION tests COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL
LIBRARY DESTINATION tests COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL)
endif()
# install
ov_install_static_lib(interpreter_backend template)


@@ -1707,7 +1707,24 @@ bool evaluate(const shared_ptr<op::v0::Log>& op, const HostTensorVector& outputs
}
namespace ctc_loss_v4 {
template <element::Type_t t1, element::Type_t t2>
template <element::Type_t t1,
element::Type_t t2,
typename std::enable_if<!std::is_floating_point<typename element_type_traits<t1>::value_type>::value &&
!std::is_same<typename element_type_traits<t1>::value_type, bfloat16>::value &&
!std::is_same<typename element_type_traits<t1>::value_type, float16>::value,
bool>::type = true>
inline void evaluate(const shared_ptr<op::v4::CTCLoss>& op,
const HostTensorVector& outputs,
const HostTensorVector& inputs) {
OPENVINO_ASSERT(false, "The data type for logits is expected to be a floating point type. Got:", element::Type(t1));
}
template <element::Type_t t1,
element::Type_t t2,
typename std::enable_if<std::is_floating_point<typename element_type_traits<t1>::value_type>::value ||
std::is_same<typename element_type_traits<t1>::value_type, bfloat16>::value ||
std::is_same<typename element_type_traits<t1>::value_type, float16>::value,
bool>::type = true>
inline void evaluate(const shared_ptr<op::v4::CTCLoss>& op,
const HostTensorVector& outputs,
const HostTensorVector& inputs) {
@@ -1944,6 +1961,30 @@ bool evaluate(const shared_ptr<op::v0::RNNCell>& op, const HostTensorVector& out
return true;
}
template <element::Type_t ET>
bool evaluate(const shared_ptr<op::v0::LSTMCell>& op, const HostTensorVector& outputs, const HostTensorVector& inputs) {
using T = typename element_type_traits<ET>::value_type;
runtime::reference::lstm_cell<T>(inputs[0]->get_data_ptr<ET>(),
inputs[0]->get_shape(),
inputs[1]->get_data_ptr<ET>(),
inputs[1]->get_shape(),
inputs[2]->get_data_ptr<ET>(),
inputs[2]->get_shape(),
inputs[3]->get_data_ptr<ET>(),
inputs[3]->get_shape(),
inputs[4]->get_data_ptr<ET>(),
inputs[4]->get_shape(),
inputs[5]->get_data_ptr<ET>(),
inputs[5]->get_shape(),
outputs[0]->get_data_ptr<ET>(),
outputs[1]->get_data_ptr<ET>(),
op->get_activations()[0],
op->get_activations()[1],
op->get_activations()[2],
op->get_clip());
return true;
}
template <element::Type_t ET>
bool evaluate(const shared_ptr<op::v4::LSTMCell>& op, const HostTensorVector& outputs, const HostTensorVector& inputs) {
using T = typename element_type_traits<ET>::value_type;


@@ -20,6 +20,7 @@ NGRAPH_OP(Gelu, op::v0)
NGRAPH_OP(GRN, op::v0)
NGRAPH_OP(HardSigmoid, op::v0)
NGRAPH_OP(LRN, ngraph::op::v0)
NGRAPH_OP(LSTMCell, op::v0)
NGRAPH_OP(MVN, ngraph::op::v0)
NGRAPH_OP(NormalizeL2, op::v0)
NGRAPH_OP(PriorBox, ngraph::op::v0)


@@ -37,4 +37,3 @@ set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_REL
# ie_register_plugins(MAIN_TARGET ${TARGET_NAME}
# POSSIBLE_PLUGINS ${TARGET_NAME})
# [cmake:plugin]
ov_install_static_lib(interpreter_backend tests)


@@ -0,0 +1,182 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include "openvino/opsets/opset7.hpp"
#include "openvino/opsets/opset1.hpp"
#include "base_reference_test.hpp"
using namespace reference_tests;
using namespace ov;
namespace {
struct EinsumParams {
std::vector<Tensor> inputs;
std::string equation;
Tensor expectedResult;
std::string testcaseName;
};
struct Builder : ParamsBuilder<EinsumParams> {
REFERENCE_TESTS_ADD_SET_PARAM(Builder, inputs);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, equation);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, expectedResult);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName);
};
class ReferenceEinsumTest : public testing::TestWithParam<EinsumParams>, public CommonReferenceTest {
public:
void SetUp() override {
auto params = GetParam();
function = CreateModel(params);
for (const auto& input_tensor : params.inputs) {
inputData.push_back(input_tensor.data);
}
refOutData = {params.expectedResult.data};
}
static std::string getTestCaseName(const testing::TestParamInfo<EinsumParams>& obj) {
auto param = obj.param;
std::ostringstream result;
result << "iType=" << param.inputs[0].type;
result << "_iShape=" << param.inputs[0].shape;
result << "_equation=" << param.equation;
result << "_eType=" << param.expectedResult.type;
result << "_eShape=" << param.expectedResult.shape;
if (param.testcaseName != "") {
result << "_=" << param.testcaseName;
}
return result.str();
}
private:
static std::shared_ptr<Model> CreateModel(const EinsumParams& params) {
OutputVector output_vector;
ParameterVector param_vector;
for (const auto& input_tensor : params.inputs) {
auto param = std::make_shared<opset1::Parameter>(input_tensor.type, input_tensor.shape);
output_vector.push_back(param);
param_vector.push_back(param);
}
const auto einsum = std::make_shared<opset7::Einsum>(output_vector, params.equation);
const auto f = std::make_shared<Model>(OutputVector{einsum}, param_vector);
return f;
}
};
TEST_P(ReferenceEinsumTest, CompareWithRefs) {
Exec();
}
template <element::Type_t ET>
std::vector<EinsumParams> generateParams() {
using T = typename element_type_traits<ET>::value_type;
std::vector<EinsumParams> params {
Builder {}
.inputs({{ET, {1, 2}, std::vector<T>{1, 2}},
{ET, {3, 4}, std::vector<T>{3, 4, 5, 6,
7, 8, 9, 10,
11, 12, 13, 14}}})
.equation("ab,cd->abcd")
.expectedResult({ET, {1, 2, 3, 4}, std::vector<T>{3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 6, 8, 10, 12,
14, 16, 18, 20, 22, 24, 26, 28}})
.testcaseName("einsum_no_reduction"),
Builder {}
.inputs({{ET, {1, 2, 3}, std::vector<T>{1, 2, 3, 4, 5, 6}}})
.equation("ijk->kij")
.expectedResult({ET, {3, 1, 2}, std::vector<T>{1, 4, 2, 5, 3, 6}})
.testcaseName("einsum_transpose"),
Builder {}
.inputs({{ET, {2, 3}, std::vector<T>{1, 2, 3, 4, 5, 6}}})
.equation("ab->a")
.expectedResult({ET, {2}, std::vector<T>{6, 15}})
.testcaseName("einsum_reduce"),
Builder {}
.inputs({{ET, {2, 3}, std::vector<T>{1, 2, 3, 4, 5, 6}},
{ET, {3, 2}, std::vector<T>{1, 2, 3, 4, 5, 6}}})
.equation("ab,bc->ac")
.expectedResult({ET, {2, 2}, std::vector<T>{22, 28, 49, 64}})
.testcaseName("einsum_matrix_multiplication"),
Builder {}
.inputs({{ET, {2, 4}, std::vector<T>{1, 3, 2, 7, 5, 6, 0, 1}},
{ET, {4, 3, 1}, std::vector<T>{1, 2, 3, 4, 5, 6, 5, 7, 3, 7, 9, 1}},
{ET, {4, 3}, std::vector<T>{4, 3, 1, 6, 4, 2, 2, 5, 3, 1, 9, 4}}})
.equation("ab,bcd,bc->ca")
.expectedResult({ET, {3, 2}, std::vector<T>{145, 171, 703, 231, 85, 91}})
.testcaseName("einsum_multiple_multiplication"),
Builder {}
.inputs({{ET, {2, 2, 3}, std::vector<T>{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}}})
.equation("a...->...")
.expectedResult({ET, {2, 3}, std::vector<T>{4, 8, 4, 8, 5, 13}})
.testcaseName("einsum_ellipsis_one_input_reduction"),
Builder {}
.inputs({{ET, {2, 2, 3}, std::vector<T>{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}}})
.equation("a...->...a")
.expectedResult({ET, {2, 3, 2}, std::vector<T>{1, 3, 3, 5, 2, 2, 7, 1, 5, 0, 6, 7}})
.testcaseName("einsum_ellipsis_one_input_transpose"),
Builder {}
.inputs({{ET, {2, 2, 3}, std::vector<T>{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}},
{ET, {1}, std::vector<T>{2}}})
.equation("ab...,...->ab...")
.expectedResult({ET, {2, 2, 3}, std::vector<T>{2, 6, 4, 14, 10, 12, 6, 10, 4, 2, 0, 14}})
.testcaseName("einsum_ellipsis_mul_by_1dscalar"),
Builder {}
.inputs({{ET, {1, 1, 4, 3}, std::vector<T>{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}},
{ET, {3, 4, 2, 1}, std::vector<T>{3, 1, 6, 2, 3, 10, 9, 8, 2, 9, 3, 2,
4, 2, 3, 1, 9, 1, 11, 4, 7, 2, 3, 1}}})
.equation("a...j,j...->a...")
.expectedResult({ET, {1, 4, 2, 4}, std::vector<T>{27, 85, 37, 66, 30, 58, 50, 8,
37, 123, 55, 83, 16, 48, 24, 30,
29, 83, 43, 52, 20, 92, 44, 24,
24, 96, 48, 30, 13, 67, 31, 15}})
.testcaseName("einsum_ellipsis_complex_mul"),
Builder {}
.inputs({{ET, {1, 3, 3}, std::vector<T>{1, 2, 3, 4, 5, 6, 7, 8, 9}}})
.equation("kii->ki")
.expectedResult({ET, {1, 3}, std::vector<T>{1, 5, 9}})
.testcaseName("einsum_diagonal"),
Builder {}
.inputs({{ET, {2, 3, 3, 2, 4}, std::vector<T>{4, 2, 5, 4, 5, 5, 1, 1, 3, 3, 1, 1, 2, 2, 4, 1, 3, 4,
4, 5, 1, 3, 1, 3, 1, 4, 3, 5, 4, 4, 5, 4, 4, 5, 4, 2,
2, 2, 3, 3, 1, 1, 4, 3, 4, 2, 2, 1, 1, 2, 3, 1, 1, 4,
2, 3, 1, 3, 4, 2, 5, 5, 3, 4, 3, 4, 5, 4, 4, 5, 1, 3,
4, 4, 5, 3, 1, 3, 2, 5, 3, 2, 5, 4, 4, 2, 4, 4, 1, 4,
4, 5, 4, 4, 4, 2, 3, 3, 4, 2, 4, 2, 5, 1, 3, 2, 4, 3,
5, 1, 2, 3, 1, 1, 2, 5, 1, 1, 2, 1, 4, 5, 3, 4, 1, 3,
3, 1, 3, 2, 4, 5, 1, 1, 5, 4, 5, 2, 2, 3, 3, 1, 2, 4}},
{ET, {3, 2, 1}, std::vector<T>{1, 4, 4, 5, 3, 3}}})
.equation("abbac,bad->ad")
.expectedResult({ET, {2, 1}, std::vector<T>{123, 129}})
.testcaseName("einsum_diagonal_with_matmul"),
};
return params;
}
std::vector<EinsumParams> generateCombinedParams() {
const std::vector<std::vector<EinsumParams>> generatedParams {
generateParams<element::Type_t::i32>(),
generateParams<element::Type_t::f32>(),
};
std::vector<EinsumParams> combinedParams;
for (const auto& params : generatedParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
INSTANTIATE_TEST_SUITE_P(smoke_Einsum_With_Hardcoded_Refs, ReferenceEinsumTest,
testing::ValuesIn(generateCombinedParams()), ReferenceEinsumTest::getTestCaseName);
} // namespace


@@ -0,0 +1,246 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include "openvino/opsets/opset3.hpp"
#include "openvino/opsets/opset1.hpp"
#include "base_reference_test.hpp"
using namespace reference_tests;
using namespace ov;
namespace {
struct ExtractImagePatchesParams {
Tensor data;
Shape sizes;
Strides strides;
Shape rates;
op::PadType autoPad;
Tensor expectedResult;
std::string testcaseName;
};
struct Builder : ParamsBuilder<ExtractImagePatchesParams> {
REFERENCE_TESTS_ADD_SET_PARAM(Builder, data);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, sizes);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, strides);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, rates);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, autoPad);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, expectedResult);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName);
};
class ReferenceExtractImagePatchesTest : public testing::TestWithParam<ExtractImagePatchesParams>, public CommonReferenceTest {
public:
void SetUp() override {
auto params = GetParam();
function = CreateModel(params);
inputData = {params.data.data};
refOutData = {params.expectedResult.data};
}
static std::string getTestCaseName(const testing::TestParamInfo<ExtractImagePatchesParams>& obj) {
auto param = obj.param;
std::ostringstream result;
result << "dType=" << param.data.type;
result << "_dShape=" << param.data.shape;
result << "_sizes=" << param.sizes;
result << "_strides=" << param.strides;
result << "_rates=" << param.rates;
result << "_autoPad=" << param.autoPad;
result << "_eType=" << param.expectedResult.type;
result << "_eShape=" << param.expectedResult.shape;
if (param.testcaseName != "") {
result << "_=" << param.testcaseName;
}
return result.str();
}
private:
static std::shared_ptr<Model> CreateModel(const ExtractImagePatchesParams& params) {
const auto data = std::make_shared<opset1::Parameter>(params.data.type, params.data.shape);
const auto extrace_image_patches = std::make_shared<opset3::ExtractImagePatches>(data,
params.sizes,
params.strides,
params.rates,
params.autoPad);
const auto f = std::make_shared<Model>(extrace_image_patches, ParameterVector{data});
return f;
}
};
TEST_P(ReferenceExtractImagePatchesTest, CompareWithRefs) {
Exec();
}
template <element::Type_t ET>
std::vector<ExtractImagePatchesParams> generateParams() {
using T = typename element_type_traits<ET>::value_type;
std::vector<ExtractImagePatchesParams> params {
Builder {}
.data({ET, {1, 1, 10, 10}, std::vector<T>{
1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 100}})
.sizes({3, 3})
.strides({5, 5})
.rates({1, 1})
.autoPad(op::PadType::VALID)
.expectedResult({ET, {1, 9, 2, 2}, std::vector<T>{
1, 6, 51, 56,
2, 7, 52, 57,
3, 8, 53, 58,
11, 16, 61, 66,
12, 17, 62, 67,
13, 18, 63, 68,
21, 26, 71, 76,
22, 27, 72, 77,
23, 28, 73, 78}}),
Builder {}
.data({ET, {1, 1, 10, 10}, std::vector<T>{
1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 100}})
.sizes({4, 4})
.strides({8, 8})
.rates({1, 1})
.autoPad(op::PadType::VALID)
.expectedResult({ET, {1, 16, 1, 1}, std::vector<T>{
1, 2, 3, 4,
11, 12, 13, 14,
21, 22, 23, 24,
31, 32, 33, 34}}),
Builder {}
.data({ET, {1, 1, 10, 10}, std::vector<T>{
1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 100}})
.sizes({4, 4})
.strides({9, 9})
.rates({1, 1})
.autoPad(op::PadType::SAME_UPPER)
.expectedResult({ET, {1, 16, 2, 2}, std::vector<T>{
0, 0, 0, 89,
0, 0, 81, 90,
0, 0, 82, 0,
0, 0, 83, 0,
0, 9, 0, 99,
1, 10, 91, 100,
2, 0, 92, 0,
3, 0, 93, 0,
0, 19, 0, 0,
11, 20, 0, 0,
12, 0, 0, 0,
13, 0, 0, 0,
0, 29, 0, 0,
21, 30, 0, 0,
22, 0, 0, 0,
23, 0, 0, 0}}),
Builder {}
.data({ET, {1, 1, 10, 10}, std::vector<T>{
1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 100}})
.sizes({3, 3})
.strides({5, 5})
.rates({2, 2})
.autoPad(op::PadType::VALID)
.expectedResult({ET, {1, 9, 2, 2}, std::vector<T>{
1, 6, 51, 56,
3, 8, 53, 58,
5, 10, 55, 60,
21, 26, 71, 76,
23, 28, 73, 78,
25, 30, 75, 80,
41, 46, 91, 96,
43, 48, 93, 98,
45, 50, 95, 100}}),
Builder {}
.data({ET, {1, 2, 5, 5}, std::vector<T>{
1, 2, 3, 4, 5,
6, 7, 8, 9, 10,
11, 12, 13, 14, 15,
16, 17, 18, 19, 20,
21, 22, 23, 24, 25,
26, 27, 28, 29, 30,
31, 32, 33, 34, 35,
36, 37, 38, 39, 40,
41, 42, 43, 44, 45,
46, 47, 48, 49, 50}})
.sizes({2, 2})
.strides({3, 3})
.rates({1, 1})
.autoPad(op::PadType::VALID)
.expectedResult({ET, {1, 8, 2, 2}, std::vector<T>{
1, 4, 16, 19,
26, 29, 41, 44,
2, 5, 17, 20,
27, 30, 42, 45,
6, 9, 21, 24,
31, 34, 46, 49,
7, 10, 22, 25,
32, 35, 47, 50}}),
};
return params;
}
std::vector<ExtractImagePatchesParams> generateCombinedParams() {
const std::vector<std::vector<ExtractImagePatchesParams>> generatedParams {
generateParams<element::Type_t::i8>(),
generateParams<element::Type_t::i16>(),
generateParams<element::Type_t::i32>(),
generateParams<element::Type_t::i64>(),
generateParams<element::Type_t::u8>(),
generateParams<element::Type_t::u16>(),
generateParams<element::Type_t::u32>(),
generateParams<element::Type_t::u64>(),
generateParams<element::Type_t::bf16>(),
generateParams<element::Type_t::f16>(),
generateParams<element::Type_t::f32>(),
generateParams<element::Type_t::f64>(),
};
std::vector<ExtractImagePatchesParams> combinedParams;
for (const auto& params : generatedParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
INSTANTIATE_TEST_SUITE_P(smoke_ExtractImagePatches_With_Hardcoded_Refs, ReferenceExtractImagePatchesTest,
testing::ValuesIn(generateCombinedParams()), ReferenceExtractImagePatchesTest::getTestCaseName);
} // namespace


@@ -4,7 +4,8 @@
#include <gtest/gtest.h>
#include "openvino/op/lstm_cell.hpp"
#include "openvino/opsets/opset4.hpp"
#include "openvino/opsets/opset1.hpp"
#include "base_reference_test.hpp"
using namespace reference_tests;
@@ -12,13 +13,6 @@ using namespace ov;
namespace {
struct LSTMCellParams {
LSTMCellParams(
int32_t batchSize, int32_t inputSize, int32_t hiddenSize, int32_t gatesCount,
const Tensor& X, const Tensor& W, const Tensor& R, const Tensor& H_t, const Tensor& C_t, const Tensor& B,
const Tensor& Ho, const Tensor& Co, const std::string& testcaseName = "") :
batchSize(batchSize), inputSize(inputSize), hiddenSize(hiddenSize), gatesCount(gatesCount),
X(X), W(W), R(R), H_t(H_t), C_t(C_t), B(B), Ho(Ho), Co(Co), testcaseName(testcaseName) {}
int32_t batchSize;
int32_t inputSize;
int32_t hiddenSize;
@@ -34,6 +28,22 @@ struct LSTMCellParams {
std::string testcaseName;
};
struct Builder : ParamsBuilder<LSTMCellParams> {
REFERENCE_TESTS_ADD_SET_PARAM(Builder, batchSize);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, inputSize);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, hiddenSize);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, gatesCount);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, X);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, W);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, R);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, H_t);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, C_t);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, B);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, Ho);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, Co);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName);
};
class ReferenceLSTMCellTest : public testing::TestWithParam<LSTMCellParams>, public CommonReferenceTest {
public:
void SetUp() override {
@@ -63,26 +73,24 @@ public:
result << "_hoType=" << param.Ho.type;
result << "_hoShape=" << param.Ho.shape;
result << "_coType=" << param.Co.type;
result << "_coShape=" << param.Co.shape;
if (param.testcaseName != "") {
result << "_coShape=" << param.Co.shape;
result << "_=" << param.testcaseName;
} else {
result << "_coShape=" << param.Co.shape;
}
return result.str();
}
private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const auto X = std::make_shared<op::v0::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<op::v0::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<op::v0::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<op::v0::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<op::v0::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<op::v0::Parameter>(params.B.type, params.B.shape);
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<op::v4::LSTMCell>(X,
std::make_shared<opset4::LSTMCell>(X,
H_t,
C_t,
op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
@@ -107,15 +115,15 @@ public:
private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const auto X = std::make_shared<op::v0::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<op::v0::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<op::v0::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<op::v0::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<op::v0::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<op::v0::Parameter>(params.B.type, params.B.shape);
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<op::v4::LSTMCell>(X,
std::make_shared<opset4::LSTMCell>(X,
H_t,
C_t,
op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
@@ -142,15 +150,15 @@ private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const float clip_threshold = 3.5f;
const auto X = std::make_shared<op::v0::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<op::v0::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<op::v0::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<op::v0::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<op::v0::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<op::v0::Parameter>(params.B.type, params.B.shape);
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<op::v4::LSTMCell>(X,
std::make_shared<opset4::LSTMCell>(X,
H_t,
C_t,
W,
@@ -179,36 +187,130 @@ TEST_P(ReferenceLSTMCellTestBiasClip, CompareWithRefs) {
Exec();
}
class ReferenceLSTMCellV1Test : public ReferenceLSTMCellTest {
private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<opset1::LSTMCell>(X,
H_t,
C_t,
op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
op::util::convert_lstm_node_format(R, op::util::LSTMWeightsFormat::IOFC),
op::util::convert_lstm_node_format(B, op::util::LSTMWeightsFormat::IOFC),
params.hiddenSize);
auto function = std::make_shared<Model>(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B});
return function;
}
};
class ReferenceLSTMCellV1TestBiasDefaultAttrs : public ReferenceLSTMCellTestBiasDefaultAttrs {
private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<opset1::LSTMCell>(X,
H_t,
C_t,
op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
op::util::convert_lstm_node_format(R, op::util::LSTMWeightsFormat::IOFC),
op::util::convert_lstm_node_format(B, op::util::LSTMWeightsFormat::IOFC),
params.hiddenSize);
auto function = std::make_shared<Model>(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B});
return function;
}
};
class ReferenceLSTMCellV1TestBiasClip : public ReferenceLSTMCellTestBiasClip {
private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const float clip_threshold = 3.5f;
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<opset1::LSTMCell>(X,
H_t,
C_t,
W,
R,
B,
params.hiddenSize,
op::LSTMWeightsFormat::IFCO,
std::vector<std::string>{"sigmoid", "tanh", "tanh"},
std::vector<float>{},
std::vector<float>{},
clip_threshold);
auto function = std::make_shared<Model>(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B});
return function;
}
};
TEST_P(ReferenceLSTMCellV1Test, CompareWithRefs) {
Exec();
}
TEST_P(ReferenceLSTMCellV1TestBiasDefaultAttrs, CompareWithRefs) {
Exec();
}
TEST_P(ReferenceLSTMCellV1TestBiasClip, CompareWithRefs) {
Exec();
}
template <element::Type_t ET>
std::vector<LSTMCellParams> generateParams() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
LSTMCellParams(
2, 3, 3, 4,
Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}),
Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)),
Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}),
Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}),
"lstm_cell_zero_bias_default_attrs"),
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}))
.testcaseName("lstm_cell_zero_bias_default_attrs")
};
return params;
}
@@ -232,53 +334,56 @@ template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsBiasDefaultAttrs() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
LSTMCellParams(
2, 3, 3, 4,
Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}),
Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
1.15248052f,
1.16671345f,
0.21450312f,
1.2380678f,
1.51688835f,
0.46718366f,
0.91810346f,
1.1274234f,
0.51022074f,
1.11389844f,
0.74174305f}),
Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
1.15248052f,
1.16671345f,
0.21450312f,
1.2380678f,
1.51688835f,
0.46718366f,
0.91810346f,
1.1274234f,
0.51022074f,
1.11389844f,
0.74174305f}))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
0.76665538549423218,
0.82509011030197144,
0.6479143500328064,
0.66586339473724365,
0.74838578701019287}),
Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
0.74838578701019287}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
1.1150213479995728,
1.4578367471694946,
1.0649888515472412,
0.93761754035949707,
1.3659683465957642}),
"lstm_cell_bias_default_attrs"),
1.3659683465957642}))
.testcaseName("lstm_cell_bias_default_attrs"),
};
return params;
}
@@ -302,53 +407,56 @@ template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsBiasClip() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
LSTMCellParams(
2, 3, 3, 4,
Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}),
Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
1.15248052f,
1.16671345f,
0.21450312f,
1.2380678f,
1.51688835f,
0.46718366f,
0.91810346f,
1.1274234f,
0.51022074f,
1.11389844f,
0.74174305f}),
Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
1.15248052f,
1.16671345f,
0.21450312f,
1.2380678f,
1.51688835f,
0.46718366f,
0.91810346f,
1.1274234f,
0.51022074f,
1.11389844f,
0.74174305f}))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
0.76665538549423218,
0.82387429475784302,
0.6479143500328064,
0.66586339473724365,
0.74838578701019287}),
Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
0.74838578701019287}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
1.1150213479995728,
1.4510968923568726,
1.0649888515472412,
0.93761754035949707,
1.3659683465957642}),
"lstm_cell_bias_clip"),
1.3659683465957642}))
.testcaseName("lstm_cell_bias_clip"),
};
return params;
}
@ -376,4 +484,211 @@ INSTANTIATE_TEST_SUITE_P(smoke_LSTMCell_With_Hardcoded_Refs, ReferenceLSTMCellTe
INSTANTIATE_TEST_SUITE_P(smoke_LSTMCell_With_Hardcoded_Refs, ReferenceLSTMCellTestBiasClip,
testing::ValuesIn(generateCombinedParamsBiasClip()), ReferenceLSTMCellTest::getTestCaseName);
} // namespace
template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsV1() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}))
.testcaseName("lstm_cell_v1_zero_bias_default_attrs")
};
return params;
}
std::vector<LSTMCellParams> generateCombinedParamsV1() {
const std::vector<std::vector<LSTMCellParams>> generatedParams {
generateParamsV1<element::Type_t::bf16>(),
generateParamsV1<element::Type_t::f16>(),
generateParamsV1<element::Type_t::f32>(),
generateParamsV1<element::Type_t::f64>(),
};
std::vector<LSTMCellParams> combinedParams;
for (const auto& params : generatedParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsBiasDefaultAttrsV1() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
1.15248052f,
1.16671345f,
0.21450312f,
1.2380678f,
1.51688835f,
0.46718366f,
0.91810346f,
1.1274234f,
0.51022074f,
1.11389844f,
0.74174305f}))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
0.76665538549423218,
0.82509011030197144,
0.6479143500328064,
0.66586339473724365,
0.74838578701019287}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
1.1150213479995728,
1.4578367471694946,
1.0649888515472412,
0.93761754035949707,
1.3659683465957642}))
.testcaseName("lstm_cell_v1_bias_default_attrs"),
};
return params;
}
std::vector<LSTMCellParams> generateCombinedParamsBiasDefaultAttrsV1() {
const std::vector<std::vector<LSTMCellParams>> generatedParams {
generateParamsBiasDefaultAttrsV1<element::Type_t::bf16>(),
generateParamsBiasDefaultAttrsV1<element::Type_t::f16>(),
generateParamsBiasDefaultAttrsV1<element::Type_t::f32>(),
generateParamsBiasDefaultAttrsV1<element::Type_t::f64>(),
};
std::vector<LSTMCellParams> combinedParams;
for (const auto& params : generatedParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsBiasClipV1() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
1.15248052f,
1.16671345f,
0.21450312f,
1.2380678f,
1.51688835f,
0.46718366f,
0.91810346f,
1.1274234f,
0.51022074f,
1.11389844f,
0.74174305f}))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
0.76665538549423218,
0.82387429475784302,
0.6479143500328064,
0.66586339473724365,
0.74838578701019287}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
1.1150213479995728,
1.4510968923568726,
1.0649888515472412,
0.93761754035949707,
1.3659683465957642}))
.testcaseName("lstm_cell_v1_bias_clip"),
};
return params;
}
std::vector<LSTMCellParams> generateCombinedParamsBiasClipV1() {
const std::vector<std::vector<LSTMCellParams>> generatedParams {
generateParamsBiasClipV1<element::Type_t::bf16>(),
generateParamsBiasClipV1<element::Type_t::f16>(),
generateParamsBiasClipV1<element::Type_t::f32>(),
generateParamsBiasClipV1<element::Type_t::f64>(),
};
std::vector<LSTMCellParams> combinedParams;
for (const auto& params : generatedParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1Test,
testing::ValuesIn(generateCombinedParamsV1()), ReferenceLSTMCellV1Test::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1TestBiasDefaultAttrs,
testing::ValuesIn(generateCombinedParamsBiasDefaultAttrsV1()), ReferenceLSTMCellV1Test::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1TestBiasClip,
testing::ValuesIn(generateCombinedParamsBiasClipV1()), ReferenceLSTMCellV1Test::getTestCaseName);
} // namespace
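
The LSTMCell parameter refactor above replaces a long positional constructor with a fluent Builder. As a rough sketch of that pattern (illustrative only; the actual Builder in the test sources carries Tensor-typed fields and converts to LSTMCellParams), chained setters returning `*this` let each test case name only the fields it sets:

```cpp
// Illustrative sketch of the fluent-builder pattern adopted above; the real
// Builder lives in the LSTMCell test sources and holds Tensor-typed fields.
#include <string>
#include <utility>
#include <vector>

struct Params {
    int batchSize = 0;
    int hiddenSize = 0;
    std::string testcaseName;
};

struct Builder {
    // each setter returns *this, so fields can be chained in any order
    Builder& batchSize(int v) { params.batchSize = v; return *this; }
    Builder& hiddenSize(int v) { params.hiddenSize = v; return *this; }
    Builder& testcaseName(std::string v) { params.testcaseName = std::move(v); return *this; }
    operator Params() const { return params; }  // implicit conversion, as used in the vector initializers
private:
    Params params;
};

// usage mirroring the test code:
const std::vector<Params> cases{
    Builder{}.batchSize(2).hiddenSize(3).testcaseName("lstm_cell_sketch"),
};
```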

View File

@ -4,8 +4,8 @@
#include <gtest/gtest.h>
#include "openvino/op/topk.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/opsets/opset3.hpp"
#include "openvino/opsets/opset1.hpp"
#include "base_reference_test.hpp"
using namespace reference_tests;
@ -15,7 +15,7 @@ namespace {
struct TopKParams {
TopKParams(
const Tensor& A, const Tensor& k, const int64_t axis,
const op::v1::TopK::Mode mode, const op::v1::TopK::SortType sort,
const opset1::TopK::Mode mode, const opset1::TopK::SortType sort,
const Tensor& result0, const Tensor& result1, const size_t outIdx,
const std::string& testcaseName = "") :
A(A), k(k), axis(axis), mode(mode), sort(sort),
@ -25,8 +25,8 @@ struct TopKParams {
Tensor A;
Tensor k;
int64_t axis;
op::v1::TopK::Mode mode;
op::v1::TopK::SortType sort;
opset1::TopK::Mode mode;
opset1::TopK::SortType sort;
Tensor result0;
Tensor result1;
size_t outIdx;
@ -71,7 +71,6 @@ struct TopKParamsResnet50 {
std::string testcaseName;
};
class ReferenceTopKTestResnet50 : public testing::TestWithParam<TopKParamsResnet50>, public CommonReferenceTest {
public:
void SetUp() override {
@ -101,18 +100,18 @@ public:
private:
static std::shared_ptr<Model> CreateFunction(const TopKParamsResnet50& params) {
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto B = std::make_shared<op::v1::TopK>(A,
op::v0::Constant::create(element::i64, {}, {5}),
const auto B = std::make_shared<opset1::TopK>(A,
opset1::Constant::create(element::i64, {}, {5}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES);
const auto C = std::make_shared<op::v1::TopK>(A,
op::v0::Constant::create(element::i64, {}, {1}),
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES);
const auto C = std::make_shared<opset1::TopK>(A,
opset1::Constant::create(element::i64, {}, {1}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES);
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES);
const auto out5_value = B->output(0);
const auto out5_index = B->output(1);
@ -220,12 +219,12 @@ public:
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = op::v0::Constant::create(params.k.type,
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<op::v1::TopK>(A, k, params.axis, params.mode, params.sort);
const auto B = std::make_shared<opset1::TopK>(A, k, params.axis, params.mode, params.sort);
const auto f = std::make_shared<Model>(B->outputs(), ParameterVector{A});
return f;
}
@ -253,8 +252,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
}({128, 1000})),
Tensor(ET2, {}, std::vector<T2>{5}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::NONE,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::NONE,
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape, std::vector<size_t> shape) -> std::vector<T>{
std::vector<T> expected_value;
for (size_t i = 0; i < rshape[0]; i++) {
@ -292,8 +291,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
}({128, 1000})),
Tensor(ET2, {}, std::vector<T2>{5}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::NONE,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::NONE,
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape) -> std::vector<T>{
std::vector<T> expected_value;
for (size_t i = 0; i < rshape[0]; i++) {
@ -331,8 +330,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
}({128, 1000})),
Tensor(ET2, {}, std::vector<T2>{5}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape, std::vector<size_t> shape) -> std::vector<T>{
std::vector<T> expected_value;
for (size_t i = 0; i < rshape[0]; i++) {
@ -366,8 +365,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
}({128, 1000})),
Tensor(ET2, {}, std::vector<T2>{5}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape) -> std::vector<T>{
std::vector<T> expected_value;
for (size_t i = 0; i < rshape[0]; i++) {
@ -401,8 +400,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
}({128, 1000})),
Tensor(ET2, {}, std::vector<T2>{5}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_INDICES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_INDICES,
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape, std::vector<size_t> shape) -> std::vector<T>{
std::vector<T> expected_value;
for (size_t i = 0; i < rshape[0]; i++) {
@ -440,8 +439,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
}({128, 1000})),
Tensor(ET2, {}, std::vector<T2>{5}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_INDICES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_INDICES,
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape) -> std::vector<T>{
std::vector<T> expected_value;
for (size_t i = 0; i < rshape[0]; i++) {
@ -467,8 +466,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {3}, std::vector<T>{5, 4, 3}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{3, 4, 0}),
0,
@ -478,8 +477,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_INDICES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_INDICES,
Tensor(ET, {3}, std::vector<T>{3, 5, 4}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{0, 3, 4}),
0,
@ -489,8 +488,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {3}, std::vector<T>{1, 2, 3}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{1, 2, 0}),
0,
@ -500,8 +499,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_INDICES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_INDICES,
Tensor(ET, {3}, std::vector<T>{3, 1, 2}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{0, 1, 2}),
0,
@ -536,7 +535,7 @@ std::vector<TopKParams> generateCombinedParamsMaxMinSort() {
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestMaxMinSort,
testing::ValuesIn(generateCombinedParamsMaxMinSort()), ReferenceTopKTest::getTestCaseName);
class ReferenceTopKTestV3 : public ReferenceTopKTest {
class ReferenceTopKTestBackend : public ReferenceTopKTest {
public:
void SetUp() override {
auto params = GetParam();
@ -547,18 +546,18 @@ public:
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = op::v0::Constant::create(params.k.type,
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<op::v3::TopK>(A, k, params.axis, params.mode, params.sort);
const auto B = std::make_shared<opset1::TopK>(A, k, params.axis, params.mode, params.sort);
const auto f = std::make_shared<Model>(B->outputs(), ParameterVector{A});
return f;
}
};
TEST_P(ReferenceTopKTestV3, CompareWithRefs) {
TEST_P(ReferenceTopKTestBackend, CompareWithRefs) {
Exec();
}
@ -572,8 +571,8 @@ std::vector<TopKParams> generateParamsV3() {
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {3}, std::vector<T>{5, 4, 3}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{3, 4, 0}),
0,
@ -583,8 +582,8 @@ std::vector<TopKParams> generateParamsV3() {
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_INDICES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_INDICES,
Tensor(ET, {3}, std::vector<T>{3, 5, 4}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{0, 3, 4}),
0,
@ -594,8 +593,8 @@ std::vector<TopKParams> generateParamsV3() {
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {3}, std::vector<T>{1, 2, 3}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{1, 2, 0}),
0,
@ -605,8 +604,8 @@ std::vector<TopKParams> generateParamsV3() {
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_INDICES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_INDICES,
Tensor(ET, {3}, std::vector<T>{3, 1, 2}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{0, 1, 2}),
0,
@ -615,7 +614,7 @@ std::vector<TopKParams> generateParamsV3() {
return params;
}
std::vector<TopKParams> generateCombinedParamsV3() {
std::vector<TopKParams> generateCombinedParamsBackend() {
const std::vector<std::vector<TopKParams>> generatedParams {
generateParamsMaxMinSort<element::Type_t::i8, element::Type_t::i64, element::Type_t::i32>(),
generateParamsMaxMinSort<element::Type_t::i16, element::Type_t::i64, element::Type_t::i32>(),
@ -638,8 +637,8 @@ std::vector<TopKParams> generateCombinedParamsV3() {
return combinedParams;
}
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestV3,
testing::ValuesIn(generateCombinedParamsV3()), ReferenceTopKTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestBackend,
testing::ValuesIn(generateCombinedParamsBackend()), ReferenceTopKTest::getTestCaseName);
class ReferenceTopKTest1dMaxMin : public ReferenceTopKTest {
public:
@ -673,12 +672,12 @@ public:
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params, size_t out_idx) {
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = op::v0::Constant::create(params.k.type,
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<op::v1::TopK>(A, k, params.axis, params.mode, params.sort);
const auto B = std::make_shared<opset1::TopK>(A, k, params.axis, params.mode, params.sort);
const auto f = std::make_shared<Model>(OutputVector{B->output(out_idx)}, ParameterVector{A});
return f;
}
@ -698,8 +697,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
Tensor(ET2, {}, std::vector<T2>{6}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
Tensor(ET_OUT, {6}, std::vector<T_OUT>{5, 4, 3, 2, 1, 0}),
0,
@ -709,8 +708,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
Tensor(ET2, {}, std::vector<T2>{6}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
Tensor(ET_OUT, {6}, std::vector<T_OUT>{5, 4, 3, 2, 1, 0}),
1,
@ -720,8 +719,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {3}, std::vector<T>{6, 5, 4}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{5, 4, 3}),
0,
@ -731,8 +730,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {3}, std::vector<T>{6, 5, 4}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{5, 4, 3}),
1,
@ -742,8 +741,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
Tensor(ET2, {}, std::vector<T2>{1}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {1}, std::vector<T>{6}),
Tensor(ET_OUT, {1}, std::vector<T_OUT>{5}),
0,
@ -753,8 +752,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
Tensor(ET2, {}, std::vector<T2>{1}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {1}, std::vector<T>{6}),
Tensor(ET_OUT, {1}, std::vector<T_OUT>{5}),
1,
@ -764,8 +763,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
Tensor(ET2, {}, std::vector<T2>{6}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
Tensor(ET_OUT, {6}, std::vector<T_OUT>{5, 4, 3, 2, 1, 0}),
0,
@ -775,8 +774,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
Tensor(ET2, {}, std::vector<T2>{6}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
Tensor(ET_OUT, {6}, std::vector<T_OUT>{5, 4, 3, 2, 1, 0}),
1,
@ -786,8 +785,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {3}, std::vector<T>{1, 2, 3}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{5, 4, 3}),
0,
@ -797,8 +796,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {3}, std::vector<T>{1, 2, 3}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{5, 4, 3}),
1,
@ -808,8 +807,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
Tensor(ET2, {}, std::vector<T2>{1}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {1}, std::vector<T>{1}),
Tensor(ET_OUT, {1}, std::vector<T_OUT>{5}),
0,
@ -819,8 +818,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
Tensor(ET2, {}, std::vector<T2>{1}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {1}, std::vector<T>{1}),
Tensor(ET_OUT, {1}, std::vector<T_OUT>{5}),
1,
@ -832,8 +831,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{3}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3, 2}, std::vector<T>{
10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1
}),
@ -849,8 +848,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{3}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3, 2}, std::vector<T>{
10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1
}),
@ -882,8 +881,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 2, 3, 2, 4}, std::vector<T>{
169, 241, 177, 249, 185, 233, 170, 242, 178, 250, 186, 258, 171, 243, 179, 251,
187, 259, 172, 224, 180, 252, 188, 260, 149, 221, 157, 229, 165, 113, 150, 222,
@ -923,8 +922,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 2, 3, 2, 4}, std::vector<T>{
169, 241, 177, 249, 185, 233, 170, 242, 178, 250, 186, 258, 171, 243, 179, 251,
187, 259, 172, 224, 180, 252, 188, 260, 149, 221, 157, 229, 165, 113, 150, 222,
@ -948,8 +947,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 2, 2}, std::vector<T>{
10, 12, 9, 4, 11, 7, 6, 3
}),
@ -965,8 +964,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 2, 2}, std::vector<T>{
10, 12, 9, 4, 11, 7, 6, 3
}),
@ -982,8 +981,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 1, 2}, std::vector<T>{
10, 12, 11, 7
}),
@ -999,8 +998,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 1, 2}, std::vector<T>{
10, 12, 11, 7
}),
@ -1016,8 +1015,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{3}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3, 2}, std::vector<T>{
8, 2, 10, 4, 12, 9, 5, 1, 6, 3, 11, 7
}),
@ -1033,8 +1032,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{3}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3, 2}, std::vector<T>{
8, 2, 10, 4, 12, 9, 5, 1, 6, 3, 11, 7
}),
@ -1050,8 +1049,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 2, 2}, std::vector<T>{
8, 2, 10, 4, 5, 1, 6, 3
}),
@ -1067,8 +1066,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 2, 2}, std::vector<T>{
8, 2, 10, 4, 5, 1, 6, 3
}),
@ -1084,8 +1083,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 1, 2}, std::vector<T>{
8, 2, 5, 1
}),
@ -1101,8 +1100,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 1, 2}, std::vector<T>{
8, 2, 5, 1
}),
@ -1118,8 +1117,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{4}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {4, 3}, std::vector<T>{
12, 11, 10, 9, 8, 7, 6, 2, 5, 3, 1, 4
}),
@ -1135,8 +1134,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{4}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {4, 3}, std::vector<T>{
12, 11, 10, 9, 8, 7, 6, 2, 5, 3, 1, 4
}),
@ -1152,8 +1151,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3}, std::vector<T>{
12, 11, 10, 9, 8, 7
}),
@ -1169,8 +1168,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3}, std::vector<T>{
12, 11, 10, 9, 8, 7
}),
@ -1186,8 +1185,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {1, 3}, std::vector<T>{
12, 11, 10
}),
@ -1203,8 +1202,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {1, 3}, std::vector<T>{
12, 11, 10
}),
@ -1220,8 +1219,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 1}, std::vector<T>{
4, 3
}),
@ -1237,8 +1236,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 1}, std::vector<T>{
4, 3
}),
@ -1254,8 +1253,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{4}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {4, 3}, std::vector<T>{
3, 1, 4, 6, 2, 5, 9, 8, 7, 12, 11, 10
}),
@ -1271,8 +1270,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{4}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {4, 3}, std::vector<T>{
3, 1, 4, 6, 2, 5, 9, 8, 7, 12, 11, 10
}),
@ -1288,8 +1287,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3}, std::vector<T>{
3, 1, 4, 6, 2, 5
}),
@ -1305,8 +1304,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3}, std::vector<T>{
3, 1, 4, 6, 2, 5
}),
@ -1322,8 +1321,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::NONE,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::NONE,
Tensor(ET, {1, 3}, std::vector<T>{
3, 1, 4
}),
@ -1339,8 +1338,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::NONE,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::NONE,
Tensor(ET, {1, 3}, std::vector<T>{
3, 1, 4
}),
@ -1380,12 +1379,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTest1dMaxM
class ReferenceTopKTestInt64 : public ReferenceTopKTest1dMaxMin {
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params, size_t out_idx) {
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = op::v0::Constant::create(params.k.type,
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<op::v1::TopK>(A,
const auto B = std::make_shared<opset1::TopK>(A,
k,
params.axis,
params.mode,
@ -1412,8 +1411,8 @@ std::vector<TopKParams> generateParamsInt64() {
}),
Tensor(ET2, {}, std::vector<T2>{3}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3, 2}, std::vector<T>{
10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1
}),
@ -1428,8 +1427,8 @@ std::vector<TopKParams> generateParamsInt64() {
}),
Tensor(ET2, {}, std::vector<T2>{3}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3, 2}, std::vector<T>{
10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1
}),
@ -1468,12 +1467,12 @@ public:
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = op::v0::Constant::create(params.k.type,
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<op::v1::TopK>(A, k, params.axis, params.mode, params.sort);
const auto B = std::make_shared<opset1::TopK>(A, k, params.axis, params.mode, params.sort);
const auto f = std::make_shared<Model>(OutputVector{B->output(1)}, ParameterVector{A});
return f;
}
@ -1493,8 +1492,8 @@ std::vector<TopKParams> generateParamsSingleOutput() {
Tensor(ET, {2, 3, 2}, std::vector<T>{12, 2, 10, 9, 8, 4, 6, 1, 5, 3, 11, 7}),
Tensor(ET2, {}, std::vector<T2>{2}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 2, 2}, std::vector<T>{}),
Tensor(ET_OUT, {2, 2, 2}, std::vector<T_OUT>{2, 0, 1, 2, 1, 0, 0, 1}),
0,
@ -1530,19 +1529,181 @@ INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestSingle
testing::ValuesIn(generateCombinedParamsSingleOutput()), ReferenceTopKTest::getTestCaseName);
TEST(ReferenceTopKTestInvalid, topk_v1_invalid_strings) {
const auto data = std::make_shared<op::v0::Parameter>(element::f32, Shape{1, 2, 3});
const auto k = op::v0::Constant::create(element::i64, Shape{}, {1});
EXPECT_THROW(op::v1::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure);
EXPECT_THROW(op::v1::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure);
const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
const auto k = opset1::Constant::create(element::i64, Shape{}, {1});
EXPECT_THROW(opset1::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure);
EXPECT_THROW(opset1::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure);
}
TEST(ReferenceTopKTestInvalid, topk_v1_invalid_k) {
const auto data = std::make_shared<op::v0::Parameter>(element::f32, Shape{1, 2, 3});
const auto k_non_scalar = op::v0::Constant::create(element::i64, Shape{2}, {1, 2});
EXPECT_THROW(op::v1::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure);
const auto k_float = op::v0::Constant::create(element::f32, Shape{}, {1.0f});
EXPECT_THROW(op::v1::TopK(data, k_float, 0, "max", "index"), ngraph::NodeValidationFailure);
const auto k_negative = op::v0::Constant::create(element::i8, Shape{}, {-1});
EXPECT_THROW(op::v1::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure);
const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
const auto k_non_scalar = opset1::Constant::create(element::i64, Shape{2}, {1, 2});
EXPECT_THROW(opset1::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure);
const auto k_float = opset1::Constant::create(element::f32, Shape{}, {1.0f});
EXPECT_THROW(opset1::TopK(data, k_float, 0, "max", "index"), ngraph::NodeValidationFailure);
const auto k_negative = opset1::Constant::create(element::i8, Shape{}, {-1});
EXPECT_THROW(opset1::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure);
}
class ReferenceTopKTestResnet50V3 : public ReferenceTopKTestResnet50 {
private:
static std::shared_ptr<Model> CreateFunction(const TopKParamsResnet50& params) {
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto B = std::make_shared<opset3::TopK>(A,
opset1::Constant::create(element::i64, {}, {5}),
1,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES);
const auto C = std::make_shared<opset3::TopK>(A,
opset1::Constant::create(element::i64, {}, {1}),
1,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES);
const auto out5_value = B->output(0);
const auto out5_index = B->output(1);
const auto out1_value = C->output(0);
const auto out1_index = C->output(1);
const auto f = std::make_shared<Model>(OutputVector{out5_value, out5_index, out1_value, out1_index}, ParameterVector{A});
return f;
}
};
TEST_P(ReferenceTopKTestResnet50V3, CompareWithRefs) {
Exec();
}
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestResnet50V3,
testing::ValuesIn(generateCombinedParamsResnet50()), ReferenceTopKTestResnet50V3::getTestCaseName);
class ReferenceTopKTestMaxMinSortV3 : public ReferenceTopKTestMaxMinSort {
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<opset3::TopK>(A, k, params.axis, params.mode, params.sort);
const auto f = std::make_shared<Model>(B->outputs(), ParameterVector{A});
return f;
}
};
TEST_P(ReferenceTopKTestMaxMinSortV3, CompareWithRefs) {
Exec();
}
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestMaxMinSortV3,
testing::ValuesIn(generateCombinedParamsMaxMinSort()), ReferenceTopKTestMaxMinSortV3::getTestCaseName);
class ReferenceTopKTestBackendV3 : public ReferenceTopKTestBackend {
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<opset3::TopK>(A, k, params.axis, params.mode, params.sort);
const auto f = std::make_shared<Model>(B->outputs(), ParameterVector{A});
return f;
}
};
TEST_P(ReferenceTopKTestBackendV3, CompareWithRefs) {
Exec();
}
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestBackendV3,
testing::ValuesIn(generateCombinedParamsBackend()), ReferenceTopKTestBackendV3::getTestCaseName);
class ReferenceTopKTest1dMaxMinV3 : public ReferenceTopKTest1dMaxMin {
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params, size_t out_idx) {
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<opset3::TopK>(A, k, params.axis, params.mode, params.sort);
const auto f = std::make_shared<Model>(OutputVector{B->output(out_idx)}, ParameterVector{A});
return f;
}
};
TEST_P(ReferenceTopKTest1dMaxMinV3, CompareWithRefs) {
Exec();
}
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTest1dMaxMinV3,
testing::ValuesIn(generateCombinedParams1dMaxMin()), ReferenceTopKTest1dMaxMinV3::getTestCaseName);
class ReferenceTopKTestInt64V3 : public ReferenceTopKTestInt64 {
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params, size_t out_idx) {
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<opset3::TopK>(A,
k,
params.axis,
params.mode,
params.sort,
element::i64);
const auto f = std::make_shared<Model>(OutputVector{B->output(out_idx)}, ParameterVector{A});
return f;
}
};
TEST_P(ReferenceTopKTestInt64V3, CompareWithRefs) {
Exec();
}
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestInt64V3,
testing::ValuesIn(generateCombinedParamsInt64()), ReferenceTopKTestInt64V3::getTestCaseName);
class ReferenceTopKTestSingleOutputV3 : public ReferenceTopKTestSingleOutput {
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<opset3::TopK>(A, k, params.axis, params.mode, params.sort);
const auto f = std::make_shared<Model>(OutputVector{B->output(1)}, ParameterVector{A});
return f;
}
};
TEST_P(ReferenceTopKTestSingleOutputV3, CompareWithRefs) {
Exec();
}
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestSingleOutputV3,
testing::ValuesIn(generateCombinedParamsSingleOutput()), ReferenceTopKTestSingleOutputV3::getTestCaseName);
TEST(ReferenceTopKTestInvalidV3, topk_v3_invalid_strings) {
const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
const auto k = opset1::Constant::create(element::i64, Shape{}, {1});
EXPECT_THROW(opset3::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure);
EXPECT_THROW(opset3::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure);
}
TEST(ReferenceTopKTestInvalidV3, topk_v3_invalid_k) {
const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
const auto k_non_scalar = opset1::Constant::create(element::i64, Shape{2}, {1, 2});
EXPECT_THROW(opset3::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure);
const auto k_float = opset1::Constant::create(element::f32, Shape{}, {1.0f});
EXPECT_THROW(opset3::TopK(data, k_float, 0, "max", "index"), ngraph::NodeValidationFailure);
const auto k_negative = opset1::Constant::create(element::i8, Shape{}, {-1});
EXPECT_THROW(opset3::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure);
}
} // namespace
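
The edits above only swap `op::vN::` spellings for their opset aliases. Below is a minimal sketch (assuming an OpenVINO development environment where these headers are available) of building the same kind of TopK graph through the `opset1` alias, which resolves to the v1 operator:

```cpp
// Minimal sketch, assuming OpenVINO headers are available; the opset namespaces
// are collections of aliases for the versioned operators, so the rename above
// does not change which TopK implementation the tests construct.
#include <memory>
#include "openvino/core/model.hpp"
#include "openvino/opsets/opset1.hpp"

std::shared_ptr<ov::Model> make_topk_sketch() {
    const auto data = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 5});
    const auto k = ov::opset1::Constant::create(ov::element::i64, ov::Shape{}, {3});
    const auto topk = std::make_shared<ov::opset1::TopK>(data,
                                                         k,
                                                         1,  // axis
                                                         ov::opset1::TopK::Mode::MAX,
                                                         ov::opset1::TopK::SortType::SORT_VALUES);
    return std::make_shared<ov::Model>(topk->outputs(), ov::ParameterVector{data});
}
```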

View File

@ -7,8 +7,3 @@ project(InferenceEngine)
if(ENABLE_PYTHON)
add_subdirectory(ie_bridges/python)
endif()
if(ENABLE_TESTS)
add_subdirectory(tests_deprecated)
add_subdirectory(tests)
endif()

View File

@ -1,15 +0,0 @@
WHEEL_PACKAGE_NAME=@WHEEL_PACKAGE_NAME@
WHEEL_VERSION=@WHEEL_VERSION@
WHEEL_BUILD=@WHEEL_BUILD@
WHEEL_LICENCE_TYPE=@WHEEL_LICENCE_TYPE@
WHEEL_AUTHOR=@WHEEL_AUTHOR@
WHEEL_AUTHOR_EMAIL=@WHEEL_AUTHOR_EMAIL@
WHEEL_DESC=@WHEEL_DESC@
WHEEL_LICENSE=@WHEEL_LICENSE@
WHEEL_REQUIREMENTS=@WHEEL_REQUIREMENTS@
WHEEL_OVERVIEW=@WHEEL_OVERVIEW@
CMAKE_BUILD_DIR=@CMAKE_BINARY_DIR@
OV_RUNTIME_LIBS_DIR=@IE_CPACK_RUNTIME_PATH@
TBB_LIBS_DIR=@TBB_LIBS_DIR@
PY_PACKAGES_DIR=@PY_PACKAGES_DIR@

View File

@ -1,40 +1,14 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
set(WHEEL_PACKAGE_NAME "openvino" CACHE STRING "Name of the package")
set(WHEEL_LICENCE_TYPE "OSI Approved :: Apache Software License" CACHE STRING "License type for the package")
set(WHEEL_AUTHOR "Intel Corporation" CACHE STRING "Package authors name")
set(WHEEL_AUTHOR_EMAIL "openvino_pushbot@intel.com" CACHE STRING "Email address of the package author")
set(WHEEL_DESC "Inference Engine Python* API" CACHE STRING "Short, summary description of the package")
set(WHEEL_URL "https://docs.openvinotoolkit.org/latest/index.html" CACHE STRING "Home page url")
set(WHEEL_DOWNLOAD_URL "https://github.com/openvinotoolkit/openvino/tags" CACHE STRING "Download page url")
set(WHEEL_VERSION "${IE_VERSION}" CACHE STRING "Version of this release" FORCE)
set(WHEEL_BUILD "${IE_VERSION_BUILD}" CACHE STRING "Build number of this release" FORCE)
set(WHEEL_LICENSE "${CMAKE_SOURCE_DIR}/LICENSE" CACHE STRING "Wheel license file")
set(WHEEL_REQUIREMENTS "${CMAKE_CURRENT_SOURCE_DIR}/meta/openvino.requirements.txt" CACHE STRING "Wheel requirements.txt file")
set(WHEEL_OVERVIEW "${CMAKE_CURRENT_SOURCE_DIR}/meta/pypi_overview.md" CACHE STRING "Detailed description")
set(SETUP_PY "${CMAKE_CURRENT_SOURCE_DIR}/setup.py")
set(SETUP_ENV "${CMAKE_CURRENT_SOURCE_DIR}/.env.in")
set(SETUP_ENV_OUT "${CMAKE_CURRENT_SOURCE_DIR}/.env")
set(PY_PACKAGES_DIR ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION})
set(TBB_LIBS_DIR runtime/3rdparty/tbb/lib)
if(APPLE)
set(WHEEL_PLATFORM macosx_10_15_x86_64)
elseif(UNIX)
set(WHEEL_PLATFORM manylinux2014_x86_64)
elseif(WIN32)
set(WHEEL_PLATFORM win_amd64)
if(WIN32)
set(TBB_LIBS_DIR runtime/3rdparty/tbb/bin)
else()
message(FATAL_ERROR "This platform is not supported")
endif()
configure_file(${SETUP_ENV} ${SETUP_ENV_OUT} @ONLY)
if(LINUX)
find_host_program(patchelf_program
NAMES patchelf
@ -55,21 +29,30 @@ endforeach()
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.bdist_wheel ; print(f'{wheel.bdist_wheel.get_abi_tag()}')" OUTPUT_VARIABLE PYTHON_ABI)
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.vendored.packaging.tags as tags ; print(f'{tags.interpreter_name()}{tags.interpreter_version()}')" OUTPUT_VARIABLE INTERPRETER)
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.vendored.packaging.tags as tags ; print(f'{next(tags._platform_tags())}')" OUTPUT_VARIABLE WHEEL_PLATFORM)
string(STRIP ${PYTHON_ABI} PYTHON_ABI)
string(STRIP ${INTERPRETER} INTERPRETER)
string(STRIP ${WHEEL_PLATFORM} WHEEL_PLATFORM)
set(openvino_wheel_name "openvino-${WHEEL_VERSION}-${WHEEL_BUILD}-${INTERPRETER}-${PYTHON_ABI}-${WHEEL_PLATFORM}.whl")
set(openvino_wheels_output_dir "${CMAKE_BINARY_DIR}/wheels")
set(openvino_wheel_path "${openvino_wheels_output_dir}/${openvino_wheel_name}")
add_custom_command(OUTPUT ${openvino_wheel_path}
COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}"
COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_SOURCE_DIR}/licensing" "${CMAKE_BINARY_DIR}/licensing"
COMMAND ${CMAKE_COMMAND} -E remove_directory "${CMAKE_CURRENT_BINARY_DIR}/site-packages"
COMMAND ${PYTHON_EXECUTABLE} ${SETUP_PY} clean bdist_wheel
COMMAND ${CMAKE_COMMAND} -E env WHEEL_VERSION=${WHEEL_VERSION}
WHEEL_BUILD=${WHEEL_BUILD}
CMAKE_BUILD_DIR=${CMAKE_BINARY_DIR}
OV_RUNTIME_LIBS_DIR=${IE_CPACK_RUNTIME_PATH}
TBB_LIBS_DIR=${TBB_LIBS_DIR}
PY_PACKAGES_DIR=${PY_PACKAGES_DIR}
${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/setup.py" clean bdist_wheel
--dist-dir ${openvino_wheels_output_dir}
--build=${WHEEL_BUILD}
--plat-name=${WHEEL_PLATFORM}
# COMMAND ${CMAKE_COMMAND} -E remove ${SETUP_ENV_OUT}
DEPENDS ${openvino_wheel_deps} ${SETUP_ENV_OUT}
DEPENDS ${openvino_wheel_deps}
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
COMMENT "Building Python wheel ${openvino_wheel_name}"
VERBATIM)

View File

@ -1,28 +0,0 @@
defusedxml>=0.7.1
scipy~=1.5.4
jstyleson~=0.0.2
numpy>=1.16.6,<1.20
addict>=2.4.0
pandas~=1.1.5
hyperopt~=0.1.2
networkx~=2.5
tqdm>=4.54.1
texttable~=1.6.3
py-cpuinfo>=7.0.0
PyYAML>=5.4.1
pillow>=8.1.2
scikit-image>=0.17.2
scikit-learn>=0.24.1
yamlloader>=0.5
shapely>=1.7.1
nibabel>=3.2.1
pydicom>=2.1.2
sentencepiece>=0.1.95
tokenizers>=0.10.1
editdistance>=0.5.3
parasail>=1.2.4
fast-ctc-decode>=0.2.5
rawpy>=0.16.0
nltk>=3.5
opencv-python==4.5.*
progress>=1.5

View File

@ -1,22 +0,0 @@
[options]
py_modules =
mo
mo_tf
mo_caffe
mo_mxnet
mo_onnx
mo_kaldi
[options.package_data]
* = *
[options.entry_points]
console_scripts =
[metadata]
license_files =
readme*
*LICENSE*
*license*
*third-party-programs*
*EULA*

View File

@ -1 +0,0 @@
numpy>=1.16.6,<1.20

View File

@ -1,32 +0,0 @@
## OpenVINO™ Toolkit
OpenVINO™ toolkit quickly deploys applications and solutions that emulate human vision. Based on Convolutional Neural Networks (CNNs), the toolkit extends computer vision (CV) workloads across Intel® hardware, maximizing performance. The OpenVINO™ toolkit includes the Deep Learning Deployment Toolkit (DLDT).
OpenVINO™ toolkit:
- Enables CNN-based deep learning inference on the edge
- Supports heterogeneous execution across an Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
- Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels
- Includes optimized calls for computer vision standards, including OpenCV\* and OpenCL™
Operating Systems:
- Ubuntu* 18.04 long-term support (LTS), 64-bit
- Windows* 10, 64-bit
- macOS* 10.15, 64-bit
## Install the Runtime Package Using the PyPI Repository
1. Set up and update pip to the highest version:
```sh
python3 -m pip install --upgrade pip
```
2. Install the Intel® distribution of OpenVINO™ toolkit:
```sh
pip install openvino
```
3. Verify that the package is installed:
```sh
python3 -c "from openvino.inference_engine import IECore"
```
Now you are ready to develop and run your application.

View File

@ -1,3 +1,3 @@
setuptools>=53.0.0
wheel>=0.36.2
python-decouple>=3.4

View File

@ -1,7 +1,11 @@
[metadata]
license_files =
readme*
*LICENSE*
*license*
*third-party-programs*
*EULA*
readme*
*LICENSE*
*license*
*third-party-programs*
../../../../licensing/runtime-third-party-programs.txt
../../../../licensing/tbb_third-party-programs.txt
../../../../licensing/onednn_third-party-programs.txt
../../../../LICENSE

View File

@ -21,7 +21,6 @@ from setuptools import setup, find_namespace_packages, Extension
from setuptools.command.build_ext import build_ext
from setuptools.command.build_clib import build_clib
from setuptools.command.install import install
from decouple import config
WHEEL_LIBS_INSTALL_DIR = os.path.join('openvino', 'libs')
WHEEL_LIBS_PACKAGE = 'openvino.libs'
@ -41,10 +40,11 @@ elif machine == 'aarch64':
ARCH = 'arm64'
# The following variables can be defined in environment or .env file
CMAKE_BUILD_DIR = config('CMAKE_BUILD_DIR', '.')
OV_RUNTIME_LIBS_DIR = config('OV_RUNTIME_LIBS_DIR', f'runtime/{LIBS_DIR}/{ARCH}/{CONFIG}')
TBB_LIBS_DIR = config('TBB_LIBS_DIR', f'runtime/3rdparty/tbb/{LIBS_DIR}')
PY_PACKAGES_DIR = config('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}')
SCRIPT_DIR = Path(__file__).resolve().parents[0]
CMAKE_BUILD_DIR = os.getenv('CMAKE_BUILD_DIR', '.')
OV_RUNTIME_LIBS_DIR = os.getenv('OV_RUNTIME_LIBS_DIR', f'runtime/{LIBS_DIR}/{ARCH}/{CONFIG}')
TBB_LIBS_DIR = os.getenv('TBB_LIBS_DIR', f'runtime/3rdparty/tbb/{LIBS_DIR}')
PY_PACKAGES_DIR = os.getenv('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}')
LIBS_RPATH = '$ORIGIN' if sys.platform == 'linux' else '@loader_path'
LIB_INSTALL_CFG = {
@ -428,28 +428,28 @@ if not any(pl in sys.platform for pl in platforms):
sys.exit(f'Unsupported platform: {sys.platform}, expected: linux, win32, darwin')
# copy license file into the build directory
package_license = config('WHEEL_LICENSE', '')
package_license = os.getenv('WHEEL_LICENSE', SCRIPT_DIR.parents[3] / 'LICENSE')
if os.path.exists(package_license):
copyfile(package_license, 'LICENSE')
packages = find_namespace_packages(get_package_dir(PY_INSTALL_CFG))
package_data: typing.Dict[str, list] = {}
pkg_name = config('WHEEL_PACKAGE_NAME', 'openvino')
pkg_name = os.getenv('WHEEL_PACKAGE_NAME', 'openvino')
ext_modules = find_prebuilt_extensions(get_dir_list(PY_INSTALL_CFG)) if pkg_name == 'openvino' else []
setup(
version=config('WHEEL_VERSION', '0.0.0'),
build=config('WHEEL_BUILD', '000'),
author_email=config('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'),
version=os.getenv('WHEEL_VERSION', '0.0.0'),
build=os.getenv('WHEEL_BUILD', '000'),
author_email=os.getenv('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'),
name=pkg_name,
license=config('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'),
author=config('WHEEL_AUTHOR', 'Intel Corporation'),
description=config('WHEEL_DESC', 'Inference Engine Python* API'),
install_requires=get_dependencies(config('WHEEL_REQUIREMENTS', 'meta/openvino.requirements.txt')),
long_description=get_description(config('WHEEL_OVERVIEW', 'meta/pypi_overview.md')),
license=os.getenv('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'),
author=os.getenv('WHEEL_AUTHOR', 'Intel(R) Corporation'),
description=os.getenv('WHEEL_DESC', 'OpenVINO(TM) Runtime'),
install_requires=get_dependencies(os.getenv('WHEEL_REQUIREMENTS', SCRIPT_DIR.parents[0] / 'requirements.txt')),
long_description=get_description(os.getenv('WHEEL_OVERVIEW', SCRIPT_DIR.parents[3] / 'docs/install_guides/pypi-openvino-rt.md')),
long_description_content_type='text/markdown',
download_url=config('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'),
url=config('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'),
download_url=os.getenv('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'),
url=os.getenv('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'),
cmdclass={
'build': CustomBuild,
'install': CustomInstall,

View File

@ -25,6 +25,8 @@ public:
type = shape.is_static() ? ShapeType::Static : ShapeType::Dynamic;
initDims();
hasZeroDimensions = std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } );
}
explicit Shape(const InferenceEngine::SizeVector& shape) {
@ -33,6 +35,8 @@ public:
type = ShapeType::Static;
initDims();
hasZeroDimensions = std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } );
}
/**
@ -106,6 +110,10 @@ public:
return type == ShapeType::Dynamic;
}
bool hasZeroDims() const {
return hasZeroDimensions;
}
size_t getRank() const {
return minDims.size();
}
@ -169,6 +177,8 @@ private:
Dynamic
} type {ShapeType::Static};
bool hasZeroDimensions = false;
VectorDims minDims;
VectorDims maxDims;
VectorDims dims;
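
For illustration, a minimal standalone sketch of the empty-tensor detection idea behind the new hasZeroDims()/hasZeroDimensions flag (Dims and the function below are simplified stand-ins, not the plugin's actual types):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    using Dims = std::vector<size_t>;   // illustrative stand-in for VectorDims

    // A tensor is treated as empty when any of its dimensions equals 0.
    bool hasZeroDims(const Dims& dims) {
        return std::any_of(dims.begin(), dims.end(), [](size_t d) { return d == 0; });
    }

    int main() {
        std::cout << hasZeroDims({1, 0, 2, 3}) << '\n'; // 1: empty tensor
        std::cout << hasZeroDims({1, 2, 3}) << '\n';    // 0: regular tensor
    }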

View File

@ -16,7 +16,8 @@ CpuBlockedMemoryDesc::CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const
offsetPadding = 0;
offsetPaddingToData.resize(dims.size(), 0);
strides.resize(order.size());
strides[strides.size() - 1] = 1;
// for empty tensor case we fill all strides with 0 values
strides[strides.size() - 1] = shape.hasZeroDims() ? 0 : 1;
for (size_t i = 2; i <= order.size(); i++) {
strides[strides.size() - i] = strides[strides.size() - (i - 1)] * blockedDims[blockedDims.size() - (i - 1)];
}
@ -33,6 +34,15 @@ CpuBlockedMemoryDesc::CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const
IE_THROW() << "CpuBlockedMemoryDesc doesn't support undefined blockedDims.";
}
if (shape.hasZeroDims()) {
const auto& dims = shape.getDims();
for (size_t i = 0; i < shape.getRank(); i++) {
if (dims[order[i]] == 0 && !dimsEqualWeak(blockedDims[i], 0)) {
IE_THROW() << "Can't create CpuBlockedMemoryDesc. Mistmatch zero dims in dims and blocked dims";
}
}
}
this->order = order;
this->blockedDims = blockedDims;
this->offsetPadding = offsetPadding;
@ -44,7 +54,9 @@ CpuBlockedMemoryDesc::CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const
}
if (strides.empty() && !order.empty()) {
if (std::any_of(this->blockedDims.begin(), this->blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
if (shape.hasZeroDims()) {
this->strides.resize(order.size(), 0);
} else if (std::any_of(this->blockedDims.begin(), this->blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
this->strides.resize(order.size(), Shape::UNDEFINED_DIM);
} else {
this->strides.resize(order.size());
@ -54,6 +66,9 @@ CpuBlockedMemoryDesc::CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const
}
}
} else {
if (shape.hasZeroDims() && std::any_of(strides.begin(), strides.end(), [](size_t stride) { return stride != 0; } )) {
IE_THROW() << "Can't create CpuBlockedMemoryDesc with zero dim, but with non zero strides";
}
this->strides = strides;
}
@ -92,11 +107,16 @@ bool CpuBlockedMemoryDesc::isCompatible(const DnnlBlockedMemoryDesc &rhs) const
return rhs.isCompatible(*this);
}
bool CpuBlockedMemoryDesc::canComputeMemSizeZeroDims() const {
return getShape().hasZeroDims() && getOffsetPadding() != Shape::UNDEFINED_DIM;
}
size_t CpuBlockedMemoryDesc::getCurrentMemSizeImp() const {
int64_t e_size = getOffsetPadding() + 1; // size in bytes (from begin of data to last element)
for (int j = 0; j < getBlockDims().size(); j++)
e_size += (getBlockDims()[j] - 1) * getStrides()[j];
if (!getShape().hasZeroDims()) {
for (int j = 0; j < getBlockDims().size(); j++)
e_size += (getBlockDims()[j] - 1) * getStrides()[j];
}
e_size *= getPrecision() == InferenceEngine::Precision::BIN ? 1 : getPrecision().size();
@ -104,14 +124,14 @@ size_t CpuBlockedMemoryDesc::getCurrentMemSizeImp() const {
}
size_t CpuBlockedMemoryDesc::getMaxMemSize() const {
if (shape.isStatic()) {
if (shape.isStatic() || shape.hasZeroDims()) {
return getCurrentMemSize();
}
auto& maxDims = shape.getMaxDims();
const auto& maxDims = shape.getMaxDims();
if (std::any_of(maxDims.begin(), maxDims.end(), [](size_t x){ return Shape::UNDEFINED_DIM == x ||
// WA: for some nodes ngraph computes the upper bound depending on the precision max value
std::numeric_limits<int32_t>::max() == x; })) {
x >= std::numeric_limits<int32_t>::max(); })) {
return UNDEFINED_SIZE;
}
@ -270,15 +290,23 @@ bool CpuBlockedMemoryDesc::blocksExtended() const {
}
size_t CpuBlockedMemoryDesc::getPaddedElementsCount() const {
if (std::any_of(blockedDims.begin(), blockedDims.end(), [](Dim dim) { return dim == Shape::UNDEFINED_DIM; }))
if (getShape().hasZeroDims()) {
return 0;
}
if (std::any_of(blockedDims.begin(), blockedDims.end(), [](Dim dim) { return dim == Shape::UNDEFINED_DIM; })) {
IE_THROW() << "Can't compute padded elements count for non undefined blocked dims";
}
return std::accumulate(blockedDims.begin(), blockedDims.end(), size_t{1}, std::multiplies<size_t>());
}
MemoryDescPtr CpuBlockedMemoryDesc::cloneWithUndefStridesAndOffset() const {
const auto orderSize = getOrder().size();
return std::make_shared<CpuBlockedMemoryDesc>(getPrecision(), getShape(), getBlockDims(), getOrder(), Shape::UNDEFINED_DIM,
VectorDims(orderSize, 0), VectorDims(orderSize, Shape::UNDEFINED_DIM));
CpuBlockedMemoryDescPtr newDesc = std::make_shared<CpuBlockedMemoryDesc>(*this);
newDesc->strides = VectorDims(orderSize, Shape::UNDEFINED_DIM);
newDesc->offsetPadding = Shape::UNDEFINED_DIM;
newDesc->offsetPaddingToData = VectorDims(orderSize, 0);
newDesc->status = descStatus::Undefined;
return newDesc;
}
MemoryDescPtr CpuBlockedMemoryDesc::cloneWithDefaultStridesAndOffset() const {
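
A compact sketch of the dense-stride computation with the zero-stride fallback for empty tensors that this hunk introduces (simplified: undefined dims and custom orders are ignored here):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    using Dims = std::vector<size_t>;

    // Dense row-major strides; for an empty tensor every stride is set to 0.
    Dims denseStrides(const Dims& blockedDims) {
        Dims strides(blockedDims.size(), 0);
        if (blockedDims.empty())
            return strides;
        const bool empty = std::any_of(blockedDims.begin(), blockedDims.end(),
                                       [](size_t d) { return d == 0; });
        strides.back() = empty ? 0 : 1;
        for (size_t i = 2; i <= blockedDims.size(); ++i)
            strides[blockedDims.size() - i] =
                strides[blockedDims.size() - i + 1] * blockedDims[blockedDims.size() - i + 1];
        return strides;
    }
    // denseStrides({2, 3, 4})    -> {12, 4, 1}
    // denseStrides({1, 0, 2, 3}) -> {0, 0, 0, 0}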

View File

@ -84,6 +84,7 @@ public:
private:
size_t getElementOffset(size_t elemNumber) const override;
bool canComputeMemSizeZeroDims() const override;
size_t getCurrentMemSizeImp() const override;
size_t getOffset(const InferenceEngine::SizeVector& v) const;
bool isPlainFormat() const;

View File

@ -93,7 +93,7 @@ public:
*/
size_t getCurrentMemSize() const {
size_t retVal = UNDEFINED_SIZE;
if (isDefined()) {
if (canComputeMemSize()) {
retVal = getCurrentMemSizeImp();
}
return retVal;
@ -140,8 +140,13 @@ protected:
// Get offset to the n'th element. Returns physical index of the element by the logical one considering padding, layout, blocking etc.
virtual size_t getElementOffset(size_t elemNumber) const = 0;
virtual bool canComputeMemSizeZeroDims() const = 0;
virtual bool isDefinedImp() const = 0;
bool canComputeMemSize() const {
return isDefined() || canComputeMemSizeZeroDims();
}
virtual MemoryDescPtr cloneWithNewDimsImp(const VectorDims& dims) const = 0;
MemoryDescType type;
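
The gate added above can be summarized with a small sketch (the flags and sizes below are illustrative; the real descriptor derives them from its dims, strides and offsets, and an empty tensor still accounts for the padded offset rather than reporting a plain 0):

    #include <cstddef>
    #include <limits>

    struct DescState {
        bool defined;       // isDefined(): every dim/stride/offset is known
        bool zeroDims;      // shape contains a 0 dimension
        bool offsetDefined; // padded offset is known
    };

    const size_t UNDEFINED_SIZE = std::numeric_limits<size_t>::max();

    // Mirrors canComputeMemSize(): report a size for fully defined descriptors
    // and for empty tensors with a known offset, otherwise UNDEFINED_SIZE.
    size_t currentMemSize(const DescState& s, size_t sizeWhenDefined) {
        const bool canComputeZeroDims = s.zeroDims && s.offsetDefined;
        if (!(s.defined || canComputeZeroDims))
            return UNDEFINED_SIZE;
        return s.zeroDims ? 0 : sizeWhenDefined;
    }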

View File

@ -48,17 +48,37 @@ DnnlBlockedMemoryDesc MemoryDescUtils::convertToDnnlBlockedMemoryDesc(const Memo
CpuBlockedMemoryDesc MemoryDescUtils::convertToCpuBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc) {
if (desc.getLayout() == InferenceEngine::Layout::ANY)
IE_THROW() << "Cannot convert InferenceEngine::TensorDesc with ANY layout to CpuBlockedMemoryDesc";
const auto &blkDesc = desc.getBlockingDesc();
return CpuBlockedMemoryDesc(desc.getPrecision(), Shape(desc.getDims()), blkDesc.getBlockDims(), blkDesc.getOrder(), blkDesc.getOffsetPadding(),
blkDesc.getOffsetPaddingToData(), blkDesc.getStrides());
const auto& blkDesc = desc.getBlockingDesc();
const auto& dims = desc.getDims();
auto strides = blkDesc.getStrides();
// for the empty tensor case InferenceEngine::TensorDesc fills strides with non-zero values before the first 0 dim
// i.e. dims[1, 0, 2, 3] -> strides [0, 6, 3, 1]
if (std::any_of(dims.begin(), dims.end(), [](size_t dim){ return dim == 0; })) {
std::fill(strides.begin(), strides.end(), 0);
}
return CpuBlockedMemoryDesc(desc.getPrecision(), Shape(dims), blkDesc.getBlockDims(), blkDesc.getOrder(), blkDesc.getOffsetPadding(),
blkDesc.getOffsetPaddingToData(), strides);
}
DnnlBlockedMemoryDesc MemoryDescUtils::convertToDnnlBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc) {
const auto &blkDesc = desc.getBlockingDesc();
if (desc.getLayout() == InferenceEngine::Layout::ANY)
IE_THROW() << "Cannot convert InferenceEngine::TensorDesc with ANY layout to DnnlBlockedMemoryDesc";
const auto& blkDesc = desc.getBlockingDesc();
const auto& dims = desc.getDims();
auto strides = blkDesc.getStrides();
// for the empty tensor case InferenceEngine::TensorDesc fills strides with non-zero values before the first 0 dim
// i.e. dims[1, 0, 2, 3] -> strides [0, 6, 3, 1]
if (std::any_of(dims.begin(), dims.end(), [](size_t dim){ return dim == 0; })) {
std::fill(strides.begin(), strides.end(), 0);
}
return DnnlBlockedMemoryDesc(desc.getPrecision(), Shape(desc.getDims()), blkDesc.getBlockDims(), blkDesc.getOrder(), blkDesc.getOffsetPadding(),
blkDesc.getOffsetPaddingToData(), blkDesc.getStrides());
blkDesc.getOffsetPaddingToData(), strides);
}
BlockedMemoryDescPtr MemoryDescUtils::convertToBlockedMemoryDesc(const MemoryDescPtr &desc) {
@ -80,9 +100,16 @@ InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const MKLDNNMemory &
InferenceEngine::TensorDesc MemoryDescUtils::convertToTensorDesc(const MemoryDesc& desc) {
if (auto blockingDesc = dynamic_cast<const BlockedMemoryDesc*>(&desc)) {
return InferenceEngine::TensorDesc(blockingDesc->getPrecision(), blockingDesc->getShape().getStaticDims(),
{blockingDesc->getBlockDims(), blockingDesc->getOrder(), blockingDesc->getOffsetPadding(),
blockingDesc->getOffsetPaddingToData(), blockingDesc->getStrides()});
InferenceEngine::BlockingDesc blkDesc = desc.getShape().hasZeroDims() ? InferenceEngine::BlockingDesc(blockingDesc->getBlockDims(),
blockingDesc->getOrder(),
blockingDesc->getOffsetPadding(),
blockingDesc->getOffsetPaddingToData()) :
InferenceEngine::BlockingDesc(blockingDesc->getBlockDims(),
blockingDesc->getOrder(),
blockingDesc->getOffsetPadding(),
blockingDesc->getOffsetPaddingToData(),
blockingDesc->getStrides());
return InferenceEngine::TensorDesc(blockingDesc->getPrecision(), blockingDesc->getShape().getStaticDims(), blkDesc);
} else {
IE_THROW() << "Cannot convert MemoryDesc to InferenceEngine::TensorDesc";
}
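
The stride normalization performed in both conversion helpers above boils down to the following sketch (names are illustrative):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    using Dims = std::vector<size_t>;

    // InferenceEngine::TensorDesc keeps non-zero strides in front of the first
    // zero dimension, e.g. dims {1, 0, 2, 3} -> strides {0, 6, 3, 1}; the CPU
    // descriptors expect all-zero strides for such empty tensors instead.
    Dims zeroStridesIfEmpty(const Dims& dims, Dims strides) {
        if (std::any_of(dims.begin(), dims.end(), [](size_t d) { return d == 0; }))
            std::fill(strides.begin(), strides.end(), 0);
        return strides;
    }
    // zeroStridesIfEmpty({1, 0, 2, 3}, {0, 6, 3, 1}) -> {0, 0, 0, 0}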

View File

@ -15,12 +15,17 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
const auto &dims = shape.getDims();
if (!strides.empty()) { // custom strides
if (shape.hasZeroDims() && std::any_of(strides.begin(), strides.end(), [](size_t stride) { return stride != 0; } )) {
IE_THROW() << "Can't create DnnlBlockedMemoryDesc with zero dim, but with non zero strides";
}
desc = {MKLDNNExtensionUtils::convertToDnnlDims(dims),
MKLDNNExtensionUtils::IEPrecisionToDataType(prc),
MKLDNNExtensionUtils::convertToDnnlDims(strides)};
} else {
mkldnn::memory::dims plain_strides;
if (std::any_of(dims.begin(), dims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
if (shape.hasZeroDims()) {
plain_strides.resize(ndims, 0);
} else if (std::any_of(dims.begin(), dims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
plain_strides.resize(ndims, DNNL_RUNTIME_DIM_VAL);
} else {
plain_strides.resize(ndims, 1);
@ -58,8 +63,8 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
* Limitation of conversion first N elements of order should be permutation of [0,1,2 ... N]
*/
DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const VectorDims& blockedDims,
const VectorDims& order, size_t offsetPadding, const VectorDims& offsetPaddingToData,
const VectorDims& strides) : MemoryDesc(shape, DnnlBlocked) {
const VectorDims& order, size_t offsetPadding, const VectorDims& offsetPaddingToData,
const VectorDims& strides) : MemoryDesc(shape, DnnlBlocked) {
using namespace mkldnn;
// scalar case
if (shape.getRank() == 0) {
@ -90,8 +95,8 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
IE_THROW() << "DnnlBlockedMemoryDesc doesn't support undefined order.";
}
if (std::any_of(blockedDims.begin() + shape.getRank(), blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
IE_THROW() << "DnnlBlockedMemoryDesc doesn't support undefined blockedDims.";
if (std::any_of(blockedDims.begin() + shape.getRank(), blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM || val == 0; })) {
IE_THROW() << "DnnlBlockedMemoryDesc doesn't support undefined or zero blockedDims.";
}
auto dims = MKLDNNExtensionUtils::convertToDnnlDims(shape.getDims());
@ -106,7 +111,12 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
size_t inner_ndims = order.size() - dims.size();
const bool emptyDesc = shape.hasZeroDims();
if (!strides.empty()) {
if (emptyDesc && std::any_of(strides.begin(), strides.end(), [](size_t dim) { return dim != 0; } )) {
IE_THROW() << "Can't create DnnlBlockedMemoryDesc with zero dim, but with non zero strides";
}
bool is_descending_strides = true;
for (int i = 1; i < strides.size(); i++) {
is_descending_strides &= (strides[i - 1] >= strides[i]);
@ -118,7 +128,7 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
IE_THROW() << "Can not construct DnnlBlockedMemoryDesc from strides: " << vec2str(strides);
}
if (!strides.empty() && std::none_of(strides.begin(), strides.end(), [](size_t x) { return Shape::UNDEFINED_DIM == x; })) {
if (!strides.empty() && !emptyDesc && std::none_of(strides.begin(), strides.end(), [](size_t x) { return Shape::UNDEFINED_DIM == x; })) {
bool inner_block_are_dense = one_of(strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted
for (int i = outer_ndims; i < strides.size() - 1; i++) {
inner_block_are_dense &= (strides[i] == strides[i + 1] * blockedDims[i + 1]);
@ -203,6 +213,11 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const Shape& shape, mkldnn::memory:
order.swap(perm);
order.insert(order.end(), inner_idxs.begin(), inner_idxs.end());
if (shape.hasZeroDims()) {
auto& blk = desc.data.format_desc.blocking;
std::fill(std::begin(blk.strides), std::begin(blk.strides) + desc.data.ndims, 0);
}
initBlockedParams();
}
@ -296,6 +311,12 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc)
IE_THROW(Unexpected) << "Can't create DnnlBlockedMemoryDesc from not blocking desc";
order = extractOrder(desc);
if (getShape().hasZeroDims()) {
auto& blk = desc.data.format_desc.blocking;
std::fill(std::begin(blk.strides), std::begin(blk.strides) + desc.data.ndims, 0);
}
initBlockedParams();
}
@ -368,6 +389,7 @@ bool DnnlBlockedMemoryDesc::isTailCFormat() const {
static mkldnn::memory::desc cloneDescWithNewDims(const mkldnn::memory::desc& desc, const VectorDims& dims, const VectorDims& order) {
using namespace dnnl::impl::utils;
auto mklDims = MKLDNNExtensionUtils::convertToDnnlDims(dims);
const auto offsetPadding = desc.data.offset0;
mkldnn::memory::desc newMklDesc = desc;
array_copy(newMklDesc.data.dims, mklDims.data(), mklDims.size());
std::vector<int> perm(order.begin(), order.begin() + mklDims.size());
@ -379,6 +401,9 @@ static mkldnn::memory::desc cloneDescWithNewDims(const mkldnn::memory::desc& des
if (retCode != dnnl::impl::status::success) {
IE_THROW() << "Can not clone DnnlBlockedMemoryDesc with dims: " << MemoryDescUtils::dims2str(dims);
}
// dnnl::impl::fill_blocked always sets offset0 to 0,
// so we need to restore the actual value
newMklDesc.data.offset0 = offsetPadding;
return newMklDesc;
}
@ -476,14 +501,14 @@ bool DnnlBlockedMemoryDesc::isSame(mkldnn::memory::format_tag fmt) const {
}
size_t DnnlBlockedMemoryDesc::getMaxMemSize() const {
if (shape.isStatic()) {
if (shape.isStatic() || shape.hasZeroDims()) {
return getCurrentMemSize();
}
auto& maxDims = shape.getMaxDims();
const auto& maxDims = shape.getMaxDims();
if (std::any_of(maxDims.begin(), maxDims.end(), [](size_t x){ return Shape::UNDEFINED_DIM == x ||
// WA: for some nodes ngraph computes the upper bound depending on the precision max value
std::numeric_limits<int32_t>::max() == x; })) {
x >= std::numeric_limits<int32_t>::max(); })) {
return UNDEFINED_SIZE;
}
@ -492,6 +517,13 @@ size_t DnnlBlockedMemoryDesc::getMaxMemSize() const {
}
size_t DnnlBlockedMemoryDesc::getPaddedElementsCount() const {
if (getShape().hasZeroDims()) {
return 0;
}
if (std::any_of(std::begin(desc.data.padded_dims), std::begin(desc.data.padded_dims) + desc.data.ndims,
[](dnnl_dim_t dim) { return dim == DNNL_RUNTIME_DIM_VAL; })) {
IE_THROW() << "Can't compute padded elements count for non undefined blocked dims";
}
return std::accumulate(std::begin(desc.data.padded_dims), std::begin(desc.data.padded_dims) + desc.data.ndims, size_t{1},
std::multiplies<int64_t>());
}
@ -548,7 +580,7 @@ void DnnlBlockedMemoryDesc::initStrides() {
const size_t total_ndims = outer_ndims + inner_ndims;
// strides of inner dims. In case of 4i16o4i will be {64, 4, 1}
VectorDims inner_strides(inner_ndims, 1);
VectorDims inner_strides(inner_ndims, getShape().hasZeroDims() ? 0 : 1);
for (size_t i = 1; i < blk_desc.inner_nblks; i++) {
inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i];
}
@ -600,7 +632,9 @@ void DnnlBlockedMemoryDesc::recomputeDefaultStrides() {
IE_THROW() << "Can't recompute stride: order size != blocked dims size";
auto &oneDnnStrides = desc.data.format_desc.blocking.strides;
if (std::any_of(blockedDims.begin(), blockedDims.end(), [](Dim val) { return val == Shape::UNDEFINED_DIM; })) {
if (getShape().hasZeroDims()) {
std::fill(std::begin(oneDnnStrides), std::begin(oneDnnStrides) + getShape().getRank(), 0);
} else if (std::any_of(blockedDims.begin(), blockedDims.end(), [](Dim val) { return val == Shape::UNDEFINED_DIM; })) {
std::fill(std::begin(oneDnnStrides), std::begin(oneDnnStrides) + rank, DNNL_RUNTIME_DIM_VAL);
initStrides();
} else {
@ -633,6 +667,11 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc,
desc = cloneDescWithNewDims(mdesc, shape.getDims(), order);
if (shape.hasZeroDims()) {
auto& blk = desc.data.format_desc.blocking;
std::fill(std::begin(blk.strides), std::begin(blk.strides) + desc.data.ndims, 0);
}
initBlockedParams();
}
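
The offset0 handling in cloneDescWithNewDims() follows a simple save-and-restore pattern; a self-contained sketch with a fake descriptor and a fake fill helper (neither is oneDNN's real API):

    #include <cassert>
    #include <cstdint>

    struct FakeDesc { int64_t offset0; };         // stand-in, not mkldnn::memory::desc

    // Stand-in for dnnl::impl::fill_blocked(), which always resets offset0 to 0.
    void fillBlockedLike(FakeDesc& d) { d.offset0 = 0; }

    FakeDesc cloneKeepingOffset(FakeDesc src) {
        const int64_t savedOffset = src.offset0;  // remember the value the helper clobbers
        fillBlockedLike(src);
        src.offset0 = savedOffset;                // restore it afterwards
        return src;
    }

    int main() {
        assert(cloneKeepingOffset(FakeDesc{42}).offset0 == 42);
    }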

View File

@ -71,7 +71,7 @@ private:
explicit DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc);
// Creates DnnlBlockedMemoryDesc using the shape parameter as a true shape but all other params (layout, blocks, etc.) are used from the mdesc, but
// the mdesc own shape is ignored. The main purpose of this constructor is making dynamic descriptor form some dummy mdesc, which stores info about
// the mdesc own shape is ignored. The main purpose of this constructor is making dynamic descriptor from some dummy mdesc, which stores info about
// layout, blocking, strides, etc., and the provided dynamic shape.
DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc, const Shape& shape);

View File

@ -15,6 +15,10 @@ DnnlMemoryDesc::DnnlMemoryDesc(const mkldnn::memory::desc& desc) :
IE_THROW(Unexpected) << "Memory format any is prohibited!";
}
bool DnnlMemoryDesc::canComputeMemSizeZeroDims() const {
return getShape().hasZeroDims() && desc.data.offset0 != DNNL_RUNTIME_DIM_VAL;
}
size_t DnnlMemoryDesc::getCurrentMemSizeImp() const {
return MKLDNNExtensionUtils::getMemSizeForDnnlDesc(desc);
}

View File

@ -63,6 +63,7 @@ private:
size_t getElementOffset(size_t elemNumber) const override;
bool canComputeMemSizeZeroDims() const override;
size_t getCurrentMemSizeImp() const override;
bool isDefinedImp() const override;
MemoryDescPtr cloneWithNewDimsImp(const VectorDims& dims) const override;

View File

@ -319,7 +319,6 @@ void MKLDNNGraph::InitGraph() {
SortTopologically();
InitDescriptors();
RemoveDroppedEdges();
InitOptimalPrimitiveDescriptors();
@ -385,15 +384,16 @@ void MKLDNNGraph::InitOptimalPrimitiveDescriptors() {
void MKLDNNGraph::ExtractConstantAndExecutableNodes() {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::ExtractConstantAndExecutableNodes");
for (const auto& graphNode : graphNodes) {
if (graphNode->isConstant())
if (graphNode->isConstant()) {
constantGraphNodes.emplace_back(graphNode);
else if (CPU_DEBUG_CAPS_ALWAYS_TRUE(graphNode->isExecutable()))
} else if (CPU_DEBUG_CAPS_ALWAYS_TRUE(graphNode->isExecutable())) {
/* @todo
* Revise implementation.
* With the current approach it is possible that, with debug_caps enabled,
* we execute a node which is not ready to be executed
*/
executableGraphNodes.emplace_back(graphNode);
}
}
}
@ -793,7 +793,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
// check for empty output blob
if (std::any_of(outDims.begin(), outDims.end(), [](const Dim dim) {return dim == 0;})) {
return;
continue;
}
auto srcPrec = actualDesc.getPrecision();
@ -836,10 +836,11 @@ inline void MKLDNNGraph::ExecuteNode(const MKLDNNNodePtr& node, const mkldnn::st
DUMP(node, infer_count);
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, node->profiling.execute);
if (node->isDynamicNode())
if (node->isDynamicNode()) {
node->executeDynamic(stream);
else
} else {
node->execute(stream);
}
}
void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) {
@ -855,7 +856,6 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) {
if (request)
request->ThrowIfCanceled();
ExecuteNode(node, stream);
}
@ -994,22 +994,6 @@ Config MKLDNNGraph::getProperty() const {
return config;
}
Blob::Ptr MKLDNNGraph::getInputBlob(const std::string& name) {
auto itr = inputNodesMap.find(name);
if (itr != inputNodesMap.end()) {
return MemoryDescUtils::interpretAsBlob(itr->second->getChildEdgeAt(0)->getMemory());
}
return nullptr;
}
Blob::Ptr MKLDNNGraph::getOutputBlob(const std::string& name) {
auto itr = outputNodesMap.find(name);
if (itr != outputNodesMap.end()) {
return MemoryDescUtils::interpretAsBlob(itr->second->getParentEdgeAt(0)->getMemory());
}
return nullptr;
}
void MKLDNNGraph::RemoveEdge(MKLDNNEdgePtr& edge) {
for (auto it = graphEdges.begin(); it != graphEdges.end(); it++) {
if ((*it) == edge) {

View File

@ -44,9 +44,6 @@ public:
void setProperty(const std::map<std::string, std::string> &properties);
Config getProperty() const;
InferenceEngine::Blob::Ptr getInputBlob(const std::string& name);
InferenceEngine::Blob::Ptr getOutputBlob(const std::string& name);
template<typename NET>
void CreateGraph(NET &network,
const MKLDNNExtensionManager::Ptr& extMgr,

View File

@ -59,7 +59,7 @@ MKLDNNGraphOptimizer::MKLDNNGraphOptimizer() {}
void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "ApplyCommonGraphOptimizations", "FuseConvolutionAndBias");
FuseConvolutionAndBias(graph);
FuseConvolutionMatMulAndBias(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMultiplyAndAdd");
@ -166,37 +166,38 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
graph.RemoveDroppedEdges();
}
void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) {
void MKLDNNGraphOptimizer::FuseConvolutionMatMulAndBias(MKLDNNGraph &graph) {
auto& graphNodes = graph.GetNodes();
auto isSuitableParentNode = [](MKLDNNNodePtr node) {
return node->getType() == Convolution &&
auto isSuitableParentNode = [](const MKLDNNNodePtr& node) {
return (node->getType() == Convolution || node->getType() == MatMul) &&
node->getChildEdges().size() == 1 &&
node->getParentEdges().size() == 2 &&
node->getFusedWith().empty();
};
auto isSuitableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
auto isSuitableChildNode = [&](const MKLDNNNodePtr& parentNode, const MKLDNNNodePtr& childNode) {
if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2)
return false;
auto biasNode = childNode->getParentEdgesAtPort(1)[0]->getParent();
const auto biasNode = childNode->getParentEdgesAtPort(1)[0]->getParent();
if (biasNode->getType() != Input || !biasNode->isConstant() || biasNode->getChildEdges().size() != 1)
return false;
auto convOutDims = parentNode->getOutputShapeAtPort(0).getDims();
auto biasDims = getNormalizedDimsBySize(biasNode->getOutputShapeAtPort(0).getDims(),
convOutDims.size());
const auto parentOutDims = parentNode->getOutputShapeAtPort(0).getDims();
const auto biasDims = getNormalizedDimsBySize(biasNode->getOutputShapeAtPort(0).getDims(),
parentOutDims.size());
// TODO [NM]: Legacy ConvBias fusion transformation supports both per-tensor (via explicit broadcasting) and per-channel cases.
// Most real models contain per-channel bias, so we need to re-evaluate the need to support the per-tensor variant.
if (convOutDims.size() != biasDims.size() || biasDims.size() < 2)
if (parentOutDims.size() != biasDims.size() || biasDims.size() < 2)
return false;
if (biasDims[0] != 1 || !dimsEqualStrong(biasDims[1], convOutDims[1]))
const auto channelAxis = parentNode->getFusingAxis();
if (!dimsEqualStrong(biasDims[channelAxis], parentOutDims[channelAxis]))
return false;
for (int i = 2; i < biasDims.size(); i++) {
if (biasDims[i] != 1)
for (int i = 0; i < biasDims.size(); i++) {
if (biasDims[i] != 1 && i != channelAxis)
return false;
}
@ -262,13 +263,13 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) {
graph.RemoveEdge(remEdge);
}
auto parentEltwise = parentNode;
const auto& parentEltwise = parentNode;
MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size()));
auto &graphEdges = graph.GetEdges();
auto& graphEdges = graph.GetEdges();
graphEdges.push_back(newEdge);
parent->addEdge(newEdge);
auto partialShape = { parentEltwise->outputShapes[0].toPartialShape()[1] };
auto partialShape = { parentEltwise->outputShapes[0].toPartialShape()[parentEltwise->getFusingAxis()] };
parent->outputShapes[inNum] = Shape(partialShape);
parentEltwise->inputShapes.push_back(parent->outputShapes[0]);
}
@ -627,7 +628,15 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) {
}
}
static bool BF16QuantizeNodeFusing(MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
/**
* @todo FQ fusing was disabled for BF16 output since oneDNN primitives lack support
* for bf16 depthwise postops.
* This is not the case anymore, because after migration to oneDNN 2.3 FQ will be fused as
* multiple binary post ops.
* This check can already be removed for FC fusing, but should be kept for Convolution,
* which still uses legacy depthwise postops for performance reasons.
*/
static bool BF16QuantizeNodeFusing(const MKLDNNNodePtr& parentNode, const MKLDNNNodePtr& childNode) {
return childNode->getType() == FakeQuantize &&
one_of(Precision::BF16,
parentNode->getOriginalOutputPrecisionAtPort(0),
@ -638,7 +647,7 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra
auto& graphNodes = graph.GetNodes();
auto isSuitableParentNode = [](MKLDNNNodePtr node) {
return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getInputShapeAtPort(0).getRank() != 3;
return node->getType() == FullyConnected && node->getChildEdges().size() == 1;
};
auto parent = graphNodes.begin();

View File

@ -19,7 +19,7 @@ public:
void ApplyImplSpecificGraphOptimizations(MKLDNNGraph& graph);
private:
void FuseConvolutionAndBias(MKLDNNGraph &graph);
void FuseConvolutionMatMulAndBias(MKLDNNGraph &graph);
void FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &graph);
void FuseMultiplyAndAdd(MKLDNNGraph &graph);
void FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph);

View File

@ -190,8 +190,9 @@ void MKLDNNPlugin::MKLDNNInferRequest::redefineMemoryForInputNodes() {
const auto inputNode = cpuInputNodes.find(blob.first);
if (inputNode == cpuInputNodes.end())
IE_THROW() << "CPU execution graph doesn't contain input node with name: " << blob.first;
if (inputNode->second->isDynamicNode())
if (inputNode->second->isDynamicNode()) {
inputNode->second->redefineOutputMemory({blob.second->getTensorDesc().getDims()});
}
}
}

View File

@ -4,6 +4,7 @@
#include "mkldnn_node.h"
#include "dnnl_debug.h"
#include "mkldnn_edge.h"
#include "mkldnn_extension_mngr.h"
#include "mkldnn_itt.h"
@ -83,7 +84,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
for (size_t i = 0; i < op->get_input_size(); i++) {
const auto &shape = op->get_input_partial_shape(i);
if (shape.rank().is_dynamic()) {
IE_THROW(Unexpected) << "CPU plug-in doesn't support operation with dynamic rank";
IE_THROW(Unexpected) << "CPU plug-in doesn't support " << getTypeStr() << " operation with dynamic rank. Operation name: " << getName();
}
bool isScalar = shape.rank().get_length() == 0;
@ -98,7 +99,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
for (size_t i = 0; i < op->get_output_size(); i++) {
const auto &shape = op->get_output_partial_shape(i);
if (shape.rank().is_dynamic()) {
IE_THROW(Unexpected) << "CPU plug-in doesn't support operation with dynamic rank";
IE_THROW(Unexpected) << "CPU plug-in doesn't support " << getTypeStr() << " operation with dynamic rank. Operation name: " << getName();
}
bool isScalar = shape.rank().get_length() == 0;
@ -229,6 +230,15 @@ bool MKLDNNNode::isEdgesEmpty(const std::vector<MKLDNNEdgeWeakPtr>& edges) const
return true;
}
void MKLDNNNode::createPrimitive() {
if (inputShapesDefined() && isExecutable()) {
if (needPrepareParams()) {
prepareParams();
}
updateLastInputDims();
}
}
void MKLDNNNode::selectOptimalPrimitiveDescriptor() {
selectPreferPrimitiveDescriptor(getPrimitivesPriority(), false);
}
@ -509,12 +519,14 @@ void MKLDNNNode::executeDynamic(mkldnn::stream strm) {
if (needShapeInfer()) {
redefineOutputMemory(shapeInfer());
}
if (needPrepareParams()) {
IE_ASSERT(inputShapesDefined()) << "Can't prepare params for " << getTypeStr() << " node with name: " << getName() <<
" since the input shapes are not defined.";
prepareParams();
if (isExecutable()) {
if (needPrepareParams()) {
IE_ASSERT(inputShapesDefined()) << "Can't prepare params for " << getTypeStr() << " node with name: " << getName() <<
" since the input shapes are not defined.";
prepareParams();
}
executeDynamicImpl(strm);
}
executeDynamicImpl(strm);
updateLastInputDims();
}
@ -716,7 +728,7 @@ void MKLDNNNode::initDescriptor(const NodeConfig& config) {
selectedPD->setConfig(rightConfig);
}
void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd) {
void MKLDNNNode::prepareMemory(mkldnn::primitive_desc_iterator& itpd) {
for (size_t i = 0; i < getChildEdges().size(); i++) {
auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
@ -1048,6 +1060,18 @@ void MKLDNNNode::setDynamicBatchLim(int lim) {
}
}
void MKLDNNNode::appendPostOpArgs(const mkldnn::primitive_attr& attr,
std::unordered_map<int, mkldnn::memory>& primArgs,
const std::vector<MKLDNNMemoryPtr>& binaryPostOpsArgs) {
auto post_ops = attr.get_post_ops();
int idx = 0;
for (int i = 0; i < post_ops.len(); i++) {
if (post_ops.kind(i) == mkldnn::primitive::kind::binary) {
primArgs.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(i) | DNNL_ARG_SRC_1, binaryPostOpsArgs[idx++]->GetPrimitive()});
}
}
}
bool MKLDNNNode::isFusedWith(Type fusedNodeType) const {
for (auto fusedNode : fusedWith) {
if (fusedNode->type == fusedNodeType)
@ -1078,10 +1102,14 @@ Layout MKLDNNNode::getWeightsLayoutByDims(SizeVector dims, bool isGrouped) {
}
}
void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) {
IE_THROW() << "Fusing of " << this->getType() << " operation is not implemented";
}
void MKLDNNNode::appendBinPostOps(mkldnn::post_ops& ops, const std::vector<size_t>& binaryShape, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) {
IE_THROW() << "Binary fusing of " << this->getType() << " operation is not implemented";
}
std::vector<InferenceEngine::Precision> MKLDNNNode::getInputPrecisions() const {
std::vector<InferenceEngine::Precision> inputPrecisions;
for (size_t i = 0; i < getParentEdges().size(); i++) {
@ -1205,6 +1233,9 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr<ngraph::Node>
bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const {
size_t fusingPort = 0;
// @todo graph optimizer can provide parentNode as nullptr. Should be avoided
const size_t channelAxis = parentNode ? parentNode->getFusingAxis() : MKLDNNNode::getFusingAxis();
for (size_t i = (parentNode == nullptr ? 1 : 0); i < getParentEdges().size(); i++) {
MKLDNNNode *node = getParentEdgesAtPort(i)[0]->getParent().get();
if (node == nullptr) {
@ -1225,7 +1256,8 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
if (i == fusingPort)
continue;
auto& weightShape = getInputShapeAtPort(i).getDims();
if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 || !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape, true))
if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 ||
!isPerTensorOrPerChannelBroadcastable(dataShape, weightShape, channelAxis, true))
return false;
}
return true;
@ -1246,6 +1278,9 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
|| isConvertablePowerStatic();
}
// @todo shifts for Subtract and scales for Divide are replaced with
// Add (with opposite sign) and Multiply (with inverse value) for legacy depthwise post ops
// This can be avoided after depthwise post ops are gone
std::pair<std::vector<float>, std::vector<float>> MKLDNNNode::getScalesAndShifts(const MKLDNNNode *parentNode) const {
std::vector<float> scales, shifts;
@ -1309,6 +1344,36 @@ std::pair<std::vector<float>, std::vector<float>> MKLDNNNode::getScalesAndShifts
return {scales, shifts};
}
bool MKLDNNNode::isInputTensorAtPortEmpty(size_t port) const {
if (inputShapes.size() <= port) {
IE_THROW() << "Incorrect input port number for node " << getName();
}
return getParentEdgesAtPort(port)[0]->getMemory().GetShape().hasZeroDims();
}
bool MKLDNNNode::isOutputTensorAtPortEmpty(size_t port) const {
if (outputShapes.size() <= port) {
IE_THROW() << "Incorrect output port number for node " << getName();
}
return getChildEdgesAtPort(port)[0]->getMemory().GetShape().hasZeroDims();
}
bool MKLDNNNode::hasEmptyInputTensors() const {
for (size_t i = 0; i < getParentEdges().size(); i++) {
if (isInputTensorAtPortEmpty(i))
return true;
}
return false;
}
bool MKLDNNNode::hasEmptyOutputTensors() const {
for (size_t i = 0; i < outputShapes.size(); i++) {
if (isOutputTensorAtPortEmpty(i))
return true;
}
return false;
}
bool MKLDNNNode::inputShapesDefined() const {
for (size_t i = 0; i < getParentEdges().size(); i++) {
if (!getParentEdgesAtPort(i)[0]->getMemory().getDesc().isDefined())
@ -1382,8 +1447,11 @@ std::vector<VectorDims> MKLDNNNode::shapeInferGeneric(const std::vector<Shape>&
std::vector<VectorDims> newOutputShapes(opToShapeInfer->get_output_size());
for (size_t i = 0; i < newOutputShapes.size(); i++) {
const auto &partShape = opToShapeInfer->get_output_partial_shape(i);
if (partShape.is_dynamic())
IE_THROW(NotImplemented) << "CPU plug-in doesn't support default shape infer for nodes with internal dynamism";
if (partShape.is_dynamic()) {
IE_THROW(NotImplemented) << "CPU plug-in doesn't support default shape infer for node " << getTypeStr()
<< " with internal dynamism. Operation name: " << getName();
}
newOutputShapes[i] = partShape.get_shape();
}
return newOutputShapes;
@ -1408,10 +1476,11 @@ bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const {
}
return ret;
} else if (node->getType() == Eltwise) {
return one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh,
EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu) ||
node->canBePerformedAsScaleShift(this);
return one_of(node->getAlgorithm(),
EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh,
EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu) ||
node->canBePerformedAsScaleShift(this);
}
return false;
}
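
A toy model of the control flow that executeDynamic() now follows, showing how empty (zero-dim) inputs skip prepareParams() and the kernel while shape inference and bookkeeping still run (ToyNode is purely illustrative):

    #include <iostream>

    struct ToyNode {
        bool hasEmptyInput = false;
        bool needShapeInfer = true;
        bool needPrepareParams = true;

        bool isExecutable() const { return !hasEmptyInput; }

        void executeDynamic() {
            if (needShapeInfer)
                std::cout << "redefine output memory\n";
            if (isExecutable()) {               // empty tensors short-circuit here
                if (needPrepareParams)
                    std::cout << "prepare params\n";
                std::cout << "execute kernel\n";
            }
            std::cout << "update last input dims\n";
        }
    };

    int main() {
        ToyNode node;
        node.hasEmptyInput = true;
        node.executeDynamic();                  // only shape infer + bookkeeping run
    }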

View File

@ -199,11 +199,19 @@ public:
// must be called only after MKLDNNGraph::InitEdges()
virtual bool isExecutable() const {
return true;
return !hasEmptyInputTensors();
}
bool isConstant();
virtual size_t getFusingAxis() const {
return 1;
}
static void appendPostOpArgs(const mkldnn::primitive_attr& attr,
std::unordered_map<int, mkldnn::memory>& primArgs,
const std::vector<MKLDNNMemoryPtr>& binaryPostOpsArgs);
bool isFusedWith(Type type) const;
void addFusedNode(const MKLDNNNodePtr &fusingNode) {
@ -362,7 +370,7 @@ public:
*/
virtual void filterSupportedPrimitiveDescriptors();
virtual void createPrimitive() = 0;
virtual void createPrimitive();
virtual void selectOptimalPrimitiveDescriptor();
virtual void initOptimalPrimitiveDescriptor();
@ -419,7 +427,7 @@ public:
if (impl_type == selected_pd->getImplementationType() &&
descsCompatible(srcDescs, selected_pd->getConfig().inConfs) &&
descsCompatible(dstDescs, selected_pd->getConfig().outConfs)) {
prepareMemory(selected_pd, itpd);
prepareMemory(itpd);
PD prim_desc = createPd<PD, D, FPD>(desc);
return {itpd.get()};
}
@ -594,8 +602,10 @@ protected:
* Seed node should call this routine and pass its post operations list as parameter.
* @param ops List of fused post operations
*/
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false);
virtual AttrPtr initPrimitiveAttr() const { return nullptr; }
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, int align = -1);
virtual void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem);
virtual std::shared_ptr<mkldnn::primitive_attr> initPrimitiveAttr() { return nullptr; }
typedef std::function<DnnlMemoryDescPtr (mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx)>
GetPrimitiveMemoryFormatFunc;
@ -636,7 +646,7 @@ protected:
std::vector<MKLDNNMemoryPtr> internalBlobMemory;
std::vector<NodeDesc> supportedPrimitiveDescriptors;
std::unordered_map<int, mkldnn::memory> primArgs;
std::vector<mkldnn::memory> binaryPostOpsArgs;
std::vector<MKLDNNMemoryPtr> binaryPostOpsArgs;
MKLDNNPrimitive prim;
std::vector<MKLDNNDescriptor> descs;
@ -714,8 +724,16 @@ protected:
supportedPrimitiveDescriptors.push_back({config, implType});
}
void prepareMemory(mkldnn::primitive_desc_iterator& itpd);
bool isDynamic = false;
bool isInputTensorAtPortEmpty(size_t port) const;
bool isOutputTensorAtPortEmpty(size_t port) const;
bool hasEmptyInputTensors() const;
bool hasEmptyOutputTensors() const;
bool inputShapesDefined() const;
bool outputShapesDefined() const;
bool shapesDefined() const;
@ -738,6 +756,7 @@ protected:
}
std::vector<VectorDims> lastInputDims = {};
std::shared_ptr<ngraph::Node> opToShapeInfer;
private:
@ -780,7 +799,6 @@ private:
return PD(*selected_desc_ptr, engine);
}
void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd);
enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 };
ConstantType checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes);

View File

@ -80,6 +80,7 @@
#include "nodes/mkldnn_reduce_node.h"
#include "nodes/mkldnn_if_node.h"
#include "nodes/mkldnn_ctc_greedy_decoder_node.h"
#include "nodes/mkldnn_non_zero.h"
#define MKLDNN_NODE(__prim, __type) \
registerNodeIfRequired(MKLDNNPlugin, __prim, __type, MKLDNNNodeImpl<__prim>)
@ -168,4 +169,5 @@ MKLDNNPlugin::MKLDNNNode::NodesFactory::NodesFactory()
MKLDNN_NODE(MKLDNNTopKNode, TopK);
MKLDNN_NODE(MKLDNNStridedSliceNode, StridedSlice);
MKLDNN_NODE(MKLDNNGRNNode, GRN);
MKLDNN_NODE(MKLDNNNonZeroNode, NonZero);
}

View File

@ -504,23 +504,24 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "Engine::LoadExeNetworkImpl");
// verification of supported input
InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
for (const auto &ii : _networkInputs) {
for (const auto &ii : network.getInputsInfo()) {
auto input_precision = ii.second->getPrecision();
if (input_precision != InferenceEngine::Precision::FP64 &&
input_precision != InferenceEngine::Precision::FP32 &&
input_precision != InferenceEngine::Precision::I32 &&
input_precision != InferenceEngine::Precision::U32 &&
input_precision != InferenceEngine::Precision::U16 &&
input_precision != InferenceEngine::Precision::I16 &&
input_precision != InferenceEngine::Precision::I8 &&
input_precision != InferenceEngine::Precision::U8 &&
input_precision != InferenceEngine::Precision::BF16 &&
input_precision != InferenceEngine::Precision::BOOL &&
input_precision != InferenceEngine::Precision::I64 &&
input_precision != InferenceEngine::Precision::U64) {
using hash_t = std::hash<typename std::underlying_type<Precision::ePrecision>::type>;
static const std::unordered_set<Precision::ePrecision, hash_t> supported_precisions = {
Precision::U8, Precision::I8,
Precision::U16, Precision::I16,
Precision::U32, Precision::I32,
Precision::U64, Precision::I64,
Precision::BF16, Precision::FP16,
Precision::FP32, Precision::FP64,
Precision::BOOL
};
if (!supported_precisions.count(input_precision)) {
IE_THROW(NotImplemented)
<< "Input image format " << input_precision << " is not supported yet...";
<< "Input image format " << input_precision << " is not supported yet...";
}
}
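
A condensed, self-contained version of the lookup-set validation pattern introduced above (the enum and function below are reduced stand-ins, not the real Precision class):

    #include <functional>
    #include <stdexcept>
    #include <type_traits>
    #include <unordered_set>

    enum Precision { U8, I8, U16, I16, U32, I32, U64, I64, BF16, FP16, FP32, FP64, BOOL, CUSTOM };

    void checkInputPrecision(Precision p) {
        using hash_t = std::hash<std::underlying_type<Precision>::type>;
        static const std::unordered_set<Precision, hash_t> supported = {
            U8, I8, U16, I16, U32, I32, U64, I64, BF16, FP16, FP32, FP64, BOOL};
        if (!supported.count(p))
            throw std::runtime_error("Input image format is not supported yet...");
    }

    int main() {
        checkInputPrecision(FP32);              // passes
        try {
            checkInputPrecision(CUSTOM);        // reported as unsupported
        } catch (const std::exception&) {
        }
    }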

View File

@ -18,7 +18,6 @@ public:
operator bool() const;
MKLDNNPrimitive& operator=(const std::shared_ptr<mkldnn::primitive>& primitive);
mkldnn::primitive operator*();
void reset(mkldnn::primitive* primitive);
private:

View File

@ -36,8 +36,9 @@ MKLDNNPlugin::ConvertMatMulToFC::ConvertMatMulToFC() {
auto rank_a = shape_a.rank().get_length();
auto rank_b = shape_b.rank().get_length();
// Transformation to FC is not supported for 1D second input
if (rank_b == 1) {
// Transformation to FC is not supported for 1D inputs or inputs with rank greater than 3
if (rank_a == 1 || rank_b == 1 ||
rank_a > 3 || rank_b > 3) {
return false;
}
@ -47,7 +48,6 @@ MKLDNNPlugin::ConvertMatMulToFC::ConvertMatMulToFC() {
std::count_if(shape_b.begin(), shape_b.end(), [](ngraph::Dimension x) { return x != 1; }) > 2) {
return false;
}
/*
* get_aligned_shapes function aligns two input shapes to have the same size and
* the same batch dimensions (last two dimensions are not comparable).

View File

@ -7,7 +7,6 @@
#include "ngraph/op/fake_quantize.hpp"
#include "ngraph/pass/manager.hpp"
#include "reshape_fc_fusion.hpp"
#include "reshape_fully_connected.hpp"
#include "align_matmul_input_ranks.hpp"
#include "reshape_prelu.hpp"
#include "convert_broadcast_to_tiles.hpp"
@ -29,7 +28,6 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphF
manager.register_pass<AlignMatMulInputRanks>();
manager.register_pass<ConvertTileToSeqTiles>();
manager.register_pass<FullyConnectedBiasFusion>();
manager.register_pass<ReshapeFullyConnected>();
manager.register_pass<ConvertToPowerStatic>();
manager.register_pass<ConvertToLeakyRelu>();
manager.register_pass<ReshapePRelu>();

View File

@ -1,114 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "reshape_fully_connected.hpp"
#include "op/fully_connected.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/pattern/op/or.hpp>
#include <transformations/utils/utils.hpp>
#include <numeric>
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ReshapeFullyConnected, "ReshapeFullyConnected", 0);
MKLDNNPlugin::ReshapeFullyConnected::ReshapeFullyConnected() {
ngraph::OutputVector twoInputs = {
ngraph::pattern::any_input(ngraph::pattern::has_static_rank()), ngraph::pattern::any_input(ngraph::pattern::has_static_shape())};
ngraph::OutputVector threeInputs = {
ngraph::pattern::any_input(ngraph::pattern::has_static_rank()), ngraph::pattern::any_input(ngraph::pattern::has_static_shape()),
ngraph::pattern::any_input()};
auto fcTwoInputs = ngraph::pattern::wrap_type<MKLDNNPlugin::FullyConnectedNode>(twoInputs, ngraph::pattern::has_static_rank());
auto fcThreeInputs = ngraph::pattern::wrap_type<MKLDNNPlugin::FullyConnectedNode>(threeInputs, ngraph::pattern::has_static_rank());
const auto fcTwoOrThreeInputs = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{fcTwoInputs, fcThreeInputs});
ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) {
auto fc = std::dynamic_pointer_cast<MKLDNNPlugin::FullyConnectedNode>(m.get_match_root());
if (!fc || transformation_callback(fc)) {
return false;
}
auto fc_input_shape = fc->get_input_partial_shape(0);
auto input_rank = fc_input_shape.rank().get_length();
auto output_shape = fc->get_output_partial_shape(0);
if (input_rank == 2 || input_rank == 0) {
return false;
}
ngraph::NodeVector new_ops;
int64_t K = *(fc->get_input_shape(1).rbegin()); // requested 2nd input with static shape in the matcher
auto reshape = std::make_shared<ngraph::opset1::Reshape>(
fc->input_value(0), ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{-1, K}), false);
if (reshape->get_output_partial_shape(0).rank().is_dynamic())
return false;
new_ops.push_back(reshape);
reshape->set_friendly_name(fc->get_friendly_name() + "/Reshape");
// Calculate output shape for new FullyConnected layer
// [I, K] * [O, K] = [I, O]
auto I = reshape->get_output_partial_shape(0)[0];
auto O = fc->get_input_partial_shape(1)[0];
ngraph::PartialShape output_shape_new{I, O};
std::shared_ptr<ngraph::Node> fc_new;
if (fc->get_input_size() == 2) {
fc_new = std::make_shared<MKLDNNPlugin::FullyConnectedNode>(reshape,
fc->input_value(1),
output_shape_new.rank(),
fc->get_output_type());
} else if (fc->get_input_size() == 3) {
fc_new = std::make_shared<MKLDNNPlugin::FullyConnectedNode>(reshape,
fc->input_value(1),
fc->input_value(2),
output_shape_new.rank(),
fc->get_output_type());
} else {
return false;
}
new_ops.push_back(fc_new);
if (output_shape != output_shape_new) {
auto I_idxs = std::vector<size_t>(input_rank - 1);
std::iota(I_idxs.begin(), I_idxs.end(), 0);
auto A_input_shape = ngraph::op::util::make_try_fold<ngraph::opset7::ShapeOf>(fc->input_value(0));
auto B_input_shape = ngraph::op::util::make_try_fold<ngraph::opset7::ShapeOf>(fc->input_value(1));
auto I_node = ngraph::op::util::node_to_get_shape_value_of_indices_from_shape_node(A_input_shape, {I_idxs});
auto O_node = ngraph::op::util::node_to_get_shape_value_of_indices_from_shape_node(B_input_shape, {0});
ngraph::OutputVector output_shape_dims{I_node, O_node};
const auto original_rank = fc->get_output_rank();
NGRAPH_CHECK(original_rank.is_static());
if (input_rank < original_rank.get_length()) {
const size_t const_shape_value = original_rank.get_length() - input_rank;
output_shape_dims.insert(
output_shape_dims.begin(), ngraph::opset1::Constant::create(I_node->get_element_type(), { const_shape_value }, { 1 }));
}
auto reshape_output_shape = ngraph::op::util::make_try_fold<ngraph::opset1::Concat>(output_shape_dims, 0);
auto reshape_output = std::make_shared<ngraph::opset1::Reshape>(fc_new, reshape_output_shape, false);
new_ops.push_back(A_input_shape);
new_ops.push_back(B_input_shape);
new_ops.push_back(I_node);
new_ops.push_back(O_node);
new_ops.push_back(reshape_output_shape);
new_ops.push_back(reshape_output);
reshape_output->set_friendly_name(fc->get_friendly_name());
fc_new->set_friendly_name(fc->get_friendly_name() + "/FC");
ngraph::copy_runtime_info(fc, new_ops);
ngraph::replace_node(fc, reshape_output);
} else {
fc_new->set_friendly_name(fc->get_friendly_name());
ngraph::copy_runtime_info(fc, new_ops);
ngraph::replace_node(fc, fc_new);
}
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(fcTwoOrThreeInputs, "ReshapeFullyConnected");
this->register_matcher(m, callback);
}

View File

@ -1,25 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
/*
* Description:
* ReshapeFullyConnected transformation detects FullyConnected operations
* and for each operation where input shape is greater than 2 inserts Reshape
* operations before and after FullyConnected operation. This transformation is
* required because of IE restrictions.
*/
namespace MKLDNNPlugin {
class ReshapeFullyConnected: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ReshapeFullyConnected();
};
} // namespace MKLDNNPlugin

View File

@ -4,27 +4,208 @@
#include "cpu_convert.h"
#include "cpu_memcpy.h"
#include "utils/bfloat16.hpp"
#include <utils/bfloat16.hpp>
#include <utils/general_utils.h>
#include <mkldnn_selective_build.h>
#include <ie_parallel.hpp>
#include <openvino/core/type/float16.hpp>
#include <cpu/x64/jit_generator.hpp>
#include <algorithm>
#include <type_traits>
#include <tuple>
#include <ie_parallel.hpp>
#include <cmath>
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
using namespace dnnl::impl::cpu::x64;
using namespace dnnl::impl::utils;
using namespace Xbyak;
namespace {
template<typename srcType, typename dstType>
void convert(const void *srcPtr, void *dstPtr, const size_t size) {
if (std::is_same<srcType, dstType>::value) {
cpu_memcpy(dstPtr, srcPtr, size*sizeof(dstType));
} else {
const srcType *srcData = reinterpret_cast<const srcType *>(srcPtr);
dstType *dstData = reinterpret_cast<dstType *>(dstPtr);
template <typename src_t, typename dst_t>
void convert_vec(jit_generator & gen,
const RegExp & src,
const RegExp & dst);
parallel_for(size, [&](size_t i) {
dstData[i] = static_cast<dstType>(srcData[i]);
template <>
void convert_vec<ov::float16, float>(jit_generator & gen,
const RegExp & src,
const RegExp & dst) {
auto const & f16vec = gen.xmm3;
auto const & f32vec = gen.ymm4;
gen.movdqu(f16vec, gen.xword[src]);
gen.vcvtph2ps(f32vec, f16vec);
gen.vmovups(gen.yword[dst], f32vec);
}
template <>
void convert_vec<float, ov::float16>(jit_generator & gen,
const RegExp & src,
const RegExp & dst) {
auto const & f16vec = gen.xmm3;
auto const & f32vec = gen.ymm4;
gen.vmovups(f32vec, gen.yword[src]);
gen.vcvtps2ph(f16vec, f32vec, 0);
gen.movdqu(gen.xword[dst], f16vec);
}
class jit_convert_array : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_convert_array)
void generate() override {
const size_t vlen = 8u;
const size_t vlen_log2 = 3;
auto reg_src = rax;
auto reg_dst = rbx;
auto reg_sz = rdx;
Label tail, exit;
preamble();
mov(reg_src, ptr[param1 + offsetof(args_t, src)]);
mov(reg_dst, ptr[param1 + offsetof(args_t, out)]);
mov(reg_sz, ptr[param1 + offsetof(args_t, count)]);
xor_(rsi, rsi);
mov(r8, reg_sz);
shr(r8, vlen_log2);
foreach(rsi, 1, r8, [&, this](const Xbyak::Reg64& idx) {
_convert_vec(*this, reg_src, reg_dst);
add(reg_src, _src_size * vlen);
add(reg_dst, _dst_size * vlen);
});
L(tail);
shl(rsi, vlen_log2);
sub(reg_sz, rsi);
test(reg_sz, reg_sz);
jz(exit);
// allocate array for 8 floats on stack
sub(rsp, vlen * sizeof(float));
mov(r8, rsp);
vpxor(ymm4, ymm4, ymm4);
vmovups(yword[r8], ymm4);
// Tail conversion
copy(r8, reg_src, reg_sz, _src_size);
_convert_vec(*this, r8, r8);
copy(reg_dst, r8, reg_sz, _dst_size);
// Free the array on stack
add(rsp, vlen * sizeof(float));
L(exit);
postamble();
}
void foreach(const Xbyak::Reg64& idx,
size_t step,
const Xbyak::Reg64& end,
std::function<void(const Xbyak::Reg64&)> && fn) {
Label loop, exit;
L(loop);
cmp(idx, end);
jge(exit);
fn(idx);
add(idx, step);
jmp(loop);
L(exit);
}
void copy(const Xbyak::Reg64& dst,
const Xbyak::Reg64& src,
const Xbyak::Reg64& size,
size_t item_size) {
push(rsi);
push(r15);
xor_(rsi, rsi);
auto address_frame = [this](size_t size) -> const AddressFrame& {
switch (size) {
case 1: return byte;
case 2: return word;
case 4: return dword;
case 8: return qword;
default:
break;
}
return ptr;
};
const auto & addr_frame = address_frame(item_size);
foreach(rsi, 1, size, [&, this](const Xbyak::Reg64& idx) {
mov(r15, addr_frame[src + idx * item_size]);
mov(addr_frame[dst + idx * item_size], r15);
});
pop(r15);
pop(rsi);
}
public:
typedef struct {
const void* src;
void* out;
const size_t count;
} args_t;
typedef void (*fn_t)(const args_t*);
typedef void (*convert_vec_t)(jit_generator &,
const RegExp &,
const RegExp &);
jit_convert_array(convert_vec_t convert_vec,
size_t src_size,
size_t dst_size)
: _convert_vec(convert_vec)
, _src_size(src_size)
, _dst_size(dst_size) {}
template<typename src_t, typename dst_t>
static fn_t get() {
if (mayiuse(avx2) && cpu().has(util::Cpu::tF16C)) {
static jit_convert_array converter(convert_vec<src_t, dst_t>, sizeof(src_t), sizeof(dst_t));
auto & generator = static_cast<jit_generator&>(converter);
generator.create_kernel();
return (fn_t)generator.jit_ker();
}
return nullptr;
}
private:
convert_vec_t _convert_vec;
size_t _src_size;
size_t _dst_size;
};
template <typename TI, typename TO>
void jit_convert(const TI* arg, TO* out, size_t count) {
using jit_impl = jit_convert_array;
static auto converter = jit_impl::get<TI, TO>();
if (converter) {
typename jit_impl::args_t args = { arg, out, count };
converter(&args);
} else {
for (size_t i = 0; i < count; ++i) {
out[i] = static_cast<TO>(arg[i]);
}
}
}
@ -35,84 +216,391 @@ struct PrecisionInfo {
template <>
struct PrecisionInfo<Precision::BF16> {
using value_type = MKLDNNPlugin::bfloat16_t;
using value_type = bfloat16_t;
};
template <>
struct PrecisionInfo<Precision::FP16> {
using value_type = ov::float16;
};
template <>
struct PrecisionInfo<Precision::BOOL> {
using value_type = uint8_t;
};
template<typename T,
typename U = typename std::conditional<
std::is_same<ov::float16, T>::value
|| std::is_same<bfloat16_t, T>::value,
float, T>::type>
struct Range {
const std::tuple<U, U> & fit(const Precision & prec);
private:
std::tuple<U, U> _range {
std::numeric_limits<T>::lowest(),
std::numeric_limits<T>::max()
};
};
template<typename T, typename U>
const std::tuple<U, U> & Range<T, U>::fit(const Precision & prec) {
if (prec.is_float()) {
double lbound, ubound;
switch (prec) {
case Precision::BF16:
lbound = static_cast<double>(std::numeric_limits<bfloat16_t>::lowest());
ubound = static_cast<double>(std::numeric_limits<bfloat16_t>::max());
break;
case Precision::FP16:
lbound = static_cast<double>(std::numeric_limits<ov::float16>::lowest());
ubound = static_cast<double>(std::numeric_limits<ov::float16>::max());
break;
case Precision::FP32:
lbound = static_cast<double>(std::numeric_limits<float>::lowest());
ubound = static_cast<double>(std::numeric_limits<float>::max());
break;
case Precision::FP64:
lbound = std::numeric_limits<double>::lowest();
ubound = std::numeric_limits<double>::max();
break;
default:
IE_THROW() << "Unsupported precision";
}
std::get<0>(_range) = static_cast<U>(std::max(static_cast<double>(std::get<0>(_range)), lbound));
std::get<1>(_range) = static_cast<U>(std::min(static_cast<double>(std::get<1>(_range)), ubound));
} else {
int64_t lbound;
uint64_t ubound;
switch (prec) {
case Precision::BOOL:
case Precision::U8:
lbound = static_cast<int64_t>(std::numeric_limits<uint8_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<uint8_t>::max());
break;
case Precision::I8:
lbound = static_cast<int64_t>(std::numeric_limits<int8_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<int8_t>::max());
break;
case Precision::U16:
lbound = static_cast<int64_t>(std::numeric_limits<uint16_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<uint16_t>::max());
break;
case Precision::I16:
lbound = static_cast<int64_t>(std::numeric_limits<int16_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<int16_t>::max());
break;
case Precision::U32:
lbound = static_cast<int64_t>(std::numeric_limits<uint32_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<uint32_t>::max());
break;
case Precision::I32:
lbound = static_cast<int64_t>(std::numeric_limits<int32_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<int32_t>::max());
break;
case Precision::U64:
lbound = static_cast<int64_t>(std::numeric_limits<uint64_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<uint64_t>::max());
break;
case Precision::I64:
lbound = static_cast<int64_t>(std::numeric_limits<int64_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
break;
default:
IE_THROW() << "Unsupported precision";
}
using ltype = typename std::conditional<
std::is_floating_point<U>::value,
double, int64_t>::type;
using utype = typename std::conditional<
std::is_floating_point<U>::value,
double, uint64_t>::type;
std::get<0>(_range) = static_cast<U>(std::max(static_cast<ltype>(std::get<0>(_range)), static_cast<ltype>(lbound)));
std::get<1>(_range) = static_cast<U>(std::min(static_cast<utype>(std::get<1>(_range)), static_cast<utype>(ubound)));
}
return _range;
}
struct ConvertContext {
const void *srcPtr;
void *dstPtr;
size_t size;
Precision interimPrc;
Precision dstPrc;
bool converted;
template<typename T>
std::tuple<T, T> range() const {
Range<T> r;
r.fit(interimPrc);
return r.fit(dstPrc);
}
};
template<typename T>
struct ConvertPrecision;
template<typename src_t, typename dst_t>
struct ConvertPrecision<std::tuple<src_t, dst_t>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const src_t *>(ctx.srcPtr);
auto dst = static_cast<dst_t *>(ctx.dstPtr);
src_t lbound, ubound;
std::tie(lbound, ubound) = ctx.range<src_t>();
if (std::is_integral<src_t>::value
|| ctx.interimPrc.is_float()
|| std::is_integral<dst_t>::value) {
parallel_for(ctx.size, [&](size_t i) {
dst[i] = static_cast<dst_t>(std::max(std::min(src[i], ubound), lbound));
});
} else {
parallel_for(ctx.size, [&](size_t i) {
dst[i] = static_cast<dst_t>(std::trunc(std::max(std::min(src[i], ubound), lbound)));
});
}
ctx.converted = true;
}
};
template<>
struct ConvertPrecision<std::tuple<float, bfloat16_t>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const float *>(ctx.srcPtr);
auto dst = static_cast<bfloat16_t *>(ctx.dstPtr);
if (ctx.interimPrc.is_float()) {
parallel_for(ctx.size, [&](size_t i) {
dst[i] = static_cast<bfloat16_t>(src[i]);
});
} else {
float lbound, ubound;
std::tie(lbound, ubound) = ctx.range<float>();
parallel_for(ctx.size, [&](size_t i) {
dst[i] = static_cast<bfloat16_t>(std::trunc(std::max(std::min(src[i], ubound), lbound)));
});
}
ctx.converted = true;
}
};
template<>
struct ConvertPrecision<std::tuple<bfloat16_t, float>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const bfloat16_t *>(ctx.srcPtr);
auto dst = static_cast<float *>(ctx.dstPtr);
if (ctx.interimPrc.is_float()) {
parallel_for(ctx.size, [&](size_t i) {
dst[i] = static_cast<float>(src[i]);
});
} else {
float lbound, ubound;
std::tie(lbound, ubound) = ctx.range<bfloat16_t>();
parallel_for(ctx.size, [&](size_t i) {
dst[i] = std::trunc(std::max(std::min(static_cast<float>(src[i]), ubound), lbound));
});
}
ctx.converted = true;
}
};
template<typename src_t>
struct ConvertPrecision<std::tuple<src_t, ov::float16>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const src_t *>(ctx.srcPtr);
auto dst = static_cast<ov::float16 *>(ctx.dstPtr);
constexpr size_t batch = 64;
const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
typedef float batch_type[batch];
src_t lbound, ubound;
std::tie(lbound, ubound) = ctx.range<src_t>();
if (std::is_integral<src_t>::value
|| ctx.interimPrc.is_float()) {
parallel_for(iterations, [&](size_t i) {
batch_type tmp;
const size_t offset = i * batch;
const size_t current_batch_size = std::min(ctx.size - offset, batch);
for (size_t j = 0; j < current_batch_size; ++j) // src_t -> fp32
tmp[j] = static_cast<float>(std::max(std::min(src[offset + j], ubound), lbound));
jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16
});
} else {
parallel_for(iterations, [&](size_t i) {
batch_type tmp;
const size_t offset = i * batch;
const size_t current_batch_size = std::min(ctx.size - offset, batch);
for (size_t j = 0; j < current_batch_size; ++j) // src_t -> fp32
tmp[j] = static_cast<float>(std::trunc(std::max(std::min(src[offset + j], ubound), lbound)));
jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16
});
}
ctx.converted = true;
}
};
template<typename dst_t>
struct ConvertPrecision<std::tuple<ov::float16, dst_t>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const ov::float16 *>(ctx.srcPtr);
auto dst = static_cast<dst_t *>(ctx.dstPtr);
constexpr size_t batch = 64;
const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
typedef float batch_type[batch];
float lbound, ubound;
std::tie(lbound, ubound) = ctx.range<ov::float16>();
if (ctx.interimPrc.is_float()
|| std::is_integral<dst_t>::value) {
parallel_for(iterations, [&](size_t i) {
batch_type tmp;
const size_t offset = i * batch;
const size_t current_batch_size = std::min(ctx.size - offset, batch);
jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32
for (size_t j = 0; j < current_batch_size; ++j) // fp32 -> dst_t
dst[offset + j] = static_cast<dst_t>(std::max(std::min(tmp[j], ubound), lbound));
});
} else {
parallel_for(iterations, [&](size_t i) {
batch_type tmp;
const size_t offset = i * batch;
const size_t current_batch_size = std::min(ctx.size - offset, batch);
jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32
for (size_t j = 0; j < current_batch_size; ++j) // fp32 -> dst_t
dst[offset + j] = static_cast<dst_t>(std::trunc(std::max(std::min(tmp[j], ubound), lbound)));
});
}
ctx.converted = true;
}
};
template<>
struct ConvertPrecision<std::tuple<ov::float16, ov::float16>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const ov::float16 *>(ctx.srcPtr);
auto dst = static_cast<ov::float16 *>(ctx.dstPtr);
constexpr size_t batch = 64;
const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
typedef float batch_type[batch];
float lbound, ubound;
std::tie(lbound, ubound) = ctx.range<ov::float16>();
if (ctx.interimPrc.is_float()) {
cpu_memcpy(dst, src, ctx.size * sizeof(ov::float16));
} else {
parallel_for(iterations, [&](size_t i) {
batch_type tmp;
const size_t offset = i * batch;
const size_t current_batch_size = std::min(ctx.size - offset, batch);
jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32
for (size_t j = 0; j < current_batch_size; ++j) // truncate fp32
tmp[j] = std::trunc(std::max(std::min(tmp[j], ubound), lbound));
jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16
});
}
ctx.converted = true;
}
};
bool isConversionTruncatesRange(const Precision & from, const Precision & to) {
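// e.g. FP32 -> I32 truncates (float to integral) and I8 -> U8 truncates (signed vs. unsigned),
// while I8 -> I32 does not (illustrative cases)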
return to.bitsSize() < from.bitsSize()
|| (from.is_float() && !to.is_float()) // float -> integral
|| (from.isSigned() != to.isSigned()) // signed <-> unsigned
|| (to == Precision::BOOL && from != to); // T -> bool
}
} // namespace
#define MKLDNN_CVT(ST, DT) OV_CASE2(Precision::ST, Precision::DT, PrecisionInfo<Precision::ST>::value_type, PrecisionInfo<Precision::DT>::value_type)
#define MKLDNN_CVT_LIST \
MKLDNN_CVT(U8, I8), MKLDNN_CVT(U8, U16), MKLDNN_CVT(U8, I16), MKLDNN_CVT(U8, U32), \
MKLDNN_CVT(U8, I32), MKLDNN_CVT(U8, U64), MKLDNN_CVT(U8, I64), MKLDNN_CVT(U8, FP32), \
MKLDNN_CVT(U8, FP16), MKLDNN_CVT(U8, BF16), MKLDNN_CVT(U8, FP64), MKLDNN_CVT(U8, BOOL), \
MKLDNN_CVT(I8, U8), MKLDNN_CVT(I8, U16), MKLDNN_CVT(I8, I16), MKLDNN_CVT(I8, U32), \
MKLDNN_CVT(I8, I32), MKLDNN_CVT(I8, U64), MKLDNN_CVT(I8, I64), MKLDNN_CVT(I8, FP32), \
MKLDNN_CVT(I8, FP16), MKLDNN_CVT(I8, BF16), MKLDNN_CVT(I8, FP64), MKLDNN_CVT(I8, BOOL), \
MKLDNN_CVT(U16, U8), MKLDNN_CVT(U16, I8), MKLDNN_CVT(U16, I16), MKLDNN_CVT(U16, U32), \
MKLDNN_CVT(U16, I32), MKLDNN_CVT(U16, U64), MKLDNN_CVT(U16, I64), MKLDNN_CVT(U16, FP32), \
MKLDNN_CVT(U16, FP16), MKLDNN_CVT(U16, BF16), MKLDNN_CVT(U16, FP64), MKLDNN_CVT(U16, BOOL), \
MKLDNN_CVT(I16, U8), MKLDNN_CVT(I16, I8), MKLDNN_CVT(I16, U16), MKLDNN_CVT(I16, U32), \
MKLDNN_CVT(I16, I32), MKLDNN_CVT(I16, U64), MKLDNN_CVT(I16, I64), MKLDNN_CVT(I16, FP32), \
MKLDNN_CVT(I16, FP16), MKLDNN_CVT(I16, BF16), MKLDNN_CVT(I16, FP64), MKLDNN_CVT(I16, BOOL), \
MKLDNN_CVT(U32, U8), MKLDNN_CVT(U32, I8), MKLDNN_CVT(U32, U16), MKLDNN_CVT(U32, I16), \
MKLDNN_CVT(U32, I32), MKLDNN_CVT(U32, U64), MKLDNN_CVT(U32, I64), MKLDNN_CVT(U32, FP32), \
MKLDNN_CVT(U32, FP16), MKLDNN_CVT(U32, BF16), MKLDNN_CVT(U32, FP64), MKLDNN_CVT(U32, BOOL), \
MKLDNN_CVT(I32, U8), MKLDNN_CVT(I32, I8), MKLDNN_CVT(I32, U16), MKLDNN_CVT(I32, I16), \
MKLDNN_CVT(I32, U32), MKLDNN_CVT(I32, U64), MKLDNN_CVT(I32, I64), MKLDNN_CVT(I32, FP32), \
MKLDNN_CVT(I32, FP16), MKLDNN_CVT(I32, BF16), MKLDNN_CVT(I32, FP64), MKLDNN_CVT(I32, BOOL), \
MKLDNN_CVT(U64, U8), MKLDNN_CVT(U64, I8), MKLDNN_CVT(U64, U16), MKLDNN_CVT(U64, I16), \
MKLDNN_CVT(U64, U32), MKLDNN_CVT(U64, I32), MKLDNN_CVT(U64, I64), MKLDNN_CVT(U64, FP32), \
MKLDNN_CVT(U64, FP16), MKLDNN_CVT(U64, BF16), MKLDNN_CVT(U64, FP64), MKLDNN_CVT(U64, BOOL), \
MKLDNN_CVT(I64, U8), MKLDNN_CVT(I64, I8), MKLDNN_CVT(I64, U16), MKLDNN_CVT(I64, I16), \
MKLDNN_CVT(I64, U32), MKLDNN_CVT(I64, I32), MKLDNN_CVT(I64, U64), MKLDNN_CVT(I64, FP32), \
MKLDNN_CVT(I64, FP16), MKLDNN_CVT(I64, BF16), MKLDNN_CVT(I64, FP64), MKLDNN_CVT(I64, BOOL), \
MKLDNN_CVT(FP32, U8), MKLDNN_CVT(FP32, I8), MKLDNN_CVT(FP32, U16), MKLDNN_CVT(FP32, I16), \
MKLDNN_CVT(FP32, U32), MKLDNN_CVT(FP32, I32), MKLDNN_CVT(FP32, U64), MKLDNN_CVT(FP32, I64), \
MKLDNN_CVT(FP32, FP16), MKLDNN_CVT(FP32, BF16), MKLDNN_CVT(FP32, FP64), MKLDNN_CVT(FP32, BOOL), \
MKLDNN_CVT(FP16, U8), MKLDNN_CVT(FP16, I8), MKLDNN_CVT(FP16, U16), MKLDNN_CVT(FP16, I16), \
MKLDNN_CVT(FP16, U32), MKLDNN_CVT(FP16, I32), MKLDNN_CVT(FP16, U64), MKLDNN_CVT(FP16, I64), \
MKLDNN_CVT(FP16, FP32), MKLDNN_CVT(FP16, BF16), MKLDNN_CVT(FP16, FP64), MKLDNN_CVT(FP16, BOOL), \
MKLDNN_CVT(BF16, U8), MKLDNN_CVT(BF16, I8), MKLDNN_CVT(BF16, U16), MKLDNN_CVT(BF16, I16), \
MKLDNN_CVT(BF16, U32), MKLDNN_CVT(BF16, I32), MKLDNN_CVT(BF16, U64), MKLDNN_CVT(BF16, I64), \
MKLDNN_CVT(BF16, FP32), MKLDNN_CVT(BF16, FP16), MKLDNN_CVT(BF16, FP64), MKLDNN_CVT(BF16, BOOL), \
MKLDNN_CVT(FP64, U8), MKLDNN_CVT(FP64, I8), MKLDNN_CVT(FP64, U16), MKLDNN_CVT(FP64, I16), \
MKLDNN_CVT(FP64, U32), MKLDNN_CVT(FP64, I32), MKLDNN_CVT(FP64, U64), MKLDNN_CVT(FP64, I64), \
MKLDNN_CVT(FP64, FP32), MKLDNN_CVT(FP64, FP16), MKLDNN_CVT(FP64, BF16), MKLDNN_CVT(FP64, BOOL), \
MKLDNN_CVT(BOOL, U8), MKLDNN_CVT(BOOL, I8), MKLDNN_CVT(BOOL, U16), MKLDNN_CVT(BOOL, I16), \
MKLDNN_CVT(BOOL, U32), MKLDNN_CVT(BOOL, I32), MKLDNN_CVT(BOOL, U64), MKLDNN_CVT(BOOL, I64), \
MKLDNN_CVT(BOOL, FP32), MKLDNN_CVT(BOOL, FP16), MKLDNN_CVT(BOOL, BF16), MKLDNN_CVT(BOOL, FP64), \
MKLDNN_CVT(U8, U8), MKLDNN_CVT(I8, I8), MKLDNN_CVT(U16, U16), MKLDNN_CVT(I16, I16), \
MKLDNN_CVT(U32, U32), MKLDNN_CVT(I32, I32), MKLDNN_CVT(U64, U64), MKLDNN_CVT(I64, I64), \
MKLDNN_CVT(FP32, FP32), MKLDNN_CVT(FP16, FP16), MKLDNN_CVT(BF16, BF16), MKLDNN_CVT(FP64, FP64), \
MKLDNN_CVT(BOOL, BOOL)
void cpu_convert(const void *srcPtr, void *dstPtr, Precision srcPrc, Precision dstPrc, const size_t size) {
cpu_convert(srcPtr, dstPtr, srcPrc, dstPrc, dstPrc, size);
}
void cpu_convert(const void *srcPtr,
void *dstPtr,
InferenceEngine::Precision srcPrc,
InferenceEngine::Precision interimPrc,
InferenceEngine::Precision dstPrc,
const size_t size) {
if (srcPtr == nullptr || dstPtr == nullptr)
IE_THROW() << "cpu_convert has null data pointer";
if (srcPrc == dstPrc && srcPrc == interimPrc) {
cpu_memcpy(dstPtr, srcPtr, size * dstPrc.size());
} else {
ConvertContext ctx = {
srcPtr,
dstPtr,
size,
interimPrc,
dstPrc,
false
};
OV_SWITCH(MKLDNNPlugin, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc), MKLDNN_CVT_LIST);
if (!ctx.converted)
IE_THROW() << "cpu_convert can't convert from: " << srcPrc << " precision to: " << dstPrc;
}
}
#undef MKLDNN_CVT
#undef MKLDNN_CVT_LIST

View File

@ -19,5 +19,32 @@
* number of elements in buffers to be converted
* @return none.
*/
void cpu_convert(const void *srcPtr,
void *dstPtr,
InferenceEngine::Precision srcPrc,
InferenceEngine::Precision dstPrc,
const size_t size);
/**
* @brief Copies size elements from the buffer pointed to by srcPtr into the buffer pointed to by dstPtr.
* If the precisions srcPrc and dstPrc are different, a conversion from srcPrc to dstPrc is performed.
* @param srcPtr
* pointer to the buffer to convert from
* @param dstPtr
* pointer to the buffer to convert to
* @param srcPrc
* precision of the source buffer
* @param interimPrc
* intermediate precision used for type truncation
* @param dstPrc
* precision of the destination buffer
* @param size
* number of elements in buffers to be converted
* @return none.
*/
void cpu_convert(const void *srcPtr,
void *dstPtr,
InferenceEngine::Precision srcPrc,
InferenceEngine::Precision interimPrc,
InferenceEngine::Precision dstPrc,
const size_t size);
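A minimal usage sketch of the two overloads above. The buffer values, the include paths, and the helper name are illustrative assumptions rather than part of these sources; it only shows that the extra interimPrc argument clamps/truncates values to the intermediate range before they are stored with the destination precision.
```cpp
#include <cstdint>
#include <vector>
#include <ie_precision.hpp>
#include "cpu_convert.h"  // assumed include path for the declarations above

void cpu_convert_usage_sketch() {
    std::vector<float> src = {-300.5f, -1.9f, 0.f, 1.9f, 2.5f, 300.f};
    std::vector<int32_t> dst(src.size());

    // Plain conversion: the wrapper passes dstPrc as the interim precision,
    // so values are only clamped to the I32 range and truncated toward zero.
    cpu_convert(src.data(), dst.data(),
                InferenceEngine::Precision::FP32,
                InferenceEngine::Precision::I32,
                src.size());

    // Truncating conversion: values are first clamped to the I8 range,
    // e.g. -300.5f -> -128 and 300.f -> 127, before being written as I32.
    cpu_convert(src.data(), dst.data(),
                InferenceEngine::Precision::FP32,
                InferenceEngine::Precision::I8,
                InferenceEngine::Precision::I32,
                src.size());
}
```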

View File

@ -0,0 +1,45 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "dnnl_executor.h"
using namespace mkldnn;
using namespace MKLDNNPlugin;
DnnlExecutor::IntermReorder::IntermReorder(const mkldnn::memory::desc& descSrc,
const mkldnn::memory::desc& descDst,
const mkldnn::engine& engine) : m_descSrc(descSrc), m_descDst(descDst) {
auto reorderPd = mkldnn::reorder::primitive_desc(engine, descSrc, engine, descDst);
m_reorder = mkldnn::reorder(reorderPd);
}
void DnnlExecutor::IntermReorder::exec(mkldnn::memory& memSrc, mkldnn::memory& memDst, mkldnn::stream strm) {
m_reorder.execute(strm, memSrc, memDst);
}
void DnnlExecutor::exec(std::unordered_map<int, mkldnn::memory> primArgs, mkldnn::stream strm) {
for (auto &inReorder : inputReorders) {
if (primArgs.count(inReorder.first)) {
mkldnn::memory memDst(inReorder.second.getDstDesc(), strm.get_engine());
inReorder.second.exec(primArgs[inReorder.first], memDst, strm);
primArgs[inReorder.first] = memDst;
} else {
IE_THROW() << "DnnlExecutor has reorder for input " << inReorder.first << ", but doesn't have source memory";
}
}
std::unordered_map<int, mkldnn::memory> outputMem;
for (auto &outReorder : outputReorders) {
if (primArgs.count(outReorder.first)) {
mkldnn::memory memSrc(outReorder.second.getSrcDesc(), strm.get_engine());
outputMem[outReorder.first] = primArgs[outReorder.first];
primArgs[outReorder.first] = memSrc;
} else {
IE_THROW() << "DnnlExecutor has reorder for output " << outReorder.first << ", but doesn't have destination memory";
}
}
(*execPrim).execute(strm, primArgs);
for (auto &outReorder : outputReorders) {
outReorder.second.exec(primArgs[outReorder.first], outputMem[outReorder.first], strm);
}
}

View File

@ -0,0 +1,39 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "mkldnn_memory.h"
#include "mkldnn_primitive.h"
namespace MKLDNNPlugin {
class DnnlExecutor {
protected:
class IntermReorder {
public:
IntermReorder(const mkldnn::memory::desc& descSrc, const mkldnn::memory::desc& descDst, const mkldnn::engine& engine);
void exec(mkldnn::memory& memSrc, mkldnn::memory& memDst, mkldnn::stream strm);
const mkldnn::memory::desc& getSrcDesc() const { return m_descSrc; }
const mkldnn::memory::desc& getDstDesc() const { return m_descDst; }
private:
mkldnn::reorder m_reorder;
mkldnn::memory::desc m_descSrc;
mkldnn::memory::desc m_descDst;
};
public:
void exec(std::unordered_map<int, mkldnn::memory> primArgs, mkldnn::stream strm);
virtual ~DnnlExecutor() = default;
protected:
DnnlExecutor() = default;
MKLDNNPrimitive execPrim;
// key is the port number for the primitive that needs memory reordering
std::unordered_map<int, IntermReorder> inputReorders;
std::unordered_map<int, IntermReorder> outputReorders;
};
} // namespace MKLDNNPlugin
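A hypothetical sketch of how a subclass is expected to wire the protected members above; it mirrors the ConvolutionExecutor added later in this change. The class and function names here are illustrative, not part of these sources.
```cpp
#include <memory>
#include <unordered_map>
#include "dnnl_executor.h"  // assumed include path for the header above

namespace {

// Illustrative subclass: wraps an already selected convolution primitive
// descriptor and registers reorders only for arguments whose runtime
// memory descriptor differs from what the primitive expects.
class ExampleConvExecutor : public MKLDNNPlugin::DnnlExecutor {
public:
    ExampleConvExecutor(const mkldnn::convolution_forward::primitive_desc& pd,
                        const mkldnn::memory::desc& srcDesc,
                        const mkldnn::memory::desc& dstDesc,
                        const mkldnn::engine& engine) {
        execPrim.reset(new mkldnn::convolution_forward(pd));
        if (srcDesc != pd.src_desc())
            inputReorders.insert({DNNL_ARG_SRC, IntermReorder(srcDesc, pd.src_desc(), engine)});
        if (dstDesc != pd.dst_desc())
            outputReorders.insert({DNNL_ARG_DST, IntermReorder(pd.dst_desc(), dstDesc, engine)});
    }
};

}  // namespace

// At execution time the node only supplies the primitive arguments;
// exec() applies the registered input reorders, runs the primitive,
// then applies the registered output reorders.
void runExampleExecutor(ExampleConvExecutor& executor,
                        const mkldnn::memory& src, const mkldnn::memory& wgh,
                        const mkldnn::memory& dst, mkldnn::stream strm) {
    std::unordered_map<int, mkldnn::memory> args{
        {DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, wgh}, {DNNL_ARG_DST, dst}};
    executor.exec(args, strm);
}
```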

View File

@ -145,6 +145,10 @@ void MKLDNNAdaptivePoolingNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNAdaptivePoolingNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNAdaptivePoolingNode::execute(mkldnn::stream strm) {
auto inputPrec = getParentEdgeAt(0)->getMemory().GetDataType();
auto outputPrec = getChildEdgeAt(0)->getMemory().GetDataType();
@ -283,8 +287,6 @@ bool MKLDNNAdaptivePoolingNode::created() const {
return getType() == AdaptivePooling;
}
void MKLDNNAdaptivePoolingNode::createPrimitive() {}
inline void MKLDNNAdaptivePoolingNode::setBinBorders(size_t *startPtr, size_t *endPtr, size_t idx, size_t inputLength, size_t outputLength) {
*(startPtr) = idx * inputLength / outputLength;
*(endPtr) = ceil(static_cast<float>((idx + 1) * inputLength) / outputLength);

View File

@ -18,7 +18,6 @@ public:
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
@ -36,7 +35,7 @@ protected:
bool needShapeInfer() const override;
std::vector<VectorDims> shapeInfer() const override;
bool needPrepareParams() const override { return false; };
void executeDynamicImpl(mkldnn::stream strm) override;
};
} // namespace MKLDNNPlugin

View File

@ -225,6 +225,10 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() {
});
}
void MKLDNNBatchToSpaceNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNBatchToSpaceNode::execute(mkldnn::stream strm) {
switch (getParentEdgeAt(0)->getMemory().getDesc().getPrecision().size()) {
case 1: batchToSpaceKernel<PrecisionTrait<Precision::U8>::value_type>(); break;

View File

@ -18,12 +18,11 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override {};
void execute(mkldnn::stream strm) override;
bool created() const override;
bool needPrepareParams() const override { return false; };
void executeDynamicImpl(mkldnn::stream strm) override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

View File

@ -107,14 +107,6 @@ void MKLDNNBroadcastNode::initSupportedPrimitiveDescriptors() {
supportedPrimitiveDescriptors = getSupportedConfigs(this);
}
void MKLDNNBroadcastNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
bool MKLDNNBroadcastNode::needPrepareParams() const {
return needPrepareParamsVar;
}
@ -215,6 +207,14 @@ std::vector<VectorDims> MKLDNNBroadcastNode::shapeInfer() const {
return newOutputShapes;
}
bool MKLDNNBroadcastNode::isExecutable() const {
return !isInputTensorAtPortEmpty(0);
}
void MKLDNNBroadcastNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNBroadcastNode::execute(mkldnn::stream strm) {
if (optimizedCase) {
optimizedExecute(getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(), getChildEdgeAt(0)->getMemoryPtr());

View File

@ -19,13 +19,11 @@ public:
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
void executeDynamicImpl(mkldnn::stream strm) override;
bool created() const override;
bool isExecutable() const override;
static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
protected:

View File

@ -203,12 +203,8 @@ void MKLDNNBucketizeNode::prepareParams() {
std::accumulate(input_tensor_dims.begin(), input_tensor_dims.end(), size_t(1), std::multiplies<size_t>());
}
bool MKLDNNBucketizeNode::isExecutable() const {
return !isInputTensorAtPortEmpty(0);
}
std::vector<VectorDims> MKLDNNBucketizeNode::shapeInfer() const {

View File

@ -15,15 +15,16 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
void executeDynamicImpl(mkldnn::stream strm) override {
execute(strm);
}
void prepareParams() override;
std::vector<VectorDims> shapeInfer() const override;
bool isExecutable() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
private:

View File

@ -31,6 +31,10 @@ namespace {
constexpr size_t channelAxis = 1lu;
}
bool MKLDNNConcatNode::isExecutable() const {
return !hasEmptyOutputTensors() && !isOptimized();
}
bool MKLDNNConcatNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto concatOp = ngraph::as_type_ptr<const ngraph::op::v0::Concat>(op);
@ -173,7 +177,7 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() {
}
// TODO [DS]: inplace
if (!canBeInPlace || std::any_of(inputShapes.begin(), inputShapes.end(), [](const Shape& shape) { return shape.hasZeroDims(); }))
return;
// Optimized inplace case
@ -353,7 +357,6 @@ void MKLDNNConcatNode::prepareParams() {
IE_THROW() << "Preferable primitive descriptor is not set.";
std::vector<memory::desc> srcs_d;
for (size_t i = 0; i < getParentEdges().size(); i++) {
const auto& srcMemPtr = getParentEdgesAtPort(i)[0]->getMemoryPtr();
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr()) {
@ -362,6 +365,10 @@ void MKLDNNConcatNode::prepareParams() {
<< getName() << ".";
}
if (srcMemPtr->GetShape().hasZeroDims()) {
continue;
}
auto desc = srcMemPtr->GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
const auto& dims = srcMemPtr->getStaticDims();
for (size_t j = 0; j < dims.size(); j++) {
@ -382,14 +389,6 @@ void MKLDNNConcatNode::prepareParams() {
prim.reset(new concat(primitive_desc));
}
void MKLDNNConcatNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
size_t MKLDNNConcatNode::inverseOrder(const SizeVector& order, size_t axis) {
for (size_t i = 0; i < order.size(); i++) {
if (axis == order[i]) {
@ -489,16 +488,23 @@ void MKLDNNConcatNode::execute(mkldnn::stream strm) {
return;
}
if (canOptimizeNspc) {
execNspcSpecCase();
return;
}
const MKLDNNMemory& dst_memory = getChildEdgeAt(0)->getMemory();
const size_t num_src = getParentEdges().size();
std::unordered_map<int, memory> mem_ags {{DNNL_ARG_DST, dst_memory.GetPrimitive()}};
size_t nonZeroInShapes = 0;
for (int i = 0; i < num_src; i++) {
const auto& srcMem = getParentEdgesAtPort(i)[0]->getMemory();
if (srcMem.GetShape().hasZeroDims()) {
continue;
}
mem_ags[DNNL_ARG_MULTIPLE_SRC + nonZeroInShapes] = srcMem.GetPrimitive();
nonZeroInShapes++;
}
(*prim).execute(strm, mem_ags);
}
@ -518,21 +524,32 @@ void MKLDNNConcatNode::execNspcSpecCase() {
std::vector<const uint8_t*> src_ptrs;
std::vector<uint8_t*> dst_ptrs;
size_t nonZeroInShapes = 0;
int firstNonZeroEdge = -1;
for (size_t i = 0; i < num_src; i++) {
const MKLDNNMemory& src_mem = getParentEdgesAtPort(i)[0]->getMemory();
if (src_mem.GetShape().hasZeroDims()) {
continue;
}
const size_t num_channels = src_mem.getStaticDims()[channelAxis];
channelsDataSize.push_back(num_channels * dataSize);
src_ptrs.push_back(reinterpret_cast<const uint8_t*>(src_mem.GetData()));
dst_ptrs.push_back(dst_ptr + channels_size);
channels_size += num_channels * dataSize;
if (firstNonZeroEdge == -1) {
firstNonZeroEdge = i;
}
nonZeroInShapes++;
}
const size_t iter_count = getParentEdgeAt(firstNonZeroEdge)->getMemory().GetSize() / channelsDataSize[0];
parallel_for(iter_count, [&](int i) {
const size_t dst_off = i * channels_size;
for (int j = 0; j < nonZeroInShapes; j++) {
cpu_memcpy(dst_ptrs[j] + dst_off, src_ptrs[j] + i * channelsDataSize[j], channelsDataSize[j]);
}
});

View File

@ -19,7 +19,6 @@ public:
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void initOptimalPrimitiveDescriptor() override;
void createPrimitive() override;
void selectOptimalPrimitiveDescriptor() override;
bool created() const override;
void execute(mkldnn::stream strm) override;
@ -28,10 +27,8 @@ public:
bool isOptimized() const;
InferenceEngine::Precision getRuntimePrecision() const override;
bool isExecutable() const override;
bool needPrepareParams() const override;
void prepareParams() override;

View File

@ -330,48 +330,42 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
}
}
void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights = false) {
mkldnn::post_ops ops;
bool useLegacyPostOps = true; // @todo remove after issue with performance of binary post ops fixed
auto getBinPostOpShape = [&](){
const auto outShape = getOutputShapeAtPort(0).getStaticDims();
const auto outShapeRank = getOutputShapeAtPort(0).getRank();
const auto chIdx = getFusingAxis();
std::vector<size_t> binaryShape(outShapeRank, 1);
binaryShape[chIdx] = outShape[chIdx];
return binaryShape;
};
for (auto &node : fusedWith) {
if (node->getType() == Split || node->getType() == Concatenation)
continue;
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
if (eltwiseNode->isSpecialConvolutionAddFusing()) {
ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision));
} else {
if (useLegacyPostOps || eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, dims, align);
} else {
eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
}
}
continue;
}
if (auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get())) {
if (useLegacyPostOps) {
fakeQuantizeNode->appendPostOps(ops, dims);
} else {
fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
}
continue;
}
@ -416,7 +410,6 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
// attr[1] - binary
mkldnn::primitive_attr attrs[1];
setPostOps(attrs[0], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims());
// setPostOps(attrs[1], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), false, true);
bool containJitImpl = false;
@ -494,15 +487,6 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNConvolutionNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
bool MKLDNNConvolutionNode::created() const {
return getType() == Convolution;
}
@ -552,7 +536,14 @@ MKLDNNConvolutionNode::createDescriptorInternal(const mkldnn::memory::desc& inpu
void MKLDNNConvolutionNode::createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
const std::vector<MemoryDescPtr>& outputDesc) {
MemoryDescPtr inpDesc;
if (inputDesc[0]->isDefined()) {
inpDesc = inputDesc[0];
} else {
auto dummyInDims = MemoryDescUtils::makeDummyShape(inputDesc[0]->getShape()).getStaticDims();
dummyInDims[1] = IC;
inpDesc = inputDesc[0]->cloneWithNewDims(dummyInDims);
}
DnnlMemoryDescPtr definedInpMemDesc = MemoryDescUtils::convertToDnnlMemoryDesc(inpDesc);
DnnlMemoryDescPtr definedOutMemDesc;
@ -630,7 +621,6 @@ void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) {
// attr[1] - binary
mkldnn::primitive_attr attrs[1];
setPostOps(attrs[0], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims());
// setPostOps(attrs[1], false, true);
auto rightConfig = selectedPD->getConfig();
size_t selected_count = 0;
@ -914,25 +904,63 @@ InferenceEngine::Blob::Ptr MKLDNNConvolutionNode::createInternalBlob(InferenceEn
return internalBlob;
}
std::shared_ptr<MKLDNNDescriptor> MKLDNNConvolutionNode::createMkldnnConvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc,
const mkldnn::memory::desc& biasDesc) {
std::shared_ptr<mkldnn::convolution_forward::desc> dnnlConvDesc;
auto alg = isWinograd() ? mkldnn::algorithm::convolution_winograd : mkldnn::algorithm::convolution_direct;
if (withBiases) {
// WA to align IR bias representation (3 to 5 rank tensors) to oneDNN representation (1 rank tensor)
mkldnn::memory::desc dnnlBiasDesc = biasDesc.reshape(MKLDNNExtensionUtils::convertToDnnlDims(biasesDims));
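// e.g. an IR bias with dims {1, OC, 1, 1} is reshaped to the rank-1 {OC} layout expected by oneDNN (illustrative shapes)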
return std::make_shared<MKLDNNDescriptor>(createDescriptorInternal(srcDesc,
wghDesc,
dnnlBiasDesc,
dstDesc,
alg));
} else {
return std::make_shared<MKLDNNDescriptor>(createDescriptorInternal(srcDesc,
wghDesc,
dstDesc,
alg));
}
}
void MKLDNNConvolutionNode::prepareParams() {
auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
auto wghMemPtr = getParentEdgesAtPort(1)[0]->getMemoryPtr();
auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
IE_THROW() << "Destination memory didn't allocate.";
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
IE_THROW() << "Input memory didn't allocate.";
if (!wghMemPtr || !wghMemPtr->GetPrimitivePtr())
IE_THROW() << "Weight memory didn't allocate.";
MKLDNNMemoryPtr biasMemPtr = nullptr;
if (withBiases) {
biasMemPtr = getParentEdgesAtPort(2)[0]->getMemoryPtr();
if (!biasMemPtr || !biasMemPtr->GetPrimitivePtr())
IE_THROW() << "Input memory didn't allocate.";
}
const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << ".";
auto inMemoryDesc = srcMemPtr->GetDescWithType<DnnlMemoryDesc>();
auto weightMemoryDesc = wghMemPtr->GetDescWithType<DnnlMemoryDesc>();
auto outMemoryDesc = dstMemPtr->GetDescWithType<DnnlMemoryDesc>();
mkldnn::memory::desc biasDesc;
if (biasMemPtr) {
biasDesc = biasMemPtr->GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
}
auto initPrimitiveAttr = [&]() {
mkldnn::primitive_attr attr;
addZeroPoints(attr);
setPostOps(attr, outMemoryDesc->getShape().getStaticDims(), true);
return std::make_shared<mkldnn::primitive_attr>(std::move(attr));
};
@ -947,61 +975,95 @@ void MKLDNNConvolutionNode::prepareParams() {
pAttrLocal = initPrimitiveAttr();
}
std::shared_ptr<MKLDNNDescriptor> desc = createMkldnnConvDesc(inMemoryDesc->getDnnlDesc(),
weightMemoryDesc->getDnnlDesc(),
outMemoryDesc->getDnnlDesc(),
biasDesc);
auto itpd = desc->createPrimitiveDescriptorIterator(getEngine(), *pAttrLocal);
convolution_forward::primitive_desc prim_desc;
execPtr = nullptr;
while (static_cast<bool>(itpd)) {
impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str());
if (impl_type == selected_pd->getImplementationType()) {
prim_desc = convolution_forward::primitive_desc(itpd.get());
execPtr = std::make_shared<ConvolutionExecutor>(prim_desc,
srcMemPtr->GetPrimitive().get_desc(),
wghMemPtr->GetPrimitive().get_desc(),
dstMemPtr->GetPrimitive().get_desc(),
getEngine());
break;
}
if (!itpd.next_impl()) {
auto inDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(srcMemPtr->getStaticDims()),
srcMemPtr->GetDataType(),
memory::format_tag::any);
auto wghDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(wghMemPtr->getStaticDims()),
wghMemPtr->GetDataType(),
memory::format_tag::any);
auto outDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(dstMemPtr->getStaticDims()),
dstMemPtr->GetDataType(),
memory::format_tag::any);
std::shared_ptr<MKLDNNDescriptor> reorderConvDesc = createMkldnnConvDesc(inDesc, wghDesc, outDesc, biasDesc);
auto reordItpd = reorderConvDesc->createPrimitiveDescriptorIterator(getEngine(), *pAttrLocal);
if (static_cast<bool>(reordItpd)) {
auto prim_desc = convolution_forward::primitive_desc(reordItpd.get());
execPtr = std::make_shared<ConvolutionExecutor>(prim_desc, srcMemPtr->GetPrimitive().get_desc(),
wghMemPtr->GetPrimitive().get_desc(),
dstMemPtr->GetPrimitive().get_desc(),
getEngine());
break;
}
}
}
if (execPtr) {
primArgs[DNNL_ARG_SRC] = srcMemPtr->GetPrimitive();
primArgs[DNNL_ARG_WEIGHTS] = wghMemPtr->GetPrimitive();
primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive();
if (withBiases) {
primArgs[DNNL_ARG_BIAS] = biasMemPtr->GetPrimitive();
}
MKLDNNNode::appendPostOpArgs(*pAttrLocal, primArgs, binaryPostOpsArgs);
} else {
IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
}
// todo: [AV] uncomment to use binary mechanism
// auto post_ops = attr.get_post_ops();
// int idx = 0;
// for (int i = 0; i < post_ops.len(); i++) {
// if (post_ops.kind(i) == mkldnn::primitive::kind::binary) {
// primArgs.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(i) | DNNL_ARG_SRC_1, binaryPostOpsArgs[idx++]});
// }
// }
}
MKLDNNConvolutionNode::ConvolutionExecutor::ConvolutionExecutor(const mkldnn::convolution_forward::primitive_desc& pd,
const mkldnn::memory::desc& inMemDesc,
const mkldnn::memory::desc& weightMemDesc,
const mkldnn::memory::desc& outMemDesc,
const mkldnn::engine& engine) {
execPrim.reset(new mkldnn::convolution_forward(pd));
if (inMemDesc != pd.src_desc()) {
inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, pd.src_desc(), engine)});
}
if (weightMemDesc != pd.weights_desc()) {
inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, pd.weights_desc(), engine)});
}
if (outMemDesc != pd.dst_desc()) {
outputReorders.insert({DNNL_ARG_DST, IntermReorder(pd.dst_desc(), outMemDesc, engine)});
}
}
void MKLDNNConvolutionNode::execute(mkldnn::stream strm) {
if (!execPtr) {
IE_THROW() << "Can't execute Convolution node with name: " << getName() << ", because executor is not compiled";
}
execPtr->exec(primArgs, strm);
}
void MKLDNNConvolutionNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}

View File

@ -9,6 +9,7 @@
#include <memory>
#include <string>
#include <vector>
#include "common/dnnl_executor.h"
namespace MKLDNNPlugin {
@ -23,7 +24,6 @@ public:
void createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
const std::vector<MemoryDescPtr>& outputDesc) override;
void initDescriptor(const NodeConfig& config) override;
void createPrimitive() override;
void selectOptimalPrimitiveDescriptor() override;
void initSupportedPrimitiveDescriptors() override;
void filterSupportedPrimitiveDescriptors() override;
@ -65,11 +65,29 @@ protected:
InferenceEngine::Precision fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const;
private:
using executorPtr = std::shared_ptr<DnnlExecutor>;
executorPtr execPtr = nullptr;
class ConvolutionExecutor : public DnnlExecutor {
public:
ConvolutionExecutor(const mkldnn::convolution_forward::primitive_desc& pd,
const mkldnn::memory::desc& inMemDesc,
const mkldnn::memory::desc& weightMemDesc,
const mkldnn::memory::desc& outMemDesc,
const mkldnn::engine& engine);
};
std::shared_ptr<MKLDNNDescriptor> createMkldnnConvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc,
const mkldnn::memory::desc& biasDesc);
void prepareParams() override;
void execute(mkldnn::stream strm) override;
void executeDynamicImpl(mkldnn::stream strm) override;
void addZeroPoints(mkldnn::primitive_attr& attr) const;
void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights);
void filterSupportedDescriptors();
bool isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) const;
bool isNspcAvailable() const;
@ -122,4 +140,3 @@ private:
};
} // namespace MKLDNNPlugin

View File

@ -7,7 +7,8 @@
#include "common/cpu_convert.h"
#include "common/blocked_desc_creator.h"
#include <ngraph/opsets/opset1.hpp>
#include "utils/ngraph_utils.hpp"
#include <ie_ngraph_utils.hpp>
#include <utils/ngraph_utils.hpp>
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -26,14 +27,17 @@ bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr<const ngraph:
return true;
}
MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(op, eng, cache) {
std::string errorMessage;
if (isSupportedOperation(op, errorMessage)) {
errorPrefix = "Convert node with name '" + getName() + "'";
} else {
IE_THROW(NotImplemented) << errorMessage;
}
auto convert = ov::as_type_ptr<const ngraph::opset1::Convert>(op);
origPrc = details::convertPrecision(convert->get_destination_type());
}
std::vector<VectorDims> MKLDNNConvertNode::shapeInfer() const {
@ -42,7 +46,8 @@ std::vector<VectorDims> MKLDNNConvertNode::shapeInfer() const {
MKLDNNConvertNode::MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode("Convert", nodeName, eng, cache) {
: MKLDNNNode("Convert", nodeName, eng, cache)
, origPrc(outPrc) {
inputShapes.push_back(shape);
addOriginalInputPrecision(inPrc);
outputShapes.push_back(shape);
@ -124,15 +129,8 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNConvertNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNConvertNode::execute(mkldnn::stream strm) {
@ -147,7 +145,13 @@ void MKLDNNConvertNode::execute(mkldnn::stream strm) {
void* srcPtr = parentMem.GetPtr();
void* dstPtr = childMem.GetPtr();
cpu_convert(srcPtr,
dstPtr,
parentMem.getDesc().getPrecision(),
origPrc,
childMem.getDesc().getPrecision(),
parentPaddElemCount);
}
bool MKLDNNConvertNode::created() const {

View File

@ -19,9 +19,8 @@ public:
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
void executeDynamicImpl(mkldnn::stream strm) override;
bool created() const override;
bool canBeInPlace() const override {
return false;
@ -49,6 +48,7 @@ public:
private:
MemoryDescPtr input;
MemoryDescPtr output;
InferenceEngine::Precision origPrc;
std::string errorPrefix;
};

View File

@ -165,14 +165,8 @@ bool MKLDNNCTCGreedyDecoderNode::created() const {
return getType() == CTCGreedyDecoder;
}
void MKLDNNCTCGreedyDecoderNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
bool MKLDNNCTCGreedyDecoderNode::needPrepareParams() const {

View File

@ -15,7 +15,6 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
void executeDynamicImpl(dnnl::stream strm) override;

View File

@ -168,14 +168,8 @@ bool MKLDNNCTCGreedyDecoderSeqLenNode::created() const {
return getType() == CTCGreedyDecoderSeqLen;
}
void MKLDNNCTCGreedyDecoderSeqLenNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
bool MKLDNNCTCGreedyDecoderSeqLenNode::needPrepareParams() const {

View File

@ -15,7 +15,6 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
void executeDynamicImpl(dnnl::stream strm) override;

View File

@ -57,12 +57,8 @@ void MKLDNNCTCLossNode::initSupportedPrimitiveDescriptors() {
impl_desc_type::ref_any);
}
void MKLDNNCTCLossNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNCTCLossNode::execute(mkldnn::stream strm) {

View File

@ -15,13 +15,12 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
void executeDynamicImpl(mkldnn::stream strm) override;
bool needPrepareParams() const override { return false; };
private:

View File

@ -12,6 +12,7 @@
#include "ie_precision.hpp"
#include <ie_ngraph_utils.hpp>
#include "mkldnn_cum_sum_node.h"
#include "utils/bfloat16.hpp"
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
@ -70,8 +71,7 @@ void MKLDNNCumSumNode::initSupportedPrimitiveDescriptors() {
return;
dataPrecision = getOriginalInputPrecisionAtPort(CUM_SUM_DATA);
if (!one_of(dataPrecision, Precision::I8, Precision::U8, Precision::I16, Precision::BF16, Precision::I32, Precision::FP32, Precision::I64, Precision::U64))
IE_THROW() << errorPrefix << " has unsupported 'data' input precision: " << dataPrecision.name();
if (inputShapes.size() == numOfInputs) {
@ -95,43 +95,17 @@ void MKLDNNCumSumNode::execute(mkldnn::stream strm) {
if (inputShapes.size() == numOfInputs)
axis = getAxis(getParentEdgeAt(AXIS)->getMemory(), getParentEdgeAt(CUM_SUM_DATA)->getMemory());
OV_SWITCH(MKLDNNPlugin, CumSumExecute, this, dataPrecision,
OV_CASE(Precision::I8, int8_t),
OV_CASE(Precision::U8, uint8_t),
OV_CASE(Precision::I16, int16_t),
OV_CASE(Precision::BF16, bfloat16_t),
OV_CASE(Precision::I32, int32_t),
OV_CASE(Precision::FP32, float),
OV_CASE(Precision::I64, int64_t),
OV_CASE(Precision::U64, uint64_t))
}
template <typename dataType>
void MKLDNNCumSumNode::exec() {
const auto *input = reinterpret_cast<const dataType *>(getParentEdgeAt(CUM_SUM_DATA)->getMemoryPtr()->GetPtr());
@ -284,13 +258,7 @@ bool MKLDNNCumSumNode::needPrepareParams() const {
}
void MKLDNNCumSumNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
REG_MKLDNN_PRIM_FOR(MKLDNNCumSumNode, CumSum)

View File

@ -15,7 +15,6 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
@ -47,6 +46,13 @@ private:
InferenceEngine::Precision dataPrecision;
std::string errorPrefix;
template<typename T>
struct CumSumExecute {
void operator()(MKLDNNCumSumNode* node) {
node->exec<T>();
}
};
};
} // namespace MKLDNNPlugin

View File

@ -13,34 +13,38 @@
#include <mkldnn_extension_utils.h>
#include "ie_parallel.hpp"
#include "utils/general_utils.h"
#include <cpu/x64/cpu_isa_traits.hpp>
#include <nodes/common/cpu_memcpy.h>
#include <memory_desc/cpu_memory_desc_utils.h>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/cpu_utils.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <utils/shape_inference/static_shape.hpp>
#include <utils/shape_inference/shape_inference.hpp>
#include <ie_ngraph_utils.hpp>
#include "convolution_shape_inference.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
bool MKLDNNDeconvolutionNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (std::dynamic_pointer_cast<const ngraph::opset1::ConvolutionBackpropData>(op) == nullptr &&
std::dynamic_pointer_cast<const ngraph::opset1::GroupConvolutionBackpropData>(op) == nullptr) {
errorMessage = "Only opset1 ConvolutionBackpropData and GroupConvolutionBackpropData operations are supported";
return false;
}
size_t ndims = op->get_input_partial_shape(0).rank().get_length();
if ((ndims < 3) || (ndims > 5)) {
errorMessage = "Only 3D, 4D and 5D blobs are supported as input";
return false;
}
if (op->get_input_partial_shape(1).is_dynamic() || (op->get_input_size() > 2 && op->get_input_partial_shape(2).is_dynamic())) {
errorMessage = "Doesn't support dynamic shapes for 'weights' and 'output_shape' inputs";
return false;
}
} catch (...) {
return false;
}
@ -58,15 +62,14 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::N
auto convBackprop = std::dynamic_pointer_cast<const ngraph::opset1::ConvolutionBackpropData>(op);
auto groupConvBackprop = std::dynamic_pointer_cast<const ngraph::opset1::GroupConvolutionBackpropData>(op);
const auto& weightDims = getWeightDims();
if (convBackprop) {
algorithm = DeconvolutionCommon;
IC = weightDims[0];
OC = weightDims[1];
groupNum = 1;
withGroups = false;
@ -78,10 +81,17 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::N
}
paddingL = convBackprop->get_pads_begin();
paddingR = convBackprop->get_pads_end();
outputPadding = convBackprop->get_output_padding();
autoPad = one_of(convBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER);
} else if (groupConvBackprop) {
algorithm = DeconvolutionGrouped;
groupNum = weightDims[0];
IC = groupNum * weightDims[1];
OC = groupNum * weightDims[2];
withGroups = groupNum > 1;
isDW = withGroups && groupNum == OC && groupNum == IC;
@ -93,10 +103,26 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::N
}
paddingL = groupConvBackprop->get_pads_begin();
paddingR = groupConvBackprop->get_pads_end();
outputPadding = groupConvBackprop->get_output_padding();
autoPad = one_of(groupConvBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER);
}
for (int i = 0; i < dilation.size(); i++) {
kernel.push_back(weightDims[withGroups + 2 + i]);
}
externOutShape = inputShapes.size() == 3;
if (externOutShape && isDynamicNode()) {
bool isConstOutShape = ngraph::is_type<ov::op::v0::Constant>(op->get_input_node_shared_ptr(2));
if (isConstOutShape) {
lastOutputSpatialDims = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(2))->cast_vector<int32_t>();
}
const auto spDimsNum = getInputShapeAtPort(0).getRank() - 2;
if (getInputShapeAtPort(2).getStaticDims()[0] != spDimsNum || (isConstOutShape && lastOutputSpatialDims.size() != spDimsNum)) {
IE_THROW() << "'output_shape' input has incorrect number of elements. Expected = " << spDimsNum;
}
}
} else {
IE_THROW(NotImplemented) << errorMessage;
}
@ -113,14 +139,6 @@ InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceE
auto const blbSize = blb->GetSize();
// WA: In int8 case, we are processing weights using internal blob.
// So we disconnect constant node containing weights from the graph and then don't use it.
InferenceEngine::SizeVector dimsForBlockedDesc{dims};
std::swap(dimsForBlockedDesc[withGroups + 0], dimsForBlockedDesc[withGroups + 1]);
@ -157,19 +175,19 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const {
return false;
}
// todo: [antonvor] added these checks to fix performance problems
if (kernel.size() == 3)
return false;
if (!withGroups && stride.back() > 3)
return false;
if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common)) {
const auto& inMaxDims = getOutputShapeAtPort(0).getMaxDims();
if (std::any_of(inMaxDims.begin(), inMaxDims.end(), [](Dim dim) { return dim == Shape::UNDEFINED_DIM; })) {
return false;
}
// heuristicConst = 2^26
// heuristicParam = IC^2 * SP
auto heuristicConst = 67108864;
auto heuristicParam = IC * IC;
for (int i = 2; i < inMaxDims.size(); i++)
heuristicParam *= inMaxDims[i];
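// e.g. IC = 256 with a 64x64 max spatial size gives 256 * 256 * 64 * 64 = 2^28 > 2^26, so the int8 path is rejected (illustrative numbers)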
if (heuristicParam > heuristicConst)
return false;
}
@ -206,10 +224,65 @@ bool MKLDNNDeconvolutionNode::canFuse(const MKLDNNNodePtr& node) const {
return (fusedWith.empty() && node->canBePerformedAsScaleShift(this));
}
void MKLDNNDeconvolutionNode::initPadding(std::shared_ptr<ngraph::Node> op, const Shape &inDims, const std::vector<int32_t>& outSpDims) {
std::vector<ov::StaticShape> input_shapes{inDims.getStaticDims(), getWeightDims()};
ov::StaticShape output_shape_input;
if (externOutShape) {
IE_ASSERT(outSpDims.size() == getInputShapeAtPort(2).getStaticDims()[0]);
input_shapes.push_back({outSpDims.size()});
for (size_t i = 0; i < outSpDims.size(); i++) {
output_shape_input.push_back(outSpDims[i]);
}
}
if (getAlgorithm() == DeconvolutionCommon) {
auto deconv = ngraph::as_type_ptr<ngraph::op::v1::ConvolutionBackpropData>(op);
IE_ASSERT(ov::op::v1::resolve_auto_pad_for_shape_back_prop(deconv.get(), paddingL, paddingR, input_shapes, output_shape_input, 2, 2));
} else if (getAlgorithm() == DeconvolutionGrouped) {
auto deconv = ngraph::as_type_ptr<ngraph::op::v1::GroupConvolutionBackpropData>(op);
IE_ASSERT(ov::op::v1::resolve_auto_pad_for_shape_back_prop(deconv.get(), paddingL, paddingR, input_shapes, output_shape_input, 2, 3));
}
}
std::pair<VectorDims, VectorDims> MKLDNNDeconvolutionNode::makeDummyInOutShape() {
auto inShape = MemoryDescUtils::makeDummyShape(getInputShapeAtPort(0));
auto outShape = getOutputShapeAtPort(0);
if (isDynamicNode()) {
if (externOutShape) {
if (lastOutputSpatialDims.empty()) {
const auto& shape = getOutputShapeAtPort(0);
lastOutputSpatialDims.resize(shape.getRank() - 2);
const auto& minDims = shape.getMinDims();
const auto& maxDims = shape.getMaxDims();
const auto& dims = shape.getDims();
for (size_t i = 0; i < dims.size() - 2; ++i) {
lastOutputSpatialDims[i] = dims[i + 2] == Shape::UNDEFINED_DIM ? std::min(maxDims[i + 2],
std::max(minDims[i + 2], static_cast<Dim>(64))) : dims[i + 2];
}
}
ov::CoordinateDiff pb = autoPad ? ov::CoordinateDiff(paddingL.size(), 0) : paddingL;
ov::CoordinateDiff pe = autoPad ? ov::CoordinateDiff(paddingR.size(), 0) : paddingR;
auto inputDims = inShape.getStaticDims();
const auto& weightDims = getWeightDims();
const size_t wghOffset = getAlgorithm() == DeconvolutionGrouped ? 1 : 0;
for (size_t i = 0; i < inputDims.size() - 2; i++) {
inputDims[2 + i] = ((lastOutputSpatialDims[i] - (dilation[i] + 1) *
(weightDims[wghOffset + 2 + i] - 1) - 1 + pb[i] + pe[i] - outputPadding[i])) /
stride[i] + 1;
}
inShape = Shape(inputDims);
}
initPadding(opToShapeInfer, inShape, lastOutputSpatialDims);
outShape = Shape(shapeInferInternal(inShape.getStaticDims(), lastOutputSpatialDims));
}
return {inShape.getStaticDims(), outShape.getStaticDims()};
}
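A minimal sketch of the per-axis back-calculation used above to derive a dummy input dim from a requested output dim; the concrete kernel/stride/pad values below are illustrative assumptions:

#include <cstdint>
#include <iostream>

// Deconvolution forward relation per spatial axis (with dilation d in the usual convention):
//   out = (in - 1) * stride - padL - padR + d * (k - 1) + 1 + output_padding
// The (dilation[i] + 1) term above suggests the node stores dilation as (d - 1).
// Solving for `in` gives the expression used in makeDummyInOutShape().
static int64_t backCalcInputDim(int64_t out, int64_t k, int64_t stride, int64_t dilation /* stored as d - 1 */,
                                int64_t padL, int64_t padR, int64_t outPad) {
    return (out - (dilation + 1) * (k - 1) - 1 + padL + padR - outPad) / stride + 1;
}

int main() {
    // Requested output 128, kernel 4, stride 2, no extra dilation, pads 1/1, no output padding.
    const int64_t in = backCalcInputDim(128, 4, 2, 0, 1, 1, 0); // (128 - 3 - 1 + 2) / 2 + 1 = 64
    std::cout << in << std::endl;
    // Forward check: (64 - 1) * 2 - 1 - 1 + 1 * (4 - 1) + 1 + 0 = 128
    std::cout << (in - 1) * 2 - 1 - 1 + 1 * (4 - 1) + 1 + 0 << std::endl;
    return 0;
}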
void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
isInt8 = canBeExecutedInInt8();
InferenceEngine::Precision inPrecision = getOriginalInputPrecisionAtPort(0);
@ -239,21 +312,17 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
if (getChildEdges().empty())
IE_THROW() << errorPrefix << " has incorrect number of output edges";
for (int i = 0; i < paddingR.size(); i++) {
int with_group = getAlgorithm() == DeconvolutionGrouped ? 1 : 0;
int krn = weightDims[with_group + 2 + i];
int src = getOutputShapeAtPort(0).getStaticDims()[2 + i];
int dst = getInputShapeAtPort(0).getStaticDims()[2 + i];
krn = (krn - 1)*(dilation[i] + 1) + 1;
int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
paddingR[i] = (dst - calc_dst) * stride[i];
}
VectorDims inDims, outDims;
std::tie(inDims, outDims) = makeDummyInOutShape();
inShape = Shape(inDims);
Shape outShape(outDims);
initPaddingR(inShape, outShape);
if (isInt8) {
int8WeightDims = getWeightDims();
// WA: if int8 deconvolution is supported, we create internal weights blob in IO format
std::swap(weightDims[withGroups + 0], weightDims[withGroups + 1]);
internalBlobs.push_back(createWeiBlobAsIO(weightDims));
std::swap(int8WeightDims[withGroups + 0], int8WeightDims[withGroups + 1]);
internalBlobs.push_back(createWeiBlobAsIO(int8WeightDims));
auto format = getInputShapeAtPort(0).getRank() == 5 ? dnnl::memory::format_tag::ndhwc : dnnl::memory::format_tag::nhwc;
MemoryDescPtr in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getInputShapeAtPort(0), inputDataType, format);
MemoryDescPtr out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getOutputShapeAtPort(0), outputDataType, format);
@ -265,23 +334,44 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
createDescriptor({in_candidate}, {out_candidate});
}
}
setPostOps(attr);
setPostOps(attr, outShape.getStaticDims());
}
void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
void MKLDNNDeconvolutionNode::initPaddingR(const Shape &inShape, const Shape &outShape) {
for (int i = 0; i < paddingR.size(); i++) {
int with_group = getAlgorithm() == DeconvolutionGrouped ? 1 : 0;
const auto& weightDims = getWeightDims();
int krn = weightDims[with_group + 2 + i];
int src = outShape.getStaticDims()[2 + i];
int dst = inShape.getStaticDims()[2 + i];
krn = (krn - 1)*(dilation[i] + 1) + 1;
int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
paddingR[i] = (dst - calc_dst) * stride[i];
}
}
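As a rough self-contained illustration of the paddingR correction above (the numbers are made up for the example): the deconvolution is treated as the backward-data pass of a convolution whose source is the deconv output and whose destination is the deconv input, and paddingR absorbs whatever the plain convolution formula does not cover.

#include <iostream>

// Mirrors the body of initPaddingR() for a single spatial axis.
static int recomputePaddingR(int kernel, int dilation /* stored as d - 1 */, int stride,
                             int paddingL, int srcSpatial /* deconv output */, int dstSpatial /* deconv input */) {
    const int krn = (kernel - 1) * (dilation + 1) + 1;               // effective (dilated) kernel size
    const int calc_dst = (srcSpatial - krn + paddingL) / stride + 1; // dst implied by paddingR == 0
    return (dstSpatial - calc_dst) * stride;                         // correction needed to land exactly on dst
}

int main() {
    // Illustrative values only: kernel 3, stride 2, padL 1, deconv output 63, deconv input 32.
    std::cout << recomputePaddingR(3, 0, 2, 1, 63, 32) << std::endl; // (32 - 31) * 2 = 2
    return 0;
}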
void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims) {
mkldnn::post_ops ops;
auto getBinPostOpShape = [&](){
const auto outShapeRank = getOutputShapeAtPort(0).getRank();
const auto chIdx = getFusingAxis();
std::vector<size_t> binaryShape(outShapeRank, 1);
binaryShape[chIdx] = dims[chIdx];
return binaryShape;
};
for (auto &node : fusedWith) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
// TODO [DS]: change to shape from memory
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
// use legacy depthwise since backprop convolution does not support binary post ops
eltwiseNode->appendPostOps(ops, dims, align);
continue;
}
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
fakeQuantizeNode->appendPostOps(ops);
if (auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get())) {
fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
continue;
}
IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented";
@ -334,78 +424,300 @@ bool MKLDNNDeconvolutionNode::created() const {
return getType() == Deconvolution;
}
void MKLDNNDeconvolutionNode::createPrimitive() {
if (prim)
return;
bool MKLDNNDeconvolutionNode::needShapeInfer() const {
if (inputShapesModified()) {
return true;
}
if (externOutShape) {
if (lastOutputSpatialDims != readOutputSpatialDims()) {
return true;
}
}
if (isInt8) {
auto prim_desc = createPrimitiveDescriptor<deconvolution_forward::primitive_desc,
deconvolution_forward::desc>(attr);
return false;
}
prim.reset(new deconvolution_forward(prim_desc));
std::vector<VectorDims> MKLDNNDeconvolutionNode::shapeInfer() const {
const auto &dataMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
std::vector<int32_t> outSpDims;
if (externOutShape) {
outSpDims = readOutputSpatialDims();
}
return {shapeInferInternal(dataMemPtr->getStaticDims(), outSpDims)};
}
auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, internalBlobMemory[0]->GetPrimitive()}, {DNNL_ARG_DST, dst}};
} else {
auto prim_desc = createPrimitiveDescriptor<convolution_backward_data::primitive_desc,
convolution_backward_data::desc, convolution_forward::primitive_desc>(attr);
VectorDims MKLDNNDeconvolutionNode::shapeInferInternal(const VectorDims &inDims, std::vector<int32_t> outSpDims) const {
std::vector<ov::StaticShape> inputShapes = {
inDims,
getWeightDims()
};
prim.reset(new convolution_backward_data(prim_desc));
std::map<size_t, std::shared_ptr<ngraph::runtime::HostTensor>> inputValues;
auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
auto weights = getParentEdgeAt(1)->getMemory().GetPrimitive();
auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
primArgs = {{DNNL_ARG_DIFF_DST, src}, {DNNL_ARG_WEIGHTS, weights}, {DNNL_ARG_DIFF_SRC, dst}};
if (externOutShape) {
if (outSpDims.size() != getInputShapeAtPort(2).getStaticDims()[0]) {
IE_THROW() << "Can't compute output shape for node with name: " << getName()
<< ", because the node has 'output_shape' input, but provided output spatial dims number is incorrect";
}
inputShapes.push_back({outSpDims.size()});
inputValues.insert({2, std::make_shared<ngraph::runtime::HostTensor>(ngraph::element::Type_t::i32,
inputShapes.back().to_shape(),
outSpDims.data())});
}
std::vector<ov::StaticShape> outputShapes(1);
shape_inference(opToShapeInfer.get(), inputShapes, outputShapes, inputValues);
return outputShapes.back().to_shape();
}
void MKLDNNDeconvolutionNode::execute(mkldnn::stream strm) {
if (!execPtr) {
IE_THROW() << "Can't execute Deconvolution node with name: " << getName() << ", because executor is not compiled";
}
execPtr->exec(primArgs, strm);
if (externOutShape) {
lastOutputSpatialDims = readOutputSpatialDims();
}
}
void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc,
const std::vector<MemoryDescPtr> &outputDesc) {
const auto in_candidate = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*inputDesc[0]);
const auto out_candidate = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*outputDesc[0]);
std::shared_ptr<MKLDNNDescriptor> MKLDNNDeconvolutionNode::createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc,
bool isWinograd) const {
mkldnn::algorithm alg = isWinograd ? mkldnn::algorithm::convolution_winograd : mkldnn::algorithm::convolution_direct;
std::shared_ptr<convolution_backward_data::desc> deconv_desc;
std::shared_ptr<convolution_forward::primitive_desc> fwd_conv_pd;
std::tie(deconv_desc, fwd_conv_pd) = createDescriptorInternalDefault(srcDesc, wghDesc, dstDesc, alg);
if (fwd_conv_pd->get(true) == nullptr) {
IE_THROW() << "Forward convolution primitive descriptor is nullable for node with name: " << getName();
}
return std::make_shared<MKLDNNDescriptor>(deconv_desc, fwd_conv_pd);
}
// grouping and autoblocking are not compatible
if ((withGroups && !isDW) && (in_candidate.blocksExtended() || out_candidate.blocksExtended()))
return;
std::shared_ptr<MKLDNNDescriptor> MKLDNNDeconvolutionNode::createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc) const {
return std::make_shared<MKLDNNDescriptor>(createDescriptorInternalInt8(srcDesc, wghDesc, dstDesc));
}
void MKLDNNDeconvolutionNode::createDeconvPrim(std::shared_ptr<MKLDNNDescriptor> desc,
MKLDNNMemoryPtr srcMemPtr,
MKLDNNMemoryPtr wghMemPtr,
MKLDNNMemoryPtr dstMemPtr,
AttrPtr attr,
impl_desc_type selectedImpl) {
auto itpd = desc->createPrimitiveDescriptorIterator(getEngine(), *attr);
while (static_cast<bool>(itpd)) {
impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str());
if (impl_type == selectedImpl) {
if (isInt8) {
if (internalBlobMemory.empty()) {
prepareMemory(itpd);
}
auto prim_desc = deconvolution_forward::primitive_desc(itpd.get());
execPtr = std::make_shared<DeconvExecutorInt8>(prim_desc,
srcMemPtr->GetPrimitive().get_desc(),
internalBlobMemory.front()->GetPrimitive().get_desc(),
dstMemPtr->GetPrimitive().get_desc(),
getEngine());
} else {
auto prim_desc = convolution_backward_data::primitive_desc(itpd.get());
execPtr = std::make_shared<DeconvExecutorDefault>(prim_desc,
srcMemPtr->GetPrimitive().get_desc(),
wghMemPtr->GetPrimitive().get_desc(),
dstMemPtr->GetPrimitive().get_desc(),
getEngine());
}
return;
}
if (!itpd.next_impl()) {
auto inDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(srcMemPtr->getStaticDims()),
memory::data_type::f32,
memory::format_tag::any);
auto wghDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(wghMemPtr->getStaticDims()),
memory::data_type::f32,
memory::format_tag::any);
auto outDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(dstMemPtr->getStaticDims()),
memory::data_type::f32,
memory::format_tag::any);
std::shared_ptr<MKLDNNDescriptor> anyDeconvDesc = createDefaultMkldnnDeconvDesc(inDesc, wghDesc, outDesc, false);
auto anyDeconvItpd = anyDeconvDesc->createPrimitiveDescriptorIterator(getEngine(), *attr);
if (static_cast<bool>(anyDeconvItpd)) {
auto prim_desc = convolution_backward_data::primitive_desc(anyDeconvItpd.get());
execPtr = std::make_shared<DeconvExecutorDefault>(prim_desc,
srcMemPtr->GetPrimitive().get_desc(),
wghMemPtr->GetPrimitive().get_desc(),
dstMemPtr->GetPrimitive().get_desc(),
getEngine());
return;
}
}
}
IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
}
void MKLDNNDeconvolutionNode::prepareParams() {
auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
auto wghMemPtr = getParentEdgesAtPort(1)[0]->getMemoryPtr();
auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
IE_THROW() << "Destination memory didn't allocate.";
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
IE_THROW() << "Input memory didn't allocate.";
const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
if (!wghMemPtr || !wghMemPtr->GetPrimitivePtr())
IE_THROW() << "Weight memory didn't allocate.";
if (selected_pd == nullptr)
IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << ".";
auto inMemoryDesc = getParentEdgesAtPort(0).front()->getMemory().GetDescWithType<DnnlMemoryDesc>();
auto outMemoryDesc = getChildEdgesAtPort(0).front()->getMemory().GetDescWithType<DnnlMemoryDesc>();
auto initPrimitiveAttr = [&]() {
mkldnn::primitive_attr attr;
setPostOps(attr, dstMemPtr->getStaticDims());
return std::make_shared<mkldnn::primitive_attr>(std::move(attr));
};
AttrPtr pAttrLocal;
if (isDynamicNode()) {
if (!pAttr) {
pAttr = initPrimitiveAttr();
}
pAttrLocal = pAttr;
if (autoPad || externOutShape) {
initPadding(opToShapeInfer, inMemoryDesc->getShape(), externOutShape ? readOutputSpatialDims() : std::vector<int32_t>{});
}
initPaddingR(inMemoryDesc->getShape(), outMemoryDesc->getShape());
} else {
pAttrLocal = initPrimitiveAttr();
}
const auto in_candidate = inMemoryDesc->getDnnlDesc();
const auto out_candidate = outMemoryDesc->getDnnlDesc();
mkldnn::memory::desc wgh_candidate;
if (isInt8) {
if (internalBlobMemory.empty()) {
wgh_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any);
} else {
wgh_candidate = internalBlobMemory.front()->GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
}
} else {
wgh_candidate = getParentEdgesAtPort(1).front()->getMemory().GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
}
std::shared_ptr<MKLDNNDescriptor> desc;
if (isInt8) {
desc = createInt8MkldnnDeconvDesc(in_candidate, wgh_candidate, out_candidate);
} else {
desc = createDefaultMkldnnDeconvDesc(in_candidate, wgh_candidate, out_candidate,
selected_pd->getImplementationType() == MKLDNNPlugin::impl_desc_type::jit_avx512_winograd);
}
createDeconvPrim(desc, srcMemPtr, wghMemPtr, dstMemPtr, pAttrLocal, selected_pd->getImplementationType());
if (std::dynamic_pointer_cast<DeconvExecutorInt8>(execPtr)) {
primArgs = {{DNNL_ARG_SRC, srcMemPtr->GetPrimitive()},
{DNNL_ARG_WEIGHTS, internalBlobMemory.front()->GetPrimitive()},
{DNNL_ARG_DST, dstMemPtr->GetPrimitive()}};
} else {
primArgs = {{DNNL_ARG_DIFF_DST, srcMemPtr->GetPrimitive()},
{DNNL_ARG_WEIGHTS, wghMemPtr->GetPrimitive()},
{DNNL_ARG_DIFF_SRC, dstMemPtr->GetPrimitive()}};
}
MKLDNNNode::appendPostOpArgs(attr, primArgs, binaryPostOpsArgs);
}
void MKLDNNDeconvolutionNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
MKLDNNDeconvolutionNode::DefaultDeconvDescs MKLDNNDeconvolutionNode::createDescriptorInternalDefault(const mkldnn::memory::desc& in_candidate,
const mkldnn::memory::desc& wgh_candidate,
const mkldnn::memory::desc& out_candidate,
mkldnn::algorithm alg) const {
auto convertDims = [] (const std::vector<ptrdiff_t>& orig_dims) {
return memory::dims(orig_dims.begin(), orig_dims.end());
};
std::shared_ptr<mkldnn::convolution_forward::desc> conv_desc;
conv_desc = std::make_shared<convolution_forward::desc>(prop_kind::forward_inference, alg,
out_candidate, wgh_candidate, in_candidate,
convertDims(stride),
convertDims(dilation),
convertDims(paddingL),
convertDims(paddingR));
std::shared_ptr<mkldnn::convolution_backward_data::desc> deconv_desc;
deconv_desc = std::make_shared<convolution_backward_data::desc>(alg, out_candidate, wgh_candidate,
in_candidate,
convertDims(stride),
convertDims(dilation),
convertDims(paddingL),
convertDims(paddingR));
auto fwd_conv_pd = std::make_shared<convolution_forward::primitive_desc>(*conv_desc, getEngine(), true);
return {deconv_desc, fwd_conv_pd};
}
MKLDNNDeconvolutionNode::Int8DeconvDesc MKLDNNDeconvolutionNode::createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate,
const mkldnn::memory::desc& wgh_candidate,
const mkldnn::memory::desc& out_candidate) const {
auto convertDims = [] (const std::vector<ptrdiff_t>& orig_dims) {
return memory::dims(orig_dims.begin(), orig_dims.end());
};
MKLDNNDeconvolutionNode::Int8DeconvDesc deconv_desc;
deconv_desc = std::make_shared<mkldnn::deconvolution_forward::desc>(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct,
in_candidate, wgh_candidate, out_candidate,
convertDims(stride), convertDims(dilation),
convertDims(paddingL), convertDims(paddingR));
return deconv_desc;
}
void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc,
const std::vector<MemoryDescPtr> &outputDesc) {
auto inDesc = inputDesc[0]->isDefined() ? inputDesc[0] : inputDesc[0]->cloneWithNewDims(inShape.getStaticDims());
auto dnnlInDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*inDesc);
auto in_candidate = dnnlInDesc.getDnnlDesc();
auto outDesc = outputDesc[0];
if (!outDesc->isDefined()) {
const auto outShape = shapeInferInternal(inDesc->getShape().getStaticDims(), lastOutputSpatialDims);
outDesc = outDesc->cloneWithNewDims(outShape);
}
auto dnnlOutDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*outDesc);
auto out_candidate = dnnlOutDesc.getDnnlDesc();
// grouping and autoblocking are not compatible
if ((withGroups && !isDW) && (dnnlInDesc.blocksExtended() || dnnlOutDesc.blocksExtended()))
return;
if (isInt8) {
mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(weightDims), memory::data_type::s8, memory::format_tag::any);
std::shared_ptr<mkldnn::deconvolution_forward::desc> deconv_desc;
deconv_desc.reset(new deconvolution_forward::desc(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct,
in_candidate.getDnnlDesc(), wgh_candidate, out_candidate.getDnnlDesc(),
convertDims(stride), convertDims(dilation),
convertDims(paddingL), convertDims(paddingR)));
descs.emplace_back(deconv_desc);
mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any);
descs.emplace_back(createDescriptorInternalInt8(in_candidate, wgh_candidate, out_candidate));
} else {
mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(weightDims), in_candidate.getDataType(), memory::format_tag::any);
mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(getWeightDims()),
dnnlInDesc.getDataType(), memory::format_tag::any);
for (auto alg : {mkldnn::algorithm::convolution_winograd, mkldnn::algorithm::convolution_direct}) {
std::shared_ptr<mkldnn::convolution_forward::desc> conv_desc;
conv_desc.reset(new convolution_forward::desc(prop_kind::forward_inference, alg,
out_candidate.getDnnlDesc(), wgh_candidate, in_candidate.getDnnlDesc(),
convertDims(stride),
convertDims(dilation),
convertDims(paddingL),
convertDims(paddingR)));
std::shared_ptr<mkldnn::convolution_backward_data::desc> deconv_desc;
deconv_desc.reset(new convolution_backward_data::desc(alg, out_candidate.getDnnlDesc(), wgh_candidate,
in_candidate.getDnnlDesc(),
convertDims(stride),
convertDims(dilation),
convertDims(paddingL),
convertDims(paddingR)));
descs_fwd.push_back(conv_desc);
descs_bwd.push_back(deconv_desc);
auto fwd_conv_pd = std::make_shared<convolution_forward::primitive_desc>(*conv_desc, getEngine(), true);
std::shared_ptr<convolution_backward_data::desc> deconv_desc;
std::shared_ptr<convolution_forward::primitive_desc> fwd_conv_pd;
std::tie(deconv_desc, fwd_conv_pd) = createDescriptorInternalDefault(in_candidate, wgh_candidate, out_candidate, alg);
if (fwd_conv_pd->get(true) == nullptr)
continue;
descs.emplace_back(deconv_desc, fwd_conv_pd);
}
}
@ -413,15 +725,25 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<MemoryDescPtr>
std::shared_ptr<MemoryDesc> MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
if (idx == 2) {
return std::make_shared<CpuBlockedMemoryDesc>(getOriginalInputPrecisionAtPort(2), Shape(getInputShapeAtPort(2).getStaticDims()));
return std::make_shared<CpuBlockedMemoryDesc>(InferenceEngine::Precision::I32, Shape(getInputShapeAtPort(2).getStaticDims()));
} else if (idx > 0 && isInt8) {
// we need to store the 'weight' input as an edge,
// because at this moment we can't simply replace the internal blob with the input: the weight data must be kept as is, but in a different order
return std::make_shared<CpuBlockedMemoryDesc>(getOriginalInputPrecisionAtPort(idx), Shape(getInputShapeAtPort(idx).getStaticDims()));
}
auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1) : isInt8 ? primitive_desc_it.src_desc(idx) : primitive_desc_it.diff_dst_desc(idx);
if (getInputShapeAtPort(idx).isDynamic()) {
return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx));
}
return MKLDNNExtensionUtils::makeDescriptor(desc);
}
std::shared_ptr<MemoryDesc> MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
auto desc = isInt8 ? primitive_desc_it.dst_desc(idx) : primitive_desc_it.diff_src_desc(idx);
if (getOutputShapeAtPort(idx).isDynamic()) {
return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getOutputShapeAtPort(idx));
}
return MKLDNNExtensionUtils::makeDescriptor(desc);
}
@ -439,4 +761,61 @@ InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const
return getMaxPrecision(inputPrecisions);
}
MKLDNNDeconvolutionNode::DeconvExecutorDefault::DeconvExecutorDefault(const mkldnn::convolution_backward_data::primitive_desc& pd,
const mkldnn::memory::desc& inMemDesc,
const mkldnn::memory::desc& weightMemDesc,
const mkldnn::memory::desc& outMemDesc,
const mkldnn::engine& engine) {
execPrim.reset(new mkldnn::convolution_backward_data(pd));
if (inMemDesc != pd.diff_dst_desc()) {
inputReorders.insert({DNNL_ARG_DIFF_DST, IntermReorder(inMemDesc, pd.diff_dst_desc(), engine)});
}
if (weightMemDesc != pd.weights_desc()) {
inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, pd.weights_desc(), engine)});
}
if (outMemDesc != pd.diff_src_desc()) {
outputReorders.insert({DNNL_ARG_DIFF_SRC, IntermReorder(pd.diff_src_desc(), outMemDesc, engine)});
}
}
MKLDNNDeconvolutionNode::DeconvExecutorInt8::DeconvExecutorInt8(const mkldnn::deconvolution_forward::primitive_desc& pd,
const mkldnn::memory::desc& inMemDesc,
const mkldnn::memory::desc& weightMemDesc,
const mkldnn::memory::desc& outMemDesc,
const mkldnn::engine& engine) {
execPrim.reset(new mkldnn::deconvolution_forward(pd));
if (inMemDesc != pd.src_desc()) {
inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, pd.src_desc(), engine)});
}
if (weightMemDesc != pd.weights_desc()) {
inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, pd.weights_desc(), engine)});
}
if (outMemDesc != pd.dst_desc()) {
outputReorders.insert({DNNL_ARG_DST, IntermReorder(pd.dst_desc(), outMemDesc, engine)});
}
}
std::vector<int32_t> MKLDNNDeconvolutionNode::readOutputSpatialDims() const {
if (getParentEdges().size() < 3) {
IE_THROW() << "Can't get output spatial dims. Inputs number = " << getParentEdges().size();
}
const auto &shapeMemPtr = getParentEdgesAtPort(2)[0]->getMemoryPtr();
if (!shapeMemPtr || !shapeMemPtr->GetPrimitivePtr()) {
IE_THROW() << "'output_shape' input memory is not allocated.";
}
const auto spDimsNum = getInputShapeAtPort(0).getRank() - 2;
if (shapeMemPtr->getStaticDims()[0] != spDimsNum) {
IE_THROW() << "Can't read output spatial dims, beause 'output_shape' input has incorrect number of elements";
}
const int32_t *outShapePtr = reinterpret_cast<const int32_t *>(shapeMemPtr->GetPtr());
std::vector<int32_t> outSpDims(outShapePtr, outShapePtr + shapeMemPtr->getStaticDims()[0]);
return outSpDims;
}
REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution);

View File

@ -9,10 +9,15 @@
#include <memory>
#include <string>
#include <vector>
#include "common/dnnl_executor.h"
namespace MKLDNNPlugin {
class MKLDNNDeconvolutionNode : public MKLDNNNode {
using DefaultDeconvDescs = std::pair<std::shared_ptr<mkldnn::convolution_backward_data::desc>,
std::shared_ptr<mkldnn::convolution_forward::primitive_desc>>;
using Int8DeconvDesc = std::shared_ptr<mkldnn::deconvolution_forward::desc>;
public:
MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
@ -39,27 +44,88 @@ public:
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
bool canFuse(const MKLDNNNodePtr& node) const override;
const InferenceEngine::SizeVector& getWeightDims() { return weightDims; }
const std::vector<ptrdiff_t>& getStride() { return stride; }
const VectorDims& getWeightDims() const { return getInputShapeAtPort(1).getStaticDims(); }
const std::vector<ptrdiff_t>& getStride() const { return stride; }
void prepareParams() override;
void execute(mkldnn::stream strm) override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
bool needShapeInfer() const override;
std::vector<VectorDims> shapeInfer() const override;
private:
using executorPtr = std::shared_ptr<DnnlExecutor>;
executorPtr execPtr = nullptr;
class DeconvExecutorDefault : public DnnlExecutor {
public:
DeconvExecutorDefault(const mkldnn::convolution_backward_data::primitive_desc& pd,
const mkldnn::memory::desc& inMemDesc,
const mkldnn::memory::desc& weightMemDesc,
const mkldnn::memory::desc& outMemDesc,
const mkldnn::engine& engine);
};
class DeconvExecutorInt8 : public DnnlExecutor {
public:
DeconvExecutorInt8(const mkldnn::deconvolution_forward::primitive_desc& pd,
const mkldnn::memory::desc& inMemDesc,
const mkldnn::memory::desc& weightMemDesc,
const mkldnn::memory::desc& outMemDesc,
const mkldnn::engine& engine);
};
bool withGroups = false;
bool isDW = false;
bool isInt8 = false;
bool autoPad = false;
bool externOutShape = false;
size_t groupNum = 1;
size_t IC;
size_t OC;
std::vector<ptrdiff_t> kernel;
std::vector<ptrdiff_t> stride;
std::vector<ptrdiff_t> dilation;
std::vector<ptrdiff_t> paddingL;
std::vector<ptrdiff_t> paddingR;
InferenceEngine::SizeVector weightDims;
std::vector<std::shared_ptr<mkldnn::convolution_forward::desc>> descs_fwd;
std::vector<std::shared_ptr<mkldnn::convolution_backward_data::desc>> descs_bwd;
ov::CoordinateDiff paddingL;
ov::CoordinateDiff paddingR;
ov::CoordinateDiff outputPadding;
std::vector<int32_t> lastOutputSpatialDims;
VectorDims int8WeightDims;
Shape inShape;
AttrPtr pAttr;
mkldnn::primitive_attr attr;
void setPostOps(mkldnn::primitive_attr &attr);
void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims);
VectorDims shapeInferInternal(const VectorDims &inDims, std::vector<int32_t> outSpDims) const;
void initPadding(std::shared_ptr<ngraph::Node> op, const Shape &inShape, const std::vector<int32_t>& outSpDims);
void initPaddingR(const Shape &inShape, const Shape &outShape);
std::vector<int32_t> readOutputSpatialDims() const;
std::pair<VectorDims, VectorDims> makeDummyInOutShape();
DefaultDeconvDescs createDescriptorInternalDefault(const mkldnn::memory::desc& in_candidate,
const mkldnn::memory::desc& wgh_candidate,
const mkldnn::memory::desc& out_candidate,
mkldnn::algorithm alg) const;
Int8DeconvDesc createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate,
const mkldnn::memory::desc& wgh_candidate,
const mkldnn::memory::desc& out_candidate) const;
std::shared_ptr<MKLDNNDescriptor> createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc,
bool isWinograd) const;
std::shared_ptr<MKLDNNDescriptor> createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc) const;
void createDeconvPrim(std::shared_ptr<MKLDNNDescriptor> desc,
MKLDNNMemoryPtr srcMemPtr,
MKLDNNMemoryPtr wghMemPtr,
MKLDNNMemoryPtr dstMemPtr,
AttrPtr attr,
impl_desc_type selectedImpl);
std::string errorPrefix;

View File

@ -49,15 +49,7 @@ bool MKLDNNDetectionOutputNode::isSupportedOperation(const std::shared_ptr<const
return true;
}
void MKLDNNDetectionOutputNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
MKLDNNDetectionOutputNode::MKLDNNDetectionOutputNode(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng,
MKLDNNDetectionOutputNode::MKLDNNDetectionOutputNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
@ -170,6 +162,10 @@ struct ConfidenceComparatorDO {
const float* confData;
};
void MKLDNNDetectionOutputNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) {
float *dstData = reinterpret_cast<float *>(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr());

View File

@ -16,7 +16,6 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
@ -24,7 +23,7 @@ public:
protected:
void prepareParams() override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
void executeDynamicImpl(mkldnn::stream strm) override;
private:
static const int ID_LOC = 0;

View File

@ -7,6 +7,7 @@
#include <ie_parallel.hpp>
#include <mkldnn_types.h>
#include "cpu_types.h"
#include "utils/bfloat16.hpp"
#include <cpu/x64/injectors/jit_uni_quantization_injector.hpp>
#include <cpu/ref_eltwise.hpp>
@ -31,6 +32,7 @@
#include "ngraph_transformations/op/leaky_relu.hpp"
#include "ngraph_transformations/op/swish_cpu.hpp"
#include <oneapi/dnnl/dnnl.hpp>
#include <string>
#include <vector>
#include <memory>
@ -791,18 +793,41 @@ private:
}
};
MKLDNNEltwiseNode::BroadcastingPolicy MKLDNNEltwiseNode::determineBroadcastingPolicy(const std::shared_ptr<ngraph::Node>& op) {
const auto const1 = std::dynamic_pointer_cast<ngraph::opset1::Constant>(op->get_input_node_shared_ptr(0));
const auto const2 = std::dynamic_pointer_cast<ngraph::opset1::Constant>(op->get_input_node_shared_ptr(1));
int constPort = -1;
if (const2) {
constPort = 1;
} else if (const1) {
constPort = 0;
} else {
return Undefined;
}
auto const_shape = op->get_input_shape(constPort);
if (ngraph::shape_size(const_shape) == 1)
return PerTensor;
else
return PerChannel;
}
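A minimal standalone sketch of the policy decision made above, assuming only that the constant input's total element count is what distinguishes the cases:

#include <cstddef>
#include <iostream>
#include <vector>

enum BroadcastingPolicy { PerChannel, PerTensor, Undefined };

// If neither input is a constant, the policy stays Undefined; a single-element constant
// means PerTensor broadcasting, and any larger constant is treated as PerChannel.
static BroadcastingPolicy policyFromConstShape(bool hasConstInput, const std::vector<size_t>& constShape) {
    if (!hasConstInput)
        return Undefined;
    size_t elements = 1;
    for (const auto d : constShape)
        elements *= d;
    return elements == 1 ? PerTensor : PerChannel;
}

int main() {
    std::cout << policyFromConstShape(true, {1}) << std::endl;           // 1 -> PerTensor (scalar constant)
    std::cout << policyFromConstShape(true, {1, 64, 1, 1}) << std::endl; // 0 -> PerChannel
    std::cout << policyFromConstShape(false, {}) << std::endl;           // 2 -> Undefined (no constant input)
    return 0;
}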
const std::map<const ngraph::DiscreteTypeInfo, MKLDNNEltwiseNode::Initializer> MKLDNNEltwiseNode::initializers = {
{ngraph::op::v1::Add::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
node.algorithm = EltwiseAdd;
node.broadcastingPolicy = determineBroadcastingPolicy(op);
}},
{ngraph::op::v1::Subtract::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
node.algorithm = EltwiseSubtract;
node.broadcastingPolicy = determineBroadcastingPolicy(op);
}},
{ngraph::op::v1::Multiply::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
node.algorithm = EltwiseMultiply;
node.broadcastingPolicy = determineBroadcastingPolicy(op);
}},
{ngraph::op::v1::Divide::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
node.algorithm = EltwiseDivide;
node.broadcastingPolicy = determineBroadcastingPolicy(op);
}},
{ngraph::op::v0::SquaredDifference::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
node.algorithm = EltwiseSquaredDifference;
@ -828,6 +853,7 @@ const std::map<const ngraph::DiscreteTypeInfo, MKLDNNEltwiseNode::Initializer> M
node.alpha = powerStatic->get_power();
node.beta = powerStatic->get_scale();
node.gamma = powerStatic->get_shift();
node.broadcastingPolicy = PerTensor;
}},
{ngraph::op::v1::Equal::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
node.algorithm = EltwiseEqual;
@ -954,6 +980,7 @@ const std::map<const ngraph::DiscreteTypeInfo, MKLDNNEltwiseNode::Initializer> M
}},
{ngraph::op::v0::PRelu::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
node.algorithm = EltwisePrelu;
node.broadcastingPolicy = determineBroadcastingPolicy(op);
}},
{ngraph::op::v0::Erf::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
node.algorithm = EltwiseErf;
@ -984,7 +1011,7 @@ bool MKLDNNEltwiseNode::isSupportedOperation(const std::shared_ptr<const ngraph:
}
MKLDNNEltwiseNode::MKLDNNEltwiseNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
MKLDNNNode(op, eng, cache) {
MKLDNNNode(op, eng, cache), broadcastingPolicy(Undefined) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
@ -1498,14 +1525,6 @@ void MKLDNNEltwiseNode::selectOptimalPrimitiveDescriptor() {
selectPreferPrimitiveDescriptor(getPrimitivesPriority(), true);
}
void MKLDNNEltwiseNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
void MKLDNNEltwiseNode::initOptimalPrimitiveDescriptor() {
auto selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
@ -1656,6 +1675,10 @@ void MKLDNNEltwiseNode::executeReference(const jit_eltwise_params &jep, const ji
});
}
void MKLDNNEltwiseNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNEltwiseNode::execute(mkldnn::stream strm) {
if (execPtr) {
jit_eltwise_call_args_ptrs args_ptrs = {};
@ -1713,106 +1736,124 @@ void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) {
getInputShapeAtPort(0) == getInputShapeAtPort(1);
if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) {
std::tie(scales, shifts) = getScalesAndShifts(parentNode.get());
if ((parentNode->getType() == FullyConnected || parentNode->getType() == MatMul) && one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract,
EltwiseMultiply, EltwiseDivide, EltwiseMulAdd, EltwisePowerStatic, EltwisePrelu)) {
std::tie(scales, shifts) = getScalesAndShifts(parentNode.get());
}
}
MKLDNNNode::fuseInto(parentNode);
}
void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) {
const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' ";
if (getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
switch (getMKLDNNAlgorithm()) {
case mkldnn::algorithm::eltwise_relu:
case mkldnn::algorithm::eltwise_tanh:
case mkldnn::algorithm::eltwise_elu:
case mkldnn::algorithm::eltwise_square:
case mkldnn::algorithm::eltwise_abs:
case mkldnn::algorithm::eltwise_sqrt:
case mkldnn::algorithm::eltwise_linear:
case mkldnn::algorithm::eltwise_bounded_relu:
case mkldnn::algorithm::eltwise_soft_relu:
case mkldnn::algorithm::eltwise_logistic:
case mkldnn::algorithm::eltwise_exp:
case mkldnn::algorithm::eltwise_gelu_erf:
case mkldnn::algorithm::eltwise_gelu_tanh:
case mkldnn::algorithm::eltwise_clip:
case mkldnn::algorithm::eltwise_swish:
case mkldnn::algorithm::eltwise_hardswish:
case mkldnn::algorithm::eltwise_mish:
case mkldnn::algorithm::eltwise_hsigmoid:
case mkldnn::algorithm::eltwise_round_half_to_even:
case mkldnn::algorithm::eltwise_round_half_away_from_zero:
ops.append_eltwise(1.0, getMKLDNNAlgorithm(), getAlpha(), getBeta());
break;
default: IE_THROW() << errorPrefix << "as post operation is not supported";
case mkldnn::algorithm::eltwise_relu:
case mkldnn::algorithm::eltwise_tanh:
case mkldnn::algorithm::eltwise_elu:
case mkldnn::algorithm::eltwise_square:
case mkldnn::algorithm::eltwise_abs:
case mkldnn::algorithm::eltwise_sqrt:
case mkldnn::algorithm::eltwise_linear:
case mkldnn::algorithm::eltwise_bounded_relu:
case mkldnn::algorithm::eltwise_soft_relu:
case mkldnn::algorithm::eltwise_logistic:
case mkldnn::algorithm::eltwise_exp:
case mkldnn::algorithm::eltwise_gelu_erf:
case mkldnn::algorithm::eltwise_gelu_tanh:
case mkldnn::algorithm::eltwise_clip:
case mkldnn::algorithm::eltwise_swish:
case mkldnn::algorithm::eltwise_hardswish:
case mkldnn::algorithm::eltwise_mish:
case mkldnn::algorithm::eltwise_hsigmoid:
case mkldnn::algorithm::eltwise_round_half_to_even:
case mkldnn::algorithm::eltwise_round_half_away_from_zero:
ops.append_eltwise(1.0, getMKLDNNAlgorithm(), getAlpha(), getBeta());
break;
default: IE_THROW() << errorPrefix << "as post operation is not supported";
}
} else {
const size_t chIdx = postOpDims.size() > 1 ? 1 : 0;
const size_t chIdx = postOpDims.size() > 1 ? getFusingAxis() : 0;
scalesBuffer = makeAlignedBuffer(postOpDims[chIdx], scales, align);
if (getAlgorithm() != EltwisePrelu) {
shiftsBuffer = makeAlignedBuffer(postOpDims[chIdx], shifts, align);
}
if (initAsBinary) {
auto appendBinary = [&](const mkldnn::algorithm alg, MKLDNNMemoryPtr &memPtr, const std::vector<float> &data) {
if (data.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
std::vector<size_t> binaryDims(postOpDims.size(), 1);
binaryDims[chIdx] = postOpDims[chIdx];
DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(binaryDims));
ops.append_binary(alg, memoryDesc.getDnnlDesc());
if (initBinaryMemory) {
memPtr.reset(new MKLDNNMemory(getEngine()));
memPtr->Create(memoryDesc, &data[0]);
}
};
switch (getAlgorithm()) {
case EltwiseAdd:
case EltwiseSubtract:
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shiftsBuffer);
break;
case EltwiseMultiply:
case EltwiseDivide:
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scalesBuffer);
break;
case EltwiseMulAdd:
case EltwisePowerStatic:
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scalesBuffer);
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shiftsBuffer);
break;
case EltwisePrelu:
appendBinary(mkldnn::algorithm::binary_prelu, scalesMemory, scalesBuffer);
break;
default:
IE_THROW() << errorPrefix << "as post operation is not supported";
}
} else {
switch (getAlgorithm()) {
case EltwiseAdd:
case EltwiseSubtract:
case EltwiseMultiply:
case EltwiseDivide:
case EltwiseMulAdd:
case EltwisePowerStatic:
if (scalesBuffer.empty() || shiftsBuffer.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scalesBuffer[0], &shiftsBuffer[0]);
break;
case EltwisePrelu:
if (scalesBuffer.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scalesBuffer[0], nullptr);
break;
default:
IE_THROW() << errorPrefix << "as post operation is not supported";
}
/* @todo legacy depthwise post ops are kept for now
* for performance reasons
*/
switch (getAlgorithm()) {
case EltwiseAdd:
case EltwiseSubtract:
case EltwiseMultiply:
case EltwiseDivide:
case EltwiseMulAdd:
case EltwisePowerStatic:
if (scales.empty() || shifts.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scalesBuffer[0], &shiftsBuffer[0]);
break;
case EltwisePrelu:
if (scales.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scalesBuffer[0], nullptr);
break;
default:
IE_THROW() << errorPrefix << "as post operation is not supported";
}
}
}
void MKLDNNEltwiseNode::appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) {
const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' as binary post op ";
VectorDims broadcastBinaryShape(postOpDims.size(), 1);
auto appendBinary = [&](const mkldnn::algorithm alg, MKLDNNMemoryPtr &memPtr, const std::vector<float> &data) {
if (data.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
if (broadcastingPolicy == Undefined)
IE_THROW() << errorPrefix << "cannot be performed since policy is Undefined";
DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, broadcastingPolicy == PerTensor ? Shape(broadcastBinaryShape) : Shape(postOpDims));
ops.append_binary(alg, memoryDesc.getDnnlDesc());
if (!memPtr) {
memPtr.reset(new MKLDNNMemory(getEngine()));
memPtr->Create(memoryDesc, &data[0]);
binaryPostOpsMem.push_back(memPtr);
}
};
switch (getAlgorithm()) {
case EltwiseAdd:
case EltwiseSubtract:
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts);
break;
case EltwiseDivide:
case EltwiseMultiply:
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales);
break;
case EltwiseMulAdd:
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales);
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts);
break;
case EltwisePowerStatic:
if (beta != 1.0f) // Multiply only if there is a scale
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales);
if (gamma != 0.0f) // Add only if there is a shift
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts);
break;
case EltwisePrelu:
appendBinary(mkldnn::algorithm::binary_prelu, scalesMemory, scales);
break;
default:
IE_THROW() << errorPrefix << "as post operation is not supported";
}
}
bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const {
auto isSuitableNode = [this](const MKLDNNEltwiseNode* node) {
// [WA] Since execution precision change from I32 to FP32 for Divide operation may lead to incorrect results

View File

@ -75,7 +75,8 @@ public:
bool created() const override;
bool canBeInPlace() const override;
bool canFuse(const MKLDNNNodePtr& node) const override;
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false) override;
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1) override;
void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) override;
void fuseInto(MKLDNNNodePtr& parentNode) override;
InferenceEngine::Precision getRuntimePrecision() const override;
@ -89,16 +90,23 @@ public:
bool isWithBroadcast();
bool isSpecialConvolutionAddFusing() const { return specialConvolutionAddFusing; }
void createPrimitive() override;
std::vector<VectorDims> shapeInfer() const override;
bool needPrepareParams() const override;
void prepareParams() override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
void executeDynamicImpl(mkldnn::stream strm) override;
enum BroadcastingPolicy {
PerChannel,
PerTensor,
Undefined,
};
BroadcastingPolicy getBroadcastingPolicy() const { return broadcastingPolicy; }
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
private:
struct EltwiseExecutor {
EltwiseExecutor(size_t batch) : batchDimIdx(batch) {}
@ -130,6 +138,8 @@ private:
size_t fullWorkAmount = 0;
};
BroadcastingPolicy broadcastingPolicy;
mkldnn::algorithm mkldnnAlgorithm = mkldnn::algorithm::undef;
static const int optimalTensorRank = 6;
@ -157,6 +167,8 @@ private:
using Initializer = std::function<void(const std::shared_ptr<ngraph::Node>&, MKLDNNEltwiseNode& node)>;
static const std::map<const ngraph::DiscreteTypeInfo, Initializer> initializers;
static BroadcastingPolicy determineBroadcastingPolicy(const std::shared_ptr<ngraph::Node>& op);
void executeOptimized6D(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
const VectorDims &dims_out) const;
void executeOptimizedGeneric(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,

View File

@ -70,14 +70,6 @@ void MKLDNNEmbeddingBagOffsetSumNode::initSupportedPrimitiveDescriptors() {
addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any);
}
void MKLDNNEmbeddingBagOffsetSumNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
void MKLDNNEmbeddingBagOffsetSumNode::prepareParams() {
_indicesLen = getParentEdgesAtPort(INDICES_IDX)[0]->getMemory().getStaticDims()[0];
_offsetsLen = getParentEdgesAtPort(OFFSETS_IDX)[0]->getMemory().getStaticDims()[0];
@ -126,6 +118,14 @@ void MKLDNNEmbeddingBagOffsetSumNode::getIndices(int embIndex, const int*& indic
weightsIdx = offsetsData_[embIndex];
}
void MKLDNNEmbeddingBagOffsetSumNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
bool MKLDNNEmbeddingBagOffsetSumNode::isExecutable() const {
return !isInputTensorAtPortEmpty(0);
}
void MKLDNNEmbeddingBagOffsetSumNode::execute(mkldnn::stream strm) {
const auto *srcData = reinterpret_cast<const uint8_t *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
auto *dstData = reinterpret_cast<uint8_t *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());

View File

@ -19,15 +19,15 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
bool isExecutable() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
protected:
void prepareParams() override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
void executeDynamicImpl(mkldnn::stream strm) override;
private:
void initFromInputs() override;

View File

@ -64,14 +64,6 @@ void MKLDNNEmbeddingBagPackedSumNode::initSupportedPrimitiveDescriptors() {
addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any);
}
void MKLDNNEmbeddingBagPackedSumNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
void MKLDNNEmbeddingBagPackedSumNode::prepareParams() {
_batch = getParentEdgesAtPort(INDICES_IDX)[0]->getMemory().getStaticDims()[0];
_indicesPerBag = getParentEdgesAtPort(INDICES_IDX)[0]->getMemory().getStaticDims()[1];
@ -94,6 +86,14 @@ void MKLDNNEmbeddingBagPackedSumNode::getIndices(int embIndex, const int*& indic
weightsIdx = embIndex * _indicesPerBag;
}
void MKLDNNEmbeddingBagPackedSumNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
bool MKLDNNEmbeddingBagPackedSumNode::isExecutable() const {
return !isInputTensorAtPortEmpty(0);
}
void MKLDNNEmbeddingBagPackedSumNode::execute(mkldnn::stream strm) {
const auto *srcData = reinterpret_cast<const uint8_t *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
auto *dstData = reinterpret_cast<uint8_t *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());

View File

@ -19,15 +19,15 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
bool isExecutable() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
protected:
void prepareParams() override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
void executeDynamicImpl(mkldnn::stream strm) override;
private:
void initFromInputs() override;

View File

@ -11,14 +11,6 @@
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
void MKLDNNEmbeddingSegmentsSumNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
bool MKLDNNEmbeddingSegmentsSumNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto embBagSegSumOp = ngraph::as_type_ptr<const ngraph::op::v3::EmbeddingSegmentsSum>(op);
@ -129,6 +121,14 @@ void MKLDNNEmbeddingSegmentsSumNode::getIndices(int embIndex, const int*& indice
}
}
void MKLDNNEmbeddingSegmentsSumNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
bool MKLDNNEmbeddingSegmentsSumNode::isExecutable() const {
return !isInputTensorAtPortEmpty(0);
}
void MKLDNNEmbeddingSegmentsSumNode::execute(mkldnn::stream strm) {
const auto *srcData = reinterpret_cast<const uint8_t *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
auto *dstData = reinterpret_cast<uint8_t *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());

View File

@ -19,15 +19,15 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
bool isExecutable() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
protected:
void prepareParams() override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
void executeDynamicImpl(mkldnn::stream strm) override;
private:
void initFromInputs() override;

View File

@ -272,12 +272,6 @@ void MKLDNNExperimentalDetectronDetectionOutputNode::initSupportedPrimitiveDescr
impl_desc_type::ref_any);
}
void MKLDNNExperimentalDetectronDetectionOutputNode::createPrimitive() {
if (inputShapesDefined()) {
updateLastInputDims();
}
}
void MKLDNNExperimentalDetectronDetectionOutputNode::execute(mkldnn::stream strm) {
const int rois_num = getParentEdgeAt(INPUT_ROIS)->getMemory().getStaticDims()[0];
assert(classes_num_ == static_cast<int>(getParentEdgeAt(INPUT_SCORES)->getMemory().getStaticDims()[1]));

View File

@ -15,7 +15,6 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;

Some files were not shown because too many files have changed in this diff