Merge remote-tracking branch 'github/master' into auto-batch-master
# Conflicts: # samples/cpp/benchmark_app/inputs_filling.cpp # samples/cpp/benchmark_app/remote_blobs_filling.cpp # src/plugins/auto/executable_network.cpp # src/tests/unit/auto/exec_network_get_metrics.cpp
This commit is contained in:
commit
9426db9b00
@ -241,7 +241,7 @@ jobs:
|
||||
. $(SETUPVARS) -pyver 3.8 && python3 -m pytest -s $(INSTALL_DIR)/tests/mo/unit_tests --junitxml=TEST-ModelOptimizer.xml
|
||||
displayName: 'Model Optimizer UT'
|
||||
continueOnError: false
|
||||
enabled: false
|
||||
enabled: true
|
||||
|
||||
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ov_core_unit_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml
|
||||
workingDirectory: $(INSTALL_TEST_DIR)
|
||||
@ -334,7 +334,7 @@ jobs:
|
||||
displayName: 'Samples Smoke Tests'
|
||||
continueOnError: false
|
||||
condition: eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'ON')
|
||||
enabled: false
|
||||
enabled: true
|
||||
|
||||
- script: |
|
||||
export DATA_PATH=$(MODELS_PATH)
|
||||
@ -353,7 +353,7 @@ jobs:
|
||||
workingDirectory: $(LAYER_TESTS_DIR)
|
||||
displayName: 'Layer Tests'
|
||||
continueOnError: false
|
||||
enabled: false
|
||||
enabled: true
|
||||
|
||||
- task: PublishTestResults@2
|
||||
condition: always()
|
||||
|
@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- checkout: none
|
||||
|
||||
- script: git -C ~/work/openvino checkout -m --recurse-submodules $(Build.SourceVersion)
|
||||
- script: git -C ~/work/openvino checkout -m $(Build.SourceVersion) && git -C ~/work/openvino submodule update --init --recursive
|
||||
displayName: checkout
|
||||
|
||||
# Should be after 'Install dependencies' because Git lfs is not installed
|
||||
@ -71,7 +71,7 @@ jobs:
|
||||
./buildreleasenolto.sh
|
||||
libinference_engine_preproc.so
|
||||
MKLDNNPlugin
|
||||
clDNNPlugin
|
||||
ov_intel_gpu_plugin
|
||||
clDNN_unit_tests64
|
||||
gpuFuncTests
|
||||
displayName: Build Lin
|
||||
|
@ -83,7 +83,7 @@ jobs:
|
||||
displayName: 'Make dir'
|
||||
|
||||
- script: |
|
||||
certutil -urlcache -split -f https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
|
||||
curl -O https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat
|
||||
call install_ib_console.bat
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'Install IncrediBuild'
|
||||
@ -117,9 +117,9 @@ jobs:
|
||||
python -m pip install -r $(REPO_DIR)\tools\mo\requirements.txt
|
||||
python -m pip install -r $(REPO_DIR)\tools\mo\requirements_dev.txt
|
||||
rem Speed up build
|
||||
certutil -urlcache -split -f https://github.com/Kitware/CMake/releases/download/v$(CMAKE_VERSION)/cmake-$(CMAKE_VERSION)-windows-x86_64.zip cmake-$(CMAKE_VERSION)-windows-x86_64.zip
|
||||
powershell -command "Invoke-WebRequest https://github.com/Kitware/CMake/releases/download/v$(CMAKE_VERSION)/cmake-$(CMAKE_VERSION)-windows-x86_64.zip -OutFile cmake-$(CMAKE_VERSION)-windows-x86_64.zip"
|
||||
powershell -command "Expand-Archive -Force cmake-$(CMAKE_VERSION)-windows-x86_64.zip"
|
||||
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip ninja-win.zip
|
||||
powershell -command "Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip -OutFile ninja-win.zip"
|
||||
powershell -command "Expand-Archive -Force ninja-win.zip"
|
||||
git clone https://github.com/google/gtest-parallel.git
|
||||
workingDirectory: $(WORK_DIR)
|
||||
|
@ -59,7 +59,7 @@ jobs:
|
||||
|
||||
- script: |
|
||||
rem Speed up build
|
||||
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip ninja-win.zip
|
||||
powershell -command "Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip -OutFile ninja-win.zip"
|
||||
powershell -command "Expand-Archive -Force ninja-win.zip"
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'Install dependencies'
|
||||
|
22
CODEOWNERS
22
CODEOWNERS
@ -35,7 +35,7 @@ Jenkinsfile @openvinotoolkit/openvino-admins
|
||||
/src/common/ @openvinotoolkit/openvino-ie-maintainers
|
||||
/src/core/ @openvinotoolkit/openvino-ngraph-maintainers
|
||||
/src/frontends/ @openvinotoolkit/openvino-ngraph-maintainers
|
||||
/inference-engine/tests_deprecated/readers/ @openvinotoolkit/openvino-ngraph-maintainers
|
||||
/src/tests_deprecated/readers/ @openvinotoolkit/openvino-ngraph-maintainers
|
||||
|
||||
# IE CPU:
|
||||
/inference-engine/src/mkldnn_plugin/ @openvinotoolkit/openvino-ie-cpu-maintainers @openvinotoolkit/openvino-ie-cpu-developers
|
||||
@ -53,12 +53,12 @@ Jenkinsfile @openvinotoolkit/openvino-admins
|
||||
/inference-engine/src/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers
|
||||
/src/inference/include/ie/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers
|
||||
/inference-engine/thirdparty/movidius/ @openvinotoolkit/openvino-ie-vpu-maintainers
|
||||
/inference-engine/tests_deprecated/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
|
||||
/inference-engine/tests_deprecated/functional/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
|
||||
/inference-engine/tests_deprecated/behavior/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
|
||||
/inference-engine/tests/functional/plugin/myriad/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
|
||||
/inference-engine/tests/unit/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
|
||||
/inference-engine/tests/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
|
||||
/src/tests_deprecated/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
|
||||
/src/tests_deprecated/functional/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
|
||||
/src/tests_deprecated/behavior/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
|
||||
/src/tests/functional/plugin/myriad/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
|
||||
/src/tests/unit/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
|
||||
/src/tests/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
|
||||
/inference-engine/scripts/run_tests_myriad_multistick.sh @openvinotoolkit/openvino-ie-vpu-maintainers
|
||||
|
||||
# IE GNA:
|
||||
@ -70,10 +70,10 @@ Jenkinsfile @openvinotoolkit/openvino-admins
|
||||
/src/inference/include/ie/multi-device/ @openvinotoolkit/openvino-ie-multi-maintainers
|
||||
|
||||
# IE Tests:
|
||||
/inference-engine/tests/ @openvinotoolkit/openvino-ie-tests-maintainers
|
||||
/inference-engine/tests_deprecated/ @openvinotoolkit/openvino-ie-tests-maintainers
|
||||
/inference-engine/tests/functional/inference_engine/ngraph_reader/ @openvinotoolkit/openvino-ie-tests-maintainers @openvinotoolkit/openvino-ngraph-maintainers
|
||||
/inference-engine/tests/functional/inference_engine/transformations/ @openvinotoolkit/openvino-ie-tests-maintainers @openvinotoolkit/openvino-ngraph-maintainers
|
||||
/src/tests/ @openvinotoolkit/openvino-ie-tests-maintainers
|
||||
/src/tests_deprecated/ @openvinotoolkit/openvino-ie-tests-maintainers
|
||||
/src/tests/functional/inference_engine/ngraph_reader/ @openvinotoolkit/openvino-ie-tests-maintainers @openvinotoolkit/openvino-ngraph-maintainers
|
||||
/src/tests/functional/inference_engine/transformations/ @openvinotoolkit/openvino-ie-tests-maintainers @openvinotoolkit/openvino-ngraph-maintainers
|
||||
|
||||
# Documentation:
|
||||
/docs/ @openvinotoolkit/openvino-docs-maintainers
|
||||
|
@ -79,8 +79,20 @@ function(_ie_add_api_validator_post_build_step)
|
||||
_ie_add_api_validator_post_build_step_recursive(TARGET ${API_VALIDATOR_TARGET})
|
||||
|
||||
# remove targets which were tested before
|
||||
|
||||
foreach(item IN LISTS VALIDATED_LIBRARIES)
|
||||
foreach(target IN LISTS API_VALIDATOR_TARGETS)
|
||||
list(FIND VALIDATED_LIBRARIES ${target} index)
|
||||
if (NOT index EQUAL -1)
|
||||
list(APPEND VALIDATED_TARGETS ${target})
|
||||
endif()
|
||||
if(TARGET "${target}")
|
||||
get_target_property(orig_target ${target} ALIASED_TARGET)
|
||||
list(FIND VALIDATED_LIBRARIES ${orig_target} index)
|
||||
if (NOT index EQUAL -1)
|
||||
list(APPEND VALIDATED_TARGETS ${target})
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
foreach(item IN LISTS VALIDATED_TARGETS)
|
||||
list(REMOVE_ITEM API_VALIDATOR_TARGETS ${item})
|
||||
endforeach()
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
set(FRONTEND_INSTALL_INCLUDE "runtime/include/ngraph/frontend")
|
||||
set(FRONTEND_INSTALL_INCLUDE "runtime/include/")
|
||||
set(FRONTEND_NAME_SUFFIX "_ov_frontend")
|
||||
|
||||
set(FRONTEND_NAMES "" CACHE INTERNAL "")
|
||||
@ -225,7 +225,7 @@ macro(ov_add_frontend)
|
||||
|
||||
if(OV_FRONTEND_LINKABLE_FRONTEND)
|
||||
# install -dev part
|
||||
install(DIRECTORY ${${TARGET_NAME}_INCLUDE_DIR}/${OV_FRONTEND_NAME}_frontend
|
||||
install(DIRECTORY ${${TARGET_NAME}_INCLUDE_DIR}/
|
||||
DESTINATION ${FRONTEND_INSTALL_INCLUDE}
|
||||
COMPONENT core_dev
|
||||
FILES_MATCHING PATTERN "*.hpp")
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/frontend.hpp"
|
||||
#include "openvino/frontend/frontend.hpp"
|
||||
|
||||
@OV_FRONTEND_DECLARATIONS@
|
||||
|
||||
|
@ -66,22 +66,22 @@ ov_model_convert("${CMAKE_CURRENT_SOURCE_DIR}/src/core/tests"
|
||||
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/core"
|
||||
onnx_out_files)
|
||||
|
||||
set(rel_path "inference-engine/tests/functional/plugin/shared/models")
|
||||
set(rel_path "src/tests/functional/plugin/shared/models")
|
||||
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
|
||||
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/func_tests/models"
|
||||
ft_out_files)
|
||||
|
||||
set(rel_path "inference-engine/tests/functional/inference_engine/onnx_reader")
|
||||
set(rel_path "src/tests/functional/inference_engine/onnx_reader")
|
||||
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
|
||||
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/onnx_reader"
|
||||
ie_onnx_out_files)
|
||||
|
||||
set(rel_path "inference-engine/tests/functional/inference_engine/ir_serialization")
|
||||
set(rel_path "src/tests/functional/inference_engine/ir_serialization")
|
||||
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
|
||||
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/ir_serialization"
|
||||
ie_serialize_out_files)
|
||||
|
||||
set(rel_path "inference-engine/tests/unit/frontends/onnx_import/models")
|
||||
set(rel_path "src/tests/unit/frontends/onnx_import/models")
|
||||
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
|
||||
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/onnx_import"
|
||||
ie_onnx_import_out_files)
|
||||
|
@ -2,12 +2,9 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#! [complex:transformation]
|
||||
import logging as log
|
||||
|
||||
import numpy as np
|
||||
|
||||
from mo.front.common.replacement import FrontReplacementSubgraph
|
||||
from mo.graph.graph import Graph
|
||||
from openvino.tools.mo.front.common.replacement import FrontReplacementSubgraph
|
||||
from openvino.tools.mo.graph.graph import Graph
|
||||
|
||||
|
||||
class Complex(FrontReplacementSubgraph):
|
||||
@ -41,4 +38,3 @@ class Complex(FrontReplacementSubgraph):
|
||||
# change the connection so now all consumers of "complex_node" get data from input node of strided slice nodes
|
||||
complex_node.out_port(0).get_connection().set_source(input_node_output_port)
|
||||
#! [complex:transformation]
|
||||
|
||||
|
@ -4,11 +4,11 @@
|
||||
#! [complex_abs:transformation]
|
||||
import numpy as np
|
||||
|
||||
from extensions.ops.elementwise import Pow
|
||||
from extensions.ops.ReduceOps import ReduceSum
|
||||
from mo.front.common.replacement import FrontReplacementOp
|
||||
from mo.graph.graph import Graph, Node
|
||||
from mo.ops.const import Const
|
||||
from openvino.tools.mo.ops.elementwise import Pow
|
||||
from openvino.tools.mo.ops.ReduceOps import ReduceSum
|
||||
from openvino.tools.mo.front.common.replacement import FrontReplacementOp
|
||||
from openvino.tools.mo.graph.graph import Graph, Node
|
||||
from openvino.tools.mo.ops.const import Const
|
||||
|
||||
|
||||
class ComplexAbs(FrontReplacementOp):
|
||||
|
@ -3,8 +3,7 @@
|
||||
|
||||
# ! [fft_ext:extractor]
|
||||
from ...ops.FFT import FFT
|
||||
from mo.front.extractor import FrontExtractorOp
|
||||
from mo.utils.error import Error
|
||||
from openvino.tools.mo.front.extractor import FrontExtractorOp
|
||||
|
||||
|
||||
class FFT2DFrontExtractor(FrontExtractorOp):
|
||||
|
@ -2,9 +2,9 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#! [fft:operation]
|
||||
from mo.front.common.partial_infer.elemental import copy_shape_infer
|
||||
from mo.graph.graph import Node, Graph
|
||||
from mo.ops.op import Op
|
||||
from openvino.tools.mo.front.common.partial_infer.elemental import copy_shape_infer
|
||||
from openvino.tools.mo.graph.graph import Graph
|
||||
from openvino.tools.mo.ops.op import Op
|
||||
|
||||
|
||||
class FFT(Op):
|
||||
|
@ -868,7 +868,7 @@ EXAMPLE_PATH = ../template_plugin/src \
|
||||
../template_plugin/tests/functional/CMakeLists.txt \
|
||||
../template_plugin/tests/functional/transformations \
|
||||
../template_plugin/tests/functional/shared_tests_instances/ \
|
||||
../../inference-engine/tests/functional/plugin/shared/include \
|
||||
../../src/tests/functional/plugin/shared/include \
|
||||
../snippets
|
||||
|
||||
# If the value of the EXAMPLE_PATH tag contains directories, you can use the
|
||||
|
@ -59,11 +59,14 @@ Framework-agnostic parameters:
|
||||
--reverse_input_channels
|
||||
Switch the input channels order from RGB to BGR (or
|
||||
vice versa). Applied to original inputs of the model
|
||||
if and only if a number of channels equals 3. Applied
|
||||
after application of --mean_values and --scale_values
|
||||
options, so numbers in --mean_values and
|
||||
--scale_values go in the order of channels used in the
|
||||
original model.
|
||||
if and only if a number of channels equals 3.
|
||||
When --mean_values/--scale_values are also specified,
|
||||
reversing of channels will be applied to user's input
|
||||
data first, so that numbers in --mean_values and
|
||||
--scale_values go in the order of channels used in
|
||||
the original model. In other words, if both options are
|
||||
specified then the data flow in the model looks as following:
|
||||
Parameter -> ReverseInputChannels -> Mean/Scale apply -> the original body of the model.
|
||||
--log_level {CRITICAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}
|
||||
Logger level
|
||||
--input INPUT Quoted list of comma-separated input nodes names with
|
||||
|
@ -639,9 +639,9 @@ graph. Consider the extractor for the TensorFlow\* operation `Const` (refer to t
|
||||
`extensions/front/tf/const_ext.py`):
|
||||
|
||||
```py
|
||||
from mo.front.extractor import FrontExtractorOp
|
||||
from mo.front.tf.extractors.utils import tf_dtype_extractor, tf_tensor_shape, tf_tensor_content
|
||||
from mo.ops.const import Const
|
||||
from openvino.tools.mo.front.extractor import FrontExtractorOp
|
||||
from openvino.tools.mo.front.tf.extractors.utils import tf_dtype_extractor, tf_tensor_shape, tf_tensor_content
|
||||
from openvino.tools.mo.ops.const import Const
|
||||
|
||||
|
||||
class ConstExtractor(FrontExtractorOp):
|
||||
@ -679,9 +679,9 @@ Consider another example with an extractor of ONNX\* operation `Constant` (refer
|
||||
from onnx import numpy_helper
|
||||
from onnx.numpy_helper import to_array
|
||||
|
||||
from mo.front.extractor import FrontExtractorOp
|
||||
from mo.front.onnx.extractors.utils import onnx_attr
|
||||
from mo.ops.const import Const
|
||||
from openvino.tools.mo.front.extractor import FrontExtractorOp
|
||||
from openvino.tools.mo.front.onnx.extractors.utils import onnx_attr
|
||||
from openvino.tools.mo.ops.const import Const
|
||||
|
||||
|
||||
class ConstantExtractor(FrontExtractorOp):
|
||||
@ -814,11 +814,11 @@ fusing of the sub-graph defining the [Mish](../../../ops/activation/Mish_4.md) a
|
||||
operation:
|
||||
|
||||
```py
|
||||
from extensions.front.Softplus_fusion import SoftplusFusion
|
||||
from extensions.ops.activation_ops import Mish
|
||||
from mo.front.common.replacement import FrontReplacementSubgraph
|
||||
from mo.front.subgraph_matcher import SubgraphMatch
|
||||
from mo.graph.graph import Graph, rename_nodes
|
||||
from openvino.tools.mo.front.Softplus_fusion import SoftplusFusion
|
||||
from openvino.tools.mo.ops.activation_ops import Mish
|
||||
from openvino.tools.mo.front.common.replacement import FrontReplacementSubgraph
|
||||
from openvino.tools.mo.front.subgraph_matcher import SubgraphMatch
|
||||
from openvino.tools.mo.graph.graph import Graph, rename_nodes
|
||||
|
||||
|
||||
class MishFusion(FrontReplacementSubgraph):
|
||||
@ -886,12 +886,12 @@ transformation.
|
||||
Consider an example transformation from the file `extensions/front/Pack.py`, which replaces the operation `Pack` from
|
||||
the TensorFlow\*:
|
||||
```py
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.front.common.replacement import FrontReplacementOp
|
||||
from mo.front.tf.graph_utils import create_op_with_const_inputs
|
||||
from mo.graph.graph import Node, Graph, rename_nodes
|
||||
from mo.ops.concat import Concat
|
||||
from mo.ops.unsqueeze import Unsqueeze
|
||||
from openvino.tools.mo.front.common.partial_infer.utils import int64_array
|
||||
from openvino.tools.mo.front.common.replacement import FrontReplacementOp
|
||||
from openvino.tools.mo.front.tf.graph_utils import create_op_with_const_inputs
|
||||
from openvino.tools.mo.graph.graph import Node, Graph, rename_nodes
|
||||
from openvino.tools.mo.ops.concat import Concat
|
||||
from openvino.tools.mo.ops.unsqueeze import Unsqueeze
|
||||
|
||||
|
||||
class Pack(FrontReplacementOp):
|
||||
@ -932,11 +932,11 @@ specification.
|
||||
```py
|
||||
import logging as log
|
||||
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.front.common.replacement import FrontReplacementPattern
|
||||
from mo.graph.graph import Graph
|
||||
from mo.ops.const import Const
|
||||
from mo.utils.error import Error
|
||||
from openvino.tools.mo.front.common.partial_infer.utils import int64_array
|
||||
from openvino.tools.mo.front.common.replacement import FrontReplacementPattern
|
||||
from openvino.tools.mo.graph.graph import Graph
|
||||
from openvino.tools.mo.ops.const import Const
|
||||
from openvino.tools.mo.utils.error import Error
|
||||
|
||||
|
||||
class SqueezeNormalize(FrontReplacementPattern):
|
||||
@ -1200,13 +1200,13 @@ The example of the configuration file for this type of transformation is `extens
|
||||
and the corresponding transformation file is `./extensions/front/YOLO.py`:
|
||||
|
||||
```py
|
||||
from extensions.front.no_op_eraser import NoOpEraser
|
||||
from extensions.front.standalone_const_eraser import StandaloneConstEraser
|
||||
from extensions.ops.regionyolo import RegionYoloOp
|
||||
from mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral
|
||||
from mo.graph.graph import Node, Graph
|
||||
from mo.ops.result import Result
|
||||
from mo.utils.error import Error
|
||||
from openvino.tools.mo.front.no_op_eraser import NoOpEraser
|
||||
from openvino.tools.mo.front.standalone_const_eraser import StandaloneConstEraser
|
||||
from openvino.tools.mo.ops.regionyolo import RegionYoloOp
|
||||
from openvino.tools.mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral
|
||||
from openvino.tools.mo.graph.graph import Node, Graph
|
||||
from openvino.tools.mo.ops.result import Result
|
||||
from openvino.tools.mo.utils.error import Error
|
||||
|
||||
|
||||
class YoloRegionAddon(FrontReplacementFromConfigFileGeneral):
|
||||
|
@ -20,9 +20,9 @@ assume that we have already created the `CustomOp` class (inherited from `Op` cl
|
||||
for this MXNet custom operation as described in the [Customize_Model_Optimizer](Customize_Model_Optimizer.md).
|
||||
|
||||
```py
|
||||
from extension.ops.custom_op import CustomOp # implementation of the MO operation class
|
||||
from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
|
||||
from mo.front.extractor import MXNetCustomFrontExtractorOp
|
||||
from openvino.tools.mo.ops.custom_op import CustomOp # implementation of the MO operation class
|
||||
from openvino.tools.mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
|
||||
from openvino.tools.mo.front.extractor import MXNetCustomFrontExtractorOp
|
||||
|
||||
class CustomProposalFrontExtractor(MXNetCustomFrontExtractorOp): # inherit from specific base class
|
||||
op = 'MyCustomOp' # the value corresponding to the `op_type` value of the MXNet operation
|
||||
|
@ -40,8 +40,8 @@ operation `ProposalOp` which corresponds to `Proposal` operation described in th
|
||||
document. Refer to the source code below for a detailed explanation of the extractor.
|
||||
|
||||
```py
|
||||
from extensions.ops.proposal import ProposalOp
|
||||
from mo.front.extractor import CaffePythonFrontExtractorOp
|
||||
from openvino.tools.mo.ops.proposal import ProposalOp
|
||||
from openvino.tools.mo.front.extractor import CaffePythonFrontExtractorOp
|
||||
|
||||
|
||||
class ProposalPythonFrontExtractor(CaffePythonFrontExtractorOp):
|
||||
|
@ -430,7 +430,7 @@ PassConfig instance taken from pass::Manager is shared across all registered tra
|
||||
## Transformations testing <a name="transformations_testing"></a>
|
||||
|
||||
If you are developing a new transformation inside a plugin, you need to add a test to the `template_plugin/tests/functional/transformations` folder.
|
||||
We have two types of tests: nGraph reader tests located in `inference-engine/tests/functional/inference_engine/ngraph_reader` and transformation tests located in `inference-engine/tests/functional/inference_engine/transformations`
|
||||
We have two types of tests: nGraph reader tests located in `src/tests/functional/inference_engine/ngraph_reader` and transformation tests located in `src/tests/functional/inference_engine/transformations`.
|
||||
Reader tests are IR based and test end-to-end conversion from IR to CNNNetwork. Transformation tests test single ngraph transformations or low-level functions that are used inside transformations.
|
||||
|
||||
The basic transformation test looks like this:
|
||||
|
@ -38,17 +38,10 @@ target_include_directories(interpreter_backend PUBLIC $<BUILD_INTERFACE:${CMAKE_
|
||||
file(GLOB_RECURSE all_backends_src "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/*.hpp")
|
||||
add_clang_format_target(interpreter_backend_clang FOR_SOURCES ${all_backends_src})
|
||||
|
||||
|
||||
# developer package
|
||||
|
||||
openvino_developer_export_targets(COMPONENT core TARGETS interpreter_backend)
|
||||
|
||||
install(TARGETS interpreter_backend
|
||||
RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL
|
||||
ARCHIVE DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL
|
||||
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL)
|
||||
if(NOT BUILD_SHARED_LIBS)
|
||||
install(TARGETS interpreter_backend
|
||||
RUNTIME DESTINATION tests COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL
|
||||
ARCHIVE DESTINATION tests COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL
|
||||
LIBRARY DESTINATION tests COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
# install
|
||||
|
||||
ov_install_static_lib(interpreter_backend template)
|
||||
|
@ -1707,7 +1707,24 @@ bool evaluate(const shared_ptr<op::v0::Log>& op, const HostTensorVector& outputs
|
||||
}
|
||||
|
||||
namespace ctc_loss_v4 {
|
||||
template <element::Type_t t1, element::Type_t t2>
|
||||
template <element::Type_t t1,
|
||||
element::Type_t t2,
|
||||
typename std::enable_if<!std::is_floating_point<typename element_type_traits<t1>::value_type>::value &&
|
||||
!std::is_same<typename element_type_traits<t1>::value_type, bfloat16>::value &&
|
||||
!std::is_same<typename element_type_traits<t1>::value_type, float16>::value,
|
||||
bool>::type = true>
|
||||
inline void evaluate(const shared_ptr<op::v4::CTCLoss>& op,
|
||||
const HostTensorVector& outputs,
|
||||
const HostTensorVector& inputs) {
|
||||
OPENVINO_ASSERT(false, "The data type for logits is expected to be a floating point type. Got:", element::Type(t1));
|
||||
}
|
||||
|
||||
template <element::Type_t t1,
|
||||
element::Type_t t2,
|
||||
typename std::enable_if<std::is_floating_point<typename element_type_traits<t1>::value_type>::value ||
|
||||
std::is_same<typename element_type_traits<t1>::value_type, bfloat16>::value ||
|
||||
std::is_same<typename element_type_traits<t1>::value_type, float16>::value,
|
||||
bool>::type = true>
|
||||
inline void evaluate(const shared_ptr<op::v4::CTCLoss>& op,
|
||||
const HostTensorVector& outputs,
|
||||
const HostTensorVector& inputs) {
|
||||
@ -1944,6 +1961,30 @@ bool evaluate(const shared_ptr<op::v0::RNNCell>& op, const HostTensorVector& out
|
||||
return true;
|
||||
}
|
||||
|
||||
template <element::Type_t ET>
|
||||
bool evaluate(const shared_ptr<op::v0::LSTMCell>& op, const HostTensorVector& outputs, const HostTensorVector& inputs) {
|
||||
using T = typename element_type_traits<ET>::value_type;
|
||||
runtime::reference::lstm_cell<T>(inputs[0]->get_data_ptr<ET>(),
|
||||
inputs[0]->get_shape(),
|
||||
inputs[1]->get_data_ptr<ET>(),
|
||||
inputs[1]->get_shape(),
|
||||
inputs[2]->get_data_ptr<ET>(),
|
||||
inputs[2]->get_shape(),
|
||||
inputs[3]->get_data_ptr<ET>(),
|
||||
inputs[3]->get_shape(),
|
||||
inputs[4]->get_data_ptr<ET>(),
|
||||
inputs[4]->get_shape(),
|
||||
inputs[5]->get_data_ptr<ET>(),
|
||||
inputs[5]->get_shape(),
|
||||
outputs[0]->get_data_ptr<ET>(),
|
||||
outputs[1]->get_data_ptr<ET>(),
|
||||
op->get_activations()[0],
|
||||
op->get_activations()[1],
|
||||
op->get_activations()[2],
|
||||
op->get_clip());
|
||||
return true;
|
||||
}
|
||||
|
||||
template <element::Type_t ET>
|
||||
bool evaluate(const shared_ptr<op::v4::LSTMCell>& op, const HostTensorVector& outputs, const HostTensorVector& inputs) {
|
||||
using T = typename element_type_traits<ET>::value_type;
|
||||
|
@ -20,6 +20,7 @@ NGRAPH_OP(Gelu, op::v0)
|
||||
NGRAPH_OP(GRN, op::v0)
|
||||
NGRAPH_OP(HardSigmoid, op::v0)
|
||||
NGRAPH_OP(LRN, ngraph::op::v0)
|
||||
NGRAPH_OP(LSTMCell, op::v0)
|
||||
NGRAPH_OP(MVN, ngraph::op::v0)
|
||||
NGRAPH_OP(NormalizeL2, op::v0)
|
||||
NGRAPH_OP(PriorBox, ngraph::op::v0)
|
||||
|
@ -37,4 +37,3 @@ set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_REL
|
||||
# ie_register_plugins(MAIN_TARGET ${TARGET_NAME}
|
||||
# POSSIBLE_PLUGINS ${TARGET_NAME})
|
||||
# [cmake:plugin]
|
||||
ov_install_static_lib(interpreter_backend tests)
|
||||
|
182
docs/template_plugin/tests/functional/op_reference/einsum.cpp
Normal file
182
docs/template_plugin/tests/functional/op_reference/einsum.cpp
Normal file
@ -0,0 +1,182 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "openvino/opsets/opset7.hpp"
|
||||
#include "openvino/opsets/opset1.hpp"
|
||||
#include "base_reference_test.hpp"
|
||||
|
||||
using namespace reference_tests;
|
||||
using namespace ov;
|
||||
|
||||
namespace {
|
||||
struct EinsumParams {
|
||||
std::vector<Tensor> inputs;
|
||||
std::string equation;
|
||||
Tensor expectedResult;
|
||||
std::string testcaseName;
|
||||
};
|
||||
|
||||
// Fluent builder for EinsumParams, with one setter per field.
// NOTE(review): REFERENCE_TESTS_ADD_SET_PARAM is defined outside this file;
// it presumably generates a chainable setter named after the field, as used
// by the `.inputs(...).equation(...)` chains in generateParams - confirm.
struct Builder : ParamsBuilder<EinsumParams> {
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, inputs);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, equation);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, expectedResult);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName);
};
|
||||
|
||||
// Value-parameterized reference test for opset7::Einsum.
//
// For each EinsumParams instance the fixture builds a model consisting of one
// Parameter per input tensor feeding a single Einsum node, then fills the
// input and reference-output data that the CompareWithRefs test body uses.
class ReferenceEinsumTest : public testing::TestWithParam<EinsumParams>, public CommonReferenceTest {
public:
    // Builds the model and fills inputData / refOutData from the current parameter set.
    void SetUp() override {
        // GetParam() returns a const reference; bind to it instead of copying every tensor.
        const auto& params = GetParam();
        function = CreateModel(params);
        for (const auto& input_tensor : params.inputs) {
            inputData.push_back(input_tensor.data);
        }
        refOutData = {params.expectedResult.data};
    }

    // Produces the test name from the first input's type and shape, the
    // equation, the expected result's type and shape and, when non-empty,
    // the user-provided testcase label.
    static std::string getTestCaseName(const testing::TestParamInfo<EinsumParams>& obj) {
        const auto& param = obj.param;
        std::ostringstream result;
        // Guard against an empty input list: indexing inputs[0] would be undefined behaviour.
        if (!param.inputs.empty()) {
            result << "iType=" << param.inputs[0].type;
            result << "_iShape=" << param.inputs[0].shape;
        }
        result << "_equation=" << param.equation;
        result << "_eType=" << param.expectedResult.type;
        result << "_eShape=" << param.expectedResult.shape;
        if (!param.testcaseName.empty()) {
            result << "_=" << param.testcaseName;
        }
        return result.str();
    }

private:
    // Creates a model with one opset1::Parameter per input tensor, all wired
    // into a single opset7::Einsum configured with params.equation.
    static std::shared_ptr<Model> CreateModel(const EinsumParams& params) {
        OutputVector output_vector;
        ParameterVector param_vector;
        for (const auto& input_tensor : params.inputs) {
            auto param = std::make_shared<opset1::Parameter>(input_tensor.type, input_tensor.shape);
            output_vector.push_back(param);
            param_vector.push_back(param);
        }
        const auto einsum = std::make_shared<opset7::Einsum>(output_vector, params.equation);
        // Return the freshly created pointer directly rather than copying a const local.
        return std::make_shared<Model>(OutputVector{einsum}, param_vector);
    }
};
|
||||
|
||||
// Runs once per EinsumParams instance.
// NOTE(review): Exec() is not defined in this file; presumably inherited from
// CommonReferenceTest and responsible for running the model built in SetUp()
// and comparing against refOutData - confirm.
TEST_P(ReferenceEinsumTest, CompareWithRefs) {
    Exec();
}
|
||||
|
||||
template <element::Type_t ET>
|
||||
std::vector<EinsumParams> generateParams() {
|
||||
using T = typename element_type_traits<ET>::value_type;
|
||||
std::vector<EinsumParams> params {
|
||||
Builder {}
|
||||
.inputs({{ET, {1, 2}, std::vector<T>{1, 2}},
|
||||
{ET, {3, 4}, std::vector<T>{3, 4, 5, 6,
|
||||
7, 8, 9, 10,
|
||||
11, 12, 13, 14}}})
|
||||
.equation("ab,cd->abcd")
|
||||
.expectedResult({ET, {1, 2, 3, 4}, std::vector<T>{3, 4, 5, 6, 7, 8, 9, 10,
|
||||
11, 12, 13, 14, 6, 8, 10, 12,
|
||||
14, 16, 18, 20, 22, 24, 26, 28}})
|
||||
.testcaseName("einsum_no_reduction"),
|
||||
Builder {}
|
||||
.inputs({{ET, {1, 2, 3}, std::vector<T>{1, 2, 3, 4, 5, 6}}})
|
||||
.equation("ijk->kij")
|
||||
.expectedResult({ET, {3, 1, 2}, std::vector<T>{1, 4, 2, 5, 3, 6}})
|
||||
.testcaseName("einsum_transpose"),
|
||||
|
||||
Builder {}
|
||||
.inputs({{ET, {2, 3}, std::vector<T>{1, 2, 3, 4, 5, 6}}})
|
||||
.equation("ab->a")
|
||||
.expectedResult({ET, {2}, std::vector<T>{6, 15}})
|
||||
.testcaseName("einsum_reduce"),
|
||||
|
||||
Builder {}
|
||||
.inputs({{ET, {2, 3}, std::vector<T>{1, 2, 3, 4, 5, 6}},
|
||||
{ET, {3, 2}, std::vector<T>{1, 2, 3, 4, 5, 6}}})
|
||||
.equation("ab,bc->ac")
|
||||
.expectedResult({ET, {2, 2}, std::vector<T>{22, 28, 49, 64}})
|
||||
.testcaseName("einsum_matrix_multiplication"),
|
||||
|
||||
Builder {}
|
||||
.inputs({{ET, {2, 4}, std::vector<T>{1, 3, 2, 7, 5, 6, 0, 1}},
|
||||
{ET, {4, 3, 1}, std::vector<T>{1, 2, 3, 4, 5, 6, 5, 7, 3, 7, 9, 1}},
|
||||
{ET, {4, 3}, std::vector<T>{4, 3, 1, 6, 4, 2, 2, 5, 3, 1, 9, 4}}})
|
||||
.equation("ab,bcd,bc->ca")
|
||||
.expectedResult({ET, {3, 2}, std::vector<T>{145, 171, 703, 231, 85, 91}})
|
||||
.testcaseName("einsum_multiple_multiplication"),
|
||||
|
||||
Builder {}
|
||||
.inputs({{ET, {2, 2, 3}, std::vector<T>{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}}})
|
||||
.equation("a...->...")
|
||||
.expectedResult({ET, {2, 3}, std::vector<T>{4, 8, 4, 8, 5, 13}})
|
||||
.testcaseName("einsum_ellipsis_one_input_reduction"),
|
||||
|
||||
Builder {}
|
||||
.inputs({{ET, {2, 2, 3}, std::vector<T>{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}}})
|
||||
.equation("a...->...a")
|
||||
.expectedResult({ET, {2, 3, 2}, std::vector<T>{1, 3, 3, 5, 2, 2, 7, 1, 5, 0, 6, 7}})
|
||||
.testcaseName("einsum_ellipsis_one_input_transpose"),
|
||||
|
||||
Builder {}
|
||||
.inputs({{ET, {2, 2, 3}, std::vector<T>{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}},
|
||||
{ET, {1}, std::vector<T>{2}}})
|
||||
.equation("ab...,...->ab...")
|
||||
.expectedResult({ET, {2, 2, 3}, std::vector<T>{2, 6, 4, 14, 10, 12, 6, 10, 4, 2, 0, 14}})
|
||||
.testcaseName("einsum_ellipsis_mul_by_1dscalar"),
|
||||
|
||||
Builder {}
|
||||
.inputs({{ET, {1, 1, 4, 3}, std::vector<T>{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}},
|
||||
{ET, {3, 4, 2, 1}, std::vector<T>{3, 1, 6, 2, 3, 10, 9, 8, 2, 9, 3, 2,
|
||||
4, 2, 3, 1, 9, 1, 11, 4, 7, 2, 3, 1}}})
|
||||
.equation("a...j,j...->a...")
|
||||
.expectedResult({ET, {1, 4, 2, 4}, std::vector<T>{27, 85, 37, 66, 30, 58, 50, 8,
|
||||
37, 123, 55, 83, 16, 48, 24, 30,
|
||||
29, 83, 43, 52, 20, 92, 44, 24,
|
||||
24, 96, 48, 30, 13, 67, 31, 15}})
|
||||
.testcaseName("einsum_ellipsis_complex_mul"),
|
||||
|
||||
Builder {}
|
||||
.inputs({{ET, {1, 3, 3}, std::vector<T>{1, 2, 3, 4, 5, 6, 7, 8, 9}}})
|
||||
.equation("kii->ki")
|
||||
.expectedResult({ET, {1, 3}, std::vector<T>{1, 5, 9}})
|
||||
.testcaseName("einsum_diagonal"),
|
||||
|
||||
Builder {}
|
||||
.inputs({{ET, {2, 3, 3, 2, 4}, std::vector<T>{4, 2, 5, 4, 5, 5, 1, 1, 3, 3, 1, 1, 2, 2, 4, 1, 3, 4,
|
||||
4, 5, 1, 3, 1, 3, 1, 4, 3, 5, 4, 4, 5, 4, 4, 5, 4, 2,
|
||||
2, 2, 3, 3, 1, 1, 4, 3, 4, 2, 2, 1, 1, 2, 3, 1, 1, 4,
|
||||
2, 3, 1, 3, 4, 2, 5, 5, 3, 4, 3, 4, 5, 4, 4, 5, 1, 3,
|
||||
4, 4, 5, 3, 1, 3, 2, 5, 3, 2, 5, 4, 4, 2, 4, 4, 1, 4,
|
||||
4, 5, 4, 4, 4, 2, 3, 3, 4, 2, 4, 2, 5, 1, 3, 2, 4, 3,
|
||||
5, 1, 2, 3, 1, 1, 2, 5, 1, 1, 2, 1, 4, 5, 3, 4, 1, 3,
|
||||
3, 1, 3, 2, 4, 5, 1, 1, 5, 4, 5, 2, 2, 3, 3, 1, 2, 4}},
|
||||
{ET, {3, 2, 1}, std::vector<T>{1, 4, 4, 5, 3, 3}}})
|
||||
.equation("abbac,bad->ad")
|
||||
.expectedResult({ET, {2, 1}, std::vector<T>{123, 129}})
|
||||
.testcaseName("einsum_diagonal_with_matmul"),
|
||||
};
|
||||
return params;
|
||||
}
|
||||
|
||||
std::vector<EinsumParams> generateCombinedParams() {
|
||||
const std::vector<std::vector<EinsumParams>> generatedParams {
|
||||
generateParams<element::Type_t::i32>(),
|
||||
generateParams<element::Type_t::f32>(),
|
||||
};
|
||||
std::vector<EinsumParams> combinedParams;
|
||||
|
||||
for (const auto& params : generatedParams) {
|
||||
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
|
||||
}
|
||||
return combinedParams;
|
||||
}
|
||||
|
||||
// Instantiates ReferenceEinsumTest once per entry returned by
// generateCombinedParams(); test names come from getTestCaseName.
INSTANTIATE_TEST_SUITE_P(smoke_Einsum_With_Hardcoded_Refs, ReferenceEinsumTest,
|
||||
testing::ValuesIn(generateCombinedParams()), ReferenceEinsumTest::getTestCaseName);
|
||||
} // namespace
|
@ -0,0 +1,246 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "openvino/opsets/opset3.hpp"
|
||||
#include "openvino/opsets/opset1.hpp"
|
||||
#include "base_reference_test.hpp"
|
||||
|
||||
using namespace reference_tests;
|
||||
using namespace ov;
|
||||
|
||||
namespace {
|
||||
// Parameters of one ExtractImagePatches reference test case: the input tensor
// (data), the attributes passed to the op when the model is built (sizes,
// strides, rates, autoPad), the expected output tensor and an optional name
// that is appended to the generated test name.
struct ExtractImagePatchesParams {
|
||||
Tensor data;
|
||||
Shape sizes;
|
||||
Strides strides;
|
||||
Shape rates;
|
||||
op::PadType autoPad;
|
||||
Tensor expectedResult;
|
||||
std::string testcaseName;
|
||||
};
|
||||
|
||||
// Builder for ExtractImagePatchesParams with one REFERENCE_TESTS_ADD_SET_PARAM
// entry per field. The macro is defined outside this file; judging by the
// chained `.data(...).sizes(...)` calls in generateParams it presumably
// generates a chainable setter for each listed field -- confirm in the header.
struct Builder : ParamsBuilder<ExtractImagePatchesParams> {
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, data);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, sizes);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, strides);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, rates);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, autoPad);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, expectedResult);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName);
|
||||
};
|
||||
|
||||
// Parameterized fixture for the ExtractImagePatches reference tests: builds a
// model with a single opset3 ExtractImagePatches node and records the input
// tensor and the expected output for comparison.
class ReferenceExtractImagePatchesTest : public testing::TestWithParam<ExtractImagePatchesParams>,
                                         public CommonReferenceTest {
public:
    // Builds the model and fills the input / reference-output containers.
    void SetUp() override {
        const auto& test_case = GetParam();
        function = CreateModel(test_case);
        inputData = {test_case.data.data};
        refOutData = {test_case.expectedResult.data};
    }

    // Composes the test name from the input type/shape, every op attribute, the
    // expected result's type/shape and, when present, the explicit case name.
    static std::string getTestCaseName(const testing::TestParamInfo<ExtractImagePatchesParams>& obj) {
        const auto& test_case = obj.param;
        std::ostringstream name;
        name << "dType=" << test_case.data.type
             << "_dShape=" << test_case.data.shape
             << "_sizes=" << test_case.sizes
             << "_strides=" << test_case.strides
             << "_rates=" << test_case.rates
             << "_autoPad=" << test_case.autoPad
             << "_eType=" << test_case.expectedResult.type
             << "_eShape=" << test_case.expectedResult.shape;
        if (test_case.testcaseName != "") {
            name << "_=" << test_case.testcaseName;
        }
        return name.str();
    }

private:
    // One Parameter feeding one ExtractImagePatches node configured from params.
    static std::shared_ptr<Model> CreateModel(const ExtractImagePatchesParams& params) {
        const auto input = std::make_shared<opset1::Parameter>(params.data.type, params.data.shape);
        const auto patches_op = std::make_shared<opset3::ExtractImagePatches>(input,
                                                                              params.sizes,
                                                                              params.strides,
                                                                              params.rates,
                                                                              params.autoPad);
        return std::make_shared<Model>(patches_op, ParameterVector{input});
    }
};
|
||||
|
||||
// Parameterized test body: everything is delegated to Exec(), which is not
// defined in this file (presumably inherited from CommonReferenceTest -- confirm).
TEST_P(ReferenceExtractImagePatchesTest, CompareWithRefs) {
|
||||
Exec();
|
||||
}
|
||||
|
||||
template <element::Type_t ET>
|
||||
std::vector<ExtractImagePatchesParams> generateParams() {
|
||||
using T = typename element_type_traits<ET>::value_type;
|
||||
std::vector<ExtractImagePatchesParams> params {
|
||||
Builder {}
|
||||
.data({ET, {1, 1, 10, 10}, std::vector<T>{
|
||||
1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
|
||||
11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
|
||||
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
|
||||
31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
|
||||
41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
|
||||
51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
|
||||
61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
|
||||
71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
|
||||
81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
|
||||
91, 92, 93, 94, 95, 96, 97, 98, 99, 100}})
|
||||
.sizes({3, 3})
|
||||
.strides({5, 5})
|
||||
.rates({1, 1})
|
||||
.autoPad(op::PadType::VALID)
|
||||
.expectedResult({ET, {1, 9, 2, 2}, std::vector<T>{
|
||||
1, 6, 51, 56,
|
||||
2, 7, 52, 57,
|
||||
3, 8, 53, 58,
|
||||
11, 16, 61, 66,
|
||||
12, 17, 62, 67,
|
||||
13, 18, 63, 68,
|
||||
21, 26, 71, 76,
|
||||
22, 27, 72, 77,
|
||||
23, 28, 73, 78}}),
|
||||
|
||||
Builder {}
|
||||
.data({ET, {1, 1, 10, 10}, std::vector<T>{
|
||||
1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
|
||||
11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
|
||||
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
|
||||
31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
|
||||
41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
|
||||
51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
|
||||
61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
|
||||
71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
|
||||
81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
|
||||
91, 92, 93, 94, 95, 96, 97, 98, 99, 100}})
|
||||
.sizes({4, 4})
|
||||
.strides({8, 8})
|
||||
.rates({1, 1})
|
||||
.autoPad(op::PadType::VALID)
|
||||
.expectedResult({ET, {1, 16, 1, 1}, std::vector<T>{
|
||||
1, 2, 3, 4,
|
||||
11, 12, 13, 14,
|
||||
21, 22, 23, 24,
|
||||
31, 32, 33, 34}}),
|
||||
|
||||
Builder {}
|
||||
.data({ET, {1, 1, 10, 10}, std::vector<T>{
|
||||
1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
|
||||
11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
|
||||
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
|
||||
31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
|
||||
41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
|
||||
51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
|
||||
61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
|
||||
71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
|
||||
81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
|
||||
91, 92, 93, 94, 95, 96, 97, 98, 99, 100}})
|
||||
.sizes({4, 4})
|
||||
.strides({9, 9})
|
||||
.rates({1, 1})
|
||||
.autoPad(op::PadType::SAME_UPPER)
|
||||
.expectedResult({ET, {1, 16, 2, 2}, std::vector<T>{
|
||||
0, 0, 0, 89,
|
||||
0, 0, 81, 90,
|
||||
0, 0, 82, 0,
|
||||
0, 0, 83, 0,
|
||||
0, 9, 0, 99,
|
||||
1, 10, 91, 100,
|
||||
2, 0, 92, 0,
|
||||
3, 0, 93, 0,
|
||||
0, 19, 0, 0,
|
||||
11, 20, 0, 0,
|
||||
12, 0, 0, 0,
|
||||
13, 0, 0, 0,
|
||||
0, 29, 0, 0,
|
||||
21, 30, 0, 0,
|
||||
22, 0, 0, 0,
|
||||
23, 0, 0, 0}}),
|
||||
|
||||
Builder {}
|
||||
.data({ET, {1, 1, 10, 10}, std::vector<T>{
|
||||
1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
|
||||
11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
|
||||
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
|
||||
31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
|
||||
41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
|
||||
51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
|
||||
61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
|
||||
71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
|
||||
81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
|
||||
91, 92, 93, 94, 95, 96, 97, 98, 99, 100}})
|
||||
.sizes({3, 3})
|
||||
.strides({5, 5})
|
||||
.rates({2, 2})
|
||||
.autoPad(op::PadType::VALID)
|
||||
.expectedResult({ET, {1, 9, 2, 2}, std::vector<T>{
|
||||
1, 6, 51, 56,
|
||||
3, 8, 53, 58,
|
||||
5, 10, 55, 60,
|
||||
21, 26, 71, 76,
|
||||
23, 28, 73, 78,
|
||||
25, 30, 75, 80,
|
||||
41, 46, 91, 96,
|
||||
43, 48, 93, 98,
|
||||
45, 50, 95, 100}}),
|
||||
|
||||
Builder {}
|
||||
.data({ET, {1, 2, 5, 5}, std::vector<T>{
|
||||
1, 2, 3, 4, 5,
|
||||
6, 7, 8, 9, 10,
|
||||
11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20,
|
||||
21, 22, 23, 24, 25,
|
||||
26, 27, 28, 29, 30,
|
||||
31, 32, 33, 34, 35,
|
||||
36, 37, 38, 39, 40,
|
||||
41, 42, 43, 44, 45,
|
||||
46, 47, 48, 49, 50}})
|
||||
.sizes({2, 2})
|
||||
.strides({3, 3})
|
||||
.rates({1, 1})
|
||||
.autoPad(op::PadType::VALID)
|
||||
.expectedResult({ET, {1, 8, 2, 2}, std::vector<T>{
|
||||
1, 4, 16, 19,
|
||||
26, 29, 41, 44,
|
||||
2, 5, 17, 20,
|
||||
27, 30, 42, 45,
|
||||
6, 9, 21, 24,
|
||||
31, 34, 46, 49,
|
||||
7, 10, 22, 25,
|
||||
32, 35, 47, 50}}),
|
||||
};
|
||||
return params;
|
||||
}
|
||||
|
||||
std::vector<ExtractImagePatchesParams> generateCombinedParams() {
|
||||
const std::vector<std::vector<ExtractImagePatchesParams>> generatedParams {
|
||||
generateParams<element::Type_t::i8>(),
|
||||
generateParams<element::Type_t::i16>(),
|
||||
generateParams<element::Type_t::i32>(),
|
||||
generateParams<element::Type_t::i64>(),
|
||||
generateParams<element::Type_t::u8>(),
|
||||
generateParams<element::Type_t::u16>(),
|
||||
generateParams<element::Type_t::u32>(),
|
||||
generateParams<element::Type_t::u64>(),
|
||||
generateParams<element::Type_t::bf16>(),
|
||||
generateParams<element::Type_t::f16>(),
|
||||
generateParams<element::Type_t::f32>(),
|
||||
generateParams<element::Type_t::f64>(),
|
||||
};
|
||||
std::vector<ExtractImagePatchesParams> combinedParams;
|
||||
|
||||
for (const auto& params : generatedParams) {
|
||||
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
|
||||
}
|
||||
return combinedParams;
|
||||
}
|
||||
|
||||
// Instantiates ReferenceExtractImagePatchesTest once per entry returned by
// generateCombinedParams(); test names come from getTestCaseName.
INSTANTIATE_TEST_SUITE_P(smoke_ExtractImagePatches_With_Hardcoded_Refs, ReferenceExtractImagePatchesTest,
|
||||
testing::ValuesIn(generateCombinedParams()), ReferenceExtractImagePatchesTest::getTestCaseName);
|
||||
} // namespace
|
@ -4,7 +4,8 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "openvino/op/lstm_cell.hpp"
|
||||
#include "openvino/opsets/opset4.hpp"
|
||||
#include "openvino/opsets/opset1.hpp"
|
||||
#include "base_reference_test.hpp"
|
||||
|
||||
using namespace reference_tests;
|
||||
@ -12,13 +13,6 @@ using namespace ov;
|
||||
|
||||
namespace {
|
||||
struct LSTMCellParams {
|
||||
LSTMCellParams(
|
||||
int32_t batchSize, int32_t inputSize, int32_t hiddenSize, int32_t gatesCount,
|
||||
const Tensor& X, const Tensor& W, const Tensor& R, const Tensor& H_t, const Tensor& C_t, const Tensor& B,
|
||||
const Tensor& Ho, const Tensor& Co, const std::string& testcaseName = "") :
|
||||
batchSize(batchSize), inputSize(inputSize), hiddenSize(hiddenSize), gatesCount(gatesCount),
|
||||
X(X), W(W), R(R), H_t(H_t), C_t(C_t), B(B), Ho(Ho), Co(Co), testcaseName(testcaseName) {}
|
||||
|
||||
int32_t batchSize;
|
||||
int32_t inputSize;
|
||||
int32_t hiddenSize;
|
||||
@ -34,6 +28,22 @@ struct LSTMCellParams {
|
||||
std::string testcaseName;
|
||||
};
|
||||
|
||||
// Builder for LSTMCellParams with one REFERENCE_TESTS_ADD_SET_PARAM entry per
// field. The macro is defined outside this file; judging by the chained
// `.batchSize(...).inputSize(...)` calls in the generators it presumably
// generates a chainable setter for each listed field -- confirm in the header.
struct Builder : ParamsBuilder<LSTMCellParams> {
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, batchSize);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, inputSize);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, hiddenSize);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, gatesCount);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, X);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, W);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, R);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, H_t);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, C_t);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, B);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, Ho);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, Co);
|
||||
REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName);
|
||||
};
|
||||
|
||||
class ReferenceLSTMCellTest : public testing::TestWithParam<LSTMCellParams>, public CommonReferenceTest {
|
||||
public:
|
||||
void SetUp() override {
|
||||
@ -63,26 +73,24 @@ public:
|
||||
result << "_hoType=" << param.Ho.type;
|
||||
result << "_hoShape=" << param.Ho.shape;
|
||||
result << "_coType=" << param.Co.type;
|
||||
result << "_coShape=" << param.Co.shape;
|
||||
if (param.testcaseName != "") {
|
||||
result << "_coShape=" << param.Co.shape;
|
||||
result << "_=" << param.testcaseName;
|
||||
} else {
|
||||
result << "_coShape=" << param.Co.shape;
|
||||
}
|
||||
return result.str();
|
||||
}
|
||||
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
|
||||
const auto X = std::make_shared<op::v0::Parameter>(params.X.type, params.X.shape);
|
||||
const auto W = std::make_shared<op::v0::Parameter>(params.W.type, params.W.shape);
|
||||
const auto R = std::make_shared<op::v0::Parameter>(params.R.type, params.R.shape);
|
||||
const auto H_t = std::make_shared<op::v0::Parameter>(params.H_t.type, params.H_t.shape);
|
||||
const auto C_t = std::make_shared<op::v0::Parameter>(params.C_t.type, params.C_t.shape);
|
||||
const auto B = std::make_shared<op::v0::Parameter>(params.B.type, params.B.shape);
|
||||
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
|
||||
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
|
||||
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
|
||||
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
|
||||
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
|
||||
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
|
||||
|
||||
const auto lstm_cell =
|
||||
std::make_shared<op::v4::LSTMCell>(X,
|
||||
std::make_shared<opset4::LSTMCell>(X,
|
||||
H_t,
|
||||
C_t,
|
||||
op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
|
||||
@ -107,15 +115,15 @@ public:
|
||||
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
|
||||
const auto X = std::make_shared<op::v0::Parameter>(params.X.type, params.X.shape);
|
||||
const auto W = std::make_shared<op::v0::Parameter>(params.W.type, params.W.shape);
|
||||
const auto R = std::make_shared<op::v0::Parameter>(params.R.type, params.R.shape);
|
||||
const auto H_t = std::make_shared<op::v0::Parameter>(params.H_t.type, params.H_t.shape);
|
||||
const auto C_t = std::make_shared<op::v0::Parameter>(params.C_t.type, params.C_t.shape);
|
||||
const auto B = std::make_shared<op::v0::Parameter>(params.B.type, params.B.shape);
|
||||
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
|
||||
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
|
||||
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
|
||||
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
|
||||
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
|
||||
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
|
||||
|
||||
const auto lstm_cell =
|
||||
std::make_shared<op::v4::LSTMCell>(X,
|
||||
std::make_shared<opset4::LSTMCell>(X,
|
||||
H_t,
|
||||
C_t,
|
||||
op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
|
||||
@ -142,15 +150,15 @@ private:
|
||||
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
|
||||
const float clip_threshold = 3.5f;
|
||||
|
||||
const auto X = std::make_shared<op::v0::Parameter>(params.X.type, params.X.shape);
|
||||
const auto W = std::make_shared<op::v0::Parameter>(params.W.type, params.W.shape);
|
||||
const auto R = std::make_shared<op::v0::Parameter>(params.R.type, params.R.shape);
|
||||
const auto H_t = std::make_shared<op::v0::Parameter>(params.H_t.type, params.H_t.shape);
|
||||
const auto C_t = std::make_shared<op::v0::Parameter>(params.C_t.type, params.C_t.shape);
|
||||
const auto B = std::make_shared<op::v0::Parameter>(params.B.type, params.B.shape);
|
||||
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
|
||||
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
|
||||
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
|
||||
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
|
||||
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
|
||||
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
|
||||
|
||||
const auto lstm_cell =
|
||||
std::make_shared<op::v4::LSTMCell>(X,
|
||||
std::make_shared<opset4::LSTMCell>(X,
|
||||
H_t,
|
||||
C_t,
|
||||
W,
|
||||
@ -179,36 +187,130 @@ TEST_P(ReferenceLSTMCellTestBiasClip, CompareWithRefs) {
|
||||
Exec();
|
||||
}
|
||||
|
||||
class ReferenceLSTMCellV1Test : public ReferenceLSTMCellTest {
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
|
||||
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
|
||||
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
|
||||
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
|
||||
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
|
||||
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
|
||||
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
|
||||
|
||||
const auto lstm_cell =
|
||||
std::make_shared<opset1::LSTMCell>(X,
|
||||
H_t,
|
||||
C_t,
|
||||
op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
|
||||
op::util::convert_lstm_node_format(R, op::util::LSTMWeightsFormat::IOFC),
|
||||
op::util::convert_lstm_node_format(B, op::util::LSTMWeightsFormat::IOFC),
|
||||
params.hiddenSize);
|
||||
|
||||
auto function = std::make_shared<Model>(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B});
|
||||
return function;
|
||||
}
|
||||
};
|
||||
|
||||
class ReferenceLSTMCellV1TestBiasDefaultAttrs : public ReferenceLSTMCellTestBiasDefaultAttrs {
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
|
||||
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
|
||||
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
|
||||
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
|
||||
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
|
||||
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
|
||||
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
|
||||
|
||||
const auto lstm_cell =
|
||||
std::make_shared<opset1::LSTMCell>(X,
|
||||
H_t,
|
||||
C_t,
|
||||
op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
|
||||
op::util::convert_lstm_node_format(R, op::util::LSTMWeightsFormat::IOFC),
|
||||
op::util::convert_lstm_node_format(B, op::util::LSTMWeightsFormat::IOFC),
|
||||
params.hiddenSize);
|
||||
|
||||
auto function = std::make_shared<Model>(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B});
|
||||
return function;
|
||||
}
|
||||
};
|
||||
|
||||
class ReferenceLSTMCellV1TestBiasClip : public ReferenceLSTMCellTestBiasClip {
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
|
||||
const float clip_threshold = 3.5f;
|
||||
|
||||
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
|
||||
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
|
||||
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
|
||||
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
|
||||
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
|
||||
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
|
||||
|
||||
const auto lstm_cell =
|
||||
std::make_shared<opset1::LSTMCell>(X,
|
||||
H_t,
|
||||
C_t,
|
||||
W,
|
||||
R,
|
||||
B,
|
||||
params.hiddenSize,
|
||||
op::LSTMWeightsFormat::IFCO,
|
||||
std::vector<std::string>{"sigmoid", "tanh", "tanh"},
|
||||
std::vector<float>{},
|
||||
std::vector<float>{},
|
||||
clip_threshold);
|
||||
|
||||
auto function = std::make_shared<Model>(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B});
|
||||
return function;
|
||||
}
|
||||
};
|
||||
|
||||
// Parameterized test body: everything is delegated to Exec(), which is not
// defined in this file (presumably inherited from CommonReferenceTest -- confirm).
TEST_P(ReferenceLSTMCellV1Test, CompareWithRefs) {
|
||||
Exec();
|
||||
}
|
||||
|
||||
// Parameterized test body: everything is delegated to Exec(), which is not
// defined in this file (presumably inherited from CommonReferenceTest -- confirm).
TEST_P(ReferenceLSTMCellV1TestBiasDefaultAttrs, CompareWithRefs) {
|
||||
Exec();
|
||||
}
|
||||
|
||||
// Parameterized test body: everything is delegated to Exec(), which is not
// defined in this file (presumably inherited from CommonReferenceTest -- confirm).
TEST_P(ReferenceLSTMCellV1TestBiasClip, CompareWithRefs) {
|
||||
Exec();
|
||||
}
|
||||
|
||||
template <element::Type_t ET>
|
||||
std::vector<LSTMCellParams> generateParams() {
|
||||
using T = typename element_type_traits<ET>::value_type;
|
||||
std::vector<LSTMCellParams> params {
|
||||
LSTMCellParams(
|
||||
2, 3, 3, 4,
|
||||
Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}),
|
||||
Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
|
||||
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
|
||||
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
|
||||
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
|
||||
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
|
||||
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}),
|
||||
Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
|
||||
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
|
||||
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
|
||||
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
|
||||
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
|
||||
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}),
|
||||
Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}),
|
||||
Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}),
|
||||
Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)),
|
||||
Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}),
|
||||
Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}),
|
||||
"lstm_cell_zero_bias_default_attrs"),
|
||||
Builder {}
|
||||
.batchSize(2)
|
||||
.inputSize(3)
|
||||
.hiddenSize(3)
|
||||
.gatesCount(4)
|
||||
.X(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
|
||||
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
|
||||
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
|
||||
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
|
||||
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
|
||||
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
|
||||
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
|
||||
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
|
||||
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
|
||||
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
|
||||
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
|
||||
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
|
||||
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
|
||||
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
|
||||
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
|
||||
.B(Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)))
|
||||
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}))
|
||||
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}))
|
||||
.testcaseName("lstm_cell_zero_bias_default_attrs")
|
||||
};
|
||||
return params;
|
||||
}
|
||||
@ -232,53 +334,56 @@ template <element::Type_t ET>
|
||||
std::vector<LSTMCellParams> generateParamsBiasDefaultAttrs() {
|
||||
using T = typename element_type_traits<ET>::value_type;
|
||||
std::vector<LSTMCellParams> params {
|
||||
LSTMCellParams(
|
||||
2, 3, 3, 4,
|
||||
Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}),
|
||||
Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
|
||||
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
|
||||
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
|
||||
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
|
||||
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
|
||||
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}),
|
||||
Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
|
||||
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
|
||||
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
|
||||
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
|
||||
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
|
||||
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}),
|
||||
Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}),
|
||||
Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}),
|
||||
Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
|
||||
1.15248052f,
|
||||
1.16671345f,
|
||||
0.21450312f,
|
||||
1.2380678f,
|
||||
1.51688835f,
|
||||
0.46718366f,
|
||||
0.91810346f,
|
||||
1.1274234f,
|
||||
0.51022074f,
|
||||
1.11389844f,
|
||||
0.74174305f}),
|
||||
Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
|
||||
Builder {}
|
||||
.batchSize(2)
|
||||
.inputSize(3)
|
||||
.hiddenSize(3)
|
||||
.gatesCount(4)
|
||||
.X(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
|
||||
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
|
||||
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
|
||||
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
|
||||
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
|
||||
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
|
||||
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
|
||||
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
|
||||
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
|
||||
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
|
||||
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
|
||||
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
|
||||
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
|
||||
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
|
||||
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
|
||||
.B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
|
||||
1.15248052f,
|
||||
1.16671345f,
|
||||
0.21450312f,
|
||||
1.2380678f,
|
||||
1.51688835f,
|
||||
0.46718366f,
|
||||
0.91810346f,
|
||||
1.1274234f,
|
||||
0.51022074f,
|
||||
1.11389844f,
|
||||
0.74174305f}))
|
||||
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
|
||||
0.76665538549423218,
|
||||
0.82509011030197144,
|
||||
0.6479143500328064,
|
||||
0.66586339473724365,
|
||||
0.74838578701019287}),
|
||||
Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
|
||||
0.74838578701019287}))
|
||||
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
|
||||
1.1150213479995728,
|
||||
1.4578367471694946,
|
||||
1.0649888515472412,
|
||||
0.93761754035949707,
|
||||
1.3659683465957642}),
|
||||
"lstm_cell_bias_default_attrs"),
|
||||
1.3659683465957642}))
|
||||
.testcaseName("lstm_cell_bias_default_attrs"),
|
||||
};
|
||||
return params;
|
||||
}
|
||||
@ -302,53 +407,56 @@ template <element::Type_t ET>
|
||||
std::vector<LSTMCellParams> generateParamsBiasClip() {
|
||||
using T = typename element_type_traits<ET>::value_type;
|
||||
std::vector<LSTMCellParams> params {
|
||||
LSTMCellParams(
|
||||
2, 3, 3, 4,
|
||||
Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}),
|
||||
Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
|
||||
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
|
||||
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
|
||||
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
|
||||
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
|
||||
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}),
|
||||
Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
|
||||
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
|
||||
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
|
||||
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
|
||||
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
|
||||
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}),
|
||||
Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}),
|
||||
Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}),
|
||||
Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
|
||||
1.15248052f,
|
||||
1.16671345f,
|
||||
0.21450312f,
|
||||
1.2380678f,
|
||||
1.51688835f,
|
||||
0.46718366f,
|
||||
0.91810346f,
|
||||
1.1274234f,
|
||||
0.51022074f,
|
||||
1.11389844f,
|
||||
0.74174305f}),
|
||||
Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
|
||||
Builder {}
|
||||
.batchSize(2)
|
||||
.inputSize(3)
|
||||
.hiddenSize(3)
|
||||
.gatesCount(4)
|
||||
.X(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
|
||||
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
|
||||
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
|
||||
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
|
||||
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
|
||||
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
|
||||
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
|
||||
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
|
||||
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
|
||||
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
|
||||
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
|
||||
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
|
||||
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
|
||||
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
|
||||
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
|
||||
.B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
|
||||
1.15248052f,
|
||||
1.16671345f,
|
||||
0.21450312f,
|
||||
1.2380678f,
|
||||
1.51688835f,
|
||||
0.46718366f,
|
||||
0.91810346f,
|
||||
1.1274234f,
|
||||
0.51022074f,
|
||||
1.11389844f,
|
||||
0.74174305f}))
|
||||
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
|
||||
0.76665538549423218,
|
||||
0.82387429475784302,
|
||||
0.6479143500328064,
|
||||
0.66586339473724365,
|
||||
0.74838578701019287}),
|
||||
Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
|
||||
0.74838578701019287}))
|
||||
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
|
||||
1.1150213479995728,
|
||||
1.4510968923568726,
|
||||
1.0649888515472412,
|
||||
0.93761754035949707,
|
||||
1.3659683465957642}),
|
||||
"lstm_cell_bias_clip"),
|
||||
1.3659683465957642}))
|
||||
.testcaseName("lstm_cell_bias_clip"),
|
||||
};
|
||||
return params;
|
||||
}
|
||||
@ -376,4 +484,211 @@ INSTANTIATE_TEST_SUITE_P(smoke_LSTMCell_With_Hardcoded_Refs, ReferenceLSTMCellTe
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_LSTMCell_With_Hardcoded_Refs, ReferenceLSTMCellTestBiasClip,
|
||||
testing::ValuesIn(generateCombinedParamsBiasClip()), ReferenceLSTMCellTest::getTestCaseName);
|
||||
} // namespace
|
||||
|
||||
template <element::Type_t ET>
|
||||
std::vector<LSTMCellParams> generateParamsV1() {
|
||||
using T = typename element_type_traits<ET>::value_type;
|
||||
std::vector<LSTMCellParams> params {
|
||||
Builder {}
|
||||
.batchSize(2)
|
||||
.inputSize(3)
|
||||
.hiddenSize(3)
|
||||
.gatesCount(4)
|
||||
.X(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
|
||||
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
|
||||
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
|
||||
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
|
||||
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
|
||||
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
|
||||
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
|
||||
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
|
||||
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
|
||||
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
|
||||
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
|
||||
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
|
||||
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
|
||||
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
|
||||
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
|
||||
.B(Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)))
|
||||
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}))
|
||||
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}))
|
||||
.testcaseName("lstm_cell_v1_zero_bias_default_attrs")
|
||||
};
|
||||
return params;
|
||||
}
|
||||
|
||||
std::vector<LSTMCellParams> generateCombinedParamsV1() {
|
||||
const std::vector<std::vector<LSTMCellParams>> generatedParams {
|
||||
generateParamsV1<element::Type_t::bf16>(),
|
||||
generateParamsV1<element::Type_t::f16>(),
|
||||
generateParamsV1<element::Type_t::f32>(),
|
||||
generateParamsV1<element::Type_t::f64>(),
|
||||
};
|
||||
std::vector<LSTMCellParams> combinedParams;
|
||||
|
||||
for (const auto& params : generatedParams) {
|
||||
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
|
||||
}
|
||||
return combinedParams;
|
||||
}
|
||||
|
||||
template <element::Type_t ET>
|
||||
std::vector<LSTMCellParams> generateParamsBiasDefaultAttrsV1() {
|
||||
using T = typename element_type_traits<ET>::value_type;
|
||||
std::vector<LSTMCellParams> params {
|
||||
Builder {}
|
||||
.batchSize(2)
|
||||
.inputSize(3)
|
||||
.hiddenSize(3)
|
||||
.gatesCount(4)
|
||||
.X(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
|
||||
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
|
||||
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
|
||||
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
|
||||
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
|
||||
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
|
||||
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
|
||||
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
|
||||
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
|
||||
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
|
||||
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
|
||||
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
|
||||
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
|
||||
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
|
||||
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
|
||||
.B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
|
||||
1.15248052f,
|
||||
1.16671345f,
|
||||
0.21450312f,
|
||||
1.2380678f,
|
||||
1.51688835f,
|
||||
0.46718366f,
|
||||
0.91810346f,
|
||||
1.1274234f,
|
||||
0.51022074f,
|
||||
1.11389844f,
|
||||
0.74174305f}))
|
||||
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
|
||||
0.76665538549423218,
|
||||
0.82509011030197144,
|
||||
0.6479143500328064,
|
||||
0.66586339473724365,
|
||||
0.74838578701019287}))
|
||||
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
|
||||
1.1150213479995728,
|
||||
1.4578367471694946,
|
||||
1.0649888515472412,
|
||||
0.93761754035949707,
|
||||
1.3659683465957642}))
|
||||
.testcaseName("lstm_cell_v1_bias_default_attrs"),
|
||||
};
|
||||
return params;
|
||||
}
|
||||
|
||||
std::vector<LSTMCellParams> generateCombinedParamsBiasDefaultAttrsV1() {
|
||||
const std::vector<std::vector<LSTMCellParams>> generatedParams {
|
||||
generateParamsBiasDefaultAttrsV1<element::Type_t::bf16>(),
|
||||
generateParamsBiasDefaultAttrsV1<element::Type_t::f16>(),
|
||||
generateParamsBiasDefaultAttrsV1<element::Type_t::f32>(),
|
||||
generateParamsBiasDefaultAttrsV1<element::Type_t::f64>(),
|
||||
};
|
||||
std::vector<LSTMCellParams> combinedParams;
|
||||
|
||||
for (const auto& params : generatedParams) {
|
||||
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
|
||||
}
|
||||
return combinedParams;
|
||||
}
|
||||
|
||||
template <element::Type_t ET>
|
||||
std::vector<LSTMCellParams> generateParamsBiasClipV1() {
|
||||
using T = typename element_type_traits<ET>::value_type;
|
||||
std::vector<LSTMCellParams> params {
|
||||
Builder {}
|
||||
.batchSize(2)
|
||||
.inputSize(3)
|
||||
.hiddenSize(3)
|
||||
.gatesCount(4)
|
||||
.X(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
|
||||
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
|
||||
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
|
||||
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
|
||||
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
|
||||
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
|
||||
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
|
||||
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
|
||||
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
|
||||
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
|
||||
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
|
||||
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
|
||||
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
|
||||
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
|
||||
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
|
||||
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
|
||||
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
|
||||
.B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
|
||||
1.15248052f,
|
||||
1.16671345f,
|
||||
0.21450312f,
|
||||
1.2380678f,
|
||||
1.51688835f,
|
||||
0.46718366f,
|
||||
0.91810346f,
|
||||
1.1274234f,
|
||||
0.51022074f,
|
||||
1.11389844f,
|
||||
0.74174305f}))
|
||||
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
|
||||
0.76665538549423218,
|
||||
0.82387429475784302,
|
||||
0.6479143500328064,
|
||||
0.66586339473724365,
|
||||
0.74838578701019287}))
|
||||
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
|
||||
1.1150213479995728,
|
||||
1.4510968923568726,
|
||||
1.0649888515472412,
|
||||
0.93761754035949707,
|
||||
1.3659683465957642}))
|
||||
.testcaseName("lstm_cell_v1_bias_clip"),
|
||||
};
|
||||
return params;
|
||||
}
|
||||
|
||||
std::vector<LSTMCellParams> generateCombinedParamsBiasClipV1() {
|
||||
const std::vector<std::vector<LSTMCellParams>> generatedParams {
|
||||
generateParamsBiasClipV1<element::Type_t::bf16>(),
|
||||
generateParamsBiasClipV1<element::Type_t::f16>(),
|
||||
generateParamsBiasClipV1<element::Type_t::f32>(),
|
||||
generateParamsBiasClipV1<element::Type_t::f64>(),
|
||||
};
|
||||
std::vector<LSTMCellParams> combinedParams;
|
||||
|
||||
for (const auto& params : generatedParams) {
|
||||
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
|
||||
}
|
||||
return combinedParams;
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1Test,
|
||||
testing::ValuesIn(generateCombinedParamsV1()), ReferenceLSTMCellV1Test::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1TestBiasDefaultAttrs,
|
||||
testing::ValuesIn(generateCombinedParamsBiasDefaultAttrsV1()), ReferenceLSTMCellV1Test::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1TestBiasClip,
|
||||
testing::ValuesIn(generateCombinedParamsBiasClipV1()), ReferenceLSTMCellV1Test::getTestCaseName);
|
||||
} // namespace
|
@ -4,8 +4,8 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "openvino/op/topk.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/opsets/opset3.hpp"
|
||||
#include "openvino/opsets/opset1.hpp"
|
||||
#include "base_reference_test.hpp"
|
||||
|
||||
using namespace reference_tests;
|
||||
@ -15,7 +15,7 @@ namespace {
|
||||
struct TopKParams {
|
||||
TopKParams(
|
||||
const Tensor& A, const Tensor& k, const int64_t axis,
|
||||
const op::v1::TopK::Mode mode, const op::v1::TopK::SortType sort,
|
||||
const opset1::TopK::Mode mode, const opset1::TopK::SortType sort,
|
||||
const Tensor& result0, const Tensor& result1, const size_t outIdx,
|
||||
const std::string& testcaseName = "") :
|
||||
A(A), k(k), axis(axis), mode(mode), sort(sort),
|
||||
@ -25,8 +25,8 @@ struct TopKParams {
|
||||
Tensor A;
|
||||
Tensor k;
|
||||
int64_t axis;
|
||||
op::v1::TopK::Mode mode;
|
||||
op::v1::TopK::SortType sort;
|
||||
opset1::TopK::Mode mode;
|
||||
opset1::TopK::SortType sort;
|
||||
Tensor result0;
|
||||
Tensor result1;
|
||||
size_t outIdx;
|
||||
@ -71,7 +71,6 @@ struct TopKParamsResnet50 {
|
||||
std::string testcaseName;
|
||||
};
|
||||
|
||||
|
||||
class ReferenceTopKTestResnet50 : public testing::TestWithParam<TopKParamsResnet50>, public CommonReferenceTest {
|
||||
public:
|
||||
void SetUp() override {
|
||||
@ -101,18 +100,18 @@ public:
|
||||
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const TopKParamsResnet50& params) {
|
||||
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
|
||||
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
|
||||
params.A.shape);
|
||||
const auto B = std::make_shared<op::v1::TopK>(A,
|
||||
op::v0::Constant::create(element::i64, {}, {5}),
|
||||
const auto B = std::make_shared<opset1::TopK>(A,
|
||||
opset1::Constant::create(element::i64, {}, {5}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES);
|
||||
const auto C = std::make_shared<op::v1::TopK>(A,
|
||||
op::v0::Constant::create(element::i64, {}, {1}),
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES);
|
||||
const auto C = std::make_shared<opset1::TopK>(A,
|
||||
opset1::Constant::create(element::i64, {}, {1}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES);
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES);
|
||||
|
||||
const auto out5_value = B->output(0);
|
||||
const auto out5_index = B->output(1);
|
||||
@ -220,12 +219,12 @@ public:
|
||||
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
|
||||
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
|
||||
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
|
||||
params.A.shape);
|
||||
const auto k = op::v0::Constant::create(params.k.type,
|
||||
const auto k = opset1::Constant::create(params.k.type,
|
||||
params.k.shape,
|
||||
params.k.data.data());
|
||||
const auto B = std::make_shared<op::v1::TopK>(A, k, params.axis, params.mode, params.sort);
|
||||
const auto B = std::make_shared<opset1::TopK>(A, k, params.axis, params.mode, params.sort);
|
||||
const auto f = std::make_shared<Model>(B->outputs(), ParameterVector{A});
|
||||
return f;
|
||||
}
|
||||
@ -253,8 +252,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
|
||||
}({128, 1000})),
|
||||
Tensor(ET2, {}, std::vector<T2>{5}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::NONE,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::NONE,
|
||||
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape, std::vector<size_t> shape) -> std::vector<T>{
|
||||
std::vector<T> expected_value;
|
||||
for (size_t i = 0; i < rshape[0]; i++) {
|
||||
@ -292,8 +291,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
|
||||
}({128, 1000})),
|
||||
Tensor(ET2, {}, std::vector<T2>{5}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::NONE,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::NONE,
|
||||
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape) -> std::vector<T>{
|
||||
std::vector<T> expected_value;
|
||||
for (size_t i = 0; i < rshape[0]; i++) {
|
||||
@ -331,8 +330,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
|
||||
}({128, 1000})),
|
||||
Tensor(ET2, {}, std::vector<T2>{5}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape, std::vector<size_t> shape) -> std::vector<T>{
|
||||
std::vector<T> expected_value;
|
||||
for (size_t i = 0; i < rshape[0]; i++) {
|
||||
@ -366,8 +365,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
|
||||
}({128, 1000})),
|
||||
Tensor(ET2, {}, std::vector<T2>{5}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape) -> std::vector<T>{
|
||||
std::vector<T> expected_value;
|
||||
for (size_t i = 0; i < rshape[0]; i++) {
|
||||
@ -401,8 +400,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
|
||||
}({128, 1000})),
|
||||
Tensor(ET2, {}, std::vector<T2>{5}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_INDICES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_INDICES,
|
||||
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape, std::vector<size_t> shape) -> std::vector<T>{
|
||||
std::vector<T> expected_value;
|
||||
for (size_t i = 0; i < rshape[0]; i++) {
|
||||
@ -440,8 +439,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
|
||||
}({128, 1000})),
|
||||
Tensor(ET2, {}, std::vector<T2>{5}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_INDICES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_INDICES,
|
||||
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape) -> std::vector<T>{
|
||||
std::vector<T> expected_value;
|
||||
for (size_t i = 0; i < rshape[0]; i++) {
|
||||
@ -467,8 +466,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
|
||||
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {3}, std::vector<T>{5, 4, 3}),
|
||||
Tensor(ET_OUT, {3}, std::vector<T_OUT>{3, 4, 0}),
|
||||
0,
|
||||
@ -478,8 +477,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
|
||||
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_INDICES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_INDICES,
|
||||
Tensor(ET, {3}, std::vector<T>{3, 5, 4}),
|
||||
Tensor(ET_OUT, {3}, std::vector<T_OUT>{0, 3, 4}),
|
||||
0,
|
||||
@ -489,8 +488,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
|
||||
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {3}, std::vector<T>{1, 2, 3}),
|
||||
Tensor(ET_OUT, {3}, std::vector<T_OUT>{1, 2, 0}),
|
||||
0,
|
||||
@ -500,8 +499,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
|
||||
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_INDICES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_INDICES,
|
||||
Tensor(ET, {3}, std::vector<T>{3, 1, 2}),
|
||||
Tensor(ET_OUT, {3}, std::vector<T_OUT>{0, 1, 2}),
|
||||
0,
|
||||
@ -536,7 +535,7 @@ std::vector<TopKParams> generateCombinedParamsMaxMinSort() {
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestMaxMinSort,
|
||||
testing::ValuesIn(generateCombinedParamsMaxMinSort()), ReferenceTopKTest::getTestCaseName);
|
||||
|
||||
class ReferenceTopKTestV3 : public ReferenceTopKTest {
|
||||
class ReferenceTopKTestBackend : public ReferenceTopKTest {
|
||||
public:
|
||||
void SetUp() override {
|
||||
auto params = GetParam();
|
||||
@ -547,18 +546,18 @@ public:
|
||||
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
|
||||
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
|
||||
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
|
||||
params.A.shape);
|
||||
const auto k = op::v0::Constant::create(params.k.type,
|
||||
const auto k = opset1::Constant::create(params.k.type,
|
||||
params.k.shape,
|
||||
params.k.data.data());
|
||||
const auto B = std::make_shared<op::v3::TopK>(A, k, params.axis, params.mode, params.sort);
|
||||
const auto B = std::make_shared<opset1::TopK>(A, k, params.axis, params.mode, params.sort);
|
||||
const auto f = std::make_shared<Model>(B->outputs(), ParameterVector{A});
|
||||
return f;
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(ReferenceTopKTestV3, CompareWithRefs) {
|
||||
TEST_P(ReferenceTopKTestBackend, CompareWithRefs) {
|
||||
Exec();
|
||||
}
|
||||
|
||||
@ -572,8 +571,8 @@ std::vector<TopKParams> generateParamsV3() {
|
||||
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {3}, std::vector<T>{5, 4, 3}),
|
||||
Tensor(ET_OUT, {3}, std::vector<T_OUT>{3, 4, 0}),
|
||||
0,
|
||||
@ -583,8 +582,8 @@ std::vector<TopKParams> generateParamsV3() {
|
||||
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_INDICES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_INDICES,
|
||||
Tensor(ET, {3}, std::vector<T>{3, 5, 4}),
|
||||
Tensor(ET_OUT, {3}, std::vector<T_OUT>{0, 3, 4}),
|
||||
0,
|
||||
@ -594,8 +593,8 @@ std::vector<TopKParams> generateParamsV3() {
|
||||
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {3}, std::vector<T>{1, 2, 3}),
|
||||
Tensor(ET_OUT, {3}, std::vector<T_OUT>{1, 2, 0}),
|
||||
0,
|
||||
@ -605,8 +604,8 @@ std::vector<TopKParams> generateParamsV3() {
|
||||
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_INDICES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_INDICES,
|
||||
Tensor(ET, {3}, std::vector<T>{3, 1, 2}),
|
||||
Tensor(ET_OUT, {3}, std::vector<T_OUT>{0, 1, 2}),
|
||||
0,
|
||||
@ -615,7 +614,7 @@ std::vector<TopKParams> generateParamsV3() {
|
||||
return params;
|
||||
}
|
||||
|
||||
std::vector<TopKParams> generateCombinedParamsV3() {
|
||||
std::vector<TopKParams> generateCombinedParamsBackend() {
|
||||
const std::vector<std::vector<TopKParams>> generatedParams {
|
||||
generateParamsMaxMinSort<element::Type_t::i8, element::Type_t::i64, element::Type_t::i32>(),
|
||||
generateParamsMaxMinSort<element::Type_t::i16, element::Type_t::i64, element::Type_t::i32>(),
|
||||
@ -638,8 +637,8 @@ std::vector<TopKParams> generateCombinedParamsV3() {
|
||||
return combinedParams;
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestV3,
|
||||
testing::ValuesIn(generateCombinedParamsV3()), ReferenceTopKTest::getTestCaseName);
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestBackend,
|
||||
testing::ValuesIn(generateCombinedParamsBackend()), ReferenceTopKTest::getTestCaseName);
|
||||
|
||||
class ReferenceTopKTest1dMaxMin : public ReferenceTopKTest {
|
||||
public:
|
||||
@ -673,12 +672,12 @@ public:
|
||||
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const TopKParams& params, size_t out_idx) {
|
||||
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
|
||||
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
|
||||
params.A.shape);
|
||||
const auto k = op::v0::Constant::create(params.k.type,
|
||||
const auto k = opset1::Constant::create(params.k.type,
|
||||
params.k.shape,
|
||||
params.k.data.data());
|
||||
const auto B = std::make_shared<op::v1::TopK>(A, k, params.axis, params.mode, params.sort);
|
||||
const auto B = std::make_shared<opset1::TopK>(A, k, params.axis, params.mode, params.sort);
|
||||
const auto f = std::make_shared<Model>(OutputVector{B->output(out_idx)}, ParameterVector{A});
|
||||
return f;
|
||||
}
|
||||
@ -698,8 +697,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
|
||||
Tensor(ET2, {}, std::vector<T2>{6}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
|
||||
Tensor(ET_OUT, {6}, std::vector<T_OUT>{5, 4, 3, 2, 1, 0}),
|
||||
0,
|
||||
@ -709,8 +708,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
|
||||
Tensor(ET2, {}, std::vector<T2>{6}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
|
||||
Tensor(ET_OUT, {6}, std::vector<T_OUT>{5, 4, 3, 2, 1, 0}),
|
||||
1,
|
||||
@ -720,8 +719,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {3}, std::vector<T>{6, 5, 4}),
|
||||
Tensor(ET_OUT, {3}, std::vector<T_OUT>{5, 4, 3}),
|
||||
0,
|
||||
@ -731,8 +730,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {3}, std::vector<T>{6, 5, 4}),
|
||||
Tensor(ET_OUT, {3}, std::vector<T_OUT>{5, 4, 3}),
|
||||
1,
|
||||
@ -742,8 +741,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
|
||||
Tensor(ET2, {}, std::vector<T2>{1}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {1}, std::vector<T>{6}),
|
||||
Tensor(ET_OUT, {1}, std::vector<T_OUT>{5}),
|
||||
0,
|
||||
@ -753,8 +752,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
|
||||
Tensor(ET2, {}, std::vector<T2>{1}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {1}, std::vector<T>{6}),
|
||||
Tensor(ET_OUT, {1}, std::vector<T_OUT>{5}),
|
||||
1,
|
||||
@ -764,8 +763,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
|
||||
Tensor(ET2, {}, std::vector<T2>{6}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
|
||||
Tensor(ET_OUT, {6}, std::vector<T_OUT>{5, 4, 3, 2, 1, 0}),
|
||||
0,
|
||||
@ -775,8 +774,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
|
||||
Tensor(ET2, {}, std::vector<T2>{6}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
|
||||
Tensor(ET_OUT, {6}, std::vector<T_OUT>{5, 4, 3, 2, 1, 0}),
|
||||
1,
|
||||
@ -786,8 +785,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {3}, std::vector<T>{1, 2, 3}),
|
||||
Tensor(ET_OUT, {3}, std::vector<T_OUT>{5, 4, 3}),
|
||||
0,
|
||||
@ -797,8 +796,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {3}, std::vector<T>{1, 2, 3}),
|
||||
Tensor(ET_OUT, {3}, std::vector<T_OUT>{5, 4, 3}),
|
||||
1,
|
||||
@ -808,8 +807,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
|
||||
Tensor(ET2, {}, std::vector<T2>{1}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {1}, std::vector<T>{1}),
|
||||
Tensor(ET_OUT, {1}, std::vector<T_OUT>{5}),
|
||||
0,
|
||||
@ -819,8 +818,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
|
||||
Tensor(ET2, {}, std::vector<T2>{1}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {1}, std::vector<T>{1}),
|
||||
Tensor(ET_OUT, {1}, std::vector<T_OUT>{5}),
|
||||
1,
|
||||
@ -832,8 +831,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 3, 2}, std::vector<T>{
|
||||
10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1
|
||||
}),
|
||||
@ -849,8 +848,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 3, 2}, std::vector<T>{
|
||||
10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1
|
||||
}),
|
||||
@ -882,8 +881,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{2}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 2, 3, 2, 4}, std::vector<T>{
|
||||
169, 241, 177, 249, 185, 233, 170, 242, 178, 250, 186, 258, 171, 243, 179, 251,
|
||||
187, 259, 172, 224, 180, 252, 188, 260, 149, 221, 157, 229, 165, 113, 150, 222,
|
||||
@ -923,8 +922,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{2}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 2, 3, 2, 4}, std::vector<T>{
|
||||
169, 241, 177, 249, 185, 233, 170, 242, 178, 250, 186, 258, 171, 243, 179, 251,
|
||||
187, 259, 172, 224, 180, 252, 188, 260, 149, 221, 157, 229, 165, 113, 150, 222,
|
||||
@ -948,8 +947,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{2}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 2, 2}, std::vector<T>{
|
||||
10, 12, 9, 4, 11, 7, 6, 3
|
||||
}),
|
||||
@ -965,8 +964,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{2}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 2, 2}, std::vector<T>{
|
||||
10, 12, 9, 4, 11, 7, 6, 3
|
||||
}),
|
||||
@ -982,8 +981,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{1}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 1, 2}, std::vector<T>{
|
||||
10, 12, 11, 7
|
||||
}),
|
||||
@ -999,8 +998,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{1}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 1, 2}, std::vector<T>{
|
||||
10, 12, 11, 7
|
||||
}),
|
||||
@ -1016,8 +1015,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 3, 2}, std::vector<T>{
|
||||
8, 2, 10, 4, 12, 9, 5, 1, 6, 3, 11, 7
|
||||
}),
|
||||
@ -1033,8 +1032,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 3, 2}, std::vector<T>{
|
||||
8, 2, 10, 4, 12, 9, 5, 1, 6, 3, 11, 7
|
||||
}),
|
||||
@ -1050,8 +1049,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{2}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 2, 2}, std::vector<T>{
|
||||
8, 2, 10, 4, 5, 1, 6, 3
|
||||
}),
|
||||
@ -1067,8 +1066,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{2}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 2, 2}, std::vector<T>{
|
||||
8, 2, 10, 4, 5, 1, 6, 3
|
||||
}),
|
||||
@ -1084,8 +1083,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{1}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 1, 2}, std::vector<T>{
|
||||
8, 2, 5, 1
|
||||
}),
|
||||
@ -1101,8 +1100,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{1}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 1, 2}, std::vector<T>{
|
||||
8, 2, 5, 1
|
||||
}),
|
||||
@ -1118,8 +1117,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{4}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {4, 3}, std::vector<T>{
|
||||
12, 11, 10, 9, 8, 7, 6, 2, 5, 3, 1, 4
|
||||
}),
|
||||
@ -1135,8 +1134,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{4}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {4, 3}, std::vector<T>{
|
||||
12, 11, 10, 9, 8, 7, 6, 2, 5, 3, 1, 4
|
||||
}),
|
||||
@ -1152,8 +1151,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{2}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 3}, std::vector<T>{
|
||||
12, 11, 10, 9, 8, 7
|
||||
}),
|
||||
@ -1169,8 +1168,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{2}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 3}, std::vector<T>{
|
||||
12, 11, 10, 9, 8, 7
|
||||
}),
|
||||
@ -1186,8 +1185,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{1}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {1, 3}, std::vector<T>{
|
||||
12, 11, 10
|
||||
}),
|
||||
@ -1203,8 +1202,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{1}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {1, 3}, std::vector<T>{
|
||||
12, 11, 10
|
||||
}),
|
||||
@ -1220,8 +1219,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{1}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 1}, std::vector<T>{
|
||||
4, 3
|
||||
}),
|
||||
@ -1237,8 +1236,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{1}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 1}, std::vector<T>{
|
||||
4, 3
|
||||
}),
|
||||
@ -1254,8 +1253,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{4}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {4, 3}, std::vector<T>{
|
||||
3, 1, 4, 6, 2, 5, 9, 8, 7, 12, 11, 10
|
||||
}),
|
||||
@ -1271,8 +1270,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{4}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {4, 3}, std::vector<T>{
|
||||
3, 1, 4, 6, 2, 5, 9, 8, 7, 12, 11, 10
|
||||
}),
|
||||
@ -1288,8 +1287,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{2}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 3}, std::vector<T>{
|
||||
3, 1, 4, 6, 2, 5
|
||||
}),
|
||||
@ -1305,8 +1304,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{2}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 3}, std::vector<T>{
|
||||
3, 1, 4, 6, 2, 5
|
||||
}),
|
||||
@ -1322,8 +1321,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{1}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::NONE,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::NONE,
|
||||
Tensor(ET, {1, 3}, std::vector<T>{
|
||||
3, 1, 4
|
||||
}),
|
||||
@ -1339,8 +1338,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{1}),
|
||||
0,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::NONE,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::NONE,
|
||||
Tensor(ET, {1, 3}, std::vector<T>{
|
||||
3, 1, 4
|
||||
}),
|
||||
@ -1380,12 +1379,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTest1dMaxM
|
||||
class ReferenceTopKTestInt64 : public ReferenceTopKTest1dMaxMin {
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const TopKParams& params, size_t out_idx) {
|
||||
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
|
||||
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
|
||||
params.A.shape);
|
||||
const auto k = op::v0::Constant::create(params.k.type,
|
||||
const auto k = opset1::Constant::create(params.k.type,
|
||||
params.k.shape,
|
||||
params.k.data.data());
|
||||
const auto B = std::make_shared<op::v1::TopK>(A,
|
||||
const auto B = std::make_shared<opset1::TopK>(A,
|
||||
k,
|
||||
params.axis,
|
||||
params.mode,
|
||||
@ -1412,8 +1411,8 @@ std::vector<TopKParams> generateParamsInt64() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 3, 2}, std::vector<T>{
|
||||
10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1
|
||||
}),
|
||||
@ -1428,8 +1427,8 @@ std::vector<TopKParams> generateParamsInt64() {
|
||||
}),
|
||||
Tensor(ET2, {}, std::vector<T2>{3}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 3, 2}, std::vector<T>{
|
||||
10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1
|
||||
}),
|
||||
@ -1468,12 +1467,12 @@ public:
|
||||
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
|
||||
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
|
||||
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
|
||||
params.A.shape);
|
||||
const auto k = op::v0::Constant::create(params.k.type,
|
||||
const auto k = opset1::Constant::create(params.k.type,
|
||||
params.k.shape,
|
||||
params.k.data.data());
|
||||
const auto B = std::make_shared<op::v1::TopK>(A, k, params.axis, params.mode, params.sort);
|
||||
const auto B = std::make_shared<opset1::TopK>(A, k, params.axis, params.mode, params.sort);
|
||||
const auto f = std::make_shared<Model>(OutputVector{B->output(1)}, ParameterVector{A});
|
||||
return f;
|
||||
}
|
||||
@ -1493,8 +1492,8 @@ std::vector<TopKParams> generateParamsSingleOutput() {
|
||||
Tensor(ET, {2, 3, 2}, std::vector<T>{12, 2, 10, 9, 8, 4, 6, 1, 5, 3, 11, 7}),
|
||||
Tensor(ET2, {}, std::vector<T2>{2}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MIN,
|
||||
op::v1::TopK::SortType::SORT_VALUES,
|
||||
opset1::TopK::Mode::MIN,
|
||||
opset1::TopK::SortType::SORT_VALUES,
|
||||
Tensor(ET, {2, 2, 2}, std::vector<T>{}),
|
||||
Tensor(ET_OUT, {2, 2, 2}, std::vector<T_OUT>{2, 0, 1, 2, 1, 0, 0, 1}),
|
||||
0,
|
||||
@ -1530,19 +1529,181 @@ INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestSingle
|
||||
testing::ValuesIn(generateCombinedParamsSingleOutput()), ReferenceTopKTest::getTestCaseName);
|
||||
|
||||
TEST(ReferenceTopKTestInvalid, topk_v1_invalid_strings) {
|
||||
const auto data = std::make_shared<op::v0::Parameter>(element::f32, Shape{1, 2, 3});
|
||||
const auto k = op::v0::Constant::create(element::i64, Shape{}, {1});
|
||||
EXPECT_THROW(op::v1::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure);
|
||||
EXPECT_THROW(op::v1::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure);
|
||||
const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
|
||||
const auto k = opset1::Constant::create(element::i64, Shape{}, {1});
|
||||
EXPECT_THROW(opset1::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure);
|
||||
EXPECT_THROW(opset1::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure);
|
||||
}
|
||||
|
||||
TEST(ReferenceTopKTestInvalid, topk_v1_invalid_k) {
|
||||
const auto data = std::make_shared<op::v0::Parameter>(element::f32, Shape{1, 2, 3});
|
||||
const auto k_non_scalar = op::v0::Constant::create(element::i64, Shape{2}, {1, 2});
|
||||
EXPECT_THROW(op::v1::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure);
|
||||
const auto k_float = op::v0::Constant::create(element::f32, Shape{}, {1.0f});
|
||||
EXPECT_THROW(op::v1::TopK(data, k_float, 0, "max", "index"), ngraph::NodeValidationFailure);
|
||||
const auto k_negative = op::v0::Constant::create(element::i8, Shape{}, {-1});
|
||||
EXPECT_THROW(op::v1::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure);
|
||||
const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
|
||||
const auto k_non_scalar = opset1::Constant::create(element::i64, Shape{2}, {1, 2});
|
||||
EXPECT_THROW(opset1::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure);
|
||||
const auto k_float = opset1::Constant::create(element::f32, Shape{}, {1.0f});
|
||||
EXPECT_THROW(opset1::TopK(data, k_float, 0, "max", "index"), ngraph::NodeValidationFailure);
|
||||
const auto k_negative = opset1::Constant::create(element::i8, Shape{}, {-1});
|
||||
EXPECT_THROW(opset1::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
class ReferenceTopKTestResnet50V3 : public ReferenceTopKTestResnet50 {
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const TopKParamsResnet50& params) {
|
||||
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
|
||||
params.A.shape);
|
||||
const auto B = std::make_shared<opset3::TopK>(A,
|
||||
opset1::Constant::create(element::i64, {}, {5}),
|
||||
1,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES);
|
||||
const auto C = std::make_shared<opset3::TopK>(A,
|
||||
opset1::Constant::create(element::i64, {}, {1}),
|
||||
1,
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES);
|
||||
|
||||
const auto out5_value = B->output(0);
|
||||
const auto out5_index = B->output(1);
|
||||
const auto out1_value = C->output(0);
|
||||
const auto out1_index = C->output(1);
|
||||
const auto f = std::make_shared<Model>(OutputVector{out5_value, out5_index, out1_value, out1_index}, ParameterVector{A});
|
||||
return f;
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(ReferenceTopKTestResnet50V3, CompareWithRefs) {
|
||||
Exec();
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestResnet50V3,
|
||||
testing::ValuesIn(generateCombinedParamsResnet50()), ReferenceTopKTestResnet50V3::getTestCaseName);
|
||||
|
||||
class ReferenceTopKTestMaxMinSortV3 : public ReferenceTopKTestMaxMinSort {
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
|
||||
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
|
||||
params.A.shape);
|
||||
const auto k = opset1::Constant::create(params.k.type,
|
||||
params.k.shape,
|
||||
params.k.data.data());
|
||||
const auto B = std::make_shared<opset3::TopK>(A, k, params.axis, params.mode, params.sort);
|
||||
const auto f = std::make_shared<Model>(B->outputs(), ParameterVector{A});
|
||||
return f;
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(ReferenceTopKTestMaxMinSortV3, CompareWithRefs) {
|
||||
Exec();
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestMaxMinSortV3,
|
||||
testing::ValuesIn(generateCombinedParamsMaxMinSort()), ReferenceTopKTestMaxMinSortV3::getTestCaseName);
|
||||
|
||||
class ReferenceTopKTestBackendV3 : public ReferenceTopKTestBackend {
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
|
||||
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
|
||||
params.A.shape);
|
||||
const auto k = opset1::Constant::create(params.k.type,
|
||||
params.k.shape,
|
||||
params.k.data.data());
|
||||
const auto B = std::make_shared<opset3::TopK>(A, k, params.axis, params.mode, params.sort);
|
||||
const auto f = std::make_shared<Model>(B->outputs(), ParameterVector{A});
|
||||
return f;
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(ReferenceTopKTestBackendV3, CompareWithRefs) {
|
||||
Exec();
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestBackendV3,
|
||||
testing::ValuesIn(generateCombinedParamsBackend()), ReferenceTopKTestBackendV3::getTestCaseName);
|
||||
|
||||
class ReferenceTopKTest1dMaxMinV3 : public ReferenceTopKTest1dMaxMin {
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const TopKParams& params, size_t out_idx) {
|
||||
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
|
||||
params.A.shape);
|
||||
const auto k = opset1::Constant::create(params.k.type,
|
||||
params.k.shape,
|
||||
params.k.data.data());
|
||||
const auto B = std::make_shared<opset3::TopK>(A, k, params.axis, params.mode, params.sort);
|
||||
const auto f = std::make_shared<Model>(OutputVector{B->output(out_idx)}, ParameterVector{A});
|
||||
return f;
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(ReferenceTopKTest1dMaxMinV3, CompareWithRefs) {
|
||||
Exec();
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTest1dMaxMinV3,
|
||||
testing::ValuesIn(generateCombinedParams1dMaxMin()), ReferenceTopKTest1dMaxMinV3::getTestCaseName);
|
||||
|
||||
class ReferenceTopKTestInt64V3 : public ReferenceTopKTestInt64 {
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const TopKParams& params, size_t out_idx) {
|
||||
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
|
||||
params.A.shape);
|
||||
const auto k = opset1::Constant::create(params.k.type,
|
||||
params.k.shape,
|
||||
params.k.data.data());
|
||||
const auto B = std::make_shared<opset3::TopK>(A,
|
||||
k,
|
||||
params.axis,
|
||||
params.mode,
|
||||
params.sort,
|
||||
element::i64);
|
||||
const auto f = std::make_shared<Model>(OutputVector{B->output(out_idx)}, ParameterVector{A});
|
||||
return f;
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(ReferenceTopKTestInt64V3, CompareWithRefs) {
|
||||
Exec();
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestInt64V3,
|
||||
testing::ValuesIn(generateCombinedParamsInt64()), ReferenceTopKTestInt64V3::getTestCaseName);
|
||||
|
||||
class ReferenceTopKTestSingleOutputV3 : public ReferenceTopKTestSingleOutput {
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
|
||||
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
|
||||
params.A.shape);
|
||||
const auto k = opset1::Constant::create(params.k.type,
|
||||
params.k.shape,
|
||||
params.k.data.data());
|
||||
const auto B = std::make_shared<opset3::TopK>(A, k, params.axis, params.mode, params.sort);
|
||||
const auto f = std::make_shared<Model>(OutputVector{B->output(1)}, ParameterVector{A});
|
||||
return f;
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(ReferenceTopKTestSingleOutputV3, CompareWithRefs) {
|
||||
Exec();
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestSingleOutputV3,
|
||||
testing::ValuesIn(generateCombinedParamsSingleOutput()), ReferenceTopKTestSingleOutputV3::getTestCaseName);
|
||||
|
||||
TEST(ReferenceTopKTestInvalidV3, topk_v3_invalid_strings) {
|
||||
const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
|
||||
const auto k = opset1::Constant::create(element::i64, Shape{}, {1});
|
||||
EXPECT_THROW(opset3::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure);
|
||||
EXPECT_THROW(opset3::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure);
|
||||
}
|
||||
|
||||
TEST(ReferenceTopKTestInvalidV3, topk_v3_invalid_k) {
|
||||
const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
|
||||
const auto k_non_scalar = opset1::Constant::create(element::i64, Shape{2}, {1, 2});
|
||||
EXPECT_THROW(opset3::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure);
|
||||
const auto k_float = opset1::Constant::create(element::f32, Shape{}, {1.0f});
|
||||
EXPECT_THROW(opset3::TopK(data, k_float, 0, "max", "index"), ngraph::NodeValidationFailure);
|
||||
const auto k_negative = opset1::Constant::create(element::i8, Shape{}, {-1});
|
||||
EXPECT_THROW(opset3::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure);
|
||||
}
|
||||
} // namespace
|
||||
|
@ -7,8 +7,3 @@ project(InferenceEngine)
|
||||
if(ENABLE_PYTHON)
|
||||
add_subdirectory(ie_bridges/python)
|
||||
endif()
|
||||
|
||||
if(ENABLE_TESTS)
|
||||
add_subdirectory(tests_deprecated)
|
||||
add_subdirectory(tests)
|
||||
endif()
|
||||
|
@ -1,15 +0,0 @@
|
||||
WHEEL_PACKAGE_NAME=@WHEEL_PACKAGE_NAME@
|
||||
WHEEL_VERSION=@WHEEL_VERSION@
|
||||
WHEEL_BUILD=@WHEEL_BUILD@
|
||||
WHEEL_LICENCE_TYPE=@WHEEL_LICENCE_TYPE@
|
||||
WHEEL_AUTHOR=@WHEEL_AUTHOR@
|
||||
WHEEL_AUTHOR_EMAIL=@WHEEL_AUTHOR_EMAIL@
|
||||
WHEEL_DESC=@WHEEL_DESC@
|
||||
WHEEL_LICENSE=@WHEEL_LICENSE@
|
||||
WHEEL_REQUIREMENTS=@WHEEL_REQUIREMENTS@
|
||||
WHEEL_OVERVIEW=@WHEEL_OVERVIEW@
|
||||
|
||||
CMAKE_BUILD_DIR=@CMAKE_BINARY_DIR@
|
||||
OV_RUNTIME_LIBS_DIR=@IE_CPACK_RUNTIME_PATH@
|
||||
TBB_LIBS_DIR=@TBB_LIBS_DIR@
|
||||
PY_PACKAGES_DIR=@PY_PACKAGES_DIR@
|
@ -1,40 +1,14 @@
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
set(WHEEL_PACKAGE_NAME "openvino" CACHE STRING "Name of the package")
|
||||
set(WHEEL_LICENCE_TYPE "OSI Approved :: Apache Software License" CACHE STRING "License type for the package")
|
||||
set(WHEEL_AUTHOR "Intel Corporation" CACHE STRING "Package author’s name")
|
||||
set(WHEEL_AUTHOR_EMAIL "openvino_pushbot@intel.com" CACHE STRING "Email address of the package author")
|
||||
set(WHEEL_DESC "Inference Engine Python* API" CACHE STRING "Short, summary description of the package")
|
||||
set(WHEEL_URL "https://docs.openvinotoolkit.org/latest/index.html" CACHE STRING "Home page url")
|
||||
set(WHEEL_DOWNLOAD_URL "https://github.com/openvinotoolkit/openvino/tags" CACHE STRING "Download page url")
|
||||
set(WHEEL_VERSION "${IE_VERSION}" CACHE STRING "Version of this release" FORCE)
|
||||
set(WHEEL_BUILD "${IE_VERSION_BUILD}" CACHE STRING "Build number of this release" FORCE)
|
||||
set(WHEEL_LICENSE "${CMAKE_SOURCE_DIR}/LICENSE" CACHE STRING "Wheel license file")
|
||||
set(WHEEL_REQUIREMENTS "${CMAKE_CURRENT_SOURCE_DIR}/meta/openvino.requirements.txt" CACHE STRING "Wheel requirements.txt file")
|
||||
set(WHEEL_OVERVIEW "${CMAKE_CURRENT_SOURCE_DIR}/meta/pypi_overview.md" CACHE STRING "Detailed description")
|
||||
|
||||
set(SETUP_PY "${CMAKE_CURRENT_SOURCE_DIR}/setup.py")
|
||||
set(SETUP_ENV "${CMAKE_CURRENT_SOURCE_DIR}/.env.in")
|
||||
set(SETUP_ENV_OUT "${CMAKE_CURRENT_SOURCE_DIR}/.env")
|
||||
|
||||
set(PY_PACKAGES_DIR ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION})
|
||||
set(TBB_LIBS_DIR runtime/3rdparty/tbb/lib)
|
||||
|
||||
if(APPLE)
|
||||
set(WHEEL_PLATFORM macosx_10_15_x86_64)
|
||||
elseif(UNIX)
|
||||
set(WHEEL_PLATFORM manylinux2014_x86_64)
|
||||
elseif(WIN32)
|
||||
set(WHEEL_PLATFORM win_amd64)
|
||||
if(WIN32)
|
||||
set(TBB_LIBS_DIR runtime/3rdparty/tbb/bin)
|
||||
else()
|
||||
message(FATAL_ERROR "This platform is not supported")
|
||||
endif()
|
||||
|
||||
configure_file(${SETUP_ENV} ${SETUP_ENV_OUT} @ONLY)
|
||||
|
||||
if(LINUX)
|
||||
find_host_program(patchelf_program
|
||||
NAMES patchelf
|
||||
@ -55,21 +29,30 @@ endforeach()
|
||||
|
||||
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.bdist_wheel ; print(f'{wheel.bdist_wheel.get_abi_tag()}')" OUTPUT_VARIABLE PYTHON_ABI)
|
||||
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.vendored.packaging.tags as tags ; print(f'{tags.interpreter_name()}{tags.interpreter_version()}')" OUTPUT_VARIABLE INTERPRETER)
|
||||
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.vendored.packaging.tags as tags ; print(f'{next(tags._platform_tags())}')" OUTPUT_VARIABLE WHEEL_PLATFORM)
|
||||
string(STRIP ${PYTHON_ABI} PYTHON_ABI)
|
||||
string(STRIP ${INTERPRETER} INTERPRETER)
|
||||
string(STRIP ${WHEEL_PLATFORM} WHEEL_PLATFORM)
|
||||
|
||||
set(openvino_wheel_name "openvino-${WHEEL_VERSION}-${WHEEL_BUILD}-${INTERPRETER}-${PYTHON_ABI}-${WHEEL_PLATFORM}.whl")
|
||||
set(openvino_wheels_output_dir "${CMAKE_BINARY_DIR}/wheels")
|
||||
set(openvino_wheel_path "${openvino_wheels_output_dir}/${openvino_wheel_name}")
|
||||
|
||||
add_custom_command(OUTPUT ${openvino_wheel_path}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}"
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_SOURCE_DIR}/licensing" "${CMAKE_BINARY_DIR}/licensing"
|
||||
COMMAND ${CMAKE_COMMAND} -E remove_directory "${CMAKE_CURRENT_BINARY_DIR}/site-packages"
|
||||
COMMAND ${PYTHON_EXECUTABLE} ${SETUP_PY} clean bdist_wheel
|
||||
COMMAND ${CMAKE_COMMAND} -E env WHEEL_VERSION=${WHEEL_VERSION}
|
||||
WHEEL_BUILD=${WHEEL_BUILD}
|
||||
CMAKE_BUILD_DIR=${CMAKE_BINARY_DIR}
|
||||
OV_RUNTIME_LIBS_DIR=${IE_CPACK_RUNTIME_PATH}
|
||||
TBB_LIBS_DIR=${TBB_LIBS_DIR}
|
||||
PY_PACKAGES_DIR=${PY_PACKAGES_DIR}
|
||||
${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/setup.py" clean bdist_wheel
|
||||
--dist-dir ${openvino_wheels_output_dir}
|
||||
--build=${WHEEL_BUILD}
|
||||
--plat-name=${WHEEL_PLATFORM}
|
||||
# COMMAND ${CMAKE_COMMAND} -E remove ${SETUP_ENV_OUT}
|
||||
DEPENDS ${openvino_wheel_deps} ${SETUP_ENV_OUT}
|
||||
DEPENDS ${openvino_wheel_deps}
|
||||
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
|
||||
COMMENT "Building Python wheel ${openvino_wheel_name}"
|
||||
VERBATIM)
|
||||
|
@ -1,28 +0,0 @@
|
||||
defusedxml>=0.7.1
|
||||
scipy~=1.5.4
|
||||
jstyleson~=0.0.2
|
||||
numpy>=1.16.6,<1.20
|
||||
addict>=2.4.0
|
||||
pandas~=1.1.5
|
||||
hyperopt~=0.1.2
|
||||
networkx~=2.5
|
||||
tqdm>=4.54.1
|
||||
texttable~=1.6.3
|
||||
py-cpuinfo>=7.0.0
|
||||
PyYAML>=5.4.1
|
||||
pillow>=8.1.2
|
||||
scikit-image>=0.17.2
|
||||
scikit-learn>=0.24.1
|
||||
yamlloader>=0.5
|
||||
shapely>=1.7.1
|
||||
nibabel>=3.2.1
|
||||
pydicom>=2.1.2
|
||||
sentencepiece>=0.1.95
|
||||
tokenizers>=0.10.1
|
||||
editdistance>=0.5.3
|
||||
parasail>=1.2.4
|
||||
fast-ctc-decode>=0.2.5
|
||||
rawpy>=0.16.0
|
||||
nltk>=3.5
|
||||
opencv-python==4.5.*
|
||||
progress>=1.5
|
@ -1,22 +0,0 @@
|
||||
[options]
|
||||
py_modules =
|
||||
mo
|
||||
mo_tf
|
||||
mo_caffe
|
||||
mo_mxnet
|
||||
mo_onnx
|
||||
mo_kaldi
|
||||
|
||||
[options.package_data]
|
||||
* = *
|
||||
|
||||
[options.entry_points]
|
||||
console_scripts =
|
||||
|
||||
[metadata]
|
||||
license_files =
|
||||
readme*
|
||||
*LICENSE*
|
||||
*license*
|
||||
*third-party-programs*
|
||||
*EULA*
|
@ -1 +0,0 @@
|
||||
numpy>=1.16.6,<1.20
|
@ -1,32 +0,0 @@
|
||||
## OpenVINO™ Toolkit
|
||||
|
||||
OpenVINO™ toolkit quickly deploys applications and solutions that emulate human vision. Based on Convolutional Neural Networks (CNNs), the toolkit extends computer vision (CV) workloads across Intel® hardware, maximizing performance. The OpenVINO™ toolkit includes the Deep Learning Deployment Toolkit (DLDT).
|
||||
|
||||
OpenVINO™ toolkit:
|
||||
|
||||
- Enables CNN-based deep learning inference on the edge
|
||||
- Supports heterogeneous execution across an Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
|
||||
- Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels
|
||||
- Includes optimized calls for computer vision standards, including OpenCV\* and OpenCL™
|
||||
|
||||
Operating Systems:
|
||||
- Ubuntu* 18.04 long-term support (LTS), 64-bit
|
||||
- Windows* 10, 64-bit
|
||||
- macOS* 10.15, 64-bit
|
||||
|
||||
## Install the Runtime Package Using the PyPI Repository
|
||||
1. Set up and update pip to the highest version:
|
||||
```sh
|
||||
python3 -m pip install --upgrade pip
|
||||
```
|
||||
2. Install the Intel® distribution of OpenVINO™ toolkit:
|
||||
```sh
|
||||
pip install openvino
|
||||
```
|
||||
|
||||
3. Verify that the package is installed:
|
||||
```sh
|
||||
python3 -c "from openvino.inference_engine import IECore"
|
||||
```
|
||||
|
||||
Now you are ready to develop and run your application.
|
@ -1,3 +1,3 @@
|
||||
setuptools>=53.0.0
|
||||
wheel>=0.36.2
|
||||
python-decouple>=3.4
|
||||
|
||||
|
@ -1,7 +1,11 @@
|
||||
[metadata]
|
||||
license_files =
|
||||
readme*
|
||||
*LICENSE*
|
||||
*license*
|
||||
*third-party-programs*
|
||||
*EULA*
|
||||
readme*
|
||||
*LICENSE*
|
||||
*license*
|
||||
*third-party-programs*
|
||||
../../../../licensing/runtime-third-party-programs.txt
|
||||
../../../../licensing/tbb_third-party-programs.txt
|
||||
../../../../licensing/onednn_third-party-programs.txt
|
||||
../../../../LICENSE
|
||||
|
||||
|
@ -21,7 +21,6 @@ from setuptools import setup, find_namespace_packages, Extension
|
||||
from setuptools.command.build_ext import build_ext
|
||||
from setuptools.command.build_clib import build_clib
|
||||
from setuptools.command.install import install
|
||||
from decouple import config
|
||||
|
||||
WHEEL_LIBS_INSTALL_DIR = os.path.join('openvino', 'libs')
|
||||
WHEEL_LIBS_PACKAGE = 'openvino.libs'
|
||||
@ -41,10 +40,11 @@ elif machine == 'aarch64':
|
||||
ARCH = 'arm64'
|
||||
|
||||
# The following variables can be defined in environment or .env file
|
||||
CMAKE_BUILD_DIR = config('CMAKE_BUILD_DIR', '.')
|
||||
OV_RUNTIME_LIBS_DIR = config('OV_RUNTIME_LIBS_DIR', f'runtime/{LIBS_DIR}/{ARCH}/{CONFIG}')
|
||||
TBB_LIBS_DIR = config('TBB_LIBS_DIR', f'runtime/3rdparty/tbb/{LIBS_DIR}')
|
||||
PY_PACKAGES_DIR = config('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}')
|
||||
SCRIPT_DIR = Path(__file__).resolve().parents[0]
|
||||
CMAKE_BUILD_DIR = os.getenv('CMAKE_BUILD_DIR', '.')
|
||||
OV_RUNTIME_LIBS_DIR = os.getenv('OV_RUNTIME_LIBS_DIR', f'runtime/{LIBS_DIR}/{ARCH}/{CONFIG}')
|
||||
TBB_LIBS_DIR = os.getenv('TBB_LIBS_DIR', f'runtime/3rdparty/tbb/{LIBS_DIR}')
|
||||
PY_PACKAGES_DIR = os.getenv('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}')
|
||||
LIBS_RPATH = '$ORIGIN' if sys.platform == 'linux' else '@loader_path'
|
||||
|
||||
LIB_INSTALL_CFG = {
|
||||
@ -428,28 +428,28 @@ if not any(pl in sys.platform for pl in platforms):
|
||||
sys.exit(f'Unsupported platform: {sys.platform}, expected: linux, win32, darwin')
|
||||
|
||||
# copy license file into the build directory
|
||||
package_license = config('WHEEL_LICENSE', '')
|
||||
package_license = os.getenv('WHEEL_LICENSE', SCRIPT_DIR.parents[3] / 'LICENSE')
|
||||
if os.path.exists(package_license):
|
||||
copyfile(package_license, 'LICENSE')
|
||||
|
||||
packages = find_namespace_packages(get_package_dir(PY_INSTALL_CFG))
|
||||
package_data: typing.Dict[str, list] = {}
|
||||
pkg_name = config('WHEEL_PACKAGE_NAME', 'openvino')
|
||||
pkg_name = os.getenv('WHEEL_PACKAGE_NAME', 'openvino')
|
||||
ext_modules = find_prebuilt_extensions(get_dir_list(PY_INSTALL_CFG)) if pkg_name == 'openvino' else []
|
||||
|
||||
setup(
|
||||
version=config('WHEEL_VERSION', '0.0.0'),
|
||||
build=config('WHEEL_BUILD', '000'),
|
||||
author_email=config('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'),
|
||||
version=os.getenv('WHEEL_VERSION', '0.0.0'),
|
||||
build=os.getenv('WHEEL_BUILD', '000'),
|
||||
author_email=os.getenv('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'),
|
||||
name=pkg_name,
|
||||
license=config('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'),
|
||||
author=config('WHEEL_AUTHOR', 'Intel Corporation'),
|
||||
description=config('WHEEL_DESC', 'Inference Engine Python* API'),
|
||||
install_requires=get_dependencies(config('WHEEL_REQUIREMENTS', 'meta/openvino.requirements.txt')),
|
||||
long_description=get_description(config('WHEEL_OVERVIEW', 'meta/pypi_overview.md')),
|
||||
license=os.getenv('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'),
|
||||
author=os.getenv('WHEEL_AUTHOR', 'Intel(R) Corporation'),
|
||||
description=os.getenv('WHEEL_DESC', 'OpenVINO(TM) Runtime'),
|
||||
install_requires=get_dependencies(os.getenv('WHEEL_REQUIREMENTS', SCRIPT_DIR.parents[0] / 'requirements.txt')),
|
||||
long_description=get_description(os.getenv('WHEEL_OVERVIEW', SCRIPT_DIR.parents[3] / 'docs/install_guides/pypi-openvino-rt.md')),
|
||||
long_description_content_type='text/markdown',
|
||||
download_url=config('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'),
|
||||
url=config('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'),
|
||||
download_url=os.getenv('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'),
|
||||
url=os.getenv('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'),
|
||||
cmdclass={
|
||||
'build': CustomBuild,
|
||||
'install': CustomInstall,
|
||||
|
@ -25,6 +25,8 @@ public:
|
||||
type = shape.is_static() ? ShapeType::Static : ShapeType::Dynamic;
|
||||
|
||||
initDims();
|
||||
|
||||
hasZeroDimensions = std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } );
|
||||
}
|
||||
|
||||
explicit Shape(const InferenceEngine::SizeVector& shape) {
|
||||
@ -33,6 +35,8 @@ public:
|
||||
type = ShapeType::Static;
|
||||
|
||||
initDims();
|
||||
|
||||
hasZeroDimensions = std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } );
|
||||
}
|
||||
|
||||
/**
|
||||
@ -106,6 +110,10 @@ public:
|
||||
return type == ShapeType::Dynamic;
|
||||
}
|
||||
|
||||
// Reports whether this shape contains a zero-sized dimension.
// Returns the hasZeroDimensions flag instead of rescanning the dims.
bool hasZeroDims() const {
|
||||
return hasZeroDimensions;
|
||||
}
|
||||
|
||||
size_t getRank() const {
|
||||
return minDims.size();
|
||||
}
|
||||
@ -169,6 +177,8 @@ private:
|
||||
Dynamic
|
||||
} type {ShapeType::Static};
|
||||
|
||||
bool hasZeroDimensions = false;
|
||||
|
||||
VectorDims minDims;
|
||||
VectorDims maxDims;
|
||||
VectorDims dims;
|
||||
|
@ -16,7 +16,8 @@ CpuBlockedMemoryDesc::CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const
|
||||
offsetPadding = 0;
|
||||
offsetPaddingToData.resize(dims.size(), 0);
|
||||
strides.resize(order.size());
|
||||
strides[strides.size() - 1] = 1;
|
||||
// for empty tensor case we fill all strides with 0 values
|
||||
strides[strides.size() - 1] = shape.hasZeroDims() ? 0 : 1;
|
||||
for (size_t i = 2; i <= order.size(); i++) {
|
||||
strides[strides.size() - i] = strides[strides.size() - (i - 1)] * blockedDims[blockedDims.size() - (i - 1)];
|
||||
}
|
||||
@ -33,6 +34,15 @@ CpuBlockedMemoryDesc::CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const
|
||||
IE_THROW() << "CpuBlockedMemoryDesc doesn't support undefined blockedDims.";
|
||||
}
|
||||
|
||||
if (shape.hasZeroDims()) {
|
||||
const auto& dims = shape.getDims();
|
||||
for (size_t i = 0; i < shape.getRank(); i++) {
|
||||
if (dims[order[i]] == 0 && !dimsEqualWeak(blockedDims[i], 0)) {
|
||||
IE_THROW() << "Can't create CpuBlockedMemoryDesc. Mistmatch zero dims in dims and blocked dims";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this->order = order;
|
||||
this->blockedDims = blockedDims;
|
||||
this->offsetPadding = offsetPadding;
|
||||
@ -44,7 +54,9 @@ CpuBlockedMemoryDesc::CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const
|
||||
}
|
||||
|
||||
if (strides.empty() && !order.empty()) {
|
||||
if (std::any_of(this->blockedDims.begin(), this->blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
|
||||
if (shape.hasZeroDims()) {
|
||||
this->strides.resize(order.size(), 0);
|
||||
} else if (std::any_of(this->blockedDims.begin(), this->blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
|
||||
this->strides.resize(order.size(), Shape::UNDEFINED_DIM);
|
||||
} else {
|
||||
this->strides.resize(order.size());
|
||||
@ -54,6 +66,9 @@ CpuBlockedMemoryDesc::CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (shape.hasZeroDims() && std::any_of(strides.begin(), strides.end(), [](size_t stride) { return stride != 0; } )) {
|
||||
IE_THROW() << "Can't create CpuBlockedMemoryDesc with zero dim, but with non zero strides";
|
||||
}
|
||||
this->strides = strides;
|
||||
}
|
||||
|
||||
@ -92,11 +107,16 @@ bool CpuBlockedMemoryDesc::isCompatible(const DnnlBlockedMemoryDesc &rhs) const
|
||||
return rhs.isCompatible(*this);
|
||||
}
|
||||
|
||||
// Returns true when the shape has a zero dimension and the offset padding
// is not Shape::UNDEFINED_DIM.
bool CpuBlockedMemoryDesc::canComputeMemSizeZeroDims() const {
|
||||
return getShape().hasZeroDims() && getOffsetPadding() != Shape::UNDEFINED_DIM;
|
||||
}
|
||||
|
||||
size_t CpuBlockedMemoryDesc::getCurrentMemSizeImp() const {
|
||||
int64_t e_size = getOffsetPadding() + 1; // size in bytes (from begin of data to last element)
|
||||
for (int j = 0; j < getBlockDims().size(); j++)
|
||||
e_size += (getBlockDims()[j] - 1) * getStrides()[j];
|
||||
|
||||
if (!getShape().hasZeroDims()) {
|
||||
for (int j = 0; j < getBlockDims().size(); j++)
|
||||
e_size += (getBlockDims()[j] - 1) * getStrides()[j];
|
||||
}
|
||||
|
||||
e_size *= getPrecision() == InferenceEngine::Precision::BIN ? 1 : getPrecision().size();
|
||||
|
||||
@ -104,14 +124,14 @@ size_t CpuBlockedMemoryDesc::getCurrentMemSizeImp() const {
|
||||
}
|
||||
|
||||
size_t CpuBlockedMemoryDesc::getMaxMemSize() const {
|
||||
if (shape.isStatic()) {
|
||||
if (shape.isStatic() || shape.hasZeroDims()) {
|
||||
return getCurrentMemSize();
|
||||
}
|
||||
|
||||
auto& maxDims = shape.getMaxDims();
|
||||
const auto& maxDims = shape.getMaxDims();
|
||||
if (std::any_of(maxDims.begin(), maxDims.end(), [](size_t x){ return Shape::UNDEFINED_DIM == x ||
|
||||
// WA: for some nodes ngraph computes the upper bound depending on the precision max value
|
||||
std::numeric_limits<int32_t>::max() == x; })) {
|
||||
x >= std::numeric_limits<int32_t>::max(); })) {
|
||||
return UNDEFINED_SIZE;
|
||||
}
|
||||
|
||||
@ -270,15 +290,23 @@ bool CpuBlockedMemoryDesc::blocksExtended() const {
|
||||
}
|
||||
|
||||
size_t CpuBlockedMemoryDesc::getPaddedElementsCount() const {
|
||||
if (std::any_of(blockedDims.begin(), blockedDims.end(), [](Dim dim) { return dim == Shape::UNDEFINED_DIM; }))
|
||||
if (getShape().hasZeroDims()) {
|
||||
return 0;
|
||||
}
|
||||
if (std::any_of(blockedDims.begin(), blockedDims.end(), [](Dim dim) { return dim == Shape::UNDEFINED_DIM; })) {
|
||||
IE_THROW() << "Can't compute padded elements count for non undefined blocked dims";
|
||||
}
|
||||
return std::accumulate(blockedDims.begin(), blockedDims.end(), size_t{1}, std::multiplies<size_t>());
|
||||
}
|
||||
|
||||
MemoryDescPtr CpuBlockedMemoryDesc::cloneWithUndefStridesAndOffset() const {
|
||||
const auto orderSize = getOrder().size();
|
||||
return std::make_shared<CpuBlockedMemoryDesc>(getPrecision(), getShape(), getBlockDims(), getOrder(), Shape::UNDEFINED_DIM,
|
||||
VectorDims(orderSize, 0), VectorDims(orderSize, Shape::UNDEFINED_DIM));
|
||||
CpuBlockedMemoryDescPtr newDesc = std::make_shared<CpuBlockedMemoryDesc>(*this);
|
||||
newDesc->strides = VectorDims(orderSize, Shape::UNDEFINED_DIM);
|
||||
newDesc->offsetPadding = Shape::UNDEFINED_DIM;
|
||||
newDesc->offsetPaddingToData = VectorDims(orderSize, 0);
|
||||
newDesc->status = descStatus::Undefined;
|
||||
return newDesc;
|
||||
}
|
||||
|
||||
MemoryDescPtr CpuBlockedMemoryDesc::cloneWithDefaultStridesAndOffset() const {
|
||||
|
@ -84,6 +84,7 @@ public:
|
||||
|
||||
private:
|
||||
size_t getElementOffset(size_t elemNumber) const override;
|
||||
bool canComputeMemSizeZeroDims() const override;
|
||||
size_t getCurrentMemSizeImp() const override;
|
||||
size_t getOffset(const InferenceEngine::SizeVector& v) const;
|
||||
bool isPlainFormat() const;
|
||||
|
@ -93,7 +93,7 @@ public:
|
||||
*/
|
||||
size_t getCurrentMemSize() const {
|
||||
size_t retVal = UNDEFINED_SIZE;
|
||||
if (isDefined()) {
|
||||
if (canComputeMemSize()) {
|
||||
retVal = getCurrentMemSizeImp();
|
||||
}
|
||||
return retVal;
|
||||
@ -140,8 +140,13 @@ protected:
|
||||
// Get offset to the n'th element. Returns physical index of the element by the logical one considering padding, layout, blocking etc.
|
||||
virtual size_t getElementOffset(size_t elemNumber) const = 0;
|
||||
|
||||
virtual bool canComputeMemSizeZeroDims() const = 0;
|
||||
virtual bool isDefinedImp() const = 0;
|
||||
|
||||
// Returns true when the descriptor is defined, or when the zero-dims
// special case reported by canComputeMemSizeZeroDims() applies.
bool canComputeMemSize() const {
|
||||
return isDefined() || canComputeMemSizeZeroDims();
|
||||
}
|
||||
|
||||
virtual MemoryDescPtr cloneWithNewDimsImp(const VectorDims& dims) const = 0;
|
||||
|
||||
MemoryDescType type;
|
||||
|
@ -48,17 +48,37 @@ DnnlBlockedMemoryDesc MemoryDescUtils::convertToDnnlBlockedMemoryDesc(const Memo
|
||||
CpuBlockedMemoryDesc MemoryDescUtils::convertToCpuBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc) {
|
||||
if (desc.getLayout() == InferenceEngine::Layout::ANY)
|
||||
IE_THROW() << "Cannot convert InferenceEngine::TensorDesc with ANY layout to CpuBlockedMemoryDesc";
|
||||
const auto &blkDesc = desc.getBlockingDesc();
|
||||
return CpuBlockedMemoryDesc(desc.getPrecision(), Shape(desc.getDims()), blkDesc.getBlockDims(), blkDesc.getOrder(), blkDesc.getOffsetPadding(),
|
||||
blkDesc.getOffsetPaddingToData(), blkDesc.getStrides());
|
||||
|
||||
const auto& blkDesc = desc.getBlockingDesc();
|
||||
const auto& dims = desc.getDims();
|
||||
|
||||
auto strides = blkDesc.getStrides();
|
||||
// for the empty tensor case InferenceEngine::TensorDesc fills strides with non-zero values before the first 0 dim
|
||||
// i.e. dims[1, 0, 2, 3] -> strides [0, 6, 3, 1]
|
||||
if (std::any_of(dims.begin(), dims.end(), [](size_t dim){ return dim == 0; })) {
|
||||
std::fill(strides.begin(), strides.end(), 0);
|
||||
}
|
||||
|
||||
return CpuBlockedMemoryDesc(desc.getPrecision(), Shape(dims), blkDesc.getBlockDims(), blkDesc.getOrder(), blkDesc.getOffsetPadding(),
|
||||
blkDesc.getOffsetPaddingToData(), strides);
|
||||
}
|
||||
|
||||
DnnlBlockedMemoryDesc MemoryDescUtils::convertToDnnlBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc) {
|
||||
const auto &blkDesc = desc.getBlockingDesc();
|
||||
if (desc.getLayout() == InferenceEngine::Layout::ANY)
|
||||
IE_THROW() << "Cannot convert InferenceEngine::TensorDesc with ANY layout to DnnlBlockedMemoryDesc";
|
||||
|
||||
const auto& blkDesc = desc.getBlockingDesc();
|
||||
const auto& dims = desc.getDims();
|
||||
|
||||
auto strides = blkDesc.getStrides();
|
||||
// for the empty tensor case InferenceEngine::TensorDesc fills strides with non-zero values before the first 0 dim
|
||||
// i.e. dims[1, 0, 2, 3] -> strides [0, 6, 3, 1]
|
||||
if (std::any_of(dims.begin(), dims.end(), [](size_t dim){ return dim == 0; })) {
|
||||
std::fill(strides.begin(), strides.end(), 0);
|
||||
}
|
||||
|
||||
return DnnlBlockedMemoryDesc(desc.getPrecision(), Shape(desc.getDims()), blkDesc.getBlockDims(), blkDesc.getOrder(), blkDesc.getOffsetPadding(),
|
||||
blkDesc.getOffsetPaddingToData(), blkDesc.getStrides());
|
||||
blkDesc.getOffsetPaddingToData(), strides);
|
||||
}
|
||||
|
||||
BlockedMemoryDescPtr MemoryDescUtils::convertToBlockedMemoryDesc(const MemoryDescPtr &desc) {
|
||||
@ -80,9 +100,16 @@ InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const MKLDNNMemory &
|
||||
|
||||
InferenceEngine::TensorDesc MemoryDescUtils::convertToTensorDesc(const MemoryDesc& desc) {
|
||||
if (auto blockingDesc = dynamic_cast<const BlockedMemoryDesc*>(&desc)) {
|
||||
return InferenceEngine::TensorDesc(blockingDesc->getPrecision(), blockingDesc->getShape().getStaticDims(),
|
||||
{blockingDesc->getBlockDims(), blockingDesc->getOrder(), blockingDesc->getOffsetPadding(),
|
||||
blockingDesc->getOffsetPaddingToData(), blockingDesc->getStrides()});
|
||||
InferenceEngine::BlockingDesc blkDesc = desc.getShape().hasZeroDims() ? InferenceEngine::BlockingDesc(blockingDesc->getBlockDims(),
|
||||
blockingDesc->getOrder(),
|
||||
blockingDesc->getOffsetPadding(),
|
||||
blockingDesc->getOffsetPaddingToData()) :
|
||||
InferenceEngine::BlockingDesc(blockingDesc->getBlockDims(),
|
||||
blockingDesc->getOrder(),
|
||||
blockingDesc->getOffsetPadding(),
|
||||
blockingDesc->getOffsetPaddingToData(),
|
||||
blockingDesc->getStrides());
|
||||
return InferenceEngine::TensorDesc(blockingDesc->getPrecision(), blockingDesc->getShape().getStaticDims(), blkDesc);
|
||||
} else {
|
||||
IE_THROW() << "Cannot convert MemoryDesc to InferenceEngine::TensorDesc";
|
||||
}
|
||||
|
@ -15,12 +15,17 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
|
||||
const auto &dims = shape.getDims();
|
||||
|
||||
if (!strides.empty()) { // custom strides
|
||||
if (shape.hasZeroDims() && std::any_of(strides.begin(), strides.end(), [](size_t stride) { return stride != 0; } )) {
|
||||
IE_THROW() << "Can't create DnnlBlockedMemoryDesc with zero dim, but with non zero strides";
|
||||
}
|
||||
desc = {MKLDNNExtensionUtils::convertToDnnlDims(dims),
|
||||
MKLDNNExtensionUtils::IEPrecisionToDataType(prc),
|
||||
MKLDNNExtensionUtils::convertToDnnlDims(strides)};
|
||||
} else {
|
||||
mkldnn::memory::dims plain_strides;
|
||||
if (std::any_of(dims.begin(), dims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
|
||||
if (shape.hasZeroDims()) {
|
||||
plain_strides.resize(ndims, 0);
|
||||
} else if (std::any_of(dims.begin(), dims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
|
||||
plain_strides.resize(ndims, DNNL_RUNTIME_DIM_VAL);
|
||||
} else {
|
||||
plain_strides.resize(ndims, 1);
|
||||
@ -58,8 +63,8 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
|
||||
* Limitation of conversion: the first N elements of order should be a permutation of [0,1,2 ... N]
|
||||
*/
|
||||
DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const VectorDims& blockedDims,
|
||||
const VectorDims& order, size_t offsetPadding, const VectorDims& offsetPaddingToData,
|
||||
const VectorDims& strides) : MemoryDesc(shape, DnnlBlocked) {
|
||||
const VectorDims& order, size_t offsetPadding, const VectorDims& offsetPaddingToData,
|
||||
const VectorDims& strides) : MemoryDesc(shape, DnnlBlocked) {
|
||||
using namespace mkldnn;
|
||||
// scalar case
|
||||
if (shape.getRank() == 0) {
|
||||
@ -90,8 +95,8 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
|
||||
IE_THROW() << "DnnlBlockedMemoryDesc doesn't support undefined order.";
|
||||
}
|
||||
|
||||
if (std::any_of(blockedDims.begin() + shape.getRank(), blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
|
||||
IE_THROW() << "DnnlBlockedMemoryDesc doesn't support undefined blockedDims.";
|
||||
if (std::any_of(blockedDims.begin() + shape.getRank(), blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM || val == 0; })) {
|
||||
IE_THROW() << "DnnlBlockedMemoryDesc doesn't support undefined or zero blockedDims.";
|
||||
}
|
||||
|
||||
auto dims = MKLDNNExtensionUtils::convertToDnnlDims(shape.getDims());
|
||||
@ -106,7 +111,12 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
|
||||
|
||||
size_t inner_ndims = order.size() - dims.size();
|
||||
|
||||
const bool emptyDesc = shape.hasZeroDims();
|
||||
if (!strides.empty()) {
|
||||
if (emptyDesc && std::any_of(strides.begin(), strides.end(), [](size_t dim) { return dim != 0; } )) {
|
||||
IE_THROW() << "Can't create DnnlBlockedMemoryDesc with zero dim, but with non zero strides";
|
||||
}
|
||||
|
||||
bool is_descending_strides = true;
|
||||
for (int i = 1; i < strides.size(); i++) {
|
||||
is_descending_strides &= (strides[i - 1] >= strides[i]);
|
||||
@ -118,7 +128,7 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
|
||||
IE_THROW() << "Can not construct DnnlBlockedMemoryDesc from strides: " << vec2str(strides);
|
||||
}
|
||||
|
||||
if (!strides.empty() && std::none_of(strides.begin(), strides.end(), [](size_t x) { return Shape::UNDEFINED_DIM == x; })) {
|
||||
if (!strides.empty() && !emptyDesc && std::none_of(strides.begin(), strides.end(), [](size_t x) { return Shape::UNDEFINED_DIM == x; })) {
|
||||
bool inner_block_are_dense = one_of(strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted
|
||||
for (int i = outer_ndims; i < strides.size() - 1; i++) {
|
||||
inner_block_are_dense &= (strides[i] == strides[i + 1] * blockedDims[i + 1]);
|
||||
@ -203,6 +213,11 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const Shape& shape, mkldnn::memory:
|
||||
order.swap(perm);
|
||||
order.insert(order.end(), inner_idxs.begin(), inner_idxs.end());
|
||||
|
||||
if (shape.hasZeroDims()) {
|
||||
auto& blk = desc.data.format_desc.blocking;
|
||||
std::fill(std::begin(blk.strides), std::begin(blk.strides) + desc.data.ndims, 0);
|
||||
}
|
||||
|
||||
initBlockedParams();
|
||||
}
|
||||
|
||||
@ -296,6 +311,12 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc)
|
||||
IE_THROW(Unexpected) << "Can't create DnnlBlockedMemoryDesc from not blocking desc";
|
||||
|
||||
order = extractOrder(desc);
|
||||
|
||||
if (getShape().hasZeroDims()) {
|
||||
auto& blk = desc.data.format_desc.blocking;
|
||||
std::fill(std::begin(blk.strides), std::begin(blk.strides) + desc.data.ndims, 0);
|
||||
}
|
||||
|
||||
initBlockedParams();
|
||||
}
|
||||
|
||||
@ -368,6 +389,7 @@ bool DnnlBlockedMemoryDesc::isTailCFormat() const {
|
||||
static mkldnn::memory::desc cloneDescWithNewDims(const mkldnn::memory::desc& desc, const VectorDims& dims, const VectorDims& order) {
|
||||
using namespace dnnl::impl::utils;
|
||||
auto mklDims = MKLDNNExtensionUtils::convertToDnnlDims(dims);
|
||||
const auto offsetPadding = desc.data.offset0;
|
||||
mkldnn::memory::desc newMklDesc = desc;
|
||||
array_copy(newMklDesc.data.dims, mklDims.data(), mklDims.size());
|
||||
std::vector<int> perm(order.begin(), order.begin() + mklDims.size());
|
||||
@ -379,6 +401,9 @@ static mkldnn::memory::desc cloneDescWithNewDims(const mkldnn::memory::desc& des
|
||||
if (retCode != dnnl::impl::status::success) {
|
||||
IE_THROW() << "Can not clone DnnlBlockedMemoryDesc with dims: " << MemoryDescUtils::dims2str(dims);
|
||||
}
|
||||
// dnnl::impl::fill_blocked always sets offset0 to 0
|
||||
// so we need to restore actual value
|
||||
newMklDesc.data.offset0 = offsetPadding;
|
||||
return newMklDesc;
|
||||
}
|
||||
|
||||
@ -476,14 +501,14 @@ bool DnnlBlockedMemoryDesc::isSame(mkldnn::memory::format_tag fmt) const {
|
||||
}
|
||||
|
||||
size_t DnnlBlockedMemoryDesc::getMaxMemSize() const {
|
||||
if (shape.isStatic()) {
|
||||
if (shape.isStatic() || shape.hasZeroDims()) {
|
||||
return getCurrentMemSize();
|
||||
}
|
||||
|
||||
auto& maxDims = shape.getMaxDims();
|
||||
const auto& maxDims = shape.getMaxDims();
|
||||
if (std::any_of(maxDims.begin(), maxDims.end(), [](size_t x){ return Shape::UNDEFINED_DIM == x ||
|
||||
// WA: for some nodes ngraph computes the upper bound depending on the precision max value
|
||||
std::numeric_limits<int32_t>::max() == x; })) {
|
||||
x >= std::numeric_limits<int32_t>::max(); })) {
|
||||
return UNDEFINED_SIZE;
|
||||
}
|
||||
|
||||
@ -492,6 +517,13 @@ size_t DnnlBlockedMemoryDesc::getMaxMemSize() const {
|
||||
}
|
||||
|
||||
// Returns the product of the first desc.data.ndims entries of desc.data.padded_dims,
// or 0 when the shape has a zero dimension.
// Throws if any of those padded dims equals DNNL_RUNTIME_DIM_VAL.
size_t DnnlBlockedMemoryDesc::getPaddedElementsCount() const {
|
||||
// a shape with a zero dimension holds no elements, so skip the checks below
if (getShape().hasZeroDims()) {
|
||||
return 0;
|
||||
}
|
||||
// a padded dim left at the runtime placeholder value cannot be multiplied
if (std::any_of(std::begin(desc.data.padded_dims), std::begin(desc.data.padded_dims) + desc.data.ndims,
|
||||
[](dnnl_dim_t dim) { return dim == DNNL_RUNTIME_DIM_VAL; })) {
|
||||
IE_THROW() << "Can't compute padded elements count for non undefined blocked dims";
|
||||
}
|
||||
// multiply the padded dims together, starting from 1
return std::accumulate(std::begin(desc.data.padded_dims), std::begin(desc.data.padded_dims) + desc.data.ndims, size_t{1},
|
||||
std::multiplies<int64_t>());
|
||||
}
|
||||
@ -548,7 +580,7 @@ void DnnlBlockedMemoryDesc::initStrides() {
|
||||
const size_t total_ndims = outer_ndims + inner_ndims;
|
||||
|
||||
// strides of inner dims. In case of 4i16o4i will be {64, 4, 1}
|
||||
VectorDims inner_strides(inner_ndims, 1);
|
||||
VectorDims inner_strides(inner_ndims, getShape().hasZeroDims() ? 0 : 1);
|
||||
for (size_t i = 1; i < blk_desc.inner_nblks; i++) {
|
||||
inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i];
|
||||
}
|
||||
@ -600,7 +632,9 @@ void DnnlBlockedMemoryDesc::recomputeDefaultStrides() {
|
||||
IE_THROW() << "Can't recompute stride: order size != blocked dims size";
|
||||
|
||||
auto &oneDnnStrides = desc.data.format_desc.blocking.strides;
|
||||
if (std::any_of(blockedDims.begin(), blockedDims.end(), [](Dim val) { return val == Shape::UNDEFINED_DIM; })) {
|
||||
if (getShape().hasZeroDims()) {
|
||||
std::fill(std::begin(oneDnnStrides), std::begin(oneDnnStrides) + getShape().getRank(), 0);
|
||||
} else if (std::any_of(blockedDims.begin(), blockedDims.end(), [](Dim val) { return val == Shape::UNDEFINED_DIM; })) {
|
||||
std::fill(std::begin(oneDnnStrides), std::begin(oneDnnStrides) + rank, DNNL_RUNTIME_DIM_VAL);
|
||||
initStrides();
|
||||
} else {
|
||||
@ -633,6 +667,11 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc,
|
||||
|
||||
desc = cloneDescWithNewDims(mdesc, shape.getDims(), order);
|
||||
|
||||
if (shape.hasZeroDims()) {
|
||||
auto& blk = desc.data.format_desc.blocking;
|
||||
std::fill(std::begin(blk.strides), std::begin(blk.strides) + desc.data.ndims, 0);
|
||||
}
|
||||
|
||||
initBlockedParams();
|
||||
}
|
||||
|
||||
|
@ -71,7 +71,7 @@ private:
|
||||
explicit DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc);
|
||||
|
||||
// Creates DnnlBlockedMemoryDesc using the shape parameter as a true shape but all other params (layout, blocks, etc.) are used from the mdesc, but
|
||||
// the mdesc own shape is ignored. The main purpose of this constructor is making dynamic descriptor form some dummy mdesc, which stores info about
|
||||
// the mdesc own shape is ignored. The main purpose of this constructor is making dynamic descriptor from some dummy mdesc, which stores info about
|
||||
// layout, blocking, strides, etc., and the provided dynamic shape.
|
||||
DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc, const Shape& shape);
|
||||
|
||||
|
@ -15,6 +15,10 @@ DnnlMemoryDesc::DnnlMemoryDesc(const mkldnn::memory::desc& desc) :
|
||||
IE_THROW(Unexpected) << "Memory format any is prohibited!";
|
||||
}
|
||||
|
||||
// Returns true when the shape has a zero dimension and the wrapped
// descriptor's offset0 is not DNNL_RUNTIME_DIM_VAL.
bool DnnlMemoryDesc::canComputeMemSizeZeroDims() const {
|
||||
return getShape().hasZeroDims() && desc.data.offset0 != DNNL_RUNTIME_DIM_VAL;
|
||||
}
|
||||
|
||||
// Delegates the size computation for the wrapped desc to
// MKLDNNExtensionUtils::getMemSizeForDnnlDesc.
size_t DnnlMemoryDesc::getCurrentMemSizeImp() const {
|
||||
return MKLDNNExtensionUtils::getMemSizeForDnnlDesc(desc);
|
||||
}
|
||||
|
@ -63,6 +63,7 @@ private:
|
||||
|
||||
size_t getElementOffset(size_t elemNumber) const override;
|
||||
|
||||
bool canComputeMemSizeZeroDims() const override;
|
||||
size_t getCurrentMemSizeImp() const override;
|
||||
bool isDefinedImp() const override;
|
||||
MemoryDescPtr cloneWithNewDimsImp(const VectorDims& dims) const override;
|
||||
|
@ -319,7 +319,6 @@ void MKLDNNGraph::InitGraph() {
|
||||
SortTopologically();
|
||||
|
||||
InitDescriptors();
|
||||
RemoveDroppedEdges();
|
||||
|
||||
InitOptimalPrimitiveDescriptors();
|
||||
|
||||
@ -385,15 +384,16 @@ void MKLDNNGraph::InitOptimalPrimitiveDescriptors() {
|
||||
void MKLDNNGraph::ExtractConstantAndExecutableNodes() {
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::ExtractConstantAndExecutableNodes");
|
||||
for (const auto& graphNode : graphNodes) {
|
||||
if (graphNode->isConstant())
|
||||
if (graphNode->isConstant()) {
|
||||
constantGraphNodes.emplace_back(graphNode);
|
||||
else if (CPU_DEBUG_CAPS_ALWAYS_TRUE(graphNode->isExecutable()))
|
||||
} else if (CPU_DEBUG_CAPS_ALWAYS_TRUE(graphNode->isExecutable())) {
|
||||
/* @todo
|
||||
* Revise implementation.
|
||||
* With current way it is possible that with debug_caps enabled
|
||||
* we execute a node, which is not ready to be executed
|
||||
*/
|
||||
executableGraphNodes.emplace_back(graphNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -793,7 +793,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
|
||||
|
||||
// check for empty output blob
|
||||
if (std::any_of(outDims.begin(), outDims.end(), [](const Dim dim) {return dim == 0;})) {
|
||||
return;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto srcPrec = actualDesc.getPrecision();
|
||||
@ -836,10 +836,11 @@ inline void MKLDNNGraph::ExecuteNode(const MKLDNNNodePtr& node, const mkldnn::st
|
||||
DUMP(node, infer_count);
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, node->profiling.execute);
|
||||
|
||||
if (node->isDynamicNode())
|
||||
if (node->isDynamicNode()) {
|
||||
node->executeDynamic(stream);
|
||||
else
|
||||
} else {
|
||||
node->execute(stream);
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) {
|
||||
@ -855,7 +856,6 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) {
|
||||
|
||||
if (request)
|
||||
request->ThrowIfCanceled();
|
||||
|
||||
ExecuteNode(node, stream);
|
||||
}
|
||||
|
||||
@ -994,22 +994,6 @@ Config MKLDNNGraph::getProperty() const {
|
||||
return config;
|
||||
}
|
||||
|
||||
Blob::Ptr MKLDNNGraph::getInputBlob(const std::string& name) {
|
||||
auto itr = inputNodesMap.find(name);
|
||||
if (itr != inputNodesMap.end()) {
|
||||
return MemoryDescUtils::interpretAsBlob(itr->second->getChildEdgeAt(0)->getMemory());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Blob::Ptr MKLDNNGraph::getOutputBlob(const std::string& name) {
|
||||
auto itr = outputNodesMap.find(name);
|
||||
if (itr != outputNodesMap.end()) {
|
||||
return MemoryDescUtils::interpretAsBlob(itr->second->getParentEdgeAt(0)->getMemory());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void MKLDNNGraph::RemoveEdge(MKLDNNEdgePtr& edge) {
|
||||
for (auto it = graphEdges.begin(); it != graphEdges.end(); it++) {
|
||||
if ((*it) == edge) {
|
||||
|
@ -44,9 +44,6 @@ public:
|
||||
void setProperty(const std::map<std::string, std::string> &properties);
|
||||
Config getProperty() const;
|
||||
|
||||
InferenceEngine::Blob::Ptr getInputBlob(const std::string& name);
|
||||
InferenceEngine::Blob::Ptr getOutputBlob(const std::string& name);
|
||||
|
||||
template<typename NET>
|
||||
void CreateGraph(NET &network,
|
||||
const MKLDNNExtensionManager::Ptr& extMgr,
|
||||
|
@ -59,7 +59,7 @@ MKLDNNGraphOptimizer::MKLDNNGraphOptimizer() {}
|
||||
|
||||
void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
|
||||
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "ApplyCommonGraphOptimizations", "FuseConvolutionAndBias");
|
||||
FuseConvolutionAndBias(graph);
|
||||
FuseConvolutionMatMulAndBias(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMultiplyAndAdd");
|
||||
@ -166,37 +166,38 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
|
||||
graph.RemoveDroppedEdges();
|
||||
}
|
||||
|
||||
void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) {
|
||||
void MKLDNNGraphOptimizer::FuseConvolutionMatMulAndBias(MKLDNNGraph &graph) {
|
||||
auto& graphNodes = graph.GetNodes();
|
||||
|
||||
auto isSuitableParentNode = [](MKLDNNNodePtr node) {
|
||||
return node->getType() == Convolution &&
|
||||
auto isSuitableParentNode = [](const MKLDNNNodePtr& node) {
|
||||
return (node->getType() == Convolution || node->getType() == MatMul) &&
|
||||
node->getChildEdges().size() == 1 &&
|
||||
node->getParentEdges().size() == 2 &&
|
||||
node->getFusedWith().empty();
|
||||
};
|
||||
|
||||
auto isSuitableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
|
||||
auto isSuitableChildNode = [&](const MKLDNNNodePtr& parentNode, const MKLDNNNodePtr& childNode) {
|
||||
if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2)
|
||||
return false;
|
||||
|
||||
auto biasNode = childNode->getParentEdgesAtPort(1)[0]->getParent();
|
||||
const auto biasNode = childNode->getParentEdgesAtPort(1)[0]->getParent();
|
||||
if (biasNode->getType() != Input || !biasNode->isConstant() || biasNode->getChildEdges().size() != 1)
|
||||
return false;
|
||||
|
||||
auto convOutDims = parentNode->getOutputShapeAtPort(0).getDims();
|
||||
auto biasDims = getNormalizedDimsBySize(biasNode->getOutputShapeAtPort(0).getDims(),
|
||||
convOutDims.size());
|
||||
const auto parentOutDims = parentNode->getOutputShapeAtPort(0).getDims();
|
||||
const auto biasDims = getNormalizedDimsBySize(biasNode->getOutputShapeAtPort(0).getDims(),
|
||||
parentOutDims.size());
|
||||
// TODO [NM]: Legacy ConvBias fusion transformation supports both per-tensor (via explicit broadcasting) and per-channel cases.
|
||||
// Most of the real models contain per-channel bias, so we need to reevaluate the need to support per-tensor variant.
|
||||
if (convOutDims.size() != biasDims.size() || biasDims.size() < 2)
|
||||
if (parentOutDims.size() != biasDims.size() || biasDims.size() < 2)
|
||||
return false;
|
||||
|
||||
if (biasDims[0] != 1 || !dimsEqualStrong(biasDims[1], convOutDims[1]))
|
||||
const auto channelAxis = parentNode->getFusingAxis();
|
||||
if (!dimsEqualStrong(biasDims[channelAxis], parentOutDims[channelAxis]))
|
||||
return false;
|
||||
|
||||
for (int i = 2; i < biasDims.size(); i++) {
|
||||
if (biasDims[i] != 1)
|
||||
for (int i = 0; i < biasDims.size(); i++) {
|
||||
if (biasDims[i] != 1 && i != channelAxis)
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -262,13 +263,13 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) {
|
||||
graph.RemoveEdge(remEdge);
|
||||
}
|
||||
|
||||
auto parentEltwise = parentNode;
|
||||
const auto& parentEltwise = parentNode;
|
||||
MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size()));
|
||||
auto &graphEdges = graph.GetEdges();
|
||||
auto& graphEdges = graph.GetEdges();
|
||||
graphEdges.push_back(newEdge);
|
||||
parent->addEdge(newEdge);
|
||||
|
||||
auto partialShape = { parentEltwise->outputShapes[0].toPartialShape()[1] };
|
||||
auto partialShape = { parentEltwise->outputShapes[0].toPartialShape()[parentEltwise->getFusingAxis()] };
|
||||
parent->outputShapes[inNum] = Shape(partialShape);
|
||||
parentEltwise->inputShapes.push_back(parent->outputShapes[0]);
|
||||
}
|
||||
@ -627,7 +628,15 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) {
|
||||
}
|
||||
}
|
||||
|
||||
static bool BF16QuantizeNodeFusing(MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
|
||||
/**
|
||||
* @todo FQ fusing was disabled for BF16 output since oneDNN primitives lack support
|
||||
* for bf16 depthwise postops.
|
||||
* This is not the case anymore, because after migration to oneDNN 2.3 FQ will be fused as
|
||||
* multiple binary post ops.
|
||||
* This check can already be removed for FC fusing, but should be kept for Convolution,
|
||||
* which still uses legacy depthwise postops for performance reasons.
|
||||
*/
|
||||
static bool BF16QuantizeNodeFusing(const MKLDNNNodePtr& parentNode, const MKLDNNNodePtr& childNode) {
|
||||
return childNode->getType() == FakeQuantize &&
|
||||
one_of(Precision::BF16,
|
||||
parentNode->getOriginalOutputPrecisionAtPort(0),
|
||||
@ -638,7 +647,7 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra
|
||||
auto& graphNodes = graph.GetNodes();
|
||||
|
||||
auto isSuitableParentNode = [](MKLDNNNodePtr node) {
|
||||
return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getInputShapeAtPort(0).getRank() != 3;
|
||||
return node->getType() == FullyConnected && node->getChildEdges().size() == 1;
|
||||
};
|
||||
|
||||
auto parent = graphNodes.begin();
|
||||
|
@ -19,7 +19,7 @@ public:
|
||||
void ApplyImplSpecificGraphOptimizations(MKLDNNGraph& graph);
|
||||
|
||||
private:
|
||||
void FuseConvolutionAndBias(MKLDNNGraph &graph);
|
||||
void FuseConvolutionMatMulAndBias(MKLDNNGraph &graph);
|
||||
void FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &graph);
|
||||
void FuseMultiplyAndAdd(MKLDNNGraph &graph);
|
||||
void FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph);
|
||||
|
@ -190,8 +190,9 @@ void MKLDNNPlugin::MKLDNNInferRequest::redefineMemoryForInputNodes() {
|
||||
const auto inputNode = cpuInputNodes.find(blob.first);
|
||||
if (inputNode == cpuInputNodes.end())
|
||||
IE_THROW() << "CPU execution graph doesn't contain input node with name: " << blob.first;
|
||||
if (inputNode->second->isDynamicNode())
|
||||
if (inputNode->second->isDynamicNode()) {
|
||||
inputNode->second->redefineOutputMemory({blob.second->getTensorDesc().getDims()});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#include "mkldnn_node.h"
|
||||
#include "dnnl_debug.h"
|
||||
#include "mkldnn_edge.h"
|
||||
#include "mkldnn_extension_mngr.h"
|
||||
#include "mkldnn_itt.h"
|
||||
|
||||
@ -83,7 +84,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
|
||||
for (size_t i = 0; i < op->get_input_size(); i++) {
|
||||
const auto &shape = op->get_input_partial_shape(i);
|
||||
if (shape.rank().is_dynamic()) {
|
||||
IE_THROW(Unexpected) << "CPU plug-in doesn't support operation with dynamic rank";
|
||||
IE_THROW(Unexpected) << "CPU plug-in doesn't support " << getTypeStr() << " operation with dynamic rank. Operation name: " << getName();
|
||||
}
|
||||
|
||||
bool isScalar = shape.rank().get_length() == 0;
|
||||
@ -98,7 +99,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
|
||||
for (size_t i = 0; i < op->get_output_size(); i++) {
|
||||
const auto &shape = op->get_output_partial_shape(i);
|
||||
if (shape.rank().is_dynamic()) {
|
||||
IE_THROW(Unexpected) << "CPU plug-in doesn't support operation with dynamic rank";
|
||||
IE_THROW(Unexpected) << "CPU plug-in doesn't support " << getTypeStr() << " operation with dynamic rank. Operation name: " << getName();
|
||||
}
|
||||
|
||||
bool isScalar = shape.rank().get_length() == 0;
|
||||
@ -229,6 +230,15 @@ bool MKLDNNNode::isEdgesEmpty(const std::vector<MKLDNNEdgeWeakPtr>& edges) const
|
||||
return true;
|
||||
}
|
||||
|
||||
void MKLDNNNode::createPrimitive() {
|
||||
if (inputShapesDefined() && isExecutable()) {
|
||||
if (needPrepareParams()) {
|
||||
prepareParams();
|
||||
}
|
||||
updateLastInputDims();
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNNode::selectOptimalPrimitiveDescriptor() {
|
||||
selectPreferPrimitiveDescriptor(getPrimitivesPriority(), false);
|
||||
}
|
||||
@ -509,12 +519,14 @@ void MKLDNNNode::executeDynamic(mkldnn::stream strm) {
|
||||
if (needShapeInfer()) {
|
||||
redefineOutputMemory(shapeInfer());
|
||||
}
|
||||
if (needPrepareParams()) {
|
||||
IE_ASSERT(inputShapesDefined()) << "Can't prepare params for " << getTypeStr() << " node with name: " << getName() <<
|
||||
" since the input shapes are not defined.";
|
||||
prepareParams();
|
||||
if (isExecutable()) {
|
||||
if (needPrepareParams()) {
|
||||
IE_ASSERT(inputShapesDefined()) << "Can't prepare params for " << getTypeStr() << " node with name: " << getName() <<
|
||||
" since the input shapes are not defined.";
|
||||
prepareParams();
|
||||
}
|
||||
executeDynamicImpl(strm);
|
||||
}
|
||||
executeDynamicImpl(strm);
|
||||
updateLastInputDims();
|
||||
}
|
||||
|
||||
@ -716,7 +728,7 @@ void MKLDNNNode::initDescriptor(const NodeConfig& config) {
|
||||
selectedPD->setConfig(rightConfig);
|
||||
}
|
||||
|
||||
void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd) {
|
||||
void MKLDNNNode::prepareMemory(mkldnn::primitive_desc_iterator& itpd) {
|
||||
for (size_t i = 0; i < getChildEdges().size(); i++) {
|
||||
auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr();
|
||||
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
|
||||
@ -1048,6 +1060,18 @@ void MKLDNNNode::setDynamicBatchLim(int lim) {
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNNode::appendPostOpArgs(const mkldnn::primitive_attr& attr,
|
||||
std::unordered_map<int, mkldnn::memory>& primArgs,
|
||||
const std::vector<MKLDNNMemoryPtr>& binaryPostOpsArgs) {
|
||||
auto post_ops = attr.get_post_ops();
|
||||
int idx = 0;
|
||||
for (int i = 0; i < post_ops.len(); i++) {
|
||||
if (post_ops.kind(i) == mkldnn::primitive::kind::binary) {
|
||||
primArgs.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(i) | DNNL_ARG_SRC_1, binaryPostOpsArgs[idx++]->GetPrimitive()});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool MKLDNNNode::isFusedWith(Type fusedNodeType) const {
|
||||
for (auto fusedNode : fusedWith) {
|
||||
if (fusedNode->type == fusedNodeType)
|
||||
@ -1078,10 +1102,14 @@ Layout MKLDNNNode::getWeightsLayoutByDims(SizeVector dims, bool isGrouped) {
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
|
||||
void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) {
|
||||
IE_THROW() << "Fusing of " << this->getType() << " operation is not implemented";
|
||||
}
|
||||
|
||||
void MKLDNNNode::appendBinPostOps(mkldnn::post_ops& ops, const std::vector<size_t>& binaryShape, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) {
|
||||
IE_THROW() << "Binary fusing of " << this->getType() << " operation is not implemented";
|
||||
}
|
||||
|
||||
std::vector<InferenceEngine::Precision> MKLDNNNode::getInputPrecisions() const {
|
||||
std::vector<InferenceEngine::Precision> inputPrecisions;
|
||||
for (size_t i = 0; i < getParentEdges().size(); i++) {
|
||||
@ -1205,6 +1233,9 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr<ngraph::Node>
|
||||
|
||||
bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const {
|
||||
size_t fusingPort = 0;
|
||||
// @todo graph optimizer can provide parentNode as nullptr. Should be avoided
|
||||
const size_t channelAxis = parentNode ? parentNode->getFusingAxis() : MKLDNNNode::getFusingAxis();
|
||||
|
||||
for (size_t i = (parentNode == nullptr ? 1 : 0); i < getParentEdges().size(); i++) {
|
||||
MKLDNNNode *node = getParentEdgesAtPort(i)[0]->getParent().get();
|
||||
if (node == nullptr) {
|
||||
@ -1225,7 +1256,8 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
|
||||
if (i == fusingPort)
|
||||
continue;
|
||||
auto& weightShape = getInputShapeAtPort(i).getDims();
|
||||
if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 || !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape, true))
|
||||
if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 ||
|
||||
!isPerTensorOrPerChannelBroadcastable(dataShape, weightShape, channelAxis, true))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -1246,6 +1278,9 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
|
||||
|| isConvertablePowerStatic();
|
||||
}
|
||||
|
||||
// @todo shifts for Subtract and scales for Divide are replaced with
|
||||
// Add (with opposite sign) and Multiply (with inverse value) for legacy depthwise post ops
|
||||
// This can be avoided after depthwise post ops are gone
|
||||
std::pair<std::vector<float>, std::vector<float>> MKLDNNNode::getScalesAndShifts(const MKLDNNNode *parentNode) const {
|
||||
std::vector<float> scales, shifts;
|
||||
|
||||
@ -1309,6 +1344,36 @@ std::pair<std::vector<float>, std::vector<float>> MKLDNNNode::getScalesAndShifts
|
||||
return {scales, shifts};
|
||||
}
|
||||
|
||||
bool MKLDNNNode::isInputTensorAtPortEmpty(size_t port) const {
|
||||
if (inputShapes.size() <= port) {
|
||||
IE_THROW() << "Incorrect input port number for node " << getName();
|
||||
}
|
||||
return getParentEdgesAtPort(port)[0]->getMemory().GetShape().hasZeroDims();
|
||||
}
|
||||
|
||||
bool MKLDNNNode::isOutputTensorAtPortEmpty(size_t port) const {
|
||||
if (outputShapes.size() <= port) {
|
||||
IE_THROW() << "Incorrect output port number for node " << getName();
|
||||
}
|
||||
return getChildEdgesAtPort(port)[0]->getMemory().GetShape().hasZeroDims();
|
||||
}
|
||||
|
||||
bool MKLDNNNode::hasEmptyInputTensors() const {
|
||||
for (size_t i = 0; i < getParentEdges().size(); i++) {
|
||||
if (isInputTensorAtPortEmpty(i))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool MKLDNNNode::hasEmptyOutputTensors() const {
|
||||
for (size_t i = 0; i < outputShapes.size(); i++) {
|
||||
if (isOutputTensorAtPortEmpty(i))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool MKLDNNNode::inputShapesDefined() const {
|
||||
for (size_t i = 0; i < getParentEdges().size(); i++) {
|
||||
if (!getParentEdgesAtPort(i)[0]->getMemory().getDesc().isDefined())
|
||||
@ -1382,8 +1447,11 @@ std::vector<VectorDims> MKLDNNNode::shapeInferGeneric(const std::vector<Shape>&
|
||||
std::vector<VectorDims> newOutputShapes(opToShapeInfer->get_output_size());
|
||||
for (size_t i = 0; i < newOutputShapes.size(); i++) {
|
||||
const auto &partShape = opToShapeInfer->get_output_partial_shape(i);
|
||||
if (partShape.is_dynamic())
|
||||
IE_THROW(NotImplemented) << "CPU plug-in doesn't support default shape infer for nodes with internal dynamism";
|
||||
if (partShape.is_dynamic()) {
|
||||
IE_THROW(NotImplemented) << "CPU plug-in doesn't support default shape infer for node " << getTypeStr()
|
||||
<< " with internal dynamism. Operation name: " << getName();
|
||||
}
|
||||
|
||||
newOutputShapes[i] = partShape.get_shape();
|
||||
}
|
||||
return newOutputShapes;
|
||||
@ -1408,10 +1476,11 @@ bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const {
|
||||
}
|
||||
return ret;
|
||||
} else if (node->getType() == Eltwise) {
|
||||
return one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh,
|
||||
EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
|
||||
EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu) ||
|
||||
node->canBePerformedAsScaleShift(this);
|
||||
return one_of(node->getAlgorithm(),
|
||||
EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh,
|
||||
EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
|
||||
EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu) ||
|
||||
node->canBePerformedAsScaleShift(this);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -199,11 +199,19 @@ public:
|
||||
|
||||
// must be called only after MKLDNNGraph::InitEdges()
|
||||
virtual bool isExecutable() const {
|
||||
return true;
|
||||
return !hasEmptyInputTensors();
|
||||
}
|
||||
|
||||
bool isConstant();
|
||||
|
||||
virtual size_t getFusingAxis() const {
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void appendPostOpArgs(const mkldnn::primitive_attr& attr,
|
||||
std::unordered_map<int, mkldnn::memory>& primArgs,
|
||||
const std::vector<MKLDNNMemoryPtr>& binaryPostOpsArgs);
|
||||
|
||||
bool isFusedWith(Type type) const;
|
||||
|
||||
void addFusedNode(const MKLDNNNodePtr &fusingNode) {
|
||||
@ -362,7 +370,7 @@ public:
|
||||
*/
|
||||
virtual void filterSupportedPrimitiveDescriptors();
|
||||
|
||||
virtual void createPrimitive() = 0;
|
||||
virtual void createPrimitive();
|
||||
|
||||
virtual void selectOptimalPrimitiveDescriptor();
|
||||
virtual void initOptimalPrimitiveDescriptor();
|
||||
@ -419,7 +427,7 @@ public:
|
||||
if (impl_type == selected_pd->getImplementationType() &&
|
||||
descsCompatible(srcDescs, selected_pd->getConfig().inConfs) &&
|
||||
descsCompatible(dstDescs, selected_pd->getConfig().outConfs)) {
|
||||
prepareMemory(selected_pd, itpd);
|
||||
prepareMemory(itpd);
|
||||
PD prim_desc = createPd<PD, D, FPD>(desc);
|
||||
return {itpd.get()};
|
||||
}
|
||||
@ -594,8 +602,10 @@ protected:
|
||||
* Seed node should call this routine and pass its post operations list as parameter.
|
||||
* @param ops List of fused post operations
|
||||
*/
|
||||
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false);
|
||||
virtual AttrPtr initPrimitiveAttr() const { return nullptr; }
|
||||
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, int align = -1);
|
||||
virtual void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem);
|
||||
|
||||
virtual std::shared_ptr<mkldnn::primitive_attr> initPrimitiveAttr() { return nullptr; }
|
||||
|
||||
typedef std::function<DnnlMemoryDescPtr (mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx)>
|
||||
GetPrimitiveMemoryFormatFunc;
|
||||
@ -636,7 +646,7 @@ protected:
|
||||
std::vector<MKLDNNMemoryPtr> internalBlobMemory;
|
||||
std::vector<NodeDesc> supportedPrimitiveDescriptors;
|
||||
std::unordered_map<int, mkldnn::memory> primArgs;
|
||||
std::vector<mkldnn::memory> binaryPostOpsArgs;
|
||||
std::vector<MKLDNNMemoryPtr> binaryPostOpsArgs;
|
||||
MKLDNNPrimitive prim;
|
||||
std::vector<MKLDNNDescriptor> descs;
|
||||
|
||||
@ -714,8 +724,16 @@ protected:
|
||||
supportedPrimitiveDescriptors.push_back({config, implType});
|
||||
}
|
||||
|
||||
void prepareMemory(mkldnn::primitive_desc_iterator& itpd);
|
||||
|
||||
bool isDynamic = false;
|
||||
|
||||
bool isInputTensorAtPortEmpty(size_t port) const;
|
||||
bool isOutputTensorAtPortEmpty(size_t port) const;
|
||||
|
||||
bool hasEmptyInputTensors() const;
|
||||
bool hasEmptyOutputTensors() const;
|
||||
|
||||
bool inputShapesDefined() const;
|
||||
bool outputShapesDefined() const;
|
||||
bool shapesDefined() const;
|
||||
@ -738,6 +756,7 @@ protected:
|
||||
}
|
||||
|
||||
std::vector<VectorDims> lastInputDims = {};
|
||||
|
||||
std::shared_ptr<ngraph::Node> opToShapeInfer;
|
||||
|
||||
private:
|
||||
@ -780,7 +799,6 @@ private:
|
||||
return PD(*selected_desc_ptr, engine);
|
||||
}
|
||||
|
||||
void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd);
|
||||
enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 };
|
||||
ConstantType checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes);
|
||||
|
||||
|
@ -80,6 +80,7 @@
|
||||
#include "nodes/mkldnn_reduce_node.h"
|
||||
#include "nodes/mkldnn_if_node.h"
|
||||
#include "nodes/mkldnn_ctc_greedy_decoder_node.h"
|
||||
#include "nodes/mkldnn_non_zero.h"
|
||||
|
||||
#define MKLDNN_NODE(__prim, __type) \
|
||||
registerNodeIfRequired(MKLDNNPlugin, __prim, __type, MKLDNNNodeImpl<__prim>)
|
||||
@ -168,4 +169,5 @@ MKLDNNPlugin::MKLDNNNode::NodesFactory::NodesFactory()
|
||||
MKLDNN_NODE(MKLDNNTopKNode, TopK);
|
||||
MKLDNN_NODE(MKLDNNStridedSliceNode, StridedSlice);
|
||||
MKLDNN_NODE(MKLDNNGRNNode, GRN);
|
||||
MKLDNN_NODE(MKLDNNNonZeroNode, NonZero);
|
||||
}
|
||||
|
@ -504,23 +504,24 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "Engine::LoadExeNetworkImpl");
|
||||
|
||||
// verification of supported input
|
||||
InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
|
||||
for (const auto &ii : _networkInputs) {
|
||||
for (const auto &ii : network.getInputsInfo()) {
|
||||
auto input_precision = ii.second->getPrecision();
|
||||
if (input_precision != InferenceEngine::Precision::FP64 &&
|
||||
input_precision != InferenceEngine::Precision::FP32 &&
|
||||
input_precision != InferenceEngine::Precision::I32 &&
|
||||
input_precision != InferenceEngine::Precision::U32 &&
|
||||
input_precision != InferenceEngine::Precision::U16 &&
|
||||
input_precision != InferenceEngine::Precision::I16 &&
|
||||
input_precision != InferenceEngine::Precision::I8 &&
|
||||
input_precision != InferenceEngine::Precision::U8 &&
|
||||
input_precision != InferenceEngine::Precision::BF16 &&
|
||||
input_precision != InferenceEngine::Precision::BOOL &&
|
||||
input_precision != InferenceEngine::Precision::I64 &&
|
||||
input_precision != InferenceEngine::Precision::U64) {
|
||||
|
||||
using hash_t = std::hash<typename std::underlying_type<Precision::ePrecision>::type>;
|
||||
|
||||
static const std::unordered_set<Precision::ePrecision, hash_t> supported_precisions = {
|
||||
Precision::U8, Precision::I8,
|
||||
Precision::U16, Precision::I16,
|
||||
Precision::U32, Precision::I32,
|
||||
Precision::U64, Precision::I64,
|
||||
Precision::BF16, Precision::FP16,
|
||||
Precision::FP32, Precision::FP64,
|
||||
Precision::BOOL
|
||||
};
|
||||
|
||||
if (!supported_precisions.count(input_precision)) {
|
||||
IE_THROW(NotImplemented)
|
||||
<< "Input image format " << input_precision << " is not supported yet...";
|
||||
<< "Input image format " << input_precision << " is not supported yet...";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -18,7 +18,6 @@ public:
|
||||
operator bool() const;
|
||||
MKLDNNPrimitive& operator=(const std::shared_ptr<mkldnn::primitive>& primitive);
|
||||
mkldnn::primitive operator*();
|
||||
|
||||
void reset(mkldnn::primitive* primitive);
|
||||
|
||||
private:
|
||||
|
@ -36,8 +36,9 @@ MKLDNNPlugin::ConvertMatMulToFC::ConvertMatMulToFC() {
|
||||
auto rank_a = shape_a.rank().get_length();
|
||||
auto rank_b = shape_b.rank().get_length();
|
||||
|
||||
// Transformation to FC is not supported for 1D second input
|
||||
if (rank_b == 1) {
|
||||
// Transformation to FC is not supported for 1D inputs or for inputs with rank greater than 3
|
||||
if (rank_a == 1 || rank_b == 1 ||
|
||||
rank_a > 3 || rank_b > 3) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -47,7 +48,6 @@ MKLDNNPlugin::ConvertMatMulToFC::ConvertMatMulToFC() {
|
||||
std::count_if(shape_b.begin(), shape_b.end(), [](ngraph::Dimension x) { return x != 1; }) > 2) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* get_aligned_shapes function align two input shapes to have the same size and
|
||||
* the same batch dimensions (last two dimensions are not comparable).
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include "ngraph/op/fake_quantize.hpp"
|
||||
#include "ngraph/pass/manager.hpp"
|
||||
#include "reshape_fc_fusion.hpp"
|
||||
#include "reshape_fully_connected.hpp"
|
||||
#include "align_matmul_input_ranks.hpp"
|
||||
#include "reshape_prelu.hpp"
|
||||
#include "convert_broadcast_to_tiles.hpp"
|
||||
@ -29,7 +28,6 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphF
|
||||
manager.register_pass<AlignMatMulInputRanks>();
|
||||
manager.register_pass<ConvertTileToSeqTiles>();
|
||||
manager.register_pass<FullyConnectedBiasFusion>();
|
||||
manager.register_pass<ReshapeFullyConnected>();
|
||||
manager.register_pass<ConvertToPowerStatic>();
|
||||
manager.register_pass<ConvertToLeakyRelu>();
|
||||
manager.register_pass<ReshapePRelu>();
|
||||
|
@ -1,114 +0,0 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "reshape_fully_connected.hpp"
|
||||
#include "op/fully_connected.hpp"
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <ngraph/opsets/opset7.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/pattern/op/or.hpp>
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include <numeric>
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ReshapeFullyConnected, "ReshapeFullyConnected", 0);
|
||||
|
||||
MKLDNNPlugin::ReshapeFullyConnected::ReshapeFullyConnected() {
|
||||
ngraph::OutputVector twoInputs = {
|
||||
ngraph::pattern::any_input(ngraph::pattern::has_static_rank()), ngraph::pattern::any_input(ngraph::pattern::has_static_shape())};
|
||||
ngraph::OutputVector threeInputs = {
|
||||
ngraph::pattern::any_input(ngraph::pattern::has_static_rank()), ngraph::pattern::any_input(ngraph::pattern::has_static_shape()),
|
||||
ngraph::pattern::any_input()};
|
||||
auto fcTwoInputs = ngraph::pattern::wrap_type<MKLDNNPlugin::FullyConnectedNode>(twoInputs, ngraph::pattern::has_static_rank());
|
||||
auto fcThreeInputs = ngraph::pattern::wrap_type<MKLDNNPlugin::FullyConnectedNode>(threeInputs, ngraph::pattern::has_static_rank());
|
||||
const auto fcTwoOrThreeInputs = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{fcTwoInputs, fcThreeInputs});
|
||||
|
||||
ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) {
|
||||
auto fc = std::dynamic_pointer_cast<MKLDNNPlugin::FullyConnectedNode>(m.get_match_root());
|
||||
if (!fc || transformation_callback(fc)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto fc_input_shape = fc->get_input_partial_shape(0);
|
||||
auto input_rank = fc_input_shape.rank().get_length();
|
||||
auto output_shape = fc->get_output_partial_shape(0);
|
||||
|
||||
if (input_rank == 2 || input_rank == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
ngraph::NodeVector new_ops;
|
||||
int64_t K = *(fc->get_input_shape(1).rbegin()); // requested 2nd input with static shape in the matcher
|
||||
auto reshape = std::make_shared<ngraph::opset1::Reshape>(
|
||||
fc->input_value(0), ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{-1, K}), false);
|
||||
if (reshape->get_output_partial_shape(0).rank().is_dynamic())
|
||||
return false;
|
||||
new_ops.push_back(reshape);
|
||||
|
||||
reshape->set_friendly_name(fc->get_friendly_name() + "/Reshape");
|
||||
|
||||
// Calculate output shape for new FullyConnected layer
|
||||
// [I, K] * [O, K] = [I, O]
|
||||
auto I = reshape->get_output_partial_shape(0)[0];
|
||||
auto O = fc->get_input_partial_shape(1)[0];
|
||||
ngraph::PartialShape output_shape_new{I, O};
|
||||
|
||||
std::shared_ptr<ngraph::Node> fc_new;
|
||||
if (fc->get_input_size() == 2) {
|
||||
fc_new = std::make_shared<MKLDNNPlugin::FullyConnectedNode>(reshape,
|
||||
fc->input_value(1),
|
||||
output_shape_new.rank(),
|
||||
fc->get_output_type());
|
||||
} else if (fc->get_input_size() == 3) {
|
||||
fc_new = std::make_shared<MKLDNNPlugin::FullyConnectedNode>(reshape,
|
||||
fc->input_value(1),
|
||||
fc->input_value(2),
|
||||
output_shape_new.rank(),
|
||||
fc->get_output_type());
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
new_ops.push_back(fc_new);
|
||||
|
||||
if (output_shape != output_shape_new) {
|
||||
auto I_idxs = std::vector<size_t>(input_rank - 1);
|
||||
std::iota(I_idxs.begin(), I_idxs.end(), 0);
|
||||
auto A_input_shape = ngraph::op::util::make_try_fold<ngraph::opset7::ShapeOf>(fc->input_value(0));
|
||||
auto B_input_shape = ngraph::op::util::make_try_fold<ngraph::opset7::ShapeOf>(fc->input_value(1));
|
||||
auto I_node = ngraph::op::util::node_to_get_shape_value_of_indices_from_shape_node(A_input_shape, {I_idxs});
|
||||
auto O_node = ngraph::op::util::node_to_get_shape_value_of_indices_from_shape_node(B_input_shape, {0});
|
||||
ngraph::OutputVector output_shape_dims{I_node, O_node};
|
||||
|
||||
const auto original_rank = fc->get_output_rank();
|
||||
NGRAPH_CHECK(original_rank.is_static());
|
||||
if (input_rank < original_rank.get_length()) {
|
||||
const size_t const_shape_value = original_rank.get_length() - input_rank;
|
||||
output_shape_dims.insert(
|
||||
output_shape_dims.begin(), ngraph::opset1::Constant::create(I_node->get_element_type(), { const_shape_value }, { 1 }));
|
||||
}
|
||||
|
||||
auto reshape_output_shape = ngraph::op::util::make_try_fold<ngraph::opset1::Concat>(output_shape_dims, 0);
|
||||
auto reshape_output = std::make_shared<ngraph::opset1::Reshape>(fc_new, reshape_output_shape, false);
|
||||
new_ops.push_back(A_input_shape);
|
||||
new_ops.push_back(B_input_shape);
|
||||
new_ops.push_back(I_node);
|
||||
new_ops.push_back(O_node);
|
||||
new_ops.push_back(reshape_output_shape);
|
||||
new_ops.push_back(reshape_output);
|
||||
reshape_output->set_friendly_name(fc->get_friendly_name());
|
||||
fc_new->set_friendly_name(fc->get_friendly_name() + "/FC");
|
||||
ngraph::copy_runtime_info(fc, new_ops);
|
||||
ngraph::replace_node(fc, reshape_output);
|
||||
} else {
|
||||
fc_new->set_friendly_name(fc->get_friendly_name());
|
||||
ngraph::copy_runtime_info(fc, new_ops);
|
||||
ngraph::replace_node(fc, fc_new);
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(fcTwoOrThreeInputs, "ReshapeFullyConnected");
|
||||
this->register_matcher(m, callback);
|
||||
}
|
@ -1,25 +0,0 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
|
||||
/*
|
||||
* Description:
|
||||
* ReshapeFullyConnected transformation detects FullyConnected operations
|
||||
* and for each operation where input shape is greater than 2 inserts Reshape
|
||||
* operations before and after FullyConnected operation. This transformation is
|
||||
* required because of IE restrictions.
|
||||
*/
|
||||
|
||||
namespace MKLDNNPlugin {
|
||||
|
||||
class ReshapeFullyConnected: public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
ReshapeFullyConnected();
|
||||
};
|
||||
|
||||
} // namespace MKLDNNPlugin
|
@ -4,27 +4,208 @@
|
||||
|
||||
#include "cpu_convert.h"
|
||||
#include "cpu_memcpy.h"
|
||||
#include "utils/bfloat16.hpp"
|
||||
#include <utils/bfloat16.hpp>
|
||||
#include <utils/general_utils.h>
|
||||
#include <mkldnn_selective_build.h>
|
||||
#include <ie_parallel.hpp>
|
||||
#include <openvino/core/type/float16.hpp>
|
||||
#include <cpu/x64/jit_generator.hpp>
|
||||
#include <algorithm>
|
||||
#include <type_traits>
|
||||
#include <tuple>
|
||||
#include <ie_parallel.hpp>
|
||||
#include <cmath>
|
||||
|
||||
using namespace MKLDNNPlugin;
|
||||
using namespace InferenceEngine;
|
||||
using namespace dnnl::impl::cpu::x64;
|
||||
using namespace dnnl::impl::utils;
|
||||
using namespace Xbyak;
|
||||
|
||||
namespace {
|
||||
|
||||
template<typename srcType, typename dstType>
|
||||
void convert(const void *srcPtr, void *dstPtr, const size_t size) {
|
||||
if (std::is_same<srcType, dstType>::value) {
|
||||
cpu_memcpy(dstPtr, srcPtr, size*sizeof(dstType));
|
||||
} else {
|
||||
const srcType *srcData = reinterpret_cast<const srcType *>(srcPtr);
|
||||
dstType *dstData = reinterpret_cast<dstType *>(dstPtr);
|
||||
template <typename src_t, typename dst_t>
|
||||
void convert_vec(jit_generator & gen,
|
||||
const RegExp & src,
|
||||
const RegExp & dst);
|
||||
|
||||
parallel_for(size, [&](size_t i) {
|
||||
dstData[i] = static_cast<dstType>(srcData[i]);
|
||||
template <>
|
||||
void convert_vec<ov::float16, float>(jit_generator & gen,
|
||||
const RegExp & src,
|
||||
const RegExp & dst) {
|
||||
auto const & f16vec = gen.xmm3;
|
||||
auto const & f32vec = gen.ymm4;
|
||||
|
||||
gen.movdqu(f16vec, gen.xword[src]);
|
||||
gen.vcvtph2ps(f32vec, f16vec);
|
||||
gen.vmovups(gen.yword[dst], f32vec);
|
||||
}
|
||||
|
||||
template <>
|
||||
void convert_vec<float, ov::float16>(jit_generator & gen,
|
||||
const RegExp & src,
|
||||
const RegExp & dst) {
|
||||
auto const & f16vec = gen.xmm3;
|
||||
auto const & f32vec = gen.ymm4;
|
||||
|
||||
gen.vmovups(f32vec, gen.yword[src]);
|
||||
gen.vcvtps2ph(f16vec, f32vec, 0);
|
||||
gen.movdqu(gen.xword[dst], f16vec);
|
||||
}
|
||||
|
||||
class jit_convert_array : public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_convert_array)
|
||||
|
||||
void generate() override {
|
||||
const size_t vlen = 8u;
|
||||
const size_t vlen_log2 = 3;
|
||||
|
||||
auto reg_src = rax;
|
||||
auto reg_dst = rbx;
|
||||
auto reg_sz = rdx;
|
||||
|
||||
Label tail, exit;
|
||||
|
||||
preamble();
|
||||
|
||||
mov(reg_src, ptr[param1 + offsetof(args_t, src)]);
|
||||
mov(reg_dst, ptr[param1 + offsetof(args_t, out)]);
|
||||
mov(reg_sz, ptr[param1 + offsetof(args_t, count)]);
|
||||
|
||||
xor_(rsi, rsi);
|
||||
mov(r8, reg_sz);
|
||||
shr(r8, vlen_log2);
|
||||
|
||||
foreach(rsi, 1, r8, [&, this](const Xbyak::Reg64& idx) {
|
||||
_convert_vec(*this, reg_src, reg_dst);
|
||||
add(reg_src, _src_size * vlen);
|
||||
add(reg_dst, _dst_size * vlen);
|
||||
});
|
||||
|
||||
L(tail);
|
||||
|
||||
shl(rsi, vlen_log2);
|
||||
sub(reg_sz, rsi);
|
||||
test(reg_sz, reg_sz);
|
||||
jz(exit);
|
||||
|
||||
// allocate array for 8 floats on stack
|
||||
sub(rsp, vlen * sizeof(float));
|
||||
mov(r8, rsp);
|
||||
|
||||
vpxor(ymm4, ymm4, ymm4);
|
||||
vmovups(yword[r8], ymm4);
|
||||
|
||||
// Tail conversion
|
||||
copy(r8, reg_src, reg_sz, _src_size);
|
||||
_convert_vec(*this, r8, r8);
|
||||
copy(reg_dst, r8, reg_sz, _dst_size);
|
||||
|
||||
// Free the array on stack
|
||||
add(rsp, vlen * sizeof(float));
|
||||
|
||||
L(exit);
|
||||
|
||||
postamble();
|
||||
}
|
||||
|
||||
void foreach(const Xbyak::Reg64& idx,
|
||||
size_t step,
|
||||
const Xbyak::Reg64& end,
|
||||
std::function<void(const Xbyak::Reg64&)> && fn) {
|
||||
Label loop, exit;
|
||||
|
||||
L(loop);
|
||||
cmp(idx, end);
|
||||
jge(exit);
|
||||
|
||||
fn(idx);
|
||||
|
||||
add(idx, step);
|
||||
jmp(loop);
|
||||
L(exit);
|
||||
}
|
||||
|
||||
void copy(const Xbyak::Reg64& dst,
|
||||
const Xbyak::Reg64& src,
|
||||
const Xbyak::Reg64& size,
|
||||
size_t item_size) {
|
||||
push(rsi);
|
||||
push(r15);
|
||||
|
||||
xor_(rsi, rsi);
|
||||
|
||||
auto address_frame = [this](size_t size) -> const AddressFrame& {
|
||||
switch (size) {
|
||||
case 1: return byte;
|
||||
case 2: return word;
|
||||
case 4: return dword;
|
||||
case 8: return qword;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return ptr;
|
||||
};
|
||||
|
||||
const auto & addr_frame = address_frame(item_size);
|
||||
|
||||
foreach(rsi, 1, size, [&, this](const Xbyak::Reg64& idx) {
|
||||
mov(r15, addr_frame[src + idx * item_size]);
|
||||
mov(addr_frame[dst + idx * item_size], r15);
|
||||
});
|
||||
|
||||
pop(r15);
|
||||
pop(rsi);
|
||||
}
|
||||
|
||||
public:
|
||||
typedef struct {
|
||||
const void* src;
|
||||
void* out;
|
||||
const size_t count;
|
||||
} args_t;
|
||||
|
||||
typedef void (*fn_t)(const args_t*);
|
||||
|
||||
typedef void (*convert_vec_t)(jit_generator &,
|
||||
const RegExp &,
|
||||
const RegExp &);
|
||||
|
||||
jit_convert_array(convert_vec_t convert_vec,
|
||||
size_t src_size,
|
||||
size_t dst_size)
|
||||
: _convert_vec(convert_vec)
|
||||
, _src_size(src_size)
|
||||
, _dst_size(dst_size) {}
|
||||
|
||||
template<typename src_t, typename dst_t>
|
||||
static fn_t get() {
|
||||
if (mayiuse(avx2) && cpu().has(util::Cpu::tF16C)) {
|
||||
static jit_convert_array converter(convert_vec<src_t, dst_t>, sizeof(src_t), sizeof(dst_t));
|
||||
auto & generator = static_cast<jit_generator&>(converter);
|
||||
generator.create_kernel();
|
||||
return (fn_t)generator.jit_ker();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
private:
|
||||
convert_vec_t _convert_vec;
|
||||
size_t _src_size;
|
||||
size_t _dst_size;
|
||||
};
|
||||
|
||||
template <typename TI, typename TO>
|
||||
void jit_convert(const TI* arg, TO* out, size_t count) {
|
||||
using jit_impl = jit_convert_array;
|
||||
static auto converter = jit_impl::get<TI, TO>();
|
||||
|
||||
if (converter) {
|
||||
typename jit_impl::args_t args = { arg, out, count };
|
||||
converter(&args);
|
||||
} else {
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
out[i] = static_cast<TO>(arg[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -35,84 +216,391 @@ struct PrecisionInfo {
|
||||
|
||||
template <>
|
||||
struct PrecisionInfo<Precision::BF16> {
|
||||
using value_type = MKLDNNPlugin::bfloat16_t;
|
||||
using value_type = bfloat16_t;
|
||||
};
|
||||
|
||||
// Associates Precision::FP16 with ov::float16 as its element type.
template <>
|
||||
struct PrecisionInfo<Precision::FP16> {
|
||||
using value_type = ov::float16;
|
||||
};
|
||||
|
||||
// Associates Precision::BOOL with uint8_t as its element type.
template <>
|
||||
struct PrecisionInfo<Precision::BOOL> {
|
||||
using value_type = uint8_t;
|
||||
};
|
||||
|
||||
// Value interval of element type T that can be narrowed to other precisions via fit().
// T is the element type; U is the type the bounds are stored in: float when T is
// ov::float16 or bfloat16_t, otherwise T itself.
template<typename T,
|
||||
typename U = typename std::conditional<
|
||||
std::is_same<ov::float16, T>::value
|
||||
|| std::is_same<bfloat16_t, T>::value,
|
||||
float, T>::type>
|
||||
struct Range {
|
||||
// Narrows the stored interval to what prec can hold and returns the (lower, upper) pair.
const std::tuple<U, U> & fit(const Precision & prec);
|
||||
|
||||
private:
|
||||
// (lower, upper) bounds; start as the full range of T.
std::tuple<U, U> _range {
|
||||
std::numeric_limits<T>::lowest(),
|
||||
std::numeric_limits<T>::max()
|
||||
};
|
||||
};
|
||||
|
||||
template<typename T, typename U>
|
||||
const std::tuple<U, U> & Range<T, U>::fit(const Precision & prec) {
|
||||
if (prec.is_float()) {
|
||||
double lbound, ubound;
|
||||
switch (prec) {
|
||||
case Precision::BF16:
|
||||
lbound = static_cast<double>(std::numeric_limits<bfloat16_t>::lowest());
|
||||
ubound = static_cast<double>(std::numeric_limits<bfloat16_t>::max());
|
||||
break;
|
||||
case Precision::FP16:
|
||||
lbound = static_cast<double>(std::numeric_limits<ov::float16>::lowest());
|
||||
ubound = static_cast<double>(std::numeric_limits<ov::float16>::max());
|
||||
break;
|
||||
case Precision::FP32:
|
||||
lbound = static_cast<double>(std::numeric_limits<float>::lowest());
|
||||
ubound = static_cast<double>(std::numeric_limits<float>::max());
|
||||
break;
|
||||
case Precision::FP64:
|
||||
lbound = std::numeric_limits<double>::lowest();
|
||||
ubound = std::numeric_limits<double>::max();
|
||||
break;
|
||||
default:
|
||||
IE_THROW() << "Unsupported precision";
|
||||
}
|
||||
std::get<0>(_range) = static_cast<U>(std::max(static_cast<double>(std::get<0>(_range)), lbound));
|
||||
std::get<1>(_range) = static_cast<U>(std::min(static_cast<double>(std::get<1>(_range)), ubound));
|
||||
} else {
|
||||
int64_t lbound;
|
||||
uint64_t ubound;
|
||||
switch (prec) {
|
||||
case Precision::BOOL:
|
||||
case Precision::U8:
|
||||
lbound = static_cast<int64_t>(std::numeric_limits<uint8_t>::lowest());
|
||||
ubound = static_cast<uint64_t>(std::numeric_limits<uint8_t>::max());
|
||||
break;
|
||||
case Precision::I8:
|
||||
lbound = static_cast<int64_t>(std::numeric_limits<int8_t>::lowest());
|
||||
ubound = static_cast<uint64_t>(std::numeric_limits<int8_t>::max());
|
||||
break;
|
||||
case Precision::U16:
|
||||
lbound = static_cast<int64_t>(std::numeric_limits<uint16_t>::lowest());
|
||||
ubound = static_cast<uint64_t>(std::numeric_limits<uint16_t>::max());
|
||||
break;
|
||||
case Precision::I16:
|
||||
lbound = static_cast<int64_t>(std::numeric_limits<int16_t>::lowest());
|
||||
ubound = static_cast<uint64_t>(std::numeric_limits<int16_t>::max());
|
||||
break;
|
||||
case Precision::U32:
|
||||
lbound = static_cast<int64_t>(std::numeric_limits<uint32_t>::lowest());
|
||||
ubound = static_cast<uint64_t>(std::numeric_limits<uint32_t>::max());
|
||||
break;
|
||||
case Precision::I32:
|
||||
lbound = static_cast<int64_t>(std::numeric_limits<int32_t>::lowest());
|
||||
ubound = static_cast<uint64_t>(std::numeric_limits<int32_t>::max());
|
||||
break;
|
||||
case Precision::U64:
|
||||
lbound = static_cast<int64_t>(std::numeric_limits<uint64_t>::lowest());
|
||||
ubound = static_cast<uint64_t>(std::numeric_limits<uint64_t>::max());
|
||||
break;
|
||||
case Precision::I64:
|
||||
lbound = static_cast<int64_t>(std::numeric_limits<int64_t>::lowest());
|
||||
ubound = static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
|
||||
break;
|
||||
default:
|
||||
IE_THROW() << "Unsupported precision";
|
||||
}
|
||||
using ltype = typename std::conditional<
|
||||
std::is_floating_point<U>::value,
|
||||
double, int64_t>::type;
|
||||
using utype = typename std::conditional<
|
||||
std::is_floating_point<U>::value,
|
||||
double, uint64_t>::type;
|
||||
std::get<0>(_range) = static_cast<U>(std::max(static_cast<ltype>(std::get<0>(_range)), static_cast<ltype>(lbound)));
|
||||
std::get<1>(_range) = static_cast<U>(std::min(static_cast<utype>(std::get<1>(_range)), static_cast<utype>(ubound)));
|
||||
}
|
||||
return _range;
|
||||
}
|
||||
|
||||
struct ConvertContext {
|
||||
const void *srcPtr;
|
||||
void *dstPtr;
|
||||
size_t size;
|
||||
Precision interimPrc;
|
||||
Precision dstPrc;
|
||||
bool converted;
|
||||
|
||||
template<typename T>
|
||||
std::tuple<T, T> range() const {
|
||||
Range<T> r;
|
||||
r.fit(interimPrc);
|
||||
return r.fit(dstPrc);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct ConvertPrecision {
|
||||
using src_t = typename std::tuple_element<0, T>::type;
|
||||
using dst_t = typename std::tuple_element<1, T>::type;
|
||||
struct ConvertPrecision;
|
||||
|
||||
template<typename src_t, typename dst_t>
|
||||
struct ConvertPrecision<std::tuple<src_t, dst_t>> {
|
||||
void operator()(ConvertContext & ctx) {
|
||||
convert<src_t, dst_t>(ctx.srcPtr, ctx.dstPtr, ctx.size);
|
||||
auto src = static_cast<const src_t *>(ctx.srcPtr);
|
||||
auto dst = static_cast<dst_t *>(ctx.dstPtr);
|
||||
src_t lbound, ubound;
|
||||
std::tie(lbound, ubound) = ctx.range<src_t>();
|
||||
|
||||
if (std::is_integral<src_t>::value
|
||||
|| ctx.interimPrc.is_float()
|
||||
|| std::is_integral<dst_t>::value) {
|
||||
parallel_for(ctx.size, [&](size_t i) {
|
||||
dst[i] = static_cast<dst_t>(std::max(std::min(src[i], ubound), lbound));
|
||||
});
|
||||
} else {
|
||||
parallel_for(ctx.size, [&](size_t i) {
|
||||
dst[i] = static_cast<dst_t>(std::trunc(std::max(std::min(src[i], ubound), lbound)));
|
||||
});
|
||||
}
|
||||
|
||||
ctx.converted = true;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct ConvertPrecision<std::tuple<float, bfloat16_t>> {
|
||||
void operator()(ConvertContext & ctx) {
|
||||
auto src = static_cast<const float *>(ctx.srcPtr);
|
||||
auto dst = static_cast<bfloat16_t *>(ctx.dstPtr);
|
||||
|
||||
if (ctx.interimPrc.is_float()) {
|
||||
parallel_for(ctx.size, [&](size_t i) {
|
||||
dst[i] = static_cast<bfloat16_t>(src[i]);
|
||||
});
|
||||
} else {
|
||||
float lbound, ubound;
|
||||
std::tie(lbound, ubound) = ctx.range<float>();
|
||||
parallel_for(ctx.size, [&](size_t i) {
|
||||
dst[i] = static_cast<bfloat16_t>(std::trunc(std::max(std::min(src[i], ubound), lbound)));
|
||||
});
|
||||
}
|
||||
|
||||
ctx.converted = true;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct ConvertPrecision<std::tuple<bfloat16_t, float>> {
|
||||
void operator()(ConvertContext & ctx) {
|
||||
auto src = static_cast<const bfloat16_t *>(ctx.srcPtr);
|
||||
auto dst = static_cast<float *>(ctx.dstPtr);
|
||||
|
||||
if (ctx.interimPrc.is_float()) {
|
||||
parallel_for(ctx.size, [&](size_t i) {
|
||||
dst[i] = static_cast<float>(src[i]);
|
||||
});
|
||||
} else {
|
||||
float lbound, ubound;
|
||||
std::tie(lbound, ubound) = ctx.range<bfloat16_t>();
|
||||
parallel_for(ctx.size, [&](size_t i) {
|
||||
dst[i] = std::trunc(std::max(std::min(static_cast<float>(src[i]), ubound), lbound));
|
||||
});
|
||||
}
|
||||
|
||||
ctx.converted = true;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename src_t>
|
||||
struct ConvertPrecision<std::tuple<src_t, ov::float16>> {
|
||||
void operator()(ConvertContext & ctx) {
|
||||
auto src = static_cast<const src_t *>(ctx.srcPtr);
|
||||
auto dst = static_cast<ov::float16 *>(ctx.dstPtr);
|
||||
|
||||
constexpr size_t batch = 64;
|
||||
const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
|
||||
typedef float batch_type[batch];
|
||||
|
||||
src_t lbound, ubound;
|
||||
std::tie(lbound, ubound) = ctx.range<src_t>();
|
||||
|
||||
if (std::is_integral<src_t>::value
|
||||
|| ctx.interimPrc.is_float()) {
|
||||
parallel_for(iterations, [&](size_t i) {
|
||||
batch_type tmp;
|
||||
const size_t offset = i * batch;
|
||||
const size_t current_batch_size = std::min(ctx.size - offset, batch);
|
||||
for (size_t j = 0; j < current_batch_size; ++j) // src_t -> fp32
|
||||
tmp[j] = static_cast<float>(std::max(std::min(src[offset + j], ubound), lbound));
|
||||
jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16
|
||||
});
|
||||
} else {
|
||||
parallel_for(iterations, [&](size_t i) {
|
||||
batch_type tmp;
|
||||
const size_t offset = i * batch;
|
||||
const size_t current_batch_size = std::min(ctx.size - offset, batch);
|
||||
for (size_t j = 0; j < current_batch_size; ++j) // src_t -> fp32
|
||||
tmp[j] = static_cast<float>(std::trunc(std::max(std::min(src[offset + j], ubound), lbound)));
|
||||
jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16
|
||||
});
|
||||
}
|
||||
|
||||
ctx.converted = true;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename dst_t>
|
||||
struct ConvertPrecision<std::tuple<ov::float16, dst_t>> {
|
||||
void operator()(ConvertContext & ctx) {
|
||||
auto src = static_cast<const ov::float16 *>(ctx.srcPtr);
|
||||
auto dst = static_cast<dst_t *>(ctx.dstPtr);
|
||||
|
||||
constexpr size_t batch = 64;
|
||||
const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
|
||||
typedef float batch_type[batch];
|
||||
|
||||
float lbound, ubound;
|
||||
std::tie(lbound, ubound) = ctx.range<ov::float16>();
|
||||
|
||||
if (ctx.interimPrc.is_float()
|
||||
|| std::is_integral<dst_t>::value) {
|
||||
parallel_for(iterations, [&](size_t i) {
|
||||
batch_type tmp;
|
||||
const size_t offset = i * batch;
|
||||
const size_t current_batch_size = std::min(ctx.size - offset, batch);
|
||||
jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32
|
||||
for (size_t j = 0; j < current_batch_size; ++j) // fp32 -> dst_t
|
||||
dst[offset + j] = static_cast<dst_t>(std::max(std::min(tmp[j], ubound), lbound));
|
||||
});
|
||||
} else {
|
||||
parallel_for(iterations, [&](size_t i) {
|
||||
batch_type tmp;
|
||||
const size_t offset = i * batch;
|
||||
const size_t current_batch_size = std::min(ctx.size - offset, batch);
|
||||
jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32
|
||||
for (size_t j = 0; j < current_batch_size; ++j) // fp32 -> dst_t
|
||||
dst[offset + j] = static_cast<dst_t>(std::trunc(std::max(std::min(tmp[j], ubound), lbound)));
|
||||
});
|
||||
}
|
||||
|
||||
ctx.converted = true;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct ConvertPrecision<std::tuple<ov::float16, ov::float16>> {
|
||||
void operator()(ConvertContext & ctx) {
|
||||
auto src = static_cast<const ov::float16 *>(ctx.srcPtr);
|
||||
auto dst = static_cast<ov::float16 *>(ctx.dstPtr);
|
||||
|
||||
constexpr size_t batch = 64;
|
||||
const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
|
||||
typedef float batch_type[batch];
|
||||
|
||||
float lbound, ubound;
|
||||
std::tie(lbound, ubound) = ctx.range<ov::float16>();
|
||||
|
||||
if (ctx.interimPrc.is_float()) {
|
||||
cpu_memcpy(dst, src, ctx.size * sizeof(ov::float16));
|
||||
} else {
|
||||
parallel_for(iterations, [&](size_t i) {
|
||||
batch_type tmp;
|
||||
const size_t offset = i * batch;
|
||||
const size_t current_batch_size = std::min(ctx.size - offset, batch);
|
||||
jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32
|
||||
for (size_t j = 0; j < current_batch_size; ++j) // truncate fp32
|
||||
tmp[j] = std::trunc(std::max(std::min(tmp[j], ubound), lbound));
|
||||
jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16
|
||||
});
|
||||
}
|
||||
|
||||
ctx.converted = true;
|
||||
}
|
||||
};
|
||||
|
||||
bool isConversionTruncatesRange(const Precision & from, const Precision & to) {
|
||||
return to.bitsSize() < from.bitsSize()
|
||||
|| (from.is_float() && !to.is_float()) // float -> integral
|
||||
|| (from.isSigned() != to.isSigned()) // signed <-> unsigned
|
||||
|| (to == Precision::BOOL && from != to); // T -> bool
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#define MKLDNN_CVT(ST, DT) OV_CASE2(Precision::ST, Precision::DT, PrecisionInfo<Precision::ST>::value_type, PrecisionInfo<Precision::DT>::value_type)
|
||||
|
||||
void cpu_convert(const void *srcPtr, void *dstPtr, Precision srcPrc, Precision dstPrc, const size_t size) {
|
||||
using namespace MKLDNNPlugin;
|
||||
#define MKLDNN_CVT_LIST \
|
||||
MKLDNN_CVT(U8, I8), MKLDNN_CVT(U8, U16), MKLDNN_CVT(U8, I16), MKLDNN_CVT(U8, U32), \
|
||||
MKLDNN_CVT(U8, I32), MKLDNN_CVT(U8, U64), MKLDNN_CVT(U8, I64), MKLDNN_CVT(U8, FP32), \
|
||||
MKLDNN_CVT(U8, FP16), MKLDNN_CVT(U8, BF16), MKLDNN_CVT(U8, FP64), MKLDNN_CVT(U8, BOOL), \
|
||||
MKLDNN_CVT(I8, U8), MKLDNN_CVT(I8, U16), MKLDNN_CVT(I8, I16), MKLDNN_CVT(I8, U32), \
|
||||
MKLDNN_CVT(I8, I32), MKLDNN_CVT(I8, U64), MKLDNN_CVT(I8, I64), MKLDNN_CVT(I8, FP32), \
|
||||
MKLDNN_CVT(I8, FP16), MKLDNN_CVT(I8, BF16), MKLDNN_CVT(I8, FP64), MKLDNN_CVT(I8, BOOL), \
|
||||
MKLDNN_CVT(U16, U8), MKLDNN_CVT(U16, I8), MKLDNN_CVT(U16, I16), MKLDNN_CVT(U16, U32), \
|
||||
MKLDNN_CVT(U16, I32), MKLDNN_CVT(U16, U64), MKLDNN_CVT(U16, I64), MKLDNN_CVT(U16, FP32), \
|
||||
MKLDNN_CVT(U16, FP16), MKLDNN_CVT(U16, BF16), MKLDNN_CVT(U16, FP64), MKLDNN_CVT(U16, BOOL), \
|
||||
MKLDNN_CVT(I16, U8), MKLDNN_CVT(I16, I8), MKLDNN_CVT(I16, U16), MKLDNN_CVT(I16, U32), \
|
||||
MKLDNN_CVT(I16, I32), MKLDNN_CVT(I16, U64), MKLDNN_CVT(I16, I64), MKLDNN_CVT(I16, FP32), \
|
||||
MKLDNN_CVT(I16, FP16), MKLDNN_CVT(I16, BF16), MKLDNN_CVT(I16, FP64), MKLDNN_CVT(I16, BOOL), \
|
||||
MKLDNN_CVT(U32, U8), MKLDNN_CVT(U32, I8), MKLDNN_CVT(U32, U16), MKLDNN_CVT(U32, I16), \
|
||||
MKLDNN_CVT(U32, I32), MKLDNN_CVT(U32, U64), MKLDNN_CVT(U32, I64), MKLDNN_CVT(U32, FP32), \
|
||||
MKLDNN_CVT(U32, FP16), MKLDNN_CVT(U32, BF16), MKLDNN_CVT(U32, FP64), MKLDNN_CVT(U32, BOOL), \
|
||||
MKLDNN_CVT(I32, U8), MKLDNN_CVT(I32, I8), MKLDNN_CVT(I32, U16), MKLDNN_CVT(I32, I16), \
|
||||
MKLDNN_CVT(I32, U32), MKLDNN_CVT(I32, U64), MKLDNN_CVT(I32, I64), MKLDNN_CVT(I32, FP32), \
|
||||
MKLDNN_CVT(I32, FP16), MKLDNN_CVT(I32, BF16), MKLDNN_CVT(I32, FP64), MKLDNN_CVT(I32, BOOL), \
|
||||
MKLDNN_CVT(U64, U8), MKLDNN_CVT(U64, I8), MKLDNN_CVT(U64, U16), MKLDNN_CVT(U64, I16), \
|
||||
MKLDNN_CVT(U64, U32), MKLDNN_CVT(U64, I32), MKLDNN_CVT(U64, I64), MKLDNN_CVT(U64, FP32), \
|
||||
MKLDNN_CVT(U64, FP16), MKLDNN_CVT(U64, BF16), MKLDNN_CVT(U64, FP64), MKLDNN_CVT(U64, BOOL), \
|
||||
MKLDNN_CVT(I64, U8), MKLDNN_CVT(I64, I8), MKLDNN_CVT(I64, U16), MKLDNN_CVT(I64, I16), \
|
||||
MKLDNN_CVT(I64, U32), MKLDNN_CVT(I64, I32), MKLDNN_CVT(I64, U64), MKLDNN_CVT(I64, FP32), \
|
||||
MKLDNN_CVT(I64, FP16), MKLDNN_CVT(I64, BF16), MKLDNN_CVT(I64, FP64), MKLDNN_CVT(I64, BOOL), \
|
||||
MKLDNN_CVT(FP32, U8), MKLDNN_CVT(FP32, I8), MKLDNN_CVT(FP32, U16), MKLDNN_CVT(FP32, I16), \
|
||||
MKLDNN_CVT(FP32, U32), MKLDNN_CVT(FP32, I32), MKLDNN_CVT(FP32, U64), MKLDNN_CVT(FP32, I64), \
|
||||
MKLDNN_CVT(FP32, FP16), MKLDNN_CVT(FP32, BF16), MKLDNN_CVT(FP32, FP64), MKLDNN_CVT(FP32, BOOL), \
|
||||
MKLDNN_CVT(FP16, U8), MKLDNN_CVT(FP16, I8), MKLDNN_CVT(FP16, U16), MKLDNN_CVT(FP16, I16), \
|
||||
MKLDNN_CVT(FP16, U32), MKLDNN_CVT(FP16, I32), MKLDNN_CVT(FP16, U64), MKLDNN_CVT(FP16, I64), \
|
||||
MKLDNN_CVT(FP16, FP32), MKLDNN_CVT(FP16, BF16), MKLDNN_CVT(FP16, FP64), MKLDNN_CVT(FP16, BOOL), \
|
||||
MKLDNN_CVT(BF16, U8), MKLDNN_CVT(BF16, I8), MKLDNN_CVT(BF16, U16), MKLDNN_CVT(BF16, I16), \
|
||||
MKLDNN_CVT(BF16, U32), MKLDNN_CVT(BF16, I32), MKLDNN_CVT(BF16, U64), MKLDNN_CVT(BF16, I64), \
|
||||
MKLDNN_CVT(BF16, FP32), MKLDNN_CVT(BF16, FP16), MKLDNN_CVT(BF16, FP64), MKLDNN_CVT(BF16, BOOL), \
|
||||
MKLDNN_CVT(FP64, U8), MKLDNN_CVT(FP64, I8), MKLDNN_CVT(FP64, U16), MKLDNN_CVT(FP64, I16), \
|
||||
MKLDNN_CVT(FP64, U32), MKLDNN_CVT(FP64, I32), MKLDNN_CVT(FP64, U64), MKLDNN_CVT(FP64, I64), \
|
||||
MKLDNN_CVT(FP64, FP32), MKLDNN_CVT(FP64, FP16), MKLDNN_CVT(FP64, BF16), MKLDNN_CVT(FP64, BOOL), \
|
||||
MKLDNN_CVT(BOOL, U8), MKLDNN_CVT(BOOL, I8), MKLDNN_CVT(BOOL, U16), MKLDNN_CVT(BOOL, I16), \
|
||||
MKLDNN_CVT(BOOL, U32), MKLDNN_CVT(BOOL, I32), MKLDNN_CVT(BOOL, U64), MKLDNN_CVT(BOOL, I64), \
|
||||
MKLDNN_CVT(BOOL, FP32), MKLDNN_CVT(BOOL, FP16), MKLDNN_CVT(BOOL, BF16), MKLDNN_CVT(BOOL, FP64), \
|
||||
MKLDNN_CVT(U8, U8), MKLDNN_CVT(I8, I8), MKLDNN_CVT(U16, U16), MKLDNN_CVT(I16, I16), \
|
||||
MKLDNN_CVT(U32, U32), MKLDNN_CVT(I32, I32), MKLDNN_CVT(U64, U64), MKLDNN_CVT(I64, I64), \
|
||||
MKLDNN_CVT(FP32, FP32), MKLDNN_CVT(FP16, FP16), MKLDNN_CVT(BF16, BF16), MKLDNN_CVT(FP64, FP64), \
|
||||
MKLDNN_CVT(BOOL, BOOL)
|
||||
|
||||
// Convenience overload without a separate interim precision: forwards to the
// six-argument cpu_convert, passing dstPrc as both the interim and the destination precision.
void cpu_convert(const void *srcPtr, void *dstPtr, Precision srcPrc, Precision dstPrc, const size_t size) {
|
||||
cpu_convert(srcPtr, dstPtr, srcPrc, dstPrc, dstPrc, size);
|
||||
}
|
||||
|
||||
void cpu_convert(const void *srcPtr,
|
||||
void *dstPtr,
|
||||
InferenceEngine::Precision srcPrc,
|
||||
InferenceEngine::Precision interimPrc,
|
||||
InferenceEngine::Precision dstPrc,
|
||||
const size_t size) {
|
||||
if (srcPtr == nullptr || dstPtr == nullptr)
|
||||
IE_THROW() << "cpu_convert has null data pointer";
|
||||
|
||||
if (srcPrc == dstPrc) {
|
||||
cpu_memcpy(dstPtr, srcPtr, size*dstPrc.size());
|
||||
return;
|
||||
if (srcPrc == dstPrc && srcPrc == interimPrc) {
|
||||
cpu_memcpy(dstPtr, srcPtr, size * dstPrc.size());
|
||||
} else {
|
||||
ConvertContext ctx = {
|
||||
srcPtr,
|
||||
dstPtr,
|
||||
size,
|
||||
interimPrc,
|
||||
dstPrc,
|
||||
false
|
||||
};
|
||||
OV_SWITCH(MKLDNNPlugin, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc), MKLDNN_CVT_LIST);
|
||||
if (!ctx.converted)
|
||||
IE_THROW() << "cpu_convert can't convert from: " << srcPrc << " precision to: " << dstPrc;
|
||||
}
|
||||
|
||||
ConvertContext ctx = { srcPtr, dstPtr, size, false };
|
||||
|
||||
OV_SWITCH(MKLDNNPlugin, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc),
|
||||
MKLDNN_CVT(U8, I8), MKLDNN_CVT(U8, U16), MKLDNN_CVT(U8, I16),
|
||||
MKLDNN_CVT(U8, I32), MKLDNN_CVT(U8, U64), MKLDNN_CVT(U8, I64),
|
||||
MKLDNN_CVT(U8, FP32), MKLDNN_CVT(U8, BF16), MKLDNN_CVT(U8, BOOL),
|
||||
MKLDNN_CVT(I8, U8), MKLDNN_CVT(I8, U16), MKLDNN_CVT(I8, I16),
|
||||
MKLDNN_CVT(I8, I32), MKLDNN_CVT(I8, U64), MKLDNN_CVT(I8, I64),
|
||||
MKLDNN_CVT(I8, FP32), MKLDNN_CVT(I8, BF16), MKLDNN_CVT(I8, BOOL),
|
||||
MKLDNN_CVT(U16, U8), MKLDNN_CVT(U16, I8), MKLDNN_CVT(U16, I16),
|
||||
MKLDNN_CVT(U16, I32), MKLDNN_CVT(U16, U64), MKLDNN_CVT(U16, I64),
|
||||
MKLDNN_CVT(U16, FP32), MKLDNN_CVT(U16, BF16), MKLDNN_CVT(U16, BOOL),
|
||||
MKLDNN_CVT(I16, U8), MKLDNN_CVT(I16, I8), MKLDNN_CVT(I16, U16),
|
||||
MKLDNN_CVT(I16, I32), MKLDNN_CVT(I16, U64), MKLDNN_CVT(I16, I64),
|
||||
MKLDNN_CVT(I16, FP32), MKLDNN_CVT(I16, BF16), MKLDNN_CVT(I16, BOOL),
|
||||
MKLDNN_CVT(I32, U8), MKLDNN_CVT(I32, I8), MKLDNN_CVT(I32, U16),
|
||||
MKLDNN_CVT(I32, I16), MKLDNN_CVT(I32, U64), MKLDNN_CVT(I32, I64),
|
||||
MKLDNN_CVT(I32, FP32), MKLDNN_CVT(I32, BF16), MKLDNN_CVT(I32, BOOL),
|
||||
MKLDNN_CVT(U64, U8), MKLDNN_CVT(U64, I8), MKLDNN_CVT(U64, U16),
|
||||
MKLDNN_CVT(U64, I16), MKLDNN_CVT(U64, I32), MKLDNN_CVT(U64, I64),
|
||||
MKLDNN_CVT(U64, FP32), MKLDNN_CVT(U64, BF16), MKLDNN_CVT(U64, BOOL),
|
||||
MKLDNN_CVT(I64, U8), MKLDNN_CVT(I64, I8), MKLDNN_CVT(I64, U16),
|
||||
MKLDNN_CVT(I64, I16), MKLDNN_CVT(I64, I32), MKLDNN_CVT(I64, U64),
|
||||
MKLDNN_CVT(I64, FP32), MKLDNN_CVT(I64, BF16), MKLDNN_CVT(I64, BOOL),
|
||||
MKLDNN_CVT(FP32, U8), MKLDNN_CVT(FP32, I8), MKLDNN_CVT(FP32, U16),
|
||||
MKLDNN_CVT(FP32, I16), MKLDNN_CVT(FP32, I32), MKLDNN_CVT(FP32, U64),
|
||||
MKLDNN_CVT(FP32, I64), MKLDNN_CVT(FP32, BF16), MKLDNN_CVT(FP32, BOOL),
|
||||
MKLDNN_CVT(BF16, U8), MKLDNN_CVT(BF16, I8), MKLDNN_CVT(BF16, U16),
|
||||
MKLDNN_CVT(BF16, I16), MKLDNN_CVT(BF16, I32), MKLDNN_CVT(BF16, U64),
|
||||
MKLDNN_CVT(BF16, I64), MKLDNN_CVT(BF16, FP32), MKLDNN_CVT(BF16, BOOL),
|
||||
MKLDNN_CVT(BOOL, U8), MKLDNN_CVT(BOOL, I8), MKLDNN_CVT(BOOL, U16),
|
||||
MKLDNN_CVT(BOOL, I16), MKLDNN_CVT(BOOL, I32), MKLDNN_CVT(BOOL, U64),
|
||||
MKLDNN_CVT(BOOL, I64), MKLDNN_CVT(BOOL, FP32), MKLDNN_CVT(BOOL, BF16),
|
||||
MKLDNN_CVT(FP64, U8), MKLDNN_CVT(FP64, I8), MKLDNN_CVT(FP64, U16),
|
||||
MKLDNN_CVT(FP64, I16), MKLDNN_CVT(FP64, I32), MKLDNN_CVT(FP64, U64),
|
||||
MKLDNN_CVT(FP64, I64), MKLDNN_CVT(FP64, FP32), MKLDNN_CVT(FP64, BF16), MKLDNN_CVT(FP64, BOOL),
|
||||
MKLDNN_CVT(U32, U8), MKLDNN_CVT(U32, I8), MKLDNN_CVT(U32, U16),
|
||||
MKLDNN_CVT(U32, I16), MKLDNN_CVT(U32, I32), MKLDNN_CVT(U32, U64),
|
||||
MKLDNN_CVT(U32, I64), MKLDNN_CVT(U32, FP32), MKLDNN_CVT(U32, BF16), MKLDNN_CVT(U32, BOOL));
|
||||
|
||||
if (!ctx.converted)
|
||||
IE_THROW() << "cpu_convert can't convert from: " << srcPrc << " precision to: " << dstPrc;
|
||||
}
|
||||
|
||||
#undef MKLDNN_CVT
|
||||
#undef MKLDNN_CVT_LIST
|
||||
|
@ -19,5 +19,32 @@
|
||||
* number of elements in buffers to be converted
|
||||
* @return none.
|
||||
*/
|
||||
void cpu_convert(const void *srcPtr,
|
||||
void *dstPtr,
|
||||
InferenceEngine::Precision srcPrc,
|
||||
InferenceEngine::Precision dstPrc,
|
||||
const size_t size);
|
||||
|
||||
void cpu_convert(const void *srcPtr, void *dstPtr, InferenceEngine::Precision srcPrc, InferenceEngine::Precision dstPrc, const size_t size);
|
||||
/**
|
||||
* @brief Copy size elements from the buffer pointed to by srcPtr to the buffer pointed to by dstPtr.
|
||||
* If the precisions srcPrc and dstPrc are different, a conversion from srcPrc to dstPrc is performed.
|
||||
* @param srcPtr
|
||||
* pointer to the buffer to convert from
|
||||
* @param dstPtr
|
||||
* pointer to the buffer to convert to
|
||||
* @param srcPrc
|
||||
* precision of the buffer to convert from
|
||||
* @param interimPrc
|
||||
* intermediate precision used for type truncation
|
||||
* @param dstPrc
|
||||
* precision of the buffer to convert to
|
||||
* @param size
|
||||
* number of elements in buffers to be converted
|
||||
* @return none.
|
||||
*/
|
||||
void cpu_convert(const void *srcPtr,
|
||||
void *dstPtr,
|
||||
InferenceEngine::Precision srcPrc,
|
||||
InferenceEngine::Precision interimPrc,
|
||||
InferenceEngine::Precision dstPrc,
|
||||
const size_t size);
|
||||
|
@ -0,0 +1,45 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "dnnl_executor.h"
|
||||
|
||||
using namespace mkldnn;
|
||||
using namespace MKLDNNPlugin;
|
||||
|
||||
/**
 * Creates a reorder primitive that converts memory described by descSrc into descDst.
 *
 * @param descSrc descriptor of the memory the reorder reads
 * @param descDst descriptor of the memory the reorder writes
 * @param engine  engine used for both sides of the reorder
 */
DnnlExecutor::IntermReorder::IntermReorder(const mkldnn::memory::desc& descSrc,
                                           const mkldnn::memory::desc& descDst,
                                           const mkldnn::engine& engine)
    : m_descSrc(descSrc), m_descDst(descDst) {
    m_reorder = mkldnn::reorder(mkldnn::reorder::primitive_desc(engine, descSrc, engine, descDst));
}
|
||||
|
||||
// Runs the stored reorder primitive on stream strm, reading memSrc and writing memDst.
void DnnlExecutor::IntermReorder::exec(mkldnn::memory& memSrc, mkldnn::memory& memDst, mkldnn::stream strm) {
|
||||
m_reorder.execute(strm, memSrc, memDst);
|
||||
}
|
||||
|
||||
/**
 * Executes the primitive, wrapping it with the registered input and output reorders.
 *
 * Each input that has a reorder is first reordered into freshly created memory,
 * which replaces the caller's argument. Each output that has a reorder is
 * redirected to freshly created memory; after the primitive ran, that memory
 * is reordered into the caller's original output.
 *
 * @param primArgs primitive arguments keyed by argument id (taken by value, modified locally)
 * @param strm     stream everything is executed on
 * @throws via IE_THROW() when a reorder is registered for an argument missing from primArgs
 */
void DnnlExecutor::exec(std::unordered_map<int, mkldnn::memory> primArgs, mkldnn::stream strm) {
    for (auto &entry : inputReorders) {
        const int port = entry.first;
        IntermReorder &reorder = entry.second;

        const auto argIt = primArgs.find(port);
        if (argIt == primArgs.end()) {
            IE_THROW() << "DnnlExecutor has reorder for input " << port << ", but doesn't have source memory";
        }

        mkldnn::memory reordered(reorder.getDstDesc(), strm.get_engine());
        reorder.exec(argIt->second, reordered, strm);
        argIt->second = reordered;
    }

    // Caller-provided output memory, parked here while the primitive writes to temporaries.
    std::unordered_map<int, mkldnn::memory> outputMem;
    for (auto &entry : outputReorders) {
        const int port = entry.first;

        const auto argIt = primArgs.find(port);
        if (argIt == primArgs.end()) {
            IE_THROW() << "DnnlExecutor has reorder for output " << port << ", but doesn't have destination memory";
        }

        mkldnn::memory intermediate(entry.second.getSrcDesc(), strm.get_engine());
        outputMem[port] = argIt->second;
        argIt->second = intermediate;
    }

    (*execPrim).execute(strm, primArgs);

    for (auto &entry : outputReorders) {
        const int port = entry.first;
        entry.second.exec(primArgs[port], outputMem[port], strm);
    }
}
|
@ -0,0 +1,39 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "mkldnn_memory.h"
|
||||
#include "mkldnn_primitive.h"
|
||||
|
||||
namespace MKLDNNPlugin {
|
||||
|
||||
// Base class that runs a primitive (execPrim) together with optional per-argument
// input and output memory reorders. Construction is protected, so it is used through subclasses.
class DnnlExecutor {
|
||||
protected:
|
||||
// Reorder primitive bundled with the source and destination memory descriptors it was created for.
class IntermReorder {
|
||||
public:
|
||||
IntermReorder(const mkldnn::memory::desc& descSrc, const mkldnn::memory::desc& descDst, const mkldnn::engine& engine);
|
||||
// Executes the reorder from memSrc to memDst on stream strm.
void exec(mkldnn::memory& memSrc, mkldnn::memory& memDst, mkldnn::stream strm);
|
||||
const mkldnn::memory::desc& getSrcDesc() const { return m_descSrc; }
|
||||
const mkldnn::memory::desc& getDstDesc() const { return m_descDst; }
|
||||
|
||||
private:
|
||||
mkldnn::reorder m_reorder;
|
||||
mkldnn::memory::desc m_descSrc;
|
||||
mkldnn::memory::desc m_descDst;
|
||||
};
|
||||
|
||||
public:
|
||||
// Executes execPrim with primArgs on stream strm, applying the registered reorders.
void exec(std::unordered_map<int, mkldnn::memory> primArgs, mkldnn::stream strm);
|
||||
virtual ~DnnlExecutor() = default;
|
||||
|
||||
protected:
|
||||
DnnlExecutor() = default;
|
||||
// Primitive executed by exec().
MKLDNNPrimitive execPrim;
|
||||
// key is the port number for the primitive that needs memory reordering
|
||||
std::unordered_map<int, IntermReorder> inputReorders;
|
||||
std::unordered_map<int, IntermReorder> outputReorders;
|
||||
};
|
||||
|
||||
} // namespace MKLDNNPlugin
|
@ -145,6 +145,10 @@ void MKLDNNAdaptivePoolingNode::initSupportedPrimitiveDescriptors() {
|
||||
}
|
||||
}
|
||||
|
||||
// Forwards to execute() with the same stream.
void MKLDNNAdaptivePoolingNode::executeDynamicImpl(mkldnn::stream strm) {
|
||||
execute(strm);
|
||||
}
|
||||
|
||||
void MKLDNNAdaptivePoolingNode::execute(mkldnn::stream strm) {
|
||||
auto inputPrec = getParentEdgeAt(0)->getMemory().GetDataType();
|
||||
auto outputPrec = getChildEdgeAt(0)->getMemory().GetDataType();
|
||||
@ -283,8 +287,6 @@ bool MKLDNNAdaptivePoolingNode::created() const {
|
||||
return getType() == AdaptivePooling;
|
||||
}
|
||||
|
||||
void MKLDNNAdaptivePoolingNode::createPrimitive() {}
|
||||
|
||||
inline void MKLDNNAdaptivePoolingNode::setBinBorders(size_t *startPtr, size_t *endPtr, size_t idx, size_t inputLength, size_t outputLength) {
|
||||
*(startPtr) = idx * inputLength / outputLength;
|
||||
*(endPtr) = ceil(static_cast<float>((idx + 1) * inputLength) / outputLength);
|
||||
|
@ -18,7 +18,6 @@ public:
|
||||
|
||||
void getSupportedDescriptors() override;
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void createPrimitive() override;
|
||||
void execute(mkldnn::stream strm) override;
|
||||
bool created() const override;
|
||||
|
||||
@ -36,7 +35,7 @@ protected:
|
||||
bool needShapeInfer() const override;
|
||||
std::vector<VectorDims> shapeInfer() const override;
|
||||
bool needPrepareParams() const override { return false; };
|
||||
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); };
|
||||
void executeDynamicImpl(mkldnn::stream strm) override;
|
||||
};
|
||||
|
||||
} // namespace MKLDNNPlugin
|
||||
|
@ -225,6 +225,10 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() {
|
||||
});
|
||||
}
|
||||
|
||||
void MKLDNNBatchToSpaceNode::executeDynamicImpl(mkldnn::stream strm) {
|
||||
execute(strm);
|
||||
}
|
||||
|
||||
void MKLDNNBatchToSpaceNode::execute(mkldnn::stream strm) {
|
||||
switch (getParentEdgeAt(0)->getMemory().getDesc().getPrecision().size()) {
|
||||
case 1: batchToSpaceKernel<PrecisionTrait<Precision::U8>::value_type>(); break;
|
||||
|
@ -18,12 +18,11 @@ public:
|
||||
|
||||
void getSupportedDescriptors() override {};
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void createPrimitive() override {};
|
||||
void execute(mkldnn::stream strm) override;
|
||||
bool created() const override;
|
||||
|
||||
bool needPrepareParams() const override { return false; };
|
||||
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); };
|
||||
void executeDynamicImpl(mkldnn::stream strm) override;
|
||||
|
||||
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
|
||||
|
||||
|
@ -107,14 +107,6 @@ void MKLDNNBroadcastNode::initSupportedPrimitiveDescriptors() {
|
||||
supportedPrimitiveDescriptors = getSupportedConfigs(this);
|
||||
}
|
||||
|
||||
void MKLDNNBroadcastNode::createPrimitive() {
|
||||
if (inputShapesDefined()) {
|
||||
if (needPrepareParams())
|
||||
prepareParams();
|
||||
updateLastInputDims();
|
||||
}
|
||||
}
|
||||
|
||||
bool MKLDNNBroadcastNode::needPrepareParams() const {
|
||||
return needPrepareParamsVar;
|
||||
}
|
||||
@ -215,6 +207,14 @@ std::vector<VectorDims> MKLDNNBroadcastNode::shapeInfer() const {
|
||||
return newOutputShapes;
|
||||
}
|
||||
|
||||
bool MKLDNNBroadcastNode::isExecutable() const {
|
||||
return !isInputTensorAtPortEmpty(0);
|
||||
}
|
||||
|
||||
void MKLDNNBroadcastNode::executeDynamicImpl(mkldnn::stream strm) {
|
||||
execute(strm);
|
||||
}
|
||||
|
||||
void MKLDNNBroadcastNode::execute(mkldnn::stream strm) {
|
||||
if (optimizedCase) {
|
||||
optimizedExecute(getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(), getChildEdgeAt(0)->getMemoryPtr());
|
||||
|
@ -19,13 +19,11 @@ public:
|
||||
|
||||
void getSupportedDescriptors() override;
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void createPrimitive() override;
|
||||
void execute(mkldnn::stream strm) override;
|
||||
void executeDynamicImpl(mkldnn::stream strm) override {
|
||||
execute(strm);
|
||||
}
|
||||
void executeDynamicImpl(mkldnn::stream strm) override;
|
||||
bool created() const override;
|
||||
|
||||
bool isExecutable() const override;
|
||||
static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
|
||||
|
||||
protected:
|
||||
|
@ -203,12 +203,8 @@ void MKLDNNBucketizeNode::prepareParams() {
|
||||
std::accumulate(input_tensor_dims.begin(), input_tensor_dims.end(), size_t(1), std::multiplies<size_t>());
|
||||
}
|
||||
|
||||
void MKLDNNBucketizeNode::createPrimitive() {
|
||||
if (inputShapesDefined()) {
|
||||
if (needPrepareParams())
|
||||
prepareParams();
|
||||
updateLastInputDims();
|
||||
}
|
||||
bool MKLDNNBucketizeNode::isExecutable() const {
|
||||
return !isInputTensorAtPortEmpty(0);
|
||||
}
|
||||
|
||||
std::vector<VectorDims> MKLDNNBucketizeNode::shapeInfer() const {
|
||||
|
@ -15,15 +15,16 @@ public:
|
||||
|
||||
void getSupportedDescriptors() override {};
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void createPrimitive() override;
|
||||
void execute(mkldnn::stream strm) override;
|
||||
bool created() const override;
|
||||
void executeDynamicImpl(mkldnn::stream strm) override {
|
||||
execute(strm);
|
||||
}
|
||||
|
||||
void prepareParams() override;
|
||||
std::vector<VectorDims> shapeInfer() const override;
|
||||
|
||||
bool isExecutable() const override;
|
||||
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
|
||||
|
||||
private:
|
||||
|
@ -31,6 +31,10 @@ namespace {
|
||||
constexpr size_t channelAxis = 1lu;
|
||||
}
|
||||
|
||||
bool MKLDNNConcatNode::isExecutable() const {
|
||||
return !hasEmptyOutputTensors() && !isOptimized();
|
||||
}
|
||||
|
||||
bool MKLDNNConcatNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
|
||||
try {
|
||||
const auto concatOp = ngraph::as_type_ptr<const ngraph::op::v0::Concat>(op);
|
||||
@ -173,7 +177,7 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() {
|
||||
}
|
||||
|
||||
// TODO [DS]: inplace
|
||||
if (!canBeInPlace)
|
||||
if (!canBeInPlace || std::any_of(inputShapes.begin(), inputShapes.end(), [](const Shape& shape) { return shape.hasZeroDims(); }))
|
||||
return;
|
||||
|
||||
// Optimized inplace case
|
||||
@ -353,7 +357,6 @@ void MKLDNNConcatNode::prepareParams() {
|
||||
IE_THROW() << "Preferable primitive descriptor is not set.";
|
||||
|
||||
std::vector<memory::desc> srcs_d;
|
||||
|
||||
for (size_t i = 0; i < getParentEdges().size(); i++) {
|
||||
const auto& srcMemPtr = getParentEdgesAtPort(i)[0]->getMemoryPtr();
|
||||
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr()) {
|
||||
@ -362,6 +365,10 @@ void MKLDNNConcatNode::prepareParams() {
|
||||
<< getName() << ".";
|
||||
}
|
||||
|
||||
if (srcMemPtr->GetShape().hasZeroDims()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto desc = srcMemPtr->GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
|
||||
const auto& dims = srcMemPtr->getStaticDims();
|
||||
for (size_t j = 0; j < dims.size(); j++) {
|
||||
@ -382,14 +389,6 @@ void MKLDNNConcatNode::prepareParams() {
|
||||
prim.reset(new concat(primitive_desc));
|
||||
}
|
||||
|
||||
void MKLDNNConcatNode::createPrimitive() {
|
||||
if (inputShapesDefined()) {
|
||||
if (needPrepareParams())
|
||||
prepareParams();
|
||||
updateLastInputDims();
|
||||
}
|
||||
}
|
||||
|
||||
size_t MKLDNNConcatNode::inverseOrder(const SizeVector& order, size_t axis) {
|
||||
for (size_t i = 0; i < order.size(); i++) {
|
||||
if (axis == order[i]) {
|
||||
@ -489,16 +488,23 @@ void MKLDNNConcatNode::execute(mkldnn::stream strm) {
|
||||
return;
|
||||
}
|
||||
|
||||
const MKLDNNMemory& dst_memory = getChildEdgeAt(0)->getMemory();
|
||||
if (canOptimizeNspc) {
|
||||
execNspcSpecCase();
|
||||
return;
|
||||
}
|
||||
|
||||
const MKLDNNMemory& dst_memory = getChildEdgeAt(0)->getMemory();
|
||||
const size_t num_src = getParentEdges().size();
|
||||
std::unordered_map<int, memory> mem_ags {{DNNL_ARG_DST, dst_memory.GetPrimitive()}};
|
||||
for (int i = 0; i < num_src; i++)
|
||||
mem_ags[DNNL_ARG_MULTIPLE_SRC + i] = getParentEdgeAt(i)->getMemory().GetPrimitive();
|
||||
size_t nonZeroInShapes = 0;
|
||||
for (int i = 0; i < num_src; i++) {
|
||||
const auto& srcMem = getParentEdgesAtPort(i)[0]->getMemory();
|
||||
if (srcMem.GetShape().hasZeroDims()) {
|
||||
continue;
|
||||
}
|
||||
mem_ags[DNNL_ARG_MULTIPLE_SRC + nonZeroInShapes] = srcMem.GetPrimitive();
|
||||
nonZeroInShapes++;
|
||||
}
|
||||
|
||||
(*prim).execute(strm, mem_ags);
|
||||
}
|
||||
@ -518,21 +524,32 @@ void MKLDNNConcatNode::execNspcSpecCase() {
|
||||
std::vector<const uint8_t*> src_ptrs;
|
||||
std::vector<uint8_t*> dst_ptrs;
|
||||
|
||||
size_t nonZeroInShapes = 0;
|
||||
int firstNonZeroEdge = -1;
|
||||
for (size_t i = 0; i < num_src; i++) {
|
||||
const MKLDNNMemory& src_mem = getParentEdgeAt(i)->getMemory();
|
||||
const MKLDNNMemory& src_mem = getParentEdgesAtPort(i)[0]->getMemory();
|
||||
if (src_mem.GetShape().hasZeroDims()) {
|
||||
continue;
|
||||
}
|
||||
const size_t num_channels = src_mem.getStaticDims()[channelAxis];
|
||||
|
||||
channelsDataSize.push_back(num_channels * dataSize);
|
||||
src_ptrs.push_back(reinterpret_cast<const uint8_t*>(src_mem.GetData()));
|
||||
dst_ptrs.push_back(dst_ptr + channels_size);
|
||||
channels_size += num_channels * dataSize;
|
||||
|
||||
if (firstNonZeroEdge == -1) {
|
||||
firstNonZeroEdge = i;
|
||||
}
|
||||
|
||||
nonZeroInShapes++;
|
||||
}
|
||||
|
||||
const size_t iter_count = getParentEdgeAt(0)->getMemory().GetSize() / channelsDataSize[0];
|
||||
const size_t iter_count = getParentEdgeAt(firstNonZeroEdge)->getMemory().GetSize() / channelsDataSize[0];
|
||||
|
||||
parallel_for(iter_count, [&](int i) {
|
||||
const size_t dst_off = i * channels_size;
|
||||
for (int j = 0; j < num_src; j++) {
|
||||
for (int j = 0; j < nonZeroInShapes; j++) {
|
||||
cpu_memcpy(dst_ptrs[j] + dst_off, src_ptrs[j] + i * channelsDataSize[j], channelsDataSize[j]);
|
||||
}
|
||||
});
|
||||
|
@ -19,7 +19,6 @@ public:
|
||||
void getSupportedDescriptors() override;
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void initOptimalPrimitiveDescriptor() override;
|
||||
void createPrimitive() override;
|
||||
void selectOptimalPrimitiveDescriptor() override;
|
||||
bool created() const override;
|
||||
void execute(mkldnn::stream strm) override;
|
||||
@ -28,10 +27,8 @@ public:
|
||||
bool isOptimized() const;
|
||||
|
||||
InferenceEngine::Precision getRuntimePrecision() const override;
|
||||
bool isExecutable() const override {
|
||||
return !isOptimized();
|
||||
}
|
||||
|
||||
bool isExecutable() const override;
|
||||
bool needPrepareParams() const override;
|
||||
void prepareParams() override;
|
||||
|
||||
|
@ -330,48 +330,42 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights = false, bool initAsBinary = false) {
|
||||
bool initBinaryMemory = initWeights;
|
||||
void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights = false) {
|
||||
mkldnn::post_ops ops;
|
||||
bool useLegacyPostOps = true; // @todo remove after issue with performance of binary post ops fixed
|
||||
|
||||
auto getBinPostOpShape = [&](){
|
||||
const auto outShape = getOutputShapeAtPort(0).getStaticDims();
|
||||
const auto outShapeRank = getOutputShapeAtPort(0).getRank();
|
||||
const auto chIdx = getFusingAxis();
|
||||
std::vector<size_t> binaryShape(outShapeRank, 1);
|
||||
binaryShape[chIdx] = outShape[chIdx];
|
||||
return binaryShape;
|
||||
};
|
||||
|
||||
for (auto &node : fusedWith) {
|
||||
if (node->getType() == Split || node->getType() == Concatenation)
|
||||
continue;
|
||||
|
||||
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
|
||||
if (eltwiseNode) {
|
||||
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
|
||||
if (eltwiseNode->isSpecialConvolutionAddFusing()) {
|
||||
ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision));
|
||||
} else {
|
||||
constexpr int align = 16;
|
||||
eltwiseNode->appendPostOps(ops, dims, align, initAsBinary, initBinaryMemory);
|
||||
if (initBinaryMemory) {
|
||||
if (eltwiseNode->scalesMemory)
|
||||
binaryPostOpsArgs.push_back(eltwiseNode->scalesMemory->GetPrimitive());
|
||||
if (eltwiseNode->shiftsMemory)
|
||||
binaryPostOpsArgs.push_back(eltwiseNode->shiftsMemory->GetPrimitive());
|
||||
if (useLegacyPostOps || eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
|
||||
constexpr int align = 16;
|
||||
eltwiseNode->appendPostOps(ops, dims, align);
|
||||
} else {
|
||||
eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
|
||||
if (fakeQuantizeNode) {
|
||||
constexpr int align = -1;
|
||||
fakeQuantizeNode->appendPostOps(ops, dims, align, initAsBinary, initBinaryMemory);
|
||||
if (initBinaryMemory) {
|
||||
if (fakeQuantizeNode->cropHighMemory)
|
||||
binaryPostOpsArgs.push_back(fakeQuantizeNode->cropHighMemory->GetPrimitive());
|
||||
if (fakeQuantizeNode->cropLowMemory)
|
||||
binaryPostOpsArgs.push_back(fakeQuantizeNode->cropLowMemory->GetPrimitive());
|
||||
if (fakeQuantizeNode->inputScaleMemory)
|
||||
binaryPostOpsArgs.push_back(fakeQuantizeNode->inputScaleMemory->GetPrimitive());
|
||||
if (fakeQuantizeNode->inputShiftMemory)
|
||||
binaryPostOpsArgs.push_back(fakeQuantizeNode->inputShiftMemory->GetPrimitive());
|
||||
if (fakeQuantizeNode->outputScaleMemory)
|
||||
binaryPostOpsArgs.push_back(fakeQuantizeNode->outputScaleMemory->GetPrimitive());
|
||||
if (fakeQuantizeNode->outputShiftMemory)
|
||||
binaryPostOpsArgs.push_back(fakeQuantizeNode->outputShiftMemory->GetPrimitive());
|
||||
if (auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get())) {
|
||||
if (useLegacyPostOps) {
|
||||
fakeQuantizeNode->appendPostOps(ops, dims);
|
||||
} else {
|
||||
fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@ -416,7 +410,6 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
|
||||
// attr[1] - binary
|
||||
mkldnn::primitive_attr attrs[1];
|
||||
setPostOps(attrs[0], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims());
|
||||
// setPostOps(attrs[1], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), false, true);
|
||||
|
||||
bool containJitImpl = false;
|
||||
|
||||
@ -494,15 +487,6 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MKLDNNConvolutionNode::createPrimitive() {
|
||||
if (inputShapesDefined()) {
|
||||
if (needPrepareParams())
|
||||
prepareParams();
|
||||
updateLastInputDims();
|
||||
}
|
||||
}
|
||||
|
||||
bool MKLDNNConvolutionNode::created() const {
|
||||
return getType() == Convolution;
|
||||
}
|
||||
@ -552,7 +536,14 @@ MKLDNNConvolutionNode::createDescriptorInternal(const mkldnn::memory::desc& inpu
|
||||
|
||||
void MKLDNNConvolutionNode::createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
|
||||
const std::vector<MemoryDescPtr>& outputDesc) {
|
||||
auto inpDesc = inputDesc[0]->isDefined() ? inputDesc[0] : MemoryDescUtils::makeDummyDesc(*inputDesc[0]);
|
||||
MemoryDescPtr inpDesc;
|
||||
if (inputDesc[0]->isDefined()) {
|
||||
inpDesc = inputDesc[0];
|
||||
} else {
|
||||
auto dummyInDims = MemoryDescUtils::makeDummyShape(inputDesc[0]->getShape()).getStaticDims();
|
||||
dummyInDims[1] = IC;
|
||||
inpDesc = inputDesc[0]->cloneWithNewDims(dummyInDims);
|
||||
}
|
||||
DnnlMemoryDescPtr definedInpMemDesc = MemoryDescUtils::convertToDnnlMemoryDesc(inpDesc);
|
||||
DnnlMemoryDescPtr definedOutMemDesc;
|
||||
|
||||
@ -630,7 +621,6 @@ void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) {
|
||||
// attr[1] - binary
|
||||
mkldnn::primitive_attr attrs[1];
|
||||
setPostOps(attrs[0], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims());
|
||||
// setPostOps(attrs[1], false, true);
|
||||
|
||||
auto rightConfig = selectedPD->getConfig();
|
||||
size_t selected_count = 0;
|
||||
@ -914,25 +904,63 @@ InferenceEngine::Blob::Ptr MKLDNNConvolutionNode::createInternalBlob(InferenceEn
|
||||
return internalBlob;
|
||||
}
|
||||
|
||||
std::shared_ptr<MKLDNNDescriptor> MKLDNNConvolutionNode::createMkldnnConvDesc(const mkldnn::memory::desc& srcDesc,
|
||||
const mkldnn::memory::desc& wghDesc,
|
||||
const mkldnn::memory::desc& dstDesc,
|
||||
const mkldnn::memory::desc& biasDesc) {
|
||||
std::shared_ptr<mkldnn::convolution_forward::desc> dnnlConvDesc;
|
||||
auto alg = isWinograd() ? mkldnn::algorithm::convolution_winograd : mkldnn::algorithm::convolution_direct;
|
||||
|
||||
if (withBiases) {
|
||||
// WA to align IR bias representation (3 to 5 rank tensors) to oneDNN representation (1 rank tensor)
|
||||
mkldnn::memory::desc dnnlBiasDesc = biasDesc.reshape(MKLDNNExtensionUtils::convertToDnnlDims(biasesDims));
|
||||
return std::make_shared<MKLDNNDescriptor>(createDescriptorInternal(srcDesc,
|
||||
wghDesc,
|
||||
dnnlBiasDesc,
|
||||
dstDesc,
|
||||
alg));
|
||||
} else {
|
||||
return std::make_shared<MKLDNNDescriptor>(createDescriptorInternal(srcDesc,
|
||||
wghDesc,
|
||||
dstDesc,
|
||||
alg));
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNConvolutionNode::prepareParams() {
|
||||
auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
|
||||
auto wghMemPtr = getParentEdgesAtPort(1)[0]->getMemoryPtr();
|
||||
auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr();
|
||||
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
|
||||
IE_THROW() << "Destination memory didn't allocate.";
|
||||
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
|
||||
IE_THROW() << "Input memory didn't allocate.";
|
||||
if (!wghMemPtr || !wghMemPtr->GetPrimitivePtr())
|
||||
IE_THROW() << "Weight memory didn't allocate.";
|
||||
MKLDNNMemoryPtr biasMemPtr = nullptr;
|
||||
if (withBiases) {
|
||||
biasMemPtr = getParentEdgesAtPort(2)[0]->getMemoryPtr();
|
||||
if (!biasMemPtr || !biasMemPtr->GetPrimitivePtr())
|
||||
IE_THROW() << "Input memory didn't allocate.";
|
||||
}
|
||||
|
||||
const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
|
||||
if (selected_pd == nullptr)
|
||||
IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << ".";
|
||||
|
||||
auto inMemoryDesc = getParentEdgesAtPort(0).front()->getMemory().GetDescWithType<DnnlMemoryDesc>();
|
||||
auto weightMemoryDesc = getParentEdgesAtPort(1).front()->getMemory().GetDescWithType<DnnlMemoryDesc>();
|
||||
auto outMemoryDesc = getChildEdgesAtPort(0).front()->getMemory().GetDescWithType<DnnlMemoryDesc>();
|
||||
auto inMemoryDesc = srcMemPtr->GetDescWithType<DnnlMemoryDesc>();
|
||||
auto weightMemoryDesc = wghMemPtr->GetDescWithType<DnnlMemoryDesc>();
|
||||
auto outMemoryDesc = dstMemPtr->GetDescWithType<DnnlMemoryDesc>();
|
||||
mkldnn::memory::desc biasDesc;
|
||||
if (biasMemPtr) {
|
||||
biasDesc = biasMemPtr->GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
|
||||
}
|
||||
|
||||
auto initPrimitiveAttr = [&]() {
|
||||
mkldnn::primitive_attr attr;
|
||||
addZeroPoints(attr);
|
||||
setPostOps(attr, outMemoryDesc->getShape().getStaticDims(), true);
|
||||
|
||||
// todo: [AV] delete "false" to use binary mechanism
|
||||
if (false && getSelectedPrimitiveDescriptor()->getImplementationType() == jit_gemm) {
|
||||
setPostOps(attr, outMemoryDesc->getShape().getStaticDims(), true, true);
|
||||
} else {
|
||||
setPostOps(attr, outMemoryDesc->getShape().getStaticDims(), true);
|
||||
}
|
||||
return std::make_shared<mkldnn::primitive_attr>(std::move(attr));
|
||||
};
|
||||
|
||||
@ -947,61 +975,95 @@ void MKLDNNConvolutionNode::prepareParams() {
|
||||
pAttrLocal = initPrimitiveAttr();
|
||||
}
|
||||
|
||||
std::shared_ptr<mkldnn::convolution_forward::desc> dnnlConvDesc;
|
||||
auto alg = isWinograd() ? mkldnn::algorithm::convolution_winograd : mkldnn::algorithm::convolution_direct;
|
||||
std::shared_ptr<MKLDNNDescriptor> desc = createMkldnnConvDesc(inMemoryDesc->getDnnlDesc(),
|
||||
weightMemoryDesc->getDnnlDesc(),
|
||||
outMemoryDesc->getDnnlDesc(),
|
||||
biasDesc);
|
||||
|
||||
if (withBiases) {
|
||||
auto biasMemoryDesc = getParentEdgesAtPort(2).front()->getMemory().GetDescWithType<DnnlMemoryDesc>();
|
||||
// WA to align IR bias representation (3 to 5 rank tensors) to oneDNN representation (1 rank tensor)
|
||||
mkldnn::memory::desc dnnlBiasDesc = biasMemoryDesc->getDnnlDesc().reshape(MKLDNNExtensionUtils::convertToDnnlDims(biasesDims));
|
||||
dnnlConvDesc = createDescriptorInternal(inMemoryDesc->getDnnlDesc(),
|
||||
weightMemoryDesc->getDnnlDesc(),
|
||||
dnnlBiasDesc,
|
||||
outMemoryDesc->getDnnlDesc(),
|
||||
alg);
|
||||
} else {
|
||||
dnnlConvDesc = createDescriptorInternal(inMemoryDesc->getDnnlDesc(),
|
||||
weightMemoryDesc->getDnnlDesc(),
|
||||
outMemoryDesc->getDnnlDesc(),
|
||||
alg);
|
||||
}
|
||||
|
||||
MKLDNNDescriptor desc(dnnlConvDesc);
|
||||
|
||||
auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), *pAttrLocal);
|
||||
auto itpd = desc->createPrimitiveDescriptorIterator(getEngine(), *pAttrLocal);
|
||||
|
||||
convolution_forward::primitive_desc prim_desc;
|
||||
while (static_cast<bool>(itpd)) {
|
||||
|
||||
execPtr = nullptr;
|
||||
while (static_cast<bool>(itpd)) {
|
||||
impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str());
|
||||
|
||||
if (impl_type == selected_pd->getImplementationType()) {
|
||||
prim_desc = convolution_forward::primitive_desc(itpd.get());
|
||||
execPtr = std::make_shared<ConvolutionExecutor>(prim_desc,
|
||||
srcMemPtr->GetPrimitive().get_desc(),
|
||||
wghMemPtr->GetPrimitive().get_desc(),
|
||||
dstMemPtr->GetPrimitive().get_desc(),
|
||||
getEngine());
|
||||
break;
|
||||
}
|
||||
if (!itpd.next_impl())
|
||||
IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
|
||||
|
||||
if (!itpd.next_impl()) {
|
||||
auto inDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(srcMemPtr->getStaticDims()),
|
||||
srcMemPtr->GetDataType(),
|
||||
memory::format_tag::any);
|
||||
auto wghDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(wghMemPtr->getStaticDims()),
|
||||
wghMemPtr->GetDataType(),
|
||||
memory::format_tag::any);
|
||||
auto outDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(dstMemPtr->getStaticDims()),
|
||||
dstMemPtr->GetDataType(),
|
||||
memory::format_tag::any);
|
||||
|
||||
std::shared_ptr<MKLDNNDescriptor> reorderConvDesc = createMkldnnConvDesc(inDesc, wghDesc, outDesc, biasDesc);
|
||||
auto reordItpd = reorderConvDesc->createPrimitiveDescriptorIterator(getEngine(), *pAttrLocal);
|
||||
if (static_cast<bool>(reordItpd)) {
|
||||
auto prim_desc = convolution_forward::primitive_desc(reordItpd.get());
|
||||
execPtr = std::make_shared<ConvolutionExecutor>(prim_desc, srcMemPtr->GetPrimitive().get_desc(),
|
||||
wghMemPtr->GetPrimitive().get_desc(),
|
||||
dstMemPtr->GetPrimitive().get_desc(),
|
||||
getEngine());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (execPtr) {
|
||||
primArgs[DNNL_ARG_SRC] = srcMemPtr->GetPrimitive();
|
||||
primArgs[DNNL_ARG_WEIGHTS] = wghMemPtr->GetPrimitive();
|
||||
primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive();
|
||||
|
||||
prim.reset(new convolution_forward(prim_desc));
|
||||
if (withBiases) {
|
||||
primArgs[DNNL_ARG_BIAS] = biasMemPtr->GetPrimitive();
|
||||
}
|
||||
|
||||
primArgs[DNNL_ARG_SRC] = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
|
||||
primArgs[DNNL_ARG_WEIGHTS] = getWeights();
|
||||
primArgs[DNNL_ARG_DST] = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
|
||||
|
||||
if (withBiases) {
|
||||
primArgs[DNNL_ARG_BIAS] = getBias();
|
||||
MKLDNNNode::appendPostOpArgs(*pAttrLocal, primArgs, binaryPostOpsArgs);
|
||||
} else {
|
||||
IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
|
||||
}
|
||||
// todo: [AV] uncomment to use binary mechanism
|
||||
// auto post_ops = attr.get_post_ops();
|
||||
// int idx = 0;
|
||||
// for (int i = 0; i < post_ops.len(); i++) {
|
||||
// if (post_ops.kind(i) == mkldnn::primitive::kind::binary) {
|
||||
// primArgs.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(i) | DNNL_ARG_SRC_1, binaryPostOpsArgs[idx++]});
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
void MKLDNNConvolutionNode::executeDynamicImpl(dnnl::stream strm) {
|
||||
MKLDNNConvolutionNode::ConvolutionExecutor::ConvolutionExecutor(const mkldnn::convolution_forward::primitive_desc& pd,
|
||||
const mkldnn::memory::desc& inMemDesc,
|
||||
const mkldnn::memory::desc& weightMemDesc,
|
||||
const mkldnn::memory::desc& outMemDesc,
|
||||
const mkldnn::engine& engine) {
|
||||
execPrim.reset(new mkldnn::convolution_forward(pd));
|
||||
|
||||
if (inMemDesc != pd.src_desc()) {
|
||||
inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, pd.src_desc(), engine)});
|
||||
}
|
||||
|
||||
if (weightMemDesc != pd.weights_desc()) {
|
||||
inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, pd.weights_desc(), engine)});
|
||||
}
|
||||
|
||||
if (outMemDesc != pd.dst_desc()) {
|
||||
outputReorders.insert({DNNL_ARG_DST, IntermReorder(pd.dst_desc(), outMemDesc, engine)});
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNConvolutionNode::execute(mkldnn::stream strm) {
|
||||
if (!execPtr) {
|
||||
IE_THROW() << "Can't execute Convolution node with name: " << getName() << ", because executor is not compiled";
|
||||
}
|
||||
execPtr->exec(primArgs, strm);
|
||||
}
|
||||
|
||||
void MKLDNNConvolutionNode::executeDynamicImpl(mkldnn::stream strm) {
|
||||
execute(strm);
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "common/dnnl_executor.h"
|
||||
|
||||
namespace MKLDNNPlugin {
|
||||
|
||||
@ -23,7 +24,6 @@ public:
|
||||
void createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
|
||||
const std::vector<MemoryDescPtr>& outputDesc) override;
|
||||
void initDescriptor(const NodeConfig& config) override;
|
||||
void createPrimitive() override;
|
||||
void selectOptimalPrimitiveDescriptor() override;
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void filterSupportedPrimitiveDescriptors() override;
|
||||
@ -65,11 +65,29 @@ protected:
|
||||
InferenceEngine::Precision fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const;
|
||||
|
||||
private:
|
||||
using executorPtr = std::shared_ptr<DnnlExecutor>;
|
||||
executorPtr execPtr = nullptr;
|
||||
|
||||
class ConvolutionExecutor : public DnnlExecutor {
|
||||
public:
|
||||
ConvolutionExecutor(const mkldnn::convolution_forward::primitive_desc& pd,
|
||||
const mkldnn::memory::desc& inMemDesc,
|
||||
const mkldnn::memory::desc& weightMemDesc,
|
||||
const mkldnn::memory::desc& outMemDesc,
|
||||
const mkldnn::engine& engine);
|
||||
};
|
||||
|
||||
std::shared_ptr<MKLDNNDescriptor> createMkldnnConvDesc(const mkldnn::memory::desc& srcDesc,
|
||||
const mkldnn::memory::desc& wghDesc,
|
||||
const mkldnn::memory::desc& dstDesc,
|
||||
const mkldnn::memory::desc& biasDesc);
|
||||
|
||||
void prepareParams() override;
|
||||
void execute(mkldnn::stream strm) override;
|
||||
void executeDynamicImpl(mkldnn::stream strm) override;
|
||||
|
||||
void addZeroPoints(mkldnn::primitive_attr& attr) const;
|
||||
void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights, bool initAsBinary);
|
||||
void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights);
|
||||
void filterSupportedDescriptors();
|
||||
bool isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) const;
|
||||
bool isNspcAvailable() const;
|
||||
@ -122,4 +140,3 @@ private:
|
||||
};
|
||||
|
||||
} // namespace MKLDNNPlugin
|
||||
|
||||
|
@ -7,7 +7,8 @@
|
||||
#include "common/cpu_convert.h"
|
||||
#include "common/blocked_desc_creator.h"
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include "utils/ngraph_utils.hpp"
|
||||
#include <ie_ngraph_utils.hpp>
|
||||
#include <utils/ngraph_utils.hpp>
|
||||
|
||||
using namespace mkldnn;
|
||||
using namespace MKLDNNPlugin;
|
||||
@ -26,14 +27,17 @@ bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr<const ngraph:
|
||||
return true;
|
||||
}
|
||||
|
||||
MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
|
||||
MKLDNNNode(op, eng, cache) {
|
||||
MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
|
||||
: MKLDNNNode(op, eng, cache) {
|
||||
std::string errorMessage;
|
||||
if (isSupportedOperation(op, errorMessage)) {
|
||||
errorPrefix = "Convert node with name '" + getName() + "'";
|
||||
} else {
|
||||
IE_THROW(NotImplemented) << errorMessage;
|
||||
}
|
||||
|
||||
auto convert = ov::as_type_ptr<const ngraph::opset1::Convert>(op);
|
||||
origPrc = details::convertPrecision(convert->get_destination_type());
|
||||
}
|
||||
|
||||
std::vector<VectorDims> MKLDNNConvertNode::shapeInfer() const {
|
||||
@ -42,7 +46,8 @@ std::vector<VectorDims> MKLDNNConvertNode::shapeInfer() const {
|
||||
|
||||
MKLDNNConvertNode::MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
|
||||
const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
|
||||
: MKLDNNNode("Convert", nodeName, eng, cache) {
|
||||
: MKLDNNNode("Convert", nodeName, eng, cache)
|
||||
, origPrc(outPrc) {
|
||||
inputShapes.push_back(shape);
|
||||
addOriginalInputPrecision(inPrc);
|
||||
outputShapes.push_back(shape);
|
||||
@ -124,15 +129,8 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() {
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNConvertNode::createPrimitive() {
|
||||
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
|
||||
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
|
||||
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
|
||||
IE_THROW() << errorPrefix << " has not allocated destination memory";
|
||||
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
|
||||
IE_THROW() << errorPrefix << " has not allocated input memory";
|
||||
if (getSelectedPrimitiveDescriptor() == nullptr)
|
||||
IE_THROW() << errorPrefix << " has nullable preferable primitive descriptor";
|
||||
void MKLDNNConvertNode::executeDynamicImpl(mkldnn::stream strm) {
|
||||
execute(strm);
|
||||
}
|
||||
|
||||
void MKLDNNConvertNode::execute(mkldnn::stream strm) {
|
||||
@ -147,7 +145,13 @@ void MKLDNNConvertNode::execute(mkldnn::stream strm) {
|
||||
|
||||
void* srcPtr = parentMem.GetPtr();
|
||||
void* dstPtr = childMem.GetPtr();
|
||||
cpu_convert(srcPtr, dstPtr, parentMem.getDesc().getPrecision(), childMem.getDesc().getPrecision(), parentPaddElemCount);
|
||||
|
||||
cpu_convert(srcPtr,
|
||||
dstPtr,
|
||||
parentMem.getDesc().getPrecision(),
|
||||
origPrc,
|
||||
childMem.getDesc().getPrecision(),
|
||||
parentPaddElemCount);
|
||||
}
|
||||
|
||||
bool MKLDNNConvertNode::created() const {
|
||||
|
@ -19,9 +19,8 @@ public:
|
||||
|
||||
void getSupportedDescriptors() override;
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void createPrimitive() override;
|
||||
void execute(mkldnn::stream strm) override;
|
||||
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
|
||||
void executeDynamicImpl(mkldnn::stream strm) override;
|
||||
bool created() const override;
|
||||
bool canBeInPlace() const override {
|
||||
return false;
|
||||
@ -49,6 +48,7 @@ public:
|
||||
private:
|
||||
MemoryDescPtr input;
|
||||
MemoryDescPtr output;
|
||||
InferenceEngine::Precision origPrc;
|
||||
|
||||
std::string errorPrefix;
|
||||
};
|
||||
|
@ -165,14 +165,8 @@ bool MKLDNNCTCGreedyDecoderNode::created() const {
|
||||
return getType() == CTCGreedyDecoder;
|
||||
}
|
||||
|
||||
void MKLDNNCTCGreedyDecoderNode::executeDynamicImpl(dnnl::stream strm) {
|
||||
MKLDNNCTCGreedyDecoderNode::execute(strm);
|
||||
}
|
||||
|
||||
void MKLDNNCTCGreedyDecoderNode::createPrimitive() {
|
||||
if (inputShapesDefined()) {
|
||||
updateLastInputDims();
|
||||
}
|
||||
void MKLDNNCTCGreedyDecoderNode::executeDynamicImpl(mkldnn::stream strm) {
|
||||
execute(strm);
|
||||
}
|
||||
|
||||
bool MKLDNNCTCGreedyDecoderNode::needPrepareParams() const {
|
||||
|
@ -15,7 +15,6 @@ public:
|
||||
|
||||
void getSupportedDescriptors() override {};
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void createPrimitive() override;
|
||||
void execute(mkldnn::stream strm) override;
|
||||
bool created() const override;
|
||||
void executeDynamicImpl(dnnl::stream strm) override;
|
||||
|
@ -168,14 +168,8 @@ bool MKLDNNCTCGreedyDecoderSeqLenNode::created() const {
|
||||
return getType() == CTCGreedyDecoderSeqLen;
|
||||
}
|
||||
|
||||
void MKLDNNCTCGreedyDecoderSeqLenNode::createPrimitive() {
|
||||
if (inputShapesDefined()) {
|
||||
updateLastInputDims();
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNCTCGreedyDecoderSeqLenNode::executeDynamicImpl(dnnl::stream strm) {
|
||||
MKLDNNCTCGreedyDecoderSeqLenNode::execute(strm);
|
||||
void MKLDNNCTCGreedyDecoderSeqLenNode::executeDynamicImpl(mkldnn::stream strm) {
|
||||
execute(strm);
|
||||
}
|
||||
|
||||
bool MKLDNNCTCGreedyDecoderSeqLenNode::needPrepareParams() const {
|
||||
|
@ -15,7 +15,6 @@ public:
|
||||
|
||||
void getSupportedDescriptors() override {};
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void createPrimitive() override;
|
||||
void execute(mkldnn::stream strm) override;
|
||||
bool created() const override;
|
||||
void executeDynamicImpl(dnnl::stream strm) override;
|
||||
|
@ -57,12 +57,8 @@ void MKLDNNCTCLossNode::initSupportedPrimitiveDescriptors() {
|
||||
impl_desc_type::ref_any);
|
||||
}
|
||||
|
||||
void MKLDNNCTCLossNode::createPrimitive() {
|
||||
if (inputShapesDefined()) {
|
||||
if (needPrepareParams())
|
||||
prepareParams();
|
||||
updateLastInputDims();
|
||||
}
|
||||
// Dynamic-shape execution entry point: simply forwards to the regular execute() path.
void MKLDNNCTCLossNode::executeDynamicImpl(mkldnn::stream strm) {
    this->execute(strm);
}
|
||||
|
||||
void MKLDNNCTCLossNode::execute(mkldnn::stream strm) {
|
||||
|
@ -15,13 +15,12 @@ public:
|
||||
|
||||
void getSupportedDescriptors() override {};
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void createPrimitive() override;
|
||||
void execute(mkldnn::stream strm) override;
|
||||
bool created() const override;
|
||||
|
||||
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
|
||||
|
||||
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); };
|
||||
void executeDynamicImpl(mkldnn::stream strm) override;
|
||||
bool needPrepareParams() const override { return false; };
|
||||
|
||||
private:
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "ie_precision.hpp"
|
||||
#include <ie_ngraph_utils.hpp>
|
||||
#include "mkldnn_cum_sum_node.h"
|
||||
#include "utils/bfloat16.hpp"
|
||||
|
||||
using namespace MKLDNNPlugin;
|
||||
using namespace InferenceEngine;
|
||||
@ -70,8 +71,7 @@ void MKLDNNCumSumNode::initSupportedPrimitiveDescriptors() {
|
||||
return;
|
||||
|
||||
dataPrecision = getOriginalInputPrecisionAtPort(CUM_SUM_DATA);
|
||||
if (dataPrecision != Precision::I8 && dataPrecision != Precision::U8 && dataPrecision != Precision::I16 && dataPrecision != Precision::I32 &&
|
||||
dataPrecision != Precision::FP32 && dataPrecision != Precision::I64 && dataPrecision != Precision::U64 && dataPrecision != Precision::BF16)
|
||||
if (!one_of(dataPrecision, Precision::I8, Precision::U8, Precision::I16, Precision::BF16, Precision::I32, Precision::FP32, Precision::I64, Precision::U64))
|
||||
IE_THROW() << errorPrefix << " has unsupported 'data' input precision: " << dataPrecision.name();
|
||||
|
||||
if (inputShapes.size() == numOfInputs) {
|
||||
@ -95,43 +95,17 @@ void MKLDNNCumSumNode::execute(mkldnn::stream strm) {
|
||||
if (inputShapes.size() == numOfInputs)
|
||||
axis = getAxis(getParentEdgeAt(AXIS)->getMemory(), getParentEdgeAt(CUM_SUM_DATA)->getMemory());
|
||||
|
||||
switch (dataPrecision) {
|
||||
case Precision::I8 : {
|
||||
exec<int8_t>();
|
||||
break;
|
||||
}
|
||||
case Precision::U8 : {
|
||||
exec<uint8_t>();
|
||||
break;
|
||||
}
|
||||
case Precision::I16 : {
|
||||
exec<int16_t>();
|
||||
break;
|
||||
}
|
||||
case Precision::I32 : {
|
||||
exec<int32_t>();
|
||||
break;
|
||||
}
|
||||
case Precision::FP32 : {
|
||||
exec<float>();
|
||||
break;
|
||||
}
|
||||
case Precision::I64 : {
|
||||
exec<int64_t>();
|
||||
break;
|
||||
}
|
||||
case Precision::U64 : {
|
||||
exec<uint64_t>();
|
||||
break;
|
||||
}
|
||||
default : {
|
||||
std::string errorMsg = errorPrefix + " has unsupported 'data' input precision: " + dataPrecision.name();
|
||||
IE_THROW() << errorMsg;
|
||||
}
|
||||
}
|
||||
OV_SWITCH(MKLDNNPlugin, CumSumExecute, this, dataPrecision,
|
||||
OV_CASE(Precision::I8, int8_t),
|
||||
OV_CASE(Precision::U8, uint8_t),
|
||||
OV_CASE(Precision::I16, int16_t),
|
||||
OV_CASE(Precision::BF16, bfloat16_t),
|
||||
OV_CASE(Precision::I32, int32_t),
|
||||
OV_CASE(Precision::FP32, float),
|
||||
OV_CASE(Precision::I64, int64_t),
|
||||
OV_CASE(Precision::U64, uint64_t))
|
||||
}
|
||||
|
||||
|
||||
template <typename dataType>
|
||||
void MKLDNNCumSumNode::exec() {
|
||||
const auto *input = reinterpret_cast<const dataType *>(getParentEdgeAt(CUM_SUM_DATA)->getMemoryPtr()->GetPtr());
|
||||
@ -284,13 +258,7 @@ bool MKLDNNCumSumNode::needPrepareParams() const {
|
||||
}
|
||||
|
||||
void MKLDNNCumSumNode::executeDynamicImpl(mkldnn::stream strm) {
|
||||
return execute(strm);
|
||||
}
|
||||
|
||||
void MKLDNNCumSumNode::createPrimitive() {
|
||||
if (inputShapesDefined()) {
|
||||
updateLastInputDims();
|
||||
}
|
||||
execute(strm);
|
||||
}
|
||||
|
||||
REG_MKLDNN_PRIM_FOR(MKLDNNCumSumNode, CumSum)
|
||||
|
@ -15,7 +15,6 @@ public:
|
||||
|
||||
void getSupportedDescriptors() override {};
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void createPrimitive() override;
|
||||
void execute(mkldnn::stream strm) override;
|
||||
bool created() const override;
|
||||
|
||||
@ -47,6 +46,13 @@ private:
|
||||
|
||||
InferenceEngine::Precision dataPrecision;
|
||||
std::string errorPrefix;
|
||||
|
||||
template<typename T>
|
||||
struct CumSumExecute {
|
||||
void operator()(MKLDNNCumSumNode* node) {
|
||||
node->exec<T>();
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace MKLDNNPlugin
|
||||
|
@ -13,34 +13,38 @@
|
||||
#include <mkldnn_extension_utils.h>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "utils/general_utils.h"
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <cpu/x64/cpu_isa_traits.hpp>
|
||||
#include <nodes/common/cpu_memcpy.h>
|
||||
#include <memory_desc/cpu_memory_desc_utils.h>
|
||||
#include "memory_desc/dnnl_blocked_memory_desc.h"
|
||||
#include "utils/cpu_utils.hpp"
|
||||
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <utils/shape_inference/static_shape.hpp>
|
||||
#include <utils/shape_inference/shape_inference.hpp>
|
||||
#include <ie_ngraph_utils.hpp>
|
||||
#include "convolution_shape_inference.hpp"
|
||||
|
||||
using namespace mkldnn;
|
||||
using namespace MKLDNNPlugin;
|
||||
using namespace InferenceEngine;
|
||||
|
||||
bool MKLDNNDeconvolutionNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
|
||||
try {
|
||||
if (isDynamicNgraphNode(op)) {
|
||||
errorMessage = "Doesn't support op with dynamic shapes";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (std::dynamic_pointer_cast<const ngraph::opset1::ConvolutionBackpropData>(op) == nullptr &&
|
||||
std::dynamic_pointer_cast<const ngraph::opset1::GroupConvolutionBackpropData>(op) == nullptr) {
|
||||
errorMessage = "Only opset1 ConvolutionBackpropData and GroupConvolutionBackpropData operations are supported";
|
||||
return false;
|
||||
}
|
||||
size_t ndims = op->get_input_shape(0).size();
|
||||
size_t ndims = op->get_input_partial_shape(0).rank().get_length();
|
||||
if ((ndims < 3) || (ndims > 5)) {
|
||||
errorMessage = "Only 3D, 4D and 5D blobs are supported as input";
|
||||
return false;
|
||||
}
|
||||
if (op->get_input_partial_shape(1).is_dynamic() || (op->get_input_size() > 2 && op->get_input_partial_shape(2).is_dynamic())) {
|
||||
errorMessage = "Doesn't support dynamic shapes for 'weights' and 'output_shape' inputs";
|
||||
return false;
|
||||
}
|
||||
} catch (...) {
|
||||
return false;
|
||||
}
|
||||
@ -58,15 +62,14 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::N
|
||||
|
||||
auto convBackprop = std::dynamic_pointer_cast<const ngraph::opset1::ConvolutionBackpropData>(op);
|
||||
auto groupConvBackprop = std::dynamic_pointer_cast<const ngraph::opset1::GroupConvolutionBackpropData>(op);
|
||||
const auto dataShape = op->get_input_shape(0);
|
||||
weightDims = op->get_input_shape(1);
|
||||
const auto outShape = op->get_shape();
|
||||
OC = outShape[1];
|
||||
IC = dataShape[1];
|
||||
const auto& weightDims = getWeightDims();
|
||||
|
||||
if (convBackprop) {
|
||||
algorithm = DeconvolutionCommon;
|
||||
|
||||
IC = weightDims[0];
|
||||
OC = weightDims[1];
|
||||
|
||||
groupNum = 1;
|
||||
withGroups = false;
|
||||
|
||||
@ -78,10 +81,17 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::N
|
||||
}
|
||||
paddingL = convBackprop->get_pads_begin();
|
||||
paddingR = convBackprop->get_pads_end();
|
||||
|
||||
outputPadding = convBackprop->get_output_padding();
|
||||
|
||||
autoPad = one_of(convBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER);
|
||||
} else if (groupConvBackprop) {
|
||||
algorithm = DeconvolutionGrouped;
|
||||
|
||||
groupNum = weightDims[0];
|
||||
IC = groupNum * weightDims[1];
|
||||
OC = groupNum * weightDims[2];
|
||||
|
||||
withGroups = groupNum > 1;
|
||||
isDW = withGroups && groupNum == OC && groupNum == IC;
|
||||
|
||||
@ -93,10 +103,26 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::N
|
||||
}
|
||||
paddingL = groupConvBackprop->get_pads_begin();
|
||||
paddingR = groupConvBackprop->get_pads_end();
|
||||
|
||||
outputPadding = groupConvBackprop->get_output_padding();
|
||||
|
||||
autoPad = one_of(groupConvBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER);
|
||||
}
|
||||
for (int i = 0; i < dilation.size(); i++) {
|
||||
kernel.push_back(weightDims[withGroups + 2 + i]);
|
||||
}
|
||||
|
||||
externOutShape = inputShapes.size() == 3;
|
||||
if (externOutShape && isDynamicNode()) {
|
||||
bool isConstOutShape = ngraph::is_type<ov::op::v0::Constant>(op->get_input_node_shared_ptr(2));
|
||||
if (isConstOutShape) {
|
||||
lastOutputSpatialDims = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(2))->cast_vector<int32_t>();
|
||||
}
|
||||
const auto spDimsNum = getInputShapeAtPort(0).getRank() - 2;
|
||||
if (getInputShapeAtPort(2).getStaticDims()[0] != spDimsNum || (isConstOutShape && lastOutputSpatialDims.size() != spDimsNum)) {
|
||||
IE_THROW() << "'output_shape' input has incorrect number of elements. Expected = " << spDimsNum;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
IE_THROW(NotImplemented) << errorMessage;
|
||||
}
|
||||
@ -113,14 +139,6 @@ InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceE
|
||||
auto const blbSize = blb->GetSize();
|
||||
|
||||
// WA: In int8 case, we are processing weights using internal blob.
|
||||
// So we disconnect constant node containing weights from the graph and then don't use it.
|
||||
if (getParentEdges().size() == 3) {
|
||||
removeEdge(getParentEdgeAt(2));
|
||||
inputShapes.erase(inputShapes.begin() + 2);
|
||||
}
|
||||
removeEdge(getParentEdgeAt(1));
|
||||
inputShapes.erase(inputShapes.begin() + 1);
|
||||
|
||||
InferenceEngine::SizeVector dimsForBlockedDesc{dims};
|
||||
std::swap(dimsForBlockedDesc[withGroups + 0], dimsForBlockedDesc[withGroups + 1]);
|
||||
|
||||
@ -157,19 +175,19 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
// todo: [antonvor] added these checks to fix performance problems
|
||||
if (kernel.size() == 3)
|
||||
return false;
|
||||
if (!withGroups && stride.back() > 3)
|
||||
return false;
|
||||
if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common)) {
|
||||
auto inDims = getOutputShapeAtPort(0).getStaticDims();
|
||||
const auto& inMaxDims = getOutputShapeAtPort(0).getMaxDims();
|
||||
if (std::any_of(inMaxDims.begin(), inMaxDims.end(), [](Dim dim) { return dim == Shape::UNDEFINED_DIM; })) {
|
||||
return false;
|
||||
}
|
||||
// heuristicConst = 2^26
|
||||
// heuristicParam = IC^2 * SP
|
||||
auto heuristicConst = 67108864;
|
||||
auto heuristicParam = IC * IC;
|
||||
for (int i = 2; i < inDims.size(); i++)
|
||||
heuristicParam *= inDims[i];
|
||||
for (int i = 2; i < inMaxDims.size(); i++)
|
||||
heuristicParam *= inMaxDims[i];
|
||||
if (heuristicParam > heuristicConst)
|
||||
return false;
|
||||
}
|
||||
@ -206,10 +224,65 @@ bool MKLDNNDeconvolutionNode::canFuse(const MKLDNNNodePtr& node) const {
|
||||
return (fusedWith.empty() && node->canBePerformedAsScaleShift(this));
|
||||
}
|
||||
|
||||
void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
|
||||
if (!descs_fwd.empty() && !descs_bwd.empty())
|
||||
return;
|
||||
// Passes paddingL / paddingR to ov::op::v1::resolve_auto_pad_for_shape_back_prop together with the
// static input shapes (data, weights and, when the node has an 'output_shape' input, that input too).
//   op        - ngraph operation; cast to ConvolutionBackpropData or GroupConvolutionBackpropData
//               depending on the node algorithm.
//   inDims    - shape of the data input; its static dims are used.
//   outSpDims - requested output spatial dims; only read when externOutShape is set.
// The IE_ASSERT expressions are intentionally left untouched (the macro may stringify them).
void MKLDNNDeconvolutionNode::initPadding(std::shared_ptr<ngraph::Node> op, const Shape &inDims, const std::vector<int32_t>& outSpDims) {
    std::vector<ov::StaticShape> input_shapes{inDims.getStaticDims(), getWeightDims()};
    ov::StaticShape output_shape_input;

    if (externOutShape) {
        IE_ASSERT(outSpDims.size() == getInputShapeAtPort(2).getStaticDims()[0]);
        input_shapes.push_back({outSpDims.size()});
        // Mirror the requested spatial dims into the static shape passed to the resolver.
        for (const auto spatialDim : outSpDims) {
            output_shape_input.push_back(spatialDim);
        }
    }

    if (getAlgorithm() == DeconvolutionCommon) {
        auto deconv = ngraph::as_type_ptr<ngraph::op::v1::ConvolutionBackpropData>(op);
        IE_ASSERT(ov::op::v1::resolve_auto_pad_for_shape_back_prop(deconv.get(), paddingL, paddingR, input_shapes, output_shape_input, 2, 2));
        return;
    }

    if (getAlgorithm() == DeconvolutionGrouped) {
        auto deconv = ngraph::as_type_ptr<ngraph::op::v1::GroupConvolutionBackpropData>(op);
        IE_ASSERT(ov::op::v1::resolve_auto_pad_for_shape_back_prop(deconv.get(), paddingL, paddingR, input_shapes, output_shape_input, 2, 3));
    }
}
|
||||
|
||||
std::pair<VectorDims, VectorDims> MKLDNNDeconvolutionNode::makeDummyInOutShape() {
|
||||
auto inShape = MemoryDescUtils::makeDummyShape(getInputShapeAtPort(0));
|
||||
auto outShape = getOutputShapeAtPort(0);
|
||||
|
||||
if (isDynamicNode()) {
|
||||
if (externOutShape) {
|
||||
if (lastOutputSpatialDims.empty()) {
|
||||
const auto& shape = getOutputShapeAtPort(0);
|
||||
lastOutputSpatialDims.resize(shape.getRank() - 2);
|
||||
|
||||
const auto& minDims = shape.getMinDims();
|
||||
const auto& maxDims = shape.getMaxDims();
|
||||
const auto& dims = shape.getDims();
|
||||
for (size_t i = 0; i < dims.size() - 2; ++i) {
|
||||
lastOutputSpatialDims[i] = dims[i + 2] == Shape::UNDEFINED_DIM ? std::min(maxDims[i + 2],
|
||||
std::max(minDims[i + 2], static_cast<Dim>(64))) : dims[i + 2];
|
||||
}
|
||||
}
|
||||
ov::CoordinateDiff pb = autoPad ? ov::CoordinateDiff(paddingL.size(), 0) : paddingL;
|
||||
ov::CoordinateDiff pe = autoPad ? ov::CoordinateDiff(paddingR.size(), 0) : paddingR;
|
||||
|
||||
auto inputDims = inShape.getStaticDims();
|
||||
const auto& weightDims = getWeightDims();
|
||||
const size_t wghOffset = getAlgorithm() == DeconvolutionGrouped ? 1 : 0;
|
||||
for (size_t i = 0; i < inputDims.size() - 2; i++) {
|
||||
inputDims[2 + i] = ((lastOutputSpatialDims[i] - (dilation[i] + 1) *
|
||||
(weightDims[wghOffset + 2 + i] - 1) - 1 + pb[i] + pe[i] - outputPadding[i])) /
|
||||
stride[i] + 1;
|
||||
}
|
||||
|
||||
inShape = Shape(inputDims);
|
||||
}
|
||||
initPadding(opToShapeInfer, inShape, lastOutputSpatialDims);
|
||||
outShape = Shape(shapeInferInternal(inShape.getStaticDims(), lastOutputSpatialDims));
|
||||
}
|
||||
return {inShape.getStaticDims(), outShape.getStaticDims()};
|
||||
}
|
||||
|
||||
void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
|
||||
isInt8 = canBeExecutedInInt8();
|
||||
|
||||
InferenceEngine::Precision inPrecision = getOriginalInputPrecisionAtPort(0);
|
||||
@ -239,21 +312,17 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
|
||||
if (getChildEdges().empty())
|
||||
IE_THROW() << errorPrefix << " has incorrect number of output edges";
|
||||
|
||||
for (int i = 0; i < paddingR.size(); i++) {
|
||||
int with_group = getAlgorithm() == DeconvolutionGrouped ? 1 : 0;
|
||||
int krn = weightDims[with_group + 2 + i];
|
||||
int src = getOutputShapeAtPort(0).getStaticDims()[2 + i];
|
||||
int dst = getInputShapeAtPort(0).getStaticDims()[2 + i];
|
||||
|
||||
krn = (krn - 1)*(dilation[i] + 1) + 1;
|
||||
int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
|
||||
paddingR[i] = (dst - calc_dst) * stride[i];
|
||||
}
|
||||
VectorDims inDims, outDims;
|
||||
std::tie(inDims, outDims) = makeDummyInOutShape();
|
||||
inShape = Shape(inDims);
|
||||
Shape outShape(outDims);
|
||||
initPaddingR(inShape, outShape);
|
||||
|
||||
if (isInt8) {
|
||||
int8WeightDims = getWeightDims();
|
||||
// WA: if int8 deconvolution is supported, we create internal weights blob in IO format
|
||||
std::swap(weightDims[withGroups + 0], weightDims[withGroups + 1]);
|
||||
internalBlobs.push_back(createWeiBlobAsIO(weightDims));
|
||||
std::swap(int8WeightDims[withGroups + 0], int8WeightDims[withGroups + 1]);
|
||||
internalBlobs.push_back(createWeiBlobAsIO(int8WeightDims));
|
||||
auto format = getInputShapeAtPort(0).getRank() == 5 ? dnnl::memory::format_tag::ndhwc : dnnl::memory::format_tag::nhwc;
|
||||
MemoryDescPtr in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getInputShapeAtPort(0), inputDataType, format);
|
||||
MemoryDescPtr out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getOutputShapeAtPort(0), outputDataType, format);
|
||||
@ -265,23 +334,44 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
|
||||
createDescriptor({in_candidate}, {out_candidate});
|
||||
}
|
||||
}
|
||||
setPostOps(attr);
|
||||
setPostOps(attr, outShape.getStaticDims());
|
||||
}
|
||||
|
||||
void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
|
||||
void MKLDNNDeconvolutionNode::initPaddingR(const Shape &inShape, const Shape &outShape) {
|
||||
for (int i = 0; i < paddingR.size(); i++) {
|
||||
int with_group = getAlgorithm() == DeconvolutionGrouped ? 1 : 0;
|
||||
const auto& weightDims = getWeightDims();
|
||||
int krn = weightDims[with_group + 2 + i];
|
||||
int src = outShape.getStaticDims()[2 + i];
|
||||
int dst = inShape.getStaticDims()[2 + i];
|
||||
|
||||
krn = (krn - 1)*(dilation[i] + 1) + 1;
|
||||
int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
|
||||
paddingR[i] = (dst - calc_dst) * stride[i];
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims) {
|
||||
mkldnn::post_ops ops;
|
||||
|
||||
auto getBinPostOpShape = [&](){
|
||||
const auto outShapeRank = getOutputShapeAtPort(0).getRank();
|
||||
const auto chIdx = getFusingAxis();
|
||||
std::vector<size_t> binaryShape(outShapeRank, 1);
|
||||
binaryShape[chIdx] = dims[chIdx];
|
||||
return binaryShape;
|
||||
};
|
||||
|
||||
for (auto &node : fusedWith) {
|
||||
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
|
||||
if (eltwiseNode) {
|
||||
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
|
||||
// TODO [DS]: change to shape from memory
|
||||
constexpr int align = 16;
|
||||
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
|
||||
// use legacy depthwise since backprop convolution does not support binary post ops
|
||||
eltwiseNode->appendPostOps(ops, dims, align);
|
||||
continue;
|
||||
}
|
||||
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
|
||||
if (fakeQuantizeNode) {
|
||||
fakeQuantizeNode->appendPostOps(ops);
|
||||
if (auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get())) {
|
||||
fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
|
||||
continue;
|
||||
}
|
||||
IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented";
|
||||
@ -334,78 +424,300 @@ bool MKLDNNDeconvolutionNode::created() const {
|
||||
return getType() == Deconvolution;
|
||||
}
|
||||
|
||||
void MKLDNNDeconvolutionNode::createPrimitive() {
|
||||
if (prim)
|
||||
return;
|
||||
bool MKLDNNDeconvolutionNode::needShapeInfer() const {
|
||||
if (inputShapesModified()) {
|
||||
return true;
|
||||
}
|
||||
if (externOutShape) {
|
||||
if (lastOutputSpatialDims != readOutputSpatialDims()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (isInt8) {
|
||||
auto prim_desc = createPrimitiveDescriptor<deconvolution_forward::primitive_desc,
|
||||
deconvolution_forward::desc>(attr);
|
||||
return false;
|
||||
}
|
||||
|
||||
prim.reset(new deconvolution_forward(prim_desc));
|
||||
std::vector<VectorDims> MKLDNNDeconvolutionNode::shapeInfer() const {
|
||||
const auto &dataMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
|
||||
std::vector<int32_t> outSpDims;
|
||||
if (externOutShape) {
|
||||
outSpDims = readOutputSpatialDims();
|
||||
}
|
||||
return {shapeInferInternal(dataMemPtr->getStaticDims(), outSpDims)};
|
||||
}
|
||||
|
||||
auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
|
||||
auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
|
||||
primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, internalBlobMemory[0]->GetPrimitive()}, {DNNL_ARG_DST, dst}};
|
||||
} else {
|
||||
auto prim_desc = createPrimitiveDescriptor<convolution_backward_data::primitive_desc,
|
||||
convolution_backward_data::desc, convolution_forward::primitive_desc>(attr);
|
||||
VectorDims MKLDNNDeconvolutionNode::shapeInferInternal(const VectorDims &inDims, std::vector<int32_t> outSpDims) const {
|
||||
std::vector<ov::StaticShape> inputShapes = {
|
||||
inDims,
|
||||
getWeightDims()
|
||||
};
|
||||
|
||||
prim.reset(new convolution_backward_data(prim_desc));
|
||||
std::map<size_t, std::shared_ptr<ngraph::runtime::HostTensor>> inputValues;
|
||||
|
||||
auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
|
||||
auto weights = getParentEdgeAt(1)->getMemory().GetPrimitive();
|
||||
auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
|
||||
primArgs = {{DNNL_ARG_DIFF_DST, src}, {DNNL_ARG_WEIGHTS, weights}, {DNNL_ARG_DIFF_SRC, dst}};
|
||||
if (externOutShape) {
|
||||
if (outSpDims.size() != getInputShapeAtPort(2).getStaticDims()[0]) {
|
||||
IE_THROW() << "Can't compute output shape for node with name: " << getName()
|
||||
<< ", because the node has 'output_shape' input, but provided output spatial dims number is incorrect";
|
||||
}
|
||||
inputShapes.push_back({outSpDims.size()});
|
||||
inputValues.insert({2, std::make_shared<ngraph::runtime::HostTensor>(ngraph::element::Type_t::i32,
|
||||
inputShapes.back().to_shape(),
|
||||
outSpDims.data())});
|
||||
}
|
||||
|
||||
std::vector<ov::StaticShape> outputShapes(1);
|
||||
shape_inference(opToShapeInfer.get(), inputShapes, outputShapes, inputValues);
|
||||
|
||||
return outputShapes.back().to_shape();
|
||||
}
|
||||
|
||||
// Runs the compiled executor with the prepared primitive arguments.
// Throws if no executor has been created yet. For nodes with an 'output_shape' input the spatial
// dims used for this run are stored in lastOutputSpatialDims afterwards.
void MKLDNNDeconvolutionNode::execute(mkldnn::stream strm) {
    if (execPtr == nullptr) {
        IE_THROW() << "Can't execute Deconvolution node with name: " << getName() << ", because executor is not compiled";
    }

    execPtr->exec(primArgs, strm);

    if (!externOutShape) {
        return;
    }
    lastOutputSpatialDims = readOutputSpatialDims();
}
|
||||
|
||||
void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc,
|
||||
const std::vector<MemoryDescPtr> &outputDesc) {
|
||||
const auto in_candidate = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*inputDesc[0]);
|
||||
const auto out_candidate = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*outputDesc[0]);
|
||||
// Wraps the non-int8 deconvolution descriptors into an MKLDNNDescriptor.
//   srcDesc / wghDesc / dstDesc - memory descriptors forwarded to createDescriptorInternalDefault().
//   isWinograd                  - selects convolution_winograd instead of convolution_direct.
// Throws when the forward convolution primitive descriptor returned alongside holds a null handle.
std::shared_ptr<MKLDNNDescriptor> MKLDNNDeconvolutionNode::createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
                                                                                       const mkldnn::memory::desc& wghDesc,
                                                                                       const mkldnn::memory::desc& dstDesc,
                                                                                       bool isWinograd) const {
    mkldnn::algorithm convAlg = mkldnn::algorithm::convolution_direct;
    if (isWinograd) {
        convAlg = mkldnn::algorithm::convolution_winograd;
    }

    std::shared_ptr<convolution_backward_data::desc> bwdDataDesc;
    std::shared_ptr<convolution_forward::primitive_desc> fwdConvPd;
    std::tie(bwdDataDesc, fwdConvPd) = createDescriptorInternalDefault(srcDesc, wghDesc, dstDesc, convAlg);

    if (fwdConvPd->get(true) == nullptr) {
        IE_THROW() << "Forward convolution primitive descriptor is nullable for node with name: " << getName();
    }

    return std::make_shared<MKLDNNDescriptor>(bwdDataDesc, fwdConvPd);
}
|
||||
|
||||
// grouping and autoblocking are not compatible
|
||||
if ((withGroups && !isDW) && (in_candidate.blocksExtended() || out_candidate.blocksExtended()))
|
||||
return;
|
||||
// Wraps the int8 deconvolution descriptor built by createDescriptorInternalInt8() into an MKLDNNDescriptor.
std::shared_ptr<MKLDNNDescriptor> MKLDNNDeconvolutionNode::createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
                                                                                    const mkldnn::memory::desc& wghDesc,
                                                                                    const mkldnn::memory::desc& dstDesc) const {
    auto int8Desc = createDescriptorInternalInt8(srcDesc, wghDesc, dstDesc);
    return std::make_shared<MKLDNNDescriptor>(std::move(int8Desc));
}
|
||||
|
||||
void MKLDNNDeconvolutionNode::createDeconvPrim(std::shared_ptr<MKLDNNDescriptor> desc,
|
||||
MKLDNNMemoryPtr srcMemPtr,
|
||||
MKLDNNMemoryPtr wghMemPtr,
|
||||
MKLDNNMemoryPtr dstMemPtr,
|
||||
AttrPtr attr,
|
||||
impl_desc_type selectedImpl) {
|
||||
auto itpd = desc->createPrimitiveDescriptorIterator(getEngine(), *attr);
|
||||
|
||||
while (static_cast<bool>(itpd)) {
|
||||
impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str());
|
||||
|
||||
if (impl_type == selectedImpl) {
|
||||
if (isInt8) {
|
||||
if (internalBlobMemory.empty()) {
|
||||
prepareMemory(itpd);
|
||||
}
|
||||
auto prim_desc = deconvolution_forward::primitive_desc(itpd.get());
|
||||
execPtr = std::make_shared<DeconvExecutorInt8>(prim_desc,
|
||||
srcMemPtr->GetPrimitive().get_desc(),
|
||||
internalBlobMemory.front()->GetPrimitive().get_desc(),
|
||||
dstMemPtr->GetPrimitive().get_desc(),
|
||||
getEngine());
|
||||
} else {
|
||||
auto prim_desc = convolution_backward_data::primitive_desc(itpd.get());
|
||||
execPtr = std::make_shared<DeconvExecutorDefault>(prim_desc,
|
||||
srcMemPtr->GetPrimitive().get_desc(),
|
||||
wghMemPtr->GetPrimitive().get_desc(),
|
||||
dstMemPtr->GetPrimitive().get_desc(),
|
||||
getEngine());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (!itpd.next_impl()) {
|
||||
auto inDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(srcMemPtr->getStaticDims()),
|
||||
memory::data_type::f32,
|
||||
memory::format_tag::any);
|
||||
auto wghDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(wghMemPtr->getStaticDims()),
|
||||
memory::data_type::f32,
|
||||
memory::format_tag::any);
|
||||
auto outDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(dstMemPtr->getStaticDims()),
|
||||
memory::data_type::f32,
|
||||
memory::format_tag::any);
|
||||
|
||||
std::shared_ptr<MKLDNNDescriptor> anyDeconvDesc = createDefaultMkldnnDeconvDesc(inDesc, wghDesc, outDesc, false);
|
||||
auto anyDeconvItpd = anyDeconvDesc->createPrimitiveDescriptorIterator(getEngine(), *attr);
|
||||
if (static_cast<bool>(anyDeconvItpd)) {
|
||||
auto prim_desc = convolution_backward_data::primitive_desc(anyDeconvItpd.get());
|
||||
execPtr = std::make_shared<DeconvExecutorDefault>(prim_desc,
|
||||
srcMemPtr->GetPrimitive().get_desc(),
|
||||
wghMemPtr->GetPrimitive().get_desc(),
|
||||
dstMemPtr->GetPrimitive().get_desc(),
|
||||
getEngine());
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
|
||||
}
|
||||
|
||||
void MKLDNNDeconvolutionNode::prepareParams() {
|
||||
auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
|
||||
auto wghMemPtr = getParentEdgesAtPort(1)[0]->getMemoryPtr();
|
||||
auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr();
|
||||
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
|
||||
IE_THROW() << "Destination memory didn't allocate.";
|
||||
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
|
||||
IE_THROW() << "Input memory didn't allocate.";
|
||||
const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
|
||||
if (!wghMemPtr || !wghMemPtr->GetPrimitivePtr())
|
||||
IE_THROW() << "Weight memory didn't allocate.";
|
||||
if (selected_pd == nullptr)
|
||||
IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << ".";
|
||||
|
||||
auto inMemoryDesc = getParentEdgesAtPort(0).front()->getMemory().GetDescWithType<DnnlMemoryDesc>();
|
||||
auto outMemoryDesc = getChildEdgesAtPort(0).front()->getMemory().GetDescWithType<DnnlMemoryDesc>();
|
||||
|
||||
auto initPrimitiveAttr = [&]() {
|
||||
mkldnn::primitive_attr attr;
|
||||
setPostOps(attr, dstMemPtr->getStaticDims());
|
||||
return std::make_shared<mkldnn::primitive_attr>(std::move(attr));
|
||||
};
|
||||
|
||||
AttrPtr pAttrLocal;
|
||||
|
||||
if (isDynamicNode()) {
|
||||
if (!pAttr) {
|
||||
pAttr = initPrimitiveAttr();
|
||||
}
|
||||
pAttrLocal = pAttr;
|
||||
if (autoPad || externOutShape) {
|
||||
initPadding(opToShapeInfer, inMemoryDesc->getShape(), externOutShape ? readOutputSpatialDims() : std::vector<int32_t>{});
|
||||
}
|
||||
initPaddingR(inMemoryDesc->getShape(), outMemoryDesc->getShape());
|
||||
} else {
|
||||
pAttrLocal = initPrimitiveAttr();
|
||||
}
|
||||
|
||||
const auto in_candidate = inMemoryDesc->getDnnlDesc();
|
||||
const auto out_candidate = outMemoryDesc->getDnnlDesc();
|
||||
|
||||
mkldnn::memory::desc wgh_candidate;
|
||||
if (isInt8) {
|
||||
if (internalBlobMemory.empty()) {
|
||||
wgh_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any);
|
||||
} else {
|
||||
wgh_candidate = internalBlobMemory.front()->GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
|
||||
}
|
||||
} else {
|
||||
wgh_candidate = getParentEdgesAtPort(1).front()->getMemory().GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
|
||||
}
|
||||
|
||||
std::shared_ptr<MKLDNNDescriptor> desc;
|
||||
if (isInt8) {
|
||||
desc = createInt8MkldnnDeconvDesc(in_candidate, wgh_candidate, out_candidate);
|
||||
} else {
|
||||
desc = createDefaultMkldnnDeconvDesc(in_candidate, wgh_candidate, out_candidate,
|
||||
selected_pd->getImplementationType() == MKLDNNPlugin::impl_desc_type::jit_avx512_winograd);
|
||||
}
|
||||
|
||||
createDeconvPrim(desc, srcMemPtr, wghMemPtr, dstMemPtr, pAttrLocal, selected_pd->getImplementationType());
|
||||
|
||||
if (std::dynamic_pointer_cast<DeconvExecutorInt8>(execPtr)) {
|
||||
primArgs = {{DNNL_ARG_SRC, srcMemPtr->GetPrimitive()},
|
||||
{DNNL_ARG_WEIGHTS, internalBlobMemory.front()->GetPrimitive()},
|
||||
{DNNL_ARG_DST, dstMemPtr->GetPrimitive()}};
|
||||
} else {
|
||||
primArgs = {{DNNL_ARG_DIFF_DST, srcMemPtr->GetPrimitive()},
|
||||
{DNNL_ARG_WEIGHTS, wghMemPtr->GetPrimitive()},
|
||||
{DNNL_ARG_DIFF_SRC, dstMemPtr->GetPrimitive()}};
|
||||
}
|
||||
MKLDNNNode::appendPostOpArgs(attr, primArgs, binaryPostOpsArgs);
|
||||
}
|
||||
|
||||
void MKLDNNDeconvolutionNode::createPrimitive() {
|
||||
if (inputShapesDefined()) {
|
||||
if (needPrepareParams())
|
||||
prepareParams();
|
||||
updateLastInputDims();
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the descriptors for the non-int8 path: a convolution_backward_data descriptor plus the
// forward convolution primitive descriptor created from the same parameters.
// Note the argument order: the forward convolution takes out_candidate as its source and
// in_candidate as its destination.
// Returns {backward-data descriptor, forward convolution primitive descriptor}.
MKLDNNDeconvolutionNode::DefaultDeconvDescs MKLDNNDeconvolutionNode::createDescriptorInternalDefault(const mkldnn::memory::desc& in_candidate,
                                                                                                   const mkldnn::memory::desc& wgh_candidate,
                                                                                                   const mkldnn::memory::desc& out_candidate,
                                                                                                   mkldnn::algorithm alg) const {
    // Converts a vector of ptrdiff_t into the memory::dims type expected by the descriptors.
    auto toDnnlDims = [] (const std::vector<ptrdiff_t>& values) {
        return memory::dims(values.begin(), values.end());
    };

    std::shared_ptr<mkldnn::convolution_forward::desc> conv_desc =
        std::make_shared<convolution_forward::desc>(prop_kind::forward_inference, alg,
                                                    out_candidate, wgh_candidate, in_candidate,
                                                    toDnnlDims(stride),
                                                    toDnnlDims(dilation),
                                                    toDnnlDims(paddingL),
                                                    toDnnlDims(paddingR));

    std::shared_ptr<mkldnn::convolution_backward_data::desc> deconv_desc =
        std::make_shared<convolution_backward_data::desc>(alg, out_candidate, wgh_candidate,
                                                          in_candidate,
                                                          toDnnlDims(stride),
                                                          toDnnlDims(dilation),
                                                          toDnnlDims(paddingL),
                                                          toDnnlDims(paddingR));

    auto fwd_conv_pd = std::make_shared<convolution_forward::primitive_desc>(*conv_desc, getEngine(), true);

    return {deconv_desc, fwd_conv_pd};
}
|
||||
|
||||
// Builds the deconvolution_forward descriptor used by the int8 path.
// The descriptor is created for forward inference with the direct
// deconvolution algorithm and the node's stride/dilation/padding settings.
MKLDNNDeconvolutionNode::Int8DeconvDesc MKLDNNDeconvolutionNode::createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate,
                                                                                             const mkldnn::memory::desc& wgh_candidate,
                                                                                             const mkldnn::memory::desc& out_candidate) const {
    const auto toDnnlDims = [](const std::vector<ptrdiff_t>& src) {
        return memory::dims(src.begin(), src.end());
    };

    return std::make_shared<mkldnn::deconvolution_forward::desc>(prop_kind::forward_inference,
                                                                 mkldnn::algorithm::deconvolution_direct,
                                                                 in_candidate,
                                                                 wgh_candidate,
                                                                 out_candidate,
                                                                 toDnnlDims(stride),
                                                                 toDnnlDims(dilation),
                                                                 toDnnlDims(paddingL),
                                                                 toDnnlDims(paddingR));
}
|
||||
|
||||
void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc,
|
||||
const std::vector<MemoryDescPtr> &outputDesc) {
|
||||
auto inDesc = inputDesc[0]->isDefined() ? inputDesc[0] : inputDesc[0]->cloneWithNewDims(inShape.getStaticDims());
|
||||
auto dnnlInDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*inDesc);
|
||||
auto in_candidate = dnnlInDesc.getDnnlDesc();
|
||||
|
||||
auto outDesc = outputDesc[0];
|
||||
if (!outDesc->isDefined()) {
|
||||
const auto outShape = shapeInferInternal(inDesc->getShape().getStaticDims(), lastOutputSpatialDims);
|
||||
outDesc = outDesc->cloneWithNewDims(outShape);
|
||||
}
|
||||
auto dnnlOutDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*outDesc);
|
||||
auto out_candidate = dnnlOutDesc.getDnnlDesc();
|
||||
|
||||
// grouping and autoblocking is not compatible
|
||||
if ((withGroups && !isDW) && (dnnlInDesc.blocksExtended() || dnnlOutDesc.blocksExtended()))
|
||||
return;
|
||||
|
||||
if (isInt8) {
|
||||
mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(weightDims), memory::data_type::s8, memory::format_tag::any);
|
||||
std::shared_ptr<mkldnn::deconvolution_forward::desc> deconv_desc;
|
||||
deconv_desc.reset(new deconvolution_forward::desc(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct,
|
||||
in_candidate.getDnnlDesc(), wgh_candidate, out_candidate.getDnnlDesc(),
|
||||
convertDims(stride), convertDims(dilation),
|
||||
convertDims(paddingL), convertDims(paddingR)));
|
||||
descs.emplace_back(deconv_desc);
|
||||
mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any);
|
||||
descs.emplace_back(createDescriptorInternalInt8(in_candidate, wgh_candidate, out_candidate));
|
||||
} else {
|
||||
mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(weightDims), in_candidate.getDataType(), memory::format_tag::any);
|
||||
mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(getWeightDims()),
|
||||
dnnlInDesc.getDataType(), memory::format_tag::any);
|
||||
for (auto alg : {mkldnn::algorithm::convolution_winograd, mkldnn::algorithm::convolution_direct}) {
|
||||
std::shared_ptr<mkldnn::convolution_forward::desc> conv_desc;
|
||||
conv_desc.reset(new convolution_forward::desc(prop_kind::forward_inference, alg,
|
||||
out_candidate.getDnnlDesc(), wgh_candidate, in_candidate.getDnnlDesc(),
|
||||
convertDims(stride),
|
||||
convertDims(dilation),
|
||||
convertDims(paddingL),
|
||||
convertDims(paddingR)));
|
||||
|
||||
std::shared_ptr<mkldnn::convolution_backward_data::desc> deconv_desc;
|
||||
deconv_desc.reset(new convolution_backward_data::desc(alg, out_candidate.getDnnlDesc(), wgh_candidate,
|
||||
in_candidate.getDnnlDesc(),
|
||||
convertDims(stride),
|
||||
convertDims(dilation),
|
||||
convertDims(paddingL),
|
||||
convertDims(paddingR)));
|
||||
descs_fwd.push_back(conv_desc);
|
||||
descs_bwd.push_back(deconv_desc);
|
||||
|
||||
auto fwd_conv_pd = std::make_shared<convolution_forward::primitive_desc>(*conv_desc, getEngine(), true);
|
||||
std::shared_ptr<convolution_backward_data::desc> deconv_desc;
|
||||
std::shared_ptr<convolution_forward::primitive_desc> fwd_conv_pd;
|
||||
std::tie(deconv_desc, fwd_conv_pd) = createDescriptorInternalDefault(in_candidate, wgh_candidate, out_candidate, alg);
|
||||
if (fwd_conv_pd->get(true) == nullptr)
|
||||
continue;
|
||||
|
||||
descs.emplace_back(deconv_desc, fwd_conv_pd);
|
||||
}
|
||||
}
|
||||
@ -413,15 +725,25 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<MemoryDescPtr>
|
||||
|
||||
std::shared_ptr<MemoryDesc> MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
|
||||
if (idx == 2) {
|
||||
return std::make_shared<CpuBlockedMemoryDesc>(getOriginalInputPrecisionAtPort(2), Shape(getInputShapeAtPort(2).getStaticDims()));
|
||||
return std::make_shared<CpuBlockedMemoryDesc>(InferenceEngine::Precision::I32, Shape(getInputShapeAtPort(2).getStaticDims()));
|
||||
} else if (idx > 0 && isInt8) {
|
||||
// we need to store 'weight' input as edge,
|
||||
// because at this moment we can't simple replace internal blob with input, since we need to save weight data as is, but with different order
|
||||
return std::make_shared<CpuBlockedMemoryDesc>(getOriginalInputPrecisionAtPort(idx), Shape(getInputShapeAtPort(idx).getStaticDims()));
|
||||
}
|
||||
|
||||
auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1) : isInt8 ? primitive_desc_it.src_desc(idx) : primitive_desc_it.diff_dst_desc(idx);
|
||||
if (getInputShapeAtPort(idx).isDynamic()) {
|
||||
return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx));
|
||||
}
|
||||
return MKLDNNExtensionUtils::makeDescriptor(desc);
|
||||
}
|
||||
|
||||
// Returns the memory descriptor of output 'idx' for the given primitive
// descriptor iterator. The int8 path reads dst_desc, the default path reads
// diff_src_desc. For a dynamic output shape an undefined descriptor bound to
// that shape is produced instead of a fully defined one.
std::shared_ptr<MemoryDesc> MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
    auto dnnlDesc = isInt8 ? primitive_desc_it.dst_desc(idx) : primitive_desc_it.diff_src_desc(idx);

    const bool dynamicOutput = getOutputShapeAtPort(idx).isDynamic();
    if (!dynamicOutput) {
        return MKLDNNExtensionUtils::makeDescriptor(dnnlDesc);
    }
    return MKLDNNExtensionUtils::makeUndefinedDesc(dnnlDesc, getOutputShapeAtPort(idx));
}
|
||||
|
||||
@ -439,4 +761,61 @@ InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const
|
||||
return getMaxPrecision(inputPrecisions);
|
||||
}
|
||||
|
||||
// Creates the convolution_backward_data primitive for 'pd' and registers an
// intermediate reorder for every argument whose actual memory descriptor
// differs from the one the primitive descriptor reports.
MKLDNNDeconvolutionNode::DeconvExecutorDefault::DeconvExecutorDefault(const mkldnn::convolution_backward_data::primitive_desc& pd,
                                                                      const mkldnn::memory::desc& inMemDesc,
                                                                      const mkldnn::memory::desc& weightMemDesc,
                                                                      const mkldnn::memory::desc& outMemDesc,
                                                                      const mkldnn::engine& engine) {
    execPrim.reset(new mkldnn::convolution_backward_data(pd));

    // Node input is the primitive's diff_dst argument.
    const auto primDiffDstDesc = pd.diff_dst_desc();
    if (inMemDesc != primDiffDstDesc) {
        inputReorders.emplace(DNNL_ARG_DIFF_DST, IntermReorder(inMemDesc, primDiffDstDesc, engine));
    }

    const auto primWeightsDesc = pd.weights_desc();
    if (weightMemDesc != primWeightsDesc) {
        inputReorders.emplace(DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, primWeightsDesc, engine));
    }

    // Node output is the primitive's diff_src argument; the reorder goes from
    // the primitive's layout to the node's layout.
    const auto primDiffSrcDesc = pd.diff_src_desc();
    if (outMemDesc != primDiffSrcDesc) {
        outputReorders.emplace(DNNL_ARG_DIFF_SRC, IntermReorder(primDiffSrcDesc, outMemDesc, engine));
    }
}
|
||||
|
||||
// Creates the deconvolution_forward primitive for 'pd' and registers an
// intermediate reorder for every argument whose actual memory descriptor
// differs from the one the primitive descriptor reports.
MKLDNNDeconvolutionNode::DeconvExecutorInt8::DeconvExecutorInt8(const mkldnn::deconvolution_forward::primitive_desc& pd,
                                                                const mkldnn::memory::desc& inMemDesc,
                                                                const mkldnn::memory::desc& weightMemDesc,
                                                                const mkldnn::memory::desc& outMemDesc,
                                                                const mkldnn::engine& engine) {
    execPrim.reset(new mkldnn::deconvolution_forward(pd));

    const auto primSrcDesc = pd.src_desc();
    if (inMemDesc != primSrcDesc) {
        inputReorders.emplace(DNNL_ARG_SRC, IntermReorder(inMemDesc, primSrcDesc, engine));
    }

    const auto primWeightsDesc = pd.weights_desc();
    if (weightMemDesc != primWeightsDesc) {
        inputReorders.emplace(DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, primWeightsDesc, engine));
    }

    // Output reorder converts from the primitive's layout to the node's layout.
    const auto primDstDesc = pd.dst_desc();
    if (outMemDesc != primDstDesc) {
        outputReorders.emplace(DNNL_ARG_DST, IntermReorder(primDstDesc, outMemDesc, engine));
    }
}
|
||||
|
||||
std::vector<int32_t> MKLDNNDeconvolutionNode::readOutputSpatialDims() const {
|
||||
if (getParentEdges().size() < 3) {
|
||||
IE_THROW() << "Can't get output spatial dims. Inputs number = " << getParentEdges().size();
|
||||
}
|
||||
const auto &shapeMemPtr = getParentEdgesAtPort(2)[0]->getMemoryPtr();
|
||||
if (!shapeMemPtr || !shapeMemPtr->GetPrimitivePtr()) {
|
||||
IE_THROW() << "'output_shape' input memory is not allocated.";
|
||||
}
|
||||
const auto spDimsNum = getInputShapeAtPort(0).getRank() - 2;
|
||||
if (shapeMemPtr->getStaticDims()[0] != spDimsNum) {
|
||||
IE_THROW() << "Can't read output spatial dims, beause 'output_shape' input has incorrect number of elements";
|
||||
}
|
||||
const int32_t *outShapePtr = reinterpret_cast<const int32_t *>(shapeMemPtr->GetPtr());
|
||||
std::vector<int32_t> outSpDims(outShapePtr, outShapePtr + shapeMemPtr->getStaticDims()[0]);
|
||||
return outSpDims;
|
||||
}
|
||||
|
||||
REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution);
|
||||
|
@ -9,10 +9,15 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "common/dnnl_executor.h"
|
||||
|
||||
namespace MKLDNNPlugin {
|
||||
|
||||
class MKLDNNDeconvolutionNode : public MKLDNNNode {
|
||||
using DefaultDeconvDescs = std::pair<std::shared_ptr<mkldnn::convolution_backward_data::desc>,
|
||||
std::shared_ptr<mkldnn::convolution_forward::primitive_desc>>;
|
||||
using Int8DeconvDesc = std::shared_ptr<mkldnn::deconvolution_forward::desc>;
|
||||
|
||||
public:
|
||||
MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
|
||||
|
||||
@ -39,27 +44,88 @@ public:
|
||||
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
|
||||
bool canFuse(const MKLDNNNodePtr& node) const override;
|
||||
|
||||
const InferenceEngine::SizeVector& getWeightDims() { return weightDims; }
|
||||
const std::vector<ptrdiff_t>& getStride() { return stride; }
|
||||
const VectorDims& getWeightDims() const { return getInputShapeAtPort(1).getStaticDims(); }
|
||||
const std::vector<ptrdiff_t>& getStride() const { return stride; }
|
||||
|
||||
void prepareParams() override;
|
||||
void execute(mkldnn::stream strm) override;
|
||||
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
|
||||
bool needShapeInfer() const override;
|
||||
std::vector<VectorDims> shapeInfer() const override;
|
||||
|
||||
private:
|
||||
using executorPtr = std::shared_ptr<DnnlExecutor>;
|
||||
executorPtr execPtr = nullptr;
|
||||
|
||||
class DeconvExecutorDefault : public DnnlExecutor {
|
||||
public:
|
||||
DeconvExecutorDefault(const mkldnn::convolution_backward_data::primitive_desc& pd,
|
||||
const mkldnn::memory::desc& inMemDesc,
|
||||
const mkldnn::memory::desc& weightMemDesc,
|
||||
const mkldnn::memory::desc& outMemDesc,
|
||||
const mkldnn::engine& engine);
|
||||
};
|
||||
|
||||
class DeconvExecutorInt8 : public DnnlExecutor {
|
||||
public:
|
||||
DeconvExecutorInt8(const mkldnn::deconvolution_forward::primitive_desc& pd,
|
||||
const mkldnn::memory::desc& inMemDesc,
|
||||
const mkldnn::memory::desc& weightMemDesc,
|
||||
const mkldnn::memory::desc& outMemDesc,
|
||||
const mkldnn::engine& engine);
|
||||
};
|
||||
|
||||
bool withGroups = false;
|
||||
bool isDW = false;
|
||||
bool isInt8 = false;
|
||||
bool autoPad = false;
|
||||
bool externOutShape = false;
|
||||
size_t groupNum = 1;
|
||||
size_t IC;
|
||||
size_t OC;
|
||||
std::vector<ptrdiff_t> kernel;
|
||||
std::vector<ptrdiff_t> stride;
|
||||
std::vector<ptrdiff_t> dilation;
|
||||
std::vector<ptrdiff_t> paddingL;
|
||||
std::vector<ptrdiff_t> paddingR;
|
||||
InferenceEngine::SizeVector weightDims;
|
||||
std::vector<std::shared_ptr<mkldnn::convolution_forward::desc>> descs_fwd;
|
||||
std::vector<std::shared_ptr<mkldnn::convolution_backward_data::desc>> descs_bwd;
|
||||
ov::CoordinateDiff paddingL;
|
||||
ov::CoordinateDiff paddingR;
|
||||
ov::CoordinateDiff outputPadding;
|
||||
std::vector<int32_t> lastOutputSpatialDims;
|
||||
VectorDims int8WeightDims;
|
||||
|
||||
Shape inShape;
|
||||
|
||||
AttrPtr pAttr;
|
||||
|
||||
mkldnn::primitive_attr attr;
|
||||
void setPostOps(mkldnn::primitive_attr &attr);
|
||||
void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims);
|
||||
|
||||
VectorDims shapeInferInternal(const VectorDims &inDims, std::vector<int32_t> outSpDims) const;
|
||||
void initPadding(std::shared_ptr<ngraph::Node> op, const Shape &inShape, const std::vector<int32_t>& outSpDims);
|
||||
void initPaddingR(const Shape &inShape, const Shape &outShape);
|
||||
std::vector<int32_t> readOutputSpatialDims() const;
|
||||
std::pair<VectorDims, VectorDims> makeDummyInOutShape();
|
||||
|
||||
DefaultDeconvDescs createDescriptorInternalDefault(const mkldnn::memory::desc& in_candidate,
|
||||
const mkldnn::memory::desc& wgh_candidate,
|
||||
const mkldnn::memory::desc& out_candidate,
|
||||
mkldnn::algorithm alg) const;
|
||||
Int8DeconvDesc createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate,
|
||||
const mkldnn::memory::desc& wgh_candidate,
|
||||
const mkldnn::memory::desc& out_candidate) const;
|
||||
std::shared_ptr<MKLDNNDescriptor> createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
|
||||
const mkldnn::memory::desc& wghDesc,
|
||||
const mkldnn::memory::desc& dstDesc,
|
||||
bool isWinograd) const;
|
||||
std::shared_ptr<MKLDNNDescriptor> createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
|
||||
const mkldnn::memory::desc& wghDesc,
|
||||
const mkldnn::memory::desc& dstDesc) const;
|
||||
|
||||
void createDeconvPrim(std::shared_ptr<MKLDNNDescriptor> desc,
|
||||
MKLDNNMemoryPtr srcMemPtr,
|
||||
MKLDNNMemoryPtr wghMemPtr,
|
||||
MKLDNNMemoryPtr dstMemPtr,
|
||||
AttrPtr attr,
|
||||
impl_desc_type selectedImpl);
|
||||
|
||||
std::string errorPrefix;
|
||||
|
||||
|
@ -49,15 +49,7 @@ bool MKLDNNDetectionOutputNode::isSupportedOperation(const std::shared_ptr<const
|
||||
return true;
|
||||
}
|
||||
|
||||
void MKLDNNDetectionOutputNode::createPrimitive() {
|
||||
if (inputShapesDefined()) {
|
||||
if (needPrepareParams())
|
||||
prepareParams();
|
||||
updateLastInputDims();
|
||||
}
|
||||
}
|
||||
|
||||
MKLDNNDetectionOutputNode::MKLDNNDetectionOutputNode(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng,
|
||||
MKLDNNDetectionOutputNode::MKLDNNDetectionOutputNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
|
||||
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
|
||||
std::string errorMessage;
|
||||
if (!isSupportedOperation(op, errorMessage)) {
|
||||
@ -170,6 +162,10 @@ struct ConfidenceComparatorDO {
|
||||
const float* confData;
|
||||
};
|
||||
|
||||
// Dynamic-shape execution entry point: forwards to execute() with the same stream.
void MKLDNNDetectionOutputNode::executeDynamicImpl(mkldnn::stream strm) {
|
||||
execute(strm);
|
||||
}
|
||||
|
||||
void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) {
|
||||
float *dstData = reinterpret_cast<float *>(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr());
|
||||
|
||||
|
@ -16,7 +16,6 @@ public:
|
||||
|
||||
void getSupportedDescriptors() override {};
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void createPrimitive() override;
|
||||
void execute(mkldnn::stream strm) override;
|
||||
bool created() const override;
|
||||
|
||||
@ -24,7 +23,7 @@ public:
|
||||
|
||||
protected:
|
||||
void prepareParams() override;
|
||||
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
|
||||
void executeDynamicImpl(mkldnn::stream strm) override;
|
||||
|
||||
private:
|
||||
static const int ID_LOC = 0;
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <ie_parallel.hpp>
|
||||
|
||||
#include <mkldnn_types.h>
|
||||
#include "cpu_types.h"
|
||||
#include "utils/bfloat16.hpp"
|
||||
#include <cpu/x64/injectors/jit_uni_quantization_injector.hpp>
|
||||
#include <cpu/ref_eltwise.hpp>
|
||||
@ -31,6 +32,7 @@
|
||||
#include "ngraph_transformations/op/leaky_relu.hpp"
|
||||
#include "ngraph_transformations/op/swish_cpu.hpp"
|
||||
|
||||
#include <oneapi/dnnl/dnnl.hpp>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
@ -791,18 +793,41 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
// Classifies how the constant operand of a two-input eltwise op is broadcast.
//
// Returns Undefined when neither of the first two inputs is an
// ngraph::opset1::Constant. Otherwise returns PerTensor when the constant
// holds exactly one element and PerChannel in every other case. When both
// inputs are constants, the shape of input 1 decides.
MKLDNNEltwiseNode::BroadcastingPolicy MKLDNNEltwiseNode::determineBroadcastingPolicy(const std::shared_ptr<ngraph::Node>& op) {
    const auto firstAsConst = std::dynamic_pointer_cast<ngraph::opset1::Constant>(op->get_input_node_shared_ptr(0));
    const auto secondAsConst = std::dynamic_pointer_cast<ngraph::opset1::Constant>(op->get_input_node_shared_ptr(1));

    if (!firstAsConst && !secondAsConst) {
        return Undefined;
    }

    // Port 1 takes precedence over port 0.
    const int constPort = secondAsConst ? 1 : 0;
    auto constShape = op->get_input_shape(constPort);

    return ngraph::shape_size(constShape) == 1 ? PerTensor : PerChannel;
}
|
||||
|
||||
const std::map<const ngraph::DiscreteTypeInfo, MKLDNNEltwiseNode::Initializer> MKLDNNEltwiseNode::initializers = {
|
||||
{ngraph::op::v1::Add::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
|
||||
node.algorithm = EltwiseAdd;
|
||||
node.broadcastingPolicy = determineBroadcastingPolicy(op);
|
||||
}},
|
||||
{ngraph::op::v1::Subtract::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
|
||||
node.algorithm = EltwiseSubtract;
|
||||
node.broadcastingPolicy = determineBroadcastingPolicy(op);
|
||||
}},
|
||||
{ngraph::op::v1::Multiply::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
|
||||
node.algorithm = EltwiseMultiply;
|
||||
node.broadcastingPolicy = determineBroadcastingPolicy(op);
|
||||
}},
|
||||
{ngraph::op::v1::Divide::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
|
||||
node.algorithm = EltwiseDivide;
|
||||
node.broadcastingPolicy = determineBroadcastingPolicy(op);
|
||||
}},
|
||||
{ngraph::op::v0::SquaredDifference::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
|
||||
node.algorithm = EltwiseSquaredDifference;
|
||||
@ -828,6 +853,7 @@ const std::map<const ngraph::DiscreteTypeInfo, MKLDNNEltwiseNode::Initializer> M
|
||||
node.alpha = powerStatic->get_power();
|
||||
node.beta = powerStatic->get_scale();
|
||||
node.gamma = powerStatic->get_shift();
|
||||
node.broadcastingPolicy = PerTensor;
|
||||
}},
|
||||
{ngraph::op::v1::Equal::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
|
||||
node.algorithm = EltwiseEqual;
|
||||
@ -954,6 +980,7 @@ const std::map<const ngraph::DiscreteTypeInfo, MKLDNNEltwiseNode::Initializer> M
|
||||
}},
|
||||
{ngraph::op::v0::PRelu::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
|
||||
node.algorithm = EltwisePrelu;
|
||||
node.broadcastingPolicy = determineBroadcastingPolicy(op);
|
||||
}},
|
||||
{ngraph::op::v0::Erf::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
|
||||
node.algorithm = EltwiseErf;
|
||||
@ -984,7 +1011,7 @@ bool MKLDNNEltwiseNode::isSupportedOperation(const std::shared_ptr<const ngraph:
|
||||
}
|
||||
|
||||
MKLDNNEltwiseNode::MKLDNNEltwiseNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
|
||||
MKLDNNNode(op, eng, cache) {
|
||||
MKLDNNNode(op, eng, cache), broadcastingPolicy(Undefined) {
|
||||
std::string errorMessage;
|
||||
if (!isSupportedOperation(op, errorMessage)) {
|
||||
IE_THROW(NotImplemented) << errorMessage;
|
||||
@ -1498,14 +1525,6 @@ void MKLDNNEltwiseNode::selectOptimalPrimitiveDescriptor() {
|
||||
selectPreferPrimitiveDescriptor(getPrimitivesPriority(), true);
|
||||
}
|
||||
|
||||
void MKLDNNEltwiseNode::createPrimitive() {
|
||||
if (inputShapesDefined()) {
|
||||
if (needPrepareParams())
|
||||
prepareParams();
|
||||
updateLastInputDims();
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNEltwiseNode::initOptimalPrimitiveDescriptor() {
|
||||
auto selected_pd = getSelectedPrimitiveDescriptor();
|
||||
if (selected_pd == nullptr)
|
||||
@ -1656,6 +1675,10 @@ void MKLDNNEltwiseNode::executeReference(const jit_eltwise_params &jep, const ji
|
||||
});
|
||||
}
|
||||
|
||||
// Dynamic-shape execution entry point: forwards to execute() with the same stream.
void MKLDNNEltwiseNode::executeDynamicImpl(mkldnn::stream strm) {
|
||||
execute(strm);
|
||||
}
|
||||
|
||||
void MKLDNNEltwiseNode::execute(mkldnn::stream strm) {
|
||||
if (execPtr) {
|
||||
jit_eltwise_call_args_ptrs args_ptrs = {};
|
||||
@ -1713,106 +1736,124 @@ void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) {
|
||||
getInputShapeAtPort(0) == getInputShapeAtPort(1);
|
||||
if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) {
|
||||
std::tie(scales, shifts) = getScalesAndShifts(parentNode.get());
|
||||
if ((parentNode->getType() == FullyConnected || parentNode->getType() == MatMul) && one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract,
|
||||
EltwiseMultiply, EltwiseDivide, EltwiseMulAdd, EltwisePowerStatic, EltwisePrelu)) {
|
||||
std::tie(scales, shifts) = getScalesAndShifts(parentNode.get());
|
||||
}
|
||||
}
|
||||
MKLDNNNode::fuseInto(parentNode);
|
||||
}
|
||||
|
||||
void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
|
||||
void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) {
|
||||
const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' ";
|
||||
|
||||
if (getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
|
||||
switch (getMKLDNNAlgorithm()) {
|
||||
case mkldnn::algorithm::eltwise_relu:
|
||||
case mkldnn::algorithm::eltwise_tanh:
|
||||
case mkldnn::algorithm::eltwise_elu:
|
||||
case mkldnn::algorithm::eltwise_square:
|
||||
case mkldnn::algorithm::eltwise_abs:
|
||||
case mkldnn::algorithm::eltwise_sqrt:
|
||||
case mkldnn::algorithm::eltwise_linear:
|
||||
case mkldnn::algorithm::eltwise_bounded_relu:
|
||||
case mkldnn::algorithm::eltwise_soft_relu:
|
||||
case mkldnn::algorithm::eltwise_logistic:
|
||||
case mkldnn::algorithm::eltwise_exp:
|
||||
case mkldnn::algorithm::eltwise_gelu_erf:
|
||||
case mkldnn::algorithm::eltwise_gelu_tanh:
|
||||
case mkldnn::algorithm::eltwise_clip:
|
||||
case mkldnn::algorithm::eltwise_swish:
|
||||
case mkldnn::algorithm::eltwise_hardswish:
|
||||
case mkldnn::algorithm::eltwise_mish:
|
||||
case mkldnn::algorithm::eltwise_hsigmoid:
|
||||
case mkldnn::algorithm::eltwise_round_half_to_even:
|
||||
case mkldnn::algorithm::eltwise_round_half_away_from_zero:
|
||||
ops.append_eltwise(1.0, getMKLDNNAlgorithm(), getAlpha(), getBeta());
|
||||
break;
|
||||
default: IE_THROW() << errorPrefix << "as post operation is not supported";
|
||||
case mkldnn::algorithm::eltwise_relu:
|
||||
case mkldnn::algorithm::eltwise_tanh:
|
||||
case mkldnn::algorithm::eltwise_elu:
|
||||
case mkldnn::algorithm::eltwise_square:
|
||||
case mkldnn::algorithm::eltwise_abs:
|
||||
case mkldnn::algorithm::eltwise_sqrt:
|
||||
case mkldnn::algorithm::eltwise_linear:
|
||||
case mkldnn::algorithm::eltwise_bounded_relu:
|
||||
case mkldnn::algorithm::eltwise_soft_relu:
|
||||
case mkldnn::algorithm::eltwise_logistic:
|
||||
case mkldnn::algorithm::eltwise_exp:
|
||||
case mkldnn::algorithm::eltwise_gelu_erf:
|
||||
case mkldnn::algorithm::eltwise_gelu_tanh:
|
||||
case mkldnn::algorithm::eltwise_clip:
|
||||
case mkldnn::algorithm::eltwise_swish:
|
||||
case mkldnn::algorithm::eltwise_hardswish:
|
||||
case mkldnn::algorithm::eltwise_mish:
|
||||
case mkldnn::algorithm::eltwise_hsigmoid:
|
||||
case mkldnn::algorithm::eltwise_round_half_to_even:
|
||||
case mkldnn::algorithm::eltwise_round_half_away_from_zero:
|
||||
ops.append_eltwise(1.0, getMKLDNNAlgorithm(), getAlpha(), getBeta());
|
||||
break;
|
||||
default: IE_THROW() << errorPrefix << "as post operation is not supported";
|
||||
}
|
||||
} else {
|
||||
const size_t chIdx = postOpDims.size() > 1 ? 1 : 0;
|
||||
const size_t chIdx = postOpDims.size() > 1 ? getFusingAxis() : 0;
|
||||
scalesBuffer = makeAlignedBuffer(postOpDims[chIdx], scales, align);
|
||||
if (getAlgorithm() != EltwisePrelu) {
|
||||
shiftsBuffer = makeAlignedBuffer(postOpDims[chIdx], shifts, align);
|
||||
}
|
||||
|
||||
if (initAsBinary) {
|
||||
auto appendBinary = [&](const mkldnn::algorithm alg, MKLDNNMemoryPtr &memPtr, const std::vector<float> &data) {
|
||||
if (data.empty())
|
||||
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
|
||||
|
||||
std::vector<size_t> binaryDims(postOpDims.size(), 1);
|
||||
binaryDims[chIdx] = postOpDims[chIdx];
|
||||
|
||||
DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(binaryDims));
|
||||
ops.append_binary(alg, memoryDesc.getDnnlDesc());
|
||||
|
||||
if (initBinaryMemory) {
|
||||
memPtr.reset(new MKLDNNMemory(getEngine()));
|
||||
memPtr->Create(memoryDesc, &data[0]);
|
||||
}
|
||||
};
|
||||
switch (getAlgorithm()) {
|
||||
case EltwiseAdd:
|
||||
case EltwiseSubtract:
|
||||
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shiftsBuffer);
|
||||
break;
|
||||
case EltwiseMultiply:
|
||||
case EltwiseDivide:
|
||||
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scalesBuffer);
|
||||
break;
|
||||
case EltwiseMulAdd:
|
||||
case EltwisePowerStatic:
|
||||
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scalesBuffer);
|
||||
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shiftsBuffer);
|
||||
break;
|
||||
case EltwisePrelu:
|
||||
appendBinary(mkldnn::algorithm::binary_prelu, scalesMemory, scalesBuffer);
|
||||
break;
|
||||
default:
|
||||
IE_THROW() << errorPrefix << "as post operation is not supported";
|
||||
}
|
||||
} else {
|
||||
switch (getAlgorithm()) {
|
||||
case EltwiseAdd:
|
||||
case EltwiseSubtract:
|
||||
case EltwiseMultiply:
|
||||
case EltwiseDivide:
|
||||
case EltwiseMulAdd:
|
||||
case EltwisePowerStatic:
|
||||
if (scalesBuffer.empty() || shiftsBuffer.empty())
|
||||
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
|
||||
ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scalesBuffer[0], &shiftsBuffer[0]);
|
||||
break;
|
||||
case EltwisePrelu:
|
||||
if (scalesBuffer.empty())
|
||||
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
|
||||
ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scalesBuffer[0], nullptr);
|
||||
break;
|
||||
default:
|
||||
IE_THROW() << errorPrefix << "as post operation is not supported";
|
||||
}
|
||||
/* @todo legacy depthwise post ops are kept for now
|
||||
* for performance reasons
|
||||
*/
|
||||
switch (getAlgorithm()) {
|
||||
case EltwiseAdd:
|
||||
case EltwiseSubtract:
|
||||
case EltwiseMultiply:
|
||||
case EltwiseDivide:
|
||||
case EltwiseMulAdd:
|
||||
case EltwisePowerStatic:
|
||||
if (scales.empty() || shifts.empty())
|
||||
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
|
||||
ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scalesBuffer[0], &shiftsBuffer[0]);
|
||||
break;
|
||||
case EltwisePrelu:
|
||||
if (scales.empty())
|
||||
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
|
||||
ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scalesBuffer[0], nullptr);
|
||||
break;
|
||||
default:
|
||||
IE_THROW() << errorPrefix << "as post operation is not supported";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Appends this eltwise node to 'ops' as one or two oneDNN binary post
// operations, chosen by the node's algorithm.
//
// ops              - post-op chain that receives the binary post ops.
// postOpDims       - dims used for the operand descriptor when the policy is
//                    not PerTensor; only its rank is used for PerTensor.
// binaryPostOpsMem - receives every operand memory object created by this call.
//
// Throws when the operand data is empty, when the broadcasting policy is
// Undefined, or when the algorithm has no binary post-op form.
void MKLDNNEltwiseNode::appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) {
    const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' as binary post op ";
    // All-ones shape of the same rank as postOpDims, used for PerTensor operands.
    VectorDims broadcastBinaryShape(postOpDims.size(), 1);

    auto registerBinaryOp = [&](const mkldnn::algorithm binaryAlg, MKLDNNMemoryPtr &operandMem, const std::vector<float> &operandData) {
        if (operandData.empty()) {
            IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
        }
        if (broadcastingPolicy == Undefined) {
            IE_THROW() << errorPrefix << "cannot be performed since policy is Undefined";
        }

        DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, broadcastingPolicy == PerTensor ? Shape(broadcastBinaryShape) : Shape(postOpDims));

        ops.append_binary(binaryAlg, memoryDesc.getDnnlDesc());

        // An operand memory object that already exists is neither recreated
        // nor pushed to binaryPostOpsMem again.
        if (operandMem) {
            return;
        }
        operandMem.reset(new MKLDNNMemory(getEngine()));
        operandMem->Create(memoryDesc, &operandData[0]);

        binaryPostOpsMem.push_back(operandMem);
    };

    const auto multiplyByScales = [&]() { registerBinaryOp(mkldnn::algorithm::binary_mul, scalesMemory, scales); };
    const auto addShifts = [&]() { registerBinaryOp(mkldnn::algorithm::binary_add, shiftsMemory, shifts); };

    switch (getAlgorithm()) {
        case EltwiseAdd:
        case EltwiseSubtract:
            addShifts();
            break;
        case EltwiseMultiply:
        case EltwiseDivide:
            multiplyByScales();
            break;
        case EltwiseMulAdd:
            multiplyByScales();
            addShifts();
            break;
        case EltwisePowerStatic:
            // The multiply is skipped for a unit scale and the add for a zero shift.
            if (beta != 1.0f) {
                multiplyByScales();
            }
            if (gamma != 0.0f) {
                addShifts();
            }
            break;
        case EltwisePrelu:
            registerBinaryOp(mkldnn::algorithm::binary_prelu, scalesMemory, scales);
            break;
        default:
            IE_THROW() << errorPrefix << "as post operation is not supported";
    }
}
|
||||
|
||||
bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const {
|
||||
auto isSuitableNode = [this](const MKLDNNEltwiseNode* node) {
|
||||
// [WA] Since execution precision change from I32 to FP32 for Divide operation may lead to incorrect results
|
||||
|
@ -75,7 +75,8 @@ public:
|
||||
bool created() const override;
|
||||
bool canBeInPlace() const override;
|
||||
bool canFuse(const MKLDNNNodePtr& node) const override;
|
||||
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false) override;
|
||||
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1) override;
|
||||
void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) override;
|
||||
void fuseInto(MKLDNNNodePtr& parentNode) override;
|
||||
InferenceEngine::Precision getRuntimePrecision() const override;
|
||||
|
||||
@ -89,16 +90,23 @@ public:
|
||||
bool isWithBroadcast();
|
||||
bool isSpecialConvolutionAddFusing() const { return specialConvolutionAddFusing; }
|
||||
|
||||
void createPrimitive() override;
|
||||
|
||||
std::vector<VectorDims> shapeInfer() const override;
|
||||
bool needPrepareParams() const override;
|
||||
void prepareParams() override;
|
||||
|
||||
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
|
||||
void executeDynamicImpl(mkldnn::stream strm) override;
|
||||
|
||||
enum BroadcastingPolicy {
|
||||
PerChannel,
|
||||
PerTensor,
|
||||
Undefined,
|
||||
};
|
||||
|
||||
BroadcastingPolicy getBroadcastingPolicy() const { return broadcastingPolicy; }
|
||||
|
||||
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
|
||||
|
||||
|
||||
private:
|
||||
struct EltwiseExecutor {
|
||||
EltwiseExecutor(size_t batch) : batchDimIdx(batch) {}
|
||||
@ -130,6 +138,8 @@ private:
|
||||
size_t fullWorkAmount = 0;
|
||||
};
|
||||
|
||||
BroadcastingPolicy broadcastingPolicy;
|
||||
|
||||
mkldnn::algorithm mkldnnAlgorithm = mkldnn::algorithm::undef;
|
||||
|
||||
static const int optimalTensorRank = 6;
|
||||
@ -157,6 +167,8 @@ private:
|
||||
using Initializer = std::function<void(const std::shared_ptr<ngraph::Node>&, MKLDNNEltwiseNode& node)>;
|
||||
static const std::map<const ngraph::DiscreteTypeInfo, Initializer> initializers;
|
||||
|
||||
static BroadcastingPolicy determineBroadcastingPolicy(const std::shared_ptr<ngraph::Node>& op);
|
||||
|
||||
void executeOptimized6D(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
|
||||
const VectorDims &dims_out) const;
|
||||
void executeOptimizedGeneric(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
|
||||
|
@ -70,14 +70,6 @@ void MKLDNNEmbeddingBagOffsetSumNode::initSupportedPrimitiveDescriptors() {
|
||||
addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any);
|
||||
}
|
||||
|
||||
void MKLDNNEmbeddingBagOffsetSumNode::createPrimitive() {
|
||||
if (inputShapesDefined()) {
|
||||
if (needPrepareParams())
|
||||
prepareParams();
|
||||
updateLastInputDims();
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNEmbeddingBagOffsetSumNode::prepareParams() {
|
||||
_indicesLen = getParentEdgesAtPort(INDICES_IDX)[0]->getMemory().getStaticDims()[0];
|
||||
_offsetsLen = getParentEdgesAtPort(OFFSETS_IDX)[0]->getMemory().getStaticDims()[0];
|
||||
@ -126,6 +118,14 @@ void MKLDNNEmbeddingBagOffsetSumNode::getIndices(int embIndex, const int*& indic
|
||||
weightsIdx = offsetsData_[embIndex];
|
||||
}
|
||||
|
||||
void MKLDNNEmbeddingBagOffsetSumNode::executeDynamicImpl(mkldnn::stream strm) {
|
||||
execute(strm);
|
||||
}
|
||||
|
||||
bool MKLDNNEmbeddingBagOffsetSumNode::isExecutable() const {
|
||||
return !isInputTensorAtPortEmpty(0);
|
||||
}
|
||||
|
||||
void MKLDNNEmbeddingBagOffsetSumNode::execute(mkldnn::stream strm) {
|
||||
const auto *srcData = reinterpret_cast<const uint8_t *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
|
||||
auto *dstData = reinterpret_cast<uint8_t *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
|
||||
|
@ -19,15 +19,15 @@ public:
|
||||
|
||||
void getSupportedDescriptors() override {};
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void createPrimitive() override;
|
||||
void execute(mkldnn::stream strm) override;
|
||||
bool created() const override;
|
||||
|
||||
bool isExecutable() const override;
|
||||
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
|
||||
|
||||
protected:
|
||||
void prepareParams() override;
|
||||
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
|
||||
void executeDynamicImpl(mkldnn::stream strm) override;
|
||||
|
||||
private:
|
||||
void initFromInputs() override;
|
||||
|
@ -64,14 +64,6 @@ void MKLDNNEmbeddingBagPackedSumNode::initSupportedPrimitiveDescriptors() {
|
||||
addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any);
|
||||
}
|
||||
|
||||
void MKLDNNEmbeddingBagPackedSumNode::createPrimitive() {
|
||||
if (inputShapesDefined()) {
|
||||
if (needPrepareParams())
|
||||
prepareParams();
|
||||
updateLastInputDims();
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNEmbeddingBagPackedSumNode::prepareParams() {
|
||||
_batch = getParentEdgesAtPort(INDICES_IDX)[0]->getMemory().getStaticDims()[0];
|
||||
_indicesPerBag = getParentEdgesAtPort(INDICES_IDX)[0]->getMemory().getStaticDims()[1];
|
||||
@ -94,6 +86,14 @@ void MKLDNNEmbeddingBagPackedSumNode::getIndices(int embIndex, const int*& indic
|
||||
weightsIdx = embIndex * _indicesPerBag;
|
||||
}
|
||||
|
||||
void MKLDNNEmbeddingBagPackedSumNode::executeDynamicImpl(mkldnn::stream strm) {
|
||||
execute(strm);
|
||||
}
|
||||
|
||||
bool MKLDNNEmbeddingBagPackedSumNode::isExecutable() const {
|
||||
return !isInputTensorAtPortEmpty(0);
|
||||
}
|
||||
|
||||
void MKLDNNEmbeddingBagPackedSumNode::execute(mkldnn::stream strm) {
|
||||
const auto *srcData = reinterpret_cast<const uint8_t *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
|
||||
auto *dstData = reinterpret_cast<uint8_t *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
|
||||
|
@ -19,15 +19,15 @@ public:
|
||||
|
||||
void getSupportedDescriptors() override {};
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void createPrimitive() override;
|
||||
void execute(mkldnn::stream strm) override;
|
||||
bool created() const override;
|
||||
|
||||
bool isExecutable() const override;
|
||||
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
|
||||
|
||||
protected:
|
||||
void prepareParams() override;
|
||||
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
|
||||
void executeDynamicImpl(mkldnn::stream strm) override;
|
||||
|
||||
private:
|
||||
void initFromInputs() override;
|
||||
|
@ -11,14 +11,6 @@
|
||||
using namespace MKLDNNPlugin;
|
||||
using namespace InferenceEngine;
|
||||
|
||||
void MKLDNNEmbeddingSegmentsSumNode::createPrimitive() {
|
||||
if (inputShapesDefined()) {
|
||||
if (needPrepareParams())
|
||||
prepareParams();
|
||||
updateLastInputDims();
|
||||
}
|
||||
}
|
||||
|
||||
bool MKLDNNEmbeddingSegmentsSumNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
|
||||
try {
|
||||
const auto embBagSegSumOp = ngraph::as_type_ptr<const ngraph::op::v3::EmbeddingSegmentsSum>(op);
|
||||
@ -129,6 +121,14 @@ void MKLDNNEmbeddingSegmentsSumNode::getIndices(int embIndex, const int*& indice
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNEmbeddingSegmentsSumNode::executeDynamicImpl(mkldnn::stream strm) {
|
||||
execute(strm);
|
||||
}
|
||||
|
||||
bool MKLDNNEmbeddingSegmentsSumNode::isExecutable() const {
|
||||
return !isInputTensorAtPortEmpty(0);
|
||||
}
|
||||
|
||||
void MKLDNNEmbeddingSegmentsSumNode::execute(mkldnn::stream strm) {
|
||||
const auto *srcData = reinterpret_cast<const uint8_t *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
|
||||
auto *dstData = reinterpret_cast<uint8_t *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
|
||||
|
@ -19,15 +19,15 @@ public:
|
||||
|
||||
void getSupportedDescriptors() override {};
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void createPrimitive() override;
|
||||
void execute(mkldnn::stream strm) override;
|
||||
bool created() const override;
|
||||
|
||||
bool isExecutable() const override;
|
||||
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
|
||||
|
||||
protected:
|
||||
void prepareParams() override;
|
||||
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
|
||||
void executeDynamicImpl(mkldnn::stream strm) override;
|
||||
|
||||
private:
|
||||
void initFromInputs() override;
|
||||
|
@ -272,12 +272,6 @@ void MKLDNNExperimentalDetectronDetectionOutputNode::initSupportedPrimitiveDescr
|
||||
impl_desc_type::ref_any);
|
||||
}
|
||||
|
||||
void MKLDNNExperimentalDetectronDetectionOutputNode::createPrimitive() {
|
||||
if (inputShapesDefined()) {
|
||||
updateLastInputDims();
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNExperimentalDetectronDetectionOutputNode::execute(mkldnn::stream strm) {
|
||||
const int rois_num = getParentEdgeAt(INPUT_ROIS)->getMemory().getStaticDims()[0];
|
||||
assert(classes_num_ == static_cast<int>(getParentEdgeAt(INPUT_SCORES)->getMemory().getStaticDims()[1]));
|
||||
|
@ -15,7 +15,6 @@ public:
|
||||
|
||||
void getSupportedDescriptors() override {};
|
||||
void initSupportedPrimitiveDescriptors() override;
|
||||
void createPrimitive() override;
|
||||
void execute(mkldnn::stream strm) override;
|
||||
bool created() const override;
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user