Merge remote-tracking branch 'github/master' into auto-batch-master

# Conflicts:
#	inference-engine/tests/functional/plugin/gpu/remote_blob_tests/gpu_remote_tensor_tests.cpp
myshevts 2021-12-02 18:50:10 +03:00
commit 8986ad583b
515 changed files with 13240 additions and 9316 deletions


@ -88,7 +88,7 @@ jobs:
rm -rf $(BUILD_SAMPLES_DIR) ; mkdir $(BUILD_SAMPLES_DIR)
sudo rm -rf $(TMP_DIR) ; sudo mkdir $(TMP_DIR) ; sudo chmod 777 -R $(TMP_DIR)
sudo mkdir -p $(SHARE_DIR)
sudo apt --assume-yes install nfs-common
sudo apt --assume-yes update && sudo apt --assume-yes install nfs-common
sudo mount -vvv -t nfs cinfsshare.file.core.windows.net:/cinfsshare/onnxtestdata $(SHARE_DIR) -o vers=4,minorversion=1,sec=sys
mkdir -p $(CCACHE_DIR)
displayName: 'Make dir'
@ -316,8 +316,8 @@ jobs:
workingDirectory: $(BUILD_SAMPLES_TESTS_DIR)
displayName: 'Install Samples Tests'
- script: |
python3 -m pip install -r $(INSTALL_DIR)/tests/smoke_tests/requirements.txt
workingDirectory: $(INSTALL_DIR)
displayName: 'Install dependencies for samples smoke tests'
continueOnError: false


@ -60,7 +60,7 @@ jobs:
- script: |
rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
sudo mkdir -p $(MODELS_DIR)
sudo apt --assume-yes install nfs-common
sudo apt --assume-yes update && sudo apt --assume-yes install nfs-common
sudo mount -vvv -t nfs cinfsshare.file.core.windows.net:/cinfsshare/onnxtestdata $(MODELS_DIR) -o vers=4,minorversion=1,sec=sys
mkdir -p $(MODELS_DIR)/models_data
displayName: 'Make dirs'


@ -53,7 +53,7 @@ jobs:
rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
sudo rm -rf $(TMP_DIR) ; sudo mkdir $(TMP_DIR) ; sudo chmod 777 -R $(TMP_DIR)
sudo mkdir -p $(MODELS_DIR)
sudo apt --assume-yes install nfs-common
sudo apt --assume-yes update && sudo apt --assume-yes install nfs-common
sudo mount -vvv -t nfs cinfsshare.file.core.windows.net:/cinfsshare/onnxtestdata $(MODELS_DIR) -o vers=4,minorversion=1,sec=sys
displayName: 'Make dirs'
@ -98,7 +98,7 @@ jobs:
-DENABLE_CPPLINT=OFF
-DENABLE_TESTS=OFF
-DENABLE_MKL_DNN=ON
-DENABLE_CLDNN=OFF
-DENABLE_INTEL_GPU=OFF
-DENABLE_PROFILING_ITT=OFF
-DENABLE_SAMPLES=OFF
-DNGRAPH_ONNX_FRONTEND_ENABLE=ON


@ -132,7 +132,7 @@ jobs:
- script: |
set PATH=$(WORK_DIR)\ninja-win;%PATH%
call "$(MSVS_VARS_PATH)" && $(CMAKE_CMD) -G "Ninja Multi-Config" -DENABLE_ONEDNN_FOR_GPU=OFF -DENABLE_GNA=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_CLDNN=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_GAPI_PREPROCESSING=$(CMAKE_BUILD_SHARED_LIBS) -DBUILD_SHARED_LIBS=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_REQUIREMENTS_INSTALL=OFF -DENABLE_FASTER_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE="C:\hostedtoolcache\windows\Python\3.7.6\x64\python.exe" -DPYTHON_INCLUDE_DIR="C:\hostedtoolcache\windows\Python\3.7.6\x64\include" -DPYTHON_LIBRARY="C:\hostedtoolcache\windows\Python\3.7.6\x64\libs\python37.lib" -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
call "$(MSVS_VARS_PATH)" && $(CMAKE_CMD) -G "Ninja Multi-Config" -DENABLE_ONEDNN_FOR_GPU=OFF -DENABLE_GNA=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_INTEL_GPU=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_GAPI_PREPROCESSING=$(CMAKE_BUILD_SHARED_LIBS) -DBUILD_SHARED_LIBS=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_REQUIREMENTS_INSTALL=OFF -DENABLE_FASTER_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE="C:\hostedtoolcache\windows\Python\3.7.6\x64\python.exe" -DPYTHON_INCLUDE_DIR="C:\hostedtoolcache\windows\Python\3.7.6\x64\include" -DPYTHON_LIBRARY="C:\hostedtoolcache\windows\Python\3.7.6\x64\libs\python37.lib" -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
workingDirectory: $(BUILD_DIR)
displayName: 'CMake'
@ -181,7 +181,7 @@ jobs:
continueOnError: false
- script: |
python -m pip install -r $(INSTALL_DIR)\tests\smoke_tests\requirements.txt
workingDirectory: $(INSTALL_DIR)
displayName: 'Install dependencies for samples smoke tests'
continueOnError: false


@ -61,7 +61,7 @@ RUN cmake .. \
-DENABLE_CPPLINT=OFF \
-DENABLE_TESTS=OFF \
-DENABLE_MKL_DNN=ON \
-DENABLE_CLDNN=OFF \
-DENABLE_INTEL_GPU=OFF \
-DENABLE_PROFILING_ITT=OFF \
-DENABLE_SAMPLES=OFF \
-DENABLE_PYTHON=ON \

.gitmodules

@ -59,3 +59,6 @@
[submodule "tools/pot/thirdparty/open_model_zoo"]
path = tools/pot/thirdparty/open_model_zoo
url = https://github.com/openvinotoolkit/open_model_zoo.git
[submodule "thirdparty/nlohmann_json"]
path = thirdparty/nlohmann_json
url = https://github.com/nlohmann/json.git


@ -86,9 +86,6 @@ function(openvino_developer_export_targets)
"A list of OpenVINO exported components" FORCE)
endfunction()
ie_cpack_add_component(ngraph REQUIRED)
ie_cpack_add_component(ngraph_dev REQUIRED DEPENDS ngraph)
# add target with processed tests model zoo
include(cmake/test_model_zoo.cmake)
@ -103,6 +100,7 @@ add_subdirectory(model-optimizer)
add_subdirectory(docs)
add_subdirectory(tools)
add_subdirectory(scripts)
add_subdirectory(licensing)
#
# CPack


@ -30,11 +30,11 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins
/inference-engine/thirdparty/mkl-dnn/ @openvinotoolkit/openvino-ie-cpu-maintainers @openvinotoolkit/openvino-ie-cpu-developers
# IE GPU:
/inference-engine/src/cldnn_engine/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
/src/inference/include/ie/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
/src/inference/include/ie/cldnn/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
/src/inference/include/openvino/runtime/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
/inference-engine/thirdparty/clDNN/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
/src/plugins/intel_gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
# IE VPU:
/inference-engine/src/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers


@ -15,12 +15,12 @@ ie_coverage_capture(INFO_FILE "openvino"
ie_coverage_extract(INPUT "openvino" OUTPUT "inference"
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/inference/*")
ie_coverage_genhtml(INFO_FILE "inference_engine"
ie_coverage_genhtml(INFO_FILE "inference"
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
ie_coverage_extract(INPUT "openvino" OUTPUT "inference_engine_legacy"
ie_coverage_extract(INPUT "openvino" OUTPUT "legacy"
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/common/legacy/*")
ie_coverage_genhtml(INFO_FILE "inference_engine_legacy"
ie_coverage_genhtml(INFO_FILE "legacy"
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
ie_coverage_extract(INPUT "openvino" OUTPUT "ov_hetero_plugin"
@ -38,14 +38,14 @@ ie_coverage_extract(INPUT "openvino" OUTPUT "preprocessing"
ie_coverage_genhtml(INFO_FILE "preprocessing"
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
ie_coverage_extract(INPUT "openvino" OUTPUT "inference_engine_transformations"
ie_coverage_extract(INPUT "openvino" OUTPUT "transformations"
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/common/transformations/*")
ie_coverage_genhtml(INFO_FILE "inference_engine_transformations"
ie_coverage_genhtml(INFO_FILE "transformations"
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
ie_coverage_extract(INPUT "openvino" OUTPUT "inference_engine_snippets"
ie_coverage_extract(INPUT "openvino" OUTPUT "snippets"
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/common/snippets/*")
ie_coverage_genhtml(INFO_FILE "inference_engine_snippets"
ie_coverage_genhtml(INFO_FILE "snippets"
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
ie_coverage_extract(INPUT "openvino" OUTPUT "low_precision_transformations"
@ -65,10 +65,10 @@ if(ENABLE_MKL_DNN)
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
endif()
if(ENABLE_CLDNN)
ie_coverage_extract(INPUT "openvino" OUTPUT "cldnn_engine"
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/inference-engine/src/cldnn_engine/*")
ie_coverage_genhtml(INFO_FILE "cldnn_engine"
if (ENABLE_INTEL_GPU)
ie_coverage_extract(INPUT "openvino" OUTPUT "intel_gpu_plugin"
PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/plugins/intel_gpu/*")
ie_coverage_genhtml(INFO_FILE "intel_gpu_plugin"
PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
endif()


@ -129,7 +129,7 @@ set(IE_DEBUG_POSTFIX_WIN "d")
set(IE_RELEASE_POSTFIX_WIN "")
set(IE_DEBUG_POSTFIX_LIN "")
set(IE_RELEASE_POSTFIX_LIN "")
set(IE_DEBUG_POSTFIX_MAC "")
set(IE_DEBUG_POSTFIX_MAC "d")
set(IE_RELEASE_POSTFIX_MAC "")
if(WIN32)


@ -5,20 +5,33 @@
include(CheckCXXCompilerFlag)
if (ENABLE_SANITIZER)
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=address")
check_cxx_compiler_flag("-fsanitize-recover=address" SANITIZE_RECOVER_ADDRESS_SUPPORTED)
if (SANITIZE_RECOVER_ADDRESS_SUPPORTED)
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize-recover=address")
if (WIN32)
check_cxx_compiler_flag("/fsanitize=address" SANITIZE_ADDRESS_SUPPORTED)
if (SANITIZE_ADDRESS_SUPPORTED)
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} /fsanitize=address")
else()
message(FATAL_ERROR "Address sanitizer is not supported by current compiler.\n"
"Please, check requirements:\n"
"https://github.com/openvinotoolkit/openvino/wiki/AddressSanitizer-and-LeakSanitizer")
endif()
else()
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=address")
check_cxx_compiler_flag("-fsanitize-recover=address" SANITIZE_RECOVER_ADDRESS_SUPPORTED)
if (SANITIZE_RECOVER_ADDRESS_SUPPORTED)
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize-recover=address")
endif()
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=address")
endif()
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=address")
endif()
if (ENABLE_UB_SANITIZER)
if (WIN32)
message(FATAL_ERROR "UndefinedBehavior sanitizer is not supported in Windows")
endif()
# TODO: Remove -fno-sanitize=null as thirdparty/ocl/clhpp_headers UBSAN compatibility resolved:
# https://github.com/KhronosGroup/OpenCL-CLHPP/issues/17
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=undefined -fno-sanitize=null")
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
# TODO: Remove -Wno-maybe-uninitialized after CVS-61143 fix
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -Wno-maybe-uninitialized")
endif()
@ -38,17 +51,21 @@ endif()
# common sanitizer options
if (DEFINED SANITIZER_COMPILER_FLAGS)
# ensure symbols are present
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -g -fno-omit-frame-pointer")
if(NOT OV_COMPILER_IS_CLANG)
# GPU plugin tests compilation is slow with -fvar-tracking-assignments on GCC.
# Clang has no var-tracking-assignments.
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fno-var-tracking-assignments")
endif()
# prevent unloading libraries at runtime, so sanitizer can resolve their symbols
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -Wl,-z,nodelete")
if (NOT WIN32)
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -g -fno-omit-frame-pointer")
if(NOT OV_COMPILER_IS_CLANG)
# GPU plugin tests compilation is slow with -fvar-tracking-assignments on GCC.
# Clang has no var-tracking-assignments.
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fno-var-tracking-assignments")
endif()
# prevent unloading libraries at runtime, so sanitizer can resolve their symbols
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -Wl,-z,nodelete")
if(OV_COMPILER_IS_CLANG AND NOT WIN32 AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0)
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld")
if(OV_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0)
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld")
endif()
else()
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} /Oy-")
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SANITIZER_COMPILER_FLAGS}")


@ -13,6 +13,7 @@ ie_option (ENABLE_TESTS "unit, behavior and functional tests" OFF)
ie_option (ENABLE_STRICT_DEPENDENCIES "Skip configuring \"convenient\" dependencies for efficient parallel builds" ON)
ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF)
ie_dependent_option (ENABLE_INTEL_GPU "GPU plugin for inference engine on Intel GPU" ON "ENABLE_CLDNN" OFF)
if (NOT ENABLE_CLDNN OR ANDROID OR
(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0))
@ -49,14 +50,17 @@ ie_dependent_option (ENABLE_PYTHON "enables ie python bridge build" OFF "PYTHONL
find_package(PythonInterp 3 QUIET)
ie_dependent_option (ENABLE_DOCS "Build docs using Doxygen" OFF "PYTHONINTERP_FOUND" OFF)
# this option should not be a part of InferenceEngineDeveloperPackage
# since wheels can be built only together with main OV build
cmake_dependent_option (ENABLE_WHEEL "Build wheel packages for PyPi" OFF
"PYTHONINTERP_FOUND;CMAKE_SOURCE_DIR STREQUAL OpenVINO_SOURCE_DIR" OFF)
#
# Inference Engine specific options
#
ie_dependent_option (ENABLE_GNA "GNA support for inference engine" ON "NOT APPLE;NOT ANDROID;X86_64" OFF)
ie_dependent_option (ENABLE_CLDNN_TESTS "Enable clDNN unit tests" OFF "ENABLE_CLDNN" OFF)
# "MKL-DNN library based on OMP or TBB or Sequential implementation: TBB|OMP|SEQ"
if(X86 OR ARM OR (MSVC AND (ARM OR AARCH64)) )
set(THREADING_DEFAULT "SEQ")
@ -199,8 +203,8 @@ if (ENABLE_MYRIAD_NO_BOOT AND ENABLE_MYRIAD )
add_definitions(-DENABLE_MYRIAD_NO_BOOT=1)
endif()
if (ENABLE_CLDNN)
add_definitions(-DENABLE_CLDNN=1)
if (ENABLE_INTEL_GPU)
add_definitions(-DENABLE_INTEL_GPU=1)
endif()
if (ENABLE_MKL_DNN)


@ -8,9 +8,9 @@ After you have used the Model Optimizer to create an Intermediate Representation
Inference Engine is a set of C++ libraries providing a common API to deliver inference solutions on the platform of your choice: CPU, GPU, or VPU. Use the Inference Engine API to read the Intermediate Representation, set the input and output formats, and execute the model on devices. While the C++ libraries are the primary implementation, C libraries and Python bindings are also available.
For Intel® Distribution of OpenVINO™ toolkit, Inference Engine binaries are delivered within release packages.
The open source version is available in the [OpenVINO™ toolkit GitHub repository](https://github.com/openvinotoolkit/openvino) and can be built for supported platforms using the <a href="https://github.com/openvinotoolkit/openvino/wiki/BuildingCode">Inference Engine Build Instructions</a>.
To learn about how to use the Inference Engine API for your application, see the [Integrating Inference Engine in Your Application](Integrate_with_customer_application_new_API.md) documentation.
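The workflow described above maps onto a short amount of code. The following is a minimal sketch using the Inference Engine 2021.x C++ API; the model path, precision, and device name are placeholders, not part of this guide:

```cpp
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core core;
    // Read the IR produced by the Model Optimizer ("model.xml" is a placeholder path)
    InferenceEngine::CNNNetwork network = core.ReadNetwork("model.xml");
    // Optionally adjust the input format before compilation (assumes at least one input)
    network.getInputsInfo().begin()->second->setPrecision(InferenceEngine::Precision::U8);
    // Compile the network for a device and execute the model
    InferenceEngine::ExecutableNetwork executable = core.LoadNetwork(network, "CPU");
    InferenceEngine::InferRequest request = executable.CreateInferRequest();
    request.Infer();
    return 0;
}
```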
@ -66,7 +66,7 @@ The table below shows the plugin libraries and additional dependencies for Linux
| Plugin | Library name for Linux | Dependency libraries for Linux | Library name for Windows | Dependency libraries for Windows | Library name for macOS | Dependency libraries for macOS |
|--------|-----------------------------|-------------------------------------------------------------|--------------------------|--------------------------------------------------------------------------------------------------------|------------------------------|---------------------------------------------|
| CPU | `libMKLDNNPlugin.so` | `libinference_engine_lp_transformations.so` | `MKLDNNPlugin.dll` | `inference_engine_lp_transformations.dll` | `libMKLDNNPlugin.so` | `inference_engine_lp_transformations.dylib` |
| GPU | `libclDNNPlugin.so` | `libinference_engine_lp_transformations.so`, `libOpenCL.so` | `clDNNPlugin.dll` | `OpenCL.dll`, `inference_engine_lp_transformations.dll` | Is not supported | - |
| GPU | `libov_intel_gpu_plugin.so` | `libinference_engine_lp_transformations.so`, `libOpenCL.so` | `ov_intel_gpu_plugin.dll` | `OpenCL.dll`, `inference_engine_lp_transformations.dll` | Is not supported | - |
| MYRIAD | `libmyriadPlugin.so` | `libusb.so`, | `myriadPlugin.dll` | `usb.dll` | `libmyriadPlugin.so` | `libusb.dylib` |
| HDDL | `libHDDLPlugin.so` | `libbsl.so`, `libhddlapi.so`, `libmvnc-hddl.so` | `HDDLPlugin.dll` | `bsl.dll`, `hddlapi.dll`, `json-c.dll`, `libcrypto-1_1-x64.dll`, `libssl-1_1-x64.dll`, `mvnc-hddl.dll` | Is not supported | - |
| GNA | `libGNAPlugin.so` | `libgna.so`, | `GNAPlugin.dll` | `gna.dll` | Is not supported | - |


@ -10,7 +10,7 @@ Based on that, the declaration of an extension class can look as follows:
@snippet template_extension/old/extension.hpp extension:header
The extension library should contain and export the InferenceEngine::CreateExtension method, which creates an `Extension` class:
The extension library should use the `IE_DEFINE_EXTENSION_CREATE_FUNCTION` macro to export a function that creates an `Extension` class:
@snippet template_extension/old/extension.cpp extension:CreateExtension
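For reference, a hypothetical minimal skeleton of such a library (not the actual `template_extension` sources) could look like the sketch below; the version metadata is made up:

```cpp
// extension.cpp - hypothetical minimal extension library (sketch only)
#include <ie_iextension.h>

class Extension : public InferenceEngine::IExtension {
public:
    void GetVersion(const InferenceEngine::Version*& versionInfo) const noexcept override {
        static const InferenceEngine::Version version{{2, 1}, "1.0", "custom_extension"};
        versionInfo = &version;
    }
    void Unload() noexcept override {}
    // getOpSets()/getImplementation() would be overridden here to expose custom operations
};

// Generates and exports the library entry point used by InferenceEngine::Core
IE_DEFINE_EXTENSION_CREATE_FUNCTION(Extension)
```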


@ -2,7 +2,7 @@
Inference Engine Extensibility API enables you to add support of custom operations to the Inference Engine.
Extension should contain operation sets with custom operations and execution kernels for custom operations.
Physically, an extension library can be represented as a dynamic library exporting the single `CreateExtension` function
Physically, an extension library can be represented as a dynamic library exporting the single function
that creates a new extension instance.
To load the Extensibility library to the `InferenceEngine::Core` object, use the
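A minimal sketch of loading such a library from an application, assuming the standard `InferenceEngine::Core::AddExtension` entry point and a placeholder library name:

```cpp
#include <ie_core.hpp>
#include <ie_extension.h>
#include <memory>

int main() {
    InferenceEngine::Core core;
    // "libtemplate_extension.so" is a placeholder path to the extension library
    auto extension = std::make_shared<InferenceEngine::Extension>("libtemplate_extension.so");
    core.AddExtension(extension);
    return 0;
}
```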


@ -40,7 +40,6 @@ Inference Engine sample applications include the following:
- **Object Detection for SSD Sample** - Inference of object detection networks based on the SSD; this sample is a simplified version that supports only images as inputs.
- [Object Detection SSD C++ Sample](../../samples/cpp/object_detection_sample_ssd/README.md)
- [Object Detection SSD C Sample](../../samples/c/object_detection_sample_ssd/README.md)
- [Object Detection SSD Python* Sample](../../samples/python/object_detection_sample_ssd/README.md)
> **NOTE**: All C++ samples support input paths containing only ASCII characters, except the Hello Classification Sample, which supports Unicode.


@ -11,9 +11,9 @@ After you have used the Model Optimizer to create an Intermediate Representation
Inference Engine is a set of C++ libraries providing a common API to deliver inference solutions on the platform of your choice: CPU, GPU, or VPU. Use the Inference Engine API to read the Intermediate Representation, set the input and output formats, and execute the model on devices. While the C++ libraries are the primary implementation, C libraries and Python bindings are also available.
For Intel® Distribution of OpenVINO™ toolkit, Inference Engine binaries are delivered within release packages.
The open source version is available in the [OpenVINO™ toolkit GitHub repository](https://github.com/openvinotoolkit/openvino) and can be built for supported platforms using the <a href="https://github.com/openvinotoolkit/openvino/wiki/BuildingCode">Inference Engine Build Instructions</a>.
To learn about how to use the Inference Engine API for your application, see the [Integrating Inference Engine in Your Application](Integrate_with_customer_application_new_API.md) documentation.
@ -71,7 +71,7 @@ The table below shows the plugin libraries and additional dependencies for Linux
| Plugin | Library name for Linux | Dependency libraries for Linux | Library name for Windows | Dependency libraries for Windows | Library name for macOS | Dependency libraries for macOS |
|--------|-----------------------------|-------------------------------------------------------------|--------------------------|--------------------------------------------------------------------------------------------------------|------------------------------|---------------------------------------------|
| CPU | `libMKLDNNPlugin.so` | `libinference_engine_lp_transformations.so` | `MKLDNNPlugin.dll` | `inference_engine_lp_transformations.dll` | `libMKLDNNPlugin.so` | `inference_engine_lp_transformations.dylib` |
| GPU | `libclDNNPlugin.so` | `libinference_engine_lp_transformations.so`, `libOpenCL.so` | `clDNNPlugin.dll` | `OpenCL.dll`, `inference_engine_lp_transformations.dll` | Is not supported | - |
| GPU | `libov_intel_gpu_plugin.so` | `libinference_engine_lp_transformations.so`, `libOpenCL.so` | `ov_intel_gpu_plugin.dll` | `OpenCL.dll`, `inference_engine_lp_transformations.dll` | Is not supported | - |
| MYRIAD | `libmyriadPlugin.so` | `libusb.so`, | `myriadPlugin.dll` | `usb.dll` | `libmyriadPlugin.so` | `libusb.dylib` |
| HDDL | `libHDDLPlugin.so` | `libbsl.so`, `libhddlapi.so`, `libmvnc-hddl.so` | `HDDLPlugin.dll` | `bsl.dll`, `hddlapi.dll`, `json-c.dll`, `libcrypto-1_1-x64.dll`, `libssl-1_1-x64.dll`, `mvnc-hddl.dll` | Is not supported | - |
| GNA | `libGNAPlugin.so` | `libgna.so`, | `GNAPlugin.dll` | `gna.dll` | Is not supported | - |


@ -3,7 +3,7 @@ GPU Plugin {#openvino_docs_IE_DG_supported_plugins_GPU}
The GPU plugin uses the Intel® Compute Library for Deep Neural Networks (clDNN) to infer deep neural networks.
clDNN is an open source performance library for Deep Learning (DL) applications intended for acceleration of Deep Learning Inference on Intel® Processor Graphics including Intel® HD Graphics, Intel® Iris® Graphics, Intel® Iris® Xe Graphics, and Intel® Iris® Xe MAX graphics.
For an in-depth description of clDNN, see [Inference Engine source files](https://github.com/openvinotoolkit/openvino/tree/master/inference-engine/src/cldnn_engine) and [Accelerate Deep Learning Inference with Intel® Processor Graphics](https://software.intel.com/en-us/articles/accelerating-deep-learning-inference-with-intel-processor-graphics).
For an in-depth description of clDNN, see [Inference Engine source files](https://github.com/openvinotoolkit/openvino/tree/master/src/plugins/intel_gpu/) and [Accelerate Deep Learning Inference with Intel® Processor Graphics](https://software.intel.com/en-us/articles/accelerating-deep-learning-inference-with-intel-processor-graphics).
## Device Naming Convention
* Devices are enumerated as "GPU.X" where `X={0, 1, 2,...}`. Only Intel® GPU devices are considered.
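As a rough illustration of this naming scheme (not taken from the samples), the available GPU devices can be listed with `InferenceEngine::Core::GetAvailableDevices`:

```cpp
#include <inference_engine.hpp>
#include <iostream>

int main() {
    InferenceEngine::Core core;
    // Multiple Intel GPU devices are reported as "GPU.0", "GPU.1", and so on
    for (const std::string& device : core.GetAvailableDevices()) {
        if (device.find("GPU") == 0) {
            std::cout << device << std::endl;
        }
    }
    return 0;
}
```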


@ -49,7 +49,7 @@ Intermediate blobs between these sub graphs are allocated automatically in the m
Samples can be used with the following command:
```sh
./object_detection_sample_ssd -m <path_to_model>/ModelSSD.xml -i <path_to_pictures>/picture.jpg -d HETERO:GPU,CPU
./hello_classification <path_to_model>/squeezenet1.1.xml <path_to_pictures>/picture.jpg HETERO:GPU,CPU
```
where:
- `HETERO` stands for heterogeneous plugin


@ -299,7 +299,9 @@ TensorFlow*-specific parameters:
TensorFlow*: comma separated list of shared libraries
with TensorFlow* custom operations implementation.
--disable_nhwc_to_nchw
Disables default translation from NHWC to NCHW
[DEPRECATED] Disables default translation from NHWC to NCHW. Since 2022.1
this option is deprecated and used only to maintain backward compatibility
with previous releases.
```
> **NOTE:** Models produced with TensorFlow\* usually do not have fully defined shapes (they contain `-1` in some dimensions). It is necessary to pass an explicit shape for the input using the `--input_shape` command-line parameter, or `-b` to override just the batch dimension. If the shape is fully defined, there is no need to specify either the `-b` or `--input_shape` option.


@ -35,7 +35,6 @@ To generate the BERT Intermediate Representation (IR) of the model, run the Mode
python3 ./mo_tf.py
--input_meta_graph uncased_L-12_H-768_A-12/bert_model.ckpt.meta \
--output bert/pooler/dense/Tanh \
--disable_nhwc_to_nchw \
--input Placeholder{i32},Placeholder_1{i32},Placeholder_2{i32}
```
@ -110,10 +109,9 @@ python3 run_classifier.py \
Run the Model Optimizer with the following command line parameters to generate reshape-able BERT Intermediate Representation (IR):
```sh
python3 ./mo_tf.py
--input_model inference_graph.pb
--input "IteratorGetNext:0{i32}[1 128],IteratorGetNext:1{i32}[1 128],IteratorGetNext:4{i32}[1 128]"
--disable_nhwc_to_nchw
python3 ./mo_tf.py \
--input_model inference_graph.pb \
--input "IteratorGetNext:0{i32}[1 128],IteratorGetNext:1{i32}[1 128],IteratorGetNext:4{i32}[1 128]"
```
For other applicable parameters, refer to [Convert Model from TensorFlow](../Convert_Model_From_TensorFlow.md).


@ -71,8 +71,7 @@ To generate the IR, run the Model Optimizer with the following parameters:
python3 {path_to_mo}/mo_tf.py \
--input_model output_graph.pb \
--input "input_lengths->[16],input_node[1 16 19 26],previous_state_h[1 2048],previous_state_c[1 2048]" \
--output "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd_1,cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd,logits" \
--disable_nhwc_to_nchw
--output "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd_1,cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd,logits"
```
Where:


@ -69,7 +69,6 @@ The attribute names are self-explanatory or match the name in the `hparams_confi
OpenVINO&trade; toolkit provides samples that can be used to infer EfficientDet model. For more information, refer to
[Object Detection for SSD C++ Sample](@ref openvino_inference_engine_samples_object_detection_sample_ssd_README) and
[Object Detection for SSD Python Sample](@ref openvino_inference_engine_ie_bridges_python_sample_object_detection_sample_ssd_README).
## <a name="efficientdet-ir-results-interpretation"></a>Interpreting Results of the TensorFlow Model and the IR


@ -186,6 +186,7 @@ The script should save into `~/XLNet-Large/xlnet`.
To generate the XLNet Intermediate Representation (IR) of the model, run the Model Optimizer with the following parameters:
```sh
python3 mo.py --input_model path-to-model/model_frozen.pb --input "input_mask[50 1],input_ids[50 1],seg_ids[50 1]" --log_level DEBUG --disable_nhwc_to_nchw --output_dir <OUTPUT_MODEL_DIR>
python3 mo.py --input_model path-to-model/model_frozen.pb \
--input "input_mask[50 1],input_ids[50 1],seg_ids[50 1]"
```


@ -285,10 +285,9 @@ More information on how to develop middle transformations and dedicated API desc
### NHWC to NCHW Layout Change <a name="layout-change"></a>
There are several middle transformations responsible for changing model layout from NHWC to NCHW. These transformations
are triggered by default for TensorFlow\* models only because it is the only framework with Convolution operations in
NHWC layout.
> **NOTE**: If a TensorFlow\* model is in NCHW layout, you should specify the `--disable_nhwc_to_nchw` command line
> parameter to disable these transformations.
NHWC layout. This layout change is disabled if the model does not have operations that OpenVINO&trade; needs to execute in
NCHW layout, for example, Convolutions in NHWC layout. It is still possible to disable the layout change explicitly
using the `--disable_nhwc_to_nchw` command-line parameter.
The layout change is a complex problem, and a detailed explanation of it is beyond the scope of this document. A very brief
explanation of this process is provided below:


@ -174,7 +174,6 @@ limitations under the License.
<tab type="user" title="nGraph Function Creation C++ Sample" url="@ref openvino_inference_engine_samples_ngraph_function_creation_sample_README"/>
<tab type="user" title="nGraph Function Creation Python* Sample" url="@ref openvino_inference_engine_ie_bridges_python_sample_ngraph_function_creation_sample_README"/>
<tab type="user" title="Object Detection SSD C++ Sample" url="@ref openvino_inference_engine_samples_object_detection_sample_ssd_README"/>
<tab type="user" title="Object Detection SSD Python* Sample" url="@ref openvino_inference_engine_ie_bridges_python_sample_object_detection_sample_ssd_README"/>
<tab type="user" title="Object Detection SSD C Sample" url="@ref openvino_inference_engine_ie_bridges_c_samples_object_detection_sample_ssd_README"/>
<tab type="user" title="Automatic Speech Recognition C++ Sample" url="@ref openvino_inference_engine_samples_speech_sample_README"/>
<tab type="user" title="Automatic Speech Recognition Python Sample" url="@ref openvino_inference_engine_ie_bridges_python_sample_speech_sample_README"/>


@ -54,29 +54,29 @@ The OpenVINO™ workflow on Raspbian* OS is as follows:
## <a name="using-sample"></a>Build and Run Code Samples
Follow the steps below to run the pre-trained Face Detection network using Inference Engine samples from the OpenVINO toolkit.
Follow the steps below to run the pre-trained SqueezeNet image classification network using Inference Engine samples from the OpenVINO toolkit.
1. Create a samples build directory. This example uses a directory named `build`:
```sh
mkdir build && cd build
```
2. Build the Object Detection Sample with the following command:
2. Build the Hello Classification Sample with the following command:
```sh
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-march=armv7-a" /opt/intel/openvino_2022/samples/cpp
make -j2 object_detection_sample_ssd
make -j2 hello_classification
```
3. Download the pre-trained Face Detection model with the [Model Downloader tool](@ref omz_tools_downloader):
3. Download the pre-trained SqueezeNet image classification model with the [Model Downloader tool](@ref omz_tools_downloader):
```sh
git clone --depth 1 https://github.com/openvinotoolkit/open_model_zoo
cd open_model_zoo/tools/downloader
python3 -m pip install -r requirements.in
python3 downloader.py --name face-detection-adas-0001
python3 downloader.py --name squeezenet1.1
```
4. Run the sample, specifying the model and path to the input image:
```sh
./armv7l/Release/object_detection_sample_ssd -m face-detection-adas-0001.xml -d MYRIAD -i <path_to_image>
./armv7l/Release/hello_classification <path_to_model>/squeezenet1.1.xml <path_to_image> MYRIAD
```
The application outputs an image (`out_0.bmp`) with detected faces enclosed in rectangles.
The application outputs the top 10 classification results to the console window.
## <a name="basic-guidelines-sample-application"></a>Basic Guidelines for Using Code Samples


@ -138,25 +138,25 @@ Follow the steps below to use the pre-trained face detection model using Inference Engine samples from the OpenVINO toolkit.
```sh
mkdir build && cd build
```
2. Build the Object Detection Sample:
2. Build the Hello Classification Sample:
```sh
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-march=armv7-a" /opt/intel/openvino_2022/samples/cpp
```
```sh
make -j2 object_detection_sample_ssd
make -j2 hello_classification
```
3. Download the pre-trained Face Detection model with the Model Downloader or copy it from the host machine:
3. Download the pre-trained squeezenet1.1 image classification model with the Model Downloader or copy it from the host machine:
```sh
git clone --depth 1 https://github.com/openvinotoolkit/open_model_zoo
cd open_model_zoo/tools/downloader
python3 -m pip install -r requirements.in
python3 downloader.py --name face-detection-adas-0001
python3 downloader.py --name squeezenet1.1
```
4. Run the sample specifying the model, a path to the input image, and the VPU required to run with the Raspbian* OS:
```sh
./armv7l/Release/object_detection_sample_ssd -m <path_to_model>/face-detection-adas-0001.xml -d MYRIAD -i <path_to_image>
./armv7l/Release/hello_classification <path_to_model>/squeezenet1.1.xml <path_to_image> MYRIAD
```
The application outputs an image (`out_0.bmp`) with detected faces enclosed in rectangles.
The application outputs the top 10 classification results to the console window.
Congratulations, you have finished the OpenVINO™ toolkit for Raspbian* OS installation. You have completed all required installation, configuration and build steps in this guide.


@ -152,14 +152,24 @@ InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::
// So we need store as supported either unsupported node sets
std::unordered_set<std::string> supported;
std::unordered_set<std::string> unsupported;
auto opset = ngraph::get_opset4();
ngraph::OpSet op_super_set;
#define _OPENVINO_OP_REG(NAME, NAMESPACE) op_super_set.insert<NAMESPACE::NAME>();
#include "openvino/opsets/opset1_tbl.hpp"
#include "openvino/opsets/opset2_tbl.hpp"
#include "openvino/opsets/opset3_tbl.hpp"
#include "openvino/opsets/opset4_tbl.hpp"
#include "openvino/opsets/opset5_tbl.hpp"
#include "openvino/opsets/opset6_tbl.hpp"
#include "openvino/opsets/opset7_tbl.hpp"
#include "openvino/opsets/opset8_tbl.hpp"
#undef _OPENVINO_OP_REG
for (auto&& node : transformedFunction->get_ops()) {
// Extract transformation history from transformed node as list of nodes
for (auto&& fusedLayerName : ngraph::getFusedNamesVector(node)) {
// Filter just nodes from original operation set
// TODO: fill with actual decision rules based on whether kernel is supported by backend
if (InferenceEngine::details::contains(originalOps, fusedLayerName)) {
if (opset.contains_type(friendlyNameToType[fusedLayerName])) {
if (op_super_set.contains_type(friendlyNameToType[fusedLayerName])) {
supported.emplace(fusedLayerName);
} else {
unsupported.emplace(fusedLayerName);


@ -21,12 +21,12 @@ namespace {
struct RefPreprocessParams {
RefPreprocessParams(const std::string& val): name(val) {}
std::function<std::shared_ptr<ov::Function>()> function;
std::vector<Tensor> inputs;
std::vector<Tensor> expected;
float abs_threshold = 0.01f;
float rel_threshold = 0.01f;
std::string name;
};
class ReferencePreprocessTest : public testing::TestWithParam<RefPreprocessParams>, public CommonReferenceTest {
@ -95,7 +95,8 @@ static RefPreprocessParams simple_mean_scale() {
RefPreprocessParams res("simple_mean_scale");
res.function = []() {
auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2});
f = PrePostProcessor(f).input(InputInfo().preprocess(PreProcessSteps().mean(1.f).scale(2.f))).build();
auto p = PrePostProcessor(f);
p.input().preprocess().mean(1.f).scale(2.f); p.build();
return f;
};
res.inputs.emplace_back(Shape{1, 3, 2, 2}, element::f32, std::vector<float>{1., 3., 5., 7., 9., 11., 13., 15., 17., 19., 21., 23.});
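The hunks above and below migrate these tests from the rvalue-builder style (`InputInfo`, `InputTensorInfo`, `PreProcessSteps` objects passed into `PrePostProcessor`) to the fluent accessor style. A condensed sketch of the new style, with an illustrative helper function and made-up constants, is:

```cpp
#include <openvino/core/preprocess/pre_post_process.hpp>
#include <memory>

// Sketch only: attach simple preprocessing to an existing ov::Function
std::shared_ptr<ov::Function> add_preprocessing(std::shared_ptr<ov::Function> f) {
    auto p = ov::preprocess::PrePostProcessor(f);
    // Describe the tensor the application provides
    p.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC");
    // Describe the steps needed to reach what the network expects
    p.input().preprocess().convert_element_type(ov::element::f32).mean(1.f).scale(2.f);
    return p.build();
}
```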
@ -107,7 +108,8 @@ static RefPreprocessParams scale_then_mean() {
RefPreprocessParams res("scale_then_mean");
res.function = []() {
auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2});
f = PrePostProcessor(f).input(InputInfo().preprocess(PreProcessSteps().scale(2.0f).mean(2.0f))).build();
auto p = PrePostProcessor(f);
p.input().preprocess().scale(2.0f).mean(2.0f); p.build();
return f;
};
@ -120,14 +122,15 @@ static RefPreprocessParams convert_only() {
RefPreprocessParams res("convert_only");
res.function = []() {
auto f = create_simple_function(element::f32, Shape{1, 1, 2, 2});
f = PrePostProcessor(f).input(InputInfo()
.tensor(InputTensorInfo().set_element_type(element::i16))
.preprocess(PreProcessSteps()
auto p = PrePostProcessor(f);
p.input()
.tensor().set_element_type(element::i16);
p.input().preprocess()
.convert_element_type(element::f32)
.scale(3.f)
.convert_element_type(element::u8)
.convert_element_type(element::f32)))
.build();
.convert_element_type(element::f32);
p.build();
return f;
};
res.inputs.emplace_back(Shape{1, 1, 2, 2}, element::i16, std::vector<int16_t>{2, 3, 4, 5});
@ -139,14 +142,14 @@ static RefPreprocessParams convert_element_type_and_scale() {
RefPreprocessParams res("convert_element_type_and_scale");
res.function = []() {
auto f = create_simple_function(element::u8, Shape{1, 3, 2, 2});
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(InputTensorInfo().set_element_type(element::i16))
.preprocess(PreProcessSteps()
.convert_element_type(element::f32)
.scale(2.f)
.convert_element_type(element::u8)))
.build();
auto p = PrePostProcessor(f);
p.input()
.tensor().set_element_type(element::i16);
p.input().preprocess()
.convert_element_type(element::f32)
.scale(2.f)
.convert_element_type(element::u8);
p.build();
return f;
};
@ -161,11 +164,11 @@ static RefPreprocessParams tensor_element_type_and_scale() {
RefPreprocessParams res("tensor_element_type_and_scale");
res.function = []() {
auto f = create_simple_function(element::i8, Shape{1, 3, 1, 1});
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(InputTensorInfo().set_element_type(element::f32))
.preprocess(PreProcessSteps().scale(2.0f).convert_element_type(element::i8)))
.build();
auto p = PrePostProcessor(f);
p.input()
.tensor().set_element_type(element::f32);
p.input().preprocess().scale(2.0f).convert_element_type(element::i8);
p.build();
return f;
};
@ -178,13 +181,13 @@ static RefPreprocessParams custom_preprocessing() {
RefPreprocessParams res("custom_preprocessing");
res.function = []() {
auto f = create_simple_function(element::i32, Shape{1, 3, 1, 1});
f = PrePostProcessor(f)
.input(InputInfo().preprocess(PreProcessSteps().custom([](const Output<Node>& node) {
auto abs = std::make_shared<op::v0::Abs>(node);
abs->set_friendly_name(node.get_node_shared_ptr()->get_friendly_name() + "/abs");
return abs;
})))
.build();
auto p = PrePostProcessor(f);
p.input().preprocess().custom([](const Output<Node>& node) {
auto abs = std::make_shared<op::v0::Abs>(node);
abs->set_friendly_name(node.get_node_shared_ptr()->get_friendly_name() + "/abs");
return abs;
});
p.build();
return f;
};
@ -193,42 +196,24 @@ static RefPreprocessParams custom_preprocessing() {
return res;
}
static RefPreprocessParams test_lvalue() {
RefPreprocessParams res("test_lvalue");
static RefPreprocessParams test_multiple() {
RefPreprocessParams res("test_multiple");
res.function = []() {
auto f = create_simple_function(element::i8, Shape{1, 3, 1, 1});
auto p = PrePostProcessor(f);
auto p1 = std::move(p);
p = std::move(p1);
auto inputInfo = InputInfo();
auto inputInfo2 = std::move(inputInfo);
inputInfo = std::move(inputInfo2);
{
auto inputTensorInfo = InputTensorInfo();
auto inputTensorInfo2 = std::move(inputTensorInfo);
inputTensorInfo = std::move(inputTensorInfo2);
auto &same = inputTensorInfo.set_element_type(element::f32);
same.set_layout("?CHW");
inputInfo.tensor(std::move(same));
}
{
auto preprocessSteps = PreProcessSteps();
auto preprocessSteps2 = std::move(preprocessSteps);
preprocessSteps = std::move(preprocessSteps2);
preprocessSteps.mean(1.f);
preprocessSteps.scale(2.f);
preprocessSteps.mean({1.f, 2.f, 3.f});
preprocessSteps.scale({2.f, 3.f, 4.f});
preprocessSteps.custom([](const Output<Node> &node) {
auto abs = std::make_shared<op::v0::Abs>(node);
abs->set_friendly_name(node.get_node_shared_ptr()->get_friendly_name() + "/abs");
return abs;
});
auto &same = preprocessSteps.convert_element_type(element::i8);
inputInfo.preprocess(std::move(same));
}
p.input(std::move(inputInfo));
f = p.build();
p1.input().tensor().set_element_type(element::f32).set_layout("?CHW");
p1.input().preprocess().mean(1.f);
p1.input().preprocess().scale(2.f);
p1.input().preprocess().mean({1.f, 2.f, 3.f});
p1.input().preprocess().scale({2.f, 3.f, 4.f});
p1.input().preprocess().custom([](const Output<Node> &node) {
auto abs = std::make_shared<op::v0::Abs>(node);
abs->set_friendly_name(node.get_node_shared_ptr()->get_friendly_name() + "/abs");
return abs;
});
p1.input().preprocess().convert_element_type(element::i8);
f = p1.build();
return f;
};
@ -241,16 +226,12 @@ static RefPreprocessParams test_2_inputs_basic() {
RefPreprocessParams res("test_2_inputs_basic");
res.function = []() {
auto f = create_n_inputs<2>(element::f32, Shape{1, 3, 1, 1});
f = PrePostProcessor(f).input(InputInfo(0)
.preprocess(
PreProcessSteps()
.mean(1.f)))
.input(
InputInfo("tensor_input2")
.preprocess(PreProcessSteps()
.mean(1.f)
.scale(2.0f)))
.build();
auto p = PrePostProcessor(f);
p.input(0).preprocess().mean(1.f);
p.input("tensor_input2").preprocess()
.mean(1.f)
.scale(2.0f);
p.build();
return f;
};
@ -265,11 +246,11 @@ static RefPreprocessParams mean_scale_vector_tensor_layout() {
RefPreprocessParams res("mean_scale_vector_tensor_layout");
res.function = []() {
auto f = create_simple_function(element::f32, PartialShape{1, 3, 2, 1});
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(InputTensorInfo().set_layout("NC??"))
.preprocess(PreProcessSteps().mean({1.f, 2.f, 3.f}).scale({2.f, 3.f, 4.f})))
.build();
auto p = PrePostProcessor(f);
p.input()
.tensor().set_layout("NC??");
p.input().preprocess().mean({1.f, 2.f, 3.f}).scale({2.f, 3.f, 4.f});
p.build();
return f;
};
@ -282,11 +263,11 @@ static RefPreprocessParams mean_scale_dynamic_layout() {
RefPreprocessParams res("mean_scale_dynamic_layout");
res.function = []() {
auto f = create_simple_function(element::f32, PartialShape{1, 2, 1, 3});
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(InputTensorInfo().set_layout("N...C"))
.preprocess(PreProcessSteps().mean({1.f, 2.f, 3.f}).scale({2.f, 3.f, 4.f})))
.build();
auto p = PrePostProcessor(f);
p.input()
.tensor().set_layout("N...C");
p.input().preprocess().mean({1.f, 2.f, 3.f}).scale({2.f, 3.f, 4.f});
p.build();
return f;
};
@ -299,13 +280,12 @@ static RefPreprocessParams resize_to_network_height() {
RefPreprocessParams res("resize_to_network_height");
res.function = []() {
auto f = create_simple_function(element::f32, PartialShape{1, 2, 1, 1});
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(InputTensorInfo().set_spatial_dynamic_shape())
.preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_LINEAR))
.network(InputNetworkInfo().set_layout("NHWC"))
)
.build();
auto p = PrePostProcessor(f);
p.input()
.tensor().set_spatial_dynamic_shape();
p.input().preprocess().resize(ResizeAlgorithm::RESIZE_LINEAR);
p.input().network().set_layout("NHWC");
p.build();
return f;
};
res.inputs.emplace_back(element::f32, Shape{1, 4, 1, 1}, std::vector<float>{0., 2., 4., 6.});
@ -317,12 +297,12 @@ static RefPreprocessParams resize_to_network_width() {
RefPreprocessParams res("resize_to_network_width");
res.function = []() {
auto f = create_simple_function(element::f32, PartialShape{Dimension::dynamic(), 1, 2, 2});
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(InputTensorInfo().set_spatial_dynamic_shape())
.preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_LINEAR))
.network(InputNetworkInfo().set_layout("NCHW")))
.build();
auto p = PrePostProcessor(f);
p.input()
.tensor().set_spatial_dynamic_shape();
p.input().preprocess().resize(ResizeAlgorithm::RESIZE_LINEAR);
p.input().network().set_layout("NCHW");
p.build();
return f;
};
res.inputs.emplace_back(element::f32, Shape{1, 1, 2, 6}, std::vector<float>{0., 1., 2., 3., 4., 5.,
@ -335,14 +315,12 @@ static RefPreprocessParams resize_from_spatial_dims() {
RefPreprocessParams res("resize_from_spatial_dims");
res.function = []() {
auto f = create_simple_function(element::f32, PartialShape{Dimension::dynamic(), 1, 1, 1});
auto t = InputTensorInfo();
t.set_spatial_static_shape(1, 4);
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(std::move(t))
.preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_CUBIC))
.network(InputNetworkInfo().set_layout("NCHW")))
.build();
auto p = PrePostProcessor(f);
p.input()
.tensor().set_spatial_static_shape(1, 4);
p.input().preprocess().resize(ResizeAlgorithm::RESIZE_CUBIC);
p.input().network().set_layout("NCHW");
p.build();
return f;
};
res.inputs.emplace_back(element::f32, Shape{1, 1, 1, 7}, std::vector<float>{0., 0.25, 1., 2.25, 4., 6.25, 9});
@ -354,13 +332,13 @@ static RefPreprocessParams resize_i8() {
RefPreprocessParams res("resize_i8");
res.function = []() {
auto f = create_simple_function(element::i8, PartialShape{1, 3, 1, 1});
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(InputTensorInfo()
.set_spatial_dynamic_shape())
.preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_LINEAR))
.network(InputNetworkInfo().set_layout("NCHW")))
.build();
auto p = PrePostProcessor(f);
p.input()
.tensor()
.set_spatial_dynamic_shape();
p.input().preprocess().resize(ResizeAlgorithm::RESIZE_LINEAR);
p.input().network().set_layout("NCHW");
p.build();
return f;
};
res.inputs.emplace_back(element::i8, Shape{1, 3, 2, 2}, std::vector<int8_t>{0, 0, 0, 0,
@ -374,12 +352,12 @@ static RefPreprocessParams resize_to_network_width_height() {
RefPreprocessParams res("resize_to_network_width_height");
res.function = []() {
auto f = create_simple_function(element::f32, PartialShape{1, 1, 4, 4});
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(InputTensorInfo().set_spatial_static_shape(5, 5))
.preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_NEAREST))
.network(InputNetworkInfo().set_layout("...HW")))
.build();
auto p = PrePostProcessor(f);
p.input()
.tensor().set_spatial_static_shape(5, 5);
p.input().preprocess().resize(ResizeAlgorithm::RESIZE_NEAREST);
p.input().network().set_layout("...HW");
p.build();
return f;
};
@ -404,12 +382,12 @@ static RefPreprocessParams resize_to_specified_width_height() {
RefPreprocessParams res("resize_to_specified_width_height");
res.function = []() {
auto f = create_simple_function(element::f32, PartialShape{1, 1, Dimension::dynamic(), Dimension::dynamic()});
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(InputTensorInfo().set_spatial_dynamic_shape())
.preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_NEAREST, 4, 4))
.network(InputNetworkInfo().set_layout("...HW")))
.build();
auto p = PrePostProcessor(f);
p.input()
.tensor().set_spatial_dynamic_shape();
p.input().preprocess().resize(ResizeAlgorithm::RESIZE_NEAREST, 4, 4);
p.input().network().set_layout("...HW");
p.build();
return f;
};
@ -430,52 +408,16 @@ static RefPreprocessParams resize_to_specified_width_height() {
return res;
}
static RefPreprocessParams resize_lvalues() {
RefPreprocessParams res("resize_lvalues");
res.function = []() {
auto f = create_simple_function(element::f32, PartialShape{Dimension::dynamic(), 1, 1, 2});
f->get_parameters().front()->set_layout("NCHW");
auto t = InputTensorInfo();
t.set_spatial_dynamic_shape();
auto s = PreProcessSteps();
s.resize(ResizeAlgorithm::RESIZE_LINEAR, 1, 6); // to specified shape
s.resize(ResizeAlgorithm::RESIZE_LINEAR); // to network's shape
auto n = InputNetworkInfo();
n.set_layout("NCHW");
auto i = InputInfo();
i.tensor(std::move(t));
i.preprocess(std::move(s));
i.network(std::move(n));
f = PrePostProcessor(f)
.input(std::move(i))
.build();
return f;
};
// clang-format off
res.inputs.emplace_back(element::f32, Shape{1, 1, 1, 18}, std::vector<float>{0., 0., 0.,
1., 1., 1.,
2., 2., 2.,
3., 3., 3.,
4., 4., 4.,
5., 5., 5.});
// clang-format on
res.expected.emplace_back(Shape{1, 1, 2, 1}, element::f32, std::vector<float>{1., 4.});
return res;
}
static RefPreprocessParams convert_layout_nhwc_to_nchw_lvalue() {
RefPreprocessParams res("convert_layout_nhwc_to_nchw_lvalue");
static RefPreprocessParams convert_layout_nhwc_to_nchw() {
RefPreprocessParams res("convert_layout_nhwc_to_nchw");
res.function = []() {
auto f = create_simple_function(element::u8, {1, 3, 2, 2});
f->get_parameters()[0]->set_layout("NCHW");
auto p = PreProcessSteps();
p.convert_layout("NCHW");
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(InputTensorInfo().set_layout("NHWC"))
.preprocess(std::move(p)))
.build();
auto p = PrePostProcessor(f);
p.input().tensor().set_layout("NHWC");
p.input().preprocess().convert_layout("NCHW");
p.build();
return f;
};
res.inputs.emplace_back(Shape{1, 2, 2, 3}, element::u8, std::vector<uint8_t>{1, 2, 3, // [H=0, W=0, RGB]
@ -493,13 +435,10 @@ static RefPreprocessParams convert_layout_nhwc_to_net_no_tensor_shape() {
res.function = []() {
auto f = create_simple_function(element::u8, {1, 3, 2, 2});
f->get_parameters()[0]->set_layout("NCHW");
auto p = PreProcessSteps();
p.convert_layout();
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(InputTensorInfo().set_layout("NHWC"))
.preprocess(std::move(p)))
.build();
auto p = PrePostProcessor(f);
p.input().tensor().set_layout("NHWC");
p.input().preprocess().convert_layout();
p.build();
return f;
};
res.inputs.emplace_back(Shape{1, 2, 2, 3}, element::u8, std::vector<uint8_t>{1, 2, 3, // [H=0, W=0, RGB]
@ -516,10 +455,9 @@ static RefPreprocessParams convert_layout_by_dims() {
RefPreprocessParams res("convert_layout_by_dims");
res.function = []() {
auto f = create_simple_function(element::u8, {1, 3, 2, 2});
f = PrePostProcessor(f)
.input(InputInfo()
.preprocess(PreProcessSteps().convert_layout({0, 3, 1, 2})))
.build();
auto p = PrePostProcessor(f);
p.input().preprocess().convert_layout({0, 3, 1, 2});
p.build();
return f;
};
res.inputs.emplace_back(Shape{1, 2, 2, 3}, element::u8, std::vector<uint8_t>{1, 2, 3, // [H=0, W=0, RGB]
@ -536,12 +474,10 @@ static RefPreprocessParams convert_layout_by_dims_multi() {
RefPreprocessParams res("convert_layout_by_dims_multi");
res.function = []() {
auto f = create_simple_function(element::f32, {1, 3, 2, 2});
auto p = PreProcessSteps();
p.convert_layout({0, 1, 3, 2}); // NHWC->NHCW
p.convert_layout({0, 2, 1, 3}); // NHCW->NCHW
f = PrePostProcessor(f)
.input(InputInfo().preprocess(std::move(p)))
.build();
auto p = PrePostProcessor(f);
p.input().preprocess().convert_layout({0, 1, 3, 2}) // NHWC->NHCW
.convert_layout({0, 2, 1, 3}); // NHCW->NCHW
p.build();
return f;
};
res.inputs.emplace_back(Shape{1, 2, 2, 3}, element::f32, std::vector<float>{1, 2, 3, // [H=0, W=0]
@ -558,14 +494,12 @@ static RefPreprocessParams convert_layout_by_dims_multi_layout() {
RefPreprocessParams res("convert_layout_by_dims_multi_layout");
res.function = []() {
auto f = create_simple_function(element::f32, {1, 3, 2, 2});
auto p = PreProcessSteps();
p.convert_layout({0, 1, 3, 2}); // NHWC->NHCW
p.mean({1, 2, 2}); // Apply means to 'C' channel
p.convert_layout({0, 2, 1, 3}); // NHCW->NCHW
f = PrePostProcessor(f)
.input(InputInfo().tensor(InputTensorInfo().set_layout("N??C"))
.preprocess(std::move(p)))
.build();
auto p = PrePostProcessor(f);
p.input().tensor().set_layout("N??C");
p.input().preprocess().convert_layout({0, 1, 3, 2}) // NHWC->NHCW
.mean({1, 2, 2}) // Apply means to 'C' channel
.convert_layout({0, 2, 1, 3}); // NHCW->NCHW
p.build();
return f;
};
res.inputs.emplace_back(Shape{1, 2, 2, 3}, element::f32, std::vector<float>{1, 2, 3, // [H=0, W=0, RGB]
@ -582,16 +516,16 @@ static RefPreprocessParams resize_and_convert_layout() {
RefPreprocessParams res("resize_and_convert_layout");
res.function = []() {
auto f = create_simple_function(element::f32, PartialShape{1, 2, 2, 2});
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(InputTensorInfo()
.set_layout("NCHW")
.set_spatial_dynamic_shape())
.preprocess(PreProcessSteps()
.resize(ResizeAlgorithm::RESIZE_LINEAR)
.convert_layout())
.network(InputNetworkInfo().set_layout("NHWC")))
.build();
auto p = PrePostProcessor(f);
p.input()
.tensor()
.set_layout("NCHW")
.set_spatial_dynamic_shape();
p.input().preprocess()
.resize(ResizeAlgorithm::RESIZE_LINEAR)
.convert_layout();
p.input().network().set_layout("NHWC");
p.build();
return f;
};
@ -620,13 +554,13 @@ static RefPreprocessParams convert_color_nv12_to_bgr_two_planes() {
res.rel_threshold = 1.f; // Ignore relative pixel values comparison (100%)
res.function = []() {
auto f = create_simple_function(element::u8, PartialShape{1, 4, 4, 3});
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(InputTensorInfo()
.set_color_format(ColorFormat::NV12_TWO_PLANES))
.preprocess(PreProcessSteps()
.convert_color(ColorFormat::BGR)))
.build();
auto p = PrePostProcessor(f);
p.input()
.tensor()
.set_color_format(ColorFormat::NV12_TWO_PLANES);
p.input().preprocess()
.convert_color(ColorFormat::BGR);
p.build();
return f;
};
@ -659,13 +593,13 @@ static RefPreprocessParams convert_color_nv12_single_plane() {
res.rel_threshold = 1.f; // Ignore relative pixel values comparison (100%)
res.function = []() {
auto f = create_simple_function(element::f32, PartialShape{1, 4, 4, 3});
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(InputTensorInfo()
.set_color_format(ColorFormat::NV12_SINGLE_PLANE))
.preprocess(PreProcessSteps()
.convert_color(ColorFormat::RGB)))
.build();
auto p = PrePostProcessor(f);
p.input()
.tensor()
.set_color_format(ColorFormat::NV12_SINGLE_PLANE);
p.input().preprocess()
.convert_color(ColorFormat::RGB);
p.build();
return f;
};
@ -680,7 +614,7 @@ static RefPreprocessParams convert_color_nv12_single_plane() {
255, 0, 0, 255, 0, 0, 0, 255, 0, 0, 255, 0, // RRGG
0, 0, 255, 0, 0, 255, 255, 0, 0, 255, 0, 0, // BBRR
0, 0, 255, 0, 0, 255, 255, 0, 0, 255, 0, 0, // BBRR
};
};
auto out_shape = Shape{1, 4, 4, 3};
// clang-format on
res.inputs.emplace_back(element::f32, input_shape, input);
@ -694,19 +628,19 @@ static RefPreprocessParams convert_color_nv12_layout_resize() {
res.rel_threshold = 1.f; // Ignore relative pixel values comparison (100%)
res.function = []() {
auto f = create_simple_function(element::f32, PartialShape{1, 3, 2, 2});
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(InputTensorInfo()
.set_color_format(ColorFormat::NV12_SINGLE_PLANE)
.set_element_type(element::u8)
.set_spatial_dynamic_shape())
.preprocess(PreProcessSteps()
.convert_color(ColorFormat::RGB)
.convert_layout()
.convert_element_type(element::f32)
.resize(ResizeAlgorithm::RESIZE_NEAREST))
.network(InputNetworkInfo().set_layout("NCHW")))
.build();
auto p = PrePostProcessor(f);
p.input()
.tensor()
.set_color_format(ColorFormat::NV12_SINGLE_PLANE)
.set_element_type(element::u8)
.set_spatial_dynamic_shape();
p.input().preprocess()
.convert_color(ColorFormat::RGB)
.convert_layout()
.convert_element_type(element::f32)
.resize(ResizeAlgorithm::RESIZE_NEAREST);
p.input().network().set_layout("NCHW");
p.build();
return f;
};
@ -734,16 +668,16 @@ static RefPreprocessParams element_type_before_convert_color_nv12() {
res.rel_threshold = 1.f; // Ignore relative pixel values comparison (100%)
res.function = []() {
auto f = create_simple_function(element::f32, PartialShape{1, 2, 2, 3});
f = PrePostProcessor(f)
.input(InputInfo()
.tensor(InputTensorInfo()
.set_element_type(element::u8)
.set_color_format(ColorFormat::NV12_TWO_PLANES))
.preprocess(PreProcessSteps()
.convert_element_type(element::f32)
.convert_color(ColorFormat::RGB))
.network(InputNetworkInfo().set_layout("NHWC")))
.build();
auto p = PrePostProcessor(f);
p.input()
.tensor()
.set_element_type(element::u8)
.set_color_format(ColorFormat::NV12_TWO_PLANES);
p.input().preprocess()
.convert_element_type(element::f32)
.convert_color(ColorFormat::RGB);
p.input().network().set_layout("NHWC");
p.build();
return f;
};
@ -836,15 +770,15 @@ static RefPreprocessParams postprocess_2_inputs_basic() {
RefPreprocessParams res("postprocess_2_inputs_basic");
res.function = []() {
auto f = create_n_inputs<2>(element::f32, Shape{1, 3, 1, 2});
f = PrePostProcessor(f)
.output(OutputInfo("tensor_output1")
.network(OutputNetworkInfo().set_layout("NCHW"))
.postprocess(PostProcessSteps().convert_layout())
.tensor(OutputTensorInfo().set_layout("NHWC")))
.output(OutputInfo("tensor_output2")
.postprocess(PostProcessSteps().convert_element_type())
.tensor(OutputTensorInfo().set_element_type(element::u8)))
.build();
auto p = PrePostProcessor(f);
p.output("tensor_output1")
.network().set_layout("NCHW");
p.output("tensor_output1").postprocess().convert_layout();
p.output("tensor_output1").tensor().set_layout("NHWC");
p.output("tensor_output2")
.postprocess().convert_element_type();
p.output("tensor_output2").tensor().set_element_type(element::u8);
p.build();
return f;
};
res.inputs.emplace_back(Shape{1, 3, 1, 2}, element::f32, std::vector<float>{1.1, 2.1, 3.1, 4.1, 5.1, 6.1});
@ -858,10 +792,10 @@ static RefPreprocessParams post_convert_layout_by_dims() {
RefPreprocessParams res("post_convert_layout_by_dims");
res.function = []() {
auto f = create_simple_function(element::u8, {1, 2, 2, 3});
f = PrePostProcessor(f)
.output(OutputInfo()
.postprocess(PostProcessSteps().convert_layout({0, 3, 1, 2})))
.build();
auto p = PrePostProcessor(f);
p.output()
.postprocess().convert_layout({0, 3, 1, 2});
p.build();
return f;
};
res.inputs.emplace_back(Shape{1, 2, 2, 3}, element::u8, std::vector<uint8_t>{1, 2, 3, // [H=0, W=0, RGB]
@ -878,12 +812,10 @@ static RefPreprocessParams post_convert_layout_by_dims_multi() {
RefPreprocessParams res("post_convert_layout_by_dims_multi");
res.function = []() {
auto f = create_simple_function(element::f32, {1, 2, 2, 3});
auto p = PostProcessSteps();
p.convert_layout({0, 1, 3, 2}); // NHWC->NHCW
p.convert_layout({0, 2, 1, 3}); // NHCW->NCHW
f = PrePostProcessor(f)
.output(OutputInfo().postprocess(std::move(p)))
.build();
auto p = PrePostProcessor(f);
p.output().postprocess().convert_layout({0, 1, 3, 2}); // NHWC->NHCW;
p.output().postprocess().convert_layout({0, 2, 1, 3}); // NHCW->NCHW;
p.build();
return f;
};
res.inputs.emplace_back(Shape{1, 2, 2, 3}, element::f32, std::vector<float>{1, 2, 3, // [H=0, W=0]
@ -900,20 +832,19 @@ static RefPreprocessParams pre_and_post_processing() {
RefPreprocessParams res("pre_and_post_processing");
res.function = []() {
auto f = create_n_inputs<2>(element::f32, Shape{1, 3, 1, 2});
f = PrePostProcessor(f)
.input(InputInfo(0)
.tensor(InputTensorInfo().set_element_type(element::u8))
.preprocess(PreProcessSteps().convert_element_type(element::f32).mean(1.f)))
.input(InputInfo(1)
.preprocess(PreProcessSteps().scale(2.f)))
.output(OutputInfo("tensor_output1")
.network(OutputNetworkInfo().set_layout("NCHW"))
.postprocess(PostProcessSteps().convert_layout())
.tensor(OutputTensorInfo().set_layout("NHWC")))
.output(OutputInfo("tensor_output2")
.postprocess(PostProcessSteps().convert_element_type())
.tensor(OutputTensorInfo().set_element_type(element::u8)))
.build();
auto p = PrePostProcessor(f);
p.input(0)
.tensor().set_element_type(element::u8);
p.input(0).preprocess().convert_element_type(element::f32).mean(1.f);
p.input(1).preprocess().scale(2.f);
p.output("tensor_output1")
.network().set_layout("NCHW");
p.output("tensor_output1").postprocess().convert_layout();
p.output("tensor_output1").tensor().set_layout("NHWC");
p.output("tensor_output2")
.postprocess().convert_element_type();
p.output("tensor_output2").tensor().set_element_type(element::u8);
p.build();
return f;
};
res.inputs.emplace_back(Shape{1, 3, 1, 2}, element::u8, std::vector<uint8_t>{1, 2, 3, 4, 5, 6});
@ -927,9 +858,10 @@ static RefPreprocessParams rgb_to_bgr() {
RefPreprocessParams res("rgb_to_bgr");
res.function = []() {
auto f = create_simple_function(element::f32, Shape{2, 1, 1, 3});
f = PrePostProcessor(f).input(InputInfo()
.tensor(InputTensorInfo().set_color_format(ColorFormat::RGB))
.preprocess(PreProcessSteps().convert_color(ColorFormat::BGR))).build();
auto p = PrePostProcessor(f);
p.input().tensor().set_color_format(ColorFormat::RGB);
p.input().preprocess().convert_color(ColorFormat::BGR);
p.build();
return f;
};
@ -942,9 +874,10 @@ static RefPreprocessParams bgr_to_rgb() {
RefPreprocessParams res("bgr_to_rgb");
res.function = []() {
auto f = create_simple_function(element::f32, Shape{2, 1, 1, 3});
f = PrePostProcessor(f).input(InputInfo()
.tensor(InputTensorInfo().set_color_format(ColorFormat::BGR))
.preprocess(PreProcessSteps().convert_color(ColorFormat::RGB))).build();
auto p = PrePostProcessor(f);
p.input().tensor().set_color_format(ColorFormat::BGR);
p.input().preprocess().convert_color(ColorFormat::RGB);
p.build();
return f;
};
@ -957,9 +890,10 @@ static RefPreprocessParams reverse_channels_nchw() {
RefPreprocessParams res("reverse_channels_nchw");
res.function = []() {
auto f = create_simple_function(element::f32, PartialShape{1, 2, 2, 2});
f = PrePostProcessor(f).input(InputInfo()
.tensor(InputTensorInfo().set_layout("NCHW"))
.preprocess(PreProcessSteps().reverse_channels())).build();
auto p = PrePostProcessor(f);
p.input().tensor().set_layout("NCHW");
p.input().preprocess().reverse_channels();
p.build();
return f;
};
@ -1004,14 +938,13 @@ static RefPreprocessParams color_cut_last_channel() {
return res;
}
static RefPreprocessParams reverse_channels_dyn_layout() {
RefPreprocessParams res("reverse_channels_dyn_layout");
res.function = []() {
auto f = create_simple_function(element::f32, PartialShape{1, 1, 3, 2});
f = PrePostProcessor(f).input(InputInfo()
.tensor(InputTensorInfo().set_color_format(ColorFormat::BGR).set_layout("...CN"))
.preprocess(PreProcessSteps().convert_color(ColorFormat::RGB))).build();
auto p = PrePostProcessor(f);
p.input().tensor().set_color_format(ColorFormat::BGR).set_layout("...CN");
p.input().preprocess().convert_color(ColorFormat::RGB);
p.build();
return f;
};
@ -1024,12 +957,13 @@ static RefPreprocessParams reverse_dyn_shape() {
RefPreprocessParams res("reverse_dyn_shape");
res.function = []() {
auto f = create_simple_function(element::u8, PartialShape{Dimension::dynamic(),
Dimension::dynamic(),
Dimension::dynamic(),
Dimension::dynamic()});
f = PrePostProcessor(f).input(InputInfo()
.tensor(InputTensorInfo().set_layout("NCHW"))
.preprocess(PreProcessSteps().reverse_channels())).build();
Dimension::dynamic(),
Dimension::dynamic(),
Dimension::dynamic()});
auto p = PrePostProcessor(f);
p.input().tensor().set_layout("NCHW");
p.input().preprocess().reverse_channels();
p.build();
return f;
};
@ -1042,11 +976,10 @@ static RefPreprocessParams reverse_fully_dyn_shape() {
RefPreprocessParams res("reverse_fully_dyn_shape");
res.function = []() {
auto f = create_simple_function(element::u8, PartialShape::dynamic());
auto p = PreProcessSteps();
p.reverse_channels();
f = PrePostProcessor(f).input(InputInfo()
.tensor(InputTensorInfo().set_layout("...C??"))
.preprocess(std::move(p))).build();
auto p = PrePostProcessor(f);
p.input().tensor().set_layout("...C??");
p.input().preprocess().reverse_channels();
p.build();
return f;
};
@ -1057,47 +990,46 @@ static RefPreprocessParams reverse_fully_dyn_shape() {
std::vector<RefPreprocessParams> allPreprocessTests() {
return std::vector<RefPreprocessParams> {
simple_mean_scale(),
scale_then_mean(),
convert_only(),
convert_element_type_and_scale(),
tensor_element_type_and_scale(),
custom_preprocessing(),
test_lvalue(),
test_2_inputs_basic(),
mean_scale_vector_tensor_layout(),
mean_scale_dynamic_layout(),
resize_to_network_height(),
resize_to_network_width(),
resize_from_spatial_dims(),
resize_i8(),
resize_to_network_width_height(),
resize_to_specified_width_height(),
resize_lvalues(),
convert_layout_nhwc_to_nchw_lvalue(),
convert_layout_nhwc_to_net_no_tensor_shape(),
convert_layout_by_dims(),
convert_layout_by_dims_multi(),
convert_layout_by_dims_multi_layout(),
resize_and_convert_layout(),
convert_color_nv12_to_bgr_two_planes(),
convert_color_nv12_single_plane(),
convert_color_nv12_layout_resize(),
element_type_before_convert_color_nv12(),
convert_color_i420_to_bgr_three_planes(),
convert_color_i420_single_plane(),
postprocess_2_inputs_basic(),
post_convert_layout_by_dims(),
post_convert_layout_by_dims_multi(),
pre_and_post_processing(),
rgb_to_bgr(),
bgr_to_rgb(),
color_cut_last_channel(),
reverse_channels_nchw(),
reverse_channels_dyn_layout(),
reverse_dyn_shape(),
reverse_fully_dyn_shape()
};
simple_mean_scale(),
scale_then_mean(),
convert_only(),
convert_element_type_and_scale(),
tensor_element_type_and_scale(),
custom_preprocessing(),
test_multiple(),
test_2_inputs_basic(),
mean_scale_vector_tensor_layout(),
mean_scale_dynamic_layout(),
resize_to_network_height(),
resize_to_network_width(),
resize_from_spatial_dims(),
resize_i8(),
resize_to_network_width_height(),
resize_to_specified_width_height(),
convert_layout_nhwc_to_nchw(),
convert_layout_nhwc_to_net_no_tensor_shape(),
convert_layout_by_dims(),
convert_layout_by_dims_multi(),
convert_layout_by_dims_multi_layout(),
resize_and_convert_layout(),
convert_color_nv12_to_bgr_two_planes(),
convert_color_nv12_single_plane(),
convert_color_nv12_layout_resize(),
element_type_before_convert_color_nv12(),
convert_color_i420_to_bgr_three_planes(),
convert_color_i420_single_plane(),
postprocess_2_inputs_basic(),
post_convert_layout_by_dims(),
post_convert_layout_by_dims_multi(),
pre_and_post_processing(),
rgb_to_bgr(),
bgr_to_rgb(),
color_cut_last_channel(),
reverse_channels_nchw(),
reverse_channels_dyn_layout(),
reverse_dyn_shape(),
reverse_fully_dyn_shape()
};
}
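// Editor's sketch (not part of the original change-set): every hunk above applies the same
// migration, from the removed fluent InputInfo()/OutputInfo() builders to direct accessors on
// PrePostProcessor. A minimal example of the new style, reusing this file's
// create_simple_function() helper; the function name below is illustrative only.
static std::shared_ptr<Function> example_new_style_preprocessing() {
    auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2});
    auto p = PrePostProcessor(f);
    p.input().tensor().set_element_type(element::u8).set_layout("NHWC");  // describe the real input tensor
    p.input().preprocess().convert_element_type(element::f32).mean(1.f);  // steps executed before inference
    p.input().network().set_layout("NCHW");                               // layout the network expects
    return p.build();                                                     // applies the steps and returns the function
}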
INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferencePreprocessTest,

View File

@ -57,7 +57,9 @@ static std::shared_ptr<Function> create_simple_function_yuv(const PartialShape&
TEST_F(ReferencePreprocessLegacyTest, mean) {
function = create_simple_function(element::f32, Shape{1, 3, 2, 2});
function = PrePostProcessor(function).input(InputInfo().preprocess(PreProcessSteps().mean(1.f))).build();
auto p = PrePostProcessor(function);
p.input().preprocess().mean(1.f);
p.build();
auto f2 = create_simple_function(element::f32, Shape{1, 3, 2, 2});
legacy_network = InferenceEngine::CNNNetwork(f2);
@ -75,7 +77,9 @@ TEST_F(ReferencePreprocessLegacyTest, mean) {
TEST_F(ReferencePreprocessLegacyTest, mean_scale) {
function = create_simple_function(element::f32, Shape{1, 3, 20, 20});
function = PrePostProcessor(function).input(InputInfo().preprocess(PreProcessSteps().scale(2.f))).build();
auto p = PrePostProcessor(function);
p.input().preprocess().scale(2.f);
p.build();
auto f2 = create_simple_function(element::f32, Shape{1, 3, 20, 20});
legacy_network = InferenceEngine::CNNNetwork(f2);
@ -96,11 +100,11 @@ TEST_F(ReferencePreprocessLegacyTest, resize) {
auto f2 = create_simple_function(element::f32, Shape{1, 3, 5, 5});
legacy_network = InferenceEngine::CNNNetwork(f2);
function = PrePostProcessor(function).input(InputInfo()
.tensor(InputTensorInfo().set_layout("NCHW").set_spatial_static_shape(42, 30))
.preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_LINEAR))
.network(InputNetworkInfo().set_layout("NCHW")))
.build();
auto p = PrePostProcessor(function);
p.input().tensor().set_layout("NCHW").set_spatial_static_shape(42, 30);
p.input().preprocess().resize(ResizeAlgorithm::RESIZE_LINEAR);
p.input().network().set_layout("NCHW");
p.build();
auto &preProcess = legacy_network.getInputsInfo().begin()->second->getPreProcess();
preProcess.setResizeAlgorithm(InferenceEngine::ResizeAlgorithm::RESIZE_BILINEAR);
@ -177,12 +181,11 @@ public:
inputData.clear();
legacy_input_blobs.clear();
function = PrePostProcessor(function).input(InputInfo()
.tensor(InputTensorInfo().set_color_format(
ColorFormat::NV12_SINGLE_PLANE))
.preprocess(PreProcessSteps().convert_color(ColorFormat::BGR))
.network(InputNetworkInfo().set_layout("NCHW")))
.build();
auto p = PrePostProcessor(function);
p.input().tensor().set_color_format(ColorFormat::NV12_SINGLE_PLANE);
p.input().preprocess().convert_color(ColorFormat::BGR);
p.input().network().set_layout("NCHW");
p.build();
const auto &param = function->get_parameters()[0];
inputData.emplace_back(param->get_element_type(), param->get_shape(), ov20_input_yuv.data());

View File

@ -109,11 +109,10 @@ TEST_F(PreprocessOpenCVReferenceTest_YUV, convert_nv12_full_color_range) {
inputData.clear();
function = PrePostProcessor(function).input(InputInfo()
.tensor(InputTensorInfo().set_color_format(
ColorFormat::NV12_SINGLE_PLANE))
.preprocess(PreProcessSteps().convert_color(ColorFormat::BGR)))
.build();
auto p = PrePostProcessor(function);
p.input().tensor().set_color_format(ColorFormat::NV12_SINGLE_PLANE);
p.input().preprocess().convert_color(ColorFormat::BGR);
function = p.build();
const auto &param = function->get_parameters()[0];
inputData.emplace_back(param->get_element_type(), param->get_shape(), ov20_input_yuv.data());
@ -138,12 +137,10 @@ TEST_F(PreprocessOpenCVReferenceTest_YUV, convert_nv12_colored) {
inputData.clear();
function = PrePostProcessor(function).input(InputInfo()
.tensor(InputTensorInfo().set_color_format(
ColorFormat::NV12_SINGLE_PLANE))
.preprocess(PreProcessSteps().convert_color(ColorFormat::BGR))
)
.build();
auto p = PrePostProcessor(function);
p.input().tensor().set_color_format(ColorFormat::NV12_SINGLE_PLANE);
p.input().preprocess().convert_color(ColorFormat::BGR);
function = p.build();
const auto &param = function->get_parameters()[0];
inputData.emplace_back(param->get_element_type(), param->get_shape(), input_yuv.data());
@ -165,12 +162,11 @@ TEST_F(PreprocessOpenCVReferenceTest, resize_u8_simple_linear) {
inputData.clear();
function = PrePostProcessor(function).input(InputInfo()
.tensor(InputTensorInfo().set_spatial_static_shape(2, 2))
.preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_LINEAR))
.network(InputNetworkInfo().set_layout("NCHW"))
)
.build();
auto p = PrePostProcessor(function);
p.input().tensor().set_spatial_static_shape(2, 2);
p.input().preprocess().resize(ResizeAlgorithm::RESIZE_LINEAR);
p.input().network().set_layout("NCHW");
function = p.build();
const auto &param = function->get_parameters()[0];
inputData.emplace_back(param->get_element_type(), param->get_shape(), input_img.data());
@ -204,12 +200,11 @@ TEST_F(PreprocessOpenCVReferenceTest, resize_u8_large_picture_linear) {
inputData.clear();
function = PrePostProcessor(function).input(InputInfo()
.tensor(InputTensorInfo().set_spatial_static_shape(input_height, input_width))
.preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_LINEAR))
.network(InputNetworkInfo().set_layout("NCHW"))
)
.build();
auto p = PrePostProcessor(function);
p.input().tensor().set_spatial_static_shape(input_height, input_width);
p.input().preprocess().resize(ResizeAlgorithm::RESIZE_LINEAR);
p.input().network().set_layout("NCHW");
function = p.build();
const auto &param = function->get_parameters()[0];
inputData.emplace_back(param->get_element_type(), param->get_shape(), input_img.data());
@ -242,12 +237,11 @@ TEST_F(PreprocessOpenCVReferenceTest, resize_f32_large_picture_linear) {
inputData.clear();
function = PrePostProcessor(function).input(InputInfo()
.tensor(InputTensorInfo().set_spatial_static_shape(input_height, input_width))
.preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_LINEAR))
.network(InputNetworkInfo().set_layout("NCHW"))
)
.build();
auto p = PrePostProcessor(function);
p.input().tensor().set_spatial_static_shape(input_height, input_width);
p.input().preprocess().resize(ResizeAlgorithm::RESIZE_LINEAR);
p.input().network().set_layout("NCHW");
function = p.build();
const auto &param = function->get_parameters()[0];
inputData.emplace_back(param->get_element_type(), param->get_shape(), input_img.data());
@ -271,12 +265,11 @@ TEST_F(PreprocessOpenCVReferenceTest, DISABLED_resize_f32_large_picture_cubic_sm
auto element_type = element::f32;
auto input_img = std::vector<float> {1.f, 2.f, 3.f, 4.f, 4.f, 3.f, 2.f, 1.f, 1.f, 2.f, 3.f, 4.f, 4.f, 3.f, 2.f, 1.f};
function = create_simple_function(element_type, func_shape);
function = PrePostProcessor(function).input(InputInfo()
.tensor(InputTensorInfo().set_spatial_static_shape(input_height, input_width))
.preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_CUBIC))
.network(InputNetworkInfo().set_layout("NCHW"))
)
.build();
auto p = PrePostProcessor(function);
p.input().tensor().set_spatial_static_shape(input_height, input_width);
p.input().preprocess().resize(ResizeAlgorithm::RESIZE_CUBIC);
p.input().network().set_layout("NCHW");
function = p.build();
inputData.emplace_back(element_type, input_shape, input_img.data());

View File

@ -13,8 +13,6 @@ if(NOT DEFINED OpenVINO_SOURCE_DIR)
endif()
option(ENABLE_CONDA_FOLDER "Create output folder with conda python bindings" OFF)
cmake_dependent_option(ENABLE_WHEEL "Create wheel package" OFF
"PYTHONINTERP_FOUND;NOT CMAKE_SOURCE_DIR STREQUAL ie_python_api_SOURCE_DIR" OFF)
set(PYTHON_BRIDGE_CPACK_PATH "python")

View File

@ -55,9 +55,6 @@ add_custom_command(TARGET ${TARGET_NAME}
# install
# TODO: use ${PYTHON_VERSION}_dev component below
# ie_cpack_add_component(${PYTHON_VERSION}_dev DEPENDS ${PYTHON_COMPONENT})
install(TARGETS ${TARGET_NAME}
RUNTIME DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_COMPONENT}
LIBRARY DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_COMPONENT})

View File

@ -71,3 +71,11 @@ add_custom_command(OUTPUT ${openvino_wheel_path}
VERBATIM)
add_custom_target(ie_wheel ALL DEPENDS ${openvino_wheel_path})
# install
ie_cpack_add_component(python_wheels)
install(FILES ${openvino_wheel_path}
DESTINATION tools
COMPONENT python_wheels)

View File

@ -1,3 +1,3 @@
setuptools>=53.0.0,<=58.4.0
setuptools>=53.0.0
wheel>=0.36.2
python-decouple>=3.4

View File

@ -13,10 +13,6 @@ if(ENABLE_MKL_DNN)
add_subdirectory(mkldnn_plugin)
endif()
if(ENABLE_CLDNN)
add_subdirectory(cldnn_engine)
endif()
if(ENABLE_VPU)
add_subdirectory(vpu)
endif()

View File

@ -10,7 +10,7 @@ namespace GNAPluginNS {
struct GNAFlags {
uint8_t gna_lib_async_threads_num = 1;
bool compact_mode = false;
bool compact_mode = true;
bool exclusive_async_requests = false;
bool uniformPwlDesign = false;
float pwlMaxErrorPercent = 1.0f;

View File

@ -208,7 +208,7 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer)
connectOutput(constLayer, ptr_for_const_blob, const_blob->byteSize());
// TODO: segment type for bind, bind initializer not used - needs refactoring to separate bind and allocation requests
// don't see a practical use case where the bind storage type needs to differ from the allocation type
gnamem->readonly().bind_initializer(ptr_for_const_blob, [const_blob](void* data, size_t size) {
gnamem->bind_initializer(nullptr, ptr_for_const_blob, [const_blob](void* data, size_t size) {
ie_memcpy(data, size, const_blob->buffer(), const_blob->byteSize());
});
}
@ -475,7 +475,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
}
if (num_conv_kernel_padding == 0) {
gnamem->readonly().push_local_ptr(ptr_weights,
gnamem->readonly().push_local_ptr(layer, ptr_weights,
transposedWeights.data(),
convolution._weights->byteSize(),
64);
@ -502,19 +502,19 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
offset += padding_zeros.size();
}
};
gnamem->readonly().push_initializer(ptr_weights,
gnamem->readonly().push_initializer(layer, ptr_weights,
paddedWeightsSize,
initializer,
64);
}
if (convolution._biases) {
gnamem->readonly().push_ptr(ptr_biases,
gnamem->readonly().push_ptr(layer, ptr_biases,
convolution._biases->cbuffer().as<const void*>(),
convolution._biases->byteSize(),
64);
} else {
gnamem->readonly().push_value(ptr_biases, 0.0f, out_channels, 64);
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, out_channels, 64);
}
}
@ -600,7 +600,6 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
ptr_outputs,
ptr_weights,
ptr_biases);
currentComponent.num_bytes_per_input = inputs->getPrecision().size();
currentComponent.num_bytes_per_output = outputs->getPrecision().size();
@ -647,18 +646,18 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
transposedWeights.resize(transposedWeights.size() + kernelPad);
}
gnamem->readonly().push_local_ptr(ptr_weights,
gnamem->readonly().push_local_ptr(layer, ptr_weights,
transposedWeights.data(),
transposedWeights.size(),
64);
if (convolution._biases) {
gnamem->readonly().push_ptr(ptr_biases,
gnamem->readonly().push_ptr(layer, ptr_biases,
convolution._biases->cbuffer().as<const void*>(),
convolution._biases->byteSize(),
64);
} else {
gnamem->readonly().push_value(ptr_biases, 0.0f, out_channels, 64);
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, out_channels, 64);
}
}
#endif
@ -712,14 +711,13 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
ptr_weights,
ptr_biases,
true);
connectOutput(layer, ptr_outputs, num_data_bytes_out);
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
if (gnaFlags->sw_fp32) {
IE_ASSERT(quantized == nullptr);
gnamem->readonly().push_value(ptr_weights, power.scale, num_rows_out, 64);
gnamem->readonly().push_value(ptr_biases, power.offset, num_rows_out, 64);
gnamem->readonly().push_value(layer, ptr_weights, power.scale, num_rows_out, 64);
gnamem->readonly().push_value(layer, ptr_biases, power.offset, num_rows_out, 64);
} else {
IE_ASSERT(quantized != nullptr);
if (!gnaFlags->input_low_precision) {
@ -727,15 +725,15 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
static_cast<float>(INT16_MAX)));
auto quantizedOffset = FLOAT_TO_INT32(std::min(quantized->_dst_quant.GetScale() * power.offset,
static_cast<float>(INT32_MAX)));
gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedScale, num_rows_out, 64);
gnamem->readonly().push_value<int32_t>(ptr_biases, quantizedOffset, num_rows_out, 64);
gnamem->readonly().push_value<int16_t>(layer, ptr_weights, quantizedScale, num_rows_out, 64);
gnamem->readonly().push_value<int32_t>(layer, ptr_biases, quantizedOffset, num_rows_out, 64);
} else {
auto quantizedScale = FLOAT_TO_INT8(std::min(quantized->_weights_quant.GetScale() * power.scale,
static_cast<float>(INT8_MAX)));
auto quantizedOffset = FLOAT_TO_INT8(std::min(quantized->_dst_quant.GetScale() * power.offset,
static_cast<float>(INT8_MAX)));
gnamem->readonly().push_value<int8_t>(ptr_weights, quantizedScale, num_rows_out, 64);
gnamem->readonly().push_value<int8_t>(ptr_biases, quantizedOffset, num_rows_out, 64);
gnamem->readonly().push_value<int8_t>(layer, ptr_weights, quantizedScale, num_rows_out, 64);
gnamem->readonly().push_value<int8_t>(layer, ptr_biases, quantizedOffset, num_rows_out, 64);
}
}
} else {
@ -799,12 +797,11 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
ptr_pwl_input,
ptr_pwl_outputs,
ptr_pwl_segments_target);
connectOutput(layer, ptr_pwl_outputs, num_data_bytes_out);
connectInput(layer, ptr_pwl_input, num_data_bytes_in, 0, 0);
if (ptr_pwl_segments_target != nullptr) {
gnamem->readonly().push_local_ptr(ptr_pwl_segments_target,
gnamem->readonly().push_local_ptr(layer, ptr_pwl_segments_target,
&ptr_pwl_segments.front(),
ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t),
64);
@ -876,7 +873,6 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
getScaleFactor(layer, QuantizedDataType::output),
ptr_inputs,
ptr_outputs);
size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()))
* outputs->getPrecision().size();
@ -921,7 +917,6 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
num_columns_out,
ptr_inputs,
ptr_outputs);
size_t num_data_bytes_out = ALIGN(InferenceEngine::details::product(
begin(outputs->getDims()), end(outputs->getDims())), 8)
* outputs->getPrecision().size();
@ -933,7 +928,6 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto concatLayer = dynamic_cast<InferenceEngine::ConcatLayer *> (layer.get());
if (concatLayer == nullptr) {
return;
}
@ -996,13 +990,10 @@ void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto layerInfo = LayerInfo(concatParent);
// auto layerInfo = LayerInfo(getCreatorLayer(concatLayerInput->insData[it].lock()).lock());
if (layerInfo.isInput()) {
connectInput(layer, &concatLayerInfo.gna_ptr,
inputLayer.tensorSize, inputLayer.offset, idx, false);
connectInput(layer, &concatLayerInfo.gna_ptr, inputLayer.tensorSize, inputLayer.offset, idx, false);
concatLayerInfo.input_allocated = true;
} else if (layerInfo.isMemory()) {
connectInput(layer, &concatLayerInfo.gna_ptr, concatLayerInfo.reserved_size, inputLayer.offset, idx, false);
concatLayerInfo.input_allocated = true;
}
++idx;
@ -1114,7 +1105,6 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
ptr_weights,
ptr_biases,
false);
size_t num_data_bytes_out =
InferenceEngine::details::product(
begin(outputs->getDims()), end(outputs->getDims())) * 4;
@ -1128,8 +1118,8 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
FillWeightOfAligningFilter(layer, ptr_weights, offset.front(), (quantized == nullptr) ? false : true);
(quantized == nullptr) ?
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64) :
gnamem->readonly().push_value<int32_t>(ptr_biases, 0, num_rows_out, 64);
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64) :
gnamem->readonly().push_value<int32_t>(layer, ptr_biases, 0, num_rows_out, 64);
}
}
@ -1249,7 +1239,6 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
ptr_weights,
ptr_biases,
true);
size_t num_data_bytes_out =
InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) * outputs->getPrecision().size();
@ -1262,36 +1251,36 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
switch (eltwise._operation) {
case EltwiseLayer::Sub:
if (quantized == nullptr) {
gnamem->readonly().push_value(ptr_weights, -1.0f, num_rows_out, 64);
gnamem->readonly().push_value(layer, ptr_weights, -1.0f, num_rows_out, 64);
} else {
auto scaledIdentity = -quantized->_weights_quant.GetScale();
if (gnaFlags->input_low_precision == false) {
auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));
gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
gnamem->readonly().push_value<int16_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
} else {
auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast<float>(INT8_MAX)));
gnamem->readonly().push_value<int8_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
gnamem->readonly().push_value<int8_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
}
}
connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
break;
case EltwiseLayer::Sum:
if (quantized == nullptr) {
gnamem->readonly().push_value(ptr_weights, 1.0f, num_rows_out, 64);
gnamem->readonly().push_value(layer, ptr_weights, 1.0f, num_rows_out, 64);
} else {
auto scaledIdentity = quantized->_weights_quant.GetScale();
if (gnaFlags->input_low_precision == false) {
auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));
gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
gnamem->readonly().push_value<int16_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
} else {
auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast<float>(INT8_MAX)));
gnamem->readonly().push_value<int8_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
gnamem->readonly().push_value<int8_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
}
}
connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
@ -1299,12 +1288,12 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
case EltwiseLayer::Prod:
if (quantized == nullptr) {
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
} else {
if (gnaFlags->input_low_precision == false) {
gnamem->readonly().push_value<int32_t>(ptr_biases, 0, num_rows_out, 64);
gnamem->readonly().push_value<int32_t>(layer, ptr_biases, 0, num_rows_out, 64);
} else {
gnamem->readonly().push_value<int8_t>(ptr_biases, 0, num_rows_out, 64);
gnamem->readonly().push_value<int8_t>(layer, ptr_biases, 0, num_rows_out, 64);
}
}
connectInput(layer, ptr_weights, num_data_bytes_in, 0, biasesLayerIdx);
@ -1372,9 +1361,9 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
connectInput(layer, ptr_input_2, num_data_bytes_in_2, 0, 1);
if (gnaFlags->sw_fp32) {
IE_ASSERT(quantized == nullptr);
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
} else {
gnamem->readonly().push_value<int32_t>(ptr_biases, 0.0f, num_rows_out, 64);
gnamem->readonly().push_value<int32_t>(layer, ptr_biases, 0.0f, num_rows_out, 64);
}
}
@ -1485,12 +1474,12 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
if (num_padding == 0) {
if (!transpose) {
gnamem->readonly().push_ptr(ptr_weights,
gnamem->readonly().push_ptr(layer, ptr_weights,
weightable._weights->cbuffer().as<const void*>(),
weightable._weights->byteSize(),
64);
} else {
gnamem->readonly().push_initializer(ptr_weights, weightable._weights->byteSize(), [=](void* data, size_t size) {
gnamem->readonly().push_initializer(layer, ptr_weights, weightable._weights->byteSize(), [=](void* data, size_t size) {
for (uint32_t k = 0; k < (isDiag ? 1 : num_rows_out); k++) {
auto rowOffset = k * transposedRows * transposedCols * weightable.precision.size();
auto cbuffer = weightable._weights->cbuffer().as<const uint8_t*>() + rowOffset;
@ -1519,7 +1508,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
auto paddedWeights = isDiag ? elementsIn : elementsIn * num_rows_out;
auto paddedWeightsSize = paddedWeights * weightable.precision.size();
gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
gnamem->readonly().push_initializer(layer, ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
for (uint32_t i = 0; i < (isDiag ? 1 : num_rows_out); i++) {
ie_memcpy(data, size,
weightable._weights->cbuffer().as<const uint8_t*>() + num_rows_in * i * weightable.precision.size(),
@ -1530,16 +1519,16 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
}
if (weightable._biases) {
gnamem->readonly().push_ptr(ptr_biases,
gnamem->readonly().push_ptr(layer, ptr_biases,
weightable._biases->cbuffer().as<const void*>(),
weightable._biases->byteSize(),
64);
} else {
// in that case input from previous layer goes into biases, so we have to initialize input pointer by zero
if (useBiasConnection) {
gnamem->readonly().push_value(ptr_inputs, 0.0f, num_rows_in + num_padding, 64);
gnamem->readonly().push_value(layer, ptr_inputs, 0.0f, num_rows_in + num_padding, 64);
} else {
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out + num_padding_out, 64);
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out + num_padding_out, 64);
}
}
}
@ -1557,7 +1546,7 @@ void GNAGraphCompiler::FillWeightOfAligningFilter(InferenceEngine::CNNLayerPtr l
THROW_GNA_EXCEPTION << "Weights memory is not allocated!!!";
}
gnamem->readonly().push_initializer(ptrWeights, num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), [=](void* data, size_t size) {
gnamem->readonly().push_initializer(layer, ptrWeights, num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), [=](void* data, size_t size) {
int out = 0;
for (int input = offset; input < num_rows_out + offset; ++input) {
auto mem_ptr = reinterpret_cast<uint8_t*>(data) + input * layer->precision.size() + out * ALIGN(num_rows_in, 8) * layer->precision.size();
@ -1624,7 +1613,6 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
ptr_inputs,
ptr_outputs);
size_t num_data_bytes_in = num_rows_copied * num_rows_copied * num_columns_in
* inputs->getPrecision().size();
// need to reserve the full tensor, so use the original size, assuming an identity activation is attached to the filter later on
@ -1681,7 +1669,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
size_t weights_stride = (num_rows_in + num_rows_copied) * weightsElementSize;
size_t weights_offset = weights_stride * num_rows_copied + num_rows_copied * weightsElementSize;
gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
gnamem->readonly().push_initializer(layer, ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
size_t roffset = weights_offset;
size_t woffset = 0;
for (int i = 0; i < num_rows_out && size >= woffset; i++) {
@ -1696,12 +1684,12 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
}
if (filterLayer->_biases) {
gnamem->readonly().push_ptr(ptr_biases,
gnamem->readonly().push_ptr(layer, ptr_biases,
filterLayer->_biases->cbuffer().as<const void*>(),
filterLayer->_biases->byteSize(),
64);
} else {
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
}
}
@ -1774,18 +1762,18 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
connectOutput(layer, ptr_outputs, num_data_bytes_out);
gnamem->readonly().push_ptr(ptr_weights,
gnamem->readonly().push_ptr(layer, ptr_weights,
filterLayer->_weights->cbuffer().as<const void*>(),
filterLayer->_weights->byteSize(),
64);
if (filterLayer->_biases) {
gnamem->readonly().push_ptr(ptr_biases,
gnamem->readonly().push_ptr(layer, ptr_biases,
filterLayer->_biases->cbuffer().as<const void*>(),
filterLayer->_biases->byteSize(),
64);
} else {
gnamem->readonly().push_value(ptr_biases, 0.0f, numberOfFilters, 64);
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, numberOfFilters, 64);
}
}
@ -2016,7 +2004,7 @@ case name:\
connectOutput(layer, ptr_outputs, num_data_bytes_out);
if (ptr_pwl_segments_target != nullptr) {
gnamem->readonly().push_local_ptr(ptr_pwl_segments_target,
gnamem->readonly().push_local_ptr(layer, ptr_pwl_segments_target,
&ptr_pwl_segments.front(),
ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t),
64);
@ -2152,8 +2140,9 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
}
}
void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *ptr,
size_t num_data_bytes_out) {
void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
void *ptr,
size_t num_data_bytes_out) {
auto getOffsetForBinding = [](InferenceEngine::CNNLayerPtr layer) {
int32_t output_offset = 0;
if (layer->params.find("output_offset") != layer->params.end()) {
@ -2162,7 +2151,6 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
return output_offset;
};
gnalog() << "Connecting output " << layer->name << " ...\n";
// in case of Memory Layer it's input allocated in meminput layer
if (layer->outData.size() == 1) {
@ -2179,7 +2167,6 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
if (!nextLayer.first) {
gnalog() << "for layer: " << layer->name << "outData[0] has non functional connection at " << j;
}
auto nextMemoryLayerIt =
std::find_if(begin(memory_connection), end(memory_connection),
[&](MemoryConnection::value_type &comp) {
@ -2190,14 +2177,13 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
// memory layer not yet initialized
if (nextMemoryLayer.reserved_size == 0) {
auto memorySize = InferenceEngine::details::product(nextMemoryLayer.getDims()) * nextMemoryLayer.elementSizeBytes();
gnamem->reserve_ptr(&nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64);
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
gnamem->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64);
gnamem->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
nextMemoryLayer.reserved_size = ALIGN64(memorySize);
} else {
// We may need to extend memory buffer if connected input size is bigger, for example for concat connection
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out));
gnamem->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out));
}
return;
}
@ -2288,7 +2274,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
return it != concatItem.second.concatInputLayers.end();
});
if (included == concat_connection.end()) {
gnamem->reserve_ptr(&concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size), 64);
gnamem->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size), 64);
std::function<void(GNAConcatLayer, GNAPluginNS::InputDesc&, ConcatConnection&)> allocate_input_recursively =
[&allocate_input_recursively](GNAConcatLayer clayer, GNAPluginNS::InputDesc& inputDesc, ConcatConnection& concat_connection) {
@ -2321,26 +2307,24 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
if (layer->params.find("output_offset") != layer->params.end()) {
output_offset = layer->GetParamAsInt("output_offset");
}
gnamem->bind_ptr(ptr, &concatLayerInfoItem.gna_ptr, output_offset);
gnamem->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, output_offset);
}
return;
}
}
intel_dnn_component_t * unused_input = nullptr;
if (gnaFlags->compact_mode) {
unused_input = find_first_unused_input(layer);
if (unused_input != nullptr) {
gnamem->bind_ptr(ptr, &unused_input->ptr_inputs, 0, ALIGN64(num_data_bytes_out));
}
}
// cannot reuse suitable input
if (unused_input == nullptr) {
gnamem->reserve_ptr(ptr, ALIGN64(num_data_bytes_out), 64);
}
auto nextLayer = CNNNetCheckNextLayerSkipCertain(layer, 0, 0, true,
[](CNNLayerPtr l) { return LayerInfo(l).isNonFunctional(); }).first;
// Check that layer will be an output
gnamem->reserve_ptr((LayerInfo(layer).isOutput() || !nextLayer) ? nullptr : layer, ptr, ALIGN64(num_data_bytes_out), 64);
}
GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, void *ptr, size_t num_data_bytes_in, int32_t offset, int idx, bool connectTo) {
GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
void *ptr,
size_t num_data_bytes_in,
int32_t offset,
int idx,
bool connectTo) {
// selecting particular input layers
// auto prevLayer = CNNNetPrevLayer(layer, idx);
auto prevLayer = CNNNetPrevLayerSkipCertain(layer, idx, [](CNNLayerPtr l) {
@ -2363,12 +2347,12 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// real allocation pointer will be kept in ptr not in ptr_inputs_global
if (!connectTo) {
gnamem->push_value(ptr,
gnamem->push_value(nullptr, ptr,
static_cast<uint8_t>(0),
num_data_bytes_in,
64);
} else {
gnamem->push_value(&inputDesc->getPtrInputsGlobal(prevLayer->name).front(),
gnamem->push_value(nullptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(),
static_cast<uint8_t>(0),
num_data_bytes_in,
64);
@ -2384,9 +2368,9 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
}
if (connectTo) {
gnamem->bind_ptr(ptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), offset, ALIGN(num_data_bytes_in, 64));
gnamem->bind_ptr(nullptr, ptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), offset, ALIGN(num_data_bytes_in, 64));
} else {
gnamem->bind_ptr(&inputDesc->getPtrInputsGlobal(prevLayer->name).front(), ptr, offset, ALIGN(num_data_bytes_in, 64));
gnamem->bind_ptr(nullptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), ptr, offset, ALIGN(num_data_bytes_in, 64));
}
return prevLayer;
@ -2394,9 +2378,9 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// const input
if (LayerInfo(prevLayer).isConst()) {
if (connectTo) {
gnamem->bind_ptr(ptr, const_connections[prevLayer->name], offset);
gnamem->bind_ptr(layer, ptr, const_connections[prevLayer->name], offset);
} else {
gnamem->bind_ptr(const_connections[prevLayer->name], ptr, offset);
gnamem->bind_ptr(layer, const_connections[prevLayer->name], ptr, offset);
}
return prevLayer;
@ -2423,6 +2407,8 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
if (it != splitLayerInfoItem.splitOutputLayers.end()) {
gnalog() << "Connecting " << splitName << " input \n";
// splitting layer should take the execution order from the connected layer
splittingLayer->userValue = layer->userValue;
auto res = connectInput(splittingLayer, ptr, splitLayerInfoItem.reserved_size, it->offset + offset, 0);
gnalog() << "Connected \n";
return res;
@ -2435,7 +2421,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
if (concatLayerInfo != concat_connection.end()) {
auto & concatLayerInfoItem = concatLayerInfo->second;
// dnnLayer that is input for concat layer
gnamem->bind_ptr(ptr, &concatLayerInfoItem.gna_ptr, offset);
gnamem->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, offset);
// return layer over concat
return CNNNetPrevLayer(prevLayer);
}
@ -2444,7 +2430,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
prevLayer->name);
if (cropLayerInfo != crop_connection.end()) {
auto & cropLayerInfoItem = cropLayerInfo->second;
gnamem->bind_ptr(ptr, &cropLayerInfoItem.gna_ptr, offset);
gnamem->bind_ptr(layer, ptr, &cropLayerInfoItem.gna_ptr, offset);
return CNNNetPrevLayer(prevLayer);
}
}
@ -2452,7 +2438,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// check for generic prev layer
if (prevDnnLayer != nullptr) {
gnamem->bind_ptr(ptr, &prevDnnLayer->ptr_outputs, offset);
gnamem->bind_ptr(layer, ptr, &prevDnnLayer->ptr_outputs, offset);
return prevLayer;
}
@ -2470,20 +2456,20 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// connectTo is used to indicate that the memory layer should be bound to the given buffer
if (connectTo) {
memorySize = std::max(memorySize, num_data_bytes_in);
gnamem->reserve_ptr(&memoryLayer.gna_ptr, ALIGN64(memorySize), 64);
gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset);
gnamem->reserve_ptr(nullptr, &memoryLayer.gna_ptr, ALIGN64(memorySize), 64);
gnamem->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset);
} else {
if (num_data_bytes_in < memorySize + offset) {
THROW_GNA_LAYER_EXCEPTION(layer) <<" invalid allocation request of "
<< num_data_bytes_in << " is more then state tensor size of: " << memorySize + offset;
}
gnamem->bind_ptr(&memoryLayer.gna_ptr, ptr, offset);
gnamem->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset);
}
memoryLayer.reserved_size = ALIGN64(memorySize);
} else {
// We may need to extend memory buffer if connected input size is bigger, for example for concat connection
gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
gnamem->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
}
return prevLayer;

View File

@ -17,6 +17,7 @@
#include <utility>
#include <limits>
#include <ie_common.h>
#include <legacy/graph_tools.hpp>
#include <legacy/net_pass.h>
#include <debug.h>
@ -524,7 +525,7 @@ bool GNAPlugin::TryToInitOutput(int portId, InferenceEngine::CNNLayerPtr layer)
desc.num_elements = numElem;
// binding ptr for the first infer request - the others will be set up during relocation
gnamem->bind_ptr(&desc.ptrs.front(), outputPtr);
gnamem->bind_ptr(layer, &desc.ptrs.front(), outputPtr);
};
// probing gna_primitives
@ -927,7 +928,11 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
}
// Creating Layer primitives
uint16_t id = 0;
for (auto & layer : sortedNoMem) {
IE_SUPPRESS_DEPRECATED_START
layer->userValue.v_int = id++;
IE_SUPPRESS_DEPRECATED_END
graphCompiler.CreateLayerPrimitive(layer);
}
@ -981,7 +986,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
// TODO: how active list will work in multioutput case
// make room for active list
gnamem->reserve_ptr(nullptr,
gnamem->reserve_ptr(nullptr, nullptr,
ALIGN64(outputsDesc.front().num_bytes_per_element * outputsDesc.front().num_elements), 64);
void *pParallelExecutionData = nullptr;
@ -989,10 +994,10 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
// reserving more bytes for intermediate data in the parallel case - TODO: this works incorrectly in compact mode at least
rwSegmentSize = gnamem->getRWBytes();
if (gnaFlags->gna_lib_async_threads_num > 1) {
gnamem->reserve_ptr(&pParallelExecutionData, gnamem->getRWBytes() * (gnaFlags->gna_lib_async_threads_num - 1), 64);
gnamem->reserve_ptr(nullptr, &pParallelExecutionData, gnamem->getRWBytes() * (gnaFlags->gna_lib_async_threads_num - 1), 64);
}
gnamem->commit();
gnamem->commit(gnaFlags->compact_mode);
dnn->Init(gnamem->getBasePtr(),
gnamem->getTotalBytes(),
@ -1569,7 +1574,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i
graphCompiler.setGNAMemoryPtr(gnamem);
void *basePtr = nullptr;
gnamem->reserve_ptr(&basePtr, header.gnaMemSize);
gnamem->reserve_ptr(nullptr, &basePtr, header.gnaMemSize);
gnamem->commit();
#if GNA_LIB_VER == 2
gnaModels.push_back(std::make_tuple(make_shared<CPPWrapper<Gna2Model>>(header.layersCount)));

View File

@ -14,6 +14,8 @@
* @brief used for creating graphviz charts, and layers dump
*/
# define PLOT
# define MODEL_DUMP
# define GNA_HEAP_PROFILER
# define gnalog() std::cout
# define gnawarn() std::cerr
#else

View File

@ -8,6 +8,8 @@
#include <vector>
#include <algorithm>
#include "gna_plugin_log.hpp"
namespace GNAPluginNS {
namespace memory {
@ -26,6 +28,45 @@ enum rRegion {
REGION_AUTO,
};
#ifdef GNA_HEAP_PROFILER
inline const char* rRegionToStr(uint8_t region) {
const char* strRegion = "UNKNOWN";
switch (region) {
case REGION_RO:
strRegion = "REGION_RO";
break;
case REGION_RW:
strRegion = "REGION_RW";
break;
case REGION_AUTO:
strRegion = "REGION_AUTO";
break;
}
return strRegion;
}
inline const char* rTypeToStr(uint8_t type) {
const char* strType = "UNKNOWN";
switch (type) {
case REQUEST_STORE:
strType = "REQUEST_STORE";
break;
case REQUEST_ALLOCATE:
strType = "REQUEST_ALLOCATE";
break;
case REQUEST_BIND:
strType = "REQUEST_BIND";
break;
case REQUEST_INITIALIZER | REQUEST_STORE:
case REQUEST_INITIALIZER | REQUEST_ALLOCATE:
case REQUEST_INITIALIZER | REQUEST_BIND:
strType = "INITIALIZER";
break;
}
return strType;
}
#endif
struct MemRequest {
rRegion _region;
uint8_t _type;
@ -40,6 +81,10 @@ struct MemRequest {
size_t _offset = 0;
// expansion in bytes due to large dependent layers
size_t _padding = 0;
// fields to sort regions by execution availability
std::pair<uint16_t, uint16_t> _life_limits{0, UINT16_MAX};
MemRequest(rRegion region,
rType req,
void *ptr_out,
@ -79,7 +124,8 @@ struct MemRequest {
_data.resize(sizeof(T));
std::copy(reinterpret_cast<uint8_t *>(&element), reinterpret_cast<uint8_t *>(&element) + sizeof(T), _data.begin());
}
/**
/**
* Store initializer request
* @param req
* @param ptr_out
@ -103,4 +149,4 @@ struct MemRequest {
}
};
} // namespace memory
} // namespace GNAPluginNS
} // namespace GNAPluginNS

View File

@ -8,10 +8,23 @@
#include <vector>
#include <algorithm>
#include <functional>
#include <ie_api.h>
#include <legacy/ie_layers.h>
#include "gna_mem_requests.hpp"
namespace GNAPluginNS {
namespace memory {
/**
* @brief get layer id from legacy CNNLayer
*/
inline uint16_t getCNNLayerId(InferenceEngine::CNNLayerPtr layer) {
IE_SUPPRESS_DEPRECATED_START
return layer->userValue.v_int;
IE_SUPPRESS_DEPRECATED_END
}
/**
* Adapter for requests submission and actual request queue
*/
@ -26,12 +39,26 @@ public:
* @param num_bytes
* @param alignment
*/
void push_initializer(void *ptr_out, size_t num_bytes, std::function<void(void * data, size_t size)> initializer, size_t alignment = 1) {
void push_initializer(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
size_t num_bytes,
std::function<void(void * data, size_t size)> initializer,
size_t alignment = 1) {
futureHeap().push_back({regionType(), ptr_out, num_bytes, initializer, REQUEST_INITIALIZER, alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
}
void push_ptr(void *ptr_out, const void *ptr_in, size_t num_bytes, size_t alignment = 1) {
void push_ptr(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
const void *ptr_in,
size_t num_bytes,
size_t alignment = 1) {
futureHeap().push_back({regionType(), REQUEST_STORE, ptr_out, ptr_in, 1, num_bytes, alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
}
/**
@ -40,10 +67,17 @@ public:
* @param ptr_in
* @param num_bytes
*/
void push_local_ptr(void *ptr_out, const void *ptr_in, size_t num_bytes, size_t alignment = 1) {
void push_local_ptr(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
const void *ptr_in,
size_t num_bytes,
size_t alignment = 1) {
localStorage().emplace_back(reinterpret_cast<const uint8_t *>(ptr_in),
reinterpret_cast<const uint8_t *>(ptr_in) + num_bytes);
futureHeap().push_back({regionType(), REQUEST_STORE, ptr_out, &localStorage().back().front(), 1, num_bytes, alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
}
/**
@ -51,8 +85,14 @@ public:
* @param ptr_out
* @param num_bytes
*/
void reserve_ptr(void *ptr_out, size_t num_bytes, size_t alignment = 1) {
void reserve_ptr(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
size_t num_bytes,
size_t alignment = 1) {
futureHeap().push_back({regionType(), REQUEST_ALLOCATE, ptr_out, nullptr, 1, num_bytes, alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {getCNNLayerId(layer), getCNNLayerId(layer)};
}
}
/**
@ -63,8 +103,15 @@ public:
* @param num_bytes - bind can request for bigger buffer that originally allocated via reserve(),
* if that happens - reserved request parameters will be updated before committing memory
*/
void bind_ptr(void *source, const void *dest, size_t offset = 0, size_t num_bytes = 0) {
void bind_ptr(InferenceEngine::CNNLayerPtr layer,
void *source,
const void *dest,
size_t offset = 0,
size_t num_bytes = 0) {
futureHeap().push_back({regionType(), REQUEST_BIND, source, dest, 1, num_bytes, 1, offset});
if (layer != nullptr) {
futureHeap().back()._life_limits = {getCNNLayerId(layer), getCNNLayerId(layer)};
}
}
/**
@ -72,16 +119,28 @@ public:
* @param ptr_out - previously requested buffer
* @param initializer - initialisation routine to be called on allocated memory
*/
void bind_initializer(void *ptr_out, std::function<void(void * data, size_t size)> initializer) {
void bind_initializer(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
std::function<void(void * data, size_t size)> initializer) {
futureHeap().push_back({regionType(), ptr_out, 0, initializer, REQUEST_BIND, 1});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
}
/**
* @brief allocates buffer and set all its values to T value
*/
template<class T>
void push_value(void *ptr_out, T value, size_t num_elements, size_t alignment = 1) {
void push_value(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
T value,
size_t num_elements,
size_t alignment = 1) {
futureHeap().push_back({regionType(), ptr_out, value, num_elements, alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
}
/**

View File

@ -13,7 +13,15 @@
#include <list>
#include <algorithm>
#include <functional>
#include <iostream>
#include "gna_lib_ver_selector.hpp"
#include "memory_solver.hpp"
#include "gna_plugin_log.hpp"
#ifdef GNA_HEAP_PROFILER
#include <iomanip>
#include <fstream>
#endif
namespace GNAPluginNS {
namespace memory {
@ -32,6 +40,7 @@ class GNAMemory : public GNAMemRequestsQueue {
Allocator _allocator;
std::shared_ptr<uint8_t> heap = nullptr;
size_t _page_alignment = 1;
bool _is_compact_mode = false;
class GNAMemRequestsReadOnlyQueue : public GNAMemRequestsQueue {
std::reference_wrapper<GNAMemRequestsQueue> _that;
@ -62,93 +71,32 @@ class GNAMemory : public GNAMemRequestsQueue {
return readOnlyFrontEnd;
}
/**
* @brief enables memory optimization (compact mode). This mode can be enabled in the plugin configuration (COMPACT_MODE = Yes)
*/
void setCompactMode(bool isCompact) {
_is_compact_mode = isCompact;
}
/**
* @brief calculates size required for all requests, allocates memory and updates pointers
*/
void commit() {
void commit(bool isCompact = false) {
setCompactMode(isCompact);
// 1st stage -- looking for expandable bind requests:
for (auto &originated : _future_heap) {
if (originated._type & REQUEST_BIND) continue;
size_t offset = 0;
iterate_binded(originated, [&](MemRequest & reference, MemRequest & binded) {
if (&originated == &reference) {
offset = 0;
}
offset += binded._offset;
auto current = offset + ALIGN(binded._num_elements * binded._element_size, binded._alignment);
auto original_no_pad = ALIGN(originated._num_elements * originated._element_size, originated._alignment);
auto original_with_pad = ALIGN(originated._num_elements * originated._element_size + originated._padding, originated._alignment);
expandBindings();
originated._padding = ALIGN(std::max(original_with_pad, current), originated._alignment) - original_no_pad;
});
}
// 2nd stage -- setup offsets:
setRegionOffsets(REGION_RO);
setRegionOffsets(REGION_RW);
updateSectionsSizes();
// 3rd stage -- allocation total memory setting to 0 internally
heap = allocate(getTotalBytes());
_total = _rw_section_size + _ro_section_size;
// allocation with memory setting to 0 internally
heap = allocate(_total);
auto setupOffsets = [&](std::function<bool(MemRequest & request)> filter, size_t offset) {
for (auto &re : _future_heap) {
if (re._type == REQUEST_BIND) continue;
if (filter(re)) continue;
auto sz = re._element_size * re._num_elements;
if (re._ptr_out != nullptr) {
auto cptr = heap.get() + offset;
size_t cptr_avail_size = _total - offset;
if (re._type & REQUEST_BIND) {
cptr = reinterpret_cast<uint8_t*>(*reinterpret_cast<void **>(re._ptr_out));
cptr_avail_size = sz;
} else {
*reinterpret_cast<void **>(re._ptr_out) = cptr;
}
// std::cout << "ALLOCATED=" << cptr << ", size=" << re._element_size * re._num_elements << "\n";
iterate_binded(re, [](MemRequest & reference, MemRequest & binded) {
*reinterpret_cast<void **>(binded._ptr_out) =
binded._offset + reinterpret_cast<uint8_t *>(*reinterpret_cast<void **>(reference._ptr_out));
binded._num_elements = reference._num_elements;
binded._element_size = reference._element_size;
});
// std::cout << "size=" << ALIGN(sz, re._alignment) << "\n" << std::flush;
switch (re._type & ~REQUEST_BIND) {
case REQUEST_ALLOCATE :
break;
case REQUEST_STORE : {
if (re._ptr_in != nullptr) {
ie_memcpy(cptr, cptr_avail_size, re._ptr_in, sz);
} else {
size_t of = 0;
for (int i = 0; i < re._num_elements; i++, of += re._element_size) {
std::copy(std::begin(re._data), std::end(re._data), cptr + of);
}
}
break;
}
case REQUEST_INITIALIZER : {
re._initializer(cptr, sz);
break;
}
}
}
if (!(re._type & REQUEST_BIND)) {
offset += ALIGN(sz + re._padding, re._alignment);
}
}
};
setupOffsets([](GNAPluginNS::memory::MemRequest & request) {
// TODO: consume bind requests separately from storage type
return !(request._type & REQUEST_BIND) && (request._region != REGION_RW);
}, 0);
setupOffsets([](GNAPluginNS::memory::MemRequest & request) {
return (request._type & REQUEST_BIND) || request._region != REGION_RO;
}, _rw_section_size);
// 4th stage -- store data and updates pointers
allocateRegion(REGION_RW, 0);
allocateRegion(REGION_RO, _rw_section_size);
}
void *getBasePtr() {
@ -180,7 +128,7 @@ class GNAMemory : public GNAMemRequestsQueue {
void iterate_binded(GNAPluginNS::memory::MemRequest & reference, const T & visitor) {
for (auto &re : _future_heap) {
if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) {
// std::cout << " [binded=" << re._type << ", ptr=" << re._ptr_out <<"]\n";
// std::cout << " [binded=" << rTypeToStr(re._type) << ", ptr=" << re._ptr_out <<"]\n";
visitor(reference, re);
// primitive loop check
if (re._ptr_in == re._ptr_out) continue;
@ -190,7 +138,6 @@ class GNAMemory : public GNAMemRequestsQueue {
}
}
std::shared_ptr<uint8_t> allocate(size_t bytes) {
std::shared_ptr<uint8_t> sp(_allocator.allocate(bytes), [=](uint8_t *p) {
_allocator.deallocate(p, bytes);
@ -200,31 +147,191 @@ class GNAMemory : public GNAMemRequestsQueue {
}
protected:
/**
* @brief expand BIND and (BIND | ) requests. Align size(_padding), set execution order
*/
void expandBindings() {
for (auto &originated : _future_heap) {
// skipping bind requests to avoid duplications
if (originated._type & REQUEST_BIND) continue;
size_t offset = 0;
iterate_binded(originated, [&](MemRequest & reference, MemRequest & binded) {
// aligning sizes
if (&originated == &reference) offset = 0;
offset += binded._offset;
auto current = offset + ALIGN(binded._num_elements * binded._element_size, binded._alignment);
auto original_no_pad = ALIGN(originated._num_elements * originated._element_size, originated._alignment);
auto original_with_pad = ALIGN(originated._num_elements * originated._element_size + originated._padding, originated._alignment);
originated._padding = ALIGN(std::max(original_with_pad, current), originated._alignment) - original_no_pad;
// set execution order
originated._life_limits.first = std::min(originated._life_limits.first, binded._life_limits.first);
originated._life_limits.second = std::max(originated._life_limits.second, binded._life_limits.second);
});
}
}
/**
* @brief set offsets for specific region
*/
size_t setRegionOffsets(GNAPluginNS::memory::rRegion regType) {
size_t region_offset = 0;
for (auto &re : _future_heap) {
if (re._region != regType || re._type & REQUEST_BIND || re._ptr_out == nullptr) continue;
re._offset = region_offset;
region_offset += ALIGN(re._num_elements * re._element_size + re._padding, re._alignment);
}
return region_offset;
}
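    // Illustration only (not part of the sources): with the same ALIGN assumption,
    // two non-bind requests of 10 and 100 bytes (alignment 64, padding 0) in one
    // region receive _offset = 0 and _offset = ALIGN(10, 64) = 64, and the returned
    // region size is 64 + ALIGN(100, 64) = 192.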
/**
* @brief allocates memory and updates pointers
*/
void allocateRegion(GNAPluginNS::memory::rRegion regType, size_t baseOffset) {
for (auto &re : _future_heap) {
// skipping Bind, cross-region and empty requests
if (re._region != regType || re._type == REQUEST_BIND || re._ptr_out == nullptr) continue;
size_t offset = baseOffset + re._offset;
auto cptr = heap.get() + offset;
size_t cptr_avail_size = _total - offset;
auto sz = re._element_size * re._num_elements;
if (re._type & REQUEST_BIND) {
cptr = reinterpret_cast<uint8_t*>(*reinterpret_cast<void **>(re._ptr_out));
cptr_avail_size = sz;
} else {
*reinterpret_cast<void **>(re._ptr_out) = cptr;
}
iterate_binded(re, [](MemRequest & reference, MemRequest & binded) {
*reinterpret_cast<void **>(binded._ptr_out) =
binded._offset + reinterpret_cast<uint8_t *>(*reinterpret_cast<void **>(reference._ptr_out));
binded._num_elements = reference._num_elements;
binded._element_size = reference._element_size;
});
switch (re._type & ~REQUEST_BIND) {
case REQUEST_ALLOCATE :
break;
case REQUEST_STORE : {
if (re._ptr_in != nullptr) {
ie_memcpy(cptr, cptr_avail_size, re._ptr_in, sz);
} else {
size_t of = 0;
for (int i = 0; i < re._num_elements; i++, of += re._element_size) {
std::copy(std::begin(re._data), std::end(re._data), cptr + of);
}
}
break;
}
case REQUEST_INITIALIZER : {
re._initializer(cptr, sz);
break;
}
}
}
}
/**
* @brief optimize memory region by reusing buffers
*/
size_t getSectionSizeOptimized(GNAPluginNS::memory::rRegion regType) {
size_t memSize = 0;
switch (regType) {
case REGION_AUTO:
case REGION_RW:
case REGION_RO: {
std::vector<MemorySolver::Box> boxes;
for (size_t i = 0; i < _future_heap.size(); ++i) {
// skipping BIND, cross-region and empty requests
if (_future_heap[i]._type & REQUEST_BIND || _future_heap[i]._region != regType || _future_heap[i]._ptr_out == nullptr) {
continue;
}
auto original_with_pad = ALIGN(_future_heap[i]._num_elements * _future_heap[i]._element_size + _future_heap[i]._padding,
_future_heap[i]._alignment);
int start = _future_heap[i]._life_limits.first;
int stop = _future_heap[i]._life_limits.second;
boxes.push_back({start, stop, static_cast<int64_t>(original_with_pad), static_cast<int64_t>(i)});
}
MemorySolver memSolver(boxes);
memSize = memSolver.solve();
// setting offsets
for (auto const & box : boxes) {
_future_heap[box.id]._offset = memSolver.getOffset(box.id);
}
}
break;
default:
break;
}
return memSize;
}
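    // Illustration only (not part of the sources): in compact mode the solver may
    // overlap buffers whose lifetimes do not intersect. E.g. two 64-byte boxes with
    // life limits [0;2] and [3;5] can both be placed at offset 0, so the optimized
    // region size is 64 instead of 128; a third box alive in [1;4] would still
    // require its own 64 bytes.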
#ifdef GNA_HEAP_PROFILER
void memoryDump(std::function<bool(MemRequest & re)> filter) {
std::ofstream dumpFile("gna_memory_requests.txt", std::ios::out);
for (auto &re : _future_heap) {
if (filter(re)) continue;
dumpFile << ": " << " region: " << rRegionToStr(re._region) << ", "
<< "type: " << std::setw(17) << rTypeToStr(re._type) << " "
<< "ptr_in: " << std::setw(15) << re._ptr_in << " "
<< "ptr_out: " << std::setw(15) << re._ptr_out << " "
<< std::setw(8) << re._num_elements << ", "
<< static_cast<int>(re._element_size) << ", "
<< re._padding << ", "
<< std::setw(3) << re._alignment << ", "
<< std::setw(8) << re._offset << ", "
<< "life_time: " << re._life_limits.first << ":" << re._life_limits.second << ", "
<< std::endl;
}
}
#endif
void updateSectionsSizes() {
// count total size and size of read/write regions
_rw_section_size = 0;
_ro_section_size = 0;
for (auto &re : _future_heap) {
auto current = ALIGN(re._num_elements * re._element_size + re._padding, re._alignment);
#ifdef GNA_HEAP_PROFILER
std::cout << "chunk: " << " region: " << re._region << ", " <<
"type: " << (re._type == REQUEST_STORE ? "store " : re._type == REQUEST_BIND ? "bind " : "alloc ") <<
std::setw(10) << re._num_elements << ", " <<
static_cast<int>(re._element_size) << ", " <<
re._padding << ", " <<
re._offset << ", " <<
re._alignment << std::endl;
memoryDump([](GNAPluginNS::memory::MemRequest & request) {
return false;
});
#endif
if (re._type == REQUEST_BIND) continue;
for (auto &re : _future_heap) {
if (re._type & REQUEST_BIND || re._ptr_out == nullptr) continue;
size_t current = ALIGN(re._num_elements * re._element_size + re._padding, re._alignment);
if (re._region == REGION_RW) {
_rw_section_size += current;
} else {
_ro_section_size += current;
}
}
if (_is_compact_mode) {
_rw_section_size = getSectionSizeOptimized(REGION_RW);
}
gnalog() << "ro_section_size: " << _ro_section_size << std::endl;
gnalog() << "rw_section_size: " << _rw_section_size << std::endl;
gnalog() << "total: " << _total << std::endl;
_rw_section_size = ALIGN(_rw_section_size, _page_alignment);
_ro_section_size = ALIGN(_ro_section_size, _page_alignment);
_total = _rw_section_size + _ro_section_size;
gnalog() << "Aligned ro_section_size: " << _ro_section_size << std::endl;
gnalog() << "Aligned rw_section_size: " << _rw_section_size << std::endl;
}
};
} // namespace memory
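A minimal standalone sketch (illustration only, with a hypothetical 4096-byte page size and a local align_up helper standing in for ALIGN) of the layout produced above: the RW section starts at offset 0 and the RO section starts right after the page-aligned RW size, matching allocateRegion(REGION_RW, 0) and allocateRegion(REGION_RO, _rw_section_size).
#include <cassert>
#include <cstddef>
static size_t align_up(size_t size, size_t alignment) {
    return ((size + alignment - 1) / alignment) * alignment;
}
int main() {
    const size_t page_alignment = 4096;                               // hypothetical page size
    const size_t rw_section_size = align_up(10000, page_alignment);   // 12288
    const size_t ro_section_size = align_up(300, page_alignment);     // 4096
    const size_t total = rw_section_size + ro_section_size;           // 16384
    const size_t rw_base = 0;                                         // allocateRegion(REGION_RW, 0)
    const size_t ro_base = rw_section_size;                           // allocateRegion(REGION_RO, _rw_section_size)
    assert(rw_base == 0 && ro_base == 12288 && total == 16384);
    return 0;
}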

View File

@ -81,6 +81,7 @@ const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_t
{ "ConvolutionBackpropData", Deconvolution },
{ "GroupConvolutionBackpropData", Deconvolution },
{ "StridedSlice", StridedSlice },
{ "Slice", StridedSlice },
{ "Tile", Tile },
{ "ROIAlign", ROIAlign },
{ "ROIPooling", ROIPooling },

View File

@ -127,13 +127,14 @@ DnnlMemoryDescPtr MKLDNNExtensionUtils::makeDescriptor(const mkldnn::memory::des
}
}
size_t MKLDNNExtensionUtils::getMemSizeForDnnlDesc(mkldnn::memory::desc desc) {
const auto offset0 = desc.data.offset0;
desc.data.offset0 = 0;
size_t size = desc.get_size();
size_t MKLDNNExtensionUtils::getMemSizeForDnnlDesc(const mkldnn::memory::desc& desc) {
auto tmpDesc = desc;
const auto offset0 = tmpDesc.data.offset0;
tmpDesc.data.offset0 = 0;
size_t size = tmpDesc.get_size();
if (size == DNNL_RUNTIME_SIZE_VAL)
return MemoryDesc::UNDEFINED_SIZE;
size += offset0 * sizeOfDataType(desc.data_type());
size += offset0 * sizeOfDataType(tmpDesc.data_type());
return size;
}
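// Illustration only (not part of the sources): for an f32 descriptor whose payload
// occupies 1024 bytes and whose offset0 equals 4 elements, the helper returns
// 1024 + 4 * sizeof(float) = 1040 bytes; a DNNL_RUNTIME_SIZE_VAL payload maps to
// MemoryDesc::UNDEFINED_SIZE instead.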

View File

@ -45,7 +45,7 @@ public:
*/
static std::shared_ptr<DnnlBlockedMemoryDesc> makeUndefinedDesc(const mkldnn::memory::desc &desc, const Shape& shape);
static size_t getMemSizeForDnnlDesc(mkldnn::memory::desc desc);
static size_t getMemSizeForDnnlDesc(const mkldnn::memory::desc& desc);
};
} // namespace MKLDNNPlugin

View File

@ -722,8 +722,13 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::
auto input = inputNodesMap.find(name);
if (input != inputNodesMap.end()) {
auto& inTensorDesc = in->getTensorDesc();
auto node = input->second;
auto childEdge = node->getChildEdgeAt(0);
const auto& outDims = node->getOutputShapeAtPort(0);
const void *ext_data_ptr = in->cbuffer();
void *inter_data_ptr = input->second->getChildEdgeAt(0)->getMemory().GetData();
void *inter_data_ptr = childEdge->getMemory().GetData();
if (ext_data_ptr != inter_data_ptr) {
auto ext_tdesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(in->getTensorDesc());
@ -731,17 +736,16 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::
auto ext_mem = MKLDNNMemory(eng);
ext_mem.Create(ext_tdesc, ext_data_ptr, false);
input->second->getChildEdgeAt(0)->getMemory().SetData(ext_mem, 0, false);
childEdge->getMemory().SetData(ext_mem, 0, false);
}
// todo: make sure 'name' exists in this map...
if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) {
if (in->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) {
_normalizePreprocMap[name].NormalizeImage(input->second->getOutputShapeAtPort(0),
reinterpret_cast<float *>(inter_data_ptr),
in->getTensorDesc().getLayout());
if (inTensorDesc.getPrecision() == InferenceEngine::Precision::FP32) {
_normalizePreprocMap[name].NormalizeImage(outDims, reinterpret_cast<float *>(inter_data_ptr),
inTensorDesc.getLayout());
} else {
IE_THROW() << "Mean image of type " << in->getTensorDesc().getPrecision().name() << " is unsupported";
IE_THROW() << "Mean image of type " << inTensorDesc.getPrecision().name() << " is unsupported";
}
}
} else {
@ -756,15 +760,17 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
for (auto &outputMap : outputNodesMap) {
auto name = outputMap.first;
auto node = outputMap.second;
const MKLDNNMemory& intr_blob = node->getParentEdgeAt(0)->getMemory();
auto parentEdge = node->getParentEdgeAt(0);
const MKLDNNMemory& intr_blob = parentEdge->getMemory();
auto ext_blob = out.find(name);
if (ext_blob == out.end()) {
const auto ext_blob_map = out.find(name);
const auto ext_blob = ext_blob_map->second;
if (ext_blob_map == out.end()) {
IE_THROW(Unexpected) << "The network outputs do not contain mkldnn graph output node name: \"" << name << "\"";
}
const auto actualDesc = MemoryDescUtils::convertToTensorDesc(intr_blob.getDesc());
auto &expectedDesc = ext_blob->second->getTensorDesc();
auto &expectedDesc = ext_blob->getTensorDesc();
// TODO [NM]: need to create a universal reorder which will detect the cases when we really need to use it
// WA: for cases when output shape after transformation will be 1x1x1x1 but model output is scalar
@ -797,27 +803,16 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
auto srcPrec = actualDesc.getPrecision();
auto dstPrec = expectedDesc.getPrecision();
if (srcPrec == dstPrec && ext_blob->second->byteSize() != intr_blob.GetSize())
if (srcPrec == dstPrec && ext_blob->byteSize() != intr_blob.GetSize())
IE_THROW() << "Output blob byte size is not equal network output byte size ("
<< ext_blob->second->byteSize() << "!=" << intr_blob.GetSize() << ").";
<< ext_blob->byteSize() << "!=" << intr_blob.GetSize() << ").";
void *ext_blob_ptr = ext_blob->second->buffer();
void *ext_blob_ptr = ext_blob->buffer();
void *intr_blob_ptr = intr_blob.GetData();
// That is the same memory. No need to copy
if (ext_blob_ptr == intr_blob_ptr) continue;
size_t size_to_copy = intr_blob.GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
// TODO: Should we support InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT???
// TODO [DS]: phase 2: should we support this behaviour? Looks obsolete in the dynamic shapes paradigm
if (config.batchLimit) {
if (node->isDynamicNode()) {
IE_THROW(NotImplemented) << "[DS] not implemented dynamic batch for node with dynamic shape";
}
int MB_to_process = node->batchToProcess();
size_to_copy = std::accumulate(outDims.begin() + 1, outDims.end(), (size_t)1, std::multiplies<size_t>()) * MB_to_process;
}
if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) {
auto outBlobDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(expectedDesc);
auto outBloMem = MKLDNNMemory(eng);
@ -825,6 +820,17 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
outBloMem.SetData(intr_blob, 0, false);
} else {
size_t size_to_copy = intr_blob.GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
// TODO: Should we support InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT???
// TODO [DS]: phase 2: should we support this behaviour? Looks obsolete in the dynamic shapes paradigm
if (config.batchLimit) {
if (node->isDynamicNode()) {
IE_THROW(NotImplemented) << "[DS] not implemented dynamic batch for node with dynamic shape";
}
int MB_to_process = node->batchToProcess();
size_to_copy = std::accumulate(outDims.begin() + 1, outDims.end(), (size_t)1, std::multiplies<size_t>()) * MB_to_process;
}
cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy);
}
}

View File

@ -457,7 +457,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) {
if (auto convNode = std::dynamic_pointer_cast<MKLDNNConvolutionNode>(node)) {
auto rank = convNode->getInputShapeAtPort(0).getRank();
// int8 depthwise convolution does not support fusing zero points in 3D case
if (implication(convNode->isDepthWise(), rank == 4)) {
if (implication(convNode->isDepthWise(), rank < 5)) {
retVal = true;
}
}
@ -577,7 +577,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) {
ptrdiff_t OC = weightsConstantDims[0 + groupOffset];
ptrdiff_t IC = weightsConstantDims[1 + groupOffset];
ptrdiff_t KD = weightsConstantDims.size() == (5 + groupOffset) ? weightsConstantDims[weightsConstantDims.size() - 3] : 1;
ptrdiff_t KH = weightsConstantDims[weightsConstantDims.size() - 2];
ptrdiff_t KH = node->getInputShapeAtPort(0).getRank() > (3 + groupOffset) ? weightsConstantDims[weightsConstantDims.size() - 2] : 1;
ptrdiff_t KW = weightsConstantDims[weightsConstantDims.size() - 1];
for (size_t g = 0; g < G; g++) {

View File

@ -84,27 +84,27 @@ MKLDNNPlugin::MKLDNNInferRequest::~MKLDNNInferRequest() {
}
void MKLDNNPlugin::MKLDNNInferRequest::pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob, InferenceEngine::Precision inPrec) {
bool needConvert = inPrec != inputBlob->getTensorDesc().getPrecision();
auto& tensorDesc = inputBlob->getTensorDesc();
bool needConvert = inPrec != tensorDesc.getPrecision();
if (inputBlob->cbuffer().as<const void *>() == nullptr) {
const void* srcData = inputBlob->cbuffer().as<const void *>();
if (srcData == nullptr) {
IE_THROW() << "Input blob has no allocated memory";
}
InferenceEngine::Blob::Ptr iconv;
if (needConvert) {
iconv = make_blob_with_precision(inPrec, InferenceEngine::TensorDesc(inPrec, inputBlob->getTensorDesc().getDims(),
inputBlob->getTensorDesc().getLayout()));
iconv = make_blob_with_precision(inPrec, InferenceEngine::TensorDesc(inPrec, tensorDesc.getDims(), tensorDesc.getLayout()));
iconv->allocate();
if (inputBlob->size() != iconv->size())
IE_THROW() << "Can't copy tensor: input and converted tensors have different number of elements: " << inputBlob->size() << " and "
<< iconv->size();
void *srcData = inputBlob->cbuffer().as<void *>();
void *dstData = iconv->buffer().as<void *>();
if (dstData == nullptr) {
IE_THROW() << "Converted input blob has no allocated memory";
}
cpu_convert(srcData, dstData, inputBlob->getTensorDesc().getPrecision(), iconv->getTensorDesc().getPrecision(), iconv->size());
cpu_convert(srcData, dstData, tensorDesc.getPrecision(), iconv->getTensorDesc().getPrecision(), iconv->size());
}
graph->PushInputData(inputName, needConvert ? iconv : inputBlob);
@ -112,27 +112,30 @@ void MKLDNNPlugin::MKLDNNInferRequest::pushInput(const std::string& inputName, I
void MKLDNNPlugin::MKLDNNInferRequest::PushInputData() {
for (auto input : _inputs) {
if (!_networkInputs[input.first]) {
IE_THROW() << "Input blobs map contains not registered during IInferencePlugin::LoadNetwork blob with name " << input.first;
auto inputName = input.first;
if (!_networkInputs[inputName]) {
IE_THROW() << "Input blobs map contains not registered during IInferencePlugin::LoadNetwork blob with name " << inputName;
}
auto inPrec = input.second->getTensorDesc().getPrecision();
if (graph->hasMeanImageFor(input.first) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) {
auto inputBlob = input.second;
auto& inputTensorDesc = inputBlob->getTensorDesc();
auto inPrec = inputTensorDesc.getPrecision();
if (graph->hasMeanImageFor(inputName) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) {
inPrec = InferenceEngine::Precision::FP32;
} else {
inPrec = normalizeToSupportedPrecision(inPrec);
}
if (inPrec == InferenceEngine::Precision::UNSPECIFIED) {
IE_THROW() << "Unsupported input precision " << input.second->getTensorDesc().getPrecision();
IE_THROW() << "Unsupported input precision " << inputTensorDesc.getPrecision();
}
// User can initialize input via the setBlob API using a tensorDesc with the default (ANY) layout.
// Currently IE doesn't specify the behavior in such a scenario, so we assume the real layout is equal to the network input one.
if (input.second->getTensorDesc().getLayout() == InferenceEngine::ANY) {
input.second->getTensorDesc().setLayout(_networkInputs[input.first]->getLayout());
if (inputTensorDesc.getLayout() == InferenceEngine::ANY) {
inputTensorDesc.setLayout(_networkInputs[inputName]->getLayout());
}
pushInput(input.first, input.second, inPrec);
pushInput(inputName, inputBlob, inPrec);
}
}
@ -162,6 +165,9 @@ void MKLDNNPlugin::MKLDNNInferRequest::PullStates() {
for (auto &node : graph->GetNodes()) {
if (node->getType() == MemoryInput) {
auto cur_node = dynamic_cast<MKLDNNMemoryInputNode*>(node.get());
if (!cur_node) {
IE_THROW() << "Cannot cast " << node->getName() << " to MKLDNNMemoryInputNode";
}
auto cur_id = cur_node->getId();
for (const auto& state : memoryStates) {
if (state->GetName() == cur_id) {
@ -499,71 +505,104 @@ static inline void changeEdgePtr(const MKLDNNPlugin::MKLDNNEdgePtr &edge, void *
void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() {
for (auto& it : externalPtr) {
auto input = graph->GetInputNodesMap().find(it.first);
if (input != graph->GetInputNodesMap().end()) {
if (input->second->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
const auto& inputNodesMap = graph->GetInputNodesMap();
auto input = inputNodesMap.find(it.first);
if (input != inputNodesMap.end()) {
MKLDNNNodePtr inputNodePtr = input->second;
if (inputNodePtr->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
continue;
auto& childEdges = inputNodePtr->getChildEdges();
// Input cannot be in-place with other primitives
bool canBeInPlace = true;
for (size_t i = 0; canBeInPlace && i < input->second->getChildEdges().size(); i++) {
auto& child = input->second->getChildEdgeAt(i)->getChild();
if (child->isConstant())
canBeInPlace = false;
for (auto& childEdge : childEdges) {
auto ce = childEdge.lock();
if (!ce)
IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge";
auto* concat = dynamic_cast<MKLDNNConcatNode *>(child.get());
if (canBeInPlace && concat && concat->isOptimized())
canBeInPlace = false;
auto& child = ce->getChild();
// Cannot be in-place before split because split is using different ptrs without offsets
auto* split = dynamic_cast<MKLDNNSplitNode *>(child.get());
if (canBeInPlace && split)
canBeInPlace = false;
if (child->isInplace())
canBeInPlace = false;
for (size_t j = 0; canBeInPlace && j < child->getChildEdges().size(); j++) {
if (child->getChildEdgeAt(j)->getMemory().GetPrimitive().get_data_handle() ==
input->second->getChildEdgeAt(i)->getMemory().GetPrimitive().get_data_handle())
canBeInPlace = false;
}
}
for (size_t i = 0; canBeInPlace && i < input->second->getChildEdges().size(); i++) {
changeEdgePtr(input->second->getChildEdgeAt(i), it.second);
}
continue;
}
MKLDNNNodePtr output;
for (auto& out : graph->GetOutputNodesMap()) {
if (out.first == it.first) {
output = out.second;
break;
}
}
if (output) {
if (output->getParentEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
continue;
bool canBeInPlace = true;
void * defaultPtr = output->getParentEdgeAt(0)->getMemory().GetPrimitivePtr()->get_data_handle();
// Cannot be in-place after concat because concat is using different ptrs without offsets
auto parent = output->getParentEdgeAt(0)->getParent();
MKLDNNNodePtr previousParent;
do {
previousParent = parent;
if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInplace()) {
if (child->isConstant()) {
canBeInPlace = false;
break;
}
for (size_t i = 0; i < parent->getParentEdges().size(); i++) {
if (parent->getParentEdgeAt(i)->getMemory().GetPrimitivePtr()->get_data_handle() == defaultPtr) {
parent = parent->getParentEdgeAt(i)->getParent();
if (child->getType() == Concatenation && dynamic_cast<MKLDNNConcatNode*>(child.get())->isOptimized()) {
canBeInPlace = false;
break;
}
// Cannot be in-place before split because split is using different ptrs without offsets
if (child->getType() == Split) {
canBeInPlace = false;
break;
}
if (child->isInPlace()) {
canBeInPlace = false;
break;
}
auto& edges = child->getChildEdges();
for (auto& edge : edges) {
auto e = edge.lock();
if (!e)
IE_THROW() << "Node " << child->getName() << " contains empty child edge";
if (e->getMemory().GetPrimitive().get_data_handle() == ce->getMemory().GetPrimitive().get_data_handle()) {
canBeInPlace = false;
break;
}
}
if (!canBeInPlace)
break;
}
if (canBeInPlace) {
for (auto& edge : childEdges) {
auto e = edge.lock();
if (!e)
IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge";
changeEdgePtr(e, it.second);
}
}
continue;
}
const auto& outputNodesMap = graph->GetOutputNodesMap();
auto output = outputNodesMap.find(it.first);
if (output != outputNodesMap.end()) {
auto parentEdge = output->second->getParentEdgeAt(0);
if (parentEdge->getMemory().GetPrimitive().get_data_handle() == it.second)
continue;
bool canBeInPlace = true;
void* defaultPtr = parentEdge->getMemory().GetPrimitivePtr()->get_data_handle();
// Cannot be in-place after concat because concat is using different ptrs without offsets
auto parent = parentEdge->getParent();
MKLDNNNodePtr previousParent;
do {
previousParent = parent;
if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInPlace()) {
canBeInPlace = false;
break;
}
auto& parentEdges = parent->getParentEdges();
for (auto& edge : parentEdges) {
auto e = edge.lock();
if (!e)
IE_THROW() << "Node " << parent->getName() << " contains empty parent edge";
if (e->getMemory().GetPrimitivePtr()->get_data_handle() == defaultPtr) {
parent = e->getParent();
break;
}
}
} while (previousParent != parent);
if (canBeInPlace)
changeEdgePtr(output->getParentEdgeAt(0), it.second);
changeEdgePtr(parentEdge, it.second);
continue;
}
IE_THROW() << "Cannot find input/output blob: " << it.first;

View File

@ -491,7 +491,8 @@ std::vector<memory::format_tag> MKLDNNNode::getAvailableFormatsForDims(const Sha
else if (dims.getRank() == 2)
return {memory::format_tag::nc};
else if (dims.getRank() == 3)
return {memory::format_tag::tnc, memory::format_tag::ntc};
return {memory::format_tag::tnc, memory::format_tag::ntc,
memory::format_tag::ncw, memory::format_tag::nCw8c, memory::format_tag::nCw16c };
else if (dims.getRank() == 4)
return {memory::format_tag::nchw, memory::format_tag::nChw8c, memory::format_tag::nChw16c};
else if (dims.getRank() == 5)
@ -769,15 +770,29 @@ void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_de
}
}
bool MKLDNNNode::isInplace() const {
auto selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
IE_THROW() << "Preferable primitive descriptor is not set.";
auto config = selected_pd->getConfig();
bool MKLDNNNode::isInPlace() {
if (inplace == InPlaceType::Unknown) {
auto selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
IE_THROW() << "Preferable primitive descriptor is not set.";
for (auto &in : config.inConfs) if (in.inPlace >= 0) return true;
for (auto &out : config.outConfs) if (out.inPlace >= 0) return true;
return false;
inplace = InPlaceType::NoInPlace;
auto config = selected_pd->getConfig();
for (auto &in : config.inConfs) {
if (in.inPlace >= 0) {
inplace = InPlaceType::InPlace;
break;
}
}
for (auto &out : config.outConfs) {
if (out.inPlace >= 0) {
inplace = InPlaceType::InPlace;
break;
}
}
}
return inplace == InPlaceType::InPlace;
}
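// Illustration only (not part of the sources): a node whose selected primitive
// descriptor has, e.g., outConfs[0].inPlace = 0 (the output reuses the memory of
// input 0) is reported as in-place; the cached value is computed lazily and is
// reset to Unknown whenever a different primitive descriptor is selected.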
bool MKLDNNNode::isConstant() {

View File

@ -195,6 +195,8 @@ public:
return engine;
}
bool isInPlace();
// must be called only after MKLDNNGraph::InitEdges()
virtual bool isExecutable() const {
return true;
@ -202,8 +204,6 @@ public:
bool isConstant();
bool isInplace() const;
bool isFusedWith(Type type) const;
void addFusedNode(const MKLDNNNodePtr &fusingNode) {
@ -336,6 +336,10 @@ public:
selectedPrimitiveDescriptorIndex = -1;
else
selectedPrimitiveDescriptorIndex = index;
// Each primitive descriptor has its own InPlace status. So after a new primitive descriptor is selected
// we should reset the InPlace type so that the new status for the node is determined via MKLDNNNode::isInPlace()
inplace = InPlaceType::Unknown;
}
std::string getPrimitiveDescriptorType();
@ -616,11 +620,17 @@ protected:
bool permanent = false;
bool temporary = false;
int dynBatchLim = 0;
enum class InPlaceType {
Unknown,
InPlace,
NoInPlace
};
enum class ConstantType {
Unknown,
Const,
NoConst
};
InPlaceType inplace = InPlaceType::Unknown;
ConstantType constant = ConstantType::Unknown;
std::vector<InferenceEngine::Blob::Ptr> internalBlobs;
std::vector<MKLDNNMemoryPtr> internalBlobMemory;

View File

@ -38,6 +38,7 @@
#include <transformations/op_conversions/convert_broadcast_to_tiles.hpp>
#include <transformations/op_conversions/convert_depth_to_space.hpp>
#include <transformations/op_conversions/convert_shuffle_channels3.hpp>
#include <transformations/op_conversions/convert_slice_to_strided_slice.hpp>
#include <transformations/op_conversions/convert_space_to_depth.hpp>
#include <transformations/op_conversions/convert_gelu.hpp>
#include <transformations/op_conversions/convert_gather_downgrade.hpp>
@ -107,6 +108,7 @@
#include "nodes/mkldnn_fake_quantize_node.h"
#include "nodes/mkldnn_normalize_node.h"
#include "ngraph_transformations/convert_to_cpu_specific_opset.hpp"
#include "ngraph_transformations/move_eltwise_up_data_movement.hpp"
#include "transformations/smart_reshape/smart_reshape.hpp"
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
@ -371,6 +373,7 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
pass_config->disable<ngraph::pass::ConvertReduceMeanToPooling>();
pass_config->disable<ngraph::pass::ConvertReduceMaxToPooling>();
pass_config->disable<ngraph::pass::ConvertReduceSumToPooling>();
pass_config->disable<ngraph::pass::SliceToStridedSlice>();
pass_config->enable<ngraph::pass::NormalizeL2Decomposition>();
pass_config->enable<ngraph::pass::ConvertInterpolate1ToInterpolate4>();
@ -475,6 +478,14 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
return node->get_rt_info().count("UNROLL_TI") == 0;
});
postLPTPassManager.register_pass<MoveEltwiseUpThroughDataMov>();
postLPTPassManager.get_pass_config()->set_callback<MoveEltwiseUpThroughDataMov>([](const std::shared_ptr<const ngraph::Node>& node) -> bool {
if (node->get_input_size() >= 2) {
return node->get_input_element_type(1) == ngraph::element::i8 || node->get_input_element_type(1) == ngraph::element::u8;
}
return false;
});
postLPTPassManager.run_passes(nGraphFunc);
}
@ -575,15 +586,13 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
// network is below general threshold
num_streams = std::max(default_num_streams, num_streams_less_aggressive);
}
int ovPerfHintNumRequests = engConfig.perfHintsConfig.ovPerfHintNumRequests; // set thru SetConfig to the plugin
auto num_requests = config.find(PluginConfigParams::KEY_PERFORMANCE_HINT_NUM_REQUESTS);
if (num_requests != config.end()) {
// arrived with config to the LoadNetwork (and thus higher pri)
ovPerfHintNumRequests = PerfHintsConfig::CheckPerformanceHintRequestValue(num_requests->second);
}
num_streams = std::min(num_streams, std::max(ovPerfHintNumRequests, 1));
if (engConfig.perfHintsConfig.ovPerfHintNumRequests) // set thru SetConfig to the plugin
num_streams = std::min(engConfig.perfHintsConfig.ovPerfHintNumRequests,
engConfig.perfHintsConfig.ovPerfHintNumRequests);
if (num_requests != config.end()) // arrived with config to the LoadNetwork (and thus higher pri)
num_streams = std::min(num_streams,
PerfHintsConfig::CheckPerformanceHintRequestValue(num_requests->second));
config[PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS] = std::to_string(num_streams);
}
}

View File

@ -6,7 +6,6 @@
#include "fc_bias_fusion.hpp"
#include "ngraph/op/fake_quantize.hpp"
#include "ngraph/pass/manager.hpp"
#include "reshape_1d_ops.hpp"
#include "reshape_fc_fusion.hpp"
#include "reshape_fully_connected.hpp"
#include "align_matmul_input_ranks.hpp"
@ -26,10 +25,6 @@ namespace MKLDNNPlugin {
inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphFunc) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::ConstantFolding>();
manager.register_pass<Reshape1DConvolution>();
manager.register_pass<Reshape1DGroupConvolution>();
manager.register_pass<Reshape1DAvgPool>();
manager.register_pass<Reshape1DMaxPool>();
manager.register_pass<ConvertMatMulToFC>();
manager.register_pass<AlignMatMulInputRanks>();
manager.register_pass<ConvertTileToSeqTiles>();

View File

@ -0,0 +1,113 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "move_eltwise_up_data_movement.hpp"
#include <memory>
#include <vector>
#include <numeric>
#include <ngraph/opsets/opset8.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::MoveEltwiseUpThroughDataMov, "MoveEltwiseUpThroughDataMov", 0);
namespace {
bool is_data_movement_operation(const std::shared_ptr<ngraph::Node>& node) {
return ov::is_type<ngraph::op::v0::Squeeze>(node) ||
ov::is_type<ngraph::op::v0::Unsqueeze>(node) ||
ov::is_type<ngraph::op::v1::Reshape>(node) ||
ov::is_type<ngraph::op::v1::Transpose>(node) ||
ov::is_type<ngraph::op::v0::ShuffleChannels>(node) ||
ov::is_type<ngraph::op::v7::Roll>(node) ||
ov::is_type<ngraph::op::v0::ReverseSequence>(node) ||
ov::is_type<ngraph::op::v0::DepthToSpace>(node) ||
ov::is_type<ngraph::op::v1::BatchToSpace>(node) ||
ov::is_type<ngraph::op::v1::Broadcast>(node) ||
ov::is_type<ngraph::op::v3::Broadcast>(node) ||
ov::is_type<ngraph::op::v1::Gather>(node) ||
ov::is_type<ngraph::op::v7::Gather>(node) ||
ov::is_type<ngraph::op::v8::Gather>(node);
}
bool is_scalar_like(const std::shared_ptr<ngraph::Node>& node) {
auto constantNode = std::dynamic_pointer_cast<ngraph::opset8::Constant>(node);
return constantNode != nullptr && shape_size(constantNode->get_shape()) == 1;
}
} // namespace
MKLDNNPlugin::MoveEltwiseUpThroughDataMov::MoveEltwiseUpThroughDataMov() {
auto eltwise_pattern = ngraph::pattern::wrap_type<ngraph::op::util::UnaryElementwiseArithmetic,
ngraph::op::util::BinaryElementwiseArithmetic>(ngraph::pattern::has_static_rank());
ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
const auto& pattern_map = m.get_pattern_value_map();
auto eltwise = pattern_map.at(eltwise_pattern).get_node_shared_ptr();
if (transformation_callback(eltwise)) {
return false;
}
if (eltwise->get_output_size() == 0 ||
eltwise->get_input_size() == 0 ||
eltwise->get_output_element_type(0) != eltwise->get_input_element_type(0) ||
eltwise->get_output_target_inputs(0).size() != 1) {
return false;
}
bool is_binary_op = std::dynamic_pointer_cast<ngraph::op::util::BinaryElementwiseArithmetic>(eltwise) != nullptr;
if (is_binary_op && !is_scalar_like(eltwise->get_input_node_shared_ptr(1))) {
return false;
}
auto current = eltwise->get_input_node_shared_ptr(0);
auto child = eltwise;
while (is_data_movement_operation(current)) {
if (current->get_output_size() != 1 ||
current->get_output_target_inputs(0).size() != 1 ||
current->get_output_element_type(0) != current->get_input_element_type(0)) {
return false;
}
child = current;
current = current->get_input_node_shared_ptr(0);
}
// now current is the first non-data-movement op
if (child == eltwise) {
return false;
}
// eltwise constant shape should match new input shape
if (is_binary_op && current->get_output_shape(0).size() != eltwise->get_input_shape(1).size()) {
auto old_eltwise_const = std::dynamic_pointer_cast<ngraph::opset8::Constant>(eltwise->get_input_node_shared_ptr(1));
auto new_constant = std::make_shared<ngraph::opset8::Constant>(*old_eltwise_const.get(), ngraph::Shape{});
ngraph::replace_node(old_eltwise_const, new_constant);
}
ngraph::replace_output_update_name(eltwise->output(0), eltwise->input_value(0));
ngraph::OutputVector eltwiseInputs = eltwise->input_values();
eltwiseInputs[0] = child->input_value(0);
auto newEltwise = eltwise->clone_with_new_inputs(eltwiseInputs);
ngraph::copy_runtime_info(eltwise, newEltwise);
newEltwise->set_friendly_name(eltwise->get_friendly_name());
ngraph::OutputVector childInputs = child->input_values();
childInputs[0] = newEltwise;
auto newChild = child->clone_with_new_inputs(childInputs);
ngraph::copy_runtime_info(child, newChild);
newChild->set_friendly_name(child->get_friendly_name());
ngraph::replace_node(child, newChild);
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(eltwise_pattern, "MoveEltwiseUpThroughDataMov");
register_matcher(m, callback);
}
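// Illustration only (not part of the sources): for a pattern like
//   Parameter -> Transpose -> Add(scalar Constant) -> Result
// the pass rewires the graph to
//   Parameter -> Add(scalar Constant) -> Transpose -> Result
// so the eltwise runs before the data-movement op; binary eltwise ops are moved
// only when their second input is scalar-like, as checked by is_scalar_like above.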

View File

@ -0,0 +1,17 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace MKLDNNPlugin {
class MoveEltwiseUpThroughDataMov : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
MoveEltwiseUpThroughDataMov();
};
} // namespace MKLDNNPlugin

View File

@ -1,218 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "reshape_1d_ops.hpp"
#include <memory>
#include <vector>
#include <numeric>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph_ops/type_relaxed.hpp>
#include "transformations/utils/utils.hpp"
namespace {
template <class BaseOp>
std::shared_ptr<ngraph::Node> convert(const ngraph::Output<ngraph::Node> & data, std::shared_ptr<BaseOp> node, ngraph::NodeVector &new_ops) {
auto new_strides = node->get_strides();
auto new_dilations = node->get_dilations();
auto new_pads_begin = node->get_pads_begin();
auto new_pad_end = node->get_pads_end();
new_strides.insert(new_strides.begin(), 1);
new_dilations.insert(new_dilations.begin(), 1);
new_pads_begin.insert(new_pads_begin.begin(), 0);
new_pad_end.insert(new_pad_end.begin(), 0);
const size_t weights_rank = node->get_input_partial_shape(1).size();
const auto unsqueeze_const = ngraph::opset1::Constant::create(ngraph::element::i32, { 1 }, { weights_rank - 1 });
const auto weights = ngraph::op::util::make_try_fold<ngraph::opset1::Unsqueeze>(node->input_value(1), unsqueeze_const);
new_ops.push_back(weights);
if (std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(node)) {
return std::make_shared<ngraph::op::TypeRelaxed<BaseOp>>(std::vector<ngraph::element::Type>{ngraph::element::f32, ngraph::element::f32},
std::vector<ngraph::element::Type>{ngraph::element::f32},
ngraph::op::TemporaryReplaceOutputType(data, ngraph::element::f32).get(),
ngraph::op::TemporaryReplaceOutputType(weights, ngraph::element::f32).get(),
new_strides,
new_pads_begin,
new_pad_end,
new_dilations,
node->get_auto_pad());
} else {
return std::make_shared<BaseOp>(data,
weights,
new_strides,
new_pads_begin,
new_pad_end,
new_dilations,
node->get_auto_pad());
}
}
template <>
std::shared_ptr<ngraph::Node> convert(const ngraph::Output<ngraph::Node> & data, std::shared_ptr<ngraph::opset1::MaxPool> node, ngraph::NodeVector & new_ops) {
auto new_strides = node->get_strides();
auto new_pads_begin = node->get_pads_begin();
auto new_pad_end = node->get_pads_end();
auto new_kernel = node->get_kernel();
new_strides.insert(new_strides.begin(), 1);
new_pads_begin.insert(new_pads_begin.begin(), 0);
new_pad_end.insert(new_pad_end.begin(), 0);
new_kernel.insert(new_kernel.begin(), 1);
return std::make_shared<ngraph::opset1::MaxPool>(data,
new_strides,
new_pads_begin,
new_pad_end,
new_kernel,
node->get_rounding_type(),
node->get_auto_pad());
}
template <>
std::shared_ptr<ngraph::Node> convert(const ngraph::Output<ngraph::Node> & data, std::shared_ptr<ngraph::opset1::AvgPool> node, ngraph::NodeVector & new_ops) {
// Update Pooling attributes with an additional dimension
auto new_strides = node->get_strides();
auto new_pads_begin = node->get_pads_begin();
auto new_pad_end = node->get_pads_end();
auto new_kernel = node->get_kernel();
new_strides.insert(new_strides.begin(), 1);
new_pads_begin.insert(new_pads_begin.begin(), 0);
new_pad_end.insert(new_pad_end.begin(), 0);
new_kernel.insert(new_kernel.begin(), 1);
return std::make_shared<ngraph::opset1::AvgPool>(data,
new_strides,
new_pads_begin,
new_pad_end,
new_kernel,
node->get_exclude_pad(),
node->get_rounding_type(),
node->get_auto_pad());
}
ngraph::matcher_pass_callback get_callback() {
return [](ngraph::pattern::Matcher& m) {
auto node = m.get_match_root();
const auto input_rank = node->get_input_partial_shape(0).size();
if (input_rank != 3) {
return false;
}
ngraph::NodeVector new_ops;
// Update pshape from [N, C, W] to [N, C, 1, W]
const auto unsqueeze_const = ngraph::opset1::Constant::create(ngraph::element::i32, { 1 }, { input_rank - 1 });
ngraph::Output<ngraph::Node> last = std::make_shared<ngraph::opset1::Unsqueeze>(node->input_value(0), unsqueeze_const);
last.get_node_shared_ptr()->set_friendly_name(node->get_friendly_name() + "/reshape_begin");
new_ops.push_back(last.get_node_shared_ptr());
if (auto conv = std::dynamic_pointer_cast<ngraph::opset1::Convolution>(node)) {
last = convert(last, conv, new_ops);
} else if (auto group_conv = std::dynamic_pointer_cast<ngraph::opset1::GroupConvolution>(node)) {
last = convert(last, group_conv, new_ops);
} else if (auto max_pool = std::dynamic_pointer_cast<ngraph::opset1::MaxPool>(node)) {
last = convert(last, max_pool, new_ops);
} else if (auto avg_pool = std::dynamic_pointer_cast<ngraph::opset1::AvgPool>(node)) {
last = convert(last, avg_pool, new_ops);
} else {
throw ngraph::ngraph_error("Reshape1DOps: op type is not supported");
}
last.get_node_shared_ptr()->set_friendly_name(node->get_friendly_name() + "/new");
new_ops.push_back(last.get_node_shared_ptr());
// if convolution is followed by add we need to replace add before output reshape to fuse conv+bias on plug-in side
std::shared_ptr<ngraph::Node> add_to_replace = nullptr;
std::shared_ptr<ngraph::Node> reshaped_add = nullptr;
ngraph::NodeVector bias_ops;
if (std::dynamic_pointer_cast<ngraph::opset1::Convolution>(node) || std::dynamic_pointer_cast<ngraph::opset1::GroupConvolution>(node)) {
auto out_pshape = node->get_output_partial_shape(0);
const auto dst_nodes = node->get_output_target_inputs(0);
// we can also reshape biases if possible
if (dst_nodes.size() == 1 && out_pshape.rank().is_static() && out_pshape.rank().get_length() > 2 && out_pshape[1].is_static()) {
auto channel = node->get_output_partial_shape(0)[1];
ngraph::Shape expected_shape = ngraph::Shape(input_rank, 1);
expected_shape[1] = channel.get_length();
add_to_replace = dst_nodes.begin()->get_node()->shared_from_this();
if (std::dynamic_pointer_cast<ngraph::opset1::Add>(add_to_replace) &&
std::dynamic_pointer_cast<ngraph::opset1::Constant>(add_to_replace->get_input_node_shared_ptr(1)) &&
add_to_replace->get_input_shape(1) == expected_shape) {
ngraph::Shape new_shape(add_to_replace->get_input_shape(1));
new_shape.push_back(1);
auto new_shape_const = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ new_shape.size() }, new_shape);
auto new_bias = ngraph::op::util::make_try_fold<ngraph::opset1::Reshape>(add_to_replace->input_value(1), new_shape_const, true);
reshaped_add = std::make_shared<ngraph::opset1::Add>(last, new_bias);
reshaped_add->set_friendly_name(add_to_replace->get_friendly_name() + "/new");
bias_ops.push_back(new_bias);
bias_ops.push_back(reshaped_add);
}
}
}
if (reshaped_add != nullptr) {
ngraph::replace_node(node, last.get_node_shared_ptr());
ngraph::copy_runtime_info(node, new_ops);
last = reshaped_add;
node = add_to_replace;
new_ops = bias_ops;
}
// Update pshape from [N, C, 1, W] to [N, C, W]
const auto squeeze_const = ngraph::opset1::Constant::create(ngraph::element::i32, { 1 }, { input_rank - 1 });
last = std::make_shared<ngraph::opset1::Squeeze>(last, squeeze_const);
last.get_node_shared_ptr()->set_friendly_name(node->get_friendly_name());
ngraph::replace_node(node, last.get_node_shared_ptr());
ngraph::copy_runtime_info(node, new_ops);
return true;
};
}
} // namespace
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::Reshape1DConvolution, "Reshape1DConvolution", 0);
MKLDNNPlugin::Reshape1DConvolution::Reshape1DConvolution() {
auto activations = ngraph::pattern::any_input(ngraph::pattern::has_static_rank());
auto weights = ngraph::pattern::any_input(ngraph::pattern::has_static_rank());
auto conv = ngraph::pattern::wrap_type<ngraph::opset1::Convolution>({ activations, weights });
auto m = std::make_shared<ngraph::pattern::Matcher>(conv, "Reshape1DConvolution");
this->register_matcher(m, get_callback());
}
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::Reshape1DGroupConvolution, "Reshape1DGroupConvolution", 0);
MKLDNNPlugin::Reshape1DGroupConvolution::Reshape1DGroupConvolution() {
auto activations = ngraph::pattern::any_input(ngraph::pattern::has_static_rank());
auto weights = ngraph::pattern::any_input(ngraph::pattern::has_static_rank());
auto group_conv = ngraph::pattern::wrap_type<ngraph::opset1::GroupConvolution>({ activations, weights });
auto m = std::make_shared<ngraph::pattern::Matcher>(group_conv, "Reshape1DGroupConvolution");
this->register_matcher(m, get_callback());
}
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::Reshape1DAvgPool, "Reshape1DAvgPool", 0);
MKLDNNPlugin::Reshape1DAvgPool::Reshape1DAvgPool() {
auto input = ngraph::pattern::any_input(ngraph::pattern::has_static_rank());
auto pool = ngraph::pattern::wrap_type<ngraph::opset1::AvgPool>({ input });
auto m = std::make_shared<ngraph::pattern::Matcher>(pool, "Reshape1DAvgPool");
this->register_matcher(m, get_callback());
}
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::Reshape1DMaxPool, "Reshape1DMaxPool", 0);
MKLDNNPlugin::Reshape1DMaxPool::Reshape1DMaxPool() {
auto input = ngraph::pattern::any_input(ngraph::pattern::has_static_rank());
auto pool = ngraph::pattern::wrap_type<ngraph::opset1::MaxPool>({ input });
auto m = std::make_shared<ngraph::pattern::Matcher>(pool, "Reshape1DMaxPool");
this->register_matcher(m, get_callback());
}

View File

@ -1,35 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
namespace MKLDNNPlugin {
class Reshape1DConvolution: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
Reshape1DConvolution();
};
class Reshape1DGroupConvolution: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
Reshape1DGroupConvolution();
};
class Reshape1DAvgPool: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
Reshape1DAvgPool();
};
class Reshape1DMaxPool: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
Reshape1DMaxPool();
};
} // namespace MKLDNNPlugin

View File

@ -66,6 +66,8 @@ MKLDNNBroadcastNode::MKLDNNBroadcastNode(const std::shared_ptr<ov::Node>& op, co
if (op->get_input_size() <= AXES_MAPPING_IDX)
IE_THROW() << errorPrefix << " and EXPLICIT mode must have three input edges: " << getParentEdges().size();
broadcastType = EXPLICIT;
} else {
IE_THROW() << errorPrefix << "has unexpected broadcast type: " << broadcastOp->get_broadcast_spec().m_type;
}
if (ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(TARGET_SHAPE_IDX))) {

View File

@ -15,10 +15,6 @@ using namespace InferenceEngine;
bool MKLDNNBucketizeNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
const auto bucketsize = std::dynamic_pointer_cast<const ngraph::opset3::Bucketize>(op);
if (!bucketsize) {
errorMessage = "Only opset3 Bucketize operation is supported";
@ -49,22 +45,6 @@ MKLDNNBucketizeNode::MKLDNNBucketizeNode(const std::shared_ptr<ngraph::Node>& op
// check one attribute
with_right = bucketsize->get_with_right_bound();
// check dimensions of input tensors
SizeVector input_tensor_dims = op->get_input_shape(INPUT_TENSOR_PORT);
if (input_tensor_dims.size() < 1) {
IE_THROW() << errorPrefix << " has incorrect dimensions of the input.";
}
SizeVector input_bin_dims = op->get_input_shape(INPUT_BINS_PORT);
if (input_bin_dims.size() != 1) {
IE_THROW() << errorPrefix << " has incorrect dimensions of the boundaries tensor.";
}
if (input_bin_dims[0] != 0) {
with_bins = true;
}
num_bin_values = input_bin_dims[0];
num_values = std::accumulate(input_tensor_dims.begin(), input_tensor_dims.end(), size_t(1), std::multiplies<size_t>());
}
void MKLDNNBucketizeNode::initSupportedPrimitiveDescriptors() {
@ -192,6 +172,49 @@ void MKLDNNBucketizeNode::execute(mkldnn::stream strm) {
}
}
void MKLDNNBucketizeNode::prepareParams() {
auto& inputTensorMemPtr = getParentEdgeAt(INPUT_TENSOR_PORT)->getMemoryPtr();
auto& inputBinsMemPtr = getParentEdgeAt(INPUT_BINS_PORT)->getMemoryPtr();
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
IE_THROW() << "Destination memory didn't allocate.";
if (!inputTensorMemPtr || !inputTensorMemPtr->GetPrimitivePtr())
IE_THROW() << "Input tensor didn't allocate.";
if (!inputBinsMemPtr || !inputBinsMemPtr->GetPrimitivePtr())
IE_THROW() << "Input bins didn't allocate.";
if (getSelectedPrimitiveDescriptor() == nullptr)
IE_THROW() << "Preferable primitive descriptor is not set.";
// update with_bins/num_values/num_bin_values
auto input_tensor_dims = inputTensorMemPtr->getStaticDims();
if (input_tensor_dims.size() < 1) {
IE_THROW() << errorPrefix << " has incorrect dimensions of the input.";
}
auto input_bin_dims = inputBinsMemPtr->getStaticDims();
if (input_bin_dims.size() != 1) {
IE_THROW() << errorPrefix << " has incorrect dimensions of the boundaries tensor.";
}
if (input_bin_dims[0] != 0) {
with_bins = true;
}
num_bin_values = input_bin_dims[0];
num_values =
std::accumulate(input_tensor_dims.begin(), input_tensor_dims.end(), size_t(1), std::multiplies<size_t>());
}
void MKLDNNBucketizeNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
std::vector<VectorDims> MKLDNNBucketizeNode::shapeInfer() const {
return {getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()};
}
template <typename T, typename T_BOUNDARIES, typename T_IND>
void MKLDNNBucketizeNode::bucketize() {
const auto *input_data = reinterpret_cast<const T *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());

View File

@ -15,9 +15,14 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override {};
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
void executeDynamicImpl(mkldnn::stream strm) override {
execute(strm);
}
void prepareParams() override;
std::vector<VectorDims> shapeInfer() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

View File

@ -33,7 +33,7 @@ bool MKLDNNConvolutionNode::isSupportedOperation(const std::shared_ptr<const ngr
return false;
}
size_t ndims = op->get_input_partial_shape(0).rank().get_length();
if ((ndims < 4) || (ndims > 5)) {
if ((ndims < 3) || (ndims > 5)) {
errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(ndims);
return false;
}
@ -254,10 +254,10 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
outputDataType = memory::data_type::f32;
if (eltwisePrecision == Precision::BF16)
eltwisePrecision = Precision::FP32;
in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getInputShapeAtPort(0),
inputDataType, ndims == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc);
out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getOutputShapeAtPort(0),
outputDataType, ndims == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc);
in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getInputShapeAtPort(0), inputDataType,
ndims == 3 ? memory::format_tag::nwc : (ndims == 4 ? memory::format_tag::nhwc : memory::format_tag::ndhwc));
out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getOutputShapeAtPort(0), outputDataType,
ndims == 3 ? memory::format_tag::nwc : (ndims == 4 ? memory::format_tag::nhwc : memory::format_tag::ndhwc));
createDescriptor({ in_candidate }, { out_candidate });
} else {
inputDataType = (getOriginalInputPrecisionAtPort(0) == Precision::BF16
@ -289,11 +289,11 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
eltwisePrecision = Precision::FP32;
}
if (one_of(ndims, 4, 5)) {
memory::format_tag ncsp = ndims == 4 ? memory::format_tag::nchw : memory::format_tag::ncdhw;
memory::format_tag nspc = ndims == 4 ? memory::format_tag::nhwc : memory::format_tag::ndhwc;
memory::format_tag nCsp16c = ndims == 4 ? memory::format_tag::nChw16c : memory::format_tag::nCdhw16c;
memory::format_tag nCsp8c = ndims == 4 ? memory::format_tag::nChw8c : memory::format_tag::nCdhw8c;
if (one_of(ndims, 3, 4, 5)) {
memory::format_tag nspc = ndims == 3 ? memory::format_tag::nwc : (ndims == 4 ? memory::format_tag::nhwc : memory::format_tag::ndhwc);
memory::format_tag ncsp = ndims == 3 ? memory::format_tag::ncw : (ndims == 4 ? memory::format_tag::nchw : memory::format_tag::ncdhw);
memory::format_tag nCsp8c = ndims == 3 ? memory::format_tag::nCw8c : (ndims == 4 ? memory::format_tag::nChw8c : memory::format_tag::nCdhw8c);
memory::format_tag nCsp16c = ndims == 3 ? memory::format_tag::nCw16c : (ndims == 4 ? memory::format_tag::nChw16c : memory::format_tag::nCdhw16c);
auto inputShape = getInputShapeAtPort(0);
auto outputShape = getOutputShapeAtPort(0);
@ -830,7 +830,7 @@ bool MKLDNNConvolutionNode::isNspcAvailable() const {
if (isDepthWise()) {
// 1d equivalent cases are painfully slow
if (1 == inpDims[inpDims.size() - 2]) {
if (inpDims.size() == 3 || 1 == inpDims[inpDims.size() - 2]) {
return false;
}
} else {

View File

@ -29,7 +29,7 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_
constexpr static int sampledPointsPerPixel = MKLDNNDeformableConvolutionNode::sampledPointsPerPixel;
explicit jit_uni_def_conv_kernel_f32(jit_def_conv_params jcp) : jit_uni_def_conv_kernel(std::move(jcp)), jit_generator() {}
explicit jit_uni_def_conv_kernel_f32(const jit_def_conv_params& jcp) : jit_uni_def_conv_kernel(jcp), jit_generator() {}
void create_ker() override {
jit_generator::create_kernel();

View File

@ -59,7 +59,7 @@ struct jit_uni_def_conv_kernel {
ker_(args);
}
explicit jit_uni_def_conv_kernel(jit_def_conv_params jcp) : ker_(nullptr), jcp_(std::move(jcp)) {}
explicit jit_uni_def_conv_kernel(const jit_def_conv_params& jcp) : ker_(nullptr), jcp_(jcp) {}
virtual ~jit_uni_def_conv_kernel() {}
virtual void create_ker() = 0;

View File

@ -79,8 +79,8 @@ template <cpu_isa_t isa>
struct jit_uni_eltwise_generic : public MKLDNNPlugin::jit_uni_eltwise_kernel, public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_eltwise_generic)
explicit jit_uni_eltwise_generic(jit_eltwise_params jep, MKLDNNEltwiseNode& eltwiseNode) :
jit_uni_eltwise_kernel(std::move(jep), eltwiseNode), jit_generator() {}
explicit jit_uni_eltwise_generic(const jit_eltwise_params& jep, MKLDNNEltwiseNode& eltwiseNode) :
jit_uni_eltwise_kernel(jep, eltwiseNode), jit_generator() {}
void create_ker() override {
jit_generator::create_kernel();
@ -1238,18 +1238,18 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() {
return {config, impl_type};
};
bool isChannelsFirstApplicable = one_of(getOutputShapeAtPort(0).getRank(), 1, 2, 4, 5);
bool isChannelsFirstApplicable = one_of(getOutputShapeAtPort(0).getRank(), 1, 2, 3, 4, 5);
for (size_t i = 0; i < getParentEdges().size(); i++) {
isChannelsFirstApplicable = isChannelsFirstApplicable && one_of(getInputShapeAtPort(i).getRank(), 1, 2, 4, 5);
isChannelsFirstApplicable = isChannelsFirstApplicable && one_of(getInputShapeAtPort(i).getRank(), 1, 2, 3, 4, 5);
isChannelsFirstApplicable = isChannelsFirstApplicable && implication(getInputShapeAtPort(i).getRank() != 1,
getOutputShapeAtPort(0).getRank() ==
getInputShapeAtPort(i).getRank());
}
bool isBlockedApplicable = one_of(getOutputShapeAtPort(0).getRank(), 1, 4, 5);
bool isBlockedApplicable = one_of(getOutputShapeAtPort(0).getRank(), 1, 3, 4, 5);
for (size_t i = 0; i < getParentEdges().size(); i++) {
const auto &inShape = getInputShapeAtPort(i);
isBlockedApplicable = isBlockedApplicable && one_of(inShape.getRank(), 1, 4, 5);
isBlockedApplicable = isBlockedApplicable && one_of(inShape.getRank(), 1, 3, 4, 5);
isBlockedApplicable = isBlockedApplicable && implication(inShape.getRank() != 1,
getOutputShapeAtPort(0).getRank() ==
inShape.getRank());

View File

@ -54,7 +54,7 @@ struct jit_uni_eltwise_kernel {
ker_(const_args, indexes);
}
explicit jit_uni_eltwise_kernel(jit_eltwise_params jep, MKLDNNEltwiseNode& node) : ker_(nullptr), jep_(std::move(jep)), eltwiseNode(node) {}
explicit jit_uni_eltwise_kernel(const jit_eltwise_params& jep, MKLDNNEltwiseNode& node) : ker_(nullptr), jep_(jep), eltwiseNode(node) {}
virtual ~jit_uni_eltwise_kernel() {}
virtual void create_ker() = 0;

View File

@ -45,7 +45,7 @@ template <cpu_isa_t isa>
struct jit_uni_binarization_kernel : public jit_uni_quantize_kernel, public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_binarization_kernel)
explicit jit_uni_binarization_kernel(jit_quantize_params jqp) : jit_uni_quantize_kernel(std::move(jqp)), jit_generator() {}
explicit jit_uni_binarization_kernel(const jit_quantize_params& jqp) : jit_uni_quantize_kernel(jqp), jit_generator() {}
void create_ker() override {
jit_generator::create_kernel();
@ -213,7 +213,7 @@ template <cpu_isa_t isa>
struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_quantization_kernel)
explicit jit_uni_quantization_kernel(jit_quantize_params jqp) : jit_uni_quantize_kernel(std::move(jqp)), jit_generator() {}
explicit jit_uni_quantization_kernel(const jit_quantize_params& jqp) : jit_uni_quantize_kernel(jqp), jit_generator() {}
void create_ker() override {
jit_generator::create_kernel();

View File

@ -56,7 +56,7 @@ struct jit_uni_quantize_kernel {
ker_(args);
}
explicit jit_uni_quantize_kernel(jit_quantize_params jqp) : ker_(nullptr), jqp_(std::move(jqp)) {}
explicit jit_uni_quantize_kernel(const jit_quantize_params& jqp) : ker_(nullptr), jqp_(jqp) {}
virtual ~jit_uni_quantize_kernel() {}
virtual void create_ker() = 0;

View File

@ -34,8 +34,8 @@ private:
size_t axis_;
size_t dataTypeSize_ = 0;
int strideAxDst_;
int dstAxDim_;
int strideAxDst_ = 0;
int dstAxDim_ = 0;
int strideAx1Diff_ = 0;
std::string errorPrefix_;

View File

@ -161,18 +161,17 @@ void MKLDNNGenericNode::execLayer() {
}
}
if (isDynBatch) {
// TODO: use ngraph-based extension mechanism if needed to recompute shape
isDynBatch = false;
}
// TODO: use ngraph-based extension mechanism if needed to recompute shape
isDynBatch = false;
// TODO: uncomment after using ngraph-based extension mechanism
// if (isDynBatch) {
// for (size_t i = 0; i < inputs.size(); i++) {
// auto td = inputs[i]->getTensorDesc();
// td.setDims(inputDescs[i].getDims());
// inputs[i] = make_blob_with_precision(td, getParentEdgeAt(i)->getMemory().GetData());
// }
// }
if (isDynBatch) {
for (size_t i = 0; i < inputs.size(); i++) {
auto td = inputs[i]->getTensorDesc();
td.setDims(inputDescs[i].getDims());
inputs[i] = make_blob_with_precision(td, getParentEdgeAt(i)->getMemory().GetData());
}
}
std::vector<InferenceEngine::Blob::Ptr> outputs;
for (size_t i = 0; i < outputShapes.size(); i++) {
if (isDynBatch) {

View File

@ -27,7 +27,7 @@ public:
private:
int axis;
size_t reducedAxisSize;
size_t reducedAxisSize = 0;
size_t reducedAxisStride = 1;
size_t axisStep = 1;
bool isLastDim = false;

View File

@ -676,10 +676,10 @@ MKLDNNMVNNode::MKLDNNMVNNode(const std::shared_ptr<ngraph::Node>& op, const mkld
IE_THROW(NotImplemented) << errorMessage;
}
epsMode_ = INSIDE_SQRT;
if (auto mvnOp = ngraph::as_type_ptr<ngraph::op::v6::MVN>(op)) {
normalizeVariance_ = mvnOp->get_normalize_variance();
epsValue_ = mvnOp->get_eps();
epsMode_ = INSIDE_SQRT;
if (mvnOp->get_eps_mode() == ngraph::op::MVNEpsMode::OUTSIDE_SQRT) {
epsMode_ = OUTSIDE_SQRT;
}
@ -691,7 +691,6 @@ MKLDNNMVNNode::MKLDNNMVNNode(const std::shared_ptr<ngraph::Node>& op, const mkld
} else if (auto mvnOp = ngraph::as_type_ptr<ngraph::op::v0::MVN>(op)) {
normalizeVariance_ = mvnOp->get_normalize_variance();
epsValue_ = mvnOp->get_eps();
epsMode_ = INSIDE_SQRT;
initAcrossChannels_ = mvnOp->get_across_channels();
}
execAcrossChannels_ = initAcrossChannels_;

View File

@ -86,7 +86,7 @@ std::vector<memory::format_tag> MKLDNNPoolingNode::getAvailableFormatsForDims(co
else if (dims.getRank() == 2)
return {memory::format_tag::nc};
else if (dims.getRank() == 3)
return {memory::format_tag::tnc, memory::format_tag::ntc};
return { memory::format_tag::nCw8c, memory::format_tag::nCw16c, memory::format_tag::nwc, memory::format_tag::ncw};
else if (dims.getRank() == 4)
return {memory::format_tag::nChw8c, memory::format_tag::nChw16c, memory::format_tag::nhwc, memory::format_tag::nchw};
else if (dims.getRank() == 5)
@ -148,8 +148,8 @@ void MKLDNNPoolingNode::getSupportedDescriptors() {
const auto &childShape = getOutputShapeAtPort(0);
const size_t inputRank = getInputShapeAtPort(0).getRank();
if ((inputRank < 4) || (inputRank > 5))
IE_THROW() << "Pooling layer. Unsupported mode. Only 4D and 5D blobs are supported as input.";
if ((inputRank < 3) || (inputRank > 5))
IE_THROW() << "Pooling layer. Unsupported mode. Only 3D, 4D and 5D blobs are supported as input.";
initEffectivePad(MemoryDescUtils::makeDummyShape(parentShape),
MemoryDescUtils::makeDummyShape(childShape));
@ -159,17 +159,17 @@ void MKLDNNPoolingNode::getSupportedDescriptors() {
if (outputDataType == memory::data_type::bf16)
outputDataType = memory::data_type::f32;
// i8 layers support only the channels-last layouts (nwc, nhwc, ndhwc)
const auto in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(parentShape, inputDataType, inputRank == 5 ?
memory::format_tag::ndhwc : memory::format_tag::nhwc);
const auto out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(childShape, outputDataType, inputRank == 5 ?
memory::format_tag::ndhwc : memory::format_tag::nhwc);
const auto in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(parentShape, inputDataType, inputRank == 3 ?
memory::format_tag::nwc : (inputRank == 4 ? memory::format_tag::nhwc : memory::format_tag::ndhwc));
const auto out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(childShape, outputDataType, inputRank == 3 ?
memory::format_tag::nwc : (inputRank == 4 ? memory::format_tag::nhwc : memory::format_tag::ndhwc));
createDescriptor({ in_candidate }, { out_candidate });
} else if ((inputRank == 4 || inputRank == 5) && parentShape.getDims()[1] == 1) {
} else if ((inputRank == 3 || inputRank == 4 || inputRank == 5) && parentShape.getDims()[1] == 1) {
// WA. We should force planar layout since it provides better performance
const auto in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(parentShape, inputDataType, inputRank == 5 ?
memory::format_tag::ncdhw : memory::format_tag::nchw);
const auto out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(childShape, outputDataType, inputRank == 5 ?
memory::format_tag::ncdhw : memory::format_tag::nchw);
const auto in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(parentShape, inputDataType, inputRank == 3 ?
memory::format_tag::ncw : (inputRank == 4 ? memory::format_tag::nchw : memory::format_tag::ncdhw));
const auto out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(childShape, outputDataType, inputRank == 3 ?
memory::format_tag::ncw : (inputRank == 4 ? memory::format_tag::nchw : memory::format_tag::ncdhw));
createDescriptor({ in_candidate }, { out_candidate });
} else {
if (inputDataType != memory::data_type::bf16) {

View File

@ -229,10 +229,6 @@ private:
bool MKLDNNRegionYoloNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
const auto regionYolo = std::dynamic_pointer_cast<const ngraph::opset1::RegionYolo>(op);
if (!regionYolo) {
errorMessage = "Only opset1 RegionYolo operation is supported";
@ -244,6 +240,10 @@ bool MKLDNNRegionYoloNode::isSupportedOperation(const std::shared_ptr<const ngra
return true;
}
bool MKLDNNRegionYoloNode::needPrepareParams() const {
return false;
}
MKLDNNRegionYoloNode::MKLDNNRegionYoloNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
std::string errorMessage;
@ -302,6 +302,10 @@ void MKLDNNRegionYoloNode::initSupportedPrimitiveDescriptors() {
}
void MKLDNNRegionYoloNode::createPrimitive() {
if (inputShapesDefined()) {
updateLastInputDims();
}
jit_logistic_config_params jcp;
jcp.src_dt = jcp.dst_dt = output_prec;
jcp.src_data_size = jcp.dst_data_size = output_prec.size();

View File

@ -49,6 +49,10 @@ public:
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
protected:
bool needPrepareParams() const override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
private:
int classes;
int coords;

View File

@ -308,11 +308,7 @@ private:
bool MKLDNNROIPoolingNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
const auto roiPooling = std::dynamic_pointer_cast<const ngraph::opset2::ROIPooling>(op);
auto roiPooling = ngraph::as_type_ptr<const ngraph::opset2::ROIPooling>(op);
if (!roiPooling) {
errorMessage = "Only opset2 ROIPooling operation is supported";
return false;
@ -331,22 +327,22 @@ bool MKLDNNROIPoolingNode::isSupportedOperation(const std::shared_ptr<const ngra
MKLDNNROIPoolingNode::MKLDNNROIPoolingNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
std::string errorMessage;
if (isSupportedOperation(op, errorMessage)) {
std::string errorPrefix = "ROIPooling layer with name '" + getName() + "' ";
const auto roiPooling = std::dynamic_pointer_cast<const ngraph::opset2::ROIPooling>(op);
pooled_h = roiPooling->get_output_size()[0];
pooled_w = roiPooling->get_output_size()[1];
spatial_scale = roiPooling->get_spatial_scale();
std::string m = roiPooling->get_method();
if (m == "max") {
algorithm = Algorithm::ROIPoolingMax;
} else if (m == "bilinear") {
algorithm = Algorithm::ROIPoolingBilinear;
}
} else {
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
std::string errorPrefix = "ROIPooling layer with name '" + getName() + "' ";
auto roiPooling = ngraph::as_type_ptr<const ngraph::opset2::ROIPooling>(op);
refParams.pooled_h = roiPooling->get_output_size()[0];
refParams.pooled_w = roiPooling->get_output_size()[1];
refParams.spatial_scale = roiPooling->get_spatial_scale();
std::string m = roiPooling->get_method();
if (m == "max") {
algorithm = Algorithm::ROIPoolingMax;
} else if (m == "bilinear") {
algorithm = Algorithm::ROIPoolingBilinear;
}
}
void MKLDNNROIPoolingNode::getSupportedDescriptors() {
@ -370,9 +366,9 @@ void MKLDNNROIPoolingNode::getSupportedDescriptors() {
IE_THROW() << errorPrefix << "doesn't support output with rank: " << getOutputShapeAtPort(0).getRank();
}
if (getInputShapeAtPort(1).getStaticDims()[1] != 5) {
IE_THROW() << errorPrefix << "has invalid shape on 1st input: ["
<< getInputShapeAtPort(1).getStaticDims()[0] << "," << getInputShapeAtPort(1).getStaticDims()[1] << "]";
const auto& dims = getInputShapeAtPort(1).getDims();
if (dims[1] != 5) {
IE_THROW() << errorPrefix << "has invalid shape on 1st input: [" << dims[0] << "," << dims[1] << "]";
}
}
@ -380,16 +376,15 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
runtimePrecision = getOriginalInputPrecisionAtPort(0);
refParams.src_prc = getOriginalInputPrecisionAtPort(0);
if (!mayiuse(avx512_core)) {
if (runtimePrecision == Precision::BF16)
runtimePrecision = Precision::FP32;
if (refParams.src_prc == Precision::BF16)
refParams.src_prc = Precision::FP32;
}
src_data_size = dst_data_size = runtimePrecision.size();
src_data_size = dst_data_size = refParams.src_prc.size();
auto parentDims = getInputShapeAtPort(0).getStaticDims();
auto format = mayiuse(avx512_common) ? LayoutType::nCsp16c : LayoutType::nCsp8c;
impl_desc_type impl_type;
if (mayiuse(cpu::x64::avx512_common)) {
@ -402,159 +397,159 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() {
impl_type = impl_desc_type::ref;
}
addSupportedPrimDesc({{format, runtimePrecision},
{LayoutType::ncsp, runtimePrecision}},
{{format, runtimePrecision}},
addSupportedPrimDesc({{format, refParams.src_prc},
{LayoutType::ncsp, refParams.src_prc}},
{{format, refParams.src_prc}},
impl_type);
}
void MKLDNNROIPoolingNode::createPrimitive() {
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
IE_THROW() << "CPU ROI Pooling node with name '" << getName() << "' doesn't have primitive descriptors.";
auto config = selectedPrimitiveDescriptor->getConfig();
const int simd_w = mayiuse(cpu::x64::avx512_common) ? 16 : 8;
jpp.c_block = simd_w;
auto inDims = getParentEdgeAt(0)->getMemory().getStaticDims();
auto outDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims();
jpp.mb = outDims[0];
jpp.c = rnd_up(inDims[1], simd_w);
jpp.ih = inDims[2];
jpp.iw = inDims[3];
jpp.oh = outDims[2];
jpp.ow = outDims[3];
jpp.spatial_scale = spatial_scale;
jpp.pooled_h = pooled_h;
jpp.pooled_w = pooled_w;
jpp.nb_c = jpp.c / jpp.c_block;
jpp.nb_c_blocking = mayiuse(cpu::x64::avx512_common) ? 15 : 7;
auto selectedPD = getSelectedPrimitiveDescriptor();
jpp.src_prc = selectedPD->getConfig().inConfs[0].desc->getPrecision();
jpp.dst_prc = selectedPD->getConfig().outConfs[0].desc->getPrecision();
jpp.src_data_size = jpp.src_prc.size();
jpp.dst_data_size = jpp.dst_prc.size();
if (!selectedPD)
IE_THROW() << "CPU ROI Pooling node with name '" << getName() << "' doesn't have primitive descriptors.";
jpp.alg = getAlgorithm();
refParams.c_block = mayiuse(cpu::x64::avx512_common) ? 16 : 8;
refParams.nb_c_blocking = mayiuse(cpu::x64::avx512_common) ? 15 : 7;
refParams.alg = getAlgorithm();
if (mayiuse(cpu::x64::avx512_common)) {
roi_pooling_kernel.reset(new jit_uni_roi_pooling_kernel_f32<cpu::x64::avx512_common>(jpp));
} else if (mayiuse(cpu::x64::avx2)) {
roi_pooling_kernel.reset(new jit_uni_roi_pooling_kernel_f32<cpu::x64::avx2>(jpp));
} else if (mayiuse(cpu::x64::sse41)) {
roi_pooling_kernel.reset(new jit_uni_roi_pooling_kernel_f32<cpu::x64::sse41>(jpp));
const auto& config = selectedPD->getConfig();
refParams.src_prc = config.inConfs[0].desc->getPrecision();
refParams.dst_prc = config.outConfs[0].desc->getPrecision();
refParams.src_data_size = refParams.src_prc.size();
refParams.dst_data_size = refParams.dst_prc.size();
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
if (roi_pooling_kernel)
roi_pooling_kernel->create_ker();
}
template<typename T>
void MKLDNNROIPoolingNode::execute() {
auto &srcMemory0 = getParentEdgeAt(0)->getMemory();
auto &srcMemory1 = getParentEdgeAt(1)->getMemory();
auto &dstMemory = getChildEdgeAt(0)->getMemory();
void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) {
if (execPtr) {
const auto &srcMemory0 = getParentEdgeAt(0)->getMemory();
const auto &srcMemory1 = getParentEdgeAt(1)->getMemory();
const auto &dstMemory = getChildEdgeAt(0)->getMemory();
execPtr->exec(srcMemory0, srcMemory1, dstMemory);
} else {
IE_THROW() << "Can't execute ROI Pooling node. Primitive wasn't created";
}
}
const auto *src_data = reinterpret_cast<const T*>(srcMemory0.GetPtr());
const auto *src_roi = reinterpret_cast<const T*>(srcMemory1.GetPtr());
auto *dst = reinterpret_cast<T*>(dstMemory.GetPtr());
void MKLDNNROIPoolingNode::executeDynamicImpl(mkldnn::stream strm) {
return execute(strm);
}
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
IE_THROW() << "CPU ROI Pooling node with name '" << getName() << "' doesn't have primitive descriptors.";
auto config = selectedPrimitiveDescriptor->getConfig();
void MKLDNNROIPoolingNode::prepareParams() {
const auto& srcMemPtr0 = getParentEdgeAt(0)->getMemoryPtr();
const auto& srcMemPtr1 = getParentEdgeAt(1)->getMemoryPtr();
const auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
if (!srcMemPtr0 || !srcMemPtr0->GetPrimitivePtr())
IE_THROW() << "Input memory didn't allocate.";
if (!srcMemPtr1 || !srcMemPtr1->GetPrimitivePtr())
IE_THROW() << "Input memory didn't allocate.";
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
IE_THROW() << "Destination memory didn't allocate.";
if (getSelectedPrimitiveDescriptor() == nullptr)
IE_THROW() << "Preferable primitive descriptor is not set.";
auto src_strides = srcMemory0.GetDescWithType<BlockedMemoryDesc>()->getStrides();
auto dst_strides = dstMemory.GetDescWithType<BlockedMemoryDesc>()->getStrides();
size_t src_roi_step = srcMemory1.GetDescWithType<BlockedMemoryDesc>()->getStrides()[0];
const auto& inDims = getParentEdgeAt(0)->getMemory().getStaticDims();
const auto& outDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims();
int cb_work = impl::utils::div_up(jpp.nb_c, jpp.nb_c_blocking);
int MB = jpp.mb;
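// Fill the shape-dependent kernel parameters: the output batch equals the number of ROIs,
// channels are rounded up to the SIMD block, and spatial sizes are taken from the current input/output shapes.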
refParams.mb = outDims[0];
refParams.c = rnd_up(inDims[1], refParams.c_block);
refParams.nb_c = refParams.c / refParams.c_block;
refParams.ih = inDims[2];
refParams.iw = inDims[3];
refParams.oh = outDims[2];
refParams.ow = outDims[3];
int real_rois = 0;
for (; real_rois < MB; real_rois++) {
size_t roi_off = real_rois * src_roi_step;
execPtr = ROIPoolingExecutor::createROIPoolingNewExecutor(refParams);
}
const auto *src_roi_ptr = &src_roi[roi_off];
int roi_batch_ind = static_cast<int>(src_roi_ptr[0]);
if (roi_batch_ind == -1) {
break;
template <typename T>
class MKLDNNROIPoolingNode::ROIPoolingJitExecutor : public MKLDNNROIPoolingNode::ROIPoolingExecutor {
public:
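// Compiles the JIT ROI pooling kernel for the widest SIMD extension available at runtime; SSE4.1 is the minimum requirement.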
ROIPoolingJitExecutor(const jit_roi_pooling_params &jpp) {
if (mayiuse(cpu::x64::avx512_common)) {
roi_pooling_kernel.reset(new jit_uni_roi_pooling_kernel_f32<cpu::x64::avx512_common>(jpp));
} else if (mayiuse(cpu::x64::avx2)) {
roi_pooling_kernel.reset(new jit_uni_roi_pooling_kernel_f32<cpu::x64::avx2>(jpp));
} else if (mayiuse(cpu::x64::sse41)) {
roi_pooling_kernel.reset(new jit_uni_roi_pooling_kernel_f32<cpu::x64::sse41>(jpp));
} else {
IE_THROW() << "Can't create jit RoiPooling kernel";
}
if (roi_pooling_kernel)
roi_pooling_kernel->create_ker();
}
parallel_for4d(MB, cb_work, jpp.oh, jpp.ow, [&](int n, int cbb, int oh, int ow) {
auto arg = jit_roi_pooling_call_args();
void exec(
const MKLDNNPlugin::MKLDNNMemory& srcData,
const MKLDNNPlugin::MKLDNNMemory& srcRoi,
const MKLDNNPlugin::MKLDNNMemory& dst) override {
if (!roi_pooling_kernel)
IE_THROW() << "Could not execute. Kernel for RoiPooling node was not compiled.";
int cb = cbb * jpp.nb_c_blocking;
int cb_num = jpp.nb_c_blocking;
int c_block = jpp.c_block;
auto src_strides = srcData.GetDescWithType<BlockedMemoryDesc>()->getStrides();
auto src_roi_step = srcRoi.GetDescWithType<BlockedMemoryDesc>()->getStrides()[0];
auto dst_strides = dst.GetDescWithType<BlockedMemoryDesc>()->getStrides();
const auto* src_ptr = reinterpret_cast<const T*>(srcData.GetPtr());
const auto* roi_ptr = reinterpret_cast<const T*>(srcRoi.GetPtr());
auto* dst_ptr = reinterpret_cast<T*>(dst.GetPtr());
executeOptimizedGeneric(src_ptr, roi_ptr, dst_ptr, src_strides, dst_strides, src_roi_step);
}
arg.c_blocks = std::min(cb + cb_num, jpp.nb_c) - cb;
private:
void executeOptimizedGeneric(
const T* src_data,
const T* src_roi,
T* dst,
const VectorDims& src_strides,
const VectorDims& dst_strides,
const size_t src_roi_step) {
const auto& jpp = roi_pooling_kernel->jpp_;
int cb_work = impl::utils::div_up(jpp.nb_c, jpp.nb_c_blocking);
int MB = jpp.mb;
if (n >= real_rois) {
if (roi_pooling_kernel) {
int real_rois = 0;
for (; real_rois < MB; real_rois++) {
size_t roi_off = real_rois * src_roi_step;
const auto *src_roi_ptr = &src_roi[roi_off];
int roi_batch_ind = static_cast<int>(src_roi_ptr[0]);
if (roi_batch_ind == -1) {
break;
}
}
parallel_for4d(MB, cb_work, jpp.oh, jpp.ow, [&](int n, int cbb, int oh, int ow) {
auto arg = jit_roi_pooling_call_args();
int cb = cbb * jpp.nb_c_blocking;
int cb_num = jpp.nb_c_blocking;
arg.c_blocks = std::min(cb + cb_num, jpp.nb_c) - cb;
if (n >= real_rois) {
arg.bin_area = 0;
arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]];
(*roi_pooling_kernel)(&arg);
} else {
for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) {
int ch_blk_cur = cbb * cb_num + cbb_cur;
if (ch_blk_cur >= jpp.nb_c) {
break; // current block work is done
}
for (int c = 0; c < c_block; c++) {
dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0;
}
}
}
} else {
size_t roi_off = n * src_roi_step;
const auto *src_roi_ptr = &src_roi[roi_off];
size_t roi_off = n * src_roi_step;
const auto *src_roi_ptr = &src_roi[roi_off];
int roi_batch_ind = static_cast<int>(src_roi_ptr[0]);
int roi_batch_ind = static_cast<int>(src_roi_ptr[0]);
if (jpp.alg == Algorithm::ROIPoolingMax) {
int roi_start_w = static_cast<int>(round(src_roi_ptr[1] * jpp.spatial_scale));
int roi_start_h = static_cast<int>(round(src_roi_ptr[2] * jpp.spatial_scale));
int roi_end_w = static_cast<int>(round(src_roi_ptr[3] * jpp.spatial_scale));
int roi_end_h = static_cast<int>(round(src_roi_ptr[4] * jpp.spatial_scale));
if (jpp.alg == Algorithm::ROIPoolingMax) {
int roi_start_w = static_cast<int>(round(src_roi_ptr[1] * jpp.spatial_scale));
int roi_start_h = static_cast<int>(round(src_roi_ptr[2] * jpp.spatial_scale));
int roi_end_w = static_cast<int>(round(src_roi_ptr[3] * jpp.spatial_scale));
int roi_end_h = static_cast<int>(round(src_roi_ptr[4] * jpp.spatial_scale));
int roi_height = std::max(roi_end_h - roi_start_h + 1, 1);
int roi_width = std::max(roi_end_w - roi_start_w + 1, 1);
int hstart, hend, wstart, wend;
std::tie(hstart, hend, wstart, wend) = getBordersForMaxMode(
roi_start_h, roi_end_h, roi_start_w, roi_end_w, jpp.ih, oh, jpp.iw, ow, jpp.pooled_h, jpp.pooled_w);
int hstart = (oh * roi_height) / jpp.pooled_h;
if ((hstart * jpp.pooled_h) > (oh * roi_height)) {
--hstart;
}
int wstart = (ow * roi_width) / jpp.pooled_w;
if ((wstart * jpp.pooled_w) > (ow * roi_width)) {
--wstart;
}
int hend = ((oh + 1) * roi_height) / jpp.pooled_h;
if ((hend * jpp.pooled_h) < ((oh + 1) * roi_height)) {
++hend;
}
int wend = ((ow + 1) * roi_width) / jpp.pooled_w;
if ((wend * jpp.pooled_w) < ((ow + 1) * roi_width)) {
++wend;
}
hstart = std::min(std::max(hstart + roi_start_h, 0), jpp.ih);
hend = std::min(std::max(hend + roi_start_h, 0), jpp.ih);
wstart = std::min(std::max(wstart + roi_start_w, 0), jpp.iw);
wend = std::min(std::max(wend + roi_start_w, 0), jpp.iw);
if (roi_pooling_kernel) {
arg.src = &src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + hstart * src_strides[2] + wstart * src_strides[3]];
arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]];
@ -562,6 +557,121 @@ void MKLDNNROIPoolingNode::execute() {
arg.kh = hend - hstart;
arg.kw = wend - wstart;
} else {
float roi_start_w_ = src_roi_ptr[1];
float roi_start_h_ = src_roi_ptr[2];
float roi_end_w_ = src_roi_ptr[3];
float roi_end_h_ = src_roi_ptr[4];
float in_x, in_y;
std::tie(in_x, in_y) = getXYForBilinearMode(
roi_start_h_, roi_end_h_, roi_start_w_, roi_end_w_, jpp.ih, oh, jpp.iw, ow, jpp.pooled_h, jpp.pooled_w);
if (in_y < 0 || in_y > jpp.ih - 1 || in_x < 0 || in_x > jpp.iw - 1) {
arg.bin_area = 0;
arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]];
} else {
int top_y_index = static_cast<int>(floorf(in_y));
int bottom_y_index = static_cast<int>(ceilf(in_y));
int left_x_index = static_cast<int>(floorf(in_x));
int right_x_index = static_cast<int>(ceilf(in_x));
if (right_x_index > jpp.iw - 1)
right_x_index = jpp.iw - 1;
if (bottom_y_index > jpp.ih - 1)
bottom_y_index = jpp.ih - 1;
arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]];
arg.xf = in_x - left_x_index;
arg.yf = in_y - top_y_index;
arg.xoff = sizeof(T) * (right_x_index - left_x_index) * jpp.c_block;
arg.yoff = sizeof(T) * (bottom_y_index - top_y_index) * jpp.iw * jpp.c_block;
arg.src = &src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] +
top_y_index * src_strides[2] + left_x_index * src_strides[3]];
arg.bin_area = 1;
}
}
(*roi_pooling_kernel)(&arg);
}
});
}
std::shared_ptr<jit_uni_roi_pooling_kernel> roi_pooling_kernel;
};
template <typename T>
class MKLDNNROIPoolingNode::ROIPoolingRefExecutor : public MKLDNNROIPoolingNode::ROIPoolingExecutor {
public:
ROIPoolingRefExecutor(const jit_roi_pooling_params &_jpp) : jpp(_jpp) {}
void exec(
const MKLDNNPlugin::MKLDNNMemory& srcData,
const MKLDNNPlugin::MKLDNNMemory& srcRoi,
const MKLDNNPlugin::MKLDNNMemory& dst) override {
auto src_strides = srcData.GetDescWithType<BlockedMemoryDesc>()->getStrides();
auto src_roi_step = srcRoi.GetDescWithType<BlockedMemoryDesc>()->getStrides()[0];
auto dst_strides = dst.GetDescWithType<BlockedMemoryDesc>()->getStrides();
const auto* src_ptr = reinterpret_cast<const T*>(srcData.GetPtr());
const auto* roi_ptr = reinterpret_cast<const T*>(srcRoi.GetPtr());
auto* dst_ptr = reinterpret_cast<T*>(dst.GetPtr());
executeReference(src_ptr, roi_ptr, dst_ptr, src_strides, dst_strides, src_roi_step);
}
void executeReference(
const T* src_data,
const T* src_roi,
T* dst,
const VectorDims& src_strides,
const VectorDims& dst_strides,
const size_t src_roi_step) {
int cb_work = impl::utils::div_up(jpp.nb_c, jpp.nb_c_blocking);
int MB = jpp.mb;
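// Count the leading ROIs with a valid batch index; a batch index of -1 marks the end of the real ROI list.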
int real_rois = 0;
for (; real_rois < MB; real_rois++) {
size_t roi_off = real_rois * src_roi_step;
const auto *src_roi_ptr = &src_roi[roi_off];
int roi_batch_ind = static_cast<int>(src_roi_ptr[0]);
if (roi_batch_ind == -1) {
break;
}
}
parallel_for4d(MB, cb_work, jpp.oh, jpp.ow, [&](int n, int cbb, int oh, int ow) {
int cb_num = jpp.nb_c_blocking;
int c_block = jpp.c_block;
if (n >= real_rois) {
for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) {
int ch_blk_cur = cbb * cb_num + cbb_cur;
if (ch_blk_cur >= jpp.nb_c) {
break; // current block work is done
}
for (int c = 0; c < c_block; c++) {
dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0;
}
}
} else {
size_t roi_off = n * src_roi_step;
const auto *src_roi_ptr = &src_roi[roi_off];
int roi_batch_ind = static_cast<int>(src_roi_ptr[0]);
if (jpp.alg == Algorithm::ROIPoolingMax) {
int roi_start_w = static_cast<int>(round(src_roi_ptr[1] * jpp.spatial_scale));
int roi_start_h = static_cast<int>(round(src_roi_ptr[2] * jpp.spatial_scale));
int roi_end_w = static_cast<int>(round(src_roi_ptr[3] * jpp.spatial_scale));
int roi_end_h = static_cast<int>(round(src_roi_ptr[4] * jpp.spatial_scale));
int hstart, hend, wstart, wend;
std::tie(hstart, hend, wstart, wend) = getBordersForMaxMode(
roi_start_h, roi_end_h, roi_start_w, roi_end_w, jpp.ih, oh, jpp.iw, ow, jpp.pooled_h, jpp.pooled_w);
for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) {
int ch_blk_cur = cbb * cb_num + cbb_cur;
if (ch_blk_cur >= jpp.nb_c) {
@ -584,37 +694,17 @@ void MKLDNNROIPoolingNode::execute() {
}
}
}
}
} else {
float roi_start_w_ = src_roi_ptr[1];
float roi_start_h_ = src_roi_ptr[2];
float roi_end_w_ = src_roi_ptr[3];
float roi_end_h_ = src_roi_ptr[4];
float height_scale = (jpp.pooled_h > 1 ? ((roi_end_h_ - roi_start_h_) * (jpp.ih - 1)) / (jpp.pooled_h - 1) : 0);
float width_scale = (jpp.pooled_w > 1 ? ((roi_end_w_ - roi_start_w_) * (jpp.iw - 1)) / (jpp.pooled_w - 1) : 0);
float in_y, in_x;
// Because floating point arithmetic is not exact, some proposals can violate the inequality
// ((end_h - start_h) * (input_h - 1) / (pooled_h - 1)) * (pooled_h - 1) <= (end_h - start_h) * (input_h - 1)
// and as a result exceed the right limit of the proposal value
// if the border case (current_h == pooled_h - 1) is not handled explicitly
if (jpp.pooled_h > 1) {
in_y = (oh == jpp.pooled_h - 1 ? roi_end_h_ * (jpp.ih - 1) : (oh * height_scale + roi_start_h_ * (jpp.ih - 1)));
} else {
in_y = 0.5 * (roi_start_h_ + roi_end_h_) * (jpp.ih - 1);
}
if (jpp.pooled_w > 1) {
in_x = (ow == jpp.pooled_w - 1 ? roi_end_w_ * (jpp.iw - 1) : (ow * width_scale + roi_start_w_ * (jpp.iw - 1)));
} else {
in_x = 0.5 * (roi_start_w_ + roi_end_w_) * (jpp.iw - 1);
}
float roi_start_w_ = src_roi_ptr[1];
float roi_start_h_ = src_roi_ptr[2];
float roi_end_w_ = src_roi_ptr[3];
float roi_end_h_ = src_roi_ptr[4];
if (in_y < 0 || in_y > jpp.ih - 1 || in_x < 0 || in_x > jpp.iw - 1) {
if (roi_pooling_kernel) {
arg.bin_area = 0;
arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]];
} else {
float in_x, in_y;
std::tie(in_x, in_y) = getXYForBilinearMode(
roi_start_h_, roi_end_h_, roi_start_w_, roi_end_w_, jpp.ih, oh, jpp.iw, ow, jpp.pooled_h, jpp.pooled_w);
if (in_y < 0 || in_y > jpp.ih - 1 || in_x < 0 || in_x > jpp.iw - 1) {
for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) {
int ch_blk_cur = cbb * cb_num + cbb_cur;
if (ch_blk_cur >= jpp.nb_c) {
@ -624,33 +714,18 @@ void MKLDNNROIPoolingNode::execute() {
dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0;
}
}
}
} else {
int top_y_index = static_cast<int>(floorf(in_y));
int bottom_y_index = static_cast<int>(ceilf(in_y));
int left_x_index = static_cast<int>(floorf(in_x));
int right_x_index = static_cast<int>(ceilf(in_x));
if (right_x_index > jpp.iw - 1)
right_x_index = jpp.iw - 1;
if (bottom_y_index > jpp.ih - 1)
bottom_y_index = jpp.ih - 1;
if (roi_pooling_kernel) {
arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]];
arg.xf = in_x - left_x_index;
arg.yf = in_y - top_y_index;
arg.xoff = sizeof(T) * (right_x_index - left_x_index) * jpp.c_block;
arg.yoff = sizeof(T) * (bottom_y_index - top_y_index) * jpp.iw * jpp.c_block;
arg.src = &src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] +
top_y_index * src_strides[2] + left_x_index * src_strides[3]];
arg.bin_area = 1;
} else {
int top_y_index = static_cast<int>(floorf(in_y));
int bottom_y_index = static_cast<int>(ceilf(in_y));
int left_x_index = static_cast<int>(floorf(in_x));
int right_x_index = static_cast<int>(ceilf(in_x));
if (right_x_index > jpp.iw - 1)
right_x_index = jpp.iw - 1;
if (bottom_y_index > jpp.ih - 1)
bottom_y_index = jpp.ih - 1;
for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) {
int ch_blk_cur = cbb * cb_num + cbb_cur;
if (ch_blk_cur >= jpp.nb_c) {
@ -676,35 +751,89 @@ void MKLDNNROIPoolingNode::execute() {
}
}
}
if (roi_pooling_kernel) {
(*roi_pooling_kernel)(&arg);
}
}
});
}
namespace {
struct ROIPoolingContext {
MKLDNNROIPoolingNode &node;
};
}
template<typename T>
struct MKLDNNROIPoolingNode::ROIPoolingExecute {
void operator()(ROIPoolingContext & ctx) {
ctx.node.execute<T>();
});
}
private:
jit_roi_pooling_params jpp;
};
void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) {
ROIPoolingContext ctx = {
*this
};
// enable conditional compilation
OV_SWITCH(MKLDNNPlugin, ROIPoolingExecute, ctx, runtimePrecision,
std::shared_ptr<MKLDNNROIPoolingNode::ROIPoolingExecutor> MKLDNNROIPoolingNode::ROIPoolingExecutor::createROIPoolingNewExecutor(
const jit_roi_pooling_params& jpp) {
ROIPoolingContext ctx = { nullptr, jpp };
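// Dispatch on the source precision: ROIPoolingExecutorCreation<T> builds a float or bfloat16 executor and stores it in the context.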
OV_SWITCH(MKLDNNPlugin, ROIPoolingExecutorCreation, ctx, jpp.src_prc,
OV_CASE(Precision::FP32, float),
OV_CASE(Precision::BF16, bfloat16_t))
return ctx.executor;
}
std::tuple<int, int, int, int> MKLDNNROIPoolingNode::ROIPoolingExecutor::getBordersForMaxMode(
const int roi_start_h, const int roi_end_h, const int roi_start_w, const int roi_end_w,
const int ih, const int oh, const int iw, const int ow, const int pooled_h, const int pooled_w) {
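// Map output bin (oh, ow) to an input window [hstart, hend) x [wstart, wend): integer division with explicit
// floor/ceil correction, then shift by the ROI start and clamp to the feature map.
// Illustrative example: roi_start_h = 2, roi_end_h = 6, pooled_h = 2, ih = 10 -> roi_height = 5;
//   oh = 0 -> hstart = 0, hend = 3 -> rows [2, 5) after the shift/clamp
//   oh = 1 -> hstart = 2, hend = 5 -> rows [4, 7)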
int roi_height = std::max(roi_end_h - roi_start_h + 1, 1);
int roi_width = std::max(roi_end_w - roi_start_w + 1, 1);
int hstart = (oh * roi_height) / pooled_h;
if ((hstart * pooled_h) > (oh * roi_height)) {
--hstart;
}
int wstart = (ow * roi_width) / pooled_w;
if ((wstart * pooled_w) > (ow * roi_width)) {
--wstart;
}
int hend = ((oh + 1) * roi_height) / pooled_h;
if ((hend * pooled_h) < ((oh + 1) * roi_height)) {
++hend;
}
int wend = ((ow + 1) * roi_width) / pooled_w;
if ((wend * pooled_w) < ((ow + 1) * roi_width)) {
++wend;
}
hstart = std::min(std::max(hstart + roi_start_h, 0), ih);
hend = std::min(std::max(hend + roi_start_h, 0), ih);
wstart = std::min(std::max(wstart + roi_start_w, 0), iw);
wend = std::min(std::max(wend + roi_start_w, 0), iw);
return std::make_tuple(hstart, hend, wstart, wend);
}
std::pair<float, float> MKLDNNROIPoolingNode::ROIPoolingExecutor::getXYForBilinearMode(
const float roi_start_h, const float roi_end_h, const float roi_start_w, const float roi_end_w,
const int ih, const int oh, const int iw, const int ow, const int pooled_h, const int pooled_w) {
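// Compute the (fractional) input coordinates sampled for output bin (oh, ow) in bilinear mode; a single-bin axis
// samples the centre of the ROI, and the last bin is pinned to roi_end to avoid floating point overshoot (see the note below).
// Illustrative example: pooled_h = 2, ih = 5, roi_start_h = 0.0, roi_end_h = 1.0 -> height_scale = (1.0 * 4) / 1 = 4.0;
//   oh = 0 -> in_y = 0.0; oh = 1 (border case) -> in_y = roi_end_h * 4 = 4.0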
float height_scale = (pooled_h > 1 ? ((roi_end_h - roi_start_h) * (ih - 1)) / (pooled_h - 1) : 0);
float width_scale = (pooled_w > 1 ? ((roi_end_w - roi_start_w) * (iw - 1)) / (pooled_w - 1) : 0);
float in_y, in_x;
// Because floating point arithmetic is not exact, some proposals can violate the inequality
// ((end_h - start_h) * (input_h - 1) / (pooled_h - 1)) * (pooled_h - 1) <= (end_h - start_h) * (input_h - 1)
// and as a result exceed the right limit of the proposal value
// if the border case (current_h == pooled_h - 1) is not handled explicitly
if (pooled_h > 1) {
in_y = (oh == pooled_h - 1 ? roi_end_h * (ih - 1) : (oh * height_scale + roi_start_h * (ih - 1)));
} else {
in_y = 0.5 * (roi_start_h + roi_end_h) * (ih - 1);
}
if (pooled_w > 1) {
in_x = (ow == pooled_w - 1 ? roi_end_w * (iw - 1) : (ow * width_scale + roi_start_w * (iw - 1)));
} else {
in_x = 0.5 * (roi_start_w + roi_end_w) * (iw - 1);
}
return std::make_pair(in_x, in_y);
}
template <typename T>
std::shared_ptr<MKLDNNROIPoolingNode::ROIPoolingExecutor> MKLDNNROIPoolingNode::ROIPoolingExecutor::makeExecutor(
const jit_roi_pooling_params& jpp) {
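// Prefer the JIT executor when at least SSE4.1 is available; otherwise fall back to the scalar reference implementation.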
if (mayiuse(cpu::x64::sse41))
return std::make_shared<ROIPoolingJitExecutor<T>>(jpp);
else
return std::make_shared<ROIPoolingRefExecutor<T>>(jpp);
}
bool MKLDNNROIPoolingNode::created() const {

View File

@ -74,25 +74,62 @@ public:
void execute(mkldnn::stream strm) override;
bool created() const override;
void executeDynamicImpl(mkldnn::stream strm) override;
void prepareParams() override;
private:
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
template<typename T> void execute();
template<typename T> struct ROIPoolingExecute;
InferenceEngine::Precision runtimePrecision;
size_t src_data_size = 0;
size_t dst_data_size = 0;
size_t src_data_size;
size_t dst_data_size;
int pooled_h = 0;
int pooled_w = 0;
float spatial_scale = 0;
jit_roi_pooling_params jpp = {};
std::shared_ptr<jit_uni_roi_pooling_kernel> roi_pooling_kernel = nullptr;
jit_roi_pooling_params refParams = {};
std::string errorPrefix;
};
class ROIPoolingExecutor {
public:
ROIPoolingExecutor() = default;
virtual void exec(
const MKLDNNPlugin::MKLDNNMemory& srcData,
const MKLDNNPlugin::MKLDNNMemory& srcRoi,
const MKLDNNPlugin::MKLDNNMemory& dst) = 0;
virtual ~ROIPoolingExecutor() = default;
static std::shared_ptr<ROIPoolingExecutor> createROIPoolingNewExecutor(const jit_roi_pooling_params& jpp);
protected:
std::tuple<int, int, int, int> getBordersForMaxMode(
const int roi_start_h, const int roi_end_h, const int roi_start_w, const int roi_end_w,
const int ih, const int oh, const int iw, const int ow, const int pooled_h, const int pooled_w);
std::pair<float, float> getXYForBilinearMode(
const float roi_start_h, const float roi_end_h, const float roi_start_w, const float roi_end_w,
const int ih, const int oh, const int iw, const int ow, const int pooled_h, const int pooled_w);
private:
template <typename T>
static std::shared_ptr<ROIPoolingExecutor> makeExecutor(const jit_roi_pooling_params& jpp);
struct ROIPoolingContext {
std::shared_ptr<ROIPoolingExecutor> executor;
jit_roi_pooling_params jpp;
};
template<typename T>
struct ROIPoolingExecutorCreation {
void operator()(ROIPoolingContext& ctx) {
ctx.executor = ROIPoolingExecutor::makeExecutor<T>(ctx.jpp);
}
};
};
template <typename T> struct ROIPoolingJitExecutor;
template <typename T> struct ROIPoolingRefExecutor;
using executorPtr = std::shared_ptr<ROIPoolingExecutor>;
executorPtr execPtr = nullptr;
};
} // namespace MKLDNNPlugin

View File

@ -4,23 +4,14 @@
#include "mkldnn_strided_slice_node.h"
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include "ie_parallel.hpp"
#include "caseless.hpp"
#include "common/cpu_memcpy.h"
#include "common/blocked_desc_creator.h"
#include "utils/general_utils.h"
#include "mkldnn_input_node.h"
#include <string>
#include <tuple>
#include <algorithm>
#include "caseless.hpp"
#include <ngraph/opsets/opset1.hpp>
#define THROW_ERROR IE_THROW() << "StridedSlice layer with name '" << getName() << "' "
#include <string>
#define THROW_ERROR IE_THROW() << NameFromType(getType()) << " node with name '" << getName() << "' "
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -35,19 +26,20 @@ static inline size_t parallel_init(size_t start, size_t nDims, const VectorDims&
return start;
}
bool MKLDNNStridedSliceNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
bool MKLDNNStridedSliceNode::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
try {
auto ss = ov::as_type_ptr<const ngraph::opset1::StridedSlice>(op);
if (!ss) {
errorMessage = "Only opset1 StridedSlice operation is supported";
if (!ov::is_type<ov::op::v1::StridedSlice>(op) &&
!ov::is_type<ov::op::v8::Slice>(op)) {
errorMessage = "Only StridedSlice from opset1 and Slice from opset8 operations are supported.";
return false;
}
if (ss->get_input_node_shared_ptr(BEGIN_ID)->get_type_info() != ov::op::v0::Constant::get_type_info_static() ||
ss->get_input_node_shared_ptr(END_ID)->get_type_info() != ov::op::v0::Constant::get_type_info_static() ||
(ss->get_input_size() == 4 && ss->get_input_node_shared_ptr(STRIDE_ID)->get_type_info() != ov::op::v0::Constant::get_type_info_static())) {
// TODO: Support begin, end, stride inputs for dynamic shapes.
errorMessage = "Only Constant 'begin', 'end' and 'stride' inputs are supported.";
if (!ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(BEGIN_ID)) ||
!ov::is_type<ov::op::v0::Constant>(op->get_input_node_shared_ptr(END_ID)) ||
(op->get_input_size() > STRIDE_ID && !ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(STRIDE_ID))) ||
(op->get_input_size() > AXES_ID && !ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXES_ID)))) {
// TODO: Support begin, end, stride, axis inputs for dynamic shapes.
errorMessage = "Only Constant 'begin', 'end', 'stride' and 'axis' inputs are supported.";
return false;
}
} catch (...) {
@ -56,53 +48,31 @@ bool MKLDNNStridedSliceNode::isSupportedOperation(const std::shared_ptr<const ng
return true;
}
MKLDNNStridedSliceNode::MKLDNNStridedSliceNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
MKLDNNStridedSliceNode::MKLDNNStridedSliceNode(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
MKLDNNNode(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
auto ss = ov::as_type_ptr<const ngraph::opset1::StridedSlice>(op);
if (inputShapes.size() != 3 && inputShapes.size() != 4) {
isStridedSliceOp = ov::is_type<ov::op::v1::StridedSlice>(op);
if ((isStridedSliceOp && (inputShapes.size() < 3 || inputShapes.size() > 4)) ||
(!isStridedSliceOp && (inputShapes.size() < 4 || inputShapes.size() > 5))) {
THROW_ERROR << "has incorrect number of input edges";
}
if (outputShapes.size() != 1) {
THROW_ERROR << "has incorrect number of output edges";
}
const size_t inputRank = getInputShapeAtPort(DATA_ID).getRank();
const size_t outputRank = getOutputShapeAtPort(0).getRank();
const size_t nDims = std::max(inputRank, outputRank);
auto createMask = [&](const std::vector<int64_t> &origMask, const int bit = 0, bool needReverse = false) {
std::vector<int> mask(origMask.begin(), origMask.end());
if (needReverse) {
for (size_t i = 0; i < mask.size(); i++)
mask[i] = 1 - mask[i];
}
for (size_t i = mask.size(); i < nDims; ++i) mask.push_back(bit);
return mask;
};
attrs.beginMask = createMask(ss->get_begin_mask(), 1, true);
attrs.endMask = createMask(ss->get_end_mask(), 1, true);
attrs.newAxisMask = createMask(ss->get_new_axis_mask());
attrs.shrinkAxisMask = createMask(ss->get_shrink_axis_mask());
auto origEllipsisMask = ss->get_ellipsis_mask();
for (const auto &o : origEllipsisMask) {
attrs.ellipsisMask.push_back(o);
}
if (attrs.ellipsisMask.size() == 0) {
for (size_t i = attrs.ellipsisMask.size(); i < nDims; ++i) attrs.ellipsisMask.push_back(0);
for (size_t i = 0lu; i < op->get_input_size(); i++) {
isConstantInput[i] = ov::is_type<ov::op::v0::Constant>(op->inputs()[i].get_node());
}
attrs.beginDims = getInputShapeAtPort(BEGIN_ID).getStaticDims();
attrs.endDims = getInputShapeAtPort(END_ID).getStaticDims();
if (attrs.beginDims.size() != 1)
THROW_ERROR << " should have begin vector with 1 dimension";
if (attrs.beginDims.size() != 1)
THROW_ERROR << "should have begin vector with 1 dimension";
if (attrs.endDims.size() != 1)
THROW_ERROR << "should have end vector with 1 dimension";
if (attrs.beginDims[0] != attrs.endDims[0])
THROW_ERROR << "should have begin vector with size equal to end vector size";
@ -115,6 +85,59 @@ MKLDNNStridedSliceNode::MKLDNNStridedSliceNode(const std::shared_ptr<ngraph::Nod
if (attrs.beginDims[0] != attrs.strideDims[0])
THROW_ERROR << "should have stride vector with size equal to begin vector size";
}
if (inputShapes.size() > AXES_ID) {
isAxesSpecified = true;
attrs.axesDims = inputShapes[AXES_ID].getStaticDims();
if (attrs.axesDims.size() != 1)
THROW_ERROR << "should have axes vector with 1 dimension.";
if (attrs.beginDims[0] != attrs.axesDims[0])
THROW_ERROR << "should have axes vector with size equal to begin vector size.";
}
if (isStridedSliceOp) {
auto ss = ov::as_type_ptr<const ov::op::v1::StridedSlice>(op);
const size_t inputRank = getInputShapeAtPort(DATA_ID).getRank();
const size_t outputRank = getOutputShapeAtPort(0).getRank();
const size_t nDims = std::max(inputRank, outputRank);
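// Build an int mask padded to nDims with the given default bit; begin/end masks are inverted (1 - bit) relative to the ngraph attributes.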
auto createMask = [&](const std::vector<int64_t> &origMask, const int bit = 0, bool needReverse = false) {
std::vector<int> mask(origMask.begin(), origMask.end());
if (needReverse) {
for (size_t i = 0; i < mask.size(); i++)
mask[i] = 1 - mask[i];
}
for (size_t i = mask.size(); i < nDims; ++i) mask.push_back(bit);
return mask;
};
attrs.beginMask = createMask(ss->get_begin_mask(), 1, true);
attrs.endMask = createMask(ss->get_end_mask(), 1, true);
attrs.newAxisMask = createMask(ss->get_new_axis_mask());
attrs.shrinkAxisMask = createMask(ss->get_shrink_axis_mask());
auto origEllipsisMask = ss->get_ellipsis_mask();
for (const auto &o : origEllipsisMask) {
attrs.ellipsisMask.push_back(o);
}
if (attrs.ellipsisMask.size() == 0) {
for (size_t i = attrs.ellipsisMask.size(); i < nDims; ++i) attrs.ellipsisMask.push_back(0);
}
} else {
const size_t length = outputShapes[0].getRank();
if (inputShapes.size() > AXES_ID) {
attrs.beginMask = std::vector<int>(length, 0);
attrs.endMask = std::vector<int>(length, 0);
} else {
attrs.beginMask = std::vector<int>(length, 1);
attrs.endMask = std::vector<int>(length, 1);
}
attrs.newAxisMask = std::vector<int>(length, 0);
attrs.shrinkAxisMask = std::vector<int>(length, 0);
attrs.ellipsisMask = std::vector<int>(length, 0);
}
}
void MKLDNNStridedSliceNode::getSupportedDescriptors() {
@ -124,16 +147,20 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() {
int ellipsisMaskCounter = 0;
int ellipsisPos1 = -1;
for (size_t i = 0; i < attrs.ellipsisMask.size(); i++) {
ellipsisMaskCounter += attrs.ellipsisMask[i];
ellipsisPos1 = attrs.ellipsisMask[i] == 1 && ellipsisPos1 == -1 ? i : ellipsisPos1;
}
if (ellipsisMaskCounter > 1)
THROW_ERROR << "has incorrect 'Ellipsis_mask'. Only one non-zero bit is allowed";
if (isStridedSliceOp) {
for (size_t i = 0; i < attrs.ellipsisMask.size(); i++) {
ellipsisMaskCounter += attrs.ellipsisMask[i];
ellipsisPos1 = attrs.ellipsisMask[i] == 1 && ellipsisPos1 == -1 ? i : ellipsisPos1;
}
if (ellipsisMaskCounter > 1)
THROW_ERROR << "has incorrect 'Ellipsis_mask'. Only one non-zero bit is allowed";
int newAxis = std::accumulate(attrs.newAxisMask.begin(), attrs.newAxisMask.end(), 0);
int shrinkAxis = std::accumulate(attrs.shrinkAxisMask.begin(), attrs.shrinkAxisMask.end(), 0);
attrs.equalDims = newAxis == 0 && shrinkAxis == 0;
int newAxis = std::accumulate(attrs.newAxisMask.begin(), attrs.newAxisMask.end(), 0);
int shrinkAxis = std::accumulate(attrs.shrinkAxisMask.begin(), attrs.shrinkAxisMask.end(), 0);
attrs.equalDims = newAxis == 0 && shrinkAxis == 0;
} else {
attrs.equalDims = true;
}
auto fillingInParameters = [&](std::vector<int> &parameter, const size_t type, const size_t size, const int value) {
const auto constNode = std::dynamic_pointer_cast<MKLDNNInputNode>(getParentEdgesAtPort(type)[0]->getParent());
@ -146,7 +173,7 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() {
const int *ptr = static_cast<const int*>(blob->GetPtr());
parameter.assign(ptr, ptr + size);
if (ellipsisMaskCounter == 0 && size < nDims) {
if (type != AXES_ID && ellipsisMaskCounter == 0 && size < nDims) {
for (size_t i = size; i < nDims; i++) parameter.push_back(value);
}
};
@ -157,6 +184,25 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() {
fillingInParameters(attrs.end, END_ID, attrs.endDims[0], 0);
if (attrs.strideDims.size())
fillingInParameters(attrs.stride, STRIDE_ID, attrs.strideDims[0], 1);
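// For Slice from opset8 the axes input selects which dimensions are sliced: scatter begin/end/stride into
// full-rank vectors (defaults: begin 0, end -1, stride 1) and mark the selected axes in the begin/end masks.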
if (attrs.axesDims.size()) {
fillingInParameters(attrs.axes, AXES_ID, attrs.axesDims[0], 0);
std::vector<int> beginTmp(outputRank, 0);
std::vector<int> endTmp(outputRank, -1);
std::vector<int> strideTmp(outputRank, 1);
size_t i = 0lu;
for (auto& a : attrs.axes) {
if (a < 0)
a += outputRank;
beginTmp[a] = attrs.begin[i];
endTmp[a] = attrs.end[i];
strideTmp[a] = attrs.stride[i++];
attrs.beginMask[a] = 1;
attrs.endMask[a] = 1;
}
attrs.begin = beginTmp;
attrs.end = endTmp;
attrs.stride = strideTmp;
}
if (inputRank > 3 && attrs.equalDims && ellipsisMaskCounter == 1)
addHiddenDims(inputRank, ellipsisPos1);
@ -194,15 +240,11 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
InferenceEngine::Precision dataPrecision = getOriginalInputPrecisionAtPort(DATA_ID);
InferenceEngine::Precision beginPrecision = getOriginalInputPrecisionAtPort(BEGIN_ID);
InferenceEngine::Precision endPrecision = getOriginalInputPrecisionAtPort(END_ID);
InferenceEngine::Precision stridePrecision;
if (isStrideSpecified)
stridePrecision = getOriginalInputPrecisionAtPort(STRIDE_ID);
const InferenceEngine::Precision dataPrecision = getOriginalInputPrecisionAtPort(DATA_ID);
const InferenceEngine::Precision iPrecision = Precision::I32;
attrs.dataSize = dataPrecision.size();
size_t nDims = getInputShapeAtPort(DATA_ID).getRank();
const size_t nDims = getInputShapeAtPort(DATA_ID).getRank();
NodeConfig config;
config.dynBatchSupport = false;
@ -210,12 +252,16 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() {
config.inConfs[DATA_ID].inPlace = -1;
config.inConfs[BEGIN_ID].inPlace = -1;
config.inConfs[END_ID].inPlace = -1;
config.inConfs[DATA_ID].constant = false;
config.inConfs[BEGIN_ID].constant = true;
config.inConfs[END_ID].constant = true;
config.inConfs[DATA_ID].constant = isConstantInput[DATA_ID];
config.inConfs[BEGIN_ID].constant = isConstantInput[BEGIN_ID];
config.inConfs[END_ID].constant = isConstantInput[END_ID];
if (isStrideSpecified) {
config.inConfs[STRIDE_ID].inPlace = -1;
config.inConfs[STRIDE_ID].constant = true;
config.inConfs[STRIDE_ID].constant = isConstantInput[STRIDE_ID];
}
if (isAxesSpecified) {
config.inConfs[AXES_ID].inPlace = -1;
config.inConfs[AXES_ID].constant = isConstantInput[AXES_ID];
}
config.outConfs.resize(1);
@ -241,11 +287,13 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() {
auto range = BlockedDescCreator::makeFilteredRange(creators, nDims, supportedTypes);
for (auto itr = range.first; itr != range.second; ++itr) {
config.inConfs[0].desc = itr->second->createSharedDesc(dataPrecision, getInputShapeAtPort(DATA_ID));
config.inConfs[BEGIN_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(beginPrecision, getInputShapeAtPort(BEGIN_ID));
config.inConfs[END_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(endPrecision, getInputShapeAtPort(END_ID));
config.inConfs[DATA_ID].desc = itr->second->createSharedDesc(dataPrecision, getInputShapeAtPort(DATA_ID));
config.inConfs[BEGIN_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(iPrecision, getInputShapeAtPort(BEGIN_ID));
config.inConfs[END_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(iPrecision, getInputShapeAtPort(END_ID));
if (isStrideSpecified)
config.inConfs[STRIDE_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(stridePrecision, getInputShapeAtPort(STRIDE_ID));
config.inConfs[STRIDE_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(iPrecision, getInputShapeAtPort(STRIDE_ID));
if (isAxesSpecified)
config.inConfs[AXES_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(iPrecision, getInputShapeAtPort(AXES_ID));
config.outConfs[0].desc = itr->second->createSharedDesc(dataPrecision, getOutputShapeAtPort(DATA_ID));
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref);
@ -254,7 +302,7 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() {
void MKLDNNStridedSliceNode::createPrimitive() {
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
auto& srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
THROW_ERROR << "has not allocated destination memory.";
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
@ -308,9 +356,11 @@ void MKLDNNStridedSliceNode::orderParametersByLayouts(const MKLDNNMemoryPtr& src
sortByOrder(attrs.stride);
sortByOrder(attrs.beginMask);
sortByOrder(attrs.endMask);
sortByOrder(attrs.ellipsisMask);
sortByOrder(attrs.newAxisMask);
sortByOrder(attrs.shrinkAxisMask);
if (isStridedSliceOp) {
sortByOrder(attrs.ellipsisMask);
sortByOrder(attrs.newAxisMask);
sortByOrder(attrs.shrinkAxisMask);
}
}
}

View File

@ -4,7 +4,6 @@
#pragma once
#include <ie_common.h>
#include <mkldnn_node.h>
#include <string>
#include <vector>
@ -13,9 +12,9 @@ namespace MKLDNNPlugin {
class MKLDNNStridedSliceNode : public MKLDNNNode {
public:
MKLDNNStridedSliceNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
MKLDNNStridedSliceNode(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
@ -25,9 +24,8 @@ public:
return false;
}
void prepareParams() override;
protected:
void prepareParams() override;
void executeDynamicImpl(mkldnn::stream strm) override;
private:
@ -38,6 +36,7 @@ private:
std::vector<int> begin;
std::vector<int> end;
std::vector<int> stride;
std::vector<int> axes;
std::vector<int> beginMask;
std::vector<int> endMask;
@ -48,6 +47,7 @@ private:
VectorDims beginDims;
VectorDims endDims;
VectorDims strideDims;
VectorDims axesDims;
bool equalDims = false;
size_t dataSize = 1lu;
@ -84,12 +84,17 @@ private:
using executorPtr = std::shared_ptr<StridedSliceExecutor>;
executorPtr execPtr = nullptr;
bool isStridedSliceOp = true;
bool isStrideSpecified = false;
bool isAxesSpecified = false;
static constexpr size_t DATA_ID = 0;
static constexpr size_t BEGIN_ID = 1;
static constexpr size_t END_ID = 2;
static constexpr size_t STRIDE_ID = 3;
static constexpr size_t AXES_ID = 4;
bool isConstantInput[AXES_ID + 1] = {false};
};
} // namespace MKLDNNPlugin

View File

@ -5,13 +5,18 @@
#include <openvino/core/node.hpp>
#include <ngraph/runtime/host_tensor.hpp>
#include <openvino/opsets/opset1.hpp>
#include <openvino/opsets/opset2.hpp>
#include <openvino/opsets/opset4.hpp>
#include <openvino/opsets/opset5.hpp>
#include <openvino/opsets/opset6.hpp>
#include <openvino/opsets/opset8.hpp>
#include "static_shape.hpp"
#include "utils.hpp"
#include "shape_inference.hpp"
#include "convolution_shape_inference.hpp"
#include "reduce_shape_inference.hpp"
#include "shape_nodes.hpp"
#include "fake_quantize.hpp"
#include "experimental_detectron_detection_output_shape_inference.hpp"
@ -24,10 +29,45 @@ void shape_inference(ov::Node* op,
bool status = resolve_auto_pad_for_shape(node, pads_begin, pads_end, input_shapes, 2, 2);
OPENVINO_ASSERT(status, "Convolution shape inference doesn't have enough information to calculate static shapes");
shape_infer(node, pads_begin, pads_end, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset8::GroupConvolution>(op)) {
ov::CoordinateDiff pads_begin, pads_end;
bool status = resolve_auto_pad_for_shape(node, pads_begin, pads_end, input_shapes, 2, 3);
OPENVINO_ASSERT(status, "GroupConvolution shape inference doesn't have enough information to calculate static shapes");
shape_infer(node, pads_begin, pads_end, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset8::ConvolutionBackpropData>(op)) {
ov::CoordinateDiff pads_begin, pads_end;
ov::StaticShape output_shape_input;
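// The optional third input carries the requested spatial output shape; read it as a static shape when present.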
if (node->get_input_size() == 3)
get_data_as_shape<ov::StaticShape>(2, op, output_shape_input, constant_data);
bool status = resolve_auto_pad_for_shape_back_prop(node, pads_begin, pads_end, input_shapes, output_shape_input, 2, 2);
OPENVINO_ASSERT(status, "ConvolutionBackpropData shape inference doesn't have enough information to calculate static shapes");
shape_infer(node, pads_begin, pads_end, output_shape_input, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset8::GroupConvolutionBackpropData>(op)) {
ov::CoordinateDiff pads_begin, pads_end;
ov::StaticShape output_shape_input;
if (node->get_input_size() == 3)
get_data_as_shape<ov::StaticShape>(2, op, output_shape_input, constant_data);
bool status = resolve_auto_pad_for_shape_back_prop(node, pads_begin, pads_end, input_shapes, output_shape_input, 2, 3);
OPENVINO_ASSERT(status, "GroupConvolutionBackpropData shape inference doesn't have enough information to calculate static shapes");
shape_infer(node, pads_begin, pads_end, output_shape_input, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::op::util::ArithmeticReductionKeepDims>(op)) {
shape_infer(node, input_shapes, output_shapes, constant_data);
} else if (auto node = ov::as_type<ov::op::util::LogicalReductionKeepDims>(op)) {
shape_infer(node, input_shapes, output_shapes, constant_data);
} else if (ov::is_type<ov::op::util::UnaryElementwiseArithmetic>(op) ||
ov::is_type<ov::opset1::Convert>(op) || ov::is_type<ov::opset1::Clamp>(op) ||
ov::is_type<ov::opset1::GRN>(op) || ov::is_type<ov::opset1::LRN>(op) ||
ov::is_type<ov::opset1::LogicalNot>(op) || ov::is_type<ov::opset4::Mish>(op) ||
ov::is_type<ov::opset2::MVN>(op) || ov::is_type<ov::opset6::MVN>(op) ||
ov::is_type<ov::opset1::PRelu>(op) || ov::is_type<ov::opset1::Relu>(op) ||
ov::is_type<ov::opset4::Swish>(op) || ov::is_type<ov::opset1::Softmax>(op) ||
ov::is_type<ov::opset1::Elu>(op) || ov::is_type<ov::opset5::Round>(op)) {
copy_shape_infer(node, input_shapes, output_shapes);
} else if (ov::is_type<ov::op::util::BinaryElementwiseArithmetic>(op) ||
ov::is_type<ov::op::util::BinaryElementwiseComparison>(op) || ov::is_type<ov::op::util::BinaryElementwiseLogical>(op)) {
eltwise_shape_infer(op, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset1::FakeQuantize>(op)) {
shape_infer(node, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset1::Reshape>(op)) {
shape_infer(node, input_shapes, output_shapes, constant_data);
} else if (auto node = ov::as_type<ov::opset1::Squeeze>(op)) {

View File

@ -0,0 +1,133 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "shared_test_classes/single_layer/slice.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
using namespace ov::test;
namespace {
TEST_P(Slice8LayerTest, Serialize) {
serialize();
}
const std::vector<ElementType> inputPrecisions = {
ElementType::f32,
ElementType::bf16,
ElementType::i8
};
const std::vector<ElementType> inputPrecisionsOther = {
ElementType::i64,
ElementType::i32,
ElementType::i16,
ElementType::u8
};
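// Test cases: input shapes, then the Slice start / stop / step / axes vectors; an empty axes vector leaves the default axes.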
std::vector<Slice8SpecificParams> staticParams = {
Slice8SpecificParams{ {{{}, {{ 16 }}}}, { 4 }, { 12 }, { 1 }, { 0 } },
Slice8SpecificParams{ {{{}, {{ 16 }}}}, { 0 }, { 8 }, { 2 }, { 0 } },
Slice8SpecificParams{ {{{}, {{ 20, 10, 5 }}}}, { 0, 0}, { 10, 20}, { 1, 1 }, { 1, 0 } },
Slice8SpecificParams{ {{{}, {{ 1, 2, 12, 100 }}}}, { 0, 1, 0, 1 }, { 1, 2, 5, 100 }, { 1, 1, 1, 10 }, {} },
Slice8SpecificParams{ {{{}, {{ 1, 12, 100 }}}}, { 0, 9, 0 }, { 1, 11, 1 }, { 1, 1, 1 }, { 0, 1, -1 } },
Slice8SpecificParams{ {{{}, {{ 1, 12, 100 }}}}, { 0, 1, 0 }, { 10, -1, 10 }, { 1, 1, 1 }, { -3, -2, -1} },
Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { 1, 12, 100 }, { 0, 7, 0 }, { -1, -1, -1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { 1, 4, 99 }, { 0, 9, 0 }, { -1, 2, -1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { -1, -1, -1 }, { 0, 4, 0 }, { -1, -2, -1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { -1, -1, -1 }, { 0, 0, 4 }, { -1, -1, -1 }, {2, 0, 1} },
Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { 0, 0, 4 }, { -5, -1, -1 }, { 1, 2, 1 }, {2, 0, 1} },
Slice8SpecificParams{ {{{}, {{ 2, 2, 2, 2 }}}}, { 0, 0, 0, 0 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 2, 2, 2 }}}}, { 1, 1, 1, 1 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 2, 4, 3 }}}}, { 0, 0, 0, 0 }, { 2, 2, 4, 3 }, { 1, 1, 2, 1 }, { -4, 1, -2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 2, 4, 2 }}}}, { 1, 0, 0, 1 }, { 2, 2, 4, 2 }, { 1, 1, 2, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 1, 2, 4, 2 }}}}, { 0, 1, 0, 1 }, { 10, 2, 4, 2 }, { 1, 1, 2, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 1, 2, 4, 2 }}}}, { 1, 0, 1, 0 }, { 2, 4, 2, 10 }, { 1, 2, 1, 1 }, { -1, -2, -3, -4 } },
Slice8SpecificParams{ {{{}, {{ 10, 2, 4, 2 }}}}, { 9, 1, 3, 0 }, { 0, 0, 0, 1 }, { -1, -1, -1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 10, 2, 4, 2 }}}}, { 19, 1, -1, 0 }, { -10, 0, 0, -1 }, { -1, -1, -1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 3, 2, 4, 200 }}}}, { 0, 1, -1, -1 }, { 3, 2, 0, 0 }, { 1, 1, -2, -1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 4, 5, 5, 68 }}}}, { 0, 1, 0, 0, 0 }, {
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max() }, { 1, 1, 1, 1, 16 }, {} },
Slice8SpecificParams{ {{{}, {{ 10, 12 }}}}, { -1, 1 }, { -9999, 10 }, { -1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 5, 5, 5, 5 }}}}, { -1, 0, -1, 0 }, { -50, -1, -60, -1 }, { -1, 1, -1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 32 }}}}, { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 20 }}}}, { 0, 1, 0, 0 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 5, 32, 20 }}}}, { 0, 0, 10, 0 }, { 1, 3, 20, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 32 }}}}, { 0, 0, 20, 20 }, { 1, 5, 25, 26 }, { 1, 1, 1, 2 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 5, 32, 32 }}}}, { 0, 0, 0, 20 }, { 1, 2, 30, 30 }, { 1, 1, 2, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 20 }}}}, { 0, 0, 2, 10 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 5, 32, 32 }}}}, { 0, 1, 0, 10 }, { 1, 5, 32, 30 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 20 }}}}, { 0, 1, 2, 10 }, { 1, 5, 32, 18 }, { 1, 1, 1, 2 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 8, 32, 20 }}}}, { 0, 0, 2, 10 }, { 1, 8, 32, 18 }, { 1, 2, 1, 2 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 8, 32, 20 }}}}, { 0, -20, -15 }, { 2, -5, 3 }, { 1, 1, 1 }, { 0, 2, 1 } }
};
INSTANTIATE_TEST_SUITE_P(smoke_Slice8Serialization_static, Slice8LayerTest,
::testing::Combine(
::testing::ValuesIn(staticParams),
::testing::ValuesIn(inputPrecisions),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(std::map<std::string, std::string>())),
Slice8LayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Slice8Serialization_PrecisionTransformation, Slice8LayerTest,
::testing::Combine(
::testing::Values(staticParams[0]),
::testing::ValuesIn(inputPrecisionsOther),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(std::map<std::string, std::string>())),
Slice8LayerTest::getTestCaseName);
std::vector<Slice8SpecificParams> dynamicParams = {
Slice8SpecificParams{ {{{ -1 }, {{ 8 }, { 16 }}}}, { 4 }, { 12 }, { 1 }, { 0 } },
Slice8SpecificParams{ {{{ ov::Dimension(2, 20) }, {{ 5 }, { 15 }}}}, { 0 }, { 8 }, { 2 }, { 0 } },
Slice8SpecificParams{ {{{ -1, -1, -1 }, {{ 20, 10, 5 }, {5, 10, 20}}}}, { 0, 0}, { 10, 20}, { 1, 1 }, { 1, 0 } },
Slice8SpecificParams{ {{{ -1, -1, -1, -1 }, {{ 1, 2, 12, 100 }}}}, { 0, 1, 0, 1 }, { 1, 2, 5, 100 }, { 1, 1, 1, 10 }, {} },
Slice8SpecificParams{ {{{ -1, ov::Dimension(2, 20), -1 }, {{ 1, 12, 100 }, { 2, 12, 100 }}}}, { 0, 9, 0 }, { 1, 11, 1 }, { 1, 1, 1 }, {} },
Slice8SpecificParams{ {{{ ov::Dimension(1, 5), ov::Dimension(1, 5), ov::Dimension(1, 5), ov::Dimension(1, 5) },
{{ 2, 2, 2, 2 }, { 2, 2, 4, 3 }, { 2, 2, 4, 2 }, { 1, 2, 4, 2 }}}},
{ 0, 0, 0, 0 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, {} },
Slice8SpecificParams{ {{{ -1, ov::Dimension(1, 5), ov::Dimension(1, 5), -1 }, {{ 10, 2, 4, 2 }, { 10, 4, 2, 2 }}}},
{ 9, 1, 3, 0 }, { 0, 0, 0, 1 }, { -1, -1, -1, 1 }, {} },
Slice8SpecificParams{ {{{ -1, ov::Dimension(1, 5), -1, -1, ov::Dimension(30, 70) }, {{ 2, 4, 5, 5, 68 }, { 2, 3, 7, 7, 33 }}}},
{ 0, 1, 0, 0, 0 }, {
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max() }, { 1, 1, 1, 1, 16 }, {} },
// Shapes mismatch because of missing lower bounds serialization support (ticket: 69092)
// Slice8SpecificParams{ {{{ov::Dimension(1, 5), ov::Dimension(1, 7), ov::Dimension(1, 35), ov::Dimension(1, 35)},
// {{ 1, 5, 32, 32 }, { 2, 5, 32, 20 }, { 2, 5, 32, 32 }}}}, { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
// Slice8SpecificParams{ {{{ov::Dimension(1, 5), ov::Dimension(10, 20), ov::Dimension(20, 30), 16, ov::Dimension(30, 40)},
// {{ 4, 15, 30, 16, 39 }}}}, { 0, 2, 10, 0, 35 }, { 1, 8, 25, 16, 40 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } }
};
INSTANTIATE_TEST_SUITE_P(smoke_Slice8Serialization_dynamic, Slice8LayerTest,
::testing::Combine(
::testing::ValuesIn(dynamicParams),
::testing::ValuesIn(inputPrecisions),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(std::map<std::string, std::string>())),
Slice8LayerTest::getTestCaseName);
} // namespace
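For orientation, each Slice8SpecificParams entry above bundles the input-shape description with the start, stop, step and axes vectors that the test turns into a Slice-8 node. A minimal sketch of the equivalent graph for the { 2, 2, 4, 3 } case (illustrative only, not part of this commit; the helper name and variables are assumptions):
#include <memory>
#include <openvino/core/function.hpp>
#include <openvino/opsets/opset8.hpp>
// Illustrative sketch: one staticParams entry (shape {2, 2, 4, 3}, start/stop/step/axes
// of {0,0,0,0} / {2,2,4,3} / {1,1,2,1} / {-4,1,-2,3}) expressed as the Slice-8 node
// these serialization tests build and round-trip.
static std::shared_ptr<ov::Function> make_slice8_example() {
    auto data  = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::Shape{2, 2, 4, 3});
    auto start = ov::opset8::Constant::create(ov::element::i64, ov::Shape{4}, {0, 0, 0, 0});
    auto stop  = ov::opset8::Constant::create(ov::element::i64, ov::Shape{4}, {2, 2, 4, 3});
    auto step  = ov::opset8::Constant::create(ov::element::i64, ov::Shape{4}, {1, 1, 2, 1});
    auto axes  = ov::opset8::Constant::create(ov::element::i64, ov::Shape{4}, {-4, 1, -2, 3});
    auto slice = std::make_shared<ov::opset8::Slice>(data, start, stop, step, axes);
    return std::make_shared<ov::Function>(ov::NodeVector{slice}, ov::ParameterVector{data});
}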

View File

@ -1,197 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include "common_test_utils/test_common.hpp"
#include <string>
#include <sstream>
#include <fstream>
#include <memory>
#include <map>
#include <ngraph/function.hpp>
#include <ngraph/op/constant.hpp>
#include <ngraph_ops/convolution_ie.hpp>
#include <ngraph/pass/constant_folding.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/reshape_1d_ops.hpp>
#include <transformations/init_node_info.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/pass/manager.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
#include <ngraph/pass/manager.hpp>
using namespace testing;
using namespace ngraph;
TEST(TransformationTests, ConvReshapeTest1) {
auto input = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3, 64}, {1});
auto w = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{6, 3, 3/*OIW*/}, {1});
std::shared_ptr<ngraph::Function> f(nullptr);
{
ngraph::Strides strides{1}, dilations{1};
ngraph::CoordinateDiff pads_begin{0}, pads_end{0};
ngraph::Shape output_shape{1, 6, 62};
auto conv = std::make_shared<ngraph::op::ConvolutionIE>(input, w, strides, dilations, pads_begin, pads_end, ngraph::element::f32, 1);
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{conv}, ngraph::ParameterVector{});
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::pass::Reshape1DOps>();
manager.register_pass<ngraph::pass::InjectionPass>([](std::shared_ptr<ngraph::Function> f) {
check_rt_info(f);
});
manager.register_pass<ngraph::pass::ConstantFolding>();
ASSERT_NO_THROW(manager.run_passes(f));
}
std::vector<size_t> ref_shape{1, 6, 1, 62};
ngraph::Strides ref_strides{1, 1};
ngraph::CoordinateDiff ref_pads_begin{0, 0}, ref_pads_end{0, 0};
for (auto op : f->get_ops()) {
if (auto conv = ngraph::as_type_ptr<ngraph::op::ConvolutionIE>(op)) {
ASSERT_EQ(conv->get_shape(), ref_shape);
ASSERT_EQ(conv->get_strides(), ref_strides);
ASSERT_EQ(conv->get_dilations(), ref_strides);
ASSERT_EQ(conv->get_pads_begin(), ref_pads_begin);
ASSERT_EQ(conv->get_pads_end(), ref_pads_end);
}
}
}
TEST(TransformationTests, ConvBiasReshapeTest1) {
auto input = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3, 64}, {1});
auto w = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{6, 3, 3/*OIW*/}, {1});
auto b = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{6}, {1});
std::shared_ptr<ngraph::Function> f(nullptr);
{
ngraph::Strides strides{1}, dilations{1};
ngraph::CoordinateDiff pads_begin{0}, pads_end{0};
ngraph::Shape output_shape{1, 6, 62};
auto conv = std::make_shared<ngraph::op::ConvolutionIE>(input, w, b, strides, dilations, pads_begin, pads_end, ngraph::element::f32, 1);
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{conv}, ngraph::ParameterVector{});
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::pass::Reshape1DOps>();
manager.register_pass<ngraph::pass::InjectionPass>([](std::shared_ptr<ngraph::Function> f) {
check_rt_info(f);
});
manager.register_pass<ngraph::pass::ConstantFolding>();
ASSERT_NO_THROW(manager.run_passes(f));
}
std::vector<size_t> ref_shape{1, 6, 1, 62};
ngraph::Strides ref_strides{1, 1};
ngraph::CoordinateDiff ref_pads_begin{0, 0}, ref_pads_end{0, 0};
for (auto op : f->get_ops()) {
if (auto conv = ngraph::as_type_ptr<ngraph::op::ConvolutionIE>(op)) {
ASSERT_EQ(conv->get_shape(), ref_shape);
ASSERT_EQ(conv->get_strides(), ref_strides);
ASSERT_EQ(conv->get_dilations(), ref_strides);
ASSERT_EQ(conv->get_pads_begin(), ref_pads_begin);
ASSERT_EQ(conv->get_pads_end(), ref_pads_end);
}
}
}
TEST_F(TransformationTestsF, MaxPoolReshapeTest1) {
{
auto input = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 3, 64});
ngraph::Strides strides{1};
ngraph::Shape pads_begin{0}, pads_end{0}, kernel{3};
auto pool = std::make_shared<ngraph::opset1::MaxPool>(input, strides, pads_begin, pads_end, kernel, ngraph::op::RoundingType::FLOOR);
function = std::make_shared<ngraph::Function>(ngraph::NodeVector{pool}, ngraph::ParameterVector{input});
manager.register_pass<ngraph::pass::Reshape1DOps>();
}
{
auto input = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 3, 64});
auto reshape_begin = std::make_shared<opset1::Reshape>(input, opset1::Constant::create(element::i64, Shape{4}, {1, 3, 1, 64}), true);
ngraph::Strides strides{1, 1};
ngraph::Shape pads_begin{0, 0}, pads_end{0, 0}, kernel{1, 3};
auto pool = std::make_shared<ngraph::opset1::MaxPool>(reshape_begin, strides, pads_begin, pads_end, kernel, ngraph::op::RoundingType::FLOOR);
auto reshape_end = std::make_shared<opset1::Reshape>(pool, opset1::Constant::create(element::i64, Shape{3}, {1, 3, 62}), true);
function_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{reshape_end}, ngraph::ParameterVector{input});
}
}
TEST_F(TransformationTestsF, AvgPoolReshapeTest1) {
{
auto input = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 3, 64});
ngraph::Strides strides{1};
ngraph::Shape pads_begin{0}, pads_end{0}, kernel{3};
auto pool = std::make_shared<ngraph::opset1::AvgPool>(input, strides, pads_begin, pads_end, kernel, false, ngraph::op::RoundingType::FLOOR);
function = std::make_shared<ngraph::Function>(ngraph::NodeVector{pool}, ngraph::ParameterVector{input});
manager.register_pass<ngraph::pass::Reshape1DOps>();
}
{
auto input = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 3, 64});
auto reshape_begin = std::make_shared<opset1::Reshape>(input, opset1::Constant::create(element::i64, Shape{4}, {1, 3, 1, 64}), true);
ngraph::Strides strides{1, 1};
ngraph::Shape pads_begin{0, 0}, pads_end{0, 0}, kernel{1, 3};
auto pool = std::make_shared<ngraph::opset1::AvgPool>(reshape_begin, strides, pads_begin, pads_end, kernel, false, ngraph::op::RoundingType::FLOOR);
auto reshape_end = std::make_shared<opset1::Reshape>(pool, opset1::Constant::create(element::i64, Shape{3}, {1, 3, 62}), true);
function_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{reshape_end}, ngraph::ParameterVector{input});
}
}
TEST(TransformationTests, ReshapeDynamicTest1) {
{
auto input = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::PartialShape::dynamic());
ngraph::Strides strides{1};
ngraph::Shape pads_begin{0}, pads_end{0}, kernel{3};
auto pool = std::make_shared<ngraph::opset1::AvgPool>(input, strides, pads_begin, pads_end, kernel, false, ngraph::op::RoundingType::FLOOR);
auto f = std::make_shared<ngraph::Function>(ngraph::NodeVector{pool}, ngraph::ParameterVector{input});
pass::Manager manager;
manager.register_pass<ngraph::pass::Reshape1DOps>();
ASSERT_NO_THROW(manager.run_passes(f));
}
{
auto input = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 3, 64});
ngraph::Strides strides{1};
ngraph::Shape pads_begin{0}, pads_end{0}, kernel{3};
auto pool = std::make_shared<ngraph::opset1::MaxPool>(input, strides, pads_begin, pads_end, kernel, ngraph::op::RoundingType::FLOOR);
auto f = std::make_shared<ngraph::Function>(ngraph::NodeVector{pool}, ngraph::ParameterVector{input});
pass::Manager manager;
manager.register_pass<ngraph::pass::Reshape1DOps>();
ASSERT_NO_THROW(manager.run_passes(f));
}
{
auto input = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3, 64}, {1});
auto w = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{6, 3, 3/*OIW*/}, {1});
auto b = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{6}, {1});
ngraph::Strides strides{1}, dilations{1};
ngraph::CoordinateDiff pads_begin{0}, pads_end{0};
ngraph::Shape output_shape{1, 6, 62};
auto conv = std::make_shared<ngraph::op::ConvolutionIE>(input, w, b, strides, dilations, pads_begin, pads_end, 1);
auto f = std::make_shared<ngraph::Function>(ngraph::NodeVector{conv}, ngraph::ParameterVector{});
pass::Manager manager;
manager.register_pass<ngraph::pass::Reshape1DOps>();
ASSERT_NO_THROW(manager.run_passes(f));
}
}

View File

@ -0,0 +1,140 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <string>
#include <memory>
#include <openvino/core/function.hpp>
#include <openvino/opsets/opset8.hpp>
#include <openvino/pass/manager.hpp>
#include <transformations/common_optimizations/remove_concat_zero_dim_input.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/utils/utils.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
TEST_F(TransformationTestsF, RemoveConcatZeroDimInputStaticShape) {
auto input1 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 2, 3});
auto input3 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 2, 3});
int64_t axis = 1;
{
auto input2 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 0, 3});
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, input2, input3}, axis);
function = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input2, input3});
manager.register_pass<ov::pass::RemoveConcatZeroDimInput>();
}
{
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, input3}, axis);
function_ref = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input3});
}
}
TEST_F(TransformationTestsF, RemoveConcatZeroDimInputSubgraph) {
auto input1 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 2, 3});
auto input3 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 2, 3});
int64_t axis = 1;
{
auto in_abs = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 0, 3});
auto abs = std::make_shared<ov::opset8::Abs>(in_abs);
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, abs, input3}, axis);
function = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input3, in_abs});
manager.register_pass<ov::pass::RemoveConcatZeroDimInput>();
}
{
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, input3}, axis);
function_ref = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input3});
}
}
TEST_F(TransformationTestsF, RemoveConcatZeroDimInputSubgraph2) {
auto input1 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, ov::Dimension::dynamic(), 3});
auto input3 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 2, 3});
auto abs = std::make_shared<ov::opset8::Abs>(input1);
int64_t axis = 1;
{
auto in_mul = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 0, 3});
auto mul = std::make_shared<ov::opset8::Multiply>(in_mul, abs);
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{mul, input3}, axis);
function = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input3, in_mul});
manager.register_pass<ov::pass::RemoveConcatZeroDimInput>();
}
{
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input3}, axis);
function_ref = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input3});
}
}
TEST_F(TransformationTestsF, RemoveConcatZeroDimInputPartiallyKnowShape) {
std::shared_ptr<ov::Function> f(nullptr), f_ref(nullptr);
auto input1 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape::dynamic());
auto input3 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape::dynamic());
int64_t axis = 0;
{
auto input2 = std::make_shared<ov::opset8::Parameter>(ov::element::f32,
ov::PartialShape{0, ov::Dimension::dynamic(), ov::Dimension::dynamic()});
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, input2, input3}, axis);
function = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input2, input3});
manager.register_pass<ov::pass::RemoveConcatZeroDimInput>();
}
{
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, input3}, axis);
function_ref = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input3});
}
}
TEST_F(TransformationTestsF, RemoveConcatZeroDimInputDynamicRank) {
auto input1 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape::dynamic());
auto input2 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape::dynamic());
auto input3 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape::dynamic());
int64_t axis = 0;
{
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, input2, input3}, axis);
function = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input2, input3});
manager.register_pass<ov::pass::RemoveConcatZeroDimInput>();
}
{
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, input2, input3}, axis);
function_ref = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input2, input3});
}
}
TEST_F(TransformationTestsF, RemoveConcatZeroDimTwoInputs) {
auto input1 = std::make_shared<ov::opset8::Parameter>(ov::element::f32,
ov::PartialShape{1, ov::Dimension::dynamic(), ov::Dimension::dynamic()});
int64_t axis = 1;
{
auto input2 = std::make_shared<ov::opset8::Parameter>(ov::element::f32,
ov::PartialShape{1, 0, ov::Dimension::dynamic()});
auto input3 = std::make_shared<ov::opset8::Parameter>(ov::element::f32,
ov::PartialShape{1, ov::Dimension::dynamic(), 0});
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, input2, input3}, axis);
function = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input2, input3});
manager.register_pass<ov::pass::RemoveConcatZeroDimInput>();
}
{
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1}, axis);
function_ref = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1});
}
}
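For orientation, these tests expect ov::pass::RemoveConcatZeroDimInput to detach any Concat input whose shape carries a statically known zero dimension, since such an input contributes no elements. A minimal sketch of applying the pass outside the test fixture (illustrative only, not part of this commit; the helper name is an assumption):
#include <memory>
#include <openvino/core/function.hpp>
#include <openvino/opsets/opset8.hpp>
#include <openvino/pass/manager.hpp>
#include <transformations/common_optimizations/remove_concat_zero_dim_input.hpp>
// Illustrative sketch: a Concat with one zero-sized input; after the pass the
// Concat is expected to read only from in0.
static std::shared_ptr<ov::Function> make_and_clean() {
    auto in0 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 2, 3});
    auto in1 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 0, 3});  // zero dim
    auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{in0, in1}, 1);
    auto fn = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{in0, in1});
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::RemoveConcatZeroDimInput>();
    manager.run_passes(fn);
    return fn;
}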

View File

@ -0,0 +1,423 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <string>
#include <memory>
#include <openvino/core/function.hpp>
#include <openvino/opsets/opset8.hpp>
#include <ngraph/pass/manager.hpp>
#include <transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.hpp>
#include <transformations/common_optimizations/remove_concat_zero_dim_input.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/utils/utils.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ov;
using namespace ov::opset8;
TEST_F(TransformationTestsF, RemoveLoopDanglingParameters) {
auto trip_count = std::make_shared<Constant>(element::i64, Shape{}, 10);
auto condition = std::make_shared<Constant>(element::boolean, Shape{}, true);
auto a = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto ai = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto b = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto bi = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto mul = std::make_shared<Multiply>(bi, bi);
auto abs = std::make_shared<Abs>(mul);
{
auto body = std::make_shared<Function>(OutputVector{condition, abs}, ParameterVector{ai, bi});
auto loop = std::make_shared<Loop>(trip_count, condition);
loop->set_special_body_ports({-1, 0});
loop->set_function(body);
loop->set_invariant_input(ai, a);
loop->set_invariant_input(bi, b);
auto loop_res = std::make_shared<Result>(loop->get_iter_value(abs));
function = std::make_shared<Function>(OutputVector{loop_res}, ParameterVector{a, b});
manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
}
{
auto body = std::make_shared<Function>(OutputVector{condition, abs}, ParameterVector{bi});
auto loop = std::make_shared<Loop>(trip_count, condition);
loop->set_special_body_ports({-1, 0});
loop->set_function(body);
loop->set_invariant_input(bi, b);
auto loop_res = std::make_shared<Result>(loop->get_iter_value(abs));
function_ref = std::make_shared<Function>(OutputVector{loop_res}, ParameterVector{a, b});
}
}
TEST_F(TransformationTestsF, RemoveLoopManyDanglingParameters) {
auto trip_count = std::make_shared<Constant>(element::i64, Shape{}, 10);
auto condition = std::make_shared<Constant>(element::boolean, Shape{}, true);
auto a = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto ai = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto b = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto bi = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto c = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto ci = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto mul = std::make_shared<Multiply>(bi, bi);
auto abs = std::make_shared<Abs>(mul);
{
auto body = std::make_shared<Function>(OutputVector{condition, abs}, ParameterVector{ai, bi, ci});
auto loop = std::make_shared<Loop>(trip_count, condition);
loop->set_special_body_ports({-1, 0});
loop->set_function(body);
loop->set_invariant_input(ai, a);
loop->set_invariant_input(bi, b);
loop->set_invariant_input(ci, c);
auto loop_res = std::make_shared<Result>(loop->get_iter_value(abs));
function = std::make_shared<Function>(OutputVector{loop_res}, ParameterVector{a, b, c});
manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
}
{
auto body = std::make_shared<Function>(OutputVector{condition, abs}, ParameterVector{bi});
auto loop = std::make_shared<Loop>(trip_count, condition);
loop->set_special_body_ports({-1, 0});
loop->set_function(body);
loop->set_invariant_input(bi, b);
auto loop_res = std::make_shared<Result>(loop->get_iter_value(abs));
function_ref = std::make_shared<Function>(OutputVector{loop_res}, ParameterVector{a, b, c});
}
}
TEST_F(TransformationTestsF, RemoveLoopManyDanglingParameters2) {
auto trip_count = std::make_shared<Constant>(element::i64, Shape{}, 10);
auto condition = std::make_shared<Constant>(element::boolean, Shape{}, true);
auto a = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto ai = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto b = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto bi = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto c = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto ci = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto d = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto di = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto mul = std::make_shared<Multiply>(bi, bi);
auto sub = std::make_shared<Multiply>(mul, di);
auto abs = std::make_shared<Abs>(sub);
{
auto body = std::make_shared<Function>(OutputVector{condition, abs}, ParameterVector{ai, bi, ci, di});
auto loop = std::make_shared<Loop>(trip_count, condition);
loop->set_special_body_ports({-1, 0});
loop->set_function(body);
loop->set_invariant_input(ai, a);
loop->set_invariant_input(bi, b);
loop->set_invariant_input(ci, c);
loop->set_invariant_input(di, d);
auto loop_res = std::make_shared<Result>(loop->get_iter_value(abs));
function = std::make_shared<Function>(OutputVector{loop_res}, ParameterVector{a, b, c, d});
manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
}
{
auto body = std::make_shared<Function>(OutputVector{condition, abs}, ParameterVector{bi, di});
auto loop = std::make_shared<Loop>(trip_count, condition);
loop->set_special_body_ports({-1, 0});
loop->set_function(body);
loop->set_invariant_input(bi, b);
loop->set_invariant_input(di, d);
auto loop_res = std::make_shared<Result>(loop->get_iter_value(abs));
function_ref = std::make_shared<Function>(OutputVector{loop_res}, ParameterVector{a, b, c, d});
}
}
TEST_F(TransformationTestsF, RemoveLoopDanglingParametersIfConcatEmptyTensor) {
auto trip_count = std::make_shared<Constant>(element::i64, Shape{}, 10);
auto condition = std::make_shared<Constant>(element::boolean, Shape{}, true);
auto a = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto ai = std::make_shared<Parameter>(element::f32, Shape{2, 2});
auto b = std::make_shared<Parameter>(element::f32, Shape{0, 2}); // empty tensor
auto bi = std::make_shared<Parameter>(element::f32, Shape{0, 2});
{
auto concat = std::make_shared<Concat>(NodeVector{ai, bi}, 0);
auto body = std::make_shared<Function>(OutputVector{condition, concat}, ParameterVector{ai, bi});
auto loop = std::make_shared<Loop>(trip_count, condition);
loop->set_special_body_ports({-1, 0});
loop->set_function(body);
loop->set_invariant_input(ai, a);
loop->set_invariant_input(bi, b);
auto loop_res = std::make_shared<Result>(loop->get_iter_value(concat));
function = std::make_shared<Function>(OutputVector{loop_res}, ParameterVector{a, b});
manager.register_pass<pass::RemoveConcatZeroDimInput>();
manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
}
{
auto concat = std::make_shared<Concat>(NodeVector{ai}, 0);
auto body = std::make_shared<Function>(OutputVector{condition, concat}, ParameterVector{ai});
auto loop = std::make_shared<Loop>(trip_count, condition);
loop->set_special_body_ports({-1, 0});
loop->set_function(body);
loop->set_invariant_input(ai, a);
auto loop_res = std::make_shared<Result>(loop->get_iter_value(concat));
function_ref = std::make_shared<Function>(OutputVector{loop_res}, ParameterVector{a, b});
}
}
TEST_F(TransformationTestsF, RemoveIfDanglingParametersFromBodiesAndInputs) {
auto X = std::make_shared<Parameter>(element::f32, Shape{2, 4, 1});
auto Y = std::make_shared<Parameter>(element::f32, Shape{3, 4, 1});
auto cond = std::make_shared<Constant>(element::boolean, Shape{1}, true);
auto Xte = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto Yte = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto then_op = std::make_shared<Add>(Xte, Xte);
auto then_op_res = std::make_shared<Result>(then_op);
auto else_op = std::make_shared<Maximum>(Xte, Xte);
auto else_op_res = std::make_shared<Result>(else_op);
{
auto then_body = std::make_shared<Function>(OutputVector{then_op_res}, ParameterVector{Xte, Yte});
auto else_body = std::make_shared<Function>(OutputVector{else_op_res}, ParameterVector{Xte, Yte});
auto if_op = std::make_shared<If>(cond);
if_op->set_then_body(then_body);
if_op->set_else_body(else_body);
if_op->set_input(X, Xte, Xte);
if_op->set_input(Y, Yte, Yte);
auto res = if_op->set_output(then_op_res, else_op_res);
function = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y});
manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
}
{
auto then_body = std::make_shared<Function>(OutputVector{then_op_res}, ParameterVector{Xte});
auto else_body = std::make_shared<Function>(OutputVector{else_op_res}, ParameterVector{Xte});
auto if_op = std::make_shared<If>(cond);
if_op->set_then_body(then_body);
if_op->set_else_body(else_body);
if_op->set_input(X, Xte, Xte);
auto res = if_op->set_output(then_op_res, else_op_res);
function_ref = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y});
}
}
TEST_F(TransformationTestsF, RemoveIfDanglingParametersOnlyFromBodies) {
auto X = std::make_shared<Parameter>(element::f32, Shape{2, 4, 1});
auto Y = std::make_shared<Parameter>(element::f32, Shape{3, 4, 1});
auto cond = std::make_shared<Constant>(element::boolean, Shape{1}, true);
auto Xt = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto Yt = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto then_op = std::make_shared<Add>(Xt, Xt);
auto then_op_res = std::make_shared<Result>(then_op);
auto Xe = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto Ye = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto else_op = std::make_shared<Maximum>(Ye, Ye);
auto else_op_res = std::make_shared<Result>(else_op);
{
auto then_body = std::make_shared<Function>(OutputVector{then_op_res}, ParameterVector{Xt, Yt});
auto else_body = std::make_shared<Function>(OutputVector{else_op_res}, ParameterVector{Xe, Ye});
auto if_op = std::make_shared<If>(cond);
if_op->set_then_body(then_body);
if_op->set_else_body(else_body);
if_op->set_input(X, Xt, Xe);
if_op->set_input(Y, Yt, Ye);
auto res = if_op->set_output(then_op_res, else_op_res);
function = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y});
manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
}
{
auto then_body = std::make_shared<Function>(OutputVector{then_op_res}, ParameterVector{Xt});
auto else_body = std::make_shared<Function>(OutputVector{else_op_res}, ParameterVector{Ye});
auto if_op = std::make_shared<If>(cond);
if_op->set_then_body(then_body);
if_op->set_else_body(else_body);
if_op->set_input(X, Xt, nullptr);
if_op->set_input(Y, nullptr, Ye);
auto res = if_op->set_output(then_op_res, else_op_res);
function_ref = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y});
}
}
TEST_F(TransformationTestsF, RemoveIfManyDanglingParameters) {
auto X = std::make_shared<Parameter>(element::f32, Shape{2, 4, 1});
auto Y = std::make_shared<Parameter>(element::f32, Shape{3, 4, 1});
auto Z = std::make_shared<Parameter>(element::f32, Shape{2, 4, 1});
auto cond = std::make_shared<Constant>(element::boolean, Shape{1}, true);
auto Xt = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto Yt = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto Zt = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto then_op = std::make_shared<Add>(Xt, Zt);
auto then_op_res = std::make_shared<Result>(then_op);
auto Xe = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto Ye = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto Ze = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto else_op = std::make_shared<Maximum>(Xe, Xe);
auto else_op_res = std::make_shared<Result>(else_op);
{
auto then_body = std::make_shared<Function>(OutputVector{then_op_res}, ParameterVector{Xt, Yt, Zt});
auto else_body = std::make_shared<Function>(OutputVector{else_op_res}, ParameterVector{Xe, Ye, Ze});
auto if_op = std::make_shared<If>(cond);
if_op->set_then_body(then_body);
if_op->set_else_body(else_body);
if_op->set_input(X, Xt, Xe);
if_op->set_input(Y, Yt, Ye);
if_op->set_input(Z, Zt, Ze);
auto res = if_op->set_output(then_op_res, else_op_res);
function = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y, Z});
manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
}
{
auto then_body = std::make_shared<Function>(OutputVector{then_op_res}, ParameterVector{Xt, Zt});
auto else_body = std::make_shared<Function>(OutputVector{else_op_res}, ParameterVector{Xe});
auto if_op = std::make_shared<If>(cond);
if_op->set_then_body(then_body);
if_op->set_else_body(else_body);
if_op->set_input(X, Xt, Xe);
if_op->set_input(Z, Zt, nullptr);
auto res = if_op->set_output(then_op_res, else_op_res);
function_ref = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y, Z});
}
}
TEST_F(TransformationTestsF, RemoveIfDanglingParamFromOneBodyAndUpdateAllDescriptions) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
auto X = std::make_shared<Parameter>(element::f32, Shape{2, 4, 1});
auto Y = std::make_shared<Parameter>(element::f32, Shape{2, 4, 1});
auto Z = std::make_shared<Parameter>(element::f32, Shape{2, 4, 1});
auto cond = std::make_shared<Constant>(element::boolean, Shape{1}, true);
auto Xt = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto Yt = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto Zt = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto then_op = std::make_shared<Add>(Zt, Zt);
auto then_op_res = std::make_shared<Result>(then_op);
auto Xe = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto Ze = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
auto else_op = std::make_shared<Add>(std::make_shared<Maximum>(Xe, Ze), Ze);
auto else_op_res = std::make_shared<Result>(else_op);
{
auto then_body = std::make_shared<Function>(OutputVector{then_op_res}, ParameterVector{Xt, Yt, Zt});
auto else_body = std::make_shared<Function>(OutputVector{else_op_res}, ParameterVector{Xe, Ze});
auto if_op = std::make_shared<If>(cond);
if_op->set_then_body(then_body);
if_op->set_else_body(else_body);
if_op->set_input(X, Xt, Xe);
if_op->set_input(Y, Yt, nullptr);
if_op->set_input(Z, Zt, Ze);
auto res = if_op->set_output(then_op_res, else_op_res);
function = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y, Z});
manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
}
{
auto then_body = std::make_shared<Function>(OutputVector{then_op_res}, ParameterVector{Zt});
auto else_body = std::make_shared<Function>(OutputVector{else_op_res}, ParameterVector{Xe, Ze});
auto if_op = std::make_shared<If>(cond);
if_op->set_then_body(then_body);
if_op->set_else_body(else_body);
if_op->set_input(X, nullptr, Xe);
if_op->set_input(Z, Zt, Ze);
auto res = if_op->set_output(then_op_res, else_op_res);
function_ref = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y, Z});
}
}
TEST_F(TransformationTestsF, RemoveTensorIteratorDanglingParameter) {
auto X = std::make_shared<Parameter>(element::f32, Shape{32, 40, 10});
auto Y = std::make_shared<Parameter>(element::f32, Shape{32, 40, 10});
auto M = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});
auto Xi = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});
auto Yi = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});
auto M_body = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});
auto Zo = std::make_shared<Abs>(std::make_shared<Add>(Xi, Yi));
{
auto body = std::make_shared<Function>(OutputVector{Zo}, ParameterVector{Xi, Yi, M_body});
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
tensor_iterator->set_sliced_input(Xi, X, 0, 2, 2, 39, 1);
tensor_iterator->set_sliced_input(Yi, Y, 0, 2, 2, -1, 1);
tensor_iterator->set_invariant_input(M_body, M);
auto out = tensor_iterator->get_iter_value(Zo, -1);
auto res = std::make_shared<Result>(out);
function = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y, M});
manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
}
{
auto body = std::make_shared<Function>(OutputVector{Zo}, ParameterVector{Xi, Yi});
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
tensor_iterator->set_sliced_input(Xi, X, 0, 2, 2, 39, 1);
tensor_iterator->set_sliced_input(Yi, Y, 0, 2, 2, -1, 1);
auto out = tensor_iterator->get_iter_value(Zo, -1);
auto res = std::make_shared<Result>(out);
function_ref = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y, M});
}
}
TEST_F(TransformationTestsF, RemoveTensorIteratorManyDanglingParameters) {
auto X = std::make_shared<Parameter>(element::f32, Shape{32, 40, 10});
auto Y = std::make_shared<Parameter>(element::f32, Shape{32, 40, 10});
auto Z = std::make_shared<Parameter>(element::f32, Shape{32, 40, 10});
auto M = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});
auto Xi = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});
auto Yi = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});
auto Zi = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});
auto M_body = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});
auto Zo = std::make_shared<Abs>(std::make_shared<Add>(Xi, Zi));
{
auto body = std::make_shared<Function>(OutputVector{Zo}, ParameterVector{Xi, Yi, Zi, M_body});
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
tensor_iterator->set_sliced_input(Xi, X, 0, 2, 2, 39, 1);
tensor_iterator->set_sliced_input(Yi, Y, 0, 2, 2, -1, 1);
tensor_iterator->set_sliced_input(Zi, Z, 0, 2, 2, -1, 1);
tensor_iterator->set_invariant_input(M_body, M);
auto out = tensor_iterator->get_iter_value(Zo, -1);
auto res = std::make_shared<Result>(out);
function = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y, Z, M});
manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
}
{
auto body = std::make_shared<Function>(OutputVector{Zo}, ParameterVector{Xi, Zi});
auto tensor_iterator = std::make_shared<TensorIterator>();
tensor_iterator->set_body(body);
tensor_iterator->set_sliced_input(Xi, X, 0, 2, 2, 39, 1);
tensor_iterator->set_sliced_input(Zi, Z, 0, 2, 2, -1, 1);
auto out = tensor_iterator->get_iter_value(Zo, -1);
auto res = std::make_shared<Result>(out);
function_ref = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y, Z, M});
}
}
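For orientation, the RemoveLoopDanglingParametersIfConcatEmptyTensor case above registers the two passes in a deliberate order: removing the zero-sized Concat input inside the Loop body is what leaves that body Parameter unused, so the dangling-params pass can then drop it together with the matching outer input. A minimal sketch of that ordering (illustrative only, not part of this commit; the helper name is an assumption):
#include <memory>
#include <openvino/core/function.hpp>
#include <openvino/pass/manager.hpp>
#include <transformations/common_optimizations/remove_concat_zero_dim_input.hpp>
#include <transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.hpp>
// Illustrative sketch: the same pass ordering the test relies on, applied to a
// caller-supplied model containing the Loop built above.
static void run_cleanup(const std::shared_ptr<ov::Function>& model) {
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::RemoveConcatZeroDimInput>();          // drops the {0, 2} Concat input in the body
    manager.register_pass<ov::pass::RemoveMultiSubGraphOpDanglingParams>(); // then removes the now-unused body Parameter
    manager.run_passes(model);
}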

View File

@ -8,7 +8,7 @@ if (ENABLE_MKL_DNN)
add_subdirectory(cpu)
endif()
if (ENABLE_CLDNN)
if (ENABLE_INTEL_GPU)
add_subdirectory(gpu)
endif()

View File

@ -22,7 +22,7 @@ inline const std::string getPluginLibNameByDevice(const std::string& deviceName)
{ "AUTO", "MultiDevicePlugin" },
{ "CPU", "MKLDNNPlugin" },
{ "GNA", "GNAPlugin" },
{ "GPU", "clDNNPlugin" },
{ "GPU", "ov_intel_gpu_plugin" },
{ "HETERO", "ov_hetero_plugin" },
{ "BATCH", "AutoBatchPlugin" },
{ "MULTI", "MultiDevicePlugin" },

View File

@ -99,4 +99,8 @@ INSTANTIATE_TEST_SUITE_P(
INSTANTIATE_TEST_SUITE_P(
smoke_IEClassLoadNetworkTest, IEClassLoadNetworkTest,
::testing::Values("CPU"));
INSTANTIATE_TEST_SUITE_P(
smoke_IEClassLoadNetworkTest, IEClassLoadNetworkTestWithThrow,
::testing::Values(""));
} // namespace

View File

@ -8,75 +8,120 @@
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
using namespace ov::test;
namespace {
const std::vector<InferenceEngine::Precision> inputPrecision = {
InferenceEngine::Precision::I8,
InferenceEngine::Precision::U8,
InferenceEngine::Precision::I16,
InferenceEngine::Precision::I32,
InferenceEngine::Precision::FP32
const std::vector<ElementType> inputPrecisions = {
ElementType::f32,
ElementType::bf16,
ElementType::i8
};
std::vector<SliceSpecificParams> test_cases = {
SliceSpecificParams{ { 16 }, { 4 }, { 12 }, { 1 }, { 0 } },
SliceSpecificParams{ { 16 }, { 0 }, { 8 }, { 2 }, { 0 } },
SliceSpecificParams{ { 20, 10, 5 }, { 0, 0}, { 10, 20}, { 1, 1 }, { 1, 0 } },
SliceSpecificParams{ { 1, 2, 12, 100 }, { 0, 1, 0, 1 }, { 1, 2, 5, 100 }, { 1, 1, 1, 10 }, {} },
SliceSpecificParams{ { 1, 12, 100 }, { 0, 9, 0 }, { 1, 11, 1 }, { 1, 1, 1 }, {} },
SliceSpecificParams{ { 1, 12, 100 }, { 0, 1, 0 }, { 10, -1, 10 }, { 1, 1, 1 }, {} },
SliceSpecificParams{ { 2, 12, 100 }, { 1, 12, 100 }, { 0, 7, 0 }, { -1, -1, -1 }, {} },
SliceSpecificParams{ { 2, 12, 100 }, { 1, 4, 99 }, { 0, 9, 0 }, { -1, 2, -1 }, {} },
SliceSpecificParams{ { 2, 12, 100 }, { -1, -1, -1 }, { 0, 4, 0 }, { -1, -2, -1 }, {} },
SliceSpecificParams{ { 2, 12, 100 }, { -1, -1, -1 }, { 0, 0, 4 }, { -1, -1, -1 }, {2, 0, 1} },
SliceSpecificParams{ { 2, 12, 100 }, { 0, 0, 4 }, { -5, -1, -1 }, { 1, 2, 1 }, {2, 0, 1} },
SliceSpecificParams{ { 2, 2, 2, 2 }, { 0, 0, 0, 0 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, {} },
SliceSpecificParams{ { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, {} },
SliceSpecificParams{ { 2, 2, 4, 3 }, { 0, 0, 0, 0 }, { 2, 2, 4, 3 }, { 1, 1, 2, 1 }, {} },
SliceSpecificParams{ { 2, 2, 4, 2 }, { 1, 0, 0, 1 }, { 2, 2, 4, 2 }, { 1, 1, 2, 1 }, {} },
SliceSpecificParams{ { 1, 2, 4, 2 }, { 0, 1, 0, 1 }, { 10, 2, 4, 2 }, { 1, 1, 2, 1 }, {} },
SliceSpecificParams{ { 10, 2, 4, 2 }, { 9, 1, 3, 0 }, { 0, 0, 0, 1 }, { -1, -1, -1, 1 }, {} },
SliceSpecificParams{ { 10, 2, 4, 2 }, { 19, 1, -1, 0 }, { -10, 0, 0, -1 }, { -1, -1, -1, 1 }, {} },
SliceSpecificParams{ { 3, 2, 4, 200 }, { 0, 1, -1, -1 }, { 3, 2, 0, 0 }, { 1, 1, -2, -1 }, {} },
SliceSpecificParams{ { 2, 4, 5, 5, 68 }, { 0, 1, 0, 0, 0 }, {
const std::vector<ElementType> inputPrecisionsOther = {
ElementType::i64,
ElementType::i32,
ElementType::i16,
ElementType::u8
};
std::vector<Slice8SpecificParams> staticParams = {
Slice8SpecificParams{ {{{}, {{ 16 }}}}, { 4 }, { 12 }, { 1 }, { 0 } },
Slice8SpecificParams{ {{{}, {{ 16 }}}}, { 0 }, { 8 }, { 2 }, { 0 } },
Slice8SpecificParams{ {{{}, {{ 20, 10, 5 }}}}, { 0, 0}, { 10, 20}, { 1, 1 }, { 1, 0 } },
Slice8SpecificParams{ {{{}, {{ 1, 2, 12, 100 }}}}, { 0, 1, 0, 1 }, { 1, 2, 5, 100 }, { 1, 1, 1, 10 }, {} },
Slice8SpecificParams{ {{{}, {{ 1, 12, 100 }}}}, { 0, 9, 0 }, { 1, 11, 1 }, { 1, 1, 1 }, { 0, 1, -1 } },
Slice8SpecificParams{ {{{}, {{ 1, 12, 100 }}}}, { 0, 1, 0 }, { 10, -1, 10 }, { 1, 1, 1 }, { -3, -2, -1} },
Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { 1, 12, 100 }, { 0, 7, 0 }, { -1, -1, -1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { 1, 4, 99 }, { 0, 9, 0 }, { -1, 2, -1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { -1, -1, -1 }, { 0, 4, 0 }, { -1, -2, -1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { -1, -1, -1 }, { 0, 0, 4 }, { -1, -1, -1 }, {2, 0, 1} },
Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { 0, 0, 4 }, { -5, -1, -1 }, { 1, 2, 1 }, {2, 0, 1} },
Slice8SpecificParams{ {{{}, {{ 2, 2, 2, 2 }}}}, { 0, 0, 0, 0 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 2, 2, 2 }}}}, { 1, 1, 1, 1 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 2, 4, 3 }}}}, { 0, 0, 0, 0 }, { 2, 2, 4, 3 }, { 1, 1, 2, 1 }, { -4, 1, -2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 2, 4, 2 }}}}, { 1, 0, 0, 1 }, { 2, 2, 4, 2 }, { 1, 1, 2, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 1, 2, 4, 2 }}}}, { 0, 1, 0, 1 }, { 10, 2, 4, 2 }, { 1, 1, 2, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 1, 2, 4, 2 }}}}, { 1, 0, 1, 0 }, { 2, 4, 2, 10 }, { 1, 2, 1, 1 }, { -1, -2, -3, -4 } },
Slice8SpecificParams{ {{{}, {{ 10, 2, 4, 2 }}}}, { 9, 1, 3, 0 }, { 0, 0, 0, 1 }, { -1, -1, -1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 10, 2, 4, 2 }}}}, { 19, 1, -1, 0 }, { -10, 0, 0, -1 }, { -1, -1, -1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 3, 2, 4, 200 }}}}, { 0, 1, -1, -1 }, { 3, 2, 0, 0 }, { 1, 1, -2, -1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 4, 5, 5, 68 }}}}, { 0, 1, 0, 0, 0 }, {
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max() }, { 1, 1, 1, 1, 16 }, {} },
SliceSpecificParams{ { 10, 12 }, { -1, 1 }, { -9999, 10 }, { -1, 1 }, {} },
SliceSpecificParams{ { 5, 5, 5, 5 }, { -1, 0, -1, 0 }, { -50, -1, -60, -1 }, { -1, 1, -1, 1 }, {} },
SliceSpecificParams{ { 1, 5, 32, 32 }, { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
SliceSpecificParams{ { 1, 5, 32, 20 }, { 0, 1, 0, 0 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
SliceSpecificParams{ { 2, 5, 32, 20 }, { 0, 0, 10, 0 }, { 1, 3, 20, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
SliceSpecificParams{ { 1, 5, 32, 32 }, { 0, 0, 20, 20 }, { 1, 5, 25, 26 }, { 1, 1, 1, 2 }, { 0, 1, 2, 3 } },
SliceSpecificParams{ { 2, 5, 32, 32 }, { 0, 0, 0, 20 }, { 1, 2, 30, 30 }, { 1, 1, 2, 1 }, { 0, 1, 2, 3 } },
SliceSpecificParams{ { 1, 5, 32, 20 }, { 0, 0, 2, 10 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
SliceSpecificParams{ { 2, 5, 32, 32 }, { 0, 1, 0, 10 }, { 1, 5, 32, 30 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
SliceSpecificParams{ { 1, 5, 32, 20 }, { 0, 1, 2, 10 }, { 1, 5, 32, 18 }, { 1, 1, 1, 2 }, { 0, 1, 2, 3 } },
SliceSpecificParams{ { 2, 8, 32, 20 }, { 0, 0, 2, 10 }, { 1, 8, 32, 18 }, { 1, 2, 1, 2 }, { 0, 1, 2, 3 } },
SliceSpecificParams{ { 2, 8, 32, 20 }, { 0, -20, -15 }, { 2, -5, 3 }, { 1, 1, 1 }, { 0, 2, 1 } },
// Plugin Error: Slice has zero dimension which is not allowed
// SliceSpecificParams{ { 2, 8, 32, 20 }, { 0, 0, 10 }, { 0, 32, 18 }, { 1, 1, 1 }, { 0, 1, 2 } },
// SliceSpecificParams{ { 2, 8, 32, 20 }, { 0, 0, 10 }, { 1, 0, 20 }, { 1, 1, 1 }, { 0, 1, 2 } },
// SliceSpecificParams{ { 2, 8, 32, 20 }, { 0, 4, 10 }, { 2, 8, 0 }, { 1, 1, 1 }, { 0, 1, 2 } },
// SliceSpecificParams{ { 2, 8, 32, 20 }, { 0, 4, 10 }, { 2, 8, 0 }, { 1, 1, 1 }, { 0, 2, 1 } },
// SliceSpecificParams{ { 2, 8, 32, 20 }, { 0, 4, 10 }, { 2, 8, 0 }, { 1, 1, 1 }, { 0, -2, -1 } },
Slice8SpecificParams{ {{{}, {{ 10, 12 }}}}, { -1, 1 }, { -9999, 10 }, { -1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 5, 5, 5, 5 }}}}, { -1, 0, -1, 0 }, { -50, -1, -60, -1 }, { -1, 1, -1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 32 }}}}, { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 20 }}}}, { 0, 1, 0, 0 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 5, 32, 20 }}}}, { 0, 0, 10, 0 }, { 1, 3, 20, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 32 }}}}, { 0, 0, 20, 20 }, { 1, 5, 25, 26 }, { 1, 1, 1, 2 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 5, 32, 32 }}}}, { 0, 0, 0, 20 }, { 1, 2, 30, 30 }, { 1, 1, 2, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 20 }}}}, { 0, 0, 2, 10 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 5, 32, 32 }}}}, { 0, 1, 0, 10 }, { 1, 5, 32, 30 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 20 }}}}, { 0, 1, 2, 10 }, { 1, 5, 32, 18 }, { 1, 1, 1, 2 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 8, 32, 20 }}}}, { 0, 0, 2, 10 }, { 1, 8, 32, 18 }, { 1, 2, 1, 2 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 8, 32, 20 }}}}, { 0, -20, -15 }, { 2, -5, 3 }, { 1, 1, 1 }, { 0, 2, 1 } }
};
INSTANTIATE_TEST_SUITE_P(
smoke_MKLDNN, SliceLayerTest,
INSTANTIATE_TEST_SUITE_P(smoke_Static, Slice8LayerTest,
::testing::Combine(
::testing::ValuesIn(test_cases),
::testing::ValuesIn(inputPrecision),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::ValuesIn(staticParams),
::testing::ValuesIn(inputPrecisions),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(std::map<std::string, std::string>())),
SliceLayerTest::getTestCaseName);
Slice8LayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_PrecisionTransformation, Slice8LayerTest,
::testing::Combine(
::testing::Values(staticParams[0]),
::testing::ValuesIn(inputPrecisionsOther),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(std::map<std::string, std::string>())),
Slice8LayerTest::getTestCaseName);
std::vector<Slice8SpecificParams> dynamicParams = {
Slice8SpecificParams{ {{{ -1 }, {{ 8 }, { 16 }}}}, { 4 }, { 12 }, { 1 }, { 0 } },
Slice8SpecificParams{ {{{ ov::Dimension(2, 20) }, {{ 5 }, { 15 }}}}, { 0 }, { 8 }, { 2 }, { 0 } },
Slice8SpecificParams{ {{{ -1, -1, -1 }, {{ 20, 10, 5 }, {5, 10, 20}}}}, { 0, 0}, { 10, 20}, { 1, 1 }, { 1, 0 } },
Slice8SpecificParams{ {{{ -1, -1, -1, -1 }, {{ 1, 2, 12, 100 }}}}, { 0, 1, 0, 1 }, { 1, 2, 5, 100 }, { 1, 1, 1, 10 }, {} },
Slice8SpecificParams{ {{{ -1, ov::Dimension(2, 20), -1 }, {{ 1, 12, 100 }, { 2, 12, 100 }}}}, { 0, 9, 0 }, { 1, 11, 1 }, { 1, 1, 1 }, {} },
Slice8SpecificParams{ {{{ ov::Dimension(1, 5), ov::Dimension(1, 5), ov::Dimension(1, 5), ov::Dimension(1, 5) },
{{ 2, 2, 2, 2 }, { 2, 2, 4, 3 }, { 2, 2, 4, 2 }, { 1, 2, 4, 2 }}}},
{ 0, 0, 0, 0 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, {} },
Slice8SpecificParams{ {{{ -1, ov::Dimension(1, 5), ov::Dimension(1, 5), -1 }, {{ 10, 2, 4, 2 }, { 10, 4, 2, 2 }}}},
{ 9, 1, 3, 0 }, { 0, 0, 0, 1 }, { -1, -1, -1, 1 }, {} },
Slice8SpecificParams{ {{{ -1, ov::Dimension(1, 5), -1, -1, ov::Dimension(30, 70) }, {{ 2, 4, 5, 5, 68 }, { 2, 3, 7, 7, 33 }}}},
{ 0, 1, 0, 0, 0 }, {
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max() }, { 1, 1, 1, 1, 16 }, {} },
Slice8SpecificParams{ {{{ov::Dimension(1, 5), ov::Dimension(1, 7), ov::Dimension(1, 35), ov::Dimension(1, 35)},
{{ 1, 5, 32, 32 }, { 2, 5, 32, 20 }, { 2, 5, 32, 32 }}}}, { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } }
};
INSTANTIATE_TEST_SUITE_P(smoke_Dynamic, Slice8LayerTest,
::testing::Combine(
::testing::ValuesIn(dynamicParams),
::testing::ValuesIn(inputPrecisions),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(std::map<std::string, std::string>())),
Slice8LayerTest::getTestCaseName);
} // namespace
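For orientation, the first field of each dynamic Slice8SpecificParams entry above pairs a (possibly dynamic) PartialShape pattern with the static shapes the test actually infers with; every target shape has to be compatible with the pattern. A minimal sketch of that convention (illustrative only, not part of this commit; the helper name is an assumption):
#include <cassert>
#include <vector>
#include <openvino/core/partial_shape.hpp>
#include <openvino/core/shape.hpp>
// Illustrative sketch: the { -1, ov::Dimension(2, 20), -1 } entry above pairs a
// dynamic bound pattern with the static shapes that are run against it.
static void check_targets() {
    ov::PartialShape pattern{-1, ov::Dimension(2, 20), -1};           // dynamic bounds
    std::vector<ov::Shape> targets{{1, 12, 100}, {2, 12, 100}};       // shapes actually inferred
    for (const auto& s : targets)
        assert(pattern.compatible(ov::PartialShape(s)));              // each target must fit the pattern
}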

View File

@ -150,6 +150,36 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes
{GeluTanh, {{}}}
};
std::vector<Precision> netPrc = {
Precision::BF16,
Precision::FP32
};
/* ============= Activation (1D) ============= */
std::vector<CPUSpecificParams> cpuParams_3D = {
CPUSpecificParams({nCw16c}, {nCw16c}, {}, {}),
CPUSpecificParams({nwc}, {nwc}, {}, {}),
CPUSpecificParams({ncw}, {ncw}, {}, {})
};
std::vector<std::vector<ov::Shape>> basic3D = {
{{2, 4, 4}},
{{2, 17, 5}},
};
const auto basicCases3D = ::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(basic3D)),
::testing::Values(activationShapes),
::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes)),
::testing::ValuesIn(netPrc),
::testing::Values(Precision::FP32),
::testing::Values(Precision::FP32),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_3D))
);
INSTANTIATE_TEST_SUITE_P(smoke_Activation3D_Eltwise_CPU_BF16, ActivationLayerCPUTest, basicCases3D, ActivationLayerCPUTest::getTestCaseName);
/* ============= Activation (2D) ============= */
std::vector<CPUSpecificParams> cpuParams_4D = {
CPUSpecificParams({nChw16c}, {nChw16c}, {}, {}),
CPUSpecificParams({nhwc}, {nhwc}, {}, {}),
@ -161,10 +191,6 @@ std::vector<std::vector<ov::Shape>> basic4D = {
{{2, 17, 5, 4}}
};
std::vector<Precision> netPrc = {
Precision::BF16,
Precision::FP32
};
const auto basicCases4D = ::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(basic4D)),
@ -178,6 +204,7 @@ const auto basicCases4D = ::testing::Combine(
INSTANTIATE_TEST_SUITE_P(smoke_Activation4D_Eltwise_CPU_BF16, ActivationLayerCPUTest, basicCases4D, ActivationLayerCPUTest::getTestCaseName);
/* ============= Activation (3D) ============= */
std::vector<CPUSpecificParams> cpuParams_5D = {
CPUSpecificParams({nCdhw16c}, {nCdhw16c}, {}, {}),
CPUSpecificParams({ndhwc}, {ndhwc}, {}, {}),

View File

@ -0,0 +1,146 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "functional_test_utils/ov_tensor_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "test_utils/cpu_test_utils.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
using namespace ngraph::opset3;
using namespace ov::test;
namespace CPULayerTestsDefinitions {
using BucketizeCPUParamsTuple = std::tuple<InputShape, // Data shape
InputShape, // Buckets shape
bool, // Right edge of interval
ElementType, // Data input precision
ElementType, // Buckets input precision
ElementType // Output precision
>;
class BucketizeLayerCPUTest : public testing::WithParamInterface<BucketizeCPUParamsTuple>,
virtual public SubgraphBaseTest {
public:
static std::string getTestCaseName(const testing::TestParamInfo<BucketizeCPUParamsTuple>& obj) {
InputShape dataShape;
InputShape bucketsShape;
bool with_right_bound;
ElementType inDataPrc;
ElementType inBucketsPrc;
ElementType netPrc;
std::tie(dataShape, bucketsShape, with_right_bound, inDataPrc, inBucketsPrc, netPrc) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::partialShape2str({dataShape.first}) << "_"
<< CommonTestUtils::partialShape2str({bucketsShape.first}) << "_";
result << "TS=";
for (const auto& item : dataShape.second) {
result << CommonTestUtils::vec2str(item) << "_";
}
result << "BS=";
for (const auto& item : bucketsShape.second) {
result << CommonTestUtils::vec2str(item) << "_";
}
result << "with_right_bound=" << with_right_bound;
result << "inDataPrc=" << inDataPrc << "_";
result << "inBucketsPrc=" << inBucketsPrc << "_";
result << "netPrc=" << netPrc << "_";
return result.str();
}
void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override {
inputs.clear();
const auto& funcInputs = function->inputs();
auto data_size = shape_size(targetInputStaticShapes[0]);
ov::runtime::Tensor tensorData = ov::test::utils::create_and_fill_tensor(funcInputs[0].get_element_type(),
targetInputStaticShapes[0],
data_size * 5,
0,
10,
7235346);
ov::runtime::Tensor tensorBucket =
ov::test::utils::create_and_fill_tensor_unique_sequence(funcInputs[1].get_element_type(),
targetInputStaticShapes[1],
0,
10,
8234231);
inputs.insert({funcInputs[0].get_node_shared_ptr(), tensorData});
inputs.insert({funcInputs[1].get_node_shared_ptr(), tensorBucket});
}
protected:
void SetUp() override {
InputShape dataShape;
InputShape bucketsShape;
bool with_right_bound;
ElementType inDataPrc;
ElementType inBucketsPrc;
ElementType netPrc;
targetDevice = CommonTestUtils::DEVICE_CPU;
std::tie(dataShape, bucketsShape, with_right_bound, inDataPrc, inBucketsPrc, netPrc) = this->GetParam();
init_input_shapes({dataShape, bucketsShape});
auto data = std::make_shared<ngraph::op::Parameter>(inDataPrc, inputDynamicShapes[0]);
data->set_friendly_name("a_data");
auto buckets = std::make_shared<ngraph::op::Parameter>(inBucketsPrc, inputDynamicShapes[1]);
buckets->set_friendly_name("b_buckets");
auto bucketize = std::make_shared<ngraph::op::v3::Bucketize>(data, buckets, netPrc, with_right_bound);
function = std::make_shared<ngraph::Function>(std::make_shared<ngraph::opset1::Result>(bucketize),
ngraph::ParameterVector{data, buckets},
"Bucketize");
}
};
TEST_P(BucketizeLayerCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
run();
}
namespace {
const std::vector<ov::test::InputShape> dataShapesDynamic = {
{{ngraph::Dimension(1, 10), ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic()},
{{1, 20, 20}, {3, 16, 16}, {10, 16, 16}}},
{{ngraph::Dimension(1, 10), 3, 50, 50}, {{1, 3, 50, 50}, {2, 3, 50, 50}, {10, 3, 50, 50}}}};
const std::vector<ov::test::InputShape> bucketsShapesDynamic = {{{ngraph::Dimension::dynamic()}, {{5}, {20}, {100}}}};
const std::vector<ov::test::ElementType> inPrc = {ov::element::f32, ov::element::i64, ov::element::i32};
const std::vector<ov::test::ElementType> outPrc = {ov::element::i64, ov::element::i32};
const auto test_Bucketize_right_edge_Dynamic = ::testing::Combine(::testing::ValuesIn(dataShapesDynamic),
::testing::ValuesIn(bucketsShapesDynamic),
::testing::Values(true),
::testing::ValuesIn(inPrc),
::testing::ValuesIn(inPrc),
::testing::ValuesIn(outPrc));
const auto test_Bucketize_left_edge_Dynamic = ::testing::Combine(::testing::ValuesIn(dataShapesDynamic),
::testing::ValuesIn(bucketsShapesDynamic),
::testing::Values(false),
::testing::ValuesIn(inPrc),
::testing::ValuesIn(inPrc),
::testing::ValuesIn(outPrc));
INSTANTIATE_TEST_SUITE_P(smoke_TestsBucketize_right_Dynamic,
BucketizeLayerCPUTest,
test_Bucketize_right_edge_Dynamic,
BucketizeLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TestsBucketize_left_Dynamic,
BucketizeLayerCPUTest,
test_Bucketize_left_edge_Dynamic,
BucketizeLayerCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions
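For orientation, these cases exercise Bucketize-3, which maps each data element to the index of the interval it falls into, with with_right_bound choosing whether the right edge of each interval is included. A small sketch with a worked example (illustrative only, not part of this commit; the helper name is an assumption):
#include <memory>
#include <openvino/opsets/opset8.hpp>
// Illustrative sketch: with boundaries {1, 4, 10} and with_right_bound = true the
// intervals are (-inf, 1], (1, 4], (4, 10], (10, +inf), so data
// {0.5, 1.0, 3.0, 10.0, 11.0} is expected to map to bucket indices {0, 0, 1, 2, 3};
// with with_right_bound = false the boundary value 1.0 maps to bucket 1 instead.
static std::shared_ptr<ov::Node> make_bucketize_example() {
    auto data    = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::Shape{5});
    auto buckets = ov::opset8::Constant::create(ov::element::f32, ov::Shape{3}, {1.0f, 4.0f, 10.0f});
    return std::make_shared<ov::opset8::Bucketize>(data, buckets, ov::element::i32, /*with_right_bound=*/true);
}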

View File

@ -155,6 +155,8 @@ protected:
PluginConfigParams::YES == configuration[PluginConfigParams::KEY_ENFORCE_BF16]) {
selectedType += "_BF16";
rel_threshold = 1e-2f;
if (selectedType == "jit_gemm_BF16")
rel_threshold = 0.05f;
} else {
selectedType = makeSelectedTypeStr(selectedType, netType);
}
@ -180,7 +182,7 @@ TEST_P(ConvolutionLayerCPUTest, CompareWithRefs) {
// Skip tests for sse41 convolution where ic or oc cannot be exactly divided by the block size,
// since tails processing for sse41 nspc layout is not supported yet (see 52736).
if (!inFmts.empty() && (inFmts.front() == nhwc || inFmts.front() == ndhwc) && selectedType.find("jit_sse") != std::string::npos) {
if (!inFmts.empty() && (inFmts.front() == nwc || inFmts.front() == nhwc || inFmts.front() == ndhwc) && selectedType.find("jit_sse") != std::string::npos) {
auto inpChannels = function->get_parameters().front()->get_partial_shape()[1].get_length();
auto outChannels = function->get_output_partial_shape(0)[1].get_length();
if ((inpChannels % 8) || (outChannels % 8)) {
@ -229,11 +231,67 @@ const std::vector<fusingSpecificParams> fusingParamsSetBF16{
};
/* ============= Convolution params (GEMM layout) ============= */
const SizeVector numOutChannels_Gemm = {6 };
const SizeVector numOutChannels_Gemm = { 6 };
/* ============= Convolution params (blocked and nspc layout) ============= */
const SizeVector numOutChannels = { 64, 63 };
/* ============= Convolution params (1D) ============= */
const std::vector<SizeVector> kernels1d = { {3}, {1} };
const std::vector<SizeVector> strides1d = { {1}, {2} };
const std::vector<std::vector<ptrdiff_t>> padBegins1d = { {0}, {1} };
const std::vector<std::vector<ptrdiff_t>> padEnds1d = { {0} };
const std::vector<SizeVector> dilations1d = { {1}, {2} };
std::vector<InputShape> inputShapes1d = {
{{}, {{ 2, 64, 7 }}},
{{}, {{ 1, 67, 7 }}},
{
//dynamic shape
{ -1, 64, {1, 200} },
{ //target static shapes
{ 2, 64, 7 },
{ 1, 64, 9 }
}
},
{
//dynamic shape
{ -1, 67, {1, 200} },
{ //target static shapes
{ 2, 67, 7 },
{ 1, 67, 9 }
}
},
{
//dynamic shape
{ {1, 200}, 64, -1 },
{ //target static shapes
{ 2, 64, 7 },
{ 1, 64, 5 }
}
}
};
std::vector<InputShape> inputShapesPlain2Blocked1d = {
{{}, {{1, 1, 7}}},
{{}, {{1, 2, 7}}},
{{}, {{1, 3, 7}}},
{
//dynamic shapes
{-1, 1, {1, 200}},
{ //target static shapes
{2, 1, 7},
{1, 1, 9}
}
},
{
//dynamic shapes
{-1, 3, {1, 200}},
{ //target static shapes
{2, 3, 7},
{1, 3, 9}
}
}
};
/* ============= Convolution params (2D) ============= */
const std::vector<SizeVector> kernels2d = { {3, 3}, {1, 1} };
const std::vector<SizeVector> strides2d = { {1, 1}, {2, 2} };
@ -332,6 +390,76 @@ std::vector<InputShape> inputShapesPlain2Blocked3d = {
/* ============= */
/* INSTANCES */
/* ============= Convolution (Gemm 1D) ============= */
const auto convParams_ExplicitPadding_GEMM_1D = ::testing::Combine(
::testing::ValuesIn(kernels1d),
::testing::ValuesIn(strides1d),
::testing::ValuesIn(padBegins1d),
::testing::ValuesIn(padEnds1d),
::testing::ValuesIn(dilations1d),
::testing::ValuesIn(numOutChannels_Gemm),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_GEMM_1D = {
conv_gemm_1D,
conv_gemm_1D_nspc
};
std::vector<InputShape> inShapesGemm1D = {
{{}, {{ 2, 12, 7 }}},
{
//dynamic shape
{ {1, 200}, 12, {1, 200} },
{ //target static shapes
{ 2, 12, 7 },
{ 1, 12, 5 }
}
}
};
INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_GEMM_FP32, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_GEMM_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inShapesGemm1D),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_GEMM_1D)),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_GEMM_BF16, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_GEMM_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inShapesGemm1D),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_1D})), // todo: [AV] what about conv_gemm_1D_nspc?
::testing::ValuesIn(fusingParamsSetBF16),
::testing::Values(cpuBF16PluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_GEMM_I8, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_GEMM_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::i8),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inShapesGemm1D),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_GEMM_1D)),
::testing::Values(fusingSum),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
/* ============= Convolution (Gemm 2D) ============= */
const auto convParams_ExplicitPadding_GEMM_2D = ::testing::Combine(
::testing::ValuesIn(kernels2d),
@ -576,6 +704,102 @@ INSTANTIATE_TEST_SUITE_P(Conv_3D_GEMM_I8_dilated, ConvolutionLayerCPUTest,
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
/* ============= Convolution (1D) ============= */
const auto convParams_ExplicitPadding_1D = ::testing::Combine(
::testing::ValuesIn(kernels1d),
::testing::ValuesIn(strides1d),
::testing::ValuesIn(padBegins1d),
::testing::ValuesIn(padEnds1d),
::testing::ValuesIn(dilations1d),
::testing::ValuesIn(numOutChannels),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_1D = {
conv_sse42_1D,
conv_avx2_1D,
conv_avx512_1D,
conv_sse42_1D_nspc,
conv_avx2_1D_nspc,
conv_avx512_1D_nspc
};
INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_FP32, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes1d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1D)),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_BF16, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes1d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_1D})), // todo: [AV] what about conv_avx512_1D_nspc?
::testing::ValuesIn(fusingParamsSetBF16),
::testing::Values(cpuBF16PluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_I8, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::i8),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes1d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1D)),
::testing::Values(fusingSum),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
const std::vector<CPUSpecificParams> CPUParams_1D_plain_to_blocked = {
conv_sse42_plain_to_blocked_1D,
conv_avx2_plain_to_blocked_1D,
conv_avx512_plain_to_blocked_1D,
};
INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_PlainToBlocked_FP32, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapesPlain2Blocked1d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1D_plain_to_blocked)),
::testing::Values(emptyFusingSpec),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_PlainToBlocked_BF16, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapesPlain2Blocked1d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_plain_to_blocked_1D})),
::testing::Values(emptyFusingSpec),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
/* ============= Convolution (2D) ============= */
const auto convParams_ExplicitPadding_2D = ::testing::Combine(
::testing::ValuesIn(kernels2d),
@ -696,7 +920,7 @@ const std::vector<CPUSpecificParams> CPUParams_2D_plain_to_blocked = {
conv_avx512_plain_to_blocked_2D,
};
INSTANTIATE_TEST_SUITE_P(smoke_Conv_PlainToBlocked_2D_FP32, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_PlainToBlocked_FP32, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_2D,
@ -710,7 +934,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_PlainToBlocked_2D_FP32, ConvolutionLayerCPUT
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Conv_PlainToBlocked_2D_BF16, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_PlainToBlocked_BF16, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_2D,
@ -870,7 +1094,7 @@ const std::vector<CPUSpecificParams> CPUParams_3D_plain_to_blocked = {
conv_avx512_plain_to_blocked_3D,
};
INSTANTIATE_TEST_SUITE_P(smoke_Conv_PlainToBlocked_3D_FP32, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_PlainToBlocked_FP32, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_3D,
@ -884,7 +1108,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_PlainToBlocked_3D_FP32, ConvolutionLayerCPUT
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Conv_PlainToBlocked_3D_BF16, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_PlainToBlocked_BF16, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_3D,
@ -926,6 +1150,69 @@ INSTANTIATE_TEST_SUITE_P(Conv_PlainToBlocked_3D_BF16_dilated, ConvolutionLayerCP
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
/* ============= Kernel_1x1 (1D) ============= */
const auto convParams_ExplicitPadding_1x1_1D = ::testing::Combine(
::testing::Values(SizeVector({1})),
::testing::Values(SizeVector({1})),
::testing::Values(std::vector<ptrdiff_t>({0})),
::testing::Values(std::vector<ptrdiff_t>({0})),
::testing::Values(SizeVector({1})),
::testing::Values(63),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_1x1_1D = {
conv_sse42_1D_1x1,
conv_avx2_1D_1x1,
conv_avx512_1D_1x1,
conv_sse42_1D_1x1_nspc,
conv_avx2_1D_1x1_nspc,
conv_avx512_1D_1x1_nspc
};
INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_FP32, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_1x1_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes1d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1x1_1D)),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_BF16, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_1x1_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes1d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_1D_1x1, conv_avx512_2D_1x1_nspc})),
::testing::ValuesIn(fusingParamsSetBF16),
::testing::Values(cpuBF16PluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_I8, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_1x1_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::i8),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes1d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1x1_1D)),
::testing::Values(fusingSum),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
/* ============= Kernel_1x1 (2D) ============= */
const auto convParams_ExplicitPadding_1x1_2D = ::testing::Combine(
@ -989,56 +1276,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_I8, ConvolutionLayerCPUTest,
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
/* ============= Convolution (1D) ============= */
/* ============= Convolution params (1D) ============= */
const std::vector<SizeVector> kernels1d = { {3} };
const std::vector<SizeVector> strides1d = { {1}, {2} };
const std::vector<std::vector<ptrdiff_t>> padBegins1d = { {0}, {1} };
const std::vector<std::vector<ptrdiff_t>> padEnds1d = { {0} };
const std::vector<SizeVector> dilations1d = { {1}, {2} };
const auto convParams_1D = ::testing::Combine(
::testing::ValuesIn(kernels1d),
::testing::ValuesIn(strides1d),
::testing::ValuesIn(padBegins1d),
::testing::ValuesIn(padEnds1d),
::testing::ValuesIn(dilations1d),
::testing::ValuesIn(numOutChannels),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_1D = {
conv_sse42_1D,
conv_avx2_1D,
conv_avx512_1D
};
std::vector<InputShape> inShapes1D = {
{{}, {{ 2, 64, 7 }}},
{
//dynamic shape
{ {1, 200}, 64, -1 },
{ //target static shapes
{ 2, 64, 7 },
{ 1, 64, 5 }
}
}
};
INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inShapes1D),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1D)),
::testing::Values(fusingAddPerChannel),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
/* ============= Jit Planar ============= */
/* ============= Convolution planar params (2D) ============= */
@ -1068,7 +1305,7 @@ const auto convParams_Planar_ExplicitPadding_2D_dilated = ::testing::Combine(
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
INSTANTIATE_TEST_SUITE_P(smoke_Conv_Jit_Planar_2D_FP32, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_Jit_Planar_FP32, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_Planar_ExplicitPadding_2D,
@ -1082,7 +1319,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_Jit_Planar_2D_FP32, ConvolutionLayerCPUTest,
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(Conv_Jit_Planar_2D_FP32_dilated, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_SUITE_P(Conv_2D_Jit_Planar_FP32_dilated, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_Planar_ExplicitPadding_2D_dilated,
@ -1123,7 +1360,7 @@ const auto convParams_Planar_ExplicitPadding_3D_dilated = ::testing::Combine(
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
INSTANTIATE_TEST_SUITE_P(smoke_Conv_Jit_Planar_3D_FP32, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_Jit_Planar_FP32, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_Planar_ExplicitPadding_3D,
@ -1137,7 +1374,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_Jit_Planar_3D_FP32, ConvolutionLayerCPUTest,
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(Conv_Jit_Planar_3D_FP32_dilated, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_SUITE_P(Conv_3D_Jit_Planar_FP32_dilated, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_Planar_ExplicitPadding_3D_dilated,

View File

@ -521,4 +521,4 @@
//INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D_MemOrder_dyn_param, EltwiseLayerCPUTest, params_5D_dyn_param, EltwiseLayerCPUTest::getTestCaseName);
//
//} // namespace
//} // namespace CPULayerTestsDefinitions
//} // namespace CPULayerTestsDefinitions

View File

@ -253,6 +253,13 @@ const SizeVector numGroups_Blocked = {2, 4};
const SizeVector numOutChannels_DW = {32};
const SizeVector numGroups_DW = {32};
/* ============= GroupConvolution params (1D) ============= */
const std::vector<SizeVector> kernels1d = { {3}, {1} };
const std::vector<SizeVector> strides1d = { {1}, {2} };
const std::vector<std::vector<ptrdiff_t>> padBegins1d = { {0}, {1} };
const std::vector<std::vector<ptrdiff_t>> padEnds1d = { {0} };
const std::vector<SizeVector> dilations1d = { {1}, {2} };
/* ============= GroupConvolution params (2D) ============= */
const std::vector<SizeVector> kernels2d = {{3, 3}, {1, 1}};
const std::vector<SizeVector> strides2d = {{1, 1}, {2, 2}};
@ -270,6 +277,63 @@ const std::vector<SizeVector> dilations3d = {{1, 1, 1}, {2, 2, 2}};
/* INSTANCES */
/* ============= GroupConvolution (GEMM 1D) ============= */
const auto groupConvParams_ExplicitPadding_Gemm_1D = ::testing::Combine(
::testing::ValuesIn(kernels1d),
::testing::ValuesIn(strides1d),
::testing::ValuesIn(padBegins1d),
::testing::ValuesIn(padEnds1d),
::testing::ValuesIn(dilations1d),
::testing::ValuesIn(numOutChannels_Gemm),
::testing::ValuesIn(numGroups_Gemm),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_Gemm_1D = {
conv_gemm_1D,
conv_gemm_1D_nspc
};
std::vector<InputShape> inShapesGemm1D = {
{{}, {{ 2, 12, 7 }}},
{
//dynamic shape
{{1, 200}, 12, {1, 200}},
{ //target static shapes
{ 2, 12, 7 },
{ 1, 12, 5 }
}
}
};
INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_1D_Gemm_FP32, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
groupConvParams_ExplicitPadding_Gemm_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inShapesGemm1D),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Gemm_1D)),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuEmptyPluginConfig)),
GroupConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_1D_Gemm_BF16, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
groupConvParams_ExplicitPadding_Gemm_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inShapesGemm1D),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_1D})), // todo: [AV] what about conv_gemm_1D_nspc?
::testing::ValuesIn(fusingParamsSetBF16),
::testing::Values(cpuBF16PluginConfig)),
GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= GroupConvolution (GEMM 2D) ============= */
const auto groupConvParams_ExplicitPadding_Gemm_2D = ::testing::Combine(
::testing::ValuesIn(kernels2d),
@ -384,6 +448,89 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_3D_Gemm_BF16, GroupConvolutionLayerCPUT
::testing::Values(cpuBF16PluginConfig)),
GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= GroupConvolution (1D) ============= */
const auto groupConvParams_ExplicitPadding_1D = ::testing::Combine(
::testing::ValuesIn(kernels1d),
::testing::ValuesIn(strides1d),
::testing::ValuesIn(padBegins1d),
::testing::ValuesIn(padEnds1d),
::testing::ValuesIn(dilations1d),
::testing::ValuesIn(numOutChannels_Blocked),
::testing::ValuesIn(numGroups_Blocked),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_1D = {
conv_sse42_1D,
conv_avx2_1D,
conv_avx512_1D,
conv_sse42_1D_nspc,
conv_avx2_1D_nspc,
conv_avx512_1D_nspc
};
std::vector<InputShape> inputShapes1d = {
{{}, {{ 2, 64, 7 }}},
{
//dynamic shapes
{-1, 64, {1, 200}},
{ //target static shapes
{ 2, 64, 7 },
{ 1, 64, 9 }
}
},
{
//dynamic shapes
{ {-1, 64, -1} },
{ //target static shapes
{ 2, 64, 7 },
{ 1, 64, 14 }
}
}
};
INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_1D_FP32, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
groupConvParams_ExplicitPadding_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes1d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1D)),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuEmptyPluginConfig)),
GroupConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_1D_FP32_fusingBias, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
groupConvParams_ExplicitPadding_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes1d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1D)),
::testing::Values(fusingAddPerChannel),
::testing::Values(cpuEmptyPluginConfig)),
GroupConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_1D_BF16, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
groupConvParams_ExplicitPadding_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes1d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_1D})), // todo: [AV] what about conv_avx512_1D_nspc?
::testing::ValuesIn(fusingParamsSetBF16),
::testing::Values(cpuBF16PluginConfig)),
GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= GroupConvolution (2D) ============= */
const auto groupConvParams_ExplicitPadding_2D = ::testing::Combine(
::testing::ValuesIn(kernels2d),
@ -505,6 +652,71 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_3D_BF16, GroupConvolutionLayerCPUTest,
::testing::Values(cpuBF16PluginConfig)),
GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= GroupConvolution (DW 1D) ============= */
const auto groupConvParams_ExplicitPadding_DW_1D = ::testing::Combine(
::testing::ValuesIn(kernels1d),
::testing::ValuesIn(strides1d),
::testing::ValuesIn(padBegins1d),
::testing::ValuesIn(padEnds1d),
::testing::ValuesIn(dilations1d),
::testing::ValuesIn(numOutChannels_DW),
::testing::ValuesIn(numGroups_DW),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_DW_1D = {
conv_sse42_dw_1D,
conv_avx2_dw_1D,
conv_avx512_dw_1D,
conv_sse42_dw_1D_nspc,
conv_avx2_dw_1D_nspc,
conv_avx512_dw_1D_nspc
};
std::vector<InputShape> inputShapes1dDW = {
{{}, {{ 2, 32, 7 }}},
{
//dynamic shapes
{-1, 32, {1, 200}},
{ //target static shapes
{ 2, 32, 7 },
{ 1, 32, 9 }
}
}
};
INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_1D_DW_FP32, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
groupConvParams_ExplicitPadding_DW_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes1dDW),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_sse42_dw_1D,
conv_avx2_dw_1D,
conv_avx512_dw_1D})), // todo: [AV] what about conv_sse42_dw_1D_nspc,
// conv_avx2_dw_1D_nspc, conv_avx512_dw_1D_nspc?
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuEmptyPluginConfig)),
GroupConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_1D_DW_BF16, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
groupConvParams_ExplicitPadding_DW_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes1dDW),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_1D})), // todo: [AV] what about conv_avx512_dw_1D_nspc?
::testing::ValuesIn(fusingParamsSetBF16),
::testing::Values(cpuBF16PluginConfig)),
GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= GroupConvolution (DW 2D) ============= */
const auto groupConvParams_ExplicitPadding_DW_2D = ::testing::Combine(
::testing::ValuesIn(kernels2d),
@ -965,57 +1177,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_JIT_AVX512_DW_GroupConv, GroupConvolutionLayerCPU
/* ============= JIT AVX512 PLANAR Convolution (not supported with groups) ============= */
/* ============================================= */
/* ============= Convolution (1D) ============= */
/* ============= Convolution params (1D) ============= */
const std::vector<SizeVector> kernels1d = { {3} };
const std::vector<SizeVector> strides1d = { {1}, {2} };
const std::vector<std::vector<ptrdiff_t>> padBegins1d = { {0}, {1} };
const std::vector<std::vector<ptrdiff_t>> padEnds1d = { {0} };
const std::vector<SizeVector> dilations1d = { {1}, {2} };
const auto convParams_1D = ::testing::Combine(
::testing::ValuesIn(kernels1d),
::testing::ValuesIn(strides1d),
::testing::ValuesIn(padBegins1d),
::testing::ValuesIn(padEnds1d),
::testing::ValuesIn(dilations1d),
::testing::ValuesIn(numOutChannels_Blocked),
::testing::ValuesIn(numGroups_Blocked),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_1D = {
conv_sse42_1D,
conv_avx2_1D,
conv_avx512_1D
};
std::vector<InputShape> inputShapes1d = {
{{}, {{ 2, 64, 7 }}},
{
//dynamic shapes
{ {-1, 64, -1} },
{ //target static shapes
{ 2, 64, 7 },
{ 1, 64, 14 }
}
}
};
INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_1D, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_1D,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes1d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1D)),
::testing::Values(fusingAddPerChannel),
::testing::Values(cpuEmptyPluginConfig)),
GroupConvolutionLayerCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions

View File

@ -136,8 +136,7 @@ public:
continue;
}
if (inType != ov::element::Type_t::undefined) {
p.input(ov::preprocess::InputInfo(i)
.tensor(ov::preprocess::InputTensorInfo().set_element_type(inType)));
p.input(i).tensor().set_element_type(inType);
}
}
}
@ -145,8 +144,7 @@ public:
auto results = function->get_results();
for (size_t i = 0; i < results.size(); i++) {
if (outType != ov::element::Type_t::undefined) {
p.output(ov::preprocess::OutputInfo(i)
.tensor(ov::preprocess::OutputTensorInfo().set_element_type(outType)));
p.output(i).tensor().set_element_type(outType);
}
}
}

View File

@ -141,6 +141,34 @@ const auto ref = CPUSpecificParams{{}, {}, {"ref_any"}, "ref_any"};
const std::vector<CPUSpecificParams> vecCpuConfigs = {ref, sse42, avx, avx512};
const std::vector<ElementType> inpOutPrecision = {ElementType::f32/*, ElementType::bf16*/};
const std::vector<InputShape> inputShapes3D = {
{ {}, {{3, 4, 64}} },
{ {}, {{2, 8, 12}} },
{ {}, {{1, 16, 12}} },
{ {}, {{1, 21, 4}} },
{ {}, {{1, 32, 8}} },
{
// dynamic
{-1, -1, -1},
// target
{
{1, 32, 8},
{1, 21, 4},
{2, 8, 12}
}
},
{
// dynamic
{{1, 5}, {4, 32}, {1, 64}},
// target
{
{3, 4, 64},
{1, 16, 12},
{1, 32, 8}
}
}
};
const std::vector<InputShape> inputShapes4D = {
{ {}, {{3, 4, 64, 64}} },
{ {}, {{2, 8, 8, 12}} },
@ -197,6 +225,61 @@ const std::vector<InputShape> inputShapes5D = {
}
};
/* ============= Pooling (1D) ============= */
const std::vector<LayerTestsDefinitions::poolSpecificParams> paramsMax3D = {
LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2}, {2}, {0}, {0},
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false },
LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {4}, {2}, {0}, {0},
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false },
LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2}, {1}, {0}, {0},
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false },
};
const std::vector<LayerTestsDefinitions::poolSpecificParams> paramsAvg3D = {
LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {3}, {1}, {1}, {0},
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER, false },
LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {3}, {1}, {1}, {0},
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, true },
LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {4}, {4}, {2}, {2},
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, true },
};
const std::vector<LayerTestsDefinitions::poolSpecificParams> paramsAvg3D_RefOnly = {
LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2}, {2}, {2}, {2},
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false },
};
INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_CPU_3D, PoolingLayerCPUTest,
::testing::Combine(
::testing::ValuesIn(paramsMax3D),
::testing::ValuesIn(inputShapes3D),
::testing::ValuesIn(inpOutPrecision),
::testing::Values(false),
::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)),
::testing::Values(emptyFusingSpec)),
PoolingLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_3D, PoolingLayerCPUTest,
::testing::Combine(
::testing::ValuesIn(paramsAvg3D),
::testing::ValuesIn(inputShapes3D),
::testing::ValuesIn(inpOutPrecision),
::testing::Values(false),
::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)),
::testing::Values(emptyFusingSpec)),
PoolingLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_3D_NotOptimized, PoolingLayerCPUTest,
::testing::Combine(
::testing::ValuesIn(paramsAvg3D_RefOnly),
::testing::ValuesIn(inputShapes3D),
::testing::ValuesIn(inpOutPrecision),
::testing::Values(false),
::testing::Values(ref),
::testing::Values(emptyFusingSpec)),
PoolingLayerCPUTest::getTestCaseName);
/* ============= Pooling (2D) ============= */
const std::vector<LayerTestsDefinitions::poolSpecificParams> paramsMax4D = {
LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2, 2}, {2, 2}, {0, 0}, {0, 0},
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER, false },
@ -258,6 +341,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_4D_NotOptimized, PoolingLayerCPUTest,
::testing::Values(emptyFusingSpec)),
PoolingLayerCPUTest::getTestCaseName);
/* ============= Pooling (3D) ============= */
const std::vector<LayerTestsDefinitions::poolSpecificParams> paramsMax5D = {
LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0},
ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER, false },

View File

@ -4,11 +4,13 @@
#include "ngraph_functions/builders.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
using namespace ov::test;
struct regionYoloAttributes {
size_t classes;
@ -20,78 +22,86 @@ struct regionYoloAttributes {
};
using regionYoloParamsTuple = std::tuple<
ngraph::Shape, // Input Shape
regionYoloAttributes, // Params
InputShape, // Input Shape
regionYoloAttributes, // Params
std::vector<int64_t>, // mask
InferenceEngine::Precision, // Network input precision
InferenceEngine::Precision, // Network output precision
ov::test::ElementType, // Network input precision
ov::test::ElementType, // Network output precision
std::map<std::string, std::string>, // Additional network configuration
std::string>; // Device name
class RegionYoloCPULayerTest : public testing::WithParamInterface<regionYoloParamsTuple>,
virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
virtual public ov::test::SubgraphBaseTest, public CPUTestsBase {
public:
static std::string getTestCaseName(testing::TestParamInfo<regionYoloParamsTuple> obj) {
ngraph::Shape inputShape;
InputShape inputShape;
regionYoloAttributes attributes;
std::vector<int64_t> mask;
InferenceEngine::Precision inpPrecision;
InferenceEngine::Precision outPrecision;
ov::test::ElementType inpPrecision;
ov::test::ElementType outPrecision;
std::string targetName;
std::map<std::string, std::string> additionalConfig;
std::tie(inputShape, attributes, mask, inpPrecision, outPrecision, additionalConfig, targetName) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
result << "IS=" << inputShape << "_";
result << "classes=" << attributes.classes << "_";
result << "coords=" << attributes.coordinates << "_";
result << "num=" << attributes.num_regions << "_";
result << "doSoftmax=" << attributes.do_softmax << "_";
result << "axis=" << attributes.start_axis << "_";
result << "endAxis=" << attributes.end_axis << "_";
result << "inpPRC=" << inpPrecision.name() << "_";
result << "outPRC=" << outPrecision.name() << "_";
result << "inpPRC=" << inpPrecision << "_";
result << "outPRC=" << outPrecision << "_";
result << "targetDevice=" << targetName << "_";
return result.str();
}
protected:
void SetUp() override {
ngraph::Shape inputShape;
InputShape inputShape;
regionYoloAttributes attributes;
std::vector<int64_t> mask;
ov::test::ElementType inPrc;
ov::test::ElementType outPrc;
std::map<std::string, std::string> additionalConfig;
std::tie(inputShape, attributes, mask, inPrc, outPrc, additionalConfig, targetDevice) = this->GetParam();
if (inPrc == ov::test::ElementType::bf16) {
// ticket #72342
rel_threshold = 0.02;
}
init_input_shapes({ inputShape });
configuration.insert(additionalConfig.begin(), additionalConfig.end());
selectedType = getPrimitiveType() + "_" + inPrc.name();
const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
auto paramRegionYolo = ngraph::builder::makeParams(ngPrc, {inputShape});
selectedType = getPrimitiveType() + "_" + InferenceEngine::details::convertPrecision(inPrc).name();
auto paramRegionYolo = ngraph::builder::makeDynamicParams(inPrc, inputDynamicShapes);
const auto region_yolo = std::make_shared<ngraph::op::v0::RegionYolo>(paramRegionYolo[0],
attributes.coordinates, attributes.classes, attributes.num_regions,
attributes.do_softmax, mask, attributes.start_axis, attributes.end_axis);
function = makeNgraphFunction(ngPrc, paramRegionYolo, region_yolo, "RegionYolo");
function = makeNgraphFunction(inPrc, paramRegionYolo, region_yolo, "RegionYolo");
}
};
TEST_P(RegionYoloCPULayerTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
run();
CheckPluginRelatedResults(executableNetwork, "RegionYolo");
}
namespace {
const std::vector<Precision> inpOutPrc = {Precision::BF16, Precision::FP32};
const std::vector<ov::test::ElementType> inpOutPrc = {ov::test::ElementType::bf16, ov::test::ElementType::f32};
const std::map<std::string, std::string> additional_config;
/* *======================* Static Shapes *======================* */
const std::vector<ngraph::Shape> inShapes_caffe = {
{1, 125, 13, 13}
};
@ -114,6 +124,24 @@ const std::vector<ngraph::Shape> inShapes_v3 = {
{1, 255, 13, 13}
};
/* *======================* Dynamic Shapes *======================* */
const std::vector<InputShape> inShapes_caffe_dynamic = {
{{-1, -1, -1, -1}, {{1, 125, 13, 13}, {1, 125, 26, 26}}},
{{{1, 2}, {100, 125}, {13, 26}, {13, 26}}, {{1, 125, 13, 13}, {1, 125, 26, 26}}}
};
const std::vector<InputShape> inShapes_mxnet_dynamic = {
{{-1, -1, -1, -1}, {{1, 75, 52, 52}, {1, 75, 32, 32}, {1, 75, 26, 26}}},
{{{1, 2}, {75, 80}, {26, 52}, {26, 52}}, {{1, 75, 52, 52}, {1, 75, 32, 32}, {1, 75, 26, 26}}},
};
const std::vector<InputShape> inShapes_v3_dynamic = {
{{-1, -1, -1, -1}, {{1, 255, 52, 52}, {1, 255, 26, 26}, {1, 255, 13, 13}}},
{{{1, 2}, {255, 256}, {13, 52}, {13, 52}}, {{1, 255, 52, 52}, {1, 255, 26, 26}, {1, 255, 13, 13}}}
};
const std::vector<std::vector<int64_t>> masks = {
{0, 1, 2},
{3, 4, 5},
@ -127,7 +155,17 @@ const std::vector<size_t> num_regions = {5, 9};
const regionYoloAttributes yoloV3attr = {80, 4, 9, false, 1, 3};
const auto testCase_yolov3 = ::testing::Combine(
::testing::ValuesIn(inShapes_v3),
::testing::ValuesIn(static_shapes_to_test_representation(inShapes_v3)),
::testing::Values(yoloV3attr),
::testing::Values(masks[2]),
::testing::ValuesIn(inpOutPrc),
::testing::ValuesIn(inpOutPrc),
::testing::Values(additional_config),
::testing::Values(CommonTestUtils::DEVICE_CPU)
);
const auto testCase_yolov3_dynamic = ::testing::Combine(
::testing::ValuesIn(inShapes_v3_dynamic),
::testing::Values(yoloV3attr),
::testing::Values(masks[2]),
::testing::ValuesIn(inpOutPrc),
@ -139,7 +177,17 @@ const auto testCase_yolov3 = ::testing::Combine(
const regionYoloAttributes yoloV3mxnetAttr = {20, 4, 9, false, 1, 3};
const auto testCase_yolov3_mxnet = ::testing::Combine(
::testing::ValuesIn(inShapes_mxnet),
::testing::ValuesIn(static_shapes_to_test_representation(inShapes_mxnet)),
::testing::Values(yoloV3mxnetAttr),
::testing::Values(masks[1]),
::testing::ValuesIn(inpOutPrc),
::testing::ValuesIn(inpOutPrc),
::testing::Values(additional_config),
::testing::Values(CommonTestUtils::DEVICE_CPU)
);
const auto testCase_yolov3_mxnet_dynamic = ::testing::Combine(
::testing::ValuesIn(inShapes_mxnet_dynamic),
::testing::Values(yoloV3mxnetAttr),
::testing::Values(masks[1]),
::testing::ValuesIn(inpOutPrc),
@ -151,7 +199,7 @@ const auto testCase_yolov3_mxnet = ::testing::Combine(
const regionYoloAttributes yoloV2caffeAttr = {20, 4, 5, true, 1, 3};
const auto testCase_yolov2_caffe = ::testing::Combine(
::testing::ValuesIn(inShapes_caffe),
::testing::ValuesIn(static_shapes_to_test_representation(inShapes_caffe)),
::testing::Values(yoloV2caffeAttr),
::testing::Values(masks[0]),
::testing::ValuesIn(inpOutPrc),
@ -160,8 +208,21 @@ const auto testCase_yolov2_caffe = ::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYolov3CPU, RegionYoloCPULayerTest, testCase_yolov3, RegionYoloCPULayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloMxnetCPU, RegionYoloCPULayerTest, testCase_yolov3_mxnet, RegionYoloCPULayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloCaffeCPU, RegionYoloCPULayerTest, testCase_yolov2_caffe, RegionYoloCPULayerTest::getTestCaseName);
const auto testCase_yolov2_caffe_dynamic = ::testing::Combine(
::testing::ValuesIn(inShapes_caffe_dynamic),
::testing::Values(yoloV2caffeAttr),
::testing::Values(masks[0]),
::testing::ValuesIn(inpOutPrc),
::testing::ValuesIn(inpOutPrc),
::testing::Values(additional_config),
::testing::Values(CommonTestUtils::DEVICE_CPU)
);
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYolov3CPUStatic, RegionYoloCPULayerTest, testCase_yolov3, RegionYoloCPULayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYolov3CPUDynamic, RegionYoloCPULayerTest, testCase_yolov3_dynamic, RegionYoloCPULayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloMxnetCPUStatic, RegionYoloCPULayerTest, testCase_yolov3_mxnet, RegionYoloCPULayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloMxnetCPUDynamic, RegionYoloCPULayerTest, testCase_yolov3_mxnet_dynamic, RegionYoloCPULayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloCaffeCPUStatic, RegionYoloCPULayerTest, testCase_yolov2_caffe, RegionYoloCPULayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloCaffeCPUDynamic, RegionYoloCPULayerTest, testCase_yolov2_caffe_dynamic, RegionYoloCPULayerTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions
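Side note: the dynamic-shape vectors above (inShapes_caffe_dynamic and friends) follow the {dynamic shape, target static shapes} convention used throughout these tests: -1 marks a fully dynamic dimension, {lo, hi} a bounded one, and every target static shape must fit inside the dynamic shape. The standalone sketch below mirrors that convention with hypothetical types; BoundedDim, shapeFits, and the sample shapes are illustrative only and are not the actual ov::test::InputShape implementation.

#include <cstdint>
#include <iostream>
#include <vector>

// lo == -1 and hi == -1 model a fully dynamic dimension; otherwise an inclusive [lo, hi] bound.
struct BoundedDim {
    std::int64_t lo;
    std::int64_t hi;
    bool fits(std::int64_t d) const {
        return (lo < 0 || d >= lo) && (hi < 0 || d <= hi);
    }
};

using StaticShape = std::vector<std::int64_t>;

// A target static shape fits a dynamic shape if it has the same rank and every dimension is within bounds.
bool shapeFits(const std::vector<BoundedDim>& dyn, const StaticShape& st) {
    if (dyn.size() != st.size()) return false;
    for (std::size_t i = 0; i < dyn.size(); ++i)
        if (!dyn[i].fits(st[i])) return false;
    return true;
}

int main() {
    // Mirrors the bounded entry {{1, 2}, {100, 125}, {13, 26}, {13, 26}} with its two target shapes.
    const std::vector<BoundedDim> dyn = {{1, 2}, {100, 125}, {13, 26}, {13, 26}};
    const std::vector<StaticShape> targets = {{1, 125, 13, 13}, {1, 125, 26, 26}};
    for (const auto& t : targets)
        std::cout << std::boolalpha << shapeFits(dyn, t) << "\n";  // prints true twice
    return 0;
}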

View File

@ -2,38 +2,86 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <shared_test_classes/single_layer/roi_pooling.hpp>
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "common_test_utils/file_utils.hpp"
#include "functional_test_utils/ov_tensor_utils.hpp"
#include "common_test_utils/data_utils.hpp"
#include "ie_common.h"
#include "test_utils/cpu_test_utils.hpp"
#include "utils/bfloat16.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
using namespace ov::test;
namespace CPULayerTestsDefinitions {
enum ProposalGenerationMode { RANDOM, ULTIMATE_RIGHT_BORDER };
using ROIPoolingCPUTestParamsSet = std::tuple<LayerTestsDefinitions::roiPoolingParamsTuple,
using roiPoolingShapes = std::vector<InputShape>;
using roiPoolingParams = std::tuple<
roiPoolingShapes, // Input shapes
std::vector<size_t>, // Pooled shape {pooled_h, pooled_w}
float, // Spatial scale
ngraph::helpers::ROIPoolingTypes, // ROIPooling method
InferenceEngine::Precision, // Net precision
LayerTestsUtils::TargetDevice>; // Device name
using ROIPoolingCPUTestParamsSet = std::tuple<roiPoolingParams,
CPUSpecificParams,
ProposalGenerationMode,
std::map<std::string, std::string>>;
class ROIPoolingCPULayerTest : public testing::WithParamInterface<ROIPoolingCPUTestParamsSet>,
virtual public LayerTestsUtils::LayerTestsCommon,
public ov::test::SubgraphBaseTest,
public CPUTestsBase {
public:
static std::string getTestCaseName(testing::TestParamInfo<ROIPoolingCPUTestParamsSet> obj) {
LayerTestsDefinitions::roiPoolingParamsTuple basicParamsSet;
roiPoolingParams basicParamsSet;
CPUSpecificParams cpuParams;
ProposalGenerationMode propMode;
std::map<std::string, std::string> additionalConfig;
std::tie(basicParamsSet, cpuParams, propMode, additionalConfig) = obj.param;
roiPoolingShapes inputShapes;
std::vector<size_t> poolShape;
float spatial_scale;
ngraph::helpers::ROIPoolingTypes pool_method;
InferenceEngine::Precision netPrecision;
std::string targetDevice;
std::tie(inputShapes, poolShape, spatial_scale, pool_method, netPrecision, targetDevice) = basicParamsSet;
std::ostringstream result;
result << "netPRC=" << netPrecision.name() << "_";
for (const auto& shape : inputShapes) {
result << CommonTestUtils::partialShape2str({ shape.first }) << "_";
}
result << "TS=";
for (const auto& shape : inputShapes) {
result << "(";
if (!shape.second.empty()) {
auto itr = shape.second.begin();
do {
result << CommonTestUtils::vec2str(*itr);
} while (++itr != shape.second.end() && result << "_");
}
result << ")_";
}
result << LayerTestsDefinitions::ROIPoolingLayerTest::getTestCaseName(
testing::TestParamInfo<LayerTestsDefinitions::roiPoolingParamsTuple>(basicParamsSet, 0));
result << CPUTestsBase::getTestCaseName(cpuParams);
result << "PS=" << CommonTestUtils::vec2str(poolShape) << "_";
result << "Scale=" << spatial_scale << "_";
switch (pool_method) {
case ngraph::helpers::ROIPoolingTypes::ROI_MAX:
result << "Max_";
break;
case ngraph::helpers::ROIPoolingTypes::ROI_BILINEAR:
result << "Bilinear_";
break;
}
result << "trgDev=" << targetDevice;
if (!additionalConfig.empty()) {
result << "_PluginConf";
for (auto &item : additionalConfig) {
@ -55,116 +103,132 @@ public:
}
protected:
void GenerateInputs() override {
auto feat_map_shape = cnnNetwork.getInputShapes().begin()->second;
void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override {
const ProposalGenerationMode propMode = std::get<2>(this->GetParam());
const float spatial_scale = std::get<2>(std::get<0>(this->GetParam()));
const ngraph::helpers::ROIPoolingTypes pool_method = std::get<3>(std::get<0>(this->GetParam()));
inputs.clear();
const auto& funcInputs = function->inputs();
auto feat_map_shape = targetInputStaticShapes[0];
const auto is_roi_max_mode = (pool_method == ngraph::helpers::ROIPoolingTypes::ROI_MAX);
const int height = is_roi_max_mode ? feat_map_shape[2] / spatial_scale : 1;
const int width = is_roi_max_mode ? feat_map_shape[3] / spatial_scale : 1;
size_t it = 0;
for (const auto &input : cnnNetwork.getInputsInfo()) {
const auto &info = input.second;
InferenceEngine::Blob::Ptr blob;
void (*propGenerator)(InferenceEngine::Blob::Ptr &);
switch (propMode) {
case ULTIMATE_RIGHT_BORDER:
for (size_t i = 0; i < funcInputs.size(); ++i) {
const auto& funcInput = funcInputs[i];
ov::runtime::Tensor tensor;
if (i == 1) {
tensor = ov::runtime::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
if (propMode == ULTIMATE_RIGHT_BORDER) {
// because floating point arithmetic is inexact (division and multiplication round), the following values cause the inequality:
// ((end_h - start_h) * (input_h - 1) / (pooled_h - 1)) * (pooled_h - 1) > (end_h - start_h) * (input_h - 1)
// and, as a result, the proposal value exceeds the right limit when the border case (current_h == pooled_h - 1)
// is not handled explicitly
propGenerator = [](InferenceEngine::Blob::Ptr &blob) {
auto *data = blob->buffer().as<float *>();
for (size_t i = 0; i < blob->size(); i += 5) {
data[i] = 0;
data[i + 1] = 0.f;
data[i + 2] = 0.248046786f;
data[i + 3] = 0.471333951f;
data[i + 4] = 1.f;
switch (funcInput.get_element_type()) {
case ngraph::element::f32: {
auto* dataPtr = tensor.data<float>();
for (size_t i = 0; i < tensor.get_size(); i += 5) {
dataPtr[i] = 0;
dataPtr[i + 1] = 0.f;
dataPtr[i + 2] = 0.248046786f;
dataPtr[i + 3] = 0.471333951f;
dataPtr[i + 4] = 1.f;
}
};
break;
case RANDOM:
default:
propGenerator = nullptr;
}
if (it == 1) {
blob = make_blob_with_precision(info->getTensorDesc());
blob->allocate();
switch (inPrc) {
case Precision::FP32: {
CommonTestUtils::fill_data_roi<Precision::FP32>
(blob, feat_map_shape[0] - 1, height, width, 1.0f, is_roi_max_mode, 1, propGenerator);
break;
}
case Precision::BF16: {
CommonTestUtils::fill_data_roi<Precision::BF16>
(blob, feat_map_shape[0] - 1, height, width, 1.0f, is_roi_max_mode, 1, propGenerator);
break;
}
default:
IE_THROW() << "roi_pooling. Unsupported precision";
break;
break;
}
case ngraph::element::bf16: {
auto* dataPtr = tensor.data<std::int16_t>();
for (size_t i = 0; i < tensor.get_size(); i += 5) {
dataPtr[i] = static_cast<std::int16_t>(ngraph::float16(0.f).to_bits());
dataPtr[i + 1] = static_cast<std::int16_t>(ngraph::float16(0.f).to_bits());
dataPtr[i + 2] = static_cast<std::int16_t>(ngraph::float16(0.248046786f).to_bits());
dataPtr[i + 3] = static_cast<std::int16_t>(ngraph::float16(0.471333951f).to_bits());
dataPtr[i + 4] = static_cast<std::int16_t>(ngraph::float16(1.f).to_bits());
}
break;
}
default:
IE_THROW() << "roi_pooling. Unsupported precision";
}
} else {
switch (funcInput.get_element_type()) {
case ngraph::element::f32: {
CommonTestUtils::fill_data_roi<InferenceEngine::Precision::FP32>(tensor, feat_map_shape[0] - 1, height, width, 1.f, is_roi_max_mode);
break;
}
case ngraph::element::bf16: {
CommonTestUtils::fill_data_roi<InferenceEngine::Precision::BF16>(tensor, feat_map_shape[0] - 1, height, width, 1.f, is_roi_max_mode);
break;
}
default:
IE_THROW() << "roi_pooling. Unsupported precision";
}
}
} else {
blob = GenerateInput(*info);
tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 10, 0, 1000);
}
inputs.push_back(blob);
it++;
inputs.insert({ funcInput.get_node_shared_ptr(), tensor });
}
}
void SetUp() override {
LayerTestsDefinitions::roiPoolingParamsTuple basicParamsSet;
roiPoolingParams basicParamsSet;
CPUSpecificParams cpuParams;
ProposalGenerationMode propMode;
std::map<std::string, std::string> additionalConfig;
InferenceEngine::SizeVector inputShape;
InferenceEngine::SizeVector coordsShape;
InferenceEngine::SizeVector poolShape;
InferenceEngine::Precision netPrecision;
std::tie(basicParamsSet, cpuParams, propMode, additionalConfig) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
std::tie(inputShape, coordsShape, poolShape, spatial_scale, pool_method, netPrecision, targetDevice) = basicParamsSet;
roiPoolingShapes inputShapes;
std::vector<size_t> poolShape;
float spatial_scale;
ngraph::helpers::ROIPoolingTypes pool_method;
InferenceEngine::Precision netPrecision;
std::tie(inputShapes, poolShape, spatial_scale, pool_method, netPrecision, targetDevice) = basicParamsSet;
if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES)
inPrc = outPrc = netPrecision = Precision::BF16;
else
inPrc = outPrc = netPrecision;
netPrecision = Precision::BF16;
configuration.insert(additionalConfig.begin(), additionalConfig.end());
if (selectedType.empty()) {
selectedType = getPrimitiveType();
}
selectedType.push_back('_');
selectedType += netPrecision.name();
if (netPrecision == Precision::BF16) {
rel_threshold = 1e-2;
}
init_input_shapes(inputShapes);
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape, coordsShape});
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
auto params = ngraph::builder::makeDynamicParams(ngPrc, inputDynamicShapes);
auto paramOuts = ngraph::helpers::convert2OutputVector(
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
std::shared_ptr<ngraph::Node> roi_pooling = ngraph::builder::makeROIPooling(paramOuts[0], paramOuts[1], poolShape, spatial_scale, pool_method);
auto roi_pooling = ngraph::builder::makeROIPooling(paramOuts[0], paramOuts[1], poolShape, spatial_scale, pool_method);
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(roi_pooling)};
function = makeNgraphFunction(ngPrc, params, roi_pooling, "roi_pooling");
selectedType += "_";
selectedType += netPrecision.name();
function = makeNgraphFunction(ngPrc, params, roi_pooling, "ROIPooling");
functionRefs = ngraph::clone_function(*function);
}
private:
ngraph::helpers::ROIPoolingTypes pool_method;
float spatial_scale;
ProposalGenerationMode propMode;
};
TEST_P(ROIPoolingCPULayerTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
run();
CheckPluginRelatedResults(executableNetwork, "ROIPooling");
}
namespace {
std::vector<std::map<std::string, std::string>> additionalConfig
= {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}},
{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}};
std::vector<std::map<std::string, std::string>> additionalConfig{
{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}},
{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}
};
/* have to select a particular implementation type, since currently
* nodes always choose the best one */
@ -183,29 +247,92 @@ std::vector<CPUSpecificParams> selectCPUInfoForDevice() {
return resCPUParams;
}
const std::vector<std::vector<size_t>> inShapes = {{1, 3, 8, 8},
{3, 4, 50, 50}};
const std::vector<roiPoolingShapes> inShapes = {
roiPoolingShapes{{{}, {{1, 3, 8, 8}}}, {{}, {{1, 5}}}},
roiPoolingShapes{{{}, {{1, 3, 8, 8}}}, {{}, {{3, 5}}}},
roiPoolingShapes{{{}, {{3, 4, 50, 50}}}, {{}, {{3, 5}}}},
roiPoolingShapes{{{}, {{3, 4, 50, 50}}}, {{}, {{5, 5}}}},
roiPoolingShapes{
// input 0
{
// dynamic
{-1, -1, -1, -1},
// static
{
{3, 4, 50, 50}, {3, 4, 50, 50}, {3, 4, 50, 50}, {1, 3, 8, 8}, {1, 3, 8, 8}, {1, 3, 8, 8}
}
},
// input 1
{
// dynamic
{-1, 5},
// static
{
{1, 5}, {3, 5}, {5, 5}, {1, 5}, {3, 5}, {5, 5}
}
},
},
roiPoolingShapes{
// input 0
{
// dynamic
{-1, {3, 5}, {7, 60}, -1},
// static
{
{3, 4, 50, 50}, {1, 3, 7, 8}, {1, 5, 59, 8}, {3, 5, 60, 8},
}
},
// input 1
{
// dynamic
{{1, 5}, 5},
// static
{
{1, 5}, {3, 5}, {4, 5}, {5, 5}
}
},
},
roiPoolingShapes{
// input 0
{
// dynamic
{{1, 8}, {3, 5}, {7, 60}, {5, 50}},
// static
{
{3, 4, 50, 50}, {1, 3, 7, 8}, {8, 5, 59, 5}, {3, 5, 60, 8},
}
},
// input 1
{
// dynamic
{{1, 5}, 5},
// static
{
{1, 5}, {2, 5}, {4, 5}, {5, 5}
}
},
},
};
const std::vector<std::vector<size_t>> pooledShapes_max = {{1, 1},
{2, 2},
{3, 3},
{6, 6}};
const std::vector<std::vector<size_t>> pooledShapes_max = {
{1, 1},
{2, 2},
{3, 3},
{6, 6}
};
const std::vector<std::vector<size_t>> pooledShapes_bilinear = {{1, 1},
{2, 2},
{3, 3},
{6, 6}};
const std::vector<std::vector<size_t>> coordShapes = {{1, 5},
{3, 5},
{5, 5}};
const std::vector<std::vector<size_t>> pooledShapes_bilinear = {
{1, 1},
{2, 2},
{3, 3},
{6, 6}
};
const std::vector<InferenceEngine::Precision> netPRCs = {InferenceEngine::Precision::FP32, InferenceEngine::Precision::BF16};
const std::vector<float> spatial_scales = {0.625f, 1.f};
const auto test_ROIPooling_max = ::testing::Combine(::testing::ValuesIn(inShapes),
::testing::ValuesIn(coordShapes),
::testing::ValuesIn(pooledShapes_max),
::testing::ValuesIn(spatial_scales),
::testing::Values(ngraph::helpers::ROIPoolingTypes::ROI_MAX),
@ -213,7 +340,6 @@ const auto test_ROIPooling_max = ::testing::Combine(::testing::ValuesIn(inShapes
::testing::Values(CommonTestUtils::DEVICE_CPU));
const auto test_ROIPooling_bilinear = ::testing::Combine(::testing::ValuesIn(inShapes),
::testing::ValuesIn(coordShapes),
::testing::ValuesIn(pooledShapes_bilinear),
::testing::Values(spatial_scales[1]),
::testing::Values(ngraph::helpers::ROIPoolingTypes::ROI_BILINEAR),
@ -238,8 +364,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ROIPoolingCPU_bilinear,
INSTANTIATE_TEST_SUITE_P(smoke_ROIPoolingCPU_bilinear_ultimateRightBorderProposal,
ROIPoolingCPULayerTest,
::testing::Combine(::testing::Combine(::testing::Values(std::vector<size_t> { 1, 1, 50, 50 }),
::testing::Values(std::vector<size_t> { 1, 5 }),
::testing::Combine(::testing::Combine(::testing::Values(roiPoolingShapes{{{}, {{1, 1, 50, 50}}}, {{}, {{1, 5}}}}),
::testing::Values(std::vector<size_t> { 4, 4 }),
::testing::Values(spatial_scales[1]),
::testing::Values(ngraph::helpers::ROIPoolingTypes::ROI_BILINEAR),

View File

@ -0,0 +1,554 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph_functions/builders.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
using namespace CPUTestUtils;
using namespace ov::test;
namespace CPULayerTestsDefinitions {
struct Slice8SpecificParams {
std::vector<int64_t> start;
std::vector<int64_t> stop;
std::vector<int64_t> step;
std::vector<int64_t> axes;
};
typedef std::tuple<
std::vector<InputShape>, // Parameters shapes
Slice8SpecificParams, // Slice-8 specific parameters
ElementType, // Network precision
CPUSpecificParams // CPU specific parameters
> Slice8LayerTestCPUParam;
class Slice8LayerCPUTest : public testing::WithParamInterface<Slice8LayerTestCPUParam>,
virtual public SubgraphBaseTest, public CPUTestsBase {
public:
static std::string getTestCaseName(testing::TestParamInfo<Slice8LayerTestCPUParam> obj) {
std::vector<InputShape> shapes;
Slice8SpecificParams params;
ElementType netPrecision;
CPUSpecificParams cpuParams;
std::tie(shapes, params, netPrecision, cpuParams) = obj.param;
std::ostringstream result;
result << "IS=(";
for (const auto& shape : shapes) {
result << CommonTestUtils::partialShape2str({shape.first}) << "_";
}
result << ")_TS=(";
for (const auto& shape : shapes) {
for (const auto& item : shape.second) {
result << CommonTestUtils::vec2str(item) << "_";
}
}
result << "start=" << CommonTestUtils::vec2str(params.start) << "_";
result << "stop=" << CommonTestUtils::vec2str(params.stop) << "_";
result << "step=" << CommonTestUtils::vec2str(params.step) << "_";
result << "axes=" << CommonTestUtils::vec2str(params.axes) << "_";
result << "netPRC=" << netPrecision << "_";
result << CPUTestsBase::getTestCaseName(cpuParams);
return result.str();
}
protected:
void SetUp() override {
std::vector<InputShape> shapes;
Slice8SpecificParams sliceParams;
ElementType netPrecision;
CPUSpecificParams cpuParams;
std::tie(shapes, sliceParams, netPrecision, cpuParams) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
selectedType = makeSelectedTypeStr(selectedType, netPrecision);
targetDevice = CommonTestUtils::DEVICE_CPU;
init_input_shapes(shapes);
auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes);
auto sliceOp = ngraph::builder::makeSlice(params[0], sliceParams.start, sliceParams.stop, sliceParams.step, sliceParams.axes, netPrecision);
function = makeNgraphFunction(netPrecision, params, sliceOp, "Slice8");
}
};
TEST_P(Slice8LayerCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
run();
CheckPluginRelatedResults(executableNetwork, "Slice8");
}
namespace {
const auto cpuParams_nChw16c = CPUSpecificParams {{nChw16c}, {nChw16c}, {}, {}};
const auto cpuParams_nCdhw16c = CPUSpecificParams {{nCdhw16c}, {nCdhw16c}, {}, {}};
const auto cpuParams_nChw8c = CPUSpecificParams {{nChw8c}, {nChw8c}, {}, {}};
const auto cpuParams_nCdhw8c = CPUSpecificParams {{nCdhw8c}, {nCdhw8c}, {}, {}};
const auto cpuParams_nhwc = CPUSpecificParams {{nhwc}, {nhwc}, {}, {}};
const auto cpuParams_ndhwc = CPUSpecificParams {{ndhwc}, {ndhwc}, {}, {}};
const auto cpuParams_nchw = CPUSpecificParams {{nchw}, {nchw}, {}, {}};
const auto cpuParams_ncdhw = CPUSpecificParams {{ncdhw}, {ncdhw}, {}, {}};
const std::vector<ElementType> inputPrecisions = {
ElementType::f32,
ElementType::bf16,
ElementType::i8
};
const std::vector<std::vector<InputShape>> inputShapesDynamic2D = {
{
{ // Origin dynamic shape
{-1, -1},
{ // Dynamic shapes instances
{32, 20}, {16, 16}, {24, 16}
}
}
},
{
{ // Origin dynamic shape
{-1, 16},
{ // Dynamic shapes instances
{16, 16}, {20, 16}, {32, 16}
}
}
},
{
{ // Origin dynamic shape
{ {16, 32}, {16, 32} },
{ // Dynamic shapes instances
{16, 32}, {32, 16}, {24, 24}
}
}
}
};
const std::vector<Slice8SpecificParams> paramsPlain2D = {
Slice8SpecificParams{ { 0, 10 }, { 16, 16 }, { 1, 1 }, { 0, 1 } },
Slice8SpecificParams{ { 2, 5 }, { 16, 8 }, { 1, 1 }, { } },
Slice8SpecificParams{ { 2, 5 }, { 16, 16 }, { 1, 2 }, { 0, 1 } },
Slice8SpecificParams{ { 0, 0 }, { 16, 16 }, { 1, 2 }, { 1, 0} },
Slice8SpecificParams{ { 0 }, { 16 }, { 2 }, { 0 } },
Slice8SpecificParams{ { 0 }, { 16 }, { 1 }, { 1 } }
};
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Plain_Static_2D, Slice8LayerCPUTest,
::testing::Combine(
::testing::Values(static_shapes_to_test_representation({{32, 20}})),
::testing::ValuesIn(paramsPlain2D),
::testing::ValuesIn(inputPrecisions),
::testing::Values(emptyCPUSpec)),
Slice8LayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Plain_Dynamic_2D, Slice8LayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapesDynamic2D),
::testing::ValuesIn(paramsPlain2D),
::testing::ValuesIn(inputPrecisions),
::testing::Values(emptyCPUSpec)),
Slice8LayerCPUTest::getTestCaseName);
const std::vector<Slice8SpecificParams> testCasesCommon4D = {
Slice8SpecificParams{ { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { } },
Slice8SpecificParams{ { 0, 1, 0, 0 }, { 20, 3, 32, 1 }, { 1, 1, 1, 1 }, { 3, 1, 2, 0 } },
Slice8SpecificParams{ { 0, 0, 10, 0 }, { 1, 3, 20, 20 }, { 1, 1, 1, 1 }, { } },
Slice8SpecificParams{ { 0, 0, 20, 20 }, { 1, 5, 26, 25 }, { 1, 1, 2, 1 }, { 0, 1, 3, 2 } },
Slice8SpecificParams{ { 0, 0, 0, 20 }, { 1, 2, 30, 30 }, { 1, 1, 2, 1 }, { } },
Slice8SpecificParams{ { 0, 0, 2, 10 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ { 0, 1, 0, 10 }, { 1, 5, 32, 30 }, { 1, 1, 1, 1 }, { } },
Slice8SpecificParams{ { 0, 1, 2, 10 }, { 1, 5, 32, 18 }, { 1, 1, 1, 2 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ { 0, 0, 2, 10 }, { 1, 8, 32, 18 }, { 1, 2, 1, 2 }, { } },
Slice8SpecificParams{ { 0, 0, 10 }, { 2, 32, 18 }, { 1, 1, 1 }, { 1, 2, 3 } },
Slice8SpecificParams{ { 0, 10 }, { 2, 32 }, { 1, 1 }, { 1, 3 } }
};
const std::vector<std::vector<ov::Shape>> inputShapesStatic4D = {
{{ 1, 5, 32, 32 }}, {{ 2, 5, 32, 48 }}
};
const std::vector<std::vector<InputShape>> inputShapesDynamic4D = {
{
{ // Origin dynamic shape
{-1, -1, -1, -1},
{ // Dynamic shapes instances
{ 1, 5, 32, 32 }, { 2, 5, 32, 32 }, { 1, 5, 64, 64 }
}
}
},
{
{ // Origin dynamic shape
{-1, 5, -1, -1},
{ // Dynamic shapes instances
{ 1, 5, 32, 32 }, { 2, 5, 32, 32 }, { 3, 5, 32, 36 }
}
}
},
{
{ // Origin dynamic shape
{{1, 5}, 5, {32, 64}, {32, 64}},
{ // Dynamic shapes instances
{ 2, 5, 32, 32 }, { 1, 5, 48, 32 }, { 5, 5, 32, 32 }
}
}
}
};
const std::vector<CPUSpecificParams> CPUParamsCommon4D = {
cpuParams_nchw,
cpuParams_nhwc,
};
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_4D, Slice8LayerCPUTest,
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inputShapesStatic4D)),
::testing::ValuesIn(testCasesCommon4D),
::testing::ValuesIn(inputPrecisions),
::testing::ValuesIn(CPUParamsCommon4D)),
Slice8LayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_4D, Slice8LayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapesDynamic4D),
::testing::ValuesIn(testCasesCommon4D),
::testing::ValuesIn(inputPrecisions),
::testing::ValuesIn(CPUParamsCommon4D)),
Slice8LayerCPUTest::getTestCaseName);
const std::vector<Slice8SpecificParams> testCasesBlocked4DSubset1 = {
Slice8SpecificParams{ { 0, 0, 0, 0 }, { 1, 32, 32, 32 }, { 1, 1, 1, 1 }, { } },
Slice8SpecificParams{ { 0, 0, 16, 0 }, { 1, 32, 32, 32 }, { 1, 1, 1, 1 }, { 0, 3, 2, 1 } },
Slice8SpecificParams{ { 0, 0, 0 }, { 32, 32, 16 }, { 1, 1, 1 }, { } },
Slice8SpecificParams{ { 0, 0, 0 }, { 16, 32, 32 }, { 1, 1, 1 }, { 1, 3, 2 } },
};
const std::vector<Slice8SpecificParams> testCasesBlocked4DSubset2 = {
Slice8SpecificParams{ { 0, 0, 5, 4 }, { 1, 16, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ { 0, 16, 0, 0 }, { 1, 32, 10, 10 }, { 1, 1, 1, 1 }, { } },
Slice8SpecificParams{ { 0, 0, 10, 0 }, { 16, 1, 20, 10 }, { 1, 1, 1, 1 }, { 1, 0, 2, 3 } },
Slice8SpecificParams{ { 0, 0, 20, 20 }, { 1, 32, 25, 25 }, { 1, 1, 1, 1 }, { 0, 1, 3, 2 } },
Slice8SpecificParams{ { 0, 16, 0, 20 }, { 32, 32, 1, 30 }, { 1, 1, 1, 2 }, { 2, 1, 0, 3 } },
Slice8SpecificParams{ { 0, 16, 2, 10 }, { 1, 32, 32, 20 }, { 1, 1, 2, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ { 0, 16, 0, 0 }, { 2, 64, 32, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ { 0, 32, 0, 0 }, { 2, 50, 32, 20 }, { 1, 1, 1, 1 }, { } },
Slice8SpecificParams{ { 0, 0, 0, 0 }, { 32, 12, 2, 20 }, { 1, 1, 1, 1 }, { 0, 3, 2, 1 } },
Slice8SpecificParams{ { 0, -16, 0, 10 }, { 2, 100, 32, 20 }, { 1, 1, 1, 1 }, { } },
Slice8SpecificParams{ { 0, -16, 0, 0 }, { 2, -4, 32, 20 }, { 1, 1, 1, 1 }, { } },
Slice8SpecificParams{ { 0, -32, 0, 0 }, { 2, -12, 32, 20 }, { 1, 1, 1, 1 }, { } }
};
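// The negative start/stop values above rely on Slice-8's numpy-style semantics (offsets counted
// from the end of the axis and clamped to its bounds): with the 64-channel inputs used below,
// start=-16/stop=-4 on the channel axis selects channels 48..59, and start=-32/stop=-12
// selects channels 32..51.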
const std::vector<std::vector<ov::Shape>> inputShapesBlockedStatic4DSubset1 = {
{{ 1, 32, 32, 32 }}, {{ 1, 32, 32, 64 }}
};
const std::vector<std::vector<ov::Shape>> inputShapesBlockedStatic4DSubset2 = {
{{ 1, 64, 32, 32 }}, {{ 1, 64, 32, 64 }}
};
const std::vector<std::vector<InputShape>> inputShapesBlockedDynamic4DSubset1 = {
{
{ // Origin dynamic shape
{-1, 32, -1, -1},
{ // Dynamic shapes instances
{ 1, 32, 32, 32 }, { 2, 32, 32, 32 }, { 3, 32, 32, 48 }
}
}
},
{
{ // Origin dynamic shape
{{1, 5}, 32, {32, 64}, {32, 64}},
{ // Dynamic shapes instances
{ 2, 32, 32, 32 }, { 1, 32, 48, 32 }, { 5, 32, 32, 48 }
}
}
}
};
const std::vector<std::vector<InputShape>> inputShapesBlockedDynamic4DSubset2 = {
{
{ // Origin dynamic shape
{-1, 64, -1, -1},
{ // Dynamic shapes instances
{ 1, 64, 64, 32 }, { 2, 64, 32, 32 }, { 3, 64, 32, 48 }
}
}
},
{
{ // Origin dynamic shape
{{1, 5}, 64, {32, 64}, {32, 64}},
{ // Dynamic shapes instances
{ 2, 64, 32, 32 }, { 1, 64, 48, 32 }, { 1, 64, 64, 64 }
}
}
}
};
const std::vector<CPUSpecificParams> CPUParamsBlocked4D = {
cpuParams_nChw16c,
cpuParams_nChw8c,
};
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_4D_Subset1, Slice8LayerCPUTest,
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inputShapesBlockedStatic4DSubset1)),
::testing::ValuesIn(testCasesBlocked4DSubset1),
::testing::ValuesIn(inputPrecisions),
::testing::ValuesIn(CPUParamsBlocked4D)),
Slice8LayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_4D_Subset1, Slice8LayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapesBlockedDynamic4DSubset1),
::testing::ValuesIn(testCasesBlocked4DSubset1),
::testing::ValuesIn(inputPrecisions),
::testing::ValuesIn(CPUParamsBlocked4D)),
Slice8LayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_4D_Subset2, Slice8LayerCPUTest,
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inputShapesBlockedStatic4DSubset2)),
::testing::ValuesIn(testCasesBlocked4DSubset2),
::testing::ValuesIn(inputPrecisions),
::testing::ValuesIn(CPUParamsBlocked4D)),
Slice8LayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_4D_Subset2, Slice8LayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapesBlockedDynamic4DSubset2),
::testing::ValuesIn(testCasesBlocked4DSubset2),
::testing::ValuesIn(inputPrecisions),
::testing::ValuesIn(CPUParamsBlocked4D)),
Slice8LayerCPUTest::getTestCaseName);
const std::vector<Slice8SpecificParams> testCasesCommon5D = {
Slice8SpecificParams{ { 0, 2, 0, 5, 4 }, { 1, 4, 5, 28, 27 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
Slice8SpecificParams{ { 0, 0, 10, 0, 0 }, { 1, 5, 20, 32, 20 }, { 1, 1, 1, 1, 1 }, { } },
Slice8SpecificParams{ { 0, 1, 10, 0, 0 }, { 20, 3, 20, 32, 1 }, { 1, 1, 1, 1, 1 }, { 4, 1, 2, 3, 0 } },
Slice8SpecificParams{ { 0, 20, 0, 0, 20 }, { 1, 30, 20, 5, 26 }, { 1, 1, 1, 2, 2 }, { 0, 3, 2, 1, 4 } },
Slice8SpecificParams{ { 0, 0, 10, 0, 20 }, { 1, 2, 20, 30, 30 }, { 1, 1, 2, 1, 1 }, { } },
Slice8SpecificParams{ { 0, 0, 2, 10, 0 }, { 1, 5, 10, 32, 20 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
Slice8SpecificParams{ { 0, 1, 0, 10, 0 }, { 1, 5, 20, 32, 32 }, { 1, 1, 1, 1, 1 }, { } },
Slice8SpecificParams{ { 0, 0, 0, 0, 0 }, { 1, 5, 10, 16, 16 }, { 1, 1, 2, 1, 1 }, { 0, 1, 2, 3, 4 } }
};
const std::vector<std::vector<ov::Shape>> inputShapesStatic5D = {
{{ 1, 5, 20, 32, 32 }}, {{ 2, 5, 32, 32, 32 }}
};
const std::vector<std::vector<InputShape>> inputShapesDynamic5D = {
{
{ // Origin dynamic shape
{-1, -1, -1, -1, -1},
{ // Dynamic shapes instances
{ 1, 5, 32, 32, 32 }, { 1, 5, 32, 32, 48 }, { 1, 5, 64, 64, 64 }, { 1, 10, 32, 32, 32 }
}
}
},
{
{ // Origin dynamic shape
{-1, 5, -1, -1, -1},
{ // Dynamic shapes instances
{ 1, 5, 32, 32, 48 }, { 1, 5, 32, 48, 32 }, { 1, 5, 48, 32, 32 }
}
}
},
{
{ // Origin dynamic shape
{{1, 5}, 5, {32, 64}, {32, 64}, {32, 64}},
{ // Dynamic shapes instances
{ 2, 5, 32, 32, 32 }, { 1, 5, 48, 32, 32 }, { 5, 5, 32, 32, 48 }
}
}
}
};
const std::vector<CPUSpecificParams> CPUParamsCommon5D = {
cpuParams_ncdhw,
cpuParams_ndhwc,
};
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_5D, Slice8LayerCPUTest,
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inputShapesStatic5D)),
::testing::ValuesIn(testCasesCommon5D),
::testing::ValuesIn(inputPrecisions),
::testing::ValuesIn(CPUParamsCommon5D)),
Slice8LayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_5D, Slice8LayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapesDynamic5D),
::testing::ValuesIn(testCasesCommon5D),
::testing::ValuesIn(inputPrecisions),
::testing::ValuesIn(CPUParamsCommon5D)),
Slice8LayerCPUTest::getTestCaseName);
const std::vector<Slice8SpecificParams> testCasesBlocked5DSubset1 = {
Slice8SpecificParams{ { 0, 0, 0, 5, 4 }, { 1, 16, 5, 28, 27 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
Slice8SpecificParams{ { 0, 0, 10, 0, 0 }, { 1, 16, 20, 32, 20 }, { 1, 1, 1, 1, 1 }, { } },
Slice8SpecificParams{ { 0, 0, 0, 20, 20 }, { 16, 1, 20, 26, 30 }, { 1, 1, 1, 2, 2 }, { 1, 0, 2, 4, 3 } },
Slice8SpecificParams{ { 0, 0, 10, 0, 20 }, { 1, 16, 20, 30, 30 }, { 1, 1, 2, 1, 1 }, { } },
Slice8SpecificParams{ { 0, 0, 2, 10, 0 }, { 1, 16, 10, 32, 20 }, { 1, 1, 1, 1, 1 }, { } },
Slice8SpecificParams{ { 0, 0, 0, 10, 0 }, { 1, 8, 20, 32, 32 }, { 1, 1, 1, 1, 1 }, { } },
Slice8SpecificParams{ { 0, 0, 0, 0, 0 }, { 1, 16, 10, 16, 16 }, { 1, 1, 2, 1, 1 }, { 0, 1, 2, 3, 4 } },
};
const std::vector<Slice8SpecificParams> testCasesBlocked5DSubset2 = {
Slice8SpecificParams{ { 0, 0, 0, 5, 4 }, { 1, 16, 5, 28, 27 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
Slice8SpecificParams{ { 0, 0, 5, 4 }, { 16, 5, 28, 27 }, { 1, 1, 1, 1 }, { 1, 2, 3, 4 } },
Slice8SpecificParams{ { 0, 0, 10, 0, 0 }, { 1, 16, 20, 32, 20 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
Slice8SpecificParams{ { 0, 0, 0, 20, 20 }, { 1, 20, 16, 30, 26 }, { 1, 1, 1, 2, 2 }, { 0, 2, 1, 3, 4 } },
Slice8SpecificParams{ { 0, 0, 10, 0, 20 }, { 1, 16, 20, 30, 30 }, { 1, 1, 2, 1, 1 }, { 0, 1, 2, 3, 4 } },
Slice8SpecificParams{ { 0, 0, 2, 10, 0 }, { 1, 16, 10, 32, 20 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
Slice8SpecificParams{ { 0, 0, 0, 10, 0 }, { 1, 8, 20, 32, 32 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
Slice8SpecificParams{ { 0, 0, 0, 0, 0 }, { 10, 16, 1, 16, 16 }, { 2, 1, 1, 1, 1 }, { 2, 1, 0, 3, 4 } },
Slice8SpecificParams{ { 0, 0, 0, 0, 0 }, { 1, 25, 20, 10, 10 }, { 1, 1, 1, 1, 1 }, { } },
Slice8SpecificParams{ { 0, 16, 0, 0, 0 }, { 1, 25, 20, 10, 10 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
Slice8SpecificParams{ { 0, 16, 0, 0, 0 }, { 1, 64, 20, 10, 10 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
};
const std::vector<std::vector<ov::Shape>> inputShapesBlockedStatic5DSubset1 = {
{{ 1, 16, 32, 32, 32 }}, {{ 2, 16, 32, 32, 32 }}, {{ 2, 32, 32, 32, 32 }}
};
const std::vector<std::vector<ov::Shape>> inputShapesBlockedStatic5DSubset2 = {
{{ 1, 64, 32, 32, 32 }}, {{ 2, 64, 32, 64, 32 }}, {{ 2, 64, 32, 32, 32 }}
};
const std::vector<std::vector<InputShape>> inputShapesBlockedDynamic5DSubset1 = {
{
{ // Origin dynamic shape
{-1, 16, -1, -1, -1},
{ // Dynamic shapes instances
{ 1, 16, 32, 32, 32 }, { 2, 16, 32, 32, 32 }, { 2, 16, 32, 32, 32 }
}
}
},
{
{ // Origin dynamic shape
{{1, 5}, 16, {16, 32}, {16, 32}, {16, 32}},
{ // Dynamic shapes instances
{ 1, 16, 32, 32, 32 }, { 2, 16, 32, 32, 32 }, { 1, 16, 20, 32, 32 }
}
}
}
};
const std::vector<std::vector<InputShape>> inputShapesBlockedDynamic5DSubset2 = {
{
{ // Origin dynamic shape
{-1, 64, -1, -1, -1},
{ // Dynamic shapes instances
{ 1, 64, 64, 32, 32 }, { 2, 64, 32, 32, 32 }, { 3, 64, 32, 48, 32 }
}
},
},
{
{ // Origin dynamic shape
{{1, 5}, 64, {16, 32}, {16, 32}, {16, 32}},
{ // Dynamic shapes instances
{ 1, 64, 32, 32, 32 }, { 2, 64, 32, 32, 32 }, { 1, 64, 20, 32, 32 }
}
}
}
};
const std::vector<CPUSpecificParams> CPUParamsBlocked5D = {
cpuParams_nCdhw16c,
cpuParams_nCdhw8c,
};
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_5D_Subset1, Slice8LayerCPUTest,
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inputShapesBlockedStatic5DSubset1)),
::testing::ValuesIn(testCasesBlocked5DSubset1),
::testing::ValuesIn(inputPrecisions),
::testing::ValuesIn(CPUParamsBlocked5D)),
Slice8LayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_5D_Subset1, Slice8LayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapesBlockedDynamic5DSubset1),
::testing::ValuesIn(testCasesBlocked5DSubset1),
::testing::ValuesIn(inputPrecisions),
::testing::ValuesIn(CPUParamsBlocked5D)),
Slice8LayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_5D_Subset2, Slice8LayerCPUTest,
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inputShapesBlockedStatic5DSubset2)),
::testing::ValuesIn(testCasesBlocked5DSubset2),
::testing::ValuesIn(inputPrecisions),
::testing::ValuesIn(CPUParamsBlocked5D)),
Slice8LayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_5D_Subset2, Slice8LayerCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapesBlockedDynamic5DSubset2),
::testing::ValuesIn(testCasesBlocked5DSubset2),
::testing::ValuesIn(inputPrecisions),
::testing::ValuesIn(CPUParamsBlocked5D)),
Slice8LayerCPUTest::getTestCaseName);
/* Descriptors check */
class Slice8LayerDescriptorCPUTest : public Slice8LayerCPUTest {};
TEST_P(Slice8LayerDescriptorCPUTest, DescriptorsCheck) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
ASSERT_THROW(compile_model(), ov::Exception);
}
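// Each case below is expected to make compile_model() throw: the suite is instantiated only with
// the blocked nChw8c layout, and the requested channel-axis slices (including the -4 / INT32_MAX
// bounds) are presumably configurations the CPU plugin refuses to build memory descriptors for.
// Only the ov::Exception itself is asserted; the precise rejection reason is an assumption here.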
const std::vector<Slice8SpecificParams> testCasesDescriptors = {
Slice8SpecificParams{ { 0, -4, 0, 0 }, { 0, 2147483647, 0, 0 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ { 0, 5, 0, 0 }, { 1, 20, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ { 0, 0, 0, 0 }, { 1, 2147483647, 32, 32 }, { 1, 2, 1, 1 }, { 0, 1, 2, 3 } }
};
const std::vector<std::vector<InputShape>> inputShapesDescriptors = {
{
{ {},
{ // Static shapes
{ 1, 16, 32, 32 }
}
}
},
{
{ {},
{ // Static shapes
{ 1, 17, 32, 32 }
}
}
},
{
{ // Origin dynamic shapes
{1, -1, 32, 32},
{ // Dynamic shapes instances
{ 1, 16, 32, 32 }, { 1, 32, 32, 32 }
}
}
}
};
INSTANTIATE_TEST_SUITE_P(smoke_Slice8LayerDescriptorCPUTest, Slice8LayerDescriptorCPUTest,
::testing::Combine(
::testing::ValuesIn(inputShapesDescriptors),
::testing::ValuesIn(testCasesDescriptors),
::testing::Values(ElementType::f32),
::testing::Values(cpuParams_nChw8c)),
Slice8LayerDescriptorCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions

View File

@ -219,4 +219,4 @@
//INSTANTIATE_TEST_SUITE_P(smoke_SoftMax_Unsupported_CPU, SoftMaxLayerCPUTest, UnsupportedParams, SoftMaxLayerCPUTest::getTestCaseName);
//
//} // namespace
//} // namespace CPULayerTestsDefinitions

View File

@ -7,62 +7,86 @@
#include "cpu_test_utils.hpp"
namespace CPUTestUtils {
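// Field order mirrors how the tests unpack CPUSpecificParams (inFmts, outFmts, priority, selectedType):
// {input memory formats}, {output memory formats}, {implementation priority hints}, "expected impl type".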
const auto conv_sse42_1D = CPUSpecificParams{{}, {}, {"jit_sse42"}, "jit_sse42"};
const auto conv_avx2_1D = CPUSpecificParams{{}, {}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx512_1D = CPUSpecificParams{{}, {}, {"jit_avx512"}, "jit_avx512"};
const auto conv_ref_1D = CPUSpecificParams{{ncw}, {ncw}, {"ref_any"}, "ref_any"};
const auto conv_ref_2D = CPUSpecificParams{{nchw}, {nchw}, {"ref_any"}, "ref_any"};
const auto conv_ref_3D = CPUSpecificParams{{ncdhw}, {ncdhw}, {"ref_any"}, "ref_any"};
const auto conv_gemm_1D = CPUSpecificParams{{ncw}, {ncw}, {"gemm_any"}, "jit_gemm"};
const auto conv_gemm_2D = CPUSpecificParams{{nchw}, {nchw}, {"gemm_any"}, "jit_gemm"};
const auto conv_gemm_3D = CPUSpecificParams{{ncdhw}, {ncdhw}, {"gemm_any"}, "jit_gemm"};
const auto conv_gemm_1D_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_gemm"}, "jit_gemm"};
const auto conv_gemm_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_gemm"}, "jit_gemm"};
const auto conv_gemm_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_gemm"}, "jit_gemm"};
const auto conv_sse42_1D = CPUSpecificParams{{nCw8c}, {nCw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_2D = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_3D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_dw_1D = CPUSpecificParams{{nCw8c}, {nCw8c}, {"jit_sse42_dw"}, "jit_sse42_dw"};
const auto conv_sse42_dw_2D = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_sse42_dw"}, "jit_sse42_dw"};
const auto conv_sse42_dw_3D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {"jit_sse42_dw"}, "jit_sse42_dw"};
const auto conv_sse42_plain_to_blocked_1D = CPUSpecificParams{{ncw}, {nCw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_plain_to_blocked_2D = CPUSpecificParams{{nchw}, {nChw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_plain_to_blocked_3D = CPUSpecificParams{{ncdhw}, {nCdhw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_1D_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_dw_1D_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_sse42_dw"}, "jit_sse42_dw"};
const auto conv_sse42_dw_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_sse42_dw"}, "jit_sse42_dw"};
const auto conv_sse42_dw_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_sse42_dw"}, "jit_sse42_dw"};
const auto conv_avx2_1D = CPUSpecificParams{{nCw8c}, {nCw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_2D = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_3D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_dw_1D = CPUSpecificParams{{nCw8c}, {nCw8c}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx2_dw_2D = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx2_dw_3D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx2_planar_1D = CPUSpecificParams{{ncw}, {ncw}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_planar_2D = CPUSpecificParams{{nchw}, {nchw}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_planar_3D = CPUSpecificParams{{ncdhw}, {ncdhw}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_plain_to_blocked_1D = CPUSpecificParams{{ncw}, {nCw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_plain_to_blocked_2D = CPUSpecificParams{{nchw}, {nChw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_plain_to_blocked_3D = CPUSpecificParams{{ncdhw}, {nCdhw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_1D_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_dw_1D_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx2_dw_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx2_dw_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx512_1D = CPUSpecificParams{{nCw16c}, {nCw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_2D = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_3D = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_dw_1D = CPUSpecificParams{{nCw16c}, {nCw16c}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_avx512_dw_2D = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_avx512_dw_3D = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_avx512_planar_1D = CPUSpecificParams{{ncw}, {ncw}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_planar_2D = CPUSpecificParams{{nchw}, {nchw}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_planar_3D = CPUSpecificParams{{ncdhw}, {ncdhw}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_plain_to_blocked_1D = CPUSpecificParams{{ncw}, {nCw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_plain_to_blocked_2D = CPUSpecificParams{{nchw}, {nChw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_plain_to_blocked_3D = CPUSpecificParams{{ncdhw}, {nCdhw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_1D_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_dw_1D_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_avx512_dw_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_avx512_dw_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_sse42_1D_1x1 = CPUSpecificParams{{nCw8c}, {nCw8c}, {"jit_sse42_1x1"}, "jit_sse42_1x1"};
const auto conv_avx2_1D_1x1 = CPUSpecificParams{{nCw8c}, {nCw8c}, {"jit_avx2_1x1"}, "jit_avx2_1x1"};
const auto conv_avx512_1D_1x1 = CPUSpecificParams{{nCw16c}, {nCw16c}, {"jit_avx512_1x1"}, "jit_avx512_1x1"};
const auto conv_sse42_1D_1x1_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_sse42_1x1"}, "jit_sse42_1x1"};
const auto conv_avx2_1D_1x1_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_avx2_1x1"}, "jit_avx2_1x1"};
const auto conv_avx512_1D_1x1_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_avx512_1x1"}, "jit_avx512_1x1"};
const auto conv_sse42_2D_1x1 = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_sse42_1x1"}, "jit_sse42_1x1"};
const auto conv_avx2_2D_1x1 = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_avx2_1x1"}, "jit_avx2_1x1"};
const auto conv_avx512_2D_1x1 = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_avx512_1x1"}, "jit_avx512_1x1"};
