commit 8986ad583b

    Merge remote-tracking branch 'github/master' into auto-batch-master

    # Conflicts:
    #	inference-engine/tests/functional/plugin/gpu/remote_blob_tests/gpu_remote_tensor_tests.cpp
@@ -88,7 +88,7 @@ jobs:
     rm -rf $(BUILD_SAMPLES_DIR) ; mkdir $(BUILD_SAMPLES_DIR)
     sudo rm -rf $(TMP_DIR) ; sudo mkdir $(TMP_DIR) ; sudo chmod 777 -R $(TMP_DIR)
     sudo mkdir -p $(SHARE_DIR)
-    sudo apt --assume-yes install nfs-common
+    sudo apt --assume-yes update && sudo apt --assume-yes install nfs-common
     sudo mount -vvv -t nfs cinfsshare.file.core.windows.net:/cinfsshare/onnxtestdata $(SHARE_DIR) -o vers=4,minorversion=1,sec=sys
     mkdir -p $(CCACHE_DIR)
   displayName: 'Make dir'
@@ -316,8 +316,8 @@ jobs:
   workingDirectory: $(BUILD_SAMPLES_TESTS_DIR)
   displayName: 'Install Samples Tests'

-  - script: |
-      python3 -m pip install -r $(INSTALL_DIR)/tests/smoke_tests/requirements.txt
+  - script: |
+      python3 -m pip install -r $(INSTALL_DIR)/tests/smoke_tests/requirements.txt
   workingDirectory: $(INSTALL_DIR)
   displayName: 'Install dependencies for samples smoke tests'
   continueOnError: false
@@ -60,7 +60,7 @@ jobs:
 - script: |
     rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
     sudo mkdir -p $(MODELS_DIR)
-    sudo apt --assume-yes install nfs-common
+    sudo apt --assume-yes update && sudo apt --assume-yes install nfs-common
     sudo mount -vvv -t nfs cinfsshare.file.core.windows.net:/cinfsshare/onnxtestdata $(MODELS_DIR) -o vers=4,minorversion=1,sec=sys
     mkdir -p $(MODELS_DIR)/models_data
   displayName: 'Make dirs'
@@ -53,7 +53,7 @@ jobs:
     rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
     sudo rm -rf $(TMP_DIR) ; sudo mkdir $(TMP_DIR) ; sudo chmod 777 -R $(TMP_DIR)
     sudo mkdir -p $(MODELS_DIR)
-    sudo apt --assume-yes install nfs-common
+    sudo apt --assume-yes update && sudo apt --assume-yes install nfs-common
     sudo mount -vvv -t nfs cinfsshare.file.core.windows.net:/cinfsshare/onnxtestdata $(MODELS_DIR) -o vers=4,minorversion=1,sec=sys
   displayName: 'Make dirs'
@@ -98,7 +98,7 @@ jobs:
     -DENABLE_CPPLINT=OFF
     -DENABLE_TESTS=OFF
     -DENABLE_MKL_DNN=ON
-    -DENABLE_CLDNN=OFF
+    -DENABLE_INTEL_GPU=OFF
     -DENABLE_PROFILING_ITT=OFF
     -DENABLE_SAMPLES=OFF
     -DNGRAPH_ONNX_FRONTEND_ENABLE=ON
@@ -132,7 +132,7 @@ jobs:

 - script: |
     set PATH=$(WORK_DIR)\ninja-win;%PATH%
-    call "$(MSVS_VARS_PATH)" && $(CMAKE_CMD) -G "Ninja Multi-Config" -DENABLE_ONEDNN_FOR_GPU=OFF -DENABLE_GNA=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_CLDNN=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_GAPI_PREPROCESSING=$(CMAKE_BUILD_SHARED_LIBS) -DBUILD_SHARED_LIBS=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_REQUIREMENTS_INSTALL=OFF -DENABLE_FASTER_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE="C:\hostedtoolcache\windows\Python\3.7.6\x64\python.exe" -DPYTHON_INCLUDE_DIR="C:\hostedtoolcache\windows\Python\3.7.6\x64\include" -DPYTHON_LIBRARY="C:\hostedtoolcache\windows\Python\3.7.6\x64\libs\python37.lib" -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
+    call "$(MSVS_VARS_PATH)" && $(CMAKE_CMD) -G "Ninja Multi-Config" -DENABLE_ONEDNN_FOR_GPU=OFF -DENABLE_GNA=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_INTEL_GPU=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_GAPI_PREPROCESSING=$(CMAKE_BUILD_SHARED_LIBS) -DBUILD_SHARED_LIBS=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_REQUIREMENTS_INSTALL=OFF -DENABLE_FASTER_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE="C:\hostedtoolcache\windows\Python\3.7.6\x64\python.exe" -DPYTHON_INCLUDE_DIR="C:\hostedtoolcache\windows\Python\3.7.6\x64\include" -DPYTHON_LIBRARY="C:\hostedtoolcache\windows\Python\3.7.6\x64\libs\python37.lib" -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
   workingDirectory: $(BUILD_DIR)
   displayName: 'CMake'
@@ -181,7 +181,7 @@ jobs:
   continueOnError: false

 - script: |
-    python -m pip install -r $(INSTALL_DIR)\tests\smoke_tests\requirements.txt
+    python -m pip install -r $(INSTALL_DIR)\tests\smoke_tests\requirements.txt
   workingDirectory: $(INSTALL_DIR)
   displayName: 'Install dependencies for samples smoke tests'
   continueOnError: false
@@ -61,7 +61,7 @@ RUN cmake .. \
     -DENABLE_CPPLINT=OFF \
     -DENABLE_TESTS=OFF \
     -DENABLE_MKL_DNN=ON \
-    -DENABLE_CLDNN=OFF \
+    -DENABLE_INTEL_GPU=OFF \
     -DENABLE_PROFILING_ITT=OFF \
     -DENABLE_SAMPLES=OFF \
     -DENABLE_PYTHON=ON \
.gitmodules (vendored): 3 additions
@@ -59,3 +59,6 @@
 [submodule "tools/pot/thirdparty/open_model_zoo"]
 	path = tools/pot/thirdparty/open_model_zoo
 	url = https://github.com/openvinotoolkit/open_model_zoo.git
+[submodule "thirdparty/nlohmann_json"]
+	path = thirdparty/nlohmann_json
+	url = https://github.com/nlohmann/json.git
@@ -86,9 +86,6 @@ function(openvino_developer_export_targets)
     "A list of OpenVINO exported components" FORCE)
 endfunction()

-ie_cpack_add_component(ngraph REQUIRED)
-ie_cpack_add_component(ngraph_dev REQUIRED DEPENDS ngraph)
-
 # add target with processed tests model zoo
 include(cmake/test_model_zoo.cmake)
@@ -103,6 +100,7 @@ add_subdirectory(model-optimizer)
 add_subdirectory(docs)
 add_subdirectory(tools)
 add_subdirectory(scripts)
+add_subdirectory(licensing)

 #
 # CPack
@@ -30,11 +30,11 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins
 /inference-engine/thirdparty/mkl-dnn/ @openvinotoolkit/openvino-ie-cpu-maintainers @openvinotoolkit/openvino-ie-cpu-developers

 # IE GPU:
-/inference-engine/src/cldnn_engine/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
 /src/inference/include/ie/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
 /src/inference/include/ie/cldnn/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
 /src/inference/include/openvino/runtime/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
-/inference-engine/thirdparty/clDNN/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
+/src/plugins/intel_gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers

 # IE VPU:
 /inference-engine/src/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers
@@ -15,12 +15,12 @@ ie_coverage_capture(INFO_FILE "openvino"
 ie_coverage_extract(INPUT "openvino" OUTPUT "inference"
                     PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/inference/*")

-ie_coverage_genhtml(INFO_FILE "inference_engine"
+ie_coverage_genhtml(INFO_FILE "inference"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

-ie_coverage_extract(INPUT "openvino" OUTPUT "inference_engine_legacy"
+ie_coverage_extract(INPUT "openvino" OUTPUT "legacy"
                     PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/common/legacy/*")
-ie_coverage_genhtml(INFO_FILE "inference_engine_legacy"
+ie_coverage_genhtml(INFO_FILE "legacy"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

 ie_coverage_extract(INPUT "openvino" OUTPUT "ov_hetero_plugin"
@@ -38,14 +38,14 @@ ie_coverage_extract(INPUT "openvino" OUTPUT "preprocessing"
 ie_coverage_genhtml(INFO_FILE "preprocessing"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

-ie_coverage_extract(INPUT "openvino" OUTPUT "inference_engine_transformations"
+ie_coverage_extract(INPUT "openvino" OUTPUT "transformations"
                     PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/common/transformations/*")
-ie_coverage_genhtml(INFO_FILE "inference_engine_transformations"
+ie_coverage_genhtml(INFO_FILE "transformations"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

-ie_coverage_extract(INPUT "openvino" OUTPUT "inference_engine_snippets"
+ie_coverage_extract(INPUT "openvino" OUTPUT "snippets"
                     PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/common/snippets/*")
-ie_coverage_genhtml(INFO_FILE "inference_engine_snippets"
+ie_coverage_genhtml(INFO_FILE "snippets"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

 ie_coverage_extract(INPUT "openvino" OUTPUT "low_precision_transformations"
@@ -65,10 +65,10 @@ if(ENABLE_MKL_DNN)
                         PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
 endif()

-if(ENABLE_CLDNN)
-    ie_coverage_extract(INPUT "openvino" OUTPUT "cldnn_engine"
-                        PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/inference-engine/src/cldnn_engine/*")
-    ie_coverage_genhtml(INFO_FILE "cldnn_engine"
+if (ENABLE_INTEL_GPU)
+    ie_coverage_extract(INPUT "openvino" OUTPUT "intel_gpu_plugin"
+                        PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/plugins/intel_gpu/*")
+    ie_coverage_genhtml(INFO_FILE "intel_gpu_plugin"
                         PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
 endif()
@@ -129,7 +129,7 @@ set(IE_DEBUG_POSTFIX_WIN "d")
 set(IE_RELEASE_POSTFIX_WIN "")
 set(IE_DEBUG_POSTFIX_LIN "")
 set(IE_RELEASE_POSTFIX_LIN "")
-set(IE_DEBUG_POSTFIX_MAC "")
+set(IE_DEBUG_POSTFIX_MAC "d")
 set(IE_RELEASE_POSTFIX_MAC "")

 if(WIN32)
@@ -5,20 +5,33 @@
 include(CheckCXXCompilerFlag)

 if (ENABLE_SANITIZER)
-    set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=address")
-    check_cxx_compiler_flag("-fsanitize-recover=address" SANITIZE_RECOVER_ADDRESS_SUPPORTED)
-    if (SANITIZE_RECOVER_ADDRESS_SUPPORTED)
-        set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize-recover=address")
+    if (WIN32)
+        check_cxx_compiler_flag("/fsanitize=address" SANITIZE_ADDRESS_SUPPORTED)
+        if (SANITIZE_ADDRESS_SUPPORTED)
+            set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} /fsanitize=address")
+        else()
+            message(FATAL_ERROR "Address sanitizer is not supported by current compiler.\n"
+                                "Please, check requirements:\n"
+                                "https://github.com/openvinotoolkit/openvino/wiki/AddressSanitizer-and-LeakSanitizer")
+        endif()
+    else()
+        set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=address")
+        check_cxx_compiler_flag("-fsanitize-recover=address" SANITIZE_RECOVER_ADDRESS_SUPPORTED)
+        if (SANITIZE_RECOVER_ADDRESS_SUPPORTED)
+            set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize-recover=address")
+        endif()
+        set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=address")
     endif()
-
-    set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=address")
 endif()

 if (ENABLE_UB_SANITIZER)
+    if (WIN32)
+        message(FATAL_ERROR "UndefinedBehavior sanitizer is not supported in Windows")
+    endif()
     # TODO: Remove -fno-sanitize=null as thirdparty/ocl/clhpp_headers UBSAN compatibility resolved:
     # https://github.com/KhronosGroup/OpenCL-CLHPP/issues/17
     set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=undefined -fno-sanitize=null")
-    if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
         # TODO: Remove -Wno-maybe-uninitialized after CVS-61143 fix
         set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -Wno-maybe-uninitialized")
     endif()
@@ -38,17 +51,21 @@ endif()
 # common sanitizer options
 if (DEFINED SANITIZER_COMPILER_FLAGS)
     # ensure symbols are present
-    set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -g -fno-omit-frame-pointer")
-    if(NOT OV_COMPILER_IS_CLANG)
-        # GPU plugin tests compilation is slow with -fvar-tracking-assignments on GCC.
-        # Clang has no var-tracking-assignments.
-        set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fno-var-tracking-assignments")
-    endif()
-    # prevent unloading libraries at runtime, so sanitizer can resolve their symbols
-    set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -Wl,-z,nodelete")
+    if (NOT WIN32)
+        set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -g -fno-omit-frame-pointer")
+        if(NOT OV_COMPILER_IS_CLANG)
+            # GPU plugin tests compilation is slow with -fvar-tracking-assignments on GCC.
+            # Clang has no var-tracking-assignments.
+            set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fno-var-tracking-assignments")
+        endif()
+        # prevent unloading libraries at runtime, so sanitizer can resolve their symbols
+        set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -Wl,-z,nodelete")

-    if(OV_COMPILER_IS_CLANG AND NOT WIN32 AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0)
-        set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld")
+        if(OV_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0)
+            set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld")
+        endif()
+    else()
+        set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} /Oy-")
     endif()

 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SANITIZER_COMPILER_FLAGS}")
@@ -13,6 +13,7 @@ ie_option (ENABLE_TESTS "unit, behavior and functional tests" OFF)
 ie_option (ENABLE_STRICT_DEPENDENCIES "Skip configuring \"convinient\" dependencies for efficient parallel builds" ON)

 ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF)
+ie_dependent_option (ENABLE_INTEL_GPU "GPU plugin for inference engine on Intel GPU" ON "ENABLE_CLDNN" OFF)

 if (NOT ENABLE_CLDNN OR ANDROID OR
     (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0))
@@ -49,14 +50,17 @@ ie_dependent_option (ENABLE_PYTHON "enables ie python bridge build" OFF "PYTHONL
 find_package(PythonInterp 3 QUIET)
 ie_dependent_option (ENABLE_DOCS "Build docs using Doxygen" OFF "PYTHONINTERP_FOUND" OFF)

+# this option should not be a part of InferenceEngineDeveloperPackage
+# since wheels can be built only together with main OV build
+cmake_dependent_option (ENABLE_WHEEL "Build wheel packages for PyPi" OFF
+    "PYTHONINTERP_FOUND;CMAKE_SOURCE_DIR STREQUAL OpenVINO_SOURCE_DIR" OFF)

 #
 # Inference Engine specific options
 #

 ie_dependent_option (ENABLE_GNA "GNA support for inference engine" ON "NOT APPLE;NOT ANDROID;X86_64" OFF)

 ie_dependent_option (ENABLE_CLDNN_TESTS "Enable clDNN unit tests" OFF "ENABLE_CLDNN" OFF)

 # "MKL-DNN library based on OMP or TBB or Sequential implementation: TBB|OMP|SEQ"
 if(X86 OR ARM OR (MSVC AND (ARM OR AARCH64)) )
     set(THREADING_DEFAULT "SEQ")
@@ -199,8 +203,8 @@ if (ENABLE_MYRIAD_NO_BOOT AND ENABLE_MYRIAD )
     add_definitions(-DENABLE_MYRIAD_NO_BOOT=1)
 endif()

-if (ENABLE_CLDNN)
-    add_definitions(-DENABLE_CLDNN=1)
+if (ENABLE_INTEL_GPU)
+    add_definitions(-DENABLE_INTEL_GPU=1)
 endif()

 if (ENABLE_MKL_DNN)
@@ -8,9 +8,9 @@ After you have used the Model Optimizer to create an Intermediate Representation
 Inference Engine is a set of C++ libraries providing a common API to deliver inference solutions on the platform of your choice: CPU, GPU, or VPU. Use the Inference Engine API to read the Intermediate Representation, set the input and output formats, and execute the model on devices. While the C++ libraries are the primary implementation, C libraries and Python bindings are also available.

-For Intel® Distribution of OpenVINO™ toolkit, Inference Engine binaries are delivered within release packages.
+For Intel® Distribution of OpenVINO™ toolkit, Inference Engine binaries are delivered within release packages.

-The open source version is available in the [OpenVINO™ toolkit GitHub repository](https://github.com/openvinotoolkit/openvino) and can be built for supported platforms using the <a href="https://github.com/openvinotoolkit/openvino/wiki/BuildingCode">Inference Engine Build Instructions</a>.
+The open source version is available in the [OpenVINO™ toolkit GitHub repository](https://github.com/openvinotoolkit/openvino) and can be built for supported platforms using the <a href="https://github.com/openvinotoolkit/openvino/wiki/BuildingCode">Inference Engine Build Instructions</a>.

 To learn about how to use the Inference Engine API for your application, see the [Integrating Inference Engine in Your Application](Integrate_with_customer_application_new_API.md) documentation.
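As a quick orientation to the workflow this introduction describes (read the IR, pick a device, execute), here is a minimal synchronous-inference sketch against the Inference Engine C++ API; the model path and device name are placeholders, not part of this commit:

```cpp
#include <ie_core.hpp>

int main() {
    InferenceEngine::Core core;
    // Read an IR produced by the Model Optimizer (file name is hypothetical).
    InferenceEngine::CNNNetwork network = core.ReadNetwork("model.xml");
    // Compile the network for a device of your choice: "CPU", "GPU", or a VPU.
    InferenceEngine::ExecutableNetwork exec = core.LoadNetwork(network, "CPU");
    InferenceEngine::InferRequest request = exec.CreateInferRequest();
    request.Infer();  // synchronous inference; outputs are then read via GetBlob()
    return 0;
}
```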
@@ -66,7 +66,7 @@ The table below shows the plugin libraries and additional dependencies for Linux
 | Plugin | Library name for Linux | Dependency libraries for Linux | Library name for Windows | Dependency libraries for Windows | Library name for macOS | Dependency libraries for macOS |
 |--------|-----------------------------|-------------------------------------------------------------|--------------------------|--------------------------------------------------------------------------------------------------------|------------------------------|---------------------------------------------|
 | CPU | `libMKLDNNPlugin.so` | `libinference_engine_lp_transformations.so` | `MKLDNNPlugin.dll` | `inference_engine_lp_transformations.dll` | `libMKLDNNPlugin.so` | `inference_engine_lp_transformations.dylib` |
-| GPU | `libclDNNPlugin.so` | `libinference_engine_lp_transformations.so`, `libOpenCL.so` | `clDNNPlugin.dll` | `OpenCL.dll`, `inference_engine_lp_transformations.dll` | Is not supported | - |
+| GPU | `libov_intel_gpu_plugin.so` | `libinference_engine_lp_transformations.so`, `libOpenCL.so` | `ov_intel_gpu_plugin.dll` | `OpenCL.dll`, `inference_engine_lp_transformations.dll` | Is not supported | - |
 | MYRIAD | `libmyriadPlugin.so` | `libusb.so`, | `myriadPlugin.dll` | `usb.dll` | `libmyriadPlugin.so` | `libusb.dylib` |
 | HDDL | `libHDDLPlugin.so` | `libbsl.so`, `libhddlapi.so`, `libmvnc-hddl.so` | `HDDLPlugin.dll` | `bsl.dll`, `hddlapi.dll`, `json-c.dll`, `libcrypto-1_1-x64.dll`, `libssl-1_1-x64.dll`, `mvnc-hddl.dll` | Is not supported | - |
 | GNA | `libGNAPlugin.so` | `libgna.so`, | `GNAPlugin.dll` | `gna.dll` | Is not supported | - |
@@ -10,7 +10,7 @@ Based on that, the declaration of an extension class can look as follows:

 @snippet template_extension/old/extension.hpp extension:header

-The extension library should contain and export the InferenceEngine::CreateExtension method, which creates an `Extension` class:
+The extension library should use the `IE_DEFINE_EXTENSION_CREATE_FUNCTION` macro to export a function that creates an `Extension` class:

 @snippet template_extension/old/extension.cpp extension:CreateExtension
@@ -2,7 +2,7 @@

 Inference Engine Extensibility API enables you to add support of custom operations to the Inference Engine.
 An extension should contain operation sets with custom operations and execution kernels for custom operations.
-Physically, an extension library can be represented as a dynamic library exporting the single `CreateExtension` function
+Physically, an extension library can be represented as a dynamic library exporting the single function
 that creates a new extension instance.

 To load the Extensibility library to the `InferenceEngine::Core` object, use the
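The context line above is cut off by the hunk boundary; for orientation, loading an extension library into `InferenceEngine::Core` conventionally looks like the sketch below (the library file name is a placeholder, not part of this change):

```cpp
#include <ie_core.hpp>
#include <ie_extension.h>

int main() {
    InferenceEngine::Core core;
    // Wrap the dynamic library that exports the extension entry point
    // (the path below is hypothetical).
    auto extension = std::make_shared<InferenceEngine::Extension>("libtemplate_extension.so");
    core.AddExtension(extension);
    return 0;
}
```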
@@ -40,7 +40,6 @@ Inference Engine sample applications include the following:
 - **Object Detection for SSD Sample** – Inference of object detection networks based on the SSD; this sample is a simplified version that supports only images as inputs.
-  - [Object Detection SSD C++ Sample](../../samples/cpp/object_detection_sample_ssd/README.md)
   - [Object Detection SSD C Sample](../../samples/c/object_detection_sample_ssd/README.md)
   - [Object Detection SSD Python* Sample](../../samples/python/object_detection_sample_ssd/README.md)

 > **NOTE**: All C++ samples support input paths containing only ASCII characters, except the Hello Classification Sample, which supports Unicode.
@@ -11,9 +11,9 @@ After you have used the Model Optimizer to create an Intermediate Representation
 Inference Engine is a set of C++ libraries providing a common API to deliver inference solutions on the platform of your choice: CPU, GPU, or VPU. Use the Inference Engine API to read the Intermediate Representation, set the input and output formats, and execute the model on devices. While the C++ libraries are the primary implementation, C libraries and Python bindings are also available.

-For Intel® Distribution of OpenVINO™ toolkit, Inference Engine binaries are delivered within release packages.
+For Intel® Distribution of OpenVINO™ toolkit, Inference Engine binaries are delivered within release packages.

-The open source version is available in the [OpenVINO™ toolkit GitHub repository](https://github.com/openvinotoolkit/openvino) and can be built for supported platforms using the <a href="https://github.com/openvinotoolkit/openvino/wiki/BuildingCode">Inference Engine Build Instructions</a>.
+The open source version is available in the [OpenVINO™ toolkit GitHub repository](https://github.com/openvinotoolkit/openvino) and can be built for supported platforms using the <a href="https://github.com/openvinotoolkit/openvino/wiki/BuildingCode">Inference Engine Build Instructions</a>.

 To learn about how to use the Inference Engine API for your application, see the [Integrating Inference Engine in Your Application](Integrate_with_customer_application_new_API.md) documentation.
@@ -71,7 +71,7 @@ The table below shows the plugin libraries and additional dependencies for Linux
 | Plugin | Library name for Linux | Dependency libraries for Linux | Library name for Windows | Dependency libraries for Windows | Library name for macOS | Dependency libraries for macOS |
 |--------|-----------------------------|-------------------------------------------------------------|--------------------------|--------------------------------------------------------------------------------------------------------|------------------------------|---------------------------------------------|
 | CPU | `libMKLDNNPlugin.so` | `libinference_engine_lp_transformations.so` | `MKLDNNPlugin.dll` | `inference_engine_lp_transformations.dll` | `libMKLDNNPlugin.so` | `inference_engine_lp_transformations.dylib` |
-| GPU | `libclDNNPlugin.so` | `libinference_engine_lp_transformations.so`, `libOpenCL.so` | `clDNNPlugin.dll` | `OpenCL.dll`, `inference_engine_lp_transformations.dll` | Is not supported | - |
+| GPU | `libov_intel_gpu_plugin.so` | `libinference_engine_lp_transformations.so`, `libOpenCL.so` | `ov_intel_gpu_plugin.dll` | `OpenCL.dll`, `inference_engine_lp_transformations.dll` | Is not supported | - |
 | MYRIAD | `libmyriadPlugin.so` | `libusb.so`, | `myriadPlugin.dll` | `usb.dll` | `libmyriadPlugin.so` | `libusb.dylib` |
 | HDDL | `libHDDLPlugin.so` | `libbsl.so`, `libhddlapi.so`, `libmvnc-hddl.so` | `HDDLPlugin.dll` | `bsl.dll`, `hddlapi.dll`, `json-c.dll`, `libcrypto-1_1-x64.dll`, `libssl-1_1-x64.dll`, `mvnc-hddl.dll` | Is not supported | - |
 | GNA | `libGNAPlugin.so` | `libgna.so`, | `GNAPlugin.dll` | `gna.dll` | Is not supported | - |
@@ -3,7 +3,7 @@ GPU Plugin {#openvino_docs_IE_DG_supported_plugins_GPU}

 The GPU plugin uses the Intel® Compute Library for Deep Neural Networks (clDNN) to infer deep neural networks.
 clDNN is an open source performance library for Deep Learning (DL) applications intended for acceleration of Deep Learning Inference on Intel® Processor Graphics including Intel® HD Graphics, Intel® Iris® Graphics, Intel® Iris® Xe Graphics, and Intel® Iris® Xe MAX graphics.
-For an in-depth description of clDNN, see [Inference Engine source files](https://github.com/openvinotoolkit/openvino/tree/master/inference-engine/src/cldnn_engine) and [Accelerate Deep Learning Inference with Intel® Processor Graphics](https://software.intel.com/en-us/articles/accelerating-deep-learning-inference-with-intel-processor-graphics).
+For an in-depth description of clDNN, see [Inference Engine source files](https://github.com/openvinotoolkit/openvino/tree/master/src/plugins/intel_gpu/) and [Accelerate Deep Learning Inference with Intel® Processor Graphics](https://software.intel.com/en-us/articles/accelerating-deep-learning-inference-with-intel-processor-graphics).

 ## Device Naming Convention
 * Devices are enumerated as "GPU.X" where `X={0, 1, 2,...}`. Only Intel® GPU devices are considered.
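To make the naming convention above concrete, a minimal sketch that lists the devices the runtime can see (standard `Core::GetAvailableDevices` call; the output depends on the machine):

```cpp
#include <ie_core.hpp>
#include <iostream>

int main() {
    InferenceEngine::Core core;
    // Intel GPU devices are reported as "GPU", "GPU.0", "GPU.1", ...
    for (const auto& device : core.GetAvailableDevices())
        std::cout << device << std::endl;
    return 0;
}
```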
@@ -49,7 +49,7 @@ Intermediate blobs between these sub graphs are allocated automatically in the m
 Samples can be used with the following command:

 ```sh
-./object_detection_sample_ssd -m <path_to_model>/ModelSSD.xml -i <path_to_pictures>/picture.jpg -d HETERO:GPU,CPU
+./hello_classification <path_to_model>/squeezenet1.1.xml <path_to_pictures>/picture.jpg HETERO:GPU,CPU
 ```
 where:
 - `HETERO` stands for heterogeneous plugin
@@ -299,7 +299,9 @@ TensorFlow*-specific parameters:
                         TensorFlow*: comma separated list of shared libraries
                         with TensorFlow* custom operations implementation.
   --disable_nhwc_to_nchw
-                        Disables default translation from NHWC to NCHW
+                        [DEPRECATED] Disables default translation from NHWC to NCHW. Since 2022.1
+                        this option is deprecated and used only to maintain backward compatibility
+                        with previous releases.
 ```

 > **NOTE:** Models produced with TensorFlow\* usually do not have fully defined shapes (contain `-1` in some dimensions). It is necessary to pass an explicit shape for the input using the command line parameter `--input_shape`, or `-b` to override just the batch dimension. If the shape is fully defined, then there is no need to specify either `-b` or `--input_shape`.
@@ -35,7 +35,6 @@ To generate the BERT Intermediate Representation (IR) of the model, run the Mode
 python3 ./mo_tf.py
 --input_meta_graph uncased_L-12_H-768_A-12/bert_model.ckpt.meta \
 --output bert/pooler/dense/Tanh \
---disable_nhwc_to_nchw \
 --input Placeholder{i32},Placeholder_1{i32},Placeholder_2{i32}
 ```
@@ -110,10 +109,9 @@ python3 run_classifier.py \

 Run the Model Optimizer with the following command line parameters to generate reshape-able BERT Intermediate Representation (IR):
 ```sh
-python3 ./mo_tf.py
---input_model inference_graph.pb
---input "IteratorGetNext:0{i32}[1 128],IteratorGetNext:1{i32}[1 128],IteratorGetNext:4{i32}[1 128]"
---disable_nhwc_to_nchw
+python3 ./mo_tf.py \
+--input_model inference_graph.pb \
+--input "IteratorGetNext:0{i32}[1 128],IteratorGetNext:1{i32}[1 128],IteratorGetNext:4{i32}[1 128]"
 ```
 For other applicable parameters, refer to [Convert Model from TensorFlow](../Convert_Model_From_TensorFlow.md).
@@ -71,8 +71,7 @@ To generate the IR, run the Model Optimizer with the following parameters:
 python3 {path_to_mo}/mo_tf.py \
 --input_model output_graph.pb \
 --input "input_lengths->[16],input_node[1 16 19 26],previous_state_h[1 2048],previous_state_c[1 2048]" \
---output "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd_1,cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd,logits" \
---disable_nhwc_to_nchw
+--output "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd_1,cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd,logits"
 ```

 Where:
@@ -69,7 +69,6 @@ The attribute names are self-explanatory or match the name in the `hparams_confi

 OpenVINO™ toolkit provides samples that can be used to infer the EfficientDet model. For more information, refer to
-[Object Detection for SSD C++ Sample](@ref openvino_inference_engine_samples_object_detection_sample_ssd_README) and
 [Object Detection for SSD Python Sample](@ref openvino_inference_engine_ie_bridges_python_sample_object_detection_sample_ssd_README).

 ## <a name="efficientdet-ir-results-interpretation"></a>Interpreting Results of the TensorFlow Model and the IR
@@ -186,6 +186,7 @@ The script should save into `~/XLNet-Large/xlnet`.

 To generate the XLNet Intermediate Representation (IR) of the model, run the Model Optimizer with the following parameters:
 ```sh
-python3 mo.py --input_model path-to-model/model_frozen.pb --input "input_mask[50 1],input_ids[50 1],seg_ids[50 1]" --log_level DEBUG --disable_nhwc_to_nchw --output_dir <OUTPUT_MODEL_DIR>
+python3 mo.py --input_model path-to-model/model_frozen.pb \
+--input "input_mask[50 1],input_ids[50 1],seg_ids[50 1]"
 ```
@@ -285,10 +285,9 @@ More information on how to develop middle transformations and dedicated API desc
 ### NHWC to NCHW Layout Change <a name="layout-change"></a>
 There are several middle transformations responsible for changing model layout from NHWC to NCHW. These transformations
 are triggered by default for TensorFlow\* models only because it is the only framework with Convolution operations in
-NHWC layout.
-
-> **NOTE**: If a TensorFlow\* model is in NCHW layout, you should specify the `--disable_nhwc_to_nchw` command line
-> parameter to disable these transformations.
+NHWC layout. This layout change is disabled if the model does not have operations that OpenVINO&trade; needs to execute in
+NCHW layout, for example, Convolutions in NHWC layout. It is still possible to force Model Optimizer to do layout change
+using the `--disable_nhwc_to_nchw` command-line parameter.

 The layout change is a complex problem and detailed explanation of it is out of this document scope. A very brief
 explanation of this process is provided below:
@@ -174,7 +174,6 @@ limitations under the License.
 <tab type="user" title="nGraph Function Creation C++ Sample" url="@ref openvino_inference_engine_samples_ngraph_function_creation_sample_README"/>
 <tab type="user" title="nGraph Function Creation Python* Sample" url="@ref openvino_inference_engine_ie_bridges_python_sample_ngraph_function_creation_sample_README"/>
-<tab type="user" title="Object Detection SSD C++ Sample" url="@ref openvino_inference_engine_samples_object_detection_sample_ssd_README"/>
 <tab type="user" title="Object Detection SSD Python* Sample" url="@ref openvino_inference_engine_ie_bridges_python_sample_object_detection_sample_ssd_README"/>
 <tab type="user" title="Object Detection SSD C Sample" url="@ref openvino_inference_engine_ie_bridges_c_samples_object_detection_sample_ssd_README"/>
 <tab type="user" title="Automatic Speech Recognition C++ Sample" url="@ref openvino_inference_engine_samples_speech_sample_README"/>
 <tab type="user" title="Automatic Speech Recognition Python Sample" url="@ref openvino_inference_engine_ie_bridges_python_sample_speech_sample_README"/>
@@ -54,29 +54,29 @@ The OpenVINO™ workflow on Raspbian* OS is as follows:

 ## <a name="using-sample"></a>Build and Run Code Samples

-Follow the steps below to run pre-trained Face Detection network using Inference Engine samples from the OpenVINO toolkit.
+Follow the steps below to run the pre-trained SqueezeNet image classification network using Inference Engine samples from the OpenVINO toolkit.

 1. Create a samples build directory. This example uses a directory named `build`:
    ```sh
    mkdir build && cd build
    ```
-2. Build the Object Detection Sample with the following command:
+2. Build the Hello Classification Sample with the following command:
    ```sh
    cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-march=armv7-a" /opt/intel/openvino_2022/samples/cpp
-   make -j2 object_detection_sample_ssd
+   make -j2 hello_classification
    ```
-3. Download the pre-trained Face Detection model with the [Model Downloader tool](@ref omz_tools_downloader):
+3. Download the pre-trained SqueezeNet image classification model with the [Model Downloader tool](@ref omz_tools_downloader):
    ```sh
    git clone --depth 1 https://github.com/openvinotoolkit/open_model_zoo
    cd open_model_zoo/tools/downloader
    python3 -m pip install -r requirements.in
-   python3 downloader.py --name face-detection-adas-0001
+   python3 downloader.py --name squeezenet1.1
    ```
 4. Run the sample, specifying the model and path to the input image:
    ```sh
-   ./armv7l/Release/object_detection_sample_ssd -m face-detection-adas-0001.xml -d MYRIAD -i <path_to_image>
+   ./armv7l/Release/hello_classification <path_to_model>/squeezenet1.1.xml <path_to_image> MYRIAD
    ```
-The application outputs an image (`out_0.bmp`) with detected faced enclosed in rectangles.
+The application outputs the top 10 classification results to the console window.

 ## <a name="basic-guidelines-sample-application"></a>Basic Guidelines for Using Code Samples
@@ -138,25 +138,25 @@ Follow the next steps to use the pre-trained face detection model using Inferenc
    ```sh
    mkdir build && cd build
    ```
-2. Build the Object Detection Sample:
+2. Build the Hello Classification Sample:
    ```sh
    cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-march=armv7-a" /opt/intel/openvino_2022/samples/cpp
    ```
    ```sh
-   make -j2 object_detection_sample_ssd
+   make -j2 hello_classification
    ```
-3. Download the pre-trained Face Detection model with the Model Downloader or copy it from the host machine:
+3. Download the pre-trained squeezenet1.1 image classification model with the Model Downloader or copy it from the host machine:
    ```sh
    git clone --depth 1 https://github.com/openvinotoolkit/open_model_zoo
    cd open_model_zoo/tools/downloader
    python3 -m pip install -r requirements.in
-   python3 downloader.py --name face-detection-adas-0001
+   python3 downloader.py --name squeezenet1.1
    ```
 4. Run the sample specifying the model, a path to the input image, and the VPU required to run with the Raspbian* OS:
    ```sh
-   ./armv7l/Release/object_detection_sample_ssd -m <path_to_model>/face-detection-adas-0001.xml -d MYRIAD -i <path_to_image>
+   ./armv7l/Release/hello_classification <path_to_model>/squeezenet1.1.xml <path_to_image> MYRIAD
    ```
-The application outputs an image (`out_0.bmp`) with detected faced enclosed in rectangles.
+The application outputs the top 10 classification results to the console window.

 Congratulations, you have finished the OpenVINO™ toolkit for Raspbian* OS installation. You have completed all required installation, configuration and build steps in this guide.
@@ -152,14 +152,24 @@ InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine::
     // So we need to store both supported and unsupported node sets
     std::unordered_set<std::string> supported;
     std::unordered_set<std::string> unsupported;
-    auto opset = ngraph::get_opset4();
+    ngraph::OpSet op_super_set;
+#define _OPENVINO_OP_REG(NAME, NAMESPACE) op_super_set.insert<NAMESPACE::NAME>();
+#include "openvino/opsets/opset1_tbl.hpp"
+#include "openvino/opsets/opset2_tbl.hpp"
+#include "openvino/opsets/opset3_tbl.hpp"
+#include "openvino/opsets/opset4_tbl.hpp"
+#include "openvino/opsets/opset5_tbl.hpp"
+#include "openvino/opsets/opset6_tbl.hpp"
+#include "openvino/opsets/opset7_tbl.hpp"
+#include "openvino/opsets/opset8_tbl.hpp"
+#undef _OPENVINO_OP_REG
     for (auto&& node : transformedFunction->get_ops()) {
         // Extract transformation history from transformed node as list of nodes
         for (auto&& fusedLayerName : ngraph::getFusedNamesVector(node)) {
             // Filter just nodes from original operation set
             // TODO: fill with actual decision rules based on whether kernel is supported by backend
             if (InferenceEngine::details::contains(originalOps, fusedLayerName)) {
-                if (opset.contains_type(friendlyNameToType[fusedLayerName])) {
+                if (op_super_set.contains_type(friendlyNameToType[fusedLayerName])) {
                     supported.emplace(fusedLayerName);
                 } else {
                     unsupported.emplace(fusedLayerName);
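For readers unfamiliar with the opset table headers used above: each `opsetN_tbl.hpp` expands `_OPENVINO_OP_REG` once per operation, so redefining the macro before including the table populates the set. A schematic, self-contained illustration of the same X-macro technique (the table entries here are illustrative stand-ins, not the real headers):

```cpp
#include <set>
#include <string>

// Collect operation names the way QueryNetwork builds op_super_set:
// redefine the registration macro, expand the "table", then undefine it.
static std::set<std::string> collect_op_names() {
    std::set<std::string> names;
#define _OP_REG(NAME) names.insert(#NAME);
    _OP_REG(Add)          // a real opset table expands to hundreds of entries;
    _OP_REG(Convolution)  // these two lines stand in for the included header
#undef _OP_REG
    return names;
}
```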
@@ -21,12 +21,12 @@ namespace {

 struct RefPreprocessParams {
     RefPreprocessParams(const std::string& val): name(val) {}
-    std::function<std::shared_ptr<ov::Function>()> function;
-    std::vector<Tensor> inputs;
-    std::vector<Tensor> expected;
-    float abs_threshold = 0.01f;
-    float rel_threshold = 0.01f;
-    std::string name;
+    std::function<std::shared_ptr<ov::Function>()> function;
+    std::vector<Tensor> inputs;
+    std::vector<Tensor> expected;
+    float abs_threshold = 0.01f;
+    float rel_threshold = 0.01f;
+    std::string name;
 };

 class ReferencePreprocessTest : public testing::TestWithParam<RefPreprocessParams>, public CommonReferenceTest {
@@ -95,7 +95,8 @@ static RefPreprocessParams simple_mean_scale() {
     RefPreprocessParams res("simple_mean_scale");
     res.function = []() {
         auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2});
-        f = PrePostProcessor(f).input(InputInfo().preprocess(PreProcessSteps().mean(1.f).scale(2.f))).build();
+        auto p = PrePostProcessor(f);
+        p.input().preprocess().mean(1.f).scale(2.f); p.build();
         return f;
     };
     res.inputs.emplace_back(Shape{1, 3, 2, 2}, element::f32, std::vector<float>{1., 3., 5., 7., 9., 11., 13., 15., 17., 19., 21., 23.});
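The change above is the pattern repeated through the rest of this file: builder-style `InputInfo`/`InputTensorInfo`/`PreProcessSteps` temporaries are replaced by the fluent `PrePostProcessor` accessors. A minimal standalone sketch of the new style, mirroring the calls shown in the diff (the helper name and header path are assumptions based on this change, not guaranteed API):

```cpp
#include "openvino/core/preprocess/pre_post_process.hpp"

using namespace ov::preprocess;

// Attach mean/scale preprocessing to an existing ov::Function in place.
std::shared_ptr<ov::Function> add_mean_scale(const std::shared_ptr<ov::Function>& f) {
    auto p = PrePostProcessor(f);
    p.input().preprocess().mean(1.f).scale(2.f);
    p.build();  // as in the tests above, build() applies the steps to f
    return f;
}
```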
@@ -107,7 +108,8 @@ static RefPreprocessParams scale_then_mean() {
     RefPreprocessParams res("scale_then_mean");
     res.function = []() {
         auto f = create_simple_function(element::f32, Shape{1, 3, 2, 2});
-        f = PrePostProcessor(f).input(InputInfo().preprocess(PreProcessSteps().scale(2.0f).mean(2.0f))).build();
+        auto p = PrePostProcessor(f);
+        p.input().preprocess().scale(2.0f).mean(2.0f); p.build();
         return f;
     };
@@ -120,14 +122,15 @@ static RefPreprocessParams convert_only() {
     RefPreprocessParams res("convert_only");
     res.function = []() {
         auto f = create_simple_function(element::f32, Shape{1, 1, 2, 2});
-        f = PrePostProcessor(f).input(InputInfo()
-                .tensor(InputTensorInfo().set_element_type(element::i16))
-                .preprocess(PreProcessSteps()
+        auto p = PrePostProcessor(f);
+        p.input()
+            .tensor().set_element_type(element::i16);
+        p.input().preprocess()
             .convert_element_type(element::f32)
             .scale(3.f)
             .convert_element_type(element::u8)
-            .convert_element_type(element::f32)))
-            .build();
+            .convert_element_type(element::f32);
+        p.build();
         return f;
     };
     res.inputs.emplace_back(Shape{1, 1, 2, 2}, element::i16, std::vector<int16_t>{2, 3, 4, 5});
@@ -139,14 +142,14 @@ static RefPreprocessParams convert_element_type_and_scale() {
     RefPreprocessParams res("convert_element_type_and_scale");
     res.function = []() {
         auto f = create_simple_function(element::u8, Shape{1, 3, 2, 2});
-        f = PrePostProcessor(f)
-            .input(InputInfo()
-                .tensor(InputTensorInfo().set_element_type(element::i16))
-                .preprocess(PreProcessSteps()
-                    .convert_element_type(element::f32)
-                    .scale(2.f)
-                    .convert_element_type(element::u8)))
-            .build();
+        auto p = PrePostProcessor(f);
+        p.input()
+            .tensor().set_element_type(element::i16);
+        p.input().preprocess()
+            .convert_element_type(element::f32)
+            .scale(2.f)
+            .convert_element_type(element::u8);
+        p.build();
         return f;
     };
@@ -161,11 +164,11 @@ static RefPreprocessParams tensor_element_type_and_scale() {
     RefPreprocessParams res("tensor_element_type_and_scale");
     res.function = []() {
         auto f = create_simple_function(element::i8, Shape{1, 3, 1, 1});
-        f = PrePostProcessor(f)
-            .input(InputInfo()
-                .tensor(InputTensorInfo().set_element_type(element::f32))
-                .preprocess(PreProcessSteps().scale(2.0f).convert_element_type(element::i8)))
-            .build();
+        auto p = PrePostProcessor(f);
+        p.input()
+            .tensor().set_element_type(element::f32);
+        p.input().preprocess().scale(2.0f).convert_element_type(element::i8);
+        p.build();
         return f;
     };
@@ -178,13 +181,13 @@ static RefPreprocessParams custom_preprocessing() {
     RefPreprocessParams res("custom_preprocessing");
     res.function = []() {
         auto f = create_simple_function(element::i32, Shape{1, 3, 1, 1});
-        f = PrePostProcessor(f)
-            .input(InputInfo().preprocess(PreProcessSteps().custom([](const Output<Node>& node) {
-                auto abs = std::make_shared<op::v0::Abs>(node);
-                abs->set_friendly_name(node.get_node_shared_ptr()->get_friendly_name() + "/abs");
-                return abs;
-            })))
-            .build();
+        auto p = PrePostProcessor(f);
+        p.input().preprocess().custom([](const Output<Node>& node) {
+            auto abs = std::make_shared<op::v0::Abs>(node);
+            abs->set_friendly_name(node.get_node_shared_ptr()->get_friendly_name() + "/abs");
+            return abs;
+        });
+        p.build();
         return f;
     };
@@ -193,42 +196,24 @@ static RefPreprocessParams custom_preprocessing() {
     return res;
 }

-static RefPreprocessParams test_lvalue() {
-    RefPreprocessParams res("test_lvalue");
+static RefPreprocessParams test_multiple() {
+    RefPreprocessParams res("test_multiple");
     res.function = []() {
         auto f = create_simple_function(element::i8, Shape{1, 3, 1, 1});
         auto p = PrePostProcessor(f);
         auto p1 = std::move(p);
-        p = std::move(p1);
-        auto inputInfo = InputInfo();
-        auto inputInfo2 = std::move(inputInfo);
-        inputInfo = std::move(inputInfo2);
-        {
-            auto inputTensorInfo = InputTensorInfo();
-            auto inputTensorInfo2 = std::move(inputTensorInfo);
-            inputTensorInfo = std::move(inputTensorInfo2);
-            auto &same = inputTensorInfo.set_element_type(element::f32);
-            same.set_layout("?CHW");
-            inputInfo.tensor(std::move(same));
-        }
-        {
-            auto preprocessSteps = PreProcessSteps();
-            auto preprocessSteps2 = std::move(preprocessSteps);
-            preprocessSteps = std::move(preprocessSteps2);
-            preprocessSteps.mean(1.f);
-            preprocessSteps.scale(2.f);
-            preprocessSteps.mean({1.f, 2.f, 3.f});
-            preprocessSteps.scale({2.f, 3.f, 4.f});
-            preprocessSteps.custom([](const Output<Node> &node) {
-                auto abs = std::make_shared<op::v0::Abs>(node);
-                abs->set_friendly_name(node.get_node_shared_ptr()->get_friendly_name() + "/abs");
-                return abs;
-            });
-            auto &same = preprocessSteps.convert_element_type(element::i8);
-            inputInfo.preprocess(std::move(same));
-        }
-        p.input(std::move(inputInfo));
-        f = p.build();
+        p1.input().tensor().set_element_type(element::f32).set_layout("?CHW");
+        p1.input().preprocess().mean(1.f);
+        p1.input().preprocess().scale(2.f);
+        p1.input().preprocess().mean({1.f, 2.f, 3.f});
+        p1.input().preprocess().scale({2.f, 3.f, 4.f});
+        p1.input().preprocess().custom([](const Output<Node> &node) {
+            auto abs = std::make_shared<op::v0::Abs>(node);
+            abs->set_friendly_name(node.get_node_shared_ptr()->get_friendly_name() + "/abs");
+            return abs;
+        });
+        p1.input().preprocess().convert_element_type(element::i8);
+        f = p1.build();
         return f;
     };
@@ -241,16 +226,12 @@ static RefPreprocessParams test_2_inputs_basic() {
     RefPreprocessParams res("test_2_inputs_basic");
     res.function = []() {
         auto f = create_n_inputs<2>(element::f32, Shape{1, 3, 1, 1});
-        f = PrePostProcessor(f).input(InputInfo(0)
-                .preprocess(
-                    PreProcessSteps()
-                        .mean(1.f)))
-            .input(
-                InputInfo("tensor_input2")
-                    .preprocess(PreProcessSteps()
-                        .mean(1.f)
-                        .scale(2.0f)))
-            .build();
+        auto p = PrePostProcessor(f);
+        p.input(0).preprocess().mean(1.f);
+        p.input("tensor_input2").preprocess()
+            .mean(1.f)
+            .scale(2.0f);
+        p.build();
         return f;
     };
@@ -265,11 +246,11 @@ static RefPreprocessParams mean_scale_vector_tensor_layout() {
     RefPreprocessParams res("mean_scale_vector_tensor_layout");
     res.function = []() {
         auto f = create_simple_function(element::f32, PartialShape{1, 3, 2, 1});
-        f = PrePostProcessor(f)
-            .input(InputInfo()
-                .tensor(InputTensorInfo().set_layout("NC??"))
-                .preprocess(PreProcessSteps().mean({1.f, 2.f, 3.f}).scale({2.f, 3.f, 4.f})))
-            .build();
+        auto p = PrePostProcessor(f);
+        p.input()
+            .tensor().set_layout("NC??");
+        p.input().preprocess().mean({1.f, 2.f, 3.f}).scale({2.f, 3.f, 4.f});
+        p.build();
         return f;
     };
@@ -282,11 +263,11 @@ static RefPreprocessParams mean_scale_dynamic_layout() {
     RefPreprocessParams res("mean_scale_dynamic_layout");
     res.function = []() {
         auto f = create_simple_function(element::f32, PartialShape{1, 2, 1, 3});
-        f = PrePostProcessor(f)
-            .input(InputInfo()
-                .tensor(InputTensorInfo().set_layout("N...C"))
-                .preprocess(PreProcessSteps().mean({1.f, 2.f, 3.f}).scale({2.f, 3.f, 4.f})))
-            .build();
+        auto p = PrePostProcessor(f);
+        p.input()
+            .tensor().set_layout("N...C");
+        p.input().preprocess().mean({1.f, 2.f, 3.f}).scale({2.f, 3.f, 4.f});
+        p.build();
         return f;
     };
@@ -299,13 +280,12 @@ static RefPreprocessParams resize_to_network_height() {
     RefPreprocessParams res("resize_to_network_height");
     res.function = []() {
         auto f = create_simple_function(element::f32, PartialShape{1, 2, 1, 1});
-        f = PrePostProcessor(f)
-            .input(InputInfo()
-                .tensor(InputTensorInfo().set_spatial_dynamic_shape())
-                .preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_LINEAR))
-                .network(InputNetworkInfo().set_layout("NHWC"))
-            )
-            .build();
+        auto p = PrePostProcessor(f);
+        p.input()
+            .tensor().set_spatial_dynamic_shape();
+        p.input().preprocess().resize(ResizeAlgorithm::RESIZE_LINEAR);
+        p.input().network().set_layout("NHWC");
+        p.build();
         return f;
     };
     res.inputs.emplace_back(element::f32, Shape{1, 4, 1, 1}, std::vector<float>{0., 2., 4., 6.});
@@ -317,12 +297,12 @@ static RefPreprocessParams resize_to_network_width() {
     RefPreprocessParams res("resize_to_network_width");
     res.function = []() {
         auto f = create_simple_function(element::f32, PartialShape{Dimension::dynamic(), 1, 2, 2});
-        f = PrePostProcessor(f)
-            .input(InputInfo()
-                .tensor(InputTensorInfo().set_spatial_dynamic_shape())
-                .preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_LINEAR))
-                .network(InputNetworkInfo().set_layout("NCHW")))
-            .build();
+        auto p = PrePostProcessor(f);
+        p.input()
+            .tensor().set_spatial_dynamic_shape();
+        p.input().preprocess().resize(ResizeAlgorithm::RESIZE_LINEAR);
+        p.input().network().set_layout("NCHW");
+        p.build();
         return f;
     };
     res.inputs.emplace_back(element::f32, Shape{1, 1, 2, 6}, std::vector<float>{0., 1., 2., 3., 4., 5.,
@@ -335,14 +315,12 @@ static RefPreprocessParams resize_from_spatial_dims() {
     RefPreprocessParams res("resize_from_spatial_dims");
     res.function = []() {
         auto f = create_simple_function(element::f32, PartialShape{Dimension::dynamic(), 1, 1, 1});
-        auto t = InputTensorInfo();
-        t.set_spatial_static_shape(1, 4);
-        f = PrePostProcessor(f)
-            .input(InputInfo()
-                .tensor(std::move(t))
-                .preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_CUBIC))
-                .network(InputNetworkInfo().set_layout("NCHW")))
-            .build();
+        auto p = PrePostProcessor(f);
+        p.input()
+            .tensor().set_spatial_static_shape(1, 4);
+        p.input().preprocess().resize(ResizeAlgorithm::RESIZE_CUBIC);
+        p.input().network().set_layout("NCHW");
+        p.build();
         return f;
     };
     res.inputs.emplace_back(element::f32, Shape{1, 1, 1, 7}, std::vector<float>{0., 0.25, 1., 2.25, 4., 6.25, 9});
@@ -354,13 +332,13 @@ static RefPreprocessParams resize_i8() {
     RefPreprocessParams res("resize_i8");
     res.function = []() {
         auto f = create_simple_function(element::i8, PartialShape{1, 3, 1, 1});
-        f = PrePostProcessor(f)
-            .input(InputInfo()
-                .tensor(InputTensorInfo()
-                    .set_spatial_dynamic_shape())
-                .preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_LINEAR))
-                .network(InputNetworkInfo().set_layout("NCHW")))
-            .build();
+        auto p = PrePostProcessor(f);
+        p.input()
+            .tensor()
+            .set_spatial_dynamic_shape();
+        p.input().preprocess().resize(ResizeAlgorithm::RESIZE_LINEAR);
+        p.input().network().set_layout("NCHW");
+        p.build();
         return f;
     };
     res.inputs.emplace_back(element::i8, Shape{1, 3, 2, 2}, std::vector<int8_t>{0, 0, 0, 0,
@@ -374,12 +352,12 @@ static RefPreprocessParams resize_to_network_width_height() {
     RefPreprocessParams res("resize_to_network_width_height");
     res.function = []() {
         auto f = create_simple_function(element::f32, PartialShape{1, 1, 4, 4});
-        f = PrePostProcessor(f)
-            .input(InputInfo()
-                .tensor(InputTensorInfo().set_spatial_static_shape(5, 5))
-                .preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_NEAREST))
-                .network(InputNetworkInfo().set_layout("...HW")))
-            .build();
+        auto p = PrePostProcessor(f);
+        p.input()
+            .tensor().set_spatial_static_shape(5, 5);
+        p.input().preprocess().resize(ResizeAlgorithm::RESIZE_NEAREST);
+        p.input().network().set_layout("...HW");
+        p.build();
         return f;
     };
@@ -404,12 +382,12 @@ static RefPreprocessParams resize_to_specified_width_height() {
     RefPreprocessParams res("resize_to_specified_width_height");
     res.function = []() {
         auto f = create_simple_function(element::f32, PartialShape{1, 1, Dimension::dynamic(), Dimension::dynamic()});
-        f = PrePostProcessor(f)
-            .input(InputInfo()
-                .tensor(InputTensorInfo().set_spatial_dynamic_shape())
-                .preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_NEAREST, 4, 4))
-                .network(InputNetworkInfo().set_layout("...HW")))
-            .build();
+        auto p = PrePostProcessor(f);
+        p.input()
+            .tensor().set_spatial_dynamic_shape();
+        p.input().preprocess().resize(ResizeAlgorithm::RESIZE_NEAREST, 4, 4);
+        p.input().network().set_layout("...HW");
+        p.build();
         return f;
     };
@@ -430,52 +408,16 @@ static RefPreprocessParams resize_to_specified_width_height() {
     return res;
 }

-static RefPreprocessParams resize_lvalues() {
-    RefPreprocessParams res("resize_lvalues");
-    res.function = []() {
-        auto f = create_simple_function(element::f32, PartialShape{Dimension::dynamic(), 1, 1, 2});
-        f->get_parameters().front()->set_layout("NCHW");
-        auto t = InputTensorInfo();
-        t.set_spatial_dynamic_shape();
-        auto s = PreProcessSteps();
-        s.resize(ResizeAlgorithm::RESIZE_LINEAR, 1, 6); // to specified shape
-        s.resize(ResizeAlgorithm::RESIZE_LINEAR); // to network's shape
-        auto n = InputNetworkInfo();
-        n.set_layout("NCHW");
-        auto i = InputInfo();
-        i.tensor(std::move(t));
-        i.preprocess(std::move(s));
-        i.network(std::move(n));
-        f = PrePostProcessor(f)
-            .input(std::move(i))
-            .build();
-        return f;
-    };
-    // clang-format off
-    res.inputs.emplace_back(element::f32, Shape{1, 1, 1, 18}, std::vector<float>{0., 0., 0.,
-                                                                                 1., 1., 1.,
-                                                                                 2., 2., 2.,
-                                                                                 3., 3., 3.,
-                                                                                 4., 4., 4.,
-                                                                                 5., 5., 5.});
-    // clang-format on
-    res.expected.emplace_back(Shape{1, 1, 2, 1}, element::f32, std::vector<float>{1., 4.});
-    return res;
-}
-
-static RefPreprocessParams convert_layout_nhwc_to_nchw_lvalue() {
-    RefPreprocessParams res("convert_layout_nhwc_to_nchw_lvalue");
+static RefPreprocessParams convert_layout_nhwc_to_nchw() {
+    RefPreprocessParams res("convert_layout_nhwc_to_nchw");
     res.function = []() {
         auto f = create_simple_function(element::u8, {1, 3, 2, 2});
         f->get_parameters()[0]->set_layout("NCHW");
-        auto p = PreProcessSteps();
-        p.convert_layout("NCHW");
-
-        f = PrePostProcessor(f)
-            .input(InputInfo()
-                .tensor(InputTensorInfo().set_layout("NHWC"))
-                .preprocess(std::move(p)))
-            .build();
+        auto p = PrePostProcessor(f);
+        p.input().tensor().set_layout("NHWC");
+        p.input().preprocess().convert_layout("NCHW");
+        p.build();
         return f;
     };
     res.inputs.emplace_back(Shape{1, 2, 2, 3}, element::u8, std::vector<uint8_t>{1, 2, 3, // [H=0, W=0, RGB]
@@ -493,13 +435,10 @@ static RefPreprocessParams convert_layout_nhwc_to_net_no_tensor_shape() {
     res.function = []() {
         auto f = create_simple_function(element::u8, {1, 3, 2, 2});
         f->get_parameters()[0]->set_layout("NCHW");
-        auto p = PreProcessSteps();
-        p.convert_layout();
-        f = PrePostProcessor(f)
-            .input(InputInfo()
-                .tensor(InputTensorInfo().set_layout("NHWC"))
-                .preprocess(std::move(p)))
-            .build();
+        auto p = PrePostProcessor(f);
+        p.input().tensor().set_layout("NHWC");
+        p.input().preprocess().convert_layout();
+        p.build();
         return f;
     };
     res.inputs.emplace_back(Shape{1, 2, 2, 3}, element::u8, std::vector<uint8_t>{1, 2, 3, // [H=0, W=0, RGB]
@@ -516,10 +455,9 @@ static RefPreprocessParams convert_layout_by_dims() {
     RefPreprocessParams res("convert_layout_by_dims");
     res.function = []() {
         auto f = create_simple_function(element::u8, {1, 3, 2, 2});
-        f = PrePostProcessor(f)
-            .input(InputInfo()
-                .preprocess(PreProcessSteps().convert_layout({0, 3, 1, 2})))
-            .build();
+        auto p = PrePostProcessor(f);
+        p.input().preprocess().convert_layout({0, 3, 1, 2});
+        p.build();
         return f;
     };
     res.inputs.emplace_back(Shape{1, 2, 2, 3}, element::u8, std::vector<uint8_t>{1, 2, 3, // [H=0, W=0, RGB]
@@ -536,12 +474,10 @@ static RefPreprocessParams convert_layout_by_dims_multi() {
     RefPreprocessParams res("convert_layout_by_dims_multi");
     res.function = []() {
         auto f = create_simple_function(element::f32, {1, 3, 2, 2});
-        auto p = PreProcessSteps();
-        p.convert_layout({0, 1, 3, 2});    // NHWC->NHCW
-        p.convert_layout({0, 2, 1, 3});    // NHCW->NCHW
-        f = PrePostProcessor(f)
-            .input(InputInfo().preprocess(std::move(p)))
-            .build();
+        auto p = PrePostProcessor(f);
+        p.input().preprocess().convert_layout({0, 1, 3, 2})    // NHWC->NHCW
+                              .convert_layout({0, 2, 1, 3});   // NHCW->NCHW
+        p.build();
         return f;
     };
     res.inputs.emplace_back(Shape{1, 2, 2, 3}, element::f32, std::vector<float>{1, 2, 3, // [H=0, W=0]
@ -558,14 +494,12 @@ static RefPreprocessParams convert_layout_by_dims_multi_layout() {
|
||||
RefPreprocessParams res("convert_layout_by_dims_multi_layout");
|
||||
res.function = []() {
|
||||
auto f = create_simple_function(element::f32, {1, 3, 2, 2});
|
||||
auto p = PreProcessSteps();
|
||||
p.convert_layout({0, 1, 3, 2}); // NHWC->NHCW
|
||||
p.mean({1, 2, 2}); // Apply means to 'C' channel
|
||||
p.convert_layout({0, 2, 1, 3}); // NHCW->NCHW
|
||||
f = PrePostProcessor(f)
|
||||
.input(InputInfo().tensor(InputTensorInfo().set_layout("N??C"))
|
||||
.preprocess(std::move(p)))
|
||||
.build();
|
||||
auto p = PrePostProcessor(f);
|
||||
p.input().tensor().set_layout("N??C");
|
||||
p.input().preprocess().convert_layout({0, 1, 3, 2}) // NHWC->NHCW
|
||||
.mean({1, 2, 2}) // Apply means to 'C' channel
|
||||
.convert_layout({0, 2, 1, 3}); // NHCW->NCHW
|
||||
p.build();
|
||||
return f;
|
||||
};
|
||||
res.inputs.emplace_back(Shape{1, 2, 2, 3}, element::f32, std::vector<float>{1, 2, 3, // [H=0, W=0, RGB]
|
||||
@ -582,16 +516,16 @@ static RefPreprocessParams resize_and_convert_layout() {
|
||||
RefPreprocessParams res("resize_and_convert_layout");
|
||||
res.function = []() {
|
||||
auto f = create_simple_function(element::f32, PartialShape{1, 2, 2, 2});
|
||||
f = PrePostProcessor(f)
|
||||
.input(InputInfo()
|
||||
.tensor(InputTensorInfo()
|
||||
.set_layout("NCHW")
|
||||
.set_spatial_dynamic_shape())
|
||||
.preprocess(PreProcessSteps()
|
||||
.resize(ResizeAlgorithm::RESIZE_LINEAR)
|
||||
.convert_layout())
|
||||
.network(InputNetworkInfo().set_layout("NHWC")))
|
||||
.build();
|
||||
auto p = PrePostProcessor(f);
|
||||
p.input()
|
||||
.tensor()
|
||||
.set_layout("NCHW")
|
||||
.set_spatial_dynamic_shape();
|
||||
p.input().preprocess()
|
||||
.resize(ResizeAlgorithm::RESIZE_LINEAR)
|
||||
.convert_layout();
|
||||
p.input().network().set_layout("NHWC");
|
||||
p.build();
|
||||
return f;
|
||||
};
|
||||
|
||||
@ -620,13 +554,13 @@ static RefPreprocessParams convert_color_nv12_to_bgr_two_planes() {
    res.rel_threshold = 1.f; // Ignore relative pixel values comparison (100%)
    res.function = []() {
        auto f = create_simple_function(element::u8, PartialShape{1, 4, 4, 3});
        f = PrePostProcessor(f)
                .input(InputInfo()
                           .tensor(InputTensorInfo()
                                       .set_color_format(ColorFormat::NV12_TWO_PLANES))
                           .preprocess(PreProcessSteps()
                                           .convert_color(ColorFormat::BGR)))
                .build();
        auto p = PrePostProcessor(f);
        p.input()
            .tensor()
            .set_color_format(ColorFormat::NV12_TWO_PLANES);
        p.input().preprocess()
            .convert_color(ColorFormat::BGR);
        p.build();
        return f;
    };

@ -659,13 +593,13 @@ static RefPreprocessParams convert_color_nv12_single_plane() {
    res.rel_threshold = 1.f; // Ignore relative pixel values comparison (100%)
    res.function = []() {
        auto f = create_simple_function(element::f32, PartialShape{1, 4, 4, 3});
        f = PrePostProcessor(f)
                .input(InputInfo()
                           .tensor(InputTensorInfo()
                                       .set_color_format(ColorFormat::NV12_SINGLE_PLANE))
                           .preprocess(PreProcessSteps()
                                           .convert_color(ColorFormat::RGB)))
                .build();
        auto p = PrePostProcessor(f);
        p.input()
            .tensor()
            .set_color_format(ColorFormat::NV12_SINGLE_PLANE);
        p.input().preprocess()
            .convert_color(ColorFormat::RGB);
        p.build();
        return f;
    };

@ -680,7 +614,7 @@ static RefPreprocessParams convert_color_nv12_single_plane() {
        255, 0, 0, 255, 0, 0, 0, 255, 0, 0, 255, 0, // RRGG
        0, 0, 255, 0, 0, 255, 255, 0, 0, 255, 0, 0, // BBRR
        0, 0, 255, 0, 0, 255, 255, 0, 0, 255, 0, 0, // BBRR
        };
    };
    auto out_shape = Shape{1, 4, 4, 3};
    // clang-format on
    res.inputs.emplace_back(element::f32, input_shape, input);

@ -694,19 +628,19 @@ static RefPreprocessParams convert_color_nv12_layout_resize() {
    res.rel_threshold = 1.f; // Ignore relative pixel values comparison (100%)
    res.function = []() {
        auto f = create_simple_function(element::f32, PartialShape{1, 3, 2, 2});
        f = PrePostProcessor(f)
                .input(InputInfo()
                           .tensor(InputTensorInfo()
                                       .set_color_format(ColorFormat::NV12_SINGLE_PLANE)
                                       .set_element_type(element::u8)
                                       .set_spatial_dynamic_shape())
                           .preprocess(PreProcessSteps()
                                           .convert_color(ColorFormat::RGB)
                                           .convert_layout()
                                           .convert_element_type(element::f32)
                                           .resize(ResizeAlgorithm::RESIZE_NEAREST))
                           .network(InputNetworkInfo().set_layout("NCHW")))
                .build();
        auto p = PrePostProcessor(f);
        p.input()
            .tensor()
            .set_color_format(ColorFormat::NV12_SINGLE_PLANE)
            .set_element_type(element::u8)
            .set_spatial_dynamic_shape();
        p.input().preprocess()
            .convert_color(ColorFormat::RGB)
            .convert_layout()
            .convert_element_type(element::f32)
            .resize(ResizeAlgorithm::RESIZE_NEAREST);
        p.input().network().set_layout("NCHW");
        p.build();
        return f;
    };

@ -734,16 +668,16 @@ static RefPreprocessParams element_type_before_convert_color_nv12() {
    res.rel_threshold = 1.f; // Ignore relative pixel values comparison (100%)
    res.function = []() {
        auto f = create_simple_function(element::f32, PartialShape{1, 2, 2, 3});
        f = PrePostProcessor(f)
                .input(InputInfo()
                           .tensor(InputTensorInfo()
                                       .set_element_type(element::u8)
                                       .set_color_format(ColorFormat::NV12_TWO_PLANES))
                           .preprocess(PreProcessSteps()
                                           .convert_element_type(element::f32)
                                           .convert_color(ColorFormat::RGB))
                           .network(InputNetworkInfo().set_layout("NHWC")))
                .build();
        auto p = PrePostProcessor(f);
        p.input()
            .tensor()
            .set_element_type(element::u8)
            .set_color_format(ColorFormat::NV12_TWO_PLANES);
        p.input().preprocess()
            .convert_element_type(element::f32)
            .convert_color(ColorFormat::RGB);
        p.input().network().set_layout("NHWC");
        p.build();
        return f;
    };
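Note on the NV12 cases above: NV12 packs a full-resolution Y plane followed by a half-resolution interleaved UV plane, which is why the two-plane variant takes two input tensors. These tests set rel_threshold = 1.f because different YUV-to-RGB coefficient sets disagree noticeably on low-magnitude channels. A BT.601-style mapping of the kind commonly used is sketched below; the exact coefficients of the implementation are not part of this diff, so treat the numbers as illustrative only:

// Illustrative BT.601-style coefficients (assumed, not taken from this commit):
float r = 1.164f * (y - 16.f) + 1.596f * (v - 128.f);
float g = 1.164f * (y - 16.f) - 0.813f * (v - 128.f) - 0.391f * (u - 128.f);
float b = 1.164f * (y - 16.f) + 2.018f * (u - 128.f);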
@ -836,15 +770,15 @@ static RefPreprocessParams postprocess_2_inputs_basic() {
    RefPreprocessParams res("postprocess_2_inputs_basic");
    res.function = []() {
        auto f = create_n_inputs<2>(element::f32, Shape{1, 3, 1, 2});
        f = PrePostProcessor(f)
                .output(OutputInfo("tensor_output1")
                            .network(OutputNetworkInfo().set_layout("NCHW"))
                            .postprocess(PostProcessSteps().convert_layout())
                            .tensor(OutputTensorInfo().set_layout("NHWC")))
                .output(OutputInfo("tensor_output2")
                            .postprocess(PostProcessSteps().convert_element_type())
                            .tensor(OutputTensorInfo().set_element_type(element::u8)))
                .build();
        auto p = PrePostProcessor(f);
        p.output("tensor_output1")
            .network().set_layout("NCHW");
        p.output("tensor_output1").postprocess().convert_layout();
        p.output("tensor_output1").tensor().set_layout("NHWC");
        p.output("tensor_output2")
            .postprocess().convert_element_type();
        p.output("tensor_output2").tensor().set_element_type(element::u8);
        p.build();
        return f;
    };
    res.inputs.emplace_back(Shape{1, 3, 1, 2}, element::f32, std::vector<float>{1.1, 2.1, 3.1, 4.1, 5.1, 6.1});

@ -858,10 +792,10 @@ static RefPreprocessParams post_convert_layout_by_dims() {
    RefPreprocessParams res("post_convert_layout_by_dims");
    res.function = []() {
        auto f = create_simple_function(element::u8, {1, 2, 2, 3});
        f = PrePostProcessor(f)
                .output(OutputInfo()
                            .postprocess(PostProcessSteps().convert_layout({0, 3, 1, 2})))
                .build();
        auto p = PrePostProcessor(f);
        p.output()
            .postprocess().convert_layout({0, 3, 1, 2});
        p.build();
        return f;
    };
    res.inputs.emplace_back(Shape{1, 2, 2, 3}, element::u8, std::vector<uint8_t>{1, 2, 3, // [H=0, W=0, RGB]

@ -878,12 +812,10 @@ static RefPreprocessParams post_convert_layout_by_dims_multi() {
    RefPreprocessParams res("post_convert_layout_by_dims_multi");
    res.function = []() {
        auto f = create_simple_function(element::f32, {1, 2, 2, 3});
        auto p = PostProcessSteps();
        p.convert_layout({0, 1, 3, 2}); // NHWC->NHCW
        p.convert_layout({0, 2, 1, 3}); // NHCW->NCHW
        f = PrePostProcessor(f)
                .output(OutputInfo().postprocess(std::move(p)))
                .build();
        auto p = PrePostProcessor(f);
        p.output().postprocess().convert_layout({0, 1, 3, 2}); // NHWC->NHCW
        p.output().postprocess().convert_layout({0, 2, 1, 3}); // NHCW->NCHW
        p.build();
        return f;
    };
    res.inputs.emplace_back(Shape{1, 2, 2, 3}, element::f32, std::vector<float>{1, 2, 3, // [H=0, W=0]

@ -900,20 +832,19 @@ static RefPreprocessParams pre_and_post_processing() {
    RefPreprocessParams res("pre_and_post_processing");
    res.function = []() {
        auto f = create_n_inputs<2>(element::f32, Shape{1, 3, 1, 2});
        f = PrePostProcessor(f)
                .input(InputInfo(0)
                           .tensor(InputTensorInfo().set_element_type(element::u8))
                           .preprocess(PreProcessSteps().convert_element_type(element::f32).mean(1.f)))
                .input(InputInfo(1)
                           .preprocess(PreProcessSteps().scale(2.f)))
                .output(OutputInfo("tensor_output1")
                            .network(OutputNetworkInfo().set_layout("NCHW"))
                            .postprocess(PostProcessSteps().convert_layout())
                            .tensor(OutputTensorInfo().set_layout("NHWC")))
                .output(OutputInfo("tensor_output2")
                            .postprocess(PostProcessSteps().convert_element_type())
                            .tensor(OutputTensorInfo().set_element_type(element::u8)))
                .build();
        auto p = PrePostProcessor(f);
        p.input(0)
            .tensor().set_element_type(element::u8);
        p.input(0).preprocess().convert_element_type(element::f32).mean(1.f);
        p.input(1).preprocess().scale(2.f);
        p.output("tensor_output1")
            .network().set_layout("NCHW");
        p.output("tensor_output1").postprocess().convert_layout();
        p.output("tensor_output1").tensor().set_layout("NHWC");
        p.output("tensor_output2")
            .postprocess().convert_element_type();
        p.output("tensor_output2").tensor().set_element_type(element::u8);
        p.build();
        return f;
    };
    res.inputs.emplace_back(Shape{1, 3, 1, 2}, element::u8, std::vector<uint8_t>{1, 2, 3, 4, 5, 6});

@ -927,9 +858,10 @@ static RefPreprocessParams rgb_to_bgr() {
    RefPreprocessParams res("rgb_to_bgr");
    res.function = []() {
        auto f = create_simple_function(element::f32, Shape{2, 1, 1, 3});
        f = PrePostProcessor(f).input(InputInfo()
                .tensor(InputTensorInfo().set_color_format(ColorFormat::RGB))
                .preprocess(PreProcessSteps().convert_color(ColorFormat::BGR))).build();
        auto p = PrePostProcessor(f);
        p.input().tensor().set_color_format(ColorFormat::RGB);
        p.input().preprocess().convert_color(ColorFormat::BGR);
        p.build();
        return f;
    };

@ -942,9 +874,10 @@ static RefPreprocessParams bgr_to_rgb() {
    RefPreprocessParams res("bgr_to_rgb");
    res.function = []() {
        auto f = create_simple_function(element::f32, Shape{2, 1, 1, 3});
        f = PrePostProcessor(f).input(InputInfo()
                .tensor(InputTensorInfo().set_color_format(ColorFormat::BGR))
                .preprocess(PreProcessSteps().convert_color(ColorFormat::RGB))).build();
        auto p = PrePostProcessor(f);
        p.input().tensor().set_color_format(ColorFormat::BGR);
        p.input().preprocess().convert_color(ColorFormat::RGB);
        p.build();
        return f;
    };

@ -957,9 +890,10 @@ static RefPreprocessParams reverse_channels_nchw() {
    RefPreprocessParams res("reverse_channels_nchw");
    res.function = []() {
        auto f = create_simple_function(element::f32, PartialShape{1, 2, 2, 2});
        f = PrePostProcessor(f).input(InputInfo()
                .tensor(InputTensorInfo().set_layout("NCHW"))
                .preprocess(PreProcessSteps().reverse_channels())).build();
        auto p = PrePostProcessor(f);
        p.input().tensor().set_layout("NCHW");
        p.input().preprocess().reverse_channels();
        p.build();
        return f;
    };
@ -1004,14 +938,13 @@ static RefPreprocessParams color_cut_last_channel() {
    return res;
}


static RefPreprocessParams reverse_channels_dyn_layout() {
    RefPreprocessParams res("reverse_channels_dyn_layout");
    res.function = []() {
        auto f = create_simple_function(element::f32, PartialShape{1, 1, 3, 2});
        f = PrePostProcessor(f).input(InputInfo()
                .tensor(InputTensorInfo().set_color_format(ColorFormat::BGR).set_layout("...CN"))
                .preprocess(PreProcessSteps().convert_color(ColorFormat::RGB))).build();
        auto p = PrePostProcessor(f);
        p.input().tensor().set_color_format(ColorFormat::BGR).set_layout("...CN");
        p.input().preprocess().convert_color(ColorFormat::RGB); p.build();
        return f;
    };

@ -1024,12 +957,13 @@ static RefPreprocessParams reverse_dyn_shape() {
    RefPreprocessParams res("reverse_dyn_shape");
    res.function = []() {
        auto f = create_simple_function(element::u8, PartialShape{Dimension::dynamic(),
                                                                  Dimension::dynamic(),
                                                                  Dimension::dynamic(),
                                                                  Dimension::dynamic()});
        f = PrePostProcessor(f).input(InputInfo()
                .tensor(InputTensorInfo().set_layout("NCHW"))
                .preprocess(PreProcessSteps().reverse_channels())).build();
                                                                  Dimension::dynamic(),
                                                                  Dimension::dynamic(),
                                                                  Dimension::dynamic()});
        auto p = PrePostProcessor(f);
        p.input().tensor().set_layout("NCHW");
        p.input().preprocess().reverse_channels();
        p.build();
        return f;
    };

@ -1042,11 +976,10 @@ static RefPreprocessParams reverse_fully_dyn_shape() {
    RefPreprocessParams res("reverse_fully_dyn_shape");
    res.function = []() {
        auto f = create_simple_function(element::u8, PartialShape::dynamic());
        auto p = PreProcessSteps();
        p.reverse_channels();
        f = PrePostProcessor(f).input(InputInfo()
                .tensor(InputTensorInfo().set_layout("...C??"))
                .preprocess(std::move(p))).build();
        auto p = PrePostProcessor(f);
        p.input().tensor().set_layout("...C??");
        p.input().preprocess().reverse_channels();
        p.build();
        return f;
    };

@ -1057,47 +990,46 @@ static RefPreprocessParams reverse_fully_dyn_shape() {

std::vector<RefPreprocessParams> allPreprocessTests() {
    return std::vector<RefPreprocessParams> {
        simple_mean_scale(),
        scale_then_mean(),
        convert_only(),
        convert_element_type_and_scale(),
        tensor_element_type_and_scale(),
        custom_preprocessing(),
        test_lvalue(),
        test_2_inputs_basic(),
        mean_scale_vector_tensor_layout(),
        mean_scale_dynamic_layout(),
        resize_to_network_height(),
        resize_to_network_width(),
        resize_from_spatial_dims(),
        resize_i8(),
        resize_to_network_width_height(),
        resize_to_specified_width_height(),
        resize_lvalues(),
        convert_layout_nhwc_to_nchw_lvalue(),
        convert_layout_nhwc_to_net_no_tensor_shape(),
        convert_layout_by_dims(),
        convert_layout_by_dims_multi(),
        convert_layout_by_dims_multi_layout(),
        resize_and_convert_layout(),
        convert_color_nv12_to_bgr_two_planes(),
        convert_color_nv12_single_plane(),
        convert_color_nv12_layout_resize(),
        element_type_before_convert_color_nv12(),
        convert_color_i420_to_bgr_three_planes(),
        convert_color_i420_single_plane(),
        postprocess_2_inputs_basic(),
        post_convert_layout_by_dims(),
        post_convert_layout_by_dims_multi(),
        pre_and_post_processing(),
        rgb_to_bgr(),
        bgr_to_rgb(),
        color_cut_last_channel(),
        reverse_channels_nchw(),
        reverse_channels_dyn_layout(),
        reverse_dyn_shape(),
        reverse_fully_dyn_shape()
    };
        simple_mean_scale(),
        scale_then_mean(),
        convert_only(),
        convert_element_type_and_scale(),
        tensor_element_type_and_scale(),
        custom_preprocessing(),
        test_multiple(),
        test_2_inputs_basic(),
        mean_scale_vector_tensor_layout(),
        mean_scale_dynamic_layout(),
        resize_to_network_height(),
        resize_to_network_width(),
        resize_from_spatial_dims(),
        resize_i8(),
        resize_to_network_width_height(),
        resize_to_specified_width_height(),
        convert_layout_nhwc_to_nchw(),
        convert_layout_nhwc_to_net_no_tensor_shape(),
        convert_layout_by_dims(),
        convert_layout_by_dims_multi(),
        convert_layout_by_dims_multi_layout(),
        resize_and_convert_layout(),
        convert_color_nv12_to_bgr_two_planes(),
        convert_color_nv12_single_plane(),
        convert_color_nv12_layout_resize(),
        element_type_before_convert_color_nv12(),
        convert_color_i420_to_bgr_three_planes(),
        convert_color_i420_single_plane(),
        postprocess_2_inputs_basic(),
        post_convert_layout_by_dims(),
        post_convert_layout_by_dims_multi(),
        pre_and_post_processing(),
        rgb_to_bgr(),
        bgr_to_rgb(),
        color_cut_last_channel(),
        reverse_channels_nchw(),
        reverse_channels_dyn_layout(),
        reverse_dyn_shape(),
        reverse_fully_dyn_shape()
    };
}

INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferencePreprocessTest,
@ -57,7 +57,9 @@ static std::shared_ptr<Function> create_simple_function_yuv(const PartialShape&

TEST_F(ReferencePreprocessLegacyTest, mean) {
    function = create_simple_function(element::f32, Shape{1, 3, 2, 2});
    function = PrePostProcessor(function).input(InputInfo().preprocess(PreProcessSteps().mean(1.f))).build();
    auto p = PrePostProcessor(function);
    p.input().preprocess().mean(1.f);
    p.build();

    auto f2 = create_simple_function(element::f32, Shape{1, 3, 2, 2});
    legacy_network = InferenceEngine::CNNNetwork(f2);

@ -75,7 +77,9 @@ TEST_F(ReferencePreprocessLegacyTest, mean) {

TEST_F(ReferencePreprocessLegacyTest, mean_scale) {
    function = create_simple_function(element::f32, Shape{1, 3, 20, 20});
    function = PrePostProcessor(function).input(InputInfo().preprocess(PreProcessSteps().scale(2.f))).build();
    auto p = PrePostProcessor(function);
    p.input().preprocess().scale(2.f);
    p.build();

    auto f2 = create_simple_function(element::f32, Shape{1, 3, 20, 20});
    legacy_network = InferenceEngine::CNNNetwork(f2);

@ -96,11 +100,11 @@ TEST_F(ReferencePreprocessLegacyTest, resize) {
    auto f2 = create_simple_function(element::f32, Shape{1, 3, 5, 5});
    legacy_network = InferenceEngine::CNNNetwork(f2);

    function = PrePostProcessor(function).input(InputInfo()
            .tensor(InputTensorInfo().set_layout("NCHW").set_spatial_static_shape(42, 30))
            .preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_LINEAR))
            .network(InputNetworkInfo().set_layout("NCHW")))
        .build();
    auto p = PrePostProcessor(function);
    p.input().tensor().set_layout("NCHW").set_spatial_static_shape(42, 30);
    p.input().preprocess().resize(ResizeAlgorithm::RESIZE_LINEAR);
    p.input().network().set_layout("NCHW");
    p.build();

    auto &preProcess = legacy_network.getInputsInfo().begin()->second->getPreProcess();
    preProcess.setResizeAlgorithm(InferenceEngine::ResizeAlgorithm::RESIZE_BILINEAR);

@ -177,12 +181,11 @@ public:
    inputData.clear();
    legacy_input_blobs.clear();

    function = PrePostProcessor(function).input(InputInfo()
                                                    .tensor(InputTensorInfo().set_color_format(
                                                        ColorFormat::NV12_SINGLE_PLANE))
                                                    .preprocess(PreProcessSteps().convert_color(ColorFormat::BGR))
                                                    .network(InputNetworkInfo().set_layout("NCHW")))
                   .build();
    auto p = PrePostProcessor(function);
    p.input().tensor().set_color_format(ColorFormat::NV12_SINGLE_PLANE);
    p.input().preprocess().convert_color(ColorFormat::BGR);
    p.input().network().set_layout("NCHW");
    p.build();

    const auto &param = function->get_parameters()[0];
    inputData.emplace_back(param->get_element_type(), param->get_shape(), ov20_input_yuv.data());

@ -109,11 +109,10 @@ TEST_F(PreprocessOpenCVReferenceTest_YUV, convert_nv12_full_color_range) {

    inputData.clear();

    function = PrePostProcessor(function).input(InputInfo()
                                                    .tensor(InputTensorInfo().set_color_format(
                                                        ColorFormat::NV12_SINGLE_PLANE))
                                                    .preprocess(PreProcessSteps().convert_color(ColorFormat::BGR)))
                   .build();
    auto p = PrePostProcessor(function);
    p.input().tensor().set_color_format(ColorFormat::NV12_SINGLE_PLANE);
    p.input().preprocess().convert_color(ColorFormat::BGR);
    function = p.build();

    const auto &param = function->get_parameters()[0];
    inputData.emplace_back(param->get_element_type(), param->get_shape(), ov20_input_yuv.data());

@ -138,12 +137,10 @@ TEST_F(PreprocessOpenCVReferenceTest_YUV, convert_nv12_colored) {

    inputData.clear();

    function = PrePostProcessor(function).input(InputInfo()
                                                    .tensor(InputTensorInfo().set_color_format(
                                                        ColorFormat::NV12_SINGLE_PLANE))
                                                    .preprocess(PreProcessSteps().convert_color(ColorFormat::BGR))
                                                    )
                   .build();
    auto p = PrePostProcessor(function);
    p.input().tensor().set_color_format(ColorFormat::NV12_SINGLE_PLANE);
    p.input().preprocess().convert_color(ColorFormat::BGR);
    function = p.build();

    const auto &param = function->get_parameters()[0];
    inputData.emplace_back(param->get_element_type(), param->get_shape(), input_yuv.data());

@ -165,12 +162,11 @@ TEST_F(PreprocessOpenCVReferenceTest, resize_u8_simple_linear) {

    inputData.clear();

    function = PrePostProcessor(function).input(InputInfo()
                                                    .tensor(InputTensorInfo().set_spatial_static_shape(2, 2))
                                                    .preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_LINEAR))
                                                    .network(InputNetworkInfo().set_layout("NCHW"))
                                                    )
                   .build();
    auto p = PrePostProcessor(function);
    p.input().tensor().set_spatial_static_shape(2, 2);
    p.input().preprocess().resize(ResizeAlgorithm::RESIZE_LINEAR);
    p.input().network().set_layout("NCHW");
    function = p.build();

    const auto &param = function->get_parameters()[0];
    inputData.emplace_back(param->get_element_type(), param->get_shape(), input_img.data());

@ -204,12 +200,11 @@ TEST_F(PreprocessOpenCVReferenceTest, resize_u8_large_picture_linear) {

    inputData.clear();

    function = PrePostProcessor(function).input(InputInfo()
                                                    .tensor(InputTensorInfo().set_spatial_static_shape(input_height, input_width))
                                                    .preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_LINEAR))
                                                    .network(InputNetworkInfo().set_layout("NCHW"))
                                                    )
                   .build();
    auto p = PrePostProcessor(function);
    p.input().tensor().set_spatial_static_shape(input_height, input_width);
    p.input().preprocess().resize(ResizeAlgorithm::RESIZE_LINEAR);
    p.input().network().set_layout("NCHW");
    function = p.build();

    const auto &param = function->get_parameters()[0];
    inputData.emplace_back(param->get_element_type(), param->get_shape(), input_img.data());

@ -242,12 +237,11 @@ TEST_F(PreprocessOpenCVReferenceTest, resize_f32_large_picture_linear) {

    inputData.clear();

    function = PrePostProcessor(function).input(InputInfo()
                                                    .tensor(InputTensorInfo().set_spatial_static_shape(input_height, input_width))
                                                    .preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_LINEAR))
                                                    .network(InputNetworkInfo().set_layout("NCHW"))
                                                    )
                   .build();
    auto p = PrePostProcessor(function);
    p.input().tensor().set_spatial_static_shape(input_height, input_width);
    p.input().preprocess().resize(ResizeAlgorithm::RESIZE_LINEAR);
    p.input().network().set_layout("NCHW");
    function = p.build();

    const auto &param = function->get_parameters()[0];
    inputData.emplace_back(param->get_element_type(), param->get_shape(), input_img.data());

@ -271,12 +265,11 @@ TEST_F(PreprocessOpenCVReferenceTest, DISABLED_resize_f32_large_picture_cubic_sm
    auto element_type = element::f32;
    auto input_img = std::vector<float> {1.f, 2.f, 3.f, 4.f, 4.f, 3.f, 2.f, 1.f, 1.f, 2.f, 3.f, 4.f, 4.f, 3.f, 2.f, 1.f};
    function = create_simple_function(element_type, func_shape);
    function = PrePostProcessor(function).input(InputInfo()
                                                    .tensor(InputTensorInfo().set_spatial_static_shape(input_height, input_width))
                                                    .preprocess(PreProcessSteps().resize(ResizeAlgorithm::RESIZE_CUBIC))
                                                    .network(InputNetworkInfo().set_layout("NCHW"))
                                                    )
                   .build();
    auto p = PrePostProcessor(function);
    p.input().tensor().set_spatial_static_shape(input_height, input_width);
    p.input().preprocess().resize(ResizeAlgorithm::RESIZE_CUBIC);
    p.input().network().set_layout("NCHW");
    function = p.build();

    inputData.emplace_back(element_type, input_shape, input_img.data());
@ -13,8 +13,6 @@ if(NOT DEFINED OpenVINO_SOURCE_DIR)
endif()

option(ENABLE_CONDA_FOLDER "Create output folder with conda python bindings" OFF)
cmake_dependent_option(ENABLE_WHEEL "Create wheel package" OFF
    "PYTHONINTERP_FOUND;NOT CMAKE_SOURCE_DIR STREQUAL ie_python_api_SOURCE_DIR" OFF)

set(PYTHON_BRIDGE_CPACK_PATH "python")

@ -55,9 +55,6 @@ add_custom_command(TARGET ${TARGET_NAME}

# install

# TODO: use ${PYTHON_VERSION}_dev component below
# ie_cpack_add_component(${PYTHON_VERSION}_dev DEPENDS ${PYTHON_COMPONENT})

install(TARGETS ${TARGET_NAME}
    RUNTIME DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_COMPONENT}
    LIBRARY DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_COMPONENT})

@ -71,3 +71,11 @@ add_custom_command(OUTPUT ${openvino_wheel_path}
    VERBATIM)

add_custom_target(ie_wheel ALL DEPENDS ${openvino_wheel_path})

# install

ie_cpack_add_component(python_wheels)

install(FILES ${openvino_wheel_path}
    DESTINATION tools
    COMPONENT python_wheels)

@ -1,3 +1,3 @@
setuptools>=53.0.0,<=58.4.0
setuptools>=53.0.0
wheel>=0.36.2
python-decouple>=3.4

@ -13,10 +13,6 @@ if(ENABLE_MKL_DNN)
    add_subdirectory(mkldnn_plugin)
endif()

if(ENABLE_CLDNN)
    add_subdirectory(cldnn_engine)
endif()

if(ENABLE_VPU)
    add_subdirectory(vpu)
endif()
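The GNA plugin hunks below start by flipping compact_mode on by default. With compact mode enabled, intermediate buffers can be reused once their producer is no longer referenced: connectOutput (further down) tags each reservation with its owning layer, and gnamem->commit(gnaFlags->compact_mode) performs the actual compaction. The decision reduces, as in the connectOutput hunk below, to reserving with a nullptr owner only when the buffer must stay live for the whole network:

// From the connectOutput hunk below: network outputs keep a nullptr owner
// (never compacted away); everything else is owned by its producing layer.
gnamem->reserve_ptr((LayerInfo(layer).isOutput() || !nextLayer) ? nullptr : layer, ptr, ALIGN64(num_data_bytes_out), 64);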
@ -10,7 +10,7 @@ namespace GNAPluginNS {
struct GNAFlags {
    uint8_t gna_lib_async_threads_num = 1;

    bool compact_mode = false;
    bool compact_mode = true;
    bool exclusive_async_requests = false;
    bool uniformPwlDesign = false;
    float pwlMaxErrorPercent = 1.0f;
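Across gna_graph_compiler.cpp below, every allocation request (push_value, push_ptr, push_local_ptr, push_initializer, bind_initializer, bind_ptr, reserve_ptr) gains a leading CNNLayerPtr argument naming the layer that owns the region, or nullptr when no single layer does (inputs, memory states, the active-list scratch area). The mechanical before/after, taken from the hunks themselves:

// before:
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
// after - the first argument identifies the owning layer for compact-mode reuse:
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);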
@ -208,7 +208,7 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer)
|
||||
connectOutput(constLayer, ptr_for_const_blob, const_blob->byteSize());
|
||||
// TODO: segment type for bind, bind initializer not used - need refactor to separate bind and allocation requests
|
||||
// dont see practical use case when bind storage type need to be different that allocation type
|
||||
gnamem->readonly().bind_initializer(ptr_for_const_blob, [const_blob](void* data, size_t size) {
|
||||
gnamem->bind_initializer(nullptr, ptr_for_const_blob, [const_blob](void* data, size_t size) {
|
||||
ie_memcpy(data, size, const_blob->buffer(), const_blob->byteSize());
|
||||
});
|
||||
}
|
||||
@ -475,7 +475,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
|
||||
}
|
||||
|
||||
if (num_conv_kernel_padding == 0) {
|
||||
gnamem->readonly().push_local_ptr(ptr_weights,
|
||||
gnamem->readonly().push_local_ptr(layer, ptr_weights,
|
||||
transposedWeights.data(),
|
||||
convolution._weights->byteSize(),
|
||||
64);
|
||||
@ -502,19 +502,19 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
|
||||
offset += padding_zeros.size();
|
||||
}
|
||||
};
|
||||
gnamem->readonly().push_initializer(ptr_weights,
|
||||
gnamem->readonly().push_initializer(layer, ptr_weights,
|
||||
paddedWeightsSize,
|
||||
initializer,
|
||||
64);
|
||||
}
|
||||
|
||||
if (convolution._biases) {
|
||||
gnamem->readonly().push_ptr(ptr_biases,
|
||||
gnamem->readonly().push_ptr(layer, ptr_biases,
|
||||
convolution._biases->cbuffer().as<const void*>(),
|
||||
convolution._biases->byteSize(),
|
||||
64);
|
||||
} else {
|
||||
gnamem->readonly().push_value(ptr_biases, 0.0f, out_channels, 64);
|
||||
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, out_channels, 64);
|
||||
}
|
||||
}
|
||||
|
||||
@ -600,7 +600,6 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
|
||||
ptr_outputs,
|
||||
ptr_weights,
|
||||
ptr_biases);
|
||||
|
||||
currentComponent.num_bytes_per_input = inputs->getPrecision().size();
|
||||
currentComponent.num_bytes_per_output = outputs->getPrecision().size();
|
||||
|
||||
@ -647,18 +646,18 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
|
||||
transposedWeights.resize(transposedWeights.size() + kernelPad);
|
||||
}
|
||||
|
||||
gnamem->readonly().push_local_ptr(ptr_weights,
|
||||
gnamem->readonly().push_local_ptr(layer, ptr_weights,
|
||||
transposedWeights.data(),
|
||||
transposedWeights.size(),
|
||||
64);
|
||||
|
||||
if (convolution._biases) {
|
||||
gnamem->readonly().push_ptr(ptr_biases,
|
||||
gnamem->readonly().push_ptr(layer, ptr_biases,
|
||||
convolution._biases->cbuffer().as<const void*>(),
|
||||
convolution._biases->byteSize(),
|
||||
64);
|
||||
} else {
|
||||
gnamem->readonly().push_value(ptr_biases, 0.0f, out_channels, 64);
|
||||
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, out_channels, 64);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -712,14 +711,13 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
ptr_weights,
|
||||
ptr_biases,
|
||||
true);
|
||||
|
||||
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
||||
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
|
||||
|
||||
if (gnaFlags->sw_fp32) {
|
||||
IE_ASSERT(quantized == nullptr);
|
||||
gnamem->readonly().push_value(ptr_weights, power.scale, num_rows_out, 64);
|
||||
gnamem->readonly().push_value(ptr_biases, power.offset, num_rows_out, 64);
|
||||
gnamem->readonly().push_value(layer, ptr_weights, power.scale, num_rows_out, 64);
|
||||
gnamem->readonly().push_value(layer, ptr_biases, power.offset, num_rows_out, 64);
|
||||
} else {
|
||||
IE_ASSERT(quantized != nullptr);
|
||||
if (!gnaFlags->input_low_precision) {
|
||||
@ -727,15 +725,15 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
static_cast<float>(INT16_MAX)));
|
||||
auto quantizedOffset = FLOAT_TO_INT32(std::min(quantized->_dst_quant.GetScale() * power.offset,
|
||||
static_cast<float>(INT32_MAX)));
|
||||
gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedScale, num_rows_out, 64);
|
||||
gnamem->readonly().push_value<int32_t>(ptr_biases, quantizedOffset, num_rows_out, 64);
|
||||
gnamem->readonly().push_value<int16_t>(layer, ptr_weights, quantizedScale, num_rows_out, 64);
|
||||
gnamem->readonly().push_value<int32_t>(layer, ptr_biases, quantizedOffset, num_rows_out, 64);
|
||||
} else {
|
||||
auto quantizedScale = FLOAT_TO_INT8(std::min(quantized->_weights_quant.GetScale() * power.scale,
|
||||
static_cast<float>(INT8_MAX)));
|
||||
auto quantizedOffset = FLOAT_TO_INT8(std::min(quantized->_dst_quant.GetScale() * power.offset,
|
||||
static_cast<float>(INT8_MAX)));
|
||||
gnamem->readonly().push_value<int8_t>(ptr_weights, quantizedScale, num_rows_out, 64);
|
||||
gnamem->readonly().push_value<int8_t>(ptr_biases, quantizedOffset, num_rows_out, 64);
|
||||
gnamem->readonly().push_value<int8_t>(layer, ptr_weights, quantizedScale, num_rows_out, 64);
|
||||
gnamem->readonly().push_value<int8_t>(layer, ptr_biases, quantizedOffset, num_rows_out, 64);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -799,12 +797,11 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
ptr_pwl_input,
|
||||
ptr_pwl_outputs,
|
||||
ptr_pwl_segments_target);
|
||||
|
||||
connectOutput(layer, ptr_pwl_outputs, num_data_bytes_out);
|
||||
connectInput(layer, ptr_pwl_input, num_data_bytes_in, 0, 0);
|
||||
|
||||
if (ptr_pwl_segments_target != nullptr) {
|
||||
gnamem->readonly().push_local_ptr(ptr_pwl_segments_target,
|
||||
gnamem->readonly().push_local_ptr(layer, ptr_pwl_segments_target,
|
||||
&ptr_pwl_segments.front(),
|
||||
ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t),
|
||||
64);
|
||||
@ -876,7 +873,6 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
getScaleFactor(layer, QuantizedDataType::output),
|
||||
ptr_inputs,
|
||||
ptr_outputs);
|
||||
|
||||
size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()))
|
||||
* outputs->getPrecision().size();
|
||||
|
||||
@ -921,7 +917,6 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
num_columns_out,
|
||||
ptr_inputs,
|
||||
ptr_outputs);
|
||||
|
||||
size_t num_data_bytes_out = ALIGN(InferenceEngine::details::product(
|
||||
begin(outputs->getDims()), end(outputs->getDims())), 8)
|
||||
* outputs->getPrecision().size();
|
||||
@ -933,7 +928,6 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
|
||||
void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
auto concatLayer = dynamic_cast<InferenceEngine::ConcatLayer *> (layer.get());
|
||||
|
||||
if (concatLayer == nullptr) {
|
||||
return;
|
||||
}
|
||||
@ -996,13 +990,10 @@ void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
auto layerInfo = LayerInfo(concatParent);
|
||||
// auto layerInfo = LayerInfo(getCreatorLayer(concatLayerInput->insData[it].lock()).lock());
|
||||
if (layerInfo.isInput()) {
|
||||
connectInput(layer, &concatLayerInfo.gna_ptr,
|
||||
inputLayer.tensorSize, inputLayer.offset, idx, false);
|
||||
|
||||
connectInput(layer, &concatLayerInfo.gna_ptr, inputLayer.tensorSize, inputLayer.offset, idx, false);
|
||||
concatLayerInfo.input_allocated = true;
|
||||
} else if (layerInfo.isMemory()) {
|
||||
connectInput(layer, &concatLayerInfo.gna_ptr, concatLayerInfo.reserved_size, inputLayer.offset, idx, false);
|
||||
|
||||
concatLayerInfo.input_allocated = true;
|
||||
}
|
||||
++idx;
|
||||
@ -1114,7 +1105,6 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
ptr_weights,
|
||||
ptr_biases,
|
||||
false);
|
||||
|
||||
size_t num_data_bytes_out =
|
||||
InferenceEngine::details::product(
|
||||
begin(outputs->getDims()), end(outputs->getDims())) * 4;
|
||||
@ -1128,8 +1118,8 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
FillWeightOfAligningFilter(layer, ptr_weights, offset.front(), (quantized == nullptr) ? false : true);
|
||||
|
||||
(quantized == nullptr) ?
|
||||
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64) :
|
||||
gnamem->readonly().push_value<int32_t>(ptr_biases, 0, num_rows_out, 64);
|
||||
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64) :
|
||||
gnamem->readonly().push_value<int32_t>(layer, ptr_biases, 0, num_rows_out, 64);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1249,7 +1239,6 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
ptr_weights,
|
||||
ptr_biases,
|
||||
true);
|
||||
|
||||
size_t num_data_bytes_out =
|
||||
InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) * outputs->getPrecision().size();
|
||||
|
||||
@ -1262,36 +1251,36 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
switch (eltwise._operation) {
|
||||
case EltwiseLayer::Sub:
|
||||
if (quantized == nullptr) {
|
||||
gnamem->readonly().push_value(ptr_weights, -1.0f, num_rows_out, 64);
|
||||
gnamem->readonly().push_value(layer, ptr_weights, -1.0f, num_rows_out, 64);
|
||||
} else {
|
||||
auto scaledIdentity = -quantized->_weights_quant.GetScale();
|
||||
|
||||
if (gnaFlags->input_low_precision == false) {
|
||||
auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));
|
||||
|
||||
gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
|
||||
gnamem->readonly().push_value<int16_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
|
||||
} else {
|
||||
auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast<float>(INT8_MAX)));
|
||||
|
||||
gnamem->readonly().push_value<int8_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
|
||||
gnamem->readonly().push_value<int8_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
|
||||
}
|
||||
}
|
||||
connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
|
||||
break;
|
||||
case EltwiseLayer::Sum:
|
||||
if (quantized == nullptr) {
|
||||
gnamem->readonly().push_value(ptr_weights, 1.0f, num_rows_out, 64);
|
||||
gnamem->readonly().push_value(layer, ptr_weights, 1.0f, num_rows_out, 64);
|
||||
} else {
|
||||
auto scaledIdentity = quantized->_weights_quant.GetScale();
|
||||
|
||||
if (gnaFlags->input_low_precision == false) {
|
||||
auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));
|
||||
|
||||
gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
|
||||
gnamem->readonly().push_value<int16_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
|
||||
} else {
|
||||
auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast<float>(INT8_MAX)));
|
||||
|
||||
gnamem->readonly().push_value<int8_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
|
||||
gnamem->readonly().push_value<int8_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
|
||||
}
|
||||
}
|
||||
connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
|
||||
@ -1299,12 +1288,12 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
|
||||
case EltwiseLayer::Prod:
|
||||
if (quantized == nullptr) {
|
||||
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
|
||||
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
|
||||
} else {
|
||||
if (gnaFlags->input_low_precision == false) {
|
||||
gnamem->readonly().push_value<int32_t>(ptr_biases, 0, num_rows_out, 64);
|
||||
gnamem->readonly().push_value<int32_t>(layer, ptr_biases, 0, num_rows_out, 64);
|
||||
} else {
|
||||
gnamem->readonly().push_value<int8_t>(ptr_biases, 0, num_rows_out, 64);
|
||||
gnamem->readonly().push_value<int8_t>(layer, ptr_biases, 0, num_rows_out, 64);
|
||||
}
|
||||
}
|
||||
connectInput(layer, ptr_weights, num_data_bytes_in, 0, biasesLayerIdx);
|
||||
@ -1372,9 +1361,9 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
connectInput(layer, ptr_input_2, num_data_bytes_in_2, 0, 1);
|
||||
if (gnaFlags->sw_fp32) {
|
||||
IE_ASSERT(quantized == nullptr);
|
||||
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
|
||||
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
|
||||
} else {
|
||||
gnamem->readonly().push_value<int32_t>(ptr_biases, 0.0f, num_rows_out, 64);
|
||||
gnamem->readonly().push_value<int32_t>(layer, ptr_biases, 0.0f, num_rows_out, 64);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1485,12 +1474,12 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
|
||||
|
||||
if (num_padding == 0) {
|
||||
if (!transpose) {
|
||||
gnamem->readonly().push_ptr(ptr_weights,
|
||||
gnamem->readonly().push_ptr(layer, ptr_weights,
|
||||
weightable._weights->cbuffer().as<const void*>(),
|
||||
weightable._weights->byteSize(),
|
||||
64);
|
||||
} else {
|
||||
gnamem->readonly().push_initializer(ptr_weights, weightable._weights->byteSize(), [=](void* data, size_t size) {
|
||||
gnamem->readonly().push_initializer(layer, ptr_weights, weightable._weights->byteSize(), [=](void* data, size_t size) {
|
||||
for (uint32_t k = 0; k < (isDiag ? 1 : num_rows_out); k++) {
|
||||
auto rowOffset = k * transposedRows * transposedCols * weightable.precision.size();
|
||||
auto cbuffer = weightable._weights->cbuffer().as<const uint8_t*>() + rowOffset;
|
||||
@ -1519,7 +1508,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
|
||||
auto paddedWeights = isDiag ? elementsIn : elementsIn * num_rows_out;
|
||||
auto paddedWeightsSize = paddedWeights * weightable.precision.size();
|
||||
|
||||
gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
|
||||
gnamem->readonly().push_initializer(layer, ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
|
||||
for (uint32_t i = 0; i < (isDiag ? 1 : num_rows_out); i++) {
|
||||
ie_memcpy(data, size,
|
||||
weightable._weights->cbuffer().as<const uint8_t*>() + num_rows_in * i * weightable.precision.size(),
|
||||
@ -1530,16 +1519,16 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
|
||||
}
|
||||
|
||||
if (weightable._biases) {
|
||||
gnamem->readonly().push_ptr(ptr_biases,
|
||||
gnamem->readonly().push_ptr(layer, ptr_biases,
|
||||
weightable._biases->cbuffer().as<const void*>(),
|
||||
weightable._biases->byteSize(),
|
||||
64);
|
||||
} else {
|
||||
// in that case input from previous layer goes into biases, so we have to initialize input pointer by zero
|
||||
if (useBiasConnection) {
|
||||
gnamem->readonly().push_value(ptr_inputs, 0.0f, num_rows_in + num_padding, 64);
|
||||
gnamem->readonly().push_value(layer, ptr_inputs, 0.0f, num_rows_in + num_padding, 64);
|
||||
} else {
|
||||
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out + num_padding_out, 64);
|
||||
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out + num_padding_out, 64);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1557,7 +1546,7 @@ void GNAGraphCompiler::FillWeightOfAligningFilter(InferenceEngine::CNNLayerPtr l
|
||||
THROW_GNA_EXCEPTION << "Weights memory is not allocated!!!";
|
||||
}
|
||||
|
||||
gnamem->readonly().push_initializer(ptrWeights, num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), [=](void* data, size_t size) {
|
||||
gnamem->readonly().push_initializer(layer, ptrWeights, num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), [=](void* data, size_t size) {
|
||||
int out = 0;
|
||||
for (int input = offset; input < num_rows_out + offset; ++input) {
|
||||
auto mem_ptr = reinterpret_cast<uint8_t*>(data) + input * layer->precision.size() + out * ALIGN(num_rows_in, 8) * layer->precision.size();
|
||||
@ -1624,7 +1613,6 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
|
||||
ptr_inputs,
|
||||
ptr_outputs);
|
||||
|
||||
|
||||
size_t num_data_bytes_in = num_rows_copied * num_rows_copied * num_columns_in
|
||||
* inputs->getPrecision().size();
|
||||
// need to reserve full tensor so using original size with assumption of identity activation attached to filter lateron
|
||||
@ -1681,7 +1669,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
|
||||
size_t weights_stride = (num_rows_in + num_rows_copied) * weightsElementSize;
|
||||
size_t weights_offset = weights_stride * num_rows_copied + num_rows_copied * weightsElementSize;
|
||||
|
||||
gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
|
||||
gnamem->readonly().push_initializer(layer, ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
|
||||
size_t roffset = weights_offset;
|
||||
size_t woffset = 0;
|
||||
for (int i = 0; i < num_rows_out && size >= woffset; i++) {
|
||||
@ -1696,12 +1684,12 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
|
||||
}
|
||||
|
||||
if (filterLayer->_biases) {
|
||||
gnamem->readonly().push_ptr(ptr_biases,
|
||||
gnamem->readonly().push_ptr(layer, ptr_biases,
|
||||
filterLayer->_biases->cbuffer().as<const void*>(),
|
||||
filterLayer->_biases->byteSize(),
|
||||
64);
|
||||
} else {
|
||||
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
|
||||
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1774,18 +1762,18 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
|
||||
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
|
||||
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
||||
|
||||
gnamem->readonly().push_ptr(ptr_weights,
|
||||
gnamem->readonly().push_ptr(layer, ptr_weights,
|
||||
filterLayer->_weights->cbuffer().as<const void*>(),
|
||||
filterLayer->_weights->byteSize(),
|
||||
64);
|
||||
|
||||
if (filterLayer->_biases) {
|
||||
gnamem->readonly().push_ptr(ptr_biases,
|
||||
gnamem->readonly().push_ptr(layer, ptr_biases,
|
||||
filterLayer->_biases->cbuffer().as<const void*>(),
|
||||
filterLayer->_biases->byteSize(),
|
||||
64);
|
||||
} else {
|
||||
gnamem->readonly().push_value(ptr_biases, 0.0f, numberOfFilters, 64);
|
||||
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, numberOfFilters, 64);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2016,7 +2004,7 @@ case name:\
|
||||
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
||||
|
||||
if (ptr_pwl_segments_target != nullptr) {
|
||||
gnamem->readonly().push_local_ptr(ptr_pwl_segments_target,
|
||||
gnamem->readonly().push_local_ptr(layer, ptr_pwl_segments_target,
|
||||
&ptr_pwl_segments.front(),
|
||||
ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t),
|
||||
64);
|
||||
@ -2152,8 +2140,9 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
|
||||
}
|
||||
}
|
||||
|
||||
void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *ptr,
|
||||
size_t num_data_bytes_out) {
|
||||
void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
|
||||
void *ptr,
|
||||
size_t num_data_bytes_out) {
|
||||
auto getOffsetForBinding = [](InferenceEngine::CNNLayerPtr layer) {
|
||||
int32_t output_offset = 0;
|
||||
if (layer->params.find("output_offset") != layer->params.end()) {
|
||||
@ -2162,7 +2151,6 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
|
||||
return output_offset;
|
||||
};
|
||||
|
||||
|
||||
gnalog() << "Connecting output " << layer->name << " ...\n";
|
||||
// in case of Memory Layer it's input allocated in meminput layer
|
||||
if (layer->outData.size() == 1) {
|
||||
@ -2179,7 +2167,6 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
|
||||
if (!nextLayer.first) {
|
||||
gnalog() << "for layer: " << layer->name << "outData[0] has non functional connection at " << j;
|
||||
}
|
||||
|
||||
auto nextMemoryLayerIt =
|
||||
std::find_if(begin(memory_connection), end(memory_connection),
|
||||
[&](MemoryConnection::value_type &comp) {
|
||||
@ -2190,14 +2177,13 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
|
||||
// memory layer not yet initialized
|
||||
if (nextMemoryLayer.reserved_size == 0) {
|
||||
auto memorySize = InferenceEngine::details::product(nextMemoryLayer.getDims()) * nextMemoryLayer.elementSizeBytes();
|
||||
|
||||
gnamem->reserve_ptr(&nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64);
|
||||
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
|
||||
gnamem->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64);
|
||||
gnamem->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
|
||||
|
||||
nextMemoryLayer.reserved_size = ALIGN64(memorySize);
|
||||
} else {
|
||||
// We may need to extend memory buffer if connected input size is bigger, for example for concat connection
|
||||
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out));
|
||||
gnamem->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out));
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -2288,7 +2274,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
|
||||
return it != concatItem.second.concatInputLayers.end();
|
||||
});
|
||||
if (included == concat_connection.end()) {
|
||||
gnamem->reserve_ptr(&concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size), 64);
|
||||
gnamem->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size), 64);
|
||||
|
||||
std::function<void(GNAConcatLayer, GNAPluginNS::InputDesc&, ConcatConnection&)> allocate_input_recursively =
|
||||
[&allocate_input_recursively](GNAConcatLayer clayer, GNAPluginNS::InputDesc& inputDesc, ConcatConnection& concat_connection) {
|
||||
@ -2321,26 +2307,24 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
|
||||
if (layer->params.find("output_offset") != layer->params.end()) {
|
||||
output_offset = layer->GetParamAsInt("output_offset");
|
||||
}
|
||||
gnamem->bind_ptr(ptr, &concatLayerInfoItem.gna_ptr, output_offset);
|
||||
gnamem->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, output_offset);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
intel_dnn_component_t * unused_input = nullptr;
|
||||
if (gnaFlags->compact_mode) {
|
||||
unused_input = find_first_unused_input(layer);
|
||||
if (unused_input != nullptr) {
|
||||
gnamem->bind_ptr(ptr, &unused_input->ptr_inputs, 0, ALIGN64(num_data_bytes_out));
|
||||
}
|
||||
}
|
||||
// cannot reuse suitable input
|
||||
if (unused_input == nullptr) {
|
||||
gnamem->reserve_ptr(ptr, ALIGN64(num_data_bytes_out), 64);
|
||||
}
|
||||
auto nextLayer = CNNNetCheckNextLayerSkipCertain(layer, 0, 0, true,
|
||||
[](CNNLayerPtr l) { return LayerInfo(l).isNonFunctional(); }).first;
|
||||
// Check that layer will be an output
|
||||
gnamem->reserve_ptr((LayerInfo(layer).isOutput() || !nextLayer) ? nullptr : layer, ptr, ALIGN64(num_data_bytes_out), 64);
|
||||
}
|
||||
|
||||
GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, void *ptr, size_t num_data_bytes_in, int32_t offset, int idx, bool connectTo) {
|
||||
GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
|
||||
void *ptr,
|
||||
size_t num_data_bytes_in,
|
||||
int32_t offset,
|
||||
int idx,
|
||||
bool connectTo) {
|
||||
// selecting particular input layers
|
||||
// auto prevLayer = CNNNetPrevLayer(layer, idx);
|
||||
auto prevLayer = CNNNetPrevLayerSkipCertain(layer, idx, [](CNNLayerPtr l) {
|
||||
@ -2363,12 +2347,12 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
|
||||
|
||||
// real allocation pointer will be kept in ptr not in ptr_inputs_global
|
||||
if (!connectTo) {
|
||||
gnamem->push_value(ptr,
|
||||
gnamem->push_value(nullptr, ptr,
|
||||
static_cast<uint8_t>(0),
|
||||
num_data_bytes_in,
|
||||
64);
|
||||
} else {
|
||||
gnamem->push_value(&inputDesc->getPtrInputsGlobal(prevLayer->name).front(),
|
||||
gnamem->push_value(nullptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(),
|
||||
static_cast<uint8_t>(0),
|
||||
num_data_bytes_in,
|
||||
64);
|
||||
@ -2384,9 +2368,9 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
|
||||
}
|
||||
|
||||
if (connectTo) {
|
||||
gnamem->bind_ptr(ptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), offset, ALIGN(num_data_bytes_in, 64));
|
||||
gnamem->bind_ptr(nullptr, ptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), offset, ALIGN(num_data_bytes_in, 64));
|
||||
} else {
|
||||
gnamem->bind_ptr(&inputDesc->getPtrInputsGlobal(prevLayer->name).front(), ptr, offset, ALIGN(num_data_bytes_in, 64));
gnamem->bind_ptr(nullptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), ptr, offset, ALIGN(num_data_bytes_in, 64));
}

return prevLayer;
@ -2394,9 +2378,9 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// const input
if (LayerInfo(prevLayer).isConst()) {
if (connectTo) {
gnamem->bind_ptr(ptr, const_connections[prevLayer->name], offset);
gnamem->bind_ptr(layer, ptr, const_connections[prevLayer->name], offset);
} else {
gnamem->bind_ptr(const_connections[prevLayer->name], ptr, offset);
gnamem->bind_ptr(layer, const_connections[prevLayer->name], ptr, offset);
}

return prevLayer;
@ -2423,6 +2407,8 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,

if (it != splitLayerInfoItem.splitOutputLayers.end()) {
gnalog() << "Connecting " << splitName << " input \n";
// the splitting layer should take the execution order from the connected layer
splittingLayer->userValue = layer->userValue;
auto res = connectInput(splittingLayer, ptr, splitLayerInfoItem.reserved_size, it->offset + offset, 0);
gnalog() << "Connected \n";
return res;
@ -2435,7 +2421,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
if (concatLayerInfo != concat_connection.end()) {
auto & concatLayerInfoItem = concatLayerInfo->second;
// dnnLayer that is input for concat layer
gnamem->bind_ptr(ptr, &concatLayerInfoItem.gna_ptr, offset);
gnamem->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, offset);
// return layer over concat
return CNNNetPrevLayer(prevLayer);
}
@ -2444,7 +2430,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
prevLayer->name);
if (cropLayerInfo != crop_connection.end()) {
auto & cropLayerInfoItem = cropLayerInfo->second;
gnamem->bind_ptr(ptr, &cropLayerInfoItem.gna_ptr, offset);
gnamem->bind_ptr(layer, ptr, &cropLayerInfoItem.gna_ptr, offset);
return CNNNetPrevLayer(prevLayer);
}
}
@ -2452,7 +2438,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,

// check for generic prev layer
if (prevDnnLayer != nullptr) {
gnamem->bind_ptr(ptr, &prevDnnLayer->ptr_outputs, offset);
gnamem->bind_ptr(layer, ptr, &prevDnnLayer->ptr_outputs, offset);
return prevLayer;
}

@ -2470,20 +2456,20 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// connectTo is used to indicate that the memory layer should be bound to the given buffer
if (connectTo) {
memorySize = std::max(memorySize, num_data_bytes_in);
gnamem->reserve_ptr(&memoryLayer.gna_ptr, ALIGN64(memorySize), 64);
gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset);
gnamem->reserve_ptr(nullptr, &memoryLayer.gna_ptr, ALIGN64(memorySize), 64);
gnamem->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset);
} else {
if (num_data_bytes_in < memorySize + offset) {
THROW_GNA_LAYER_EXCEPTION(layer) << " invalid allocation request of "
<< num_data_bytes_in << " which is more than the state tensor size of: " << memorySize + offset;
}
gnamem->bind_ptr(&memoryLayer.gna_ptr, ptr, offset);
gnamem->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset);
}

memoryLayer.reserved_size = ALIGN64(memorySize);
} else {
// We may need to extend the memory buffer if the connected input size is bigger, for example for a concat connection
gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
gnamem->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
}

return prevLayer;
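Taken together, these hunks migrate every request on gnamem to a signature whose first argument names the CNNLayer that consumes the buffer, so the allocator can bound each buffer's lifetime; passing nullptr keeps the old whole-network lifetime. A minimal sketch of the new call pattern, assuming a hypothetical someLayer and invented sizes:

    // Hypothetical sketch of the reworked request API:
    void *scratch = nullptr;
    void *outPtr = nullptr;
    gnamem->reserve_ptr(someLayer, &scratch, ALIGN64(1024), 64);  // lifetime bounded by someLayer's execution id
    gnamem->bind_ptr(nullptr, &outPtr, &scratch, 0);              // nullptr: no lifetime tracking for this alias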

@ -17,6 +17,7 @@
#include <utility>
#include <limits>

#include <ie_common.h>
#include <legacy/graph_tools.hpp>
#include <legacy/net_pass.h>
#include <debug.h>
@ -524,7 +525,7 @@ bool GNAPlugin::TryToInitOutput(int portId, InferenceEngine::CNNLayerPtr layer)
desc.num_elements = numElem;

// binding ptr for the first infer request - the others will be set up during relocation
gnamem->bind_ptr(&desc.ptrs.front(), outputPtr);
gnamem->bind_ptr(layer, &desc.ptrs.front(), outputPtr);
};

// probing gna_primitives
@ -927,7 +928,11 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
}

// Creating Layer primitives
uint16_t id = 0;
for (auto & layer : sortedNoMem) {
IE_SUPPRESS_DEPRECATED_START
layer->userValue.v_int = id++;
IE_SUPPRESS_DEPRECATED_END
graphCompiler.CreateLayerPrimitive(layer);
}

@ -981,7 +986,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {

// TODO: how the active list will work in the multi-output case
// make room for active list
gnamem->reserve_ptr(nullptr,
gnamem->reserve_ptr(nullptr, nullptr,
ALIGN64(outputsDesc.front().num_bytes_per_element * outputsDesc.front().num_elements), 64);

void *pParallelExecutionData = nullptr;
@ -989,10 +994,10 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
// reserving more bytes for intermediate data in the parallel case - TODO: this works incorrectly in compact mode at least
rwSegmentSize = gnamem->getRWBytes();
if (gnaFlags->gna_lib_async_threads_num > 1) {
gnamem->reserve_ptr(&pParallelExecutionData, gnamem->getRWBytes() * (gnaFlags->gna_lib_async_threads_num - 1), 64);
gnamem->reserve_ptr(nullptr, &pParallelExecutionData, gnamem->getRWBytes() * (gnaFlags->gna_lib_async_threads_num - 1), 64);
}

gnamem->commit();
gnamem->commit(gnaFlags->compact_mode);

dnn->Init(gnamem->getBasePtr(),
gnamem->getTotalBytes(),
@ -1569,7 +1574,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i

graphCompiler.setGNAMemoryPtr(gnamem);
void *basePtr = nullptr;
gnamem->reserve_ptr(&basePtr, header.gnaMemSize);
gnamem->reserve_ptr(nullptr, &basePtr, header.gnaMemSize);
gnamem->commit();
#if GNA_LIB_VER == 2
gnaModels.push_back(std::make_tuple(make_shared<CPPWrapper<Gna2Model>>(header.layersCount)));
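The uint16_t id loop above is the other half of the lifetime-tracking scheme: it stamps every layer with its execution order, which the memory queue later reads back through getCNNLayerId(), and commit(gnaFlags->compact_mode) is where those stamps are consumed. A condensed, illustrative restatement:

    // Illustrative only: execution ids become the lifetime axis for buffers,
    // and commit(true) lets buffers with disjoint lifetimes share bytes.
    uint16_t id = 0;
    for (auto & layer : sortedNoMem)
        layer->userValue.v_int = id++;
    gnamem->commit(/*isCompact=*/true);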

@ -14,6 +14,8 @@
* @brief used for creating graphviz charts, and layers dump
*/
# define PLOT
# define MODEL_DUMP
# define GNA_HEAP_PROFILER
# define gnalog() std::cout
# define gnawarn() std::cerr
#else

@ -8,6 +8,8 @@
#include <vector>
#include <algorithm>

#include "gna_plugin_log.hpp"

namespace GNAPluginNS {
namespace memory {

@ -26,6 +28,45 @@ enum rRegion {
REGION_AUTO,
};

#ifdef GNA_HEAP_PROFILER
inline const char* rRegionToStr(uint8_t region) {
const char* strRegion = "UNKNOWN";
switch (region) {
case REGION_RO:
strRegion = "REGION_RO";
break;
case REGION_RW:
strRegion = "REGION_RW";
break;
case REGION_AUTO:
strRegion = "REGION_AUTO";
break;
}
return strRegion;
}

inline const char* rTypeToStr(uint8_t type) {
const char* strType = "UNKNOWN";
switch (type) {
case REQUEST_STORE:
strType = "REQUEST_STORE";
break;
case REQUEST_ALLOCATE:
strType = "REQUEST_ALLOCATE";
break;
case REQUEST_BIND:
strType = "REQUEST_BIND";
break;
case REQUEST_INITIALIZER | REQUEST_STORE:
case REQUEST_INITIALIZER | REQUEST_ALLOCATE:
case REQUEST_INITIALIZER | REQUEST_BIND:
strType = "INITIALIZER";
break;
}
return strType;
}
#endif
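These helpers are compiled only under GNA_HEAP_PROFILER and feed the memoryDump() routine further down. A small sketch of the decoding they perform (not part of the diff):

    // Sketch: combined request flags decode to the INITIALIZER label.
    uint8_t t = REQUEST_INITIALIZER | REQUEST_STORE;
    std::cout << rTypeToStr(t) << "\n";            // "INITIALIZER"
    std::cout << rRegionToStr(REGION_RW) << "\n";  // "REGION_RW"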

struct MemRequest {
rRegion _region;
uint8_t _type;
@ -40,6 +81,10 @@ struct MemRequest {
size_t _offset = 0;
// expansion in bytes due to large dependent layers
size_t _padding = 0;

// fields to sort regions by execution availability
std::pair<uint16_t, uint16_t> _life_limits{0, UINT16_MAX};

MemRequest(rRegion region,
rType req,
void *ptr_out,
@ -79,7 +124,8 @@ struct MemRequest {
_data.resize(sizeof(T));
std::copy(reinterpret_cast<uint8_t *>(&element), reinterpret_cast<uint8_t *>(&element) + sizeof(T), _data.begin());
}
/**

/**
* Store initializer request
* @param req
* @param ptr_out
@ -103,4 +149,4 @@ struct MemRequest {
}
};
} // namespace memory
} // namespace GNAPluginNS
} // namespace GNAPluginNS

@ -8,10 +8,23 @@
#include <vector>
#include <algorithm>
#include <functional>

#include <ie_api.h>
#include <legacy/ie_layers.h>
#include "gna_mem_requests.hpp"

namespace GNAPluginNS {
namespace memory {

/**
* @brief get layer id from legacy CNNLayer
*/
inline uint16_t getCNNLayerId(InferenceEngine::CNNLayerPtr layer) {
IE_SUPPRESS_DEPRECATED_START
return layer->userValue.v_int;
IE_SUPPRESS_DEPRECATED_END
}

/**
* Adapter for requests submission and actual request queue
*/
@ -26,12 +39,26 @@ public:
* @param num_bytes
* @param alignment
*/
void push_initializer(void *ptr_out, size_t num_bytes, std::function<void(void * data, size_t size)> initializer, size_t alignment = 1) {
void push_initializer(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
size_t num_bytes,
std::function<void(void * data, size_t size)> initializer,
size_t alignment = 1) {
futureHeap().push_back({regionType(), ptr_out, num_bytes, initializer, REQUEST_INITIALIZER, alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
}

void push_ptr(void *ptr_out, const void *ptr_in, size_t num_bytes, size_t alignment = 1) {
void push_ptr(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
const void *ptr_in,
size_t num_bytes,
size_t alignment = 1) {
futureHeap().push_back({regionType(), REQUEST_STORE, ptr_out, ptr_in, 1, num_bytes, alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
}

/**
@ -40,10 +67,17 @@ public:
* @param ptr_in
* @param num_bytes
*/
void push_local_ptr(void *ptr_out, const void *ptr_in, size_t num_bytes, size_t alignment = 1) {
void push_local_ptr(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
const void *ptr_in,
size_t num_bytes,
size_t alignment = 1) {
localStorage().emplace_back(reinterpret_cast<const uint8_t *>(ptr_in),
reinterpret_cast<const uint8_t *>(ptr_in) + num_bytes);
futureHeap().push_back({regionType(), REQUEST_STORE, ptr_out, &localStorage().back().front(), 1, num_bytes, alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
}

/**
@ -51,8 +85,14 @@ public:
* @param ptr_out
* @param num_bytes
*/
void reserve_ptr(void *ptr_out, size_t num_bytes, size_t alignment = 1) {
void reserve_ptr(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
size_t num_bytes,
size_t alignment = 1) {
futureHeap().push_back({regionType(), REQUEST_ALLOCATE, ptr_out, nullptr, 1, num_bytes, alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {getCNNLayerId(layer), getCNNLayerId(layer)};
}
}

/**
@ -63,8 +103,15 @@ public:
* @param num_bytes - bind can request a bigger buffer than originally allocated via reserve();
* if that happens, the reserved request parameters will be updated before committing memory
*/
void bind_ptr(void *source, const void *dest, size_t offset = 0, size_t num_bytes = 0) {
void bind_ptr(InferenceEngine::CNNLayerPtr layer,
void *source,
const void *dest,
size_t offset = 0,
size_t num_bytes = 0) {
futureHeap().push_back({regionType(), REQUEST_BIND, source, dest, 1, num_bytes, 1, offset});
if (layer != nullptr) {
futureHeap().back()._life_limits = {getCNNLayerId(layer), getCNNLayerId(layer)};
}
}

/**
@ -72,16 +119,28 @@ public:
* @param ptr_out - previously requested buffer
* @param initializer - initialisation routine to be called on allocated memory
*/
void bind_initializer(void *ptr_out, std::function<void(void * data, size_t size)> initializer) {
void bind_initializer(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
std::function<void(void * data, size_t size)> initializer) {
futureHeap().push_back({regionType(), ptr_out, 0, initializer, REQUEST_BIND, 1});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
}

/**
* @brief allocates a buffer and sets all its values to the given T value
*/
template<class T>
void push_value(void *ptr_out, T value, size_t num_elements, size_t alignment = 1) {
void push_value(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
T value,
size_t num_elements,
size_t alignment = 1) {
futureHeap().push_back({regionType(), ptr_out, value, num_elements, alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
}

/**
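Every overload above follows the same scheme: append the request to futureHeap(), then, if a layer was supplied, clamp the request's _life_limits using that layer's execution id. A hedged usage sketch with hypothetical layers and sizes:

    // Hypothetical call sites showing the lifetime semantics:
    queue.push_ptr(convLayer, &weightsPtr, weightsData, nBytes, 64);
    // -> _life_limits = {0, id(convLayer)}: needed from start until conv executes
    queue.reserve_ptr(reluLayer, &scratchPtr, ALIGN64(nBytes), 64);
    // -> _life_limits = {id(reluLayer), id(reluLayer)}: live only around relu
    queue.reserve_ptr(nullptr, &persistentPtr, ALIGN64(nBytes), 64);
    // -> default {0, UINT16_MAX}: never eligible for reuse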

@ -13,7 +13,15 @@
#include <list>
#include <algorithm>
#include <functional>
#include <iostream>
#include "gna_lib_ver_selector.hpp"
#include "memory_solver.hpp"
#include "gna_plugin_log.hpp"

#ifdef GNA_HEAP_PROFILER
#include <iomanip>
#include <fstream>
#endif

namespace GNAPluginNS {
namespace memory {
@ -32,6 +40,7 @@ class GNAMemory : public GNAMemRequestsQueue {
Allocator _allocator;
std::shared_ptr<uint8_t> heap = nullptr;
size_t _page_alignment = 1;
bool _is_compact_mode = false;

class GNAMemRequestsReadOnlyQueue : public GNAMemRequestsQueue {
std::reference_wrapper<GNAMemRequestsQueue> _that;
@ -62,93 +71,32 @@ class GNAMemory : public GNAMemRequestsQueue {
return readOnlyFrontEnd;
}

/**
* @brief enables memory optimization (compact mode). This mode can be enabled in the plugin configuration (COMPACT_MODE = Yes)
*/
void setCompactMode(bool isCompact) {
_is_compact_mode = isCompact;
}

/**
* @brief calculates the size required for all requests, allocates memory and updates pointers
*/
void commit() {
void commit(bool isCompact = false) {
setCompactMode(isCompact);

// 1st stage -- looking for expandable bind requests:
for (auto &originated : _future_heap) {
if (originated._type & REQUEST_BIND) continue;
size_t offset = 0;
iterate_binded(originated, [&](MemRequest & reference, MemRequest & binded) {
if (&originated == &reference) {
offset = 0;
}
offset += binded._offset;
auto current = offset + ALIGN(binded._num_elements * binded._element_size, binded._alignment);
auto original_no_pad = ALIGN(originated._num_elements * originated._element_size, originated._alignment);
auto original_with_pad = ALIGN(originated._num_elements * originated._element_size + originated._padding, originated._alignment);
expandBindings();

originated._padding = ALIGN(std::max(original_with_pad, current), originated._alignment) - original_no_pad;
});
}
// 2nd stage -- setup offsets:
setRegionOffsets(REGION_RO);
setRegionOffsets(REGION_RW);

updateSectionsSizes();
// 3rd stage -- allocate total memory (zero-initialized internally)
heap = allocate(getTotalBytes());

_total = _rw_section_size + _ro_section_size;

// allocation with memory setting to 0 internally
heap = allocate(_total);
auto setupOffsets = [&](std::function<bool(MemRequest & request)> filter, size_t offset) {
for (auto &re : _future_heap) {
if (re._type == REQUEST_BIND) continue;
if (filter(re)) continue;

auto sz = re._element_size * re._num_elements;

if (re._ptr_out != nullptr) {
auto cptr = heap.get() + offset;
size_t cptr_avail_size = _total - offset;
if (re._type & REQUEST_BIND) {
cptr = reinterpret_cast<uint8_t*>(*reinterpret_cast<void **>(re._ptr_out));
cptr_avail_size = sz;
} else {
*reinterpret_cast<void **>(re._ptr_out) = cptr;
}
// std::cout << "ALLOCATED=" << cptr << ", size=" << re._element_size * re._num_elements << "\n";
iterate_binded(re, [](MemRequest & reference, MemRequest & binded) {
*reinterpret_cast<void **>(binded._ptr_out) =
binded._offset + reinterpret_cast<uint8_t *>(*reinterpret_cast<void **>(reference._ptr_out));
binded._num_elements = reference._num_elements;
binded._element_size = reference._element_size;
});

// std::cout << "size=" << ALIGN(sz, re._alignment) << "\n" << std::flush;

switch (re._type & ~REQUEST_BIND) {
case REQUEST_ALLOCATE :
break;
case REQUEST_STORE : {
if (re._ptr_in != nullptr) {
ie_memcpy(cptr, cptr_avail_size, re._ptr_in, sz);
} else {
size_t of = 0;
for (int i = 0; i < re._num_elements; i++, of += re._element_size) {
std::copy(std::begin(re._data), std::end(re._data), cptr + of);
}
}
break;
}
case REQUEST_INITIALIZER : {
re._initializer(cptr, sz);
break;
}
}
}
if (!(re._type & REQUEST_BIND)) {
offset += ALIGN(sz + re._padding, re._alignment);
}
}
};

setupOffsets([](GNAPluginNS::memory::MemRequest & request) {
// TODO: consume bind requests separately from storage type
return !(request._type & REQUEST_BIND) && (request._region != REGION_RW);
}, 0);

setupOffsets([](GNAPluginNS::memory::MemRequest & request) {
return (request._type & REQUEST_BIND) || request._region != REGION_RO;
}, _rw_section_size);
// 4th stage -- store data and update pointers
allocateRegion(REGION_RW, 0);
allocateRegion(REGION_RO, _rw_section_size);
}
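The rewritten commit() is now a fixed pipeline over the protected helpers declared below. A condensed restatement of its control flow (comments only; the stage names come from the hunk above):

    // Condensed restatement of commit(isCompact):
    // 1. expandBindings()                 - propagate sizes and life limits through BIND chains
    // 2. setRegionOffsets(REGION_RO/RW)   - linear offsets per region
    // 3. heap = allocate(getTotalBytes()) - one zero-initialized arena
    // 4. allocateRegion(REGION_RW, 0);
    //    allocateRegion(REGION_RO, _rw_section_size) - patch pointers, copy stored data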

void *getBasePtr() {
@ -180,7 +128,7 @@ class GNAMemory : public GNAMemRequestsQueue {
void iterate_binded(GNAPluginNS::memory::MemRequest & reference, const T & visitor) {
for (auto &re : _future_heap) {
if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) {
// std::cout << " [binded=" << re._type << ", ptr=" << re._ptr_out <<"]\n";
// std::cout << " [binded=" << rTypeToStr(re._type) << ", ptr=" << re._ptr_out <<"]\n";
visitor(reference, re);
// primitive loop check
if (re._ptr_in == re._ptr_out) continue;
@ -190,7 +138,6 @@ class GNAMemory : public GNAMemRequestsQueue {
}
}

std::shared_ptr<uint8_t> allocate(size_t bytes) {
std::shared_ptr<uint8_t> sp(_allocator.allocate(bytes), [=](uint8_t *p) {
_allocator.deallocate(p, bytes);
@ -200,31 +147,191 @@ class GNAMemory : public GNAMemRequestsQueue {
}

protected:
/**
* @brief expand BIND and (BIND | ) requests. Align size(_padding), set execution order
*/
void expandBindings() {
for (auto &originated : _future_heap) {
// skipping bind requests to avoid duplications
if (originated._type & REQUEST_BIND) continue;

size_t offset = 0;
iterate_binded(originated, [&](MemRequest & reference, MemRequest & binded) {
// aligning sizes
if (&originated == &reference) offset = 0;

offset += binded._offset;
auto current = offset + ALIGN(binded._num_elements * binded._element_size, binded._alignment);
auto original_no_pad = ALIGN(originated._num_elements * originated._element_size, originated._alignment);
auto original_with_pad = ALIGN(originated._num_elements * originated._element_size + originated._padding, originated._alignment);

originated._padding = ALIGN(std::max(original_with_pad, current), originated._alignment) - original_no_pad;

// set execution order
originated._life_limits.first = std::min(originated._life_limits.first, binded._life_limits.first);
originated._life_limits.second = std::max(originated._life_limits.second, binded._life_limits.second);
});
}
}

/**
* @brief set offsets for specific region
*/
size_t setRegionOffsets(GNAPluginNS::memory::rRegion regType) {
size_t region_offset = 0;
for (auto &re : _future_heap) {
if (re._region != regType || re._type & REQUEST_BIND || re._ptr_out == nullptr) continue;

re._offset = region_offset;
region_offset += ALIGN(re._num_elements * re._element_size + re._padding, re._alignment);
}
return region_offset;
}

/**
* @brief allocates memory and updates pointers
*/
void allocateRegion(GNAPluginNS::memory::rRegion regType, size_t baseOffset) {
for (auto &re : _future_heap) {
// skipping Bind, crossregion and empty requests
if (re._region != regType || re._type == REQUEST_BIND || re._ptr_out == nullptr) continue;

size_t offset = baseOffset + re._offset;
auto cptr = heap.get() + offset;
size_t cptr_avail_size = _total - offset;

auto sz = re._element_size * re._num_elements;
if (re._type & REQUEST_BIND) {
cptr = reinterpret_cast<uint8_t*>(*reinterpret_cast<void **>(re._ptr_out));
cptr_avail_size = sz;
} else {
*reinterpret_cast<void **>(re._ptr_out) = cptr;
}
iterate_binded(re, [](MemRequest & reference, MemRequest & binded) {
*reinterpret_cast<void **>(binded._ptr_out) =
binded._offset + reinterpret_cast<uint8_t *>(*reinterpret_cast<void **>(reference._ptr_out));
binded._num_elements = reference._num_elements;
binded._element_size = reference._element_size;
});

switch (re._type & ~REQUEST_BIND) {
case REQUEST_ALLOCATE :
break;
case REQUEST_STORE : {
if (re._ptr_in != nullptr) {
ie_memcpy(cptr, cptr_avail_size, re._ptr_in, sz);
} else {
size_t of = 0;
for (int i = 0; i < re._num_elements; i++, of += re._element_size) {
std::copy(std::begin(re._data), std::end(re._data), cptr + of);
}
}
break;
}
case REQUEST_INITIALIZER : {
re._initializer(cptr, sz);
break;
}
}
}
}

/**
* @brief optimize memory region by reusing buffers
*/
size_t getSectionSizeOptimized(GNAPluginNS::memory::rRegion regType) {
size_t memSize = 0;
switch (regType) {
case REGION_AUTO:
case REGION_RW:
case REGION_RO: {
std::vector<MemorySolver::Box> boxes;
for (size_t i = 0; i < _future_heap.size(); ++i) {
// skipping BIND, cross-region and empty requests
if (_future_heap[i]._type & REQUEST_BIND || _future_heap[i]._region != regType || _future_heap[i]._ptr_out == nullptr) {
continue;
}

auto original_with_pad = ALIGN(_future_heap[i]._num_elements * _future_heap[i]._element_size + _future_heap[i]._padding,
_future_heap[i]._alignment);
int start = _future_heap[i]._life_limits.first;
int stop = _future_heap[i]._life_limits.second;

boxes.push_back({start, stop, static_cast<int64_t>(original_with_pad), static_cast<int64_t>(i)});
}
MemorySolver memSolver(boxes);
memSize = memSolver.solve();

// setting offsets
for (auto const & box : boxes) {
_future_heap[box.id]._offset = memSolver.getOffset(box.id);
}
}
break;

default:
break;
}

return memSize;
}

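getSectionSizeOptimized() is where compact mode pays off: each surviving request becomes a MemorySolver::Box over its life-limit interval, and the solver overlaps boxes whose intervals are disjoint. A small hedged example of the effect (box values invented):

    // Illustrative: buffers that are never alive simultaneously can share bytes.
    std::vector<MemorySolver::Box> boxes = {
        {0, 3, 1024, 0},   // request 0 alive for layers 0..3
        {4, 7, 2048, 1},   // request 1 alive for layers 4..7
    };
    MemorySolver solver(boxes);
    auto total = solver.solve();         // may be 2048 rather than 3072
    auto offset0 = solver.getOffset(0);  // offsets of the two boxes may coincide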
#ifdef GNA_HEAP_PROFILER
void memoryDump(std::function<bool(MemRequest & re)> filter) {
std::ofstream dumpFile("gna_memory_requests.txt", std::ios::out);

for (auto &re : _future_heap) {
if (filter(re)) continue;
dumpFile << ": " << " region: " << rRegionToStr(re._region) << ", "
<< "type: " << std::setw(17) << rTypeToStr(re._type) << " "
<< "ptr_in: " << std::setw(15) << re._ptr_in << " "
<< "ptr_out: " << std::setw(15) << re._ptr_out << " "
<< std::setw(8) << re._num_elements << ", "
<< static_cast<int>(re._element_size) << ", "
<< re._padding << ", "
<< std::setw(3) << re._alignment << ", "
<< std::setw(8) << re._offset << ", "
<< "life_time: " << re._life_limits.first << ":" << re._life_limits.second << ", "
<< std::endl;
}
}
#endif

void updateSectionsSizes() {
// count total size and size of read/write regions
_rw_section_size = 0;
_ro_section_size = 0;
for (auto &re : _future_heap) {
auto current = ALIGN(re._num_elements * re._element_size + re._padding, re._alignment);
#ifdef GNA_HEAP_PROFILER
std::cout << "chunk: " << " region: " << re._region << ", " <<
"type: " << (re._type == REQUEST_STORE ? "store " : re._type == REQUEST_BIND ? "bind " : "alloc ") <<
std::setw(10) << re._num_elements << ", " <<
static_cast<int>(re._element_size) << ", " <<
re._padding << ", " <<
re._offset << ", " <<
re._alignment << std::endl;
memoryDump([](GNAPluginNS::memory::MemRequest & request) {
return false;
});
#endif
if (re._type == REQUEST_BIND) continue;
for (auto &re : _future_heap) {
if (re._type & REQUEST_BIND || re._ptr_out == nullptr) continue;

size_t current = ALIGN(re._num_elements * re._element_size + re._padding, re._alignment);
if (re._region == REGION_RW) {
_rw_section_size += current;
} else {
_ro_section_size += current;
}
}

if (_is_compact_mode) {
_rw_section_size = getSectionSizeOptimized(REGION_RW);
}

gnalog() << "ro_section_size: " << _ro_section_size << std::endl;
gnalog() << "rw_section_size: " << _rw_section_size << std::endl;
gnalog() << "total: " << _total << std::endl;

_rw_section_size = ALIGN(_rw_section_size, _page_alignment);
_ro_section_size = ALIGN(_ro_section_size, _page_alignment);
_total = _rw_section_size + _ro_section_size;

gnalog() << "Aligned ro_section_size: " << _ro_section_size << std::endl;
gnalog() << "Aligned rw_section_size: " << _rw_section_size << std::endl;
}
};
} // namespace memory

@ -81,6 +81,7 @@ const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_t
{ "ConvolutionBackpropData", Deconvolution },
{ "GroupConvolutionBackpropData", Deconvolution },
{ "StridedSlice", StridedSlice },
{ "Slice", StridedSlice },
{ "Tile", Tile },
{ "ROIAlign", ROIAlign },
{ "ROIPooling", ROIPooling },

@ -127,13 +127,14 @@ DnnlMemoryDescPtr MKLDNNExtensionUtils::makeDescriptor(const mkldnn::memory::des
}
}

size_t MKLDNNExtensionUtils::getMemSizeForDnnlDesc(mkldnn::memory::desc desc) {
const auto offset0 = desc.data.offset0;
desc.data.offset0 = 0;
size_t size = desc.get_size();
size_t MKLDNNExtensionUtils::getMemSizeForDnnlDesc(const mkldnn::memory::desc& desc) {
auto tmpDesc = desc;
const auto offset0 = tmpDesc.data.offset0;
tmpDesc.data.offset0 = 0;
size_t size = tmpDesc.get_size();
if (size == DNNL_RUNTIME_SIZE_VAL)
return MemoryDesc::UNDEFINED_SIZE;
size += offset0 * sizeOfDataType(desc.data_type());
size += offset0 * sizeOfDataType(tmpDesc.data_type());
return size;
}

@ -45,7 +45,7 @@ public:
*/

static std::shared_ptr<DnnlBlockedMemoryDesc> makeUndefinedDesc(const mkldnn::memory::desc &desc, const Shape& shape);
static size_t getMemSizeForDnnlDesc(mkldnn::memory::desc desc);
static size_t getMemSizeForDnnlDesc(const mkldnn::memory::desc& desc);
};

} // namespace MKLDNNPlugin
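The new signature drops the by-value descriptor while keeping the mutate-a-copy trick (offset0 must be zeroed before get_size() so the offset can be accounted for separately). A hedged sketch of a call site; someDesc and its dimensions are invented:

    // Hypothetical call site: the argument is no longer copied at the call
    // boundary and is left untouched by the size computation.
    mkldnn::memory::desc someDesc({1, 3, 224, 224}, mkldnn::memory::data_type::f32,
                                  mkldnn::memory::format_tag::nchw);
    size_t bytes = MKLDNNExtensionUtils::getMemSizeForDnnlDesc(someDesc);  // someDesc unchanged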

@ -722,8 +722,13 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::

auto input = inputNodesMap.find(name);
if (input != inputNodesMap.end()) {
auto& inTensorDesc = in->getTensorDesc();
auto node = input->second;
auto childEdge = node->getChildEdgeAt(0);
const auto& outDims = node->getOutputShapeAtPort(0);

const void *ext_data_ptr = in->cbuffer();
void *inter_data_ptr = input->second->getChildEdgeAt(0)->getMemory().GetData();
void *inter_data_ptr = childEdge->getMemory().GetData();

if (ext_data_ptr != inter_data_ptr) {
auto ext_tdesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(in->getTensorDesc());
@ -731,17 +736,16 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::
auto ext_mem = MKLDNNMemory(eng);
ext_mem.Create(ext_tdesc, ext_data_ptr, false);

input->second->getChildEdgeAt(0)->getMemory().SetData(ext_mem, 0, false);
childEdge->getMemory().SetData(ext_mem, 0, false);
}

// todo: make sure 'name' exists in this map...
if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) {
if (in->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) {
_normalizePreprocMap[name].NormalizeImage(input->second->getOutputShapeAtPort(0),
reinterpret_cast<float *>(inter_data_ptr),
in->getTensorDesc().getLayout());
if (inTensorDesc.getPrecision() == InferenceEngine::Precision::FP32) {
_normalizePreprocMap[name].NormalizeImage(outDims, reinterpret_cast<float *>(inter_data_ptr),
inTensorDesc.getLayout());
} else {
IE_THROW() << "Mean image of type " << in->getTensorDesc().getPrecision().name() << " is unsupported";
IE_THROW() << "Mean image of type " << inTensorDesc.getPrecision().name() << " is unsupported";
}
}
} else {
@ -756,15 +760,17 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
for (auto &outputMap : outputNodesMap) {
auto name = outputMap.first;
auto node = outputMap.second;
const MKLDNNMemory& intr_blob = node->getParentEdgeAt(0)->getMemory();
auto parentEdge = node->getParentEdgeAt(0);
const MKLDNNMemory& intr_blob = parentEdge->getMemory();

auto ext_blob = out.find(name);
if (ext_blob == out.end()) {
const auto ext_blob_map = out.find(name);
if (ext_blob_map == out.end()) {
IE_THROW(Unexpected) << "The network outputs do not contain mkldnn graph output node name: \"" << name << "\"";
}
const auto ext_blob = ext_blob_map->second;

const auto actualDesc = MemoryDescUtils::convertToTensorDesc(intr_blob.getDesc());
auto &expectedDesc = ext_blob->second->getTensorDesc();
auto &expectedDesc = ext_blob->getTensorDesc();

// TODO [NM]: need to create a universal reorder which will detect cases when we really need to use it
// WA: for cases when the output shape after transformation will be 1x1x1x1 but the model output is scalar
@ -797,27 +803,16 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
auto srcPrec = actualDesc.getPrecision();
auto dstPrec = expectedDesc.getPrecision();

if (srcPrec == dstPrec && ext_blob->second->byteSize() != intr_blob.GetSize())
if (srcPrec == dstPrec && ext_blob->byteSize() != intr_blob.GetSize())
IE_THROW() << "Output blob byte size is not equal to the network output byte size ("
<< ext_blob->byteSize() << "!=" << intr_blob.GetSize() << ").";

void *ext_blob_ptr = ext_blob->second->buffer();
void *ext_blob_ptr = ext_blob->buffer();
void *intr_blob_ptr = intr_blob.GetData();

// That is the same memory. No need to copy
if (ext_blob_ptr == intr_blob_ptr) continue;

size_t size_to_copy = intr_blob.GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
// TODO: Should we support InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT???
// TODO [DS]: phase 2: should we support this behaviour? Looks obsolete in the dynamic shapes paradigm
if (config.batchLimit) {
if (node->isDynamicNode()) {
IE_THROW(NotImplemented) << "[DS] not implemented dynamic batch for node with dynamic shape";
}
int MB_to_process = node->batchToProcess();
size_to_copy = std::accumulate(outDims.begin() + 1, outDims.end(), (size_t)1, std::multiplies<size_t>()) * MB_to_process;
}

if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) {
auto outBlobDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(expectedDesc);
auto outBloMem = MKLDNNMemory(eng);
@ -825,6 +820,17 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {

outBloMem.SetData(intr_blob, 0, false);
} else {
size_t size_to_copy = intr_blob.GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
// TODO: Should we support InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT???
// TODO [DS]: phase 2: should we support this behaviour? Looks obsolete in the dynamic shapes paradigm
if (config.batchLimit) {
if (node->isDynamicNode()) {
IE_THROW(NotImplemented) << "[DS] not implemented dynamic batch for node with dynamic shape";
}
int MB_to_process = node->batchToProcess();
size_to_copy = std::accumulate(outDims.begin() + 1, outDims.end(), (size_t)1, std::multiplies<size_t>()) * MB_to_process;
}

cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy);
}
}

@ -457,7 +457,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) {
if (auto convNode = std::dynamic_pointer_cast<MKLDNNConvolutionNode>(node)) {
auto rank = convNode->getInputShapeAtPort(0).getRank();
// int8 depthwise convolution does not support fusing zero points in 3D case
if (implication(convNode->isDepthWise(), rank == 4)) {
if (implication(convNode->isDepthWise(), rank < 5)) {
retVal = true;
}
}
@ -577,7 +577,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) {
ptrdiff_t OC = weightsConstantDims[0 + groupOffset];
ptrdiff_t IC = weightsConstantDims[1 + groupOffset];
ptrdiff_t KD = weightsConstantDims.size() == (5 + groupOffset) ? weightsConstantDims[weightsConstantDims.size() - 3] : 1;
ptrdiff_t KH = weightsConstantDims[weightsConstantDims.size() - 2];
ptrdiff_t KH = node->getInputShapeAtPort(0).getRank() > (3 + groupOffset) ? weightsConstantDims[weightsConstantDims.size() - 2] : 1;
ptrdiff_t KW = weightsConstantDims[weightsConstantDims.size() - 1];

for (size_t g = 0; g < G; g++) {

@ -84,27 +84,27 @@ MKLDNNPlugin::MKLDNNInferRequest::~MKLDNNInferRequest() {
}

void MKLDNNPlugin::MKLDNNInferRequest::pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob, InferenceEngine::Precision inPrec) {
bool needConvert = inPrec != inputBlob->getTensorDesc().getPrecision();
auto& tensorDesc = inputBlob->getTensorDesc();
bool needConvert = inPrec != tensorDesc.getPrecision();

if (inputBlob->cbuffer().as<const void *>() == nullptr) {
const void* srcData = inputBlob->cbuffer().as<const void *>();
if (srcData == nullptr) {
IE_THROW() << "Input blob has no allocated memory";
}

InferenceEngine::Blob::Ptr iconv;
if (needConvert) {
iconv = make_blob_with_precision(inPrec, InferenceEngine::TensorDesc(inPrec, inputBlob->getTensorDesc().getDims(),
inputBlob->getTensorDesc().getLayout()));
iconv = make_blob_with_precision(inPrec, InferenceEngine::TensorDesc(inPrec, tensorDesc.getDims(), tensorDesc.getLayout()));
iconv->allocate();
if (inputBlob->size() != iconv->size())
IE_THROW() << "Can't copy tensor: input and converted tensors have different number of elements: " << inputBlob->size() << " and "
<< iconv->size();

void *srcData = inputBlob->cbuffer().as<void *>();
void *dstData = iconv->buffer().as<void *>();
if (dstData == nullptr) {
IE_THROW() << "Converted input blob has no allocated memory";
}
cpu_convert(srcData, dstData, inputBlob->getTensorDesc().getPrecision(), iconv->getTensorDesc().getPrecision(), iconv->size());
cpu_convert(srcData, dstData, tensorDesc.getPrecision(), iconv->getTensorDesc().getPrecision(), iconv->size());
}

graph->PushInputData(inputName, needConvert ? iconv : inputBlob);
@ -112,27 +112,30 @@ void MKLDNNPlugin::MKLDNNInferRequest::pushInput(const std::string& inputName, I

void MKLDNNPlugin::MKLDNNInferRequest::PushInputData() {
for (auto input : _inputs) {
if (!_networkInputs[input.first]) {
IE_THROW() << "Input blobs map contains not registered during IInferencePlugin::LoadNetwork blob with name " << input.first;
auto inputName = input.first;
if (!_networkInputs[inputName]) {
IE_THROW() << "Input blobs map contains not registered during IInferencePlugin::LoadNetwork blob with name " << inputName;
}
auto inPrec = input.second->getTensorDesc().getPrecision();
if (graph->hasMeanImageFor(input.first) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) {
auto inputBlob = input.second;
auto& inputTensorDesc = inputBlob->getTensorDesc();
auto inPrec = inputTensorDesc.getPrecision();
if (graph->hasMeanImageFor(inputName) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) {
inPrec = InferenceEngine::Precision::FP32;
} else {
inPrec = normalizeToSupportedPrecision(inPrec);
}

if (inPrec == InferenceEngine::Precision::UNSPECIFIED) {
IE_THROW() << "Unsupported input precision " << input.second->getTensorDesc().getPrecision();
IE_THROW() << "Unsupported input precision " << inputTensorDesc.getPrecision();
}

// User can initialize input via setBlob API using tensorDesc with default (ANY) layout.
// Currently IE doesn't specify behavior in such scenario, so we assume real layout is equal to the network input.
if (input.second->getTensorDesc().getLayout() == InferenceEngine::ANY) {
input.second->getTensorDesc().setLayout(_networkInputs[input.first]->getLayout());
if (inputTensorDesc.getLayout() == InferenceEngine::ANY) {
inputTensorDesc.setLayout(_networkInputs[inputName]->getLayout());
}

pushInput(input.first, input.second, inPrec);
pushInput(inputName, inputBlob, inPrec);
}
}

@ -162,6 +165,9 @@ void MKLDNNPlugin::MKLDNNInferRequest::PullStates() {
for (auto &node : graph->GetNodes()) {
if (node->getType() == MemoryInput) {
auto cur_node = dynamic_cast<MKLDNNMemoryInputNode*>(node.get());
if (!cur_node) {
IE_THROW() << "Cannot cast " << node->getName() << " to MKLDNNMemoryInputNode";
}
auto cur_id = cur_node->getId();
for (const auto& state : memoryStates) {
if (state->GetName() == cur_id) {
@ -499,71 +505,104 @@ static inline void changeEdgePtr(const MKLDNNPlugin::MKLDNNEdgePtr &edge, void *

void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() {
for (auto& it : externalPtr) {
auto input = graph->GetInputNodesMap().find(it.first);
if (input != graph->GetInputNodesMap().end()) {
if (input->second->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
const auto& inputNodesMap = graph->GetInputNodesMap();
auto input = inputNodesMap.find(it.first);
if (input != inputNodesMap.end()) {
MKLDNNNodePtr inputNodePtr = input->second;
if (inputNodePtr->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
continue;
auto& childEdges = inputNodePtr->getChildEdges();
// Input cannot be in-place with other primitives
bool canBeInPlace = true;
for (size_t i = 0; canBeInPlace && i < input->second->getChildEdges().size(); i++) {
auto& child = input->second->getChildEdgeAt(i)->getChild();
if (child->isConstant())
canBeInPlace = false;
for (auto& childEdge : childEdges) {
auto ce = childEdge.lock();
if (!ce)
IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge";

auto* concat = dynamic_cast<MKLDNNConcatNode *>(child.get());
if (canBeInPlace && concat && concat->isOptimized())
canBeInPlace = false;
auto& child = ce->getChild();

// Cannot be in-place before split because split is using different ptrs without offsets
auto* split = dynamic_cast<MKLDNNSplitNode *>(child.get());
if (canBeInPlace && split)
canBeInPlace = false;

if (child->isInplace())
canBeInPlace = false;
for (size_t j = 0; canBeInPlace && j < child->getChildEdges().size(); j++) {
if (child->getChildEdgeAt(j)->getMemory().GetPrimitive().get_data_handle() ==
input->second->getChildEdgeAt(i)->getMemory().GetPrimitive().get_data_handle())
canBeInPlace = false;
}
}
for (size_t i = 0; canBeInPlace && i < input->second->getChildEdges().size(); i++) {
changeEdgePtr(input->second->getChildEdgeAt(i), it.second);
}
continue;
}

MKLDNNNodePtr output;
for (auto& out : graph->GetOutputNodesMap()) {
if (out.first == it.first) {
output = out.second;
break;
}
}
if (output) {
if (output->getParentEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second)
continue;
bool canBeInPlace = true;
void * defaultPtr = output->getParentEdgeAt(0)->getMemory().GetPrimitivePtr()->get_data_handle();
// Cannot be in-place after concat because concat is using different ptrs without offsets
auto parent = output->getParentEdgeAt(0)->getParent();
MKLDNNNodePtr previousParent;
do {
previousParent = parent;
if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInplace()) {
if (child->isConstant()) {
canBeInPlace = false;
break;
}

for (size_t i = 0; i < parent->getParentEdges().size(); i++) {
if (parent->getParentEdgeAt(i)->getMemory().GetPrimitivePtr()->get_data_handle() == defaultPtr) {
parent = parent->getParentEdgeAt(i)->getParent();
if (child->getType() == Concatenation && dynamic_cast<MKLDNNConcatNode*>(child.get())->isOptimized()) {
canBeInPlace = false;
break;
}

// Cannot be in-place before split because split is using different ptrs without offsets
if (child->getType() == Split) {
canBeInPlace = false;
break;
}

if (child->isInPlace()) {
canBeInPlace = false;
break;
}

auto& edges = child->getChildEdges();
for (auto& edge : edges) {
auto e = edge.lock();
if (!e)
IE_THROW() << "Node " << child->getName() << " contains empty child edge";

if (e->getMemory().GetPrimitive().get_data_handle() == ce->getMemory().GetPrimitive().get_data_handle()) {
canBeInPlace = false;
break;
}
}

if (!canBeInPlace)
break;
}
if (canBeInPlace) {
for (auto& edge : childEdges) {
auto e = edge.lock();
if (!e)
IE_THROW() << "Node " << inputNodePtr->getName() << " contains empty child edge";

changeEdgePtr(e, it.second);
}
}

continue;
}

const auto& outputNodesMap = graph->GetOutputNodesMap();
auto output = outputNodesMap.find(it.first);
if (output != outputNodesMap.end()) {
auto parentEdge = output->second->getParentEdgeAt(0);
if (parentEdge->getMemory().GetPrimitive().get_data_handle() == it.second)
continue;

bool canBeInPlace = true;
void* defaultPtr = parentEdge->getMemory().GetPrimitivePtr()->get_data_handle();
// Cannot be in-place after concat because concat is using different ptrs without offsets
auto parent = parentEdge->getParent();
MKLDNNNodePtr previousParent;
do {
previousParent = parent;
if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInPlace()) {
canBeInPlace = false;
break;
}

auto& parentEdges = parent->getParentEdges();
for (auto& edge : parentEdges) {
auto e = edge.lock();
if (!e)
IE_THROW() << "Node " << parent->getName() << " contains empty parent edge";

if (e->getMemory().GetPrimitivePtr()->get_data_handle() == defaultPtr) {
parent = e->getParent();
break;
}
}
} while (previousParent != parent);
if (canBeInPlace)
changeEdgePtr(output->getParentEdgeAt(0), it.second);
changeEdgePtr(parentEdge, it.second);
continue;
}
IE_THROW() << "Cannot find input/output blob: " << it.first;
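The input-side walk above reduces to a small set of in-place eligibility rules. A condensed restatement, under the stated assumption that this mirrors rather than replaces the literal loop:

    // An external pointer may be injected for an input node only if no child edge:
    //   - leads to a constant node;
    //   - leads to an optimized Concat or to a Split (both re-point buffers);
    //   - leads to a node that is itself in-place;
    //   - already shares its data handle with another edge of the same child.
    // Only when every child edge passes do all edges get changeEdgePtr(e, it.second).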

@ -491,7 +491,8 @@ std::vector<memory::format_tag> MKLDNNNode::getAvailableFormatsForDims(const Sha
else if (dims.getRank() == 2)
return {memory::format_tag::nc};
else if (dims.getRank() == 3)
return {memory::format_tag::tnc, memory::format_tag::ntc};
return {memory::format_tag::tnc, memory::format_tag::ntc,
memory::format_tag::ncw, memory::format_tag::nCw8c, memory::format_tag::nCw16c };
else if (dims.getRank() == 4)
return {memory::format_tag::nchw, memory::format_tag::nChw8c, memory::format_tag::nChw16c};
else if (dims.getRank() == 5)
@ -769,15 +770,29 @@ void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_de
}
}

bool MKLDNNNode::isInplace() const {
auto selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
IE_THROW() << "Preferable primitive descriptor is not set.";
auto config = selected_pd->getConfig();
bool MKLDNNNode::isInPlace() {
if (inplace == InPlaceType::Unknown) {
auto selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
IE_THROW() << "Preferable primitive descriptor is not set.";

for (auto &in : config.inConfs) if (in.inPlace >= 0) return true;
for (auto &out : config.outConfs) if (out.inPlace >= 0) return true;
return false;
inplace = InPlaceType::NoInPlace;
auto config = selected_pd->getConfig();
for (auto &in : config.inConfs) {
if (in.inPlace >= 0) {
inplace = InPlaceType::InPlace;
break;
}
}
for (auto &out : config.outConfs) {
if (out.inPlace >= 0) {
inplace = InPlaceType::InPlace;
break;
}
}
}

return inplace == InPlaceType::InPlace;
}

bool MKLDNNNode::isConstant() {

@ -195,6 +195,8 @@ public:
return engine;
}

bool isInPlace();

// must be called only after MKLDNNGraph::InitEdges()
virtual bool isExecutable() const {
return true;
@ -202,8 +204,6 @@ public:

bool isConstant();

bool isInplace() const;

bool isFusedWith(Type type) const;

void addFusedNode(const MKLDNNNodePtr &fusingNode) {
@ -336,6 +336,10 @@ public:
selectedPrimitiveDescriptorIndex = -1;
else
selectedPrimitiveDescriptorIndex = index;

// Each primitive descriptor has its own InPlace status. So after a new primitive descriptor selection
// we should reset the InPlace type so that MKLDNNNode::isInPlace() can determine the new status
inplace = InPlaceType::Unknown;
}

std::string getPrimitiveDescriptorType();
@ -616,11 +620,17 @@ protected:
bool permanent = false;
bool temporary = false;
int dynBatchLim = 0;
enum class InPlaceType {
Unknown,
InPlace,
NoInPlace
};
enum class ConstantType {
Unknown,
Const,
NoConst
};
InPlaceType inplace = InPlaceType::Unknown;
ConstantType constant = ConstantType::Unknown;
std::vector<InferenceEngine::Blob::Ptr> internalBlobs;
std::vector<MKLDNNMemoryPtr> internalBlobMemory;
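isInPlace() is thus a memoized tri-state query, invalidated whenever a new primitive descriptor is selected. The same idiom reduced to a self-contained sketch (a hypothetical type, not the plugin's class):

    enum class TriState { Unknown, Yes, No };

    struct NodeSketch {
        TriState inplace = TriState::Unknown;
        bool computeInPlace() const;              // expensive: scans port configs
        bool isInPlace() {
            if (inplace == TriState::Unknown)
                inplace = computeInPlace() ? TriState::Yes : TriState::No;
            return inplace == TriState::Yes;      // cached until invalidated
        }
        void selectPrimitiveDescriptor(int /*index*/) {
            inplace = TriState::Unknown;          // new descriptor, new answer
        }
    };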

@ -38,6 +38,7 @@
#include <transformations/op_conversions/convert_broadcast_to_tiles.hpp>
#include <transformations/op_conversions/convert_depth_to_space.hpp>
#include <transformations/op_conversions/convert_shuffle_channels3.hpp>
#include <transformations/op_conversions/convert_slice_to_strided_slice.hpp>
#include <transformations/op_conversions/convert_space_to_depth.hpp>
#include <transformations/op_conversions/convert_gelu.hpp>
#include <transformations/op_conversions/convert_gather_downgrade.hpp>
@ -107,6 +108,7 @@
#include "nodes/mkldnn_fake_quantize_node.h"
#include "nodes/mkldnn_normalize_node.h"
#include "ngraph_transformations/convert_to_cpu_specific_opset.hpp"
#include "ngraph_transformations/move_eltwise_up_data_movement.hpp"
#include "transformations/smart_reshape/smart_reshape.hpp"

#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
@ -371,6 +373,7 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
pass_config->disable<ngraph::pass::ConvertReduceMeanToPooling>();
pass_config->disable<ngraph::pass::ConvertReduceMaxToPooling>();
pass_config->disable<ngraph::pass::ConvertReduceSumToPooling>();
pass_config->disable<ngraph::pass::SliceToStridedSlice>();

pass_config->enable<ngraph::pass::NormalizeL2Decomposition>();
pass_config->enable<ngraph::pass::ConvertInterpolate1ToInterpolate4>();
@ -475,6 +478,14 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
return node->get_rt_info().count("UNROLL_TI") == 0;
});

postLPTPassManager.register_pass<MoveEltwiseUpThroughDataMov>();
postLPTPassManager.get_pass_config()->set_callback<MoveEltwiseUpThroughDataMov>([](const std::shared_ptr<const ngraph::Node>& node) -> bool {
if (node->get_input_size() >= 2) {
return node->get_input_element_type(1) == ngraph::element::i8 || node->get_input_element_type(1) == ngraph::element::u8;
}
return false;
});

postLPTPassManager.run_passes(nGraphFunc);
}

@ -575,15 +586,13 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
// network is below general threshold
num_streams = std::max(default_num_streams, num_streams_less_aggressive);
}

int ovPerfHintNumRequests = engConfig.perfHintsConfig.ovPerfHintNumRequests; // set thru SetConfig to the plugin
auto num_requests = config.find(PluginConfigParams::KEY_PERFORMANCE_HINT_NUM_REQUESTS);
if (num_requests != config.end()) {
// arrived with config to the LoadNetwork (and thus higher pri)
ovPerfHintNumRequests = PerfHintsConfig::CheckPerformanceHintRequestValue(num_requests->second);
}
num_streams = std::min(num_streams, std::max(ovPerfHintNumRequests, 1));

if (engConfig.perfHintsConfig.ovPerfHintNumRequests) // set thru SetConfig to the plugin
num_streams = std::min(engConfig.perfHintsConfig.ovPerfHintNumRequests,
engConfig.perfHintsConfig.ovPerfHintNumRequests);
if (num_requests != config.end()) // arrived with config to the LoadNetwork (and thus higher pri)
num_streams = std::min(num_streams,
PerfHintsConfig::CheckPerformanceHintRequestValue(num_requests->second));
config[PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS] = std::to_string(num_streams);
}
}

@ -6,7 +6,6 @@
#include "fc_bias_fusion.hpp"
#include "ngraph/op/fake_quantize.hpp"
#include "ngraph/pass/manager.hpp"
#include "reshape_1d_ops.hpp"
#include "reshape_fc_fusion.hpp"
#include "reshape_fully_connected.hpp"
#include "align_matmul_input_ranks.hpp"
@ -26,10 +25,6 @@ namespace MKLDNNPlugin {
inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphFunc) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::ConstantFolding>();
manager.register_pass<Reshape1DConvolution>();
manager.register_pass<Reshape1DGroupConvolution>();
manager.register_pass<Reshape1DAvgPool>();
manager.register_pass<Reshape1DMaxPool>();
manager.register_pass<ConvertMatMulToFC>();
manager.register_pass<AlignMatMulInputRanks>();
manager.register_pass<ConvertTileToSeqTiles>();
@ -0,0 +1,113 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "move_eltwise_up_data_movement.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
|
||||
#include <ngraph/opsets/opset8.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::MoveEltwiseUpThroughDataMov, "MoveEltwiseUpThroughDataMov", 0);
|
||||
|
||||
namespace {
|
||||
bool is_data_movement_operation(const std::shared_ptr<ngraph::Node>& node) {
|
||||
return ov::is_type<ngraph::op::v0::Squeeze>(node) ||
|
||||
ov::is_type<ngraph::op::v0::Unsqueeze>(node) ||
|
||||
ov::is_type<ngraph::op::v1::Reshape>(node) ||
|
||||
ov::is_type<ngraph::op::v1::Transpose>(node) ||
|
||||
ov::is_type<ngraph::op::v0::ShuffleChannels>(node) ||
|
||||
ov::is_type<ngraph::op::v7::Roll>(node) ||
|
||||
ov::is_type<ngraph::op::v0::ReverseSequence>(node) ||
|
||||
ov::is_type<ngraph::op::v0::DepthToSpace>(node) ||
|
||||
ov::is_type<ngraph::op::v1::BatchToSpace>(node) ||
|
||||
ov::is_type<ngraph::op::v1::Broadcast>(node) ||
|
||||
ov::is_type<ngraph::op::v3::Broadcast>(node) ||
|
||||
ov::is_type<ngraph::op::v1::Gather>(node) ||
|
||||
ov::is_type<ngraph::op::v7::Gather>(node) ||
|
||||
ov::is_type<ngraph::op::v8::Gather>(node);
|
||||
}
|
||||
|
||||
bool is_scalar_like(const std::shared_ptr<ngraph::Node>& node) {
|
||||
auto constantNode = std::dynamic_pointer_cast<ngraph::opset8::Constant>(node);
|
||||
return constantNode != nullptr && shape_size(constantNode->get_shape()) == 1;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
MKLDNNPlugin::MoveEltwiseUpThroughDataMov::MoveEltwiseUpThroughDataMov() {
|
||||
auto eltwise_pattern = ngraph::pattern::wrap_type<ngraph::op::util::UnaryElementwiseArithmetic,
|
||||
ngraph::op::util::BinaryElementwiseArithmetic>(ngraph::pattern::has_static_rank());
|
||||
|
||||
ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
|
||||
const auto& pattern_map = m.get_pattern_value_map();
|
||||
|
||||
auto eltwise = pattern_map.at(eltwise_pattern).get_node_shared_ptr();
|
||||
if (transformation_callback(eltwise)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (eltwise->get_output_size() == 0 ||
|
||||
eltwise->get_input_size() == 0 ||
|
||||
eltwise->get_output_element_type(0) != eltwise->get_input_element_type(0) ||
|
||||
eltwise->get_output_target_inputs(0).size() != 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool is_binary_op = std::dynamic_pointer_cast<ngraph::op::util::BinaryElementwiseArithmetic>(eltwise) != nullptr;
|
||||
if (is_binary_op && !is_scalar_like(eltwise->get_input_node_shared_ptr(1))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
auto current = eltwise->get_input_node_shared_ptr(0);
|
||||
auto child = eltwise;
|
||||
|
||||
while (is_data_movement_operation(current)) {
|
||||
if (current->get_output_size() != 1 ||
|
||||
current->get_output_target_inputs(0).size() != 1 ||
|
||||
current->get_output_element_type(0) != current->get_input_element_type(0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
child = current;
|
||||
current = current->get_input_node_shared_ptr(0);
|
||||
}
|
||||
|
||||
// now current is the first not data movement op
|
||||
if (child == eltwise) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// eltwise constant shape should match new input shape
|
||||
if (is_binary_op && current->get_output_shape(0).size() != eltwise->get_input_shape(1).size()) {
|
||||
auto old_eltwise_const = std::dynamic_pointer_cast<ngraph::opset8::Constant>(eltwise->get_input_node_shared_ptr(1));
|
||||
auto new_constant = std::make_shared<ngraph::opset8::Constant>(*old_eltwise_const.get(), ngraph::Shape{});
|
||||
ngraph::replace_node(old_eltwise_const, new_constant);
|
||||
}
|
||||
ngraph::replace_output_update_name(eltwise->output(0), eltwise->input_value(0));
|
||||
|
||||
ngraph::OutputVector eltwiseInputs = eltwise->input_values();
|
||||
eltwiseInputs[0] = child->input_value(0);
|
||||
auto newEltwise = eltwise->clone_with_new_inputs(eltwiseInputs);
|
||||
ngraph::copy_runtime_info(eltwise, newEltwise);
|
||||
newEltwise->set_friendly_name(eltwise->get_friendly_name());
|
||||
|
||||
ngraph::OutputVector childInputs = child->input_values();
|
||||
childInputs[0] = newEltwise;
|
||||
auto newChild = child->clone_with_new_inputs(childInputs);
|
||||
ngraph::copy_runtime_info(child, newChild);
|
||||
newChild->set_friendly_name(child->get_friendly_name());
|
||||
|
||||
ngraph::replace_node(child, newChild);
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(eltwise_pattern, "MoveEltwiseUpThroughDataMov");
|
||||
register_matcher(m, callback);
|
||||
}
@ -0,0 +1,17 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>

namespace MKLDNNPlugin {

class MoveEltwiseUpThroughDataMov : public ngraph::pass::MatcherPass {
public:
    NGRAPH_RTTI_DECLARATION;
    MoveEltwiseUpThroughDataMov();
};

}  // namespace MKLDNNPlugin
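For context, a matcher pass like this is registered on an ngraph pass manager; a minimal usage sketch (the surrounding setup is assumed here and is not part of this commit):

//   #include <ngraph/pass/manager.hpp>
//   ngraph::pass::Manager manager;
//   manager.register_pass<MKLDNNPlugin::MoveEltwiseUpThroughDataMov>();
//   manager.run_passes(f);  // f: std::shared_ptr<ngraph::Function>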
@ -1,218 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "reshape_1d_ops.hpp"

#include <memory>
#include <vector>
#include <numeric>

#include <ngraph/opsets/opset1.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph_ops/type_relaxed.hpp>

#include "transformations/utils/utils.hpp"

namespace {
template <class BaseOp>
std::shared_ptr<ngraph::Node> convert(const ngraph::Output<ngraph::Node> & data, std::shared_ptr<BaseOp> node, ngraph::NodeVector & new_ops) {
    auto new_strides = node->get_strides();
    auto new_dilations = node->get_dilations();
    auto new_pads_begin = node->get_pads_begin();
    auto new_pad_end = node->get_pads_end();

    new_strides.insert(new_strides.begin(), 1);
    new_dilations.insert(new_dilations.begin(), 1);
    new_pads_begin.insert(new_pads_begin.begin(), 0);
    new_pad_end.insert(new_pad_end.begin(), 0);

    const size_t weights_rank = node->get_input_partial_shape(1).size();
    const auto unsqueeze_const = ngraph::opset1::Constant::create(ngraph::element::i32, { 1 }, { weights_rank - 1 });
    const auto weights = ngraph::op::util::make_try_fold<ngraph::opset1::Unsqueeze>(node->input_value(1), unsqueeze_const);
    new_ops.push_back(weights);

    if (std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(node)) {
        return std::make_shared<ngraph::op::TypeRelaxed<BaseOp>>(std::vector<ngraph::element::Type>{ngraph::element::f32, ngraph::element::f32},
                                                                 std::vector<ngraph::element::Type>{ngraph::element::f32},
                                                                 ngraph::op::TemporaryReplaceOutputType(data, ngraph::element::f32).get(),
                                                                 ngraph::op::TemporaryReplaceOutputType(weights, ngraph::element::f32).get(),
                                                                 new_strides,
                                                                 new_pads_begin,
                                                                 new_pad_end,
                                                                 new_dilations,
                                                                 node->get_auto_pad());
    } else {
        return std::make_shared<BaseOp>(data,
                                        weights,
                                        new_strides,
                                        new_pads_begin,
                                        new_pad_end,
                                        new_dilations,
                                        node->get_auto_pad());
    }
}

template <>
std::shared_ptr<ngraph::Node> convert(const ngraph::Output<ngraph::Node> & data, std::shared_ptr<ngraph::opset1::MaxPool> node, ngraph::NodeVector & new_ops) {
    auto new_strides = node->get_strides();
    auto new_pads_begin = node->get_pads_begin();
    auto new_pad_end = node->get_pads_end();
    auto new_kernel = node->get_kernel();

    new_strides.insert(new_strides.begin(), 1);
    new_pads_begin.insert(new_pads_begin.begin(), 0);
    new_pad_end.insert(new_pad_end.begin(), 0);
    new_kernel.insert(new_kernel.begin(), 1);

    return std::make_shared<ngraph::opset1::MaxPool>(data,
                                                     new_strides,
                                                     new_pads_begin,
                                                     new_pad_end,
                                                     new_kernel,
                                                     node->get_rounding_type(),
                                                     node->get_auto_pad());
}

template <>
std::shared_ptr<ngraph::Node> convert(const ngraph::Output<ngraph::Node> & data, std::shared_ptr<ngraph::opset1::AvgPool> node, ngraph::NodeVector & new_ops) {
    // Update Pooling attributes with the additional dimension
    auto new_strides = node->get_strides();
    auto new_pads_begin = node->get_pads_begin();
    auto new_pad_end = node->get_pads_end();
    auto new_kernel = node->get_kernel();

    new_strides.insert(new_strides.begin(), 1);
    new_pads_begin.insert(new_pads_begin.begin(), 0);
    new_pad_end.insert(new_pad_end.begin(), 0);
    new_kernel.insert(new_kernel.begin(), 1);

    return std::make_shared<ngraph::opset1::AvgPool>(data,
                                                     new_strides,
                                                     new_pads_begin,
                                                     new_pad_end,
                                                     new_kernel,
                                                     node->get_exclude_pad(),
                                                     node->get_rounding_type(),
                                                     node->get_auto_pad());
}

ngraph::matcher_pass_callback get_callback() {
    return [](ngraph::pattern::Matcher& m) {
        auto node = m.get_match_root();
        const auto input_rank = node->get_input_partial_shape(0).size();
        if (input_rank != 3) {
            return false;
        }

        ngraph::NodeVector new_ops;

        // Update pshape from [N, C, W] to [N, C, 1, W]
        const auto unsqueeze_const = ngraph::opset1::Constant::create(ngraph::element::i32, { 1 }, { input_rank - 1 });
        ngraph::Output<ngraph::Node> last = std::make_shared<ngraph::opset1::Unsqueeze>(node->input_value(0), unsqueeze_const);
        last.get_node_shared_ptr()->set_friendly_name(node->get_friendly_name() + "/reshape_begin");
        new_ops.push_back(last.get_node_shared_ptr());

        if (auto conv = std::dynamic_pointer_cast<ngraph::opset1::Convolution>(node)) {
            last = convert(last, conv, new_ops);
        } else if (auto group_conv = std::dynamic_pointer_cast<ngraph::opset1::GroupConvolution>(node)) {
            last = convert(last, group_conv, new_ops);
        } else if (auto max_pool = std::dynamic_pointer_cast<ngraph::opset1::MaxPool>(node)) {
            last = convert(last, max_pool, new_ops);
        } else if (auto avg_pool = std::dynamic_pointer_cast<ngraph::opset1::AvgPool>(node)) {
            last = convert(last, avg_pool, new_ops);
        } else {
            throw ngraph::ngraph_error("Reshape1DOps: op type is not supported");
        }

        last.get_node_shared_ptr()->set_friendly_name(node->get_friendly_name() + "/new");
        new_ops.push_back(last.get_node_shared_ptr());

        // if the convolution is followed by an Add, we need to replace the Add before the output reshape to fuse conv+bias on the plug-in side
        std::shared_ptr<ngraph::Node> add_to_replace = nullptr;
        std::shared_ptr<ngraph::Node> reshaped_add = nullptr;
        ngraph::NodeVector bias_ops;
        if (std::dynamic_pointer_cast<ngraph::opset1::Convolution>(node) || std::dynamic_pointer_cast<ngraph::opset1::GroupConvolution>(node)) {
            auto out_pshape = node->get_output_partial_shape(0);
            const auto dst_nodes = node->get_output_target_inputs(0);

            // we can also reshape biases if possible
            if (dst_nodes.size() == 1 && out_pshape.rank().is_static() && out_pshape.rank().get_length() > 2 && out_pshape[1].is_static()) {
                auto channel = node->get_output_partial_shape(0)[1];
                ngraph::Shape expected_shape = ngraph::Shape(input_rank, 1);
                expected_shape[1] = channel.get_length();

                add_to_replace = dst_nodes.begin()->get_node()->shared_from_this();
                if (std::dynamic_pointer_cast<ngraph::opset1::Add>(add_to_replace) &&
                    std::dynamic_pointer_cast<ngraph::opset1::Constant>(add_to_replace->get_input_node_shared_ptr(1)) &&
                    add_to_replace->get_input_shape(1) == expected_shape) {
                    ngraph::Shape new_shape(add_to_replace->get_input_shape(1));
                    new_shape.push_back(1);
                    auto new_shape_const = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ new_shape.size() }, new_shape);

                    auto new_bias = ngraph::op::util::make_try_fold<ngraph::opset1::Reshape>(add_to_replace->input_value(1), new_shape_const, true);
                    reshaped_add = std::make_shared<ngraph::opset1::Add>(last, new_bias);
                    reshaped_add->set_friendly_name(add_to_replace->get_friendly_name() + "/new");
                    bias_ops.push_back(new_bias);
                    bias_ops.push_back(reshaped_add);
                }
            }
        }

        if (reshaped_add != nullptr) {
            ngraph::replace_node(node, last.get_node_shared_ptr());
            ngraph::copy_runtime_info(node, new_ops);
            last = reshaped_add;
            node = add_to_replace;
            new_ops = bias_ops;
        }

        // Update pshape from [N, C, 1, W] to [N, C, W]
        const auto squeeze_const = ngraph::opset1::Constant::create(ngraph::element::i32, { 1 }, { input_rank - 1 });
        last = std::make_shared<ngraph::opset1::Squeeze>(last, squeeze_const);
        last.get_node_shared_ptr()->set_friendly_name(node->get_friendly_name());
        ngraph::replace_node(node, last.get_node_shared_ptr());
        ngraph::copy_runtime_info(node, new_ops);
        return true;
    };
}
}  // namespace

NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::Reshape1DConvolution, "Reshape1DConvolution", 0);

MKLDNNPlugin::Reshape1DConvolution::Reshape1DConvolution() {
    auto activations = ngraph::pattern::any_input(ngraph::pattern::has_static_rank());
    auto weights = ngraph::pattern::any_input(ngraph::pattern::has_static_rank());
    auto conv = ngraph::pattern::wrap_type<ngraph::opset1::Convolution>({ activations, weights });
    auto m = std::make_shared<ngraph::pattern::Matcher>(conv, "Reshape1DConvolution");
    this->register_matcher(m, get_callback());
}

NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::Reshape1DGroupConvolution, "Reshape1DGroupConvolution", 0);

MKLDNNPlugin::Reshape1DGroupConvolution::Reshape1DGroupConvolution() {
    auto activations = ngraph::pattern::any_input(ngraph::pattern::has_static_rank());
    auto weights = ngraph::pattern::any_input(ngraph::pattern::has_static_rank());
    auto group_conv = ngraph::pattern::wrap_type<ngraph::opset1::GroupConvolution>({ activations, weights });
    auto m = std::make_shared<ngraph::pattern::Matcher>(group_conv, "Reshape1DGroupConvolution");
    this->register_matcher(m, get_callback());
}

NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::Reshape1DAvgPool, "Reshape1DAvgPool", 0);

MKLDNNPlugin::Reshape1DAvgPool::Reshape1DAvgPool() {
    auto input = ngraph::pattern::any_input(ngraph::pattern::has_static_rank());
    auto pool = ngraph::pattern::wrap_type<ngraph::opset1::AvgPool>({ input });
    auto m = std::make_shared<ngraph::pattern::Matcher>(pool, "Reshape1DAvgPool");
    this->register_matcher(m, get_callback());
}

NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::Reshape1DMaxPool, "Reshape1DMaxPool", 0);

MKLDNNPlugin::Reshape1DMaxPool::Reshape1DMaxPool() {
    auto input = ngraph::pattern::any_input(ngraph::pattern::has_static_rank());
    auto pool = ngraph::pattern::wrap_type<ngraph::opset1::MaxPool>({ input });
    auto m = std::make_shared<ngraph::pattern::Matcher>(pool, "Reshape1DMaxPool");
    this->register_matcher(m, get_callback());
}
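For reference, the rewrite these deleted passes performed wrapped a rank-3 op between an Unsqueeze/Squeeze pair (a sketch; shapes are illustrative):

//   Before:  data [N, C, W] -> Convolution(1D) -> [N, C', W']
//   After:   data [N, C, W] -> Unsqueeze(axis=2) -> [N, C, 1, W]
//            -> Convolution(2D, 1xK kernel) -> [N, C', 1, W'] -> Squeeze(axis=2) -> [N, C', W']

They are removed because, with the rank-3 support added to the convolution, pooling and eltwise nodes elsewhere in this diff, the CPU plugin can now execute such graphs directly.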
@ -1,35 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>

namespace MKLDNNPlugin {

class Reshape1DConvolution: public ngraph::pass::MatcherPass {
public:
    NGRAPH_RTTI_DECLARATION;
    Reshape1DConvolution();
};

class Reshape1DGroupConvolution: public ngraph::pass::MatcherPass {
public:
    NGRAPH_RTTI_DECLARATION;
    Reshape1DGroupConvolution();
};

class Reshape1DAvgPool: public ngraph::pass::MatcherPass {
public:
    NGRAPH_RTTI_DECLARATION;
    Reshape1DAvgPool();
};

class Reshape1DMaxPool: public ngraph::pass::MatcherPass {
public:
    NGRAPH_RTTI_DECLARATION;
    Reshape1DMaxPool();
};

}  // namespace MKLDNNPlugin
@ -66,6 +66,8 @@ MKLDNNBroadcastNode::MKLDNNBroadcastNode(const std::shared_ptr<ov::Node>& op, co
        if (op->get_input_size() <= AXES_MAPPING_IDX)
            IE_THROW() << errorPrefix << " and EXPLICIT mode must have three input edges: " << getParentEdges().size();
        broadcastType = EXPLICIT;
    } else {
        IE_THROW() << errorPrefix << "has unexpected broadcast type: " << broadcastOp->get_broadcast_spec().m_type;
    }

    if (ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(TARGET_SHAPE_IDX))) {
@ -15,10 +15,6 @@ using namespace InferenceEngine;

bool MKLDNNBucketizeNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
    try {
        if (isDynamicNgraphNode(op)) {
            errorMessage = "Doesn't support op with dynamic shapes";
            return false;
        }
        const auto bucketsize = std::dynamic_pointer_cast<const ngraph::opset3::Bucketize>(op);
        if (!bucketsize) {
            errorMessage = "Only opset3 Bucketize operation is supported";
@ -49,22 +45,6 @@ MKLDNNBucketizeNode::MKLDNNBucketizeNode(const std::shared_ptr<ngraph::Node>& op

    // check one attribute
    with_right = bucketsize->get_with_right_bound();

    // check dimensions of input tensors
    SizeVector input_tensor_dims = op->get_input_shape(INPUT_TENSOR_PORT);
    if (input_tensor_dims.size() < 1) {
        IE_THROW() << errorPrefix << " has incorrect dimensions of the input.";
    }
    SizeVector input_bin_dims = op->get_input_shape(INPUT_BINS_PORT);
    if (input_bin_dims.size() != 1) {
        IE_THROW() << errorPrefix << " has incorrect dimensions of the boundaries tensor.";
    }
    if (input_bin_dims[0] != 0) {
        with_bins = true;
    }
    num_bin_values = input_bin_dims[0];

    num_values = std::accumulate(input_tensor_dims.begin(), input_tensor_dims.end(), size_t(1), std::multiplies<size_t>());
}

void MKLDNNBucketizeNode::initSupportedPrimitiveDescriptors() {
@ -192,6 +172,49 @@ void MKLDNNBucketizeNode::execute(mkldnn::stream strm) {
    }
}

void MKLDNNBucketizeNode::prepareParams() {
    auto& inputTensorMemPtr = getParentEdgeAt(INPUT_TENSOR_PORT)->getMemoryPtr();
    auto& inputBinsMemPtr = getParentEdgeAt(INPUT_BINS_PORT)->getMemoryPtr();
    auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
    if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
        IE_THROW() << "Destination memory didn't allocate.";
    if (!inputTensorMemPtr || !inputTensorMemPtr->GetPrimitivePtr())
        IE_THROW() << "Input tensor didn't allocate.";
    if (!inputBinsMemPtr || !inputBinsMemPtr->GetPrimitivePtr())
        IE_THROW() << "Input bins didn't allocate.";
    if (getSelectedPrimitiveDescriptor() == nullptr)
        IE_THROW() << "Preferable primitive descriptor is not set.";

    // update with_bins/num_values/num_bin_values
    auto input_tensor_dims = inputTensorMemPtr->getStaticDims();
    if (input_tensor_dims.size() < 1) {
        IE_THROW() << errorPrefix << " has incorrect dimensions of the input.";
    }
    auto input_bin_dims = inputBinsMemPtr->getStaticDims();
    if (input_bin_dims.size() != 1) {
        IE_THROW() << errorPrefix << " has incorrect dimensions of the boundaries tensor.";
    }
    if (input_bin_dims[0] != 0) {
        with_bins = true;
    }
    num_bin_values = input_bin_dims[0];

    num_values =
        std::accumulate(input_tensor_dims.begin(), input_tensor_dims.end(), size_t(1), std::multiplies<size_t>());
}
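As a worked example of the bookkeeping above (numbers are illustrative): for an input tensor with dims {2, 3, 4}, the std::accumulate call yields num_values = 2 * 3 * 4 = 24; a boundaries tensor with dims {5} gives num_bin_values = 5 and sets with_bins = true, while dims {0} leaves with_bins unset.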

void MKLDNNBucketizeNode::createPrimitive() {
    if (inputShapesDefined()) {
        if (needPrepareParams())
            prepareParams();
        updateLastInputDims();
    }
}

std::vector<VectorDims> MKLDNNBucketizeNode::shapeInfer() const {
    return {getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()};
}

template <typename T, typename T_BOUNDARIES, typename T_IND>
void MKLDNNBucketizeNode::bucketize() {
    const auto *input_data = reinterpret_cast<const T *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
@ -15,9 +15,14 @@ public:

    void getSupportedDescriptors() override {};
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override {};
    void createPrimitive() override;
    void execute(mkldnn::stream strm) override;
    bool created() const override;
    void executeDynamicImpl(mkldnn::stream strm) override {
        execute(strm);
    }
    void prepareParams() override;
    std::vector<VectorDims> shapeInfer() const override;

    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

@ -33,7 +33,7 @@ bool MKLDNNConvolutionNode::isSupportedOperation(const std::shared_ptr<const ngr
        return false;
    }
    size_t ndims = op->get_input_partial_shape(0).rank().get_length();
    if ((ndims < 4) || (ndims > 5)) {
    if ((ndims < 3) || (ndims > 5)) {
        errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(ndims);
        return false;
    }
@ -254,10 +254,10 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
            outputDataType = memory::data_type::f32;
        if (eltwisePrecision == Precision::BF16)
            eltwisePrecision = Precision::FP32;
        in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getInputShapeAtPort(0),
            inputDataType, ndims == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc);
        out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getOutputShapeAtPort(0),
            outputDataType, ndims == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc);
        in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getInputShapeAtPort(0), inputDataType,
            ndims == 3 ? memory::format_tag::nwc : (ndims == 4 ? memory::format_tag::nhwc : memory::format_tag::ndhwc));
        out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getOutputShapeAtPort(0), outputDataType,
            ndims == 3 ? memory::format_tag::nwc : (ndims == 4 ? memory::format_tag::nhwc : memory::format_tag::ndhwc));
        createDescriptor({ in_candidate }, { out_candidate });
    } else {
        inputDataType = (getOriginalInputPrecisionAtPort(0) == Precision::BF16
@ -289,11 +289,11 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
            eltwisePrecision = Precision::FP32;
        }

        if (one_of(ndims, 4, 5)) {
            memory::format_tag ncsp = ndims == 4 ? memory::format_tag::nchw : memory::format_tag::ncdhw;
            memory::format_tag nspc = ndims == 4 ? memory::format_tag::nhwc : memory::format_tag::ndhwc;
            memory::format_tag nCsp16c = ndims == 4 ? memory::format_tag::nChw16c : memory::format_tag::nCdhw16c;
            memory::format_tag nCsp8c = ndims == 4 ? memory::format_tag::nChw8c : memory::format_tag::nCdhw8c;
        if (one_of(ndims, 3, 4, 5)) {
            memory::format_tag nspc = ndims == 3 ? memory::format_tag::nwc : (ndims == 4 ? memory::format_tag::nhwc : memory::format_tag::ndhwc);
            memory::format_tag ncsp = ndims == 3 ? memory::format_tag::ncw : (ndims == 4 ? memory::format_tag::nchw : memory::format_tag::ncdhw);
            memory::format_tag nCsp8c = ndims == 3 ? memory::format_tag::nCw8c : (ndims == 4 ? memory::format_tag::nChw8c : memory::format_tag::nCdhw8c);
            memory::format_tag nCsp16c = ndims == 3 ? memory::format_tag::nCw16c : (ndims == 4 ? memory::format_tag::nChw16c : memory::format_tag::nCdhw16c);

            auto inputShape = getInputShapeAtPort(0);
            auto outputShape = getOutputShapeAtPort(0);
@ -830,7 +830,7 @@ bool MKLDNNConvolutionNode::isNspcAvailable() const {

    if (isDepthWise()) {
        // 1d equivalent cases are painfully slow
        if (1 == inpDims[inpDims.size() - 2]) {
        if (inpDims.size() == 3 || 1 == inpDims[inpDims.size() - 2]) {
            return false;
        }
    } else {
@ -29,7 +29,7 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_

    constexpr static int sampledPointsPerPixel = MKLDNNDeformableConvolutionNode::sampledPointsPerPixel;

    explicit jit_uni_def_conv_kernel_f32(jit_def_conv_params jcp) : jit_uni_def_conv_kernel(std::move(jcp)), jit_generator() {}
    explicit jit_uni_def_conv_kernel_f32(const jit_def_conv_params& jcp) : jit_uni_def_conv_kernel(jcp), jit_generator() {}

    void create_ker() override {
        jit_generator::create_kernel();
@ -59,7 +59,7 @@ struct jit_uni_def_conv_kernel {
        ker_(args);
    }

    explicit jit_uni_def_conv_kernel(jit_def_conv_params jcp) : ker_(nullptr), jcp_(std::move(jcp)) {}
    explicit jit_uni_def_conv_kernel(const jit_def_conv_params& jcp) : ker_(nullptr), jcp_(jcp) {}
    virtual ~jit_uni_def_conv_kernel() {}

    virtual void create_ker() = 0;
@ -79,8 +79,8 @@ template <cpu_isa_t isa>
struct jit_uni_eltwise_generic : public MKLDNNPlugin::jit_uni_eltwise_kernel, public jit_generator {
    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_eltwise_generic)

    explicit jit_uni_eltwise_generic(jit_eltwise_params jep, MKLDNNEltwiseNode& eltwiseNode) :
        jit_uni_eltwise_kernel(std::move(jep), eltwiseNode), jit_generator() {}
    explicit jit_uni_eltwise_generic(const jit_eltwise_params& jep, MKLDNNEltwiseNode& eltwiseNode) :
        jit_uni_eltwise_kernel(jep, eltwiseNode), jit_generator() {}

    void create_ker() override {
        jit_generator::create_kernel();
@ -1238,18 +1238,18 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() {
        return {config, impl_type};
    };

    bool isChannelsFirstApplicable = one_of(getOutputShapeAtPort(0).getRank(), 1, 2, 4, 5);
    bool isChannelsFirstApplicable = one_of(getOutputShapeAtPort(0).getRank(), 1, 2, 3, 4, 5);
    for (size_t i = 0; i < getParentEdges().size(); i++) {
        isChannelsFirstApplicable = isChannelsFirstApplicable && one_of(getInputShapeAtPort(i).getRank(), 1, 2, 4, 5);
        isChannelsFirstApplicable = isChannelsFirstApplicable && one_of(getInputShapeAtPort(i).getRank(), 1, 2, 3, 4, 5);
        isChannelsFirstApplicable = isChannelsFirstApplicable && implication(getInputShapeAtPort(i).getRank() != 1,
                                                                             getOutputShapeAtPort(0).getRank() ==
                                                                             getInputShapeAtPort(i).getRank());
    }

    bool isBlockedApplicable = one_of(getOutputShapeAtPort(0).getRank(), 1, 4, 5);
    bool isBlockedApplicable = one_of(getOutputShapeAtPort(0).getRank(), 1, 3, 4, 5);
    for (size_t i = 0; i < getParentEdges().size(); i++) {
        const auto &inShape = getInputShapeAtPort(i);
        isBlockedApplicable = isBlockedApplicable && one_of(inShape.getRank(), 1, 4, 5);
        isBlockedApplicable = isBlockedApplicable && one_of(inShape.getRank(), 1, 3, 4, 5);
        isBlockedApplicable = isBlockedApplicable && implication(inShape.getRank() != 1,
                                                                 getOutputShapeAtPort(0).getRank() ==
                                                                 inShape.getRank());
@ -54,7 +54,7 @@ struct jit_uni_eltwise_kernel {
        ker_(const_args, indexes);
    }

    explicit jit_uni_eltwise_kernel(jit_eltwise_params jep, MKLDNNEltwiseNode& node) : ker_(nullptr), jep_(std::move(jep)), eltwiseNode(node) {}
    explicit jit_uni_eltwise_kernel(const jit_eltwise_params& jep, MKLDNNEltwiseNode& node) : ker_(nullptr), jep_(jep), eltwiseNode(node) {}
    virtual ~jit_uni_eltwise_kernel() {}

    virtual void create_ker() = 0;
@ -45,7 +45,7 @@ template <cpu_isa_t isa>
struct jit_uni_binarization_kernel : public jit_uni_quantize_kernel, public jit_generator {
    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_binarization_kernel)

    explicit jit_uni_binarization_kernel(jit_quantize_params jqp) : jit_uni_quantize_kernel(std::move(jqp)), jit_generator() {}
    explicit jit_uni_binarization_kernel(const jit_quantize_params& jqp) : jit_uni_quantize_kernel(jqp), jit_generator() {}

    void create_ker() override {
        jit_generator::create_kernel();
@ -213,7 +213,7 @@ template <cpu_isa_t isa>
struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_generator {
    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_quantization_kernel)

    explicit jit_uni_quantization_kernel(jit_quantize_params jqp) : jit_uni_quantize_kernel(std::move(jqp)), jit_generator() {}
    explicit jit_uni_quantization_kernel(const jit_quantize_params& jqp) : jit_uni_quantize_kernel(jqp), jit_generator() {}

    void create_ker() override {
        jit_generator::create_kernel();
@ -56,7 +56,7 @@ struct jit_uni_quantize_kernel {
        ker_(args);
    }

    explicit jit_uni_quantize_kernel(jit_quantize_params jqp) : ker_(nullptr), jqp_(std::move(jqp)) {}
    explicit jit_uni_quantize_kernel(const jit_quantize_params& jqp) : ker_(nullptr), jqp_(jqp) {}
    virtual ~jit_uni_quantize_kernel() {}

    virtual void create_ker() = 0;
@ -34,8 +34,8 @@ private:

    size_t axis_;
    size_t dataTypeSize_ = 0;
    int strideAxDst_;
    int dstAxDim_;
    int strideAxDst_ = 0;
    int dstAxDim_ = 0;
    int strideAx1Diff_ = 0;
    std::string errorPrefix_;
@ -161,18 +161,17 @@ void MKLDNNGenericNode::execLayer() {
        }
    }

    if (isDynBatch) {
        // TODO: use ngraph-based extension mechanism if needed to recompute shape
        isDynBatch = false;
    }
    // TODO: use ngraph-based extension mechanism if needed to recompute shape
    isDynBatch = false;
    // TODO: uncomment after using the ngraph-based extension mechanism
    // if (isDynBatch) {
    //     for (size_t i = 0; i < inputs.size(); i++) {
    //         auto td = inputs[i]->getTensorDesc();
    //         td.setDims(inputDescs[i].getDims());
    //         inputs[i] = make_blob_with_precision(td, getParentEdgeAt(i)->getMemory().GetData());
    //     }
    // }

    if (isDynBatch) {
        for (size_t i = 0; i < inputs.size(); i++) {
            auto td = inputs[i]->getTensorDesc();
            td.setDims(inputDescs[i].getDims());
            inputs[i] = make_blob_with_precision(td, getParentEdgeAt(i)->getMemory().GetData());
        }
    }
    std::vector<InferenceEngine::Blob::Ptr> outputs;
    for (size_t i = 0; i < outputShapes.size(); i++) {
        if (isDynBatch) {
@ -27,7 +27,7 @@ public:

private:
    int axis;
    size_t reducedAxisSize;
    size_t reducedAxisSize = 0;
    size_t reducedAxisStride = 1;
    size_t axisStep = 1;
    bool isLastDim = false;
@ -676,10 +676,10 @@ MKLDNNMVNNode::MKLDNNMVNNode(const std::shared_ptr<ngraph::Node>& op, const mkld
        IE_THROW(NotImplemented) << errorMessage;
    }

    epsMode_ = INSIDE_SQRT;
    if (auto mvnOp = ngraph::as_type_ptr<ngraph::op::v6::MVN>(op)) {
        normalizeVariance_ = mvnOp->get_normalize_variance();
        epsValue_ = mvnOp->get_eps();
        epsMode_ = INSIDE_SQRT;
        if (mvnOp->get_eps_mode() == ngraph::op::MVNEpsMode::OUTSIDE_SQRT) {
            epsMode_ = OUTSIDE_SQRT;
        }
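The two eps modes chosen here differ only in where the stabilizer enters the normalization denominator; per the MVN-6 operation spec:

//   INSIDE_SQRT:   y = (x - mean(x)) / sqrt(variance(x) + eps)
//   OUTSIDE_SQRT:  y = (x - mean(x)) / (sqrt(variance(x)) + eps)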
@ -691,7 +691,6 @@ MKLDNNMVNNode::MKLDNNMVNNode(const std::shared_ptr<ngraph::Node>& op, const mkld
    } else if (auto mvnOp = ngraph::as_type_ptr<ngraph::op::v0::MVN>(op)) {
        normalizeVariance_ = mvnOp->get_normalize_variance();
        epsValue_ = mvnOp->get_eps();
        epsMode_ = INSIDE_SQRT;
        initAcrossChannels_ = mvnOp->get_across_channels();
    }
    execAcrossChannels_ = initAcrossChannels_;
@ -86,7 +86,7 @@ std::vector<memory::format_tag> MKLDNNPoolingNode::getAvailableFormatsForDims(co
    else if (dims.getRank() == 2)
        return {memory::format_tag::nc};
    else if (dims.getRank() == 3)
        return {memory::format_tag::tnc, memory::format_tag::ntc};
        return {memory::format_tag::nCw8c, memory::format_tag::nCw16c, memory::format_tag::nwc, memory::format_tag::ncw};
    else if (dims.getRank() == 4)
        return {memory::format_tag::nChw8c, memory::format_tag::nChw16c, memory::format_tag::nhwc, memory::format_tag::nchw};
    else if (dims.getRank() == 5)
@ -148,8 +148,8 @@ void MKLDNNPoolingNode::getSupportedDescriptors() {
    const auto &childShape = getOutputShapeAtPort(0);
    const size_t inputRank = getInputShapeAtPort(0).getRank();

    if ((inputRank < 4) || (inputRank > 5))
        IE_THROW() << "Pooling layer. Unsupported mode. Only 4D and 5D blobs are supported as input.";
    if ((inputRank < 3) || (inputRank > 5))
        IE_THROW() << "Pooling layer. Unsupported mode. Only 3D, 4D and 5D blobs are supported as input.";

    initEffectivePad(MemoryDescUtils::makeDummyShape(parentShape),
                     MemoryDescUtils::makeDummyShape(childShape));
@ -159,17 +159,17 @@ void MKLDNNPoolingNode::getSupportedDescriptors() {
        if (outputDataType == memory::data_type::bf16)
            outputDataType = memory::data_type::f32;
        // i8 layers support only nwc, nhwc and ndhwc layouts
        const auto in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(parentShape, inputDataType, inputRank == 5 ?
            memory::format_tag::ndhwc : memory::format_tag::nhwc);
        const auto out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(childShape, outputDataType, inputRank == 5 ?
            memory::format_tag::ndhwc : memory::format_tag::nhwc);
        const auto in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(parentShape, inputDataType, inputRank == 3 ?
            memory::format_tag::nwc : (inputRank == 4 ? memory::format_tag::nhwc : memory::format_tag::ndhwc));
        const auto out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(childShape, outputDataType, inputRank == 3 ?
            memory::format_tag::nwc : (inputRank == 4 ? memory::format_tag::nhwc : memory::format_tag::ndhwc));
        createDescriptor({ in_candidate }, { out_candidate });
    } else if ((inputRank == 4 || inputRank == 5) && parentShape.getDims()[1] == 1) {
    } else if ((inputRank == 3 || inputRank == 4 || inputRank == 5) && parentShape.getDims()[1] == 1) {
        // WA. We should force planar layout since it provides better performance
        const auto in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(parentShape, inputDataType, inputRank == 5 ?
            memory::format_tag::ncdhw : memory::format_tag::nchw);
        const auto out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(childShape, outputDataType, inputRank == 5 ?
            memory::format_tag::ncdhw : memory::format_tag::nchw);
        const auto in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(parentShape, inputDataType, inputRank == 3 ?
            memory::format_tag::ncw : (inputRank == 4 ? memory::format_tag::nchw : memory::format_tag::ncdhw));
        const auto out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(childShape, outputDataType, inputRank == 3 ?
            memory::format_tag::ncw : (inputRank == 4 ? memory::format_tag::nchw : memory::format_tag::ncdhw));
        createDescriptor({ in_candidate }, { out_candidate });
    } else {
        if (inputDataType != memory::data_type::bf16) {
@ -229,10 +229,6 @@ private:

bool MKLDNNRegionYoloNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
    try {
        if (isDynamicNgraphNode(op)) {
            errorMessage = "Doesn't support op with dynamic shapes";
            return false;
        }
        const auto regionYolo = std::dynamic_pointer_cast<const ngraph::opset1::RegionYolo>(op);
        if (!regionYolo) {
            errorMessage = "Only opset1 RegionYolo operation is supported";
@ -244,6 +240,10 @@ bool MKLDNNRegionYoloNode::isSupportedOperation(const std::shared_ptr<const ngra
    return true;
}

bool MKLDNNRegionYoloNode::needPrepareParams() const {
    return false;
}
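Returning false from needPrepareParams() tells the dynamic-shape machinery that this node has no shape-dependent kernel parameters to rebuild between inferences, so executeDynamicImpl() can simply forward to execute(), as the header change further down does.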

MKLDNNRegionYoloNode::MKLDNNRegionYoloNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
        MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
    std::string errorMessage;
@ -302,6 +302,10 @@ void MKLDNNRegionYoloNode::initSupportedPrimitiveDescriptors() {
}

void MKLDNNRegionYoloNode::createPrimitive() {
    if (inputShapesDefined()) {
        updateLastInputDims();
    }

    jit_logistic_config_params jcp;
    jcp.src_dt = jcp.dst_dt = output_prec;
    jcp.src_data_size = jcp.dst_data_size = output_prec.size();
@ -49,6 +49,10 @@ public:

    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

protected:
    bool needPrepareParams() const override;
    void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }

private:
    int classes;
    int coords;
@ -308,11 +308,7 @@ private:

bool MKLDNNROIPoolingNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
    try {
        if (isDynamicNgraphNode(op)) {
            errorMessage = "Doesn't support op with dynamic shapes";
            return false;
        }
        const auto roiPooling = std::dynamic_pointer_cast<const ngraph::opset2::ROIPooling>(op);
        auto roiPooling = ngraph::as_type_ptr<const ngraph::opset2::ROIPooling>(op);
        if (!roiPooling) {
            errorMessage = "Only opset2 ROIPooling operation is supported";
            return false;
@ -331,22 +327,22 @@ bool MKLDNNROIPoolingNode::isSupportedOperation(const std::shared_ptr<const ngra
MKLDNNROIPoolingNode::MKLDNNROIPoolingNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
        MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
    std::string errorMessage;
    if (isSupportedOperation(op, errorMessage)) {
        std::string errorPrefix = "ROIPooling layer with name '" + getName() + "' ";

        const auto roiPooling = std::dynamic_pointer_cast<const ngraph::opset2::ROIPooling>(op);
        pooled_h = roiPooling->get_output_size()[0];
        pooled_w = roiPooling->get_output_size()[1];
        spatial_scale = roiPooling->get_spatial_scale();
        std::string m = roiPooling->get_method();
        if (m == "max") {
            algorithm = Algorithm::ROIPoolingMax;
        } else if (m == "bilinear") {
            algorithm = Algorithm::ROIPoolingBilinear;
        }
    } else {
    if (!isSupportedOperation(op, errorMessage)) {
        IE_THROW(NotImplemented) << errorMessage;
    }

    std::string errorPrefix = "ROIPooling layer with name '" + getName() + "' ";

    auto roiPooling = ngraph::as_type_ptr<const ngraph::opset2::ROIPooling>(op);
    refParams.pooled_h = roiPooling->get_output_size()[0];
    refParams.pooled_w = roiPooling->get_output_size()[1];
    refParams.spatial_scale = roiPooling->get_spatial_scale();
    std::string m = roiPooling->get_method();
    if (m == "max") {
        algorithm = Algorithm::ROIPoolingMax;
    } else if (m == "bilinear") {
        algorithm = Algorithm::ROIPoolingBilinear;
    }
}

void MKLDNNROIPoolingNode::getSupportedDescriptors() {
@ -370,9 +366,9 @@ void MKLDNNROIPoolingNode::getSupportedDescriptors() {
        IE_THROW() << errorPrefix << "doesn't support output with rank: " << getOutputShapeAtPort(0).getRank();
    }

    if (getInputShapeAtPort(1).getStaticDims()[1] != 5) {
        IE_THROW() << errorPrefix << "has invalid shape on 1st input: ["
            << getInputShapeAtPort(1).getStaticDims()[0] << "," << getInputShapeAtPort(1).getStaticDims()[1] << "]";
    const auto& dims = getInputShapeAtPort(1).getDims();
    if (dims[1] != 5) {
        IE_THROW() << errorPrefix << "has invalid shape on 1st input: [" << dims[0] << "," << dims[1] << "]";
    }
}

@ -380,16 +376,15 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;

    runtimePrecision = getOriginalInputPrecisionAtPort(0);
    refParams.src_prc = getOriginalInputPrecisionAtPort(0);

    if (!mayiuse(avx512_core)) {
        if (runtimePrecision == Precision::BF16)
            runtimePrecision = Precision::FP32;
        if (refParams.src_prc == Precision::BF16)
            refParams.src_prc = Precision::FP32;
    }

    src_data_size = dst_data_size = runtimePrecision.size();
    src_data_size = dst_data_size = refParams.src_prc.size();

    auto parentDims = getInputShapeAtPort(0).getStaticDims();
    auto format = mayiuse(avx512_common) ? LayoutType::nCsp16c : LayoutType::nCsp8c;
    impl_desc_type impl_type;
    if (mayiuse(cpu::x64::avx512_common)) {
@ -402,159 +397,159 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() {
        impl_type = impl_desc_type::ref;
    }

    addSupportedPrimDesc({{format, runtimePrecision},
                          {LayoutType::ncsp, runtimePrecision}},
                         {{format, runtimePrecision}},
    addSupportedPrimDesc({{format, refParams.src_prc},
                          {LayoutType::ncsp, refParams.src_prc}},
                         {{format, refParams.src_prc}},
                         impl_type);
}

void MKLDNNROIPoolingNode::createPrimitive() {
    auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
    if (!selectedPrimitiveDescriptor)
        IE_THROW() << "CPU ROI Pooling node with name '" << getName() << "' doesn't have primitive descriptors.";
    auto config = selectedPrimitiveDescriptor->getConfig();

    const int simd_w = mayiuse(cpu::x64::avx512_common) ? 16 : 8;
    jpp.c_block = simd_w;

    auto inDims = getParentEdgeAt(0)->getMemory().getStaticDims();
    auto outDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims();

    jpp.mb = outDims[0];
    jpp.c = rnd_up(inDims[1], simd_w);
    jpp.ih = inDims[2];
    jpp.iw = inDims[3];
    jpp.oh = outDims[2];
    jpp.ow = outDims[3];

    jpp.spatial_scale = spatial_scale;
    jpp.pooled_h = pooled_h;
    jpp.pooled_w = pooled_w;

    jpp.nb_c = jpp.c / jpp.c_block;

    jpp.nb_c_blocking = mayiuse(cpu::x64::avx512_common) ? 15 : 7;

    auto selectedPD = getSelectedPrimitiveDescriptor();
    jpp.src_prc = selectedPD->getConfig().inConfs[0].desc->getPrecision();
    jpp.dst_prc = selectedPD->getConfig().outConfs[0].desc->getPrecision();
    jpp.src_data_size = jpp.src_prc.size();
    jpp.dst_data_size = jpp.dst_prc.size();
    if (!selectedPD)
        IE_THROW() << "CPU ROI Pooling node with name '" << getName() << "' doesn't have primitive descriptors.";

    jpp.alg = getAlgorithm();
    refParams.c_block = mayiuse(cpu::x64::avx512_common) ? 16 : 8;
    refParams.nb_c_blocking = mayiuse(cpu::x64::avx512_common) ? 15 : 7;
    refParams.alg = getAlgorithm();

    if (mayiuse(cpu::x64::avx512_common)) {
        roi_pooling_kernel.reset(new jit_uni_roi_pooling_kernel_f32<cpu::x64::avx512_common>(jpp));
    } else if (mayiuse(cpu::x64::avx2)) {
        roi_pooling_kernel.reset(new jit_uni_roi_pooling_kernel_f32<cpu::x64::avx2>(jpp));
    } else if (mayiuse(cpu::x64::sse41)) {
        roi_pooling_kernel.reset(new jit_uni_roi_pooling_kernel_f32<cpu::x64::sse41>(jpp));
    const auto& config = selectedPD->getConfig();
    refParams.src_prc = config.inConfs[0].desc->getPrecision();
    refParams.dst_prc = config.outConfs[0].desc->getPrecision();
    refParams.src_data_size = refParams.src_prc.size();
    refParams.dst_data_size = refParams.dst_prc.size();

    if (inputShapesDefined()) {
        if (needPrepareParams())
            prepareParams();
        updateLastInputDims();
    }

    if (roi_pooling_kernel)
        roi_pooling_kernel->create_ker();
}

template<typename T>
void MKLDNNROIPoolingNode::execute() {
    auto &srcMemory0 = getParentEdgeAt(0)->getMemory();
    auto &srcMemory1 = getParentEdgeAt(1)->getMemory();
    auto &dstMemory = getChildEdgeAt(0)->getMemory();
void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) {
    if (execPtr) {
        const auto &srcMemory0 = getParentEdgeAt(0)->getMemory();
        const auto &srcMemory1 = getParentEdgeAt(1)->getMemory();
        const auto &dstMemory = getChildEdgeAt(0)->getMemory();
        execPtr->exec(srcMemory0, srcMemory1, dstMemory);
    } else {
        IE_THROW() << "Can't execute ROI Pooling node. Primitive wasn't created";
    }
}

    const auto *src_data = reinterpret_cast<const T*>(srcMemory0.GetPtr());
    const auto *src_roi = reinterpret_cast<const T*>(srcMemory1.GetPtr());
    auto *dst = reinterpret_cast<T*>(dstMemory.GetPtr());
void MKLDNNROIPoolingNode::executeDynamicImpl(mkldnn::stream strm) {
    return execute(strm);
}

    auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
    if (!selectedPrimitiveDescriptor)
        IE_THROW() << "CPU ROI Pooling node with name '" << getName() << "' doesn't have primitive descriptors.";
    auto config = selectedPrimitiveDescriptor->getConfig();
void MKLDNNROIPoolingNode::prepareParams() {
    const auto& srcMemPtr0 = getParentEdgeAt(0)->getMemoryPtr();
    const auto& srcMemPtr1 = getParentEdgeAt(0)->getMemoryPtr();
    const auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
    if (!srcMemPtr0 || !srcMemPtr0->GetPrimitivePtr())
        IE_THROW() << "Input memory didn't allocate.";
    if (!srcMemPtr1 || !srcMemPtr1->GetPrimitivePtr())
        IE_THROW() << "Input memory didn't allocate.";
    if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
        IE_THROW() << "Destination memory didn't allocate.";
    if (getSelectedPrimitiveDescriptor() == nullptr)
        IE_THROW() << "Preferable primitive descriptor is not set.";

    auto src_strides = srcMemory0.GetDescWithType<BlockedMemoryDesc>()->getStrides();
    auto dst_strides = dstMemory.GetDescWithType<BlockedMemoryDesc>()->getStrides();
    size_t src_roi_step = srcMemory1.GetDescWithType<BlockedMemoryDesc>()->getStrides()[0];
    const auto& inDims = getParentEdgeAt(0)->getMemory().getStaticDims();
    const auto& outDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims();

    int cb_work = impl::utils::div_up(jpp.nb_c, jpp.nb_c_blocking);
    int MB = jpp.mb;
    refParams.mb = outDims[0];
    refParams.c = rnd_up(inDims[1], refParams.c_block);
    refParams.nb_c = refParams.c / refParams.c_block;
    refParams.ih = inDims[2];
    refParams.iw = inDims[3];
    refParams.oh = outDims[2];
    refParams.ow = outDims[3];

    int real_rois = 0;
    for (; real_rois < MB; real_rois++) {
        size_t roi_off = real_rois * src_roi_step;
    execPtr = ROIPoolingExecutor::createROIPoolingNewExecutor(refParams);
}

        const auto *src_roi_ptr = &src_roi[roi_off];
        int roi_batch_ind = static_cast<int>(src_roi_ptr[0]);
        if (roi_batch_ind == -1) {
            break;
template <typename T>
class MKLDNNROIPoolingNode::ROIPoolingJitExecutor : public MKLDNNROIPoolingNode::ROIPoolingExecutor {
public:
    ROIPoolingJitExecutor(const jit_roi_pooling_params &jpp) {
        if (mayiuse(cpu::x64::avx512_common)) {
            roi_pooling_kernel.reset(new jit_uni_roi_pooling_kernel_f32<cpu::x64::avx512_common>(jpp));
        } else if (mayiuse(cpu::x64::avx2)) {
            roi_pooling_kernel.reset(new jit_uni_roi_pooling_kernel_f32<cpu::x64::avx2>(jpp));
        } else if (mayiuse(cpu::x64::sse41)) {
            roi_pooling_kernel.reset(new jit_uni_roi_pooling_kernel_f32<cpu::x64::sse41>(jpp));
        } else {
            IE_THROW() << "Can't create jit RoiPooling kernel";
        }

        if (roi_pooling_kernel)
            roi_pooling_kernel->create_ker();
    }

    parallel_for4d(MB, cb_work, jpp.oh, jpp.ow, [&](int n, int cbb, int oh, int ow) {
        auto arg = jit_roi_pooling_call_args();
    void exec(
            const MKLDNNPlugin::MKLDNNMemory& srcData,
            const MKLDNNPlugin::MKLDNNMemory& srcRoi,
            const MKLDNNPlugin::MKLDNNMemory& dst) override {
        if (!roi_pooling_kernel)
            IE_THROW() << "Could not execute. Kernel for RoiPooling node was not compiled.";

        int cb = cbb * jpp.nb_c_blocking;
        int cb_num = jpp.nb_c_blocking;
        int c_block = jpp.c_block;
        auto src_strides = srcData.GetDescWithType<BlockedMemoryDesc>()->getStrides();
        auto src_roi_step = srcRoi.GetDescWithType<BlockedMemoryDesc>()->getStrides()[0];
        auto dst_strides = dst.GetDescWithType<BlockedMemoryDesc>()->getStrides();
        const auto* src_ptr = reinterpret_cast<const T*>(srcData.GetPtr());
        const auto* roi_ptr = reinterpret_cast<const T*>(srcRoi.GetPtr());
        auto* dst_ptr = reinterpret_cast<T*>(dst.GetPtr());
        executeOptimizedGeneric(src_ptr, roi_ptr, dst_ptr, src_strides, dst_strides, src_roi_step);
    }

        arg.c_blocks = std::min(cb + cb_num, jpp.nb_c) - cb;
private:
    void executeOptimizedGeneric(
            const T* src_data,
            const T* src_roi,
            T* dst,
            const VectorDims& src_strides,
            const VectorDims& dst_strides,
            const size_t src_roi_step) {
        const auto& jpp = roi_pooling_kernel->jpp_;
        int cb_work = impl::utils::div_up(jpp.nb_c, jpp.nb_c_blocking);
        int MB = jpp.mb;

        if (n >= real_rois) {
        if (roi_pooling_kernel) {
        int real_rois = 0;
        for (; real_rois < MB; real_rois++) {
            size_t roi_off = real_rois * src_roi_step;

            const auto *src_roi_ptr = &src_roi[roi_off];
            int roi_batch_ind = static_cast<int>(src_roi_ptr[0]);
            if (roi_batch_ind == -1) {
                break;
            }
        }

        parallel_for4d(MB, cb_work, jpp.oh, jpp.ow, [&](int n, int cbb, int oh, int ow) {
            auto arg = jit_roi_pooling_call_args();
            int cb = cbb * jpp.nb_c_blocking;
            int cb_num = jpp.nb_c_blocking;
            arg.c_blocks = std::min(cb + cb_num, jpp.nb_c) - cb;

            if (n >= real_rois) {
                arg.bin_area = 0;
                arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]];
                (*roi_pooling_kernel)(&arg);
            } else {
                for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) {
                    int ch_blk_cur = cbb * cb_num + cbb_cur;
                    if (ch_blk_cur >= jpp.nb_c) {
                        break; // current block work is done
                    }
                    for (int c = 0; c < c_block; c++) {
                        dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0;
                    }
                }
            }
        } else {
            size_t roi_off = n * src_roi_step;
            const auto *src_roi_ptr = &src_roi[roi_off];
            size_t roi_off = n * src_roi_step;
            const auto *src_roi_ptr = &src_roi[roi_off];

            int roi_batch_ind = static_cast<int>(src_roi_ptr[0]);
            int roi_batch_ind = static_cast<int>(src_roi_ptr[0]);

            if (jpp.alg == Algorithm::ROIPoolingMax) {
                int roi_start_w = static_cast<int>(round(src_roi_ptr[1] * jpp.spatial_scale));
                int roi_start_h = static_cast<int>(round(src_roi_ptr[2] * jpp.spatial_scale));
                int roi_end_w = static_cast<int>(round(src_roi_ptr[3] * jpp.spatial_scale));
                int roi_end_h = static_cast<int>(round(src_roi_ptr[4] * jpp.spatial_scale));
            if (jpp.alg == Algorithm::ROIPoolingMax) {
                int roi_start_w = static_cast<int>(round(src_roi_ptr[1] * jpp.spatial_scale));
                int roi_start_h = static_cast<int>(round(src_roi_ptr[2] * jpp.spatial_scale));
                int roi_end_w = static_cast<int>(round(src_roi_ptr[3] * jpp.spatial_scale));
                int roi_end_h = static_cast<int>(round(src_roi_ptr[4] * jpp.spatial_scale));

                int roi_height = std::max(roi_end_h - roi_start_h + 1, 1);
                int roi_width = std::max(roi_end_w - roi_start_w + 1, 1);
                int hstart, hend, wstart, wend;
                std::tie(hstart, hend, wstart, wend) = getBordersForMaxMode(
                    roi_start_h, roi_end_h, roi_start_w, roi_end_w, jpp.ih, oh, jpp.iw, ow, jpp.pooled_h, jpp.pooled_w);

int hstart = (oh * roi_height) / jpp.pooled_h;
|
||||
if ((hstart * jpp.pooled_h) > (oh * roi_height)) {
|
||||
--hstart;
|
||||
}
|
||||
|
||||
int wstart = (ow * roi_width) / jpp.pooled_w;
|
||||
if ((wstart * jpp.pooled_w) > (ow * roi_width)) {
|
||||
--wstart;
|
||||
}
|
||||
|
||||
int hend = ((oh + 1) * roi_height) / jpp.pooled_h;
|
||||
if ((hend * jpp.pooled_h) < ((oh + 1) * roi_height)) {
|
||||
++hend;
|
||||
}
|
||||
|
||||
int wend = ((ow + 1) * roi_width) / jpp.pooled_w;
|
||||
if ((wend * jpp.pooled_w) < ((ow + 1) * roi_width)) {
|
||||
++wend;
|
||||
}
|
||||
|
||||
hstart = std::min(std::max(hstart + roi_start_h, 0), jpp.ih);
|
||||
hend = std::min(std::max(hend + roi_start_h, 0), jpp.ih);
|
||||
wstart = std::min(std::max(wstart + roi_start_w, 0), jpp.iw);
|
||||
wend = std::min(std::max(wend + roi_start_w, 0), jpp.iw);
|
||||
|
||||
if (roi_pooling_kernel) {
|
||||
arg.src = &src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + hstart * src_strides[2] + wstart * src_strides[3]];
|
||||
arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]];
|
||||
|
||||
@ -562,6 +557,121 @@ void MKLDNNROIPoolingNode::execute() {
|
||||
arg.kh = hend - hstart;
|
||||
arg.kw = wend - wstart;
|
||||
} else {
|
||||
float roi_start_w_ = src_roi_ptr[1];
|
||||
float roi_start_h_ = src_roi_ptr[2];
|
||||
float roi_end_w_ = src_roi_ptr[3];
|
||||
float roi_end_h_ = src_roi_ptr[4];
|
||||
|
||||
float in_x, in_y;
|
||||
std::tie(in_x, in_y) = getXYForBilinearMode(
|
||||
roi_start_h_, roi_end_h_, roi_start_w_, roi_end_w_, jpp.ih, oh, jpp.iw, ow, jpp.pooled_h, jpp.pooled_w);
|
||||
|
||||
if (in_y < 0 || in_y > jpp.ih - 1 || in_x < 0 || in_x > jpp.iw - 1) {
|
||||
arg.bin_area = 0;
|
||||
arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]];
|
||||
} else {
|
||||
int top_y_index = static_cast<int>(floorf(in_y));
|
||||
int bottom_y_index = static_cast<int>(ceilf(in_y));
|
||||
int left_x_index = static_cast<int>(floorf(in_x));
|
||||
int right_x_index = static_cast<int>(ceilf(in_x));
|
||||
|
||||
if (right_x_index > jpp.iw - 1)
|
||||
right_x_index = jpp.iw - 1;
|
||||
|
||||
if (bottom_y_index > jpp.ih - 1)
|
||||
bottom_y_index = jpp.ih - 1;
|
||||
|
||||
arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]];
|
||||
|
||||
arg.xf = in_x - left_x_index;
|
||||
arg.yf = in_y - top_y_index;
|
||||
|
||||
arg.xoff = sizeof(T) * (right_x_index - left_x_index) * jpp.c_block;
|
||||
arg.yoff = sizeof(T) * (bottom_y_index - top_y_index) * jpp.iw * jpp.c_block;
|
||||
|
||||
arg.src = &src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] +
|
||||
top_y_index * src_strides[2] + left_x_index * src_strides[3]];
|
||||
|
||||
arg.bin_area = 1;
|
||||
}
|
||||
}
|
||||
|
||||
(*roi_pooling_kernel)(&arg);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
std::shared_ptr<jit_uni_roi_pooling_kernel> roi_pooling_kernel;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class MKLDNNROIPoolingNode::ROIPoolingRefExecutor : public MKLDNNROIPoolingNode::ROIPoolingExecutor {
|
||||
public:
|
||||
ROIPoolingRefExecutor(const jit_roi_pooling_params &_jpp) : jpp(_jpp) {}
|
||||
void exec(
|
||||
const MKLDNNPlugin::MKLDNNMemory& srcData,
|
||||
const MKLDNNPlugin::MKLDNNMemory& srcRoi,
|
||||
const MKLDNNPlugin::MKLDNNMemory& dst) override {
|
||||
auto src_strides = srcData.GetDescWithType<BlockedMemoryDesc>()->getStrides();
|
||||
auto src_roi_step = srcRoi.GetDescWithType<BlockedMemoryDesc>()->getStrides()[0];
|
||||
auto dst_strides = dst.GetDescWithType<BlockedMemoryDesc>()->getStrides();
|
||||
const auto* src_ptr = reinterpret_cast<const T*>(srcData.GetPtr());
|
||||
const auto* roi_ptr = reinterpret_cast<const T*>(srcRoi.GetPtr());
|
||||
auto* dst_ptr = reinterpret_cast<T*>(dst.GetPtr());
|
||||
executeReference(src_ptr, roi_ptr, dst_ptr, src_strides, dst_strides, src_roi_step);
|
||||
}
|
||||
|
||||
void executeReference(
|
||||
const T* src_data,
|
||||
const T* src_roi,
|
||||
T* dst,
|
||||
const VectorDims& src_strides,
|
||||
const VectorDims& dst_strides,
|
||||
const size_t src_roi_step) {
|
||||
int cb_work = impl::utils::div_up(jpp.nb_c, jpp.nb_c_blocking);
|
||||
int MB = jpp.mb;
|
||||
|
||||
int real_rois = 0;
|
||||
for (; real_rois < MB; real_rois++) {
|
||||
size_t roi_off = real_rois * src_roi_step;
|
||||
|
||||
const auto *src_roi_ptr = &src_roi[roi_off];
|
||||
int roi_batch_ind = static_cast<int>(src_roi_ptr[0]);
|
||||
if (roi_batch_ind == -1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
parallel_for4d(MB, cb_work, jpp.oh, jpp.ow, [&](int n, int cbb, int oh, int ow) {
|
||||
int cb_num = jpp.nb_c_blocking;
|
||||
int c_block = jpp.c_block;
|
||||
|
||||
if (n >= real_rois) {
|
||||
for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) {
|
||||
int ch_blk_cur = cbb * cb_num + cbb_cur;
|
||||
if (ch_blk_cur >= jpp.nb_c) {
|
||||
break; // current block work is done
|
||||
}
|
||||
for (int c = 0; c < c_block; c++) {
|
||||
dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0;
}
}
} else {
size_t roi_off = n * src_roi_step;
const auto *src_roi_ptr = &src_roi[roi_off];

int roi_batch_ind = static_cast<int>(src_roi_ptr[0]);

if (jpp.alg == Algorithm::ROIPoolingMax) {
int roi_start_w = static_cast<int>(round(src_roi_ptr[1] * jpp.spatial_scale));
int roi_start_h = static_cast<int>(round(src_roi_ptr[2] * jpp.spatial_scale));
int roi_end_w = static_cast<int>(round(src_roi_ptr[3] * jpp.spatial_scale));
int roi_end_h = static_cast<int>(round(src_roi_ptr[4] * jpp.spatial_scale));

int hstart, hend, wstart, wend;
std::tie(hstart, hend, wstart, wend) = getBordersForMaxMode(
roi_start_h, roi_end_h, roi_start_w, roi_end_w, jpp.ih, oh, jpp.iw, ow, jpp.pooled_h, jpp.pooled_w);

for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) {
int ch_blk_cur = cbb * cb_num + cbb_cur;
if (ch_blk_cur >= jpp.nb_c) {
@ -584,37 +694,17 @@ void MKLDNNROIPoolingNode::execute() {
}
}
}
}
} else {
float roi_start_w_ = src_roi_ptr[1];
float roi_start_h_ = src_roi_ptr[2];
float roi_end_w_ = src_roi_ptr[3];
float roi_end_h_ = src_roi_ptr[4];

float height_scale = (jpp.pooled_h > 1 ? ((roi_end_h_ - roi_start_h_) * (jpp.ih - 1)) / (jpp.pooled_h - 1) : 0);
float width_scale = (jpp.pooled_w > 1 ? ((roi_end_w_ - roi_start_w_) * (jpp.iw - 1)) / (jpp.pooled_w - 1) : 0);

float in_y, in_x;
// Because floating-point arithmetic is not exactly associative, some proposals can violate the inequality:
// ((end_h - start_h) * (input_h - 1) / (pooled_h - 1)) * (pooled_h - 1) <= (end_h - start_h) * (input_h - 1),
// so the computed coordinate can overshoot the right limit of the proposal
// unless the border case (current_h == pooled_h - 1) is handled explicitly.
if (jpp.pooled_h > 1) {
in_y = (oh == jpp.pooled_h - 1 ? roi_end_h_ * (jpp.ih - 1) : (oh * height_scale + roi_start_h_ * (jpp.ih - 1)));
} else {
in_y = 0.5 * (roi_start_h_ + roi_end_h_) * (jpp.ih - 1);
}
if (jpp.pooled_w > 1) {
in_x = (ow == jpp.pooled_w - 1 ? roi_end_w_ * (jpp.iw - 1) : (ow * width_scale + roi_start_w_ * (jpp.iw - 1)));
} else {
in_x = 0.5 * (roi_start_w_ + roi_end_w_) * (jpp.iw - 1);
}
float roi_start_w_ = src_roi_ptr[1];
float roi_start_h_ = src_roi_ptr[2];
float roi_end_w_ = src_roi_ptr[3];
float roi_end_h_ = src_roi_ptr[4];

if (in_y < 0 || in_y > jpp.ih - 1 || in_x < 0 || in_x > jpp.iw - 1) {
if (roi_pooling_kernel) {
arg.bin_area = 0;
arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]];
} else {
float in_x, in_y;
std::tie(in_x, in_y) = getXYForBilinearMode(
roi_start_h_, roi_end_h_, roi_start_w_, roi_end_w_, jpp.ih, oh, jpp.iw, ow, jpp.pooled_h, jpp.pooled_w);

if (in_y < 0 || in_y > jpp.ih - 1 || in_x < 0 || in_x > jpp.iw - 1) {
for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) {
int ch_blk_cur = cbb * cb_num + cbb_cur;
if (ch_blk_cur >= jpp.nb_c) {
@ -624,33 +714,18 @@ void MKLDNNROIPoolingNode::execute() {
dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0;
}
}
}
} else {
int top_y_index = static_cast<int>(floorf(in_y));
int bottom_y_index = static_cast<int>(ceilf(in_y));
int left_x_index = static_cast<int>(floorf(in_x));
int right_x_index = static_cast<int>(ceilf(in_x));

if (right_x_index > jpp.iw - 1)
right_x_index = jpp.iw - 1;

if (bottom_y_index > jpp.ih - 1)
bottom_y_index = jpp.ih - 1;

if (roi_pooling_kernel) {
arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]];

arg.xf = in_x - left_x_index;
arg.yf = in_y - top_y_index;

arg.xoff = sizeof(T) * (right_x_index - left_x_index) * jpp.c_block;
arg.yoff = sizeof(T) * (bottom_y_index - top_y_index) * jpp.iw * jpp.c_block;

arg.src = &src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] +
top_y_index * src_strides[2] + left_x_index * src_strides[3]];

arg.bin_area = 1;
} else {
int top_y_index = static_cast<int>(floorf(in_y));
int bottom_y_index = static_cast<int>(ceilf(in_y));
int left_x_index = static_cast<int>(floorf(in_x));
int right_x_index = static_cast<int>(ceilf(in_x));

if (right_x_index > jpp.iw - 1)
right_x_index = jpp.iw - 1;

if (bottom_y_index > jpp.ih - 1)
bottom_y_index = jpp.ih - 1;

for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) {
int ch_blk_cur = cbb * cb_num + cbb_cur;
if (ch_blk_cur >= jpp.nb_c) {
@ -676,35 +751,89 @@ void MKLDNNROIPoolingNode::execute() {
}
}
}

if (roi_pooling_kernel) {
(*roi_pooling_kernel)(&arg);
}
}
});
}

namespace {
struct ROIPoolingContext {
MKLDNNROIPoolingNode &node;
};
}

template<typename T>
struct MKLDNNROIPoolingNode::ROIPoolingExecute {
void operator()(ROIPoolingContext & ctx) {
ctx.node.execute<T>();
});
}

private:
jit_roi_pooling_params jpp;
};

void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) {
ROIPoolingContext ctx = {
*this
};
// enable conditional compilation
OV_SWITCH(MKLDNNPlugin, ROIPoolingExecute, ctx, runtimePrecision,
std::shared_ptr<MKLDNNROIPoolingNode::ROIPoolingExecutor> MKLDNNROIPoolingNode::ROIPoolingExecutor::createROIPoolingNewExecutor(
const jit_roi_pooling_params& jpp) {
ROIPoolingContext ctx = { nullptr, jpp };

OV_SWITCH(MKLDNNPlugin, ROIPoolingExecutorCreation, ctx, jpp.src_prc,
OV_CASE(Precision::FP32, float),
OV_CASE(Precision::BF16, bfloat16_t))

return ctx.executor;
}
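// A note on the dispatch above (a sketch of the general OV_SWITCH pattern, not
// quoted from this commit): OV_SWITCH compares jpp.src_prc against each OV_CASE
// and, on a match, invokes ROIPoolingExecutorCreation<float> or
// ROIPoolingExecutorCreation<bfloat16_t> on ctx. A precision with no matching
// case leaves ctx.executor as a null shared_ptr, so callers are expected to
// treat an empty result as "unsupported precision".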

std::tuple<int, int, int, int> MKLDNNROIPoolingNode::ROIPoolingExecutor::getBordersForMaxMode(
const int roi_start_h, const int roi_end_h, const int roi_start_w, const int roi_end_w,
const int ih, const int oh, const int iw, const int ow, const int pooled_h, const int pooled_w) {
int roi_height = std::max(roi_end_h - roi_start_h + 1, 1);
int roi_width = std::max(roi_end_w - roi_start_w + 1, 1);

int hstart = (oh * roi_height) / pooled_h;
if ((hstart * pooled_h) > (oh * roi_height)) {
--hstart;
}

int wstart = (ow * roi_width) / pooled_w;
if ((wstart * pooled_w) > (ow * roi_width)) {
--wstart;
}

int hend = ((oh + 1) * roi_height) / pooled_h;
if ((hend * pooled_h) < ((oh + 1) * roi_height)) {
++hend;
}

int wend = ((ow + 1) * roi_width) / pooled_w;
if ((wend * pooled_w) < ((ow + 1) * roi_width)) {
++wend;
}

hstart = std::min(std::max(hstart + roi_start_h, 0), ih);
hend = std::min(std::max(hend + roi_start_h, 0), ih);
wstart = std::min(std::max(wstart + roi_start_w, 0), iw);
wend = std::min(std::max(wend + roi_start_w, 0), iw);
return std::make_tuple(hstart, hend, wstart, wend);
}
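// Worked example (illustrative values, not part of the kernel itself): for a
// ROI covering rows [3, 8] (roi_height = 6) pooled to pooled_h = 2 bins,
// oh = 0 gives hstart = (0 * 6) / 2 + 3 = 3 and hend = (1 * 6) / 2 + 3 = 6,
// while oh = 1 gives hstart = 6 and hend = 9 (then clamped to ih), so the two
// bins tile the ROI without gaps; the +/-1 correction branches fire only when
// the integer division truncates.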

std::pair<float, float> MKLDNNROIPoolingNode::ROIPoolingExecutor::getXYForBilinearMode(
const float roi_start_h, const float roi_end_h, const float roi_start_w, const float roi_end_w,
const int ih, const int oh, const int iw, const int ow, const int pooled_h, const int pooled_w) {
float height_scale = (pooled_h > 1 ? ((roi_end_h - roi_start_h) * (ih - 1)) / (pooled_h - 1) : 0);
float width_scale = (pooled_w > 1 ? ((roi_end_w - roi_start_w) * (iw - 1)) / (pooled_w - 1) : 0);

float in_y, in_x;
// Because floating-point arithmetic is not exactly associative, some proposals can violate the inequality:
// ((end_h - start_h) * (input_h - 1) / (pooled_h - 1)) * (pooled_h - 1) <= (end_h - start_h) * (input_h - 1),
// so the computed coordinate can overshoot the right limit of the proposal
// unless the border case (current_h == pooled_h - 1) is handled explicitly.
if (pooled_h > 1) {
in_y = (oh == pooled_h - 1 ? roi_end_h * (ih - 1) : (oh * height_scale + roi_start_h * (ih - 1)));
} else {
in_y = 0.5 * (roi_start_h + roi_end_h) * (ih - 1);
}
if (pooled_w > 1) {
in_x = (ow == pooled_w - 1 ? roi_end_w * (iw - 1) : (ow * width_scale + roi_start_w * (iw - 1)));
} else {
in_x = 0.5 * (roi_start_w + roi_end_w) * (iw - 1);
}

return std::make_pair(in_x, in_y);
}
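// Numeric illustration (assumed values): with ih = 5, pooled_h = 3 and a ROI
// of [0.2, 0.8], height_scale = (0.6 * 4) / 2 = 1.2; for the last bin (oh = 2)
// the generic formula gives 2 * 1.2 + 0.2 * 4 = 3.2, which algebraically
// equals roi_end_h * (ih - 1) = 3.2, but the explicit oh == pooled_h - 1
// branch keeps that equality exact under floating-point rounding as well.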

template <typename T>
std::shared_ptr<MKLDNNROIPoolingNode::ROIPoolingExecutor> MKLDNNROIPoolingNode::ROIPoolingExecutor::makeExecutor(
const jit_roi_pooling_params& jpp) {
if (mayiuse(cpu::x64::sse41))
return std::make_shared<ROIPoolingJitExecutor<T>>(jpp);
else
return std::make_shared<ROIPoolingRefExecutor<T>>(jpp);
}
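// Intended call pattern (a sketch inferred from the factory above, not quoted
// from the commit): prepareParams() builds the parameters once and caches
//     execPtr = ROIPoolingExecutor::createROIPoolingNewExecutor(refParams);
// execute() then dispatches through execPtr->exec(srcMem, roiMem, dstMem), so
// the SSE4.1 capability check in makeExecutor() runs only at preparation time.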

bool MKLDNNROIPoolingNode::created() const {
@ -74,25 +74,62 @@ public:
void execute(mkldnn::stream strm) override;
bool created() const override;

void executeDynamicImpl(mkldnn::stream strm) override;
void prepareParams() override;

private:
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

template<typename T> void execute();
template<typename T> struct ROIPoolingExecute;

InferenceEngine::Precision runtimePrecision;
size_t src_data_size = 0;
size_t dst_data_size = 0;

size_t src_data_size;
size_t dst_data_size;

int pooled_h = 0;
int pooled_w = 0;
float spatial_scale = 0;

jit_roi_pooling_params jpp = {};
std::shared_ptr<jit_uni_roi_pooling_kernel> roi_pooling_kernel = nullptr;
jit_roi_pooling_params refParams = {};

std::string errorPrefix;
};

class ROIPoolingExecutor {
public:
ROIPoolingExecutor() = default;
virtual void exec(
const MKLDNNPlugin::MKLDNNMemory& srcData,
const MKLDNNPlugin::MKLDNNMemory& srcRoi,
const MKLDNNPlugin::MKLDNNMemory& dst) = 0;
virtual ~ROIPoolingExecutor() = default;

static std::shared_ptr<ROIPoolingExecutor> createROIPoolingNewExecutor(const jit_roi_pooling_params& jpp);

protected:
std::tuple<int, int, int, int> getBordersForMaxMode(
const int roi_start_h, const int roi_end_h, const int roi_start_w, const int roi_end_w,
const int ih, const int oh, const int iw, const int ow, const int pooled_h, const int pooled_w);
std::pair<float, float> getXYForBilinearMode(
const float roi_start_h, const float roi_end_h, const float roi_start_w, const float roi_end_w,
const int ih, const int oh, const int iw, const int ow, const int pooled_h, const int pooled_w);

private:
template <typename T>
static std::shared_ptr<ROIPoolingExecutor> makeExecutor(const jit_roi_pooling_params& jpp);

struct ROIPoolingContext {
std::shared_ptr<ROIPoolingExecutor> executor;
jit_roi_pooling_params jpp;
};

template<typename T>
struct ROIPoolingExecutorCreation {
void operator()(ROIPoolingContext& ctx) {
ctx.executor = ROIPoolingExecutor::makeExecutor<T>(ctx.jpp);
}
};
};

template <typename T> struct ROIPoolingJitExecutor;
template <typename T> struct ROIPoolingRefExecutor;

using executorPtr = std::shared_ptr<ROIPoolingExecutor>;
executorPtr execPtr = nullptr;
};
} // namespace MKLDNNPlugin

@ -4,23 +4,14 @@

#include "mkldnn_strided_slice_node.h"

#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>

#include "ie_parallel.hpp"
#include "caseless.hpp"
#include "common/cpu_memcpy.h"
#include "common/blocked_desc_creator.h"
#include "utils/general_utils.h"
#include "mkldnn_input_node.h"

#include <string>
#include <tuple>
#include <algorithm>
#include "caseless.hpp"
#include <ngraph/opsets/opset1.hpp>

#define THROW_ERROR IE_THROW() << "StridedSlice layer with name '" << getName() << "' "
#include <string>

#define THROW_ERROR IE_THROW() << NameFromType(getType()) << " node with name '" << getName() << "' "

using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -35,19 +26,20 @@ static inline size_t parallel_init(size_t start, size_t nDims, const VectorDims&
return start;
}

bool MKLDNNStridedSliceNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
bool MKLDNNStridedSliceNode::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
try {
auto ss = ov::as_type_ptr<const ngraph::opset1::StridedSlice>(op);
if (!ss) {
errorMessage = "Only opset1 StridedSlice operation is supported";
if (!ov::is_type<ov::op::v1::StridedSlice>(op) &&
!ov::is_type<ov::op::v8::Slice>(op)) {
errorMessage = "Only StridedSlice from opset1 and Slice from opset8 operations are supported.";
return false;
}

if (ss->get_input_node_shared_ptr(BEGIN_ID)->get_type_info() != ov::op::v0::Constant::get_type_info_static() ||
ss->get_input_node_shared_ptr(END_ID)->get_type_info() != ov::op::v0::Constant::get_type_info_static() ||
(ss->get_input_size() == 4 && ss->get_input_node_shared_ptr(STRIDE_ID)->get_type_info() != ov::op::v0::Constant::get_type_info_static())) {
// TODO: Support begin, end, stride inputs for dynamic shapes.
errorMessage = "Only Constant 'begin', 'end' and 'stride' inputs are supported.";
if (!ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(BEGIN_ID)) ||
!ov::is_type<ov::op::v0::Constant>(op->get_input_node_shared_ptr(END_ID)) ||
(op->get_input_size() > STRIDE_ID && !ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(STRIDE_ID))) ||
(op->get_input_size() > AXES_ID && !ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXES_ID)))) {
// TODO: Support begin, end, stride, axis inputs for dynamic shapes.
errorMessage = "Only Constant 'begin', 'end', 'stride' and 'axis' inputs are supported.";
return false;
}
} catch (...) {
@ -56,53 +48,31 @@ bool MKLDNNStridedSliceNode::isSupportedOperation(const std::shared_ptr<const ng
return true;
}

MKLDNNStridedSliceNode::MKLDNNStridedSliceNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
MKLDNNStridedSliceNode::MKLDNNStridedSliceNode(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
MKLDNNNode(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
}
auto ss = ov::as_type_ptr<const ngraph::opset1::StridedSlice>(op);
if (inputShapes.size() != 3 && inputShapes.size() != 4) {

isStridedSliceOp = ov::is_type<ov::op::v1::StridedSlice>(op);

if ((isStridedSliceOp && (inputShapes.size() < 3 || inputShapes.size() > 4)) ||
(!isStridedSliceOp && (inputShapes.size() < 4 || inputShapes.size() > 5))) {
THROW_ERROR << "has incorrect number of input edges";
}
if (outputShapes.size() != 1) {
THROW_ERROR << "has incorrect number of output edges";
}

const size_t inputRank = getInputShapeAtPort(DATA_ID).getRank();
const size_t outputRank = getOutputShapeAtPort(0).getRank();

const size_t nDims = std::max(inputRank, outputRank);

auto createMask = [&](const std::vector<int64_t> &origMask, const int bit = 0, bool needReverse = false) {
std::vector<int> mask(origMask.begin(), origMask.end());
if (needReverse) {
for (size_t i = 0; i < mask.size(); i++)
mask[i] = 1 - mask[i];
}
for (size_t i = mask.size(); i < nDims; ++i) mask.push_back(bit);
return mask;
};

attrs.beginMask = createMask(ss->get_begin_mask(), 1, true);
attrs.endMask = createMask(ss->get_end_mask(), 1, true);
attrs.newAxisMask = createMask(ss->get_new_axis_mask());
attrs.shrinkAxisMask = createMask(ss->get_shrink_axis_mask());

auto origEllipsisMask = ss->get_ellipsis_mask();
for (const auto &o : origEllipsisMask) {
attrs.ellipsisMask.push_back(o);
}
if (attrs.ellipsisMask.size() == 0) {
for (size_t i = attrs.ellipsisMask.size(); i < nDims; ++i) attrs.ellipsisMask.push_back(0);
for (size_t i = 0lu; i < op->get_input_size(); i++) {
isConstantInput[i] = ov::is_type<ov::op::v0::Constant>(op->inputs()[i].get_node());
}

attrs.beginDims = getInputShapeAtPort(BEGIN_ID).getStaticDims();
attrs.endDims = getInputShapeAtPort(END_ID).getStaticDims();
if (attrs.beginDims.size() != 1)
THROW_ERROR << " should have begin vector with 1 dimension";
if (attrs.beginDims.size() != 1)
THROW_ERROR << "should have begin vector with 1 dimension";
if (attrs.endDims.size() != 1)
THROW_ERROR << "should have end vector with 1 dimension";
if (attrs.beginDims[0] != attrs.endDims[0])
THROW_ERROR << "should have begin vector with size equal to end vector size";
@ -115,6 +85,59 @@ MKLDNNStridedSliceNode::MKLDNNStridedSliceNode(const std::shared_ptr<ngraph::Nod
if (attrs.beginDims[0] != attrs.strideDims[0])
THROW_ERROR << "should have stride vector with size equal to begin vector size";
}

if (inputShapes.size() > AXES_ID) {
isAxesSpecified = true;
attrs.axesDims = inputShapes[AXES_ID].getStaticDims();
if (attrs.axesDims.size() != 1)
THROW_ERROR << "should have axes vector with 1 dimension.";
if (attrs.beginDims[0] != attrs.axesDims[0])
THROW_ERROR << "should have axes vector with size equal to begin vector size.";
}

if (isStridedSliceOp) {
auto ss = ov::as_type_ptr<const ov::op::v1::StridedSlice>(op);

const size_t inputRank = getInputShapeAtPort(DATA_ID).getRank();
const size_t outputRank = getOutputShapeAtPort(0).getRank();

const size_t nDims = std::max(inputRank, outputRank);

auto createMask = [&](const std::vector<int64_t> &origMask, const int bit = 0, bool needReverse = false) {
std::vector<int> mask(origMask.begin(), origMask.end());
if (needReverse) {
for (size_t i = 0; i < mask.size(); i++)
mask[i] = 1 - mask[i];
}
for (size_t i = mask.size(); i < nDims; ++i) mask.push_back(bit);
return mask;
};

attrs.beginMask = createMask(ss->get_begin_mask(), 1, true);
attrs.endMask = createMask(ss->get_end_mask(), 1, true);
attrs.newAxisMask = createMask(ss->get_new_axis_mask());
attrs.shrinkAxisMask = createMask(ss->get_shrink_axis_mask());

auto origEllipsisMask = ss->get_ellipsis_mask();
for (const auto &o : origEllipsisMask) {
attrs.ellipsisMask.push_back(o);
}
if (attrs.ellipsisMask.size() == 0) {
for (size_t i = attrs.ellipsisMask.size(); i < nDims; ++i) attrs.ellipsisMask.push_back(0);
}
} else {
const size_t length = outputShapes[0].getRank();
if (inputShapes.size() > AXES_ID) {
attrs.beginMask = std::vector<int>(length, 0);
attrs.endMask = std::vector<int>(length, 0);
} else {
attrs.beginMask = std::vector<int>(length, 1);
attrs.endMask = std::vector<int>(length, 1);
}
attrs.newAxisMask = std::vector<int>(length, 0);
attrs.shrinkAxisMask = std::vector<int>(length, 0);
attrs.ellipsisMask = std::vector<int>(length, 0);
}
}

void MKLDNNStridedSliceNode::getSupportedDescriptors() {
@ -124,16 +147,20 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() {

int ellipsisMaskCounter = 0;
int ellipsisPos1 = -1;
for (size_t i = 0; i < attrs.ellipsisMask.size(); i++) {
ellipsisMaskCounter += attrs.ellipsisMask[i];
ellipsisPos1 = attrs.ellipsisMask[i] == 1 && ellipsisPos1 == -1 ? i : ellipsisPos1;
}
if (ellipsisMaskCounter > 1)
THROW_ERROR << "has incorrect 'Ellipsis_mask'. Only one non-zero bit is allowed";
if (isStridedSliceOp) {
for (size_t i = 0; i < attrs.ellipsisMask.size(); i++) {
ellipsisMaskCounter += attrs.ellipsisMask[i];
ellipsisPos1 = attrs.ellipsisMask[i] == 1 && ellipsisPos1 == -1 ? i : ellipsisPos1;
}
if (ellipsisMaskCounter > 1)
THROW_ERROR << "has incorrect 'Ellipsis_mask'. Only one non-zero bit is allowed";

int newAxis = std::accumulate(attrs.newAxisMask.begin(), attrs.newAxisMask.end(), 0);
int shrinkAxis = std::accumulate(attrs.shrinkAxisMask.begin(), attrs.shrinkAxisMask.end(), 0);
attrs.equalDims = newAxis == 0 && shrinkAxis == 0;
int newAxis = std::accumulate(attrs.newAxisMask.begin(), attrs.newAxisMask.end(), 0);
int shrinkAxis = std::accumulate(attrs.shrinkAxisMask.begin(), attrs.shrinkAxisMask.end(), 0);
attrs.equalDims = newAxis == 0 && shrinkAxis == 0;
} else {
attrs.equalDims = true;
}

auto fillingInParameters = [&](std::vector<int> &parameter, const size_t type, const size_t size, const int value) {
const auto constNode = std::dynamic_pointer_cast<MKLDNNInputNode>(getParentEdgesAtPort(type)[0]->getParent());
@ -146,7 +173,7 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() {
const int *ptr = static_cast<const int*>(blob->GetPtr());
parameter.assign(ptr, ptr + size);

if (ellipsisMaskCounter == 0 && size < nDims) {
if (type != AXES_ID && ellipsisMaskCounter == 0 && size < nDims) {
for (size_t i = size; i < nDims; i++) parameter.push_back(value);
}
};
@ -157,6 +184,25 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() {
fillingInParameters(attrs.end, END_ID, attrs.endDims[0], 0);
if (attrs.strideDims.size())
fillingInParameters(attrs.stride, STRIDE_ID, attrs.strideDims[0], 1);
if (attrs.axesDims.size()) {
fillingInParameters(attrs.axes, AXES_ID, attrs.axesDims[0], 0);
std::vector<int> beginTmp(outputRank, 0);
std::vector<int> endTmp(outputRank, -1);
std::vector<int> strideTmp(outputRank, 1);
size_t i = 0lu;
for (auto& a : attrs.axes) {
if (a < 0)
a += outputRank;
beginTmp[a] = attrs.begin[i];
endTmp[a] = attrs.end[i];
strideTmp[a] = attrs.stride[i++];
attrs.beginMask[a] = 1;
attrs.endMask[a] = 1;
}
attrs.begin = beginTmp;
attrs.end = endTmp;
attrs.stride = strideTmp;
}
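// Example of this normalization (illustrative values): for outputRank = 4,
// axes = { -1, 1 }, begin = { 2, 0 }, end = { 8, 5 }, stride = { 2, 1 },
// axis -1 first wraps to 3, producing begin = { 0, 0, 0, 2 },
// end = { -1, 5, -1, 8 }, stride = { 1, 1, 1, 2 }, with beginMask/endMask set
// to 1 only on axes 1 and 3; axes the Slice op leaves unspecified keep the
// full range.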

if (inputRank > 3 && attrs.equalDims && ellipsisMaskCounter == 1)
addHiddenDims(inputRank, ellipsisPos1);
@ -194,15 +240,11 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;

InferenceEngine::Precision dataPrecision = getOriginalInputPrecisionAtPort(DATA_ID);
InferenceEngine::Precision beginPrecision = getOriginalInputPrecisionAtPort(BEGIN_ID);
InferenceEngine::Precision endPrecision = getOriginalInputPrecisionAtPort(END_ID);
InferenceEngine::Precision stridePrecision;
if (isStrideSpecified)
stridePrecision = getOriginalInputPrecisionAtPort(STRIDE_ID);
const InferenceEngine::Precision dataPrecision = getOriginalInputPrecisionAtPort(DATA_ID);
const InferenceEngine::Precision iPrecision = Precision::I32;
attrs.dataSize = dataPrecision.size();

size_t nDims = getInputShapeAtPort(DATA_ID).getRank();
const size_t nDims = getInputShapeAtPort(DATA_ID).getRank();

NodeConfig config;
config.dynBatchSupport = false;
@ -210,12 +252,16 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() {
config.inConfs[DATA_ID].inPlace = -1;
config.inConfs[BEGIN_ID].inPlace = -1;
config.inConfs[END_ID].inPlace = -1;
config.inConfs[DATA_ID].constant = false;
config.inConfs[BEGIN_ID].constant = true;
config.inConfs[END_ID].constant = true;
config.inConfs[DATA_ID].constant = isConstantInput[DATA_ID];
config.inConfs[BEGIN_ID].constant = isConstantInput[BEGIN_ID];
config.inConfs[END_ID].constant = isConstantInput[END_ID];
if (isStrideSpecified) {
config.inConfs[STRIDE_ID].inPlace = -1;
config.inConfs[STRIDE_ID].constant = true;
config.inConfs[STRIDE_ID].constant = isConstantInput[STRIDE_ID];
}
if (isAxesSpecified) {
config.inConfs[AXES_ID].inPlace = -1;
config.inConfs[AXES_ID].constant = isConstantInput[AXES_ID];
}
config.outConfs.resize(1);

@ -241,11 +287,13 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() {
auto range = BlockedDescCreator::makeFilteredRange(creators, nDims, supportedTypes);

for (auto itr = range.first; itr != range.second; ++itr) {
config.inConfs[0].desc = itr->second->createSharedDesc(dataPrecision, getInputShapeAtPort(DATA_ID));
config.inConfs[BEGIN_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(beginPrecision, getInputShapeAtPort(BEGIN_ID));
config.inConfs[END_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(endPrecision, getInputShapeAtPort(END_ID));
config.inConfs[DATA_ID].desc = itr->second->createSharedDesc(dataPrecision, getInputShapeAtPort(DATA_ID));
config.inConfs[BEGIN_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(iPrecision, getInputShapeAtPort(BEGIN_ID));
config.inConfs[END_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(iPrecision, getInputShapeAtPort(END_ID));
if (isStrideSpecified)
config.inConfs[STRIDE_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(stridePrecision, getInputShapeAtPort(STRIDE_ID));
config.inConfs[STRIDE_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(iPrecision, getInputShapeAtPort(STRIDE_ID));
if (isAxesSpecified)
config.inConfs[AXES_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(iPrecision, getInputShapeAtPort(AXES_ID));

config.outConfs[0].desc = itr->second->createSharedDesc(dataPrecision, getOutputShapeAtPort(DATA_ID));
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref);
@ -254,7 +302,7 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() {

void MKLDNNStridedSliceNode::createPrimitive() {
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
auto& srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
THROW_ERROR << "has not allocated destination memory.";
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
@ -308,9 +356,11 @@ void MKLDNNStridedSliceNode::orderParametersByLayouts(const MKLDNNMemoryPtr& src
sortByOrder(attrs.stride);
sortByOrder(attrs.beginMask);
sortByOrder(attrs.endMask);
sortByOrder(attrs.ellipsisMask);
sortByOrder(attrs.newAxisMask);
sortByOrder(attrs.shrinkAxisMask);
if (isStridedSliceOp) {
sortByOrder(attrs.ellipsisMask);
sortByOrder(attrs.newAxisMask);
sortByOrder(attrs.shrinkAxisMask);
}
}
}

@ -4,7 +4,6 @@

#pragma once

#include <ie_common.h>
#include <mkldnn_node.h>
#include <string>
#include <vector>
@ -13,9 +12,9 @@ namespace MKLDNNPlugin {

class MKLDNNStridedSliceNode : public MKLDNNNode {
public:
MKLDNNStridedSliceNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
MKLDNNStridedSliceNode(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);

static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
@ -25,9 +24,8 @@ public:
return false;
}

void prepareParams() override;

protected:
void prepareParams() override;
void executeDynamicImpl(mkldnn::stream strm) override;

private:
@ -38,6 +36,7 @@ private:
std::vector<int> begin;
std::vector<int> end;
std::vector<int> stride;
std::vector<int> axes;

std::vector<int> beginMask;
std::vector<int> endMask;
@ -48,6 +47,7 @@ private:
VectorDims beginDims;
VectorDims endDims;
VectorDims strideDims;
VectorDims axesDims;

bool equalDims = false;
size_t dataSize = 1lu;
@ -84,12 +84,17 @@ private:
using executorPtr = std::shared_ptr<StridedSliceExecutor>;
executorPtr execPtr = nullptr;

bool isStridedSliceOp = true;
bool isStrideSpecified = false;
bool isAxesSpecified = false;

static constexpr size_t DATA_ID = 0;
static constexpr size_t BEGIN_ID = 1;
static constexpr size_t END_ID = 2;
static constexpr size_t STRIDE_ID = 3;
static constexpr size_t AXES_ID = 4;

bool isConstantInput[AXES_ID + 1] = {false};
};

} // namespace MKLDNNPlugin

@ -5,13 +5,18 @@
#include <openvino/core/node.hpp>
#include <ngraph/runtime/host_tensor.hpp>
#include <openvino/opsets/opset1.hpp>
#include <openvino/opsets/opset2.hpp>
#include <openvino/opsets/opset4.hpp>
#include <openvino/opsets/opset5.hpp>
#include <openvino/opsets/opset6.hpp>
#include <openvino/opsets/opset8.hpp>
#include "static_shape.hpp"
#include "utils.hpp"
#include "shape_inference.hpp"
#include "convolution_shape_inference.hpp"
#include "reduce_shape_inference.hpp"
#include "shape_nodes.hpp"
#include "fake_quantize.hpp"
#include "experimental_detectron_detection_output_shape_inference.hpp"


@ -24,10 +29,45 @@ void shape_inference(ov::Node* op,
bool status = resolve_auto_pad_for_shape(node, pads_begin, pads_end, input_shapes, 2, 2);
OPENVINO_ASSERT(status, "Convolution shape inference doesn't have enough information to calculate static shapes");
shape_infer(node, pads_begin, pads_end, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset8::GroupConvolution>(op)) {
ov::CoordinateDiff pads_begin, pads_end;
bool status = resolve_auto_pad_for_shape(node, pads_begin, pads_end, input_shapes, 2, 3);
OPENVINO_ASSERT(status, "GroupConvolution shape inference doesn't have enough information to calculate static shapes");
shape_infer(node, pads_begin, pads_end, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset8::ConvolutionBackpropData>(op)) {
ov::CoordinateDiff pads_begin, pads_end;
ov::StaticShape output_shape_input;
if (node->get_input_size() == 3)
get_data_as_shape<ov::StaticShape>(2, op, output_shape_input, constant_data);
bool status = resolve_auto_pad_for_shape_back_prop(node, pads_begin, pads_end, input_shapes, output_shape_input, 2, 2);
OPENVINO_ASSERT(status, "ConvolutionBackpropData shape inference doesn't have enough information to calculate static shapes");
shape_infer(node, pads_begin, pads_end, output_shape_input, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset8::GroupConvolutionBackpropData>(op)) {
ov::CoordinateDiff pads_begin, pads_end;
ov::StaticShape output_shape_input;
if (node->get_input_size() == 3)
get_data_as_shape<ov::StaticShape>(2, op, output_shape_input, constant_data);
bool status = resolve_auto_pad_for_shape_back_prop(node, pads_begin, pads_end, input_shapes, output_shape_input, 2, 3);
OPENVINO_ASSERT(status, "GroupConvolutionBackpropData shape inference doesn't have enough information to calculate static shapes");
shape_infer(node, pads_begin, pads_end, output_shape_input, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::op::util::ArithmeticReductionKeepDims>(op)) {
shape_infer(node, input_shapes, output_shapes, constant_data);
} else if (auto node = ov::as_type<ov::op::util::LogicalReductionKeepDims>(op)) {
shape_infer(node, input_shapes, output_shapes, constant_data);
} else if (ov::is_type<ov::op::util::UnaryElementwiseArithmetic>(op) ||
ov::is_type<ov::opset1::Convert>(op) || ov::is_type<ov::opset1::Clamp>(op) ||
ov::is_type<ov::opset1::GRN>(op) || ov::is_type<ov::opset1::LRN>(op) ||
ov::is_type<ov::opset1::LogicalNot>(op) || ov::is_type<ov::opset4::Mish>(op) ||
ov::is_type<ov::opset2::MVN>(op) || ov::is_type<ov::opset6::MVN>(op) ||
ov::is_type<ov::opset1::PRelu>(op) || ov::is_type<ov::opset1::Relu>(op) ||
ov::is_type<ov::opset4::Swish>(op) || ov::is_type<ov::opset1::Softmax>(op) ||
ov::is_type<ov::opset1::Elu>(op) || ov::is_type<ov::opset5::Round>(op)) {
copy_shape_infer(node, input_shapes, output_shapes);
} else if (ov::is_type<ov::op::util::BinaryElementwiseArithmetic>(op) ||
ov::is_type<ov::op::util::BinaryElementwiseComparison>(op) || ov::is_type<ov::op::util::BinaryElementwiseLogical>(op)) {
eltwise_shape_infer(op, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset1::FakeQuantize>(op)) {
shape_infer(node, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset1::Reshape>(op)) {
shape_infer(node, input_shapes, output_shapes, constant_data);
} else if (auto node = ov::as_type<ov::opset1::Squeeze>(op)) {

@ -0,0 +1,133 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "shared_test_classes/single_layer/slice.hpp"
#include "common_test_utils/test_constants.hpp"

using namespace LayerTestsDefinitions;
using namespace ov::test;

namespace {
TEST_P(Slice8LayerTest, Serialize) {
serialize();
}

const std::vector<ElementType> inputPrecisions = {
ElementType::f32,
ElementType::bf16,
ElementType::i8
};

const std::vector<ElementType> inputPrecisionsOther = {
ElementType::i64,
ElementType::i32,
ElementType::i16,
ElementType::u8
};

std::vector<Slice8SpecificParams> staticParams = {
Slice8SpecificParams{ {{{}, {{ 16 }}}}, { 4 }, { 12 }, { 1 }, { 0 } },
Slice8SpecificParams{ {{{}, {{ 16 }}}}, { 0 }, { 8 }, { 2 }, { 0 } },
Slice8SpecificParams{ {{{}, {{ 20, 10, 5 }}}}, { 0, 0}, { 10, 20}, { 1, 1 }, { 1, 0 } },
Slice8SpecificParams{ {{{}, {{ 1, 2, 12, 100 }}}}, { 0, 1, 0, 1 }, { 1, 2, 5, 100 }, { 1, 1, 1, 10 }, {} },
Slice8SpecificParams{ {{{}, {{ 1, 12, 100 }}}}, { 0, 9, 0 }, { 1, 11, 1 }, { 1, 1, 1 }, { 0, 1, -1 } },
Slice8SpecificParams{ {{{}, {{ 1, 12, 100 }}}}, { 0, 1, 0 }, { 10, -1, 10 }, { 1, 1, 1 }, { -3, -2, -1} },
Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { 1, 12, 100 }, { 0, 7, 0 }, { -1, -1, -1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { 1, 4, 99 }, { 0, 9, 0 }, { -1, 2, -1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { -1, -1, -1 }, { 0, 4, 0 }, { -1, -2, -1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { -1, -1, -1 }, { 0, 0, 4 }, { -1, -1, -1 }, {2, 0, 1} },
Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { 0, 0, 4 }, { -5, -1, -1 }, { 1, 2, 1 }, {2, 0, 1} },
Slice8SpecificParams{ {{{}, {{ 2, 2, 2, 2 }}}}, { 0, 0, 0, 0 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 2, 2, 2 }}}}, { 1, 1, 1, 1 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 2, 4, 3 }}}}, { 0, 0, 0, 0 }, { 2, 2, 4, 3 }, { 1, 1, 2, 1 }, { -4, 1, -2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 2, 4, 2 }}}}, { 1, 0, 0, 1 }, { 2, 2, 4, 2 }, { 1, 1, 2, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 1, 2, 4, 2 }}}}, { 0, 1, 0, 1 }, { 10, 2, 4, 2 }, { 1, 1, 2, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 1, 2, 4, 2 }}}}, { 1, 0, 1, 0 }, { 2, 4, 2, 10 }, { 1, 2, 1, 1 }, { -1, -2, -3, -4 } },
Slice8SpecificParams{ {{{}, {{ 10, 2, 4, 2 }}}}, { 9, 1, 3, 0 }, { 0, 0, 0, 1 }, { -1, -1, -1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 10, 2, 4, 2 }}}}, { 19, 1, -1, 0 }, { -10, 0, 0, -1 }, { -1, -1, -1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 3, 2, 4, 200 }}}}, { 0, 1, -1, -1 }, { 3, 2, 0, 0 }, { 1, 1, -2, -1 }, {} },
Slice8SpecificParams{ {{{}, {{ 2, 4, 5, 5, 68 }}}}, { 0, 1, 0, 0, 0 }, {
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max() }, { 1, 1, 1, 1, 16 }, {} },
Slice8SpecificParams{ {{{}, {{ 10, 12 }}}}, { -1, 1 }, { -9999, 10 }, { -1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 5, 5, 5, 5 }}}}, { -1, 0, -1, 0 }, { -50, -1, -60, -1 }, { -1, 1, -1, 1 }, {} },
Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 32 }}}}, { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 20 }}}}, { 0, 1, 0, 0 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 5, 32, 20 }}}}, { 0, 0, 10, 0 }, { 1, 3, 20, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 32 }}}}, { 0, 0, 20, 20 }, { 1, 5, 25, 26 }, { 1, 1, 1, 2 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 5, 32, 32 }}}}, { 0, 0, 0, 20 }, { 1, 2, 30, 30 }, { 1, 1, 2, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 20 }}}}, { 0, 0, 2, 10 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 5, 32, 32 }}}}, { 0, 1, 0, 10 }, { 1, 5, 32, 30 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 20 }}}}, { 0, 1, 2, 10 }, { 1, 5, 32, 18 }, { 1, 1, 1, 2 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 8, 32, 20 }}}}, { 0, 0, 2, 10 }, { 1, 8, 32, 18 }, { 1, 2, 1, 2 }, { 0, 1, 2, 3 } },
Slice8SpecificParams{ {{{}, {{ 2, 8, 32, 20 }}}}, { 0, -20, -15 }, { 2, -5, 3 }, { 1, 1, 1 }, { 0, 2, 1 } }
};

INSTANTIATE_TEST_SUITE_P(smoke_Slice8Serialization_static, Slice8LayerTest,
::testing::Combine(
::testing::ValuesIn(staticParams),
::testing::ValuesIn(inputPrecisions),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(std::map<std::string, std::string>())),
Slice8LayerTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Slice8Serialization_PrecisionTransformation, Slice8LayerTest,
::testing::Combine(
::testing::Values(staticParams[0]),
::testing::ValuesIn(inputPrecisionsOther),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(std::map<std::string, std::string>())),
Slice8LayerTest::getTestCaseName);

std::vector<Slice8SpecificParams> dynamicParams = {
Slice8SpecificParams{ {{{ -1 }, {{ 8 }, { 16 }}}}, { 4 }, { 12 }, { 1 }, { 0 } },
Slice8SpecificParams{ {{{ ov::Dimension(2, 20) }, {{ 5 }, { 15 }}}}, { 0 }, { 8 }, { 2 }, { 0 } },
Slice8SpecificParams{ {{{ -1, -1, -1 }, {{ 20, 10, 5 }, {5, 10, 20}}}}, { 0, 0}, { 10, 20}, { 1, 1 }, { 1, 0 } },
Slice8SpecificParams{ {{{ -1, -1, -1, -1 }, {{ 1, 2, 12, 100 }}}}, { 0, 1, 0, 1 }, { 1, 2, 5, 100 }, { 1, 1, 1, 10 }, {} },
Slice8SpecificParams{ {{{ -1, ov::Dimension(2, 20), -1 }, {{ 1, 12, 100 }, { 2, 12, 100 }}}}, { 0, 9, 0 }, { 1, 11, 1 }, { 1, 1, 1 }, {} },
Slice8SpecificParams{ {{{ ov::Dimension(1, 5), ov::Dimension(1, 5), ov::Dimension(1, 5), ov::Dimension(1, 5) },
{{ 2, 2, 2, 2 }, { 2, 2, 4, 3 }, { 2, 2, 4, 2 }, { 1, 2, 4, 2 }}}},
{ 0, 0, 0, 0 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, {} },
Slice8SpecificParams{ {{{ -1, ov::Dimension(1, 5), ov::Dimension(1, 5), -1 }, {{ 10, 2, 4, 2 }, { 10, 4, 2, 2 }}}},
{ 9, 1, 3, 0 }, { 0, 0, 0, 1 }, { -1, -1, -1, 1 }, {} },
Slice8SpecificParams{ {{{ -1, ov::Dimension(1, 5), -1, -1, ov::Dimension(30, 70) }, {{ 2, 4, 5, 5, 68 }, { 2, 3, 7, 7, 33 }}}},
{ 0, 1, 0, 0, 0 }, {
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max(),
std::numeric_limits<std::int64_t>::max() }, { 1, 1, 1, 1, 16 }, {} },

// Shapes mismatch because of missing lower bounds serialization support (ticket: 69092)
// Slice8SpecificParams{ {{{ov::Dimension(1, 5), ov::Dimension(1, 7), ov::Dimension(1, 35), ov::Dimension(1, 35)},
// {{ 1, 5, 32, 32 }, { 2, 5, 32, 20 }, { 2, 5, 32, 32 }}}}, { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
// Slice8SpecificParams{ {{{ov::Dimension(1, 5), ov::Dimension(10, 20), ov::Dimension(20, 30), 16, ov::Dimension(30, 40)},
// {{ 4, 15, 30, 16, 39 }}}}, { 0, 2, 10, 0, 35 }, { 1, 8, 25, 16, 40 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } }
};

INSTANTIATE_TEST_SUITE_P(smoke_Slice8Serialization_dynamic, Slice8LayerTest,
::testing::Combine(
::testing::ValuesIn(dynamicParams),
::testing::ValuesIn(inputPrecisions),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(std::map<std::string, std::string>())),
Slice8LayerTest::getTestCaseName);
} // namespace
@ -1,197 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include "common_test_utils/test_common.hpp"
#include <string>
#include <sstream>
#include <fstream>
#include <memory>
#include <map>

#include <ngraph/function.hpp>
#include <ngraph/op/constant.hpp>
#include <ngraph_ops/convolution_ie.hpp>
#include <ngraph/pass/constant_folding.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/reshape_1d_ops.hpp>
#include <transformations/init_node_info.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/pass/manager.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"

#include <ngraph/pass/manager.hpp>

using namespace testing;
using namespace ngraph;

TEST(TransformationTests, ConvReshapeTest1) {
auto input = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3, 64}, {1});
auto w = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{6, 3, 3/*OIW*/}, {1});

std::shared_ptr<ngraph::Function> f(nullptr);
{
ngraph::Strides strides{1}, dilations{1};
ngraph::CoordinateDiff pads_begin{0}, pads_end{0};
ngraph::Shape output_shape{1, 6, 62};
auto conv = std::make_shared<ngraph::op::ConvolutionIE>(input, w, strides, dilations, pads_begin, pads_end, ngraph::element::f32, 1);

f = std::make_shared<ngraph::Function>(ngraph::NodeVector{conv}, ngraph::ParameterVector{});
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::pass::Reshape1DOps>();
manager.register_pass<ngraph::pass::InjectionPass>([](std::shared_ptr<ngraph::Function> f) {
check_rt_info(f);
});
manager.register_pass<ngraph::pass::ConstantFolding>();
ASSERT_NO_THROW(manager.run_passes(f));
}

std::vector<size_t> ref_shape{1, 6, 1, 62};
ngraph::Strides ref_strides{1, 1};
ngraph::CoordinateDiff ref_pads_begin{0, 0}, ref_pads_end{0, 0};
for (auto op : f->get_ops()) {
if (auto conv = ngraph::as_type_ptr<ngraph::op::ConvolutionIE>(op)) {
ASSERT_EQ(conv->get_shape(), ref_shape);
ASSERT_EQ(conv->get_strides(), ref_strides);
ASSERT_EQ(conv->get_dilations(), ref_strides);
ASSERT_EQ(conv->get_pads_begin(), ref_pads_begin);
ASSERT_EQ(conv->get_pads_end(), ref_pads_end);
}
}
}

TEST(TransformationTests, ConvBiasReshapeTest1) {
auto input = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3, 64}, {1});
auto w = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{6, 3, 3/*OIW*/}, {1});
auto b = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{6}, {1});

std::shared_ptr<ngraph::Function> f(nullptr);
{
ngraph::Strides strides{1}, dilations{1};
ngraph::CoordinateDiff pads_begin{0}, pads_end{0};
ngraph::Shape output_shape{1, 6, 62};
auto conv = std::make_shared<ngraph::op::ConvolutionIE>(input, w, b, strides, dilations, pads_begin, pads_end, ngraph::element::f32, 1);

f = std::make_shared<ngraph::Function>(ngraph::NodeVector{conv}, ngraph::ParameterVector{});
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::pass::Reshape1DOps>();
manager.register_pass<ngraph::pass::InjectionPass>([](std::shared_ptr<ngraph::Function> f) {
check_rt_info(f);
});
manager.register_pass<ngraph::pass::ConstantFolding>();
ASSERT_NO_THROW(manager.run_passes(f));
}

std::vector<size_t> ref_shape{1, 6, 1, 62};
ngraph::Strides ref_strides{1, 1};
ngraph::CoordinateDiff ref_pads_begin{0, 0}, ref_pads_end{0, 0};
for (auto op : f->get_ops()) {
if (auto conv = ngraph::as_type_ptr<ngraph::op::ConvolutionIE>(op)) {
ASSERT_EQ(conv->get_shape(), ref_shape);
ASSERT_EQ(conv->get_strides(), ref_strides);
ASSERT_EQ(conv->get_dilations(), ref_strides);
ASSERT_EQ(conv->get_pads_begin(), ref_pads_begin);
ASSERT_EQ(conv->get_pads_end(), ref_pads_end);
}
}
}

TEST_F(TransformationTestsF, MaxPoolReshapeTest1) {
{
auto input = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 3, 64});

ngraph::Strides strides{1};
ngraph::Shape pads_begin{0}, pads_end{0}, kernel{3};
auto pool = std::make_shared<ngraph::opset1::MaxPool>(input, strides, pads_begin, pads_end, kernel, ngraph::op::RoundingType::FLOOR);

function = std::make_shared<ngraph::Function>(ngraph::NodeVector{pool}, ngraph::ParameterVector{input});
manager.register_pass<ngraph::pass::Reshape1DOps>();
}

{
auto input = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 3, 64});

auto reshape_begin = std::make_shared<opset1::Reshape>(input, opset1::Constant::create(element::i64, Shape{4}, {1, 3, 1, 64}), true);

ngraph::Strides strides{1, 1};
ngraph::Shape pads_begin{0, 0}, pads_end{0, 0}, kernel{1, 3};
auto pool = std::make_shared<ngraph::opset1::MaxPool>(reshape_begin, strides, pads_begin, pads_end, kernel, ngraph::op::RoundingType::FLOOR);

auto reshape_end = std::make_shared<opset1::Reshape>(pool, opset1::Constant::create(element::i64, Shape{3}, {1, 3, 62}), true);

function_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{reshape_end}, ngraph::ParameterVector{input});
}
}

TEST_F(TransformationTestsF, AvgPoolReshapeTest1) {
{
auto input = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 3, 64});

ngraph::Strides strides{1};
ngraph::Shape pads_begin{0}, pads_end{0}, kernel{3};
auto pool = std::make_shared<ngraph::opset1::AvgPool>(input, strides, pads_begin, pads_end, kernel, false, ngraph::op::RoundingType::FLOOR);

function = std::make_shared<ngraph::Function>(ngraph::NodeVector{pool}, ngraph::ParameterVector{input});
manager.register_pass<ngraph::pass::Reshape1DOps>();
}

{
auto input = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 3, 64});

auto reshape_begin = std::make_shared<opset1::Reshape>(input, opset1::Constant::create(element::i64, Shape{4}, {1, 3, 1, 64}), true);

ngraph::Strides strides{1, 1};
ngraph::Shape pads_begin{0, 0}, pads_end{0, 0}, kernel{1, 3};
auto pool = std::make_shared<ngraph::opset1::AvgPool>(reshape_begin, strides, pads_begin, pads_end, kernel, false, ngraph::op::RoundingType::FLOOR);

auto reshape_end = std::make_shared<opset1::Reshape>(pool, opset1::Constant::create(element::i64, Shape{3}, {1, 3, 62}), true);

function_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{reshape_end}, ngraph::ParameterVector{input});
}
}

TEST(TransformationTests, ReshapeDynamicTest1) {
{
auto input = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::PartialShape::dynamic());

ngraph::Strides strides{1};
ngraph::Shape pads_begin{0}, pads_end{0}, kernel{3};
auto pool = std::make_shared<ngraph::opset1::AvgPool>(input, strides, pads_begin, pads_end, kernel, false, ngraph::op::RoundingType::FLOOR);

auto f = std::make_shared<ngraph::Function>(ngraph::NodeVector{pool}, ngraph::ParameterVector{input});
pass::Manager manager;
manager.register_pass<ngraph::pass::Reshape1DOps>();
ASSERT_NO_THROW(manager.run_passes(f));
}

{
auto input = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 3, 64});

ngraph::Strides strides{1};
ngraph::Shape pads_begin{0}, pads_end{0}, kernel{3};
auto pool = std::make_shared<ngraph::opset1::MaxPool>(input, strides, pads_begin, pads_end, kernel, ngraph::op::RoundingType::FLOOR);

auto f = std::make_shared<ngraph::Function>(ngraph::NodeVector{pool}, ngraph::ParameterVector{input});
pass::Manager manager;
manager.register_pass<ngraph::pass::Reshape1DOps>();
ASSERT_NO_THROW(manager.run_passes(f));
}

{
auto input = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3, 64}, {1});
auto w = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{6, 3, 3/*OIW*/}, {1});
auto b = ngraph::op::Constant::create(ngraph::element::f32, ngraph::Shape{6}, {1});
ngraph::Strides strides{1}, dilations{1};
ngraph::CoordinateDiff pads_begin{0}, pads_end{0};
ngraph::Shape output_shape{1, 6, 62};
auto conv = std::make_shared<ngraph::op::ConvolutionIE>(input, w, b, strides, dilations, pads_begin, pads_end, 1);

auto f = std::make_shared<ngraph::Function>(ngraph::NodeVector{conv}, ngraph::ParameterVector{});
pass::Manager manager;
manager.register_pass<ngraph::pass::Reshape1DOps>();
ASSERT_NO_THROW(manager.run_passes(f));
}
}
@ -0,0 +1,140 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include <string>
#include <memory>

#include <openvino/core/function.hpp>
#include <openvino/opsets/opset8.hpp>
#include <openvino/pass/manager.hpp>
#include <transformations/common_optimizations/remove_concat_zero_dim_input.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/utils/utils.hpp>

#include "common_test_utils/ngraph_test_utils.hpp"

using namespace testing;

TEST_F(TransformationTestsF, RemoveConcatZeroDimInputStaticShape) {
auto input1 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 2, 3});
auto input3 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 2, 3});
int64_t axis = 1;
{
auto input2 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 0, 3});
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, input2, input3}, axis);

function = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input2, input3});

manager.register_pass<ov::pass::RemoveConcatZeroDimInput>();
}

{
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, input3}, axis);
function_ref = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input3});
}
}

TEST_F(TransformationTestsF, RemoveConcatZeroDimInputSubgraph) {
auto input1 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 2, 3});
auto input3 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 2, 3});
int64_t axis = 1;
{
auto in_abs = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 0, 3});
auto abs = std::make_shared<ov::opset8::Abs>(in_abs);
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, abs, input3}, axis);

function = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input3, in_abs});

manager.register_pass<ov::pass::RemoveConcatZeroDimInput>();
}

{
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, input3}, axis);
function_ref = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input3});
}
}

TEST_F(TransformationTestsF, RemoveConcatZeroDimInputSubgraph2) {
auto input1 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, ov::Dimension::dynamic(), 3});
auto input3 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 2, 3});
auto abs = std::make_shared<ov::opset8::Abs>(input1);
int64_t axis = 1;
{
auto in_mul = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape{1, 0, 3});
auto mul = std::make_shared<ov::opset8::Multiply>(in_mul, abs);
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{mul, input3}, axis);

function = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input3, in_mul});

manager.register_pass<ov::pass::RemoveConcatZeroDimInput>();
}

{
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input3}, axis);
function_ref = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input3});
}
}

TEST_F(TransformationTestsF, RemoveConcatZeroDimInputPartiallyKnowShape) {
std::shared_ptr<ov::Function> f(nullptr), f_ref(nullptr);
auto input1 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape::dynamic());
auto input3 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape::dynamic());
int64_t axis = 0;
{
auto input2 = std::make_shared<ov::opset8::Parameter>(ov::element::f32,
ov::PartialShape{0, ov::Dimension::dynamic(), ov::Dimension::dynamic()});
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, input2, input3}, axis);

function = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input2, input3});
manager.register_pass<ov::pass::RemoveConcatZeroDimInput>();
}

{
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, input3}, axis);
function_ref = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input3});
}
}

TEST_F(TransformationTestsF, RemoveConcatZeroDimInputDynamicRank) {
auto input1 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape::dynamic());
auto input2 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape::dynamic());
auto input3 = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::PartialShape::dynamic());
int64_t axis = 0;
{
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, input2, input3}, axis);

function = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input2, input3});

manager.register_pass<ov::pass::RemoveConcatZeroDimInput>();
}

{
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, input2, input3}, axis);
function_ref = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input2, input3});
}
}

TEST_F(TransformationTestsF, RemoveConcatZeroDimTwoInputs) {
auto input1 = std::make_shared<ov::opset8::Parameter>(ov::element::f32,
ov::PartialShape{1, ov::Dimension::dynamic(), ov::Dimension::dynamic()});
int64_t axis = 1;
{
auto input2 = std::make_shared<ov::opset8::Parameter>(ov::element::f32,
ov::PartialShape{1, 0, ov::Dimension::dynamic()});
auto input3 = std::make_shared<ov::opset8::Parameter>(ov::element::f32,
ov::PartialShape{1, ov::Dimension::dynamic(), 0});
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1, input2, input3}, axis);

function = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1, input2, input3});

manager.register_pass<ov::pass::RemoveConcatZeroDimInput>();
}

{
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{input1}, axis);
function_ref = std::make_shared<ov::Function>(ov::NodeVector{concat}, ov::ParameterVector{input1});
}
}
|
@ -0,0 +1,423 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
#include <openvino/core/function.hpp>
|
||||
#include <openvino/opsets/opset8.hpp>
|
||||
#include <ngraph/pass/manager.hpp>
|
||||
#include <transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.hpp>
|
||||
#include <transformations/common_optimizations/remove_concat_zero_dim_input.hpp>
|
||||
#include <transformations/init_node_info.hpp>
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
|
||||
using namespace testing;
|
||||
using namespace ov;
|
||||
using namespace ov::opset8;
|
||||
|
||||
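// RemoveMultiSubGraphOpDanglingParams targets MultiSubGraphOps (Loop, If, TensorIterator):
// body Parameters with no consumers are removed together with their input descriptors,
// and outer inputs that no longer feed any body should be detached as well.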
TEST_F(TransformationTestsF, RemoveLoopDanglingParameters) {
    auto trip_count = std::make_shared<Constant>(element::i64, Shape{}, 10);
    auto condition = std::make_shared<Constant>(element::boolean, Shape{}, true);

    auto a = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto ai = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto b = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto bi = std::make_shared<Parameter>(element::f32, Shape{2, 2});

    auto mul = std::make_shared<Multiply>(bi, bi);
    auto abs = std::make_shared<Abs>(mul);
    {
        auto body = std::make_shared<Function>(OutputVector{condition, abs}, ParameterVector{ai, bi});
        auto loop = std::make_shared<Loop>(trip_count, condition);
        loop->set_special_body_ports({-1, 0});
        loop->set_function(body);
        loop->set_invariant_input(ai, a);
        loop->set_invariant_input(bi, b);

        auto loop_res = std::make_shared<Result>(loop->get_iter_value(abs));
        function = std::make_shared<Function>(OutputVector{loop_res}, ParameterVector{a, b});

        manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
    }
    {
        auto body = std::make_shared<Function>(OutputVector{condition, abs}, ParameterVector{bi});
        auto loop = std::make_shared<Loop>(trip_count, condition);
        loop->set_special_body_ports({-1, 0});
        loop->set_function(body);
        loop->set_invariant_input(bi, b);

        auto loop_res = std::make_shared<Result>(loop->get_iter_value(abs));
        function_ref = std::make_shared<Function>(OutputVector{loop_res}, ParameterVector{a, b});
    }
}

TEST_F(TransformationTestsF, RemoveLoopManyDanglingParameters) {
    auto trip_count = std::make_shared<Constant>(element::i64, Shape{}, 10);
    auto condition = std::make_shared<Constant>(element::boolean, Shape{}, true);

    auto a = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto ai = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto b = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto bi = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto c = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto ci = std::make_shared<Parameter>(element::f32, Shape{2, 2});

    auto mul = std::make_shared<Multiply>(bi, bi);
    auto abs = std::make_shared<Abs>(mul);
    {
        auto body = std::make_shared<Function>(OutputVector{condition, abs}, ParameterVector{ai, bi, ci});
        auto loop = std::make_shared<Loop>(trip_count, condition);
        loop->set_special_body_ports({-1, 0});
        loop->set_function(body);
        loop->set_invariant_input(ai, a);
        loop->set_invariant_input(bi, b);
        loop->set_invariant_input(ci, c);

        auto loop_res = std::make_shared<Result>(loop->get_iter_value(abs));
        function = std::make_shared<Function>(OutputVector{loop_res}, ParameterVector{a, b, c});

        manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
    }
    {
        auto body = std::make_shared<Function>(OutputVector{condition, abs}, ParameterVector{bi});
        auto loop = std::make_shared<Loop>(trip_count, condition);
        loop->set_special_body_ports({-1, 0});
        loop->set_function(body);
        loop->set_invariant_input(bi, b);

        auto loop_res = std::make_shared<Result>(loop->get_iter_value(abs));
        function_ref = std::make_shared<Function>(OutputVector{loop_res}, ParameterVector{a, b, c});
    }
}

TEST_F(TransformationTestsF, RemoveLoopManyDanglingParameters2) {
    auto trip_count = std::make_shared<Constant>(element::i64, Shape{}, 10);
    auto condition = std::make_shared<Constant>(element::boolean, Shape{}, true);

    auto a = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto ai = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto b = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto bi = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto c = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto ci = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto d = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto di = std::make_shared<Parameter>(element::f32, Shape{2, 2});

    auto mul = std::make_shared<Multiply>(bi, bi);
    auto sub = std::make_shared<Multiply>(mul, di);
    auto abs = std::make_shared<Abs>(sub);
    {
        auto body = std::make_shared<Function>(OutputVector{condition, abs}, ParameterVector{ai, bi, ci, di});
        auto loop = std::make_shared<Loop>(trip_count, condition);
        loop->set_special_body_ports({-1, 0});
        loop->set_function(body);
        loop->set_invariant_input(ai, a);
        loop->set_invariant_input(bi, b);
        loop->set_invariant_input(ci, c);
        loop->set_invariant_input(di, d);

        auto loop_res = std::make_shared<Result>(loop->get_iter_value(abs));
        function = std::make_shared<Function>(OutputVector{loop_res}, ParameterVector{a, b, c, d});

        manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
    }
    {
        auto body = std::make_shared<Function>(OutputVector{condition, abs}, ParameterVector{bi, di});
        auto loop = std::make_shared<Loop>(trip_count, condition);
        loop->set_special_body_ports({-1, 0});
        loop->set_function(body);
        loop->set_invariant_input(bi, b);
        loop->set_invariant_input(di, d);

        auto loop_res = std::make_shared<Result>(loop->get_iter_value(abs));
        function_ref = std::make_shared<Function>(OutputVector{loop_res}, ParameterVector{a, b, c, d});
    }
}

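// Pass-ordering check: RemoveConcatZeroDimInput first drops the empty-tensor input bi
// from the body Concat, which leaves bi dangling so that
// RemoveMultiSubGraphOpDanglingParams can then remove it from the Loop.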
TEST_F(TransformationTestsF, RemoveLoopDanglingParametersIfConcatEmptyTensor) {
    auto trip_count = std::make_shared<Constant>(element::i64, Shape{}, 10);
    auto condition = std::make_shared<Constant>(element::boolean, Shape{}, true);

    auto a = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto ai = std::make_shared<Parameter>(element::f32, Shape{2, 2});
    auto b = std::make_shared<Parameter>(element::f32, Shape{0, 2});  // empty tensor
    auto bi = std::make_shared<Parameter>(element::f32, Shape{0, 2});
    {
        auto concat = std::make_shared<Concat>(NodeVector{ai, bi}, 0);
        auto body = std::make_shared<Function>(OutputVector{condition, concat}, ParameterVector{ai, bi});
        auto loop = std::make_shared<Loop>(trip_count, condition);
        loop->set_special_body_ports({-1, 0});
        loop->set_function(body);
        loop->set_invariant_input(ai, a);
        loop->set_invariant_input(bi, b);

        auto loop_res = std::make_shared<Result>(loop->get_iter_value(concat));
        function = std::make_shared<Function>(OutputVector{loop_res}, ParameterVector{a, b});

        manager.register_pass<pass::RemoveConcatZeroDimInput>();
        manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
    }
    {
        auto concat = std::make_shared<Concat>(NodeVector{ai}, 0);
        auto body = std::make_shared<Function>(OutputVector{condition, concat}, ParameterVector{ai});
        auto loop = std::make_shared<Loop>(trip_count, condition);
        loop->set_special_body_ports({-1, 0});
        loop->set_function(body);
        loop->set_invariant_input(ai, a);

        auto loop_res = std::make_shared<Result>(loop->get_iter_value(concat));
        function_ref = std::make_shared<Function>(OutputVector{loop_res}, ParameterVector{a, b});
    }
}

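// Y is dangling in both the then and else bodies (each uses only Xte), so its body
// Parameters and the whole Y input of the If op should be removed.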
TEST_F(TransformationTestsF, RemoveIfDanglingParametersFromBodiesAndInputs) {
    auto X = std::make_shared<Parameter>(element::f32, Shape{2, 4, 1});
    auto Y = std::make_shared<Parameter>(element::f32, Shape{3, 4, 1});
    auto cond = std::make_shared<Constant>(element::boolean, Shape{1}, true);

    auto Xte = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
    auto Yte = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());

    auto then_op = std::make_shared<Add>(Xte, Xte);
    auto then_op_res = std::make_shared<Result>(then_op);

    auto else_op = std::make_shared<Maximum>(Xte, Xte);
    auto else_op_res = std::make_shared<Result>(else_op);
    {
        auto then_body = std::make_shared<Function>(OutputVector{then_op_res}, ParameterVector{Xte, Yte});
        auto else_body = std::make_shared<Function>(OutputVector{else_op_res}, ParameterVector{Xte, Yte});
        auto if_op = std::make_shared<If>(cond);
        if_op->set_then_body(then_body);
        if_op->set_else_body(else_body);
        if_op->set_input(X, Xte, Xte);
        if_op->set_input(Y, Yte, Yte);
        auto res = if_op->set_output(then_op_res, else_op_res);
        function = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y});

        manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
    }
    {
        auto then_body = std::make_shared<Function>(OutputVector{then_op_res}, ParameterVector{Xte});
        auto else_body = std::make_shared<Function>(OutputVector{else_op_res}, ParameterVector{Xte});
        auto if_op = std::make_shared<If>(cond);
        if_op->set_then_body(then_body);
        if_op->set_else_body(else_body);
        if_op->set_input(X, Xte, Xte);
        auto res = if_op->set_output(then_op_res, else_op_res);
        function_ref = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y});
    }
}

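// Here each input is consumed by exactly one body (X by then, Y by else); the inputs
// remain, and the missing descriptor for the other body is written as nullptr.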
TEST_F(TransformationTestsF, RemoveIfDanglingParametersOnlyFromBodies) {
    auto X = std::make_shared<Parameter>(element::f32, Shape{2, 4, 1});
    auto Y = std::make_shared<Parameter>(element::f32, Shape{3, 4, 1});
    auto cond = std::make_shared<Constant>(element::boolean, Shape{1}, true);

    auto Xt = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
    auto Yt = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());

    auto then_op = std::make_shared<Add>(Xt, Xt);
    auto then_op_res = std::make_shared<Result>(then_op);

    auto Xe = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
    auto Ye = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());

    auto else_op = std::make_shared<Maximum>(Ye, Ye);
    auto else_op_res = std::make_shared<Result>(else_op);
    {
        auto then_body = std::make_shared<Function>(OutputVector{then_op_res}, ParameterVector{Xt, Yt});
        auto else_body = std::make_shared<Function>(OutputVector{else_op_res}, ParameterVector{Xe, Ye});
        auto if_op = std::make_shared<If>(cond);
        if_op->set_then_body(then_body);
        if_op->set_else_body(else_body);
        if_op->set_input(X, Xt, Xe);
        if_op->set_input(Y, Yt, Ye);
        auto res = if_op->set_output(then_op_res, else_op_res);
        function = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y});

        manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
    }
    {
        auto then_body = std::make_shared<Function>(OutputVector{then_op_res}, ParameterVector{Xt});
        auto else_body = std::make_shared<Function>(OutputVector{else_op_res}, ParameterVector{Ye});
        auto if_op = std::make_shared<If>(cond);
        if_op->set_then_body(then_body);
        if_op->set_else_body(else_body);
        if_op->set_input(X, Xt, nullptr);
        if_op->set_input(Y, nullptr, Ye);
        auto res = if_op->set_output(then_op_res, else_op_res);
        function_ref = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y});
    }
}

TEST_F(TransformationTestsF, RemoveIfManyDanglingParameters) {
    auto X = std::make_shared<Parameter>(element::f32, Shape{2, 4, 1});
    auto Y = std::make_shared<Parameter>(element::f32, Shape{3, 4, 1});
    auto Z = std::make_shared<Parameter>(element::f32, Shape{2, 4, 1});
    auto cond = std::make_shared<Constant>(element::boolean, Shape{1}, true);

    auto Xt = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
    auto Yt = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
    auto Zt = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());

    auto then_op = std::make_shared<Add>(Xt, Zt);
    auto then_op_res = std::make_shared<Result>(then_op);

    auto Xe = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
    auto Ye = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
    auto Ze = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());

    auto else_op = std::make_shared<Maximum>(Xe, Xe);
    auto else_op_res = std::make_shared<Result>(else_op);
    {
        auto then_body = std::make_shared<Function>(OutputVector{then_op_res}, ParameterVector{Xt, Yt, Zt});
        auto else_body = std::make_shared<Function>(OutputVector{else_op_res}, ParameterVector{Xe, Ye, Ze});
        auto if_op = std::make_shared<If>(cond);
        if_op->set_then_body(then_body);
        if_op->set_else_body(else_body);
        if_op->set_input(X, Xt, Xe);
        if_op->set_input(Y, Yt, Ye);
        if_op->set_input(Z, Zt, Ze);
        auto res = if_op->set_output(then_op_res, else_op_res);
        function = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y, Z});

        manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
    }
    {
        auto then_body = std::make_shared<Function>(OutputVector{then_op_res}, ParameterVector{Xt, Zt});
        auto else_body = std::make_shared<Function>(OutputVector{else_op_res}, ParameterVector{Xe});
        auto if_op = std::make_shared<If>(cond);
        if_op->set_then_body(then_body);
        if_op->set_else_body(else_body);
        if_op->set_input(X, Xt, Xe);
        if_op->set_input(Z, Zt, nullptr);
        auto res = if_op->set_output(then_op_res, else_op_res);
        function_ref = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y, Z});
    }
}

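// Removing a Parameter shifts the indices of the ones that remain, so the pass must
// also rewrite the input descriptors of every body, not only those it pruned.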
TEST_F(TransformationTestsF, RemoveIfDanglingParamFromOneBodyAndUpdateAllDescriptions) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    auto X = std::make_shared<Parameter>(element::f32, Shape{2, 4, 1});
    auto Y = std::make_shared<Parameter>(element::f32, Shape{2, 4, 1});
    auto Z = std::make_shared<Parameter>(element::f32, Shape{2, 4, 1});
    auto cond = std::make_shared<Constant>(element::boolean, Shape{1}, true);

    auto Xt = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
    auto Yt = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
    auto Zt = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());

    auto then_op = std::make_shared<Add>(Zt, Zt);
    auto then_op_res = std::make_shared<Result>(then_op);

    auto Xe = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());
    auto Ze = std::make_shared<Parameter>(element::f32, PartialShape::dynamic());

    auto else_op = std::make_shared<Add>(std::make_shared<Maximum>(Xe, Ze), Ze);
    auto else_op_res = std::make_shared<Result>(else_op);
    {
        auto then_body = std::make_shared<Function>(OutputVector{then_op_res}, ParameterVector{Xt, Yt, Zt});
        auto else_body = std::make_shared<Function>(OutputVector{else_op_res}, ParameterVector{Xe, Ze});
        auto if_op = std::make_shared<If>(cond);
        if_op->set_then_body(then_body);
        if_op->set_else_body(else_body);
        if_op->set_input(X, Xt, Xe);
        if_op->set_input(Y, Yt, nullptr);
        if_op->set_input(Z, Zt, Ze);
        auto res = if_op->set_output(then_op_res, else_op_res);
        function = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y, Z});

        manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
    }
    {
        auto then_body = std::make_shared<Function>(OutputVector{then_op_res}, ParameterVector{Zt});
        auto else_body = std::make_shared<Function>(OutputVector{else_op_res}, ParameterVector{Xe, Ze});
        auto if_op = std::make_shared<If>(cond);
        if_op->set_then_body(then_body);
        if_op->set_else_body(else_body);
        if_op->set_input(X, nullptr, Xe);
        if_op->set_input(Z, Zt, Ze);
        auto res = if_op->set_output(then_op_res, else_op_res);
        function_ref = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y, Z});
    }
}

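// The same cleanup applies to TensorIterator: the invariant input M feeds a body
// Parameter with no consumers, so both should disappear from the iterator.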
TEST_F(TransformationTestsF, RemoveTensorIteratorDanglingParameter) {
    auto X = std::make_shared<Parameter>(element::f32, Shape{32, 40, 10});
    auto Y = std::make_shared<Parameter>(element::f32, Shape{32, 40, 10});
    auto M = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});

    auto Xi = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});
    auto Yi = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});
    auto M_body = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});
    auto Zo = std::make_shared<Abs>(std::make_shared<Add>(Xi, Yi));
    {
        auto body = std::make_shared<Function>(OutputVector{Zo}, ParameterVector{Xi, Yi, M_body});
        auto tensor_iterator = std::make_shared<TensorIterator>();
        tensor_iterator->set_body(body);
        tensor_iterator->set_sliced_input(Xi, X, 0, 2, 2, 39, 1);
        tensor_iterator->set_sliced_input(Yi, Y, 0, 2, 2, -1, 1);
        tensor_iterator->set_invariant_input(M_body, M);

        auto out = tensor_iterator->get_iter_value(Zo, -1);
        auto res = std::make_shared<Result>(out);
        function = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y, M});

        manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
    }
    {
        auto body = std::make_shared<Function>(OutputVector{Zo}, ParameterVector{Xi, Yi});
        auto tensor_iterator = std::make_shared<TensorIterator>();
        tensor_iterator->set_body(body);
        tensor_iterator->set_sliced_input(Xi, X, 0, 2, 2, 39, 1);
        tensor_iterator->set_sliced_input(Yi, Y, 0, 2, 2, -1, 1);

        auto out = tensor_iterator->get_iter_value(Zo, -1);
        auto res = std::make_shared<Result>(out);
        function_ref = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y, M});
    }
}

TEST_F(TransformationTestsF, RemoveTensorIteratorManyDanglingParameters) {
    auto X = std::make_shared<Parameter>(element::f32, Shape{32, 40, 10});
    auto Y = std::make_shared<Parameter>(element::f32, Shape{32, 40, 10});
    auto Z = std::make_shared<Parameter>(element::f32, Shape{32, 40, 10});
    auto M = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});

    auto Xi = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});
    auto Yi = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});
    auto Zi = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});
    auto M_body = std::make_shared<Parameter>(element::f32, Shape{32, 2, 10});
    auto Zo = std::make_shared<Abs>(std::make_shared<Add>(Xi, Zi));
    {
        auto body = std::make_shared<Function>(OutputVector{Zo}, ParameterVector{Xi, Yi, Zi, M_body});
        auto tensor_iterator = std::make_shared<TensorIterator>();
        tensor_iterator->set_body(body);
        tensor_iterator->set_sliced_input(Xi, X, 0, 2, 2, 39, 1);
        tensor_iterator->set_sliced_input(Yi, Y, 0, 2, 2, -1, 1);
        tensor_iterator->set_sliced_input(Zi, Z, 0, 2, 2, -1, 1);
        tensor_iterator->set_invariant_input(M_body, M);

        auto out = tensor_iterator->get_iter_value(Zo, -1);
        auto res = std::make_shared<Result>(out);
        function = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y, Z, M});

        manager.register_pass<pass::RemoveMultiSubGraphOpDanglingParams>();
    }
    {
        auto body = std::make_shared<Function>(OutputVector{Zo}, ParameterVector{Xi, Zi});
        auto tensor_iterator = std::make_shared<TensorIterator>();
        tensor_iterator->set_body(body);
        tensor_iterator->set_sliced_input(Xi, X, 0, 2, 2, 39, 1);
        tensor_iterator->set_sliced_input(Zi, Z, 0, 2, 2, -1, 1);

        auto out = tensor_iterator->get_iter_value(Zo, -1);
        auto res = std::make_shared<Result>(out);
        function_ref = std::make_shared<Function>(OutputVector{res}, ParameterVector{X, Y, Z, M});
    }
}

@ -8,7 +8,7 @@ if (ENABLE_MKL_DNN)
    add_subdirectory(cpu)
endif()

if (ENABLE_CLDNN)
if (ENABLE_INTEL_GPU)
    add_subdirectory(gpu)
endif()

@ -22,7 +22,7 @@ inline const std::string getPluginLibNameByDevice(const std::string& deviceName)
        { "AUTO", "MultiDevicePlugin" },
        { "CPU", "MKLDNNPlugin" },
        { "GNA", "GNAPlugin" },
        { "GPU", "clDNNPlugin" },
        { "GPU", "ov_intel_gpu_plugin" },
        { "HETERO", "ov_hetero_plugin" },
        { "BATCH", "AutoBatchPlugin" },
        { "MULTI", "MultiDevicePlugin" },

@ -99,4 +99,8 @@ INSTANTIATE_TEST_SUITE_P(
INSTANTIATE_TEST_SUITE_P(
        smoke_IEClassLoadNetworkTest, IEClassLoadNetworkTest,
        ::testing::Values("CPU"));

INSTANTIATE_TEST_SUITE_P(
        smoke_IEClassLoadNetworkTest, IEClassLoadNetworkTestWithThrow,
        ::testing::Values(""));
} // namespace

@ -8,75 +8,120 @@
#include "common_test_utils/test_constants.hpp"

using namespace LayerTestsDefinitions;
using namespace ov::test;

namespace {

const std::vector<InferenceEngine::Precision> inputPrecision = {
    InferenceEngine::Precision::I8,
    InferenceEngine::Precision::U8,
    InferenceEngine::Precision::I16,
    InferenceEngine::Precision::I32,
    InferenceEngine::Precision::FP32
const std::vector<ElementType> inputPrecisions = {
    ElementType::f32,
    ElementType::bf16,
    ElementType::i8
};

std::vector<SliceSpecificParams> test_cases = {
    SliceSpecificParams{ { 16 }, { 4 }, { 12 }, { 1 }, { 0 } },
    SliceSpecificParams{ { 16 }, { 0 }, { 8 }, { 2 }, { 0 } },
    SliceSpecificParams{ { 20, 10, 5 }, { 0, 0}, { 10, 20}, { 1, 1 }, { 1, 0 } },
    SliceSpecificParams{ { 1, 2, 12, 100 }, { 0, 1, 0, 1 }, { 1, 2, 5, 100 }, { 1, 1, 1, 10 }, {} },
    SliceSpecificParams{ { 1, 12, 100 }, { 0, 9, 0 }, { 1, 11, 1 }, { 1, 1, 1 }, {} },
    SliceSpecificParams{ { 1, 12, 100 }, { 0, 1, 0 }, { 10, -1, 10 }, { 1, 1, 1 }, {} },
    SliceSpecificParams{ { 2, 12, 100 }, { 1, 12, 100 }, { 0, 7, 0 }, { -1, -1, -1 }, {} },
    SliceSpecificParams{ { 2, 12, 100 }, { 1, 4, 99 }, { 0, 9, 0 }, { -1, 2, -1 }, {} },
    SliceSpecificParams{ { 2, 12, 100 }, { -1, -1, -1 }, { 0, 4, 0 }, { -1, -2, -1 }, {} },
    SliceSpecificParams{ { 2, 12, 100 }, { -1, -1, -1 }, { 0, 0, 4 }, { -1, -1, -1 }, {2, 0, 1} },
    SliceSpecificParams{ { 2, 12, 100 }, { 0, 0, 4 }, { -5, -1, -1 }, { 1, 2, 1 }, {2, 0, 1} },
    SliceSpecificParams{ { 2, 2, 2, 2 }, { 0, 0, 0, 0 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, {} },
    SliceSpecificParams{ { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, {} },
    SliceSpecificParams{ { 2, 2, 4, 3 }, { 0, 0, 0, 0 }, { 2, 2, 4, 3 }, { 1, 1, 2, 1 }, {} },
    SliceSpecificParams{ { 2, 2, 4, 2 }, { 1, 0, 0, 1 }, { 2, 2, 4, 2 }, { 1, 1, 2, 1 }, {} },
    SliceSpecificParams{ { 1, 2, 4, 2 }, { 0, 1, 0, 1 }, { 10, 2, 4, 2 }, { 1, 1, 2, 1 }, {} },
    SliceSpecificParams{ { 10, 2, 4, 2 }, { 9, 1, 3, 0 }, { 0, 0, 0, 1 }, { -1, -1, -1, 1 }, {} },
    SliceSpecificParams{ { 10, 2, 4, 2 }, { 19, 1, -1, 0 }, { -10, 0, 0, -1 }, { -1, -1, -1, 1 }, {} },
    SliceSpecificParams{ { 3, 2, 4, 200 }, { 0, 1, -1, -1 }, { 3, 2, 0, 0 }, { 1, 1, -2, -1 }, {} },
    SliceSpecificParams{ { 2, 4, 5, 5, 68 }, { 0, 1, 0, 0, 0 }, {
const std::vector<ElementType> inputPrecisionsOther = {
    ElementType::i64,
    ElementType::i32,
    ElementType::i16,
    ElementType::u8
};

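// Each Slice8SpecificParams entry appears to follow the Slice-8 operand order
// { shapes, start, stop, step, axes }; an empty axes list presumably means the
// default axes 0..rank-1.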
std::vector<Slice8SpecificParams> staticParams = {
    Slice8SpecificParams{ {{{}, {{ 16 }}}}, { 4 }, { 12 }, { 1 }, { 0 } },
    Slice8SpecificParams{ {{{}, {{ 16 }}}}, { 0 }, { 8 }, { 2 }, { 0 } },
    Slice8SpecificParams{ {{{}, {{ 20, 10, 5 }}}}, { 0, 0}, { 10, 20}, { 1, 1 }, { 1, 0 } },
    Slice8SpecificParams{ {{{}, {{ 1, 2, 12, 100 }}}}, { 0, 1, 0, 1 }, { 1, 2, 5, 100 }, { 1, 1, 1, 10 }, {} },
    Slice8SpecificParams{ {{{}, {{ 1, 12, 100 }}}}, { 0, 9, 0 }, { 1, 11, 1 }, { 1, 1, 1 }, { 0, 1, -1 } },
    Slice8SpecificParams{ {{{}, {{ 1, 12, 100 }}}}, { 0, 1, 0 }, { 10, -1, 10 }, { 1, 1, 1 }, { -3, -2, -1} },
    Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { 1, 12, 100 }, { 0, 7, 0 }, { -1, -1, -1 }, {} },
    Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { 1, 4, 99 }, { 0, 9, 0 }, { -1, 2, -1 }, {} },
    Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { -1, -1, -1 }, { 0, 4, 0 }, { -1, -2, -1 }, {} },
    Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { -1, -1, -1 }, { 0, 0, 4 }, { -1, -1, -1 }, {2, 0, 1} },
    Slice8SpecificParams{ {{{}, {{ 2, 12, 100 }}}}, { 0, 0, 4 }, { -5, -1, -1 }, { 1, 2, 1 }, {2, 0, 1} },
    Slice8SpecificParams{ {{{}, {{ 2, 2, 2, 2 }}}}, { 0, 0, 0, 0 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, {} },
    Slice8SpecificParams{ {{{}, {{ 2, 2, 2, 2 }}}}, { 1, 1, 1, 1 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, {} },
    Slice8SpecificParams{ {{{}, {{ 2, 2, 4, 3 }}}}, { 0, 0, 0, 0 }, { 2, 2, 4, 3 }, { 1, 1, 2, 1 }, { -4, 1, -2, 3 } },
    Slice8SpecificParams{ {{{}, {{ 2, 2, 4, 2 }}}}, { 1, 0, 0, 1 }, { 2, 2, 4, 2 }, { 1, 1, 2, 1 }, {} },
    Slice8SpecificParams{ {{{}, {{ 1, 2, 4, 2 }}}}, { 0, 1, 0, 1 }, { 10, 2, 4, 2 }, { 1, 1, 2, 1 }, {} },
    Slice8SpecificParams{ {{{}, {{ 1, 2, 4, 2 }}}}, { 1, 0, 1, 0 }, { 2, 4, 2, 10 }, { 1, 2, 1, 1 }, { -1, -2, -3, -4 } },
    Slice8SpecificParams{ {{{}, {{ 10, 2, 4, 2 }}}}, { 9, 1, 3, 0 }, { 0, 0, 0, 1 }, { -1, -1, -1, 1 }, {} },
    Slice8SpecificParams{ {{{}, {{ 10, 2, 4, 2 }}}}, { 19, 1, -1, 0 }, { -10, 0, 0, -1 }, { -1, -1, -1, 1 }, {} },
    Slice8SpecificParams{ {{{}, {{ 3, 2, 4, 200 }}}}, { 0, 1, -1, -1 }, { 3, 2, 0, 0 }, { 1, 1, -2, -1 }, {} },
    Slice8SpecificParams{ {{{}, {{ 2, 4, 5, 5, 68 }}}}, { 0, 1, 0, 0, 0 }, {
            std::numeric_limits<std::int64_t>::max(),
            std::numeric_limits<std::int64_t>::max(),
            std::numeric_limits<std::int64_t>::max(),
            std::numeric_limits<std::int64_t>::max(),
            std::numeric_limits<std::int64_t>::max() }, { 1, 1, 1, 1, 16 }, {} },
    SliceSpecificParams{ { 10, 12 }, { -1, 1 }, { -9999, 10 }, { -1, 1 }, {} },
    SliceSpecificParams{ { 5, 5, 5, 5 }, { -1, 0, -1, 0 }, { -50, -1, -60, -1 }, { -1, 1, -1, 1 }, {} },
    SliceSpecificParams{ { 1, 5, 32, 32 }, { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
    SliceSpecificParams{ { 1, 5, 32, 20 }, { 0, 1, 0, 0 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
    SliceSpecificParams{ { 2, 5, 32, 20 }, { 0, 0, 10, 0 }, { 1, 3, 20, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
    SliceSpecificParams{ { 1, 5, 32, 32 }, { 0, 0, 20, 20 }, { 1, 5, 25, 26 }, { 1, 1, 1, 2 }, { 0, 1, 2, 3 } },
    SliceSpecificParams{ { 2, 5, 32, 32 }, { 0, 0, 0, 20 }, { 1, 2, 30, 30 }, { 1, 1, 2, 1 }, { 0, 1, 2, 3 } },
    SliceSpecificParams{ { 1, 5, 32, 20 }, { 0, 0, 2, 10 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
    SliceSpecificParams{ { 2, 5, 32, 32 }, { 0, 1, 0, 10 }, { 1, 5, 32, 30 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
    SliceSpecificParams{ { 1, 5, 32, 20 }, { 0, 1, 2, 10 }, { 1, 5, 32, 18 }, { 1, 1, 1, 2 }, { 0, 1, 2, 3 } },
    SliceSpecificParams{ { 2, 8, 32, 20 }, { 0, 0, 2, 10 }, { 1, 8, 32, 18 }, { 1, 2, 1, 2 }, { 0, 1, 2, 3 } },
    SliceSpecificParams{ { 2, 8, 32, 20 }, { 0, -20, -15 }, { 2, -5, 3 }, { 1, 1, 1 }, { 0, 2, 1 } },

    // Plugin Error: Slice has zero dimension which is not allowed
    // SliceSpecificParams{ { 2, 8, 32, 20 }, { 0, 0, 10 }, { 0, 32, 18 }, { 1, 1, 1 }, { 0, 1, 2 } },
    // SliceSpecificParams{ { 2, 8, 32, 20 }, { 0, 0, 10 }, { 1, 0, 20 }, { 1, 1, 1 }, { 0, 1, 2 } },
    // SliceSpecificParams{ { 2, 8, 32, 20 }, { 0, 4, 10 }, { 2, 8, 0 }, { 1, 1, 1 }, { 0, 1, 2 } },
    // SliceSpecificParams{ { 2, 8, 32, 20 }, { 0, 4, 10 }, { 2, 8, 0 }, { 1, 1, 1 }, { 0, 2, 1 } },
    // SliceSpecificParams{ { 2, 8, 32, 20 }, { 0, 4, 10 }, { 2, 8, 0 }, { 1, 1, 1 }, { 0, -2, -1 } },
    Slice8SpecificParams{ {{{}, {{ 10, 12 }}}}, { -1, 1 }, { -9999, 10 }, { -1, 1 }, {} },
    Slice8SpecificParams{ {{{}, {{ 5, 5, 5, 5 }}}}, { -1, 0, -1, 0 }, { -50, -1, -60, -1 }, { -1, 1, -1, 1 }, {} },
    Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 32 }}}}, { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
    Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 20 }}}}, { 0, 1, 0, 0 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
    Slice8SpecificParams{ {{{}, {{ 2, 5, 32, 20 }}}}, { 0, 0, 10, 0 }, { 1, 3, 20, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
    Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 32 }}}}, { 0, 0, 20, 20 }, { 1, 5, 25, 26 }, { 1, 1, 1, 2 }, { 0, 1, 2, 3 } },
    Slice8SpecificParams{ {{{}, {{ 2, 5, 32, 32 }}}}, { 0, 0, 0, 20 }, { 1, 2, 30, 30 }, { 1, 1, 2, 1 }, { 0, 1, 2, 3 } },
    Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 20 }}}}, { 0, 0, 2, 10 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
    Slice8SpecificParams{ {{{}, {{ 2, 5, 32, 32 }}}}, { 0, 1, 0, 10 }, { 1, 5, 32, 30 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
    Slice8SpecificParams{ {{{}, {{ 1, 5, 32, 20 }}}}, { 0, 1, 2, 10 }, { 1, 5, 32, 18 }, { 1, 1, 1, 2 }, { 0, 1, 2, 3 } },
    Slice8SpecificParams{ {{{}, {{ 2, 8, 32, 20 }}}}, { 0, 0, 2, 10 }, { 1, 8, 32, 18 }, { 1, 2, 1, 2 }, { 0, 1, 2, 3 } },
    Slice8SpecificParams{ {{{}, {{ 2, 8, 32, 20 }}}}, { 0, -20, -15 }, { 2, -5, 3 }, { 1, 1, 1 }, { 0, 2, 1 } }
};

INSTANTIATE_TEST_SUITE_P(
        smoke_MKLDNN, SliceLayerTest,
INSTANTIATE_TEST_SUITE_P(smoke_Static, Slice8LayerTest,
        ::testing::Combine(
            ::testing::ValuesIn(test_cases),
            ::testing::ValuesIn(inputPrecision),
            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
            ::testing::ValuesIn(staticParams),
            ::testing::ValuesIn(inputPrecisions),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(InferenceEngine::Layout::ANY),
            ::testing::Values(InferenceEngine::Layout::ANY),
            ::testing::Values(CommonTestUtils::DEVICE_CPU),
            ::testing::Values(std::map<std::string, std::string>())),
        SliceLayerTest::getTestCaseName);
        Slice8LayerTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_PrecisionTransformation, Slice8LayerTest,
        ::testing::Combine(
            ::testing::Values(staticParams[0]),
            ::testing::ValuesIn(inputPrecisionsOther),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(InferenceEngine::Layout::ANY),
            ::testing::Values(InferenceEngine::Layout::ANY),
            ::testing::Values(CommonTestUtils::DEVICE_CPU),
            ::testing::Values(std::map<std::string, std::string>())),
        Slice8LayerTest::getTestCaseName);

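// For the dynamic cases the first field pairs a dynamic shape (-1 or a bounded
// ov::Dimension) with the concrete target shapes to run at inference time.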
std::vector<Slice8SpecificParams> dynamicParams = {
    Slice8SpecificParams{ {{{ -1 }, {{ 8 }, { 16 }}}}, { 4 }, { 12 }, { 1 }, { 0 } },
    Slice8SpecificParams{ {{{ ov::Dimension(2, 20) }, {{ 5 }, { 15 }}}}, { 0 }, { 8 }, { 2 }, { 0 } },
    Slice8SpecificParams{ {{{ -1, -1, -1 }, {{ 20, 10, 5 }, {5, 10, 20}}}}, { 0, 0}, { 10, 20}, { 1, 1 }, { 1, 0 } },
    Slice8SpecificParams{ {{{ -1, -1, -1, -1 }, {{ 1, 2, 12, 100 }}}}, { 0, 1, 0, 1 }, { 1, 2, 5, 100 }, { 1, 1, 1, 10 }, {} },
    Slice8SpecificParams{ {{{ -1, ov::Dimension(2, 20), -1 }, {{ 1, 12, 100 }, { 2, 12, 100 }}}}, { 0, 9, 0 }, { 1, 11, 1 }, { 1, 1, 1 }, {} },
    Slice8SpecificParams{ {{{ ov::Dimension(1, 5), ov::Dimension(1, 5), ov::Dimension(1, 5), ov::Dimension(1, 5) },
                           {{ 2, 2, 2, 2 }, { 2, 2, 4, 3 }, { 2, 2, 4, 2 }, { 1, 2, 4, 2 }}}},
                           { 0, 0, 0, 0 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 }, {} },
    Slice8SpecificParams{ {{{ -1, ov::Dimension(1, 5), ov::Dimension(1, 5), -1 }, {{ 10, 2, 4, 2 }, { 10, 4, 2, 2 }}}},
                           { 9, 1, 3, 0 }, { 0, 0, 0, 1 }, { -1, -1, -1, 1 }, {} },
    Slice8SpecificParams{ {{{ -1, ov::Dimension(1, 5), -1, -1, ov::Dimension(30, 70) }, {{ 2, 4, 5, 5, 68 }, { 2, 3, 7, 7, 33 }}}},
                           { 0, 1, 0, 0, 0 }, {
            std::numeric_limits<std::int64_t>::max(),
            std::numeric_limits<std::int64_t>::max(),
            std::numeric_limits<std::int64_t>::max(),
            std::numeric_limits<std::int64_t>::max(),
            std::numeric_limits<std::int64_t>::max() }, { 1, 1, 1, 1, 16 }, {} },
    Slice8SpecificParams{ {{{ov::Dimension(1, 5), ov::Dimension(1, 7), ov::Dimension(1, 35), ov::Dimension(1, 35)},
                           {{ 1, 5, 32, 32 }, { 2, 5, 32, 20 }, { 2, 5, 32, 32 }}}}, { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } }
};

INSTANTIATE_TEST_SUITE_P(smoke_Dynamic, Slice8LayerTest,
        ::testing::Combine(
            ::testing::ValuesIn(dynamicParams),
            ::testing::ValuesIn(inputPrecisions),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(InferenceEngine::Layout::ANY),
            ::testing::Values(InferenceEngine::Layout::ANY),
            ::testing::Values(CommonTestUtils::DEVICE_CPU),
            ::testing::Values(std::map<std::string, std::string>())),
        Slice8LayerTest::getTestCaseName);
} // namespace

@ -150,6 +150,36 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes
    {GeluTanh, {{}}}
};

std::vector<Precision> netPrc = {
    Precision::BF16,
    Precision::FP32
};

/* ============= Activation (1D) ============= */
std::vector<CPUSpecificParams> cpuParams_3D = {
    CPUSpecificParams({nCw16c}, {nCw16c}, {}, {}),
    CPUSpecificParams({nwc}, {nwc}, {}, {}),
    CPUSpecificParams({ncw}, {ncw}, {}, {})
};

std::vector<std::vector<ov::Shape>> basic3D = {
    {{2, 4, 4}},
    {{2, 17, 5}},
};

const auto basicCases3D = ::testing::Combine(
    ::testing::ValuesIn(static_shapes_to_test_representation(basic3D)),
    ::testing::Values(activationShapes),
    ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes)),
    ::testing::ValuesIn(netPrc),
    ::testing::Values(Precision::FP32),
    ::testing::Values(Precision::FP32),
    ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_3D))
);

INSTANTIATE_TEST_SUITE_P(smoke_Activation3D_Eltwise_CPU_BF16, ActivationLayerCPUTest, basicCases3D, ActivationLayerCPUTest::getTestCaseName);

/* ============= Activation (2D) ============= */
std::vector<CPUSpecificParams> cpuParams_4D = {
    CPUSpecificParams({nChw16c}, {nChw16c}, {}, {}),
    CPUSpecificParams({nhwc}, {nhwc}, {}, {}),
@ -161,10 +191,6 @@ std::vector<std::vector<ov::Shape>> basic4D = {
    {{2, 17, 5, 4}}
};

std::vector<Precision> netPrc = {
    Precision::BF16,
    Precision::FP32
};

const auto basicCases4D = ::testing::Combine(
    ::testing::ValuesIn(static_shapes_to_test_representation(basic4D)),
@ -178,6 +204,7 @@ const auto basicCases4D = ::testing::Combine(

INSTANTIATE_TEST_SUITE_P(smoke_Activation4D_Eltwise_CPU_BF16, ActivationLayerCPUTest, basicCases4D, ActivationLayerCPUTest::getTestCaseName);

/* ============= Activation (3D) ============= */
std::vector<CPUSpecificParams> cpuParams_5D = {
    CPUSpecificParams({nCdhw16c}, {nCdhw16c}, {}, {}),
    CPUSpecificParams({ndhwc}, {ndhwc}, {}, {}),

@ -0,0 +1,146 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "functional_test_utils/ov_tensor_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "test_utils/cpu_test_utils.hpp"

using namespace InferenceEngine;
using namespace CPUTestUtils;
using namespace ngraph::opset3;
using namespace ov::test;

namespace CPULayerTestsDefinitions {

using BucketizeCPUParamsTuple = std::tuple<InputShape,   // Data shape
                                           InputShape,   // Buckets shape
                                           bool,         // Right edge of interval
                                           ElementType,  // Data input precision
                                           ElementType,  // Buckets input precision
                                           ElementType   // Output precision
                                           >;

class BucketizeLayerCPUTest : public testing::WithParamInterface<BucketizeCPUParamsTuple>,
                              virtual public SubgraphBaseTest {
public:
    static std::string getTestCaseName(const testing::TestParamInfo<BucketizeCPUParamsTuple>& obj) {
        InputShape dataShape;
        InputShape bucketsShape;
        bool with_right_bound;
        ElementType inDataPrc;
        ElementType inBucketsPrc;
        ElementType netPrc;

        std::tie(dataShape, bucketsShape, with_right_bound, inDataPrc, inBucketsPrc, netPrc) = obj.param;

        std::ostringstream result;
        result << "IS=" << CommonTestUtils::partialShape2str({dataShape.first}) << "_"
               << CommonTestUtils::partialShape2str({bucketsShape.first}) << "_";

        result << "TS=";
        for (const auto& item : dataShape.second) {
            result << CommonTestUtils::vec2str(item) << "_";
        }
        result << "BS=";
        for (const auto& item : bucketsShape.second) {
            result << CommonTestUtils::vec2str(item) << "_";
        }

        result << "with_right_bound=" << with_right_bound << "_";
        result << "inDataPrc=" << inDataPrc << "_";
        result << "inBucketsPrc=" << inBucketsPrc << "_";
        result << "netPrc=" << netPrc << "_";
        return result.str();
    }

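    // Bucketize expects sorted bucket boundaries, so the buckets input is generated with
    // create_and_fill_tensor_unique_sequence, which presumably yields unique, ordered
    // values and keeps the reference results deterministic (note the fixed seeds below).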
    void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override {
        inputs.clear();
        const auto& funcInputs = function->inputs();

        auto data_size = shape_size(targetInputStaticShapes[0]);
        ov::runtime::Tensor tensorData = ov::test::utils::create_and_fill_tensor(funcInputs[0].get_element_type(),
                                                                                 targetInputStaticShapes[0],
                                                                                 data_size * 5,
                                                                                 0,
                                                                                 10,
                                                                                 7235346);

        ov::runtime::Tensor tensorBucket =
            ov::test::utils::create_and_fill_tensor_unique_sequence(funcInputs[1].get_element_type(),
                                                                    targetInputStaticShapes[1],
                                                                    0,
                                                                    10,
                                                                    8234231);

        inputs.insert({funcInputs[0].get_node_shared_ptr(), tensorData});
        inputs.insert({funcInputs[1].get_node_shared_ptr(), tensorBucket});
    }

protected:
    void SetUp() override {
        InputShape dataShape;
        InputShape bucketsShape;
        bool with_right_bound;
        ElementType inDataPrc;
        ElementType inBucketsPrc;
        ElementType netPrc;

        targetDevice = CommonTestUtils::DEVICE_CPU;
        std::tie(dataShape, bucketsShape, with_right_bound, inDataPrc, inBucketsPrc, netPrc) = this->GetParam();
        init_input_shapes({dataShape, bucketsShape});

        auto data = std::make_shared<ngraph::op::Parameter>(inDataPrc, inputDynamicShapes[0]);
        data->set_friendly_name("a_data");
        auto buckets = std::make_shared<ngraph::op::Parameter>(inBucketsPrc, inputDynamicShapes[1]);
        buckets->set_friendly_name("b_buckets");
        auto bucketize = std::make_shared<ngraph::op::v3::Bucketize>(data, buckets, netPrc, with_right_bound);
        function = std::make_shared<ngraph::Function>(std::make_shared<ngraph::opset1::Result>(bucketize),
                                                      ngraph::ParameterVector{data, buckets},
                                                      "Bucketize");
    }
};

TEST_P(BucketizeLayerCPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    run();
}

namespace {

const std::vector<ov::test::InputShape> dataShapesDynamic = {
    {{ngraph::Dimension(1, 10), ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic()},
     {{1, 20, 20}, {3, 16, 16}, {10, 16, 16}}},
    {{ngraph::Dimension(1, 10), 3, 50, 50}, {{1, 3, 50, 50}, {2, 3, 50, 50}, {10, 3, 50, 50}}}};

const std::vector<ov::test::InputShape> bucketsShapesDynamic = {{{ngraph::Dimension::dynamic()}, {{5}, {20}, {100}}}};

const std::vector<ov::test::ElementType> inPrc = {ov::element::f32, ov::element::i64, ov::element::i32};
const std::vector<ov::test::ElementType> outPrc = {ov::element::i64, ov::element::i32};

const auto test_Bucketize_right_edge_Dynamic = ::testing::Combine(::testing::ValuesIn(dataShapesDynamic),
                                                                  ::testing::ValuesIn(bucketsShapesDynamic),
                                                                  ::testing::Values(true),
                                                                  ::testing::ValuesIn(inPrc),
                                                                  ::testing::ValuesIn(inPrc),
                                                                  ::testing::ValuesIn(outPrc));

const auto test_Bucketize_left_edge_Dynamic = ::testing::Combine(::testing::ValuesIn(dataShapesDynamic),
                                                                 ::testing::ValuesIn(bucketsShapesDynamic),
                                                                 ::testing::Values(false),
                                                                 ::testing::ValuesIn(inPrc),
                                                                 ::testing::ValuesIn(inPrc),
                                                                 ::testing::ValuesIn(outPrc));

INSTANTIATE_TEST_SUITE_P(smoke_TestsBucketize_right_Dynamic,
                         BucketizeLayerCPUTest,
                         test_Bucketize_right_edge_Dynamic,
                         BucketizeLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TestsBucketize_left_Dynamic,
                         BucketizeLayerCPUTest,
                         test_Bucketize_left_edge_Dynamic,
                         BucketizeLayerCPUTest::getTestCaseName);

} // namespace
} // namespace CPULayerTestsDefinitions

@ -155,6 +155,8 @@ protected:
            PluginConfigParams::YES == configuration[PluginConfigParams::KEY_ENFORCE_BF16]) {
            selectedType += "_BF16";
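            // bfloat16 carries only 8 significand bits, so BF16 runs are compared against
            // the FP32 reference with a looser tolerance (looser still for the gemm kernels).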
            rel_threshold = 1e-2f;
            if (selectedType == "jit_gemm_BF16")
                rel_threshold = 0.05f;
        } else {
            selectedType = makeSelectedTypeStr(selectedType, netType);
        }
@ -180,7 +182,7 @@ TEST_P(ConvolutionLayerCPUTest, CompareWithRefs) {

    // Skip tests for sse41 convolution where ic or oc cannot be exactly divided by the block size,
    // since tails processing for sse41 nspc layout is not supported yet (see 52736).
    if (!inFmts.empty() && (inFmts.front() == nhwc || inFmts.front() == ndhwc) && selectedType.find("jit_sse") != std::string::npos) {
    if (!inFmts.empty() && (inFmts.front() == nwc || inFmts.front() == nhwc || inFmts.front() == ndhwc) && selectedType.find("jit_sse") != std::string::npos) {
        auto inpChannels = function->get_parameters().front()->get_partial_shape()[1].get_length();
        auto outChannels = function->get_output_partial_shape(0)[1].get_length();
        if ((inpChannels % 8) || (outChannels % 8)) {
@ -229,11 +231,67 @@ const std::vector<fusingSpecificParams> fusingParamsSetBF16{
};

/* ============= Convolution params (GEMM layout) ============= */
const SizeVector numOutChannels_Gemm = {6 };
const SizeVector numOutChannels_Gemm = { 6 };

/* ============= Convolution params (blocked and nspc layout) ============= */
const SizeVector numOutChannels = { 64, 63 };

/* ============= Convolution params (1D) ============= */
const std::vector<SizeVector> kernels1d = { {3}, {1} };
const std::vector<SizeVector> strides1d = { {1}, {2} };
const std::vector<std::vector<ptrdiff_t>> padBegins1d = { {0}, {1} };
const std::vector<std::vector<ptrdiff_t>> padEnds1d = { {0} };
const std::vector<SizeVector> dilations1d = { {1}, {2} };
std::vector<InputShape> inputShapes1d = {
    {{}, {{ 2, 64, 7 }}},
    {{}, {{ 1, 67, 7 }}},
    {
        // dynamic shape
        { -1, 64, {1, 200} },
        { // target static shapes
            { 2, 64, 7 },
            { 1, 64, 9 }
        }
    },
    {
        // dynamic shape
        { -1, 67, {1, 200} },
        { // target static shapes
            { 2, 67, 7 },
            { 1, 67, 9 }
        }
    },
    {
        // dynamic shape
        { {1, 200}, 64, -1 },
        { // target static shapes
            { 2, 64, 7 },
            { 1, 64, 5 }
        }
    }
};
std::vector<InputShape> inputShapesPlain2Blocked1d = {
    {{}, {{1, 1, 7}}},
    {{}, {{1, 2, 7}}},
    {{}, {{1, 3, 7}}},
    {
        // dynamic shapes
        {-1, 1, {1, 200}},
        { // target static shapes
            {2, 1, 7},
            {1, 1, 9}
        }
    },
    {
        // dynamic shapes
        {-1, 3, {1, 200}},
        { // target static shapes
            {2, 3, 7},
            {1, 3, 9}
        }
    }
};

/* ============= Convolution params (2D) ============= */
const std::vector<SizeVector> kernels2d = { {3, 3}, {1, 1} };
const std::vector<SizeVector> strides2d = { {1, 1}, {2, 2} };
@ -332,6 +390,76 @@ std::vector<InputShape> inputShapesPlain2Blocked3d = {
/* ============= */

/* INSTANCES */
/* ============= Convolution (Gemm 1D) ============= */
const auto convParams_ExplicitPadding_GEMM_1D = ::testing::Combine(
    ::testing::ValuesIn(kernels1d),
    ::testing::ValuesIn(strides1d),
    ::testing::ValuesIn(padBegins1d),
    ::testing::ValuesIn(padEnds1d),
    ::testing::ValuesIn(dilations1d),
    ::testing::ValuesIn(numOutChannels_Gemm),
    ::testing::Values(ngraph::op::PadType::EXPLICIT)
);

const std::vector<CPUSpecificParams> CPUParams_GEMM_1D = {
    conv_gemm_1D,
    conv_gemm_1D_nspc
};

std::vector<InputShape> inShapesGemm1D = {
    {{}, {{ 2, 12, 7 }}},
    {
        // dynamic shape
        { {1, 200}, 12, {1, 200} },
        { // target static shapes
            { 2, 12, 7 },
            { 1, 12, 5 }
        }
    }
};

INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_GEMM_FP32, ConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_GEMM_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inShapesGemm1D),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_GEMM_1D)),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::Values(cpuEmptyPluginConfig)),
    ConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_GEMM_BF16, ConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_GEMM_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inShapesGemm1D),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_1D})), // todo: [AV] what about conv_gemm_1D_nspc?
        ::testing::ValuesIn(fusingParamsSetBF16),
        ::testing::Values(cpuBF16PluginConfig)),
    ConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_GEMM_I8, ConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_GEMM_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::i8),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inShapesGemm1D),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_GEMM_1D)),
        ::testing::Values(fusingSum),
        ::testing::Values(cpuEmptyPluginConfig)),
    ConvolutionLayerCPUTest::getTestCaseName);

/* ============= Convolution (Gemm 2D) ============= */
const auto convParams_ExplicitPadding_GEMM_2D = ::testing::Combine(
    ::testing::ValuesIn(kernels2d),
@ -576,6 +704,102 @@ INSTANTIATE_TEST_SUITE_P(Conv_3D_GEMM_I8_dilated, ConvolutionLayerCPUTest,
        ::testing::Values(cpuEmptyPluginConfig)),
    ConvolutionLayerCPUTest::getTestCaseName);

/* ============= Convolution (1D) ============= */
const auto convParams_ExplicitPadding_1D = ::testing::Combine(
    ::testing::ValuesIn(kernels1d),
    ::testing::ValuesIn(strides1d),
    ::testing::ValuesIn(padBegins1d),
    ::testing::ValuesIn(padEnds1d),
    ::testing::ValuesIn(dilations1d),
    ::testing::ValuesIn(numOutChannels),
    ::testing::Values(ngraph::op::PadType::EXPLICIT)
);

const std::vector<CPUSpecificParams> CPUParams_1D = {
    conv_sse42_1D,
    conv_avx2_1D,
    conv_avx512_1D,
    conv_sse42_1D_nspc,
    conv_avx2_1D_nspc,
    conv_avx512_1D_nspc
};

INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_FP32, ConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inputShapes1d),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1D)),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::Values(cpuEmptyPluginConfig)),
    ConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_BF16, ConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inputShapes1d),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_1D})), // todo: [AV] what about conv_avx512_1D_nspc?
        ::testing::ValuesIn(fusingParamsSetBF16),
        ::testing::Values(cpuBF16PluginConfig)),
    ConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_I8, ConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::i8),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inputShapes1d),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1D)),
        ::testing::Values(fusingSum),
        ::testing::Values(cpuEmptyPluginConfig)),
    ConvolutionLayerCPUTest::getTestCaseName);

const std::vector<CPUSpecificParams> CPUParams_1D_plain_to_blocked = {
    conv_sse42_plain_to_blocked_1D,
    conv_avx2_plain_to_blocked_1D,
    conv_avx512_plain_to_blocked_1D,
};

INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_PlainToBlocked_FP32, ConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inputShapesPlain2Blocked1d),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1D_plain_to_blocked)),
        ::testing::Values(emptyFusingSpec),
        ::testing::Values(cpuEmptyPluginConfig)),
    ConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_PlainToBlocked_BF16, ConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inputShapesPlain2Blocked1d),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_plain_to_blocked_1D})),
        ::testing::Values(emptyFusingSpec),
        ::testing::Values(cpuEmptyPluginConfig)),
    ConvolutionLayerCPUTest::getTestCaseName);

/* ============= Convolution (2D) ============= */
const auto convParams_ExplicitPadding_2D = ::testing::Combine(
    ::testing::ValuesIn(kernels2d),
@ -696,7 +920,7 @@ const std::vector<CPUSpecificParams> CPUParams_2D_plain_to_blocked = {
    conv_avx512_plain_to_blocked_2D,
};

INSTANTIATE_TEST_SUITE_P(smoke_Conv_PlainToBlocked_2D_FP32, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_PlainToBlocked_FP32, ConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_2D,
@ -710,7 +934,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_PlainToBlocked_2D_FP32, ConvolutionLayerCPUT
        ::testing::Values(cpuEmptyPluginConfig)),
    ConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Conv_PlainToBlocked_2D_BF16, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_PlainToBlocked_BF16, ConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_2D,
@ -870,7 +1094,7 @@ const std::vector<CPUSpecificParams> CPUParams_3D_plain_to_blocked = {
    conv_avx512_plain_to_blocked_3D,
};

INSTANTIATE_TEST_SUITE_P(smoke_Conv_PlainToBlocked_3D_FP32, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_PlainToBlocked_FP32, ConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_3D,
@ -884,7 +1108,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_PlainToBlocked_3D_FP32, ConvolutionLayerCPUT
        ::testing::Values(cpuEmptyPluginConfig)),
    ConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Conv_PlainToBlocked_3D_BF16, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_PlainToBlocked_BF16, ConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_3D,
@ -926,6 +1150,69 @@ INSTANTIATE_TEST_SUITE_P(Conv_PlainToBlocked_3D_BF16_dilated, ConvolutionLayerCP
        ::testing::Values(cpuEmptyPluginConfig)),
    ConvolutionLayerCPUTest::getTestCaseName);

/* ============= Kernel_1x1 (1D) ============= */

const auto convParams_ExplicitPadding_1x1_1D = ::testing::Combine(
    ::testing::Values(SizeVector({1})),
    ::testing::Values(SizeVector({1})),
    ::testing::Values(std::vector<ptrdiff_t>({0})),
    ::testing::Values(std::vector<ptrdiff_t>({0})),
    ::testing::Values(SizeVector({1})),
    ::testing::Values(63),
    ::testing::Values(ngraph::op::PadType::EXPLICIT)
);

const std::vector<CPUSpecificParams> CPUParams_1x1_1D = {
    conv_sse42_1D_1x1,
    conv_avx2_1D_1x1,
    conv_avx512_1D_1x1,
    conv_sse42_1D_1x1_nspc,
    conv_avx2_1D_1x1_nspc,
    conv_avx512_1D_1x1_nspc
};

INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_FP32, ConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_1x1_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inputShapes1d),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1x1_1D)),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::Values(cpuEmptyPluginConfig)),
    ConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_BF16, ConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_1x1_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inputShapes1d),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_1D_1x1, conv_avx512_2D_1x1_nspc})),
        ::testing::ValuesIn(fusingParamsSetBF16),
        ::testing::Values(cpuBF16PluginConfig)),
    ConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D_1x1_I8, ConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_1x1_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::i8),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inputShapes1d),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1x1_1D)),
        ::testing::Values(fusingSum),
        ::testing::Values(cpuEmptyPluginConfig)),
    ConvolutionLayerCPUTest::getTestCaseName);

/* ============= Kernel_1x1 (2D) ============= */

const auto convParams_ExplicitPadding_1x1_2D = ::testing::Combine(
@ -989,56 +1276,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_I8, ConvolutionLayerCPUTest,
        ::testing::Values(cpuEmptyPluginConfig)),
    ConvolutionLayerCPUTest::getTestCaseName);

/* ============= Convolution (1D) ============= */
/* ============= Convolution params (1D) ============= */
const std::vector<SizeVector> kernels1d = { {3} };
const std::vector<SizeVector> strides1d = { {1}, {2} };
const std::vector<std::vector<ptrdiff_t>> padBegins1d = { {0}, {1} };
const std::vector<std::vector<ptrdiff_t>> padEnds1d = { {0} };
const std::vector<SizeVector> dilations1d = { {1}, {2} };

const auto convParams_1D = ::testing::Combine(
    ::testing::ValuesIn(kernels1d),
|
||||
::testing::ValuesIn(strides1d),
|
||||
::testing::ValuesIn(padBegins1d),
|
||||
::testing::ValuesIn(padEnds1d),
|
||||
::testing::ValuesIn(dilations1d),
|
||||
::testing::ValuesIn(numOutChannels),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
|
||||
const std::vector<CPUSpecificParams> CPUParams_1D = {
|
||||
conv_sse42_1D,
|
||||
conv_avx2_1D,
|
||||
conv_avx512_1D
|
||||
};
|
||||
|
||||
std::vector<InputShape> inShapes1D = {
|
||||
{{}, {{ 2, 64, 7 }}},
|
||||
{
|
||||
//dynamic shape
|
||||
{ {1, 200}, 64, -1 },
|
||||
{ //target static shapes
|
||||
{ 2, 64, 7 },
|
||||
{ 1, 64, 5 }
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Conv_1D, ConvolutionLayerCPUTest,
|
||||
::testing::Combine(
|
||||
::testing::Combine(
|
||||
convParams_1D,
|
||||
::testing::Values(ElementType::f32),
|
||||
::testing::Values(ElementType::f32),
|
||||
::testing::Values(ElementType::undefined),
|
||||
::testing::ValuesIn(inShapes1D),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1D)),
|
||||
::testing::Values(fusingAddPerChannel),
|
||||
::testing::Values(cpuEmptyPluginConfig)),
|
||||
ConvolutionLayerCPUTest::getTestCaseName);
|
||||
|
||||
/* ============= Jit Planar ============= */
|
||||
|
||||
/* ============= Convolution planar params (2D) ============= */
|
||||
@ -1068,7 +1305,7 @@ const auto convParams_Planar_ExplicitPadding_2D_dilated = ::testing::Combine(
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Conv_Jit_Planar_2D_FP32, ConvolutionLayerCPUTest,
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_Jit_Planar_FP32, ConvolutionLayerCPUTest,
|
||||
::testing::Combine(
|
||||
::testing::Combine(
|
||||
convParams_Planar_ExplicitPadding_2D,
|
||||
@ -1082,7 +1319,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_Jit_Planar_2D_FP32, ConvolutionLayerCPUTest,
|
||||
::testing::Values(cpuEmptyPluginConfig)),
|
||||
ConvolutionLayerCPUTest::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(Conv_Jit_Planar_2D_FP32_dilated, ConvolutionLayerCPUTest,
|
||||
INSTANTIATE_TEST_SUITE_P(Conv_2D_Jit_Planar_FP32_dilated, ConvolutionLayerCPUTest,
|
||||
::testing::Combine(
|
||||
::testing::Combine(
|
||||
convParams_Planar_ExplicitPadding_2D_dilated,
|
||||
@ -1123,7 +1360,7 @@ const auto convParams_Planar_ExplicitPadding_3D_dilated = ::testing::Combine(
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Conv_Jit_Planar_3D_FP32, ConvolutionLayerCPUTest,
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_Jit_Planar_FP32, ConvolutionLayerCPUTest,
|
||||
::testing::Combine(
|
||||
::testing::Combine(
|
||||
convParams_Planar_ExplicitPadding_3D,
|
||||
@ -1137,7 +1374,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_Jit_Planar_3D_FP32, ConvolutionLayerCPUTest,
|
||||
::testing::Values(cpuEmptyPluginConfig)),
|
||||
ConvolutionLayerCPUTest::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(Conv_Jit_Planar_3D_FP32_dilated, ConvolutionLayerCPUTest,
|
||||
INSTANTIATE_TEST_SUITE_P(Conv_3D_Jit_Planar_FP32_dilated, ConvolutionLayerCPUTest,
|
||||
::testing::Combine(
|
||||
::testing::Combine(
|
||||
convParams_Planar_ExplicitPadding_3D_dilated,
|
||||
|
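// Aside: a minimal sketch (plain gtest only; names are illustrative, not from
// this repository) of how the nested ::testing::Combine(...) calls above
// expand value generators into a Cartesian product, one test case per tuple:
#include <gtest/gtest.h>
#include <string>
#include <tuple>

using DemoParams = std::tuple<int /*kernel*/, int /*stride*/>;

class DemoCombineTest : public ::testing::TestWithParam<DemoParams> {};

TEST_P(DemoCombineTest, ProductIsExpanded) {
    int kernel, stride;
    std::tie(kernel, stride) = GetParam();
    EXPECT_GT(kernel, 0);
    EXPECT_GT(stride, 0);
}

// 2 kernel sizes x 2 strides -> 4 independent test cases, each named by the
// same kind of generator the suites above pass as the fourth argument.
INSTANTIATE_TEST_SUITE_P(smoke_Demo, DemoCombineTest,
    ::testing::Combine(
        ::testing::Values(1, 3),   // kernel sizes
        ::testing::Values(1, 2)),  // strides
    [](const ::testing::TestParamInfo<DemoParams>& info) {
        return "k" + std::to_string(std::get<0>(info.param)) +
               "_s" + std::to_string(std::get<1>(info.param));
    });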
@ -521,4 +521,4 @@
//INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D_MemOrder_dyn_param, EltwiseLayerCPUTest, params_5D_dyn_param, EltwiseLayerCPUTest::getTestCaseName);
//
//} // namespace
//} // namespace CPULayerTestsDefinitions
//} // namespace CPULayerTestsDefinitions
@ -253,6 +253,13 @@ const SizeVector numGroups_Blocked = {2, 4};
const SizeVector numOutChannels_DW = {32};
const SizeVector numGroups_DW = {32};

/* ============= GroupConvolution params (1D) ============= */
const std::vector<SizeVector> kernels1d = { {3}, {1} };
const std::vector<SizeVector> strides1d = { {1}, {2} };
const std::vector<std::vector<ptrdiff_t>> padBegins1d = { {0}, {1} };
const std::vector<std::vector<ptrdiff_t>> padEnds1d = { {0} };
const std::vector<SizeVector> dilations1d = { {1}, {2} };

/* ============= GroupConvolution params (2D) ============= */
const std::vector<SizeVector> kernels2d = {{3, 3}, {1, 1}};
const std::vector<SizeVector> strides2d = {{1, 1}, {2, 2}};
@ -270,6 +277,63 @@ const std::vector<SizeVector> dilations3d = {{1, 1, 1}, {2, 2, 2}};


/* INSTANCES */
/* ============= GroupConvolution (GEMM 1D) ============= */
const auto groupConvParams_ExplicitPadding_Gemm_1D = ::testing::Combine(
    ::testing::ValuesIn(kernels1d),
    ::testing::ValuesIn(strides1d),
    ::testing::ValuesIn(padBegins1d),
    ::testing::ValuesIn(padEnds1d),
    ::testing::ValuesIn(dilations1d),
    ::testing::ValuesIn(numOutChannels_Gemm),
    ::testing::ValuesIn(numGroups_Gemm),
    ::testing::Values(ngraph::op::PadType::EXPLICIT)
);

const std::vector<CPUSpecificParams> CPUParams_Gemm_1D = {
    conv_gemm_1D,
    conv_gemm_1D_nspc
};

std::vector<InputShape> inShapesGemm1D = {
    {{}, {{ 2, 12, 7 }}},
    {
        //dynamic shape
        {{1, 200}, 12, {1, 200}},
        { //target static shapes
            { 2, 12, 7 },
            { 1, 12, 5 }
        }
    }
};

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_1D_Gemm_FP32, GroupConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            groupConvParams_ExplicitPadding_Gemm_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inShapesGemm1D),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Gemm_1D)),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::Values(cpuEmptyPluginConfig)),
    GroupConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_1D_Gemm_BF16, GroupConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            groupConvParams_ExplicitPadding_Gemm_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inShapesGemm1D),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_1D})), // todo: [AV] what about conv_gemm_1D_nspc?
        ::testing::ValuesIn(fusingParamsSetBF16),
        ::testing::Values(cpuBF16PluginConfig)),
    GroupConvolutionLayerCPUTest::getTestCaseName);

/* ============= GroupConvolution (GEMM 2D) ============= */
const auto groupConvParams_ExplicitPadding_Gemm_2D = ::testing::Combine(
    ::testing::ValuesIn(kernels2d),
@ -384,6 +448,89 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_3D_Gemm_BF16, GroupConvolutionLayerCPUT
        ::testing::Values(cpuBF16PluginConfig)),
    GroupConvolutionLayerCPUTest::getTestCaseName);

/* ============= GroupConvolution (1D) ============= */
const auto groupConvParams_ExplicitPadding_1D = ::testing::Combine(
    ::testing::ValuesIn(kernels1d),
    ::testing::ValuesIn(strides1d),
    ::testing::ValuesIn(padBegins1d),
    ::testing::ValuesIn(padEnds1d),
    ::testing::ValuesIn(dilations1d),
    ::testing::ValuesIn(numOutChannels_Blocked),
    ::testing::ValuesIn(numGroups_Blocked),
    ::testing::Values(ngraph::op::PadType::EXPLICIT)
);

const std::vector<CPUSpecificParams> CPUParams_1D = {
    conv_sse42_1D,
    conv_avx2_1D,
    conv_avx512_1D,
    conv_sse42_1D_nspc,
    conv_avx2_1D_nspc,
    conv_avx512_1D_nspc
};

std::vector<InputShape> inputShapes1d = {
    {{}, {{ 2, 64, 7 }}},
    {
        //dynamic shapes
        {-1, 64, {1, 200}},
        { //target static shapes
            { 2, 64, 7 },
            { 1, 64, 9 }
        }
    },
    {
        //dynamic shapes
        { {-1, 64, -1} },
        { //target static shapes
            { 2, 64, 7 },
            { 1, 64, 14 }
        }
    }
};

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_1D_FP32, GroupConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            groupConvParams_ExplicitPadding_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inputShapes1d),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1D)),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::Values(cpuEmptyPluginConfig)),
    GroupConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_1D_FP32_fusingBias, GroupConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            groupConvParams_ExplicitPadding_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inputShapes1d),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1D)),
        ::testing::Values(fusingAddPerChannel),
        ::testing::Values(cpuEmptyPluginConfig)),
    GroupConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_1D_BF16, GroupConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            groupConvParams_ExplicitPadding_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inputShapes1d),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_1D})), // todo: [AV] what about conv_avx512_1D_nspc?
        ::testing::ValuesIn(fusingParamsSetBF16),
        ::testing::Values(cpuBF16PluginConfig)),
    GroupConvolutionLayerCPUTest::getTestCaseName);

/* ============= GroupConvolution (2D) ============= */
const auto groupConvParams_ExplicitPadding_2D = ::testing::Combine(
    ::testing::ValuesIn(kernels2d),
@ -505,6 +652,71 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_3D_BF16, GroupConvolutionLayerCPUTest,
        ::testing::Values(cpuBF16PluginConfig)),
    GroupConvolutionLayerCPUTest::getTestCaseName);

/* ============= GroupConvolution (DW 1D) ============= */
const auto groupConvParams_ExplicitPadding_DW_1D = ::testing::Combine(
    ::testing::ValuesIn(kernels1d),
    ::testing::ValuesIn(strides1d),
    ::testing::ValuesIn(padBegins1d),
    ::testing::ValuesIn(padEnds1d),
    ::testing::ValuesIn(dilations1d),
    ::testing::ValuesIn(numOutChannels_DW),
    ::testing::ValuesIn(numGroups_DW),
    ::testing::Values(ngraph::op::PadType::EXPLICIT)
);

const std::vector<CPUSpecificParams> CPUParams_DW_1D = {
    conv_sse42_dw_1D,
    conv_avx2_dw_1D,
    conv_avx512_dw_1D,
    conv_sse42_dw_1D_nspc,
    conv_avx2_dw_1D_nspc,
    conv_avx512_dw_1D_nspc
};

std::vector<InputShape> inputShapes1dDW = {
    {{}, {{ 2, 32, 7 }}},
    {
        //dynamic shapes
        {-1, 32, {1, 200}},
        { //target static shapes
            { 2, 32, 7 },
            { 1, 32, 9 }
        }
    }
};

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_1D_DW_FP32, GroupConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            groupConvParams_ExplicitPadding_DW_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inputShapes1dDW),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_sse42_dw_1D,
                                                    conv_avx2_dw_1D,
                                                    conv_avx512_dw_1D})), // todo: [AV] what about conv_sse42_dw_1D_nspc,
                                                                          // conv_avx2_dw_1D_nspc, conv_avx512_dw_1D_nspc?
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::Values(cpuEmptyPluginConfig)),
    GroupConvolutionLayerCPUTest::getTestCaseName);


INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_1D_DW_BF16, GroupConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            groupConvParams_ExplicitPadding_DW_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::undefined),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inputShapes1dDW),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_1D})), // todo: [AV] what about conv_avx512_dw_1D_nspc?
        ::testing::ValuesIn(fusingParamsSetBF16),
        ::testing::Values(cpuBF16PluginConfig)),
    GroupConvolutionLayerCPUTest::getTestCaseName);

/* ============= GroupConvolution (DW 2D) ============= */
const auto groupConvParams_ExplicitPadding_DW_2D = ::testing::Combine(
    ::testing::ValuesIn(kernels2d),
@ -965,57 +1177,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_JIT_AVX512_DW_GroupConv, GroupConvolutionLayerCPU
/* ============= JIT AVX512 PLANAR Convolution (not supported with groups) ============= */
/* ============================================= */

/* ============= Convolution (1D) ============= */
/* ============= Convolution params (1D) ============= */
const std::vector<SizeVector> kernels1d = { {3} };
const std::vector<SizeVector> strides1d = { {1}, {2} };
const std::vector<std::vector<ptrdiff_t>> padBegins1d = { {0}, {1} };
const std::vector<std::vector<ptrdiff_t>> padEnds1d = { {0} };
const std::vector<SizeVector> dilations1d = { {1}, {2} };

const auto convParams_1D = ::testing::Combine(
    ::testing::ValuesIn(kernels1d),
    ::testing::ValuesIn(strides1d),
    ::testing::ValuesIn(padBegins1d),
    ::testing::ValuesIn(padEnds1d),
    ::testing::ValuesIn(dilations1d),
    ::testing::ValuesIn(numOutChannels_Blocked),
    ::testing::ValuesIn(numGroups_Blocked),
    ::testing::Values(ngraph::op::PadType::EXPLICIT)
);

const std::vector<CPUSpecificParams> CPUParams_1D = {
    conv_sse42_1D,
    conv_avx2_1D,
    conv_avx512_1D
};

std::vector<InputShape> inputShapes1d = {
    {{}, {{ 2, 64, 7 }}},
    {
        //dynamic shapes
        { {-1, 64, -1} },
        { //target static shapes
            { 2, 64, 7 },
            { 1, 64, 14 }
        }
    }
};

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_1D, GroupConvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_1D,
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::f32),
            ::testing::Values(ElementType::undefined),
            ::testing::ValuesIn(inputShapes1d),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1D)),
        ::testing::Values(fusingAddPerChannel),
        ::testing::Values(cpuEmptyPluginConfig)),
    GroupConvolutionLayerCPUTest::getTestCaseName);

} // namespace

} // namespace CPULayerTestsDefinitions
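// Aside: a minimal sketch of the {dynamic shape, {target static shapes}}
// convention used by inputShapes1d above. InputShapeDemo and matches() are
// illustrative stand-ins, not the real ov::test types; this demo treats -1 as
// "any size" and does not model bounded intervals like {1, 200}.
#include <cstdint>
#include <iostream>
#include <vector>

struct InputShapeDemo {
    std::vector<int64_t> dynamicDims;            // -1 == any size
    std::vector<std::vector<int64_t>> targets;   // static shapes actually run
};

static bool matches(const std::vector<int64_t>& dyn, const std::vector<int64_t>& st) {
    if (dyn.size() != st.size())
        return false;
    for (size_t i = 0; i < dyn.size(); ++i)
        if (dyn[i] != -1 && dyn[i] != st[i])
            return false;
    return true;
}

int main() {
    // Mirrors { {-1, 64, -1}, { {2, 64, 7}, {1, 64, 14} } } from the list above.
    InputShapeDemo shape{{-1, 64, -1}, {{2, 64, 7}, {1, 64, 14}}};
    for (const auto& t : shape.targets)
        std::cout << (matches(shape.dynamicDims, t) ? "ok" : "mismatch") << '\n';
    return 0;
}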
@ -136,8 +136,7 @@ public:
                continue;
            }
            if (inType != ov::element::Type_t::undefined) {
                p.input(ov::preprocess::InputInfo(i)
                           .tensor(ov::preprocess::InputTensorInfo().set_element_type(inType)));
                p.input(i).tensor().set_element_type(inType);
            }
        }
    }
@ -145,8 +144,7 @@ public:
        auto results = function->get_results();
        for (size_t i = 0; i < results.size(); i++) {
            if (outType != ov::element::Type_t::undefined) {
                p.output(ov::preprocess::OutputInfo(i)
                            .tensor(ov::preprocess::OutputTensorInfo().set_element_type(outType)));
                p.output(i).tensor().set_element_type(outType);
            }
        }
    }
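// Aside: the hunk above migrates from constructing ov::preprocess::InputInfo /
// OutputInfo objects to the indexed accessor style. A hedged sketch of the
// newer usage (header path and exact API are assumed from the OpenVINO 2.0
// preprocessing interface; 'model' is a placeholder, not code from this diff):
#include <memory>
#include <openvino/core/preprocess/pre_post_process.hpp>

std::shared_ptr<ov::Model> set_io_types(std::shared_ptr<ov::Model> model,
                                        ov::element::Type inType,
                                        ov::element::Type outType) {
    ov::preprocess::PrePostProcessor p(model);
    for (size_t i = 0; i < model->inputs().size(); ++i)
        if (inType != ov::element::undefined)
            p.input(i).tensor().set_element_type(inType);    // new indexed style
    for (size_t i = 0; i < model->outputs().size(); ++i)
        if (outType != ov::element::undefined)
            p.output(i).tensor().set_element_type(outType);
    return p.build();
}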
@ -141,6 +141,34 @@ const auto ref = CPUSpecificParams{{}, {}, {"ref_any"}, "ref_any"};
const std::vector<CPUSpecificParams> vecCpuConfigs = {ref, sse42, avx, avx512};
const std::vector<ElementType> inpOutPrecision = {ElementType::f32/*, ElementType::bf16*/};

const std::vector<InputShape> inputShapes3D = {
    { {}, {{3, 4, 64}} },
    { {}, {{2, 8, 12}} },
    { {}, {{1, 16, 12}} },
    { {}, {{1, 21, 4}} },
    { {}, {{1, 32, 8}} },
    {
        // dynamic
        {-1, -1, -1},
        // target
        {
            {1, 32, 8},
            {1, 21, 4},
            {2, 8, 12}
        }
    },
    {
        // dynamic
        {{1, 5}, {4, 32}, {1, 64}},
        // target
        {
            {3, 4, 64},
            {1, 16, 12},
            {1, 32, 8}
        }
    }
};

const std::vector<InputShape> inputShapes4D = {
    { {}, {{3, 4, 64, 64}} },
    { {}, {{2, 8, 8, 12}} },
@ -197,6 +225,61 @@ const std::vector<InputShape> inputShapes5D = {
    }
};

/* ============= Pooling (1D) ============= */
const std::vector<LayerTestsDefinitions::poolSpecificParams> paramsMax3D = {
    LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2}, {2}, {0}, {0},
                                               ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false },
    LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {4}, {2}, {0}, {0},
                                               ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false },
    LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2}, {1}, {0}, {0},
                                               ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false },
};

const std::vector<LayerTestsDefinitions::poolSpecificParams> paramsAvg3D = {
    LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {3}, {1}, {1}, {0},
                                               ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_UPPER, false },
    LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {3}, {1}, {1}, {0},
                                               ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, true },
    LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {4}, {4}, {2}, {2},
                                               ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, true },
};

const std::vector<LayerTestsDefinitions::poolSpecificParams> paramsAvg3D_RefOnly = {
    LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::AVG, {2}, {2}, {2}, {2},
                                               ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false },
};
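// Aside: with RoundingType::CEIL, the pooled output length for the 1-D
// configs above follows the standard formula
// out = ceil((in + pad_begin + pad_end - kernel) / stride) + 1.
// A minimal sketch of that arithmetic; pooledLenCeil is a hypothetical
// helper, not a function from this repository:
#include <cmath>
#include <cstdio>

int pooledLenCeil(int in, int kernel, int stride, int padBegin, int padEnd) {
    return static_cast<int>(std::ceil(
            static_cast<double>(in + padBegin + padEnd - kernel) / stride)) + 1;
}

int main() {
    // e.g. the MAX pool {kernel={2}, stride={2}, pads={0},{0}} case over a
    // length-64 axis yields 32 output elements.
    std::printf("%d\n", pooledLenCeil(64, 2, 2, 0, 0));
    return 0;
}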

INSTANTIATE_TEST_SUITE_P(smoke_MaxPool_CPU_3D, PoolingLayerCPUTest,
    ::testing::Combine(
        ::testing::ValuesIn(paramsMax3D),
        ::testing::ValuesIn(inputShapes3D),
        ::testing::ValuesIn(inpOutPrecision),
        ::testing::Values(false),
        ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)),
        ::testing::Values(emptyFusingSpec)),
    PoolingLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_3D, PoolingLayerCPUTest,
    ::testing::Combine(
        ::testing::ValuesIn(paramsAvg3D),
        ::testing::ValuesIn(inputShapes3D),
        ::testing::ValuesIn(inpOutPrecision),
        ::testing::Values(false),
        ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)),
        ::testing::Values(emptyFusingSpec)),
    PoolingLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_3D_NotOptimized, PoolingLayerCPUTest,
    ::testing::Combine(
        ::testing::ValuesIn(paramsAvg3D_RefOnly),
        ::testing::ValuesIn(inputShapes3D),
        ::testing::ValuesIn(inpOutPrecision),
        ::testing::Values(false),
        ::testing::Values(ref),
        ::testing::Values(emptyFusingSpec)),
    PoolingLayerCPUTest::getTestCaseName);

/* ============= Pooling (2D) ============= */
const std::vector<LayerTestsDefinitions::poolSpecificParams> paramsMax4D = {
    LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2, 2}, {2, 2}, {0, 0}, {0, 0},
                                               ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER, false },
@ -258,6 +341,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_AvgPool_CPU_4D_NotOptimized, PoolingLayerCPUTest,
        ::testing::Values(emptyFusingSpec)),
    PoolingLayerCPUTest::getTestCaseName);

/* ============= Pooling (3D) ============= */
const std::vector<LayerTestsDefinitions::poolSpecificParams> paramsMax5D = {
    LayerTestsDefinitions::poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0},
                                               ngraph::op::RoundingType::CEIL, ngraph::op::PadType::SAME_LOWER, false },
@ -4,11 +4,13 @@

#include "ngraph_functions/builders.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"

using namespace InferenceEngine;
using namespace CPUTestUtils;

namespace CPULayerTestsDefinitions {
using namespace ov::test;

struct regionYoloAttributes {
    size_t classes;
@ -20,78 +22,86 @@ struct regionYoloAttributes {
};

using regionYoloParamsTuple = std::tuple<
        ngraph::Shape,                      // Input Shape
        regionYoloAttributes,               // Params
        InputShape,                         // Input Shape
        regionYoloAttributes,               // Params
        std::vector<int64_t>,               // mask
        InferenceEngine::Precision,         // Network input precision
        InferenceEngine::Precision,         // Network output precision
        ov::test::ElementType,              // Network input precision
        ov::test::ElementType,              // Network output precision
        std::map<std::string, std::string>, // Additional network configuration
        std::string>;                       // Device name


class RegionYoloCPULayerTest : public testing::WithParamInterface<regionYoloParamsTuple>,
                               virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
                               virtual public ov::test::SubgraphBaseTest, public CPUTestsBase {
public:
    static std::string getTestCaseName(testing::TestParamInfo<regionYoloParamsTuple> obj) {
        ngraph::Shape inputShape;
        InputShape inputShape;
        regionYoloAttributes attributes;
        std::vector<int64_t> mask;
        InferenceEngine::Precision inpPrecision;
        InferenceEngine::Precision outPrecision;
        ov::test::ElementType inpPrecision;
        ov::test::ElementType outPrecision;
        std::string targetName;
        std::map<std::string, std::string> additionalConfig;

        std::tie(inputShape, attributes, mask, inpPrecision, outPrecision, additionalConfig, targetName) = obj.param;

        std::ostringstream result;
        result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
        result << "IS=" << inputShape << "_";
        result << "classes=" << attributes.classes << "_";
        result << "coords=" << attributes.coordinates << "_";
        result << "num=" << attributes.num_regions << "_";
        result << "doSoftmax=" << attributes.do_softmax << "_";
        result << "axis=" << attributes.start_axis << "_";
        result << "endAxis=" << attributes.end_axis << "_";
        result << "inpPRC=" << inpPrecision.name() << "_";
        result << "outPRC=" << outPrecision.name() << "_";
        result << "inpPRC=" << inpPrecision << "_";
        result << "outPRC=" << outPrecision << "_";
        result << "targetDevice=" << targetName << "_";
        return result.str();
    }
protected:
    void SetUp() override {
        ngraph::Shape inputShape;
        InputShape inputShape;
        regionYoloAttributes attributes;
        std::vector<int64_t> mask;
        ov::test::ElementType inPrc;
        ov::test::ElementType outPrc;
        std::map<std::string, std::string> additionalConfig;

        std::tie(inputShape, attributes, mask, inPrc, outPrc, additionalConfig, targetDevice) = this->GetParam();

        if (inPrc == ov::test::ElementType::bf16) {
            // ticket #72342
            rel_threshold = 0.02;
        }

        init_input_shapes({ inputShape });

        configuration.insert(additionalConfig.begin(), additionalConfig.end());

        selectedType = getPrimitiveType() + "_" + inPrc.name();

        const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
        auto paramRegionYolo = ngraph::builder::makeParams(ngPrc, {inputShape});
        selectedType = getPrimitiveType() + "_" + InferenceEngine::details::convertPrecision(inPrc).name();
        auto paramRegionYolo = ngraph::builder::makeDynamicParams(inPrc, inputDynamicShapes);

        const auto region_yolo = std::make_shared<ngraph::op::v0::RegionYolo>(paramRegionYolo[0],
                attributes.coordinates, attributes.classes, attributes.num_regions,
                attributes.do_softmax, mask, attributes.start_axis, attributes.end_axis);

        function = makeNgraphFunction(ngPrc, paramRegionYolo, region_yolo, "RegionYolo");
        function = makeNgraphFunction(inPrc, paramRegionYolo, region_yolo, "RegionYolo");
    }
};

TEST_P(RegionYoloCPULayerTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    Run();
    run();
    CheckPluginRelatedResults(executableNetwork, "RegionYolo");
}

namespace {
const std::vector<Precision> inpOutPrc = {Precision::BF16, Precision::FP32};
const std::vector<ov::test::ElementType> inpOutPrc = {ov::test::ElementType::bf16, ov::test::ElementType::f32};

const std::map<std::string, std::string> additional_config;

/* *======================* Static Shapes *======================* */

const std::vector<ngraph::Shape> inShapes_caffe = {
    {1, 125, 13, 13}
};
@ -114,6 +124,24 @@ const std::vector<ngraph::Shape> inShapes_v3 = {
    {1, 255, 13, 13}
};

/* *======================* Dynamic Shapes *======================* */

const std::vector<InputShape> inShapes_caffe_dynamic = {
    {{-1, -1, -1, -1}, {{1, 125, 13, 13}, {1, 125, 26, 26}}},
    {{{1, 2}, {100, 125}, {13, 26}, {13, 26}}, {{1, 125, 13, 13}, {1, 125, 26, 26}}}
};

const std::vector<InputShape> inShapes_mxnet_dynamic = {
    {{-1, -1, -1, -1}, {{1, 75, 52, 52}, {1, 75, 32, 32}, {1, 75, 26, 26}}},
    {{{1, 2}, {75, 80}, {26, 52}, {26, 52}}, {{1, 75, 52, 52}, {1, 75, 32, 32}, {1, 75, 26, 26}}},
};

const std::vector<InputShape> inShapes_v3_dynamic = {
    {{-1, -1, -1, -1}, {{1, 255, 52, 52}, {1, 255, 26, 26}, {1, 255, 13, 13}}},
    {{{1, 2}, {255, 256}, {13, 52}, {13, 52}}, {{1, 255, 52, 52}, {1, 255, 26, 26}, {1, 255, 13, 13}}}
};


const std::vector<std::vector<int64_t>> masks = {
    {0, 1, 2},
    {3, 4, 5},
@ -127,7 +155,17 @@ const std::vector<size_t> num_regions = {5, 9};
const regionYoloAttributes yoloV3attr = {80, 4, 9, false, 1, 3};

const auto testCase_yolov3 = ::testing::Combine(
    ::testing::ValuesIn(inShapes_v3),
    ::testing::ValuesIn(static_shapes_to_test_representation(inShapes_v3)),
    ::testing::Values(yoloV3attr),
    ::testing::Values(masks[2]),
    ::testing::ValuesIn(inpOutPrc),
    ::testing::ValuesIn(inpOutPrc),
    ::testing::Values(additional_config),
    ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCase_yolov3_dynamic = ::testing::Combine(
    ::testing::ValuesIn(inShapes_v3_dynamic),
    ::testing::Values(yoloV3attr),
    ::testing::Values(masks[2]),
    ::testing::ValuesIn(inpOutPrc),
@ -139,7 +177,17 @@ const auto testCase_yolov3 = ::testing::Combine(
const regionYoloAttributes yoloV3mxnetAttr = {20, 4, 9, false, 1, 3};

const auto testCase_yolov3_mxnet = ::testing::Combine(
    ::testing::ValuesIn(inShapes_mxnet),
    ::testing::ValuesIn(static_shapes_to_test_representation(inShapes_mxnet)),
    ::testing::Values(yoloV3mxnetAttr),
    ::testing::Values(masks[1]),
    ::testing::ValuesIn(inpOutPrc),
    ::testing::ValuesIn(inpOutPrc),
    ::testing::Values(additional_config),
    ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCase_yolov3_mxnet_dynamic = ::testing::Combine(
    ::testing::ValuesIn(inShapes_mxnet_dynamic),
    ::testing::Values(yoloV3mxnetAttr),
    ::testing::Values(masks[1]),
    ::testing::ValuesIn(inpOutPrc),
@ -151,7 +199,7 @@ const auto testCase_yolov3_mxnet = ::testing::Combine(
const regionYoloAttributes yoloV2caffeAttr = {20, 4, 5, true, 1, 3};

const auto testCase_yolov2_caffe = ::testing::Combine(
    ::testing::ValuesIn(inShapes_caffe),
    ::testing::ValuesIn(static_shapes_to_test_representation(inShapes_caffe)),
    ::testing::Values(yoloV2caffeAttr),
    ::testing::Values(masks[0]),
    ::testing::ValuesIn(inpOutPrc),
@ -160,8 +208,21 @@ const auto testCase_yolov2_caffe = ::testing::Combine(
    ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYolov3CPU, RegionYoloCPULayerTest, testCase_yolov3, RegionYoloCPULayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloMxnetCPU, RegionYoloCPULayerTest, testCase_yolov3_mxnet, RegionYoloCPULayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloCaffeCPU, RegionYoloCPULayerTest, testCase_yolov2_caffe, RegionYoloCPULayerTest::getTestCaseName);
const auto testCase_yolov2_caffe_dynamic = ::testing::Combine(
    ::testing::ValuesIn(inShapes_caffe_dynamic),
    ::testing::Values(yoloV2caffeAttr),
    ::testing::Values(masks[0]),
    ::testing::ValuesIn(inpOutPrc),
    ::testing::ValuesIn(inpOutPrc),
    ::testing::Values(additional_config),
    ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYolov3CPUStatic, RegionYoloCPULayerTest, testCase_yolov3, RegionYoloCPULayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYolov3CPUDynamic, RegionYoloCPULayerTest, testCase_yolov3_dynamic, RegionYoloCPULayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloMxnetCPUStatic, RegionYoloCPULayerTest, testCase_yolov3_mxnet, RegionYoloCPULayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloMxnetCPUDynamic, RegionYoloCPULayerTest, testCase_yolov3_mxnet_dynamic, RegionYoloCPULayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloCaffeCPUStatic, RegionYoloCPULayerTest, testCase_yolov2_caffe, RegionYoloCPULayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TestsRegionYoloCaffeCPUDynamic, RegionYoloCPULayerTest, testCase_yolov2_caffe_dynamic, RegionYoloCPULayerTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions
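// Aside: the static instantiations above now funnel plain shapes through
// static_shapes_to_test_representation(...) to obtain InputShape values.
// A hedged stand-in showing just the mapping (the real helper lives in the
// OpenVINO test utilities; these demo types are not the real ones):
#include <utility>
#include <vector>

using ShapeDemo = std::vector<size_t>;
// first: dynamic part (empty == fully static), second: target static shapes
using InputShapeDemo = std::pair<ShapeDemo, std::vector<ShapeDemo>>;

std::vector<InputShapeDemo> static_shapes_to_test_representation_demo(
        const std::vector<ShapeDemo>& shapes) {
    std::vector<InputShapeDemo> result;
    for (const auto& s : shapes)
        result.emplace_back(ShapeDemo{}, std::vector<ShapeDemo>{s});  // one static target each
    return result;
}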
@ -2,38 +2,86 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <shared_test_classes/single_layer/roi_pooling.hpp>

#include "shared_test_classes/base/ov_subgraph.hpp"
#include "common_test_utils/file_utils.hpp"
#include "functional_test_utils/ov_tensor_utils.hpp"
#include "common_test_utils/data_utils.hpp"

#include "ie_common.h"
#include "test_utils/cpu_test_utils.hpp"
#include "utils/bfloat16.hpp"

using namespace InferenceEngine;
using namespace CPUTestUtils;
using namespace ov::test;

namespace CPULayerTestsDefinitions {
enum ProposalGenerationMode { RANDOM, ULTIMATE_RIGHT_BORDER };

using ROIPoolingCPUTestParamsSet = std::tuple<LayerTestsDefinitions::roiPoolingParamsTuple,
using roiPoolingShapes = std::vector<InputShape>;

using roiPoolingParams = std::tuple<
    roiPoolingShapes,                   // Input shapes
    std::vector<size_t>,                // Pooled shape {pooled_h, pooled_w}
    float,                              // Spatial scale
    ngraph::helpers::ROIPoolingTypes,   // ROIPooling method
    InferenceEngine::Precision,         // Net precision
    LayerTestsUtils::TargetDevice>;     // Device name

using ROIPoolingCPUTestParamsSet = std::tuple<roiPoolingParams,
                                              CPUSpecificParams,
                                              ProposalGenerationMode,
                                              std::map<std::string, std::string>>;

class ROIPoolingCPULayerTest : public testing::WithParamInterface<ROIPoolingCPUTestParamsSet>,
                               virtual public LayerTestsUtils::LayerTestsCommon,
                               public ov::test::SubgraphBaseTest,
                               public CPUTestsBase {
public:
    static std::string getTestCaseName(testing::TestParamInfo<ROIPoolingCPUTestParamsSet> obj) {
        LayerTestsDefinitions::roiPoolingParamsTuple basicParamsSet;
        roiPoolingParams basicParamsSet;
        CPUSpecificParams cpuParams;
        ProposalGenerationMode propMode;
        std::map<std::string, std::string> additionalConfig;

        std::tie(basicParamsSet, cpuParams, propMode, additionalConfig) = obj.param;

        roiPoolingShapes inputShapes;
        std::vector<size_t> poolShape;
        float spatial_scale;
        ngraph::helpers::ROIPoolingTypes pool_method;
        InferenceEngine::Precision netPrecision;
        std::string targetDevice;
        std::tie(inputShapes, poolShape, spatial_scale, pool_method, netPrecision, targetDevice) = basicParamsSet;

        std::ostringstream result;
        result << "netPRC=" << netPrecision.name() << "_";
        for (const auto& shape : inputShapes) {
            result << CommonTestUtils::partialShape2str({ shape.first }) << "_";
        }
        result << "TS=";
        for (const auto& shape : inputShapes) {
            result << "(";
            if (!shape.second.empty()) {
                auto itr = shape.second.begin();
                do {
                    result << CommonTestUtils::vec2str(*itr);
                } while (++itr != shape.second.end() && result << "_");
            }
            result << ")_";
        }

        result << LayerTestsDefinitions::ROIPoolingLayerTest::getTestCaseName(
                testing::TestParamInfo<LayerTestsDefinitions::roiPoolingParamsTuple>(basicParamsSet, 0));
        result << CPUTestsBase::getTestCaseName(cpuParams);

        result << "PS=" << CommonTestUtils::vec2str(poolShape) << "_";
        result << "Scale=" << spatial_scale << "_";
        switch (pool_method) {
        case ngraph::helpers::ROIPoolingTypes::ROI_MAX:
            result << "Max_";
            break;
        case ngraph::helpers::ROIPoolingTypes::ROI_BILINEAR:
            result << "Bilinear_";
            break;
        }
        result << "trgDev=" << targetDevice;
        if (!additionalConfig.empty()) {
            result << "_PluginConf";
            for (auto &item : additionalConfig) {
@ -55,116 +103,132 @@ public:
    }

protected:
    void GenerateInputs() override {
        auto feat_map_shape = cnnNetwork.getInputShapes().begin()->second;
    void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override {
        const ProposalGenerationMode propMode = std::get<2>(this->GetParam());
        const float spatial_scale = std::get<2>(std::get<0>(this->GetParam()));
        const ngraph::helpers::ROIPoolingTypes pool_method = std::get<3>(std::get<0>(this->GetParam()));

        inputs.clear();
        const auto& funcInputs = function->inputs();

        auto feat_map_shape = targetInputStaticShapes[0];
        const auto is_roi_max_mode = (pool_method == ngraph::helpers::ROIPoolingTypes::ROI_MAX);

        const int height = is_roi_max_mode ? feat_map_shape[2] / spatial_scale : 1;
        const int width = is_roi_max_mode ? feat_map_shape[3] / spatial_scale : 1;

        size_t it = 0;
        for (const auto &input : cnnNetwork.getInputsInfo()) {
            const auto &info = input.second;
            InferenceEngine::Blob::Ptr blob;
            void (*propGenerator)(InferenceEngine::Blob::Ptr &);
            switch (propMode) {
            case ULTIMATE_RIGHT_BORDER:
        for (size_t i = 0; i < funcInputs.size(); ++i) {
            const auto& funcInput = funcInputs[i];
            ov::runtime::Tensor tensor;

            if (i == 1) {
                tensor = ov::runtime::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i]);
                if (propMode == ULTIMATE_RIGHT_BORDER) {
                    // because of the non-algebraic character of floating point operations, the following values cause an inequality:
                    // ((end_h - start_h) * (input_h - 1) / (pooled_h - 1)) * (pooled_h - 1) > (end_h - start_h) * (input_h - 1)
                    // and, as a result, the proposal value exceeds the right limit if the border case (current_h == pooled_h - 1)
                    // is not handled explicitly
                    propGenerator = [](InferenceEngine::Blob::Ptr &blob) {
                        auto *data = blob->buffer().as<float *>();
                        for (size_t i = 0; i < blob->size(); i += 5) {
                            data[i] = 0;
                            data[i + 1] = 0.f;
                            data[i + 2] = 0.248046786f;
                            data[i + 3] = 0.471333951f;
                            data[i + 4] = 1.f;
                    switch (funcInput.get_element_type()) {
                    case ngraph::element::f32: {
                        auto* dataPtr = tensor.data<float>();
                        for (size_t i = 0; i < tensor.get_size(); i += 5) {
                            dataPtr[i] = 0;
                            dataPtr[i + 1] = 0.f;
                            dataPtr[i + 2] = 0.248046786f;
                            dataPtr[i + 3] = 0.471333951f;
                            dataPtr[i + 4] = 1.f;
                        }
                    };
                    break;
            case RANDOM:
            default:
                propGenerator = nullptr;
            }

            if (it == 1) {
                blob = make_blob_with_precision(info->getTensorDesc());
                blob->allocate();
                switch (inPrc) {
                case Precision::FP32: {
                    CommonTestUtils::fill_data_roi<Precision::FP32>
                        (blob, feat_map_shape[0] - 1, height, width, 1.0f, is_roi_max_mode, 1, propGenerator);
                    break;
                }
                case Precision::BF16: {
                    CommonTestUtils::fill_data_roi<Precision::BF16>
                        (blob, feat_map_shape[0] - 1, height, width, 1.0f, is_roi_max_mode, 1, propGenerator);
                    break;
                }
                default:
                    IE_THROW() << "roi_pooling. Unsupported precision";
                    break;
                        break;
                    }
                    case ngraph::element::bf16: {
                        auto* dataPtr = tensor.data<std::int16_t>();
                        for (size_t i = 0; i < tensor.get_size(); i += 5) {
                            dataPtr[i] = static_cast<std::int16_t>(ngraph::float16(0.f).to_bits());
                            dataPtr[i + 1] = static_cast<std::int16_t>(ngraph::float16(0.f).to_bits());
                            dataPtr[i + 2] = static_cast<std::int16_t>(ngraph::float16(0.248046786f).to_bits());
                            dataPtr[i + 3] = static_cast<std::int16_t>(ngraph::float16(0.471333951f).to_bits());
                            dataPtr[i + 4] = static_cast<std::int16_t>(ngraph::float16(1.f).to_bits());
                        }
                        break;
                    }
                    default:
                        IE_THROW() << "roi_pooling. Unsupported precision";
                    }
                } else {
                    switch (funcInput.get_element_type()) {
                    case ngraph::element::f32: {
                        CommonTestUtils::fill_data_roi<InferenceEngine::Precision::FP32>(tensor, feat_map_shape[0] - 1, height, width, 1.f, is_roi_max_mode);
                        break;
                    }
                    case ngraph::element::bf16: {
                        CommonTestUtils::fill_data_roi<InferenceEngine::Precision::BF16>(tensor, feat_map_shape[0] - 1, height, width, 1.f, is_roi_max_mode);
                        break;
                    }
                    default:
                        IE_THROW() << "roi_pooling. Unsupported precision";
                    }
                }
            } else {
                blob = GenerateInput(*info);
                tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 10, 0, 1000);
            }
            inputs.push_back(blob);
            it++;

            inputs.insert({ funcInput.get_node_shared_ptr(), tensor });
        }
    }

    void SetUp() override {
        LayerTestsDefinitions::roiPoolingParamsTuple basicParamsSet;
        roiPoolingParams basicParamsSet;
        CPUSpecificParams cpuParams;
        ProposalGenerationMode propMode;
        std::map<std::string, std::string> additionalConfig;

        InferenceEngine::SizeVector inputShape;
        InferenceEngine::SizeVector coordsShape;
        InferenceEngine::SizeVector poolShape;
        InferenceEngine::Precision netPrecision;

        std::tie(basicParamsSet, cpuParams, propMode, additionalConfig) = this->GetParam();
        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
        std::tie(inputShape, coordsShape, poolShape, spatial_scale, pool_method, netPrecision, targetDevice) = basicParamsSet;
        roiPoolingShapes inputShapes;
        std::vector<size_t> poolShape;
        float spatial_scale;
        ngraph::helpers::ROIPoolingTypes pool_method;
        InferenceEngine::Precision netPrecision;
        std::tie(inputShapes, poolShape, spatial_scale, pool_method, netPrecision, targetDevice) = basicParamsSet;

        if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES)
            inPrc = outPrc = netPrecision = Precision::BF16;
        else
            inPrc = outPrc = netPrecision;
            netPrecision = Precision::BF16;
        configuration.insert(additionalConfig.begin(), additionalConfig.end());
        if (selectedType.empty()) {
            selectedType = getPrimitiveType();
        }
        selectedType.push_back('_');
        selectedType += netPrecision.name();

        if (netPrecision == Precision::BF16) {
            rel_threshold = 1e-2;
        }

        init_input_shapes(inputShapes);

        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
        auto params = ngraph::builder::makeParams(ngPrc, {inputShape, coordsShape});
        auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
        auto params = ngraph::builder::makeDynamicParams(ngPrc, inputDynamicShapes);
        auto paramOuts = ngraph::helpers::convert2OutputVector(
                ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));

        std::shared_ptr<ngraph::Node> roi_pooling = ngraph::builder::makeROIPooling(paramOuts[0], paramOuts[1], poolShape, spatial_scale, pool_method);
        auto roi_pooling = ngraph::builder::makeROIPooling(paramOuts[0], paramOuts[1], poolShape, spatial_scale, pool_method);
        ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(roi_pooling)};

        function = makeNgraphFunction(ngPrc, params, roi_pooling, "roi_pooling");
        selectedType += "_";
        selectedType += netPrecision.name();
        function = makeNgraphFunction(ngPrc, params, roi_pooling, "ROIPooling");
        functionRefs = ngraph::clone_function(*function);
    }

private:
    ngraph::helpers::ROIPoolingTypes pool_method;
    float spatial_scale;
    ProposalGenerationMode propMode;
};

TEST_P(ROIPoolingCPULayerTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Run();
    run();
    CheckPluginRelatedResults(executableNetwork, "ROIPooling");
}

namespace {

std::vector<std::map<std::string, std::string>> additionalConfig
    = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}},
       {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}};
std::vector<std::map<std::string, std::string>> additionalConfig{
    {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}},
    {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}
};

/* have to select particular implementation type, since currently
 * nodes always choose the best one */
@ -183,29 +247,92 @@ std::vector<CPUSpecificParams> selectCPUInfoForDevice() {
    return resCPUParams;
}

const std::vector<std::vector<size_t>> inShapes = {{1, 3, 8, 8},
                                                   {3, 4, 50, 50}};
const std::vector<roiPoolingShapes> inShapes = {
    roiPoolingShapes{{{}, {{1, 3, 8, 8}}}, {{}, {{1, 5}}}},
    roiPoolingShapes{{{}, {{1, 3, 8, 8}}}, {{}, {{3, 5}}}},
    roiPoolingShapes{{{}, {{3, 4, 50, 50}}}, {{}, {{3, 5}}}},
    roiPoolingShapes{{{}, {{3, 4, 50, 50}}}, {{}, {{5, 5}}}},
    roiPoolingShapes{
        // input 0
        {
            // dynamic
            {-1, -1, -1, -1},
            // static
            {
                {3, 4, 50, 50}, {3, 4, 50, 50}, {3, 4, 50, 50}, {1, 3, 8, 8}, {1, 3, 8, 8}, {1, 3, 8, 8}
            }
        },
        // input 1
        {
            // dynamic
            {-1, 5},
            // static
            {
                {1, 5}, {3, 5}, {5, 5}, {1, 5}, {3, 5}, {5, 5}
            }
        },
    },
    roiPoolingShapes{
        // input 0
        {
            // dynamic
            {-1, {3, 5}, {7, 60}, -1},
            // static
            {
                {3, 4, 50, 50}, {1, 3, 7, 8}, {1, 5, 59, 8}, {3, 5, 60, 8},
            }
        },
        // input 1
        {
            // dynamic
            {{1, 5}, 5},
            // static
            {
                {1, 5}, {3, 5}, {4, 5}, {5, 5}
            }
        },
    },
    roiPoolingShapes{
        // input 0
        {
            // dynamic
            {{1, 8}, {3, 5}, {7, 60}, {5, 50}},
            // static
            {
                {3, 4, 50, 50}, {1, 3, 7, 8}, {8, 5, 59, 5}, {3, 5, 60, 8},
            }
        },
        // input 1
        {
            // dynamic
            {{1, 5}, 5},
            // static
            {
                {1, 5}, {2, 5}, {4, 5}, {5, 5}
            }
        },
    },
};

const std::vector<std::vector<size_t>> pooledShapes_max = {{1, 1},
                                                           {2, 2},
                                                           {3, 3},
                                                           {6, 6}};
const std::vector<std::vector<size_t>> pooledShapes_max = {
    {1, 1},
    {2, 2},
    {3, 3},
    {6, 6}
};

const std::vector<std::vector<size_t>> pooledShapes_bilinear = {{1, 1},
                                                                {2, 2},
                                                                {3, 3},
                                                                {6, 6}};

const std::vector<std::vector<size_t>> coordShapes = {{1, 5},
                                                      {3, 5},
                                                      {5, 5}};
const std::vector<std::vector<size_t>> pooledShapes_bilinear = {
    {1, 1},
    {2, 2},
    {3, 3},
    {6, 6}
};

const std::vector<InferenceEngine::Precision> netPRCs = {InferenceEngine::Precision::FP32, InferenceEngine::Precision::BF16};

const std::vector<float> spatial_scales = {0.625f, 1.f};

const auto test_ROIPooling_max = ::testing::Combine(::testing::ValuesIn(inShapes),
                                                    ::testing::ValuesIn(coordShapes),
                                                    ::testing::ValuesIn(pooledShapes_max),
                                                    ::testing::ValuesIn(spatial_scales),
                                                    ::testing::Values(ngraph::helpers::ROIPoolingTypes::ROI_MAX),
@ -213,7 +340,6 @@ const auto test_ROIPooling_max = ::testing::Combine(::testing::ValuesIn(inShapes
                                                    ::testing::Values(CommonTestUtils::DEVICE_CPU));

const auto test_ROIPooling_bilinear = ::testing::Combine(::testing::ValuesIn(inShapes),
                                                         ::testing::ValuesIn(coordShapes),
                                                         ::testing::ValuesIn(pooledShapes_bilinear),
                                                         ::testing::Values(spatial_scales[1]),
                                                         ::testing::Values(ngraph::helpers::ROIPoolingTypes::ROI_BILINEAR),
@ -238,8 +364,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ROIPoolingCPU_bilinear,

INSTANTIATE_TEST_SUITE_P(smoke_ROIPoolingCPU_bilinear_ultimateRightBorderProposal,
                         ROIPoolingCPULayerTest,
                         ::testing::Combine(::testing::Combine(::testing::Values(std::vector<size_t> { 1, 1, 50, 50 }),
                                                               ::testing::Values(std::vector<size_t> { 1, 5 }),
                         ::testing::Combine(::testing::Combine(::testing::Values(roiPoolingShapes{{{}, {{1, 1, 50, 50}}}, {{}, {{1, 5}}}}),
                                                               ::testing::Values(std::vector<size_t> { 4, 4 }),
                                                               ::testing::Values(spatial_scales[1]),
                                                               ::testing::Values(ngraph::helpers::ROIPoolingTypes::ROI_BILINEAR),
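// Aside: the bf16 branch above stores raw 16-bit patterns obtained via
// to_bits(). bfloat16 itself is simply the upper half of an IEEE-754 float32,
// so a generic conversion (an illustration, not the repository's helper) can
// be sketched as:
#include <cstdint>
#include <cstdio>
#include <cstring>

// Adding 0x7FFF plus the LSB of the kept half before shifting gives
// round-to-nearest-even instead of plain truncation.
static std::uint16_t float_to_bf16_bits(float value) {
    std::uint32_t bits;
    std::memcpy(&bits, &value, sizeof(bits));  // safe type pun
    bits += 0x7FFFu + ((bits >> 16) & 1u);
    return static_cast<std::uint16_t>(bits >> 16);
}

int main() {
    std::printf("0x%04X\n", float_to_bf16_bits(1.0f));  // prints 0x3F80
    return 0;
}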
@ -0,0 +1,554 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ngraph_functions/builders.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"


using namespace CPUTestUtils;
using namespace ov::test;

namespace CPULayerTestsDefinitions {

struct Slice8SpecificParams {
    std::vector<int64_t> start;
    std::vector<int64_t> stop;
    std::vector<int64_t> step;
    std::vector<int64_t> axes;
};

typedef std::tuple<
        std::vector<InputShape>, // Parameters shapes
        Slice8SpecificParams,    // Slice-8 specific parameters
        ElementType,             // Network precision
        CPUSpecificParams        // CPU specific parameters
> Slice8LayerTestCPUParam;

class Slice8LayerCPUTest : public testing::WithParamInterface<Slice8LayerTestCPUParam>,
                           virtual public SubgraphBaseTest, public CPUTestsBase {
public:
    static std::string getTestCaseName(testing::TestParamInfo<Slice8LayerTestCPUParam> obj) {
        std::vector<InputShape> shapes;
        Slice8SpecificParams params;
        ElementType netPrecision;
        CPUSpecificParams cpuParams;
        std::tie(shapes, params, netPrecision, cpuParams) = obj.param;

        std::ostringstream result;
        result << "IS=(";
        for (const auto& shape : shapes) {
            result << CommonTestUtils::partialShape2str({shape.first}) << "_";
        }
        result << ")_TS=(";
        for (const auto& shape : shapes) {
            for (const auto& item : shape.second) {
                result << CommonTestUtils::vec2str(item) << "_";
            }
        }
        result << "start=" << CommonTestUtils::vec2str(params.start) << "_";
        result << "stop=" << CommonTestUtils::vec2str(params.stop) << "_";
        result << "step=" << CommonTestUtils::vec2str(params.step) << "_";
        result << "axes=" << CommonTestUtils::vec2str(params.axes) << "_";
        result << "netPRC=" << netPrecision << "_";
        result << CPUTestsBase::getTestCaseName(cpuParams);

        return result.str();
    }

protected:
    void SetUp() override {
        std::vector<InputShape> shapes;
        Slice8SpecificParams sliceParams;
        ElementType netPrecision;
        CPUSpecificParams cpuParams;
        std::tie(shapes, sliceParams, netPrecision, cpuParams) = this->GetParam();
        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;

        selectedType = makeSelectedTypeStr(selectedType, netPrecision);
        targetDevice = CommonTestUtils::DEVICE_CPU;
        init_input_shapes(shapes);

        auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes);
        auto sliceOp = ngraph::builder::makeSlice(params[0], sliceParams.start, sliceParams.stop, sliceParams.step, sliceParams.axes, netPrecision);

        function = makeNgraphFunction(netPrecision, params, sliceOp, "Slice8");
    }
};

TEST_P(Slice8LayerCPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    run();
    CheckPluginRelatedResults(executableNetwork, "Slice8");
}
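// Aside: Slice8SpecificParams mirrors the start/stop/step/axes inputs of the
// opset8 Slice operation. A minimal 1-D reference of the semantics, assuming
// non-negative start/stop and positive step (the real op also accepts
// negative indices and steps):
#include <algorithm>
#include <cassert>
#include <vector>

// out[i] = in[start + i * step], with stop exclusive and clamped to size.
static std::vector<int> slice1d(const std::vector<int>& in, int start, int stop, int step) {
    std::vector<int> out;
    stop = std::min<int>(stop, static_cast<int>(in.size()));
    for (int i = start; i < stop; i += step)
        out.push_back(in[i]);
    return out;
}

int main() {
    // Matches Slice8SpecificParams{ {0}, {16}, {2}, {0} } applied to axis 0.
    assert(slice1d({0, 1, 2, 3, 4, 5}, 0, 16, 2) == (std::vector<int>{0, 2, 4}));
    return 0;
}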
|
||||
|
||||
namespace {
|
||||
|
||||
const auto cpuParams_nChw16c = CPUSpecificParams {{nChw16c}, {nChw16c}, {}, {}};
|
||||
const auto cpuParams_nCdhw16c = CPUSpecificParams {{nCdhw16c}, {nCdhw16c}, {}, {}};
|
||||
|
||||
const auto cpuParams_nChw8c = CPUSpecificParams {{nChw8c}, {nChw8c}, {}, {}};
|
||||
const auto cpuParams_nCdhw8c = CPUSpecificParams {{nCdhw8c}, {nCdhw8c}, {}, {}};
|
||||
|
||||
const auto cpuParams_nhwc = CPUSpecificParams {{nhwc}, {nhwc}, {}, {}};
|
||||
const auto cpuParams_ndhwc = CPUSpecificParams {{ndhwc}, {ndhwc}, {}, {}};
|
||||
|
||||
const auto cpuParams_nchw = CPUSpecificParams {{nchw}, {nchw}, {}, {}};
|
||||
const auto cpuParams_ncdhw = CPUSpecificParams {{ncdhw}, {ncdhw}, {}, {}};
|
||||
|
||||
const std::vector<ElementType> inputPrecisions = {
|
||||
ElementType::f32,
|
||||
ElementType::bf16,
|
||||
ElementType::i8
|
||||
};
|
||||
|
||||
const std::vector<std::vector<InputShape>> inputShapesDynamic2D = {
|
||||
{
|
||||
{ // Origin dynamic shape
|
||||
{-1, -1},
|
||||
{ // Dynamic shapes instances
|
||||
{32, 20}, {16, 16}, {24, 16}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
{ // Origin dynamic shape
|
||||
{-1, 16},
|
||||
{ // Dynamic shapes instances
|
||||
{16, 16}, {20, 16}, {32, 16}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
{ // Origin dynamic shape
|
||||
{ {16, 32}, {16, 32} },
|
||||
{ // Dynamic shapes instances
|
||||
{16, 32}, {32, 16}, {24, 24}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const std::vector<Slice8SpecificParams> paramsPlain2D = {
|
||||
Slice8SpecificParams{ { 0, 10 }, { 16, 16 }, { 1, 1 }, { 0, 1 } },
|
||||
Slice8SpecificParams{ { 2, 5 }, { 16, 8 }, { 1, 1 }, { } },
|
||||
Slice8SpecificParams{ { 2, 5 }, { 16, 16 }, { 1, 2 }, { 0, 1 } },
|
||||
Slice8SpecificParams{ { 0, 0 }, { 16, 16 }, { 1, 2 }, { 1, 0} },
|
||||
Slice8SpecificParams{ { 0 }, { 16 }, { 2 }, { 0 } },
|
||||
Slice8SpecificParams{ { 0 }, { 16 }, { 1 }, { 1 } }
|
||||
};
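
// Worked example: on a 16x16 input, Slice8SpecificParams{ { 2, 5 }, { 16, 16 }, { 1, 2 }, { 0, 1 } }
// takes rows 2..15 with step 1 (14 rows) and columns 5, 7, 9, 11, 13, 15 with step 2
// (6 columns), so the expected output shape is 14x6.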

INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Plain_Static_2D, Slice8LayerCPUTest,
        ::testing::Combine(
                ::testing::Values(static_shapes_to_test_representation({{32, 20}})),
                ::testing::ValuesIn(paramsPlain2D),
                ::testing::ValuesIn(inputPrecisions),
                ::testing::Values(emptyCPUSpec)),
        Slice8LayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Plain_Dynamic_2D, Slice8LayerCPUTest,
        ::testing::Combine(
                ::testing::ValuesIn(inputShapesDynamic2D),
                ::testing::ValuesIn(paramsPlain2D),
                ::testing::ValuesIn(inputPrecisions),
                ::testing::Values(emptyCPUSpec)),
        Slice8LayerCPUTest::getTestCaseName);


const std::vector<Slice8SpecificParams> testCasesCommon4D = {
        Slice8SpecificParams{ { 0, 2, 5, 4 }, { 1, 4, 28, 27 }, { 1, 1, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, 1, 0, 0 }, { 20, 3, 32, 1 }, { 1, 1, 1, 1 }, { 3, 1, 2, 0 } },
        Slice8SpecificParams{ { 0, 0, 10, 0 }, { 1, 3, 20, 20 }, { 1, 1, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, 0, 20, 20 }, { 1, 5, 26, 25 }, { 1, 1, 2, 1 }, { 0, 1, 3, 2 } },
        Slice8SpecificParams{ { 0, 0, 0, 20 }, { 1, 2, 30, 30 }, { 1, 1, 2, 1 }, { } },
        Slice8SpecificParams{ { 0, 0, 2, 10 }, { 1, 3, 32, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
        Slice8SpecificParams{ { 0, 1, 0, 10 }, { 1, 5, 32, 30 }, { 1, 1, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, 1, 2, 10 }, { 1, 5, 32, 18 }, { 1, 1, 1, 2 }, { 0, 1, 2, 3 } },
        Slice8SpecificParams{ { 0, 0, 2, 10 }, { 1, 8, 32, 18 }, { 1, 2, 1, 2 }, { } },
        Slice8SpecificParams{ { 0, 0, 10 }, { 2, 32, 18 }, { 1, 1, 1 }, { 1, 2, 3 } },
        Slice8SpecificParams{ { 0, 10 }, { 2, 32 }, { 1, 1 }, { 1, 3 } }
};

const std::vector<std::vector<ov::Shape>> inputShapesStatic4D = {
        {{ 1, 5, 32, 32 }}, {{ 2, 5, 32, 48 }}
};

const std::vector<std::vector<InputShape>> inputShapesDynamic4D = {
        {
            { // Origin dynamic shape
                {-1, -1, -1, -1},
                { // Dynamic shapes instances
                    { 1, 5, 32, 32 }, { 2, 5, 32, 32 }, { 1, 5, 64, 64 }
                }
            }
        },
        {
            { // Origin dynamic shape
                {-1, 5, -1, -1},
                { // Dynamic shapes instances
                    { 1, 5, 32, 32 }, { 2, 5, 32, 32 }, { 3, 5, 32, 36 }
                }
            }
        },
        {
            { // Origin dynamic shape
                {{1, 5}, 5, {32, 64}, {32, 64}},
                { // Dynamic shapes instances
                    { 2, 5, 32, 32 }, { 1, 5, 48, 32 }, { 5, 5, 32, 32 }
                }
            }
        }
};

const std::vector<CPUSpecificParams> CPUParamsCommon4D = {
        cpuParams_nchw,
        cpuParams_nhwc,
};

INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_4D, Slice8LayerCPUTest,
        ::testing::Combine(
                ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesStatic4D)),
                ::testing::ValuesIn(testCasesCommon4D),
                ::testing::ValuesIn(inputPrecisions),
                ::testing::ValuesIn(CPUParamsCommon4D)),
        Slice8LayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_4D, Slice8LayerCPUTest,
        ::testing::Combine(
                ::testing::ValuesIn(inputShapesDynamic4D),
                ::testing::ValuesIn(testCasesCommon4D),
                ::testing::ValuesIn(inputPrecisions),
                ::testing::ValuesIn(CPUParamsCommon4D)),
        Slice8LayerCPUTest::getTestCaseName);


const std::vector<Slice8SpecificParams> testCasesBlocked4DSubset1 = {
        Slice8SpecificParams{ { 0, 0, 0, 0 }, { 1, 32, 32, 32 }, { 1, 1, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, 0, 16, 0 }, { 1, 32, 32, 32 }, { 1, 1, 1, 1 }, { 0, 3, 2, 1 } },
        Slice8SpecificParams{ { 0, 0, 0 }, { 32, 32, 16 }, { 1, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, 0, 0 }, { 16, 32, 32 }, { 1, 1, 1 }, { 1, 3, 2 } },
};

const std::vector<Slice8SpecificParams> testCasesBlocked4DSubset2 = {
        Slice8SpecificParams{ { 0, 0, 5, 4 }, { 1, 16, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
        Slice8SpecificParams{ { 0, 16, 0, 0 }, { 1, 32, 10, 10 }, { 1, 1, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, 0, 10, 0 }, { 16, 1, 20, 10 }, { 1, 1, 1, 1 }, { 1, 0, 2, 3 } },
        Slice8SpecificParams{ { 0, 0, 20, 20 }, { 1, 32, 25, 25 }, { 1, 1, 1, 1 }, { 0, 1, 3, 2 } },
        Slice8SpecificParams{ { 0, 16, 0, 20 }, { 32, 32, 1, 30 }, { 1, 1, 1, 2 }, { 2, 1, 0, 3 } },
        Slice8SpecificParams{ { 0, 16, 2, 10 }, { 1, 32, 32, 20 }, { 1, 1, 2, 1 }, { 0, 1, 2, 3 } },
        Slice8SpecificParams{ { 0, 16, 0, 0 }, { 2, 64, 32, 20 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
        Slice8SpecificParams{ { 0, 32, 0, 0 }, { 2, 50, 32, 20 }, { 1, 1, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, 0, 0, 0 }, { 32, 12, 2, 20 }, { 1, 1, 1, 1 }, { 0, 3, 2, 1 } },
        Slice8SpecificParams{ { 0, -16, 0, 10 }, { 2, 100, 32, 20 }, { 1, 1, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, -16, 0, 0 }, { 2, -4, 32, 20 }, { 1, 1, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, -32, 0, 0 }, { 2, -12, 32, 20 }, { 1, 1, 1, 1 }, { } }
};
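
// Note on negative bounds in the subset above: per the Slice-8 spec, negative start/stop
// values count from the end of the axis, and out-of-range values are clamped to the axis
// bounds. E.g. with C = 64, start { 0, -16, 0, 0 } and stop { 2, -4, 32, 20 } select
// channels 48..59, while a stop of 100 on the channel axis is simply clamped to 64.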

const std::vector<std::vector<ov::Shape>> inputShapesBlockedStatic4DSubset1 = {
        {{ 1, 32, 32, 32 }}, {{ 1, 32, 32, 64 }}
};

const std::vector<std::vector<ov::Shape>> inputShapesBlockedStatic4DSubset2 = {
        {{ 1, 64, 32, 32 }}, {{ 1, 64, 32, 64 }}
};

const std::vector<std::vector<InputShape>> inputShapesBlockedDynamic4DSubset1 = {
        {
            { // Origin dynamic shape
                {-1, 32, -1, -1},
                { // Dynamic shapes instances
                    { 1, 32, 32, 32 }, { 2, 32, 32, 32 }, { 3, 32, 32, 48 }
                }
            }
        },
        {
            { // Origin dynamic shape
                {{1, 5}, 32, {32, 64}, {32, 64}},
                { // Dynamic shapes instances
                    { 2, 32, 32, 32 }, { 1, 32, 48, 32 }, { 5, 32, 32, 48 }
                }
            }
        }
};

const std::vector<std::vector<InputShape>> inputShapesBlockedDynamic4DSubset2 = {
        {
            { // Origin dynamic shape
                {-1, 64, -1, -1},
                { // Dynamic shapes instances
                    { 1, 64, 64, 32 }, { 2, 64, 32, 32 }, { 3, 64, 32, 48 }
                }
            }
        },
        {
            { // Origin dynamic shape
                {{1, 5}, 64, {32, 64}, {32, 64}},
                { // Dynamic shapes instances
                    { 2, 64, 32, 32 }, { 1, 64, 48, 32 }, { 1, 64, 64, 64 }
                }
            }
        }
};

const std::vector<CPUSpecificParams> CPUParamsBlocked4D = {
        cpuParams_nChw16c,
        cpuParams_nChw8c,
};

INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_4D_Subset1, Slice8LayerCPUTest,
        ::testing::Combine(
                ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesBlockedStatic4DSubset1)),
                ::testing::ValuesIn(testCasesBlocked4DSubset1),
                ::testing::ValuesIn(inputPrecisions),
                ::testing::ValuesIn(CPUParamsBlocked4D)),
        Slice8LayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_4D_Subset1, Slice8LayerCPUTest,
        ::testing::Combine(
                ::testing::ValuesIn(inputShapesBlockedDynamic4DSubset1),
                ::testing::ValuesIn(testCasesBlocked4DSubset1),
                ::testing::ValuesIn(inputPrecisions),
                ::testing::ValuesIn(CPUParamsBlocked4D)),
        Slice8LayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_4D_Subset2, Slice8LayerCPUTest,
        ::testing::Combine(
                ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesBlockedStatic4DSubset2)),
                ::testing::ValuesIn(testCasesBlocked4DSubset2),
                ::testing::ValuesIn(inputPrecisions),
                ::testing::ValuesIn(CPUParamsBlocked4D)),
        Slice8LayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_4D_Subset2, Slice8LayerCPUTest,
        ::testing::Combine(
                ::testing::ValuesIn(inputShapesBlockedDynamic4DSubset2),
                ::testing::ValuesIn(testCasesBlocked4DSubset2),
                ::testing::ValuesIn(inputPrecisions),
                ::testing::ValuesIn(CPUParamsBlocked4D)),
        Slice8LayerCPUTest::getTestCaseName);


const std::vector<Slice8SpecificParams> testCasesCommon5D = {
        Slice8SpecificParams{ { 0, 2, 0, 5, 4 }, { 1, 4, 5, 28, 27 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
        Slice8SpecificParams{ { 0, 0, 10, 0, 0 }, { 1, 5, 20, 32, 20 }, { 1, 1, 1, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, 1, 10, 0, 0 }, { 20, 3, 20, 32, 1 }, { 1, 1, 1, 1, 1 }, { 4, 1, 2, 3, 0 } },
        Slice8SpecificParams{ { 0, 20, 0, 0, 20 }, { 1, 30, 20, 5, 26 }, { 1, 1, 1, 2, 2 }, { 0, 3, 2, 1, 4 } },
        Slice8SpecificParams{ { 0, 0, 10, 0, 20 }, { 1, 2, 20, 30, 30 }, { 1, 1, 2, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, 0, 2, 10, 0 }, { 1, 5, 10, 32, 20 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
        Slice8SpecificParams{ { 0, 1, 0, 10, 0 }, { 1, 5, 20, 32, 32 }, { 1, 1, 1, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, 0, 0, 0, 0 }, { 1, 5, 10, 16, 16 }, { 1, 1, 2, 1, 1 }, { 0, 1, 2, 3, 4 } }
};

const std::vector<std::vector<ov::Shape>> inputShapesStatic5D = {
        {{ 1, 5, 20, 32, 32 }}, {{ 2, 5, 32, 32, 32 }}
};

const std::vector<std::vector<InputShape>> inputShapesDynamic5D = {
        {
            { // Origin dynamic shape
                {-1, -1, -1, -1, -1},
                { // Dynamic shapes instances
                    { 1, 5, 32, 32, 32 }, { 1, 5, 32, 32, 48 }, { 1, 5, 64, 64, 64 }, { 1, 10, 32, 32, 32 }
                }
            }
        },
        {
            { // Origin dynamic shape
                {-1, 5, -1, -1, -1},
                { // Dynamic shapes instances
                    { 1, 5, 32, 32, 48 }, { 1, 5, 32, 48, 32 }, { 1, 5, 48, 32, 32 }
                }
            }
        },
        {
            { // Origin dynamic shape
                {{1, 5}, 5, {32, 64}, {32, 64}, {32, 64}},
                { // Dynamic shapes instances
                    { 2, 5, 32, 32, 32 }, { 1, 5, 48, 32, 32 }, { 5, 5, 32, 32, 48 }
                }
            }
        }
};

const std::vector<CPUSpecificParams> CPUParamsCommon5D = {
        cpuParams_ncdhw,
        cpuParams_ndhwc,
};

INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_5D, Slice8LayerCPUTest,
        ::testing::Combine(
                ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesStatic5D)),
                ::testing::ValuesIn(testCasesCommon5D),
                ::testing::ValuesIn(inputPrecisions),
                ::testing::ValuesIn(CPUParamsCommon5D)),
        Slice8LayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_5D, Slice8LayerCPUTest,
        ::testing::Combine(
                ::testing::ValuesIn(inputShapesDynamic5D),
                ::testing::ValuesIn(testCasesCommon5D),
                ::testing::ValuesIn(inputPrecisions),
                ::testing::ValuesIn(CPUParamsCommon5D)),
        Slice8LayerCPUTest::getTestCaseName);

const std::vector<Slice8SpecificParams> testCasesBlocked5DSubset1 = {
        Slice8SpecificParams{ { 0, 0, 0, 5, 4 }, { 1, 16, 5, 28, 27 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
        Slice8SpecificParams{ { 0, 0, 10, 0, 0 }, { 1, 16, 20, 32, 20 }, { 1, 1, 1, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, 0, 0, 20, 20 }, { 16, 1, 20, 26, 30 }, { 1, 1, 1, 2, 2 }, { 1, 0, 2, 4, 3 } },
        Slice8SpecificParams{ { 0, 0, 10, 0, 20 }, { 1, 16, 20, 30, 30 }, { 1, 1, 2, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, 0, 2, 10, 0 }, { 1, 16, 10, 32, 20 }, { 1, 1, 1, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, 0, 0, 10, 0 }, { 1, 8, 20, 32, 32 }, { 1, 1, 1, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, 0, 0, 0, 0 }, { 1, 16, 10, 16, 16 }, { 1, 1, 2, 1, 1 }, { 0, 1, 2, 3, 4 } },
};

const std::vector<Slice8SpecificParams> testCasesBlocked5DSubset2 = {
        Slice8SpecificParams{ { 0, 0, 0, 5, 4 }, { 1, 16, 5, 28, 27 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
        Slice8SpecificParams{ { 0, 0, 5, 4 }, { 16, 5, 28, 27 }, { 1, 1, 1, 1 }, { 1, 2, 3, 4 } },
        Slice8SpecificParams{ { 0, 0, 10, 0, 0 }, { 1, 16, 20, 32, 20 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
        Slice8SpecificParams{ { 0, 0, 0, 20, 20 }, { 1, 20, 16, 30, 26 }, { 1, 1, 1, 2, 2 }, { 0, 2, 1, 3, 4 } },
        Slice8SpecificParams{ { 0, 0, 10, 0, 20 }, { 1, 16, 20, 30, 30 }, { 1, 1, 2, 1, 1 }, { 0, 1, 2, 3, 4 } },
        Slice8SpecificParams{ { 0, 0, 2, 10, 0 }, { 1, 16, 10, 32, 20 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
        Slice8SpecificParams{ { 0, 0, 0, 10, 0 }, { 1, 8, 20, 32, 32 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
        Slice8SpecificParams{ { 0, 0, 0, 0, 0 }, { 10, 16, 1, 16, 16 }, { 2, 1, 1, 1, 1 }, { 2, 1, 0, 3, 4 } },
        Slice8SpecificParams{ { 0, 0, 0, 0, 0 }, { 1, 25, 20, 10, 10 }, { 1, 1, 1, 1, 1 }, { } },
        Slice8SpecificParams{ { 0, 16, 0, 0, 0 }, { 1, 25, 20, 10, 10 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
        Slice8SpecificParams{ { 0, 16, 0, 0, 0 }, { 1, 64, 20, 10, 10 }, { 1, 1, 1, 1, 1 }, { 0, 1, 2, 3, 4 } },
};

const std::vector<std::vector<ov::Shape>> inputShapesBlockedStatic5DSubset1 = {
        {{ 1, 16, 32, 32, 32 }}, {{ 2, 16, 32, 32, 32 }}, {{ 2, 32, 32, 32, 32 }}
};

const std::vector<std::vector<ov::Shape>> inputShapesBlockedStatic5DSubset2 = {
        {{ 1, 64, 32, 32, 32 }}, {{ 2, 64, 32, 64, 32 }}, {{ 2, 64, 32, 32, 32 }}
};

const std::vector<std::vector<InputShape>> inputShapesBlockedDynamic5DSubset1 = {
        {
            { // Origin dynamic shape
                {-1, 16, -1, -1, -1},
                { // Dynamic shapes instances
                    { 1, 16, 32, 32, 32 }, { 2, 16, 32, 32, 32 }, { 2, 16, 32, 32, 32 }
                }
            }
        },
        {
            { // Origin dynamic shape
                {{1, 5}, 16, {16, 32}, {16, 32}, {16, 32}},
                { // Dynamic shapes instances
                    { 1, 16, 32, 32, 32 }, { 2, 16, 32, 32, 32 }, { 1, 16, 20, 32, 32 }
                }
            }
        }
};

const std::vector<std::vector<InputShape>> inputShapesBlockedDynamic5DSubset2 = {
        {
            { // Origin dynamic shape
                {-1, 64, -1, -1, -1},
                { // Dynamic shapes instances
                    { 1, 64, 64, 32, 32 }, { 2, 64, 32, 32, 32 }, { 3, 64, 32, 48, 32 }
                }
            },
        },
        {
            { // Origin dynamic shape
                {{1, 5}, 64, {16, 32}, {16, 32}, {16, 32}},
                { // Dynamic shapes instances
                    { 1, 64, 32, 32, 32 }, { 2, 64, 32, 32, 32 }, { 1, 64, 20, 32, 32 }
                }
            }
        }
};

const std::vector<CPUSpecificParams> CPUParamsBlocked5D = {
        cpuParams_nCdhw16c,
        cpuParams_nCdhw8c,
};

INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_5D_Subset1, Slice8LayerCPUTest,
        ::testing::Combine(
                ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesBlockedStatic5DSubset1)),
                ::testing::ValuesIn(testCasesBlocked5DSubset1),
                ::testing::ValuesIn(inputPrecisions),
                ::testing::ValuesIn(CPUParamsBlocked5D)),
        Slice8LayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_5D_Subset1, Slice8LayerCPUTest,
        ::testing::Combine(
                ::testing::ValuesIn(inputShapesBlockedDynamic5DSubset1),
                ::testing::ValuesIn(testCasesBlocked5DSubset1),
                ::testing::ValuesIn(inputPrecisions),
                ::testing::ValuesIn(CPUParamsBlocked5D)),
        Slice8LayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Static_5D_Subset2, Slice8LayerCPUTest,
        ::testing::Combine(
                ::testing::ValuesIn(static_shapes_to_test_representation(inputShapesBlockedStatic5DSubset2)),
                ::testing::ValuesIn(testCasesBlocked5DSubset2),
                ::testing::ValuesIn(inputPrecisions),
                ::testing::ValuesIn(CPUParamsBlocked5D)),
        Slice8LayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Common_Dynamic_5D_Subset2, Slice8LayerCPUTest,
        ::testing::Combine(
                ::testing::ValuesIn(inputShapesBlockedDynamic5DSubset2),
                ::testing::ValuesIn(testCasesBlocked5DSubset2),
                ::testing::ValuesIn(inputPrecisions),
                ::testing::ValuesIn(CPUParamsBlocked5D)),
        Slice8LayerCPUTest::getTestCaseName);

/* Descriptors check */

class Slice8LayerDescriptorCPUTest : public Slice8LayerCPUTest {};

TEST_P(Slice8LayerDescriptorCPUTest, DescriptorsCheck) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    ASSERT_THROW(compile_model(), ov::Exception);
}

const std::vector<Slice8SpecificParams> testCasesDescriptors = {
        Slice8SpecificParams{ { 0, -4, 0, 0 }, { 0, 2147483647, 0, 0 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
        Slice8SpecificParams{ { 0, 5, 0, 0 }, { 1, 20, 28, 27 }, { 1, 1, 1, 1 }, { 0, 1, 2, 3 } },
        Slice8SpecificParams{ { 0, 0, 0, 0 }, { 1, 2147483647, 32, 32 }, { 1, 2, 1, 1 }, { 0, 1, 2, 3 } }
};
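
// The stop value 2147483647 (INT32_MAX) relies on the Slice-8 clamping rule: out-of-range
// stops are clamped to the axis bound, so INT32_MAX effectively means "to the end of the
// axis". These parameter sets are instantiated with the blocked nChw8c layout below and
// are expected to be rejected at compilation time, which is what DescriptorsCheck asserts
// via ASSERT_THROW(compile_model(), ov::Exception).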

const std::vector<std::vector<InputShape>> inputShapesDescriptors = {
        {
            { {},
                { // Static shapes
                    { 1, 16, 32, 32 }
                }
            }
        },
        {
            { {},
                { // Static shapes
                    { 1, 17, 32, 32 }
                }
            }
        },
        {
            { // Origin dynamic shape
                {1, -1, 32, 32},
                { // Dynamic shapes instances
                    { 1, 16, 32, 32 }, { 1, 32, 32, 32 }
                }
            }
        }
};

INSTANTIATE_TEST_SUITE_P(smoke_Slice8LayerDescriptorCPUTest, Slice8LayerDescriptorCPUTest,
        ::testing::Combine(
                ::testing::ValuesIn(inputShapesDescriptors),
                ::testing::ValuesIn(testCasesDescriptors),
                ::testing::Values(ElementType::f32),
                ::testing::Values(cpuParams_nChw8c)),
        Slice8LayerDescriptorCPUTest::getTestCaseName);

} // namespace
} // namespace CPULayerTestsDefinitions
@ -219,4 +219,4 @@
//INSTANTIATE_TEST_SUITE_P(smoke_SoftMax_Unsupported_CPU, SoftMaxLayerCPUTest, UnsupportedParams, SoftMaxLayerCPUTest::getTestCaseName);
//
//} // namespace
//} // namespace CPULayerTestsDefinitions

@ -7,62 +7,86 @@
#include "cpu_test_utils.hpp"

namespace CPUTestUtils {
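// Field order of CPUSpecificParams below (matching the std::tie(inFmts, outFmts, priority,
// selectedType) unpacking used by the CPU test fixtures): {input memory formats},
// {output memory formats}, {implementation priorities}, "expected selected implementation type".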
const auto conv_sse42_1D = CPUSpecificParams{{}, {}, {"jit_sse42"}, "jit_sse42"};
const auto conv_avx2_1D = CPUSpecificParams{{}, {}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx512_1D = CPUSpecificParams{{}, {}, {"jit_avx512"}, "jit_avx512"};

const auto conv_ref_1D = CPUSpecificParams{{ncw}, {ncw}, {"ref_any"}, "ref_any"};
const auto conv_ref_2D = CPUSpecificParams{{nchw}, {nchw}, {"ref_any"}, "ref_any"};
const auto conv_ref_3D = CPUSpecificParams{{ncdhw}, {ncdhw}, {"ref_any"}, "ref_any"};

const auto conv_gemm_1D = CPUSpecificParams{{ncw}, {ncw}, {"gemm_any"}, "jit_gemm"};
const auto conv_gemm_2D = CPUSpecificParams{{nchw}, {nchw}, {"gemm_any"}, "jit_gemm"};
const auto conv_gemm_3D = CPUSpecificParams{{ncdhw}, {ncdhw}, {"gemm_any"}, "jit_gemm"};

const auto conv_gemm_1D_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_gemm"}, "jit_gemm"};
const auto conv_gemm_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_gemm"}, "jit_gemm"};
const auto conv_gemm_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_gemm"}, "jit_gemm"};

const auto conv_sse42_1D = CPUSpecificParams{{nCw8c}, {nCw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_2D = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_3D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_dw_1D = CPUSpecificParams{{nCw8c}, {nCw8c}, {"jit_sse42_dw"}, "jit_sse42_dw"};
const auto conv_sse42_dw_2D = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_sse42_dw"}, "jit_sse42_dw"};
const auto conv_sse42_dw_3D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {"jit_sse42_dw"}, "jit_sse42_dw"};

const auto conv_sse42_plain_to_blocked_1D = CPUSpecificParams{{ncw}, {nCw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_plain_to_blocked_2D = CPUSpecificParams{{nchw}, {nChw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_plain_to_blocked_3D = CPUSpecificParams{{ncdhw}, {nCdhw8c}, {"jit_sse42"}, "jit_sse42"};

const auto conv_sse42_1D_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_dw_1D_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_sse42_dw"}, "jit_sse42_dw"};
const auto conv_sse42_dw_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_sse42_dw"}, "jit_sse42_dw"};
const auto conv_sse42_dw_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_sse42_dw"}, "jit_sse42_dw"};

const auto conv_avx2_1D = CPUSpecificParams{{nCw8c}, {nCw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_2D = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_3D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_dw_1D = CPUSpecificParams{{nCw8c}, {nCw8c}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx2_dw_2D = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx2_dw_3D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx2_planar_1D = CPUSpecificParams{{ncw}, {ncw}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_planar_2D = CPUSpecificParams{{nchw}, {nchw}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_planar_3D = CPUSpecificParams{{ncdhw}, {ncdhw}, {"jit_avx2"}, "jit_avx2"};

const auto conv_avx2_plain_to_blocked_1D = CPUSpecificParams{{ncw}, {nCw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_plain_to_blocked_2D = CPUSpecificParams{{nchw}, {nChw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_plain_to_blocked_3D = CPUSpecificParams{{ncdhw}, {nCdhw8c}, {"jit_avx2"}, "jit_avx2"};

const auto conv_avx2_1D_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_dw_1D_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx2_dw_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx2_dw_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx2_dw"}, "jit_avx2_dw"};

const auto conv_avx512_1D = CPUSpecificParams{{nCw16c}, {nCw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_2D = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_3D = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_dw_1D = CPUSpecificParams{{nCw16c}, {nCw16c}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_avx512_dw_2D = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_avx512_dw_3D = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_avx512_planar_1D = CPUSpecificParams{{ncw}, {ncw}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_planar_2D = CPUSpecificParams{{nchw}, {nchw}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_planar_3D = CPUSpecificParams{{ncdhw}, {ncdhw}, {"jit_avx512"}, "jit_avx512"};

const auto conv_avx512_plain_to_blocked_1D = CPUSpecificParams{{ncw}, {nCw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_plain_to_blocked_2D = CPUSpecificParams{{nchw}, {nChw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_plain_to_blocked_3D = CPUSpecificParams{{ncdhw}, {nCdhw16c}, {"jit_avx512"}, "jit_avx512"};

const auto conv_avx512_1D_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_dw_1D_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_avx512_dw_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_avx512_dw_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx512_dw"}, "jit_avx512_dw"};

const auto conv_sse42_1D_1x1 = CPUSpecificParams{{nCw8c}, {nCw8c}, {"jit_sse42_1x1"}, "jit_sse42_1x1"};
const auto conv_avx2_1D_1x1 = CPUSpecificParams{{nCw8c}, {nCw8c}, {"jit_avx2_1x1"}, "jit_avx2_1x1"};
const auto conv_avx512_1D_1x1 = CPUSpecificParams{{nCw16c}, {nCw16c}, {"jit_avx512_1x1"}, "jit_avx512_1x1"};

const auto conv_sse42_1D_1x1_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_sse42_1x1"}, "jit_sse42_1x1"};
const auto conv_avx2_1D_1x1_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_avx2_1x1"}, "jit_avx2_1x1"};
const auto conv_avx512_1D_1x1_nspc = CPUSpecificParams{{nwc}, {nwc}, {"jit_avx512_1x1"}, "jit_avx512_1x1"};

const auto conv_sse42_2D_1x1 = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_sse42_1x1"}, "jit_sse42_1x1"};
const auto conv_avx2_2D_1x1 = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_avx2_1x1"}, "jit_avx2_1x1"};
const auto conv_avx512_2D_1x1 = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_avx512_1x1"}, "jit_avx512_1x1"};