Merge branch 'master' into topk

commit a7c8365446

@@ -60,7 +60,7 @@ function(build_ngraph)
 ngraph_set(NGRAPH_UNIT_TEST_ENABLE FALSE)
 endif()

-if(NOT (ANDROID OR WINDOWS_STORE))
+if(NOT (ANDROID OR WINDOWS_STORE OR (MSVC AND (ARM OR AARCH64)) ))
 ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE TRUE)
 else()
 ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE FALSE)

@@ -26,7 +26,7 @@
 - [Build Steps](#build-steps-3)
 - [Use Custom OpenCV Builds for Inference Engine](#use-custom-opencv-builds-for-inference-engine)
 - [Add Inference Engine to Your Project](#add-inference-engine-to-your-project)
-- [(Optional) Additional Installation Steps for the Intel® Movidius™ Neural Compute Stick and Neural Compute Stick 2](#optional-additional-installation-steps-for-the-intel-movidius-neural-compute-stick-and-neural-compute-stick-2)
+- [(Optional) Additional Installation Steps for the Intel® Neural Compute Stick 2](#optional-additional-installation-steps-for-the-intel-movidius-neural-compute-stick-and-neural-compute-stick-2)
 - [For Linux, Raspbian Stretch* OS](#for-linux-raspbian-stretch-os)
 - [Next Steps](#next-steps)
 - [Additional Resources](#additional-resources)

@@ -43,7 +43,7 @@ The open source version of Inference Engine includes the following plugins:
 | CPU plugin | Intel® Xeon® with Intel® AVX2 and AVX512, Intel® Core™ Processors with Intel® AVX2, Intel® Atom® Processors with Intel® SSE |
 | GPU plugin | Intel® Processor Graphics, including Intel® HD Graphics and Intel® Iris® Graphics |
 | GNA plugin | Intel® Speech Enabling Developer Kit, Amazon Alexa\* Premium Far-Field Developer Kit, Intel® Pentium® Silver processor J5005, Intel® Celeron® processor J4005, Intel® Core™ i3-8121U processor |
-| MYRIAD plugin | Intel® Movidius™ Neural Compute Stick powered by the Intel® Movidius™ Myriad™ 2, Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X |
+| MYRIAD plugin | Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X |
 | Heterogeneous plugin | Heterogeneous plugin enables computing for inference on one network on several Intel® devices. |

 ## Build on Linux\* Systems

@@ -58,7 +58,7 @@ The software was validated on:
 - GCC\* 4.8 or higher to build the Inference Engine
 - Python 3.6 or higher for Inference Engine Python API wrapper
 - (Optional) [Install Intel® Graphics Compute Runtime for OpenCL™ Driver package 19.41.14441].
 > **NOTE**: Building samples and demos from the Intel® Distribution of OpenVINO™ toolkit package requires CMake\* 3.10 or higher.

 ### Build Steps
 1. Clone submodules:
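The hunk is cut off before the command itself. As a hedged sketch (the repository URL is an assumption, not shown in this excerpt), the clone-and-submodules step usually looks like:
```sh
git clone https://github.com/openvinotoolkit/openvino.git
cd openvino
git submodule update --init --recursive
```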

@@ -129,7 +129,7 @@ You can use the following additional build options:
 1. Install all additional packages listed in the
 `/inference-engine/ie_bridges/python/requirements.txt` file:
 ```sh
 pip install -r requirements.txt
 ```
 2. Use the `-DENABLE_PYTHON=ON` option. To specify an exact Python version, use the following
 options:
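For orientation, the two steps above typically combine into a configure line like the following; every path and flag here is illustrative rather than taken from this diff:
```sh
mkdir -p build && cd build
cmake -DCMAKE_BUILD_TYPE=Release \
      -DENABLE_PYTHON=ON \
      -DPYTHON_EXECUTABLE=$(which python3) \
      ..
make --jobs=$(nproc)
```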

@@ -336,7 +336,7 @@ The software was validated on:
 - Microsoft\* Visual Studio 2017, 2019
 - (Optional) Intel® Graphics Driver for Windows* (26.20) [driver package].
 - Python 3.6 or higher for Inference Engine Python API wrapper
 > **NOTE**: Building samples and demos from the Intel® Distribution of OpenVINO™ toolkit package requires CMake\* 3.10 or higher.

 ### Build Steps

@@ -349,7 +349,7 @@ The software was validated on:
 the Intel® Graphics Driver for Windows (26.20) [driver package] before
 running the build. If you don't want to use the GPU plugin, use the
 `-DENABLE_CLDNN=OFF` CMake build option and skip the installation of the
 Intel® Graphics Driver.
 3. Create build directory:
 ```sh
 mkdir build
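# Sketch of the usual continuation (not part of this hunk; the generator
# string is an assumption based on the Visual Studio 2017/2019 requirement):
cd build
cmake -G "Visual Studio 16 2019" -A x64 ..
cmake --build . --config Release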

@@ -446,7 +446,7 @@ The software was validated on:
 - [CMake]\* 3.13 or higher
 - Clang\* compiler from Xcode\* 10.1 or higher
 - Python\* 3.6 or higher for the Inference Engine Python API wrapper
 > **NOTE**: Building samples and demos from the Intel® Distribution of OpenVINO™ toolkit package requires CMake\* 3.10 or higher.

 ### Build Steps

@@ -499,9 +499,9 @@ You can use the following additional build options:
 ```sh
 -DPYTHON_EXECUTABLE=/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/bin/python3.7m \
 -DPYTHON_LIBRARY=/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/libpython3.7m.dylib \
 -DPYTHON_INCLUDE_DIR=/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/include/python3.7m
 ```
 - If you installed Python another way, you can use the following commands to find where the `dylib` and `include_dir` are located, respectively:
 ```sh
 find /usr/ -name 'libpython*m.dylib'
 find /usr/ -type d -name python3.7m
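# Hedged sketch, not part of this hunk: the two find results above can be
# fed straight back into the configure step, for example:
PY_LIB=$(find /usr/ -name 'libpython*m.dylib' 2>/dev/null | head -n 1)
PY_INC=$(find /usr/ -type d -name python3.7m 2>/dev/null | head -n 1)
cmake -DENABLE_PYTHON=ON -DPYTHON_LIBRARY="$PY_LIB" -DPYTHON_INCLUDE_DIR="$PY_INC" ..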

@@ -518,7 +518,7 @@ This section describes how to build Inference Engine for Android x86 (64-bit) op

 - [CMake]\* 3.13 or higher
 - Android NDK (this guide has been validated with r20 release)
 > **NOTE**: Building samples and demos from the Intel® Distribution of OpenVINO™ toolkit package requires CMake\* 3.10 or higher.

 ### Build Steps
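The build steps themselves fall outside this hunk. A hedged sketch of a typical NDK r20 configure for the x86 (64-bit) target named above — the toolchain-file path and ABI flags are standard NDK conventions, not taken from this diff:
```sh
mkdir -p build-android && cd build-android
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
      -DANDROID_ABI=x86_64 \
      -DANDROID_PLATFORM=21 \
      ..
```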

@@ -608,11 +608,11 @@ include_directories(${InferenceEngine_INCLUDE_DIRS})
 target_link_libraries(${PROJECT_NAME} ${InferenceEngine_LIBRARIES} dl)
 ```

-## (Optional) Additional Installation Steps for the Intel® Movidius™ Neural Compute Stick and Neural Compute Stick 2
+## (Optional) Additional Installation Steps for the Intel® Neural Compute Stick 2

-> **NOTE**: These steps are only required if you want to perform inference on
-Intel® Movidius™ Neural Compute Stick or the Intel® Neural Compute Stick 2 using
-the Inference Engine MYRIAD Plugin. See also [Intel® Neural Compute Stick 2 Get Started].
+> **NOTE**: These steps are only required if you want to perform inference on the
+Intel® Neural Compute Stick 2 using the Inference Engine MYRIAD Plugin. See also
+[Intel® Neural Compute Stick 2 Get Started].

 ### For Linux, Raspbian\* Stretch OS

@@ -622,11 +622,10 @@ the Inference Engine MYRIAD Plugin. See also [Intel® Neural Compute Stick 2 Get
 sudo usermod -a -G users "$(whoami)"
 ```

-2. To perform inference on Intel® Movidius™ Neural Compute Stick and Intel®
-Neural Compute Stick 2, install the USB rules as follows:
+2. To perform inference on Intel® Neural Compute Stick 2, install the USB rules
+as follows:
 ```sh
 cat <<EOF > 97-myriad-usbboot.rules
-SUBSYSTEM=="usb", ATTRS{idProduct}=="2150", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1"
 SUBSYSTEM=="usb", ATTRS{idProduct}=="2485", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1"
 SUBSYSTEM=="usb", ATTRS{idProduct}=="f63b", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1"
 EOF
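# Sketch of the usual follow-up (outside this hunk), mirroring the udev
# handling used for the FPGA cable rules later in this commit:
sudo cp 97-myriad-usbboot.rules /etc/udev/rules.d/
sudo udevadm control --reload-rules
sudo udevadm trigger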

@@ -15,10 +15,6 @@ else()
 SET(ARCH_64 OFF)
 endif()

-if (NOT ENABLE_MKL_DNN)
-set(ENABLE_MKL OFF)
-endif()
-
 if(ENABLE_AVX512F)
 if ((CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") AND (MSVC_VERSION VERSION_LESS 1920))
 # 1920 version of MSVC 2019. In MSVC 2017 AVX512F not work

@@ -4,10 +4,27 @@

 cmake_minimum_required(VERSION 3.13)

+# Detect target
+include(target_flags)
+
+string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH_FOLDER)
+if(X86_64)
+set(ARCH_FOLDER intel64)
+elseif(X86)
+set(ARCH_FOLDER ia32)
+elseif(MSVC AND ARM)
+set(ARCH_FOLDER arm)
+elseif(MSVC AND AARCH64)
+set(ARCH_FOLDER arm64)
+endif()
+
 list(APPEND CMAKE_MODULE_PATH
 "${OpenVINO_MAIN_SOURCE_DIR}/cmake/download"
-"${OpenVINO_MAIN_SOURCE_DIR}/cmake/cross_compile"
-)
+"${OpenVINO_MAIN_SOURCE_DIR}/cmake/cross_compile")

 #
 # CPack
 #

 include(CPackComponent)
 unset(IE_CPACK_COMPONENTS_ALL CACHE)
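A note on the effect of `ARCH_FOLDER`: the per-architecture folder chosen here drives the build and CPack layout used in the next hunk. A hedged illustration for a 64-bit Linux host (the directory name is inferred from the values above, not stated in the diff):
```sh
# ARCH_FOLDER is intel64 on x86_64, ia32 on x86, arm/arm64 for the MSVC cross targets
ls bin/intel64/Release
```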

@@ -33,21 +50,14 @@ endif()
 # Set library directory for cpack
 #
 function(ie_cpack_set_library_dir)
-string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH)
-if(ARCH STREQUAL "x86_64" OR ARCH STREQUAL "amd64") # Windows detects Intel's 64-bit CPU as AMD64
-set(ARCH intel64)
-elseif(ARCH STREQUAL "i386")
-set(ARCH ia32)
-endif()
-
 if(WIN32)
-set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
-set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/bin/${ARCH}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
-set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
+set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
+set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/bin/${ARCH_FOLDER}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
+set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
 else()
-set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH} PARENT_SCOPE)
-set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH} PARENT_SCOPE)
-set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH} PARENT_SCOPE)
+set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER} PARENT_SCOPE)
+set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER} PARENT_SCOPE)
+set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER} PARENT_SCOPE)
 endif()
 endfunction()

@@ -109,28 +119,19 @@ function(set_temp_directory temp_variable source_tree_dir)
 endif()
 endfunction()

 #
 # Common scripts
 #

 include(coverage/coverage)
 include(shellcheck/shellcheck)

 # External dependencies
 find_package(Threads)

-# Detect target
-include(target_flags)
-
 # printing debug messages
 include(debug)

-# linking libraries without discarding symbols
-include(whole_archive)
-
-string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH_FOLDER)
-if(X86_64)
-set(ARCH_FOLDER intel64)
-elseif(X86)
-set(ARCH_FOLDER ia32)
-endif()
-
 if(OS_FOLDER)
 message ("**** OS FOLDER IS: [${OS_FOLDER}]")
 if("${OS_FOLDER}" STREQUAL "ON")

@@ -237,6 +238,7 @@ include(os_flags)
 include(sanitizer)
 include(cross_compiled_func)
 include(faster_build)
+include(whole_archive)
 include(api_validator/api_validator)

 function(set_ci_build_number)

@@ -17,11 +17,11 @@ ie_option (ENABLE_TESTS "unit, behavior and functional tests" OFF)

 ie_option (ENABLE_MKL_DNN "MKL-DNN plugin for inference engine" ${ENABLE_MKL_DNN_DEFAULT})

-ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "WIN32 OR X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE; NOT WINDOWS_PHONE" OFF)
+ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF)

 # FIXME: there are compiler failures with LTO and Cross-Compile toolchains. Disabling for now, but
 # this must be addressed in a proper way
-ie_dependent_option (ENABLE_LTO "Enable Link Time Optimization" OFF "LINUX OR WIN32;NOT CMAKE_CROSSCOMPILING; CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9" OFF)
+ie_dependent_option (ENABLE_LTO "Enable Link Time Optimization" OFF "LINUX;NOT CMAKE_CROSSCOMPILING; CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9" OFF)

 ie_option (OS_FOLDER "create OS dedicated folder in output" OFF)
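These `ie_option`/`ie_dependent_option` entries are ordinary CMake cache options, so the feature matrix above is driven from the configure line; for example (an illustrative invocation, not part of the diff):
```sh
cmake -DENABLE_CLDNN=OFF -DENABLE_LTO=ON -DOS_FOLDER=ON ..
```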

@@ -127,8 +127,10 @@ function(ie_avx512_optimization_flags flags)
 endfunction()

 function(ie_arm_neon_optimization_flags flags)
-if(WIN32 OR CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
 message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
+elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+# nothing
 elseif(ANDROID)
 if(ANDROID_ABI STREQUAL "arm64-v8a")
 set(${flags} "-mfpu=neon" PARENT_SCOPE)

@@ -16,10 +16,25 @@ if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
 endif()
 endif()

+macro(_ie_process_msvc_generator_platform flag_name)
+# if cmake -A <ARM|ARM64> is passed
+if(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64")
+set(AARCH64 ON)
+elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM")
+set(ARM ON)
+elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "x64")
+set(X86_64 ON)
+elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "Win32")
+set(X86 ON)
+else()
+set(${flag_name} ON)
+endif()
+endmacro()
+
 if(MSVC64 OR MINGW64)
-set(X86_64 ON)
+_ie_process_msvc_generator_platform(X86_64)
 elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING))
-set(X86 ON)
+_ie_process_msvc_generator_platform(X86)
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
 set(X86_64 ON)
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")
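For context on when `_ie_process_msvc_generator_platform` fires: `CMAKE_GENERATOR_PLATFORM` is populated by CMake's `-A` switch with Visual Studio generators, so a Windows-on-ARM cross build would be configured along these lines (a sketch; the generator name is an assumption):
```sh
cmake -G "Visual Studio 16 2019" -A ARM64 ..
```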

@@ -7,15 +7,15 @@ macro(ie_parse_ci_build_number)
 set(IE_VERSION_MAJOR ${CMAKE_MATCH_1})
 set(IE_VERSION_MINOR ${CMAKE_MATCH_2})
 set(IE_VERSION_PATCH ${CMAKE_MATCH_3})
-set(IE_VS_VER_HAS_WELL_DEFINED_VERSION 1)
+set(IE_VS_VER_HAS_VERSION 1)
 else()
-set(IE_VS_VER_HAS_WELL_DEFINED_VERSION 0)
+set(IE_VS_VER_HAS_VERSION 0)
 endif()
 endmacro()

 ie_parse_ci_build_number()

-if(IE_VS_VER_HAS_WELL_DEFINED_VERSION)
+if(IE_VS_VER_HAS_VERSION)
 set(IE_VS_VER_FILEVERSION_QUAD "${IE_VERSION_MAJOR},${IE_VERSION_MINOR},${IE_VERSION_PATCH},0")
 set(IE_VS_VER_PRODUCTVERSION_QUAD "${IE_VERSION_MAJOR},${IE_VERSION_MINOR},${IE_VERSION_PATCH},0")
 set(IE_VS_VER_FILEVERSION_STR "${IE_VERSION_MAJOR}.${IE_VERSION_MINOR}.${IE_VERSION_PATCH}.0")

@@ -1,7 +1,7 @@
 #include <winver.h>

 VS_VERSION_INFO VERSIONINFO
-#if IE_VS_VER_HAS_WELL_DEFINED_VERSION
+#if @IE_VS_VER_HAS_VERSION@
 FILEVERSION @IE_VS_VER_FILEVERSION_QUAD@
 PRODUCTVERSION @IE_VS_VER_PRODUCTVERSION_QUAD@
 #endif

@@ -20,7 +20,7 @@ BEGIN
 BLOCK "040904E4"
 BEGIN
 VALUE "FileDescription", "@IE_VS_VER_FILEDESCRIPTION_STR@\0"
-#if IE_VS_VER_HAS_WELL_DEFINED_VERSION
+#if @IE_VS_VER_HAS_VERSION@
 VALUE "FileVersion", "@IE_VS_VER_FILEVERSION_STR@\0"
 #endif
 VALUE "InternalName", "@IE_VS_VER_INTERNALNAME_STR@\0"

@@ -10,7 +10,7 @@ and mixed-reality headsets.
 The OpenVINO™ toolkit:

 * Enables CNN-based deep learning inference on the edge
-* Supports heterogeneous execution across an Intel® CPU, Intel® Integrated Graphics, Intel® Movidius™ Neural Compute Stick and Intel® Neural Compute Stick 2
+* Supports heterogeneous execution across an Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2
 * Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels
 * Includes optimized calls for computer vision standards including OpenCV\*, OpenCL™, and OpenVX\*

@@ -35,7 +35,7 @@ optimized for running on Intel® hardware (CPU, GPU, IPU).
 This Guide provides an overview of the Inference Engine describing the typical workflow for performing
 inference of a pre-trained and optimized deep learning model and a set of sample applications.

 > **NOTES:**
 > - Before you perform inference with the Inference Engine, your models should be converted to the Inference Engine format using the Model Optimizer or built directly in run-time using nGraph API. To learn about how to use Model Optimizer, refer to the [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). To learn about the pre-trained and optimized models delivered with the OpenVINO™ toolkit, refer to [Pre-Trained Models](@ref omz_models_intel_index).
 > - [Intel® System Studio](https://software.intel.com/en-us/system-studio) is an all-in-one, cross-platform tool suite, purpose-built to simplify system bring-up and improve system and IoT device application performance on Intel® platforms. If you are using the Intel® Distribution of OpenVINO™ with Intel® System Studio, go to [Get Started with Intel® System Studio](https://software.intel.com/en-us/articles/get-started-with-openvino-and-intel-system-studio-2019).

@@ -2,7 +2,7 @@

 ## Introducing MYRIAD Plugin

-The Inference Engine MYRIAD plugin is developed for inference of neural networks on Intel® Movidius™ Neural Compute Stick and Intel® Neural Compute Stick 2.
+The Inference Engine MYRIAD plugin is developed for inference of neural networks on Intel® Neural Compute Stick 2.

 ## Installation on Linux* OS

@@ -23,10 +23,10 @@ The Inference Engine MYRIAD plugin supports the following networks:
 * GoogleNet (Inception) v1, v2, v4
 * VGG family (VGG16, VGG19)
 * SqueezeNet v1.0, v1.1
-* ResNet v1 family (18\*\* \*\*\*, 50, 101, 152)
+* ResNet v1 family (18\*\*\*, 50, 101, 152)
 * MobileNet (mobilenet-v1-1.0-224, mobilenet-v2)
 * Inception ResNet v2
-* DenseNet family\*\* (121,161,169,201)
+* DenseNet family (121,161,169,201)
 * SSD-300, SSD-512, SSD-MobileNet, SSD-GoogleNet, SSD-SqueezeNet

 **TensorFlow\***:

@@ -45,7 +45,7 @@ The Inference Engine MYRIAD plugin supports the following networks:

 **MXNet\***:
 * AlexNet and CaffeNet
-* DenseNet family\*\* (121,161,169,201)
+* DenseNet family (121,161,169,201)
 * SqueezeNet v1.1
 * MobileNet v1, v2
 * NiN

@@ -55,8 +55,6 @@ The Inference Engine MYRIAD plugin supports the following networks:
 * VGG family (VGG16, VGG19)
 * SSD-Inception-v3, SSD-MobileNet, SSD-ResNet-50, SSD-300

-\*\* Network is tested on Intel® Movidius™ Neural Compute Stick with BatchNormalization fusion optimization disabled during Model Optimizer import
-
 \*\*\* Network is tested on Intel® Neural Compute Stick 2 with BatchNormalization fusion optimization disabled during Model Optimizer import

 ## Supported Configuration Parameters

@@ -77,9 +75,9 @@ In addition to common parameters, the MYRIAD plugin accepts the following option
 ## Device allocation <a name="MYRIAD_DEVICE_ALLOC"> </a>

 Each `IExecutableNetwork` instance tries to allocate a new device on `InferenceEngine::Core::LoadNetwork`, but if all available devices are already allocated it will use the one with the minimal number of uploaded networks.
 The maximum number of networks a single device can handle depends on device memory capacity and the size of the networks.

 If the `KEY_VPU_MYRIAD_FORCE_RESET` option is set to `YES`, the plugin will reset all VPU devices in the system.

 A single device cannot be shared across multiple processes.

@@ -0,0 +1,96 @@
+# Converting EfficientDet Models from TensorFlow {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_EfficientDet_Models}
+
+This tutorial explains how to convert public EfficientDet\* detection models to the Intermediate Representation (IR).
+
+## <a name="efficientdet-to-ir"></a>Convert EfficientDet Model to IR
+
+On GitHub*, you can find several public implementations of the EfficientDet model. This tutorial explains how to
+convert models from the [https://github.com/google/automl/tree/master/efficientdet](https://github.com/google/automl/tree/master/efficientdet)
+repository (commit 96e1fee) to IR.
+
+### Get Frozen TensorFlow\* Model
+
+Follow the instructions below to get a frozen TensorFlow EfficientDet model. The EfficientDet-D4 model is used as an example:
+
+1. Clone the repository:<br>
+```sh
+git clone https://github.com/google/automl
+cd automl/efficientdet
+```
+2. (Optional) Check out the commit that the conversion was tested on:<br>
+```sh
+git checkout 96e1fee
+```
+3. Install required dependencies:<br>
+```sh
+python3 -m pip install --upgrade pip
+python3 -m pip install -r automl/efficientdet/requirements.txt
+```
+4. Download and extract the model checkpoint [efficientdet-d4.tar.gz](https://storage.googleapis.com/cloud-tpu-checkpoints/efficientdet/coco2/efficientdet-d4.tar.gz)
+referenced in the "Pretrained EfficientDet Checkpoints" section of the model repository:<br>
+```sh
+wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientdet/coco2/efficientdet-d4.tar.gz
+tar zxvf efficientdet-d4.tar.gz
+```
+5. Freeze the model:<br>
+```sh
+python3 model_inspect.py --runmode=saved_model --model_name=efficientdet-d4 --ckpt_path=efficientdet-d4 --saved_model_dir=savedmodeldir
+```
+As a result, the frozen model file `savedmodeldir/efficientdet-d4_frozen.pb` is generated.
+
+> **NOTE:** If you see the error `AttributeError: module 'tensorflow_core.python.keras.api._v2.keras.initializers' has no attribute 'variance_scaling'`, apply the fix from the [patch](https://github.com/google/automl/pull/846).
+
+### Convert EfficientDet TensorFlow Model to the IR
+
+To generate the IR of the EfficientDet TensorFlow model, run:<br>
+```sh
+python3 $MO_ROOT/mo.py \
+--input_model savedmodeldir/efficientdet-d4_frozen.pb \
+--tensorflow_use_custom_operations_config $MO_ROOT/extensions/front/tf/automl_efficientdet.json \
+--input_shape [1,$IMAGE_SIZE,$IMAGE_SIZE,3] \
+--reverse_input_channels
+```
+
+Where `$IMAGE_SIZE` is the size that the input image of the original TensorFlow model will be resized to. Different
+EfficientDet models were trained with different input image sizes. To determine the right one, refer to the `efficientdet_model_param_dict`
+dictionary in the [hparams_config.py](https://github.com/google/automl/blob/96e1fee/efficientdet/hparams_config.py#L304) file.
+The `image_size` attribute gives the shape to specify for the model conversion.
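As a concrete illustration (a hedged sketch, not part of the diff): `hparams_config.py` lists `image_size` 1024 for `efficientdet-d4`, so the conversion above would typically be invoked as:
```sh
IMAGE_SIZE=1024   # assumed value for efficientdet-d4; confirm in hparams_config.py
python3 $MO_ROOT/mo.py \
    --input_model savedmodeldir/efficientdet-d4_frozen.pb \
    --tensorflow_use_custom_operations_config $MO_ROOT/extensions/front/tf/automl_efficientdet.json \
    --input_shape [1,$IMAGE_SIZE,$IMAGE_SIZE,3] \
    --reverse_input_channels
```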
+
+The `tensorflow_use_custom_operations_config` command line parameter specifies the configuration json file containing hints
+to the Model Optimizer on how to convert the model and trigger transformations implemented in
+`$MO_ROOT/extensions/front/tf/AutomlEfficientDet.py`. The json file contains parameters that must be changed if you
+trained the model yourself and modified the `hparams_config` file, or if the parameters differ from the ones used for EfficientDet-D4.
+The attribute names are self-explanatory or match the names in the `hparams_config` file.
+
+> **NOTE:** The color channel order (RGB or BGR) of the input data should match the channel order of the model training dataset. If they are different, perform the `RGB<->BGR` conversion by specifying the command-line parameter `--reverse_input_channels`. Otherwise, inference results may be incorrect. For more information about the parameter, refer to the **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](../Converting_Model_General.md).
+
+The OpenVINO™ toolkit provides samples that can be used to infer the EfficientDet model. For more information, refer to
+[Object Detection for SSD C++ Sample](@ref openvino_inference_engine_samples_object_detection_sample_ssd_README) and
+[Object Detection for SSD Python Sample](@ref openvino_inference_engine_ie_bridges_python_sample_object_detection_sample_ssd_README).
+
+## <a name="efficientdet-ir-results-interpretation"></a>Interpreting Results of the TensorFlow Model and the IR
+
+The TensorFlow model produces as output a list of 7-element tuples: `[image_id, y_min, x_min, y_max, x_max, confidence, class_id]`, where:
+* `image_id` -- image batch index.
+* `y_min` -- absolute `y` coordinate of the lower left corner of the detected object.
+* `x_min` -- absolute `x` coordinate of the lower left corner of the detected object.
+* `y_max` -- absolute `y` coordinate of the upper right corner of the detected object.
+* `x_max` -- absolute `x` coordinate of the upper right corner of the detected object.
+* `confidence` -- the confidence of the detected object.
+* `class_id` -- the ID of the detected object class, counted from 1.
+
+The output of the IR is a list of 7-element tuples: `[image_id, class_id, confidence, x_min, y_min, x_max, y_max]`, where:
+* `image_id` -- image batch index.
+* `class_id` -- the ID of the detected object class, counted from 0.
+* `confidence` -- the confidence of the detected object.
+* `x_min` -- normalized `x` coordinate of the lower left corner of the detected object.
+* `y_min` -- normalized `y` coordinate of the lower left corner of the detected object.
+* `x_max` -- normalized `x` coordinate of the upper right corner of the detected object.
+* `y_max` -- normalized `y` coordinate of the upper right corner of the detected object.
+
+The first element with `image_id = -1` marks the end of data.
+
+---
+## See Also
+
+* [Sub-Graph Replacement in Model Optimizer](../../customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md)

@@ -22,6 +22,7 @@
 <tab type="user" title="Converting BERT from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_BERT_From_Tensorflow"/>
 <tab type="user" title="Convert TensorFlow* XLNet Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_XLNet_From_Tensorflow"/>
 <tab type="user" title="Converting TensorFlow* Wide and Deep Models from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_WideAndDeep_Family_Models"/>
+<tab type="user" title="Converting EfficientDet Models from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_EfficientDet_Models"/>
 </tab>
 <tab type="usergroup" title="Converting a MXNet* Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_MxNet">
 <tab type="user" title="Converting a Style Transfer Model from MXNet" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_mxnet_specific_Convert_Style_Transfer_From_MXNet"/>

@@ -22,12 +22,12 @@ A directory named `fpga_support_files` is created.
 ```sh
 cd fpga_support_files
 ```

 5. Source `setup_env.sh` to set your environment variables:
 ```sh
 source /home/<user>/Downloads/fpga_support_files/setup_env.sh
 ```

 6. Configure the FPGA Driver Blacklist:
 ```sh
 sudo mv config/blacklist-altera-cvp.conf /etc/modprobe.d

@@ -37,41 +37,41 @@ sudo mv config/blacklist-altera-cvp.conf /etc/modprobe.d
 ```sh
 sudo su
 ```

 8. Use the `setup_env.sh` script from `fpga_support_files.tgz` to set your environment variables:
 ```sh
 source /home/<user>/Downloads/fpga_support_files/setup_env.sh
 ```

 9. Change directory to `Downloads/fpga_support_files/`:
 ```sh
 cd /home/<user>/Downloads/fpga_support_files/
 ```

 10. Run the FPGA dependencies script, which allows OpenCL to support Ubuntu* and recent kernels:
 ```sh
 ./install_openvino_fpga_dependencies.sh
 ```

-11. When asked, select the FPGA card, Intel® GPU, and Intel® Movidius™ Neural Compute Stick, then you can install the correct dependencies.
+11. When asked, select the FPGA card, Intel® GPU, and Intel® Neural Compute Stick 2, then you can install the correct dependencies.

 12. If you installed the 4.14 kernel as part of the installation script, you will need to reboot the machine and select the new kernel in the Ubuntu (grub) boot menu. You will also need to rerun `setup_env.sh` to set up your environment variables again.

 13. Install OpenCL™ devices. Enter **Y** when prompted to install:
 ```sh
 aocl install
 ```

 14. Reboot the machine:
 ```sh
 reboot
 ```

 15. Use the `setup_env.sh` script from `fpga_support_files.tgz` to set your environment variables:
 ```sh
 source /home/<user>/Downloads/fpga_support_files/setup_env.sh
 ```

 16. Run `aocl diagnose`:
 ```sh
 aocl diagnose

@@ -93,21 +93,21 @@ cd /home/<user>/Downloads/fpga_support_files/config/
 ```sh
 sudo cp -rf a10_1150_sg1 /opt/altera/aocl-pro-rte/aclrte-linux64/board/
 ```

 3. Convert the BSP files from DOS to UNIX:
 ```sh
 sudo chmod +x a10_1150_sg1
 find a10_1150_sg1 -type f -print0 | xargs -0 dos2unix
 ```

 4. Set up the USB Blaster:

 1. Connect the cable between the board and the host system. Use the letter codes in the diagram below for the connection points:

 2. Connect the B end of the cable to point B on the board.

 3. Connect the F end of the cable to point F on the FPGA download cable.

 4. From the F end of the cable to point F on the FPGA download cable, the connection is as shown:
 

@@ -115,17 +115,17 @@ find a10_1150_sg1 -type f -print0 | xargs -0 dos2unix
 ```sh
 source /home/<user>/Downloads/fpga_support_files/setup_env.sh
 ```

 6. Update the Intel® FPGA Download Cable rules to program the board without root permissions and to flash the initialization bitstreams so that the Intel® FPGA Download Cable can communicate with the board:
 ```sh
 sudo cp config/51-usbblaster.rules /etc/udev/rules.d
 ```

 7. Load the USB rules:
 ```sh
 sudo udevadm control --reload-rules && udevadm trigger
 ```

 8. Unplug and re-plug the Intel® FPGA Download Cable to enable JTAG connection.

 9. Run `jtagconfig` to ensure that your Intel FPGA Download Cable driver is ready to use:

@@ -135,22 +135,22 @@ jtagconfig
 Your output is similar to:
 ```sh
 1) USB-Blaster [1-6]
 02E660DD 10AX115H1(.|E2|ES)/10AX115H2/..
 ```

 10. Download [Intel® Quartus® Prime Software Lite Edition 17.1](http://fpgasoftware.intel.com/17.1/?edition=lite). Install the Intel® Quartus® Prime Software Lite to the `/home/<user>/intelFPGA/17.1` directory.
 > **NOTE**: You will need the complete Intel® Quartus® Prime Software Lite version when you want to program the `boardtest_1ddr_top.aocx` into the flash for permanent availability.

 11. Export the Intel® Quartus® Prime Software Lite environment variable:
 ```sh
 export QUARTUS_ROOTDIR=/home/<user>/intelFPGA/17.1/quartus
 ```

 12. Use `jtagconfig` to slow the clock:
 ```sh
 jtagconfig --setparam 1 JtagClock 6M
 ```

 13. (OPTIONAL) Confirm the clock is set to 6M:
 ```sh
 jtagconfig --getparam 1 JtagClock

@@ -164,7 +164,7 @@ You should see the following:
 ```sh
 cd /opt/altera/aocl-pro-rte/aclrte-linux64/board/a10_1150_sg1/bringup
 ```

 15. Program the `boardtest_1ddr_top.aocx` file to the flash to be made permanently available even after power cycle:
 ```sh
 aocl flash acl0 boardtest_1ddr_top.aocx

@@ -186,12 +186,12 @@ Your output is similar to:
 ```sh
 source /home/<user>/Downloads/fpga_support_file/setup_env.sh
 ```

 19. Uninstall the previous BSP before installing the OpenCL drivers for the R5 BSP:
 ```sh
 aocl uninstall /opt/altera/aocl-pro-rte/aclrte-linux64/board/<BSP_package>/
 ```

 20. Export and source the environment script:
 ```sh
 export AOCL_BOARD_PACKAGE_ROOT=/opt/altera/aocl-pro-rte/aclrte-linux64/board/a10_1150_sg1

@@ -204,13 +204,13 @@ source /opt/altera/aocl-pro-rte/aclrte-linux64/init_opencl.sh
 ```sh
 aocl install
 ```

 22. Run the `diagnose` command:
 ```sh
 aocl diagnose
 ```
 You should see `DIAGNOSTIC_PASSED` before proceeding to the next steps.

 ## 3. Program a Bitstream

 The bitstream you program should correspond to the topology you want to deploy. In this section, you program a SqueezeNet bitstream and deploy the classification sample with a SqueezeNet model that you used the Model Optimizer to convert in the steps before.

@@ -225,17 +225,17 @@ Depending on how many bitstreams you selected, there are different folders for e
 ```sh
 source /home/<user>/Downloads/fpga_support_files/setup_env.sh
 ```

 3. Change to your home directory:
 ```sh
 cd /home/<user>
 ```

 4. Program the bitstream for the Intel® Vision Accelerator Design with Intel® Arria® 10 FPGA:
 ```sh
 aocl program acl0 /opt/intel/openvino/bitstreams/a10_vision_design_bitstreams/5-0_PL1_FP11_SqueezeNet.aocx
 ```

 ### Optional Steps to Flash the FPGA Card

 > **NOTE**:

@@ -248,12 +248,12 @@ aocl program acl0 /opt/intel/openvino/bitstreams/a10_vision_design_bitstreams/5-
 ```sh
 jtagconfig
 ```

 3. Use `jtagconfig` to slow the clock:
 ```sh
 jtagconfig --setparam 1 JtagClock 6M
 ```

 4. Store the Intel® Vision Accelerator Design with Intel® Arria® 10 FPGA bitstream on the board:
 ```sh
 aocl flash acl0 /opt/intel/openvino/bitstreams/a10_vision_design_bitstreams/5-0_PL1_FP11_SqueezeNet.aocx

@@ -274,27 +274,27 @@ In this section, you will create an FP16 model suitable for hardware accelerator
 ```sh
 mkdir /home/<user>/squeezenet1.1_FP16
 ```

 2. Go to `/home/<user>/squeezenet1.1_FP16`:
 ```sh
 cd /home/<user>/squeezenet1.1_FP16
 ```

 3. Use the Model Optimizer to convert an FP16 SqueezeNet Caffe* model into an optimized Intermediate Representation (IR):
 ```sh
 python3 /opt/intel/openvino/deployment_tools/model_optimizer/mo.py --input_model /home/<user>/openvino_models/FP32/classification/squeezenet/1.1/caffe/squeezenet1.1.caffemodel --data_type FP16 --output_dir .
 ```

 4. The `squeezenet1.1.labels` file contains the classes `ImageNet` uses. This file is included so that the inference results show text instead of classification numbers. Copy `squeezenet1.1.labels` to your optimized model location:
 ```sh
 cp /home/<user>/openvino_models/ir/squeezenet1.1/FP32/squeezenet1.1.labels .
 ```

 5. Copy a sample image to the release directory. You will use this with your optimized model:
 ```sh
 sudo cp /opt/intel/openvino/deployment_tools/demo/car.png ~/inference_engine_samples/intel64/Release
 ```

 ## 5. Run a Sample Application

 1. Go to the samples directory

@@ -13,55 +13,64 @@ This guide provides installation steps for the Intel® distribution of OpenVINO

 ## Install the Runtime Package Using the PyPI Repository

-1. Set up and update pip to the highest version:
-```sh
-python3 -m pip install --upgrade pip
-```
-2. Install the Intel® distribution of OpenVINO™ toolkit:
+### Step 1. Set up and update pip to the highest version
+
+Run the command below:
+```sh
+python3 -m pip install --upgrade pip
+```
+
+### Step 2. Install the Intel® distribution of OpenVINO™ toolkit
+
+Run the command below:
 ```sh
 pip install openvino-python
 ```

-3. Add PATH to environment variables.
-- Ubuntu* 18.04 and macOS*:
-```sh
-export LD_LIBRARY_PATH=<library_dir>:${LD_LIBRARY_PATH}
-```
-- Windows* 10:
-```sh
-set PATH=<library_dir>;%PATH%
-```
-How to find `library_dir`:
-- Ubuntu\*, macOS\*:
-- Standard user:
-```sh
-echo $(python3 -m site --user-base)/lib
-```
-- Root or sudo user:
-```sh
-/usr/local/lib
-```
-- Virtual environments or custom Python installations (from sources or tarball):
-```sh
-echo $(which python3)/../../lib
-```
-- Windows\*:
-- Standard Python:
-```sh
-python -c "import os, sys; print((os.path.dirname(sys.executable))+'\Library\\bin')"
-```
-- Virtual environments or custom Python installations (from sources or tarball):
-```sh
-python -c "import os, sys; print((os.path.dirname(sys.executable))+'\..\Library\\bin')"
-```
-4. Verify that the package is installed:
-```sh
-python3 -c "import openvino"
-```
+### Step 3. Add PATH to environment variables
+
+Run a command for your operating system:
+- Ubuntu 18.04 and macOS:
+```sh
+export LD_LIBRARY_PATH=<library_dir>:${LD_LIBRARY_PATH}
+```
+- Windows* 10:
+```sh
+set PATH=<library_dir>;%PATH%
+```
+To find `library_dir`:
+**Ubuntu, macOS**:
+- Standard user:
+```sh
+echo $(python3 -m site --user-base)/lib
+```
+- Root or sudo user:
+```sh
+/usr/local/lib
+```
+- Virtual environments or custom Python installations (from sources or tarball):
+```sh
+echo $(which python3)/../../lib
+```
+**Windows**:
+- Standard Python:
+```sh
+python -c "import os, sys; print((os.path.dirname(sys.executable))+'\Library\\bin')"
+```
+- Virtual environments or custom Python installations (from sources or tarball):
+```sh
+python -c "import os, sys; print((os.path.dirname(sys.executable))+'\..\Library\\bin')"
+```
+
+### Step 4. Verify that the package is installed
+
+Run the command below:
+```sh
+python3 -c "import openvino"
+```

 Now you are ready to develop and run your application.
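Compressed into a single session, the four steps read as follows (using the Ubuntu standard-user `library_dir` case from Step 3; adjust for your OS and install type):
```sh
python3 -m pip install --upgrade pip
pip install openvino-python
export LD_LIBRARY_PATH=$(python3 -m site --user-base)/lib:${LD_LIBRARY_PATH}
python3 -c "import openvino"
```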

 ## Additional Resources

 - [Intel® Distribution of OpenVINO™ toolkit](https://software.intel.com/en-us/openvino-toolkit).

@@ -20,7 +20,9 @@ INSTANTIATE_TEST_CASE_P(NumSplitsCheck, SplitLayerTest,
 ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
 ::testing::Values(InferenceEngine::Layout::ANY),
 ::testing::Values(InferenceEngine::Layout::ANY),
-::testing::Values(std::vector<size_t >({30, 30, 30, 30})),
+::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
+::testing::Values(std::vector<size_t>({})),
 ::testing::Values("TEMPLATE")),
 SplitLayerTest::getTestCaseName);

 } // namespace

@@ -185,7 +185,7 @@ if (ENABLE_OPENCV)
 set(OPENCV_BUILD "36")
 set(OPENCV_BUILD_YOCTO "337")

-if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+if (AARCH64)
 if(DEFINED ENV{THIRDPARTY_SERVER_PATH})
 set(IE_PATH_TO_DEPS "$ENV{THIRDPARTY_SERVER_PATH}")
 elseif(DEFINED THIRDPARTY_SERVER_PATH)

@@ -220,10 +220,10 @@ if (ENABLE_OPENCV)
 ENVIRONMENT "OpenCV_DIR"
 VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
 elseif(LINUX)
-if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+if (AARCH64)
 set(OPENCV_SUFFIX "yocto_kmb")
 set(OPENCV_BUILD "${OPENCV_BUILD_YOCTO}")
-elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
+elseif (ARM)
 set(OPENCV_SUFFIX "debian9arm")
 elseif (LINUX_OS_NAME STREQUAL "CentOS 7" OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9")
 set(OPENCV_SUFFIX "centos7")

@@ -29,7 +29,7 @@ if (ENABLE_MKL_DNN)
 endif()

 # "MKL-DNN library based on OMP or TBB or Sequential implementation: TBB|OMP|SEQ"
-if(ARM)
+if(ARM OR (MSVC AND (ARM OR AARCH64)) )
 set(THREADING_DEFAULT "SEQ")
 else()
 set(THREADING_DEFAULT "TBB")

@@ -13,7 +13,7 @@ endif()

 include(dependency_solver)

-set(VPU_SUPPORTED_FIRMWARES usb-ma2450 usb-ma2x8x pcie-ma248x)
+set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma248x)

 #
 # Default packages

@@ -66,11 +66,11 @@ foreach(firmware_name IN LISTS VPU_SUPPORTED_FIRMWARES)
 string(TOUPPER "${firmware_name}" firmware_name_upper)
 set(var_name VPU_FIRMWARE_${firmware_name_upper}_FILE)

-set(firmware_out_file "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${firmware_name}.mvcmd")
+set(firmware_out_file "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${CMAKE_CFG_INTDIR}/${firmware_name}.mvcmd")

 # Handle PCIe elf firmware for Windows
 if (WIN32 AND "${firmware_name}" STREQUAL "pcie-ma248x")
-set(firmware_out_file "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${firmware_name}.elf")
+set(firmware_out_file "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${CMAKE_CFG_INTDIR}/${firmware_name}.elf")
 endif ()

 list(APPEND all_firmware_files ${firmware_out_file})

@@ -79,7 +79,7 @@ foreach(firmware_name IN LISTS VPU_SUPPORTED_FIRMWARES)
 COMMAND
 ${CMAKE_COMMAND} -E copy ${${var_name}} ${firmware_out_file}
 MAIN_DEPENDENCY ${${var_name}}
-COMMENT "[VPU] Copy ${${var_name}} to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}"
+COMMENT "[VPU] Copy ${${var_name}} to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${CMAKE_CFG_INTDIR}"
 VERBATIM)

 install(FILES ${${var_name}}

@@ -24,6 +24,14 @@
 # define _AMD64_
 #endif

+#if defined(_M_ARM) && !defined(_ARM_) && !defined(_ARM64_)
+# define _ARM_
+#endif
+
+#if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_)
+# define _ARM64_
+#endif
+
 #include <string.h>
 #include <windef.h>
 #include <fileapi.h>

@@ -59,10 +59,6 @@ else ()
 endif()

 if (WIN32)
-if (NOT "${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
-message(FATAL_ERROR "Only 64-bit supported on Windows")
-endif()
-
 set_property (DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _CRT_SECURE_NO_WARNINGS)
 set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS -DNOMINMAX")
 set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") #no asynchronous structured exception handling

@@ -98,6 +98,7 @@ int main(int argc, char *argv[]) {
 // -----------------------------------------------------------------------------------------------------

 // --------------------------- 3. Configure input & output ---------------------------------------------
+if (network.getOutputsInfo().size() != 1) throw std::logic_error("Sample supports topologies with 1 output only");

 // --------------------------- Prepare input blobs -----------------------------------------------------
 slog::info << "Preparing input blobs" << slog::endl;

@@ -214,7 +215,6 @@ int main(int argc, char *argv[]) {
 // --------------------------- 8. Process output -------------------------------------------------------
 slog::info << "Processing output blobs" << slog::endl;
 OutputsDataMap outputInfo(network.getOutputsInfo());
-if (outputInfo.size() != 1) throw std::logic_error("Sample supports topologies with 1 output only");
 Blob::Ptr outputBlob = inferRequest.GetBlob(outputInfo.begin()->first);

 /** Validating -nt value **/

@@ -24,6 +24,14 @@
 # define _AMD64_
 #endif

+#if defined(_M_ARM) && !defined(_ARM_) && !defined(_ARM64_)
+# define _ARM_
+#endif
+
+#if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_)
+# define _ARM64_
+#endif
+
 #include <string>
 #include <windef.h>
 #include <fileapi.h>

@@ -86,6 +86,7 @@ int main(int argc, char *argv[]) {

 // 2. Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format
 CNNNetwork network = ie.ReadNetwork(input_model);
+if (network.getOutputsInfo().size() != 1) throw std::logic_error("Sample supports topologies with 1 output only");
 network.setBatchSize(1);
 // -----------------------------------------------------------------------------------------------------

@@ -11,7 +11,6 @@ for %%A in ("%GNA%") do set GNA_FILENAME=%%~nxA
 for %%A in ("%OPENCV%") do set OPENCV_FILENAME=%%~nxA
 for %%A in ("%MYRIAD%") do set MYRIAD_FILENAME=%%~nxA
 for %%A in ("%HDDL%") do set HDDL_FILENAME=%%~nxA
-for %%A in ("%VPU_FIRMWARE_MA2450%") do set VPU_FIRMWARE_MA2450_FILENAME=%%~nxA
 for %%A in ("%VPU_FIRMWARE_MA2X8X%") do set VPU_FIRMWARE_MA2X8X_FILENAME=%%~nxA
 for %%A in ("%TBB%") do set TBB_FILENAME=%%~nxA

@@ -86,16 +85,6 @@ if not "%HDDL%"=="" (
 )
 )

-if not "%VPU_FIRMWARE_MA2450%"=="" (
-if not exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%" (
-mkdir "%DL_SDK_TEMP%\test_dependencies\VPU"
-powershell -command "iwr -outf '%DL_SDK_TEMP%\test_dependencies\VPU\_%VPU_FIRMWARE_MA2450_FILENAME%' %VPU_FIRMWARE_MA2450%"
-mkdir "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%"
-call "C:\Program Files\7-Zip\7z.exe" x -y %DL_SDK_TEMP%\test_dependencies\VPU\_%VPU_FIRMWARE_MA2450_FILENAME% -o%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%
-del "%DL_SDK_TEMP%\test_dependencies\VPU\_%VPU_FIRMWARE_MA2450_FILENAME%" /F /Q
-)
-)
-
 if not "%VPU_FIRMWARE_MA2X8X%"=="" (
 if not exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%" (
 mkdir "%DL_SDK_TEMP%\test_dependencies\VPU"

@@ -131,7 +120,7 @@ if not "%MYRIAD%"=="" (
 if exist "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%\mvnc" (
 echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%" intel64 /S /I /Y /R
 xcopy.exe "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%" intel64 /S /I /Y /R
 )
 )

 if exist "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%\..\bin\mvnc" (
 echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%\..\bin\*" intel64 /S /I /Y /R

@@ -139,13 +128,6 @@ if not "%MYRIAD%"=="" (
 )
 )

-if not "%VPU_FIRMWARE_MA2450%"=="" (
-if exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%" (
-echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%\*" intel64 /S /I /Y /R
-xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%\*" intel64 /S /I /Y /R
-)
-)
-
 if not "%VPU_FIRMWARE_MA2X8X%"=="" (
 if exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%" (
 echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" intel64 /S /I /Y /R

@@ -37,7 +37,7 @@ add_path() {
 fi
 }

-runtimes=(MKL CLDNN MYRIAD GNA DLIA OPENCV VPU_FIRMWARE_USB-MA2450 VPU_FIRMWARE_USB-MA2X8X HDDL OMP TBB AOCL_RTE LIBUSB)
+runtimes=(MKL CLDNN MYRIAD GNA DLIA OPENCV VPU_FIRMWARE_USB-MA2X8X HDDL OMP TBB AOCL_RTE LIBUSB)

 export_library_path() {
 export LD_LIBRARY_PATH=$DL_SDK_TEMP/test_dependencies/$1:$LD_LIBRARY_PATH

@@ -4,6 +4,7 @@

 #include <vector>
 #include <iostream>
+#include <cmath>

 #include <runtime/pwl.h>
 #include <gna_slope_scale.h>

@@ -413,12 +414,12 @@ void make_gna_pwl(const DnnActivation fun,
 y_upper = tmp;
 }

-int64_t x_lower_new = FLOAT_TO_INT32((x_lower / in_scale) / abs(pow_scale) * in_scale);
-int64_t x_upper_new = FLOAT_TO_INT32((x_upper / in_scale) / abs(pow_scale) * in_scale);
+int64_t x_lower_new = FLOAT_TO_INT32((x_lower / in_scale) / std::fabs(pow_scale) * in_scale);
+int64_t x_upper_new = FLOAT_TO_INT32((x_upper / in_scale) / std::fabs(pow_scale) * in_scale);
 x_lower = static_cast<int32_t>(x_lower_new);
 x_upper = static_cast<int32_t>(x_upper_new);
 if (x_lower_new < INT32_MIN) {
-int16_t offset_lower = abs(x_lower_new - INT32_MIN) / in_scale * out_scale;
+int16_t offset_lower = std::abs(x_lower_new - INT32_MIN) / in_scale * out_scale;
 x_lower = INT32_MIN;
 y_lower = y_lower + offset_lower;
 }

@@ -132,6 +132,22 @@ uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) {
 return reqConfId;
 }

+uint32_t GNADeviceHelper::getNumberOfGnaDevices() {
+std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
+uint32_t numberOfGnaDevices = 0;
+auto status = Gna2DeviceGetCount(&numberOfGnaDevices);
+checkGna2Status(status);
+return numberOfGnaDevices;
+}
+
+uint32_t GNADeviceHelper::selectGnaDevice() {
+const auto deviceCount = getNumberOfGnaDevices();
+if (deviceCount != 1) {
+THROW_GNA_EXCEPTION << "Unsupported number of GNA devices detected = " << deviceCount;
+}
+return 0;
+}
+
 void GNADeviceHelper::checkGna2Status(Gna2Status status, const Gna2Model& gnaModel) {
 if (!Gna2StatusIsSuccessful(status)) {
 std::vector<char> gna2StatusBuffer(1024);

@@ -69,12 +69,13 @@ public:
 bool isPerformanceMeasuring = false) :
 isPerformanceMeasuring(isPerformanceMeasuring) {
 #else
 explicit GNADeviceHelper(Gna2DeviceVersion gna2HwConsistency = Gna2DeviceVersionSoftwareEmulation,
 uint8_t lib_async_n_threads = 1,
 bool use_openmp = false,
 bool isPerformanceMeasuring = false) :
 gna2HwConsistency(gna2HwConsistency),
-isPerformanceMeasuring(isPerformanceMeasuring) {
+isPerformanceMeasuring(isPerformanceMeasuring),
+nGnaDeviceIndex{selectGnaDevice()} {
 #endif
 open(lib_async_n_threads);
 initGnaPerfCounters();

@@ -116,6 +117,8 @@ public:
 #endif
 void releaseModel(const uint32_t model_id);
 uint32_t createRequestConfig(const uint32_t model_id);
+static uint32_t getNumberOfGnaDevices();
+static uint32_t selectGnaDevice();
 bool hasGnaHw() const {
 return Gna2DeviceVersionSoftwareEmulation != detectedGnaDevVersion;
 }

@@ -107,9 +107,10 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
 switch (header.version.minor) {
 case 1:
 readBits(tempHeader2dot1, is);
-header = Header2dot2::ModelHeader(tempHeader2dot1);
+header = Header2dot3::ModelHeader(tempHeader2dot1);
 break;
 case 2:
+case 3:
 readBits(header, is);
 break;
 default:

@@ -166,7 +167,30 @@ void GNAModelSerial::Import(void *basePointer,
 InferenceEngine::OutputsDataMap& outputsDataMap) {
 is.exceptions(std::istream::failbit);

+if (modelHeader.version.major == 2) {
+if (modelHeader.version.minor >= 3) {
+for (auto inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
+uint32_t nameSize = 0;
+readNBits<32>(nameSize, is);
+std::string inName("", nameSize);
+readNBytes(&inName[0], nameSize, is);
+inputNames.push_back(inName.substr(0, nameSize - 1));
+}
+}
+}
 ImportInputs(is, basePointer, inputsDesc, inputsDataMap);

+if (modelHeader.version.major == 2) {
+if (modelHeader.version.minor >= 3) {
+for (auto inputIndex = 0; inputIndex < modelHeader.nOutputs; inputIndex++) {
+uint32_t nameSize = 0;
+readNBits<32>(nameSize, is);
+std::string outName("", nameSize);
+readNBytes(&outName[0], nameSize, is);
+outputNames.push_back(outName.substr(0, nameSize - 1));
+}
+}
+}
 ImportOutputs(is, basePointer, desc, outputsDataMap);

 for (auto operation = gna2Model->Operations; operation != gna2Model->Operations + gna2Model->NumberOfOperations; ++operation) {

@@ -311,9 +335,19 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea

 writeBits(header, os);

+for (auto &name : inputNames) {
+const auto nameSize = strlen(name.c_str()) + 1;
+writeBits(static_cast<uint32_t>(nameSize), os);
+writeNBytes(name.c_str(), nameSize , os);
+}
 for (const auto &input : inputs) {
 writeBits(convert_to_serial(input), os);
 }
+for (auto &name : outputNames) {
+const auto nameSize = strlen(name.c_str()) + 1;
+writeBits(static_cast<uint32_t>(nameSize), os);
+writeNBytes(name.c_str(), nameSize, os);
+}
 for (const auto &output : outputs) {
 writeBits(convert_to_serial(output), os);
 }

@@ -691,7 +725,8 @@ void GNAModelSerial::ImportInputs(std::istream &is,
 dataMap.clear();

 for (auto inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
-std::string name = "input" + std::to_string(inputIndex);
+const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
+? inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex));
 HeaderLatest::RuntimeEndPoint input;
 is.read(reinterpret_cast<char *>(&input), sizeof(input));
 inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset));

@@ -719,7 +754,8 @@ void GNAModelSerial::ImportOutputs(std::istream &is,
 desc.resize(modelHeader.nOutputs);

 for (auto outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
-std::string name = "output" + std::to_string(outputIndex);
+const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
+? outputNames.at(outputIndex) : std::string("output" + std::to_string(outputIndex));
 HeaderLatest::RuntimeEndPoint output;
 is.read(reinterpret_cast<char *>(&output), sizeof(output));
 OutputDesc description;

@ -32,6 +32,8 @@ private:
#endif
std::vector<GNAPluginNS::HeaderLatest::RuntimeEndPoint> inputs;
std::vector<GNAPluginNS::HeaderLatest::RuntimeEndPoint> outputs;
std::vector<std::string> inputNames;
std::vector<std::string> outputNames;
uint32_t nRotateRows = 0;
uint32_t nRotateColumns = 0;
bool doRotateInput = false;

@ -63,6 +65,13 @@ private:
const InferenceEngine::OutputsDataMap& outputsDataMap) : gna2Model(model),
inputs(serializeInputs(inputsDataMap, inputDesc)),
outputs(serializeOutputs(outputsDataMap, outputsDesc)) {
for (auto const& input : inputsDataMap) {
inputNames.push_back(input.first);
}

for (auto const& output : outputsDataMap) {
outputNames.push_back(output.first);
}
}

#else

@ -36,8 +36,8 @@ Parameter GNAPlugin::GetMetric(const std::string& name, const std::map<std::stri
}

if (!options.count(KEY_DEVICE_ID)) {
if (availableDevices.size() == 1) {
return availableDevices[0];
if (availableDevices.size() == 1 || availableDevices.size() == 2) {
return availableDevices.back(); // detection order is GNA_SW, GNA_HW
} else {
THROW_GNA_EXCEPTION << "KEY_DEVICE_ID not set in request for FULL_DEVICE_NAME";
}

@ -631,11 +631,25 @@ void InsertIdentityLayerPass::run() {
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
for (auto & l : *pLayers) {
for (auto && prev : getCandidatesForIdentityInsertion(l)) {
// Do an upstream search until a functional layer is found
auto original_prev_layer = prev;
auto true_layer = l;
while (LayerInfo(prev).isNonFunctional()) {
if (CNNNetHasPrevLayer(prev.get()) && prev->outData.size() == 1) {
true_layer = prev;
prev = CNNNetPrevLayer(prev);
} else {
gnawarn() << "Could not find functional parent for " << original_prev_layer->name << ", using original layer";
prev = original_prev_layer;
true_layer = l;
break;
}
}
int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++;
// actual insertion
auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers);

gnalog() << "Inserted " << activationName << " between: " << prev->name << " and " << l->name << "\n" << std::flush;
gnalog() << "Inserted " << activationName << " between: " << prev->name << " and " << true_layer->name << "\n" << std::flush;

CNNLayerPtr activationLayer =
std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32}));

@ -643,17 +657,17 @@ void InsertIdentityLayerPass::run() {
// TODO: why is the index 0? Better to use direct indexing in getCandidateFunction
// detecting ins-data-idx
size_t insDataIdx = std::numeric_limits<size_t>::max();
for (size_t i = 0; i != l->insData.size(); i++) {
if (getCreatorLayer(l->insData[i].lock()).lock() == prev) {
for (size_t i = 0; i != true_layer->insData.size(); i++) {
if (getCreatorLayer(true_layer->insData[i].lock()).lock() == prev) {
insDataIdx = i;
break;
}
}
if (insDataIdx == std::numeric_limits<size_t>::max()) {
THROW_GNA_EXCEPTION << "cannot insert identity layer after " << prev->name << " and before " << l->name;
THROW_GNA_EXCEPTION << "cannot insert identity layer after " << prev->name << " and before " << true_layer->name;
}

auto inputData = l->insData[insDataIdx].lock();
auto inputData = true_layer->insData[insDataIdx].lock();

auto dataPtr = std::make_shared<Data>("identity_data_" + std::to_string(numOfIdentityLayers), inputData->getTensorDesc());
auto activationLayerWithQuant = quantized ?

@ -681,7 +695,7 @@ void InsertIdentityLayerPass::run() {
activationLayerWithQuant->params["original_num_rows"] = prev->params["original_num_rows"];
}

CNNNetworkInsertLayer(prev, notAll ? l : CNNLayerPtr(nullptr), activationLayerWithQuant);
CNNNetworkInsertLayer(prev, notAll ? true_layer : CNNLayerPtr(nullptr), activationLayerWithQuant);
}
}
}

@ -7,5 +7,5 @@
#include <cstdint>


#define FLOAT_TO_INT16(a) static_cast<int16_t>(((a) < 0)?((a) - 0.5):((a) + 0.5))
#define FLOAT_TO_INT32(a) static_cast<int32_t>(((a) < 0)?((a)-0.5):((a)+0.5))
#define FLOAT_TO_INT16(a) static_cast<int16_t>(((a) < 0)?((a) - 0.5f):((a) + 0.5f))
#define FLOAT_TO_INT32(a) static_cast<int32_t>(((a) < 0)?((a)-0.5f):((a)+0.5f))
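
The only change here is the f suffix on the 0.5 literals. A plain 0.5 is a double, so the whole rounding expression is silently promoted to double before the cast; 0.5f keeps the arithmetic in single precision. A minimal sketch of the promotion the suffix avoids:

```cpp
#include <type_traits>

int main() {
    float a = 1.0f;
    // With a double literal the addition is performed in double precision...
    static_assert(std::is_same<decltype(a + 0.5), double>::value, "promoted to double");
    // ...while the f suffix keeps the whole expression in float.
    static_assert(std::is_same<decltype(a + 0.5f), float>::value, "stays float");
    return 0;
}
```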

@ -0,0 +1,122 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <cstdint>
#include "backend/dnn_types.h"
#include "serial/headers/2dot1/gna_model_header.hpp"

#pragma pack(push, 1)


namespace GNAPluginNS {
namespace Header2dot3 {


/**
 * @brief Header version 2.3
 */
struct ModelHeader {
/**
 * @brief MagicNumber – "GNAM" in ASCII, equal to hex 0x474e414d
 */
char gnam[4] = {};
/**
 * @brief if the header size is not equal to sizeof(ModelHeader), some reserved data is appended at the end of the header;
 * usually this indicates a model version different from the one the current export function produces
 */
uint32_t headerSize = 0u;
struct Version {
/**
 * @details Version of format, major – unsigned int, ex: 0x0001;
 * every change in the header or in the layer definitions should be reflected in a version change;
 * for backward compatibility, new parsers can read old model versions with certain restrictions
 */
uint16_t major = 2u;
/**
 * @details Version of format, minor – unsigned int, corresponding, for example, to the build revision;
 * changes in the minor version do not affect the model layout
 */
uint32_t minor = 3u;
} version;
/**
 * @brief Memory required to be allocated using GNAAlloc()
 */
uint64_t gnaMemSize = 0ull;
/**
 * @brief Number of GNA Layers
 */
uint64_t layersCount = 0ull;
/**
 * @brief Grouping level
 */
uint32_t nGroup = 0u;
/**
 * Convolution-related settings - they affect the input transformation
 */
uint32_t nRotateRows = 0u;
uint32_t nRotateColumns = 0u;
bool doRotateInput = false;

uint32_t nInputs = 0u;
uint32_t nOutputs = 0u;

/**
 * Reserved data might be here
 */
ModelHeader() = default;
ModelHeader(GNAPluginNS::Header2dot1::ModelHeader const &old) {
gnaMemSize = old.gnaMemSize;
layersCount = old.layersCount;
nGroup = old.nGroup;
nRotateRows = old.nRotateRows;
nRotateColumns = old.nRotateColumns;
nInputs = old.nInputs;
nOutputs = old.nOutputs;
}
};
#pragma pack(pop)

/*
 * At runtime the endpoint is mostly the same as the serialized version, except for the descriptor field
 */
struct RuntimeEndPoint {
/**
 * if the scale factor differs from the one passed into infer, the network might need to be requantized
 */
float scaleFactor = 0;
/**
 * Pointer descriptor
 */
void* descriptor_ptr = nullptr;
/**
 * Endpoint resolution in bytes.
 */
uint32_t element_size = 0;
/**
 * Number of elements
 */
uint32_t elements_count = 0;
/**
 * Offset in bytes of pointer descriptor
 */
uint64_t descriptor_offset = 0ull;

intel_dnn_orientation_t orientation = kDnnUnknownOrientation;

RuntimeEndPoint() = default;
RuntimeEndPoint(double scaleFactor,
void* descriptor_ptr,
uint32_t element_size,
uint32_t elements_count,
intel_dnn_orientation_t orientation) : scaleFactor(scaleFactor),
descriptor_ptr(descriptor_ptr),
element_size(element_size),
elements_count(elements_count),
orientation(orientation) {
}
};
} // namespace Header2dot3
} // namespace GNAPluginNS
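
The headerSize field gives the format its forward compatibility: a reader built against an older struct can skip whatever reserved tail a newer exporter appended. A hedged sketch of that pattern (readCompatibleHeader is an illustrative helper, not the plugin's API):

```cpp
#include <istream>

// Read a possibly newer header by honoring headerSize: consume the fields
// this build knows about, then skip any reserved bytes that follow them.
template <typename Header>
Header readCompatibleHeader(std::istream& is) {
    Header header{};
    is.read(reinterpret_cast<char*>(&header), sizeof(header));
    if (header.headerSize > sizeof(header)) {
        // A newer exporter wrote extra reserved data; step over it.
        is.seekg(header.headerSize - sizeof(header), std::ios_base::cur);
    }
    return header;
}
```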

@ -4,11 +4,11 @@

#pragma once

#include "serial/headers/2dot2/gna_model_header.hpp"
#include "serial/headers/2dot3/gna_model_header.hpp"

namespace GNAPluginNS {
namespace HeaderLatest {
using ModelHeader = GNAPluginNS::Header2dot2::ModelHeader;
using RuntimeEndPoint = GNAPluginNS::Header2dot2::RuntimeEndPoint;
using ModelHeader = GNAPluginNS::Header2dot3::ModelHeader;
using RuntimeEndPoint = GNAPluginNS::Header2dot3::RuntimeEndPoint;
}
}

@ -12,7 +12,7 @@ file (GLOB LIBRARY_SRC

# TODO: WA for OneHot pass usage in reshape
set(LEGACY_SRC_ROOT "${IE_MAIN_SOURCE_DIR}/src/legacy_api/src/")
list(APPEND LIBRARY_SRC
set(LEGACY_LIBRARY_SHARED_SRCS
"${LEGACY_SRC_ROOT}/transformations/convert_opset1_to_legacy/convert_one_hot_to_one_hot_ie.cpp"
"${LEGACY_SRC_ROOT}/ngraph_ops/onehot_ie.cpp")

@ -125,6 +125,7 @@ add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}_obj)

add_library(${TARGET_NAME} SHARED
${IE_STATIC_DEPENDENT_FILES}
${LEGACY_LIBRARY_SHARED_SRCS}
${vs_version_file}
$<TARGET_OBJECTS:${TARGET_NAME}_obj>)

@ -137,7 +138,8 @@ target_link_libraries(${TARGET_NAME} PRIVATE pugixml openvino::itt ${CMAKE_DL_LI
${NGRAPH_LIBRARIES} inference_engine_transformations)

target_include_directories(${TARGET_NAME} INTERFACE ${PUBLIC_HEADERS_DIR}
PRIVATE $<TARGET_PROPERTY:${TARGET_NAME}_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>)
PRIVATE $<TARGET_PROPERTY:${TARGET_NAME}_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:${TARGET_NAME}_legacy,INTERFACE_INCLUDE_DIRECTORIES>)

if(WIN32)
set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})

@ -371,19 +371,42 @@ inline CNNLayerSet CNNNetGetAllInputLayers(const ICNNNetwork& network) {
InputsDataMap inputs;
network.getInputsInfo(inputs);

OutputsDataMap outputs;
network.getOutputsInfo(outputs);

std::vector<DataPtr> entryDataSet;
entryDataSet.reserve(inputs.size() + outputs.size());
for (const auto &kvp : inputs)
entryDataSet.push_back(kvp.second->getInputData());
for (const auto &kvp : outputs)
entryDataSet.push_back(kvp.second);

CNNLayerSet inputLayers;
std::unordered_set<CNNLayer*> allLayers;

if (inputs.empty()) return inputLayers;
if (entryDataSet.empty()) return inputLayers;

for (const auto& input : inputs) {
auto& secondLayers = getInputTo(input.second->getInputData());
// find any layer connected to the provided Data object (consumer or creator)
auto findConnectedLayer = [] (const DataPtr& data) -> CNNLayerPtr {
auto consumerLayers = getInputTo(data);
if (!consumerLayers.empty())
return consumerLayers.begin()->second;

if (secondLayers.empty()) continue;
auto creator = getCreatorLayer(data).lock();
if (creator != nullptr)
return creator;

return nullptr;
};

for (const auto& data : entryDataSet) {
auto entryLayer = findConnectedLayer(data);

if (entryLayer == nullptr) continue;

details::UnorderedDFS(
allLayers, secondLayers.begin()->second,
[&](CNNLayerPtr layer) {
allLayers, entryLayer,
[&inputLayers](const CNNLayerPtr& layer) {
if (layer->insData.empty()) {
inputLayers.insert(layer);
}

@ -132,13 +132,6 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr<ngraph::Node>& layer) {
THROW_IE_EXCEPTION << "Cannot cast layer to TensorIterator.";
}

std::map<uint64_t, std::vector<std::pair<std::string, uint64_t>>> ngraph_parameter_id_to_ie_layer_port;
std::map<std::pair<std::string, uint64_t>, uint64_t> ie_layer_port_to_tensor_iterator_input_id;

// inputs/outputs of TensorIterator body (ie)
std::map<std::string, DataPtr> in_info_map;
std::map<std::string, DataPtr> out_info_map;

// inputs/outputs of TensorIterator (ngraph representation)
auto parameters = tensor_iterator->get_function()->get_parameters();
auto results = tensor_iterator->get_function()->get_results();

@ -148,10 +141,7 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr<ngraph::Node>& layer) {
// IE TensorIterator doesn't include such nodes so we create CNNNetwork in a separate scope
// to call the destructor and delete these "Input"/data nodes.

// These layers will hold the necessary subnet after destruction of CNNNetwork.
std::set<InferenceEngine::CNNLayerPtr> body_input_layers;
// This map will save information about data nodes
std::map<std::string, std::vector<TensorDesc>> layer_name_to_tensor_desc;
TensorIterator::Body body;
{
CNNNetwork body_net(tensor_iterator->get_function());
CNNNetwork net(InferenceEngine::details::convertFunctionToICNNNetwork(body_net.getFunction(), body_net));

@ -163,73 +153,102 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr<ngraph::Node>& layer) {
}

// Get inputs/outputs of the cnn network
InputsDataMap in_info_map_with_parameters;
in_info_map_with_parameters = net.getInputsInfo();
out_info_map = net.getOutputsInfo();
auto in_info_map_with_parameters = net.getInputsInfo();
auto out_info_map = net.getOutputsInfo();

// Fill the map to get layer and port of the body by the parameter index.
IE_ASSERT(in_info_map_with_parameters.size() == parameters.size());
IE_ASSERT(out_info_map.size() == results.size());

InferenceEngine::TensorIterator::Body temp_body;
temp_body.inputs.resize(in_info_map_with_parameters.size());
temp_body.outputs.resize(out_info_map.size());

// Fill inputs/outputs in an order aligned with the ngraph representation
uint64_t counter = 0;
for (const auto& param : parameters) {
auto info = in_info_map_with_parameters.at(param->get_friendly_name());
auto data_ptr = info->getInputData();
auto input_to = getInputTo(data_ptr);
for (const auto& next_layer : input_to) {
auto port_idx = find_input_idx(next_layer.second, data_ptr);
ngraph_parameter_id_to_ie_layer_port[counter].push_back({next_layer.first, port_idx});
temp_body.inputs[counter++] = info->getInputData();
}

auto map_ng_result_to_ie_name = [] (std::shared_ptr<ngraph::op::v0::Result> res_op) {
auto result = res_op->input(0).get_source_output();

std::string name = result.get_node()->get_friendly_name();
if (result.get_node()->get_output_size() > 1) {
name += "." + std::to_string(result.get_index());
}
counter++;
}
return name;
};

// Temporary body to call deep copy
InferenceEngine::TensorIterator::Body temp_body;
for (const auto& in : in_info_map_with_parameters) {
temp_body.inputs.emplace_back(in.second->getInputData());
}

for (const auto& out : out_info_map) {
temp_body.outputs.emplace_back(out.second);
counter = 0;
for (const auto& result : results) {
auto data = out_info_map.at(map_ng_result_to_ie_name(result));
temp_body.outputs[counter++] = data;
}

// This deep copy will hold all unreachable constants. See the comment in the CopyTIBody function.
auto deep_cp_body = InferenceEngine::NetPass::CopyTIBody(temp_body);
for (const auto& data_ptr : deep_cp_body.inputs) {
auto input_to = getInputTo(data_ptr);
for (const auto& node : input_to) {
// Make it compatible with ir v7: delete Input layers in body
if (node.second->type != "Input") {
body_input_layers.emplace(node.second);
// Save information about data nodes to re-create them with correct names.
for (const auto& data : node.second->insData) {
layer_name_to_tensor_desc[node.second->name].emplace_back(data.lock()->getTensorDesc());
}
}
body = InferenceEngine::NetPass::CopyTIBody(temp_body);

// Check if data is really a const-layer holder
auto is_constant_holder = [] (const DataPtr data) {
return data->getPrecision() == Precision::UNSPECIFIED;
};

// Strip the unreachable-node holder from the Inputs node.
auto holder = body.inputs.back();
if (is_constant_holder(holder)) {
auto& holder_map = getInputTo(holder);
// remove_if
for (auto it = holder_map.begin(); it != holder_map.end(); ) {
if (it->second->type == "Input")
it = holder_map.erase(it);
else
++it;
}
}

for (const auto& data_ptr : deep_cp_body.outputs) {
out_info_map[data_ptr->getName()] = data_ptr;
}
}
// TODO: Disable this WA after the total switch onto Ngraph
// WA: Some plugins (like GPU) require matching of Data object name and producer Layer name.
// Data name is expected in format "[layer_name]" or "[layer_name].[port_idx]" in case
// of multiple inputs. We have to restore it if possible and ignore original names of
// Ngraph parameter and result ops.
// Will not change the data name if:
// - data has several consumer layers
// - data has no consumer (for example, if the data is used directly as an output)
//
for (auto &in : body.inputs) {
if (is_constant_holder(in))
continue;

auto holder = std::make_shared<Data>("const_holder", Precision::UNSPECIFIED);
for (const auto& input_layer : body_input_layers) {
// Save all constants to the holder so that they are not deleted.
if (input_layer->insData.empty()) {
getInputTo(holder)[input_layer->name] = input_layer;
continue;
const auto input_to = getInputTo(in);
if (input_to.size() != 1)
continue;

const auto consumer_layer = input_to.begin()->second;
const auto consumer_in_port_set = consumer_layer->insData;
const auto found = std::find_if(consumer_in_port_set.begin(), consumer_in_port_set.end(),
[&in] (const DataWeakPtr &wptr) { return wptr.lock() == in; });
IE_ASSERT(found != consumer_in_port_set.end());
const auto consumer_port_idx = std::distance(consumer_in_port_set.begin(), found);

auto new_name = consumer_layer->name;
if (consumer_in_port_set.size() > 1) {
new_name += '.' + std::to_string(consumer_port_idx);
}
in->setName(new_name);
}

// Re-create the data nodes with the correct names and fill inputs of TensorIterator (ie)
for (size_t i = 0; i < input_layer->insData.size(); i++) {
if (!input_layer->insData[i].lock()) {
std::string data_name = (input_layer->insData.size() == 1)
? input_layer->name
: input_layer->name + "." + std::to_string(i);

DataPtr data(new Data(data_name, layer_name_to_tensor_desc[input_layer->name][i]));
input_layer->insData[i] = data;
getInputTo(data)[input_layer->name] = input_layer;
in_info_map[data_name] = data;
// TODO: this WA restores the original precisions of outputs.
// convertFunctionToICNNNetwork has an internal fallback policy for unsupported
// precisions on input/output ports. In particular, U8 will be translated
// to FP32. However, the Loop body has strict requirements for the continue_condition
// port: it should be BOOL (U8).
//
for (int i = 0; i < results.size(); i++) {
auto result = results[i];
auto output = body.outputs[i];
if (result->get_element_type() == ngraph::element::u8) {
output->setPrecision(InferenceEngine::Precision::U8);
}
}
}

@ -238,44 +257,11 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr<ngraph::Node>& layer) {
LayerParams params = {layer->get_friendly_name(), "TensorIterator",
details::convertPrecision(layer->get_output_element_type(0))};
auto res = std::make_shared<InferenceEngine::TensorIterator>(params);

// Body: inputs
uint64_t counter = 0;
for (const auto& in : in_info_map) {
res->body.inputs.emplace_back(in.second);

// Fill the map to get the input index by layer and port of the body.
auto input_to = getInputTo(in.second);
for (const auto& next_layer : input_to) {
auto port_idx = find_input_idx(next_layer.second, in.second);
ie_layer_port_to_tensor_iterator_input_id[{next_layer.first, port_idx}] = counter;
}
counter++;
}
// the holder should be the last input element.
res->body.inputs.emplace_back(holder);

// Body: outputs
for (const auto& out : out_info_map) {
res->body.outputs.emplace_back(out.second);
}
res->body = body;

// Port map: outputs
for (const auto& desc : tensor_iterator->get_output_descriptions()) {
auto result = results[desc->m_body_value_index]->input(0).get_source_output();

std::string name = result.get_node()->get_friendly_name();
if (result.get_node()->get_output_size() > 1) {
name += "." + std::to_string(result.get_index());
}
auto output_layer = out_info_map.at(name);

// Find the index in outputs of the IE TensorIterator body
auto it = std::find(res->body.outputs.begin(), res->body.outputs.end(), output_layer);
if (it == res->body.outputs.end()) {
THROW_IE_EXCEPTION << "Output layer not found.";
}
auto body_output_idx = it - res->body.outputs.begin();
auto body_output_idx = desc->m_body_value_index;

std::string type_name = desc->get_type_info().name;
if (type_name == "ConcatOutputDescription") {

@ -301,56 +287,44 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr<ngraph::Node>& layer) {

// Port map: inputs and back edges
for (const auto& desc : tensor_iterator->get_input_descriptions()) {
for (const auto& mapping : ngraph_parameter_id_to_ie_layer_port[desc->m_body_parameter_index]) {
auto body_input_index = ie_layer_port_to_tensor_iterator_input_id.at(mapping);
std::string type_name = desc->get_type_info().name;
auto body_input_index = desc->m_body_parameter_index;

if (type_name == "SliceInputDescription") {
auto input_desc = ::ngraph::as_type_ptr<ngraph::op::TensorIterator::SliceInputDescription>(desc);
IE_ASSERT(input_desc != nullptr);
if (const auto slice_desc = std::dynamic_pointer_cast<ngraph::op::TensorIterator::SliceInputDescription>(desc)) {
res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap {
static_cast<int>(slice_desc->m_input_index), static_cast<int>(body_input_index),
static_cast<int>(slice_desc->m_axis), static_cast<int>(slice_desc->m_stride),
static_cast<int>(slice_desc->m_start), static_cast<int>(slice_desc->m_end),
static_cast<int>(slice_desc->m_part_size)});
} else if (const auto merge_desc = std::dynamic_pointer_cast<ngraph::op::TensorIterator::MergedInputDescription>(desc)) {
res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap {
static_cast<int>(merge_desc->m_input_index), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});

res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap {
static_cast<int>(input_desc->m_input_index), static_cast<int>(body_input_index),
static_cast<int>(input_desc->m_axis), static_cast<int>(input_desc->m_stride),
static_cast<int>(input_desc->m_start), static_cast<int>(input_desc->m_end),
static_cast<int>(input_desc->m_part_size)});
} else if (type_name == "MergedInputDescription") {
auto input_desc = ::ngraph::as_type_ptr<ngraph::op::TensorIterator::MergedInputDescription>(desc);
IE_ASSERT(input_desc != nullptr);
auto body_output_idx = merge_desc->m_body_value_index;

res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap {
static_cast<int>(input_desc->m_input_index), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});

auto result = results[input_desc->m_body_value_index]->inputs()[0].get_source_output();

// Create the correct name for the output.
std::string output_name = result.get_node()->get_friendly_name();
if (result.get_node()->get_output_size() > 1) {
output_name += "." + std::to_string(result.get_index());
}

auto output_layer = out_info_map.at(output_name);
// Find the index in outputs of the IE TensorIterator body
auto it = std::find(res->body.outputs.begin(), res->body.outputs.end(), output_layer);
if (it == res->body.outputs.end()) {
THROW_IE_EXCEPTION << "Output layer not found.";
}
auto body_output_idx = it - res->body.outputs.begin();

res->back_edges.emplace_back(InferenceEngine::TensorIterator::PortMap {
static_cast<int>(body_output_idx), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});
} else if (type_name == "InvariantInputDescription") {
auto input_desc = ::ngraph::as_type_ptr<ngraph::op::TensorIterator::InvariantInputDescription>(desc);
IE_ASSERT(input_desc != nullptr);

res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap {
static_cast<int>(input_desc->m_input_index), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});
} else {
THROW_IE_EXCEPTION << "Incorrect type of the input description.";
}
res->back_edges.emplace_back(InferenceEngine::TensorIterator::PortMap {
static_cast<int>(body_output_idx), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});
} else if (const auto inv_desc = std::dynamic_pointer_cast<ngraph::op::TensorIterator::InvariantInputDescription>(desc)) {
res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap {
static_cast<int>(inv_desc->m_input_index), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});
} else {
THROW_IE_EXCEPTION << "Incorrect type of the input description.";
}
}

if (const auto loop_op = std::dynamic_pointer_cast<const ngraph::opset5::Loop>(layer)) {
auto spec_port = loop_op->get_special_body_ports();
if (spec_port.current_iteration_input_idx != -1) {
auto ie_port_idx = spec_port.current_iteration_input_idx;
res->params["loop_body_current_iteration_idx"] = std::to_string(ie_port_idx);
}
if (spec_port.body_condition_output_idx != -1) {
auto body_output_idx = spec_port.body_condition_output_idx;
res->params["loop_body_condition_output_idx"] = std::to_string(body_output_idx);
}
res->params["loop_trip_count_idx"] = "0";
res->params["loop_execution_condition_idx"] = "1";
}

return res;
}
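
The seven integers passed to each PortMap above follow a fixed field order; axis == -1 marks a plain one-shot copy (used for merged/invariant inputs and back edges), while a non-negative axis describes per-iteration slicing. A standalone mimic of that layout, with hypothetical values, just to spell the fields out:

```cpp
#include <cstdio>

// Field order as used by the emplace_back calls above
// (mimic of InferenceEngine::TensorIterator::PortMap, not the real type).
struct PortMap {
    int from;       // external TensorIterator port index
    int to;         // body port index
    int axis;       // axis to slice along, or -1 for a whole-tensor copy
    int stride;     // +1 / -1: direction of iteration along `axis`
    int start;      // first element; negative values count from the end
    int end;        // one-past-last element; likewise
    int part_size;  // chunk width consumed or produced per iteration
};

int main() {
    PortMap slice{0, 0, 1, 1, 0, -1, 1};      // like a SliceInputDescription
    PortMap back_edge{2, 1, -1, 1, 0, -1, 1}; // like a back edge: one copy per step
    std::printf("slice axis=%d, back-edge axis=%d\n", slice.axis, back_edge.axis);
    return 0;
}
```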

@ -1173,14 +1147,6 @@ CNNLayer::Ptr NodeConverter<ngraph::op::ReverseSequence>::createLayer(const std:
return res;
}

template <>
CNNLayer::Ptr NodeConverter<ngraph::op::Reshape>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
LayerParams params = {layer->get_friendly_name(), "Reshape",
details::convertPrecision(layer->get_output_element_type(0))};
auto res = std::make_shared<InferenceEngine::ReshapeLayer>(params);
return res;
}

template <>
CNNLayer::Ptr NodeConverter<ngraph::op::ShapeOf>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
LayerParams params = {layer->get_friendly_name(), "ShapeOf",

@ -46,15 +46,28 @@ static std::vector<DataPtr> getAllInputs(const std::vector<DataPtr>& heads) {
CNNLayerSet inputLayers;
std::unordered_set<CNNLayer*> allLayers;

// find any layer connected to the provided Data object (consumer or creator)
auto findConnectedLayer = [] (const DataPtr& data) -> CNNLayerPtr {
auto consumerLayers = getInputTo(data);
if (!consumerLayers.empty())
return consumerLayers.begin()->second;

auto creator = getCreatorLayer(data).lock();
if (creator != nullptr)
return creator;

return nullptr;
};

// Define all start layers
for (const auto& data : heads) {
auto& secondLayers = getInputTo(data);
auto entryLayer = findConnectedLayer(data);

if (secondLayers.empty()) continue;
if (entryLayer == nullptr) continue;

details::UnorderedDFS(
allLayers, secondLayers.begin()->second,
[&](CNNLayerPtr layer) {
allLayers, entryLayer,
[&inputLayers](const CNNLayerPtr &layer) {
if (layer->insData.empty()) {
inputLayers.insert(layer);
}

@ -77,10 +90,17 @@ static std::vector<DataPtr> getAllInputs(const std::vector<DataPtr>& heads) {
std::vector<CNNLayerPtr> TIBodySortTopologically(const TensorIterator::Body& body) {
std::vector<CNNLayerPtr> all_layers;

auto all_input_layers = getAllInputs(body.inputs);
// In the case of a graph with several connected components,
// the total entry point is the union of [inputs] U [outputs].
// All internal nodes are reachable starting from it.
auto total_entry_point = body.inputs;
total_entry_point.insert(total_entry_point.end(),
body.outputs.begin(), body.outputs.end());

auto all_input_layers = getAllInputs(total_entry_point);
CNNNetForestDFS(
all_input_layers,
[&](CNNLayerPtr current) {
[&all_layers](const CNNLayerPtr &current) {
all_layers.push_back(current);
},
false);

@ -143,9 +163,17 @@ TensorIterator::Body CopyTIBody(const TensorIterator::Body& body, std::string su
}

TensorIterator::Body res;
for (auto& in : body.inputs) res.inputs.emplace_back(old2new_d[in.get()]);
for (auto& in : body.inputs) {
auto found = old2new_d.find(in.get());
IE_ASSERT(found != old2new_d.end());
res.inputs.emplace_back(found->second);
}

for (auto& out : body.outputs) res.outputs.emplace_back(old2new_d[out.get()]);
for (auto& out : body.outputs) {
auto found = old2new_d.find(out.get());
IE_ASSERT(found != old2new_d.end());
res.outputs.emplace_back(found->second);
}

// Fake holder.
// The graph itself is a shared_ptr set where parent holds child.

@ -110,64 +110,73 @@ void MKLDNNGraph::Replicate(const TensorIterator::Body &subgraph, const MKLDNNEx
this->_name = "subgraph";
this->reuse_io_tensors = false;

std::unordered_map<CNNLayerPtr, MKLDNNNodePtr> layer2node;
std::unordered_set<DataPtr> unused_data; // nodes which have no consumers (output or just unused)
// Map a data object onto its producer layer (node)
std::unordered_map<Data*, std::pair<MKLDNNNodePtr, int>> data2node;

auto _parent_port = [] (const DataPtr &data) -> int {
auto parent = getCreatorLayer(data).lock();
for (int i = 0; parent->outData.size(); i++)
if (data == parent->outData[i])
return i;
return -1;
};
// nodes which have no consumers (outputs or just unused), but are not marked as graph outputs.
// They will be stored separately as fake outputs.
std::unordered_set<DataPtr> unused_data;

auto _child_port = [] (const DataPtr &data, const CNNLayerPtr &layer) -> int {
for (int i = 0; layer->insData.size(); i++)
if (data == layer->insData[i].lock())
return i;
return -1;
};
// Step 1. Replicate input nodes
for (const auto &input : subgraph.inputs) {
if (input->getPrecision() == Precision::UNSPECIFIED) continue; // const node holder

auto creator = getCreatorLayer(input).lock();
if (creator == nullptr) {
creator.reset(new CNNLayer({input->getName(), "Input", input->getTensorDesc().getPrecision()}));
creator->outData.push_back(input);
}

// Replicate All Nodes in topological order
for (const auto layer : NetPass::TIBodySortTopologically(subgraph)) {
CNNLayerPtr _layer = layer;
const MKLDNNNodePtr node(MKLDNNNode::factory().create(creator, getEngine(), extMgr, weightsCache));
data2node[input.get()] = {node, 0};

const MKLDNNNodePtr node(MKLDNNNode::factory().create(_layer, getEngine(), extMgr, weightsCache));
graphNodes.push_back(node);
layer2node[layer] = node;
inputNodes[input->getName()] = node;

if (getInputTo(input).empty()) {
unused_data.insert(input);
}
}

// Step 2. Replicate all internal nodes.
for (const auto layer : NetPass::TIBodySortTopologically(subgraph)) {
const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)};
graphNodes.push_back(node);

for (int port = 0; port < layer->insData.size(); port++) {
auto data = layer->insData[port].lock();
auto parent_layer = getCreatorLayer(data).lock();
if (!parent_layer) continue; // no parent means that it is an input data node (or a memory/const layer)

auto parent_node = layer2node[parent_layer];
auto port_info = data2node[data.get()];
auto parent_node = port_info.first;
auto parent_port_idx = port_info.second;

MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(data), port));
MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, port));
node->addEdge(edge);
graphEdges.push_back(edge);
}
int out_port_idx = 0;
for (auto &out_data : layer->outData) {
data2node[out_data.get()] = {node, out_port_idx++};
if (getInputTo(out_data).empty()) {
unused_data.insert(out_data);
}
}
}

// Step 3. Add output nodes and output stubs for unused data objects.
for (const auto &output : subgraph.outputs) {
auto parent_layer = getCreatorLayer(output).lock();
auto parent_node = layer2node[parent_layer];
auto port_info = data2node[output.get()];
auto parent_node = port_info.first;
auto parent_port_idx = port_info.second;

CNNLayerPtr layer(new CNNLayer({"out_" + output->getName(), "Output", output->getTensorDesc().getPrecision()}));
layer->insData.push_back(output);

const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache));
const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)};

MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(output), 0));
MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0));
node->addEdge(edge);
graphEdges.push_back(edge);

graphNodes.push_back(node);
outputNodes.push_back(node);

@ -176,39 +185,20 @@ void MKLDNNGraph::Replicate(const TensorIterator::Body &subgraph, const MKLDNNEx

// Add a stub output node for unused data
for (auto to_stub_data : unused_data) {
auto parent_layer = getCreatorLayer(to_stub_data).lock();
auto parent_node = layer2node[parent_layer];
auto port_info = data2node[to_stub_data.get()];
auto parent_node = port_info.first;
auto parent_port_idx = port_info.second;

CNNLayerPtr layer(new CNNLayer({"stub_" + parent_layer->name, "Output", to_stub_data->getTensorDesc().getPrecision()}));
CNNLayerPtr layer(new CNNLayer({"stub_" + to_stub_data->getName(), "Output", to_stub_data->getTensorDesc().getPrecision()}));
layer->insData.push_back(to_stub_data);

const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache));

MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(to_stub_data), 0));
MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0));
node->addEdge(edge);
graphEdges.push_back(edge);
graphNodes.push_back(node);
}

// Replicate input nodes
for (const auto &input : subgraph.inputs) {
if (input->getName() == "const_holder") continue;

CNNLayerPtr layer(new CNNLayer({"in_" + input->getName(), "Input", input->getTensorDesc().getPrecision()}));
layer->outData.push_back(input);

const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache));

for (auto p : getInputTo(input)) {
auto consumer = p.second;
MKLDNNEdgePtr edge(new MKLDNNEdge(node, layer2node[consumer], 0, _child_port(input, consumer)));
node->addEdge(edge);
graphEdges.push_back(edge);
}

graphNodes.push_back(node);
inputNodes[input->getName()] = node;
}
}

void MKLDNNGraph::Replicate(const ICNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) {
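
The rewrite above replaces the layer2node map plus the linear _parent_port scan with a single data2node table that records, for every Data object, its producing node and output port. A standalone mimic of that bookkeeping, with hypothetical Node/Data stand-ins:

```cpp
#include <unordered_map>
#include <utility>
#include <vector>

struct Data {};
struct Node {
    std::vector<Data*> outputs;
};

int main() {
    // data2node: Data object -> (producer node, output port index).
    std::unordered_map<Data*, std::pair<Node*, int>> data2node;

    Node producer;
    Data a, b;
    producer.outputs = {&a, &b};

    // Register each output once, in port order...
    int port = 0;
    for (Data* d : producer.outputs) data2node[d] = {&producer, port++};

    // ...then "who produced b, and on which port?" is a single O(1) lookup
    // instead of a scan over the parent's outData for every edge.
    auto info = data2node.at(&b);
    return info.second == 1 ? 0 : 1;
}
```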

@ -76,6 +76,11 @@ public:
return outputNodes;
}

std::map<std::string, MKLDNNNodePtr>& GetInputNodes() {
return inputNodes;
}


mkldnn::engine getEngine() const {
return eng;
}

@ -600,7 +600,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndActivation(MKLDNNGraph &graph) {
return eltwiseNode &&
(eltwiseNode->getOpType() == Relu ||
(conv->getCnnLayer()->precision == Precision::FP32 &&
IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid})));
IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid,
Round})));
};

for (int i = 0; i < graphNodes.size(); i++) {

@ -678,7 +679,8 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra
if (eltwiseNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get Eltwise node " << childNode->getName();

if (IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid})) {
if (IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish,
Hsigmoid, Round})) {
return true;
} else if (IsOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu})) {
if (eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() != 2)

@ -1044,7 +1046,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph)

return ((eltwiseNode->getOpType() == MulAdd && node->getCnnLayer()->blobs.size() == 2) ||
(eltwiseNode->getOpType() == Prelu) ||
IsOneOf(eltwiseNode->getOpType(), {Relu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid}));
IsOneOf(eltwiseNode->getOpType(), {Relu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish,
Hsigmoid, Round}));
}

return false;

@ -1258,7 +1261,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
return eltwiseNode &&
(eltwiseNode->getOpType() == Relu ||
(conv->getCnnLayer()->precision == Precision::FP32 &&
IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid})));
IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid,
Round})));
};

for (auto &graphNode : graphNodes) {

@ -1611,7 +1615,7 @@ void MKLDNNGraphOptimizer::FuseNormalizeAndSimpleOperation(MKLDNNGraph &graph) {
if (eltwiseNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get Eltwise node " << node->getName();
return IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Tanh, Swish,
Hswish, Mish, Hsigmoid, Linear, Abs, Square, Sqrt}) ||
Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt}) ||
((eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() == 2) ||
(eltwiseNode->getOpType() == Prelu));
}

@ -75,6 +75,7 @@ static const InferenceEngine::details::caseless_unordered_map<std::string, Type>
{ "HSwish", Eltwise },
{ "Mish", Eltwise },
{ "HSigmoid", Eltwise },
{ "Round", Eltwise },
{ "ScaleShift", Eltwise },
{ "PReLU", Eltwise },
{ "Norm", Lrn },

@ -112,6 +113,7 @@ static const InferenceEngine::details::caseless_unordered_map<std::string, Type>
{ "BinaryConvolution", BinaryConvolution },
{ "DeformableConvolution", DeformableConvolution },
{ "TensorIterator", TensorIterator },
{ "Loop", TensorIterator },
{ "MemoryInput", MemoryInput}, // for construction from name ctor, arbitrary name is used
{ "Memory", MemoryOutput }, // for construction from layer ctor
{ "Convert", Convert },
|
||||
auto& eltwiseNode = dynamic_cast<const MKLDNNEltwiseNode&>(node);
|
||||
switch (eltwiseNode.getOpType()) {
|
||||
case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt:
|
||||
case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish: case Mish: case Hsigmoid:
|
||||
case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish:
|
||||
case Mish: case Hsigmoid: case Round:
|
||||
return jit_mkldnn_emitter::get_supported_precisions();
|
||||
case Add: return jit_add_emitter::get_supported_precisions();
|
||||
case MulAdd: return jit_mul_add_emitter::get_supported_precisions();
|
||||
@ -345,7 +346,8 @@ private:
|
||||
auto& eltwiseNode = dynamic_cast<const MKLDNNEltwiseNode&>(node);
|
||||
switch (eltwiseNode.getOpType()) {
|
||||
case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt:
|
||||
case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish: case Mish: case Hsigmoid:
|
||||
case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish:
|
||||
case Mish: case Hsigmoid: case Round:
|
||||
return std::make_shared<jit_mkldnn_emitter>(this, isa, eltwiseNode, exec_prec);
|
||||
case Add: return std::make_shared<jit_add_emitter>(this, isa, eltwiseNode, exec_prec);
|
||||
case MulAdd: return std::make_shared<jit_mul_add_emitter>(this, isa, eltwiseNode, exec_prec);
|
||||
@ -764,6 +766,18 @@ MKLDNNEltwiseNode::initializers = {
|
||||
opType = Hsigmoid;
|
||||
algorithm = mkldnn::eltwise_hsigmoid;
|
||||
}},
|
||||
{"round", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = 0.0f;
|
||||
beta = 0.0f;
|
||||
opType = Round;
|
||||
std::string mode = activationLayer->GetParamAsString("mode", "half_to_even");
|
||||
if (mode == "half_to_even")
|
||||
algorithm = mkldnn::eltwise_round_half_to_even;
|
||||
else if (mode == "half_away_from_zero")
|
||||
algorithm = mkldnn::eltwise_round_half_away_from_zero;
|
||||
else
|
||||
THROW_IE_EXCEPTION << "Round layer with name " << activationLayer->name << " doesn't support mode " << mode;
|
||||
}},
|
||||
};
|
||||
|

void MKLDNNEltwiseNode::init() {

@ -833,7 +847,8 @@ void MKLDNNEltwiseNode::init() {
comparator(layerType, "swish") ||
comparator(layerType, "hswish") ||
comparator(layerType, "mish") ||
comparator(layerType, "hsigmoid")) {
comparator(layerType, "hsigmoid") ||
comparator(layerType, "round")) {
initializers[layerType](getCnnLayer().get(), eltwiseOp, eltwiseAlgorithm, alpha, beta);
} else {
THROW_IE_EXCEPTION << "Unsupported algorithm for Eltwise node with name `" << getName() << "`.";

@ -843,7 +858,8 @@ void MKLDNNEltwiseNode::init() {
size_t MKLDNNEltwiseNode::getOpInputsNum() const {
switch (getOpType()) {
case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt: case PowerStatic:
case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish: case Mish: case Hsigmoid:
case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish:
case Mish: case Hsigmoid: case Round:
case LogicalNot:
return 1;
case Add: case Subtract: case Multiply: case Divide: case FloorMod: case Mod: case Maximum: case Minimum: case SquaredDifference:

@ -1469,7 +1485,8 @@ void MKLDNNEltwiseNode::executeReference(const std::vector<const uint8_t *>& src

switch (getOpType()) {
case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt:
case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish: case Mish: case Hsigmoid:
case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish:
case Mish: case Hsigmoid: case Round:
*dst_ptr_f = ref_eltwise_injector->compute_scalar(src_f[0]); break;
case Add: *dst_ptr_f = src_f[0] + src_f[1]; break;
case MulAdd: *dst_ptr_f = src_f[0] * src_f[1] + src_f[2]; break;

@ -1570,6 +1587,8 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops) {
case mkldnn::eltwise_hswish:
case mkldnn::eltwise_mish:
case mkldnn::eltwise_hsigmoid:
case mkldnn::eltwise_round_half_to_even:
case mkldnn::eltwise_round_half_away_from_zero:
ops.append_eltwise(1.0, getAlgorithm(), getAlpha(), getBeta());
break;
case mkldnn::depthwise_scale_shift:

@ -59,7 +59,8 @@ enum EltwiseOpType {
Prelu,
Mish,
Hswish,
Hsigmoid
Hsigmoid,
Round
};

struct jit_eltwise_params {

@ -2123,7 +2123,7 @@ bool MKLDNNInterpolateNode::canFuse(const MKLDNNNodePtr& node) const {
if (eltwiseNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get eltwise node " << node->getName();
return isOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp,
Tanh, Swish, Hswish, Mish, Hsigmoid, Linear, Abs, Square, Sqrt});
Tanh, Swish, Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt});
}

return false;

@ -11,7 +11,6 @@
#include <map>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include <legacy/graph_transformer.h>

using namespace mkldnn;
using namespace MKLDNNPlugin;

@ -50,96 +49,137 @@ static InferenceEngine::LayerConfig make_plain_config(const InferenceEngine::CNN

class PortIteratorHelper : public PortMapHelper {
public:
PortIteratorHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to,
bool as_input, const InferenceEngine::TensorIterator::PortMap &port_map, const mkldnn::engine& eng, int n_iter) : as_input(as_input) {
const auto &full_blob = as_input ? from : to;
const auto &part_blob = !as_input ? from : to;
PortIteratorHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, bool sliced_src,
const InferenceEngine::TensorIterator::PortMap &slice_rule, const mkldnn::engine& eng) {
const auto &full_blob = sliced_src ? from : to;
const auto &part_blob = !sliced_src ? from : to;

auto axis = port_map.axis;
auto stride = port_map.stride;
auto axis = slice_rule.axis;
auto stride = slice_rule.stride;

auto full_dims = full_blob->GetDims();
auto part_dims = part_blob->GetDims();

if (port_map.axis == -1) {
// simple copy mode. No iteration through this tensor
reorders.emplace_back(from->GetPrimitive(), to->GetPrimitive());
iter_count = n_iter;
auto abs_stride = std::abs(stride);
auto sign_of_stride = stride < 0.0f ? -1 : 1;

iter_count = full_dims[axis] / abs_stride;

full_dims[axis] = abs_stride;
IE_ASSERT(full_dims == part_dims) << "Shape mismatch for tensor iterator port";

// make chunk view
auto chunk_desc = full_blob->GetDescriptor();
chunk_desc.data.dims[axis] = abs_stride;
chunk_desc.data.layout_desc.blocking.padding_dims[axis] = abs_stride; // TODO: assumption that the tensor is plain

mem_holder.push_back(full_blob->GetPrimitive());
auto full_mem_handler = full_blob->GetPrimitive().get_data_handle();
mem_holder.emplace_back(mkldnn::memory::primitive_desc(chunk_desc, eng), full_mem_handler);
auto &chunk_mem_prim = mem_holder.back();

auto elem_size = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(chunk_desc.data.data_type));

chunk_stride_in_byte = chunk_desc.data.layout_desc.blocking.strides[0][axis] * elem_size * abs_stride;
chunk_offset_in_byte = sign_of_stride < 0 ? (iter_count - 1) * chunk_stride_in_byte : 0;
chunk_stride_in_byte *= sign_of_stride;

if (sliced_src) {
reorders.emplace_back(chunk_mem_prim, to->GetPrimitive());
} else {
auto abs_stride = std::abs(stride);
auto sign_of_stride = stride < 0.0f ? -1 : 1;

IE_ASSERT(n_iter == full_dims[axis] / abs_stride) << "Shape mismatch for tensor iterator port";

full_dims[axis] = abs_stride;
IE_ASSERT(full_dims == part_dims) << "Shape mismatch for tensor iterator port";

iter_count = n_iter;

// make chunk view
auto chunk_desc = full_blob->GetDescriptor();
chunk_desc.data.dims[axis] = abs_stride;
chunk_desc.data.layout_desc.blocking.padding_dims[axis] = abs_stride; // TODO: assumption that the tensor is plain

mem_holder.push_back(full_blob->GetPrimitive());
auto full_mem_handler = full_blob->GetPrimitive().get_data_handle();
mem_holder.emplace_back(mkldnn::memory::primitive_desc(chunk_desc, eng), full_mem_handler);
auto &chunk_mem_prim = mem_holder.back();

auto elem_size = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(chunk_desc.data.data_type));

chunk_stride_in_byte = chunk_desc.data.layout_desc.blocking.strides[0][axis] * elem_size * abs_stride;
chunk_offset_in_byte = sign_of_stride < 0 ? (iter_count - 1) * chunk_stride_in_byte : 0;
chunk_stride_in_byte *= sign_of_stride;

if (as_input) {
reorders.emplace_back(chunk_mem_prim, to->GetPrimitive());
} else {
reorders.emplace_back(from->GetPrimitive(), chunk_mem_prim);
}
reorders.emplace_back(from->GetPrimitive(), chunk_mem_prim);
}
}

void execute(int n_iter, mkldnn::stream strm) override {
if (chunk_stride_in_byte != 0) {
IE_ASSERT(n_iter < iter_count);
void execute(mkldnn::stream strm, int iter) override {
IE_ASSERT(iter >= 0 && iter < iter_count);

auto full_mem = mem_holder[FULL_DATA];
auto chunk_mem = mem_holder[CHUNK_DATA];
auto full_mem = mem_holder[FULL_DATA];
auto chunk_mem = mem_holder[CHUNK_DATA];

chunk_mem.set_data_handle(static_cast<uint8_t *>(full_mem.get_data_handle()) +
chunk_offset_in_byte + chunk_stride_in_byte * n_iter);
chunk_mem.set_data_handle(static_cast<uint8_t *>(full_mem.get_data_handle()) +
chunk_offset_in_byte + chunk_stride_in_byte * iter);

strm.submit({reorders.begin(), reorders.end()});
} else {
if (as_input ? n_iter == 0 : n_iter == (iter_count - 1))
strm.submit({reorders.begin(), reorders.end()});
}
};
strm.submit({reorders.begin(), reorders.end()});
}

private:
bool as_input;
ptrdiff_t chunk_stride_in_byte = 0;
ptrdiff_t chunk_offset_in_byte = 0;

const int FULL_DATA = 0;
const int CHUNK_DATA = 1;
int iter_count;
};

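
The chunk-view arithmetic above turns one big tensor into a movable window: the byte stride between consecutive chunks is scaled by the slice width, and a negative stride starts the window at the far end and walks backwards. A standalone check of that bookkeeping under the same assumptions (plain layout, element stride given in bytes):

```cpp
#include <cstdio>
#include <cstdlib>

int main() {
    const int axis_extent = 8;  // full_dims[axis]
    const int stride = -2;      // slice_rule.stride
    const int row_bytes = 4;    // bytes between consecutive elements on the axis

    const int abs_stride = std::abs(stride);
    const int sign = stride < 0 ? -1 : 1;
    const int iter_count = axis_extent / abs_stride;             // 4 iterations

    long step = static_cast<long>(row_bytes) * abs_stride;       // chunk_stride_in_byte
    const long offset = sign < 0 ? (iter_count - 1) * step : 0;  // chunk_offset_in_byte
    step *= sign;

    for (int iter = 0; iter < iter_count; ++iter)
        std::printf("iter %d -> byte offset %ld\n", iter, offset + step * iter);
    // Prints 24, 16, 8, 0: the window walks the axis in reverse.
    return 0;
}
```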
class BackEdgePortHelper : public PortMapHelper {
|
||||
public:
|
||||
BackEdgePortHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, const mkldnn::engine& eng, int n_iter) {
|
||||
auto mem_desc = from->GetDescriptor();
|
||||
mem_holder.emplace_back(mkldnn::memory::primitive_desc(mem_desc, eng));
|
||||
BackEdgePortHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, const mkldnn::engine& eng) {
|
||||
reorders.emplace_back(from->GetPrimitive(), to->GetPrimitive());
|
||||
iter_count = n_iter;
|
||||
}
|
||||
|
||||
void execute(int n_iter, mkldnn::stream strm) override {
|
||||
if (n_iter < iter_count - 1) {
|
||||
void execute(mkldnn::stream strm, int iter) override {
|
||||
if (iter != 0) {
|
||||
strm.submit({reorders.begin(), reorders.end()});
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
class IterCountPortHelper : public PortMapHelper {
|
||||
public:
|
||||
IterCountPortHelper(const MKLDNNMemoryPtr &to, const mkldnn::engine& eng) {
|
||||
// Only scalar I32 tensor is supported
|
||||
IE_ASSERT(to->GetDataType() == memory::s32);
|
||||
IE_ASSERT(to->GetDims() == memory::dims{1});
|
||||
mem_holder.push_back(to->GetPrimitive());
|
||||
}
|
||||
|
||||
void execute(mkldnn::stream strm, int n_iter) override {
|
||||
auto mem = mem_holder[0];
|
||||
auto data_ptr = static_cast<uint32_t*>(mem.get_data_handle());
|
||||
*data_ptr = n_iter;
|
||||
}
|
||||
};
|
||||
|
||||
class asBoolCheck : public PortChecker {
|
||||
public:
|
||||
asBoolCheck(const MKLDNNMemoryPtr &mem) {
|
||||
IE_ASSERT(mem->GetDataType() == memory::u8);
|
||||
IE_ASSERT(mem->GetDims() == memory::dims{1});
|
||||
mem_holder.push_back(mem->GetPrimitive());
|
||||
}
|
||||
|
||||
int getStatus() override {
|
||||
auto mem = mem_holder[0];
|
||||
auto data_ptr = static_cast<uint8_t*>(mem.get_data_handle());
|
||||
return *data_ptr == static_cast<uint8_t>(0) ? 0 : 1;
|
||||
}
|
||||
};
|
||||
|
||||
class asIntCheck : public PortChecker {
|
||||
public:
|
||||
asIntCheck(const MKLDNNMemoryPtr &mem) {
|
||||
IE_ASSERT(mem->GetDataType() == memory::s32);
|
||||
IE_ASSERT(mem->GetDims() == memory::dims{1});
|
||||
mem_holder.push_back(mem->GetPrimitive());
|
||||
}
|
||||
|
||||
int getStatus() override {
|
||||
auto mem = mem_holder[0];
|
||||
auto data_ptr = static_cast<uint32_t*>(mem.get_data_handle());
|
||||
return *data_ptr;
|
||||
}
|
||||
};
|
||||
|
||||
class staticValueCheck : public PortChecker {
|
||||
public:
|
||||
staticValueCheck(const int &value) : value(value) {}
|
||||
|
||||
int getStatus() override {
|
||||
return value;
|
||||
}
|
||||
private:
|
||||
int value;
|
||||
};
|
||||
|
||||
} // namespace MKLDNNPlugin
|
||||
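
Together these PortChecker implementations let one driver serve both a fixed-count TensorIterator (staticValueCheck) and a data-driven Loop (asIntCheck for the trip count, asBoolCheck for the conditions). A hedged sketch of how such checkers could gate the iteration, with simplified stand-in types:

```cpp
struct PortChecker {
    virtual ~PortChecker() = default;
    virtual int getStatus() = 0;
};

struct StaticValue : PortChecker {
    int value;
    explicit StaticValue(int v) : value(v) {}
    int getStatus() override { return value; }
};

// Run the body while the trip count and conditions allow it; the real node
// would also fire the port mappers before/after each iteration.
int runLoop(PortChecker& trip_count, PortChecker& initial_cond, PortChecker& continue_cond) {
    int executed = 0;
    bool go = initial_cond.getStatus() != 0;
    for (int i = 0; go && i < trip_count.getStatus(); ++i) {
        ++executed;  // body graph would be inferred here
        go = continue_cond.getStatus() != 0;
    }
    return executed;
}

int main() {
    StaticValue trips(5), cond(1);
    return runLoop(trips, cond, cond) == 5 ? 0 : 1;
}
```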

@ -157,25 +197,19 @@ void MKLDNNTensorIteratorNode::getSupportedDescriptors() {
sub_graph.CreateGraph(ti->body, ext_mng, weightCache);

// Try to detect inputs and outputs by indexes
std::map<std::string, MKLDNNNodePtr> in_map, out_map;
for (auto node : sub_graph.GetNodes())
if (node->getType() == Input) // filter by type Input
in_map[node->getName().substr(3)] = node; // remove "in_" prefix

for (auto node : sub_graph.GetOutputNodes())
out_map[node->getName().substr(4)] = node; // remove "out_" prefix

const auto &in_map = sub_graph.GetInputNodes();
for (const auto &in_data : ti->body.inputs) {
if (in_data->getName() == "const_holder") continue;

auto &in_node = in_map[in_data->getName()];
auto &in_node = in_map.at(in_data->getName());
auto in_mem = in_node->getChildEdgeAt(0)->getMemoryPtr();
input_mem.push_back(in_mem);
}

for (const auto &out_data : ti->body.outputs) {
auto &out_node = out_map[out_data->getName()];
auto out_mem = out_node->getParentEdgeAt(0)->getMemoryPtr();
// Assume that the order of outputs in the original TI and in the produced sub_graph is the same
const auto &out_vec = sub_graph.GetOutputNodes();
for (size_t i = 0; i < out_vec.size(); i++) {
auto out_mem = out_vec[i]->getParentEdgeAt(0)->getMemoryPtr();
output_mem.push_back(out_mem);
}
}
@ -194,52 +228,99 @@ void MKLDNNTensorIteratorNode::createPrimitive() {
    if (ti == nullptr)
        THROW_IE_EXCEPTION << "Cannot convert to TensorIterator layer.";

    const auto &eng = getEngine();

    for (auto map_rule : ti->input_port_map) {
        auto &extr_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr();
        auto &intr_mem = input_mem[map_rule.to];
        auto &from_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr();
        auto &to_mem = input_mem[map_rule.to];

        auto mapper = std::shared_ptr<PortMapHelper>(
                new PortIteratorHelper (extr_mem, intr_mem, true, map_rule, getEngine(), n_iter));

        in_port_mappers.push_back(mapper);
        if (map_rule.axis == -1)
            first_mappers.emplace_back(new BackEdgePortHelper(from_mem, to_mem, eng));
        else
            before_mappers.emplace_back(new PortIteratorHelper(from_mem, to_mem, true, map_rule, eng));
    }

    for (auto map_rule : ti->output_port_map) {
        auto &extr_mem = getChildEdgesAtPort(map_rule.from)[0]->getMemoryPtr();
        auto &intr_mem = output_mem[map_rule.to];
        auto &to_mem = getChildEdgesAtPort(map_rule.from)[0]->getMemoryPtr();
        auto &from_mem = output_mem[map_rule.to];

        auto mapper = std::shared_ptr<PortMapHelper>(
                new PortIteratorHelper (intr_mem, extr_mem, false, map_rule, getEngine(), n_iter));

        out_port_mappers.push_back(mapper);
        if (map_rule.axis == -1)
            last_mappers.emplace_back(new BackEdgePortHelper(from_mem, to_mem, eng));
        else
            after_mappers.emplace_back(new PortIteratorHelper(from_mem, to_mem, false, map_rule, eng));
    }

    for (auto map_rule : ti->back_edges) {
        auto from_mem = output_mem[map_rule.from];
        auto to_mem = input_mem[map_rule.to];

        auto mapper = std::shared_ptr<PortMapHelper>(
                new BackEdgePortHelper(from_mem, to_mem, getEngine(), n_iter));
        before_mappers.emplace_back(new BackEdgePortHelper(from_mem, to_mem, eng));
    }

        out_port_mappers.push_back(mapper);
    // special purpose ports
    constexpr auto key_cur_iter_port = "loop_body_current_iteration_idx";
    constexpr auto key_cond_port = "loop_body_condition_output_idx";
    constexpr auto key_trip_count_port = "loop_trip_count_idx";
    constexpr auto key_init_cond_port = "loop_execution_condition_idx";

    auto iter_idx_ports = ti->GetParamAsInts(key_cur_iter_port, {});
    for (auto idx : iter_idx_ports) {
        auto to_mem = input_mem[idx];
        before_mappers.emplace_back(new IterCountPortHelper(to_mem, eng));
    }

    auto condition_port_idx = ti->GetParamAsInt(key_cond_port, -1);
    if (condition_port_idx == -1) {
        continue_cond_check.reset(new staticValueCheck(true));  // always true
    } else {
        auto mem = output_mem[condition_port_idx];
        continue_cond_check.reset(new asBoolCheck(mem));
    }

    auto trip_count_port_idx = ti->GetParamAsInt(key_trip_count_port, -1);
    if (trip_count_port_idx == -1) {
        trip_count_check.reset(new staticValueCheck(n_iter));  // use the statically calculated number of iterations
    } else {
        auto mem = getParentEdgesAtPort(trip_count_port_idx)[0]->getMemoryPtr();
        trip_count_check.reset(new asIntCheck(mem));
    }

    auto init_cond_port_idx = ti->GetParamAsInt(key_init_cond_port, -1);
    if (init_cond_port_idx == -1) {
        initial_cond_check.reset(new staticValueCheck(true));
    } else {
        auto mem = getParentEdgesAtPort(init_cond_port_idx)[0]->getMemoryPtr();
        initial_cond_check.reset(new asBoolCheck(mem));
    }
}

void MKLDNNTensorIteratorNode::execute(mkldnn::stream strm) {
    sub_graph.ResetInferCount();

    for (int i = 0; i < n_iter; i++) {
    bool continue_cond = initial_cond_check->getStatus();
    int max_num_iter = trip_count_check->getStatus();

    for (auto &mapper : first_mappers)
        mapper->execute(strm);

    // use "i != max_num_iter" only to allow "-1" to work like an infinite loop
    for (int i = 0; i != max_num_iter && continue_cond; i++) {
        // copy data to subgraph iteration
        for (auto &mapper : in_port_mappers)
            mapper->execute(i, strm);
        for (auto &mapper : before_mappers)
            mapper->execute(strm, i);

        sub_graph.Infer();

        continue_cond = continue_cond_check->getStatus();

        // copy data from subgraph iteration to outputs
        // or next iteration inputs
        for (auto &mapper : out_port_mappers)
            mapper->execute(i, strm);
        // or to next iteration inputs
        for (auto &mapper : after_mappers)
            mapper->execute(strm, i);
    }

    for (auto &mapper : last_mappers)
        mapper->execute(strm);
}

bool MKLDNNTensorIteratorNode::created() const {
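Taken together, the hunk above replaces the fixed `n_iter` loop with a dynamically controlled one. The schedule it implements can be condensed as follows; this is only a paraphrase of the `execute()` body shown in the diff, not code from the commit (the enclosing class is elided):

```cpp
// Condensed control flow of the new MKLDNNTensorIteratorNode::execute():
// first_mappers  - run once before the loop (axis == -1 input rules)
// before_mappers - run before every iteration (sliced inputs, back edges, iteration counter)
// after_mappers  - run after every iteration (sliced outputs)
// last_mappers   - run once after the loop (axis == -1 output rules)
void execute_sketch(mkldnn::stream strm) {
    bool continue_cond = initial_cond_check->getStatus();  // value in [0, 1]
    int max_num_iter = trip_count_check->getStatus();      // value >= -1

    for (auto &mapper : first_mappers)
        mapper->execute(strm);

    // "i != max_num_iter" (rather than "<") lets max_num_iter == -1 keep looping
    // until the body's condition output flips continue_cond to 0.
    for (int i = 0; i != max_num_iter && continue_cond; i++) {
        for (auto &mapper : before_mappers)
            mapper->execute(strm, i);

        sub_graph.Infer();
        continue_cond = continue_cond_check->getStatus();

        for (auto &mapper : after_mappers)
            mapper->execute(strm, i);
    }

    for (auto &mapper : last_mappers)
        mapper->execute(strm);
}
```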
@ -13,16 +13,35 @@

namespace MKLDNNPlugin {

/**
 * Functor interface to perform some action with pointed tensors (captured in constructor)
 * Generally it's read, write or move data from specified tensors.
 * The action may depend on the iteration index.
 */
class PortMapHelper {
public:
    virtual ~PortMapHelper() = default;
    virtual void execute(int n_iter, mkldnn::stream strm) = 0;
    virtual void execute(mkldnn::stream strm, int n_iter = -1) = 0;
protected:
    std::vector<mkldnn::reorder> reorders;
    std::vector<mkldnn::memory> mem_holder;
    int iter_count;
};


/**
 * Functor interface to perform a check of a data tensor (captured in constructor)
 * Information is extracted as an int. The meaning of the returned value is specific to
 * the particular type of checker.
 */
class PortChecker {
public:
    virtual ~PortChecker() = default;
    virtual int getStatus() = 0;
protected:
    std::vector<mkldnn::memory> mem_holder;
};


class MKLDNNTensorIteratorNode : public MKLDNNNode {
public:
    MKLDNNTensorIteratorNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
@ -35,6 +54,7 @@ public:
    void execute(mkldnn::stream strm) override;

    void setExtManager(const MKLDNNExtensionManager::Ptr& extMgr) { ext_mng = extMgr; }

private:
    int n_iter = 0;

@ -42,7 +62,16 @@ private:
    MKLDNNGraph sub_graph;
    std::vector<MKLDNNMemoryPtr> input_mem, output_mem;

    std::vector<std::shared_ptr<PortMapHelper>> in_port_mappers, out_port_mappers;
    std::vector<std::shared_ptr<PortMapHelper>>
        first_mappers,   /// < Applied once before loop
        last_mappers,    /// < Applied once after loop
        before_mappers,  /// < Applied before each iteration
        after_mappers;   /// < Applied after each iteration

    std::shared_ptr<PortChecker>
        trip_count_check,    /// < Perform check of trip count value. value >= -1
        initial_cond_check,  /// < Perform check of initial continue condition value. value [0, 1]
        continue_cond_check; /// < Perform check of continue condition value of body. value [0, 1]
};

} // namespace MKLDNNPlugin
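The new interface is small enough that a custom mapper is only a few lines. The sketch below is illustrative only; the class name and behavior are hypothetical and not part of the commit. It shows the contract implied by the header above: do the work in `execute()`, and stash the primitives you touch in the inherited `reorders`/`mem_holder` so they outlive each submit:

```cpp
// Hypothetical PortMapHelper subclass following the new execute(stream, n_iter)
// signature; it re-runs a single reorder on every iteration after the first.
class EveryIterationCopy : public PortMapHelper {
public:
    EveryIterationCopy(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to) {
        // Keeping the reorder in the inherited vector keeps it alive across submits.
        reorders.emplace_back(from->GetPrimitive(), to->GetPrimitive());
    }

    void execute(mkldnn::stream strm, int n_iter) override {
        if (n_iter != 0)  // skip the very first iteration, like BackEdgePortHelper
            strm.submit({reorders.begin(), reorders.end()});
    }
};
```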
@ -332,18 +332,12 @@ static bool eliminate_squeeze(const std::shared_ptr<Node>& node) {
    return false;
}

static bool eliminate_stop_gradient(const std::shared_ptr<Node>& node) {
    replace_output_update_name(node->output(0), node->input_value(0));
    return true;
}

bool pass::NopElimination::run_on_function(std::shared_ptr<Function> function) {
    static const std::unordered_map<NodeTypeInfo, std::function<bool(const std::shared_ptr<Node>&)>>
        dispatcher{{TI(opset3::Pad), &eliminate_nop},
                   {TI(op::v0::Sum), &eliminate_sum},
                   {TI(opset3::Convert), &eliminate_convert},
                   {TI(op::v0::Slice), &eliminate_nop},
                   {TI(op::v0::StopGradient), &eliminate_stop_gradient},
                   {TI(opset3::Reshape), &eliminate_reshape_v1},
                   {TI(opset3::Concat), &eliminate_concat},
                   {TI(opset3::Squeeze), &eliminate_squeeze},
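The hunk ends before the code that consults this table, so the following is only a sketch of the usual pattern for driving such a dispatcher (the exact loop in run_on_function is assumed, not shown by the diff): look each node's static type info up in the map and, if a handler is registered, let it try to eliminate the node.

```cpp
// Assumed shape of the dispatch loop that follows the table above: walk the
// function's ops, look up a handler by type info, and record whether any
// handler rewrote the graph.
bool clobbered = false;
for (const auto& node : function->get_ops()) {
    auto handler = dispatcher.find(node->get_type_info());
    if (handler != dispatcher.end())
        clobbered = handler->second(node) || clobbered;
}
return clobbered;
```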
@ -39,8 +39,6 @@ function(add_common_target TARGET_NAME STATIC_IE)
        $<TARGET_PROPERTY:inference_engine_legacy,INTERFACE_INCLUDE_DIRECTORIES>)

    if(WIN32)
        target_compile_definitions(${TARGET_NAME} PRIVATE NOMINMAX)

        set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})
    endif()

@ -54,6 +52,10 @@ function(add_common_target TARGET_NAME STATIC_IE)

    target_link_libraries(${TARGET_NAME} PUBLIC ${NGRAPH_LIBRARIES} inference_engine_transformations
                          PRIVATE openvino::itt)

    if(NOT STATIC_IE)
        target_link_libraries(${TARGET_NAME} PUBLIC inference_engine_legacy)
    endif()
endfunction()

add_common_target("vpu_common_lib" FALSE)
@ -0,0 +1,89 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadConvertNetwork) {
    std::string model = R"V0G0N(
<net name="saved_model" version="10">
    <layers>
        <layer id="0" name="input_a" type="Parameter" version="opset1">
            <data shape="1,3,4" element_type="f32"/>
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="PartitionedCall/functional_1/tf_op_layer_Cast/Cast" type="Convert" version="opset1">
            <data destination_type="f16"/>
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>4</dim>
                </port>
            </input>
            <output>
                <port id="1" precision="FP16">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="2" name="Identity/sink_port_0" type="Result" version="opset1">
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>4</dim>
                </port>
            </input>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
        <edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
    </edges>
</net>
)V0G0N";
    std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
    <layers>
        <layer id="0" name="input_a" type="Input" version="opset1">
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="PartitionedCall/functional_1/tf_op_layer_Cast/Cast" type="Convert" version="opset1">
            <data precision="FP16"/>
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>4</dim>
                </port>
            </input>
            <output>
                <port id="1" precision="FP16">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
    </edges>
</net>
)V0G0N";
    compareIRs(model, modelV7, 0);
}
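All of the reader tests added in this commit follow the same pattern as the one above: an opset IR (version 10), the expected legacy IR (version 7), and a call to compareIRs. When the legacy IR references a weights blob, the tests use a three-argument-plus-callback form in which a lambda fills the shared blob before comparison. A minimal sketch of that form (the values here are placeholders, not taken from any one test):

```cpp
// Sketch of the weight-filling form of compareIRs used by the tests below.
// The callback receives the shared weights blob and writes the constant data
// (here: a hypothetical 3-element I64 shape tensor) before the IRs are compared.
compareIRs(model, modelV7, /*threshold=*/16, [](Blob::Ptr& weights) {
    auto* buffer = weights->buffer().as<int64_t*>();
    buffer[0] = 1;  // placeholder values; each test fills its own constants
    buffer[1] = 1;
    buffer[2] = 1;
});
```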
@ -0,0 +1,184 @@
// Copyright (C) 2019-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadDepthToSpaceNetwork) {
    std::string model = R"V0G0N(
<net name="saved_model" version="10">
    <layers>
        <layer id="0" name="input_a" type="Parameter" version="opset1">
            <data shape="5,4,28,2" element_type="f32"/>
            <output>
                <port id="0" precision="FP32">
                    <dim>5</dim>
                    <dim>4</dim>
                    <dim>28</dim>
                    <dim>2</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="DepthToSpace" version="opset1">
            <data mode="blocks_first" block_size="2"/>
            <input>
                <port id="0">
                    <dim>5</dim>
                    <dim>4</dim>
                    <dim>28</dim>
                    <dim>2</dim>
                </port>
            </input>
            <output>
                <port id="1" precision="FP32">
                    <dim>5</dim>
                    <dim>1</dim>
                    <dim>56</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="2" name="Identity/sink_port_0" type="Result" version="opset1">
            <input>
                <port id="0">
                    <dim>5</dim>
                    <dim>1</dim>
                    <dim>56</dim>
                    <dim>4</dim>
                </port>
            </input>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
        <edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
    </edges>
</net>
)V0G0N";
    std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
    <layers>
        <layer id="0" name="input_a" type="Input" version="opset1">
            <output>
                <port id="0" precision="FP32">
                    <dim>5</dim>
                    <dim>4</dim>
                    <dim>28</dim>
                    <dim>2</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_6D/Cast_1204_const" type="Const" version="opset1">
            <output>
                <port id="1" precision="I64">
                    <dim>6</dim>
                </port>
            </output>
            <blobs>
                <custom offset="0" size="24" precision="I64"/>
            </blobs>
        </layer>
        <layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_6D" type="Reshape" version="opset1">
            <data special_zero="True"/>
            <input>
                <port id="0">
                    <dim>5</dim>
                    <dim>4</dim>
                    <dim>28</dim>
                    <dim>2</dim>
                </port>
                <port id="1">
                    <dim>6</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>5</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>1</dim>
                    <dim>28</dim>
                    <dim>2</dim>
                </port>
            </output>
        </layer>
        <layer id="3" name="PartitionedCall/functional_1/tf_op_layer_output/output/Transpose" type="Permute" version="opset1">
            <data order="0,3,4,1,5,2"/>
            <input>
                <port id="0">
                    <dim>5</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>1</dim>
                    <dim>28</dim>
                    <dim>2</dim>
                </port>
            </input>
            <output>
                <port id="1" precision="FP32">
                    <dim>5</dim>
                    <dim>1</dim>
                    <dim>28</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                </port>
            </output>
        </layer>
        <layer id="4" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_4D/Cast_1202_const" type="Const" version="opset1">
            <output>
                <port id="1" precision="I64">
                    <dim>4</dim>
                </port>
            </output>
            <blobs>
                <custom offset="24" size="16" precision="I64"/>
            </blobs>
        </layer>
        <layer id="5" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="Reshape" version="opset1">
            <data special_zero="True"/>
            <input>
                <port id="0">
                    <dim>5</dim>
                    <dim>1</dim>
                    <dim>28</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                </port>
                <port id="1">
                    <dim>4</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>5</dim>
                    <dim>1</dim>
                    <dim>56</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
        <edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
        <edge from-layer="3" from-port="1" to-layer="5" to-port="0"/>
        <edge from-layer="4" from-port="1" to-layer="5" to-port="1"/>
    </edges>
</net>
)V0G0N";
    compareIRs(model, modelV7, 80, [](Blob::Ptr& weights) {
        auto* buffer = weights->buffer().as<int64_t*>();
        buffer[0] = 0;
        buffer[1] = 2;
        buffer[2] = 2;
        buffer[3] = 1;
        buffer[4] = 28;
        buffer[5] = 2;
        buffer[6] = 0;
        buffer[7] = 1;
        buffer[8] = 56;
        buffer[9] = 4;
    });
}
@ -0,0 +1,179 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadFloorModNetwork) {
    std::string model = R"V0G0N(
<net name="saved_model" version="10">
    <layers>
        <layer id="0" name="input_a" type="Parameter" version="opset1">
            <data shape="1,1,4" element_type="f32"/>
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="input_b" type="Parameter" version="opset1">
            <data shape="1" element_type="f32"/>
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                </port>
            </output>
        </layer>
        <layer id="2" name="EltwiseReshapeNormalization/Cast_163_const" type="Const" version="opset1">
            <data offset="0" size="24" shape="3" element_type="i64"/>
            <output>
                <port id="1" precision="I64">
                    <dim>3</dim>
                </port>
            </output>
        </layer>
        <layer id="3" name="EltwiseReshapeNormalization" type="Reshape" version="opset1">
            <data special_zero="True"/>
            <input>
                <port id="0">
                    <dim>1</dim>
                </port>
                <port id="1">
                    <dim>3</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>1</dim>
                </port>
            </output>
        </layer>
        <layer id="4" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="FloorMod" version="opset1">
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
                <port id="1">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>1</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="5" name="Identity/sink_port_0" type="Result" version="opset1">
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </input>
        </layer>
    </layers>
    <edges>
        <edge from-layer="1" from-port="0" to-layer="3" to-port="0"/>
        <edge from-layer="2" from-port="1" to-layer="3" to-port="1"/>
        <edge from-layer="0" from-port="0" to-layer="4" to-port="0"/>
        <edge from-layer="3" from-port="2" to-layer="4" to-port="1"/>
        <edge from-layer="4" from-port="2" to-layer="5" to-port="0"/>
    </edges>
</net>
)V0G0N";
    std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
    <layers>
        <layer id="0" name="input_a" type="Input" version="opset1">
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="input_b" type="Input" version="opset1">
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                </port>
            </output>
        </layer>
        <layer id="2" name="EltwiseReshapeNormalization/Cast_175_const" type="Const" version="opset1">
            <output>
                <port id="1" precision="I64">
                    <dim>3</dim>
                </port>
            </output>
            <blobs>
                <custom offset="0" size="12" precision="I32"/>
            </blobs>
        </layer>
        <layer id="3" name="EltwiseReshapeNormalization" type="Reshape" version="opset1">
            <data special_zero="True"/>
            <input>
                <port id="0">
                    <dim>1</dim>
                </port>
                <port id="1">
                    <dim>3</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>1</dim>
                </port>
            </output>
        </layer>
        <layer id="4" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="Eltwise" version="opset1">
            <data operation="floor_mod"/>
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
                <port id="1">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>1</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
    </layers>
    <edges>
        <edge from-layer="1" from-port="0" to-layer="3" to-port="0"/>
        <edge from-layer="2" from-port="1" to-layer="3" to-port="1"/>
        <edge from-layer="0" from-port="0" to-layer="4" to-port="0"/>
        <edge from-layer="3" from-port="2" to-layer="4" to-port="1"/>
    </edges>
</net>
)V0G0N";
    // compareIRs(model, modelV7, 0);
    compareIRs(model, modelV7, 40, [](Blob::Ptr& weights) {
        auto* buffer = weights->buffer().as<int64_t*>();
        buffer[0] = 1;
        buffer[1] = 1;
        buffer[2] = 1;
    });
}
@ -0,0 +1,122 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadGatherNetwork) {
    std::string model = R"V0G0N(
<net name="saved_model" version="10">
    <layers>
        <layer id="0" name="input_a" type="Parameter" version="opset1">
            <data shape="1,3,4" element_type="f32"/>
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="input_b" type="Parameter" version="opset1">
            <data shape="1" element_type="i32"/>
            <output>
                <port id="0" precision="I32">
                    <dim>1</dim>
                </port>
            </output>
        </layer>
        <layer id="2" name="PartitionedCall/functional_1/tf_op_layer_GatherV2/GatherV2/Cast_292_const" type="Const" version="opset1">
            <data offset="0" size="8" shape="" element_type="i64"/>
            <output>
                <port id="1" precision="I64"/>
            </output>
        </layer>
        <layer id="3" name="PartitionedCall/functional_1/tf_op_layer_GatherV2/GatherV2" type="Gather" version="opset1">
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>4</dim>
                </port>
                <port id="1">
                    <dim>1</dim>
                </port>
                <port id="2"/>
            </input>
            <output>
                <port id="3" precision="FP32">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="4" name="Identity/sink_port_0" type="Result" version="opset1">
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>4</dim>
                </port>
            </input>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="3" to-port="0"/>
        <edge from-layer="1" from-port="0" to-layer="3" to-port="1"/>
        <edge from-layer="2" from-port="1" to-layer="3" to-port="2"/>
        <edge from-layer="3" from-port="3" to-layer="4" to-port="0"/>
    </edges>
</net>
)V0G0N";
    std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
    <layers>
        <layer id="0" name="input_a" type="Input" version="opset1">
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="input_b" type="Input" version="opset1">
            <output>
                <port id="0" precision="I32">
                    <dim>1</dim>
                </port>
            </output>
        </layer>
        <layer id="2" name="PartitionedCall/functional_1/tf_op_layer_GatherV2/GatherV2" type="Gather">
            <data axis="0"/>
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>4</dim>
                </port>
                <port id="1">
                    <dim>1</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
        <edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
    </edges>
</net>
)V0G0N";
    compareIRs(model, modelV7, 16, [](Blob::Ptr& weights) {
        auto* buffer = weights->buffer().as<int64_t*>();
        buffer[0] = 0;
    });
}
@ -0,0 +1,190 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadMinimumNetwork) {
    std::string model = R"V0G0N(
<net name="saved_model" version="10">
    <layers>
        <layer id="0" name="input_a" type="Parameter" version="opset1">
            <data shape="1,1,27,27" element_type="f32"/>
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="input_b" type="Parameter" version="opset1">
            <data shape="1,1,27,27" element_type="f32"/>
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
            </output>
        </layer>
        <layer id="2" name="PartitionedCall/functional_1/output/Minimum" type="Minimum" version="opset1">
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
                <port id="1">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
            </output>
        </layer>
        <layer id="3" name="Identity/sink_port_0" type="Result" version="opset1">
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
            </input>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
        <edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
        <edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
    </edges>
</net>
)V0G0N";
    std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
    <layers>
        <layer id="0" name="input_a" type="Input" version="opset1">
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="PartitionedCall/functional_1/output/Minimum/negate1_" type="Power" version="opset1">
            <data power="1" scale="-1" shift="0"/>
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
            </input>
            <output>
                <port id="1" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
            </output>
        </layer>
        <layer id="2" name="input_b" type="Input" version="opset1">
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
            </output>
        </layer>
        <layer id="3" name="PartitionedCall/functional_1/output/Minimum/negate2_" type="Power" version="opset1">
            <data power="1" scale="-1" shift="0"/>
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
            </input>
            <output>
                <port id="1" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
            </output>
        </layer>
        <layer id="4" name="PartitionedCall/functional_1/output/Minimum/Max_" type="Eltwise" version="opset1">
            <data operation="max"/>
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
                <port id="1">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
            </output>
        </layer>
        <layer id="5" name="PartitionedCall/functional_1/output/Minimum" type="Power" version="opset1">
            <data power="1" scale="-1" shift="0"/>
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
            </input>
            <output>
                <port id="1" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>27</dim>
                    <dim>27</dim>
                </port>
            </output>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
        <edge from-layer="2" from-port="0" to-layer="3" to-port="0"/>
        <edge from-layer="1" from-port="1" to-layer="4" to-port="0"/>
        <edge from-layer="3" from-port="1" to-layer="4" to-port="1"/>
        <edge from-layer="4" from-port="2" to-layer="5" to-port="0"/>
    </edges>
</net>
)V0G0N";
    compareIRs(model, modelV7, 0);
}
@ -0,0 +1,108 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadMultiplyNetwork) {
    std::string model = R"V0G0N(
<net name="saved_model" version="10">
    <layers>
        <layer id="0" name="input_a" type="Parameter" version="opset1">
            <data shape="1,4" element_type="f32"/>
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="input_b" type="Parameter" version="opset1">
            <data shape="1,4" element_type="f32"/>
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="2" name="PartitionedCall/functional_1/output/mul" type="Multiply" version="opset1">
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
                <port id="1">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="3" name="Identity/sink_port_0" type="Result" version="opset1">
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </input>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
        <edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
        <edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
    </edges>
</net>
)V0G0N";
    std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
    <layers>
        <layer id="0" name="input_a" type="Input" version="opset1">
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="input_b" type="Input" version="opset1">
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="2" name="PartitionedCall/functional_1/output/mul" type="Eltwise" version="opset1">
            <data operation="prod"/>
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
                <port id="1">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
        <edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
    </edges>
</net>
)V0G0N";
    compareIRs(model, modelV7, 0);
}
@ -0,0 +1,146 @@
// Copyright (C) 2019-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadNormalizeL2Network) {
    std::string model = R"V0G0N(
<net name="saved_model" version="10">
    <layers>
        <layer id="0" name="input_a" type="Parameter" version="opset1">
            <data shape="6,24,12,10" element_type="f32"/>
            <output>
                <port id="0" precision="FP32">
                    <dim>6</dim>
                    <dim>24</dim>
                    <dim>12</dim>
                    <dim>10</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="112_input_port_1/value114_const" type="Const" version="opset1">
            <data offset="0" size="8" shape="1" element_type="i64"/>
            <output>
                <port id="1" precision="I64">
                    <dim>1</dim>
                </port>
            </output>
        </layer>
        <layer id="2" name="112" type="NormalizeL2" version="opset1">
            <data eps="1e-12" eps_mode="add"/>
            <input>
                <port id="0">
                    <dim>6</dim>
                    <dim>24</dim>
                    <dim>12</dim>
                    <dim>10</dim>
                </port>
                <port id="1">
                    <dim>1</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>6</dim>
                    <dim>24</dim>
                    <dim>12</dim>
                    <dim>10</dim>
                </port>
            </output>
        </layer>
        <layer id="3" name="5354_const" type="Const" version="opset1">
            <data offset="8" size="4" shape="1" element_type="f32"/>
            <output>
                <port id="1" precision="FP32">
                    <dim>1</dim>
                </port>
            </output>
        </layer>
        <layer id="4" name="PartitionedCall/functional_1/lambda/output" type="Multiply" version="opset1">
            <input>
                <port id="0">
                    <dim>6</dim>
                    <dim>24</dim>
                    <dim>12</dim>
                    <dim>10</dim>
                </port>
                <port id="1">
                    <dim>1</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>6</dim>
                    <dim>24</dim>
                    <dim>12</dim>
                    <dim>10</dim>
                </port>
            </output>
        </layer>
        <layer id="5" name="Identity/sink_port_0" type="Result" version="opset1">
            <input>
                <port id="0">
                    <dim>6</dim>
                    <dim>24</dim>
                    <dim>12</dim>
                    <dim>10</dim>
                </port>
            </input>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
        <edge from-layer="2" from-port="2" to-layer="4" to-port="0"/>
        <edge from-layer="3" from-port="1" to-layer="4" to-port="1"/>
        <edge from-layer="4" from-port="2" to-layer="5" to-port="0"/>
    </edges>
</net>
)V0G0N";
    std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
    <layers>
        <layer id="0" name="input_a" type="Input" version="opset1">
            <output>
                <port id="0" precision="FP32">
                    <dim>6</dim>
                    <dim>24</dim>
                    <dim>12</dim>
                    <dim>10</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="PartitionedCall/functional_1/lambda/output" type="Normalize">
            <data eps="1e-12" across_spatial="0" channel_shared="1"/>
            <input>
                <port id="0">
                    <dim>6</dim>
                    <dim>24</dim>
                    <dim>12</dim>
                    <dim>10</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>6</dim>
                    <dim>24</dim>
                    <dim>12</dim>
                    <dim>10</dim>
                </port>
            </output>
            <blobs>
                <weights offset="0" size="96" precision="FP32"/>
            </blobs>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
    </edges>
</net>
)V0G0N";
    compareIRs(model, modelV7, 100, [](Blob::Ptr& weights) {
        auto* buffer = weights->buffer().as<int64_t*>();
        buffer[0] = 1;
        buffer[1] = 32831;
    });
}
@ -0,0 +1,108 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadNotEqualNetwork) {
    std::string model = R"V0G0N(
<net name="saved_model" version="10">
    <layers>
        <layer id="0" name="input_a" type="Parameter" version="opset1">
            <data shape="1,4" element_type="f32"/>
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="input_b" type="Parameter" version="opset1">
            <data shape="1,4" element_type="f32"/>
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="NotEqual" version="opset1">
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
                <port id="1">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="BOOL">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="3" name="Identity/sink_port_0" type="Result" version="opset1">
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </input>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
        <edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
        <edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
    </edges>
</net>
)V0G0N";
    std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
    <layers>
        <layer id="0" name="input_a" type="Input" version="opset1">
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="input_b" type="Input" version="opset1">
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="Eltwise" version="opset1">
            <data operation="not_equal"/>
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
                <port id="1">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="BOOL">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
        <edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
    </edges>
</net>
)V0G0N";
    compareIRs(model, modelV7, 0);
}
@ -0,0 +1,120 @@
// Copyright (C) 2019-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadReduceMinNetwork) {
    std::string model = R"V0G0N(
<net name="model" version="10">
    <layers>
        <layer id="0" name="data" type="Parameter" version="opset1">
            <data element_type="f32" shape="3,2,2"/>
            <output>
                <port id="0" precision="FP32">
                    <dim>3</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="reduced/Cast_175_const" type="Const" version="opset1">
            <data element_type="i64" offset="0" shape="3" size="24"/>
            <output>
                <port id="1" precision="I64">
                    <dim>3</dim>
                </port>
            </output>
        </layer>
        <layer id="2" name="reduced" type="ReduceMin" version="opset1">
            <data keep_dims="True"/>
            <input>
                <port id="0">
                    <dim>3</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                </port>
                <port id="1">
                    <dim>3</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>1</dim>
                </port>
            </output>
        </layer>
        <layer id="3" name="reduced/sink_port_0" type="Result" version="opset1">
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>1</dim>
                </port>
            </input>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
        <edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
    </edges>
</net>
)V0G0N";
    std::string modelV7 = R"V0G0N(
<net name="model" version="7">
    <layers>
        <layer id="0" name="data" type="Input" version="opset1">
            <output>
                <port id="0" precision="FP32">
                    <dim>3</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="reduced/Cast_184_const" type="Const" version="opset1">
            <output>
                <port id="1" precision="I64">
                    <dim>3</dim>
                </port>
            </output>
            <blobs>
                <custom offset="0" precision="I64" size="12"/>
            </blobs>
        </layer>
        <layer id="2" name="reduced" type="ReduceMin" version="opset1">
            <data keep_dims="True"/>
            <input>
                <port id="0">
                    <dim>3</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                </port>
                <port id="1">
                    <dim>3</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>1</dim>
                </port>
            </output>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
    </edges>
</net>
)V0G0N";
    compareIRs(model, modelV7, 100, [](Blob::Ptr& weights) {
        auto* buffer = weights->buffer().as<int64_t*>();
        buffer[0] = 0;
        buffer[1] = 1;
        buffer[2] = 2;
    });
}
@ -0,0 +1,115 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadReduceProdNetwork) {
    std::string model = R"V0G0N(
<net name="saved_model" version="10">
    <layers>
        <layer id="0" name="input_a" type="Parameter" version="opset1">
            <data shape="1,1,4" element_type="f32"/>
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output/Cast_186_const" type="Const" version="opset1">
            <data offset="0" size="8" shape="1" element_type="i64"/>
            <output>
                <port id="1" precision="I64">
                    <dim>1</dim>
                </port>
            </output>
        </layer>
        <layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="ReduceProd" version="opset1">
            <data keep_dims="False"/>
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
                <port id="1">
                    <dim>1</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="3" name="Identity/sink_port_0" type="Result" version="opset1">
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </input>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
        <edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
    </edges>
</net>
)V0G0N";
    std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
    <layers>
        <layer id="0" name="input_a" type="Input" version="opset1">
            <output>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output/Cast_195_const" type="Const" version="opset1">
            <output>
                <port id="1" precision="I64">
                    <dim>1</dim>
                </port>
            </output>
            <blobs>
                <custom offset="0" size="4" precision="I32"/>
            </blobs>
        </layer>
        <layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="ReduceProd" version="opset1">
            <data keep_dims="False"/>
            <input>
                <port id="0">
                    <dim>1</dim>
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
                <port id="1">
                    <dim>1</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>1</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
    </edges>
</net>
)V0G0N";
    compareIRs(model, modelV7, 16, [](Blob::Ptr& weights) {
        auto *buffer = weights->buffer().as<int64_t *>();
        buffer[0] = 1;
    });
}
@ -0,0 +1,184 @@
// Copyright (C) 2019-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadSpaceToDepthNetwork) {
    std::string model = R"V0G0N(
<net name="saved_model" version="10">
    <layers>
        <layer id="0" name="input_a" type="Parameter" version="opset1">
            <data shape="6,5,4,4" element_type="f32"/>
            <output>
                <port id="0" precision="FP32">
                    <dim>6</dim>
                    <dim>5</dim>
                    <dim>4</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="SpaceToDepth" version="opset1">
            <data mode="blocks_first" block_size="2"/>
            <input>
                <port id="0">
                    <dim>6</dim>
                    <dim>5</dim>
                    <dim>4</dim>
                    <dim>4</dim>
                </port>
            </input>
            <output>
                <port id="1" precision="FP32">
                    <dim>6</dim>
                    <dim>20</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                </port>
            </output>
        </layer>
        <layer id="2" name="Identity/sink_port_0" type="Result" version="opset1">
            <input>
                <port id="0">
                    <dim>6</dim>
                    <dim>20</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                </port>
            </input>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
        <edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
    </edges>
</net>
)V0G0N";
    std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
    <layers>
        <layer id="0" name="input_a" type="Input" version="opset1">
            <output>
                <port id="0" precision="FP32">
                    <dim>6</dim>
                    <dim>5</dim>
                    <dim>4</dim>
                    <dim>4</dim>
                </port>
            </output>
        </layer>
        <layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_6D/Cast_1217_const" type="Const" version="opset1">
            <output>
                <port id="1" precision="I64">
                    <dim>6</dim>
                </port>
            </output>
            <blobs>
                <custom offset="0" size="24" precision="I64"/>
            </blobs>
        </layer>
        <layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_6D" type="Reshape" version="opset1">
            <data special_zero="True"/>
            <input>
                <port id="0">
                    <dim>6</dim>
                    <dim>5</dim>
                    <dim>4</dim>
                    <dim>4</dim>
                </port>
                <port id="1">
                    <dim>6</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>6</dim>
                    <dim>5</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                </port>
            </output>
        </layer>
        <layer id="3" name="PartitionedCall/functional_1/tf_op_layer_output/output/Transpose" type="Permute" version="opset1">
            <data order="0,3,5,1,2,4"/>
            <input>
                <port id="0">
                    <dim>6</dim>
                    <dim>5</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                </port>
            </input>
            <output>
                <port id="1" precision="FP32">
                    <dim>6</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>5</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                </port>
            </output>
        </layer>
        <layer id="4" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_4D/Cast_1219_const" type="Const" version="opset1">
            <output>
                <port id="1" precision="I64">
                    <dim>4</dim>
                </port>
            </output>
            <blobs>
                <custom offset="24" size="16" precision="I64"/>
            </blobs>
        </layer>
        <layer id="5" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="Reshape" version="opset1">
            <data special_zero="True"/>
            <input>
                <port id="0">
                    <dim>6</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>5</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                </port>
                <port id="1">
                    <dim>4</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>6</dim>
                    <dim>20</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                </port>
            </output>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
        <edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
        <edge from-layer="3" from-port="1" to-layer="5" to-port="0"/>
        <edge from-layer="4" from-port="1" to-layer="5" to-port="1"/>
    </edges>
</net>
)V0G0N";
    compareIRs(model, modelV7, 80, [](Blob::Ptr& weights) {
        auto* buffer = weights->buffer().as<int64_t*>();
        buffer[0] = 6;
        buffer[1] = 5;
        buffer[2] = 2;
        buffer[3] = 2;
        buffer[4] = 2;
        buffer[5] = 2;
        buffer[6] = 6;
        buffer[7] = 14;
        buffer[8] = 2;
        buffer[9] = 2;
    });
}
@ -0,0 +1,137 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadSubtractNetwork) {
std::string model = R"V0G0N(
<net name="model" version="10">
<layers>
<layer id="0" name="x" type="Parameter" version="opset1">
<data element_type="f32" shape="3,4,5"/>
<output>
<port id="0" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="1" name="y" type="Parameter" version="opset1">
<data element_type="f32" shape="3,4,5"/>
<output>
<port id="0" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="2" name="z/sub" type="Subtract" version="opset1">
<input>
<port id="0">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
<port id="1">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="3" name="z/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="model" version="7">
<layers>
<layer id="0" name="x" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="1" name="y" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="2" name="z/neg_" type="Power" version="opset1">
<data power="1" scale="-1.0" shift="0"/>
<input>
<port id="0">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="3" name="z/sub" type="Eltwise" version="opset1">
<data operation="sum"/>
<input>
<port id="0">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
<port id="1">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="1" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="0" from-port="0" to-layer="3" to-port="0"/>
<edge from-layer="2" from-port="1" to-layer="3" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 0);
}
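Note: the V7 IR has no dedicated Subtract primitive, so the reader expresses `x - y` as `x + (-1 * y)`: a `Power` layer with `scale="-1.0"` negates `y`, and an Eltwise `sum` adds the result to `x`, which is exactly the layer pair in `modelV7` above. A minimal standalone sketch of the arithmetic identity the test relies on (illustrative only, not part of the test suite):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// x - y == x + (-1 * y), elementwise: the Power(scale=-1) + Eltwise(sum)
// decomposition in the V7 model computes the same values as Subtract.
int main() {
    std::vector<float> x{1.0f, 2.5f, -3.0f};
    std::vector<float> y{0.5f, 2.5f, 4.0f};
    for (std::size_t i = 0; i < x.size(); ++i) {
        assert(x[i] - y[i] == x[i] + (-1.0f * y[i]));
    }
    return 0;
}
```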
@ -97,19 +97,6 @@ TEST(nop_elimination, eliminate_broadcast) {
ASSERT_EQ(count_ops_of_type<op::v1::Broadcast>(f), 0);
}

TEST(nop_elimination, eliminate_stop_gradient) {
Shape shape{};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto s = make_shared<op::v0::StopGradient>(A);
auto f = make_shared<Function>(make_shared<op::v0::Abs>(s), ParameterVector{A});

pass::Manager pass_manager;
pass_manager.register_pass<pass::NopElimination>();
pass_manager.run_passes(f);

ASSERT_EQ(count_ops_of_type<op::v0::StopGradient>(f), 0);
}

TEST(nop_elimination, pass_property) {
auto pass = std::make_shared<ngraph::pass::NopElimination>();
ASSERT_FALSE(pass->get_property(pass::PassProperty::CHANGE_DYNAMIC_STATE));

@ -23,34 +23,36 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
};

const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes = {
{Sigmoid, {}},
{Tanh, {}},
{Relu, {}},
{Exp, {}},
{Log, {}},
{Sign, {}},
{Abs, {}},
{Clamp, {{-2.0f, 2.0f}}},
{Negative, {}},
{Acos, {}},
{Asin, {}},
{Atan, {}},
{Cos, {}},
{Cosh, {}},
{Floor, {}},
{Sin, {}},
{Sinh, {}},
{Sqrt, {}},
{Tan, {}},
{Elu, {{0.1f}}},
{Erf, {}},
{HardSigmoid, {{0.2f, 0.5f}}},
{Selu, {{1.6732f, 1.0507f}}},
{Ceiling, {}},
{Mish, {}},
{HSwish, {}},
{SoftPlus, {}},
{HSigmoid, {}}
{Sigmoid, {}},
{Tanh, {}},
{Relu, {}},
{Exp, {}},
{Log, {}},
{Sign, {}},
{Abs, {}},
{Clamp, {{-2.0f, 2.0f}}},
{Negative, {}},
{Acos, {}},
{Asin, {}},
{Atan, {}},
{Cos, {}},
{Cosh, {}},
{Floor, {}},
{Sin, {}},
{Sinh, {}},
{Sqrt, {}},
{Tan, {}},
{Elu, {{0.1f}}},
{Erf, {}},
{HardSigmoid, {{0.2f, 0.5f}}},
{Selu, {{1.6732f, 1.0507f}}},
{Ceiling, {}},
{Mish, {}},
{HSwish, {}},
{SoftPlus, {}},
{HSigmoid, {}},
{RoundHalfToEven, {}},
{RoundHalfAwayFromZero, {}}
};

const std::map<ActivationTypes, std::vector<std::vector<float>>> activationParamTypes = {

@ -3,7 +3,6 @@
//

#include <vector>
#include <ngraph/op/util/attr_types.hpp>
#include "single_layer_tests/loop.hpp"
#include "common_test_utils/test_constants.hpp"

@ -12,9 +11,9 @@ using namespace LayerTestsDefinitions;
namespace {
// without clip, values increase rapidly, so use only seq_lengths = 2
std::vector<bool> execute_first_iteration{true};
std::vector<bool> is_body_condition_const{true, false};
std::vector<bool> body_condition{true, false}; // works only if is_body_condition_const == true
std::vector<int64_t> trip_count{1, 10, -1}; // -1 means infinity
std::vector<bool> is_body_condition_const{true/*, false*/};
std::vector<bool> body_condition{true/*, false*/}; // works only if is_body_condition_const == true
std::vector<int64_t> trip_count{1, 10/*, -1*/}; // -1 means infinity
std::vector<std::vector<std::pair<std::vector<size_t>, LOOP_IN_TYPE>>> inputs = {
{{{32, 1, 10}, LOOP_IN_TYPE::INVARIANT}, {{32, 1, 10}, LOOP_IN_TYPE::INVARIANT}, {{32, 1, 10}, LOOP_IN_TYPE::MERGED}},
};
@ -31,4 +30,37 @@ namespace {
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
LoopTest::getTestCaseName);

static const std::vector<std::tuple<bool, int64_t, int64_t, int64_t>> static_loop_types {
// GCC 4.8 limitation: the type of each element in the list has to be spelled out
// static_trip_count | max | dynamic_exit | axis
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, -1, -1 }, // n_iter 5, no dynamic exit
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, 3, -1 }, // n_iter 3, dynamic exit on 3
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, 7, -1 }, // n_iter 5, dynamic exit not reached
std::tuple<bool, int64_t, int64_t, int64_t>{ true , -1, 5, -1 }, // n_iter 5, inf loop with dynamic exit on 5
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, -1, 1 }, // n_iter 5, const for loop with auto concatenated out
std::tuple<bool, int64_t, int64_t, int64_t>{ false , 5, -1, -1 }, // |
std::tuple<bool, int64_t, int64_t, int64_t>{ false , 5, 3, -1 }, // | same with dynamic trip count
std::tuple<bool, int64_t, int64_t, int64_t>{ false , 5, 7, -1 }, // |
std::tuple<bool, int64_t, int64_t, int64_t>{ false , -1, 5, -1 } // |
};
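The "GCC 4.8 limitation" comment above refers to the pre-N4387 `std::tuple`, whose constructors are too constrained for copy-list-initialization, so nested braces inside an initializer list do not compile and every element has to name the tuple type explicitly. A minimal sketch of the difference (hypothetical standalone example, not from this file):

```cpp
#include <cstdint>
#include <tuple>
#include <vector>

int main() {
    // Rejected by GCC 4.8's libstdc++ (tuple not list-initializable here):
    //   std::vector<std::tuple<bool, int64_t>> v{{true, 5}};
    // Accepted on GCC 4.8: spell out the element type, as the list above does.
    std::vector<std::tuple<bool, int64_t>> v{
        std::tuple<bool, int64_t>{true, 5},
        std::tuple<bool, int64_t>{false, -1},
    };
    return v.size() == 2 ? 0 : 1;
}
```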

using namespace testing;
using namespace InferenceEngine;

INSTANTIATE_TEST_CASE_P(smoke_StaticShapeLoop, StaticShapeLoopTest,
Combine(
Values(true),
ValuesIn(static_loop_types),
Values<int64_t>(7),
Values<InferenceEngine::SizeVector>({2, 1, 4}),
Values<InferenceEngine::Precision>(Precision::FP32, Precision::I32),
Values(CommonTestUtils::DEVICE_CPU)));
using namespace testing;
INSTANTIATE_TEST_CASE_P(smoke_TrivialLoop, TrivialLoopTest,
Combine(
Values<InferenceEngine::Precision>(Precision::FP32, Precision::I32),
Values<InferenceEngine::SizeVector>({2, 3, 4}),
Values(CommonTestUtils::DEVICE_CPU)));

} // namespace

@ -25,7 +25,22 @@ INSTANTIATE_TEST_CASE_P(smoke_NumSplitsCheck, SplitLayerTest,
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
SplitLayerTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(smoke_splitWithUnusedOutputsTest, SplitLayerTest,
::testing::Combine(
::testing::Values(5),
::testing::Values(0),
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({0, 3})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
SplitLayerTest::getTestCaseName);
} // namespace

@ -53,8 +53,6 @@ std::vector<std::string> disabledTestPatterns() {
// TODO: Issue: 38841
R"(.*TopKLayerTest.*k=10.*mode=min.*sort=index.*)",
R"(.*TopKLayerTest.*k=5.*sort=(none|index).*)",
// TODO: not supported yet, ticket 37690
R"(.*Loop.*)",
// TODO: Issue: 41694
R"(.*smoke_Set2.*CTCLossLayerTest.*)",
};

@ -70,7 +70,14 @@ class ImportNetworkTest : public testing::WithParamInterface<exportImportNetwork
if (inputStream.fail()) {
FAIL() << "Cannot open file to import model: exported_model.blob";
}
auto importedOutputs = CalculateImportedNetwork(inputStream);
auto importedNetwork = core->ImportNetwork(inputStream, targetDevice, configuration);
for (const auto& next_input : importedNetwork.GetInputsInfo()) {
ASSERT_NO_THROW(executableNetwork.GetInputsInfo()[next_input.first]);
}
for (const auto& next_output : importedNetwork.GetOutputsInfo()) {
ASSERT_NO_THROW(executableNetwork.GetOutputsInfo()[next_output.first]);
}
auto importedOutputs = CalculateImportedNetwork(importedNetwork);
Compare(importedOutputs, actualOutputs);
}

@ -107,9 +114,7 @@ class ImportNetworkTest : public testing::WithParamInterface<exportImportNetwork
std::map<std::string, std::string> exportConfiguration;
std::map<std::string, std::string> importConfiguration;

std::vector<std::vector<std::uint8_t>> CalculateImportedNetwork(std::istream& networkModel) {
auto importedNetwork = core->ImportNetwork(networkModel, targetDevice, configuration);

std::vector<std::vector<std::uint8_t>> CalculateImportedNetwork(InferenceEngine::ExecutableNetwork& importedNetwork) {
auto refInferRequest = importedNetwork.CreateInferRequest();
std::vector<InferenceEngine::InputInfo::CPtr> refInfos;
for (const auto& input : importedNetwork.GetInputsInfo()) {

@ -26,6 +26,7 @@ INSTANTIATE_TEST_CASE_P(DISABLED_smoke_NumSplitsCheck, SplitLayerTest,
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({30, 30})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_GNA)),
SplitLayerTest::getTestCaseName);

@ -0,0 +1,35 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include <subgraph_tests/memory_eltwise_reshape_concat.hpp>
#include "common_test_utils/test_constants.hpp"

namespace SubgraphTestsDefinitions {
namespace {
std::vector<size_t> input_multiples = {
1,
7,
5,
8
};

std::vector<size_t> concat_sizes = {
32,
64
};

std::map<std::string, std::string> additional_config = {
{"GNA_COMPACT_MODE", "NO"},
{"GNA_DEVICE_MODE", "GNA_SW_FP32"},
{"GNA_SCALE_FACTOR_0", "1638.4"},
};
} // namespace

INSTANTIATE_TEST_CASE_P(smoke_MemoryEltwiseReshapeConcatTest, MemoryEltwiseReshapeConcatTest,
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(input_multiples),
::testing::ValuesIn(concat_sizes),
::testing::Values(additional_config)),
MemoryEltwiseReshapeConcatTest::getTestCaseName);
} // namespace SubgraphTestsDefinitions
@ -26,8 +26,22 @@ INSTANTIATE_TEST_CASE_P(smoke_NumSplitsCheck, SplitLayerTest,
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
SplitLayerTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(smoke_splitWithUnusedOutputsTest, SplitLayerTest,
::testing::Combine(
::testing::Values(5),
::testing::Values(0),
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({0, 3})),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
SplitLayerTest::getTestCaseName);
} // namespace

@ -16,7 +16,7 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {

INSTANTIATE_TEST_CASE_P(smoke_NumSplitsCheck, SplitLayerTest,
::testing::Combine(
::testing::Values(1),
::testing::Values(5),
// TODO: 0-axis excluded
// Check (status == ie::StatusCode::OK) failed: Failed to reshape Network:
// Failed to infer shapes for Split layer (Split_2) with error:
@ -28,10 +28,11 @@ INSTANTIATE_TEST_CASE_P(smoke_NumSplitsCheck, SplitLayerTest,
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_MYRIAD)),
SplitLayerTest::getTestCaseName);

INSTANTIATE_TEST_CASE_P(smoke_splitWithUnusedOutputsTest, splitWithUnusedOutputsTest,
INSTANTIATE_TEST_CASE_P(smoke_splitWithUnusedOutputsTest, SplitLayerTest,
::testing::Combine(
::testing::Values(5),
// TODO: 0-axis excluded
@ -49,5 +50,5 @@ INSTANTIATE_TEST_CASE_P(smoke_splitWithUnusedOutputsTest, splitWithUnusedOutputs
std::vector<size_t>({0, 4}),
std::vector<size_t>({2, 3})),
::testing::Values(CommonTestUtils::DEVICE_MYRIAD)),
splitWithUnusedOutputsTest::getTestCaseName);
SplitLayerTest::getTestCaseName);
} // namespace

@ -29,7 +29,7 @@ using LoopParams = typename std::tuple<
std::string>; // Device name

class LoopTest : public testing::WithParamInterface<LoopParams>,
virtual public LayerTestsUtils::LayerTestsCommon {
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<LoopParams> &obj);

@ -37,4 +37,108 @@ protected:
void SetUp() override;
};


using StaticShapeLoopParams = typename std::tuple<
bool,
std::tuple<
bool,
int64_t,
int64_t,
int64_t
>,
int64_t,
InferenceEngine::SizeVector,
InferenceEngine::Precision,
std::string
>;

/**
* Test case with static SHAPE version of loop operation.
* Total iteration count is dynamic.
*/
class StaticShapeLoopTest : public testing::WithParamInterface<StaticShapeLoopParams>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<StaticShapeLoopParams> &obj);
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override;
std::vector<std::vector<std::uint8_t>> CalculateRefs() override;

private:
bool static_iter_num; // trip count provided by constant node
bool static_continue_cond; // initial_cond provided by constant node
int64_t max_iter_num; // -1 means an infinite loop (a dynamic exit condition is expected in the body)
int64_t dynamic_exit; // -1 means always true
int64_t axis; // -1 means no auto concatenation
int64_t start_value;
InferenceEngine::SizeVector data_shape;
InferenceEngine::Precision data_prc;

int64_t actual_n_iter();

protected:
void SetUp() override;
};


class TrivialLoopTest : public testing::WithParamInterface<LayerTestsUtils::basicParams>,
virtual public LayerTestsUtils::LayerTestsCommon {
protected:
using RefBlobGenerator = std::function<InferenceEngine::Blob::Ptr (const InferenceEngine::TensorDesc &info)>;
std::map<std::string, RefBlobGenerator> inputGens, outputGens;

InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override {
auto found = inputGens.find(info.name());
if (found != inputGens.end()) {
return found->second(info.getTensorDesc());
}

found = inputGens.find("");
if (found != inputGens.end()) {
return found->second(info.getTensorDesc());
}

return LayerTestsCommon::GenerateInput(info);
}

std::vector<std::vector<std::uint8_t>> CalculateRefs() override {
if (outputGens.empty())
return LayerTestsCommon::CalculateRefs();

const auto results = function->get_results();
const auto outs_info = cnnNetwork.getOutputsInfo();
const auto num_out_blob = results.size();

std::vector<std::vector<std::uint8_t>> res_collection(num_out_blob);

for (int i = 0; i < num_out_blob; i++) {
// TODO: the name of the original NG Result doesn't match the outputs after conversion.
// Expected : auto name = results[i]->get_friendly_name();
auto name = results[i]->get_input_node_ptr(0)->get_friendly_name();
auto data = outs_info.at(name);
IE_ASSERT(data != nullptr);

RefBlobGenerator generator;
auto found = outputGens.find(name);
if (found != outputGens.end()) {
generator = found->second;
} else {
found = outputGens.find("");
if (found != outputGens.end()) {
generator = found->second;
}
}

IE_ASSERT(generator != nullptr) << "Test output generator is not specified";
auto blob = generator(data->getTensorDesc());
auto blob_size = blob->byteSize();
auto blob_ptr = blob->buffer().as<uint8_t*>();

auto &res = res_collection[i];
res.resize(blob_size);
std::copy(blob_ptr, blob_ptr + blob_size, res.begin());
}
return res_collection;
}
};

} // namespace LayerTestsDefinitions
@ -23,6 +23,7 @@ typedef std::tuple<
InferenceEngine::Layout, // Input layout
InferenceEngine::Layout, // Output layout
std::vector<size_t>, // Input shapes
std::vector<size_t>, // Used outputs indices
std::string // Target device name
> splitParams;

@ -35,26 +36,4 @@ protected:
void SetUp() override;
};

typedef std::tuple<
size_t, // Num splits
size_t, // Axis
InferenceEngine::Precision, // Net precision
InferenceEngine::Precision, // Input precision
InferenceEngine::Precision, // Output precision
InferenceEngine::Layout, // Input layout
InferenceEngine::Layout, // Output layout
std::vector<size_t>, // Input shapes
std::vector<size_t>, // Used outputs indices
std::string // Target device name
> splitWithUnusedOutputsParams;

class splitWithUnusedOutputsTest : public testing::WithParamInterface<splitWithUnusedOutputsParams>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<splitWithUnusedOutputsParams> obj);

protected:
void SetUp() override;
};

} // namespace LayerTestsDefinitions

@ -0,0 +1,37 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once

#include "common_test_utils/test_common.hpp"
#include "functional_test_utils/layer_test_utils.hpp"
#include <ie_core.hpp>

namespace SubgraphTestsDefinitions {
typedef std::tuple<
std::string, // Target device name
InferenceEngine::Precision, // Network precision
size_t, // Multiples of concat size to be used as input size
size_t, // Concat size
std::map<std::string, std::string> // Configuration
> memoryEltwiseReshapeConcatParams;

class MemoryEltwiseReshapeConcatTest : public LayerTestsUtils::LayerTestsCommon,
public testing::WithParamInterface<memoryEltwiseReshapeConcatParams> {
private:
void initTestModel();
// memory layers have to be replaced since ngraph does not support them
void initNgraphFriendlyModel();

// since we are switching models, we need to generate and save these values in SetUp
size_t inputSize;
size_t concatSize;
ngraph::element::Type ngPrc;
std::vector<float> memory_init;
std::vector<float> concat_vals;
protected:
void SetUp() override;
void Run() override;
public:
static std::string getTestCaseName(const testing::TestParamInfo<memoryEltwiseReshapeConcatParams> &obj);
};
} // namespace SubgraphTestsDefinitions
@ -46,7 +46,9 @@ namespace LayerTestsDefinitions {
result << "types=" << CommonTestUtils::vec2str(types_separate) << "_";
result << "netPRC=" << netPrecision.name() << "_";
result << "targetDevice=" << targetDevice << "_";
return result.str();
auto res_str = result.str();
std::replace(res_str.begin(), res_str.end(), '-', '_');
return res_str;
}

void LoopTest::SetUp() {
@ -155,5 +157,227 @@ namespace LayerTestsDefinitions {

TEST_P(LoopTest, CompareWithRefs) {
Run();
};
}

void StaticShapeLoopTest::SetUp() {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
SetRefMode(LayerTestsUtils::IE);

auto args_pack = std::tie(static_iter_num, max_iter_num, dynamic_exit, axis);
std::tie(
static_continue_cond,
args_pack,
start_value,
data_shape,
data_prc,
targetDevice) = GetParam();

const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(data_prc);
const auto ngShape = ngraph::Shape{data_shape};
const auto scalarShape = ngraph::Shape{};

ngraph::ParameterVector params{};
auto cond_input_create = [&params] (ngraph::element::Type prc, const ngraph::Shape &shape, int value = 0, bool is_static = false)
-> std::shared_ptr<ngraph::Node> {
if (is_static)
return std::make_shared<ngraph::opset5::Constant>(prc, shape, value);

auto input = std::make_shared<ngraph::op::Parameter>(prc, shape);
params.push_back(input);
return input;
};

auto start = cond_input_create(prc, ngShape);
auto count = cond_input_create(ngraph::element::i64, scalarShape, max_iter_num, static_iter_num);
auto skip = cond_input_create(ngraph::element::boolean, scalarShape, true, static_continue_cond);

//
// count skip start count skip start
// / /
// ___*___*____ __________*___*____ | idx | data | out |
// | idx in | | ex_val idx in | | 0 | 7 | 7 |
// | | / | | | / | / | | 1 | 7 | 8 |
// | add | | less add | | 2 | 8 | 10 |
// | | true | | | | | | 3 | 10 | 13 |
// | | | | | | | | ~~~~~ * * * ~~~~~
// | out cnd | | cnd out |
// |___*____*___| |____*_____*________|
// Full loop Dynamic exit loop
// n_iter = count n_iter = ex_val
//
auto b_indx = std::make_shared<ngraph::op::Parameter>(ngraph::element::i64, ngraph::Shape{});
auto b_data = std::make_shared<ngraph::op::Parameter>(prc, ngShape);
auto b_indx_cast = std::make_shared<ngraph::op::Convert>(b_indx, prc);
auto b_add = std::make_shared<ngraph::op::Add>(b_data, b_indx_cast, ngraph::op::AutoBroadcastSpec::NUMPY);

std::shared_ptr<ngraph::Node> b_cond;
if (dynamic_exit == -1) {
b_cond = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{}, true);
} else {
auto b_exit_value = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, scalarShape, dynamic_exit);
b_cond = std::make_shared<ngraph::opset5::Less>(b_indx, b_exit_value);
}

auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {b_cond, b_add}, // TODO: check with reverse
ngraph::ParameterVector {b_indx, b_data}); // TODO: check with reverse

auto loop = std::make_shared<ngraph::opset5::Loop>(count, skip);
loop->set_function(body);
loop->set_special_body_ports({0, 0});
loop->set_merged_input(b_data, start, b_add);
if (axis == -1)
loop->get_iter_value(b_add, -1);
else
loop->get_concatenated_slices(b_add, 0, 1, 1, -1, axis);

function = std::make_shared<ngraph::Function>(
ngraph::OutputVector {loop},
params);
}

InferenceEngine::Blob::Ptr StaticShapeLoopTest::GenerateInput(const InferenceEngine::InputInfo &info) const {
auto tdesc = info.getTensorDesc();
auto blob = make_blob_with_precision(tdesc);
blob->allocate();

if (tdesc.getLayout() == InferenceEngine::SCALAR) {
auto scalar_1d = CommonTestUtils::make_reshape_view(blob, {1});
CommonTestUtils::fill_data_with_broadcast(scalar_1d, 0, {static_cast<float>(max_iter_num)});
} else {
CommonTestUtils::fill_data_with_broadcast(blob, 0, {static_cast<float>(start_value)});
}

return blob;
}

int64_t StaticShapeLoopTest::actual_n_iter() {
constexpr auto INF_N_ITER = std::numeric_limits<int64_t>::max();
IE_ASSERT(dynamic_exit != -1 || max_iter_num != -1);

// dynamic_exit + 1, because the loop body behaves like a do-while loop with a post-condition check.
return std::min(dynamic_exit == -1 ? INF_N_ITER : dynamic_exit + 1,
max_iter_num == -1 ? INF_N_ITER : max_iter_num);
}
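Because the body executes before the exit condition is re-evaluated (the do-while semantics noted in the comment), a dynamic exit at index `k` still costs `k + 1` body executions, capped by the static trip count. A small standalone check of the rule implemented by `actual_n_iter()` (assumed values, illustrative only):

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>

// Mirrors StaticShapeLoopTest::actual_n_iter() above: -1 means "unbounded".
int64_t expected_iters(int64_t max_iter_num, int64_t dynamic_exit) {
    constexpr int64_t INF = std::numeric_limits<int64_t>::max();
    return std::min(dynamic_exit == -1 ? INF : dynamic_exit + 1,
                    max_iter_num == -1 ? INF : max_iter_num);
}

int main() {
    assert(expected_iters(5, -1) == 5);   // static trip count only
    assert(expected_iters(5, 3) == 4);    // exit checked after body runs at idx 3
    assert(expected_iters(5, 7) == 5);    // dynamic exit never reached
    assert(expected_iters(-1, 5) == 6);   // unbounded loop, exit after idx 5
    return 0;
}
```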

// Predefined ref output
std::vector<std::vector<std::uint8_t>> StaticShapeLoopTest::CalculateRefs() {
bool auto_concat_out = (axis != -1);
const auto n_iter = actual_n_iter();

auto ref_shape = data_shape;
if (auto_concat_out)
ref_shape[axis] *= n_iter;

using namespace CommonTestUtils;
InferenceEngine::TensorDesc tdesc {data_prc, ref_shape, InferenceEngine::TensorDesc::getLayoutByDims(ref_shape)};
std::vector<uint8_t> res(byte_size(tdesc));
auto out = make_blob_with_precision(tdesc, res.data());

std::vector<float> vals(n_iter);
float val = start_value;
for (int i = 0; i < n_iter; i++) {
val += i;
vals[i] = val;
}

if (auto_concat_out)
fill_data_with_broadcast(out, axis, vals);
else
fill_data_with_broadcast(out, 0, {val}); // broadcast scalar data

return {res};
}

TEST_P(StaticShapeLoopTest, CompareWithRefs) {
Run();
}

TEST_P(TrivialLoopTest, PassThroughBody) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
InferenceEngine::Precision iePrc;
InferenceEngine::SizeVector ieShape;
std::tie(iePrc, ieShape, targetDevice) = GetParam();

const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(iePrc);
const auto shape = ngraph::Shape{ieShape};
const auto scalarShape = ngraph::Shape{};

auto start = std::make_shared<ngraph::op::Parameter>(prc, shape);
auto count = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, scalarShape, 5);
auto icond = std::make_shared<ngraph::op::Constant>(ngraph::element::boolean, scalarShape, true);

// Loop body
auto b_data = std::make_shared<ngraph::op::Parameter>(prc, shape);
auto b_cond = std::make_shared<ngraph::op::Parameter>(ngraph::element::boolean, scalarShape);

auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {b_cond, b_data}, // | passthrough body, no data changes
ngraph::ParameterVector {b_cond, b_data}); // | input -> output

auto loop = std::make_shared<ngraph::opset5::Loop>(count, icond);
loop->set_function(body);
loop->set_special_body_ports({-1, 0});
loop->set_invariant_input(b_cond, icond);
loop->set_invariant_input(b_data, start);
loop->get_iter_value(b_data, -1);

function = std::make_shared<ngraph::Function>(
ngraph::OutputVector {loop},
ngraph::ParameterVector {start});

// Precalculated ref blobs
auto blob = make_blob_with_precision({iePrc, ieShape, InferenceEngine::TensorDesc::getLayoutByDims(ieShape)});
blob->allocate();
CommonTestUtils::fill_data_with_broadcast(blob, 0, {10});

inputGens[""] = [&] (InferenceEngine::TensorDesc tdesc) { return blob; };
outputGens[""] = [&] (InferenceEngine::TensorDesc tdesc) { return blob; };

Run();
}

TEST_P(TrivialLoopTest, UnusedInputBody) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
InferenceEngine::Precision iePrc;
InferenceEngine::SizeVector ieShape;
std::tie(iePrc, ieShape, targetDevice) = GetParam();

const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(iePrc);
const auto shape = ngraph::Shape{ieShape};
const auto scalarShape = ngraph::Shape{};

auto start = std::make_shared<ngraph::op::Parameter>(prc, shape);
auto count = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, scalarShape, 5);
auto icond = std::make_shared<ngraph::op::Constant>(ngraph::element::boolean, scalarShape, true);

// Loop body
auto b_data = std::make_shared<ngraph::op::Parameter>(prc, shape);
auto b_cond = std::make_shared<ngraph::op::Constant>(ngraph::element::boolean, scalarShape, true);
auto b_iter = std::make_shared<ngraph::op::Parameter>(ngraph::element::i64, scalarShape);

auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {b_cond, b_data},
ngraph::ParameterVector {b_data, b_iter});

auto loop = std::make_shared<ngraph::opset5::Loop>(count, icond);
loop->set_function(body);
loop->set_special_body_ports({1, 0});
loop->set_invariant_input(b_data, start);
loop->get_iter_value(b_data, -1);

function = std::make_shared<ngraph::Function>(
ngraph::OutputVector {loop},
ngraph::ParameterVector {start});

// Precalculated ref blobs
auto blob = make_blob_with_precision({iePrc, ieShape, InferenceEngine::TensorDesc::getLayoutByDims(ieShape)});
blob->allocate();
CommonTestUtils::fill_data_with_broadcast(blob, 0, {10});

inputGens[""] = [&] (InferenceEngine::TensorDesc tdesc) { return blob; };
outputGens[""] = [&] (InferenceEngine::TensorDesc tdesc) { return blob; };

Run();
}
} // namespace LayerTestsDefinitions
@ -26,13 +26,16 @@ std::string SplitLayerTest::getTestCaseName(testing::TestParamInfo<splitParams>
InferenceEngine::Precision netPrecision;
InferenceEngine::Precision inPrc, outPrc;
InferenceEngine::Layout inLayout, outLayout;
InferenceEngine::SizeVector inputShapes;
InferenceEngine::SizeVector inputShapes, outIndices;
std::string targetDevice;
std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, targetDevice) = obj.param;
std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, outIndices, targetDevice) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
result << "numSplits=" << numSplits << "_";
result << "axis=" << axis << "_";
if (!outIndices.empty()) {
result << "outIndices" << CommonTestUtils::vec2str(outIndices) << "_";
}
result << "IS";
result << "netPRC=" << netPrecision.name() << "_";
result << "inPRC=" << inPrc.name() << "_";
@ -46,57 +49,14 @@ std::string SplitLayerTest::getTestCaseName(testing::TestParamInfo<splitParams>
void SplitLayerTest::SetUp() {
SetRefMode(LayerTestsUtils::RefMode::CONSTANT_FOLDING);
size_t axis, numSplits;
std::vector<size_t> inputShape;
std::vector<size_t> inputShape, outIndices;
InferenceEngine::Precision netPrecision;
std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, targetDevice) = this->GetParam();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
auto paramOuts = ngraph::helpers::convert2OutputVector(
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
auto split = std::dynamic_pointer_cast<ngraph::opset1::Split>(ngraph::builder::makeSplit(paramOuts[0],
ngPrc, numSplits, axis));
ngraph::ResultVector results;
for (int i = 0; i < numSplits; i++) {
results.push_back(std::make_shared<ngraph::opset1::Result>(split->output(i)));
}
function = std::make_shared<ngraph::Function>(results, params, "split");
}

TEST_P(SplitLayerTest, CompareWithRefs) {
Run();
};

std::string splitWithUnusedOutputsTest::getTestCaseName(testing::TestParamInfo<splitWithUnusedOutputsParams> obj) {
size_t numSplits, axis;
InferenceEngine::Precision netPrecision;
InferenceEngine::Precision inPrc, outPrc;
InferenceEngine::Layout inLayout, outLayout;
InferenceEngine::SizeVector inputShapes;
std::vector<size_t> outIndices;
std::string targetDevice;
std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, outIndices, targetDevice) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
result << "numSplits=" << numSplits << "_";
result << "axis=" << axis << "_";
result << "outIndices" << CommonTestUtils::vec2str(outIndices) << "_";
result << "IS";
result << "netPRC=" << netPrecision.name() << "_";
result << "inPRC=" << inPrc.name() << "_";
result << "outPRC=" << outPrc.name() << "_";
result << "inL=" << inLayout << "_";
result << "outL=" << outLayout << "_";
result << "trgDev=" << targetDevice;
return result.str();
}

void splitWithUnusedOutputsTest::SetUp() {
SetRefMode(LayerTestsUtils::RefMode::CONSTANT_FOLDING);
size_t axis, numSplits;
std::vector<size_t> inputShape;
InferenceEngine::Precision netPrecision;
std::vector<size_t> outIndices;
std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, outIndices, targetDevice) = this->GetParam();
if (outIndices.empty()) {
for (int i = 0; i < numSplits; ++i) {
outIndices.push_back(i);
}
}
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
auto paramOuts = ngraph::helpers::convert2OutputVector(
@ -110,7 +70,7 @@ void splitWithUnusedOutputsTest::SetUp() {
function = std::make_shared<ngraph::Function>(results, params, "split");
}

TEST_P(splitWithUnusedOutputsTest, CompareWithRefs) {
TEST_P(SplitLayerTest, CompareWithRefs) {
Run();
};

@ -0,0 +1,150 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include <functional>

#include "ie_core.hpp"

#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "functional_test_utils/precision_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "functional_test_utils/skip_tests_config.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"

#include <transformations/op_conversions/lstm_cell_decomposition.hpp>
#include "subgraph_tests/memory_eltwise_reshape_concat.hpp"

namespace SubgraphTestsDefinitions {

std::string MemoryEltwiseReshapeConcatTest::getTestCaseName(const testing::TestParamInfo<memoryEltwiseReshapeConcatParams> &obj) {
std::string targetDevice;
InferenceEngine::Precision netPrecision;
size_t inputSize;
size_t concatSize;
std::map<std::string, std::string> config;
std::tie(targetDevice, netPrecision, inputSize, concatSize, config) = obj.param;
std::ostringstream result;

result << "netPrecision=" << netPrecision.name() << "_";
result << "IS=" << inputSize << "_";
result << "CS=" << concatSize << "_";
result << "targetDevice=" << targetDevice;
return result.str();
}

void MemoryEltwiseReshapeConcatTest::SetUp() {
InferenceEngine::Precision netPrecision;
std::map<std::string, std::string> config;
std::tie(targetDevice, netPrecision, inputSize, concatSize, config) = this->GetParam();
configuration.insert(config.begin(), config.end());
ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

const int seed = 0;
std::mt19937 gen(static_cast<float>(seed));

auto generateFloatNumbers = [gen](std::size_t vec_len, float min, float max) mutable {
std::vector<float> res;

std::uniform_real_distribution<float> dist(min, max);
for (int i = 0; i < vec_len; i++)
res.emplace_back(static_cast<float>(dist(gen)));

return res;
};

memory_init = generateFloatNumbers(inputSize * concatSize, -1.0f, 1.0f);
concat_vals = generateFloatNumbers(concatSize, 12.0f, 14.0f);
}

void MemoryEltwiseReshapeConcatTest::initTestModel() {
InferenceEngine::SizeVector input_dims = {1, inputSize * concatSize};
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});

auto memory_constant = ngraph::builder::makeConstant<float>(ngPrc, input_dims, memory_init);
memory_constant->set_friendly_name("memory_constant");
auto memory_read = std::make_shared<ngraph::op::ReadValue>(memory_constant, "memory");
memory_read->set_friendly_name("memory_read");

auto mul = ngraph::builder::makeEltwise(input_parameter[0], memory_read, ngraph::helpers::EltwiseTypes::MULTIPLY);
mul->set_friendly_name("multiplication");

auto memory_write = std::make_shared<ngraph::op::Assign>(mul, "memory");
memory_write->set_friendly_name("memory_write");

auto reshape_1_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<size_t>({inputSize, concatSize}));
reshape_1_pattern->set_friendly_name("reshape_pattern");
auto reshape_1 = std::make_shared<ngraph::op::v1::Reshape>(mul, reshape_1_pattern, false);
reshape_1->set_friendly_name("reshape");

auto concat_constant = ngraph::builder::makeConstant(ngPrc, {1, concatSize}, concat_vals);
concat_constant->set_friendly_name("concat_constant");

auto concat = ngraph::builder::makeConcat({concat_constant, reshape_1}, 0);

memory_write->add_control_dependency(memory_read);
concat->add_control_dependency(memory_write);

auto final_reshape_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{4},
std::vector<size_t>({1, 1, inputSize + 1, concatSize}));
auto final_reshape = std::make_shared<ngraph::op::v1::Reshape>(concat, final_reshape_pattern, false);

function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "memory_multiply_reshape_concat");
}

void MemoryEltwiseReshapeConcatTest::initNgraphFriendlyModel() {
InferenceEngine::SizeVector input_dims = {1, inputSize * concatSize};
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});

auto memory_constant = ngraph::builder::makeConstant<float>(ngPrc, input_dims, memory_init);
memory_constant->set_friendly_name("memory_constant");

auto mul = ngraph::builder::makeEltwise(input_parameter[0], memory_constant, ngraph::helpers::EltwiseTypes::MULTIPLY);
mul->set_friendly_name("multiplication");

auto reshape_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{3}, std::vector<size_t>({1, inputSize, concatSize}));
reshape_pattern->set_friendly_name("reshape_pattern");
auto reshape = std::make_shared<ngraph::op::v1::Reshape>(mul, reshape_pattern, false);
reshape->set_friendly_name("reshape");

auto squeeze_const = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, 0);
squeeze_const->set_friendly_name("squeeze_const");
auto squeeze = std::make_shared<ngraph::op::Squeeze>(reshape, squeeze_const);
squeeze->set_friendly_name("squeeze");

auto concat_constant = ngraph::builder::makeConstant(ngPrc, {1, concatSize}, concat_vals);
concat_constant->set_friendly_name("concat_constant");

auto concat = ngraph::builder::makeConcat({concat_constant, squeeze}, 0);

function = std::make_shared<ngraph::Function>(concat, input_parameter, "memory_multiply_reshape_concat");
}

void MemoryEltwiseReshapeConcatTest::Run() {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
initTestModel();
LoadNetwork();

InferenceEngine::TensorDesc state_description(InferenceEngine::Precision::FP32,
InferenceEngine::SizeVector({1, inputSize * concatSize}),
InferenceEngine::Layout::NC);

auto states = executableNetwork.QueryState();
auto state_values_blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description,
memory_init.data(), memory_init.size());
states[0].SetState(state_values_blob);
Infer();
initNgraphFriendlyModel();
Validate();
}

TEST_P(MemoryEltwiseReshapeConcatTest, CompareWithRefs) {
Run();
};
} // namespace SubgraphTestsDefinitions

@ -104,6 +104,10 @@ void fill_data_with_broadcast(InferenceEngine::Blob::Ptr& blob, InferenceEngine:
auto src_ptr = get_data(values);

switch (blob->getTensorDesc().getPrecision()) {
case InferenceEngine::Precision::U64:
case InferenceEngine::Precision::I64:
copy_7D<uint64_t>(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims);
break;
case InferenceEngine::Precision::FP32:
case InferenceEngine::Precision::I32:
copy_7D<uint32_t>(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims);
@ -189,6 +193,12 @@ InferenceEngine::Blob::Ptr make_reshape_view(const InferenceEngine::Blob::Ptr &b
return new_blob;
}

size_t byte_size(const InferenceEngine::TensorDesc &tdesc) {
auto prc = tdesc.getPrecision();
auto dims = tdesc.getDims();
return prc.size() * std::accumulate(std::begin(dims), std::end(dims), (size_t)1, std::multiplies<size_t>());
}
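Usage note: the returned size is the precision's element size in bytes times the product of all dimensions, so an FP32 tensor of dims {2, 1, 4} occupies 8 × 4 = 32 bytes. A short hypothetical caller (assuming the InferenceEngine headers already included by this file):

```cpp
// Sizing a raw staging buffer for a blob before copying its contents.
InferenceEngine::TensorDesc tdesc(InferenceEngine::Precision::FP32,
                                  {2, 1, 4},
                                  InferenceEngine::Layout::CHW);
std::vector<uint8_t> staging(byte_size(tdesc));  // 2 * 1 * 4 elements * 4 bytes = 32
```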

/**
* Repeatedly fills a tensor with data.
*

@ -72,6 +72,14 @@ InferenceEngine::Blob::Ptr make_reshape_view(const InferenceEngine::Blob::Ptr &b
*/
void fill_data_const(InferenceEngine::Blob::Ptr& blob, float val);


/**
* Calculate size of buffer required for provided tensor descriptor.
* @param tdesc provided tensor descriptor
* @return size in bytes
*/
size_t byte_size(const InferenceEngine::TensorDesc &tdesc);

static void fill_data_bbox(float *data, size_t size, int height, int width, float omega) {
float center_h = (height - 1.0f) / 2;
float center_w = (width - 1.0f) / 2;

@ -60,6 +60,14 @@ GNA2_API Gna2Status Gna2DeviceClose(
return Gna2StatusSuccess;
}

GNA2_API Gna2Status Gna2DeviceGetCount(
uint32_t* numberOfDevices) {
if (numberOfDevices != nullptr) {
*numberOfDevices = 1;
}
return Gna2StatusSuccess;
}

GNA2_API enum Gna2Status Gna2MemoryFree(
void * memory) {
return Gna2StatusSuccess;

@ -69,6 +69,14 @@ GNA2_API Gna2Status Gna2DeviceClose(
return Gna2StatusSuccess;
}

GNA2_API Gna2Status Gna2DeviceGetCount(
uint32_t * numberOfDevices) {
if (numberOfDevices != nullptr) {
*numberOfDevices = 1;
}
return Gna2StatusSuccess;
}

GNA2_API enum Gna2Status Gna2MemoryFree(
void * memory) {
if (current != nullptr) {

@ -60,10 +60,10 @@ public:
void * alloc(size_t size) noexcept override {
return ptr;
}
virtual bool free(void* handle) noexcept {
bool free(void* handle) noexcept override {
return true;
}
virtual void Release() noexcept {
void Release() noexcept override {
delete this;
}
};
@ -102,6 +102,9 @@ class GNACppApi {
MOCK_METHOD1(Gna2DeviceClose, Gna2Status (
uint32_t deviceIndex));

MOCK_METHOD1(Gna2DeviceGetCount, Gna2Status (
uint32_t * numberOfDevices));

MOCK_METHOD1(Gna2MemoryFree, Gna2Status (
void * memory));

@ -100,24 +100,23 @@ struct resample : public primitive_base<resample> {
/// @param scale Resample scale.
/// @param num_filter Input filter. Only used by bilinear sample_type.
/// @param sample_type Resample method (nearest neighbor/bilinear/caffe bilinear).
/// @param with_activation Enables Relu activation.
/// @param activation_slp Relu activation slope.
resample(const primitive_id& id,
const primitive_id& input,
tensor output_size,
uint32_t num_filter,
resample_type operation_type = resample_type::nearest,
bool with_activation = false,
float activation_slp = 0.0f,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
output_size(output_size),
num_filter(num_filter),
axesAndScales({}),
pads_begin({}),
pads_end({}),
align_corners(1),
operation_type(operation_type),
shape_calc_mode(shape_calculation_mode::sizes),
with_activation(with_activation),
activation_negative_slope(activation_slp),
antialias(0),
cube_coeff(0.0f),
coord_trans_mode(coordinate_transformation_mode::asymmetric),
round_mode(nearest_mode::floor) {
if (operation_type == resample_type::caffe_bilinear) {
@ -132,8 +131,6 @@ struct resample : public primitive_base<resample> {
/// @param pads_end Optional end padding for input.
/// @param align_corners Align corner pixels of the input and output tensors.
/// @param resample_type Resample bilinear method.
/// @param with_activation Enables Relu activation.
/// @param activation_slp Relu activation slope.
resample(const primitive_id& id,
const primitive_id& input,
tensor output_size,
@ -141,19 +138,18 @@ struct resample : public primitive_base<resample> {
std::vector<int32_t> pads_end = {},
int32_t align_corners = 1,
resample_type operation_type = resample_type::bilinear,
bool with_activation = false,
float activation_slp = 0.0f,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
output_size(output_size),
num_filter(0),
axesAndScales({}),
pads_begin(pads_begin),
pads_end(pads_end),
align_corners(align_corners),
operation_type(operation_type),
shape_calc_mode(shape_calculation_mode::sizes),
with_activation(with_activation),
activation_negative_slope(activation_slp),
antialias(0),
cube_coeff(0.0f),
coord_trans_mode(coordinate_transformation_mode::asymmetric),
round_mode(nearest_mode::floor) {}

@ -170,19 +166,20 @@ struct resample : public primitive_base<resample> {
std::vector<int32_t> pads_end = {},
int32_t antialias = 0,
float cube_coeff = -0.75f,
resample_type mode = resample_type::caffe_bilinear,
resample_type operation_type = resample_type::caffe_bilinear,
shape_calculation_mode shape_calc_mode = shape_calculation_mode::sizes,
coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel,
nearest_mode nm = nearest_mode::round_prefer_floor,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
output_size(output_size),
num_filter(0),
axesAndScales(axesAndScales),
pads_begin(pads_begin),
pads_end(pads_end),
operation_type(mode),
align_corners(1),
operation_type(operation_type),
shape_calc_mode(shape_calc_mode),
with_activation(false),
antialias(antialias),
cube_coeff(cube_coeff),
coord_trans_mode(ctm),
@ -200,21 +197,17 @@ struct resample : public primitive_base<resample> {
std::vector<int32_t> pads_end;
/// @param align_corners Align corner pixels of the input and output tensors.
int32_t align_corners;
/// @param sample_type Resample method (nearest neighbor/bilinear/caffe bilinear).
/// @param operation_type Resample method (nearest neighbor/bilinear/caffe bilinear).
resample_type operation_type;
/// @param shape_calc_mode Specifies which input, sizes or scales, is used to calculate an output shape.
shape_calculation_mode shape_calc_mode;
/// @brief Enables Relu activation.
bool with_activation;
/// @brief Relu activation slope.
float activation_negative_slope;
/// @param antialias is a flag that specifies whether to perform anti-aliasing.
int32_t antialias;
/// @param cube_coeff specifies the parameter a for cubic interpolation. cube_coeff is used only when mode == cubic.
float cube_coeff;
/// @param specifies how to transform the coordinate in the resized tensor to the coordinate in the original tensor
/// @param coord_trans_mode specifies how to transform the coordinate in the resized tensor to the coordinate in the original tensor
coordinate_transformation_mode coord_trans_mode;
/// @param specifies round mode when mode == nearest and is used only when mode == nearest.
/// @param round_mode specifies round mode when mode == nearest and is used only when mode == nearest.
nearest_mode round_mode;
};
/// @}

@ -34,7 +34,7 @@ public:
};
}

JitConstants GetJitConstants(const eltwise_params& params) const;
JitConstants GetJitConstants(const eltwise_params& params) const override;

protected:
bool Validate(const Params& p, const optional_params& o) const override;

@ -24,7 +24,7 @@ class ReduceKernel_b_fs_yx_fsv16 : public ReduceKernelBase {
public:
ReduceKernel_b_fs_yx_fsv16() : ReduceKernelBase("reduce_gpu_b_fs_yx_fsv16") {}
virtual ~ReduceKernel_b_fs_yx_fsv16() {}
virtual CommonDispatchData SetDefault(const reduce_params& params, const optional_params&) const;
CommonDispatchData SetDefault(const reduce_params& params, const optional_params&) const override;
JitConstants GetJitConstants(const reduce_params& params) const override;
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;

@ -24,7 +24,7 @@ class ReduceKernelRef : public ReduceKernelBase {
public:
ReduceKernelRef() : ReduceKernelBase("reduce_ref") {}
virtual ~ReduceKernelRef() {}
virtual CommonDispatchData SetDefault(const reduce_params& params, const optional_params&) const;
CommonDispatchData SetDefault(const reduce_params& params, const optional_params&) const override;
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
JitConstants GetJitConstants(const reduce_params& params) const override;

@ -50,7 +50,7 @@ public:
protected:
virtual CommonDispatchData SetDefault(const space_to_depth_params& params, const optional_params&) const;
virtual JitConstants GetJitConstants(const space_to_depth_params& params) const;
virtual bool Validate(const Params& p, const optional_params& o) const;
bool Validate(const Params& p, const optional_params& o) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::ELTWISE,
FusedOpType::QUANTIZE,

@ -55,7 +55,7 @@ public:
}

std::shared_ptr<gpu_toolkit> get_context() const { return _ctx; }
cl::Event get() { return _event; }
cl::Event get() override { return _event; }

private:
std::shared_ptr<gpu_toolkit> _ctx;
@ -91,7 +91,7 @@ public:
_attached = true;
}

cl::Event get() { return _last_ocl_event; }
cl::Event get() override { return _last_ocl_event; }
std::shared_ptr<gpu_toolkit> get_context() const { return _ctx; }

private:

@ -118,9 +118,6 @@ struct resample_gpu : typed_primitive_gpu_impl<resample> {
get_default_optional_params<kernel_selector::resample_optional_params>(arg.get_program());

const auto& primitive = arg.get_primitive();
if (primitive->with_activation)
convert_activation_func_params(primitive, us_params.activations);

size_t dimsNum = arg.get_output_layout().format.dimension();
us_params.resampleType = convert_to_sample_type(primitive->operation_type);
us_params.nearestMode = convert_to_nearest_mode(primitive->round_mode);

@ -118,7 +118,6 @@ std::string resample_inst::to_string(resample_node const& node) {
resample_info.add("nearest_mode:", "simple");

resample_info.add("output_size", desc->output_size);
resample_info.add("with activation", desc->with_activation);
resample_info.add("output padding lower size", desc->output_padding.lower_size());
resample_info.add("output padding upper size", desc->output_padding.upper_size());

inference-engine/thirdparty/mkl-dnn (vendored)
@ -1 +1 @@
Subproject commit d7d8ed46078b637794bc91215e1a982bb0f1683a
Subproject commit 5ef085d5af65e8966e03cdfcbaa65761d61a5c9a
@ -343,6 +343,8 @@ extensions/front/tf/__init__.py
|
||||
extensions/front/tf/activation_ext.py
|
||||
extensions/front/tf/argmax_ext.py
|
||||
extensions/front/tf/assign_elimination.py
|
||||
extensions/front/tf/automl_efficientdet.json
|
||||
extensions/front/tf/AutomlEfficientDet.py
|
||||
extensions/front/tf/basic_lstm_cell.py
|
||||
extensions/front/tf/batch_to_space_ext.py
|
||||
extensions/front/tf/BatchMatMul_ext.py
|
||||
|
@ -15,9 +15,10 @@
|
||||
"""
|
||||
from mo.front.common.partial_infer.utils import int64_array
|
||||
from mo.front.common.replacement import FrontReplacementOp
|
||||
from mo.graph.graph import Node, Graph
|
||||
from mo.front.tf.graph_utils import create_op_with_const_inputs
|
||||
from mo.graph.graph import Node, Graph, rename_nodes
|
||||
from mo.ops.concat import Concat
|
||||
from mo.ops.expand_dims import ExpandDims
|
||||
from mo.ops.unsqueeze import Unsqueeze
|
||||
|
||||
|
||||
class Pack(FrontReplacementOp):
|
||||
@ -25,15 +26,15 @@ class Pack(FrontReplacementOp):
    enabled = True

    def replace_op(self, graph: Graph, node: Node):
-       out_node = Concat(graph, {'axis': node.axis, 'in_ports_count': len(node.in_ports()),
-                                 'name': node.name + '/Concat_', }).create_node()
+       out_node = Concat(graph, {'axis': node.axis, 'in_ports_count': len(node.in_ports())}).create_node()
+       pack_name = node.soft_get('name', node.id)

        for ind in node.in_ports():
-           expand_dims_node = ExpandDims(graph, {'expand_axis': int64_array([node.axis]),
-                                                 'name': node.name + '/ExpandDims_'}).create_node()
-           node.in_port(ind).get_connection().set_destination(expand_dims_node.in_port(0))
-           expand_dims_node.out_port(0).connect(out_node.in_port(ind))
-       # Replace edge from out port 0 of the matched node with a edge from node out_node.id with port 0.
-       # The "explicit" version of the return value is: [(out_node.id, 0)])
+           unsqueeze_node = create_op_with_const_inputs(graph, Unsqueeze, {1: int64_array([node.axis])},
+                                                        {'name': node.soft_get('name', node.id) + '/Unsqueeze'})
+           node.in_port(ind).get_connection().set_destination(unsqueeze_node.in_port(0))
+           unsqueeze_node.out_port(0).connect(out_node.in_port(ind))
+
+       rename_nodes([(node, pack_name + '/TBR'), (out_node, pack_name)])
        return [out_node.id]
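For context on the `Pack` rewrite above: the new `replace_op` decomposes a TF `Pack` of N inputs into one `Unsqueeze` (with a `Const` axis input) per input, all feeding a single `Concat`. A minimal NumPy sketch of the equivalence, with hypothetical shapes:

```python
import numpy as np

# Three hypothetical inputs that Pack would stack along a new axis 0.
inputs = [np.random.rand(227, 227, 3) for _ in range(3)]
axis = 0

packed = np.stack(inputs, axis=axis)  # Pack semantics: shape (3, 227, 227, 3)

# The replacer's decomposition: unsqueeze each input, then concatenate.
unsqueezed = [np.expand_dims(x, axis=axis) for x in inputs]  # each (1, 227, 227, 3)
concatenated = np.concatenate(unsqueezed, axis=axis)         # shape (3, 227, 227, 3)

assert np.array_equal(packed, concatenated)
```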
@ -20,6 +20,7 @@ import numpy as np
from generator import generator, generate

from extensions.front.Pack import Pack
+from mo.front.common.partial_infer.utils import int64_array
from mo.utils.ir_engine.compare_graphs import compare_graphs
from mo.utils.unittest.graph import build_graph
@ -32,12 +33,16 @@ nodes_attributes = {
    'pack': {'axis': None, 'type': None, 'kind': 'op', 'op': 'Pack'},
    # Test operation
    'last': {'type': None, 'value': None, 'kind': 'op', 'op': None},
-   # ExpandDims, Concat and Const operations
+   # Unsqueeze, Concat and Const operations
    'const_1': {'value': None, 'type': None, 'kind': 'op', 'op': 'Const'},
-   'ExpandDims_0': {'expand_axis': None, 'type': None, 'kind': 'op', 'op': 'ExpandDims'},
-   'ExpandDims_1': {'expand_axis': None, 'type': None, 'kind': 'op', 'op': 'ExpandDims'},
-   'ExpandDims_2': {'expand_axis': None, 'type': None, 'kind': 'op', 'op': 'ExpandDims'},
-   'ExpandDims_3': {'expand_axis': None, 'type': None, 'kind': 'op', 'op': 'ExpandDims'},
+   'Unsqueeze_0': {'type': 'Unsqueeze', 'kind': 'op', 'op': 'Unsqueeze'},
+   'Unsqueeze_1': {'type': 'Unsqueeze', 'kind': 'op', 'op': 'Unsqueeze'},
+   'Unsqueeze_2': {'type': 'Unsqueeze', 'kind': 'op', 'op': 'Unsqueeze'},
+   'Unsqueeze_3': {'type': 'Unsqueeze', 'kind': 'op', 'op': 'Unsqueeze'},
+   'Unsqueeze_0_axis': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': None, 'value': None},
+   'Unsqueeze_1_axis': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': None, 'value': None},
+   'Unsqueeze_2_axis': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': None, 'value': None},
+   'Unsqueeze_3_axis': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': None, 'value': None},
    'concat_1': {'axis': None, 'type': 'Concat', 'kind': 'op', 'op': 'Concat'},
}
@ -65,15 +70,17 @@ class PackTest(unittest.TestCase):
        graph_ref_edges = []
        for i in range(num_inputs - num_placeholders + 1):
            for j in range(num_placeholders):
-               graph_ref_edges.append(('placeholder_{}'.format(j), 'ExpandDims_{}'.format(i + j)))
-               graph_ref_edges.append(('ExpandDims_{}'.format(i + j), 'concat_1'))
+               graph_ref_edges.append(('placeholder_{}'.format(j), 'Unsqueeze_{}'.format(i + j)))
+               graph_ref_edges.append(('Unsqueeze_{}'.format(i + j), 'concat_1'))
        graph_ref_edges.append(('concat_1', 'last'))

        update_graph_ref_attributes = {}
        for i in range(num_placeholders):
            update_graph_ref_attributes['placeholder_{}'.format(i)] = {'shape': np.array([1, 227, 227, 3])}
        for i in range(num_inputs):
-           update_graph_ref_attributes['ExpandDims_{}'.format(i)] = {'expand_axis': np.array([axis])}
+           graph_ref_edges.append(('Unsqueeze_{}_axis'.format(i), 'Unsqueeze_{}'.format(i)))
+           update_graph_ref_attributes['Unsqueeze_{}_axis'.format(i)] = {'shape': int64_array([1]),
+                                                                         'value': int64_array([axis])}
        update_graph_ref_attributes['concat_1'] = {'axis': axis}

        graph_ref = build_graph(nodes_attributes, graph_ref_edges, update_graph_ref_attributes,
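To make the reference-graph construction above concrete, here is a standalone sketch (values hypothetical) of the edge list it builds when one placeholder is packed twice (num_placeholders=1, num_inputs=2):

```python
num_placeholders, num_inputs = 1, 2

edges = []
for i in range(num_inputs - num_placeholders + 1):
    for j in range(num_placeholders):
        edges.append(('placeholder_{}'.format(j), 'Unsqueeze_{}'.format(i + j)))
        edges.append(('Unsqueeze_{}'.format(i + j), 'concat_1'))
edges.append(('concat_1', 'last'))
for i in range(num_inputs):
    edges.append(('Unsqueeze_{}_axis'.format(i), 'Unsqueeze_{}'.format(i)))

# edges == [('placeholder_0', 'Unsqueeze_0'), ('Unsqueeze_0', 'concat_1'),
#           ('placeholder_0', 'Unsqueeze_1'), ('Unsqueeze_1', 'concat_1'),
#           ('concat_1', 'last'),
#           ('Unsqueeze_0_axis', 'Unsqueeze_0'), ('Unsqueeze_1_axis', 'Unsqueeze_1')]
```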
model-optimizer/extensions/front/tf/AutomlEfficientDet.py (new file, 140 lines)
@ -0,0 +1,140 @@
"""
 Copyright (C) 2018-2020 Intel Corporation

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
"""

import numpy as np

from extensions.front.Pack import Pack
from extensions.front.TransposeOrderNormalizer import TransposeOrderNormalizer
from extensions.front.eltwise_n import EltwiseNReplacement
from extensions.front.tf.pad_tf_to_pad import PadTFToPad
from extensions.ops.DetectionOutput import DetectionOutput
from extensions.ops.activation_ops import Sigmoid
from extensions.ops.priorbox_clustered import PriorBoxClusteredOp
from mo.front.common.partial_infer.utils import int64_array
from mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral
from mo.graph.graph import Graph, Node
from mo.middle.passes.convert_data_type import data_type_str_to_np
from mo.ops.concat import Concat
from mo.ops.const import Const
from mo.ops.reshape import Reshape
from mo.ops.result import Result


class EfficientDet(FrontReplacementFromConfigFileGeneral):
    replacement_id = 'AutomlEfficientDet'

    def run_before(self):
        from extensions.front.ExpandDimsToUnsqueeze import ExpandDimsToUnsqueeze
        return [ExpandDimsToUnsqueeze, Pack, TransposeOrderNormalizer, PadTFToPad, EltwiseNReplacement]

    class AnchorGenerator:
        def __init__(self, min_level, aspect_ratios, num_scales, anchor_scale):
            self.min_level = min_level
            self.aspect_ratios = aspect_ratios
            self.anchor_scale = anchor_scale
            self.scales = [2 ** (float(s) / num_scales) for s in range(num_scales)]

        def get(self, layer_id):
            widths = []
            heights = []
            for s in self.scales:
                for a in self.aspect_ratios:
                    base_anchor_size = 2 ** (self.min_level + layer_id) * self.anchor_scale
                    heights.append(base_anchor_size * s * a[1])
                    widths.append(base_anchor_size * s * a[0])
            return widths, heights

    def transform_graph(self, graph: Graph, replacement_descriptions: dict):
        parameter_node = graph.get_op_nodes(op='Parameter')[0]
        parameter_node['data_type'] = data_type_str_to_np(parameter_node.graph.graph['cmd_params'].data_type)
        parameter_node.out_port(0).disconnect()

        # remove existing Result operations to remove the unsupported sub-graph
        graph.remove_nodes_from([node.id for node in graph.get_op_nodes(op='Result')] + ['detections'])

        # determine the node that produces the final result of applying the mean value and scale to the input
        # tensor, then connect it to the input of the first convolution of the model; this removes the image
        # pre-processing (padding and resizing) from the model
        preprocessing_input_node_id = replacement_descriptions['preprocessing_input_node']
        assert preprocessing_input_node_id in graph.nodes, 'The node with name "{}" is not found in the graph. This ' \
                                                           'node should be the input of the image pre-processing and' \
                                                           ' is specified in the json file.'.format(preprocessing_input_node_id)
        preprocessing_input_node = Node(graph, preprocessing_input_node_id)
        preprocessing_input_node.in_port(0).get_connection().set_source(parameter_node.out_port(0))

        preprocessing_output_node_id = replacement_descriptions['preprocessing_output_node']
        assert preprocessing_output_node_id in graph.nodes, 'The node with name "{}" is not found in the graph. This ' \
                                                            'node should provide scaled image output and is specified' \
                                                            ' in the json file.'.format(preprocessing_output_node_id)
        preprocessing_output_node = Node(graph, preprocessing_output_node_id)
        preprocessing_output_node.out_port(0).disconnect()

        convolution_nodes = [n for n in graph.pseudo_topological_sort() if n.soft_get('type') == 'Convolution']
        convolution_nodes[0].in_port(0).get_connection().set_source(preprocessing_output_node.out_port(0))

        # create prior boxes (anchors) generator
        aspect_ratios = replacement_descriptions['aspect_ratios']
        assert len(aspect_ratios) % 2 == 0
        aspect_ratios = list(zip(aspect_ratios[::2], aspect_ratios[1::2]))
        priors_generator = self.AnchorGenerator(min_level=int(replacement_descriptions['min_level']),
                                                aspect_ratios=aspect_ratios,
                                                num_scales=int(replacement_descriptions['num_scales']),
                                                anchor_scale=replacement_descriptions['anchor_scale'])

        prior_boxes = []
        for i in range(100):
            inp_name = 'box_net/box-predict{}/BiasAdd'.format('_%d' % i if i else '')
            if inp_name not in graph:
                break
            widths, heights = priors_generator.get(i)
            prior_box_op = PriorBoxClusteredOp(graph, {'width': np.array(widths),
                                                       'height': np.array(heights),
                                                       'clip': 0, 'flip': 0,
                                                       'variance': replacement_descriptions['variance'],
                                                       'offset': 0.5})
            prior_boxes.append(prior_box_op.create_node([Node(graph, inp_name), parameter_node]))

        # concatenate prior box operations
        concat_prior_boxes = Concat(graph, {'axis': -1}).create_node()
        for idx, node in enumerate(prior_boxes):
            concat_prior_boxes.add_input_port(idx)
            concat_prior_boxes.in_port(idx).connect(node.out_port(0))

        conf = Sigmoid(graph, dict(name='concat/sigmoid')).create_node([Node(graph, 'concat')])
        reshape_size_node = Const(graph, {'value': int64_array([0, -1])}).create_node([])
        logits = Reshape(graph, dict(name=conf.name + '/Flatten')).create_node([conf, reshape_size_node])
        deltas = Reshape(graph, dict(name='concat_1/Flatten')).create_node([Node(graph, 'concat_1'), reshape_size_node])

        # revert the convolution box-prediction weights from yxYX to xyXY order (the convolutions share weights and bias)
        weights = Node(graph, 'box_net/box-predict/pointwise_kernel')
        weights.value = weights.value.reshape(-1, 4)[:, [1, 0, 3, 2]].reshape(weights.shape)
        bias = Node(graph, 'box_net/box-predict/bias')
        bias.value = bias.value.reshape(-1, 4)[:, [1, 0, 3, 2]].reshape(bias.shape)

        detection_output_node = DetectionOutput(graph, dict(
            name='detections',
            num_classes=int(replacement_descriptions['num_classes']),
            share_location=1,
            background_label_id=int(replacement_descriptions['num_classes']) + 1,
            nms_threshold=replacement_descriptions['nms_threshold'],
            confidence_threshold=replacement_descriptions['confidence_threshold'],
            top_k=100,
            keep_top_k=100,
            code_type='caffe.PriorBoxParameter.CENTER_SIZE',
        )).create_node([deltas, logits, concat_prior_boxes])

        output_op = Result(graph, dict(name='output'))
        output_op.create_node([detection_output_node])
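For context on `AnchorGenerator`: with the defaults from `automl_efficientdet.json` below (min_level=3, num_scales=3, anchor_scale=4.0, three aspect-ratio pairs), the first feature level gets 9 anchors per spatial position. A standalone re-statement of the math, assuming those values:

```python
min_level, num_scales, anchor_scale = 3, 3, 4.0
aspect_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
scales = [2 ** (s / num_scales) for s in range(num_scales)]  # [1.0, ~1.26, ~1.587]

layer_id = 0
base = 2 ** (min_level + layer_id) * anchor_scale            # 8 * 4.0 = 32.0
widths = [base * s * w for s in scales for (w, h) in aspect_ratios]
heights = [base * s * h for s in scales for (w, h) in aspect_ratios]
# widths ~ [32.0, 44.8, 22.4, 40.3, 56.4, 28.2, 50.8, 71.1, 35.6]
# 9 anchors = num_scales * len(aspect_ratios); each level doubles `base`.
```

Likewise, the yxYX-to-xyXY weight reordering above is a plain column permutation over groups of four coordinates; a toy demonstration (the 8-element vector is hypothetical):

```python
import numpy as np

w = np.arange(8.0)  # two hypothetical (y, x, Y, X) quadruples
w_xy = w.reshape(-1, 4)[:, [1, 0, 3, 2]].reshape(w.shape)
# -> [1. 0. 3. 2. 5. 4. 7. 6.]: each quadruple becomes (x, y, X, Y)
```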
model-optimizer/extensions/front/tf/automl_efficientdet.json (new file, 18 lines)
@ -0,0 +1,18 @@
[
    {
        "id": "AutomlEfficientDet",
        "custom_attributes": {
            "preprocessing_input_node": "convert_image",
            "preprocessing_output_node": "truediv",
            "aspect_ratios": [1.0, 1.0, 1.4, 0.7, 0.7, 1.4],
            "variance": [1.0, 1.0, 1.0, 1.0],
            "min_level": 3,
            "num_scales": 3,
            "anchor_scale": 4.0,
            "num_classes": 90,
            "nms_threshold": 0.6,
            "confidence_threshold": 0.2
        },
        "match_kind": "general"
    }
]
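This config is what `transform_graph` receives as the `custom_attributes` of `replacement_descriptions`; such files are typically handed to the Model Optimizer via `--transformations_config`. A small sketch (the file path is assumed relative to the repository root) of how the attributes consumed above fit together:

```python
import json

with open('model-optimizer/extensions/front/tf/automl_efficientdet.json') as f:
    config = json.load(f)[0]

assert config['id'] == 'AutomlEfficientDet' and config['match_kind'] == 'general'
attrs = config['custom_attributes']
# The flat aspect_ratios list is re-paired exactly as in transform_graph:
pairs = list(zip(attrs['aspect_ratios'][::2], attrs['aspect_ratios'][1::2]))
print(pairs)  # [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
```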
@ -32,14 +32,14 @@ class Unsqueeze(Op):

    def __init__(self, graph, attrs: dict):
        super().__init__(graph, {
-           'op': __class__.op,
-           'type': __class__.op,
+           'op': self.op,
+           'type': self.op,
            'version': 'opset1',
            'unsqueeze_dims': None,
            'reinterp_shape': True,
            'in_ports_count': 2,
            'out_ports_count': 1,
-           'infer': __class__.infer
+           'infer': self.infer
        }, attrs)

    @staticmethod
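For context on the `__class__` → `self` change above: inside a method body, `__class__` is lexically bound to the class where the method is defined, so a subclass that overrides `op` or `infer` would still register the base-class values; `self.op` resolves through the instance instead. A minimal sketch (class names hypothetical):

```python
class Base:
    op = 'Base'

    def __init__(self):
        self.lexical = __class__.op  # always 'Base', even on subclass instances
        self.dynamic = self.op       # resolves to the subclass attribute


class Derived(Base):
    op = 'Derived'


print(Derived().lexical, Derived().dynamic)  # Base Derived
```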
Some files were not shown because too many files have changed in this diff.