Merge branch 'master' into topk

This commit is contained in:
Mateusz Tabaka 2020-11-03 10:22:55 +01:00
commit a7c8365446
139 changed files with 4038 additions and 2282 deletions

View File

@ -60,7 +60,7 @@ function(build_ngraph)
ngraph_set(NGRAPH_UNIT_TEST_ENABLE FALSE) ngraph_set(NGRAPH_UNIT_TEST_ENABLE FALSE)
endif() endif()
if(NOT (ANDROID OR WINDOWS_STORE)) if(NOT (ANDROID OR WINDOWS_STORE OR (MSVC AND (ARM OR AARCH64)) ))
ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE TRUE) ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE TRUE)
else() else()
ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE FALSE) ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE FALSE)

View File

@ -26,7 +26,7 @@
- [Build Steps](#build-steps-3) - [Build Steps](#build-steps-3)
- [Use Custom OpenCV Builds for Inference Engine](#use-custom-opencv-builds-for-inference-engine) - [Use Custom OpenCV Builds for Inference Engine](#use-custom-opencv-builds-for-inference-engine)
- [Add Inference Engine to Your Project](#add-inference-engine-to-your-project) - [Add Inference Engine to Your Project](#add-inference-engine-to-your-project)
- [(Optional) Additional Installation Steps for the Intel® Movidius™ Neural Compute Stick and Neural Compute Stick 2](#optional-additional-installation-steps-for-the-intel-movidius-neural-compute-stick-and-neural-compute-stick-2) - [(Optional) Additional Installation Steps for the Intel® Neural Compute Stick 2](#optional-additional-installation-steps-for-the-intel-movidius-neural-compute-stick-and-neural-compute-stick-2)
- [For Linux, Raspbian Stretch* OS](#for-linux-raspbian-stretch-os) - [For Linux, Raspbian Stretch* OS](#for-linux-raspbian-stretch-os)
- [Next Steps](#next-steps) - [Next Steps](#next-steps)
- [Additional Resources](#additional-resources) - [Additional Resources](#additional-resources)
@ -43,7 +43,7 @@ The open source version of Inference Engine includes the following plugins:
| CPU plugin | Intel® Xeon® with Intel® AVX2 and AVX512, Intel® Core™ Processors with Intel® AVX2, Intel® Atom® Processors with Intel® SSE | | CPU plugin | Intel® Xeon® with Intel® AVX2 and AVX512, Intel® Core™ Processors with Intel® AVX2, Intel® Atom® Processors with Intel® SSE |
| GPU plugin | Intel® Processor Graphics, including Intel® HD Graphics and Intel® Iris® Graphics | | GPU plugin | Intel® Processor Graphics, including Intel® HD Graphics and Intel® Iris® Graphics |
| GNA plugin | Intel® Speech Enabling Developer Kit, Amazon Alexa\* Premium Far-Field Developer Kit, Intel® Pentium® Silver processor J5005, Intel® Celeron® processor J4005, Intel® Core™ i3-8121U processor | | GNA plugin | Intel® Speech Enabling Developer Kit, Amazon Alexa\* Premium Far-Field Developer Kit, Intel® Pentium® Silver processor J5005, Intel® Celeron® processor J4005, Intel® Core™ i3-8121U processor |
| MYRIAD plugin | Intel® Movidius™ Neural Compute Stick powered by the Intel® Movidius™ Myriad™ 2, Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X | | MYRIAD plugin | Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X |
| Heterogeneous plugin | Heterogeneous plugin enables computing for inference on one network on several Intel® devices. | | Heterogeneous plugin | Heterogeneous plugin enables computing for inference on one network on several Intel® devices. |
## Build on Linux\* Systems ## Build on Linux\* Systems
@ -608,11 +608,11 @@ include_directories(${InferenceEngine_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} ${InferenceEngine_LIBRARIES} dl) target_link_libraries(${PROJECT_NAME} ${InferenceEngine_LIBRARIES} dl)
``` ```
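For context, the two lines above normally sit in a small consumer project. A minimal CMakeLists.txt sketch, assuming a single-executable project built from a hypothetical `main.cpp`:

```cmake
cmake_minimum_required(VERSION 3.13)
project(ie_sample)

# Locate an installed Inference Engine package; this defines the
# InferenceEngine_INCLUDE_DIRS and InferenceEngine_LIBRARIES variables used below.
find_package(InferenceEngine REQUIRED)

add_executable(${PROJECT_NAME} main.cpp)

include_directories(${InferenceEngine_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} ${InferenceEngine_LIBRARIES} dl)
```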
## (Optional) Additional Installation Steps for the Intel® Movidius™ Neural Compute Stick and Neural Compute Stick 2 ## (Optional) Additional Installation Steps for the Intel® Neural Compute Stick 2
-> **NOTE**: These steps are only required if you want to perform inference on
-Intel® Movidius™ Neural Compute Stick or the Intel® Neural Compute Stick 2 using
-the Inference Engine MYRIAD Plugin. See also [Intel® Neural Compute Stick 2 Get Started].
+> **NOTE**: These steps are only required if you want to perform inference on the
+Intel® Neural Compute Stick 2 using the Inference Engine MYRIAD Plugin. See also
+[Intel® Neural Compute Stick 2 Get Started].
### For Linux, Raspbian\* Stretch OS ### For Linux, Raspbian\* Stretch OS
@ -622,11 +622,10 @@ the Inference Engine MYRIAD Plugin. See also [Intel® Neural Compute Stick 2 Get
sudo usermod -a -G users "$(whoami)" sudo usermod -a -G users "$(whoami)"
``` ```
-2. To perform inference on Intel® Movidius™ Neural Compute Stick and Intel®
-Neural Compute Stick 2, install the USB rules as follows:
+2. To perform inference on Intel® Neural Compute Stick 2, install the USB rules
+as follows:
```sh ```sh
cat <<EOF > 97-myriad-usbboot.rules cat <<EOF > 97-myriad-usbboot.rules
SUBSYSTEM=="usb", ATTRS{idProduct}=="2150", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1"
SUBSYSTEM=="usb", ATTRS{idProduct}=="2485", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1" SUBSYSTEM=="usb", ATTRS{idProduct}=="2485", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1"
SUBSYSTEM=="usb", ATTRS{idProduct}=="f63b", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1" SUBSYSTEM=="usb", ATTRS{idProduct}=="f63b", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1"
EOF EOF
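The heredoc above only creates the rules file in the current directory. A sketch of the usual follow-up steps for the MYRIAD plugin's udev setup (file locations may differ on your distribution):

```sh
# Install the rules so udev grants the "users" group access to the device,
# then reload and re-trigger the rules without rebooting.
sudo cp 97-myriad-usbboot.rules /etc/udev/rules.d/
sudo udevadm control --reload-rules
sudo udevadm trigger
sudo ldconfig
```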

View File

@ -15,10 +15,6 @@ else()
SET(ARCH_64 OFF) SET(ARCH_64 OFF)
endif() endif()
if (NOT ENABLE_MKL_DNN)
set(ENABLE_MKL OFF)
endif()
if(ENABLE_AVX512F) if(ENABLE_AVX512F)
if ((CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") AND (MSVC_VERSION VERSION_LESS 1920)) if ((CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") AND (MSVC_VERSION VERSION_LESS 1920))
# 1920 version of MSVC 2019. In MSVC 2017 AVX512F not work # 1920 version of MSVC 2019. In MSVC 2017 AVX512F not work

View File

@ -4,10 +4,27 @@
cmake_minimum_required(VERSION 3.13) cmake_minimum_required(VERSION 3.13)
# Detect target
include(target_flags)
string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH_FOLDER)
if(X86_64)
set(ARCH_FOLDER intel64)
elseif(X86)
set(ARCH_FOLDER ia32)
elseif(MSVC AND ARM)
set(ARCH_FOLDER arm)
elseif(MSVC AND AARCH64)
set(ARCH_FOLDER arm64)
endif()
list(APPEND CMAKE_MODULE_PATH list(APPEND CMAKE_MODULE_PATH
"${OpenVINO_MAIN_SOURCE_DIR}/cmake/download" "${OpenVINO_MAIN_SOURCE_DIR}/cmake/download"
"${OpenVINO_MAIN_SOURCE_DIR}/cmake/cross_compile" "${OpenVINO_MAIN_SOURCE_DIR}/cmake/cross_compile")
)
#
# CPack
#
include(CPackComponent) include(CPackComponent)
unset(IE_CPACK_COMPONENTS_ALL CACHE) unset(IE_CPACK_COMPONENTS_ALL CACHE)
@ -33,21 +50,14 @@ endif()
# Set library directory for cpack # Set library directory for cpack
# #
function(ie_cpack_set_library_dir) function(ie_cpack_set_library_dir)
string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH)
if(ARCH STREQUAL "x86_64" OR ARCH STREQUAL "amd64") # Windows detects Intel's 64-bit CPU as AMD64
set(ARCH intel64)
elseif(ARCH STREQUAL "i386")
set(ARCH ia32)
endif()
if(WIN32) if(WIN32)
set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH}/${CMAKE_BUILD_TYPE} PARENT_SCOPE) set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/bin/${ARCH}/${CMAKE_BUILD_TYPE} PARENT_SCOPE) set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/bin/${ARCH_FOLDER}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH}/${CMAKE_BUILD_TYPE} PARENT_SCOPE) set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
else() else()
set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH} PARENT_SCOPE) set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER} PARENT_SCOPE)
set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH} PARENT_SCOPE) set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER} PARENT_SCOPE)
set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH} PARENT_SCOPE) set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER} PARENT_SCOPE)
endif() endif()
endfunction() endfunction()
@ -109,28 +119,19 @@ function(set_temp_directory temp_variable source_tree_dir)
endif() endif()
endfunction() endfunction()
#
# Common scripts
#
include(coverage/coverage) include(coverage/coverage)
include(shellcheck/shellcheck) include(shellcheck/shellcheck)
# External dependencies # External dependencies
find_package(Threads) find_package(Threads)
# Detect target
include(target_flags)
# printing debug messages # printing debug messages
include(debug) include(debug)
# linking libraries without discarding symbols
include(whole_archive)
string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH_FOLDER)
if(X86_64)
set(ARCH_FOLDER intel64)
elseif(X86)
set(ARCH_FOLDER ia32)
endif()
if(OS_FOLDER) if(OS_FOLDER)
message ("**** OS FOLDER IS: [${OS_FOLDER}]") message ("**** OS FOLDER IS: [${OS_FOLDER}]")
if("${OS_FOLDER}" STREQUAL "ON") if("${OS_FOLDER}" STREQUAL "ON")
@ -237,6 +238,7 @@ include(os_flags)
include(sanitizer) include(sanitizer)
include(cross_compiled_func) include(cross_compiled_func)
include(faster_build) include(faster_build)
include(whole_archive)
include(api_validator/api_validator) include(api_validator/api_validator)
function(set_ci_build_number) function(set_ci_build_number)

View File

@ -17,11 +17,11 @@ ie_option (ENABLE_TESTS "unit, behavior and functional tests" OFF)
ie_option (ENABLE_MKL_DNN "MKL-DNN plugin for inference engine" ${ENABLE_MKL_DNN_DEFAULT}) ie_option (ENABLE_MKL_DNN "MKL-DNN plugin for inference engine" ${ENABLE_MKL_DNN_DEFAULT})
ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "WIN32 OR X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE; NOT WINDOWS_PHONE" OFF) ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF)
# FIXME: there are compiler failures with LTO and Cross-Compile toolchains. Disabling for now, but # FIXME: there are compiler failures with LTO and Cross-Compile toolchains. Disabling for now, but
# this must be addressed in a proper way # this must be addressed in a proper way
ie_dependent_option (ENABLE_LTO "Enable Link Time Optimization" OFF "LINUX OR WIN32;NOT CMAKE_CROSSCOMPILING; CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9" OFF) ie_dependent_option (ENABLE_LTO "Enable Link Time Optimization" OFF "LINUX;NOT CMAKE_CROSSCOMPILING; CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9" OFF)
ie_option (OS_FOLDER "create OS dedicated folder in output" OFF) ie_option (OS_FOLDER "create OS dedicated folder in output" OFF)
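The `ie_dependent_option` calls above appear to behave like CMake's stock `cmake_dependent_option`: the option takes its default only while every condition in the semicolon-separated list holds, and is forced to the fallback value otherwise. An illustrative equivalent for the clDNN option (a sketch, not the project's actual helper):

```cmake
include(CMakeDependentOption)

# ENABLE_CLDNN defaults to ON, but only when targeting X86_64 and not building
# for Apple, MinGW, Windows Store, or Windows Phone; otherwise it is forced OFF.
cmake_dependent_option(ENABLE_CLDNN "clDnn based plugin for inference engine" ON
    "X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF)
```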

View File

@ -127,8 +127,10 @@ function(ie_avx512_optimization_flags flags)
endfunction() endfunction()
function(ie_arm_neon_optimization_flags flags) function(ie_arm_neon_optimization_flags flags)
if(WIN32 OR CMAKE_CXX_COMPILER_ID STREQUAL "Intel") if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}") message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# nothing
elseif(ANDROID) elseif(ANDROID)
if(ANDROID_ABI STREQUAL "arm64-v8a") if(ANDROID_ABI STREQUAL "arm64-v8a")
set(${flags} "-mfpu=neon" PARENT_SCOPE) set(${flags} "-mfpu=neon" PARENT_SCOPE)
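A hedged sketch of how a caller might consume this helper; the target name `my_arm_kernels` is hypothetical and not part of the diff:

```cmake
# Ask the helper for the NEON flags appropriate to the current toolchain
# (nothing on MSVC, -mfpu=neon for the Android ABIs shown above, etc.)
# and apply them to a single target only.
ie_arm_neon_optimization_flags(neon_flags)
if(neon_flags)
    target_compile_options(my_arm_kernels PRIVATE ${neon_flags})
endif()
```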

View File

@ -16,10 +16,25 @@ if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
endif() endif()
endif() endif()
-if(MSVC64 OR MINGW64)
-    set(X86_64 ON)
-elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING))
-    set(X86 ON)
+macro(_ie_process_msvc_generator_platform flag_name)
+    # if cmake -A <ARM|ARM64> is passed
+    if(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64")
+        set(AARCH64 ON)
+    elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM")
+        set(ARM ON)
+    elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "x64")
+        set(X86_64 ON)
+    elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "Win32")
+        set(X86 ON)
+    else()
+        set(${flag_name} ON)
+    endif()
+endmacro()
+
+if(MSVC64 OR MINGW64)
+    _ie_process_msvc_generator_platform(X86_64)
+elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING))
+    _ie_process_msvc_generator_platform(X86)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(X86_64 ON) set(X86_64 ON)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")

View File

@ -7,15 +7,15 @@ macro(ie_parse_ci_build_number)
set(IE_VERSION_MAJOR ${CMAKE_MATCH_1}) set(IE_VERSION_MAJOR ${CMAKE_MATCH_1})
set(IE_VERSION_MINOR ${CMAKE_MATCH_2}) set(IE_VERSION_MINOR ${CMAKE_MATCH_2})
set(IE_VERSION_PATCH ${CMAKE_MATCH_3}) set(IE_VERSION_PATCH ${CMAKE_MATCH_3})
set(IE_VS_VER_HAS_WELL_DEFINED_VERSION 1) set(IE_VS_VER_HAS_VERSION 1)
else() else()
set(IE_VS_VER_HAS_WELL_DEFINED_VERSION 0) set(IE_VS_VER_HAS_VERSION 0)
endif() endif()
endmacro() endmacro()
ie_parse_ci_build_number() ie_parse_ci_build_number()
if(IE_VS_VER_HAS_WELL_DEFINED_VERSION) if(IE_VS_VER_HAS_VERSION)
set(IE_VS_VER_FILEVERSION_QUAD "${IE_VERSION_MAJOR},${IE_VERSION_MINOR},${IE_VERSION_PATCH},0") set(IE_VS_VER_FILEVERSION_QUAD "${IE_VERSION_MAJOR},${IE_VERSION_MINOR},${IE_VERSION_PATCH},0")
set(IE_VS_VER_PRODUCTVERSION_QUAD "${IE_VERSION_MAJOR},${IE_VERSION_MINOR},${IE_VERSION_PATCH},0") set(IE_VS_VER_PRODUCTVERSION_QUAD "${IE_VERSION_MAJOR},${IE_VERSION_MINOR},${IE_VERSION_PATCH},0")
set(IE_VS_VER_FILEVERSION_STR "${IE_VERSION_MAJOR}.${IE_VERSION_MINOR}.${IE_VERSION_PATCH}.0") set(IE_VS_VER_FILEVERSION_STR "${IE_VERSION_MAJOR}.${IE_VERSION_MINOR}.${IE_VERSION_PATCH}.0")

View File

@ -1,7 +1,7 @@
#include <winver.h> #include <winver.h>
VS_VERSION_INFO VERSIONINFO VS_VERSION_INFO VERSIONINFO
#if IE_VS_VER_HAS_WELL_DEFINED_VERSION #if @IE_VS_VER_HAS_VERSION@
FILEVERSION @IE_VS_VER_FILEVERSION_QUAD@ FILEVERSION @IE_VS_VER_FILEVERSION_QUAD@
PRODUCTVERSION @IE_VS_VER_PRODUCTVERSION_QUAD@ PRODUCTVERSION @IE_VS_VER_PRODUCTVERSION_QUAD@
#endif #endif
@ -20,7 +20,7 @@ BEGIN
BLOCK "040904E4" BLOCK "040904E4"
BEGIN BEGIN
VALUE "FileDescription", "@IE_VS_VER_FILEDESCRIPTION_STR@\0" VALUE "FileDescription", "@IE_VS_VER_FILEDESCRIPTION_STR@\0"
#if IE_VS_VER_HAS_WELL_DEFINED_VERSION #if @IE_VS_VER_HAS_VERSION@
VALUE "FileVersion", "@IE_VS_VER_FILEVERSION_STR@\0" VALUE "FileVersion", "@IE_VS_VER_FILEVERSION_STR@\0"
#endif #endif
VALUE "InternalName", "@IE_VS_VER_INTERNALNAME_STR@\0" VALUE "InternalName", "@IE_VS_VER_INTERNALNAME_STR@\0"

View File

@ -10,7 +10,7 @@ and mixed-reality headsets.
The OpenVINO™ toolkit: The OpenVINO™ toolkit:
* Enables CNN-based deep learning inference on the edge * Enables CNN-based deep learning inference on the edge
* Supports heterogeneous execution across an Intel&reg; CPU, Intel&reg; Integrated Graphics, Intel&reg; Movidius&trade; Neural Compute Stick and Intel&reg; Neural Compute Stick 2 * Supports heterogeneous execution across an Intel&reg; CPU, Intel&reg; Integrated Graphics, Intel&reg; Neural Compute Stick 2
* Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels * Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels
* Includes optimized calls for computer vision standards including OpenCV\*, OpenCL&trade;, and OpenVX\* * Includes optimized calls for computer vision standards including OpenCV\*, OpenCL&trade;, and OpenVX\*

View File

@ -2,7 +2,7 @@
## Introducing MYRIAD Plugin ## Introducing MYRIAD Plugin
The Inference Engine MYRIAD plugin is developed for inference of neural networks on Intel&reg; Movidius&trade; Neural Compute Stick and Intel&reg; Neural Compute Stick 2. The Inference Engine MYRIAD plugin is developed for inference of neural networks on Intel&reg; Neural Compute Stick 2.
## Installation on Linux* OS ## Installation on Linux* OS
@ -23,10 +23,10 @@ The Inference Engine MYRIAD plugin supports the following networks:
* GoogleNet (Inception) v1, v2, v4 * GoogleNet (Inception) v1, v2, v4
* VGG family (VGG16, VGG19) * VGG family (VGG16, VGG19)
* SqueezeNet v1.0, v1.1 * SqueezeNet v1.0, v1.1
* ResNet v1 family (18\*\* \*\*\*, 50, 101, 152) * ResNet v1 family (18\*\*\*, 50, 101, 152)
* MobileNet (mobilenet-v1-1.0-224, mobilenet-v2) * MobileNet (mobilenet-v1-1.0-224, mobilenet-v2)
* Inception ResNet v2 * Inception ResNet v2
* DenseNet family\*\* (121,161,169,201) * DenseNet family (121,161,169,201)
* SSD-300, SSD-512, SSD-MobileNet, SSD-GoogleNet, SSD-SqueezeNet * SSD-300, SSD-512, SSD-MobileNet, SSD-GoogleNet, SSD-SqueezeNet
**TensorFlow\***: **TensorFlow\***:
@ -45,7 +45,7 @@ The Inference Engine MYRIAD plugin supports the following networks:
**MXNet\***: **MXNet\***:
* AlexNet and CaffeNet * AlexNet and CaffeNet
* DenseNet family\*\* (121,161,169,201) * DenseNet family (121,161,169,201)
* SqueezeNet v1.1 * SqueezeNet v1.1
* MobileNet v1, v2 * MobileNet v1, v2
* NiN * NiN
@ -55,8 +55,6 @@ The Inference Engine MYRIAD plugin supports the following networks:
* VGG family (VGG16, VGG19) * VGG family (VGG16, VGG19)
* SSD-Inception-v3, SSD-MobileNet, SSD-ResNet-50, SSD-300 * SSD-Inception-v3, SSD-MobileNet, SSD-ResNet-50, SSD-300
\*\* Network is tested on Intel&reg; Movidius&trade; Neural Compute Stick with BatchNormalization fusion optimization disabled during Model Optimizer import
\*\*\* Network is tested on Intel&reg; Neural Compute Stick 2 with BatchNormalization fusion optimization disabled during Model Optimizer import \*\*\* Network is tested on Intel&reg; Neural Compute Stick 2 with BatchNormalization fusion optimization disabled during Model Optimizer import
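The footnote refers to disabling BatchNormalization fusion during Model Optimizer import. If your Model Optimizer version supports the `--disable_fusing` flag, the conversion command would look roughly like this (model path is a placeholder):

```sh
# Convert with linear-operation (BatchNorm/scale) fusion turned off,
# matching the configuration the footnote describes.
python3 mo.py --input_model resnet-18.caffemodel --disable_fusing
```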
## Supported Configuration Parameters ## Supported Configuration Parameters

View File

@ -0,0 +1,96 @@
# Converting EfficientDet Models from TensorFlow {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_EfficientDet_Models}
This tutorial explains how to convert public EfficientDet\* detection models to the Intermediate Representation (IR).
## <a name="efficientdet-to-ir"></a>Convert EfficientDet Model to IR
On GitHub\*, you can find several public implementations of the EfficientDet model. This tutorial explains how to
convert models from the [https://github.com/google/automl/tree/master/efficientdet](https://github.com/google/automl/tree/master/efficientdet)
repository (commit 96e1fee) to IR.
### Get Frozen TensorFlow\* Model
Follow the instructions below to get a frozen TensorFlow EfficientDet model. We use the EfficientDet-D4 model as an example:
1. Clone the repository:<br>
```sh
git clone https://github.com/google/automl
cd automl/efficientdet
```
2. (Optional) Check out the commit that the conversion was tested on:<br>
```sh
git checkout 96e1fee
```
3. Install required dependencies:<br>
```sh
python3 -m pip install --upgrade pip
python3 -m pip install -r automl/efficientdet/requirements.txt
```
4. Download and extract the model checkpoint [efficientdet-d4.tar.gz](https://storage.googleapis.com/cloud-tpu-checkpoints/efficientdet/coco2/efficientdet-d4.tar.gz)
referenced in the "Pretrained EfficientDet Checkpoints" section of the model repository:<br>
```sh
wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientdet/coco2/efficientdet-d4.tar.gz
tar zxvf efficientdet-d4.tar.gz
```
5. Freeze the model:<br>
```sh
python3 model_inspect.py --runmode=saved_model --model_name=efficientdet-d4 --ckpt_path=efficientdet-d4 --saved_model_dir=savedmodeldir
```
As a result, the frozen model file `savedmodeldir/efficientdet-d4_frozen.pb` is generated.
> **NOTE:** If you see an error `AttributeError: module 'tensorflow_core.python.keras.api._v2.keras.initializers' has no attribute 'variance_scaling'`, apply the fix from the [patch](https://github.com/google/automl/pull/846).
### Convert EfficientDet TensorFlow Model to the IR
To generate the IR of the EfficientDet TensorFlow model, run:<br>
```sh
python3 $MO_ROOT/mo.py \
--input_model savedmodeldir/efficientdet-d4_frozen.pb \
--tensorflow_use_custom_operations_config $MO_ROOT/extensions/front/tf/automl_efficientdet.json \
--input_shape [1,$IMAGE_SIZE,$IMAGE_SIZE,3] \
--reverse_input_channels
```
Where `$IMAGE_SIZE` is the size that the input image of the original TensorFlow model will be resized to. Different
EfficientDet models were trained with different input image sizes. To determine the right one, refer to the `efficientdet_model_param_dict`
dictionary in the [hparams_config.py](https://github.com/google/automl/blob/96e1fee/efficientdet/hparams_config.py#L304) file.
The `image_size` attribute specifies the shape to use for the model conversion.
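For example, assuming the `efficientdet_model_param_dict` entry for EfficientDet-D4 specifies an `image_size` of 1024 (verify this against `hparams_config.py` for your commit), the substituted command would be:

```sh
python3 $MO_ROOT/mo.py \
--input_model savedmodeldir/efficientdet-d4_frozen.pb \
--tensorflow_use_custom_operations_config $MO_ROOT/extensions/front/tf/automl_efficientdet.json \
--input_shape [1,1024,1024,3] \
--reverse_input_channels
```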
The `tensorflow_use_custom_operations_config` command-line parameter specifies the configuration JSON file with hints
for the Model Optimizer on how to convert the model; it triggers the transformations implemented in
`$MO_ROOT/extensions/front/tf/AutomlEfficientDet.py`. The JSON file contains parameters that must be changed if you
trained the model yourself and modified the `hparams_config` file, or if the parameters differ from the ones used for EfficientDet-D4.
The attribute names are self-explanatory or match the names in the `hparams_config` file.
> **NOTE:** The color channel order (RGB or BGR) of the input data should match the channel order of the model training dataset. If they differ, perform the `RGB<->BGR` conversion by specifying the command-line parameter `--reverse_input_channels`. Otherwise, inference results may be incorrect. For more information about the parameter, refer to the **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](../Converting_Model_General.md).
The OpenVINO&trade; toolkit provides samples that can be used to run inference with an EfficientDet model. For more information, refer to
[Object Detection for SSD C++ Sample](@ref openvino_inference_engine_samples_object_detection_sample_ssd_README) and
[Object Detection for SSD Python Sample](@ref openvino_inference_engine_ie_bridges_python_sample_object_detection_sample_ssd_README).
## <a name="efficientdet-ir-results-interpretation"></a>Interpreting Results of the TensorFlow Model and the IR
The TensorFlow model produces a list of 7-element tuples as output: `[image_id, y_min, x_min, y_max, x_max, confidence, class_id]`, where:
* `image_id` -- image batch index.
* `y_min` -- absolute `y` coordinate of the lower left corner of the detected object.
* `x_min` -- absolute `x` coordinate of the lower left corner of the detected object.
* `y_max` -- absolute `y` coordinate of the upper right corner of the detected object.
* `x_max` -- absolute `x` coordinate of the upper right corner of the detected object.
* `confidence` -- confidence of the detected object.
* `class_id` -- ID of the detected object class, counted from 1.
The output of the IR is a list of 7-element tuples: `[image_id, class_id, confidence, x_min, y_min, x_max, y_max]`, where:
* `image_id` -- image batch index.
* `class_id` -- ID of the detected object class, counted from 0.
* `confidence` -- confidence of the detected object.
* `x_min` -- normalized `x` coordinate of the lower left corner of the detected object.
* `y_min` -- normalized `y` coordinate of the lower left corner of the detected object.
* `x_max` -- normalized `x` coordinate of the upper right corner of the detected object.
* `y_max` -- normalized `y` coordinate of the upper right corner of the detected object.
The first element with `image_id = -1` marks the end of the data.
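A small illustrative sketch (not part of the toolkit) of how the IR output could be post-processed, assuming the detections arrive as rows of seven floats as described above:

```python
def parse_ir_detections(detections, img_w, img_h, conf_threshold=0.5):
    """Convert normalized IR detections to pixel-space boxes."""
    boxes = []
    for image_id, class_id, conf, x_min, y_min, x_max, y_max in detections:
        if image_id == -1:       # end-of-data marker
            break
        if conf < conf_threshold:
            continue
        boxes.append({
            "class_id": int(class_id),            # counted from 0 in the IR
            "confidence": float(conf),
            "box": (int(x_min * img_w), int(y_min * img_h),
                    int(x_max * img_w), int(y_max * img_h)),
        })
    return boxes
```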
---
## See Also
* [Sub-Graph Replacement in Model Optimizer](../../customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md)

View File

@ -22,6 +22,7 @@
<tab type="user" title="Converting BERT from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_BERT_From_Tensorflow"/> <tab type="user" title="Converting BERT from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_BERT_From_Tensorflow"/>
<tab type="user" title="Convert TensorFlow* XLNet Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_XLNet_From_Tensorflow"/> <tab type="user" title="Convert TensorFlow* XLNet Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_XLNet_From_Tensorflow"/>
<tab type="user" title="Converting TensorFlow* Wide and Deep Models from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_WideAndDeep_Family_Models"/> <tab type="user" title="Converting TensorFlow* Wide and Deep Models from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_WideAndDeep_Family_Models"/>
<tab type="user" title="Converting EfficientDet Models from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_EfficientDet_Models"/>
</tab> </tab>
<tab type="usergroup" title="Converting a MXNet* Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_MxNet"> <tab type="usergroup" title="Converting a MXNet* Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_MxNet">
<tab type="user" title="Converting a Style Transfer Model from MXNet" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_mxnet_specific_Convert_Style_Transfer_From_MXNet"/> <tab type="user" title="Converting a Style Transfer Model from MXNet" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_mxnet_specific_Convert_Style_Transfer_From_MXNet"/>

View File

@ -53,7 +53,7 @@ cd /home/<user>/Downloads/fpga_support_files/
./install_openvino_fpga_dependencies.sh ./install_openvino_fpga_dependencies.sh
``` ```
11. When asked, select the FPGA card, Intel® GPU, and Intel® Movidius™ Neural Compute Stick, then you can install the correct dependencies. 11. When asked, select the FPGA card, Intel® GPU, and Intel® Neural Compute Stick 2, then you can install the correct dependencies.
12. If you installed the 4.14 kernel as part of the installation script, you will need to reboot the machine and select the new kernel in the Ubuntu (grub) boot menu. You will also need to rerun `setup_env.sh` to set up your environmental variables again. 12. If you installed the 4.14 kernel as part of the installation script, you will need to reboot the machine and select the new kernel in the Ubuntu (grub) boot menu. You will also need to rerun `setup_env.sh` to set up your environmental variables again.

View File

@ -13,55 +13,64 @@ This guide provides installation steps for the Intel® distribution of OpenVINO
## Install the Runtime Package Using the PyPI Repository ## Install the Runtime Package Using the PyPI Repository
-1. Set up and update pip to the highest version:
-   ```sh
-   python3 -m pip install --upgrade pip
-   ```
-2. Install the Intel® distribution of OpenVINO™ toolkit:
-   ```sh
-   pip install openvino-python
-   ```
-3. Add PATH to environment variables.
-   - Ubuntu* 18.04 and macOS*:
-   ```sh
-   export LD_LIBRARY_PATH=<library_dir>:${LD_LIBRARY_PATH}
-   ```
-   - Windows* 10:
-   ```sh
-   set PATH=<library_dir>;%PATH%
-   ```
-   How to find `library_dir`:
-   - Ubuntu\*, macOS\*:
-     - Standard user:
-       ```sh
-       echo $(python3 -m site --user-base)/lib
-       ```
-     - Root or sudo user:
-       ```sh
-       /usr/local/lib
-       ```
-     - Virtual environments or custom Python installations (from sources or tarball):
-       ```sh
-       echo $(which python3)/../../lib
-       ```
-   - Windows\*:
-     - Standard Python:
-       ```sh
-       python -c "import os, sys; print((os.path.dirname(sys.executable))+'\Library\\bin')"
-       ```
-     - Virtual environments or custom Python installations (from sources or tarball):
-       ```sh
-       python -c "import os, sys; print((os.path.dirname(sys.executable))+'\..\Library\\bin')"
-       ```
-4. Verify that the package is installed:
-   ```sh
-   python3 -c "import openvino"
-   ```
+### Step 1. Set up and update pip to the highest version
+
+Run the command below:
+```sh
+python3 -m pip install --upgrade pip
+```
+
+### Step 2. Install the Intel® distribution of OpenVINO™ toolkit
+
+Run the command below:
+```sh
+pip install openvino-python
+```
+
+### Step 3. Add PATH to environment variables
+
+Run a command for your operating system:
+- Ubuntu 18.04 and macOS:
+```sh
+export LD_LIBRARY_PATH=<library_dir>:${LD_LIBRARY_PATH}
+```
+- Windows* 10:
+```sh
+set PATH=<library_dir>;%PATH%
+```
+
+To find `library_dir`:
+
+**Ubuntu, macOS**:
+- Standard user:
+```sh
+echo $(python3 -m site --user-base)/lib
+```
+- Root or sudo user:
+```sh
+/usr/local/lib
+```
+- Virtual environments or custom Python installations (from sources or tarball):
+```sh
+echo $(which python3)/../../lib
+```
+
+**Windows**:
+- Standard Python:
+```sh
+python -c "import os, sys; print((os.path.dirname(sys.executable))+'\Library\\bin')"
+```
+- Virtual environments or custom Python installations (from sources or tarball):
+```sh
+python -c "import os, sys; print((os.path.dirname(sys.executable))+'\..\Library\\bin')"
+```
+
+### Step 4. Verify that the package is installed
+
+Run the command below:
+```sh
+python3 -c "import openvino"
+```
Now you are ready to develop and run your application. Now you are ready to develop and run your application.
## Additional Resources ## Additional Resources
- [Intel® Distribution of OpenVINO™ toolkit](https://software.intel.com/en-us/openvino-toolkit). - [Intel® Distribution of OpenVINO™ toolkit](https://software.intel.com/en-us/openvino-toolkit).

View File

@ -20,7 +20,9 @@ INSTANTIATE_TEST_CASE_P(NumSplitsCheck, SplitLayerTest,
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({30, 30, 30, 30})), ::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({})),
::testing::Values("TEMPLATE")), ::testing::Values("TEMPLATE")),
SplitLayerTest::getTestCaseName); SplitLayerTest::getTestCaseName);
} // namespace } // namespace

View File

@ -185,7 +185,7 @@ if (ENABLE_OPENCV)
set(OPENCV_BUILD "36") set(OPENCV_BUILD "36")
set(OPENCV_BUILD_YOCTO "337") set(OPENCV_BUILD_YOCTO "337")
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") if (AARCH64)
if(DEFINED ENV{THIRDPARTY_SERVER_PATH}) if(DEFINED ENV{THIRDPARTY_SERVER_PATH})
set(IE_PATH_TO_DEPS "$ENV{THIRDPARTY_SERVER_PATH}") set(IE_PATH_TO_DEPS "$ENV{THIRDPARTY_SERVER_PATH}")
elseif(DEFINED THIRDPARTY_SERVER_PATH) elseif(DEFINED THIRDPARTY_SERVER_PATH)
@ -220,10 +220,10 @@ if (ENABLE_OPENCV)
ENVIRONMENT "OpenCV_DIR" ENVIRONMENT "OpenCV_DIR"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*") VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
elseif(LINUX) elseif(LINUX)
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") if (AARCH64)
set(OPENCV_SUFFIX "yocto_kmb") set(OPENCV_SUFFIX "yocto_kmb")
set(OPENCV_BUILD "${OPENCV_BUILD_YOCTO}") set(OPENCV_BUILD "${OPENCV_BUILD_YOCTO}")
elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l") elseif (ARM)
set(OPENCV_SUFFIX "debian9arm") set(OPENCV_SUFFIX "debian9arm")
elseif (LINUX_OS_NAME STREQUAL "CentOS 7" OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9") elseif (LINUX_OS_NAME STREQUAL "CentOS 7" OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9")
set(OPENCV_SUFFIX "centos7") set(OPENCV_SUFFIX "centos7")

View File

@ -29,7 +29,7 @@ if (ENABLE_MKL_DNN)
endif() endif()
# "MKL-DNN library based on OMP or TBB or Sequential implementation: TBB|OMP|SEQ" # "MKL-DNN library based on OMP or TBB or Sequential implementation: TBB|OMP|SEQ"
if(ARM) if(ARM OR (MSVC AND (ARM OR AARCH64)) )
set(THREADING_DEFAULT "SEQ") set(THREADING_DEFAULT "SEQ")
else() else()
set(THREADING_DEFAULT "TBB") set(THREADING_DEFAULT "TBB")

View File

@ -13,7 +13,7 @@ endif()
include(dependency_solver) include(dependency_solver)
set(VPU_SUPPORTED_FIRMWARES usb-ma2450 usb-ma2x8x pcie-ma248x) set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma248x)
# #
# Default packages # Default packages
@ -66,11 +66,11 @@ foreach(firmware_name IN LISTS VPU_SUPPORTED_FIRMWARES)
string(TOUPPER "${firmware_name}" firmware_name_upper) string(TOUPPER "${firmware_name}" firmware_name_upper)
set(var_name VPU_FIRMWARE_${firmware_name_upper}_FILE) set(var_name VPU_FIRMWARE_${firmware_name_upper}_FILE)
set(firmware_out_file "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${firmware_name}.mvcmd") set(firmware_out_file "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${CMAKE_CFG_INTDIR}/${firmware_name}.mvcmd")
# Handle PCIe elf firmware for Windows # Handle PCIe elf firmware for Windows
if (WIN32 AND "${firmware_name}" STREQUAL "pcie-ma248x") if (WIN32 AND "${firmware_name}" STREQUAL "pcie-ma248x")
set(firmware_out_file "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${firmware_name}.elf") set(firmware_out_file "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${CMAKE_CFG_INTDIR}/${firmware_name}.elf")
endif () endif ()
list(APPEND all_firmware_files ${firmware_out_file}) list(APPEND all_firmware_files ${firmware_out_file})
@ -79,7 +79,7 @@ foreach(firmware_name IN LISTS VPU_SUPPORTED_FIRMWARES)
COMMAND COMMAND
${CMAKE_COMMAND} -E copy ${${var_name}} ${firmware_out_file} ${CMAKE_COMMAND} -E copy ${${var_name}} ${firmware_out_file}
MAIN_DEPENDENCY ${${var_name}} MAIN_DEPENDENCY ${${var_name}}
COMMENT "[VPU] Copy ${${var_name}} to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}" COMMENT "[VPU] Copy ${${var_name}} to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${CMAKE_CFG_INTDIR}"
VERBATIM) VERBATIM)
install(FILES ${${var_name}} install(FILES ${${var_name}}

View File

@ -24,6 +24,14 @@
# define _AMD64_ # define _AMD64_
#endif #endif
#if defined(_M_ARM) && !defined(_ARM_) && !defined(_ARM64_)
# define _ARM_
#endif
#if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_)
# define _ARM64_
#endif
#include <string.h> #include <string.h>
#include <windef.h> #include <windef.h>
#include <fileapi.h> #include <fileapi.h>

View File

@ -59,10 +59,6 @@ else ()
endif() endif()
if (WIN32) if (WIN32)
if (NOT "${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
message(FATAL_ERROR "Only 64-bit supported on Windows")
endif()
set_property (DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _CRT_SECURE_NO_WARNINGS) set_property (DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _CRT_SECURE_NO_WARNINGS)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS -DNOMINMAX") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS -DNOMINMAX")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") #no asynchronous structured exception handling set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") #no asynchronous structured exception handling

View File

@ -98,6 +98,7 @@ int main(int argc, char *argv[]) {
// ----------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------
// --------------------------- 3. Configure input & output --------------------------------------------- // --------------------------- 3. Configure input & output ---------------------------------------------
if (network.getOutputsInfo().size() != 1) throw std::logic_error("Sample supports topologies with 1 output only");
// --------------------------- Prepare input blobs ----------------------------------------------------- // --------------------------- Prepare input blobs -----------------------------------------------------
slog::info << "Preparing input blobs" << slog::endl; slog::info << "Preparing input blobs" << slog::endl;
@ -214,7 +215,6 @@ int main(int argc, char *argv[]) {
// --------------------------- 8. Process output ------------------------------------------------------- // --------------------------- 8. Process output -------------------------------------------------------
slog::info << "Processing output blobs" << slog::endl; slog::info << "Processing output blobs" << slog::endl;
OutputsDataMap outputInfo(network.getOutputsInfo()); OutputsDataMap outputInfo(network.getOutputsInfo());
if (outputInfo.size() != 1) throw std::logic_error("Sample supports topologies with 1 output only");
Blob::Ptr outputBlob = inferRequest.GetBlob(outputInfo.begin()->first); Blob::Ptr outputBlob = inferRequest.GetBlob(outputInfo.begin()->first);
/** Validating -nt value **/ /** Validating -nt value **/

View File

@ -24,6 +24,14 @@
# define _AMD64_ # define _AMD64_
#endif #endif
#if defined(_M_ARM) && !defined(_ARM_) && !defined(_ARM64_)
# define _ARM_
#endif
#if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_)
# define _ARM64_
#endif
#include <string> #include <string>
#include <windef.h> #include <windef.h>
#include <fileapi.h> #include <fileapi.h>

View File

@ -86,6 +86,7 @@ int main(int argc, char *argv[]) {
// 2. Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format // 2. Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format
CNNNetwork network = ie.ReadNetwork(input_model); CNNNetwork network = ie.ReadNetwork(input_model);
if (network.getOutputsInfo().size() != 1) throw std::logic_error("Sample supports topologies with 1 output only");
network.setBatchSize(1); network.setBatchSize(1);
// ----------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------

View File

@ -11,7 +11,6 @@ for %%A in ("%GNA%") do set GNA_FILENAME=%%~nxA
for %%A in ("%OPENCV%") do set OPENCV_FILENAME=%%~nxA for %%A in ("%OPENCV%") do set OPENCV_FILENAME=%%~nxA
for %%A in ("%MYRIAD%") do set MYRIAD_FILENAME=%%~nxA for %%A in ("%MYRIAD%") do set MYRIAD_FILENAME=%%~nxA
for %%A in ("%HDDL%") do set HDDL_FILENAME=%%~nxA for %%A in ("%HDDL%") do set HDDL_FILENAME=%%~nxA
for %%A in ("%VPU_FIRMWARE_MA2450%") do set VPU_FIRMWARE_MA2450_FILENAME=%%~nxA
for %%A in ("%VPU_FIRMWARE_MA2X8X%") do set VPU_FIRMWARE_MA2X8X_FILENAME=%%~nxA for %%A in ("%VPU_FIRMWARE_MA2X8X%") do set VPU_FIRMWARE_MA2X8X_FILENAME=%%~nxA
for %%A in ("%TBB%") do set TBB_FILENAME=%%~nxA for %%A in ("%TBB%") do set TBB_FILENAME=%%~nxA
@ -86,16 +85,6 @@ if not "%HDDL%"=="" (
) )
) )
if not "%VPU_FIRMWARE_MA2450%"=="" (
if not exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%" (
mkdir "%DL_SDK_TEMP%\test_dependencies\VPU"
powershell -command "iwr -outf '%DL_SDK_TEMP%\test_dependencies\VPU\_%VPU_FIRMWARE_MA2450_FILENAME%' %VPU_FIRMWARE_MA2450%"
mkdir "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%"
call "C:\Program Files\7-Zip\7z.exe" x -y %DL_SDK_TEMP%\test_dependencies\VPU\_%VPU_FIRMWARE_MA2450_FILENAME% -o%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%
del "%DL_SDK_TEMP%\test_dependencies\VPU\_%VPU_FIRMWARE_MA2450_FILENAME%" /F /Q
)
)
if not "%VPU_FIRMWARE_MA2X8X%"=="" ( if not "%VPU_FIRMWARE_MA2X8X%"=="" (
if not exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%" ( if not exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%" (
mkdir "%DL_SDK_TEMP%\test_dependencies\VPU" mkdir "%DL_SDK_TEMP%\test_dependencies\VPU"
@ -139,13 +128,6 @@ if not "%MYRIAD%"=="" (
) )
) )
if not "%VPU_FIRMWARE_MA2450%"=="" (
if exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%" (
echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%\*" intel64 /S /I /Y /R
xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%\*" intel64 /S /I /Y /R
)
)
if not "%VPU_FIRMWARE_MA2X8X%"=="" ( if not "%VPU_FIRMWARE_MA2X8X%"=="" (
if exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%" ( if exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%" (
echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" intel64 /S /I /Y /R echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" intel64 /S /I /Y /R

View File

@ -37,7 +37,7 @@ add_path() {
fi fi
} }
runtimes=(MKL CLDNN MYRIAD GNA DLIA OPENCV VPU_FIRMWARE_USB-MA2450 VPU_FIRMWARE_USB-MA2X8X HDDL OMP TBB AOCL_RTE LIBUSB) runtimes=(MKL CLDNN MYRIAD GNA DLIA OPENCV VPU_FIRMWARE_USB-MA2X8X HDDL OMP TBB AOCL_RTE LIBUSB)
export_library_path() { export_library_path() {
export LD_LIBRARY_PATH=$DL_SDK_TEMP/test_dependencies/$1:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=$DL_SDK_TEMP/test_dependencies/$1:$LD_LIBRARY_PATH

View File

@ -4,6 +4,7 @@
#include <vector> #include <vector>
#include <iostream> #include <iostream>
#include <cmath>
#include <runtime/pwl.h> #include <runtime/pwl.h>
#include <gna_slope_scale.h> #include <gna_slope_scale.h>
@ -413,12 +414,12 @@ void make_gna_pwl(const DnnActivation fun,
y_upper = tmp; y_upper = tmp;
} }
int64_t x_lower_new = FLOAT_TO_INT32((x_lower / in_scale) / abs(pow_scale) * in_scale); int64_t x_lower_new = FLOAT_TO_INT32((x_lower / in_scale) / std::fabs(pow_scale) * in_scale);
int64_t x_upper_new = FLOAT_TO_INT32((x_upper / in_scale) / abs(pow_scale) * in_scale); int64_t x_upper_new = FLOAT_TO_INT32((x_upper / in_scale) / std::fabs(pow_scale) * in_scale);
x_lower = static_cast<int32_t>(x_lower_new); x_lower = static_cast<int32_t>(x_lower_new);
x_upper = static_cast<int32_t>(x_upper_new); x_upper = static_cast<int32_t>(x_upper_new);
if (x_lower_new < INT32_MIN) { if (x_lower_new < INT32_MIN) {
int16_t offset_lower = abs(x_lower_new - INT32_MIN) / in_scale * out_scale; int16_t offset_lower = std::abs(x_lower_new - INT32_MIN) / in_scale * out_scale;
x_lower = INT32_MIN; x_lower = INT32_MIN;
y_lower = y_lower + offset_lower; y_lower = y_lower + offset_lower;
} }
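A standalone sketch of why the switch from `abs` to `std::fabs`/`std::abs` matters here, assuming the pre-fix call resolved to the integer overload:

```cpp
#include <cmath>
#include <cstdio>
#include <cstdlib>

int main() {
    float pow_scale = -0.25f;
    // What the integer overload effectively does: the argument is truncated to
    // int before the absolute value is taken, so a fractional scale becomes 0.
    int truncated = std::abs(static_cast<int>(pow_scale));   // 0
    // std::fabs keeps the computation in floating point.
    float kept = std::fabs(pow_scale);                        // 0.25f
    std::printf("integer abs: %d, std::fabs: %f\n", truncated, kept);
    return 0;
}
```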

View File

@ -132,6 +132,22 @@ uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) {
return reqConfId; return reqConfId;
} }
uint32_t GNADeviceHelper::getNumberOfGnaDevices() {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
uint32_t numberOfGnaDevices = 0;
auto status = Gna2DeviceGetCount(&numberOfGnaDevices);
checkGna2Status(status);
return numberOfGnaDevices;
}
uint32_t GNADeviceHelper::selectGnaDevice() {
const auto deviceCount = getNumberOfGnaDevices();
if (deviceCount != 1) {
THROW_GNA_EXCEPTION << "Unsupported number of GNA devices detected = " << deviceCount;
}
return 0;
}
void GNADeviceHelper::checkGna2Status(Gna2Status status, const Gna2Model& gnaModel) { void GNADeviceHelper::checkGna2Status(Gna2Status status, const Gna2Model& gnaModel) {
if (!Gna2StatusIsSuccessful(status)) { if (!Gna2StatusIsSuccessful(status)) {
std::vector<char> gna2StatusBuffer(1024); std::vector<char> gna2StatusBuffer(1024);

View File

@ -74,7 +74,8 @@ public:
bool use_openmp = false, bool use_openmp = false,
bool isPerformanceMeasuring = false) : bool isPerformanceMeasuring = false) :
gna2HwConsistency(gna2HwConsistency), gna2HwConsistency(gna2HwConsistency),
isPerformanceMeasuring(isPerformanceMeasuring) { isPerformanceMeasuring(isPerformanceMeasuring),
nGnaDeviceIndex{selectGnaDevice()} {
#endif #endif
open(lib_async_n_threads); open(lib_async_n_threads);
initGnaPerfCounters(); initGnaPerfCounters();
@ -116,6 +117,8 @@ public:
#endif #endif
void releaseModel(const uint32_t model_id); void releaseModel(const uint32_t model_id);
uint32_t createRequestConfig(const uint32_t model_id); uint32_t createRequestConfig(const uint32_t model_id);
static uint32_t getNumberOfGnaDevices();
static uint32_t selectGnaDevice();
bool hasGnaHw() const { bool hasGnaHw() const {
return Gna2DeviceVersionSoftwareEmulation != detectedGnaDevVersion; return Gna2DeviceVersionSoftwareEmulation != detectedGnaDevVersion;
} }

View File

@ -107,9 +107,10 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
switch (header.version.minor) { switch (header.version.minor) {
case 1: case 1:
readBits(tempHeader2dot1, is); readBits(tempHeader2dot1, is);
header = Header2dot2::ModelHeader(tempHeader2dot1); header = Header2dot3::ModelHeader(tempHeader2dot1);
break; break;
case 2: case 2:
case 3:
readBits(header, is); readBits(header, is);
break; break;
default: default:
@ -166,7 +167,30 @@ void GNAModelSerial::Import(void *basePointer,
InferenceEngine::OutputsDataMap& outputsDataMap) { InferenceEngine::OutputsDataMap& outputsDataMap) {
is.exceptions(std::istream::failbit); is.exceptions(std::istream::failbit);
if (modelHeader.version.major == 2) {
if (modelHeader.version.minor >= 3) {
for (auto inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
uint32_t nameSize = 0;
readNBits<32>(nameSize, is);
std::string inName("", nameSize);
readNBytes(&inName[0], nameSize, is);
inputNames.push_back(inName.substr(0, nameSize - 1));
}
}
}
ImportInputs(is, basePointer, inputsDesc, inputsDataMap); ImportInputs(is, basePointer, inputsDesc, inputsDataMap);
if (modelHeader.version.major == 2) {
if (modelHeader.version.minor >= 3) {
for (auto inputIndex = 0; inputIndex < modelHeader.nOutputs; inputIndex++) {
uint32_t nameSize = 0;
readNBits<32>(nameSize, is);
std::string outName("", nameSize);
readNBytes(&outName[0], nameSize, is);
outputNames.push_back(outName.substr(0, nameSize - 1));
}
}
}
ImportOutputs(is, basePointer, desc, outputsDataMap); ImportOutputs(is, basePointer, desc, outputsDataMap);
for (auto operation = gna2Model->Operations; operation != gna2Model->Operations + gna2Model->NumberOfOperations; ++operation) { for (auto operation = gna2Model->Operations; operation != gna2Model->Operations + gna2Model->NumberOfOperations; ++operation) {
@ -311,9 +335,19 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
writeBits(header, os); writeBits(header, os);
for (auto &name : inputNames) {
const auto nameSize = strlen(name.c_str()) + 1;
writeBits(static_cast<uint32_t>(nameSize), os);
writeNBytes(name.c_str(), nameSize , os);
}
for (const auto &input : inputs) { for (const auto &input : inputs) {
writeBits(convert_to_serial(input), os); writeBits(convert_to_serial(input), os);
} }
for (auto &name : outputNames) {
const auto nameSize = strlen(name.c_str()) + 1;
writeBits(static_cast<uint32_t>(nameSize), os);
writeNBytes(name.c_str(), nameSize, os);
}
for (const auto &output : outputs) { for (const auto &output : outputs) {
writeBits(convert_to_serial(output), os); writeBits(convert_to_serial(output), os);
} }
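A sketch (not the plugin's actual helpers) of the record layout the export loop above writes for each input/output name: a 32-bit length that includes the terminating null byte, followed by the raw characters. The import side reads the length, then the bytes, and strips the trailing null with `substr(0, nameSize - 1)`.

```cpp
#include <cstdint>
#include <ostream>
#include <string>

// Write one length-prefixed, null-terminated name record.
void write_name_record(std::ostream& os, const std::string& name) {
    const uint32_t nameSize = static_cast<uint32_t>(name.size() + 1);  // + '\0'
    os.write(reinterpret_cast<const char*>(&nameSize), sizeof(nameSize));
    os.write(name.c_str(), nameSize);  // c_str() includes the terminating '\0'
}
```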
@ -691,7 +725,8 @@ void GNAModelSerial::ImportInputs(std::istream &is,
dataMap.clear(); dataMap.clear();
for (auto inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) { for (auto inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
std::string name = "input" + std::to_string(inputIndex); const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
? inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex));
HeaderLatest::RuntimeEndPoint input; HeaderLatest::RuntimeEndPoint input;
is.read(reinterpret_cast<char *>(&input), sizeof(input)); is.read(reinterpret_cast<char *>(&input), sizeof(input));
inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset)); inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset));
@ -719,7 +754,8 @@ void GNAModelSerial::ImportOutputs(std::istream &is,
desc.resize(modelHeader.nOutputs); desc.resize(modelHeader.nOutputs);
for (auto outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) { for (auto outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
std::string name = "output" + std::to_string(outputIndex); const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
? outputNames.at(outputIndex) : std::string("input" + std::to_string(outputIndex));
HeaderLatest::RuntimeEndPoint output; HeaderLatest::RuntimeEndPoint output;
is.read(reinterpret_cast<char *>(&output), sizeof(output)); is.read(reinterpret_cast<char *>(&output), sizeof(output));
OutputDesc description; OutputDesc description;

View File

@ -32,6 +32,8 @@ private:
#endif #endif
std::vector<GNAPluginNS::HeaderLatest::RuntimeEndPoint> inputs; std::vector<GNAPluginNS::HeaderLatest::RuntimeEndPoint> inputs;
std::vector<GNAPluginNS::HeaderLatest::RuntimeEndPoint> outputs; std::vector<GNAPluginNS::HeaderLatest::RuntimeEndPoint> outputs;
std::vector<std::string> inputNames;
std::vector<std::string> outputNames;
uint32_t nRotateRows = 0; uint32_t nRotateRows = 0;
uint32_t nRotateColumns = 0; uint32_t nRotateColumns = 0;
bool doRotateInput = false; bool doRotateInput = false;
@ -63,6 +65,13 @@ private:
const InferenceEngine::OutputsDataMap& outputsDataMap) : gna2Model(model), const InferenceEngine::OutputsDataMap& outputsDataMap) : gna2Model(model),
inputs(serializeInputs(inputsDataMap, inputDesc)), inputs(serializeInputs(inputsDataMap, inputDesc)),
outputs(serializeOutputs(outputsDataMap, outputsDesc)) { outputs(serializeOutputs(outputsDataMap, outputsDesc)) {
for (auto const& input : inputsDataMap) {
inputNames.push_back(input.first);
}
for (auto const& input : outputsDataMap) {
outputNames.push_back(input.first);
}
} }
#else #else

View File

@ -36,8 +36,8 @@ Parameter GNAPlugin::GetMetric(const std::string& name, const std::map<std::stri
} }
if (!options.count(KEY_DEVICE_ID)) { if (!options.count(KEY_DEVICE_ID)) {
if (availableDevices.size() == 1) { if (availableDevices.size() == 1 || availableDevices.size() == 2) {
return availableDevices[0]; return availableDevices.back(); // detection order is GNA_SW, GNA_HW
} else { } else {
THROW_GNA_EXCEPTION << "KEY_DEVICE_ID not set in request for FULL_DEVICE_NAME"; THROW_GNA_EXCEPTION << "KEY_DEVICE_ID not set in request for FULL_DEVICE_NAME";
} }

View File

@ -631,11 +631,25 @@ void InsertIdentityLayerPass::run() {
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front()); auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
for (auto & l : *pLayers) { for (auto & l : *pLayers) {
for (auto && prev : getCandidatesForIdentityInsertion(l)) { for (auto && prev : getCandidatesForIdentityInsertion(l)) {
// Do an upstream search until Functional layer is found
auto original_prev_layer = prev;
auto true_layer = l;
while (LayerInfo(prev).isNonFunctional()) {
if (CNNNetHasPrevLayer(prev.get()) && prev->outData.size() == 1) {
true_layer = prev;
prev = CNNNetPrevLayer(prev);
} else {
gnawarn() << "Could not find Functional parent for " << original_prev_layer->name << ", using original layer";
prev = original_prev_layer;
true_layer = l;
break;
}
}
int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++; int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++;
// actual insertion // actual insertion
auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers); auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers);
gnalog() << "Inserted "<< activationName << " between: " << prev->name << " and " << l->name << "\n" << std::flush; gnalog() << "Inserted "<< activationName << " between: " << prev->name << " and " << true_layer->name << "\n" << std::flush;
CNNLayerPtr activationLayer = CNNLayerPtr activationLayer =
std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32})); std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32}));
@ -643,17 +657,17 @@ void InsertIdentityLayerPass::run() {
// TODO: why index is 0 ? - better use direct indexing in getCandidateFunction // TODO: why index is 0 ? - better use direct indexing in getCandidateFunction
// detecting ins-data-idx // detecting ins-data-idx
size_t insDataIdx = std::numeric_limits<size_t>::max(); size_t insDataIdx = std::numeric_limits<size_t>::max();
for (size_t i = 0; i != l->insData.size(); i++) { for (size_t i = 0; i != true_layer->insData.size(); i++) {
if (getCreatorLayer(l->insData[i].lock()).lock() == prev) { if (getCreatorLayer(true_layer->insData[i].lock()).lock() == prev) {
insDataIdx = i; insDataIdx = i;
break; break;
} }
} }
if (insDataIdx == std::numeric_limits<size_t>::max()) { if (insDataIdx == std::numeric_limits<size_t>::max()) {
THROW_GNA_EXCEPTION << "cannot insert identity layer after" << prev->name << " and before " << l->name; THROW_GNA_EXCEPTION << "cannot insert identity layer after" << prev->name << " and before " << true_layer->name;
} }
auto inputData = l->insData[insDataIdx].lock(); auto inputData = true_layer->insData[insDataIdx].lock();
auto dataPtr = std::make_shared<Data>("identity_data_" + std::to_string(numOfIdentityLayers), inputData->getTensorDesc()); auto dataPtr = std::make_shared<Data>("identity_data_" + std::to_string(numOfIdentityLayers), inputData->getTensorDesc());
auto activationLayerWithQuant = quantized ? auto activationLayerWithQuant = quantized ?
@ -681,7 +695,7 @@ void InsertIdentityLayerPass::run() {
activationLayerWithQuant->params["original_num_rows"] = prev->params["original_num_rows"]; activationLayerWithQuant->params["original_num_rows"] = prev->params["original_num_rows"];
} }
CNNNetworkInsertLayer(prev, notAll ? l : CNNLayerPtr(nullptr), activationLayerWithQuant); CNNNetworkInsertLayer(prev, notAll ? true_layer : CNNLayerPtr(nullptr), activationLayerWithQuant);
} }
} }
} }

View File

@ -7,5 +7,5 @@
#include <cstdint> #include <cstdint>
#define FLOAT_TO_INT16(a) static_cast<int16_t>(((a) < 0)?((a) - 0.5):((a) + 0.5)) #define FLOAT_TO_INT16(a) static_cast<int16_t>(((a) < 0)?((a) - 0.5f):((a) + 0.5f))
#define FLOAT_TO_INT32(a) static_cast<int32_t>(((a) < 0)?((a)-0.5):((a)+0.5)) #define FLOAT_TO_INT32(a) static_cast<int32_t>(((a) < 0)?((a)-0.5f):((a)+0.5f))

View File

@ -0,0 +1,122 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <cstdint>
#include "backend/dnn_types.h"
#include "serial/headers/2dot1/gna_model_header.hpp"
#pragma pack(push, 1)
namespace GNAPluginNS {
namespace Header2dot3 {
/**
* @brief Header version 2.3
*/
struct ModelHeader {
/**
*@brief Magic number "GNAM" in the ASCII table, equal to hex 0x474e414d
*/
char gnam[4] = {};
/**
* @brief If the header size is not equal to sizeof(ModelHeader), some reserved data is appended at the end of the header.
* This usually indicates that the model was produced by a different version of the export function than the current one.
*/
uint32_t headerSize = 0u;
struct Version {
/**
* @details Major version of the format, unsigned int, e.g. 0x0001.
* Every change in the header or in the layer definitions should be reflected in a version change.
* For backward compatibility, new parsers can read old model versions with certain restrictions.
*/
uint16_t major = 2u;
/**
* @details Minor version of the format, unsigned int, corresponding to the build revision, for example.
* Changes in the minor version do not affect the model layout.
*/
uint32_t minor = 3u;
} version;
/**
* @brief Memory required to be allocated using GNAAlloc()
*/
uint64_t gnaMemSize = 0ull;
/**
* @brief Number of GNA Layers
*/
uint64_t layersCount = 0ull;
/**
* @brief Grouping level
*/
uint32_t nGroup = 0u;
/**
* Convolution-related settings - they affect the input transformation
*/
uint32_t nRotateRows = 0u;
uint32_t nRotateColumns = 0u;
bool doRotateInput = false;
uint32_t nInputs = 0u;
uint32_t nOutputs = 0u;
/**
* Reserved Data might be here
*/
ModelHeader() = default;
ModelHeader(GNAPluginNS::Header2dot1::ModelHeader const &old) {
gnaMemSize = old.gnaMemSize;
layersCount = old.layersCount;
nGroup = old.nGroup;
nRotateRows = old.nRotateRows;
nRotateColumns = old.nRotateColumns;
nInputs = old.nInputs;
nOutputs = old.nOutputs;
}
};
#pragma pack(pop)
/*
* The runtime endpoint is mostly the same as in the serialized version, except for the descriptor field
*/
struct RuntimeEndPoint {
/**
* If the scale factor is different from the one passed into inference, the network might need to be requantized
*/
float scaleFactor = 0;
/**
* Pointer descriptor
*/
void* descriptor_ptr = nullptr;
/**
* Endpoint resolution in bytes.
*/
uint32_t element_size = 0;
/**
* Number of elements
*/
uint32_t elements_count = 0;
/**
* Offset in bytes of pointer descriptor
*/
uint64_t descriptor_offset = 0ull;
intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
RuntimeEndPoint() = default;
RuntimeEndPoint(double scaleFactor,
void* descriptor_ptr,
uint32_t element_size,
uint32_t elements_count,
intel_dnn_orientation_t orientation) : scaleFactor(scaleFactor),
descriptor_ptr(descriptor_ptr),
element_size(element_size),
elements_count(elements_count),
orientation(orientation) {
}
};
} // namespace Header2dot3
} // namespace GNAPluginNS

View File

@ -4,11 +4,11 @@
#pragma once #pragma once
#include "serial/headers/2dot2/gna_model_header.hpp" #include "serial/headers/2dot3/gna_model_header.hpp"
namespace GNAPluginNS { namespace GNAPluginNS {
namespace HeaderLatest { namespace HeaderLatest {
using ModelHeader = GNAPluginNS::Header2dot2::ModelHeader; using ModelHeader = GNAPluginNS::Header2dot3::ModelHeader;
using RuntimeEndPoint = GNAPluginNS::Header2dot2::RuntimeEndPoint; using RuntimeEndPoint = GNAPluginNS::Header2dot3::RuntimeEndPoint;
} }
} }

View File

@ -12,7 +12,7 @@ file (GLOB LIBRARY_SRC
# TODO: WA for OneHot pass usage in reshape # TODO: WA for OneHot pass usage in reshape
set(LEGACY_SRC_ROOT "${IE_MAIN_SOURCE_DIR}/src/legacy_api/src/") set(LEGACY_SRC_ROOT "${IE_MAIN_SOURCE_DIR}/src/legacy_api/src/")
list(APPEND LIBRARY_SRC set(LEGACY_LIBRARY_SHARED_SRCS
"${LEGACY_SRC_ROOT}/transformations/convert_opset1_to_legacy/convert_one_hot_to_one_hot_ie.cpp" "${LEGACY_SRC_ROOT}/transformations/convert_opset1_to_legacy/convert_one_hot_to_one_hot_ie.cpp"
"${LEGACY_SRC_ROOT}/ngraph_ops/onehot_ie.cpp") "${LEGACY_SRC_ROOT}/ngraph_ops/onehot_ie.cpp")
@ -125,6 +125,7 @@ add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}_obj)
add_library(${TARGET_NAME} SHARED add_library(${TARGET_NAME} SHARED
${IE_STATIC_DEPENDENT_FILES} ${IE_STATIC_DEPENDENT_FILES}
${LEGACY_LIBRARY_SHARED_SRCS}
${vs_version_file} ${vs_version_file}
$<TARGET_OBJECTS:${TARGET_NAME}_obj>) $<TARGET_OBJECTS:${TARGET_NAME}_obj>)
@ -137,7 +138,8 @@ target_link_libraries(${TARGET_NAME} PRIVATE pugixml openvino::itt ${CMAKE_DL_LI
${NGRAPH_LIBRARIES} inference_engine_transformations) ${NGRAPH_LIBRARIES} inference_engine_transformations)
target_include_directories(${TARGET_NAME} INTERFACE ${PUBLIC_HEADERS_DIR} target_include_directories(${TARGET_NAME} INTERFACE ${PUBLIC_HEADERS_DIR}
PRIVATE $<TARGET_PROPERTY:${TARGET_NAME}_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>) PRIVATE $<TARGET_PROPERTY:${TARGET_NAME}_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:${TARGET_NAME}_legacy,INTERFACE_INCLUDE_DIRECTORIES>)
if(WIN32) if(WIN32)
set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}) set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})

View File

@ -371,19 +371,42 @@ inline CNNLayerSet CNNNetGetAllInputLayers(const ICNNNetwork& network) {
InputsDataMap inputs; InputsDataMap inputs;
network.getInputsInfo(inputs); network.getInputsInfo(inputs);
OutputsDataMap outputs;
network.getOutputsInfo(outputs);
std::vector<DataPtr> entryDataSet;
entryDataSet.reserve(inputs.size() + outputs.size());
for (const auto &kvp : inputs)
entryDataSet.push_back(kvp.second->getInputData());
for (const auto &kvp : outputs)
entryDataSet.push_back(kvp.second);
CNNLayerSet inputLayers; CNNLayerSet inputLayers;
std::unordered_set<CNNLayer*> allLayers; std::unordered_set<CNNLayer*> allLayers;
if (inputs.empty()) return inputLayers; if (entryDataSet.empty()) return inputLayers;
for (const auto& input : inputs) { // define any layer connected to provided Data object (consumer or creator)
auto& secondLayers = getInputTo(input.second->getInputData()); auto findConnectedLayer = [] (const DataPtr& data) -> CNNLayerPtr {
auto consumerLayers = getInputTo(data);
if (!consumerLayers.empty())
return consumerLayers.begin()->second;
if (secondLayers.empty()) continue; auto creator = getCreatorLayer(data).lock();
if (creator != nullptr)
return creator;
return nullptr;
};
for (const auto& data : entryDataSet) {
auto entryLayer = findConnectedLayer(data);
if (entryLayer == nullptr) continue;
details::UnorderedDFS( details::UnorderedDFS(
allLayers, secondLayers.begin()->second, allLayers, entryLayer,
[&](CNNLayerPtr layer) { [&inputLayers](const CNNLayerPtr& layer) {
if (layer->insData.empty()) { if (layer->insData.empty()) {
inputLayers.insert(layer); inputLayers.insert(layer);
} }

View File

@ -132,13 +132,6 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr<ngraph::Node>& layer) {
THROW_IE_EXCEPTION << "Cannot cast layer to TensorIterator."; THROW_IE_EXCEPTION << "Cannot cast layer to TensorIterator.";
} }
std::map<uint64_t, std::vector<std::pair<std::string, uint64_t>>> ngraph_parameter_id_to_ie_layer_port;
std::map<std::pair<std::string, uint64_t>, uint64_t> ie_layer_port_to_tensor_iterator_input_id;
// inputs/outputs of TensorIterator body (ie)
std::map<std::string, DataPtr> in_info_map;
std::map<std::string, DataPtr> out_info_map;
// inputs/outputs of TensorIterator (ngraph representation) // inputs/outputs of TensorIterator (ngraph representation)
auto parameters = tensor_iterator->get_function()->get_parameters(); auto parameters = tensor_iterator->get_function()->get_parameters();
auto results = tensor_iterator->get_function()->get_results(); auto results = tensor_iterator->get_function()->get_results();
@ -148,10 +141,7 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr<ngraph::Node>& layer) {
// IE TensorIterator doesn't include such nodes so we create CNNNetwork in a separate scope // IE TensorIterator doesn't include such nodes so we create CNNNetwork in a separate scope
// to call the destructor and delete these "Input"/data nodes. // to call the destructor and delete these "Input"/data nodes.
// These layers will hold the necessary subnet after destruction of CNNNetwork. TensorIterator::Body body;
std::set<InferenceEngine::CNNLayerPtr> body_input_layers;
// This map will save information about data nodes
std::map<std::string, std::vector<TensorDesc>> layer_name_to_tensor_desc;
{ {
CNNNetwork body_net(tensor_iterator->get_function()); CNNNetwork body_net(tensor_iterator->get_function());
CNNNetwork net(InferenceEngine::details::convertFunctionToICNNNetwork(body_net.getFunction(), body_net)); CNNNetwork net(InferenceEngine::details::convertFunctionToICNNNetwork(body_net.getFunction(), body_net));
@ -163,73 +153,102 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr<ngraph::Node>& layer) {
} }
// Get inputs/outputs of cnn network // Get inputs/outputs of cnn network
InputsDataMap in_info_map_with_parameters; auto in_info_map_with_parameters = net.getInputsInfo();
in_info_map_with_parameters = net.getInputsInfo(); auto out_info_map = net.getOutputsInfo();
out_info_map = net.getOutputsInfo();
// Fill the map to get layer and port of the body by the parameter index. IE_ASSERT(in_info_map_with_parameters.size() == parameters.size());
IE_ASSERT(out_info_map.size() == results.size());
InferenceEngine::TensorIterator::Body temp_body;
temp_body.inputs.resize(in_info_map_with_parameters.size());
temp_body.outputs.resize(out_info_map.size());
// Fill inputs/outs in order aligned with ng representation
uint64_t counter = 0; uint64_t counter = 0;
for (const auto& param : parameters) { for (const auto& param : parameters) {
auto info = in_info_map_with_parameters.at(param->get_friendly_name()); auto info = in_info_map_with_parameters.at(param->get_friendly_name());
auto data_ptr = info->getInputData(); temp_body.inputs[counter++] = info->getInputData();
auto input_to = getInputTo(data_ptr);
for (const auto& next_layer : input_to) {
auto port_idx = find_input_idx(next_layer.second, data_ptr);
ngraph_parameter_id_to_ie_layer_port[counter].push_back({next_layer.first, port_idx});
}
counter++;
} }
// Temporary body to call deep copy auto map_ng_result_to_ie_name = [] (std::shared_ptr<ngraph::op::v0::Result> res_op) {
InferenceEngine::TensorIterator::Body temp_body; auto result = res_op->input(0).get_source_output();
for (const auto& in : in_info_map_with_parameters) {
temp_body.inputs.emplace_back(in.second->getInputData());
}
for (const auto& out : out_info_map) { std::string name = result.get_node()->get_friendly_name();
temp_body.outputs.emplace_back(out.second); if (result.get_node()->get_output_size() > 1) {
name += "." + std::to_string(result.get_index());
}
return name;
};
counter = 0;
for (const auto& result : results) {
auto data = out_info_map.at(map_ng_result_to_ie_name(result));
temp_body.outputs[counter++] = data;
} }
// This deep copy will hold all unreachable constants. See the comment in CopyTIBody function. // This deep copy will hold all unreachable constants. See the comment in CopyTIBody function.
auto deep_cp_body = InferenceEngine::NetPass::CopyTIBody(temp_body); body = InferenceEngine::NetPass::CopyTIBody(temp_body);
for (const auto& data_ptr : deep_cp_body.inputs) {
auto input_to = getInputTo(data_ptr); // Check if data is really const layer holder
for (const auto& node : input_to) { auto is_constant_holder = [] (const DataPtr data) {
// Make it compatible with ir v7: delete Input layers in body return data->getPrecision() == Precision::UNSPECIFIED;
if (node.second->type != "Input") { };
body_input_layers.emplace(node.second);
// Save information about data nodes to re-create them with correct names. // Strip unreached node holder from Inputs node.
for (const auto& data : node.second->insData) { auto holder = body.inputs.back();
layer_name_to_tensor_desc[node.second->name].emplace_back(data.lock()->getTensorDesc()); if (is_constant_holder(holder)) {
} auto& holder_map = getInputTo(holder);
} // remove_if
for( auto it = holder_map.begin(); it != holder_map.end(); ) {
if( it->second->type == "Input")
it = holder_map.erase(it);
else
++it;
} }
} }
for (const auto& data_ptr : deep_cp_body.outputs) { // TODO: Disable this WA after total switch onto Ngraph
out_info_map[data_ptr->getName()] = data_ptr; // WA: Some plugins (like GPU) require matching of Data object name and producer Layer name.
} // Data name is expected in format "[layer_name]" or "[layer_name].[port_idx]" in case
} // of multiple inputs. We have to restore it if possible and ignore original names of
// Ngraph parameter and result ops.
auto holder = std::make_shared<Data>("const_holder", Precision::UNSPECIFIED); // Will not change data name if:
for (const auto& input_layer : body_input_layers) { // - data has several consumer layers
// Save all constants to the holder so that they are not deleted. // - data has no consumer (example if data is straight used as output)
if (input_layer->insData.empty()) { //
getInputTo(holder)[input_layer->name] = input_layer; for (auto &in : body.inputs) {
if (is_constant_holder(in))
continue; continue;
const auto input_to = getInputTo(in);
if (input_to.size() != 1)
continue;
const auto consumer_layer = input_to.begin()->second;
const auto consumer_in_port_set = consumer_layer->insData;
const auto found = std::find_if(consumer_in_port_set.begin(), consumer_in_port_set.end(),
[&in] (const DataWeakPtr &wptr) { return wptr.lock() == in; });
IE_ASSERT(found != consumer_in_port_set.end());
const auto consumer_port_idx = std::distance(consumer_in_port_set.begin(), found);
auto new_name = consumer_layer->name;
if (consumer_in_port_set.size() > 1) {
new_name += '.' + std::to_string(consumer_port_idx);
}
in->setName(new_name);
} }
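The renaming above follows the format described in the comment: a body input Data object takes its single consumer's name, with `.<port>` appended when that consumer has several input ports. The rule in isolation (a hypothetical helper, shown only for illustration):

```cpp
#include <cstddef>
#include <string>

// "<layer_name>" for a single-input consumer, "<layer_name>.<port_idx>" otherwise.
std::string makeDataName(const std::string& consumer_name,
                         std::size_t port_idx,
                         std::size_t consumer_port_count) {
    std::string name = consumer_name;
    if (consumer_port_count > 1)
        name += '.' + std::to_string(port_idx);
    return name;
}

// makeDataName("lstm_cell", 1, 3) -> "lstm_cell.1"
// makeDataName("relu", 0, 1)      -> "relu"
```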
// Re-create the data nodes with the correct names and fill inputs of TensorIterator (ie) // TODO: this WA restore original precisions of outputs.
for (size_t i = 0; i < input_layer->insData.size(); i++) { // convertFunctionToICNNNetwork has internal fallback policy for unsupported
if (!input_layer->insData[i].lock()) { // precisions for inputs/outputs ports. Particular for U8 will be translated
std::string data_name = (input_layer->insData.size() == 1) // to FP32. However Loop body has strong requirements for continue_condition
? input_layer->name // port, it should be BOOL(U8).
: input_layer->name + "." + std::to_string(i); //
for (int i = 0; i < results.size(); i++) {
DataPtr data(new Data(data_name, layer_name_to_tensor_desc[input_layer->name][i])); auto result = results[i];
input_layer->insData[i] = data; auto output = body.outputs[i];
getInputTo(data)[input_layer->name] = input_layer; if (result->get_element_type() == ngraph::element::u8) {
in_info_map[data_name] = data; output->setPrecision(InferenceEngine::Precision::U8);
} }
} }
} }
@ -238,44 +257,11 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr<ngraph::Node>& layer) {
LayerParams params = {layer->get_friendly_name(), "TensorIterator", LayerParams params = {layer->get_friendly_name(), "TensorIterator",
details::convertPrecision(layer->get_output_element_type(0))}; details::convertPrecision(layer->get_output_element_type(0))};
auto res = std::make_shared<InferenceEngine::TensorIterator>(params); auto res = std::make_shared<InferenceEngine::TensorIterator>(params);
res->body = body;
// Body: inputs
uint64_t counter = 0;
for (const auto& in : in_info_map) {
res->body.inputs.emplace_back(in.second);
// Fill the map to get the input index by layer and port of the body.
auto input_to = getInputTo(in.second);
for (const auto& next_layer : input_to) {
auto port_idx = find_input_idx(next_layer.second, in.second);
ie_layer_port_to_tensor_iterator_input_id[{next_layer.first, port_idx}] = counter;
}
counter++;
}
// the holder should be the last input element.
res->body.inputs.emplace_back(holder);
// Body: outputs
for (const auto& out : out_info_map) {
res->body.outputs.emplace_back(out.second);
}
// Port map: outputs // Port map: outputs
for (const auto& desc : tensor_iterator->get_output_descriptions()) { for (const auto& desc : tensor_iterator->get_output_descriptions()) {
auto result = results[desc->m_body_value_index]->input(0).get_source_output(); auto body_output_idx = desc->m_body_value_index;
std::string name = result.get_node()->get_friendly_name();
if (result.get_node()->get_output_size() > 1) {
name += "." + std::to_string(result.get_index());
}
auto output_layer = out_info_map.at(name);
// Find index in outputs of the IE TensorIterator body
auto it = std::find(res->body.outputs.begin(), res->body.outputs.end(), output_layer);
if (it == res->body.outputs.end()) {
THROW_IE_EXCEPTION << "Output layer not found.";
}
auto body_output_idx = it - res->body.outputs.begin();
std::string type_name = desc->get_type_info().name; std::string type_name = desc->get_type_info().name;
if (type_name == "ConcatOutputDescription") { if (type_name == "ConcatOutputDescription") {
@ -301,54 +287,42 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr<ngraph::Node>& layer) {
// Port map : inputs and back edges // Port map : inputs and back edges
for (const auto& desc : tensor_iterator->get_input_descriptions()) { for (const auto& desc : tensor_iterator->get_input_descriptions()) {
for (const auto& mapping : ngraph_parameter_id_to_ie_layer_port[desc->m_body_parameter_index]) { auto body_input_index = desc->m_body_parameter_index;
auto body_input_index = ie_layer_port_to_tensor_iterator_input_id.at(mapping);
std::string type_name = desc->get_type_info().name;
if (type_name == "SliceInputDescription") {
auto input_desc = ::ngraph::as_type_ptr<ngraph::op::TensorIterator::SliceInputDescription>(desc);
IE_ASSERT(input_desc != nullptr);
if (const auto slice_desc = std::dynamic_pointer_cast<ngraph::op::TensorIterator::SliceInputDescription>(desc)) {
res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap { res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap {
static_cast<int>(input_desc->m_input_index), static_cast<int>(body_input_index), static_cast<int>(slice_desc->m_input_index), static_cast<int>(body_input_index),
static_cast<int>(input_desc->m_axis), static_cast<int>(input_desc->m_stride), static_cast<int>(slice_desc->m_axis), static_cast<int>(slice_desc->m_stride),
static_cast<int>(input_desc->m_start), static_cast<int>(input_desc->m_end), static_cast<int>(slice_desc->m_start), static_cast<int>(slice_desc->m_end),
static_cast<int>(input_desc->m_part_size)}); static_cast<int>(slice_desc->m_part_size)});
} else if (type_name == "MergedInputDescription") { } else if (const auto merge_desc = std::dynamic_pointer_cast<ngraph::op::TensorIterator::MergedInputDescription>(desc)) {
auto input_desc = ::ngraph::as_type_ptr<ngraph::op::TensorIterator::MergedInputDescription>(desc);
IE_ASSERT(input_desc != nullptr);
res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap { res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap {
static_cast<int>(input_desc->m_input_index), static_cast<int>(body_input_index), -1, 1, 0, -1, 1}); static_cast<int>(merge_desc->m_input_index), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});
auto result = results[input_desc->m_body_value_index]->inputs()[0].get_source_output(); auto body_output_idx = merge_desc->m_body_value_index;
// Create correct name for output.
std::string output_name = result.get_node()->get_friendly_name();
if (result.get_node()->get_output_size() > 1) {
output_name += "." + std::to_string(result.get_index());
}
auto output_layer = out_info_map.at(output_name);
// Find index in outputs of the IE TensorIterator body
auto it = std::find(res->body.outputs.begin(), res->body.outputs.end(), output_layer);
if (it == res->body.outputs.end()) {
THROW_IE_EXCEPTION << "Output layer not found.";
}
auto body_output_idx = it - res->body.outputs.begin();
res->back_edges.emplace_back(InferenceEngine::TensorIterator::PortMap { res->back_edges.emplace_back(InferenceEngine::TensorIterator::PortMap {
static_cast<int>(body_output_idx), static_cast<int>(body_input_index), -1, 1, 0, -1, 1}); static_cast<int>(body_output_idx), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});
} else if (type_name == "InvariantInputDescription") { } else if (const auto inv_desc = std::dynamic_pointer_cast<ngraph::op::TensorIterator::InvariantInputDescription>(desc)) {
auto input_desc = ::ngraph::as_type_ptr<ngraph::op::TensorIterator::InvariantInputDescription>(desc);
IE_ASSERT(input_desc != nullptr);
res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap { res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap {
static_cast<int>(input_desc->m_input_index), static_cast<int>(body_input_index), -1, 1, 0, -1, 1}); static_cast<int>(inv_desc->m_input_index), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});
} else { } else {
THROW_IE_EXCEPTION << "Incorrect type of the input description."; THROW_IE_EXCEPTION << "Incorrect type of the input description.";
} }
} }
if (const auto loop_op = std::dynamic_pointer_cast<const ngraph::opset5::Loop>(layer)) {
auto spec_port = loop_op->get_special_body_ports();
if (spec_port.current_iteration_input_idx != -1) {
auto ie_port_idx = spec_port.current_iteration_input_idx;
res->params["loop_body_current_iteration_idx"] = std::to_string(ie_port_idx);
}
if (spec_port.body_condition_output_idx != -1) {
auto body_output_idx = spec_port.body_condition_output_idx;
res->params["loop_body_condition_output_idx"] = std::to_string(body_output_idx);
}
res->params["loop_trip_count_idx"] = "0";
res->params["loop_execution_condition_idx"] = "1";
} }
return res; return res;
@ -1173,14 +1147,6 @@ CNNLayer::Ptr NodeConverter<ngraph::op::ReverseSequence>::createLayer(const std:
return res; return res;
} }
template <>
CNNLayer::Ptr NodeConverter<ngraph::op::Reshape>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
LayerParams params = {layer->get_friendly_name(), "Reshape",
details::convertPrecision(layer->get_output_element_type(0))};
auto res = std::make_shared<InferenceEngine::ReshapeLayer>(params);
return res;
}
template <> template <>
CNNLayer::Ptr NodeConverter<ngraph::op::ShapeOf>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const { CNNLayer::Ptr NodeConverter<ngraph::op::ShapeOf>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
LayerParams params = {layer->get_friendly_name(), "ShapeOf", LayerParams params = {layer->get_friendly_name(), "ShapeOf",

View File

@ -46,15 +46,28 @@ static std::vector<DataPtr> getAllInputs(const std::vector<DataPtr>& heads) {
CNNLayerSet inputLayers; CNNLayerSet inputLayers;
std::unordered_set<CNNLayer*> allLayers; std::unordered_set<CNNLayer*> allLayers;
// find any layer connected to the provided Data object (consumer or creator)
auto findConnectedLayer = [] (const DataPtr& data) -> CNNLayerPtr {
auto consumerLayers = getInputTo(data);
if (!consumerLayers.empty())
return consumerLayers.begin()->second;
auto creator = getCreatorLayer(data).lock();
if (creator != nullptr)
return creator;
return nullptr;
};
// Define all start layers // Define all start layers
for (const auto& data : heads) { for (const auto& data : heads) {
auto& secondLayers = getInputTo(data); auto entryLayer = findConnectedLayer(data);
if (secondLayers.empty()) continue; if (entryLayer == nullptr) continue;
details::UnorderedDFS( details::UnorderedDFS(
allLayers, secondLayers.begin()->second, allLayers, entryLayer,
[&](CNNLayerPtr layer) { [&inputLayers](const CNNLayerPtr &layer) {
if (layer->insData.empty()) { if (layer->insData.empty()) {
inputLayers.insert(layer); inputLayers.insert(layer);
} }
@ -77,10 +90,17 @@ static std::vector<DataPtr> getAllInputs(const std::vector<DataPtr>& heads) {
std::vector<CNNLayerPtr> TIBodySortTopologically(const TensorIterator::Body& body) { std::vector<CNNLayerPtr> TIBodySortTopologically(const TensorIterator::Body& body) {
std::vector<CNNLayerPtr> all_layers; std::vector<CNNLayerPtr> all_layers;
auto all_input_layers = getAllInputs(body.inputs); // In case of a graph with several connected components,
// the total entry point is the union of [inputs] U [outputs].
// All internal nodes are reachable starting from this set.
auto total_entry_point = body.inputs;
total_entry_point.insert(total_entry_point.end(),
body.outputs.begin(), body.outputs.end());
auto all_input_layers = getAllInputs(total_entry_point);
CNNNetForestDFS( CNNNetForestDFS(
all_input_layers, all_input_layers,
[&](CNNLayerPtr current) { [&all_layers](const CNNLayerPtr &current) {
all_layers.push_back(current); all_layers.push_back(current);
}, },
false); false);
@ -143,9 +163,17 @@ TensorIterator::Body CopyTIBody(const TensorIterator::Body& body, std::string su
} }
TensorIterator::Body res; TensorIterator::Body res;
for (auto& in : body.inputs) res.inputs.emplace_back(old2new_d[in.get()]); for (auto& in : body.inputs) {
auto found = old2new_d.find(in.get());
IE_ASSERT(found != old2new_d.end());
res.inputs.emplace_back(found->second);
}
for (auto& out : body.outputs) res.outputs.emplace_back(old2new_d[out.get()]); for (auto& out : body.outputs) {
auto found = old2new_d.find(out.get());
IE_ASSERT(found != old2new_d.end());
res.outputs.emplace_back(found->second);
}
// Fake holder. // Fake holder.
// The graph itself is a shared_ptr set where parent holds child. // The graph itself is a shared_ptr set where parent holds child.

View File

@ -110,64 +110,73 @@ void MKLDNNGraph::Replicate(const TensorIterator::Body &subgraph, const MKLDNNEx
this->_name = "subgraph"; this->_name = "subgraph";
this->reuse_io_tensors = false; this->reuse_io_tensors = false;
std::unordered_map<CNNLayerPtr, MKLDNNNodePtr> layer2node; // Map data object onto producer layer(node)
std::unordered_set<DataPtr> unused_data; // nodes which has no consumers (output or just unused) std::unordered_map<Data*, std::pair<MKLDNNNodePtr, int>> data2node;
auto _parent_port = [] (const DataPtr &data) -> int { // nodes which has no consumers (output or just unused). But doesn't marked as graph output.
auto parent = getCreatorLayer(data).lock(); // Will be stored as fake output separately.
for (int i = 0; parent->outData.size(); i++) std::unordered_set<DataPtr> unused_data;
if (data == parent->outData[i])
return i;
return -1;
};
auto _child_port = [] (const DataPtr &data, const CNNLayerPtr &layer) -> int { // Step 1. Replicate input nodes
for (int i = 0; layer->insData.size(); i++) for (const auto &input : subgraph.inputs) {
if (data == layer->insData[i].lock()) if (input->getPrecision() == Precision::UNSPECIFIED) continue; // const node holder
return i;
return -1;
};
auto creator = getCreatorLayer(input).lock();
if (creator == nullptr) {
creator.reset(new CNNLayer({input->getName(), "Input", input->getTensorDesc().getPrecision()}));
creator->outData.push_back(input);
}
// Replicate All Nodes in topological order const MKLDNNNodePtr node(MKLDNNNode::factory().create(creator, getEngine(), extMgr, weightsCache));
for (const auto layer : NetPass::TIBodySortTopologically(subgraph)) { data2node[input.get()] = {node, 0};
CNNLayerPtr _layer = layer;
const MKLDNNNodePtr node(MKLDNNNode::factory().create(_layer, getEngine(), extMgr, weightsCache));
graphNodes.push_back(node); graphNodes.push_back(node);
layer2node[layer] = node; inputNodes[input->getName()] = node;
if (getInputTo(input).empty()) {
unused_data.insert(input);
}
}
// Step 2. Replicate all internal nodes.
for (const auto layer : NetPass::TIBodySortTopologically(subgraph)) {
const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)};
graphNodes.push_back(node);
for (int port = 0; port < layer->insData.size(); port++) { for (int port = 0; port < layer->insData.size(); port++) {
auto data = layer->insData[port].lock(); auto data = layer->insData[port].lock();
auto parent_layer = getCreatorLayer(data).lock();
if (!parent_layer) continue; // no parent means that it is input data node (or memory/const layer)
auto parent_node = layer2node[parent_layer]; auto port_info = data2node[data.get()];
auto parent_node = port_info.first;
auto parent_port_idx = port_info.second;
MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(data), port)); MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, port));
node->addEdge(edge); node->addEdge(edge);
graphEdges.push_back(edge); graphEdges.push_back(edge);
} }
int out_port_idx = 0;
for (auto &out_data : layer->outData) { for (auto &out_data : layer->outData) {
data2node[out_data.get()] = {node, out_port_idx++};
if (getInputTo(out_data).empty()) { if (getInputTo(out_data).empty()) {
unused_data.insert(out_data); unused_data.insert(out_data);
} }
} }
} }
// Step 3. Add output nodes and output stubs for unused data objects.
for (const auto &output : subgraph.outputs) { for (const auto &output : subgraph.outputs) {
auto parent_layer = getCreatorLayer(output).lock(); auto port_info = data2node[output.get()];
auto parent_node = layer2node[parent_layer]; auto parent_node = port_info.first;
auto parent_port_idx = port_info.second;
CNNLayerPtr layer(new CNNLayer({"out_" + output->getName(), "Output", output->getTensorDesc().getPrecision()})); CNNLayerPtr layer(new CNNLayer({"out_" + output->getName(), "Output", output->getTensorDesc().getPrecision()}));
layer->insData.push_back(output); layer->insData.push_back(output);
const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)); const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)};
MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(output), 0)); MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0));
node->addEdge(edge); node->addEdge(edge);
graphEdges.push_back(edge); graphEdges.push_back(edge);
graphNodes.push_back(node); graphNodes.push_back(node);
outputNodes.push_back(node); outputNodes.push_back(node);
@ -176,39 +185,20 @@ void MKLDNNGraph::Replicate(const TensorIterator::Body &subgraph, const MKLDNNEx
// Add stub output node for unused data // Add stub output node for unused data
for (auto to_stub_data : unused_data) { for (auto to_stub_data : unused_data) {
auto parent_layer = getCreatorLayer(to_stub_data).lock(); auto port_info = data2node[to_stub_data.get()];
auto parent_node = layer2node[parent_layer]; auto parent_node = port_info.first;
auto parent_port_idx = port_info.second;
CNNLayerPtr layer(new CNNLayer({"stub_" + parent_layer->name, "Output", to_stub_data->getTensorDesc().getPrecision()})); CNNLayerPtr layer(new CNNLayer({"stub_" + to_stub_data->getName(), "Output", to_stub_data->getTensorDesc().getPrecision()}));
layer->insData.push_back(to_stub_data); layer->insData.push_back(to_stub_data);
const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)); const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache));
MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(to_stub_data), 0)); MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0));
node->addEdge(edge); node->addEdge(edge);
graphEdges.push_back(edge); graphEdges.push_back(edge);
graphNodes.push_back(node); graphNodes.push_back(node);
} }
// Replicate input nodes
for (const auto &input : subgraph.inputs) {
if (input->getName() == "const_holder") continue;
CNNLayerPtr layer(new CNNLayer({"in_" + input->getName(), "Input", input->getTensorDesc().getPrecision()}));
layer->outData.push_back(input);
const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache));
for (auto p : getInputTo(input)) {
auto consumer = p.second;
MKLDNNEdgePtr edge(new MKLDNNEdge(node, layer2node[consumer], 0, _child_port(input, consumer)));
node->addEdge(edge);
graphEdges.push_back(edge);
}
graphNodes.push_back(node);
inputNodes[input->getName()] = node;
}
} }
void MKLDNNGraph::Replicate(const ICNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) { void MKLDNNGraph::Replicate(const ICNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) {

View File

@ -76,6 +76,11 @@ public:
return outputNodes; return outputNodes;
} }
std::map<std::string, MKLDNNNodePtr>& GetInputNodes() {
return inputNodes;
}
mkldnn::engine getEngine() const { mkldnn::engine getEngine() const {
return eng; return eng;
} }

View File

@ -600,7 +600,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndActivation(MKLDNNGraph &graph) {
return eltwiseNode && return eltwiseNode &&
(eltwiseNode->getOpType() == Relu || (eltwiseNode->getOpType() == Relu ||
(conv->getCnnLayer()->precision == Precision::FP32 && (conv->getCnnLayer()->precision == Precision::FP32 &&
IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid}))); IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid,
Round})));
}; };
for (int i = 0; i < graphNodes.size(); i++) { for (int i = 0; i < graphNodes.size(); i++) {
@ -678,7 +679,8 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra
if (eltwiseNode == nullptr) if (eltwiseNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get Eltwise node " << childNode->getName(); THROW_IE_EXCEPTION << "Cannot get Eltwise node " << childNode->getName();
if (IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid})) { if (IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish,
Hsigmoid, Round})) {
return true; return true;
} else if (IsOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu})) { } else if (IsOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu})) {
if (eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() != 2) if (eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() != 2)
@ -1044,7 +1046,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph)
return ((eltwiseNode->getOpType() == MulAdd && node->getCnnLayer()->blobs.size() == 2) || return ((eltwiseNode->getOpType() == MulAdd && node->getCnnLayer()->blobs.size() == 2) ||
(eltwiseNode->getOpType() == Prelu) || (eltwiseNode->getOpType() == Prelu) ||
IsOneOf(eltwiseNode->getOpType(), {Relu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid})); IsOneOf(eltwiseNode->getOpType(), {Relu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish,
Hsigmoid, Round}));
} }
return false; return false;
@ -1258,7 +1261,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
return eltwiseNode && return eltwiseNode &&
(eltwiseNode->getOpType() == Relu || (eltwiseNode->getOpType() == Relu ||
(conv->getCnnLayer()->precision == Precision::FP32 && (conv->getCnnLayer()->precision == Precision::FP32 &&
IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid}))); IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid,
Round})));
}; };
for (auto &graphNode : graphNodes) { for (auto &graphNode : graphNodes) {
@ -1611,7 +1615,7 @@ void MKLDNNGraphOptimizer::FuseNormalizeAndSimpleOperation(MKLDNNGraph &graph) {
if (eltwiseNode == nullptr) if (eltwiseNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get Eltwise node " << node->getName(); THROW_IE_EXCEPTION << "Cannot get Eltwise node " << node->getName();
return IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Tanh, Swish, return IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Tanh, Swish,
Hswish, Mish, Hsigmoid, Linear, Abs, Square, Sqrt}) || Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt}) ||
((eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() == 2) || ((eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() == 2) ||
(eltwiseNode->getOpType() == Prelu)); (eltwiseNode->getOpType() == Prelu));
} }

View File

@ -75,6 +75,7 @@ static const InferenceEngine::details::caseless_unordered_map<std::string, Type>
{ "HSwish", Eltwise }, { "HSwish", Eltwise },
{ "Mish", Eltwise }, { "Mish", Eltwise },
{ "HSigmoid", Eltwise }, { "HSigmoid", Eltwise },
{ "Round", Eltwise },
{ "ScaleShift", Eltwise }, { "ScaleShift", Eltwise },
{ "PReLU", Eltwise }, { "PReLU", Eltwise },
{ "Norm", Lrn }, { "Norm", Lrn },
@ -112,6 +113,7 @@ static const InferenceEngine::details::caseless_unordered_map<std::string, Type>
{ "BinaryConvolution", BinaryConvolution }, { "BinaryConvolution", BinaryConvolution },
{ "DeformableConvolution", DeformableConvolution }, { "DeformableConvolution", DeformableConvolution },
{ "TensorIterator", TensorIterator }, { "TensorIterator", TensorIterator },
{ "Loop", TensorIterator },
{ "MemoryInput", MemoryInput}, // for construction from name ctor, arbitrary name is used { "MemoryInput", MemoryInput}, // for construction from name ctor, arbitrary name is used
{ "Memory", MemoryOutput }, // for construction from layer ctor { "Memory", MemoryOutput }, // for construction from layer ctor
{ "Convert", Convert }, { "Convert", Convert },

View File

@ -312,7 +312,8 @@ private:
auto& eltwiseNode = dynamic_cast<const MKLDNNEltwiseNode&>(node); auto& eltwiseNode = dynamic_cast<const MKLDNNEltwiseNode&>(node);
switch (eltwiseNode.getOpType()) { switch (eltwiseNode.getOpType()) {
case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt: case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt:
case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish: case Mish: case Hsigmoid: case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish:
case Mish: case Hsigmoid: case Round:
return jit_mkldnn_emitter::get_supported_precisions(); return jit_mkldnn_emitter::get_supported_precisions();
case Add: return jit_add_emitter::get_supported_precisions(); case Add: return jit_add_emitter::get_supported_precisions();
case MulAdd: return jit_mul_add_emitter::get_supported_precisions(); case MulAdd: return jit_mul_add_emitter::get_supported_precisions();
@ -345,7 +346,8 @@ private:
auto& eltwiseNode = dynamic_cast<const MKLDNNEltwiseNode&>(node); auto& eltwiseNode = dynamic_cast<const MKLDNNEltwiseNode&>(node);
switch (eltwiseNode.getOpType()) { switch (eltwiseNode.getOpType()) {
case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt: case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt:
case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish: case Mish: case Hsigmoid: case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish:
case Mish: case Hsigmoid: case Round:
return std::make_shared<jit_mkldnn_emitter>(this, isa, eltwiseNode, exec_prec); return std::make_shared<jit_mkldnn_emitter>(this, isa, eltwiseNode, exec_prec);
case Add: return std::make_shared<jit_add_emitter>(this, isa, eltwiseNode, exec_prec); case Add: return std::make_shared<jit_add_emitter>(this, isa, eltwiseNode, exec_prec);
case MulAdd: return std::make_shared<jit_mul_add_emitter>(this, isa, eltwiseNode, exec_prec); case MulAdd: return std::make_shared<jit_mul_add_emitter>(this, isa, eltwiseNode, exec_prec);
@ -764,6 +766,18 @@ MKLDNNEltwiseNode::initializers = {
opType = Hsigmoid; opType = Hsigmoid;
algorithm = mkldnn::eltwise_hsigmoid; algorithm = mkldnn::eltwise_hsigmoid;
}}, }},
{"round", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = 0.0f;
beta = 0.0f;
opType = Round;
std::string mode = activationLayer->GetParamAsString("mode", "half_to_even");
if (mode == "half_to_even")
algorithm = mkldnn::eltwise_round_half_to_even;
else if (mode == "half_away_from_zero")
algorithm = mkldnn::eltwise_round_half_away_from_zero;
else
THROW_IE_EXCEPTION << "Round layer with name " << activationLayer->name << " doesn't support mode " << mode;
}},
}; };
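The `mode` attribute selects between two tie-breaking rules: `half_to_even` is banker's rounding, while `half_away_from_zero` is the classic arithmetic rule. A standalone sketch of the numeric semantics only (the actual computation is done by the mkldnn eltwise kernels selected above):

```cpp
#include <cassert>
#include <cfenv>
#include <cmath>

float round_half_to_even(float x) {
    std::fesetround(FE_TONEAREST);   // IEEE default: ties go to the even neighbour
    return std::nearbyintf(x);
}

float round_half_away_from_zero(float x) {
    return std::roundf(x);           // std::round always breaks ties away from zero
}

int main() {
    assert(round_half_to_even(2.5f) == 2.0f);
    assert(round_half_to_even(3.5f) == 4.0f);
    assert(round_half_away_from_zero(2.5f)  ==  3.0f);
    assert(round_half_away_from_zero(-2.5f) == -3.0f);
    return 0;
}
```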
void MKLDNNEltwiseNode::init() { void MKLDNNEltwiseNode::init() {
@ -833,7 +847,8 @@ void MKLDNNEltwiseNode::init() {
comparator(layerType, "swish") || comparator(layerType, "swish") ||
comparator(layerType, "hswish") || comparator(layerType, "hswish") ||
comparator(layerType, "mish") || comparator(layerType, "mish") ||
comparator(layerType, "hsigmoid")) { comparator(layerType, "hsigmoid") ||
comparator(layerType, "round")) {
initializers[layerType](getCnnLayer().get(), eltwiseOp, eltwiseAlgorithm, alpha, beta); initializers[layerType](getCnnLayer().get(), eltwiseOp, eltwiseAlgorithm, alpha, beta);
} else { } else {
THROW_IE_EXCEPTION << "Unsupported algorithm for Eltwise node with name `" << getName() << "`."; THROW_IE_EXCEPTION << "Unsupported algorithm for Eltwise node with name `" << getName() << "`.";
@ -843,7 +858,8 @@ void MKLDNNEltwiseNode::init() {
size_t MKLDNNEltwiseNode::getOpInputsNum() const { size_t MKLDNNEltwiseNode::getOpInputsNum() const {
switch (getOpType()) { switch (getOpType()) {
case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt: case PowerStatic: case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt: case PowerStatic:
case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish: case Mish: case Hsigmoid: case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish:
case Mish: case Hsigmoid: case Round:
case LogicalNot: case LogicalNot:
return 1; return 1;
case Add: case Subtract: case Multiply: case Divide: case FloorMod: case Mod: case Maximum: case Minimum: case SquaredDifference: case Add: case Subtract: case Multiply: case Divide: case FloorMod: case Mod: case Maximum: case Minimum: case SquaredDifference:
@ -1469,7 +1485,8 @@ void MKLDNNEltwiseNode::executeReference(const std::vector<const uint8_t *>& src
switch (getOpType()) { switch (getOpType()) {
case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt: case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt:
case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish: case Mish: case Hsigmoid: case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish:
case Mish: case Hsigmoid: case Round:
*dst_ptr_f = ref_eltwise_injector->compute_scalar(src_f[0]); break; *dst_ptr_f = ref_eltwise_injector->compute_scalar(src_f[0]); break;
case Add: *dst_ptr_f = src_f[0] + src_f[1]; break; case Add: *dst_ptr_f = src_f[0] + src_f[1]; break;
case MulAdd: *dst_ptr_f = src_f[0] * src_f[1] + src_f[2]; break; case MulAdd: *dst_ptr_f = src_f[0] * src_f[1] + src_f[2]; break;
@ -1570,6 +1587,8 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops) {
case mkldnn::eltwise_hswish: case mkldnn::eltwise_hswish:
case mkldnn::eltwise_mish: case mkldnn::eltwise_mish:
case mkldnn::eltwise_hsigmoid: case mkldnn::eltwise_hsigmoid:
case mkldnn::eltwise_round_half_to_even:
case mkldnn::eltwise_round_half_away_from_zero:
ops.append_eltwise(1.0, getAlgorithm(), getAlpha(), getBeta()); ops.append_eltwise(1.0, getAlgorithm(), getAlpha(), getBeta());
break; break;
case mkldnn::depthwise_scale_shift: case mkldnn::depthwise_scale_shift:

View File

@ -59,7 +59,8 @@ enum EltwiseOpType {
Prelu, Prelu,
Mish, Mish,
Hswish, Hswish,
Hsigmoid Hsigmoid,
Round
}; };
struct jit_eltwise_params { struct jit_eltwise_params {

View File

@ -2123,7 +2123,7 @@ bool MKLDNNInterpolateNode::canFuse(const MKLDNNNodePtr& node) const {
if (eltwiseNode == nullptr) if (eltwiseNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get eltwise node " << node->getName(); THROW_IE_EXCEPTION << "Cannot get eltwise node " << node->getName();
return isOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, return isOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp,
Tanh, Swish, Hswish, Mish, Hsigmoid, Linear, Abs, Square, Sqrt}); Tanh, Swish, Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt});
} }
return false; return false;

View File

@ -11,7 +11,6 @@
#include <map> #include <map>
#include <mkldnn_types.h> #include <mkldnn_types.h>
#include <mkldnn_extension_utils.h> #include <mkldnn_extension_utils.h>
#include <legacy/graph_transformer.h>
using namespace mkldnn; using namespace mkldnn;
using namespace MKLDNNPlugin; using namespace MKLDNNPlugin;
@ -50,32 +49,25 @@ static InferenceEngine::LayerConfig make_plain_config(const InferenceEngine::CNN
class PortIteratorHelper : public PortMapHelper { class PortIteratorHelper : public PortMapHelper {
public: public:
PortIteratorHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, PortIteratorHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, bool sliced_src,
bool as_input, const InferenceEngine::TensorIterator::PortMap &port_map, const mkldnn::engine& eng, int n_iter) : as_input(as_input) { const InferenceEngine::TensorIterator::PortMap &slice_rule, const mkldnn::engine& eng) {
const auto &full_blob = as_input ? from : to; const auto &full_blob = sliced_src ? from : to;
const auto &part_blob = !as_input ? from : to; const auto &part_blob = !sliced_src ? from : to;
auto axis = port_map.axis; auto axis = slice_rule.axis;
auto stride = port_map.stride; auto stride = slice_rule.stride;
auto full_dims = full_blob->GetDims(); auto full_dims = full_blob->GetDims();
auto part_dims = part_blob->GetDims(); auto part_dims = part_blob->GetDims();
if (port_map.axis == -1) {
// simple copy mode. No iteration through this tensor
reorders.emplace_back(from->GetPrimitive(), to->GetPrimitive());
iter_count = n_iter;
} else {
auto abs_stride = std::abs(stride); auto abs_stride = std::abs(stride);
auto sign_of_stride = stride < 0.0f ? -1 : 1; auto sign_of_stride = stride < 0.0f ? -1 : 1;
IE_ASSERT(n_iter == full_dims[axis] / abs_stride) << "Shape mismatch for tensor iterator port"; iter_count = full_dims[axis] / abs_stride;
full_dims[axis] = abs_stride; full_dims[axis] = abs_stride;
IE_ASSERT(full_dims == part_dims) << "Shape mismatch for tensor iterator port"; IE_ASSERT(full_dims == part_dims) << "Shape mismatch for tensor iterator port";
iter_count = n_iter;
// make chunk view // make chunk view
auto chunk_desc = full_blob->GetDescriptor(); auto chunk_desc = full_blob->GetDescriptor();
chunk_desc.data.dims[axis] = abs_stride; chunk_desc.data.dims[axis] = abs_stride;
@ -92,54 +84,102 @@ public:
chunk_offset_in_byte = sign_of_stride < 0 ? (iter_count - 1) * chunk_stride_in_byte : 0; chunk_offset_in_byte = sign_of_stride < 0 ? (iter_count - 1) * chunk_stride_in_byte : 0;
chunk_stride_in_byte *= sign_of_stride; chunk_stride_in_byte *= sign_of_stride;
if (as_input) { if (sliced_src) {
reorders.emplace_back(chunk_mem_prim, to->GetPrimitive()); reorders.emplace_back(chunk_mem_prim, to->GetPrimitive());
} else { } else {
reorders.emplace_back(from->GetPrimitive(), chunk_mem_prim); reorders.emplace_back(from->GetPrimitive(), chunk_mem_prim);
} }
} }
}
void execute(int n_iter, mkldnn::stream strm) override { void execute(mkldnn::stream strm, int iter) override {
if (chunk_stride_in_byte != 0) { IE_ASSERT(iter >= 0 && iter < iter_count);
IE_ASSERT(n_iter < iter_count);
auto full_mem = mem_holder[FULL_DATA]; auto full_mem = mem_holder[FULL_DATA];
auto chunk_mem = mem_holder[CHUNK_DATA]; auto chunk_mem = mem_holder[CHUNK_DATA];
chunk_mem.set_data_handle(static_cast<uint8_t *>(full_mem.get_data_handle()) + chunk_mem.set_data_handle(static_cast<uint8_t *>(full_mem.get_data_handle()) +
chunk_offset_in_byte + chunk_stride_in_byte * n_iter); chunk_offset_in_byte + chunk_stride_in_byte * iter);
strm.submit({reorders.begin(), reorders.end()});
} else {
if (as_input ? n_iter == 0 : n_iter == (iter_count - 1))
strm.submit({reorders.begin(), reorders.end()}); strm.submit({reorders.begin(), reorders.end()});
} }
};
private: private:
bool as_input;
ptrdiff_t chunk_stride_in_byte = 0; ptrdiff_t chunk_stride_in_byte = 0;
ptrdiff_t chunk_offset_in_byte = 0; ptrdiff_t chunk_offset_in_byte = 0;
const int FULL_DATA = 0; const int FULL_DATA = 0;
const int CHUNK_DATA = 1; const int CHUNK_DATA = 1;
int iter_count;
}; };
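The helper walks the full tensor along `axis` in chunks of `|stride|` elements by shifting the chunk memory's data handle once per iteration; a negative stride starts from the last chunk and walks backwards. The offset arithmetic in isolation, assuming a dense row-major layout (the real code takes the stride from the mkldnn memory descriptor instead of computing it by hand):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <vector>

// Byte offset of the chunk processed at iteration `iter`.
std::ptrdiff_t chunk_offset(const std::vector<std::size_t>& full_dims,
                            int axis, int stride, std::size_t elem_size, int iter) {
    const int abs_stride = std::abs(stride);
    const int iter_count = static_cast<int>(full_dims[axis]) / abs_stride;
    assert(iter >= 0 && iter < iter_count);

    // bytes between consecutive chunks along `axis` (dense row-major assumption)
    std::size_t inner = elem_size;
    for (std::size_t d = axis + 1; d < full_dims.size(); ++d)
        inner *= full_dims[d];
    std::ptrdiff_t chunk_stride = static_cast<std::ptrdiff_t>(inner * abs_stride);

    // negative stride: start at the last chunk and move towards the first one
    const std::ptrdiff_t base = (stride < 0) ? (iter_count - 1) * chunk_stride : 0;
    if (stride < 0) chunk_stride = -chunk_stride;
    return base + chunk_stride * iter;
}
```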
class BackEdgePortHelper : public PortMapHelper { class BackEdgePortHelper : public PortMapHelper {
public: public:
BackEdgePortHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, const mkldnn::engine& eng, int n_iter) { BackEdgePortHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, const mkldnn::engine& eng) {
auto mem_desc = from->GetDescriptor();
mem_holder.emplace_back(mkldnn::memory::primitive_desc(mem_desc, eng));
reorders.emplace_back(from->GetPrimitive(), to->GetPrimitive()); reorders.emplace_back(from->GetPrimitive(), to->GetPrimitive());
iter_count = n_iter;
} }
void execute(int n_iter, mkldnn::stream strm) override { void execute(mkldnn::stream strm, int iter) override {
if (n_iter < iter_count - 1) { if (iter != 0) {
strm.submit({reorders.begin(), reorders.end()}); strm.submit({reorders.begin(), reorders.end()});
} }
}; }
};
class IterCountPortHelper : public PortMapHelper {
public:
IterCountPortHelper(const MKLDNNMemoryPtr &to, const mkldnn::engine& eng) {
// Only scalar I32 tensor is supported
IE_ASSERT(to->GetDataType() == memory::s32);
IE_ASSERT(to->GetDims() == memory::dims{1});
mem_holder.push_back(to->GetPrimitive());
}
void execute(mkldnn::stream strm, int n_iter) override {
auto mem = mem_holder[0];
auto data_ptr = static_cast<uint32_t*>(mem.get_data_handle());
*data_ptr = n_iter;
}
};
class asBoolCheck : public PortChecker {
public:
asBoolCheck(const MKLDNNMemoryPtr &mem) {
IE_ASSERT(mem->GetDataType() == memory::u8);
IE_ASSERT(mem->GetDims() == memory::dims{1});
mem_holder.push_back(mem->GetPrimitive());
}
int getStatus() override {
auto mem = mem_holder[0];
auto data_ptr = static_cast<uint8_t*>(mem.get_data_handle());
return *data_ptr == static_cast<uint8_t>(0) ? 0 : 1;
}
};
class asIntCheck : public PortChecker {
public:
asIntCheck(const MKLDNNMemoryPtr &mem) {
IE_ASSERT(mem->GetDataType() == memory::s32);
IE_ASSERT(mem->GetDims() == memory::dims{1});
mem_holder.push_back(mem->GetPrimitive());
}
int getStatus() override {
auto mem = mem_holder[0];
auto data_ptr = static_cast<uint32_t*>(mem.get_data_handle());
return *data_ptr;
}
};
class staticValueCheck : public PortChecker {
public:
staticValueCheck(const int &value) : value(value) {}
int getStatus() override {
return value;
}
private:
int value;
}; };
} // namespace MKLDNNPlugin } // namespace MKLDNNPlugin
@ -157,25 +197,19 @@ void MKLDNNTensorIteratorNode::getSupportedDescriptors() {
sub_graph.CreateGraph(ti->body, ext_mng, weightCache); sub_graph.CreateGraph(ti->body, ext_mng, weightCache);
// Try to detect inputs and outputs by indexes // Try to detect inputs and outputs by indexes
std::map<std::string, MKLDNNNodePtr> in_map, out_map; const auto &in_map = sub_graph.GetInputNodes();
for (auto node : sub_graph.GetNodes())
if (node->getType() == Input) // filter by type Input
in_map[node->getName().substr(3)] = node; // remove "in_" prefix
for (auto node : sub_graph.GetOutputNodes())
out_map[node->getName().substr(4)] = node; // remove "out_" prefix
for (const auto &in_data : ti->body.inputs) { for (const auto &in_data : ti->body.inputs) {
if (in_data->getName() == "const_holder") continue; if (in_data->getName() == "const_holder") continue;
auto &in_node = in_map[in_data->getName()]; auto &in_node = in_map.at(in_data->getName());
auto in_mem = in_node->getChildEdgeAt(0)->getMemoryPtr(); auto in_mem = in_node->getChildEdgeAt(0)->getMemoryPtr();
input_mem.push_back(in_mem); input_mem.push_back(in_mem);
} }
for (const auto &out_data : ti->body.outputs) { // Assume that the order of outputs in the original TI and in the produced sub_graph is the same
auto &out_node = out_map[out_data->getName()]; const auto &out_vec = sub_graph.GetOutputNodes();
auto out_mem = out_node->getParentEdgeAt(0)->getMemoryPtr(); for (size_t i = 0; i < out_vec.size(); i++) {
auto out_mem = out_vec[i]->getParentEdgeAt(0)->getMemoryPtr();
output_mem.push_back(out_mem); output_mem.push_back(out_mem);
} }
} }
@ -194,52 +228,99 @@ void MKLDNNTensorIteratorNode::createPrimitive() {
if (ti == nullptr) if (ti == nullptr)
THROW_IE_EXCEPTION << "Cannot convert to TensorIterator layer."; THROW_IE_EXCEPTION << "Cannot convert to TensorIterator layer.";
const auto &eng = getEngine();
for (auto map_rule : ti->input_port_map) { for (auto map_rule : ti->input_port_map) {
auto &extr_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &from_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr();
auto &intr_mem = input_mem[map_rule.to]; auto &to_mem = input_mem[map_rule.to];
auto mapper = std::shared_ptr<PortMapHelper>( if (map_rule.axis == -1)
new PortIteratorHelper (extr_mem, intr_mem, true, map_rule, getEngine(), n_iter)); first_mappers.emplace_back(new BackEdgePortHelper(from_mem, to_mem, eng));
else
in_port_mappers.push_back(mapper); before_mappers.emplace_back(new PortIteratorHelper(from_mem, to_mem, true, map_rule, eng));
} }
for (auto map_rule : ti->output_port_map) { for (auto map_rule : ti->output_port_map) {
auto &extr_mem = getChildEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &to_mem = getChildEdgesAtPort(map_rule.from)[0]->getMemoryPtr();
auto &intr_mem = output_mem[map_rule.to]; auto &from_mem = output_mem[map_rule.to];
auto mapper = std::shared_ptr<PortMapHelper>( if (map_rule.axis == -1)
new PortIteratorHelper (intr_mem, extr_mem, false, map_rule, getEngine(), n_iter)); last_mappers.emplace_back(new BackEdgePortHelper(from_mem, to_mem, eng));
else
out_port_mappers.push_back(mapper); after_mappers.emplace_back(new PortIteratorHelper(from_mem, to_mem, false, map_rule, eng));
} }
for (auto map_rule : ti->back_edges) { for (auto map_rule : ti->back_edges) {
auto from_mem = output_mem[map_rule.from]; auto from_mem = output_mem[map_rule.from];
auto to_mem = input_mem[map_rule.to]; auto to_mem = input_mem[map_rule.to];
auto mapper = std::shared_ptr<PortMapHelper>( before_mappers.emplace_back(new BackEdgePortHelper(from_mem, to_mem, eng));
new BackEdgePortHelper(from_mem, to_mem, getEngine(), n_iter)); }
out_port_mappers.push_back(mapper); // special purpose ports
constexpr auto key_cur_iter_port = "loop_body_current_iteration_idx";
constexpr auto key_cond_port = "loop_body_condition_output_idx";
constexpr auto key_trip_count_port = "loop_trip_count_idx";
constexpr auto key_init_cond_port = "loop_execution_condition_idx";
auto iter_idx_ports = ti->GetParamAsInts(key_cur_iter_port, {});
for (auto idx : iter_idx_ports) {
auto to_mem = input_mem[idx];
before_mappers.emplace_back(new IterCountPortHelper(to_mem, eng));
}
auto condition_port_idx = ti->GetParamAsInt(key_cond_port, -1);
if (condition_port_idx == -1) {
continue_cond_check.reset(new staticValueCheck(true)); // always true
} else {
auto mem = output_mem[condition_port_idx];
continue_cond_check.reset(new asBoolCheck(mem));
}
auto trip_count_port_idx = ti->GetParamAsInt(key_trip_count_port, -1);
if (trip_count_port_idx == -1) {
trip_count_check.reset(new staticValueCheck(n_iter)); // use statically calculated num of iteration
} else {
auto mem = getParentEdgesAtPort(trip_count_port_idx)[0]->getMemoryPtr();
trip_count_check.reset(new asIntCheck(mem));
}
auto init_cond_port_idx = ti->GetParamAsInt(key_init_cond_port, -1);
if (init_cond_port_idx == -1) {
initial_cond_check.reset(new staticValueCheck(true));
} else {
auto mem = getParentEdgesAtPort(init_cond_port_idx)[0]->getMemoryPtr();
initial_cond_check.reset(new asBoolCheck(mem));
} }
} }
void MKLDNNTensorIteratorNode::execute(mkldnn::stream strm) { void MKLDNNTensorIteratorNode::execute(mkldnn::stream strm) {
sub_graph.ResetInferCount(); sub_graph.ResetInferCount();
for (int i = 0; i < n_iter; i++) { bool continue_cond = initial_cond_check->getStatus();
int max_num_iter = trip_count_check->getStatus();
for (auto &mapper : first_mappers)
mapper->execute(strm);
// use "i != max_num_iter" only to allow "-1" works like infinite loop
for (int i = 0; i != max_num_iter && continue_cond; i++) {
// copy data to subgraph iteration // copy data to subgraph iteration
for (auto &mapper : in_port_mappers) for (auto &mapper : before_mappers)
mapper->execute(i, strm); mapper->execute(strm, i);
sub_graph.Infer(); sub_graph.Infer();
continue_cond = continue_cond_check->getStatus();
// copy data from subgraph iteration to outputs // copy data from subgraph iteration to outputs
// or next iteration inputs // or to next iteration inputs
for (auto &mapper : out_port_mappers) for (auto &mapper : after_mappers)
mapper->execute(i, strm); mapper->execute(strm, i);
} }
for (auto &mapper : last_mappers)
mapper->execute(strm);
} }
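With the special ports wired up, `execute()` behaves like a dynamic Loop: the trip count may be -1 (no static limit), the initial condition can skip the body entirely, and the body's condition output can stop the iterations early. The control flow reduced to plain values (the port helpers are replaced by ints/bools purely for illustration):

```cpp
#include <functional>

void run_loop(int trip_count,                          // -1 means "no static limit"
              bool initial_cond,                       // false skips the body entirely
              const std::function<bool(int)>& body) {  // returns the continue condition
    bool continue_cond = initial_cond;
    // "i != trip_count" (not "<") so that -1 behaves like an unbounded loop
    for (int i = 0; i != trip_count && continue_cond; ++i) {
        continue_cond = body(i);
    }
}

// run_loop(10, true,  [](int)  { return true;  });  // plain TensorIterator: 10 iterations
// run_loop(-1, true,  [](int i){ return i < 4; });  // Loop: runs until the body says stop
// run_loop(5,  false, [](int)  { return true;  });  // initial condition false: body never runs
```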
bool MKLDNNTensorIteratorNode::created() const { bool MKLDNNTensorIteratorNode::created() const {

View File

@ -13,16 +13,35 @@
namespace MKLDNNPlugin { namespace MKLDNNPlugin {
/**
* Functor interface to perform some action on the tensors captured in the constructor.
* Generally it reads, writes or moves data between the specified tensors.
* The action may depend on the iteration index.
*/
class PortMapHelper { class PortMapHelper {
public: public:
virtual ~PortMapHelper() = default; virtual ~PortMapHelper() = default;
virtual void execute(int n_iter, mkldnn::stream strm) = 0; virtual void execute(mkldnn::stream strm, int n_iter = -1) = 0;
protected: protected:
std::vector<mkldnn::reorder> reorders; std::vector<mkldnn::reorder> reorders;
std::vector<mkldnn::memory> mem_holder; std::vector<mkldnn::memory> mem_holder;
int iter_count;
}; };
/**
* Functor interface to perform a check on the data tensor captured in the constructor.
* The information is extracted as an int; the meaning of the returned value is specific to
* the particular type of checker.
*/
class PortChecker {
public:
virtual ~PortChecker() = default;
virtual int getStatus() = 0;
protected:
std::vector<mkldnn::memory> mem_holder;
};
class MKLDNNTensorIteratorNode : public MKLDNNNode { class MKLDNNTensorIteratorNode : public MKLDNNNode {
public: public:
MKLDNNTensorIteratorNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); MKLDNNTensorIteratorNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
@ -35,6 +54,7 @@ public:
void execute(mkldnn::stream strm) override; void execute(mkldnn::stream strm) override;
void setExtManager(const MKLDNNExtensionManager::Ptr& extMgr) { ext_mng = extMgr; } void setExtManager(const MKLDNNExtensionManager::Ptr& extMgr) { ext_mng = extMgr; }
private: private:
int n_iter = 0; int n_iter = 0;
@ -42,7 +62,16 @@ private:
MKLDNNGraph sub_graph; MKLDNNGraph sub_graph;
std::vector<MKLDNNMemoryPtr> input_mem, output_mem; std::vector<MKLDNNMemoryPtr> input_mem, output_mem;
std::vector<std::shared_ptr<PortMapHelper>> in_port_mappers, out_port_mappers; std::vector<std::shared_ptr<PortMapHelper>>
first_mappers, /// < Applied once before loop
last_mappers, /// < Applied once after loop
before_mappers, /// < Applied before each iteration
after_mappers; /// < Applied after each iteration
std::shared_ptr<PortChecker>
trip_count_check,      /// < Checks the trip count value; value >= -1
initial_cond_check,    /// < Checks the initial continue-condition value; value in [0, 1]
continue_cond_check;   /// < Checks the body's continue-condition value; value in [0, 1]
}; };
} // namespace MKLDNNPlugin } // namespace MKLDNNPlugin

View File

@ -332,18 +332,12 @@ static bool eliminate_squeeze(const std::shared_ptr<Node>& node) {
return false; return false;
} }
static bool eliminate_stop_gradient(const std::shared_ptr<Node>& node) {
replace_output_update_name(node->output(0), node->input_value(0));
return true;
}
bool pass::NopElimination::run_on_function(std::shared_ptr<Function> function) { bool pass::NopElimination::run_on_function(std::shared_ptr<Function> function) {
static const std::unordered_map<NodeTypeInfo, std::function<bool(const std::shared_ptr<Node>&)>> static const std::unordered_map<NodeTypeInfo, std::function<bool(const std::shared_ptr<Node>&)>>
dispatcher{{TI(opset3::Pad), &eliminate_nop}, dispatcher{{TI(opset3::Pad), &eliminate_nop},
{TI(op::v0::Sum), &eliminate_sum}, {TI(op::v0::Sum), &eliminate_sum},
{TI(opset3::Convert), &eliminate_convert}, {TI(opset3::Convert), &eliminate_convert},
{TI(op::v0::Slice), &eliminate_nop}, {TI(op::v0::Slice), &eliminate_nop},
{TI(op::v0::StopGradient), &eliminate_stop_gradient},
{TI(opset3::Reshape), &eliminate_reshape_v1}, {TI(opset3::Reshape), &eliminate_reshape_v1},
{TI(opset3::Concat), &eliminate_concat}, {TI(opset3::Concat), &eliminate_concat},
{TI(opset3::Squeeze), &eliminate_squeeze}, {TI(opset3::Squeeze), &eliminate_squeeze},

View File

@ -39,8 +39,6 @@ function(add_common_target TARGET_NAME STATIC_IE)
$<TARGET_PROPERTY:inference_engine_legacy,INTERFACE_INCLUDE_DIRECTORIES>) $<TARGET_PROPERTY:inference_engine_legacy,INTERFACE_INCLUDE_DIRECTORIES>)
if(WIN32) if(WIN32)
target_compile_definitions(${TARGET_NAME} PRIVATE NOMINMAX)
set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}) set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})
endif() endif()
@ -54,6 +52,10 @@ function(add_common_target TARGET_NAME STATIC_IE)
target_link_libraries(${TARGET_NAME} PUBLIC ${NGRAPH_LIBRARIES} inference_engine_transformations target_link_libraries(${TARGET_NAME} PUBLIC ${NGRAPH_LIBRARIES} inference_engine_transformations
PRIVATE openvino::itt) PRIVATE openvino::itt)
if(NOT STATIC_IE)
target_link_libraries(${TARGET_NAME} PUBLIC inference_engine_legacy)
endif()
endfunction() endfunction()
add_common_target("vpu_common_lib" FALSE) add_common_target("vpu_common_lib" FALSE)

View File

@ -0,0 +1,89 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadConvertNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="1,3,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/tf_op_layer_Cast/Cast" type="Convert" version="opset1">
<data destination_type="f16"/>
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</input>
<output>
<port id="1" precision="FP16">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="2" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/tf_op_layer_Cast/Cast" type="Convert" version="opset1">
<data precision="FP16"/>
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</input>
<output>
<port id="1" precision="FP16">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 0);
}

View File

@ -0,0 +1,184 @@
// Copyright (C) 2019-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadDepthToSpaceNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="5,4,28,2" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>5</dim>
<dim>4</dim>
<dim>28</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="DepthToSpace" version="opset1">
<data mode="blocks_first" block_size="2"/>
<input>
<port id="0">
<dim>5</dim>
<dim>4</dim>
<dim>28</dim>
<dim>2</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>5</dim>
<dim>1</dim>
<dim>56</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="2" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>5</dim>
<dim>1</dim>
<dim>56</dim>
<dim>4</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>5</dim>
<dim>4</dim>
<dim>28</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_6D/Cast_1204_const" type="Const" version="opset1">
<output>
<port id="1" precision="I64">
<dim>6</dim>
</port>
</output>
<blobs>
<custom offset="0" size="24" precision="I64"/>
</blobs>
</layer>
<layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_6D" type="Reshape" version="opset1">
<data special_zero="True"/>
<input>
<port id="0">
<dim>5</dim>
<dim>4</dim>
<dim>28</dim>
<dim>2</dim>
</port>
<port id="1">
<dim>6</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>5</dim>
<dim>2</dim>
<dim>2</dim>
<dim>1</dim>
<dim>28</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="3" name="PartitionedCall/functional_1/tf_op_layer_output/output/Transpose" type="Permute" version="opset1">
<data order="0,3,4,1,5,2"/>
<input>
<port id="0">
<dim>5</dim>
<dim>2</dim>
<dim>2</dim>
<dim>1</dim>
<dim>28</dim>
<dim>2</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>5</dim>
<dim>1</dim>
<dim>28</dim>
<dim>2</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="4" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_4D/Cast_1202_const" type="Const" version="opset1">
<output>
<port id="1" precision="I64">
<dim>4</dim>
</port>
</output>
<blobs>
<custom offset="24" size="16" precision="I64"/>
</blobs>
</layer>
<layer id="5" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="Reshape" version="opset1">
<data special_zero="True"/>
<input>
<port id="0">
<dim>5</dim>
<dim>1</dim>
<dim>28</dim>
<dim>2</dim>
<dim>2</dim>
<dim>2</dim>
</port>
<port id="1">
<dim>4</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>5</dim>
<dim>1</dim>
<dim>56</dim>
<dim>4</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
<edge from-layer="3" from-port="1" to-layer="5" to-port="0"/>
<edge from-layer="4" from-port="1" to-layer="5" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 80, [](Blob::Ptr& weights) {
auto* buffer = weights->buffer().as<int64_t*>();
buffer[0] = 0;
buffer[1] = 2;
buffer[2] = 2;
buffer[3] = 1;
buffer[4] = 28;
buffer[5] = 2;
buffer[6] = 0;
buffer[7] = 1;
buffer[8] = 56;
buffer[9] = 4;
});
}

View File

@ -0,0 +1,179 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadFloorModNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="1,1,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Parameter" version="opset1">
<data shape="1" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
</port>
</output>
</layer>
<layer id="2" name="EltwiseReshapeNormalization/Cast_163_const" type="Const" version="opset1">
<data offset="0" size="24" shape="3" element_type="i64"/>
<output>
<port id="1" precision="I64">
<dim>3</dim>
</port>
</output>
</layer>
<layer id="3" name="EltwiseReshapeNormalization" type="Reshape" version="opset1">
<data special_zero="True"/>
<input>
<port id="0">
<dim>1</dim>
</port>
<port id="1">
<dim>3</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</output>
</layer>
<layer id="4" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="FloorMod" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="5" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="1" from-port="0" to-layer="3" to-port="0"/>
<edge from-layer="2" from-port="1" to-layer="3" to-port="1"/>
<edge from-layer="0" from-port="0" to-layer="4" to-port="0"/>
<edge from-layer="3" from-port="2" to-layer="4" to-port="1"/>
<edge from-layer="4" from-port="2" to-layer="5" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
</port>
</output>
</layer>
<layer id="2" name="EltwiseReshapeNormalization/Cast_175_const" type="Const" version="opset1">
<output>
<port id="1" precision="I64">
<dim>3</dim>
</port>
</output>
<blobs>
<custom offset="0" size="12" precision="I32"/>
</blobs>
</layer>
<layer id="3" name="EltwiseReshapeNormalization" type="Reshape" version="opset1">
<data special_zero="True"/>
<input>
<port id="0">
<dim>1</dim>
</port>
<port id="1">
<dim>3</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</output>
</layer>
<layer id="4" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="Eltwise" version="opset1">
<data operation="floor_mod"/>
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="1" from-port="0" to-layer="3" to-port="0"/>
<edge from-layer="2" from-port="1" to-layer="3" to-port="1"/>
<edge from-layer="0" from-port="0" to-layer="4" to-port="0"/>
<edge from-layer="3" from-port="2" to-layer="4" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 40, [](Blob::Ptr& weights) {
auto* buffer = weights->buffer().as<int64_t*>();
buffer[0] = 1;
buffer[1] = 1;
buffer[2] = 1;
});
}

View File

@ -0,0 +1,122 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadGatherNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="1,3,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Parameter" version="opset1">
<data shape="1" element_type="i32"/>
<output>
<port id="0" precision="I32">
<dim>1</dim>
</port>
</output>
</layer>
<layer id="2" name="PartitionedCall/functional_1/tf_op_layer_GatherV2/GatherV2/Cast_292_const" type="Const" version="opset1">
<data offset="0" size="8" shape="" element_type="i64"/>
<output>
<port id="1" precision="I64"/>
</output>
</layer>
<layer id="3" name="PartitionedCall/functional_1/tf_op_layer_GatherV2/GatherV2" type="Gather" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
</port>
<port id="2"/>
</input>
<output>
<port id="3" precision="FP32">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="4" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="3" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="3" to-port="1"/>
<edge from-layer="2" from-port="1" to-layer="3" to-port="2"/>
<edge from-layer="3" from-port="3" to-layer="4" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Input" version="opset1">
<output>
<port id="0" precision="I32">
<dim>1</dim>
</port>
</output>
</layer>
<layer id="2" name="PartitionedCall/functional_1/tf_op_layer_GatherV2/GatherV2" type="Gather">
<data axis="0"/>
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 16, [](Blob::Ptr& weights) {
auto* buffer = weights->buffer().as<int64_t*>();
buffer[0] = 0;
});
}

View File

@ -0,0 +1,190 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadMinimumNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="1,1,27,27" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Parameter" version="opset1">
<data shape="1,1,27,27" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
<layer id="2" name="PartitionedCall/functional_1/output/Minimum" type="Minimum" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
<layer id="3" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/output/Minimum/negate1_" type="Power" version="opset1">
<data power="1" scale="-1" shift="0"/>
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
<layer id="2" name="input_b" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
<layer id="3" name="PartitionedCall/functional_1/output/Minimum/negate2_" type="Power" version="opset1">
<data power="1" scale="-1" shift="0"/>
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
<layer id="4" name="PartitionedCall/functional_1/output/Minimum/Max_" type="Eltwise" version="opset1">
<data operation="max"/>
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
<layer id="5" name="PartitionedCall/functional_1/output/Minimum" type="Power" version="opset1">
<data power="1" scale="-1" shift="0"/>
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
<edge from-layer="2" from-port="0" to-layer="3" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="4" to-port="0"/>
<edge from-layer="3" from-port="1" to-layer="4" to-port="1"/>
<edge from-layer="4" from-port="2" to-layer="5" to-port="0"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 0);
}

View File

@ -0,0 +1,108 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadMultiplyNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="1,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Parameter" version="opset1">
<data shape="1,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="2" name="PartitionedCall/functional_1/output/mul" type="Multiply" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>4</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="3" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>4</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="2" name="PartitionedCall/functional_1/output/mul" type="Eltwise" version="opset1">
<data operation="prod"/>
<input>
<port id="0">
<dim>1</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>4</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 0);
}

View File

@ -0,0 +1,146 @@
// Copyright (C) 2019-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadNormalizeL2Network) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="6,24,12,10" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
</output>
</layer>
<layer id="1" name="112_input_port_1/value114_const" type="Const" version="opset1">
<data offset="0" size="8" shape="1" element_type="i64"/>
<output>
<port id="1" precision="I64">
<dim>1</dim>
</port>
</output>
</layer>
<layer id="2" name="112" type="NormalizeL2" version="opset1">
<data eps="1e-12" eps_mode="add"/>
<input>
<port id="0">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
<port id="1">
<dim>1</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
</output>
</layer>
<layer id="3" name="5354_const" type="Const" version="opset1">
<data offset="8" size="4" shape="1" element_type="f32"/>
<output>
<port id="1" precision="FP32">
<dim>1</dim>
</port>
</output>
</layer>
<layer id="4" name="PartitionedCall/functional_1/lambda/output" type="Multiply" version="opset1">
<input>
<port id="0">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
<port id="1">
<dim>1</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
</output>
</layer>
<layer id="5" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="4" to-port="0"/>
<edge from-layer="3" from-port="1" to-layer="4" to-port="1"/>
<edge from-layer="4" from-port="2" to-layer="5" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/lambda/output" type="Normalize">
<data eps="1e-12" across_spatial="0" channel_shared="1"/>
<input>
<port id="0">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
</output>
<blobs>
<weights offset="0" size="96" precision="FP32"/>
</blobs>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 100, [](Blob::Ptr& weights) {
auto* buffer = weights->buffer().as<int64_t*>();
buffer[0] = 1;
buffer[1] = 32831;
});
}

View File

@ -0,0 +1,108 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadNotEqualNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="1,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Parameter" version="opset1">
<data shape="1,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="NotEqual" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>4</dim>
</port>
</input>
<output>
<port id="2" precision="BOOL">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="3" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>4</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="Eltwise" version="opset1">
<data operation="not_equal"/>
<input>
<port id="0">
<dim>1</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>4</dim>
</port>
</input>
<output>
<port id="2" precision="BOOL">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 0);
}

View File

@ -0,0 +1,120 @@
// Copyright (C) 2019-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadReduceMinNetwork) {
std::string model = R"V0G0N(
<net name="model" version="10">
<layers>
<layer id="0" name="data" type="Parameter" version="opset1">
<data element_type="f32" shape="3,2,2"/>
<output>
<port id="0" precision="FP32">
<dim>3</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="1" name="reduced/Cast_175_const" type="Const" version="opset1">
<data element_type="i64" offset="0" shape="3" size="24"/>
<output>
<port id="1" precision="I64">
<dim>3</dim>
</port>
</output>
</layer>
<layer id="2" name="reduced" type="ReduceMin" version="opset1">
<data keep_dims="True"/>
<input>
<port id="0">
<dim>3</dim>
<dim>2</dim>
<dim>2</dim>
</port>
<port id="1">
<dim>3</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</output>
</layer>
<layer id="3" name="reduced/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="model" version="7">
<layers>
<layer id="0" name="data" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>3</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="1" name="reduced/Cast_184_const" type="Const" version="opset1">
<output>
<port id="1" precision="I64">
<dim>3</dim>
</port>
</output>
<blobs>
<custom offset="0" precision="I64" size="12"/>
</blobs>
</layer>
<layer id="2" name="reduced" type="ReduceMin" version="opset1">
<data keep_dims="True"/>
<input>
<port id="0">
<dim>3</dim>
<dim>2</dim>
<dim>2</dim>
</port>
<port id="1">
<dim>3</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 100, [](Blob::Ptr& weights) {
auto* buffer = weights->buffer().as<int64_t*>();
buffer[0] = 0;
buffer[1] = 1;
buffer[2] = 2;
});
}

View File

@ -0,0 +1,115 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadReduceProdNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="1,1,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output/Cast_186_const" type="Const" version="opset1">
<data offset="0" size="8" shape="1" element_type="i64"/>
<output>
<port id="1" precision="I64">
<dim>1</dim>
</port>
</output>
</layer>
<layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="ReduceProd" version="opset1">
<data keep_dims="False"/>
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="3" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>4</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output/Cast_195_const" type="Const" version="opset1">
<output>
<port id="1" precision="I64">
<dim>1</dim>
</port>
</output>
<blobs>
<custom offset="0" size="4" precision="I32"/>
</blobs>
</layer>
<layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="ReduceProd" version="opset1">
<data keep_dims="False"/>
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 16, [](Blob::Ptr& weights) {
auto *buffer = weights->buffer().as<int64_t *>();
buffer[0] = 1;
});
}

View File

@ -0,0 +1,184 @@
// Copyright (C) 2019-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadSpaceToDepthNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="6,5,4,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>6</dim>
<dim>5</dim>
<dim>4</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="SpaceToDepth" version="opset1">
<data mode="blocks_first" block_size="2"/>
<input>
<port id="0">
<dim>6</dim>
<dim>5</dim>
<dim>4</dim>
<dim>4</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>6</dim>
<dim>20</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="2" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>6</dim>
<dim>20</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>6</dim>
<dim>5</dim>
<dim>4</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_6D/Cast_1217_const" type="Const" version="opset1">
<output>
<port id="1" precision="I64">
<dim>6</dim>
</port>
</output>
<blobs>
<custom offset="0" size="24" precision="I64"/>
</blobs>
</layer>
<layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_6D" type="Reshape" version="opset1">
<data special_zero="True"/>
<input>
<port id="0">
<dim>6</dim>
<dim>5</dim>
<dim>4</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>6</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>6</dim>
<dim>5</dim>
<dim>2</dim>
<dim>2</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="3" name="PartitionedCall/functional_1/tf_op_layer_output/output/Transpose" type="Permute" version="opset1">
<data order="0,3,5,1,2,4"/>
<input>
<port id="0">
<dim>6</dim>
<dim>5</dim>
<dim>2</dim>
<dim>2</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>6</dim>
<dim>2</dim>
<dim>2</dim>
<dim>5</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="4" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_4D/Cast_1219_const" type="Const" version="opset1">
<output>
<port id="1" precision="I64">
<dim>4</dim>
</port>
</output>
<blobs>
<custom offset="24" size="16" precision="I64"/>
</blobs>
</layer>
<layer id="5" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="Reshape" version="opset1">
<data special_zero="True"/>
<input>
<port id="0">
<dim>6</dim>
<dim>2</dim>
<dim>2</dim>
<dim>5</dim>
<dim>2</dim>
<dim>2</dim>
</port>
<port id="1">
<dim>4</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>6</dim>
<dim>20</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
<edge from-layer="3" from-port="1" to-layer="5" to-port="0"/>
<edge from-layer="4" from-port="1" to-layer="5" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 80, [](Blob::Ptr& weights) {
auto* buffer = weights->buffer().as<int64_t*>();
buffer[0] = 6;
buffer[1] = 5;
buffer[2] = 2;
buffer[3] = 2;
buffer[4] = 2;
buffer[5] = 2;
buffer[6] = 6;
buffer[7] = 20;
buffer[8] = 2;
buffer[9] = 2;
});
}

View File

@ -0,0 +1,137 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadSubtractNetwork) {
std::string model = R"V0G0N(
<net name="model" version="10">
<layers>
<layer id="0" name="x" type="Parameter" version="opset1">
<data element_type="f32" shape="3,4,5"/>
<output>
<port id="0" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="1" name="y" type="Parameter" version="opset1">
<data element_type="f32" shape="3,4,5"/>
<output>
<port id="0" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="2" name="z/sub" type="Subtract" version="opset1">
<input>
<port id="0">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
<port id="1">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="3" name="z/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="model" version="7">
<layers>
<layer id="0" name="x" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="1" name="y" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="2" name="z/neg_" type="Power" version="opset1">
<data power="1" scale="-1.0" shift="0"/>
<input>
<port id="0">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="3" name="z/sub" type="Eltwise" version="opset1">
<data operation="sum"/>
<input>
<port id="0">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
<port id="1">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="1" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="0" from-port="0" to-layer="3" to-port="0"/>
<edge from-layer="2" from-port="1" to-layer="3" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 0);
}

View File

@ -97,19 +97,6 @@ TEST(nop_elimination, eliminate_broadcast) {
ASSERT_EQ(count_ops_of_type<op::v1::Broadcast>(f), 0); ASSERT_EQ(count_ops_of_type<op::v1::Broadcast>(f), 0);
} }
TEST(nop_elimination, eliminate_stop_gradient) {
Shape shape{};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto s = make_shared<op::v0::StopGradient>(A);
auto f = make_shared<Function>(make_shared<op::v0::Abs>(s), ParameterVector{A});
pass::Manager pass_manager;
pass_manager.register_pass<pass::NopElimination>();
pass_manager.run_passes(f);
ASSERT_EQ(count_ops_of_type<op::v0::StopGradient>(f), 0);
}
TEST(nop_elimination, pass_property) { TEST(nop_elimination, pass_property) {
auto pass = std::make_shared<ngraph::pass::NopElimination>(); auto pass = std::make_shared<ngraph::pass::NopElimination>();
ASSERT_FALSE(pass->get_property(pass::PassProperty::CHANGE_DYNAMIC_STATE)); ASSERT_FALSE(pass->get_property(pass::PassProperty::CHANGE_DYNAMIC_STATE));

View File

@ -50,7 +50,9 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes
{Mish, {}}, {Mish, {}},
{HSwish, {}}, {HSwish, {}},
{SoftPlus, {}}, {SoftPlus, {}},
{HSigmoid, {}} {HSigmoid, {}},
{RoundHalfToEven, {}},
{RoundHalfAwayFromZero, {}}
}; };
const std::map<ActivationTypes, std::vector<std::vector<float>>> activationParamTypes = { const std::map<ActivationTypes, std::vector<std::vector<float>>> activationParamTypes = {

View File

@ -3,7 +3,6 @@
// //
#include <vector> #include <vector>
#include <ngraph/op/util/attr_types.hpp>
#include "single_layer_tests/loop.hpp" #include "single_layer_tests/loop.hpp"
#include "common_test_utils/test_constants.hpp" #include "common_test_utils/test_constants.hpp"
@ -12,9 +11,9 @@ using namespace LayerTestsDefinitions;
namespace { namespace {
// without clip, values increase rapidly, so use only seq_lengths = 2 // without clip, values increase rapidly, so use only seq_lengths = 2
std::vector<bool> execute_first_iteration{true}; std::vector<bool> execute_first_iteration{true};
std::vector<bool> is_body_condition_const{true, false}; std::vector<bool> is_body_condition_const{true/*, false*/};
std::vector<bool> body_condition{true, false}; // works only if is_body_condition_const == true std::vector<bool> body_condition{true/*, false*/}; // works only if is_body_condition_const == true
std::vector<int64_t> trip_count{1, 10, -1}; // -1 means infinity std::vector<int64_t> trip_count{1, 10/*, -1*/}; // -1 means infinity
std::vector<std::vector<std::pair<std::vector<size_t>, LOOP_IN_TYPE>>> inputs = { std::vector<std::vector<std::pair<std::vector<size_t>, LOOP_IN_TYPE>>> inputs = {
{{{32, 1, 10}, LOOP_IN_TYPE::INVARIANT}, {{32, 1, 10}, LOOP_IN_TYPE::INVARIANT}, {{32, 1, 10}, LOOP_IN_TYPE::MERGED}}, {{{32, 1, 10}, LOOP_IN_TYPE::INVARIANT}, {{32, 1, 10}, LOOP_IN_TYPE::INVARIANT}, {{32, 1, 10}, LOOP_IN_TYPE::MERGED}},
}; };
@ -31,4 +30,37 @@ namespace {
::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::Values(CommonTestUtils::DEVICE_CPU)),
LoopTest::getTestCaseName); LoopTest::getTestCaseName);
static const std::vector<std::tuple<bool, int64_t, int64_t, int64_t>> static_loop_types {
// GCC4.8 limitation: have to specify type of each element in list
// static_trip_count | max | dynamic_exit | axis
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, -1, -1 }, // n_iter 5, no dynamic exit
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, 3, -1 }, // n_iter 3, dynamic exit on 3
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, 7, -1 }, // n_iter 5, dynamic exit not reached
std::tuple<bool, int64_t, int64_t, int64_t>{ true , -1, 5, -1 }, // n_iter 5, inf loop with dynamic exit on 5
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, -1, 1 }, // n_iter 5, const for loop with auto concatenated out
std::tuple<bool, int64_t, int64_t, int64_t>{ false , 5, -1, -1 }, // |
std::tuple<bool, int64_t, int64_t, int64_t>{ false , 5, 3, -1 }, // | same with dynamic trip count
std::tuple<bool, int64_t, int64_t, int64_t>{ false , 5, 7, -1 }, // |
std::tuple<bool, int64_t, int64_t, int64_t>{ false , -1, 5, -1 } // |
};
using namespace testing;
using namespace InferenceEngine;
INSTANTIATE_TEST_CASE_P(smoke_StaticShapeLoop, StaticShapeLoopTest,
Combine(
Values(true),
ValuesIn(static_loop_types),
Values<int64_t>(7),
Values<InferenceEngine::SizeVector>({2, 1, 4}),
Values<InferenceEngine::Precision>(Precision::FP32, Precision::I32),
Values(CommonTestUtils::DEVICE_CPU)));
using namespace testing;
INSTANTIATE_TEST_CASE_P(smoke_TrivialLoop, TrivialLoopTest,
Combine(
Values<InferenceEngine::Precision>(Precision::FP32, Precision::I32),
Values<InferenceEngine::SizeVector>({2, 3, 4}),
Values(CommonTestUtils::DEVICE_CPU)));
} // namespace } // namespace

View File

@ -25,7 +25,22 @@ INSTANTIATE_TEST_CASE_P(smoke_NumSplitsCheck, SplitLayerTest,
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({30, 30, 30, 30})), ::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
SplitLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_splitWithUnusedOutputsTest, SplitLayerTest,
::testing::Combine(
::testing::Values(5),
::testing::Values(0),
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({0, 3})),
::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::Values(CommonTestUtils::DEVICE_CPU)),
SplitLayerTest::getTestCaseName); SplitLayerTest::getTestCaseName);
} // namespace } // namespace

View File

@ -53,8 +53,6 @@ std::vector<std::string> disabledTestPatterns() {
// TODO: Issue: 38841 // TODO: Issue: 38841
R"(.*TopKLayerTest.*k=10.*mode=min.*sort=index.*)", R"(.*TopKLayerTest.*k=10.*mode=min.*sort=index.*)",
R"(.*TopKLayerTest.*k=5.*sort=(none|index).*)", R"(.*TopKLayerTest.*k=5.*sort=(none|index).*)",
// TODO: not supported yet, ticket 37690
R"(.*Loop.*)",
// TODO: Issue: 41694 // TODO: Issue: 41694
R"(.*smoke_Set2.*CTCLossLayerTest.*)", R"(.*smoke_Set2.*CTCLossLayerTest.*)",
}; };

View File

@ -70,7 +70,14 @@ class ImportNetworkTest : public testing::WithParamInterface<exportImportNetwork
if (inputStream.fail()) { if (inputStream.fail()) {
FAIL() << "Cannot open file to import model: exported_model.blob"; FAIL() << "Cannot open file to import model: exported_model.blob";
} }
auto importedOutputs = CalculateImportedNetwork(inputStream); auto importedNetwork = core->ImportNetwork(inputStream, targetDevice, configuration);
for (const auto& next_input : importedNetwork.GetInputsInfo()) {
ASSERT_NO_THROW(executableNetwork.GetInputsInfo()[next_input.first]);
}
for (const auto& next_output : importedNetwork.GetOutputsInfo()) {
ASSERT_NO_THROW(executableNetwork.GetOutputsInfo()[next_output.first]);
}
auto importedOutputs = CalculateImportedNetwork(importedNetwork);
Compare(importedOutputs, actualOutputs); Compare(importedOutputs, actualOutputs);
} }
@ -107,9 +114,7 @@ class ImportNetworkTest : public testing::WithParamInterface<exportImportNetwork
std::map<std::string, std::string> exportConfiguration; std::map<std::string, std::string> exportConfiguration;
std::map<std::string, std::string> importConfiguration; std::map<std::string, std::string> importConfiguration;
std::vector<std::vector<std::uint8_t>> CalculateImportedNetwork(std::istream& networkModel) { std::vector<std::vector<std::uint8_t>> CalculateImportedNetwork(InferenceEngine::ExecutableNetwork& importedNetwork) {
auto importedNetwork = core->ImportNetwork(networkModel, targetDevice, configuration);
auto refInferRequest = importedNetwork.CreateInferRequest(); auto refInferRequest = importedNetwork.CreateInferRequest();
std::vector<InferenceEngine::InputInfo::CPtr> refInfos; std::vector<InferenceEngine::InputInfo::CPtr> refInfos;
for (const auto& input : importedNetwork.GetInputsInfo()) { for (const auto& input : importedNetwork.GetInputsInfo()) {

View File

@ -26,6 +26,7 @@ INSTANTIATE_TEST_CASE_P(DISABLED_smoke_NumSplitsCheck, SplitLayerTest,
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({30, 30})), ::testing::Values(std::vector<size_t >({30, 30})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_GNA)), ::testing::Values(CommonTestUtils::DEVICE_GNA)),
SplitLayerTest::getTestCaseName); SplitLayerTest::getTestCaseName);

View File

@ -0,0 +1,35 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include <subgraph_tests/memory_eltwise_reshape_concat.hpp>
#include "common_test_utils/test_constants.hpp"
namespace SubgraphTestsDefinitions {
namespace {
std::vector<size_t> input_multiples = {
1,
7,
5,
8
};
std::vector<size_t> concat_sizes = {
32,
64
};
std::map<std::string, std::string> additional_config = {
{"GNA_COMPACT_MODE", "NO"},
{"GNA_DEVICE_MODE", "GNA_SW_FP32"},
{"GNA_SCALE_FACTOR_0", "1638.4"},
};
} // namespace
INSTANTIATE_TEST_CASE_P(smoke_MemoryEltwiseReshapeConcatTest, MemoryEltwiseReshapeConcatTest,
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(input_multiples),
::testing::ValuesIn(concat_sizes),
::testing::Values(additional_config)),
MemoryEltwiseReshapeConcatTest::getTestCaseName);
} // namespace SubgraphTestsDefinitions

View File

@ -26,8 +26,22 @@ INSTANTIATE_TEST_CASE_P(smoke_NumSplitsCheck, SplitLayerTest,
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({30, 30, 30, 30})), ::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_GPU)), ::testing::Values(CommonTestUtils::DEVICE_GPU)),
SplitLayerTest::getTestCaseName); SplitLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_splitWithUnusedOutputsTest, SplitLayerTest,
::testing::Combine(
::testing::Values(5),
::testing::Values(0),
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({0, 3})),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
SplitLayerTest::getTestCaseName);
} // namespace } // namespace

View File

@ -16,7 +16,7 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
INSTANTIATE_TEST_CASE_P(smoke_NumSplitsCheck, SplitLayerTest, INSTANTIATE_TEST_CASE_P(smoke_NumSplitsCheck, SplitLayerTest,
::testing::Combine( ::testing::Combine(
::testing::Values(1), ::testing::Values(5),
// TODO: 0-axis excluded // TODO: 0-axis excluded
// Check (status == ie::StatusCode::OK) failed: Failed to reshape Network: // Check (status == ie::StatusCode::OK) failed: Failed to reshape Network:
// Failed to infer shapes for Split layer (Split_2) with error: // Failed to infer shapes for Split layer (Split_2) with error:
@ -28,10 +28,11 @@ INSTANTIATE_TEST_CASE_P(smoke_NumSplitsCheck, SplitLayerTest,
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})), ::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_MYRIAD)), ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)),
SplitLayerTest::getTestCaseName); SplitLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_splitWithUnusedOutputsTest, splitWithUnusedOutputsTest, INSTANTIATE_TEST_CASE_P(smoke_splitWithUnusedOutputsTest, SplitLayerTest,
::testing::Combine( ::testing::Combine(
::testing::Values(5), ::testing::Values(5),
// TODO: 0-axis excluded // TODO: 0-axis excluded
@ -49,5 +50,5 @@ INSTANTIATE_TEST_CASE_P(smoke_splitWithUnusedOutputsTest, splitWithUnusedOutputs
std::vector<size_t>({0, 4}), std::vector<size_t>({0, 4}),
std::vector<size_t>({2, 3})), std::vector<size_t>({2, 3})),
::testing::Values(CommonTestUtils::DEVICE_MYRIAD)), ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)),
splitWithUnusedOutputsTest::getTestCaseName); SplitLayerTest::getTestCaseName);
} // namespace } // namespace

View File

@ -37,4 +37,108 @@ protected:
void SetUp() override; void SetUp() override;
}; };
using StaticShapeLoopParams = typename std::tuple<
    bool,                           // static_continue_cond: initial condition provided by a constant node
    std::tuple<
        bool,                       // static_trip_count: trip count provided by a constant node
        int64_t,                    // max_iter_num: -1 means an infinite loop
        int64_t,                    // dynamic_exit: -1 means always true
        int64_t                     // axis: -1 means no auto concatenation
    >,
    int64_t,                        // start_value
    InferenceEngine::SizeVector,    // data_shape
    InferenceEngine::Precision,     // data_prc
    std::string                     // target device name
>;
/**
 * Test case for the static-shape version of the Loop operation.
 * The total iteration count is dynamic.
*/
class StaticShapeLoopTest : public testing::WithParamInterface<StaticShapeLoopParams>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<StaticShapeLoopParams> &obj);
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override;
std::vector<std::vector<std::uint8_t>> CalculateRefs() override;
private:
bool static_iter_num; // trip count provided by constant node
bool static_continue_cond; // initial_cond provided by constant node
int64_t max_iter_num; // -1 means an infinite loop (a dynamic exit condition is expected in the body)
int64_t dynamic_exit; // -1 means always true
int64_t axis; // -1 means no auto concatenation
int64_t start_value;
InferenceEngine::SizeVector data_shape;
InferenceEngine::Precision data_prc;
int64_t actual_n_iter();
protected:
void SetUp() override;
};
class TrivialLoopTest : public testing::WithParamInterface<LayerTestsUtils::basicParams>,
virtual public LayerTestsUtils::LayerTestsCommon {
protected:
using RefBlobGenerator = std::function<InferenceEngine::Blob::Ptr (const InferenceEngine::TensorDesc &info)>;
std::map<std::string, RefBlobGenerator> inputGens, outputGens;
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override {
auto found = inputGens.find(info.name());
if (found != inputGens.end()) {
return found->second(info.getTensorDesc());
}
found = inputGens.find("");
if (found != inputGens.end()) {
return found->second(info.getTensorDesc());
}
return LayerTestsCommon::GenerateInput(info);
}
std::vector<std::vector<std::uint8_t>> CalculateRefs() override {
if (outputGens.empty())
return LayerTestsCommon::CalculateRefs();
const auto results = function->get_results();
const auto outs_info = cnnNetwork.getOutputsInfo();
const auto num_out_blob = results.size();
std::vector<std::vector<std::uint8_t>> res_collection(num_out_blob);
for (int i = 0; i < num_out_blob; i++) {
// TODO: name of original NG result doesn't match with outs after conversion.
// Expected : auto name = results[i]->get_friendly_name();
auto name = results[i]->get_input_node_ptr(0)->get_friendly_name();
auto data = outs_info.at(name);
IE_ASSERT(data != nullptr);
RefBlobGenerator generator;
auto found = outputGens.find(name);
if (found != outputGens.end()) {
generator = found->second;
} else {
found = outputGens.find("");
if (found != outputGens.end()) {
generator = found->second;
}
}
IE_ASSERT(generator != nullptr) << "Test output generator is not specified";
auto blob = generator(data->getTensorDesc());
auto blob_size = blob->byteSize();
auto blob_ptr = blob->buffer().as<uint8_t*>();
auto &res = res_collection[i];
res.resize(blob_size);
std::copy(blob_ptr, blob_ptr + blob_size, res.begin());
}
return res_collection;
}
};
} // namespace LayerTestsDefinitions } // namespace LayerTestsDefinitions

View File

@ -23,6 +23,7 @@ typedef std::tuple<
InferenceEngine::Layout, // Input layout InferenceEngine::Layout, // Input layout
InferenceEngine::Layout, // Output layout InferenceEngine::Layout, // Output layout
std::vector<size_t>, // Input shapes std::vector<size_t>, // Input shapes
std::vector<size_t>, // Used outputs indices
std::string // Target device name std::string // Target device name
> splitParams; > splitParams;
@ -35,26 +36,4 @@ protected:
void SetUp() override; void SetUp() override;
}; };
typedef std::tuple<
size_t, // Num splits
size_t, // Axis
InferenceEngine::Precision, // Net precision
InferenceEngine::Precision, // Input precision
InferenceEngine::Precision, // Output precision
InferenceEngine::Layout, // Input layout
InferenceEngine::Layout, // Output layout
std::vector<size_t>, // Input shapes
std::vector<size_t>, // Used outputs indices
std::string // Target device name
> splitWithUnusedOutputsParams;
class splitWithUnusedOutputsTest : public testing::WithParamInterface<splitWithUnusedOutputsParams>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<splitWithUnusedOutputsParams> obj);
protected:
void SetUp() override;
};
} // namespace LayerTestsDefinitions } // namespace LayerTestsDefinitions

View File

@ -0,0 +1,37 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "common_test_utils/test_common.hpp"
#include "functional_test_utils/layer_test_utils.hpp"
#include <ie_core.hpp>
namespace SubgraphTestsDefinitions {
typedef std::tuple<
std::string, // Target device name
InferenceEngine::Precision, // Network precision
size_t, // Multiples of the concat size to be used as input size
size_t, // Concat size
std::map<std::string, std::string> // Configuration
> memoryEltwiseReshapeConcatParams;
class MemoryEltwiseReshapeConcatTest : public LayerTestsUtils::LayerTestsCommon,
public testing::WithParamInterface<memoryEltwiseReshapeConcatParams> {
private:
void initTestModel();
// memory layers have to be replaced since ngraph does not support them
void initNgraphFriendlyModel();
// since we are switching models, we need to generate and save these values in SetUp
size_t inputSize;
size_t concatSize;
ngraph::element::Type ngPrc;
std::vector<float> memory_init;
std::vector<float> concat_vals;
protected:
void SetUp() override;
void Run() override;
public:
static std::string getTestCaseName(const testing::TestParamInfo<memoryEltwiseReshapeConcatParams> &obj);
};
} // namespace SubgraphTestsDefinitions

View File

@ -46,7 +46,9 @@ namespace LayerTestsDefinitions {
result << "types=" << CommonTestUtils::vec2str(types_separate) << "_"; result << "types=" << CommonTestUtils::vec2str(types_separate) << "_";
result << "netPRC=" << netPrecision.name() << "_"; result << "netPRC=" << netPrecision.name() << "_";
result << "targetDevice=" << targetDevice << "_"; result << "targetDevice=" << targetDevice << "_";
return result.str(); auto res_str = result.str();
std::replace(res_str.begin(), res_str.end(), '-', '_');
return res_str;
} }
void LoopTest::SetUp() { void LoopTest::SetUp() {
@ -155,5 +157,227 @@ namespace LayerTestsDefinitions {
TEST_P(LoopTest, CompareWithRefs) { TEST_P(LoopTest, CompareWithRefs) {
Run(); Run();
}
void StaticShapeLoopTest::SetUp() {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
SetRefMode(LayerTestsUtils::IE);
auto args_pack = std::tie(static_iter_num, max_iter_num, dynamic_exit, axis);
std::tie(
static_continue_cond,
args_pack,
start_value,
data_shape,
data_prc,
targetDevice) = GetParam();
const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(data_prc);
const auto ngShape = ngraph::Shape{data_shape};
const auto scalarShape = ngraph::Shape{};
ngraph::ParameterVector params{};
auto cond_input_create = [&params] (ngraph::element::Type prc, const ngraph::Shape &shape, int value = 0, bool is_static = false)
-> std::shared_ptr<ngraph::Node> {
if (is_static)
return std::make_shared<ngraph::opset5::Constant>(prc, shape, value);
auto input = std::make_shared<ngraph::op::Parameter>(prc, shape);
params.push_back(input);
return input;
}; };
auto start = cond_input_create(prc, ngShape);
auto count = cond_input_create(ngraph::element::i64, scalarShape, max_iter_num, static_iter_num);
auto skip = cond_input_create(ngraph::element::boolean, scalarShape, true, static_continue_cond);
//
// count skip start count skip start
// / /
// ___*___*____ __________*___*____ | idx | data | out |
// | idx in | | ex_val idx in | | 0 | 7 | 7 |
// | | / | | | / | / | | 1 | 7 | 8 |
// | add | | less add | | 2 | 8 | 10 |
// | | true | | | | | | 3 | 10 | 13 |
// | | | | | | | | ~~~~~ * * * ~~~~~
// | out cnd | | cnd out |
// |___*____*___| |____*_____*________|
// Full loop Dynamic exit loop
// n_iter = count n_iter = ex_val
//
auto b_indx = std::make_shared<ngraph::op::Parameter>(ngraph::element::i64, ngraph::Shape{});
auto b_data = std::make_shared<ngraph::op::Parameter>(prc, ngShape);
auto b_indx_cast = std::make_shared<ngraph::op::Convert>(b_indx, prc);
auto b_add = std::make_shared<ngraph::op::Add>(b_data, b_indx_cast, ngraph::op::AutoBroadcastSpec::NUMPY);
std::shared_ptr<ngraph::Node> b_cond;
if (dynamic_exit == -1) {
b_cond = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{}, true);
} else {
auto b_exit_value = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, scalarShape, dynamic_exit);
b_cond = std::make_shared<ngraph::opset5::Less>(b_indx, b_exit_value);
}
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {b_cond, b_add}, // TODO: check with reverse
ngraph::ParameterVector {b_indx, b_data}); // TODO: check with reverse
auto loop = std::make_shared<ngraph::opset5::Loop>(count, skip);
loop->set_function(body);
loop->set_special_body_ports({0, 0});
loop->set_merged_input(b_data, start, b_add);
if (axis == -1)
loop->get_iter_value(b_add, -1);
else
loop->get_concatenated_slices(b_add, 0, 1, 1, -1, axis);
function = std::make_shared<ngraph::Function>(
ngraph::OutputVector {loop},
params);
}
InferenceEngine::Blob::Ptr StaticShapeLoopTest::GenerateInput(const InferenceEngine::InputInfo &info) const {
auto tdesc = info.getTensorDesc();
auto blob = make_blob_with_precision(tdesc);
blob->allocate();
if (tdesc.getLayout() == InferenceEngine::SCALAR) {
auto scalar_1d = CommonTestUtils::make_reshape_view(blob, {1});
CommonTestUtils::fill_data_with_broadcast(scalar_1d, 0, {static_cast<float>(max_iter_num)});
} else {
CommonTestUtils::fill_data_with_broadcast(blob, 0, {static_cast<float>(start_value)});
}
return blob;
}
int64_t StaticShapeLoopTest::actual_n_iter() {
constexpr auto INF_N_ITER = std::numeric_limits<int64_t>::max();
IE_ASSERT(dynamic_exit != -1 || max_iter_num != -1);
// dynamic_exit + 1, because the loop body behaves like a do-while loop with a post-condition check.
return std::min(dynamic_exit == -1 ? INF_N_ITER : dynamic_exit + 1,
max_iter_num == -1 ? INF_N_ITER : max_iter_num);
}
// Predefined ref output
std::vector<std::vector<std::uint8_t>> StaticShapeLoopTest::CalculateRefs() {
bool auto_concat_out = (axis != -1);
const auto n_iter = actual_n_iter();
auto ref_shape = data_shape;
if (auto_concat_out)
ref_shape[axis] *= n_iter;
using namespace CommonTestUtils;
InferenceEngine::TensorDesc tdesc {data_prc, ref_shape, InferenceEngine::TensorDesc::getLayoutByDims(ref_shape)};
std::vector<uint8_t> res(byte_size(tdesc));
auto out = make_blob_with_precision(tdesc, res.data());
std::vector<float> vals(n_iter);
float val = start_value;
for (int i = 0; i < n_iter; i++) {
val += i;
vals[i] = val;
}
if (auto_concat_out)
fill_data_with_broadcast(out, axis, vals);
else
fill_data_with_broadcast(out, 0, {val}); // broadcast scalar data
return {res};
}
TEST_P(StaticShapeLoopTest, CompareWithRefs) {
Run();
}
TEST_P(TrivialLoopTest, PassThroughBody) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
InferenceEngine::Precision iePrc;
InferenceEngine::SizeVector ieShape;
std::tie(iePrc, ieShape, targetDevice) = GetParam();
const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(iePrc);
const auto shape = ngraph::Shape{ieShape};
const auto scalarShape = ngraph::Shape{};
auto start = std::make_shared<ngraph::op::Parameter>(prc, shape);
auto count = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, scalarShape, 5);
auto icond = std::make_shared<ngraph::op::Constant>(ngraph::element::boolean, scalarShape, true);
// Loop body
auto b_data = std::make_shared<ngraph::op::Parameter>(prc, shape);
auto b_cond = std::make_shared<ngraph::op::Parameter>(ngraph::element::boolean, scalarShape);
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {b_cond, b_data}, // | passthrough body, no data changes
ngraph::ParameterVector {b_cond, b_data}); // | input -> output
auto loop = std::make_shared<ngraph::opset5::Loop>(count, icond);
loop->set_function(body);
loop->set_special_body_ports({-1, 0});
loop->set_invariant_input(b_cond, icond);
loop->set_invariant_input(b_data, start);
loop->get_iter_value(b_data, -1);
function = std::make_shared<ngraph::Function>(
ngraph::OutputVector {loop},
ngraph::ParameterVector {start});
// Precalculated ref blobs
auto blob = make_blob_with_precision({iePrc, ieShape, InferenceEngine::TensorDesc::getLayoutByDims(ieShape)});
blob->allocate();
CommonTestUtils::fill_data_with_broadcast(blob, 0, {10});
inputGens[""] = [&] (InferenceEngine::TensorDesc tdesc) { return blob; };
outputGens[""] = [&] (InferenceEngine::TensorDesc tdesc) { return blob; };
Run();
}
TEST_P(TrivialLoopTest, UnusedInputBody) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
InferenceEngine::Precision iePrc;
InferenceEngine::SizeVector ieShape;
std::tie(iePrc, ieShape, targetDevice) = GetParam();
const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(iePrc);
const auto shape = ngraph::Shape{ieShape};
const auto scalarShape = ngraph::Shape{};
auto start = std::make_shared<ngraph::op::Parameter>(prc, shape);
auto count = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, scalarShape, 5);
auto icond = std::make_shared<ngraph::op::Constant>(ngraph::element::boolean, scalarShape, true);
// Loop body
auto b_data = std::make_shared<ngraph::op::Parameter>(prc, shape);
auto b_cond = std::make_shared<ngraph::op::Constant>(ngraph::element::boolean, scalarShape, true);
auto b_iter = std::make_shared<ngraph::op::Parameter>(ngraph::element::i64, scalarShape);
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {b_cond, b_data},
ngraph::ParameterVector {b_data, b_iter});
auto loop = std::make_shared<ngraph::opset5::Loop>(count, icond);
loop->set_function(body);
loop->set_special_body_ports({1, 0});
loop->set_invariant_input(b_data, start);
loop->get_iter_value(b_data, -1);
function = std::make_shared<ngraph::Function>(
ngraph::OutputVector {loop},
ngraph::ParameterVector {start});
// Precalculated ref blobs
auto blob = make_blob_with_precision({iePrc, ieShape, InferenceEngine::TensorDesc::getLayoutByDims(ieShape)});
blob->allocate();
CommonTestUtils::fill_data_with_broadcast(blob, 0, {10});
inputGens[""] = [&] (InferenceEngine::TensorDesc tdesc) { return blob; };
outputGens[""] = [&] (InferenceEngine::TensorDesc tdesc) { return blob; };
Run();
}
} // namespace LayerTestsDefinitions } // namespace LayerTestsDefinitions
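The reference values produced in `StaticShapeLoopTest::CalculateRefs()` follow the accumulation sketched in the ASCII loop diagram above: starting from `start_value`, iteration `i` adds the loop index to the running value. A small standalone check of that recurrence (`start_value = 7` and four iterations are taken from the table in the comment):

```cpp
#include <cassert>
#include <vector>

int main() {
    const float start_value = 7.f;      // matches the "data" column start in the diagram
    const int n_iter = 4;
    std::vector<float> vals(n_iter);
    float val = start_value;
    for (int i = 0; i < n_iter; i++) {  // same recurrence as StaticShapeLoopTest::CalculateRefs
        val += i;
        vals[i] = val;
    }
    // expected "out" column from the diagram: 7, 8, 10, 13
    assert(vals[0] == 7.f && vals[1] == 8.f && vals[2] == 10.f && vals[3] == 13.f);
    return 0;
}
```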

View File

@ -26,13 +26,16 @@ std::string SplitLayerTest::getTestCaseName(testing::TestParamInfo<splitParams>
InferenceEngine::Precision netPrecision; InferenceEngine::Precision netPrecision;
InferenceEngine::Precision inPrc, outPrc; InferenceEngine::Precision inPrc, outPrc;
InferenceEngine::Layout inLayout, outLayout; InferenceEngine::Layout inLayout, outLayout;
InferenceEngine::SizeVector inputShapes; InferenceEngine::SizeVector inputShapes, outIndices;
std::string targetDevice; std::string targetDevice;
std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, targetDevice) = obj.param; std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, outIndices, targetDevice) = obj.param;
std::ostringstream result; std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
result << "numSplits=" << numSplits << "_"; result << "numSplits=" << numSplits << "_";
result << "axis=" << axis << "_"; result << "axis=" << axis << "_";
if (!outIndices.empty()) {
result << "outIndices" << CommonTestUtils::vec2str(outIndices) << "_";
}
result << "IS"; result << "IS";
result << "netPRC=" << netPrecision.name() << "_"; result << "netPRC=" << netPrecision.name() << "_";
result << "inPRC=" << inPrc.name() << "_"; result << "inPRC=" << inPrc.name() << "_";
@ -46,57 +49,14 @@ std::string SplitLayerTest::getTestCaseName(testing::TestParamInfo<splitParams>
void SplitLayerTest::SetUp() { void SplitLayerTest::SetUp() {
SetRefMode(LayerTestsUtils::RefMode::CONSTANT_FOLDING); SetRefMode(LayerTestsUtils::RefMode::CONSTANT_FOLDING);
size_t axis, numSplits; size_t axis, numSplits;
std::vector<size_t> inputShape; std::vector<size_t> inputShape, outIndices;
InferenceEngine::Precision netPrecision; InferenceEngine::Precision netPrecision;
std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, targetDevice) = this->GetParam();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
auto paramOuts = ngraph::helpers::convert2OutputVector(
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
auto split = std::dynamic_pointer_cast<ngraph::opset1::Split>(ngraph::builder::makeSplit(paramOuts[0],
ngPrc, numSplits, axis));
ngraph::ResultVector results;
for (int i = 0; i < numSplits; i++) {
results.push_back(std::make_shared<ngraph::opset1::Result>(split->output(i)));
}
function = std::make_shared<ngraph::Function>(results, params, "split");
}
TEST_P(SplitLayerTest, CompareWithRefs) {
Run();
};
std::string splitWithUnusedOutputsTest::getTestCaseName(testing::TestParamInfo<splitWithUnusedOutputsParams> obj) {
size_t numSplits, axis;
InferenceEngine::Precision netPrecision;
InferenceEngine::Precision inPrc, outPrc;
InferenceEngine::Layout inLayout, outLayout;
InferenceEngine::SizeVector inputShapes;
std::vector<size_t> outIndices;
std::string targetDevice;
std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, outIndices, targetDevice) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
result << "numSplits=" << numSplits << "_";
result << "axis=" << axis << "_";
result << "outIndices" << CommonTestUtils::vec2str(outIndices) << "_";
result << "IS";
result << "netPRC=" << netPrecision.name() << "_";
result << "inPRC=" << inPrc.name() << "_";
result << "outPRC=" << outPrc.name() << "_";
result << "inL=" << inLayout << "_";
result << "outL=" << outLayout << "_";
result << "trgDev=" << targetDevice;
return result.str();
}
void splitWithUnusedOutputsTest::SetUp() {
SetRefMode(LayerTestsUtils::RefMode::CONSTANT_FOLDING);
size_t axis, numSplits;
std::vector<size_t> inputShape;
InferenceEngine::Precision netPrecision;
std::vector<size_t> outIndices;
std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, outIndices, targetDevice) = this->GetParam(); std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, outIndices, targetDevice) = this->GetParam();
if (outIndices.empty()) {
for (int i = 0; i < numSplits; ++i) {
outIndices.push_back(i);
}
}
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
auto paramOuts = ngraph::helpers::convert2OutputVector( auto paramOuts = ngraph::helpers::convert2OutputVector(
@ -110,7 +70,7 @@ void splitWithUnusedOutputsTest::SetUp() {
function = std::make_shared<ngraph::Function>(results, params, "split"); function = std::make_shared<ngraph::Function>(results, params, "split");
} }
TEST_P(splitWithUnusedOutputsTest, CompareWithRefs) { TEST_P(SplitLayerTest, CompareWithRefs) {
Run(); Run();
}; };

View File

@ -0,0 +1,150 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include <functional>
#include "ie_core.hpp"
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "functional_test_utils/precision_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "functional_test_utils/skip_tests_config.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include <transformations/op_conversions/lstm_cell_decomposition.hpp>
#include "subgraph_tests/memory_eltwise_reshape_concat.hpp"
namespace SubgraphTestsDefinitions {
std::string MemoryEltwiseReshapeConcatTest::getTestCaseName(const testing::TestParamInfo<memoryEltwiseReshapeConcatParams> &obj) {
std::string targetDevice;
InferenceEngine::Precision netPrecision;
size_t inputSize;
size_t concatSize;
std::map<std::string, std::string> config;
std::tie(targetDevice, netPrecision, inputSize, concatSize, config) = obj.param;
std::ostringstream result;
result << "netPrecision=" << netPrecision.name() << "_";
result << "IS=" << inputSize << "_";
result << "CS=" << concatSize << "_";
result << "targetDevice=" << targetDevice;
return result.str();
}
void MemoryEltwiseReshapeConcatTest::SetUp() {
InferenceEngine::Precision netPrecision;
std::map<std::string, std::string> config;
std::tie(targetDevice, netPrecision, inputSize, concatSize, config) = this->GetParam();
configuration.insert(config.begin(), config.end());
ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
const int seed = 0;
std::mt19937 gen(seed);
auto generateFloatNumbers = [gen](std::size_t vec_len, float min, float max) mutable {
std::vector<float> res;
std::uniform_real_distribution<float> dist(min, max);
for (int i = 0; i < vec_len; i++)
res.emplace_back(static_cast<float>(dist(gen)));
return res;
};
memory_init = generateFloatNumbers(inputSize * concatSize, -1.0f, 1.0f);
concat_vals = generateFloatNumbers(concatSize, 12.0f, 14.0f);
}
void MemoryEltwiseReshapeConcatTest::initTestModel() {
InferenceEngine::SizeVector input_dims = {1, inputSize * concatSize};
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
auto memory_constant = ngraph::builder::makeConstant<float>(ngPrc, input_dims, memory_init);
memory_constant->set_friendly_name("memory_constant");
auto memory_read = std::make_shared<ngraph::op::ReadValue>(memory_constant, "memory");
memory_read->set_friendly_name("memory_read");
auto mul = ngraph::builder::makeEltwise(input_parameter[0], memory_read, ngraph::helpers::EltwiseTypes::MULTIPLY);
mul->set_friendly_name("multiplication");
auto memory_write = std::make_shared<ngraph::op::Assign>(mul, "memory");
memory_write->set_friendly_name("memory_write");
auto reshape_1_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<size_t>({inputSize, concatSize}));
reshape_1_pattern->set_friendly_name("reshape_pattern");
auto reshape_1 = std::make_shared<ngraph::op::v1::Reshape>(mul, reshape_1_pattern, false);
reshape_1->set_friendly_name("reshape");
auto concat_constant = ngraph::builder::makeConstant(ngPrc, {1, concatSize}, concat_vals);
concat_constant->set_friendly_name("concat_constant");
auto concat = ngraph::builder::makeConcat({concat_constant, reshape_1}, 0);
memory_write->add_control_dependency(memory_read);
concat->add_control_dependency(memory_write);
auto final_reshape_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{4},
std::vector<size_t>({1, 1, inputSize + 1, concatSize}));
auto final_reshape = std::make_shared<ngraph::op::v1::Reshape>(concat, final_reshape_pattern, false);
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "memory_multiply_reshape_concat");
}
void MemoryEltwiseReshapeConcatTest::initNgraphFriendlyModel() {
InferenceEngine::SizeVector input_dims = {1, inputSize * concatSize};
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
auto memory_constant = ngraph::builder::makeConstant<float>(ngPrc, input_dims, memory_init);
memory_constant->set_friendly_name("memory_constant");
auto mul = ngraph::builder::makeEltwise(input_parameter[0], memory_constant, ngraph::helpers::EltwiseTypes::MULTIPLY);
mul->set_friendly_name("multiplication");
auto reshape_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{3}, std::vector<size_t>({1, inputSize, concatSize}));
reshape_pattern->set_friendly_name("reshape_pattern");
auto reshape = std::make_shared<ngraph::op::v1::Reshape>(mul, reshape_pattern, false);
reshape->set_friendly_name("reshape");
auto squeeze_const = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, 0);
squeeze_const->set_friendly_name("squeeze_const");
auto squeeze = std::make_shared<ngraph::op::Squeeze>(reshape, squeeze_const);
squeeze->set_friendly_name("squeeze");
auto concat_constant = ngraph::builder::makeConstant(ngPrc, {1, concatSize}, concat_vals);
concat_constant->set_friendly_name("concat_constant");
auto concat = ngraph::builder::makeConcat({concat_constant, squeeze}, 0);
function = std::make_shared<ngraph::Function>(concat, input_parameter, "memory_multiply_reshape_concat");
}
void MemoryEltwiseReshapeConcatTest::Run() {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
initTestModel();
LoadNetwork();
InferenceEngine::TensorDesc state_description(InferenceEngine::Precision::FP32,
InferenceEngine::SizeVector({1, inputSize * concatSize}),
InferenceEngine::Layout::NC);
auto states = executableNetwork.QueryState();
auto state_values_blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description,
memory_init.data(), memory_init.size());
states[0].SetState(state_values_blob);
Infer();
initNgraphFriendlyModel();
Validate();
}
TEST_P(MemoryEltwiseReshapeConcatTest, CompareWithRefs) {
Run();
};
} // namespace SubgraphTestsDefinitions

View File

@ -104,6 +104,10 @@ void fill_data_with_broadcast(InferenceEngine::Blob::Ptr& blob, InferenceEngine:
auto src_ptr = get_data(values); auto src_ptr = get_data(values);
switch (blob->getTensorDesc().getPrecision()) { switch (blob->getTensorDesc().getPrecision()) {
case InferenceEngine::Precision::U64:
case InferenceEngine::Precision::I64:
copy_7D<uint64_t>(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims);
break;
case InferenceEngine::Precision::FP32: case InferenceEngine::Precision::FP32:
case InferenceEngine::Precision::I32: case InferenceEngine::Precision::I32:
copy_7D<uint32_t>(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); copy_7D<uint32_t>(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims);
@ -189,6 +193,12 @@ InferenceEngine::Blob::Ptr make_reshape_view(const InferenceEngine::Blob::Ptr &b
return new_blob; return new_blob;
} }
size_t byte_size(const InferenceEngine::TensorDesc &tdesc) {
auto prc = tdesc.getPrecision();
auto dims = tdesc.getDims();
return prc.size() * std::accumulate(std::begin(dims), std::end(dims), (size_t)1, std::multiplies<size_t>());
}
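In other words, the buffer size is the per-element size of the precision times the total element count. A small illustrative fragment in the same `CommonTestUtils` context (the `TensorDesc` values are arbitrary):

```cpp
// FP32 is 4 bytes per element; a {2, 3, 4} tensor holds 24 elements, so 96 bytes are required.
InferenceEngine::TensorDesc tdesc(InferenceEngine::Precision::FP32, {2, 3, 4},
                                  InferenceEngine::Layout::CHW);
size_t required = byte_size(tdesc);   // == 4 * 2 * 3 * 4 == 96
```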
/** /**
* repeated filling tensor with data. * repeated filling tensor with data.
* *

View File

@ -72,6 +72,14 @@ InferenceEngine::Blob::Ptr make_reshape_view(const InferenceEngine::Blob::Ptr &b
*/ */
void fill_data_const(InferenceEngine::Blob::Ptr& blob, float val); void fill_data_const(InferenceEngine::Blob::Ptr& blob, float val);
/**
* Calculate the size of the buffer required for the provided tensor descriptor.
* @param tdesc provided tensor descriptor
* @return size in bytes
*/
size_t byte_size(const InferenceEngine::TensorDesc &tdesc);
static void fill_data_bbox(float *data, size_t size, int height, int width, float omega) { static void fill_data_bbox(float *data, size_t size, int height, int width, float omega) {
float center_h = (height - 1.0f) / 2; float center_h = (height - 1.0f) / 2;
float center_w = (width - 1.0f) / 2; float center_w = (width - 1.0f) / 2;

View File

@ -60,6 +60,14 @@ GNA2_API Gna2Status Gna2DeviceClose(
return Gna2StatusSuccess; return Gna2StatusSuccess;
} }
GNA2_API Gna2Status Gna2DeviceGetCount(
uint32_t* numberOfDevices) {
if (numberOfDevices != nullptr) {
*numberOfDevices = 1;
}
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2MemoryFree( GNA2_API enum Gna2Status Gna2MemoryFree(
void * memory) { void * memory) {
return Gna2StatusSuccess; return Gna2StatusSuccess;
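The stub reports exactly one device whenever it is given a valid pointer. A caller-side sketch (not taken from the plugin sources) of how the new entry point is meant to be used:

```cpp
uint32_t numberOfDevices = 0;
if (Gna2DeviceGetCount(&numberOfDevices) == Gna2StatusSuccess && numberOfDevices > 0) {
    // at least one GNA device is available (this stub always reports exactly one)
}
```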

View File

@ -69,6 +69,14 @@ GNA2_API Gna2Status Gna2DeviceClose(
return Gna2StatusSuccess; return Gna2StatusSuccess;
} }
GNA2_API Gna2Status Gna2DeviceGetCount(
uint32_t * numberOfDevices) {
if (numberOfDevices != nullptr) {
*numberOfDevices = 1;
}
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2MemoryFree( GNA2_API enum Gna2Status Gna2MemoryFree(
void * memory) { void * memory) {
if (current != nullptr) { if (current != nullptr) {

View File

@ -60,10 +60,10 @@ public:
void * alloc(size_t size) noexcept override { void * alloc(size_t size) noexcept override {
return ptr; return ptr;
} }
virtual bool free(void* handle) noexcept { bool free(void* handle) noexcept override {
return true; return true;
} }
virtual void Release() noexcept { void Release() noexcept override {
delete this; delete this;
} }
}; };

View File

@ -102,6 +102,9 @@ class GNACppApi {
MOCK_METHOD1(Gna2DeviceClose, Gna2Status ( MOCK_METHOD1(Gna2DeviceClose, Gna2Status (
uint32_t deviceIndex)); uint32_t deviceIndex));
MOCK_METHOD1(Gna2DeviceGetCount, Gna2Status (
uint32_t * numberOfDevices));
MOCK_METHOD1(Gna2MemoryFree, Gna2Status ( MOCK_METHOD1(Gna2MemoryFree, Gna2Status (
void * memory)); void * memory));
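With `Gna2DeviceGetCount` added to the mock, a unit test can control the reported device count. A hedged sketch, assuming a `GNACppApi` instance named `mockApi` that is wired into the C stubs (the name and the wiring are assumptions):

```cpp
EXPECT_CALL(mockApi, Gna2DeviceGetCount(::testing::_))
    .WillOnce(::testing::Invoke([](uint32_t *numberOfDevices) {
        *numberOfDevices = 1;              // pretend a single GNA device is present
        return Gna2StatusSuccess;
    }));
```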

View File

@ -100,24 +100,23 @@ struct resample : public primitive_base<resample> {
/// @param scale Resample scale. /// @param scale Resample scale.
/// @param num_filter Input filter. Only used by bilinear sample_type. /// @param num_filter Input filter. Only used by bilinear sample_type.
/// @param sample_type Resample method (nearest neighbor/bilinear/caffe bilinear). /// @param sample_type Resample method (nearest neighbor/bilinear/caffe bilinear).
/// @param with_activation Enables Relu activation.
/// @param activation_slp Relu activation slope.
resample(const primitive_id& id, resample(const primitive_id& id,
const primitive_id& input, const primitive_id& input,
tensor output_size, tensor output_size,
uint32_t num_filter, uint32_t num_filter,
resample_type operation_type = resample_type::nearest, resample_type operation_type = resample_type::nearest,
bool with_activation = false,
float activation_slp = 0.0f,
const padding& output_padding = padding()) const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding), : primitive_base(id, {input}, output_padding),
output_size(output_size), output_size(output_size),
num_filter(num_filter), num_filter(num_filter),
axesAndScales({}),
pads_begin({}),
pads_end({}),
align_corners(1), align_corners(1),
operation_type(operation_type), operation_type(operation_type),
shape_calc_mode(shape_calculation_mode::sizes), shape_calc_mode(shape_calculation_mode::sizes),
with_activation(with_activation), antialias(0),
activation_negative_slope(activation_slp), cube_coeff(0.0f),
coord_trans_mode(coordinate_transformation_mode::asymmetric), coord_trans_mode(coordinate_transformation_mode::asymmetric),
round_mode(nearest_mode::floor) { round_mode(nearest_mode::floor) {
if (operation_type == resample_type::caffe_bilinear) { if (operation_type == resample_type::caffe_bilinear) {
@ -132,8 +131,6 @@ struct resample : public primitive_base<resample> {
/// @param pads_end Optional end padding for input. /// @param pads_end Optional end padding for input.
/// @param align_corners Align corner pixels of the input and output tensors. /// @param align_corners Align corner pixels of the input and output tensors.
/// @param resample_type Resample bilinear method. /// @param resample_type Resample bilinear method.
/// @param with_activation Enables Relu activation.
/// @param activation_slp Relu activation slope.
resample(const primitive_id& id, resample(const primitive_id& id,
const primitive_id& input, const primitive_id& input,
tensor output_size, tensor output_size,
@ -141,19 +138,18 @@ struct resample : public primitive_base<resample> {
std::vector<int32_t> pads_end = {}, std::vector<int32_t> pads_end = {},
int32_t align_corners = 1, int32_t align_corners = 1,
resample_type operation_type = resample_type::bilinear, resample_type operation_type = resample_type::bilinear,
bool with_activation = false,
float activation_slp = 0.0f,
const padding& output_padding = padding()) const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding), : primitive_base(id, {input}, output_padding),
output_size(output_size), output_size(output_size),
num_filter(0), num_filter(0),
axesAndScales({}),
pads_begin(pads_begin), pads_begin(pads_begin),
pads_end(pads_end), pads_end(pads_end),
align_corners(align_corners), align_corners(align_corners),
operation_type(operation_type), operation_type(operation_type),
shape_calc_mode(shape_calculation_mode::sizes), shape_calc_mode(shape_calculation_mode::sizes),
with_activation(with_activation), antialias(0),
activation_negative_slope(activation_slp), cube_coeff(0.0f),
coord_trans_mode(coordinate_transformation_mode::asymmetric), coord_trans_mode(coordinate_transformation_mode::asymmetric),
round_mode(nearest_mode::floor) {} round_mode(nearest_mode::floor) {}
@ -170,19 +166,20 @@ struct resample : public primitive_base<resample> {
std::vector<int32_t> pads_end = {}, std::vector<int32_t> pads_end = {},
int32_t antialias = 0, int32_t antialias = 0,
float cube_coeff = -0.75f, float cube_coeff = -0.75f,
resample_type mode = resample_type::caffe_bilinear, resample_type operation_type = resample_type::caffe_bilinear,
shape_calculation_mode shape_calc_mode = shape_calculation_mode::sizes, shape_calculation_mode shape_calc_mode = shape_calculation_mode::sizes,
coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel, coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel,
nearest_mode nm = nearest_mode::round_prefer_floor, nearest_mode nm = nearest_mode::round_prefer_floor,
const padding& output_padding = padding()) const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding), : primitive_base(id, {input}, output_padding),
output_size(output_size), output_size(output_size),
num_filter(0),
axesAndScales(axesAndScales), axesAndScales(axesAndScales),
pads_begin(pads_begin), pads_begin(pads_begin),
pads_end(pads_end), pads_end(pads_end),
operation_type(mode), align_corners(1),
operation_type(operation_type),
shape_calc_mode(shape_calc_mode), shape_calc_mode(shape_calc_mode),
with_activation(false),
antialias(antialias), antialias(antialias),
cube_coeff(cube_coeff), cube_coeff(cube_coeff),
coord_trans_mode(ctm), coord_trans_mode(ctm),
@ -200,21 +197,17 @@ struct resample : public primitive_base<resample> {
std::vector<int32_t> pads_end; std::vector<int32_t> pads_end;
/// @param align_corners corner pixels of the input and output tensors /// @param align_corners corner pixels of the input and output tensors
int32_t align_corners; int32_t align_corners;
/// @param sample_type Resample method (nearest neighbor/bilinear/caffe bilinear). /// @param operation_type Resample method (nearest neighbor/bilinear/caffe bilinear).
resample_type operation_type; resample_type operation_type;
/// @param shape_calc_mode Specifies which input, sizes or scales, is used to calculate an output shape. /// @param shape_calc_mode Specifies which input, sizes or scales, is used to calculate an output shape.
shape_calculation_mode shape_calc_mode; shape_calculation_mode shape_calc_mode;
/// @brief Enables Relu activation.
bool with_activation;
/// @brief Relu activation slope.
float activation_negative_slope;
/// @param antialias is a flag that specifies whether to perform anti-aliasing. /// @param antialias is a flag that specifies whether to perform anti-aliasing.
int32_t antialias; int32_t antialias;
/// @param cube_coeff specifies the parameter a for cubic interpolation. cube_coeff is used only when mode == cubic. /// @param cube_coeff specifies the parameter a for cubic interpolation. cube_coeff is used only when mode == cubic.
float cube_coeff; float cube_coeff;
/// @param specifies how to transform the coordinate in the resized tensor to the coordinate in the original tensor /// @param coord_trans_mode specifies how to transform the coordinate in the resized tensor to the coordinate in the original tensor
coordinate_transformation_mode coord_trans_mode; coordinate_transformation_mode coord_trans_mode;
/// @param specifies round mode when mode == nearest and is used only when mode == nearest. /// @param round_mode specifies round mode when mode == nearest and is used only when mode == nearest.
nearest_mode round_mode; nearest_mode round_mode;
}; };
/// @} /// @}

View File

@ -34,7 +34,7 @@ public:
}; };
} }
JitConstants GetJitConstants(const eltwise_params& params) const; JitConstants GetJitConstants(const eltwise_params& params) const override;
protected: protected:
bool Validate(const Params& p, const optional_params& o) const override; bool Validate(const Params& p, const optional_params& o) const override;

View File

@ -24,7 +24,7 @@ class ReduceKernel_b_fs_yx_fsv16 : public ReduceKernelBase {
public: public:
ReduceKernel_b_fs_yx_fsv16() : ReduceKernelBase("reduce_gpu_b_fs_yx_fsv16") {} ReduceKernel_b_fs_yx_fsv16() : ReduceKernelBase("reduce_gpu_b_fs_yx_fsv16") {}
virtual ~ReduceKernel_b_fs_yx_fsv16() {} virtual ~ReduceKernel_b_fs_yx_fsv16() {}
virtual CommonDispatchData SetDefault(const reduce_params& params, const optional_params&) const; CommonDispatchData SetDefault(const reduce_params& params, const optional_params&) const override;
JitConstants GetJitConstants(const reduce_params& params) const override; JitConstants GetJitConstants(const reduce_params& params) const override;
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override; ParamsKey GetSupportedKey() const override;

View File

@ -24,7 +24,7 @@ class ReduceKernelRef : public ReduceKernelBase {
public: public:
ReduceKernelRef() : ReduceKernelBase("reduce_ref") {} ReduceKernelRef() : ReduceKernelBase("reduce_ref") {}
virtual ~ReduceKernelRef() {} virtual ~ReduceKernelRef() {}
virtual CommonDispatchData SetDefault(const reduce_params& params, const optional_params&) const; CommonDispatchData SetDefault(const reduce_params& params, const optional_params&) const override;
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override; ParamsKey GetSupportedKey() const override;
JitConstants GetJitConstants(const reduce_params& params) const override; JitConstants GetJitConstants(const reduce_params& params) const override;

View File

@ -50,7 +50,7 @@ public:
protected: protected:
virtual CommonDispatchData SetDefault(const space_to_depth_params& params, const optional_params&) const; virtual CommonDispatchData SetDefault(const space_to_depth_params& params, const optional_params&) const;
virtual JitConstants GetJitConstants(const space_to_depth_params& params) const; virtual JitConstants GetJitConstants(const space_to_depth_params& params) const;
virtual bool Validate(const Params& p, const optional_params& o) const; bool Validate(const Params& p, const optional_params& o) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override { std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::ELTWISE, return { FusedOpType::ELTWISE,
FusedOpType::QUANTIZE, FusedOpType::QUANTIZE,

View File

@ -55,7 +55,7 @@ public:
} }
std::shared_ptr<gpu_toolkit> get_context() const { return _ctx; } std::shared_ptr<gpu_toolkit> get_context() const { return _ctx; }
cl::Event get() { return _event; } cl::Event get() override { return _event; }
private: private:
std::shared_ptr<gpu_toolkit> _ctx; std::shared_ptr<gpu_toolkit> _ctx;
@ -91,7 +91,7 @@ public:
_attached = true; _attached = true;
} }
cl::Event get() { return _last_ocl_event; } cl::Event get() override { return _last_ocl_event; }
std::shared_ptr<gpu_toolkit> get_context() const { return _ctx; } std::shared_ptr<gpu_toolkit> get_context() const { return _ctx; }
private: private:

View File

@ -118,9 +118,6 @@ struct resample_gpu : typed_primitive_gpu_impl<resample> {
get_default_optional_params<kernel_selector::resample_optional_params>(arg.get_program()); get_default_optional_params<kernel_selector::resample_optional_params>(arg.get_program());
const auto& primitive = arg.get_primitive(); const auto& primitive = arg.get_primitive();
if (primitive->with_activation)
convert_activation_func_params(primitive, us_params.activations);
size_t dimsNum = arg.get_output_layout().format.dimension(); size_t dimsNum = arg.get_output_layout().format.dimension();
us_params.resampleType = convert_to_sample_type(primitive->operation_type); us_params.resampleType = convert_to_sample_type(primitive->operation_type);
us_params.nearestMode = convert_to_nearest_mode(primitive->round_mode); us_params.nearestMode = convert_to_nearest_mode(primitive->round_mode);

View File

@ -118,7 +118,6 @@ std::string resample_inst::to_string(resample_node const& node) {
resample_info.add("nearest_mode:", "simple"); resample_info.add("nearest_mode:", "simple");
resample_info.add("output_size", desc->output_size); resample_info.add("output_size", desc->output_size);
resample_info.add("with activation", desc->with_activation);
resample_info.add("output padding lower size", desc->output_padding.lower_size()); resample_info.add("output padding lower size", desc->output_padding.lower_size());
resample_info.add("output padding upper size", desc->output_padding.upper_size()); resample_info.add("output padding upper size", desc->output_padding.upper_size());

@ -1 +1 @@
Subproject commit d7d8ed46078b637794bc91215e1a982bb0f1683a Subproject commit 5ef085d5af65e8966e03cdfcbaa65761d61a5c9a

View File

@ -343,6 +343,8 @@ extensions/front/tf/__init__.py
extensions/front/tf/activation_ext.py extensions/front/tf/activation_ext.py
extensions/front/tf/argmax_ext.py extensions/front/tf/argmax_ext.py
extensions/front/tf/assign_elimination.py extensions/front/tf/assign_elimination.py
extensions/front/tf/automl_efficientdet.json
extensions/front/tf/AutomlEfficientDet.py
extensions/front/tf/basic_lstm_cell.py extensions/front/tf/basic_lstm_cell.py
extensions/front/tf/batch_to_space_ext.py extensions/front/tf/batch_to_space_ext.py
extensions/front/tf/BatchMatMul_ext.py extensions/front/tf/BatchMatMul_ext.py

View File

@ -15,9 +15,10 @@
""" """
from mo.front.common.partial_infer.utils import int64_array from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementOp from mo.front.common.replacement import FrontReplacementOp
from mo.graph.graph import Node, Graph from mo.front.tf.graph_utils import create_op_with_const_inputs
from mo.graph.graph import Node, Graph, rename_nodes
from mo.ops.concat import Concat from mo.ops.concat import Concat
from mo.ops.expand_dims import ExpandDims from mo.ops.unsqueeze import Unsqueeze
class Pack(FrontReplacementOp): class Pack(FrontReplacementOp):
@ -25,15 +26,15 @@ class Pack(FrontReplacementOp):
enabled = True enabled = True
def replace_op(self, graph: Graph, node: Node): def replace_op(self, graph: Graph, node: Node):
out_node = Concat(graph, {'axis': node.axis, 'in_ports_count': len(node.in_ports()), out_node = Concat(graph, {'axis': node.axis, 'in_ports_count': len(node.in_ports())}).create_node()
'name': node.name + '/Concat_', }).create_node() pack_name = node.soft_get('name', node.id)
for ind in node.in_ports(): for ind in node.in_ports():
expand_dims_node = ExpandDims(graph, {'expand_axis': int64_array([node.axis]), unsqueeze_node = create_op_with_const_inputs(graph, Unsqueeze, {1: int64_array([node.axis])},
'name': node.name + '/ExpandDims_'}).create_node() {'name': node.soft_get('name', node.id) + '/Unsqueeze'})
node.in_port(ind).get_connection().set_destination(expand_dims_node.in_port(0)) node.in_port(ind).get_connection().set_destination(unsqueeze_node.in_port(0))
expand_dims_node.out_port(0).connect(out_node.in_port(ind)) unsqueeze_node.out_port(0).connect(out_node.in_port(ind))
# Replace edge from out port 0 of the matched node with a edge from node out_node.id with port 0.
# The "explicit" version of the return value is: [(out_node.id, 0)]) rename_nodes([(node, pack_name + '/TBR'), (out_node, pack_name)])
return [out_node.id] return [out_node.id]

View File

@ -20,6 +20,7 @@ import numpy as np
from generator import generator, generate from generator import generator, generate
from extensions.front.Pack import Pack from extensions.front.Pack import Pack
from mo.front.common.partial_infer.utils import int64_array
from mo.utils.ir_engine.compare_graphs import compare_graphs from mo.utils.ir_engine.compare_graphs import compare_graphs
from mo.utils.unittest.graph import build_graph from mo.utils.unittest.graph import build_graph
@ -32,12 +33,16 @@ nodes_attributes = {
'pack': {'axis': None, 'type': None, 'kind': 'op', 'op': 'Pack'}, 'pack': {'axis': None, 'type': None, 'kind': 'op', 'op': 'Pack'},
# Test operation # Test operation
'last': {'type': None, 'value': None, 'kind': 'op', 'op': None}, 'last': {'type': None, 'value': None, 'kind': 'op', 'op': None},
# ExpandDims, Concat and Const operations # Unsqueeze, Concat and Const operations
'const_1': {'value': None, 'type': None, 'kind': 'op', 'op': 'Const'}, 'const_1': {'value': None, 'type': None, 'kind': 'op', 'op': 'Const'},
'ExpandDims_0': {'expand_axis': None, 'type': None, 'kind': 'op', 'op': 'ExpandDims'}, 'Unsqueeze_0': {'type': 'Unsqueeze', 'kind': 'op', 'op': 'Unsqueeze'},
'ExpandDims_1': {'expand_axis': None, 'type': None, 'kind': 'op', 'op': 'ExpandDims'}, 'Unsqueeze_1': {'type': 'Unsqueeze', 'kind': 'op', 'op': 'Unsqueeze'},
'ExpandDims_2': {'expand_axis': None, 'type': None, 'kind': 'op', 'op': 'ExpandDims'}, 'Unsqueeze_2': {'type': 'Unsqueeze', 'kind': 'op', 'op': 'Unsqueeze'},
'ExpandDims_3': {'expand_axis': None, 'type': None, 'kind': 'op', 'op': 'ExpandDims'}, 'Unsqueeze_3': {'type': 'Unsqueeze', 'kind': 'op', 'op': 'Unsqueeze'},
'Unsqueeze_0_axis': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': None, 'value': None},
'Unsqueeze_1_axis': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': None, 'value': None},
'Unsqueeze_2_axis': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': None, 'value': None},
'Unsqueeze_3_axis': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': None, 'value': None},
'concat_1': {'axis': None, 'type': 'Concat', 'kind': 'op', 'op': 'Concat'}, 'concat_1': {'axis': None, 'type': 'Concat', 'kind': 'op', 'op': 'Concat'},
} }
@ -65,15 +70,17 @@ class PackTest(unittest.TestCase):
graph_ref_edges = [] graph_ref_edges = []
for i in range(num_inputs - num_placeholders + 1): for i in range(num_inputs - num_placeholders + 1):
for j in range(num_placeholders): for j in range(num_placeholders):
graph_ref_edges.append(('placeholder_{}'.format(j), 'ExpandDims_{}'.format(i + j))) graph_ref_edges.append(('placeholder_{}'.format(j), 'Unsqueeze_{}'.format(i + j)))
graph_ref_edges.append(('ExpandDims_{}'.format(i + j), 'concat_1')) graph_ref_edges.append(('Unsqueeze_{}'.format(i + j), 'concat_1'))
graph_ref_edges.append(('concat_1', 'last')) graph_ref_edges.append(('concat_1', 'last'))
update_graph_ref_attributes = {} update_graph_ref_attributes = {}
for i in range(num_placeholders): for i in range(num_placeholders):
update_graph_ref_attributes['placeholder_{}'.format(i)] = {'shape': np.array([1, 227, 227, 3])} update_graph_ref_attributes['placeholder_{}'.format(i)] = {'shape': np.array([1, 227, 227, 3])}
for i in range(num_inputs): for i in range(num_inputs):
update_graph_ref_attributes['ExpandDims_{}'.format(i)] = {'expand_axis': np.array([axis])} graph_ref_edges.append(('Unsqueeze_{}_axis'.format(i), 'Unsqueeze_{}'.format(i)))
update_graph_ref_attributes['Unsqueeze_{}_axis'.format(i)] = {'shape': int64_array([1]),
'value': int64_array([axis])}
update_graph_ref_attributes['concat_1'] = {'axis': axis} update_graph_ref_attributes['concat_1'] = {'axis': axis}
graph_ref = build_graph(nodes_attributes, graph_ref_edges, update_graph_ref_attributes, graph_ref = build_graph(nodes_attributes, graph_ref_edges, update_graph_ref_attributes,

View File

@ -0,0 +1,140 @@
"""
Copyright (C) 2018-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import numpy as np
from extensions.front.Pack import Pack
from extensions.front.TransposeOrderNormalizer import TransposeOrderNormalizer
from extensions.front.eltwise_n import EltwiseNReplacement
from extensions.front.tf.pad_tf_to_pad import PadTFToPad
from extensions.ops.DetectionOutput import DetectionOutput
from extensions.ops.activation_ops import Sigmoid
from extensions.ops.priorbox_clustered import PriorBoxClusteredOp
from mo.front.common.partial_infer.utils import int64_array
from mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral
from mo.graph.graph import Graph, Node
from mo.middle.passes.convert_data_type import data_type_str_to_np
from mo.ops.concat import Concat
from mo.ops.const import Const
from mo.ops.reshape import Reshape
from mo.ops.result import Result
class EfficientDet(FrontReplacementFromConfigFileGeneral):
replacement_id = 'AutomlEfficientDet'
def run_before(self):
from extensions.front.ExpandDimsToUnsqueeze import ExpandDimsToUnsqueeze
return [ExpandDimsToUnsqueeze, Pack, TransposeOrderNormalizer, PadTFToPad, EltwiseNReplacement]
class AnchorGenerator:
def __init__(self, min_level, aspect_ratios, num_scales, anchor_scale):
self.min_level = min_level
self.aspect_ratios = aspect_ratios
self.anchor_scale = anchor_scale
self.scales = [2 ** (float(s) / num_scales) for s in range(num_scales)]
def get(self, layer_id):
widths = []
heights = []
for s in self.scales:
for a in self.aspect_ratios:
base_anchor_size = 2 ** (self.min_level + layer_id) * self.anchor_scale
heights.append(base_anchor_size * s * a[1])
widths.append(base_anchor_size * s * a[0])
return widths, heights
def transform_graph(self, graph: Graph, replacement_descriptions: dict):
parameter_node = graph.get_op_nodes(op='Parameter')[0]
parameter_node['data_type'] = data_type_str_to_np(parameter_node.graph.graph['cmd_params'].data_type)
parameter_node.out_port(0).disconnect()
# remove existing Result operations to remove unsupported sub-graph
graph.remove_nodes_from([node.id for node in graph.get_op_nodes(op='Result')] + ['detections'])
# find the ops which are the input and the final result of applying the mean value and scale to the input tensor,
# then connect the latter to the input of the first convolution of the model, so the image pre-processing
# (which includes padding and resizing) is removed from the model
preprocessing_input_node_id = replacement_descriptions['preprocessing_input_node']
assert preprocessing_input_node_id in graph.nodes, 'The node with name "{}" is not found in the graph. This ' \
'node should provide scaled image output and is specified' \
' in the json file.'.format(preprocessing_input_node_id)
preprocessing_input_node = Node(graph, preprocessing_input_node_id)
preprocessing_input_node.in_port(0).get_connection().set_source(parameter_node.out_port(0))
preprocessing_output_node_id = replacement_descriptions['preprocessing_output_node']
assert preprocessing_output_node_id in graph.nodes, 'The node with name "{}" is not found in the graph. This ' \
'node should provide scaled image output and is specified' \
' in the json file.'.format(preprocessing_output_node_id)
preprocessing_output_node = Node(graph, preprocessing_output_node_id)
preprocessing_output_node.out_port(0).disconnect()
convolution_nodes = [n for n in graph.pseudo_topological_sort() if n.soft_get('type') == 'Convolution']
convolution_nodes[0].in_port(0).get_connection().set_source(preprocessing_output_node.out_port(0))
# create prior boxes (anchors) generator
aspect_ratios = replacement_descriptions['aspect_ratios']
assert len(aspect_ratios) % 2 == 0
aspect_ratios = list(zip(aspect_ratios[::2], aspect_ratios[1::2]))
priors_generator = self.AnchorGenerator(min_level=int(replacement_descriptions['min_level']),
aspect_ratios=aspect_ratios,
num_scales=int(replacement_descriptions['num_scales']),
anchor_scale=replacement_descriptions['anchor_scale'])
prior_boxes = []
for i in range(100):
inp_name = 'box_net/box-predict{}/BiasAdd'.format('_%d' % i if i else '')
if inp_name not in graph:
break
widths, heights = priors_generator.get(i)
prior_box_op = PriorBoxClusteredOp(graph, {'width': np.array(widths),
'height': np.array(heights),
'clip': 0, 'flip': 0,
'variance': replacement_descriptions['variance'],
'offset': 0.5})
prior_boxes.append(prior_box_op.create_node([Node(graph, inp_name), parameter_node]))
# concatenate prior box operations
concat_prior_boxes = Concat(graph, {'axis': -1}).create_node()
for idx, node in enumerate(prior_boxes):
concat_prior_boxes.add_input_port(idx)
concat_prior_boxes.in_port(idx).connect(node.out_port(0))
conf = Sigmoid(graph, dict(name='concat/sigmoid')).create_node([Node(graph, 'concat')])
reshape_size_node = Const(graph, {'value': int64_array([0, -1])}).create_node([])
logits = Reshape(graph, dict(name=conf.name + '/Flatten')).create_node([conf, reshape_size_node])
deltas = Reshape(graph, dict(name='concat_1/Flatten')).create_node([Node(graph, 'concat_1'), reshape_size_node])
# revert convolution boxes prediction weights from yxYX to xyXY (convolutions share weights and bias)
weights = Node(graph, 'box_net/box-predict/pointwise_kernel')
weights.value = weights.value.reshape(-1, 4)[:, [1, 0, 3, 2]].reshape(weights.shape)
bias = Node(graph, 'box_net/box-predict/bias')
bias.value = bias.value.reshape(-1, 4)[:, [1, 0, 3, 2]].reshape(bias.shape)
detection_output_node = DetectionOutput(graph, dict(
name='detections',
num_classes=int(replacement_descriptions['num_classes']),
share_location=1,
background_label_id=int(replacement_descriptions['num_classes']) + 1,
nms_threshold=replacement_descriptions['nms_threshold'],
confidence_threshold=replacement_descriptions['confidence_threshold'],
top_k=100,
keep_top_k=100,
code_type='caffe.PriorBoxParameter.CENTER_SIZE',
)).create_node([deltas, logits, concat_prior_boxes])
output_op = Result(graph, dict(name='output'))
output_op.create_node([detection_output_node])

View File

@ -0,0 +1,18 @@
[
{
"id": "AutomlEfficientDet",
"custom_attributes": {
"preprocessing_input_node": "convert_image",
"preprocessing_output_node": "truediv",
"aspect_ratios": [1.0, 1.0, 1.4, 0.7, 0.7, 1.4],
"variance": [1.0, 1.0, 1.0, 1.0],
"min_level": 3,
"num_scales": 3,
"anchor_scale": 4.0,
"num_classes": 90,
"nms_threshold": 0.6,
"confidence_threshold": 0.2
},
"match_kind": "general"
}
]
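With these attributes, the `AnchorGenerator` in `AutomlEfficientDet.py` yields, for the first prior-box layer (`layer_id = 0`): `base_anchor_size = 2 ** (3 + 0) * 4.0 = 32`, with scales `2 ** (0/3) = 1.0`, `2 ** (1/3) ≈ 1.26` and `2 ** (2/3) ≈ 1.59`. The aspect-ratio pair (1.0, 1.0) therefore produces anchors of roughly 32×32, 40.3×40.3 and 50.8×50.8 pixels, while (1.4, 0.7) and (0.7, 1.4) stretch width and height asymmetrically (e.g. 44.8×22.4 for the first scale).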

View File

@ -32,14 +32,14 @@ class Unsqueeze(Op):
def __init__(self, graph, attrs: dict): def __init__(self, graph, attrs: dict):
super().__init__(graph, { super().__init__(graph, {
'op': __class__.op, 'op': self.op,
'type': __class__.op, 'type': self.op,
'version': 'opset1', 'version': 'opset1',
'unsqueeze_dims': None, 'unsqueeze_dims': None,
'reinterp_shape': True, 'reinterp_shape': True,
'in_ports_count': 2, 'in_ports_count': 2,
'out_ports_count': 1, 'out_ports_count': 1,
'infer': __class__.infer 'infer': self.infer
}, attrs) }, attrs)
@staticmethod @staticmethod

Some files were not shown because too many files have changed in this diff.