Merge branch 'master' into topk

This commit is contained in:
Mateusz Tabaka 2020-11-03 10:22:55 +01:00
commit a7c8365446
139 changed files with 4038 additions and 2282 deletions

View File

@ -60,7 +60,7 @@ function(build_ngraph)
ngraph_set(NGRAPH_UNIT_TEST_ENABLE FALSE) ngraph_set(NGRAPH_UNIT_TEST_ENABLE FALSE)
endif() endif()
if(NOT (ANDROID OR WINDOWS_STORE)) if(NOT (ANDROID OR WINDOWS_STORE OR (MSVC AND (ARM OR AARCH64)) ))
ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE TRUE) ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE TRUE)
else() else()
ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE FALSE) ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE FALSE)

View File

@ -26,7 +26,7 @@
- [Build Steps](#build-steps-3) - [Build Steps](#build-steps-3)
- [Use Custom OpenCV Builds for Inference Engine](#use-custom-opencv-builds-for-inference-engine) - [Use Custom OpenCV Builds for Inference Engine](#use-custom-opencv-builds-for-inference-engine)
- [Add Inference Engine to Your Project](#add-inference-engine-to-your-project) - [Add Inference Engine to Your Project](#add-inference-engine-to-your-project)
- [(Optional) Additional Installation Steps for the Intel® Movidius™ Neural Compute Stick and Neural Compute Stick 2](#optional-additional-installation-steps-for-the-intel-movidius-neural-compute-stick-and-neural-compute-stick-2) - [(Optional) Additional Installation Steps for the Intel® Neural Compute Stick 2](#optional-additional-installation-steps-for-the-intel-movidius-neural-compute-stick-and-neural-compute-stick-2)
- [For Linux, Raspbian Stretch* OS](#for-linux-raspbian-stretch-os) - [For Linux, Raspbian Stretch* OS](#for-linux-raspbian-stretch-os)
- [Next Steps](#next-steps) - [Next Steps](#next-steps)
- [Additional Resources](#additional-resources) - [Additional Resources](#additional-resources)
@ -43,7 +43,7 @@ The open source version of Inference Engine includes the following plugins:
| CPU plugin | Intel® Xeon® with Intel® AVX2 and AVX512, Intel® Core™ Processors with Intel® AVX2, Intel® Atom® Processors with Intel® SSE | | CPU plugin | Intel® Xeon® with Intel® AVX2 and AVX512, Intel® Core™ Processors with Intel® AVX2, Intel® Atom® Processors with Intel® SSE |
| GPU plugin | Intel® Processor Graphics, including Intel® HD Graphics and Intel® Iris® Graphics | | GPU plugin | Intel® Processor Graphics, including Intel® HD Graphics and Intel® Iris® Graphics |
| GNA plugin | Intel® Speech Enabling Developer Kit, Amazon Alexa\* Premium Far-Field Developer Kit, Intel® Pentium® Silver processor J5005, Intel® Celeron® processor J4005, Intel® Core™ i3-8121U processor | | GNA plugin | Intel® Speech Enabling Developer Kit, Amazon Alexa\* Premium Far-Field Developer Kit, Intel® Pentium® Silver processor J5005, Intel® Celeron® processor J4005, Intel® Core™ i3-8121U processor |
| MYRIAD plugin | Intel® Movidius™ Neural Compute Stick powered by the Intel® Movidius™ Myriad™ 2, Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X | | MYRIAD plugin | Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X |
| Heterogeneous plugin | Heterogeneous plugin enables computing for inference on one network on several Intel® devices. | | Heterogeneous plugin | Heterogeneous plugin enables computing for inference on one network on several Intel® devices. |
## Build on Linux\* Systems ## Build on Linux\* Systems
@ -608,11 +608,11 @@ include_directories(${InferenceEngine_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} ${InferenceEngine_LIBRARIES} dl) target_link_libraries(${PROJECT_NAME} ${InferenceEngine_LIBRARIES} dl)
``` ```
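For context, the two lines above normally sit in a small consumer project. A minimal CMakeLists.txt sketch, assuming a single-executable project built from a hypothetical `main.cpp`:

```cmake
cmake_minimum_required(VERSION 3.13)
project(ie_sample)

# Locate an installed Inference Engine package; this defines the
# InferenceEngine_INCLUDE_DIRS and InferenceEngine_LIBRARIES variables used below.
find_package(InferenceEngine REQUIRED)

add_executable(${PROJECT_NAME} main.cpp)

include_directories(${InferenceEngine_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} ${InferenceEngine_LIBRARIES} dl)
```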
## (Optional) Additional Installation Steps for the Intel® Movidius™ Neural Compute Stick and Neural Compute Stick 2 ## (Optional) Additional Installation Steps for the Intel® Neural Compute Stick 2
-> **NOTE**: These steps are only required if you want to perform inference on
-Intel® Movidius™ Neural Compute Stick or the Intel® Neural Compute Stick 2 using
-the Inference Engine MYRIAD Plugin. See also [Intel® Neural Compute Stick 2 Get Started].
+> **NOTE**: These steps are only required if you want to perform inference on the
+Intel® Neural Compute Stick 2 using the Inference Engine MYRIAD Plugin. See also
+[Intel® Neural Compute Stick 2 Get Started].
### For Linux, Raspbian\* Stretch OS ### For Linux, Raspbian\* Stretch OS
@ -622,11 +622,10 @@ the Inference Engine MYRIAD Plugin. See also [Intel® Neural Compute Stick 2 Get
sudo usermod -a -G users "$(whoami)" sudo usermod -a -G users "$(whoami)"
``` ```
-2. To perform inference on Intel® Movidius™ Neural Compute Stick and Intel®
-Neural Compute Stick 2, install the USB rules as follows:
+2. To perform inference on Intel® Neural Compute Stick 2, install the USB rules
+as follows:
```sh ```sh
cat <<EOF > 97-myriad-usbboot.rules cat <<EOF > 97-myriad-usbboot.rules
SUBSYSTEM=="usb", ATTRS{idProduct}=="2150", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1"
SUBSYSTEM=="usb", ATTRS{idProduct}=="2485", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1" SUBSYSTEM=="usb", ATTRS{idProduct}=="2485", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1"
SUBSYSTEM=="usb", ATTRS{idProduct}=="f63b", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1" SUBSYSTEM=="usb", ATTRS{idProduct}=="f63b", ATTRS{idVendor}=="03e7", GROUP="users", MODE="0666", ENV{ID_MM_DEVICE_IGNORE}="1"
EOF EOF
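The heredoc above only creates the rules file in the current directory. A sketch of the usual follow-up steps for the MYRIAD plugin's udev setup (file locations may differ on your distribution):

```sh
# Install the rules so udev grants the "users" group access to the device,
# then reload and re-trigger the rules without rebooting.
sudo cp 97-myriad-usbboot.rules /etc/udev/rules.d/
sudo udevadm control --reload-rules
sudo udevadm trigger
sudo ldconfig
```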

View File

@ -15,10 +15,6 @@ else()
SET(ARCH_64 OFF) SET(ARCH_64 OFF)
endif() endif()
if (NOT ENABLE_MKL_DNN)
set(ENABLE_MKL OFF)
endif()
if(ENABLE_AVX512F) if(ENABLE_AVX512F)
if ((CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") AND (MSVC_VERSION VERSION_LESS 1920)) if ((CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") AND (MSVC_VERSION VERSION_LESS 1920))
# 1920 version of MSVC 2019. In MSVC 2017 AVX512F not work # 1920 version of MSVC 2019. In MSVC 2017 AVX512F not work

View File

@ -4,10 +4,27 @@
cmake_minimum_required(VERSION 3.13) cmake_minimum_required(VERSION 3.13)
# Detect target
include(target_flags)
string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH_FOLDER)
if(X86_64)
set(ARCH_FOLDER intel64)
elseif(X86)
set(ARCH_FOLDER ia32)
elseif(MSVC AND ARM)
set(ARCH_FOLDER arm)
elseif(MSVC AND AARCH64)
set(ARCH_FOLDER arm64)
endif()
list(APPEND CMAKE_MODULE_PATH list(APPEND CMAKE_MODULE_PATH
"${OpenVINO_MAIN_SOURCE_DIR}/cmake/download" "${OpenVINO_MAIN_SOURCE_DIR}/cmake/download"
"${OpenVINO_MAIN_SOURCE_DIR}/cmake/cross_compile" "${OpenVINO_MAIN_SOURCE_DIR}/cmake/cross_compile")
)
#
# CPack
#
include(CPackComponent) include(CPackComponent)
unset(IE_CPACK_COMPONENTS_ALL CACHE) unset(IE_CPACK_COMPONENTS_ALL CACHE)
@ -33,21 +50,14 @@ endif()
# Set library directory for cpack # Set library directory for cpack
# #
function(ie_cpack_set_library_dir) function(ie_cpack_set_library_dir)
string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH)
if(ARCH STREQUAL "x86_64" OR ARCH STREQUAL "amd64") # Windows detects Intel's 64-bit CPU as AMD64
set(ARCH intel64)
elseif(ARCH STREQUAL "i386")
set(ARCH ia32)
endif()
if(WIN32) if(WIN32)
set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH}/${CMAKE_BUILD_TYPE} PARENT_SCOPE) set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/bin/${ARCH}/${CMAKE_BUILD_TYPE} PARENT_SCOPE) set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/bin/${ARCH_FOLDER}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH}/${CMAKE_BUILD_TYPE} PARENT_SCOPE) set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER}/${CMAKE_BUILD_TYPE} PARENT_SCOPE)
else() else()
set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH} PARENT_SCOPE) set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER} PARENT_SCOPE)
set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH} PARENT_SCOPE) set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER} PARENT_SCOPE)
set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH} PARENT_SCOPE) set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH_FOLDER} PARENT_SCOPE)
endif() endif()
endfunction() endfunction()
@ -109,28 +119,19 @@ function(set_temp_directory temp_variable source_tree_dir)
endif() endif()
endfunction() endfunction()
#
# Common scripts
#
include(coverage/coverage) include(coverage/coverage)
include(shellcheck/shellcheck) include(shellcheck/shellcheck)
# External dependencies # External dependencies
find_package(Threads) find_package(Threads)
# Detect target
include(target_flags)
# printing debug messages # printing debug messages
include(debug) include(debug)
# linking libraries without discarding symbols
include(whole_archive)
string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH_FOLDER)
if(X86_64)
set(ARCH_FOLDER intel64)
elseif(X86)
set(ARCH_FOLDER ia32)
endif()
if(OS_FOLDER) if(OS_FOLDER)
message ("**** OS FOLDER IS: [${OS_FOLDER}]") message ("**** OS FOLDER IS: [${OS_FOLDER}]")
if("${OS_FOLDER}" STREQUAL "ON") if("${OS_FOLDER}" STREQUAL "ON")
@ -237,6 +238,7 @@ include(os_flags)
include(sanitizer) include(sanitizer)
include(cross_compiled_func) include(cross_compiled_func)
include(faster_build) include(faster_build)
include(whole_archive)
include(api_validator/api_validator) include(api_validator/api_validator)
function(set_ci_build_number) function(set_ci_build_number)

View File

@ -17,11 +17,11 @@ ie_option (ENABLE_TESTS "unit, behavior and functional tests" OFF)
ie_option (ENABLE_MKL_DNN "MKL-DNN plugin for inference engine" ${ENABLE_MKL_DNN_DEFAULT}) ie_option (ENABLE_MKL_DNN "MKL-DNN plugin for inference engine" ${ENABLE_MKL_DNN_DEFAULT})
ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "WIN32 OR X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE; NOT WINDOWS_PHONE" OFF) ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF)
# FIXME: there are compiler failures with LTO and Cross-Compile toolchains. Disabling for now, but # FIXME: there are compiler failures with LTO and Cross-Compile toolchains. Disabling for now, but
# this must be addressed in a proper way # this must be addressed in a proper way
ie_dependent_option (ENABLE_LTO "Enable Link Time Optimization" OFF "LINUX OR WIN32;NOT CMAKE_CROSSCOMPILING; CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9" OFF) ie_dependent_option (ENABLE_LTO "Enable Link Time Optimization" OFF "LINUX;NOT CMAKE_CROSSCOMPILING; CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9" OFF)
ie_option (OS_FOLDER "create OS dedicated folder in output" OFF) ie_option (OS_FOLDER "create OS dedicated folder in output" OFF)
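The `ie_dependent_option` calls above appear to behave like CMake's stock `cmake_dependent_option`: the option takes its default only while every condition in the semicolon-separated list holds, and is forced to the fallback value otherwise. An illustrative equivalent for the clDNN option (a sketch, not the project's actual helper):

```cmake
include(CMakeDependentOption)

# ENABLE_CLDNN defaults to ON, but only when targeting X86_64 and not building
# for Apple, MinGW, Windows Store, or Windows Phone; otherwise it is forced OFF.
cmake_dependent_option(ENABLE_CLDNN "clDnn based plugin for inference engine" ON
    "X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF)
```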

View File

@ -127,8 +127,10 @@ function(ie_avx512_optimization_flags flags)
endfunction() endfunction()
function(ie_arm_neon_optimization_flags flags) function(ie_arm_neon_optimization_flags flags)
if(WIN32 OR CMAKE_CXX_COMPILER_ID STREQUAL "Intel") if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}") message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# nothing
elseif(ANDROID) elseif(ANDROID)
if(ANDROID_ABI STREQUAL "arm64-v8a") if(ANDROID_ABI STREQUAL "arm64-v8a")
set(${flags} "-mfpu=neon" PARENT_SCOPE) set(${flags} "-mfpu=neon" PARENT_SCOPE)
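A hedged sketch of how a caller might consume this helper; the target name `my_arm_kernels` is hypothetical and not part of the diff:

```cmake
# Ask the helper for the NEON flags appropriate to the current toolchain
# (nothing on MSVC, -mfpu=neon for the Android ABIs shown above, etc.)
# and apply them to a single target only.
ie_arm_neon_optimization_flags(neon_flags)
if(neon_flags)
    target_compile_options(my_arm_kernels PRIVATE ${neon_flags})
endif()
```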

View File

@ -16,10 +16,25 @@ if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
endif() endif()
endif() endif()
-if(MSVC64 OR MINGW64)
-    set(X86_64 ON)
-elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING))
-    set(X86 ON)
+macro(_ie_process_msvc_generator_platform flag_name)
+    # if cmake -A <ARM|ARM64> is passed
+    if(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64")
+        set(AARCH64 ON)
+    elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM")
+        set(ARM ON)
+    elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "x64")
+        set(X86_64 ON)
+    elseif(CMAKE_GENERATOR_PLATFORM STREQUAL "Win32")
+        set(X86 ON)
+    else()
+        set(${flag_name} ON)
+    endif()
+endmacro()
+
+if(MSVC64 OR MINGW64)
+    _ie_process_msvc_generator_platform(X86_64)
+elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING))
+    _ie_process_msvc_generator_platform(X86)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(X86_64 ON) set(X86_64 ON)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")

View File

@ -7,15 +7,15 @@ macro(ie_parse_ci_build_number)
set(IE_VERSION_MAJOR ${CMAKE_MATCH_1}) set(IE_VERSION_MAJOR ${CMAKE_MATCH_1})
set(IE_VERSION_MINOR ${CMAKE_MATCH_2}) set(IE_VERSION_MINOR ${CMAKE_MATCH_2})
set(IE_VERSION_PATCH ${CMAKE_MATCH_3}) set(IE_VERSION_PATCH ${CMAKE_MATCH_3})
set(IE_VS_VER_HAS_WELL_DEFINED_VERSION 1) set(IE_VS_VER_HAS_VERSION 1)
else() else()
set(IE_VS_VER_HAS_WELL_DEFINED_VERSION 0) set(IE_VS_VER_HAS_VERSION 0)
endif() endif()
endmacro() endmacro()
ie_parse_ci_build_number() ie_parse_ci_build_number()
if(IE_VS_VER_HAS_WELL_DEFINED_VERSION) if(IE_VS_VER_HAS_VERSION)
set(IE_VS_VER_FILEVERSION_QUAD "${IE_VERSION_MAJOR},${IE_VERSION_MINOR},${IE_VERSION_PATCH},0") set(IE_VS_VER_FILEVERSION_QUAD "${IE_VERSION_MAJOR},${IE_VERSION_MINOR},${IE_VERSION_PATCH},0")
set(IE_VS_VER_PRODUCTVERSION_QUAD "${IE_VERSION_MAJOR},${IE_VERSION_MINOR},${IE_VERSION_PATCH},0") set(IE_VS_VER_PRODUCTVERSION_QUAD "${IE_VERSION_MAJOR},${IE_VERSION_MINOR},${IE_VERSION_PATCH},0")
set(IE_VS_VER_FILEVERSION_STR "${IE_VERSION_MAJOR}.${IE_VERSION_MINOR}.${IE_VERSION_PATCH}.0") set(IE_VS_VER_FILEVERSION_STR "${IE_VERSION_MAJOR}.${IE_VERSION_MINOR}.${IE_VERSION_PATCH}.0")

View File

@ -1,7 +1,7 @@
#include <winver.h> #include <winver.h>
VS_VERSION_INFO VERSIONINFO VS_VERSION_INFO VERSIONINFO
#if IE_VS_VER_HAS_WELL_DEFINED_VERSION #if @IE_VS_VER_HAS_VERSION@
FILEVERSION @IE_VS_VER_FILEVERSION_QUAD@ FILEVERSION @IE_VS_VER_FILEVERSION_QUAD@
PRODUCTVERSION @IE_VS_VER_PRODUCTVERSION_QUAD@ PRODUCTVERSION @IE_VS_VER_PRODUCTVERSION_QUAD@
#endif #endif
@ -20,7 +20,7 @@ BEGIN
BLOCK "040904E4" BLOCK "040904E4"
BEGIN BEGIN
VALUE "FileDescription", "@IE_VS_VER_FILEDESCRIPTION_STR@\0" VALUE "FileDescription", "@IE_VS_VER_FILEDESCRIPTION_STR@\0"
#if IE_VS_VER_HAS_WELL_DEFINED_VERSION #if @IE_VS_VER_HAS_VERSION@
VALUE "FileVersion", "@IE_VS_VER_FILEVERSION_STR@\0" VALUE "FileVersion", "@IE_VS_VER_FILEVERSION_STR@\0"
#endif #endif
VALUE "InternalName", "@IE_VS_VER_INTERNALNAME_STR@\0" VALUE "InternalName", "@IE_VS_VER_INTERNALNAME_STR@\0"

View File

@ -10,7 +10,7 @@ and mixed-reality headsets.
The OpenVINO™ toolkit: The OpenVINO™ toolkit:
* Enables CNN-based deep learning inference on the edge * Enables CNN-based deep learning inference on the edge
* Supports heterogeneous execution across an Intel&reg; CPU, Intel&reg; Integrated Graphics, Intel&reg; Movidius&trade; Neural Compute Stick and Intel&reg; Neural Compute Stick 2 * Supports heterogeneous execution across an Intel&reg; CPU, Intel&reg; Integrated Graphics, Intel&reg; Neural Compute Stick 2
* Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels * Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels
* Includes optimized calls for computer vision standards including OpenCV\*, OpenCL&trade;, and OpenVX\* * Includes optimized calls for computer vision standards including OpenCV\*, OpenCL&trade;, and OpenVX\*

View File

@ -2,7 +2,7 @@
## Introducing MYRIAD Plugin ## Introducing MYRIAD Plugin
The Inference Engine MYRIAD plugin is developed for inference of neural networks on Intel&reg; Movidius&trade; Neural Compute Stick and Intel&reg; Neural Compute Stick 2. The Inference Engine MYRIAD plugin is developed for inference of neural networks on Intel&reg; Neural Compute Stick 2.
## Installation on Linux* OS ## Installation on Linux* OS
@ -23,10 +23,10 @@ The Inference Engine MYRIAD plugin supports the following networks:
* GoogleNet (Inception) v1, v2, v4 * GoogleNet (Inception) v1, v2, v4
* VGG family (VGG16, VGG19) * VGG family (VGG16, VGG19)
* SqueezeNet v1.0, v1.1 * SqueezeNet v1.0, v1.1
* ResNet v1 family (18\*\* \*\*\*, 50, 101, 152) * ResNet v1 family (18\*\*\*, 50, 101, 152)
* MobileNet (mobilenet-v1-1.0-224, mobilenet-v2) * MobileNet (mobilenet-v1-1.0-224, mobilenet-v2)
* Inception ResNet v2 * Inception ResNet v2
* DenseNet family\*\* (121,161,169,201) * DenseNet family (121,161,169,201)
* SSD-300, SSD-512, SSD-MobileNet, SSD-GoogleNet, SSD-SqueezeNet * SSD-300, SSD-512, SSD-MobileNet, SSD-GoogleNet, SSD-SqueezeNet
**TensorFlow\***: **TensorFlow\***:
@ -45,7 +45,7 @@ The Inference Engine MYRIAD plugin supports the following networks:
**MXNet\***: **MXNet\***:
* AlexNet and CaffeNet * AlexNet and CaffeNet
* DenseNet family\*\* (121,161,169,201) * DenseNet family (121,161,169,201)
* SqueezeNet v1.1 * SqueezeNet v1.1
* MobileNet v1, v2 * MobileNet v1, v2
* NiN * NiN
@ -55,8 +55,6 @@ The Inference Engine MYRIAD plugin supports the following networks:
* VGG family (VGG16, VGG19) * VGG family (VGG16, VGG19)
* SSD-Inception-v3, SSD-MobileNet, SSD-ResNet-50, SSD-300 * SSD-Inception-v3, SSD-MobileNet, SSD-ResNet-50, SSD-300
\*\* Network is tested on Intel&reg; Movidius&trade; Neural Compute Stick with BatchNormalization fusion optimization disabled during Model Optimizer import
\*\*\* Network is tested on Intel&reg; Neural Compute Stick 2 with BatchNormalization fusion optimization disabled during Model Optimizer import \*\*\* Network is tested on Intel&reg; Neural Compute Stick 2 with BatchNormalization fusion optimization disabled during Model Optimizer import
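The footnote refers to disabling BatchNormalization fusion during Model Optimizer import. If your Model Optimizer version supports the `--disable_fusing` flag, the conversion command would look roughly like this (model path is a placeholder):

```sh
# Convert with linear-operation (BatchNorm/scale) fusion turned off,
# matching the configuration the footnote describes.
python3 mo.py --input_model resnet-18.caffemodel --disable_fusing
```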
## Supported Configuration Parameters ## Supported Configuration Parameters

View File

@ -0,0 +1,96 @@
# Converting EfficientDet Models from TensorFlow {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_EfficientDet_Models}
This tutorial explains how to convert public EfficientDet\* detection models to the Intermediate Representation (IR).
## <a name="efficientdet-to-ir"></a>Convert EfficientDet Model to IR
On GitHub\*, you can find several public implementations of the EfficientDet model. This tutorial explains how to
convert models from the [https://github.com/google/automl/tree/master/efficientdet](https://github.com/google/automl/tree/master/efficientdet)
repository (commit 96e1fee) to IR.
### Get Frozen TensorFlow\* Model
Follow the instructions below to get a frozen TensorFlow EfficientDet model. We use the EfficientDet-D4 model as an example:
1. Clone the repository:<br>
```sh
git clone https://github.com/google/automl
cd automl/efficientdet
```
2. (Optional) Check out the commit that the conversion was tested on:<br>
```sh
git checkout 96e1fee
```
3. Install required dependencies:<br>
```sh
python3 -m pip install --upgrade pip
python3 -m pip install -r automl/efficientdet/requirements.txt
```
4. Download and extract the model checkpoint [efficientdet-d4.tar.gz](https://storage.googleapis.com/cloud-tpu-checkpoints/efficientdet/coco2/efficientdet-d4.tar.gz)
referenced in the "Pretrained EfficientDet Checkpoints" section of the model repository:<br>
```sh
wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientdet/coco2/efficientdet-d4.tar.gz
tar zxvf efficientdet-d4.tar.gz
```
5. Freeze the model:<br>
```sh
python3 model_inspect.py --runmode=saved_model --model_name=efficientdet-d4 --ckpt_path=efficientdet-d4 --saved_model_dir=savedmodeldir
```
As a result, the frozen model file `savedmodeldir/efficientdet-d4_frozen.pb` is generated.
> **NOTE:** If you see an error `AttributeError: module 'tensorflow_core.python.keras.api._v2.keras.initializers' has no attribute 'variance_scaling'`, apply the fix from the [patch](https://github.com/google/automl/pull/846).
### Convert EfficientDet TensorFlow Model to the IR
To generate the IR of the EfficientDet TensorFlow model, run:<br>
```sh
python3 $MO_ROOT/mo.py \
--input_model savedmodeldir/efficientdet-d4_frozen.pb \
--tensorflow_use_custom_operations_config $MO_ROOT/extensions/front/tf/automl_efficientdet.json \
--input_shape [1,$IMAGE_SIZE,$IMAGE_SIZE,3] \
--reverse_input_channels
```
Where `$IMAGE_SIZE` is the size that the input image of the original TensorFlow model will be resized to. Different
EfficientDet models were trained with different input image sizes. To determine the right one, refer to the `efficientdet_model_param_dict`
dictionary in the [hparams_config.py](https://github.com/google/automl/blob/96e1fee/efficientdet/hparams_config.py#L304) file.
The `image_size` attribute specifies the shape to use for the model conversion.
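For example, assuming the `efficientdet_model_param_dict` entry for EfficientDet-D4 specifies an `image_size` of 1024 (verify this against `hparams_config.py` for your commit), the substituted command would be:

```sh
python3 $MO_ROOT/mo.py \
--input_model savedmodeldir/efficientdet-d4_frozen.pb \
--tensorflow_use_custom_operations_config $MO_ROOT/extensions/front/tf/automl_efficientdet.json \
--input_shape [1,1024,1024,3] \
--reverse_input_channels
```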
The `tensorflow_use_custom_operations_config` command-line parameter specifies the configuration JSON file with hints
for the Model Optimizer on how to convert the model; it triggers the transformations implemented in
`$MO_ROOT/extensions/front/tf/AutomlEfficientDet.py`. The JSON file contains parameters that must be changed if you
trained the model yourself and modified the `hparams_config` file, or if the parameters differ from the ones used for EfficientDet-D4.
The attribute names are self-explanatory or match the names in the `hparams_config` file.
> **NOTE:** The color channel order (RGB or BGR) of the input data should match the channel order of the model training dataset. If they differ, perform the `RGB<->BGR` conversion by specifying the command-line parameter `--reverse_input_channels`. Otherwise, inference results may be incorrect. For more information about the parameter, refer to the **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](../Converting_Model_General.md).
The OpenVINO&trade; toolkit provides samples that can be used to run inference with an EfficientDet model. For more information, refer to
[Object Detection for SSD C++ Sample](@ref openvino_inference_engine_samples_object_detection_sample_ssd_README) and
[Object Detection for SSD Python Sample](@ref openvino_inference_engine_ie_bridges_python_sample_object_detection_sample_ssd_README).
## <a name="efficientdet-ir-results-interpretation"></a>Interpreting Results of the TensorFlow Model and the IR
The TensorFlow model produces a list of 7-element tuples as output: `[image_id, y_min, x_min, y_max, x_max, confidence, class_id]`, where:
* `image_id` -- image batch index.
* `y_min` -- absolute `y` coordinate of the lower left corner of the detected object.
* `x_min` -- absolute `x` coordinate of the lower left corner of the detected object.
* `y_max` -- absolute `y` coordinate of the upper right corner of the detected object.
* `x_max` -- absolute `x` coordinate of the upper right corner of the detected object.
* `confidence` -- confidence of the detected object.
* `class_id` -- ID of the detected object class, counted from 1.
The output of the IR is a list of 7-element tuples: `[image_id, class_id, confidence, x_min, y_min, x_max, y_max]`, where:
* `image_id` -- image batch index.
* `class_id` -- ID of the detected object class, counted from 0.
* `confidence` -- confidence of the detected object.
* `x_min` -- normalized `x` coordinate of the lower left corner of the detected object.
* `y_min` -- normalized `y` coordinate of the lower left corner of the detected object.
* `x_max` -- normalized `x` coordinate of the upper right corner of the detected object.
* `y_max` -- normalized `y` coordinate of the upper right corner of the detected object.
The first element with `image_id = -1` marks the end of the data.
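A small illustrative sketch (not part of the toolkit) of how the IR output could be post-processed, assuming the detections arrive as rows of seven floats as described above:

```python
def parse_ir_detections(detections, img_w, img_h, conf_threshold=0.5):
    """Convert normalized IR detections to pixel-space boxes."""
    boxes = []
    for image_id, class_id, conf, x_min, y_min, x_max, y_max in detections:
        if image_id == -1:       # end-of-data marker
            break
        if conf < conf_threshold:
            continue
        boxes.append({
            "class_id": int(class_id),            # counted from 0 in the IR
            "confidence": float(conf),
            "box": (int(x_min * img_w), int(y_min * img_h),
                    int(x_max * img_w), int(y_max * img_h)),
        })
    return boxes
```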
---
## See Also
* [Sub-Graph Replacement in Model Optimizer](../../customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md)

View File

@ -22,6 +22,7 @@
<tab type="user" title="Converting BERT from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_BERT_From_Tensorflow"/> <tab type="user" title="Converting BERT from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_BERT_From_Tensorflow"/>
<tab type="user" title="Convert TensorFlow* XLNet Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_XLNet_From_Tensorflow"/> <tab type="user" title="Convert TensorFlow* XLNet Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_XLNet_From_Tensorflow"/>
<tab type="user" title="Converting TensorFlow* Wide and Deep Models from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_WideAndDeep_Family_Models"/> <tab type="user" title="Converting TensorFlow* Wide and Deep Models from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_WideAndDeep_Family_Models"/>
<tab type="user" title="Converting EfficientDet Models from TensorFlow" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_EfficientDet_Models"/>
</tab> </tab>
<tab type="usergroup" title="Converting a MXNet* Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_MxNet"> <tab type="usergroup" title="Converting a MXNet* Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_MxNet">
<tab type="user" title="Converting a Style Transfer Model from MXNet" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_mxnet_specific_Convert_Style_Transfer_From_MXNet"/> <tab type="user" title="Converting a Style Transfer Model from MXNet" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_mxnet_specific_Convert_Style_Transfer_From_MXNet"/>

View File

@ -53,7 +53,7 @@ cd /home/<user>/Downloads/fpga_support_files/
./install_openvino_fpga_dependencies.sh ./install_openvino_fpga_dependencies.sh
``` ```
11. When asked, select the FPGA card, Intel® GPU, and Intel® Movidius™ Neural Compute Stick, then you can install the correct dependencies. 11. When asked, select the FPGA card, Intel® GPU, and Intel® Neural Compute Stick 2, then you can install the correct dependencies.
12. If you installed the 4.14 kernel as part of the installation script, you will need to reboot the machine and select the new kernel in the Ubuntu (grub) boot menu. You will also need to rerun `setup_env.sh` to set up your environmental variables again. 12. If you installed the 4.14 kernel as part of the installation script, you will need to reboot the machine and select the new kernel in the Ubuntu (grub) boot menu. You will also need to rerun `setup_env.sh` to set up your environmental variables again.

View File

@ -13,55 +13,64 @@ This guide provides installation steps for the Intel® distribution of OpenVINO
## Install the Runtime Package Using the PyPI Repository ## Install the Runtime Package Using the PyPI Repository
-1. Set up and update pip to the highest version:
-   ```sh
-   python3 -m pip install --upgrade pip
-   ```
-2. Install the Intel® distribution of OpenVINO™ toolkit:
-   ```sh
-   pip install openvino-python
-   ```
-3. Add PATH to environment variables.
-   - Ubuntu* 18.04 and macOS*:
-   ```sh
-   export LD_LIBRARY_PATH=<library_dir>:${LD_LIBRARY_PATH}
-   ```
-   - Windows* 10:
-   ```sh
-   set PATH=<library_dir>;%PATH%
-   ```
-   How to find `library_dir`:
-   - Ubuntu\*, macOS\*:
-     - Standard user:
-       ```sh
-       echo $(python3 -m site --user-base)/lib
-       ```
-     - Root or sudo user:
-       ```sh
-       /usr/local/lib
-       ```
-     - Virtual environments or custom Python installations (from sources or tarball):
-       ```sh
-       echo $(which python3)/../../lib
-       ```
-   - Windows\*:
-     - Standard Python:
-       ```sh
-       python -c "import os, sys; print((os.path.dirname(sys.executable))+'\Library\\bin')"
-       ```
-     - Virtual environments or custom Python installations (from sources or tarball):
-       ```sh
-       python -c "import os, sys; print((os.path.dirname(sys.executable))+'\..\Library\\bin')"
-       ```
-4. Verify that the package is installed:
-   ```sh
-   python3 -c "import openvino"
-   ```
+### Step 1. Set up and update pip to the highest version
+
+Run the command below:
+```sh
+python3 -m pip install --upgrade pip
+```
+
+### Step 2. Install the Intel® distribution of OpenVINO™ toolkit
+
+Run the command below:
+```sh
+pip install openvino-python
+```
+
+### Step 3. Add PATH to environment variables
+
+Run a command for your operating system:
+- Ubuntu 18.04 and macOS:
+```sh
+export LD_LIBRARY_PATH=<library_dir>:${LD_LIBRARY_PATH}
+```
+- Windows* 10:
+```sh
+set PATH=<library_dir>;%PATH%
+```
+
+To find `library_dir`:
+
+**Ubuntu, macOS**:
+- Standard user:
+```sh
+echo $(python3 -m site --user-base)/lib
+```
+- Root or sudo user:
+```sh
+/usr/local/lib
+```
+- Virtual environments or custom Python installations (from sources or tarball):
+```sh
+echo $(which python3)/../../lib
+```
+
+**Windows**:
+- Standard Python:
+```sh
+python -c "import os, sys; print((os.path.dirname(sys.executable))+'\Library\\bin')"
+```
+- Virtual environments or custom Python installations (from sources or tarball):
+```sh
+python -c "import os, sys; print((os.path.dirname(sys.executable))+'\..\Library\\bin')"
+```
+
+### Step 4. Verify that the package is installed
+
+Run the command below:
+```sh
+python3 -c "import openvino"
+```
Now you are ready to develop and run your application. Now you are ready to develop and run your application.
## Additional Resources ## Additional Resources
- [Intel® Distribution of OpenVINO™ toolkit](https://software.intel.com/en-us/openvino-toolkit). - [Intel® Distribution of OpenVINO™ toolkit](https://software.intel.com/en-us/openvino-toolkit).

View File

@ -20,7 +20,9 @@ INSTANTIATE_TEST_CASE_P(NumSplitsCheck, SplitLayerTest,
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({30, 30, 30, 30})), ::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({})),
::testing::Values("TEMPLATE")), ::testing::Values("TEMPLATE")),
SplitLayerTest::getTestCaseName); SplitLayerTest::getTestCaseName);
} // namespace } // namespace

View File

@ -185,7 +185,7 @@ if (ENABLE_OPENCV)
set(OPENCV_BUILD "36") set(OPENCV_BUILD "36")
set(OPENCV_BUILD_YOCTO "337") set(OPENCV_BUILD_YOCTO "337")
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") if (AARCH64)
if(DEFINED ENV{THIRDPARTY_SERVER_PATH}) if(DEFINED ENV{THIRDPARTY_SERVER_PATH})
set(IE_PATH_TO_DEPS "$ENV{THIRDPARTY_SERVER_PATH}") set(IE_PATH_TO_DEPS "$ENV{THIRDPARTY_SERVER_PATH}")
elseif(DEFINED THIRDPARTY_SERVER_PATH) elseif(DEFINED THIRDPARTY_SERVER_PATH)
@ -220,10 +220,10 @@ if (ENABLE_OPENCV)
ENVIRONMENT "OpenCV_DIR" ENVIRONMENT "OpenCV_DIR"
VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*") VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
elseif(LINUX) elseif(LINUX)
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") if (AARCH64)
set(OPENCV_SUFFIX "yocto_kmb") set(OPENCV_SUFFIX "yocto_kmb")
set(OPENCV_BUILD "${OPENCV_BUILD_YOCTO}") set(OPENCV_BUILD "${OPENCV_BUILD_YOCTO}")
elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l") elseif (ARM)
set(OPENCV_SUFFIX "debian9arm") set(OPENCV_SUFFIX "debian9arm")
elseif (LINUX_OS_NAME STREQUAL "CentOS 7" OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9") elseif (LINUX_OS_NAME STREQUAL "CentOS 7" OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9")
set(OPENCV_SUFFIX "centos7") set(OPENCV_SUFFIX "centos7")

View File

@ -29,7 +29,7 @@ if (ENABLE_MKL_DNN)
endif() endif()
# "MKL-DNN library based on OMP or TBB or Sequential implementation: TBB|OMP|SEQ" # "MKL-DNN library based on OMP or TBB or Sequential implementation: TBB|OMP|SEQ"
if(ARM) if(ARM OR (MSVC AND (ARM OR AARCH64)) )
set(THREADING_DEFAULT "SEQ") set(THREADING_DEFAULT "SEQ")
else() else()
set(THREADING_DEFAULT "TBB") set(THREADING_DEFAULT "TBB")

View File

@ -13,7 +13,7 @@ endif()
include(dependency_solver) include(dependency_solver)
set(VPU_SUPPORTED_FIRMWARES usb-ma2450 usb-ma2x8x pcie-ma248x) set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma248x)
# #
# Default packages # Default packages
@ -66,11 +66,11 @@ foreach(firmware_name IN LISTS VPU_SUPPORTED_FIRMWARES)
string(TOUPPER "${firmware_name}" firmware_name_upper) string(TOUPPER "${firmware_name}" firmware_name_upper)
set(var_name VPU_FIRMWARE_${firmware_name_upper}_FILE) set(var_name VPU_FIRMWARE_${firmware_name_upper}_FILE)
set(firmware_out_file "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${firmware_name}.mvcmd") set(firmware_out_file "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${CMAKE_CFG_INTDIR}/${firmware_name}.mvcmd")
# Handle PCIe elf firmware for Windows # Handle PCIe elf firmware for Windows
if (WIN32 AND "${firmware_name}" STREQUAL "pcie-ma248x") if (WIN32 AND "${firmware_name}" STREQUAL "pcie-ma248x")
set(firmware_out_file "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${firmware_name}.elf") set(firmware_out_file "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${CMAKE_CFG_INTDIR}/${firmware_name}.elf")
endif () endif ()
list(APPEND all_firmware_files ${firmware_out_file}) list(APPEND all_firmware_files ${firmware_out_file})
@ -79,7 +79,7 @@ foreach(firmware_name IN LISTS VPU_SUPPORTED_FIRMWARES)
COMMAND COMMAND
${CMAKE_COMMAND} -E copy ${${var_name}} ${firmware_out_file} ${CMAKE_COMMAND} -E copy ${${var_name}} ${firmware_out_file}
MAIN_DEPENDENCY ${${var_name}} MAIN_DEPENDENCY ${${var_name}}
COMMENT "[VPU] Copy ${${var_name}} to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}" COMMENT "[VPU] Copy ${${var_name}} to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${CMAKE_CFG_INTDIR}"
VERBATIM) VERBATIM)
install(FILES ${${var_name}} install(FILES ${${var_name}}

View File

@ -24,6 +24,14 @@
# define _AMD64_ # define _AMD64_
#endif #endif
#if defined(_M_ARM) && !defined(_ARM_) && !defined(_ARM64_)
# define _ARM_
#endif
#if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_)
# define _ARM64_
#endif
#include <string.h> #include <string.h>
#include <windef.h> #include <windef.h>
#include <fileapi.h> #include <fileapi.h>

View File

@ -59,10 +59,6 @@ else ()
endif() endif()
if (WIN32) if (WIN32)
if (NOT "${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
message(FATAL_ERROR "Only 64-bit supported on Windows")
endif()
set_property (DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _CRT_SECURE_NO_WARNINGS) set_property (DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _CRT_SECURE_NO_WARNINGS)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS -DNOMINMAX") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS -DNOMINMAX")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") #no asynchronous structured exception handling set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") #no asynchronous structured exception handling

View File

@ -98,6 +98,7 @@ int main(int argc, char *argv[]) {
// ----------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------
// --------------------------- 3. Configure input & output --------------------------------------------- // --------------------------- 3. Configure input & output ---------------------------------------------
if (network.getOutputsInfo().size() != 1) throw std::logic_error("Sample supports topologies with 1 output only");
// --------------------------- Prepare input blobs ----------------------------------------------------- // --------------------------- Prepare input blobs -----------------------------------------------------
slog::info << "Preparing input blobs" << slog::endl; slog::info << "Preparing input blobs" << slog::endl;
@ -214,7 +215,6 @@ int main(int argc, char *argv[]) {
// --------------------------- 8. Process output ------------------------------------------------------- // --------------------------- 8. Process output -------------------------------------------------------
slog::info << "Processing output blobs" << slog::endl; slog::info << "Processing output blobs" << slog::endl;
OutputsDataMap outputInfo(network.getOutputsInfo()); OutputsDataMap outputInfo(network.getOutputsInfo());
if (outputInfo.size() != 1) throw std::logic_error("Sample supports topologies with 1 output only");
Blob::Ptr outputBlob = inferRequest.GetBlob(outputInfo.begin()->first); Blob::Ptr outputBlob = inferRequest.GetBlob(outputInfo.begin()->first);
/** Validating -nt value **/ /** Validating -nt value **/

View File

@ -24,6 +24,14 @@
# define _AMD64_ # define _AMD64_
#endif #endif
#if defined(_M_ARM) && !defined(_ARM_) && !defined(_ARM64_)
# define _ARM_
#endif
#if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_)
# define _ARM64_
#endif
#include <string> #include <string>
#include <windef.h> #include <windef.h>
#include <fileapi.h> #include <fileapi.h>

View File

@ -86,6 +86,7 @@ int main(int argc, char *argv[]) {
// 2. Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format // 2. Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format
CNNNetwork network = ie.ReadNetwork(input_model); CNNNetwork network = ie.ReadNetwork(input_model);
if (network.getOutputsInfo().size() != 1) throw std::logic_error("Sample supports topologies with 1 output only");
network.setBatchSize(1); network.setBatchSize(1);
// ----------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------

View File

@ -11,7 +11,6 @@ for %%A in ("%GNA%") do set GNA_FILENAME=%%~nxA
for %%A in ("%OPENCV%") do set OPENCV_FILENAME=%%~nxA for %%A in ("%OPENCV%") do set OPENCV_FILENAME=%%~nxA
for %%A in ("%MYRIAD%") do set MYRIAD_FILENAME=%%~nxA for %%A in ("%MYRIAD%") do set MYRIAD_FILENAME=%%~nxA
for %%A in ("%HDDL%") do set HDDL_FILENAME=%%~nxA for %%A in ("%HDDL%") do set HDDL_FILENAME=%%~nxA
for %%A in ("%VPU_FIRMWARE_MA2450%") do set VPU_FIRMWARE_MA2450_FILENAME=%%~nxA
for %%A in ("%VPU_FIRMWARE_MA2X8X%") do set VPU_FIRMWARE_MA2X8X_FILENAME=%%~nxA for %%A in ("%VPU_FIRMWARE_MA2X8X%") do set VPU_FIRMWARE_MA2X8X_FILENAME=%%~nxA
for %%A in ("%TBB%") do set TBB_FILENAME=%%~nxA for %%A in ("%TBB%") do set TBB_FILENAME=%%~nxA
@ -86,16 +85,6 @@ if not "%HDDL%"=="" (
) )
) )
if not "%VPU_FIRMWARE_MA2450%"=="" (
if not exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%" (
mkdir "%DL_SDK_TEMP%\test_dependencies\VPU"
powershell -command "iwr -outf '%DL_SDK_TEMP%\test_dependencies\VPU\_%VPU_FIRMWARE_MA2450_FILENAME%' %VPU_FIRMWARE_MA2450%"
mkdir "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%"
call "C:\Program Files\7-Zip\7z.exe" x -y %DL_SDK_TEMP%\test_dependencies\VPU\_%VPU_FIRMWARE_MA2450_FILENAME% -o%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%
del "%DL_SDK_TEMP%\test_dependencies\VPU\_%VPU_FIRMWARE_MA2450_FILENAME%" /F /Q
)
)
if not "%VPU_FIRMWARE_MA2X8X%"=="" ( if not "%VPU_FIRMWARE_MA2X8X%"=="" (
if not exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%" ( if not exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%" (
mkdir "%DL_SDK_TEMP%\test_dependencies\VPU" mkdir "%DL_SDK_TEMP%\test_dependencies\VPU"
@ -139,13 +128,6 @@ if not "%MYRIAD%"=="" (
) )
) )
if not "%VPU_FIRMWARE_MA2450%"=="" (
if exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%" (
echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%\*" intel64 /S /I /Y /R
xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2450_FILENAME%\*" intel64 /S /I /Y /R
)
)
if not "%VPU_FIRMWARE_MA2X8X%"=="" ( if not "%VPU_FIRMWARE_MA2X8X%"=="" (
if exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%" ( if exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%" (
echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" intel64 /S /I /Y /R echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" intel64 /S /I /Y /R

View File

@ -37,7 +37,7 @@ add_path() {
fi fi
} }
runtimes=(MKL CLDNN MYRIAD GNA DLIA OPENCV VPU_FIRMWARE_USB-MA2450 VPU_FIRMWARE_USB-MA2X8X HDDL OMP TBB AOCL_RTE LIBUSB) runtimes=(MKL CLDNN MYRIAD GNA DLIA OPENCV VPU_FIRMWARE_USB-MA2X8X HDDL OMP TBB AOCL_RTE LIBUSB)
export_library_path() { export_library_path() {
export LD_LIBRARY_PATH=$DL_SDK_TEMP/test_dependencies/$1:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=$DL_SDK_TEMP/test_dependencies/$1:$LD_LIBRARY_PATH

View File

@ -4,6 +4,7 @@
#include <vector> #include <vector>
#include <iostream> #include <iostream>
#include <cmath>
#include <runtime/pwl.h> #include <runtime/pwl.h>
#include <gna_slope_scale.h> #include <gna_slope_scale.h>
@ -413,12 +414,12 @@ void make_gna_pwl(const DnnActivation fun,
y_upper = tmp; y_upper = tmp;
} }
int64_t x_lower_new = FLOAT_TO_INT32((x_lower / in_scale) / abs(pow_scale) * in_scale); int64_t x_lower_new = FLOAT_TO_INT32((x_lower / in_scale) / std::fabs(pow_scale) * in_scale);
int64_t x_upper_new = FLOAT_TO_INT32((x_upper / in_scale) / abs(pow_scale) * in_scale); int64_t x_upper_new = FLOAT_TO_INT32((x_upper / in_scale) / std::fabs(pow_scale) * in_scale);
x_lower = static_cast<int32_t>(x_lower_new); x_lower = static_cast<int32_t>(x_lower_new);
x_upper = static_cast<int32_t>(x_upper_new); x_upper = static_cast<int32_t>(x_upper_new);
if (x_lower_new < INT32_MIN) { if (x_lower_new < INT32_MIN) {
int16_t offset_lower = abs(x_lower_new - INT32_MIN) / in_scale * out_scale; int16_t offset_lower = std::abs(x_lower_new - INT32_MIN) / in_scale * out_scale;
x_lower = INT32_MIN; x_lower = INT32_MIN;
y_lower = y_lower + offset_lower; y_lower = y_lower + offset_lower;
} }
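A standalone sketch of why the switch from `abs` to `std::fabs`/`std::abs` matters here, assuming the pre-fix call resolved to the integer overload:

```cpp
#include <cmath>
#include <cstdio>
#include <cstdlib>

int main() {
    float pow_scale = -0.25f;
    // What the integer overload effectively does: the argument is truncated to
    // int before the absolute value is taken, so a fractional scale becomes 0.
    int truncated = std::abs(static_cast<int>(pow_scale));   // 0
    // std::fabs keeps the computation in floating point.
    float kept = std::fabs(pow_scale);                        // 0.25f
    std::printf("integer abs: %d, std::fabs: %f\n", truncated, kept);
    return 0;
}
```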

View File

@ -132,6 +132,22 @@ uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) {
return reqConfId; return reqConfId;
} }
uint32_t GNADeviceHelper::getNumberOfGnaDevices() {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
uint32_t numberOfGnaDevices = 0;
auto status = Gna2DeviceGetCount(&numberOfGnaDevices);
checkGna2Status(status);
return numberOfGnaDevices;
}
uint32_t GNADeviceHelper::selectGnaDevice() {
const auto deviceCount = getNumberOfGnaDevices();
if (deviceCount != 1) {
THROW_GNA_EXCEPTION << "Unsupported number of GNA devices detected = " << deviceCount;
}
return 0;
}
void GNADeviceHelper::checkGna2Status(Gna2Status status, const Gna2Model& gnaModel) { void GNADeviceHelper::checkGna2Status(Gna2Status status, const Gna2Model& gnaModel) {
if (!Gna2StatusIsSuccessful(status)) { if (!Gna2StatusIsSuccessful(status)) {
std::vector<char> gna2StatusBuffer(1024); std::vector<char> gna2StatusBuffer(1024);

View File

@ -74,7 +74,8 @@ public:
bool use_openmp = false, bool use_openmp = false,
bool isPerformanceMeasuring = false) : bool isPerformanceMeasuring = false) :
gna2HwConsistency(gna2HwConsistency), gna2HwConsistency(gna2HwConsistency),
isPerformanceMeasuring(isPerformanceMeasuring) { isPerformanceMeasuring(isPerformanceMeasuring),
nGnaDeviceIndex{selectGnaDevice()} {
#endif #endif
open(lib_async_n_threads); open(lib_async_n_threads);
initGnaPerfCounters(); initGnaPerfCounters();
@ -116,6 +117,8 @@ public:
#endif #endif
void releaseModel(const uint32_t model_id); void releaseModel(const uint32_t model_id);
uint32_t createRequestConfig(const uint32_t model_id); uint32_t createRequestConfig(const uint32_t model_id);
static uint32_t getNumberOfGnaDevices();
static uint32_t selectGnaDevice();
bool hasGnaHw() const { bool hasGnaHw() const {
return Gna2DeviceVersionSoftwareEmulation != detectedGnaDevVersion; return Gna2DeviceVersionSoftwareEmulation != detectedGnaDevVersion;
} }

View File

@ -107,9 +107,10 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
switch (header.version.minor) { switch (header.version.minor) {
case 1: case 1:
readBits(tempHeader2dot1, is); readBits(tempHeader2dot1, is);
header = Header2dot2::ModelHeader(tempHeader2dot1); header = Header2dot3::ModelHeader(tempHeader2dot1);
break; break;
case 2: case 2:
case 3:
readBits(header, is); readBits(header, is);
break; break;
default: default:
@ -166,7 +167,30 @@ void GNAModelSerial::Import(void *basePointer,
InferenceEngine::OutputsDataMap& outputsDataMap) { InferenceEngine::OutputsDataMap& outputsDataMap) {
is.exceptions(std::istream::failbit); is.exceptions(std::istream::failbit);
if (modelHeader.version.major == 2) {
if (modelHeader.version.minor >= 3) {
for (auto inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
uint32_t nameSize = 0;
readNBits<32>(nameSize, is);
std::string inName("", nameSize);
readNBytes(&inName[0], nameSize, is);
inputNames.push_back(inName.substr(0, nameSize - 1));
}
}
}
ImportInputs(is, basePointer, inputsDesc, inputsDataMap); ImportInputs(is, basePointer, inputsDesc, inputsDataMap);
if (modelHeader.version.major == 2) {
if (modelHeader.version.minor >= 3) {
for (auto inputIndex = 0; inputIndex < modelHeader.nOutputs; inputIndex++) {
uint32_t nameSize = 0;
readNBits<32>(nameSize, is);
std::string outName("", nameSize);
readNBytes(&outName[0], nameSize, is);
outputNames.push_back(outName.substr(0, nameSize - 1));
}
}
}
ImportOutputs(is, basePointer, desc, outputsDataMap); ImportOutputs(is, basePointer, desc, outputsDataMap);
for (auto operation = gna2Model->Operations; operation != gna2Model->Operations + gna2Model->NumberOfOperations; ++operation) { for (auto operation = gna2Model->Operations; operation != gna2Model->Operations + gna2Model->NumberOfOperations; ++operation) {
@ -311,9 +335,19 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
writeBits(header, os); writeBits(header, os);
for (auto &name : inputNames) {
const auto nameSize = strlen(name.c_str()) + 1;
writeBits(static_cast<uint32_t>(nameSize), os);
writeNBytes(name.c_str(), nameSize , os);
}
for (const auto &input : inputs) { for (const auto &input : inputs) {
writeBits(convert_to_serial(input), os); writeBits(convert_to_serial(input), os);
} }
for (auto &name : outputNames) {
const auto nameSize = strlen(name.c_str()) + 1;
writeBits(static_cast<uint32_t>(nameSize), os);
writeNBytes(name.c_str(), nameSize, os);
}
for (const auto &output : outputs) { for (const auto &output : outputs) {
writeBits(convert_to_serial(output), os); writeBits(convert_to_serial(output), os);
} }
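A sketch (not the plugin's actual helpers) of the record layout the export loop above writes for each input/output name: a 32-bit length that includes the terminating null byte, followed by the raw characters. The import side reads the length, then the bytes, and strips the trailing null with `substr(0, nameSize - 1)`.

```cpp
#include <cstdint>
#include <ostream>
#include <string>

// Write one length-prefixed, null-terminated name record.
void write_name_record(std::ostream& os, const std::string& name) {
    const uint32_t nameSize = static_cast<uint32_t>(name.size() + 1);  // + '\0'
    os.write(reinterpret_cast<const char*>(&nameSize), sizeof(nameSize));
    os.write(name.c_str(), nameSize);  // c_str() includes the terminating '\0'
}
```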
@ -691,7 +725,8 @@ void GNAModelSerial::ImportInputs(std::istream &is,
dataMap.clear(); dataMap.clear();
for (auto inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) { for (auto inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
std::string name = "input" + std::to_string(inputIndex); const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
? inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex));
HeaderLatest::RuntimeEndPoint input; HeaderLatest::RuntimeEndPoint input;
is.read(reinterpret_cast<char *>(&input), sizeof(input)); is.read(reinterpret_cast<char *>(&input), sizeof(input));
inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset)); inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset));
@ -719,7 +754,8 @@ void GNAModelSerial::ImportOutputs(std::istream &is,
desc.resize(modelHeader.nOutputs); desc.resize(modelHeader.nOutputs);
for (auto outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) { for (auto outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
std::string name = "output" + std::to_string(outputIndex); const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
? outputNames.at(outputIndex) : std::string("input" + std::to_string(outputIndex));
HeaderLatest::RuntimeEndPoint output; HeaderLatest::RuntimeEndPoint output;
is.read(reinterpret_cast<char *>(&output), sizeof(output)); is.read(reinterpret_cast<char *>(&output), sizeof(output));
OutputDesc description; OutputDesc description;

View File

@ -32,6 +32,8 @@ private:
#endif #endif
std::vector<GNAPluginNS::HeaderLatest::RuntimeEndPoint> inputs; std::vector<GNAPluginNS::HeaderLatest::RuntimeEndPoint> inputs;
std::vector<GNAPluginNS::HeaderLatest::RuntimeEndPoint> outputs; std::vector<GNAPluginNS::HeaderLatest::RuntimeEndPoint> outputs;
std::vector<std::string> inputNames;
std::vector<std::string> outputNames;
uint32_t nRotateRows = 0; uint32_t nRotateRows = 0;
uint32_t nRotateColumns = 0; uint32_t nRotateColumns = 0;
bool doRotateInput = false; bool doRotateInput = false;
@ -63,6 +65,13 @@ private:
const InferenceEngine::OutputsDataMap& outputsDataMap) : gna2Model(model), const InferenceEngine::OutputsDataMap& outputsDataMap) : gna2Model(model),
inputs(serializeInputs(inputsDataMap, inputDesc)), inputs(serializeInputs(inputsDataMap, inputDesc)),
outputs(serializeOutputs(outputsDataMap, outputsDesc)) { outputs(serializeOutputs(outputsDataMap, outputsDesc)) {
for (auto const& input : inputsDataMap) {
inputNames.push_back(input.first);
}
for (auto const& input : outputsDataMap) {
outputNames.push_back(input.first);
}
} }
#else #else

View File

@ -36,8 +36,8 @@ Parameter GNAPlugin::GetMetric(const std::string& name, const std::map<std::stri
} }
if (!options.count(KEY_DEVICE_ID)) { if (!options.count(KEY_DEVICE_ID)) {
if (availableDevices.size() == 1) { if (availableDevices.size() == 1 || availableDevices.size() == 2) {
return availableDevices[0]; return availableDevices.back(); // detection order is GNA_SW, GNA_HW
} else { } else {
THROW_GNA_EXCEPTION << "KEY_DEVICE_ID not set in request for FULL_DEVICE_NAME"; THROW_GNA_EXCEPTION << "KEY_DEVICE_ID not set in request for FULL_DEVICE_NAME";
} }

View File

@ -631,11 +631,25 @@ void InsertIdentityLayerPass::run() {
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front()); auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
for (auto & l : *pLayers) { for (auto & l : *pLayers) {
for (auto && prev : getCandidatesForIdentityInsertion(l)) { for (auto && prev : getCandidatesForIdentityInsertion(l)) {
// Do an upstream search until Functional layer is found
auto original_prev_layer = prev;
auto true_layer = l;
while (LayerInfo(prev).isNonFunctional()) {
if (CNNNetHasPrevLayer(prev.get()) && prev->outData.size() == 1) {
true_layer = prev;
prev = CNNNetPrevLayer(prev);
} else {
gnawarn() << "Could not find Functional parent for " << original_prev_layer->name << ", using original layer";
prev = original_prev_layer;
true_layer = l;
break;
}
}
int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++; int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++;
// actual insertion // actual insertion
auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers); auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers);
gnalog() << "Inserted "<< activationName << " between: " << prev->name << " and " << l->name << "\n" << std::flush; gnalog() << "Inserted "<< activationName << " between: " << prev->name << " and " << true_layer->name << "\n" << std::flush;
CNNLayerPtr activationLayer = CNNLayerPtr activationLayer =
std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32})); std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32}));
@ -643,17 +657,17 @@ void InsertIdentityLayerPass::run() {
// TODO: why index is 0 ? - better use direct indexing in getCandidateFunction // TODO: why index is 0 ? - better use direct indexing in getCandidateFunction
// detecting ins-data-idx // detecting ins-data-idx
size_t insDataIdx = std::numeric_limits<size_t>::max(); size_t insDataIdx = std::numeric_limits<size_t>::max();
for (size_t i = 0; i != l->insData.size(); i++) { for (size_t i = 0; i != true_layer->insData.size(); i++) {
if (getCreatorLayer(l->insData[i].lock()).lock() == prev) { if (getCreatorLayer(true_layer->insData[i].lock()).lock() == prev) {
insDataIdx = i; insDataIdx = i;
break; break;
} }
} }
if (insDataIdx == std::numeric_limits<size_t>::max()) { if (insDataIdx == std::numeric_limits<size_t>::max()) {
THROW_GNA_EXCEPTION << "cannot insert identity layer after" << prev->name << " and before " << l->name; THROW_GNA_EXCEPTION << "cannot insert identity layer after" << prev->name << " and before " << true_layer->name;
} }
auto inputData = l->insData[insDataIdx].lock(); auto inputData = true_layer->insData[insDataIdx].lock();
auto dataPtr = std::make_shared<Data>("identity_data_" + std::to_string(numOfIdentityLayers), inputData->getTensorDesc()); auto dataPtr = std::make_shared<Data>("identity_data_" + std::to_string(numOfIdentityLayers), inputData->getTensorDesc());
auto activationLayerWithQuant = quantized ? auto activationLayerWithQuant = quantized ?
@ -681,7 +695,7 @@ void InsertIdentityLayerPass::run() {
activationLayerWithQuant->params["original_num_rows"] = prev->params["original_num_rows"]; activationLayerWithQuant->params["original_num_rows"] = prev->params["original_num_rows"];
} }
CNNNetworkInsertLayer(prev, notAll ? l : CNNLayerPtr(nullptr), activationLayerWithQuant); CNNNetworkInsertLayer(prev, notAll ? true_layer : CNNLayerPtr(nullptr), activationLayerWithQuant);
} }
} }
} }

View File

@ -7,5 +7,5 @@
#include <cstdint> #include <cstdint>
#define FLOAT_TO_INT16(a) static_cast<int16_t>(((a) < 0)?((a) - 0.5):((a) + 0.5)) #define FLOAT_TO_INT16(a) static_cast<int16_t>(((a) < 0)?((a) - 0.5f):((a) + 0.5f))
#define FLOAT_TO_INT32(a) static_cast<int32_t>(((a) < 0)?((a)-0.5):((a)+0.5)) #define FLOAT_TO_INT32(a) static_cast<int32_t>(((a) < 0)?((a)-0.5f):((a)+0.5f))

View File

@ -0,0 +1,122 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <cstdint>
#include "backend/dnn_types.h"
#include "serial/headers/2dot1/gna_model_header.hpp"
#pragma pack(push, 1)
namespace GNAPluginNS {
namespace Header2dot3 {
/**
* @brief Header version 2.3
*/
struct ModelHeader {
/**
*@brief Magic number "GNAM" in the ASCII table, equal to hex 0x474e414d
*/
char gnam[4] = {};
/**
* @brief If the header size is not equal to sizeof(ModelHeader), some reserved data is appended at the end of the header.
* This usually indicates that the model was produced by a different version of the export function than the current one.
*/
uint32_t headerSize = 0u;
struct Version {
/**
* @details Major version of the format, unsigned int, e.g. 0x0001.
* Every change in the header or in the layer definitions should be reflected in a version change.
* For backward compatibility, new parsers can read old model versions with certain restrictions.
*/
uint16_t major = 2u;
/**
* @details Minor version of the format, unsigned int, corresponding to the build revision, for example.
* Changes in the minor version do not affect the model layout.
*/
uint32_t minor = 3u;
} version;
/**
* @brief Memory required to be allocated using GNAAlloc()
*/
uint64_t gnaMemSize = 0ull;
/**
* @brief Number of GNA Layers
*/
uint64_t layersCount = 0ull;
/**
* @brief Grouping level
*/
uint32_t nGroup = 0u;
/**
* Convolution-related settings - they affect the input transformation
*/
uint32_t nRotateRows = 0u;
uint32_t nRotateColumns = 0u;
bool doRotateInput = false;
uint32_t nInputs = 0u;
uint32_t nOutputs = 0u;
/**
* Reserved Data might be here
*/
ModelHeader() = default;
ModelHeader(GNAPluginNS::Header2dot1::ModelHeader const &old) {
gnaMemSize = old.gnaMemSize;
layersCount = old.layersCount;
nGroup = old.nGroup;
nRotateRows = old.nRotateRows;
nRotateColumns = old.nRotateColumns;
nInputs = old.nInputs;
nOutputs = old.nOutputs;
}
};
#pragma pack(pop)
/*
* The runtime endpoint is mostly the same as in the serialized version, except for the descriptor field
*/
struct RuntimeEndPoint {
/**
* If the scale factor is different from the one passed into inference, the network might need to be requantized
*/
float scaleFactor = 0;
/**
* Pointer descriptor
*/
void* descriptor_ptr = nullptr;
/**
* Endpoint resolution in bytes.
*/
uint32_t element_size = 0;
/**
* Number of elements
*/
uint32_t elements_count = 0;
/**
* Offset in bytes of pointer descriptor
*/
uint64_t descriptor_offset = 0ull;
intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
RuntimeEndPoint() = default;
RuntimeEndPoint(double scaleFactor,
void* descriptor_ptr,
uint32_t element_size,
uint32_t elements_count,
intel_dnn_orientation_t orientation) : scaleFactor(scaleFactor),
descriptor_ptr(descriptor_ptr),
element_size(element_size),
elements_count(elements_count),
orientation(orientation) {
}
};
} // namespace Header2dot3
} // namespace GNAPluginNS

View File

@ -4,11 +4,11 @@
#pragma once #pragma once
#include "serial/headers/2dot2/gna_model_header.hpp" #include "serial/headers/2dot3/gna_model_header.hpp"
namespace GNAPluginNS { namespace GNAPluginNS {
namespace HeaderLatest { namespace HeaderLatest {
using ModelHeader = GNAPluginNS::Header2dot2::ModelHeader; using ModelHeader = GNAPluginNS::Header2dot3::ModelHeader;
using RuntimeEndPoint = GNAPluginNS::Header2dot2::RuntimeEndPoint; using RuntimeEndPoint = GNAPluginNS::Header2dot3::RuntimeEndPoint;
} }
} }

View File

@ -12,7 +12,7 @@ file (GLOB LIBRARY_SRC
# TODO: WA for OneHot pass usage in reshape # TODO: WA for OneHot pass usage in reshape
set(LEGACY_SRC_ROOT "${IE_MAIN_SOURCE_DIR}/src/legacy_api/src/") set(LEGACY_SRC_ROOT "${IE_MAIN_SOURCE_DIR}/src/legacy_api/src/")
list(APPEND LIBRARY_SRC set(LEGACY_LIBRARY_SHARED_SRCS
"${LEGACY_SRC_ROOT}/transformations/convert_opset1_to_legacy/convert_one_hot_to_one_hot_ie.cpp" "${LEGACY_SRC_ROOT}/transformations/convert_opset1_to_legacy/convert_one_hot_to_one_hot_ie.cpp"
"${LEGACY_SRC_ROOT}/ngraph_ops/onehot_ie.cpp") "${LEGACY_SRC_ROOT}/ngraph_ops/onehot_ie.cpp")
@ -125,6 +125,7 @@ add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}_obj)
add_library(${TARGET_NAME} SHARED add_library(${TARGET_NAME} SHARED
${IE_STATIC_DEPENDENT_FILES} ${IE_STATIC_DEPENDENT_FILES}
${LEGACY_LIBRARY_SHARED_SRCS}
${vs_version_file} ${vs_version_file}
$<TARGET_OBJECTS:${TARGET_NAME}_obj>) $<TARGET_OBJECTS:${TARGET_NAME}_obj>)
@ -137,7 +138,8 @@ target_link_libraries(${TARGET_NAME} PRIVATE pugixml openvino::itt ${CMAKE_DL_LI
${NGRAPH_LIBRARIES} inference_engine_transformations) ${NGRAPH_LIBRARIES} inference_engine_transformations)
target_include_directories(${TARGET_NAME} INTERFACE ${PUBLIC_HEADERS_DIR} target_include_directories(${TARGET_NAME} INTERFACE ${PUBLIC_HEADERS_DIR}
PRIVATE $<TARGET_PROPERTY:${TARGET_NAME}_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>) PRIVATE $<TARGET_PROPERTY:${TARGET_NAME}_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:${TARGET_NAME}_legacy,INTERFACE_INCLUDE_DIRECTORIES>)
if(WIN32) if(WIN32)
set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}) set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})

View File

@ -371,19 +371,42 @@ inline CNNLayerSet CNNNetGetAllInputLayers(const ICNNNetwork& network) {
InputsDataMap inputs; InputsDataMap inputs;
network.getInputsInfo(inputs); network.getInputsInfo(inputs);
OutputsDataMap outputs;
network.getOutputsInfo(outputs);
std::vector<DataPtr> entryDataSet;
entryDataSet.reserve(inputs.size() + outputs.size());
for (const auto &kvp : inputs)
entryDataSet.push_back(kvp.second->getInputData());
for (const auto &kvp : outputs)
entryDataSet.push_back(kvp.second);
CNNLayerSet inputLayers; CNNLayerSet inputLayers;
std::unordered_set<CNNLayer*> allLayers; std::unordered_set<CNNLayer*> allLayers;
if (inputs.empty()) return inputLayers; if (entryDataSet.empty()) return inputLayers;
for (const auto& input : inputs) { // define any layer connected to provided Data object (consumer or creator)
auto& secondLayers = getInputTo(input.second->getInputData()); auto findConnectedLayer = [] (const DataPtr& data) -> CNNLayerPtr {
auto consumerLayers = getInputTo(data);
if (!consumerLayers.empty())
return consumerLayers.begin()->second;
if (secondLayers.empty()) continue; auto creator = getCreatorLayer(data).lock();
if (creator != nullptr)
return creator;
return nullptr;
};
for (const auto& data : entryDataSet) {
auto entryLayer = findConnectedLayer(data);
if (entryLayer == nullptr) continue;
details::UnorderedDFS( details::UnorderedDFS(
allLayers, secondLayers.begin()->second, allLayers, entryLayer,
[&](CNNLayerPtr layer) { [&inputLayers](const CNNLayerPtr& layer) {
if (layer->insData.empty()) { if (layer->insData.empty()) {
inputLayers.insert(layer); inputLayers.insert(layer);
} }

View File

@ -132,13 +132,6 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr<ngraph::Node>& layer) {
THROW_IE_EXCEPTION << "Cannot cast layer to TensorIterator."; THROW_IE_EXCEPTION << "Cannot cast layer to TensorIterator.";
} }
std::map<uint64_t, std::vector<std::pair<std::string, uint64_t>>> ngraph_parameter_id_to_ie_layer_port;
std::map<std::pair<std::string, uint64_t>, uint64_t> ie_layer_port_to_tensor_iterator_input_id;
// inputs/outputs of TensorIterator body (ie)
std::map<std::string, DataPtr> in_info_map;
std::map<std::string, DataPtr> out_info_map;
// inputs/outputs of TensorIterator (ngraph representation) // inputs/outputs of TensorIterator (ngraph representation)
auto parameters = tensor_iterator->get_function()->get_parameters(); auto parameters = tensor_iterator->get_function()->get_parameters();
auto results = tensor_iterator->get_function()->get_results(); auto results = tensor_iterator->get_function()->get_results();
@ -148,10 +141,7 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr<ngraph::Node>& layer) {
// IE TensorIterator doesn't include such nodes so we create CNNNetwork in a separate scope // IE TensorIterator doesn't include such nodes so we create CNNNetwork in a separate scope
// to call the destructor and delete these "Input"/data nodes. // to call the destructor and delete these "Input"/data nodes.
// These layers will hold the necessary subnet after destruction of CNNNetwork. TensorIterator::Body body;
std::set<InferenceEngine::CNNLayerPtr> body_input_layers;
// This map will save information about data nodes
std::map<std::string, std::vector<TensorDesc>> layer_name_to_tensor_desc;
{ {
CNNNetwork body_net(tensor_iterator->get_function()); CNNNetwork body_net(tensor_iterator->get_function());
CNNNetwork net(InferenceEngine::details::convertFunctionToICNNNetwork(body_net.getFunction(), body_net)); CNNNetwork net(InferenceEngine::details::convertFunctionToICNNNetwork(body_net.getFunction(), body_net));
@ -163,73 +153,102 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr<ngraph::Node>& layer) {
} }
// Get inputs/outputs of cnn network // Get inputs/outputs of cnn network
InputsDataMap in_info_map_with_parameters; auto in_info_map_with_parameters = net.getInputsInfo();
in_info_map_with_parameters = net.getInputsInfo(); auto out_info_map = net.getOutputsInfo();
out_info_map = net.getOutputsInfo();
// Fill the map to get layer and port of the body by the parameter index. IE_ASSERT(in_info_map_with_parameters.size() == parameters.size());
IE_ASSERT(out_info_map.size() == results.size());
InferenceEngine::TensorIterator::Body temp_body;
temp_body.inputs.resize(in_info_map_with_parameters.size());
temp_body.outputs.resize(out_info_map.size());
// Fill inputs/outs in order aligned with ng representation
uint64_t counter = 0; uint64_t counter = 0;
for (const auto& param : parameters) { for (const auto& param : parameters) {
auto info = in_info_map_with_parameters.at(param->get_friendly_name()); auto info = in_info_map_with_parameters.at(param->get_friendly_name());
auto data_ptr = info->getInputData(); temp_body.inputs[counter++] = info->getInputData();
auto input_to = getInputTo(data_ptr);
for (const auto& next_layer : input_to) {
auto port_idx = find_input_idx(next_layer.second, data_ptr);
ngraph_parameter_id_to_ie_layer_port[counter].push_back({next_layer.first, port_idx});
}
counter++;
} }
// Temporary body to call deep copy auto map_ng_result_to_ie_name = [] (std::shared_ptr<ngraph::op::v0::Result> res_op) {
InferenceEngine::TensorIterator::Body temp_body; auto result = res_op->input(0).get_source_output();
for (const auto& in : in_info_map_with_parameters) {
temp_body.inputs.emplace_back(in.second->getInputData());
}
for (const auto& out : out_info_map) { std::string name = result.get_node()->get_friendly_name();
temp_body.outputs.emplace_back(out.second); if (result.get_node()->get_output_size() > 1) {
name += "." + std::to_string(result.get_index());
}
return name;
};
counter = 0;
for (const auto& result : results) {
auto data = out_info_map.at(map_ng_result_to_ie_name(result));
temp_body.outputs[counter++] = data;
} }
// This deep copy will hold all unreachable constants. See the comment in CopyTIBody function. // This deep copy will hold all unreachable constants. See the comment in CopyTIBody function.
auto deep_cp_body = InferenceEngine::NetPass::CopyTIBody(temp_body); body = InferenceEngine::NetPass::CopyTIBody(temp_body);
for (const auto& data_ptr : deep_cp_body.inputs) {
auto input_to = getInputTo(data_ptr); // Check if data is really const layer holder
for (const auto& node : input_to) { auto is_constant_holder = [] (const DataPtr data) {
// Make it compatible with ir v7: delete Input layers in body return data->getPrecision() == Precision::UNSPECIFIED;
if (node.second->type != "Input") { };
body_input_layers.emplace(node.second);
// Save information about data nodes to re-create them with correct names. // Strip unreached node holder from Inputs node.
for (const auto& data : node.second->insData) { auto holder = body.inputs.back();
layer_name_to_tensor_desc[node.second->name].emplace_back(data.lock()->getTensorDesc()); if (is_constant_holder(holder)) {
} auto& holder_map = getInputTo(holder);
} // remove_if
for( auto it = holder_map.begin(); it != holder_map.end(); ) {
if( it->second->type == "Input")
it = holder_map.erase(it);
else
++it;
} }
} }
for (const auto& data_ptr : deep_cp_body.outputs) { // TODO: Disable this WA after total switch onto Ngraph
out_info_map[data_ptr->getName()] = data_ptr; // WA: Some plugins (like GPU) require matching of Data object name and producer Layer name.
} // Data name is expected in format "[layer_name]" or "[layer_name].[port_idx]" in case
} // of multiple inputs. We have to restore it if possible and ignore original names of
// Ngraph parameter and result ops.
auto holder = std::make_shared<Data>("const_holder", Precision::UNSPECIFIED); // Will not change data name if:
for (const auto& input_layer : body_input_layers) { // - data has several consumer layers
// Save all constants to the holder so that they are not deleted. // - data has no consumer (example if data is straight used as output)
if (input_layer->insData.empty()) { //
getInputTo(holder)[input_layer->name] = input_layer; for (auto &in : body.inputs) {
if (is_constant_holder(in))
continue; continue;
const auto input_to = getInputTo(in);
if (input_to.size() != 1)
continue;
const auto consumer_layer = input_to.begin()->second;
const auto consumer_in_port_set = consumer_layer->insData;
const auto found = std::find_if(consumer_in_port_set.begin(), consumer_in_port_set.end(),
[&in] (const DataWeakPtr &wptr) { return wptr.lock() == in; });
IE_ASSERT(found != consumer_in_port_set.end());
const auto consumer_port_idx = std::distance(consumer_in_port_set.begin(), found);
auto new_name = consumer_layer->name;
if (consumer_in_port_set.size() > 1) {
new_name += '.' + std::to_string(consumer_port_idx);
}
in->setName(new_name);
} }
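The renaming above follows the format described in the comment: a body input Data object takes its single consumer's name, with `.<port>` appended when that consumer has several input ports. The rule in isolation (a hypothetical helper, shown only for illustration):

```cpp
#include <cstddef>
#include <string>

// "<layer_name>" for a single-input consumer, "<layer_name>.<port_idx>" otherwise.
std::string makeDataName(const std::string& consumer_name,
                         std::size_t port_idx,
                         std::size_t consumer_port_count) {
    std::string name = consumer_name;
    if (consumer_port_count > 1)
        name += '.' + std::to_string(port_idx);
    return name;
}

// makeDataName("lstm_cell", 1, 3) -> "lstm_cell.1"
// makeDataName("relu", 0, 1)      -> "relu"
```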
// Re-create the data nodes with the correct names and fill inputs of TensorIterator (ie) // TODO: this WA restore original precisions of outputs.
for (size_t i = 0; i < input_layer->insData.size(); i++) { // convertFunctionToICNNNetwork has internal fallback policy for unsupported
if (!input_layer->insData[i].lock()) { // precisions for inputs/outputs ports. Particular for U8 will be translated
std::string data_name = (input_layer->insData.size() == 1) // to FP32. However Loop body has strong requirements for continue_condition
? input_layer->name // port, it should be BOOL(U8).
: input_layer->name + "." + std::to_string(i); //
for (int i = 0; i < results.size(); i++) {
DataPtr data(new Data(data_name, layer_name_to_tensor_desc[input_layer->name][i])); auto result = results[i];
input_layer->insData[i] = data; auto output = body.outputs[i];
getInputTo(data)[input_layer->name] = input_layer; if (result->get_element_type() == ngraph::element::u8) {
in_info_map[data_name] = data; output->setPrecision(InferenceEngine::Precision::U8);
} }
} }
} }
@ -238,44 +257,11 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr<ngraph::Node>& layer) {
LayerParams params = {layer->get_friendly_name(), "TensorIterator", LayerParams params = {layer->get_friendly_name(), "TensorIterator",
details::convertPrecision(layer->get_output_element_type(0))}; details::convertPrecision(layer->get_output_element_type(0))};
auto res = std::make_shared<InferenceEngine::TensorIterator>(params); auto res = std::make_shared<InferenceEngine::TensorIterator>(params);
res->body = body;
// Body: inputs
uint64_t counter = 0;
for (const auto& in : in_info_map) {
res->body.inputs.emplace_back(in.second);
// Fill the map to get the input index by layer and port of the body.
auto input_to = getInputTo(in.second);
for (const auto& next_layer : input_to) {
auto port_idx = find_input_idx(next_layer.second, in.second);
ie_layer_port_to_tensor_iterator_input_id[{next_layer.first, port_idx}] = counter;
}
counter++;
}
// the holder should be the last input element.
res->body.inputs.emplace_back(holder);
// Body: outputs
for (const auto& out : out_info_map) {
res->body.outputs.emplace_back(out.second);
}
// Port map: outputs // Port map: outputs
for (const auto& desc : tensor_iterator->get_output_descriptions()) { for (const auto& desc : tensor_iterator->get_output_descriptions()) {
auto result = results[desc->m_body_value_index]->input(0).get_source_output(); auto body_output_idx = desc->m_body_value_index;
std::string name = result.get_node()->get_friendly_name();
if (result.get_node()->get_output_size() > 1) {
name += "." + std::to_string(result.get_index());
}
auto output_layer = out_info_map.at(name);
// Find index in outputs of the IE TensorIterator body
auto it = std::find(res->body.outputs.begin(), res->body.outputs.end(), output_layer);
if (it == res->body.outputs.end()) {
THROW_IE_EXCEPTION << "Output layer not found.";
}
auto body_output_idx = it - res->body.outputs.begin();
std::string type_name = desc->get_type_info().name; std::string type_name = desc->get_type_info().name;
if (type_name == "ConcatOutputDescription") { if (type_name == "ConcatOutputDescription") {
@ -301,54 +287,42 @@ CNNLayer::Ptr createSubGraphLayer(const std::shared_ptr<ngraph::Node>& layer) {
// Port map : inputs and back edges // Port map : inputs and back edges
for (const auto& desc : tensor_iterator->get_input_descriptions()) { for (const auto& desc : tensor_iterator->get_input_descriptions()) {
for (const auto& mapping : ngraph_parameter_id_to_ie_layer_port[desc->m_body_parameter_index]) { auto body_input_index = desc->m_body_parameter_index;
auto body_input_index = ie_layer_port_to_tensor_iterator_input_id.at(mapping);
std::string type_name = desc->get_type_info().name;
if (type_name == "SliceInputDescription") {
auto input_desc = ::ngraph::as_type_ptr<ngraph::op::TensorIterator::SliceInputDescription>(desc);
IE_ASSERT(input_desc != nullptr);
if (const auto slice_desc = std::dynamic_pointer_cast<ngraph::op::TensorIterator::SliceInputDescription>(desc)) {
res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap { res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap {
static_cast<int>(input_desc->m_input_index), static_cast<int>(body_input_index), static_cast<int>(slice_desc->m_input_index), static_cast<int>(body_input_index),
static_cast<int>(input_desc->m_axis), static_cast<int>(input_desc->m_stride), static_cast<int>(slice_desc->m_axis), static_cast<int>(slice_desc->m_stride),
static_cast<int>(input_desc->m_start), static_cast<int>(input_desc->m_end), static_cast<int>(slice_desc->m_start), static_cast<int>(slice_desc->m_end),
static_cast<int>(input_desc->m_part_size)}); static_cast<int>(slice_desc->m_part_size)});
} else if (type_name == "MergedInputDescription") { } else if (const auto merge_desc = std::dynamic_pointer_cast<ngraph::op::TensorIterator::MergedInputDescription>(desc)) {
auto input_desc = ::ngraph::as_type_ptr<ngraph::op::TensorIterator::MergedInputDescription>(desc);
IE_ASSERT(input_desc != nullptr);
res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap { res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap {
static_cast<int>(input_desc->m_input_index), static_cast<int>(body_input_index), -1, 1, 0, -1, 1}); static_cast<int>(merge_desc->m_input_index), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});
auto result = results[input_desc->m_body_value_index]->inputs()[0].get_source_output(); auto body_output_idx = merge_desc->m_body_value_index;
// Create correct name for output.
std::string output_name = result.get_node()->get_friendly_name();
if (result.get_node()->get_output_size() > 1) {
output_name += "." + std::to_string(result.get_index());
}
auto output_layer = out_info_map.at(output_name);
// Find index in outputs of the IE TensorIterator body
auto it = std::find(res->body.outputs.begin(), res->body.outputs.end(), output_layer);
if (it == res->body.outputs.end()) {
THROW_IE_EXCEPTION << "Output layer not found.";
}
auto body_output_idx = it - res->body.outputs.begin();
res->back_edges.emplace_back(InferenceEngine::TensorIterator::PortMap { res->back_edges.emplace_back(InferenceEngine::TensorIterator::PortMap {
static_cast<int>(body_output_idx), static_cast<int>(body_input_index), -1, 1, 0, -1, 1}); static_cast<int>(body_output_idx), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});
} else if (type_name == "InvariantInputDescription") { } else if (const auto inv_desc = std::dynamic_pointer_cast<ngraph::op::TensorIterator::InvariantInputDescription>(desc)) {
auto input_desc = ::ngraph::as_type_ptr<ngraph::op::TensorIterator::InvariantInputDescription>(desc);
IE_ASSERT(input_desc != nullptr);
res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap { res->input_port_map.emplace_back(InferenceEngine::TensorIterator::PortMap {
static_cast<int>(input_desc->m_input_index), static_cast<int>(body_input_index), -1, 1, 0, -1, 1}); static_cast<int>(inv_desc->m_input_index), static_cast<int>(body_input_index), -1, 1, 0, -1, 1});
} else { } else {
THROW_IE_EXCEPTION << "Incorrect type of the input description."; THROW_IE_EXCEPTION << "Incorrect type of the input description.";
} }
} }
if (const auto loop_op = std::dynamic_pointer_cast<const ngraph::opset5::Loop>(layer)) {
auto spec_port = loop_op->get_special_body_ports();
if (spec_port.current_iteration_input_idx != -1) {
auto ie_port_idx = spec_port.current_iteration_input_idx;
res->params["loop_body_current_iteration_idx"] = std::to_string(ie_port_idx);
}
if (spec_port.body_condition_output_idx != -1) {
auto body_output_idx = spec_port.body_condition_output_idx;
res->params["loop_body_condition_output_idx"] = std::to_string(body_output_idx);
}
res->params["loop_trip_count_idx"] = "0";
res->params["loop_execution_condition_idx"] = "1";
} }
return res; return res;
@ -1173,14 +1147,6 @@ CNNLayer::Ptr NodeConverter<ngraph::op::ReverseSequence>::createLayer(const std:
return res; return res;
} }
template <>
CNNLayer::Ptr NodeConverter<ngraph::op::Reshape>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
LayerParams params = {layer->get_friendly_name(), "Reshape",
details::convertPrecision(layer->get_output_element_type(0))};
auto res = std::make_shared<InferenceEngine::ReshapeLayer>(params);
return res;
}
template <> template <>
CNNLayer::Ptr NodeConverter<ngraph::op::ShapeOf>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const { CNNLayer::Ptr NodeConverter<ngraph::op::ShapeOf>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
LayerParams params = {layer->get_friendly_name(), "ShapeOf", LayerParams params = {layer->get_friendly_name(), "ShapeOf",

View File

@ -46,15 +46,28 @@ static std::vector<DataPtr> getAllInputs(const std::vector<DataPtr>& heads) {
CNNLayerSet inputLayers; CNNLayerSet inputLayers;
std::unordered_set<CNNLayer*> allLayers; std::unordered_set<CNNLayer*> allLayers;
// find any layer connected to the provided Data object (consumer or creator)
auto findConnectedLayer = [] (const DataPtr& data) -> CNNLayerPtr {
auto consumerLayers = getInputTo(data);
if (!consumerLayers.empty())
return consumerLayers.begin()->second;
auto creator = getCreatorLayer(data).lock();
if (creator != nullptr)
return creator;
return nullptr;
};
// Define all start layers // Define all start layers
for (const auto& data : heads) { for (const auto& data : heads) {
auto& secondLayers = getInputTo(data); auto entryLayer = findConnectedLayer(data);
if (secondLayers.empty()) continue; if (entryLayer == nullptr) continue;
details::UnorderedDFS( details::UnorderedDFS(
allLayers, secondLayers.begin()->second, allLayers, entryLayer,
[&](CNNLayerPtr layer) { [&inputLayers](const CNNLayerPtr &layer) {
if (layer->insData.empty()) { if (layer->insData.empty()) {
inputLayers.insert(layer); inputLayers.insert(layer);
} }
@ -77,10 +90,17 @@ static std::vector<DataPtr> getAllInputs(const std::vector<DataPtr>& heads) {
std::vector<CNNLayerPtr> TIBodySortTopologically(const TensorIterator::Body& body) { std::vector<CNNLayerPtr> TIBodySortTopologically(const TensorIterator::Body& body) {
std::vector<CNNLayerPtr> all_layers; std::vector<CNNLayerPtr> all_layers;
auto all_input_layers = getAllInputs(body.inputs); // In case of a graph with several connected components,
// the total entry point is the union of [inputs] U [outputs].
// All internal nodes are reachable starting from this set.
auto total_entry_point = body.inputs;
total_entry_point.insert(total_entry_point.end(),
body.outputs.begin(), body.outputs.end());
auto all_input_layers = getAllInputs(total_entry_point);
CNNNetForestDFS( CNNNetForestDFS(
all_input_layers, all_input_layers,
[&](CNNLayerPtr current) { [&all_layers](const CNNLayerPtr &current) {
all_layers.push_back(current); all_layers.push_back(current);
}, },
false); false);
@ -143,9 +163,17 @@ TensorIterator::Body CopyTIBody(const TensorIterator::Body& body, std::string su
} }
TensorIterator::Body res; TensorIterator::Body res;
for (auto& in : body.inputs) res.inputs.emplace_back(old2new_d[in.get()]); for (auto& in : body.inputs) {
auto found = old2new_d.find(in.get());
IE_ASSERT(found != old2new_d.end());
res.inputs.emplace_back(found->second);
}
for (auto& out : body.outputs) res.outputs.emplace_back(old2new_d[out.get()]); for (auto& out : body.outputs) {
auto found = old2new_d.find(out.get());
IE_ASSERT(found != old2new_d.end());
res.outputs.emplace_back(found->second);
}
// Fake holder. // Fake holder.
// The graph itself is a shared_ptr set where parent holds child. // The graph itself is a shared_ptr set where parent holds child.

View File

@ -110,64 +110,73 @@ void MKLDNNGraph::Replicate(const TensorIterator::Body &subgraph, const MKLDNNEx
this->_name = "subgraph"; this->_name = "subgraph";
this->reuse_io_tensors = false; this->reuse_io_tensors = false;
std::unordered_map<CNNLayerPtr, MKLDNNNodePtr> layer2node; // Map data object onto producer layer(node)
std::unordered_set<DataPtr> unused_data; // nodes which has no consumers (output or just unused) std::unordered_map<Data*, std::pair<MKLDNNNodePtr, int>> data2node;
auto _parent_port = [] (const DataPtr &data) -> int { // nodes which has no consumers (output or just unused). But doesn't marked as graph output.
auto parent = getCreatorLayer(data).lock(); // Will be stored as fake output separately.
for (int i = 0; parent->outData.size(); i++) std::unordered_set<DataPtr> unused_data;
if (data == parent->outData[i])
return i;
return -1;
};
auto _child_port = [] (const DataPtr &data, const CNNLayerPtr &layer) -> int { // Step 1. Replicate input nodes
for (int i = 0; layer->insData.size(); i++) for (const auto &input : subgraph.inputs) {
if (data == layer->insData[i].lock()) if (input->getPrecision() == Precision::UNSPECIFIED) continue; // const node holder
return i;
return -1;
};
auto creator = getCreatorLayer(input).lock();
if (creator == nullptr) {
creator.reset(new CNNLayer({input->getName(), "Input", input->getTensorDesc().getPrecision()}));
creator->outData.push_back(input);
}
// Replicate All Nodes in topological order const MKLDNNNodePtr node(MKLDNNNode::factory().create(creator, getEngine(), extMgr, weightsCache));
for (const auto layer : NetPass::TIBodySortTopologically(subgraph)) { data2node[input.get()] = {node, 0};
CNNLayerPtr _layer = layer;
const MKLDNNNodePtr node(MKLDNNNode::factory().create(_layer, getEngine(), extMgr, weightsCache));
graphNodes.push_back(node); graphNodes.push_back(node);
layer2node[layer] = node; inputNodes[input->getName()] = node;
if (getInputTo(input).empty()) {
unused_data.insert(input);
}
}
// Step 2. Replicate all internal nodes.
for (const auto layer : NetPass::TIBodySortTopologically(subgraph)) {
const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)};
graphNodes.push_back(node);
for (int port = 0; port < layer->insData.size(); port++) { for (int port = 0; port < layer->insData.size(); port++) {
auto data = layer->insData[port].lock(); auto data = layer->insData[port].lock();
auto parent_layer = getCreatorLayer(data).lock();
if (!parent_layer) continue; // no parent means that it is input data node (or memory/const layer)
auto parent_node = layer2node[parent_layer]; auto port_info = data2node[data.get()];
auto parent_node = port_info.first;
auto parent_port_idx = port_info.second;
MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(data), port)); MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, port));
node->addEdge(edge); node->addEdge(edge);
graphEdges.push_back(edge); graphEdges.push_back(edge);
} }
int out_port_idx = 0;
for (auto &out_data : layer->outData) { for (auto &out_data : layer->outData) {
data2node[out_data.get()] = {node, out_port_idx++};
if (getInputTo(out_data).empty()) { if (getInputTo(out_data).empty()) {
unused_data.insert(out_data); unused_data.insert(out_data);
} }
} }
} }
// Step 3. Add output nodes and output stubs for unused data objects.
for (const auto &output : subgraph.outputs) { for (const auto &output : subgraph.outputs) {
auto parent_layer = getCreatorLayer(output).lock(); auto port_info = data2node[output.get()];
auto parent_node = layer2node[parent_layer]; auto parent_node = port_info.first;
auto parent_port_idx = port_info.second;
CNNLayerPtr layer(new CNNLayer({"out_" + output->getName(), "Output", output->getTensorDesc().getPrecision()})); CNNLayerPtr layer(new CNNLayer({"out_" + output->getName(), "Output", output->getTensorDesc().getPrecision()}));
layer->insData.push_back(output); layer->insData.push_back(output);
const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)); const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)};
MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(output), 0)); MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0));
node->addEdge(edge); node->addEdge(edge);
graphEdges.push_back(edge); graphEdges.push_back(edge);
graphNodes.push_back(node); graphNodes.push_back(node);
outputNodes.push_back(node); outputNodes.push_back(node);
@ -176,39 +185,20 @@ void MKLDNNGraph::Replicate(const TensorIterator::Body &subgraph, const MKLDNNEx
// Add stub output node for unused data // Add stub output node for unused data
for (auto to_stub_data : unused_data) { for (auto to_stub_data : unused_data) {
auto parent_layer = getCreatorLayer(to_stub_data).lock(); auto port_info = data2node[to_stub_data.get()];
auto parent_node = layer2node[parent_layer]; auto parent_node = port_info.first;
auto parent_port_idx = port_info.second;
CNNLayerPtr layer(new CNNLayer({"stub_" + parent_layer->name, "Output", to_stub_data->getTensorDesc().getPrecision()})); CNNLayerPtr layer(new CNNLayer({"stub_" + to_stub_data->getName(), "Output", to_stub_data->getTensorDesc().getPrecision()}));
layer->insData.push_back(to_stub_data); layer->insData.push_back(to_stub_data);
const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)); const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache));
MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(to_stub_data), 0)); MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0));
node->addEdge(edge); node->addEdge(edge);
graphEdges.push_back(edge); graphEdges.push_back(edge);
graphNodes.push_back(node); graphNodes.push_back(node);
} }
// Replicate input nodes
for (const auto &input : subgraph.inputs) {
if (input->getName() == "const_holder") continue;
CNNLayerPtr layer(new CNNLayer({"in_" + input->getName(), "Input", input->getTensorDesc().getPrecision()}));
layer->outData.push_back(input);
const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache));
for (auto p : getInputTo(input)) {
auto consumer = p.second;
MKLDNNEdgePtr edge(new MKLDNNEdge(node, layer2node[consumer], 0, _child_port(input, consumer)));
node->addEdge(edge);
graphEdges.push_back(edge);
}
graphNodes.push_back(node);
inputNodes[input->getName()] = node;
}
} }
void MKLDNNGraph::Replicate(const ICNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) { void MKLDNNGraph::Replicate(const ICNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) {

View File

@ -76,6 +76,11 @@ public:
return outputNodes; return outputNodes;
} }
std::map<std::string, MKLDNNNodePtr>& GetInputNodes() {
return inputNodes;
}
mkldnn::engine getEngine() const { mkldnn::engine getEngine() const {
return eng; return eng;
} }

View File

@ -600,7 +600,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndActivation(MKLDNNGraph &graph) {
return eltwiseNode && return eltwiseNode &&
(eltwiseNode->getOpType() == Relu || (eltwiseNode->getOpType() == Relu ||
(conv->getCnnLayer()->precision == Precision::FP32 && (conv->getCnnLayer()->precision == Precision::FP32 &&
IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid}))); IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid,
Round})));
}; };
for (int i = 0; i < graphNodes.size(); i++) { for (int i = 0; i < graphNodes.size(); i++) {
@ -678,7 +679,8 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra
if (eltwiseNode == nullptr) if (eltwiseNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get Eltwise node " << childNode->getName(); THROW_IE_EXCEPTION << "Cannot get Eltwise node " << childNode->getName();
if (IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid})) { if (IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish,
Hsigmoid, Round})) {
return true; return true;
} else if (IsOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu})) { } else if (IsOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu})) {
if (eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() != 2) if (eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() != 2)
@ -1044,7 +1046,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph)
return ((eltwiseNode->getOpType() == MulAdd && node->getCnnLayer()->blobs.size() == 2) || return ((eltwiseNode->getOpType() == MulAdd && node->getCnnLayer()->blobs.size() == 2) ||
(eltwiseNode->getOpType() == Prelu) || (eltwiseNode->getOpType() == Prelu) ||
IsOneOf(eltwiseNode->getOpType(), {Relu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid})); IsOneOf(eltwiseNode->getOpType(), {Relu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish,
Hsigmoid, Round}));
} }
return false; return false;
@ -1258,7 +1261,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
return eltwiseNode && return eltwiseNode &&
(eltwiseNode->getOpType() == Relu || (eltwiseNode->getOpType() == Relu ||
(conv->getCnnLayer()->precision == Precision::FP32 && (conv->getCnnLayer()->precision == Precision::FP32 &&
IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid}))); IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid,
Round})));
}; };
for (auto &graphNode : graphNodes) { for (auto &graphNode : graphNodes) {
@ -1611,7 +1615,7 @@ void MKLDNNGraphOptimizer::FuseNormalizeAndSimpleOperation(MKLDNNGraph &graph) {
if (eltwiseNode == nullptr) if (eltwiseNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get Eltwise node " << node->getName(); THROW_IE_EXCEPTION << "Cannot get Eltwise node " << node->getName();
return IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Tanh, Swish, return IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Tanh, Swish,
Hswish, Mish, Hsigmoid, Linear, Abs, Square, Sqrt}) || Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt}) ||
((eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() == 2) || ((eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() == 2) ||
(eltwiseNode->getOpType() == Prelu)); (eltwiseNode->getOpType() == Prelu));
} }

View File

@ -75,6 +75,7 @@ static const InferenceEngine::details::caseless_unordered_map<std::string, Type>
{ "HSwish", Eltwise }, { "HSwish", Eltwise },
{ "Mish", Eltwise }, { "Mish", Eltwise },
{ "HSigmoid", Eltwise }, { "HSigmoid", Eltwise },
{ "Round", Eltwise },
{ "ScaleShift", Eltwise }, { "ScaleShift", Eltwise },
{ "PReLU", Eltwise }, { "PReLU", Eltwise },
{ "Norm", Lrn }, { "Norm", Lrn },
@ -112,6 +113,7 @@ static const InferenceEngine::details::caseless_unordered_map<std::string, Type>
{ "BinaryConvolution", BinaryConvolution }, { "BinaryConvolution", BinaryConvolution },
{ "DeformableConvolution", DeformableConvolution }, { "DeformableConvolution", DeformableConvolution },
{ "TensorIterator", TensorIterator }, { "TensorIterator", TensorIterator },
{ "Loop", TensorIterator },
{ "MemoryInput", MemoryInput}, // for construction from name ctor, arbitrary name is used { "MemoryInput", MemoryInput}, // for construction from name ctor, arbitrary name is used
{ "Memory", MemoryOutput }, // for construction from layer ctor { "Memory", MemoryOutput }, // for construction from layer ctor
{ "Convert", Convert }, { "Convert", Convert },

View File

@ -312,7 +312,8 @@ private:
auto& eltwiseNode = dynamic_cast<const MKLDNNEltwiseNode&>(node); auto& eltwiseNode = dynamic_cast<const MKLDNNEltwiseNode&>(node);
switch (eltwiseNode.getOpType()) { switch (eltwiseNode.getOpType()) {
case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt: case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt:
case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish: case Mish: case Hsigmoid: case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish:
case Mish: case Hsigmoid: case Round:
return jit_mkldnn_emitter::get_supported_precisions(); return jit_mkldnn_emitter::get_supported_precisions();
case Add: return jit_add_emitter::get_supported_precisions(); case Add: return jit_add_emitter::get_supported_precisions();
case MulAdd: return jit_mul_add_emitter::get_supported_precisions(); case MulAdd: return jit_mul_add_emitter::get_supported_precisions();
@ -345,7 +346,8 @@ private:
auto& eltwiseNode = dynamic_cast<const MKLDNNEltwiseNode&>(node); auto& eltwiseNode = dynamic_cast<const MKLDNNEltwiseNode&>(node);
switch (eltwiseNode.getOpType()) { switch (eltwiseNode.getOpType()) {
case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt: case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt:
case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish: case Mish: case Hsigmoid: case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish:
case Mish: case Hsigmoid: case Round:
return std::make_shared<jit_mkldnn_emitter>(this, isa, eltwiseNode, exec_prec); return std::make_shared<jit_mkldnn_emitter>(this, isa, eltwiseNode, exec_prec);
case Add: return std::make_shared<jit_add_emitter>(this, isa, eltwiseNode, exec_prec); case Add: return std::make_shared<jit_add_emitter>(this, isa, eltwiseNode, exec_prec);
case MulAdd: return std::make_shared<jit_mul_add_emitter>(this, isa, eltwiseNode, exec_prec); case MulAdd: return std::make_shared<jit_mul_add_emitter>(this, isa, eltwiseNode, exec_prec);
@ -764,6 +766,18 @@ MKLDNNEltwiseNode::initializers = {
opType = Hsigmoid; opType = Hsigmoid;
algorithm = mkldnn::eltwise_hsigmoid; algorithm = mkldnn::eltwise_hsigmoid;
}}, }},
{"round", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = 0.0f;
beta = 0.0f;
opType = Round;
std::string mode = activationLayer->GetParamAsString("mode", "half_to_even");
if (mode == "half_to_even")
algorithm = mkldnn::eltwise_round_half_to_even;
else if (mode == "half_away_from_zero")
algorithm = mkldnn::eltwise_round_half_away_from_zero;
else
THROW_IE_EXCEPTION << "Round layer with name " << activationLayer->name << " doesn't support mode " << mode;
}},
}; };
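The `mode` attribute selects between two tie-breaking rules: `half_to_even` is banker's rounding, while `half_away_from_zero` is the classic arithmetic rule. A standalone sketch of the numeric semantics only (the actual computation is done by the mkldnn eltwise kernels selected above):

```cpp
#include <cassert>
#include <cfenv>
#include <cmath>

float round_half_to_even(float x) {
    std::fesetround(FE_TONEAREST);   // IEEE default: ties go to the even neighbour
    return std::nearbyintf(x);
}

float round_half_away_from_zero(float x) {
    return std::roundf(x);           // std::round always breaks ties away from zero
}

int main() {
    assert(round_half_to_even(2.5f) == 2.0f);
    assert(round_half_to_even(3.5f) == 4.0f);
    assert(round_half_away_from_zero(2.5f)  ==  3.0f);
    assert(round_half_away_from_zero(-2.5f) == -3.0f);
    return 0;
}
```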
void MKLDNNEltwiseNode::init() { void MKLDNNEltwiseNode::init() {
@ -833,7 +847,8 @@ void MKLDNNEltwiseNode::init() {
comparator(layerType, "swish") || comparator(layerType, "swish") ||
comparator(layerType, "hswish") || comparator(layerType, "hswish") ||
comparator(layerType, "mish") || comparator(layerType, "mish") ||
comparator(layerType, "hsigmoid")) { comparator(layerType, "hsigmoid") ||
comparator(layerType, "round")) {
initializers[layerType](getCnnLayer().get(), eltwiseOp, eltwiseAlgorithm, alpha, beta); initializers[layerType](getCnnLayer().get(), eltwiseOp, eltwiseAlgorithm, alpha, beta);
} else { } else {
THROW_IE_EXCEPTION << "Unsupported algorithm for Eltwise node with name `" << getName() << "`."; THROW_IE_EXCEPTION << "Unsupported algorithm for Eltwise node with name `" << getName() << "`.";
@ -843,7 +858,8 @@ void MKLDNNEltwiseNode::init() {
size_t MKLDNNEltwiseNode::getOpInputsNum() const { size_t MKLDNNEltwiseNode::getOpInputsNum() const {
switch (getOpType()) { switch (getOpType()) {
case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt: case PowerStatic: case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt: case PowerStatic:
case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish: case Mish: case Hsigmoid: case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish:
case Mish: case Hsigmoid: case Round:
case LogicalNot: case LogicalNot:
return 1; return 1;
case Add: case Subtract: case Multiply: case Divide: case FloorMod: case Mod: case Maximum: case Minimum: case SquaredDifference: case Add: case Subtract: case Multiply: case Divide: case FloorMod: case Mod: case Maximum: case Minimum: case SquaredDifference:
@ -1469,7 +1485,8 @@ void MKLDNNEltwiseNode::executeReference(const std::vector<const uint8_t *>& src
switch (getOpType()) { switch (getOpType()) {
case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt: case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt:
case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish: case Mish: case Hsigmoid: case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish:
case Mish: case Hsigmoid: case Round:
*dst_ptr_f = ref_eltwise_injector->compute_scalar(src_f[0]); break; *dst_ptr_f = ref_eltwise_injector->compute_scalar(src_f[0]); break;
case Add: *dst_ptr_f = src_f[0] + src_f[1]; break; case Add: *dst_ptr_f = src_f[0] + src_f[1]; break;
case MulAdd: *dst_ptr_f = src_f[0] * src_f[1] + src_f[2]; break; case MulAdd: *dst_ptr_f = src_f[0] * src_f[1] + src_f[2]; break;
@ -1570,6 +1587,8 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops) {
case mkldnn::eltwise_hswish: case mkldnn::eltwise_hswish:
case mkldnn::eltwise_mish: case mkldnn::eltwise_mish:
case mkldnn::eltwise_hsigmoid: case mkldnn::eltwise_hsigmoid:
case mkldnn::eltwise_round_half_to_even:
case mkldnn::eltwise_round_half_away_from_zero:
ops.append_eltwise(1.0, getAlgorithm(), getAlpha(), getBeta()); ops.append_eltwise(1.0, getAlgorithm(), getAlpha(), getBeta());
break; break;
case mkldnn::depthwise_scale_shift: case mkldnn::depthwise_scale_shift:

View File

@ -59,7 +59,8 @@ enum EltwiseOpType {
Prelu, Prelu,
Mish, Mish,
Hswish, Hswish,
Hsigmoid Hsigmoid,
Round
}; };
struct jit_eltwise_params { struct jit_eltwise_params {

View File

@ -2123,7 +2123,7 @@ bool MKLDNNInterpolateNode::canFuse(const MKLDNNNodePtr& node) const {
if (eltwiseNode == nullptr) if (eltwiseNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get eltwise node " << node->getName(); THROW_IE_EXCEPTION << "Cannot get eltwise node " << node->getName();
return isOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, return isOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp,
Tanh, Swish, Hswish, Mish, Hsigmoid, Linear, Abs, Square, Sqrt}); Tanh, Swish, Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt});
} }
return false; return false;

View File

@ -11,7 +11,6 @@
#include <map> #include <map>
#include <mkldnn_types.h> #include <mkldnn_types.h>
#include <mkldnn_extension_utils.h> #include <mkldnn_extension_utils.h>
#include <legacy/graph_transformer.h>
using namespace mkldnn; using namespace mkldnn;
using namespace MKLDNNPlugin; using namespace MKLDNNPlugin;
@ -50,32 +49,25 @@ static InferenceEngine::LayerConfig make_plain_config(const InferenceEngine::CNN
class PortIteratorHelper : public PortMapHelper { class PortIteratorHelper : public PortMapHelper {
public: public:
PortIteratorHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, PortIteratorHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, bool sliced_src,
bool as_input, const InferenceEngine::TensorIterator::PortMap &port_map, const mkldnn::engine& eng, int n_iter) : as_input(as_input) { const InferenceEngine::TensorIterator::PortMap &slice_rule, const mkldnn::engine& eng) {
const auto &full_blob = as_input ? from : to; const auto &full_blob = sliced_src ? from : to;
const auto &part_blob = !as_input ? from : to; const auto &part_blob = !sliced_src ? from : to;
auto axis = port_map.axis; auto axis = slice_rule.axis;
auto stride = port_map.stride; auto stride = slice_rule.stride;
auto full_dims = full_blob->GetDims(); auto full_dims = full_blob->GetDims();
auto part_dims = part_blob->GetDims(); auto part_dims = part_blob->GetDims();
if (port_map.axis == -1) {
// simple copy mode. No iteration through this tensor
reorders.emplace_back(from->GetPrimitive(), to->GetPrimitive());
iter_count = n_iter;
} else {
auto abs_stride = std::abs(stride); auto abs_stride = std::abs(stride);
auto sign_of_stride = stride < 0.0f ? -1 : 1; auto sign_of_stride = stride < 0.0f ? -1 : 1;
IE_ASSERT(n_iter == full_dims[axis] / abs_stride) << "Shape mismatch for tensor iterator port"; iter_count = full_dims[axis] / abs_stride;
full_dims[axis] = abs_stride; full_dims[axis] = abs_stride;
IE_ASSERT(full_dims == part_dims) << "Shape mismatch for tensor iterator port"; IE_ASSERT(full_dims == part_dims) << "Shape mismatch for tensor iterator port";
iter_count = n_iter;
// make chunk view // make chunk view
auto chunk_desc = full_blob->GetDescriptor(); auto chunk_desc = full_blob->GetDescriptor();
chunk_desc.data.dims[axis] = abs_stride; chunk_desc.data.dims[axis] = abs_stride;
@ -92,54 +84,102 @@ public:
chunk_offset_in_byte = sign_of_stride < 0 ? (iter_count - 1) * chunk_stride_in_byte : 0; chunk_offset_in_byte = sign_of_stride < 0 ? (iter_count - 1) * chunk_stride_in_byte : 0;
chunk_stride_in_byte *= sign_of_stride; chunk_stride_in_byte *= sign_of_stride;
if (as_input) { if (sliced_src) {
reorders.emplace_back(chunk_mem_prim, to->GetPrimitive()); reorders.emplace_back(chunk_mem_prim, to->GetPrimitive());
} else { } else {
reorders.emplace_back(from->GetPrimitive(), chunk_mem_prim); reorders.emplace_back(from->GetPrimitive(), chunk_mem_prim);
} }
} }
}
void execute(int n_iter, mkldnn::stream strm) override { void execute(mkldnn::stream strm, int iter) override {
if (chunk_stride_in_byte != 0) { IE_ASSERT(iter >= 0 && iter < iter_count);
IE_ASSERT(n_iter < iter_count);
auto full_mem = mem_holder[FULL_DATA]; auto full_mem = mem_holder[FULL_DATA];
auto chunk_mem = mem_holder[CHUNK_DATA]; auto chunk_mem = mem_holder[CHUNK_DATA];
chunk_mem.set_data_handle(static_cast<uint8_t *>(full_mem.get_data_handle()) + chunk_mem.set_data_handle(static_cast<uint8_t *>(full_mem.get_data_handle()) +
chunk_offset_in_byte + chunk_stride_in_byte * n_iter); chunk_offset_in_byte + chunk_stride_in_byte * iter);
strm.submit({reorders.begin(), reorders.end()});
} else {
if (as_input ? n_iter == 0 : n_iter == (iter_count - 1))
strm.submit({reorders.begin(), reorders.end()}); strm.submit({reorders.begin(), reorders.end()});
} }
};
private: private:
bool as_input;
ptrdiff_t chunk_stride_in_byte = 0; ptrdiff_t chunk_stride_in_byte = 0;
ptrdiff_t chunk_offset_in_byte = 0; ptrdiff_t chunk_offset_in_byte = 0;
const int FULL_DATA = 0; const int FULL_DATA = 0;
const int CHUNK_DATA = 1; const int CHUNK_DATA = 1;
int iter_count;
}; };
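The helper walks the full tensor along `axis` in chunks of `|stride|` elements by shifting the chunk memory's data handle once per iteration; a negative stride starts from the last chunk and walks backwards. The offset arithmetic in isolation, assuming a dense row-major layout (the real code takes the stride from the mkldnn memory descriptor instead of computing it by hand):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <vector>

// Byte offset of the chunk processed at iteration `iter`.
std::ptrdiff_t chunk_offset(const std::vector<std::size_t>& full_dims,
                            int axis, int stride, std::size_t elem_size, int iter) {
    const int abs_stride = std::abs(stride);
    const int iter_count = static_cast<int>(full_dims[axis]) / abs_stride;
    assert(iter >= 0 && iter < iter_count);

    // bytes between consecutive chunks along `axis` (dense row-major assumption)
    std::size_t inner = elem_size;
    for (std::size_t d = axis + 1; d < full_dims.size(); ++d)
        inner *= full_dims[d];
    std::ptrdiff_t chunk_stride = static_cast<std::ptrdiff_t>(inner * abs_stride);

    // negative stride: start at the last chunk and move towards the first one
    const std::ptrdiff_t base = (stride < 0) ? (iter_count - 1) * chunk_stride : 0;
    if (stride < 0) chunk_stride = -chunk_stride;
    return base + chunk_stride * iter;
}
```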
class BackEdgePortHelper : public PortMapHelper { class BackEdgePortHelper : public PortMapHelper {
public: public:
BackEdgePortHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, const mkldnn::engine& eng, int n_iter) { BackEdgePortHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, const mkldnn::engine& eng) {
auto mem_desc = from->GetDescriptor();
mem_holder.emplace_back(mkldnn::memory::primitive_desc(mem_desc, eng));
reorders.emplace_back(from->GetPrimitive(), to->GetPrimitive()); reorders.emplace_back(from->GetPrimitive(), to->GetPrimitive());
iter_count = n_iter;
} }
void execute(int n_iter, mkldnn::stream strm) override { void execute(mkldnn::stream strm, int iter) override {
if (n_iter < iter_count - 1) { if (iter != 0) {
strm.submit({reorders.begin(), reorders.end()}); strm.submit({reorders.begin(), reorders.end()});
} }
}; }
};
class IterCountPortHelper : public PortMapHelper {
public:
IterCountPortHelper(const MKLDNNMemoryPtr &to, const mkldnn::engine& eng) {
// Only scalar I32 tensor is supported
IE_ASSERT(to->GetDataType() == memory::s32);
IE_ASSERT(to->GetDims() == memory::dims{1});
mem_holder.push_back(to->GetPrimitive());
}
void execute(mkldnn::stream strm, int n_iter) override {
auto mem = mem_holder[0];
auto data_ptr = static_cast<uint32_t*>(mem.get_data_handle());
*data_ptr = n_iter;
}
};
class asBoolCheck : public PortChecker {
public:
asBoolCheck(const MKLDNNMemoryPtr &mem) {
IE_ASSERT(mem->GetDataType() == memory::u8);
IE_ASSERT(mem->GetDims() == memory::dims{1});
mem_holder.push_back(mem->GetPrimitive());
}
int getStatus() override {
auto mem = mem_holder[0];
auto data_ptr = static_cast<uint8_t*>(mem.get_data_handle());
return *data_ptr == static_cast<uint8_t>(0) ? 0 : 1;
}
};
class asIntCheck : public PortChecker {
public:
asIntCheck(const MKLDNNMemoryPtr &mem) {
IE_ASSERT(mem->GetDataType() == memory::s32);
IE_ASSERT(mem->GetDims() == memory::dims{1});
mem_holder.push_back(mem->GetPrimitive());
}
int getStatus() override {
auto mem = mem_holder[0];
auto data_ptr = static_cast<uint32_t*>(mem.get_data_handle());
return *data_ptr;
}
};
class staticValueCheck : public PortChecker {
public:
staticValueCheck(const int &value) : value(value) {}
int getStatus() override {
return value;
}
private:
int value;
}; };
} // namespace MKLDNNPlugin } // namespace MKLDNNPlugin
@ -157,25 +197,19 @@ void MKLDNNTensorIteratorNode::getSupportedDescriptors() {
sub_graph.CreateGraph(ti->body, ext_mng, weightCache); sub_graph.CreateGraph(ti->body, ext_mng, weightCache);
// Try to detect inputs and outputs by indexes // Try to detect inputs and outputs by indexes
std::map<std::string, MKLDNNNodePtr> in_map, out_map; const auto &in_map = sub_graph.GetInputNodes();
for (auto node : sub_graph.GetNodes())
if (node->getType() == Input) // filter by type Input
in_map[node->getName().substr(3)] = node; // remove "in_" prefix
for (auto node : sub_graph.GetOutputNodes())
out_map[node->getName().substr(4)] = node; // remove "out_" prefix
for (const auto &in_data : ti->body.inputs) { for (const auto &in_data : ti->body.inputs) {
if (in_data->getName() == "const_holder") continue; if (in_data->getName() == "const_holder") continue;
auto &in_node = in_map[in_data->getName()]; auto &in_node = in_map.at(in_data->getName());
auto in_mem = in_node->getChildEdgeAt(0)->getMemoryPtr(); auto in_mem = in_node->getChildEdgeAt(0)->getMemoryPtr();
input_mem.push_back(in_mem); input_mem.push_back(in_mem);
} }
for (const auto &out_data : ti->body.outputs) { // Assume that the order of outputs in the original TI and in the produced sub_graph is the same
auto &out_node = out_map[out_data->getName()]; const auto &out_vec = sub_graph.GetOutputNodes();
auto out_mem = out_node->getParentEdgeAt(0)->getMemoryPtr(); for (size_t i = 0; i < out_vec.size(); i++) {
auto out_mem = out_vec[i]->getParentEdgeAt(0)->getMemoryPtr();
output_mem.push_back(out_mem); output_mem.push_back(out_mem);
} }
} }
@ -194,52 +228,99 @@ void MKLDNNTensorIteratorNode::createPrimitive() {
if (ti == nullptr) if (ti == nullptr)
THROW_IE_EXCEPTION << "Cannot convert to TensorIterator layer."; THROW_IE_EXCEPTION << "Cannot convert to TensorIterator layer.";
const auto &eng = getEngine();
for (auto map_rule : ti->input_port_map) { for (auto map_rule : ti->input_port_map) {
auto &extr_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &from_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr();
auto &intr_mem = input_mem[map_rule.to]; auto &to_mem = input_mem[map_rule.to];
auto mapper = std::shared_ptr<PortMapHelper>( if (map_rule.axis == -1)
new PortIteratorHelper (extr_mem, intr_mem, true, map_rule, getEngine(), n_iter)); first_mappers.emplace_back(new BackEdgePortHelper(from_mem, to_mem, eng));
else
in_port_mappers.push_back(mapper); before_mappers.emplace_back(new PortIteratorHelper(from_mem, to_mem, true, map_rule, eng));
} }
for (auto map_rule : ti->output_port_map) { for (auto map_rule : ti->output_port_map) {
auto &extr_mem = getChildEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &to_mem = getChildEdgesAtPort(map_rule.from)[0]->getMemoryPtr();
auto &intr_mem = output_mem[map_rule.to]; auto &from_mem = output_mem[map_rule.to];
auto mapper = std::shared_ptr<PortMapHelper>( if (map_rule.axis == -1)
new PortIteratorHelper (intr_mem, extr_mem, false, map_rule, getEngine(), n_iter)); last_mappers.emplace_back(new BackEdgePortHelper(from_mem, to_mem, eng));
else
out_port_mappers.push_back(mapper); after_mappers.emplace_back(new PortIteratorHelper(from_mem, to_mem, false, map_rule, eng));
} }
for (auto map_rule : ti->back_edges) { for (auto map_rule : ti->back_edges) {
auto from_mem = output_mem[map_rule.from]; auto from_mem = output_mem[map_rule.from];
auto to_mem = input_mem[map_rule.to]; auto to_mem = input_mem[map_rule.to];
auto mapper = std::shared_ptr<PortMapHelper>( before_mappers.emplace_back(new BackEdgePortHelper(from_mem, to_mem, eng));
new BackEdgePortHelper(from_mem, to_mem, getEngine(), n_iter)); }
out_port_mappers.push_back(mapper); // special purpose ports
constexpr auto key_cur_iter_port = "loop_body_current_iteration_idx";
constexpr auto key_cond_port = "loop_body_condition_output_idx";
constexpr auto key_trip_count_port = "loop_trip_count_idx";
constexpr auto key_init_cond_port = "loop_execution_condition_idx";
auto iter_idx_ports = ti->GetParamAsInts(key_cur_iter_port, {});
for (auto idx : iter_idx_ports) {
auto to_mem = input_mem[idx];
before_mappers.emplace_back(new IterCountPortHelper(to_mem, eng));
}
auto condition_port_idx = ti->GetParamAsInt(key_cond_port, -1);
if (condition_port_idx == -1) {
continue_cond_check.reset(new staticValueCheck(true)); // always true
} else {
auto mem = output_mem[condition_port_idx];
continue_cond_check.reset(new asBoolCheck(mem));
}
auto trip_count_port_idx = ti->GetParamAsInt(key_trip_count_port, -1);
if (trip_count_port_idx == -1) {
trip_count_check.reset(new staticValueCheck(n_iter)); // use statically calculated num of iteration
} else {
auto mem = getParentEdgesAtPort(trip_count_port_idx)[0]->getMemoryPtr();
trip_count_check.reset(new asIntCheck(mem));
}
auto init_cond_port_idx = ti->GetParamAsInt(key_init_cond_port, -1);
if (init_cond_port_idx == -1) {
initial_cond_check.reset(new staticValueCheck(true));
} else {
auto mem = getParentEdgesAtPort(init_cond_port_idx)[0]->getMemoryPtr();
initial_cond_check.reset(new asBoolCheck(mem));
} }
} }
void MKLDNNTensorIteratorNode::execute(mkldnn::stream strm) { void MKLDNNTensorIteratorNode::execute(mkldnn::stream strm) {
sub_graph.ResetInferCount(); sub_graph.ResetInferCount();
for (int i = 0; i < n_iter; i++) { bool continue_cond = initial_cond_check->getStatus();
int max_num_iter = trip_count_check->getStatus();
for (auto &mapper : first_mappers)
mapper->execute(strm);
// use "i != max_num_iter" only to allow "-1" works like infinite loop
for (int i = 0; i != max_num_iter && continue_cond; i++) {
// copy data to subgraph iteration // copy data to subgraph iteration
for (auto &mapper : in_port_mappers) for (auto &mapper : before_mappers)
mapper->execute(i, strm); mapper->execute(strm, i);
sub_graph.Infer(); sub_graph.Infer();
continue_cond = continue_cond_check->getStatus();
// copy data from subgraph iteration to outputs // copy data from subgraph iteration to outputs
// or next iteration inputs // or to next iteration inputs
for (auto &mapper : out_port_mappers) for (auto &mapper : after_mappers)
mapper->execute(i, strm); mapper->execute(strm, i);
} }
for (auto &mapper : last_mappers)
mapper->execute(strm);
} }
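With the special ports wired up, `execute()` behaves like a dynamic Loop: the trip count may be -1 (no static limit), the initial condition can skip the body entirely, and the body's condition output can stop the iterations early. The control flow reduced to plain values (the port helpers are replaced by ints/bools purely for illustration):

```cpp
#include <functional>

void run_loop(int trip_count,                          // -1 means "no static limit"
              bool initial_cond,                       // false skips the body entirely
              const std::function<bool(int)>& body) {  // returns the continue condition
    bool continue_cond = initial_cond;
    // "i != trip_count" (not "<") so that -1 behaves like an unbounded loop
    for (int i = 0; i != trip_count && continue_cond; ++i) {
        continue_cond = body(i);
    }
}

// run_loop(10, true,  [](int)  { return true;  });  // plain TensorIterator: 10 iterations
// run_loop(-1, true,  [](int i){ return i < 4; });  // Loop: runs until the body says stop
// run_loop(5,  false, [](int)  { return true;  });  // initial condition false: body never runs
```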
bool MKLDNNTensorIteratorNode::created() const { bool MKLDNNTensorIteratorNode::created() const {

View File

@ -13,16 +13,35 @@
namespace MKLDNNPlugin { namespace MKLDNNPlugin {
/**
* Functor interface to perform some action on the tensors captured in the constructor.
* Generally it reads, writes or moves data between the specified tensors.
* The action may depend on the iteration index.
*/
class PortMapHelper { class PortMapHelper {
public: public:
virtual ~PortMapHelper() = default; virtual ~PortMapHelper() = default;
virtual void execute(int n_iter, mkldnn::stream strm) = 0; virtual void execute(mkldnn::stream strm, int n_iter = -1) = 0;
protected: protected:
std::vector<mkldnn::reorder> reorders; std::vector<mkldnn::reorder> reorders;
std::vector<mkldnn::memory> mem_holder; std::vector<mkldnn::memory> mem_holder;
int iter_count;
}; };
/**
* Functor interface to perform a check on the data tensor captured in the constructor.
* The information is extracted as an int; the meaning of the returned value is specific to
* the particular type of checker.
*/
class PortChecker {
public:
virtual ~PortChecker() = default;
virtual int getStatus() = 0;
protected:
std::vector<mkldnn::memory> mem_holder;
};
class MKLDNNTensorIteratorNode : public MKLDNNNode { class MKLDNNTensorIteratorNode : public MKLDNNNode {
public: public:
MKLDNNTensorIteratorNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); MKLDNNTensorIteratorNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
@ -35,6 +54,7 @@ public:
void execute(mkldnn::stream strm) override; void execute(mkldnn::stream strm) override;
void setExtManager(const MKLDNNExtensionManager::Ptr& extMgr) { ext_mng = extMgr; } void setExtManager(const MKLDNNExtensionManager::Ptr& extMgr) { ext_mng = extMgr; }
private: private:
int n_iter = 0; int n_iter = 0;
@ -42,7 +62,16 @@ private:
MKLDNNGraph sub_graph; MKLDNNGraph sub_graph;
std::vector<MKLDNNMemoryPtr> input_mem, output_mem; std::vector<MKLDNNMemoryPtr> input_mem, output_mem;
std::vector<std::shared_ptr<PortMapHelper>> in_port_mappers, out_port_mappers; std::vector<std::shared_ptr<PortMapHelper>>
first_mappers, /// < Applied once before loop
last_mappers, /// < Applied once after loop
before_mappers, /// < Applied before each iteration
after_mappers; /// < Applied after each iteration
std::shared_ptr<PortChecker>
trip_count_check,      /// < Checks the trip count value; value >= -1
initial_cond_check,    /// < Checks the initial continue-condition value; value in [0, 1]
continue_cond_check;   /// < Checks the body's continue-condition value; value in [0, 1]
}; };
} // namespace MKLDNNPlugin } // namespace MKLDNNPlugin

View File

@ -332,18 +332,12 @@ static bool eliminate_squeeze(const std::shared_ptr<Node>& node) {
return false; return false;
} }
static bool eliminate_stop_gradient(const std::shared_ptr<Node>& node) {
replace_output_update_name(node->output(0), node->input_value(0));
return true;
}
bool pass::NopElimination::run_on_function(std::shared_ptr<Function> function) { bool pass::NopElimination::run_on_function(std::shared_ptr<Function> function) {
static const std::unordered_map<NodeTypeInfo, std::function<bool(const std::shared_ptr<Node>&)>> static const std::unordered_map<NodeTypeInfo, std::function<bool(const std::shared_ptr<Node>&)>>
dispatcher{{TI(opset3::Pad), &eliminate_nop}, dispatcher{{TI(opset3::Pad), &eliminate_nop},
{TI(op::v0::Sum), &eliminate_sum}, {TI(op::v0::Sum), &eliminate_sum},
{TI(opset3::Convert), &eliminate_convert}, {TI(opset3::Convert), &eliminate_convert},
{TI(op::v0::Slice), &eliminate_nop}, {TI(op::v0::Slice), &eliminate_nop},
{TI(op::v0::StopGradient), &eliminate_stop_gradient},
{TI(opset3::Reshape), &eliminate_reshape_v1}, {TI(opset3::Reshape), &eliminate_reshape_v1},
{TI(opset3::Concat), &eliminate_concat}, {TI(opset3::Concat), &eliminate_concat},
{TI(opset3::Squeeze), &eliminate_squeeze}, {TI(opset3::Squeeze), &eliminate_squeeze},

View File

@ -39,8 +39,6 @@ function(add_common_target TARGET_NAME STATIC_IE)
$<TARGET_PROPERTY:inference_engine_legacy,INTERFACE_INCLUDE_DIRECTORIES>) $<TARGET_PROPERTY:inference_engine_legacy,INTERFACE_INCLUDE_DIRECTORIES>)
if(WIN32) if(WIN32)
target_compile_definitions(${TARGET_NAME} PRIVATE NOMINMAX)
set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}) set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})
endif() endif()
@ -54,6 +52,10 @@ function(add_common_target TARGET_NAME STATIC_IE)
target_link_libraries(${TARGET_NAME} PUBLIC ${NGRAPH_LIBRARIES} inference_engine_transformations target_link_libraries(${TARGET_NAME} PUBLIC ${NGRAPH_LIBRARIES} inference_engine_transformations
PRIVATE openvino::itt) PRIVATE openvino::itt)
if(NOT STATIC_IE)
target_link_libraries(${TARGET_NAME} PUBLIC inference_engine_legacy)
endif()
endfunction() endfunction()
add_common_target("vpu_common_lib" FALSE) add_common_target("vpu_common_lib" FALSE)

View File

@ -0,0 +1,89 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadConvertNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="1,3,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/tf_op_layer_Cast/Cast" type="Convert" version="opset1">
<data destination_type="f16"/>
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</input>
<output>
<port id="1" precision="FP16">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="2" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/tf_op_layer_Cast/Cast" type="Convert" version="opset1">
<data precision="FP16"/>
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</input>
<output>
<port id="1" precision="FP16">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 0);
}

View File

@ -0,0 +1,184 @@
// Copyright (C) 2019-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadDepthToSpaceNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="5,4,28,2" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>5</dim>
<dim>4</dim>
<dim>28</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="DepthToSpace" version="opset1">
<data mode="blocks_first" block_size="2"/>
<input>
<port id="0">
<dim>5</dim>
<dim>4</dim>
<dim>28</dim>
<dim>2</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>5</dim>
<dim>1</dim>
<dim>56</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="2" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>5</dim>
<dim>1</dim>
<dim>56</dim>
<dim>4</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>5</dim>
<dim>4</dim>
<dim>28</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_6D/Cast_1204_const" type="Const" version="opset1">
<output>
<port id="1" precision="I64">
<dim>6</dim>
</port>
</output>
<blobs>
<custom offset="0" size="24" precision="I64"/>
</blobs>
</layer>
<layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_6D" type="Reshape" version="opset1">
<data special_zero="True"/>
<input>
<port id="0">
<dim>5</dim>
<dim>4</dim>
<dim>28</dim>
<dim>2</dim>
</port>
<port id="1">
<dim>6</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>5</dim>
<dim>2</dim>
<dim>2</dim>
<dim>1</dim>
<dim>28</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="3" name="PartitionedCall/functional_1/tf_op_layer_output/output/Transpose" type="Permute" version="opset1">
<data order="0,3,4,1,5,2"/>
<input>
<port id="0">
<dim>5</dim>
<dim>2</dim>
<dim>2</dim>
<dim>1</dim>
<dim>28</dim>
<dim>2</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>5</dim>
<dim>1</dim>
<dim>28</dim>
<dim>2</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="4" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_4D/Cast_1202_const" type="Const" version="opset1">
<output>
<port id="1" precision="I64">
<dim>4</dim>
</port>
</output>
<blobs>
<custom offset="24" size="16" precision="I64"/>
</blobs>
</layer>
<layer id="5" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="Reshape" version="opset1">
<data special_zero="True"/>
<input>
<port id="0">
<dim>5</dim>
<dim>1</dim>
<dim>28</dim>
<dim>2</dim>
<dim>2</dim>
<dim>2</dim>
</port>
<port id="1">
<dim>4</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>5</dim>
<dim>1</dim>
<dim>56</dim>
<dim>4</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
<edge from-layer="3" from-port="1" to-layer="5" to-port="0"/>
<edge from-layer="4" from-port="1" to-layer="5" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 80, [](Blob::Ptr& weights) {
auto* buffer = weights->buffer().as<int64_t*>();
buffer[0] = 0;
buffer[1] = 2;
buffer[2] = 2;
buffer[3] = 1;
buffer[4] = 28;
buffer[5] = 2;
buffer[6] = 0;
buffer[7] = 1;
buffer[8] = 56;
buffer[9] = 4;
});
}

View File

@ -0,0 +1,179 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadFloorModNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="1,1,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Parameter" version="opset1">
<data shape="1" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
</port>
</output>
</layer>
<layer id="2" name="EltwiseReshapeNormalization/Cast_163_const" type="Const" version="opset1">
<data offset="0" size="24" shape="3" element_type="i64"/>
<output>
<port id="1" precision="I64">
<dim>3</dim>
</port>
</output>
</layer>
<layer id="3" name="EltwiseReshapeNormalization" type="Reshape" version="opset1">
<data special_zero="True"/>
<input>
<port id="0">
<dim>1</dim>
</port>
<port id="1">
<dim>3</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</output>
</layer>
<layer id="4" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="FloorMod" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="5" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="1" from-port="0" to-layer="3" to-port="0"/>
<edge from-layer="2" from-port="1" to-layer="3" to-port="1"/>
<edge from-layer="0" from-port="0" to-layer="4" to-port="0"/>
<edge from-layer="3" from-port="2" to-layer="4" to-port="1"/>
<edge from-layer="4" from-port="2" to-layer="5" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
</port>
</output>
</layer>
<layer id="2" name="EltwiseReshapeNormalization/Cast_175_const" type="Const" version="opset1">
<output>
<port id="1" precision="I64">
<dim>3</dim>
</port>
</output>
<blobs>
<custom offset="0" size="12" precision="I32"/>
</blobs>
</layer>
<layer id="3" name="EltwiseReshapeNormalization" type="Reshape" version="opset1">
<data special_zero="True"/>
<input>
<port id="0">
<dim>1</dim>
</port>
<port id="1">
<dim>3</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</output>
</layer>
<layer id="4" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="Eltwise" version="opset1">
<data operation="floor_mod"/>
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="1" from-port="0" to-layer="3" to-port="0"/>
<edge from-layer="2" from-port="1" to-layer="3" to-port="1"/>
<edge from-layer="0" from-port="0" to-layer="4" to-port="0"/>
<edge from-layer="3" from-port="2" to-layer="4" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 40, [](Blob::Ptr& weights) {
auto* buffer = weights->buffer().as<int64_t*>();
buffer[0] = 1;
buffer[1] = 1;
buffer[2] = 1;
});
}

View File

@ -0,0 +1,122 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadGatherNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="1,3,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Parameter" version="opset1">
<data shape="1" element_type="i32"/>
<output>
<port id="0" precision="I32">
<dim>1</dim>
</port>
</output>
</layer>
<layer id="2" name="PartitionedCall/functional_1/tf_op_layer_GatherV2/GatherV2/Cast_292_const" type="Const" version="opset1">
<data offset="0" size="8" shape="" element_type="i64"/>
<output>
<port id="1" precision="I64"/>
</output>
</layer>
<layer id="3" name="PartitionedCall/functional_1/tf_op_layer_GatherV2/GatherV2" type="Gather" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
</port>
<port id="2"/>
</input>
<output>
<port id="3" precision="FP32">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="4" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="3" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="3" to-port="1"/>
<edge from-layer="2" from-port="1" to-layer="3" to-port="2"/>
<edge from-layer="3" from-port="3" to-layer="4" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Input" version="opset1">
<output>
<port id="0" precision="I32">
<dim>1</dim>
</port>
</output>
</layer>
<layer id="2" name="PartitionedCall/functional_1/tf_op_layer_GatherV2/GatherV2" type="Gather">
<data axis="0"/>
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 16, [](Blob::Ptr& weights) {
auto* buffer = weights->buffer().as<int64_t*>();
buffer[0] = 0;
});
}

View File

@ -0,0 +1,190 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadMinimumNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="1,1,27,27" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Parameter" version="opset1">
<data shape="1,1,27,27" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
<layer id="2" name="PartitionedCall/functional_1/output/Minimum" type="Minimum" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
<layer id="3" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/output/Minimum/negate1_" type="Power" version="opset1">
<data power="1" scale="-1" shift="0"/>
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
<layer id="2" name="input_b" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
<layer id="3" name="PartitionedCall/functional_1/output/Minimum/negate2_" type="Power" version="opset1">
<data power="1" scale="-1" shift="0"/>
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
<layer id="4" name="PartitionedCall/functional_1/output/Minimum/Max_" type="Eltwise" version="opset1">
<data operation="max"/>
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
<layer id="5" name="PartitionedCall/functional_1/output/Minimum" type="Power" version="opset1">
<data power="1" scale="-1" shift="0"/>
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>27</dim>
<dim>27</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
<edge from-layer="2" from-port="0" to-layer="3" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="4" to-port="0"/>
<edge from-layer="3" from-port="1" to-layer="4" to-port="1"/>
<edge from-layer="4" from-port="2" to-layer="5" to-port="0"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 0);
}

View File

@ -0,0 +1,108 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadMultiplyNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="1,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Parameter" version="opset1">
<data shape="1,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="2" name="PartitionedCall/functional_1/output/mul" type="Multiply" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>4</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="3" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>4</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="2" name="PartitionedCall/functional_1/output/mul" type="Eltwise" version="opset1">
<data operation="prod"/>
<input>
<port id="0">
<dim>1</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>4</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 0);
}

View File

@ -0,0 +1,146 @@
// Copyright (C) 2019-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadNormalizeL2Network) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="6,24,12,10" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
</output>
</layer>
<layer id="1" name="112_input_port_1/value114_const" type="Const" version="opset1">
<data offset="0" size="8" shape="1" element_type="i64"/>
<output>
<port id="1" precision="I64">
<dim>1</dim>
</port>
</output>
</layer>
<layer id="2" name="112" type="NormalizeL2" version="opset1">
<data eps="1e-12" eps_mode="add"/>
<input>
<port id="0">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
<port id="1">
<dim>1</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
</output>
</layer>
<layer id="3" name="5354_const" type="Const" version="opset1">
<data offset="8" size="4" shape="1" element_type="f32"/>
<output>
<port id="1" precision="FP32">
<dim>1</dim>
</port>
</output>
</layer>
<layer id="4" name="PartitionedCall/functional_1/lambda/output" type="Multiply" version="opset1">
<input>
<port id="0">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
<port id="1">
<dim>1</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
</output>
</layer>
<layer id="5" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="4" to-port="0"/>
<edge from-layer="3" from-port="1" to-layer="4" to-port="1"/>
<edge from-layer="4" from-port="2" to-layer="5" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/lambda/output" type="Normalize">
<data eps="1e-12" across_spatial="0" channel_shared="1"/>
<input>
<port id="0">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>6</dim>
<dim>24</dim>
<dim>12</dim>
<dim>10</dim>
</port>
</output>
<blobs>
<weights offset="0" size="96" precision="FP32"/>
</blobs>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 100, [](Blob::Ptr& weights) {
auto* buffer = weights->buffer().as<int64_t*>();
buffer[0] = 1;
buffer[1] = 32831;
});
}

View File

@ -0,0 +1,108 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadNotEqualNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="1,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Parameter" version="opset1">
<data shape="1,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="NotEqual" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>4</dim>
</port>
</input>
<output>
<port id="2" precision="BOOL">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="3" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>4</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="input_b" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="Eltwise" version="opset1">
<data operation="not_equal"/>
<input>
<port id="0">
<dim>1</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>4</dim>
</port>
</input>
<output>
<port id="2" precision="BOOL">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 0);
}

View File

@ -0,0 +1,120 @@
// Copyright (C) 2019-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadReduceMinNetwork) {
std::string model = R"V0G0N(
<net name="model" version="10">
<layers>
<layer id="0" name="data" type="Parameter" version="opset1">
<data element_type="f32" shape="3,2,2"/>
<output>
<port id="0" precision="FP32">
<dim>3</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="1" name="reduced/Cast_175_const" type="Const" version="opset1">
<data element_type="i64" offset="0" shape="3" size="24"/>
<output>
<port id="1" precision="I64">
<dim>3</dim>
</port>
</output>
</layer>
<layer id="2" name="reduced" type="ReduceMin" version="opset1">
<data keep_dims="True"/>
<input>
<port id="0">
<dim>3</dim>
<dim>2</dim>
<dim>2</dim>
</port>
<port id="1">
<dim>3</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</output>
</layer>
<layer id="3" name="reduced/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="model" version="7">
<layers>
<layer id="0" name="data" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>3</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="1" name="reduced/Cast_184_const" type="Const" version="opset1">
<output>
<port id="1" precision="I64">
<dim>3</dim>
</port>
</output>
<blobs>
<custom offset="0" precision="I64" size="12"/>
</blobs>
</layer>
<layer id="2" name="reduced" type="ReduceMin" version="opset1">
<data keep_dims="True"/>
<input>
<port id="0">
<dim>3</dim>
<dim>2</dim>
<dim>2</dim>
</port>
<port id="1">
<dim>3</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 100, [](Blob::Ptr& weights) {
auto* buffer = weights->buffer().as<int64_t*>();
buffer[0] = 0;
buffer[1] = 1;
buffer[2] = 2;
});
}

View File

@ -0,0 +1,115 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadReduceProdNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="1,1,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output/Cast_186_const" type="Const" version="opset1">
<data offset="0" size="8" shape="1" element_type="i64"/>
<output>
<port id="1" precision="I64">
<dim>1</dim>
</port>
</output>
</layer>
<layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="ReduceProd" version="opset1">
<data keep_dims="False"/>
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="3" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>1</dim>
<dim>4</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output/Cast_195_const" type="Const" version="opset1">
<output>
<port id="1" precision="I64">
<dim>1</dim>
</port>
</output>
<blobs>
<custom offset="0" size="4" precision="I32"/>
</blobs>
</layer>
<layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="ReduceProd" version="opset1">
<data keep_dims="False"/>
<input>
<port id="0">
<dim>1</dim>
<dim>1</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>1</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>1</dim>
<dim>4</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 16, [](Blob::Ptr& weights) {
auto *buffer = weights->buffer().as<int64_t *>();
buffer[0] = 1;
});
}

View File

@ -0,0 +1,184 @@
// Copyright (C) 2019-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadSpaceToDepthNetwork) {
std::string model = R"V0G0N(
<net name="saved_model" version="10">
<layers>
<layer id="0" name="input_a" type="Parameter" version="opset1">
<data shape="6,5,4,4" element_type="f32"/>
<output>
<port id="0" precision="FP32">
<dim>6</dim>
<dim>5</dim>
<dim>4</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="SpaceToDepth" version="opset1">
<data mode="blocks_first" block_size="2"/>
<input>
<port id="0">
<dim>6</dim>
<dim>5</dim>
<dim>4</dim>
<dim>4</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>6</dim>
<dim>20</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="2" name="Identity/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>6</dim>
<dim>20</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="saved_model" version="7">
<layers>
<layer id="0" name="input_a" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>6</dim>
<dim>5</dim>
<dim>4</dim>
<dim>4</dim>
</port>
</output>
</layer>
<layer id="1" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_6D/Cast_1217_const" type="Const" version="opset1">
<output>
<port id="1" precision="I64">
<dim>6</dim>
</port>
</output>
<blobs>
<custom offset="0" size="24" precision="I64"/>
</blobs>
</layer>
<layer id="2" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_6D" type="Reshape" version="opset1">
<data special_zero="True"/>
<input>
<port id="0">
<dim>6</dim>
<dim>5</dim>
<dim>4</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>6</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>6</dim>
<dim>5</dim>
<dim>2</dim>
<dim>2</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="3" name="PartitionedCall/functional_1/tf_op_layer_output/output/Transpose" type="Permute" version="opset1">
<data order="0,3,5,1,2,4"/>
<input>
<port id="0">
<dim>6</dim>
<dim>5</dim>
<dim>2</dim>
<dim>2</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>6</dim>
<dim>2</dim>
<dim>2</dim>
<dim>5</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</output>
</layer>
<layer id="4" name="PartitionedCall/functional_1/tf_op_layer_output/output/Reshape_to_4D/Cast_1219_const" type="Const" version="opset1">
<output>
<port id="1" precision="I64">
<dim>4</dim>
</port>
</output>
<blobs>
<custom offset="24" size="16" precision="I64"/>
</blobs>
</layer>
<layer id="5" name="PartitionedCall/functional_1/tf_op_layer_output/output" type="Reshape" version="opset1">
<data special_zero="True"/>
<input>
<port id="0">
<dim>6</dim>
<dim>2</dim>
<dim>2</dim>
<dim>5</dim>
<dim>2</dim>
<dim>2</dim>
</port>
<port id="1">
<dim>4</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>6</dim>
<dim>20</dim>
<dim>2</dim>
<dim>2</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
<edge from-layer="3" from-port="1" to-layer="5" to-port="0"/>
<edge from-layer="4" from-port="1" to-layer="5" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 80, [](Blob::Ptr& weights) {
auto* buffer = weights->buffer().as<int64_t*>();
buffer[0] = 6;
buffer[1] = 5;
buffer[2] = 2;
buffer[3] = 2;
buffer[4] = 2;
buffer[5] = 2;
buffer[6] = 6;
buffer[7] = 20;
buffer[8] = 2;
buffer[9] = 2;
});
}

View File

@ -0,0 +1,137 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include "ngraph_reader_tests.hpp"
TEST_F(NGraphReaderTests, ReadSubtractNetwork) {
std::string model = R"V0G0N(
<net name="model" version="10">
<layers>
<layer id="0" name="x" type="Parameter" version="opset1">
<data element_type="f32" shape="3,4,5"/>
<output>
<port id="0" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="1" name="y" type="Parameter" version="opset1">
<data element_type="f32" shape="3,4,5"/>
<output>
<port id="0" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="2" name="z/sub" type="Subtract" version="opset1">
<input>
<port id="0">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
<port id="1">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="3" name="z/sink_port_0" type="Result" version="opset1">
<input>
<port id="0">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</input>
</layer>
</layers>
<edges>
<edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="0"/>
</edges>
</net>
)V0G0N";
std::string modelV7 = R"V0G0N(
<net name="model" version="7">
<layers>
<layer id="0" name="x" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="1" name="y" type="Input" version="opset1">
<output>
<port id="0" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="2" name="z/neg_" type="Power" version="opset1">
<data power="1" scale="-1.0" shift="0"/>
<input>
<port id="0">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</input>
<output>
<port id="1" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
<layer id="3" name="z/sub" type="Eltwise" version="opset1">
<data operation="sum"/>
<input>
<port id="0">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
<port id="1">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</input>
<output>
<port id="2" precision="FP32">
<dim>3</dim>
<dim>4</dim>
<dim>5</dim>
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="1" from-port="0" to-layer="2" to-port="0"/>
<edge from-layer="0" from-port="0" to-layer="3" to-port="0"/>
<edge from-layer="2" from-port="1" to-layer="3" to-port="1"/>
</edges>
</net>
)V0G0N";
compareIRs(model, modelV7, 0);
}

View File

@ -97,19 +97,6 @@ TEST(nop_elimination, eliminate_broadcast) {
ASSERT_EQ(count_ops_of_type<op::v1::Broadcast>(f), 0); ASSERT_EQ(count_ops_of_type<op::v1::Broadcast>(f), 0);
} }
TEST(nop_elimination, eliminate_stop_gradient) {
Shape shape{};
auto A = make_shared<op::Parameter>(element::f32, shape);
auto s = make_shared<op::v0::StopGradient>(A);
auto f = make_shared<Function>(make_shared<op::v0::Abs>(s), ParameterVector{A});
pass::Manager pass_manager;
pass_manager.register_pass<pass::NopElimination>();
pass_manager.run_passes(f);
ASSERT_EQ(count_ops_of_type<op::v0::StopGradient>(f), 0);
}
TEST(nop_elimination, pass_property) { TEST(nop_elimination, pass_property) {
auto pass = std::make_shared<ngraph::pass::NopElimination>(); auto pass = std::make_shared<ngraph::pass::NopElimination>();
ASSERT_FALSE(pass->get_property(pass::PassProperty::CHANGE_DYNAMIC_STATE)); ASSERT_FALSE(pass->get_property(pass::PassProperty::CHANGE_DYNAMIC_STATE));

View File

@ -50,7 +50,9 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes
{Mish, {}}, {Mish, {}},
{HSwish, {}}, {HSwish, {}},
{SoftPlus, {}}, {SoftPlus, {}},
{HSigmoid, {}} {HSigmoid, {}},
{RoundHalfToEven, {}},
{RoundHalfAwayFromZero, {}}
}; };
const std::map<ActivationTypes, std::vector<std::vector<float>>> activationParamTypes = { const std::map<ActivationTypes, std::vector<std::vector<float>>> activationParamTypes = {

View File

@ -3,7 +3,6 @@
// //
#include <vector> #include <vector>
#include <ngraph/op/util/attr_types.hpp>
#include "single_layer_tests/loop.hpp" #include "single_layer_tests/loop.hpp"
#include "common_test_utils/test_constants.hpp" #include "common_test_utils/test_constants.hpp"
@ -12,9 +11,9 @@ using namespace LayerTestsDefinitions;
namespace { namespace {
// without clip, values increase rapidly, so use only seq_lengths = 2 // without clip, values increase rapidly, so use only seq_lengths = 2
std::vector<bool> execute_first_iteration{true}; std::vector<bool> execute_first_iteration{true};
std::vector<bool> is_body_condition_const{true, false}; std::vector<bool> is_body_condition_const{true/*, false*/};
std::vector<bool> body_condition{true, false}; // works only if is_body_condition_const == true std::vector<bool> body_condition{true/*, false*/}; // works only if is_body_condition_const == true
std::vector<int64_t> trip_count{1, 10, -1}; // -1 means infinity std::vector<int64_t> trip_count{1, 10/*, -1*/}; // -1 means infinity
std::vector<std::vector<std::pair<std::vector<size_t>, LOOP_IN_TYPE>>> inputs = { std::vector<std::vector<std::pair<std::vector<size_t>, LOOP_IN_TYPE>>> inputs = {
{{{32, 1, 10}, LOOP_IN_TYPE::INVARIANT}, {{32, 1, 10}, LOOP_IN_TYPE::INVARIANT}, {{32, 1, 10}, LOOP_IN_TYPE::MERGED}}, {{{32, 1, 10}, LOOP_IN_TYPE::INVARIANT}, {{32, 1, 10}, LOOP_IN_TYPE::INVARIANT}, {{32, 1, 10}, LOOP_IN_TYPE::MERGED}},
}; };
@ -31,4 +30,37 @@ namespace {
::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::Values(CommonTestUtils::DEVICE_CPU)),
LoopTest::getTestCaseName); LoopTest::getTestCaseName);
static const std::vector<std::tuple<bool, int64_t, int64_t, int64_t>> static_loop_types {
// GCC4.8 limitation: have to specify type of each element in list
// static_trip_count | max | dynamic_exit | axis
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, -1, -1 }, // n_iter 5, no dynamic exit
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, 3, -1 }, // n_iter 3, dynamic exit on 3
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, 7, -1 }, // n_iter 5, dynamic exit not reached
std::tuple<bool, int64_t, int64_t, int64_t>{ true , -1, 5, -1 }, // n_iter 5, inf loop with dynamic exit on 5
std::tuple<bool, int64_t, int64_t, int64_t>{ true , 5, -1, 1 }, // n_iter 5, const for loop with auto concatenated out
std::tuple<bool, int64_t, int64_t, int64_t>{ false , 5, -1, -1 }, // |
std::tuple<bool, int64_t, int64_t, int64_t>{ false , 5, 3, -1 }, // | same with dynamic trip count
std::tuple<bool, int64_t, int64_t, int64_t>{ false , 5, 7, -1 }, // |
std::tuple<bool, int64_t, int64_t, int64_t>{ false , -1, 5, -1 } // |
};
using namespace testing;
using namespace InferenceEngine;
INSTANTIATE_TEST_CASE_P(smoke_StaticShapeLoop, StaticShapeLoopTest,
Combine(
Values(true),
ValuesIn(static_loop_types),
Values<int64_t>(7),
Values<InferenceEngine::SizeVector>({2, 1, 4}),
Values<InferenceEngine::Precision>(Precision::FP32, Precision::I32),
Values(CommonTestUtils::DEVICE_CPU)));
using namespace testing;
INSTANTIATE_TEST_CASE_P(smoke_TrivialLoop, TrivialLoopTest,
Combine(
Values<InferenceEngine::Precision>(Precision::FP32, Precision::I32),
Values<InferenceEngine::SizeVector>({2, 3, 4}),
Values(CommonTestUtils::DEVICE_CPU)));
} // namespace } // namespace

View File

@ -25,7 +25,22 @@ INSTANTIATE_TEST_CASE_P(smoke_NumSplitsCheck, SplitLayerTest,
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({30, 30, 30, 30})), ::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
SplitLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_splitWithUnusedOutputsTest, SplitLayerTest,
::testing::Combine(
::testing::Values(5),
::testing::Values(0),
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({0, 3})),
::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::Values(CommonTestUtils::DEVICE_CPU)),
SplitLayerTest::getTestCaseName); SplitLayerTest::getTestCaseName);
} // namespace } // namespace

View File

@ -53,8 +53,6 @@ std::vector<std::string> disabledTestPatterns() {
// TODO: Issue: 38841 // TODO: Issue: 38841
R"(.*TopKLayerTest.*k=10.*mode=min.*sort=index.*)", R"(.*TopKLayerTest.*k=10.*mode=min.*sort=index.*)",
R"(.*TopKLayerTest.*k=5.*sort=(none|index).*)", R"(.*TopKLayerTest.*k=5.*sort=(none|index).*)",
// TODO: not supported yet, ticket 37690
R"(.*Loop.*)",
// TODO: Issue: 41694 // TODO: Issue: 41694
R"(.*smoke_Set2.*CTCLossLayerTest.*)", R"(.*smoke_Set2.*CTCLossLayerTest.*)",
}; };

View File

@ -70,7 +70,14 @@ class ImportNetworkTest : public testing::WithParamInterface<exportImportNetwork
if (inputStream.fail()) { if (inputStream.fail()) {
FAIL() << "Cannot open file to import model: exported_model.blob"; FAIL() << "Cannot open file to import model: exported_model.blob";
} }
auto importedOutputs = CalculateImportedNetwork(inputStream); auto importedNetwork = core->ImportNetwork(inputStream, targetDevice, configuration);
for (const auto& next_input : importedNetwork.GetInputsInfo()) {
ASSERT_NO_THROW(executableNetwork.GetInputsInfo()[next_input.first]);
}
for (const auto& next_output : importedNetwork.GetOutputsInfo()) {
ASSERT_NO_THROW(executableNetwork.GetOutputsInfo()[next_output.first]);
}
auto importedOutputs = CalculateImportedNetwork(importedNetwork);
Compare(importedOutputs, actualOutputs); Compare(importedOutputs, actualOutputs);
} }
@ -107,9 +114,7 @@ class ImportNetworkTest : public testing::WithParamInterface<exportImportNetwork
std::map<std::string, std::string> exportConfiguration; std::map<std::string, std::string> exportConfiguration;
std::map<std::string, std::string> importConfiguration; std::map<std::string, std::string> importConfiguration;
std::vector<std::vector<std::uint8_t>> CalculateImportedNetwork(std::istream& networkModel) { std::vector<std::vector<std::uint8_t>> CalculateImportedNetwork(InferenceEngine::ExecutableNetwork& importedNetwork) {
auto importedNetwork = core->ImportNetwork(networkModel, targetDevice, configuration);
auto refInferRequest = importedNetwork.CreateInferRequest(); auto refInferRequest = importedNetwork.CreateInferRequest();
std::vector<InferenceEngine::InputInfo::CPtr> refInfos; std::vector<InferenceEngine::InputInfo::CPtr> refInfos;
for (const auto& input : importedNetwork.GetInputsInfo()) { for (const auto& input : importedNetwork.GetInputsInfo()) {

View File

@ -26,6 +26,7 @@ INSTANTIATE_TEST_CASE_P(DISABLED_smoke_NumSplitsCheck, SplitLayerTest,
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({30, 30})), ::testing::Values(std::vector<size_t >({30, 30})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_GNA)), ::testing::Values(CommonTestUtils::DEVICE_GNA)),
SplitLayerTest::getTestCaseName); SplitLayerTest::getTestCaseName);

View File

@ -0,0 +1,35 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include <subgraph_tests/memory_eltwise_reshape_concat.hpp>
#include "common_test_utils/test_constants.hpp"
namespace SubgraphTestsDefinitions {
namespace {
std::vector<size_t> input_multiples = {
1,
7,
5,
8
};
std::vector<size_t> concat_sizes = {
32,
64
};
std::map<std::string, std::string> additional_config = {
{"GNA_COMPACT_MODE", "NO"},
{"GNA_DEVICE_MODE", "GNA_SW_FP32"},
{"GNA_SCALE_FACTOR_0", "1638.4"},
};
} // namespace
INSTANTIATE_TEST_CASE_P(smoke_MemoryEltwiseReshapeConcatTest, MemoryEltwiseReshapeConcatTest,
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(input_multiples),
::testing::ValuesIn(concat_sizes),
::testing::Values(additional_config)),
MemoryEltwiseReshapeConcatTest::getTestCaseName);
} // namespace SubgraphTestsDefinitions

View File

@ -26,8 +26,22 @@ INSTANTIATE_TEST_CASE_P(smoke_NumSplitsCheck, SplitLayerTest,
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({30, 30, 30, 30})), ::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_GPU)), ::testing::Values(CommonTestUtils::DEVICE_GPU)),
SplitLayerTest::getTestCaseName); SplitLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_splitWithUnusedOutputsTest, SplitLayerTest,
::testing::Combine(
::testing::Values(5),
::testing::Values(0),
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({0, 3})),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
SplitLayerTest::getTestCaseName);
} // namespace } // namespace

View File

@ -16,7 +16,7 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
INSTANTIATE_TEST_CASE_P(smoke_NumSplitsCheck, SplitLayerTest, INSTANTIATE_TEST_CASE_P(smoke_NumSplitsCheck, SplitLayerTest,
::testing::Combine( ::testing::Combine(
::testing::Values(1), ::testing::Values(5),
// TODO: 0-axis excluded // TODO: 0-axis excluded
// Check (status == ie::StatusCode::OK) failed: Failed to reshape Network: // Check (status == ie::StatusCode::OK) failed: Failed to reshape Network:
// Failed to infer shapes for Split layer (Split_2) with error: // Failed to infer shapes for Split layer (Split_2) with error:
@ -28,10 +28,11 @@ INSTANTIATE_TEST_CASE_P(smoke_NumSplitsCheck, SplitLayerTest,
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})), ::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({})),
::testing::Values(CommonTestUtils::DEVICE_MYRIAD)), ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)),
SplitLayerTest::getTestCaseName); SplitLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_splitWithUnusedOutputsTest, splitWithUnusedOutputsTest, INSTANTIATE_TEST_CASE_P(smoke_splitWithUnusedOutputsTest, SplitLayerTest,
::testing::Combine( ::testing::Combine(
::testing::Values(5), ::testing::Values(5),
// TODO: 0-axis excluded // TODO: 0-axis excluded
@ -49,5 +50,5 @@ INSTANTIATE_TEST_CASE_P(smoke_splitWithUnusedOutputsTest, splitWithUnusedOutputs
std::vector<size_t>({0, 4}), std::vector<size_t>({0, 4}),
std::vector<size_t>({2, 3})), std::vector<size_t>({2, 3})),
::testing::Values(CommonTestUtils::DEVICE_MYRIAD)), ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)),
splitWithUnusedOutputsTest::getTestCaseName); SplitLayerTest::getTestCaseName);
} // namespace } // namespace

View File

@ -37,4 +37,108 @@ protected:
void SetUp() override; void SetUp() override;
}; };
using StaticShapeLoopParams = typename std::tuple<
    bool,                           // static_continue_cond: initial condition provided by a constant node
    std::tuple<
        bool,                       // static_trip_count: trip count provided by a constant node
        int64_t,                    // max_iter_num: -1 means an infinite loop
        int64_t,                    // dynamic_exit: -1 means always true
        int64_t                     // axis: -1 means no auto concatenation
    >,
    int64_t,                        // start_value
    InferenceEngine::SizeVector,    // data_shape
    InferenceEngine::Precision,     // data_prc
    std::string                     // target device name
>;
/**
 * Test case for the static-shape version of the Loop operation.
 * The total iteration count is dynamic.
*/
class StaticShapeLoopTest : public testing::WithParamInterface<StaticShapeLoopParams>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<StaticShapeLoopParams> &obj);
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override;
std::vector<std::vector<std::uint8_t>> CalculateRefs() override;
private:
bool static_iter_num; // trip count provided by constant node
bool static_continue_cond; // initial_cond provided by constant node
int64_t max_iter_num; // -1 means an infinite loop (a dynamic exit condition is expected in the body)
int64_t dynamic_exit; // -1 means always true
int64_t axis; // -1 means no auto concatenation
int64_t start_value;
InferenceEngine::SizeVector data_shape;
InferenceEngine::Precision data_prc;
int64_t actual_n_iter();
protected:
void SetUp() override;
};
class TrivialLoopTest : public testing::WithParamInterface<LayerTestsUtils::basicParams>,
virtual public LayerTestsUtils::LayerTestsCommon {
protected:
using RefBlobGenerator = std::function<InferenceEngine::Blob::Ptr (const InferenceEngine::TensorDesc &info)>;
std::map<std::string, RefBlobGenerator> inputGens, outputGens;
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override {
auto found = inputGens.find(info.name());
if (found != inputGens.end()) {
return found->second(info.getTensorDesc());
}
found = inputGens.find("");
if (found != inputGens.end()) {
return found->second(info.getTensorDesc());
}
return LayerTestsCommon::GenerateInput(info);
}
std::vector<std::vector<std::uint8_t>> CalculateRefs() override {
if (outputGens.empty())
return LayerTestsCommon::CalculateRefs();
const auto results = function->get_results();
const auto outs_info = cnnNetwork.getOutputsInfo();
const auto num_out_blob = results.size();
std::vector<std::vector<std::uint8_t>> res_collection(num_out_blob);
for (int i = 0; i < num_out_blob; i++) {
// TODO: name of original NG result doesn't match with outs after conversion.
// Expected : auto name = results[i]->get_friendly_name();
auto name = results[i]->get_input_node_ptr(0)->get_friendly_name();
auto data = outs_info.at(name);
IE_ASSERT(data != nullptr);
RefBlobGenerator generator;
auto found = outputGens.find(name);
if (found != outputGens.end()) {
generator = found->second;
} else {
found = outputGens.find("");
if (found != outputGens.end()) {
generator = found->second;
}
}
IE_ASSERT(generator != nullptr) << "Test output generator is not specified";
auto blob = generator(data->getTensorDesc());
auto blob_size = blob->byteSize();
auto blob_ptr = blob->buffer().as<uint8_t*>();
auto &res = res_collection[i];
res.resize(blob_size);
std::copy(blob_ptr, blob_ptr + blob_size, res.begin());
}
return res_collection;
}
};
} // namespace LayerTestsDefinitions } // namespace LayerTestsDefinitions

View File

@ -23,6 +23,7 @@ typedef std::tuple<
InferenceEngine::Layout, // Input layout InferenceEngine::Layout, // Input layout
InferenceEngine::Layout, // Output layout InferenceEngine::Layout, // Output layout
std::vector<size_t>, // Input shapes std::vector<size_t>, // Input shapes
std::vector<size_t>, // Used outputs indices
std::string // Target device name std::string // Target device name
> splitParams; > splitParams;
@ -35,26 +36,4 @@ protected:
void SetUp() override; void SetUp() override;
}; };
typedef std::tuple<
size_t, // Num splits
size_t, // Axis
InferenceEngine::Precision, // Net precision
InferenceEngine::Precision, // Input precision
InferenceEngine::Precision, // Output precision
InferenceEngine::Layout, // Input layout
InferenceEngine::Layout, // Output layout
std::vector<size_t>, // Input shapes
std::vector<size_t>, // Used outputs indices
std::string // Target device name
> splitWithUnusedOutputsParams;
class splitWithUnusedOutputsTest : public testing::WithParamInterface<splitWithUnusedOutputsParams>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<splitWithUnusedOutputsParams> obj);
protected:
void SetUp() override;
};
} // namespace LayerTestsDefinitions } // namespace LayerTestsDefinitions

View File

@ -0,0 +1,37 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "common_test_utils/test_common.hpp"
#include "functional_test_utils/layer_test_utils.hpp"
#include <ie_core.hpp>
namespace SubgraphTestsDefinitions {
typedef std::tuple<
std::string, // Target device name
InferenceEngine::Precision, // Network precision
size_t, // Multiples of the concat size to be used as input size
size_t, // Concat size
std::map<std::string, std::string> // Configuration
> memoryEltwiseReshapeConcatParams;
class MemoryEltwiseReshapeConcatTest : public LayerTestsUtils::LayerTestsCommon,
public testing::WithParamInterface<memoryEltwiseReshapeConcatParams> {
private:
void initTestModel();
// memory layers have to be replaced since ngraph does not support them
void initNgraphFriendlyModel();
// since we are switching models, we need to generate and save these values in SetUp
size_t inputSize;
size_t concatSize;
ngraph::element::Type ngPrc;
std::vector<float> memory_init;
std::vector<float> concat_vals;
protected:
void SetUp() override;
void Run() override;
public:
static std::string getTestCaseName(const testing::TestParamInfo<memoryEltwiseReshapeConcatParams> &obj);
};
} // namespace SubgraphTestsDefinitions

View File

@ -46,7 +46,9 @@ namespace LayerTestsDefinitions {
result << "types=" << CommonTestUtils::vec2str(types_separate) << "_"; result << "types=" << CommonTestUtils::vec2str(types_separate) << "_";
result << "netPRC=" << netPrecision.name() << "_"; result << "netPRC=" << netPrecision.name() << "_";
result << "targetDevice=" << targetDevice << "_"; result << "targetDevice=" << targetDevice << "_";
return result.str(); auto res_str = result.str();
std::replace(res_str.begin(), res_str.end(), '-', '_');
return res_str;
} }
void LoopTest::SetUp() { void LoopTest::SetUp() {
@ -155,5 +157,227 @@ namespace LayerTestsDefinitions {
TEST_P(LoopTest, CompareWithRefs) { TEST_P(LoopTest, CompareWithRefs) {
Run(); Run();
}
void StaticShapeLoopTest::SetUp() {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
SetRefMode(LayerTestsUtils::IE);
auto args_pack = std::tie(static_iter_num, max_iter_num, dynamic_exit, axis);
std::tie(
static_continue_cond,
args_pack,
start_value,
data_shape,
data_prc,
targetDevice) = GetParam();
const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(data_prc);
const auto ngShape = ngraph::Shape{data_shape};
const auto scalarShape = ngraph::Shape{};
ngraph::ParameterVector params{};
auto cond_input_create = [&params] (ngraph::element::Type prc, const ngraph::Shape &shape, int value = 0, bool is_static = false)
-> std::shared_ptr<ngraph::Node> {
if (is_static)
return std::make_shared<ngraph::opset5::Constant>(prc, shape, value);
auto input = std::make_shared<ngraph::op::Parameter>(prc, shape);
params.push_back(input);
return input;
}; };
auto start = cond_input_create(prc, ngShape);
auto count = cond_input_create(ngraph::element::i64, scalarShape, max_iter_num, static_iter_num);
auto skip = cond_input_create(ngraph::element::boolean, scalarShape, true, static_continue_cond);
//
// count skip start count skip start
// / /
// ___*___*____ __________*___*____ | idx | data | out |
// | idx in | | ex_val idx in | | 0 | 7 | 7 |
// | | / | | | / | / | | 1 | 7 | 8 |
// | add | | less add | | 2 | 8 | 10 |
// | | true | | | | | | 3 | 10 | 13 |
// | | | | | | | | ~~~~~ * * * ~~~~~
// | out cnd | | cnd out |
// |___*____*___| |____*_____*________|
// Full loop Dynamic exit loop
// n_iter = count n_iter = ex_val
//
auto b_indx = std::make_shared<ngraph::op::Parameter>(ngraph::element::i64, ngraph::Shape{});
auto b_data = std::make_shared<ngraph::op::Parameter>(prc, ngShape);
auto b_indx_cast = std::make_shared<ngraph::op::Convert>(b_indx, prc);
auto b_add = std::make_shared<ngraph::op::Add>(b_data, b_indx_cast, ngraph::op::AutoBroadcastSpec::NUMPY);
std::shared_ptr<ngraph::Node> b_cond;
if (dynamic_exit == -1) {
b_cond = std::make_shared<ngraph::opset5::Constant>(ngraph::element::boolean, ngraph::Shape{}, true);
} else {
auto b_exit_value = std::make_shared<ngraph::opset5::Constant>(ngraph::element::i64, scalarShape, dynamic_exit);
b_cond = std::make_shared<ngraph::opset5::Less>(b_indx, b_exit_value);
}
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {b_cond, b_add}, // TODO: check with reverse
ngraph::ParameterVector {b_indx, b_data}); // TODO: check with reverse
auto loop = std::make_shared<ngraph::opset5::Loop>(count, skip);
loop->set_function(body);
loop->set_special_body_ports({0, 0});
loop->set_merged_input(b_data, start, b_add);
if (axis == -1)
loop->get_iter_value(b_add, -1);
else
loop->get_concatenated_slices(b_add, 0, 1, 1, -1, axis);
function = std::make_shared<ngraph::Function>(
ngraph::OutputVector {loop},
params);
}
InferenceEngine::Blob::Ptr StaticShapeLoopTest::GenerateInput(const InferenceEngine::InputInfo &info) const {
auto tdesc = info.getTensorDesc();
auto blob = make_blob_with_precision(tdesc);
blob->allocate();
if (tdesc.getLayout() == InferenceEngine::SCALAR) {
auto scalar_1d = CommonTestUtils::make_reshape_view(blob, {1});
CommonTestUtils::fill_data_with_broadcast(scalar_1d, 0, {static_cast<float>(max_iter_num)});
} else {
CommonTestUtils::fill_data_with_broadcast(blob, 0, {static_cast<float>(start_value)});
}
return blob;
}
int64_t StaticShapeLoopTest::actual_n_iter() {
constexpr auto INF_N_ITER = std::numeric_limits<int64_t>::max();
IE_ASSERT(dynamic_exit != -1 || max_iter_num != -1);
// dynamic_exit + 1, because the loop body behaves like a do-while loop with a post-condition check.
return std::min(dynamic_exit == -1 ? INF_N_ITER : dynamic_exit + 1,
max_iter_num == -1 ? INF_N_ITER : max_iter_num);
}
// Predefined ref output
std::vector<std::vector<std::uint8_t>> StaticShapeLoopTest::CalculateRefs() {
bool auto_concat_out = (axis != -1);
const auto n_iter = actual_n_iter();
auto ref_shape = data_shape;
if (auto_concat_out)
ref_shape[axis] *= n_iter;
using namespace CommonTestUtils;
InferenceEngine::TensorDesc tdesc {data_prc, ref_shape, InferenceEngine::TensorDesc::getLayoutByDims(ref_shape)};
std::vector<uint8_t> res(byte_size(tdesc));
auto out = make_blob_with_precision(tdesc, res.data());
std::vector<float> vals(n_iter);
float val = start_value;
for (int i = 0; i < n_iter; i++) {
val += i;
vals[i] = val;
}
if (auto_concat_out)
fill_data_with_broadcast(out, axis, vals);
else
fill_data_with_broadcast(out, 0, {val}); // broadcast scalar data
return {res};
}
TEST_P(StaticShapeLoopTest, CompareWithRefs) {
Run();
}
TEST_P(TrivialLoopTest, PassThroughBody) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
InferenceEngine::Precision iePrc;
InferenceEngine::SizeVector ieShape;
std::tie(iePrc, ieShape, targetDevice) = GetParam();
const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(iePrc);
const auto shape = ngraph::Shape{ieShape};
const auto scalarShape = ngraph::Shape{};
auto start = std::make_shared<ngraph::op::Parameter>(prc, shape);
auto count = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, scalarShape, 5);
auto icond = std::make_shared<ngraph::op::Constant>(ngraph::element::boolean, scalarShape, true);
// Loop body
auto b_data = std::make_shared<ngraph::op::Parameter>(prc, shape);
auto b_cond = std::make_shared<ngraph::op::Parameter>(ngraph::element::boolean, scalarShape);
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {b_cond, b_data}, // | passthrough body, no data changes
ngraph::ParameterVector {b_cond, b_data}); // | input -> output
auto loop = std::make_shared<ngraph::opset5::Loop>(count, icond);
loop->set_function(body);
loop->set_special_body_ports({-1, 0});
loop->set_invariant_input(b_cond, icond);
loop->set_invariant_input(b_data, start);
loop->get_iter_value(b_data, -1);
function = std::make_shared<ngraph::Function>(
ngraph::OutputVector {loop},
ngraph::ParameterVector {start});
// Precalculated ref blobs
auto blob = make_blob_with_precision({iePrc, ieShape, InferenceEngine::TensorDesc::getLayoutByDims(ieShape)});
blob->allocate();
CommonTestUtils::fill_data_with_broadcast(blob, 0, {10});
inputGens[""] = [&] (InferenceEngine::TensorDesc tdesc) { return blob; };
outputGens[""] = [&] (InferenceEngine::TensorDesc tdesc) { return blob; };
Run();
}
TEST_P(TrivialLoopTest, UnusedInputBody) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
InferenceEngine::Precision iePrc;
InferenceEngine::SizeVector ieShape;
std::tie(iePrc, ieShape, targetDevice) = GetParam();
const auto prc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(iePrc);
const auto shape = ngraph::Shape{ieShape};
const auto scalarShape = ngraph::Shape{};
auto start = std::make_shared<ngraph::op::Parameter>(prc, shape);
auto count = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, scalarShape, 5);
auto icond = std::make_shared<ngraph::op::Constant>(ngraph::element::boolean, scalarShape, true);
// Loop body
auto b_data = std::make_shared<ngraph::op::Parameter>(prc, shape);
auto b_cond = std::make_shared<ngraph::op::Constant>(ngraph::element::boolean, scalarShape, true);
auto b_iter = std::make_shared<ngraph::op::Parameter>(ngraph::element::i64, scalarShape);
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector {b_cond, b_data},
ngraph::ParameterVector {b_data, b_iter});
auto loop = std::make_shared<ngraph::opset5::Loop>(count, icond);
loop->set_function(body);
loop->set_special_body_ports({1, 0});
loop->set_invariant_input(b_data, start);
loop->get_iter_value(b_data, -1);
function = std::make_shared<ngraph::Function>(
ngraph::OutputVector {loop},
ngraph::ParameterVector {start});
// Precalculated ref blobs
auto blob = make_blob_with_precision({iePrc, ieShape, InferenceEngine::TensorDesc::getLayoutByDims(ieShape)});
blob->allocate();
CommonTestUtils::fill_data_with_broadcast(blob, 0, {10});
inputGens[""] = [&] (InferenceEngine::TensorDesc tdesc) { return blob; };
outputGens[""] = [&] (InferenceEngine::TensorDesc tdesc) { return blob; };
Run();
}
} // namespace LayerTestsDefinitions } // namespace LayerTestsDefinitions
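The reference values produced in `StaticShapeLoopTest::CalculateRefs()` follow the accumulation sketched in the ASCII loop diagram above: starting from `start_value`, iteration `i` adds the loop index to the running value. A small standalone check of that recurrence (`start_value = 7` and four iterations are taken from the table in the comment):

```cpp
#include <cassert>
#include <vector>

int main() {
    const float start_value = 7.f;      // matches the "data" column start in the diagram
    const int n_iter = 4;
    std::vector<float> vals(n_iter);
    float val = start_value;
    for (int i = 0; i < n_iter; i++) {  // same recurrence as StaticShapeLoopTest::CalculateRefs
        val += i;
        vals[i] = val;
    }
    // expected "out" column from the diagram: 7, 8, 10, 13
    assert(vals[0] == 7.f && vals[1] == 8.f && vals[2] == 10.f && vals[3] == 13.f);
    return 0;
}
```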

View File

@ -26,13 +26,16 @@ std::string SplitLayerTest::getTestCaseName(testing::TestParamInfo<splitParams>
InferenceEngine::Precision netPrecision; InferenceEngine::Precision netPrecision;
InferenceEngine::Precision inPrc, outPrc; InferenceEngine::Precision inPrc, outPrc;
InferenceEngine::Layout inLayout, outLayout; InferenceEngine::Layout inLayout, outLayout;
InferenceEngine::SizeVector inputShapes; InferenceEngine::SizeVector inputShapes, outIndices;
std::string targetDevice; std::string targetDevice;
std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, targetDevice) = obj.param; std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, outIndices, targetDevice) = obj.param;
std::ostringstream result; std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
result << "numSplits=" << numSplits << "_"; result << "numSplits=" << numSplits << "_";
result << "axis=" << axis << "_"; result << "axis=" << axis << "_";
if (!outIndices.empty()) {
result << "outIndices" << CommonTestUtils::vec2str(outIndices) << "_";
}
result << "IS"; result << "IS";
result << "netPRC=" << netPrecision.name() << "_"; result << "netPRC=" << netPrecision.name() << "_";
result << "inPRC=" << inPrc.name() << "_"; result << "inPRC=" << inPrc.name() << "_";
@ -46,57 +49,14 @@ std::string SplitLayerTest::getTestCaseName(testing::TestParamInfo<splitParams>
void SplitLayerTest::SetUp() { void SplitLayerTest::SetUp() {
SetRefMode(LayerTestsUtils::RefMode::CONSTANT_FOLDING); SetRefMode(LayerTestsUtils::RefMode::CONSTANT_FOLDING);
size_t axis, numSplits; size_t axis, numSplits;
std::vector<size_t> inputShape; std::vector<size_t> inputShape, outIndices;
InferenceEngine::Precision netPrecision; InferenceEngine::Precision netPrecision;
std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, targetDevice) = this->GetParam();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
auto paramOuts = ngraph::helpers::convert2OutputVector(
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
auto split = std::dynamic_pointer_cast<ngraph::opset1::Split>(ngraph::builder::makeSplit(paramOuts[0],
ngPrc, numSplits, axis));
ngraph::ResultVector results;
for (int i = 0; i < numSplits; i++) {
results.push_back(std::make_shared<ngraph::opset1::Result>(split->output(i)));
}
function = std::make_shared<ngraph::Function>(results, params, "split");
}
TEST_P(SplitLayerTest, CompareWithRefs) {
Run();
};
std::string splitWithUnusedOutputsTest::getTestCaseName(testing::TestParamInfo<splitWithUnusedOutputsParams> obj) {
size_t numSplits, axis;
InferenceEngine::Precision netPrecision;
InferenceEngine::Precision inPrc, outPrc;
InferenceEngine::Layout inLayout, outLayout;
InferenceEngine::SizeVector inputShapes;
std::vector<size_t> outIndices;
std::string targetDevice;
std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, outIndices, targetDevice) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
result << "numSplits=" << numSplits << "_";
result << "axis=" << axis << "_";
result << "outIndices" << CommonTestUtils::vec2str(outIndices) << "_";
result << "IS";
result << "netPRC=" << netPrecision.name() << "_";
result << "inPRC=" << inPrc.name() << "_";
result << "outPRC=" << outPrc.name() << "_";
result << "inL=" << inLayout << "_";
result << "outL=" << outLayout << "_";
result << "trgDev=" << targetDevice;
return result.str();
}
void splitWithUnusedOutputsTest::SetUp() {
SetRefMode(LayerTestsUtils::RefMode::CONSTANT_FOLDING);
size_t axis, numSplits;
std::vector<size_t> inputShape;
InferenceEngine::Precision netPrecision;
std::vector<size_t> outIndices;
std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, outIndices, targetDevice) = this->GetParam(); std::tie(numSplits, axis, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, outIndices, targetDevice) = this->GetParam();
if (outIndices.empty()) {
for (int i = 0; i < numSplits; ++i) {
outIndices.push_back(i);
}
}
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
auto paramOuts = ngraph::helpers::convert2OutputVector( auto paramOuts = ngraph::helpers::convert2OutputVector(
@ -110,7 +70,7 @@ void splitWithUnusedOutputsTest::SetUp() {
function = std::make_shared<ngraph::Function>(results, params, "split"); function = std::make_shared<ngraph::Function>(results, params, "split");
} }
TEST_P(splitWithUnusedOutputsTest, CompareWithRefs) { TEST_P(SplitLayerTest, CompareWithRefs) {
Run(); Run();
}; };

View File

@ -0,0 +1,150 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include <functional>
#include "ie_core.hpp"
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "functional_test_utils/precision_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "functional_test_utils/skip_tests_config.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include <transformations/op_conversions/lstm_cell_decomposition.hpp>
#include "subgraph_tests/memory_eltwise_reshape_concat.hpp"
namespace SubgraphTestsDefinitions {
std::string MemoryEltwiseReshapeConcatTest::getTestCaseName(const testing::TestParamInfo<memoryEltwiseReshapeConcatParams> &obj) {
std::string targetDevice;
InferenceEngine::Precision netPrecision;
size_t inputSize;
size_t concatSize;
std::map<std::string, std::string> config;
std::tie(targetDevice, netPrecision, inputSize, concatSize, config) = obj.param;
std::ostringstream result;
result << "netPrecision=" << netPrecision.name() << "_";
result << "IS=" << inputSize << "_";
result << "CS=" << concatSize << "_";
result << "targetDevice=" << targetDevice;
return result.str();
}
void MemoryEltwiseReshapeConcatTest::SetUp() {
InferenceEngine::Precision netPrecision;
std::map<std::string, std::string> config;
std::tie(targetDevice, netPrecision, inputSize, concatSize, config) = this->GetParam();
configuration.insert(config.begin(), config.end());
ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
const int seed = 0;
std::mt19937 gen(seed);
auto generateFloatNumbers = [gen](std::size_t vec_len, float min, float max) mutable {
std::vector<float> res;
std::uniform_real_distribution<float> dist(min, max);
for (int i = 0; i < vec_len; i++)
res.emplace_back(static_cast<float>(dist(gen)));
return res;
};
memory_init = generateFloatNumbers(inputSize * concatSize, -1.0f, 1.0f);
concat_vals = generateFloatNumbers(concatSize, 12.0f, 14.0f);
}
void MemoryEltwiseReshapeConcatTest::initTestModel() {
InferenceEngine::SizeVector input_dims = {1, inputSize * concatSize};
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
auto memory_constant = ngraph::builder::makeConstant<float>(ngPrc, input_dims, memory_init);
memory_constant->set_friendly_name("memory_constant");
auto memory_read = std::make_shared<ngraph::op::ReadValue>(memory_constant, "memory");
memory_read->set_friendly_name("memory_read");
auto mul = ngraph::builder::makeEltwise(input_parameter[0], memory_read, ngraph::helpers::EltwiseTypes::MULTIPLY);
mul->set_friendly_name("multiplication");
auto memory_write = std::make_shared<ngraph::op::Assign>(mul, "memory");
memory_write->set_friendly_name("memory_write");
auto reshape_1_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<size_t>({inputSize, concatSize}));
reshape_1_pattern->set_friendly_name("reshape_pattern");
auto reshape_1 = std::make_shared<ngraph::op::v1::Reshape>(mul, reshape_1_pattern, false);
reshape_1->set_friendly_name("reshape");
auto concat_constant = ngraph::builder::makeConstant(ngPrc, {1, concatSize}, concat_vals);
concat_constant->set_friendly_name("concat_constant");
auto concat = ngraph::builder::makeConcat({concat_constant, reshape_1}, 0);
memory_write->add_control_dependency(memory_read);
concat->add_control_dependency(memory_write);
auto final_reshape_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{4},
std::vector<size_t>({1, 1, inputSize + 1, concatSize}));
auto final_reshape = std::make_shared<ngraph::op::v1::Reshape>(concat, final_reshape_pattern, false);
function = std::make_shared<ngraph::Function>(final_reshape, input_parameter, "memory_multiply_reshape_concat");
}
void MemoryEltwiseReshapeConcatTest::initNgraphFriendlyModel() {
InferenceEngine::SizeVector input_dims = {1, inputSize * concatSize};
auto input_parameter = ngraph::builder::makeParams(ngPrc, {input_dims});
auto memory_constant = ngraph::builder::makeConstant<float>(ngPrc, input_dims, memory_init);
memory_constant->set_friendly_name("memory_constant");
auto mul = ngraph::builder::makeEltwise(input_parameter[0], memory_constant, ngraph::helpers::EltwiseTypes::MULTIPLY);
mul->set_friendly_name("multiplication");
auto reshape_pattern = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{3}, std::vector<size_t>({1, inputSize, concatSize}));
reshape_pattern->set_friendly_name("reshape_pattern");
auto reshape = std::make_shared<ngraph::op::v1::Reshape>(mul, reshape_pattern, false);
reshape->set_friendly_name("reshape");
auto squeeze_const = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{1}, 0);
squeeze_const->set_friendly_name("squeeze_const");
auto squeeze = std::make_shared<ngraph::op::Squeeze>(reshape, squeeze_const);
squeeze->set_friendly_name("squeeze");
auto concat_constant = ngraph::builder::makeConstant(ngPrc, {1, concatSize}, concat_vals);
concat_constant->set_friendly_name("concat_constant");
auto concat = ngraph::builder::makeConcat({concat_constant, squeeze}, 0);
function = std::make_shared<ngraph::Function>(concat, input_parameter, "memory_multiply_reshape_concat");
}
void MemoryEltwiseReshapeConcatTest::Run() {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
initTestModel();
LoadNetwork();
InferenceEngine::TensorDesc state_description(InferenceEngine::Precision::FP32,
InferenceEngine::SizeVector({1, inputSize * concatSize}),
InferenceEngine::Layout::NC);
auto states = executableNetwork.QueryState();
auto state_values_blob = FuncTestUtils::createAndFillBlobWithFloatArray(state_description,
memory_init.data(), memory_init.size());
states[0].SetState(state_values_blob);
Infer();
initNgraphFriendlyModel();
Validate();
}
TEST_P(MemoryEltwiseReshapeConcatTest, CompareWithRefs) {
Run();
};
} // namespace SubgraphTestsDefinitions

View File

@ -104,6 +104,10 @@ void fill_data_with_broadcast(InferenceEngine::Blob::Ptr& blob, InferenceEngine:
auto src_ptr = get_data(values); auto src_ptr = get_data(values);
switch (blob->getTensorDesc().getPrecision()) { switch (blob->getTensorDesc().getPrecision()) {
case InferenceEngine::Precision::U64:
case InferenceEngine::Precision::I64:
copy_7D<uint64_t>(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims);
break;
case InferenceEngine::Precision::FP32: case InferenceEngine::Precision::FP32:
case InferenceEngine::Precision::I32: case InferenceEngine::Precision::I32:
copy_7D<uint32_t>(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims); copy_7D<uint32_t>(src_ptr, src_strides, dst_ptr, dst_strides, dst_dims);
@ -189,6 +193,12 @@ InferenceEngine::Blob::Ptr make_reshape_view(const InferenceEngine::Blob::Ptr &b
return new_blob; return new_blob;
} }
size_t byte_size(const InferenceEngine::TensorDesc &tdesc) {
auto prc = tdesc.getPrecision();
auto dims = tdesc.getDims();
return prc.size() * std::accumulate(std::begin(dims), std::end(dims), (size_t)1, std::multiplies<size_t>());
}
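In other words, the buffer size is the per-element size of the precision times the total element count. A small illustrative fragment in the same `CommonTestUtils` context (the `TensorDesc` values are arbitrary):

```cpp
// FP32 is 4 bytes per element; a {2, 3, 4} tensor holds 24 elements, so 96 bytes are required.
InferenceEngine::TensorDesc tdesc(InferenceEngine::Precision::FP32, {2, 3, 4},
                                  InferenceEngine::Layout::CHW);
size_t required = byte_size(tdesc);   // == 4 * 2 * 3 * 4 == 96
```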
/** /**
* repeated filling tensor with data. * repeated filling tensor with data.
* *

View File

@ -72,6 +72,14 @@ InferenceEngine::Blob::Ptr make_reshape_view(const InferenceEngine::Blob::Ptr &b
*/ */
void fill_data_const(InferenceEngine::Blob::Ptr& blob, float val); void fill_data_const(InferenceEngine::Blob::Ptr& blob, float val);
/**
* Calculate the size of the buffer required for the provided tensor descriptor.
* @param tdesc provided tensor descriptor
* @return size in bytes
*/
size_t byte_size(const InferenceEngine::TensorDesc &tdesc);
static void fill_data_bbox(float *data, size_t size, int height, int width, float omega) { static void fill_data_bbox(float *data, size_t size, int height, int width, float omega) {
float center_h = (height - 1.0f) / 2; float center_h = (height - 1.0f) / 2;
float center_w = (width - 1.0f) / 2; float center_w = (width - 1.0f) / 2;

View File

@ -60,6 +60,14 @@ GNA2_API Gna2Status Gna2DeviceClose(
return Gna2StatusSuccess; return Gna2StatusSuccess;
} }
GNA2_API Gna2Status Gna2DeviceGetCount(
uint32_t* numberOfDevices) {
if (numberOfDevices != nullptr) {
*numberOfDevices = 1;
}
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2MemoryFree( GNA2_API enum Gna2Status Gna2MemoryFree(
void * memory) { void * memory) {
return Gna2StatusSuccess; return Gna2StatusSuccess;
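The stub reports exactly one device whenever it is given a valid pointer. A caller-side sketch (not taken from the plugin sources) of how the new entry point is meant to be used:

```cpp
uint32_t numberOfDevices = 0;
if (Gna2DeviceGetCount(&numberOfDevices) == Gna2StatusSuccess && numberOfDevices > 0) {
    // at least one GNA device is available (this stub always reports exactly one)
}
```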

View File

@ -69,6 +69,14 @@ GNA2_API Gna2Status Gna2DeviceClose(
return Gna2StatusSuccess; return Gna2StatusSuccess;
} }
GNA2_API Gna2Status Gna2DeviceGetCount(
uint32_t * numberOfDevices) {
if (numberOfDevices != nullptr) {
*numberOfDevices = 1;
}
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2MemoryFree( GNA2_API enum Gna2Status Gna2MemoryFree(
void * memory) { void * memory) {
if (current != nullptr) { if (current != nullptr) {

View File

@ -60,10 +60,10 @@ public:
void * alloc(size_t size) noexcept override { void * alloc(size_t size) noexcept override {
return ptr; return ptr;
} }
virtual bool free(void* handle) noexcept { bool free(void* handle) noexcept override {
return true; return true;
} }
virtual void Release() noexcept { void Release() noexcept override {
delete this; delete this;
} }
}; };

View File

@ -102,6 +102,9 @@ class GNACppApi {
MOCK_METHOD1(Gna2DeviceClose, Gna2Status ( MOCK_METHOD1(Gna2DeviceClose, Gna2Status (
uint32_t deviceIndex)); uint32_t deviceIndex));
MOCK_METHOD1(Gna2DeviceGetCount, Gna2Status (
uint32_t * numberOfDevices));
MOCK_METHOD1(Gna2MemoryFree, Gna2Status ( MOCK_METHOD1(Gna2MemoryFree, Gna2Status (
void * memory)); void * memory));
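With `Gna2DeviceGetCount` added to the mock, a unit test can control the reported device count. A hedged sketch, assuming a `GNACppApi` instance named `mockApi` that is wired into the C stubs (the name and the wiring are assumptions):

```cpp
EXPECT_CALL(mockApi, Gna2DeviceGetCount(::testing::_))
    .WillOnce(::testing::Invoke([](uint32_t *numberOfDevices) {
        *numberOfDevices = 1;              // pretend a single GNA device is present
        return Gna2StatusSuccess;
    }));
```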

View File

@ -100,24 +100,23 @@ struct resample : public primitive_base<resample> {
/// @param scale Resample scale. /// @param scale Resample scale.
/// @param num_filter Input filter. Only used by bilinear sample_type. /// @param num_filter Input filter. Only used by bilinear sample_type.
/// @param sample_type Resample method (nearest neighbor/bilinear/caffe bilinear). /// @param sample_type Resample method (nearest neighbor/bilinear/caffe bilinear).
/// @param with_activation Enables Relu activation.
/// @param activation_slp Relu activation slope.
resample(const primitive_id& id, resample(const primitive_id& id,
const primitive_id& input, const primitive_id& input,
tensor output_size, tensor output_size,
uint32_t num_filter, uint32_t num_filter,
resample_type operation_type = resample_type::nearest, resample_type operation_type = resample_type::nearest,
bool with_activation = false,
float activation_slp = 0.0f,
const padding& output_padding = padding()) const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding), : primitive_base(id, {input}, output_padding),
output_size(output_size), output_size(output_size),
num_filter(num_filter), num_filter(num_filter),
axesAndScales({}),
pads_begin({}),
pads_end({}),
align_corners(1), align_corners(1),
operation_type(operation_type), operation_type(operation_type),
shape_calc_mode(shape_calculation_mode::sizes), shape_calc_mode(shape_calculation_mode::sizes),
with_activation(with_activation), antialias(0),
activation_negative_slope(activation_slp), cube_coeff(0.0f),
coord_trans_mode(coordinate_transformation_mode::asymmetric), coord_trans_mode(coordinate_transformation_mode::asymmetric),
round_mode(nearest_mode::floor) { round_mode(nearest_mode::floor) {
if (operation_type == resample_type::caffe_bilinear) { if (operation_type == resample_type::caffe_bilinear) {
@ -132,8 +131,6 @@ struct resample : public primitive_base<resample> {
/// @param pads_end Optional end padding for input. /// @param pads_end Optional end padding for input.
/// @param align_corners Align corner pixels of the input and output tensors. /// @param align_corners Align corner pixels of the input and output tensors.
/// @param resample_type Resample bilinear method. /// @param resample_type Resample bilinear method.
/// @param with_activation Enables Relu activation.
/// @param activation_slp Relu activation slope.
resample(const primitive_id& id, resample(const primitive_id& id,
const primitive_id& input, const primitive_id& input,
tensor output_size, tensor output_size,
@ -141,19 +138,18 @@ struct resample : public primitive_base<resample> {
std::vector<int32_t> pads_end = {}, std::vector<int32_t> pads_end = {},
int32_t align_corners = 1, int32_t align_corners = 1,
resample_type operation_type = resample_type::bilinear, resample_type operation_type = resample_type::bilinear,
bool with_activation = false,
float activation_slp = 0.0f,
const padding& output_padding = padding()) const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding), : primitive_base(id, {input}, output_padding),
output_size(output_size), output_size(output_size),
num_filter(0), num_filter(0),
axesAndScales({}),
pads_begin(pads_begin), pads_begin(pads_begin),
pads_end(pads_end), pads_end(pads_end),
align_corners(align_corners), align_corners(align_corners),
operation_type(operation_type), operation_type(operation_type),
shape_calc_mode(shape_calculation_mode::sizes), shape_calc_mode(shape_calculation_mode::sizes),
with_activation(with_activation), antialias(0),
activation_negative_slope(activation_slp), cube_coeff(0.0f),
coord_trans_mode(coordinate_transformation_mode::asymmetric), coord_trans_mode(coordinate_transformation_mode::asymmetric),
round_mode(nearest_mode::floor) {} round_mode(nearest_mode::floor) {}
@ -170,19 +166,20 @@ struct resample : public primitive_base<resample> {
std::vector<int32_t> pads_end = {}, std::vector<int32_t> pads_end = {},
int32_t antialias = 0, int32_t antialias = 0,
float cube_coeff = -0.75f, float cube_coeff = -0.75f,
resample_type mode = resample_type::caffe_bilinear, resample_type operation_type = resample_type::caffe_bilinear,
shape_calculation_mode shape_calc_mode = shape_calculation_mode::sizes, shape_calculation_mode shape_calc_mode = shape_calculation_mode::sizes,
coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel, coordinate_transformation_mode ctm = coordinate_transformation_mode::half_pixel,
nearest_mode nm = nearest_mode::round_prefer_floor, nearest_mode nm = nearest_mode::round_prefer_floor,
const padding& output_padding = padding()) const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding), : primitive_base(id, {input}, output_padding),
output_size(output_size), output_size(output_size),
num_filter(0),
axesAndScales(axesAndScales), axesAndScales(axesAndScales),
pads_begin(pads_begin), pads_begin(pads_begin),
pads_end(pads_end), pads_end(pads_end),
operation_type(mode), align_corners(1),
operation_type(operation_type),
shape_calc_mode(shape_calc_mode), shape_calc_mode(shape_calc_mode),
with_activation(false),
antialias(antialias), antialias(antialias),
cube_coeff(cube_coeff), cube_coeff(cube_coeff),
coord_trans_mode(ctm), coord_trans_mode(ctm),
@ -200,21 +197,17 @@ struct resample : public primitive_base<resample> {
std::vector<int32_t> pads_end; std::vector<int32_t> pads_end;
/// @param align_corners corner pixels of the input and output tensors /// @param align_corners corner pixels of the input and output tensors
int32_t align_corners; int32_t align_corners;
/// @param sample_type Resample method (nearest neighbor/bilinear/caffe bilinear). /// @param operation_type Resample method (nearest neighbor/bilinear/caffe bilinear).
resample_type operation_type; resample_type operation_type;
/// @param shape_calc_mode Specifies which input, sizes or scales, is used to calculate an output shape. /// @param shape_calc_mode Specifies which input, sizes or scales, is used to calculate an output shape.
shape_calculation_mode shape_calc_mode; shape_calculation_mode shape_calc_mode;
/// @brief Enables Relu activation.
bool with_activation;
/// @brief Relu activation slope.
float activation_negative_slope;
/// @param antialias is a flag that specifies whether to perform anti-aliasing. /// @param antialias is a flag that specifies whether to perform anti-aliasing.
int32_t antialias; int32_t antialias;
/// @param cube_coeff specifies the parameter a for cubic interpolation. cube_coeff is used only when mode == cubic. /// @param cube_coeff specifies the parameter a for cubic interpolation. cube_coeff is used only when mode == cubic.
float cube_coeff; float cube_coeff;
/// @param specifies how to transform the coordinate in the resized tensor to the coordinate in the original tensor /// @param coord_trans_mode specifies how to transform the coordinate in the resized tensor to the coordinate in the original tensor
coordinate_transformation_mode coord_trans_mode; coordinate_transformation_mode coord_trans_mode;
/// @param specifies round mode when mode == nearest and is used only when mode == nearest. /// @param round_mode specifies round mode when mode == nearest and is used only when mode == nearest.
nearest_mode round_mode; nearest_mode round_mode;
}; };
/// @} /// @}

View File

@ -34,7 +34,7 @@ public:
}; };
} }
JitConstants GetJitConstants(const eltwise_params& params) const; JitConstants GetJitConstants(const eltwise_params& params) const override;
protected: protected:
bool Validate(const Params& p, const optional_params& o) const override; bool Validate(const Params& p, const optional_params& o) const override;

View File

@ -24,7 +24,7 @@ class ReduceKernel_b_fs_yx_fsv16 : public ReduceKernelBase {
public: public:
ReduceKernel_b_fs_yx_fsv16() : ReduceKernelBase("reduce_gpu_b_fs_yx_fsv16") {} ReduceKernel_b_fs_yx_fsv16() : ReduceKernelBase("reduce_gpu_b_fs_yx_fsv16") {}
virtual ~ReduceKernel_b_fs_yx_fsv16() {} virtual ~ReduceKernel_b_fs_yx_fsv16() {}
virtual CommonDispatchData SetDefault(const reduce_params& params, const optional_params&) const; CommonDispatchData SetDefault(const reduce_params& params, const optional_params&) const override;
JitConstants GetJitConstants(const reduce_params& params) const override; JitConstants GetJitConstants(const reduce_params& params) const override;
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override; ParamsKey GetSupportedKey() const override;

View File

@ -24,7 +24,7 @@ class ReduceKernelRef : public ReduceKernelBase {
public: public:
ReduceKernelRef() : ReduceKernelBase("reduce_ref") {} ReduceKernelRef() : ReduceKernelBase("reduce_ref") {}
virtual ~ReduceKernelRef() {} virtual ~ReduceKernelRef() {}
virtual CommonDispatchData SetDefault(const reduce_params& params, const optional_params&) const; CommonDispatchData SetDefault(const reduce_params& params, const optional_params&) const override;
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override; ParamsKey GetSupportedKey() const override;
JitConstants GetJitConstants(const reduce_params& params) const override; JitConstants GetJitConstants(const reduce_params& params) const override;

View File

@ -50,7 +50,7 @@ public:
protected: protected:
virtual CommonDispatchData SetDefault(const space_to_depth_params& params, const optional_params&) const; virtual CommonDispatchData SetDefault(const space_to_depth_params& params, const optional_params&) const;
virtual JitConstants GetJitConstants(const space_to_depth_params& params) const; virtual JitConstants GetJitConstants(const space_to_depth_params& params) const;
virtual bool Validate(const Params& p, const optional_params& o) const; bool Validate(const Params& p, const optional_params& o) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override { std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::ELTWISE, return { FusedOpType::ELTWISE,
FusedOpType::QUANTIZE, FusedOpType::QUANTIZE,

View File

@ -55,7 +55,7 @@ public:
} }
std::shared_ptr<gpu_toolkit> get_context() const { return _ctx; } std::shared_ptr<gpu_toolkit> get_context() const { return _ctx; }
cl::Event get() { return _event; } cl::Event get() override { return _event; }
private: private:
std::shared_ptr<gpu_toolkit> _ctx; std::shared_ptr<gpu_toolkit> _ctx;
@ -91,7 +91,7 @@ public:
_attached = true; _attached = true;
} }
cl::Event get() { return _last_ocl_event; } cl::Event get() override { return _last_ocl_event; }
std::shared_ptr<gpu_toolkit> get_context() const { return _ctx; } std::shared_ptr<gpu_toolkit> get_context() const { return _ctx; }
private: private:

View File

@ -118,9 +118,6 @@ struct resample_gpu : typed_primitive_gpu_impl<resample> {
get_default_optional_params<kernel_selector::resample_optional_params>(arg.get_program()); get_default_optional_params<kernel_selector::resample_optional_params>(arg.get_program());
const auto& primitive = arg.get_primitive(); const auto& primitive = arg.get_primitive();
if (primitive->with_activation)
convert_activation_func_params(primitive, us_params.activations);
size_t dimsNum = arg.get_output_layout().format.dimension(); size_t dimsNum = arg.get_output_layout().format.dimension();
us_params.resampleType = convert_to_sample_type(primitive->operation_type); us_params.resampleType = convert_to_sample_type(primitive->operation_type);
us_params.nearestMode = convert_to_nearest_mode(primitive->round_mode); us_params.nearestMode = convert_to_nearest_mode(primitive->round_mode);

View File

@ -118,7 +118,6 @@ std::string resample_inst::to_string(resample_node const& node) {
resample_info.add("nearest_mode:", "simple"); resample_info.add("nearest_mode:", "simple");
resample_info.add("output_size", desc->output_size); resample_info.add("output_size", desc->output_size);
resample_info.add("with activation", desc->with_activation);
resample_info.add("output padding lower size", desc->output_padding.lower_size()); resample_info.add("output padding lower size", desc->output_padding.lower_size());
resample_info.add("output padding upper size", desc->output_padding.upper_size()); resample_info.add("output padding upper size", desc->output_padding.upper_size());

@ -1 +1 @@
Subproject commit d7d8ed46078b637794bc91215e1a982bb0f1683a Subproject commit 5ef085d5af65e8966e03cdfcbaa65761d61a5c9a

View File

@ -343,6 +343,8 @@ extensions/front/tf/__init__.py
extensions/front/tf/activation_ext.py extensions/front/tf/activation_ext.py
extensions/front/tf/argmax_ext.py extensions/front/tf/argmax_ext.py
extensions/front/tf/assign_elimination.py extensions/front/tf/assign_elimination.py
extensions/front/tf/automl_efficientdet.json
extensions/front/tf/AutomlEfficientDet.py
extensions/front/tf/basic_lstm_cell.py extensions/front/tf/basic_lstm_cell.py
extensions/front/tf/batch_to_space_ext.py extensions/front/tf/batch_to_space_ext.py
extensions/front/tf/BatchMatMul_ext.py extensions/front/tf/BatchMatMul_ext.py

View File

@ -15,9 +15,10 @@
""" """
from mo.front.common.partial_infer.utils import int64_array from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementOp from mo.front.common.replacement import FrontReplacementOp
from mo.graph.graph import Node, Graph from mo.front.tf.graph_utils import create_op_with_const_inputs
from mo.graph.graph import Node, Graph, rename_nodes
from mo.ops.concat import Concat from mo.ops.concat import Concat
from mo.ops.expand_dims import ExpandDims from mo.ops.unsqueeze import Unsqueeze
class Pack(FrontReplacementOp): class Pack(FrontReplacementOp):
@ -25,15 +26,15 @@ class Pack(FrontReplacementOp):
enabled = True enabled = True
def replace_op(self, graph: Graph, node: Node): def replace_op(self, graph: Graph, node: Node):
out_node = Concat(graph, {'axis': node.axis, 'in_ports_count': len(node.in_ports()), out_node = Concat(graph, {'axis': node.axis, 'in_ports_count': len(node.in_ports())}).create_node()
'name': node.name + '/Concat_', }).create_node() pack_name = node.soft_get('name', node.id)
for ind in node.in_ports(): for ind in node.in_ports():
expand_dims_node = ExpandDims(graph, {'expand_axis': int64_array([node.axis]), unsqueeze_node = create_op_with_const_inputs(graph, Unsqueeze, {1: int64_array([node.axis])},
'name': node.name + '/ExpandDims_'}).create_node() {'name': node.soft_get('name', node.id) + '/Unsqueeze'})
node.in_port(ind).get_connection().set_destination(expand_dims_node.in_port(0)) node.in_port(ind).get_connection().set_destination(unsqueeze_node.in_port(0))
expand_dims_node.out_port(0).connect(out_node.in_port(ind)) unsqueeze_node.out_port(0).connect(out_node.in_port(ind))
# Replace edge from out port 0 of the matched node with a edge from node out_node.id with port 0.
# The "explicit" version of the return value is: [(out_node.id, 0)]) rename_nodes([(node, pack_name + '/TBR'), (out_node, pack_name)])
return [out_node.id] return [out_node.id]

View File

@ -20,6 +20,7 @@ import numpy as np
from generator import generator, generate from generator import generator, generate
from extensions.front.Pack import Pack from extensions.front.Pack import Pack
from mo.front.common.partial_infer.utils import int64_array
from mo.utils.ir_engine.compare_graphs import compare_graphs from mo.utils.ir_engine.compare_graphs import compare_graphs
from mo.utils.unittest.graph import build_graph from mo.utils.unittest.graph import build_graph
@ -32,12 +33,16 @@ nodes_attributes = {
'pack': {'axis': None, 'type': None, 'kind': 'op', 'op': 'Pack'}, 'pack': {'axis': None, 'type': None, 'kind': 'op', 'op': 'Pack'},
# Test operation # Test operation
'last': {'type': None, 'value': None, 'kind': 'op', 'op': None}, 'last': {'type': None, 'value': None, 'kind': 'op', 'op': None},
# ExpandDims, Concat and Const operations # Unsqueeze, Concat and Const operations
'const_1': {'value': None, 'type': None, 'kind': 'op', 'op': 'Const'}, 'const_1': {'value': None, 'type': None, 'kind': 'op', 'op': 'Const'},
'ExpandDims_0': {'expand_axis': None, 'type': None, 'kind': 'op', 'op': 'ExpandDims'}, 'Unsqueeze_0': {'type': 'Unsqueeze', 'kind': 'op', 'op': 'Unsqueeze'},
'ExpandDims_1': {'expand_axis': None, 'type': None, 'kind': 'op', 'op': 'ExpandDims'}, 'Unsqueeze_1': {'type': 'Unsqueeze', 'kind': 'op', 'op': 'Unsqueeze'},
'ExpandDims_2': {'expand_axis': None, 'type': None, 'kind': 'op', 'op': 'ExpandDims'}, 'Unsqueeze_2': {'type': 'Unsqueeze', 'kind': 'op', 'op': 'Unsqueeze'},
'ExpandDims_3': {'expand_axis': None, 'type': None, 'kind': 'op', 'op': 'ExpandDims'}, 'Unsqueeze_3': {'type': 'Unsqueeze', 'kind': 'op', 'op': 'Unsqueeze'},
'Unsqueeze_0_axis': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': None, 'value': None},
'Unsqueeze_1_axis': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': None, 'value': None},
'Unsqueeze_2_axis': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': None, 'value': None},
'Unsqueeze_3_axis': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': None, 'value': None},
'concat_1': {'axis': None, 'type': 'Concat', 'kind': 'op', 'op': 'Concat'}, 'concat_1': {'axis': None, 'type': 'Concat', 'kind': 'op', 'op': 'Concat'},
} }
@ -65,15 +70,17 @@ class PackTest(unittest.TestCase):
graph_ref_edges = [] graph_ref_edges = []
for i in range(num_inputs - num_placeholders + 1): for i in range(num_inputs - num_placeholders + 1):
for j in range(num_placeholders): for j in range(num_placeholders):
graph_ref_edges.append(('placeholder_{}'.format(j), 'ExpandDims_{}'.format(i + j))) graph_ref_edges.append(('placeholder_{}'.format(j), 'Unsqueeze_{}'.format(i + j)))
graph_ref_edges.append(('ExpandDims_{}'.format(i + j), 'concat_1')) graph_ref_edges.append(('Unsqueeze_{}'.format(i + j), 'concat_1'))
graph_ref_edges.append(('concat_1', 'last')) graph_ref_edges.append(('concat_1', 'last'))
update_graph_ref_attributes = {} update_graph_ref_attributes = {}
for i in range(num_placeholders): for i in range(num_placeholders):
update_graph_ref_attributes['placeholder_{}'.format(i)] = {'shape': np.array([1, 227, 227, 3])} update_graph_ref_attributes['placeholder_{}'.format(i)] = {'shape': np.array([1, 227, 227, 3])}
for i in range(num_inputs): for i in range(num_inputs):
update_graph_ref_attributes['ExpandDims_{}'.format(i)] = {'expand_axis': np.array([axis])} graph_ref_edges.append(('Unsqueeze_{}_axis'.format(i), 'Unsqueeze_{}'.format(i)))
update_graph_ref_attributes['Unsqueeze_{}_axis'.format(i)] = {'shape': int64_array([1]),
'value': int64_array([axis])}
update_graph_ref_attributes['concat_1'] = {'axis': axis} update_graph_ref_attributes['concat_1'] = {'axis': axis}
graph_ref = build_graph(nodes_attributes, graph_ref_edges, update_graph_ref_attributes, graph_ref = build_graph(nodes_attributes, graph_ref_edges, update_graph_ref_attributes,

View File

@ -0,0 +1,140 @@
"""
Copyright (C) 2018-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import numpy as np
from extensions.front.Pack import Pack
from extensions.front.TransposeOrderNormalizer import TransposeOrderNormalizer
from extensions.front.eltwise_n import EltwiseNReplacement
from extensions.front.tf.pad_tf_to_pad import PadTFToPad
from extensions.ops.DetectionOutput import DetectionOutput
from extensions.ops.activation_ops import Sigmoid
from extensions.ops.priorbox_clustered import PriorBoxClusteredOp
from mo.front.common.partial_infer.utils import int64_array
from mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral
from mo.graph.graph import Graph, Node
from mo.middle.passes.convert_data_type import data_type_str_to_np
from mo.ops.concat import Concat
from mo.ops.const import Const
from mo.ops.reshape import Reshape
from mo.ops.result import Result
class EfficientDet(FrontReplacementFromConfigFileGeneral):
replacement_id = 'AutomlEfficientDet'
def run_before(self):
from extensions.front.ExpandDimsToUnsqueeze import ExpandDimsToUnsqueeze
return [ExpandDimsToUnsqueeze, Pack, TransposeOrderNormalizer, PadTFToPad, EltwiseNReplacement]
class AnchorGenerator:
def __init__(self, min_level, aspect_ratios, num_scales, anchor_scale):
self.min_level = min_level
self.aspect_ratios = aspect_ratios
self.anchor_scale = anchor_scale
self.scales = [2 ** (float(s) / num_scales) for s in range(num_scales)]
def get(self, layer_id):
widths = []
heights = []
for s in self.scales:
for a in self.aspect_ratios:
base_anchor_size = 2 ** (self.min_level + layer_id) * self.anchor_scale
heights.append(base_anchor_size * s * a[1])
widths.append(base_anchor_size * s * a[0])
return widths, heights
def transform_graph(self, graph: Graph, replacement_descriptions: dict):
parameter_node = graph.get_op_nodes(op='Parameter')[0]
parameter_node['data_type'] = data_type_str_to_np(parameter_node.graph.graph['cmd_params'].data_type)
parameter_node.out_port(0).disconnect()
# remove existing Result operations to remove unsupported sub-graph
graph.remove_nodes_from([node.id for node in graph.get_op_nodes(op='Result')] + ['detections'])
# find the ops which are the input and the final result of applying the mean value and scale to the input tensor,
# then connect the latter to the input of the first convolution of the model, so the image pre-processing
# (which includes padding and resizing) is removed from the model
preprocessing_input_node_id = replacement_descriptions['preprocessing_input_node']
assert preprocessing_input_node_id in graph.nodes, 'The node with name "{}" is not found in the graph. This ' \
'node should provide scaled image output and is specified' \
' in the json file.'.format(preprocessing_input_node_id)
preprocessing_input_node = Node(graph, preprocessing_input_node_id)
preprocessing_input_node.in_port(0).get_connection().set_source(parameter_node.out_port(0))
preprocessing_output_node_id = replacement_descriptions['preprocessing_output_node']
assert preprocessing_output_node_id in graph.nodes, 'The node with name "{}" is not found in the graph. This ' \
'node should provide scaled image output and is specified' \
' in the json file.'.format(preprocessing_output_node_id)
preprocessing_output_node = Node(graph, preprocessing_output_node_id)
preprocessing_output_node.out_port(0).disconnect()
convolution_nodes = [n for n in graph.pseudo_topological_sort() if n.soft_get('type') == 'Convolution']
convolution_nodes[0].in_port(0).get_connection().set_source(preprocessing_output_node.out_port(0))
# create prior boxes (anchors) generator
aspect_ratios = replacement_descriptions['aspect_ratios']
assert len(aspect_ratios) % 2 == 0
aspect_ratios = list(zip(aspect_ratios[::2], aspect_ratios[1::2]))
priors_generator = self.AnchorGenerator(min_level=int(replacement_descriptions['min_level']),
aspect_ratios=aspect_ratios,
num_scales=int(replacement_descriptions['num_scales']),
anchor_scale=replacement_descriptions['anchor_scale'])
prior_boxes = []
for i in range(100):
inp_name = 'box_net/box-predict{}/BiasAdd'.format('_%d' % i if i else '')
if inp_name not in graph:
break
widths, heights = priors_generator.get(i)
prior_box_op = PriorBoxClusteredOp(graph, {'width': np.array(widths),
'height': np.array(heights),
'clip': 0, 'flip': 0,
'variance': replacement_descriptions['variance'],
'offset': 0.5})
prior_boxes.append(prior_box_op.create_node([Node(graph, inp_name), parameter_node]))
# concatenate prior box operations
concat_prior_boxes = Concat(graph, {'axis': -1}).create_node()
for idx, node in enumerate(prior_boxes):
concat_prior_boxes.add_input_port(idx)
concat_prior_boxes.in_port(idx).connect(node.out_port(0))
conf = Sigmoid(graph, dict(name='concat/sigmoid')).create_node([Node(graph, 'concat')])
reshape_size_node = Const(graph, {'value': int64_array([0, -1])}).create_node([])
logits = Reshape(graph, dict(name=conf.name + '/Flatten')).create_node([conf, reshape_size_node])
deltas = Reshape(graph, dict(name='concat_1/Flatten')).create_node([Node(graph, 'concat_1'), reshape_size_node])
# revert convolution boxes prediction weights from yxYX to xyXY (convolutions share weights and bias)
weights = Node(graph, 'box_net/box-predict/pointwise_kernel')
weights.value = weights.value.reshape(-1, 4)[:, [1, 0, 3, 2]].reshape(weights.shape)
bias = Node(graph, 'box_net/box-predict/bias')
bias.value = bias.value.reshape(-1, 4)[:, [1, 0, 3, 2]].reshape(bias.shape)
detection_output_node = DetectionOutput(graph, dict(
name='detections',
num_classes=int(replacement_descriptions['num_classes']),
share_location=1,
background_label_id=int(replacement_descriptions['num_classes']) + 1,
nms_threshold=replacement_descriptions['nms_threshold'],
confidence_threshold=replacement_descriptions['confidence_threshold'],
top_k=100,
keep_top_k=100,
code_type='caffe.PriorBoxParameter.CENTER_SIZE',
)).create_node([deltas, logits, concat_prior_boxes])
output_op = Result(graph, dict(name='output'))
output_op.create_node([detection_output_node])

View File

@ -0,0 +1,18 @@
[
{
"id": "AutomlEfficientDet",
"custom_attributes": {
"preprocessing_input_node": "convert_image",
"preprocessing_output_node": "truediv",
"aspect_ratios": [1.0, 1.0, 1.4, 0.7, 0.7, 1.4],
"variance": [1.0, 1.0, 1.0, 1.0],
"min_level": 3,
"num_scales": 3,
"anchor_scale": 4.0,
"num_classes": 90,
"nms_threshold": 0.6,
"confidence_threshold": 0.2
},
"match_kind": "general"
}
]
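With these attributes, the `AnchorGenerator` in `AutomlEfficientDet.py` yields, for the first prior-box layer (`layer_id = 0`): `base_anchor_size = 2 ** (3 + 0) * 4.0 = 32`, with scales `2 ** (0/3) = 1.0`, `2 ** (1/3) ≈ 1.26` and `2 ** (2/3) ≈ 1.59`. The aspect-ratio pair (1.0, 1.0) therefore produces anchors of roughly 32×32, 40.3×40.3 and 50.8×50.8 pixels, while (1.4, 0.7) and (0.7, 1.4) stretch width and height asymmetrically (e.g. 44.8×22.4 for the first scale).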

View File

@ -32,14 +32,14 @@ class Unsqueeze(Op):
def __init__(self, graph, attrs: dict): def __init__(self, graph, attrs: dict):
super().__init__(graph, { super().__init__(graph, {
'op': __class__.op, 'op': self.op,
'type': __class__.op, 'type': self.op,
'version': 'opset1', 'version': 'opset1',
'unsqueeze_dims': None, 'unsqueeze_dims': None,
'reinterp_shape': True, 'reinterp_shape': True,
'in_ports_count': 2, 'in_ports_count': 2,
'out_ports_count': 1, 'out_ports_count': 1,
'infer': __class__.infer 'infer': self.infer
}, attrs) }, attrs)
@staticmethod @staticmethod

Some files were not shown because too many files have changed in this diff.